Parallel Tasks

Parallel tasks allow you to run commands in parallel.

Sample implementation:

File tasks

class mara_pipelines.parallel_tasks.files.ParallelReadFile(id: str, description: str, file_pattern: str, read_mode: mara_pipelines.parallel_tasks.files.ReadMode, compression: mara_pipelines.commands.files.Compression, target_table: str, file_dependencies: Optional[List[str]] = None, date_regex: Optional[str] = None, partition_target_table_by_day_id: bool = False, truncate_partitions: bool = False, commands_before: Optional[List[mara_pipelines.pipelines.Command]] = None, commands_after: Optional[List[mara_pipelines.pipelines.Command]] = None, mapper_script_file_name: Optional[str] = None, make_unique: bool = False, db_alias: Optional[str] = None, delimiter_char: Optional[str] = None, quote_char: Optional[str] = None, null_value_string: Optional[str] = None, skip_header: Optional[bool] = None, csv_format: bool = False, timezone: Optional[str] = None, max_number_of_parallel_tasks: Optional[int] = None)
class mara_pipelines.parallel_tasks.files.ParallelReadSqlite(id: str, description: str, file_pattern: str, read_mode: mara_pipelines.parallel_tasks.files.ReadMode, sql_file_name: str, target_table: str, file_dependencies: Optional[List[str]] = None, date_regex: Optional[str] = None, partition_target_table_by_day_id: bool = False, truncate_partitions: bool = False, commands_before: Optional[List[mara_pipelines.pipelines.Command]] = None, commands_after: Optional[List[mara_pipelines.pipelines.Command]] = None, db_alias: Optional[str] = None, timezone=None, max_number_of_parallel_tasks: Optional[int] = None)

Python tasks

class mara_pipelines.parallel_tasks.python.ParallelExecutePython(id: str, description: str, file_name: str, parameter_function: Callable, max_number_of_parallel_tasks: Optional[int] = None, commands_before: Optional[List[mara_pipelines.pipelines.Command]] = None, commands_after: Optional[List[mara_pipelines.pipelines.Command]] = None)
class mara_pipelines.parallel_tasks.python.ParallelRunFunction(id: str, description: str, function: Callable, parameter_function: Callable, max_number_of_parallel_tasks: Optional[int] = None, commands_before: Optional[List[mara_pipelines.pipelines.Command]] = None, commands_after: Optional[List[mara_pipelines.pipelines.Command]] = None)

SQL tasks

class mara_pipelines.parallel_tasks.sql.ParallelExecuteSQL(id: str, description: str, parameter_function: Callable, parameter_placeholders: List[str], max_number_of_parallel_tasks: Optional[int] = None, sql_statement: Optional[str] = None, file_name: Optional[str] = None, commands_before: Optional[List[mara_pipelines.pipelines.Command]] = None, commands_after: Optional[List[mara_pipelines.pipelines.Command]] = None, db_alias: Optional[str] = None, echo_queries: Optional[bool] = None, timezone: Optional[str] = None, replace: Optional[Dict[str, str]] = None)