Source code for flowcraft.generator.components.metagenomics


try:
    from generator.process import Process
except ImportError:
    from flowcraft.generator.process import Process


[docs]class Concoct(Process): """ CONCOCT process template interface for the taxonomic independent binning of metagenomic assemblies. This process is set with: - ``input_type``: assembly - ``output_type``: assembly - ``ptype``: post_assembly It contains one **secondary channel link end**: - ``MAIN_fq`` (alias: ``_MAIN_assembly``): Receives the FastQ files from the last process with ``fastq`` output type. """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fasta" self.output_type = "fasta" self.link_end.append({"link": "__fastq", "alias": "_LAST_fastq"}) self.params = { "clusters": { "default": 400, "description": "Maximum number of clusters for VGMM. Default: 400" }, "lengthThreshold": { "default": 1000, "description": "Contigs shorter than this value will not be included. Default: 1000." }, "readLength": { "default": 100, "description": "Specify read length for coverage." "Default: 0.9" }, "iterations": { "default": 500, "description": "Number of iterations for the VBGMM. Default: 500" }, "clearInput": { "default": "false", "description": "Permanently removes temporary input files. This option " "is only useful to remove temporary files in large " "workflows and prevents nextflow's resume functionality. " "Use with caution." } } self.directives = { "concoct": { "container": "flowcraft/concoct", "version": "1.0.0-1", "cpus": 4, "memory": "{ 5.GB * task.attempt }" } } self.status_channels = [ "concoct", "report_concoct" ]
[docs]class Kraken(Process): """kraken process template interface This process is set with: - ``input_type``: fastq - ``output_type``: txt - ``ptype``: taxonomic classification """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = "txt" self.params = { "krakenDB": { "default": "'minikraken_20171013_4GB'", "description": "Specifies kraken database." } } self.directives = { "kraken": { "container": "flowcraft/kraken", "version": "1.0-0.1", "memory": "{5.Gb*task.attempt}", "cpus": 3 } } self.status_channels = [ "kraken" ]
[docs]class Kraken2(Process): """kraken2 process template interface This process is set with: - ``input_type``: fastq - ``output_type``: txt - ``ptype``: taxonomic classification """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = None self.params = { "kraken2DB": { "default": "'minikraken2_v1_8GB'", "description": "Specifies kraken2 database. Requires full path if database not on " "KRAKEN2_DB_PATH." } } self.directives = { "kraken2": { "container": "flowcraft/kraken2", "version": "2.0.7-1", "memory": "{8.Gb*task.attempt}", "cpus": 4 } } self.status_channels = [ "kraken2" ]
[docs]class Maxbin2(Process): """MaxBin2, a metagenomics binning software This process is set with: - ``input_type``: assembly - ``output_type``: assembly - ``ptype``: post_assembly It contains one **secondary channel link end**: - ``MAIN_fq`` (alias: ``_MAIN_assembly``): Receives the FastQ files from the last process with ``fastq`` output type. """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fasta" self.output_type = "fasta" self.link_end.append({"link": "__fastq", "alias": "_LAST_fastq"}) self.params = { "min_contig_lenght": { "default": 1000, "description": "minimum contig length. Default: 1000" }, "max_iteration": { "default": 50, "description": "maximum Expectation-Maximization algorithm" "iteration number. Default: 50" }, "prob_threshold": { "default": 0.9, "description": "probability threshold for EM final classification." "Default: 0.9" }, "clearInput": { "default": "false", "description": "Permanently removes temporary input files. This option " "is only useful to remove temporary files in large " "workflows and prevents nextflow's resume functionality. " "Use with caution." } } self.directives = { "maxbin2": { "container": "flowcraft/maxbin2", "version": "2.2.4-1", "cpus": 3, "memory": "{ 5.GB * task.attempt }" } } self.status_channels = [ "maxbin2", "report_maxbin2" ]
[docs]class Megahit(Process): """megahit process template interface This process is set with: - ``input_type``: fastq - ``output_type``: assembly - ``ptype``: assembly It contains one **secondary channel link end**: - ``SIDE_max_len`` (alias: ``SIDE_max_len``): Receives max read length """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = "fasta" self.link_end.append({"link": "SIDE_max_len", "alias": "SIDE_max_len"}) self.dependencies = ["integrity_coverage"] self.params = { "megahitKmers": { "default": "'auto'", "description": "If 'auto' the megahit k-mer lengths will be determined " "from the maximum read length of each assembly. If " "'default', megahit will use the default k-mer lengths. " "(default: $params.megahitKmers)" }, "fastg": { "default": "false", "description": "Converts megahit intermediate contigs to fastg" }, "clearInput": { "default": "false", "description": "Permanently removes temporary input files. This option " "is only useful to remove temporary files in large " "workflows and prevents nextflow's resume functionality. " "Use with caution." } } self.directives = {"megahit": { "cpus": 4, "memory": "{ 5.GB * task.attempt }", "container": "flowcraft/megahit", "version": "1.1.3-0.1", "scratch": "true" }, "megahit_fastg": { "container": "flowcraft/megahit", "version": "1.1.3-0.1", } } self.status_channels = [ "megahit", "megahit_fastg" ]
[docs]class Metabat2(Process): """ MetaBat2 process template interface for the taxonomic independent binning of metagenomic assemblies. This process is set with: - ``input_type``: assembly - ``output_type``: assembly - ``ptype``: post_assembly It contains one **dependency process**: - ``assembly_mapping``: Requires the BAM file generated by the assembly mapping process """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fasta" self.output_type = "fasta" self.dependencies = ["assembly_mapping"] self.params = { "maxPercentage": { "default": 95, "description": "Percentage of 'good' contigs considered for binning decided by connection. Default: 95." }, "minContig": { "default": 2500, "description": "Minimum size of a contig for binning (should be >=1500). Default: 2500." }, "clearInput": { "default": "false", "description": "Permanently removes temporary input files. This option " "is only useful to remove temporary files in large " "workflows and prevents nextflow's resume functionality. " "Use with caution." } } self.directives = { "metabat2": { "container": "flowcraft/metabat", "version": "2.13-1", "cpus": 4, "memory": "{ 5.GB * task.attempt }" } } self.status_channels = [ "metabat2", "report_metabat2" ]
[docs]class Metaspades(Process): """Metaspades process template interface This process is set with: - ``input_type``: fastq - ``output_type``: assembly - ``ptype``: assembly It contains one **secondary channel link end**: - ``SIDE_max_len`` (alias: ``SIDE_max_len``): Receives max read length """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = "fasta" self.link_end.append({"link": "SIDE_max_len", "alias": "SIDE_max_len"}) self.dependencies = ["integrity_coverage"] self.params = { "metaspadesKmers": { "default": "'auto'", "description": "If 'auto' the metaSPAdes k-mer lengths will be determined " "from the maximum read length of each assembly. If " "'default', metaSPAdes will use the default k-mer lengths. " "(default: $params.metaspadesKmers)" }, "clearInput": { "default": "false", "description": "Permanently removes temporary input files. This option " "is only useful to remove temporary files in large " "workflows and prevents nextflow's resume functionality. " "Use with caution." } } self.directives = {"metaspades": { "cpus": 4, "memory": "{ 5.GB * task.attempt }", "container": "flowcraft/spades", "version": "3.11.1-1", "scratch": "true" }}
[docs]class Midas_species(Process): """Midas species process template interface This process is set with: - ``input_type``: fastq - ``output_type``: txt - ``ptype``: taxonomic classification (species) """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = "txt" self.params = { "midasDB": { "default": "null", "description": "Specifies Midas database." } } self.directives = { "midas_species": { "container": "flowcraft/midas", "version": "1.3.2-0.1", "memory": "{2.Gb*task.attempt}", "cpus": 3 } } self.status_channels = [ "midas_species" ]
[docs]class RemoveHost(Process): """bowtie2 to remove host reads process template interface This process is set with: - ``input_type``: fastq - ``output_type``: fastq - ``ptype``: removal os host reads """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = "fastq" self.params = { "refIndex": { "default": "'/index_hg19/hg19'", "description": "Specifies the reference indexes to be provided " "to bowtie2." }, "clearInput": { "default": "false", "description": "Permanently removes temporary input files. This option " "is only useful to remove temporary files in large " "workflows and prevents nextflow's resume functionality. " "Use with caution." } } self.directives = { "remove_host": { "container": "flowcraft/remove_host", "version": "2-0.1", "memory": "{5.Gb*task.attempt}", "cpus": 3 } } self.status_channels = [ "remove_host", "report_remove_host" ]
[docs]class Metaprob(Process): """MetaProb to bin metagenomic reads interface This process is set with: - ``input_type``: fastq - ``output_type``: csv - ``ptype``: binning of reads """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fastq" self.output_type = "csv" self.params = { "feature": { "default": 1, "description": "Feature used to compute. Default: 1" }, "metaProbQMer": { "default": 5, "description": "Threshold of shared q-mer to create graph " "adiacences. Default: 5" } } self.directives = { "metaProb": { "container": "flowcraft/metaprob", "version": "2-1", "cpus": 1, "memory": "{ 30.GB * task.attempt }" } } self.status_channels = [ "metaProb" ]
[docs]class SplitAssembly(Process): """Component to filter metagenomic assemblies by contig size If the contig is larger than $param.size, it gets separated from the original assembly to continue the processes downstream of the pipeline. This process is set with: - ``input_type``: fasta - ``output_type``: fasta - ``ptype``: assembly filter """ def __init__(self, **kwargs): super().__init__(**kwargs) self.input_type = "fasta" self.output_type = "fasta" self.params = { "size": { "default": "null", "description": "Minimum contig size" } } self.directives = { "split_assembly": { "cpus": 1, "memory": "{ 1.GB * task.attempt }" } } self.status_channels = [ "split_assembly" ]