From 3fd3391ba2cb449b26da5a539a4b8cfd4b79bbb5 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 18 Jun 2019 14:50:33 +0100 Subject: [PATCH 1/6] Merge branch 'dev' of /home/ines/git/flowcraft with conflicts. --- .../generator/components/metagenomics.py | 61 ++++++++++++++++++- flowcraft/generator/templates/vamb.nf | 50 +++++++++++++++ ...est_assemblerflow.py => test_flowcraft.py} | 11 ++-- 3 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 flowcraft/generator/templates/vamb.nf rename flowcraft/tests/{test_assemblerflow.py => test_flowcraft.py} (80%) diff --git a/flowcraft/generator/components/metagenomics.py b/flowcraft/generator/components/metagenomics.py index 30eaa60b..15a1e6ff 100644 --- a/flowcraft/generator/components/metagenomics.py +++ b/flowcraft/generator/components/metagenomics.py @@ -28,7 +28,7 @@ def __init__(self, **kwargs): self.input_type = "fasta" self.output_type = "fasta" - self.link_end.append({"link": "__fastq", "alias": "_LAST_fastq"}) + self.link_end.append({"link": "SIDE_max_len", "alias": "SIDE_max_len"}) self.params = { "clusters": { @@ -561,3 +561,62 @@ def __init__(self, **kwargs): self.status_channels = [ "split_assembly" ] + + +class Vamb(Process): + """ + Vamb process template interface for the + taxonomic independent binning of metagenomic + assemblies. + + This process is set with: + - ``input_type``: assembly + - ``output_type``: assembly + - ``ptype``: post_assembly + + It contains one **dependency process**: + + - ``assembly_mapping``: Requires the BAM file generated by the + assembly mapping process + + """ + def __init__(self, **kwargs): + + super().__init__(**kwargs) + + self.input_type = "fasta" + self.output_type = "fasta" + + self.dependencies = ["assembly_mapping"] + + self.params = { + "minContig": { + "default": 2000, + "description": "Ignore contigs shorter than this. Default: 2000" + }, + "minAlignScore":{ + "default": 50, + "description": "Ignore reads with alignment score below this. 
Default: 50" + }, + "clearInput": { + "default": "false", + "description": + "Permanently removes temporary input files. This option " + "is only useful to remove temporary files in large " + "workflows and prevents nextflow's resume functionality. " + "Use with caution." + } + } + + self.directives = { + "vamb": { + "container": "flowcraft/vamb", + "version": "1.0.1-1", + "cpus": 4, + "memory": "{ 5.GB * task.attempt }" + } + } + + self.status_channels = [ + "vamb" + ] \ No newline at end of file diff --git a/flowcraft/generator/templates/vamb.nf b/flowcraft/generator/templates/vamb.nf new file mode 100644 index 00000000..78c34375 --- /dev/null +++ b/flowcraft/generator/templates/vamb.nf @@ -0,0 +1,50 @@ +IN_min_contig_{{ pid }} = Channel.value(params.minContig{{ param_id }}) +IN_min_align_score_{{ pid }} = Channel.value(params.minAlignScore{{ param_id }}) + +clear = params.clearInput{{ param_id }} ? "true" : "false" +checkpointClear_{{ pid }} = Channel.value(clear) + +process vamb_{{ pid }} { + + // Send POST request to platform + {% include "post.txt" ignore missing %} + + tag { sample_id } + + //publishDir "results/assembly/binning/vamb_{{ pid }}/${sample_id}/" + + input: + set sample_id, file(assembly), file(bam_file), file(bam_index) from {{ input_channel }} + val length_threshold from IN_min_contig_{{ pid }} + val min_score from IN_min_align_score_{{ pid }} + val clear from checkpointClear_{{ pid }} + + output: + + {% with task_name="vamb"%} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + """ + { + # run VAMB + run.py results/ ${assembly} ${bam_file} -m ${length_threshold} -s ${min_score} + + # In case no sequences are binned + if [ -z "\$(ls -A *metabat-bins*/)" ]; then + echo "false" > false_bin.fa + mv false_bin.fa *metabat-bins*/ + echo "false" > bin_status.txt; + else + echo "true" > bin_status.txt + fi + + } || { + echo fail > .status + } + """ +} + + +{{ forks }} \ No newline at end of file diff --git
a/flowcraft/tests/test_assemblerflow.py b/flowcraft/tests/test_flowcraft.py similarity index 80% rename from flowcraft/tests/test_assemblerflow.py rename to flowcraft/tests/test_flowcraft.py index 402b584b..c2eab5d7 100644 --- a/flowcraft/tests/test_assemblerflow.py +++ b/flowcraft/tests/test_flowcraft.py @@ -51,11 +51,12 @@ def test_build_file_2(tmp): "{}".format(p), "--pipeline-only"]) af.build(args) - assert sorted(os.listdir(tmp)) == [".forkTree.json", ".treeDag.json", - "containers.config", - "lib", "manifest.config", "params.config", - "resources.config", "teste.html", - "teste.nf", "user.config"] + assert sorted(os.listdir(tmp)) == ["containers.config", + "lib", "nextflow.config", "params.config", + "resources", "resources.config", + "teste.html", "teste.nf", "user.config"] + + assert sorted(os.listdir(os.path.join(tmp, "resources"))) == ["forkTree.json", "treeDag.json"] def test_build_recipe(tmp): From 1abb970f5d379f474d5d0d3458b39ffa3e04b7ab Mon Sep 17 00:00:00 2001 From: cimendes Date: Fri, 21 Jun 2019 18:55:25 +0100 Subject: [PATCH 2/6] fix bug publishdir --- changelog.md | 7 +++++++ flowcraft/generator/templates/downsample_fastq.nf | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 645f2d1a..e8a934ba 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,12 @@ # Changelog +## Upcoming in `dev` + +### Bug fixes + +- `downsample_fastq` now saves resulting fastq files in the appropriate results directory + + ## 1.4.2 ### New components diff --git a/flowcraft/generator/templates/downsample_fastq.nf b/flowcraft/generator/templates/downsample_fastq.nf index db7fcc3b..a59be2a3 100644 --- a/flowcraft/generator/templates/downsample_fastq.nf +++ b/flowcraft/generator/templates/downsample_fastq.nf @@ -17,7 +17,7 @@ process downsample_fastq_{{ pid }} { {% include "post.txt" ignore missing %} tag { "${sample_id}" } - publishDir "results/downsample_fastq_{{ pid }}/", pattern: "_ss.*" + publishDir
"results/downsample_fastq_{{ pid }}/", pattern: "*_ss.*" input: set sample_id, file(fastq_pair) from {{ input_channel }} From 704782b7719d39b0996985f8fe388bfc5a0775c6 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 16 Jul 2019 17:09:32 +0100 Subject: [PATCH 3/6] add phred33 when encoding fails to be determined --- changelog.md | 1 + flowcraft/templates/trimmomatic.py | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/changelog.md b/changelog.md index 8cb47b92..b100cd1e 100644 --- a/changelog.md +++ b/changelog.md @@ -12,6 +12,7 @@ down-sampling occurring - `treeDag.json` and `forktree.json` files are no longer hidden and are now stored in the `resources` directory +- `trimmomatic` now forces `-phred33` when the encoding can't be determined ## 1.4.2 diff --git a/flowcraft/templates/trimmomatic.py b/flowcraft/templates/trimmomatic.py index eefb3cce..832840a7 100644 --- a/flowcraft/templates/trimmomatic.py +++ b/flowcraft/templates/trimmomatic.py @@ -46,7 +46,7 @@ # TODO: More control over read trimming # TODO: Add option to remove adapters -# TODO: What to do when there is encoding failure +# TODO: What to do when there is encoding failure - forcing phred33 at the moment __version__ = "1.0.3" __build__ = "29062018" @@ -329,10 +329,12 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, phred = int(phred) phred_flag = "-phred{}".format(str(phred)) cli += [phred_flag] - # Could not detect phred encoding. Do not add explicit encoding to - # trimmomatic and let it guess + # Could not detect phred encoding. + # Forcing as phred33 to avoid encoding errors except ValueError: - pass + logger.info("Could not detect quality encoding.
Setting it to phred33") + phred_flag = "-phred33" + cli += [phred_flag] # Add input samples to CLI cli += fastq_pair From 6d46aeb8f952f4bf5847d956bcf69e483488f5fc Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 16 Jul 2019 17:28:40 +0100 Subject: [PATCH 4/6] add newline (CodeFactor) --- flowcraft/generator/components/metagenomics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flowcraft/generator/components/metagenomics.py b/flowcraft/generator/components/metagenomics.py index 15a1e6ff..692a940c 100644 --- a/flowcraft/generator/components/metagenomics.py +++ b/flowcraft/generator/components/metagenomics.py @@ -619,4 +619,4 @@ def __init__(self, **kwargs): self.status_channels = [ "vamb" - ] \ No newline at end of file + ] From 5b566ab2b6116c442f69a599cbd6558fc31c18ae Mon Sep 17 00:00:00 2001 From: cimendes Date: Wed, 17 Jul 2019 14:44:40 +0100 Subject: [PATCH 5/6] retry with phred64 encoding --- changelog.md | 3 +- flowcraft/templates/trimmomatic.py | 76 ++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/changelog.md b/changelog.md index b100cd1e..9e897e58 100644 --- a/changelog.md +++ b/changelog.md @@ -12,7 +12,8 @@ down-sampling occurring - `treeDag.json` and `forktree.json` files are no longer hidden and are now stored in the `resources` directory -- `trimmomatic` now forces `-phred33` when the encoding can't be determined +- `trimmomatic` now forces `-phred33` when the encoding can't be determined. If it still fails, the encoding is +changed to `-phred64` and the run is retried. 
## 1.4.2 diff --git a/flowcraft/templates/trimmomatic.py b/flowcraft/templates/trimmomatic.py index 832840a7..f60a6e77 100644 --- a/flowcraft/templates/trimmomatic.py +++ b/flowcraft/templates/trimmomatic.py @@ -283,6 +283,43 @@ def merge_default_adapters(): return filepath +def run_trimmomatic(cli, logfile, sample_id): + """ + Runs the trimmomatic command and returns its exit code + Parameters + ---------- + cli : list + List containing the trimmomatic command + logfile : str + Path to file for trimmomatic to write log + sample_id : str + Sample Identification string. + """ + + logger.debug("Running trimmomatic subprocess with command: {}".format(cli)) + + p = subprocess.Popen(cli, stdout=PIPE, stderr=PIPE) + stdout, stderr = p.communicate() + + # Attempt to decode STDERR output from bytes. If unsuccessful, coerce to + # string + try: + stderr = stderr.decode("utf8") + except (UnicodeDecodeError, AttributeError): + stderr = str(stderr) + + logger.info("Finished trimmomatic subprocess with STDOUT:\\n" + "======================================\\n{}".format(stdout)) + logger.info("Finished trimmomatic subprocesswith STDERR:\\n" + "======================================\\n{}".format(stderr)) + logger.info("Finished trimmomatic with return code: {}".format( + p.returncode)) + + trimmomatic_log(logfile, sample_id) + + return p.returncode + + @MainWrapper def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, clear): @@ -380,37 +417,28 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, logfile ] - logger.debug("Running trimmomatic subprocess with command: {}".format(cli)) - - p = subprocess.Popen(cli, stdout=PIPE, stderr=PIPE) - stdout, stderr = p.communicate() - - # Attempt to decode STDERR output from bytes.
If unsuccessful, coerce to - # string - try: - stderr = stderr.decode("utf8") - except (UnicodeDecodeError, AttributeError): - stderr = str(stderr) - - logger.info("Finished trimmomatic subprocess with STDOUT:\\n" - "======================================\\n{}".format(stdout)) - logger.info("Finished trimmomatic subprocesswith STDERR:\\n" - "======================================\\n{}".format(stderr)) - logger.info("Finished trimmomatic with return code: {}".format( - p.returncode)) - - trimmomatic_log(logfile, sample_id) + returncode = run_trimmomatic(cli, logfile, sample_id) - if p.returncode == 0 and os.path.exists("{}_1_trim.fastq.gz".format( + if returncode == 0 and os.path.exists("{}_1_trim.fastq.gz".format( SAMPLE_ID)): clean_up(fastq_pair, clear) # Check if trimmomatic ran successfully. If not, write the error message # to the status channel and exit. with open(".status", "w") as status_fh: - if p.returncode != 0: - status_fh.write("fail") - return + if returncode != 0: + # retry to run trimmomatic by changing the encoding from phred33 to phred64 + if "-phred33" in cli: + cli[7] = "-phred64" + returncode = run_trimmomatic(cli, logfile, sample_id) + if returncode != 0: + status_fh.write("fail") + return + else: + status_fh.write("pass") + else: + status_fh.write("fail") + return else: status_fh.write("pass") From c22eb0c3d764a6422a1c1e20e23fa240aba668b3 Mon Sep 17 00:00:00 2001 From: cimendes Date: Wed, 17 Jul 2019 14:47:37 +0100 Subject: [PATCH 6/6] add log message --- flowcraft/templates/trimmomatic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flowcraft/templates/trimmomatic.py b/flowcraft/templates/trimmomatic.py index f60a6e77..0c905fc0 100644 --- a/flowcraft/templates/trimmomatic.py +++ b/flowcraft/templates/trimmomatic.py @@ -429,8 +429,12 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, if returncode != 0: # retry to run trimmomatic by changing the encoding from phred33 to phred64 if "-phred33" in cli: + + 
logger.info("Trimmomatic failed while running with phred33. Setting it to phred64 and trying again...") cli[7] = "-phred64" + returncode = run_trimmomatic(cli, logfile, sample_id) + if returncode != 0: status_fh.write("fail") return