diff --git a/Dockerfile b/Dockerfile index ea17150..6e0b39f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,9 @@ FROM continuumio/anaconda3 LABEL "author"="Mathieu Fourment" LABEL "company"="University of Technology Sydney" +# bust cache and re-run all +ADD http://date.jsontest.com /etc/builddate + RUN apt-get update && \ apt-get install -y --no-install-recommends \ autoconf \ @@ -62,14 +65,17 @@ RUN ln -s /bitorch/benchmarks/benchmark.py /usr/local/bin/bitorch-benchmark \ RUN . /opt/conda/etc/profile.d/conda.sh && conda activate bito && /usr/local/bin/bitorch-benchmark --help RUN git clone --depth 1 https://github.com/4ment/phylojax /phylojax -RUN cd /phylojax && /opt/conda/envs/bito/bin/pip install jax==0.2.24 jaxlib . && /opt/conda/envs/bito/bin/phylojax --help +RUN cd /phylojax && /opt/conda/envs/bito/bin/pip install jax==0.2.24 jaxlib==0.3.7 numpy==1.22 . && /opt/conda/envs/bito/bin/phylojax --help RUN ln -s /phylojax/benchmarks/benchmark.py /usr/local/bin/phylojax-benchmark \ && chmod +x /usr/local/bin/phylojax-benchmark RUN pip install phylostan==1.0.5 && phylostan --help +RUN pip install tqdm -RUN git clone --depth 1 https://github.com/christiaanjs/treeflow.git /treeflow -RUN cd /treeflow && /opt/conda/envs/bito/bin/pip install . +# workaround to bust cache if the master branch has been updated ... +# ADD https://api.github.com/repos/christiaanjs/treeflow/git/refs/heads/master /.git-hashref +RUN git clone --depth 10 https://github.com/christiaanjs/treeflow.git /treeflow +RUN cd /treeflow && git checkout e3414dcc9e764d06abc3e19c1d0f55110499e2ea && /opt/conda/envs/bito/bin/pip install . RUN . /opt/conda/etc/profile.d/conda.sh && conda activate bito && treeflow_benchmark --help RUN echo "source activate bito" > ~/.bashrc diff --git a/configs/rhino.config b/configs/rhino.config index 6aed506..7b9d29f 100644 --- a/configs/rhino.config +++ b/configs/rhino.config @@ -7,26 +7,42 @@ manifest { } profiles { + + standard { + + process { + executor = 'local' + container = 'quay.io/matsengrp/autodiff-experiments:phylo_patch' + cpus = 4 + memory = 20.GB + } + + singularity { + enabled = true + autoMounts = true + cacheDir = '/fh/scratch/delete30/matsen_e/mathieu/temp/containers/' + runOptions = '--contain -W /tmp/' + } + } + + rhino { process { executor = 'slurm' - scratch = '/fh/scratch/delete30/matsen_e/mathieu/temp/nf-scratch/' + //scratch = '/fh/scratch/delete30/matsen_e/mathieu/temp/nf-scratch/' queue = 'campus-new' - cpus = 3 - memory = 20.GB - //container = 'quay.io/matsengrp/autodiff-experiments:rhino_config' - container = '4ment/autodiff-experiments' + cpus = 4 + memory = 32.GB + container = 'quay.io/matsengrp/autodiff-experiments:phylo_patch' time = '200h' withLabel: normal { time = '5h' } withLabel: fast { - cpus=2 time = '60m' } withLabel: ultrafast{ - cpus=2 time = '5m' } } diff --git a/modules/macro_flu.nf b/modules/macro_flu.nf index 369acfa..d77de89 100644 --- a/modules/macro_flu.nf +++ b/modules/macro_flu.nf @@ -139,7 +139,7 @@ process PREPARE_TORCHTREE { } process RUN_TORCHTREE { - label 'fast' + label 'normal' label 'bito' errorStrategy 'ignore' @@ -158,15 +158,16 @@ process RUN_TORCHTREE { process RUN_PHYLOJAX { label 'bito' - errorStrategy 'ignore' - publishDir "$params.results/macro/phylojax", mode: 'copy' input: tuple val(size), val(rep), path(tree_file), val(rate), path(seq_file) output: tuple path("phylojax.${size}.${rep}.txt"), path("phylojax.${size}.${rep}.log") + + when: + size <= 750 """ { time \ phylojax -i ${seq_file} \ @@ -181,19 +182,21 @@ process RUN_PHYLOJAX { } process RUN_TREEFLOW { - label 'fast' label 'bito' + errorStrategy 'ignore' + publishDir "$params.results/macro/treeflow", mode: 'copy' input: - tuple val(size), val(rep), path(tree_file), path(seq_file) + tuple val(size), val(rep), path(tree_file), val(rate), path(seq_file) output: tuple path("treeflow.${size}.${rep}.txt"), path("treeflow.${size}.${rep}.log") """ { time \ treeflow_vi -i ${seq_file} \ -t ${tree_file} \ + --init-values clock_rate=${rate} \ -n ${params.iterations} > treeflow.${size}.${rep}.txt ; } 2> treeflow.${size}.${rep}.log """ } @@ -222,7 +225,7 @@ workflow macro_flu { RUN_TORCHTREE(data_run.join(PREPARE_TORCHTREE.out, by: [0, 1])) - RUN_TREEFLOW(data_run) + RUN_TREEFLOW(data.map {it.take(5)} ) ch_files = Channel.empty() ch_files = ch_files.mix( diff --git a/modules/micro.nf b/modules/micro.nf index 1c9bbff..da9067f 100644 --- a/modules/micro.nf +++ b/modules/micro.nf @@ -29,7 +29,7 @@ process RUN_PHYSHER_BENCHMARK { } process RUN_TORCHTREE_BENCHMARK { - label 'normal' + //label 'normal' label 'bito' publishDir "$params.results/micro/${phylox}", mode: 'copy' @@ -67,6 +67,7 @@ process RUN_PHYLOJAX_BENCHMARK { tuple val(size), val(rep), path(lsd_newick), path(seq_file) output: path("phylojax.${size}.${rep}.csv") + """ phylojax-benchmark -i $seq_file \ -t $lsd_newick \ @@ -119,6 +120,9 @@ process COMBIME_CSV { tail -q -n+2 *[0-9].csv >> micro.csv """ } +//head -n1 ${files[0]} > micro +//tail -q -n+2 *.csv >> micro +//mv micro micro.csv workflow micro { take: diff --git a/modules/treetime_validation.nf b/modules/treetime_validation.nf index 6890ba2..2e35c1d 100644 --- a/modules/treetime_validation.nf +++ b/modules/treetime_validation.nf @@ -4,6 +4,8 @@ nextflow.enable.dsl = 2 params.subtrees = Channel.of(20, 50, 100, 200, 500, 750, 1000, 1250, 1500, 2000) params.subtrees_replicates = Channel.of(0..9) +//params.subtrees = Channel.of(20, 50)//, 100, 200, 500, 750, 1000, 1250, 1500, 2000) +//params.subtrees_replicates = Channel.of(0..1) params.base = "$baseDir" alignment_file = "$params.base/resources/flu_H3N2/H3N2_HA_2011_2013.fasta" diff --git a/run_rhino.sh b/run_rhino.sh index f3622ae..505b7b0 100755 --- a/run_rhino.sh +++ b/run_rhino.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -source /app/lmod/lmod/init/profile +# source /app/lmod/lmod/init/profile module load nextflow module load Singularity @@ -10,9 +10,8 @@ export PATH=$SINGULARITYROOT/bin/:$PATH /usr/bin/time nextflow \ -C ./configs/rhino.config \ run main.nf \ - --results "$(date -I)-rhino-results" \ + --results "batch-results-$(date -I)" \ -profile rhino \ - -with-report ./r_output/nextflow_report.html \ - -work-dir ./r_output/work/ \ - -with-trace \ - -resume + -with-report "batch-results-$(date -I)"/nextflow_report.html \ + -with-trace "batch-results-$(date -I)"/trace.txt \ + -work-dir "batch-results-$(date -I)/work/" diff --git a/scripts/parse-trace.py b/scripts/parse-trace.py index 3a43817..29f983a 100644 --- a/scripts/parse-trace.py +++ b/scripts/parse-trace.py @@ -13,8 +13,12 @@ with open(sys.argv[2], "r") as fp: for line in fp: a = line.split("\t") + if 'FAILED' in a or 'ABORTED' in a: + continue if "task_id" == a[0]: print("program\tsize\treplicate\t" + line, end="") + if a[1] not in dic: + continue elif "macro_flu:RUN_" in a[3]: for f in dic[a[1]]: if ".txt" in f: