Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 150 additions & 65 deletions association.nf
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def helpMessage() {
--outdir The output directory where the results will be saved and what will be used as a prefix (default outs)
--split Number read entries per fastq chunk for faster processing (default: 2000000)
--labels tsv with the oligo pool fasta and a group label (ex: positive_control) if no labels desired a file will be automatically generated
--variants tsv with reference_name variant_positions ref_bases alt_bases, only input for variant analyses workflow

Extras:
--h, --help Print this help message
Expand Down Expand Up @@ -117,6 +118,14 @@ if (params.containsKey("labels")){
params.label_file=null
}

// Optional variants table (--variants): resolve it to a file object stored in
// params.variants_file and abort with exit code 1 if that path does not exist.
// When --variants is not supplied, params.variants_file is set to null; the
// process definitions further down branch on that null check.
if (params.containsKey("variants")){
params.variants_file=file(params.variants)
if (!params.variants_file.exists()) exit 1, "variants file ${params.variants_file} does not exist"
} else {
params.variants_file=null
}

// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
custom_runName = params.name
Expand Down Expand Up @@ -153,6 +162,7 @@ summary['Run name'] = params.name
summary['Working dir'] = workflow.workDir
summary['Container Engine'] = workflow.containerEngine
if(workflow.containerEngine) summary['Container'] = workflow.container
if(params.variants_file!=null) summary['Variants file'] = params.variants_file
summary['Current home'] = "$HOME"
summary['Current user'] = "$USER"
summary['Current path'] = "$PWD"
Expand Down Expand Up @@ -444,81 +454,156 @@ process 'collect_chunks'{
* contributions: Sean Whalen
*/

process 'map_element_barcodes' {
tag "assign"
label "shorttime"
publishDir "${params.outdir}/${params.name}", mode:'copy'

conda 'conf/mpraflow_py36.yml'

input:
val(name) from params.name
val(mapq) from params.mapq
val(baseq) from params.baseq
val(cigar) from params.cigar
file(fastq_bc) from params.fastq_bc_file
file count_fastq from bc_ch
file count_bam from ch_merge
file bam from s_merge
output:
file "${name}_coords_to_barcodes.pickle" into map_ch
file "${name}_barcodes_per_candidate-no_repeats-no_jackpots.feather" into count_table_ch
file "${name}_barcode_counts.pickle"
shell:
"""
echo "test assign inputs"
echo ${mapq}
echo ${baseq}
echo $fastq_bc
zcat $fastq_bc | head

echo ${count_fastq}
echo ${count_bam}
cat ${count_fastq}
cat ${count_bam}

python ${"$baseDir"}/src/nf_ori_map_barcodes.py ${"$baseDir"} ${fastq_bc} ${count_fastq} \
$bam ${count_bam} ${name} ${mapq} ${baseq} ${cigar}
"""
if (params.variants_file == null){
// Barcode-to-element assignment for the default (no variants file) workflow.
// Runs src/nf_ori_map_barcodes.py on the barcode fastq, the merged BAM and the
// two count files, and publishes the results to ${params.outdir}/${params.name}.
process 'map_element_barcodes' {
tag "assign"
label "shorttime"
publishDir "${params.outdir}/${params.name}", mode:'copy'

conda 'conf/mpraflow_py36.yml'

input:
// run name; used as the prefix of every output file name
val(name) from params.name
// mapq, baseq and cigar are passed straight through to the python script
val(mapq) from params.mapq
val(baseq) from params.baseq
val(cigar) from params.cigar
// barcode fastq; the script below reads it with zcat
file(fastq_bc) from params.fastq_bc_file
// count files for the fastq and the BAM (printed with cat below), plus the BAM itself
file count_fastq from bc_ch
file count_bam from ch_merge
file bam from s_merge
output:
// the first two outputs feed the downstream filter process via map_ch / count_table_ch;
// the third is declared as an output but not sent to any channel
file "${name}_coords_to_barcodes.pickle" into map_ch
file "${name}_barcodes_per_candidate-no_repeats-no_jackpots.feather" into count_table_ch
file "${name}_barcode_counts.pickle"
// The echo/zcat/cat lines only print the inputs to the task log before the
// python script is invoked. The string is double-quoted, so every $var / ${var}
// is interpolated by Groovy before the shell sees it.
shell:
"""
echo "test assign inputs"
echo ${mapq}
echo ${baseq}
echo $fastq_bc
zcat $fastq_bc | head

echo ${count_fastq}
echo ${count_bam}
cat ${count_fastq}
cat ${count_bam}

python ${"$baseDir"}/src/nf_ori_map_barcodes.py ${"$baseDir"} ${fastq_bc} ${count_fastq} \
$bam ${count_bam} ${name} ${mapq} ${baseq} ${cigar}
"""
}
} else {
// Barcode-to-element assignment for the variant workflow (a variants file was given).
// Same inputs, outputs and script as 'map_element_barcodes', except that the
// variants file is staged as an extra input and handed to
// src/nf_ori_map_barcodes.py through its --mutations option.
process 'map_element_barcodes_mut' {
tag "assign"
label "shorttime"
publishDir "${params.outdir}/${params.name}", mode:'copy'

conda 'conf/mpraflow_py36.yml'

input:
// run name; used as the prefix of every output file name
val(name) from params.name
// mapq, baseq and cigar are passed straight through to the python script
val(mapq) from params.mapq
val(baseq) from params.baseq
val(cigar) from params.cigar
// barcode fastq; the script below reads it with zcat
file(fastq_bc) from params.fastq_bc_file
// count files for the fastq and the BAM (printed with cat below), plus the BAM itself
file count_fastq from bc_ch
file count_bam from ch_merge
file bam from s_merge
// variants table resolved from --variants
file(variants) from params.variants_file
output:
// the first two outputs feed the downstream filter process via map_ch / count_table_ch;
// the third is declared as an output but not sent to any channel
file "${name}_coords_to_barcodes.pickle" into map_ch
file "${name}_barcodes_per_candidate-no_repeats-no_jackpots.feather" into count_table_ch
file "${name}_barcode_counts.pickle"

// The echo/zcat/cat lines only print the inputs to the task log before the
// python script is invoked. The string is double-quoted, so every $var / ${var}
// is interpolated by Groovy before the shell sees it.
shell:
"""
echo "test assign inputs"
echo ${mapq}
echo ${baseq}
echo $fastq_bc
zcat $fastq_bc | head

echo ${count_fastq}
echo ${count_bam}
cat ${count_fastq}
cat ${count_bam}

python ${"$baseDir"}/src/nf_ori_map_barcodes.py ${"$baseDir"} ${fastq_bc} ${count_fastq} \
$bam ${count_bam} ${name} ${mapq} ${baseq} ${cigar} --mutations ${variants}
"""
}
}


/*
* Filter barcodes for minimum coverage and unique mapping
* contributions: Gracie Gordon
*/

process 'filter_barcodes' {
tag "$filter"
label "shorttime"
publishDir "${params.outdir}/${params.name}", mode:'copy'

conda 'conf/mpraflow_py36.yml'

input:
val(min_cov) from params.min_cov
val(min_frac) from params.min_frac
val(out) from params.name
file(map) from map_ch
file(table) from count_table_ch
file(label) from fixed_label
output:
file "${out}_filtered_coords_to_barcodes.pickle"
file "${out}_original_counts.png"
file "original_count_summary.txt"
file "${out}_filtered_counts.png"
file "filtered_count_summary.txt"

shell:
"""
python ${"$baseDir"}/src/nf_filter_barcodes.py ${out} ${map} ${table} \
${min_cov} ${min_frac} $label
"""
if (params.variants_file == null) {
// Barcode filtering for the default (no variants file) workflow.
// Runs src/nf_filter_barcodes.py on the barcode map and count table produced by
// the assignment process and publishes the results to ${params.outdir}/${params.name}.
process 'filter_barcodes' {
// NOTE(review): `filter` is not declared among this process's inputs — confirm
// what this tag is expected to resolve to.
tag "$filter"
label "shorttime"
publishDir "${params.outdir}/${params.name}", mode:'copy'

conda 'conf/mpraflow_py36.yml'

input:
// min_cov and min_frac are passed straight through to the python script
val(min_cov) from params.min_cov
val(min_frac) from params.min_frac
// run name; used as the prefix of the pickle and png outputs
val(out) from params.name
// outputs of the barcode assignment process
file(map) from map_ch
file(table) from count_table_ch
// label file, passed as the last argument to the python script
file(label) from fixed_label
output:
// none of these outputs is sent to a channel; they are only published
file "${out}_filtered_coords_to_barcodes.pickle"
file "${out}_original_counts.png"
file "original_count_summary.txt"
file "${out}_filtered_counts.png"
file "filtered_count_summary.txt"

shell:
"""
python ${"$baseDir"}/src/nf_filter_barcodes.py ${out} ${map} ${table} \
${min_cov} ${min_frac} $label
"""
}
} else {
// Barcode filtering for the variant workflow (a variants file was given).
// First runs src/expand_variants.py on the design, label and variants files,
// naming design_mutExpand.fa and label_mutExpand.txt as its outputs, then runs
// src/nf_filter_barcodes.py with label_mutExpand.txt in place of the original label file.
process 'filter_barcodes_mut' {
// NOTE(review): `filter` is not declared among this process's inputs — confirm
// what this tag is expected to resolve to.
tag "$filter"
label "shorttime"
publishDir "${params.outdir}/${params.name}", mode:'copy'

conda 'conf/mpraflow_py36.yml'

input:
// min_cov and min_frac are passed straight through to nf_filter_barcodes.py
val(min_cov) from params.min_cov
val(min_frac) from params.min_frac
// run name; used as the prefix of the pickle and png outputs
val(out) from params.name
// outputs of the barcode assignment process
file(map) from map_ch
file(table) from count_table_ch
// label, design and variants files are the inputs of expand_variants.py
file(label) from fixed_label
file(design) from fixed_design
file(variants) from params.variants_file
output:
// none of these outputs is sent to a channel; they are only published
file "${out}_filtered_coords_to_barcodes.pickle"
file "${out}_original_counts.png"
file "original_count_summary.txt"
file "${out}_filtered_counts.png"
file "filtered_count_summary.txt"
// files named on the expand_variants.py command line below
file "label_mutExpand.txt"
file "design_mutExpand.fa"

shell:
"""
python ${"$baseDir"}/src/expand_variants.py ${design} ${label} ${variants} design_mutExpand.fa label_mutExpand.txt
python ${"$baseDir"}/src/nf_filter_barcodes.py ${out} ${map} ${table} \
${min_cov} ${min_frac} label_mutExpand.txt
"""
}
}





/*
* Completion e-mail notification
*/
Expand Down
34 changes: 17 additions & 17 deletions conf/cluster.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,20 @@
//}

//uncomment for SLURM cluster
//process {
// withLabel: longtime {
// executor='slurm'
// //queue='long'
// clusterOptions = '-t 3-00:00:0 --mem=6G'
// }
// withLabel: shorttime {
// executor='slurm'
// //queue='short'
// clusterOptions = '-t 00-01:00:0 --mem=6G'
// }
// withLabel: highmem {
// executor='slurm'
// //queue='short'
// clusterOptions = '-t 00-20:00:0 --mem=80G'
// }
//}
process {
withLabel: longtime {
executor='slurm'
//queue='default'
clusterOptions = '-t 3-00:00:0 --mem=6G'
}
withLabel: shorttime {
executor='slurm'
//queue='default'
clusterOptions = '-t 00-01:00:0 --mem=6G'
}
withLabel: highmem {
executor='slurm'
queue='bigmem'
clusterOptions = '-t 00-20:00:0 --mem=80G'
}
}
Comment on lines +45 to +61
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you revert these changes?
I think it is better to leave the default without any cluster environment.
It is also documented this way in the docs.

2 changes: 1 addition & 1 deletion conf/global.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//MPRAflow version
params.version="2.3.4"
params.version="2.3.5"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this PR contains multiple things (fixes and features). So, according to [semantic versioning](https://semver.org/lang/de/), it is not just an increase of the patch version.

Can you redirect the PR to the development branch? I think we have to do some steps (update documentation, fix some issues you found (hard coded BC length)) before creating a new release.


// nextflow version required
params.nf_required_version="20.10"
Expand Down
27 changes: 0 additions & 27 deletions conf/mpraflow_py27.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,10 @@ channels:
- defaults
- conda-forge
dependencies:
- _libgcc_mutex=0.1=main
- bcftools=1.9=h68d8f2e_9
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2019.11.27=0
- certifi=2019.11.28=py27_0
- curl=7.67.0=hbc83047_0
- gsl=2.5=h294904e_1
- htslib=1.9=h244ad75_9
- krb5=1.16.1=h173b8e3_7
- libblas=3.8.0=11_openblas
- libcblas=3.8.0=11_openblas
- libcurl=7.67.0=h20c2e04_0
- libdeflate=1.3=h516909a_0
- libedit=3.1.20181209=hc058e9b_0
- libffi=3.2.1=hd88cf55_4
- libgcc-ng=9.1.0=hdf63c60_0
- libgfortran-ng=7.3.0=hdf63c60_0
- libopenblas=0.3.6=h5a2b251_2
- libssh2=1.8.2=h1ba5d50_0
- libstdcxx-ng=9.1.0=hdf63c60_0
- ncurses=6.1=he6710b0_1
- openssl=1.1.1d=h7b6447c_3
- perl=5.26.2=h14c3975_0
- pip=19.3.1=py27_0
- pysam=0.15.3=py27hbcae180_3
- python=2.7.17=h9bab390_0
- readline=7.0=h7b6447c_5
- samtools=1.9=h10a08f8_12
- setuptools=42.0.2=py27_0
- sqlite=3.30.1=h7b6447c_0
- tk=8.6.8=hbc83047_0
- wheel=0.33.6=py27_0
- xz=5.2.4=h14c3975_4
- zlib=1.2.11=h7b6447c_3
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for this change?

Otherwise I would leave it as it is.

4 changes: 3 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ channels:
- bioconda
- defaults
dependencies:
- nextflow=20.01
- nextflow=20.10
- samtools
- conda<4.7
Comment on lines -6 to +8
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for the NF version increase, and especially for adding the other two software packages?

samtools should be handled by the environment files, I guess. As for conda, I don't know. Do you have issues with it?

Loading