diff --git a/assets/placeholder_no_file.tsv b/assets/placeholder_no_file.tsv new file mode 100644 index 00000000..4f2e970e --- /dev/null +++ b/assets/placeholder_no_file.tsv @@ -0,0 +1 @@ +PLACEHOLDER diff --git a/bin/metrics_vs_depth_qc.py b/bin/metrics_vs_depth_qc.py new file mode 100755 index 00000000..fd5d6631 --- /dev/null +++ b/bin/metrics_vs_depth_qc.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python + +import json +from pathlib import Path + +import click +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from matplotlib.backends.backend_pdf import PdfPages +from scipy.stats import linregress + + +def load_group_samples(group_definition_file, group_name): + with open(group_definition_file, "r") as f: + groups = json.load(f) + if group_name not in groups: + raise ValueError(f"Group '{group_name}' not present in {group_definition_file}") + return [str(x) for x in groups[group_name]] + + +def _find_column(columns, candidates): + lower = {c.lower(): c for c in columns} + for cand in candidates: + if cand.lower() in lower: + return lower[cand.lower()] + return None + + +def load_depth_table(depth_gene_sample_file, samples): + depth_df = pd.read_csv(depth_gene_sample_file, sep="\t", header=0) + sample_col = _find_column(depth_df.columns, ["SAMPLE_ID", "sample"]) + gene_col = _find_column(depth_df.columns, ["GENE", "gene"]) + depth_col = _find_column(depth_df.columns, ["MEAN_GENE_DEPTH", "mean_gene_depth"]) + + if sample_col is None or gene_col is None or depth_col is None: + raise ValueError("Depth table must contain SAMPLE_ID, GENE and MEAN_GENE_DEPTH columns.") + + depth_df = depth_df.rename(columns={sample_col: "SAMPLE_ID", gene_col: "GENE", depth_col: "MEAN_GENE_DEPTH"}) + depth_df["SAMPLE_ID"] = depth_df["SAMPLE_ID"].astype(str) + depth_df["GENE"] = depth_df["GENE"].astype(str) + depth_df = depth_df[depth_df["SAMPLE_ID"].isin(samples)].copy().reset_index(drop=True) + depth_df["MEAN_GENE_DEPTH"] = 
pd.to_numeric(depth_df["MEAN_GENE_DEPTH"], errors="coerce") + return depth_df + + +def load_mutdensity(mutdensity_file, samples, metric_name): + mut = pd.read_csv(mutdensity_file, sep="\t", header=0) + required = {"SAMPLE_ID", "GENE", "MUTDENSITY_MB", "MUTTYPES"} + if not required.issubset(set(mut.columns)): + raise ValueError(f"Mutdensity file {mutdensity_file} must contain {sorted(required)}") + + mut = mut[(mut["SAMPLE_ID"].astype(str).isin(samples)) + & (mut["GENE"] != "ALL_GENES") + & (mut["MUTTYPES"] == "SNV") + ].copy() + mut["SAMPLE_ID"] = mut["SAMPLE_ID"].astype(str) + mut["GENE"] = mut["GENE"].astype(str) + mut["metric_value"] = pd.to_numeric(mut["MUTDENSITY_MB"], errors="coerce") + mut["metric_name"] = metric_name + if "REGIONS" in mut.columns: + mut["metric_name"] = mut["metric_name"] + "." + mut["REGIONS"].fillna("unknown").astype(str) + return mut[["SAMPLE_ID", "GENE", "metric_name", "metric_value"]] + +def load_adjmutdensity(mutdensity_file, samples, metric_name): + mut = pd.read_csv(mutdensity_file, sep="\t", header=0) + required = {"SAMPLE", "GENE", "synonymous", "missense", "nonsense", "essential_splice", "truncating", "nonsynonymous_splice", "all_impacts"} + if not required.issubset(set(mut.columns)): + raise ValueError(f"Mutdensity file {mutdensity_file} must contain {sorted(required)}") + + mut = mut[(mut["SAMPLE"].astype(str).isin(samples))].copy() + mut["SAMPLE_ID"] = mut["SAMPLE"].astype(str) + mut["GENE"] = mut["GENE"].astype(str) + + mut_dfs = [] + for impact in ["synonymous", "missense", "nonsense", "essential_splice", "truncating", "nonsynonymous_splice", "all_impacts"]: + subset_mut = mut[["SAMPLE_ID", "GENE", impact]].copy() + subset_mut["metric_value"] = pd.to_numeric(subset_mut[impact], errors="coerce") # coerce like the other loaders so non-numeric cells become NaN + subset_mut["metric_name"] = f"{impact}_density" + mut_dfs.append(subset_mut[["SAMPLE_ID", "GENE", "metric_name", "metric_value"]]) + + return mut_dfs + + +def load_omegas(omegas_file, samples): + omega = pd.read_csv(omegas_file, sep="\t", header=0) + sample_col = "sample" +
gene_col = "gene" + dnds_col = "dnds" + if not {sample_col, gene_col, dnds_col, "impact"}.issubset(omega.columns): # validate against the file header; the names above are constants and never None + raise ValueError(f"Omega file {omegas_file} must contain sample, gene, impact and dnds columns") + + omegas_dfs = [] + omega = omega.rename(columns={sample_col: "SAMPLE_ID", gene_col: "GENE", dnds_col: "metric_value"}) + for impact in ["missense", "truncating"]: + subset_omega = omega[(omega["impact"] == impact) + & ~(omega["GENE"].astype(str).str.contains("--")) + ].copy() + subset_omega["SAMPLE_ID"] = subset_omega["SAMPLE_ID"].astype(str) + subset_omega = subset_omega[subset_omega["SAMPLE_ID"].isin(samples)].copy() + subset_omega["metric_value"] = pd.to_numeric(subset_omega["metric_value"], errors="coerce") + subset_omega["metric_name"] = f"omega_gloc_{impact}" + omegas_dfs.append(subset_omega[["SAMPLE_ID", "GENE", "metric_name", "metric_value"]]) + + return omegas_dfs + + +def summarize_effect_by_gene(df): + rows = [] + work = df.dropna(subset=["MEAN_GENE_DEPTH", "metric_value"]).copy() + all_groups = [("all_samples", work)] + [(s, g) for s, g in work.groupby("GENE")] + for group_name, gdf in all_groups: + n = len(gdf) + if n < 3 or gdf["MEAN_GENE_DEPTH"].nunique() < 2: # linregress raises ValueError when all x values are identical + rows.append( + { + "sample_scope": group_name, + "n_points": n, + "pearson_r": np.nan, + "spearman_r": np.nan, + "slope_metric_per_depth": np.nan, + "linreg_pvalue": np.nan, + } + ) + continue + lr = linregress(gdf["MEAN_GENE_DEPTH"], gdf["metric_value"]) + rows.append( + { + "sample_scope": group_name, + "n_points": n, + "pearson_r": gdf["MEAN_GENE_DEPTH"].corr(gdf["metric_value"], method="pearson"), + "spearman_r": gdf["MEAN_GENE_DEPTH"].corr(gdf["metric_value"], method="spearman"), + "slope_metric_per_depth": lr.slope, + "linreg_pvalue": lr.pvalue, + } + ) + return pd.DataFrame(rows) + + +def summarize_missingness(depth_df, merged_df): + m = depth_df.merge( + merged_df[["SAMPLE_ID", "GENE", "metric_name", "metric_value"]], + on=["SAMPLE_ID", "GENE"], + how="left", + ) + m["is_missing_metric"] = m["metric_value"].isna() + by_gene
= ( + m.groupby(by = ["GENE", "metric_name"]) + .apply( + lambda g: pd.Series( + { + "n_samples_total": int(len(g)), + "n_missing_metric": int(g["is_missing_metric"].sum()), + "missing_fraction": float(g["is_missing_metric"].mean()), + "median_depth_missing": g.loc[g["is_missing_metric"], "MEAN_GENE_DEPTH"].median(), + "median_depth_nonmissing": g.loc[~g["is_missing_metric"], "MEAN_GENE_DEPTH"].median(), + "min_depth_missing": g.loc[g["is_missing_metric"], "MEAN_GENE_DEPTH"].min(), + "max_depth_missing": g.loc[g["is_missing_metric"], "MEAN_GENE_DEPTH"].max(), + } + ) + ) + .reset_index() + ) + return by_gene.sort_values(["missing_fraction", "n_missing_metric"], ascending=[False, False]).reset_index(drop=True) + + +def plot_scatter_per_gene(df, group_name, metric_name, output_pdf): + genes = sorted(df["GENE"].dropna().unique().tolist()) + if not genes: + return + + # keep only the first 200 genes in alphabetical order to bound the PDF size + genes = genes[:200] + + sns.set_style("whitegrid") + per_page = 12 + ncols = 3 + nrows = 4 + with PdfPages(output_pdf) as pdf: + for start in range(0, len(genes), per_page): + page_genes = genes[start : start + per_page] + fig, axes = plt.subplots(nrows, ncols, figsize=(12, 16), sharex=False, sharey=False) + axes = axes.flatten() + for i, gene in enumerate(page_genes): + ax = axes[i] + sdf = df[df["GENE"] == gene].dropna(subset=["MEAN_GENE_DEPTH", "metric_value"]) + if sdf.empty: + ax.set_title(f"{gene} (no data)") + continue + sns.scatterplot( + data=sdf, + x="MEAN_GENE_DEPTH", + y="metric_value", + alpha=0.5, + s=16, + linewidth=0, + ax=ax, + ) + title = f"{gene} (n={len(sdf)})" + if len(sdf) >= 3 and sdf["MEAN_GENE_DEPTH"].nunique() > 1: # linregress raises ValueError when all x values are identical + lr = linregress(sdf["MEAN_GENE_DEPTH"], sdf["metric_value"]) + title += f" | slope={lr.slope:.2e}, p={lr.pvalue:.2e}" + line_color = "darkred" if lr.pvalue < 0.05 else "darkgrey" + sns.regplot( + data=sdf, + x="MEAN_GENE_DEPTH", + y="metric_value", + scatter=False, + line_kws={"color": line_color, "linewidth": 1.2}, + ax=ax, + ) + + y_min, y_max = sdf["metric_value"].min(),
sdf["metric_value"].max() + val_range = y_max - y_min if y_max > y_min else (abs(y_max) * 0.1 if y_max != 0 else 1.0) + ax.set_ylim(bottom=-0.05 * val_range if y_min >= 0 else y_min - 0.05 * val_range) + + ax.set_title(title, fontsize=9) + ax.set_xlabel("MEAN_GENE_DEPTH") + ax.set_ylabel(metric_name) + + for j in range(len(page_genes), len(axes)): + axes[j].axis("off") + + fig.suptitle(f"{group_name} | {metric_name} vs depth (per gene)", fontsize=14) + fig.tight_layout(rect=[0, 0, 1, 0.97]) + pdf.savefig(fig) + plt.close(fig) + + +@click.command() +@click.option("--mutdensity-file", required=True, type=click.Path(exists=True)) +@click.option("--depth-gene-sample-file", required=True, type=click.Path(exists=True)) +@click.option("--group-definition", required=True, type=click.Path(exists=True)) +@click.option("--group-name", required=True, type=str) +@click.option("--output-dir", required=True, type=click.Path()) +@click.option("--adjusted-mutdensity-file", required=False, type=click.Path(exists=True)) +@click.option("--omegas-file", required=False, type=click.Path(exists=True)) +def main( + mutdensity_file, + depth_gene_sample_file, + group_definition, + group_name, + output_dir, + adjusted_mutdensity_file, + omegas_file, +): + outdir = Path(output_dir) + outdir.mkdir(parents=True, exist_ok=True) + + samples = load_group_samples(group_definition, group_name) + depth_df = load_depth_table(depth_gene_sample_file, samples) + + metric_frames = [load_mutdensity(mutdensity_file, samples, "mutdensity")] + if adjusted_mutdensity_file: + try: + metric_frames.extend(load_adjmutdensity(adjusted_mutdensity_file, samples, "adjusted_mutdensity")) + except Exception as e: + print(f"Warning: skipping adjusted mutdensity file {adjusted_mutdensity_file}: {e}") + if omegas_file: + try: + metric_frames.extend(load_omegas(omegas_file, samples)) + except Exception as e: + print(f"Warning: skipping omegas file {omegas_file}: {e}") + + metrics_df = pd.concat(metric_frames, 
ignore_index=True) + + status_rows = [] + for metric_name in metrics_df["metric_name"].unique(): + print(f"Processing metric '{metric_name}'...") + + metric_specific_df = metrics_df[metrics_df["metric_name"] == metric_name].copy() + merged = depth_df.merge(metric_specific_df, on=["SAMPLE_ID", "GENE"], how="left") + merged["metric_name"] = metric_name + + summary_effect = summarize_effect_by_gene(merged) + summary_effect.insert(0, "metric_name", metric_name) + summary_effect.to_csv(outdir / f"{group_name}.{metric_name}.depth_effect_summary.tsv", sep="\t", index=False) + + missingness = summarize_missingness(depth_df, merged) + missingness.to_csv(outdir / f"{group_name}.{metric_name}.depth_missingness_by_gene.tsv", sep="\t", index=False) + + plot_scatter_per_gene( + merged, + group_name=group_name, + metric_name=metric_name, + output_pdf=outdir / f"{group_name}.{metric_name}.depth_scatter_per_gene.pdf", + ) + + status_rows.append( + { + "group_name": group_name, + "metric_name": metric_name, + "n_depth_rows": int(len(depth_df)), + "n_metric_rows": int(len(metric_specific_df)), + "n_merged_nonmissing": int(merged["metric_value"].notna().sum()), + } + ) + + pd.DataFrame(status_rows).to_csv(outdir / f"{group_name}.metrics_vs_depth_qc.status.tsv", sep="\t", index=False) + + +if __name__ == "__main__": + main() diff --git a/bin/plot_saturation_in_genes.py b/bin/plot_saturation_in_genes.py index 8e40f935..39f23caf 100755 --- a/bin/plot_saturation_in_genes.py +++ b/bin/plot_saturation_in_genes.py @@ -81,9 +81,9 @@ def group_mutations(df, mode="aminoacid", count_mutations=True): mode: 'aminoacid', 'protein_position', 'nucleotide_change', 'nucleotide_position' count_mutations: if True, count mutations (for mutation tables); if False, just group (for panel tables) - ## define these as different options within a function: - # this should be coupled with a proper processing of the consensus_enriched_expanded table - # so that it is also grouped by in the same way, there is no 
need for counting in there + define these as different options within a function: + this should be coupled with a proper processing of the consensus_enriched_expanded table + so that it is also grouped by in the same way, there is no need for counting in there """ if mode == "aminoacid": @@ -221,12 +221,15 @@ def plot_genes(df, mode=None): g.savefig(plot_path, bbox_inches='tight', dpi=300) plt.close(g.fig) -def plot_domains(df, mode=None): +def plot_domains(df, genes = None, mode=None): + if genes is None: + genes = df["GENE_NAME"].unique() + seg_type = 'domain' suffix = f"_{mode}" if mode else "" pdf_path = f"{plots_dir}/saturation_domains_all{suffix}.pdf" with PdfPages(pdf_path) as pdf: - for gene in df["GENE_NAME"].unique(): + for gene in genes: df_gene = df[(df["GENE_NAME"] == gene) & (df["SEGMENT_TYPE"] == "domain")] if df_gene.empty: continue @@ -254,12 +257,15 @@ def plot_domains(df, mode=None): pdf.savefig(g.fig, bbox_inches='tight', dpi=300) plt.close(g.fig) -def plot_exons(df, mode=None): +def plot_exons(df, genes = None, mode=None): + if genes is None: + genes = df["GENE_NAME"].unique() + seg_type = 'exon' suffix = f"_{mode}" if mode else "" pdf_path = f"{plots_dir}/saturation_exons_all{suffix}.pdf" with PdfPages(pdf_path) as pdf: - for gene in df["GENE_NAME"].unique(): + for gene in genes: df_gene = df[(df["GENE_NAME"] == gene) & (df["SEGMENT_TYPE"] == "exon")] if df_gene.empty: continue @@ -289,13 +295,16 @@ def plot_exons(df, mode=None): # Frequency-stratified domain plot -def plot_domains_by_freq(df, mode=None): +def plot_domains_by_freq(df, genes = None, mode=None): + if genes is None: + genes = df["GENE_NAME"].unique() + freq_bin_order = ['3+', '2', '1'] seg_type = 'domain' suffix = f"_{mode}" if mode else "" pdf_path = f"{plots_dir}/saturation_domains_byfreq_all{suffix}.pdf" with PdfPages(pdf_path) as pdf: - for gene in df["GENE_NAME"].unique(): + for gene in genes: df_gene = df[(df["GENE_NAME"] == gene) & (df["SEGMENT_TYPE"] == "domain")] if 
df_gene.empty: continue @@ -335,13 +344,15 @@ def plot_domains_by_freq(df, mode=None): plt.close(fig) # Frequency-stratified exon plot -def plot_exons_by_freq(df, mode=None): +def plot_exons_by_freq(df, genes = None, mode=None): + if genes is None: + genes = df["GENE_NAME"].unique() freq_bin_order = ['3+', '2', '1'] seg_type = 'exon' suffix = f"_{mode}" if mode else "" pdf_path = f"{plots_dir}/saturation_exons_byfreq_all{suffix}.pdf" with PdfPages(pdf_path) as pdf: - for gene in df["GENE_NAME"].unique(): + for gene in genes: df_gene = df[(df["GENE_NAME"] == gene) & (df["SEGMENT_TYPE"] == "exon")] if df_gene.empty: continue @@ -385,14 +396,20 @@ def plot_exons_by_freq(df, mode=None): # Call all three plotting functions from the same table def plot_all_saturation_tables(df, mode=None): plot_genes(df, mode) - plot_domains(df, mode) - plot_exons(df, mode) + + genes_list = df["GENE_NAME"].unique() + genes_list = genes_list[:200] + plot_domains(df, genes=genes_list, mode=mode) + plot_exons(df, genes=genes_list, mode=mode) # Call all three frequency-stratified plotting functions from the same table def plot_all_saturation_tables_by_freq(df, mode=None): - plot_domains_by_freq(df, mode) - plot_exons_by_freq(df, mode) + genes_list = df["GENE_NAME"].unique() + genes_list = genes_list[:200] + + plot_domains_by_freq(df, genes=genes_list, mode=mode) + plot_exons_by_freq(df, genes=genes_list, mode=mode) def generate_all_saturation_plots(consensus_enriched_expanded, somatic_maf_clean, diff --git a/conf/modules.config b/conf/modules.config index 04ae8fa4..f94fd632 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -532,6 +532,10 @@ process { ext.recode_list = "${params.mutepi_genes_to_recode}" } + withName: PLOTMETRICSVSDEPTHQC { + ext.all_adjusted_mutdensities = params.omega || (params.profileall && params.mutationdensity) + ext.all_omegas_globalloc = (params.omega && params.omega_globalloc) + } withName: SUBSETONCODRIVECLUSTL { ext.filters = { "" } diff --git
a/conf/results_outputs.config b/conf/results_outputs.config index 5dc2d67a..7eef1e45 100644 --- a/conf/results_outputs.config +++ b/conf/results_outputs.config @@ -110,6 +110,13 @@ process { pattern: '**{tsv,csv,pdf,png}', ] } + withName: PLOTMETRICSVSDEPTHQC{ + publishDir = [ + path: { "${params.outdir}/qc/metrics_vs_depth" }, + mode: params.publish_dir_mode, + pattern: '**{tsv,pdf,png}', + ] + } withName: APPLYOMEGAQC { publishDir = [ diff --git a/docs/output.md b/docs/output.md index 574d2d27..7847ef71 100644 --- a/docs/output.md +++ b/docs/output.md @@ -293,6 +293,7 @@ Optional: - plotneedles - plotselection - plotsomaticmaf +- qc/metrics_vs_depth (depth-vs-mutdensity/omega QC scatterplots and TSV summaries) ## Additional outputs diff --git a/modules/local/plot/qc/metrics_vs_depth/main.nf b/modules/local/plot/qc/metrics_vs_depth/main.nf new file mode 100644 index 00000000..89229935 --- /dev/null +++ b/modules/local/plot/qc/metrics_vs_depth/main.nf @@ -0,0 +1,53 @@ +process PLOT_METRICS_VS_DEPTH_QC { + + tag "${group_name}" + label 'process_low' + + label 'deepcsa_core' + + input: + path (all_mutdensities) + path (depth_gene_sample) + path (groups_json) + val (group_name) + path (all_adjusted_mutdensities, stageAs: 'adjusted_mutdensities.tsv') + path (all_omegas_globalloc, stageAs: 'omega_gloc.tsv') + + output: + path("${group_name}.metrics_depth_qc/*.pdf"), optional: true , emit: plots + path("${group_name}.metrics_depth_qc/*.tsv"), optional: true , emit: tables + path "versions.yml" , topic: versions + + script: + def adjusted_arg = task.ext.all_adjusted_mutdensities ? "--adjusted-mutdensity-file ${all_adjusted_mutdensities}" : "" + def omega_arg = task.ext.all_omegas_globalloc ? 
"--omegas-file ${all_omegas_globalloc}" : "" + """ + mkdir ${group_name}.metrics_depth_qc + metrics_vs_depth_qc.py \\ + --mutdensity-file ${all_mutdensities} \\ + --depth-gene-sample-file ${depth_gene_sample} \\ + --group-definition ${groups_json} \\ + --group-name ${group_name} \\ + --output-dir ${group_name}.metrics_depth_qc \\ + ${adjusted_arg} \\ + ${omega_arg} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + mkdir -p ${group_name}.metrics_depth_qc + touch ${group_name}.metrics_depth_qc/${group_name}.mutdensity.depth_scatter_per_sample.pdf + touch ${group_name}.metrics_depth_qc/${group_name}.mutdensity.depth_effect_summary.tsv + touch ${group_name}.metrics_depth_qc/${group_name}.metrics_vs_depth_qc.status.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/plotting_qc/main.nf b/subworkflows/local/plotting_qc/main.nf index 8c3fd8f5..ab183920 100644 --- a/subworkflows/local/plotting_qc/main.nf +++ b/subworkflows/local/plotting_qc/main.nf @@ -1,21 +1,24 @@ -include { PLOT_MUTDENSITY_QC as PLOTMUTDENSITYQC } from '../../../modules/local/plot/qc/mutation_densities/main' -include { ANNOTATE_OMEGA_QC as APPLYOMEGAQC } from '../../../modules/local/plot/qc/annotate_omega/main' -include { PLOT_MUTATION_SPECIFIC as PLOTMUTATIONSPECIFIC } from '../../../modules/local/plot/qc/mutation_specific/main' +include { PLOT_MUTDENSITY_QC as PLOTMUTDENSITYQC } from '../../../modules/local/plot/qc/mutation_densities/main' +include { PLOT_METRICS_VS_DEPTH_QC as PLOTMETRICSVSDEPTHQC } from '../../../modules/local/plot/qc/metrics_vs_depth/main' +include { ANNOTATE_OMEGA_QC as APPLYOMEGAQC } from '../../../modules/local/plot/qc/annotate_omega/main' +include { PLOT_MUTATION_SPECIFIC as PLOTMUTATIONSPECIFIC } from '../../../modules/local/plot/qc/mutation_specific/main' 
workflow PLOTTING_QC { take: all_mutations - // positive_selection_results_ready all_mutdensities - // all_samples_depth - // all_groups + all_adjusted_mutdensities + all_omegas_globalloc + average_depth_gene_sample all_omegas panel groups_definition group_name + // all_samples_depth + // all_groups // full_panel_rich // seqinfo_df // domain_df @@ -27,33 +30,31 @@ workflow PLOTTING_QC { // Channel.of([ [ id: "all_samples" ] ]) // .join( all_mutations ) // .set{ mutations } - PLOTMUTATIONSPECIFIC(all_mutations) - - - // pdb_tool_df = params.annotations3d - // ? channel.fromPath( "${params.annotations3d}/pdb_tool_df.tsv", checkIfExists: true).first() - // : channel.empty() - // plotting only for the entire cohort group // channel.of([ [ id: "all_samples" ] ]) // .join( positive_selection_results_ready ) // .set{ all_samples_results } + PLOTMUTATIONSPECIFIC(all_mutations) + PLOTMUTDENSITYQC(all_mutdensities, panel, groups_definition, group_name) - // mutation density per gene cohort-level - // mutation density per gene & sample - // synonymous - // non-protein-affecting - // pending: - // protein-affecting - // truncating - // missense + + PLOTMETRICSVSDEPTHQC( + all_mutdensities, + average_depth_gene_sample.map { it -> it[1] }, + groups_definition, + group_name, + all_adjusted_mutdensities, + all_omegas_globalloc + ) APPLYOMEGAQC(all_omegas, PLOTMUTDENSITYQC.out.compiled_flagged.collect()) emit: - mutdensity_plots = PLOTMUTDENSITYQC.out.plots - flagged_omegas = APPLYOMEGAQC.out.all_omegas_annotated + mutdensity_plots = PLOTMUTDENSITYQC.out.plots + metrics_vs_depth_plots = PLOTMETRICSVSDEPTHQC.out.plots + metrics_vs_depth_tables = PLOTMETRICSVSDEPTHQC.out.tables + flagged_omegas = APPLYOMEGAQC.out.all_omegas_annotated } diff --git a/tests/deepcsa.nf.test b/tests/deepcsa.nf.test index 6bf7c50b..dd040ca2 100644 --- a/tests/deepcsa.nf.test +++ b/tests/deepcsa.nf.test @@ -358,4 +358,3 @@ nextflow_pipeline { // } // } } - diff --git a/workflows/deepcsa.nf 
b/workflows/deepcsa.nf index a180ea2d..6367a0ad 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -110,11 +110,11 @@ include { SELECT_MUTDENSITIES as SYNMUTREADSDENSITY } from '../m include { DNA_2_PROTEIN_MAPPING as DNA2PROTEINMAPPING } from '../modules/local/dna2protein/main' -include { MAF_2_VCF as MAF2VCF } from '../modules/local/maf2vcf/main' -include { SIGPROFILER_MATRIXGENERATOR as SIGPROMATRIXGENERATOR } from '../modules/local/signatures/sigprofiler/matrixgenerator/main' -include { SIGPROFILERASSIGNMENT_COSMIC_FIT as SIGPROFILERASSIGNMENTINDELS } from '../modules/local/signatures/sigprofiler/assignment/cosmic_fit/main' +include { MAF_2_VCF as MAF2VCF } from '../modules/local/maf2vcf/main' +include { SIGPROFILER_MATRIXGENERATOR as SIGPROMATRIXGENERATOR } from '../modules/local/signatures/sigprofiler/matrixgenerator/main' +include { SIGPROFILERASSIGNMENT_COSMIC_FIT as SIGPROFILERASSIGNMENTINDELS } from '../modules/local/signatures/sigprofiler/assignment/cosmic_fit/main' -include { MUTATIONS_2_SIGNATURES as MUTS2SIGS } from '../modules/local/mutations2sbs/main' +include { MUTATIONS_2_SIGNATURES as MUTS2SIGS } from '../modules/local/mutations2sbs/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -156,8 +156,9 @@ workflow DEEPCSA { site_comparison_results = channel.empty() all_compiled_omegas = channel.empty() - all_compiled_omegasgloballoc = channel.empty() + all_compiled_omegasgloballoc = channel.value(file("${projectDir}/assets/placeholder_no_file.tsv", checkIfExists: true)) all_mutdensities_file = channel.empty() + all_adjusted_mutdensities_file = channel.value(file("${projectDir}/assets/placeholder_no_file.tsv", checkIfExists: true)) all_compiled_stabilities = channel.empty() // if the user wants to use custom gene groups, import the gene groups table @@ -342,7 +343,7 @@ workflow DEEPCSA { // Concatenate all outputs into a single file MUTDENSITYADJUSTED.out.mutdensities.map{ it -> 
it[1]}.flatten() .set{ all_adjusted_mutdensities } - all_adjusted_mutdensities.collectFile(name: "all_adjusted_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true) + all_adjusted_mutdensities.collectFile(name: "all_adjusted_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true).set{ all_adjusted_mutdensities_file } MUTDENSITYADJUSTED.out.mutdensities_flat.map{ it -> it[1]}.flatten() .set{ all_adjusted_mutdensities_flat } @@ -594,6 +595,9 @@ workflow DEEPCSA { PLOTTINGQC( somatic_mutations, all_mutdensities_file.first(), + all_adjusted_mutdensities_file.first(), + all_compiled_omegasgloballoc.first(), + PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample.first(), all_compiled_omegas, // site_comparison_results, // ANNOTATEDEPTHS.out.all_samples_depths,