Skip to content

Commit 9cc703f

Browse files
committed
refactor so merge fasta and merge results use same function; new version
1 parent bdab170 commit 9cc703f

File tree

5 files changed: 51 additions and 46 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "tcdo_pg_tools"
7-
version = "0.0.4-beta"
7+
version = "0.0.5-beta"
88
description = "Commandline tools to support proteogenomics analyses at MSK TCDO and beyond"
99
readme = "README.rst"
1010
authors = [

src/tcdo_pg_tools/cli.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import click
22
from tcdo_pg_tools.fusion_merge import fusion_merge
33
from tcdo_pg_tools.coverage_calculator import coverage_calculator
4-
from tcdo_pg_tools.merge_pg_results import merge_pg_results
5-
from tcdo_pg_tools.merge_fasta import merge_fasta
4+
from tcdo_pg_tools.merge_proteome import merge_pg_results, merge_fasta
65

76
@click.group()
87
def cli():

src/tcdo_pg_tools/merge_fasta.py

Lines changed: 0 additions & 23 deletions
This file was deleted.
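src/tcdo_pg_tools/merge_pg_results.py → src/tcdo_pg_tools/merge_proteome.py (file header missing from the extracted page; rename inferred from the import change in cli.py)

Lines changed: 45 additions & 20 deletions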
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22
"""
33
author: Asher Preska Steinberg
4-
merge proteomegenerator results across multiple samples on AA seq identity
4+
merge proteomegenerator fasta and results across multiple samples on AA seq identity
55
"""
66
import os
77
import pandas as pd
@@ -48,24 +48,9 @@ def plot_upset(countdat, upset_path):
4848
plt.savefig(upset_path, dpi=300, bbox_inches="tight")
4949
return
5050

51-
@click.command()
52-
@click.option('-i', '--input_csv', required=True, type=click.Path(exists=True),
53-
help='four column csv (fasta: fasta path, '
54-
'protein_table: protein.tsv path, '
55-
'name: sample name, condition: condition)')
56-
@click.option('-t', '--info_table', required=False,
57-
default='info_table.tsv',
58-
type=click.Path(), help="Path to index tsv for merged protein IDs")
59-
@click.option('-fa','--merged_fasta', required=False,
60-
type=click.Path(), default='merged.fasta',
61-
help="Path to merged fasta file")
62-
@click.option('--upset', is_flag=True, default=False, help="plot upset")
63-
@click.option('--upset_path', required=False,
64-
type=click.Path(), default='upset_plot.svg',
65-
help="Path to upset plot")
66-
def merge_pg_results(input_csv, info_table, merged_fasta, upset, upset_path, unique_proteins=True):
51+
def merge_proteome(input_csv, info_table, merged_fasta, upset, upset_path, unique_proteins=True):
6752
"""
68-
merge proteomegenerator results across multiple samples on AA seq identity
53+
merge proteomegenerator fasta/results across multiple samples on AA seq identity
6954
"""
7055
# read in metadata
7156
metadata = pd.read_csv(input_csv)
@@ -130,5 +115,45 @@ def merge_pg_results(input_csv, info_table, merged_fasta, upset, upset_path, uni
130115
# plot upset plot:
131116
if upset:
132117
plot_upset(countdat, upset_path)
133-
if __name__ == '__main__':
134-
merge_pg_results()
118+
return
119+
120+
@click.command()
121+
@click.option('-i', '--input_csv', required=True, type=click.Path(exists=True),
122+
help='four column csv (fasta: fasta path, '
123+
'protein_table: protein.tsv path, '
124+
'name: sample name, condition: condition)')
125+
@click.option('-t', '--info_table', required=False,
126+
default='info_table.tsv',
127+
type=click.Path(), help="Path to index tsv for merged protein IDs")
128+
@click.option('-fa','--merged_fasta', required=False,
129+
type=click.Path(), default='merged.fasta',
130+
help="Path to merged fasta file")
131+
@click.option('--upset', is_flag=True, default=False, help="plot upset")
132+
@click.option('--upset_path', required=False,
133+
type=click.Path(), default='upset_plot.svg',
134+
help="Path to upset plot")
135+
def merge_pg_results(input_csv, info_table, merged_fasta, upset, upset_path):
136+
"""
137+
merge proteomegenerator results across multiple samples on AA seq identity
138+
"""
139+
return merge_proteome(input_csv, info_table, merged_fasta, upset, upset_path, unique_proteins=True)
140+
141+
@click.command()
142+
@click.option('-i', '--input_csv', required=True, type=click.Path(exists=True),
143+
help='three column csv (fasta: fasta path, '
144+
'name: sample name, condition: condition)')
145+
@click.option('-t', '--info_table', required=False,
146+
default='info_table.tsv',
147+
type=click.Path(), help="Path to index tsv for merged protein IDs")
148+
@click.option('-fa','--merged_fasta', required=False,
149+
type=click.Path(), default='merged.fasta',
150+
help="Path to merged fasta file")
151+
@click.option('--upset', is_flag=True, default=False, help="plot upset")
152+
@click.option('--upset_path', required=False,
153+
type=click.Path(), default='upset_plot.svg',
154+
help="Path to upset plot")
155+
def merge_fasta(input_csv, info_table, merged_fasta, upset, upset_path):
156+
"""
157+
merge multiple fasta on sequence identity
158+
"""
159+
return merge_proteome(input_csv, info_table, merged_fasta, upset, upset_path, unique_proteins=False)

tests/fasta_merge_test/test.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fasta,sample,condition
2+
/Volumes/kentsis/proteomics/data/Laura/Multi-protease_fractionation/fasta_files/AML_Celllines/Fasta/Celllines/PROTEOMEGENERATOR_47038_proteome_CD34_comb_fasta.fa,CD34+,control
3+
/Volumes/kentsis/proteomics/data/Laura/Multi-protease_fractionation/fasta_files/AML_Celllines/Fasta/Celllines/PROTEOMEGENERATOR_45382_proteome_MV4-11_fasta.fa,MV4-11,AML
4+
/Volumes/kentsis/proteomics/data/Laura/Multi-protease_fractionation/fasta_files/AML_Celllines/Fasta/Celllines/PROTEOMEGENERATOR_45642_proteome_Kasumi_1_fasta.fa,Kasumi-1,AML

0 commit comments

Comments
 (0)