-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_sr_distances.py
More file actions
67 lines (54 loc) · 1.75 KB
/
check_sr_distances.py
File metadata and controls
67 lines (54 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from argparse import ArgumentParser
from operator import itemgetter
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
from itertools import combinations
# Histogram bin edges for contig distances: 10-wide bins covering -500..600.
BIN_EDGES = np.arange(-500, 610, 10)

# Distances at or below this value are left out of the histogram.
MIN_DISTANCE = -500


def strip_cellline(contig, cellline):
    """Return *contig* with one trailing *cellline* suffix removed, if present.

    ``str.rstrip`` is deliberately not used: it removes any trailing run of
    the given characters rather than a suffix, which would also eat
    characters that belong to the contig name itself.
    """
    if cellline and contig.endswith(cellline):
        return contig[: -len(cellline)]
    return contig


def canonical_pair_key(ctg1, ctg2, cellline):
    """Return an orientation-independent ``"<ctgA>_<ctgB>"`` key for a pair.

    The two names are ordered by their cell-line-stripped form; the raw name
    breaks ties so that both orientations of a pair always yield the same key.
    """
    first, second = sorted(
        (ctg1, ctg2), key=lambda ctg: (strip_cellline(ctg, cellline), ctg)
    )
    return first + "_" + second


def load_best_distances(lines, cellline, mindepth=25):
    """Parse summary lines into ``{pair_key: (distance, depth)}``.

    Each non-blank line is split on whitespace and read as
    ``<ctg1>_<ctg2> <distance|NA> <depth>``. Lines whose distance is ``NA``
    or whose depth is below *mindepth* are skipped. When a pair occurs more
    than once, the entry with the smallest absolute distance is kept.

    Args:
        lines: Iterable of text lines (e.g. an open file).
        cellline: Cell-line name used as a suffix on contig names.
        mindepth: Minimum depth a line needs to be considered.

    Returns:
        Dict mapping the canonical pair key to ``(distance, depth)`` floats.
    """
    best = {}
    for line in lines:
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at end of file).
            continue
        if fields[1] == "NA":
            continue
        # Parse the depth once as float so that "30" and "30.0" are both
        # accepted by the filter and by the stored value.
        depth = float(fields[2])
        if depth < mindepth:
            continue
        distance = float(fields[1])

        names = fields[0].split("_")
        key = canonical_pair_key(names[0], names[1], cellline)

        previous = best.get(key)
        if previous is None or abs(distance) < abs(previous[0]):
            best[key] = (distance, depth)
    return best


def filter_distances(best, min_distance=MIN_DISTANCE):
    """Return parallel ``(dists, depths)`` lists for entries above *min_distance*."""
    kept = [(dist, depth) for dist, depth in best.values() if dist > min_distance]
    dists = [dist for dist, _ in kept]
    depths = [depth for _, depth in kept]
    return dists, depths


def depth_weighted_bin_averages(dists, depths, bin_edges=None):
    """Return the mean depth of the distances falling in each histogram bin.

    Args:
        dists: Sequence of distances.
        depths: Sequence of depths, parallel to *dists*.
        bin_edges: Histogram bin edges; defaults to ``BIN_EDGES``.

    Returns:
        List with one value per bin: depth sum divided by entry count, or
        NaN for bins that received no entries.
    """
    if bin_edges is None:
        bin_edges = BIN_EDGES
    depth_sums, _ = np.histogram(dists, bins=bin_edges, weights=depths)
    counts, _ = np.histogram(dists, bins=bin_edges)
    # Pre-fill with NaN and divide only where a bin is populated, so empty
    # bins do not trigger a divide-by-zero warning.
    averages = np.full(len(counts), np.nan)
    np.divide(depth_sums, counts, out=averages, where=counts > 0)
    return list(averages)


def main():
    """Parse the command line, summarise the file and plot mean depth per bin."""
    parser = ArgumentParser()
    parser.add_argument("summaryfile", help="Contig distance summary file")
    parser.add_argument("cellline", help="Name of cell line")
    parser.add_argument("--mindepth", help="Minimal depth", type=int, default=25)
    args = parser.parse_args()

    with open(args.summaryfile) as f:
        best = load_best_distances(f, args.cellline, args.mindepth)

    dists, depths = filter_distances(best)
    print(len(dists))
    print(len(depths))

    averages = depth_weighted_bin_averages(dists, depths)
    print(averages)

    # One point per bin, positioned at the bin's left edge.
    plt.plot(BIN_EDGES[:-1], averages)
    plt.show()


if __name__ == "__main__":
    main()