-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgive_sequence.py
More file actions
62 lines (50 loc) · 1.85 KB
/
give_sequence.py
File metadata and controls
62 lines (50 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from argparse import ArgumentParser
from Bio import SeqIO
from scaffold import Longreads, revcomp
from collections import defaultdict
parser = ArgumentParser()
parser.add_argument("inputfiles", help="Input Files in Error-Rate or PAF format", nargs="+")
parser.add_argument("sequencefile", help="Sequences of specific reads")
parser.add_argument("contigs", help="Specify the contigs. Format: '3APD-823APD'")
parser.add_argument("lrid", help="Specify the longread id")
parser.add_argument("linename", help="Line name")
parser.add_argument("--blacklistfile", help="File containing long read ids where certain contig mappings should be ignored.")
args = parser.parse_args()
blacklist = defaultdict(list)
if args.blacklistfile:
with open(args.blacklistfile) as f:
for line in f:
sline = line.split()
if sline[0] == "contig":
blacklist[sline[1]] = "y"
else:
blacklist[sline[0]].append(sline[1])
#print(set([args.lrid]))
scafs = Longreads(args.inputfiles, blacklist, args.linename, whitelist_lreads=set([args.lrid]))
scafs.turn_longreads_around()
scafs.sort_by_starts()
#print(scafs.lreads)
lrseqs = dict()
for read in SeqIO.parse(args.sequencefile, "fastq"):
lrseqs[read.id] = str(read.seq)
ctg1, ctg2 = args.contigs.split("-")
read = scafs.lreads[args.lrid]
sc = 0
ec = 0
for ctg in read["maps"]:
if ctg["name"] == ctg1:
#print("\t".join([ctg["name"], str(ctg["scr"]), str(ctg["ecr"]), str(ctg["scc"]), str(ctg["ecc"])]))
sc = ctg["ecr"]
if ctg["name"] == ctg2:
ec = ctg["scr"]-1
print("start: " + str(sc))
print("end: " + str(ec))
if "reverse" in read:
tmp = ec
ec = read["length"] - sc
sc = read["length"] - tmp
print("start: " + str(sc))
print("end: " + str(ec))
print(revcomp(lrseqs[args.lrid][sc:ec]))
else:
print(lrseqs[args.lrid][sc:ec])