Skip to content

Commit f27cc56

Browse files
committed
{AH} raise exception in AlignmentFile._open when given a StringIO object, see #137
2 parents 62f3c65 + 7d678ea commit f27cc56

File tree

2 files changed

+49
-29
lines changed

2 files changed

+49
-29
lines changed

pysam/cbcf.pyx

Lines changed: 17 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -202,7 +202,11 @@ from cpython.bytes cimport PyBytes_FromStringAndSize
202202
from cpython.unicode cimport PyUnicode_DecodeASCII
203203
from cpython.version cimport PY_MAJOR_VERSION
204204

205-
from pysam.chtslib cimport hisremote
205+
from pysam.chtslib cimport hisremote
206+
207+
208+
from warnings import warn
209+
206210

207211
__all__ = ['VariantFile',
208212
'VariantHeader',
@@ -3412,7 +3416,7 @@ cdef class VariantFile(object):
34123416

34133417
return vars
34143418

3415-
def open(self, filename, mode=None,
3419+
def open(self, filename, mode='rb',
34163420
index_filename=None,
34173421
VariantHeader header=None,
34183422
drop_samples=False):
@@ -3422,6 +3426,7 @@ cdef class VariantFile(object):
34223426
closed and a new file will be opened.
34233427
"""
34243428
cdef bcf_hdr_t *hdr
3429+
cdef BGZF *bgzfp
34253430
cdef hts_idx_t *idx
34263431
cdef tbx_t *tidx
34273432
cdef char *cfilename
@@ -3432,17 +3437,6 @@ cdef class VariantFile(object):
34323437
if self.is_open:
34333438
self.close()
34343439

3435-
# read mode autodetection
3436-
if mode is None:
3437-
try:
3438-
self.open(filename, 'rb', header=header)
3439-
return
3440-
except ValueError, msg:
3441-
pass
3442-
3443-
self.open(filename, 'r', header=header)
3444-
return
3445-
34463440
if mode not in ('r','w','rb','wb', 'wh', 'wbu', 'rU', 'wb0'):
34473441
raise ValueError('invalid file opening mode `{}`'.format(mode))
34483442

@@ -3481,42 +3475,38 @@ cdef class VariantFile(object):
34813475
self.htsfile = hts_open(cfilename, cmode)
34823476

34833477
if not self.htsfile:
3484-
raise ValueError(
3485-
"could not open file `{}` (mode='{}')".format(
3486-
(filename, mode)))
3478+
raise ValueError("could not open file `{}` (mode='{}')".format((filename, mode)))
34873479

34883480
with nogil:
34893481
bcf_hdr_write(self.htsfile, self.header.ptr)
34903482

34913483
elif mode.startswith(b'r'):
34923484
# open file for reading
3493-
if filename != b'-' and not self.is_remote \
3494-
and not os.path.exists(filename):
3485+
if filename != b'-' and not self.is_remote and not os.path.exists(filename):
34953486
raise IOError('file `{}` not found'.format(filename))
34963487

34973488
cfilename, cmode = filename, mode
34983489
with nogil:
34993490
self.htsfile = hts_open(cfilename, cmode)
35003491

35013492
if not self.htsfile:
3502-
raise ValueError(
3503-
"could not open file `{}` (mode='{}') - "
3504-
"is it VCF/BCF format?".format(filename, mode))
3493+
raise ValueError("could not open file `{}` (mode='{}') - is it VCF/BCF format?".format(filename, mode))
35053494

35063495
if self.htsfile.format.format not in (bcf, vcf):
3507-
raise ValueError(
3508-
"invalid file `{}` (mode='{}') - "
3509-
"is it VCF/BCF format?".format(filename, mode))
3496+
raise ValueError("invalid file `{}` (mode='{}') - is it VCF/BCF format?".format(filename, mode))
3497+
3498+
if self.htsfile.format.compression == bgzf:
3499+
bgzfp = hts_get_bgzfp(self.htsfile)
3500+
if bgzfp and bgzf_check_EOF(bgzfp) == 0:
3501+
warn('[%s] Warning: no BGZF EOF marker; file may be truncated'.format(filename))
35103502

35113503
with nogil:
35123504
hdr = bcf_hdr_read(self.htsfile)
35133505

35143506
try:
35153507
self.header = makeVariantHeader(hdr)
35163508
except ValueError:
3517-
raise ValueError(
3518-
"file `{}` does not have valid header (mode='{}') - "
3519-
"is it VCF/BCF format?".format(filename, mode))
3509+
raise ValueError("file `{}` does not have valid header (mode='{}') - is it VCF/BCF format?".format(filename, mode))
35203510

35213511
# check for index and open if present
35223512
if self.htsfile.format.format == bcf:

tests/VariantFile_test.py

Lines changed: 32 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,37 @@ def testEmptyFileVCFGZOnlyHeader(self):
113113
DATADIR,
114114
"example_vcf42_only_header.vcf")) as inf:
115115
self.assertEqual(len(list(inf.fetch())), 0)
116-
116+
117+
def testDetectVCF(self):
118+
with pysam.VariantFile(os.path.join(DATADIR,
119+
"example_vcf40.vcf")) as inf:
120+
self.assertEqual(inf.category, 'VARIANTS')
121+
self.assertEqual(inf.format, 'VCF')
122+
self.assertEqual(inf.compression, 'NONE')
123+
self.assertFalse(inf.is_remote)
124+
self.assertFalse(inf.is_stream)
125+
self.assertEqual(len(list(inf.fetch())), 5)
126+
127+
def testDetectVCFGZ(self):
128+
with pysam.VariantFile(os.path.join(DATADIR,
129+
"example_vcf40.vcf.gz")) as inf:
130+
self.assertEqual(inf.category, 'VARIANTS')
131+
self.assertEqual(inf.format, 'VCF')
132+
self.assertEqual(inf.compression, 'BGZF')
133+
self.assertFalse(inf.is_remote)
134+
self.assertFalse(inf.is_stream)
135+
self.assertEqual(len(list(inf.fetch())), 5)
136+
137+
def testDetectBCF(self):
138+
with pysam.VariantFile(os.path.join(DATADIR,
139+
"example_vcf40.bcf")) as inf:
140+
self.assertEqual(inf.category, 'VARIANTS')
141+
self.assertEqual(inf.format, 'BCF')
142+
self.assertEqual(inf.compression, 'BGZF')
143+
self.assertFalse(inf.is_remote)
144+
self.assertFalse(inf.is_stream)
145+
self.assertEqual(len(list(inf.fetch())), 5)
146+
117147

118148
class TestHeader(unittest.TestCase):
119149

@@ -280,7 +310,7 @@ def testSampleAlleleIndices(self):
280310

281311
class TestIndexFilename(unittest.TestCase):
282312

283-
filenames = [('example_vcf40.vcf.gz', 'example_vcf40.vcf.tbi'),
313+
filenames = [('example_vcf40.vcf.gz', 'example_vcf40.vcf.gz.tbi'),
284314
('example_vcf40.vcf.gz', 'example_vcf40.vcf.gz.csi'),
285315
('example_vcf40.bcf', 'example_vcf40.bcf.csi')]
286316

0 commit comments

Comments
 (0)