Skip to content

Maybe csi index is better? #112

Description

@zoomlion

Hi, I'm currently using CoolBox to draw interaction plots for metatherian genomes. I found that some chromosomes always raise an error saying that a tbi index cannot be generated for the gtf/bed files. After I modified the code to build a csi index instead, it works. Would switching to the csi index be a better alternative for chromosomes larger than 512 Mb?

def build_bed_index(file):
    """Make sure a bgzip-compressed BED file and its CSI index are available.

    ``~`` in *file* is expanded first. A path that already ends in ``.bgz``
    is used as-is; any other path is compressed to ``<file>.bgz`` via
    ``bgz_bed`` on every call. The index is built with ``index_bed`` only
    when ``<bgz_file>.csi`` does not exist yet.

    Returns the path of the bgzip-compressed file.
    """
    source = osp.expanduser(file)
    is_compressed = source.endswith(".bgz")
    bgz_file = source if is_compressed else source + '.bgz'

    if not is_compressed:
        log.info(f"Bgzip bed file, save to {bgz_file}")
        bgz_bed(source, bgz_file)

    # Reuse an index left behind by a previous run.
    if osp.exists(bgz_file + '.csi'):
        return bgz_file

    log.info(f"Make csi of bgz file, save to {bgz_file}.csi")
    index_bed(bgz_file)
    return bgz_file

def index_bed(bgz_path):
    """Build a CSI index for a bgzip-compressed BED file with ``tabix``.

    Runs ``tabix -C -p bed <bgz_path>``. ``-C`` requests a CSI index instead
    of the default TBI one.

    Raises ``subprocess.CalledProcessError`` when tabix exits with a
    non-zero status.
    """
    # Options go before the file name: tabix's usage is
    # ``tabix [options] <file> [region ...]``, so an option placed after the
    # file is only recognised when getopt permutes arguments (GNU behaviour).
    # Elsewhere a trailing '-C' would be taken as a region to query.
    cmd = ['tabix', '-C', '-p', 'bed', bgz_path]
    subp.check_call(cmd)

...

def tabix_index(filename, preset="gff"):
    """Call tabix to create a CSI index for a bgzip-compressed file.

    Runs ``tabix -C -p <preset> <filename>``.

    filename: path of the bgzip-compressed file to index.
    preset: value passed to tabix's ``-p`` option (defaults to ``"gff"``).

    Raises ``subprocess.CalledProcessError`` when tabix exits with a
    non-zero status.
    """
    # Keep every option ahead of the file name: arguments after the file are
    # regions in tabix's usage, and a non-permuting getopt would not parse a
    # trailing '-C' as an option.
    subp.check_call([
        'tabix', '-C', '-p', preset, filename,
    ])

def build_gtf_index(file):
    """Make sure a bgzip-compressed GTF file and its CSI index are available.

    ``~`` in *file* is expanded first, then the path is handled by suffix:

    * ``.gtf``     -> ``<file>.bgz`` is produced with ``process_gtf``
    * ``.gtf.gz``  -> ``<stem>.gtf.bgz`` is produced with ``gtf_gz_to_bgz``
    * ``.gtf.bgz`` -> used as-is

    The compressed file and the ``.csi`` index are only created when they do
    not already exist.

    Returns the path of the bgzip-compressed file.
    Raises ``IOError`` when *file* has none of the suffixes above.
    """
    file = osp.expanduser(file)
    if file.endswith(".gtf"):
        bgz_file = file + ".bgz"
        if not osp.exists(bgz_file):
            log.info(f"Process the gtf and do bgzip, save to {bgz_file}.")
            process_gtf(file, bgz_file)
    elif file.endswith(".gtf.gz"):
        # Drop exactly the ".gz" suffix. str.rstrip(".gz") strips any run of
        # the characters '.', 'g', 'z', which is not a suffix removal.
        bgz_file = file[:-len(".gz")] + ".bgz"
        if not osp.exists(bgz_file):
            # Log only when a conversion actually happens.
            log.info(f"Convert .gtf.gz to .gtf.bgz, save to {bgz_file}.")
            gtf_gz_to_bgz(file, bgz_file)
    elif file.endswith(".gtf.bgz"):
        bgz_file = file
    else:
        raise IOError(f"GTF track only support GTF file(.gtf or .gtf.gz), got {file}.")

    idx_file = bgz_file + ".csi"
    if not osp.exists(idx_file):
        log.info(f"Tabix index not found, build it in {idx_file}")
        tabix_index(bgz_file)
    return bgz_file

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions