diff --git a/association.nf b/association.nf index bb34057..2a8abac 100644 --- a/association.nf +++ b/association.nf @@ -43,6 +43,7 @@ def helpMessage() { --outdir The output directory where the results will be saved and what will be used as a prefix (default outs) --split Number read entries per fastq chunk for faster processing (default: 2000000) --labels tsv with the oligo pool fasta and a group label (ex: positive_control) if no labels desired a file will be automatically generated + --variants tsv with reference_name variant_positions ref_bases alt_bases, only input for variant analyses workflow Extras: --h, --help Print this help message @@ -117,6 +118,14 @@ if (params.containsKey("labels")){ params.label_file=null } +// variants file saved in params.variants_file +if (params.containsKey("variants")){ + params.variants_file=file(params.variants) + if (!params.variants_file.exists()) exit 1, "variants file ${params.variants_file} does not exist" +} else { + params.variants_file=null +} + // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name @@ -153,6 +162,7 @@ summary['Run name'] = params.name summary['Working dir'] = workflow.workDir summary['Container Engine'] = workflow.containerEngine if(workflow.containerEngine) summary['Container'] = workflow.container +if(params.variants_file!=null) summary['Variants file'] = params.variants_file summary['Current home'] = "$HOME" summary['Current user'] = "$USER" summary['Current path'] = "$PWD" @@ -444,81 +454,156 @@ process 'collect_chunks'{ * contributions: Sean Whalen */ -process 'map_element_barcodes' { - tag "assign" - label "shorttime" - publishDir "${params.outdir}/${params.name}", mode:'copy' - - conda 'conf/mpraflow_py36.yml' - - input: - val(name) from params.name - val(mapq) from params.mapq - val(baseq) from params.baseq - val(cigar) from params.cigar - file(fastq_bc) from params.fastq_bc_file - file count_fastq from bc_ch - file count_bam from ch_merge - file bam from s_merge - output: - file "${name}_coords_to_barcodes.pickle" into map_ch - file "${name}_barcodes_per_candidate-no_repeats-no_jackpots.feather" into count_table_ch - file "${name}_barcode_counts.pickle" - shell: - """ - echo "test assign inputs" - echo ${mapq} - echo ${baseq} - echo $fastq_bc - zcat $fastq_bc | head - - echo ${count_fastq} - echo ${count_bam} - cat ${count_fastq} - cat ${count_bam} - - python ${"$baseDir"}/src/nf_ori_map_barcodes.py ${"$baseDir"} ${fastq_bc} ${count_fastq} \ - $bam ${count_bam} ${name} ${mapq} ${baseq} ${cigar} - """ +if (params.variants_file == null){ + process 'map_element_barcodes' { + tag "assign" + label "shorttime" + publishDir "${params.outdir}/${params.name}", mode:'copy' + + conda 'conf/mpraflow_py36.yml' + + input: + val(name) from params.name + val(mapq) from params.mapq + val(baseq) from params.baseq + val(cigar) from params.cigar + file(fastq_bc) from params.fastq_bc_file + file count_fastq from bc_ch + file count_bam from ch_merge + file bam from s_merge + output: + file "${name}_coords_to_barcodes.pickle" into map_ch + file "${name}_barcodes_per_candidate-no_repeats-no_jackpots.feather" into count_table_ch + file "${name}_barcode_counts.pickle" + shell: + """ + echo "test assign inputs" + echo ${mapq} + echo ${baseq} + echo $fastq_bc + zcat $fastq_bc | head + + echo ${count_fastq} + echo ${count_bam} + cat ${count_fastq} + cat ${count_bam} + + python ${"$baseDir"}/src/nf_ori_map_barcodes.py ${"$baseDir"} ${fastq_bc} ${count_fastq} \ + $bam ${count_bam} ${name} ${mapq} ${baseq} ${cigar} + """ + } +} else { + process 'map_element_barcodes_mut' { + tag "assign" + label "shorttime" + publishDir "${params.outdir}/${params.name}", mode:'copy' + + conda 'conf/mpraflow_py36.yml' + + input: + val(name) from params.name + val(mapq) from params.mapq + val(baseq) from params.baseq + val(cigar) from params.cigar + file(fastq_bc) from params.fastq_bc_file + file count_fastq from bc_ch + file count_bam from ch_merge + file bam from s_merge + file(variants) from params.variants_file + output: + file "${name}_coords_to_barcodes.pickle" into map_ch + file "${name}_barcodes_per_candidate-no_repeats-no_jackpots.feather" into count_table_ch + file "${name}_barcode_counts.pickle" + + shell: + """ + echo "test assign inputs" + echo ${mapq} + echo ${baseq} + echo $fastq_bc + zcat $fastq_bc | head + + echo ${count_fastq} + echo ${count_bam} + cat ${count_fastq} + cat ${count_bam} + + python ${"$baseDir"}/src/nf_ori_map_barcodes.py ${"$baseDir"} ${fastq_bc} ${count_fastq} \ + $bam ${count_bam} ${name} ${mapq} ${baseq} ${cigar} --mutations ${variants} + """ + } } + /* * Filter barcodes for minimum coverage and unique mapping * contributions: Gracie Gordon */ - -process 'filter_barcodes' { - tag "$filter" - label "shorttime" - publishDir "${params.outdir}/${params.name}", mode:'copy' - - conda 'conf/mpraflow_py36.yml' - - input: - val(min_cov) from params.min_cov - val(min_frac) from params.min_frac - val(out) from params.name - file(map) from map_ch - file(table) from count_table_ch - file(label) from fixed_label - output: - file "${out}_filtered_coords_to_barcodes.pickle" - file "${out}_original_counts.png" - file "original_count_summary.txt" - file "${out}_filtered_counts.png" - file "filtered_count_summary.txt" - - shell: - """ - python ${"$baseDir"}/src/nf_filter_barcodes.py ${out} ${map} ${table} \ - ${min_cov} ${min_frac} $label - """ +if (params.variants_file == null) { + process 'filter_barcodes' { + tag "$filter" + label "shorttime" + publishDir "${params.outdir}/${params.name}", mode:'copy' + + conda 'conf/mpraflow_py36.yml' + + input: + val(min_cov) from params.min_cov + val(min_frac) from params.min_frac + val(out) from params.name + file(map) from map_ch + file(table) from count_table_ch + file(label) from fixed_label + output: + file "${out}_filtered_coords_to_barcodes.pickle" + file "${out}_original_counts.png" + file "original_count_summary.txt" + file "${out}_filtered_counts.png" + file "filtered_count_summary.txt" + + shell: + """ + python ${"$baseDir"}/src/nf_filter_barcodes.py ${out} ${map} ${table} \ + ${min_cov} ${min_frac} $label + """ + } +} else { + process 'filter_barcodes_mut' { + tag "$filter" + label "shorttime" + publishDir "${params.outdir}/${params.name}", mode:'copy' + + conda 'conf/mpraflow_py36.yml' + + input: + val(min_cov) from params.min_cov + val(min_frac) from params.min_frac + val(out) from params.name + file(map) from map_ch + file(table) from count_table_ch + file(label) from fixed_label + file(design) from fixed_design + file(variants) from params.variants_file + output: + file "${out}_filtered_coords_to_barcodes.pickle" + file "${out}_original_counts.png" + file "original_count_summary.txt" + file "${out}_filtered_counts.png" + file "filtered_count_summary.txt" + file "label_mutExpand.txt" + file "design_mutExpand.fa" + + shell: + """ + python ${"$baseDir"}/src/expand_variants.py ${design} ${label} ${variants} design_mutExpand.fa label_mutExpand.txt + python ${"$baseDir"}/src/nf_filter_barcodes.py ${out} ${map} ${table} \ + ${min_cov} ${min_frac} label_mutExpand.txt + """ + } } - - /* * Completion e-mail notification */ diff --git a/conf/cluster.config b/conf/cluster.config index 3de117e..fcc7f3b 100644 --- a/conf/cluster.config +++ b/conf/cluster.config @@ -42,20 +42,20 @@ //} //uncomment for SLURM cluster -//process { -// withLabel: longtime { -// executor='slurm' -// //queue='long' -// clusterOptions = '-t 3-00:00:0 --mem=6G' -// } -// withLabel: shorttime { -// executor='slurm' -// //queue='short' -// clusterOptions = '-t 00-01:00:0 --mem=6G' -// } -// withLabel: highmem { -// executor='slurm' -// //queue='short' -// clusterOptions = '-t 00-20:00:0 --mem=80G' -// } -//} +process { + withLabel: longtime { + executor='slurm' + //queue='default' + clusterOptions = '-t 3-00:00:0 --mem=6G' + } + withLabel: shorttime { + executor='slurm' + //queue='default' + clusterOptions = '-t 00-01:00:0 --mem=6G' + } + withLabel: highmem { + executor='slurm' + queue='bigmem' + clusterOptions = '-t 00-20:00:0 --mem=80G' + } +} diff --git a/conf/global.config b/conf/global.config index ed02699..af17dd3 100644 --- a/conf/global.config +++ b/conf/global.config @@ -1,5 +1,5 @@ //MPRAflow version -params.version="2.3.4" +params.version="2.3.5" // nextflow version required params.nf_required_version="20.10" diff --git a/conf/mpraflow_py27.yml b/conf/mpraflow_py27.yml index 65e78db..843dcc3 100644 --- a/conf/mpraflow_py27.yml +++ b/conf/mpraflow_py27.yml @@ -4,37 +4,10 @@ channels: - defaults - conda-forge dependencies: - - _libgcc_mutex=0.1=main - bcftools=1.9=h68d8f2e_9 - - bzip2=1.0.8=h7b6447c_0 - - ca-certificates=2019.11.27=0 - - certifi=2019.11.28=py27_0 - - curl=7.67.0=hbc83047_0 - - gsl=2.5=h294904e_1 - htslib=1.9=h244ad75_9 - krb5=1.16.1=h173b8e3_7 - - libblas=3.8.0=11_openblas - - libcblas=3.8.0=11_openblas - - libcurl=7.67.0=h20c2e04_0 - - libdeflate=1.3=h516909a_0 - - libedit=3.1.20181209=hc058e9b_0 - - libffi=3.2.1=hd88cf55_4 - - libgcc-ng=9.1.0=hdf63c60_0 - - libgfortran-ng=7.3.0=hdf63c60_0 - - libopenblas=0.3.6=h5a2b251_2 - - libssh2=1.8.2=h1ba5d50_0 - - libstdcxx-ng=9.1.0=hdf63c60_0 - - ncurses=6.1=he6710b0_1 - - openssl=1.1.1d=h7b6447c_3 - perl=5.26.2=h14c3975_0 - - pip=19.3.1=py27_0 - pysam=0.15.3=py27hbcae180_3 - python=2.7.17=h9bab390_0 - - readline=7.0=h7b6447c_5 - samtools=1.9=h10a08f8_12 - - setuptools=42.0.2=py27_0 - - sqlite=3.30.1=h7b6447c_0 - - tk=8.6.8=hbc83047_0 - - wheel=0.33.6=py27_0 - - xz=5.2.4=h14c3975_4 - - zlib=1.2.11=h7b6447c_3 diff --git a/environment.yml b/environment.yml index 495ee18..b8598c7 100644 --- a/environment.yml +++ b/environment.yml @@ -3,4 +3,6 @@ channels: - bioconda - defaults dependencies: - - nextflow=20.01 + - nextflow=20.10 + - samtools + - conda<4.7 diff --git a/examples/example_design.fa b/examples/example_design.fa index 95e1a3f..03d6214 100644 --- a/examples/example_design.fa +++ b/examples/example_design.fa @@ -1,400 +1,3 @@ ->LCL_positive_ONLY_rs113331329 -ggattacgagcgcgagccactgtgcctggccatctctactaaaattacaaaaattagccaggtgtggtggcacatgcctgtaatcccagctacattaggaggctgaggcacgaaaattgcttgaactcaggaggcagaggttgcaatgagctgagatcgcattactgcactcaagcctg ->LCL_positive_ONLY_rs2300644 -tacacaggtgtgtttaccacatatttgctaaaactgtgttttaacaatttccataattttgacaaatcttatcagtgaaaaaataaaatactattataattttatttgcaattctttgattactaataaagtcaaagatctctttaggcttatcagccagtctgtattttatttagcca ->LCL_positive_ONLY_rs1166698 -TTTCCTTTATGACTAAAAGGTGGGTTTTCATAACGTTTTCTTAGGATGATGAAATAGAAAGTGAAGCAAAAAAAGAATCACTTTCTCCCGGAAAATTGAAACTAACTTTTGAAGAACTGGAGCGACAAAGACAAGAAAACCGAAAGAAGCAAGCTGAAGAGGAAGCAAGAAAACGTTTA ->LCL_positive_ONLY_rs7537908 -tccctgacatctggtaggggccttggcacttgcaggaaggccttggcattctgttgcacataaacaattgcacagcatgtcaacagcagataagactgctctgACTGTGGtaatttctcctaccttacagataggtttactaagattcctggtcatagaattgcacccatttcctgaca ->LCL_positive_ONLY_rs17499247 -GAAATGTTACGATCTCCCAATTCAACATTATAATTTTACTGCAGGATAAATAAAGAAAACAGGAAAGGAGGAAAGCATTGATTACAAATATCTTAACAATGAGCAAATGTGCAAGGAAAAAATATATTAAGACAAATCCAAGGTAGGTAGATGTACGTTTTTCTGTTCTTTTGAAAATT ->LCL_positive_ONLY_rs11818000 -GTGGTGCACAGACCTGGGTTACCCAGGCACCTGTGGGGGATGCCCCGTGTGGGCAGAGACGTGAGTGCGTGTGTGTGCTCAGGTTTACATCTGTGTGGGCAGAGACGTAGAGGAGCTGGATACATATATACTTGAGGACAGACACAGGAGGATGGTGGGGGTGAGCAAACAACCCAGGG ->LCL_positive_ONLY_rs34481144 -GCATCTCATAGTTGGGGGGCTGGCCACTGTTGACAGGAGAGAAGAAGGTTTGGACAGTGTGATTCATGGTGTCCAGCGAAGACCAGCGGCGGTCGGGTTACTGGGATGGTTCTCAGTGAGCCCTCCCTTTCCCCAGTAGTTTCGGTTTCTCAACAGTTTCCTTTTCCTGGCATTTGTCA ->LCL_positive_ONLY_rs10896949 -tatggtggcagtcaagaaagaatgagagccaagtgaaaggggaaaccccttataaaaccatcagatcttgtgacacttattcactaccatgagaacagtataggggaaactgcccccatgattcagttatctcccactgggtccctcccacagcatgtgggaattatgggaactacaat ->LCL_positive_ONLY_rs3884627 -ATGGGAGTAGAGCCTTATTTTTAGACCCAGGGTTGATTTCGTGAGACTTTCTGGGAAAGTATTTATTGAAAGGGGAAGTCCTAAGAGATTAGAAATTTACCCAGTAATTGACGAATCTGATACTTAGAGAAAAACCCCCTTCCCACAAGGGGCTTGTATCTGCTGACCCTGCACACTTG ->LCL_positive_ONLY_rs2731096 -TTCCCTTGAGGAAATGTGATAATACAAACAGAAAATTGTATCTGCTAGAACTTGTCTCAGCAAGAATACTCTATGTGGTTGTATGAAATGCTGATAATGACTAAGCTGGGGAGAATCCCCTAGTGATGTAGTAAACTGCCTTATTAGGTGAGTATGTCTGTTAACTGCAGGGAAGTATG ->LCL_positive_ONLY_rs7298123 -tgcctgtaatcccagaactttgggaggcagaggcgggcagatcacctgaggtcaggagtttgagacaagcctggccaacatggcaaaaccctgtctctactaaaaatacaaaaattagcagggcgtggtggtgggcgcctgtagtcccaactacttaggaggctgaggcaggagaattg ->LCL_positive_ONLY_rs4766666 -agggaaagagtacaaaagagataaattttaaagctgggtgtccagggcagacatcacatgtcggcaggttctgtggtgccccctgagccataaaaccagcaagtttttattagcaatcttcaaagggaggaaatgtacatatagggtgtgggtcacagagaacacatgattcaagggcg ->LCL_positive_ONLY_rs7147964 -ATTGCAACATTCTATAGAAGGGAAAATCCAGCAACCTATTTTTAGTAGAGAAGTCTGGTGATCTTTGGCCACATGATGTCACTGGTGGTCCAGAAGATTTTTTGCCATCTCTGCCTCAGTGCCATGGGCAAGTTCCCTCCATCAACCTAGTCACGACCTCTTATTGTTCAAATGTAGGC ->LCL_positive_ONLY_rs11629326 -aatgactgtaagagCAGTCAAAGTCCTGTCAACTTGCAGCTGTTTTCTGGCACCAGCTCAAAGTGAGGCTACACCCTTCTCCCTCTTGCACATGAGTCAGACTCTGTCACTACTGGGCTGCTTGCCCACTCAGCTTAAAACCGTCTGGCACACCTCCCAGCAAGAACATGGATTCTATT ->LCL_positive_ONLY_rs9673012 -tctatgaagccatccttgactcctgtaagacagaccagggaggtgttctgtgttaccacagctttctgagtctgctctcccaGAGTGCTCAGGCTGTGTCAGAATTGCTCCTTGATTTAGCTGGGTCCTGTACGCTTATCTCCTCCCTCTCACCTTCATCTTGTCTCCTCTATCACAGA ->LCL_positive_ONLY_rs11636538 -CATAGCCTCCCAGTCAGAGAAGCTCAGGAAGGCCCTAAGCCTGAGAAGCAGAGTTCCCCTATCAGTCTGACTGCTTTTCTCATGCGAAAACTTCTGGTGAGTCACGTTTTCCTGGCCATACACCTTTATACTGGAAGCAAACATATAAACAAGTCAGATCTTTTTCAAAGCAGATGGAT ->LCL_positive_ONLY_rs28786199 -tattatgtatctcagagtagctgcaagagaagacttgaaatgttctcagctcatagaagtgataaatgctcaaggtgatgatggataacgcagataccctgacttgatcgttacacattctgtgcatgtaatagatactcacatatacccagtaaacatgtaaaatatcacataccaat ->LCL_positive_ONLY_rs80347459 -ACCCAGGCATGCTCATGAACCTTTTGAAGTTGGTCTCCTCCCAGTTGATTCAGACTCAGTCATGTCAGCCCTGAGCCAGCAAGCCTCCACAAGCACTGGGCACTCCAGTGTGTCACCAGAGAAGACCAGGGACTAGAAGAGCCCCAGGTGCTCAAAGCAGTCGCACAGGATTGCCCACG ->LCL_positive_ONLY_rs66659109 -CCTTTTGAAGTTGGTCTCCTCCCAGTTGATTCAGACTCAGTCATGTCAGCCCTGAGCCAGCAAGCCTCCACAAGCACTGGGCACTCCAGTGTGTCACCAGAGAAGACCAGGGACTAGAAGAGCCCCAGGTGCTCAAAGCAGTCGCACAGGATTGCCCACGACTTTGGCTATTCCCACCC ->LCL_positive_ONLY_rs11865038 -TCTTGTCCTGAGAATGGCCAGGTCCCCTGTCAGCAGCTGGTTGGTTGGCCTGTGGGGAAGGAAGGAGGGTGGAGTTGTCCTCATCCTCACGGCTTTGGTCCCTCCCTCCCTCCCCATTCCTCGAAGGAACAGGGTCTGTCTTGGCCGCCATGacagatgagaatactgaggctcaaagc ->LCL_positive_ONLY_rs2036338 -AACTCAGATGTTTCCCATAAGATTGCAATAAGAGTTGCTAAGCTGAGAAATGAGATTTGTTTTCAGTTTCAGAGAAAATGAATCATTCAGTCAGGGCAGGAGACGATAAAGAAGAAAGAAGAAAAGGGACTGTTTTCAGACCAAGACAACATTTGGACTAAAAACTTGTTTCCTTAAAT ->LCL_positive_ONLY_rs77453980 -ACAAAGTGAGTCATCTCAAGTTGAGTAACTTGTTCTGAGTAAGATAATGACTCACTGAGAAAGCCTGAAATACCATCTAAATGGCTTCTATTAGAGCCCACTGCTGTCTGTGGACTGTTTTATCAGTGTGATTTATTTACAGGTCTGCTTCTAAATTGCGTATTTGTTCCATCATTTTG ->LCL_positive_ONLY_rs76154201 -ttaccaaaaacaacacaacataaaccaaaaaatcttgctgggatttttcactgggtttgcattgaatcattgtgtctaacattgactcttctaacacataaatacagcatacatctccgttcacttatgtaatctttttcagaagttttatggtttcagcaataaagtcattaacgtct ->LCL_positive_ONLY_rs75530705 -caacataaaccaaaaaatcttgctgggatttttcactgggtttgcattgaatcattgtgtctaacattgactcttctaacacataaatacagcatacatctccgttcacttatgtaatctttttcagaagttttatggtttcagcaataaagtcattaacgtcttttgttagatttatt ->LCL_positive_ONLY_rs6503804 -CCGAGCCCCCTCTCCGGGGTGCACATAACGCTCCAGCGCGCGCACACACCTCGGACAGTCCCCTCGCGTCTCACGTGACACGCGGCCCGGACGTGCCACCCTGCCCACGCGCTCACGCACGCGTGCACACACCAGCCTCCTGGTCGCTGCCCCACTCCGTCCCGACGCCCCTGCGACCC ->LCL_positive_ONLY_rs9966367 -TGGGATTATCATAAGCTTTTGGAGTAGACAGGAGCCACACCTAGGGCATGATTTAGAGGTGGGGAAGCTGTTTGGTTAAGTGTAAGTTTATGGCTAGGGGGGATTCCCACAGTATCACTATTAACTCCAAGTTATGATGAAACCAAAACTTTGTCTACACATGCCTCTCTCATGATTAA ->LCL_positive_ONLY_rs2005271 -aaaaTTTTTTTtgaaaataacaaaactcgctaccttacagattaattgtgagaaatgaataaatgaatgcgtatgaggggctttgtaaacGGCTGAGCACCACCCCAAAAAAGTCCCTAACTTTTGGCTTTTCTAAGATTGCCTTTTTAGGGCTTGATGTGGGTTGGGCTCTTCCCCAT ->LCL_positive_ONLY_rs73926986 -TACCCTTTTCCTGTCTAAGGTTGCTCCTCACCACCCAACCAGGGGTAAGGTGCTTGCTACTGTCTCCTCACTCACATCCGGTCTAGATTAGGTCAGGATGGAGCAGGGGAACCCCAGAGTCCTGCTTGCTGTGCTGGGGTTTGTTCATATAGAAACAAAGAGCCCATCCAAGTGATTAA ->LCL_positive_ONLY_rs12996507 -ccttctctaagcctcagtttcctcatttggaaaatgtgaatagtagctacctcagagttgttgggaaagtaaaatggcatgatacattgcaaagtggttagtatagagcctgacccataagcactcattaaatgttagctattattTACTCCTGGTTCAGATCTTTCTCCCAAGTTGCG ->LCL_positive_ONLY_rs12713819 -cattcagtctattaccattagactattatcattccctttttgtccaatcccatttctttttctttttaagacacagggtctctctctgtcgcccaggctggagtgcagtggtgcaatcattgctcactgtaaccttgaactcctgggctcaagcaactctcccacctcagcctcccaag ->LCL_positive_ONLY_rs61419297 -gcatactacattttgttatccatttaactgttaatggacactggggttgtttttaccttttggctatcgtgaataatgttgtcacagacgttggtgtgaaaatactggtttgagtttcactttcagttcttttggatatatactcagaattagagttgctgagtcataggataattcta ->LCL_positive_ONLY_rs3744749 -aacagcacccagcccagcccactgtcaagggctgttgcaggaatATGACAACAGCCACCAATATTTGCATAGCAGAGATGCCCAGTTTCGTTTTCTATTTGAAAGTTTCTCTGAAGGGGGATGTGCTAGAGACACGAGAACAACTGCTACCATCTTAATAACTTTTCTGGCAATACACG ->LCL_positive_ONLY_rs9616392 -GCCACACAAAGTGcagtcatgtgtaacgaccaggcgtgttctgagaaatgcgtcattaggcgactttgtcatggtgcaaacatcacagcgtgcactccacacgccacacgccacacgcctagatggtgcagccgctctgcacctaggctgcctggcaggccctgctgctcccaggccgc ->LCL_positive_ONLY_rs1707988 -taggttttacttttacagtattatccattttaagttaattttttgtaatggtgtgacgtatgaattgaggttcaatttttttgtatatggtttgcatatggctcaatttttttgtaattatttcaacatcatttgttgaaaagtttctccactgaattacctttgcaatttggccaaaa ->LCL_positive_ONLY_rs62359378 -cccaatatggctgagcttggggctttttatgggctcagaatagggagtgtgcgctgattggtttgtgagtatccaaaaaaggtcaaagtgaagacgccactcaaatatgggcgtgacagtgtagaaaactaattaggaaagggtagatatgtgtaaaataggtgtagaatggggaacaa ->LCL_positive_ONLY_rs61354333 -aaaaaaaaGTCATATCACCAATGAGATCTGACCACCCTGCCTAAAATAGATTATCCAGTGtgtggtagttttgtaatgtgtcaacctgcgtaggtggaacatttcctagaattcattttctggtatgcttctagttagagtgggtcacaaggaagattcttgagaaatttggagggtgg ->LCL_positive_ONLY_rs13186030 -tcctgcctcagcctcccgaatagctgggatgacaggtgcgtgccaccacgcctggctaatttttgtatttttagtagaggtggggtttccccatgttagccaggctggtctcgagctcctgacctcatgatccgccctcctcggcctcccaaagtgctgggattgcaggcgtgagccac ->LCL_positive_ONLY_rs1904841 -tcaaactcctgagctcaggtgatcctcccgcggcttcacaaagtgctgggattacaggcatgagccaccgcgctcagccAAAAAAGTTACTTTTAAAATGTGAAAAAAGAAACTGAAAGTAGAGTTCTAGTCTGTCACCAGTTACTTCCAGTTACCATCCCAGGTGGAAAGCAACAGCT ->LCL_positive_ONLY_rs9274607 -TAGGGGCAATACtaatggttataaagcaattagaacaacgcctggcaaacattaCTTCTGACCTCAACCAAGACAATAAATATCTCCACTTCTCTTCTTCTCTCCCTTTCTCTCTTTCTTTTCCCAAAATGTTAGGTTCTGCTTTTAAAGTAGAGAATACAATCTAAAATCAGAATATA ->LCL_positive_ONLY_rs9357934 -gcagagtggagtgtagattattttattacaaaattaggtggcaaaggactgtggggtttgtttgtttgtttgagccagagtcttgctctgtcactcaggctggagtgcagtggctcaatcttggctcattgcaacctctgcctcctgggttcaagcgattcccctgcctcagcctcctg ->LCL_positive_ONLY_rs9450712 -aagaagcgaaaggggagttctgaatatttttcttactactggaggtttctgtgaggttcagccccccacaatggggatttctcacctcactgaggttcaatgtacccctattgggatttttcacctcttttgaggttcaacccctcgtaatggggatttctcagttctttgaggttcag ->LCL_positive_ONLY_rs113906222 -TGAGAAGCAGGGCAGCCCCGCCCACGAGAGAGGAGGCAGGCTCTTGGGGTTTGCCAACAAGTGACAGTGTGTGTCATCTGTGCACGTGTCTGTGCGTGAGCATGCCTGTGTGTTGCACCCCAAACCTCCATAATACCAACAACACTCAACTGCCAGCCACGCAGGGAATGGGGAAGAGG ->LCL_positive_ONLY_rs3807866 -GGGCAGCCCCAACCCACAGCGAAGAGCTTAAGTACTGGGATCCGAGACGGGATTTCACACACCTGTAACTAGCACCAGAGTTAAGGGGTGGGGAGGGGCAGCGTGGGCAAAGCGAAACGAAACCGAGCAAAAACAAAAACTACGGCCTTGCCGCTTGCTGTCTCCTTCTAGTTGGCCAG ->LCL_positive_ONLY_rs3763469 -CAAGTTATATGGAAAAACAGCTGCAATTAGAACTTGATTCTCACTTTAAGAAAGAAAGATTCTTGTTTGGTTTTCTCCACTTTCATTTTTTGTTTCTAGGTCCAGGGCCTCCCACCAAATGCTGACGGCTGCCTGCTTCAAACCCTGCCACATCAGCAGGGAGGGAGCACAGCGGCATT ->LCL_positive_ONLY_rs10282336 -tcttttaagtttctcatcagccaactggtatcactggctcaggcaattgcaatgttaaacagttgtcactgattattttcaacaaacaggataggactattttcactgaggcatttctgagtcatatcaaatgaagaaaagccctgaaaatagtgcctttctaaggagctgccagacag ->LCL_positive_ONLY_rs2487161 -TCTGGGGACTGGTTATCGACTGGAGTTTTTGGGGAATTTGAACGATGACTCACACTTTCAGTCTGGTGGGTTTTCTATTTTACTAGGAATCCTtgacaaaggctctctcttctgaccaaactttagtcagtctcctgagtcctttctgattaggcgcagccttgggctcctctctcttc ->LCL_positive_ONLY_rs6472538 -CTATTTCTTCCTCTCAATGAGATAGTTATAAAAGGGTGTACTGTATTCACAATGACACAGACTCTTATCACTAGATTAGAAAATGCCACATATGACTAACAGGTGATTTCAGTTTCATTTTAGTATTCGATCTTAAAGGTTTACACATCCTTTAATTATACTTGTTTTTAGTGAAGAAG ->LCL_positive_ONLY_rs2277138 -GGGCAGTAAGTAAGTCTCATCCTCTCAAATTAGTGGGTGATTAAATGGAGAGCCCCACCAATGCACCAGCTCTGTGTTCTGACTCAGACTGTGACATCAGGCTGCCCTTGGCACAGACTTGCCAAGGGTCTTGCCTTTTCCTTTCCCGCTTGGTGCAGTCACCTGGAATTGCTTCGTGT ->LCL_positive_ONLY_rs11265934 -tccccatgtgttgtgggagtgactgggtgggagataattgaatcatggggctgggttttcccgtgctgttctcatgagagtgaataagtttcatgagatcagatggttttatgaaggggagttccctggcacatgctctcttgcctgccgctatgtaagacaggactttctcctcattc ->LCL_positive_ONLY_rs10820682 -gtaacattgtgcttttaatcatttgttgcacataaaacagtcacagcattaccaacattaccttcagtaacacagttactgaaaagtTATGTCGAGGAGGGAGATGAGTAATCAAATTTGTATTTCAAAATCATTTGAAATAGTTCTCTTGATGGTTATATCACCAGTTTCATAAACAT ->HepG2_positive_ONLY_rs4648649 -GGTGTGTGCCATTATCAAAAGAAAAGAGAGGCTggccaggtgtggtggctcatgcctataatcccagcactttaggaggccgtggtgggtggatcacctgaggtcaggagttcaacaccagcctggccaacatggtgaaaccccgtctctactaaaaatacaaaaattagccaggtgtg ->HepG2_positive_ONLY_rs4240913 -ggccgggcacagtggctcacgcctgtaatccaagcacttggggaggccgaggcgggcagatcacgagatcaggagttcaagaccagcctggccaatatgatgaaaccccgtctctactaaaaatacaaaattagtcgggtatggtggcacatgcctgtaattccagctgcttgggagcc ->HepG2_positive_ONLY_rs12062022 -cctgtaatctcagtactttgggaggccaaggcgggtggatcacttgagagcaggagttcgagaccagcctgaccaacatggtgaaaccccgtctctactaaaaatacaaaaatcagccaggcatggtggcatgtgcctgtaatcccaactacttgggaggctgaggcaggagaatcgct ->HepG2_positive_ONLY_rs7550238 -gtggctcatgcctgtaatcccagcactttgggaggccgaggcgggtggatcatctgaggtcaggagttcgagaccagcctggccaacatggtgaaaccccatctctactaaaaaatacaaaaattagctggtcgtggtggcaggcaccttaatcccagctacttgggaggcagaggcag ->HepG2_positive_ONLY_rs35997236 -aactgcatgctcatccaaaacatgacaatttcaataagtatcttcaactccaaaataaatctcaaaacgttttcacttgtaccttaaccctccacagccactgctctTCTCACTGGTCTCCTTGTGTCTCTCTTCTCCAATCAACAGTCTGATTCTCAATCAGACCTCTCAGTTTAGAA ->HepG2_positive_ONLY_rs12771399 -CTCATCTCCAAGGGCTTGTTTACTGCTTCATACATAAAAAGAggccaggtatgatggctcatgcctgtaatcccaacactttgggaggccgagatgagatgattgcttgagcacaggagttcgagaccagcctgtacaacatagggagaccatgtctctatgaaaaattaaaaacttag ->HepG2_positive_ONLY_rs56232455 -CCGGTTCAGGGCTCCTGCCAGGCCTGCGCTGACGCCTCCACACTCCAGGCCCCACAACCGTTCCCCCAAAGCCCTCACCTCTgccgggcgccgcggctcacgcctgtaatcccagcactttgggaggctgaggtgggaggatcatgaggtcaggagatcgagaccatcctggctgacac ->HepG2_positive_ONLY_rs28681202 -GGGGGAAGGGCCAGTGATGGGTCTGGAGCGAAGGCCAAGAGTGGGGCACCTGGGGGAAGGTATGAGGTCCCTCCCAGGATCAGACTGTCCCGAAATGTCGTCTGccttgggaggccaaggcgggtggaacatctaaggtcaggagttcgagaccagcctggtccacatggtgaaactct ->HepG2_positive_ONLY_rs28678266 -CCAGTGATGGGTCTGGAGCGAAGGCCAAGAGTGGGGCACCTGGGGGAAGGTATGAGGTCCCTCCCAGGATCAGACTGTCCCGAAATGTCGTCTGccttgggaggccaaggcgggtggaacatctaaggtcaggagttcgagaccagcctggtccacatggtgaaactctgtctgtacta ->HepG2_positive_ONLY_rs61891085 -ctgtaatctctacattttgggaggccgaggcaggcagttcaccttaggtcaacagttcgagaccagcctggccaacatggtgaaaccccgtatttactaaaaatacaaaaattagctgggtgtggtcgcatgtggctgcaatcccagctactcaagaggctgaggcaggagaactgctt ->HepG2_positive_ONLY_rs11231026 -GGCTGGAACTCCTCCGGTCAAGATCAAGTGGAGCCTCTCCTGCGGggctgggcgtggttgctcacacctgtaatcccagcactttaggaagccgaggcgggcagatcaactgaagtcaggagttcgagaccaaccaggtcaacatggtgaacactccgtctctactaaaaatacaaaaa ->HepG2_positive_ONLY_rs7125402 -TAAAGACCTGctttgggaggccaaggcgggcagattgctttgagctcacgagttcaagaccagcctgggcagcatggcaaaactctatccctacaaaaaacacaaaaattagccgggtgtgatggtgtgcacctgtagtcccaggtactcaggaggctggggtgggaggatggcttgaa ->HepG2_positive_ONLY_rs2513052 -GCTGAAAAGAGGGAAAGCAAAGATTATGGGGGGAAAGGTGGGACTTTTAACAGCGTACTGTGGACCTTGGCTTTTCCCAGCCAAGGTCACTACCGGCAAGCAATATTCCCAGCATCCTCTGCTATAATCCTCTAGAATAAATAACTGTTTGGGAACTTCATACAAAAAGCTGACTCCCA ->HepG2_positive_ONLY_rs56364420 -ggccatgtgaagatgtgcctgcctcccctttgcagtggctcatgcctggaatcccagcactttgggaggccgaggtgggcagatcacgaggtcaggagatcgagaccatcctggctaacatggtgaaaccccatctctactaaaaatacaaaaaattagccaggtgtggtggtgggcac ->HepG2_positive_ONLY_rs11064202 -gaaggcccatgcctgtaatcccagcactttgggaggccaaggcaggtggatcacctgaggtcaggagttcaagaccagcctggccaacatggtgaaaccccctctctactaaaatacagaaattagccgggcatgatggcaggtgcctgtaatcccagctactcaggagactgagacgg ->HepG2_positive_ONLY_rs56201260 -GTCAAATATCAAGTGCACTTTCATTAAAAATGGCAAATGTTggctgggcacggtggctcacacctgtaatcctagcactttgggaggccaaggcaggtggatcacctgaggtcaggagtttgagaccagcctggccaacatggtgaaaccccatctctactacaaatatataaattagc ->HepG2_positive_ONLY_rs11611123 -CTGACACTAAAGAGAACATGCCCACAATgctgggcgtggtggctcatgcctgtaatcccagcactttgggaggccgaggcgggcggatcacgaggtcaggagattgagaccatcctagctaacacggtgaaaccccgtctctactaaaaatacaaaaattagccgggcatgttggcggg ->HepG2_positive_ONLY_rs11168904 -gtggcttgcttttaaacaggctgggcatgatggctcactcctgtaatcccagcagtttgggaggccgaggcagaggatcacttgaggccgggagtttgaaaccagcctgattaacatagtgagaccctatctctgtgtgtgtgtgtatgtgtatgtgtgtatatatatatatacacata ->HepG2_positive_ONLY_rs11065917 -TTAATGTCTCAATACATTCGTAATAAAAAGTTACCGCAAggctgagagcgatggctcacacctgtaatcccagcactttgggaggccgaggcgggtggatcatttgaggtcaggagttcaagaccgggctggccaacatggtgaaaccccatctctattaaaaatacaaggccgggcgc ->HepG2_positive_ONLY_rs7133199 -tgagccactgtgctcggcACGTGCATTGATTGTCATGACTGCATCTTAAGAAGAAAAAAGGAAGGGTggccagacgcagtggctcccacactgtaagtgctttgggaggccaaggcgggggaatttcttgagcccaggagttcgagaccaacctgggcaacacagcagatactgtctct ->HepG2_positive_ONLY_rs10773820 -GCCACATTAAAAGACATCCTTCATTTAAAATAAAACGACCTggccaggtgtggtggctcacgcctgtaatctcagcactttgggagtccgaggcggatggatcacctgaggtcaggagtttgagaccagcctggccaacatggcgaaaccctgtctctactaaaaatacaaaaaccagc ->HepG2_positive_ONLY_rs10773821 -ATTAAAAGACATCCTTCATTTAAAATAAAACGACCTggccaggtgtggtggctcacgcctgtaatctcagcactttgggagtccgaggcggatggatcacctgaggtcaggagtttgagaccagcctggccaacatggcgaaaccctgtctctactaaaaatacaaaaaccagccgggc ->HepG2_positive_ONLY_rs10132396 -aatcaattacctaaccttccacttaagacaatggaaaaatgggccaggtatggtggctcacgcctgtaatcccaggaccttcggaggccgaggctagcagatcactggaggtcaggagtttgagaccagcctggccaacatggtgaaaccctgtttctaccaaacaatgcaaaaattag ->HepG2_positive_ONLY_rs6574205 -TACCAGAATCACCTTTTATGGAGAGTTCTAAAAATTCTCTTATGTAATGGGTATCCAGACCTGCTACTTTAAAGATCGGTAGTAggtcgggtgctgtggctcagactgtaataccagcactttgggaggccaaggccagtggatcacctgaggtcgggagttcaagaccagcctgccca ->HepG2_positive_ONLY_rs12883876 -caaatacatctctaaaaacaatgttttctgctgggcagggtggctcatgccggtaatcctagcactttgggaggtcaaggcaggcaggttgcttgaggtcaggagttcaagaccatcctggccaacatggtgaaaccctgtctctactaaaaatacaaaaaaattagctgagcatggtg ->HepG2_positive_ONLY_rs7171665 -gcttacgcctgtaatcccagcactttgggaggccgaggtaggtggatcacttgaggtcaggagttcgagaccagcctgggcaaaatggtgaaacccagtctctaattaaaaacataaaaattagccaagtgtggtggcgtgtgcctgtaatcccaactacttgagagactgaggcacaa ->HepG2_positive_ONLY_rs28781888 -gacagggtttcaacatgtaggccaggctggtcttgaactcctgacctcaggtgatctgccagcctcagtatcataaagtgctgggatttcaggtgtgagccactgtgactggccAGAAGCATTCTCTTTAAAAATCAGTGGGCATCCAgctgggtgtggtggctcacgcctgtaatccc ->HepG2_positive_ONLY_rs11643316 -CCAAATAAAGGTGCTCCCTTCTAAGTGGCTTACAAGGGTAATTTTTTTCCCCCCAGTAATGACGAGGAGGGATATTTCCTCCTTGTGGGGCTTCAGACCTTAGTCTTTGACCTTTGGCCTTCTGAGCACTGTGCTGTGTTAACACAGTTAATCAGCAACTGTACTTCCCAACCGGAGAT ->HepG2_positive_ONLY_rs113333037 -gtggctcatgcctgcaatcccagcactttgggaggctgaggcgggtggctcaccgaaggtcagaagttcgagaccagcctgggcaacatgatgaaacccccgtctctacaaaacatagaaaaattagccaggcttggtggtgggcacctgtaacgccagctactcgggagactgacaga ->HepG2_positive_ONLY_rs1634802 -tggctgtaatcccagcactttgggaggccgaggtgggcagatcacctgaggtcaggagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaattacccgggtatggtagcacgcgcctgtaatccaagctactggggaagctgagacaagagaatcac ->HepG2_positive_ONLY_rs8066475 -tggacaacacagtgagacctagtttgttaaaaaataagaaTAGggctgggcgcggtgactcacgtctgtaatcccagtactttggaaggctgaggcgggcagatcatctgaggttgggagttcaagaccagcctgaccaacatggagaaaccccatctatagaaaaaatacaaaaatta ->HepG2_positive_ONLY_rs9913356 -tgagattgtgccactgcactccagcctgggcaacacaggaagactccatctcaaataaaataaaataaaCTAAGAcggtggctcaagcctgtaatcccaacactttgggaggctgaggcaggcagatcacgagatcaggagatcgagaccatcctggctaacacggtgaaaccctggct ->HepG2_positive_ONLY_rs55668363 -CAGCCCCCACCCTTTGGAGGCCATATCCACCCTCCCCTCCTGAAGTGTCACTGGGCCTCAACTCTGCCCCAACATCCTCTCTCTCTCGCGTGCTCTCTCTCTCTCCTCCAAAAAGCAAATATTTGGAAGGGGACTTTGACATCTTCTTTCTAGTTCAGATGTTTATatctctttgagtc ->HepG2_positive_ONLY_rs113313477 -tgactcacgcctgtaattccagcactttgggaggccaaggtgggtggatcacaaggtcaggagatcgagaccatcctggctaacatggtgaaaccccatctctactaaaaatacaaaaaagtagccgggagtggtggcaggtgcctgtagtcccagctactcgggaggctgaggcagga ->HepG2_positive_ONLY_rs112493812 -aaataaacaactgatttttgtacgttgatcttgtatcctccaactttgcaaaattaatttattactattaagactttGTggctgggcacggtggctcatgcctgtaatcccagcactttgggaggccaaggcgggcagatcacgaggtcaggagatcgagaccatcctggccaacacgg ->HepG2_positive_ONLY_rs903567 -gagcagaggttcatagagcttaaacaactaggatttccctacctaggatatggtagagGTAAAGAGTATTCCAGcgggcagcagtagctcatgcctgtaatcccagcactttgggaggccgaggcgggcggatcacctgaggccaggagttcgagagcagcctgaccaacatggagaca ->HepG2_positive_ONLY_rs62075850 -TTGGCCTCAAATAATCTCACGACTATATACAGCCCTggccagccgtggtgtctcacacctgtaatcccagcactttgggaagctgaggcaggcagatcacttgaggccaggagttcaaaaccagcctggccaacaacgtgaaaccccatcactactaaaaatacaaaaattagccaggc ->HepG2_positive_ONLY_rs55724082 -CTCAGCCTATAGTCGGTCTATCTTCCATATCAGACTGGGAACTAGCCATCCTGCTAGGTACTCCCTAAAGACGACACCCTTggccaggcgcagtggctcatgcaggtaatcccagcactttgggaggccgaggcgggcggatcacgaggtcaggatatcaagaccatcctggctaacat ->HepG2_positive_ONLY_rs2898705 -tggctcaagcctgtaatcccagcacttcgggaggctgaggcgggcggatcacctgaagttaggagttcgagaccagcctggtcaacaagacgaaaccccgtctctactacaaatacaaaaaaattagcagggcgtggtggtgcgcatctaatcccaggtactcgggaggctgaagcaga ->HepG2_positive_ONLY_rs59810129 -taatcccagcactttgggaggccgaggtgggtggatcacctgaggttgggagttcgagaccagcctgaccaacatggagaaaccccatctctactaaaaatacaaaatcagccaggcatggtggcgcacgcctgtaatcccagctactcgggaggctaaggcaggagagtcgcttgaac ->HepG2_positive_ONLY_rs579255 -aatcccagcactttgggaggccgaggtgggtggatcacctgaggttgggagttcgagaccagcctgaccaacatggagaaaccccatctctactaaaaatacaaaatcagccaggcatggtggcgcacgcctgtaatcccagctactcgggaggctaaggcaggagagtcgcttgaacc ->HepG2_positive_ONLY_rs12942988 -cacgcctgtaatcccagcactttgggaggctgaggcgggtgaatcacaaggtcaggagttcaagaccaaccctggccaagatggtgaaagcccgttgctactgaaactacaaaaattagccatgcgtggtggtgggtgcctgtaatcccagctacttgggagactgaggcaggagaatc ->HepG2_positive_ONLY_rs2847165 -gtggctcatgcctgtaatccggcactttgggaggccgaggcaggtggatcacctgagctcaggagttcgagaccagcctggacaacatggtgaaaccccgtctctaccaaaaatacaaaaattagccaggtgtggtggtgcacacctgtaatcacagctactccagaggctgaggtacg ->HepG2_positive_ONLY_rs1579765 -GCGGCCCACTCGGGTCAAATTCCAGCGGCCTGCCCAGCCGCGCCCAGCGGGCCCAGGAATGCGGAAGGGTGGCGGAGCTACCAAAGGAGCGGGGGACGAGGGCCGGGCTGCGGACGACCGCCGCAGCGCAGGCCGCGATATCGCAGCGGATCGGAGCAGGCCGGAGGGGCAATTAAGAC ->HepG2_positive_ONLY_rs59481042 -gcctgtaattcaagcacttttcaaggccaaggcgggaggatcacttgaggtcaagagttcgagaccaccctgggcaacatagcgaaacctcttctcttccaaaaataacaaaaattaactgggtgtggtggcctgtgcctgtagtcgcaggtacttgggaggcttaagtgggagaatca ->HepG2_positive_ONLY_rs78654455 -ggtgggtggatcacctgaggtcaggagttcgagaccagcctagcaaacatagcaaaaccctgtctctactaaaagtacaaaaattagccgggcatggtggtacacccctgtaatcccagctactcaggaggctgaggcaggagaatagcttgaacctggggggcggaggttgcagtgag ->HepG2_positive_ONLY_rs7252519 -ctgagattgcaccactgcactccagcctgggcaacggagcaagacgctgtctccaaaaaaaatacaaaaaTggctgggcgcagtggttcacacctgtaatcccagcgctttgggaggccgaggcgggcggatcatgaggtcaggagatcgagaccagcctggccaagatggtgaaaccc ->HepG2_positive_ONLY_rs151148846 -ctcatacctgtaatcccagcactttgggagggcgaggcaggccggtcacttgaggtcaggagttcgagaccagcctggccaacatgctgaaactctgtctctactaaaaatacaaaaattagctgggcgtggtggagggcacctgcagtcccagctactcgggaggctgaggcaggaga ->HepG2_positive_ONLY_rs76331192 -TTTATACACTTGTTCATTTTTTTGTTTCTCTCTTTACTGGACTGTAAATGCTGTTATTGTTGGACTTAATGACTTGGCCAATATTGTATCCCTGGGACATAGCAAGTATTCAGCACAGTTGCtgaataactaagtaaataaataagtagataataaatCATGAGTTAATGGATTTTTCC ->HepG2_positive_ONLY_rs11682189 -tgtacAAggccgggcacagtggctcatgcctgtaatcccagcactttgggaggccgaggtacgcggatcaaaaggtcaggagatcgagatcatcctggctaacacggtgaaaccccgtatctactaaaaatacaaaaaattagccaggcgtagcagcgtgcacctgtagtcccagctgc ->positive_overlap_rs4240912 -TGTTATGATTAAGATAATGCCGGGCAggccgggcacagtggctcacgcctgtaatccaagcacttggggaggccgaggcgggcagatcacgagatcaggagttcaagaccagcctggccaatatgatgaaaccccgtctctactaaaaatacaaaattagtcgggtatggtggcacatg ->positive_overlap_rs74057483 -CCCCTAAATGCACTCTTGGGTTATGCAGTATAATTTTCAGCGTAAAAGACAAGTAAAGAGCCATAGCGAAGTGAAAAACAACCACgtgcagtggccgggaattgaacccgggtctcccgcatgggaggcgagaattctaccactaaaccaccaacgcCTCTCTGGAACTACCCCCTGGA ->positive_overlap_rs41268482 -GGTGCCCTCAGCCTGGGGAGGGAGGCAGGTGAGGAGCTGCTAGTCACCACCTATCACAGGACCCACGTGGGTATCAGGGGGCAAGTCCTCTCATGCCCAGGCCCTGTCTCACCACAGGAGGCAGAAACCCAGGCTGGGATAAAAGGGCTCAGGGCCAGAGCACCCCATTCACTCACTTC ->positive_overlap_rs4934698 -aatatcttgtgaatcaattccttataataaatctcattttGggctgggcacggtggctcacgcctataatcccttcactttgggaggccaaggtgggcggatcacttgaggtcaggagttcgagaccagcctggccaacatggtgaaacctcatctctattagaaatacaaaaaattag ->positive_overlap_rs11010121 -CATTATTTTGTATAGTCACAAAAACAAATTTAGGACTTTTCCCAAGTTTATTCATATGCTGGTGATCATTAATCACAATTAggccaggcacagtggctcacgcctgtaatctcagcactttgggaggccaaggcaggcagatcacgaggtcgggagatcaagaccatcctgggcaacat ->positive_overlap_rs11631183 -Attaggaatgctttcagctacaaggaacagaaaagtctgacagaacgctgggcgaggtggctcacgtcggtaatcccagcattttgggagaccaaggcgggtggatcgcgaggtcaggagttcaagaccagatggtgaaaccctgtctctactaaaaatacaaaaattagccgggcatg ->positive_overlap_rs28364709 -GCGCCACGCCGCCCGGGTACCCGGCAGATCGCGAGGTGGAACGGGCCGTCGCTGCGGGGGACAGCGTTCCGAGGCAGTTGGTCCTCTCCGGGATGCCGTAGGCATCAGCTGACCGGCCCAGCCCACGTGACTACAGGGGCACTTGATGGGAATCATGGCAGCATCCAGGCCATTGTCCC ->positive_overlap_rs112895815 -GACAAACATGGTGAGGAATTAGGAAATTCAAGGATGATGAAACCTggccgggcacggtggctcacgcctgtaatcccagcactttgggaagccgaggcgggtggatcacgaggtcaggagtttgagaccagcctggccaacatggtgaaaccccgtctctacaaaaatacaaaaattag ->positive_overlap_rs7221536 -CCCCTGGATGGTGCGGGGTGCTTTCTCCACCCCCACACTCCCTGCTCAGCTCCTCGTGCTGCCCTGCATGCCCAGGCTTGTGAGCCAAGCTGCTTTTTGGGGCAGGGAGTAGCAGCAGGTGGGAGGGGTTACCCATCAGCCCTTGCAAGTCCCCCACTCAGGCCTCTGGAAGGTCCAGG ->positive_overlap_rs2546491 -TTAAGAGTTCAGCAATGGTGGCAAAGAGAGGGAATTAACGGGATAACACATAACAGATTAAATTATTAAAAACAGATGTCATGGCTggcacggtagctcacgtctgtaatcccagcactttggaaggccaaggtaggtgaatcacgaggtcaggagttcgagaccagcctggccaacat ->positive_overlap_rs28384491 -CCCGGGAACCCGAGGCCCCGCCCCCTAGGTGGTCCTAGCCTTTGCGCGTGCGCGGCGTGTCTGCGTGGGCGCATGCGCATAACGGCCGCCATCTTAACAGCGCGTTCCCGTTGGCGTCTGAGGTAAGTTTTTGTTTCTGGGCGGCGTTCGGTGGTGTCCCGGTGCAGCCACGCGAGAGT ->positive_overlap_rs8102037 -agtcgaaggtgtatggcaggccgggcgtggtggttcagcctgtaatcccagcactctgggaggccgaggtgggcagattacttgaggtcgggagttcgagaccagcctggccaacatggcaaaaccccatctctactaaaaatacaaaaattggctgagtgtagtggcatgcgccagta ->positive_overlap_rs10412158 -TCAGTTGCCCAGCGAAAACTTCCGCTGGGCGGAGAGCGCTGCGCGCGCATCCTGAATCTGAGGCGCCCGCGCAGGCGCCGCTGACTTCCTGACGGCCCCTGGGCTTCGCCTGCCTGGGAACTCCGTTTCCCAGATGCCTCCGCGGCAGGCCCGCCCTCTGATTCCAGACGGAATGGGGA ->positive_overlap_rs7249464 -GTCTCTCCGCCTCTACCCCTACCTGGCCCAGGCCCCGCCCACCTCCTCGCAGGCCCCGCCCAGGCCTGGCTTCTGTCCTTCCCCGCTCAGACGCGCGCAAACCCGGAAGCAGATCGCGTGGAGTGAAGGTCCCTCAGCGGCGCGTGAGTTTCGCTCCATCTTGTATGAAGTCTGCTCTT ->positive_overlap_rs4239498 -TAGCTAACTGAAATAATGATTTTTGTTCTTCTTGCATATGAGGTTGCACTGGTACCTGAAAAAGAggctgggtgctgtggctcactcctataatcccaccattttcagagactggggcaagggcattgcttgagtacaggagttcgagaccagcctgggcaacatagtgagaccccccc ->positive_overlap_rs4806711 -CCTCTATAACGGCGCGAGAGTGAGACGTCATCGGTGAGCGACTAACGCTAGAAACAGTGGTGCGCGGAGAGGAGAGGTGAGTGTGATGGAGACCACGGGGAGCGGGAGGCTGGGCTCCTGGGTCTGGGAGAAGAAGTGTGTGAGGAAAAAGGCGGGTCTTTACAGCTTGGtttttgttt ->positive_overlap_rs1617806 -tttgctttcactaagaattggctggtcccaggaggggcagcctcagctagaaaggtttttaagatgtcaaaacagactgggtgcagtggctcacgcctgtaatcccagcactttgggaggccgaggcgggaggatcacaaggtcaggagttcaagaccagcctggccaagatggtgaaa ->positive_overlap_rs2464981 -TACGGGACGAAAGAGTAATAGAAGCTGGATCCCTCCAGAAACCTAAAAAAAATGAAGAAAAAAAggccgggcgtggtggctcacacctgtaatcccagcactttaggaggccaaggcgggtggatcacaaggtctagagatcgagaccatcctggccaatatggtgaaaccccgtctct ->positive_overlap_rs6062371 -aaatagctctaactatgagtcaggaaaatataccaggaagttattaagaggaagaaagcggccgggtgcagtggctcacacctgtaatcgcagcactttgggaggccgaggtgggtggatcacctgaggtcgggagttcacgaccagcctcaccaacatggagaaaccccatctctaat ->positive_overlap_rs7645667 -GTGACGTTGCACAGGCCTATCATAGTGCCCGAGAATTCGTGGGTGCTCAAGAGGATGGGTGTGGCCTGGCAGCGCAGGCGCACTAGAGGCCTGTAGGGTCGGGGCGCCTGCGCAGTCGCTCTTCCTCAGGCGGCGGCCATGGCGGGACAGGAGGATCCGGTGCAGCGGGAGATTCACCA ->positive_overlap_rs7645759 -AGAATTCGTGGGTGCTCAAGAGGATGGGTGTGGCCTGGCAGCGCAGGCGCACTAGAGGCCTGTAGGGTCGGGGCGCCTGCGCAGTCGCTCTTCCTCAGGCGGCGGCCATGGCGGGACAGGAGGATCCGGTGCAGCGGGAGATTCACCAGGACTGGGCTAACCGGGAGTACATTGAGATA ->positive_overlap_rs6448414 -cccccagccttgaggccccgcctctgactccacccctgtctctttctccgctccgcccctcctcctcgtccctccctcAAGGCCCGGAAGCGAAAGCCTCTCCACCTCTTCCGAGCGGGGTCACGGCCCGGCCGTCGGTAACCTGGTTTCCGAGAGTGCCGGGCGGTCGGCGGGTCAGG ->positive_overlap_rs7661222 -AAATGCTTATAACTTTGAAACACTTAAGACatacaaagatctggccaggtgcggtggctcatgcctgtaatcccagcactttgggaggccgaggtgggtggatcacttgaggtcaggagttcgagaccagcctggccaatatggcaaaaccccatctctactaaaaatacaaaaaaatt ->positive_overlap_rs2351011 -tgtttgtccaatgccatatagcaagtaaatggaagggacaggattaaaaccTGACACTTggccaggcacagtggctcacacctgtaatcccaccactttgggaggccaaggcaggtagatcatggggtcaggagttcgagaccagcctggccaatatggtgaaaccctgtctctaataa ->positive_overlap_rs3172921 -CCCAGGATGCAACGAGCATTTGCGCCTGCGTACAAGGCTCAATTTATTCGTTTCCCCGCCCCTTTCATGACCTTCACCGGGAGGCTGAGGTCGGAGTCCCGATTTTCTCCTGCTGCTGTGGCCCGGACATGGCGACTCCCGGCCCTGTGATTCCGGAGGTCCCCTTTGAACCATCGAAG ->positive_overlap_rs4960410 -GGAAAGCACTCTCAACTCCGGCGCCCGCAGGCCACTTCCGCCTATGTGTCCCTGCGCGCGTGCGCAGACGCGTGGCCCCGCCTCCCGACAGGCCTGTGTGGCCGGTGGCGGGCGGGCCGTTACCTCCCGCGCATGCTCTATGGGGTAGAGACGCTAGGCCGGCACTGCCTAGGTGGGCC ->positive_overlap_rs9393894 -TGACAAATGGCAGGGGGATGACAGAGAAGAACTCATGAAAAAGGTAATAGATAAATTAGAGTTAAGAGTTGAATGATTTTAggccgggcacagtggcttacgcctgtaatcccagcactttgggaggccgaggcgggcagatcacgaggtcaggagatcgagaccatcctggctaacac ->positive_overlap_rs10948061 -GGTTCTTGGCTCGGTTATGCTCTATTTCTCATTCTCTGTTGATAGGACAGGGTAGCCCTCTCAAAAACAATAGAAATCCCTggctgggtgcggtggctcacgcctgtaatcccagcactttgggaggccgaggcgggcggatcacaaggtcaggagatcgagaccatcctggttaacac ->positive_overlap_rs1713862 -CCGGCCCTCCGTGCGATCACCCATCTCCCTCACCAGGAAAGTAGCTCCAAACCGCCAATCAGCGGCGACGCTGGACGTAGACGTCCTACCCCGTGATATTAAAGCAAGATGGCCGCGCCCTGCAGATTGTCTCTTGTTGCGTAAGTTTTTTTGACCGTCACTCGTGTCAGCTTCAAAGT ->positive_overlap_rs2696715 -caaaagcttaactactaataacgtactgtaagagaaaagaaaatgttaagaaaatcataacggatcaggcgcggtggctcacgcctgtaatctcggcactttgggaggctgaggcgggtggatcacaaggtcaggagttcgagaccaacctggccaatgtggtgaaaccccatctctac ->negative_overlap_rs11210514 -gaatgttccacaaaaattctactattgatttgcctcactcctccagtttttgaatatttcaaacaatatcactgtaaacattcttgtacatggctctgggcacacgcatgcatgagtttctctagggactatatgtaggagagaagtttcttaatcagaaggtatgcatatcttcagct ->negative_overlap_rs115675756 -ttgcccaggcaggtctcgtactcctgggttcaagctattcccctgtctctgtcgccctaaggactgggattacaggcgtgagccactgcgcctggcAACTTGCTGCTTTGTATAATCTCAGTAGAAGggccgggcgcagtggctcaagctgtaatcccagcactttgggaggccgaggc ->negative_overlap_rs61622554 -actctttgggtccacaccacctttaagagctgtaacacttgccacggaggtccgcggcttcattcctgaagtcagcaagaccacgaacccactggaaggaagaaactctggacacatctgaaggaacaaactccagacacaccatctttaagagctgtaacaccgcgaaggtctgtggc ->negative_overlap_rs7513666 -agttgcaccatctcggctcactgcaacctccgcctcccgggtccaagcgattctcctgcctcagcctcctgagtagctgggactacaggcgcctgccactacacctggctaatttttgtatttttagtagagatggggtttcaccatattggccagactggtctcgaactcctgaactt ->negative_overlap_rs787333 -tctagaaacaGAATTGcaataccacacatggtaagcatttaacaaacgcctgttgagtgaatgGGTGAAAAATAGTAATTCAATGTACCGTCAATAAACAGTTAGAGAAAGTCTTGAAAATATTTTAAGTGgggtgtggtggttcatgcctgtcatcccagccctttgggagacctgat ->negative_overlap_rs11593858 -gtccgggtggtcattggatcgctgtattcttcactgccatacattcccagtttttaaggctattgatttaggaatatcctggatgaagcggaagcattaattttattaaattttggcccttgaatacaagtctttctaatactctgtgtgaagaaatgggaagtacacatgcacttctg ->negative_overlap_rs2604963 -tatatacttttattatatagaaaatatatattacagaatatattattaaatattgtagaacaatatataatacagaaaaatatataatactcagtaatatattaaataCTTATTAAAATAGCAAGCTTATATAGGAAGAGTGATGGAGCATTGTGAGAAAGTTTCAGCTTTATTTCTTT ->negative_overlap_rs1856142 -TTACTGACTCATTAACTATATTGGGCAAGTTATTAGGTGGCAATTTCATATGGTTCAAACTAGcatttggtctctttaatcttctttttcccccatatgaaaaacaggaagagctgtggtacatattacacaggtgattgttgtgaaatttaaataagataatgcatgcaaagcactcc ->negative_overlap_rs61905625 -ttttagaacacagtaatacgtgtaacataattcataagtttcctagtcttctcagaatggaaatgattattggcttccacttcaagtcaccagttttattatctcctcatagaagagttagcctgtcctttgacaatctgaagcaaggccttgacttctcctctctagctgagaaagtg ->negative_overlap_rs117774247 -ggttaggagctggagaccagcccggccaacacagcgaaaccctgtctccaccaaaaaaatacgaaaaccagtcaggcgcggcggcacacgcctgcagtcgcaggcactcgcaggctgaggcaggagaatcaggcagggaggttgcagtgagccgagatggcggcagcgcagtccagctt ->negative_overlap_rs61904519 -ACCAAAAAAATTCTCCACCAAGGGGCGAGCTCCACCCATCTCACTTGTAATCAGTAGCATCGCCATTGTCTGAGAAATATTTATTCTGTCCAGTTTAAGCTTAATTTTCACAAACACTAAAAAGTAGTAAAATAGACTATAGCTTTTCCTTTAACTATATTTATATATAAATCCCTGAG ->negative_overlap_rs10750836 -ggctgggattacaggcgtcttgctgcatctcgaagcgtcttcagcgtgctcttgcttgctggctctgatgatggaggtgttcaagccatcgtggaaggaatgcagtggcctctaggagtgagaatgacaccagccaacatccagcaagaagacgtggacttcagtcctaggcccatgga ->negative_overlap_rs2462621 -atttaactttactgcttctgtttcctatctgtaaaatgcagatgatgcccaccacatggaattgtagagactatagggaataatgtaaacaggaatattttgtaaggaatgaaggatgatgcaaataataatattgatattgttctatagttaataaattttcatttacacaatttcat ->negative_overlap_rs7116100 -aaggccagcctgacccacatggagaaaccccatctctactacaaatataaaattagcctggcatggtggcacatgcctgtaatcccagcttcttgggggctgaggaaggaggattgcttgaacctgggaggcgaaggttgtggtgagctgagatcgtgcccttgcactccagcctgggg ->negative_overlap_rs79241008 -ttgcgccactgcactccagcctgggcaacagagcaagactctgtctcaaaaataataataataataataataatCTGTTCATTTGCATTCCTAAGAGCCATCCTTCCAGATCATCTATTTCCCAAAGTTGATCCCGGCCAGGCTGAATTCTCTTCCTCTGGGCTTCCCCAGCACGTGGC ->negative_overlap_rs113094203 -GCTGGGCATGGCTGtgtgaaccccacatatctgagatggtctcagttaatttagaaaatttattttgctaaggttgaggatgcgcacccgtgacacagcctcaggaagtcctgacgacatgtgcccaacgtggtcagggcacagcttggttttatacatcttagggagacatgagaacc ->negative_overlap_rs7137111 -aaatgaggataatattagtaattacctcatggtatttttTTTCttcttttttttaattatactttaagttctagggtacatgtgcacaacgtgcaggtttgttacatatgtatacatatgccatgttggtgttctgcacccattaactcgtcatttacattaggtatatctcttaatgc ->negative_overlap_rs12582521 -ATAGCTGAGAATCAGGCAAAAAAAATCGGGAAAGTGGGTAATTATCTTATTCCCCACTATCTCTCCTTTCCACTGGCCACCCTTCCAGGTATCCTGTGATCTGAATTCCAGCTCTGCTCAGTTCAGCCTGGGGAATCAGAAGAAAAATTGTCTCTGTCCACAAATCCCTTTCCCCTACC ->negative_overlap_rs112743412 -aaggacaccctattcaaaaaatggtgctgggaaaattggatagctatatgcagaagaataaaattggattcctatctctcaacatatataaaaactaactcaagattgattaaatacttaaatataaaacctgaaactataaaaattctagaagaaaacttaaggaaacctcttcagga ->negative_overlap_rs3007067 -tagtgtgcatgtttaattgctttattgaggtataatttgtatgctatataattcacctatttaaaatgtacaattcaatgaattttagtgtattcactgagtttgtaagcattaccacaatcttcgaacatttcatctcctcccaaaaaacctccacacacattagcagttactcccca ->negative_overlap_rs78374941 -aaaaattttccacaactagtgaaagatacacatattgcagtgaatcacaagaaatccacacctagacacatcatactttagagcactaaggtcaaaagatgttaaagcagctggagggagtggaaggattaccagagcaattccagtctgattgacagctaacttctcagcagcaaaga ->negative_overlap_rs79908905 -taggcctatatggtacagcctattgctcctaggctagaaacctgtatggcatgttactgtattgcatactatagacagttataccacaacggttaagtatttatgtacctaaacgtgcaaaaggtacactaaaaacatagtattataatcttataggaccaccgtctgtcactgaacaa ->negative_overlap_rs7342601 -gaagttacctctgatggcttccattttttccatcaaatatgatgtagtgtggagagtgggctcacaacgaggccatgtttgtcgacggatagtgaggacactctccaaagaaacccggcccctaagatttctgggccacacacaaggttcatttgacattggctgatgtgaatttacag ->negative_overlap_rs55835622 -CCAACGGAGCCAGTGAGAACATATCCCACATTAAAGTGACACAGAGCTAGGGGCCAAAGTTTGTCCTCAAACCCAACCGAAAGCACAGACGTCCAGTGGCCAGTAAGGATCACGAATCTTTGTGGAAAAATTGGGAGCAGGAGTCTTTGCATTTTCAGGGCAGGCCATGGTTGTTCCAG ->negative_overlap_rs3848349 -CACTTGGTCTAGCAAATTTTAGATGCTTTTCCTCAGATATCAATAATAAATGTACAGGAAAAAAATTAGTTTTTCAATTTTTGAAAACTATTTTTCAAAAAATGAATACAAGGCCTTACACAGCATCCTCCACATTACTTTTTTCAACTTTAATACCACATCATAAAATGTTTTAAAAA ->negative_overlap_rs12599335 -accatgttggccaggctggtctcaaactcctgacctcaagtgatccctccaccttggtttcccaaagtgctgggattacaggcgtgagctaccacacctggccAAGCATTCTGTATTGTGtttcttttatttttttattttttgagatacagtcttgttctgacacccaggctggagtg ->negative_overlap_rs4454990 -aaagtgctgggattgcaggcgtgagccactgggcccagcccagtatatatttttttattttaaaattatattaaaaatatgaaacactttgtgaatttgcacgacctccttgcacaggggtcatgctgatcctctctgtacaaagtttgtattaagtatttatgtgtggaattttccat ->negative_overlap_rs464972 -CGGGGGCTGGACGGGCGAAGGAAGGCGTGAGGGCTGGGTTCTGAGCGGGGTTCCAGTTCCACCCCAGGGCCTGGACGGGCCGAGGAAGGTGTGAGCGCTGGCTTCTgagctgggttctgggttccagctccacccctgctgtgtggtctcagccaccacaacgccctctctgagccATG ->negative_overlap_rs62056785 -CAGGCTCAGCAGGACCAATTTGAGTTCTATCTGATCCCCCTCGGCCCCTTAACTGACCCATCCTACAGGAGACAGGGAAATGTCTTTCCTACCGCGGTTGATTCTGGGGTGTCATTTTGTGTTTTGTGATGGCTGCTTATATTTACTGTATAAGCATTGTATTTACTGTATAAGCATTG ->negative_overlap_rs11079774 -GACTTGCTTTTGTTTTTGTTTTTAATGCTGTGTGTTCACCTAATTTCTGGTTAACATGTTGGCATTGATATGTTTATACGCAGATGAAACTTGGTTTTGATTAAACAttggctggcatataggaacttggctttcagaatgtatccttcattactaactgataaagctggtttactctg ->LCL_lowest_expr_rs10034650 -CCCCGCAGGGAGGAGGAGAAGGGGAGGATTTGACTTTGGAAGTCTCTGTGACCAGGTGAGGACCGCAAGCGTAAAGGTGGTGGCAGAGTGGTCGGGGACAGGCCTCACCCACAAACATGTGGCCCCTGGATGAGCCCAGCTCTCAGCTGCTTCGTGGGACTGCAGCTGGACGCCCAGGCC ->LCL_lowest_expr_rs10047360 -tgcatatatttcatcctggtgcagtctggactttcagtgaacaggtcacctaaatagtgaacattgtacccaatcggtaatttttcaaccctccatattttgtagcctccaatgtctatttttccactctgtatagccatgtatactcattgtttagcttccacttataagtgagaacat ->LCL_lowest_expr_rs11209683 -tacttcatctcctatggctctcaccttttgctttgctccagctatactggtcttctcagctcctgttggaggccaagttccttatgaccttggggcctttacactccctggaatgttccacctcaatattcacatgtctggcttttcatcattcaggtcttagcttaaatgtcacctcct ->LCL_lowest_expr_rs112490828 -ggactccagcctgggtgacagaatgagaccctgtctcaaaaaacaaaataattttttttaaacaataaaattaaaaaacataaaataaaattttttttaaaaaGCCACTACCCACTTCTGATGGCCCAGCTGCTCCCTTGGGACCCTCCAGGCCTCTCCATGCCCCAGCATGAAAGGATT ->LCL_lowest_expr_rs12582521 -ATAGCTGAGAATCAGGCAAAAAAAATCGGGAAAGTGGGTAATTATCTTATTCCCCACTATCTCTCCTTTCCACTGGCCACCCTTCCAGGTATCCTGTGATCTGAATTCCAGCTCTGCTCAGTTCAGCCTGGGGAATCAGAAGAAAAATTGTCTCTGTCCACAAATCCCTTTCCCCTACCT ->LCL_lowest_expr_rs1363977 -TGGCTTTAACTTTACTTTCAGAAAAGAATATATTGACAAAATGCAAATAGTGAAAACAGGTATTCCCTAGTTATTAATCATATGATTGAGACTAGACAAGCGGTTATGTTTTATCTTCAGAAACTTGAAGAAAAATTATTGTTGTAAAAATACATTCTTGCATATAAGATGTGTAAATGA ->LCL_lowest_expr_rs193993 -CTGTGTTATTTTTTGTCAcatcatgcttataatcatctcaaaagataaagtaatcatcattactctgtgtttataagtgagaaaactgatactaagggacagatttgcccaaagtcaccaagtcagtgagaaaatcagtacttaaaatttgtcttctaagtccaatagttattcaattat ->LCL_lowest_expr_rs226063 -GATATTCTCCTTATCCAGCTCCTGGGTTCCCAGCCCTCAGTAGGCCTCATTCCAGTGCCTCAAAGGCAGACCAATGCATTAAGGGAGTATAGGAAAGAGGAGAGGAGAAAGGGAACACTAGAGGTTGTCATACCTGCTTTATGTAAAATAATTTACCATTTTGAAGACAGATCCCTTCCT ->LCL_lowest_expr_rs2424713 -TTTTGAAACAGTTTTCATATTGAAGAATAGAGCAAAAGGGTAATTGACATTACCAACAGTTTTCAGAATAAGAGAAAAGGggccgggcacagtggcttacgcttgtaatcccagcactttgggaggccgaggcgggtggatcacctgaggtcaggagtttgagaccagcctggccaacat ->LCL_lowest_expr_rs356055 -ggttccagcccctgcacttccaccttgatgtgtgatctggtggtggtggggggcagggggtccctttggtgctgtggcctcagtttccctattagtgcactgaggaggAAGGAGATACACTGTTCCCAGGTCCAGTCAGTGCTGGTACCTAGCTACGCAGTTTCTTCCAGTGTAGACGCC ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|2:V_Rxra_UP:GGCCGTGACCCCGTGAT;23:V_Rxra_UP:GGCCGTGACCCCGTGAT;44:V_Rxra_UP:GGCCGTGACCCCGTGAT;65:V_HNF1_C:AGTTAATGATTAACCAA;86:V_Rxra_UP:GGCCGTGACCCCGTGAT;107:V_HNF1_C:AGTTAATGATTAACCAA;128:V_HNF1_C:AGTTAATGATTAACCAA;149:V_Rxra_UP:GGCCGTGACCCCGTGAT:001 -TGTCATTGGCCGTGACCCCGTGATATGTGGCCGTGACCCCGTGATCTTCGGCCGTGACCCCGTGATTGCAAGTTAATGATTAACCAATACAGGCCGTGACCCCGTGATAATTAGTTAATGATTAACCAAATGAAGTTAATGATTAACCAATCCCGGCCGTGACCCCGTGATTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|7:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;33:V_HNF4_Q6:AAGGTCCAG;45:V_HNF1_C:AGTTAATGATTAACCAA;65:V_HNF3ALPHA_Q6:TGTTTGCTTTG;79:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;101:V_PPARA_02:CCGGGTCATTGGGGTCAGG;123:V_Rxra_UP:GGCCGTGACCCCGTGAT;143:V_XBP1_01:GTGATGACGTGTCCCAT:001 -TGTCATTTTTGCCCCCCTGACCTTTGCCCCCTGCCTACAAGGTCCAGTCCAGTTAATGATTAACCAAGCATGTTTGCTTTGTTGGGGGATCGCGTGCCAGCCCATGCCGGGTCATTGGGGTCAGGTCCGGCCGTGACCCCGTGATTATGTGATGACGTGTCCCATAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|18:V_PPARA_02:CCGGGTCATTGGGGTCAGG;40:V_XBP1_01:GTGATGACGTGTCCCAT;60:V_Rxra_UP:GGCCGTGACCCCGTGAT;80:V_HNF3ALPHA_Q6:TGTTTGCTTTG;94:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;120:V_HNF4_Q6:AAGGTCCAG;132:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGCCAGCTGGTGTACCGGGTCATTGGGGTCAGGACTGTGATGACGTGTCCCATATTGGCCGTGACCCCGTGATTGATGTTTGCTTTGTTCCCCCCTGACCTTTGCCCCCTGCCAGTAAGGTCCAGAATAGTTAATGATTAACCAAATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|13:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;39:V_XBP1_01:GTGATGACGTGTCCCAT;59:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;81:V_HNF3ALPHA_Q6:TGTTTGCTTTG;95:V_PPARA_02:CCGGGTCATTGGGGTCAGG;117:V_HNF1_C:AGTTAATGATTAACCAA;137:V_Rxra_UP:GGCCGTGACCCCGTGAT:001 -TGTCATTTTTGCCAGCTGCCCCCTGACCTTTGCCCCCTGCCCACGTGATGACGTGTCCCATTATGGGGATCGCGTGCCAGCCCGAGTGTTTGCTTTGTCACCGGGTCATTGGGGTCAGGGTTAGTTAATGATTAACCAAACTGGCCGTGACCCCGTGATGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|16:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;38:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;64:V_HNF6_Q6:CAAAATCAATAA;79:V_XBP1_01:GTGATGACGTGTCCCAT;99:V_HNF3ALPHA_Q6:TGTTTGCTTTG;113:V_PPARA_02:CCGGGTCATTGGGGTCAGG;135:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGCCAGCTGGTGGGGGATCGCGTGCCAGCCCTCACCCCCTGACCTTTGCCCCCTGCCTGCCAAAATCAATAATTGGTGATGACGTGTCCCATACATGTTTGCTTTGTAACCGGGTCATTGGGGTCAGGGTAAGTTAATGATTAACCAAGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|24:V_HNF4_Q6:AAGGTCCAG;36:V_HNF1_C:AGTTAATGATTAACCAA;56:V_HNF3ALPHA_Q6:TGTTTGCTTTG;70:V_Rxra_UP:GGCCGTGACCCCGTGAT;90:V_HNF6_Q6:CAAAATCAATAA;105:V_XBP1_01:GTGATGACGTGTCCCAT;125:V_PPARA_02:CCGGGTCATTGGGGTCAGG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAGGTCCAGTGTAGTTAATGATTAACCAACTTTGTTTGCTTTGTGCGGCCGTGACCCCGTGATGCACAAAATCAATAATAAGTGATGACGTGTCCCATCCACCGGGTCATTGGGGTCAGGATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|6:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;27:V_HNF1_C:AGTTAATGATTAACCAA;46:V_HNF1_C:AGTTAATGATTAACCAA;65:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;86:V_HNF1_C:AGTTAATGATTAACCAA;105:V_HNF1_C:AGTTAATGATTAACCAA;124:V_HNF1_C:AGTTAATGATTAACCAA;143:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC:001 -TGTCATTTTTGGGGGATCGCGTGCCAGCCCAAAGTTAATGATTAACCAACGAGTTAATGATTAACCAACAGGGGATCGCGTGCCAGCCCCAAGTTAATGATTAACCAAAAAGTTAATGATTAACCAACCAGTTAATGATTAACCAAATGGGGATCGCGTGCCAGCCCGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|25:V_PPARA_02:CCGGGTCATTGGGGTCAGG;47:V_XBP1_01:GTGATGACGTGTCCCAT;67:V_Rxra_UP:GGCCGTGACCCCGTGAT;87:V_HNF3ALPHA_Q6:TGTTTGCTTTG;101:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;127:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTACCGGGTCATTGGGGTCAGGCGCGTGATGACGTGTCCCATACCGGCCGTGACCCCGTGATCAGTGTTTGCTTTGATGCCCCCTGACCTTTGCCCCCTGCCATGAGTTAATGATTAACCAAATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|19:V_HNF3ALPHA_Q6:TGTTTGCTTTG;33:V_HNF6_Q6:CAAAATCAATAA;48:V_HNF1_C:AGTTAATGATTAACCAA;68:V_Rxra_UP:GGCCGTGACCCCGTGAT;88:V_XBP1_01:GTGATGACGTGTCCCAT;108:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;130:V_PPARA_02:CCGGGTCATTGGGGTCAGG:001 -TGTCATTTTTGCCAGCTGGTGTAGTGTTTGCTTTGTACCAAAATCAATAAGCCAGTTAATGATTAACCAACCTGGCCGTGACCCCGTGATAGGGTGATGACGTGTCCCATTTTGGGGATCGCGTGCCAGCCCACACCGGGTCATTGGGGTCAGGATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|19:V_PPARA_02:CCGGGTCATTGGGGTCAGG;41:V_XBP1_01:GTGATGACGTGTCCCAT;61:V_Rxra_UP:GGCCGTGACCCCGTGAT;81:V_HNF3ALPHA_Q6:TGTTTGCTTTG;95:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;121:V_HNF1_C:AGTTAATGATTAACCAA;141:V_HNF4_Q6:AAGGTCCAG:001 -TGTCATTTTTGCCAGCTGGTGTAGCCGGGTCATTGGGGTCAGGCTCGTGATGACGTGTCCCATTTTGGCCGTGACCCCGTGATGAGTGTTTGCTTTGTCACCCCCTGACCTTTGCCCCCTGCCGTTAGTTAATGATTAACCAAGATAAGGTCCAGTTGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|2:V_HNF1_C:AGTTAATGATTAACCAA;23:V_Rxra_UP:GGCCGTGACCCCGTGAT;44:V_Rxra_UP:GGCCGTGACCCCGTGAT;65:V_HNF1_C:AGTTAATGATTAACCAA;86:V_Rxra_UP:GGCCGTGACCCCGTGAT;107:V_Rxra_UP:GGCCGTGACCCCGTGAT;128:V_HNF1_C:AGTTAATGATTAACCAA;149:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTAGTTAATGATTAACCAAATGTGGCCGTGACCCCGTGATCTTCGGCCGTGACCCCGTGATTGCAAGTTAATGATTAACCAATACAGGCCGTGACCCCGTGATAATTGGCCGTGACCCCGTGATATGAAGTTAATGATTAACCAATCCCAGTTAATGATTAACCAATTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|17:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;39:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;65:V_XBP1_01:GTGATGACGTGTCCCAT;85:V_HNF6_Q6:CAAAATCAATAA;100:V_Rxra_UP:GGCCGTGACCCCGTGAT;120:V_HNF3ALPHA_Q6:TGTTTGCTTTG;134:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGCCAGCTGGTGTGGGGATCGCGTGCCAGCCCCACCCCCCTGACCTTTGCCCCCTGCCGCAGTGATGACGTGTCCCATTACCAAAATCAATAACATGGCCGTGACCCCGTGATAGTTGTTTGCTTTGTGTAGTTAATGATTAACCAATGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|51:V_HNF1_C:AGTTAATGATTAACCAA;73:V_HNF3ALPHA_Q6:TGTTTGCTTTG;89:V_HNF3ALPHA_Q6:TGTTTGCTTTG;105:V_HNF3ALPHA_Q6:TGTTTGCTTTG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAATTACTGTCACTCTTCCGCCTGCAGTTAATGATTAACCAAGCTGTTGTTTGCTTTGCAGGCTGTTTGCTTTGGGTAATGTTTGCTTTGTAGTTCCCATGACAATGTACTAGATATTATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|17:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;39:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;65:V_HNF6_Q6:CAAAATCAATAA;80:V_XBP1_01:GTGATGACGTGTCCCAT;100:V_Rxra_UP:GGCCGTGACCCCGTGAT;120:V_HNF3ALPHA_Q6:TGTTTGCTTTG;134:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGCCAGCTGGTGTGGGGATCGCGTGCCAGCCCCACCCCCCTGACCTTTGCCCCCTGCCGCACAAAATCAATAATGAGTGATGACGTGTCCCATCATGGCCGTGACCCCGTGATAGTTGTTTGCTTTGTGTAGTTAATGATTAACCAATGAGGGTTAAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|5:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;26:V_HNF1_C:AGTTAATGATTAACCAA;45:V_HNF1_C:AGTTAATGATTAACCAA;64:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;85:V_HNF1_C:AGTTAATGATTAACCAA;104:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;125:V_HNF1_C:AGTTAATGATTAACCAA;144:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC:001 -TGTCATTTTTGGGGATCGCGTGCCAGCCCAAAGTTAATGATTAACCAACCAGTTAATGATTAACCAAGCGGGGATCGCGTGCCAGCCCACAGTTAATGATTAACCAATAGGGGATCGCGTGCCAGCCCCAAGTTAATGATTAACCAATTGGGGATCGCGTGCCAGCCCTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|6:V_HNF3ALPHA_Q6:TGTTTGCTTTG;20:V_HNF6_Q6:CAAAATCAATAA;35:V_HNF1_C:AGTTAATGATTAACCAA;55:V_Rxra_UP:GGCCGTGACCCCGTGAT;75:V_XBP1_01:GTGATGACGTGTCCCAT;95:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;117:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;143:V_PPARA_02:CCGGGTCATTGGGGTCAGG:001 -TGTCATTTTTGTGTTTGCTTTGAGACAAAATCAATAACTGAGTTAATGATTAACCAAACTGGCCGTGACCCCGTGATTTAGTGATGACGTGTCCCATTCAGGGGATCGCGTGCCAGCCCGTTCCCCCTGACCTTTGCCCCCTGCCTATCCGGGTCATTGGGGTCAGGGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|6:V_XBP1_01:GTGATGACGTGTCCCAT;26:V_PPARA_02:CCGGGTCATTGGGGTCAGG;48:V_HNF3ALPHA_Q6:TGTTTGCTTTG;62:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;88:V_Rxra_UP:GGCCGTGACCCCGTGAT;108:V_HNF6_Q6:CAAAATCAATAA;123:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;145:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGGTGATGACGTGTCCCATTAACCGGGTCATTGGGGTCAGGGCCTGTTTGCTTTGTTTCCCCCTGACCTTTGCCCCCTGCCAGGGGCCGTGACCCCGTGATTTTCAAAATCAATAATCCGGGGATCGCGTGCCAGCCCTTAAGTTAATGATTAACCAAGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|4:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;32:V_HNF3ALPHA_Q6:TGTTTGCTTTG;48:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;76:V_HNF3ALPHA_Q6:TGTTTGCTTTG;92:V_HNF3ALPHA_Q6:TGTTTGCTTTG;108:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;136:V_HNF3ALPHA_Q6:TGTTTGCTTTG;152:V_HNF3ALPHA_Q6:TGTTTGCTTTG:001 -TGTCATTTTCCCCCTGACCTTTGCCCCCTGCCAATTATGTTTGCTTTGCCGCCCCCCCTGACCTTTGCCCCCTGCCGTTACTGTTTGCTTTGGCATTTGTTTGCTTTGAATTTCCCCCTGACCTTTGCCCCCTGCCTGTACTGTTTGCTTTGCCATTTGTTTGCTTTGTGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|4:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;25:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;46:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;67:V_HNF1_C:AGTTAATGATTAACCAA;86:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;107:V_HNF1_C:AGTTAATGATTAACCAA;126:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;147:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTGGGGATCGCGTGCCAGCCCTAGGGGATCGCGTGCCAGCCCCGGGGGATCGCGTGCCAGCCCCCAGTTAATGATTAACCAACAGGGGATCGCGTGCCAGCCCTTAGTTAATGATTAACCAAATGGGGATCGCGTGCCAGCCCTCAGTTAATGATTAACCAAGGTTTTATAA ->HepG2_positives_TFs_C:SLEA_hg18:chr2:210861483-210861650|17:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;43:V_PPARA_02:CCGGGTCATTGGGGTCAGG;65:V_HNF6_Q6:CAAAATCAATAA;80:V_XBP1_01:GTGATGACGTGTCCCAT;100:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;122:V_HNF4_Q6:AAGGTCCAG;134:V_HNF1_C:AGTTAATGATTAACCAA:001 -TGTCATTTTTGCCAGCTGGTGTCCCCCTGACCTTTGCCCCCTGCCCTTCCGGGTCATTGGGGTCAGGGCACAAAATCAATAATGAGTGATGACGTGTCCCATCATGGGGATCGCGTGCCAGCCCTTCAAGGTCCAGTGTAGTTAATGATTAACCAATGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|56:V_HNF3ALPHA_Q6:TGTTTGCTTTG;70:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;96:V_XBP1_01:GTGATGACGTGTCCCAT:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAATTACTGTCACTCTTCCGCCTGCTACTTTGTTTGCTTTGTGCCCCCCTGACCTTTGCCCCCTGCCCACGTGATGACGTGTCCCATGGTTAGTTCCCATGACAATGTACTAGATATTATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|27:V_HNF4_Q6:AAGGTCCAG;144:V_HNF4_Q6:AAGGTCCAG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAAGGTCCAGCACTCTTCCGCCTGCTACTTTATTTTGCACCTGCTGTTACTTGAGTTACAGGCATTTCACACATGGTAATTTAATAAGGTTAGTTCCCATGACAATGTACTAGATATTAAGGTCCAGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|20:V_HNF6_Q6:CAAAATCAATAA;35:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;61:V_HNF4_Q6:AAGGTCCAG;73:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;95:V_Rxra_UP:GGCCGTGACCCCGTGAT;115:V_HNF3ALPHA_Q6:TGTTTGCTTTG;129:V_PPARA_02:CCGGGTCATTGGGGTCAGG:001 -TGTCATTTTTGCCAGCTGGTGTAGACAAAATCAATAACTGCCCCCTGACCTTTGCCCCCTGCCTTTAAGGTCCAGTGTGGGGATCGCGTGCCAGCCCTCAGGCCGTGACCCCGTGATAGGTGTTTGCTTTGGACCCGGGTCATTGGGGTCAGGCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|103:V_HNF4_Q6:AAGGTCCAG;155:V_HNF4_Q6:AAGGTCCAG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAATTACTGTCACTCTTCCGCCTGCTACTTTATTTTGCACCTGCTGTTACTTGAGTTACAGGCATTTCACACATGGTAAGGTCCAGAGGTTAGTTCCCATGACAATGTACTAGATATTATCCCATTGAGAAGGTCCAGGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|44:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;70:V_PPARA_02:CCGGGTCATTGGGGTCAGG;92:V_Rxra_UP:GGCCGTGACCCCGTGAT;112:V_HNF3ALPHA_Q6:TGTTTGCTTTG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAATTACTGTCACTCTTCCCCCCTGACCTTTGCCCCCTGCCTGCCCGGGTCATTGGGGTCAGGATTGGCCGTGACCCCGTGATATATGTTTGCTTTGCATGACAATGTACTAGATATTATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|21:V_HNF3ALPHA_Q6:TGTTTGCTTTG;34:V_HNF3ALPHA_Q6:TGTTTGCTTTG;47:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;68:V_HNF3ALPHA_Q6:TGTTTGCTTTG;81:V_HNF3ALPHA_Q6:TGTTTGCTTTG;94:V_HNF3ALPHA_Q6:TGTTTGCTTTG;107:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;128:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC:001 -TGTCATTTTTGCCAGCTGGTGTAGATTGTTTGCTTTGCTTGTTTGCTTTGGCGGGGATCGCGTGCCAGCCCCTTGTTTGCTTTGAGTGTTTGCTTTGTCTGTTTGCTTTGTTGGGGATCGCGTGCCAGCCCGAGGGGATCGCGTGCCAGCCCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|54:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;76:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;102:V_HNF3ALPHA_Q6:TGTTTGCTTTG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAATTACTGTCACTCTTCCGCCTGCTACGGGGATCGCGTGCCAGCCCTACCCCCCTGACCTTTGCCCCCTGCCTGGTGTTTGCTTTGGGTTAGTTCCCATGACAATGTACTAGATATTATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|71:V_HNF4_Q6:AAGGTCCAG;85:V_HNF3ALPHA_Q6:TGTTTGCTTTG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTAAAAATTACTGTCACTCTTCCGCCTGCTACTTTATTTTGCACCTGCTAAGGTCCAGGTTACTGTTTGCTTTGACATGGTAATTTAATAAGGTTAGTTCCCATGACAATGTACTAGATATTATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|24:V_HNF3ALPHA_Q6:TGTTTGCTTTG;55:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;94:V_AHRARNT_02:GGGGATCGCGTGCCAGCCC;133:V_HNF3ALPHA_Q6:TGTTTGCTTTG:001 -TGTCATTTTTGCCAGCTGGTGTAGATGTTTGTTTGCTTTGTCACTCTTCCGCCTGCTACTGGGGATCGCGTGCCAGCCCACTTGAGTTACAGGCATTTCGGGGATCGCGTGCCAGCCCGGTTAGTTCCCATGACAATGTGTTTGCTTTGATCCCATTGAGGGTTAAAGTGGTTTTATAA ->HepG2_negatives_TFs_C:SLEA_hg18:chr2:210861483-210861650|19:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;62:V_HNF6_Q6:CAAAATCAATAA;94:V_COUPTF_Q6:CCCCCTGACCTTTGCCCCCTGCC;137:V_HNF6_Q6:CAAAATCAATAA:001 -TGTCATTTTTGCCAGCTGGTGTAGCCCCCTGACCTTTGCCCCCTGCCTCCGCCTGCTACTTTATTTTCAAAATCAATAAACTTGAGTTACAGGCATTTCCCCCCTGACCTTTGCCCCCTGCCAGTTCCCATGACAATGTACTCAAAATCAATAAATTGAGGGTTAAAGTGGTTTTATAA +version https://git-lfs.github.com/spec/v1 +oid sha256:10b7ac6bcd2d22f9007b27534175e9dfe1b04486005a484d0985c92fdfc43aae +size 48802 diff --git a/src/expand_variants.py b/src/expand_variants.py new file mode 100644 index 0000000..8fd7d1f --- /dev/null +++ b/src/expand_variants.py @@ -0,0 +1,67 @@ +import argparse +import csv +import itertools +import typing +import pandas as pd +import Bio.SeqIO as bsio +import Bio.Seq as bseq + + +class Namespace(argparse.Namespace): + in_design: typing.TextIO + in_labels: typing.TextIO + variants: typing.TextIO + out_design: typing.TextIO + out_labels: typing.TextIO + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('in_design', type=argparse.FileType('r')) + parser.add_argument('in_labels', type=argparse.FileType('r')) + parser.add_argument('variants', type=argparse.FileType('r')) + parser.add_argument('out_design', type=argparse.FileType('w')) + parser.add_argument('out_labels', type=argparse.FileType('w')) + args = parser.parse_args(namespace=Namespace()) + + # Read the design + design = pd.DataFrame([[fa.name, fa.seq] for fa in bsio.parse(args.in_design, 'fasta')], columns=['name', 'sequence']) + + # Read the labels + labels = pd.read_table(args.in_labels, names=['name', 'label']) + + # Read the mutations + mutations = pd.read_table(args.variants, names=['name', 'pos', 'reference', 'alternate'], header=0) + + # Merge them all together + merged_df = design.merge(labels, how='outer', on='name').merge(mutations, how='outer', on='name').fillna(value={'pos': '', 'reference': '', 'alternate': '', 'label': 'unknown'}) + + # Split mutations + out_labs = [] + out_fa = [] + + def split_rows(row): + if row['pos']: + variants = [(int(x), y, z) for x, y, z in zip(row['pos'].split(','), row['reference'].split(','), row['alternate'].split(','))] + for is_mut in itertools.product(range(2), repeat=len(variants)): + seq = row['sequence'] + name = row['name'] + for flag, (pos, ref, alt) in zip(is_mut, variants): + seq = seq[:pos - 1] + [ref, alt][flag] + seq[pos:] + name += f':{pos}={[ref, alt][flag]}' + out_labs.append([name, row['label']]) + out_fa.append(bsio.SeqRecord(seq, name, name, row['label'])) + else: + seq = row['sequence'] + out_labs.append([row['name'], row['label']]) + out_fa.append(bsio.SeqRecord(seq, row['name'], row['name'], row['label'])) + + merged_df.apply(split_rows, axis=1) + + # Write output + bsio.write(out_fa, args.out_design, 'fasta') + pd.DataFrame(out_labs).to_csv(args.out_labels, sep='\t', header=False, index=False, quoting=csv.QUOTE_NONE) + + +if __name__ == '__main__': + main() diff --git a/src/make_master_tables.R b/src/make_master_tables.R index ce6d20b..da0fdba 100644 --- a/src/make_master_tables.R +++ b/src/make_master_tables.R @@ -23,7 +23,7 @@ for (i in 5:(length(args)-replicates)){ file=args[i] rep=args[i+replicates] - tab=as.data.frame(read.table(file,header=TRUE)) + tab=as.data.frame(read.delim(file,header=TRUE)) filter_tab=tab[tab$n_obs_bc >= thresh,] diff --git a/src/merge_label.py b/src/merge_label.py index 159a4d0..04a1804 100644 --- a/src/merge_label.py +++ b/src/merge_label.py @@ -111,7 +111,7 @@ counts.insert(0, 'Label', label) #print(counts) -mask=(counts['Barcode'].str.len() == 15) +mask=(counts['Barcode'].str.len() == 16) # Scott change - hardcoded barcode length is a bad idea, but so is spending a year refactoring this. #print(mask) counts[mask] counts_filtered_t = counts[mask] @@ -156,7 +156,7 @@ res.insert(0, 'dna_count',(res.dna_sum+1) / (res.n_obs_bc+1) / dna_total * 10**6) res.insert(1, 'rna_count',(res.rna_sum+1) / (res.n_obs_bc+1) / rna_total * 10**6) -print(res_t) +#print(res_t) print('test') print(res.head()) res = res[['dna_count','rna_count','n_obs_bc']] diff --git a/src/nf_ori_map_barcodes.py b/src/nf_ori_map_barcodes.py index 72cb8fd..3e50710 100644 --- a/src/nf_ori_map_barcodes.py +++ b/src/nf_ori_map_barcodes.py @@ -13,6 +13,13 @@ ##CMD python ori_map_barcodes.py data_assoc/ #verify trailing arguments + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('--mutations') +args, argv = parser.parse_known_args() +mutations_f = args.mutations + project_dir=sys.argv[1] fastq_in=sys.argv[2] n_fastq_f=sys.argv[3] @@ -41,12 +48,47 @@ #f=open(n_fastq_f, 'r') #n_fastq=f.read().replace('\n','') + + with open(n_bam_f, 'r') as file: n_bam = file.read().replace('\n','') +if mutations_f is None: + mutations = None +else: + print('mutations') + print(mutations_f) + mutations = {} + with open(mutations_f) as fp: + next(fp) + for row in fp: + name, positions, ref_bases, alt_bases = row.split() + mutations[name] = [ + [int(pos) for pos in positions.split(',')], + ref_bases.upper().split(','), + alt_bases.upper().split(',') + ] + print('counts') print(n_bam) print(n_fastq) + +def augment_rname_mutation(read): + rname = read.reference_name + if mutations is not None and isinstance(rname, str) and rname in mutations: + pos, ref, alt = mutations[rname] + for p, r, a in zip(pos, ref, alt): + try: + qidx = read.get_reference_positions().index(p - 1) + except ValueError: + return None + if read.query_alignment_sequence[qidx].upper() in (r, a): + rname += f':{p}={read.query_alignment_sequence[qidx]}' + else: # read has unexpected mutation + return None + + return rname + #map coords to BCs and filter based on given read/map quality and an exact cigar string if provided def get_coords_to_barcodes(fastq_in, n_fastq,bamfile,n_bam,mapq=30,baseq=30,cigar=''): coords_to_barcodes_fn = f'{prefix}_coords_to_barcodes.pickle' @@ -70,9 +112,10 @@ def get_coords_to_barcodes(fastq_in, n_fastq,bamfile,n_bam,mapq=30,baseq=30,ciga query_to_coords = {} bad_pairs = 0 poor_quality = 0 + unknown_mutation = 0 print('start') #get names of the aligned reads that havae a high enough quality and match sequence - for i, read in tqdm(enumerate(bam), 'paired-end reads', total = n_bam_records): + for i, read in tqdm(enumerate(bam), 'paired-end reads', total = n_bam_records): # type: int, pysam.AlignedSegment #print(read.cigarstring) #print(read) #print(read.reference_name) @@ -85,7 +128,7 @@ def get_coords_to_barcodes(fastq_in, n_fastq,bamfile,n_bam,mapq=30,baseq=30,ciga # bad_pairs += 1 # continue - if read.mapping_quality < int(mapq): + if not read.has_tag('XA') and read.mapping_quality < int(mapq): poor_quality += 1 continue #only save exact matches and high read quality @@ -94,15 +137,20 @@ def get_coords_to_barcodes(fastq_in, n_fastq,bamfile,n_bam,mapq=30,baseq=30,ciga ## filter reads with too low of map quality and exact cigar match if provided else: + qname = read.query_name + rname = augment_rname_mutation(read) + if rname is None: + unknown_mutation += 1 + continue if cigar == "": - if isinstance(read.reference_name, str): - query_to_coords[read.query_name] = read.reference_name + if isinstance(rname, str): + query_to_coords[qname] = rname else: if read.cigarstring==cigar: - if isinstance(read.reference_name, str): - query_to_coords[read.query_name] = read.reference_name + if isinstance(rname, str): + query_to_coords[qname] = rname - print(f'bad pairs: {bad_pairs} poor quality: {poor_quality}') + print(f'bad pairs: {bad_pairs} poor quality: {poor_quality} unknown mutation: {unknown_mutation}') #print(query_to_coords) #print(read.reference_name) #print(fastq)