Genome Browser tracks¶
In [4]:
%%capture
# load the magic extension and imports
%reload_ext nextcode
import pandas as pd
import GOR_query_helper as GQH
%env LOG_QUERY=1
project = "test-hg19"
%env GOR_API_PROJECT={project}
In [5]:
%%gor manifest <<
nor SubjectReports/Participants.rep.link
| select pn,kind,affected,sex,study_name
| replace affected if(affected='yes','affected','unaffected')
| pivot -gc study_name kind -v index,father,mother -e ''
| hide index_affected,father_sex,mother_sex
Query ran in 0.46 sec Query fetched 776 rows in 0.05 sec (total time 0.50 sec)
In [6]:
manifest
Out[6]:
study_name | index_pn | index_sex | father_pn | father_affected | mother_pn | mother_affected | |
---|---|---|---|---|---|---|---|
0 | GCA825341 | TEST_2941719 | female | NaN | NaN | NaN | NaN |
1 | GCA802054 | TEST_2909078 | male | NaN | NaN | NaN | NaN |
2 | GCA863723 | TEST_2992440 | female | NaN | NaN | NaN | NaN |
3 | GCA820675 | TEST_2936504 | female | NaN | NaN | NaN | NaN |
4 | GCA833767 | TEST_2953629 | female | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... |
771 | GCA852666 | TEST_2977861 | male | NaN | NaN | NaN | NaN |
772 | GCA792495 | TEST_2901892 | male | TEST_2895471 | unaffected | TEST_2901923 | unaffected |
773 | GCA817697 | TEST_2925474 | female | NaN | NaN | TEST_2919688 | unaffected |
774 | GCA819745 | TEST_2931900 | male | TEST_2931924 | unaffected | TEST_2931915 | unaffected |
775 | GCA855762 | TEST_2983016 | female | TEST_2983018 | unaffected | TEST_2983017 | unaffected |
776 rows × 7 columns
In [7]:
def global_block():
return """
<tracks>
<global>
gorPath = ""
lessthan = chr(60)
# Begin global definitions for template study_tracks.gbt
bamFeatureFilters = [['Expand flags', '| bamflag -v | prefix readispaired- flag', 'Shows additional information on the selected reads'],
['Strand eror', ' | bamflag -v | prefix readispaired- flag | where flag_qStrand = flag_mStrand', 'Find reads where both items in a pair are reported on the same strand'],
['Forward_Reverse', 'gor_pos_file | bamflag -v | where qStrand = 1 | pileup | select 1,2,depth-Tdepth | calc strand "for" | merge <(gor_pos_file | bamflag -v | where qStrand = 0 | pileup | select 1,2,depth-Tdepth | calc strand "rev") | pivot strand -v for,rev -e 0', 'Pileup for forward and reverse strand']]
bamPath = 'source/bam/'
bamProperties = '{bamReadHeight: 8, bamPairReads: 0, bamShowToolTips: 2, bamCutOff: 20}'
gorPath = ''
varPath = 'source/var/'
gtPath = 'gt/'
sumPath = 'sum/'
myRefVal = '1e-5' # Use None to have no ref-line
myPainter = 'Manhattan' # Use Manhattan or Scatter
myRange = 'Local auto' # Use a number, Global auto or Local auto
myColor = 'Gray'
myGridOn = 'true'
myHeight = 50 # Use 50, 200, etc.
varreports = [['Annotation Report', '| varjoin #VEP# -l -r | varjoin ref/variants/dbsnp.gorz -l -r | varjoin ref/variants/freq_max.gorz -l -r ', 'Variants annotated with functional impact and frequency data']]
myVarPainterClass = 'com.decode.genomebrowser.beans.tracks.painters.VariationPainter'
covRefVal = '10' # Use None to have no ref-line
covMaxHeight = '50'
covPainter = 'Manhattan' # Use Manhattan or Scatter
covRange = 'Local auto' # Use a number, Global auto or Local auto
covColor = 'Gray'
covGridOn = 'true'
covHeight = 75 # Use 50, 200, etc.
covStart = 1 # Zero bases shit
covStop = 2
covValueCol = 3
covMaxRange = 1000000
covPath = "source/cov/"
def levelcmd(chr, posfromi, posto, level_range, pn):
import math
posfrom = posfromi - 10000
# region_size = 1000*math.pow(4,math.ceil(math.log(1.0+lr/1000)/math.log(4)))
region_size = covMaxRange*2
region_start = math.floor(region_size*math.floor(posfrom/region_size))
region_end = math.floor(region_size*(1+math.ceil(posto/region_size)))
coverage_file = '<('+covPath+'segment_cov.gord -f '+pn+')'
theFile = coverage_file + " | segspan -gc depth -maxseg 1"
cmd = "level=7 create xxx = gor -p %s:%i-%i %s | group 100 -avg -ic depth; gor -p %s:%i-%i [xxx] | group 10000 -avg -fc avg_depth | rename avg_avg_depth avg_depth" %(chr, region_start, region_end, theFile, chr, posfrom, posto)
if (less(level_range, 1000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 1"
cmd = "level=0 gor -p %s:%i-%i %s | select 1,bpStart,bpStop,depth" %(chr, posfrom, posto, theFile)
elif (less(level_range, 5000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 1"
cmd = "level=1 gor -p %s:%i-%i %s | group 5 -avg -ic depth" %(chr, posfrom, posto, theFile)
elif (less(level_range, 10000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 1"
cmd = "level=2 gor -p %s:%i-%i %s | group 10 -avg -ic depth" %(chr, posfrom, posto, theFile)
elif (less(level_range, 50000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 1"
cmd = "level=3 gor -p %s:%i-%i %s | group 50 -avg -ic depth" %(chr, posfrom, posto, theFile)
elif (less(level_range, 100000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 99"
cmd = "level=4 create xxx = gor -p %s:%i-%i %s | group 100 -avg -ic depth ; gor -p %s:%i-%i [xxx]" %(chr, region_start, region_end, theFile, chr, posfrom, posto)
elif (less(level_range, 500000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 99"
cmd = "level=5 create xxx = gor -p %s:%i-%i %s | group 100 -avg -ic depth | join -segseg %s | calc weight (min(bpstop, bpstopx)-max(bpstart,bpstartx))*depth | group 1 -gc bpstop -sum -ic weight | calc depth round(sum_weight/100.0) | select 1-3,depth; gor -p %s:%i-%i [xxx]" %(chr, region_start, region_end, theFile, coverage_file, chr, posfrom, posto)
elif (less(level_range, 1000000)):
theFile = coverage_file + " | segspan -gc depth -maxseg 99"
cmd = "level=6 create xxx = gor -p %s:%i-%i %s | group 100 -avg -ic depth | join -segseg %s | calc weight (min(bpstop, bpstopx)-max(bpstart,bpstartx))*depth | group 1 -gc bpstop -sum -ic weight | calc depth round(sum_weight/100.0) | select 1-3,depth; gor -p %s:%i-%i [xxx]" %(chr, region_start, region_end, theFile, coverage_file, chr, posfrom, posto)
return cmd
# End global definitions for template study_tracks.gbt
</global>
"""
In [13]:
def case_block(case):
return f"""
<track name="Var summary" type="SNVs" visible="true" selected="false">
ffiletrack(featureFilters=varreports,height=50 ,filename='studies/{case}/tr3/gregor_step2.gorz | where gdx_view != "" | select 1,2,Reference,Call,zygosity_proband,GDX_classification,VEP_Impact,spliceai_max_impact,father_GT,auto_evidence,auto_classification,MULTI_rank,MIMI_rank,GDX_view', name='Pipeline Searches', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=1, valuecol=-1, primdim='', color='Cyan',painter='Manhattan',painterClass=myVarPainterClass,grid='true',scale='Linear',aggregate='Min', max='Local auto',min='0',refline='None',stacking='true'),
ffiletrack(featureFilters=varreports,height=50 ,filename='studies/{case}/tr3/gregor_step2.gorz | select 1,2,Reference,Call,zygosity_proband,GDX_classification,VEP_Impact,spliceai_max_impact,father_GT,auto_evidence,auto_classification,MULTI_rank,MIMI_rank,GDX_view', name='All filtered (Gregor2)', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=1, valuecol=-1, primdim='', color='Cyan',painter='Manhattan',painterClass=myVarPainterClass,grid='true',scale='Linear',aggregate='Min', max='Local auto',min='0',refline='None',stacking='true')
</track>
<track name="Key Cand Genes" type="Genes" visible="true" selected="true">
gorpipetrack(height=myHeight,command="create x = gor studies/{case}/genes_hpo_phrank.gor | rank genome pheno_score -o desc | rank genome norm_pheno_score -o desc | where 25 >= rank_pheno_score or 25 >= rank_norm_pheno_score; gor -p $chr [x]",startcol = 1, stopcol = 2, valuecol = -1, idcol=3, maxRange=-1,name='Key Cand Genes', grid='false',color='Red',painter='Box',stacking='true',featureFilters=commentfilters)
</track>
<track name="Cand Genes" type="Genes" visible="true" selected="false">
gorpipetrack(height=myHeight,command="gor -p $chr studies/{case}/genes_hpo_phrank.gor",startcol = 1, stopcol = 2, valuecol = -1, idcol=3, maxRange=-1,name='Candidate Genes', grid='false',color=myColor,painter='Box',stacking='true',featureFilters=commentfilters)
</track>
"""
In [9]:
def sample_block(pn,relationship,visible):
return f"""
<track name="{relationship}-{pn}" type="BAM" visible="{visible}" selected="false">
bam_gor_track(bamProperties, file=bamPath+'bam.gord -f {pn}')
</track>
<track name="{relationship}-{pn}" type="SNVs" visible="false" selected="false">
ffiletrack(featureFilters=varreports,height=50 ,filename=varPath+'wgs_varcalls.gord -f {pn}', name='{relationship}-{pn} Variations', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=1, valuecol=-1, primdim='', color='Cyan',painter='Manhattan',painterClass=myVarPainterClass,grid='true',scale='Linear',aggregate='Min', max='Local auto',min='0',refline='None',stacking='true')
</track>
<track name="{relationship}-{pn}" type="Coverage" visible="false" selected="false">
gorpipetrack(height=covHeight,command='$levelcmd("$chr",$pos_from,$pos_to,$range,"{pn}")', startcol = covStart, stopcol = covStop, valuecol = covValueCol, maxRange=covMaxRange,name='{relationship}-{pn} Coverage',color=covColor,painter=covPainter,grid=covGridOn,scale='Linear',aggregate='Avg', max=covMaxHeight,min='0',refline=covRefVal,stacking='true')
</track>
<track name="{relationship}-{pn}" type="CNVs" visible="{visible}" selected="false">
ffiletrack(featureFilters=varreports,height=15 ,filename=varPath+'cnv_varcalls.gord -fs -f {pn} | where caller != "SCRAMBLE" | select 1-svtype | where contains(alt,"DEL")', name='DEL - {relationship}-{pn}', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=2, valuecol=-1, color='Blue',painter='Box',stacking='true') ,ffiletrack(featureFilters=varreports,height=15 ,filename=varPath+'cnv_varcalls.gord -fs -f {pn} | where caller != "SCRAMBLE" | select 1-svtype | where contains(alt,"DUP")', name='DUP', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=2, valuecol=-1, primdim='', color='Green',painter='Box',stacking='true') ,ffiletrack(featureFilters=varreports,height=15 ,filename=varPath+'cnv_varcalls.gord -fs -f {pn} | where caller != "SCRAMBLE" | select 1-svtype | where contains(alt,"INS")', name='INS', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=2, valuecol=-1, primdim='', color='Red',painter='Box',stacking='true') ,ffiletrack(featureFilters=varreports,height=15 ,filename=varPath+'cnv_varcalls.gord -fs -f {pn} | where caller = "SCRAMBLE" | select 1-svtype,caller', name='Scramble', isheader='true', idcol=-1, chrcol=0, startcol=1, stopcol=2, valuecol=-1, primdim='', color='Orange',painter='Box',stacking='true')
</track>
"""
In [10]:
project
Out[10]:
'test-hg19'
In [14]:
import os;
def NaN2E(x):
if str(x) == 'nan' or str(x) == 'NaN': return ''
else: return x
for i in range(0,len(manifest)):
gb = global_block()
cb = case_block(manifest.at[i,'study_name'])
index = f"""{manifest.at[i,'index_pn']}"""
father = f"""{NaN2E(manifest.at[i,'father_pn'])}"""
mother = f"""{NaN2E(manifest.at[i,'mother_pn'])}"""
sb = sample_block(index,'index','true')
if father != '':
sb += sample_block(father,'father','false')
if mother != '':
sb += sample_block(mother,'mother','false')
gb += cb
gb += sb
gb += '\n\n</tracks>\n'
#print(gb)
gb_path = f"""/mnt/csa/env/dev/projects/{project}/studies/"""+manifest.at[i,'study_name']+'/tracks_new.gb'
#print(gb_path)
os.makedirs(os.path.dirname(gb_path), exist_ok=True)
with open(gb_path, 'w') as f:
f.write(gb)
In [ ]: