SMN1 calling from BAM files¶

SMA_caller

In [31]:

%%capture
# load the magic extension and imports
%reload_ext nextcode
import pandas as pd
import GOR_query_helper as GQH
%env LOG_QUERY=1

project = "gdx-demo"
%env GOR_API_PROJECT={project}

SMA caller¶

This notebook shows how the code in pipa/sma.py can be implemented in GOR to implement a caller that takes BAM/CRAM input and returns the SMA status.

In [32]:

qh = GQH.GOR_Query_Helper()
mydefs = ""

In [33]:

mydefs = qh.add_many("""
def #SMN1_POS# = '70247773';
def #SMN2_POS# = '69372353';
def #bamfile# = source/samples/2796000-2796999/2796799/DEMO_2796799.bam;
""")

We can fetch the reads needed from a BAM file in several ways. First we do it with two range seek and then with a join of two sites.

In [34]:

%%gor
$mydefs
gor -p chr5:70245773-70249773 #bamfile#
| merge <(gor -p chr5:69370353-69374353 #bamfile#)

Query ran in 0.90 sec
Query fetched 475 rows in 0.32 sec (total time 1.22 sec)

Out[34]:

	Chromo	Pos	End	QName	Flag	MapQ	Cigar	MD	MRNM	MPOS	ISIZE	SEQ	QUAL	TAG_VALUES
0	chr5	69370484	69370633	LH00291:112:22FNNFLT3:3:2164:49219:7297	163	60	150M	NaN	chr5	69370578	245	TAAGGTATAAGCGGGCTCAGGAACATCATTGGACATACTGAAAGAA...	=;<?4>???@DD@BBDCB2DBAABB<CCAAECBCCAACDEBBBEBB...	XA=chr5,+70245909,150M,3; MC=151M BD=LLMNMMNLM...
1	chr5	69370490	69370639	LH00291:112:22FNNFLT3:6:2186:32418:3243	163	60	150M	NaN	chr5	69370588	249	ATAAGCGGGCTCAGGAACATCATTGGACATACTGAAAGAAGAAAAA...	=;;<AB?AACCBBDBAABB@BB@@DCBCCAACDEBBBEBBEBBBBB...	XA=chr5,+70245915,150M,2; MC=151M BD=LLNMLNLLD...
2	chr5	69370578	69370728	LH00291:112:22FNNFLT3:3:2164:49219:7297	83	60	151M	NaN	chr5	69370484	-245	GGGAGGCCAAGGCAGGCGAATCACCTGAAGTCGGGAGTTCCAGATC...	>=??BD,DCEEDCEACAABEACECDBECCADDDEDCCDFDCBCF...	XA=chr5,-70246003,151M,2; MC=150M BD=JMMNOOONN...
3	chr5	69370588	69370738	LH00291:112:22FNNFLT3:6:2186:32418:3243	83	60	151M	NaN	chr5	69370490	-249	GGCAGGCGAATCACCTGAAGTCGGGAGTTCCAGATCAGCCTGACCA...	>????C@B@ABEACECCADCBACCCDDCCCFDDBCFEFDFDCADFB...	XA=chr5,-70246013,151M,3; MC=150M BD=OOONOOOML...
4	chr5	69371122	69371271	LH00291:112:22FNNFLT3:4:1278:22340:17182	99	0	150M	NaN	chr5	69371167	196	GCTCTGCCTCCCAGGTTCACACCATTCTCTTGCCTCAGCCTCCCGA...	>@?>ACDBCBCCCECA@CCCBBBCAACDDDAFFDEDDFFCE6DDAB...	XA=chr5,+70246547,150M,0; MC=151M BD=LLOMNPPPN...
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
470	chr5	70248294	70248443	LH00291:112:22FNNFLT3:7:1194:11624:22454	83	0	150M	NaN	chr5	70248112	-332	GACACCACTAAAGAAACGATCAGACAGATCTGGAATGTGAAGCGTT...	=;@=AD@DAAADCAAAACABEDCAEEDBBFCDDBBDDDDBDEBDCA...	MC=151M BD=OJKNOLPPNENNMEMNNOONOMOJOMOONOMMLKM...
471	chr5	70248387	70248537	LH00291:112:22FNNFLT3:3:1129:4208:11495	81	8	151M	NaN	chr5	70247927	-611	AGAAAAAAGGAAGTGGAATGGGTAACTCTTCTTGATTAAAAGTTAT...	?*;,=?@CBCADCCCC,ACC)C+AAEC1CBFB+DBCBBBBE+BBBD...	XA=chr5,-69372967,151M,1; MC=150M BD=MLDDDENON...
472	chr5	70249023	70249128	LH00291:112:22FNNFLT3:3:1179:29543:4733	163	49	106M	NaN	chr5	70249023	106	GCCCACGCTGGAGTGCAGTGGTGCAATCTCAGCTCACTGCAACCTC...	0?==?@?CBCBAD@DDBD@D5@DDBBACDCCEEDCCCDEECBCCDC...	XA=chr5,+69373603,106M,2; MC=106M BD=LLOJKILMN...
473	chr5	70249023	70249128	LH00291:112:22FNNFLT3:3:1179:29543:4733	83	49	106M	NaN	chr5	70249023	-106	GCCCACGCTGGAGTGCAGTGGTGCAATCTCAGCTCACTGCAACCTC...	FCDFBBFF,DDECDF6E+DDDDF6BB/FCFEF2/FB2DF6BADFC3...	XA=chr5,-69373603,106M,2; MC=106M BD=NENKNMOOM...
474	chr5	70249703	70249853	LH00291:112:22FNNFLT3:2:1202:23329:25017	99	0	151M	NaN	chr5	70249794	242	ACAGAAAACATCAAAGCCACCACACTACCTGGCTGGATTTATCCTA...	>>>@?@AABBACCBBEDCCCBBBCCDACDDEDFEFDCBBABBDDDA...	XA=chr5,+69374283,151M,0; MC=151M BD=LLJOOMEEL...

475 rows × 14 columns

and now with the join method that is more easily extended to multi-site calling:

In [35]:

%%gor
$mydefs

create #sma_pos# = norrows 1 | calc chrom 'chr5' | calc pos '69372353,70247773' | split pos | select chrom,pos;

gor [#sma_pos#] | join -snpsnp -ir -f 2000 #bamfile#

Query ran in 0.18 sec
Query fetched 475 rows in 0.74 sec (total time 0.93 sec)

Out[35]:

	Chromo	Pos	End	QName	Flag	MapQ	Cigar	MD	MRNM	MPOS	ISIZE	SEQ	QUAL	TAG_VALUES
0	chr5	69370484	69370633	LH00291:112:22FNNFLT3:3:2164:49219:7297	163	60	150M	NaN	chr5	69370578	245	TAAGGTATAAGCGGGCTCAGGAACATCATTGGACATACTGAAAGAA...	=;<?4>???@DD@BBDCB2DBAABB<CCAAECBCCAACDEBBBEBB...	XA=chr5,+70245909,150M,3; MC=151M BD=LLMNMMNLM...
1	chr5	69370490	69370639	LH00291:112:22FNNFLT3:6:2186:32418:3243	163	60	150M	NaN	chr5	69370588	249	ATAAGCGGGCTCAGGAACATCATTGGACATACTGAAAGAAGAAAAA...	=;;<AB?AACCBBDBAABB@BB@@DCBCCAACDEBBBEBBEBBBBB...	XA=chr5,+70245915,150M,2; MC=151M BD=LLNMLNLLD...
2	chr5	69370578	69370728	LH00291:112:22FNNFLT3:3:2164:49219:7297	83	60	151M	NaN	chr5	69370484	-245	GGGAGGCCAAGGCAGGCGAATCACCTGAAGTCGGGAGTTCCAGATC...	>=??BD,DCEEDCEACAABEACECDBECCADDDEDCCDFDCBCF...	XA=chr5,-70246003,151M,2; MC=150M BD=JMMNOOONN...
3	chr5	69370588	69370738	LH00291:112:22FNNFLT3:6:2186:32418:3243	83	60	151M	NaN	chr5	69370490	-249	GGCAGGCGAATCACCTGAAGTCGGGAGTTCCAGATCAGCCTGACCA...	>????C@B@ABEACECCADCBACCCDDCCCFDDBCFEFDFDCADFB...	XA=chr5,-70246013,151M,3; MC=150M BD=OOONOOOML...
4	chr5	69371122	69371271	LH00291:112:22FNNFLT3:4:1278:22340:17182	99	0	150M	NaN	chr5	69371167	196	GCTCTGCCTCCCAGGTTCACACCATTCTCTTGCCTCAGCCTCCCGA...	>@?>ACDBCBCCCECA@CCCBBBCAACDDDAFFDEDDFFCE6DDAB...	XA=chr5,+70246547,150M,0; MC=151M BD=LLOMNPPPN...
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
470	chr5	70248294	70248443	LH00291:112:22FNNFLT3:7:1194:11624:22454	83	0	150M	NaN	chr5	70248112	-332	GACACCACTAAAGAAACGATCAGACAGATCTGGAATGTGAAGCGTT...	=;@=AD@DAAADCAAAACABEDCAEEDBBFCDDBBDDDDBDEBDCA...	MC=151M BD=OJKNOLPPNENNMEMNNOONOMOJOMOONOMMLKM...
471	chr5	70248387	70248537	LH00291:112:22FNNFLT3:3:1129:4208:11495	81	8	151M	NaN	chr5	70247927	-611	AGAAAAAAGGAAGTGGAATGGGTAACTCTTCTTGATTAAAAGTTAT...	?*;,=?@CBCADCCCC,ACC)C+AAEC1CBFB+DBCBBBBE+BBBD...	XA=chr5,-69372967,151M,1; MC=150M BD=MLDDDENON...
472	chr5	70249023	70249128	LH00291:112:22FNNFLT3:3:1179:29543:4733	163	49	106M	NaN	chr5	70249023	106	GCCCACGCTGGAGTGCAGTGGTGCAATCTCAGCTCACTGCAACCTC...	0?==?@?CBCBAD@DDBD@D5@DDBBACDCCEEDCCCDEECBCCDC...	XA=chr5,+69373603,106M,2; MC=106M BD=LLOJKILMN...
473	chr5	70249023	70249128	LH00291:112:22FNNFLT3:3:1179:29543:4733	83	49	106M	NaN	chr5	70249023	-106	GCCCACGCTGGAGTGCAGTGGTGCAATCTCAGCTCACTGCAACCTC...	FCDFBBFF,DDECDF6E+DDDDF6BB/FCFEF2/FB2DF6BADFC3...	XA=chr5,-69373603,106M,2; MC=106M BD=NENKNMOOM...
474	chr5	70249703	70249853	LH00291:112:22FNNFLT3:2:1202:23329:25017	99	0	151M	NaN	chr5	70249794	242	ACAGAAAACATCAAAGCCACCACACTACCTGGCTGGATTTATCCTA...	>>>@?@AABBACCBBEDCCCBBBCCDACDDEDFEFDCBBABBDDDA...	XA=chr5,+69374283,151M,0; MC=151M BD=LLJOOMEEL...

475 rows × 14 columns

Now we can use PILEUP to call the SNPs of interest:

In [36]:

%%gor
$mydefs

create #sma_pos# = norrows 1 | calc chrom 'chr5' | calc pos '69372353,70247773' | split pos | select chrom,pos;

gor [#sma_pos#] | join -snpsnp -ir -f 2000 #bamfile# 
| pileup -gt | where SNP = 1

Query ran in 0.14 sec
Query fetched 4 rows in 0.80 sec (total time 0.93 sec)

Out[36]:

	Chrom	Pos	RefBase	MajorAllele	SecondAllele	Chi	Depth	Cdepth	Gdepth	Tdepth	GT	pGT	LOD	GT2	SNP
0	chr5	69373667	a	G	G	0.0	1	0	1	0	G	0.3719	0.3	R	1
1	chr5	70245876	t	C	C	0.0	1	1	0	0	C	0.3774	0.3	M	1
2	chr5	70247901	T	T	G	4.7	40	0	11	29	K	1.0000	27.8	T	1
3	chr5	70249068	g	C	C	0.0	2	2	0	0	C	0.5699	0.6	M	1

To get only our locations of interest we simply intersect the pileup output:

In [38]:

%%gor
$mydefs

create #sma_pos# = norrows 1 | calc chrom 'chr5' | calc pos '69372353,70247773' | split pos | select chrom,pos;

gor [#sma_pos#] | join -snpsnp -ir -f 2000 #bamfile# 
| pileup -gt | join -snpsnp [#sma_pos#]

Query ran in 0.21 sec
Query fetched 2 rows in 0.87 sec (total time 1.09 sec)

Out[38]:

	Chrom	Pos	RefBase	MajorAllele	SecondAllele	Chi	Depth	Adepth	Cdepth	Gdepth	Tdepth	Dels	Ins	GT	pGT	LOD	GT2	SNP
0	chr5	69372353	T	T	T	0.0	9	0	0	0	9	0	0	T	1.0	5.6	W	0
1	chr5	70247773	C	C	C	0.0	44	0	44	0	0	0	0	C	1.0	16.2	M	0

and out final logic

In [47]:

mydefs = qh.add_many("""
def #base_quality_threshold# = 20;
def #smn1_count_threshold# = 10;
def #ratio_threshold# = 0.05;
def #total_count_threshold# = 10;

create #sma_pos# = norrows 1 | calc chrom 'chr5' | calc pos '69372353,70247773' | split pos | select chrom,pos;

create #pile# = gor [#sma_pos#] | join -snpsnp -ir -f 2000 #bamfile#
| pileup -gt -bq #base_quality_threshold#
| join -snpsnp [#sma_pos#];

create #output# = nor [#pile#] | calc type if(pos = 69372353, 'smn2','smn1')
| select type,depth-Tdepth
| pivot -v 'smn1','smn2' type 
| calc c_count_total = smn1_Cdepth + smn2_Cdepth
| calc t_count_total = smn2_Tdepth + smn2_Tdepth
| calc output if(c_count_total + t_count_total < #total_count_threshold# ,'SMN1/2 counts lower than threshold',
       if(t_count_total = 0,'division by zero',
       if(smn1_Cdepth <= #smn1_count_threshold# and c_count_total / t_count_total <= #ratio_threshold#,'hom call: '+str(c_count_total),'no call: '+str(c_count_total/ t_count_total))))
| select output;
""")

In [48]:

%%gor
$mydefs

nor [#output#]

Query ran in 1.32 sec
Query fetched 1 rows in 0.00 sec (total time 1.32 sec)

Out[48]:

	output
0	no call: 2.75

Here is the full code and for comparison the sma.py code below.

In [49]:

print(mydefs)

def #SMN1_POS# = '70247773';
def #SMN2_POS# = '69372353';
def #bamfile# = source/samples/2796000-2796999/2796799/DEMO_2796799.bam;
def #base_quality_threshold# = 20;
def #smn1_count_threshold# = 10;
def #ratio_threshold# = 0.05;
def #total_count_threshold# = 10;
create #sma_pos# = norrows 1 | calc chrom 'chr5' | calc pos '69372353,70247773' | split pos | select chrom,pos;
create #pile# = gor [#sma_pos#] | join -snpsnp -ir -f 2000 #bamfile#
| pileup -gt -bq #base_quality_threshold#
| join -snpsnp [#sma_pos#];
create #output# = nor [#pile#] | calc type if(pos = 69372353, 'smn2','smn1')
| select type,depth-Tdepth
| pivot -v 'smn1','smn2' type 
| calc c_count_total = smn1_Cdepth + smn2_Cdepth
| calc t_count_total = smn2_Tdepth + smn2_Tdepth
| calc output if(c_count_total + t_count_total < #total_count_threshold# ,'SMN1/2 counts lower than threshold',
       if(t_count_total = 0,'division by zero',
       if(smn1_Cdepth <= #smn1_count_threshold# and c_count_total / t_count_total <= #ratio_threshold#,'hom call: '+str(c_count_total),'no call: '+str(c_count_total/ t_count_total))))
| select output;

In [ ]:

#!/mnt/fb1/ProdBin/pipa/pyVE/pipa/bin/python

# Python script to call presence of SMN1 gene.
# Can be tested by running:
# $ pip install -r requirements.txt
# $ python pipeline/sma.py call \
#     --case FAM000001 \
#     --reference /mnt/hnas/reference/hg19/hg19.fa \
#     tests/data/alignments/1842535.smn.bam \
#     tests/data/alignments/1842014.smn.bam
import fire
import logging
import pysam
import subprocess
import sys
import vcf

from itertools import chain

# create logger
logger = logging.getLogger('sma')
logger.setLevel(logging.DEBUG)

# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)

# create formatter and add it to the handler
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)

# add the handler to the logger
logger.addHandler(ch)

# SMN1/2 differentiating positions
SMN1_POS = {'chrom': '5', 'pos': 70247773}
SMN2_POS = {'chrom': '5', 'pos': 69372353}

# SMN1 primary transcript
SMN1_TRANSCRIPT = 'NM_000344'
SMN1_EXON_7_LENGTH = 498

# call decision map
CALL_MAP = {
    True: 'hom',
    False: 'wt',
    None: 'nc',
}


def _run_mpileup(samtools, sample, alignment, reference, position, base_quality_threshold, verbosity):
    output = f'{sample}.{position}.mpileup.vcf'

    # run samtools mpileup from differentiating positions
    cmd = [
        samtools,
        'mpileup',
        '-v',
        '-u',
        '-q',
        f'{base_quality_threshold}',
        '-Q',
        '-O',
        '-s',
        '-B',
        '-I',
        '-R',
        '-t',
        'AD',
        '-t',
        'DP',
        '-t',
        'SP',
        '-f',
        f'{reference}',
        '-r',
        f'{position}',
        '-o',
        f'{output}',
        alignment,
    ]

    # run cromwell command
    if verbosity:
        logger.info('Running samtools mpileup command:')
        logger.info(cmd)
    try:
        subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        log_err = f'{sample}.{position}.mpileup.err.log'
        with open(log_err, 'w') as fh:
            fh.write(str(e.stderr))

        log_out = f'{sample}.{position}.mpileup.out.log'
        with open(log_out, 'w') as fh:
            fh.write(str(e.stdout))

        logger.critical('Failed to run samtools mpileup. See logs:')
        logger.critical(log_out)
        logger.critical(log_err)
        exit(e.returncode)

    return output


def _get_counts(vcf_file, alt_base):
    """Given a VCF file name and alternate base,
    returns the ref and alt counts

    :param vcf_file: str of full path to vcf file name (required)
    :param alt_base: str of alt base of interest, case sensitive (required)
    """
    vcf_reader = vcf.Reader(filename=vcf_file)
    try:
        record = next(vcf_reader)
    except StopIteration:
        # assume we have no coverage if we got an empty vcf
        return 0, 0, 0

    try:
        sample_call = list(record)[0]
    except IndexError:
        # assume we have no coverage if we got a vcf with no sample calls
        return 0, 0, 0

    total_count = sample_call.data.DP
    ref_count = sample_call.data.AD[0]

    alts = record.ALT
    if alt_base in alts:
        alt_pos = alts.index(alt_base) + 1
        alt_count = sample_call.data.AD[alt_pos]
    else:
        alt_count = 0

    return ref_count, alt_count, total_count


def _make_call(samtools, alignment, alignment_type, reference, smn1_count_threshold, base_quality_threshold, ratio_threshold, verbosity, total_count_threshold=10):
    # open alignment file
    try:
        if alignment_type == 'bam':
            sam_file = pysam.AlignmentFile(alignment, 'rb')
        else:
            sam_file = pysam.AlignmentFile(alignment, 'rc')
    except (FileNotFoundError, PermissionError) as e:
        logger.error('Cannot open alignment file. %s' % e)
        return ['NA' for _ in range(5)]

    try:
        sample = sam_file.header['RG'][0]['SM']
    except KeyError:
        logger.error('Failed to get sample from alignment header.')
        return ['NA' for _ in range(5)]

    sam_file.close()

    smn1_pileup_vcf = _run_mpileup(
        samtools,
        sample,
        alignment,
        reference,
        f"chr{SMN1_POS['chrom']}:{SMN1_POS['pos']}-{SMN1_POS['pos']}",
        base_quality_threshold,
        verbosity,
    )

    smn2_pileup_vcf = _run_mpileup(
        samtools,
        sample,
        alignment,
        reference,
        f"chr{SMN2_POS['chrom']}:{SMN2_POS['pos']}-{SMN2_POS['pos']}",
        base_quality_threshold,
        verbosity,
    )

    c_count_smn1, t_count_smn1, total_count_smn1 = _get_counts(smn1_pileup_vcf, 'T')
    t_count_smn2, c_count_smn2, _ = _get_counts(smn2_pileup_vcf, 'C')

    c_count_total = c_count_smn1 + c_count_smn2
    t_count_total = t_count_smn1 + t_count_smn2

    # make the homozygous call if we have a set count of SMN2 reads
    # and the SMN1/SMN2 ratio is below the threshold
    if c_count_total + t_count_total < total_count_threshold:
        logger.info(f"{sample}: c_count_smn1:{c_count_smn1}, t_count_smn1:{t_count_smn1}")
        logger.info(f"{sample}: c_count_smn2:{c_count_smn2}, t_count_smn2:{t_count_smn2}")
        logger.info(f"{sample}: c_count_total:{c_count_total}, t_count_total:{t_count_total}")
        logger.warning(f"{sample} has a total of SMN1/2 counts lower than threshold of {total_count_threshold}")
        hom_call = None
    else:
        try:
            hom_call = c_count_smn1 <= smn1_count_threshold and c_count_total / t_count_total <= ratio_threshold
        except ZeroDivisionError:
            # make a no_call for no-coverage smn2 samples
            hom_call = None

    # add call for this sample
    return [
        str(sample),
        CALL_MAP[hom_call],
        '99',
        str(c_count_total),
        '-1' if hom_call else '0',
    ]


def call(case, *alignments, alignments_list=None, alignment_type='bam', reference=None, smn1_count_threshold=10, ratio_threshold=0.05,
         base_quality_threshold=20, split_calls=False, skip_wt=False, samtools='samtools', verbosity=0):
    """Calls total absence of a working copy of SMN1 gene linked to SMA.

    :param case: Case identifier.
    :param alignments: Alignment files.
    :param alignments_list: Optional alignment list file. Ignored if positional alignments are provided.
    :param alignment_type: alignment input file type, valid values are 'bam' or 'cram'. [default: 'bam']
    :param reference: Reference fasta.
    :param smn1_count_threshold: Minimum number of reads covering SMN2 differentiating position. [default: 10]
    :param ratio_threshold: SMN1/SMN2 ratio cutoff. Assumes no SMN1 below this number. [default: 0.05]
    :param base_quality_threshold: Read base quality threshold to use for samtools mpileup. [default: 20]
    :param split_calls: If true, outputs one sample call per line. [default: false]
    :param skip_wt: If true, doesn't output lines with all wild-type calls. [default: false]
    :param samtools: Samtools executable path. [default: samtools]
    :param verbosity: Verbose level. [default: 0]
    """
    if reference is None:
        logger.critical('Please provide reference fasta, see <--reference>')
        exit(1)
    if not alignments and alignments_list is None:
        logger.critical(
            'Please provide alignments files or a list of alignments, see <alignments> or <--alignments-list>'
        )
        exit(1)
    elif not alignments:
        try:
            with open(alignments_list, 'r') as fh:
                alignments = fh.read().splitlines()
        except (FileNotFoundError, PermissionError) as e:
            logger.critical('Cannot open alignments list file. %s' % e)
            exit(1)

    # output calls in GeneDx CNV format
    # start headers
    headers = [
        '#case',
        'genes',
        'exons',
        'cnv_type',
        'transcript',
        'chromosome',
        'start',
        'length',
    ]

    # number of sample columns to output, use just one if asked to split calls
    samples_number = 1 if split_calls else len(alignments)

    # extend headers for how many sample calls we have
    for i in range(1, samples_number + 1):
        headers.extend(
            ['%s_%d' % (header, i) for header in [
                "sample",
                "genotype",
                "score",
                "avg_coverage",
                "avg_deviance",
            ]]
        )

    # write headers
    sys.stdout.write('\t'.join(headers))
    sys.stdout.write('\n')

    # fill common columns
    common_columns = [
        str(case),
        'SMN1',
        'ex7',
        'CNV del',
        SMN1_TRANSCRIPT,
        SMN1_POS['chrom'],
        str(SMN1_POS['pos']),
        str(SMN1_EXON_7_LENGTH),
    ]

    # if asked print one sample call per line
    if split_calls:
        for alignment in alignments:
            c = _make_call(
                samtools,
                alignment,
                alignment_type,
                reference,
                smn1_count_threshold,
                base_quality_threshold,
                ratio_threshold,
                verbosity,
            )

            # skip calls where we couldn't even get the sample from input
            if c[0] == 'NA':
                continue

            # if asked skip wild-type calls
            if skip_wt and c[1] == 'wt':
                continue

            sys.stdout.write('\t'.join(common_columns + c))
            sys.stdout.write('\n')
    # else print all samples calls in one line
    else:
        calls = []
        for alignment in alignments:
            calls.append(
                _make_call(
                    samtools,
                    alignment,
                    alignment_type,
                    reference,
                    smn1_count_threshold,
                    base_quality_threshold,
                    ratio_threshold,
                    verbosity,
                )
            )

        # if asked to skip wild-type calls, only output call lines where at least on sample is not wt
        if not skip_wt or any([c[1] != 'wt' for c in calls]):
            sys.stdout.write('\t'.join(common_columns + list(chain(*calls))))
            sys.stdout.write('\n')


def main():
    fire.Fire({
        'call': call,
    })


if __name__ == '__main__':
    main()

In [ ]: