Projects >> gatk >>c98946a469d08e5121a6269c85a37853270ed4b1

Chunk
Conflicting content
 * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
 * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
 * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
<<<<<<< HEAD
 * the AD isn't necessarily calculated exactly for indels. Only reads which are statistically favoring one allele over the other are counted.
 * Because of this fact, the sum of AD may be much lower than the individual sample depth, especially when there are
 * many non-informatice reads.
=======
 * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
 * are unambiguously informative about the alternate allele). Because of this fact and
>>>>>>> 293e682e54a6cb8c77f415f0445251121bb3e91d
 * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base
 * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what
 * determine the genotype calls.
Solution content
 * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
 * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
 * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
 * the AD isn't necessarily calculated exactly for indels. Only reads which are statistically favoring one allele over the other are counted.
 * Because of this fact, the sum of AD may be different than the individual sample depth, especially when there are
 * many non-informatice reads.
 * Because the AD includes reads and bases that were filtered by the Unified Genotyper and in case of indels is based on a statistical computation,
 * one should not base assumptions about the underlying genotype based on it;
 * instead, the genotype likelihoods (PLs) are what determine the genotype calls.
 */
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {

    public void annotate(final RefMetaDataTracker tracker,
                         final AnnotatorCompatible walker,
                         final ReferenceContext ref,
                         final AlignmentContext stratifiedContext,
                         final VariantContext vc,
                         final Genotype g,
                         final GenotypeBuilder gb,
                         final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
        if ( g == null || !g.isCalled() )
            return;

        if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
            annotateWithLikelihoods(alleleLikelihoodMap, ref.getBase(), vc, gb);
        else if ( vc.isSNP() && stratifiedContext != null)
            annotateWithPileup(stratifiedContext, vc, gb);
    }

    private void annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {

        HashMap alleleCounts = new HashMap();
        for ( Allele allele : vc.getAlleles() )
            alleleCounts.put(allele.getBases()[0], 0);

        ReadBackedPileup pileup = stratifiedContext.getBasePileup();
        for ( PileupElement p : pileup ) {
            if ( alleleCounts.containsKey(p.getBase()) )
                alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
        }

        // we need to add counts in the correct order
        int[] counts = new int[alleleCounts.size()];
        counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
        for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
            counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);

        gb.AD(counts);
    }

    private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
        final HashMap alleleCounts = new HashMap();

        for ( final Allele allele : vc.getAlleles() ) {
            alleleCounts.put(allele, 0);
        }
        for (Map.Entry> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
            final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
            if (a.isNoCall())
                continue; // read is non-informative
            if (!vc.getAlleles().contains(a))
                continue; // sanity check - shouldn't be needed
            alleleCounts.put(a,alleleCounts.get(a)+1);

        }
        final int[] counts = new int[alleleCounts.size()];
        counts[0] = alleleCounts.get(vc.getReference());
        for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
            counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );

        gb.AD(counts);
    }

    public List getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

    public List getDescriptions() {
        return Arrays.asList(VCFStandardHeaderLines.getFormatLine(getKeyNames().get(0)));
    }
}
File
DepthPerAlleleBySample.java
Developer's decision
Manual
Kind of conflict
Comment