* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels. Only reads which are statistically favoring one allele over the other are counted.
* Because of this fact, the sum of AD may be different than the individual sample depth, especially when there are
* many non-informatice reads.
* Because the AD includes reads and bases that were filtered by the Unified Genotyper and in case of indels is based on a statistical computation,
* one should not base assumptions about the underlying genotype based on it;
* instead, the genotype likelihoods (PLs) are what determine the genotype calls.
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
public void annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final AlignmentContext stratifiedContext,
final VariantContext vc,
final Genotype g,
final GenotypeBuilder gb,
final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
if ( g == null || !g.isCalled() )
return;
if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
annotateWithLikelihoods(alleleLikelihoodMap, ref.getBase(), vc, gb);
else if ( vc.isSNP() && stratifiedContext != null)
annotateWithPileup(stratifiedContext, vc, gb);
}
private void annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap alleleCounts = new HashMap();
for ( Allele allele : vc.getAlleles() )
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
for ( PileupElement p : pileup ) {
if ( alleleCounts.containsKey(p.getBase()) )
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
// we need to add counts in the correct order
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
gb.AD(counts);
}
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
final HashMap alleleCounts = new HashMap();
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
}
for (Map.Entry> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
if (a.isNoCall())
continue; // read is non-informative
if (!vc.getAlleles().contains(a))
continue; // sanity check - shouldn't be needed
alleleCounts.put(a,alleleCounts.get(a)+1);
}
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference());
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts);
}
public List getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
public List getDescriptions() {
return Arrays.asList(VCFStandardHeaderLines.getFormatLine(getKeyNames().get(0)));
}
} |