43 #include <Bpp/Seq/SequenceWithAnnotationTools.h> 44 #include <Bpp/Seq/SequenceWithQuality.h> 45 #include <Bpp/Seq/Container/VectorSiteContainer.h> 46 #include <Bpp/Seq/SiteTools.h> 47 #include <Bpp/Seq/SequenceWalker.h> 61 struct tm* ct = localtime(&t);
62 out <<
"##fileformat=VCFv4.0" << endl;
63 out <<
"##fileDate=" << (ct->tm_year + 1900) << (ct->tm_mon + 1) << ct->tm_mday << endl;
64 out <<
"##source=Bio++" << endl;
65 out <<
"##FILTER=<ID=gap,Description=\"At least one sequence contains a gap\">" << endl;
66 out <<
"##FILTER=<ID=unk,Description=\"At least one sequence contains an unresolved character\">" << endl;
67 if (genotypes_.size() > 0)
68 out <<
"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">" << endl;
72 out <<
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
73 if (genotypes_.size() > 0) {
75 for (
size_t i = 0; i < genotypes_.size(); ++i)
76 out <<
"\t" << genotypes_[i];
85 SequenceWalker walker(refSeq);
86 size_t offset = refSeq.
start();
87 int gap = refSeq.getAlphabet()->getGapCharacterCode();
88 map<int, string> chars;
89 for (
int i = 0; i < static_cast<int>(AlphabetTools::DNA_ALPHABET.getNumberOfTypes()); ++i)
90 chars[i] = AlphabetTools::DNA_ALPHABET.intToChar(i);
93 vector<int> gt(genotypes_.size());
95 for (
size_t i = 0; i < sites.getNumberOfSites(); i++) {
99 if (SiteTools::hasGap(sites.getSite(i))) {
102 if (SiteTools::hasUnknown(sites.getSite(i))) {
110 map<int, size_t> counts;
111 SiteTools::getCounts(sites.getSite(i), counts);
118 for (
int x = 0; x < 4; ++x) {
120 size_t f = counts[x];
127 ac += TextTools::toString(f);
135 out << chr <<
"\t" << (offset + walker.getSequencePosition(i) + 1) <<
"\t.\t" << chars[refSeq[i]] <<
"\t" << alt <<
"\t.\t" << filter <<
"\tAC=" << ac;
137 if (genotypes_.size() > 0) {
139 for (
size_t g = 0; g < genotypes_.size(); ++g) {
141 if (sequences.size() == 0)
143 else if (sequences.size() > 1)
144 throw Exception(
"VcfOutputMafIterator::writeBlock(). Duplicated sequence for species '" + genotypes_[g] +
",.");
146 int state = (*sequences[0])[i];
147 if (AlphabetTools::DNA_ALPHABET.isGap(state) || AlphabetTools::DNA_ALPHABET.isUnresolved(state))
150 out <<
"\t" << snps[state];
const MafSequence & getSequenceForSpecies(const std::string &species) const
void writeBlock_(std::ostream &out, const MafBlock &block) const
std::vector< const MafSequence * > getSequencesForSpecies(const std::string &species) const
AlignedSequenceContainer & getAlignment()
A synteny block data structure, the basic unit of a MAF alignement file.
const std::string & getChromosome() const
void writeHeader_(std::ostream &out) const
A sequence class which is used to store data from MAF files.