43 #include <Bpp/Seq/SequenceWithAnnotationTools.h> 55 if (blockBuffer_.size() == 0) {
58 MafBlock* block = iterator_->nextBlock();
62 vector< vector<bool> > aln;
63 for (
size_t i = 0; i < species_.size(); ++i) {
65 if (seq->hasAnnotation(SequenceMask::MASK)) {
66 aln.push_back(dynamic_cast<const SequenceMask&>(seq->getAnnotation(SequenceMask::MASK)).getMask());
69 size_t nr = aln.size();
78 for (i = 0; i < windowSize_; ++i) {
79 for (
size_t j = 0; j < nr; ++j) {
82 window_.push_back(col);
86 ApplicationTools::message->endLine();
87 ApplicationTools::displayTask(
"Sliding window for mask filter",
true);
89 while (i + step_ < nc) {
91 ApplicationTools::displayGauge(i - windowSize_, nc - windowSize_ - 1,
'>');
94 for (
size_t u = 0; u < window_.size(); ++u)
95 for (
size_t v = 0; v < window_[u].size(); ++v)
96 if (window_[u][v]) sum++;
97 if (sum > maxMasked_) {
98 if (pos.size() == 0) {
99 pos.push_back(i - windowSize_);
102 if (i - windowSize_ <= pos[pos.size() - 1]) {
103 pos[pos.size() - 1] = i;
105 pos.push_back(i - windowSize_);
112 for (
size_t k = 0; k < step_; ++k) {
113 for (
size_t j = 0; j < nr; ++j) {
116 window_.push_back(col);
123 unsigned int sum = 0;
124 for (
size_t u = 0; u < window_.size(); ++u)
125 for (
size_t v = 0; v < window_[u].size(); ++v)
126 if (window_[u][v]) sum++;
127 if (sum > maxMasked_) {
128 if (pos.size() == 0) {
129 pos.push_back(i - windowSize_);
132 if (i - windowSize_ < pos[pos.size() - 1]) {
133 pos[pos.size() - 1] = i;
135 pos.push_back(i - windowSize_);
141 ApplicationTools::displayTaskDone();
144 if (pos.size() == 0) {
145 blockBuffer_.push_back(block);
147 (*logstream_ <<
"MASK CLEANER: block is clean and kept as is.").endLine();
149 }
else if (pos.size() == 2 && pos.front() == 0 && pos.back() == block->
getNumberOfSites()) {
152 (*logstream_ <<
"MASK CLEANER: block was entirely removed. Tried to get the next one.").endLine();
156 (*logstream_ <<
"MASK CLEANER: block with size "<< block->
getNumberOfSites() <<
" will be split into " << (pos.size() / 2 + 1) <<
" blocks.").endLine();
159 ApplicationTools::message->endLine();
160 ApplicationTools::displayTask(
"Spliting block",
true);
162 for (i = 0; i < pos.size(); i+=2) {
164 ApplicationTools::displayGauge(i, pos.size() - 2,
'=');
166 (*logstream_ <<
"MASK CLEANER: removing region (" << pos[i] <<
", " << pos[i+1] <<
") from block.").endLine();
182 blockBuffer_.push_back(newBlock);
185 if (keepTrashedBlocks_) {
194 trashBuffer_.push_back(outBlock);
208 blockBuffer_.push_back(newBlock);
211 ApplicationTools::displayTaskDone();
215 }
while (blockBuffer_.size() == 0);
218 MafBlock* block = blockBuffer_.front();
219 blockBuffer_.pop_front();
const MafSequence & getSequenceForSpecies(const std::string &species) const
void setScore(double score)
unsigned int getPass() const
size_t getNumberOfSites() const
MafBlock * analyseCurrentBlock_()
MafSequence * subSequence(size_t startAt, size_t length) const
Extract a sub-sequence.
A synteny block data structure, the basic unit of a MAF alignment file.
void addSequence(const MafSequence &sequence)
void setPass(unsigned int pass)
size_t getNumberOfSequences() const
const MafSequence & getSequence(const std::string &name) const
A sequence class which is used to store data from MAF files.