00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <climits>
00020 #include "ncl/nxstreesblock.h"
00021
00022 #include <sstream>
00023 #include <stack>
00024
00025 #include "ncl/nxsreader.h"
00026 using namespace std;
00027 #define REGRESSION_TESTING_GET_TRANS_TREE_DESC 0
00028 #define DEBUGGING_TREES_BLOCK 0
00029 enum PrevTreeTokenDesc
00030 {
00031 NXS_TREE_OPEN_PARENS_TOKEN,
00032 NXS_TREE_CLOSE_PARENS_TOKEN,
00033 NXS_TREE_COMMA_TOKEN,
00034 NXS_TREE_CLADE_NAME_TOKEN,
00035 NXS_TREE_COLON_TOKEN,
00036 NXS_TREE_BRLEN_TOKEN
00037 };
00038
00039 NxsSimpleNode * NxsSimpleNode::FindTaxonIndex(unsigned leafIndex)
00040 {
00041 if (leafIndex == taxIndex)
00042 return this;
00043 NxsSimpleNode *n = lChild;
00044 while (n)
00045 {
00046 NxsSimpleNode * r = n->FindTaxonIndex(leafIndex);
00047 if (r)
00048 return r;
00049 n = n->rSib;
00050 }
00051 return NULL;
00052 }
00053
00054
00055 void NxsSimpleTree::RerootAt(unsigned leafIndex)
00056 {
00057 NxsSimpleNode * newRoot = NULL;
00058 if (root)
00059 {
00060 if (leafIndex < leaves.size())
00061 newRoot = leaves[leafIndex];
00062 if (newRoot == NULL)
00063 newRoot = root->FindTaxonIndex(leafIndex);
00064 }
00065 if (newRoot == NULL)
00066 {
00067 NxsString eMsg;
00068 eMsg << "Reroot failed. Leaf number " << (leafIndex + 1) << " was not found in the tree.";
00069 throw NxsNCLAPIException(eMsg);
00070 }
00071 NxsSimpleNode * p = newRoot->edgeToPar.parent;
00072 if (!p || p == root)
00073 return;
00074 std::stack<NxsSimpleNode *> toFlip;
00075 while (p != root)
00076 {
00077 toFlip.push(p);
00078 p = p->edgeToPar.parent;
00079 }
00080 while (!toFlip.empty())
00081 {
00082 NxsSimpleNode *subRoot = toFlip.top();
00083 toFlip.pop();
00084 FlipRootsChildToRoot(subRoot);
00085 }
00086 }
00087 void NxsSimpleTree::FlipRootsChildToRoot(NxsSimpleNode *subRoot)
00088 {
00089 std::vector<NxsSimpleNode *> rc = root->GetChildren();
00090 if (rc.size() < 2)
00091 {
00092 NCL_ASSERT(!rc.empty());
00093 NCL_ASSERT(rc[0] == subRoot);
00094
00095 std::vector<NxsSimpleNode *> tmp;
00096 tmp.swap(allNodes);
00097 allNodes.reserve(tmp.size() - 1);
00098 for (std::vector<NxsSimpleNode *>::const_iterator nIt = tmp.begin(); nIt != tmp.end(); ++nIt)
00099 {
00100 if (*nIt != root)
00101 allNodes.push_back(*nIt);
00102 }
00103 delete root;
00104 root = subRoot;
00105 subRoot->edgeToPar.parent = NULL;
00106 return;
00107 }
00108
00109 if (rc.size() == 2)
00110 {
00111
00112
00113 NxsSimpleNode * formerSib = subRoot->rSib;
00114 if (formerSib == NULL)
00115 formerSib = root->lChild;
00116 NCL_ASSERT(formerSib != subRoot);
00117
00118 std::vector<NxsSimpleNode *> tmp;
00119 tmp.swap(allNodes);
00120 allNodes.reserve(tmp.size() - 1);
00121 for (std::vector<NxsSimpleNode *>::const_iterator nIt = tmp.begin(); nIt != tmp.end(); ++nIt)
00122 {
00123 if (*nIt != root)
00124 allNodes.push_back(*nIt);
00125 }
00126 delete root;
00127 root = NULL;
00128
00129 formerSib->edgeToPar.parent = subRoot;
00130 if (formerSib->edgeToPar.defaultEdgeLen)
00131 {
00132 if (!subRoot->edgeToPar.defaultEdgeLen)
00133 {
00134 formerSib->edgeToPar.defaultEdgeLen = false;
00135 formerSib->edgeToPar.hasIntEdgeLens = subRoot->edgeToPar.hasIntEdgeLens;
00136 formerSib->edgeToPar.iEdgeLen = subRoot->edgeToPar.iEdgeLen;
00137 formerSib->edgeToPar.dEdgeLen = subRoot->edgeToPar.dEdgeLen;
00138 }
00139 }
00140 else
00141 {
00142 if (!subRoot->edgeToPar.defaultEdgeLen)
00143 {
00144 if (formerSib->edgeToPar.hasIntEdgeLens)
00145 {
00146 if (subRoot->edgeToPar.hasIntEdgeLens)
00147 formerSib->edgeToPar.iEdgeLen += subRoot->edgeToPar.iEdgeLen;
00148 else
00149 {
00150 formerSib->edgeToPar.hasIntEdgeLens = false;
00151 formerSib->edgeToPar.dEdgeLen = subRoot->edgeToPar.dEdgeLen + (double) formerSib->edgeToPar.iEdgeLen;
00152 }
00153 }
00154 else
00155 {
00156 if (subRoot->edgeToPar.hasIntEdgeLens)
00157 formerSib->edgeToPar.dEdgeLen += (double)subRoot->edgeToPar.iEdgeLen;
00158 else
00159 formerSib->edgeToPar.dEdgeLen += subRoot->edgeToPar.dEdgeLen;
00160 }
00161 }
00162 }
00163 NxsSimpleNode * subRootRChild = subRoot->GetLastChild();
00164 if (subRootRChild == NULL)
00165 subRoot->lChild = formerSib;
00166 else
00167 subRootRChild->rSib = formerSib;
00168 subRoot->rSib = NULL;
00169 root = subRoot;
00170 subRoot->edgeToPar.parent = NULL;
00171 }
00172 else
00173 {
00174
00175 root->edgeToPar = subRoot->edgeToPar;
00176 std::swap(root->edgeToPar.child, root->edgeToPar.parent);
00177 NxsSimpleNode * subRootRChild = subRoot->GetLastChild();
00178 if (subRootRChild == NULL)
00179 subRoot->lChild = root;
00180 else
00181 subRootRChild->rSib = root;
00182 if (root->lChild == subRoot)
00183 root->lChild = subRoot->rSib;
00184 else
00185 {
00186 NxsSimpleNode * rOtherChild = root->lChild;
00187 while (rOtherChild)
00188 {
00189 if (rOtherChild->rSib == subRoot)
00190 {
00191 rOtherChild->rSib = subRoot->rSib;
00192 break;
00193 }
00194 rOtherChild = rOtherChild->rSib;
00195 NCL_ASSERT(rOtherChild);
00196 }
00197 }
00198 subRoot->rSib = NULL;
00199 root = subRoot;
00200 subRoot->edgeToPar.parent = NULL;
00201 }
00202 }
00203
00204 void NxsSimpleEdge::WriteAsNewick(std::ostream &out, bool nhx) const
00205 {
00206 if (!defaultEdgeLen)
00207 {
00208 out << ':';
00209 if (lenAsString.empty())
00210 if (hasIntEdgeLens)
00211 out << iEdgeLen;
00212 else
00213 out << dEdgeLen;
00214 else
00215 out << lenAsString;
00216 }
00217 for (std::vector<NxsComment>::const_iterator uc = unprocessedComments.begin(); uc != unprocessedComments.end(); ++uc)
00218 out << '[' << uc->GetText() << ']';
00219 if (nhx && !parsedInfo.empty())
00220 {
00221 out << "[&&NHX";
00222 for (std::map<std::string, std::string>::const_iterator p = parsedInfo.begin(); p != parsedInfo.end(); ++p)
00223 out << ':' << p->first << '=' << p->second;
00224 out << ']';
00225 }
00226 }
00227
00228 void NxsSimpleNode::WriteAsNewick(std::ostream &out, bool nhx, bool useLeafNames, bool escapeNames, const NxsTaxaBlockAPI *taxa) const
00229 {
00230 if (lChild)
00231 {
00232 out << '(';
00233 const std::vector<NxsSimpleNode *> children = GetChildren();
00234 for (std::vector<NxsSimpleNode *>::const_iterator child = children.begin(); child != children.end(); ++child)
00235 {
00236 if (child != children.begin())
00237 out << ',';
00238 (*child)->WriteAsNewick(out, nhx, useLeafNames, escapeNames, taxa);
00239 }
00240 out << ')';
00241 if (!name.empty())
00242 {
00243 if (escapeNames)
00244 out << NxsString::GetEscaped(name);
00245 else
00246 out << name;
00247 }
00248 else if (taxIndex != UINT_MAX)
00249 out << (1 + taxIndex);
00250 }
00251 else
00252 {
00253 NCL_ASSERT (taxIndex != UINT_MAX);
00254 if (useLeafNames)
00255 {
00256 if (name.empty() && taxa)
00257 {
00258 std::string n = taxa->GetTaxonLabel(taxIndex);
00259 if (escapeNames)
00260 out << NxsString::GetEscaped(n);
00261 else
00262 out << n;
00263 }
00264 else
00265 {
00266 if (escapeNames)
00267 out << NxsString::GetEscaped(name);
00268 else
00269 out << name;
00270 }
00271 }
00272 else
00273 out << (1 + taxIndex);
00274 }
00275 edgeToPar.WriteAsNewick(out, nhx);
00276 }
00277
00278 void NxsSimpleNode::AddSelfAndDesToPreorder(std::vector<const NxsSimpleNode *> &p) const
00279 {
00280 p.push_back(this);
00281 NxsSimpleNode * currCh = this->lChild;
00282 while (currCh)
00283 {
00284 currCh->AddSelfAndDesToPreorder(p);
00285 currCh = currCh->rSib;
00286 }
00287 }
00288
00289 std::vector<const NxsSimpleNode *> NxsSimpleTree::GetPreorderTraversal() const
00290 {
00291 std::vector<const NxsSimpleNode *> p;
00292 if (root)
00293 root->AddSelfAndDesToPreorder(p);
00294 return p;
00295 }
00296
00297 std::vector<std::vector<int> > NxsSimpleTree::GetIntPathDistances(bool toMRCA) const
00298 {
00299 if (root == NULL || root->lChild == NULL)
00300 return std::vector<std::vector<int> >();
00301
00302 typedef std::map<unsigned, int> TaxonIndToDistMap;
00303 typedef std::map<unsigned, TaxonIndToDistMap> PairwiseDistMap;
00304 typedef PairwiseDistMap::iterator PairwiseDistRow;
00305
00306 std::map<const NxsSimpleNode *, TaxonIndToDistMap > ndToDist;
00307 const std::vector<const NxsSimpleNode *> preord = GetPreorderTraversal();
00308 unsigned maxIndex = 0;
00309 PairwiseDistMap pairwiseDist;
00310 for (std::vector<const NxsSimpleNode *>::const_reverse_iterator nIt = preord.rbegin(); nIt != preord.rend(); ++nIt)
00311 {
00312 const NxsSimpleNode *nd = *nIt;
00313 if (nd->lChild)
00314 {
00315 TaxonIndToDistMap nm;
00316 ndToDist[nd] = nm;
00317 TaxonIndToDistMap & tidm = ndToDist[nd];
00318 const NxsSimpleNode * currChild = nd->lChild;
00319 if (nd->taxIndex != UINT_MAX)
00320 {
00321 if (maxIndex < nd->taxIndex)
00322 maxIndex = nd->taxIndex;
00323 tidm[nd->taxIndex] = 0;
00324 }
00325 while (currChild)
00326 {
00327 TaxonIndToDistMap currChildEls;
00328 TaxonIndToDistMap * currChildElsPtr;
00329 int currEdgeLen = currChild->edgeToPar.GetIntEdgeLen();
00330 if (currChild->lChild)
00331 {
00332 NCL_ASSERT(ndToDist.find(currChild) != ndToDist.end());
00333 currChildElsPtr = &(ndToDist[currChild]);
00334 }
00335 else
00336 {
00337 if (maxIndex < currChild->taxIndex)
00338 maxIndex = currChild->taxIndex;
00339 currChildEls[currChild->taxIndex] = 0;
00340 currChildElsPtr = &currChildEls;
00341 }
00342 for (TaxonIndToDistMap::const_iterator i = tidm.begin(); i != tidm.end(); ++i)
00343 {
00344 const unsigned iIndex = i->first;
00345 const int idist = i->second;
00346 for (TaxonIndToDistMap::const_iterator j = currChildElsPtr->begin(); j != currChildElsPtr->end(); ++j)
00347 {
00348 const unsigned jIndex = j->first;
00349 const int jdist = j->second;
00350 const int ndToJDist = jdist + currEdgeLen;
00351 if (toMRCA)
00352 {
00353 PairwiseDistRow iRow = pairwiseDist.find(iIndex);
00354 PairwiseDistRow jRow = pairwiseDist.find(jIndex);
00355 NCL_ASSERT(iRow == pairwiseDist.end() || (iRow->second.find(jIndex) == iRow->second.end()));
00356 NCL_ASSERT(jRow == pairwiseDist.end() || (jRow->second.find(iIndex) == jRow->second.end()));
00357 pairwiseDist[iIndex][jIndex] = idist;
00358 pairwiseDist[jIndex][iIndex] = ndToJDist;
00359 }
00360 else
00361 {
00362 const unsigned fIndex = (iIndex < jIndex ? iIndex : jIndex);
00363 const unsigned sIndex = (iIndex < jIndex ? jIndex : iIndex);
00364 PairwiseDistRow r = pairwiseDist.find(fIndex);
00365 const bool found = (r != pairwiseDist.end() && (r->second.find(sIndex) != r->second.end()));
00366 if (!found)
00367 pairwiseDist[fIndex][sIndex] = currEdgeLen + idist + jdist;
00368 }
00369 }
00370 }
00371 for (TaxonIndToDistMap::const_iterator j = currChildElsPtr->begin(); j != currChildElsPtr->end(); ++j)
00372 tidm[j->first] = currEdgeLen + j->second;
00373 currChild = currChild->rSib;
00374 }
00375 }
00376 }
00377 if (maxIndex == 0)
00378 return std::vector<std::vector<int> >();
00379 std::vector<int> toTipDistRow(maxIndex+1, INT_MAX);
00380 std::vector<std::vector<int> > pathDistMat(maxIndex+1, toTipDistRow);
00381 for (unsigned diagInd = 0; diagInd <= maxIndex; ++diagInd)
00382 pathDistMat[diagInd][diagInd] = 0;
00383
00384 for (PairwiseDistMap::const_iterator iit = pairwiseDist.begin(); iit != pairwiseDist.end(); ++iit)
00385 {
00386 const unsigned iInd = iit->first;
00387 const TaxonIndToDistMap & toDistMap = iit->second;
00388 for (TaxonIndToDistMap::const_iterator jit = toDistMap.begin(); jit != toDistMap.end(); ++jit)
00389 {
00390 const unsigned jInd = jit->first;
00391 if (jInd != iInd)
00392 {
00393 const int d = jit->second;
00394 pathDistMat[iInd][jInd] = d;
00395 pathDistMat[jInd][iInd] = d;
00396 }
00397 }
00398 }
00399
00400 return pathDistMat;
00401 }
00402
00403
00404
00405
00406 std::vector<std::vector<double> > NxsSimpleTree::GetDblPathDistances(bool toMRCA) const
00407 {
00408 if (root == NULL || root->lChild == NULL)
00409 return std::vector<std::vector<double> >();
00410
00411 typedef std::map<unsigned, double> TaxonIndToDistMap;
00412 typedef std::map<unsigned, TaxonIndToDistMap> PairwiseDistMap;
00413 typedef PairwiseDistMap::iterator PairwiseDistRow;
00414
00415 std::map<const NxsSimpleNode *, TaxonIndToDistMap > ndToDist;
00416 const std::vector<const NxsSimpleNode *> preord = GetPreorderTraversal();
00417 unsigned maxIndex = 0;
00418 PairwiseDistMap pairwiseDist;
00419 for (std::vector<const NxsSimpleNode *>::const_reverse_iterator nIt = preord.rbegin(); nIt != preord.rend(); ++nIt)
00420 {
00421 const NxsSimpleNode *nd = *nIt;
00422 if (nd->lChild)
00423 {
00424 TaxonIndToDistMap nm;
00425 ndToDist[nd] = nm;
00426 TaxonIndToDistMap & tidm = ndToDist[nd];
00427 if (nd->taxIndex != UINT_MAX)
00428 {
00429 if (maxIndex < nd->taxIndex)
00430 maxIndex = nd->taxIndex;
00431 tidm[nd->taxIndex] = 0.0;
00432 }
00433
00434 const NxsSimpleNode * currChild = nd->lChild;
00435 while (currChild)
00436 {
00437 TaxonIndToDistMap currChildEls;
00438 TaxonIndToDistMap * currChildElsPtr;
00439 double currEdgeLen = currChild->edgeToPar.GetDblEdgeLen();
00440 if (currChild->lChild)
00441 {
00442 NCL_ASSERT(ndToDist.find(currChild) != ndToDist.end());
00443 currChildElsPtr = &(ndToDist[currChild]);
00444 }
00445 else
00446 {
00447 if (maxIndex < currChild->taxIndex)
00448 maxIndex = currChild->taxIndex;
00449 currChildEls[currChild->taxIndex] = 0.0;
00450 currChildElsPtr = &currChildEls;
00451 }
00452
00453 for (TaxonIndToDistMap::const_iterator i = tidm.begin(); i != tidm.end(); ++i)
00454 {
00455
00456 const unsigned iIndex = i->first;
00457 const double idist = i->second;
00458 for (TaxonIndToDistMap::const_iterator j = currChildElsPtr->begin(); j != currChildElsPtr->end(); ++j)
00459 {
00460 const unsigned jIndex = j->first;
00461 const double jdist = j->second;
00462 const double ndToJDist = jdist + currEdgeLen;
00463 if (toMRCA)
00464 {
00465 PairwiseDistRow iRow = pairwiseDist.find(iIndex);
00466 PairwiseDistRow jRow = pairwiseDist.find(jIndex);
00467 NCL_ASSERT(iRow == pairwiseDist.end() || (iRow->second.find(jIndex) == iRow->second.end()));
00468 NCL_ASSERT(jRow == pairwiseDist.end() || (jRow->second.find(iIndex) == jRow->second.end()));
00469 pairwiseDist[iIndex][jIndex] = idist;
00470 pairwiseDist[jIndex][iIndex] = ndToJDist;
00471 }
00472 else
00473 {
00474 const unsigned fIndex = (iIndex < jIndex ? iIndex : jIndex);
00475 const unsigned sIndex = (iIndex < jIndex ? jIndex : iIndex);
00476 PairwiseDistRow r = pairwiseDist.find(fIndex);
00477 const bool found = (r != pairwiseDist.end() && (r->second.find(sIndex) != r->second.end()));
00478 if (!found)
00479 pairwiseDist[fIndex][sIndex] = idist + ndToJDist;
00480 }
00481 }
00482 }
00483 for (TaxonIndToDistMap::const_iterator j = currChildElsPtr->begin(); j != currChildElsPtr->end(); ++j)
00484 tidm[j->first] = currEdgeLen + j->second;
00485 currChild = currChild->rSib;
00486 }
00487 }
00488 }
00489 if (maxIndex == 0)
00490 return std::vector<std::vector<double> >();
00491 std::vector<double> toTipDistRow(maxIndex+1, DBL_MAX);
00492 std::vector<std::vector<double> > pathDistMat(maxIndex+1, toTipDistRow);
00493 for (unsigned diagInd = 0; diagInd <= maxIndex; ++diagInd)
00494 pathDistMat[diagInd][diagInd] = 0.0;
00495
00496
00497 for (PairwiseDistMap::const_iterator iit = pairwiseDist.begin(); iit != pairwiseDist.end(); ++iit)
00498 {
00499 const unsigned iInd = iit->first;
00500 pathDistMat[iInd][iInd] = 0.0;
00501 const TaxonIndToDistMap & toDistMap = iit->second;
00502 for (TaxonIndToDistMap::const_iterator jit = toDistMap.begin(); jit != toDistMap.end(); ++jit)
00503 {
00504 const unsigned jInd = jit->first;
00505 const double d = jit->second;
00506 pathDistMat[iInd][jInd] = d;
00507 if (!toMRCA)
00508 pathDistMat[jInd][iInd] = d;
00509 }
00510 }
00511
00512 return pathDistMat;
00513 }
00514
00515 std::string parseNHXComment(const std::string comment, std::map<std::string, std::string> *infoMap)
00516 {
00517 if (comment.length() < 6 || comment[0] != '&' || comment[1] != '&' || comment[2] != 'N' ||comment[3] != 'H' || comment[4] != 'X' )
00518 return comment;
00519 size_t colonPos = comment.find(':', 5);
00520 if (colonPos == string::npos)
00521 return comment.substr(5, string::npos);
00522 for (;;)
00523 {
00524 size_t eqPos = comment.find('=', colonPos);
00525 if (eqPos == string::npos || (eqPos <= (colonPos + 1)))
00526 return comment.substr(colonPos, string::npos);
00527 std::string key = comment.substr(colonPos + 1, eqPos - 1 - colonPos);
00528 colonPos = comment.find(':', eqPos + 1);
00529 if (colonPos == eqPos + 1)
00530 {
00531 if (infoMap)
00532 (*infoMap)[key] = string();
00533 }
00534 else if (colonPos == string::npos)
00535 {
00536 std::string lastVal = comment.substr(eqPos + 1);
00537 if (infoMap)
00538 (*infoMap)[key] = lastVal;
00539 return std::string();
00540 }
00541 else
00542 {
00543 std::string value = comment.substr(eqPos + 1, colonPos - eqPos - 1);
00544 if (infoMap)
00545 (*infoMap)[key] = value;
00546 }
00547 }
00548 }
00549
00550 void NxsSimpleEdge::DealWithNexusComments(const std::vector<NxsComment> & ecs, bool NHXComments)
00551 {
00552 for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
00553 {
00554 if (NHXComments)
00555 {
00556 std::string ns = ecsIt->GetText();
00557 std::map<std::string, std::string> currCmt;
00558 std::string unparsed = parseNHXComment(ns, &currCmt);
00559 for (std::map<std::string, std::string>::const_iterator c = currCmt.begin(); c != currCmt.end(); ++c)
00560 {
00561 const std::string & k = c->first;
00562 const std::string & v = c->second;
00563 this->parsedInfo[k] = v;
00564 }
00565 if (!unparsed.empty())
00566 {
00567 if (unparsed.length() == ns.length())
00568 this->unprocessedComments.push_back(*ecsIt);
00569 else
00570 {
00571 NxsComment nc(unparsed, ecsIt->GetLineNumber(), ecsIt->GetColumnNumber());
00572 this->unprocessedComments.push_back(nc);
00573 }
00574 }
00575 }
00576 else
00577 this->unprocessedComments.push_back(*ecsIt);
00578 }
00579 }
00580
00581 void NxsSimpleTree::Initialize(const NxsFullTreeDescription & td)
00582 {
00583 if (!td.IsProcessed())
00584 throw NxsNCLAPIException("A tree description must be processed by ProcessTree before calling NxsSimpleTree::NxsSimpleTree");
00585 Clear();
00586 std::string s;
00587 const std::string & n = td.GetNewick();
00588 s.reserve(n.length() + 1);
00589 s.assign(n.c_str());
00590 s.append(1, ';');
00591 istringstream newickstream(s);
00592 NxsToken token(newickstream);
00593 token.SetEOFAllowed(false);
00594 realEdgeLens = td.SomeEdgesHaveLengths() && (! td.EdgeLengthsAreAllIntegers());
00595 const bool NHXComments = td.HasNHXComments();
00596 NxsString emsg;
00597 double lastFltEdgeLen;
00598 long lastIntEdgeLen;
00599 long currTaxNumber;
00600 token.GetNextToken();
00601 NCL_ASSERT(token.Equals("("));
00602 root = AllocNewNode(0L);
00603 NxsSimpleNode * currNd = root;
00604 NxsSimpleEdge * currEdge = &(currNd->edgeToPar);
00605 NxsSimpleNode * tmpNode;
00606 bool prevInternalOrLength;
00607 bool currInternalOrLength = false;
00608 for (;;)
00609 {
00610 currEdge->DealWithNexusComments(token.GetEmbeddedComments(), NHXComments);
00611 if (token.Equals(";"))
00612 {
00613 if (currNd != root)
00614 throw NxsNCLAPIException("Semicolon found before the end of the tree description. This means that more \"(\" characters than \")\" were found.");
00615 break;
00616 }
00617 const NxsString & tstr = token.GetTokenReference();
00618 const char * t = tstr.c_str();
00619 bool handled;
00620 handled = false;
00621 prevInternalOrLength = currInternalOrLength;
00622 currInternalOrLength = false;
00623
00624 if (tstr.length() == 1)
00625 {
00626 handled = true;
00627 if (t[0] == '(')
00628 {
00629 tmpNode = AllocNewNode(currNd);
00630 currNd->AddChild(tmpNode);
00631 currNd = tmpNode;
00632 currEdge = &(currNd->edgeToPar);
00633 }
00634 else if (t[0] == ')')
00635 {
00636 currNd = currNd->GetParent();
00637 NCL_ASSERT(currNd);
00638 currEdge = &(currNd->edgeToPar);
00639 currInternalOrLength = true;
00640 }
00641 else if (t[0] == ':')
00642 {
00643 token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
00644 token.GetNextToken();
00645 currEdge->DealWithNexusComments(token.GetEmbeddedComments(), NHXComments);
00646 t = token.GetTokenReference().c_str();
00647 if (realEdgeLens)
00648 {
00649 if (!NxsString::to_double(t, &lastFltEdgeLen))
00650 {
00651 emsg << "Expecting a number as a branch length. Found " << tstr;
00652 throw NxsException(emsg, token);
00653 }
00654 currEdge->SetDblEdgeLen(lastFltEdgeLen, t);
00655 }
00656 else
00657 {
00658 if (!NxsString::to_long(t, &lastIntEdgeLen))
00659 {
00660 emsg << "Expecting a number as a branch length. Found " << tstr;
00661 throw NxsException(emsg, token);
00662 }
00663 currEdge->SetIntEdgeLen(lastIntEdgeLen, t);
00664 }
00665 currInternalOrLength = true;
00666 }
00667 else if (t[0] == ',')
00668 {
00669 currNd = currNd->GetParent();
00670 NCL_ASSERT(currNd);
00671 tmpNode = AllocNewNode(currNd);
00672 currNd->AddChild(tmpNode);
00673 currNd = tmpNode;
00674 currEdge = &(currNd->edgeToPar);
00675 }
00676 else
00677 handled = false;
00678 }
00679 if (!handled)
00680 {
00681 bool wasReadAsNumber = NxsString::to_long(t, &currTaxNumber);
00682 if (wasReadAsNumber)
00683 {
00684 if (currTaxNumber < 1)
00685 {
00686 if (!prevInternalOrLength)
00687 {
00688 emsg << "Expecting a taxon number greater than 1. Found " << tstr;
00689 throw NxsException(emsg, token);
00690 }
00691 wasReadAsNumber = false;
00692 }
00693 }
00694 if (wasReadAsNumber)
00695 {
00696 currNd->taxIndex = (unsigned)currTaxNumber - 1;
00697 if (currNd->lChild == NULL)
00698 {
00699 while (currNd->taxIndex >= leaves.size())
00700 leaves.push_back(0L);
00701 leaves[currNd->taxIndex] = currNd;
00702 }
00703 }
00704 else
00705 currNd->name = t;
00706 }
00707 token.GetNextToken();
00708 }
00709 }
00710 unsigned NxsTreesBlock::TreeLabelToNumber(const std::string & name) const
00711 {
00712 NxsString r(name.c_str());
00713 r.ToUpper();
00714 std::map<std::string, unsigned>::const_iterator cntiIt = capNameToInd.find(r);
00715 if (cntiIt == capNameToInd.end())
00716 return 0;
00717 return cntiIt->second + 1;
00718 }
00719 unsigned NxsTreesBlock::GetMaxIndex() const
00720 {
00721 if (trees.size() == 0)
00722 return UINT_MAX;
00723 return (unsigned)trees.size() - 1;
00724 }
00729 unsigned NxsTreesBlock::GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const
00730 {
00731 NxsString emsg;
00732 const unsigned numb = TreeLabelToNumber(label);
00733 if (numb > 0)
00734 {
00735 if (inds)
00736 inds->insert(numb - 1);
00737 return 1;
00738 }
00739 return GetIndicesFromSetOrAsNumber(label, inds, treeSets, GetMaxIndex(), "tree");
00740 }
00741 bool NxsTreesBlock::AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds)
00742 {
00743 NxsString nlabel(label.c_str());
00744 const bool replaced = treeSets.count(nlabel) > 0;
00745 treeSets[nlabel] = inds;
00746 return replaced;
00747 }
00751 bool NxsTreesBlock::AddNewPartition(const std::string &label, const NxsPartition & inds)
00752 {
00753 NxsString ls(label.c_str());
00754 bool replaced = treePartitions.count(ls) > 0;
00755 treePartitions[ls] = inds;
00756 return replaced;
00757 }
00761 NxsTreesBlock::NxsTreesBlock(
00762 NxsTaxaBlockAPI *tb)
00763 :NxsTaxaBlockSurrogate(tb, NULL),
00764 processedTreeValidationFunction(NULL),
00765 ptvArg(NULL)
00766 {
00767 id = "TREES";
00768 defaultTreeInd = UINT_MAX;
00769 writeTranslateTable = true;
00770 allowImplicitNames = false;
00771 processAllTreesDuringParse = true;
00772 writeFromNodeEdgeDataStructure = false;
00773 }
00777 NxsTreesBlock::~NxsTreesBlock()
00778 {
00779 }
00784 void NxsTreesBlock::ReplaceTaxaBlockPtr(
00785 NxsTaxaBlockAPI *tb)
00786 {
00787 NCL_ASSERT(tb != NULL);
00788 taxa = tb;
00789 }
00796 NxsString NxsTreesBlock::GetTreeDescription(
00797 unsigned i)
00798 {
00799 return NxsString(GetFullTreeDescription(i).GetNewick().c_str());
00800 }
00805 bool NxsTreesBlock::IsRootedTree(
00806 unsigned i)
00807 {
00808 return GetFullTreeDescription(i).IsRooted();
00809 }
00814 NxsString NxsTreesBlock::GetTreeName(
00815 unsigned i)
00816 {
00817 return NxsString(GetFullTreeDescription(i).GetName().c_str());
00818 }
00823 bool NxsTreesBlock::IsDefaultTree(
00824 unsigned i)
00825 {
00826 return (i == GetNumDefaultTree());
00827 }
00828 const NxsFullTreeDescription & NxsTreesBlock::GetFullTreeDescription(unsigned i) const
00829 {
00830 NCL_ASSERT(i < trees.size());
00831 return trees.at(i);
00832 }
00837 void NxsTreesBlock::Report(
00838 std::ostream &out) NCL_COULD_BE_CONST
00839 {
00840 const unsigned ntrees = GetNumTrees();
00841 out << '\n' << id << " block contains ";
00842 if (ntrees == 0)
00843 {
00844 out << "no trees" << endl;
00845 return;
00846 }
00847 if (ntrees == 1)
00848 out << "one tree" << endl;
00849 else
00850 out << ntrees << " trees" << endl;
00851 for (unsigned k = 0; k < ntrees; k++)
00852 {
00853 const NxsFullTreeDescription & tree = GetFullTreeDescription(k);
00854 out << " " << (k+1) << " " << tree.GetName();
00855 out << " (";
00856 if (tree.IsRooted())
00857 out << "rooted";
00858 else
00859 out << "unrooted";
00860 if (defaultTreeInd == k)
00861 out << ",default tree)" << endl;
00862 else
00863 out << ')' << endl;
00864 }
00865 }
00873 void NxsTreesBlock::BriefReport(
00874 NxsString &s) NCL_COULD_BE_CONST
00875 {
00876 const unsigned ntrees = GetNumTrees();
00877 s << "\n\n" << id << " block contains ";
00878 if (ntrees == 0)
00879 s += "no trees\n";
00880 else if (ntrees == 1)
00881 s += "one tree\n";
00882 else
00883 s << ntrees << " trees\n";
00884 }
00889 void NxsTreesBlock::Reset()
00890 {
00891 NxsBlock::Reset();
00892 ResetSurrogate();
00893 defaultTreeInd = UINT_MAX;
00894 trees.clear();
00895 capNameToInd.clear();
00896 treeSets.clear();
00897 treePartitions.clear();
00898 constructingTaxaBlock = false;
00899 newtaxa = false;
00900 }
00906 unsigned NxsTreesBlock::GetNumDefaultTree()
00907 {
00908 return (defaultTreeInd == UINT_MAX ? 0 : defaultTreeInd);
00909 }
00913 unsigned NxsTreesBlock::GetNumTrees() const
00914 {
00915 return (unsigned)trees.size();
00916 }
00920 unsigned NxsTreesBlock::GetNumTrees()
00921 {
00922 return (unsigned)trees.size();
00923 }
00924 void NxsTreesBlock::WriteTranslateCommand(std::ostream & out) const
00925 {
00926 NCL_ASSERT(taxa);
00927 out << " TRANSLATE" << "\n";
00928 const unsigned nt = taxa->GetNTaxTotal();
00929 for (unsigned i = 0; i < nt; ++i)
00930 {
00931 if (i > 0)
00932 out << ",\n";
00933 out << " " << i + 1 << ' ' << NxsString::GetEscaped(taxa->GetTaxonLabel(i));
00934 }
00935 out << ";\n";
00936 }
00937
00938 void NxsTreesBlock::WriteTreesCommand(std::ostream & out) const
00939 {
00940 if (constructingTaxaBlock)
00941 {
00942
00943
00944
00945 throw NxsNCLAPIException("WriteTreesCommand block cannot be called while the Trees Block is still being constructed");
00946 }
00947 NxsTreesBlock *ncthis = const_cast<NxsTreesBlock *>(this);
00948 NxsSimpleTree nst(0, 0.0);
00949 const bool useLeafNames = !(this->writeTranslateTable);
00950 for (unsigned k = 0; k < trees.size(); k++)
00951 {
00952 # if defined REGRESSION_TESTING_GET_TRANS_TREE_DESC
00953 NxsTreesBlock *nc = const_cast<NxsTreesBlock *>(this);
00954 NxsString transTreeDesc = nc->GetTranslatedTreeDescription(k);
00955 # endif
00956 NxsFullTreeDescription & treeDesc = trees.at(k);
00957 ncthis->ProcessTree(treeDesc);
00958 const std::string & name = treeDesc.GetName();
00959 out << " TREE ";
00960 if (k == defaultTreeInd)
00961 out << "* ";
00962 out << NxsString::GetEscaped(name) << " = [&";
00963 out << (treeDesc.IsRooted() ? 'R' : 'U');
00964 out << ']';
00965 if (writeFromNodeEdgeDataStructure)
00966 {
00967 nst.Initialize(treeDesc);
00968 nst.WriteAsNewick(out, true, useLeafNames, true, taxa);
00969 }
00970 else
00971 out << treeDesc.GetNewick();
00972 out << ";\n";
00973 }
00974 }
00978 void NxsTreesBlock::WriteAsNexus(std::ostream &out) const
00979 {
00980 if (GetNumTrees() == 0)
00981 return;
00982 out << "BEGIN TREES;\n";
00983 WriteBasicBlockCommands(out);
00984 if (this->writeTranslateTable)
00985 WriteTranslateCommand(out);
00986 WriteTreesCommand(out);
00987 WriteSkippedCommands(out);
00988 out << "END;\n";
00989 }
00990 NxsTreesBlock *NxsTreesBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
00991 {
00992 if (reader == NULL || idneeded != "TREES")
00993 return NULL;
00994 NxsTreesBlock * nb = new NxsTreesBlock(NULL);
00995 nb->SetCreateImpliedBlock(true);
00996 nb->SetImplementsLinkAPI(true);
00997 return nb;
00998 }
00999 void NxsTreesBlock::ConstructDefaultTranslateTable(NxsToken &token, const char * cmd)
01000 {
01001 if (taxa == NULL)
01002 {
01003 if (nxsReader == NULL)
01004 GenerateNxsException(token, "A Taxa block must be read before the Trees block can be read.");
01005 unsigned nTb;
01006 nxsReader->GetTaxaBlockByTitle(NULL, &nTb);
01007 AssureTaxaBlock(nTb == 0 && allowImplicitNames && createImpliedBlock, token, cmd);
01008 }
01009 const unsigned nt = taxa->GetNTaxTotal();
01010 if (nt == 0)
01011 {
01012 if (allowImplicitNames)
01013 {
01014 if (nexusReader)
01015 nexusReader->NexusWarnToken("A TAXA block should be read before the TREES block (but no TAXA block was found). Taxa will be inferred from their usage in the TREES block.", NxsReader::AMBIGUOUS_CONTENT_WARNING , token);
01016 constructingTaxaBlock = true;
01017 newtaxa = true;
01018 }
01019 else
01020 GenerateNxsException(token, "Taxa block must be read before the Trees block can be read.");
01021 }
01022 if (!constructingTaxaBlock)
01023 {
01024 for (unsigned i = 0; i < nt; ++i)
01025 {
01026 NxsString s;
01027 s += (i + 1);
01028 capNameToInd[s] = i;
01029 NxsString t(taxa->GetTaxonLabel(i).c_str());
01030 t.ToUpper();
01031 capNameToInd[t] = i;
01032 }
01033 }
01034 }
01035 void NxsTreesBlock::HandleTranslateCommand(NxsToken &token)
01036 {
01037 for (unsigned n = 0;; ++n)
01038 {
01039 token.GetNextToken();
01040 if (token.Equals(";"))
01041 break;
01042 NxsString key(token.GetTokenReference().c_str());
01043 unsigned keyInd = taxa->TaxLabelToNumber(key);
01044 token.GetNextToken();
01045 NxsString value(token.GetTokenReference().c_str());
01046 unsigned valueInd = taxa->TaxLabelToNumber(value);
01047 if (valueInd == 0)
01048 {
01049 if (constructingTaxaBlock)
01050 {
01051 taxa->SetNtax(n+1);
01052
01053 unsigned newVal = taxa->AddTaxonLabel(value);
01054 NxsString numV;
01055 numV += (1 + newVal);
01056 if (capNameToInd.find(numV) == capNameToInd.end())
01057 capNameToInd[numV] = newVal;
01058
01059
01060
01061
01062 value.ToUpper();
01063 if (capNameToInd.find(value) == capNameToInd.end())
01064 capNameToInd[value] = newVal;
01065
01066 }
01067 else if (nexusReader)
01068 {
01069 errormsg << "Unknown taxon " << value << " in TRANSLATE command.\nThe translate key "<< key << " has NOT been added to the translation table!";
01070 nexusReader->NexusWarnToken(errormsg, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, token);
01071 errormsg.clear();
01072 }
01073 }
01074 if (valueInd > 0)
01075 {
01076 if (keyInd != 0 && keyInd != valueInd && nexusReader)
01077 {
01078 errormsg << "TRANSLATE command overwriting the taxon " << key << " with a redirection to " << value;
01079 nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
01080 errormsg.clear();
01081 }
01082 key.ToUpper();
01083 capNameToInd[key] = valueInd - 1;
01084 }
01085 token.GetNextToken();
01086 if (token.Equals(";"))
01087 break;
01088 if (!token.Equals(","))
01089 {
01090 errormsg << "Expecting a , or ; after a translate key-value pair. Found " << token.GetTokenReference();
01091 throw NxsException(errormsg, token);
01092 }
01093 }
01094 constructingTaxaBlock = false;
01095 }
01096
01097
01098
01099
01100 void NxsTreesBlock::ProcessTokenVecIntoTree(
01101 const ProcessedNxsCommand & tokenVec,
01102 NxsFullTreeDescription & td,
01103 NxsLabelToIndicesMapper *taxa,
01104 std::map<std::string, unsigned> &capNameToInd,
01105 bool allowNewTaxa,
01106 NxsReader * nexusReader,
01107 const bool respectCase)
01108 {
01109 ProcessedNxsCommand::const_iterator tvIt = tokenVec.begin();
01110 ostringstream tokenStream;
01111 long line = 0;
01112 long col = 0;
01113 file_pos pos = 0;
01114 if (!tokenVec.empty())
01115 {
01116 line = tvIt->GetLineNumber();
01117 col = tvIt->GetColumnNumber();
01118 pos = tvIt->GetFilePosition();
01119 for (;tvIt != tokenVec.end(); ++tvIt)
01120 tokenStream << NxsString::GetEscaped(tvIt->GetToken());
01121 tokenStream << ';';
01122 }
01123 std::string s = tokenStream.str();
01124 istringstream newickstream(s);
01125 NxsToken token(newickstream);
01126 try
01127 {
01128 ProcessTokenStreamIntoTree(token, td, taxa, capNameToInd, allowNewTaxa, nexusReader, respectCase);
01129 }
01130 catch (NxsException & x)
01131 {
01132 x.pos += pos;
01133 x.line += line;
01134 x.col += col;
01135 throw x;
01136 }
01137 }
01138
01139 std::vector<std::string> NxsFullTreeDescription::GetTreeTokens() const
01140 {
01141 const std::string & n = this->GetNewick();
01142 std::string y;
01143 const std::string *p = &n;
01144 if (n.empty() || *n.rend() != ';') {
01145 y = n;
01146 y.append(1, ';');
01147 p = &y;
01148 }
01149 istringstream newickstream(*p);
01150 NxsToken tokenizer(newickstream);
01151 std::list<std::string> tl;
01152 tokenizer.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
01153 tokenizer.GetNextToken();
01154 while (!tokenizer.EqualsCaseSensitive(";"))
01155 {
01156 tl.push_back(tokenizer.GetTokenReference());
01157 tokenizer.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
01158 tokenizer.GetNextToken();
01159 }
01160 return std::vector<std::string>(tl.begin(), tl.end());
01161 }
01162
01163
01164 void NxsTreesBlock::ProcessTokenStreamIntoTree(
01165 NxsToken & token,
01166 NxsFullTreeDescription & td,
01167 NxsLabelToIndicesMapper *taxa,
01168 std::map<std::string, unsigned> &capNameToInd,
01169 bool allowNewTaxa,
01170 NxsReader * nexusReader,
01171 const bool respectCase)
01172 {
01173 NxsString errormsg;
01174 int & flags = td.flags;
01175 bool NHXComments = false;
01176 bool someMissingEdgeLens = false;
01177 bool someHaveEdgeLens = false;
01178 bool someRealEdgeLens = false;
01179 bool hasPolytomies = false;
01180 bool hasDegTwoNodes = false;
01181 bool hasInternalLabels = false;
01182 bool hasInternalLabelsInTaxa = false;
01183 bool hasInternalLabelsNotInTaxa = false;
01184 const bool rooted = (flags & NxsFullTreeDescription::NXS_IS_ROOTED_BIT);
01185 std::stack<unsigned> nchildren;
01186 std::set<unsigned> taxaEncountered;
01187 double minDblEdgeLen = DBL_MAX;
01188 int minIntEdgeLen = INT_MAX;
01189 double lastFltEdgeLen;
01190 long lastIntEdgeLen;
01191 bool taxsetRead = false;
01192 token.GetNextToken();
01193 ostringstream newickStream;
01194 if (!token.Equals("("))
01195 {
01196 errormsg << "Expecting a ( to start the tree description, but found " << token.GetTokenReference();
01197 throw NxsException(errormsg, token);
01198 }
01199 nchildren.push(0);
01200 newickStream << '(';
01201 int prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
01202 token.GetNextToken();
01203 for (;;)
01204 {
01205 const std::vector<NxsComment> & ecs = token.GetEmbeddedComments();
01206 for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
01207 {
01208 if (!NHXComments)
01209 {
01210 const std::string & ns = ecsIt->GetText();
01211 if (ns.length() > 5 && ns[0] == '&' && ns[1] == '&' && ns[2] == 'N' &&ns[3] == 'H' && ns[4] == 'X')
01212 NHXComments = true;
01213 }
01214 ecsIt->WriteAsNexus(newickStream);
01215 }
01216 if (token.Equals(";"))
01217 {
01218 if (!nchildren.empty())
01219 throw NxsException("Semicolon found before the end of the tree description. This means that more \"(\" characters than \")\" were found.", token);
01220 break;
01221 }
01222 const NxsString & tstr = token.GetTokenReference();
01223 const char * t = tstr.c_str();
01224 bool handled;
01225 handled = false;
01226 if (tstr.length() == 1)
01227 {
01228 if (t[0] == '(')
01229 {
01230 if (nchildren.empty())
01231 throw NxsException("End of tree description. Expected ; but found (", token);
01232 if (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN || prevToken == NXS_TREE_BRLEN_TOKEN)
01233 {
01234 errormsg << "Expecting a , before a new subtree definition:\n \")(\"\n \"name(\" and\n \"branch-length(\"\n are prohibited.";
01235 if (nexusReader)
01236 nexusReader->NexusWarnToken(errormsg, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, token);
01237 else
01238 throw NxsException(errormsg, token);
01239
01240
01241
01242 if (!someMissingEdgeLens && (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN))
01243 someMissingEdgeLens = true;
01244 nchildren.top() += 1;
01245 newickStream << ',';
01246 prevToken = NXS_TREE_COMMA_TOKEN;
01247 }
01248 else if (prevToken == NXS_TREE_COLON_TOKEN)
01249 throw NxsException("Expecting a branch length after a : but found (", token);
01250 nchildren.top() += 1;
01251 nchildren.push(0);
01252 newickStream << '(';
01253 prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
01254 handled = true;
01255 }
01256 else if (t[0] == ')')
01257 {
01258 if (nchildren.empty())
01259 throw NxsException("End of tree description. Expected ; but found )", token);
01260 if (prevToken == NXS_TREE_OPEN_PARENS_TOKEN || prevToken == NXS_TREE_COMMA_TOKEN)
01261 throw NxsException("Expecting a clade description before the subtree's closing )\n \"()\" and \",)\" are prohibited.", token);
01262 if (prevToken == NXS_TREE_COLON_TOKEN)
01263 throw NxsException("Expecting a branch length after a : but found (", token);
01264 if (!someMissingEdgeLens && (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN))
01265 someMissingEdgeLens = true;
01266 if (nchildren.top() == 1)
01267 hasDegTwoNodes = true;
01268 else if ((!hasPolytomies) && (nchildren.top() > 2))
01269 {
01270 if (rooted)
01271 {
01272 if (nchildren.top() > 2)
01273 hasPolytomies = true;
01274 }
01275 else if (nchildren.top() > 3 || nchildren.size() > 1)
01276 hasPolytomies = true;
01277 }
01278 nchildren.pop();
01279 newickStream << ')';
01280 prevToken = NXS_TREE_CLOSE_PARENS_TOKEN;
01281 handled = true;
01282 }
01283 else if (t[0] == ':')
01284 {
01285 if (prevToken != NXS_TREE_CLOSE_PARENS_TOKEN && prevToken != NXS_TREE_CLADE_NAME_TOKEN)
01286 throw NxsException("Found a : separator for a subtree at an inappropriate location. A colon is only permitted after a clade name or )-symbol.", token);
01287 if (taxsetRead && prevToken == NXS_TREE_CLADE_NAME_TOKEN)
01288 throw NxsException("Found a : separator after a taxset name. Branch lengths cannot be assigned to multi-taxon taxsets.", token);
01289 newickStream << ':';
01290 prevToken = NXS_TREE_COLON_TOKEN;
01291 handled = true;
01292 token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
01293 }
01294 else if (t[0] == ',')
01295 {
01296 if (prevToken == NXS_TREE_OPEN_PARENS_TOKEN)
01297 throw NxsException("Found a empty subclade found. The combination \"(,\" is prohibited.", token);
01298 if (prevToken == NXS_TREE_COMMA_TOKEN)
01299 throw NxsException("Found a empty subclade found. The combination \",,\" is prohibited.", token);
01300 if (prevToken == NXS_TREE_COLON_TOKEN)
01301 throw NxsException("Found a , when a branch length was expected found. The combination \":,\" is prohibited.", token);
01302 if (!someMissingEdgeLens && (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN))
01303 someMissingEdgeLens = true;
01304 nchildren.top() += 1;
01305 newickStream << ',';
01306 prevToken = NXS_TREE_COMMA_TOKEN;
01307 handled = true;
01308 }
01309 }
01310 if (!handled)
01311 {
01312 if (prevToken == NXS_TREE_COLON_TOKEN)
01313 {
01314 bool handledLength = false;
01315 if (!someRealEdgeLens)
01316 {
01317 if (NxsString::to_long(t, &lastIntEdgeLen))
01318 {
01319 handledLength = true;
01320 if (lastIntEdgeLen < minIntEdgeLen)
01321 minIntEdgeLen = lastIntEdgeLen;
01322 }
01323 }
01324 if (!handledLength)
01325 {
01326 if (!NxsString::to_double(t, &lastFltEdgeLen))
01327 {
01328 errormsg << "Expecting a number as a branch length. Found " << tstr;
01329 throw NxsException(errormsg, token);
01330 }
01331 someRealEdgeLens = true;
01332 if (lastFltEdgeLen < minDblEdgeLen)
01333 minDblEdgeLen = lastFltEdgeLen;
01334 }
01335 newickStream << tstr;
01336 someHaveEdgeLens = true;
01337 prevToken = NXS_TREE_BRLEN_TOKEN;
01338 }
01339 else
01340 {
01341 if (prevToken == NXS_TREE_BRLEN_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN)
01342 {
01343 errormsg << "Found a name " << tstr << " which should be preceded by a ( or a ,";
01344 throw NxsException(errormsg, token);
01345 }
01346 taxsetRead = false;
01347 NxsString ucl(t);
01348 if (!respectCase)
01349 ucl.ToUpper();
01350 NxsString toAppend;
01351 std::map<std::string, unsigned>::const_iterator tt = capNameToInd.find(ucl);
01352 unsigned ind = (tt == capNameToInd.end() ? UINT_MAX : tt->second);
01353 if (taxaEncountered.find(ind) != taxaEncountered.end())
01354 {
01355 errormsg << "Taxon number " << ind + 1 << " (coded by the token " << tstr << ") has already been encountered in this tree. Duplication of taxa in a tree is prohibited.";
01356 throw NxsException(errormsg, token);
01357 }
01358 if (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN)
01359 {
01360 hasInternalLabels = true;
01361 if (ind == UINT_MAX)
01362 {
01363 hasInternalLabelsNotInTaxa = true;
01364 toAppend += tstr;
01365 }
01366 else
01367 {
01368 hasInternalLabelsInTaxa = true;
01369 taxaEncountered.insert(ind);
01370 toAppend += (1 + ind);
01371 }
01372 }
01373 else
01374 {
01375 if (ind == UINT_MAX)
01376 {
01377 std::set<unsigned> csinds;
01378 unsigned nadded = taxa->GetIndexSet(tstr, &csinds);
01379 if (nadded == 0)
01380 {
01381 if (!allowNewTaxa)
01382 {
01383 errormsg << "Expecting a Taxon label after a \"" << (prevToken == NXS_TREE_OPEN_PARENS_TOKEN ? '(' : ',') << "\" character. Found \"" << tstr << "\" but this is not a recognized taxon label.";
01384 throw NxsException(errormsg, token);
01385 }
01386 std::string tasstring(tstr.c_str());
01387 unsigned valueInd = taxa->AppendNewLabel(tasstring);
01388 NxsString numV;
01389 numV += (valueInd+1);
01390 if (capNameToInd.find(numV) == capNameToInd.end())
01391 capNameToInd[numV] = valueInd;
01392 if (!respectCase)
01393 NxsString::to_upper(tasstring);
01394 capNameToInd[tasstring] = valueInd;
01395 toAppend += (1 + valueInd);
01396 }
01397 else
01398 {
01399 bool firstTaxonAdded = true;
01400 for (std::set<unsigned>::const_iterator cit = csinds.begin(); cit != csinds.end(); ++cit)
01401 {
01402 if (taxaEncountered.find(*cit) != taxaEncountered.end())
01403 {
01404 errormsg << "Taxon number " << *cit + 1 << " (one of the members of the taxset " << tstr << ") has already been encountered in this tree. Duplication of taxa in a tree is prohibited.";
01405 throw NxsException(errormsg, token);
01406 }
01407 taxaEncountered.insert(*cit);
01408 if (!firstTaxonAdded)
01409 toAppend.append(1, ',');
01410 toAppend += (1 + *cit);
01411 firstTaxonAdded = false;
01412 }
01413 if (nadded > 1)
01414 {
01415 taxsetRead = true;
01416 someMissingEdgeLens = true;
01417 }
01418 }
01419 }
01420 else
01421 {
01422 taxaEncountered.insert(ind);
01423 toAppend += (1 + ind);
01424 }
01425 }
01426 newickStream << toAppend;
01427 prevToken = NXS_TREE_CLADE_NAME_TOKEN;
01428 }
01429 }
01430 token.GetNextToken();
01431 }
01432 td.flags |= NxsFullTreeDescription::NXS_TREE_PROCESSED;
01433 if (someHaveEdgeLens)
01434 {
01435 flags |= NxsFullTreeDescription::NXS_HAS_SOME_EDGE_LENGTHS_BIT;
01436 if (someRealEdgeLens)
01437 {
01438 flags &= ~(NxsFullTreeDescription::NXS_INT_EDGE_LENGTHS_BIT);
01439 td.minDblEdgeLen = minDblEdgeLen;
01440 }
01441 else
01442 {
01443 flags |= NxsFullTreeDescription::NXS_INT_EDGE_LENGTHS_BIT;
01444 td.minIntEdgeLen = minIntEdgeLen;
01445 }
01446 }
01447 td.newick = newickStream.str();
01448 if (someMissingEdgeLens)
01449 flags |= NxsFullTreeDescription::NXS_MISSING_SOME_EDGE_LENGTHS_BIT;
01450 if (hasPolytomies)
01451 flags |= NxsFullTreeDescription::NXS_HAS_POLYTOMY_BIT;
01452 if (hasDegTwoNodes)
01453 flags |= NxsFullTreeDescription::NXS_HAS_DEG_TWO_NODES_BIT;
01454 if (hasInternalLabels)
01455 {
01456 flags |= NxsFullTreeDescription::NXS_HAS_INTERNAL_NAMES_BIT;
01457 if (hasInternalLabelsNotInTaxa)
01458 flags |= NxsFullTreeDescription::NXS_HAS_NEW_INTERNAL_NAMES_BIT;
01459 if (hasInternalLabelsInTaxa)
01460 flags |= NxsFullTreeDescription::NXS_KNOWN_INTERNAL_NAMES_BIT;
01461 }
01462 if (NHXComments)
01463 flags |= NxsFullTreeDescription::NXS_HAS_NHX_BIT;
01464 if (taxaEncountered.size() == taxa->GetMaxIndex() + 1)
01465 flags |= NxsFullTreeDescription::NXS_HAS_ALL_TAXA_BIT;
01466 }
01467
01468 void NxsTreesBlock::ProcessTree(NxsFullTreeDescription & ftd) const
01469 {
01470 if (ftd.flags & NxsFullTreeDescription::NXS_TREE_PROCESSED)
01471 return;
01472 ftd.newick.append(1, ';');
01473 const std::string incomingNewick = ftd.newick;
01474 ftd.newick.clear();
01475 istringstream newickstream(incomingNewick);
01476 NxsToken token(newickstream);
01477 ProcessTokenStreamIntoTree(token, ftd, taxa, capNameToInd, constructingTaxaBlock, nexusReader);
01478 }
01479
01480 void NxsTreesBlock::HandleTreeCommand(NxsToken &token, bool rooted)
01481 {
01482 NCL_ASSERT(taxa);
01483 token.GetNextToken();
01484 if (token.Equals("*"))
01485 {
01486 defaultTreeInd = (unsigned)trees.size();
01487 token.GetNextToken();
01488 }
01489 NxsString treeName = token.GetToken();
01490 DemandEquals(token, "after tree name in TREE command");
01491 file_pos fp = 0;
01492 int fline = token.GetFileLine();
01493 int fcol = token.GetFileColumn();
01494 fp = token.GetFilePosition();
01495 try {
01496
01497
01498
01499 token.SetLabileFlagBit(NxsToken::saveCommandComments);
01500 token.SetLabileFlagBit(NxsToken::parentheticalToken);
01501 token.GetNextToken();
01502 NxsString s = token.GetToken();
01503 if (!s.empty() && s[0] == '&')
01504 {
01505 if (s[1] == 'R' || s[1] == 'r')
01506 rooted = true;
01507 else if (s[1] == 'U' || s[1] == 'u')
01508 rooted = false;
01509 else
01510 {
01511 errormsg << "[" << token.GetToken() << "] is not a valid command comment in a TREE command";
01512 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
01513 }
01514
01515 token.SetLabileFlagBit(NxsToken::parentheticalToken);
01516 token.GetNextToken();
01517 s = token.GetToken();
01518 }
01519 if (!s.empty() && s[0] != '(')
01520 {
01521 errormsg << "Expecting command comment or tree description in TREE (or UTREE) command, but found " << token.GetToken() << " instead";
01522 throw NxsException(errormsg);
01523 }
01524 }
01525 catch (NxsX_UnexpectedEOF &)
01526 {
01527 errormsg << "Unexpected end of file in tree description.\n";
01528 errormsg << "This probably indicates that the parentheses in the newick description are not balanced, and one or more closing parentheses are needed.";
01529 throw NxsException(errormsg, fp, fline, fcol);
01530 }
01531 std::string mt;
01532 int f = (rooted ? NxsFullTreeDescription::NXS_IS_ROOTED_BIT : 0);
01533 trees.push_back(NxsFullTreeDescription(mt, treeName, f));
01534 NxsFullTreeDescription & td = trees[trees.size() -1];
01535 ReadTreeFromOpenParensToken(td, token);
01536 }
01537
01538 void NxsTreesBlock::ReadTreeFromOpenParensToken(NxsFullTreeDescription &td, NxsToken & token)
01539 {
01540 file_pos fp = 0;
01541 int fline = token.GetFileLine();
01542 int fcol = token.GetFileColumn();
01543 ostringstream newickStream;
01544 newickStream << token.GetTokenReference();
01545 token.GetNextToken();
01546 const std::vector<NxsComment> & ecs = token.GetEmbeddedComments();
01547 for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
01548 ecsIt->WriteAsNexus(newickStream);
01549 while (!token.Equals(";"))
01550 {
01551 if (token.Equals("(") || token.Equals(")") || token.Equals(","))
01552 GenerateUnexpectedTokenNxsException(token, "root taxon information");
01553 newickStream << NxsString::GetEscaped(token.GetTokenReference());
01554 token.GetNextToken();
01555 const std::vector<NxsComment> & iecs = token.GetEmbeddedComments();
01556 for (std::vector<NxsComment>::const_iterator iecsIt = iecs.begin(); iecsIt != iecs.end(); ++iecsIt)
01557 iecsIt->WriteAsNexus(newickStream);
01558 }
01559 td.newick = newickStream.str();
01560 if (processAllTreesDuringParse)
01561 {
01562 try
01563 {
01564 ProcessTree(td);
01565 if (this->processedTreeValidationFunction)
01566 {
01567 if (!this->processedTreeValidationFunction(td, this->ptvArg, this))
01568 trees.pop_back();
01569 }
01570 }
01571 catch (NxsException &x)
01572 {
01573 x.pos += fp;
01574 x.line += fline - 1;
01575 x.col += fcol;
01576 throw x;
01577 }
01578 }
01579 }
01585 void NxsTreesBlock::Read(
01586 NxsToken &token)
01587 {
01588 isEmpty = false;
01589 isUserSupplied = true;
01590 DemandEndSemicolon(token, "BEGIN TREES");
01591
01592 bool readTranslate = false;
01593 bool readTree = false;
01594 errormsg.clear();
01595 constructingTaxaBlock = false;
01596 newtaxa = false;
01597 capNameToInd.clear();
01598 unsigned numSigInts = NxsReader::getNumSignalIntsCaught();
01599 const bool checkingSignals = NxsReader::getNCLCatchesSignals();
01600
01601 for (;;)
01602 {
01603 token.GetNextToken();
01604 if (checkingSignals && NxsReader::getNumSignalIntsCaught() != numSigInts)
01605 {
01606 throw NxsSignalCanceledParseException("Reading TREES Block");
01607 }
01608 NxsBlock::NxsCommandResult res = HandleBasicBlockCommands(token);
01609 if (res == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
01610 {
01611 if (constructingTaxaBlock)
01612 {
01613 if (taxa && taxa->GetNTax() > 0)
01614 newtaxa = true;
01615 constructingTaxaBlock = false;
01616 }
01617 return;
01618 }
01619 if (res != NxsBlock::NxsCommandResult(HANDLED_COMMAND))
01620 {
01621 if (token.Equals("TRANSLATE"))
01622 {
01623 if (readTree)
01624 WarnDangerousContent("TRANSLATE command must precede any TREE commands in a TREES block", token);
01625 if (readTranslate)
01626 {
01627 WarnDangerousContent("Only one TRANSLATE command may be read in a TREES block", token);
01628 capNameToInd.clear();
01629 }
01630 readTranslate = true;
01631 ConstructDefaultTranslateTable(token, "TRANSLATE");
01632 HandleTranslateCommand(token);
01633 }
01634 else
01635 {
01636 bool utreeCmd = token.Equals("UTREE");
01637 bool treeCmd = token.Equals("TREE");
01638 if (utreeCmd || treeCmd)
01639 {
01640 if (!readTranslate && ! readTree)
01641 ConstructDefaultTranslateTable(token, token.GetTokenReference().c_str());
01642 readTree = true;
01643 HandleTreeCommand(token, treeCmd);
01644 }
01645 else
01646 SkipCommand(token);
01647 }
01648 }
01649 }
01650 }
01658 NxsString NxsTreesBlock::GetTranslatedTreeDescription(
01659 unsigned i)
01660 {
01661 NCL_ASSERT(i < trees.size());
01662 NCL_ASSERT(taxa);
01663 NxsFullTreeDescription & ftd = trees.at(i);
01664 ProcessTree(ftd);
01665 std::string incomingNewick = ftd.newick;
01666 incomingNewick.append(1, ';');
01667 istringstream newickstream(incomingNewick);
01668 NxsToken token(newickstream);
01669 token.GetNextToken();
01670 if (!token.Equals("("))
01671 {
01672 errormsg << "Expecting a ( to start the tree description, but found " << token.GetTokenReference();
01673 throw NxsException(errormsg, token);
01674 }
01675 int prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
01676 long taxIndLong;
01677 const unsigned ntax = taxa->GetNTaxTotal();
01678 ostringstream translated;
01679 for (;;)
01680 {
01681 const std::vector<NxsComment> & ecs = token.GetEmbeddedComments();
01682 for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
01683 ecsIt->WriteAsNexus(translated);
01684 if (token.Equals(";"))
01685 break;
01686 const NxsString & t = token.GetTokenReference();
01687 bool handled;
01688 handled = false;
01689 if (t.length() == 1)
01690 {
01691 if (t[0] == '(')
01692 {
01693 translated << '(';
01694 prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
01695 handled = true;
01696 }
01697 else if (t[0] == ')')
01698 {
01699 translated << ')';
01700 prevToken = NXS_TREE_CLOSE_PARENS_TOKEN;
01701 handled = true;
01702 }
01703 else if (t[0] == ':')
01704 {
01705 translated << ':';
01706 prevToken = NXS_TREE_COLON_TOKEN;
01707 handled = true;
01708 token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
01709 }
01710 else if (t[0] == ',')
01711 {
01712 translated << ',';
01713 prevToken = NXS_TREE_COMMA_TOKEN;
01714 handled = true;
01715 }
01716 }
01717 if (!handled)
01718 {
01719 if (prevToken == NXS_TREE_COLON_TOKEN)
01720 {
01721 translated << t;
01722 prevToken = NXS_TREE_BRLEN_TOKEN;
01723 }
01724 else
01725 {
01726 if (NxsString::to_long(t.c_str(), &taxIndLong) && taxIndLong <= (long) ntax && taxIndLong > 0)
01727 translated << NxsString::GetEscaped(taxa->GetTaxonLabel((unsigned) taxIndLong - 1));
01728 else if (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN)
01729 translated << t;
01730 else
01731 {
01732 errormsg << "Expecting a taxon index in a tree description, but found " << t;
01733 throw NxsException(errormsg, token);
01734 }
01735 }
01736 }
01737 token.GetNextToken();
01738 }
01739 return NxsString(translated.str().c_str());
01740 }
01741
01742 void NxsTreesBlock::ReadPhylipTreeFile(NxsToken & token)
01743 {
01744 bool prevAIN = allowImplicitNames;
01745 allowImplicitNames = true;
01746 bool firstTree = true;
01747 const bool prevEOFAllowed = token.GetEOFAllowed();
01748 token.SetEOFAllowed(false);
01749 try
01750 {
01751 for (;;)
01752 {
01753 token.SetLabileFlagBit(NxsToken::saveCommandComments);
01754 token.SetLabileFlagBit(NxsToken::parentheticalToken);
01755 token.GetNextToken();
01756 NxsString s = token.GetToken();
01757 bool rooted = false;
01758 if (!s.empty() && s[0] == '&')
01759 {
01760 if (s[1] == 'R' || s[1] == 'r')
01761 rooted = true;
01762 else if (s[1] == 'U' || s[1] == 'u')
01763 rooted = false;
01764 else
01765 {
01766 errormsg << "[" << token.GetToken() << "] is not a valid command comment in a TREE command";
01767 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
01768 }
01769
01770 token.SetLabileFlagBit(NxsToken::parentheticalToken);
01771 token.GetNextToken();
01772 s = token.GetToken();
01773 }
01774 if (!s.empty() && s[0] != '(')
01775 {
01776 errormsg << "Expecting a tree description, but found \"" << token.GetToken() << "\" instead";
01777 throw NxsException(errormsg);
01778 }
01779 if (firstTree)
01780 {
01781 ConstructDefaultTranslateTable(token, token.GetTokenReference().c_str());
01782 firstTree = false;
01783 }
01784 int f = (rooted ? NxsFullTreeDescription::NXS_IS_ROOTED_BIT : 0);
01785 std::string mt;
01786 trees.push_back(NxsFullTreeDescription(mt, mt, f));
01787 NxsFullTreeDescription & td = trees[trees.size() -1];
01788 ReadTreeFromOpenParensToken(td, token);
01789 this->constructingTaxaBlock = false;
01790 }
01791 }
01792 catch (NxsX_UnexpectedEOF &)
01793 {
01794 allowImplicitNames = prevAIN;
01795 token.SetEOFAllowed(prevEOFAllowed);
01796 if (firstTree)
01797 {
01798 errormsg << "Unexpected end of file in tree description.\n";
01799 errormsg << "This probably indicates that the parentheses in the newick description are not balanced, and one or more closing parentheses are needed.";
01800 throw NxsException(errormsg);
01801 }
01802 }
01803 catch (...)
01804 {
01805 allowImplicitNames = prevAIN;
01806 token.SetEOFAllowed(prevEOFAllowed);
01807 throw;
01808 }
01809 token.SetEOFAllowed(prevEOFAllowed);
01810 allowImplicitNames = prevAIN;
01811 }
01812