00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
#include <cassert>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include "ncl/nxstoken.h"
00022
00023 using namespace std;
00024
00025 #define NEW_NXS_TOKEN_READ_CHAR
00026
00027
00028
00029
00035 NxsSimpleCommandStrings ProcessedNxsToken::ParseSimpleCmd(
00036 const std::vector<ProcessedNxsToken> &pnc,
00037 bool convertToLower)
00038 {
00039 NxsSimpleCommandStrings nscs;
00040 if (pnc.empty())
00041 return nscs;
00042
00043
00044 std::vector<ProcessedNxsToken>::const_iterator wordIt = pnc.begin();
00045
00046 nscs.cmdName = wordIt->GetToken();
00047 if (convertToLower)
00048 NxsString::to_lower(nscs.cmdName);
00049 nscs.cmdPos = wordIt->GetFilePosInfoConstRef();
00050 ++wordIt;
00051
00052 std::string key;
00053
00054 NxsString errorMsg;
00055 NxsTokenPosInfo keyPos = nscs.cmdPos;
00056 bool eqRead = false;
00057 for (; wordIt != pnc.end(); ++wordIt)
00058 {
00059 std::string w = wordIt->GetToken();
00060 if (convertToLower)
00061 NxsString::to_lower(w);
00062 if (key.empty())
00063 {
00064 key = w;
00065 if (nscs.HasKey(key))
00066 {
00067 errorMsg << "Command option (" << key << ") repeated in the " << nscs.cmdName << " command.";
00068 throw NxsException(errorMsg, wordIt->GetFilePosInfoConstRef());
00069 }
00070 keyPos = wordIt->GetFilePosInfoConstRef();
00071 }
00072 else if (!eqRead)
00073 {
00074 if (w != "=")
00075 {
00076 errorMsg << "Expecting an = after the " << key << " command option of the " << nscs.cmdName << " command.";
00077 throw NxsException(errorMsg, wordIt->GetFilePosInfoConstRef());
00078 }
00079 eqRead = true;
00080 }
00081 else {
00082 if (w == "(")
00083 {
00084 ++wordIt;
00085 w = wordIt->GetToken();
00086 std::vector<std::string> vals;
00087 NxsSimpleCommandStrings::MatString mat;
00088 if (w == "(")
00089 {
00090 while (w != ")")
00091 {
00092 if (w != "(")
00093 {
00094 errorMsg << "Expecting a ( to begin another row of values in the " << key << " command option of the " << nscs.cmdName << " command.";
00095 throw NxsException(errorMsg, keyPos);
00096 }
00097
00098 ++wordIt;
00099 w = wordIt->GetToken();
00100 while (wordIt != pnc.end())
00101 {
00102 w = wordIt->GetToken();
00103 if (convertToLower)
00104 NxsString::to_lower(w);
00105 if (w == ")")
00106 break;
00107 vals.push_back(w);
00108 ++wordIt;
00109 }
00110 if (wordIt == pnc.end())
00111 {
00112 errorMsg << "Expecting a ) to end the list of values for the " << key << " command option of the " << nscs.cmdName << " command.";
00113 throw NxsException(errorMsg, keyPos);
00114 }
00115 ++wordIt;
00116 mat.push_back(vals);
00117 vals.clear();
00118 w = wordIt->GetToken();
00119 if (wordIt == pnc.end())
00120 {
00121 errorMsg << "Expecting a ) to end the list of values for the " << key << " command option of the " << nscs.cmdName << " command.";
00122 throw NxsException(errorMsg, keyPos);
00123 }
00124 }
00125 nscs.matOpts[key] = NxsSimpleCommandStrings::MatFromFile(wordIt->GetFilePosInfoConstRef(), mat);
00126 }
00127 else
00128 {
00129 while (wordIt != pnc.end())
00130 {
00131 w = wordIt->GetToken();
00132 if (convertToLower)
00133 NxsString::to_lower(w);
00134 if (w == ")")
00135 break;
00136 vals.push_back(w);
00137 ++wordIt;
00138 }
00139 if (wordIt == pnc.end())
00140 {
00141 errorMsg << "Expecting a ) to end the list of values for the " << key << " command option of the " << nscs.cmdName << " command.";
00142 throw NxsException(errorMsg, keyPos);
00143 }
00144 nscs.multiOpts[key] = NxsSimpleCommandStrings::MultiValFromFile(wordIt->GetFilePosInfoConstRef(), vals);
00145 }
00146 }
00147 else
00148 {
00149 std::string val = w;
00150 nscs.opts[key] = NxsSimpleCommandStrings::SingleValFromFile( wordIt->GetFilePosInfoConstRef(), val);
00151 }
00152 eqRead = false;
00153 key.clear();
00154 }
00155 }
00156 if (eqRead)
00157 {
00158 errorMsg << "Expecting a value after the = sign in the " << key << " command option of the " << nscs.cmdName << " command.";
00159 throw NxsException(errorMsg, keyPos);
00160 }
00161 if (!key.empty())
00162 {
00163 errorMsg << "Expecting an = after the " << key << " command option of the " << nscs.cmdName << " command.";
00164 throw NxsException(errorMsg, keyPos);
00165 }
00166 return nscs;
00167 }
00168
00169
00170
00171
00172 NxsX_UnexpectedEOF::NxsX_UnexpectedEOF(NxsToken &token)
00173 :NxsException("Unexpected end-of-file", token)
00174 {
00175 std::string t = token.GetBlockName();
00176 NxsString::to_upper(t);
00177 if (!t.empty())
00178 msg << " while reading " << t << " block.";
00179 }
00180
00182 bool WriteCommandAsNexus(std::ostream & out, const ProcessedNxsCommand &c)
00183 {
00184 if (c.empty())
00185 return false;
00186 out << " ";
00187 for(ProcessedNxsCommand::const_iterator cIt = c.begin(); cIt != c.end(); ++cIt)
00188 {
00189 out << ' ';
00190 cIt->WriteAsNexus(out);
00191 }
00192 out << ";";
00193 return true;
00194 }
00195
00196
00197
00203 void ProcessedNxsToken::IncrementNotLast(std::vector<ProcessedNxsToken>::const_iterator & tokIt, const std::vector<ProcessedNxsToken>::const_iterator &endIt, const char * contextString)
00204 {
00205 ++tokIt;
00206 if (tokIt == endIt)
00207 {
00208 NxsString errormsg = "Unexpected ; ";
00209 if (contextString)
00210 errormsg.append(contextString);
00211 --tokIt;
00212 throw NxsException(errormsg, *tokIt);
00213 }
00214 }
00219 inline void NxsToken::AdvanceToNextCharInStream()
00220 {
00221 if (nextCharInStream == EOF)
00222 return;
00223 nextCharInStream = (signed char) (inputStream.rdbuf())->sbumpc();
00224 posOffBy = -1;
00225 if (nextCharInStream == 13 || nextCharInStream == 10)
00226 {
00227 if(nextCharInStream == 13)
00228 {
00229 if ((inputStream.rdbuf())->sgetc() == 10)
00230 {
00231 (inputStream.rdbuf())->sbumpc();
00232 posOffBy = -2;
00233 }
00234 }
00235 nextCharInStream = '\n';
00236 }
00237 }
00238
00239
00240 #if defined(NEW_NXS_TOKEN_READ_CHAR)
00241
00247 inline char NxsToken::GetNextChar()
00248 {
00249
00250
00251
00252
00253
00254
00255 signed char ch = nextCharInStream;
00256 AdvanceToNextCharInStream();
00257 if(ch == EOF)
00258 {
00259 atEOF = true;
00260 if (eofAllowed)
00261 return '\0';
00262 throw NxsX_UnexpectedEOF(*this);
00263 }
00264 if(ch == '\n')
00265 {
00266 fileLine++;
00267 fileColumn = 1L;
00268 atEOL = true;
00269 return '\n';
00270 }
00271 if (ch == '\t')
00272 fileColumn += 4 - ((fileColumn - 1)%4);
00273 else
00274 fileColumn++;
00275 atEOL = false;
00276 return ch;
00277 }
00278
00279 #else // if !defined(NEW_NXS_TOKEN_READ_CHAR)
00280
00294 inline char NxsToken::GetNextChar()
00295 {
00296 int ch = inputStream.get();
00297 int failed = inputStream.bad();
00298 if (failed)
00299 {
00300 errormsg = "Unknown error reading data file (check to make sure file exists)";
00301 throw NxsException(errormsg);
00302 }
00303
00304 if (ch == 13 || ch == 10)
00305 {
00306 fileLine++;
00307 fileColumn = 1L;
00308
00309 if (ch == 13 && (int)inputStream.peek() == 10)
00310 ch = inputStream.get();
00311
00312 atEOL = 1;
00313 }
00314 else if (ch == EOF)
00315 atEOF = 1;
00316 else
00317 {
00318 fileColumn++;
00319 atEOL = 0;
00320 }
00321
00322 # if defined(__DECCXX)
00323 filepos = 0L;
00324 # else
00325 file_pos filepos = inputStream.tellg();
00326 # endif
00327
00328 if (atEOF)
00329 return '\0';
00330 else if (atEOL)
00331 return '\n';
00332 else
00333 return (char)ch;
00334 }
00335 #endif
00336
00337 std::map<std::string, std::string> NxsToken::ParseAsSimpleKeyValuePairs(const ProcessedNxsCommand & tv, const char *cmdName)
00338 {
00339 std::map<std::string, std::string> kv;
00340 std::string key;
00341 ProcessedNxsCommand::const_iterator tvIt = tv.begin();
00342 ProcessedNxsCommand::const_iterator prevIt;
00343 ProcessedNxsCommand::const_iterator endIt = tv.end();
00344 while (tvIt != endIt)
00345 {
00346 key = tvIt->GetToken().c_str();
00347 prevIt = tvIt++;
00348 if (tvIt == endIt || tvIt->GetToken() != "=")
00349 {
00350 NxsString m("Expecting = after ");
00351 m += key.c_str();
00352 m += " in ";
00353 m += cmdName;
00354 m += " command.";
00355 if (tvIt == endIt)
00356 throw NxsException(m, prevIt->GetFilePosition(), prevIt->GetLineNumber(), prevIt->GetColumnNumber());
00357 else
00358 throw NxsException(m, tvIt->GetFilePosition(), tvIt->GetLineNumber(), tvIt->GetColumnNumber());
00359 }
00360 prevIt = tvIt++;
00361 if (tvIt == endIt)
00362 {
00363 NxsString m("Expecting a value after = in the ");
00364 m += key.c_str();
00365 m += " subcommand of the in ";
00366 m += cmdName;
00367 m += " command.";
00368 throw NxsException(m, prevIt->GetFilePosition(), prevIt->GetLineNumber(), prevIt->GetColumnNumber());
00369 }
00370 kv[key] = tvIt->GetToken();
00371 tvIt++;
00372 }
00373 return kv;
00374 }
00380 void NxsToken::ProcessAsCommand(ProcessedNxsCommand *tokenVec)
00381 {
00382 ;
00383 while (!this->Equals(";"))
00384 {
00385 if (tokenVec)
00386 tokenVec->push_back(ProcessedNxsToken(*this));
00387 this->GetNextToken();
00388 }
00389 }
00390
00391
00396 std::string NxsToken::GetQuoted(const std::string &s)
00397 {
00398 std::string withQuotes;
00399 withQuotes.reserve(s.length() + 4);
00400 withQuotes.push_back('\'');
00401 for (NxsString::const_iterator sIt = s.begin(); sIt != s.end(); sIt++)
00402 {
00403 withQuotes.push_back(*sIt);
00404 if (*sIt == '\'')
00405 withQuotes.push_back('\'');
00406 }
00407 withQuotes.push_back('\'');
00408 return withQuotes;
00409 }
00410
00418 unsigned NxsToken::DemandPositiveInt(NxsToken &token, NxsString & errormsg, const char *contextString)
00419 {
00420 token.GetNextToken();
00421 int i = atoi(token.GetToken().c_str());
00422 if (i <= 0)
00423 {
00424 errormsg.assign(contextString);
00425 errormsg += " must be a number greater than 0. Found ";
00426 errormsg += token.GetToken();
00427 errormsg += " instead";
00428 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00429 }
00430 return (unsigned) i;
00431 }
00432
00433
00441 void NxsToken::DemandEndSemicolon(NxsToken &token, NxsString & errormsg, const char *contextString)
00442 {
00443 token.GetNextToken();
00444 if (!token.Equals(";"))
00445 {
00446 errormsg = "Expecting ';' to terminate the ";
00447 errormsg += contextString;
00448 errormsg += " command, but found ";
00449 errormsg += token.GetToken();
00450 errormsg += " instead";
00451 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00452 }
00453 }
00458 bool NxsToken::NeedsQuotes(const std::string &s)
00459 {
00460 for (std::string::const_iterator sIt = s.begin(); sIt != s.end(); sIt++)
00461 {
00462 const char &c = (*sIt);
00463 if (!isgraph(c))
00464 return true;
00465 else if (strchr("\'[(){}\"-]/\\,;:=*`+<>", c) != NULL)
00466 {
00467
00468 if (c == '\'' || c == '[')
00469 return true;
00470 return (s.length() > 1);
00471 }
00472 }
00473 return false;
00474 }
00475
00476
00477
00483 NxsToken::NxsToken(
00484 istream &i)
00485 : inputStream(i),
00486 eofAllowed(true)
00487 {
00488 posOffBy = 0;
00489 atEOF = false;
00490 atEOL = false;
00491 comment.clear();
00492 fileColumn = 1L;
00493 fileLine = 1L;
00494 labileFlags = 0;
00495 saved = '\0';
00496 special = '\0';
00497
00498 whitespace[0] = ' ';
00499 whitespace[1] = '\t';
00500 whitespace[2] = '\n';
00501 whitespace[3] = '\0';
00502 # if defined(NEW_NXS_TOKEN_READ_CHAR)
00503 nextCharInStream = 'a';
00504 AdvanceToNextCharInStream();
00505 # endif
00506 }
00507
00511 NxsToken::~NxsToken()
00512 {
00513 }
00514
00524 bool NxsToken::GetComment()
00525 {
00526
00527
00528
00529 NxsString currentComment;
00530 bool command = false;
00531
00532 bool formerEOFAllowed = eofAllowed;
00533 eofAllowed = false;
00534 try
00535 {
00536 char ch = GetNextChar();
00537
00538
00539
00540 int printing = 0;
00541 if (ch == '!')
00542 printing = 1;
00543 else if (ch == '&' && (labileFlags & saveCommandComments) && token.empty())
00544 command = true;
00545 currentComment.push_back(ch);
00546 if (ch != ']')
00547 {
00548 int level = 1;
00549 for(;;)
00550 {
00551 ch = GetNextChar();
00552 if (ch == ']')
00553 {
00554 level--;
00555 if (level == 0)
00556 break;
00557 }
00558 else if (ch == '[')
00559 level++;
00560 currentComment.push_back(ch);
00561 }
00562
00563 if (printing)
00564 {
00565
00566
00567
00568 NxsString foroutput(currentComment.c_str() + 1);
00569 comment = foroutput;
00570 OutputComment(foroutput);
00571 }
00572 if (command)
00573 token = currentComment;
00574 else
00575 embeddedComments.push_back(NxsComment(currentComment, GetFileLine(), GetFileColumn()));
00576 }
00577 }
00578 catch (NxsX_UnexpectedEOF & x)
00579 {
00580 x.msg << " (end-of-file inside comment)";
00581 eofAllowed = formerEOFAllowed;
00582 throw x;
00583 }
00584 eofAllowed = formerEOFAllowed ;
00585 return command;
00586 }
00587
00592 void NxsToken::GetCurlyBracketedToken()
00593 {
00594 bool formerEOFAllowed = eofAllowed;
00595 eofAllowed = false;
00596 try
00597 {
00598 int level = 1;
00599 while(level > 0)
00600 {
00601 char ch = GetNextChar();
00602 if (ch == '}')
00603 level--;
00604 else if (ch == '{')
00605 level++;
00606 AppendToToken(ch);
00607 }
00608 }
00609 catch (NxsX_UnexpectedEOF & x)
00610 {
00611 x.msg << " (end-of-file inside {} braced statement)";
00612 eofAllowed = formerEOFAllowed;
00613 throw x;
00614 }
00615 eofAllowed = formerEOFAllowed ;
00616 }
00617
00626 void NxsToken::GetDoubleQuotedToken()
00627 {
00628 bool formerEOFAllowed = eofAllowed;
00629 eofAllowed = false;
00630 try
00631 {
00632 for(;;)
00633 {
00634 char ch = GetNextChar();
00635 if (ch == '\"')
00636 break;
00637 else
00638 AppendToToken(ch);
00639 }
00640 }
00641 catch (NxsX_UnexpectedEOF & x)
00642 {
00643 x.msg << " (end-of-file inside \" quoted statement)";
00644 eofAllowed = formerEOFAllowed;
00645 throw x;
00646 }
00647 eofAllowed = formerEOFAllowed ;
00648 }
00649
00656 void NxsToken::GetQuoted()
00657 {
00658 bool formerEOFAllowed = eofAllowed;
00659 eofAllowed = false;
00660 long fl = fileLine;
00661 long fc = fileColumn;
00662
00663 try
00664 {
00665 for(;;)
00666 {
00667 char ch = GetNextChar();
00668 if (ch == '\'')
00669 {
00670 ch = GetNextChar();
00671 if (ch == '\'')
00672 AppendToToken(ch);
00673 else
00674 {
00675 saved = ch;
00676 break;
00677 }
00678 }
00679 else
00680 AppendToToken(ch);
00681 }
00682 }
00683 catch (NxsX_UnexpectedEOF & x)
00684 {
00685 x.msg << " (end-of-file inside \' quoted token that started on line " << fl<< ", column " <<fc << ')';
00686 eofAllowed = formerEOFAllowed;
00687 throw x;
00688 }
00689 eofAllowed = formerEOFAllowed ;
00690 }
00691
00696 void NxsToken::GetParentheticalToken()
00697 {
00698
00699
00700
00701 int level = 1;
00702 std::vector<NxsComment> prevEmbedded = embeddedComments;
00703 embeddedComments.clear();
00704 char ch;
00705 ch = GetNextChar();
00706 for(;;)
00707 {
00708 if (atEOF)
00709 break;
00710
00711 if (ch == '\'')
00712 {
00713 AppendToToken('\'');
00714 GetQuoted();
00715 AppendToToken('\'');
00716 ch = saved;
00717 saved = '\0';
00718 if (atEOF)
00719 {
00720 if (ch == ')' && level == 1)
00721 {
00722 AppendToToken(')');
00723 break;
00724 }
00725 else
00726 {
00727 NxsX_UnexpectedEOF x(*this);
00728 x.msg << "(end-of-file inside () statement)";
00729 }
00730 }
00731 continue;
00732 }
00733 if (ch == '[')
00734 {
00735 GetComment();
00736 assert(embeddedComments.size() == 1);
00737 AppendToToken('[');
00738 const std::string & body = embeddedComments[0].GetText();
00739 token.append(body.begin(), body.end());
00740 AppendToToken(']');
00741 embeddedComments.clear();
00742
00743 }
00744 else
00745 {
00746 if (ch == ')')
00747 level--;
00748 else if (ch == '(')
00749 level++;
00750
00751 AppendToToken(ch);
00752 }
00753
00754 if (level == 0)
00755 break;
00756 ch = GetNextChar();
00757 }
00758 embeddedComments = prevEmbedded;
00759 }
00760
00767 bool NxsToken::Abbreviation(
00768 NxsString s)
00769 {
00770 int k;
00771 int slen = (int)s.size();
00772 int tlen = (int)token.size();
00773 char tokenChar, otherChar;
00774
00775
00776
00777
00778 int mlen;
00779 for (mlen = 0; mlen < slen; mlen++)
00780 {
00781 if (!isupper(s[mlen]))
00782 break;
00783 }
00784
00785
00786
00787
00788 if (tlen < mlen)
00789 return false;
00790
00791
00792
00793
00794 if (tlen > slen)
00795 return false;
00796
00797
00798
00799 for (k = 0; k < mlen; k++)
00800 {
00801 tokenChar = (char)toupper( token[k]);
00802 otherChar = s[k];
00803 if (tokenChar != otherChar)
00804 return false;
00805 }
00806
00807
00808
00809 for (k = mlen; k < tlen; k++)
00810 {
00811 tokenChar = (char)toupper( token[k]);
00812 otherChar = (char)toupper( s[k]);
00813 if (tokenChar != otherChar)
00814 return false;
00815 }
00816
00817 return true;
00818 }
00819
00824 bool NxsToken::Begins(
00825 NxsString s,
00826 bool respect_case)
00827 {
00828 unsigned k;
00829 char tokenChar, otherChar;
00830
00831 unsigned slen = (unsigned)s.size();
00832 if (slen > token.size())
00833 return false;
00834
00835 for (k = 0; k < slen; k++)
00836 {
00837 if (respect_case)
00838 {
00839 tokenChar = token[k];
00840 otherChar = s[k];
00841 }
00842 else
00843 {
00844 tokenChar = (char)toupper( token[k]);
00845 otherChar = (char)toupper( s[k]);
00846 }
00847
00848 if (tokenChar != otherChar)
00849 return false;
00850 }
00851
00852 return true;
00853 }
00854
00879 void NxsToken::GetNextToken()
00880 {
00881 ResetToken();
00882
00883 char ch = ' ';
00884 if (saved == '\0' || IsWhitespace(saved))
00885 {
00886
00887
00888 while( IsWhitespace(ch) && !atEOF)
00889 ch = GetNextChar();
00890 saved = ch;
00891 }
00892
00893 for(;;)
00894 {
00895
00896
00897 if (labileFlags & singleCharacterToken && !token.empty())
00898 break;
00899
00900
00901
00902 if (saved != '\0')
00903 {
00904 ch = saved;
00905 saved = '\0';
00906 }
00907 else
00908 ch = GetNextChar();
00909
00910
00911
00912 if (atEOF)
00913 break;
00914 if (strchr("\n\r \t", ch) != NULL)
00915 {
00916 if (ch == '\n' && labileFlags & newlineIsToken)
00917 {
00918 if (token.empty())
00919 {
00920 atEOL = 1;
00921 AppendToToken(ch);
00922 }
00923 else
00924 {
00925
00926
00927
00928 atEOL = 0;
00929 saved = ch;
00930 }
00931 break;
00932 }
00933 else
00934 {
00935
00936
00937
00938 if (!token.empty())
00939 break;
00940 }
00941 }
00942 else if (ch == '_')
00943 {
00944
00945
00946
00947 if (!(labileFlags & preserveUnderscores))
00948 ch = ' ';
00949 AppendToToken(ch);
00950 }
00951
00952 else if (ch == '[')
00953 {
00954
00955
00956
00957
00958 comment.clear();
00959 if (GetComment())
00960 break;
00961 }
00962 else if (IsPunctuation(ch))
00963 {
00964 if (ch == '(' && (labileFlags & parentheticalToken))
00965 {
00966 AppendToToken(ch);
00967 GetParentheticalToken();
00968 }
00969 else if (ch == '{' && (labileFlags & curlyBracketedToken))
00970 {
00971 AppendToToken(ch);
00972 GetCurlyBracketedToken();
00973 }
00974 else if (ch == '\"' && (labileFlags & doubleQuotedToken))
00975 GetDoubleQuotedToken();
00976 else if (ch == '\'' && token.empty())
00977 GetQuoted();
00978 else
00979 {
00980
00981
00982 if (token.size() > 0)
00983 saved = ch;
00984 else
00985 AppendToToken(ch);
00986 }
00987 break;
00988 }
00989 else
00990 AppendToToken(ch);
00991 }
00992
00993 labileFlags = 0;
00994 }
00995
00999 void NxsToken::StripWhitespace()
01000 {
01001 NxsString s;
01002 for (unsigned j = 0; j < token.size(); j++)
01003 {
01004 if (IsWhitespace( token[j]))
01005 continue;
01006 s += token[j];
01007 }
01008 token = s;
01009 }
01010
01014 void NxsToken::ToUpper()
01015 {
01016 for (unsigned i = 0; i < token.size(); i++)
01017 token[i] = (char)toupper(token[i]);
01018 }
01019