00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00023 #include <iomanip>
00024 #include <climits>
00025
00026 #include "ncl/nxscharactersblock.h"
00027
00028 #include "ncl/nxsreader.h"
00029 #include "ncl/nxsassumptionsblock.h"
00030 #include "ncl/nxssetreader.h"
00031 #include <algorithm>
00032 using namespace std;
00033
00034 CodonRecodingStruct getCodonRecodingStruct(NxsGeneticCodesEnum gCode);
00035 std::vector<NxsDiscreteStateCell> getToCodonRecodingMapper(NxsGeneticCodesEnum gCode);
00036
00037
00038 void NxsDiscreteDatatypeMapper::DebugWriteMapperFields(std::ostream & out) const
00039 {
00040 out << nStates << "\"fundamental\" states\n";
00041 out << "Symbols = \"" << symbols << "\"\n";
00042 if (respectCase)
00043 out << "Symbol comparison respects case (is case-sensitive)\n";
00044 else
00045 out << "Symbol comparison does not respect case (is case-insensitive)\n";
00046 if (gapChar == '\0')
00047 out << "No Gaps\n";
00048 else
00049 out << "Gap char is " << gapChar << "\n";
00050
00051 out << "State codes:\n";
00052 int nsc = (int)GetNumStateCodes();
00053 for (int scc = 0; scc < nsc; ++scc)
00054 {
00055 int sc = scc + sclOffset;
00056 out << sc << ' ';
00057 if (sc == NXS_MISSING_CODE)
00058 out << missing << '\n';
00059 else if (sc == NXS_GAP_STATE_CODE)
00060 out << gapChar << '\n';
00061 else
00062 {
00063 const std::set<NxsDiscreteStateCell> & ssfc(GetStateSetForCode(sc));
00064 std::set<NxsDiscreteStateCell>::const_iterator sIt = ssfc.begin();
00065 if (ssfc.size() == 1)
00066 {
00067 out << symbols[*sIt];
00068 }
00069 else
00070 {
00071 if (IsPolymorphic(sc))
00072 out << '(';
00073 else
00074 out << '{';
00075 for (; sIt != ssfc.end(); ++sIt)
00076 {
00077 if (*sIt == NXS_MISSING_CODE)
00078 out << missing;
00079 else if (*sIt == NXS_GAP_STATE_CODE)
00080 out << gapChar;
00081 else
00082 out << symbols[*sIt];
00083 }
00084 if (IsPolymorphic(sc))
00085 out << ')';
00086 else
00087 out << '}';
00088 }
00089 out << '\n';
00090 }
00091 }
00092
00093 std::map<char, NxsString>::const_iterator eeIt = extraEquates.begin();
00094 if (eeIt != extraEquates.end())
00095 {
00096 out << "Extra equates:\n";
00097 for (; eeIt != extraEquates.end(); ++eeIt)
00098 out << eeIt->first << " -> " << eeIt->second << '\n';
00099 }
00100 out.flush();
00101 }
00102
00103 static unsigned char lcBaseToInd(char );
00104
00105 static unsigned char lcBaseToInd(char c) {
00106 if (c == 'a')
00107 return 0;
00108 if (c == 'c')
00109 return 1;
00110 if (c == 'g')
00111 return 2;
00112 if (c == 't')
00113 return 3;
00114 throw NxsException("Expecting a DNA base");
00115 }
00116
00117 NxsCodonTriplet::NxsCodonTriplet(const char *triplet)
00118 {
00119 std::string s(triplet);
00120 if (s.length() != 3)
00121 throw NxsException("Expecting a triplet of bases");
00122 NxsString::to_lower(s);
00123 this->firstPos = lcBaseToInd(s[0]);
00124 this->secondPos = lcBaseToInd(s[1]);
00125 this->thirdPos = lcBaseToInd(s[2]);
00126 }
00127
00128
00129 NxsCodonTriplet::MutDescription NxsCodonTriplet::getSingleMut(const NxsCodonTriplet & other) const {
00130 if (firstPos == other.firstPos) {
00131 if (secondPos == other.secondPos) {
00132 if (thirdPos == other.thirdPos)
00133 return MutDescription(0,0);
00134 return MutDescription((int)thirdPos, (int)other.thirdPos);
00135 }
00136 if (thirdPos == other.thirdPos)
00137 return MutDescription((int)secondPos, (int)other.secondPos);
00138 return MutDescription(-1, -1);
00139 }
00140 if (secondPos == other.secondPos) {
00141 if (thirdPos == other.thirdPos)
00142 return MutDescription((int)firstPos, (int)other.firstPos);
00143 return MutDescription(-1, -1);
00144 }
00145 return MutDescription(-1, -1);
00146 }
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156 void NxsDiscreteDatatypeMapper::DeleteStateIndices(const std::set<NxsDiscreteStateCell> & deletedInds)
00157 {
00158 if (deletedInds.empty())
00159 return;
00160 if (*(deletedInds.begin()) < 0 || *(deletedInds.rbegin()) >= (NxsDiscreteStateCell)this->nStates)
00161 throw NxsException("DeleteStateIndices can only delete fundamental states");
00162 if (!(NxsCharactersBlock::GetDefaultEquates(this->datatype).empty() && extraEquates.empty()))
00163 throw NxsException("DeleteStateIndices can not currently work on datatypes with equates");
00164 std::vector<NxsDiscreteStateCell> remap;
00165 NxsDiscreteStateCell newIndex = 0;
00166 std::string nsym;
00167 for (NxsDiscreteStateCell i = 0; i < (NxsDiscreteStateCell) this->nStates; ++i)
00168 {
00169 if (deletedInds.find(i) == deletedInds.end())
00170 {
00171 remap.push_back(newIndex++);
00172 nsym.append(1, symbols[i]);
00173 }
00174 else
00175 remap.push_back(NXS_INVALID_STATE_CODE);
00176 }
00177 const unsigned oldNStates = nStates;
00178 std::vector<NxsDiscreteStateSetInfo> oldStateSetsVec = this->stateSetsVec;
00179 symbols = nsym;
00180
00181 this->RefreshMappings(0L);
00182
00183 for (unsigned i = oldNStates - sclOffset; i < oldStateSetsVec.size(); ++i)
00184 {
00185 const NxsDiscreteStateSetInfo & ssi = oldStateSetsVec[i];
00186 std::set<NxsDiscreteStateCell> stSet;
00187 for (std::set<NxsDiscreteStateCell>::const_iterator s = ssi.states.begin(); s != ssi.states.end(); ++s)
00188 {
00189 NxsDiscreteStateCell u = *s;
00190 if (u < 0)
00191 stSet.insert(u);
00192 else
00193 {
00194 NxsDiscreteStateCell r = remap.at(u);
00195 if (r >= 0)
00196 stSet.insert(r);
00197 }
00198 }
00199
00200 AddStateSet(stSet, ssi.nexusSymbol, true, ssi.isPolymorphic);
00201 }
00202 }
00203
00204 std::vector<NxsDiscreteStateCell> getToCodonRecodingMapper(NxsGeneticCodesEnum gCode)
00205 {
00206 std::vector<NxsDiscreteStateCell> v;
00207 if(gCode == NXS_GCODE_STANDARD) {
00208 const NxsDiscreteStateCell trnxs_gcode_standard[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
00209 std::copy(trnxs_gcode_standard, trnxs_gcode_standard + 64, back_inserter(v));
00210 return v;
00211 }
00212 if(gCode == NXS_GCODE_VERT_MITO) {
00213 const NxsDiscreteStateCell trnxs_gcode_vert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, -1, 46, -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59};
00214 std::copy(trnxs_gcode_vert_mito, trnxs_gcode_vert_mito + 64, back_inserter(v));
00215 return v;
00216 }
00217 if(gCode == NXS_GCODE_YEAST_MITO) {
00218 const NxsDiscreteStateCell trnxs_gcode_yeast_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00219 std::copy(trnxs_gcode_yeast_mito, trnxs_gcode_yeast_mito + 64, back_inserter(v));
00220 return v;
00221 }
00222 if(gCode == NXS_GCODE_MOLD_MITO) {
00223 const NxsDiscreteStateCell trnxs_gcode_mold_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00224 std::copy(trnxs_gcode_mold_mito, trnxs_gcode_mold_mito + 64, back_inserter(v));
00225 return v;
00226 }
00227 if(gCode == NXS_GCODE_INVERT_MITO) {
00228 const NxsDiscreteStateCell trnxs_gcode_invert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00229 std::copy(trnxs_gcode_invert_mito, trnxs_gcode_invert_mito + 64, back_inserter(v));
00230 return v;
00231 }
00232 if(gCode == NXS_GCODE_CILIATE) {
00233 const NxsDiscreteStateCell trnxs_gcode_ciliate[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, -1, 56, 57, 58, 59, 60, 61, 62};
00234 std::copy(trnxs_gcode_ciliate, trnxs_gcode_ciliate + 64, back_inserter(v));
00235 return v;
00236 }
00237 if(gCode == NXS_GCODE_ECHINO_MITO) {
00238 const NxsDiscreteStateCell trnxs_gcode_echino_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00239 std::copy(trnxs_gcode_echino_mito, trnxs_gcode_echino_mito + 64, back_inserter(v));
00240 return v;
00241 }
00242 if(gCode == NXS_GCODE_EUPLOTID) {
00243 const NxsDiscreteStateCell trnxs_gcode_euplotid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00244 std::copy(trnxs_gcode_euplotid, trnxs_gcode_euplotid + 64, back_inserter(v));
00245 return v;
00246 }
00247 if(gCode == NXS_GCODE_PLANT_PLASTID) {
00248 const NxsDiscreteStateCell trnxs_gcode_plant_plastid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
00249 std::copy(trnxs_gcode_plant_plastid, trnxs_gcode_plant_plastid + 64, back_inserter(v));
00250 return v;
00251 }
00252 if(gCode == NXS_GCODE_ALT_YEAST) {
00253 const NxsDiscreteStateCell trnxs_gcode_alt_yeast[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
00254 std::copy(trnxs_gcode_alt_yeast, trnxs_gcode_alt_yeast + 64, back_inserter(v));
00255 return v;
00256 }
00257 if(gCode == NXS_GCODE_ASCIDIAN_MITO) {
00258 const NxsDiscreteStateCell trnxs_gcode_ascidian_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00259 std::copy(trnxs_gcode_ascidian_mito, trnxs_gcode_ascidian_mito + 64, back_inserter(v));
00260 return v;
00261 }
00262 if(gCode == NXS_GCODE_ALT_FLATWORM_MITO) {
00263 const NxsDiscreteStateCell trnxs_gcode_alt_flatworm_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, -1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62};
00264 std::copy(trnxs_gcode_alt_flatworm_mito, trnxs_gcode_alt_flatworm_mito + 64, back_inserter(v));
00265 return v;
00266 }
00267 if(gCode == NXS_GCODE_BLEPHARISMA_MACRO) {
00268 const NxsDiscreteStateCell trnxs_gcode_blepharisma_macro[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, 49, 50, 51, 52, 53, 54, -1, 55, 56, 57, 58, 59, 60, 61};
00269 std::copy(trnxs_gcode_blepharisma_macro, trnxs_gcode_blepharisma_macro + 64, back_inserter(v));
00270 return v;
00271 }
00272 if(gCode == NXS_GCODE_CHLOROPHYCEAN_MITO) {
00273 const NxsDiscreteStateCell trnxs_gcode_chlorophycean_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, 49, 50, 51, 52, 53, 54, -1, 55, 56, 57, 58, 59, 60, 61};
00274 std::copy(trnxs_gcode_chlorophycean_mito, trnxs_gcode_chlorophycean_mito + 64, back_inserter(v));
00275 return v;
00276 }
00277 if(gCode == NXS_GCODE_TREMATODE_MITO) {
00278 const NxsDiscreteStateCell trnxs_gcode_trematode_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
00279 std::copy(trnxs_gcode_trematode_mito, trnxs_gcode_trematode_mito + 64, back_inserter(v));
00280 return v;
00281 }
00282 if(gCode == NXS_GCODE_SCENEDESMUS_MITO) {
00283 const NxsDiscreteStateCell trnxs_gcode_scenedesmus_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, 49, 50, -1, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
00284 std::copy(trnxs_gcode_scenedesmus_mito, trnxs_gcode_scenedesmus_mito + 64, back_inserter(v));
00285 return v;
00286 }
00287 if(gCode == NXS_GCODE_THRAUSTOCHYTRIUM_MITO) {
00288 const NxsDiscreteStateCell trnxs_gcode_thraustochytrium_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, -1, 57, 58, 59};
00289 std::copy(trnxs_gcode_thraustochytrium_mito, trnxs_gcode_thraustochytrium_mito + 64, back_inserter(v));
00290 return v;
00291 }
00292 throw NxsException("Unrecognized genetic code.");
00293 }
00294
00295
00296 CodonRecodingStruct getCodonRecodingStruct(NxsGeneticCodesEnum gCode)
00297 {
00298 CodonRecodingStruct c;
00299 unsigned n;
00300
00301 if(gCode == NXS_GCODE_STANDARD) {
00302 const int ccitacnxs_gcode_standard[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00303 n = 61;
00304 const int caaindnxs_gcode_standard[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00305 const char * ccodstrnxs_gcode_standard[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00306 std::copy(ccitacnxs_gcode_standard, ccitacnxs_gcode_standard + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00307 std::copy(caaindnxs_gcode_standard, caaindnxs_gcode_standard + n, back_inserter(c.aaInd));
00308 std::copy(ccodstrnxs_gcode_standard, ccodstrnxs_gcode_standard + n, back_inserter(c.codonStrings));
00309 return c;
00310 }
00311 if(gCode == NXS_GCODE_VERT_MITO) {
00312 const int ccitacnxs_gcode_vert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00313 n = 60;
00314 const int caaindnxs_gcode_vert_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 15, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00315 const char * ccodstrnxs_gcode_vert_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGC", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00316 std::copy(ccitacnxs_gcode_vert_mito, ccitacnxs_gcode_vert_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00317 std::copy(caaindnxs_gcode_vert_mito, caaindnxs_gcode_vert_mito + n, back_inserter(c.aaInd));
00318 std::copy(ccodstrnxs_gcode_vert_mito, ccodstrnxs_gcode_vert_mito + n, back_inserter(c.codonStrings));
00319 return c;
00320 }
00321 if(gCode == NXS_GCODE_YEAST_MITO) {
00322 const int ccitacnxs_gcode_yeast_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00323 n = 62;
00324 const int caaindnxs_gcode_yeast_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00325 const char * ccodstrnxs_gcode_yeast_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00326 std::copy(ccitacnxs_gcode_yeast_mito, ccitacnxs_gcode_yeast_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00327 std::copy(caaindnxs_gcode_yeast_mito, caaindnxs_gcode_yeast_mito + n, back_inserter(c.aaInd));
00328 std::copy(ccodstrnxs_gcode_yeast_mito, ccodstrnxs_gcode_yeast_mito + n, back_inserter(c.codonStrings));
00329 return c;
00330 }
00331 if(gCode == NXS_GCODE_MOLD_MITO) {
00332 const int ccitacnxs_gcode_mold_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00333 n = 62;
00334 const int caaindnxs_gcode_mold_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00335 const char * ccodstrnxs_gcode_mold_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00336 std::copy(ccitacnxs_gcode_mold_mito, ccitacnxs_gcode_mold_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00337 std::copy(caaindnxs_gcode_mold_mito, caaindnxs_gcode_mold_mito + n, back_inserter(c.aaInd));
00338 std::copy(ccodstrnxs_gcode_mold_mito, ccodstrnxs_gcode_mold_mito + n, back_inserter(c.codonStrings));
00339 return c;
00340 }
00341 if(gCode == NXS_GCODE_INVERT_MITO) {
00342 const int ccitacnxs_gcode_invert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00343 n = 62;
00344 const int caaindnxs_gcode_invert_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00345 const char * ccodstrnxs_gcode_invert_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00346 std::copy(ccitacnxs_gcode_invert_mito, ccitacnxs_gcode_invert_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00347 std::copy(caaindnxs_gcode_invert_mito, caaindnxs_gcode_invert_mito + n, back_inserter(c.aaInd));
00348 std::copy(ccodstrnxs_gcode_invert_mito, ccodstrnxs_gcode_invert_mito + n, back_inserter(c.codonStrings));
00349 return c;
00350 }
00351 if(gCode == NXS_GCODE_CILIATE) {
00352 const int ccitacnxs_gcode_ciliate[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00353 n = 63;
00354 const int caaindnxs_gcode_ciliate[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 13, 19, 13, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00355 const char * ccodstrnxs_gcode_ciliate[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00356 std::copy(ccitacnxs_gcode_ciliate, ccitacnxs_gcode_ciliate + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00357 std::copy(caaindnxs_gcode_ciliate, caaindnxs_gcode_ciliate + n, back_inserter(c.aaInd));
00358 std::copy(ccodstrnxs_gcode_ciliate, ccodstrnxs_gcode_ciliate + n, back_inserter(c.codonStrings));
00359 return c;
00360 }
00361 if(gCode == NXS_GCODE_ECHINO_MITO) {
00362 const int ccitacnxs_gcode_echino_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00363 n = 62;
00364 const int caaindnxs_gcode_echino_mito[] = {11, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00365 const char * ccodstrnxs_gcode_echino_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00366 std::copy(ccitacnxs_gcode_echino_mito, ccitacnxs_gcode_echino_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00367 std::copy(caaindnxs_gcode_echino_mito, caaindnxs_gcode_echino_mito + n, back_inserter(c.aaInd));
00368 std::copy(ccodstrnxs_gcode_echino_mito, ccodstrnxs_gcode_echino_mito + n, back_inserter(c.codonStrings));
00369 return c;
00370 }
00371 if(gCode == NXS_GCODE_EUPLOTID) {
00372 const int ccitacnxs_gcode_euplotid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00373 n = 62;
00374 const int caaindnxs_gcode_euplotid[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 1, 18, 1, 9, 4, 9, 4};
00375 const char * ccodstrnxs_gcode_euplotid[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00376 std::copy(ccitacnxs_gcode_euplotid, ccitacnxs_gcode_euplotid + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00377 std::copy(caaindnxs_gcode_euplotid, caaindnxs_gcode_euplotid + n, back_inserter(c.aaInd));
00378 std::copy(ccodstrnxs_gcode_euplotid, ccodstrnxs_gcode_euplotid + n, back_inserter(c.codonStrings));
00379 return c;
00380 }
00381 if(gCode == NXS_GCODE_PLANT_PLASTID) {
00382 const int ccitacnxs_gcode_plant_plastid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00383 n = 61;
00384 const int caaindnxs_gcode_plant_plastid[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00385 const char * ccodstrnxs_gcode_plant_plastid[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00386 std::copy(ccitacnxs_gcode_plant_plastid, ccitacnxs_gcode_plant_plastid + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00387 std::copy(caaindnxs_gcode_plant_plastid, caaindnxs_gcode_plant_plastid + n, back_inserter(c.aaInd));
00388 std::copy(ccodstrnxs_gcode_plant_plastid, ccodstrnxs_gcode_plant_plastid + n, back_inserter(c.codonStrings));
00389 return c;
00390 }
00391 if(gCode == NXS_GCODE_ALT_YEAST) {
00392 const int ccitacnxs_gcode_alt_yeast[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00393 n = 61;
00394 const int caaindnxs_gcode_alt_yeast[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 15, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00395 const char * ccodstrnxs_gcode_alt_yeast[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00396 std::copy(ccitacnxs_gcode_alt_yeast, ccitacnxs_gcode_alt_yeast + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00397 std::copy(caaindnxs_gcode_alt_yeast, caaindnxs_gcode_alt_yeast + n, back_inserter(c.aaInd));
00398 std::copy(ccodstrnxs_gcode_alt_yeast, ccodstrnxs_gcode_alt_yeast + n, back_inserter(c.codonStrings));
00399 return c;
00400 }
00401 if(gCode == NXS_GCODE_ASCIDIAN_MITO) {
00402 const int ccitacnxs_gcode_ascidian_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00403 n = 62;
00404 const int caaindnxs_gcode_ascidian_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 5, 15, 5, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00405 const char * ccodstrnxs_gcode_ascidian_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00406 std::copy(ccitacnxs_gcode_ascidian_mito, ccitacnxs_gcode_ascidian_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00407 std::copy(caaindnxs_gcode_ascidian_mito, caaindnxs_gcode_ascidian_mito + n, back_inserter(c.aaInd));
00408 std::copy(ccodstrnxs_gcode_ascidian_mito, ccodstrnxs_gcode_ascidian_mito + n, back_inserter(c.codonStrings));
00409 return c;
00410 }
00411 if(gCode == NXS_GCODE_ALT_FLATWORM_MITO) {
00412 const int ccitacnxs_gcode_alt_flatworm_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00413 n = 63;
00414 const int caaindnxs_gcode_alt_flatworm_mito[] = {11, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00415 const char * ccodstrnxs_gcode_alt_flatworm_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00416 std::copy(ccitacnxs_gcode_alt_flatworm_mito, ccitacnxs_gcode_alt_flatworm_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00417 std::copy(caaindnxs_gcode_alt_flatworm_mito, caaindnxs_gcode_alt_flatworm_mito + n, back_inserter(c.aaInd));
00418 std::copy(ccodstrnxs_gcode_alt_flatworm_mito, ccodstrnxs_gcode_alt_flatworm_mito + n, back_inserter(c.codonStrings));
00419 return c;
00420 }
00421 if(gCode == NXS_GCODE_BLEPHARISMA_MACRO) {
00422 const int ccitacnxs_gcode_blepharisma_macro[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00423 n = 62;
00424 const int caaindnxs_gcode_blepharisma_macro[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 13, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00425 const char * ccodstrnxs_gcode_blepharisma_macro[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00426 std::copy(ccitacnxs_gcode_blepharisma_macro, ccitacnxs_gcode_blepharisma_macro + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00427 std::copy(caaindnxs_gcode_blepharisma_macro, caaindnxs_gcode_blepharisma_macro + n, back_inserter(c.aaInd));
00428 std::copy(ccodstrnxs_gcode_blepharisma_macro, ccodstrnxs_gcode_blepharisma_macro + n, back_inserter(c.codonStrings));
00429 return c;
00430 }
00431 if(gCode == NXS_GCODE_CHLOROPHYCEAN_MITO) {
00432 const int ccitacnxs_gcode_chlorophycean_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00433 n = 62;
00434 const int caaindnxs_gcode_chlorophycean_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 9, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00435 const char * ccodstrnxs_gcode_chlorophycean_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00436 std::copy(ccitacnxs_gcode_chlorophycean_mito, ccitacnxs_gcode_chlorophycean_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00437 std::copy(caaindnxs_gcode_chlorophycean_mito, caaindnxs_gcode_chlorophycean_mito + n, back_inserter(c.aaInd));
00438 std::copy(ccodstrnxs_gcode_chlorophycean_mito, ccodstrnxs_gcode_chlorophycean_mito + n, back_inserter(c.codonStrings));
00439 return c;
00440 }
00441 if(gCode == NXS_GCODE_TREMATODE_MITO) {
00442 const int ccitacnxs_gcode_trematode_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
00443 n = 62;
00444 const int caaindnxs_gcode_trematode_mito[] = {11, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
00445 const char * ccodstrnxs_gcode_trematode_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00446 std::copy(ccitacnxs_gcode_trematode_mito, ccitacnxs_gcode_trematode_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00447 std::copy(caaindnxs_gcode_trematode_mito, caaindnxs_gcode_trematode_mito + n, back_inserter(c.aaInd));
00448 std::copy(ccodstrnxs_gcode_trematode_mito, ccodstrnxs_gcode_trematode_mito + n, back_inserter(c.codonStrings));
00449 return c;
00450 }
00451 if(gCode == NXS_GCODE_SCENEDESMUS_MITO) {
00452 const int ccitacnxs_gcode_scenedesmus_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
00453 n = 61;
00454 const int caaindnxs_gcode_scenedesmus_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 9, 19, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
00455 const char * ccodstrnxs_gcode_scenedesmus_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAG", "TAT", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
00456 std::copy(ccitacnxs_gcode_scenedesmus_mito, ccitacnxs_gcode_scenedesmus_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00457 std::copy(caaindnxs_gcode_scenedesmus_mito, caaindnxs_gcode_scenedesmus_mito + n, back_inserter(c.aaInd));
00458 std::copy(ccodstrnxs_gcode_scenedesmus_mito, ccodstrnxs_gcode_scenedesmus_mito + n, back_inserter(c.codonStrings));
00459 return c;
00460 }
00461 if(gCode == NXS_GCODE_THRAUSTOCHYTRIUM_MITO) {
00462 const int ccitacnxs_gcode_thraustochytrium_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 61, 62, 63};
00463 n = 60;
00464 const int caaindnxs_gcode_thraustochytrium_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 4, 9, 4};
00465 const char * ccodstrnxs_gcode_thraustochytrium_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTC", "TTG", "TTT"};
00466 std::copy(ccitacnxs_gcode_thraustochytrium_mito, ccitacnxs_gcode_thraustochytrium_mito + n, back_inserter(c.compressedCodonIndToAllCodonsInd));
00467 std::copy(caaindnxs_gcode_thraustochytrium_mito, caaindnxs_gcode_thraustochytrium_mito + n, back_inserter(c.aaInd));
00468 std::copy(ccodstrnxs_gcode_thraustochytrium_mito, ccodstrnxs_gcode_thraustochytrium_mito + n, back_inserter(c.codonStrings));
00469 return c;
00470 }
00471 throw NxsException("Unrecognized genetic code.");
00472 }
00473
00474 CodonRecodingStruct NxsCharactersBlock::RemoveStopCodons(NxsGeneticCodesEnum gCode)
00475 {
00476 NxsDiscreteDatatypeMapper * mapper = this->GetMutableDatatypeMapperForChar(0);
00477 if (mapper == 0L)
00478 throw NxsException("Invalid characters block (no datatype mapper)");
00479 if (mapper->GetDatatype() != codon)
00480 throw NxsException("Characters block must be of the type codons when RemoveStopCodons is called");
00481 if (mapper->geneticCode != NXS_GCODE_NO_CODE)
00482 throw NxsException("Characters block must be an uncompressed codons type when RemoveStopCodons is called");
00483
00484 const std::vector<NxsDiscreteStateCell> v = getToCodonRecodingMapper(gCode);
00485 CodonRecodingStruct c = getCodonRecodingStruct(gCode);
00486 const unsigned nRS = c.compressedCodonIndToAllCodonsInd.size();
00487 const unsigned offset = 64 - nRS;
00488 NxsDiscreteStateMatrix dMat(this->discreteMatrix);
00489 unsigned rowInd = 0;
00490 for (NxsDiscreteStateMatrix::iterator rowIt = dMat.begin(); rowIt != dMat.end(); ++rowIt)
00491 {
00492 NxsDiscreteStateRow & row = *rowIt;
00493 unsigned charInd = 0;
00494 for (NxsDiscreteStateRow::iterator cellIt = row.begin(); cellIt != row.end(); ++cellIt)
00495 {
00496 const NxsDiscreteStateCell cell = *cellIt;
00497 if (cell >= 64)
00498 *cellIt = cell - offset;
00499 else if (cell >= 0)
00500 {
00501 const NxsDiscreteStateCell recoded = v[cell];
00502 if (recoded < 0)
00503 {
00504 NxsString m;
00505 m << "Stop codon found at character ";
00506 m << charInd + 1;
00507 m << " for taxon ";
00508 m << rowInd + 1;
00509 throw NxsException(m);
00510 }
00511 *cellIt = recoded;
00512 }
00513 ++charInd;
00514 }
00515 ++rowInd;
00516 }
00517 dMat.swap(this->discreteMatrix);
00518 std::set<NxsDiscreteStateCell> deletedInds;
00519 for (NxsDiscreteStateCell i = 0; i < 64; ++i)
00520 {
00521 if (v[(int)i] < 0)
00522 deletedInds.insert(i);
00523 }
00524 mapper->DeleteStateIndices(deletedInds);
00525 return c;
00526 }
00527
00528 unsigned NxsCharactersBlock::NumAmbigInTaxon(const unsigned taxInd, const NxsUnsignedSet * charIndices, const bool countOnlyCompletelyMissing, const bool treatGapsAsMissing) const
00529 {
00530 const NxsDiscreteStateRow & row = GetDiscreteMatrixRow(taxInd);
00531 unsigned nAmbig = 0;
00532 const NxsDiscreteDatatypeMapper * m;
00533 if (charIndices == NULL)
00534 {
00535 unsigned cInd = 0;
00536 for (NxsDiscreteStateRow::const_iterator cellIt = row.begin(); cellIt != row.end(); ++cellIt)
00537 {
00538 m = GetDatatypeMapperForChar(cInd++);
00539 NCL_ASSERT(m);
00540 const NxsDiscreteStateCell & c = *cellIt;
00541 if (c < 0 || c >= (NxsDiscreteStateCell) m->GetNumStates())
00542 {
00543 if (countOnlyCompletelyMissing)
00544 {
00545 if (c == NXS_MISSING_CODE)
00546 nAmbig++;
00547 }
00548 else
00549 {
00550 if (c != NXS_GAP_STATE_CODE || treatGapsAsMissing)
00551 nAmbig++;
00552 }
00553 }
00554 }
00555 }
00556 else
00557 {
00558 for (NxsUnsignedSet::const_iterator c = charIndices->begin(); c != charIndices->end(); ++c)
00559 {
00560 const unsigned cIndex = *c;
00561 m = GetDatatypeMapperForChar(cIndex);
00562 const NxsDiscreteStateCell & sc = row.at(cIndex);
00563 if (sc < 0 || sc >= (NxsDiscreteStateCell) m->GetNumStates())
00564 {
00565 if (countOnlyCompletelyMissing)
00566 {
00567 if (sc == NXS_MISSING_CODE)
00568 nAmbig++;
00569 }
00570 else
00571 {
00572 if (sc != NXS_GAP_STATE_CODE || treatGapsAsMissing)
00573 nAmbig++;
00574 }
00575 }
00576 }
00577 }
00578 return nAmbig;
00579 }
00580
00581 bool NxsCharactersBlock::FirstTaxonStatesAreSubsetOfSecond(
00582 const unsigned firstTaxonInd,
00583 const unsigned secondTaxonInd,
00584 const NxsUnsignedSet * charIndices,
00585 const bool treatAmbigAsMissing,
00586 const bool treatGapAsMissing) const
00587 {
00588 const NxsDiscreteStateRow & firstRow = GetDiscreteMatrixRow(firstTaxonInd);
00589 const NxsDiscreteStateRow & secondRow = GetDiscreteMatrixRow(secondTaxonInd);
00590 const NxsDiscreteDatatypeMapper * m;
00591 if (charIndices == NULL)
00592 {
00593 unsigned cInd = 0;
00594 NxsDiscreteStateRow::const_iterator firstIt = firstRow.begin();
00595 NxsDiscreteStateRow::const_iterator secondIt = secondRow.begin();
00596 for (; firstIt != firstRow.end(); ++firstIt, ++secondIt)
00597 {
00598 m = GetDatatypeMapperForChar(cInd++);
00599 const NxsDiscreteStateCell ns = (NxsDiscreteStateCell) m->GetNumStates();
00600 NxsDiscreteStateCell f = *firstIt;
00601 NxsDiscreteStateCell s = *secondIt;
00602 if (treatAmbigAsMissing)
00603 {
00604 if (f >= ns)
00605 f = NXS_MISSING_CODE;
00606 if (s >= ns)
00607 s = NXS_MISSING_CODE;
00608 }
00609 if (!m->FirstIsSubset(f, s, treatGapAsMissing))
00610 return false;
00611 }
00612 }
00613 else
00614 {
00615 for (NxsUnsignedSet::const_iterator c = charIndices->begin(); c != charIndices->end(); ++c)
00616 {
00617 const unsigned cIndex = *c;
00618 m = GetDatatypeMapperForChar(cIndex);
00619 const NxsDiscreteStateCell ns = m->GetNumStates();
00620 NxsDiscreteStateCell f = firstRow.at(cIndex);
00621 NxsDiscreteStateCell s = secondRow.at(cIndex);
00622 if (treatAmbigAsMissing)
00623 {
00624 if (f >= ns)
00625 f = NXS_MISSING_CODE;
00626 if (s >= ns)
00627 s = NXS_MISSING_CODE;
00628 }
00629 if (!m->FirstIsSubset(f, s, treatGapAsMissing))
00630 return false;
00631 }
00632 }
00633 return true;
00634 }
00635
00636 std::pair<unsigned, unsigned> NxsCharactersBlock::GetPairwiseDist(
00637 const unsigned firstTaxonInd,
00638 const unsigned secondTaxonInd,
00639 const NxsUnsignedSet * charIndices,
00640 const bool treatAmbigAsMissing,
00641 const bool treatGapAsMissing) const
00642 {
00643 const NxsDiscreteStateRow & firstRow = GetDiscreteMatrixRow(firstTaxonInd);
00644 const NxsDiscreteStateRow & secondRow = GetDiscreteMatrixRow(secondTaxonInd);
00645 const NxsDiscreteDatatypeMapper * m;
00646 unsigned nDiffs = 0;
00647 unsigned nSites = 0;
00648 if (charIndices == NULL)
00649 {
00650 unsigned cInd = 0;
00651 NxsDiscreteStateRow::const_iterator firstIt = firstRow.begin();
00652 NxsDiscreteStateRow::const_iterator secondIt = secondRow.begin();
00653 for (; firstIt != firstRow.end(); ++firstIt, ++secondIt)
00654 {
00655 m = GetDatatypeMapperForChar(cInd++);
00656 const NxsDiscreteStateCell ns = m->GetNumStates();
00657 NxsDiscreteStateCell f = *firstIt;
00658 NxsDiscreteStateCell s = *secondIt;
00659 if (treatAmbigAsMissing)
00660 {
00661 if (f >= ns)
00662 f = NXS_MISSING_CODE;
00663 if (s >= ns)
00664 s = NXS_MISSING_CODE;
00665 }
00666 if (f < 0 || s < 0)
00667 {
00668 if (treatGapAsMissing && (f == NXS_GAP_STATE_CODE || s == NXS_GAP_STATE_CODE))
00669 continue;
00670 if (f == NXS_MISSING_CODE || s == NXS_MISSING_CODE)
00671 continue;
00672 }
00673 nSites++;
00674 const std::set<NxsDiscreteStateCell> & ssim = m->GetStateIntersection(f, s);
00675 if (!ssim.empty())
00676 ++nDiffs;
00677 }
00678 }
00679 else
00680 {
00681 for (NxsUnsignedSet::const_iterator c = charIndices->begin(); c != charIndices->end(); ++c)
00682 {
00683 m = GetDatatypeMapperForChar(*c);
00684 const NxsDiscreteStateCell ns = (NxsDiscreteStateCell) m->GetNumStates();
00685 NxsDiscreteStateCell f = firstRow.at(*c);
00686 NxsDiscreteStateCell s = secondRow.at(*c);
00687 if (treatAmbigAsMissing)
00688 {
00689 if (f >= ns)
00690 f = NXS_MISSING_CODE;
00691 if (s >= ns)
00692 s = NXS_MISSING_CODE;
00693 }
00694 if (f < 0 || s < 0)
00695 {
00696 if (treatGapAsMissing && (f == NXS_GAP_STATE_CODE || s == NXS_GAP_STATE_CODE))
00697 continue;
00698 if (f == NXS_MISSING_CODE || s == NXS_MISSING_CODE)
00699 continue;
00700 }
00701 nSites++;
00702 const std::set<NxsDiscreteStateCell> & ssi = m->GetStateIntersection(f, s);
00703 if (!ssi.empty())
00704 ++nDiffs;
00705 }
00706 }
00707 return std::pair<unsigned, unsigned>(nDiffs, nSites);
00708 }
00709
00710
00711 void NxsDiscreteDatatypeMapper::BuildStateSubsetMatrix() const
00712 {
00713 if (stateIntersectionMatrix.empty())
00714 BuildStateIntersectionMatrix();
00715 isStateSubsetMatrix.clear();
00716 isStateSubsetMatrixGapsMissing.clear();
00717 const unsigned nsPlus = stateSetsVec.size();
00718 IsStateSubsetRow r(nsPlus, false);
00719 isStateSubsetMatrix.assign(nsPlus, r);
00720 isStateSubsetMatrixGapsMissing.assign(nsPlus, r);
00721 for (unsigned i = 0; i < nsPlus; ++i)
00722 {
00723 for (unsigned j = 0; j < nsPlus; ++j)
00724 {
00725 if (!stateIntersectionMatrix[i][j].empty())
00726 {
00727 isStateSubsetMatrix[i][j] = true;
00728 isStateSubsetMatrixGapsMissing[i][j] = true;
00729 }
00730 }
00731 }
00732 isStateSubsetMatrixGapsMissing[0][1] = true;
00733 isStateSubsetMatrixGapsMissing[1][0] = true;
00734 }
00735
00736 void NxsDiscreteDatatypeMapper::BuildStateIntersectionMatrix() const
00737 {
00738 const std::set<NxsDiscreteStateCell> emptySet;
00739
00740 stateIntersectionMatrix.clear();
00741
00742 const unsigned nsPlus = stateSetsVec.size();
00743 const unsigned offset = (unsigned)(sclOffset + 2);
00744 StateIntersectionRow emptyRow(nsPlus, emptySet);
00745 stateIntersectionMatrix.assign(nsPlus, emptyRow);
00746 for (unsigned i = offset; i < nsPlus; ++i)
00747 {
00748 for (unsigned j = i; j < nsPlus; ++j)
00749 {
00750 const unsigned offi = i + sclOffset;
00751 const unsigned offj = j + sclOffset;
00752 std::set<NxsDiscreteStateCell> intersect;
00753 const std::set<NxsDiscreteStateCell> &fs = GetStateSetForCode(offi);
00754 const std::set<NxsDiscreteStateCell> &ss = GetStateSetForCode(offj);
00755 set_intersection(fs.begin(), fs.end(), ss.begin(), ss.end(), inserter(intersect, intersect.begin()));
00756 stateIntersectionMatrix[i - NXS_GAP_STATE_CODE][j - NXS_GAP_STATE_CODE] = intersect;
00757 if (i != j)
00758 stateIntersectionMatrix[j - NXS_GAP_STATE_CODE][i - NXS_GAP_STATE_CODE] = stateIntersectionMatrix[i - NXS_GAP_STATE_CODE][j - NXS_GAP_STATE_CODE];
00759 }
00760 }
00761
00762 std::set<NxsDiscreteStateCell> tmpSet;
00763 NCL_ASSERT(1 == NXS_MISSING_CODE - NXS_GAP_STATE_CODE);
00764 tmpSet.insert(NXS_GAP_STATE_CODE);
00765 stateIntersectionMatrix[0][0] = tmpSet;
00766
00767 tmpSet.clear();
00768 tmpSet.insert(NXS_MISSING_CODE);
00769 stateIntersectionMatrix[1][1] = tmpSet;
00770 for (unsigned i = offset; i < nsPlus; ++i)
00771 {
00772 const unsigned offi = i + sclOffset;
00773 stateIntersectionMatrix[1][i - NXS_GAP_STATE_CODE] = GetStateSetForCode(offi);
00774 }
00775 }
00776
00777
00778 NxsGeneticCodesEnum geneticCodeNameToEnum(std::string n)
00779 {
00780 NxsString::to_lower(n);
00781 if (n == "standard")
00782 return NXS_GCODE_STANDARD;
00783 if (n == "vertmito")
00784 return NXS_GCODE_VERT_MITO;
00785 if (n == "yeastmito")
00786 return NXS_GCODE_YEAST_MITO;
00787 if (n == "moldmito")
00788 return NXS_GCODE_MOLD_MITO;
00789 if (n == "invertmito")
00790 return NXS_GCODE_INVERT_MITO;
00791 if (n == "ciliate")
00792 return NXS_GCODE_CILIATE;
00793 if (n == "echinomito")
00794 return NXS_GCODE_ECHINO_MITO;
00795 if (n == "euplotid")
00796 return NXS_GCODE_EUPLOTID;
00797 if (n == "plantplastid")
00798 return NXS_GCODE_PLANT_PLASTID;
00799 if (n == "altyeast")
00800 return NXS_GCODE_ALT_YEAST;
00801 if (n == "ascidianmito")
00802 return NXS_GCODE_ASCIDIAN_MITO;
00803 if (n == "altflatwormmito")
00804 return NXS_GCODE_ALT_FLATWORM_MITO;
00805 if (n == "blepharismamacro")
00806 return NXS_GCODE_BLEPHARISMA_MACRO;
00807 if (n == "chlorophyceanmito")
00808 return NXS_GCODE_CHLOROPHYCEAN_MITO;
00809 if (n == "trematodemito")
00810 return NXS_GCODE_TREMATODE_MITO;
00811 if (n == "scenedesmusmito")
00812 return NXS_GCODE_SCENEDESMUS_MITO;
00813 if (n == "thraustochytriummito")
00814 return NXS_GCODE_THRAUSTOCHYTRIUM_MITO;
00815 NxsString err = "Unrecognized genetic code name: ";
00816 err << n;
00817 throw NxsException(err);
00818 }
00819
00820 std::string geneticCodeEnumToName(NxsGeneticCodesEnum n)
00821 {
00822 if (n == NXS_GCODE_STANDARD)
00823 return "Standard";
00824 if (n == NXS_GCODE_VERT_MITO)
00825 return "VertMito";
00826 if (n == NXS_GCODE_YEAST_MITO)
00827 return "YeastMito";
00828 if (n == NXS_GCODE_MOLD_MITO)
00829 return "MoldMito";
00830 if (n == NXS_GCODE_INVERT_MITO)
00831 return "InvertMito";
00832 if (n == NXS_GCODE_CILIATE)
00833 return "Ciliate";
00834 if (n == NXS_GCODE_ECHINO_MITO)
00835 return "EchinoMito";
00836 if (n == NXS_GCODE_EUPLOTID)
00837 return "Euplotid";
00838 if (n == NXS_GCODE_PLANT_PLASTID)
00839 return "PlantPlastid";
00840 if (n == NXS_GCODE_ALT_YEAST)
00841 return "AltYeast";
00842 if (n == NXS_GCODE_ASCIDIAN_MITO)
00843 return "AscidianMito";
00844 if (n == NXS_GCODE_ALT_FLATWORM_MITO)
00845 return "AltFlatwormMito";
00846 if (n == NXS_GCODE_BLEPHARISMA_MACRO)
00847 return "BlepharismaMacro";
00848 if (n == NXS_GCODE_CHLOROPHYCEAN_MITO)
00849 return "ChlorophyceanMito";
00850 if (n == NXS_GCODE_TREMATODE_MITO)
00851 return "Trematodemito";
00852 if (n == NXS_GCODE_SCENEDESMUS_MITO)
00853 return "ScenedesmusMito";
00854 if (n == NXS_GCODE_THRAUSTOCHYTRIUM_MITO)
00855 return "ThraustochytriumMito";
00856 NxsString err = "Unrecognized genetic code enumeration: ";
00857 err << n;
00858 throw NxsException(err);
00859 }
00860
00861 std::vector<std::string> getGeneticCodeNames()
00862 {
00863 std::vector<std::string> n(NXS_GCODE_CODE_ENUM_SIZE);
00864 n[NXS_GCODE_STANDARD] = "Standard" ;
00865 n[NXS_GCODE_VERT_MITO] = "VertMito" ;
00866 n[NXS_GCODE_YEAST_MITO] = "YeastMito" ;
00867 n[NXS_GCODE_MOLD_MITO] = "MoldMito" ;
00868 n[NXS_GCODE_INVERT_MITO] = "InvertMito" ;
00869 n[NXS_GCODE_CILIATE] = "Ciliate" ;
00870 n[NXS_GCODE_ECHINO_MITO] = "EchinoMito" ;
00871 n[NXS_GCODE_EUPLOTID] = "Euplotid" ;
00872 n[NXS_GCODE_PLANT_PLASTID] = "PlantPlastid" ;
00873 n[NXS_GCODE_ALT_YEAST] = "AltYeast" ;
00874 n[NXS_GCODE_ASCIDIAN_MITO] = "AscidianMito" ;
00875 n[NXS_GCODE_ALT_FLATWORM_MITO] = "AltFlatwormMito" ;
00876 n[NXS_GCODE_BLEPHARISMA_MACRO] = "BlepharismaMacro" ;
00877 n[NXS_GCODE_CHLOROPHYCEAN_MITO] = "ChlorophyceanMito" ;
00878 n[NXS_GCODE_TREMATODE_MITO] = "TrematodeMito" ;
00879 n[NXS_GCODE_SCENEDESMUS_MITO] = "ScenedesmusMito" ;
00880 n[NXS_GCODE_THRAUSTOCHYTRIUM_MITO] = "ThraustochytriumMito" ;
00881 return n;
00882 }
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905 std::string getGeneticCodeAAOrder(NxsGeneticCodesEnum codeIndex)
00906 {
00907 std::vector<std::string> code(NXS_GCODE_CODE_ENUM_SIZE);
00908 code[NXS_GCODE_STANDARD] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF";
00909 code[NXS_GCODE_VERT_MITO] = "KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00910 code[NXS_GCODE_YEAST_MITO] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00911 code[NXS_GCODE_MOLD_MITO] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00912 code[NXS_GCODE_INVERT_MITO] = "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00913 code[NXS_GCODE_CILIATE] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLF";
00914 code[NXS_GCODE_ECHINO_MITO] = "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00915 code[NXS_GCODE_EUPLOTID] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLF";
00916 code[NXS_GCODE_PLANT_PLASTID] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF";
00917 code[NXS_GCODE_ALT_YEAST] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF";
00918 code[NXS_GCODE_ASCIDIAN_MITO] = "KNKNTTTTGSGSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00919 code[NXS_GCODE_ALT_FLATWORM_MITO] = "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYY*YSSSSWCWCLFLF";
00920 code[NXS_GCODE_BLEPHARISMA_MACRO] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YQYSSSS*CWCLFLF";
00921 code[NXS_GCODE_CHLOROPHYCEAN_MITO] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLYSSSS*CWCLFLF";
00922 code[NXS_GCODE_TREMATODE_MITO] = "NNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF";
00923 code[NXS_GCODE_SCENEDESMUS_MITO] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLY*SSS*CWCLFLF";
00924 code[NXS_GCODE_THRAUSTOCHYTRIUM_MITO] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWC*FLF";
00925 int c = (int) codeIndex;
00926 return code.at(c);
00927 }
00928
00929
00930
00931 std::vector<NxsDiscreteStateCell> getGeneticCodeIndicesAAOrder(const NxsGeneticCodesEnum codeIndex);
00932
00933
00934 std::vector<NxsDiscreteStateCell> getGeneticCodeIndicesAAOrder(const NxsGeneticCodesEnum codeIndex)
00935 {
00936 std::vector<NxsDiscreteStateCell> aaInd(64);
00937 aaInd[0] = 8;
00938 aaInd[1] = 11;
00939 aaInd[2] = 8;
00940 aaInd[3] = 11;
00941 aaInd[4] = 16;
00942 aaInd[5] = 16;
00943 aaInd[6] = 16;
00944 aaInd[7] = 16;
00945 aaInd[8] = 14;
00946 aaInd[9] = 15;
00947 aaInd[10] = 14;
00948 aaInd[11] = 15;
00949 aaInd[12] = 7;
00950 aaInd[13] = 7;
00951 aaInd[14] = 10;
00952 aaInd[15] = 7;
00953 aaInd[16] = 13;
00954 aaInd[17] = 6;
00955 aaInd[18] = 13;
00956 aaInd[19] = 6;
00957 aaInd[20] = 12;
00958 aaInd[21] = 12;
00959 aaInd[22] = 12;
00960 aaInd[23] = 12;
00961 aaInd[24] = 14;
00962 aaInd[25] = 14;
00963 aaInd[26] = 14;
00964 aaInd[27] = 14;
00965 aaInd[28] = 9;
00966 aaInd[29] = 9;
00967 aaInd[30] = 9;
00968 aaInd[31] = 9;
00969 aaInd[32] = 3;
00970 aaInd[33] = 2;
00971 aaInd[34] = 3;
00972 aaInd[35] = 2;
00973 aaInd[36] = 0;
00974 aaInd[37] = 0;
00975 aaInd[38] = 0;
00976 aaInd[39] = 0;
00977 aaInd[40] = 5;
00978 aaInd[41] = 5;
00979 aaInd[42] = 5;
00980 aaInd[43] = 5;
00981 aaInd[44] = 17;
00982 aaInd[45] = 17;
00983 aaInd[46] = 17;
00984 aaInd[47] = 17;
00985 aaInd[48] = 20;
00986 aaInd[49] = 19;
00987 aaInd[50] = 20;
00988 aaInd[51] = 19;
00989 aaInd[52] = 15;
00990 aaInd[53] = 15;
00991 aaInd[54] = 15;
00992 aaInd[55] = 15;
00993 aaInd[56] = 20;
00994 aaInd[57] = 1;
00995 aaInd[58] = 18;
00996 aaInd[59] = 1;
00997 aaInd[60] = 9;
00998 aaInd[61] = 4;
00999 aaInd[62] = 9;
01000 aaInd[63] = 4;
01001 if (codeIndex == NXS_GCODE_VERT_MITO) {
01002 aaInd[8] = 20;
01003 aaInd[10] = 20;
01004 aaInd[12] = 10;
01005 aaInd[56] = 18;
01006 }
01007 else if (codeIndex == NXS_GCODE_YEAST_MITO) {
01008 aaInd[56] = 18;
01009 }
01010 else if (codeIndex == NXS_GCODE_MOLD_MITO) {
01011 aaInd[56] = 18;
01012 }
01013 else if (codeIndex == NXS_GCODE_INVERT_MITO) {
01014 aaInd[8] = 15;
01015 aaInd[10] = 15;
01016 aaInd[12] = 10;
01017 aaInd[56] = 18;
01018 }
01019 else if (codeIndex == NXS_GCODE_CILIATE) {
01020 aaInd[48] = 13;
01021 aaInd[50] = 13;
01022 }
01023 else if (codeIndex == NXS_GCODE_ECHINO_MITO) {
01024 aaInd[0] = 11;
01025 aaInd[8] = 15;
01026 aaInd[10] = 15;
01027 aaInd[56] = 18;
01028 }
01029 else if (codeIndex == NXS_GCODE_EUPLOTID) {
01030 aaInd[56] = 1;
01031 }
01032 else if (codeIndex == NXS_GCODE_ALT_YEAST) {
01033 aaInd[30] = 15;
01034 }
01035 else if (codeIndex == NXS_GCODE_ASCIDIAN_MITO) {
01036 aaInd[8] = 5;
01037 aaInd[10] = 5;
01038 aaInd[12] = 10;
01039 aaInd[56] = 18;
01040 }
01041 else if (codeIndex == NXS_GCODE_ALT_FLATWORM_MITO) {
01042 aaInd[0] = 11;
01043 aaInd[8] = 15;
01044 aaInd[10] = 15;
01045 aaInd[48] = 19;
01046 aaInd[56] = 18;
01047 }
01048 else if (codeIndex == NXS_GCODE_BLEPHARISMA_MACRO) {
01049 aaInd[50] = 13;
01050 }
01051 else if (codeIndex == NXS_GCODE_CHLOROPHYCEAN_MITO) {
01052 aaInd[50] = 9;
01053 }
01054 else if (codeIndex == NXS_GCODE_TREMATODE_MITO) {
01055 aaInd[0] = 11;
01056 aaInd[8] = 15;
01057 aaInd[10] = 15;
01058 aaInd[12] = 10;
01059 aaInd[56] = 18;
01060 }
01061 else if (codeIndex == NXS_GCODE_SCENEDESMUS_MITO) {
01062 aaInd[50] = 9;
01063 aaInd[52] = 20;
01064 }
01065 else if (codeIndex == NXS_GCODE_THRAUSTOCHYTRIUM_MITO) {
01066 aaInd[60] = 20;
01067 }
01068 return aaInd;
01069 }
01070
01071
01072 void NxsCharactersBlock::CodonPosPartitionToPosList(const NxsPartition &codonPos, std::list<int> * charIndices)
01073 {
01074 if (charIndices == 0L)
01075 return;
01076 const NxsUnsignedSet * firstPos = 0L;
01077 const NxsUnsignedSet * secondPos = 0L;
01078 const NxsUnsignedSet * thirdPos = 0L;
01079 for (NxsPartition::const_iterator pIt = codonPos.begin(); pIt != codonPos.end(); ++pIt)
01080 {
01081 if (pIt->first == "1")
01082 {
01083 NCL_ASSERT(firstPos == 0L);
01084 firstPos = &(pIt->second);
01085 }
01086 else if (pIt->first == "2")
01087 {
01088 NCL_ASSERT(secondPos == 0L);
01089 secondPos = &(pIt->second);
01090 }
01091 else if (pIt->first == "3")
01092 {
01093 NCL_ASSERT(thirdPos == 0L);
01094 thirdPos = &(pIt->second);
01095 }
01096 }
01097 if (firstPos == 0L || secondPos == 0L || thirdPos == 0L)
01098 throw NxsException("Expecting partition subsets named 1, 2, and 3");
01099 if (firstPos->size() != secondPos->size() || firstPos->size() != thirdPos->size())
01100 throw NxsException("Expecting the partition subsets named 1, 2, and 3 to have the same size");
01101 NxsUnsignedSet::const_iterator fIt = firstPos->begin();
01102 NxsUnsignedSet::const_iterator sIt = secondPos->begin();
01103 NxsUnsignedSet::const_iterator thIt = thirdPos->begin();
01104 const NxsUnsignedSet::const_iterator endIt = firstPos->end();
01105 for (; fIt != endIt; ++fIt, ++sIt, ++thIt)
01106 {
01107 charIndices->push_back(*fIt);
01108 charIndices->push_back(*sIt);
01109 charIndices->push_back(*thIt);
01110 }
01111 }
01112
01113
01114
01115 NxsCharactersBlock * NxsCharactersBlock::NewProteinCharactersBlock(
01116 const NxsCharactersBlock * codonBlock,
01117 bool mapPartialAmbigToUnknown,
01118 bool gapToUnknown,
01119 NxsGeneticCodesEnum codeIndex)
01120 {
01121 std::vector<NxsDiscreteStateCell> aas = getGeneticCodeIndicesAAOrder(codeIndex);
01122 return NxsCharactersBlock::NewProteinCharactersBlock(codonBlock, mapPartialAmbigToUnknown, gapToUnknown, aas);
01123 }
01124
01125
01126
01127
01128
01129 NxsCharactersBlock * NxsCharactersBlock::NewProteinCharactersBlock(
01130 const NxsCharactersBlock * codonBlock,
01131 bool mapPartialAmbigToUnknown,
01132 bool gapToUnknown,
01133 const std::vector<NxsDiscreteStateCell> & aaIndices)
01134 {
01135 if (!codonBlock)
01136 return NULL;
01137 if (codonBlock->GetDataType() != NxsCharactersBlock::codon)
01138 throw NxsException("NewProteinCharactersBlock must be called with a block of codon datatype");
01139 const unsigned nc = codonBlock->GetNCharTotal();
01140
01141
01142 NxsTaxaBlockAPI * taxa = codonBlock->GetTaxaBlockPtr(NULL);
01143 NxsCharactersBlock * aaBlock = new NxsCharactersBlock(taxa, NULL);
01144 aaBlock->SetNChar(nc);
01145 aaBlock->SetNTax(codonBlock->GetNTaxWithData());
01146 aaBlock->missing = codonBlock->missing;
01147 aaBlock->gap = (gapToUnknown ? '\0' : codonBlock->gap);
01148 aaBlock->gapMode = codonBlock->gapMode;
01149 aaBlock->datatype = NxsCharactersBlock::protein;
01150 aaBlock->originalDatatype = codonBlock->originalDatatype;
01151 aaBlock->ResetSymbols();
01152 aaBlock->tokens = false;
01153
01154
01155 NxsPartition dummy;
01156 std::vector<DataTypesEnum> dummyVec;
01157 aaBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
01158 const NxsDiscreteDatatypeMapper * codonMapper = codonBlock->GetDatatypeMapperForChar(0);
01159 NxsDiscreteDatatypeMapper * aaMapper = aaBlock->GetMutableDatatypeMapperForChar(0);
01160 aaMapper->geneticCode = codonMapper->geneticCode;
01161
01162 const unsigned ntax = (taxa == 0L ? codonBlock->GetNTaxWithData() : taxa->GetNTax());
01163 aaBlock->datatypeReadFromFormat = false;
01164 aaBlock->statesFormat = STATES_PRESENT;
01165 aaBlock->restrictionDataype = false;
01166 aaBlock->supportMixedDatatype = false;
01167 aaBlock->convertAugmentedToMixed = false;
01168 aaBlock->writeInterleaveLen = INT_MAX;
01169
01170
01171 NxsDiscreteStateRow matRow(nc, 0);
01172 aaBlock->discreteMatrix.assign(ntax, matRow);
01173 if (mapPartialAmbigToUnknown && (gapToUnknown || codonBlock->GetGapSymbol() != '\0'))
01174 {
01175 for (unsigned taxInd = 0; taxInd < ntax; ++taxInd)
01176 {
01177 const NxsDiscreteStateRow & sourceRow = codonBlock->discreteMatrix.at(taxInd);
01178 NxsDiscreteStateRow & destRow = aaBlock->discreteMatrix.at(taxInd);
01179 for (unsigned c = 0; c < nc ; ++c)
01180 {
01181 const NxsDiscreteStateCell codon = sourceRow[c];
01182 if (codon < 0 || codon > 63)
01183 destRow[c] = NXS_MISSING_CODE;
01184 else
01185 destRow[c] = aaIndices.at(codon);
01186 }
01187 }
01188 }
01189 else
01190 {
01191 throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
01192 }
01193 return aaBlock;
01194 }
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206 NxsCharactersBlock * NxsCharactersBlock::NewCodonsCharactersBlock(
01207 const NxsCharactersBlock * dnaBlock,
01208 bool mapPartialAmbigToUnknown,
01209 bool gapsToUnknown,
01210 bool honorCharActive,
01211 const std::list<int> * charIndices,
01212 NxsCharactersBlock ** spareNucs)
01213 {
01214 if (!dnaBlock)
01215 return NULL;
01216 DataTypesEnum nucType = dnaBlock->GetDataType();
01217 if (nucType != NxsCharactersBlock::dna && nucType != NxsCharactersBlock::rna && nucType != NxsCharactersBlock::nucleotide)
01218 return NULL;
01219 std::list<int> charInds;
01220 const std::list<int> * sourceChars;
01221 std::list<int> culled;
01222 NxsUnsignedSet untranslated;
01223
01224
01225
01226 unsigned nc = dnaBlock->GetNCharTotal();
01227
01228 if (charIndices == NULL)
01229 {
01230 for (unsigned i = 0; i < nc; ++i)
01231 charInds.push_back((int)i);
01232 sourceChars = &charInds;
01233 }
01234 else
01235 sourceChars = charIndices;
01236
01237 if (honorCharActive)
01238 {
01239 for (std::list<int>::const_iterator cIt = sourceChars->begin(); cIt != sourceChars->end(); ++cIt)
01240 {
01241 const int c = *cIt;
01242 if (c < 0 || dnaBlock->IsActiveChar((unsigned) c))
01243 culled.push_back(c);
01244 }
01245 if (spareNucs)
01246 {
01247 for (unsigned c = 0; c < nc; ++c)
01248 {
01249 if (dnaBlock->IsActiveChar((unsigned) c))
01250 untranslated.insert(c);
01251 }
01252 }
01253 sourceChars = &culled;
01254 }
01255 else if (spareNucs)
01256 {
01257 for (unsigned c = 0; c < nc; ++c)
01258 untranslated.insert(c);
01259 }
01260
01261 const unsigned nnucs = (const unsigned)sourceChars->size();
01262 if (nnucs % 3)
01263 throw NxsException("Cannot create a codons block with a number of characters that is not a multiple of 3");
01264 const unsigned ncodons = nnucs/3;
01265
01266
01267 NxsTaxaBlockAPI * taxa = dnaBlock->GetTaxaBlockPtr(NULL);
01268 NxsCharactersBlock * codonsBlock = new NxsCharactersBlock(taxa, NULL);
01269 codonsBlock->SetNChar(ncodons);
01270 codonsBlock->SetNTax(dnaBlock->GetNTaxWithData());
01271 codonsBlock->missing = dnaBlock->missing;
01272 codonsBlock->gap = (gapsToUnknown ? '\0' : dnaBlock->gap);
01273 codonsBlock->gapMode = dnaBlock->gapMode;
01274 codonsBlock->symbols.assign(64, '\0');
01275 codonsBlock->tokens = false;
01276 const char * gsl[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
01277
01278 codonsBlock->globalStateLabels.reserve(64);
01279 for (unsigned i = 0 ; i < 64; ++i)
01280 codonsBlock->globalStateLabels.push_back(NxsString(gsl[i]));
01281
01282
01283 codonsBlock->datatype = NxsCharactersBlock::codon;
01284 codonsBlock->originalDatatype = nucType;
01285
01286 const NxsPartition dummy;
01287 const std::vector<DataTypesEnum> dummyVec;
01288 codonsBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
01289 NxsDiscreteDatatypeMapper * codonMapper = codonsBlock->GetMutableDatatypeMapperForChar(0);
01290 codonMapper->geneticCode = NXS_GCODE_NO_CODE;
01291
01292 const unsigned ntax = (taxa == 0L ? dnaBlock->GetNTaxWithData() : taxa->GetNTax());
01293 codonsBlock->datatypeReadFromFormat = false;
01294 codonsBlock->statesFormat = STATES_PRESENT;
01295 codonsBlock->restrictionDataype = false;
01296 codonsBlock->supportMixedDatatype = false;
01297 codonsBlock->convertAugmentedToMixed = false;
01298 codonsBlock->writeInterleaveLen = INT_MAX;
01299
01300
01301 const int maxUnambigNucState = 3;
01302 const NxsDiscreteStateCell codonMissingState = NXS_MISSING_CODE;
01303 NxsDiscreteStateRow matRow(ncodons, 0);
01304 codonsBlock->discreteMatrix.assign(ntax, matRow);
01305 const std::list<int>::const_iterator endNucIt = sourceChars->end();
01306 if (mapPartialAmbigToUnknown && (gapsToUnknown || dnaBlock->GetGapSymbol() != '\0'))
01307 {
01308 for (unsigned taxInd = 0; taxInd < ntax; ++taxInd)
01309 {
01310 std::list<int>::const_iterator nucIt = sourceChars->begin();
01311 const NxsDiscreteStateRow & sourceRow = dnaBlock->discreteMatrix.at(taxInd);
01312 NxsDiscreteStateRow & destRow = codonsBlock->discreteMatrix.at(taxInd);
01313 for (unsigned codonInd = 0; codonInd < ncodons ; ++codonInd)
01314 {
01315 NCL_ASSERT(nucIt != endNucIt);
01316 const int fInd = *nucIt++;
01317 NCL_ASSERT(nucIt != endNucIt);
01318 const int sInd = *nucIt++;
01319 NCL_ASSERT(nucIt != endNucIt);
01320 const int tInd = *nucIt++;
01321 if (spareNucs)
01322 {
01323 untranslated.erase(fInd);
01324 untranslated.erase(sInd);
01325 untranslated.erase(tInd);
01326 }
01327 if (fInd < 0 || sInd < 0 || tInd < 0)
01328 destRow[codonInd] = codonMissingState;
01329 else
01330 {
01331 const NxsDiscreteStateCell fb = sourceRow[fInd];
01332 const NxsDiscreteStateCell sb = sourceRow[sInd];
01333 const NxsDiscreteStateCell tb = sourceRow[tInd];
01334 if (fb < 0 || sb < 0 || tb < 0 || fb > maxUnambigNucState || sb > maxUnambigNucState || tb > maxUnambigNucState)
01335 destRow[codonInd] = codonMissingState;
01336 else
01337 destRow[codonInd] = 16*fb + 4*sb + tb;
01338 }
01339 }
01340 }
01341 }
01342 else
01343 {
01344 throw NxsException("NewCodonsCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
01345 }
01346 if (!untranslated.empty())
01347 {
01348 const unsigned nunt = (const unsigned)untranslated.size();
01349
01350 NxsCharactersBlock * untBlock = new NxsCharactersBlock(taxa, NULL);
01351 untBlock->SetNChar(nunt);
01352 untBlock->SetNTax(ntax);
01353 untBlock->missing = dnaBlock->missing;
01354 untBlock->gap = (gapsToUnknown ? '\0' : dnaBlock->gap);
01355 untBlock->gapMode = dnaBlock->gapMode;
01356 untBlock->datatype = nucType;
01357 untBlock->originalDatatype = dnaBlock->originalDatatype;
01358 untBlock->ResetSymbols();
01359 untBlock->tokens = false;
01360
01361
01362 untBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
01363 untBlock->datatypeReadFromFormat = false;
01364 untBlock->statesFormat = STATES_PRESENT;
01365 untBlock->restrictionDataype = false;
01366 untBlock->supportMixedDatatype = false;
01367 untBlock->convertAugmentedToMixed = false;
01368 untBlock->writeInterleaveLen = INT_MAX;
01369
01370
01371 NxsDiscreteStateRow umatRow(nunt, 0);
01372 untBlock->discreteMatrix.assign(ntax, umatRow);
01373 if (mapPartialAmbigToUnknown && (gapsToUnknown || dnaBlock->GetGapSymbol() != '\0'))
01374 {
01375 for (unsigned taxInd = 0; taxInd < ntax; ++taxInd)
01376 {
01377 const NxsDiscreteStateRow & sourceRow = dnaBlock->discreteMatrix.at(taxInd);
01378 NxsDiscreteStateRow & destRow = untBlock->discreteMatrix.at(taxInd);
01379 unsigned untIndex = 0;
01380 for (NxsUnsignedSet::const_iterator uIt = untranslated.begin(); uIt != untranslated.end() ; ++uIt, ++untIndex)
01381 {
01382 const unsigned ind = *uIt;
01383 destRow.at(untIndex) = sourceRow[ind];
01384 }
01385 }
01386 }
01387 else
01388 {
01389 throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
01390 }
01391 *spareNucs = untBlock;
01392 }
01393 else if (spareNucs)
01394 *spareNucs = NULL;
01395 return codonsBlock;
01396 }
01397
01398
01399 std::vector<double> NxsTransformationManager::GetDoubleWeights(const std::string &set_name) const
01400 {
01401 std::vector<double> r;
01402 const ListOfDblWeights *p = 0L;
01403 std::map<std::string, ListOfDblWeights>::const_iterator dIt = dblWtSets.begin();
01404 for (; dIt != dblWtSets.end(); ++dIt)
01405 {
01406 if (NxsString::case_insensitive_equals(dIt->first.c_str(), set_name.c_str()))
01407 {
01408 p = &(dIt->second);
01409 break;
01410 }
01411 }
01412 if (p)
01413 {
01414 ListOfDblWeights::const_iterator wIt = p->begin();
01415 const ListOfDblWeights::const_iterator ewIt = p->end();
01416 for (; wIt != ewIt; ++wIt)
01417 {
01418 double w = wIt->first;
01419 const std::set<unsigned> &s = wIt->second;
01420 std::set<unsigned>::const_reverse_iterator ip = s.rbegin();
01421 const std::set<unsigned>::const_reverse_iterator e = s.rend();
01422 for (; ip != e; ++ip)
01423 {
01424 if (*ip >= r.size())
01425 r.resize(1 + *ip, 1.0);
01426 r[*ip] = w;
01427 }
01428 }
01429 }
01430 return r;
01431 }
01432
01433 std::vector<int> NxsTransformationManager::GetIntWeights(const std::string &set_name) const
01434 {
01435 std::vector<int> r;
01436 const ListOfIntWeights *p = 0L;
01437 std::map<std::string, ListOfIntWeights>::const_iterator dIt = intWtSets.begin();
01438 for (; dIt != intWtSets.end(); ++dIt)
01439 {
01440 if (NxsString::case_insensitive_equals(dIt->first.c_str(), set_name.c_str()))
01441 {
01442 p = &(dIt->second);
01443 break;
01444 }
01445 }
01446 if (p)
01447 {
01448 ListOfIntWeights::const_iterator wIt = p->begin();
01449 const ListOfIntWeights::const_iterator ewIt = p->end();
01450 for (; wIt != ewIt; ++wIt)
01451 {
01452 int w = wIt->first;
01453 const std::set<unsigned> &s = wIt->second;
01454 std::set<unsigned>::const_reverse_iterator ip = s.rbegin();
01455 const std::set<unsigned>::const_reverse_iterator e = s.rend();
01456 for (; ip != e; ++ip)
01457 {
01458 if (*ip >= r.size())
01459 r.resize(1 + *ip, 1);
01460 r[*ip] = w;
01461 }
01462 }
01463 }
01464 return r;
01465 }
01466
01470 NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper(
01471 NxsCharactersBlock::DataTypesEnum datatypeE,
01472 const std::string & symbolsStr,
01473 char missingChar,
01474 char gap,
01475 char matchingChar,
01476 bool respectingCase,
01477 const std::map<char, NxsString> & moreEquates)
01478 :geneticCode(NXS_GCODE_NO_CODE),
01479 cLookup(NULL),
01480 stateCodeLookupPtr(NULL),
01481 symbols(symbolsStr),
01482 nStates(0),
01483 matchChar(matchingChar),
01484 gapChar(gap),
01485 missing(missingChar),
01486 respectCase(respectingCase),
01487 extraEquates(moreEquates),
01488 datatype(datatypeE),
01489 restrictionDataype(false),
01490 userDefinedEquatesBeforeConversion(false)
01491 {
01492 if (symbols.empty())
01493 symbols = NxsCharactersBlock::GetDefaultSymbolsForType(datatype);
01494 if (datatype == NxsCharactersBlock::mixed)
01495 throw NxsException("Cannot create a mixed datatype mapper");
01496 RefreshMappings(0L);
01497 }
01498
01499 void NxsDiscreteDatatypeMapper::DebugPrint(std::ostream & out) const
01500 {
01501 out << GetNumStatesIncludingGap() << "states (";
01502 if (gapChar == '\0')
01503 out << "no gaps";
01504 else
01505 out << "including the gap \"state\"";
01506 const int nsc = (int) stateSetsVec.size();
01507 out << '\n' << nsc << " state codes.\n";
01508 out << "NEXUS State Code States\n";
01509 for (int sc = sclOffset; sc < sclOffset + nsc; ++sc)
01510 {
01511 std::string nex;
01512 for (int c = 0; c < 127; ++c)
01513 {
01514 if (cLookup[c] == sc)
01515 nex.append(1, (char) c);
01516 }
01517 int buf = (int) (10 - nex.size());
01518 nex.append(buf, ' ');
01519 out << nex << " " << sc << " ";
01520 const std::set<NxsDiscreteStateCell> &ss = GetStateSetForCode(sc);
01521 std::string decoded;
01522 for (std::set<NxsDiscreteStateCell>::const_iterator s = ss.begin(); s != ss.end(); ++s)
01523 decoded.append(StateCodeToNexusString(*s));
01524 if (decoded.length() < 2)
01525 out << decoded;
01526 else if (IsPolymorphic(sc))
01527 out << '(' << decoded << ')';
01528 else
01529 out << '{' << decoded << '}';
01530 out << '\n';
01531 }
01532 }
01533
01538 void NxsCharactersBlock::CreateDatatypeMapperObjects(const NxsPartition & dtParts, const std::vector<DataTypesEnum> & dtcodes)
01539 {
01540 try {
01541 mixedTypeMapping.clear();
01542 if (datatype != mixed)
01543 {
01544 NxsDiscreteDatatypeMapper d(datatype, symbols, missing, gap, matchchar, respectingCase, userEquates);
01545 datatype = d.GetDatatype();
01546 DatatypeMapperAndIndexSet das(d, NxsUnsignedSet());
01547 datatypeMapperVec.clear();
01548 datatypeMapperVec.push_back(das);
01549 }
01550 else
01551 {
01552 datatypeMapperVec.clear();
01553 NCL_ASSERT(dtParts.size() == dtcodes.size());
01554 datatypeMapperVec.reserve(dtParts.size());
01555 std::vector<DataTypesEnum>::const_iterator cIt = dtcodes.begin();
01556
01557 for (NxsPartition::const_iterator pIt = dtParts.begin(); pIt != dtParts.end(); ++pIt, ++cIt)
01558 {
01559 std::string mt;
01560 if (*cIt == standard)
01561 mt.assign("0123456789");
01562 NxsDiscreteDatatypeMapper d(*cIt, mt, missing, gap, matchchar, respectingCase, userEquates);
01563 const NxsUnsignedSet & indexSet = pIt->second;
01564 DatatypeMapperAndIndexSet das(d, pIt->second);
01565 NxsUnsignedSet & mappedInds = mixedTypeMapping[*cIt];
01566 mappedInds.insert(indexSet.begin(), indexSet.end());
01567 datatypeMapperVec.push_back(das);
01568 }
01569 }
01570 }
01571 catch (const NxsException & x)
01572 {
01573 std::string y = "An error was detected while trying to create a datatype mapping structure. This portion of code tends to generate cryptic error messages, so if the following message is not helpful, double check the syntax in the FORMAT command of your block.\n";
01574 y.append(x.msg);
01575 throw NxsException(y, x.pos, x.line, x.col);
01576 }
01577 }
01578
01579
01580
01581
01606 bool NxsCharactersBlock::AugmentedSymbolsToMixed()
01607 {
01608 DataTypesEnum odt = GetOriginalDataType();
01609 if (IsMixedType() || (odt == GetDataType()))
01610 return false;
01611 const std::string origSymb = GetDefaultSymbolsForType(odt);
01612 const std::string cutSymb = symbols.substr(0, origSymb.length());
01613 if (origSymb != cutSymb)
01614 return false;
01615 const std::string augmentSymbols = symbols.substr(origSymb.length());
01616 if (augmentSymbols.empty())
01617 return false;
01618 for (std::string::const_iterator a = augmentSymbols.begin(); a != augmentSymbols.end(); ++a)
01619 {
01620 if (!isdigit(*a))
01621 return false;
01622 }
01623
01624 NxsUnsignedSet stdTypeChars;
01625 NxsUnsignedSet origTypeChars;
01626 std::set<NxsDiscreteStateCell> torigStateInds;
01627 std::set<NxsDiscreteStateCell> tstdStateInds;
01628 torigStateInds.insert(NXS_GAP_STATE_CODE);
01629 tstdStateInds.insert(NXS_GAP_STATE_CODE);
01630 for (NxsDiscreteStateCell j = 0; j < (NxsDiscreteStateCell)origSymb.length(); ++j)
01631 torigStateInds.insert(j);
01632 for (NxsDiscreteStateCell j = (NxsDiscreteStateCell)origSymb.length(); j < (NxsDiscreteStateCell)symbols.length(); ++j)
01633 tstdStateInds.insert(j);
01634 const std::set<NxsDiscreteStateCell> origStateInds(torigStateInds);
01635 const unsigned nosi = (unsigned)origStateInds.size();
01636 const std::set<NxsDiscreteStateCell> stdStateInds(tstdStateInds);
01637 const unsigned nssi = (unsigned)stdStateInds.size();
01638
01639
01640 const unsigned nChars = GetNCharTotal();
01641 GapModeEnum cached_gap_mode = this->gapMode;
01642 this->gapMode = GAP_MODE_NEWSTATE;
01643 try {
01644 for (unsigned colIndex = 0; colIndex < nChars; ++colIndex)
01645 {
01646 const std::set<NxsDiscreteStateCell> cs = GetNamedStateSetOfColumn(colIndex);
01647 std::set<NxsDiscreteStateCell> origUnion;
01648 set_union(origStateInds.begin(), origStateInds.end(), cs.begin(), cs.end(), inserter(origUnion, origUnion.begin()));
01649 if (origUnion.size() > nosi)
01650 {
01651 std::set<NxsDiscreteStateCell> stdUnion;
01652 set_union(stdStateInds.begin(), stdStateInds.end(), cs.begin(), cs.end(), inserter(stdUnion, stdUnion.begin()));
01653 if (stdUnion.size() > nssi)
01654 return false;
01655 stdTypeChars.insert(colIndex);
01656 }
01657 else
01658 origTypeChars.insert(colIndex);
01659 }
01660 }
01661 catch (...)
01662 {
01663 this->gapMode = cached_gap_mode;
01664 throw;
01665 }
01666 this->gapMode = cached_gap_mode;
01667
01668
01669
01670 VecDatatypeMapperAndIndexSet mdm = datatypeMapperVec;
01671 const NxsDiscreteDatatypeMapper & oldMapper = mdm[0].first;
01672 if (oldMapper.GetUserDefinedEquatesBeforeConversion())
01673 return false;
01674
01675
01676 std::map<char, NxsString> noEquates;
01677 datatypeMapperVec.clear();
01678 NxsDiscreteDatatypeMapper o(odt, origSymb, missing, gap, matchchar, respectingCase, noEquates);
01679 datatypeMapperVec.push_back(DatatypeMapperAndIndexSet(o, origTypeChars));
01680 NxsDiscreteDatatypeMapper s(NxsCharactersBlock::standard, augmentSymbols, missing, gap, matchchar, respectingCase, noEquates);
01681 datatypeMapperVec.push_back(DatatypeMapperAndIndexSet(s, stdTypeChars));
01682
01683
01684 NxsDiscreteDatatypeMapper & newOrigTMapper = datatypeMapperVec[0].first;
01685 NxsDiscreteDatatypeMapper & newStdTMapper = datatypeMapperVec[1].first;
01686
01687
01688 const NxsDiscreteStateCell nOrigStates = (NxsDiscreteStateCell) origSymb.size();
01689 std::map<NxsDiscreteStateCell, NxsDiscreteStateCell> oldToNewStateCode;
01690 NxsDiscreteStateMatrix::iterator rowIt = discreteMatrix.begin();
01691 for (unsigned colIndex = 0; rowIt != discreteMatrix.end(); ++colIndex, ++rowIt)
01692 {
01693 NxsDiscreteStateRow & row = *rowIt;
01694 unsigned column = 0;
01695 for (NxsDiscreteStateRow::iterator cell = row.begin(); cell != row.end(); ++cell, ++column)
01696 {
01697 const NxsDiscreteStateCell initStateCode = *cell;
01698 if (initStateCode >= 0 )
01699 {
01700 std::map<NxsDiscreteStateCell, NxsDiscreteStateCell>::const_iterator otnIt = oldToNewStateCode.find(initStateCode);
01701 if (otnIt == oldToNewStateCode.end())
01702 {
01703 const bool isOrigT = origTypeChars.count(column) > 0;
01704 const std::set<NxsDiscreteStateCell> oldSymbols = oldMapper.GetStateSetForCode(initStateCode);
01705 const std::string oldNexusString = oldMapper.StateCodeToNexusString(initStateCode);
01706 const char oldNexusChar = (oldNexusString.length() == 1 ? oldNexusString[0] : '\0');
01707 const bool isPoly = oldMapper.IsPolymorphic(initStateCode);
01708 NxsDiscreteStateCell newStateCode ;
01709 if (isOrigT)
01710 {
01711 newStateCode = newOrigTMapper.StateCodeForStateSet(oldSymbols, isPoly, true, oldNexusChar);
01712 newOrigTMapper.StateCodeToNexusString(newStateCode);
01713 }
01714 else
01715 {
01716 std::set<NxsDiscreteStateCell> transSymbols;
01717 for (std::set<NxsDiscreteStateCell>::const_iterator sIt = oldSymbols.begin(); sIt != oldSymbols.end(); ++sIt)
01718 {
01719 if (*sIt >= nOrigStates)
01720 transSymbols.insert(*sIt - nOrigStates);
01721 else
01722 {
01723 NCL_ASSERT(*sIt < 0);
01724 transSymbols.insert(*sIt);
01725 }
01726 }
01727 newStateCode = newStdTMapper.StateCodeForStateSet(transSymbols, isPoly, true, oldNexusChar);
01728 newStdTMapper.StateCodeToNexusString(newStateCode);
01729 }
01730 oldToNewStateCode[initStateCode] = newStateCode;
01731 *cell = newStateCode;
01732 }
01733 else
01734 *cell = otnIt->second;
01735 }
01736 }
01737 }
01738 datatype = NxsCharactersBlock::mixed;
01739 mixedTypeMapping.clear();
01740 mixedTypeMapping[odt] = origTypeChars;
01741 mixedTypeMapping[NxsCharactersBlock::standard] = stdTypeChars;
01742 return true;
01743 }
01748 void NxsCharactersBlock::HandleFormat(
01749 NxsToken &token)
01750 {
01751 errormsg.clear();
01752 ProcessedNxsCommand tokenVec;
01753 token.ProcessAsCommand( &tokenVec);
01754
01755 const ProcessedNxsCommand::const_iterator tvEnd = tokenVec.end();
01756 NxsPartition dtParts;
01757 std::vector<DataTypesEnum> dtv;
01758 std::vector<bool> isR;
01759 if (!datatypeReadFromFormat)
01760 {
01761 bool standardDataTypeAssumed = true;
01762 bool ignoreCaseAssumed = true;
01763 datatype = standard;
01764 originalDatatype = standard;
01765 ResetSymbols();
01766 respectingCase = false;
01767 restrictionDataype = false;
01768 for (ProcessedNxsCommand::const_iterator wIt = tokenVec.begin(); wIt != tvEnd; ++wIt)
01769 {
01770 if (wIt->Equals("DATATYPE"))
01771 {
01772 DemandEquals(wIt, tvEnd, " after keyword DATATYPE");
01773 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " after \"DATATYPE =\" in FORMAT command");
01774 if (wIt->Equals("STANDARD"))
01775 {
01776 datatype = standard;
01777 symbols = "01";
01778 }
01779 else if (wIt->Equals("DNA"))
01780 datatype = dna;
01781 else if (wIt->Equals("RNA"))
01782 datatype = rna;
01783 else if (wIt->Equals("NUCLEOTIDE"))
01784 datatype = nucleotide;
01785 else if (wIt->Equals("PROTEIN"))
01786 datatype = protein;
01787 else if (wIt->Equals("RESTRICTION"))
01788 {
01789 datatype = standard;
01790 restrictionDataype = true;
01791 }
01792 else if (wIt->Equals("CONTINUOUS"))
01793 {
01794 datatype = continuous;
01795 statesFormat = INDIVIDUALS;
01796 items = std::vector<std::string>(1, std::string("AVERAGE"));
01797 tokens = true;
01798 }
01799 else if (supportMixedDatatype && wIt->Equals("MIXED"))
01800 {
01801 datatype = mixed;
01802 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " after \"DATATYPE=MIXED\" in FORMAT command. Expecting (");
01803 if (!wIt->Equals("("))
01804 {
01805 errormsg << "Expecting ( after \"DATATYPE=MIXED\" but found " << wIt->GetToken();
01806 throw NxsException(errormsg, *wIt);
01807 }
01808 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " after \"DATATYPE=MIXED(\" in FORMAT command. Expecting a datatype");
01809 ostringstream fakestream;
01810 while (!wIt->Equals(")"))
01811 {
01812 fakestream << ' ' << NxsString::GetEscaped(wIt->GetToken());
01813 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " in \"DATATYPE=MIXED\" in FORMAT command. Expecting a closing ) to terminate the list.");
01814 }
01815 fakestream << ';';
01816 const std::string accumulated = fakestream.str();
01817 istringstream fakeinput(accumulated);
01818 NxsToken subToken(fakeinput);
01819 try
01820 {
01821 std::string mt("mixed datatype definition");
01822 subToken.GetNextToken();
01823 this->ReadPartitionDef(dtParts, *this, mt, "Character", "Datatype=Mixed", subToken, false, true, false);
01824 }
01825 catch (NxsException & x)
01826 {
01827 errormsg = x.msg;
01828 throw NxsException(errormsg, *wIt);
01829 }
01830 catch (...)
01831 {
01832 errormsg << "Error parsing \"DATATYPE=MIXED\" subcommand in FORMAT the command.";
01833 throw NxsException(errormsg, *wIt);
01834 }
01835 for (NxsPartition::const_iterator pIt = dtParts.begin(); pIt != dtParts.end(); ++pIt)
01836 {
01837 NxsString name(pIt->first.c_str());
01838 name.ToUpper();
01839 if (name == "RESTRICTION")
01840 {
01841 dtv.push_back(standard);
01842 isR.push_back(true);
01843 }
01844 else
01845 {
01846 isR.push_back(false);
01847 if (name == "STANDARD")
01848 dtv.push_back(standard);
01849 else if (name == "DNA")
01850 dtv.push_back(dna);
01851 else if (name == "RNA")
01852 dtv.push_back(rna);
01853 else if (name == "NUCLEOTIDE")
01854 dtv.push_back(nucleotide);
01855 else if (name == "PROTEIN")
01856 dtv.push_back(protein);
01857 else
01858 {
01859 errormsg << pIt->first << " is not a valid DATATYPE within a " << id << " block";
01860 throw NxsException(errormsg, *wIt);
01861 }
01862 }
01863 }
01864 }
01865 else
01866 {
01867 errormsg << wIt->GetToken() << " is not a valid DATATYPE within a " << id << " block";
01868 throw NxsException(errormsg, *wIt);
01869 }
01870 datatypeReadFromFormat = true;
01871 originalDatatype = datatype;
01872 ResetSymbols();
01873 standardDataTypeAssumed = false;
01874 if (!ignoreCaseAssumed)
01875 break;
01876 }
01877 else if (wIt->Equals("RESPECTCASE"))
01878 {
01879 ignoreCaseAssumed = false;
01880 respectingCase = true;
01881 if (!standardDataTypeAssumed)
01882 break;
01883 }
01884 }
01885 }
01886 for (ProcessedNxsCommand::const_iterator wIt = tokenVec.begin(); wIt != tvEnd; ++wIt)
01887 {
01888
01889 if (wIt->Equals("DATATYPE"))
01890 {
01891 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE in FORMAT command");
01892 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE = in FORMAT command");
01893 }
01894 else if (wIt->Equals("RESPECTCASE"))
01895 {
01896 if (!respectingCase)
01897 {
01898 errormsg << "Only one FORMAT command should occur per DATA or CHARACTERS block.";
01899 throw NxsException(errormsg, *wIt);
01900 }
01901 }
01902 else if (wIt->Equals("MISSING"))
01903 {
01904 DemandEquals(wIt, tvEnd, "after keyword MISSING");
01905 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MISSING = \" in FORMAT command");
01906 const std::string t = wIt->GetToken();
01907 if (t.length() != 1)
01908 {
01909 errormsg << "MISSING symbol should be a single character, but " << t << " was specified";
01910 WarnDangerousContent(errormsg, *wIt);
01911 }
01912 else if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
01913 {
01914 errormsg << "MISSING symbol specified cannot be a punctuation token (" << t << " was specified)";
01915 WarnDangerousContent(errormsg, *wIt);
01916 }
01917 else if (token.IsWhitespaceToken(t))
01918 {
01919 errormsg << "MISSING symbol specified cannot be a whitespace character (" << t << " was specified)";
01920 WarnDangerousContent(errormsg, *wIt);
01921 }
01922 missing = t[0];
01923 }
01924 else if (wIt->Equals("GAP"))
01925 {
01926 DemandEquals(wIt, tvEnd, "after keyword GAP");
01927 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"GAP = \" in FORMAT command");
01928 const std::string t = wIt->GetToken();
01929 if (t.length() != 1)
01930 {
01931 errormsg << "GAP symbol should be a single character, but " << t << " was specified";
01932 WarnDangerousContent(errormsg, *wIt);
01933 }
01934 else if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
01935 {
01936 errormsg << "GAP symbol specified cannot be a punctuation token " << t << " was specified";
01937 WarnDangerousContent(errormsg, *wIt);
01938 }
01939 else if (token.IsWhitespaceToken(t))
01940 {
01941 errormsg << "GAP symbol specified cannot be a whitespace character " << t << " was specified";
01942 WarnDangerousContent(errormsg, *wIt);
01943 }
01944 gap = t[0];
01945 }
01946 else if (wIt->Equals("MATCHCHAR"))
01947 {
01948 DemandEquals(wIt, tvEnd, "after keyword MATCHCHAR");
01949 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MATCHCHAR = \" in FORMAT command");
01950 const std::string t = wIt->GetToken();
01951 if (t.length() != 1)
01952 {
01953 errormsg << "MATCHCHAR symbol should be a single character, but " << t << " was specified";
01954 WarnDangerousContent(errormsg, *wIt);
01955 }
01956 else if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
01957 {
01958 errormsg << "MATCHCHAR symbol specified cannot be a punctuation token (" << t << " was specified)";
01959 WarnDangerousContent(errormsg, *wIt);
01960 }
01961 else if (token.IsWhitespaceToken(t))
01962 {
01963 errormsg << "MATCHCHAR symbol specified cannot be a whitespace character (" << t << " was specified)";
01964 WarnDangerousContent(errormsg, *wIt);
01965 }
01966 matchchar = t[0];
01967 }
01968 else if (wIt->Equals("SYMBOLS") || wIt->Equals("SYMBOL"))
01969 {
01970 if (datatype == NxsCharactersBlock::continuous)
01971 throw NxsException("SYMBOLS subcommand not allowed for DATATYPE=CONTINUOUS", *wIt);
01972 if (restrictionDataype)
01973 throw NxsException("SYMBOLS subcommand not allowed for DATATYPE=RESTRICTION", *wIt);
01974 NxsDiscreteStateCell numDefStates;
01975 unsigned maxNewStates = NCL_MAX_STATES;
01976 switch(datatype)
01977 {
01978 case NxsCharactersBlock::dna:
01979 case NxsCharactersBlock::rna:
01980 case NxsCharactersBlock::nucleotide:
01981 numDefStates = 4;
01982 maxNewStates = NCL_MAX_STATES-4;
01983 break;
01984
01985 case NxsCharactersBlock::protein:
01986 numDefStates = 21;
01987 maxNewStates = NCL_MAX_STATES-21;
01988 break;
01989
01990 default:
01991 numDefStates = 0;
01992 symbols.clear();
01993 maxNewStates = NCL_MAX_STATES;
01994 }
01995 DemandEquals(wIt, tvEnd, "after keyword SYMBOLS");
01996 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to start the symbols list");
01997 if (!wIt->Equals("\""))
01998 {
01999 errormsg << "Expecting \" after Symbols= but " << wIt->GetToken() << " was found";
02000 throw NxsException(errormsg, *wIt);
02001 }
02002 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "closing \" of symbols list");
02003 NxsString s;
02004 while (!wIt->Equals("\""))
02005 {
02006 s += wIt->GetToken().c_str();
02007 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "closing \" of symbols list");
02008 }
02009
02010 const std::string tos = NxsString::strip_whitespace(s);
02011 const char * to = tos.c_str();
02012 unsigned tlen = (unsigned)tos.length();
02013 if (tlen > maxNewStates)
02014 {
02015 errormsg << "SYMBOLS defines " << tlen << " new states but only " << maxNewStates << " new states allowed for this DATATYPE";
02016 throw NxsException(errormsg, *wIt);
02017 }
02018
02019
02020
02021 std::string preprocessedS;
02022 for (unsigned i = 0; i < tlen; i++)
02023 {
02024 if (to[i] == '~')
02025 {
02026 if (i == 0 || i == tlen -1)
02027 {
02028 errormsg << "A ~ in a SYMBOLS list is interpreted as a range of symbols. The ~ cannot be the first or last character in the symbols list";
02029 throw NxsException(errormsg, token);
02030 }
02031 const int jj = i - 1 ;
02032 const char prevChar = to[jj];
02033 const char nextChar = to[i+1];
02034 if ((isdigit(prevChar) && isdigit(nextChar)) || (isalpha(prevChar) && isalpha(nextChar)))
02035 {
02036 if (nextChar > prevChar)
02037 {
02038 for (char c = (char)((int)prevChar + 1) ; c < nextChar;)
02039 {
02040 preprocessedS.append(1, c);
02041 c = (char) ((int)c + 1);
02042 }
02043 }
02044 else
02045 {
02046 errormsg << "Endpoint of SYMBOLS range must be greater than the starting point. This was not true of " << prevChar << '~' << nextChar;
02047 throw NxsException(errormsg, token);
02048 }
02049 }
02050 else
02051 {
02052 errormsg << prevChar << '~' << nextChar << " is an illegal SYMBOLS range. A range must go from a letter to a letter or from a number to number" ;
02053 throw NxsException(errormsg, token);
02054 }
02055 }
02056 else
02057 preprocessedS += to[i];
02058 }
02059 NxsString processedS;
02060 for (std::string::const_iterator pp = preprocessedS.begin(); pp != preprocessedS.end(); ++pp)
02061 {
02062 const char c = *pp;
02063 if (IsInSymbols(c))
02064 {
02065 errormsg << "The character " << c << " defined in SYMBOLS is predefined for this DATATYPE and should not occur in a SYMBOLS statement";
02066 if (nexusReader)
02067 {
02068 nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
02069 errormsg.clear();
02070 }
02071 }
02072 else if ( (respectingCase && (userEquates.find(c) != userEquates.end()))
02073 || (! respectingCase && (userEquates.find(toupper(c)) != userEquates.end() || userEquates.find(tolower(*pp)) != userEquates.end())))
02074 {
02075 errormsg << "The character " << *pp << " defined in SYMBOLS subcommand, has already been introduced as an EQUATE key. The use of a character as both a state symbol and an equate key is not allowed.";
02076 throw NxsException(errormsg, token);
02077 }
02078 else
02079 processedS += *pp;
02080 }
02081 if (!processedS.empty())
02082 {
02083 if (this->datatype == dna || this->datatype == rna || this->restrictionDataype || this->datatype == protein)
02084 {
02085 if (this->allowAugmentingOfSequenceSymbols)
02086 {
02087 errormsg << "Adding symbols to the " << GetNameOfDatatype(this->datatype) << " datatype will cause the matrix to be treated as if it were a";
02088 if (this->convertAugmentedToMixed)
02089 errormsg << " MIXED datatype matrix";
02090 else
02091 errormsg << " STANDARD datatype matrix";
02092 if (!this->convertAugmentedToMixed)
02093 nexusReader->NexusWarnToken(errormsg, NxsReader::AMBIGUOUS_CONTENT_WARNING, token);
02094 errormsg.clear();
02095 }
02096 else
02097 {
02098 errormsg << "Symbols cannot be added to the " << GetNameOfDatatype(this->datatype) << " datatype.";
02099 throw NxsException(errormsg, token);
02100 }
02101 }
02102
02103
02104
02105 symbols += processedS.c_str();
02106 }
02107 }
02108
02109 else if (wIt->Equals("EQUATE"))
02110 {
02111 if (datatype == NxsCharactersBlock::continuous)
02112 throw NxsException("EQUATE subcommand not allowed for DATATYPE=CONTINUOUS", *wIt);
02113
02114 DemandEquals(wIt, tvEnd, "after keyword EQUATE");
02115 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to start the Equate definition");
02116 if (!wIt->Equals("\""))
02117 {
02118 errormsg << "Expecting '\"' after keyword EQUATE but found " << wIt->GetToken() << " instead";
02119 throw NxsException(errormsg, *wIt);
02120 }
02121 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to end the Equate definition");
02122 while (!wIt->Equals("\""))
02123 {
02124 std::string t = wIt->GetToken();
02125 if (t.length() != 1)
02126 {
02127 errormsg << "Expecting single-character EQUATE symbol but found " << wIt->GetToken() << " instead";
02128 throw NxsException(errormsg, *wIt);
02129 }
02130 const char ch = t[0];
02131 bool badEquateSymbol = false;
02132
02133
02134
02135 if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
02136 badEquateSymbol = true;
02137 else if (ch == '^')
02138 badEquateSymbol = true;
02139 if (badEquateSymbol)
02140 {
02141 errormsg << "EQUATE symbol specified (" << wIt->GetToken() << ") is not valid. Equate symbols cannot be any of the following: ()[]{}/\\,;:=*'\"`<>^";
02142 WarnDangerousContent(errormsg, *wIt);
02143 }
02144 if (ch == missing || ch == matchchar || ch == gap || IsInSymbols(ch))
02145 {
02146 errormsg << "EQUATE symbol specified (" << wIt->GetToken() << ") is not valid; An Equate symbol cannot be a state symbol or identical to the missing, gap, or matchchar symbols.";
02147 throw NxsException(errormsg, *wIt);
02148 }
02149
02150 DemandEquals(wIt, tvEnd, " in EQUATE definition");
02151 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "State or set of states in Equate definition");
02152 NxsString s;
02153 s = wIt->GetToken().c_str();
02154 if (wIt->Equals("{"))
02155 {
02156 while (!wIt->Equals("}"))
02157 {
02158 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "} to close the state set in an equate definition");
02159 s += wIt->GetToken().c_str();
02160 }
02161 }
02162 else if (wIt->Equals("("))
02163 {
02164 while (!wIt->Equals(")"))
02165 {
02166 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, ") to close the state set in an equate definition");
02167 s += wIt->GetToken().c_str();
02168 }
02169 }
02170 const std::string nows = NxsString::strip_whitespace(s);
02171 userEquates[ch] = NxsString(nows.c_str());
02172 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to end the Equate definition");
02173 }
02174 }
02175
02176 else if (wIt->Equals("LABELS"))
02177 labels = true;
02178 else if (wIt->Equals("NOLABELS"))
02179 labels = false;
02180 else if (wIt->Equals("TRANSPOSE"))
02181 transposing = true;
02182 else if (wIt->Equals("INTERLEAVE"))
02183 interleaving = true;
02184 else if (wIt->Equals("ITEMS"))
02185 {
02186 DemandEquals(wIt, tvEnd, "after keyword ITEMS");
02187 items.clear();
02188
02189
02190 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"ITEMS =\" in FORMAT command");
02191 if (datatype == NxsCharactersBlock::continuous)
02192 {
02193 std::string s;
02194 if (wIt->Equals("("))
02195 {
02196 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, ") to close Items list in FORMAT command");
02197 while (!wIt->Equals(")"))
02198 {
02199 s = wIt->GetToken();
02200 NxsString::to_upper(s);
02201 items.push_back(std::string(s.c_str()));
02202 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, ") to close Items list in FORMAT command");
02203 }
02204 }
02205 else
02206 {
02207 s = wIt->GetToken();
02208 NxsString::to_upper(s);
02209 items.push_back(std::string(s.c_str()));
02210 }
02211 }
02212 else
02213 {
02214 if (!wIt->Equals("STATES"))
02215 throw NxsException("Sorry, only ITEMS=STATES is supported for discrete datatypes at this time", *wIt);
02216 items = std::vector<std::string>(1, std::string("STATES"));
02217 }
02218 }
02219 else if (wIt->Equals("STATESFORMAT"))
02220 {
02221 DemandEquals(wIt, tvEnd, "after keyword STATESFORMAT");
02222 ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"STATESFORMAT =\" in FORMAT command");
02223 if (wIt->Equals("STATESPRESENT"))
02224 statesFormat = STATES_PRESENT;
02225 else
02226 {
02227 if (datatype == NxsCharactersBlock::continuous)
02228 {
02229 if (wIt->Equals("INDIVIDUALS"))
02230 statesFormat = INDIVIDUALS;
02231 else
02232 throw NxsException("Sorry, only STATESFORMAT=STATESPRESENT or STATESFORMAT=INDIVIDUALS are supported for continuous datatypes at this time", *wIt);
02233 }
02234 else
02235 throw NxsException("Sorry, only STATESFORMAT=STATESPRESENT supported for discrete datatypes at this time", *wIt);
02236 }
02237 }
02238 else if (wIt->Equals("TOKENS"))
02239 tokens = true;
02240 else if (wIt->Equals("NOTOKENS"))
02241 {
02242 if (datatype == NxsCharactersBlock::continuous)
02243 throw NxsException("NOTOKENS is not allowed for the CONTINUOUS datatype", *wIt);
02244 tokens = false;
02245 }
02246 }
02247 if (IsInSymbols(missing))
02248 {
02249 errormsg << "The \"missing\" character \'" << missing << "\' may not be included in the SYMBOLS list.";
02250 throw NxsException(errormsg, *tokenVec.begin());
02251 }
02252 if (IsInSymbols(matchchar))
02253 {
02254 errormsg << "The \"matchchar\" character \'" << matchchar << "\' may not be included in the SYMBOLS list.";
02255 throw NxsException(errormsg, *tokenVec.begin());
02256 }
02257 if (IsInSymbols(gap))
02258 {
02259 errormsg << "The \"gap\" character \'" << gap << "\' may not be included in the SYMBOLS list.";
02260 throw NxsException(errormsg, *tokenVec.begin());
02261 }
02262
02263 if (matchchar != '\0')
02264 {
02265 if ((matchchar == gap) || (!respectingCase && toupper(matchchar) == toupper(gap)))
02266 {
02267 errormsg << "MatchChar and Gap symbol cannot be identical! Both were set to " << gap;
02268 throw NxsException(errormsg, *tokenVec.begin());
02269 }
02270 if ((matchchar == missing) || (!respectingCase && toupper(matchchar) == toupper(missing)))
02271 {
02272 errormsg << "MatchChar and Missing symbol cannot be identical! Both were set to " << missing;
02273 throw NxsException(errormsg, *tokenVec.begin());
02274 }
02275 }
02276 if ((gap != '\0') && ((gap == missing) || (!respectingCase && toupper(gap) == toupper(missing))))
02277 {
02278 errormsg << "Gap symbol and Missing symbol cannot be identical! Both were set to " << missing;
02279 throw NxsException(errormsg, *tokenVec.begin());
02280 }
02281
02282
02283
02284 if (!tokens && datatype == continuous)
02285 GenerateNxsException(token, "TOKENS must be defined for DATATYPE=CONTINUOUS");
02286 if (tokens && (datatype == dna || datatype == rna || datatype == nucleotide))
02287 GenerateNxsException(token, "TOKENS not allowed for the DATATYPEs DNA, RNA, or NUCLEOTIDE");
02288 CreateDatatypeMapperObjects(dtParts, dtv);
02289 if (IsMixedType() && tokens)
02290 {
02291 errormsg = "The combination of DATATYPE=Mixed and TOKENS are not currently supported.";
02292 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
02293 }
02294 unsigned mapInd = 0;
02295 for (std::vector<bool>::const_iterator b = isR.begin(); b != isR.end(); ++b, ++mapInd)
02296 {
02297 if (*b)
02298 {
02299 DatatypeMapperAndIndexSet &mapper = datatypeMapperVec.at(mapInd);
02300 mapper.first.SetWasRestrictionDataype(true);
02301 }
02302 }
02303 }
02304
02306 NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper()
02307 :geneticCode(NXS_GCODE_NO_CODE),
02308 datatype(NxsCharactersBlock::standard),
02309 restrictionDataype(false),
02310 userDefinedEquatesBeforeConversion(false)
02311 {
02312 symbols.assign("01");
02313 matchChar = '\0';
02314 gapChar = '\0';
02315 missing = '?';
02316 respectCase = false;
02317 RefreshMappings(0L);
02318 }
02319
02320 NxsDiscreteDatatypeMapper & NxsDiscreteDatatypeMapper::operator=(const NxsDiscreteDatatypeMapper& other)
02321 {
02322 symbols = other.symbols;
02323 lcsymbols = other.lcsymbols;
02324 nStates = other.nStates;
02325 matchChar = other.matchChar;
02326 gapChar = other.gapChar;
02327 missing = other.missing;
02328 respectCase = other.respectCase;
02329 extraEquates = other.extraEquates;
02330 datatype = other.datatype;
02331 geneticCode = other.geneticCode;
02332 sclOffset = other.sclOffset;
02333 stateSetsVec = other.stateSetsVec;
02334 stateCodeLookupPtr = &stateSetsVec[-sclOffset];
02335 charToStateCodeLookup = other.charToStateCodeLookup;
02336 cLookup = &charToStateCodeLookup[127];
02337 restrictionDataype = other.restrictionDataype;
02338 userDefinedEquatesBeforeConversion = other.userDefinedEquatesBeforeConversion;
02339 return *this;
02340 }
02341
02346 NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper(
02347 NxsCharactersBlock::DataTypesEnum datatypeE,
02348 bool usegaps)
02349 :geneticCode(NXS_GCODE_NO_CODE),
02350 cLookup(NULL),
02351 stateCodeLookupPtr(NULL),
02352 matchChar('.'),
02353 gapChar('\0'),
02354 missing('?'),
02355 respectCase(false),
02356 datatype(datatypeE),
02357 restrictionDataype(false),
02358 userDefinedEquatesBeforeConversion(false)
02359 {
02360 symbols = NxsCharactersBlock::GetDefaultSymbolsForType(datatype);
02361 if (usegaps)
02362 gapChar = '-';
02363 if (datatype == NxsCharactersBlock::mixed)
02364 throw NxsException("Cannot create a mixed datatype mapper");
02365 RefreshMappings(0L);
02366 }
02367
02377 bool NxsDiscreteDatatypeMapper::IsSemanticallyEquivalent(const NxsDiscreteDatatypeMapper &other) const
02378 {
02379 if (datatype != other.datatype)
02380 return false;
02381 if (symbols != other.symbols)
02382 return false;
02383 bool thisHasGap = (gapChar != '\0');
02384 bool otherHasGap = (other.gapChar != '\0');
02385 if (thisHasGap != otherHasGap)
02386 return false;
02387 const NxsDiscreteStateCell nsc = (NxsDiscreteStateCell)GetHighestStateCode();
02388 if(nsc != (NxsDiscreteStateCell) other.GetHighestStateCode())
02389 return false;
02390 for (NxsDiscreteStateCell i = 0; i <= nsc; ++i)
02391 {
02392 if (GetStateSetForCode(i) != other.GetStateSetForCode(i))
02393 return false;
02394 }
02395 return true;
02396 }
02397
02403 void NxsDiscreteDatatypeMapper::RefreshMappings(NxsToken *token)
02404 {
02405 nStates = (unsigned)symbols.length();
02406 if (nStates == 0)
02407 {
02408 if (datatype != NxsCharactersBlock::continuous)
02409 throw NxsException("Cannot create a datatype mapper with no symbols");
02410 return;
02411 }
02412 if (!respectCase)
02413 {
02414 NxsString::to_upper(symbols);
02415 lcsymbols = symbols;
02416 }
02417 else
02418 lcsymbols.clear();
02419
02420 NxsString::to_lower(lcsymbols);
02421
02422 if (missing == '\0')
02423 throw NxsException("Cannot create a datatype mapper with no missing data symbol");
02424
02425 charToStateCodeLookup.assign(384, NXS_INVALID_STATE_CODE);
02426 cLookup = &charToStateCodeLookup[127];
02427 stateIntersectionMatrix.clear();
02428 isStateSubsetMatrix.clear();
02429 isStateSubsetMatrixGapsMissing.clear();
02430
02431 stateSetsVec.clear();
02432 stateCodeLookupPtr = 0L;
02433 sclOffset = (gapChar == '\0' ? -1 : -2);
02434
02435 std::string bogus;
02436 std::istringstream bogusStream(bogus);
02437 NxsToken bogusToken(bogusStream);
02438 token = (token == NULL ? &bogusToken : token);
02439
02440
02441 std::set<NxsDiscreteStateCell> stSet;
02442 std::set<NxsDiscreteStateCell> missingSet;
02443 if (gapChar != '\0')
02444 {
02445 stSet.insert(NXS_GAP_STATE_CODE);
02446
02447
02448
02449
02450 stateSetsVec.push_back(NxsDiscreteStateSetInfo(stSet, false, gapChar));
02451 cLookup[(int) gapChar] = NXS_GAP_STATE_CODE;
02452
02453 missingSet.insert(NXS_GAP_STATE_CODE);
02454 }
02455
02456
02457
02458
02459
02460
02461 NCL_ASSERT(missing != '\0');
02462 NCL_ASSERT(nStates > 0);
02463 for (NxsDiscreteStateCell s = 0; s < (NxsDiscreteStateCell) nStates; ++s)
02464 missingSet.insert(s);
02465
02466 char sym = (respectCase ? missing : (char) toupper(missing));
02467 stateSetsVec.push_back(NxsDiscreteStateSetInfo(missingSet, false, sym));
02468 const NxsDiscreteStateCell stateCode = (const NxsDiscreteStateCell)stateSetsVec.size() + sclOffset - 1;
02469 NCL_ASSERT(NXS_MISSING_CODE == stateCode);
02470 if (respectCase)
02471 cLookup[(int) missing] = stateCode;
02472 else
02473 {
02474 cLookup[(int) tolower(missing)] = stateCode;
02475 cLookup[(int) toupper(missing)] = stateCode;
02476 }
02477 NCL_ASSERT(cLookup[(int) missing] == NXS_MISSING_CODE);
02478 for (NxsDiscreteStateCell s = 0; s < (NxsDiscreteStateCell) nStates; ++s)
02479 {
02480 stSet.clear();
02481 stSet.insert(s);
02482 AddStateSet(stSet, symbols[s], respectCase, false);
02483 }
02484
02485
02486 std::map<char, NxsString> defEq = NxsCharactersBlock::GetDefaultEquates(datatype);
02487
02488
02489
02490 bool convertToStandard = false;
02491 if (((datatype == NxsCharactersBlock::nucleotide) || (datatype == NxsCharactersBlock::dna)) && symbols != "ACGT")
02492 convertToStandard = true;
02493 else if ((datatype == NxsCharactersBlock::rna) && symbols != "ACGU")
02494 convertToStandard = true;
02495 else if ((datatype == NxsCharactersBlock::protein) && symbols != "ACDEFGHIKLMNPQRSTVWY*")
02496 convertToStandard = true;
02497 if (convertToStandard)
02498 {
02499 if (!extraEquates.empty())
02500 userDefinedEquatesBeforeConversion = true;
02501 defEq.insert(extraEquates.begin(), extraEquates.end());
02502 extraEquates.clear();
02503 defEq.swap(extraEquates);
02504
02505
02506
02507
02508 if (respectCase)
02509 {
02510 std::string lcsym = NxsCharactersBlock::GetDefaultSymbolsForType(datatype);
02511 NxsString::to_lower(lcsym);
02512 std::string ucsym = lcsym;
02513 NxsString::to_upper(ucsym);
02514 for (unsigned i = 0; i < ucsym.length(); ++i)
02515 {
02516 if (ucsym[i] != lcsym[i])
02517 {
02518 NxsString u;
02519 u.append(1, ucsym[i]);
02520 extraEquates[lcsym[i]] = u;
02521 }
02522 }
02523 }
02524 datatype = NxsCharactersBlock::standard;
02525 }
02526
02527
02528 std::set<char> targetSet(symbols.begin(), symbols.end());
02529 char allStateEquateKey = '\0';
02530 std::map<char, NxsString>::const_iterator eqIt = defEq.begin();
02531 NxsString taxonName;
02532 for (; eqIt != defEq.end(); ++eqIt)
02533 {
02534 const char c = eqIt->first;
02535 const char u = toupper(c);
02536 bool addEq = true;
02537 if (c == missing || c == matchChar || c == gapChar)
02538 addEq = false;
02539 if (!respectCase && (u == toupper(missing) || u == toupper(matchChar) || u == toupper(gapChar)))
02540 addEq = false;
02541 if (addEq)
02542 {
02543 const NxsString & s = eqIt->second;
02544 unsigned slen = s.length();
02545 if (slen == 2 + symbols.length())
02546 {
02547 if (s[0] == '{' && s[slen -1] == '}')
02548 {
02549 std::set<char> contained;
02550 for (unsigned j = 1; j < slen - 1; ++j)
02551 contained.insert(s[j]);
02552 if (contained == targetSet)
02553 {
02554 allStateEquateKey = c;
02555 NxsDiscreteStateCell sc = StateCodeForNexusPossibleMultiStateSet(allStateEquateKey, s, *token, UINT_MAX, UINT_MAX, 0L, taxonName);
02556 cLookup[(int) allStateEquateKey] = sc;
02557 break;
02558 }
02559 }
02560 }
02561
02562 }
02563 }
02564
02565 eqIt = defEq.begin();
02566 for (; eqIt != defEq.end(); ++eqIt)
02567 {
02568 const char c = eqIt->first;
02569 if (c == allStateEquateKey)
02570 continue;
02571 const char u = toupper(c);
02572 bool addEq = true;
02573 if (c == missing || c == matchChar || c == gapChar)
02574 addEq = false;
02575 if (!respectCase && (u == toupper(missing) || u == toupper(matchChar) || u == toupper(gapChar)))
02576 addEq = false;
02577 if (addEq)
02578 {
02579 const NxsString & s = eqIt->second;
02580 NxsDiscreteStateCell sc = StateCodeForNexusPossibleMultiStateSet(c, s, *token, UINT_MAX, UINT_MAX, 0L, taxonName);
02581 cLookup[(int) c] = sc;
02582 }
02583 }
02584
02585
02586
02587
02588
02589 std::map<char, NxsString> neededExtraEquates;
02590 for (eqIt = extraEquates.begin(); eqIt != extraEquates.end(); ++eqIt)
02591 {
02592 const char c = eqIt->first;
02593 const char u = toupper(c);
02594 if (PositionInSymbols(c) == NXS_INVALID_STATE_CODE)
02595 {
02596 bool addEq = true;
02597 if (c == missing || c == matchChar || c == gapChar)
02598 addEq = false;
02599 if (!respectCase && (u == toupper(missing) || u == toupper(matchChar) || u == toupper(gapChar)))
02600 addEq = false;
02601 if (addEq)
02602 {
02603 const NxsDiscreteStateCell prevCode = cLookup[(int) c];
02604 const NxsString & s = eqIt->second;
02605 NxsDiscreteStateCell sc = StateCodeForNexusPossibleMultiStateSet(c, s, *token, UINT_MAX, UINT_MAX, 0L, taxonName);
02606 cLookup[(int) c] = sc;
02607 if (sc != prevCode)
02608 neededExtraEquates[c] = s;
02609 }
02610 }
02611 else
02612 {
02613 NCL_ASSERT(convertToStandard);
02614 }
02615 }
02616 extraEquates = neededExtraEquates;
02617 }
02618
02630 NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForStateSet(const std::set<NxsDiscreteStateCell> & sset, bool isPolymorphic, bool addToLookup, char nexusSymbol)
02631 {
02632 if (sset.size() == 1)
02633 {
02634 NxsDiscreteStateCell c = *sset.begin();
02635 ValidateStateIndex(c);
02636 return c;
02637 }
02638 NCL_ASSERT(stateCodeLookupPtr);
02639 NxsDiscreteStateSetInfo *sclStart = stateCodeLookupPtr + nStates;
02640 const NxsDiscreteStateCell nCodes = (NxsDiscreteStateCell)stateSetsVec.size();
02641
02642
02643 for (NxsDiscreteStateCell i = nStates - sclOffset; i < nCodes; ++i)
02644 {
02645 NxsDiscreteStateSetInfo & stateSetInfo = *sclStart++;
02646 if (sset == stateSetInfo.states && isPolymorphic == stateSetInfo.isPolymorphic)
02647 return i + sclOffset;
02648 }
02649 for (std::set<NxsDiscreteStateCell>::const_iterator sIt = sset.begin(); sIt != sset.end(); ++sIt)
02650 ValidateStateIndex(*sIt);
02651 if (!isPolymorphic)
02652 {
02653 const unsigned nsymbs = (const unsigned)sset.size();
02654 if (gapChar != '\0' && nsymbs == GetNumStatesIncludingGap())
02655 return NXS_MISSING_CODE;
02656 }
02657 if (!addToLookup)
02658 return NXS_INVALID_STATE_CODE;
02659 return AddStateSet(sset, nexusSymbol, true, isPolymorphic);
02660 }
02661
02668 NxsDiscreteStateCell NxsDiscreteDatatypeMapper::AddStateSet(const std::set<NxsDiscreteStateCell> & states, char nexusSymbol, bool symRespectCase, bool isPolymorphic)
02669 {
02670 stateIntersectionMatrix.clear();
02671 isStateSubsetMatrix.clear();
02672 isStateSubsetMatrixGapsMissing.clear();
02673
02674
02675 bool reallyIsPoly = (states.size() > 1 && isPolymorphic);
02676 char sym = (symRespectCase ? nexusSymbol : (char) toupper(nexusSymbol));
02677 stateSetsVec.push_back(NxsDiscreteStateSetInfo(states, reallyIsPoly, sym));
02678
02679
02680
02681
02682 stateCodeLookupPtr = &stateSetsVec[-sclOffset];
02683
02684 const NxsDiscreteStateCell stateCode = (const NxsDiscreteStateCell)stateSetsVec.size() + sclOffset - 1;
02685 if (nexusSymbol != '\0')
02686 {
02687 if (symRespectCase)
02688 cLookup[(int) nexusSymbol] = stateCode;
02689 else
02690 {
02691 cLookup[(int) tolower(nexusSymbol)] = stateCode;
02692 cLookup[(int) toupper(nexusSymbol)] = stateCode;
02693 }
02694 }
02695 return stateCode;
02696 }
02697
02698
02699
02703 void NxsDiscreteDatatypeMapper::ValidateStateIndex(NxsDiscreteStateCell c) const
02704 {
02705 if (c < NXS_MISSING_CODE)
02706 {
02707 if (c == NXS_GAP_STATE_CODE)
02708 {
02709 if (gapChar == '\0')
02710 throw NxsNCLAPIException("Illegal usage of NXS_GAP_STATE_CODE in a datatype without gaps");
02711 return;
02712 }
02713 if (c == NXS_INVALID_STATE_CODE)
02714 throw NxsNCLAPIException("Illegal usage of NXS_INVALID_STATE_CODE as a state index");
02715 throw NxsNCLAPIException("Illegal usage of unknown negative state index");
02716 }
02717 else if (c >= (NxsDiscreteStateCell) nStates)
02718 throw NxsNCLAPIException("Illegal usage of state index >= the number of states");
02719 }
02720
02724 void NxsDiscreteDatatypeMapper::ValidateStateCode(NxsDiscreteStateCell c) const
02725 {
02726 if (c < sclOffset)
02727 {
02728 if (c == NXS_GAP_STATE_CODE)
02729 throw NxsNCLAPIException("Illegal usage of NXS_GAP_STATE_CODE in a datatype without gaps");
02730 if (c == NXS_INVALID_STATE_CODE)
02731 throw NxsNCLAPIException("Illegal usage of NXS_INVALID_STATE_CODE as a state code");
02732 throw NxsNCLAPIException("Illegal usage of unknown negative state index");
02733 }
02734 else if (c >= (((NxsDiscreteStateCell) stateSetsVec.size()) + sclOffset))
02735 throw NxsNCLAPIException("Illegal usage of state code > the highest state code");
02736 }
02737
02738
02739 void NxsDiscreteDatatypeMapper::GenerateNxsExceptionMatrixReading(char const* message, unsigned int taxInd, unsigned int charInd,
02740 NxsToken& token, const NxsString &nameStr)
02741 {
02742 NxsString e = "Error reading character ";
02743 e << charInd + 1<<" for taxon " << taxInd + 1;
02744 if (!nameStr.empty())
02745 {
02746 NxsString nasn;
02747 nasn << taxInd + 1;
02748 if (nasn != nameStr)
02749 e << " (name \""<< nameStr <<"\")";
02750 }
02751 e << ":\n" << message;
02752 throw NxsException(e, token);
02753 }
02754
02759 bool NxsDiscreteDatatypeMapper::IsPolymorphic(NxsDiscreteStateCell c) const
02760 {
02761 NCL_ASSERT(stateCodeLookupPtr);
02762 ValidateStateCode(c);
02763 return stateCodeLookupPtr[c].isPolymorphic;
02764 }
02765
02766
02773 NxsDiscreteStateCell NxsDiscreteDatatypeMapper::PositionInSymbols(char c) const
02774 {
02775 NxsDiscreteStateCell p = (NxsDiscreteStateCell)symbols.find(c);
02776 if (p >= 0 && p < (NxsDiscreteStateCell) nStates)
02777 return p;
02778 if (!respectCase)
02779 {
02780 p = (NxsDiscreteStateCell)lcsymbols.find(c);
02781 if (p >= 0 && p < (NxsDiscreteStateCell) nStates)
02782 return p;
02783 }
02784 return NXS_INVALID_STATE_CODE;
02785 }
02786
02787
02797 void NxsDiscreteDatatypeMapper::WriteStateCodeAsNexusString(std::ostream & out, NxsDiscreteStateCell scode, bool demandSymbols) const
02798 {
02799 ValidateStateCode(scode);
02800 const NxsDiscreteStateSetInfo * ssi = &(stateSetsVec.at(scode-sclOffset));
02801 const NxsDiscreteStateSetInfo & stateSetInfo = stateCodeLookupPtr[scode];
02802 NCL_ASSERT (ssi == &stateSetInfo);
02803 char c = stateSetInfo.nexusSymbol;
02804 if (c != '\0')
02805 {
02806 out << c;
02807 return;
02808 }
02809 std::string towrite;
02810 std::set<NxsDiscreteStateCell>::const_iterator sIt = stateSetInfo.states.begin();
02811 const std::set<NxsDiscreteStateCell>::const_iterator endIt = stateSetInfo.states.end();
02812 for (; sIt != endIt; ++sIt)
02813 {
02814 const NxsDiscreteStateCell state = *sIt;
02815 const NxsDiscreteStateSetInfo & subStateSetInfo = stateCodeLookupPtr[state];
02816 const char subc = subStateSetInfo.nexusSymbol;
02817 if (subc != '\0')
02818 towrite.append(1, subc);
02819 else if (demandSymbols)
02820 {
02821 NxsString err("No symbol found for state code ");
02822 err << state;
02823 throw NxsNCLAPIException(err);
02824 }
02825 else
02826 return;
02827 }
02828
02829 out << (stateSetInfo.isPolymorphic ? '(' : '{');
02830 out << towrite;
02831 out << (stateSetInfo.isPolymorphic ? ')' : '}');
02832 }
02833
02834 unsigned NxsDiscreteDatatypeMapper::GetNumStatesInStateCode(NxsDiscreteStateCell scode) const
02835 {
02836 ValidateStateCode(scode);
02837 const NxsDiscreteStateSetInfo & stateSetInfo = stateCodeLookupPtr[scode];
02838 return (unsigned)stateSetInfo.states.size();
02839 }
02840
02841 void NxsDiscreteDatatypeMapper::WriteStartOfFormatCommand(std::ostream & out) const
02842 {
02843 out << " FORMAT Datatype=" << NxsCharactersBlock::GetNameOfDatatype(datatype);
02844 if (this->missing != '?')
02845 {
02846 out << " Missing=";
02847 out << this->missing;
02848 }
02849 if (this->gapChar != '\0')
02850 {
02851 out << " Gap=";
02852 out << this->gapChar;
02853 }
02854 if (this->datatype != NxsCharactersBlock::continuous)
02855 {
02856 unsigned numDefStates = 4;
02857 if (this->datatype == NxsCharactersBlock::protein)
02858 numDefStates = 21;
02859 else if (this->datatype == NxsCharactersBlock::standard)
02860 numDefStates = 0;
02861 unsigned nSym = (unsigned)this->symbols.length();
02862 if (nSym > numDefStates && this->datatype != NxsCharactersBlock::codon)
02863 {
02864 out << " Symbols=\"";
02865 for (unsigned i = numDefStates; i < nSym; ++i)
02866 {
02867 char c = symbols[i];
02868 if (c == '\0')
02869 break;
02870 out << c;
02871 }
02872 out <<"\"";
02873 }
02874 }
02875 const std::map<char, NxsString> defEquates = NxsCharactersBlock::GetDefaultEquates(datatype);
02876 std::map<char, NxsString> toWrite;
02877 const std::map<char, NxsString>::const_iterator notFound = defEquates.end();
02878 std::map<char, NxsString>::const_iterator inDefEquates;
02879 for (std::map<char, NxsString>::const_iterator i = extraEquates.begin(); i != extraEquates.end(); ++i)
02880 {
02881 const char key = (*i).first;
02882 const NxsString val = i->second;
02883 inDefEquates = defEquates.find(key);
02884 if (inDefEquates == notFound || inDefEquates->second != val)
02885 toWrite[key] = val;
02886 }
02887 if (toWrite.size() > 0)
02888 {
02889 out << " Equate=\"";
02890 for (std::map<char, NxsString>::const_iterator j = toWrite.begin(); j != toWrite.end(); ++j)
02891 out << ' ' << j->first << '=' << j->second;
02892 out <<"\"";
02893 }
02894 }
02895
02896 bool NxsCharactersBlock::HandleNextContinuousState(NxsToken &token, unsigned taxNum, unsigned charNum, ContinuousCharRow & row, const NxsString & )
02897 {
02898 if (interleaving)
02899 token.SetLabileFlagBit(NxsToken::newlineIsToken);
02900 token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
02901 std::vector<double> v;
02902 std::vector<int> scored;
02903 token.GetNextToken();
02904 NxsString t;
02905 if (interleaving && token.AtEOL())
02906 return false;
02907 if (token.Equals("("))
02908 {
02909 token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
02910 token.GetNextToken();
02911 while (!token.Equals(")"))
02912 {
02913 t = token.GetToken();
02914 if (t.length() == 1 && (t[0] == missing || t[0] == gap))
02915 {
02916 v.push_back(DBL_MAX);
02917 scored.push_back(0);
02918 }
02919 else if (t.length() == 1 && t[0] == matchchar)
02920 {
02921 v.push_back(DBL_MAX);
02922 scored.push_back(2);
02923 }
02924 else if (!t.IsADouble())
02925 GenerateUnexpectedTokenNxsException(token, "a number");
02926 else
02927 {
02928 v.push_back(t.ConvertToDouble());
02929 scored.push_back(1);
02930 }
02931 token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
02932 token.GetNextToken();
02933 }
02934 }
02935 else
02936 {
02937 t = token.GetToken();
02938 if (t.length() == 1 && (t[0] == missing || t[0] == gap))
02939 {
02940 v.push_back(DBL_MAX);
02941 scored.push_back(0);
02942 }
02943 else if (t.length() == 1 && t[0] == matchchar)
02944 {
02945 v.push_back(DBL_MAX);
02946 scored.push_back(2);
02947 }
02948 else if (!t.IsADouble())
02949 GenerateUnexpectedTokenNxsException(token, "a number");
02950 else
02951 {
02952 v.push_back(t.ConvertToDouble());
02953 scored.push_back(1);
02954 }
02955 }
02956 unsigned n_read = (unsigned)v.size();
02957 if (n_read < items.size())
02958 {
02959 errormsg.clear();
02960 errormsg << "For each cell of the MATRIX a value for each of the " << (unsigned)items.size() << " ITEMS listed in the FORMAT command is expected.\nOnly " << n_read << " values read.";
02961 GenerateNxsException(token);
02962 }
02963
02964
02965 if (charNum == UINT_MAX)
02966 return true;
02967
02968 if (charNum > row.size())
02969 GenerateNxsException(token, "Internal Error: character index out of range in continuousMatrix.");
02970
02971 ContinuousCharCell & cell = row[charNum];
02972 cell.clear();
02973
02974 std::vector<std::string >::const_iterator itemIt = items.begin();
02975 std::string key;
02976 unsigned curr_ind_in_v = 0;
02977 for (; itemIt != items.end(); ++itemIt, ++curr_ind_in_v)
02978 {
02979 key = *itemIt;
02980 if (scored[curr_ind_in_v] == 1)
02981 cell[key] = vector<double>(1, v[curr_ind_in_v]);
02982 else if (scored[curr_ind_in_v] == 0)
02983 cell[key] = vector<double>();
02984 else
02985 {
02986 if (taxNum == 0)
02987 GenerateNxsException(token, "MATCHCHAR cannot be used in the first taxon");
02988 const vector<double> & first_taxon_vector = continuousMatrix[0][charNum][key];
02989 if (first_taxon_vector.empty())
02990 GenerateNxsException(token, "First taxon does not have a value to copy, but a MATCHCHAR was found.");
02991 else
02992 cell[key] = vector<double>(1, first_taxon_vector[0]);
02993 }
02994 }
02995 unsigned curr_ind_mapped = 1;
02996 if (!key.empty() && curr_ind_in_v < n_read)
02997 {
02998 vector<double> & curr_cell_vector = cell[key];
02999 for (; curr_ind_in_v < n_read; ++curr_ind_in_v, ++curr_ind_mapped)
03000 {
03001 if (scored[curr_ind_in_v] == 1)
03002 curr_cell_vector.push_back(v[curr_ind_in_v]);
03003 else if (scored[curr_ind_in_v] != 0)
03004 curr_cell_vector.push_back(DBL_MAX);
03005 else
03006 {
03007 if (taxNum == 0)
03008 GenerateNxsException(token, "MATCHCHAR cannot be used in the first taxon");
03009 const vector<double> & first_taxon_vector = continuousMatrix[0][charNum][key];
03010 if (first_taxon_vector.size() < curr_ind_mapped+1)
03011 GenerateNxsException(token, "First taxon does not have a value to copy, but a MATCHCHAR was found.");
03012 else
03013 curr_cell_vector.push_back(first_taxon_vector[curr_ind_mapped]);
03014 }
03015 }
03016 }
03017 return true;
03018 }
03019
03020 NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForNexusChar(
03021 const char currChar,
03022 NxsToken &token,
03023 unsigned taxNum,
03024 unsigned charNum,
03025 const NxsDiscreteStateRow * firstTaxonRow,
03026 const NxsString & nameStr) const
03027 {
03028 NxsDiscreteStateCell currState = cLookup[static_cast<int>(currChar)];
03029 if (currState == NXS_INVALID_STATE_CODE)
03030 {
03031 NxsString emsg;
03032 if (currChar == matchChar)
03033 {
03034 if (firstTaxonRow == NULL)
03035 GenerateNxsExceptionMatrixReading("Unexpected use of MatchChar in first taxon with data.", taxNum, charNum, token, nameStr);
03036 if (firstTaxonRow->size() <= charNum)
03037 {
03038 emsg << "MatchChar found for character number " << charNum+1 << " but the first taxon does not have a character state stored for this character.";
03039 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03040 }
03041 currState = (*firstTaxonRow)[charNum];
03042 }
03043 else
03044 {
03045 emsg << "Invalid state specified \"" << token.GetToken() << "\"";
03046 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03047 }
03048 }
03049 return currState;
03050 }
03051
03052 bool NxsCharactersBlock::HandleNextDiscreteState(
03053 NxsToken &token,
03054 unsigned taxNum,
03055 unsigned charNum,
03056 NxsDiscreteStateRow & row,
03057 NxsDiscreteDatatypeMapper &mapper,
03058 const NxsDiscreteStateRow * firstTaxonRow,
03059 const NxsString & nameStr)
03060 {
03061 if (interleaving)
03062 token.SetLabileFlagBit(NxsToken::newlineIsToken);
03063 NCL_ASSERT(!tokens);
03064 token.SetLabileFlagBit(NxsToken::parentheticalToken);
03065 token.SetLabileFlagBit(NxsToken::curlyBracketedToken);
03066 token.SetLabileFlagBit(NxsToken::singleCharacterToken);
03067
03068 token.GetNextToken();
03069
03070 if (interleaving && token.AtEOL())
03071 return false;
03072 const NxsString &stateAsNexus = token.GetTokenReference();
03073 NxsDiscreteStateCell sc = mapper.EncodeNexusStateString(stateAsNexus, token, taxNum, charNum, firstTaxonRow, nameStr);
03074 NCL_ASSERT(charNum < row.size());
03075 row[charNum] = sc;
03076 return true;
03077 }
03078
03079 NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForNexusPossibleMultiStateSet(
03080 const char nexusSymbol,
03081 const std::string &stateAsNexus,
03082 NxsToken & token,
03083 const unsigned taxNum,
03084 const unsigned charNum,
03085 const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr)
03086 {
03087 NCL_ASSERT(stateAsNexus.length() > 0);
03088 const char firstChar = stateAsNexus[0];
03089 if (firstChar == '(' || firstChar == '{')
03090 return StateCodeForNexusMultiStateSet(nexusSymbol, stateAsNexus, token, taxNum, charNum, firstTaxonRow, nameStr);
03091 if (stateAsNexus.length() > 1)
03092 {
03093 NxsString emsg;
03094 emsg << "Expecting {} or () around a multiple character state set. Found " << stateAsNexus << " for taxon " << nameStr;
03095 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03096 }
03097
03098 NxsDiscreteStateCell currState = StateCodeForNexusChar(firstChar, token, taxNum, charNum, firstTaxonRow, nameStr);
03099 cLookup[(int) nexusSymbol] = currState;
03100 return currState;
03101 }
03102
03103 NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForNexusMultiStateSet(
03104 const char nexusSymbol,
03105 const std::string &stateAsNexus,
03106 NxsToken & token,
03107 const unsigned taxNum,
03108 const unsigned charNum,
03109 const NxsDiscreteStateRow * firstTaxonRow,
03110 const NxsString &nameStr)
03111 {
03112 const char firstChar = stateAsNexus[0];
03113 NxsString emsg;
03114 const bool poly = (firstChar == '(');
03115 if ((!poly) && firstChar != '{')
03116 {
03117 emsg << "Expecting a state symbol of set of symbols in () or {} braces. Found " << stateAsNexus;
03118 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03119 }
03120 bool tildeFound = false;
03121 NxsDiscreteStateCell prevState = NXS_INVALID_STATE_CODE;
03122 char prevChar = firstChar;
03123 std::string::const_iterator cIt = stateAsNexus.begin();
03124 std::string::const_iterator endIt = stateAsNexus.end();
03125 --endIt;
03126 NCL_ASSERT((poly && *endIt == ')') || (!poly && *endIt == '}'));
03127 std::set<NxsDiscreteStateCell> sset;
03128 for (++cIt; cIt != endIt; ++cIt)
03129 {
03130 const char currChar = *cIt;
03131 if ((strchr("\n\r \t", currChar) == NULL) && currChar != ',')
03132 {
03133 if (currChar == '~')
03134 {
03135 if (prevState < 0 || prevState >= (NxsDiscreteStateCell)nStates)
03136 {
03137 emsg << "A state range cannot start with " << prevChar;
03138 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03139 }
03140 tildeFound = true;
03141 }
03142 else
03143 {
03144
03145 NxsDiscreteStateCell currState;
03146 if (tildeFound)
03147 {
03148 currState = PositionInSymbols(currChar);
03149 if (currState == NXS_INVALID_STATE_CODE)
03150 {
03151 emsg << "A state range cannot end with " << currChar;
03152 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03153 }
03154 if (currState < prevState)
03155 {
03156 emsg << prevChar << '~' << currChar << " is not a valid state range (the end state is a lower index than the start)";
03157 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03158 }
03159 for (NxsDiscreteStateCell i = prevState; i <= currState; ++i)
03160 sset.insert(i);
03161 tildeFound = false;
03162 }
03163 else
03164 {
03165 currState = StateCodeForNexusChar(currChar, token, taxNum, charNum, firstTaxonRow, nameStr);
03166 sset.insert(currState);
03167 }
03168 prevState = currState;
03169 prevChar = currChar;
03170 }
03171 }
03172 }
03173 if (prevChar == '~')
03174 {
03175 emsg << "State range not terminated -- ending in ~" << *endIt;
03176 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03177 }
03178 if (sset.empty())
03179 {
03180 emsg << "An illegal (empty) state range was found \"" << stateAsNexus << '\"';
03181 GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
03182 }
03183 return StateCodeForStateSet(sset, poly, true, nexusSymbol);
03184 }
03185
03186
03187
03194 bool NxsCharactersBlock::HandleNextTokenState(
03195 NxsToken &token,
03196 unsigned taxNum,
03197 unsigned charNum,
03198 NxsDiscreteStateRow & row,
03199 NxsDiscreteDatatypeMapper &mapper,
03200 const NxsDiscreteStateRow * firstTaxonRow,
03201 const NxsString & nameStr)
03202 {
03203 if (interleaving)
03204 token.SetLabileFlagBit(NxsToken::newlineIsToken);
03205 token.GetNextToken();
03206 if (interleaving && token.AtEOL())
03207 return false;
03208 if (token.GetTokenLength() == 0)
03209 GenerateNxsException(token, "Unexpected empty token encountered");
03210
03211 int polymorphism = token.Equals("(");
03212 int uncertainty = token.Equals("{");
03213 if (!uncertainty && !polymorphism)
03214 {
03215 row[charNum] = HandleTokenState(token, taxNum, charNum, mapper, firstTaxonRow, nameStr);
03216 return true;
03217 }
03218
03219
03220
03221 errormsg = "Currently polymorphism and ambiguity are not supported for matrices in TOKENS mode: ";
03222 errormsg << token.GetToken() << " found while reading character " << charNum + 1 << " of taxon \"" << nameStr << '\"';
03223 throw NxsException(errormsg, token);
03224
03225 bool tildeFound = false;
03226 NxsDiscreteStateCell prevState = NXS_INVALID_STATE_CODE;
03227 std::string prevToken = token.GetToken();
03228 std::set<NxsDiscreteStateCell> sset;
03229 for (;;)
03230 {
03231
03232
03233
03234 token.SetLabileFlagBit(NxsToken::tildeIsPunctuation);
03235 token.GetNextToken();
03236
03237 if (token.Equals(","))
03238 {
03239 ;
03240 }
03241 if (polymorphism)
03242 {
03243 if (token.Equals(")"))
03244 {
03245 if (tildeFound)
03246 mapper.GenerateNxsExceptionMatrixReading("Range of states still being specified when ')' encountered", taxNum, charNum, token, nameStr);
03247 break;
03248 }
03249 if (token.Equals("{"))
03250 mapper.GenerateNxsExceptionMatrixReading("Illegal range of states '{' found inside '()'", taxNum, charNum, token, nameStr);
03251
03252 }
03253 else if (uncertainty)
03254 {
03255 if (token.Equals("}"))
03256 {
03257 if (tildeFound)
03258 mapper.GenerateNxsExceptionMatrixReading("Range of states still being specified when '}' encountered", taxNum, charNum, token, nameStr);
03259 break;
03260 }
03261 if (token.Equals("("))
03262 mapper.GenerateNxsExceptionMatrixReading("Illegal range of states '(' found inside '{}'", taxNum, charNum, token, nameStr);
03263 }
03264 else if (token.Equals("~"))
03265 {
03266 if (prevState < 0 || prevState >= (NxsDiscreteStateCell)symbols.length())
03267 {
03268 errormsg.clear();
03269 errormsg << "A state range cannot start with " << prevToken;
03270 mapper.GenerateNxsExceptionMatrixReading(errormsg, taxNum, charNum, token, nameStr);
03271 }
03272 tildeFound = true;
03273 }
03274 else
03275 {
03276 NxsDiscreteStateCell currState;
03277 if (tildeFound)
03278 {
03279 currState = HandleTokenState(token, taxNum, charNum, mapper, firstTaxonRow, nameStr);
03280 if (currState <= prevState)
03281 {
03282 errormsg = "Last state in specified range (";
03283 errormsg << token.GetToken() << ") must be greater than the first";
03284 mapper.GenerateNxsExceptionMatrixReading(errormsg, taxNum, charNum, token, nameStr);
03285 }
03286 for (NxsDiscreteStateCell i = prevState; i <= currState; ++i)
03287 sset.insert(i);
03288 tildeFound = false;
03289 }
03290 else
03291 {
03292
03293
03294
03295
03296 currState = HandleTokenState(token, taxNum, charNum, mapper, firstTaxonRow, nameStr);
03297 sset.insert(currState);
03298 }
03299 prevState = currState;
03300 prevToken = token.GetToken();
03301 }
03302 }
03303
03304 if (prevToken == "~")
03305 {
03306 errormsg.clear();
03307 errormsg << "State range not terminated -- ending in ~" << token.GetToken();
03308 mapper.GenerateNxsExceptionMatrixReading(errormsg, taxNum, charNum, token, nameStr);
03309 }
03310 if (sset.empty())
03311 {
03312 errormsg.clear();
03313 errormsg << "An illegal (empty) state range -- either {} or ()";
03314 mapper.GenerateNxsExceptionMatrixReading(errormsg, taxNum, charNum, token, nameStr);
03315 }
03316 row[charNum] = mapper.StateCodeForStateSet(sset, (const bool)(polymorphism != 0), true, '\0');
03317 return true;
03318 }
03319
03320 NxsDiscreteStateCell NxsCharactersBlock::HandleTokenState(
03321 NxsToken &token,
03322 unsigned taxNum,
03323 unsigned charNum,
03324 NxsDiscreteDatatypeMapper &,
03325 const NxsDiscreteStateRow * ,
03326 const NxsString & nameStr)
03327 {
03328
03329 const std::string t = token.GetToken(respectingCase);
03330 NxsStringVectorMap::const_iterator bagIter = charStates.find(charNum);
03331
03332
03333
03334
03335 NxsStringVector::const_iterator ci_begin = bagIter->second.begin();
03336 NxsStringVector::const_iterator ci_end = bagIter->second.end();
03337 NxsStringVector::const_iterator cit;
03338 NxsDiscreteStateCell k = 0;
03339 for (; ci_begin != ci_end; ++ci_begin, ++k)
03340 {
03341 if (respectingCase)
03342 {
03343 if (*ci_begin == t)
03344 return k;
03345 }
03346 else
03347 {
03348 if (NxsString::case_insensitive_equals(t.c_str(), ci_begin->c_str()))
03349 return k;
03350 }
03351 }
03352
03353 errormsg = "Unrecognized state ";
03354 errormsg << t << " found while reading character " << charNum + 1 << " of taxon number " << taxNum + 1;
03355 if (!nameStr.empty())
03356 errormsg << "(name \"" << nameStr << "\")";
03357 throw NxsException(errormsg, token);
03358 }
03359
03360
03361 unsigned NxsCharactersBlock::GetMaxIndex() const
03362 {
03363 unsigned nct = GetNCharTotal();
03364 if (nct == 0)
03365 return UINT_MAX;
03366 return nct - 1;
03367 }
03368
03373 unsigned NxsCharactersBlock::GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const
03374 {
03375 NxsString emsg;
03376 const unsigned numb = CharLabelToNumber(label);
03377 if (numb != 0)
03378 {
03379 if (inds)
03380 inds->insert(numb - 1);
03381 return 1;
03382 }
03383 if (!defCodonPosPartitionName.empty())
03384 {
03385 std::string t(label);
03386 NxsString::to_upper(t);
03387 std::string n;
03388 if (t == "POS1")
03389 n.assign("1");
03390 else if (t == "POS2")
03391 n.assign("2");
03392 else if (t == "POS3")
03393 n.assign("3");
03394 else if (t == "NONCODING")
03395 n.assign("N");
03396 if (!n.empty())
03397 {
03398 NxsPartitionsByName::const_iterator pit = codonPosPartitions.find(defCodonPosPartitionName);
03399 if (pit != codonPosPartitions.end())
03400 {
03401 const NxsPartition & p = pit->second;
03402 for (NxsPartition::const_iterator s = p.begin(); s != p.end(); ++s)
03403 {
03404 if (NxsString::case_insensitive_equals(n.c_str(), s->first.c_str()))
03405 {
03406 unsigned nel = (unsigned)s->second.size();
03407 if (inds)
03408 inds->insert(s->second.begin(), s->second.end());
03409 return nel;
03410 }
03411 }
03412 }
03413 }
03414 }
03415 if (NxsString::case_insensitive_equals(label.c_str(), "CONSTANT"))
03416 {
03417 NxsUnsignedSet c;
03418 FindConstantCharacters(c);
03419 if (inds)
03420 inds->insert(c.begin(), c.end());
03421 return (unsigned)c.size();
03422 }
03423 if (NxsString::case_insensitive_equals(label.c_str(), "GAPPED"))
03424 {
03425 NxsUnsignedSet c;
03426 FindGappedCharacters(c);
03427 if (inds)
03428 inds->insert(c.begin(), c.end());
03429 return (unsigned)c.size();
03430 }
03431 return GetIndicesFromSetOrAsNumber(label, inds, charSets, GetMaxIndex(), "character");
03432 }
03433
03437 bool NxsCharactersBlock::AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds)
03438 {
03439 NxsString ls(label.c_str());
03440 bool replaced = charSets.count(ls) > 0;
03441 charSets[ls] = inds;
03442 return replaced;
03443 }
03444
03448 bool NxsCharactersBlock::AddNewPartition(const std::string &label, const NxsPartition & inds)
03449 {
03450 NxsString ls(label.c_str());
03451 ls.ToUpper();
03452 bool replaced = charPartitions.count(ls) > 0;
03453 charPartitions[ls] = inds;
03454 return replaced;
03455 }
03456
03460 bool NxsCharactersBlock::AddNewCodonPosPartition(const std::string &label, const NxsPartition & inds, bool isDef)
03461 {
03462 NxsString ls(label.c_str());
03463 ls.ToUpper();
03464 bool replaced = codonPosPartitions.count(ls) > 0;
03465 codonPosPartitions[ls] = inds;
03466 if (isDef)
03467 defCodonPosPartitionName = ls;
03468 return replaced;
03469 }
03470
03474 bool NxsCharactersBlock::AddNewExSet(const std::string &label, const NxsUnsignedSet & inds)
03475 {
03476 NxsString ls(label.c_str());
03477 bool replaced = exSets.count(ls) > 0;
03478 exSets[ls] = inds;
03479 return replaced;
03480 }
03481
03485 NxsCharactersBlock::NxsCharactersBlock(
03486 NxsTaxaBlockAPI *tb,
03487 NxsAssumptionsBlockAPI *ab)
03488 :NxsTaxaBlockSurrogate(tb, NULL)
03489 {
03490 assumptionsBlock = ab;
03491 id = "CHARACTERS";
03492 supportMixedDatatype = false;
03493 convertAugmentedToMixed = false;
03494 allowAugmentingOfSequenceSymbols = false;
03495 writeInterleaveLen = -1;
03496 Reset();
03497 }
03501 unsigned NxsCharactersBlock::ApplyExset(
03502 NxsUnsignedSet &exset)
03503 {
03504 excluded.clear();
03505 set_union(eliminated.begin(), eliminated.end(), exset.begin(), exset.end(), inserter(excluded, excluded.begin()));
03506 return (unsigned) excluded.size();
03507 }
03508
03512 unsigned NxsCharactersBlock::ApplyIncludeset(
03513 NxsUnsignedSet &inset)
03514 {
03515 NxsUnsignedSet inc(inset);
03516 inc.erase(eliminated.begin(), eliminated.end());
03517 excluded.erase(inc.begin(), inc.end());
03518 return nChar - (unsigned) excluded.size();
03519 }
03520
03525 unsigned NxsCharactersBlock::CharLabelToNumber(
03526 const std::string &inp) const
03527 {
03528 NxsString s(inp.c_str());
03529 s.ToUpper();
03530 std::map<std::string, unsigned>::const_iterator ltindIt = ucCharLabelToIndex.find(s);
03531 if (ltindIt == ucCharLabelToIndex.end())
03532 return 0;
03533 return 1 + ltindIt->second;
03534 }
03535
03543 void NxsCharactersBlock::Consume(
03544 NxsCharactersBlock &other)
03545 {
03546 if (assumptionsBlock)
03547 assumptionsBlock->SetCallback(NULL);
03548 assumptionsBlock = other.assumptionsBlock;
03549 other.assumptionsBlock = NULL;
03550 if (assumptionsBlock)
03551 assumptionsBlock->SetCallback(this);
03552
03553 nChar = other.nChar;
03554 nTaxWithData = other.nTaxWithData;
03555 matchchar = other.matchchar;
03556 respectingCase = other.respectingCase;
03557 transposing = other.transposing;
03558 interleaving = other.interleaving;
03559 tokens = other.tokens;
03560 labels = other.labels;
03561 missing = other.missing;
03562 gap = other.gap;
03563 gapMode = other.gapMode;
03564 symbols = other.symbols;
03565 userEquates = other.userEquates;
03566 defaultEquates = other.defaultEquates;
03567 discreteMatrix = other.discreteMatrix;
03568 continuousMatrix = other.continuousMatrix;
03569 eliminated = other.eliminated;
03570 excluded = other.excluded;
03571 ucCharLabelToIndex = other.ucCharLabelToIndex;
03572 indToCharLabel = other.indToCharLabel;
03573 charStates = other.charStates;
03574 globalStateLabels = other.globalStateLabels;
03575 items = other.items;
03576 charSets = other.charSets;
03577 charPartitions = other.charPartitions;
03578 exSets = other.exSets;
03579 datatype = other.datatype;
03580 originalDatatype = other.originalDatatype;
03581 datatypeReadFromFormat = other.datatypeReadFromFormat;
03582 statesFormat = other.statesFormat;
03583 datatypeMapperVec = other.datatypeMapperVec;
03584 isEmpty = false;
03585 isUserSupplied = other.isUserSupplied;
03586 supportMixedDatatype = other.supportMixedDatatype;
03587 convertAugmentedToMixed = other.convertAugmentedToMixed;
03588 allowAugmentingOfSequenceSymbols = other.allowAugmentingOfSequenceSymbols;
03589 writeInterleaveLen = other.writeInterleaveLen;
03590 other.Reset();
03591 transfMgr.Reset();
03592 }
03593
03594
03595 void NxsCharactersBlock::WriteStatesForTaxonAsNexus(
03596 std::ostream &out,
03597 unsigned taxNum,
03598 unsigned beginCharInd,
03599 unsigned endCharInd) const {
03600 NCL_ASSERT(endCharInd <= this->nChar);
03601
03602 if (datatype == continuous)
03603 {
03604 const ContinuousCharRow & row = GetContinuousMatrixRow(taxNum);
03605 if (!row.empty())
03606 {
03607 NCL_ASSERT(endCharInd <= row.size());
03608 for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
03609 {
03610 out << ' ';
03611 ShowStateLabels(out, taxNum, charInd, UINT_MAX);
03612 }
03613 }
03614 }
03615 else
03616 {
03617 const NxsDiscreteStateRow & row = GetDiscreteMatrixRow(taxNum);
03618 const unsigned rs = (const unsigned)row.size();
03619 NCL_ASSERT(endCharInd <= rs);
03620 if (rs > 0)
03621 {
03622 if (this->datatype == NxsCharactersBlock::codon)
03623 {
03624 for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
03625 {
03626 NxsDiscreteStateCell sc = row[charInd];
03627 if (sc == NXS_GAP_STATE_CODE)
03628 out << gap << gap << gap;
03629 else if (sc >= 0 && sc < (NxsDiscreteStateCell) globalStateLabels.size())
03630 out << globalStateLabels[sc];
03631 else
03632 out << missing << missing << missing;
03633 }
03634 }
03635 else
03636 {
03637 const NxsDiscreteDatatypeMapper * dm = GetDatatypeMapperForChar(0);
03638 if (dm == NULL)
03639 throw NxsNCLAPIException("No DatatypeMapper in WriteStatesForTaxonAsNexus");
03640 if (IsMixedType())
03641 {
03642
03643 for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
03644 {
03645 dm = GetDatatypeMapperForChar(charInd);
03646 if (dm == NULL)
03647 {
03648 errormsg = "No DatatypeMapper for character ";
03649 errormsg << charInd + 1 << " in WriteStatesForTaxonAsNexus";
03650 throw NxsNCLAPIException(errormsg);
03651 }
03652 const NxsDiscreteStateCell c = row.at(charInd);
03653 dm->WriteStateCodeAsNexusString(out, c);
03654 }
03655 }
03656 else
03657 {
03658 if (tokens)
03659 {
03660 for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
03661 {
03662 NxsDiscreteStateCell sc = row[charInd];
03663 out << ' ';
03664 if (sc == NXS_GAP_STATE_CODE)
03665 out << gap;
03666 else
03667 {
03668 NxsString sl = GetStateLabel(charInd, sc);
03669 if (sl == " ")
03670 {
03671 errormsg = "Writing character state ";
03672 errormsg << 1 + sc << " for character " << 1+charInd << ", but no appropriate chararcter label or symbol was found.";
03673 throw NxsNCLAPIException(errormsg);
03674 }
03675 else
03676 out << NxsString::GetEscaped(sl);
03677 }
03678 }
03679 }
03680 else
03681 {
03682 std::vector<NxsDiscreteStateCell>::const_iterator endIt = row.begin() + beginCharInd;
03683 std::vector<NxsDiscreteStateCell>::const_iterator begIt = endIt;
03684 if (endCharInd == row.size())
03685 endIt = row.end();
03686 else
03687 endIt += endCharInd - beginCharInd;
03688 dm->WriteStateCodeRowAsNexus(out, begIt, endIt);
03689 }
03690 }
03691 }
03692 }
03693 }
03694 }
03695
03696
03702 void NxsCharactersBlock::DebugShowMatrix(
03703 std::ostream &out,
03704 bool ,
03705 const char *marginText) const
03706 {
03707 if (!taxa)
03708 return;
03709 const unsigned width = taxa->GetMaxTaxonLabelLength();
03710 const unsigned ntt = GetNTaxTotal();
03711 for (unsigned i = 0; i < ntt; i++)
03712 {
03713 bool skip = true;
03714 if (datatype == continuous)
03715 {
03716 const ContinuousCharRow & row = GetContinuousMatrixRow(i);
03717 skip = row.empty();
03718 }
03719 else
03720 {
03721 const NxsDiscreteStateRow & row = GetDiscreteMatrixRow(i);
03722 skip = row.empty();
03723 }
03724 if (!skip)
03725 {
03726 if (marginText != NULL)
03727 out << marginText;
03728 const NxsString currTaxonLabel = taxa->GetTaxonLabel(i);
03729 out << currTaxonLabel;
03730 unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
03731 unsigned diff = width - currTaxonLabelLen;
03732 std::string spacer(diff+5, ' ');
03733 out << spacer;
03734 WriteStatesForTaxonAsNexus(out, i, 0, nChar);
03735 out << endl;
03736 }
03737 }
03738 }
03739
03740 unsigned NxsCharactersBlock::GetMaxObsNumStates(bool countMissingStates, bool onlyActiveChars) NCL_COULD_BE_CONST
03741 {
03742 unsigned maxN = 1;
03743 for (unsigned j = 0; j < nChar; j++)
03744 {
03745 if (!onlyActiveChars || IsActiveChar(j))
03746 maxN = std::max(maxN, GetObsNumStates(j, countMissingStates));
03747 }
03748 return maxN;
03749 }
03750
03754 unsigned NxsCharactersBlock::GetNumActiveChar() NCL_COULD_BE_CONST
03755 {
03756 unsigned num_active_char = 0;
03757 for (unsigned i = 0; i < nChar; i++)
03758 {
03759 if (IsActiveChar(i))
03760 num_active_char++;
03761 }
03762 return num_active_char;
03763 }
03764
03765
03766
03767
03768
03769
03770 NxsString NxsCharactersBlock::GetStateLabelImpl(
03771 unsigned i,
03772 unsigned j) const
03773 {
03774 NxsString s = " ";
03775 NxsStringVectorMap::const_iterator cib = charStates.find(i);
03776 if (cib != charStates.end() && j < static_cast<unsigned>(cib->second.size()))
03777 return cib->second[j];
03778 if (!globalStateLabels.empty() && (j < globalStateLabels.size()))
03779 return globalStateLabels[j];
03780 return s;
03781 }
03782
03783
03788 bool NxsCharactersBlock::IsInSymbols(
03789 char ch) NCL_COULD_BE_CONST
03790 {
03791 char char_in_question = (respectingCase ? ch : (char)toupper(ch));
03792 for (std::string::const_iterator sIt = symbols.begin(); sIt != symbols.end(); ++sIt)
03793 {
03794 const char char_in_symbols = (respectingCase ? *sIt : (char)toupper(*sIt));
03795 if (char_in_symbols == char_in_question)
03796 return true;
03797 }
03798 return false;
03799 }
03800
03806 void NxsCharactersBlock::HandleCharlabels(
03807 NxsToken &token)
03808 {
03809 ucCharLabelToIndex.clear();
03810 indToCharLabel.clear();
03811 unsigned ind = 0;
03812 for (;;)
03813 {
03814 token.GetNextToken();
03815 if (token.Equals(";"))
03816 break;
03817 else
03818 {
03819 if (ind >= nChar)
03820 GenerateNxsException(token, "Number of character labels exceeds NCHAR specified in DIMENSIONS command");
03821 NxsString t = token.GetToken();
03822 if (t != " ")
03823 {
03824 indToCharLabel[ind] = t;
03825 t.ToUpper();
03826 ucCharLabelToIndex[t] = ind;
03827 }
03828 ind++;
03829 }
03830 }
03831 }
03832
03840 void NxsCharactersBlock::HandleCharstatelabels(
03841 NxsToken &token)
03842 {
03843 unsigned currChar = 0;
03844 bool semicolonFoundInInnerLoop = false;
03845 bool tokenAlreadyRead = false;
03846 bool save = true;
03847
03848 charStates.clear();
03849 ucCharLabelToIndex.clear();
03850 indToCharLabel.clear();
03851
03852 for (;;)
03853 {
03854 save = true;
03855
03856 if (semicolonFoundInInnerLoop)
03857 break;
03858
03859 if (tokenAlreadyRead)
03860 tokenAlreadyRead = false;
03861 else
03862 token.GetNextToken();
03863
03864 if (token.Equals(";"))
03865 break;
03866
03867
03868
03869 int sn = atoi(token.GetToken().c_str());
03870 unsigned n = (unsigned)sn;
03871 if (sn < 1 || n > nChar || n <= currChar)
03872 {
03873 errormsg = "Invalid character number (";
03874 errormsg += token.GetToken();
03875 errormsg += ") found in CHARSTATELABELS command (either out of range or not interpretable as an integer)";
03876 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
03877 }
03878
03879 currChar = n;
03880
03881 token.GetNextToken();
03882
03883
03884
03885
03886 if (save)
03887 {
03888 NxsString t = token.GetToken();
03889 if (t != " " && !token.Equals("/"))
03890 {
03891 indToCharLabel[currChar - 1] = t;
03892 t.ToUpper();
03893 ucCharLabelToIndex[t] = currChar - 1;
03894 }
03895 }
03896 if (!token.Equals("/"))
03897 token.GetNextToken();
03898
03899
03900
03901
03902
03903
03904
03905 if (!token.Equals("/"))
03906 {
03907 if (!token.Equals(",") && !token.Equals(";"))
03908 {
03909 errormsg = "Expecting a comma or semicolon here, but found \"";
03910 errormsg += token.GetToken();
03911 errormsg += "\" instead";
03912 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
03913 }
03914 if (token.Equals(","))
03915 token.GetNextToken();
03916 tokenAlreadyRead = true;
03917 continue;
03918 }
03919
03920
03921
03922 for (;;)
03923 {
03924 token.GetNextToken();
03925
03926 if (token.Equals(";"))
03927 {
03928 semicolonFoundInInnerLoop = true;
03929 break;
03930 }
03931
03932 if (token.Equals(","))
03933 break;
03934
03935 if (save)
03936 {
03937 if (datatype == continuous)
03938 GenerateNxsException(token, "State Labels cannot be specified when the datatype is continuous");
03939
03940
03941 NxsString cslabel = token.GetToken();
03942 charStates[n - 1].push_back(cslabel);
03943 }
03944
03945 }
03946 }
03947 }
03948
03956 void NxsCharactersBlock::HandleDimensions(
03957 NxsToken &token,
03958 NxsString newtaxaLabel,
03959 NxsString ntaxLabel,
03960 NxsString ncharLabel)
03961 {
03962 nChar = 0;
03963 unsigned ntaxRead = 0;
03964 for (;;)
03965 {
03966 token.GetNextToken();
03967 if (token.Equals(newtaxaLabel))
03968 newtaxa = true;
03969 else if (token.Equals(ntaxLabel))
03970 {
03971 DemandEquals(token, "after NTAX in DIMENSIONS command");
03972 ntaxRead = DemandPositiveInt(token, ntaxLabel.c_str());
03973 }
03974 else if (token.Equals(ncharLabel))
03975 {
03976 DemandEquals(token, "in DIMENSIONS command");
03977 nChar = DemandPositiveInt(token, ncharLabel.c_str());
03978 }
03979 else if (token.Equals(";"))
03980 break;
03981 }
03982
03983 if (nChar == 0)
03984 {
03985 errormsg = "DIMENSIONS command must have an NCHAR subcommand .";
03986 throw NxsException(errormsg, token);
03987 }
03988 if (newtaxa)
03989 {
03990 if (ntaxRead == 0)
03991 {
03992 errormsg = "DIMENSIONS command must have an NTAX subcommand when the NEWTAXA option is in effect.";
03993 throw NxsException(errormsg, token);
03994 }
03995 AssureTaxaBlock(createImpliedBlock, token, "Dimensions");
03996 if (!createImpliedBlock)
03997 {
03998 taxa->Reset();
03999 if (nexusReader)
04000 nexusReader->RemoveBlockFromUsedBlockList(taxa);
04001 }
04002 taxa->SetNtax(ntaxRead);
04003 nTaxWithData = ntaxRead;
04004 }
04005 else
04006 {
04007 AssureTaxaBlock(false, token, "Dimensions");
04008 const unsigned ntaxinblock = taxa->GetNTax();
04009 if (ntaxinblock == 0)
04010 {
04011 errormsg = "A TAXA block must be read before character data, or the DIMENSIONS command must use the NEWTAXA.";
04012 throw NxsException(errormsg, token);
04013 }
04014
04015 if (ntaxinblock < ntaxRead)
04016 {
04017 errormsg = ntaxLabel;
04018 errormsg += " in ";
04019 errormsg += id;
04020 errormsg += " block must be less than or equal to NTAX in TAXA block\nNote: one circumstance that can cause this error is \nforgetting to specify ";
04021 errormsg += ntaxLabel;
04022 errormsg += " in DIMENSIONS command when \na TAXA block has not been provided";
04023 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
04024 }
04025 nTaxWithData = (ntaxRead == 0 ? ntaxinblock : ntaxRead);
04026 }
04027 }
04028
04039 void NxsCharactersBlock::HandleEliminate(
04040 NxsToken &token)
04041 {
04042 if (!eliminated.empty() && nexusReader)
04043 nexusReader->NexusWarnToken("Only one ELIMINATE command should be used in a CHARACTERS or DATA block (it must appear before the MATRIX command).\n New character eliminations will be added to the previous eliminated characters (the previously eliminated characters will continue to be excluded).", NxsReader::UNCOMMON_SYNTAX_WARNING, token);
04044 token.GetNextToken();
04045 NxsSetReader::ReadSetDefinition(token, *this, "Character", "Eliminate", &eliminated);
04046 NCL_ASSERT(eliminated.size() <= nChar);
04047 for (NxsUnsignedSet::const_iterator elIt = eliminated.begin(); elIt != eliminated.end(); ++elIt)
04048 excluded.insert(*elIt);
04049 }
04050
04051
04052
04057 void NxsCharactersBlock::HandleStdMatrix(
04058 NxsToken &token)
04059 {
04060 NCL_ASSERT(taxa != NULL);
04061 unsigned indOfTaxInCommand;
04062 unsigned indOfTaxInMemory;
04063 unsigned currChar = 0;
04064 unsigned firstChar = 0;
04065 unsigned lastChar = nChar;
04066 unsigned nextFirst = 0;
04067 unsigned page = 0;
04068 const bool continuousData = (datatype == NxsCharactersBlock::continuous);
04069 const unsigned ntlabels = taxa->GetNumTaxonLabels();
04070 errormsg.clear();
04071 bool taxaBlockNeedsLabels = (ntlabels == 0);
04072 if (!taxaBlockNeedsLabels && ntlabels < nTaxWithData)
04073 {
04074 errormsg << "Not enough taxlabels are known to read characters for " << nTaxWithData << " taxa in the Matrix command.";
04075 throw NxsException(errormsg, token);
04076 }
04077 ContinuousCharRow emptyContRow;
04078 NxsDiscreteStateRow emptyDiscRow;
04079 ContinuousCharRow *contRowPtr = NULL;
04080 NxsDiscreteStateRow *discRowPtr = NULL;
04081 ContinuousCharRow *ftContRowPtr = NULL;
04082 NxsDiscreteStateRow *ftDiscRowPtr = NULL;
04083 const bool isContinuous = (datatype == NxsCharactersBlock::continuous);
04084 if (isContinuous)
04085 emptyContRow.resize(nChar);
04086 else
04087 emptyDiscRow.assign(nChar, NXS_INVALID_STATE_CODE);
04088 std::vector<unsigned> toInMem(nTaxWithData, UINT_MAX);
04089 std::vector<unsigned> nCharsRead(nTaxWithData, 0);
04090
04091 unsigned numSigInts = NxsReader::getNumSignalIntsCaught();
04092 const bool checkingSignals = NxsReader::getNCLCatchesSignals();
04093 const unsigned MAX_NUM_CHARS_BETWEEN_SIGNAL_CHECKS = 1000;
04094 for (; currChar < nChar; page++)
04095 {
04096 for (indOfTaxInCommand = 0; indOfTaxInCommand < nTaxWithData ; indOfTaxInCommand++)
04097 {
04098 unsigned numCharsSinceLastSignalCheck = 0;
04099 if (checkingSignals && NxsReader::getNumSignalIntsCaught() != numSigInts)
04100 {
04101 if (datatype == NxsCharactersBlock::continuous)
04102 continuousMatrix.clear();
04103 else
04104 discreteMatrix.clear();
04105 throw NxsSignalCanceledParseException("Reading Characters Block");
04106 }
04107 NxsString nameStr;
04108 if (labels)
04109 {
04110 token.GetNextToken();
04111 nameStr = token.GetToken();
04112 if (taxaBlockNeedsLabels)
04113 {
04114 if (taxa->IsAlreadyDefined(nameStr))
04115 {
04116 errormsg << "Data for this taxon (" << nameStr << ") has already been saved";
04117 throw NxsException(errormsg, token);
04118 }
04119 indOfTaxInMemory = taxa->AddTaxonLabel(nameStr);
04120 }
04121 else
04122 {
04123 unsigned numOfTaxInMemory = taxa->TaxLabelToNumber(nameStr);
04124 if (numOfTaxInMemory == 0)
04125 {
04126 if (token.Equals(";"))
04127 {
04128 if (currChar != nChar)
04129 errormsg << "Unexpected ; (after only " << currChar << " characters were read)";
04130 else
04131 errormsg << "Unexpected ; (after characters were read for only " << indOfTaxInCommand << "out of " << nTaxWithData << " taxa)";
04132 }
04133 else
04134 errormsg << "Could not find taxon named \"" << nameStr << "\" among stored taxon labels";
04135 if (currChar > 0)
04136 errormsg << "\n Expecting data for taxon \"" << taxa->GetTaxonLabel(toInMem[indOfTaxInCommand]) << "\"";
04137 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
04138 }
04139 indOfTaxInMemory = numOfTaxInMemory - 1;
04140 }
04141 }
04142 else
04143 {
04144 indOfTaxInMemory = indOfTaxInCommand;
04145 nameStr << (indOfTaxInMemory + 1);
04146 }
04147 if (page == 0)
04148 {
04149 if (isContinuous)
04150 {
04151 NCL_ASSERT(indOfTaxInMemory < continuousMatrix.size());
04152 continuousMatrix[indOfTaxInMemory] = emptyContRow;
04153 }
04154 else
04155 {
04156 NCL_ASSERT(indOfTaxInMemory < discreteMatrix.size());
04157 discreteMatrix[indOfTaxInMemory] = emptyDiscRow;
04158 }
04159 if (toInMem[indOfTaxInCommand] != UINT_MAX)
04160 {
04161 errormsg << "Characters for taxon \"" << nameStr << "\" (number " << indOfTaxInMemory + 1 << "and \"" << taxa->GetTaxonLabel(indOfTaxInMemory) << "\" according to the taxa block) have already been stored";
04162 throw NxsException(errormsg, token);
04163 }
04164 toInMem[indOfTaxInCommand] = indOfTaxInMemory;
04165 }
04166 else
04167 {
04168 if (toInMem[indOfTaxInCommand] != indOfTaxInMemory)
04169 {
04170 errormsg << "Ordering of taxa must be identical to that in first interleave page. Taxon \"" << nameStr << "\" was not expected.";
04171 throw NxsException(errormsg, token);
04172 }
04173 }
04174
04175 if (firstChar > 0 && nCharsRead[indOfTaxInCommand] >= firstChar)
04176 {
04177 errormsg << "Data for this taxon (" << nameStr << ") have already been saved";
04178 throw NxsException(errormsg, token);
04179 }
04180 if (isContinuous)
04181 {
04182 contRowPtr = &continuousMatrix[indOfTaxInMemory];
04183 if (ftDiscRowPtr == NULL)
04184 ftContRowPtr = contRowPtr;
04185 }
04186 else
04187 {
04188 discRowPtr = &discreteMatrix[indOfTaxInMemory];
04189 if (ftDiscRowPtr == NULL)
04190 ftDiscRowPtr = discRowPtr;
04191 }
04192
04193
04194
04195
04196 bool atEOL = false;
04197 for (currChar = firstChar; currChar < lastChar; currChar++)
04198 {
04199 if (checkingSignals)
04200 {
04201 if (numCharsSinceLastSignalCheck >= MAX_NUM_CHARS_BETWEEN_SIGNAL_CHECKS)
04202 {
04203 if (NxsReader::getNumSignalIntsCaught() != numSigInts)
04204 {
04205 if (datatype == NxsCharactersBlock::continuous)
04206 continuousMatrix.clear();
04207 else
04208 discreteMatrix.clear();
04209 throw NxsSignalCanceledParseException("Reading Characters Block");
04210 }
04211 numCharsSinceLastSignalCheck = 0;
04212 }
04213 else
04214 numCharsSinceLastSignalCheck++;
04215 }
04216
04217 NxsDiscreteDatatypeMapper * currMapper = GetMutableDatatypeMapperForChar(currChar);
04218
04219 if (continuousData)
04220 atEOL = HandleNextContinuousState(token, indOfTaxInMemory, currChar, *contRowPtr, nameStr);
04221 else
04222 {
04223 NCL_ASSERT(currMapper);
04224 if (tokens)
04225 atEOL = HandleNextTokenState(token, indOfTaxInMemory, currChar, *discRowPtr, *currMapper, ftDiscRowPtr, nameStr);
04226 else
04227 atEOL = HandleNextDiscreteState(token, indOfTaxInMemory, currChar, *discRowPtr, *currMapper, ftDiscRowPtr, nameStr);
04228 }
04229 if (interleaving && !atEOL)
04230 {
04231 if (lastChar < nChar && currChar != lastChar)
04232 {
04233 errormsg << "Each line within an interleave page must comprise the same number of characters. Error reading taxon \"" << nameStr << '\"';
04234 throw NxsException(errormsg, token);
04235 }
04236
04237
04238 nextFirst = currChar;
04239
04240
04241
04242 lastChar = currChar;
04243 }
04244 }
04245 if (lastChar > 0)
04246 nCharsRead[indOfTaxInCommand] = lastChar - 1;
04247 if (lastChar < nChar && indOfTaxInCommand > 0)
04248 {
04249 token.SetLabileFlagBit(NxsToken::newlineIsToken);
04250 token.GetNextToken();
04251 if (!token.AtEOL())
04252 {
04253 errormsg << "Each line within an interleave page must comprise the same number of characters\n. Expecting the end of a line, but found " << token.GetToken() << " when reading data for taxon \"" << nameStr << '\"';
04254 throw NxsException(errormsg, token);
04255 }
04256 }
04257 else
04258 {
04259 const char nextch = token.PeekAtNextChar();
04260 if (indOfTaxInCommand > 0 && (!atEOL) && (strchr(";[\n\r \t", nextch) == NULL) && nexusReader)
04261 {
04262 errormsg << "Expecting a whitespace character at the end of the characters for taxon \""<< nameStr << "\" but found " << nextch;
04263 nexusReader->NexusWarnToken(errormsg, NxsReader::UNCOMMON_SYNTAX_WARNING, token);
04264 errormsg.clear();
04265 }
04266 }
04267 }
04268 firstChar = nextFirst;
04269 lastChar = nChar;
04270 taxaBlockNeedsLabels = false;
04271 }
04272 }
04273
04277 void NxsCharactersBlock::HandleTransposedMatrix(
04278 NxsToken &token)
04279 {
04280 NCL_ASSERT(taxa);
04281 unsigned currTaxon = 0;
04282 unsigned firstTaxon = 0;
04283 unsigned lastTaxon = nTaxWithData;
04284 unsigned nextFirst = 0;
04285 unsigned page = 0;
04286 const bool continuousData = (datatype == NxsCharactersBlock::continuous);
04287 unsigned indOfCharInCommand, indOfCharInMemory;
04288 const bool isContinuous = (datatype == NxsCharactersBlock::continuous);
04289
04290 if (isContinuous)
04291 {
04292 ContinuousCharRow emptyContRow(nChar);
04293 for (unsigned i = 0; i < nTaxWithData; ++ i)
04294 continuousMatrix[i] = emptyContRow;
04295 }
04296 else
04297 {
04298 NxsDiscreteStateRow emptyDiscRow(nChar, NXS_INVALID_STATE_CODE);
04299 for (unsigned i = 0; i < nTaxWithData; ++ i)
04300 discreteMatrix[i] = emptyDiscRow;
04301 }
04302 vector<unsigned> toInMem(nChar, UINT_MAX);
04303 vector<unsigned> nTaxRead(nChar, 0);
04304 bool needsCharLabels = indToCharLabel.empty();
04305 for (;; page++)
04306 {
04307 for (indOfCharInCommand = 0; indOfCharInCommand < nChar; indOfCharInCommand++)
04308 {
04309 NxsString rawToken;
04310 if (labels)
04311 {
04312 token.GetNextToken();
04313 if (needsCharLabels)
04314 {
04315 rawToken = token.GetToken();
04316 NxsString s = rawToken;
04317 s.ToUpper();
04318 if (ucCharLabelToIndex.count(s) > 0)
04319 {
04320 errormsg << "Data for this character (" << token.GetToken() << ") has already been saved";
04321 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
04322 }
04323 ucCharLabelToIndex[s] = indOfCharInCommand;
04324 indToCharLabel[indOfCharInCommand] = rawToken;
04325 indOfCharInMemory = indOfCharInCommand;
04326 }
04327 else
04328 {
04329 rawToken = token.GetToken();
04330 NxsString s = rawToken;
04331 s.ToUpper();
04332 LabelToIndexMap::const_iterator iter = ucCharLabelToIndex.find(s);
04333 if (iter == ucCharLabelToIndex.end())
04334 {
04335 errormsg << "Could not find character named " << token.GetToken() << " among stored character labels";
04336 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
04337 }
04338 indOfCharInMemory = iter->second;
04339 }
04340 }
04341 else
04342 indOfCharInMemory = indOfCharInCommand;
04343
04344 if (page == 0)
04345 {
04346 if (toInMem[indOfCharInCommand] != UINT_MAX)
04347 {
04348 errormsg << "States for character " << indOfCharInCommand;
04349 if (!rawToken.empty())
04350 errormsg << " (" << rawToken << ") ";
04351 errormsg << "have already been stored";
04352 throw NxsException(errormsg, token);
04353 }
04354 toInMem[indOfCharInCommand] = indOfCharInMemory;
04355 }
04356 else
04357 {
04358 if (toInMem[indOfCharInCommand] != indOfCharInMemory)
04359 {
04360 errormsg << "The order of characters must be in the same order in each page of the interleaved matrix. Character " << rawToken << " was unexpected.";
04361 throw NxsException(errormsg, token);
04362 }
04363 }
04364 if (firstTaxon > 0 && nTaxRead[indOfCharInCommand] >= firstTaxon)
04365 {
04366 errormsg << "Data for this character ";
04367 if (!rawToken.empty())
04368 errormsg << '(' << rawToken << ") ";
04369 errormsg << "has already been saved";
04370 throw NxsException(errormsg, token);
04371 }
04372
04373 NxsDiscreteDatatypeMapper * currMapper = GetMutableDatatypeMapperForChar(indOfCharInMemory);
04374
04375 for (currTaxon = firstTaxon; currTaxon < lastTaxon; currTaxon++)
04376 {
04377 bool atEOL = false;
04378 NxsString nameStr;
04379 nameStr << 1+currTaxon;
04380 if (continuousData)
04381 {
04382 ContinuousCharRow *contRowPtr = &continuousMatrix[currTaxon];
04383 atEOL = HandleNextContinuousState(token, currTaxon, indOfCharInMemory, *contRowPtr, nameStr);
04384 }
04385 else
04386 {
04387 NxsDiscreteStateRow *discRowPtr = &discreteMatrix[currTaxon];
04388 if (tokens)
04389 atEOL = HandleNextTokenState(token, currTaxon, indOfCharInMemory, *discRowPtr, *currMapper, NULL, nameStr);
04390 else
04391 atEOL = HandleNextDiscreteState(token, currTaxon, indOfCharInMemory, *discRowPtr, *currMapper, NULL, nameStr);
04392 }
04393 if (interleaving && !atEOL)
04394 {
04395 if (lastTaxon < nTaxWithData && currTaxon != lastTaxon)
04396 GenerateNxsException(token, "Each line within an interleave page must comprise the same number of taxa");
04397
04398
04399 nextFirst = currTaxon;
04400
04401
04402
04403 lastTaxon = currTaxon;
04404 }
04405 }
04406 if (currTaxon > 0)
04407 nTaxRead[indOfCharInCommand] = currTaxon - 1;
04408 if (lastTaxon < nTaxWithData && indOfCharInCommand > 0)
04409 {
04410 token.SetLabileFlagBit(NxsToken::newlineIsToken);
04411 token.GetNextToken();
04412 if (!token.AtEOL())
04413 {
04414 errormsg = "Each line within an interleave page must comprise the same number of taxa\n.";
04415 errormsg << "Expecting the end of a line, but found " << token.GetToken();
04416 throw NxsException(errormsg, token);
04417 }
04418 }
04419 }
04420 firstTaxon = nextFirst;
04421 lastTaxon = nTaxWithData;
04422 if (currTaxon == nTaxWithData)
04423 break;
04424 needsCharLabels = false;
04425 }
04426 }
04427
04432 void NxsCharactersBlock::HandleMatrix(
04433 NxsToken &token)
04434 {
04435 const NxsPartition dtParts;
04436 const std::vector<DataTypesEnum> dtv;
04437 if (datatypeMapperVec.empty())
04438 CreateDatatypeMapperObjects(dtParts, dtv);
04439 if (taxa == NULL)
04440 AssureTaxaBlock(false, token, "Matrix");
04441
04442 if (tokens && GetDataType() == standard)
04443 {
04444
04445
04446
04447
04448
04449
04450 const unsigned nStatesWSymbols = (const unsigned)symbols.length();
04451 unsigned nStatesTotal = nStatesWSymbols;
04452 for (NxsStringVectorMap::const_iterator cib = this->charStates.begin(); cib != this->charStates.end(); ++cib)
04453 {
04454 const NxsStringVector & stateLabelsVec = cib->second;
04455 const unsigned ns = (unsigned)stateLabelsVec.size();
04456 if (ns > nStatesTotal)
04457 nStatesTotal = ns;
04458 }
04459 if (nStatesTotal > nStatesWSymbols)
04460 {
04461 symbols.append(nStatesTotal-nStatesWSymbols, '\0');
04462 CreateDatatypeMapperObjects(dtParts, dtv);
04463 }
04464 }
04465 const unsigned ntax = taxa->GetNTax();
04466 if (ntax == 0)
04467 {
04468 errormsg = "Must precede ";
04469 errormsg << id << " block with a TAXA block or specify NEWTAXA and NTAX in the DIMENSIONS command";
04470 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
04471 }
04472
04473 discreteMatrix.clear();
04474 continuousMatrix.clear();
04475
04476 if (datatype == NxsCharactersBlock::continuous)
04477 {
04478 continuousMatrix.clear();
04479 continuousMatrix.resize(ntax);
04480 }
04481 else
04482 {
04483 discreteMatrix.clear();
04484 discreteMatrix.resize(ntax);
04485 }
04486 if (IsMixedType())
04487 {
04488 if (transposing)
04489 throw NxsUnimplementedException("Reading of transposed, mixed datatype matrices will probably never be supported by NCL");
04490
04491 }
04492 if (transposing)
04493 HandleTransposedMatrix(token);
04494 else
04495 HandleStdMatrix(token);
04496 DemandEndSemicolon(token, "MATRIX");
04497 if (assumptionsBlock)
04498 assumptionsBlock->SetCallback(this);
04499 if (convertAugmentedToMixed)
04500 AugmentedSymbolsToMixed();
04501 }
04502
04509 void NxsCharactersBlock::HandleStatelabels(
04510 NxsToken &token)
04511 {
04512 if (datatype == continuous)
04513 GenerateNxsException(token, "STATELABELS cannot be specified when the datatype is continuous");
04514 charStates.clear();
04515 for (;;)
04516 {
04517 token.GetNextToken();
04518 if (token.Equals(";"))
04519 break;
04520
04521 int n = atoi(token.GetToken().c_str());
04522 if (n < 1 || n > (int)nChar)
04523 {
04524 errormsg = "Invalid character number (";
04525 errormsg << token.GetToken() << ") found in STATELABELS command (either out of range or not interpretable as an integer)";
04526 throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
04527 }
04528 NxsStringVector & v = charStates[n - 1];
04529 for (;;)
04530 {
04531 token.GetNextToken();
04532 if (token.Equals(";") || token.Equals(","))
04533 break;
04534 v.push_back(token.GetToken());
04535 }
04536 }
04537 }
04538
04544 void NxsCharactersBlock::Read(
04545 NxsToken &token)
04546 {
04547 isEmpty = false;
04548 isUserSupplied = true;
04549
04550 NxsString s;
04551 s = "BEGIN ";
04552 s += id;
04553 DemandEndSemicolon(token, s.c_str());
04554 nTaxWithData = 0;
04555
04556 for (;;)
04557 {
04558 token.GetNextToken();
04559 NxsBlock::NxsCommandResult res = HandleBasicBlockCommands(token);
04560 if (res == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
04561 {
04562 if (discreteMatrix.empty() && continuousMatrix.empty())
04563 {
04564 errormsg.clear();
04565 errormsg << "\nA " << id << " block must contain a Matrix command";
04566 throw NxsException(errormsg, token);
04567 }
04568 return;
04569 }
04570 if (res != NxsBlock::NxsCommandResult(HANDLED_COMMAND))
04571 {
04572 if (token.Equals("DIMENSIONS"))
04573 HandleDimensions(token, "NEWTAXA", "NTAX", "NCHAR");
04574 else if (token.Equals("FORMAT"))
04575 HandleFormat(token);
04576 else if (token.Equals("ELIMINATE"))
04577 HandleEliminate(token);
04578 else if (token.Equals("TAXLABELS"))
04579 HandleTaxLabels(token);
04580 else if (token.Equals("CHARSTATELABELS"))
04581 HandleCharstatelabels(token);
04582 else if (token.Equals("CHARLABELS"))
04583 HandleCharlabels(token);
04584 else if (token.Equals("STATELABELS"))
04585 HandleStatelabels(token);
04586 else if (token.Equals("MATRIX"))
04587 HandleMatrix(token);
04588 else
04589 SkipCommand(token);
04590 }
04591 }
04592 }
04593
04598 void NxsCharactersBlock::Report(
04599 std::ostream &out) NCL_COULD_BE_CONST
04600 {
04601 out << '\n' << id << " block contains ";
04602 if (nTaxWithData == 0)
04603 out << "no taxa";
04604 else if (nTaxWithData == 1)
04605 out << "one taxon";
04606 else
04607 out << nTaxWithData << " taxa";
04608 out << " and ";
04609 if (nChar == 0)
04610 out << "no characters";
04611 else if (nChar == 1)
04612 out << "one character";
04613 else
04614 out << nChar << " characters";
04615 out << endl;
04616
04617 out << " Data type is \"" << this->GetDatatypeName() << "\"" << endl;
04618
04619 if (respectingCase)
04620 out << " Respecting case" << endl;
04621 else
04622 out << " Ignoring case" << endl;
04623
04624 if (tokens)
04625 out << " Multicharacter tokens allowed in data matrix" << endl;
04626 else
04627 out << " Data matrix entries are expected to be single symbols" << endl;
04628
04629 if (labels && transposing)
04630 out << " Character labels are expected on left side of matrix" << endl;
04631 else if (labels && !transposing)
04632 out << " Taxon labels are expected on left side of matrix" << endl;
04633 else
04634 out << " No labels are expected on left side of matrix" << endl;
04635
04636 if (!indToCharLabel.empty())
04637 {
04638 out << " Character and character state labels:" << endl;
04639 for (unsigned k = 0; k < nChar; k++)
04640 {
04641 const std::map<unsigned, std::string>::const_iterator toLit = indToCharLabel.find(k);
04642 const unsigned kNum = 1 + k;
04643 if (toLit == indToCharLabel.end())
04644 out << " " << kNum << " (no label provided for this character)" << endl;
04645 else
04646 out << " " << kNum << " " << toLit->second << endl;
04647
04648
04649
04650 NxsStringVectorMap::const_iterator cib = charStates.find(k);
04651 if (cib != charStates.end())
04652 {
04653 int ns = (int)cib->second.size();
04654 for (int m = 0; m < ns; m++)
04655 out << " " << cib->second[m] << endl;
04656 }
04657 }
04658 }
04659
04660 if (transposing && interleaving)
04661 out << " Matrix transposed and interleaved" << endl;
04662 else if (transposing && !interleaving)
04663 out << " Matrix transposed but not interleaved" << endl;
04664 else if (!transposing && interleaving)
04665 out << " Matrix interleaved but not transposed" << endl;
04666 else
04667 out << " Matrix neither transposed nor interleaved" << endl;
04668
04669 out << " Missing data symbol is '" << missing << '\'' << endl;
04670
04671 if (matchchar != '\0')
04672 out << " Match character is '" << matchchar << '\'' << endl;
04673 else
04674 out << " No match character specified" << endl;
04675
04676 if (gap != '\0')
04677 out << " Gap character specified is '" << gap << '\'' << endl;
04678 else
04679 out << " No gap character specified" << endl;
04680
04681 out << " Valid symbols are: " << symbols << endl;
04682
04683 int numEquateMacros = (int)(userEquates.size() + defaultEquates.size());
04684 if (numEquateMacros > 0)
04685 {
04686 out << " Equate macros in effect:" << endl;
04687 std::map<char, NxsString>::const_iterator i = defaultEquates.begin();
04688 for (; i != defaultEquates.end(); ++i)
04689 {
04690 out << " " << (*i).first << " = " << i->second << endl;
04691 }
04692 i = userEquates.begin();
04693 for (; i != userEquates.end(); ++i)
04694 {
04695 out << " " << (*i).first << " = " << i->second << endl;
04696 }
04697 }
04698 else
04699 out << " No equate macros have been defined" << endl;
04700
04701 if (eliminated.empty())
04702 out << " No characters were eliminated" << endl;
04703 else
04704 {
04705 out << " The following characters were eliminated:" << endl;
04706 NxsUnsignedSet::const_iterator k;
04707 for (k = eliminated.begin(); k != eliminated.end(); k++)
04708 {
04709 out << " " << ((*k)+1) << endl;
04710 }
04711 }
04712
04713
04714 if (excluded.empty())
04715 out << " no characters excluded" << endl;
04716 else
04717 {
04718 out << " The following characters have been excluded:\n";
04719 for (NxsUnsignedSet::const_iterator eIt = excluded.begin(); eIt != excluded.end(); ++eIt)
04720 out << " " << (*eIt+1) << endl;
04721 }
04722 out << " Data matrix:" << endl;
04723 DebugShowMatrix(out, false, " ");
04724 }
04725
04726 void NxsCharactersBlock::WriteAsNexus(std::ostream &out) const
04727 {
04728 out << "BEGIN CHARACTERS;\n";
04729 WriteBasicBlockCommands(out);
04730 out << " DIMENSIONS";
04731 if (this->taxa)
04732 {
04733 const unsigned wod = GetNTaxWithData();
04734 if (wod > 0)
04735 {
04736 const unsigned tnt = taxa->GetNTax();
04737 if (wod != tnt)
04738 out << " NTax=" << wod;
04739 }
04740 }
04741 const unsigned multiplier = (this->datatype == NxsCharactersBlock::codon ? 3 : 1);
04742 out << " NChar=" << multiplier*(this->nChar) << ";\n";
04743 this->WriteEliminateCommand(out);
04744 this->WriteFormatCommand(out);
04745 this->WriteCharStateLabelsCommand(out);
04746 this->WriteMatrixCommand(out);
04747 WriteSkippedCommands(out);
04748 out << "END;\n";
04749 }
04750
04751
04752 void NxsCharactersBlock::WriteEliminateCommand(
04753 std::ostream &out) const
04754 {
04755 if (eliminated.empty())
04756 return;
04757 out << " ELIMINATE";
04758 for (NxsUnsignedSet::const_iterator u = this->eliminated.begin(); u != this->eliminated.end(); ++u)
04759 out << ' ' << (1 + *u);
04760 out << ";\n";
04761 }
04762
04763
04764 void NxsCharactersBlock::WriteMatrixCommand(
04765 std::ostream &out) const
04766 {
04767 if (taxa == NULL)
04768 return;
04769 unsigned width = taxa->GetMaxTaxonLabelLength();
04770 const unsigned ntaxTotal = taxa->GetNTax();
04771 out << "Matrix\n";
04772 int prec = 6;
04773 if (datatype == continuous)
04774 prec = out.precision(10);
04775 unsigned stride = (this->writeInterleaveLen < 1 ? this->nChar : this->writeInterleaveLen);
04776 unsigned begChar = 0;
04777 while (begChar < this->nChar)
04778 {
04779 if (begChar > 0)
04780 out << '\n';
04781 unsigned endChar = std::min(begChar + stride, this->nChar);
04782 for (unsigned i = 0; i < ntaxTotal; i++)
04783 {
04784 if (this->TaxonIndHasData(i))
04785 {
04786 const std::string currTaxonLabel = NxsString::GetEscaped(taxa->GetTaxonLabel(i));
04787 out << currTaxonLabel;
04788 unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
04789 unsigned diff = width - currTaxonLabelLen;
04790 for (unsigned k = 0; k < diff+5; k++)
04791 out << ' ';
04792
04793 WriteStatesForMatrixRow(out, i, UINT_MAX, begChar, endChar);
04794 out << '\n';
04795 }
04796 }
04797 begChar = endChar;
04798 }
04799 out << ";\n";
04800 if (datatype == continuous)
04801 out.precision(prec);
04802 }
04803
04804 std::string NxsCharactersBlock::GetMatrixRowAsStr(const unsigned rowIndex) const
04805 {
04806 if (!this->TaxonIndHasData(rowIndex))
04807 return std::string();
04808 std::ostringstream o;
04809 WriteStatesForMatrixRow(o, rowIndex, UINT_MAX, 0, this->nChar);
04810 return o.str();
04811 }
04812
04813 void NxsCharactersBlock::WriteStatesForMatrixRow(
04814 std::ostream &out,
04815 unsigned currTaxonIndex,
04816 unsigned ,
04817 unsigned beginChar,
04818 unsigned endChar) const
04819 {
04820 WriteStatesForTaxonAsNexus(out, currTaxonIndex, beginChar, endChar);
04821 }
04822
04823
04824 void NxsCharactersBlock::WriteCharLabelsCommand(std::ostream &out) const
04825 {
04826 if (indToCharLabel.empty())
04827 return;
04828 out << " CHARLABELS";
04829 std::map<unsigned, std::string>::const_iterator resultSearchIt;
04830 const std::map<unsigned, std::string>::const_iterator endIt = indToCharLabel.end();
04831 unsigned emptyLabelsToWrite = 0;
04832 for (unsigned oit = 0; oit < nChar; ++oit)
04833 {
04834 resultSearchIt = indToCharLabel.find(oit);
04835 if (resultSearchIt == endIt)
04836 emptyLabelsToWrite++;
04837 else
04838 {
04839 for (unsigned j = 0; j < emptyLabelsToWrite; ++j)
04840 out << " _";
04841 emptyLabelsToWrite = 0;
04842 out << ' ' << NxsString::GetEscaped(resultSearchIt->second);
04843 }
04844 }
04845 out << ";\n";
04846 }
04847
04848 void NxsCharactersBlock::WriteCharStateLabelsCommand(std::ostream &out) const
04849 {
04850 if (charStates.empty())
04851 {
04852 this->WriteCharLabelsCommand(out);
04853 return;
04854 }
04855 const NxsString mtString;
04856 bool isFirst = true;
04857 std::map<unsigned, std::string>::const_iterator resultSearchIt;
04858 const std::map<unsigned, std::string>::const_iterator endIt = indToCharLabel.end();
04859 const NxsStringVectorMap::const_iterator endCSIt = this->charStates.end();
04860 for (unsigned oit = 0; oit < nChar; ++oit)
04861 {
04862 resultSearchIt = indToCharLabel.find(oit);
04863 NxsString escapedCLabel;
04864 if (resultSearchIt != endIt)
04865 escapedCLabel = NxsString::GetEscaped(resultSearchIt->second).c_str();
04866 const NxsStringVectorMap::const_iterator cib = this->charStates.find(oit);
04867 if (isFirst)
04868 {
04869 out << " CharStateLabels \n ";
04870 isFirst = false;
04871 }
04872 else
04873 out << ",\n ";
04874 out << 1 + oit << ' ';
04875 if (cib != endCSIt)
04876 {
04877 const NxsStringVector & stateLabelsVec = cib->second;
04878 unsigned ns = (unsigned)stateLabelsVec.size();
04879 if (!escapedCLabel.empty())
04880 out << escapedCLabel;
04881 out << " / ";
04882 for (unsigned m = 0; m < ns; m++)
04883 out << " " << NxsString::GetEscaped(stateLabelsVec[m]);
04884 }
04885 else if (!escapedCLabel.empty())
04886 out << escapedCLabel;
04887 else out << '/';
04888 }
04889 out << ";\n";
04890 }
04891
04892 void NxsCharactersBlock::WriteFormatCommand(std::ostream &out) const
04893 {
04894 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(0);
04895 if (IsMixedType())
04896 {
04897 out << " FORMAT Datatype=MIXED(";
04898 bool first = true;
04899 for (std::vector<DatatypeMapperAndIndexSet>::const_iterator mIt = datatypeMapperVec.begin(); mIt != datatypeMapperVec.end(); ++mIt)
04900 {
04901 if (first)
04902 first = false;
04903 else
04904 out << ", ";
04905 out << GetNameOfDatatype(mIt->first.GetDatatype()) << ':';
04906 NxsSetReader::WriteSetAsNexusValue(mIt->second, out);
04907 }
04908 out << ')';
04909 if (this->missing != '?')
04910 out << " Missing=" << this->missing;
04911 if (this->gap != '\0')
04912 out << " Gap=" << this->gap;
04913 }
04914 else
04915 mapper->WriteStartOfFormatCommand(out);
04916
04917 if (this->respectingCase)
04918 out << " RespectCase";
04919
04920 if (this->matchchar != '\0')
04921 out << " MatchChar=" << this->matchchar;
04922 if (this->datatype == continuous)
04923 {
04924 out << " Items = (";
04925 for (vector<std::string>::const_iterator iIt = items.begin(); iIt != items.end(); ++iIt)
04926 out << *iIt << ' ';
04927 out << ")";
04928 if (this->statesFormat == STATES_PRESENT)
04929 out << " StatesFormat=StatesPresent";
04930 }
04931 else if (this->statesFormat == INDIVIDUALS)
04932 out << " StatesFormat=Individuals";
04933
04934 if (this->tokens && this->datatype != NxsCharactersBlock::continuous)
04935 out << " Tokens";
04936 if (this->writeInterleaveLen > 1 && (this->nChar > (unsigned)this->writeInterleaveLen ))
04937 {
04938 out << " Interleave";
04939 }
04940 out << ";\n";
04941 }
04942
04943 std::map<char, NxsString> NxsCharactersBlock::GetDefaultEquates(DataTypesEnum dt)
04944 {
04945 std::map<char, NxsString> defEquates;
04946 if (dt == NxsCharactersBlock::dna || dt == NxsCharactersBlock::rna || dt == NxsCharactersBlock::nucleotide)
04947 {
04948 defEquates['R'] = NxsString("{AG}");
04949 defEquates['M'] = NxsString("{AC}");
04950 defEquates['S'] = NxsString("{CG}");
04951 defEquates['V'] = NxsString("{ACG}");
04952 if (dt == NxsCharactersBlock::dna || dt == NxsCharactersBlock::nucleotide)
04953 {
04954 defEquates['Y'] = NxsString("{CT}");
04955 defEquates['K'] = NxsString("{GT}");
04956 defEquates['W'] = NxsString("{AT}");
04957 defEquates['H'] = NxsString("{ACT}");
04958 defEquates['B'] = NxsString("{CGT}");
04959 defEquates['D'] = NxsString("{AGT}");
04960 defEquates['N'] = NxsString("{ACGT}");
04961 defEquates['X'] = NxsString("{ACGT}");
04962 if (dt == NxsCharactersBlock::nucleotide)
04963 defEquates['U'] ='T';
04964 }
04965 else
04966 {
04967 defEquates['Y'] = NxsString("{CU}");
04968 defEquates['K'] = NxsString("{GU}");
04969 defEquates['W'] = NxsString("{AU}");
04970 defEquates['H'] = NxsString("{ACU}");
04971 defEquates['B'] = NxsString("{CGU}");
04972 defEquates['D'] = NxsString("{AGU}");
04973 defEquates['N'] = NxsString("{ACGU}");
04974 defEquates['X'] = NxsString("{ACGU}");
04975 }
04976 }
04977 else if (dt == NxsCharactersBlock::protein)
04978 {
04979 defEquates['B'] = NxsString("{DN}");
04980 defEquates['Z'] = NxsString("{EQ}");
04981 defEquates['X'] = NxsString("{ACDEFGHIKLMNPQRSTVWY*}");
04982 }
04983
04984
04985
04986 NxsString upperKeys;
04987 for (std::map<char, NxsString>::const_iterator k = defEquates.begin(); k != defEquates.end(); ++k)
04988 {
04989 upperKeys += k->first;
04990 }
04991 for (std::string::const_iterator k = upperKeys.begin(); k != upperKeys.end(); ++k)
04992 {
04993 const char c = *k;
04994 const char lc = (char)tolower(c);
04995 defEquates[lc] = defEquates[c];
04996 }
04997
04998 return defEquates;
04999 }
05000
05001 const char * NxsCharactersBlock::GetNameOfDatatype(DataTypesEnum datatype)
05002 {
05003 switch(datatype)
05004 {
05005 case NxsCharactersBlock::codon:
05006 case NxsCharactersBlock::dna:
05007 return "DNA";
05008 case NxsCharactersBlock::rna:
05009 return "RNA";
05010 case NxsCharactersBlock::nucleotide:
05011 return "Nucleotide";
05012 case NxsCharactersBlock::protein:
05013 return "Protein";
05014 case NxsCharactersBlock::continuous:
05015 return "Continuous";
05016 default:
05017 return "Standard";
05018 }
05019 }
05020
05024 void NxsCharactersBlock::Reset()
05025 {
05026 ResetSurrogate();
05027 NxsBlock::Reset();
05028 nTaxWithData = 0;
05029 nChar = 0;
05030 newtaxa = false;
05031 interleaving = false;
05032 transposing = false;
05033 respectingCase = false;
05034 labels = true;
05035 tokens = false;
05036 datatype = NxsCharactersBlock::standard;
05037 originalDatatype = NxsCharactersBlock::standard;
05038 datatypeReadFromFormat = false;
05039 missing = '?';
05040 gap = '\0';
05041 gapMode = GAP_MODE_MISSING;
05042 matchchar = '\0';
05043 symbols.clear();
05044 ResetSymbols();
05045
05046 ucCharLabelToIndex.clear();
05047 indToCharLabel.clear();
05048 charSets.clear();
05049 charPartitions.clear();
05050 codonPosPartitions.clear();
05051 defCodonPosPartitionName.clear();
05052 exSets.clear();
05053 charStates.clear();
05054 globalStateLabels.clear();
05055 userEquates.clear();
05056 defaultEquates.clear();
05057 eliminated.clear();
05058 datatypeMapperVec.clear();
05059 discreteMatrix.clear();
05060 continuousMatrix.clear();
05061 items = std::vector<std::string>(1, std::string("STATES"));
05062 statesFormat = STATES_PRESENT;
05063 restrictionDataype = false;
05064 }
05065
05066 std::string NxsCharactersBlock::GetDefaultSymbolsForType(NxsCharactersBlock::DataTypesEnum dt)
05067 {
05068 switch(dt)
05069 {
05070 case NxsCharactersBlock::nucleotide:
05071 case NxsCharactersBlock::dna:
05072 return std::string("ACGT");
05073 case NxsCharactersBlock::rna:
05074 return std::string("ACGU");
05075 case NxsCharactersBlock::protein:
05076 return std::string("ACDEFGHIKLMNPQRSTVWY*");
05077 case NxsCharactersBlock::standard:
05078 return std::string("01");
05079 default:
05080 return std::string();
05081
05082 }
05083 return std::string();
05084 }
05089 void NxsCharactersBlock::ResetSymbols()
05090 {
05091 symbols = GetDefaultSymbolsForType(datatype);
05092 userEquates.clear();
05093 defaultEquates = GetDefaultEquates(datatype);
05094 datatypeMapperVec.clear();
05095 }
05096
05103 void NxsCharactersBlock::ShowStateLabels(
05104 std::ostream &out,
05105 unsigned taxInd,
05106 unsigned charInd,
05107 unsigned ) const
05108 {
05109 if (datatype == continuous)
05110 {
05111 const ContinuousCharCell & cell = continuousMatrix.at(taxInd).at(charInd);
05112 std::vector<std::string>::const_iterator itemIt = items.begin();
05113 bool parensNeeded = items.size() > 1;
05114 if (items.size() == 1)
05115 {
05116 ContinuousCharCell::const_iterator oit = cell.find(*itemIt);
05117 if (oit != cell.end() && oit->second.size() > 1)
05118 parensNeeded = true;
05119 }
05120 if (parensNeeded)
05121 out << '(';
05122 for (; itemIt != items.end(); ++itemIt)
05123 {
05124 ContinuousCharCell::const_iterator cit = cell.find(*itemIt);
05125 if (cit == cell.end())
05126 out << missing << ' ';
05127 else
05128 {
05129 if (cit->second.empty())
05130 out << missing << ' ';
05131 else
05132 {
05133 vector<double>::const_iterator vIt = cit->second.begin();
05134 for(; vIt != cit->second.end(); ++vIt)
05135 {
05136 if (*vIt == DBL_MAX)
05137 out << missing << ' ';
05138 else
05139 out << *vIt << ' ';
05140 }
05141 }
05142 }
05143 }
05144 if (parensNeeded)
05145 out << ") ";
05146 else
05147 out << ' ';
05148 return;
05149 }
05150 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
05151 NCL_ASSERT(mapper != NULL);
05152 const NxsDiscreteStateCell currStateCode = discreteMatrix.at(taxInd).at(charInd);
05153 if (tokens)
05154 {
05155 out << ' ';
05156 if (currStateCode >= 0 && currStateCode < (NxsDiscreteStateCell) mapper->GetNumStates())
05157 {
05158 NxsStringVectorMap::const_iterator ci = charStates.find(charInd);
05159 if (ci != charStates.end() && ((NxsDiscreteStateCell) ci->second.size()) > currStateCode)
05160 out << ci->second[currStateCode];
05161 else if (currStateCode < 0)
05162 {
05163 if (currStateCode == NXS_MISSING_CODE)
05164 out << this->GetMissingSymbol();
05165 else if (currStateCode == NXS_GAP_STATE_CODE)
05166 out << this->GetGapSymbol();
05167 else
05168 out << '_';
05169 }
05170 else if (globalStateLabels.size() > (unsigned) currStateCode)
05171 out << globalStateLabels[currStateCode];
05172 else
05173 out << '_';
05174 return;
05175 }
05176 }
05177 mapper->WriteStateCodeAsNexusString(out, currStateCode);
05178 }
05179
05186 void NxsCharactersBlock::WriteStates(
05187 NxsDiscreteDatum &d,
05188 char *s,
05189 unsigned slen) NCL_COULD_BE_CONST
05190 {
05191 std::ostringstream outs;
05192 ShowStates(outs, d.taxInd, d.charInd);
05193 std::string sfo = outs.str();
05194 if (s == NULL || sfo.length() > slen)
05195 throw NxsNCLAPIException("Char buffer too small in NxsCharactersBlock::WriteStates");
05196 strcpy(s, sfo.c_str());
05197 }
05198
05205 unsigned NxsCharactersBlock::GetNumStates(
05206 unsigned taxInd,
05207 unsigned charInd) NCL_COULD_BE_CONST
05208 {
05209 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
05210 NCL_ASSERT(mapper != NULL);
05211 const NxsDiscreteStateCell currStateCode = discreteMatrix.at(taxInd).at(charInd);
05212 return mapper->GetNumStatesInStateCode(currStateCode);
05213 }
05214
05217 void NxsCharactersBlock::ExcludeCharacter(
05218 unsigned i)
05219 {
05220 if (i >= nChar)
05221 {
05222 errormsg = "Character index is ExcludeCharacter out-of-range. Must be < ";
05223 errormsg << nChar;
05224 throw NxsNCLAPIException(errormsg);
05225 }
05226 excluded.insert(i);
05227 }
05230 void NxsCharactersBlock::IncludeCharacter(
05231 unsigned i)
05232 {
05233 if (i >= nChar)
05234 {
05235 errormsg = "Character index is ExcludeCharacter out-of-range. Must be < ";
05236 errormsg << nChar;
05237 throw NxsNCLAPIException(errormsg);
05238 }
05239 excluded.erase(i);
05240 }
05241
05242 bool NxsCharactersBlock::IsGapState(
05243 unsigned taxInd,
05244 unsigned charInd) NCL_COULD_BE_CONST
05245 {
05246 if (this->datatype == continuous)
05247 return false;
05248 const NxsDiscreteStateRow & row = discreteMatrix.at(taxInd);
05249 return (row.size() > charInd && row[charInd] == NXS_GAP_STATE_CODE);
05250 }
05251
05252 bool NxsCharactersBlock::IsMissingState(
05253 unsigned taxInd,
05254 unsigned charInd) NCL_COULD_BE_CONST
05255 {
05256 if (this->datatype == continuous)
05257 {
05258 return !continuousMatrix.at(taxInd).empty();
05259 }
05260 const NxsDiscreteStateRow & row = discreteMatrix.at(taxInd);
05261 return (row.size() <= charInd || (row[charInd] == NXS_MISSING_CODE));
05262 }
05263
05264
05265 void NxsCharactersBlock::FindConstantCharacters(NxsUnsignedSet &c) const
05266 {
05267 vector<NxsDiscreteStateCell> iv;
05268 for (unsigned colIndex = 0; colIndex < nChar; ++colIndex)
05269 {
05270 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(colIndex);
05271 if (mapper == NULL)
05272 throw NxsNCLAPIException("No DatatypeMapper in FindConstantCharacters");
05273
05274 std::set<NxsDiscreteStateCell> intersectionSet = mapper->GetStateSetForCode(NXS_MISSING_CODE);
05275 for (NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin(); rowIt != discreteMatrix.end(); ++rowIt)
05276 {
05277 const NxsDiscreteStateRow & row = *rowIt;
05278 if (row.size() > colIndex)
05279 {
05280 const NxsDiscreteStateCell sc = row[colIndex];
05281 std::set<NxsDiscreteStateCell> currSet = mapper->GetStateSetForCode(sc);
05282 iv.clear();
05283 set_intersection(currSet.begin(), currSet.end(), intersectionSet.begin(), intersectionSet.end(), back_inserter(iv));
05284 intersectionSet.clear();
05285 if (iv.empty())
05286 break;
05287 intersectionSet.insert(iv.begin(), iv.end());
05288 }
05289 }
05290 if (!intersectionSet.empty())
05291 c.insert(colIndex);
05292 }
05293 }
05294
05295 void NxsCharactersBlock::FindGappedCharacters(NxsUnsignedSet &c) const
05296 {
05297 vector<NxsDiscreteStateCell> iv;
05298 for (unsigned colIndex = 0; colIndex < nChar; ++colIndex)
05299 {
05300 for (NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin(); rowIt != discreteMatrix.end(); ++rowIt)
05301 {
05302 const NxsDiscreteStateRow & row = *rowIt;
05303 if (row.size() > colIndex && row[colIndex] == NXS_GAP_STATE_CODE)
05304 {
05305 c.insert(colIndex);
05306 break;
05307 }
05308 }
05309 }
05310 }
05311
05312
05313
05314
05315
05316 std::set<NxsDiscreteStateCell> NxsCharactersBlock::GetNamedStateSetOfColumn(const unsigned colIndex) const
05317 {
05318 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(colIndex);
05319 if (mapper == NULL)
05320 throw NxsNCLAPIException("No DatatypeMapper in GetNamedStateSetOfColumn");
05321
05322 std::set<NxsDiscreteStateCell> sset;
05323 std::set<NxsDiscreteStateCell> scodes;
05324 const unsigned maxnstates = mapper->GetNumStatesIncludingGap();
05325 for (NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin(); rowIt != discreteMatrix.end(); ++rowIt)
05326 {
05327 const NxsDiscreteStateRow & row = *rowIt;
05328 if (row.size() > colIndex)
05329 {
05330 const NxsDiscreteStateCell sc = row[colIndex];
05331 const bool isIgnoredGap = (sc == NXS_GAP_STATE_CODE) && (this->gapMode == GAP_MODE_MISSING);
05332 const bool toBeCounted = !(sc == NXS_MISSING_CODE || isIgnoredGap);
05333 if (toBeCounted && scodes.count(sc) == 0)
05334 {
05335 scodes.insert(sc);
05336 const std::set<NxsDiscreteStateCell> & ts = mapper->GetStateSetForCode(sc);
05337 sset.insert(ts.begin(), ts.end());
05338 if (sset.size() == maxnstates)
05339 break;
05340 }
05341 }
05342 }
05343 return sset;
05344 }
05345
05346 std::set<NxsDiscreteStateCell> NxsCharactersBlock::GetMaximalStateSetOfColumn(const unsigned colIndex) const
05347 {
05348 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(colIndex);
05349 if (mapper == NULL)
05350 throw NxsNCLAPIException("No DatatypeMapper in GetMaximalStateSetOfColumn");
05351
05352 std::set<NxsDiscreteStateCell> sset;
05353 std::set<NxsDiscreteStateCell> scodes;
05354 const unsigned maxnstates = mapper->GetNumStatesIncludingGap();
05355 for (NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin(); rowIt != discreteMatrix.end(); ++rowIt)
05356 {
05357 const NxsDiscreteStateRow & row = *rowIt;
05358 if (row.size() > colIndex)
05359 {
05360 const NxsDiscreteStateCell sc = row[colIndex];
05361 if (scodes.count(sc) == 0)
05362 {
05363 scodes.insert(sc);
05364 const std::set<NxsDiscreteStateCell> & ts = mapper->GetStateSetForCode(sc);
05365 sset.insert(ts.begin(), ts.end());
05366 if (sset.size() == maxnstates)
05367 break;
05368 }
05369 }
05370 }
05371 return sset;
05372 }
05373
05374 bool NxsCharactersBlock::IsPolymorphic(
05375 unsigned taxInd,
05376 unsigned charInd) NCL_COULD_BE_CONST
05377 {
05378 const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
05379 NCL_ASSERT(mapper);
05380 if (taxInd >= discreteMatrix.size())
05381 throw NxsNCLAPIException("Taxon index out of range of NxsCharactersBlock::IsPolymorphic");
05382 const NxsDiscreteStateRow & row = discreteMatrix[taxInd];
05383 if (row.size() <= charInd)
05384 throw NxsNCLAPIException("Character index out of range of NxsCharactersBlock::IsPolymorphic");
05385 return mapper->IsPolymorphic(row[charInd]);
05386 }
05387
05388
05394 void NxsCharactersBlock::ShowStates(
05395 std::ostream &out,
05396 unsigned taxInd,
05397 unsigned charInd) NCL_COULD_BE_CONST
05398 {
05399 bool ft = tokens;
05400 tokens = false;
05401 ShowStateLabels(out, taxInd, charInd, UINT_MAX);
05402 tokens = ft;
05403 }
05404
05405
05406
05407
05408 void NxsCharactersBlock::CopyCharactersContents(const NxsCharactersBlock &other)
05409 {
05410 assumptionsBlock = other.assumptionsBlock;
05411 nChar = other.nChar;
05412 nTaxWithData = other.nTaxWithData;
05413 matchchar = other.matchchar;
05414 respectingCase = other.respectingCase;
05415 transposing = other.transposing;
05416 interleaving = other.interleaving;
05417 tokens = other.tokens;
05418 labels = other.labels;
05419 missing = other.missing;
05420 gap = other.gap;
05421 gapMode = other.gapMode;
05422 symbols = other.symbols;
05423 userEquates = other.userEquates;
05424 datatypeMapperVec = other.datatypeMapperVec;
05425 discreteMatrix = other.discreteMatrix;
05426 continuousMatrix = other.continuousMatrix;
05427 eliminated = other.eliminated;
05428 excluded = other.excluded;
05429 ucCharLabelToIndex = other.ucCharLabelToIndex;
05430 indToCharLabel = other.indToCharLabel;
05431 charStates = other.charStates;
05432 globalStateLabels = other.globalStateLabels;
05433 items = other.items;
05434 charSets = other.charSets;
05435 exSets = other.exSets;
05436 charPartitions = other.charPartitions;
05437 codonPosPartitions = other.codonPosPartitions;
05438 defCodonPosPartitionName = other.defCodonPosPartitionName;
05439 transfMgr = other.transfMgr;
05440 datatype = other.datatype;
05441 statesFormat = other.statesFormat;
05442 supportMixedDatatype = other.supportMixedDatatype;
05443 convertAugmentedToMixed = other.convertAugmentedToMixed;
05444 allowAugmentingOfSequenceSymbols = other.allowAugmentingOfSequenceSymbols;
05445 restrictionDataype = other.restrictionDataype;
05446 writeInterleaveLen = other.writeInterleaveLen;
05447 }
05448
05449
05450 NxsCharactersBlock *NxsCharactersBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
05451 {
05452 if (reader == NULL || idneeded != "CHARACTERS")
05453 return NULL;
05454 NxsCharactersBlock * nb = new NxsCharactersBlock(NULL, NULL);
05455 nb->SetCreateImpliedBlock(true);
05456 nb->SetImplementsLinkAPI(true);
05457 return nb;
05458 }
05459
05460
05461
05462
05463
05464 std::vector<std::vector<int> > NxsDiscreteDatatypeMapper::GetPythonicStateVectors() const
05465 {
05466
05467 std::vector<std::vector<int> > pv(this->GetNumStateCodes());
05468
05469 const int endIndex = (((int) stateSetsVec.size()) + sclOffset);
05470 for (int i = 0; i < endIndex; ++i)
05471 {
05472 NxsDiscreteStateRow r = this->GetStateVectorForCode(i);
05473 pv[i].reserve(r.size());
05474 for (NxsDiscreteStateRow::const_iterator rIt = r.begin(); rIt != r.end(); ++rIt)
05475 pv[i].push_back((int)*rIt);
05476 }
05477 return pv;
05478 }