/* * Portable Agile C++ Classes (PACC) * Copyright (C) 2001-2003 by Marc Parizeau * http://manitou.gel.ulaval.ca/~parizeau/PACC * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Contact: * Laboratoire de Vision et Systemes Numeriques * Departement de genie electrique et de genie informatique * Universite Laval, Quebec, Canada, G1K 7P4 * http://vision.gel.ulaval.ca * */ /*! * \file PACC/XML/Node.cpp * \brief Class methods for the %XML parse tree node. * \author Marc Parizeau, Laboratoire de vision et systèmes numériques, Université Laval * $Revision: 1.7.2.2 $ * $Date: 2007/09/10 18:24:10 $ */ #include "XML/Node.hpp" #include "XML/Iterator.hpp" #include "XML/Streamer.hpp" #include "Util/Assert.hpp" #include <sstream> #include <stdexcept> using namespace std; using namespace PACC; map<string,char> XML::Node::smMap; /*! */ XML::Node::Node(void) : mType(eRoot) { mParent = mFirstChild = mLastChild = mPrevSibling = mNextSibling = NULL; } /*! */ XML::Node::Node(const string& inValue, XML::NodeType inType) : mType(inType) { (*this)[""] = inValue; mParent = mFirstChild = mLastChild = mPrevSibling = mNextSibling = NULL; } /*! */ XML::Node::Node(const string& inValue, const XML::AttributeList& inAttrList) : AttributeList(inAttrList), mType(eData) { (*this)[""] = inValue; mParent = mFirstChild = mLastChild = mPrevSibling = mNextSibling = NULL; } /*! */ XML::Node::Node(const XML::Node& inNode) : AttributeList() { mParent = mFirstChild = mLastChild = mPrevSibling = mNextSibling = NULL; operator=(inNode); } /*! This method recursively deletes all of its children. */ XML::Node::~Node(void) { // delete all child nodes eraseChildren(); // detach from parent and siblings detachFromSiblingsAndParent(); // cleanup node pointers mParent = mFirstChild = mLastChild = mPrevSibling = mNextSibling = NULL; } /*! \return A reference to this node \attention The copied tree must not be a sub-tree of this node. Otherwise, the internal tree structure will become corrupted. */ XML::Node& XML::Node::operator=(const Node& inRoot) { // do not self assign! if(&inRoot == this) return *this; // delete all child nodes XML::Iterator lChild = getFirstChild(); while(lChild) delete &(*(lChild++)); // fix child pointers mFirstChild = mLastChild = NULL; // assign type and attributes mType = inRoot.mType; map<string,string>::operator=(inRoot); // copy all children of inRoot for(XML::ConstIterator lNode = inRoot.getFirstChild(); lNode; ++lNode) { // allocate and copy node Node* lChildNode = new Node(*lNode); // is this the first child? if(mFirstChild == NULL) mFirstChild = mLastChild = lChildNode; else { //adjust sibling pointers mLastChild->mNextSibling = lChildNode; lChildNode->mPrevSibling = mLastChild; mLastChild = lChildNode; } // adjust parent pointer lChildNode->mParent = this; } return *this; } /*! \return A reference to the converted string. The default quotes are "&", "<", ">", "'", and """. Argument \c ioMap can be used to specify any conversion table. */ string& XML::Node::convertFromQuotes(string& ioString, map<string,char>& ioMap) { if(ioMap.empty()) { // initialize quote list ioMap["amp"] = '&'; ioMap["lt"] = '<'; ioMap["gt"] = '>'; ioMap["apos"] = '\''; ioMap["quot"] = '"'; } string::size_type lStart, lEnd = 0; while((lStart = ioString.find('&', lEnd)) < ioString.size() && (lEnd = ioString.find(';', lStart)) < ioString.size()) { string lToken = ioString.substr(lStart+1, lEnd-lStart-1); if(ioMap.find(lToken) != ioMap.end()) { ioString[lStart] = ioMap[lToken]; ioString.erase(lStart+1, lEnd-lStart); lEnd = lStart+1; } } return ioString; } /*!\return A pointer to this node. This method removes this node from its parent tree. The list of sibling nodes is repaired accordingly. */ XML::Node* XML::Node::detachFromSiblingsAndParent(void) { // adjust sibling list if(mPrevSibling) mPrevSibling->mNextSibling = mNextSibling; if(mNextSibling) mNextSibling->mPrevSibling = mPrevSibling; if(mParent) { // adjust parent first and last child pointers if(mParent->mFirstChild == this) mParent->mFirstChild = mNextSibling; if(mParent->mLastChild == this) mParent->mLastChild = mPrevSibling; } mPrevSibling = mNextSibling = mParent = NULL; return this; } /*! */ void XML::Node::eraseChildren(void) { // delete all child nodes XML::Iterator lChild = getFirstChild(); while(lChild) delete &(*(lChild++)); } /*!\return The number of child nodes. */ unsigned int XML::Node::getChildCount(void) const { unsigned int lCount = 0; for(ConstIterator lChild = getFirstChild(); lChild; ++lChild) ++lCount; return lCount; } /*!\return A pointer to the inserted child node. */ XML::Node* XML::Node::insertAsLastChild(XML::Node* inChild) { PACC_AssertM(inChild, "Cannot add null pointer node"); PACC_AssertM(!inChild->mParent && !inChild->mPrevSibling && !inChild->mNextSibling, "Node must be detached before it can be added!"); // is this new child the first? if(mFirstChild == NULL) mFirstChild = inChild; else { // insert after last inChild->mPrevSibling = mLastChild; mLastChild->mNextSibling = inChild; } // adjust parent pointers inChild->mParent = this; mLastChild = inChild; return inChild; } /*!\return A pointer to the inserted sibling node. */ XML::Node* XML::Node::insertAsPreviousSibling(XML::Node* inSibling) { PACC_AssertM(inSibling, "Cannot insert null pointer node"); PACC_AssertM(!inSibling->mParent && !inSibling->mPrevSibling && !inSibling->mNextSibling, "Node must be detached before it can be inserted!"); // is this new sibling the first? if(mPrevSibling == NULL) { inSibling->mNextSibling = this; mPrevSibling = inSibling; // adjust first child of parent if(mParent) mParent->mFirstChild = inSibling; } else { // this node is neither the first or the last mPrevSibling->mNextSibling = inSibling; inSibling->mPrevSibling = mPrevSibling; inSibling->mNextSibling = this; mPrevSibling = inSibling; } // adjust parent pointer inSibling->mParent = mParent; return inSibling; } /*!\return A node pointer to the parsed element. Any tag name defined in \c inNoParseTags will be treated as if its content is a string token (content will not be parsed). */ XML::Node* XML::Node::parse(PACC::Tokenizer& inTokenizer, const set<string>& inNoParseTags) { Node* lNode = NULL; // look for start tag string lToken; inTokenizer.setDelimiters("", "<"); if(!inTokenizer.getNextToken(lToken)) return 0; // remove any leading white space size_type lPos = lToken.find_first_not_of(" \t\r\n"); if(lPos == string::npos) { if(!inTokenizer.getNextToken(lToken)) return 0; } else if(lPos > 0) lToken.erase(0, lPos); if(lToken[0] == '<') { // check for end tag if(inTokenizer.peekNextChar() == '/') { // found end tag; inTokenizer.setDelimiters("", "/"); inTokenizer.getNextToken(lToken); return 0; } // found start tag lNode = new Node; lNode->parseStartTag(inTokenizer, lToken); if(lToken[0] == '/') { // found end tag; next token must be '>' inTokenizer.setDelimiters("", ">"); if(!inTokenizer.getNextToken(lToken)) lNode->throwError(inTokenizer, "unexpected eof"); if(lToken[0] != '>') lNode->throwError(inTokenizer, "invalid start tag"); } else if(lNode->getType() == eData) { // either read or parse tag content if(inNoParseTags.find((*lNode)[""]) != inNoParseTags.end()) { lNode->readContentAsString(inTokenizer); } else { Node* lChild; // parse all child while((lChild=parse(inTokenizer, inNoParseTags)) != NULL) lNode->insertAsLastChild(lChild); // test for valid end tag inTokenizer.setDelimiters("", " \t\n\r>"); if(!inTokenizer.getNextToken(lToken)) lNode->throwError(inTokenizer, "unexpected eof"); if(lToken != (*lNode)[""]) lNode->throwError(inTokenizer, "invalid end tag"); } // next token must be '>' inTokenizer.setDelimiters(" \t\n\r", ">"); if(!inTokenizer.getNextToken(lToken)) lNode->throwError(inTokenizer, "unexpected eof"); if(lToken[0] != '>') lNode->throwError(inTokenizer, "invalid end tag"); } // else node is not markup } else { // found a simple string node lNode = new Node; lNode->mType = eString; // remove any ending white space lPos = lToken.find_last_not_of(" \t\r\n"); PACC_AssertM(lPos != string::npos, "Internal error!"); if(lPos < lToken.size()-1) lToken.resize(lPos+1); // convert basic quotes (*lNode)[""] = convertFromQuotes(lToken); } return lNode; } /*! Ending token is returned through argument \c outToken. */ void XML::Node::parseAttributeList(PACC::Tokenizer& inTokenizer, string& outToken) { inTokenizer.setDelimiters(" \t\n\r", "=/?>"); // next token should be an attribute name if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); // parse all attributes while(outToken[0] != '>' && outToken[0] != '/' && outToken[0] != '?') { if(outToken[0] == '=') throwError(inTokenizer, "missing attribute name"); // ok, found an attribute name! string lName = outToken; // next token should be '=' inTokenizer.setDelimiters(" \t\n\r", "="); if(!inTokenizer.getNextToken(outToken) || outToken[0] != '=') throwError(inTokenizer, "invalid attribute"); inTokenizer.setDelimiters(" \t\n\r", "'\""); // next token must be '"' or "'" if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); string lValue; switch(outToken[0]) { case '\'': inTokenizer.setDelimiters("", "'"); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); if(outToken[0] != '\'') { lValue = outToken; if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); } break; case '"': inTokenizer.setDelimiters("", "\""); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); if(outToken[0] != '"') { lValue = outToken; if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); } break; default: throwError(inTokenizer, "invalid attribute value"); } // insert attribute (*this)[lName] = convertFromQuotes(lValue); inTokenizer.setDelimiters(" \t\n\r", "=/?>"); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); } } /*! This method assumes that token "<" has already been read. It returns the ending token through argument \c outToken. */ void XML::Node::parseStartTag(PACC::Tokenizer& inTokenizer, string& outToken) { // parse tag name inTokenizer.setDelimiters("", " \t\n\r/>"); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); if(outToken.find_first_of(" \t\n\r/>") != string::npos) throwError(inTokenizer, "invalid start tag"); string& lValue = (*this)[""]; switch(outToken[0]) { case '!': if(outToken.size() >= 3 && outToken[1] == '-' && outToken[2] == '-') { // process comment mType = eComment; outToken.erase(0, 3); inTokenizer.setDelimiters("", ">"); do { int lSize = outToken.size(); if(lSize > 2 && outToken[lSize-2] == '-' && outToken[lSize-1] == '-') { lValue += outToken.erase(lSize-2, 2); break; } else lValue += outToken; } while(inTokenizer.getNextToken(outToken)); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); } else if(outToken.size() >= 8 && memcmp(outToken.data()+1, "[CDATA[", 7) == 0) { // process cdata section mType = eCDATA; outToken.erase(0, 8); inTokenizer.setDelimiters("", ">"); do { int lSize = outToken.size(); if(lSize >= 2 && outToken[lSize-2] == ']' && outToken[lSize-1] == ']') { lValue += outToken.erase(lSize-2, 2); break; } else lValue += outToken; } while(inTokenizer.getNextToken(outToken)); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); } else { // process special element (doctype, attribute, etc.) mType = eSpecial; lValue = outToken.erase(0, 1); inTokenizer.setDelimiters("", ">"); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); if(outToken[0] != '>') { lValue += outToken; if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); } } break; case '?': if(outToken == "?xml") { // process xml declaration mType = eDecl; lValue = outToken.erase(0, 1); parseAttributeList(inTokenizer, outToken); if(outToken[0] != '?') throwError(inTokenizer, "invalid xml declaration"); if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); if(outToken[0] != '>') throwError(inTokenizer, "invalid xml declaration"); } else { // process special processing instruction mType = ePI; lValue = outToken.substr(1, outToken.size()-1); inTokenizer.setDelimiters("", "?>"); while(inTokenizer.getNextToken(outToken)) { if(outToken[0] == '?') { if(!inTokenizer.getNextToken(outToken)) throwError(inTokenizer, "unexpected eof"); if(outToken[0] == '>') break; else lValue += '?'; } lValue += outToken; } if(outToken.empty()) throwError(inTokenizer, "unexpected eof"); } break; default: // process data markup mType = eData; lValue = outToken; parseAttributeList(inTokenizer, outToken); } } /*! */ void XML::Node::readContentAsString(PACC::Tokenizer& inTokenizer) { // create child node Node* lChild = new Node; insertAsLastChild(lChild); lChild->setType(eNoParse); // parse until end tag inTokenizer.setDelimiters("", "<>"); string lToken; int lCount = 1; const string& lTag = (*this)[""]; string& lString = (*lChild)[""]; while(lCount > 0) { // check every start tag if(!inTokenizer.getNextToken(lToken)) throwError(inTokenizer, string("unexpected eof")); if(lToken[0] == '<') { if(!inTokenizer.getNextToken(lToken)) throwError(inTokenizer, string("unexpected eof")); if(lToken[0] == '/' && memcmp(lToken.data()+1, lTag.data(), lTag.size()) == 0) --lCount; else if(lToken[lToken.size()-1] != '/' && memcmp(lToken.data(), lTag.data(), lTag.size()) == 0) ++lCount; if(lCount > 0) { lString += "<"; lString += lToken; } } else lString += lToken; } // remove any leading white space size_type lPos = lString.find_first_not_of(" \t\r\n"); if(lPos == string::npos) { // string is all white space lString.clear(); } else { // erase leading white space lString.erase(0, lPos); // remove any ending white space lPos = lString.find_last_not_of(" \t\r\n"); PACC_AssertM(lPos != string::npos, "Internal error!"); // erase trailing white space if(lPos < lString.size()-1) lString.resize(lPos+1); } } /*! Argument \c inIndent is used to control indentation. By default (\c inIndent=true), the sub-tree rooted by this node will be serialized with indentation. If \c inIndent=false, then the node will be serialized without any form of indentation (including line feeds). */ void XML::Node::serialize(XML::Streamer& outStream, bool inIndent) const { switch(mType) { case eCDATA: { outStream.insertCDATA(getValue()); break; } case eComment: { outStream.insertComment(getValue()); break; } case eData: { // check for tag with single string content ConstIterator lChild = getFirstChild(); if(lChild && lChild->mType == eString && !lChild->getNextSibling()) { // disable indentation outStream.openTag(getValue(), false); } else { outStream.openTag(getValue(), inIndent); } // serialize attribute list for(map<string,string>::const_iterator i = begin(); i != end(); ++i) { if(i->first != "") outStream.insertAttribute(i->first, i->second); } // serialize child nodes while(lChild) (lChild++)->serialize(outStream, inIndent); outStream.closeTag(); break; } case eNoParse: { outStream.insertStringContent(getValue(), false); break; } case ePI: { string lValue = string("<?") + getValue() + string("?>"); outStream.insertStringContent(lValue, false); break; } case eSpecial: { string lValue = string("<!") + getValue() + string(">"); outStream.insertStringContent(lValue, false); break; } case eString: { outStream.insertStringContent(getValue(), true); break; } case eDecl: { string lValue = "<?xml version=\""; if(isDefined("version")) lValue += getAttribute("version"); else lValue += "1.0"; lValue += "\""; if(isDefined("encoding")) lValue += " encoding=\"" + getAttribute("encoding") + "\""; lValue += "?>"; outStream.insertStringContent(lValue, false); break; } default: { PACC_AssertM(false, "Unknown node type!"); } } } /*! */ void XML::Node::throwError(PACC::Tokenizer& inTokenizer, const string& inMessage) const { ostringstream lStream; lStream << "\nXML parse error"; if(inTokenizer.getStreamName() != "") lStream << " in file \"" << inTokenizer.getStreamName() << "\","; lStream << " at line "; lStream << inTokenizer.getLineNumber(); switch(mType) { case eCDATA: lStream << "\nfor CDATA \""; break; case eComment: lStream << "\nfor comment \""; break; case eData: lStream << "\nfor markup \""; break; case ePI: lStream << "\nfor processing instruction \""; break; case eRoot: lStream << "\nfor root element \""; break; case eSpecial: lStream << "\nfor special element \""; break; case eString: lStream << "\nfor literal string \""; break; case eDecl: lStream << "\nfor declaration \""; break; default: lStream << "\nfor unknown element \""; break; } if(getValue().size() < 40) lStream << getValue() << "\": " << inMessage; else lStream << getValue().substr(0,40) << "...\": " << inMessage; throw runtime_error(lStream.str()); } /*! */ ostream& PACC::operator<<(ostream &outStream, const XML::Node& inNode) { XML::Streamer lStream(outStream); inNode.serialize(lStream); return outStream; } /*! */ istream& PACC::operator>>(istream &inStream, XML::Node& outNode) { Tokenizer lTokenizer(inStream); XML::Node* lNode = XML::Node::parse(lTokenizer, set<string>()); outNode = *lNode; delete lNode; return inStream; }