/*
 *  Portable Agile C++ Classes (PACC)
 *  Copyright (C) 2001-2003 by Marc Parizeau
 *  http://manitou.gel.ulaval.ca/~parizeau/PACC
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *  Contact:
 *  Laboratoire de Vision et Systemes Numeriques
 *  Departement de genie electrique et de genie informatique
 *  Universite Laval, Quebec, Canada, G1K 7P4
 *  http://vision.gel.ulaval.ca
 *
 */

/*!
 * \file PACC/Util/Tokenizer.hpp
 * \brief Class definition for the input stream tokenizer.
 * \author Marc Parizeau, Laboratoire de vision et systèmes numériques, Université Laval
 * $Revision: 1.6.2.1 $
 * $Date: 2007/09/10 18:24:10 $
 */

#ifndef PACC_Tokenizer_hpp_
#define PACC_Tokenizer_hpp_

#include <iostream>
#include <string>
#include <stack>

namespace PACC {

using namespace std;

/*! \brief Input stream tokenizer
\author Marc Parizeau, Laboratoire de vision et systèmes numériques, Université Laval
\ingroup Util
\ingroup XML

This class is for tokenizing an input stream. Tokens are either strings of
characters separated by white space, or special single-characters. By default,
there are no special single-character tokens, and white space characters are
spaces, tabs, carriage returns, and line-feeds. But these can be changed using
method Tokenizer::setDelimiters.

For instance, if "a" and "d" are single character tokens, then string "abcd"
will be tokenized into "a" + "bc" + "d".

By default, the tokenizer uses an internal read buffer to accelerate the parse
of the stream (up to 4x). The buffer size can be specified in the object
constructor (default=1024) or set by method Tokenizer::setBufferSize. A buffer
size of 0 actually deactivates the use of the internal buffer.
*/
class Tokenizer {
public:
    //! Construct a default tokenizer with no input stream.
    Tokenizer(unsigned int inBufSize=1024);
    //! Construct a tokenizer with input stream \c inStream.
    Tokenizer(istream& inStream, unsigned int inBufSize=1024);
    //! Delete tokenizer.
    ~Tokenizer(void);

    //! Parse next token of default input stream and return it by value.
    string getNextToken(void);
    //! Parse next token of default input stream and store it in \c outToken.
    //! NOTE(review): the meaning of the boolean result is defined in the
    //! implementation file (presumably false when no token is left) - confirm.
    bool getNextToken(string& outToken);
    //! Return the next character from the default input stream.
    int peekNextChar(void);
    //! Put back token \c inToken.
    void putbackToken(const string& inToken);

    //! Return the current line number.
    unsigned int getLineNumber(void) const {return mLine;}
    //! Return the name of the current input stream.
    const string& getStreamName(void) const {return mName;}
    //! Return the current single character delimiters.
    string getSingleCharTokens(void) const;
    //! Return the current white space delimiters.
    string getWhiteSpace(void) const;

    //! Set the size of the internal read buffer to size \c inSize.
    void setBufferSize(unsigned int inSize);
    //! Set the white space delimiters to the characters of \c inWhiteSpace,
    //! and the single character delimiters to those of \c inTokens.
    void setDelimiters(const string& inWhiteSpace, const string& inTokens);
    //! Set the current line number to \c inLine.
    void setLineNumber(unsigned int inLine) {mLine = inLine;}
    //! Set the input stream to \c inStream.
    void setStream(istream& inStream);
    //! Set the name of the current input stream to \c inName.
    void setStreamName(const string& inName) {mName = inName;}

protected:
    //! Delimiter categories (white space versus single character token).
    enum delimiters {eWhiteSpace=1, eSingleChar};

    unsigned int  mLine;            //!< Current line number.
    string        mName;            //!< Name of default input stream.
    istream*      mStream;          //!< Default input stream.
    char          mDelimiters[256]; //!< Delimiter lookup table (256 entries; presumably indexed by character code).
    char*         mBuffer;          //!< Input read buffer.
    unsigned int  mBufSize;         //!< Size of the input read buffer.
    char*         mBufPtr;          //!< Pointer to next character in the read buffer.
    unsigned int  mBufCount;        //!< Number of characters left in the read buffer.
    stack<string> mTokens;          //!< Stack of putback tokens.

    //! Fill the input read buffer.
    /*!
    Resets the read pointer to the start of the buffer, then stores in it the
    characters that the stream can deliver immediately. When none are
    available, a single character is read with a regular (possibly blocking)
    read instead.

    \return The number of characters now available in the buffer; 0 if nothing
    could be read, or if there is no stream or no buffer to read into.
    */
    inline unsigned int fillBuffer(void) {
        // Restart consumption at the beginning of the buffer.
        mBufPtr = mBuffer;
        mBufCount = 0;
        // Without a stream or without buffer storage nothing can be read;
        // bail out instead of dereferencing a null pointer or writing past
        // a zero-sized buffer.
        if(mStream == 0 || mBuffer == 0 || mBufSize == 0) return 0;
        // First take whatever is immediately available.
        mBufCount = static_cast<unsigned int>(mStream->readsome(mBuffer, mBufSize));
        if(mBufCount == 0) {
            // in case of console input, wait for the next character
            mStream->read(mBuffer, 1);
            mBufCount = static_cast<unsigned int>(mStream->gcount());
        }
        return mBufCount;
    }

private:
    Tokenizer(const Tokenizer&);            // disable copy-constructor
    Tokenizer& operator=(const Tokenizer&); // disable assignment operator
};

} // end of PACC namespace

#endif // PACC_Tokenizer_hpp_