CLucene - a full-featured C++ search engine
API Documentation
00001 /*------------------------------------------------------------------------------ 00002 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 00003 * 00004 * Distributable under the terms of either the Apache License (Version 2.0) or 00005 * the GNU Lesser General Public License, as specified in the COPYING file. 00006 ------------------------------------------------------------------------------*/ 00007 #ifndef _lucene_analysis_AnalysisHeader_ 00008 #define _lucene_analysis_AnalysisHeader_ 00009 00010 CL_CLASS_DEF(util,Reader) 00011 CL_NS_DEF(analysis) 00012 00013 typedef CL_NS(util)::CLSetList<const TCHAR*, CL_NS(util)::Compare::TChar, CL_NS(util)::Deletor::tcArray> CLTCSetList; 00014 00029 class Token:LUCENE_BASE{ 00030 private: 00031 int32_t _startOffset; 00032 int32_t _endOffset; 00033 const TCHAR* _type; 00034 int32_t positionIncrement; 00035 size_t bufferTextLen; 00036 00037 public: 00038 #ifndef LUCENE_TOKEN_WORD_LENGTH 00039 TCHAR* _termText; 00040 #else 00041 TCHAR _termText[LUCENE_TOKEN_WORD_LENGTH+1]; 00042 #endif 00043 int32_t _termTextLen; 00044 static const TCHAR* defaultType; 00045 00046 Token(); 00047 ~Token(); 00049 Token(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ=defaultType); 00050 void set(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ=defaultType); 00051 00052 size_t bufferLength(){ return bufferTextLen; } 00053 void growBuffer(size_t size); 00054 00077 void setPositionIncrement(int32_t posIncr); 00078 int32_t getPositionIncrement() const; 00079 const TCHAR* termText() const; 00080 size_t termTextLength(); 00081 void resetTermTextLen(); 00082 void setText(const TCHAR* txt); 00083 00092 int32_t startOffset() const { return _startOffset; } 00093 void setStartOffset(int32_t val){ _startOffset =val; } 00094 00099 int32_t endOffset() const { return _endOffset; } 00100 void setEndOffset(int32_t val){ _endOffset =val; } 00101 00103 const TCHAR* type() const { return _type; } 
00104 void setType(const TCHAR* val) { _type = val; } 00105 00106 TCHAR* toString() const; 00107 }; 00108 00121 class TokenStream:LUCENE_BASE { 00122 public: 00124 virtual bool next(Token* token) = 0; 00125 00127 virtual void close() = 0; 00128 00129 virtual ~TokenStream(){ 00130 } 00131 00136 _CL_DEPRECATED(next(Token)) Token* next(); 00137 }; 00138 00139 00150 class CLUCENE_EXPORT Analyzer:LUCENE_BASE{ 00151 public: 00157 virtual TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader)=0; 00158 00159 virtual ~Analyzer(){ 00160 } 00161 00175 virtual int32_t getPositionIncrementGap(const TCHAR* fieldName); 00176 }; 00177 00178 00183 class Tokenizer:public TokenStream { 00184 protected: 00186 CL_NS(util)::Reader* input; 00187 00188 public: 00190 Tokenizer(); 00192 Tokenizer(CL_NS(util)::Reader* _input); 00193 00194 // ** By default, closes the input Reader. */ 00195 virtual void close(); 00196 virtual ~Tokenizer(); 00197 }; 00198 00203 class TokenFilter:public TokenStream { 00204 protected: 00206 TokenStream* input; 00208 bool deleteTokenStream; 00209 00215 TokenFilter(TokenStream* in, bool deleteTS=false); 00216 virtual ~TokenFilter(); 00217 public: 00219 void close(); 00220 }; 00221 00222 CL_NS_END 00223 #endif