CLucene - a full-featured, c++ search engine
API Documentation
00001 /*------------------------------------------------------------------------------ 00002 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 00003 * 00004 * Distributable under the terms of either the Apache License (Version 2.0) or 00005 * the GNU Lesser General Public License, as specified in the COPYING file. 00006 ------------------------------------------------------------------------------*/ 00007 #ifndef _lucene_document_Field_ 00008 #define _lucene_document_Field_ 00009 00010 /* 00011 Fieldable reading: 00012 https://issues.apache.org/jira/browse/LUCENE-1219?page=com.atlassian.jira.plugin.system.issuetabpanels:comment- tabpanel&focusedCommentId=12578199#action_12578199 00013 http://lucene.markmail.org/message/ioi4f6z24cbd5bdm?q=Fieldable#query:Fieldable+page:1+mid:fxmvzb6up7zve7k4+state:results 00014 00015 TODO: - Solve some inconsistencies between CL and JL - mainly in the constructors area. 00016 - Write some more tests to make sure we conform with JL - mainly in the tokenizing and omitNorms area 00017 - Is there a bug in JL when calling setOmitNorms after a Tokenized field was created? 00018 - TokenStream* implementation - mend all 3 pointers to one void* ? 00019 */ 00020 00021 CL_CLASS_DEF(util,Reader) 00022 CL_CLASS_DEF(analysis,TokenStream) 00023 namespace jstreams{ 00024 template <class T> 00025 class StreamBase; 00026 } 00027 00028 CL_NS_DEF(document) 00043 class CLUCENE_EXPORT Field :LUCENE_BASE{ 00044 struct Internal; 00045 Internal* internal; 00046 public: 00047 enum Store{ 00053 STORE_YES=1, 00055 STORE_NO=2, 00056 00064 STORE_COMPRESS=4 00065 }; 00066 00067 enum Index{ 00071 INDEX_NO=16, 00072 00077 INDEX_TOKENIZED=32, 00078 00083 INDEX_UNTOKENIZED=64, 00084 00095 INDEX_NONORMS=128 00096 }; 00097 00098 enum TermVector{ 00100 TERMVECTOR_NO=256, 00101 00104 TERMVECTOR_YES=512, 00105 00111 TERMVECTOR_WITH_POSITIONS = TERMVECTOR_YES | 1024, 00112 00118 TERMVECTOR_WITH_OFFSETS = TERMVECTOR_YES | 2048, 00119 00127 TERMVECTOR_WITH_POSITIONS_OFFSETS = TERMVECTOR_WITH_OFFSETS | TERMVECTOR_WITH_POSITIONS 00128 }; 00129 00130 enum { LAZY_YES = 4096 }; 00131 00132 Field(const TCHAR* name, const TCHAR* value, int _config); 00133 Field(const TCHAR* name, CL_NS(util)::Reader* reader, int _config); 00134 Field(const TCHAR* name, jstreams::StreamBase<char>* stream, int _config); 00135 ~Field(); 00136 00139 const TCHAR* name() const; 00140 00144 TCHAR* stringValue() const; 00145 00149 CL_NS(util)::Reader* readerValue() const; 00150 00154 jstreams::StreamBase<char>* streamValue() const; 00155 00159 CL_NS(analysis)::TokenStream* tokenStreamValue() const; 00160 00161 // True iff the value of the field is to be stored in the index for return 00162 // with search hits. It is an error for this to be true if a field is 00163 // Reader-valued. 00164 bool isStored() const; 00165 00166 // True iff the value of the field is to be indexed, so that it may be 00167 // searched on. 00168 bool isIndexed() const; 00169 00170 // True iff the value of the field should be tokenized as text prior to 00171 // indexing. Un-tokenized fields are indexed as a single word and may not be 00172 // Reader-valued. 00173 bool isTokenized() const; 00174 00181 bool isCompressed() const; 00182 00191 bool isTermVectorStored() const; 00192 00197 bool isStoreOffsetWithTermVector() const; 00198 00202 bool isStorePositionWithTermVector() const; 00203 00215 float_t getBoost() const; 00216 00233 void setBoost(const float_t value); 00234 00236 bool isBinary() const; 00237 00239 bool getOmitNorms() const; 00240 00246 void setOmitNorms(const bool omitNorms); 00247 00255 bool isLazy() const; 00256 00257 // Prints a Field for human consumption. 00258 TCHAR* toString(); 00259 00274 void setValue(const TCHAR* value); 00275 00277 void setValue(CL_NS(util)::Reader* value); 00278 00280 void setValue(jstreams::StreamBase<char>* value) ; 00281 00283 void setValue(CL_NS(analysis)::TokenStream* value); 00284 00285 protected: 00286 //Set configs using XOR. This resets all the settings 00287 //For example, to use term vectors with positions and offsets do: 00288 //object->setConfig(TERMVECTOR_WITH_POSITIONS | TERMVECTOR_WITH_OFFSETS); 00289 inline void setConfig(const uint32_t termVector); 00290 00291 inline void _resetValue(); 00292 }; 00293 CL_NS_END 00294 #endif