CLucene - a full-featured C++ search engine
API Documentation
00001 /*------------------------------------------------------------------------------ 00002 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 00003 * 00004 * Distributable under the terms of either the Apache License (Version 2.0) or 00005 * the GNU Lesser General Public License, as specified in the COPYING file. 00006 ------------------------------------------------------------------------------*/ 00007 #ifndef _lucene_index_IndexWriter_ 00008 #define _lucene_index_IndexWriter_ 00009 00010 00011 //#include "CLucene/analysis/AnalysisHeader.h" 00012 #include "CLucene/util/VoidMapSetDefinitions.h" 00013 CL_CLASS_DEF(search,Similarity) 00014 CL_CLASS_DEF(store,Lock) 00015 CL_CLASS_DEF(store,TransactionalRAMDirectory) 00016 CL_CLASS_DEF(analysis,Analyzer) 00017 CL_CLASS_DEF(store,Directory) 00018 CL_CLASS_DEF(store,LuceneLock) 00019 CL_CLASS_DEF(document,Document) 00020 CL_CLASS_DEF(index,SegmentInfos) 00021 CL_CLASS_DEF(index,IndexReader) 00022 CL_CLASS_DEF(index,SegmentReader) 00023 CL_CLASS_DEF(index,SegmentInfos) 00024 00025 //#include "CLucene/store/TransactionalRAMDirectory.h" 00026 //#include "SegmentHeader.h" 00027 #include "CLucene/LuceneThreads.h" 00028 00029 CL_NS_DEF(index) 00030 00031 00054 class CLUCENE_EXPORT IndexWriter:LUCENE_BASE { 00055 bool isOpen; //indicates if the writers is open - this way close can be called multiple times 00056 00057 // how to analyze text 00058 CL_NS(analysis)::Analyzer* analyzer; 00059 00060 CL_NS(search)::Similarity* similarity; // how to normalize 00061 00067 bool useCompoundFile; 00068 bool closeDir; 00069 00070 CL_NS(store)::TransactionalRAMDirectory* ramDirectory; // for temp segs 00071 00072 CL_NS(store)::LuceneLock* writeLock; 00073 00074 void _IndexWriter(const bool create); 00075 00076 void _finalize(); 00077 00078 // where this index resides 00079 CL_NS(store)::Directory* directory; 00080 00081 00082 int32_t getSegmentsCounter(); 00083 int32_t maxFieldLength; 00084 int32_t mergeFactor; 00085 int32_t 
minMergeDocs; 00086 int32_t maxMergeDocs; 00087 int32_t termIndexInterval; 00088 00089 int64_t writeLockTimeout; 00090 int64_t commitLockTimeout; 00091 public: 00092 DEFINE_MUTEX(THIS_LOCK) 00093 00094 // Release the write lock, if needed. 00095 SegmentInfos* segmentInfos; 00096 00097 // Release the write lock, if needed. 00098 ~IndexWriter(); 00099 00113 LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_FIELD_LENGTH = 10000); 00114 LUCENE_STATIC_CONSTANT(int32_t, FIELD_TRUNC_POLICY__WARN = -1); 00115 int32_t getMaxFieldLength() const{ return maxFieldLength; } 00116 void setMaxFieldLength(int32_t val){ maxFieldLength = val; } 00117 00121 LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_BUFFERED_DOCS = 10); 00129 void setMaxBufferedDocs(int32_t val){ minMergeDocs = val; } 00133 int32_t getMaxBufferedDocs(){ return minMergeDocs; } 00134 00138 LUCENE_STATIC_CONSTANT(int64_t, WRITE_LOCK_TIMEOUT = 1000); 00142 void setWriteLockTimeout(int64_t writeLockTimeout) { this->writeLockTimeout = writeLockTimeout; } 00146 int64_t getWriteLockTimeout() { return writeLockTimeout; } 00147 00151 LUCENE_STATIC_CONSTANT(int64_t, COMMIT_LOCK_TIMEOUT = 10000); 00155 void setCommitLockTimeout(int64_t commitLockTimeout) { this->commitLockTimeout = commitLockTimeout; } 00159 int64_t getCommitLockTimeout() { return commitLockTimeout; } 00160 00161 static const char* WRITE_LOCK_NAME; //"write.lock"; 00162 static const char* COMMIT_LOCK_NAME; //"commit.lock"; 00163 00167 LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MERGE_FACTOR = 10); 00168 /* Determines how often segment indices are merged by addDocument(). With 00169 * smaller values, less RAM is used while indexing, and searches on 00170 * unoptimized indices are faster, but indexing speed is slower. With larger 00171 * values more RAM is used while indexing and searches on unoptimized indices 00172 * are slower, but indexing is faster. 
Thus larger values (> 10) are best 00173 * for batched index creation, and smaller values (< 10) for indices that are 00174 * interactively maintained. 00175 * 00176 * <p>This must never be less than 2. The default value is 10. 00177 */ 00178 int32_t getMergeFactor() const{ return mergeFactor; } 00179 void setMergeFactor(int32_t val){ mergeFactor = val; } 00180 00181 00188 LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_TERM_INDEX_INTERVAL = 128); 00210 void setTermIndexInterval(int32_t interval) { termIndexInterval = interval; } 00215 int32_t getTermIndexInterval() { return termIndexInterval; } 00216 00224 int32_t getMinMergeDocs() const{ return minMergeDocs; } 00225 void setMinMergeDocs(int32_t val){ minMergeDocs = val; } 00226 00234 LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_MERGE_DOCS = 0x7FFFFFFFL); 00242 int32_t getMaxMergeDocs() const{ return maxMergeDocs; } 00243 void setMaxMergeDocs(int32_t val){ maxMergeDocs = val; } 00244 00260 IndexWriter(const char* path, CL_NS(analysis)::Analyzer* a, const bool create, const bool closeDir=true); 00261 00262 00268 IndexWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a, const bool create, const bool closeDir=false); 00269 00274 void close(); 00275 00279 int32_t docCount(); 00280 00281 00288 void addDocument(CL_NS(document)::Document* doc, CL_NS(analysis)::Analyzer* analyzer=NULL); 00289 00290 00295 void optimize(); 00296 00297 00309 void addIndexes(CL_NS(store)::Directory** dirs); 00310 00315 void addIndexes(IndexReader** readers); 00316 00317 00319 CL_NS(store)::Directory* getDirectory() { return directory; } 00320 00326 bool getUseCompoundFile() { return useCompoundFile; } 00327 00332 void setUseCompoundFile(bool value) { useCompoundFile = value; } 00333 00334 00339 void setSimilarity(CL_NS(search)::Similarity* similarity) { this->similarity = similarity; } 00340 00345 CL_NS(search)::Similarity* getSimilarity() { return this->similarity; } 00346 00348 CL_NS(analysis)::Analyzer* getAnalyzer() { return analyzer; 
} 00349 00350 private: 00351 class LockWith2; 00352 class LockWithCFS; 00353 friend class LockWith2; 00354 friend class LockWithCFS; 00355 00357 void flushRamSegments(); 00358 00360 void maybeMergeSegments(); 00361 00365 void mergeSegments(const uint32_t minSegment); 00366 00369 void mergeSegments(const uint32_t minSegment, const uint32_t end); 00370 00371 void deleteFiles(AStringArrayWithDeletor& files); 00372 void readDeleteableFiles(AStringArrayWithDeletor& files); 00373 void writeDeleteableFiles(AStringArrayWithDeletor& files); 00374 00375 /* 00376 * Some operating systems (e.g. Windows) don't permit a file to be deleted 00377 * while it is opened for read (e.g. by another process or thread). So we 00378 * assume that when a delete fails it is because the file is open in another 00379 * process, and queue the file for subsequent deletion. 00380 */ 00381 void deleteSegments(CL_NS(util)::CLVector<SegmentReader*>* segments); 00382 void deleteFiles(AStringArrayWithDeletor& files, CL_NS(store)::Directory* directory); 00383 void deleteFiles(AStringArrayWithDeletor& files, AStringArrayWithDeletor& deletable); 00384 00385 00386 // synchronized 00387 char* newSegmentName(); 00388 }; 00389 00390 CL_NS_END 00391 #endif