CLucene - a full-featured C++ search engine
API Documentation
00001 /*------------------------------------------------------------------------------ 00002 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 00003 * 00004 * Distributable under the terms of either the Apache License (Version 2.0) or 00005 * the GNU Lesser General Public License, as specified in the COPYING file. 00006 ------------------------------------------------------------------------------*/ 00007 #ifndef _lucene_util_Reader_ 00008 #define _lucene_util_Reader_ 00009 00010 00011 #include "streambase.h" 00012 #include "stringreader.h" 00013 #include "fileinputstream.h" 00014 #include "bufferedstream.h" 00015 00016 CL_NS_DEF(util) 00020 class CLUCENE_EXPORT Reader:LUCENE_BASE { 00021 typedef jstreams::StreamBase<TCHAR> jsReader; 00022 public: 00023 bool deleteReader; 00024 jsReader* reader; 00025 00026 Reader(jsReader* reader, bool deleteReader){ 00027 this->reader = reader; 00028 this->deleteReader = deleteReader; 00029 } 00030 virtual ~Reader(){ 00031 if ( deleteReader ) 00032 delete reader; 00033 reader = NULL; 00034 } 00035 inline int read(){ 00036 const TCHAR*b; 00037 const int32_t nread = reader->read(b, 1,1); 00038 if ( nread < -1 ) //if not eof 00039 _CLTHROWA(CL_ERR_IO,reader->getError() ); 00040 else if ( nread == -1 ) 00041 return -1; 00042 else 00043 return b[0]; 00044 } 00045 00046 // Read one line, return the length of the line read 00047 inline int32_t readLine(TCHAR* buffer){ 00048 int32_t i = 0; 00049 while (true) { 00050 int32_t b = read(); 00051 if (b < 1) 00052 break; 00053 if (b == '\n' || b == '\r') { 00054 if (i > 0) 00055 break; 00056 else 00057 continue; 00058 } 00059 buffer[i++] = b; 00060 } 00061 buffer[i] = 0; 00062 return i; 00063 } 00067 inline int32_t read(const TCHAR*& start){ 00068 int32_t nread = reader->read(start,1,0); 00069 if ( nread < -1 ) //if not eof 00070 _CLTHROWA(CL_ERR_IO,reader->getError()); 00071 else 00072 return nread; 00073 } 00074 inline int32_t read(const TCHAR*& start, int32_t len){ 00075 
int32_t nread = reader->read(start, len, len); 00076 if ( nread < -1 ) //if not eof 00077 _CLTHROWA(CL_ERR_IO,reader->getError()); 00078 else 00079 return nread; 00080 } 00081 inline int64_t skip(int64_t ntoskip){ 00082 int64_t skipped = reader->skip(ntoskip); 00083 if ( skipped < 0 ) 00084 _CLTHROWA(CL_ERR_IO,reader->getError()); 00085 else 00086 return skipped; 00087 } 00088 inline int64_t mark(int32_t readAheadlimit){ 00089 int64_t pos = reader->mark(readAheadlimit); 00090 if ( pos < 0 ) 00091 _CLTHROWA(CL_ERR_IO,reader->getError()); 00092 else 00093 return pos; 00094 } 00095 int64_t reset(int64_t pos){ 00096 int64_t r = reader->reset(pos); 00097 if ( r < 0 ) 00098 _CLTHROWA(CL_ERR_IO,reader->getError()); 00099 else 00100 return r; 00101 } 00102 }; 00103 00105 class StringReader: public Reader{ 00106 public: 00107 StringReader ( const TCHAR* value ); 00108 StringReader ( const TCHAR* value, const int32_t length ); 00109 StringReader ( const TCHAR* value, const int32_t length, bool copyData ); 00110 ~StringReader(); 00111 }; 00112 00117 class SimpleInputStreamReader: public jstreams::BufferedInputStream<TCHAR>{ 00118 int32_t decode(TCHAR* start, int32_t space); 00119 int encoding; 00120 enum{ 00121 ASCII=1, 00122 UTF8=2, 00123 UCS2_LE=3 00124 }; 00125 bool finishedDecoding; 00126 jstreams::StreamBase<char>* input; 00127 int32_t charsLeft; 00128 00129 jstreams::InputStreamBuffer<char> charbuf; 00130 int32_t fillBuffer(TCHAR* start, int32_t space); 00131 public: 00132 SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* encoding); 00133 ~SimpleInputStreamReader(); 00134 }; 00135 00140 class FileReader: public Reader{ 00141 jstreams::FileInputStream* input; 00142 public: 00143 FileReader ( const char* path, const char* enc, 00144 const int32_t cachelen = 13, 00145 const int32_t cachebuff = 14 ); //todo: optimise these cache values 00146 ~FileReader (); 00147 00148 int32_t read(const TCHAR*& start, int32_t _min, int32_t _max); 00149 int64_t mark(int32_t 
readlimit); 00150 int64_t reset(int64_t); 00151 }; 00152 00153 CL_NS_END 00154 #endif