CLucene - a full-featured C++ search engine
API Documentation
00001 /*------------------------------------------------------------------------------ 00002 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 00003 * 00004 * Distributable under the terms of either the Apache License (Version 2.0) or 00005 * the GNU Lesser General Public License, as specified in the COPYING file. 00006 ------------------------------------------------------------------------------*/ 00007 #ifndef _lucene_search_Similarity_ 00008 #define _lucene_search_Similarity_ 00009 00010 CL_CLASS_DEF(index,Term) 00011 #include "CLucene/util/VoidMapSetDefinitions.h" 00012 00013 CL_NS_DEF(search) 00014 00015 class Searcher;//save including the searchheader.h 00016 class DefaultSimilarity; 00017 00051 class CLUCENE_EXPORT Similarity:LUCENE_BASE { 00052 public: 00053 virtual ~Similarity(); 00054 00061 static void setDefault(Similarity* similarity); 00062 00071 static Similarity* getDefault(); 00072 00085 static uint8_t encodeNorm(float_t f); 00086 00090 static float_t decodeNorm(uint8_t b); 00091 00092 static uint8_t floatToByte(float_t f); 00093 static float_t byteToFloat(uint8_t b); 00094 00104 float_t idf(CL_NS(util)::CLVector<CL_NS(index)::Term*>* terms, Searcher* searcher); 00105 //float_t idf(Term** terms, Searcher* searcher); 00106 00107 00123 float_t idf(CL_NS(index)::Term* term, Searcher* searcher); 00124 00125 00141 inline float_t tf(int32_t freq){ return tf((float_t)freq); } 00142 00164 virtual float_t lengthNorm(const TCHAR* fieldName, int32_t numTokens) = 0; 00165 00176 virtual float_t queryNorm(float_t sumOfSquaredWeights) = 0; 00177 00191 virtual float_t sloppyFreq(int32_t distance) = 0; 00192 00206 virtual float_t tf(float_t freq) = 0; 00207 00221 virtual float_t idf(int32_t docFreq, int32_t numDocs) = 0; 00222 00235 virtual float_t coord(int32_t overlap, int32_t maxOverlap) = 0; 00236 }; 00237 00238 00240 class DefaultSimilarity: public Similarity { 00241 public: 00242 DefaultSimilarity(); 00243 ~DefaultSimilarity(); 00244 00246 
float_t lengthNorm(const TCHAR* fieldName, int32_t numTerms); 00247 00249 float_t queryNorm(float_t sumOfSquaredWeights); 00250 00252 inline float_t tf(float_t freq); 00253 00255 float_t sloppyFreq(int32_t distance); 00256 00258 float_t idf(int32_t docFreq, int32_t numDocs); 00259 00261 float_t coord(int32_t overlap, int32_t maxOverlap); 00262 }; 00263 00264 CL_NS_END 00265 #endif