00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028
00029 #include <string>
00030 #include <time.h>
00031
00032 #include <xapian/base.h>
00033 #include <xapian/types.h>
00034
00035 namespace Xapian {
00036
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class TermIterator;
00043 class Weight;
00044
00048 class MSet {
00049 public:
00050 class Internal;
00052 Xapian::Internal::RefCntPtr<Internal> internal;
00053
00054 public:
00055
00057
00058
00060 MSet();
00061
00063 ~MSet();
00064
00066 MSet(const MSet & other);
00067
00069 void operator=(const MSet &other);
00070
00086 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087
00090 void fetch(const MSetIterator &item) const;
00091
00094 void fetch() const;
00095
00100 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101
00103 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104
00112 Xapian::doccount get_termfreq(const std::string &tname) const;
00113
00121 Xapian::weight get_termweight(const std::string &tname) const;
00122
00130 Xapian::doccount get_firstitem() const;
00131
00141 Xapian::doccount get_matches_lower_bound() const;
00142
00155 Xapian::doccount get_matches_estimated() const;
00156
00166 Xapian::doccount get_matches_upper_bound() const;
00167
00173 Xapian::weight get_max_possible() const;
00174
00188 Xapian::weight get_max_attained() const;
00189
00191 Xapian::termcount size() const;
00192
00193 Xapian::doccount max_size() const;
00194
00196 bool empty() const;
00197
00199 void swap(MSet & other);
00200
00202 MSetIterator begin() const;
00203
00205 MSetIterator end() const;
00206
00208 MSetIterator back() const;
00209
00219 MSetIterator operator[](Xapian::doccount i) const;
00220
00222
00223 typedef MSetIterator value_type;
00224 typedef MSetIterator iterator;
00225 typedef MSetIterator const_iterator;
00226 typedef MSetIterator & reference;
00227 typedef MSetIterator & const_reference;
00228 typedef MSetIterator * pointer;
00229 typedef Xapian::doccount_diff difference_type;
00230 typedef Xapian::doccount size_type;
00232
00236 std::string get_description() const;
00237 };
00238
00242 class MSetIterator {
00243 private:
00244 friend class MSet;
00245 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00246 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00247
00248 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00249 : index(index_), mset(mset_) { }
00250
00251 Xapian::doccount index;
00252 MSet mset;
00253
00254 public:
00258 MSetIterator() : index(0), mset() { }
00259
00260 ~MSetIterator() { }
00261
00263 MSetIterator(const MSetIterator &other) {
00264 index = other.index;
00265 mset = other.mset;
00266 }
00267
00269 void operator=(const MSetIterator &other) {
00270 index = other.index;
00271 mset = other.mset;
00272 }
00273
00275 MSetIterator & operator++() {
00276 ++index;
00277 return *this;
00278 }
00279
00281 MSetIterator operator++(int) {
00282 MSetIterator tmp = *this;
00283 ++index;
00284 return tmp;
00285 }
00286
00288 MSetIterator & operator--() {
00289 --index;
00290 return *this;
00291 }
00292
00294 MSetIterator operator--(int) {
00295 MSetIterator tmp = *this;
00296 --index;
00297 return tmp;
00298 }
00299
00301 Xapian::docid operator*() const;
00302
00321 Xapian::Document get_document() const;
00322
00329 Xapian::doccount get_rank() const {
00330 return mset.get_firstitem() + index;
00331 }
00332
00334 Xapian::weight get_weight() const;
00335
00352 Xapian::doccount get_collapse_count() const;
00353
00359 Xapian::percent get_percent() const;
00360
00364 std::string get_description() const;
00365
00367
00368 typedef std::bidirectional_iterator_tag iterator_category;
00369 typedef Xapian::docid value_type;
00370 typedef Xapian::doccount_diff difference_type;
00371 typedef Xapian::docid * pointer;
00372 typedef Xapian::docid & reference;
00374 };
00375
00376 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00377 {
00378 return (a.index == b.index);
00379 }
00380
00381 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00382 {
00383 return (a.index != b.index);
00384 }
00385
00386 class ESetIterator;
00387
00392 class ESet {
00393 public:
00394 class Internal;
00396 Xapian::Internal::RefCntPtr<Internal> internal;
00397
00399 ESet();
00400
00402 ~ESet();
00403
00405 ESet(const ESet & other);
00406
00408 void operator=(const ESet &other);
00409
00414 Xapian::termcount get_ebound() const;
00415
00417 Xapian::termcount size() const;
00418
00420 bool empty() const;
00421
00423 void swap(ESet & other);
00424
00426 ESetIterator begin() const;
00427
00429 ESetIterator end() const;
00430
00432 ESetIterator back() const;
00433
00435 ESetIterator operator[](Xapian::doccount i) const;
00436
00441 std::string get_description() const;
00442 };
00443
00445 class ESetIterator {
00446 private:
00447 friend class ESet;
00448 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00449 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00450
00451 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00452 : index(index_), eset(eset_) { }
00453
00454 Xapian::termcount index;
00455 ESet eset;
00456
00457 public:
00461 ESetIterator() : index(0), eset() { }
00462
00463 ~ESetIterator() { }
00464
00466 ESetIterator(const ESetIterator &other) {
00467 index = other.index;
00468 eset = other.eset;
00469 }
00470
00472 void operator=(const ESetIterator &other) {
00473 index = other.index;
00474 eset = other.eset;
00475 }
00476
00478 ESetIterator & operator++() {
00479 ++index;
00480 return *this;
00481 }
00482
00484 ESetIterator operator++(int) {
00485 ESetIterator tmp = *this;
00486 ++index;
00487 return tmp;
00488 }
00489
00491 ESetIterator & operator--() {
00492 --index;
00493 return *this;
00494 }
00495
00497 ESetIterator operator--(int) {
00498 ESetIterator tmp = *this;
00499 --index;
00500 return tmp;
00501 }
00502
00504 const std::string & operator *() const;
00505
00507 Xapian::weight get_weight() const;
00508
00512 std::string get_description() const;
00513
00515
00516 typedef std::bidirectional_iterator_tag iterator_category;
00517 typedef std::string value_type;
00518 typedef Xapian::termcount_diff difference_type;
00519 typedef std::string * pointer;
00520 typedef std::string & reference;
00522 };
00523
00524 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00525 {
00526 return (a.index == b.index);
00527 }
00528
00529 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00530 {
00531 return (a.index != b.index);
00532 }
00533
00538 class RSet {
00539 public:
00541 class Internal;
00542
00544 Internal *internal;
00545
00547 RSet(const RSet &rset);
00548
00550 void operator=(const RSet &rset);
00551
00553 RSet();
00554
00556 ~RSet();
00557
00559 Xapian::doccount size() const;
00560
00562 bool empty() const;
00563
00565 void add_document(Xapian::docid did);
00566
00568 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00569
00571 void remove_document(Xapian::docid did);
00572
00574 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00575
00577 bool contains(Xapian::docid did) const;
00578
00580 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00581
00586 std::string get_description() const;
00587 };
00588
00591 class MatchDecider {
00592 public:
00595 virtual int operator()(const Xapian::Document &doc) const = 0;
00596
00598 virtual ~MatchDecider() {}
00599 };
00600
/** Abstract functor taking a term name and returning an int verdict.
 *
 *  Subclasses implement operator(); Enquire::get_eset() accepts a pointer to
 *  one of these as its optional edecider argument.
 */
class ExpandDecider {
  public:
    /// Decide on the term @a tname.  The result is an int; presumably
    /// non-zero means "accept" - confirm against the code that calls it.
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor, so subclasses can be destroyed through a base
    /// pointer.
    virtual ~ExpandDecider() { }
};
00612
00623 class Enquire {
00624 private:
00626 Enquire(const Enquire &);
00627
00629 void operator=(const Enquire &);
00630
00631 public:
00632 class Internal;
00634 Xapian::Internal::RefCntPtr<Internal> internal;
00635
00651 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00652
00655 ~Enquire();
00656
00663 void set_query(const Xapian::Query & query_);
00664
00671 const Xapian::Query & get_query();
00672
00679 void set_weighting_scheme(const Weight &weight_);
00680
00707 void set_collapse_key(Xapian::valueno collapse_key);
00708
00715 void set_sort_forward(bool sort_forward);
00716
00734 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00735
00752 void set_sorting(Xapian::valueno sort_key, int sort_bands,
00753 bool sort_by_relevance = false);
00754
00766 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00767
00793 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00794 Xapian::doccount checkatleast = 0,
00795 const RSet * omrset = 0,
00796 const MatchDecider * mdecider = 0) const;
00797 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00798 const RSet * omrset,
00799 const MatchDecider * mdecider = 0) const {
00800 return get_mset(first, maxitems, 0, omrset, mdecider);
00801 }
00802
00803 static const int include_query_terms = 1;
00804 static const int use_exact_termfreq = 2;
00827 ESet get_eset(Xapian::termcount maxitems,
00828 const RSet & omrset,
00829 int flags = 0,
00830 double k = 1.0,
00831 const Xapian::ExpandDecider * edecider = 0) const;
00832
00846 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00847 const Xapian::ExpandDecider * edecider) const {
00848 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00849 }
00850
00879 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00880
00882 TermIterator get_matching_terms_end(Xapian::docid did) const;
00883
00906 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00907
00909 TermIterator get_matching_terms_end(const MSetIterator &it) const;
00910
00913 void register_match_decider(const std::string &name,
00914 const MatchDecider *mdecider = NULL);
00915
00919 std::string get_description() const;
00920 };
00921
00922 }
00923
00924 class SocketServer;
00925
00926 namespace Xapian {
00927
00929 class Weight {
00930 friend class Enquire;
00931 friend class ::SocketServer;
00932 public:
00933 class Internal;
00934 protected:
00935 Weight(const Weight &);
00936 private:
00937 void operator=(Weight &);
00938
00946 virtual Weight * clone() const = 0;
00947
00948 protected:
00949 const Internal * internal;
00950 Xapian::doclength querysize;
00951 Xapian::termcount wqf;
00952 std::string tname;
00953
00954 public:
00955 Weight() { }
00956 virtual ~Weight() { }
00957
00970 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00971 Xapian::termcount wqf_, std::string tname_) const {
00972 Weight * wt = clone();
00973 wt->internal = internal_;
00974 wt->querysize = querysize_;
00975 wt->wqf = wqf_;
00976 wt->tname = tname_;
00977 return wt;
00978 }
00979
00984 virtual std::string name() const = 0;
00985
00987 virtual std::string serialise() const = 0;
00988
00990 virtual Weight * unserialise(const std::string &s) const = 0;
00991
00999 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01000 Xapian::doclength len) const = 0;
01001
01007 virtual Xapian::weight get_maxpart() const = 0;
01008
01017 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01018
01022 virtual Xapian::weight get_maxextra() const = 0;
01023
01025 virtual bool get_sumpart_needs_doclength() const { return true; }
01026 };
01027
01029 class BoolWeight : public Weight {
01030 public:
01031 Weight * clone() const {
01032 return new BoolWeight;
01033 }
01034 BoolWeight() { }
01035 ~BoolWeight() { }
01036 std::string name() const { return "Bool"; }
01037 std::string serialise() const { return ""; }
01038 Weight * unserialise(const std::string & ) const {
01039 return new BoolWeight;
01040 }
01041 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
01042 Xapian::weight get_maxpart() const { return 0; }
01043
01044 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
01045 Xapian::weight get_maxextra() const { return 0; }
01046
01047 bool get_sumpart_needs_doclength() const { return false; }
01048 };
01049
01062 class BM25Weight : public Weight {
01063 private:
01064 mutable Xapian::weight termweight;
01065 mutable Xapian::doclength lenpart;
01066 mutable double BD;
01067
01068 double A, B, C, D;
01069 Xapian::doclength min_normlen;
01070
01071 mutable bool weight_calculated;
01072
01073 void calc_termweight() const;
01074
01075 public:
01094 BM25Weight(double A_, double B_, double C_, double D_,
01095 double min_normlen_)
01096 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
01097 weight_calculated(false)
01098 {
01099 if (A < 0) A = 0;
01100 if (B < 0) B = 0;
01101 if (C < 0) C = 0;
01102 if (D < 0) D = 0; else if (D > 1) D = 1;
01103 }
01104 BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
01105 weight_calculated(false) { }
01106
01107 Weight * clone() const {
01108 return new BM25Weight(A, B, C, D, min_normlen);
01109 }
01110 ~BM25Weight() { }
01111 std::string name() const { return "BM25"; }
01112 std::string serialise() const;
01113 Weight * unserialise(const std::string & s) const;
01114 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01115 Xapian::weight get_maxpart() const;
01116
01117 Xapian::weight get_sumextra(Xapian::doclength len) const;
01118 Xapian::weight get_maxextra() const;
01119
01120 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01121 };
01122
01135 class TradWeight : public Weight {
01136 private:
01137 mutable Xapian::weight termweight;
01138 mutable Xapian::doclength lenpart;
01139
01140 double param_k;
01141
01142 mutable bool weight_calculated;
01143
01144 void calc_termweight() const;
01145
01146 public:
01154 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01155 if (param_k < 0) param_k = 0;
01156 }
01157
01158 TradWeight() : param_k(1.0), weight_calculated(false) { }
01159
01160 Weight * clone() const {
01161 return new TradWeight(param_k);
01162 }
01163 ~TradWeight() { }
01164 std::string name() const { return "Trad"; }
01165 std::string serialise() const;
01166 Weight * unserialise(const std::string & s) const;
01167
01168 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01169 Xapian::weight get_maxpart() const;
01170
01171 Xapian::weight get_sumextra(Xapian::doclength len) const;
01172 Xapian::weight get_maxextra() const;
01173
01174 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01175 };
01176
01177 }
01178
01179 #endif