00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028
#include <iterator>
#include <string>
#include <time.h>

#include <xapian/base.h>
#include <xapian/types.h>
00034
00035 namespace Xapian {
00036
// Forward declarations: these classes are only named in declarations
// (parameters, return types, pointers) before or without being defined
// in this header.
class Database;
class Document;
class ErrorHandler;
class MSetIterator;
class Query;
class TermIterator;
class Weight;
00044
00048 class MSet {
00049 public:
00050 class Internal;
00052 Xapian::Internal::RefCntPtr<Internal> internal;
00053
00054 public:
00055
00057
00058
00060 MSet();
00061
00063 ~MSet();
00064
00066 MSet(const MSet & other);
00067
00069 void operator=(const MSet &other);
00070
00086 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087
00090 void fetch(const MSetIterator &item) const;
00091
00094 void fetch() const;
00095
00100 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101
00103 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104
00112 Xapian::doccount get_termfreq(const std::string &tname) const;
00113
00121 Xapian::weight get_termweight(const std::string &tname) const;
00122
00130 Xapian::doccount get_firstitem() const;
00131
00138 Xapian::doccount get_matches_lower_bound() const;
00139
00149 Xapian::doccount get_matches_estimated() const;
00150
00157 Xapian::doccount get_matches_upper_bound() const;
00158
00164 Xapian::weight get_max_possible() const;
00165
00179 Xapian::weight get_max_attained() const;
00180
00181 Xapian::doccount size() const;
00182
00183 Xapian::doccount max_size() const;
00184
00185 bool empty() const;
00186
00187 void swap(MSet & other);
00188
00189 MSetIterator begin() const;
00190
00191 MSetIterator end() const;
00192
00193 MSetIterator back() const;
00194
00204 MSetIterator operator[](Xapian::doccount i) const;
00205
00207
00208 typedef MSetIterator value_type;
00209 typedef MSetIterator iterator;
00210 typedef MSetIterator const_iterator;
00211 typedef MSetIterator & reference;
00212 typedef MSetIterator & const_reference;
00213 typedef MSetIterator * pointer;
00214 typedef Xapian::doccount_diff difference_type;
00215 typedef Xapian::doccount size_type;
00217
00221 std::string get_description() const;
00222 };
00223
00227 class MSetIterator {
00228 private:
00229 friend class MSet;
00230 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00231 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00232
00233 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00234 : index(index_), mset(mset_) { }
00235
00236 Xapian::doccount index;
00237 MSet mset;
00238
00239 public:
00243 MSetIterator() : index(0), mset() { }
00244
00245 ~MSetIterator() { }
00246
00248 MSetIterator(const MSetIterator &other) {
00249 index = other.index;
00250 mset = other.mset;
00251 }
00252
00254 void operator=(const MSetIterator &other) {
00255 index = other.index;
00256 mset = other.mset;
00257 }
00258
00260 MSetIterator & operator++() {
00261 ++index;
00262 return *this;
00263 }
00264
00265 void operator++(int) {
00266 ++index;
00267 }
00268
00270 Xapian::docid operator*() const;
00271
00290 Xapian::Document get_document() const;
00291
00298 Xapian::doccount get_rank() const {
00299 return mset.get_firstitem() + index;
00300 }
00301
00303 Xapian::weight get_weight() const;
00304
00321 Xapian::doccount get_collapse_count() const;
00322
00328 Xapian::percent get_percent() const;
00329
00333 std::string get_description() const;
00334
00336
00337 typedef std::input_iterator_tag iterator_category;
00338 typedef Xapian::docid value_type;
00339 typedef Xapian::doccount_diff difference_type;
00340 typedef Xapian::docid * pointer;
00341 typedef Xapian::docid & reference;
00343 };
00344
00345 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00346 {
00347 return (a.index == b.index);
00348 }
00349
00350 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00351 {
00352 return (a.index != b.index);
00353 }
00354
// Forward declaration: ESet's begin()/end() return ESetIterator, which is
// defined after ESet.
class ESetIterator;
00356
00361 class ESet {
00362 public:
00363 class Internal;
00365 Xapian::Internal::RefCntPtr<Internal> internal;
00366
00368 ESet();
00369
00371 ~ESet();
00372
00374 ESet(const ESet & other);
00375
00377 void operator=(const ESet &other);
00378
00383 Xapian::termcount get_ebound() const;
00384
00386 Xapian::termcount size() const;
00387
00389 bool empty() const;
00390
00392 ESetIterator begin() const;
00393
00395 ESetIterator end() const;
00396
00401 std::string get_description() const;
00402 };
00403
00405 class ESetIterator {
00406 private:
00407 friend class ESet;
00408 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00409 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00410
00411 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00412 : index(index_), eset(eset_) { }
00413
00414 Xapian::termcount index;
00415 ESet eset;
00416
00417 public:
00421 ESetIterator() : index(0), eset() { }
00422
00423 ~ESetIterator() { }
00424
00426 ESetIterator(const ESetIterator &other) {
00427 index = other.index;
00428 eset = other.eset;
00429 }
00430
00432 void operator=(const ESetIterator &other) {
00433 index = other.index;
00434 eset = other.eset;
00435 }
00436
00438 ESetIterator & operator++() {
00439 ++index;
00440 return *this;
00441 }
00442
00443 void operator++(int) {
00444 ++index;
00445 }
00446
00448 const std::string & operator *() const;
00449
00451 Xapian::weight get_weight() const;
00452
00456 std::string get_description() const;
00457
00459
00460 typedef std::input_iterator_tag iterator_category;
00461 typedef std::string value_type;
00462 typedef Xapian::termcount_diff difference_type;
00463 typedef std::string * pointer;
00464 typedef std::string & reference;
00466 };
00467
00468 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00469 {
00470 return (a.index == b.index);
00471 }
00472
00473 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00474 {
00475 return (a.index != b.index);
00476 }
00477
00482 class RSet {
00483 public:
00485 class Internal;
00486
00488 Internal *internal;
00489
00491 RSet(const RSet &rset);
00492
00494 void operator=(const RSet &rset);
00495
00497 RSet();
00498
00500 ~RSet();
00501
00503 Xapian::doccount size() const;
00504
00506 bool empty() const;
00507
00509 void add_document(Xapian::docid did);
00510
00512 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00513
00515 void remove_document(Xapian::docid did);
00516
00518 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00519
00521 bool contains(Xapian::docid did) const;
00522
00524 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00525
00530 std::string get_description() const;
00531 };
00532
00535 class MatchDecider {
00536 public:
00539 virtual int operator()(const Xapian::Document &doc) const = 0;
00540
00542 virtual ~MatchDecider() {}
00543 };
00544
/** Abstract functor which is given a term name and returns an int.
 *
 *  Enquire::get_eset() accepts a pointer to an ExpandDecider.
 *  NOTE(review): presumably a non-zero result means "accept this term" -
 *  confirm against the expand implementation.
 */
class ExpandDecider {
  public:
    /// Make a decision about the term tname.
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor, as this class is used polymorphically.
    virtual ~ExpandDecider() {}
};
00556
00570 class Enquire {
00571 private:
00573 Enquire(const Enquire &);
00574
00576 void operator=(const Enquire &);
00577
00578 public:
00579 class Internal;
00581 Xapian::Internal::RefCntPtr<Internal> internal;
00582
00598 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00599
00602 ~Enquire();
00603
00611 void set_query(const Xapian::Query & query_);
00612
00619 const Xapian::Query & get_query();
00620
00627 void set_weighting_scheme(const Weight &weight_);
00628
00655 void set_collapse_key(Xapian::valueno collapse_key);
00656
00663 void set_sort_forward(bool sort_forward);
00664
00682 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00683
00697 void set_sorting(Xapian::valueno sort_key, int sort_bands);
00698
00710 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00711
00732 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00733 const RSet * omrset = 0,
00734 const MatchDecider * mdecider = 0) const;
00735
00736 static const int include_query_terms = 1;
00737 static const int use_exact_termfreq = 2;
00761 ESet get_eset(Xapian::termcount maxitems,
00762 const RSet & omrset,
00763 int flags = 0,
00764 double k = 1.0,
00765 const Xapian::ExpandDecider * edecider = 0) const;
00766
00781 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00782 const Xapian::ExpandDecider * edecider) const {
00783 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00784 }
00785
00815 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00816
00818 TermIterator get_matching_terms_end(Xapian::docid did) const;
00819
00843 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00844
00846 TermIterator get_matching_terms_end(const MSetIterator &it) const;
00847
00850 void register_match_decider(const std::string &name,
00851 const MatchDecider *mdecider = NULL);
00852
00856 std::string get_description() const;
00857 };
00858
00859 }
00860
// Declared at global scope (outside namespace Xapian) so that
// Xapian::Weight can name ::SocketServer as a friend.
class SocketServer;
00862
00863 namespace Xapian {
00864
00866 class Weight {
00867 friend class Enquire;
00868 friend class ::SocketServer;
00869 public:
00870 class Internal;
00871 protected:
00872 Weight(const Weight &);
00873 private:
00874 void operator=(Weight &);
00875
00883 virtual Weight * clone() const = 0;
00884
00885 protected:
00886 const Internal * internal;
00887 Xapian::doclength querysize;
00888 Xapian::termcount wqf;
00889 std::string tname;
00890
00891 public:
00892 Weight() { }
00893 virtual ~Weight() { }
00894
00907 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00908 Xapian::termcount wqf_, std::string tname_) const {
00909 Weight * wt = clone();
00910 wt->internal = internal_;
00911 wt->querysize = querysize_;
00912 wt->wqf = wqf_;
00913 wt->tname = tname_;
00914 return wt;
00915 }
00916
00921 virtual std::string name() const = 0;
00922
00924 virtual std::string serialise() const = 0;
00925
00927 virtual Weight * unserialise(const std::string &s) const = 0;
00928
00936 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00937 Xapian::doclength len) const = 0;
00938
00944 virtual Xapian::weight get_maxpart() const = 0;
00945
00954 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
00955
00959 virtual Xapian::weight get_maxextra() const = 0;
00960
00962 virtual bool get_sumpart_needs_doclength() const { return true; }
00963 };
00964
00966 class BoolWeight : public Weight {
00967 public:
00968 Weight * clone() const {
00969 return new BoolWeight;
00970 }
00971 BoolWeight() { }
00972 ~BoolWeight() { }
00973 std::string name() const { return "Bool"; }
00974 std::string serialise() const { return ""; }
00975 Weight * unserialise(const std::string & ) const {
00976 return new BoolWeight;
00977 }
00978 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
00979 Xapian::weight get_maxpart() const { return 0; }
00980
00981 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
00982 Xapian::weight get_maxextra() const { return 0; }
00983
00984 bool get_sumpart_needs_doclength() const { return false; }
00985 };
00986
00999 class BM25Weight : public Weight {
01000 private:
01001 mutable Xapian::weight termweight;
01002 mutable Xapian::doclength lenpart;
01003 mutable double BD;
01004
01005 double A, B, C, D;
01006 Xapian::doclength min_normlen;
01007
01008 mutable bool weight_calculated;
01009
01010 void calc_termweight() const;
01011
01012 public:
01031 BM25Weight(double A_, double B_, double C_, double D_,
01032 double min_normlen_)
01033 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
01034 weight_calculated(false)
01035 {
01036 if (A < 0) A = 0;
01037 if (B < 0) B = 0;
01038 if (C < 0) C = 0;
01039 if (D < 0) D = 0; else if (D > 1) D = 1;
01040 }
01041 BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
01042 weight_calculated(false) { }
01043
01044 Weight * clone() const {
01045 return new BM25Weight(A, B, C, D, min_normlen);
01046 }
01047 ~BM25Weight() { }
01048 std::string name() const { return "BM25"; }
01049 std::string serialise() const;
01050 Weight * unserialise(const std::string & s) const;
01051 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01052 Xapian::weight get_maxpart() const;
01053
01054 Xapian::weight get_sumextra(Xapian::doclength len) const;
01055 Xapian::weight get_maxextra() const;
01056
01057 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01058 };
01059
01072 class TradWeight : public Weight {
01073 private:
01074 mutable Xapian::weight termweight;
01075 mutable Xapian::doclength lenpart;
01076
01077 double param_k;
01078
01079 mutable bool weight_calculated;
01080
01081 void calc_termweight() const;
01082
01083 public:
01091 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01092 if (param_k < 0) param_k = 0;
01093 }
01094
01095 TradWeight() : param_k(1.0), weight_calculated(false) { }
01096
01097 Weight * clone() const {
01098 return new TradWeight(param_k);
01099 }
01100 ~TradWeight() { }
01101 std::string name() const { return "Trad"; }
01102 std::string serialise() const;
01103 Weight * unserialise(const std::string & s) const;
01104
01105 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01106 Xapian::weight get_maxpart() const;
01107
01108 Xapian::weight get_sumextra(Xapian::doclength len) const;
01109 Xapian::weight get_maxextra() const;
01110
01111 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01112 };
01113
01114 }
01115
01116 #endif