00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028
00029 #include <string>
00030 #include <time.h>
00031
00032 #include <xapian/base.h>
00033 #include <xapian/error.h>
00034 #include <xapian/types.h>
00035
00036 namespace Xapian {
00037
// Forward declarations for class names that this header uses before (or
// without) seeing their full definitions.
class Database;
class Document;
class ErrorHandler;
class MSetIterator;
class Query;
class TermIterator;
class Weight;
00045
00049 class MSet {
00050 public:
00051 class Internal;
00053 Xapian::Internal::RefCntPtr<Internal> internal;
00054
00056 explicit MSet(MSet::Internal * internal_);
00057
00059 MSet();
00060
00062 ~MSet();
00063
00065 MSet(const MSet & other);
00066
00068 void operator=(const MSet &other);
00069
00085 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00086
00089 void fetch(const MSetIterator &item) const;
00090
00093 void fetch() const;
00094
00099 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00100
00102 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00103
00111 Xapian::doccount get_termfreq(const std::string &tname) const;
00112
00120 Xapian::weight get_termweight(const std::string &tname) const;
00121
00129 Xapian::doccount get_firstitem() const;
00130
00140 Xapian::doccount get_matches_lower_bound() const;
00141
00154 Xapian::doccount get_matches_estimated() const;
00155
00165 Xapian::doccount get_matches_upper_bound() const;
00166
00172 Xapian::weight get_max_possible() const;
00173
00187 Xapian::weight get_max_attained() const;
00188
00190 Xapian::doccount size() const;
00191
00193 Xapian::doccount max_size() const { return size(); }
00194
00196 bool empty() const;
00197
00199 void swap(MSet & other);
00200
00202 MSetIterator begin() const;
00203
00205 MSetIterator end() const;
00206
00208 MSetIterator back() const;
00209
00219 MSetIterator operator[](Xapian::doccount i) const;
00220
00222
00223 typedef MSetIterator value_type;
00224 typedef MSetIterator iterator;
00225 typedef MSetIterator const_iterator;
00226 typedef MSetIterator & reference;
00227 typedef MSetIterator & const_reference;
00228 typedef MSetIterator * pointer;
00229 typedef Xapian::doccount_diff difference_type;
00230 typedef Xapian::doccount size_type;
00232
00236 std::string get_description() const;
00237 };
00238
00242 class MSetIterator {
00243 private:
00244 friend class MSet;
00245 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00246 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00247
00248 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00249 : index(index_), mset(mset_) { }
00250
00251 Xapian::doccount index;
00252 MSet mset;
00253
00254 public:
00258 MSetIterator() : index(0), mset() { }
00259
00260 ~MSetIterator() { }
00261
00263 MSetIterator(const MSetIterator &other) {
00264 index = other.index;
00265 mset = other.mset;
00266 }
00267
00269 void operator=(const MSetIterator &other) {
00270 index = other.index;
00271 mset = other.mset;
00272 }
00273
00275 MSetIterator & operator++() {
00276 ++index;
00277 return *this;
00278 }
00279
00281 MSetIterator operator++(int) {
00282 MSetIterator tmp = *this;
00283 ++index;
00284 return tmp;
00285 }
00286
00288 MSetIterator & operator--() {
00289 --index;
00290 return *this;
00291 }
00292
00294 MSetIterator operator--(int) {
00295 MSetIterator tmp = *this;
00296 --index;
00297 return tmp;
00298 }
00299
00301 Xapian::docid operator*() const;
00302
00321 Xapian::Document get_document() const;
00322
00329 Xapian::doccount get_rank() const {
00330 return mset.get_firstitem() + index;
00331 }
00332
00334 Xapian::weight get_weight() const;
00335
00352 Xapian::doccount get_collapse_count() const;
00353
00359 Xapian::percent get_percent() const;
00360
00364 std::string get_description() const;
00365
00367
00368 typedef std::bidirectional_iterator_tag iterator_category;
00369 typedef Xapian::docid value_type;
00370 typedef Xapian::doccount_diff difference_type;
00371 typedef Xapian::docid * pointer;
00372 typedef Xapian::docid & reference;
00374 };
00375
00376 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00377 {
00378 return (a.index == b.index);
00379 }
00380
00381 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00382 {
00383 return (a.index != b.index);
00384 }
00385
// Forward declaration: ESet's members return ESetIterator before that class
// has been defined.
class ESetIterator;
00387
00392 class ESet {
00393 public:
00394 class Internal;
00396 Xapian::Internal::RefCntPtr<Internal> internal;
00397
00399 ESet();
00400
00402 ~ESet();
00403
00405 ESet(const ESet & other);
00406
00408 void operator=(const ESet &other);
00409
00414 Xapian::termcount get_ebound() const;
00415
00417 Xapian::termcount size() const;
00418
00420 Xapian::termcount max_size() const { return size(); }
00421
00423 bool empty() const;
00424
00426 void swap(ESet & other);
00427
00429 ESetIterator begin() const;
00430
00432 ESetIterator end() const;
00433
00435 ESetIterator back() const;
00436
00438 ESetIterator operator[](Xapian::termcount i) const;
00439
00444 std::string get_description() const;
00445 };
00446
00448 class ESetIterator {
00449 private:
00450 friend class ESet;
00451 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00452 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00453
00454 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00455 : index(index_), eset(eset_) { }
00456
00457 Xapian::termcount index;
00458 ESet eset;
00459
00460 public:
00464 ESetIterator() : index(0), eset() { }
00465
00466 ~ESetIterator() { }
00467
00469 ESetIterator(const ESetIterator &other) {
00470 index = other.index;
00471 eset = other.eset;
00472 }
00473
00475 void operator=(const ESetIterator &other) {
00476 index = other.index;
00477 eset = other.eset;
00478 }
00479
00481 ESetIterator & operator++() {
00482 ++index;
00483 return *this;
00484 }
00485
00487 ESetIterator operator++(int) {
00488 ESetIterator tmp = *this;
00489 ++index;
00490 return tmp;
00491 }
00492
00494 ESetIterator & operator--() {
00495 --index;
00496 return *this;
00497 }
00498
00500 ESetIterator operator--(int) {
00501 ESetIterator tmp = *this;
00502 --index;
00503 return tmp;
00504 }
00505
00507 const std::string & operator *() const;
00508
00510 Xapian::weight get_weight() const;
00511
00515 std::string get_description() const;
00516
00518
00519 typedef std::bidirectional_iterator_tag iterator_category;
00520 typedef std::string value_type;
00521 typedef Xapian::termcount_diff difference_type;
00522 typedef std::string * pointer;
00523 typedef std::string & reference;
00525 };
00526
00527 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00528 {
00529 return (a.index == b.index);
00530 }
00531
00532 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00533 {
00534 return (a.index != b.index);
00535 }
00536
00541 class RSet {
00542 public:
00544 class Internal;
00545
00547 Xapian::Internal::RefCntPtr<Internal> internal;
00548
00550 RSet(const RSet &rset);
00551
00553 void operator=(const RSet &rset);
00554
00556 RSet();
00557
00559 ~RSet();
00560
00562 Xapian::doccount size() const;
00563
00565 bool empty() const;
00566
00568 void add_document(Xapian::docid did);
00569
00571 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00572
00574 void remove_document(Xapian::docid did);
00575
00577 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00578
00580 bool contains(Xapian::docid did) const;
00581
00583 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00584
00589 std::string get_description() const;
00590 };
00591
00594 class MatchDecider {
00595 public:
00598 virtual int operator()(const Xapian::Document &doc) const = 0;
00599
00601 virtual ~MatchDecider() {}
00602 };
00603
/** Abstract functor interface applied to a term name.
 *
 *  Subclasses implement operator(); the int result's meaning is defined by
 *  the caller.  NOTE(review): presumably non-zero means "accept this term"
 *  -- confirm against the expand code.
 */
class ExpandDecider {
  public:
    /// Decide on the term `tname`; pure virtual.
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor so subclasses can be destroyed through a base pointer.
    virtual ~ExpandDecider() { }
};
00615
00626 class Enquire {
00627 private:
00629 Enquire(const Enquire &);
00630
00632 void operator=(const Enquire &);
00633
00634 public:
00635 class Internal;
00637 Xapian::Internal::RefCntPtr<Internal> internal;
00638
00654 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00655
00658 ~Enquire();
00659
00666 void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00667
00674 const Xapian::Query & get_query();
00675
00682 void set_weighting_scheme(const Weight &weight_);
00683
00710 void set_collapse_key(Xapian::valueno collapse_key);
00711
00712 typedef enum {
00713 ASCENDING = 1,
00714 DESCENDING = 0,
00715 DONT_CARE = 2
00716 } docid_order;
00717
00741 void set_docid_order(docid_order order);
00742
00747 void set_sort_forward(bool sort_forward) {
00748 set_docid_order(sort_forward ? ASCENDING : DESCENDING);
00749 }
00750
00769 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00770
00776 void set_sorting(Xapian::valueno sort_key, int sort_bands,
00777 bool sort_by_relevance = false) {
00778 if (sort_bands > 1) {
00779 throw Xapian::UnimplementedError("sort bands are no longer supported");
00780 }
00781 if (sort_bands == 0 || sort_key == Xapian::valueno(-1)) {
00782 set_sort_by_relevance();
00783 } else if (!sort_by_relevance) {
00784 set_sort_by_value(sort_key);
00785 } else {
00786 set_sort_by_value_then_relevance(sort_key);
00787 }
00788 }
00789
00792 void set_sort_by_relevance();
00793
00804 void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00805 void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00806 bool ascending = true);
00807
00808
00809
00810
00822 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00823
00849 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00850 Xapian::doccount checkatleast = 0,
00851 const RSet * omrset = 0,
00852 const MatchDecider * mdecider = 0) const;
00853 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00854 const RSet * omrset,
00855 const MatchDecider * mdecider = 0) const {
00856 return get_mset(first, maxitems, 0, omrset, mdecider);
00857 }
00858
00859 static const int include_query_terms = 1;
00860 static const int use_exact_termfreq = 2;
00883 ESet get_eset(Xapian::termcount maxitems,
00884 const RSet & omrset,
00885 int flags = 0,
00886 double k = 1.0,
00887 const Xapian::ExpandDecider * edecider = 0) const;
00888
00902 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00903 const Xapian::ExpandDecider * edecider) const {
00904 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00905 }
00906
00935 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00936
00938 TermIterator get_matching_terms_end(Xapian::docid did) const;
00939
00962 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00963
00965 TermIterator get_matching_terms_end(const MSetIterator &it) const;
00966
00973 void register_match_decider(const std::string &name,
00974 const MatchDecider *mdecider = NULL);
00975
00979 std::string get_description() const;
00980 };
00981
00982 }
00983
// Declared at global scope so that Xapian::Weight can name it as
// ::SocketServer in its friend declaration.
class SocketServer;
00985
00986 namespace Xapian {
00987
00989 class Weight {
00990 friend class Enquire;
00991 friend class ::SocketServer;
00992 public:
00993 class Internal;
00994 protected:
00995 Weight(const Weight &);
00996 private:
00997 void operator=(Weight &);
00998
01008 virtual Weight * clone() const = 0;
01009
01010 protected:
01011 const Internal * internal;
01012 Xapian::doclength querysize;
01013 Xapian::termcount wqf;
01014 std::string tname;
01015
01016 public:
01017 Weight() { }
01018 virtual ~Weight() { }
01019
01032 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01033 Xapian::termcount wqf_, std::string tname_) const {
01034 Weight * wt = clone();
01035 wt->internal = internal_;
01036 wt->querysize = querysize_;
01037 wt->wqf = wqf_;
01038 wt->tname = tname_;
01039 return wt;
01040 }
01041
01046 virtual std::string name() const = 0;
01047
01049 virtual std::string serialise() const = 0;
01050
01052 virtual Weight * unserialise(const std::string &s) const = 0;
01053
01061 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01062 Xapian::doclength len) const = 0;
01063
01069 virtual Xapian::weight get_maxpart() const = 0;
01070
01079 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01080
01084 virtual Xapian::weight get_maxextra() const = 0;
01085
01087 virtual bool get_sumpart_needs_doclength() const { return true; }
01088 };
01089
01091 class BoolWeight : public Weight {
01092 public:
01093 BoolWeight * clone() const {
01094 return new BoolWeight;
01095 }
01096 BoolWeight() { }
01097 ~BoolWeight() { }
01098 std::string name() const { return "Bool"; }
01099 std::string serialise() const { return ""; }
01100 BoolWeight * unserialise(const std::string & ) const {
01101 return new BoolWeight;
01102 }
01103 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
01104 Xapian::weight get_maxpart() const { return 0; }
01105
01106 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
01107 Xapian::weight get_maxextra() const { return 0; }
01108
01109 bool get_sumpart_needs_doclength() const { return false; }
01110 };
01111
01124 class BM25Weight : public Weight {
01125 private:
01126 mutable Xapian::weight termweight;
01127 mutable Xapian::doclength lenpart;
01128
01129 double k1, k2, k3, b;
01130 Xapian::doclength min_normlen;
01131
01132 mutable bool weight_calculated;
01133
01134 void calc_termweight() const;
01135
01136 public:
01155 BM25Weight(double k1_, double k2_, double k3_, double b_,
01156 double min_normlen_)
01157 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01158 weight_calculated(false)
01159 {
01160 if (k1 < 0) k1 = 0;
01161 if (k2 < 0) k2 = 0;
01162 if (k3 < 0) k3 = 0;
01163 if (b < 0) b = 0; else if (b > 1) b = 1;
01164 }
01165 BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01166 weight_calculated(false) { }
01167
01168 BM25Weight * clone() const;
01169 ~BM25Weight() { }
01170 std::string name() const;
01171 std::string serialise() const;
01172 BM25Weight * unserialise(const std::string & s) const;
01173 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01174 Xapian::weight get_maxpart() const;
01175
01176 Xapian::weight get_sumextra(Xapian::doclength len) const;
01177 Xapian::weight get_maxextra() const;
01178
01179 bool get_sumpart_needs_doclength() const;
01180 };
01181
01195 class TradWeight : public Weight {
01196 private:
01197 mutable Xapian::weight termweight;
01198 mutable Xapian::doclength lenpart;
01199
01200 double param_k;
01201
01202 mutable bool weight_calculated;
01203
01204 void calc_termweight() const;
01205
01206 public:
01214 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01215 if (param_k < 0) param_k = 0;
01216 }
01217
01218 TradWeight() : param_k(1.0), weight_calculated(false) { }
01219
01220 TradWeight * clone() const;
01221 ~TradWeight() { }
01222 std::string name() const;
01223 std::string serialise() const;
01224 TradWeight * unserialise(const std::string & s) const;
01225
01226 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01227 Xapian::weight get_maxpart() const;
01228
01229 Xapian::weight get_sumextra(Xapian::doclength len) const;
01230 Xapian::weight get_maxextra() const;
01231
01232 bool get_sumpart_needs_doclength() const;
01233 };
01234
01235 }
01236
01237 #endif