00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef OM_HGUARD_OMENQUIRE_H
00027 #define OM_HGUARD_OMENQUIRE_H
00028
00029 #include "om/omtypes.h"
00030 #include "om/omdocument.h"
00031 #include "om/omdatabase.h"
00032 #include "om/omerror.h"
00033 #include <string>
00034
00035 class OmQuery;
00036 class OmErrorHandler;
00037 class OmWeight;
00038
00042 class OmMSetIterator {
00043 public:
00044 friend class OmMSet;
00045
00046 class Internal;
00048 Internal *internal;
00049
00050 friend bool operator==(const OmMSetIterator &a,
00051 const OmMSetIterator &b);
00052
00053 private:
00054 OmMSetIterator(Internal *internal_);
00055
00056 public:
00060 OmMSetIterator();
00061
00062 ~OmMSetIterator();
00063
00065 OmMSetIterator(const OmMSetIterator &other);
00066
00068 void operator=(const OmMSetIterator &other);
00069
00071 OmMSetIterator & operator++();
00072
00073 void operator++(int);
00074
00076 om_docid operator *() const;
00077
00096 OmDocument get_document() const;
00097
00104 om_doccount get_rank() const;
00105
00107 om_weight get_weight() const;
00108
00113 om_percent get_percent() const;
00114
00118 std::string get_description() const;
00119
00121
00122 typedef std::input_iterator_tag iterator_category;
00123 typedef om_docid value_type;
00124 typedef om_doccount_diff difference_type;
00125 typedef om_docid * pointer;
00126 typedef om_docid & reference;
00128 };
00129
00130 inline bool operator!=(const OmMSetIterator &a,
00131 const OmMSetIterator &b)
00132 {
00133 return !(a == b);
00134 }
00135
00139 class OmMSet {
00140 public:
00141 class Internal;
00143 Internal *internal;
00144
00145 public:
00146
00148
00149
00151 OmMSet();
00152
00154 ~OmMSet();
00155
00157 OmMSet(const OmMSet & other);
00158
00160 void operator=(const OmMSet &other);
00161
00177 void fetch(const OmMSetIterator &begin,
00178 const OmMSetIterator &end) const;
00179
00182 void fetch(const OmMSetIterator &item) const;
00183
00186 void fetch() const;
00187
00192 om_percent convert_to_percent(om_weight wt) const;
00193
00195 om_percent convert_to_percent(const OmMSetIterator &it) const;
00196
00204 om_doccount get_termfreq(const om_termname &tname) const;
00205
00213 om_weight get_termweight(const om_termname &tname) const;
00214
00221 om_doccount get_firstitem() const;
00222
00229 om_doccount get_matches_lower_bound() const;
00230
00240 om_doccount get_matches_estimated() const;
00241
00248 om_doccount get_matches_upper_bound() const;
00249
00255 om_weight get_max_possible() const;
00256
00270 om_weight get_max_attained() const;
00271
00272 om_doccount size() const;
00273
00274 om_doccount max_size() const;
00275
00276 bool empty() const;
00277
00278 void swap(OmMSet & other);
00279
00280 OmMSetIterator begin() const;
00281
00282 OmMSetIterator end() const;
00283
00284 OmMSetIterator back() const;
00285
00295 OmMSetIterator operator[](om_doccount i) const;
00296
00298
00299 typedef std::input_iterator_tag iterator_category;
00300 typedef OmMSetIterator value_type;
00301 typedef OmMSetIterator iterator;
00302 typedef OmMSetIterator const_iterator;
00303 typedef OmMSetIterator & reference;
00304 typedef OmMSetIterator & const_reference;
00305 typedef OmMSetIterator * pointer;
00306 typedef om_doccount_diff difference_type;
00307 typedef om_doccount size_type;
00309
00313 std::string get_description() const;
00314 };
00315
00317 class OmESetIterator {
00318 public:
00319 friend class OmESet;
00320 class Internal;
00322 Internal *internal;
00323
00324 friend bool operator==(const OmESetIterator &a,
00325 const OmESetIterator &b);
00326
00327 private:
00328
00329 OmESetIterator(Internal *internal_);
00330
00331 public:
00335 OmESetIterator();
00336
00338 ~OmESetIterator();
00339
00341 OmESetIterator(const OmESetIterator &other);
00342
00344 void operator=(const OmESetIterator &other);
00345
00346 OmESetIterator & operator++();
00347
00348 void operator++(int);
00349
00351 const om_termname & operator *() const;
00352
00354 om_weight get_weight() const;
00355
00359 std::string get_description() const;
00360
00362
00363 typedef std::input_iterator_tag iterator_category;
00364 typedef om_termname value_type;
00365 typedef om_termcount_diff difference_type;
00366 typedef om_termname * pointer;
00367 typedef om_termname & reference;
00369 };
00370
00371 inline bool
00372 operator!=(const OmESetIterator &a, const OmESetIterator &b)
00373 {
00374 return !(a == b);
00375 }
00376
00381 class OmESet {
00382 public:
00383 class Internal;
00385 Internal *internal;
00386
00388 OmESet();
00389
00391 ~OmESet();
00392
00394 OmESet(const OmESet & other);
00395
00397 void operator=(const OmESet &other);
00398
00403 om_termcount get_ebound() const;
00404
00406 om_termcount size() const;
00407
00409 bool empty() const;
00410
00412 OmESetIterator begin() const;
00413
00415 OmESetIterator end() const;
00416
00421 std::string get_description() const;
00422 };
00423
00428 class OmRSet {
00429 public:
00431 class Internal;
00433 Internal *internal;
00434
00436 OmRSet(const OmRSet &rset);
00437
00439 void operator=(const OmRSet &rset);
00440
00442 OmRSet();
00443
00445 ~OmRSet();
00446
00448 om_doccount size() const;
00449
00451 bool empty() const;
00452
00454 void add_document(om_docid did);
00455
00457 void add_document(const OmMSetIterator & i) { add_document(*i); }
00458
00460 void remove_document(om_docid did);
00461
00463 void remove_document(const OmMSetIterator & i) { remove_document(*i); }
00464
00466 bool contains(om_docid did) const;
00467
00469 bool contains(const OmMSetIterator & i) { return contains(*i); }
00470
00475 std::string get_description() const;
00476 };
00477
00480 class OmMatchDecider {
00481 public:
00484 virtual int operator()(const OmDocument &doc) const = 0;
00485
00487 virtual ~OmMatchDecider() {}
00488 };
00489
00492 class OmExpandDecider {
00493 public:
00496 virtual int operator()(const om_termname & tname) const = 0;
00497
00499 virtual ~OmExpandDecider() {}
00500 };
00501
00515 class OmEnquire {
00516 private:
00518 OmEnquire(const OmEnquire &);
00519
00521 void operator=(const OmEnquire &);
00522
00523 public:
00524 class Internal;
00526 Internal *internal;
00527
00543 OmEnquire(const OmDatabase &databases,
00544 OmErrorHandler * errorhandler_ = 0);
00545
00554 ~OmEnquire();
00555
00563 void set_query(const OmQuery & query_);
00564
00571 const OmQuery & get_query();
00572
00579 void set_weighting_scheme(const OmWeight &weight_);
00580
00587 void set_collapse_key(om_valueno collapse_key);
00588
00595 void set_sort_forward(bool sort_forward);
00596
00614 void set_cutoff(int percent_cutoff, om_weight weight_cutoff = 0);
00615
00629 void set_sorting(om_valueno sort_key, int sort_bands);
00630
00642 void set_bias(om_weight bias_weight, time_t bias_halflife);
00643
00664 OmMSet get_mset(om_doccount first,
00665 om_doccount maxitems,
00666 const OmRSet * omrset = 0,
00667 const OmMatchDecider * mdecider = 0) const;
00668
00669 static const int include_query_terms = 1;
00670 static const int use_exact_termfreq = 2;
00694 OmESet get_eset(om_termcount maxitems,
00695 const OmRSet & omrset,
00696 int flags = 0,
00697 double k = 1.0,
00698 const OmExpandDecider * edecider = 0) const;
00699
00714 inline OmESet get_eset(om_termcount maxitems, const OmRSet & omrset,
00715 const OmExpandDecider * edecider) const {
00716 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00717 }
00718
00748 OmTermIterator get_matching_terms_begin(om_docid did) const;
00749
00751 OmTermIterator get_matching_terms_end(om_docid did) const;
00752
00776 OmTermIterator get_matching_terms_begin(const OmMSetIterator &it) const;
00777
00779 OmTermIterator get_matching_terms_end(const OmMSetIterator &it) const;
00780
00783 void register_match_decider(const std::string &name,
00784 const OmMatchDecider *mdecider = NULL);
00785
00789 std::string get_description() const;
00790 };
00791
00792 class SocketServer;
00793
00795 class OmWeight {
00796 friend class OmEnquire;
00797 friend class SocketServer;
00798 public:
00799 class Internal;
00800 private:
00801 OmWeight(const OmWeight &);
00802 void operator=(OmWeight &);
00803
00805
00806
00807
00808
00809
00810 virtual OmWeight * clone() const = 0;
00811
00812 protected:
00813 const Internal * internal;
00814 om_doclength querysize;
00815 om_termcount wqf;
00816 om_termname tname;
00817
00818 public:
00819 OmWeight() { }
00820 virtual ~OmWeight() { }
00821
00833 OmWeight * create(const Internal * internal_, om_doclength querysize_,
00834 om_termcount wqf_, om_termname tname_) const {
00835 OmWeight * wt = clone();
00836 wt->internal = internal_;
00837 wt->querysize = querysize_;
00838 wt->wqf = wqf_;
00839 wt->tname = tname_;
00840 return wt;
00841 }
00842
00844
00845
00846 virtual std::string name() const = 0;
00847
00849 virtual std::string serialise() const = 0;
00850
00852 virtual OmWeight * OmWeight::unserialise(const std::string &s) const = 0;
00853
00861 virtual om_weight get_sumpart(om_termcount wdf,
00862 om_doclength len) const = 0;
00863
00869 virtual om_weight get_maxpart() const = 0;
00870
00879 virtual om_weight get_sumextra(om_doclength len) const = 0;
00880
00884 virtual om_weight get_maxextra() const = 0;
00885
00887 virtual bool get_sumpart_needs_doclength() const { return true; }
00888 };
00889
00891 class BoolWeight : public OmWeight {
00892 public:
00893 OmWeight * clone() const {
00894 return new BoolWeight;
00895 }
00896 BoolWeight() { }
00897 ~BoolWeight() { }
00898 std::string name() const { return "Bool"; }
00899 std::string serialise() const { return ""; }
00900 OmWeight * unserialise(const std::string & ) const {
00901 return new BoolWeight;
00902 }
00903 om_weight get_sumpart(om_termcount , om_doclength ) const { return 0; }
00904 om_weight get_maxpart() const { return 0; }
00905
00906 om_weight get_sumextra(om_doclength ) const { return 0; }
00907 om_weight get_maxextra() const { return 0; }
00908
00909 bool get_sumpart_needs_doclength() const { return false; }
00910 };
00911
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923 class BM25Weight : public OmWeight {
00924 private:
00925 mutable om_weight termweight;
00926 mutable om_doclength lenpart;
00927 mutable double BD;
00928
00929 double A, B, C, D;
00930 om_doclength min_normlen;
00931
00932 mutable bool weight_calculated;
00933
00934 void calc_termweight() const;
00935
00936 public:
00955 BM25Weight(double A_, double B_, double C_, double D_,
00956 double min_normlen_)
00957 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
00958 weight_calculated(false)
00959 {
00960 if (A < 0) A = 0;
00961 if (B < 0) B = 0;
00962 if (C < 0) C = 0;
00963 if (D < 0) D = 0; else if (D > 1) D = 1;
00964 }
00965 BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
00966 weight_calculated(false) { }
00967
00968 OmWeight * clone() const {
00969 return new BM25Weight(A, B, C, D, min_normlen);
00970 }
00971 ~BM25Weight() { }
00972 std::string name() const { return "BM25"; }
00973 std::string serialise() const;
00974 OmWeight * unserialise(const std::string & s) const;
00975 om_weight get_sumpart(om_termcount wdf, om_doclength len) const;
00976 om_weight get_maxpart() const;
00977
00978 om_weight get_sumextra(om_doclength len) const;
00979 om_weight get_maxextra() const;
00980
00981 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
00982 };
00983
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995 class TradWeight : public OmWeight {
00996 private:
00997 mutable om_weight termweight;
00998 mutable om_doclength lenpart;
00999
01000 double param_k;
01001
01002 mutable bool weight_calculated;
01003
01004 void calc_termweight() const;
01005
01006 public:
01008
01009
01010
01011
01012
01013 TradWeight(double k = 1) : param_k(k), weight_calculated(false) {
01014 if (param_k < 0) param_k = 0;
01015 }
01016 OmWeight * clone() const {
01017 return new TradWeight(param_k);
01018 }
01019 ~TradWeight() { }
01020 std::string name() const { return "Trad"; }
01021 std::string serialise() const;
01022 OmWeight * unserialise(const std::string & s) const;
01023
01024 om_weight get_sumpart(om_termcount wdf, om_doclength len) const;
01025 om_weight get_maxpart() const;
01026
01027 om_weight get_sumextra(om_doclength len) const;
01028 om_weight get_maxextra() const;
01029
01030 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01031 };
01032
01033 #endif