00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_WEIGHT_H
00023 #define XAPIAN_INCLUDED_WEIGHT_H
00024
00025 #include <string>
00026
00027 #include <xapian/types.h>
00028 #include <xapian/visibility.h>
00029
00030 namespace Xapian {
00031
00033 class XAPIAN_VISIBILITY_DEFAULT Weight {
00034 protected:
00036 typedef enum {
00037 COLLECTION_SIZE = 1,
00038 RSET_SIZE = 2,
00039 AVERAGE_LENGTH = 4,
00040 TERMFREQ = 8,
00041 RELTERMFREQ = 16,
00042 QUERY_LENGTH = 32,
00043 WQF = 64,
00044 WDF = 128,
00045 DOC_LENGTH = 256,
00046 DOC_LENGTH_MIN = 512,
00047 DOC_LENGTH_MAX = 1024,
00048 WDF_MAX = 2048
00049 } stat_flags;
00050
00060 void need_stat(stat_flags flag) {
00061 stats_needed = stat_flags(stats_needed | flag);
00062 }
00063
00068 virtual void init(double factor) = 0;
00069
00070 private:
00072 void operator=(const Weight &);
00073
00086 virtual Weight * clone() const = 0;
00087
00089 stat_flags stats_needed;
00090
00092 Xapian::doccount collection_size_;
00093
00095 Xapian::doccount rset_size_;
00096
00098 Xapian::doclength average_length_;
00099
00101 Xapian::doccount termfreq_;
00102
00104 Xapian::doccount reltermfreq_;
00105
00107 Xapian::termcount query_length_;
00108
00110 Xapian::termcount wqf_;
00111
00113 Xapian::termcount doclength_lower_bound_;
00114
00116 Xapian::termcount doclength_upper_bound_;
00117
00119 Xapian::termcount wdf_upper_bound_;
00120
00121 public:
00122 class Internal;
00123
00125 virtual ~Weight();
00126
00141 virtual std::string name() const = 0;
00142
00149 virtual std::string serialise() const = 0;
00150
00163 virtual Weight * unserialise(const std::string & s) const = 0;
00164
00173 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00174 Xapian::termcount doclen) const = 0;
00175
00181 virtual Xapian::weight get_maxpart() const = 0;
00182
00190 virtual Xapian::weight get_sumextra(Xapian::termcount doclen) const = 0;
00191
00198 virtual Xapian::weight get_maxextra() const = 0;
00199
00207 Weight * clone_() const { return clone(); }
00208
00218 void init_(const Internal & stats, Xapian::termcount query_len_,
00219 const std::string & term, Xapian::termcount wqf_,
00220 double factor);
00221
00231 void init_(const Internal & stats, Xapian::termcount query_len_,
00232 double factor, Xapian::doccount termfreq,
00233 Xapian::doccount reltermfreq);
00234
00241 void init_(const Internal & stats, Xapian::termcount query_len_);
00242
00249 bool get_sumpart_needs_doclength_() const {
00250 return stats_needed & DOC_LENGTH;
00251 }
00252
00258 bool get_sumpart_needs_wdf_() const {
00259 return stats_needed & WDF;
00260 }
00261
00262 protected:
00264 Weight(const Weight &);
00265
00267 Weight() : stats_needed() { }
00268
00270 Xapian::doccount get_collection_size() const { return collection_size_; }
00271
00273 Xapian::doccount get_rset_size() const { return rset_size_; }
00274
00276 Xapian::doclength get_average_length() const { return average_length_; }
00277
00279 Xapian::doccount get_termfreq() const { return termfreq_; }
00280
00282 Xapian::doccount get_reltermfreq() const { return reltermfreq_; }
00283
00285 Xapian::termcount get_query_length() const { return query_length_; }
00286
00288 Xapian::termcount get_wqf() const { return wqf_; }
00289
00294 Xapian::termcount get_doclength_upper_bound() const {
00295 return doclength_upper_bound_;
00296 }
00297
00302 Xapian::termcount get_doclength_lower_bound() const {
00303 return doclength_lower_bound_;
00304 }
00305
00310 Xapian::termcount get_wdf_upper_bound() const {
00311 return wdf_upper_bound_;
00312 }
00313 };
00314
00319 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
00320 BoolWeight * clone() const;
00321
00322 void init(double factor);
00323
00324 public:
00326 BoolWeight() { }
00327
00328 std::string name() const;
00329
00330 std::string serialise() const;
00331 BoolWeight * unserialise(const std::string & s) const;
00332
00333 Xapian::weight get_sumpart(Xapian::termcount wdf,
00334 Xapian::termcount doclen) const;
00335 Xapian::weight get_maxpart() const;
00336
00337 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00338 Xapian::weight get_maxextra() const;
00339 };
00340
00342 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
00344 mutable Xapian::doclength len_factor;
00345
00347 mutable Xapian::weight termweight;
00348
00350 double param_k1, param_k2, param_k3, param_b;
00351
00353 Xapian::doclength param_min_normlen;
00354
00355 BM25Weight * clone() const;
00356
00357 void init(double factor);
00358
00359 public:
00387 BM25Weight(double k1, double k2, double k3, double b, double min_normlen)
00388 : param_k1(k1), param_k2(k2), param_k3(k3), param_b(b),
00389 param_min_normlen(min_normlen)
00390 {
00391 if (param_k1 < 0) param_k1 = 0;
00392 if (param_k2 < 0) param_k2 = 0;
00393 if (param_k3 < 0) param_k3 = 0;
00394 if (param_b < 0) {
00395 param_b = 0;
00396 } else if (param_b > 1) {
00397 param_b = 1;
00398 }
00399 need_stat(COLLECTION_SIZE);
00400 need_stat(RSET_SIZE);
00401 need_stat(TERMFREQ);
00402 need_stat(RELTERMFREQ);
00403 need_stat(WDF);
00404 need_stat(WDF_MAX);
00405 need_stat(WDF);
00406 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
00407 need_stat(DOC_LENGTH_MIN);
00408 need_stat(AVERAGE_LENGTH);
00409 }
00410 if (param_k1 != 0 && param_b != 0) need_stat(DOC_LENGTH);
00411 if (param_k2 != 0) need_stat(QUERY_LENGTH);
00412 if (param_k3 != 0) need_stat(WQF);
00413 }
00414
00415 BM25Weight()
00416 : param_k1(1), param_k2(0), param_k3(1), param_b(0.5),
00417 param_min_normlen(0.5)
00418 {
00419 need_stat(COLLECTION_SIZE);
00420 need_stat(RSET_SIZE);
00421 need_stat(TERMFREQ);
00422 need_stat(RELTERMFREQ);
00423 need_stat(WDF);
00424 need_stat(WDF_MAX);
00425 need_stat(WDF);
00426 need_stat(DOC_LENGTH_MIN);
00427 need_stat(AVERAGE_LENGTH);
00428 need_stat(DOC_LENGTH);
00429 need_stat(WQF);
00430 }
00431
00432 std::string name() const;
00433
00434 std::string serialise() const;
00435 BM25Weight * unserialise(const std::string & s) const;
00436
00437 Xapian::weight get_sumpart(Xapian::termcount wdf,
00438 Xapian::termcount doclen) const;
00439 Xapian::weight get_maxpart() const;
00440
00441 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00442 Xapian::weight get_maxextra() const;
00443 };
00444
00454 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
00456 mutable Xapian::doclength len_factor;
00457
00459 mutable Xapian::weight termweight;
00460
00462 double param_k;
00463
00464 TradWeight * clone() const;
00465
00466 void init(double factor);
00467
00468 public:
00476 explicit TradWeight(double k = 1.0) : param_k(k) {
00477 if (param_k < 0) param_k = 0;
00478 if (param_k != 0.0) {
00479 need_stat(AVERAGE_LENGTH);
00480 need_stat(DOC_LENGTH);
00481 }
00482 need_stat(COLLECTION_SIZE);
00483 need_stat(RSET_SIZE);
00484 need_stat(TERMFREQ);
00485 need_stat(RELTERMFREQ);
00486 need_stat(DOC_LENGTH_MIN);
00487 need_stat(WDF);
00488 need_stat(WDF_MAX);
00489 need_stat(WDF);
00490 }
00491
00492 std::string name() const;
00493
00494 std::string serialise() const;
00495 TradWeight * unserialise(const std::string & s) const;
00496
00497 Xapian::weight get_sumpart(Xapian::termcount wdf,
00498 Xapian::termcount doclen) const;
00499 Xapian::weight get_maxpart() const;
00500
00501 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00502 Xapian::weight get_maxextra() const;
00503 };
00504
00505 }
00506
00507 #endif // XAPIAN_INCLUDED_WEIGHT_H