00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_WEIGHT_H
00023 #define XAPIAN_INCLUDED_WEIGHT_H
00024
00025 #include <string>
00026
00027 #include <xapian/types.h>
00028 #include <xapian/visibility.h>
00029
00030 namespace Xapian {
00031
00033 class XAPIAN_VISIBILITY_DEFAULT Weight {
00034 protected:
00036 typedef enum {
00037 COLLECTION_SIZE = 1,
00038 RSET_SIZE = 2,
00039 AVERAGE_LENGTH = 4,
00040 TERMFREQ = 8,
00041 RELTERMFREQ = 16,
00042 QUERY_LENGTH = 32,
00043 WQF = 64,
00044 WDF = 128,
00045 DOC_LENGTH = 256,
00046 DOC_LENGTH_MIN = 512,
00047 DOC_LENGTH_MAX = 1024,
00048 WDF_MAX = 2048
00049 } stat_flags;
00050
00060 void need_stat(stat_flags flag) {
00061 stats_needed = stat_flags(stats_needed | flag);
00062 }
00063
00068 virtual void init(double factor) = 0;
00069
00070 private:
00072 void operator=(const Weight &);
00073
00083 virtual Weight * clone() const = 0;
00084
00086 stat_flags stats_needed;
00087
00089 Xapian::doccount collection_size_;
00090
00092 Xapian::doccount rset_size_;
00093
00095 Xapian::doclength average_length_;
00096
00098 Xapian::doccount termfreq_;
00099
00101 Xapian::doccount reltermfreq_;
00102
00104 Xapian::termcount query_length_;
00105
00107 Xapian::termcount wqf_;
00108
00110 Xapian::termcount doclength_lower_bound_;
00111
00113 Xapian::termcount doclength_upper_bound_;
00114
00116 Xapian::termcount wdf_upper_bound_;
00117
00118 public:
00119 class Internal;
00120
00122 virtual ~Weight();
00123
00138 virtual std::string name() const = 0;
00139
00146 virtual std::string serialise() const = 0;
00147
00157 virtual Weight * unserialise(const std::string & s) const = 0;
00158
00167 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00168 Xapian::termcount doclen) const = 0;
00169
00175 virtual Xapian::weight get_maxpart() const = 0;
00176
00184 virtual Xapian::weight get_sumextra(Xapian::termcount doclen) const = 0;
00185
00192 virtual Xapian::weight get_maxextra() const = 0;
00193
00201 Weight * clone_() const { return clone(); }
00202
00212 void init_(const Internal & stats, Xapian::termcount query_len_,
00213 const std::string & term, Xapian::termcount wqf_,
00214 double factor);
00215
00225 void init_(const Internal & stats, Xapian::termcount query_len_,
00226 double factor, Xapian::doccount termfreq,
00227 Xapian::doccount reltermfreq);
00228
00235 void init_(const Internal & stats, Xapian::termcount query_len_);
00236
00243 bool get_sumpart_needs_doclength_() const {
00244 return stats_needed & DOC_LENGTH;
00245 }
00246
00252 bool get_sumpart_needs_wdf_() const {
00253 return stats_needed & WDF;
00254 }
00255
00256 protected:
00258 Weight(const Weight &);
00259
00261 Weight() : stats_needed() { }
00262
00264 Xapian::doccount get_collection_size() const { return collection_size_; }
00265
00267 Xapian::doccount get_rset_size() const { return rset_size_; }
00268
00270 Xapian::doclength get_average_length() const { return average_length_; }
00271
00273 Xapian::doccount get_termfreq() const { return termfreq_; }
00274
00276 Xapian::doccount get_reltermfreq() const { return reltermfreq_; }
00277
00279 Xapian::termcount get_query_length() const { return query_length_; }
00280
00282 Xapian::termcount get_wqf() const { return wqf_; }
00283
00288 Xapian::termcount get_doclength_upper_bound() const {
00289 return doclength_upper_bound_;
00290 }
00291
00296 Xapian::termcount get_doclength_lower_bound() const {
00297 return doclength_lower_bound_;
00298 }
00299
00304 Xapian::termcount get_wdf_upper_bound() const {
00305 return wdf_upper_bound_;
00306 }
00307 };
00308
00313 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
00314 BoolWeight * clone() const;
00315
00316 void init(double factor);
00317
00318 public:
00320 BoolWeight() { }
00321
00322 std::string name() const;
00323
00324 std::string serialise() const;
00325 BoolWeight * unserialise(const std::string & s) const;
00326
00327 Xapian::weight get_sumpart(Xapian::termcount wdf,
00328 Xapian::termcount doclen) const;
00329 Xapian::weight get_maxpart() const;
00330
00331 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00332 Xapian::weight get_maxextra() const;
00333 };
00334
00336 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
00338 mutable Xapian::doclength len_factor;
00339
00341 mutable Xapian::weight termweight;
00342
00344 double param_k1, param_k2, param_k3, param_b;
00345
00347 Xapian::doclength param_min_normlen;
00348
00349 BM25Weight * clone() const;
00350
00351 void init(double factor);
00352
00353 public:
00381 BM25Weight(double k1, double k2, double k3, double b, double min_normlen)
00382 : param_k1(k1), param_k2(k2), param_k3(k3), param_b(b),
00383 param_min_normlen(min_normlen)
00384 {
00385 if (param_k1 < 0) param_k1 = 0;
00386 if (param_k2 < 0) param_k2 = 0;
00387 if (param_k3 < 0) param_k3 = 0;
00388 if (param_b < 0) {
00389 param_b = 0;
00390 } else if (param_b > 1) {
00391 param_b = 1;
00392 }
00393 need_stat(COLLECTION_SIZE);
00394 need_stat(RSET_SIZE);
00395 need_stat(TERMFREQ);
00396 need_stat(RELTERMFREQ);
00397 need_stat(WDF);
00398 need_stat(WDF_MAX);
00399 need_stat(WDF);
00400 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
00401 need_stat(DOC_LENGTH_MIN);
00402 need_stat(AVERAGE_LENGTH);
00403 }
00404 if (param_k1 != 0 && param_b != 0) need_stat(DOC_LENGTH);
00405 if (param_k2 != 0) need_stat(QUERY_LENGTH);
00406 if (param_k3 != 0) need_stat(WQF);
00407 }
00408
00409 BM25Weight()
00410 : param_k1(1), param_k2(0), param_k3(1), param_b(0.5),
00411 param_min_normlen(0.5)
00412 {
00413 need_stat(COLLECTION_SIZE);
00414 need_stat(RSET_SIZE);
00415 need_stat(TERMFREQ);
00416 need_stat(RELTERMFREQ);
00417 need_stat(WDF);
00418 need_stat(WDF_MAX);
00419 need_stat(WDF);
00420 need_stat(DOC_LENGTH_MIN);
00421 need_stat(AVERAGE_LENGTH);
00422 need_stat(DOC_LENGTH);
00423 need_stat(WQF);
00424 }
00425
00426 std::string name() const;
00427
00428 std::string serialise() const;
00429 BM25Weight * unserialise(const std::string & s) const;
00430
00431 Xapian::weight get_sumpart(Xapian::termcount wdf,
00432 Xapian::termcount doclen) const;
00433 Xapian::weight get_maxpart() const;
00434
00435 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00436 Xapian::weight get_maxextra() const;
00437 };
00438
00448 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
00450 mutable Xapian::doclength len_factor;
00451
00453 mutable Xapian::weight termweight;
00454
00456 double param_k;
00457
00458 TradWeight * clone() const;
00459
00460 void init(double factor);
00461
00462 public:
00470 explicit TradWeight(double k = 1.0) : param_k(k) {
00471 if (param_k < 0) param_k = 0;
00472 if (param_k != 0.0) {
00473 need_stat(AVERAGE_LENGTH);
00474 need_stat(DOC_LENGTH);
00475 }
00476 need_stat(COLLECTION_SIZE);
00477 need_stat(RSET_SIZE);
00478 need_stat(TERMFREQ);
00479 need_stat(RELTERMFREQ);
00480 need_stat(DOC_LENGTH_MIN);
00481 need_stat(WDF);
00482 need_stat(WDF_MAX);
00483 need_stat(WDF);
00484 }
00485
00486 std::string name() const;
00487
00488 std::string serialise() const;
00489 TradWeight * unserialise(const std::string & s) const;
00490
00491 Xapian::weight get_sumpart(Xapian::termcount wdf,
00492 Xapian::termcount doclen) const;
00493 Xapian::weight get_maxpart() const;
00494
00495 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00496 Xapian::weight get_maxextra() const;
00497 };
00498
00499 }
00500
00501 #endif // XAPIAN_INCLUDED_WEIGHT_H