Index: common/multialltermslist.h
===================================================================
--- common/multialltermslist.h	(revision 11064)
+++ common/multialltermslist.h	(working copy)
@@ -1,12 +1,12 @@
-/* multialltermslist.h
- *
- * Copyright 1999,2000,2001 BrightStation PLC
- * Copyright 2003,2007 Olly Betts
+/** @file multialltermslist.h
+ * @brief Class for merging AllTermsList objects from subdatabases.
+ */
+/* Copyright (C) 2007,2008 Olly Betts
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,66 +15,65 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
- * USA
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  */
 
-#ifndef OM_HGUARD_MULTIALLTERMSLIST_H
-#define OM_HGUARD_MULTIALLTERMSLIST_H
+#ifndef XAPIAN_INCLUDED_MULTIALLTERMSLIST_H
+#define XAPIAN_INCLUDED_MULTIALLTERMSLIST_H
 
 #include "alltermslist.h"
 
-#include <vector>
+#include "database.h"
 
-/** class for alltermslists over several databases */
-class MultiAllTermsList : public AllTermsList
-{
-    private:
-	/// Copying is not allowed.
-	MultiAllTermsList(const MultiAllTermsList &);
-
-	/// Assignment is not allowed.
-	void operator=(const MultiAllTermsList &);
+#include <string>
+#include <vector>
 
-	std::vector<TermList *> lists;
+/// Class for merging AllTermsList objects from subdatabases.
+class MultiAllTermsList : public AllTermsList {
+    /// Don't allow assignment.
+    void operator=(const MultiAllTermsList &);
 
-	/// The current term being pointed at.
-	std::string current;
+    /// Don't allow copying.
+    MultiAllTermsList(const MultiAllTermsList &);
 
-	/// Flag indicating the end of the lists
-	bool is_at_end;
+    /// Current termname (or empty if we haven't started yet).
+    std::string current_term;
 
-	/// Flag saying whether we've started iterating yet
-	bool started;
+    /// Vector of sub-termlists which we use as a heap.
+    std::vector<TermList *> termlists;
 
-	void update_current();
-    public:
-	/// Standard constructor for base class.
-	MultiAllTermsList(const std::vector<TermList *> &lists_);
+  public:
+    /// Constructor.
+    MultiAllTermsList(const std::vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> > & dbs,
+		      const std::string & prefix);
 
-	/// Standard destructor for base class.
-	~MultiAllTermsList();
+    /// Destructor.
+    ~MultiAllTermsList();
 
-        // Gets size of termlist
-	Xapian::termcount get_approx_size() const;
+    /// Return approximate size of this termlist.
+    Xapian::termcount get_approx_size() const;
 
-	// Gets current termname
-	std::string get_termname() const;
+    /// Return the termname at the current position.
+    std::string get_termname() const;
 
-	// Get num of docs indexed by term
-	Xapian::doccount get_termfreq() const;
+    /// Return the term frequency for the term at the current position.
+    Xapian::doccount get_termfreq() const;
 
-	// Get num of docs indexed by term
-	Xapian::termcount get_collection_freq() const;
+    /// Return the collection frequency for the term at the current position.
+    Xapian::termcount get_collection_freq() const;
 
-	TermList *skip_to(const std::string &tname);
+    /// Advance the current position to the next term in the termlist.
+    TermList *next();
 
-	/** next() causes the AllTermsList to move to the next term in the list.
-	 */
-	TermList *next();
+    /** Skip forward to the specified term.
+     *
+     *  If the specified term isn't in the list, position ourselves on the
+     *  first term after @a term (or at_end() if no terms after @a term exist).
+     */
+    TermList *skip_to(const std::string &term);
 
-	// True if we're off the end of the list
-	bool at_end() const;
+    /// Return true if the current position is past the last term in this list.
+    bool at_end() const;
 };
 
-#endif /* OM_HGUARD_MULTIALLTERMSLIST_H */
+#endif // XAPIAN_INCLUDED_MULTIALLTERMSLIST_H
Index: api/omdatabase.cc
===================================================================
--- api/omdatabase.cc	(revision 11064)
+++ api/omdatabase.cc	(working copy)
@@ -180,14 +180,7 @@
     if (internal.size() == 1)
 	RETURN(TermIterator(internal[0]->open_allterms(prefix)));
 
-    vector<TermList *> lists;
-
-    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
-    for (i = internal.begin(); i != internal.end(); ++i) {
-	lists.push_back((*i)->open_allterms(prefix));
-    }
-
-    RETURN(TermIterator(new MultiAllTermsList(lists)));
+    RETURN(TermIterator(new MultiAllTermsList(internal, prefix)));
 }
 
 bool
Index: backends/multi/multi_alltermslist.cc
===================================================================
--- backends/multi/multi_alltermslist.cc	(revision 11064)
+++ backends/multi/multi_alltermslist.cc	(working copy)
@@ -1,12 +1,12 @@
-/* multi_alltermslist.cc
- *
- * Copyright 1999,2000,2001 BrightStation PLC
- * Copyright 2003,2007 Olly Betts
+/** @file multialltermslist.cc
+ * @brief Class for merging AllTermsList objects from subdatabases.
+ */
+/* Copyright (C) 2007,2008 Olly Betts
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
- * USA
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  */
 
 #include <config.h>
@@ -25,137 +24,169 @@
 
 #include "omassert.h"
 
-using namespace std;
+#include <algorithm>
 
-MultiAllTermsList::MultiAllTermsList(const vector<TermList *> &lists_)
-	: lists(lists_), is_at_end(false), started(false)
-{
-}
+using namespace std;
 
-MultiAllTermsList::~MultiAllTermsList()
-{
-    vector<TermList *>::const_iterator i;
-    for (i = lists.begin(); i != lists.end(); ++i) {
-	delete *i;
+/// Comparison functor which orders TermList* by ascending term name.
+struct CompareTermListsByTerm {
+    /// Order by ascending term name.
+    bool operator()(const TermList *a, const TermList *b) {
+	return a->get_termname() > b->get_termname();
+    }
+};
+
+template<class CLASS> struct delete_ptr {
+    void operator()(CLASS *p) { delete p; }
+};
+
+MultiAllTermsList::MultiAllTermsList(const vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> > & dbs,
+				     const string & prefix)
+{
+    // The 0 and 1 cases should be handled by our caller.
+    AssertRel(dbs.size(), >=, 2);
+    termlists.reserve(dbs.size());
+    try {
+	vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> >::const_iterator i;
+	for (i = dbs.begin(); i != dbs.end(); ++i) {
+	    termlists.push_back((*i)->open_allterms(prefix));
+	}
+    } catch (...) {
+	for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
+	throw;
     }
-    lists.clear();
 }
 
-void
-MultiAllTermsList::update_current()
+MultiAllTermsList::~MultiAllTermsList()
 {
-    bool found_term = false;
-
-    vector<TermList *>::const_iterator i;
-    for (i = lists.begin(); i != lists.end(); ++i) {
-	if ((*i)->at_end()) {
-	    continue;
-	} else if (!found_term) {
-	    current = (*i)->get_termname();
-	    found_term = true;
-	} else {
-	    string newterm = (*i)->get_termname();
-	    if (newterm < current) {
-		current = newterm;
-	    }
-	}
-    }
-    if (!found_term) {
-	is_at_end = true;
-    }
+    for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
 }
 
-Xapian::termcount
+// FIXME: should we calculate this before we start deleting termlists which
+// have reached their end?
+Xapian::termcount 
 MultiAllTermsList::get_approx_size() const
 {
-    Xapian::termcount size = 0;
+    Xapian::termcount total_size = 0;
 
     vector<TermList *>::const_iterator i;
-    for (i = lists.begin(); i != lists.end(); ++i) {
-	size += (*i)->get_approx_size();
+    for (i = termlists.begin(); i != termlists.end(); ++i) {
+	total_size += (*i)->get_approx_size();
     }
-    return size;
+
+    return total_size;
 }
 
 string
 MultiAllTermsList::get_termname() const
 {
-    Assert(started);
-    return current;
+    return current_term;
 }
 
 Xapian::doccount
 MultiAllTermsList::get_termfreq() const
 {
-    Assert(started);
-    Xapian::doccount termfreq = 0;
-
-    vector<TermList *>::const_iterator i;
-    for (i = lists.begin(); i != lists.end(); ++i) {
-	if (!(*i)->at_end() &&
-	    (*i)->get_termname() == current) {
-	    termfreq += (*i)->get_termfreq();
-	}
+    if (termlists.empty()) return 0;
+    vector<TermList *>::const_iterator i = termlists.begin();
+    Xapian::doccount total_tf = (*i)->get_termfreq();
+    while (++i != termlists.end()) {
+	if ((*i)->get_termname() == current_term)
+	    total_tf += (*i)->get_termfreq();
     }
-    return termfreq;
+    return total_tf;
 }
 
 Xapian::termcount
 MultiAllTermsList::get_collection_freq() const
 {
-    Xapian::termcount collection_freq = 0;
-
-    vector<TermList *>::const_iterator i;
-    for (i = lists.begin(); i != lists.end(); ++i) {
-	if (!(*i)->at_end() &&
-	    (*i)->get_termname() == current) {
-	    collection_freq += (*i)->get_collection_freq();
-	}
+    if (termlists.empty()) return 0;
+    vector<TermList *>::const_iterator i = termlists.begin();
+    Xapian::termcount total_cf = (*i)->get_collection_freq();
+    while (++i != termlists.end()) {
+	if ((*i)->get_termname() == current_term)
+	    total_cf += (*i)->get_collection_freq();
     }
-    return collection_freq;
+    return total_cf;
 }
 
 TermList *
-MultiAllTermsList::skip_to(const string &tname)
+MultiAllTermsList::next()
 {
-    started = true;
+    if (current_term.empty()) {
+	// Make termlists into a heap so that the one (or one of the ones) with
+	// earliest sorting term is at the top of the heap.
+	vector<TermList*>::iterator i = termlists.begin();
+	while (i != termlists.end()) {
+	    (*i)->next();
+	    if ((*i)->at_end()) {
+		i = termlists.erase(i);
+	    } else {
+		++i;
+	    }
+	}
+	make_heap(termlists.begin(), termlists.end(),
+		  CompareTermListsByTerm());
+    } else {
+	// Advance to the next termname.
+	do {
+	    TermList * tl = termlists.front();
+	    pop_heap(termlists.begin(), termlists.end(),
+		     CompareTermListsByTerm());
+	    tl->next();
+	    if (tl->at_end()) {
+		delete tl;
+		termlists.pop_back();
+	    } else {
+		termlists.back() = tl;
+		push_heap(termlists.begin(), termlists.end(),
+			  CompareTermListsByTerm());
+	    }
+	} while (!termlists.empty() &&
+		 termlists.front()->get_termname() == current_term);
+    }
 
-    vector<TermList *>::const_iterator i;
-    for (i = lists.begin(); i != lists.end(); ++i) {
-	if (!(*i)->at_end())
-	    (*i)->skip_to(tname);
+    if (termlists.size() <= 1) {
+	if (termlists.empty()) return NULL;
+	TermList * tl = termlists[0];
+	termlists.clear();
+	return tl;
     }
-    update_current();
 
+    current_term = termlists.front()->get_termname();
     return NULL;
 }
 
 TermList *
-MultiAllTermsList::next()
+MultiAllTermsList::skip_to(const std::string &term)
 {
-    if (!started) {
-	started = true;
-
-	vector<TermList *>::const_iterator i;
-	for (i = lists.begin(); i != lists.end(); ++i) {
-	    (*i)->next();
-	}
-    } else {
-	vector<TermList *>::const_iterator i;
-	for (i = lists.begin(); i != lists.end(); ++i) {
-	    if (!(*i)->at_end() && (*i)->get_termname() == current) {
-		(*i)->next();
-	    }
+    // Assume the skip is likely to be a long distance, and rebuild the heap
+    // from scratch.  FIXME: It would be useful to profile this against an
+    // approach more like that next() uses if this ever gets heavy use.
+    vector<TermList*>::iterator i = termlists.begin();
+    while (i != termlists.end()) {
+	(*i)->skip_to(term);
+	if ((*i)->at_end()) {
+	    i = termlists.erase(i);
+	} else {
+	    ++i;
 	}
     }
-    update_current();
+
+    if (termlists.size() <= 1) {
+	if (termlists.empty()) return NULL;
+	TermList * tl = termlists[0];
+	termlists.clear();
+	return tl;
+    }
+
+    make_heap(termlists.begin(), termlists.end(), CompareTermListsByTerm());
+    
+    current_term = termlists.front()->get_termname();
     return NULL;
 }
 
 bool
 MultiAllTermsList::at_end() const
 {
-    Assert(started);
-
-    return is_at_end;
+    return termlists.empty();
 }
