/**********************************************************************
 *
 * collectset.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/


#include "collectset.h"
#include "collectserver.h"
#include "colservrconfig.h"
#include "gsdlsitecfg.h"
#include "gsdltools.h"
#include "fileutil.h"
#include "filter.h"
#include "browsefilter.h"
#include "sqlbrowsefilter.h"
#include "sqlqueryfilter.h"
#include "queryfilter.h"

#ifdef ENABLE_MG
#include "mgqueryfilter.h"
#include "mgsource.h"
#endif
#ifdef ENABLE_MGPP
#include "mgppqueryfilter.h"
#include "mgppsource.h"
#endif
#ifdef ENABLE_LUCENE
#include "lucenequeryfilter.h"
#include "lucenesource.h"
#endif

#include <assert.h>

#ifdef USE_GDBM
#include "gdbmclass.h"
#endif

#ifdef USE_JDBM
#include "jdbmnaiveclass.h"
#endif

#ifdef USE_SQLITE
#include "sqlitedbclass.h"
#endif

#ifdef USE_MSSQL
#include "mssqldbclass.h"
#endif

// @EXTENSION HEADERS@

// Construct a collection set from the site configuration.
//
// gsdlhome and collecthome are output parameters: site_cfg_read() fills
// them in (together with the httpdomain and httpprefix members).  The
// original note on this constructor says collecthome defaults to
// "<gsdlhome>/collect" when the config file does not set it explicitly.
//
// A failure to obtain gsdlhome is not reported here; per the original
// comment it is picked up later, in cgiwrapper.
collectset::collectset (text_t& gsdlhome, text_t& collecthome) 
{
#ifdef ENABLE_MG
  mgsearch = NULL;
#endif
#ifdef ENABLE_MGPP
  mgppsearch = NULL;
#endif
#ifdef ENABLE_LUCENE
  lucenesearch = NULL;
#endif

  text_tarray found_dirs;

  // The checks run strictly left to right and stop at the first one that
  // fails, so the collect directory is only read once the site config has
  // been loaded and gsdlhome names an existing directory.
  if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)
      && !gsdlhome.empty()
      && directory_exists (gsdlhome)
      && read_dir (collecthome, found_dirs)) {

    text_tarray::const_iterator dir = found_dirs.begin();
    text_tarray::const_iterator last_dir = found_dirs.end();

    for (; dir != last_dir; ++dir) {
      // "modelcol" is never loaded as a collection
      if (*dir == "modelcol") continue;

      this->add_collection (*dir, gsdlhome, collecthome);
    }

    // now pick up the members of any collection groups among the
    // servers just created
    this->add_all_collection_groups (gsdlhome, collecthome);
  }

  // done whether or not the configuration could be read
  set_gsdl_env_vars (gsdlhome);
}


// Construct an empty collection set that only knows its http prefix.
// No collections are loaded; the search object pointers start out NULL.
collectset::collectset (text_t& httpprefix_arg)
{
#ifdef ENABLE_MG
  mgsearch = NULL;
#endif
#ifdef ENABLE_MGPP
  mgppsearch = NULL;
#endif
#ifdef ENABLE_LUCENE
  lucenesearch = NULL;
#endif

  this->httpprefix = httpprefix_arg;
}

// Construct an empty collection set: no collections are loaded and each
// search object pointer that is compiled in starts out NULL.
collectset::collectset ()
{
#ifdef ENABLE_MG
  this->mgsearch = NULL;
#endif

#ifdef ENABLE_MGPP
  this->mgppsearch = NULL;
#endif

#ifdef ENABLE_LUCENE
  this->lucenesearch = NULL;
#endif
}

// Delete every collection server held in cservers, then empty the map.
collectset::~collectset () {
  collectservermapclass::iterator entry = cservers.begin();
  collectservermapclass::iterator last = cservers.end();

  for (; entry != last; ++entry) {
    if ((*entry).second.c == NULL) continue;
    delete (*entry).second.c;
  }

  cservers.clear();
}

// Configure and initialise every collection server in the set.
//
// For each server build.cfg is read before collect.cfg so that the
// indexmaps are available to decode defaultindex, defaultsubcollection
// and defaultlanguage.  A server whose collect.cfg cannot be read is
// skipped.  A server whose build.cfg cannot be read is skipped too, unless
// its collect.cfg marked it as a collection group.  Each failure writes a
// warning to logout.
//
// Returns false as soon as one server's own init() fails (the remaining
// servers are then left untouched), true otherwise.
bool collectset::init (ostream &logout) {
  collectservermapclass::iterator entry = cservers.begin();
  collectservermapclass::iterator last = cservers.end();

  for (; entry != last; ++entry) {
    assert ((*entry).second.c != NULL);
    if ((*entry).second.c == NULL) continue;

    const colservrconf &cfg = (*entry).second.c->get_configinfo ();

    // build.cfg first ...
    const bool build_cfg_ok =
      build_cfg_read (*((*entry).second.c), cfg.gsdlhome,
		      cfg.collecthome, cfg.collection);
    if (!build_cfg_ok) {
      outconvertclass text_t2ascii;
      logout << text_t2ascii 
	     << "Warning: couldn't read build.cfg file for collection \"" 
	     << cfg.collection << "\""
	     << "  gsdlhome=\"" << cfg.gsdlhome << "\"\n"
	     << "  collecthome=\"" << cfg.collecthome << "\"\n";
    }

    // ... then collect.cfg (attempted even when build.cfg failed)
    const bool collect_cfg_ok =
      collect_cfg_read (*((*entry).second.c), cfg.gsdlhome,
			cfg.collecthome, cfg.collection);
    if (!collect_cfg_ok) {
      outconvertclass text_t2ascii;
      logout << text_t2ascii 
	     << "Warning: couldn't read collect.cfg file for collection \""
	     << cfg.collection << "\""
	     << "  gsdlhome=\"" << cfg.gsdlhome << "\"\n"
	     << "  collecthome=\"" << cfg.collecthome << "\"\n";
    }

    const bool is_colgroup = (*entry).second.c->is_collection_group();

    // nothing can be done without collect.cfg
    if (!collect_cfg_ok) continue;

    // a missing build.cfg is only tolerated for a collection group
    // (collect.cfg said 'collectgroup true')
    if (!build_cfg_ok && !is_colgroup) continue;

    if (!(*entry).second.c->init (logout)) return false;

    (*entry).second.c->configure("httpdomain",httpdomain);
    (*entry).second.c->configure("httpprefix",httpprefix);
  }

  return true;
}

// Return the map of collection servers.  The map is returned by value,
// so the caller receives a copy of cservers.
collectservermapclass collectset::servers()
{
  return this->cservers;
}


void collectset::add_all_collections(const text_t &gsdlhome, 
				     const text_t& collecthome) 
{
  text_tarray collections;

  if (read_dir(collecthome, collections)) {

    text_tarray::const_iterator thiscol = collections.begin();
    text_tarray::const_iterator endcol = collections.end();

    while (thiscol != endcol) {
    
      // ignore the modelcol
      if (*thiscol == "modelcol") {
	++thiscol;
	continue;
      }
    
      // create collection server for this collection
      this->add_collection (*thiscol, gsdlhome, collecthome);
    
      ++thiscol;
    }

    this->add_all_collection_groups(gsdlhome,collecthome);
  }
}

bool collectset::collection_is_collect_group (const text_t& collect_dir)
{
  text_t is_collect_group_str = "false";
  text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");

  if (file_exists(collect_cfg)) {
    char *collect_cfgc = collect_cfg.getcstr();
    ifstream confin(collect_cfgc);

    if (confin) {
      text_tarray cfgline;

      while (read_cfg_line(confin, cfgline) >= 0) {
	if (cfgline.size() == 2) {
	  text_t key = cfgline[0];
	  cfgline.erase(cfgline.begin());
	  if (key == "collectgroup") {
	    is_collect_group_str = cfgline[0];
	    break;
	  }
	}
      }

      confin.close();
    }

    delete []collect_cfgc;
  }

  bool is_collect_group = (is_collect_group_str == "true") ? true : false;

  return is_collect_group;
}


// add_collection sets up the collection server for one collection and
// registers it through cservers.addcollectserver().
//
//   collection   name of the collection; it is also the directory name
//                below collecthome (group members are passed in as
//                "<group>/<member>", see add_collection_group)
//   gsdlhome     passed on to the database/search objects and configured
//                on the new server
//   collecthome  directory that holds the collection directories
//
// Any server already registered under the same name is removed first.
//
// If <collecthome>/<collection>/index/build.cfg exists, a server is
// created and wired up with a database backend chosen by "infodbtype" and
// a query filter and source chosen by "buildtype".  Otherwise a bare
// server is created only when the collection's collect.cfg marks it as a
// collection group; if it does not, nothing is added.
void collectset::add_collection (const text_t& collection, 
				 const text_t& gsdlhome,
				 const text_t& collecthome) 
{
  // read config file to see if built with mg, mgpp, or lucene
  text_t buildtype = "mg"; // mg is default
  text_t infodbtype = "gdbm"; // gdbm is default

  // drop any server already registered under this name (a no-op when
  // there is none)
  this->remove_collection(collection);
  
  collectserver *cserver = NULL;

  text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
  if (file_exists (build_cfg)) {
    // getcstr() hands back a char array that is released with delete[] below
    char *build_cfgc = build_cfg.getcstr();
    ifstream confin(build_cfgc);
    
    if (confin) {
      text_tarray cfgline;

      // Only two-token "key value" lines are looked at.  The loop does not
      // stop at the first match, so if a key occurs more than once the
      // last occurrence wins.
      while (read_cfg_line(confin, cfgline) >= 0) {
	if (cfgline.size() == 2) {
	  text_t key = cfgline[0];
	  cfgline.erase(cfgline.begin()); // cfgline[0] is now the value
	  if (key == "buildtype") {
	    buildtype = cfgline[0];
	  }
	  if (key == "infodbtype") {
	    infodbtype = cfgline[0];
	  }
	}
      }
      confin.close();
    }
    delete []build_cfgc;

    // the server is created even when build.cfg exists but could not be
    // opened; buildtype/infodbtype then keep their defaults
    cserver = new collectserver();

    // Create a dbclass of the correct type
    dbclass *db_ptr = NULL; // index database in index/text
    dbclass *oaidb_ptr = NULL; // etc/oai-inf database

    if (infodbtype == "sqlite")
      {
#ifdef USE_SQLITE
	sqlitedbclass *sql_db_ptr = new sqlitedbclass(gsdlhome);
	db_ptr = sql_db_ptr;

	sqlitedbclass *sql_oaidb_ptr = new sqlitedbclass(gsdlhome);
	oaidb_ptr = sql_oaidb_ptr;

	// add a sql browse filter
	sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
	sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
	cserver->add_filter (sqlbrowsefilter);  

	// add a sql query filter
	sqlqueryfilterclass *sqlqueryfilter = new sqlqueryfilterclass();
	sqlqueryfilter->set_sql_db_ptr(sql_db_ptr);
	cserver->add_filter (sqlqueryfilter);  

#else
	// NOTE(review): the second line of this warning is written without
	// a trailing newline/flush (same for the warnings below).
	cerr << "Warning: infodbtype of 'sqlite' was not compiled in to " << endl;
	cerr << "         this installation of Greenstone";
#endif
      }
  
    if (infodbtype == "mssql")
      {
#ifdef USE_MSSQL
	mssqldbclass *mssql_db_ptr = new mssqldbclass(gsdlhome);
	db_ptr = mssql_db_ptr;

	mssqldbclass *mssql_oaidb_ptr = new mssqldbclass(gsdlhome);
	oaidb_ptr = mssql_oaidb_ptr;

	// add a sql browse filter
	// (unlike the sqlite branch, no sql query filter is added here)
	sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
	sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
	cserver->add_filter (sqlbrowsefilter);  
#else
	cerr << "Warning: infodbtype of 'mssql' was not compiled in to " << endl;
	cerr << "         this installation of Greenstone";
#endif
      }

    if (infodbtype == "jdbm") {

#ifdef USE_JDBM
	jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
	db_ptr = jdbm_db_ptr;

	jdbmnaiveclass *jdbm_oaidb_ptr = new jdbmnaiveclass(gsdlhome);
	oaidb_ptr = jdbm_oaidb_ptr;
#else
	cerr << "Warning: infodbtype of 'jdbm' was not compiled in to " << endl;
	cerr << "         this installation of Greenstone";
#endif
    }

    // @EXTENSION DATASOURCES@

    // Use GDBM if the infodb type is empty or not one of the values above
    // (this is also reached when the requested type was recognised but
    // not compiled in, because db_ptr is still NULL then)
    if (db_ptr == NULL) {
#ifdef USE_GDBM
      db_ptr = new gdbmclass(gsdlhome);
      oaidb_ptr = new gdbmclass(gsdlhome);
#else
	// NOTE(review): db_ptr and oaidb_ptr stay NULL in this case and are
	// still handed to the filters and sources below -- confirm that
	// those classes cope with a NULL database pointer.
	cerr << "Warning: infodbtype of 'gdbm' was not compiled in to " << endl;
	cerr << "         this installation of Greenstone";
#endif
    }

    // add a null filter
    filterclass *filter = new filterclass ();
    cserver->add_filter (filter);
  
    // add a browse filter
    browsefilterclass *browsefilter = new browsefilterclass();
    browsefilter->set_db_ptr(db_ptr);
    // set the oaidb ptr for the browse filter. As with the db_ptr, the oaidb_ptr
    // is not managed and deleted by the browsefilter class, but by the sourceclass
    browsefilter->set_oaidb_ptr(oaidb_ptr);
    cserver->add_filter (browsefilter);  

    // Each enabled branch below stores a freshly created search object in
    // the corresponding member (mgsearch / mgppsearch / lucenesearch),
    // replacing whatever pointer an earlier add_collection call left there.
    // NOTE(review): when the buildtype is not enabled or not recognised,
    // no source is added to the server; given the ownership comment above,
    // db_ptr and oaidb_ptr then presumably have no owner -- confirm.
    if (buildtype == "mg") {
#ifdef ENABLE_MG
      mgsearch = new mgsearchclass();
 
      // add a query filter
      mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
      queryfilter->set_db_ptr(db_ptr);
      queryfilter->set_textsearchptr (mgsearch);
      cserver->add_filter (queryfilter);
    
      // add a mg source
      mgsourceclass *mgsource = new mgsourceclass ();
      mgsource->set_db_ptr(db_ptr);
      mgsource->set_oaidb_ptr(oaidb_ptr);
      mgsource->set_textsearchptr (mgsearch);
      cserver->add_source (mgsource);
#else
      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
#endif
    }
    else if (buildtype == "mgpp") {
#ifdef ENABLE_MGPP
      mgppsearch = new mgppsearchclass();

      // add a query filter
      mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
      queryfilter->set_db_ptr(db_ptr);
      queryfilter->set_textsearchptr (mgppsearch);
      cserver->add_filter (queryfilter);
      
      // add a mgpp source
      mgppsourceclass *mgppsource = new mgppsourceclass ();
      mgppsource->set_db_ptr(db_ptr);
      mgppsource->set_oaidb_ptr(oaidb_ptr);
      mgppsource->set_textsearchptr (mgppsearch);
      cserver->add_source (mgppsource);
#else
      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
#endif
    }
    else if (buildtype == "lucene") {
#ifdef ENABLE_LUCENE
      lucenesearch = new lucenesearchclass();
      lucenesearch->set_gsdlhome(gsdlhome); 

      // add a query filter
      lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
      queryfilter->set_db_ptr(db_ptr);
      queryfilter->set_textsearchptr (lucenesearch);
      cserver->add_filter (queryfilter);
      
      // add a lucene source
      lucenesourceclass *lucenesource = new lucenesourceclass ();
      lucenesource->set_db_ptr(db_ptr);
      lucenesource->set_oaidb_ptr(oaidb_ptr);
      lucenesource->set_textsearchptr (lucenesearch);
      cserver->add_source (lucenesource);
#else
      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
#endif
    }
    else {
      cerr << "Warning: unrecognized buildtype " << buildtype << endl;
    }

  }
  else {
    // no build.cfg: see if it is a collectgroup col
    text_t this_collect_dir = filename_cat(collecthome, collection);
    if (collection_is_collect_group(this_collect_dir)) {
	// by this point we know we will need a cserver
	// (a group server gets no filters, sources or databases)
	cserver = new collectserver();
    }
    // else not a collect group, or there was no collect.cfg 
    // => leave cserver as NULL so it will not be added into cservers
  }

  if (cserver != NULL) {
    // inform collection server and everything it contains about its
    // collection name
    cserver->configure ("collection", collection);
    cserver->configure ("gsdlhome", gsdlhome);
    cserver->configure ("collecthome", collecthome); 
    cservers.addcollectserver (cserver);
  }
}

void collectset::remove_all_collections () {

#ifdef ENABLE_MG
  // first unload any cached mg databases
  if (mgsearch != NULL) {
    mgsearch->unload_database();
  }
#endif
#ifdef ENABLE_MGPP
  if (mgppsearch != NULL) {
	mgppsearch->unload_database();
  }
#endif 
#ifdef ENABLE_LUCENE
  if (lucenesearch != NULL) {
    lucenesearch->unload_database();
  }
#endif

  // now delete the collection server objects
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  while (here != end) {
    if ((*here).second.c != NULL) {
      delete (*here).second.c;
    }
    ++here;
  }
  // since all collection server objects are deleted (which deleted their source objects 
  // which in turn deleted their search objects), we now NULL the local reference to the
  // search objects. See the extensive comment for this in remove_collection(text_t).
#ifdef ENABLE_MG
	  if (mgsearch != NULL) {
		mgsearch = NULL;
	  }
#endif 
#ifdef ENABLE_MGPP
	  if (mgppsearch != NULL) {
		mgppsearch = NULL;
	  }
#endif 
#ifdef ENABLE_LUCENE
	  if (lucenesearch != NULL) {
		lucenesearch = NULL;
	  }
#endif 

  cservers.clear();
}

void collectset::add_collection_group(const text_t& collection, 
				      const text_t& gsdlhome,
				      const text_t& collecthome)
{
  text_tarray group;

  text_t collect_group_dir = filename_cat (collecthome, collection);

  // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
  if (collection_is_collect_group(collect_group_dir)) {
    if (read_dir (collect_group_dir, group)) {
      
      text_tarray::const_iterator thiscol = group.begin();
      text_tarray::const_iterator endcol = group.end();
      
      while (thiscol != endcol) {
	// ignore the etc directory
	if (*thiscol == "etc") {
	  ++thiscol;
	  continue;
	}
	
	//text_t group_col = filename_cat(collection,*thiscol);
	// later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted  to \ on windows
	text_t group_col = collection + "/" + *thiscol;
	this->add_collection (group_col, gsdlhome, collecthome);
	
	++thiscol;
      }
    }
  }
}

// Run add_collection_group() for every collection server currently in
// the set, so that the members of each collection group get servers of
// their own.
//
// Entries with a NULL server pointer are skipped (after asserting in
// debug builds), matching the other loops over cservers in this file;
// previously such an entry was dereferenced unconditionally.
//
// NOTE(review): add_collection_group() can add entries to cservers (and
// remove a same-named member entry) while this loop is running.  That
// relies on collectservermapclass keeping 'here' and 'end' valid across
// such changes -- presumably it is std::map based; confirm.
void collectset::add_all_collection_groups (const text_t& gsdlhome,
					    const text_t& collecthome) 

{
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();
  
  while (here != end) {
    assert ((*here).second.c != NULL);
    if ((*here).second.c != NULL) {
      // take a copy of the name: the map may change during the call
      text_t collection = (*here).second.c->get_collection_name();
      this->add_collection_group(collection,gsdlhome,collecthome);
    }

    ++here;
  }
}


// remove_collection deletes the collection server of collection.
// This only needs to be called if a collectionserver is to be
// removed while the library is running. The destructor function
// cleans up all collectservers when the program exits.
void collectset::remove_collection (const text_t &collection) {

  // do nothing if no collection server exists for this collection 
  if (cservers.getcollectserver(collection) == NULL) return;

#ifdef ENABLE_MG
  // first unload any cached mg databases - we may need to do something
  // similar to this for mgpp and lucene too
  if (mgsearch != NULL) {
    mgsearch->unload_database();
  }
#endif
#ifdef ENABLE_MGPP
  if (mgppsearch != NULL) {
	mgppsearch->unload_database();
  }
#endif 
#ifdef ENABLE_LUCENE
  if (lucenesearch != NULL) {
    lucenesearch->unload_database();
  }
#endif 

  // now delete the collection server object
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  while (here != end) {
    if ((*here).second.c != NULL && (*here).first == collection) {
      delete (*here).second.c;

	  // The above code deletes the collection server object for this collection, which then
	  // deletes the <indexer>source object, which then deletes the <indexer>search object.
	  // Since we have a reference to the <index>search object here, we have to set it to NULL
	  // at this point, because we test it against null-ness elsewhere in this code. (Without
	  // setting it to NULL, we end up with server crashing issues.)
	  // Ideally, we'd like to know that we are NULLing the pointer to the exact same object
	  // as was freed above, but we can't know that without complicated object access to make
	  // the necessary pointer comparison. Fortunately, this class maintains only one type of 
	  // <index>search object (of a/any kind) at any time, so we can NULL this confidently now.
#ifdef ENABLE_MG
	  if (mgsearch != NULL) {
		mgsearch = NULL;
	  }
#endif 
#ifdef ENABLE_MGPP
	  if (mgppsearch != NULL) {
		mgppsearch = NULL;
	  }
#endif 
#ifdef ENABLE_LUCENE
	  if (lucenesearch != NULL) {
		lucenesearch = NULL;
	  }
#endif 

	   // continue cleaning up the collection server
       cservers.erase (here);

      return;
    } // end if
    ++here;
  }
}


// remove_collection deletes the collection server of collection.
// This only needs to be called if a collectionserver is to be
// removed while the library is running. The destructor function
// cleans up all collectservers when the program exits.
void collectset::remove_collection (const text_t &collection, ostream &logout) {

  remove_collection(collection);

  outconvertclass text_t2ascii;
  logout << text_t2ascii << "collectset::remove_collection: Removed collectserver for "
	 << collection << "\n";
}

// Pass a configuration line on to the collection servers.
//
//   key      configuration key
//   cfgline  the values that belong to key
//
// "collection" and "collectdir" are ignored.
//
// "collectinfo" is addressed to a single collection; cfgline is then
//     <collection> <gsdlhome> <gdbmhome>                 (3 values) or
//     <collection> <gsdlhome> <collecthome> <gdbmhome>   (4 or more)
// and only the server whose map key equals <collection> is configured.
// A "collectinfo" line with fewer than 3 values is ignored; previously it
// was indexed past its end.
//
// Every other key is forwarded unchanged to every collection server.
void collectset::configure(const text_t &key, const text_tarray &cfgline)
{
  if ((key == "collection") || (key == "collectdir")) return;

  const bool is_collectinfo = (key == "collectinfo");

  // a collectinfo line needs the collection name plus at least two
  // values; anything shorter cannot be applied to any server
  if (is_collectinfo && cfgline.size() < 3) return;

  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  while (here != end) {
    assert ((*here).second.c != NULL);
    if ((*here).second.c != NULL) {
      if (is_collectinfo) {
	if ((*here).first == cfgline[0]) {
	  if (cfgline.size()==3) {
	    (*here).second.c->configure ("gsdlhome", cfgline[1]);
	    (*here).second.c->configure ("gdbmhome", cfgline[2]);
	  }
	  else {
	    // size is at least 4 here thanks to the guard above
	    (*here).second.c->configure ("gsdlhome", cfgline[1]);
	    (*here).second.c->configure ("collecthome", cfgline[2]);
	    (*here).second.c->configure ("gdbmhome", cfgline[3]);
	  }
	}
      } else {
	(*here).second.c->configure (key, cfgline);
      }
    }

    ++here;
  }
}

// Replace the contents of collist with the collection name of every
// collection server in the set, in the iteration order of cservers.
void collectset::getCollectionList (text_tarray &collist) 
{
  // start from an empty list
  collist.erase(collist.begin(),collist.end());

  collectservermapclass::iterator entry = cservers.begin();
  collectservermapclass::iterator last = cservers.end();

  for (; entry != last; ++entry) {
    assert ((*entry).second.c != NULL);
    if ((*entry).second.c == NULL) continue;

    collist.push_back ((*entry).second.c->get_collection_name());
  }
}

