/**********************************************************************
 *
 * lucenequeryfilter.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "lucenequeryfilter.h"
#include "fileutil.h"
#include "lucenesearch.h"

// Constructor: after the fieldedqueryfilterclass base is constructed, register
// the four filter options handled by this class (SortField, SortOrder,
// Fuzziness and FilterString) in filterOptions. All four are onePerQuery.
lucenequeryfilterclass::lucenequeryfilterclass ()
  : fieldedqueryfilterclass() {

  // SortField -- enumerated; registered here with an empty default and no
  // valid values.
  {
    FilterOption_t sortfield_opt;
    sortfield_opt.clear();
    sortfield_opt.name = "SortField";
    sortfield_opt.type = FilterOption_t::enumeratedt;
    sortfield_opt.repeatable = FilterOption_t::onePerQuery;
    sortfield_opt.defaultValue = "";
    filterOptions["SortField"] = sortfield_opt;
  }

  // SortOrder -- enumerated; either "ascending" (the default) or "descending".
  {
    FilterOption_t sortorder_opt;
    sortorder_opt.clear();
    sortorder_opt.name = "SortOrder";
    sortorder_opt.type = FilterOption_t::enumeratedt;
    sortorder_opt.repeatable = FilterOption_t::onePerQuery;
    sortorder_opt.defaultValue = "ascending";
    sortorder_opt.validValues.push_back("ascending");
    sortorder_opt.validValues.push_back("descending");
    filterOptions["SortOrder"] = sortorder_opt;
  }

  // Fuzziness -- free-form string (intended range 0.0-1.0), empty by default.
  {
    FilterOption_t fuzziness_opt;
    fuzziness_opt.clear();
    fuzziness_opt.name = "Fuzziness";
    fuzziness_opt.type = FilterOption_t::stringt;
    fuzziness_opt.repeatable = FilterOption_t::onePerQuery;
    fuzziness_opt.defaultValue = "";
    filterOptions["Fuzziness"] = fuzziness_opt;
  }

  // FilterString -- free-form string, empty by default.
  {
    FilterOption_t filterstring_opt;
    filterstring_opt.clear();
    filterstring_opt.name = "FilterString";
    filterstring_opt.type = FilterOption_t::stringt;
    filterstring_opt.repeatable = FilterOption_t::onePerQuery;
    filterstring_opt.defaultValue = "";
    filterOptions["FilterString"] = filterstring_opt;
  }
}

// Destructor: the body is intentionally empty; no explicit cleanup is
// performed here.
lucenequeryfilterclass::~lucenequeryfilterclass () {}



void lucenequeryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
  fieldedqueryfilterclass::configure(key, cfgline);

  if (key == "textlevel") {
    ((lucenesearchclass *)textsearchptr)->set_text_level(cfgline[0]);
  }
  else if (key == "indexsortfieldmap") {
    sortfieldmap.importmap (cfgline);
  }
  else if (key == "indexsortfields") {
  filterOptions["SortField"].validValues.erase(filterOptions["SortField"].validValues.begin(), filterOptions["SortField"].validValues.end());
    text_tarray::const_iterator here = cfgline.begin();
    text_tarray::const_iterator end = cfgline.end();
    while (here != end) {
      if (!(*here).empty()) {
	filterOptions["SortField"].validValues.push_back(*here);
      }
      ++here;
    }
  }
  else if (key == "defaultsortfield") { 
    sortfieldmap.from2to (cfgline[0], filterOptions["SortField"].defaultValue);
  }
}

// Initialises the filter. Returns false if fieldedqueryfilterclass::init
// fails, true otherwise. When the SortField option has no default value yet,
// its first valid value (if there is one and it is non-empty) becomes the
// default.
bool lucenequeryfilterclass::init (ostream &logout) {

  const bool base_ok = fieldedqueryfilterclass::init(logout);
  if (!base_ok) {
    return false;
  }

  FilterOption_t &sortfield_opt = filterOptions["SortField"];
  if (sortfield_opt.defaultValue.empty()) {
    if (sortfield_opt.validValues.begin() != sortfield_opt.validValues.end()) {
      if (!sortfield_opt.validValues[0].empty()) {
        sortfield_opt.defaultValue = sortfield_opt.validValues[0];
      }
    }
  }

  return true;
}

// Fills query with default values: first the defaults supplied by
// fieldedqueryfilterclass, then the defaults of the FilterString, SortField,
// SortOrder and Fuzziness options. query.sortorder is set to true only when
// the SortOrder default is "descending".
void lucenequeryfilterclass::set_queryparam_defaults(queryparamclass &query ) {

  fieldedqueryfilterclass::set_queryparam_defaults(query);

  const FilterOption_t &filterstring_opt = filterOptions["FilterString"];
  query.filterstring = filterstring_opt.defaultValue;

  const FilterOption_t &sortfield_opt = filterOptions["SortField"];
  query.sortfield = sortfield_opt.defaultValue;

  const FilterOption_t &sortorder_opt = filterOptions["SortOrder"];
  query.sortorder = (sortorder_opt.defaultValue == "descending");

  const FilterOption_t &fuzziness_opt = filterOptions["Fuzziness"];
  query.fuzziness = fuzziness_opt.defaultValue;
}

// Copies a single option into query. FilterString, SortField, SortOrder and
// Fuzziness are handled here and yield true; any other option is delegated to
// fieldedqueryfilterclass::set_queryparam_field, whose result is returned.
// For SortOrder, query.sortorder becomes true only for the value "descending".
bool lucenequeryfilterclass::set_queryparam_field(const OptionValue_t &option, queryparamclass &query) {

  bool handled = true;

  if (option.name == "FilterString") {
    query.filterstring = option.value;
  }
  else if (option.name == "SortField") {
    query.sortfield = option.value;
  }
  else if (option.name == "SortOrder") {
    query.sortorder = (option.value == "descending");
  }
  else if (option.name == "Fuzziness") {
    query.fuzziness = option.value;
  }
  else {
    // not one of ours: let the base class decide
    handled = fieldedqueryfilterclass::set_queryparam_field(option, query);
  }

  return handled;
}

// Answers a filter request using lucene.
//
//   request  - the filter request (options, result options, optional docSet)
//   response - cleared first, then filled with matching documents, term
//              information and stop words as requested
//   err      - set to noError on success; configurationError when db_ptr or
//              textsearchptr is NULL; systemProblem when the database cannot
//              be opened; otherwise whatever do_multi_query reports
//   logout   - stream that diagnostic messages are written to
//
// When the request asks for a full text browse the work is handed to
// browsefilter() and the database is not opened. Otherwise the database is
// opened for the duration of the call and is closed again on every return
// path taken after a successful open.
void lucenequeryfilterclass::filter(const FilterRequest_t &request,
                                  FilterResponse_t &response,
                                  comerror_t &err, ostream &logout) {

  outconvertclass text_t2ascii;

  response.clear ();
  err = noError;
  if (db_ptr == NULL) {
    // most likely a configuration problem
    logout << text_t2ascii 
           << "configuration error: queryfilter contains a null dbclass\n\n";
    err = configurationError;
    return;
  }
  if (textsearchptr == NULL) {
    // most likely a configuration problem
    logout << text_t2ascii 
           << "configuration error: queryfilter contains a null textsearchclass (lucene)\n\n";
    err = configurationError;
    return;
  }
  if (full_text_browse(request.filterResultOptions)) {
    browsefilter(request, response, err, logout);
    return;
  }

  // open the database
  db_ptr->setlogout(&logout);
  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
    // most likely a system problem (we have already checked that the database exists)
    logout << text_t2ascii
           << "system problem: open on database \"" << db_filename << "\" failed\n\n";
    err = systemProblem;
    return;
  }

  // get the query parameters
  int startresults, endresults;
  vector<queryparamclass> queryfilterparams;
  parse_query_params (request, queryfilterparams, startresults, 
                      endresults, logout);

  // do query
  queryresultsclass queryresults;
  do_multi_query (request, queryfilterparams, queryresults, err, logout);
  response.error_message = queryresults.error_message;
  if (err != noError) {
    // the database was opened above; do not leave it open on the error path
    db_ptr->closedatabase();
    return;
  }

  // assemble document results
  if (need_matching_docs (request.filterResultOptions))
  {
    // Loop through the query results (ordered by ranking)
    int resultnum = 1;
    vector<text_t>::iterator docorder_iterator = queryresults.docs.docorder.begin();
    while (docorder_iterator != queryresults.docs.docorder.end())
    {
      text_t doc_OID = (*docorder_iterator);

      // Make sure this result is in the docset, and either in the request set or the request set is empty
      docresultmap::iterator doc_result = queryresults.docs.docset.find (doc_OID);
      if (doc_result != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, doc_OID)))
      {
        // Add the matching document
        ResultDocInfo_t resultdoc;
        resultdoc.OID = doc_OID;
        resultdoc.result_num = resultnum;
        // the document weight is scaled by 10000 and offset by 0.5 before
        // being truncated to an int
        resultdoc.ranking = (int)((*doc_result).second.docweight * 10000.0 + 0.5);
        resultdoc.num_terms_matched = (*doc_result).second.num_query_terms_matched;
        response.docInfo.push_back (resultdoc);

        // result numbers only count documents that were actually added
        ++resultnum;
      }

      ++docorder_iterator;
    }
  }
  
  // assemble the term results
  if (need_term_info(request.filterResultOptions)) {
    // note: the terms have already been sorted and uniqued - ?? have they??

    TermInfo_t terminfo;
    bool terms_first = true;

    termfreqclassarray::iterator terms_here = queryresults.terms.begin();
    termfreqclassarray::iterator terms_end = queryresults.terms.end();

    while (terms_here != terms_end) {
      terminfo.clear();
      terminfo.term = (*terms_here).termstr;
      terminfo.freq = (*terms_here).termfreq;
      // lucene doesn't return any termvariants at this stage, 
      // so make sure the original term is set
      terminfo.matchTerms.push_back(terminfo.term);
      
      // this bit gets the matchTerms ie the equivalent (stem/casefold) terms;
      // the collected variants are attached to the first term only
      if (terms_first) {
        text_tset::iterator termvariants_here = queryresults.termvariants.begin();
        text_tset::iterator termvariants_end = queryresults.termvariants.end();
        while (termvariants_here != termvariants_end) {
          terminfo.matchTerms.push_back (*termvariants_here);
          ++termvariants_here;
        }
      }
      terms_first = false;
      
      response.termInfo.push_back (terminfo);

      ++terms_here;
    }

    // add the stop words
    text_tset::iterator stopwords_here = queryresults.stopwords.begin();
    text_tset::iterator stopwords_end = queryresults.stopwords.end();
    while (stopwords_here != stopwords_end) {
      response.stopwords.insert(*stopwords_here);
      ++stopwords_here;
    }
  }

  db_ptr->closedatabase();  // Important that local library doesn't leave any files open
  response.numDocs = queryresults.docs_matched;
  response.isApprox = queryresults.is_approx;
}

void lucenequeryfilterclass::browsefilter(const FilterRequest_t &request,
					FilterResponse_t &response,
					comerror_t &err, ostream &logout) {  

  outconvertclass text_t2ascii;

  // get the query parameters
  int startresults, endresults;
  
  vector<queryparamclass> queryfilterparams;
  parse_query_params (request, queryfilterparams, startresults, 
		      endresults, logout);  

  vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
   
  // do query
  queryresultsclass queryresults;
  queryresults.clear();
  
  int numDocs = endresults-startresults;
  textsearchptr->setcollectdir (collectdir);

  if (!((lucenesearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
    // most likely a system problem
    logout << text_t2ascii
	   << "system problem: could not do full text browse with lucene for index \""
	   << (*query_here).index << (*query_here).subcollection
	   << (*query_here).language << "\".\n\n";
    err = systemProblem;
    return;
  }

  // assemble the term results
  TermInfo_t terminfo;
  
  termfreqclassarray::iterator terms_here = queryresults.terms.begin();
  termfreqclassarray::iterator terms_end = queryresults.terms.end();

  while (terms_here != terms_end) {
    terminfo.clear();
    terminfo.term = (*terms_here).termstr;
    terminfo.freq = (*terms_here).termfreq;
    
    response.termInfo.push_back (terminfo);

    ++terms_here;
  }
  

}

// lucenesearchptr and db_ptr are assumed to be valid
void lucenequeryfilterclass::do_multi_query (const FilterRequest_t &request,
					   const vector<queryparamclass> &query_params,
					   queryresultsclass &multiresults,
					   comerror_t &err, ostream &logout) {
  outconvertclass text_t2ascii;

  err = noError;
  textsearchptr->setcollectdir (collectdir);
  multiresults.clear();
  
  vector<queryparamclass>::const_iterator query_here = query_params.begin();
  vector<queryparamclass>::const_iterator query_end = query_params.end();
  while (query_here != query_end) {
    queryresultsclass thisqueryresults;
    if (!textsearchptr->search((*query_here), thisqueryresults)) {
      // most likely a system problem
      logout << text_t2ascii
	     << "system problem: could not do search with lucene for index \""
	     << (*query_here).index << (*query_here).level 
	     << (*query_here).subcollection
	     << (*query_here).language << "\".\n\n";
      err = systemProblem;
      return;
    }

    // check for syntax error
    if (thisqueryresults.syntax_error==true) {
      logout << text_t2ascii
	     << "syntax problem: invalid query string \""
	     << (*query_here).querystring<<"\".\n";
      err = syntaxError;
      return;
    }
    // combine the results
    if (need_matching_docs (request.filterResultOptions)) {
            
      if (query_params.size() == 1) {
	multiresults.error_message = thisqueryresults.error_message;
	multiresults.docs = thisqueryresults.docs; // just one set of results
	multiresults.docs_matched = thisqueryresults.docs_matched;
	multiresults.is_approx = thisqueryresults.is_approx;
	
      } else {
	if ((*query_here).combinequery == "and") {
	  multiresults.docs.combine_and (thisqueryresults.docs);
	} else if ((*query_here).combinequery == "or") {
	  multiresults.docs.combine_or (thisqueryresults.docs);
	} else if ((*query_here).combinequery == "not") {
	  multiresults.docs.combine_not (thisqueryresults.docs);
	}
	multiresults.docs_matched = multiresults.docs.docset.size();
	multiresults.is_approx = Exact;
      }
    }

    // combine the term information 
    if (need_term_info (request.filterResultOptions)) {
      // append the terms
      multiresults.orgterms.insert(multiresults.orgterms.end(),
				   thisqueryresults.orgterms.begin(),
				   thisqueryresults.orgterms.end());

      
      // add the term variants - 
      text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
      text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
      while (termvar_here != termvar_end) {
	multiresults.termvariants.insert(*termvar_here);
	++termvar_here;
      }

      // add the stop words
      text_tset::iterator stopwords_here = thisqueryresults.stopwords.begin();
      text_tset::iterator stopwords_end = thisqueryresults.stopwords.end();
      while (stopwords_here != stopwords_end) {
	multiresults.stopwords.insert(*stopwords_here);
	++stopwords_here;
      }
    }
   
    ++query_here;
  }

  // sort and unique the query terms
  multiresults.sortuniqqueryterms ();
}


