/**********************************************************************
 *
 * mgppsearch.cpp -- 
 * Copyright (C) 1999-2002  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/


#include "gsdlconf.h"
#include "mgppsearch.h"
#include "fileutil.h"
#include "GSDLQueryParser.h"
#include "MGQuery.h"
#include "TextGet.h"
#include "queryinfo.h"
#include "gsdlunicode.h"


// Convenience overload: derives the index suffix for a query. The index
// name handed to the two-argument overload is the concatenation of the
// query's index, subcollection and language fields.
text_t mgppsearchclass::getindexsuffix(const queryparamclass &qp) {
  const text_t fullindex = qp.index + qp.subcollection + qp.language;
  return getindexsuffix(qp.collection, fullindex);
}

// Builds the path suffix "index/<index>/<stem>" (joined with filename_cat).
// The last component is the configured index stem, or the collection name
// when no stem has been set via set_indexstem().
text_t mgppsearchclass::getindexsuffix (const text_t &collection, 
			      const text_t &index) {

  text_t suffix = "index";
  suffix = filename_cat (suffix, index);

  // no index stem, use the coll name
  const text_t &lastpart = indexstem.empty() ? collection : indexstem;
  return filename_cat (suffix, lastpart);
}

////////////////////
// mgppsearch class //
////////////////////

// Default constructor: starts with no index data object (one is created
// on first use by the search methods) and with the text level set to "Doc".
mgppsearchclass::mgppsearchclass ()
  : searchclass() {

  indexData = NULL;
  textlevel = "Doc";
}

// Destructor: releases the cache and, if an index data object was ever
// created, unloads it before destroying it.
mgppsearchclass::~mgppsearchclass () 
{
  // deleting a null pointer is a no-op, so no explicit check is needed
  delete cache;
  cache = NULL;

  if (indexData == NULL) {
    return;
  }

  indexData->UnloadData();
  delete indexData;
  indexData = NULL;
}

void mgppsearchclass::set_text_level(const text_t &textlevel_arg)
{
  textlevel = textlevel_arg;
}

void mgppsearchclass::set_indexstem(const text_t &stem)
{
  indexstem = stem;
}


bool mgppsearchclass::search(const queryparamclass &queryparams, 
			     queryresultsclass &queryresult) {
 
#ifdef __WIN32__
  char basepath[]="";
#else
  char basepath[] = "/";
#endif
 
  char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr(); 

 // load index data
  if (indexData == NULL) {
    indexData = new IndexData();
  }
  if (!indexData->LoadData (basepath, indexname)) {
    cerr<<"couldn't load index data\n"<<endl;
    return false;
  }

  // set default stem method from values originally set on prefs page
  int defaultStemMethod = 0;
  if (queryparams.casefolding) {
    defaultStemMethod |= STEM_CaseFolding;
  }
  if (queryparams.stemming) {
    defaultStemMethod |= STEM_Stemming;
  }
  if (queryparams.accentfolding) {
    defaultStemMethod |= STEM_AccentFolding;
  }  	
  
  // set default Boolean combiner from all/some setting
  // if match_mode == 1, ie all, default=1 ie AND
  // if match_mode == 0, ie some, default=0, ie OR
  int defaultBoolCombine = 0;
  if (queryparams.match_mode){
    defaultBoolCombine = 1;
  }

  // use default query info settings - change to reflect user preferences??
  QueryInfo queryInfo;
  
  SetCStr (queryInfo.docLevel, (queryparams.level.getcstr())); 
  queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
  queryInfo.sortByRank = (queryparams.search_type == 1);
  queryInfo.exactWeights = false;
  queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
  queryInfo.needTermFreqs = true;
  
  ExtQueryResult queryResult;
  
  UCArray queryArray;
  // greenstone gives us the query encoded in unicode. We want utf8.
  char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
  SetCStr(queryArray, utf8querystring);
  delete []utf8querystring;

  // create the mgpp query tree
  QueryNode *queryTree = NULL;
  queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod, queryparams.maxnumeric);
  if (queryTree == NULL) { // syntax error
    queryresult.syntax_error = true;
    return true; // should we return true or false?
  }
  UCArray level;
  UCArrayClear(level);
  
  //set the level for results
  SetCStr(level, textlevel.getcstr());

  
  // do the query
  MGQuery(*indexData, queryInfo, queryTree, queryResult, level);
  
 
  // convert ExtQueryResult to queryresultclass
  
  queryresult.docs_matched = (int)queryResult.docs.size();

  if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
    queryresult.is_approx = Exact; 
  }
  else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
    queryresult.is_approx = MoreThan;
  }
  else {
    queryresult.is_approx = Approximate;
  }

  docresultclass doc;
  for (int i=0; i<(int)queryResult.docs.size(); ++i) {
    doc.clear();
    doc.docnum = (int)queryResult.levels[i];
    doc.docweight = queryResult.ranks[i];
    queryresult.docs.docset[doc.docnum] = doc;
    queryresult.docs.docorder.push_back(doc.docnum);

  }

  // term info
  termfreqclass term;
  for (int k=0; k<(int)queryResult.termFreqs.size(); ++k) {
    term.clear();
    char* termfreq_cstr=GetCStr(queryResult.termFreqs[k].term);
    term.termstr = to_uni(termfreq_cstr);
    delete []termfreq_cstr;
    term.termstemstr = term.termstr;
    // we don't set term.utf8equivterms ?? - jrm21
    term.termfreq = queryResult.termFreqs[k].termFreq;
    queryresult.terms.push_back(term);
    queryresult.orgterms.push_back(term); // should this change??
    
    for (int j=0; j<(int)queryResult.termFreqs[k].equivTerms.size(); ++j) {
      char* equivterm_cstr=GetCStr(queryResult.termFreqs[k].equivTerms[j]);
      queryresult.termvariants.insert(to_uni(equivterm_cstr));
      delete []equivterm_cstr;
    }

  }
  // clean up
  unload_database();  // Important that local library doesn't leave any files open
  delete []indexname;
  return true;

}


// Browses the index term list: asks mgpp for 'numDocs' terms beginning at
// position 'start' relative to the query string, and appends each returned
// term with its frequency to queryresult.terms and queryresult.orgterms.
//
// Returns false if the index data could not be loaded, true otherwise.
bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
				    queryresultsclass &queryresult) {

#ifdef __WIN32__
  char basepath[]="";
#else
  char basepath[] = "/";
#endif

  // heap buffer owned by this function; freed on every exit path
  char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
 
  if (indexData == NULL) {
    indexData = new IndexData();
  }
  if (!indexData->LoadData (basepath, indexname)) {
    cerr<<"couldn't load index data\n"<<endl;
    delete []indexname; // don't leak the path on the failure path
    return false;
  }

  UCArray level;
  UCArrayClear(level);
  
  //browse always at top level
  SetCStr(level, "Doc"); // this name may change.
  

  BrowseQueryNode browseNode;
  browseNode.startPosition = start;
  browseNode.numTerms = numDocs;

  BrowseQueryResult browseResult;

  UCArrayClear(browseNode.term);
  // greenstone gives us the query encoded in unicode. We want utf8.
  char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
  SetCStr(browseNode.term, utf8querystring);
  delete []utf8querystring;

  // do the actual query
  MGBrowseQuery(*indexData, level, browseNode, browseResult);

  // load results into term info
  termfreqclass term;
  for (int i=0; i<(int)browseResult.termFreqs.size(); ++i) {
    term.clear();
    char* term_cstr = GetCStr(browseResult.termFreqs[i].term);
    term.termstr = to_uni(term_cstr);
    delete []term_cstr;
    term.termstemstr = term.termstr;
    term.termfreq = browseResult.termFreqs[i].termFreq;
    queryresult.terms.push_back(term);
    queryresult.orgterms.push_back(term);
  }

  // clean up
  unload_database();  // Important that local library doesn't leave any files open
  delete []indexname;

  return true;
}

// The document text for 'docnum' is placed in 'output' (converted from
// utf-8 to unicode). docTargetDocument returns 'true' if it was able to
// retrieve the document text, and 'false' if the text data could not be
// loaded or the document could not be fetched; 'output' is left untouched
// on failure.
// 'collection' is needed to locate the collection's text index. The
// default index/subcollection/language parameters are unused; they are
// only there because the mg version needs them.
//
// NOTE(review): failures are reported on cout whereas search() and
// browse_search() use cerr -- confirm whether stdout is intended here.

bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
					const text_t &/*defaultsubcollection*/,
					const text_t &/*defaultlanguage*/,
					const text_t &collection,
					int docnum,
					text_t &output) {
  
#ifdef __WIN32__
  char basepath[]="";
#else
  char basepath[] = "/";
#endif
  // heap buffer owned by this function; freed on every exit path
  char *textname = (filename_cat(collectdir, getindexsuffix(collection, "text"))).getcstr();
 
  TextData textdata;
  if(!textdata.LoadData(basepath, textname)) {
    cout<<"couldn't load text data\n"<<endl;
    delete []textname;
    return false;
  }
  UCArray doctext;
  UCArray level;
  // release the temporary level string as soon as SetCStr has been given it,
  // following the same pattern used for the other getcstr()/GetCStr() buffers
  char *textlevel_cstr = textlevel.getcstr();
  SetCStr(level, textlevel_cstr);
  delete []textlevel_cstr;
  if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
    cout<<"couldn't retrieve document text\n";
    // same clean up as the success path
    textdata.UnloadData ();
    delete []textname;
    return false;
  }

  // convert UCArray to text_t
  output.clear();
  char* doctext_cstr = GetCStr(doctext);
  output = to_uni(doctext_cstr); // convert from utf-8 to unicode
  delete []doctext_cstr;

  // here need to remove the <Document>, <Section>, <Paragraph> tags

  
  //clean up
  textdata.UnloadData ();
  delete []textname;

  return true;

}

// Unloads the index data, if an index data object exists. Used to clear
// any cached databases for persistent versions of Greenstone like the
// Windows local library. The IndexData object itself is kept for reuse.
void mgppsearchclass::unload_database () {

  if (indexData == NULL) {
    return;
  }
  indexData->UnloadData();
}





