/**************************************************************************
 *
 * MGQuery.cpp -- Query related functions
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#include "MGQuery.h"
#include "bitio_m_stdio.h"
#include "bitio_gen.h"
#include "Terms.h"
#include "QueryResultsSort.h"
#include <assert.h>


// Writes `indent` single-space characters to `s`.
// A zero or negative `indent` writes nothing.
void PrintIndent (ostream &s, int indent) {
  for (int column = 0; column < indent; ++column) {
    s << " ";
  }
}


// Writes `indent` spaces followed by `text` to `s`.
// `text` is not modified; it is non-const only because of the
// declared signature, which is why callers cast string literals.
void PrintIndentText (ostream &s, char *text, int indent) {
  // indentation first, then the payload
  PrintIndent (s, indent);
  s << text;
}

// Prints a query sub-tree: a real node prints itself two columns
// deeper than `indent`; a missing node is shown as the text "NULL"
// at `indent`.
void PrintNode (ostream &s, QueryNode *node, int indent) {
  if (node != NULL) {
    node->Print (s, indent+2);
    return;
  }

  PrintIndentText (s, (char*)"NULL\n", indent);
}

  

// Base-class constructor: nothing to initialise here.
QueryNode::QueryNode () {}

// Base-class destructor: nothing to release here.
QueryNode::~QueryNode () {}

// Default evaluation: a plain QueryNode matches nothing, so the
// only effect is that `result` is cleared. The index data and query
// info are deliberately unused.
void QueryNode::Calculate (IndexData & /*indexData*/,
                           const QueryInfo & /*queryInfo*/,
                           QueryResult &result) const {
  result.Clear ();
}

// Default Free: nothing to release at this level.
void QueryNode::Free () {}

// Default Print: writes nothing to the stream.
void QueryNode::Print (ostream & /*s*/, int /*indent*/) const {}



// Starts with no operands attached; both child pointers are NULL.
AndQueryNode::AndQueryNode () {
  rightNode = NULL;
  leftNode = NULL;
}

// Destroys both operand sub-trees (see Free).
AndQueryNode::~AndQueryNode () {
  this->Free ();
}

// Evaluates "left AND right".
//
// `result` receives the documents present in both child results. When
// ranking is in use (sortByRank or needRankInfo) the rank of a kept
// document is the sum of its two child ranks. If term frequencies were
// requested, the right child's entries are appended after the left's.
//
// If either operand is missing the result is empty.
//
// NOTE(review): the merge below only works if each child returns its
// document numbers in ascending order -- presumably guaranteed by the
// other node types; confirm.
void AndQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                              QueryResult &result) const {
  result.Clear ();

  // an And with a missing operand matches nothing
  if (leftNode == NULL || rightNode == NULL) return;

  // the left operand is evaluated straight into `result`, so the
  // intersection can be compacted in place (it can only shrink)
  QueryResult rhs;
  leftNode->Calculate (indexData, queryInfo, result);
  rightNode->Calculate (indexData, queryInfo, rhs);

  bool useRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (useRanks) {
    const bool lhsConsistent = (result.ranks.size() == result.docs.size());
    const bool rhsConsistent = (rhs.ranks.size() == rhs.docs.size());
    if (!lhsConsistent || !rhsConsistent) {
      // shouldn't ever get here; fall back to ignoring ranks
      useRanks = false;
      assert (0);
    }
  }

  // walk both lists in step, keeping only the common documents
  mg_u_long lhsPos = 0;
  mg_u_long rhsPos = 0;
  mg_u_long kept = 0;
  while (lhsPos < result.docs.size() && rhsPos < rhs.docs.size()) {
    if (result.docs[lhsPos] == rhs.docs[rhsPos]) {
      // in both lists: keep it
      result.docs[kept] = result.docs[lhsPos];
      if (useRanks) {
        result.ranks[kept] = result.ranks[lhsPos] + rhs.ranks[rhsPos];
      }
      ++lhsPos;
      ++rhsPos;
      ++kept;
    } else if (result.docs[lhsPos] < rhs.docs[rhsPos]) {
      ++lhsPos;
    } else {
      ++rhsPos;
    }
  }

  // drop everything beyond the compacted prefix
  result.docs.erase (result.docs.begin()+kept, result.docs.end());
  if (useRanks) {
    result.ranks.erase (result.ranks.begin()+kept, result.ranks.end());
  }

  // term frequencies of both operands are reported
  if (queryInfo.needTermFreqs) {
    result.termFreqs.insert (result.termFreqs.end(),
                             rhs.termFreqs.begin(),
                             rhs.termFreqs.end());
  }
}

// Deletes both operand sub-trees and resets the pointers to NULL, so
// calling Free again is harmless.
void AndQueryNode::Free () {
  // delete on a NULL pointer is a no-op, so no guard is needed
  delete leftNode;
  leftNode = NULL;

  delete rightNode;
  rightNode = NULL;
}

// Dumps this node as: left operand, the "AND" keyword, right operand.
// The operands are handed to PrintNode two columns deeper.
void AndQueryNode::Print (ostream &s, int indent) const {
  const int childIndent = indent+2;

  PrintIndentText (s, (char*)"leftNode:\n", indent);
  PrintNode (s, leftNode, childIndent);

  PrintIndentText (s, (char*)"AND\n", indent);

  PrintIndentText (s, (char*)"rightNode:\n", indent);
  PrintNode (s, rightNode, childIndent);
}


// Starts with no operands attached; both child pointers are NULL.
OrQueryNode::OrQueryNode () {
  rightNode = NULL;
  leftNode = NULL;
}

// Destroys both operand sub-trees (see Free).
OrQueryNode::~OrQueryNode () {
  this->Free ();
}

// Evaluates "left OR right".
//
// `result` receives every document that appears in either child
// result, each exactly once. When ranking is in use (sortByRank or
// needRankInfo) a document found by both children gets the sum of the
// two ranks; otherwise it keeps the rank of the child that found it.
// If term frequencies were requested, the left child's entries are
// followed by the right child's.
//
// A missing operand is treated as an empty result.
//
// The merge is a plain two-way merge followed by a copy of whichever
// tail is left over. It deliberately does not use a "largest possible
// value" sentinel for an exhausted side: a genuine document number
// equal to such a sentinel would be mistaken for a match and cause a
// read past the end of the exhausted list.
//
// NOTE(review): the merge relies on each child returning its document
// numbers in ascending order -- presumably guaranteed by the other
// node types; confirm.
void OrQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
			      QueryResult &result) const {
  result.Clear();

  // calculate the result from the left tree and the result
  // from the right tree
  QueryResult leftResult;
  QueryResult rightResult;
  if (leftNode != NULL)
    leftNode->Calculate (indexData, queryInfo, leftResult);
  if (rightNode != NULL)
    rightNode->Calculate (indexData, queryInfo, rightResult);

  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (leftResult.ranks.size() != leftResult.docs.size() ||
		    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here; fall back to ignoring ranks
    haveAccum = false;
    assert (0);
  }

  mg_u_long leftSize = leftResult.docs.size();
  mg_u_long rightSize = rightResult.docs.size();
  mg_u_long leftI = 0;
  mg_u_long rightI = 0;

  // both sides still have documents: emit the smaller, or the
  // combined entry when they agree
  while (leftI < leftSize && rightI < rightSize) {
    mg_u_long leftDocNum = leftResult.docs[leftI];
    mg_u_long rightDocNum = rightResult.docs[rightI];

    if (leftDocNum < rightDocNum) {
      result.docs.push_back (leftDocNum);
      if (haveAccum)
	result.ranks.push_back (leftResult.ranks[leftI]);
      ++leftI;

    } else if (leftDocNum > rightDocNum) {
      result.docs.push_back (rightDocNum);
      if (haveAccum)
	result.ranks.push_back (rightResult.ranks[rightI]);
      ++rightI;

    } else { // equal
      result.docs.push_back (leftDocNum);
      if (haveAccum)
	result.ranks.push_back (leftResult.ranks[leftI] +
				rightResult.ranks[rightI]);
      ++leftI;
      ++rightI;
    }
  }

  // at most one of these two loops does any work: copy what is left
  // of the side that was not exhausted
  for (; leftI < leftSize; ++leftI) {
    result.docs.push_back (leftResult.docs[leftI]);
    if (haveAccum)
      result.ranks.push_back (leftResult.ranks[leftI]);
  }
  for (; rightI < rightSize; ++rightI) {
    result.docs.push_back (rightResult.docs[rightI]);
    if (haveAccum)
      result.ranks.push_back (rightResult.ranks[rightI]);
  }

  // combine term frequency information
  if (queryInfo.needTermFreqs) {
    result.termFreqs.insert (result.termFreqs.end(),
			     leftResult.termFreqs.begin(),
			     leftResult.termFreqs.end());
    result.termFreqs.insert (result.termFreqs.end(),
			     rightResult.termFreqs.begin(),
			     rightResult.termFreqs.end());
  }
}

// Deletes both operand sub-trees and resets the pointers to NULL, so
// calling Free again is harmless.
void OrQueryNode::Free () {
  // delete on a NULL pointer is a no-op, so no guard is needed
  delete leftNode;
  leftNode = NULL;

  delete rightNode;
  rightNode = NULL;
}

// Dumps this node as: left operand, the "OR" keyword, right operand.
// The operands are handed to PrintNode two columns deeper.
void OrQueryNode::Print (ostream &s, int indent) const {
  const int childIndent = indent+2;

  PrintIndentText (s, (char*)"leftNode:\n", indent);
  PrintNode (s, leftNode, childIndent);

  PrintIndentText (s, (char*)"OR\n", indent);

  PrintIndentText (s, (char*)"rightNode:\n", indent);
  PrintNode (s, rightNode, childIndent);
}



// Starts with neither the query operand nor the excluded operand
// attached; both pointers are NULL.
NotQueryNode::NotQueryNode () {
  notNode = NULL;
  queryNode = NULL;
}

// Destroys both operand sub-trees (see Free).
NotQueryNode::~NotQueryNode () {
  this->Free ();
}

// Evaluates "queryNode NOT notNode".
//
// `result` receives the documents matched by queryNode that are not
// matched by notNode; a kept document keeps the rank queryNode gave
// it. If term frequencies were requested, notNode's entries are
// appended after queryNode's.
//
// With no queryNode the result is empty; with no notNode the result
// is simply queryNode's result.
//
// Query documents that sort after the last excluded document (or all
// of them, when notNode matched nothing) are kept: the merge runs
// until the query list is exhausted, not until either list is.
//
// NOTE(review): the merge relies on each child returning its document
// numbers in ascending order -- presumably guaranteed by the other
// node types; confirm.
void NotQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
			      QueryResult &result) const {
  result.Clear();

  // check for nothing
  if (queryNode == NULL) return;
  if (notNode == NULL) {
    queryNode->Calculate (indexData, queryInfo, result);
    return;
  }

  // calculate the result from the query tree and the result
  // from the not tree
  QueryResult notResult;
  queryNode->Calculate (indexData, queryInfo, result);
  notNode->Calculate (indexData, queryInfo, notResult);

  // merge the results, this can be done in place as the results
  // will always shrink (or stay the same) with a Not

  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (result.ranks.size() != result.docs.size() ||
		    notResult.ranks.size() != notResult.docs.size())) {
    // shouldn't ever get here; fall back to ignoring ranks
    haveAccum = false;
    assert (0);
  }

  // compact the surviving document numbers and ranks to the front
  mg_u_long queryI = 0;
  mg_u_long notI = 0;
  mg_u_long outI = 0;
  while (queryI < result.docs.size()) {
    // once the not-list is used up nothing further can be excluded,
    // so every remaining query document survives
    const bool notExhausted = (notI >= notResult.docs.size());

    if (notExhausted || result.docs[queryI] < notResult.docs[notI]) {
      // found a document not in the notResults
      result.docs[outI] = result.docs[queryI];
      if (haveAccum)
	result.ranks[outI] = result.ranks[queryI];
      ++queryI;
      ++outI;
    } else if (result.docs[queryI] > notResult.docs[notI]) {
      ++notI;
    } else {
      // the documents are equal, ignore both
      ++queryI;
      ++notI;
    }
  }

  // erase unused document numbers and ranks
  result.docs.erase(result.docs.begin()+outI, result.docs.end());
  if (haveAccum)
    result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());

  // combine term frequency information
  if (queryInfo.needTermFreqs)
    result.termFreqs.insert (result.termFreqs.end(),
			     notResult.termFreqs.begin(),
			     notResult.termFreqs.end());
}

// Deletes both operand sub-trees and resets the pointers to NULL, so
// calling Free again is harmless.
void NotQueryNode::Free () {
  // delete on a NULL pointer is a no-op, so no guard is needed
  delete queryNode;
  queryNode = NULL;

  delete notNode;
  notNode = NULL;
}

// Dumps this node as: query operand, the "NOT" keyword, excluded
// operand. The operands are handed to PrintNode two columns deeper.
void NotQueryNode::Print (ostream &s, int indent) const {
  const int childIndent = indent+2;

  PrintIndentText (s, (char*)"queryNode:\n", indent);
  PrintNode (s, queryNode, childIndent);

  PrintIndentText (s, (char*)"NOT\n", indent);

  PrintIndentText (s, (char*)"notNode:\n", indent);
  PrintNode (s, notNode, childIndent);
}


// Resets the node by clearing its tag name.
void TagNode::Clear () {
  UCArrayClear (tagName);
}

// Reads the fragment ranges recorded for this tag.
//
// `fragRange` is emptied first and then filled with one FragRange per
// occurrence of the tag (tagEl.frag_occur of them). It is left empty
// when the tag name is empty or the tag is not found in the tag
// dictionary.
//
// Each occurrence is stored in the inverted file as two Bblock-coded
// values, read in the order start then end. Each decoded value minus
// one is added to a running fragment number, which carries on from
// one occurrence to the next.
//
// NOTE(review): the return value of fseek is not checked.
void TagNode::Calculate (IndexData &indexData,
                         FragRangeArray &fragRange) const {
  fragRange.erase (fragRange.begin(), fragRange.end());
  if (tagName.empty()) return;

  // look the tag up in the tag dictionary
  block_dict_el tagEl;
  mg_u_long tagElNum;
  const bool tagFound =
    SearchBlockDictEl (indexData.dictFile, indexData.biTags,
                       indexData.bdh.entries_per_tblk,
                       indexData.bdh.tag_dict_size,
                       tagName, tagEl, tagElNum);
  if (!tagFound) return;

  // position the inverted file at this tag's entry and decode from there
  fseek (indexData.invfFile, tagEl.invf_ptr, SEEK_SET);
  stdio_bitio_buffer buffer(indexData.invfFile);

  // two coded values (start and end) per occurrence
  mg_u_long codedValues = tagEl.frag_occur*2;
  mg_u_long bblockParam =
    BIO_Bblock_Init (indexData.bdh.num_frags+codedValues, codedValues);

  mg_u_long runningFrag = 0;
  FragRange thisFrag;
  for (mg_u_long occurrence = 0; occurrence < tagEl.frag_occur; ++occurrence) {
    // start of the range
    mg_u_long startDelta = buffer.bblock_decode (bblockParam, NULL)-1;
    runningFrag += startDelta;
    thisFrag.rangeStart = runningFrag;

    // end of the range
    mg_u_long endDelta = buffer.bblock_decode (bblockParam, NULL)-1;
    runningFrag += endDelta;
    thisFrag.rangeEnd = runningFrag;

    fragRange.push_back (thisFrag);
  }

  buffer.done();
}

void TagNode::Free () {
  Clear ();
}

// Writes one indented line of the form  TAG: "<tagName>"
void TagNode::Print (ostream &s, int indent) const {
  PrintIndent (s, indent);
  s << "TAG: \"";
  s << tagName;
  s << "\"\n";
}


// Resets the node to its defaults: empty term, weight 1, stem
// method 0 and no proximity range restriction.
void TermNode::Clear () {
  startRange = NO_TERM_RANGE_START;
  endRange = NO_TERM_RANGE_END;
  stemMethod = 0;
  termWeight = 1;
  UCArrayClear (term);
}

// A new term node starts out with the default settings (see Clear).
TermNode::TermNode () {
  this->Clear ();
}

// Collects the fragment data for this term.
//
// FindWordNumbers maps the term (with its stem method) to a list of
// word numbers. For each of them the fragment data is read with
// ReadTermFragData and folded into `fragData` with CombineFragData,
// and the word itself is appended to `equivTerms`.
//
// Both `fragData` and `equivTerms` are emptied first, so they are
// empty on return when no word numbers are found.
//
// `fragLimits` is passed through to ReadTermFragData unchanged.
void TermNode::Calculate (IndexData &indexData,
                          bool needFragFreqs,
                          FragRangeArray *fragLimits,
                          FragData &fragData,
                          UCArrayVector &equivTerms) const {
  fragData.Clear ();
  equivTerms.erase (equivTerms.begin(), equivTerms.end());

  // word numbers of every dictionary entry equivalent to this term
  vector<mg_u_long> wordNums;
  FindWordNumbers (indexData, term, stemMethod, wordNums);

  FragData thisWordData;   // data read for the current word
  FragData accumulated;    // copy of everything merged so far
  UCArray thisWord;
  for (vector<mg_u_long>::iterator wordIt = wordNums.begin();
       wordIt != wordNums.end(); ++wordIt) {
    // read this word's data and remember the word itself
    ReadTermFragData (indexData, needFragFreqs, *wordIt,
                      thisWordData, fragLimits, thisWord);
    equivTerms.push_back (thisWord);

    // merge into the running total; the copy is needed because
    // fragData is also the output of the combine step
    accumulated = fragData;
    CombineFragData (needFragFreqs, thisWordData, accumulated, fragData);
  }
}

void TermNode::Free () {
  Clear ();
}

// Writes the term on one line at `indent`, followed by one line each
// for termWeight, stemMethod, startRange and endRange, two columns
// deeper.
void TermNode::Print (ostream &s, int indent) const {
  const int fieldIndent = indent+2;

  PrintIndent (s, indent);
  s << "TERM: \"" << term << "\"\n";

  PrintIndent (s, fieldIndent);
  s << "termWeight: " << termWeight << "\n";

  PrintIndent (s, fieldIndent);
  s << "stemMethod: " << stemMethod << "\n";

  PrintIndent (s, fieldIndent);
  s << "startRange: " << startRange << "\n";

  PrintIndent (s, fieldIndent);
  s << "endRange: " << endRange << "\n";
}


// Starts with no tag restriction attached (tagNodePtr is NULL).
ProxMatchQueryNode::ProxMatchQueryNode () {
  tagNodePtr = NULL;
}

// Destroys the tag node, if any, and empties the term list (see Free).
ProxMatchQueryNode::~ProxMatchQueryNode () {
  this->Free ();
}

// Evaluates a proximity / phrase match over this node's terms.
//
// Fragment limits:
//   - with a tag node, its ranges restrict the match;
//   - without one but with more than one term, the ranges of the
//     current level (indexData.curLevel) are used, so that a match
//     cannot span documents;
//   - with a single term and no tag node, no limits are applied.
//
// The first term's fragments seed `result` via FragsToQueryResult. A
// query with exactly one term returns at that point. Every further
// term is folded into `result` with AndFragsToQueryResult and into
// the running fragment data with AndCombineFragData, using that
// term's start/end range. RemoveUnwantedResults then prunes `result`
// against the combined fragment data; that call is also reached when
// the term list is empty.
void ProxMatchQueryNode::Calculate (IndexData &indexData,
                                    const QueryInfo &queryInfo,
                                    QueryResult &result) const {
  result.Clear ();

  // fragment frequencies are only needed when ranks are wanted
  const bool needFragFreqs = (queryInfo.sortByRank || queryInfo.needRankInfo);

  // work out the fragment ranges the match is restricted to
  FragRangeArray fragLimits;
  FragRangeArray *fragLimitsPtr = NULL;
  if (tagNodePtr != NULL) {
    tagNodePtr->Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;

  } else if (terms.size() > 1) {
    // multiple terms must be compared relative to some tag
    // otherwise phrase matches could span documents
    TagNode levelTag;
    levelTag.tagName = indexData.curLevel;
    levelTag.Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;
  }

  // name of the tag (or, failing that, the level) being matched within
  UCArray tagOrLevel = indexData.curLevel;
  if (tagNodePtr != NULL) tagOrLevel = tagNodePtr->tagName;

  FragData matchData;        // fragments matching all terms so far
  UCArrayVector equivTerms;
  TermNodeArray::const_iterator termIt = terms.begin();
  TermNodeArray::const_iterator termsEnd = terms.end();

  // the first term seeds both the fragment data and the result
  if (termIt != termsEnd) {
    termIt->Calculate (indexData, needFragFreqs, fragLimitsPtr,
                       matchData, equivTerms);

    FragsToQueryResult (indexData,
                        queryInfo,
                        matchData,
                        tagOrLevel,
                        termIt->term,
                        termIt->stemMethod,
                        termIt->termWeight,
                        equivTerms,
                        result);

    ++termIt;

    // a single-term query is finished here
    if (termIt == termsEnd) return;
  }

  // fold in each remaining term
  FragData nextTermData;
  for (; termIt != termsEnd; ++termIt) {
    termIt->Calculate (indexData, needFragFreqs,
                       fragLimitsPtr, nextTermData, equivTerms);

    AndFragsToQueryResult (indexData,
                           queryInfo,
                           nextTermData,
                           tagOrLevel,
                           termIt->term,
                           termIt->stemMethod,
                           termIt->termWeight,
                           equivTerms,
                           result);

    AndCombineFragData (needFragFreqs, matchData, nextTermData,
                        termIt->startRange,
                        termIt->endRange,
                        fragLimitsPtr);
  }

  // remove unwanted document numbers
  RemoveUnwantedResults (indexData,
                         queryInfo,
                         matchData,
                         result);
}

// Deletes the tag node (if any), resets its pointer to NULL and
// removes every term from the term list.
void ProxMatchQueryNode::Free () {
  // delete on a NULL pointer is a no-op, so no guard is needed
  delete tagNodePtr;
  tagNodePtr = NULL;

  terms.erase (terms.begin(), terms.end());
}

void ProxMatchQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, (char*)"PROXMATCH\n", indent);
  if (tagNodePtr != NULL) tagNodePtr->Print (s, indent+2);
  
  TermNodeArray::const_iterator here = terms.begin();
  TermNodeArray::const_iterator end = terms.end();
  while (here != end) {
    (*here).Print (s, indent+2);
    ++here;
  }
}


// Resets the node by clearing its term. startPosition and numTerms
// are left as they are.
void BrowseQueryNode::Clear () {
  UCArrayClear (term);
}

// Fills result.termFreqs with a window of the term list.
//
// FindNearestWordNumber supplies the word number nearest to `term`;
// `startPosition` is then applied as an offset to it and GetTermList
// is asked for `numTerms` entries starting from the resulting number.
// An offset that would land on or before zero is clamped to 1.
//
// The offset is applied without ever forming "number + startPosition"
// as an unsigned value when startPosition is negative. `number` is
// unsigned, so that sum would wrap around to a huge value whenever the
// offset reaches back further than `number`, and the clamp to 1 would
// never trigger.
// NOTE(review): this matters only if startPosition is a signed type,
// which is not visible from this file; for an unsigned startPosition
// the behaviour is unchanged.
void BrowseQueryNode::Calculate (IndexData &indexData, BrowseQueryResult &result) const {

  mg_u_long number=0;
  FindNearestWordNumber(indexData, term, number);

  if (startPosition < 0) {
    // magnitude of the negative offset, computed in unsigned
    // arithmetic so the most negative value is handled too
    const mg_u_long stepsBack =
      static_cast<mg_u_long>(0) - static_cast<mg_u_long>(startPosition);
    if (number > stepsBack) {
      number = number - stepsBack;
    }
    else {
      number = 1;
    }
  }
  else {
    number = number+startPosition;
    if (number == 0) {
      number = 1;
    }
  }

  GetTermList (indexData, number, numTerms, result.termFreqs);

}



void BrowseQueryNode::Free () {
  Clear();
}


// Dumps this node: the "BROWSEQUERYNODE" keyword at `indent`, then the
// term and the start position / term count, each on its own line two
// columns deeper.
void BrowseQueryNode::Print (ostream &s, int indent) const {
  const int fieldIndent = indent+2;

  PrintIndentText (s, (char*)"BROWSEQUERYNODE\n", indent);

  PrintIndent (s, fieldIndent);
  s << "TERM:" << term << "\n";

  PrintIndent (s, fieldIndent);
  s << "Start position: " << startPosition;
  s << ", Num terms: " << numTerms << "\n";
}



// Runs a query tree against the index and post-processes the result.
//
// Steps, in order:
//   1. Clear `result` and load the level named by queryInfo.docLevel;
//      return with an empty result if that fails or if there is no
//      query tree.
//   2. Evaluate the tree.
//   3. If ranks are in use (sortByRank or needRankInfo), divide every
//      accumulated weight by the document's approximate weight.
//   4. Record the total number of matching documents in
//      result.actualNumDocs, before any pruning.
//   5. If sortByRank, select and sort the best `keepCount` documents,
//      where keepCount is maxDocs, or all documents when maxDocs is 0
//      or larger than the number found.
//   6. If exactWeights (and ranks are in use), rescale the kept ranks
//      from approximate to exact document weights and, if sortByRank,
//      sort them again.
//   7. Trim ranks and docs down to keepCount; ranks are dropped
//      entirely unless needRankInfo is set.
void MGQuery (IndexData &indexData,
              const QueryInfo &queryInfo,
              const QueryNode *queryTree,
              QueryResult &result) {
  result.Clear ();

  // make sure level is current
  if (!indexData.LoadLevel (queryInfo.docLevel)) return;

  // nothing to evaluate
  if (queryTree == NULL) return;

  queryTree->Calculate (indexData, queryInfo, result);

  const bool wantRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);

  // make weights into ranks if needed
  if (wantRanks) {
    for (mg_u_long r = 0; r < result.ranks.size(); ++r) {
      result.ranks[r] /=
        indexData.weightData.GetLowerApproxDocWeight (result.docs[r]);
    }
  }

  // how many documents the caller will actually get back
  mg_u_long keepCount = queryInfo.maxDocs;
  if (keepCount == 0 || keepCount > result.docs.size()) {
    keepCount = result.docs.size();
  }

  // the total number of docs found, before pruning based on maxDocs
  result.actualNumDocs = result.docs.size();

  // sort results by rank if needed
  GTRank gtRank;
  if (queryInfo.sortByRank) {
    // heap over the first keepCount entries (needs to be in
    // ascending order for SelectAddHeap), then offer it the rest
    MakeHeap (result.docs.begin(), result.ranks.begin(),
              keepCount, gtRank);
    SelectAddHeap (result.docs.begin(), result.ranks.begin(), keepCount,
                   result.docs.begin()+keepCount,
                   result.ranks.begin()+keepCount,
                   (mg_u_long) result.ranks.size()-keepCount,
                   gtRank);

    // sort into descending order
    SortHeap (result.docs.begin(), result.ranks.begin(), keepCount, gtRank);
  }

  // get exact weights if needed
  if (queryInfo.exactWeights && wantRanks) {
    mg_u_long exactDiskPtr =
      indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;

    // undo the approximate normalisation and apply the exact one
    for (mg_u_long k = 0; k < keepCount; ++k) {
      result.ranks[k] = result.ranks[k] *
        indexData.weightData.GetLowerApproxDocWeight (result.docs[k]) /
        GetExactDocWeight (indexData.exactWeightsFile, exactDiskPtr,
                           result.docs[k]);
    }

    // re-sort top candidates based on exact weights
    if (queryInfo.sortByRank) {
      MakeHeap (result.docs.begin(), result.ranks.begin(),
                keepCount, gtRank);
      SortHeap (result.docs.begin(), result.ranks.begin(),
                keepCount, gtRank);
    }
  }

  // get rid of unwanted ranking results
  if (!result.ranks.empty()) {
    if (queryInfo.needRankInfo) {
      // keep the ranks of the documents being returned
      result.ranks.erase (result.ranks.begin()+keepCount, result.ranks.end());
    } else {
      // ranks were only needed for sorting
      result.ranks.erase (result.ranks.begin(), result.ranks.end());
    }
  }

  // remove extra docs that are unwanted
  result.docs.erase (result.docs.begin()+keepCount, result.docs.end());
}

// MGQuery variant that also reports document numbers at a second level.
//
// The query itself is run by the QueryResult overload at the level set
// in queryInfo; its docs, ranks, termFreqs and actualNumDocs are
// copied into `realresult`. realresult.levels then receives, for each
// entry of realresult.docs, the corresponding number at `level`.
//
// realresult.levels is simply a copy of the query-level document
// numbers when `level` is empty, equals queryInfo.docLevel, or cannot
// be loaded.
//
// NOTE(review): after a successful conversion indexData is left loaded
// at `level`, not at queryInfo.docLevel.
void MGQuery (IndexData &indexData,
              const QueryInfo &queryInfo,
              const QueryNode *queryTree,
              ExtQueryResult &realresult, UCArray &level) {
  realresult.Clear ();

  // do the normal query into a temporary result
  QueryResult plainResult;
  MGQuery (indexData, queryInfo, queryTree, plainResult);

  // carry the ordinary result fields across
  realresult.docs = plainResult.docs;
  realresult.ranks = plainResult.ranks;
  realresult.termFreqs = plainResult.termFreqs;
  realresult.actualNumDocs = plainResult.actualNumDocs;

  // no conversion wanted: the level numbers are the doc numbers
  const bool sameLevel = (queryInfo.docLevel == level || level.empty());
  if (sameLevel) {
    realresult.levels = plainResult.docs;
    return;
  }

  // keep the query level's converter before switching levels
  FragLevelConvert queryLevelConverter = indexData.levelConverter;

  // switch to the requested level; fall back to doc numbers on failure
  if (!indexData.LoadLevel(level)) {
    realresult.levels = plainResult.docs;
    return;
  }

  // map every query-level number to the requested level
  // NOTE(review): the outcome of LevelToLevel is not checked; the
  // original carried a "do an if ! here????" remark at this point.
  mg_u_long mappedNum = 0;
  for (mg_u_long idx = 0; idx < realresult.docs.size(); ++idx) {
    indexData.levelConverter.LevelToLevel(queryLevelConverter,
                                          realresult.docs[idx], mappedNum);
    realresult.levels.push_back(mappedNum);
  }
}



// Full text browsing: loads `level` and lets `node` fill `result`
// with its window of the term list.
//
// If the level cannot be loaded the browse is not attempted and
// `result` is left untouched, matching how MGQuery treats a failed
// LoadLevel.
void MGBrowseQuery (IndexData &indexData, UCArray &level,
		  const BrowseQueryNode &node,
		  BrowseQueryResult &result) {

  // make sure level is current
  if (!indexData.LoadLevel(level)) {
    return;
  }

  node.Calculate(indexData, result);

}
