/**************************************************************************
 *
 * Terms.h -- Query related functions
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#ifndef TERMS_H
#define TERMS_H

#include "IndexData.h"
#include "mglong.h"

#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace\std\iostream>
#elif defined(GSDL_USE_IOS_H)
#  include <iostream.h>
#else
#  include <iostream>
#endif


// Settings that accompany a query: a few general options followed by
// flags naming the optional information the caller wants back.
// The constructor hands initialisation to Clear (), which is only
// declared in this header.
class QueryInfo {
public:
  // general query information
  UCArray docLevel;  // NOTE(review): presumably the tag of the level to
                     // search at -- confirm against the callers
  mg_u_long maxDocs; // 0 = all
  bool sortByRank;
  bool exactWeights;

  // information that needs to be returned
  bool needRankInfo;
  bool needTermFreqs;

  // declared here, defined outside this header
  void Clear ();
  // construction simply calls Clear ()
  QueryInfo () { Clear (); }
};


// Frequency information recorded for a single query term.
// The constructor hands initialisation to Clear (), which is only
// declared in this header.
class TermFreqData {
public:
  UCArray tag;  // level tag or query tag
  UCArray term; // unstemmed term
  int stemMethod; // note: an int here, whereas the free functions in this
                  // header take their stemMethod argument as mg_u_long
  UCArrayVector equivTerms; // the stemmed and casefolded variants of the term
  mg_u_long matchDocs; // number of levels containing this term
                       // (the term's frequency counted per level)
  mg_u_long termFreq;  // overall term frequency - number of words that
                       // are this term

  // declared here, defined outside this header
  void Clear ();
  // construction simply calls Clear ()
  TermFreqData () { Clear (); }
};

// stream output and equality comparison for TermFreqData
// (both are defined outside this header)
ostream &operator<< (ostream &s, const TermFreqData &t);
bool operator== (const TermFreqData &t1, const TermFreqData &t2);

// a sequence of per-term frequency records
typedef vector<TermFreqData> TermFreqArray;


// element types used by QueryResult: float ranks and mg_u_long
// document numbers
typedef vector<float> RankArray;
typedef vector<mg_u_long> DocNumArray;

// The outcome of a query: document numbers, ranks and per-term
// frequency records. The constructor, Clear () and printShort () are
// only declared here; their definitions live outside this header.
class QueryResult {
public:
  DocNumArray docs;
  RankArray ranks; // used for accumulators during query
  // NOTE(review): docs and ranks look like parallel arrays (one rank per
  // document number) -- confirm against the implementation
  
  TermFreqArray termFreqs;

  // NOTE(review): presumably the number of matching documents before any
  // maxDocs limit is applied -- confirm against the implementation
  mg_u_long actualNumDocs;
  void Clear ();
  QueryResult ();
  // writes to the supplied stream; declared non-const, so it cannot be
  // called on a const QueryResult
  void printShort(ostream &s);
};

// stream output and equality comparison for QueryResult
// (both are defined outside this header)
ostream &operator<< (ostream &s, const QueryResult &r);
bool operator== (const QueryResult &r1, const QueryResult &r2);

// element types used by FragData; both are vectors of mg_u_long and are
// therefore the same underlying type, the two names only document intent
typedef vector<mg_u_long> FragNumArray;
typedef vector<mg_u_long> FragFreqArray;

// Fragment-level data for a term: a match count plus fragment numbers
// and fragment frequencies. The constructor hands initialisation to
// Clear (), which is only declared in this header.
class FragData {
public:
  mg_u_long matchDocs;    // ft for level
  FragNumArray fragNums;
  // NOTE(review): the needFragFreqs flag taken by the functions declared
  // further down suggests fragFreqs is only filled on request -- confirm
  FragFreqArray fragFreqs;

  // declared here, defined outside this header
  void Clear ();
  // construction simply calls Clear ()
  FragData () { Clear (); }
};


class FragRange {
public:
  mg_u_long rangeStart;
  mg_u_long rangeEnd;

  void Clear () { rangeStart = rangeEnd = 0; }
  FragRange () { Clear (); }
};

// a sequence of fragment ranges; the functions below receive it by
// pointer under the name fragLimits
typedef vector<FragRange> FragRangeArray;



// Declared here, defined outside this header.
// equivWords is the only non-const argument besides indexData.
// NOTE(review): judging by the names, it receives the numbers of the
// words in indexData that are equivalent to term under stemMethod --
// confirm against the implementation.
void FindWordNumbers (IndexData &indexData,
		      const UCArray &term,
		      mg_u_long stemMethod,
		      vector<mg_u_long> &equivWords);

// Declared here, defined outside this header.
// fragData and termWord are non-const references; fragLimits is passed
// by pointer.
// NOTE(review): presumably reads the fragment data (and the word itself)
// for term number termNum into fragData / termWord, with fragLimits an
// optional restriction that may be null -- confirm against the
// implementation.
void ReadTermFragData (IndexData &indexData,
		       bool needFragFreqs,
		       mg_u_long termNum,
		       FragData &fragData,
		       FragRangeArray *fragLimits,
		       UCArray &termWord);

// Declared here, defined outside this header.
// Takes two read-only FragData inputs and a separate output object.
// NOTE(review): by contrast with AndCombineFragData this looks like the
// "or"/merge combination of f1 and f2 -- confirm against the
// implementation.
void CombineFragData (bool needFragFreqs,
		      const FragData &f1,
		      const FragData &f2,
		      FragData &outFragData);

// Declared here, defined outside this header.
// output will be in fragData (as this is an and operation)
// Note that startRange and endRange are mg_s_long, unlike the mg_u_long
// used for the other numeric arguments in this header.
// NOTE(review): presumably they are offsets relative to a fragment, and
// fragLimits may be null -- confirm against the implementation.
void AndCombineFragData (bool needFragFreqs,
			 FragData &fragData,
			 const FragData &comFragData,
			 mg_s_long startRange,
			 mg_s_long endRange,
			 const FragRangeArray *fragLimits);

// Declared here, defined outside this header.
// Takes the same argument list as AndFragsToQueryResult.
// result and equivTerms are non-const references; termData and queryInfo
// are read-only.
// NOTE(review): presumably turns the fragment data of one term into
// entries of result, weighted by termWeight -- confirm against the
// implementation.
void FragsToQueryResult (IndexData &indexData,
			 const QueryInfo &queryInfo,
			 const FragData &termData,
			 const UCArray &tag,
			 const UCArray &term,
			 mg_u_long stemMethod,
			 mg_u_long termWeight,
			 UCArrayVector &equivTerms,
			 QueryResult &result);

// Declared here, defined outside this header.
// Takes the same argument list as FragsToQueryResult.
// NOTE(review): presumably the "and" counterpart, combining termData
// with what result already holds instead of adding to it -- confirm
// against the implementation.
void AndFragsToQueryResult (IndexData &indexData,
			    const QueryInfo &queryInfo,
			    const FragData &termData,
			    const UCArray &tag,
			    const UCArray &term,
			    mg_u_long stemMethod,
			    mg_u_long termWeight,
			    UCArrayVector &equivTerms,
			    QueryResult &result);

// Declared here, defined outside this header.
// result is the only argument besides indexData that can be modified.
// NOTE(review): presumably drops from result the entries that termData
// does not support -- confirm against the implementation.
void RemoveUnwantedResults (IndexData &indexData,
			    const QueryInfo &queryInfo,
			    const FragData &termData,
			    QueryResult &result);

//-----------------------------------------------------------------
// new QueryResult class to handle retrieval of doc and level nums.
// Use this class with extended version of MGQuery

// QueryResult extended with a second array of numbers so that results
// can be reported at a different granularity from the one searched.
// The constructor and Clear () are only declared here.
class ExtQueryResult : public QueryResult {
public:
  DocNumArray levels; // used for returning a different granularity, eg
  // search sections but return Document numbers, or search Documents, 
  // return Section numbers.
 
  // Hides QueryResult::Clear (), which is not virtual: calling Clear ()
  // through a QueryResult reference or pointer runs the base version.
  void Clear ();
  ExtQueryResult ();
};

// stream output and equality comparison for ExtQueryResult
// (both are defined outside this header)
ostream &operator<< (ostream &s, const ExtQueryResult &r);
bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);

//------------------------------------------------------------
// new functions to handle full text browse

// Result type for the full text browse functions: holds nothing but a
// list of per-term frequency records. The constructor and Clear () are
// only declared here; their definitions live outside this header.
class BrowseQueryResult {
 public:
  TermFreqArray termFreqs;
  void Clear();
  BrowseQueryResult ();

};


// stream output and equality comparison for BrowseQueryResult
// (both are defined outside this header)
ostream &operator<< (ostream &s, const BrowseQueryResult &r);
bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2);

// Declared here, defined outside this header.
// number is a non-const reference.
// NOTE(review): presumably receives the number of the word in indexData
// closest to term, for use as a browse starting point -- confirm against
// the implementation.
void FindNearestWordNumber (IndexData &indexData,
			    const UCArray &term,
			    mg_u_long &number);

// Declared here, defined outside this header.
// Overload that delivers its output as TermFreqData records.
// NOTE(review): presumably fills terms with numTerms entries beginning
// at term number startTerm -- confirm against the implementation.
void GetTermList(IndexData &indexData,
		 mg_u_long startTerm,
		 mg_u_long numTerms,
		 TermFreqArray &terms);

// Declared here, defined outside this header.
// Overload that delivers its output as plain UCArray terms instead of
// TermFreqData records.
// NOTE(review): presumably fills terms with numTerms entries beginning
// at term number startTerm -- confirm against the implementation.
void GetTermList (IndexData &indexData,
		 mg_u_long startTerm,
		 mg_u_long numTerms,
		 UCArrayVector &terms);

#endif
