/**************************************************************************
 *
 * mgpp_invf_dump.cpp -- Program to dump out an inverted file
 * Copyright (C) 1994  Neil Sharman
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#define _XOPEN_SOURCE 1
// This was added for Solaris, but it makes things worse on Solaris for me...
// #define _XOPEN_SOURCE_EXTENDED 1

// need this to avoid bizarre compiler problems under VC++ 6.0
#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
# include <iostream>
#endif

/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
#if defined (__WIN32__) || defined (__CYGWIN__)
# include "getopt_old.h"
#else
# include <unistd.h>
#endif

#include "sysfuncs.h"
#include "messages.h"
#include "bitio_m_stdio.h"
#include "bitio_gen.h"
#include "netorder.h"  /* [RPAP - Jan 97: Endian Ordering] */

#include "mg_files.h"
#include "locallib.h"
#include "words.h"
#include "invf.h"
#include "WordData.h"

static void PrintInvfWord (FILE *invfFile,
			   invf_dict_header &idh,
			   invf_file_header &ifh,
			   word_dict_el &wordEl,
			   mg_u_long wordStart,
			   bool printFrags) {
  cout << wordEl.frag_occur << " \"" << wordEl.el << "\"\n";
  
  if (printFrags) {
    // seek to the appropriate place in the inverted file
    fseek (invfFile, wordStart, SEEK_SET);
    
    stdio_bitio_buffer buffer(invfFile);
    
    mg_u_long B = BIO_Bblock_Init (idh.num_frags, wordEl.frag_occur);
    mg_u_long fragNum = 0;
    mg_u_long i;
    for (i=0; i<wordEl.frag_occur; ++i) {
      mg_u_long delta = buffer.bblock_decode (B, NULL);
      fragNum += delta;
      cout << " " << fragNum;
      
      if (!ifh.word_level_index ) {
	mg_u_long count = buffer.gamma_decode (NULL);
	cout << "(" << count << ")";
      } else {
	cout << "(1)";
      }
    }
    
    cout << "\n";
  
    buffer.done();
  }
}

static void PrintInvfTag (FILE *invfFile,
			  invf_dict_header &idh,
			  invf_file_header &/*ifh*/,
			  dict_el &tagEl,
			  mg_u_long tagStart,
			  bool printFrags) {
  cout << tagEl.frag_occur << " \"<" << tagEl.el << ">\"\n";
  
  if (printFrags) {
    // seek to the appropriate place in the inverted file
    fseek (invfFile, tagStart, SEEK_SET);
    
    stdio_bitio_buffer buffer(invfFile);
    
    mg_u_long pTag = tagEl.frag_occur*2;
    mg_u_long B = BIO_Bblock_Init (idh.num_frags+pTag, pTag);
    mg_u_long fragNum = 0;
    mg_u_long i;
    for (i=0; i<tagEl.frag_occur; ++i) {
      mg_u_long delta = buffer.bblock_decode (B, NULL)-1;
      fragNum += delta;
      cout << " " << fragNum;
      cout << "-";
      delta = buffer.bblock_decode (B, NULL)-1;
      fragNum += delta;
      cout << fragNum;
    }
    
    cout << "\n";
    
    buffer.done();
  }
}

static void PrintHeaderInfo (invf_dict_header &idh,
			     invf_file_header &ifh) {
  cerr << "Lookback:         " << idh.lookback << "\n";
  cerr << "Word Dict Size:   " << idh.word_dict_size << "\n";
  cerr << "Tag Dict Size:    " << idh.tag_dict_size << "\n";
  cerr << "Num Documents:    " << idh.num_docs << "\n";
  cerr << "Num Fragments:    " << idh.num_frags << "\n";
  cerr << "Num Words:        " << idh.num_words << "\n";

  cerr << "Skip Mode:        " << ifh.skip_mode << "\n";
  cerr << "Word Level Index: " << ifh.word_level_index << "\n";

  cerr << "\n";
}


static void process_files (char *filename,
			   bool printHeader,
			   bool printWords,
			   bool printTags,
			   bool printFrags) {
  // open the dictionary
  FILE *dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
			MAGIC_STEM_BUILD, MG_ABORT);
  invf_dict_header idh;
  idh.Read (dictFile);

  // open the inverted file
  FILE *invfFile = open_file (filename, INVF_SUFFIX, "rb",
			MAGIC_INVF, MG_ABORT);
  
  invf_file_header ifh;
  ifh.Read (invfFile);

  if (ifh.skip_mode != SKIP_MODE_NO_SKIPS)
    FatalError (1, "The invf file contains skips. Unable to dump.");

  // print out header information
  if (printHeader) {
    PrintHeaderInfo (idh, ifh);
  }

  // open the inverted index
  FILE *invfIdxFile = open_file (filename, INVF_IDX_SUFFIX, "rb",
				 MAGIC_INVI, MG_ABORT);

  // go to the start of the word dictionary
  fseek (dictFile, idh.word_dict_start, SEEK_SET);

  // process all the words
  if (printWords) {
    mg_u_long wordNum;
    mg_u_long wordStart;
    word_dict_el wordEl;
    wordEl.SetNumLevels (idh.num_levels);
    for (wordNum=0; wordNum<idh.word_dict_size; ++wordNum) {
      wordEl.Read (dictFile, idh.num_levels);
      ReadUL (invfIdxFile, wordStart);
      PrintInvfWord (invfFile, idh, ifh, wordEl, wordStart, printFrags);
    }
  }

  // process all the tags
  if (printTags) {
    mg_u_long tagNum;
    mg_u_long tagStart;
    dict_el tagEl;
    for (tagNum=0; tagNum<idh.tag_dict_size; ++tagNum) {
      tagEl.Read (dictFile);
      ReadUL (invfIdxFile, tagStart);
      PrintInvfTag (invfFile, idh, ifh, tagEl, tagStart, printFrags);
    }
  }
  // close the open files
  fclose (invfIdxFile);
  fclose (invfFile);
  fclose (dictFile);
}


int main (int argc, char **argv) {
  char *dir_name, *filename = (char*)"";
  int ch;
  msg_prefix = argv[0];
  dir_name = getenv ("MGDATA");
  opterr = 0;

  bool printHeader = false;
  bool printWords = false;
  bool printTags = false;
  bool printFrags = false;
  
  msg_prefix = argv[0];
  while ((ch = getopt (argc, argv, "hrwtnf:d:")) != -1) {
    switch (ch) {
    case 'f':		// input file
      filename = optarg;
      break;
    case 'd':
      set_basepath(optarg);
      break;
    case 'r':
      printHeader = true;
      break;
    case 'w':
      printWords = true;
      break;
    case 'n':
      printFrags = true;
      break;
    case 't':
      printTags = true;
      break;
    case 'h':
    case '?':
      fprintf (stderr, "usage: %s [-h] [-r] [-w] [-t] [-n] [-f input_file]"
	       "[-d data directory]\n(-rwnt:print header, words, tags, fragnums)\n", 
	       argv[0]);
      exit (1);
    }
  }
  
  process_files (filename, printHeader, printWords, printTags, printFrags);

  return 0;
}
