/**************************************************************************
 *
 * mgpp_passes.cpp -- Driver for the various passes
 * Copyright (C) 1994  Neil Sharman
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#define _XOPEN_SOURCE 1
// This was added for Solaris, but it makes things worse on Solaris for me...
// #define _XOPEN_SOURCE_EXTENDED 1

#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
#pragma warning(disable:4786)
#endif

// need this to avoid bizarre compiler problems under VC++ 6.0
#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
# include <iostream>
#endif

#include "sysfuncs.h"

#ifdef HAVE_MALLINFO
# include <malloc.h> 
#endif

#if defined __WIN32__
# include <io.h>
# include "getopt_old.h"
# define close _close
# define open _open
#elif defined __CYGWIN__
#include "getopt_old.h"
#else
# include <unistd.h>
#endif

#include "memlib.h"
#include "messages.h"
#include "longlong.h"
#include "mg_files.h"
#include "mg.h"
#include "build.h"
#include "text.h"
#include "stemmer.h"
#include "FileBuf.h"
#include "TextEl.h"
#include "TagInfo.h"
#include "words.h"
// Number of entries in the PassData table below.
#define MAX_PASSES 5

// Bit flags OR-ed into `Passes` to select which passes run.
// Driver tests `Passes & (1 << i)` where i is an index into PassData, so
// each flag's value must equal 1 << (position of that pass in PassData).
#define SPECIAL 1
#define TEXT_PASS_1 2
#define TEXT_PASS_2 4
#define IVF_PASS_1 8
#define IVF_PASS_2 16

// NOTE(review): not referenced in the part of the file visible here --
// confirm whether anything still uses it.
#define MIN_BUF 8192


// Inverted-file buffer size in bytes; main overrides it from the -m option.
// Has external linkage -- presumably read by the ivf passes, TODO confirm.
mg_u_long invf_buffer_size = 5 * 1024 * 1024;	/* default: 5 * 1024 * 1024 bytes */

// Bit set of selected passes, built by main from the -S, -T and -I options.
static char Passes = 0;
// Input files still waiting to be opened: main points this into argv and
// OpenNextFile advances it as each file is consumed.
static char **files = NULL;
// Number of entries remaining in `files`.
static int num_files = 0;


// Describes one pass as a name plus three callbacks.
// Driver treats a return value of COMPERROR from any callback as fatal.
struct pass_data {
  // name reported in Driver's error messages
  char *name;
  // called once before any document is read; Driver passes the tag
  // information and the name given with -f
  int (*init) (const TagInfo &tagInfo, char *);
  // called once for every document read from the input
  int (*process) (const TagInfo &tagInfo, const TextElArray &doc);
  // called once after all input has been read; same arguments as init
  int (*done) (const TagInfo &tagInfo, char *);
};


// Table of all passes. The order matters: entry i is selected by bit
// (1 << i) of `Passes`, so it must line up with SPECIAL, TEXT_PASS_1,
// TEXT_PASS_2, IVF_PASS_1 and IVF_PASS_2 in that order.
static pass_data PassData[MAX_PASSES] = {
  {(char*)"special", init_special, process_special, done_special},
  {(char*)"text.pass1", init_text_1, process_text_1, done_text_1},
  {(char*)"text.pass2", init_text_2, process_text_2, done_text_2},
  {(char*)"ivf.pass1", init_ivf_1, process_ivf_1, done_ivf_1},
  {(char*)"ivf.pass2", init_ivf_2, process_ivf_2, done_ivf_2},
};

// printf-style usage text. It expects, in order: the program name (%s),
// then two (field width, string) pairs for the "%*s" conversions, which
// Usage fills with the program-name length and "" so that the continuation
// lines are indented to line up under the first one.
static char *usage_str = (char*)"\nUSAGE:\n"
"  %s [-J doc-tag] [-K level-tag] [-L index-level]\n"
"  %*s [-m invf-memory] [-T1] [-T2] [-I1] [-I2] [-S]\n"
"  %*s [-C] [-h] [-d directory] [-M maxnumeric] -f name\n\n";



/* Usage -- report an optional error, print the usage text and exit.
 *
 * err: message to report before the usage text, or NULL to print the
 *      usage text only.
 *
 * Does not return: always ends with exit (1).
 */
static void Usage (char *err) {
  // Hand err over as data, never as the format string, so a '%' inside
  // the message cannot be interpreted as a conversion.
  // NOTE(review): assumes Message is printf-style like FatalError -- confirm.
  if (err) Message ("%s", err);

  // "%*s" takes its field width as an int; strlen returns size_t, which
  // has a different size on LP64 platforms, so convert explicitly.
  int indent = (int) strlen (msg_prefix);

  fprintf (stderr, usage_str, msg_prefix, indent, "", indent, "");
  exit (1);
}




/* OpenNextFile -- close the current input and open the next queued file.
 *
 * in_fd: descriptor of the input just finished. It is closed only when
 *        it is greater than 0, so stdin (fd 0) is left open.
 *
 * Returns the descriptor of the newly opened file, or -1 when the queue
 * in `files` is empty. A file that cannot be opened is reported through
 * FatalError.
 */
int OpenNextFile (int in_fd) {
  if (in_fd > 0) {
    close (in_fd);
  }

  // nothing left in the queue
  if (num_files == 0) {
    return -1;
  }

  int next_fd = open (files[0], O_RDONLY);
  if (next_fd == -1) {
    FatalError (1, "Cannot open %s", files[0]);
  }

  // drop the entry that was just opened from the queue
  ++files;
  --num_files;

  return next_fd;
}


static void Driver (int in_fd, char *file_name,
		    const TagInfo &tagInfo, bool compatMode) {
  //  cout << tagInfo;

  int pass;

  mg_u_long numBytes = 0;
  mg_u_long numDocs = 0;

  // initialise all the passes
  for (pass = 0; pass < MAX_PASSES; ++pass) {
    if (Passes & (1 << pass)) {
      if (PassData[pass].init (tagInfo, file_name) == COMPERROR)
	FatalError (1, "Error during init of \"%s\"", PassData[pass].name);
    }
  }

  
  // set up various variables
  FileBuf buf;
  TextElArray doc;
  mg_u_long docLen = 0;

  // read and process each file (start with an open file)
  do {
    
    // read and process each document in this file
    buf.SetFD (in_fd);
    while (ReadDoc (buf, tagInfo.docTag, doc, docLen, compatMode)) {

      // give this document to each pass
      for (pass = 0; pass < MAX_PASSES; ++pass) {
	if (Passes & (1 << pass)) {
	  if (PassData[pass].process (tagInfo, doc) == COMPERROR)
	    FatalError(1,"Error during processing of \"%s\"",PassData[pass].name);
	}
      }
      
      // another document has been processed
      numBytes += docLen;
      ++numDocs;
    }
    
  } while ((in_fd = OpenNextFile (in_fd)) > 0);

  
  // do done for each pass
  for (pass = 0; pass < MAX_PASSES; ++pass) {
    if (Passes & (1 << pass)) {
      if (PassData[pass].done (tagInfo, file_name) == COMPERROR)
	FatalError (1, "Error during done of \"%s\"", PassData[pass].name);
    }
  }
}

int main (int argc, char **argv) {
  int ch, in_fd, maxnum;
  char *filename = NULL;
  bool compatMode = false;
  TagInfo tagInfo;
  tagInfo.SetDocTag ("Document");
  
  msg_prefix = argv[0];

  opterr = 0;
  while ((ch=getopt(argc, argv, "J:K:L:M:f:d:m:I:T:SCh"))!=-1){
    switch (ch) {
    case 'J':
      tagInfo.SetDocTag (optarg);
      break;
    case 'K':
      tagInfo.AddLevelTag (optarg);
      break;
    case 'L':
      tagInfo.SetIndexLevel (optarg);
      break;
    case 'M':
      maxnum = atoi(optarg);
      if (maxnum > 4 && maxnum < 512) {
	MAXNUMERIC = maxnum;
      }
      break;
    case 'f':
      filename = optarg;
      break;
    case 'd':
      set_basepath (optarg);
      break;
    case 'm':
      invf_buffer_size = (int) (atof (optarg) * 1024 * 1024);
      break;
    case 'I':
      if (*optarg == '1')
	Passes |= IVF_PASS_1;
      else if (*optarg == '2')
	Passes |= IVF_PASS_2;
      else
	Usage ((char*)"Invalid pass number");
      break;
    case 'T':
      if (*optarg == '1')
	Passes |= TEXT_PASS_1;
      else if (*optarg == '2')
	Passes |= TEXT_PASS_2;
      else
	Usage ((char*)"Invalid pass number");
      break;
    case 'S':
      Passes |= SPECIAL;
      break;
    case 'C':
      compatMode = true;
      break;
    case 'h':
    case '?':
      Usage (NULL);
    }
  }

  if (!filename || *filename == '\0')
    FatalError (1, "A document collection name must be specified.");

  if ((Passes & (IVF_PASS_1 | IVF_PASS_2)) == (IVF_PASS_1 | IVF_PASS_2))
    FatalError (1, "I1 and I2 cannot be done simultaneously.");

  if ((Passes & (TEXT_PASS_1 | TEXT_PASS_2)) == (TEXT_PASS_1 | TEXT_PASS_2))
    FatalError (1, "T1 and T2 cannot be done simultaneously.");

  if (!Passes)
    FatalError (1, "S, T1, T2, I1 or I2 must be specified.");

  if (optind < argc) {
    if ((in_fd = open (argv[optind], O_RDONLY)) == -1)
      FatalError (1, "Cannot open %s", argv[optind]);
    files = &argv[optind + 1];
    num_files = argc - (optind + 1);
    
  } else in_fd = 0;  // stdin

  
  if (compatMode) tagInfo.SetDocTag ("Document");

  // a document tag is also a level tag
  tagInfo.levelTags.insert (tagInfo.docTag); 
  
  Driver (in_fd, filename, tagInfo, compatMode);

  return (0);
}
