/**********************************************************************
 *
 * txt2db.cpp -- builds or updates a GDBM database from "[key] value"
 *               text records read on standard input
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the 
 * University of Waikato, New Zealand.
 *
 * Copyright (C) 1999  The New Zealand Digital Library Project
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **********************************************************************/


#ifdef _MSC_VER
#include "autoconf.h"
#include "systems.h"
#include "gdbmconst.h"

extern "C" {
#include "gdbm.h"
}

#else
#include <gdbm.h>
#include <errno.h>
#endif

#include "gsdlconf.h"
#include "text_t.h"
#include <stdlib.h>
#include <stdio.h>
#include <cstring>

#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace\std\iostream>
#elif defined(GSDL_USE_IOS_H)
#  include <iostream.h>
#else
#  include <iostream>
#endif


// Write the command-line synopsis and the list of supported options to
// the standard error stream. 'program_name' is echoed verbatim as the
// name of the executable in the synopsis line.
void print_usage (char *program_name) {
  cerr << "usage: " << program_name << " [options] database-name" << endl
       << endl
       << "options:" << endl
       << " -append        append to existing database" << endl
       << endl;
}


int main (int argc, char *argv[]) {
  int block_size = 0;
  GDBM_FILE dbf;
  char c;
  text_t key;
  text_t value;
  text_t tmp;
  int num_dashes = 0;

  // sanity check
  if (argc != 2 && argc != 3) {
    print_usage (argv[0]);
    exit (0);
  }
  
  char *dbname;
  int append = 0;
  int delkey = 0;

  if (argc == 3) {
    if (strcmp (argv[1], "-append") == 0) {
      append = 1;
      dbname = argv[2];
    } else {
      cerr << argv[1] << " is not a valid option." << endl << endl;
      print_usage (argv[0]);
      exit (0);
    }
  } else dbname = argv[1];

  
  // open the database
  // note that GDBM_FAST is obsolete on newer versions of gdbm
  int read_write = GDBM_NEWDB | GDBM_FAST;
  if (append) read_write = GDBM_WRCREAT | GDBM_FAST;
  
#ifdef _MSC_VER
  dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
#else
  dbf = gdbm_open  (dbname, block_size, read_write, 00664, NULL);
#endif
  if (dbf == NULL) {
    cerr << argv[0] << ": Couldn't create/open '" << dbname << "'" << endl;
    cerr << "  " << gdbm_strerror(gdbm_errno) << endl;
    cerr << "  gdbm errno = " << gdbm_errno << endl;
#ifndef _MSC_VER
    cerr << "  OS errno = " << errno << endl;
    perror("gdbm_open failed: ");
#endif

    exit (-1);
  }
  
  cin.get(c);
  while (!cin.eof()) {
    num_dashes = 0;
    key = "";
    value = "";
    
    // Parse out 'key' from [key]\n

    // scan for first occurrence of [
    while (!cin.eof() && c != '[') cin.get(c);

    if (!cin.eof()) cin.get(c); // skip [

    // now look for closing ], building up 'key' as we go
    while (!cin.eof() && c != ']') {
      key.push_back ((unsigned char)c);
      cin.get(c);
    }

    if (!cin.eof()) {
      // most likely an eol char, but if '-', then signifies record
      // is to be deleted, not added
      cin.get(c); 
      if (c == '-') {	
	delkey = 1;
      }
      else {
	delkey = 0;
      }
    }
    while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
    
    // look for 70 dashes
    tmp = "";
    while (!cin.eof() && (num_dashes < 70)) {
      if (c == '\n') {
	tmp.push_back ((unsigned char)c);
	num_dashes = 0;	
	
      } else if (c == '\r') { 
	// Here we are able to process both Windows-specific text files 
	// (containing carriage-return, newline) and Linux text files 
	// (containing only newline characters) by ignoring the Windows' 
	// carriage-return altogether so that we produce a uniform database
	// file from either system's type of text file. 
	// If we don't ignore the carriage return here, txt.gz files
	// produced on Windows cause a GS library running on Linux to break.
	num_dashes = 0;

      } else if (c == '-') {
	tmp.push_back ((unsigned char)c);
	++num_dashes;
	
      } else {
	value += tmp;
	value.push_back ((unsigned char)c);
	tmp = "";
	num_dashes = 0;
      }
      cin.get(c);
    }
    
    // if the key is not an empty string store this key-value pair
    if (!key.empty()) {
      // convert key to a datum datatype
      datum key_data;
      key_data.dptr = key.getcstr();
      if (key_data.dptr == NULL) {
	cerr << "NULL key_data.dptr" << endl;
	exit (0);
      }
      key_data.dsize = strlen(key_data.dptr);

      if (delkey) {
	// delete the given key
	if (gdbm_delete(dbf, key_data) < 0) {
	  cerr << "gdbm_delete returned an error trying to delete key " << key << ": " << gdbm_strerror (gdbm_errno) <<endl;
	}
      }
      else {

	// add/append

	// convert value to a datum datatype
	datum value_data;
	value_data.dptr = value.getcstr();
	if (value_data.dptr == NULL) {
	  cerr << "NULL value_data.dptr" << endl;
	  exit (0);
	}
	value_data.dsize = strlen(value_data.dptr);
      
	// store the value
	if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
	  cerr << "gdbm_store returned an error" << endl;
	  exit (0);
	}

	
	free(value_data.dptr);
      }

      free(key_data.dptr);
    }
  }
  
  gdbm_close (dbf);
  
  return 0;
}
