/**********************************************************************
 *
 * oaiaction.cpp --
 *
 * Copyright (C) 2004-2010  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "oaiaction.h"
#include "oaitools.h"
#include "recptprototools.h"

#if defined(GSDL_USE_IOS_H)
#  if defined(__WIN32__)
#    include <strstrea.h> // vc4
#  else
#    include <strstream.h>
#  endif
#else
#  include <sstream>
#endif

#include <time.h>

// Builds an action with the given name. The configuration pointer starts out
// as NULL, the cached earliest datestamp starts out empty, and 'logout' is
// pointed at a newly allocated stream that appends to "oai.log".
oaiaction::oaiaction(const text_t &name)
{
  this->name               = name;
  this->configuration      = NULL;
  this->mEarliestDatestamp = "";
  this->logout             = new ofstream("oai.log", ios::app);
}

//----------------------------------------------------------------------------------------------

// Default validation hook; over-ridden by child classes.
// This base version ignores both arguments, clears the recorded error type
// and reports the request as valid.
bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
{
  errorType = "";
  return true;
}

//----------------------------------------------------------------------------------------------

/**********
 * Checks the supplied metadataPrefix against the set of metadata formats
 * held by the configuration. Returns true when the prefix is NOT in that
 * set, and false when it is.
 */
bool oaiaction::formatNotSupported(text_t &metaFormat)
{
  const bool known = (this->configuration->getMetadataSet().count(metaFormat) != 0);
  return !known;
}

//----------------------------------------------------------------------------------------------

/**********
 * Writes the (version 2.0) error element for the given error code to 'output'.
 * The element takes the form:
 * <error code="errorType">Description of error</error>
 * An error code that is not in the table below is written with an empty description.
 */
void oaiaction::output_error(ostream &output, text_t &errorType)
{
  // Each row pairs an error code with the description written for it
  static const char *const messages[][2] = {
    { "badArgument",
      "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax" },
    { "noRecordsMatch",
      "No record matches all the requested parameters" },
    { "cannotDisseminateFormat",
      "The metadata format specified is not supported by the item or by the repository" },
    { "idDoesNotExist",
      "The value of the identifier is unknown or illegal in this repository" },
    { "badVerb",
      "Value of the verb argument is illegal, missing, or repeated" },
    { "noMetadataFormats",
      "There are no metadata formats available for the item" },
    { "badResumptionToken",
      "The value of the resumptionToken argument is invalid or expired" },
    { "noSetHierarchy",
      "The repository does not support sets" }
  };
  static const int numMessages = sizeof(messages) / sizeof(messages[0]);

  text_t description = "";
  for (int i = 0; i < numMessages; ++i) {
    if (errorType == messages[i][0]) {
      description = messages[i][1];
      break;
    }
  }

  output << "  <error code=\"" << errorType << "\">" << description << "</error>\n";
}

//----------------------------------------------------------------------------------------------

// Returns a copy of this action's name.
text_t oaiaction::getName()
{
  return name;
}

//----------------------------------------------------------------------------------------------

/**********
 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
 * systems) and converts it to YYYY-MM-DD format, using UTC.
 *
 * Returns empty text if gmtime() cannot convert the supplied time.
 */
text_t oaiaction::parseDatestamp(time_t &rawtime)
{
  // gmtime() returns NULL when the value cannot be expressed as a broken-down
  // time; bail out rather than dereference a null pointer
  const tm *utc = gmtime(&rawtime);
  if (utc == NULL) {
    text_t unknown;
    return unknown;
  }

  const int month_num = utc->tm_mon + 1;  // tm_mon runs 0..11
  const int day_num   = utc->tm_mday;

  text_t lastModified;
  lastModified = (utc->tm_year + 1900);   // tm_year counts from 1900, so add it back for YYYY

  // Month and day are written as two digits, so 1..9 get a leading 0
  lastModified += "-";
  if (month_num < 10) {
    lastModified += "0";
  }
  lastModified += month_num;

  lastModified += "-";
  if (day_num < 10) {
    lastModified += "0";
  }
  lastModified += day_num;

  return lastModified;
}

//----------------------------------------------------------------------------------------------
/**********
 * Used by both versions to produce the text of the <responseDate> tag from the current time.
 * The text has the form YYYY-MM-DDTHH:MM:SS followed by a suffix, where T is simply the letter
 * 'T'. For version 1.1 the suffix is _LOCALTIME_ (the local offset from UTC, e.g. "+8:00",
 * "-5:00"); for any other version it is the character 'Z', as version 2.0 expects UTC.
 *
 * The text is appended to whatever 'date' already holds.
 */
void oaiaction::getResponseDate(text_t &date)
{
  time_t now;
  time(&now);               // current epoch time

  tm *utc = gmtime(&now);   // broken-down UTC time

  // Year first: tm_year counts from 1900, so add it back to get YYYY
  const int year = utc->tm_year + 1900;

  // The remaining fields in output order (tm_mon is 0..11, hence the + 1),
  // each paired with the separator that precedes it
  const int parts[5] = { utc->tm_mon + 1, utc->tm_mday, utc->tm_hour, utc->tm_min, utc->tm_sec };
  static const char *const separators[5] = { "-", "-", "T", ":", ":" };

  date += year;
  for (int i = 0; i < 5; ++i) {
    date += separators[i];
    if (parts[i] < 10) {
      date += "0";          // every field is two digits wide
    }
    date += parts[i];
  }

  // NOTE(review): the time above is UTC even when the v1.1 local offset is
  // appended below - confirm that is what v1.1 clients expect.
  if (this->configuration->getOAIVersion() == 110) {
    date += _LOCALTIME_;    // Defined in oaiaction.h
  }
  else {
    date += "Z";
  }
}

//----------------------------------------------------------------------------------------------
/**********
 * Builds the complete request element - including its tags - into 'requestURL'. The form
 * depends on the version of the OAI protocol running:
 *
 *   v1.1:  <requestURL>http://baseURL.com/oaimain?verb=someVerb&amp;key=value</requestURL>
 *   other: <request verb="someVerb" key="value">
 *                   http://baseURL.com/oaimain</request>
 *
 * Argument values are passed through html_safe(). When there are no arguments the element
 * holds just the base URL; in both forms the element is always closed.
 */
void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
{
  // Iterators for moving through the list of parameters (keys) specified
  text_tmap::const_iterator here = params.begin();
  text_tmap::const_iterator end  = params.end();
  const int numArgs = params.getSize();

  text_t baseURL = this->configuration->getBaseURL();
  int version = this->configuration->getOAIVersion();

  switch(version){
  case 110:
    requestURL = "  <requestURL>" + baseURL;

    // With no args there is no query string, but the element must still be
    // closed below so that the response stays well-formed
    if (numArgs != 0) {
      requestURL += "?";
      bool first = true;
      while (here != end) {
        if (!first) {
          requestURL += "&amp;"; // ampersand between arguments, XML-escaped
        }
        requestURL += (here->first + "=" + html_safe(here->second));
        first = false;
        ++here;
      }
    }
    requestURL += "</requestURL>\n";
    break;

  case 200:
  default:
    if (numArgs == 0) {
      requestURL = "  <request>" + baseURL + "</request>\n";
      break;
    }
    // Each argument becomes an attribute of the request tag
    requestURL = "  <request";
    for (; here != end; ++here) {
      requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");
    }
    requestURL += ">\n           " + baseURL + "</request>\n";
    break;
  }
}

//----------------------------------------------------------------------------------------------
/**********
 * Send the (OAI version-dependent) response text to the output stream.
 *
 * Versions up to and including 110 are treated as OAI 1.1: a failed validation produces only
 * a "Status: 400" header, otherwise the body is wrapped in an element named after the action.
 * All other versions are treated as OAI 2.0: the response is wrapped in <OAI-PMH>, and the
 * action's own element is only written when no error occurred; on a validation error an
 * <error> element is written instead.
 */
void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
{
  text_t date, requestURL;

  // Write the response date & time into 'date'
  this->getResponseDate(date);
  const int  version = this->configuration->getOAIVersion();
  // One flag used for every version test below, so that the opening and
  // closing tags can never disagree about which protocol is in force
  const bool legacy  = (version <= 110);

  // validate the action
  bool error = !this->validateAction(protocol, params);

  // raise an error for duplicated arguments and set the
  // error type "manually" here...
  if (params.hasDuplicateArg() && !error) {
    this->errorType = "badArgument";
    error = true;
  }

  // start with the required http header
  if (legacy && error) {
    output << "Status: 400 " << this->errorType << "\n";
    output << "Content-Type: text/xml\n\n";
    return;
  }

  output << "Status: 200\n";
  output << "Content-Type: text/xml\n\n";

  // output xml header parts
  output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";

  if (legacy) {
    // output OAI v1.1 action header tag
    output << "<" << this->name;
    output << "\n       xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
    output << "\n       xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
    output << "\n       xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
    output << "\n           http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
  }
  else {
    text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
    output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
    // output OAI v2.0 action header tag
    output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
	   << "         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
	   << "         xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
	   << "             http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
  }
  // output current time for response
  output << "  <responseDate>" << date << "</responseDate>\n";

  // output request URL. This differs depending on the OAI protocol version currently running, so
  // the entire field - including tags - must be put into the text_t variable by getRequestURL()
  this->getRequestURL(params, requestURL);
  output << requestURL;

  if (!error) {
    // The action's content is first written to a string stream, so that the leading
    // action tag can be suppressed when producing the content turns out to fail
#if defined(GSDL_USE_IOS_H)
    ostrstream outstream;
#else
    ostringstream outstream;
#endif

    // An action that outputs no content should raise an error state, which suppresses
    // the matching opening and closing tags in OAI 2.0
    error = !this->output_content(outstream, protocol, params);

    // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
    if (!error && !legacy) {
      this->output_action_tag(output, true);
    }

    // now output the body of the action content
#if defined(GSDL_USE_IOS_H)
    outstream << ends;  // Ensure outstream is null-terminated correctly
#endif
    output << outstream.str();
#if defined(GSDL_USE_IOS_H)
    // str() froze the buffer and handed ownership to the caller; unfreeze it
    // so that the stream releases the memory when it is destroyed
    outstream.rdbuf()->freeze(0);
#endif
  }
  else if (!legacy) {
    this->output_error(output, this->errorType);
  }

  // close out our response - v1.1 always needs the closing action tag, but v2.0 only
  // needs it if there was no error
  if (legacy || !error) {
    this->output_action_tag(output, false);
  }
  if (!legacy) {
    output << "</OAI-PMH>\n";
  }
}

// Writes the tag named after this action to 'output', followed by endl:
// the opening tag when openTag is true, the closing tag otherwise.
void oaiaction::output_action_tag(ostream &output, bool openTag)
{
  const char *lead = openTag ? " <" : " </";
  output << lead << this->name << ">" << endl;
}

// Writes the <header> element of a record: its identifier, its datestamp and,
// for OAI version 2.0 and above, one <setSpec> per set the record belongs to.
// A deleted_status of "D" marks the header as deleted; any other value
// ("" or "E" for exists) produces a plain header.
void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified, 
				     const text_t &deleted_status, const text_tarray &memberOf, int oaiVersion)
{
  const bool is_deleted = (deleted_status == "D");
  output << (is_deleted ? "    <header status=\"deleted\">" : "    <header>") << endl;

  output << "      <identifier>" << oaiLabel     << "</identifier>" << endl;
  output << "      <datestamp>"  << lastModified << "</datestamp>" << endl;

  // Find the collection id from oai:repos-id:collection:doc
  text_t collection_id;
  oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);

  if (oaiVersion >= 200) {
    // A doc is only a member of its collection set if the collection is valid.
    // Super collections can contain collections that are not individually valid.
    if (this->configuration->isValidCollection(collection_id)) {
      output << "      <setSpec>" << collection_id << "</setSpec>" << endl;
    }

    // One setSpec for every super collection this collection is part of
    text_tarray super_colls = this->configuration->getSuperCollectionsForThisCollection(collection_id);
    for (text_tarray::const_iterator super_coll = super_colls.begin();
	 super_coll != super_colls.end(); ++super_coll) {
      output << "      <setSpec>" << *super_coll << "</setSpec>" << endl;
    }

    // One setSpec for every subset, converted to its OAI form first
    for (text_tarray::const_iterator member = memberOf.begin();
	 member != memberOf.end(); ++member) {
      text_t oaiSet = *member;
      oaiclassifier::toOAI(collection_id, oaiSet);
      output << "      <setSpec>" << oaiSet << "</setSpec>" << endl;
    }
  }

  output << "    </header>" << endl;
}

// Method that looks for the requested metaname in the doc_info, and if found, sets the first
// of its values in the argument 'metavalue' and returns true. Returns false - leaving
// 'metavalue' untouched - when the metaname is absent or has no values.
bool oaiaction::getMeta(ResultDocInfo_t &doc_info, const text_t &metaname, text_t &metavalue)
{
  // use map::find, rather than testing map["array-index"], as the latter will create that index and
  // insert an empty value into it, when we just want to test for it.
  // See http://www.cplusplus.com/reference/map/map/operator[]/
  // See also http://stackoverflow.com/questions/1939953/how-to-find-if-a-given-key-exists-in-a-c-stdmap
  MetadataInfo_tmap::iterator found = doc_info.metadata.find(metaname);

  // An entry without any values has nothing to hand back; reading values[0]
  // from it would index past the end of the array
  if (found == doc_info.metadata.end() || found->second.values.size() < 1) {
    return false;
  }

  metavalue = found->second.values[0];
  return true;
}

// Works out the datestamp of a document from its metadata, visiting the entries in map order
// and comparing metadata names case-insensitively:
//  - a non-empty "gs.oaidatestamp" value (specified by the user as gs metadata) is copied into
//    'lastModified' as-is, on the assumption that it is already in the correct format;
//  - otherwise, provided 'lastModified' is still empty, a non-empty "oaiinf.timestamp" (new way:
//    timestamp from the etc/oai-inf database) or "oailastmodified" (old way, being phased out:
//    timestamp from the collection index db) value is read as an integer epoch time and
//    converted with parseDatestamp().
// The first entry that yields a value wins. 'lastModified' is left untouched if none does.
void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
{
  MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
  MetadataInfo_tmap::iterator end = doc_info.metadata.end();

  for (; current != end; ++current) {
    // An entry without values cannot supply a datestamp, and values[0] must
    // not be read from it
    if (current->second.values.size() < 1) {
      continue;
    }

    text_t metaname = current->first;
    lc(metaname); // lowercased for the string comparisons below

    if (metaname == "gs.oaidatestamp" && current->second.values[0] != "") {
      lastModified = current->second.values[0];
      return;
    }

    if (lastModified == "") {
      if (metaname == "oaiinf.timestamp" || metaname == "oailastmodified") {
	lastModified = current->second.values[0];
      }

      // if we've now set the lastModified value, convert it for display and return
      if (lastModified != "") {
	time_t raw_time = (time_t)lastModified.getint();
	lastModified = this->parseDatestamp(raw_time);
	return;
      }
    }
    // else keep looking for an oai timestamp
  }
}

// Reports whether the datestamp of document 'OID' in 'collection' lies within the inclusive
// range [from, until]. An empty 'from' or 'until' leaves that side of the range open.
// Returns false when the document's oai info cannot be retrieved.
bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection, 
			    const text_t &OID, recptproto *protocol, ostream &logout)
{
  FilterResponse_t response;
  text_tset        metadata;

  // get timestamp from etc/oai-inf.<db> now, no longer from index.db
  if (!get_oai_info(OID, collection, "", metadata, false, protocol, response, logout)) {
    return false;
  }

  ResultDocInfo_t doc_info = response.docInfo[0];
  text_t lastModDate;
  this->getLastModifiedDate(doc_info, lastModDate);

  // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison.
  // Without a FROM field the record can't be too early; without an UNTIL field it can't be
  // too recent.
  const bool not_too_early  = (from == "")  || (from <= lastModDate);
  const bool not_too_recent = (until == "") || (lastModDate <= until);

  return not_too_early && not_too_recent;
}

// Works out the earliest date/timestamp of the OAI repository, by comparing the earliest
// timestamp values of the collections and selecting the earliest among them.
//
// History: the earliest date/timestamp used to be simply the unix epoch; later it became the
// earliest among each collection's earliestDatestamp/builddate in build.cfg (mirroring the GS3
// way of doing it); and now, after implementing the OAI deletion policy, it is the earliest
// among each collection's OAI_EARLIESTTIMESTAMP_OID record's timestamp value in oai-inf.db
// (representing the time that collection was built for the very first time). There is
// deliberately no fallback to build.cfg for a collection that has no oai-inf.db (yet).
//
// If the request carries an identifier that names a collection, the search starts at that
// collection in the collections list and continues through the ones after it; otherwise it
// covers the whole list. Collections whose record cannot be retrieved, or yields no
// timestamp, are ignored. If no timestamp is found at all, the unix epoch "1970-01-01" is
// used. The result is stored in mEarliestDatestamp and returned.

text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs &params) {
	
	text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet
	
	// Get a list of the OAI-enabled collections available
	text_tarray& collections = this->configuration->getCollectionsList();
	if (collections.size() > 0)
	{	
		// get the identifier from the params, without its "oai:<repository id>:" prefix
		text_t identifier = params["identifier"];
		text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":";
		identifier.replace(oai_OID_prefix, "");

		// Get the current collection from the identifier
		text_t collection_name = "";
		oaiclassifier::toGSDL(collection_name, identifier);

		// Find the starting collection: the first one if no collection was
		// named, else the named one
		text_tarray::iterator collection_iterator = collections.begin();
		while (collection_iterator != collections.end())
		{
			if (collection_name == "" || collection_name == *collection_iterator)
			{
			  break;
			}

			++collection_iterator;
		}

		ofstream         logout("oai.log", ios::app);
		
		// Now loop through the remaining collections
		// to work out the earliest datestamp
		for (; collection_iterator != collections.end(); ++collection_iterator)
		{
			collection_name = (*collection_iterator);
			
			text_tset        metadata;
			FilterResponse_t response;

			// get timestamps from etc/oai-inf.<db> now, no longer from build.cfg
			// request the special record with OID=OAI_EARLIESTTIMESTAMP_OID for the collection
			bool status_ok = get_oai_info(OAI_EARLIESTTIMESTAMP_OID, collection_name, "", metadata, false, protocol, response, logout);
			if (!status_ok) {
			  continue;
			}

			ResultDocInfo_t doc_info = response.docInfo[0];
			text_t collEarliestTimestamp;
			// the timestamp we want lives in the "lastmodified" field of the OAI_EARLIESTTIMESTAMP_OID record
			this->getLastModifiedDate(doc_info, collEarliestTimestamp);

			// A record without a timestamp must not take part in the comparison:
			// empty text sorts before every real date, so it would wipe out the
			// earliest datestamp found so far
			if (collEarliestTimestamp == "") {
			  continue;
			}

			if (earliestDatestamp == "" || collEarliestTimestamp < earliestDatestamp) {
			  earliestDatestamp = collEarliestTimestamp;
			}
		}
		logout.close();
	}
	
	// if repository's earliestDatestamp is still unset, default to unix epoch time
	if(earliestDatestamp == "") { 		
		earliestDatestamp = "1970-01-01";
	}
	
	this->mEarliestDatestamp = earliestDatestamp;
	return mEarliestDatestamp;
}
