/**********************************************************************
 *
 * cgiutils.cpp -- general cgi utilities
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "cgiutils.h"
#include "fileutil.h"
#include "gsdlunicode.h"
#include "fileutil.h"
#include "unitool.h" // in mg, for output_utf8_char
#include <cstdlib>
#include <time.h>

#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace\std\iostream>
#  include <ospace\std\fstream>
#elif defined(GSDL_USE_IOS_H)
#  include <iostream.h>
#  include <fstream.h>
#else
#  include <iostream>
#  include <fstream>
#endif

// Master switch for the security changes (url-encoding of cgi arguments)
// implemented by safe_cgi_arg() and unsafe_cgi_arg(): both functions return
// immediately, leaving their argument untouched, while this is false.
// It is currently false, i.e. the url-encoding of arguments is switched off.
static bool do_safe_cgi_args = false;

// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to
// its numeric value 0..15. Any other input is handed back unchanged, which
// lets a caller spot most non-hex characters by testing for a result >= 16.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') {
    value = static_cast<unsigned short>(c - '0');
  } else if ('a' <= c && c <= 'f') {
    value = static_cast<unsigned short>(c - 'a' + 10);
  } else if ('A' <= c && c <= 'F') {
    value = static_cast<unsigned short>(c - 'A' + 10);
  }

  return value;
}


// Stores the two-digit, lowercase hexadecimal form of c in t, replacing
// whatever t held before. A value of 256 or more does not fit in two digits
// and yields "20" (the code of a space character) instead.
static void c2hex (unsigned short c, text_t &t) {
  t.clear();

  if (c >= 256) {
    t = "20"; // ' '
    return;
  }

  const unsigned short high = (c / 16) % 16;
  const unsigned short low = c % 16;

  t.push_back(high < 10 ? '0' + high : 'a' + (high - 10));
  t.push_back(low < 10 ? '0' + low : 'a' + (low - 10));
}

// Searches [first, last) for the first <CRLF> pair (13 followed by 10).
// Returns an iterator to the start of that pair, or to just past it when
// include_crlf is true. Returns last when the range holds no <CRLF>.
static text_t::iterator getline (text_t::iterator first,
                                 text_t::iterator last,
                                 bool include_crlf) {
  for (; first != last; ++first) {
    const bool at_crlf =
      ((first+1) != last) && (*first == 13) && (*(first+1) == 10);

    if (at_crlf) {
      return include_crlf ? first + 2 : first;
    }
  }

  return last;
}

// Finishes the multipart/form-data section that has just been read, then
// resets all per-section state (argname, argdata, filename, filedata,
// filetype, isfile) ready for the next section.
//
// Nothing is stored when argname is empty. Otherwise:
//  - a plain field (isfile == false) is appended to argstr as
//    "name=value", separated from earlier pairs by '&', after the value has
//    been escaped with cgi_safe_post_arg();
//  - a file field with a non-empty filename is written to a file under
//    <gsdlhome>/tmp named after the current time() value, and a fileupload_t
//    describing it is stored in fileuploads[argname].
//
// NOTE(review): the temporary file name only has one-second resolution, so
// two uploads handled within the same second appear to share a file name -
// confirm whether callers rely on something else to keep them apart.
static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
                                  text_t &filetype, bool &isfile, text_t &argstr,
                                  fileupload_tmap &fileuploads, const text_t &gsdlhome) {

  if (!argname.empty()) {

    if (!isfile) {
      // argdata includes a trailing <CRLF> that we must remove
      if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
        argdata.erase(argdata.end()-2, argdata.end());
      }
      if (!argstr.empty()) argstr += "&";

      // we need to convert arg to cgi safe variant - escape '&' and '%', '+', '=', turn space to +
      cgi_safe_post_arg(argdata);
      argstr += argname + "=" + argdata;

    } else if (!filename.empty()) {
      // filedata includes a trailing <CRLF> that we must remove
      if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
        filedata.erase(filedata.end()-2, filedata.end());
      }

      // create tmp_name for storing the file on disk, using the current timestamp
      text_t tmp_name(time(NULL));
      tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);

      char *tmp_name_c = tmp_name.getcstr();

      // write the file data to disk
      outconvertclass out;
      ofstream filestream(tmp_name_c, ios::out | ios::binary);
      filestream << out << filedata;
      filestream.close();
      // getcstr() buffers are released with the array form of delete
      // elsewhere in this file (see gsdl_getenv), so do the same here
      delete []tmp_name_c;

      // populate the fields of a fileupload_t and put it in the
      // fileuploads map
      fileupload_t fu;
      // filename may or may not include the path since some browsers
      // (e.g. IE) include the path while others (e.g. mozilla) do not.
      // Drop everything up to the last backslash so that the stored name
      // is consistent across browsers.
      text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
      if (slash != filename.end()) {
        filename = substr(slash+1, filename.end());
      }
      fu.name = filename;
      fu.type = filetype;
      // size of the uploaded data, after the trailing <CRLF> was removed
      fu.size = filedata.size();

      fu.tmp_name = tmp_name;
      fileuploads[argname] = fu;
    }
  }

  // reset the per-section state
  isfile = false;
  argname.clear();
  argdata.clear();
  filename.clear();
  filedata.clear();
  filetype.clear();
}

// Parses data obtained through a CGI POST request.
//
// content_type  - the request's content type
// raw_post_data - the request body
// fileuploads   - receives one entry per uploaded file (multipart only)
// gsdlhome      - passed on to process_post_section(), which stores
//                 uploaded files beneath it
//
// When content_type does not mention "multipart/form-data" the body is
// returned unchanged. Otherwise the body is split on the boundary given in
// content_type, every plain field is collected into a "name=value&..."
// string which is returned, and file fields are handed to
// process_post_section(). An empty string is returned (and a message
// written to cerr) when no usable boundary is found in content_type.
text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
                        fileupload_tmap &fileuploads, const text_t &gsdlhome) {

  text_t argstr;

  text_t::iterator content_type_begin = content_type.begin();
  text_t::iterator content_type_end = content_type.end();
  if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
    // a simple post request
    return raw_post_data;

  } else {
    // multipart/form data - may contain one or more uploaded files

    /*
      content_type should look something like the following
        multipart/form-data; boundary=---------------------------7d411e1a50330

      while raw_post_data will be as follows
        -----------------------------7d43e73450330<CRLF>
        Content-Disposition: form-data; name="e"<CRLF>
        <CRLF>
        d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
        -----------------------------7d43e73450330<CRLF>
        Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
        Content-Type: application/msword<CRLF>
        <CRLF>
        <Content of file><CRLF>

    */

    // first get the boundary from content-type. "boundary=" is 9 characters
    // long and must be followed by at least one more character. The distance
    // is measured rather than advancing the iterator first, so that nothing
    // is ever moved beyond the end of content_type.
    text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
    if ((boundary_begin == content_type_end) || ((content_type_end - boundary_begin) <= 9)) {
      // error
      cerr << "Error: malformed boundary? '" <<  content_type << "'" << endl;
      return "";
    }
    // skip over "boundary=" part of string
    boundary_begin += 9;
    text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));

    text_t argname, argdata, filename, filedata, filetype;
    bool isfile = false;
    text_t::iterator data_here = raw_post_data.begin();
    text_t::iterator data_end = raw_post_data.end();
    while (data_here != data_end) {

      // get the next available line (including the trailing <CRLF>)
      text_t line = substr(data_here, getline(data_here, data_end, true));

      data_here += line.size();
      text_t::iterator line_begin = line.begin();
      text_t::iterator line_end = line.end();
      if (findword(line_begin, line_end, boundary) != line_end) {
        // we've found a boundary
        process_post_section(argname, argdata, filename, filedata, filetype,
                             isfile, argstr, fileuploads, gsdlhome);

      } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
        // we've found the beginning of a new section
        argname.clear();
        argdata.clear();

        // get the name of this piece of form data
        text_t::iterator it = findword(line_begin, line_end, "name=\"");
        if (it == line_end) break; // error - this shouldn't happen
        it = findchar(it, line_end, '"');
        if ((it != line_end) && (it+1 != line_end)) {
          argname = substr(it+1, findchar(it+1, line_end, '"'));
        }

        // if this piece of form data contains filename="" it's a file
        // upload and needs to be treated special
        it = (findword(line_begin, line_end, "filename=\""));
        if (it != line_end) {
          // we've found a file upload
          isfile = true;
          it = findchar(it, line_end, '"');
          if ((it != line_end) && (it+1 != line_end)) {
            filename = substr(it+1, findchar(it+1, line_end, '"'));
          }

          // the next line is the content-type of this section
          line = substr(data_here, getline(data_here, data_end, true));
          data_here += line.size();
          line_begin = line.begin();
          line_end = line.end();
          it = (findword(line_begin, line_end, "Content-Type: "));
          if (it != line_end) {
            // "Content-Type: " is 14 characters long
            filetype = substr(it+14, getline(it, line_end, false));
          }
        }

        // eat up the next line as it's just a <CRLF> on its own. Truncated
        // input may have fewer than two characters left; stop at the end of
        // the data in that case instead of stepping past it, which would
        // make the loop condition above unable to terminate.
        if ((data_end - data_here) >= 2) data_here += 2;
        else data_here = data_end;

      } else {
        if (isfile) filedata += line;
        else argdata += line;
      }

    }

    // process last section
    process_post_section(argname, argdata, filename, filedata, filetype,
                         isfile, argstr, fileuploads, gsdlhome);

    return argstr;
  }
}

// Decodes a url-encoded cgi argument in place: converts %xx and + to their
// appropriate equivalents.
// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
// NOTE: this method is crap. It assumes the input encoding is utf-8. If it 
// actually was, then this returns utf-8, and needs to_uni on the 
// result to get it back to unicode. If the encoding wasn't utf-8, then the 
// output may be crap. Seems to work for 8 bit encodings.
// Really, this should be given the encoding, and should always return unicode. 
//
// The decoded text is written back into argstr through 'out' while 'in'
// reads ahead; the characters left over at the end are erased.
//
// Behaviour on malformed input, as implemented below:
//  - a '%' that is the last character is copied through as '%';
//  - hexdigit() returns a non-hex character unchanged, so a bad digit is
//    used with its own character code rather than rejected;
//  - NOTE(review): for "%" followed by a character that fails the c < 16
//    check, that character is output and the character after it is skipped
//    by the increment at the bottom of the loop - confirm this is intended.
void decode_cgi_arg (text_t &argstr) {
  text_t::iterator in = argstr.begin();
  text_t::iterator out = in;
  text_t::iterator end = argstr.end();
  
  while (in != end) {
    if (*in == '+') *out = ' ';
    
    else if (*in == '%') {
      // c stays '%' if nothing follows the percent sign
      unsigned short c = '%';
      ++in;
      if (in != end) { // this is an encoding...
	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
	  // this assumes a short int is at least 16 bits...
	  ++in;  
	  // read up to four digits, most significant first; 'in' is left on
	  // the last digit read so that the common increment below passes it
	  if (in != end)
	    c=hexdigit(*in++) << 12;
	  if (in != end)
	    c+=hexdigit(*in++) << 8;
	  if (in != end)
	    c+=hexdigit(*in++) << 4;
	  if (in != end)
	    c+=hexdigit(*in);
	  /* BAD!! The following assumes the interface is using utf-8. But
	     at this point we don't know what encoding we are using, unless
	     we can parse it out of the string we are currently decoding... */
	  // build a one-character string holding c and convert it with to_utf8
	  text_t uni=" ";
	  uni[0]=c;
	  text_t utf8=to_utf8(uni);
	  // emit every converted character except the last here; the last one
	  // goes out through the common "*out = c" below
	  int last_byte=utf8.size()-1;
	  for (int i=0;i<last_byte;++i)
	    *out++ = utf8[i];
	  c=utf8[last_byte];
	} else {  // convert %HH to hex value
	  c = hexdigit (*in);
	  ++in;
	  if (in != end && c < 16) { // sanity check on the previous character
	    c = c*16 + hexdigit (*in);
	  }
	}
      }
      *out = c;
    } else *out = *in;
    
    // 'in' may already be at the end after a truncated escape sequence
    if (in != end) ++in;
    ++out;
  }
  
  // remove the excess characters
  argstr.erase (out, end);
  
}

// Escapes the special characters of a POST argument value in place so that
// they do not interfere with argument parsing once the value is part of a
// GET style string: '&' becomes "%26", '%' becomes "%2525", '+' becomes
// "%2B", '=' becomes "%3D" and a space becomes '+'. Every other character is
// copied through unchanged.
void cgi_safe_post_arg(text_t &argstr) {

  text_t escaped = "";

  for (text_t::iterator ch = argstr.begin(), stop = argstr.end(); ch != stop; ++ch) {
    switch (*ch) {
      case '&': escaped += "%26"; break;
      case '%': escaped += "%2525"; break;
      case '+': escaped += "%2B"; break;
      case '=': escaped += "%3D"; break;
      case ' ': escaped += "+"; break;
      default:
        // push_back appends the character itself; += would append it as an int
        escaped.push_back(*ch);
        break;
    }
  }

  // swap the original content for the escaped version
  argstr.erase (argstr.begin(), argstr.end());
  argstr += escaped;
}



// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
// apache log and that log file can be included in a local file inclusion (LFI) or 
// remote file include (RFI) attack.
// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
// break out of an html/XML/javascript context.
void safe_cgi_arg (const text_t &key, text_t &argstr) {
  if(!do_safe_cgi_args) {
    return;
  }

  text_t::iterator in = argstr.begin();
  text_t out = "";
  text_t::iterator end = argstr.end();
  
  while (in != end) {
    if (*in == '<') out += "%3C";
    else if (*in == '>') out += "%3E";
    else if (*in == '&') out += "%26";
    else if (*in == '\"') out += "%22";
    else if (*in == '\'') out += "%27";
    //else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
    else { // append whatever char is in *in, but as a char, not int
            //out += *in; // appends as int
      out.push_back(*in);
    }
    ++in;
  }
  
  argstr.erase (argstr.begin(), end);
  argstr += out;  
}


// Reverses the url-encoding applied by safe_cgi_arg() for selected characters.
//
// chars - the characters to decode, or "all"/"ALL" for the full set <>&"'/
// str   - the string in which every occurrence of the url-encoded form
//         (%XX, upper case hex) of those characters is replaced by the
//         character itself
//
// Does nothing while do_safe_cgi_args is false.
void unsafe_cgi_arg(const text_t &chars, text_t &str) {
  if(!do_safe_cgi_args) {
    return;
  }

  text_t allchars = "<>&\"\'/";

  text_t chars_to_decode = (chars == "all" || chars == "ALL") ? allchars : chars;

  // room for '%', two hex digits and the terminating null byte
  char hex_char[4];

  // using sprintf to urlencode a character. See http://www.programmingforums.org/thread15443.html
  for (text_t::iterator in = chars_to_decode.begin(), end = chars_to_decode.end();
       in != end; ++in) {

    // *in is a character from the chars_to_decode list.
    // A character code above 0xFF needs more than two hex digits, which
    // would overflow hex_char; such a character has no %XX form to look
    // for, so skip it.
    if (*in > 0xFF) continue;

    // 1. create the url-encoded value of the char *in in variable hex_char
    // sprintf adds in a null byte at the end
    sprintf(hex_char, "%%%02X", static_cast<unsigned int>(*in));

    // 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
    text_t tmp = "";
    tmp.push_back(*in);

    // 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
    str.replace(hex_char, tmp);
  }
}


// Splits a "key=value&key=value..." string into args, which is cleared first.
//
// Every value is decoded with decode_cgi_arg(), passed through
// safe_cgi_arg() and stamped with the encoding of argstr. Pairs with an
// empty key are dropped.
//
// If an arg occurs multiple times (as is the case with multiple checkboxes
// using the same name) a comma separated list of all the values is built.
// This uses a hack that encodes naturally occurring commas as %2C - values
// will therefore need to be decoded again before use. It should use an
// array instead.
void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
                     cgiargsclass &args) {
  args.clear();

  // get seems to be not unicode, while post is, so don't want to just assume encoding is 1 (not unicode)
  const unsigned short args_encoding = argstr.getencoding();

  text_t::const_iterator here = argstr.begin();
  text_t::const_iterator end = argstr.end();

  text_t key, value;

  while (here != end) {
    // pull out the next key=value pair
    here = getdelimitstr (here, end, '=', key);
    here = getdelimitstr (here, end, '&', value);

    // convert %xx and + to their appropriate equivalents
    decode_cgi_arg (value);

    // mitigate obvious cross-site scripting hacks in URL cgi-params
    safe_cgi_arg(key, value);

    value.setencoding(args_encoding);

    if (key.empty()) continue;

    const cgiarginfo *info = argsinfo.getarginfo (key);

    // a known, single valued argument is stored as it is
    if ((info != NULL) && !info->multiplevalue) {
      args.setarg (key, value, cgiarg_t::cgi_arg);
      continue;
    }

    text_t newvalue;
    if (info == NULL) {
      // Without info we can't tell if the arg is multiple value or not.
      // Because dynamically named arguments need to be multivalued, always
      // assume multiplevalue = true. If the arg is not multi valued, then
      // the commas need to be decoded by whoever uses it.
      if (args.getarg(key) == NULL) {
        newvalue = encode_commas(value);
      } else {
        newvalue = args[key];
        newvalue += "," + encode_commas(value);
      }
    } else {
      // known multiple value argument: only put a comma in front when the
      // existing value also came from the cgi arguments
      newvalue = args[key];
      if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
      newvalue += encode_commas(value);
    }

    // encode_commas returns a text_t without encoding bit set
    newvalue.setencoding(args_encoding);
    args.setarg (key, newvalue, cgiarg_t::cgi_arg);
  }
}

// Returns a copy of intext in which every ',' has been replaced by "%2C".
text_t encode_commas (const text_t &intext) {

  text_t encoded;

  for (text_t::const_iterator ch = intext.begin (), stop = intext.end ();
       ch != stop; ++ch) {
    if (*ch != ',') {
      encoded.push_back (*ch);
    } else {
      encoded += "%2C";
    }
  }

  return encoded;
}

// Returns a copy of intext in which every "%2C" (or "%2c") has been turned
// back into ','. This undoes encode_commas().
text_t decode_commas (const text_t &intext) {

  text_t outtext;

  const int intext_len = intext.size();
  for (int i = 0; i < intext_len; i++) {
    // an escaped comma needs three characters, so it can only start where
    // at least two more characters follow
    const bool escaped_comma =
      ((i+2) < intext_len) &&
      (intext[i] == '%') && (intext[i+1] == '2') &&
      (intext[i+2] == 'C' || intext[i+2] == 'c');

    if (escaped_comma) {
      outtext.push_back(',');
      i += 2; // skip the "2C"; the loop increment skips the '%'
    } else {
      outtext.push_back (intext[i]);
    }
  }

  return outtext;
}

// Replaces every '-' in intext with "Zz-" and then makes the result cgi safe.
// Set utf8 to true if input is in utf-8 (cgi_safe_utf8 is used), otherwise
// the input is expected in unicode (cgi_safe_unicode is used).
text_t minus_safe (const text_t &intext, bool utf8) {

  text_t escaped;

  for (text_t::const_iterator ch = intext.begin (), stop = intext.end ();
       ch != stop; ++ch) {
    if (*ch != '-') escaped.push_back (*ch);
    else escaped += "Zz-";
  }

  if (utf8) return cgi_safe_utf8 (escaped);
  return cgi_safe_unicode (escaped);
}

// Url-encodes utf-8 input. Letters, digits, '%' and '-' are copied through,
// a space becomes '+', and every other value up to 255 becomes '%' followed
// by its two-digit hex code. A value above 255 is not a utf-8 byte: it is
// dropped from the output and a warning is written to cerr.
text_t cgi_safe_utf8 (const text_t &intext) {
  text_t outtext;
  text_t hex;

  for (text_t::const_iterator here = intext.begin (), end = intext.end ();
       here != end; ++here) {
    const unsigned short c = *here;

    const bool copy_as_is =
      ((c >= 'a') && (c <= 'z')) ||
      ((c >= 'A') && (c <= 'Z')) ||
      ((c >= '0') && (c <= '9')) ||
      (c == '%') || (c == '-');

    if (copy_as_is) {
      outtext.push_back(c);
      continue;
    }

    if (c == ' ') {
      outtext.push_back('+');
      continue;
    }

    if (c > 255) { // not utf-8 character
      cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
      continue;
    }

    // everything else
    outtext.push_back('%');
    c2hex(c, hex);
    outtext += hex;
  }

  return outtext;
}
// Url-encodes unicode input. Letters, digits, '%' and '-' are copied
// through and a space becomes '+'. A value above 127 is passed to
// output_utf8_char() and each byte it produces is written as '%' plus two
// hex digits. Every remaining value becomes '%' plus its two-digit hex code.
text_t cgi_safe_unicode (const text_t &intext) {
  text_t outtext;
  text_t hex;

  for (text_t::const_iterator here = intext.begin (), end = intext.end ();
       here != end; ++here) {
    const unsigned short c = *here;

    const bool copy_as_is =
      ((c >= 'a') && (c <= 'z')) ||
      ((c >= 'A') && (c <= 'Z')) ||
      ((c >= '0') && (c <= '9')) ||
      (c == '%') || (c == '-');

    if (copy_as_is) {
      outtext.push_back(c);
      continue;
    }

    if (c == ' ') {
      outtext.push_back('+');
      continue;
    }

    if (c > 127) { // unicode character
      unsigned char buf[3] = { '\0', '\0', '\0' }; // up to 3 bytes
      output_utf8_char(c, buf, buf+2);

      // the first two bytes are always written, the third only when it
      // has been filled in
      for (int i = 0; i < 3; ++i) {
        if ((i == 2) && !buf[2]) break;
        outtext.push_back('%');
        c2hex(buf[i], hex);
        outtext += hex;
      }
      continue;
    }

    // everything else
    outtext.push_back('%');
    c2hex(c, hex);
    outtext += hex;
  }

  return outtext;
}




// Reads the next '-' delimited argument name from [first, last) into
// argname and returns the iterator that getdelimitstr() hands back.
static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
                                                 text_t::const_iterator last,
                                                 text_t &argname) {
  return getdelimitstr (first, last, '-', argname);
}


// check_save_conf_str checks the configuration string for
// the saved args and makes sure it does not conflict with
// the information about the arguments. If an error is encountered
// it will return false and the program should not produce any
// output.
bool check_save_conf_str (const text_t &saveconf, 
			  const cgiargsinfoclass &argsinfo,
			  ostream &logout) {
  outconvertclass text_t2ascii;

  text_tset argsset;
  text_t::const_iterator saveconfhere = saveconf.begin ();
  text_t::const_iterator saveconfend = saveconf.end ();
  text_t argname;
  const cgiarginfo *info;

  // first check to make sure all saved arguments can be saved
  
  while (saveconfhere != saveconfend) {
    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);

    if (!argname.empty()) {
      // save the argument name for later
      argsset.insert (argname);

      // check the argument
      info =  argsinfo.getarginfo (argname);
      if (info == NULL) {
	logout << text_t2ascii << "Error: the cgi argument \"" << argname 
	       << "\" is used in the configuration string for the\n"
	       << "saved arguments but does not exist as a valid argument.\n\n";
	return false;
      }
      if (info->savedarginfo == cgiarginfo::mustnot) {
	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	       << "\" is used in the configuration string for the\n"
	       << "saved arguments but has been specified as an argument whose\n"
	       << "state must not be saved.\n\n";
	return false;
      }
    }
  }


  // next check that all saved arguments that should be saved
  // are saved
  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();

  while (argsinfohere != argsinfoend) {
    if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
      logout << text_t2ascii << "Error: the cgi argument \"" 
	     << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
	     << "be save but was not listed in the saved arguments.\n\n";
      return false;
    }

    ++argsinfohere;
  }
  
  return true; // made it, no clashes
}


// create_save_conf_str will create a configuration string
// based on the information in argsinfo: the short names of all arguments
// which "must" be saved, joined by '-'. This method of configuration
// is not recommended as small changes can produce large changes in
// the resulting configuration string (for instance a totally different
// ordering).
text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
                             ostream &/*logout*/) {
  text_t saveconf;
  bool need_separator = false;

  for (cgiargsinfoclass::const_iterator entry = argsinfo.begin (), last = argsinfo.end ();
       entry != last; ++entry) {
    // only arguments that must be saved are listed
    if ((*entry).second.savedarginfo != cgiarginfo::must) continue;

    if (need_separator) saveconf.push_back ('-');
    need_separator = true;

    saveconf += (*entry).second.shortname;
  }

  return saveconf;
}


// expand_save_args will expand the saved (compressed) arguments held in the
// "e" argument, based on saveconf, placing the results in args if they are
// not already defined. Returns true when there is no "e" argument or it is
// empty. If it encounters an error it will return false and output more
// information to logout.
//
// saveconf lists the argument names in the order their values appear in
// "e". A single character argument takes exactly one character of "e". A
// multiple character argument takes everything up to the next '-'; a '-'
// inside such a value is escaped as "Zz-" (see minus_safe()).
bool expand_save_args (const cgiargsinfoclass &argsinfo,
                       const text_t &saveconf,
                       cgiargsclass &args,
                       ostream &logout) {
  outconvertclass text_t2ascii;

  text_t *arg_e = args.getarg("e");
  if (arg_e == NULL) return true; // no compressed arguments
  if (arg_e->empty()) return true; // no compressed arguments

  text_t argname, argvalue;
  const cgiarginfo *argnameinfo;

  text_t::const_iterator saveconfhere = saveconf.begin();
  text_t::const_iterator saveconfend = saveconf.end();

  text_t::iterator arg_ebegin = arg_e->begin();
  text_t::iterator arg_eend = arg_e->end();
  text_t::iterator arg_ehere = arg_ebegin;
  while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);

    if (!argname.empty()) {
      // found another entry
      argnameinfo = argsinfo.getarginfo (argname);

      if (argnameinfo == NULL) {
        // no information about the argument could be found
        // we can't keep going because we don't know whether
        // this argument is a single or multiple character value
        logout << text_t2ascii << "Error: the cgi argument \"" << argname
               << "\" was specified as being a compressed argument\n"
               << "but no information about it could be found within the "
               << "cgiargsinfoclass.\n";
        return false;

      } else {

        // found the argument information
        if (argnameinfo->multiplechar) {
          // sav remembers where this value starts
          text_t::const_iterator sav = arg_ehere;
          arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
          // looking three characters back is only possible when at least
          // three characters have been consumed
          if (distance(arg_ebegin, arg_ehere) > 2) {
            // replace any '-' chars escaped with 'Zz'
            bool first = true;
            while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
              // the value is rebuilt from scratch once an escape is seen
              if (first) argvalue.clear();

              // Check that there is another hyphen in the "e" argument
              // before stepping past it, so that we never wander off beyond
              // the end of the string.
              text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
              if (minus_itr == arg_eend)
              {
                logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
                return false;
              }
              arg_ehere = minus_itr + 1;

              // copy everything up to the hyphen just found, leaving out
              // the 'Z' and 'z' of each "Zz-" escape. The character before
              // sav is only examined when sav is not the very first
              // character of the "e" argument, as there is nothing in front
              // of that one to read.
              while (sav != (arg_ehere-1)) {
                if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
                    !((sav != arg_ebegin) && (*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
                ++sav;
              }
              first = false;
            }
          }
          argvalue.setencoding(1); // other encoding
          if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
        } else {
          // single character argument
          args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
          ++arg_ehere;
        }
      }
    }
  }

  return true;
}


// adds the default values for those arguments which have not
// been specified
void add_default_args (const cgiargsinfoclass &argsinfo,
		       cgiargsclass &args,
		       ostream &/*logout*/) {
  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();

  while (argsinfohere != argsinfoend) {
    if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
      args.setdefaultarg ((*argsinfohere).second.shortname,
			  (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
    }
    ++argsinfohere;
  }
}

// Attaches uploaded files to their cgi arguments. An entry of fileuploads
// is passed to args.setargfile() only when argsinfo knows an argument of
// that name, that argument is flagged as a file upload, and the temporary
// file recorded for the upload exists. The logout parameter is not used.
void add_fileupload_args (const cgiargsinfoclass &argsinfo,
                          cgiargsclass &args,
                          fileupload_tmap &fileuploads,
                          ostream &/*logout*/) {

  fileupload_tmap::const_iterator this_file = fileuploads.begin();
  fileupload_tmap::const_iterator end_file = fileuploads.end();
  while (this_file != end_file) {
    const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
    if (info != NULL) {

      if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {

        args.setargfile((*this_file).first, (*this_file).second);
      }
    }
    ++this_file;
  }
}

// compress_save_args will compress the arguments named in saveconf and
// return them in compressed_args. If an error was encountered
// compressed_args will be set to "", an error will be
// written to logout, and the function will return false.
//
// A multiple character argument is written with minus_safe() and followed
// by '-' unless it is the last entry of saveconf. A single character
// argument must hold exactly one character, which is written as it is.
bool compress_save_args (const cgiargsinfoclass &argsinfo,
                         const text_t &saveconf,
                         cgiargsclass &args,
                         text_t &compressed_args,
                         outconvertclass &outconvert,
                         ostream &logout) {
  outconvertclass text_t2ascii;

  compressed_args.clear();

  text_t argname, argvalue;
  const cgiarginfo *argnameinfo;

  text_t::const_iterator saveconfhere = saveconf.begin();
  text_t::const_iterator saveconfend = saveconf.end();

  while (saveconfhere != saveconfend) {
    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
    if (argname.empty()) continue;

    // found another entry
    argnameinfo = argsinfo.getarginfo (argname);

    if (argnameinfo == NULL) {
      // no information about the argument could be found
      // we can't keep going because we don't know whether
      // this argument is a single or multiple character value
      logout << text_t2ascii << "Error: the cgi argument \"" << argname
             << "\" was specified as being a compressed argument\n"
             << "but no information about it could be found within the "
             << "cgiargsinfoclass.\n";
      compressed_args.clear();
      return false;
    }

    if (argnameinfo->multiplechar) {
      // multiple character argument -- sort out any '-' chars
      if (args["w"]=="utf-16be") { // browsers don't like \0 in urls...
        compressed_args += minus_safe (args[argname], false);
      } else {
        compressed_args += minus_safe (outconvert.convert(args[argname]), true);
      }

      // no separator after the last entry
      if (saveconfhere != saveconfend) compressed_args.push_back ('-');
      continue;
    }

    // single character argument
    if (args[argname].size() == 0) {
      logout << text_t2ascii << "Error: the cgi argument \"" << argname
             << "\" was specified as being a compressed argument which\n"
             << "should have a one character value but it was empty.\n\n";
      compressed_args.clear ();
      return false;
    }
    if (args[argname].size() > 1) {
      logout << text_t2ascii << "Error: the cgi argument \"" << argname
             << "\" was specified as being a compressed argument which\n"
             << "should have a one character value but it had multiple characters.\n\n";
      compressed_args.clear ();
      return false;
    }

    // everything is ok
    compressed_args += args[argname];
  }

  return true;
}


// args_tounicode runs every argument value whose encoding is greater
// than 0 (i.e. not unicode) through inconvert and stores the result back.
void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
  for (cgiargsclass::iterator arg = args.begin(), last = args.end();
       arg != last; ++arg) {
    if ((*arg).second.value.getencoding() <= 0) continue;

    // Call reset() before converting each argument, to prevent problems
    // when converting the previous argument left the converter in a bad state
    inconvert.reset();
    (*arg).second.value = inconvert.convert((*arg).second.value);
  }
}

// Looks up an environment value by name.
// fcgienv will be loaded with environment name-value pairs
// if using fastcgi (had to do this as getenv doesn't work
// with our implementation of fastcgi). If fcgienv is empty
// we'll simply use getenv, returning g_EmptyText when the
// variable is not set.
text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
  if (!fcgienv.empty()) return fcgienv[name];

  char *name_c = name.getcstr();
  char *found = getenv(name_c);
  delete []name_c;

  if (found == NULL) return g_EmptyText;
  return found;
}
