/**********************************************************************
 *
 * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
 * Copyright (C) 2014  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
/*
 * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
 * 
 * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
 * downloaded version of the code at runtime-src/packages/security/installed/include
 * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
 * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
 *
 * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the 
 * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/. 
 * In particular the codecs and the DefaultEncoder at 
 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
*/

//http://www.cplusplus.com/reference/cctype/isalnum/
#include <ctype.h>
#include <stdio.h>
#include "securitytools.h"

// File-scope flag, initialised to true. Nothing in the visible part of this
// file reads or writes it.
// NOTE(review): presumably intended as a master on/off switch for the
// encoders below -- confirm, or remove if it is dead.
static bool security_on = true;

// Forward declaration of the helper defined at the bottom of this file.
// Currently unused: the encoders below call isalnum(int) from <ctype.h> instead.
bool isAlphaNumeric(const unsigned short c);

/*  
    Rule 5 of the OWASP XSS cheat sheet states: 
    https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
    
    WARNING: Do not encode complete or relative URLs with URL encoding! If untrusted input is meant to be placed into 
    href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected 
    protocol, especially Javascript links. URLs should then be encoded based on the context of display like any other
    piece of data. For example, user driven URLs in HREF links should be attribute encoded. For example:

    String userURL = request.getParameter( "userURL" )
    boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false); 
    if (isValidURL) {  
    <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
    }

    The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it 
    returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
*/
bool isValidURLProtocol(const text_t& url) {
  text_t::const_iterator here = url.begin();
  text_t::const_iterator end = url.end();

  if(findword(here, end, "javascript:") != end) {
    return false;
  }
  return true;
}


/**
 * Encodes a whole string for use inside an HTML attribute value.
 *
 * Each character is passed to the per-character encodeForHTML() overload
 * and the encoded pieces are concatenated in order.
 *
 * @param in          the text to encode
 * @param immuneChars characters that are copied through unencoded
 * @return the encoded text
 */
text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
  text_t encoded;
  const text_t::const_iterator stop = in.end();
  for (text_t::const_iterator pos = in.begin(); pos != stop; ++pos) {
    // HTML and HTML-attribute encoding share one per-character routine;
    // only the immune set differs (IMMUNE_HTMLATTR by default).
    encoded += encodeForHTML(immuneChars, *pos);
  }
  return encoded;
}
  
/**
 * Encodes a whole string for use in HTML element content.
 *
 * Each character is passed to the per-character encodeForHTML() overload
 * and the encoded pieces are concatenated in order.
 *
 * @param in          the text to encode
 * @param immuneChars characters that are copied through unencoded
 * @return the encoded text
 */
text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
  text_t encoded;
  const text_t::const_iterator stop = in.end();
  for (text_t::const_iterator pos = in.begin(); pos != stop; ++pos) {
    encoded += encodeForHTML(immuneChars, *pos); // IMMUNE_HTML by default
  }
  return encoded;
}

/**
 * Encodes a whole string for use in a CSS context.
 *
 * Each character is passed to the per-character encodeForCSS() overload
 * and the encoded pieces are concatenated in order.
 *
 * @param in          the text to encode
 * @param immuneChars characters that are copied through unencoded
 * @return the encoded text
 */
text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
  text_t encoded;
  const text_t::const_iterator stop = in.end();
  for (text_t::const_iterator pos = in.begin(); pos != stop; ++pos) {
    encoded += encodeForCSS(immuneChars, *pos); // IMMUNE_CSS by default
  }
  return encoded;
}


/**
 * Encodes a whole string for use as a URL parameter value.
 *
 * Each character is passed to the per-character encodeForURL() overload
 * and the encoded pieces are concatenated in order.
 *
 * @param in          the text to encode
 * @param immuneChars characters that are copied through unencoded
 * @return the encoded text
 */
text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
  text_t encoded;
  const text_t::const_iterator stop = in.end();
  for (text_t::const_iterator pos = in.begin(); pos != stop; ++pos) {
    encoded += encodeForURL(immuneChars, *pos); // IMMUNE_URL by default
  }
  return encoded;
}

/**
 * Encodes a whole string for use inside JavaScript.
 *
 * Each character is passed to the per-character encodeForJavascript()
 * overload and the encoded pieces are concatenated in order.
 *
 * @param in          the text to encode
 * @param immuneChars characters that are copied through unencoded
 * @param dmsafe      forwarded unchanged to the per-character encoder, which
 *                    doubles the escaping backslash when it is true
 * @return the encoded text
 */
text_t encodeForJavascript(const text_t& in, const text_t& immuneChars, bool dmsafe) {
  text_t encoded;
  const text_t::const_iterator stop = in.end();
  for (text_t::const_iterator pos = in.begin(); pos != stop; ++pos) {
    encoded += encodeForJavascript(immuneChars, *pos, dmsafe); // IMMUNE_JAVASCRIPT by default
  }
  return encoded;
}

/**
 * Encodes a whole string for inclusion in an SQL statement.
 *
 * Each character is passed to the per-character encodeForSQL() overload
 * and the encoded pieces are concatenated in order.
 *
 * @param in          the text to encode
 * @param immuneChars characters that are copied through unencoded
 * @param mode        SQL escaping scheme, forwarded to the per-character encoder
 * @return the encoded text
 */
text_t encodeForSQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
  text_t encoded;
  const text_t::const_iterator stop = in.end();
  for (text_t::const_iterator pos = in.begin(); pos != stop; ++pos) {
    // IMMUNE_SQL and STANDARD SQLMode by default
    encoded += encodeForSQL(immuneChars, *pos, mode);
  }
  return encoded;
}


/*
The encodeForURL() function here follows the same rules as Java's URLEncoder, since that is called
by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html

When encoding a String, the following rules apply:

    The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
    The special characters ".", "-", "*", and "_" remain the same.
    The space character " " is converted into a plus sign "+".
    All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used. 

*/
/**
 * URL-encodes a single character.
 *
 *  - characters listed in immuneChars, and alphanumerics, are returned as-is;
 *  - a space becomes "+";
 *  - anything else becomes "%" followed by its value in upper-case hex
 *    (at least two digits).
 *
 * @param immuneChars characters that must be passed through unencoded
 * @param in          the character to encode
 * @return the encoded form of in
 */
text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
  
  text_t result = "";
  text_t::const_iterator here = immuneChars.begin();
  text_t::const_iterator end = immuneChars.end();

  // Check if the character is in the list of chars immune to encoding
  if(findchar(here, end, in) != end) {
    result.push_back(in);
  }

  // isalnum() is only defined for values representable as unsigned char
  // (or EOF), so wider characters must never reach it.
  else if(in <= 0xFF && isalnum((int)in)) {
    result.push_back(in);
  }

  // for URLs, space becomes +
  else if(in == ' ' ) {
    result.push_back('+');
  }  

  // all other chars converted to hexadecimal %XY
  else {
    // An unsigned short is assumed to be 16 bits, so it prints as at most
    // four hex digits: '%' + 4 digits + NUL = 6 bytes. The previous 4-byte
    // buffer overflowed for every character above 0xFF.
    char hex_char[8];
    sprintf(hex_char,"%%%02X",(unsigned int)in);
    // NOTE(review): for in > 0xFF this yields "%XXXX", which a standard
    // percent-decoder will not map back to the original character. Java's
    // URLEncoder emits the UTF-8 bytes as separate %XY triplets instead --
    // confirm what the decoding side expects before changing the format.
    result = text_t(hex_char);
  }

  return result;
}

// encodes for both HTML and HTML attributes. 
// The chars in the immuneChars array determines which of the two this is
// See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
/**
 * Entity-encodes a single character for HTML content or HTML attributes;
 * the immuneChars set passed in decides which of the two contexts applies.
 *
 *  - characters listed in immuneChars, and alphanumerics, are returned as-is;
 *  - control characters (<= 0x1f other than tab, LF and CR, and 0x7f-0x9f)
 *    are replaced by the entity built from REPLACEMENT_HEX;
 *  - anything else becomes the hex entity "&#xHH;" (upper-case hex, at
 *    least two digits).
 *
 * @param immuneChars characters that must be passed through unencoded
 * @param in          the character to encode
 * @return the encoded form of in
 */
text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
  
  text_t result = "";
  text_t::const_iterator here = immuneChars.begin();
  text_t::const_iterator end = immuneChars.end();

  // Check if the character is in the list of chars immune to encoding
  if(findchar(here, end, in) != end) {
    result.push_back(in);
  }

  // isalnum() is only defined for values representable as unsigned char
  // (or EOF), so wider characters must never reach it.
  else if(in <= 0xFF && isalnum((int)in)) {
    result.push_back(in);
  }

  // check for illegal characters
  // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
  // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
  // Encode all these as the replacement char ufffd, which is used to replace an unknown or unrepresentable character

  else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
    // Entity-encode the replacement character rather than returning it raw
    result = "&#x" + REPLACEMENT_HEX + ";";
  }  

  // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
  else {
    // An unsigned short is assumed to be 16 bits, so it prints as at most
    // four hex digits plus the terminating NUL. The previous 3-byte buffer
    // overflowed for every character above 0xFF.
    char hex_char[8];
    sprintf(hex_char,"%02X",(unsigned int)in);
    result = "&#x" + text_t(hex_char) + ";";
  }

  return result;
}

// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
// return the hex and end in whitespace to terminate
/**
 * Escapes a single character for a CSS context.
 *
 *  - characters listed in immuneChars, and alphanumerics, are returned as-is;
 *  - anything else becomes a backslash, the character's value in upper-case
 *    hex (at least two digits) and a trailing space that terminates the
 *    escape sequence.
 *
 * @param immuneChars characters that must be passed through unencoded
 * @param in          the character to encode
 * @return the encoded form of in
 */
text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
  
  text_t result = "";
  text_t::const_iterator here = immuneChars.begin();
  text_t::const_iterator end = immuneChars.end();

  // Check if the character is in the list of chars immune to encoding
  if(findchar(here, end, in) != end) {
    result.push_back(in);
  }

  // isalnum() is only defined for values representable as unsigned char
  // (or EOF), so wider characters must never reach it.
  else if(in <= 0xFF && isalnum((int)in)) {
    result.push_back(in);
  }

  // all other chars are converted to a CSS hex escape of the form "\AB "
  else {
    // An unsigned short is assumed to be 16 bits, so it prints as at most
    // four hex digits plus the terminating NUL. The previous 3-byte buffer
    // overflowed for every character above 0xFF.
    char hex_char[8];
    sprintf(hex_char,"%02X",(unsigned int)in);
    // return the hex and end in whitespace to terminate
    result = "\\" + text_t(hex_char) + " ";
  }

  return result;
}

// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
/**
 * Escapes a single character for a JavaScript context.
 *
 *  - characters listed in immuneChars, and alphanumerics, are returned as-is;
 *  - other characters below 256 become \xHH;
 *  - all remaining characters become \uHHHH.
 *
 * The short escapes (\n, \t, \", \' ...) are deliberately NOT used, as they
 * can be used to break out of a context.
 *
 * @param immuneChars characters that must be passed through unencoded
 * @param in          the character to encode
 * @param dmsafe      when true the escaping backslash is doubled ("\\x41"),
 *                    for output that goes into macro files
 * @return the encoded form of in
 */
text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in, bool dmsafe) {

  text_t result = "";
  text_t::const_iterator start = immuneChars.begin();
  text_t::const_iterator end = immuneChars.end();

  // Check if the character is in the list of chars immune to encoding
  if(findchar(start, end, in) != end) {
    result.push_back(in);
    return result;
  }

  // isalnum() is only defined for values representable as unsigned char
  // (or EOF), so wider characters must never reach it.
  if(in <= 0xFF && isalnum((int)in)) {
    result.push_back(in);
    return result;
  }

  // Four hex digits at most, plus the terminating NUL.
  char hex_char[5];

  if(in < 256) {
    // encode up to 256 with \xHH
    sprintf(hex_char,"%02X",(unsigned int)in);
    if(dmsafe) { // double escape backslashes for macro files
      result = "\\\\x";
    } else {
      result = "\\x";
    }
  } else {
    // otherwise encode with \uHHHH
    sprintf(hex_char,"%04X",(unsigned int)in);
    if(dmsafe) { // double escape backslashes for macro files
      result = "\\\\u";
    } else {
      result = "\\u";
    }
  }

  result += text_t(hex_char);
  return result;
}


/* 

 C++ port of OWASP-ESAPI for MySQL. Not sure if this is the same for SQLite

http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
 Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
 which states: 
 "This technique works like this. Each DBMS supports one or more character escaping schemes 
 specific to certain kinds of queries. If you then escape all user supplied input using the 
 proper escaping scheme for the database you are using, the DBMS will not confuse that input 
 with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."

http://www.php.net/manual/en/mysqli.real-escape-string.php
http://www.php.net/manual/en/function.mysql-real-escape-string.php
http://www.php.net/manual/en/function.sqlite-escape-string.php
http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp

*/
/**
 * Escapes a single character for inclusion in an SQL statement.
 *
 * Characters listed in immuneChars, and alphanumerics, are returned
 * unchanged. (Previously these cases fell through into the mode handling
 * below, so in STANDARD mode every letter and digit came back with a
 * backslash in front of it, and in ANSI mode it came back doubled.)
 *
 * Every other character is escaped according to mode:
 *  - STANDARD (MySQL): the named escapes \0 \b \t \n \r \Z \" \% \' \\ \_
 *    are used where they apply, otherwise the character is prefixed with a
 *    backslash;
 *  - any other mode (ANSI): an apostrophe is doubled, a double quote is
 *    dropped altogether, and everything else is passed through.
 *
 * @param immuneChars characters that must be passed through unencoded
 * @param in          the character to encode
 * @param mode        the SQL escaping scheme to apply
 * @return the encoded form of in (may be empty in ANSI mode)
 */
text_t encodeForSQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {

  text_t result = "";
  text_t::const_iterator start = immuneChars.begin();
  text_t::const_iterator end = immuneChars.end();

  // Check if the character is in the list of chars immune to encoding.
  // Return at once, as the Java original does, so that the mode-specific
  // escaping below cannot touch it.
  if(findchar(start, end, in) != end) {
    result.push_back(in);
    return result;
  }

  // Alphanumerics never need escaping. isalnum() is only defined for values
  // representable as unsigned char (or EOF), hence the range guard.
  if(in <= 0xFF && isalnum((int)in)) {
    result.push_back(in);
    return result;
  }

  if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL

    switch(in) {
    case 0x00: result = "\\0";  break;
    case 0x08: result = "\\b";  break;
    case 0x09: result = "\\t";  break;
    case 0x0a: result = "\\n";  break;
    case 0x0d: result = "\\r";  break;
    case 0x1a: result = "\\Z";  break;
    case 0x22: result = "\\\""; break;
    case 0x25: result = "\\%";  break;
    case 0x27: result = "\\'";  break;
    case 0x5c: result = "\\\\"; break;
    case 0x5f: result = "\\_";  break;
    default:
      // no named escape: backslash followed by the character itself
      result = "\\";
      result.push_back(in);
      break;
    }

  } else { // mode is ANSI, encodeCharacterANSI:

    /* Encode for ANSI SQL. 
     Apostrophe is encoded
     Bug ###: In ANSI Mode Strings can also be passed in using the quotation. 
     In ANSI_QUOTES mode a quotation is considered to be an identifier, thus 
     cannot be used at all in a value and will be dropped completely. 
     returns a string encoded to standards of MySQL running in ANSI mode
    */

    if ( in == '\'' ) result = "\'\'";
    else if ( in == '\"' ) result = "";
    else result.push_back(in);

  }

  return result;
}

// Unused at present.
// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
// See Integer.toHexString()
// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
// Returns true only for the ASCII digits 0-9 and the ASCII letters A-Z / a-z.
// The test is purely numeric and makes no call into <ctype.h>. All three
// accepted ranges lie below 0x7B, so every value from 0xFF (255) upwards is
// rejected without needing a separate range check.
bool isAlphaNumeric(const unsigned short c) {
  const bool isDigit = (c >= 0x30) && (c <= 0x39); // '0' .. '9'
  const bool isUpper = (c >= 0x41) && (c <= 0x5A); // 'A' .. 'Z'
  const bool isLower = (c >= 0x61) && (c <= 0x7A); // 'a' .. 'z'

  return isDigit || isUpper || isLower;
}
