/**********************************************************************
 *
 * text_t.h -- a simple 16-bit character string class
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: text_t.h 24110 2011-06-02 21:19:39Z sjm84 $
 *
 *********************************************************************/


#ifndef TEXT_T_H
#define TEXT_T_H

#include "gsdlconf.h"

#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace\std\vector>
#  include <ospace\std\list>
#  include <ospace\std\set>
#  include <ospace\std\map>
#elif defined(GSDL_USE_STL_H)
#  include <vector.h>
#  include <list.h>
#  include <set.h>
#  include <map.h>
#else
#  include <vector>
#  include <list>
#  include <set>
#  include <map>
#endif
#include <string>

// use the standard namespace
#if !defined (GSDL_NAMESPACE_BROKEN)
#if defined(GSDL_USE_OBJECTSPACE)
using namespace ospace::std;
#else
using namespace std;
#endif
#endif

// class prototypes: forward declarations of the classes that are
// defined further down in this header
class text_t;
class inconvertclass;
class outconvertclass;


// For those compilers which need it: no-op destroy() overloads for
// unsigned short* and int*. They are only compiled when
// GSDL_NEED_DESTROY_USHORT is defined.
// NOTE(review): presumably a workaround for old STL implementations
// that call destroy() on container elements -- confirm which
// toolchains still require it.
#if defined(GSDL_NEED_DESTROY_USHORT)
inline void destroy(unsigned short *) {};
inline void destroy(int *) {};
#endif

// storage type used by text_t: a vector of unsigned short character values
typedef vector<unsigned short> usvector;
// a constant text_t shared between translation units; it is only
// declared here and must be defined elsewhere.
// NOTE(review): presumably an empty string, going by its name -- confirm
extern const text_t g_EmptyText;

// The class text_t can handle long strings which may contain
// null characters. It uses unsigned shorts to represent up to
// 64K character values.
//
// The characters are held in a usvector and the container-style
// members below forward straight to it. Member functions that are only
// declared here are implemented outside this header.
class text_t {
public:
  // type support for usvector
  typedef usvector::iterator iterator;
  typedef usvector::const_iterator const_iterator;
  typedef usvector::reference reference;
  typedef usvector::const_reference const_reference;
  typedef usvector::size_type size_type;
  typedef usvector::difference_type difference_type;
  typedef usvector::const_reverse_iterator const_reverse_iterator;
  typedef usvector::reverse_iterator reverse_iterator;

protected:
  usvector text;           // the character data
  unsigned short encoding; // 0 = unicode, 1 = other

public:
  // constructors
  text_t ();
  // NOTE(review): presumably builds the text form of i, as setint
  // does -- confirm in the implementation
  text_t (int i);
  text_t (const char *s); // assumed to be a normal c string
  text_t (const char *s, size_type nLength); // support for arrays of chars

  // access to the encoding flag (see the member comment above)
  void setencoding (unsigned short theencoding) {encoding=theencoding;}
  // const so that the flag can also be read from a const text_t
  unsigned short getencoding () const {return encoding;}

  // direct access to the underlying character vector
  usvector& text_as_usvector() { return text ; }
  const usvector& text_as_usvector() const { return text ; }

  // basic container support (all forwarded to the usvector)
  iterator begin () {return text.begin();}
  const_iterator begin () const {return text.begin();}
  iterator end () {return text.end();}
  const_iterator end () const {return text.end();}

  void erase(iterator pos) {text.erase(pos);}
  void erase(iterator first, iterator last) {text.erase(first, last);}
  void push_back(unsigned short c) {text.push_back(c);}
  void pop_back() {text.pop_back();}
  // copies both the characters and the encoding flag
  text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;}
  // unchecked element access, exactly as for the underlying vector
  reference operator[](size_type n) {return text[n];}
  const_reference operator[](size_type n) const {return text[n];}

  void reserve (size_type n) {text.reserve(n);}
  size_type capacity() const { return text.capacity(); }

  bool empty () const {return text.empty();}
  size_type size() const {return text.size();}

  // comparisons look at the character data only; the encoding flag
  // does not take part in them
  friend inline bool operator!=(const text_t& x, const text_t& y)
	{return (x.text != y.text);}
  friend inline bool operator==(const text_t& x, const text_t& y)
	{return (x.text == y.text);}
  friend inline bool operator<(const text_t& x, const text_t& y)
	{return (x.text < y.text);}
  friend inline bool operator>(const text_t& x, const text_t& y)
	{return (x.text > y.text);}
  friend inline bool operator>=(const text_t& x, const text_t& y)
	{return (x.text >= y.text);}
  friend inline bool operator<=(const text_t& x, const text_t& y)
	{return (x.text <= y.text);}

  // added functionality
  // removes every character; the encoding flag is left untouched
  void clear () {text.erase(text.begin(),text.end());}
  void append (const text_t &t);
  void appendrange (iterator first, iterator last);
  void appendrange (const_iterator first, const_iterator last);
  text_t &operator+= (const text_t &t) {append(t);return *this;}

  // support for integers
  void appendint (int i);
  void setint (int i) {clear();appendint(i);}
  text_t &operator=(int i) {setint (i);return *this;}
  text_t &operator+= (int i) {appendint(i);return *this;}
  int getint () const;

  // same as getint but returns an unsigned long
  unsigned long getulong () const;

  // support for arrays of chars
  void appendcarr (const char *s, size_type len);
  // s is only passed on to appendcarr, so it is taken as const char*
  // to accept read-only buffers as well
  void setcarr (const char *s, size_type len) {clear();appendcarr(s,len);}

  // support for const null-terminated C strings
  void appendcstr (const char *s);
  void setcstr (const char *s) {clear();appendcstr(s);}
  text_t &operator= (const char *s) {setcstr(s);return *this;} // c string
  text_t &operator+= (const char *s) {appendcstr(s);return *this;} // c string

  // strings returned from getcarr and getcstr become the callers
  // responsibility and should be deallocated with "delete []"
  char *getcarr(size_type &len) const;
  char *getcstr() const;

  // NOTE(review): presumably replaces occurrences of toreplace with
  // replacement and returns a count -- confirm in the implementation
  int replace(text_t toreplace, text_t replacement);
};

// new stream converter ...
// Writes a text_t to a standard output stream and returns the stream.
// NOTE(review): the character conversion applied to the 16-bit values
// is decided by the implementation, which is not in this header -- confirm
ostream& operator<< (ostream &o, const text_t &text);

// Concatenation of two text_ts: returns a copy of t1 with t2
// appended; neither argument is modified.
inline text_t operator+(const text_t &t1, const text_t &t2)
{
  text_t joined(t1);
  joined += t2;
  return joined;
}

// Concatenation of a text_t and an integer: returns a copy of t1
// with i1 appended through text_t::appendint.
inline text_t operator+(const text_t &t1, int i1)
{
  text_t joined(t1);
  joined += i1;
  return joined;
}

// Concatenation of a text_t and a null-terminated C string: returns a
// copy of t1 with s1 appended through text_t::appendcstr.
// s1 is taken as const char* because it is only read; this lets string
// literals and other read-only buffers use this overload directly
// instead of first being converted to a temporary text_t.
inline text_t operator+(const text_t &t1, const char *s1)
{
  text_t tnew = t1;
  tnew.appendcstr(s1);
  return tnew;
}




// Function object testing two text_ts for equality (uses operator==).
struct eqtext_t
{
  bool operator()(const text_t &lhs, const text_t &rhs) const
  {
    const bool same = (lhs == rhs);
    return same;
  }
};

// Function object ordering two text_ts with operator<; used as the
// comparator of the text_t keyed containers.
struct lttext_t
{
  bool operator()(const text_t &lhs, const text_t &rhs) const
  {
    const bool before = (lhs < rhs);
    return before;
  }
};


// frequently used derived types
// (the set and the map are ordered with lttext_t)
typedef set<text_t,lttext_t> text_tset;
typedef list<text_t> text_tlist;  // more efficient for insertions/deletions
typedef vector<text_t> text_tarray; // more space efficient than text_tlist
typedef map<text_t, text_t, lttext_t> text_tmap;


// general functions which work on text_ts

// find a character within a range
// (const and non-const iterator versions)
// NOTE(review): presumably returns last when c is not found, like
// std::find -- confirm in the implementation
text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last, 
				 unsigned short c);
text_t::iterator findchar (text_t::iterator first, text_t::iterator last, 
			   unsigned short c);
// Find the last occurrence of c in the range from first up to, but not
// including, last_plus_one. Returns last_plus_one if c is not found.
text_t::iterator findlastchar (text_t::iterator first, text_t::iterator last_plus_one, 
			       unsigned short c);
// find a word (a sequence of characters) within a range
// (const and non-const iterator versions)
// NOTE(review): presumably returns the start of the first match, or
// last when there is none -- confirm in the implementation
text_t::iterator findword (text_t::iterator first, text_t::iterator last,
			   const text_t &word);
text_t::const_iterator findword (text_t::const_iterator first, text_t::const_iterator last, 
				 const text_t& word);

// get a string up to the next delimiter (which is skipped)
// The extracted string is passed back through outstr.
// NOTE(review): the returned iterator presumably points just past the
// skipped delimiter so that calls can be chained -- confirm
text_t::const_iterator getdelimitstr (text_t::const_iterator first, 
				      text_t::const_iterator last,
				      unsigned short c, text_t &outstr);
text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
				unsigned short c, text_t &outstr);

// as above, but the delimiter is the text_t w instead of a single
// character value
text_t::const_iterator getdelimitstr (text_t::const_iterator first, text_t::const_iterator last,
				      text_t w, text_t &outstr);

// split a string with a character
// The pieces are passed back through outlist; one overload is provided
// for each of the text_t container types (set, list and array).
// NOTE(review): whether outlist is cleared first is not visible
// here -- confirm in the implementation
void splitchar (text_t::const_iterator first, text_t::const_iterator last,
		unsigned short c, text_tset &outlist);
void splitchar (text_t::const_iterator first, text_t::const_iterator last,
		unsigned short c, text_tlist &outlist);
void splitchar (text_t::const_iterator first, text_t::const_iterator last,
		unsigned short c, text_tarray &outlist);

// split a string with a word: like splitchar, but the separator is the
// text_t w and the pieces are passed back through a text_tlist
void splitword (text_t::const_iterator first, text_t::const_iterator last,
		text_t w, text_tlist &outlist);

// join a string using a character
// The elements of inlist are combined into outtext with c between them;
// one overload is provided for each text_t container type.
// NOTE(review): whether outtext is cleared first is not visible
// here -- confirm in the implementation
void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext);
void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
// as above, but the separator c is a text_t rather than a single
// character value
void joinchar (const text_tset &inlist, const text_t &c, text_t &outtext);
void joinchar (const text_tlist &inlist, const text_t &c, text_t &outtext);
void joinchar (const text_tarray &inlist, const text_t &c, text_t &outtext);

// count the occurrences of a character within a range
int countchar (text_t::const_iterator first, text_t::const_iterator last,
	       unsigned short c);

// return a substring of a string, running from first up to but not
// including last
text_t substr (text_t::const_iterator first, text_t::const_iterator last);

// convert to lowercase
// the range version is implemented outside this header
void lc (text_t::iterator first, text_t::iterator last);

// whole-string version: applies the range version to all of t
inline void lc (text_t &t)
{
  const text_t::iterator textbegin = t.begin();
  const text_t::iterator textend = t.end();
  lc (textbegin, textend);
}

// convert to uppercase
// the range version is implemented outside this header
void uc (text_t::iterator first, text_t::iterator last);

// whole-string version: applies the range version to all of t
inline void uc (text_t &t)
{
  const text_t::iterator textbegin = t.begin();
  const text_t::iterator textend = t.end();
  uc (textbegin, textend);
}

// checks to see if it is a number (i.e. contains only 0-9)
// NOTE(review): the result for an empty text is not visible
// here -- confirm in the implementation
bool is_number (const text_t &text);

// checks to see if the text has any letters or digits
bool has_unicode_letdig (const text_t &text);

// checks to see if a text_t starts with the specified prefix
bool starts_with(const text_t& text, const text_t& prefix);
// checks to see if a text_t ends with the specified suffix
bool ends_with(const text_t& text, const text_t& suffix);

// trims whitespace off the front and end of the string; the result is
// returned as a new text_t and the argument is left unchanged
text_t trim(const text_t& text);

// conversion classes used for getting information in to and out of
// the text_t class.

// Common base class of inconvertclass and outconvertclass. It supplies
// the status_t codes reported by their convert() methods and a virtual
// reset().
class convertclass 
{
public:
  // NOTE(review): the exact meaning of each status is not visible in
  // this header -- confirm against the convert() implementations
  enum status_t {finished, stopped, unfinished};
  
  convertclass ();
  virtual ~convertclass ();
  virtual void reset ();
};



// convert from a char stream to the text_t class
// the default version assumes the input is an ASCII
// character array
class inconvertclass : public convertclass
{
public:
  inconvertclass ();
  ~inconvertclass ();

  virtual void reset ();
  // supplies the character array (and its length) to be converted
  // NOTE(review): presumably only the pointer is stored, so the array
  // has to outlive the conversion -- confirm in the implementation
  void setinput (char *thestart, size_t thelen);

  // output will be cleared before the conversion
  virtual void convert (text_t &output, status_t &status);

  // will treat the text_t as an 8-bit string and convert
  // it to a 16-bit string using the above convert method.
  text_t convert (const text_t &t);

protected:
  // NOTE(review): presumably the input set through setinput -- confirm
  char *start;
  size_t len;
};

// to get something which will do the conversion
// to ascii declare a (non global!) instance like
// this
// inconvertclass ascii2text_t;

#if defined(GSDL_USE_IOS_H)
#include <iostream.h> // darwin doesn't have ostream.h...
#else
#include <ostream>
#endif

// Convert from a text_t class to a char stream
// This default version assumes the output is an ASCII
// character array. If you set the output stream you
// can use this class to output to a stream using the
// << operator. The << operator can also be conveniently
// used to set the output stream by doing something like
//
// cout << text_t2ascii << textstr << anothertextstr;
//
// this class assumes that the input text doesn't change
// while the conversion takes place
class outconvertclass : public convertclass
{
public:
  outconvertclass ();
  ~outconvertclass ();

  virtual void reset ();
  // supplies the text to be converted
  virtual void setinput (text_t *theinput);
  // supplies the text together with a position inside it
  // NOTE(review): presumably used to continue a conversion from
  // texthere -- confirm in the implementation
  virtual void setdata(text_t *input, text_t::iterator texthere);
  // note that convert does not null-terminate the
  // output array of characters
  // NOTE(review): presumably at most maxlen chars are written to
  // output and the number written is passed back in len -- confirm
  virtual void convert (char *output, size_t maxlen, 
			size_t &len, status_t &status);

  // will convert the 16-bit string to an 8-bit stream
  // and place the result in a text_t. This method uses
  // the above convert function.
  text_t convert (const text_t &t);

  // access to the output stream used with the << operators
  virtual void setostream (ostream *theouts);
  ostream *getostream ();

protected:
  text_t *input;
  text_t::iterator texthere; // only valid if input is valid

  ostream *outs;
};

// to get something which will do the conversion
// to text_t declare a (non global!) instance like
// this
// outconvertclass text_t2ascii;


// stream operators for the output class
// These support the chained form
//   cout << converter << text << moretext;
// in which the first operator hands the stream to the converter and
// each further << sends one text_t through it.
// NOTE(review): presumably implemented with setostream and convert --
// confirm in the implementation
outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter);
outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t);


#endif
