/**********************************************************************
 *
 * Unimarc.cpp
 * Copyright (C) 2003  UNESCO
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "stdafx.h"

///////////////////////////////////////////////////////////////////////////////////
// MarcRecord - This class can instantiate objects that represent a MARC
//              bibliographic record and includes methods for manipulating the 
//              record.
//
//             example:
//
//	#include "Unimarc.h"
//
//     // Create a MarcRecord object from a raw MARC record of len bytes,
//     // copying the data into the object
//     MarcRecord mr(buf, len, 1);
//     // "%245a" is replaced by subfield 'a' of the field with tag 245
//     mr.PrettyFormat("The title is %245a", out, sizeof(out));
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Unimarc.h"

//-------------------------------------------------------------------------------
// ostream& operator<<(ostream& s, const MarcLeader& leader)
//
// Overloaded instance of the output operator for MarcLeader objects
//---------------------------------------------------------------------------------
std::ostream& operator<<(std::ostream& s, const MarcLeader& leader)
{
	s.write(leader.lreclen_, 5);	// Logical record length
	s.write(&leader.status_, 1);	// Record status  
	s.write(&leader.type_, 1);		// Type of record
	s.write(&leader.bibLevel_, 1);	// Bibliographic level
	s.write(&leader.ctrlType_, 1);	// Type of control
	s.write(&leader.undPos_, 1);	// Undefined character position
	s.write(&leader.indCount_, 1);	// Indicator count
	s.write(&leader.scd_, 1);		// Subfield code count
	s.write(leader.base_, 5);   	// Base address of data
	s.write(&leader.encLevel_, 1);	// Encoding level
	s.write(&leader.dcf_, 1);		// Descriptive cataloging form
	s.write(&leader.lrr_, 1);		// Linked record requirement
	  // Directory  entry map
	s.write(&leader.llof_, 1);		// Length of the length-of-field portion
	s.write(&leader.lscp_, 1);		// Length of starting-character-position portion
	s.write(&leader.lidp_, 1);		// Length of implementation-defined portion
	s.write(&leader.uep_, 1);		// Undefined entry map character position
	return s;
}

//----------------------------------------------------------------------------------
// istream& operator>>(istream& s, const MarcLeader& leader)
//
// Overloaded instance of the input operator for MarcLeader objects
//----------------------------------------------------------------------------------
std::istream& operator>>(std::istream& s, MarcLeader& leader)
{
	s.read(leader.lreclen_, 5);	    // Logical record length
	s.read(&leader.status_, 1);	    // Record status  
	s.read(&leader.type_, 1);		// Type of record
	s.read(&leader.bibLevel_, 1);	// Bibliographic level
	s.read(&leader.ctrlType_, 1);	// Type of control
	s.read(&leader.undPos_, 1);	    // Undefined character position
	s.read(&leader.indCount_, 1);	// Indicator count
	s.read(&leader.scd_, 1);		// Subfield code count
	s.read(leader.base_, 5);   	    // Base address of data
	s.read(&leader.encLevel_, 1);	// Encoding level
	s.read(&leader.dcf_, 1);		// Descriptive cataloging form
	s.read(&leader.lrr_, 1);		// Linked record requirement
	  // Directory  entry map
	s.read(&leader.llof_, 1);		// Length of the length-of-field portion
	s.read(&leader.lscp_, 1);		// Length of starting-character-position portion
	s.read(&leader.lidp_, 1);		// Length of implementation-defined portion
	s.read(&leader.uep_, 1);		// Undefined entry map character position
	return s;
}

/************************************************************************
Local prototypes
 ***********************************************************************/
static void Printx(FILE *fp, char *s, int x);

//----------------------------------------------------------------------
// MarcRecord::MarcRecord(char *data, int length, int copyData)
// 
// Constructor that creates a new MarcRecord object. 
// data is raw marc record data for a single record.  If copyData is 1, 
// data is encapsulated within this object.  If 0, does not copy data and 
// responsibility falls to caller to maintain data pointer.
//----------------------------------------------------------------------
MarcRecord::MarcRecord(char* data, int length, int copyData)
{
	encapsulated_ = copyData;
	
	/* encapsulate or set pointer to data */
	if (encapsulated_)
	{
		data_ = new char[length + 1]; 
		memcpy(data_, data, length);
		data_[length] = '\0';
	}
	else
		data = data;
	
	// Lay leader overlay onto data
	leader_ = *(MarcLeader *) (data_);
	
	// Parse the record.  sets dirCount_, directory_ 
	if (!ParseMarcRecord()) 
	{
		// marcRecord_destroy(m);
		// return NULL;
	}
}

//----------------------------------------------------------------------
// void MarcRecord::~MarcRecord()
//
// Destructor, destroys a MarcRecord object and all used resources.
//----------------------------------------------------------------------
MarcRecord::~MarcRecord()
{
	if ((encapsulated_) && (data_))
		delete[] data_;

	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
		directory_[i].field_.erase(directory_[i].field_.begin(),directory_[i].field_.end());
	directory_.erase(directory_.begin(),directory_.end());
}

#define MARC_STATE_IDLE 0
#define MARC_STATE_TAG 1
#define MARC_STATE_WIDTH 2
//----------------------------------------------------------------------
// int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen)
//
//  Formats the contents of the marc record according to a format string.
//  Useful for generating a buffer to pretty-print the record.
//  format contains a format-specifier string.  The format-specifier string
//  is kindof like printf's, except the only control character is '%'.
//  Following the '%' should be the integer MARC field code as defined in 
//  the (huge) list of available MARC fields from the library of congress
//  (http://lcweb.loc.gov/marc/).  buf is a previously allocated 
// buffer into which the pretty version will be copied.  maxlen is the max 
// length to write into buf.  After call, buf contains pretty formatted text.
//----------------------------------------------------------------------
int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen)
{
	char b[1024];
	int bufpos = 0, fmtpos = 0;
	
	b[0] = '\0';
	buf[0] = '\0';
	
	for (; format[fmtpos] != '\0'; )
	{
		switch (format[fmtpos])
		{
			case '%': 
				{
					char tag[4];
					strncpy(tag, (char *)(format + fmtpos + 1), 3);
					tag[3] = '\0';
					GetSubField(tag, format[fmtpos + 4], b,	sizeof(b) - 1);
					if (*b)
					{
						int l = strlen(b);
						strncat(buf, b, maxlen - bufpos - 1);
						bufpos += l;
						if (bufpos >= maxlen)
						{
							buf[maxlen - 1] = '\0';
							return maxlen;
						}
					}
					fmtpos += 5;
					break;
				}
			default:
				buf[bufpos] = format[fmtpos];
				bufpos++;
				fmtpos++;
				buf[bufpos] = '\0';
				break;
		}
	}
	return bufpos;
}

//----------------------------------------------------------------------
// int MarcRecord::GetField(char* tag, char* buf, int maxlen)
//
// append all subfields in a field together. 
//----------------------------------------------------------------------
int MarcRecord::GetField(char* tag, char* buf, int maxlen)
{
	int remain = maxlen - 1;
	
	buf[0] = '\0';
	
	if ((tag == NULL) || (*tag == '\0'))
		return 0;
	
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
	{
		if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
		{
			for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
			{
				int flen = directory_[i].field_[j].len_;
				if (flen > remain)
				{
					strncat(buf, directory_[i].field_[j].data_ + 1, remain);
					buf[maxlen] = '\0';
					return 1;
				}
				else 
				{
					strncat(buf, directory_[i].field_[j].data_ + 1, flen);
					remain -= flen;
				}
			}
			return 1;
		}
	}
	return 0;
}

//----------------------------------------------------------------------
// int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen)
//
// append all fields with a certain subfield together.
//----------------------------------------------------------------------
int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen)
{
	int remain = maxlen - 1;
	
	buf[0] = '\0';
	
	if ((tag == NULL) || (*tag == '\0'))
		return 0;
	
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
	{
		if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
		{
			for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
			{
				if (*(directory_[i].field_[j].data_) == subtag)
				{
					int flen = directory_[i].field_[j].len_;
					if (flen > remain)
					{
						strncpy(buf, directory_[i].field_[j].data_ + 1, remain);
						buf[maxlen] = '\0';
						return 1;
					}
					else 
					{
						strcpy(buf, directory_[i].field_[j].data_ + 1);
						remain -= flen;
					}
				}
			}
		}
	}
	return 0;
}

int MarcRecord::HasField(char* tag)
{
	
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
		if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
			return 1;
		
		return 0;
}

int MarcRecord::HasSubField(char* tag, char subtag)
{
	
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
		if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
			for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
				if (*directory_[i].field_[j].data_ == subtag)
					return 1;
				return 0;
}

int MarcRecord::GetSubField(char *tag, char subtag, char *buf, 	int maxlen)
{
	
	buf[0] = '\0';
	
	if ((tag == NULL) || (*tag == '\0'))
		return 0;
	
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
	{
		if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
		{
			for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
			{
				if (*(directory_[i].field_[j].data_) == subtag)
				{
					int l = directory_[i].field_[j].len_ - 1 > maxlen ? maxlen : directory_[i].field_[j].len_ - 1;
					strncpy(buf, directory_[i].field_[j].data_ + 1, l);
					buf[l] = '\0';
					return 1;
				}
			}
		}
	}
	return 0;
}


int GetNum(char *s, int n)
{
	char nbuf[10]; /* no more than 9 digits */
	
	strncpy(&nbuf[0], s, n);
	nbuf[n] = '\0';
	return (atoi(nbuf));
}

void MarcRecord::Dump()
{
	int dc = 0, fc = 0;
	
	printf("tags: ");
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
	{
		Printx(stdout, directory_[i].tag_, 3);
		dc++;
		for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
		{
			if (directory_[i].field_[j].data_)
			{
			  // The following is a call to the C-library, and so editing the
			  // code from "unsigned long" to mg_u_long (as has been done elsewhere
			  // in the code as a systematic change to allow 64-bit Unix system
			  // to cope with the Unesco code that was written on the assumption
			  // that 'long' is always 4 bytes in size -- it is *not* an appropriate
			  // to change the reference to "unsigned long" here.
			  //
			  // putchar() takes a 'int' as its argument for much deeper legacy
			  // reasons concerning C compilers (pre C90) as discussed here:
			  //   https://stackoverflow.com/questions/40712493/why-argument-type-of-putchar-fputc-and-putc-is-not-char
			  // This does make the use of "unsigned long" in the original Unesco code a bit
			  // suspect, since "int" would make more sense, however given the way
			  // putchar() works -- passing the argument as a larger integer type
			  // than is needed, but then 'cutting' out the unsigned char val that
			  // is sitting there in the LSB and writing it out -- this means sticking
			  // to the Unesco coded "unsigned long" is an acceptable (if slightly confusing)
			  // thing to do.
			  
			  
			  putchar((unsigned long)directory_[i].field_[j].data_); // **** could be more needs to be done here!!
			}
			fc++;
			/*
			printx(stdout, (char*)f->data + 1, f->len);
			printf("\n"); */
		}
		printf(",");
	}
	putchar('\n');
}





void MarcRecord::PrintDetailed(FILE *fp)
{
	int count, i;
	
	fprintf(fp, "RecordLength:\t\t%i\n", GetNum(leader_.lreclen_, 5));
	fprintf(fp, "RecordStatus:\t\t%c\n", leader_.status_);
	fprintf(fp, "RecordType:\t\t%c\n", leader_.type_);
	fprintf(fp, "BibLevel:\t\t%c\n", leader_.bibLevel_);
	fprintf(fp, "ControlType:\t\t%c\n", leader_.ctrlType_);
	fprintf(fp, "UndefCharPos:\t\t%c\n", leader_.undPos_);
	fprintf(fp, "IndCount:\t\t%i\n", GetNum(&leader_.indCount_, 1));
	fprintf(fp, "SubCodeCount:\t\t%i\n", GetNum(&leader_.scd_, 1));
	fprintf(fp, "DataBaseAddr:\t\t%i\n", GetNum(leader_.base_, 5));
	fprintf(fp, "EncodingLevel:\t\t%c\n", leader_.encLevel_);
	fprintf(fp, "DescCatForm:\t\t%c\n", leader_.dcf_);
	fprintf(fp, "LinkedRecReq:\t\t%c\n", leader_.lrr_);
	fprintf(fp, "LengthOf LengthOfField:\t%i\n", GetNum(&leader_.llof_, 1));
	fprintf(fp, "LengthOf StartCharPos:\t%i\n", GetNum(&leader_.lscp_, 1));
	fprintf(fp, "LengthOf ImpDefined:\t%i\n", GetNum(&leader_.lidp_, 1));
	fprintf(fp, "UndefinedEntry:\t\t%i\n", GetNum(&leader_.uep_, 1));
	
	count = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN;
	for (i = 0; i < count; i++)
	{
		DirectoryEntry dir = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN));
		fprintf(fp, "\n");
		fprintf(fp, "Tag:\t\t");
		Printx(fp, dir.tag_, 3);
		fprintf(fp, "\nLength:\t\t%i\n", GetNum(dir.len_, 4));
		fprintf(fp, "StartPos:\t%i\n", GetNum(dir.scp_, 5));
		fprintf(fp, "Data:\t\t");
		Printx(fp, (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir.scp_, 5)), GetNum(dir.len_, 4));
		fprintf(fp, "\n");
	}
}

/*
desc:		parses the marc record and builds appropriate data structures in m

params:

	m		newly created marcRecord
	
returns:

	1 on success
	0 on parse or memory failure
*/
int MarcRecord::ParseMarcRecord()
{
	dirCount_ = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN;
	
	// For each MarcDirectory entry, create list of subfields
	MarcDirectoryEntry dir_entry;
	for (int i = 0; i < dirCount_; i++)
	{
		char*  data;
		// Extract the directory component
		DirectoryEntry dir_entry = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN));

		MarcDirectoryEntry marc_dir_entry;
		
		// Get a pointer to the data for this MarcDirectory
		data = (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir_entry.scp_, 5));
		
		// set the tag for this MarcDirectory 
		marc_dir_entry.tag_ = dir_entry.tag_;
		
		/* add the list of subfields for this MarcDirectory */
		if (!AddSubFields(marc_dir_entry, data))
			return 0;
		directory_.push_back(marc_dir_entry);
		
	}
	
	return 1;
}

void Printx(FILE* fp, char* s, int x)
{
	int i;
	for (i = 0; i < x; i++)
		fprintf(fp, "%c", s[i]);
}

//----------------------------------------------------------------------
// int MarcRecord::AddSubFields(MarcDirectory& d, char* b)
//
// Parses and adds subfields to MarcDirectory
//
// params:
//
//	d		MarcDirectory to which subfields are added
//	b		buffer of data for d
//
// returns:
//
//	1 on success
//	0 on parse or memory error
//----------------------------------------------------------------------
int MarcRecord::AddSubFields(MarcDirectoryEntry& d, char* b)
{
	char *p, *start;
	int cont = 1, length = 0;
	
	/* create new field */
	MarcField field;
	
	
	start = b;
	for (p = b; ((*p != '\0') && (cont == 1)); p = (char *)p + 1)
	{
		switch (*p)
		{
			case SUBFDELIM:
				/* end of current subfield, but more exist */
				
				/* set length and pointer to data for this field */
				field.len_  = length;
				field.data_ = start;
				d.field_.push_back(field);
				/* by definition, there should be more subfields, but we'll
				check for NULL just in case
				*/
				if (*p + 1)
				{
				}
				else 
				{
					/* uh oh */
					return 0;
				}
				length = 0;
				start = (char *)p + 1;
				break;
			case FIELDTERM:
				/* end of subfield list */
				
				/* set length and pointer to data for this field */
				field.len_  = length;
				field.data_ = start;
				d.field_.push_back(field);
				
				/* break out of loop */
				cont = 0;
				break;
			case RECTERM:
				/* end of record */
				cont = 0;
				break;
			default:
				/* regular character */			
				length++;
				break;
		}
	}
	return 1;
}

void MarcRecord::Format(char *buf, int maxlen, int format)
{
	char b[1024];
	char date[24];
	char title[128];
	
	b[0] = '\0';
	buf[0] = '\0';
	
	/* FIXME: get rid of this debugging crap sometime */
	Dump();
	
	switch (format)
	{
		case MARC_FMT_BRIEF: 
			{
				GetSubField("245", 'a', title, 	sizeof(title) - 1);
				GetSubField("260", 'c', date, sizeof(date) - 1);
				if ((strlen(date) + strlen(title) + 2) <=(size_t)maxlen)
				{
					sprintf(buf, "%s %s", date, title);
				}
				else 
				{
					/* FIXME: we're overflowing the buffer! */
				}
				break;
			}
		case MARC_FMT_FULL: 
			{
				int dc = 0, fc = 0;
				int remain = maxlen;
				
	            for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
				{
					dc++;
		            for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
					{
						fc++;
						if ((directory_[i].field_[j].len_ + 2) > remain)
						{
							strncat(buf, (char *)directory_[i].field_[j].data_ + 1, remain - 2);
							* (buf + remain - 2) = '\n';
							* (buf + remain - 1) = '\0';
							/* we're out of room in the buffer. */
							return;
						}
						if (directory_[i].field_[j].len_ > 0)
						{
							strncat(buf, (char*)directory_[i].field_[j].data_ + 1, directory_[i].field_[j].len_);
							strncat(buf, "\n\0", 2);
							remain -=(directory_[i].field_[j].len_ + 2);
						}
					} 
				}
				/*
				marcRecord_getSubField(m, "245", 'a', title, 
				sizeof(title) - 1);
				marcRecord_getSubField(m, "245", 'a', title, 
				sizeof(title) - 1);
				marcRecord_getSubField(m, "260", 'c', date, 
				sizeof(date) - 1);
				sprintf(buf, "%s %s", date, title);
				*/
				break;
			}
		default:
			break;
	}
}

int MarcRecord::RecordLength()
{
	register int l = 0;
	
	
	for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
		for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
			l += directory_[i].field_[i].len_;
		
        return l;
}

