###########################################################################
#
# PreProcessPlugin.pm -- plugin that runs the provided cmdline cmd to
# convert a file.
# This happens in file_block_read phase.
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 2024 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package PreProcessPlugin;

use strict; 
no strict 'subs';
no strict 'refs'; # allow filehandles to be variables and viceversa

#use BaseImporter;
use CommonUtil;
# Set up inheritance at compile time: PreProcessPlugin derives from CommonUtil.
BEGIN {
    @PreProcessPlugin::ISA = qw(CommonUtil);
}

# Options understood by this plugin.  new() appends them to the shared
# "ArgList" it receives.
my $arguments = [
    # Command line to execute.  %%GSDL... placeholders are expanded once in
    # new(); %%INPUT_FILE is expanded per file in run_command().
    {
        'name' => "exec_cmd",
        'desc' => "{PreProcessPlugin.exec_cmd}",
        'type' => "string",
        'deft' => "",
        'reqd' => "yes",
    },

    # When set, new() turns off 'block_original', so file_block_read() does
    # not call block_filename() on the processed file.
    {
        'name' => "no_block_original_file",
        'desc' => "{PreProcessPlugin.no_block_original_file}",
        'type' => "flag",
        'reqd' => "no",
    },

    # When set, file_block_read() executes the command a single time and
    # then records 'has_been_run'.
    {
        'name' => "run_once",
        'desc' => "{PreProcessPlugin.run_once}",
        'type' => "flag",
        'reqd' => "no",
    },

    # Used by new() to build 'process_exp' when no process_exp is supplied.
    {
        'name' => "process_extension",
        'desc' => "{PreProcessPlugin.process_extension}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },

    # Regular expression matched against the full file path in
    # file_block_read().
    {
        'name' => "process_exp",
        'desc' => "{BaseImporter.process_exp}",
        'type' => "regexp",
        'deft' => "",
        'reqd' => "no",
    },
];

# Plugin description record.  new() appends it to the shared "OptList".
my $options = {
    'name'     => "PreProcessPlugin",
    'desc'     => "{PreProcessPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};


# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed on unchanged to CommonUtil->new
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's $arguments and $options
#
# Returns the object built by CommonUtil->new, re-blessed into $class.  If
# that object has 'info_only' set it is returned without further setup.
# Otherwise 'process_exp', 'block_original' and 'exec_cmd' are finalised.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call: the indirect-object form
    # ("new CommonUtil(...)") depends on parser heuristics and is discouraged.
    my $self = CommonUtil->new($pluginlist, $inputargs, $hashArgOptLists);
    $self = bless $self, $class;

    if ($self->{'info_only'}) {
        # If running pluginfo, we don't need to go further.
        return $self;
    }

    # Derive a filename pattern from process_extension when no explicit
    # process_exp was given.  The extension is used as-is inside the regex.
    if (!$self->{'process_exp'} && $self->{'process_extension'}) {
        $self->{'process_exp'} = "\\." . $self->{'process_extension'} . "\$";
    }

    # The original file is blocked unless no_block_original_file was given.
    $self->{'block_original'} = 1;
    if ($self->{'no_block_original_file'}) {
        $self->{'block_original'} = 0;
    }

    # one-time replacements in exec_cmd happen here
    # don't nest GSDL env paths in (escaped) double quotes, as they're likely to be part of larger paths
    # It's up to the user to do the (escaped) double-quoting around paths.
    my $cmd = $self->{'exec_cmd'};

    if ($cmd) {
        print STDERR "PreProcessPlugin Command (Original): $cmd\n";
        $cmd =~ s@%%GSDL3SRCHOME@$ENV{'GSDL3SRCHOME'}@g;
        $cmd =~ s@%%GSDLHOME@$ENV{'GSDLHOME'}@g;
        $cmd =~ s@%%GSDL3HOME@$ENV{'GSDL3HOME'}@g;
        $cmd =~ s@%%GSDLCOLLECTDIR@$ENV{'GSDLCOLLECTDIR'}@g;
        $self->{'exec_cmd'} = $cmd;
        print STDERR "PreProcessPlugin Command (New): $cmd\n";
    }
    return $self;
}

# Forwards initialisation to the parent class.
# All arguments after the invocant are passed on untouched, and whatever the
# parent's init() returns is returned to the caller.
sub init {
    my $self = shift;
    return $self->SUPER::init(@_);
}

# No-op begin(): performs no work and does not call the parent class
# (the SUPER::begin call is left disabled).
# Returns the invocant, which is what the single-statement body evaluates to.
sub begin {
    my ($self) = @_;
    return $self;
}

# No-op deinit(): performs no work and does not call the parent class
# (the SUPER::deinit call is left disabled).
# Returns the invocant, which is what the single-statement body evaluates to.
sub deinit {
    my ($self) = @_;
    return $self;
}

# No-op remove_all(): nothing is removed by this plugin.
# Returns the invocant, which is what the single-statement body evaluates to.
sub remove_all {
    my ($self) = @_;
    return $self;
}

# No-op remove_one(): nothing is removed by this plugin.
# Returns the invocant, which is what the single-statement body evaluates to.
sub remove_one {
    my ($self) = @_;
    return $self;
}

# No-op end(): performs no work.
# Returns the invocant, which is what the single-statement body evaluates to.
sub end {
    my ($self) = @_;
    return $self;
}

# This plugin reads no metadata: always returns undef and ignores any
# arguments passed after the invocant.
sub metadata_read {
    my ($self) = @_;
    return undef;
}

# This plugin does not read documents: always returns undef and ignores any
# arguments passed after the invocant.
sub read {
    my ($self) = @_;
    return undef;
}

# No-op compile_stats(): this plugin contributes no statistics.
# Returns the invocant, which is what the single-statement body evaluates to.
sub compile_stats {
    my ($self) = @_;
    return $self;
}
# Wherever this is called, the answer is that we cannot process the file:
# always returns 0, regardless of the arguments supplied.
sub can_process_this_file {
    my ($self) = @_;
    return 0;
}

# Runs the configured command from the file-block-read hook.
#
# Parameters (after the invocant):
#   $pluginfo, $metadata, $gli - accepted but not used here
#   $base_dir, $file           - combined by util::get_full_filenames into
#                                the full path of the candidate file
#   $block_hash                - handed to block_filename() when the
#                                processed file is to be blocked
#
# Returns undef when nothing was done (run_once already satisfied, or the
# file is not a plain file matching process_exp); returns 1 after the command
# has been attempted, whether or not it succeeded.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # In run_once mode there is nothing left to do after the first execution.
    return undef if $self->{'run_once'} && $self->{'has_been_run'};

    my ($full_path, $tail_name) = &util::get_full_filenames($base_dir, $file);

    # Per-file mode: only existing plain files whose full path matches a
    # non-empty process_exp are handled.
    unless ($self->{'run_once'}) {
        my $wanted = -f $full_path
            && $self->{'process_exp'} ne ""
            && $full_path =~ /$self->{'process_exp'}/;
        return undef unless $wanted;
    }

    # Execute the command and report the outcome on STDERR.
    if ($self->run_command($full_path)) {
        print STDERR "Success!\n";
    }
    else {
        print STDERR "no success!";
    }

    if ($self->{'run_once'}) {
        # Remember the execution so later calls return early.
        $self->{'has_been_run'} = 1;
    }
    elsif ($self->{'block_original'}) {
        # The command ran on this file; block the original.
        $self->block_filename($block_hash, $full_path);
    }

    return 1;
}

# Run the actual pre-processing command for one input file.
#
# Parameters:
#   $filename - path substituted, wrapped in double quotes, for every
#               %%INPUT_FILE placeholder in the configured exec_cmd.
#
# Returns 1 when the command ran and exited with value 0.  In every other
# case an error is printed to the plugin's outhandle and 0 is returned:
# no command configured, command could not be started, command killed by a
# signal, or non-zero exit value.
sub run_command {
    my $self = shift (@_);
    my ($filename) = @_;

    my $outhandle = $self->{'outhandle'};

    my $cmd = $self->{'exec_cmd'};
    if (!$cmd) { # undef or empty string for instance
        my $shown = defined $cmd ? $cmd : '';
        print $outhandle "PreProcessPlugin  error: a command to execute is required, cmd provided is |$shown|\n";
        return 0;
    }

    # Replace the remaining placeholders in the cmd string, the ones that
    # change per input file.  (An %%OUTPUT placeholder is not supported yet.)
    # NOTE(review): the filename is only wrapped in double quotes, so names
    # containing ", $ or backticks are still interpreted by the shell -
    # confirm whether untrusted filenames can reach this point.
    $cmd =~ s@%%INPUT_FILE@\"$filename\"@g;

    # Some debugging
    if (($self->{'verbosity'} || 0) > 2) {
        print STDERR "PreProcessPlugin: executing cmd \n|$cmd|\n";
        print STDERR "   on infile |$filename|\n";
    }

    # Run the command at last
    my $status   = system($cmd);
    my $os_error = "$!";    # only meaningful when system() returned -1

    # system() returns the raw wait status, not the exit value:
    #   -1            the command could not be started at all
    #   low 7 bits    signal number, if the child was killed by a signal
    #   status >> 8   the exit value of the command
    if ($status == -1) {
        print $outhandle "PreProcessPlugin error: cmd could not be started, may not be installed ($os_error when running $cmd)\n";
        return 0;
    }

    my $signal = $status & 127;
    if ($signal) {
        print $outhandle "PreProcessPlugin error: running $cmd was terminated by signal $signal\n";
        return 0;
    }

    my $exit_value = $status >> 8;

    # On Linux the shell exits with 127 when the program isn't installed;
    # on Windows the corresponding ERRORLEVEL is 9009.
    if ($exit_value == 127 || $exit_value == 9009) {
        print $outhandle "PreProcessPlugin error: cmd unrecognised, may not be installed (got $exit_value when running $cmd)\n";
        return 0;
    }

    if ($exit_value != 0) {
        print $outhandle "PreProcessPlugin error: running $cmd failed with exit value $exit_value\n";
        return 0;
    }

    # Exit value 0: assume the command did its work.  No check is made here
    # for any output it was expected to produce.
    return 1;
}


1;
