######################################################################
#
#  EPrints OAI 1.1 Handler
#
#   Responds to incoming OAI requests
#
# Conforming to:
# http://www.openarchives.org/OAI/1.1/openarchivesprotocol.htm
#
######################################################################
#
#  This file is part of EPrints 2.
#  
#  Copyright (c) 2000,2001,2002 University of Southampton, UK. SO17 1BJ.
#  
#  EPrints 2 is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  EPrints 2 is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with EPrints 2; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
######################################################################

#cjg need to recomment all subs


use EPrints::Database;
use EPrints::EPrint;
use EPrints::OpenArchives;
use EPrints::Session;
use EPrints::Subject;

# use FileHandle;
# use IO::File;
# use POSIX;
# use XML::Writer;

use URI::Escape;

use strict;

# New session
my $session = new EPrints::Session();
Apache::exit( 0 ) unless( defined $session );
# What are we begin asked?
my $verb = $session->param( "verb" );

if( !defined $verb ) 
{
	# no verb
	send_http_error( $session, 400, "No OAI verb received" );
} 
else 
{
	if( $verb eq "GetRecord" )
	{
		get_record( $session );
	}
	elsif( $verb eq "Identify" )
	{
		identify( $session );
	}
	elsif( $verb eq "ListIdentifiers" )
	{
		list_identifiers( $session );
	}
	elsif( $verb eq "ListMetadataFormats" )
	{
		list_metadata_formats( $session );
	}
	elsif( $verb eq "ListRecords" )
	{
		list_records( $session );
	}
	elsif( $verb eq "ListSets" )
	{
		list_sets( $session );
	}
	else
	{
		# Unknown verb
		send_http_error( $session, 400, "Unknown OAI verb: $verb" );
	}
}

$session->terminate();
exit;


######################################################################
#
# get_record( $session )
#
#  Respond to a GetRecord verb:  Retrieve a single metadata record
#
######################################################################

#???
sub get_record
{
	my( $session ) = @_;

	my $identifier = $session->param( "identifier" );
	my $metadata_format = $session->param( "metadataPrefix" );
	
	if( !defined $identifier )
	{
		send_http_error( $session, 400, "No identifier sent in GetRecord verb" );
		return;
	}
	if( !defined $metadata_format )
	{
		send_http_error( $session,
		                  400,
		                  "No metadata format specified in GetRecord verb" );
		return;
	}

	if( !defined $session->get_archive()->get_conf( "oai", "metadata_namespaces" )->{$metadata_format} )
	{
		send_http_error( $session, 400, "\"$metadata_format\" is not a supported metadata format" );
		return;
	}

	# OK, we've got an ID & a format, even if we get no data we can now send
	# a response

	my $response = make_basic_oai_response( $session, "GetRecord" );

	my $eprint = new EPrints::EPrint(
		$session,
		EPrints::OpenArchives::from_oai_identifier(
			$session,
			$identifier ),
		$session->get_archive()->get_dataset( "archive" ) );

	if( defined $eprint )
	{
		my $fn = $session->get_archive()->get_conf( 
			"oai","metadata_functions" )->{$metadata_format};

		# The eprint exists, so write the record
		# if the metadataFormat isn't available for
		# this record, only the header will be output.

		$response->appendChild( $session->make_indent( 2 ) );
		$response->appendChild( 
			EPrints::OpenArchives::make_record(
				$session,
				$eprint,
				$fn ) );
	}

	send_response( $session, $response );
}



######################################################################
#
# identify( $session )
#
#  Identify ourselves
#
######################################################################


#???
sub identify
{
	my( $session ) = @_;

	my $response = make_basic_oai_response( $session, "Identify" );

	$response->appendChild( $session->render_data_element(
		2,
		"repositoryName",
		EPrints::Session::best_language( 
			$session->get_archive(),
			$session->get_langid(),
			%{$session->get_archive()->get_conf( "archivename" )} ) ) );

	$response->appendChild( $session->render_data_element(
		2,
		"baseURL",
		$session->get_archive()->get_conf( "oai","base_url" ) ) );

	$response->appendChild( $session->render_data_element(
		2,
		"protocolVersion",
		"1.1" ) );

	$response->appendChild( $session->render_data_element(
		2,
		"adminEmail",
		"mailto:".$session->get_archive()->get_conf( "adminemail" ) ) );

	my $d1 = $session->make_element( "description" );
	my $oaiid = $session->make_element( 	
		"oai-identifier",
		"xmlns"=>"http://www.openarchives.org/OAI/1.1/oai-identifier",
		"xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
		"xsi:schemaLocation"=>"http://www.openarchives.org/OAI/1.1/oai-identifier http://www.openarchives.org/OAI/1.1/oai-identifier.xsd" );
	$d1->appendChild( $session->make_indent( 4 ) );
	$d1->appendChild( $oaiid );
	$response->appendChild( $session->make_indent( 2 ) );
	$response->appendChild( $d1 );

	$oaiid->appendChild( $session->render_data_element(
		6,
		"scheme",
		"oai" ) );

	$oaiid->appendChild( $session->render_data_element(
		6,
		"repositoryIdentifier",
		$session->get_archive()->get_conf( "oai","archive_id" ) ) );

	$oaiid->appendChild( $session->render_data_element(
		6,
		"delimiter",
		":" ) );

	$oaiid->appendChild( $session->render_data_element(
		6,
		"sampleIdentifier",
		$session->get_archive()->get_conf( "oai","sample_identifier" ) ) );

	my $d2 = $session->make_element( "description" );
	my $eprints = $session->make_element( 	
		"eprints", 
		"xmlns"=>"http://www.openarchives.org/OAI/1.1/eprints",
		"xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
		"xsi:schemaLocation"=>"http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd" );
	$d2->appendChild( $session->make_indent( 4 ) );
	$d2->appendChild( $eprints );
	$response->appendChild( $session->make_indent( 2 ) );
	$response->appendChild( $d2 );

	$eprints->appendChild( render_text_url( 
		$session,
		"content", 
		$session->get_archive()->get_conf( "oai","content" ) ) );
                          
	$eprints->appendChild( render_text_url( 
		$session,
		"metadataPolicy", 
		$session->get_archive()->get_conf( "oai","metadata_policy" ) ) );

	$eprints->appendChild( render_text_url( 
		$session,
		"dataPolicy", 
		$session->get_archive()->get_conf( "oai","data_policy" ) ) );

	$eprints->appendChild( render_text_url( 
		$session,
		"submissionPolicy", 
		$session->get_archive()->get_conf( "oai","submission_policy" ) ) );

	foreach( @{$session->get_archive()->get_conf( "oai","comments" )} ) 
	{
		$eprints->appendChild( $session->render_data_element(
			6,
			"comment", 
			$_ ) );
	}
		
	send_response( $session, $response );
}


######################################################################
#
# write_text_url( $writer, $name, $texturl )
#                                 hashref
#  Write a TextURL type block to writer, of name $name. Block will 
#  contain a text and/or url element, defined in %texturl.
#  If texturl contains neither then this method returns without action.
#
######################################################################

sub render_text_url
{
	my( $session, $name, $texturl ) = @_;

	my $f = $session->make_doc_fragment();

	$f->appendChild( $session->make_indent( 6 ) );
	my $e = $session->make_element( $name );
	$f->appendChild( $e );

	if ( defined $texturl->{"text"} ) 
	{
		$e->appendChild( $session->render_data_element(
			8,
			"text",
			$texturl->{"text"} ) );
	}

	if ( defined $texturl->{"url"} ) 
	{
		$e->appendChild( $session->render_data_element(
			8,
			"URL",
			$texturl->{"url"} ) );
	}

	return $f;
}

sub get_searchexp_from_params
{
	my( $session, $mode ) = @_;

	my $token = $session->param( "resumptionToken" );

	if( defined $token )
	{
		unless( $token =~ m#^(archive|deletion)/(\d+)/(\d+)(/(.*))?$# )
		{
			send_http_error( $session, 400, "ResumptionToken invalid" );
			return;
		}
		my( $offset, $cache_id, $metadata_format ) = ( $2, $3, $5 );
		my $searchexp = EPrints::SearchExpression->new( 
			dataset => $session->get_archive->get_dataset( $1 ),
			session => $session,
			keep_cache => 1,
			cache_id => $cache_id );

		unless( defined $searchexp )
		{
			send_http_error( $session, 400, "ResumptionToken expired" );
			return;
		}
		return( $searchexp, $offset, $metadata_format );
	}

	# Get arguments
	my $start_date = $session->param( "from" );
	my $end_date = $session->param( "until" );
	my $codedset = $session->param( "set" );
	my $metadata_format = $session->param( "metadataPrefix" );
	

	my( $key,$value );

	my $ds = $session->get_archive()->get_dataset( "archive" );

	my $searchexp = new EPrints::SearchExpression(
		session => $session,
		keep_cache => 1,
		allow_blank => 1,
		filters => $session->get_archive()->get_conf( "oai","filters" ),
		dataset => $ds );

	if( defined $codedset )
	{
		my( $head , @tail ) = EPrints::OpenArchives::decode_setspec( $codedset );
		( $key , $value ) = split( /=/ , $head );
		if( scalar @tail > 0 )
		{
			$value = pop @tail;
		}
		my $views = $session->get_archive()->get_conf( "oai","sets" ); #cjg
		my $info;
		foreach( @{$views} )
		{
			$info = $_ if( $_->{id} eq $key );
		}
		if( !defined $info )
		{
			send_http_error( $session, 400, "Invalid set parameter; unknown key ( $key )" );
			return;
		}
		my @fields;
 		foreach( split( "/", $info->{fields} ) )
		{
			my $field = EPrints::Utils::field_from_config_string( $ds, $_ );
			unless( $field->is_browsable() )
			{
				my $type = $field->get_type();
				EPrints::Config::abort( <<END );
Cannot generate oai set for field "$_"
- Type "$type" cannot be browsed.
END
				return;
			}
			push @fields, $field;
		}
		$searchexp->add_field( \@fields, $value, "EX" );
	}


	# Extract and validate start & end dates, if given.
	my $date_range;

	if( defined $start_date )
	{
		if( $start_date !~ /^\d\d\d\d-\d\d-\d\d$/ )
		{
			send_http_error( $session, 400, "Invalid from parameter" );
			return;
		}
		$date_range = $start_date."-";
	}

	if( defined $end_date )
	{
		if( $end_date !~ /^\d\d\d\d-\d\d-\d\d$/ )
		{
			send_http_error( $session, 400, "Invalid until parameter" );
			return;
		}
		$date_range = "-" if( !defined $date_range );
		$date_range.= $end_date;
	}
	
	if( defined $date_range )
	{
		$searchexp->add_field( 
				$ds->get_field( "datestamp" ), 
				$date_range );
	}

	return( $searchexp, 0, $metadata_format );
}


######################################################################
#
# list_identifiers( $session )
#
#  Respond to ListIdentifiers verb.
#
######################################################################

sub list_identifiers
{
	my( $session ) = @_;

	_list( $session, 1 );
}

sub list_records
{
	my( $session ) = @_;

	_list( $session, 2 );
}

sub _list
{
	my( $session , $mode ) = @_;

	#mode 1 = ID 
	#mode 2 = full metadata
	
	my $PAGESIZE = 100;


	my( $searchexp, $offset, $metadata_format ) = get_searchexp_from_params( $session, $mode );

	if( !defined $searchexp )
	{
		# something went wrong
		return;
	}
	
	if( $mode == 2 )
	{
		if( !defined $metadata_format )
		{
			send_http_error( $session, 400, "Missing metadataPrefix parameter" );
			return;
		}

		if( !defined $session->get_archive()->get_conf( "oai","metadata_namespaces" )->{$metadata_format} )
		{
			send_http_error( $session, 400, "\"$metadata_format\" is not a supported metadata format" );
			return;
		}
	}

	$searchexp->perform_search();
	my $count = $searchexp->count();
	my $cache = $searchexp->get_cache_id();
	my $searchdsid = $searchexp->get_dataset()->id();
	my %opts = ();
	$opts{status}="deleted" if( $searchdsid eq "deletion" );

	my $response;
	my $mdtoken = "";
	
	if( $mode == 1 )
	{
		$response = make_basic_oai_response( $session, "ListIdentifiers" );

		my $ids = $searchexp->get_ids( $offset, $PAGESIZE );
		foreach( @{$ids} )
		{
			$response->appendChild( $session->render_data_element(
				2,
				"identifier",
				EPrints::OpenArchives::to_oai_identifier( 
					$session->get_archive()->get_conf( "oai","archive_id" ),
					$_ ),
				%opts ) );
		}
	}

	if( $mode == 2 )
	{
		$response = make_basic_oai_response( $session, "ListRecords" );

		my $fn = $session->get_archive()->get_conf( 
			"oai","metadata_functions" )->{$metadata_format};

		my @records = $searchexp->get_records( $offset, $PAGESIZE );
                my $eprint;
                foreach $eprint ( @records )
                {
			$response->appendChild( $session->make_indent( 2 ) );
			$response->appendChild( 
				EPrints::OpenArchives::make_record(
					$session,
					$eprint,
					$fn ) );
		}	

		$mdtoken = "/".$metadata_format;
	}

	$searchexp->dispose();



	if( $count > $offset+$PAGESIZE )
	{
		$response->appendChild( $session->render_data_element(
			2,
			"resumptionToken",
			$searchdsid."/".($offset+$PAGESIZE)."/".$cache.$mdtoken ) );
	}
	elsif( $searchdsid eq "archive" )
	{
		## End of archive items, time to do deleted ones.
		my $delsearchexp = $searchexp->clone;
		$delsearchexp->set_dataset(
			$session->get_archive()->get_dataset( "deletion" ) );
		$delsearchexp->perform_search();
		my $cache = $delsearchexp->get_cache_id();
		$delsearchexp->dispose();
			
		$response->appendChild( $session->render_data_element(
			2,
			"resumptionToken",
			"deletion/0/$cache".$mdtoken ) );
	}
		
	send_response( $session, $response );
}


######################################################################
#
# list_metadata_formats( $session )
#
######################################################################

sub list_metadata_formats
{
	my( $session ) = @_;
	
	my $identifier = $session->param( "identifier" );

	my @all_formats = keys %{$session->get_archive()->get_conf( "oai","metadata_namespaces" )};
	my @metadata_formats;

	if( defined $identifier )
	{
		my $eprint = new EPrints::EPrint(
			$session,
			EPrints::OpenArchives::from_oai_identifier(
				$session,
				$identifier ),
			$session->get_archive()->get_dataset( "archive" ) );

		if( defined $eprint )
		{
			foreach( @all_formats )
			{
				my $fn = $session->get_archive()->get_conf(
					"oai","metadata_functions" )->{$_};
				my $md = &{$fn}( $eprint, $session );
				if( defined $md )
				{
					push @metadata_formats, $_;
				}
			}
		}
	}
	else
	{
		@metadata_formats = @all_formats;
	}
	
	my $response = make_basic_oai_response( $session, "ListMetadataFormats" );
	
	foreach ( @metadata_formats )
	{
		my $mdf = $session->make_element( "metadataFormat" );

		$mdf->appendChild( $session->render_data_element(
			4,
			"metadataPrefix",
			$_ ) );

		$mdf->appendChild( $session->render_data_element(
			4,
			"schema",
			$session->get_archive()->get_conf( "oai","metadata_schemas" )->{$_} ) );
		$mdf->appendChild( $session->render_data_element(
			4,
			"metadataNamespace",
			$session->get_archive()->get_conf( "oai","metadata_namespaces" )->{$_} ) );
		$response->appendChild( $session->make_indent( 2 ) );
		$response->appendChild( $mdf );
	}
	
	send_response( $session, $response );
}



######################################################################
#
# list_sets( $session )
#
#  Respond to a ListSets verb.
#
######################################################################

sub list_sets
{
	my( $session ) = @_;

	my @sets = ();
	my %setnames = ();

	my $response = make_basic_oai_response( $session, "ListSets" );

	my $ds = $session->get_archive()->get_dataset( "archive" );
	my $ds_del = $session->get_archive()->get_dataset( "deletion" );

	my $viewconf = $session->get_archive()->get_conf( "oai","sets" );
	my $info;
	foreach $info ( @{$viewconf} )
	{
		my $fieldname;
		my %v = ();
		foreach $fieldname ( split( "/" , $info->{fields} ) )
		{
			my $field = EPrints::Utils::field_from_config_string( $ds, $fieldname );
			if( $field->is_type( "subject" ) )
			{
				my $topsubj = EPrints::Subject->new(
					$session,
					$field->get_property( "top" ) );
				my $i;
				foreach $i ( @{$topsubj->get_subjects( 0, 0, 1 )} )
				{
					my @kb = split( ":", $i->[0] );
					foreach( @kb )
					{
						$_ = EPrints::OpenArchives::encode_setspec( $_ );
					}
					my $key = join( ":", @kb );
					$v{$key} = $i->[1];
				}
			}
			else
			{
				my $v1 = $field->get_values( $session, $ds );
				my $delfield = $field->clone();
				#$delfield->set_dataset( $ds_del );
				my $v2 = $delfield->get_values( $session, $ds_del );
				foreach( @{$v1}, @{$v2} )
				{
					my $key = EPrints::OpenArchives::encode_setspec( $_ );
					if( !defined $key ) { $key=""; }
					$v{$key} = EPrints::Utils::tree_to_utf8( $field->get_value_label( $session, $_ ) );
				}
			}
		}
		unless( $info->{allow_null} ) { delete $v{""}; }
		foreach( keys %v ) 
		{	
			my $set = $session->make_element( "set" );
			$response->appendChild( $session->make_indent( 2 ) );
			$response->appendChild( $set );
			my $spec = EPrints::OpenArchives::encode_setspec( $info->{id}."=" ).$_;
			$set->appendChild( $session->render_data_element( 
				4,
				"setSpec",
				$spec ) );
			my $name = $session->get_view_name( $ds, $info->{id} )." = ".$v{$_};
			$set->appendChild( $session->render_data_element( 
				4,
				"setName",
				$name ) );
		}
	}

	send_response( $session, $response );

}


######################################################################
#
# send_http_error( $session, $code, $message )
#
#  Send an HTTP error as a response
#
######################################################################

sub send_http_error
{
	my( $session, $code, $message ) = @_;

	my $r = Apache->request;
	$r->content_type( 'text/html' );
	$r->status_line( "$code $message" );
	$r->send_http_header;
	my $title = "Error $code in OAI request";
	$r->print( <<END );
<html>
<head><title>$title</title></head>
<body>
  <h1>$title</h1>
  <p>$message</p>
</body>
END
}


sub make_basic_oai_response
{
	my( $session, $verb ) = @_;

	# Namespaces and Schemas
	my %verb_schemas =
	(
		"GetRecord" => "http://www.openarchives.org/OAI/1.1/OAI_GetRecord.xsd",
		"Identify"  => "http://www.openarchives.org/OAI/1.1/OAI_Identify.xsd",
		"ListIdentifiers" => "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers.xsd",
		"ListMetadataFormats" => "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats.xsd",
		"ListRecords" => "http://www.openarchives.org/OAI/1.1/OAI_ListRecords.xsd",
		"ListSets" => "http://www.openarchives.org/OAI/1.1/OAI_ListSets.xsd"
	);
	my %verb_namespaces =
	(
		"GetRecord" => "http://www.openarchives.org/OAI/1.1/OAI_GetRecord",
		"Identify"  => "http://www.openarchives.org/OAI/1.1/OAI_Identify",
		"ListIdentifiers" => "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers",
		"ListMetadataFormats" => "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats",
		"ListRecords" => "http://www.openarchives.org/OAI/1.1/OAI_ListRecords",
		"ListSets" => "http://www.openarchives.org/OAI/1.1/OAI_ListSets"
	);

	my $response = $session->make_element( 
			$verb,
			"xmlns"=>$verb_namespaces{$verb},
			"xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
			"xsi:schemaLocation"=>$verb_namespaces{$verb}." ".$verb_schemas{$verb} );

	my $responseDate = $session->make_element( "responseDate" );
	$responseDate->appendChild( $session->make_text( EPrints::OpenArchives::full_timestamp() ) );
	$response->appendChild( $session->make_indent( 2 ) );
	$response->appendChild( $responseDate );

	my $requestURL = $session->make_element( "requestURL" );
	my @bits = ();
	foreach( $session->param ) 
	{
		push @bits , "$_=".EPrints::Utils::url_escape( $session->param($_) );
	}
	my $url = $session->get_archive->get_conf( "base_url" );
	$url .= $session->get_uri;
	$url .= "?" . join( '&', @bits );

	$requestURL->appendChild( $session->make_text( $url ) );
	$response->appendChild( $session->make_indent( 2 ) );
	$response->appendChild( $requestURL );
	
	return $response;
}

sub send_response
{
	my( $session, $response )  = @_;

	my $content = "text/xml";
	if( $session->param( "debug" ) eq "yes" )
	{
		$content = "text/plain";
	}
	$session->send_http_header( content_type=>$content );

	#cjg Need proper XML <?xml line

	$response->appendChild( $session->make_text( "\n\n" ) );

	print <<END;
<?xml version="1.0" encoding="UTF-8" ?>

END
	print EPrints::XML::to_string( $response );

	EPrints::XML::dispose( $response );
}
	


