=head1 NAME

EPrints::Plugin::Import::DOI - import eprint metadata for a list of DOIs via CrossRef

=cut

package EPrints::Plugin::Import::DOI;

use strict;

use EPrints::Plugin::Import::TextFile;
require URI;

our @ISA = qw/ EPrints::Plugin::Import::TextFile /;

=head1 new

Constructor. Delegates to the parent class constructor with the given
parameters and then sets the plugin's C<name>, C<visible>, C<produce>
and C<screen> attributes.

=cut

sub new
{
	my( $class, %params ) = @_;

	my $self = $class->SUPER::new( %params );

	# Fixed attributes of this plugin, applied in one go via a hash slice.
	my %settings = (
		name    => "DOI (via CrossRef)",
		visible => "all",
		produce => [ 'dataobj/eprint', 'list/eprint' ],
		screen  => "Import::DOI",
	);
	@{$self}{ keys %settings } = values %settings;

	return $self;
}

=head1 screen

Returns whatever the repository's C<plugin> method yields for
"Screen::Import::DOI", forwarding all given parameters to it.

=cut

sub screen
{
	my( $self, %params ) = @_;

	my $repository = $self->{repository};
	my $screen_id = "Screen::Import::DOI";

	return $repository->plugin( $screen_id, %params );
}

sub input_text_fh
# Reads DOIs (one per line) from $opts{fh}, queries CrossRef for each of them,
# converts the reply into eprint data and creates a data object in
# $opts{dataset}.
#
# Plugin parameters read:
#   pid        - CrossRef login / e-mail address (mandatory)
#   use_prefix - true: put "doi:" in front of the DOI before conversion
#   doi_field  - name of the eprint field holding the DOI (default 'id_number')
#
# Returns an EPrints::List of the ids of all created objects, or undef (after
# reporting "missing_pid") when no pid is configured. Problems with a single
# DOI are reported through the handler and that DOI is skipped.
#
# cf. http://help.crossref.org/query-results
# cf. http://help.crossref.org/unixsd for metadata format
# cf. http://www.crossref.org/help/schema_doc/crossref_query_output3.0/query_output3.0.html
# cf. http://www.crossref.org/schemas/crossref_query_output3.0.xsd
# cf. http://www.crossref.org/help/schema_doc/unixref1.1/unixref1.1.html
# cf. http://doi.crossref.org/schemas/unixref1.1.xsd
# cf. https://jats.nlm.nih.gov/publishing/1.1/xsd.html
{
	my( $plugin, %opts ) = @_;

	my @ids;

	my $pid = $plugin->param( "pid" );
	my $session = $plugin->{repository};
	my $use_prefix = $plugin->param( "use_prefix" ) || 0;
	my $doi_field = $plugin->param( "doi_field" ) || 'id_number';

	unless( $pid )
	{
		$plugin->handler->message( "error", $plugin->html_phrase( "missing_pid" ));
		return undef;
	}

	my $fh = $opts{fh};
	LINE: while( my $doi = <$fh> ) # process all available DOIs (one per line)
	{
		# Remove surrounding blanks. Two separate substitutions so that the
		# line is trimmed even when it contains inner whitespace.
		$doi =~ s/^\s+//;
		$doi =~ s/\s+$//;
		next LINE unless length($doi);

		# Check and exclude DOI from fetch if it already exists somewhere in
		# the 'eprint' dataset - Alan Stiles, Open University, 20140408
		# (search dataset 'archive' instead to restrict the check to it).
		# convert_input() stores the DOI without a leading "doi:", so the
		# un-prefixed form is looked up in addition to the literal input.
		my $bare_doi = $doi;
		$bare_doi =~ s/^doi:\s*//i;
		my @lookup_values = ( $doi );
		push @lookup_values, $bare_doi if length($bare_doi) && $bare_doi ne $doi;

		my $duplicate;
		foreach my $value ( @lookup_values )
		{
			my $matches = $session->dataset( 'eprint' )->search(
				filters =>
				[
					{ meta_fields => [$doi_field], value => $value, match => "EX" }
				]
			);
			next unless $matches->count() > 0;
			$duplicate = $matches->item( 0 );
			last;
		}
		if( defined $duplicate )
		{
			$plugin->handler->message( "warning", $plugin->html_phrase( "duplicate_doi",
				doi => $plugin->{session}->make_text( $doi ),
				msg => $duplicate->render_citation_link(),
			));
			next LINE;
		}

		# prepare request for metadata
		# https://doi.crossref.org/servlet/query?pid=<USERNAME>:<PASSWORD>&id=10.1577/H02-043
		# where:
		#   pid is your Crossref-supplied login name and password or Crossref Query Services email address.
		#   format is the desired results format ( xsd_xml | unixref | unixsd)
		#   id is a Crossref DOI
		# https://doi.crossref.org/search/doi?pid=<EMAIL or USERNAME:PASSWORD>&format=unixsd&doi=<DOI>
		my %params = (
			pid => $pid,
			noredirect => "true",
			format => "unixsd",
			id => $doi,
		);

		my $url = URI->new( "http://doi.crossref.org/openurl" );
		$url->query_form( %params );

		# Fetch and parse the reply; capture the failure state right away so
		# that nothing in between can clobber $@.
		my $dom_doc;
		my $fetch_failed = 0;
		eval {
			$dom_doc = EPrints::XML::parse_url( $url );
			1;
		} or $fetch_failed = 1;

		# Walk down to <query>; every level may be missing in a broken reply.
		my $dom_body;
		my $dom_query;
		if( !$fetch_failed && defined $dom_doc )
		{
			my $dom_top = $dom_doc->getDocumentElement;
			my $dom_result;
			$dom_result = ($dom_top->getElementsByTagName( "query_result" ))[0] if defined $dom_top;
			$dom_body = ($dom_result->getElementsByTagName( "body" ))[0] if defined $dom_result;
			$dom_query = ($dom_body->getElementsByTagName( "query" ))[0] if defined $dom_body;
		}

		# check for no (usable) reply
		unless( defined $dom_query )
		{
			EPrints::XML::dispose( $dom_doc ) if defined $dom_doc;
			$plugin->handler->message( "warning", $plugin->html_phrase( "no_reply",
				doi => $plugin->{session}->make_text( $doi )
			));
			next LINE;
		}

		my $status = $dom_query->getAttribute( "status" );

		# check for bad reply
		if( defined($status) && $status ne "resolved" )
		{
			my $dom_msg = ($dom_query->getElementsByTagName( "msg" ))[0];
			my $msg = "";
			$msg = EPrints::Utils::tree_to_utf8( $dom_msg ) if defined $dom_msg;

			# the message text has been extracted, the document is no longer needed
			EPrints::XML::dispose( $dom_doc );

			# status values starting with "system" get the "unresolved_doi"
			# phrase, every other non-resolved status "invalid_doi"
			my $phrase_id = ( $status =~ /^system/ ) ? "unresolved_doi" : "invalid_doi";
			$plugin->handler->message( "warning", $plugin->html_phrase( $phrase_id,
				doi => $plugin->{session}->make_text( $doi ),
				msg => $plugin->{session}->make_text( $msg )
			));
			next LINE;
		}

		# Only include prefix if config parameter set - Alan Stiles, Open University, 20140408
		if ( $use_prefix )
		{
			$doi =~ s/^(doi:)?/doi:/i;
		}
		else
		{
			$doi =~ s/^(doi:)?//i;
		}
		my $data = { doi => $doi };

		# exploit reply, i.e. fill $data
		ExploitNode( $dom_body, $data );

		EPrints::XML::dispose( $dom_doc );

		if( defined $data->{"error"})
		{
			$plugin->handler->message( "error", $plugin->html_phrase( "error",
				doi => $plugin->{session}->make_text( $doi ),
				msg => $plugin->{session}->make_text( $data->{"error"} )
			));
			next LINE;
		}
		my $epdata = $plugin->convert_input( $doi_field, $data );
		next LINE unless( defined $epdata );

		my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
		push @ids, $dataobj->get_id if defined $dataobj;
	}

	return EPrints::List->new(
		dataset => $opts{dataset},
		session => $plugin->{session},
		ids=>\@ids );
}

=head1 ExploitNode

ExploitNode( $root, $data )

Walks the element children of $root and records what it finds in the hash
reference $data. Elements listed as "skip" are ignored, elements listed as
"descend" are processed recursively. Every other element is stored under a
key built from its tag name plus, when present, the values of its C<type>,
C<media_type> and C<name> attributes (joined by "."; a C<name> value
starting with "xref" is not appended):

=over 4

=item * C<contributors> is handed to SetContributors

=item * C<person_name> is appended to the C<creators> list

=item * keys containing C<_date> (except C<conference_date>) are stored as
the string built by SetDate

=item * anything else is stored as text, the first occurrence wins

=back

Nothing is returned; $data is modified in place.

=cut

# Elements whose content is deliberately ignored.
my %EXPLOIT_SKIP = map { $_ => 1 } qw(
	abstract archive_locations citation_list component_list crossmark
	doi_data identifier item_number noisbn program publisher_item
	sa_component
);

# Pure container elements: only their children are of interest.
# NOTE(review): "crm-item" is listed here, so its attributes never become
# part of a key although convert_input looks for "crm-item.publisher-name"
# - confirm whether that is intended.
my %EXPLOIT_DESCEND = map { $_ => 1 } qw(
	book book_metadata book_series_metadata book_set_metadata conference
	conference_paper content_item crm-item crossref crossref_metadata
	database database_date database_metadata dataset dissertation
	doi_record event_metadata institution journal journal_article
	journal_issue journal_metadata journal_volume pages
	proceedings_metadata proceedings_series_metadata publisher query
	report-paper report-paper_metadata report-paper_series_metadata
	series_metadata standard standard_metadata standards_body
	standard_series_metadata titles
);

sub ExploitNode
{
	my( $root, $data ) = @_;

	# Where to put <contract_number>{0,1}</contract_number>, <edition_number match="optional">{0,1}</edition_number> or <component_number match="optional">{0,1}</component_number>
	# and what about <xsd:element ref="coden" minOccurs="0"/>, <publication_type>{0,1}</publication_type>, <identifier id_type="">{0,unbounded}</identifier> or <component_list>{0,1}</component_list>
	foreach my $node ( $root->getChildNodes )
	{
		next unless ( EPrints::XML::is_dom( $node, "Element" ) );

		my $name = $node->tagName;
		next if $EXPLOIT_SKIP{$name};
		if( $EXPLOIT_DESCEND{$name} )
		{
			ExploitNode( $node, $data );
			next;
		}

		# Qualify the key with the distinguishing attributes, in fixed order.
		foreach my $attribute ( "type", "media_type" )
		{
			next unless $node->hasAttribute( $attribute );
			my $value = $node->getAttribute( $attribute );
			$name .= ".".$value if defined $value;
		}
		if( $node->hasAttribute( "name" ) )
		{
			my $value = $node->getAttribute( "name" );
			# "!~" on purpose: "! $value =~ /.../" would negate $value first
			# and therefore never append anything.
			$name .= ".".$value if defined $value && $value !~ /^xref/;
		}

		if( $name eq "contributors" )
		{
			SetContributors( $data, $node );
		}
		elsif( $name eq "person_name" )
		{
			# creators is a list of name hashes (as built by
			# SetContributors), so append instead of storing a bare hash
			push @{ $data->{"creators"} }, SetCreator( $node );
		}
		elsif( $name ne "conference_date" && $name =~ /_date/ )
		{
			$data->{$name} = SetDate( $node );
		}
		else
		{
			$data->{$name} = EPrints::Utils::tree_to_utf8( $node ) unless defined($data->{$name});
		}
	}
}

=head1 SetDate

SetDate( $root )

Collects the text of all element children of $root, keyed by element name
(a leading "start_" is removed from the name), and builds a date string
from the keys day, month, year and end_day, end_month, end_year.

Each of the two dates is rendered as (dd-)?(mm-)?yyyy: without a non-empty
year it is empty, the month is only used together with the year and the day
only together with the month. If both dates are non-empty the result is
"start - end", otherwise whichever one is present (or the empty string).

=cut

sub SetDate
{
	my( $root ) = @_;

	my %part;
	for my $child ( $root->childNodes )
	{
		next unless EPrints::XML::is_dom( $child, "Element" );

		( my $key = $child->nodeName ) =~ s/^start_//;
		$part{$key} = EPrints::Utils::tree_to_utf8( $child );
	}

	# Renders one date from the parts whose keys carry the given prefix.
	my $compose = sub {
		my( $prefix ) = @_;

		my $year = $part{$prefix."year"};
		my $month = $part{$prefix."month"};
		my $day = $part{$prefix."day"};

		return "" unless defined $year && length( $year );
		return $year unless defined $month;
		return $month."-".$year unless defined $day;
		return $day."-".$month."-".$year;
	};

	my $from = $compose->( "" );
	my $until = $compose->( "end_" );

	return $from . " - " . $until if length( $from ) && length( $until );
	return $from . $until;
}

=head1 GetDate

GetDate( $data, $name )

Looks up $name in the hash reference $data, trying the plain key first and
then the variants with the suffixes ".print", ".online" and ".other", in
that order. Returns the first defined value, or undef if there is none.

=cut

sub GetDate
{
	my( $data, $name ) = @_;

	my @suffixes = ( "", ".print", ".online", ".other" );
	while( @suffixes )
	{
		my $key = $name . shift @suffixes;
		next unless defined $data->{$key};
		return $data->{$key};
	}

	return undef;
}

=head1 SetCreator

SetCreator( $contributor )

Reads the element children of $contributor and returns a hash reference
of the form { name => { given => ..., family => ... }, id => ... }:
C<given_name> fills name/given, C<surname> fills name/family and C<ORCID>
fills id. Parts that do not occur are left out; C<name> is always present
(possibly as an empty hash).

=cut

sub SetCreator
{
	my( $contributor ) = @_;

	my $person = { name => {} };

	for my $child ( $contributor->childNodes )
	{
		next unless EPrints::XML::is_dom( $child, "Element" );

		my $tag = $child->nodeName;
		if( $tag eq "given_name" || $tag eq "surname" )
		{
			my $slot = ( $tag eq "surname" ) ? "family" : "given";
			$person->{name}->{$slot} = EPrints::Utils::tree_to_utf8($child);
		}
		elsif( $tag eq "ORCID" )
		{
			$person->{id} = EPrints::Utils::tree_to_utf8($child);
		}
	}

	return $person;
}

=head1 SetContributors

SetContributors( $data, $node )

Sorts the element children of $node (contributor, organization or
person_name) into three lists and stores every non-empty list in $data:

=over 4

=item * C<corp_creators> - text of each C<organization> element

=item * C<editors> - persons whose C<contributor_role> attribute is "editor"

=item * C<creators> - all other persons (a missing role counts as "author")

=back

Persons are built by SetCreator and are only kept if they have a family
name. Nothing is returned.

=cut

sub SetContributors
{
	my( $data, $node ) = @_;

	my %found = (
		corp_creators => [],
		creators => [],
		editors => [],
	);

	for my $child ( $node->childNodes ) # contributor, organization or person_name
	{
		next unless EPrints::XML::is_dom( $child, "Element" );

		my $role = $child->hasAttribute( "contributor_role" )
			? $child->getAttribute( "contributor_role" )
			: "author";

		if( $child->tagName eq "organization" )
		{
			push @{ $found{corp_creators} }, EPrints::Utils::tree_to_utf8( $child );
			next;
		}

		my $person = SetCreator( $child );
		next unless exists $person->{name}->{family};

		my $kind = ( $role eq "editor" ) ? "editors" : "creators";
		push @{ $found{$kind} }, $person;
	}

	for my $kind ( "corp_creators", "creators", "editors" )
	{
		$data->{$kind} = $found{$kind} if @{ $found{$kind} };
	}
}

=head1 convert_input

$epdata = $plugin->convert_input( $doi_field, $data )

Maps the flat hash $data filled by ExploitNode (keys are element names,
optionally qualified by attribute values) onto a hash reference of eprint
field values. $doi_field is the name of the eprint field receiving the DOI
(stored without a leading "doi:").

The eprint type is derived from the kind of titles / identifiers found and,
as a fallback, from the type of the DOI reported in the reply.

=cut

sub convert_input
{
	my( $plugin, $doi_field, $data ) = @_;

	my $epdata = {};
	my $PagePattern = qr/.*?(\d+)\D*$/; # captures the last number of a page label
	my $ISSNpattern = qr/^(\d{4})(\d{3}[\dX])$/;
	my $ISBNpatternO = qr/^(\d)(\d{5})(\d{3})([\dX])$/; # cf. https://www.isbn-international.org/sites/default/files/ISBN%20Manual%202012%20-corr.pdf
	my $ISBNpatternN = qr/^(\d{3})(\d)(\d{4})(\d{4})([\dX])$/;

	if( defined $data->{"creators"} )
	{
		$epdata->{creators} = $data->{"creators"};
	}
	elsif( defined $data->{"author"} )
	{
		$epdata->{creators} = [
			{
				name=>{ family=>$data->{"author"} },
			}
		];
	}
	if( defined $data->{"editors"} )
	{
		$epdata->{editors} = $data->{"editors"};
	}

	# corp_creators is a list (see SetContributors); a standards body is
	# added to it instead of replacing the list by a plain string.
	my @corp_creators;
	if( defined $data->{"corp_creators"} )
	{
		push @corp_creators, ref( $data->{"corp_creators"} ) eq "ARRAY"
			? @{ $data->{"corp_creators"} }
			: $data->{"corp_creators"};
	}
	push @corp_creators, $data->{"standards_body_name"} if defined($data->{"standards_body_name"});
	$epdata->{corp_creators} = \@corp_creators if @corp_creators;

	if( defined $data->{"isbn.electronic"} )
	{
		$epdata->{type} = "book";
		# The formatted electronic ISBN is currently not stored; enable one
		# of the assignments below to do so.
		my $isbn = $data->{"isbn.electronic"};
		$isbn =~ s/$ISBNpatternN/$1-$2-$3-$4-$5/;
		$isbn =~ s/$ISBNpatternO/$1-$2-$3-$4/;
# modify?
#		$epdata->{isbn} = $isbn;
#		$epdata->{eisbn} = $isbn;
	}
	if( defined $data->{"isbn.print"} )
	{
		$epdata->{type} = "book";
		my $isbn = $data->{"isbn.print"};
		$isbn =~ s/$ISBNpatternN/$1-$2-$3-$4-$5/;
		$isbn =~ s/$ISBNpatternO/$1-$2-$3-$4/;
		$epdata->{isbn} = $isbn;
	}
	if( defined $data->{"issn.electronic"} )
	{
		# The formatted electronic ISSN is currently not stored; enable one
		# of the assignments below to do so.
		my $issn = $data->{"issn.electronic"};
		$issn =~ s/$ISSNpattern/$1-$2/;
# modify?
#		$epdata->{issn} = $issn;
#		$epdata->{eissn} = $issn;
	}
	if( defined $data->{"issn.print"} )
	{
		my $issn = $data->{"issn.print"};
		$issn =~ s/$ISSNpattern/$1-$2/;
		$epdata->{issn} = $issn;
	}
	if( defined $data->{"doi"} )
	{
		my $doi = $data->{"doi"};
		$doi =~ s/^\s*doi:\s*//gi;
		$epdata->{$doi_field} = $doi;
	}
	if( defined $data->{"resource_t"} )
	{
		$epdata->{official_url} = $data->{"resource_t"};
	}

	if( defined $data->{"full_title"} || defined $data->{"journal_title"} || defined $data->{"journal-title"} )
	{
		$epdata->{type} = "article";
		# later assignments win: journal_title over journal-title over full_title
		$epdata->{publication} = $data->{"full_title"} if defined($data->{"full_title"});
		$epdata->{publication} = $data->{"journal-title"} if defined($data->{"journal-title"});
		$epdata->{publication} = $data->{"journal_title"} if defined($data->{"journal_title"});
	}

	if( defined $data->{"article_title"} )
	{
		$epdata->{type} = "article";
		$epdata->{title} = $data->{"article_title"};
	}

	if( defined $data->{"series_title"} )
	{
		$epdata->{type} = "book";
		$epdata->{note} = 'series title: ' . $data->{"series_title"};
	}

	if( defined $data->{"volume_title"} )
	{
		$epdata->{type} = "book";
		$epdata->{book_title} = $data->{"volume_title"};
	}

	if( defined $data->{"proceedings_title"} )
	{
		$epdata->{type} = "book_section";
		$epdata->{book_title} = $data->{"proceedings_title"};
	}

	$epdata->{date} = GetDate($data, "approval_date");
	$epdata->{thesis_type} = $data->{"degree"} if defined $data->{"degree"} && $data->{"degree"} =~ /^(diploma|masters|doctoral|postdoctoral|others)$/;

	if( defined $data->{"publisher"} )
	{
		$epdata->{publisher} = $data->{"publisher"};
	}
	elsif( defined $data->{"publisher_name"} )
	{
		$epdata->{publisher} = $data->{"publisher_name"};
	}
	elsif( defined $data->{"crm-item.publisher-name"} )
	{
		$epdata->{publisher} = $data->{"crm-item.publisher-name"};
	}
	$epdata->{place_of_pub} = $data->{"publisher_place"} if defined $data->{"publisher_place"};
#	$epdata->{place_of_pub} = $data->{"archive_locations"} if defined $data->{"archive_locations"};
	$epdata->{place_of_pub} = $data->{"institution_place"} if defined $data->{"institution_place"};

	$epdata->{abstract} = $data->{"description"} if defined $data->{"description"};
	$epdata->{keywords} = $data->{"proceedings_subject"} if defined $data->{"proceedings_subject"};

	$epdata->{volume} = $data->{"volume"} if defined $data->{"volume"};
	$epdata->{number} = $data->{"issue"} if defined $data->{"issue"};

	# Page range: "first-last", just "first" (no dangling dash) when the
	# last page is unknown, "-last" when only the last page is known.
	my $first_page = $data->{"first_page"};
	my $last_page = $data->{"last_page"};
	if( defined $first_page && defined $last_page )
	{
		$epdata->{pagerange} = $first_page."-".$last_page;

		# Number of pages from the last number in each page label; only
		# set when both labels contain a number and the range is not
		# reversed (which would give a zero or negative count).
		my( $first_number ) = $first_page =~ $PagePattern;
		my( $last_number ) = $last_page =~ $PagePattern;
		if( defined $first_number && defined $last_number && $last_number >= $first_number )
		{
			$epdata->{pages} = $last_number - $first_number + 1;
		}
	}
	elsif( defined $first_page )
	{
		$epdata->{pagerange} = $first_page;
	}
	elsif( defined $last_page )
	{
		$epdata->{pagerange} = "-".$last_page;
	}

	$epdata->{department} = $data->{"institution_department"} if defined $data->{"institution_department"};
	$epdata->{institution} = $data->{"institution_name"} if defined $data->{"institution_name"};

	$epdata->{event_title} = $data->{"conference_name"} if defined $data->{"conference_name"};
	$epdata->{event_location} = $data->{"conference_location"} if defined $data->{"conference_location"};
	$epdata->{event_dates} = $data->{"conference_date"} if defined $data->{"conference_date"};

	if( defined $data->{"year"} && $data->{"year"} =~ /^[0-9]{4}$/ )
	{
		$epdata->{date} = $data->{"year"}; # does 'media_type' matter?
		if( defined $data->{"month"} && $data->{"month"} =~ /^[0-9]{2}$/ )
		{
			$epdata->{date} .= '-' . $data->{"month"};
		}
	}

	my $date = GetDate($data, "publication_date");
	if( defined $date )
	{
		$epdata->{date_type} = "published";
		$epdata->{date} = $date; # deliberately overwriting approval_date
	}

	$date = GetDate($data, "creation_date");
	if( defined $date )
	{
		$epdata->{note} .= ", " if defined $epdata->{note};
		$epdata->{note} .= "creation date: ".$date;
	}

	# Fallback: derive the type from the type of the DOI; later lines win.
	if( !defined $epdata->{type} )
	{
		$epdata->{type} = "article" if defined $data->{"doi.journal_article"} ||
						defined $data->{"doi.journal_issue"} ||
						defined $data->{"doi.journal_title"} || defined $data->{"doi.journal_volume"};
		$epdata->{type} = "book" if defined $data->{"doi.book_content"} || defined $data->{"doi.book_series"} || defined $data->{"doi.book_title"};
		$epdata->{type} = "book" if defined $data->{"component_number"} || defined $data->{"edition_number"};
		$epdata->{type} = "conference_item" if defined $data->{"doi.conference_paper"} ||
						defined $data->{"doi.conference_series"} || defined $data->{"doi.conference_title"};
		$epdata->{type} = "dataset" if defined $data->{"doi.dataset"};
		$epdata->{type} = "monograph" if defined $data->{"doi.report-paper_content"} ||
						defined $data->{"doi.report-paper_series"} || defined $data->{"doi.report-paper_title"};
		$epdata->{type} = "thesis" if defined $data->{"doi.dissertation"};
	}

	# Books keep their title in book_title, everything else in title. The
	# subtitle is appended after ": ", or used on its own when there is no
	# title to append it to.
	my $title_field = ( defined $epdata->{type} && $epdata->{type} eq "book" ) ? "book_title" : "title";
	$epdata->{$title_field} = $data->{"title"} if defined($data->{"title"});
	if( defined($data->{"subtitle"}) )
	{
		$epdata->{$title_field} = defined $epdata->{$title_field}
			? $epdata->{$title_field} . ": " . $data->{"subtitle"}
			: $data->{"subtitle"};
	}

	return $epdata;
}

=head1 url_encode

url_encode( $str )

Returns a copy of $str in which every character other than A-Z, a-z and
0-9 is replaced by "%" followed by its character code as (at least) two
upper-case hexadecimal digits.

=cut

sub url_encode
{
	my $encoded = shift;

	$encoded =~ s{([^A-Za-z0-9])}{ sprintf("%%%02X", ord($1)) }seg;

	return $encoded;
}

1;

=head1 COPYRIGHT

=for COPYRIGHT BEGIN

Copyright 2000-2011 University of Southampton.

=for COPYRIGHT END

=for LICENSE BEGIN

This file is part of EPrints L<http://www.eprints.org/>.

EPrints is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

EPrints is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with EPrints.  If not, see L<http://www.gnu.org/licenses/>.

=for LICENSE END