#!/usr/bin/perl -w -I/opt/eprints/perl_lib

######################################################################
#
#  This file is part of GNU EPrints 2.
#  
#  Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
#  
#  EPrints 2 is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  EPrints 2 is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with EPrints 2; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
######################################################################

=pod

=head1 NAME

B<export_hashes> - export the hash values for all documents in a repository.

=head1 SYNOPSIS

B<export_hashes> I<repository_id> [B<options>] [I<filename>]

=head1 DESCRIPTION

Every time a document in eprints is modified a new .xsh file is generated
containing a hash of each file. This script creates a hash of each of these
.xsh files and creates a super .xsh file containing each of those hashes.

If no filename is given this script outputs to standard out.

The XML file produced may then be archived safely. You can then create an MD5 of that file and do something to prove you had it on the date you created it.

For example, publish it in a small-ad in a national paper.

Now you can prove you had that MD5 on that day, which proves you had the file output by export_hashes that day. The MD5's in that file prove that you had the .xsh file of a given document. Those files should prove that you had a given file.

This all assumes that nobody works out a way to do MD5's in reverse. And there's no legal precedent yet.

That's why this is an experimental feature.

=head1 ARGUMENTS

=over 8

=item I<repository_id> 

The ID of the EPrint repository to use.

=item I<filename>

A filename to write to. If omitted this will write to stdout.

=back

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.

=item B<--verbose>

Explain in detail what is going on.
May be repeated for greater effect.

=item B<--version>

Output version information and exit.

=item B<--all>

Output hashes of ALL .xsh files for each document, not just the most recent. This takes longer but should be logged periodically.

=back   


=head1 AUTHOR

This is part of this EPrints 2 system. EPrints 2 is developed by Christopher Gutteridge.

=head1 VERSION

EPrints Version: eprints-2-cvs-2006-06-27

=head1 CONTACT

For more information go to B<http://www.eprints.org/> which gives information on mailing lists and the like.

Chris Gutteridge may be contacted at B<support@eprints.org>

Should you need a real world address for some reason, EPrints can be contacted in the real world at

 EPrints c/o Christopher Gutteridge
 Department of Electronics and Computer Science
 University of Southampton
 SO17 1BJ
 United Kingdom

=head1 COPYRIGHT

This file is part of GNU EPrints 2.

Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.

EPrints 2 is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

EPrints 2 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with EPrints 2; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


=cut


use Getopt::Long;
use Pod::Usage;
use strict;

use EPrints;

# Command-line flags; each is filled in by GetOptions below.
my $version = 0;
my $verbose = 0;
my $quiet = 0;
my $all = 0;
my $help = 0;
my $man = 0;

# Note: --silent is accepted as an alias of --quiet (both set $quiet).
GetOptions( 
	'help|?' => \$help,
	'man' => \$man,
	'all' => \$all , 
	'version' => \$version,
	'verbose+' => \$verbose,
	'silent' => \$quiet,
	'quiet' => \$quiet
) || pod2usage( 2 );
EPrints::Utils::cmd_version( "export_hashes" ) if $version;
pod2usage( 1 ) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;

# Exactly one or two positional arguments are allowed:
# the repository id and an optional output filename.
my $argc = scalar @ARGV;
pod2usage( 2 ) if( $argc < 1 || $argc > 2 );

# Noise level handed to the session: 0 when quiet, 1 by default and
# 1+N for N occurrences of --verbose. --verbose wins over --quiet when
# both are given because it is applied last.
my $noise = 1;
$noise = 0 if( $quiet );
$noise = 1+$verbose if( $verbose );

# Set STDOUT to auto flush (without needing a \n)
$|=1;

# Direct method-call syntax is used rather than the indirect-object form
# ("new EPrints::Session(...)"), which perl can mis-parse.
my $session = EPrints::Session->new( 1, $ARGV[0], $noise );
exit( 1 ) unless defined $session;

# Walk every record in the "document" dataset; show_hash appends the
# selected .xsh filenames to $info->{files}.
my $ds = $session->get_repository->get_dataset( "document" );
my $info = { files=>[], all=>$all };
$ds->map( $session, \&show_hash, $info );

# The output filename stays undefined when no second argument was given;
# the POD above says the script then writes to standard out.
my $filename;
if( EPrints::Utils::is_set( $ARGV[1] ) )
{
	$filename = $ARGV[1];
}

EPrints::Probity::create_log(
		$session,
		$info->{files}, 
		$filename );

$session->terminate();
exit;

######################################################################
# show_hash( $session, $dataset, $doc, $info )
#
# Callback passed to $ds->map for each document. Collects the full
# paths of the document's .xsh files into $info->{files}.
#
#  $session - used only to log a message when the document has no eprint.
#  $dataset - unused here.
#  $doc     - object providing get_id and get_eprint.
#  $info    - hash ref with:
#               files => array ref that matching paths are appended to
#               all   => true to append every matching file, false to
#                        append only the last one in sorted name order.
#
# Files are looked up in the eprint's local_path directory and must be
# named "<document id>.<anything>.xsh". Nothing is appended when the
# document has no eprint, the directory cannot be opened, or no file
# matches.
######################################################################
sub show_hash
{
	my( $session, $dataset, $doc, $info ) = @_;

	my $id = $doc->get_id;
	my $eprint = $doc->get_eprint;
	if( !defined $eprint )
	{
		$session->get_repository->log( "No eprint for document: $id" );
		return;
	}
	my $path = $eprint->local_path;

	# A lexical directory handle avoids sharing a global bareword handle.
	# An unreadable directory is skipped silently, as before.
	opendir( my $dirhandle, $path ) or return;
	my @filesread = readdir( $dirhandle );
	closedir( $dirhandle );

	# \Q..\E stops characters in the id acting as regex syntax, and the
	# dot before "xsh" is escaped so only a real ".xsh" suffix matches.
	my @hashfiles =
		map { $path."/".$_ }
		sort
		grep { m/^\Q$id\E\..*\.xsh$/ } @filesread;

	# No hash files for this document: do not push an undefined filename.
	return unless( scalar @hashfiles );

	if( $info->{all} )
	{
		push @{$info->{files}}, @hashfiles;
	}
	else
	{
		# Sorted by name, so the last entry is treated as the latest.
		push @{$info->{files}}, $hashfiles[-1];
	}

	return;
}
