#!/usr/local/bin/perl5.8.0 -w -I/opt/ep2stable/perl_lib 

######################################################################
#
#  This file is part of GNU EPrints 2.
#  
#  Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
#  
#  EPrints 2 is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  EPrints 2 is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with EPrints 2; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
######################################################################


=pod

=head1 NAME

B<indexer> - Indexing daemon for EPrints

=head1 SYNOPSIS

B<indexer> start [B<options>]

B<indexer> stop 

B<indexer> status

=head1 DESCRIPTION

This daemon runs in the background and creates index files for all eprints archives. It builds indexes for each archive in turn then starts again.

Messages and errors are logged to indexer.log in the EPrints var directory (for example /opt/eprints2/var/indexer.log) unless you change the log options. If the indexer appears to be having problems, try raising the log level and examining the log.

Once all the archives have been indexed, the indexer rolls the logs (up to logfile.5) and then starts again. See --rollcount for ways to customise this.

=over 8

=back

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.

=item B<--force>

Start up, even if the PID file exists (implying another copy is running). This
is useful for starting after a crash, but be careful not to run two copies at
once as BAD THINGS will happen.

=item B<--verbose>

Explain in detail what is going on.
May be repeated for greater effect.

=item B<--logfile> I<filename>

Log to I<filename> rather than default indexer log.

=item B<--loglevel> I<level>

Set the level of detail to log. Level may be 0-5.

=over 8

=item 0

Do not log anything.

=item 1

Report start, stop and errors.

=item 2 [default]

All above, plus report names of archives being indexed and warnings.

=item 3

All above, plus report processes starting and stopping and each dataset being indexed.

=item 4

All above, plus report details of index process.

=item 5

All above, plus report id of every item indexed.

=back

=item B<--rollcount> I<number>

Set the number of once-through logs that should be kept. If set to zero then indexer will never roll the logs but rather just keep writing to the main log.

=item B<--notdaemon>

Do not become a daemon, remain attached to the current terminal (errors still go to the log file).

=item B<--once>

Only index each archive once rather than loop for ever.

=item B<--version>

Output version information and exit.

=back   

=head1 AUTHOR

This is part of the EPrints 2 system. EPrints 2 is developed by Christopher Gutteridge.

=head1 VERSION

EPrints Version: 2.3.7.99.4-beta

=head1 CONTACT

For more information go to B<http://www.eprints.org/> which gives information on mailing lists and the like.

Chris Gutteridge may be contacted at B<support@eprints.org>

Should you need a real world address for some reason, EPrints can be contacted in the real world at

 EPrints c/o Christopher Gutteridge
 Department of Electronics and Computer Science
 University of Southampton
 SO17 1BJ
 United Kingdom

=head1 COPYRIGHT

This file is part of GNU EPrints 2.

Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.

EPrints 2 is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

EPrints 2 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with EPrints 2; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=cut

use EPrints::Session;
use EPrints::EPrint;
use EPrints::Config;
use EPrints::Index;

use strict;
use Getopt::Long;
use Pod::Usage;

# Informational switches; all default to "off".
my $version = 0;
my $verbose = 0;
my $quiet = 0;
my $help = 0;
my $man = 0;

# Daemon settings and their defaults.
my $force = 0;
my $logfile;		# undef: filled in below from the var_path setting
my $loglevel = 2;
my $rollcount = 5;
my $notdaemon = 0;
my $once = 0;

# --rollcount and --loglevel are declared as integers ("=i") so that a
# non-numeric value is rejected with the usage message instead of being
# silently coerced to 0 (which would turn logging or log rolling off).
# --silent is accepted as a synonym for --quiet.
GetOptions( 
	'help|?' => \$help,
	'man' => \$man,
	'version' => \$version,
	'verbose+' => \$verbose,
	'silent' => \$quiet,
	'force' => \$force,
	'quiet' => \$quiet,
	'notdaemon' => \$notdaemon,
	'once' => \$once,
	'rollcount=i' => \$rollcount,
	'logfile=s' => \$logfile,
	'loglevel=i' => \$loglevel
) || pod2usage( 2 );
EPrints::Utils::cmd_version( "indexer" ) if $version;
pod2usage( 1 ) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
# Exactly one command word must remain: start, stop or status.
pod2usage( 2 ) if( scalar @ARGV != 1 );
pod2usage( 2 ) if( $ARGV[0] ne "start" && $ARGV[0] ne "stop" && $ARGV[0] ne "status" );

# Console chattiness: 1 by default, 0 with --quiet, 1 + the number of
# --verbose flags otherwise. --verbose takes precedence over --quiet.
our $noise = 1;
$noise = 0 if( $quiet );
$noise = 1+$verbose if( $verbose );

# Settings hash handed to every helper sub in this script.
my $p = {
	loglevel => $loglevel+0,
	rollcount => $rollcount+0,
	daemon => !$notdaemon,
	noise => $noise,
	once => $once,
	logfile => $logfile 
};

# Both the PID file and the default log file live in the var directory.
$p->{pidfile} = EPrints::Config::get("var_path")."/indexer.pid";
if( !defined $p->{logfile} ) 
{
	$p->{logfile} = EPrints::Config::get("var_path")."/indexer.log";
}

# "status" command: report what the PID file says about the indexer and exit.
# Exit status is 0 when the indexer is not running or appears to be running,
# and 1 when the PID file is unusable or stale.
if( $ARGV[0] eq "status" )
{
	if( !-e $p->{pidfile} )
	{
		print "Indexer is not running\n";
		exit;
	}

	my $pid = get_pid( $p );
	if( !defined $pid )
	{
		# The file exists but holds no line made up only of digits.
		print "$p->{pidfile} exists but does not contain a process ID\n";
		exit 1;
	}

	# Signal 0 sends nothing; it only asks whether the process could be
	# signalled. Only ESRCH ("no such process") proves the PID file is
	# stale - any other failure (e.g. EPERM) means the process exists.
	if( !kill( 0, $pid ) && $!{ESRCH} )
	{
		print "Indexer is not running, but a stale PID file ($p->{pidfile}) records PID $pid\n";
		exit 1;
	}

	print "Indexer appears to be running with PID $pid\n";
	exit;
}


# "stop" command: send TERM to the process named in the PID file and wait for
# the PID file to disappear. Exits 0 only when the indexer is seen to have
# shut down; every failure path exits non-zero.
if( $ARGV[0] eq "stop" )
{
	if( !-e $p->{pidfile} )
	{
		print <<END;
$p->{pidfile} does not appear to exist.

Maybe something bad happend? If indexer is still running you will have to
shut it down by hand.
END
		exit 1;
	}

	my $pid = get_pid( $p );

	if( !defined $pid )
	{	
		die "Could not find PID in $p->{pidfile}.  Weird. Better kill it by hand.";
	}
	
	# kill is a Perl builtin; it returns the number of processes that
	# were successfully signalled, so 0 means the signal went nowhere
	# and there is no point waiting for the PID file to vanish.
	print "Sending TERM signal to $pid\n" if( $p->{noise} > 1 );
	if( !kill( 15, $pid ) )
	{
		print <<END;
Could not send TERM signal to process $pid: $!
If indexer is no longer running then remove $p->{pidfile}
END
		exit 1;
	}

	# give it 10 seconds; the control process unlinks the PID file as
	# part of its shutdown.
	my $counter = 10;
	for( 1..$counter )
	{
		if( !-e $p->{pidfile} )
		{
			print "...Killed $pid\n" if( $p->{noise} > 1 );
			exit 0;
		}
		sleep 1;
		print "tick\n" if( $p->{noise} > 2 );
	}

	print <<END;
pidfile did not disappear within $counter seconds, so something didn't 
work somewhere. Try killing process number $pid (if it exists) and 
then removing $p->{pidfile}
END

	# The stop did not complete, so report failure to the caller.
	exit 1;
}


#foreach my $arc_id ( EPrints::Config::get_archive_ids() )
#{
#
#	my $session  = new EPrints::Session( 1 , $arc_id , $noise );
#	if( !defined $session ) 
#	{
#		print STDERR "Error opening session: $arc_id\n";
#		exit( 1 );
#	}
#	$session->terminate;
#}

# "start" command from here on.
# Name the control process so it is easy to find in "ps" output.
$0 = "EPrints Indexer for ".$EPrints::SystemSettings::conf->{base_path}.": Parent Process";

# Refuse to start on top of an existing PID file unless --force was given.
if( !$force && -e $p->{pidfile} )
{
	my $pid = get_pid( $p );
	# The file may exist without a PID line in it.
	$pid = "(unknown)" if( !defined $pid );
	print <<END;

EPrints indexer appears to be running with process ID $pid. 
It may have crashed. 

To check if the process is still running (on a linux system)
use:

ps auwwx | grep EPrints

Options to "ps" vary on other systems. You may also try:

ps -ef | grep EPrints

If indexer is not already running you may either:
 * delete the PID file: $p->{pidfile} 
 * run indexer with the --force option

END
	exit 1;
}

# From here on STDERR is the log file (or closed when logging is off).
if( $p->{loglevel} > 0 )
{
	# Open the log on its own handle first: if that fails the original
	# STDERR is still open, so the error actually reaches the user.
	open( my $logfh, ">>", $p->{logfile} ) || die "Error opening $p->{logfile}: $!";
	open( STDERR, ">&", $logfh ) || die "Error redirecting STDERR to $p->{logfile}: $!";
	close $logfh;
	# Make STDERR the default output handle and unbuffer it.
	select( STDERR );
	$| = 1;

	rolllogs( $p );
	indexlog();
	indexlog();
	indexlog( "**** Indexer starting..." );
}
else
{
	close STDERR;
}


if( $p->{daemon} )
{
	indexlog( "**** Becoming Daemon" ) if( $p->{loglevel} > 0 );
	close STDIN;
	close STDOUT;
	# Fork twice; each parent exits and only the grandchild carries on.
	# fork returns undef on failure, which must not be mistaken for
	# "I am the child".
	foreach my $stage ( 1, 2 )
	{
		my $child = fork;
		die "Can't fork to become a daemon (stage $stage): $!" if( !defined $child );
		exit if( $child );
	}
}



# TERM is how "indexer stop" asks the control process to shut down.
$SIG{TERM} = sub { 
	indexlog( "*** TERM signal received" ) if( $p->{loglevel} > 1 );
	stopindexer( $p );
};



# Write the PID file from scratch. It must be truncated rather than appended
# to: get_pid returns the first PID it finds, so after a --force start an
# appended file would still name the old, dead process.
open( my $pidfh, ">", $p->{pidfile} ) || die "Can't open $p->{pidfile} for writing: $!";
print {$pidfh} <<END;
# This file is automatically generated to indicate what process ID
# indexer is running as. If this file exists then indexer is assumed
# to be running.
END
print {$pidfh} $$."\n";
print {$pidfh} EPrints::Utils::get_timestamp()."\n";
close( $pidfh ) || die "Can't write $p->{pidfile}: $!";

indexlog( "** Indexer control process started with process ID: $$" ) if( $p->{loglevel} > 2 );

# Main loop of the control process. Each pass indexes every archive in turn,
# one forked worker process per archive, then rolls the logs and starts over
# (or stops after the first pass when --once was given).
while( 1 ) 
{
	indexlog() if( $p->{loglevel} > 0 );
	indexlog( "*** Starting indexing" ) if( $p->{loglevel} > 1 );
	my @arc_ids = EPrints::Config::get_archive_ids();
	foreach my $arc_id ( sort @arc_ids )
	{

		# nb. using load_archive_config not get_archive_config. This
		# reloads the XML file each time round.
		my $archive_config = EPrints::Config::load_archive_config( $arc_id );

		# Don't index archives which have indexing turned off
		if( !$archive_config->{index} )
		{
			# sleep for a little while so we don't hammer the machine
			# if there are lots of non-indexing archives, or even
			# no indexing archives for a while.
			indexlog( "*** Will not index '$arc_id': indexing disabled by config" ) if( $p->{loglevel} > 1 );
			sleep 60;
			next;
		}

		indexlog( "*** Staring index of archive '$arc_id'" ) if( $p->{loglevel} > 1 );

		$p->{kid} = fork();
		if( !defined $p->{kid} )
		{
			# fork failed. undef must not fall through to the
			# "== 0" test below, or the control process itself
			# would run the worker code and then exit.
			indexlog( "**** Error: could not fork worker process for '$arc_id': $!" ) if( $p->{loglevel} > 0 );
			sleep 60;
			next;
		}
		if( $p->{kid} == 0 )
		{
			# Worker process: index every dataset of this archive.
			my $index;
			my $session;

			# Replace the handler inherited from the control
			# process before doing anything else, so a TERM
			# arriving early is not handled by stopindexer.
			$SIG{TERM} = sub { 
				indexlog( "** Worker process terminated: $$" ) if( $p->{loglevel} > 2 );
				$index->cleanup if( defined $index );
				$session->terminate if( defined $session );
				exit;
			};

			indexlog( "** Worker process started: $$" ) if( $p->{loglevel} > 2 );

			# Lower our own priority. Backticks are used so that
			# anything renice prints is captured and discarded.
			my $cmd = "/usr/bin/renice 8 $$";	
			`$cmd`;

			$session = EPrints::Session->new( 1 , $arc_id , $p->{noise} );
			# Check the session before it is used for anything,
			# including the terminate call after the loop.
			if( !defined $session ) 
			{
				indexlog( "**** Error starting eprints session on '$arc_id'" ) if( $p->{loglevel} > 0 );
				exit( 1 );
			}

			# Callbacks handed to EPrints::Index. The error logger
			# is a no-op below log level 2; the item logger is
			# only supplied at log level 5.
			my $elog = sub {;};
			if( $p->{loglevel} > 1 )
			{
				$elog = sub { my( $txt ) = @_; indexlog( "*** ".$txt ); };
			}
			my $ilog;
			if( $p->{loglevel} > 4 )
			{
				$ilog = \&indexlog;
			}

			my @ds_ids = EPrints::DataSet::get_sql_dataset_ids();
			foreach my $ds_id ( sort @ds_ids )
			{
				indexlog( "** Indexing dataset '$ds_id' of '$arc_id'" ) if( $p->{loglevel} > 2 );
				$0 = "EPrints Indexer for ".$EPrints::SystemSettings::conf->{base_path}.": $arc_id/$ds_id";
				
				$index = EPrints::Index->new( 
						$session, 
						$session->get_archive->get_dataset( $ds_id ),
						$ilog,
						$elog );
				# NOTE(review): presumably lets the index object
				# update the process name - confirm in EPrints::Index.
				$index->change_pname( 1 );
				indexlog( "* Creating temporary index tables for '$ds_id' of '$arc_id'" ) if( $p->{loglevel} > 3 );
				$index->create;
				indexlog( "* Creating index for '$ds_id' of '$arc_id'" ) if( $p->{loglevel} > 3 );
				$index->build;
				indexlog( "* Installing index for '$ds_id' of '$arc_id'" ) if( $p->{loglevel} > 3 );
				$index->install;
				indexlog( "* Cleaning up after '$ds_id' of '$arc_id'" ) if( $p->{loglevel} > 3 );
				$index->cleanup;
				# Nothing left for the TERM handler to clean up.
				undef $index;
				indexlog( "* Done indexing dataset '$ds_id' of '$arc_id'" ) if( $p->{loglevel} > 3 );
			}


			indexlog( "** Sub process exiting: $$" ) if( $p->{loglevel} > 2 );
			$session->terminate;
			exit;
		}

		# Control process: block until this archive's worker is done.
		waitpid( $p->{kid}, 0 );
		undef $p->{kid};

		indexlog( "*** Done indexing archive '$arc_id'" ) if( $p->{loglevel} > 1 );
		sleep 5;
	}
	indexlog( "*** Done indexing all archives" ) if( $p->{loglevel} > 1 );
	indexlog() if( $p->{loglevel} > 0 );

	stopindexer($p) if( $p->{once} );

	rolllogs( $p );
}


####################################################################################

# Rotate the log files and reopen STDERR on a fresh main log.
#
# $p is the settings hash. Uses $p->{logfile} (path of the main log),
# $p->{rollcount} (how many old logs to keep) and $p->{loglevel}.
# Does nothing when logging is off or rollcount is zero or less.
#
# logfile.(N-1) is renamed to logfile.N, working down from the highest
# number so that nothing is overwritten before it has been moved, and
# finally logfile itself becomes logfile.1.
sub rolllogs
{
	my( $p ) = @_;

	return if( $p->{loglevel} <= 0 );
	return if( $p->{rollcount} <= 0 );

	indexlog( "** End of log. Closing and rolling." ) if( $p->{loglevel} > 2 );
	for( my $n = $p->{rollcount}; $n > 0; --$n )
	{
		my $src = $p->{logfile};	
		if( $n > 1 ) { $src.='.'.($n-1); }
		next unless( -f $src );
		my $tgt = $p->{logfile}.'.'.$n;
		rename( $src, $tgt ) || warn "Error renaming: $!";
	}
	close STDERR;
	# Three-argument open: the file name is used exactly as given, so
	# the file reopened here is the same name the renames above used.
	open( STDERR, ">>", $p->{logfile} ) || warn "Error opening: $p->{logfile}: $!";
	# STDERR stays the default output handle, unbuffered.
	select( STDERR );
	$| = 1;
}


# Read the process ID recorded in the PID file.
#
# $p is the settings hash. Uses $p->{pidfile} (path of the file to read)
# and $p->{noise} (a progress line is printed when it is greater than 1).
#
# Returns the first line of the file that consists only of digits, or
# undef if there is no such line. Dies if the file cannot be opened.
sub get_pid
{
	my( $p ) = @_;

	print "Reading $p->{pidfile}\n" if( $p->{noise} > 1 );
	# Lexical handle and three-argument open: the file name is taken
	# literally and no global filehandle is left behind.
	open( my $fh, "<", $p->{pidfile} ) || die( "Could not open $p->{pidfile}: $!" );
	my $pid;
	# Read into a lexical so that the caller's $_ is left alone.
	while( my $line = <$fh> )
	{
		chomp $line;
		# Comment and timestamp lines never match; the PID line does.
		if( $line =~ m/^\d+$/ )
		{
			$pid = $line;
			last;
		}
	}
	close $fh;

	return $pid;
}

# Write one entry to the log, which is whatever STDERR currently points at.
#
# With a message, the line written is "[<local time>] <message>".
# With no argument (or undef) a single blank line is written instead.
sub indexlog
{
	my( $txt ) = @_;

	if( defined $txt )
	{
		# Scalar assignment gives localtime's human-readable string.
		my $stamp = localtime();
		return print STDERR "[$stamp] $txt\n";
	}

	print STDERR "\n";
	return;
}

# Shut down the control process: stop the current worker (if any), remove
# the PID file, log the shutdown and exit. Never returns.
#
# $p is the settings hash. Uses $p->{pidfile}, $p->{loglevel} and
# $p->{kid} (process ID of the worker currently running, if there is one).
# Dies if the PID file cannot be removed.
sub stopindexer
{
	my( $p ) = @_;

	indexlog( "** Control process $$ stopping..." ) if( $p->{loglevel} > 2 );

	# Signal the worker before touching the PID file, so that a failure
	# to unlink cannot leave the worker running with nobody in charge.
	# A kid value of 0 means this is itself a worker (fork returned 0);
	# signalling process 0 would hit the whole process group, so skip it.
	if( $p->{kid} )
	{
		indexlog( "* Sending TERM signal to worker process: $p->{kid}" ) if( $p->{loglevel} > 2 );
		kill 15, $p->{kid};
	}

	indexlog( "* Unlinking $p->{pidfile}" ) if( $p->{loglevel} > 3 );
	unlink( $p->{pidfile} ) || die( "Can't unlink $p->{pidfile}: $!" );

	indexlog( "** Control process $$ stopped" ) if( $p->{loglevel} > 2 );
	indexlog( "**** Indexer stopped" ) if( $p->{loglevel} > 0 );
	indexlog() if( $p->{loglevel} > 0 );
	exit;
}
