#!/usr/bin/perl -w -I/opt/eprints2/perl_lib

###############################################################
#
# This file is a contribution to EPrints 2.
#
# Copyright (c) 2004 Laszlo Csirmaz, 
# Central European University, Budapest, Hungary
# 
# This file is under GNU General Public License
#
###############################################################

=pod

=head1 NAME

B<phrases> - print out phrases used by EPrint

=head1 SYNOPSIS

B<phrases> I<archiveid> [B<-pin>] [B<-full>]

=head1 DESCRIPTION

This utility searches the EPrints perl library, utilities, cgi scripts, and
archive specific config files for B<phrase> and B<html_phrase> procedure
calls. Prints out calls which refer to variable phrases, or to a constant
phrase with no corresponding entry in the system or archive phrase file. It
also prints out phrases which are not referred directly. The ouput is
similar to this one:

 cgi/users/review:
 line=  96  cgi/users/review:checkout_info
  NOT FOUND for: /en/es/hu

 Not directly referenced phrases (A: archive, D: default)
 A: document_typename_ascii: en/es/hu
 A: document_typename_coverimage: en/es/hu
 D: cgi/advsearch:preamble: en/es/hu
 D: cgi/latest:day_0: en/es/hu

The only argument for B<phrases> should be the ID of the archive.

=head1 OPTIONS

=over

=item I<-pin>

With this argument only calls with pins are printed out 
together with the pin identifier such as

 cgi/users/staff/edit_subject:
 line=  39  "cgi/users/edit_subject:intro_blurb", continuelink

=item I<-full>

This forces a full list of all calls with all arguments. (For pins only the
pin identifier is shown, not the pin value.) 

=back

=head1 AUTHOR

Laszlo Csirmaz, Central European University, Budapest, Hungary

=head1 COPYRIGHT

Copyright (c) 2004 Laszlo Csirmaz, 
Central European University, Budapest, Hungary

This software is free; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

=cut

use strict;
use EPrints::Session;
use EPrints::ImportXML;

if( scalar @ARGV <1 )
{
    print "Usage: phrases <ARCHIVE> [-pin] [-full]

Prints all phrases found in <ARCHIVE>. 
If \"-pin\" is given, lists only phrases with pins.
With \"-full\" all phrase calls and arguments are listed. Use only one.
";
    exit 1;
}

my $pin=0; my $full=0;
$pin=1 if( scalar @ARGV > 1 && $ARGV[1] eq "-pin" );
$full=1 if( scalar @ARGV > 1 && $ARGV[1] eq "-full" );

my $noise = 0;
my $outerdir = `pwd`;
chomp ($outerdir);
my $session = new EPrints::Session( 1, $ARGV[0], $noise, 1 );
##              ( offline, database, noise, checkdb )

defined $session || die "Session not defined\n" ;

my $maindir = $EPrints::SystemSettings::conf->{base_path};

chdir $maindir;

my $archive=$session->{archive};

my $ar="archives/".$ARGV[0]."/cfg";

foreach my $dir ( "perl_lib", "bin", "cgi", $ar ){
    foreach my $file( `find $dir -type f` ){
       program($file);
    }
}

not_used() unless ($pin);

exit;

my $globalfile=""; my $globalfileprinted;

sub printfilename {
    return if($globalfileprinted);
    chomp $globalfile;
    print "\n$globalfile:\n";
    $globalfileprinted=1;
}

## go over a perl file:
sub program {
    my $f=shift;
    return if ($f =~ /~$/ );
    $globalfile=$f; $globalfileprinted=0;
##    print "\n$f\n";
    if( !open(F, "$outerdir/$f" ) ){
         print "Cannot open file $f\n"; return;
    }
    my $lineno=0;
    while(<F>){
       chomp; $lineno++;
       next if( /^\s*#/ );
       next if( /sub\s+phrase\s*($|{)/ );
       next if( /sub\s+html_phrase\s*($|{)/ );
       read_phrase( $1,\$lineno ) if( /\W\s*phrase\s*\((.*)$/ );
       read_phrase( $1,\$lineno ) if( /^\s*phrase\s*\((.*)$/ );
       read_phrase( "",\$lineno ) if( /\W\s*phrase\s*$/ );
       read_phrase( $1,\$lineno ) if( /\W\s*html_phrase\s*\((.*)$/ );
       read_phrase( $1,\$lineno ) if( /^\s*html_phrase\s*\((.*)$/ );
       read_phrase( "",\$lineno ) if( /\W\s*html_phrase\s*$/ );
    }
    close (F);
}

sub read_phrase {
    my ($arg,$lno) = @_;
    my $paren = -1;
    my $n=0; my @a; $a[$n]="";
    my $line=$$lno;
    ## check for balanced ()
    while ($paren<0) {
       if($arg eq "" ){
          return if(eof(F));
          $arg = <F>; $$lno++; chomp $arg;
       }
       $arg =~ s/^\s*//; #get rid of leading spaces
### print ">>> [$arg]\n";
       if( $arg =~ /^\"([^\"]*)\"/ ){ # starts with a quotation mark
           $a[$n] .= "\"$1\"";
           $arg =~ s/^\"[^\"]*\"//;
       } elsif( $arg =~ /^#/ ){
           $arg = "";
       } elsif( $arg =~ /^\(/ ){
           $paren--; $a[$n] .= "("; $arg =~ s/^.//;
       } elsif( $arg =~ /^,/ ){
           if($paren == -1 ){ $n++; $a[$n]=""; } else {$a[$n].=","; }
           $arg =~ s/^.//;
       } elsif( $arg =~ /^\)/ ){
           $paren++; if($paren == 0){ deal_phrase($line,$n,\@a); }
           $a[$n].= ")"; $arg =~ s/^.//;
       } else { ## skip until \",()
           $arg =~ /^([^\",\(\)]*)(.*)$/ ;
           $a[$n] .= $1; $arg = $2; 
###           print "[",$$lno,"] SPLITTING LINE as: [$1] [$2]\n";
       }
    }
}

sub deal_phrase {
    my ($line,$n,$a) = @_;
    if($pin){ #print out only those which have pins
        my $pins=0;
        for my $i (1..$n){
           $pins=1 if($a->[$i] =~ /^\s*[\w]+\s*=>/ );
        }
        return unless $pins;
	printfilename();
        printf "line=%4d  %s",$line,$a->[0];
        for my $i (1..$n){
           my $par=$a->[$i];
           if( $par =~ /^\s*([\w]+)\s*=>/ ){
               print ", ",$1;
           }
        }
        print "\n";
        return;
    }
    if($full){
       printfilename();
       printf "line=%4d  %s", $line,$a->[0];
       
       for my $i (1..$n){
          my $par=$a->[$i];
          if( $par =~ /^\s*([\w]+)\s*=>/ ){
              print ", ",$1,"=>";
          } else {
              print ", ",$par;
          }
       }
       print "\n";
       if($a->[0] =~ /^\"([^\"]+)\"$/ ){
          mark_used($1,$line,1);
       }
       return;
    }
    if($a->[0] =~ /^\"([^\"]+)\"$/ ){
       mark_used($1,$line,0);
    } else {
       printfilename();
       printf "line=%4d  %s\n", $line,$a->[0];
    }
}

sub mark_used {
    my ($phrase,$line,$printed) = @_;
    my $missinglang="";
    foreach my $langid ( keys %{$archive->{langs}} ){
      my $l=$archive->{langs}->{$langid};
      if( defined $l->{data}->{$phrase} ){
          $l->{data}->{$phrase} = "USED";
      } elsif ( defined $l->{archivedata}->{$phrase} ){
          $l->{archivedata}->{$phrase} = "USED";
      } else {
          $missinglang .= "/$langid";
      }
    }
    return if( $missinglang eq "");
    if(! $printed){
       printfilename();
       printf "line=%4d  %s\n",$line,$phrase;
    }
    print " NOT FOUND for: $missinglang\n";
}


sub not_used {
  my %hash=();
  foreach my $langid (keys %{$archive->{langs}} ){
    my $l=$archive->{langs}->{$langid};
    foreach my $phrase (keys %{$l->{data}} ){
        next if( $l->{data}->{$phrase} eq "USED" );
        my $item="D: $phrase";
        my $k= $hash{$item};
        $hash{$item}=$k."/$langid" if(defined $k);
        $hash{$item}="$langid" if(! defined $k);
    }
    foreach my $phrase (keys %{$l->{archivedata}} ){
        next if($l->{archivedata}->{$phrase} eq "USED" );
        my $item="A: $phrase";
        my $k= $hash{$item};
        $hash{$item}=$k."/$langid" if(defined $k);
        $hash{$item}="$langid" if(! defined $k);
    }
  }
  print "\nNot directly referenced phrases (A: archive, D: default)\n";
  foreach my $item (sort %hash){
     print $item,": ",$hash{$item},"\n" if(defined $hash{$item} );
  }
}

sub not_used_2 {
  foreach my $langid (keys %{$archive->{langs}} ){
    print "\nlangid: $langid\n";
    my @stack=();
    my $l=$archive->{langs}->{$langid};
    foreach my $phrase (keys %{$l->{data}} ){
        push @stack, $phrase if( $l->{data}->{$phrase} ne "USED" );
    }
    foreach my $item (sort @stack){
        print "D: $item\n";
    }
    @stack=();
    foreach my $phrase (keys %{$l->{archivedata}} ){
        push @stack,$phrase if($l->{archivedata}->{$phrase} ne "USED" );
    }
    foreach my $item (sort @stack){
        print "A: $item\n";
    }
  }
}


