#!/usr/bin/perl -w

# Export of EPrints citations in BibTeX format
# This script is a contribution to EPrints 2.
# This script is under GNU General Public License
# Copyright (c) 2005
# M. Belling 2005-06-23
#
# Michael Belling		
# University of Cologne	        
# Library for Computer Science and Business Computing
# Pohligstr. 1
# D-50969 Koeln       
# mbelling@informatik.uni-koeln.DE

use File::Find;
use HTML::LinkExtor;
use FileHandle;
use DBI;

# Open the database connection that is later used to look up the type
# of each eprint.  DATABASE, HOST-NAME, PORT-NUMBER, ACCOUNT-NAME and
# PASSWORD look like placeholders to be filled in per installation
# (NOTE(review): confirm against the installation instructions).
$dbh = DBI->connect(
    "DBI:mysql:DATABASE:HOST-NAME:PORT-NUMBER",
    'ACCOUNT-NAME',
    "PASSWORD",
) or die "Can not connect: $DBI::errstr";

# Map from EPrints document type to the BibTeX entry type that is
# written into the exported file.
%bibtextype = (
    book         => 'book',
    bookchapter  => 'inbook',
    proceedings  => 'proceedings',
    confpaper    => 'inproceedings',
    confposter   => 'inproceedings',
    incollection => 'incollection',
    techreport   => 'techreport',
    journale     => 'article',
    journalp     => 'article',
    newsarticle  => 'article',
    other        => 'misc',
    preprint     => 'techreport',
    thesis       => 'phdthesis',
);

# Walk the bibtex view directory; analyse() is called for each entry
# and records its name in @file_array.
find({ wanted => \&analyse }, 'ARCHIVE-ROOT/html/en/view/bibtex/');

# Read every file recorded in @file_array and collect the text found
# between "Citation-begin " and " Citation-end" on a line into @citations.
#
# Named lexical loop variables are used on purpose: the previous nested
# use of $_ let the line reader overwrite the elements of @file_array,
# because foreach aliases $_ to the array element.
foreach my $name (@file_array) {
    my $path = "ARCHIVE-ROOT/html/en/view/bibtex/$name";

    # Three-argument open: the file name is never interpreted as an
    # open mode or a pipe, whatever characters it contains.
    open(my $bibtex, '<', $path) or die "Can not open file: $!\n";

    # Citation-array: at most one citation is taken from each line.
    while (my $line = <$bibtex>) {
        push(@citations, $1) if $line =~ /Citation-begin (.+) Citation-end/;
    }

    close($bibtex);
}

# Write one BibTeX file per citation collected in @citations.
#
# For each citation the eprint id is parsed from the archive URL, the
# eprint type is looked up in the database, the marked-up fields are
# extracted, and the entry is written to
# ARCHIVE-ROOT/html/en/data/<id>.bibtex.

# The lookup statement does not change between citations, so it is
# prepared once; the id is bound through a placeholder.
my $type_query = $dbh->prepare('select type from archive where eprintid = ?')
    or die "Can not prepare type query: $DBI::errstr\n";

# Field markers, in the order in which the fields are written.
my @field_tags = qw(
    Aut Year Title Editor Volume Pages Conference Chapter
    Publisher Publication Institution Number
);

foreach my $citation (@citations) {

    # Parsing the eprints-id (leading zeros are dropped).  A citation
    # without an id is skipped; otherwise the id of the previous
    # citation would be reused and that eprint's file overwritten.
    my ($id) = $citation =~ /ARCHIVE-URL\/archive\/0+(\d+)\/\"/;
    unless (defined $id) {
        warn "No eprint id found in citation, skipping it\n";
        next;
    }

    # Select the eprint type.  List assignment makes the single-column
    # fetch unambiguous.
    $type_query->execute($id)
        or die "Can not select type of eprint $id: $DBI::errstr\n";
    my ($type) = $type_query->fetchrow_array();
    $type_query->finish();

    # Fall back to "misc" when the eprint is missing from the table or
    # has a type without a BibTeX counterpart, so the entry stays valid.
    my $entry_type =
        (defined $type && defined $bibtextype{$type})
        ? $bibtextype{$type}
        : 'misc';

    # Parse the citation: each field sits between "<Tag>-begin " and
    # " <Tag>-end"; fields that are absent are simply left out.
    my @dataset;
    foreach my $tag (@field_tags) {
        push(@dataset, $1) if $citation =~ /$tag-begin (.+) $tag-end/;
    }

    # Create bibtex-entry based on the citation fields in @dataset.
    my $path = "ARCHIVE-ROOT/html/en/data/$id.bibtex";
    open(my $out, '>', $path) or die "Can not write $path: $!\n";

    print {$out} "\@$entry_type {GDEA-$id, \n";
    foreach (@dataset) {
        unicode_latex();    # rewrites the field in $_ in place
        print {$out} "$_, \n";
    }
    print {$out} "} \n";

    # Buffered write errors only surface at close time.
    close($out) or die "Can not close $path: $!\n";
}

########################################
# Subroutines
########################################

# File::Find callback: record the name of each plain file in @file_array.
#
# File::Find sets $_ to the name of the entry being visited.  Directories
# (including the "." entry for the start directory) are skipped because
# they contain no citation lines and cannot be read as files everywhere.
#
# NOTE(review): only the bare name is recorded, and the reader prefixes
# it with the top-level bibtex directory, so files inside sub-directories
# would not be found there -- confirm the view directory is flat.
sub analyse {
    return unless -f $_;
    push(@file_array, $_);
    return;
}


# Replace hexadecimal character references (e.g. "&#xFC;") in $_ with
# the matching LaTeX accent commands.  Only the characters listed in the
# table are handled; any other reference is left untouched.
#
# Works in place on $_ and returns nothing.
sub unicode_latex {
    # Hex code point => LaTeX replacement.
    my %latex_for = (
        'C4' => q{\\"A},
        'C0' => q{\\`A},
        'C1' => q{\\'A},
        'E4' => q{\\"a},
        'E0' => q{\\`a},
        'E1' => q{\\'a},
        'C8' => q{\\`E},
        'C9' => q{\\'E},
        'E9' => q{\\'e},
        'E8' => q{\\`e},
        'D6' => q{\\"O},
        'D2' => q{\\`O},
        'D3' => q{\\'O},
        'F6' => q{\\"o},
        'F2' => q{\\`o},
        'F3' => q{\\'o},
        'DC' => q{\\"U},
        'D9' => q{\\`U},
        'DA' => q{\\'U},
        'FC' => q{\\"u},
        'F9' => q{\\`u},
        'FA' => q{\\'u},

        # Braces are required here: a bare \ss followed by a letter
        # (as in "Stra\sse") is read by LaTeX as the unknown command
        # \sse.
        'DF' => q{{\\ss}},
    );

    # The keys are plain hex digits, so they can be joined into an
    # alternation without quoting.
    my $codes = join('|', sort keys %latex_for);
    s/&#x($codes);/$latex_for{$1}/g;

    return;
}
