<?php

/******************************************************
  php-bibHTML - a bibtex to HTML conversion library.
		built upon the Bibliophile BibTex parsers v2.2,
		http://bibliophile.sourceforge.net/downloads.php

  (c) 2012-2013, Hugo Jonker (hugo.jonker@uni.lu)

  Main usage:
  - edit conf-php-bibHTML.php to point to the right bibtex file
  - call PHPBibHTML::bib_to_htmllist() to get an HTML-formatted string

  Auxiliary (useful) functions:
  - PHPBibHTML::latex_to_html_string(str)
  	converts a latex string to HTML (mostly replacing accented chars)
  - PHPBibHTML::missing_fields(array)
  	returns which required fields are missing from the input array
  - PHPBibHTML::fields_missing(file)
  	returns array with required fields missing from the input file


  Dependencies:
  - conf-php-bibHTML.php - configuration
  - biblink.php		 - to show sanitised bibtex entries
  - inc-parsebib.php
    inc-parseauthors.php
    			 - BibTex parser by Grimshaw et al.

  License/usage:
  This program is released under the terms of the GPL v3 license,
  a copy of which should be included with this distribution.

  This program is provided "AS IS", without any warranties of any kind,
  either expressed or implied.

******************************************************/

include_once('conf-php-bibHTML.php');	// configuration
include_once('inc-parsebib.php');	// bibtex file parser
include_once('inc-parseauthors.php');	// bibtex author format parser


// Limit scope of function names (namespaces only work from php 5.3 on)
class PHPBibHTML {

// Explicit constructor. It is declared *before* the legacy one below so
// that PHP 5 does not report "Redefining already defined constructor",
// and its presence keeps PHP 7 from flagging the class-named method as a
// deprecated PHP 4 constructor.
public function __construct() {}

// Legacy "fake constructor" (the original comment says it was added "to
// avoid errors"). Kept as an empty method so existing callers keep working.
public function PHPBibHTML() {}

/** get_initial
 * Get the initial of a latex string
 *
 * input: latex string
 * output: first letter of the LaTex string, including the accent command
 *         that produces it ('' for an empty string)
 *
 * Examples:
 * 1. X        --> X
 * 2. \"e      --> \"e
 * 3. \~{a}    --> \~{a}      (everything up to the closing brace)
 * 4. {\"O}zg  --> \"O        (leading group braces are skipped)
 * 5. a name starting with a multi-byte UTF-8 letter yields that whole
 *    letter, not just its first byte
 */
public static function get_initial($str)  {
	// a leading group brace is markup, not part of the initial
	$str = ltrim((string) $str, '{');
	if ($str === '') {
		return '';
	}

	$init = substr($str, 0, 1);

	if ($init === '\\') {
		// case 2 or 3: accent command plus the character after it
		$init .= substr($str, 1, 2);

		// case 3: the accented character is wrapped in braces
		if (substr($init, -1) === '{') {
			// offset 3 is only valid when the string is long enough
			$close = (strlen($str) > 3 ? strpos($str, '}', 3) : FALSE);
			if ($close === FALSE) {
				// unbalanced: fall back to "one char + brace"
				$init .= substr($str, 3, 2);
			} else {
				$init = substr($str, 0, $close + 1);
			}
		}
	} elseif (ord($init) >= 0x80 &&
		preg_match('/^./us', $str, $match) === 1)  {
		// first byte starts a multi-byte UTF-8 sequence: keep it whole.
		// (Invalid UTF-8 makes preg_match fail; the single byte is kept.)
		$init = $match[0];
	}

	return $init;
}

/** canonalise_authors
 * Fix a bibtex author string for readability
 *
 * input: a bibtex author string
 * output: an HTML author string ('' if no author could be parsed).
 *
 * Example:
 * author = { Hugo L. Jonker and J. Pang & Naipeng von Dong}
 * output: H.L. Jonker, J. Pang and N. von Dong
 *
 * Relies on PARSECREATORS::parse() returning a list of arrays with the
 * keys 'firstname', 'initials', 'prefix' and 'surname'.
 */
public static function canonalise_authors($str)  {
	$authparse = new PARSECREATORS;
	$autharray = $authparse->parse($str);

	// the parser result may not be a (non-empty) list
	if (!is_array($autharray) || count($autharray) == 0)  {
		return '';
	}

	$names = array();
	foreach($autharray as $auth)  {
		$inits = '';
		if (!empty($auth['firstname']))  {
			$inits = self::get_initial($auth['firstname']) . '.';
		}
		if (!empty($auth['initials']))  {
			// "L M" --> "L.M."
			$inits .= str_replace(' ', '.', $auth['initials']) . '.';
		}

		// only separate initials from the surname if there are any
		$names[] = ($inits === '' ? '' : $inits . ' ') .
			(empty($auth['prefix']) ? '' : $auth['prefix'] . ' ') .
			(isset($auth['surname']) ? $auth['surname'] : '');
	}

	// "A, B and C": commas between all names but the last two
	$last = array_pop($names);
	$ret = (count($names) > 0 ? implode(', ', $names) . ' and ' : '') .
		$last;

	return self::latex_to_html_string($ret);
}

/** load_tex_map
 *  reads the tex-to-HTML mapping used by latex_to_html_string
 *
 *  input: name of the map file, one mapping per line, columns separated
 *         by TABs
 *  output: array (search string => replacement) suitable for strtr()
 *
 *  The first column is used as the search string and the second as its
 *  replacement; any further column is ignored. This is the column use of
 *  the original code -- presumably LATEX TAB HTML TAB UNICODE; TODO
 *  confirm against the map file, earlier documentation listed the
 *  columns in a different order.
 *
 *  Lines without a TAB or with an empty first column are skipped: an
 *  empty search string makes strtr() fail on PHP < 8.
 */
private static function load_tex_map($file)  {
	$map = array();

	$lines = FALSE;
	if (is_string($file) && is_readable($file))  {
		$lines = file($file, FILE_IGNORE_NEW_LINES);
	}
	if ($lines === FALSE)  {
		trigger_error("php-bibHTML: couldn't read the tex-to-HTML " .
			"map file.\n");
		$lines = array();
	}

	foreach($lines as $line)  {
		// tolerate DOS line endings
		$cols = explode("\t", rtrim($line, "\r"));
		if (count($cols) < 2 || $cols[0] === '')  {
			continue;
		}
		$map[$cols[0]] = $cols[1];
	}

	// remove latex grouping symbols
	$map['{'] = '';
	$map['}'] = '';

	return $map;
}

/** Latex_to_html_string
 *  converts a latex string to an HTML string using $mapfile
 *
 *  input: a latex string
 *  output: an HTML string with accents and characters replaced
 *
 *  example: "\'e" --> "&eacute;"
 *
 *  The mapping is read from the file named by the global configuration
 *  variable $mapfile on the first call and cached for later calls (see
 *  load_tex_map for the file format). \url{...} wrappers are removed
 *  on every call, keeping only the URL itself.
 *
 *  ### NOTE: This is *NOT* perfect!
 *            This function does not do tokenization, and is fragile wrt.
 *            newcommands (e.g., \o will _always_ be substituted, since
 *            there exists at least one entry starting with \o in
 *            $mapfile.)
 *
 *            Note: longer matches in $mapfile take priority -- a property
 *            of strtr().
 */
public static function latex_to_html_string($str)  {
	global $mapfile;
	static $texhtml = array();

	// only load file if needed
	if (count($texhtml) == 0)  {
		$texhtml = self::load_tex_map($mapfile);
	}

	$str = (string) $str;

	// unwrap \url{...}; must happen for every string, not just the
	// first one, and before the braces are stripped by strtr
	if (strpos($str, '\url{') !== FALSE)  {
		$unwrapped = preg_replace('/\\\\url\{([^}]*)\}/', '$1', $str);
		if ($unwrapped !== NULL)  {
			$str = $unwrapped;
		}
	}

	// strtr favours larger matches, thus allowing us to translate
	// both \o and \omicron in one go
	return strtr($str, $texhtml);
}

/** l2h
 *  Identical to latex_to_html_string, but shorter to type :)
 */
public static function l2h($str)  {
	return self::latex_to_html_string($str);
}

/** parse_bibfile
 *  runs the bibtex parser on a file
 *
 *  input: name of a readable bibtex file
 *  output: the parser's returnArrays() result, i.e.
 *          array(preamble, strings, entries, undefinedStrings)
 *
 *  Notices are switched off while parsing (the parser raises them for
 *  missing fields) and the previous reporting level is restored after.
 */
private static function parse_bibfile($file)  {
	// error_reporting() without argument returns the current level.
	// Mask E_NOTICE out; XOR-ing it would switch notices *on* whenever
	// they were already disabled.
	$errors = error_reporting();
	error_reporting($errors & ~E_NOTICE);

	$parse = new PARSEENTRIES();
	$parse->expandMacro = TRUE;
	$parse->openBib($file);
	$parse->extractEntries();
	$parse->closeBib();
	$arrays = $parse->returnArrays();

	error_reporting($errors);

	return $arrays;
}

/** fields_missing:	FILE
 *  returns required fields missing in the bibtex file FILE
 *
 *  input: a bibtex filename
 *  output:
 *   - an array ($bibtexkey => array(missing fields)),
 *   - FALSE if no missing entries,
 *   - -1 if file could not be read.
 */
public static function fields_missing($file)  {
	if (!is_readable($file))  {
		trigger_error("php-bibHTML: couldn't read bibtexfile.\n");
		return -1;
	}

	$parsed = self::parse_bibfile($file);
	$entries = $parsed[2];

	// check each of the entries for errors
	$ret = array();
	foreach($entries as $entry)  {
		$misfields = self::missing_fields($entry);
		if (!empty($misfields))  {
			$ret[$entry['bibtexCitation']] = $misfields;
		}
	}

	return (empty($ret) ? FALSE : $ret);
}

/** missing_fields:	ARRAY
 *  returns (an array of) required fields missing from the input bibtex array ARRAY
 *
 *  input: an array of one single parsed bibtex entry, mapping fields to values
 *  	i.e. array( 'bibtexEntryType' => article, 'author' => 'me',  ...)
 *  output: an array of the missing fields, if any, or FALSE if none.
 *  	FALSE is also returned (after raising an error) when the entry
 *  	type is missing or not recognised.
 *
 *  A field counts as present when its key exists, whatever its value.
 */
public static function missing_fields($bibtex)  {
	$reqfields = array(
		'article' => array('author', 'title', 'journal', 'year'),
		'book' => array('author', 'title', 'publisher', 'year'),
		'booklet' => array('title'),
		'conference' => array('author', 'title', 'booktitle', 'year'),
		'inbook' => array('author', 'title', 'chapter', 'pages',
			'publisher', 'year'),
		'incollection' => array('author', 'title', 'booktitle',
			'year'),
		'inproceedings' => array('author', 'title', 'booktitle',
			'year'),
		'manual' => array('title'),
		'mastersthesis' => array('author', 'title', 'school', 'year'),
		'misc' => array(),
		'phdthesis' => array('author', 'title', 'school', 'year'),
		'proceedings' => array('title', 'year'),
		'techreport' => array('author', 'title', 'institution',
			'year'),
		'unpublished' => array('author', 'title', 'note')
	);

	// required field => field that may take its place, per entry type:
	// an editor can replace the author of a book, and an inbook needs
	// a chapter and/or pages.
	$alternatives = array(
		'book' => array('author' => 'editor'),
		'inbook' => array('author' => 'editor',
			'chapter' => 'pages', 'pages' => 'chapter')
	);

	if (!is_array($bibtex) ||
		!array_key_exists('bibtexEntryType', $bibtex) ||
		!array_key_exists($bibtex['bibtexEntryType'], $reqfields))  {
		trigger_error('php-bibHTML: missingfields(): cannot discern ' .
			'the bibtex-type of the argument; the argument\'s ' .
			'field "bibtexEntryType" missing or not recognised' .
			".\n");
		return FALSE;
	}

	$type = $bibtex['bibtexEntryType'];
	$ret = array();
	foreach($reqfields[$type] as $field)  {
		if (array_key_exists($field, $bibtex))  {
			continue;
		}
		if (isset($alternatives[$type][$field]) &&
			array_key_exists($alternatives[$type][$field], $bibtex))  {
			continue;
		}
		$ret[] = $field;
	}

	return (empty($ret) ? FALSE : $ret);
}

/** field_html
 *  returns field $field of a parsed bibtex entry converted to HTML,
 *  or '' if the entry has no such field
 */
private static function field_html($bibtex, $field)  {
	return (array_key_exists($field, $bibtex) ?
		self::latex_to_html_string($bibtex[$field]) : '');
}

/** bib2html
 *  converts one (parsed) bibtex entry into an HTML string
 *
 *  input: an array of one single parsed bibtex entry, mapping fields to values
 *  	i.e. array( 'author' => 'me', 'year' => 1900, ...)
 *  output: an HTML-formatted string for this entry, followed by its
 *  	"extra links" (see extra_links_str)
 *
 *  The template TEMPLATEDIR/<entry type>.tmpl is included inside this
 *  function and has to set $phpbibHTML_bibentry_tmpl. The local
 *  variables below ($address ... $year, $s, $bibtype) are not used
 *  here otherwise, so presumably the templates read them: do not rename
 *  them without checking the templates.
 *
 *  Missing required fields raise an error but do not stop the conversion.
 */
public static function bib2html($bibtex)  {
	// Check for missing fields
	$misfields = self::missing_fields($bibtex);
	if (!empty($misfields))  {
		trigger_error('php-bibHTML: missing required field(s) in entry ' .
			$bibtex['bibtexCitation'] . '. Missing fields: ' .
			implode(', ', $misfields) . ".\n");
	}

	// Template vars, '' when the field is absent.
	// Plain text fields are converted from latex to HTML ...
	$address = self::field_html($bibtex, 'address');
	$annote = self::field_html($bibtex, 'annote');
	$bktitle = self::field_html($bibtex, 'booktitle');
	$chap = self::field_html($bibtex, 'chapter');
	$edition = self::field_html($bibtex, 'edition');
	$howpub = self::field_html($bibtex, 'howpublished');
	$inst = self::field_html($bibtex, 'institution');
	$journal = self::field_html($bibtex, 'journal');
	$key = self::field_html($bibtex, 'key');
	$month = self::field_html($bibtex, 'month');
	$note = self::field_html($bibtex, 'note');
	$org = self::field_html($bibtex, 'organization');
	$pg = self::field_html($bibtex, 'pages');
	$pub = self::field_html($bibtex, 'publisher');
	$school = self::field_html($bibtex, 'school');
	$sry = self::field_html($bibtex, 'series');
	$title = self::field_html($bibtex, 'title');
	$type = self::field_html($bibtex, 'type');

	// ... name lists are abbreviated as well ...
	$author = (array_key_exists('author', $bibtex) ?
			self::canonalise_authors($bibtex['author']) : '');
	$editor = (array_key_exists('editor', $bibtex) ?
			self::canonalise_authors($bibtex['editor']) : '');

	// ... and these fields are passed on unconverted.
	$num = (array_key_exists('number', $bibtex) ?
			$bibtex['number'] : '');
	$vol = (array_key_exists('volume', $bibtex) ?
			$bibtex['volume'] : '');
	$year = (array_key_exists('year', $bibtex) ?
			$bibtex['year'] : '');

	// Extra 's' in case of multiple editors. canonalise_authors joins
	// the last two names with ' and '; looking for a bare 'and' would
	// also match names such as "Alexander".
	$s = (strpos($editor, ' and ') !== FALSE ? 's' : '');

	// Type of the bibtex entry
	$bibtype = (isset($bibtex['bibtexEntryType']) ?
			$bibtex['bibtexEntryType'] : '');

	// An unknown type has no template: its entry text stays empty.
	// Only these known names ever become part of the include path.
	$phpbibHTML_bibentry_tmpl = '';
	if (in_array($bibtype, array('article', 'conference', 'inbook',
		'incollection', 'inproceedings', 'phdthesis', 'book',
		'proceedings', 'misc', 'manual', 'booklet', 'mastersthesis',
		'techreport', 'unpublished'), TRUE))  {
		include(TEMPLATEDIR . $bibtype . '.tmpl');
	}

	// Load template
	$ret = $phpbibHTML_bibentry_tmpl . "\n";

	// Clean up.
	// In some cases (empty bibtex fields, incorrect templates),
	// " ,", ". ," and ", ." may appear in output. output space can be
	// any form of white space, including newlines.
	$ret = preg_replace('/\.(\s|\R)*,/', '.', $ret);
	$ret = preg_replace('/,(\s|\R)*\./', '.', $ret);
	$ret = preg_replace('/(\s|\R)*,/', ',', $ret);

	// add links to pdf / slides / bibtex / etc.
	// slides etc. are detected based on extra fields in the bib source
	$ret .= self::extra_links_str($bibtex);

	return $ret;
}

/** html_attr
 *  escapes a string for use inside a double-quoted HTML attribute
 *
 *  Existing entities (e.g. "&amp;" in a URL) are not escaped again.
 *  ISO-8859-1 is named as charset so that htmlspecialchars never rejects
 *  the input: only the HTML special characters are touched, bytes of
 *  multi-byte UTF-8 characters pass through unchanged.
 */
private static function html_attr($value)  {
	return htmlspecialchars((string) $value, ENT_QUOTES, 'ISO-8859-1',
		FALSE);
}

/** extra_links_str
 *  returns a string containing "extra links" of a bibtex entry.
 *
 *  Input:	one bibtex entry
 *  Output:	a string containing at least a link to the plain biblisting.
 *  Example:	[ <a href="biblink.php?id=ID">bib</a> ]
 *
 *  Currently adds links for the following bibfields, in this order:
 * 	preprint, url, ee, doi, slides
 *  The link text for url, ee and doi is taken from conf var $biblink
 *  (the field name is used if $biblink has no entry for it); preprint
 *  and slides are always labelled with their field name.
 *
 *  Link targets are HTML-escaped and the bibtex key is URL-encoded, so
 *  URLs containing '&' or quotes cannot break the markup.
 */
public static function extra_links_str($bibtex)  {
	global $biblink;	// how to label bibfields ee, url, etc.

	// bibfield => what to put in front of its value to get a URL
	$prefixes = array(
		'preprint' => '',
		'url' => '',
		'ee' => '',
		'doi' => 'http://dx.doi.org/',
		'slides' => ''
	);
	// bibfields whose link text does not come from $biblink
	$fixedlabels = array('preprint' => 'preprint', 'slides' => 'slides');

	$links = array('<a href="' . BIBLINKFILE . '?id=' .
		urlencode($bibtex['bibtexCitation']) . '">bib</a>');

	foreach($prefixes as $field => $prefix)  {
		if (!array_key_exists($field, $bibtex))  {
			continue;
		}

		$target = $bibtex[$field];
		// do not prefix a value that already is a complete URL
		// (e.g. a doi field holding "http://dx.doi.org/10....")
		if ($prefix === '' ||
			preg_match('#^https?://#i', $target) !== 1)  {
			$target = $prefix . $target;
		}

		if (isset($fixedlabels[$field]))  {
			$label = $fixedlabels[$field];
		} elseif (is_array($biblink) && isset($biblink[$field]))  {
			$label = $biblink[$field];
		} else {
			$label = $field;
		}

		$links[] = '<a href="' . self::html_attr($target) . '">' .
			$label . '</a>';
	}

	return "\t\t\t" . '<span class="biblinks">[&nbsp;' .
		implode(' | ', $links) . '&nbsp;]</span>' . "\n";
}

/** reversetypes()
 *  return a mapping from bibtype to label
 *  basically swaps the configuration var $bibtypes around:
 *  $bibtypes maps each label to a list of bibtex types, the result maps
 *  each bibtex type to its label.
 *
 *  Raises an E_USER_ERROR (and returns FALSE, should a custom error
 *  handler let the script continue) if $bibtypes is not an array or
 *  lists a type under more than one label.
 */
public static function reversetypes()  {
	global $bibtypes;

	$ret = array();

	if (!is_array($bibtypes))  {
		// single quotes: name the variable, do not interpolate it
		trigger_error('php-bibHTML: reversetypes(): ' .
			'$bibtypes not an array.' . "\n", E_USER_ERROR);
		return FALSE;
	}

	foreach($bibtypes as $label => $types)  {
		foreach((array) $types as $type)  {
			if (array_key_exists($type, $ret))  {
				trigger_error('php-bibHTML: config error: ' .
					$type . ' assigned multiple labels ' .
					'in $bibtypes.' . "\n",
					E_USER_ERROR);
				return FALSE;
			}
			$ret[$type] = $label;
		}
	}

	return $ret;
}

/** cmp_year_desc
 *  uksort() callback ordering year keys from most recent to oldest
 *
 *  Keys are compared by their integer value, so a non-numeric key such
 *  as 'other' counts as 0 and ends up last. This is the order krsort()
 *  produced before PHP 8 changed how numbers and non-numeric strings
 *  compare; doing it explicitly gives the same order on every version.
 */
private static function cmp_year_desc($a, $b)  {
	$ya = (int) $a;
	$yb = (int) $b;

	if ($ya != $yb)  {
		return ($yb > $ya ? 1 : -1);
	}

	// same number (e.g. two non-numeric keys): reverse string order
	return strcmp((string) $b, (string) $a);
}

/** bib_to_htmllist( BIBfile, splitbytype )
 *  returns an HTML string of the entries in BIBfile, grouped by year
 *  	HTML formatting handled in templates from TEMPLATEDIR
 *
 *  splitbytype: whether to make subgroups by type (articles,
 * 	proceedings, ...) or not.
 *
 *  If BIBfile is not a readable file ending in .bib, an error message
 *  is returned instead of HTML.
 *
 *  Only entries whose bibtex type is listed in conf var $rvwkeys are
 *  rendered. With splitbytype, the subgroups follow the order of conf
 *  var $bibtypes.
 *
 *  TEMPLATEDIR/php-bibHTML.tmpl is included inside this function and
 *  has to set $phpbibHTML_header, _footer, _year_start, _year_end,
 *  _type_start, _type_end, _entry_start and _entry_end. Before each
 *  include the locals $year, $yearid, $type and $typeid are set for it;
 *  other locals (e.g. $cssfile) are presumably read by the template as
 *  well, so do not rename them without checking the template.
 */
public static function bib_to_htmllist($bibfile = BIBLIOFILE, $splitbytype = SPLITBYTYPE)  {
	global $rvwkeys;
	global $bibtypes;

	$cssfile = CSSFILE;
	$ret = '';

	// test if bibfile is a file
	if (!is_readable($bibfile) || substr($bibfile, -4) != '.bib')  {
		return "php-bibHTML: bibfile not a valid, readable .bib file.\n";
	}

	// generate mapping from bibtex types to labels
	$revtypes = self::reversetypes();

	// Parse bibtex file and load result into arrays
	list($preamble, $strings, $entries, $undefinedStrings) =
		self::parse_bibfile($bibfile);

	// store reviewed entries in array reviewd, rest in misc
	// NOTE(review): $misc is collected and sorted but never added to
	// the output below -- confirm whether that is intended.
	$reviewd = array(); $misc = array();
	foreach($entries as $entry)  {
		$tp = $entry['bibtexEntryType'];
		// a type without a label in $bibtypes gets the label ''
		$label = (is_array($revtypes) && isset($revtypes[$tp]) ?
				$revtypes[$tp] : '');
		$yr = (isset($entry['year']) ? $entry['year'] : 'other');
		if (is_array($rvwkeys) && in_array($tp, $rvwkeys, TRUE))  {
			if ( $splitbytype )  {
				$reviewd[$yr][$label][] = $entry;
			} else {
				$reviewd[$yr][] = $entry;
			}
		} else {
			$misc[$yr][] = $entry;
		}
	}
	// sort by year, most recent first, entries without year last
	uksort($reviewd, array(__CLASS__, 'cmp_year_desc'));
	uksort($misc, array(__CLASS__, 'cmp_year_desc'));

	if ($splitbytype ) {
		foreach($reviewd as $year => $pubs) {
			// Set variables used in templates
			// (to avoid notices)
			$yearid = 'y' . $year;
			$type = '';
			$typeid = '';

			// load year template & append
			include(TEMPLATEDIR . 'php-bibHTML.tmpl');
			$ret .= "\n" . $phpbibHTML_year_start . "\n";

			foreach($bibtypes as $label => $types)  {
				if (!array_key_exists($label, $pubs)) {
					continue;
				}

				// set vars used in templates
				$type = $label;
				$typeid = str_replace(' ', '_', $type) . $year;

				// load & append type template
				include(TEMPLATEDIR . 'php-bibHTML.tmpl');
				$ret .= $phpbibHTML_type_start . "\n";

				foreach($pubs[$label] as $entry)  {
					$ret .= $phpbibHTML_entry_start . "\n";
					$ret .= self::bib2html($entry);
					$ret .= $phpbibHTML_entry_end . "\n";
				}
				$ret .= $phpbibHTML_type_end . "\n";
			}
			$ret .= $phpbibHTML_year_end . "\n";
		}
	} else {
		$typeid = ''; $type = '';
		foreach($reviewd as $year => $pubs) {
			// same year id as in the split-by-type listing
			$yearid = 'y' . $year;

			include(TEMPLATEDIR . 'php-bibHTML.tmpl');
			$ret .= "\n" . $phpbibHTML_year_start . "\n";
			foreach($pubs as $entry)  {
				$ret .= $phpbibHTML_entry_start . "\n";
				$ret .= self::bib2html($entry);
				$ret .= $phpbibHTML_entry_end . "\n";
			}
			$ret .= $phpbibHTML_year_end . "\n";
		}
	}

	// Without any year to render, the template has not been loaded
	// yet: load it once so that header and footer are defined.
	if (count($reviewd) == 0)  {
		$year = ''; $yearid = '';
		$type = ''; $typeid = '';
		include(TEMPLATEDIR . 'php-bibHTML.tmpl');
	}

	return $phpbibHTML_header . "\n" . $ret . "\n" .
		$phpbibHTML_footer . "\n";
}

/** attribution
 *  returns an HTML paragraph crediting php-bibHTML and the BiblioPhile
 *  BibTex parser, for inclusion in pages showing the generated list.
 */
public static function attribution()  {
	return '<p class="attribution">Generated using <a ' .
		'href="http://satoss.uni.lu/software/php-bibHTML/">' .
		'php-bibHTML</a> by <a href="http://satoss.uni.lu/hugo/">' .
		'Hugo Jonker</a>, based on the <a ' .
		'href="http://bibliophile.sourceforge.net/">BiblioPhile</a> ' .
		'PHP BibTex parser.</p>' . "\n\n";
}

} // end of PHPBibHTML class

?>
