<?php
/*
Released through http://bibliophile.sourceforge.net under the GPL licence.
Do whatever you like with this -- some credit to the author(s) would be appreciated.

A collection of PHP classes to manipulate bibtex files.

If you make improvements, please consider contacting the administrators at bibliophile.sourceforge.net so that your improvements can be added to the release package.

Mark Grimshaw 2004/2005
http://bibliophile.sourceforge.net

28/04/2005 - Mark Grimshaw.
	Efficiency improvements.

11/02/2006 - Daniel Reidsma.
	Changes to preg_matching to account for Latex characters in names such as {\"{o}}

2012-08-28 - Hugo Jonker
   - some fixes to prefix handling to satisfy example #6
     (Vallee should not be parsed as a first name)
   - trimming of all fields outputted
   - return named keyed array instead of numerical keys:
     firstname => FIRSTNAME, initials => INITS, etc.

2013-05-16 - Hugo Jonker
   - Corrected preg matching for nested curly braces wrt. last name.

2014-05-24 - Hugo Jonker
   - found bug in parsing "R. {van der Meyden}".
     fixed and added related test cases.
*/

// For a quick command-line test (php -f PARSECREATORS.php) after installation, uncomment these lines:

/**********************************************
	$authors = "Mark \~N. Grimshaw and Bush III, G. W. & " .
		"H{\'a}mmer, Jr., M. C. and " .
		"von Frankenstein, Ferdinand Cecil, P.H. & " .
		"von Frankenstein, Ferdinand Cecil P.H. & " .
		"Charles Louis Xavier Joseph de la Vallee P{\\\"{o}}ussin" .
		" and F. C{a}ssez and R. {van {der} Meyden} " .
		" and van Oranje-Nassau, Queen of the Netherlands, Beatrix";
	$creator = new PARSECREATORS();
	$creatorArray = $creator->parse($authors);
	print_r($creatorArray);
***********************************************/

// Expected result according to 
//	http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html#names
// #  First		Inits	Surname				prefix
// 1. Mark		\~N	Grimshaw
// 2. 			G W	Bush III
// 3.			M C	H{\'a}mmer, Jr
// 4. 			P H	Frankenstein Ferdinand Cecil 	von
// 5. Fer.. Cecil	P H	Frankenstein 			von
// 6. Char..Joseph		P{\"{o}}ussin			de la Vallee
// 7.                   F       C{a}ssez
// 8.                   R       van {der} Meyden
// 9. Beatrix			Oranje-Nassau, Queen..		van

/**
 * Splits a BibTeX author/editor field into its individual creators and breaks
 * each creator down into first name(s), initials, surname and surname prefix
 * ('von', 'de la', ...).
 *
 * Accepted name layouts (creators are separated by ' and ' or ' & '):
 *   1. <first-tokens> <von-tokens> <last-tokens>
 *   2. <von-tokens> <last-tokens>, <first-tokens>
 *   3. <von-tokens> <last-tokens>, <jr-tokens>, <first-tokens>
 */
class PARSECREATORS
{
	/**
	 * Prefix tokens collected by grabFirstnameInitials() for the creator that is
	 * currently being parsed.  parse() resets it for every creator.
	 *
	 * @var array
	 */
	public $prefix = array();

	/**
	 * Nothing to initialise; kept so that subclasses can call parent::__construct().
	 */
	public function __construct()
	{
	}

	/**
	 * Create creator arrays from a BibTeX author/editor string.
	 *
	 * @param string $input raw field value, e.g. "Mark N. Grimshaw and Bush III, G. W."
	 * @return array|false list of arrays with the keys 'firstname', 'initials',
	 *                     'surname' and 'prefix' (all trimmed strings), or FALSE
	 *                     when the input could not be split or nothing was produced.
	 *                     Note that an empty input still yields one creator whose
	 *                     fields are all empty strings.
	 */
	public function parse($input)
	{
		$input = trim((string) $input);

		// split into individual authors (by " and ", ' & ')
		$authorArray = preg_split("/\s(and|&)\s/i", $input);
		if($authorArray === FALSE)
			return FALSE;

		$creators = array();
		foreach($authorArray as $value)
		{
			// remove whitespace - the preg_match below is anchored to end of string
			$value = trim($value);
			$appellation = $prefix = $surname = '';
			$this->prefix = array();
			// runs of whitespace are collapsed before splitting on commas
			$author = explode(",", preg_replace("/\s{2,}/", ' ', $value));
			$size = count($author);
			// No commas therefore something like Mark Grimshaw, Mark Nicholas Grimshaw,
			// M N Grimshaw, Mark N. Grimshaw
			if($size == 1)
			{
				// Is the complete surname enclosed in {...}?  The first character inside
				// the braces must not be a backslash, because then it is probably a LaTeX
				// special character such as {\"{o}} rather than a protected surname.
				// One level of nested {..} pairs is allowed inside the surname, e.g.
				// "R. {van {der} Meyden}".
				if(preg_match("/(.*)\s*{([^\\\]([^{]*{[^}]*})*[^}]*)}$/", $value, $matches))
				{
					$author = explode(" ", $matches[1]);
					$surname = $matches[2];
				}
				else
				{
					$author = explode(" ", $value);
					// last of array is surname (no prefix if entered correctly)
					$surname = array_pop($author);
				}
			}
			// Something like Grimshaw, Mark or Grimshaw, Mark Nicholas or Grimshaw, M N
			// or Grimshaw, Mark N.
			else if($size == 2)
			{
				// first of array is surname (perhaps with prefix)
				list($surname, $prefix) = $this->grabSurname(array_shift($author));
			}
			// Three (or more) comma separated parts: something like Bush, Jr. III, George W
			else
			{
				// second part is 'Jr.', 'IV' etc.
				$appellation = join(' ', array_splice($author, 1, 1));
				// first of array is surname (perhaps with prefix)
				list($surname, $prefix) = $this->grabSurname(array_shift($author));
			}
			// whatever is left holds the first names / initials (and, for the
			// comma-less layout, possibly the surname prefix)
			$remainder = join(" ", $author);
			list($firstname, $initials) = $this->grabFirstnameInitials($remainder);
			// a prefix found among the first-name tokens takes precedence
			if(!empty($this->prefix))
				$prefix = join(' ', $this->prefix);
			if(!empty($appellation))
				$surname = $surname . ',' . $appellation;
			// grabSurname() may hand back FALSE for surname/prefix, hence the casts
			$creators[] = array('firstname' => trim((string) $firstname),
				'initials' => trim((string) $initials),
				'surname' => trim((string) $surname),
				'prefix' => trim((string) $prefix));
		}
		if(!empty($creators))
			return $creators;
		return FALSE;
	}

	/**
	 * Grab firstname and initials which may be of form "A.B.C." or "A. B. C. " or
	 * " A B C " etc.
	 *
	 * Tokens are separated by single spaces.  From the first token that starts with
	 * a lowercase ASCII letter onwards, every token is appended to $this->prefix
	 * instead.  Of the other tokens, those containing two or more consecutive ASCII
	 * letters count as first names; the rest are treated as initials, with dots
	 * replaced by spaces.
	 *
	 * @param string $remainder name tokens left after the surname has been removed
	 * @return array array($firstname, $initials); $initials is not trimmed and
	 *               carries a leading space for every initial token
	 */
	public function grabFirstnameInitials($remainder)
	{
		$firstnameArray = array();
		$initialsArray = array();
		$doingprefix = FALSE;
		foreach(explode(" ", (string) $remainder) as $value)
		{
			if($doingprefix || $this->startsWithLowercase($value))
			{
				$doingprefix = TRUE;
				$this->prefix[] = $value;
			}
			else if(preg_match("/[a-zA-Z]{2,}/", trim($value)))
				$firstnameArray[] = trim($value);
			else
				$initialsArray[] = str_replace(".", " ", trim($value));
		}
		$initials = '';
		foreach($initialsArray as $initial)
			$initials .= ' ' . trim($initial);
		$firstname = join(" ", $firstnameArray);
		return array($firstname, $initials);
	}

	/**
	 * Split a surname into prefix and surname proper.
	 *
	 * Surname may have title such as 'den', 'von', 'de la' etc. - characterised by
	 * first character lowercased.  Any uppercased part means lowercased parts
	 * following are part of the surname (e.g. Van den Bussche).
	 *
	 * @param string $input surname part of a creator, tokens separated by single spaces
	 * @return array array($surname, $prefix); either element is FALSE when no token
	 *               was assigned to it
	 */
	public function grabSurname($input)
	{
		$prefixParts = array();
		$surnameParts = array();
		$noPrefix = FALSE;
		foreach(explode(" ", (string) $input) as $value)
		{
			if(!$noPrefix && $this->startsWithLowercase($value))
				$prefixParts[] = $value;
			else
			{
				$surnameParts[] = $value;
				$noPrefix = TRUE;
			}
		}
		$surname = empty($surnameParts) ? FALSE : join(" ", $surnameParts);
		$prefix = empty($prefixParts) ? FALSE : join(" ", $prefixParts);
		return array($surname, $prefix);
	}

	/**
	 * Tell whether the first byte of $token is an ASCII lowercase letter (a-z).
	 * An empty token yields FALSE.
	 */
	private function startsWithLowercase($token)
	{
		$code = ord((string) substr($token, 0, 1));
		return ($code >= 97) && ($code <= 122);
	}
}
?>
