Location: PHPKode > scripts > preg_magic > preg_magic/preg_magic.php
<?php
/**
 * preg_magic
 *
 * Class/Function to assist with parsing great piles of garbage (ie: HTML) into nice
 * hashes/associative arrays with a few regular expressions.
 *
 * <b>License</b>
 * <pre>
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * </pre>
 * 
 * @see preg_magic()
 * @version 1.0
 * @author Shannon Wynter ({@link http://fremnet.net/contact http://fremnet.net/contact})
 * @copyright Copyright Shannon Wynter & Fremnet.net
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
 * @package preg_magic
 */
/**
 * preg_magic
 *
 * Alias for {@link preg_magic::execute() preg_magic::execute()}
 *
 * <b>Basic synopsis:</b>
 *
 * {@link preg_magic() preg_magic}($Fields, $Target)
 *
 * $Fields is an associative array that contains the structure you expect to extract from the file.
 * It is very important to understand that you must supply the fields in the <i>order</i> that you expect them to
 * be matched.
 *
 * There are currently two types of fields
 *	- simple
 *	- table
 *
 * A simple field is simply defined as 'name' => 'expression to match'
 *
 * A table field is a whole lot more complicated but it's also very flexible, and is defined as follows
 * <pre>
 *	'@table'       => true,
 *	'@table_start' => 'the expression to match at the start of the table',
 *	'@table_end'   => 'the expression to match at the end of the table',
 *	'@table_fields' => array(of more fields),
 * </pre>
 *
 * Both @table_start and @table_end will be ignored if this is the root element of the array
 *
 * So an example structure might look like
 * {@example example/fields_example.php}
 * This example shows two simple fields, and a table
 *
 * The structure can be recursive, ie, you can have more table and simple fields beneath the table field
 *
 * A full example might look like this
 * {@example example/full_example.php}
 *
 * @param array $Fields An associative array of fields to be used
 * @param string $Target The target string to process
 * @return array|false An associative array containing data, or false on failure to match
 */
function preg_magic($Fields, $Target) {
	// Procedural entry point: all of the work is delegated to the static
	// execute() method on the class that shares this function's name.
	$Parsed = preg_magic::execute($Fields, $Target);

	return $Parsed;
}

/**
 * preg_magic
 *
 * Class of static functions containing preg_magic
 *
 * You should call the {@link preg_magic() preg_magic()} function to do all your hard work as
 * the class is pretty much just used as a namespace
 *
 * @see preg_magic()
 * @version 1.0
 * @author Shannon Wynter <http://fremnet.net/contact>
 * @copyright Copyright Shannon Wynter & Fremnet.net
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
 * @package preg_magic
 */
class preg_magic {

	/**
	 * execute
	 *
	 * Builds one combined regular expression from $Fields, runs it against
	 * $Target and maps the captured groups back onto the field names.
	 *
	 * The combined expression is wrapped as "/.../s", so a literal "/" inside
	 * a field expression has to be escaped by the caller.
	 *
	 * When $Fields is itself a table definition ('@table' => true) every match
	 * of its '@table_fields' becomes one row of the returned list; the
	 * '@table_start' and '@table_end' entries are not used at this level.
	 *
	 * @see preg_magic()
	 * @access public
	 * @param array $Fields An associative array of fields to be used
	 * @param string $Target The target string to process
	 * @return array|false An associative array containing data, or false on failure to match
	 */
	public static function execute($Fields, $Target) {
		// Check to see if we're working on a table
		$Table = !empty($Fields['@table']);

		// Sort out just what fields we are using
		if ($Table) {
			// A table without a usable field list can never match anything
			if (!isset($Fields['@table_fields']) || !is_array($Fields['@table_fields']))
				return false;
			$ProcessFields = $Fields['@table_fields'];
		} else {
			$ProcessFields = $Fields;
		}

		// Get the expression to run
		$Expr = self::build_expr($ProcessFields);

		if (!$Table) {
			// Flat entry: a single match describes the whole record
			if (!preg_match("/$Expr/s", $Target, $Match))
				return false;
			return self::associate($ProcessFields, $Match);
		}

		// Parsing a table entry: every match is one row
		if (!preg_match_all("/$Expr/s", $Target, $Match, PREG_SET_ORDER))
			return false;

		$Result = array();
		foreach ($Match as $Row) {
			$AssocRow = self::associate($ProcessFields, $Row);
			if ($AssocRow === false)
				return false;
			// Append table record
			$Result[] = $AssocRow;
		}

		return $Result;
	}

	/**
	 * associate
	 *
	 * Performs a similar duty to {@link http://www.php.net/array_combine array_combine}
	 * with the exception that this is capable of checking for tables so recursive field
	 * definitions can function.
	 *
	 * Fields are paired with captures purely by position: the n-th field receives
	 * the n-th captured group. A simple field whose group is absent from $Data
	 * (preg_match drops trailing groups that did not take part in the match)
	 * is set to null; a table field whose group is absent makes the call fail.
	 *
	 * @access protected
	 * @param array $Fields The fields passed to {@link preg_magic::execute() execute()}
	 * @param array $Data The array of matches from {@link http://www.php.net/preg_match preg_match}
	 * @return array|false An associative array of $Fields to $Data or false on failure
	 */
	protected static function associate($Fields, $Data) {
		// First entry is the 'raw match', turf it we're not using it
		array_shift($Data);

		$Result = array();
		$Index = 0;

		foreach ($Fields as $Key => $Definition) {
			// Guard the lookup: the capture for this position may not exist
			$Captured = isset($Data[$Index]) ? $Data[$Index] : null;
			$Index++;

			// Does this key specify a table?
			if (is_array($Definition) && !empty($Definition['@table'])) {
				// Nothing was captured for the table, so there is nothing to parse
				if ($Captured === null)
					return false;
				// Re-run preg_magic on just the data for this key
				$Captured = self::execute($Definition, $Captured);
				if ($Captured === false)
					return false;
			}

			// Associate this data with this key
			$Result[$Key] = $Captured;
		}
		return $Result;
	}

	/**
	 * build_expr
	 *
	 * Builds as much of the expression as required to make the closest match possible
	 * based on the $Fields provided to {@link preg_magic::execute() execute()}.
	 *
	 * Consecutive fields are joined with a lazy ".*?". At depth 0 a table contributes
	 * exactly one capturing group that wraps all of its rows; below depth 0 nothing
	 * is allowed to capture, so submatches are escaped automatically.
	 *
	 * '@table_start' and '@table_end' are always made non-capturing, because a
	 * capture inside them would shift the positional pairing done by associate().
	 *
	 * @access protected
	 * @param array $Fields The fields passed to {@link preg_magic::execute() execute()}
	 * @param int $Depth Reserved - Do not use this unless you know what you are doing
	 * @return string The combined regular expression
	 */
	protected static function build_expr($Fields, $Depth = 0) {
		$Nested = ($Depth > 0);
		$Expr = '';
		foreach ($Fields as $Value) {
			$AddToExpr = '';
			if (is_array($Value)) {
				// Original provisions allow for more types than just table, the other two
				// functions will probably need updating if other types are added
				if (!empty($Value['@table'])) {
					$Start = isset($Value['@table_start']) ? $Value['@table_start'] : '';
					$End = isset($Value['@table_end']) ? $Value['@table_end'] : '';
					$RowFields = (isset($Value['@table_fields']) && is_array($Value['@table_fields']))
						? $Value['@table_fields']
						: array();

					// One or more rows, as few as possible before the table end
					$Rows = '(?:' . self::build_expr($RowFields, $Depth + 1) . ')+?';
					// Only a top level table gets a capturing group around its rows
					if (!$Nested)
						$Rows = '(' . $Rows . ')';

					$AddToExpr = self::escape_open_bracket($Start) . $Rows . self::escape_open_bracket($End);
				}
			} elseif ($Nested) {
				// Escape out any stray matching brackets
				$AddToExpr = self::escape_open_bracket($Value);
			} else {
				// The simplest of all
				$AddToExpr = $Value;
			}
			$Expr .= ($Expr !== '' ? '.*?' : '') . $AddToExpr;
		}
		return $Expr;
	}

	/**
	 * escape_open_bracket
	 *
	 * Turns every plain capturing "(" in $Regex into the non-capturing "(?:".
	 *
	 * Left untouched:
	 *	- anything escaped with a backslash, such as a literal "\("
	 *	- the contents of character classes, such as "[(]"
	 *	- groups that already start with "(?" (non-capturing, lookaround, inline
	 *	  options, named groups) and verbs that start with "(" followed by "*"
	 *
	 * Named groups are therefore still capturing after this call.
	 *
	 * @access protected
	 * @param string $Regex
	 * @return string
	 */
	protected static function escape_open_bracket($Regex) {
		// The first alternative consumes escaped pairs and whole character classes
		// and then discards them via (*SKIP)(*F), so only a bare "(" that is not
		// followed by "?" or "*" is left to be replaced.
		return preg_replace(
			'/(?:\\\\.|\[\^?\]?(?:\\\\.|[^\]\\\\])*\])(*SKIP)(*F)|\((?![?*])/s',
			'(?:',
			$Regex
		);
	}
}
Return current item: preg_magic