<?php
/**
* preg_magic
*
* Class/Function to assist with parsing great piles of garbage (ie: HTML) into nice
* hashes/associative arrays with a few regular expressions.
*
* <b>License</b>
* <pre>
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* </pre>
*
* @see preg_magic()
* @version 1.0
* @author Shannon Wynter ({@link http://fremnet.net/contact http://fremnet.net/contact})
* @copyright Copyright Shannon Wynter & Fremnet.net
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
* @package preg_magic
*/
/**
* preg_magic
*
* Alias for {@link preg_magic::execute() preg_magic::execute()}
*
* <b>Basic synopsis:</b>
*
* {@link preg_magic() preg_magic}($Fields, $Target)
*
* $Fields is an associative array that contains the structure you expect to extract from the file.
* It is very important to understand that you must supply the fields in the <i>order</i> that you expect them to
* be matched.
*
* There are currently two types of fields
* - simple
* - table
*
* A simple field is simply defined as 'name' => 'expression to match'
*
* A table field is a whole lot more complicated but it's also very flexible, and is defined as follows:
* <pre>
* '@table' => true,
* '@table_start' => 'the expression to match at the start of the table',
* '@table_end' => 'the expression to match at the end of the table',
* '@table_fields' => array(of more fields),
* </pre>
*
* Both @table_start and @table_end will be ignored if this is the root element of the array
*
* So an example structure might look like
* {@example example/fields_example.php}
* This example shows two simple fields, and a table
*
* The structure can be recursive, ie, you can have more table and simple fields beneath the table field
*
* A full example might look like this
* {@example example/full_example.php}
*
* @param array $Fields An associative array of fields to be used
* @param string $Target The target string to process
* @return array|false An associative array containing data, or false on failure to match
*/
function preg_magic($Fields, $Target) {
    // Procedural facade only: the static class method does all of the parsing
    $Parsed = preg_magic::execute($Fields, $Target);
    return $Parsed;
}
/**
* preg_magic
*
* Class of static functions containing preg_magic
*
* You should call the {@link preg_magic() preg_magic()} function to do all your hard work as
* the class is pretty much just used as a namespace
*
* @see preg_magic()
* @version 1.0
* @author Shannon Wynter <http://fremnet.net/contact>
* @copyright Copyright Shannon Wynter & Fremnet.net
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
* @package preg_magic
*/
class preg_magic {
    /**
     * execute
     *
     * Builds a single regular expression from $Fields, runs it against $Target
     * and maps the captured groups back onto the field names.
     *
     * If $Fields is itself a table definition ('@table' => true) every
     * occurrence of the row expression is collected and a list of rows is
     * returned; '@table_start' and '@table_end' are not used at this level.
     * Otherwise one match is made and one associative array is returned.
     *
     * The expression is wrapped in '/' delimiters with the 's' (dot matches
     * newline) modifier. Any '/' in a field that is not already escaped is
     * escaped automatically, so both '<\/td>' and '</td>' work.
     *
     * @see preg_magic()
     * @access public
     * @param array $Fields An associative array of fields to be used
     * @param string $Target The target string to process
     * @return array|false An associative array containing data, or false on failure to match
     */
    public static function execute($Fields, $Target) {
        // Check to see if we're working on a table and pick the fields to match
        $Table = self::is_table($Fields);
        $ProcessFields = ($Table ? $Fields['@table_fields'] : $Fields);

        // Get the expression to run, made safe for the '/' delimiter used below
        $Expr = self::escape_delimiter(self::build_expr($ProcessFields));

        if (!$Table) {
            // Flat entry: a single match is all we need
            if (!preg_match("/$Expr/s", $Target, $Match))
                return false;
            return self::associate($ProcessFields, $Match);
        }

        // Table entry: every match of the row expression is one record
        if (!preg_match_all("/$Expr/s", $Target, $Match, PREG_SET_ORDER))
            return false;

        $Result = array();
        foreach ($Match as $Row) {
            $AssocRow = self::associate($ProcessFields, $Row);
            // A nested table that fails to parse invalidates the whole result
            if ($AssocRow === false)
                return false;
            $Result[] = $AssocRow;
        }
        return $Result;
    }

    /**
     * associate
     *
     * Performs a similar duty to {@link http://www.php.net/array_combine array_combine}
     * with the exception that this is capable of checking for tables so recursive field
     * definitions can function.
     *
     * The n-th field is paired with the n-th captured group, so every field is
     * expected to contribute exactly one capturing group to the expression.
     * A group that did not take part in the match (preg_match drops trailing
     * unmatched groups) yields null for a simple field and false for a table.
     *
     * @access protected
     * @param array $Fields The fields passed to {@link preg_magic::execute() execute()}
     * @param array $Data The array of matches from {@link http://www.php.net/preg_match preg_match}
     * @return array|false An associative array of $Fields to $Data or false on failure
     */
    protected static function associate($Fields, $Data) {
        // First entry is the 'raw match', turf it we're not using it
        array_shift($Data);

        $Result = array();
        $Index = 0;
        foreach ($Fields as $Name => $Field) {
            // Guard the lookup: optional groups at the end of the expression may be absent
            $Value = isset($Data[$Index]) ? $Data[$Index] : null;
            $Index++;

            if (self::is_table($Field)) {
                // Nothing was captured for this table, so there is nothing to parse
                if ($Value === null)
                    return false;
                // Re-run preg_magic on just the data for this key
                $Value = self::execute($Field, $Value);
                if ($Value === false)
                    return false;
            }
            $Result[$Name] = $Value;
        }
        return $Result;
    }

    /**
     * build_expr
     *
     * Builds as much of the expression as required to make the closest match possible
     * based on the $Fields provided to {@link preg_magic::execute() execute()}.
     *
     * Field expressions are joined with a lazy '.*?'. A table becomes
     * start + (?:row)+? + end; at depth 0 the rows are wrapped in one capturing
     * group so the raw table text can be handed back to execute() later.
     *
     * Automatically escapes submatches if there is recursion. Groups inside
     * '@table_start' and '@table_end' are always made non-capturing because
     * they never correspond to a field.
     *
     * @access protected
     * @param array $Fields The fields passed to {@link preg_magic::execute() execute()}
     * @param int $Depth Reserved - Do not use this unless you know what you are doing
     * @return string The combined regular expression
     */
    protected static function build_expr($Fields, $Depth = 0) {
        $Expr = '';
        foreach ($Fields as $Value) {
            $AddToExpr = '';
            if (is_array($Value)) {
                // Only tables are understood; any other array type adds nothing to the
                // expression (associate() and execute() would need updating for new types)
                if (self::is_table($Value)) {
                    $Start = isset($Value['@table_start']) ? $Value['@table_start'] : '';
                    $End = isset($Value['@table_end']) ? $Value['@table_end'] : '';

                    $Rows = '(?:' . self::build_expr($Value['@table_fields'], $Depth + 1) . ')+?';
                    // Only the outermost table is captured; deeper ones are re-parsed later
                    if ($Depth == 0)
                        $Rows = '(' . $Rows . ')';

                    $AddToExpr = self::escape_open_bracket($Start) . $Rows . self::escape_open_bracket($End);
                }
            } elseif ($Depth > 0) {
                // Inside a table: stop the field's own groups from capturing
                $AddToExpr = self::escape_open_bracket($Value);
            } else {
                // The simplest of all
                $AddToExpr = $Value;
            }
            $Expr .= ($Expr !== '' ? '.*?' : '') . $AddToExpr;
        }
        return $Expr;
    }

    /**
     * escape_open_bracket
     *
     * Turns every capturing group in $Regex into a non-capturing '(?:' group
     * so that deep expressions do not shift the group numbering.
     *
     * Left untouched: escaped characters (such as \( and \\), character
     * classes (such as [(] and [^)]), '(*VERB)' items and '(?...' constructs
     * that do not capture (non-capturing groups, lookarounds, inline flags,
     * atomic groups, comments). Named groups - (?<n>...), (?P<n>...) and
     * (?'n'...) - do capture, so they are converted as well.
     *
     * Back-references and conditionals that refer to a converted group are not
     * rewritten and will no longer be valid.
     *
     * @access protected
     * @param string $Regex
     * @return string
     */
    protected static function escape_open_bracket($Regex) {
        // Things that must be stepped over without being changed
        $Skip = implode('|', array(
            '\\\\.',                                          // any escaped character
            '\[\^?\]?(?:\\\\.|\[:\^?[a-z]+:\]|[^\]\\\\])*\]', // a whole character class
            '\((?:\*|\?(?!P?<[A-Za-z_]|\'))',                 // (*VERB) or a non-capturing (?...
        ));
        // Optional header of a named group, swallowed together with its bracket
        $Named = '\?P?<[A-Za-z_][A-Za-z0-9_]*>|\?\'[A-Za-z_][A-Za-z0-9_]*\'';

        // (*SKIP)(*FAIL) discards whatever $Skip matched and resumes right after it,
        // which leaves only genuine capturing brackets for the second alternative
        $Pattern = '~(?:' . $Skip . ')(*SKIP)(*FAIL)|\((?:' . $Named . ')?~s';

        return preg_replace($Pattern, '(?:', $Regex);
    }

    /**
     * escape_delimiter
     *
     * Escapes every '/' in $Expr that is not already preceded by an escaping
     * backslash, so the expression can be wrapped in '/' delimiters.
     *
     * @access protected
     * @param string $Expr
     * @return string
     */
    protected static function escape_delimiter($Expr) {
        // Step over existing escape pairs (including \/ and \\), escape bare slashes
        return preg_replace('~\\\\.(*SKIP)(*FAIL)|/~s', '\\\\/', $Expr);
    }

    /**
     * is_table
     *
     * Tells whether a field definition describes a table, ie. it is an array
     * with a truthy '@table' entry.
     *
     * @access protected
     * @param mixed $Field A single field definition
     * @return bool
     */
    protected static function is_table($Field) {
        return is_array($Field) && !empty($Field['@table']);
    }
}