<?php
/* Copyright (c) 2005, Axis Data Management Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Axis Data Management Corp nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AXIS DATA MANAGEMENT CORP
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
*
* Similar to strtok(), but all delimiters are significant (i.e.,
* consecutive delimiters are not globbed into one), and empty string
* tokens are permitted. E.g., input String "" has one token of ""
* regardless of delimiter, and " " (with delimiter of space) produces
* two tokens of value "" (one for before the delimiter, and one for after).
*
* This class is called CSV Tokenizer, because this is the behavior
* required when using Comma-Separated-Value files (like with spreadsheets
* and databases).
*
* Here is how this class is typically used:
*
* require "CSVTokenizer.php";
* ...
* $toker = new CSVTokenizer();
* $toker->setDelimiter($delim);
* $toker->setString($str);
* while (($token = $toker->nextToken()) !== false) {
*     // Do something with each $token.
* }
*
* @see #displayTokens for other useful ways to use this class.
* @author Blaine Simpson. hide@address.com
*/
class CSVTokenizer {
    /** The string being tokenized (set by setString()). */
    private $buffer;

    /** Single-character delimiter; defaults to ONE space character. */
    private $delimiter = " ";

    /**
     * Byte index in $buffer where the next token starts.
     *   -1               means "not initialized" (setString() not yet called);
     *   > strlen($buffer) means every token has been consumed.
     */
    private $offset = -1;

    /**
     * Guards the read methods against use before setString() was called.
     *
     * @throws LogicException (an Exception subclass) if no input string is set.
     */
    private function checkready() {
        if ($this->offset < 0) {
            throw new LogicException("Object not ready for use. "
                . "You must set the input string with setString() first.");
        }
    }

    /**
     * Normalises a caller-supplied value to a string.
     *
     * Storing real strings keeps strlen()/strpos() behaviour identical across
     * PHP versions (older versions treat an integer strpos() needle as a
     * character code, newer ones deprecate null arguments).
     *
     * @param mixed  $value Value to convert (scalar, null or __toString object).
     * @param string $what  Human-readable name of the value, for the error text.
     * @return string
     * @throws InvalidArgumentException if $value cannot be sensibly stringified.
     */
    private static function toText($value, $what) {
        $stringable = $value === null
            || is_scalar($value)
            || (is_object($value) && method_exists($value, '__toString'));
        if (!$stringable) {
            throw new InvalidArgumentException(
                "The $what must be a string (or convertible to one), "
                . gettype($value) . " given");
        }
        return (string) $value;
    }

    /**
     * Sets the delimiter used by subsequent countTokens()/nextToken() calls.
     * Does not reset the current read position.
     *
     * @param indelim Must be one character (byte) long.
     * @throws InvalidArgumentException (an Exception subclass) if the
     *         delimiter is not exactly one character long.
     */
    public function setDelimiter($indelim) {
        $indelim = self::toText($indelim, "delimiter");
        if (strlen($indelim) !== 1) {
            throw new InvalidArgumentException(
                "Your input delimiter '$indelim' is not 1 character long");
        }
        $this->delimiter = $indelim;
    }

    /**
     * Sets the string to tokenize and rewinds to its first token.
     * Makes a COPY of the input string to work with; null is treated as "".
     *
     * @param instr The input string.
     * @throws InvalidArgumentException if the input cannot be used as a string.
     */
    public function setString($instr) {
        $this->buffer = self::toText($instr, "input string");
        $this->offset = 0;
    }

    /**
     * Counts the tokens that nextToken() has not yet returned.
     * Does not advance the read position.
     *
     * @return int Number of remaining tokens (0 once all are consumed).
     * @throws LogicException if setString() has not been called.
     */
    public function countTokens() {
        $this->checkready();
        $length = strlen($this->buffer);
        if ($this->offset > $length) {
            // Everything has already been handed out.
            return 0;
        }
        if ($this->offset === $length) {
            // Positioned right after a trailing delimiter (or the buffer is
            // empty): exactly one empty token is left.
            return 1;
        }
        // Every remaining delimiter separates two tokens.
        return substr_count($this->buffer, $this->delimiter, $this->offset) + 1;
    }

    /**
     * Returns the next token and advances past the delimiter that ends it.
     *
     * Empty tokens are returned as "", so callers must compare the result
     * with "!== false" rather than testing truthiness.
     *
     * @return string|false The next token, or false when none remain.
     * @throws LogicException if setString() has not been called.
     */
    public function nextToken() {
        $this->checkready();
        $length = strlen($this->buffer);
        $start = $this->offset;
        if ($start > $length) {
            return false;
        }
        // Locate the delimiter that terminates this token; when positioned at
        // the very end there is nothing to search and the token is empty.
        $end = ($start === $length)
            ? false
            : strpos($this->buffer, $this->delimiter, $start);
        if ($end === false) {
            $end = $length;
        }
        // Continue 1 past the delimiter; for the last token this moves the
        // offset beyond the buffer, which marks the tokenizer as used up.
        $this->offset = $end + 1;
        // The cast covers PHP versions where substr() yields false instead of
        // "" for an empty slice at the end of the string.
        return (string) substr($this->buffer, $start, $end - $start);
    }

    /**
     * Echos token counts and values for the given input string and delimiter
     * character.
     *
     * Use is trivially easy:
     *     require_once "CSVTokenizer.php";
     *     CSVTokenizer::displayTokens("one|two|three", "|");
     *
     * @param str   The string to tokenize.
     * @param delim Must be a single character.
     * @throws InvalidArgumentException if $delim is not one character long.
     */
    public static function displayTokens($str, $delim) {
        $toker = new self();
        $toker->setDelimiter($delim);
        $toker->setString($str);
        $count = 0;
        echo $toker->countTokens() . " tokens total.\n";
        while (($token = $toker->nextToken()) !== false) {
            echo " " . ++$count . ": (" . $token . ") ["
                . $toker->countTokens() . " remaining]\n";
        }
        echo $toker->countTokens() . " tokens remaining.\n";
    }
}
?>