Location: PHPKode > scripts > Misc String > misc-string/miscstring.php
<?php
/**
 * This file contains 3 classes - strUnicode, strNormal and strWhitespace.
 * The classes are independent, so you can easily use them to extend
 * your own classes.
 * 
 * You are free to use this code as you see fit.
 * 
 * Created on 12-Nov-06
 *
 * @package MiscString
 * @author Vladislav Bailovic <hide@address.com>
 */

/**
 * Unicode-safe basic string manipulation.
 * Some PHP string manipulation functions don't work well with
 * multibyte strings, so here are their replacements.
 * 
 * @package MiscString
 * @author Vladislav Bailovic <hide@address.com>
 */
class strUnicode {
	
	/**
	 * Unicode-safe substr() port.
	 * 
	 * The string is split into UTF-8 characters (code points) and the
	 * requested slice is glued back together, so multibyte characters are
	 * never cut in half and newlines are treated like any other character.
	 * Negative $start / $length values follow the same rules as in the
	 * native substr() (count from the end of the string).
	 * 
	 * @param string s String to extract from
	 * @param int start Where to start substring extraction (in characters).
	 * Non-numeric values are treated as 0.
	 * @param int length Substring length (in characters). false (default)
	 * or null means "up to the end of the string".
	 * @return string Extracted substring; empty string if the requested range
	 * lies outside of the string or if s is not valid UTF-8
	 */
	function substr ($s, $start=0, $length=false) {
		$start = is_numeric($start) ? (int)$start : 0;
		
		// An empty pattern with the /u modifier splits between code points.
		$chars = preg_split('//u', (string)$s, -1, PREG_SPLIT_NO_EMPTY);
		if (false === $chars) return ''; // invalid UTF-8 input
		
		if (false === $length || null === $length) {
			$slice = array_slice($chars, $start);
		} else {
			$slice = array_slice($chars, $start, (int)$length);
		}
		return implode('', $slice);
	}
	
	/**
	 * Count Unicode string length.
	 * Works just like strlen(), except that it counts UTF-8 characters
	 * instead of bytes.
	 * 
	 * @param string s String to check
	 * @return int Number of characters; 0 if s is not valid UTF-8
	 */
	function strlen ($s) {
		// The /s modifier makes the dot match newlines too, so they are counted.
		$c = preg_match_all ('/./su', (string)$s, $m);
		return (false === $c) ? 0 : $c;
	}
}

/**
 * Whitespace normalization methods.
 * 
 * @package MiscString
 * @author Vladislav Bailovic <hide@address.com>
 */
class strWhitespace {
	
	/**
	 * Number of spaces one tab stands for. Used by spacesToTabs() and
	 * tabsToSpaces().
	 * @access private
	 */
	var $_tabWidth = 4;
	
	/**
	 * Sets default tab width.
	 * 
	 * Only whole numbers greater than zero are accepted, because the width
	 * is used as a repeat count when converting between tabs and spaces.
	 * 
	 * @param int width New tab width (integer or integer-like numeric value)
	 * @return bool true on success, false otherwise (width left unchanged)
	 */
	function setTabWidth ($width) {
		if (!is_numeric($width)) return false;
		// Reject fractions ("4.5") as well as zero and negative widths.
		if ((int)$width != $width || (int)$width < 1) return false;
		$this->_tabWidth = (int)$width;
		return true;
	}
	
	/**
	 * Normalizes newlines - converts all \r\n pairs and lone \r to \n.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function normalizeNewlines ($s) {
		// Order matters: pairs first, so a \r\n does not become two newlines.
		return str_replace(array("\r\n", "\r"), "\n", $s);
	}
	
	/**
	 * Replaces every run of space characters with a single space.
	 * Only the space character is affected; tabs and newlines are kept.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function normalizeSpace ($s) {
		return preg_replace ('/ +/', ' ', $s);
	}
	
	/**
	 * Replaces every run of whitespace characters (spaces, tabs, newlines...)
	 * with a single space and trims the result on both ends.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function normalizeWhitespace ($s) {
		return trim(preg_replace ('/\s+/', ' ', $s));
	}
	
	/**
	 * Replaces all newlines (\n, \r\n and \r) with spaces.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function stripNewlines ($s) {
		$s = $this->normalizeNewlines($s);
		return str_replace("\n", ' ', $s);
	}
	
	/**
	 * Strips empty (or whitespace-only) lines found between two lines.
	 * The line break separating the surrounding lines is kept, so they
	 * are not merged into one.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function stripEmptyLines ($s) {
		// Collapse "newline, optional whitespace, newline" into one newline.
		return preg_replace ('/\n\s*\n/', "\n", $s);
	}
	
	/**
	 * Replaces every group of default number of spaces with a tab.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function spacesToTabs ($s) {
		return str_replace(str_repeat(' ', (int)$this->_tabWidth), "\t", $s);
	}
	
	/**
	 * Replaces all tabs with default number of spaces.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function tabsToSpaces ($s) {
		return str_replace("\t", str_repeat(' ', (int)$this->_tabWidth), $s);
	}
	
	/**
	 * Replaces all double spaces with &nbsp;.
	 * Each pair of consecutive spaces becomes one &nbsp; entity.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function htmlHardSpaces ($s) {
		return str_replace('  ', '&nbsp;', $s);
	}
	

}


/**
 * Various common string normalization methods.
 * 
 * @package MiscString
 * @author Vladislav Bailovic <hide@address.com>
 */
class strNormal {
	
	/**
	 * Converts space-, dash-, underscore-, or otherwise delimited string into
	 * CamelCased (or camelCased) version. Not Unicode-safe.
	 * 
	 * Every character other than A-Z, a-z and 0-9 is treated as a word
	 * delimiter. The first letter of each word is uppercased; the rest of
	 * the word is left as it is.
	 * 
	 * @param string $s String to convert
	 * @param bool $cfl /ConvertFirstLetter/ If set, first letter will be in
	 * lowercase.
	 * @return string Converted string
	 */
	function camelize ($s, $cfl = false) {
		$s = preg_replace('/[^A-Za-z0-9]/', ' ', $s);
		$s = ucwords($s);
		// Drop the delimiters before touching the first letter, otherwise
		// a leading delimiter would be "lowercased" instead of the letter.
		$s = str_replace(' ', '', $s);
		if ($cfl && '' !== $s) {
			$s = strtolower(substr($s, 0, 1)) . substr($s, 1);
		}
		return $s;
	}
	
	/**
	 * Replaces runs of whitespace and of non-word characters with a single
	 * underscore. Hyphens are kept. Not Unicode-safe.
	 * 
	 * @param string $s String to convert
	 * @return string Underscored string
	 */
	function underscores ($s) {
		$s = preg_replace("/\s+/", '_', $s);
		$s = preg_replace("/[^\w-]+/", '_', $s);
		return preg_replace("/_+/", '_', $s);
	}
	
	/**
	 * Normalizes a string (all lowercase, underscore- separated words). 
	 * Trailing underscores are removed. Not Unicode-safe.
	 * 
	 * @param string $s String to convert
	 * @return string Normalized string
	 */
	function normalize ($s) {
		$s = strtolower($s);
		$s = $this->underscores($s);
		$s = rtrim ($s, '_');
		return $s;	
	}
	
	/**
	 * Convert given string to a valid method name (valid php fnc. name,
	 * camelCased). Not Unicode-safe.
	 * 
	 * If the string does not start with a letter, it is prefixed with the
	 * word "A" so that the result does.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string, or false if $s is empty
	 */
	function methodize ($s) {
		// Explicit emptiness test: the string "0" is valid input.
		if (!is_scalar($s) || false === $s || '' === (string)$s) return false;
		if (!preg_match("/^[A-Za-z]/", $s)) {
			$s = "A $s";
		}
		return $this->camelize($s, true);
	}
	
	/**
	 * Converts string to a normalized valid PHP varname. Not Unicode-safe.
	 * 
	 * If the string does not start with a letter, it is prefixed with the
	 * word "A". The string is then normalized (see normalize()) and all
	 * hyphens are removed.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string, or false if $s is empty
	 */
	function variablize ($s) {
		// Explicit emptiness test: the string "0" is valid input.
		if (!is_scalar($s) || false === $s || '' === (string)$s) return false;
		if (!preg_match("/^[A-Za-z]/", $s)) {
			$s = "A $s";
		}
		$s = $this->normalize($s);
		return str_replace('-', '', $s);
	}
	
	/**
	 * Converts camelized string to normal (all lowercase, 
	 * underscore- separated words) representation. Not Unicode-safe.
	 * 
	 * An underscore is inserted in front of every uppercase letter that
	 * directly follows a letter or a digit.
	 * 
	 * @param string $s String to convert
	 * @return string Converted string
	 */
	function camelToNormal ($s) {
		// Zero-width lookarounds, so that consecutive capitals are all split.
		$ret = preg_replace("/(?<=[0-9a-zA-Z])(?=[A-Z])/", '_', $s);
		return strtolower($ret);
	}
}
?>
Return current item: Misc String