<?php
/**
 * Holds the constants, character mappings and regular expressions used when parsing and
 * normalising phone numbers. The values were ported from a Java implementation; because PHP
 * cannot initialise properties from expressions, every derived value is built in the
 * constructor and stored as an instance property.
 *
 * Conventions used by the regex-related properties:
 *  - Properties holding a regex *fragment* (VALID_PUNCTUATION, PLUS_CHARS, VALID_DIGITS,
 *    VALID_ALPHA, VALID_START_CHAR, SECOND_NUMBER_START, UNWANTED_END_CHARS,
 *    VALID_PHONE_NUMBER, KNOWN_EXTN_PATTERNS) carry no delimiters so they can be composed.
 *  - Properties holding a complete pattern (the *_PATTERN properties and
 *    UNIQUE_INTERNATIONAL_PREFIX) are '/'-delimited and ready for the preg_* functions.
 *  - Non-ASCII characters are written as PCRE \x{XXXX} escapes, so any pattern containing
 *    them is compiled with the "u" modifier and expects UTF-8 subjects.
 */
class Google_i18n_PhoneNumber_Util
{
    // The minimum and maximum length of the national significant number.
    protected $MIN_LENGTH_FOR_NSN = 3;
    protected $MAX_LENGTH_FOR_NSN = 15;
    protected $META_DATA_FILE_PREFIX = "Google/i18n/PhoneNumber/Data/MetadataProto";
    protected $COUNTRY_CODE_TO_REGION_CODE_MAP_CLASS_NAME = "CountryCodeToRegionCodeMap";
    protected $currentFilePrefix;
    // A mapping from a country code to the region codes which denote the country/region
    // represented by that country code. In the case of multiple countries sharing a calling code,
    // such as the NANPA countries, the one indicated with "isMainCountryForCode" in the metadata
    // should be first.
    protected $countryCodeToRegionCodeMap = null;
    // The set of countries the library supports (roughly 220 of them).
    protected $supportedCountries = array();
    // The set of countries that share country code 1 (roughly 26 of them).
    protected $nanpaCountries = array();
    protected $NANPA_COUNTRY_CODE = 1;
    // The PLUS_SIGN signifies the international prefix.
    protected $PLUS_SIGN = '+';
    // These mappings map a character (key) to a specific digit that should replace it for
    // normalization purposes. Non-European digits that may be used in phone numbers are mapped to a
    // European equivalent. Filled in by the constructor from getDigitMappings().
    protected $DIGIT_MAPPINGS = array();
    // Only upper-case variants of alpha characters are stored.
    protected $ALPHA_MAPPINGS = array();
    // For performance reasons, amalgamate both into one map.
    protected $ALL_NORMALIZATION_MAPPINGS = array();
    // A list of all country codes where national significant numbers (excluding any national prefix)
    // exist that start with a leading zero.
    protected $LEADING_ZERO_COUNTRIES = array();
    // Pattern that makes it easy to distinguish whether a country has a unique international dialing
    // prefix or not. If a country has a unique international prefix (e.g. 011 in USA), it will be
    // represented as a string that contains a sequence of ASCII digits. If there are multiple
    // available international prefixes in a country, they will be represented as a regex string that
    // always contains character(s) other than ASCII digits.
    // Note this regex also includes tilde, which signals waiting for the tone.
    // [0-9] is spelled out because PHP's "u" modifier makes \d match non-ASCII digits as well.
    protected $UNIQUE_INTERNATIONAL_PREFIX = '/[0-9]+(?:[~\x{2053}\x{223C}\x{FF5E}][0-9]+)?/u';
    // Character-class fragment of acceptable punctuation found in phone numbers. This excludes
    // punctuation found as a leading character only.
    // This consists of dash characters, white space characters, full stops, slashes,
    // square brackets, parentheses and tildes. It also includes the letter 'x' as that is found as a
    // placeholder for carrier information in some phone numbers.
    // The fragment starts with '-' so it must be placed first inside a character class, and the
    // slash is escaped because the compiled patterns use '/' as their delimiter.
    protected $VALID_PUNCTUATION = '-x\x{2010}-\x{2015}\x{2212}\x{FF0D}-\x{FF0F} \x{00A0}\x{200B}\x{2060}\x{3000}()\x{FF08}\x{FF09}\x{FF3B}\x{FF3D}.\\[\\]\\/~\x{2053}\x{223C}\x{FF5E}';
    // Digits accepted in phone numbers (character-class fragment).
    // TODO: Also accept the non-ASCII digits that are keys of DIGIT_MAPPINGS.
    protected $VALID_DIGITS = '0123456789';
    // We accept alpha characters in phone numbers, ASCII only, upper and lower case
    // (character-class fragment).
    protected $VALID_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
    // ASCII plus and fullwidth plus (character-class fragment).
    protected $PLUS_CHARS = '+\x{FF0B}';
    protected $PLUS_CHARS_PATTERN;
    protected $CAPTURING_DIGIT_PATTERN;
    // Regular expression of acceptable characters that may start a phone number for the purposes of
    // parsing. This allows us to strip away meaningless prefixes to phone numbers that may be
    // mistakenly given to us. This consists of digits and the plus symbol. This
    // does not contain alpha characters, although they may be used later in the number. It also does
    // not include other punctuation, as this will be stripped later during parsing and is of no
    // information value when parsing a number.
    protected $VALID_START_CHAR;
    protected $VALID_START_CHAR_PATTERN;
    // Regular expression of characters typically used to start a second phone number for the purposes
    // of parsing. This allows us to strip off parts of the number that are actually the start of
    // another number, such as for: (530) 583-6985 x302/x2303 -> the second extension here makes this
    // actually two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove the second
    // extension so that the first number is parsed correctly.
    // The class matches a backslash or a slash; the slash is escaped for the '/' delimiter.
    protected $SECOND_NUMBER_START = '[\\\\\\/] *x';
    protected $SECOND_NUMBER_START_PATTERN;
    // Regular expression of trailing characters that we want to remove. We remove all characters that
    // are not alpha or numerical characters. The hash character is retained here, as it may signify
    // the previous block was an extension.
    // PCRE has no character-class intersection, so "not a number, not a letter and not '#'" is
    // written as a single negated class.
    protected $UNWANTED_END_CHARS = '[^\\p{N}\\p{L}#]+$';
    protected $UNWANTED_END_CHAR_PATTERN;
    // We use this pattern to check if the phone number has at least three letters in it - if so, then
    // we treat it as a number where some phone-number digits are represented by letters.
    protected $VALID_ALPHA_PHONE_PATTERN = '/(?:.*?[A-Za-z]){3}.*/';
    // Regular expression fragment of viable phone numbers. This is location independent. Checks we
    // have at least three leading digits, and only valid punctuation, alpha characters and
    // digits in the phone number. Does not include extension data.
    // The symbol 'x' is allowed here as valid punctuation since it is often used as a placeholder for
    // carrier codes, for example in Brazilian phone numbers. We also allow multiple "+" characters at
    // the start.
    // Corresponds to the following:
    // plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
    protected $VALID_PHONE_NUMBER;
    // Default extension prefix to use when formatting. This will be put in front of any extension
    // component of the number, after the main national number is formatted. For example, if you wish
    // the default extension formatting to be " extn: 3456", then you should specify " extn: " here
    // as the default extension prefix. This can be overridden by country-specific preferences.
    protected $DEFAULT_EXTN_PREFIX = " ext. ";
    // Regexp fragment of all possible ways to write extensions, for use when parsing. This will be
    // run as a case-insensitive regexp match. Wide character versions are also provided after each
    // ascii version. There are two regular expressions here: the more generic one starts with optional
    // white space and ends with an optional full stop (.), followed by zero or more spaces/tabs and
    // then the numbers themselves. The other one covers the special case of American numbers where
    // the extension is written with a hash at the end, such as "- 503#".
    // Note that the only capturing groups should be around the digits that you want to capture as
    // part of the extension, or else parsing will fail!
    protected $KNOWN_EXTN_PATTERNS;
    // Regexp of all known extension prefixes used by different countries followed by 1 or more valid
    // digits, for use when parsing.
    protected $EXTN_PATTERN;
    // We append optionally the extension pattern to the end here, as a valid phone number may
    // have an extension prefix appended, followed by 1 or more digits.
    protected $VALID_PHONE_NUMBER_PATTERN;
    protected $NON_DIGITS_PATTERN = '/(\\D+)/';
    protected $FIRST_GROUP_PATTERN = '/(\\$1)/';
    protected $NP_PATTERN = '/\\$NP/';
    protected $FG_PATTERN = '/\\$FG/';
    protected $CC_PATTERN = '/\\$CC/';
    // Shared instance handed out by getInstance(). It has to be static because getInstance()
    // is a static method and therefore has no $this.
    protected static $instance = null;
    // A mapping from a region code to the PhoneMetadata for that region.
    protected $countryToMetadataMap = array();

    /**
     * Returns the shared instance of this class, creating it on first use.
     *
     * @return Google_i18n_PhoneNumber_Util the shared instance (returned by reference)
     */
    public static function &getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Builds every derived mapping and regular expression.
     *
     * PHP cannot initialise properties from expressions, so everything that depends on another
     * property is assembled here.
     */
    public function __construct()
    {
        $this->DIGIT_MAPPINGS = $this->getDigitMappings();
        $this->ALPHA_MAPPINGS = $this->getAlphaMappings();
        $this->ALL_NORMALIZATION_MAPPINGS = $this->getAllNormalizationMappings();
        $this->LEADING_ZERO_COUNTRIES = $this->getLeadingZeroCountries();
        $this->currentFilePrefix = $this->META_DATA_FILE_PREFIX;

        // PLUS_CHARS contains a \x{...} escape above U+00FF, hence the "u" modifier.
        $this->PLUS_CHARS_PATTERN = '/[' . $this->PLUS_CHARS . ']+/u';
        $this->CAPTURING_DIGIT_PATTERN = '/([' . $this->VALID_DIGITS . '])/';

        // Characters that may start a phone number: a plus sign or a digit.
        $this->VALID_START_CHAR = '[' . $this->PLUS_CHARS . $this->VALID_DIGITS . ']';
        $this->VALID_START_CHAR_PATTERN = '/' . $this->VALID_START_CHAR . '/u';

        // Start of a second number/extension, e.g. the "/x" in "x302/x2303".
        $this->SECOND_NUMBER_START_PATTERN = '/' . $this->SECOND_NUMBER_START . '/';

        // Trailing characters that are neither letters, numbers nor '#'. \p{..} needs "u" to work
        // on characters rather than bytes.
        $this->UNWANTED_END_CHAR_PATTERN = '/' . $this->UNWANTED_END_CHARS . '/u';

        // plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
        // VALID_PUNCTUATION goes first in the last class so that its leading '-' stays a literal
        // dash instead of forming a range with the preceding character.
        $this->VALID_PHONE_NUMBER = '[' . $this->PLUS_CHARS . ']*'
            . '(?:[' . $this->VALID_PUNCTUATION . ']*[' . $this->VALID_DIGITS . ']){3,}'
            . '[' . $this->VALID_PUNCTUATION . $this->VALID_ALPHA . $this->VALID_DIGITS . ']*';

        // All known ways of writing an extension. Only the extension digits are captured.
        $this->KNOWN_EXTN_PATTERNS = '[ \x{00A0}\\t,]*'
            . '(?:ext(?:ensio)?n?|\x{FF45}\x{FF58}\x{FF54}\x{FF4E}?|[,x\x{FF58}#\x{FF03}~\x{FF5E}]|int|anexo|\x{FF49}\x{FF4E}\x{FF54})'
            . '[:\\.\x{FF0E}]?[ \x{00A0}\\t,-]*([' . $this->VALID_DIGITS . ']{1,7})#?'
            . '|[- ]+([' . $this->VALID_DIGITS . ']{1,5})#';

        // An extension at the very end of the input; matched case-insensitively.
        $this->EXTN_PATTERN = '/(?:' . $this->KNOWN_EXTN_PATTERNS . ')$/iu';

        // A viable phone number optionally followed by an extension.
        $this->VALID_PHONE_NUMBER_PATTERN = '/' . $this->VALID_PHONE_NUMBER
            . '(?:' . $this->KNOWN_EXTN_PATTERNS . ')?/iu';
    }

    /**
     * Returns the map from every accepted digit character to its ASCII digit.
     *
     * Non-ASCII keys are written as UTF-8 byte sequences. Note that PHP stores the keys '0'-'9'
     * as the integers 0-9.
     *
     * @return array digit character => ASCII digit
     */
    protected function getDigitMappings()
    {
        return array(
            '0' => '0',
            "\xEF\xBC\x90" => '0', // U+FF10 Fullwidth digit 0
            "\xD9\xA0" => '0', // U+0660 Arabic-indic digit 0
            '1' => '1',
            "\xEF\xBC\x91" => '1', // U+FF11 Fullwidth digit 1
            "\xD9\xA1" => '1', // U+0661 Arabic-indic digit 1
            '2' => '2',
            "\xEF\xBC\x92" => '2', // U+FF12 Fullwidth digit 2
            "\xD9\xA2" => '2', // U+0662 Arabic-indic digit 2
            '3' => '3',
            "\xEF\xBC\x93" => '3', // U+FF13 Fullwidth digit 3
            "\xD9\xA3" => '3', // U+0663 Arabic-indic digit 3
            '4' => '4',
            "\xEF\xBC\x94" => '4', // U+FF14 Fullwidth digit 4
            "\xD9\xA4" => '4', // U+0664 Arabic-indic digit 4
            '5' => '5',
            "\xEF\xBC\x95" => '5', // U+FF15 Fullwidth digit 5
            "\xD9\xA5" => '5', // U+0665 Arabic-indic digit 5
            '6' => '6',
            "\xEF\xBC\x96" => '6', // U+FF16 Fullwidth digit 6
            "\xD9\xA6" => '6', // U+0666 Arabic-indic digit 6
            '7' => '7',
            "\xEF\xBC\x97" => '7', // U+FF17 Fullwidth digit 7
            "\xD9\xA7" => '7', // U+0667 Arabic-indic digit 7
            '8' => '8',
            "\xEF\xBC\x98" => '8', // U+FF18 Fullwidth digit 8
            "\xD9\xA8" => '8', // U+0668 Arabic-indic digit 8
            '9' => '9',
            "\xEF\xBC\x99" => '9', // U+FF19 Fullwidth digit 9
            "\xD9\xA9" => '9', // U+0669 Arabic-indic digit 9
        );
    }

    /**
     * Returns the map from upper-case ASCII letters to the keypad digit they stand for.
     *
     * @return array upper-case letter => ASCII digit
     */
    protected function getAlphaMappings()
    {
        return array(
            'A' => '2',
            'B' => '2',
            'C' => '2',
            'D' => '3',
            'E' => '3',
            'F' => '3',
            'G' => '4',
            'H' => '4',
            'I' => '4',
            'J' => '5',
            'K' => '5',
            'L' => '5',
            'M' => '6',
            'N' => '6',
            'O' => '6',
            'P' => '7',
            'Q' => '7',
            'R' => '7',
            'S' => '7',
            'T' => '8',
            'U' => '8',
            'V' => '8',
            'W' => '9',
            'X' => '9',
            'Y' => '9',
            'Z' => '9',
        );
    }

    /**
     * Returns the digit and alpha mappings combined into a single map.
     *
     * Array union is used instead of array_merge() because the keys '0'-'9' are stored as
     * integers and array_merge() renumbers integer keys; the two maps share no keys.
     *
     * @return array character => ASCII digit
     */
    protected function getAllNormalizationMappings()
    {
        return $this->getDigitMappings() + $this->getAlphaMappings();
    }

    /**
     * Former name of getAllNormalizationMappings(), kept so existing callers keep working.
     *
     * @return array character => ASCII digit
     */
    protected function getAllNormalizedMappings()
    {
        return $this->getAllNormalizationMappings();
    }

    /**
     * Returns the country calling codes whose national significant numbers may start with a
     * leading zero.
     *
     * @return array list of integer country calling codes
     */
    protected function getLeadingZeroCountries()
    {
        return array(
            39, // Italy
            47, // Norway
            225, // Cote d'Ivoire
            227, // Niger
            228, // Togo
            241, // Gabon
            379, // Vatican City
        );
    }
}