Source for file String.php
Documentation is available at String.php
* Copyright (c) 2006 - 2010 PHPExcel
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* @package PHPExcel_Shared
* @copyright Copyright (c) 2006 - 2010 PHPExcel (http://www.codeplex.com/PHPExcel)
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
* @version 1.7.2, 2010-01-11
* @package PHPExcel_Shared
* @copyright Copyright (c) 2006 - 2010 PHPExcel (http://www.codeplex.com/PHPExcel)
const STRING_REGEXP_FRACTION = '(-?)(\d+)\s+(\d+\/\d+)';
* Control characters array
private static $_controlCharacters = array();
private static $_decimalSeparator;
private static $_thousandsSeparator;
* Is mbstring extension available?
private static $_isMbstringEnabled;
* Is iconv extension available?
private static $_isIconvEnabled;
* Build control characters array
for ($i = 0; $i <= 31; ++ $i) {
if ($i != 9 && $i != 10 && $i != 13) {
self::$_controlCharacters[$find] = $replace;
* Get whether mbstring extension is available
if (isset (self::$_isMbstringEnabled)) {
return self::$_isMbstringEnabled;
self::$_isMbstringEnabled = function_exists('mb_convert_encoding') ?
return self::$_isMbstringEnabled;
* Get whether iconv extension is available
if (isset (self::$_isIconvEnabled)) {
return self::$_isIconvEnabled;
// Check that iconv exists
// Sometimes iconv is not working, and e.g. iconv('UTF-8', 'UTF-16LE', 'x') just returns false,
// we cannot use iconv when that happens
// Also, sometimes iconv_substr('A', 0, 1, 'UTF-8') just returns false in PHP 5.2.0
// we cannot use iconv in that case either (http://bugs.php.net/bug.php?id=37773)
if (function_exists('iconv')
&& @iconv('UTF-8', 'UTF-16LE', 'x')
self::$_isIconvEnabled = true;
self::$_isIconvEnabled = false;
return self::$_isIconvEnabled;
* Convert from OpenXML escaped control character to PHP control character
* That's correct, control characters are stored directly in the shared-strings table.
* We do encode characters that cannot be represented in XML using the following escape sequence:
* _xHHHH_ where H represents a hexadecimal character in the character's value...
* So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
* element or in the shared string <t> element).
* @param string $value Value to unescape
if(empty(self::$_controlCharacters)) {
self::_buildControlCharacters();
return str_replace( array_keys(self::$_controlCharacters), array_values(self::$_controlCharacters), $value );
* Convert from PHP control character to OpenXML escaped control character
* That's correct, control characters are stored directly in the shared-strings table.
* We do encode characters that cannot be represented in XML using the following escape sequence:
* _xHHHH_ where H represents a hexadecimal character in the character's value...
* So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
* element or in the shared string <t> element).
* @param string $value Value to escape
if(empty(self::$_controlCharacters)) {
self::_buildControlCharacters();
* Try to sanitize UTF-8, stripping invalid byte sequences. Not perfect: surrogate characters are not handled.
if (self::getIsIconvEnabled()) {
$value = @iconv('UTF-8', 'UTF-8', $value);
if (self::getIsMbstringEnabled()) {
* Check if a string contains UTF8 data
public static function IsUTF8($value = '') {
* Formats a numeric value as a string for output in various output writers forcing
* point as decimal separator in case locale is other than English.
* Converts a UTF-8 string into BIFF8 Unicode string data (8-bit string length)
* Writes the string using uncompressed notation, no rich text, no Asian phonetics
* If neither the iconv nor the mbstring extension is available, ASCII is assumed, and compressed notation is used
* although this will give wrong results for non-ASCII strings
* see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
* @param string $value UTF-8 encoded string
$ln = self::CountCharacters($value, 'UTF-8');
$opt = (self::getIsIconvEnabled() || self::getIsMbstringEnabled()) ?
$chars = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');
$data = pack('CC', $ln, $opt) . $chars;
* Converts a UTF-8 string into BIFF8 Unicode string data (16-bit string length)
* Writes the string using uncompressed notation, no rich text, no Asian phonetics
* If neither the iconv nor the mbstring extension is available, ASCII is assumed, and compressed notation is used
* although this will give wrong results for non-ASCII strings
* see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
* @param string $value UTF-8 encoded string
$ln = self::CountCharacters($value, 'UTF-8');
$opt = (self::getIsIconvEnabled() || self::getIsMbstringEnabled()) ?
$chars = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');
$data = pack('vC', $ln, $opt) . $chars;
* Convert string from one encoding to another. First try iconv, then mbstring, or no conversion
* @param string $to Encoding to convert to, e.g. 'UTF-8'
* @param string $from Encoding to convert from, e.g. 'UTF-16LE'
if (self::getIsIconvEnabled()) {
$value = iconv($from, $to, $value);
if (self::getIsMbstringEnabled()) {
* Get character count. First try iconv, then mbstring, finally strlen
* @param string $enc Encoding
* @return int Character count
if (self::getIsIconvEnabled()) {
if (self::getIsMbstringEnabled()) {
* Get a substring of a UTF-8 encoded string
* @param string $pValue UTF-8 encoded string
* @param int $pStart Start offset
* @param int $pLength Maximum number of characters in substring
public static function Substring($pValue = '', $pStart = 0, $pLength = 0)
if (self::getIsIconvEnabled()) {
$string = iconv_substr($pValue, $pStart, $pLength, 'UTF-8');
if (self::getIsMbstringEnabled()) {
$string = mb_substr($pValue, $pStart, $pLength, 'UTF-8');
$string = substr($pValue, $pStart, $pLength);
* Identify whether a string contains a fractional numeric value,
* and convert it to a numeric if it is
* @param string &$operand string value to test
if (preg_match('/^'. self::STRING_REGEXP_FRACTION. '$/i', $operand, $match)) {
$sign = ($match[1] == '-') ? '-' : '+';
$fractionFormula = '='. $sign. $match[2]. $sign. $match[3];
} // function convertToNumberIfFraction()
* Get the decimal separator. If it has not yet been set explicitly, try to obtain number
* formatting information from locale.
if (!isset (self::$_decimalSeparator)) {
$localeconv = localeconv();
self::$_decimalSeparator = $localeconv['decimal_point'] != ''
? $localeconv['decimal_point'] : $localeconv['mon_decimal_point'];
if (self::$_decimalSeparator == '')
self::$_decimalSeparator = '.';
return self::$_decimalSeparator;
* Set the decimal separator. Only used by PHPExcel_Style_NumberFormat::toFormattedString()
* to format output by PHPExcel_Writer_HTML and PHPExcel_Writer_PDF
* @param string $pValue Character for decimal separator
self::$_decimalSeparator = $pValue;
* Get the thousands separator. If it has not yet been set explicitly, try to obtain number
* formatting information from locale.
if (!isset (self::$_thousandsSeparator)) {
$localeconv = localeconv();
self::$_thousandsSeparator = $localeconv['thousands_sep'] != ''
? $localeconv['thousands_sep'] : $localeconv['mon_thousands_sep'];
return self::$_thousandsSeparator;
* Set the thousands separator. Only used by PHPExcel_Style_NumberFormat::toFormattedString()
* to format output by PHPExcel_Writer_HTML and PHPExcel_Writer_PDF
* @param string $pValue Character for thousands separator
self::$_thousandsSeparator = $pValue;
|