To escape or not to escape. Part 2.
After testing and playing with the remap_string function on a real project, I've made some changes and fixes:
- I've introduced an encoding signature, so that an object that was already encoded cannot be re-encoded by mistake. Currently the signature is two ESC characters (chr(27) . chr(27)) prefixed to the encoded string, which in principle should not affect any future application of this function.
- I have changed the multi-byte UTF-8 signature to 'y0y'. The previous signature ('0y', similar to the 0x notation for hex chars) was not completely isolated and could have produced wrong decoding under certain conditions.
- I have included here the helper functions encode() and decode(), which encode and decode strings, arrays and objects. These two functions currently use a global variable, $non_encoded_fields, to skip the object or array keys which should not be encoded.
/**
 * Reversibly replace every character outside a small whitelist with numeric codes.
 * I'm using this to convert all data before storing into mongoDB / mySQL.
 *
 * Encoded format:
 *  - the result is prefixed with a two-byte signature (ESC ESC), so an already
 *    encoded string is returned unchanged instead of being encoded twice;
 *  - a single-byte character becomes its byte value as three decimal digits
 *    (e.g. "1" => "049");
 *  - a multi-byte UTF-8 character becomes "y0y" + three digits per byte + "y0y".
 *
 * NOTE(review): inside the character class, ";-_" is parsed as a RANGE
 * (0x3B..0x5F), not as three literal characters. As a result "<", ">", "?",
 * "@", "[", "\", "]" and "^" are left unencoded while "-" IS encoded. The
 * pattern is kept as-is here because changing it would change the encoding of
 * data that may already be stored - confirm the intended whitelist.
 *
 * @param string $text   Text to encode, or previously encoded text to decode.
 * @param bool   $decode When true, decode instead of encode.
 * @return string Encoded text (with signature), or the decoded text. Text that
 *                does not carry the signature is returned unchanged by decode.
 */
function remap_string( $text, $decode=false ) {
    // Two ESC characters mark a string as "already encoded".
    $signature = chr(27) . chr(27);

    if ($decode) {
        // No signature => the value was never encoded; hand it back untouched.
        if (strpos($text, $signature) !== 0) return $text;
        $text = substr($text, 2);
        // The patterns are applied in order: first the y0y-wrapped multi-byte
        // groups are turned back into raw bytes, then the remaining
        // three-digit codes are decoded one by one.
        return preg_replace_callback(
            array("/(y0y([0-9]{3})+y0y)/","/([0-9]{3})/"),
            function($matches) {
                if (isset($matches[2])) {
                    // Multi-byte group: strip the leading and trailing "y0y".
                    $code = substr($matches[0], 3, strlen($matches[0]) - 6);
                    $r = '';
                    foreach (str_split($code, 3) as $c) $r .= chr((int) $c);
                    return $r;
                }
                return chr((int) $matches[0]);
            },
            $text);
    }

    // Already encoded: do not encode twice.
    if (strpos($text, $signature) === 0) return $text;

    $pattern = "/([^A-Za-z ,.:;-_+=!#()]{1})/";
    $encoder = function($matches) {
        $l = strlen($matches[0]);
        if (1 == $l) return sprintf('%03d', ord($matches[0]));
        // Multi-byte character: emit every byte and wrap the group in y0y markers.
        $a = '';
        for ($i = 0; $i < $l; $i++) $a .= sprintf('%03d', ord($matches[0][$i]));
        return "y0y" . $a . "y0y";
    };

    $encoded = preg_replace_callback($pattern . "u", $encoder, $text);
    if ($encoded === null) {
        // The "u" modifier makes PCRE fail (return null) on invalid UTF-8, which
        // would silently drop the whole value. Fall back to byte-wise matching:
        // every non-whitelisted byte gets its own three-digit code, and the
        // decoder restores the exact original bytes.
        $encoded = preg_replace_callback($pattern, $encoder, $text);
    }
    return $signature . $encoded;
}
/**
 * Encode a single value, or every leaf value of a (nested) array, with remap_string().
 *
 * Keys listed in the global $non_encoded_fields are left untouched. Key names
 * are compared as strings. A missing or non-array $non_encoded_fields is
 * treated as an empty list (encode everything) instead of raising an error.
 *
 * Note: an array is modified in place AND returned; a non-array value is only
 * returned - the caller's variable is not changed.
 *
 * @param mixed $obj Value or array to encode (arrays are modified in place).
 * @return mixed The encoded value or array.
 */
function encode(&$obj) {
    if (!is_array($obj)) return remap_string($obj);

    global $non_encoded_fields;
    $skip = is_array($non_encoded_fields) ? $non_encoded_fields : array();

    array_walk_recursive( $obj,
        function(&$item, $key) use ($skip) {
            // Index 0 is always encoded (kept from the original guard against
            // loose comparison of 0 with field names).
            if (($key === 0) || (!in_array((string) $key, $skip, true))) {
                $item = remap_string($item);
            }
        }
    );
    return $obj;
}
/**
 * Decode a single value, or every leaf value of a (nested) array, with
 * remap_string($value, true).
 *
 * Keys listed in the global $non_encoded_fields are left untouched. Key names
 * are compared as strings. A missing or non-array $non_encoded_fields is
 * treated as an empty list (decode everything) instead of raising an error.
 *
 * Note: an array is modified in place AND returned; a non-array value is only
 * returned - the caller's variable is not changed.
 *
 * @param mixed $obj Value or array to decode (arrays are modified in place).
 * @return mixed The decoded value or array.
 */
function decode(&$obj) {
    if (!is_array($obj)) return remap_string($obj, true);

    global $non_encoded_fields;
    $skip = is_array($non_encoded_fields) ? $non_encoded_fields : array();

    array_walk_recursive( $obj,
        function(&$item, $key) use ($skip) {
            // Index 0 is always decoded (kept from the original guard against
            // loose comparison of 0 with field names).
            if (($key === 0) || (!in_array((string) $key, $skip, true))) {
                $item = remap_string($item, true);
            }
        }
    );
    return $obj;
}
Comments
Post a Comment