diff --git a/src/Reader.php b/src/Reader.php index 3b3dbf5..72a06b7 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -92,13 +92,8 @@ class Reader } elseif ($a === 'null') { return null; } elseif (substr($a, 0, 1) === '"') { - // string, handle special characters - $a = substr($a, 1, -1); - $a = str_replace("\\\\", chr(0x7f), $a); - $a = str_replace("\\n", "\n", $a); - $a = str_replace("\\r", "\r", $a); - $a = str_replace("\\\"", "\"", $a); - return str_replace(chr(0x7f), "\\", $a); + // remove quotes around string + return substr($a, 1, -1); } elseif (is_numeric($a)) { if (filter_var($a, FILTER_VALIDATE_INT) !== false) { return intval($a); diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 6b2b482..705a5bf 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -16,6 +16,7 @@ class Tokenizer $isString = false; $isComment = false; + $isEscape = false; $parens = [0, 0, 0]; $parenIndexes = ['(' => 0, ')' => 0, '[' => 1, ']' => 1, '{' => 2, '}' => 2]; @@ -31,13 +32,25 @@ class Tokenizer $c = substr($a, $i, 1); if ($isString) { - // Inside string, add all characters - $current .= $c; - - // Stop at first double quote - if ($c == '"') { - // If previous character is not a backslash - if (strlen($current) < 2 || substr($current, -2, 1) != "\\") { + if ($isEscape) { + if ($c == 'n') { + $current .= "\n"; + } elseif ($c == 'r') { + $current .= "\r"; + } elseif ($c == 't') { + $current .= "\t"; + } elseif ($c == "\\" || $c == '"') { + $current .= $c; + } else { + throw new MadLispException("invalid escape sequence \\$c"); + } + $isEscape = false; + } elseif ($c == "\\") { + $isEscape = true; + } else { + // Not handling escape sequence + $current .= $c; + if ($c == '"') { $addCurrent(); $isString = false; } @@ -49,12 +62,12 @@ class Tokenizer } } else { // Not inside string or comment - if ($c == '"') { // Start of string $addCurrent(); $current .= $c; $isString = true; + $isEscape = false; } elseif ($c == ';') { // Start of comment $addCurrent();