Rewrite Tokenizer to decode string escape sequences (\n, \r, \t, \\, \") while tokenizing, instead of in Reader, and reject unknown escapes

This commit is contained in:
Pekka Laiho 2020-12-15 09:03:21 +07:00
parent 2a20690b17
commit ea7382caa3
2 changed files with 23 additions and 15 deletions

View File

@ -92,13 +92,8 @@ class Reader
} elseif ($a === 'null') {
return null;
} elseif (substr($a, 0, 1) === '"') {
// string, handle special characters
$a = substr($a, 1, -1);
$a = str_replace("\\\\", chr(0x7f), $a);
$a = str_replace("\\n", "\n", $a);
$a = str_replace("\\r", "\r", $a);
$a = str_replace("\\\"", "\"", $a);
return str_replace(chr(0x7f), "\\", $a);
// remove quotes around string
return substr($a, 1, -1);
} elseif (is_numeric($a)) {
if (filter_var($a, FILTER_VALIDATE_INT) !== false) {
return intval($a);

View File

@ -16,6 +16,7 @@ class Tokenizer
$isString = false;
$isComment = false;
$isEscape = false;
$parens = [0, 0, 0];
$parenIndexes = ['(' => 0, ')' => 0, '[' => 1, ']' => 1, '{' => 2, '}' => 2];
@ -31,13 +32,25 @@ class Tokenizer
$c = substr($a, $i, 1);
if ($isString) {
// Inside string, add all characters
if ($isEscape) {
if ($c == 'n') {
$current .= "\n";
} elseif ($c == 'r') {
$current .= "\r";
} elseif ($c == 't') {
$current .= "\t";
} elseif ($c == "\\" || $c == '"') {
$current .= $c;
} else {
throw new MadLispException("invalid escape sequence \\$c");
}
$isEscape = false;
} elseif ($c == "\\") {
$isEscape = true;
} else {
// Not handling escape sequence
$current .= $c;
// Stop at first double quote
if ($c == '"') {
// If previous character is not a backslash
if (strlen($current) < 2 || substr($current, -2, 1) != "\\") {
$addCurrent();
$isString = false;
}
@ -49,12 +62,12 @@ class Tokenizer
}
} else {
// Not inside string or comment
if ($c == '"') {
// Start of string
$addCurrent();
$current .= $c;
$isString = true;
$isEscape = false;
} elseif ($c == ';') {
// Start of comment
$addCurrent();