mirror of
https://github.com/peklaiho/madlisp.git
synced 2024-11-22 05:14:45 +00:00
add Unicode support to Tokenizer
This commit is contained in:
parent
ed18cb2fd5
commit
170b8d424f
@ -92,7 +92,11 @@ class Reader
|
||||
} elseif ($a === 'null') {
|
||||
return null;
|
||||
} elseif (substr($a, 0, 1) === '"') {
|
||||
// remove quotes around string
|
||||
// Remove quotes around string.
|
||||
//
|
||||
// This is safe for UTF-8 strings as well: the double quote is a single
|
||||
// ASCII byte that never occurs inside a multibyte sequence, so removing
|
||||
// one byte from each end cannot split a character; mb_substr is not needed.
|
||||
return substr($a, 1, -1);
|
||||
} elseif (is_numeric($a)) {
|
||||
if (filter_var($a, FILTER_VALIDATE_INT) !== false) {
|
||||
|
@ -28,8 +28,17 @@ class Tokenizer
|
||||
}
|
||||
};
|
||||
|
||||
for ($i = 0; $i < strlen($a); $i++) {
|
||||
$c = substr($a, $i, 1);
|
||||
// Use mbstring extension if available to support Unicode characters
|
||||
if (extension_loaded('mbstring')) {
|
||||
$lenfn = 'mb_strlen';
|
||||
$subfn = 'mb_substr';
|
||||
} else {
|
||||
$lenfn = 'strlen';
|
||||
$subfn = 'substr';
|
||||
}
|
||||
|
||||
for ($i = 0; $i < $lenfn($a); $i++) {
|
||||
$c = $subfn($a, $i, 1);
|
||||
|
||||
if ($isString) {
|
||||
if ($isEscape) {
|
||||
|
@ -103,4 +103,31 @@ class TokenizerTest extends TestCase
|
||||
$result = $tokenizer->tokenize($input);
|
||||
$this->assertSame($expected, $result);
|
||||
}
|
||||
|
||||
/**
 * Data sets for testUnicode, each an [input, expected tokens] pair.
 *
 * The cases place non-ASCII symbols and letters next to parentheses,
 * brackets, braces, whitespace and quoted strings. In the second case the
 * expected list contains no token for the text from ';' up to the newline,
 * nor for the ':' between the two quoted strings.
 *
 * @return array[] list of [string $input, string[] $expected] pairs
 */
public function unicodeProvider(): array
|
||||
{
|
||||
return [
|
||||
["(∫≈♡)", ['(', '∫≈♡', ')']],
|
||||
[
|
||||
"αβγδ\"εζηθ\"ικλμ;νξοπ\nρς[σ τ]υ{\"φ\":\"χ\"}ψω",
|
||||
['αβγδ', "\"εζηθ\"", 'ικλμ', 'ρς', '[', 'σ', 'τ', ']', 'υ', '{', '"φ"', '"χ"', '}', 'ψω']
|
||||
],
|
||||
["[←↑\"→↓\"⇐⇑⇒⇓]", ['[', '←↑', '"→↓"', '⇐⇑⇒⇓', ']']],
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Unicode inputs.
*
* Tokenizes $input with a fresh Tokenizer and requires the result to be
* identical (assertSame) to $expected. The test is marked as skipped when
* the mbstring extension is not loaded.
*
* @param string $input    source text to tokenize
* @param array  $expected tokens the tokenizer should return, in order
|
||||
* @dataProvider unicodeProvider
|
||||
*/
|
||||
public function testUnicode(string $input, array $expected)
|
||||
{
|
||||
// Skip instead of failing when mbstring is missing.
if (!extension_loaded('mbstring')) {
|
||||
$this->markTestSkipped('The mbstring extension is not available.');
|
||||
}
|
||||
|
||||
$tokenizer = new Tokenizer();
|
||||
$result = $tokenizer->tokenize($input);
|
||||
$this->assertSame($expected, $result);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user