diff --git a/.gitignore b/.gitignore index d1502b0..3d7e132 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ vendor/ composer.lock +.phpunit.result.cache diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..f31a55d --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,7 @@ + + + + test + + + diff --git a/test/TokenizerTest.php b/test/TokenizerTest.php new file mode 100644 index 0000000..af03490 --- /dev/null +++ b/test/TokenizerTest.php @@ -0,0 +1,106 @@ +expectException(MadLispException::class); + $this->expectExceptionMessage($message); + + $tokenizer = new Tokenizer(); + $tokenizer->tokenize($input); + } + + /** + * Data provider for testTokenize(). + * + * Each entry is a two-element array: the input string handed to Tokenizer::tokenize() and the token list the result is compared against with assertSame(). + * + * @return array[] + */ + public function tokenProvider(): array + { + return [ + // Ignored characters + ["", []], + [" ", []], + ["\t", []], + ["\n", []], + ["\r", []], + [":", []], + [" \t\n\r: ", []], + [" aa\t\n\rbb:\r\ncc\t ", ['aa', 'bb', 'cc']], + + // Comments + [";comment", []], + ["a;c(o[m{m}e]n)t\nb", ['a', 'b']], // parens inside comment + ["a;com\"ment\nb", ['a', 'b']], // quote inside comment + ["a;comment\rb", ['a', 'b']], // end with \r + ["a;;comment\nb", ['a', 'b']], // two ; + ["a;com\nb;ment\rc", ['a', 'b', 'c']], // two comments + + // Parens + ["aa(bb[cc{dd}ee]gg)ff", ['aa', '(', 'bb', '[', 'cc', '{', 'dd', '}', 'ee', ']', 'gg', ')', 'ff']], + // Special characters: '`~ + ["aa'bb`cc~dd~ee`gg'ff", ['aa', "'", 'bb', '`', 'cc', '~', 'dd', '~', 'ee', '`', 'gg', "'", 'ff']], + // Other non-alphabet characters are symbols + ["(aa!@#$%^&*-_=+bb<>,./?\\|cc)", ['(', "aa!@#$%^&*-_=+bb<>,./?\\|cc", ')']], + + // Strings + ['"abc"', ['"abc"']], + ['aa"bb"cc', ['aa', '"bb"', 'cc']], + ['aa"bb;cc"dd', ['aa', '"bb;cc"', 'dd']], // comment inside string + ['aa"bb""cc"dd', ['aa', '"bb"', '"cc"', 'dd']], // two strings + ["aa\"bb\\\"cc\"dd", ['aa', "\"bb\"cc\"", 'dd']], // quote inside string + ["aa\"bb\n\rcc\"dd", ['aa', "\"bb\n\rcc\"", 'dd']], // linebreaks inside string + ["aa\"bb\\n\\r\\tcc\"dd", ['aa', "\"bb\n\r\tcc\"", 'dd']], // escaped linebreaks + 
["aa\"bb\\\\n\\\\rcc\"dd", ['aa', "\"bb\\n\\rcc\"", 'dd']], // escaped backslashes + ["aa\"bb\\\\\"cc", ['aa', "\"bb\\\"", 'cc']], + ["aa\"bb\\\\\\\"cc\"dd", ['aa', "\"bb\\\"cc\"", 'dd']], + + // Test everything together + ["(abc<+=-_!?>\"str\n\\r;\\\"\";com\"ment\r{\"a\":\"b\"})", ['(', 'abc<+=-_!?>', "\"str\n\r;\"\"", '{', '"a"', '"b"', '}', ')']], + ]; + } + + /** + * Test valid inputs. + * + * Runs a new Tokenizer over $input and asserts with assertSame() that the returned value matches $expected. + * + * @dataProvider tokenProvider + * @param string $input Text passed to Tokenizer::tokenize(). + * @param array $expected Value the tokenize() result is compared against. + */ + public function testTokenize(string $input, array $expected) + { + $tokenizer = new Tokenizer(); + $result = $tokenizer->tokenize($input); + $this->assertSame($expected, $result); + } +}