From 9fc1f30c7686e8a06f9d88bd2e849ce76af2c1c4 Mon Sep 17 00:00:00 2001
From: "Joseph T. Lyons"
Date: Wed, 20 Nov 2024 08:42:51 -0500
Subject: [PATCH] Fix base prefix bugs (#75)

---
 CHANGELOG.md                                 |  7 ++
 src/lenient_parse.gleam                      |  2 +-
 src/lenient_parse/internal/parser.gleam      | 74 +++++++++++++--
 src/lenient_parse/internal/token.gleam       |  4 +-
 src/lenient_parse/internal/tokenizer.gleam   | 70 +++------------
 test/data.gleam                              | 20 ++++-
 test/data/integer/invalid_integer_data.gleam | 11 +++
 test/data/integer/valid_integer_data.gleam   | 17 ++++
 test/tokenizer_test.gleam                    | 95 +++++++++++---------
 9 files changed, 185 insertions(+), 115 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61c31b1..cf21bca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## v1.3.5 - 2024-11-19
+
+- Fixed a bug where base prefix substrings appearing later in a string were incorrectly recognized as base prefixes.
+  - The `0b` in `0XDEAD_BEEF0b` should be categorized as digits, not as a base prefix.
+- Fixed a bug where, in base 0 mode, `lenient_parse` would accept a string with an incomplete base prefix.
+  - `01` should be rejected, as it is missing a specifier (`b`, `B`, `o`, `O`, `x`, `X`).
+
 ## v1.3.4 - 2024-11-19
 
 - Fixed a bug where `lenient_parse` was rejecting strings containing leading / trailing uncommon whitespace characters.
diff --git a/src/lenient_parse.gleam b/src/lenient_parse.gleam
index 4d89d5b..8340237 100644
--- a/src/lenient_parse.gleam
+++ b/src/lenient_parse.gleam
@@ -29,6 +29,6 @@ pub fn to_int_with_base(
   use <- bool.guard(!is_valid_base, Error(InvalidBaseValue(base)))
 
   text
-  |> tokenizer.tokenize_int(base: base)
+  |> tokenizer.tokenize_int
   |> parser.parse_int_tokens(base: base)
 }
diff --git a/src/lenient_parse/internal/parser.gleam b/src/lenient_parse/internal/parser.gleam
index 17badbe..f3bba83 100644
--- a/src/lenient_parse/internal/parser.gleam
+++ b/src/lenient_parse/internal/parser.gleam
@@ -9,8 +9,8 @@ import lenient_parse/internal/base_constants.{
 import lenient_parse/internal/build
 import lenient_parse/internal/convert.{digits_to_int, digits_to_int_with_base}
 import lenient_parse/internal/token.{
-  type Token, BasePrefix, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore,
-  Unknown, Whitespace,
+  type Token, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore, Unknown,
+  Whitespace,
 }
 import parse_error.{
   type ParseError, BasePrefixOnly, EmptyString, InvalidDecimalPosition,
@@ -128,7 +128,7 @@ pub fn parse_int_tokens(
     base
       if base == base_0 || base == base_2 || base == base_8 || base == base_16
     -> {
-      let parse_data = parse_base_prefix(tokens, next_index)
+      let parse_data = parse_base_prefix(tokens, next_index, base)
       use ParseData(base_data, next_index, tokens) <- result.try(parse_data)
 
       let #(base, prefix_data) = case base_data {
@@ -234,21 +234,74 @@ fn parse_sign(
 fn parse_base_prefix(
   tokens tokens: List(Token),
   index index: Int,
+  base base: Int,
 ) -> Result(ParseData(Option(#(#(Int, Int), String, Int))), ParseError) {
   case tokens {
     [Unknown(#(start_index, _), character), ..] ->
       Error(UnknownCharacter(start_index, character))
-    [BasePrefix(index_range, prefix, base), ..rest] -> {
-      Ok(ParseData(
-        data: Some(#(index_range, prefix, base)),
-        next_index: index_range.1,
-        tokens: rest,
-      ))
+    [Digit(index_range, "0", _), ..rest] -> {
+      let lookahead = rest |> list.first
+
+      case lookahead {
+        Ok(Digit(_, specifier, _))
+          if { base == base_0 || base == base_2 }
+          && { specifier == "b" || specifier == "B" }
+        ->
+          form_base_prefix(
+            tokens: rest,
+            index_range: index_range,
+            specifier: specifier,
+            base: base_2,
+          )
+        Ok(Digit(_, specifier, _))
+          if { base == base_0 || base == base_8 }
+          && { specifier == "o" || specifier == "O" }
+        ->
+          form_base_prefix(
+            tokens: rest,
+            index_range: index_range,
+            specifier: specifier,
+            base: base_8,
+          )
+        Ok(Digit(_, specifier, _))
+          if { base == base_0 || base == base_16 }
+          && { specifier == "x" || specifier == "X" }
+        ->
+          form_base_prefix(
+            tokens: rest,
+            index_range: index_range,
+            specifier: specifier,
+            base: base_16,
+          )
+        Ok(Digit(#(start_index, _), character, _)) if base == base_0 -> {
+          Error(UnknownCharacter(start_index, character))
+        }
+        _ -> Ok(ParseData(data: None, next_index: index, tokens: tokens))
+      }
+    }
     _ -> Ok(ParseData(data: None, next_index: index, tokens: tokens))
   }
 }
 
+fn form_base_prefix(
+  tokens tokens: List(Token),
+  index_range index_range: #(Int, Int),
+  specifier specifier: String,
+  base base: Int,
+) -> Result(ParseData(Option(#(#(Int, Int), String, Int))), ParseError) {
+  let #(start_index, end_index) = index_range
+  let rest = case tokens {
+    [] -> []
+    [_, ..tokens] -> tokens
+  }
+
+  Ok(ParseData(
+    data: Some(#(#(start_index, end_index + 1), "0" <> specifier, base)),
+    next_index: end_index + 2,
+    tokens: rest,
+  ))
+}
+
 fn parse_decimal_point(
   tokens tokens: List(Token),
   index index: Int,
@@ -338,6 +391,9 @@ fn do_parse_digits(
         has_base_prefix: has_base_prefix,
       )
     }
+    [Digit(#(start_index, _), character, _), ..] if base == base_0 -> {
+      Error(UnknownCharacter(start_index, character))
+    }
     [Digit(#(_, end_index), _, value), ..rest] if value < base -> {
       do_parse_digits(
         tokens: rest,
diff --git a/src/lenient_parse/internal/token.gleam b/src/lenient_parse/internal/token.gleam
index 8707d6c..58ea377 100644
--- a/src/lenient_parse/internal/token.gleam
+++ b/src/lenient_parse/internal/token.gleam
@@ -1,6 +1,6 @@
 import lenient_parse/internal/whitespace.{type WhitespaceData}
 import parse_error.{
-  type ParseError, BasePrefixOnly, InvalidDecimalPosition, InvalidDigitPosition,
+  type ParseError, InvalidDecimalPosition, InvalidDigitPosition,
   InvalidExponentSymbolPosition, InvalidSignPosition, InvalidUnderscorePosition,
   OutOfBaseRange, UnknownCharacter,
 }
@@ -8,7 +8,6 @@ import parse_error.{
 pub type Token {
   Sign(#(Int, Int), String, Bool)
   Digit(#(Int, Int), character: String, value: Int)
-  BasePrefix(#(Int, Int), prefix: String, base: Int)
   Underscore(#(Int, Int))
   DecimalPoint(#(Int, Int))
   ExponentSymbol(#(Int, Int), String)
@@ -24,7 +23,6 @@ pub fn to_error(token: Token, base: Int) -> ParseError {
       OutOfBaseRange(start_index, character, value, base)
     Digit(#(start_index, _), character, _) ->
       InvalidDigitPosition(start_index, character)
-    BasePrefix(index_range, prefix, _) -> BasePrefixOnly(index_range, prefix)
     Underscore(#(start_index, _)) -> InvalidUnderscorePosition(start_index)
     DecimalPoint(#(start_index, _)) -> InvalidDecimalPosition(start_index)
     ExponentSymbol(#(start_index, _), exponent_symbol) ->
diff --git a/src/lenient_parse/internal/tokenizer.gleam b/src/lenient_parse/internal/tokenizer.gleam
index b30bed4..2137fc1 100644
--- a/src/lenient_parse/internal/tokenizer.gleam
+++ b/src/lenient_parse/internal/tokenizer.gleam
@@ -2,12 +2,10 @@ import gleam/dict.{type Dict}
 import gleam/list
 import gleam/option.{type Option, None, Some}
 import gleam/string
-import lenient_parse/internal/base_constants.{
-  base_0, base_10, base_16, base_2, base_8,
-}
+import lenient_parse/internal/base_constants.{base_10}
 import lenient_parse/internal/token.{
-  type Token, BasePrefix, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore,
-  Unknown, Whitespace,
+  type Token, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore, Unknown,
+  Whitespace,
 }
 import lenient_parse/internal/whitespace.{type WhitespaceData}
@@ -56,23 +54,19 @@ fn do_tokenize_float(
   }
 }
 
-pub fn tokenize_int(text text: String, base base: Int) -> List(Token) {
+pub fn tokenize_int(text text: String) -> List(Token) {
   text
   |> string.to_graphemes
   |> do_tokenize_int(
-    base: base,
     index: 0,
     whitespace_character_dict: whitespace.character_dict(),
-    base_prefix_found: False,
     acc: [],
   )
 }
 
 fn do_tokenize_int(
   characters characters: List(String),
-  base base: Int,
   index index: Int,
-  base_prefix_found base_prefix_found: Bool,
   whitespace_character_dict whitespace_character_dict: Dict(
     String,
     WhitespaceData,
@@ -82,43 +76,17 @@ fn do_tokenize_int(
   case characters {
     [] -> acc |> list.reverse
     [first, ..rest] -> {
-      let lookahead = rest |> list.first
-
-      let #(index, token, rest, base_prefix_found) = case
-        base_prefix_found,
-        first,
-        lookahead
-      {
-        False, "0", Ok(specifier)
-          if { base == base_0 || base == base_2 }
-          && { specifier == "b" || specifier == "B" }
-        -> base_prefix_token_data(index, specifier, base_2, rest)
-        False, "0", Ok(specifier)
-          if { base == base_0 || base == base_8 }
-          && { specifier == "o" || specifier == "O" }
-        -> base_prefix_token_data(index, specifier, base_8, rest)
-        False, "0", Ok(specifier)
-          if { base == base_0 || base == base_16 }
-          && { specifier == "x" || specifier == "X" }
-        -> base_prefix_token_data(index, specifier, base_16, rest)
-        _, _, _ -> {
-          let token =
-            common_token(
-              character: first,
-              index: index,
-              tokenize_character_as_digit: fn(_) { True },
-              whitespace_character_dict: whitespace_character_dict,
-            )
-
-          #(index + 1, token, rest, base_prefix_found)
-        }
-      }
+      let token =
+        common_token(
+          character: first,
+          index: index,
+          tokenize_character_as_digit: fn(_) { True },
+          whitespace_character_dict: whitespace_character_dict,
+        )
 
       do_tokenize_int(
         characters: rest,
-        base: base,
-        index: index,
-        base_prefix_found: base_prefix_found,
+        index: index + 1,
         whitespace_character_dict: whitespace_character_dict,
         acc: [token, ..acc],
       )
     }
   }
 }
 
-fn base_prefix_token_data(
-  index: Int,
-  specifier: String,
-  base: Int,
-  rest: List(String),
-) -> #(Int, Token, List(String), Bool) {
-  let token = BasePrefix(#(index, index + 2), "0" <> specifier, base)
-  let new_rest = case rest {
-    [] -> []
-    [_, ..rest] -> rest
-  }
-  #(index + 2, token, new_rest, True)
-}
-
 fn common_token(
   character character: String,
   index index: Int,
diff --git a/test/data.gleam b/test/data.gleam
index 20642ed..6646b56 100644
--- a/test/data.gleam
+++ b/test/data.gleam
@@ -16,14 +16,30 @@ pub fn integer_test_data() -> List(IntegerTestData) {
   |> list.flatten
 }
 
+// To prevent error code 7 (argument list too long) when passing large datasets
+// to the Python programs, we divide the data into smaller lists.
+const test_data_chunk_size = 200
+
 pub fn python_processed_float_data() {
   let float_test_data = float_test_data()
-  let processed_values = python_parse.to_floats(float_test_data)
+
+  let processed_values =
+    float_test_data
+    |> list.sized_chunk(test_data_chunk_size)
+    |> list.map(python_parse.to_floats)
+    |> list.flatten
+
   float_test_data |> list.zip(processed_values)
 }
 
 pub fn python_processed_integer_data() {
   let integer_test_data = integer_test_data()
-  let processed_values = python_parse.to_ints(integer_test_data)
+
+  let processed_values =
+    integer_test_data
+    |> list.sized_chunk(test_data_chunk_size)
+    |> list.map(python_parse.to_ints)
+    |> list.flatten
+
   integer_test_data |> list.zip(processed_values)
 }
diff --git a/test/data/integer/invalid_integer_data.gleam b/test/data/integer/invalid_integer_data.gleam
index 65e6957..2be8505 100644
--- a/test/data/integer/invalid_integer_data.gleam
+++ b/test/data/integer/invalid_integer_data.gleam
@@ -682,6 +682,17 @@ fn invalid_mixed() -> List(IntegerTestData) {
       expected_program_output: Error(UnknownCharacter(7, ".")),
       python_error_function: invalid_literal_for_int_error,
     ),
+    // Base 0, has no prefix, so it defaults to decimal - this should not
+    // parse, because a leading 0 is treated as the start of a base prefix
+    // and the specifier is missing. In base 0, for a string to parse as
+    // base 10, it must not have a leading 0, unless it is just the value
+    // 0.
+    integer_test_data(
+      input: "01",
+      base: base_0,
+      expected_program_output: Error(UnknownCharacter(1, "1")),
+      python_error_function: invalid_literal_for_int_error,
+    ),
     integer_test_data(
       input: " \n0x ABC.",
       base: base_0,
diff --git a/test/data/integer/valid_integer_data.gleam b/test/data/integer/valid_integer_data.gleam
index 76ab59a..65f00d8 100644
--- a/test/data/integer/valid_integer_data.gleam
+++ b/test/data/integer/valid_integer_data.gleam
@@ -185,6 +185,16 @@ fn valid_simple_base_prefix() -> List(IntegerTestData) {
       expected_program_output: Ok(0xDEADBEEF),
       expected_python_output: Ok("3735928559"),
     ),
+    // Base 0, has no prefix, so it defaults to decimal - this should parse,
+    // as it is just the integer 0. Any character following the 0 would have
+    // to be a base specifier; anything else would be an error, since base 0
+    // looks for a base prefix.
+    IntegerTestData(
+      input: "0",
+      base: base_0,
+      expected_program_output: Ok(0),
+      expected_python_output: Ok("0"),
+    ),
     // Base 0, has no prefix, default to decimal
     IntegerTestData(
       input: " \n6_666",
       base: base_0,
@@ -248,6 +258,13 @@
       expected_program_output: Ok(0xDEAD_BEEF),
       expected_python_output: Ok("3735928559"),
     ),
+    // Has a base prefix substring at the end of the input string that isn't
+    // treated as a base prefix
+    IntegerTestData(
+      input: "0XDEAD_BEEF0b",
+      base: base_16,
+      expected_program_output: Ok(0xDEAD_BEEF0b),
+      expected_python_output: Ok("956397711115"),
+    ),
   ]
 }
diff --git a/test/tokenizer_test.gleam b/test/tokenizer_test.gleam
index 9fbb611..1be4c4d 100644
--- a/test/tokenizer_test.gleam
+++ b/test/tokenizer_test.gleam
@@ -5,12 +5,8 @@ import lenient_parse/internal/whitespace.{
   carriage_return, form_feed, horizontal_tab, line_feed, space,
   windows_newline,
 }
-import lenient_parse/internal/base_constants.{
-  base_0, base_10, base_16, base_2, base_8,
-}
 import lenient_parse/internal/token.{
-  BasePrefix, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore, Unknown,
-  Whitespace,
+  DecimalPoint, Digit, ExponentSymbol, Sign, Underscore, Unknown, Whitespace,
 }
 import lenient_parse/internal/tokenizer
 import startest/expect
@@ -53,7 +49,7 @@ pub fn tokenize_float_test() {
 // right base, so we mark these as Digits.
 pub fn tokenize_int_base_10_test() {
   " \t\n\r\f\r\n+-0123456789eE._abcZ"
-  |> tokenizer.tokenize_int(base: base_10)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), horizontal_tab),
     Whitespace(#(2, 3), line_feed),
     Whitespace(#(3, 4), carriage_return),
     Whitespace(#(4, 5), form_feed),
     Whitespace(#(5, 7), windows_newline),
     Sign(#(7, 8), "+", True),
     Sign(#(8, 9), "-", False),
     Digit(#(9, 10), "0", 0),
     Digit(#(10, 11), "1", 1),
     Digit(#(11, 12), "2", 2),
     Digit(#(12, 13), "3", 3),
     Digit(#(13, 14), "4", 4),
     Digit(#(14, 15), "5", 5),
     Digit(#(15, 16), "6", 6),
     Digit(#(16, 17), "7", 7),
     Digit(#(17, 18), "8", 8),
     Digit(#(18, 19), "9", 9),
     Digit(#(19, 20), "e", 14),
     Digit(#(20, 21), "E", 14),
     DecimalPoint(#(21, 22)),
     Underscore(#(22, 23)),
     Digit(#(23, 24), "a", 10),
     Digit(#(24, 25), "b", 11),
     Digit(#(25, 26), "c", 12),
     Unknown(#(26, 27), "Z"),
   ])
 }
 
 pub fn tokenize_int_base_2_test() {
   "0102101"
-  |> tokenizer.tokenize_int(base: base_2)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Digit(#(0, 1), "0", 0),
     Digit(#(1, 2), "1", 1),
     Digit(#(2, 3), "0", 0),
     Digit(#(3, 4), "2", 2),
     Digit(#(4, 5), "1", 1),
     Digit(#(5, 6), "0", 0),
     Digit(#(6, 7), "1", 1),
   ])
 }
 
 pub fn tokenize_int_base_16_test() {
   "dead_beefZ"
-  |> tokenizer.tokenize_int(base: base_16)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Digit(#(0, 1), "d", 0xD),
     Digit(#(1, 2), "e", 0xE),
     Digit(#(2, 3), "a", 0xA),
     Digit(#(3, 4), "d", 0xD),
     Underscore(#(4, 5)),
     Digit(#(5, 6), "b", 0xB),
     Digit(#(6, 7), "e", 0xE),
     Digit(#(7, 8), "e", 0xE),
     Digit(#(8, 9), "f", 0xF),
     Unknown(#(9, 10), "Z"),
   ])
 }
 
 pub fn tokenize_int_base_35_test() {
   "1234567890abcdefghijklmnopqrstuvwxyz"
-  |> tokenizer.tokenize_int(base: 35)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Digit(#(0, 1), "1", 1),
     Digit(#(1, 2), "2", 2),
     Digit(#(2, 3), "3", 3),
     Digit(#(3, 4), "4", 4),
     Digit(#(4, 5), "5", 5),
     Digit(#(5, 6), "6", 6),
     Digit(#(6, 7), "7", 7),
     Digit(#(7, 8), "8", 8),
     Digit(#(8, 9), "9", 9),
     Digit(#(9, 10), "0", 0),
     Digit(#(10, 11), "a", 10),
     Digit(#(11, 12), "b", 11),
     Digit(#(12, 13), "c", 12),
     Digit(#(13, 14), "d", 13),
     Digit(#(14, 15), "e", 14),
     Digit(#(15, 16), "f", 15),
     Digit(#(16, 17), "g", 16),
     Digit(#(17, 18), "h", 17),
     Digit(#(18, 19), "i", 18),
     Digit(#(19, 20), "j", 19),
     Digit(#(20, 21), "k", 20),
     Digit(#(21, 22), "l", 21),
     Digit(#(22, 23), "m", 22),
     Digit(#(23, 24), "n", 23),
     Digit(#(24, 25), "o", 24),
     Digit(#(25, 26), "p", 25),
     Digit(#(26, 27), "q", 26),
     Digit(#(27, 28), "r", 27),
     Digit(#(28, 29), "s", 28),
     Digit(#(29, 30), "t", 29),
     Digit(#(30, 31), "u", 30),
     Digit(#(31, 32), "v", 31),
     Digit(#(32, 33), "w", 32),
     Digit(#(33, 34), "x", 33),
     Digit(#(34, 35), "y", 34),
     Digit(#(35, 36), "z", 35),
   ])
 }
 
 pub fn tokenize_int_base_36_test() {
   "159az"
-  |> tokenizer.tokenize_int(base: 36)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Digit(#(0, 1), "1", 1),
     Digit(#(1, 2), "5", 5),
     Digit(#(2, 3), "9", 9),
     Digit(#(3, 4), "a", 10),
     Digit(#(4, 5), "z", 35),
   ])
 }
 
 pub fn tokenize_int_with_0b_prefix_and_base_0_test() {
   "   0b1010b"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), space),
     Whitespace(#(2, 3), space),
-    BasePrefix(#(3, 5), "0b", base_2),
+    Digit(#(3, 4), "0", 0),
+    Digit(#(4, 5), "b", 11),
     Digit(#(5, 6), "1", 1),
     Digit(#(6, 7), "0", 0),
     Digit(#(7, 8), "1", 1),
     Digit(#(8, 9), "0", 0),
     Digit(#(9, 10), "b", 11),
   ])
 }
 
 pub fn tokenize_int_with_0o_prefix_and_base_0_test() {
   "   0o0123456780o"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), space),
     Whitespace(#(2, 3), space),
-    BasePrefix(#(3, 5), "0o", base_8),
+    Digit(#(3, 4), "0", 0),
+    Digit(#(4, 5), "o", 24),
     Digit(#(5, 6), "0", 0),
     Digit(#(6, 7), "1", 1),
     Digit(#(7, 8), "2", 2),
     Digit(#(8, 9), "3", 3),
     Digit(#(9, 10), "4", 4),
     Digit(#(10, 11), "5", 5),
     Digit(#(11, 12), "6", 6),
     Digit(#(12, 13), "7", 7),
     Digit(#(13, 14), "8", 8),
     Digit(#(14, 15), "0", 0),
     Digit(#(15, 16), "o", 24),
   ])
 }
 
 pub fn tokenize_int_with_0x_prefix_and_base_0_test() {
   " +0XDEAD_BEEF0x "
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Sign(#(1, 2), "+", True),
-    BasePrefix(#(2, 4), "0X", base_16),
+    Digit(#(2, 3), "0", 0),
+    Digit(#(3, 4), "X", 33),
     Digit(#(4, 5), "D", 13),
     Digit(#(5, 6), "E", 14),
     Digit(#(6, 7), "A", 10),
     Digit(#(7, 8), "D", 13),
     Underscore(#(8, 9)),
     Digit(#(9, 10), "B", 11),
     Digit(#(10, 11), "E", 14),
     Digit(#(11, 12), "E", 14),
     Digit(#(12, 13), "F", 15),
     Digit(#(13, 14), "0", 0),
     Digit(#(14, 15), "x", 33),
     Whitespace(#(15, 16), space),
   ])
 }
 
 pub fn tokenize_int_with_0b_prefix_and_base_2_test() {
   "   0b1010 a"
-  |> tokenizer.tokenize_int(base: base_2)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), space),
     Whitespace(#(2, 3), space),
-    BasePrefix(#(3, 5), "0b", base_2),
+    Digit(#(3, 4), "0", 0),
+    Digit(#(4, 5), "b", 11),
     Digit(#(5, 6), "1", 1),
     Digit(#(6, 7), "0", 0),
     Digit(#(7, 8), "1", 1),
     Digit(#(8, 9), "0", 0),
     Whitespace(#(9, 10), space),
     Unknown(#(10, 11), "a"),
   ])
 }
 
 pub fn tokenize_int_with_0o_prefix_and_base_8_test() {
   "   0o77 a"
-  |> tokenizer.tokenize_int(base: base_8)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), space),
     Whitespace(#(2, 3), space),
-    BasePrefix(#(3, 5), "0o", base_8),
+    Digit(#(3, 4), "0", 0),
+    Digit(#(4, 5), "o", 24),
     Digit(#(5, 6), "7", 7),
     Digit(#(6, 7), "7", 7),
     Whitespace(#(7, 8), space),
     Unknown(#(8, 9), "a"),
   ])
 }
 
 pub fn tokenize_int_with_0x_prefix_and_base_16_test() {
   "   0x_ABC ."
-  |> tokenizer.tokenize_int(base: base_16)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), space),
     Whitespace(#(2, 3), space),
-    BasePrefix(#(3, 5), "0x", base_16),
+    Digit(#(3, 4), "0", 0),
+    Digit(#(4, 5), "x", 33),
     Underscore(#(5, 6)),
     Digit(#(6, 7), "A", 10),
     Digit(#(7, 8), "B", 11),
     Digit(#(8, 9), "C", 12),
     Whitespace(#(9, 10), space),
     Unknown(#(10, 11), "."),
   ])
 }
 
 pub fn tokenize_int_with_lowercase_binary_prefix_and_base_0_test() {
   "0b101"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0b", base_2),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "b", 11),
     Digit(#(2, 3), "1", 1),
     Digit(#(3, 4), "0", 0),
     Digit(#(4, 5), "1", 1),
   ])
 }
 
 pub fn tokenize_int_with_uppercase_binary_prefix_and_base_0_test() {
   "0B101"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0B", base_2),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "B", 11),
     Digit(#(2, 3), "1", 1),
     Digit(#(3, 4), "0", 0),
     Digit(#(4, 5), "1", 1),
   ])
 }
 
 pub fn tokenize_int_with_lowercase_octal_prefix_and_base_0_test() {
   "0o777"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0o", base_8),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "o", 24),
     Digit(#(2, 3), "7", 7),
     Digit(#(3, 4), "7", 7),
     Digit(#(4, 5), "7", 7),
   ])
 }
 
 pub fn tokenize_int_with_uppercase_octal_prefix_and_base_0_test() {
   "0O777"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0O", base_8),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "O", 24),
     Digit(#(2, 3), "7", 7),
     Digit(#(3, 4), "7", 7),
     Digit(#(4, 5), "7", 7),
   ])
 }
 
 pub fn tokenize_int_with_lowercase_hexadecimal_prefix_and_base_0_test() {
   "0xABC"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0x", base_16),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "x", 33),
     Digit(#(2, 3), "A", 10),
     Digit(#(3, 4), "B", 11),
     Digit(#(4, 5), "C", 12),
   ])
 }
 
 pub fn tokenize_int_with_uppercase_hexadecimal_prefix_and_base_0_test() {
   "0XABC"
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0X", base_16),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "X", 33),
     Digit(#(2, 3), "A", 10),
     Digit(#(3, 4), "B", 11),
     Digit(#(4, 5), "C", 12),
   ])
 }
 
 pub fn tokenize_int_with_no_prefix_and_base_0_test() {
   " \n+1990_04_12.0e4 "
-  |> tokenizer.tokenize_int(base: base_0)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
     Whitespace(#(0, 1), space),
     Whitespace(#(1, 2), line_feed),
     Sign(#(2, 3), "+", True),
     Digit(#(3, 4), "1", 1),
     Digit(#(4, 5), "9", 9),
     Digit(#(5, 6), "9", 9),
     Digit(#(6, 7), "0", 0),
     Underscore(#(7, 8)),
     Digit(#(8, 9), "0", 0),
     Digit(#(9, 10), "4", 4),
     Underscore(#(10, 11)),
     Digit(#(11, 12), "1", 1),
     Digit(#(12, 13), "2", 2),
     DecimalPoint(#(13, 14)),
     Digit(#(14, 15), "0", 0),
     Digit(#(15, 16), "e", 14),
     Digit(#(16, 17), "4", 4),
     Whitespace(#(17, 18), space),
   ])
 }
 
 pub fn tokenize_int_with_base_2_and_binary_prefix_test() {
   "0b101"
-  |> tokenizer.tokenize_int(base: base_2)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0b", base_2),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "b", 11),
     Digit(#(2, 3), "1", 1),
     Digit(#(3, 4), "0", 0),
     Digit(#(4, 5), "1", 1),
   ])
 }
 
 pub fn tokenize_int_with_base_8_and_octal_prefix_test() {
   "0o777"
-  |> tokenizer.tokenize_int(base: base_8)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0o", base_8),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "o", 24),
     Digit(#(2, 3), "7", 7),
     Digit(#(3, 4), "7", 7),
     Digit(#(4, 5), "7", 7),
   ])
 }
 
 pub fn tokenize_int_with_base_16_and_hexadecimal_prefix_test() {
   "0xABC"
-  |> tokenizer.tokenize_int(base: base_16)
+  |> tokenizer.tokenize_int
   |> expect.to_equal([
-    BasePrefix(#(0, 2), "0x", base_16),
+    Digit(#(0, 1), "0", 0),
+    Digit(#(1, 2), "x", 33),
     Digit(#(2, 3), "A", 10),
     Digit(#(3, 4), "B", 11),
     Digit(#(4, 5), "C", 12),
   ])
 }
 
 pub fn tokenize_int_with_all_whitespace_characters_test() {
   whitespace_character_strings
   |> string.join("")
-  |> tokenizer.tokenize_int(base: base_10)
+  |> tokenizer.tokenize_int
   |> expect.to_equal(expected_tokens)
 }
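
As a quick illustration of the two fixes from the caller's point of view, here is a sketch built from the patch's own test data. The `lenient_parse.to_int_with_base` entry point and the expected values come from the diff above; the wrapper function name is illustrative only:

import lenient_parse

pub fn base_prefix_fixes_example() {
  // A base prefix substring later in the string is now plain digits: the
  // trailing "0b" in this base-16 string is the two hex digits 0x0B, not a
  // binary prefix, so the whole string parses (0xDEAD_BEEF0b).
  let assert Ok(956_397_711_115) =
    lenient_parse.to_int_with_base("0XDEAD_BEEF0b", 16)

  // In base 0, a leading 0 must be followed by a base specifier
  // (b/B, o/O, x/X), so an incomplete prefix like "01" is now rejected...
  let assert Error(_) = lenient_parse.to_int_with_base("01", 0)

  // ...while a bare "0" still parses as the integer zero.
  let assert Ok(0) = lenient_parse.to_int_with_base("0", 0)
}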