Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
JosephTLyons committed Dec 7, 2024
1 parent c7546ae commit e5367f2
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 14 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## v1.3.6 - 2024-12-XX

- Handle values too large for the JavaScript runtime
- TODO: New error(s) introduced

## v1.3.5 - 2024-11-20

- Fixed a bug where base prefix substrings would be recognized as such later in the string.
Expand Down
2 changes: 1 addition & 1 deletion src/lenient_parse.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ pub fn to_int_with_base(

text
|> tokenizer.tokenize_int
|> parser.parse_int_tokens(base:)
|> parser.parse_int_tokens(base)
}
63 changes: 60 additions & 3 deletions src/lenient_parse/internal/build.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ import bigi
import gleam/bool
import gleam/deque.{type Deque}
import gleam/int
import gleam/list
import gleam/string
import lenient_parse/internal/base_constants.{base_10}
import lenient_parse/internal/convert
import lenient_parse/internal/pilkku/pilkku
import lenient_parse/internal/scale
import parse_error.{type ParseError, NotASafeInteger}

pub fn float_value(
is_positive is_positive: Bool,
Expand All @@ -19,14 +22,17 @@ pub fn float_value(
let #(digits, _) = scale.deques(whole_digits, fractional_digits, exponent)

// `bigi.undigits` documentation says it can fail if:
// - the base is less than 2: We are hardcoding base 10, so this doesn't apply
// - the base is less than 2: We are hardcoding base 10, so this doesn't
// apply.
// - if the digits are out of range for the given base: For float parsing, the
// tokenizer has already marked these digits as `Unknown` tokens and the
// parser has already raised an error. Therefore, the error case here should
// be unreachable. We do not want to `let assert Ok()`, just in case there
// is some bug in the prior code. Using the fallback will result in some
// precision loss, but it is better than crashing.
let float_value = case digits |> deque.to_list |> bigi.undigits(base_10) {
// precision loss, but it is better than crashing. We may want to raise an
// actual error in the future.
let digits_list = digits |> deque.to_list
let float_value = case digits_list |> bigi.undigits(base_10) {
Ok(coefficient) -> {
let sign =
case is_positive {
Expand All @@ -46,6 +52,7 @@ pub fn float_value(
Ok(float_value) if float_value == 0.0 && !is_positive -> Ok(-0.0)
Ok(float_value) -> Ok(float_value)
// TODO: Add tests and return an error for this case
// TODO: Add error
Error(_) -> Error(Nil)
}
}
Expand All @@ -66,3 +73,53 @@ pub fn float_value(
}
}
}

/// Builds the final `Int` from digit values that the parser has already
/// validated.
///
/// - `digits`: the digit values of the number, in the order they appeared in
///   the input.
/// - `base`: the base the digits are expressed in.
/// - `is_positive`: when `False`, the resulting value is negated.
///
/// Returns `Error(NotASafeInteger(value))` when `bigi.to_int` rejects the
/// magnitude. `value` is the signed digit string in the given base. Each digit
/// is rendered as a single base-36 character (`0-9`, `A-Z`), so a digit above 9
/// is never spelled out as a multi-character decimal number (for example, the
/// hexadecimal digit 15 is rendered as `F`, not `15`).
pub fn integer_value(
  digits digits: Deque(Int),
  base base: Int,
  is_positive is_positive: Bool,
) -> Result(Int, ParseError) {
  // `bigi.undigits` documentation says it can fail if:
  // - the base is less than 2: We've already ensured that the user has picked
  //   a base >= 2 and <= 36, so this doesn't apply.
  // - if the digits are out of range for the given base: For integer parsing,
  //   the tokenizer has already marked these digits as `Unknown` tokens and the
  //   parser has already raised an error. Therefore, the error case here should
  //   be unreachable. We do not want to `let assert Ok()`, just in case there
  //   is some bug in the prior code. If the fallback is hit, issues may arise
  //   on JavaScript. We may want to raise an actual error in the future.
  let digits_list = digits |> deque.to_list
  case digits_list |> bigi.undigits(base) {
    Ok(big_int) ->
      case big_int |> bigi.to_int {
        Ok(value) ->
          case is_positive {
            True -> Ok(value)
            False -> Ok(-value)
          }
        Error(_) -> {
          // Every digit is below the base (at most 35), so `int.to_base36`
          // yields exactly one character per digit. For bases up to 10 this is
          // identical to the plain decimal rendering of each digit.
          let magnitude =
            digits_list |> list.map(int.to_base36) |> string.join("")
          let sign = case is_positive {
            True -> ""
            False -> "-"
          }
          Error(NotASafeInteger(sign <> magnitude))
        }
      }
    Error(_) -> {
      // Fallback for the supposedly unreachable `undigits` failure: compute
      // the value with the plain `Int` conversion instead of crashing.
      let value = digits |> convert.digits_to_int_with_base(base)
      case is_positive {
        True -> Ok(value)
        False -> Ok(-value)
      }
    }
  }
}
// TODO: For float, test limits and raise error
// TODO: Test erlang before and after negative safe integer check
// TODO: Test javascript before and after invalid base value check
9 changes: 2 additions & 7 deletions src/lenient_parse/internal/parser.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import lenient_parse/internal/base_constants.{
base_0, base_10, base_16, base_2, base_8,
}
import lenient_parse/internal/build
import lenient_parse/internal/convert.{digits_to_int, digits_to_int_with_base}
import lenient_parse/internal/convert.{digits_to_int}
import lenient_parse/internal/token.{
type Token, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore, Unknown,
Whitespace,
Expand Down Expand Up @@ -173,12 +173,7 @@ pub fn parse_int_tokens(
Error(BasePrefixOnly(index_range, prefix))
Some(_), _, True -> Error(WhitespaceOnlyString)
_, _, _ -> {
let value = digits |> digits_to_int_with_base(base:)
let value = case is_positive {
True -> value
False -> -value
}
Ok(value)
build.integer_value(digits:, base:, is_positive:)
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions src/parse_error.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,14 @@ pub type ParseError {
/// - `base`: The invalid base as an `Int`. The base must be between 2 and 36
/// inclusive.
InvalidBaseValue(base: Int)

/// Represents an error when the parsed number is outside the safe integer
/// range when run on the JavaScript target.
///
/// For more information on safe integers and their range, see:
/// - [MDN: Number.MAX_SAFE_INTEGER](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER)
/// - [MDN: Number.MIN_SAFE_INTEGER](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MIN_SAFE_INTEGER)
NotASafeInteger(value: String)
}
// TODO: An error for when the float is too big and cannot be represented on (JavaScript? Both targets?)
// Or should the error be more generalized to not specify the target?
1 change: 1 addition & 0 deletions test/build_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ pub fn build_float_explicit_0_whole_test() {
)
|> expect.to_equal(0.1)
}
// TODO: Build int tests
56 changes: 56 additions & 0 deletions test/data/integer/limit_test.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import lenient_parse
@target(javascript)
import parse_error.{NotASafeInteger}
import startest/expect

// Lower bound of the JavaScript safe integer range, -(2^53 - 1), as input
// text.
const min_safe_integer = "-9007199254740991"

// One below the lower bound; the JavaScript tests expect this to be rejected.
const min_safe_integer_minus_one = "-9007199254740992"

// Upper bound of the JavaScript safe integer range, 2^53 - 1, as input text.
const max_safe_integer = "9007199254740991"

// One above the upper bound; the JavaScript tests expect this to be rejected.
const max_safe_integer_plus_one = "9007199254740992"

// On the Erlang target, both the lower safe-integer bound and the value one
// below it are expected to parse successfully.
@target(erlang)
pub fn erlang_min_safe_integer_test() {
  let at_limit = lenient_parse.to_int(min_safe_integer)
  expect.to_equal(at_limit, Ok(-9_007_199_254_740_991))

  let past_limit = lenient_parse.to_int(min_safe_integer_minus_one)
  expect.to_equal(past_limit, Ok(-9_007_199_254_740_992))
}

// On the Erlang target, both the upper safe-integer bound and the value one
// above it are expected to parse successfully.
@target(erlang)
pub fn erlang_max_safe_integer_test() {
  let at_limit = lenient_parse.to_int(max_safe_integer)
  expect.to_equal(at_limit, Ok(9_007_199_254_740_991))

  let past_limit = lenient_parse.to_int(max_safe_integer_plus_one)
  expect.to_equal(past_limit, Ok(9_007_199_254_740_992))
}

// On the JavaScript target, the lower safe-integer bound parses, while the
// value one below it is expected to produce `NotASafeInteger` carrying the
// original input text.
@target(javascript)
pub fn javascript_min_safe_integer_test() {
  let at_limit = lenient_parse.to_int(min_safe_integer)
  expect.to_equal(at_limit, Ok(-9_007_199_254_740_991))

  let past_limit = lenient_parse.to_int(min_safe_integer_minus_one)
  expect.to_equal(
    past_limit,
    Error(NotASafeInteger(min_safe_integer_minus_one)),
  )
}

// On the JavaScript target, the upper safe-integer bound parses, while the
// value one above it is expected to produce `NotASafeInteger` carrying the
// original input text.
@target(javascript)
pub fn javascript_max_safe_integer_test() {
  let at_limit = lenient_parse.to_int(max_safe_integer)
  expect.to_equal(at_limit, Ok(9_007_199_254_740_991))

  let past_limit = lenient_parse.to_int(max_safe_integer_plus_one)
  expect.to_equal(
    past_limit,
    Error(NotASafeInteger(max_safe_integer_plus_one)),
  )
}
8 changes: 6 additions & 2 deletions test/helpers.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import lenient_parse/internal/whitespace.{type WhitespaceData}
import parse_error.{
type ParseError, BasePrefixOnly, EmptyString, InvalidBaseValue,
InvalidDecimalPosition, InvalidDigitPosition, InvalidExponentSymbolPosition,
InvalidSignPosition, InvalidUnderscorePosition, OutOfBaseRange,
UnknownCharacter, WhitespaceOnlyString,
InvalidSignPosition, InvalidUnderscorePosition, NotASafeInteger,
OutOfBaseRange, UnknownCharacter, WhitespaceOnlyString,
}

pub fn to_printable_text(text: String) -> String {
Expand Down Expand Up @@ -84,5 +84,9 @@ pub fn error_to_string(error: ParseError) -> String {
<> "\" at index: "
<> index |> int.to_string
InvalidBaseValue(base) -> "invalid base value: " <> base |> int.to_string
NotASafeInteger(value) ->
"integer value \""
<> value
<> "\" cannot safely be represented on the JavaScript target"
}
}
2 changes: 1 addition & 1 deletion test/to_int_parse_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub fn to_int_tests() {
use <- it(message)

input
|> lenient_parse.to_int_with_base(base:)
|> lenient_parse.to_int_with_base(base)
|> expect.to_equal(expected_program_output)
}),
)
Expand Down

0 comments on commit e5367f2

Please sign in to comment.