Skip to content

Commit

Permalink
Support scientific notation (#39)
Browse files Browse the repository at this point in the history
  • Loading branch information
JosephTLyons authored Nov 6, 2024
1 parent 3c42420 commit c328aff
Show file tree
Hide file tree
Showing 17 changed files with 871 additions and 125 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

## v1.3.0 - xxx

Breaking changes:
- Added support for scientific notation for float parsing.
- Ex: `lenient_parse.to_float("4e-3")` // Ok(0.004)
- Breaking changes:
- `GleamIntParseError` and `GleamIntParseErrorReason` have been removed.
- `InvalidCharacter` has been renamed to `UnknownCharacter`.
- A new `InvalidDigitPosition` error has been introduced.
Expand Down
111 changes: 104 additions & 7 deletions src/lenient_parse/internal/parse.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ import gleam/bool
import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
import gleam/order
import gleam/result
import lenient_parse/internal/token.{
type Token, DecimalPoint, Digit, Sign, Underscore, Unknown, Whitespace,
type Token, DecimalPoint, Digit, Exponent, Sign, Underscore, Unknown,
Whitespace,
}

import lenient_parse/internal/tokenizer
import parse_error.{
type ParseError, EmptyString, InvalidDecimalPosition,
type ParseError, EmptyString, InvalidDecimalPosition, InvalidExponentPosition,
InvalidUnderscorePosition, UnknownCharacter, WhitespaceOnlyString,
}

Expand All @@ -30,11 +33,44 @@ pub fn parse_float(input: String) -> Result(Float, ParseError) {
let decimal_result = parse_decimal_point(tokens, index)
use #(decimal_specified, tokens, index) <- result.try(decimal_result)

let fractional_digit_result = parse_digit(tokens, index)
let fractional_digit_result = case decimal_specified {
True -> parse_digit(tokens, index)
False -> Ok(#(None, 0, tokens, index))
}
use #(fractional_digit, fractional_length, tokens, index) <- result.try(
fractional_digit_result,
)

let exponent_symbol_result = parse_exponent_symbol(tokens, index)
use #(exponent_symbol, tokens, index) <- result.try(exponent_symbol_result)

let exponent_sign_result = case exponent_symbol {
Some(exponent_symbol) -> {
use <- bool.guard(
option.is_none(whole_digit) && option.is_none(fractional_digit),
Error(InvalidExponentPosition(exponent_symbol, index - 1)),
)
parse_sign(tokens, index)
}
None -> Ok(#(False, tokens, index))
}
use #(exponent_digit_is_positive, tokens, index) <- result.try(
exponent_sign_result,
)

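// TODO: Simplify this. An exponent symbol must be followed by at least one
// digit; the catch-all below reports every other outcome of `parse_digit`,
// including its errors, as `InvalidExponentPosition`.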
let exponent_digit_result = case exponent_symbol {
Some(exponent_symbol) -> {
case parse_digit(tokens, index) {
Ok(#(Some(digit), digit_length, tokens, index)) ->
Ok(#(digit, digit_length, tokens, index))
_ -> Error(InvalidExponentPosition(exponent_symbol, index - 1))
}
}
None -> Ok(#(0, 1, tokens, index))
}
use #(exponent_digit, _, tokens, index) <- result.try(exponent_digit_result)

let trailing_whitespace_result = parse_whitespace(tokens, index)
use #(_, tokens, index) <- result.try(trailing_whitespace_result)

Expand All @@ -48,20 +84,26 @@ pub fn parse_float(input: String) -> Result(Float, ParseError) {
whole_digit: whole_digit,
fractional_digit: fractional_digit,
fractional_length: fractional_length,
exponent_digit_is_positive: exponent_digit_is_positive,
exponent_digit: exponent_digit,
))
Some(whole_digit), None ->
Ok(form_float(
is_positive: is_positive,
whole_digit: whole_digit,
fractional_digit: 0,
fractional_length: fractional_length,
exponent_digit_is_positive: exponent_digit_is_positive,
exponent_digit: exponent_digit,
))
None, Some(fractional_digit) ->
Ok(form_float(
is_positive: is_positive,
whole_digit: 0,
fractional_digit: fractional_digit,
fractional_length: fractional_length,
exponent_digit_is_positive: exponent_digit_is_positive,
exponent_digit: exponent_digit,
))
_, _ -> {
// TODO: Hardcoded to handle one specific test case during the rewrite: ".". Replace with general handling.
Expand Down Expand Up @@ -162,7 +204,7 @@ fn parse_sign(
[first, ..rest] -> {
case first {
Unknown(character) -> Error(UnknownCharacter(character, index))
Sign(is_positive) -> Ok(#(is_positive, rest, index + 1))
Sign(_, is_positive) -> Ok(#(is_positive, rest, index + 1))
_ -> Ok(#(True, tokens, index))
}
}
Expand All @@ -179,7 +221,23 @@ fn parse_decimal_point(
case first {
Unknown(character) -> Error(UnknownCharacter(character, index))
DecimalPoint -> Ok(#(True, rest, index + 1))
_ -> Ok(#(False, rest, index))
_ -> Ok(#(False, tokens, index))
}
}
}
}

fn parse_exponent_symbol(
tokens: List(Token),
index: Int,
) -> Result(#(Option(String), List(Token), Int), ParseError) {
case tokens {
[] -> Ok(#(None, tokens, index))
[first, ..rest] -> {
case first {
Unknown(character) -> Error(UnknownCharacter(character, index))
Exponent(exponent) -> Ok(#(Some(exponent), rest, index + 1))
_ -> Ok(#(None, tokens, index))
}
}
}
Expand Down Expand Up @@ -223,6 +281,7 @@ fn do_parse_digit(
}

case first {
Unknown(character) -> Error(UnknownCharacter(character, index))
Digit(digit) -> {
do_parse_digit(
tokens: rest,
Expand All @@ -247,7 +306,6 @@ fn do_parse_digit(
}
Whitespace(whitespace) if at_beginning ->
Error(UnknownCharacter(whitespace, index))
Unknown(character) -> Error(UnknownCharacter(character, index))
_ -> {
case digit_length > 0 {
True -> Ok(#(Some(acc), digit_length, tokens, index))
Expand All @@ -264,17 +322,26 @@ fn form_float(
whole_digit whole_digit: Int,
fractional_digit fractional_digit: Int,
fractional_length fractional_length: Int,
exponent_digit_is_positive exponent_digit_is_positive: Bool,
exponent_digit exponent_digit: Int,
) -> Float {
let whole_float = whole_digit |> int.to_float
let fractional_float =
fractional_digit
|> int.to_float
|> normalize_fractional(fractional_length)
let float_value = whole_float +. fractional_float
case is_positive {
let float_value = case is_positive {
True -> float_value
False -> float_value *. -1.0
}

let exponent_digit = case exponent_digit_is_positive {
True -> exponent_digit
False -> exponent_digit * -1
}

power(float_value, exponent_digit)
}

fn normalize_fractional(fractional: Float, fractional_length: Int) -> Float {
Expand All @@ -283,3 +350,33 @@ fn normalize_fractional(fractional: Float, fractional_length: Int) -> Float {
False -> normalize_fractional(fractional /. 10.0, fractional_length - 1)
}
}

/// Scales `base` by a power of ten, returning `base * 10^exponent`.
///
/// Despite the name, `base` is not raised to `exponent`: `exponent` is the
/// power-of-ten magnitude taken from scientific notation (e.g. the `-3` in
/// `4e-3`). A negative `exponent` makes `do_power` divide by the accumulated
/// scale factor instead of multiplying by it.
fn power(base: Float, exponent: Int) -> Float {
  do_power(
    base: base,
    exponent: exponent,
    // Start at 10^0; `do_power` multiplies this by ten once per exponent step.
    scale_factor: 1,
    exponent_is_positive: exponent >= 0,
  )
}

/// Recursive worker for `power`.
///
/// Each call moves `exponent` one step toward zero and multiplies
/// `scale_factor` by ten. Once `exponent` reaches zero, `base` is multiplied
/// by the scale factor when `exponent_is_positive` is `True`, and divided by
/// it otherwise.
///
/// The scale factor is accumulated as an `Int` and converted to a `Float`
/// only once, in the final step.
fn do_power(
  base base: Float,
  exponent exponent: Int,
  scale_factor scale_factor: Int,
  exponent_is_positive exponent_is_positive: Bool,
) -> Float {
  case int.compare(exponent, 0) {
    order.Eq -> {
      // NOTE(review): for very large exponents `scale_factor` may exceed what
      // a Float can represent; how `int.to_float` behaves then presumably
      // depends on the compilation target - confirm and guard in the caller.
      let scale = int.to_float(scale_factor)
      case exponent_is_positive {
        True -> base *. scale
        False -> base /. scale
      }
    }
    // Labelled calls keep the two `Int` arguments from being transposed.
    order.Gt ->
      do_power(
        base: base,
        exponent: exponent - 1,
        scale_factor: scale_factor * 10,
        exponent_is_positive: exponent_is_positive,
      )
    order.Lt ->
      do_power(
        base: base,
        exponent: exponent + 1,
        scale_factor: scale_factor * 10,
        exponent_is_positive: exponent_is_positive,
      )
  }
}
10 changes: 6 additions & 4 deletions src/lenient_parse/internal/token.gleam
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
import gleam/int
import parse_error.{
type ParseError, InvalidDecimalPosition, InvalidDigitPosition,
InvalidSignPosition, InvalidUnderscorePosition, UnknownCharacter,
InvalidExponentPosition, InvalidSignPosition, InvalidUnderscorePosition,
UnknownCharacter,
}

pub type Token {
Sign(Bool)
Sign(String, Bool)
Digit(Int)
Underscore
DecimalPoint
Exponent(String)
Whitespace(String)
Unknown(String)
}

pub fn to_error(token: Token, index) -> ParseError {
case token {
Sign(True) -> InvalidSignPosition("+", index)
Sign(False) -> InvalidSignPosition("-", index)
Sign(sign, _) -> InvalidSignPosition(sign, index)
Digit(digit) -> InvalidDigitPosition(digit |> int.to_string, index)
Underscore -> InvalidUnderscorePosition(index)
DecimalPoint -> InvalidDecimalPosition(index)
Exponent(exponent) -> InvalidExponentPosition(exponent, index)
Whitespace(whitespace) -> UnknownCharacter(whitespace, index)
Unknown(character) -> UnknownCharacter(character, index)
}
Expand Down
8 changes: 5 additions & 3 deletions src/lenient_parse/internal/tokenizer.gleam
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import gleam/list
import gleam/string
import lenient_parse/internal/token.{
type Token, DecimalPoint, Digit, Sign, Underscore, Unknown, Whitespace,
type Token, DecimalPoint, Digit, Exponent, Sign, Underscore, Unknown,
Whitespace,
}

pub fn tokenize(text: String) -> List(Token) {
Expand All @@ -13,8 +14,8 @@ fn do_tokenize(characters: List(String), acc: List(Token)) -> List(Token) {
[] -> acc |> list.reverse
[first, ..rest] -> {
let token = case first {
"-" -> Sign(False)
"+" -> Sign(True)
"-" -> Sign("-", False)
"+" -> Sign("+", True)
"0" -> Digit(0)
"1" -> Digit(1)
"2" -> Digit(2)
Expand All @@ -25,6 +26,7 @@ fn do_tokenize(characters: List(String), acc: List(Token)) -> List(Token) {
"7" -> Digit(7)
"8" -> Digit(8)
"9" -> Digit(9)
"e" | "E" -> Exponent(first)
"." -> DecimalPoint
"_" -> Underscore
" " | "\n" | "\t" | "\r" | "\f" | "\r\n" -> Whitespace(first)
Expand Down
30 changes: 21 additions & 9 deletions src/parse_error.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,20 @@ pub type ParseError {
/// - `index`: The position of the invalid sign in the input string.
InvalidSignPosition(character: String, index: Int)

/// Represents an error when a digit is in an invalid position within
/// the number string.
/// Represents an error when a digit is in an invalid position within the
/// number string.
///
/// - `character`: The digit character that caused the error as a `String`.
/// - `index`: The position of the invalid digit in the input string.
InvalidDigitPosition(character: String, index: Int)

/// Represents an error when an exponent character (e or E) is in an invalid
/// position within the number string.
///
/// - `character`: The exponent character that caused the error as a `String`.
/// - `index`: The position of the invalid exponent in the input string.
InvalidExponentPosition(character: String, index: Int)

/// Represents an error when an invalid character is encountered during
/// parsing.
///
Expand All @@ -45,15 +52,10 @@ pub type ParseError {
@internal
pub fn to_string(error: ParseError) -> String {
case error {
UnknownCharacter(character, index) ->
"unknown character \""
<> character
<> "\" at index: "
<> index |> int.to_string
InvalidUnderscorePosition(index) ->
"invalid underscore at position: " <> index |> int.to_string
EmptyString -> "empty string"
WhitespaceOnlyString -> "whitespace only string"
InvalidUnderscorePosition(index) ->
"invalid underscore at position: " <> index |> int.to_string
InvalidDecimalPosition(index) ->
"invalid decimal at position: " <> index |> int.to_string
InvalidSignPosition(sign, index) ->
Expand All @@ -63,5 +65,15 @@ pub fn to_string(error: ParseError) -> String {
<> digit
<> "\" at position: "
<> index |> int.to_string
InvalidExponentPosition(exponent, index) ->
"invalid exponent \""
<> exponent
<> "\" at position: "
<> index |> int.to_string
UnknownCharacter(character, index) ->
"unknown character \""
<> character
<> "\" at index: "
<> index |> int.to_string
}
}
18 changes: 18 additions & 0 deletions test/data.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import data/float/invalid_float_data
import data/float/valid_float_data
import data/integer/invalid_integer_data
import data/integer/valid_integer_data
import gleam/list
import types.{
type FloatTestData, type IntegerTestData, FloatTestData, IntegerTestData,
}

/// Returns every float test case: the valid cases first, followed by the
/// invalid ones.
pub fn float_data() -> List(FloatTestData) {
  let valid_cases = valid_float_data.data()
  let invalid_cases = invalid_float_data.data()
  list.append(valid_cases, invalid_cases)
}

/// Returns every integer test case: the valid cases first, followed by the
/// invalid ones.
pub fn integer_data() -> List(IntegerTestData) {
  let valid_cases = valid_integer_data.data()
  let invalid_cases = invalid_integer_data.data()
  list.append(valid_cases, invalid_cases)
}
Loading

0 comments on commit c328aff

Please sign in to comment.