-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP - An alternative approach to #31 (see also #33) #34
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
package io.circe.yaml.parser | ||
|
||
import cats.data.ValidatedNel | ||
import cats.instances.list._ | ||
import cats.syntax.either._ | ||
import cats.syntax.traverse._ | ||
import io.circe.{Json, JsonNumber, JsonObject, ParsingFailure} | ||
import org.yaml.snakeyaml.constructor.SafeConstructor | ||
import org.yaml.snakeyaml.nodes._ | ||
import scala.collection.JavaConverters._ | ||
import scala.collection.immutable.Queue | ||
|
||
/**
 * An algebra for folding a SnakeYAML [[Node]] tree into a value of type `T`.
 *
 * Implementations provide one handler per scalar tag plus the two collection builders
 * ([[fromValues]] and [[fromFields]]); [[any]] dispatches on the node's class and tag.
 */
abstract class NodeAlg[T] {
  /** Handles a scalar tagged `Tag.INT`. */
  def int(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.FLOAT`. */
  def float(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.TIMESTAMP`. */
  def timestamp(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.BOOL`. */
  def bool(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.NULL`. */
  def yNull(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.STR`. */
  def string(node: ScalarNode): T

  /** Handles a scalar carrying any tag not covered by the other scalar handlers. */
  def otherScalar(node: ScalarNode): T

  /** Converts each element with [[any]], in document order, and combines them with [[fromValues]]. */
  def sequence(node: SequenceNode): T = fromValues {
    node.getValue.asScala.foldLeft(Queue.empty[T]) {
      (accum, next) => accum enqueue any(next)
    }
  }

  /**
   * Converts each entry's value with [[any]] and combines the keyed results with [[fromFields]].
   *
   * @throws ParsingFailure if a key is not a scalar node
   */
  def mapping(node: MappingNode): T = fromFields {
    node.getValue.asScala.map {
      nodeTuple => nodeTuple.getKeyNode match {
        case keyNode: ScalarNode => keyNode.getValue -> any(nodeTuple.getValueNode)
        case _ =>
          // Supply a real cause instead of null so that callers inspecting the
          // failure's underlying exception never dereference null.
          val message = "Only string keys can be represented in JSON"
          throw ParsingFailure(message, new IllegalArgumentException(message))
      }
    }
  }

  /** Builds the result for a sequence from its already-converted elements. */
  def fromValues(ts: Iterable[T]): T

  /** Builds the result for a mapping from its already-converted `(key, value)` pairs. */
  def fromFields(ts: Iterable[(String, T)]): T

  /** Entry point: dispatches on the concrete node class and, for scalars, on the node's tag. */
  final def any(node: Node): T = node match {
    case node: ScalarNode => node.getTag match {
      case Tag.INT => int(node)
      case Tag.FLOAT => float(node)
      case Tag.TIMESTAMP => timestamp(node)
      case Tag.BOOL => bool(node)
      case Tag.NULL => yNull(node)
      case Tag.STR => string(node)
      case _ => otherScalar(node)
    }
    case node: SequenceNode => sequence(node)
    case node: MappingNode => mapping(node)
  }
}
|
||
/**
 * Adapts a throwing [[NodeAlg]] into one that reports failures as `Left(ParsingFailure)`.
 *
 * Every handler delegates to `lifted` and captures non-fatal exceptions.
 */
final class LiftedAlg[A](lifted: NodeAlg[A]) extends NodeAlg[Either[ParsingFailure, A]] {
  // A ParsingFailure raised by the underlying algebra already carries a specific message
  // (e.g. the non-string-key failure), so it is passed through unchanged; any other
  // exception is wrapped with a description of what was being parsed.
  private def wrap(what: String)(err: Throwable): ParsingFailure = err match {
    case failure: ParsingFailure => failure
    case other => ParsingFailure(s"Failed to parse $what", other)
  }

  // Evaluates `value`, converting any non-fatal exception into a Left.
  private def attempt(what: String)(value: => A): Either[ParsingFailure, A] =
    Either.catchNonFatal(value).leftMap(wrap(what))

  def int(node: ScalarNode): Either[ParsingFailure, A] = attempt("integer value")(lifted.int(node))

  def float(node: ScalarNode): Either[ParsingFailure, A] = attempt("float value")(lifted.float(node))

  def timestamp(node: ScalarNode): Either[ParsingFailure, A] = attempt("timestamp value")(lifted.timestamp(node))

  def bool(node: ScalarNode): Either[ParsingFailure, A] = attempt("boolean value")(lifted.bool(node))

  def yNull(node: ScalarNode): Either[ParsingFailure, A] = attempt("null value")(lifted.yNull(node))

  def string(node: ScalarNode): Either[ParsingFailure, A] = attempt("string value")(lifted.string(node))

  def otherScalar(node: ScalarNode): Either[ParsingFailure, A] = attempt("scalar value")(lifted.otherScalar(node))

  // Collections are delegated wholesale, so the underlying algebra's own
  // sequence/mapping overrides are honoured.
  override def sequence(node: SequenceNode): Either[ParsingFailure, A] = attempt("sequence")(lifted.sequence(node))

  override def mapping(node: MappingNode): Either[ParsingFailure, A] = attempt("mapping")(lifted.mapping(node))

  /** Combines already-lifted elements; the first `Left` encountered short-circuits the build. */
  def fromValues(ts: Iterable[Either[ParsingFailure, A]]): Either[ParsingFailure, A] = try {
    Either.right {
      lifted.fromValues {
        // Throwing is used only to abort the traversal; it is caught immediately below.
        ts.map(_.valueOr(throw _))
      }
    }
  } catch {
    case f @ ParsingFailure(_, _) => Either.left(f)
  }

  /** Combines already-lifted fields; the first `Left` encountered short-circuits the build. */
  def fromFields(ts: Iterable[(String, Either[ParsingFailure, A])]): Either[ParsingFailure, A] = try {
    Either.right {
      lifted.fromFields {
        ts.map {
          case (key, value) => key -> value.valueOr(throw _)
        }
      }
    }
  } catch {
    case f @ ParsingFailure(_, _) => Either.left(f)
  }
}
|
||
/**
 * Adapts a throwing [[NodeAlg]] into one that accumulates every failure found in a
 * document into a `ValidatedNel` instead of stopping at the first.
 *
 * Scalars are handled through a [[LiftedAlg]]; collections are traversed here so that
 * failures from sibling elements can be combined.
 *
 * NOTE(review): because collections are traversed by this class, `base.sequence` and
 * `base.mapping` are never invoked, so any preprocessing an algebra does in those
 * overrides is skipped in accumulating mode. Confirm whether that is intended.
 */
final class AccumulatingAlg[A](base: NodeAlg[A]) extends NodeAlg[ValidatedNel[ParsingFailure, A]] {
  private val lifted = new LiftedAlg(base)

  // Runs a collection traversal, turning an exception thrown along the way (a non-scalar
  // mapping key, or a failure inside base.fromFields / base.fromValues) into an Invalid
  // result rather than letting it escape to the caller.
  private def guard(what: String)(
    result: => ValidatedNel[ParsingFailure, A]
  ): ValidatedNel[ParsingFailure, A] =
    Either.catchNonFatal(result).valueOr { err =>
      val failure = err match {
        case parsingFailure: ParsingFailure => parsingFailure
        case other => ParsingFailure(s"Failed to parse $what", other)
      }
      Either.left[ParsingFailure, A](failure).toValidatedNel
    }

  def int(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.int(node).toValidatedNel
  def float(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.float(node).toValidatedNel
  def timestamp(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.timestamp(node).toValidatedNel
  def bool(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.bool(node).toValidatedNel
  def yNull(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.yNull(node).toValidatedNel
  def string(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.string(node).toValidatedNel
  def otherScalar(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.otherScalar(node).toValidatedNel

  override def sequence(node: SequenceNode): ValidatedNel[ParsingFailure, A] =
    guard("sequence")(super.sequence(node))

  override def mapping(node: MappingNode): ValidatedNel[ParsingFailure, A] =
    guard("mapping")(super.mapping(node))

  /** Combines validated fields, accumulating all failures, then builds the result with `base.fromFields`. */
  def fromFields(ts: Iterable[(String, ValidatedNel[ParsingFailure, A])]): ValidatedNel[ParsingFailure, A] =
    ts.toList.traverseU {
      case (key, value) => value.map(key -> _)
    }.map(base.fromFields)

  /** Combines validated elements, accumulating all failures, then builds the result with `base.fromValues`. */
  def fromValues(ts: Iterable[ValidatedNel[ParsingFailure, A]]): ValidatedNel[ParsingFailure, A] =
    ts.toList.sequenceU.map(base.fromValues)
}
|
||
/**
 * The default YAML-to-[[Json]] algebra.
 *
 * Numbers become JSON numbers, timestamps and strings become JSON strings, and scalars
 * with a custom (non-`Tag.PREFIX`) tag become a single-field object keyed by the tag name.
 * Mappings are passed through SnakeYAML's `flattenMapping` before conversion.
 */
class DefaultAlg extends NodeAlg[Json] {
  // SafeConstructor.flattenMapping is not publicly callable; this subclass exposes it.
  protected object Constructor extends SafeConstructor {
    def flatten(node: MappingNode): Unit = flattenMapping(node)
  }

  /**
   * Parses a numeric string into a JSON number.
   *
   * @throws NumberFormatException if `str` is not a valid JSON number
   */
  final protected def number(str: String): Json = JsonNumber.fromString(str).map(Json.fromJsonNumber).getOrElse {
    throw new NumberFormatException(s"Invalid numeric string $str")
  }

  def int(node: ScalarNode): Json = number(node.getValue)
  def float(node: ScalarNode): Json = number(node.getValue)
  def timestamp(node: ScalarNode): Json = Json.fromString(node.getValue)

  /**
   * Converts a YAML boolean scalar.
   *
   * YAML 1.1 booleans include `yes`/`no` and `on`/`off` (in any of the usual casings) in
   * addition to `true`/`false`; `String.toBoolean` accepts only the latter pair and would
   * throw on the others.
   *
   * @throws IllegalArgumentException if the scalar is not a recognised boolean literal
   */
  def bool(node: ScalarNode): Json = node.getValue.toLowerCase(java.util.Locale.ROOT) match {
    case "true" | "yes" | "on" => Json.fromBoolean(true)
    case "false" | "no" | "off" => Json.fromBoolean(false)
    case _ => throw new IllegalArgumentException(s"Invalid boolean string ${node.getValue}")
  }

  def yNull(node: ScalarNode): Json = Json.Null
  def string(node: ScalarNode): Json = Json.fromString(node.getValue)

  // Custom tags (those outside Tag.PREFIX) are kept by wrapping the value in an object
  // keyed by the tag name without its leading "!".
  def otherScalar(node: ScalarNode): Json = if (!node.getTag.startsWith(Tag.PREFIX)) {
    Json.fromJsonObject(JsonObject.singleton(node.getTag.getValue.stripPrefix("!"), Json.fromString(node.getValue)))
  } else Json.fromString(node.getValue)

  def fromValues(ts: Iterable[Json]): Json = Json.fromValues(ts)
  def fromFields(ts: Iterable[(String, Json)]): Json = Json.fromFields(ts)

  // flatten mutates the node in place, so it must run before the entries are read.
  override def mapping(node: MappingNode): Json = {
    Constructor.flatten(node)
    super.mapping(node)
  }
}
|
||
case class ConfiguredAlg( | ||
numericTimestamps: Boolean | ||
) extends DefaultAlg { | ||
final override def timestamp(node: ScalarNode): Json = if (!numericTimestamps) { | ||
super.timestamp(node) | ||
} else { | ||
val constructor = new SafeConstructor.ConstructYamlTimestamp() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some part of me doesn't really like the idea of instantiating a constructor for every single timestamp node (especially since it involves a non-trivial amount of setting up its own internal state), but it probably doesn't matter much. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because of that mutable internal state, I'm not sure what the other option is. You're not guaranteed that the algebra isn't being used from multiple threads. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unfortunately they don't expose just the timestamp parsing logic - otherwise we could bypass this allocation entirely. Since the spec is pretty fixed, maybe it's worthwhile just duplicating their regexes and doing it internally here? |
||
constructor.construct(node) | ||
Json.fromLong(constructor.getCalendar.getTimeInMillis) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package io.circe.yaml.parser | ||
|
||
|
||
import cats.data.ValidatedNel | ||
import cats.syntax.either._ | ||
import io.circe._ | ||
import java.io.{Reader, StringReader} | ||
import org.yaml.snakeyaml.Yaml | ||
import scala.collection.JavaConverters._ | ||
|
||
/**
 * Parses YAML into [[Json]] using the supplied node algebra.
 *
 * @param algebra converts composed YAML nodes into [[Json]]; it may signal failure by throwing
 */
class Parser(algebra: NodeAlg[Json] = new DefaultAlg) {

  /**
   * Configure the parser
   * @param numericTimestamps if true, timestamps will be returned as epoch millisecond [[Long]]s
   * @return A configured parser
   */
  def configured(
    numericTimestamps: Boolean = false
  ): Parser = new Parser(ConfiguredAlg(
    numericTimestamps = numericTimestamps
  ))

  /**
   * Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
   */
  def parse(yaml: Reader): Either[ParsingFailure, Json] = for {
    parsed <- parseSingle(yaml)
    json <- Either.catchNonFatal(algebra.any(parsed)).leftMap {
      case p @ ParsingFailure(_, _) => p
      case err => ParsingFailure(err.getMessage, err)
    }
  } yield json

  /**
   * Parse YAML from the given [[Reader]], accumulating errors and returning either a list of [[ParsingFailure]]s
   * or a [[Json]]
   */
  def parseAccumulating(yaml: Reader): ValidatedNel[ParsingFailure, Json] = parseSingle(yaml).toValidatedNel andThen {
    parsed => new AccumulatingAlg(algebra).any(parsed)
  }

  /**
   * Parse YAML from the given string, returning either [[ParsingFailure]] or [[Json]]
   */
  def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

  /**
   * Parse YAML from the given string, accumulating errors and returning either a list of [[ParsingFailure]]s
   * or a [[Json]]
   */
  def parseAccumulating(yaml: String): ValidatedNel[ParsingFailure, Json] = parseAccumulating(new StringReader(yaml))

  /**
   * Parse a succession of documents from the given [[Reader]], returning the result as a [[Stream]] of [[Either]].
   * A document that cannot be composed yields a final `Left` element, after which the stream ends.
   */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = {
    val alg = new LiftedAlg(algebra)
    parseStream(yaml).map(composed => composed.flatMap(node => alg.any(node)))
  }

  /**
   * Parse a succession of documents from the given [[Reader]], accumulating errors within each document and
   * returning the result as a [[Stream]] of [[ValidatedNel]].
   * A document that cannot be composed yields a final invalid element, after which the stream ends.
   */
  def parseDocumentsAccumulating(yaml: Reader): Stream[ValidatedNel[ParsingFailure, Json]] = {
    val alg = new AccumulatingAlg(algebra)
    parseStream(yaml).map(composed => composed.toValidatedNel.andThen(node => alg.any(node)))
  }

  /**
   * Parse a succession of documents from the given string, returning the result as a [[Stream]] of [[Either]]
   */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

  /**
   * Parse a succession of documents from the given string, accumulating errors within each document and
   * returning the result as a [[Stream]] of [[ValidatedNel]]
   */
  def parseDocumentsAccumulating(yaml: String): Stream[ValidatedNel[ParsingFailure, Json]] =
    parseDocumentsAccumulating(new StringReader(yaml))

  // Composes a single document, capturing any non-fatal composition error.
  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

  // Lazily composes each document. The underlying iterator is consulted only as the stream
  // is forced, so its calls are guarded individually; after the first failure the stream
  // ends, because the composer's state can no longer be relied upon.
  private[this] def parseStream(reader: Reader): Stream[Either[ParsingFailure, org.yaml.snakeyaml.nodes.Node]] = {
    type Composed = Either[ParsingFailure, org.yaml.snakeyaml.nodes.Node]

    def failed(err: Throwable): Stream[Composed] =
      Stream(Either.left[ParsingFailure, org.yaml.snakeyaml.nodes.Node](ParsingFailure(err.getMessage, err)))

    Either.catchNonFatal(new Yaml().composeAll(reader).iterator()) match {
      case Left(err) => failed(err)
      case Right(nodes) =>
        def remaining(): Stream[Composed] = Either.catchNonFatal(nodes.hasNext) match {
          case Left(err) => failed(err)
          case Right(false) => Stream.empty
          case Right(true) => Either.catchNonFatal(nodes.next()) match {
            case Left(err) => failed(err)
            case Right(node) => Either.right[ParsingFailure, org.yaml.snakeyaml.nodes.Node](node) #:: remaining()
          }
        }
        remaining()
    }
  }

}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,94 +1,3 @@ | ||
package io.circe.yaml | ||
|
||
import cats.syntax.either._ | ||
import io.circe._ | ||
import java.io.{Reader, StringReader} | ||
import org.yaml.snakeyaml.Yaml | ||
import org.yaml.snakeyaml.constructor.SafeConstructor | ||
import org.yaml.snakeyaml.nodes._ | ||
import scala.collection.JavaConverters._ | ||
|
||
package object parser {

  /**
   * Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
   * @param yaml reader supplying a single YAML document
   * @return the converted [[Json]], or the first [[ParsingFailure]] encountered
   */
  def parse(yaml: Reader): Either[ParsingFailure, Json] = for {
    parsed <- parseSingle(yaml)
    json <- yamlToJson(parsed)
  } yield json

  /** Parse YAML from the given string, returning either [[ParsingFailure]] or [[Json]] */
  def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

  /** Parse a succession of documents from the given [[Reader]] as a [[Stream]] of [[Either]] */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = parseStream(yaml).map(yamlToJson)

  /** Parse a succession of documents from the given string as a [[Stream]] of [[Either]] */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

  private[this] def parseStream(reader: Reader) =
    new Yaml().composeAll(reader).asScala.toStream

  // Extracts the tag value of any tag that lies outside Tag.PREFIX.
  private[this] object CustomTag {
    def unapply(tag: Tag): Option[String] = if (!tag.startsWith(Tag.PREFIX))
      Some(tag.getValue)
    else
      None
  }

  // SafeConstructor.flattenMapping is not publicly callable; this subclass exposes it.
  private[this] class FlatteningConstructor extends SafeConstructor {
    def flatten(node: MappingNode): MappingNode = {
      flattenMapping(node)
      node
    }
  }

  private[this] val flattener: FlatteningConstructor = new FlatteningConstructor

  // Recursively converts a composed YAML node into Json, stopping at the first failure.
  private[this] def yamlToJson(node: Node): Either[ParsingFailure, Json] = {

    def convertScalarNode(node: ScalarNode) = Either.catchNonFatal(node.getTag match {
      case Tag.INT | Tag.FLOAT => JsonNumber.fromString(node.getValue).map(Json.fromJsonNumber).getOrElse {
        throw new NumberFormatException(s"Invalid numeric string ${node.getValue}")
      }
      // YAML 1.1 booleans include yes/no and on/off, which String.toBoolean rejects.
      case Tag.BOOL => node.getValue.toLowerCase(java.util.Locale.ROOT) match {
        case "true" | "yes" | "on" => Json.fromBoolean(true)
        case "false" | "no" | "off" => Json.fromBoolean(false)
        case _ => throw new IllegalArgumentException(s"Invalid boolean string ${node.getValue}")
      }
      case Tag.NULL => Json.Null
      case CustomTag(other) =>
        Json.fromJsonObject(JsonObject.singleton(other.stripPrefix("!"), Json.fromString(node.getValue)))
      case _ => Json.fromString(node.getValue)
    }).leftMap {
      err =>
        ParsingFailure(err.getMessage, err)
    }

    def convertKeyNode(node: Node) = node match {
      case scalar: ScalarNode => Right(scalar.getValue)
      case _ =>
        // Supply a real cause rather than null so the failure's underlying exception is usable.
        val message = "Only string keys can be represented in JSON"
        Left(ParsingFailure(message, new IllegalArgumentException(message)))
    }

    node match {
      case mapping: MappingNode =>
        flattener.flatten(mapping).getValue.asScala.foldLeft(
          Either.right[ParsingFailure, JsonObject](JsonObject.empty)
        ) {
          (objEither, tup) => for {
            obj <- objEither
            key <- convertKeyNode(tup.getKeyNode)
            value <- yamlToJson(tup.getValueNode)
          } yield obj.add(key, value)
        }.map(Json.fromJsonObject)
      case sequence: SequenceNode =>
        // Elements are prepended while folding and reversed once at the end.
        sequence.getValue.asScala.foldLeft(Either.right[ParsingFailure, List[Json]](List.empty[Json])) {
          (arrEither, node) => for {
            arr <- arrEither
            value <- yamlToJson(node)
          } yield value :: arr
        }.map(arr => Json.fromValues(arr.reverse))
      case scalar: ScalarNode => convertScalarNode(scalar)
    }
  }

}
/** Package-level parser: inherits every [[Parser]] method, built with `Parser`'s default algebra. */
package object parser extends Parser |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package io.circe.yaml | ||
|
||
import io.circe.Json | ||
import java.text.SimpleDateFormat | ||
import java.util.Calendar | ||
import org.scalacheck.Gen | ||
import org.scalatest.{FlatSpec, Matchers} | ||
import org.scalatest.prop.GeneratorDrivenPropertyChecks | ||
|
||
/** Property tests for parsers obtained through `parser.configured(...)`. */
class ConfiguredParserTests extends FlatSpec with Matchers with GeneratorDrivenPropertyChecks {

  "ConfiguredParser" should "parse timestamps as longs" in forAll(Gen.calendar) { cal =>
    val utc = java.util.TimeZone.getTimeZone("UTC")

    // HH is the 24-hour clock (hh is 12-hour and ambiguous without an AM/PM marker); SSS
    // zero-pads milliseconds so that 5 ms is written ".005" rather than ".5"; formatting in
    // UTC makes the zone designator "Z", avoiding offsets that lose precision when printed.
    val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSXXX")
    format.setTimeZone(utc)
    val dateStr = format.format(cal.getTime)

    // Evaluate the guard on the same UTC view of the instant that was formatted.
    val utcCal = Calendar.getInstance(utc)
    utcCal.setTimeInMillis(cal.getTimeInMillis)

    // Calendar.YEAR is never negative (BC dates are expressed through Calendar.ERA), so the
    // era is checked explicitly; years beyond 9999 do not fit the four-digit year field.
    val representable =
      utcCal.get(Calendar.ERA) == java.util.GregorianCalendar.AD && utcCal.get(Calendar.YEAR) <= 9999

    whenever(representable) {
      parser.configured(numericTimestamps = true).parse(
        s"""
          |timestamp: !!timestamp $dateStr
        """.stripMargin
      ) shouldEqual Right(Json.obj("timestamp" -> Json.fromLong(cal.getTimeInMillis)))
    }
  }

}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `null` isn't new but it makes me a little uncomfortable—so far in circe I haven't needed `ParsingFailure`s that don't wrap an underlying exception, but this seems like a reasonable thing to want in this case, so maybe that member should be an `Option`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could just give it an actual exception here. I don't think you should change `ParsingFailure`.