Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP - An alternative approach to #31 (see also #33) #34

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions src/main/scala/io/circe/yaml/parser/NodeAlg.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package io.circe.yaml.parser

import cats.data.ValidatedNel
import cats.instances.either._
import cats.instances.list._
import cats.syntax.either._
import cats.syntax.traverse._
import io.circe.{Json, JsonNumber, JsonObject, ParsingFailure}
import org.yaml.snakeyaml.constructor.SafeConstructor
import org.yaml.snakeyaml.nodes._
import scala.collection.JavaConverters._
import scala.collection.immutable.Queue

/**
 * An algebra for folding a SnakeYAML node tree into a value of type `T`.
 *
 * Implementations provide one handler per scalar tag plus the two builders
 * [[fromValues]] and [[fromFields]]; [[any]] is the entry point that dispatches on
 * the kind of node (and, for scalars, on its tag).
 *
 * @tparam T the result type produced for every node
 */
abstract class NodeAlg[T] {
/** Handles a scalar node whose tag is `Tag.INT`. */
def int(node: ScalarNode): T
/** Handles a scalar node whose tag is `Tag.FLOAT`. */
def float(node: ScalarNode): T
/** Handles a scalar node whose tag is `Tag.TIMESTAMP`. */
def timestamp(node: ScalarNode): T
/** Handles a scalar node whose tag is `Tag.BOOL`. */
def bool(node: ScalarNode): T
/** Handles a scalar node whose tag is `Tag.NULL`. */
def yNull(node: ScalarNode): T
/** Handles a scalar node whose tag is `Tag.STR`. */
def string(node: ScalarNode): T
/** Handles a scalar node whose tag is none of the tags matched explicitly in [[any]]. */
def otherScalar(node: ScalarNode): T

/**
 * Handles a sequence node: every child is converted with [[any]], in document order,
 * and the collected results are passed to [[fromValues]].
 */
def sequence(node: SequenceNode): T = fromValues {
node.getValue.asScala.foldLeft(Queue.empty[T]) {
(accum, next) => accum enqueue any(next)
}
}

/**
 * Handles a mapping node: every entry's value is converted with [[any]] and paired with
 * the key's string value; the pairs are passed to [[fromFields]].
 *
 * Only scalar keys are supported. A key that is not a `ScalarNode` makes this method
 * throw a `ParsingFailure` (with a `null` underlying cause) rather than return a value.
 */
def mapping(node: MappingNode): T = fromFields {
node.getValue.asScala.map {
nodeTuple => nodeTuple.getKeyNode match {
case keyNode: ScalarNode => keyNode.getValue -> any(nodeTuple.getValueNode)
case _ => throw ParsingFailure("Only string keys can be represented in JSON", null)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The null isn't new but it makes me a little uncomfortable—so far in circe I haven't needed ParsingFailures that don't wrap an underlying exception, but this seems like a reasonable thing to want in this case, so maybe that member should be an Option.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could just give it an actual exception here. I don't think you should change ParsingFailure.

}
}
}

/** Builds the result for a sequence from the already-converted element results. */
def fromValues(ts: Iterable[T]): T
/** Builds the result for a mapping from the already-converted (key, value result) pairs. */
def fromFields(ts: Iterable[(String, T)]): T

/**
 * Entry point: converts an arbitrary node by dispatching to the matching handler.
 *
 * Scalars are routed by tag (INT, FLOAT, TIMESTAMP, BOOL, NULL, STR, otherwise
 * [[otherScalar]]); sequences go to [[sequence]] and mappings to [[mapping]].
 * There is no default case, so a node that is none of these three kinds is not handled
 * here and the match fails.
 */
final def any(node: Node): T = node match {
case node: ScalarNode => node.getTag match {
case Tag.INT => int(node)
case Tag.FLOAT => float(node)
case Tag.TIMESTAMP => timestamp(node)
case Tag.BOOL => bool(node)
case Tag.NULL => yNull(node)
case Tag.STR => string(node)
case _ => otherScalar(node)
}
case node: SequenceNode => sequence(node)
case node: MappingNode => mapping(node)
}
}

/**
 * Adapts a [[NodeAlg]] that signals problems by throwing into one whose results are
 * `Either[ParsingFailure, A]`.
 *
 * Every call into the underlying algebra is evaluated inside `Either.catchNonFatal`;
 * a non-fatal throwable becomes a `Left` holding a `ParsingFailure` whose message names
 * the kind of value being parsed and whose cause is the original throwable.
 *
 * `sequence` and `mapping` are overridden to delegate the whole sub-tree to the underlying
 * algebra, so a failure anywhere inside a collection is reported as a failure of that
 * collection.
 *
 * @param lifted the throwing algebra being adapted
 * @tparam A the result type of the underlying algebra
 */
final class LiftedAlg[A](lifted: NodeAlg[A]) extends NodeAlg[Either[ParsingFailure, A]] {
  private def wrap(what: String)(err: Throwable) = ParsingFailure(s"Failed to parse $what", err)

  /** Evaluates `result` and converts any non-fatal throwable into a described `ParsingFailure`. */
  private def attempt(what: String)(result: => A): Either[ParsingFailure, A] =
    Either.catchNonFatal(result).leftMap(wrap(what))

  def int(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("integer value")(lifted.int(node))

  def float(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("float value")(lifted.float(node))

  def timestamp(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("timestamp value")(lifted.timestamp(node))

  def bool(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("boolean value")(lifted.bool(node))

  def yNull(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("null value")(lifted.yNull(node))

  def string(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("string value")(lifted.string(node))

  def otherScalar(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("scalar value")(lifted.otherScalar(node))

  override def sequence(node: SequenceNode): Either[ParsingFailure, A] =
    attempt("sequence")(lifted.sequence(node))

  override def mapping(node: MappingNode): Either[ParsingFailure, A] =
    attempt("mapping")(lifted.mapping(node))

  /**
   * Combines element results: the first `Left` (in iteration order) is returned as-is,
   * otherwise the unwrapped values are handed to the underlying `fromValues`.
   *
   * A `ParsingFailure` thrown by the underlying `fromValues` is returned as a `Left`;
   * any other throwable propagates, as before.
   */
  def fromValues(ts: Iterable[Either[ParsingFailure, A]]): Either[ParsingFailure, A] =
    ts.toList.sequenceU.flatMap { values =>
      Either.catchOnly[ParsingFailure](lifted.fromValues(values))
    }

  /**
   * Combines field results: the first `Left` value (in iteration order) is returned as-is,
   * otherwise the unwrapped (key, value) pairs are handed to the underlying `fromFields`.
   *
   * A `ParsingFailure` thrown by the underlying `fromFields` is returned as a `Left`;
   * any other throwable propagates, as before.
   */
  def fromFields(ts: Iterable[(String, Either[ParsingFailure, A])]): Either[ParsingFailure, A] =
    ts.toList.traverseU {
      case (key, value) => value.map(key -> _)
    }.flatMap { fields =>
      Either.catchOnly[ParsingFailure](lifted.fromFields(fields))
    }
}

/**
 * Adapts a [[NodeAlg]] into one whose results are `ValidatedNel[ParsingFailure, A]`.
 *
 * Scalar handlers go through a [[LiftedAlg]] over `base` and convert its `Either` result
 * with `toValidatedNel`. `sequence` and `mapping` are not overridden, so the inherited
 * implementations from [[NodeAlg]] are used and the children's results are combined by
 * [[fromValues]] / [[fromFields]] below.
 *
 * @param base the underlying algebra
 * @tparam A the result type of the underlying algebra
 */
final class AccumlatingAlg[A](base: NodeAlg[A]) extends NodeAlg[ValidatedNel[ParsingFailure, A]] {
Copy link
Member

@travisbrown travisbrown Jun 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick but is the missing u intentional?

// Exception-capturing view of `base`, used for all scalar handlers.
private val lifted = new LiftedAlg(base)
def int(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.int(node).toValidatedNel
def float(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.float(node).toValidatedNel
def timestamp(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.timestamp(node).toValidatedNel
def bool(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.bool(node).toValidatedNel
def yNull(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.yNull(node).toValidatedNel
def string(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.string(node).toValidatedNel
def otherScalar(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.otherScalar(node).toValidatedNel

/**
 * Traverses the (key, validated value) pairs; when every value is valid the resulting
 * pairs are passed to `base.fromFields`.
 * NOTE(review): `base.fromFields` is called directly, so an exception it throws is not
 * captured here.
 */
def fromFields(ts: Iterable[(String, ValidatedNel[ParsingFailure, A])]): ValidatedNel[ParsingFailure, A] =
ts.toList.traverseU {
case (key, value) => value.map(key -> _)
}.map(base.fromFields)

/**
 * Sequences the validated element results; when every element is valid the values are
 * passed to `base.fromValues`.
 */
def fromValues(ts: Iterable[ValidatedNel[ParsingFailure, A]]): ValidatedNel[ParsingFailure, A] =
ts.toList.sequenceU.map(base.fromValues)
}

/**
 * The default algebra: converts SnakeYAML nodes into circe [[Json]] values.
 *
 * Handlers signal invalid input by throwing; wrap this algebra (for example in
 * [[LiftedAlg]]) or catch at the call site to obtain a `ParsingFailure`.
 */
class DefaultAlg extends NodeAlg[Json] {
  /** Exposes `SafeConstructor`'s protected `flattenMapping` so mappings can be flattened in place. */
  protected object Constructor extends SafeConstructor {
    def flatten(node: MappingNode): Unit = flattenMapping(node)
  }

  /**
   * Parses a numeric string into a JSON number.
   *
   * @throws NumberFormatException if `str` is not accepted by `JsonNumber.fromString`
   */
  final protected def number(str: String): Json =
    JsonNumber.fromString(str).map(Json.fromJsonNumber).getOrElse {
      throw new NumberFormatException(s"Invalid numeric string $str")
    }

  def int(node: ScalarNode): Json = number(node.getValue)
  def float(node: ScalarNode): Json = number(node.getValue)

  /** Timestamps are kept as their textual value. */
  def timestamp(node: ScalarNode): Json = Json.fromString(node.getValue)

  /**
   * Converts a boolean scalar. Besides `true`/`false`, the spellings `yes`/`no` and
   * `on`/`off` are accepted case-insensitively; `String.toBoolean` alone would throw on
   * those. Any other text still fails with the `IllegalArgumentException` raised by
   * `toBoolean`.
   * NOTE(review): these are understood to be the spellings SnakeYAML resolves to the
   * bool tag — confirm against the SnakeYAML version in use.
   */
  def bool(node: ScalarNode): Json = node.getValue.toLowerCase(java.util.Locale.ROOT) match {
    case "yes" | "on" => Json.fromBoolean(true)
    case "no" | "off" => Json.fromBoolean(false)
    case other => Json.fromBoolean(other.toBoolean)
  }

  def yNull(node: ScalarNode): Json = Json.Null
  def string(node: ScalarNode): Json = Json.fromString(node.getValue)

  /**
   * Scalars with a tag outside the standard `Tag.PREFIX` namespace become a single-field
   * object keyed by the tag (leading `!` removed) with the scalar text as value; any other
   * scalar becomes a JSON string.
   */
  def otherScalar(node: ScalarNode): Json = if (!node.getTag.startsWith(Tag.PREFIX)) {
    Json.fromJsonObject(JsonObject.singleton(node.getTag.getValue.stripPrefix("!"), Json.fromString(node.getValue)))
  } else Json.fromString(node.getValue)

  /**
   * Flattens the mapping in place via `Constructor.flatten` before converting it, as the
   * previous parser implementation did; `Constructor` was otherwise never used.
   * NOTE(review): flattening is presumed to resolve YAML merge keys (`<<`) — confirm
   * against SnakeYAML. Wrappers that do not delegate to this method (they rebuild mappings
   * through `fromFields` only) do not get this step.
   */
  override def mapping(node: MappingNode): Json = {
    Constructor.flatten(node)
    super.mapping(node)
  }

  def fromValues(ts: Iterable[Json]): Json = Json.fromValues(ts)
  def fromFields(ts: Iterable[(String, Json)]): Json = Json.fromFields(ts)
}

/**
 * A [[DefaultAlg]] whose behaviour can be adjusted through constructor flags.
 *
 * @param numericTimestamps when true, timestamp scalars are emitted as a JSON number holding
 *                          the millisecond value of the constructed calendar; when false the
 *                          inherited [[DefaultAlg]] behaviour is used
 */
case class ConfiguredAlg(
numericTimestamps: Boolean
) extends DefaultAlg {
/**
 * Converts a timestamp scalar according to `numericTimestamps`: delegates to
 * `super.timestamp` when the flag is false, otherwise lets SnakeYAML's
 * `ConstructYamlTimestamp` process the node and returns `getCalendar.getTimeInMillis`
 * as a JSON long.
 */
final override def timestamp(node: ScalarNode): Json = if (!numericTimestamps) {
super.timestamp(node)
} else {
// A fresh constructor instance is created on every call.
val constructor = new SafeConstructor.ConstructYamlTimestamp()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some part of me doesn't really like the idea of instantiating a constructor for every single timestamp node (especially since it involves a non-trivial amount of setting up its own internal state), but it probably doesn't matter much.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because of that mutable internal state, I'm not sure what the other option is. You're not guaranteed that the algebra isn't being used from multiple threads.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately they don't expose just the timestamp parsing logic - otherwise we could bypass this allocation entirely.

Since the spec is pretty fixed, maybe it's worthwhile just duplicating their regexes and doing it internally here?

constructor.construct(node)
Json.fromLong(constructor.getCalendar.getTimeInMillis)
}
}
91 changes: 91 additions & 0 deletions src/main/scala/io/circe/yaml/parser/Parser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package io.circe.yaml.parser


import cats.data.ValidatedNel
import cats.syntax.either._
import io.circe._
import java.io.{Reader, StringReader}
import org.yaml.snakeyaml.Yaml
import scala.collection.JavaConverters._

/**
 * Parses YAML into circe [[Json]] by composing a SnakeYAML node tree and folding it with
 * the supplied algebra.
 *
 * @param algebra the algebra used to convert nodes; defaults to a new [[DefaultAlg]]
 */
class Parser(algebra: NodeAlg[Json] = new DefaultAlg) {

  /**
   * Configure the parser. The returned parser is backed by a new [[ConfiguredAlg]]; the
   * algebra of this instance is not carried over.
   *
   * @param numericTimestamps if true, timestamps will be returned as epoch millisecond [[Long]]s
   * @return A configured parser
   */
  def configured(
    numericTimestamps: Boolean = false
  ): Parser = new Parser(ConfiguredAlg(
    numericTimestamps = numericTimestamps
  ))

  /**
   * Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
   *
   * A [[ParsingFailure]] thrown by the algebra is returned unchanged; any other non-fatal
   * throwable is wrapped in a [[ParsingFailure]] carrying its message.
   */
  def parse(yaml: Reader): Either[ParsingFailure, Json] = for {
    parsed <- parseSingle(yaml)
    json <- Either.catchNonFatal(algebra.any(parsed)).leftMap {
      case p @ ParsingFailure(_, _) => p
      case err => ParsingFailure(err.getMessage, err)
    }
  } yield json

  /**
   * Parse YAML from the given [[Reader]], accumulating errors and returning either a list of [[ParsingFailure]]s
   * or a [[Json]]
   *
   * A failure to compose the document yields a single-element error list; otherwise the
   * composed node is converted with an [[AccumlatingAlg]] over this parser's algebra.
   */
  def parseAccumulating(yaml: Reader): ValidatedNel[ParsingFailure, Json] =
    // Validated is not a monad, so the Either from composition is converted first and the
    // conversion step is chained with `andThen` rather than a for-comprehension.
    parseSingle(yaml).toValidatedNel.andThen { parsed =>
      new AccumlatingAlg(algebra).any(parsed)
    }

  /**
   * Parse YAML from the given string, returning either [[ParsingFailure]] or [[Json]]
   */
  def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

  /**
   * Parse YAML from the given string, accumulating errors and returning either a list of [[ParsingFailure]]s
   * or a [[Json]]
   */
  def parseAccumulating(yaml: String): ValidatedNel[ParsingFailure, Json] = parseAccumulating(new StringReader(yaml))

  /**
   * Parse a succession of documents from the given [[Reader]], returning the result as a [[Stream]] of [[Either]]
   *
   * NOTE(review): composition of the documents is not wrapped in an `Either` here, so an
   * error raised by SnakeYAML while the stream is traversed presumably surfaces as an
   * exception — confirm.
   */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = {
    val alg = new LiftedAlg(algebra)
    parseStream(yaml).map(alg.any)
  }

  /**
   * Parse a succession of documents from the given [[Reader]], accumulating errors within each document and
   * returning the result as a [[Stream]] of [[ValidatedNel]]
   */
  def parseDocumentsAccumulating(yaml: Reader): Stream[ValidatedNel[ParsingFailure, Json]] = {
    val alg = new AccumlatingAlg(algebra)
    parseStream(yaml).map(alg.any)
  }

  /**
   * Parse a succession of documents from the given string, returning the result as a [[Stream]] of [[Either]]
   */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

  /**
   * Parse a succession of documents from the given string, accumulating errors within each document and
   * returning the result as a [[Stream]] of [[ValidatedNel]]
   */
  def parseDocumentsAccumulating(yaml: String): Stream[ValidatedNel[ParsingFailure, Json]] =
    parseDocumentsAccumulating(new StringReader(yaml))

  // Composes a single document; a non-fatal throwable becomes a ParsingFailure with its message.
  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

  // Composes every document in the reader and exposes the nodes as a Stream.
  private[this] def parseStream(reader: Reader) =
    new Yaml().composeAll(reader).asScala.toStream

}
93 changes: 1 addition & 92 deletions src/main/scala/io/circe/yaml/parser/package.scala
Original file line number Diff line number Diff line change
@@ -1,94 +1,3 @@
package io.circe.yaml

import cats.syntax.either._
import io.circe._
import java.io.{Reader, StringReader}
import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.constructor.SafeConstructor
import org.yaml.snakeyaml.nodes._
import scala.collection.JavaConverters._

package object parser {

  /**
   * Composes a single YAML document from `yaml` and converts it to [[Json]].
   *
   * @param yaml the source of YAML text
   * @return the converted document, or the [[ParsingFailure]] describing what went wrong
   */
  def parse(yaml: Reader): Either[ParsingFailure, Json] =
    parseSingle(yaml).flatMap(composed => yamlToJson(composed))

  /** String overload of `parse`: reads the YAML text through a `StringReader`. */
  def parse(yaml: String): Either[ParsingFailure, Json] = {
    val source = new StringReader(yaml)
    parse(source)
  }

  /** Converts every document composed from `yaml`, one result per document. */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = {
    val documents = parseStream(yaml)
    documents.map(document => yamlToJson(document))
  }

  /** String overload of `parseDocuments`: reads the YAML text through a `StringReader`. */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = {
    val source = new StringReader(yaml)
    parseDocuments(source)
  }

  // Composes one document; a non-fatal throwable is reported as a ParsingFailure.
  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap { cause =>
      ParsingFailure(cause.getMessage, cause)
    }

  // Composes all documents from the reader as a Stream of nodes.
  private[this] def parseStream(reader: Reader) = {
    val composer = new Yaml()
    composer.composeAll(reader).asScala.toStream
  }

  // Extractor matching tags outside the standard Tag.PREFIX namespace; yields the tag's value.
  private[this] object CustomTag {
    def unapply(tag: Tag): Option[String] =
      if (tag.startsWith(Tag.PREFIX)) None
      else Some(tag.getValue)
  }

  // Exposes SafeConstructor's protected flattenMapping and returns the same node for chaining.
  private[this] class FlatteningConstructor extends SafeConstructor {
    def flatten(node: MappingNode): MappingNode = {
      flattenMapping(node)
      node
    }
  }

  private[this] val flattener: FlatteningConstructor = new FlatteningConstructor

  /**
   * Recursively converts a node: mappings become JSON objects (after flattening), sequences
   * become JSON arrays in document order, scalars are converted according to their tag.
   * The first failure encountered stops the conversion of the enclosing collection.
   */
  private[this] def yamlToJson(node: Node): Either[ParsingFailure, Json] = {

    // Tag-directed scalar conversion; throws on malformed numbers or booleans.
    def scalarJson(scalar: ScalarNode): Json = scalar.getTag match {
      case Tag.INT | Tag.FLOAT =>
        JsonNumber.fromString(scalar.getValue) match {
          case Some(number) => Json.fromJsonNumber(number)
          case None => throw new NumberFormatException(s"Invalid numeric string ${scalar.getValue}")
        }
      case Tag.BOOL => Json.fromBoolean(scalar.getValue.toBoolean)
      case Tag.NULL => Json.Null
      case CustomTag(custom) =>
        val field = custom.stripPrefix("!")
        Json.fromJsonObject(JsonObject.singleton(field, Json.fromString(scalar.getValue)))
      case _ => Json.fromString(scalar.getValue)
    }

    def convertScalarNode(scalar: ScalarNode) =
      Either.catchNonFatal(scalarJson(scalar)).leftMap { cause =>
        ParsingFailure(cause.getMessage, cause)
      }

    // Only scalar keys are representable; their text is used as the JSON field name.
    def convertKeyNode(keyNode: Node): Either[ParsingFailure, String] = keyNode match {
      case scalar: ScalarNode => Right(scalar.getValue)
      case _ => Left(ParsingFailure("Only string keys can be represented in JSON", null))
    }

    node match {
      case mapping: MappingNode =>
        val entries = flattener.flatten(mapping).getValue.asScala
        val emptyObject = Either.right[ParsingFailure, JsonObject](JsonObject.empty)
        entries.foldLeft(emptyObject) { (soFar, entry) =>
          soFar.flatMap { fields =>
            convertKeyNode(entry.getKeyNode).flatMap { key =>
              yamlToJson(entry.getValueNode).map(value => fields.add(key, value))
            }
          }
        }.map(Json.fromJsonObject)
      case sequence: SequenceNode =>
        val noItems = Either.right[ParsingFailure, Vector[Json]](Vector.empty[Json])
        sequence.getValue.asScala.foldLeft(noItems) { (soFar, child) =>
          soFar.flatMap { items =>
            yamlToJson(child).map(value => items :+ value)
          }
        }.map(items => Json.fromValues(items))
      case scalar: ScalarNode => convertScalarNode(scalar)
    }
  }

}
/**
 * Package-level entry points: exposes the members of [[Parser]] directly on the
 * `io.circe.yaml.parser` package. No constructor argument is given, so `Parser`'s default
 * algebra argument applies.
 */
package object parser extends Parser