Skip to content

Commit

Permalink
Support parsing and producing XML from flyweights
Browse files Browse the repository at this point in the history
  • Loading branch information
rdaum committed Dec 8, 2024
1 parent 338a39a commit c9591f1
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 12 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ strum = { version = "0.26", features = ["derive"] }
text-diff = "0.4"
ustr = "1.0"
uuid = { version = "1.11", features = ["v4"] }
xml-rs = "0.8.24"

## Required for MOO builtins.
chrono-tz = "0.10"
Expand Down
4 changes: 2 additions & 2 deletions crates/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ pub use encode::{
pub use var::{
v_bool, v_empty_list, v_empty_map, v_empty_str, v_err, v_float, v_flyweight, v_int, v_list,
v_list_iter, v_map, v_map_iter, v_none, v_obj, v_objid, v_str, v_string, Associative,
ErrorPack, IndexMode, List, Map, Sequence, Str, Var, Variant, AMBIGUOUS, FAILED_MATCH, NOTHING,
SYSTEM_OBJECT,
ErrorPack, Flyweight, IndexMode, List, Map, Sequence, Str, Var, Variant, AMBIGUOUS,
FAILED_MATCH, NOTHING, SYSTEM_OBJECT,
};
pub use var::{Error, Obj, Symbol, VarType};

Expand Down
16 changes: 15 additions & 1 deletion crates/compiler/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use bincode::{Decode, Encode};
use lazy_static::lazy_static;
use moor_values::Symbol;
use moor_values::VarType;
use moor_values::VarType::TYPE_MAP;
use moor_values::VarType::{TYPE_FLYWEIGHT, TYPE_MAP};
/// Global registry of built-in function names.
use std::collections::HashMap;
use ArgCount::{Q, U};
Expand Down Expand Up @@ -977,6 +977,20 @@ fn mk_builtin_table() -> Vec<Builtin> {
types: vec![Typed(TYPE_MAP), Any],
implemented: true,
},
Builtin {
name: Symbol::mk("xml_parse"),
min_args: Q(1),
max_args: Q(2),
types: vec![Typed(TYPE_STR), Typed(TYPE_MAP)],
implemented: true,
},
Builtin {
name: Symbol::mk("to_xml"),
min_args: Q(1),
max_args: Q(2),
types: vec![Typed(TYPE_FLYWEIGHT), Typed(TYPE_MAP)],
implemented: true,
},
]
}

Expand Down
1 change: 1 addition & 0 deletions crates/kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ md-5.workspace = true
onig.workspace = true
pwhash.workspace = true
rand.workspace = true
xml-rs.workspace = true

## Error declaration/ handling
thiserror.workspace = true
Expand Down
287 changes: 279 additions & 8 deletions crates/kernel/src/builtins/bf_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,23 @@
// this program. If not, see <https://www.gnu.org/licenses/>.
//

use md5::Digest;
use moor_compiler::{offset_for_builtin, to_literal};
use moor_values::Error::{E_ARGS, E_INVARG, E_RANGE, E_TYPE};
use moor_values::Variant;
use moor_values::{v_bool, v_float, v_int, v_objid, v_str};
use moor_values::{AsByteBuffer, Sequence};

use crate::bf_declare;
use crate::builtins::BfRet::Ret;
use crate::builtins::{world_state_bf_err, BfCallState, BfErr, BfRet, BuiltinFunction};
use moor_values::Associative;
use md5::Digest;
use moor_compiler::{offset_for_builtin, to_literal};
use moor_values::model::WorldState;
use moor_values::Error::{E_ARGS, E_INVARG, E_INVIND, E_PERM, E_RANGE, E_TYPE};
use moor_values::{
v_bool, v_float, v_int, v_list, v_obj, v_objid, v_str, v_string, Flyweight, List, Map, Obj,
};
use moor_values::{v_flyweight, Associative};
use moor_values::{AsByteBuffer, Sequence};
use moor_values::{Symbol, Variant, SYSTEM_OBJECT};
use std::io::{BufReader, BufWriter};
use tracing::error;
use xml::reader::XmlEvent;
use xml::EmitterConfig;

fn bf_typeof(bf_args: &mut BfCallState<'_>) -> Result<BfRet, BfErr> {
let arg = &bf_args.args[0];
Expand Down Expand Up @@ -208,6 +214,269 @@ fn bf_object_bytes(bf_args: &mut BfCallState<'_>) -> Result<BfRet, BfErr> {
}
bf_declare!(object_bytes, bf_object_bytes);

/// Uses xml-rs to parse a string into a series of flyweights
/// representing the XML structure.
/// Delegates for the flyweights are resolved as follows:
/// a) For each tag, there should be an object: $tag_<tag> for that tag name,
/// b) Alternatively, if a map is provided as the second argument, the tag name is looked up
/// in the map, and the object is resolved from that.
fn bf_xml_parse(bf_args: &mut BfCallState<'_>) -> Result<BfRet, BfErr> {
if !bf_args.config.flyweight_type {
return Err(BfErr::Code(E_PERM));
}

if bf_args.args.len() != 1 && bf_args.args.len() != 2 {
return Err(BfErr::Code(E_ARGS));
}

let Variant::Str(xml) = bf_args.args[0].variant() else {
return Err(BfErr::Code(E_INVARG));
};

let map = if bf_args.args.len() == 2 {
let Variant::Map(m) = bf_args.args[1].variant() else {
return Err(BfErr::Code(E_INVARG));
};
Some(m)
} else {
None
};

let reader = BufReader::new(xml.as_string().as_bytes());
let parser = xml::EventReader::new(reader);
let mut output_tree = Vec::new();

// Structure is (tag, Vec<(attribute, value)>, Vec<...>)
let mut current_node = Vec::new();
for e in parser {
match e {
Ok(XmlEvent::StartElement {
name, attributes, ..
}) => {
let tag = name.local_name;
let obj = match map {
Some(m) => {
let key = tag.to_string();
let key = v_str(key.as_str());
let Ok(obj) = m.index(&key) else {
return Err(BfErr::Code(E_INVARG));
};
let Variant::Obj(o) = obj.variant() else {
return Err(BfErr::Code(E_TYPE));
};
o.clone()
}
None => {
let key = format!("tag_{}", tag);
let key = Symbol::mk(&key);

// resolve via system object
let prop_value = bf_args
.world_state
.retrieve_property(&bf_args.caller_perms(), &SYSTEM_OBJECT, key)
.map_err(world_state_bf_err)?;

let Variant::Obj(o) = prop_value.variant() else {
return Err(BfErr::Code(E_TYPE));
};

o.clone()
}
};

let attributes: Vec<_> = attributes
.iter()
.map(|a| {
let key = format!("{}", a.name);
let key = Symbol::mk(&key);
let value = v_str(a.value.as_str());
(key, value)
})
.collect();
let entry = (obj, attributes, Vec::new());
current_node.push(entry);
}
Ok(XmlEvent::EndElement { .. }) => {
let (obj, attributes, children) =
current_node.pop().ok_or(BfErr::Code(E_INVARG))?;
// Turn this into a flyweight and push into the children of the parent
let children = List::mk_list(&children);
let fl = v_flyweight(obj.clone(), &attributes, children, None);
if let Some(parent) = current_node.last_mut() {
parent.2.push(fl);
} else {
output_tree.push(fl);
}
}
Ok(XmlEvent::Characters(str)) => {
// Character data between tags is stored a String in the parent's content
if let Some(parent) = current_node.last_mut() {
parent.2.push(v_str(str.as_str()));
}
}
Ok(_) => {
// Ignore other events (CDATA, etc)
}
Err(_) => {
return Err(BfErr::Code(E_INVARG));
}
}
}

// Return output tree as a v_list.
let result = v_list(&output_tree);
Ok(Ret(result))
}
bf_declare!(xml_parse, bf_xml_parse);

/// One step in the flattened, in-order XML event sequence that `flyweight_to_xml_tag`
/// produces and `bf_to_xml` replays into the XML writer.
enum Tag {
/// Opens an element: the tag name followed by its (attribute name, attribute value) pairs.
StartElement(String, Vec<(String, String)>),
/// Closes an element; written as an `end_element` event, which carries no name.
EndElement(()),
/// Character data emitted between element tags.
Text(String),
}

/// Flattens a flyweight (and, recursively, its flyweight children) into the ordered list of
/// `Tag`s needed to write it out as XML.
///
/// - `fl`: the flyweight to convert.
/// - `map`: optional mapping from delegate object to tag name; when absent the tag name is
///   read from the `tag` property of the flyweight's delegate.
/// - `perms`: the permissions used for the property lookup.
/// - `ws`: world state used for the property lookup.
///
/// Slots become attributes (string, integer and float values only) and contents become
/// nested elements (flyweights) or text (strings).
///
/// Errors:
/// - `E_INVARG` if the delegate is missing from `map`, its mapped value is not a string, a
///   slot holds an unsupported type, or a content item is neither a flyweight nor a string,
/// - `E_TYPE` if the delegate's `tag` property is not a string,
/// - whatever `world_state_bf_err` produces when the property lookup fails.
fn flyweight_to_xml_tag(
    fl: &Flyweight,
    map: Option<&Map>,
    perms: &Obj,
    ws: &mut dyn WorldState,
) -> Result<Vec<Tag>, BfErr> {
    // The tag name comes from the caller-provided map if there is one, otherwise from the
    // `tag` property on the delegate object.
    let tag_name = if let Some(m) = map {
        let delegate_key = v_obj(fl.delegate().clone());
        let Ok(entry) = m.index(&delegate_key) else {
            return Err(BfErr::Code(E_INVARG));
        };
        let Variant::Str(s) = entry.variant() else {
            return Err(BfErr::Code(E_INVARG));
        };
        s.as_string().to_string()
    } else {
        let prop = ws
            .retrieve_property(perms, fl.delegate(), Symbol::mk("tag"))
            .map_err(world_state_bf_err)?;
        let Variant::Str(s) = prop.variant() else {
            return Err(BfErr::Code(E_TYPE));
        };
        s.as_string().to_string()
    };

    // Every slot becomes an attribute; only scalar values with an obvious textual form
    // are accepted.
    let mut attributes = Vec::with_capacity(fl.slots().len());
    for (slot_name, slot_value) in fl.slots() {
        let rendered = match slot_value.variant() {
            Variant::Str(s) => s.as_string().to_string(),
            Variant::Int(i) => i.to_string(),
            Variant::Float(f) => f.to_string(),
            _ => {
                error!("Invalid attribute type");
                return Err(BfErr::Code(E_INVARG));
            }
        };
        attributes.push((slot_name.to_string(), rendered));
    }

    let mut tags = vec![Tag::StartElement(tag_name, attributes)];

    for item in fl.contents().iter() {
        match item.variant() {
            Variant::Flyweight(nested) => {
                // Nested elements are flattened in place, between our start and end tags.
                tags.extend(flyweight_to_xml_tag(nested, map, perms, ws)?);
            }
            Variant::Str(s) => tags.push(Tag::Text(s.as_string().to_string())),
            _ => {
                error!("Invalid child type");
                return Err(BfErr::Code(E_INVARG));
            }
        }
    }

    tags.push(Tag::EndElement(()));
    Ok(tags)
}

/// to_xml(root_flyweight, [tag map]) -> string
///
/// Serializes a tree of flyweights into an indented XML document.
/// For the conversion to succeed, every flyweight in the tree must have:
/// - a delegate object with a string `tag` property, OR an entry in the optional second
///   argument, a map from delegate objects to tag names,
/// - slots whose values are strings, integers or floats (these become the attributes),
/// - contents consisting only of further valid flyweights or string values.
///
/// Errors:
/// - `E_PERM` if the flyweight type is disabled in the configuration,
/// - `E_ARGS` if the argument count is not 1 or 2,
/// - `E_INVARG` if the second argument is not a map or the first is not a flyweight,
/// - any error produced while flattening the tree (see `flyweight_to_xml_tag`),
/// - `E_INVIND` if the XML writer rejects an event or the output is not valid UTF-8.
fn bf_to_xml(bf_args: &mut BfCallState<'_>) -> Result<BfRet, BfErr> {
    if !bf_args.config.flyweight_type {
        return Err(BfErr::Code(E_PERM));
    }

    let arg_count = bf_args.args.len();
    if !matches!(arg_count, 1 | 2) {
        return Err(BfErr::Code(E_ARGS));
    }

    // Optional delegate-object -> tag-name mapping supplied by the caller.
    let map = if arg_count == 2 {
        match bf_args.args[1].variant() {
            Variant::Map(m) => Some(m),
            _ => return Err(BfErr::Code(E_INVARG)),
        }
    } else {
        None
    };

    // Root needs to be a flyweight
    let Variant::Flyweight(root) = bf_args.args[0].variant() else {
        error!("Root must be a flyweight");
        return Err(BfErr::Code(E_INVARG));
    };

    // Flatten the whole tree first; nothing is written unless the tree is valid.
    let tags = flyweight_to_xml_tag(root, map, &bf_args.caller_perms(), bf_args.world_state)?;

    let mut output: Vec<u8> = Vec::new();
    {
        // Scoped so the writer and buffer are dropped (and the buffer flushed) before
        // `output` is consumed below.
        let mut output_buf = BufWriter::new(&mut output);
        let mut writer = EmitterConfig::new()
            .perform_indent(true)
            .create_writer(&mut output_buf);

        for tag in tags {
            match tag {
                Tag::StartElement(name, attributes) => {
                    let mut element = xml::writer::XmlEvent::start_element(name.as_str());
                    for (attr_name, attr_value) in &attributes {
                        element = element.attr(attr_name.as_str(), attr_value.as_str());
                    }
                    writer
                        .write(element)
                        .map_err(|_| BfErr::Code(E_INVIND))?;
                }
                Tag::Text(text) => {
                    writer
                        .write(xml::writer::XmlEvent::characters(text.as_str()))
                        .map_err(|_| BfErr::Code(E_INVIND))?;
                }
                Tag::EndElement(()) => {
                    writer
                        .write(xml::writer::XmlEvent::end_element())
                        .map_err(|_| BfErr::Code(E_INVIND))?;
                }
            }
        }
    }

    String::from_utf8(output)
        .map(|xml_text| Ret(v_string(xml_text)))
        .map_err(|_| BfErr::Code(E_INVIND))
}
bf_declare!(to_xml, bf_to_xml);

pub(crate) fn register_bf_values(builtins: &mut [Box<dyn BuiltinFunction>]) {
builtins[offset_for_builtin("typeof")] = Box::new(BfTypeof {});
builtins[offset_for_builtin("tostr")] = Box::new(BfTostr {});
Expand All @@ -221,4 +490,6 @@ pub(crate) fn register_bf_values(builtins: &mut [Box<dyn BuiltinFunction>]) {
builtins[offset_for_builtin("object_bytes")] = Box::new(BfObjectBytes {});
builtins[offset_for_builtin("value_hash")] = Box::new(BfValueHash {});
builtins[offset_for_builtin("length")] = Box::new(BfLength {});
builtins[offset_for_builtin("xml_parse")] = Box::new(BfXmlParse {});
builtins[offset_for_builtin("to_xml")] = Box::new(BfToXml {});
}
2 changes: 1 addition & 1 deletion crates/kernel/src/vm/moo_frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub(crate) struct MooStackFrame {
pub(crate) program: Program,
/// The program counter.
pub(crate) pc: usize,
/// The common of the variables currently in scope, by their offset.
/// The values of the variables currently in scope, by their offset.
pub(crate) environment: BitArray<Var, 256, Bitset16<16>>,
/// The current used scope size, used when entering and exiting local scopes.
pub(crate) environment_width: usize,
Expand Down

0 comments on commit c9591f1

Please sign in to comment.