diff --git a/.travis.yml b/.travis.yml index c09e80d..8c91a74 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: rust rust: - stable + - beta - nightly matrix: allow_failures: diff --git a/Cargo.toml b/Cargo.toml index a237ae9..d6d3fdd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,18 +6,22 @@ documentation = "https://antage.github.io/opencorpora/opencorpora/index.html" readme = "README.md" license = "MIT" keywords = ["opencorpora", "dictionary", "russian"] -version = "0.1.2" +categories = ["parser-implementations"] +version = "0.1.3" authors = ["Anton Ageev "] -exclude = ["dict/*"] +exclude = ["target", "dict/*", "Makefile"] include = ["**/*.rs", "Cargo.toml", "LICENSE"] +[badges] +travis-ci = { repository = "https://github.com/antage/opencorpora" } + [dependencies] -quick-xml = "^0.1" -quick-error = "^1.0" +quick-xml = "^0.6.2" +quick-error = "^1.1.0" [dev-dependencies] -hyper = "^0.8" -bzip2 = "^0.3" +hyper = "^0.10.5" +bzip2 = "^0.3.1" [profile.test] opt-level = 3 diff --git a/LICENSE b/LICENSE index 46f5d74..eef8456 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,5 @@ MIT License -Copyright (c) 2016 Anton Ageev +Copyright (c) 2016-2017 Anton Ageev Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/src/lib.rs b/src/lib.rs index 68779c4..d714996 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,14 +54,13 @@ quick_error! { #[derive(Debug)] pub enum DictError { /// Ошибка разбора XML. - XmlError { err: ::quick_xml::error::Error, pos: usize } { - from(e: (::quick_xml::error::Error, usize)) -> { - err: e.0, - pos: e.1 + XmlError { err: ::quick_xml::errors::Error } { + from(e: ::quick_xml::errors::Error) -> { + err: e } cause(err) description(err.description()) - display("XML error at {} position: {}", pos, err) + display("XML error: {}", err) } /// Текст не соответствует кодировке UTF-8. @@ -111,20 +110,22 @@ enum ParsingState { } fn string_from_bytes(b: &[u8]) -> Result { - let s = try!(std::str::from_utf8(b)); + let s = std::str::from_utf8(b)?; Ok(s.to_owned()) } fn integer_from_bytes(b: &[u8]) -> Result where N: std::str::FromStr { - let s = try!(std::str::from_utf8(b)); - Ok(try!(s.parse())) + let s = std::str::from_utf8(b)?; + Ok(s.parse()?) } -fn get_restriction_scope(el: &quick_xml::Element) -> Result { +fn get_restriction_scope(el: &quick_xml::events::BytesStart) -> Result { + use quick_xml::events::attributes::Attribute; + for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"type" => { match value { @@ -135,7 +136,7 @@ fn get_restriction_scope(el: &quick_xml::Element) -> Result { - let s = try!(std::str::from_utf8(value)); + let s = std::str::from_utf8(value)?; let errmsg = format!("invalid restriction scope: '{}'", s); return Err(DictError::ParsingError(errmsg)); }, @@ -148,7 +149,7 @@ fn get_restriction_scope(el: &quick_xml::Element) -> Result>, name: &[u8]) -> Result, DictError> { - let s = try!(string_from_bytes(name)); + let s = string_from_bytes(name)?; match map.get(&s) { Some(grm) => { Ok(grm.clone()) @@ -160,7 +161,7 @@ fn get_grammeme(map: &std::collections::HashMap>, } fn get_lemma(map: &std::collections::HashMap>, id_str: &[u8]) -> Result, DictError> { - let id = try!(integer_from_bytes(id_str)); + let id = integer_from_bytes(id_str)?; match map.get(&id) { Some(lmt) => { Ok(lmt.clone()) @@ -179,7 +180,9 @@ impl Dict { use std::str; use std::collections::HashMap; - use quick_xml::{XmlReader, Event}; + use quick_xml::reader::Reader; + use quick_xml::events::Event; + use quick_xml::events::attributes::Attribute; let mut state = ParsingState::Start; @@ -194,22 +197,84 @@ impl Dict { let mut grammeme_by_name = HashMap::>::new(); let mut lemma_by_id = HashMap::>::new(); - let reader = XmlReader::from_reader(BufReader::new(r)); + let mut reader = Reader::from_reader(BufReader::new(r)); + let mut buf = Vec::new(); - for ev in reader { - match ev { + loop { + match reader.read_event(&mut buf) { + Ok(Event::Empty(ref el)) => { + match el.name() { + b"g" if state == ParsingState::LemmaL => { + for attr in el.attributes() { + let Attribute { key: name, value } = attr?; + match name { + b"v" => { + let grammeme = get_grammeme(&grammeme_by_name, value)?; + current_lemma.grammemes.push(grammeme); + }, + _ => (), + } + } + }, + b"g" if state == ParsingState::LemmaF => { + for attr in el.attributes() { + let Attribute { key: name, value } = attr?; + match name { + b"v" => { + let grammeme = get_grammeme(&grammeme_by_name, value)?; + current_form.grammemes.push(grammeme); + }, + _ => (), + } + } + }, + b"link" if state == ParsingState::Links => { + let mut current_link = Link::default(); + for attr in el.attributes() { + let Attribute { key: name, value } = attr?; + match name { + b"id" => { + current_link.id = integer_from_bytes(value)?; + }, + b"from" => { + let lemma = get_lemma(&lemma_by_id, value)?; + current_link.from = lemma; + }, + b"to" => { + let lemma = get_lemma(&lemma_by_id, value)?; + current_link.to = lemma; + }, + b"type" => { + let kind_id: usize = integer_from_bytes(value)?; + for lk in &dict.link_kinds { + if kind_id == lk.id { + current_link.kind = lk.clone(); + } + } + }, + _ => (), + } + } + dict.links.push(current_link.clone()); + }, + ref name => { + let s = str::from_utf8(name)?; + return Err(DictError::ParsingError(format!("unexpected single tag: '{}'", s))); + }, + } + }, Ok(Event::Start(ref el)) => { match el.name() { b"dictionary" if state == ParsingState::Start => { state = ParsingState::Dictionary; for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"version" => { - dict.version = try!(string_from_bytes(value)); + dict.version = string_from_bytes(value)?; }, b"revision" => { - dict.revision = try!(integer_from_bytes(value)) + dict.revision = integer_from_bytes(value)?; }, _ => (), } @@ -224,13 +289,13 @@ impl Dict { state = ParsingState::Grammeme; current_grammeme = Grammeme::default(); for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"parent" => { if value.is_empty() { current_grammeme.parent = None; } else { - current_grammeme.parent = Some(try!(string_from_bytes(value))); + current_grammeme.parent = Some(string_from_bytes(value)?); } }, _ => (), @@ -254,7 +319,7 @@ impl Dict { state = ParsingState::Restriction; current_restriction = Restriction::default(); for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"type" => { match value { @@ -268,14 +333,14 @@ impl Dict { current_restriction.kind = RestrictionKind::Forbidden; }, _ => { - let s = try!(str::from_utf8(value)); + let s = str::from_utf8(value)?; let errmsg = format!("invalid restriction kind: '{}'", s); return Err(DictError::ParsingError(errmsg)); } } }, b"auto" => { - current_restriction.auto = try!(integer_from_bytes(value)); + current_restriction.auto = integer_from_bytes(value)?; }, _ => (), } @@ -283,11 +348,11 @@ impl Dict { }, b"left" if state == ParsingState::Restriction => { state = ParsingState::RestrictionLeft; - current_restriction.left_scope = try!(get_restriction_scope(el)); + current_restriction.left_scope = get_restriction_scope(el)?; }, b"right" if state == ParsingState::Restriction => { state = ParsingState::RestrictionRight; - current_restriction.right_scope = try!(get_restriction_scope(el)); + current_restriction.right_scope = get_restriction_scope(el)?; }, b"lemmata" if state == ParsingState::Dictionary => { state = ParsingState::Lemmata; @@ -298,13 +363,13 @@ impl Dict { current_lemma = Lemma::default(); current_lemma.forms.clear(); for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"id" => { - current_lemma.id = try!(integer_from_bytes(value)) + current_lemma.id = integer_from_bytes(value)?; }, b"rev" => { - current_lemma.revision = try!(integer_from_bytes(value)) + current_lemma.revision = integer_from_bytes(value)?; }, _ => (), } @@ -314,10 +379,10 @@ impl Dict { state = ParsingState::LemmaL; current_lemma.grammemes.clear(); for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"t" => { - current_lemma.word = try!(string_from_bytes(value)); + current_lemma.word = string_from_bytes(value)?; }, _ => (), } @@ -328,34 +393,10 @@ impl Dict { current_form = Form::default(); current_form.grammemes.clear(); for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"t" => { - current_form.word = try!(string_from_bytes(value)); - }, - _ => (), - } - } - }, - b"g" if state == ParsingState::LemmaL => { - for attr in el.attributes() { - let (name, value) = try!(attr); - match name { - b"v" => { - let grammeme = try!(get_grammeme(&grammeme_by_name, value)); - current_lemma.grammemes.push(grammeme); - }, - _ => (), - } - } - }, - b"g" if state == ParsingState::LemmaF => { - for attr in el.attributes() { - let (name, value) = try!(attr); - match name { - b"v" => { - let grammeme = try!(get_grammeme(&grammeme_by_name, value)); - current_form.grammemes.push(grammeme); + current_form.word = string_from_bytes(value)?; }, _ => (), } @@ -369,10 +410,10 @@ impl Dict { state = ParsingState::LinkType; current_link_kind = LinkKind::default(); for attr in el.attributes() { - let (name, value) = try!(attr); + let Attribute { key: name, value } = attr?; match name { b"id" => { - current_link_kind.id = try!(integer_from_bytes(value)); + current_link_kind.id = integer_from_bytes(value)?; }, _ => (), } @@ -382,37 +423,8 @@ impl Dict { state = ParsingState::Links; dict.links.clear(); }, - b"link" if state == ParsingState::Links => { - let mut current_link = Link::default(); - for attr in el.attributes() { - let (name, value) = try!(attr); - match name { - b"id" => { - current_link.id = try!(integer_from_bytes(value)); - }, - b"from" => { - let lemma = try!(get_lemma(&lemma_by_id, value)); - current_link.from = lemma; - }, - b"to" => { - let lemma = try!(get_lemma(&lemma_by_id, value)); - current_link.to = lemma; - }, - b"type" => { - let kind_id: usize = try!(integer_from_bytes(value)); - for lk in &dict.link_kinds { - if kind_id == lk.id { - current_link.kind = lk.clone(); - } - } - }, - _ => (), - } - } - dict.links.push(current_link.clone()); - }, ref name => { - let s = try!(str::from_utf8(name)); + let s = str::from_utf8(name)?; return Err(DictError::ParsingError(format!("unexpected opening tag: '{}'", s))); }, } @@ -420,30 +432,30 @@ impl Dict { Ok(Event::Text(ref el)) => { match state { ParsingState::GrammemeName => { - current_grammeme.name = try!(string_from_bytes(el.content())); + current_grammeme.name = string_from_bytes(&el.unescaped()?)?; }, ParsingState::GrammemeAlias => { - current_grammeme.alias = try!(string_from_bytes(el.content())); + current_grammeme.alias = string_from_bytes(&el.unescaped()?)?; }, ParsingState::GrammemeDescription => { - current_grammeme.description = try!(string_from_bytes(el.content())); + current_grammeme.description = string_from_bytes(&el.unescaped()?)?; }, ParsingState::RestrictionLeft => { - if el.content().len() > 0 { - current_restriction.left_grammeme = Some(try!(get_grammeme(&grammeme_by_name, el.content()))); + if el.len() > 0 { + current_restriction.left_grammeme = Some(get_grammeme(&grammeme_by_name, &el.unescaped()?)?); } else { current_restriction.left_grammeme = None; } }, ParsingState::RestrictionRight => { - if el.content().len() > 0 { - current_restriction.right_grammeme = Some(try!(get_grammeme(&grammeme_by_name, el.content()))); + if el.len() > 0 { + current_restriction.right_grammeme = Some(get_grammeme(&grammeme_by_name, &el.unescaped()?)?); } else { current_restriction.right_grammeme = None; } }, ParsingState::LinkType => { - current_link_kind.name = try!(string_from_bytes(el.content())); + current_link_kind.name = string_from_bytes(&el.unescaped()?)?; } _ => (), } @@ -500,7 +512,6 @@ impl Dict { state = ParsingState::Lemma; current_lemma.forms.push(current_form.clone()); }, - b"g" if state == ParsingState::LemmaL || state == ParsingState::LemmaF => {}, b"link_types" if state == ParsingState::LinkTypes => { state = ParsingState::Dictionary; }, @@ -511,17 +522,18 @@ impl Dict { b"links" if state == ParsingState::Links => { state = ParsingState::Dictionary; }, - b"link" if state == ParsingState::Links => {}, ref name => { - let s = try!(str::from_utf8(name)); + let s = str::from_utf8(name)?; return Err(DictError::ParsingError(format!("unexpected closing tag: '{}'", s))); }, } }, - Err((e, pos)) => return Result::Err(DictError::from((e, pos))), - _ => (), + Err(e) => return Result::Err(DictError::from(e)), + Ok(Event::Eof) => break, + Ok(Event::Decl(_)) => (), + e => panic!("!!! {:?}", e), } - }; + } if state != ParsingState::End { Err(DictError::ParsingError(format!("invalid state after parsing: {:?}", state))) diff --git a/tests/read_from_xml.rs b/tests/read_from_xml.rs index dd221cb..eb875b6 100644 --- a/tests/read_from_xml.rs +++ b/tests/read_from_xml.rs @@ -19,5 +19,6 @@ fn test_read_from_xml() { .unwrap(); let decompressed = BzDecoder::new(res); - assert!(Dict::read_from_xml(decompressed).is_ok()); + let dict = Dict::read_from_xml(decompressed); + assert!(dict.is_ok()); }