From 16e5bc2ce61e8475af5c15bf68313dba96ced178 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 5 Jan 2024 15:55:19 -0800 Subject: [PATCH] Properly unescape PN_LOCAL_ESC. --- lib/rdf/turtle/freebase_reader.rb | 8 +++++++ lib/rdf/turtle/reader.rb | 9 +++++++- spec/freebase_spec.rb | 36 +++++++++++++++++++++++++++++++ spec/reader_spec.rb | 36 +++++++++++++++++++++++++++++++ spec/terminals_spec.rb | 21 +++++++----------- 5 files changed, 96 insertions(+), 14 deletions(-) diff --git a/lib/rdf/turtle/freebase_reader.rb b/lib/rdf/turtle/freebase_reader.rb index ce353be..4b81e9b 100644 --- a/lib/rdf/turtle/freebase_reader.rb +++ b/lib/rdf/turtle/freebase_reader.rb @@ -64,6 +64,14 @@ def read_pname(**options) end pfx_iri = prefix(ns) raise RDF::ReaderError.new("ERROR [line #{lineno}] prefix #{ns.inspect} is not defined", lineno: lineno) unless pfx_iri + + # Unescape PN_LOCAL_ESC + suffix = suffix.gsub(PN_LOCAL_ESC) {|esc| esc[1]} if + suffix.match?(PN_LOCAL_ESC) + + # Remove any redundant leading hash from suffix + suffix = suffix.sub(/^\#/, "") if pfx_iri.to_s.index("#") + uri = RDF::URI(pfx_iri + suffix) uri.validate! if validate? 
uri diff --git a/lib/rdf/turtle/reader.rb b/lib/rdf/turtle/reader.rb index dd801cb..7278a8a 100644 --- a/lib/rdf/turtle/reader.rb +++ b/lib/rdf/turtle/reader.rb @@ -227,7 +227,14 @@ def pname(prefix, suffix) error("undefined prefix", production: :pname, token: prefix) '' end - suffix = suffix.to_s.sub(/^\#/, "") if base.index("#") + + # Unescape PN_LOCAL_ESC + suffix = suffix.gsub(PN_LOCAL_ESC) {|esc| esc[1]} if + suffix.match?(PN_LOCAL_ESC) + + # Remove any redundant leading hash from suffix + suffix = suffix.sub(/^\#/, "") if base.index("#") + debug("pname", depth: options[:depth]) {"base: '#{base}', suffix: '#{suffix}'"} process_iri(base + suffix.to_s) end diff --git a/spec/freebase_spec.rb b/spec/freebase_spec.rb index 525bd1c..bd419c8 100644 --- a/spec/freebase_spec.rb +++ b/spec/freebase_spec.rb @@ -83,6 +83,42 @@ g = RDF::Graph.new << subject expect(g).to be_equivalent_graph("@prefix foo: ." + input, logger: @logger) end + + context "PN_LOCAL" do + { + "p:_a": "", # PN_CHARS_U + "p::": "", # PN_LOCAL + "p:0": "", # PN_LOCAL + "p:%B7": "", # PN_LOCAL + "p:a.b": "", # PN_LOCAL + + "p:\\_underscore": "", # PN_LOCAL_ESC + "p:\\~tilda": "", # PN_LOCAL_ESC + "p:\\.dot": "", # PN_LOCAL_ESC + "p:\\-dash": "", # PN_LOCAL_ESC + "p:\\!exclamation": "", # PN_LOCAL_ESC + "p:\\$dollar": "", # PN_LOCAL_ESC + "p:\\&er": "", # PN_LOCAL_ESC + "p:\\'squote": "", # PN_LOCAL_ESC + "p:\\(paren\\)": "", # PN_LOCAL_ESC + "p:\\*star": "", # PN_LOCAL_ESC + "p:\\+plus": "", # PN_LOCAL_ESC + "p:\\,comma": "", # PN_LOCAL_ESC + "p:\\;semi": "", # PN_LOCAL_ESC + "p:\\=equal": "", # PN_LOCAL_ESC + "p:\\/slash": "", # PN_LOCAL_ESC + "p:\\?question": "", # PN_LOCAL_ESC + "p:\\#numbersign": "", # PN_LOCAL_ESC + "p:\\@ampersand": "", # PN_LOCAL_ESC + "p:\\%percent": "", # PN_LOCAL_ESC + }.each do |pn, iri| + it pn do + ttl = %(@prefix p: .\n p:s p:p #{pn} .) + nt = %( #{iri} .) 
+ expect(parse(ttl, validate: false)).to be_equivalent_graph(nt, logger: @logger) + end + end + end end describe "with simple sample data" do diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 7df1921..2b0561a 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -489,6 +489,42 @@ nil => "http://test/"}) end + context "PN_LOCAL" do + { + "p:_a": "", # PN_CHARS_U + "p::": "", # PN_LOCAL + "p:0": "", # PN_LOCAL + "p:%B7": "", # PN_LOCAL + "p:a.b": "", # PN_LOCAL + + "p:\\_underscore": "", # PN_LOCAL_ESC + "p:\\~tilda": "", # PN_LOCAL_ESC + "p:\\.dot": "", # PN_LOCAL_ESC + "p:\\-dash": "", # PN_LOCAL_ESC + "p:\\!exclamation": "", # PN_LOCAL_ESC + "p:\\$dollar": "", # PN_LOCAL_ESC + "p:\\&er": "", # PN_LOCAL_ESC + "p:\\'squote": "", # PN_LOCAL_ESC + "p:\\(paren\\)": "", # PN_LOCAL_ESC + "p:\\*star": "", # PN_LOCAL_ESC + "p:\\+plus": "", # PN_LOCAL_ESC + "p:\\,comma": "", # PN_LOCAL_ESC + "p:\\;semi": "", # PN_LOCAL_ESC + "p:\\=equal": "", # PN_LOCAL_ESC + "p:\\/slash": "", # PN_LOCAL_ESC + "p:\\?question": "", # PN_LOCAL_ESC + "p:\\#numbersign": "", # PN_LOCAL_ESC + "p:\\@ampersand": "", # PN_LOCAL_ESC + "p:\\%percent": "", # PN_LOCAL_ESC + }.each do |pn, iri| + it pn do + ttl = %(PREFIX p: \n p:s p:p #{pn} .) + nt = %( #{iri} .) + expect(parse(ttl, validate: false)).to be_equivalent_graph(nt, logger: @logger) + end + end + end + { "@prefix foo: ." => [true, true], "@PrEfIx foo: ." => [false, true], diff --git a/spec/terminals_spec.rb b/spec/terminals_spec.rb index 3cc08c4..65c9d17 100644 --- a/spec/terminals_spec.rb +++ b/spec/terminals_spec.rb @@ -44,21 +44,16 @@ ! # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : / < = ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z ~ - ab\\u00E9xy ab\xC3\xA9xy> - \\u03B1:a \xCE\xB1:a - a\\u003Ab a\x3Ab - \\U00010000 \xF0\x90\x80\x80 - \\U000EFFFF \xF3\xAF\xBF\xBF + ab\\u00E9xy + \\u03B1:a + a\\u003Ab + \\U00010000 + \\U000EFFFF ).each do |string| it "matches " do - begin - string = "" - string.force_encoding(Encoding::UTF_8) - expect(string).to match(RDF::Turtle::Terminals::IRIREF) - rescue RSpec::Expectations::ExpectationNotMetError - pending "Escapes in IRIs" - fail - end + string = "" + string.force_encoding(Encoding::UTF_8) + expect(string).to match(RDF::Turtle::Terminals::IRIREF) end end end