forked from mislav/rfc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.rb
248 lines (204 loc) · 5.95 KB
/
models.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
require_relative 'rfc'
require 'active_support/core_ext/date_time/conversions'
# The main model which represents an RFC. It delegates persistence to RfcEntry
# and XML fetching to RfcFetcher.
class RfcDocument
  extend Forwardable

  # The wrapped persistence object (an RfcEntry in production code).
  attr_reader :entry

  # Read-only attributes forwarded straight to the entry.
  def_delegators :entry, :title, :abstract, :body, :publish_date
  def_delegator :entry, :document_id, :id
  def_delegator :entry, :obsoleted, :obsoleted?
  def_delegator :entry, :updated_at, :last_modified

  class << self
    # `wrap(entry)` is just another name for `new(entry)`.
    alias_method :wrap, :new

    # Runs RfcEntry.search_raw with the given query/options and wraps every
    # result in an RfcDocument.
    def search(query, options = {})
      RfcEntry.search_raw(query, options).map { |found| wrap(found) }
    end

    # Looks up an entry through RfcEntry.get. Returns the wrapped document
    # when one is found; otherwise yields and returns the block's value.
    def fetch(doc_id)
      existing = RfcEntry.get(doc_id)
      return wrap(existing) if existing

      yield
    end

    # Derives a document id from the last path segment of +url+ (minus a
    # trailing .html/.xml/.txt extension) and fetches it. Ids beginning with
    # "draft-" have their trailing "-<digits>" version removed and, when no
    # entry is stored yet, a fresh entry is initialized as a draft. The
    # caller's block is the final fallback in both paths.
    def resolve_url(url)
      doc_id = File.basename(url).sub(/\.(html|xml|txt)$/, '')
      return fetch(doc_id) { yield } unless doc_id.start_with?('draft-')

      doc_id.sub!(/-\d+$/, '') # strip draft version
      fetch(doc_id) do
        wrap(RfcEntry.new).initialize_draft(doc_id) { yield }
      end
    end
  end

  def initialize(entry)
    @entry = entry
  end

  # Assigns +doc_id+ to the entry, then fetches and renders the document,
  # copying the fetcher's title and the XML document's keywords onto the
  # entry before saving it. Returns self when the save result is truthy;
  # otherwise yields and returns the block's value.
  def initialize_draft(doc_id)
    entry.document_id = doc_id
    persisted = fetch_and_render do |xml_doc, fetcher|
      entry.title = fetcher.title
      entry.keywords = xml_doc.keywords
      entry.save
    end
    return self if persisted

    yield
  end

  # URL of this document on the IETF datatracker. Ids of the form
  # "RFC<digits>" become "rfc<number>" without leading zeros; any other id
  # is simply downcased.
  def external_url
    tracker_id =
      if id =~ /^RFC(\d+)$/
        'rfc%d' % $1.to_i
      else
        id.downcase
      end
    "http://datatracker.ietf.org/doc/#{tracker_id}/"
  end

  # True once the entry has a body.
  def pretty?
    !body.nil?
  end

  # Fetches, renders and saves the document, but only when needs_fetch?
  # says so. Returns nil when nothing had to be done.
  def make_pretty
    return unless needs_fetch?

    fetch_and_render
    entry.save
  end

  # Truthy when the entry was never fetched, was fetched by an older
  # RfcFetcher version, or needs re-rendering.
  def needs_fetch?
    entry.fetcher_version.nil? ||
      entry.fetcher_version < RfcFetcher.version ||
      needs_rerender?
  end

  # Truthy when a body exists and the entry's updated_at is earlier than
  # RFC.last_modified.
  def needs_rerender?
    entry.body && entry.updated_at.to_time < RFC.last_modified
  end

  # Builds an RfcFetcher for this document, records the fetcher's XML URL
  # and version on the entry and, if the fetcher reports the document as
  # fetchable, downloads the XML, parses it with RFC::Document and stores
  # the rendered output as the entry body. When a block is given it receives
  # the parsed document and the fetcher, and its value is returned; in every
  # other case the result is nil. The entry is not saved here.
  def fetch_and_render(xml_url = entry.xml_source)
    fetcher = RfcFetcher.new(id, xml_url)
    entry.xml_source = fetcher.xml_url
    entry.fetcher_version = fetcher.version
    return unless fetcher.fetchable?

    fetcher.fetch
    doc = File.open(fetcher.path) { |io| RFC::Document.new(io) }
    doc.href_resolver = href_resolver
    entry.body = RFC::TemplateHelpers.render(doc)
    yield doc, fetcher if block_given?
  end

  # Bypass the discovery process by explicitly setting a known XML location,
  # then render and save.
  def set_xml_source(xml_url)
    fetch_and_render(xml_url)
    entry.save
  end

  # Used in the RFC HTML generation phase: maps a cross-reference of the
  # form "RFC<digits>" to a site-local path and anything else to nil.
  def href_resolver
    lambda { |xref| "/#{xref}" if xref =~ /^RFC\d+$/ }
  end
end
require 'dm-migrations'
require 'dm-timestamps'
require_relative 'searchable'
# A lightweight database model that stores metadata and rendered HTML for an RFC.
class RfcEntry
  include DataMapper::Resource
  extend Searchable

  # Persisted columns. +document_id+ is the primary key.
  property :document_id, String, length: 70, key: true
  property :title, String, length: 255
  property :abstract, Text, length: 2200
  property :keywords, Text, length: 500
  property :body, Text
  property :obsoleted, Boolean, default: false
  property :publish_date, Date
  property :popularity, Integer
  property :xml_source, String, length: 100
  property :fetcher_version, Integer
  timestamps :updated_at

  class << self
    # Same as the inherited +get+, but the id is normalized first so that
    # spellings such as "rfc42" or "RFC-42" are looked up as "RFC0042".
    def get(doc_id)
      super(normalize_document_id(doc_id))
    end

    private

    # Turns "rfc<digits>" (case-insensitive, optional hyphen) into the
    # "RFC" + zero-padded four-digit form; any other id is returned as its
    # string representation.
    def normalize_document_id(doc_id)
      raw = doc_id.to_s
      match = /^ rfc -? (\d+) $/ix.match(raw)
      match ? format('RFC%04d', match[1].to_i) : raw
    end
  end

  # Accepts either a plain value or an Array. Arrays are stored as a
  # comma-separated string; an empty Array is stored as nil.
  def keywords=(value)
    return super unless Array === value

    super(value.empty? ? nil : value.join(', '))
  end

  # Search field weighting, handed to the Searchable extension.
  searchable title: 'A', keywords: 'B',
             abstract: 'C', body: 'D'
end
require 'fileutils'
require 'net/http'
require 'nokogiri'
# Responsible for discovering and fetching of the XML source file for a
# specific publication.
class RfcFetcher
  # URL templates; "%s" is replaced with the downcased document id.
  XML_URL = 'http://xml.resource.org/public/rfc/xml/%s.xml'
  DRAFTS_URL = 'http://www.ietf.org/id/'
  TRACKER_URL = 'http://datatracker.ietf.org/doc/%s/'

  class << self
    # Directory in which downloaded XML files are stored.
    attr_accessor :download_dir

    # Version number of the fetcher; callers compare it with the version
    # stored alongside previously fetched documents.
    def version() 1 end
  end

  self.download_dir = File.join(ENV['TMPDIR'] || '/tmp', 'rfc-xml')

  # +title+ is only populated as a side effect of the tracker lookup in
  # find_tracker_xml; +path+ is only populated by fetch.
  attr_reader :title, :path

  # doc_id    - document identifier; stored downcased.
  # known_url - optional XML location. When given, discovery is skipped.
  def initialize(doc_id, known_url = nil)
    @doc_id = doc_id.to_s.downcase
    @xml_url = known_url unless known_url.nil?
  end

  def version() self.class.version end

  # Location of the XML source, or nil when none could be found. The result
  # (including nil) is memoized, so discovery runs at most once per instance.
  def xml_url
    return @xml_url if defined? @xml_url
    @xml_url = find_xml
  end

  # True when an XML location is known.
  def fetchable?
    !xml_url.nil?
  end

  # Sets +path+ to "<download_dir>/<doc_id>.xml" and, unless a file already
  # exists there, downloads xml_url to it by running curl. The exit status
  # of curl is not inspected.
  def fetch
    @path = File.join(self.class.download_dir, @doc_id + '.xml')
    unless File.exist?(@path)
      FileUtils.mkdir_p(File.dirname(@path))
      system 'curl', '-L', '--silent', xml_url.to_s, '-o', @path
    end
  end

  # Opens an HTTP connection to +url+ (a String or a URI-like object) and
  # yields the connection together with the request path. Returns the
  # block's result, which is expected to be the response; raises through
  # Net::HTTPResponse#error! when that response is a 5xx server error.
  #
  # TLS is enabled for https URLs; without it an https URL would be spoken
  # to in plaintext on port 443 and could never succeed.
  def request(url)
    url = URI(url) unless url.respond_to?(:host)
    res = Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
      yield http, url.request_uri
    end
    res.error! if Net::HTTPServerError === res
    res
  end

  # True when a HEAD request for +url+ is answered with 200 OK.
  def http_exist?(url)
    Net::HTTPOK === request(url) { |http, path| http.head(path) }
  end

  # Discovery entry point. Ids starting with "rfc" are first tried at
  # XML_URL; everything else, and RFCs not found there, go through the
  # tracker page.
  def find_xml
    xml_url = XML_URL % @doc_id
    if @doc_id.start_with?('rfc') && http_exist?(xml_url)
      xml_url
    else
      find_tracker_xml
    end
  end

  # GETs +url+ and, on 200 OK, yields the body parsed by Nokogiri and
  # returns the block's value. Returns nil for any other response.
  def get_html(url)
    res = request(url) { |http, path| http.get(path) }
    yield Nokogiri(res.body) if Net::HTTPOK === res
  end

  # Scrapes the tracker page for this document. Records the page heading as
  # +title+ and returns either the href of the "xml" link in the metadata
  # table or, when the page says "Was draft-...", the result of looking that
  # draft up in the drafts index. Returns nil when neither is present.
  def find_tracker_xml
    get_html(TRACKER_URL % @doc_id) do |html|
      # A page without an <h1> must not abort discovery; leave title nil.
      heading = html.at('//h1/text()')
      @title = heading ? heading.text.strip : nil

      if href = html.at('//table[@id="metatable"]//a[text()="xml"]/@href')
        href.text
      elsif html.search('#metatable td:nth-child(2)').text =~ /^Was (draft-[\w-]+)/
        find_draft_xml $1
      end
    end
  end

  # Searches the drafts index for links to +draft_name+, optionally followed
  # by "-<digits>", and returns the absolute URL that sorts last (nil when
  # there is no match).
  def find_draft_xml(draft_name)
    drafts_url = URI(DRAFTS_URL)
    # Built once, with the name escaped so it is matched literally.
    name_pattern = /\A#{Regexp.escape(draft_name)}(-\d+)?\z/
    get_html(drafts_url) do |html|
      html.search("a[href*=#{draft_name}]").
        map { |link| (drafts_url + link['href']).to_s }.
        select { |href| File.basename(href, '.xml') =~ name_pattern }.
        sort.last
    end
  end
end