-
Notifications
You must be signed in to change notification settings - Fork 0
/
datascrap.rb
38 lines (31 loc) · 837 Bytes
/
datascrap.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pry'
# Index page listing the town halls of the Val-d'Oise department.
PAGE_URL = 'http://annuaire-des-mairies.com/val-d-oise.html'
# Site root; substituted for the leading "." of the relative links scraped from the index page.
@root = 'http://annuaire-des-mairies.com'
# Downloads a town hall page and extracts the cell expected to hold its e-mail address.
#
# @param url [String] absolute URL of the town hall page
# @return [String] text of the node(s) matched by the XPath below; empty when nothing matches
def get_the_email_of_a_townhal_from_its_webpage(url)
  # Kernel#open no longer fetches URLs since Ruby 3.0, so go through URI.open;
  # the block form closes the downloaded stream once Nokogiri has parsed it.
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }
  doc.xpath('html/body/div/main/section[2]/div/table/tbody/tr[4]/td[2]').text
end
# Scrapes the index page at PAGE_URL and, for every town hall link found in a
# table cell, fetches the linked page to collect the e-mail address.
#
# Prints the collected list in reverse page order and returns that reversed list.
#
# @return [Array<Hash>] one hash per link, with the "email" and "name" keys
def get_all_the_urls_of_val_doise_townhalls
  # Kernel#open no longer fetches URLs since Ruby 3.0, so go through URI.open;
  # the block form closes the downloaded stream once Nokogiri has parsed it.
  doc = URI.open(PAGE_URL) { |page| Nokogiri::HTML(page) }

  # Take the href and the label from the same <a> node: pairing two separate
  # node lists by index would shift every name as soon as one link lacks an href.
  info = doc.xpath('//td/p/a[@href]').map do |link|
    # Links on the index page are relative ("./..."); swap the leading dot for the site root.
    url = link['href'].sub(/^[.]/, @root)
    { "email" => get_the_email_of_a_townhal_from_its_webpage(url), "name" => link.text }
  end

  p info.reverse
end
# Script entry point: runs the scrape (and prints its result) as soon as the file is loaded.
get_all_the_urls_of_val_doise_townhalls