-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcat.js
82 lines (66 loc) · 1.92 KB
/
cat.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
const N3 = require('n3');
const fs = require('fs');
// Turtle parser that consumes the category dump.
const parser = new N3.Parser();

// true  -> print the collected category records as one JSON object.
// false -> print a serialized elasticlunr index built from those records.
const outputRaw = true;

// Category dumps are published at
// https://dumps.wikimedia.org/other/categoriesrdf/latest/
// The dump location can be passed as the first command-line argument; when it
// is omitted the script falls back to the location it always used.
const DEFAULT_DUMP_PATH = '/home/me/d/simplewiki-20200516-categories.2.ttl';
const rdfStream = fs.createReadStream(process.argv[2] || DEFAULT_DUMP_PATH);

// Category records keyed by category id (see id()). Each record may carry:
//   n - label, S - supercategory ids, s - subcategory ids.
const d = {};
/**
 * Shortens a term identifier by removing the first occurrence of the
 * simple.wikipedia.org category URL prefix. Strings that do not contain the
 * prefix are returned unchanged.
 *
 * @param {string} x - Term identifier.
 * @returns {string} The identifier with the first prefix occurrence removed.
 */
function id(x) {
  const categoryPrefix = 'https://simple.wikipedia.org/wiki/Category:';
  const start = x.indexOf(categoryPrefix);
  if (start === -1) {
    return x;
  }
  const before = x.slice(0, start);
  const after = x.slice(start + categoryPrefix.length);
  return before + after;
}
/**
 * Returns the record stored in `d` under `id`, creating an empty record first
 * when none exists yet. Repeated calls with the same id return the same
 * object, so callers can mutate it in place.
 *
 * @param {string} id - Key of the record.
 * @returns {Object} The (possibly newly created) record for `id`.
 */
function X(id) {
  // Only own entries count as existing records. A plain `d[id]` lookup would
  // also find inherited members such as "constructor" or "toString" and hand
  // back a built-in instead of a record.
  if (!Object.prototype.hasOwnProperty.call(d, id)) {
    // defineProperty rather than assignment: assigning to the key "__proto__"
    // would swap d's prototype instead of storing an entry.
    Object.defineProperty(d, id, {
      value: {},
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return d[id];
}
/**
 * Feeds the dump stream through the parser. The callback runs once per parsed
 * quad and records category relationships and labels in `d`; when it is
 * called without a quad the input is finished and the result is printed to
 * stdout (raw JSON or a serialized elasticlunr index, depending on
 * `outputRaw`).
 *
 * Record fields written here:
 *   n - label
 *   S - ids of the categories this category is in (supercategories)
 *   s - ids of the categories that are in this category (subcategories)
 *   i - the record's own key in `d` (added only when building the index)
 */
parser.parse(rdfStream, (err, q) => {
  if (err) {
    // A failure also arrives without a quad. Without this guard it would be
    // taken for end-of-input and partial data would be printed as if complete.
    console.error(err);
    process.exitCode = 1;
    return;
  }

  if (!q) {
    // No quad and no error: the whole input has been consumed.
    if (outputRaw) {
      console.log(JSON.stringify(d));
    } else {
      const lunr = require('elasticlunr');
      const index = lunr(function () {
        this.addField('n'); // label
        this.addField('S'); // list of supercategories
        this.addField('s'); // list of subcategories
        this.setRef('i'); // key of the record in d
      });
      for (const key of Object.keys(d)) {
        const item = d[key];
        item.i = key;
        index.addDoc(item);
      }
      console.log(JSON.stringify(index.toJSON()));
    }
    return;
  }

  const s = id(q.subject.id);
  let o = id(q.object.id);
  // Strip the two known namespaces so predicates can be matched by local name.
  const p = q.predicate.id
    .replace('https://www.mediawiki.org/ontology#', '')
    .replace('http://www.w3.org/2000/01/rdf-schema#', '');

  // Every subject gets a record, even if none of its predicates are stored.
  const x = X(s);

  if (p === 'isInCategory') {
    // Record the edge in both directions: child -> parent and parent -> child.
    if (x.S === undefined) {
      x.S = [];
    }
    x.S.push(o);

    const y = X(o);
    if (y.s === undefined) {
      y.s = [];
    }
    y.s.push(s);
  } else if (p === 'label') {
    if (typeof o === 'string' && o[0] === '"') {
      // Keep only the text between the opening quote and the LAST quote, so a
      // trailing language tag or datatype suffix after the closing quote is
      // dropped instead of being cut mid-way.
      o = o.substring(1, o.lastIndexOf('"'));
    }
    x.n = o;
  }
  // 'pages' and 'subcategories' quads are intentionally not stored.
});