-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Getting the public suffix list down to <30k
Going a bit crazy here. Browsers don't support gzip/deflate data yet (waiting for the Compression Streams API), and other compression schemes for which reasonable libraries are available simply don't cut it on compression ratio. In the meantime, PNG is lossless and uses deflate compression - exactly what we need :) So this patch pre-processes the PSL list for easy lookup (removing a lot of redundant text) and exports the result as a JSON dictionary. This is then converted to PNG by ImageMagick. The browser loads the image, we access the pixel values, and end up with our desired JSON dict. GH-68
- Loading branch information
Showing
8 changed files
with
218 additions
and
50 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,67 @@ | ||
import {tldlookup, tldcommon} from './getdomain_lut.js' | ||
|
||
export function getDomain(url) { | ||
//TODO decode Punycodeed urls (RFC 3492 and RFC 5891) | ||
const parts = url.split('.').reverse(); | ||
let res = []; | ||
let lut = tldlookup; | ||
let v; | ||
|
||
for (v=0; v < parts.length; v++) { | ||
const part = parts[v]; | ||
if (!lut) break; | ||
if (part in lut) { | ||
res.push(part); | ||
lut = lut[part] | ||
} | ||
else if ('*' in lut) { | ||
res.push(e); | ||
lut = null; | ||
} else | ||
break; | ||
/**
 * Load the image at `url` and hand back the loaded element.
 *
 * @param {string} url - Location of the image to load.
 * @returns {Promise<HTMLImageElement>} Resolves with the image element once
 *   its `load` event fires; rejects with an Error if loading fails.
 */
function loadImage(url) {
  const img = new Image();
  return new Promise((resolve, reject) => {
    img.onload = () => {
      resolve(img);
    };
    // Without an error handler a failed load would leave the promise pending
    // forever, so callers could neither recover nor report the failure.
    img.onerror = () => {
      reject(new Error(`Failed to load image: ${url}`));
    };
    // Assign src last so both handlers are attached before loading starts.
    img.src = url;
  });
}
|
||
/**
 * Load the image at `url`, paint it onto an off-screen canvas of the same
 * size and return the canvas' raw pixel bytes.
 *
 * @param {string} url - Location of the image to decode.
 * @returns {Promise<Uint8ClampedArray>} The `data` array of the ImageData
 *   covering the whole image.
 */
async function getPixels(url) {
  const image = await loadImage(url);
  const { height, width } = image;

  // Size the canvas to the image so that no pixel is cropped or scaled.
  const surface = document.createElement('canvas');
  surface.height = height;
  surface.width = width;

  const ctx = surface.getContext('2d');
  ctx.drawImage(image, 0, 0);

  const snapshot = ctx.getImageData(0, 0, width, height);
  return snapshot.data;
}
|
||
|
||
/**
 * Recover the text that was stored in an image's pixel bytes.
 *
 * Only every fourth byte (index 0, 4, 8, ...) carries payload; the bytes in
 * between are dropped. The kept bytes are decoded as UTF-8 text.
 *
 * @param {ArrayLike<number>} pixeldata - Flat pixel byte sequence, e.g. the
 *   `data` array of an ImageData. Typed arrays and plain arrays are accepted.
 * @returns {Promise<string>} The decoded text (not yet parsed as JSON).
 */
function pixeldata_to_json(pixeldata) {
  // Copy into a fresh Uint8Array instead of reassigning the parameter. This
  // also keeps plain arrays working: Blob would otherwise stringify them
  // ("123,125") rather than treating the entries as bytes.
  const payload = new Uint8Array(Math.ceil(pixeldata.length / 4));
  for (let i = 0; i < payload.length; i++) {
    payload[i] = pixeldata[i * 4];
  }
  const blob = new Blob([payload], {type: 'text/plain; charset=utf-8'});
  return blob.text();
}
|
||
export class PslLookup { | ||
constructor(args) { | ||
args = args || {}; | ||
args = Object.assign({tableLoader: getPixels, tableurl: "./getdomain.json.png"}, args); | ||
this.psltable = args.tableLoader(args.tableurl) | ||
.then(pixeldata_to_json) | ||
.then(JSON.parse) | ||
.catch(e=>{console.log("something is failing",e)}); | ||
} | ||
if (v < parts.length) | ||
res.push(parts[v]); | ||
|
||
if (parts.length > 2 && parts[1] in tldcommon | ||
&& tldcommon[parts[1]].includes(parts[0]) && res.length < 3) { | ||
res = parts.slice(0, 3); | ||
async waitTableReady() { | ||
let lut = await this.psltable; | ||
this.psltable = lut; | ||
} | ||
|
||
v = parts.indexOf('blogspot'); | ||
if (v >= 0) | ||
res = parts.slice(0, v + 2); | ||
|
||
return res.reverse().join('.'); | ||
getDomain(url) { | ||
let lut = this.psltable; | ||
const parts = url.split('.').reverse(); | ||
let res = []; | ||
let v; | ||
|
||
for (v=0; v < parts.length; v++) { | ||
const part = parts[v]; | ||
if (!lut) break; | ||
if (part in lut) { | ||
res.push(part); | ||
lut = lut[part] | ||
} | ||
else if ('*' in lut) { | ||
res.push(part); | ||
lut = null; | ||
} else | ||
break; | ||
} | ||
if (v < parts.length) | ||
res.push(parts[v]); | ||
|
||
return res.reverse().join('.'); | ||
} | ||
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Build the PNG-encoded lookup table: tld.py's stdout is piped into
# ImageMagick, which reads it as PGM and writes the PNG target.
# tld.py is listed as a prerequisite so that the image is regenerated when
# the generator script changes, not only when the image is missing.
../ext/webextension/src/lib/getdomain.json.png: tld.py
	python tld.py | convert pgm:- -define png:compression-filter=1 $@
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters