Skip to content

Commit

Permalink
adding fediverse blocklist support
Browse files Browse the repository at this point in the history
will it crash, burn, or both?
  • Loading branch information
NotaInutilis committed Oct 31, 2023
1 parent b2dba99 commit a43e5c3
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 15 deletions.
2 changes: 1 addition & 1 deletion headers/fediblockhole.csv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
domain,severity,reject_media,reject_reports,public_comment,private_comment,obfuscate
domain,severity,public_comment,private_comment
11 changes: 11 additions & 0 deletions scripts/fediverse_domains_to_fediblockhole.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# This script converts fediverse_domains.txt into a .csv format used by FediBlockHole.
# Adds a comment for this specific blocklist (No-QAnon)
# Usage:
# python fediverse_domains_to_fediblockhole.py > fediblockhole.txt

# Input file, resolved relative to the current working directory.
SOURCE_FILE = "fediverse_domains.txt"

# Fields appended after the domain: severity, public_comment, private_comment.
# The leading comma separates the domain from the severity column; without it
# the two values are fused together (e.g. "example.comsuspend").
ROW_SUFFIX = ",suspend,No-QAnon blocklist,No-QAnon blocklist"


def to_fediblockhole_rows(lines):
    """Yield one CSV row for every non-blank domain in *lines*.

    Args:
        lines: Iterable of strings, one domain per item. Surrounding
            whitespace (including the trailing newline) is stripped.

    Yields:
        The domain followed by the fixed severity and comment fields.
        Blank lines are skipped so that no row is emitted without a domain.
    """
    for line in lines:
        domain = line.strip()
        if domain:
            yield domain + ROW_SUFFIX


def main(path=SOURCE_FILE):
    """Print the rows for the domain list stored at *path* to stdout."""
    # The context manager closes the file even if reading or printing fails.
    with open(path, "r") as text_file:
        for row in to_fediblockhole_rows(text_file):
            print(row)


# Only run the conversion when executed as a script, not when imported.
if __name__ == "__main__":
    main()
11 changes: 11 additions & 0 deletions scripts/fediverse_domains_to_mastodon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# This script converts fediverse_domains.txt into a .csv format used by Mastodon.
# Adds a comment for this specific blocklist (No-QAnon)
# Usage:
# python fediverse_domains_to_mastodon.py > mastodon.txt

# Input file, resolved relative to the current working directory.
SOURCE_FILE = "fediverse_domains.txt"

# Fixed fields appended after each domain. The matching column names live in
# headers/mastodon.csv, which update.sh copies in front of these rows.
ROW_SUFFIX = ",suspend,false,false,No-QAnon blocklist,false"


def to_mastodon_rows(lines):
    """Yield one CSV row for every non-blank domain in *lines*.

    Args:
        lines: Iterable of strings, one domain per item. Surrounding
            whitespace (including the trailing newline) is stripped.

    Yields:
        The domain followed by the fixed fields in ROW_SUFFIX. Blank lines
        are skipped so that no row is emitted without a domain.
    """
    for line in lines:
        domain = line.strip()
        if domain:
            yield domain + ROW_SUFFIX


def main(path=SOURCE_FILE):
    """Print the rows for the domain list stored at *path* to stdout."""
    # The context manager closes the file even if reading or printing fails.
    with open(path, "r") as text_file:
        for row in to_mastodon_rows(text_file):
            print(row)


# Only run the conversion when executed as a script, not when imported.
if __name__ == "__main__":
    main()
49 changes: 35 additions & 14 deletions scripts/update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,62 @@
# e.g.
# ./scripts/update.sh

# Cleanup sources:
## Special cleanup for imported sources of other formats (AdBlock, hosts, etc.).
# Cleanup sources
## Special cleanup for imported sources of other formats (AdBlock, hosts, etc.)
## Applies only to ./sources/_imported: a line whose first character is neither `#` nor alphanumeric gets a `#` prepended (turning it into a comment), then a leading `0.0.0.0` and the whitespace after it are stripped.
find ./sources/_imported -type f -name "*.txt" -exec sed -ri 's/^[^#[:alnum:]]/#&/; s/^0\.0\.0\.0[[:space:]]*//i' {} \;
## Normalizes URLs into domains: lowercases, remove leading spaces, protocol (`x://`) `www.` subdomains, everything after `/`, only one space before `#`. Keeps comments intact.
## Normalizes URLs into domains: lowercases, remove leading spaces, protocol (`x://`) `www.` subdomains, everything after `/`, only one space before `#`. Keeps comments intact
## How the sed script keeps comments: `h` copies the line to the hold space, `s/[^#]*//1` reduces it to the part starting at the first `#`, and `x` swaps so the hold space has the comment and the pattern space has the full line again.
## The comment is then removed from the pattern space, the remainder is normalized, and `G` plus the final substitution glue the saved comment back on.
find ./sources -type f -name "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^.*:\/\///i; s/^[.*]*//i; s/^www\.//i; s/\/[^[:space:]]*//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
## Remove duplicate domains from each source file (keeps repeated comments and empty lines for organization).
## Remove duplicate domains from each source file (keeps repeated comments and empty lines for organization)
## Inside the inline bash script, `$0` is the file path supplied by find through `{}`.
## awk prints a line if it is a comment, is empty, or has not been seen before; output goes to `<file>_temp.txt`, which then replaces the original file.
find ./sources -type f -name "*.txt" -exec bash -c '
awk "(\$0 ~ /^[[:space:]]*#/ || NF == 0 || !seen[\$0]++)" "$0" > "$0_temp.txt";
mv "$0_temp.txt" "$0";
' {} \;

# Combine all sources into a domains list.
find ./sources -type f -name "*.txt" -exec cat {} \; > domains.txt
# Combine all sources into a domains list
## Concatenates every .txt file under ./sources into domains.txt (overwriting it).
find ./sources -type f -iname "*.txt" -exec cat {} \; > domains.txt
## Fediverse domains list
## Same as above, restricted to source files whose name contains "fediverse"; the result goes to fediverse_domains.txt.
find ./sources -type f -iname "*fediverse*.txt" -exec cat {} \; > fediverse_domains.txt

# Cleanup the domain list:
## Remove comments, inline comments, spaces and empty lines.
sed -i '/^#/d; s/#.*//; s/ //g; /^ *$/d' domains.txt
## Sort and remove duplicates.
# Cleanup the domains list
## Remove comments, inline comments, spaces and empty lines
## Edits both lists in place: deletes lines starting with `#`, cuts everything from a `#` onward, removes all spaces, then deletes lines that are empty.
sed -i '/^#/d; s/#.*//; s/ //g; /^ *$/d' domains.txt fediverse_domains.txt
## Sort and remove duplicates
## Each list is sorted into a temporary file which is then moved over the original.
sort -u domains.txt > domains_temp.txt
mv domains_temp.txt domains.txt
sort -u fediverse_domains.txt > fediverse_domains_temp.txt
mv fediverse_domains_temp.txt fediverse_domains.txt

# Generate blocklists:
## From the domain list.
# Generate blocklists from the domains list
## Each converter script is run with python and its standard output is redirected into the target file.
## For DNS filtering
### Hosts
python scripts/domains_to_hosts.py > hosts.txt
cp hosts.txt etc_hosts.txt # Previous filename for PiHole installations still subscribed to the old url.
python scripts/domains_to_hosts_ipv6.py > hosts.txt.ipv6
### DNSmasq
python scripts/domains_to_dnsmasq.py > dnsmasq.txt

## For browser extensions.
## For browser extensions
### Netsane
python scripts/domains_to_netsane.py > netsane.txt
### Adblock
#### Built in four steps: write the generated rules to a temp file, copy the header file to the final name, append the rules, delete the temp file.
python scripts/domains_to_adblock.py > adblock_temp.txt
cp ./headers/adblock.txt adblock.txt
cat adblock_temp.txt >> adblock.txt
rm adblock_temp.txt
### uBlacklist
#### Same header + temp-file steps as Adblock.
python scripts/domains_to_ublacklist.py > ublacklist_temp.txt
cp ./headers/adblock.txt ublacklist.txt # Currently using the same adblock header until uBlacklist implements its own header. https://github.com/iorate/ublacklist/issues/351
cat ublacklist_temp.txt >> ublacklist.txt
rm ublacklist_temp.txt
rm ublacklist_temp.txt

## Generate Fediverse blocklists
## Both lists are built the same way: the converter writes its rows to a temp file, the CSV header from ./headers is copied to the final .csv, the rows are appended to it, and the temp file is removed.
## The converter scripts open fediverse_domains.txt by relative path, so it must exist in the directory this script is run from.
### Mastodon
python scripts/fediverse_domains_to_mastodon.py > mastodon_temp.txt
cp ./headers/mastodon.csv mastodon.csv
cat mastodon_temp.txt >> mastodon.csv
rm mastodon_temp.txt
### FediBlockHole
python scripts/fediverse_domains_to_fediblockhole.py > fediblockhole_temp.txt
cp ./headers/fediblockhole.csv fediblockhole.csv
cat fediblockhole_temp.txt >> fediblockhole.csv
rm fediblockhole_temp.txt
File renamed without changes.

0 comments on commit a43e5c3

Please sign in to comment.