From 26631eab2976e57825bf150d85afd24f43eda9f7 Mon Sep 17 00:00:00 2001 From: Levent Duivel Date: Tue, 12 Jul 2022 09:21:47 +0500 Subject: [PATCH] Initial commit --- .gitattributes | 1 + FOR-REVIEWERS.md | 11 ++ README.md | 62 ++++++++ config/config.sh | 16 ++ config/exclude-hosts-by-ips-dist.txt | 12 ++ config/exclude-hosts-custom.txt | 8 + config/exclude-hosts-dist.txt | 42 +++++ config/exclude-ips-custom.txt | 1 + config/exclude-ips-dist.txt | 15 ++ config/exclude-regexp-dist.awk | 185 +++++++++++++++++++++++ config/include-hosts-custom.txt | 11 ++ config/include-hosts-dist.txt | 23 +++ config/include-ips-custom.txt | 1 + config/include-ips-dist.txt | 1 + doall.sh | 10 ++ install.sh | 23 +++ parse.sh | 83 ++++++++++ process.sh | 15 ++ result/.gitkeep | 1 + result/blocked-ranges.txt | 0 result/dnsmasq-ipset.conf | 0 result/hostlist_original.txt | 0 result/hostlist_zones.txt | 0 result/iplist_all.txt | 0 result/iplist_blockedbyip.txt | 0 result/iplist_blockedbyip_noid2971.txt | 0 result/iplist_special_range.txt | 0 scripts/collapse_blockedbyip_noid2971.py | 19 +++ scripts/generate-pac-domains.awk | 40 +++++ scripts/generate-pac-ipaddrs.awk | 49 ++++++ scripts/getzones.awk | 38 +++++ scripts/resolve-dns-nxdomain.py | 126 +++++++++++++++ temp/.gitkeep | 1 + update.sh | 18 +++ 34 files changed, 812 insertions(+) create mode 100644 .gitattributes create mode 100644 FOR-REVIEWERS.md create mode 100644 README.md create mode 100644 config/config.sh create mode 100644 config/exclude-hosts-by-ips-dist.txt create mode 100644 config/exclude-hosts-custom.txt create mode 100644 config/exclude-hosts-dist.txt create mode 100644 config/exclude-ips-custom.txt create mode 100644 config/exclude-ips-dist.txt create mode 100644 config/exclude-regexp-dist.awk create mode 100644 config/include-hosts-custom.txt create mode 100644 config/include-hosts-dist.txt create mode 100644 config/include-ips-custom.txt create mode 100644 config/include-ips-dist.txt create mode 100644 doall.sh create 
mode 100644 install.sh create mode 100644 parse.sh create mode 100644 process.sh create mode 100644 result/.gitkeep create mode 100644 result/blocked-ranges.txt create mode 100644 result/dnsmasq-ipset.conf create mode 100644 result/hostlist_original.txt create mode 100644 result/hostlist_zones.txt create mode 100644 result/iplist_all.txt create mode 100644 result/iplist_blockedbyip.txt create mode 100644 result/iplist_blockedbyip_noid2971.txt create mode 100644 result/iplist_special_range.txt create mode 100644 scripts/collapse_blockedbyip_noid2971.py create mode 100644 scripts/generate-pac-domains.awk create mode 100644 scripts/generate-pac-ipaddrs.awk create mode 100644 scripts/getzones.awk create mode 100644 scripts/resolve-dns-nxdomain.py create mode 100644 temp/.gitkeep create mode 100644 update.sh diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..87e654b --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.csv filter=lfs diff=lfs merge=lfs -text diff --git a/FOR-REVIEWERS.md b/FOR-REVIEWERS.md new file mode 100644 index 0000000..c945a19 --- /dev/null +++ b/FOR-REVIEWERS.md @@ -0,0 +1,11 @@ +# For Reviewers + +## Justifications of serving hard to read PAC-scripts + +0. It's not obfuscated but compressed to fit into the 1MB limit on PAC-script size in most browsers. +1. In this repository you may find the open source codes of our pac-script generator -- we may translate it to English upon your request. +2. I understand it's difficult to evaluate if PAC-script is malicious or not. However, take into account the worst case damage it can inflict: + - It may leak addresses user visits via dnsResolve. + - It may return a proxy which collects addresses user visits or even modifies responses (this is explicitly allowed when user agrees to `proxy` permission in our browser extension). +3. 
PAC-scripts (remote or not) are executed in a kind of sandbox: they have access only to a restricted API (see https://github.com/anticensority/about-pac-scripts/blob/master/pac-script-api-chrome-55.md for details). +So they are quite benign. diff --git a/README.md b/README.md new file mode 100644 index 0000000..4022d0b --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +Порт генератора сервиса [АнтиЗапрет](https://antizapret.prostovpn.org/) для OpenWrt +========================================= + +Генератор dnsmasq.conf файла для указания на использование ipset + +Данный набор скриптов создает файл dnsmasq.conf со списком сайтов, заблокированных на территории Российской Федерации Роскомнадзором и другими государственными органами, который можно использовать внутри роутера для автоматического туннелирования заблокированных ресурсов. + +Помимо основного назначения скрипта (генерации dnsmasq.conf), он также умеет создавать: + +* Файл клиентской конфигурации (client-config, CCD) с заблокированными диапазонами IP-адресов для OpenVPN; +* Файл с заблокированными доменными зонами для Squid; +* Файл с заблокированными доменными зонами в LUA-переменной, для использования с DNS-резолвером knot-resolver. + +### Зависимости + +* dnsmasq-full +* Bash +* cURL +* GNU iconv +* GNU coreutils-stat +* GNU grep +* GNU gawk (gawk) +* sipcalc +* idn +* Python 3.7+ +* dnspython 2.0.0+ + +```sh +opkg install bash curl iconv coreutils-stat grep gawk sipcalc idn python3 python3-pip +pip install dnspython +``` + +### Конфигурационные файлы + +* **{in,ex}clude-{hosts,ips}-dist** — конфигурация дистрибутива, предназначена для изменения автором репозитория; +* **{in,ex}clude-{hosts,ips}-custom** — пользовательская конфигурация, предназначена для изменения конечным пользователем скрипта; +* **exclude-regexp-dist.awk** — файл с различным заблокированным «мусором», раздувающим PAC-файл: зеркалами сайтов, неработающими сайтами, и т.д. +* **config.sh** — файл с адресами прокси и прочей конфигурацией. 
+ +### Установка и запуск + +1. Используйте инструкции схожего проекта на https://habr.com/ru/post/440030/ + Данный репозиторий лишь дополняет его для использования альтернативного сервиса и метода маршрутизации. + Не устанавливайте и не запускайте /etc/init.d/hirkn. + + В секции про "Ну и последнее — это формирование списков в iptables с помощью ipset:" + замените конфигурацию на: +``` +config ipset + option name 'vpn_ipsum' + option storage 'hash' + option match 'dst_ip' + option hashsize '5000000' + option maxelem '5000000' +``` +2. Замените текущую установку dnsmasq на dnsmasq-full. + Стандартная версия dnsmasq не включает в себя поддержку ipset. + Если ваш провайдер использует DHCP, вам придется скачать пакет dnsmasq-full заранее, до удаления dnsmasq. + Вам необходимо указать параметр "conf-dir=/etc/dnsmasq.d" в файле /etc/dnsmasq.conf +3. Склонируйте git-репозиторий +4. Отредактируйте **config/config.sh** +5. Запустите **doall.sh**. diff --git a/config/config.sh b/config/config.sh new file mode 100644 index 0000000..780a55d --- /dev/null +++ b/config/config.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# HTTPS (TLS) proxy address +PACHTTPSHOST='proxy-ssl.antizapret.prostovpn.org:3143' + +# Regular proxy address +PACPROXYHOST='proxy-nossl.antizapret.prostovpn.org:29976' + +# Facebook and Twitter proxy address +PACFBTWHOST='proxy-fbtw-ssl.antizapret.prostovpn.org:3143' + +PACFILE="result/proxy-host-ssl.pac" +PACFILE_NOSSL="result/proxy-host-nossl.pac" + +# Perform DNS resolving to detect and filter non-existent domains +RESOLVE_NXDOMAIN="no" diff --git a/config/exclude-hosts-by-ips-dist.txt b/config/exclude-hosts-by-ips-dist.txt new file mode 100644 index 0000000..8168734 --- /dev/null +++ b/config/exclude-hosts-by-ips-dist.txt @@ -0,0 +1,12 @@ +^81\.91\.178\.252; +^37\.48\.77\.229; +^178\.208\.90\.38; +^213\.13\.30\.100; +^52\.169\.125\.34; +^81\.91\.178\.242; +^5\.61\.58\.119; +^45\.81\.227\.72; +^209\.99\.40\.222; +^95\.211\.189\.202; +^34\.252\.217\.230; 
+^103\.224\.212\.222; diff --git a/config/exclude-hosts-custom.txt b/config/exclude-hosts-custom.txt new file mode 100644 index 0000000..adb7063 --- /dev/null +++ b/config/exclude-hosts-custom.txt @@ -0,0 +1,8 @@ +youtube.com +ytimg.com +play.google.com +fonts.googleapis.com +gstatic.com +ggpht.com +googleapis.com +googlevideo.com diff --git a/config/exclude-hosts-dist.txt b/config/exclude-hosts-dist.txt new file mode 100644 index 0000000..125f1e8 --- /dev/null +++ b/config/exclude-hosts-dist.txt @@ -0,0 +1,42 @@ +youtube.com +googleusercontent.com +pornhub.com +navalny.zta.lk +youtube.com +youtu.be +ytimg.com +cloudfront.net +yt3.ggpht.com +yt4.ggpht.com +s.ytimg.com +i.ytimg.com +article31.club +akamaiedge.net +akamai.net +soupcdn.com +mediafire.com +pro100farma.net +segodel.com +15yo.ru +bethaze.ru +dabet.ru +zerkalo-tv.ru +nudism.ga +zenitbet44.com +leninjiv.com +1sx522.com +www.rutinadew.com +fespal.website +igrovyeavtomatynadengi.com +1-x520.com +e3b227af32e4d25d50.xyz +db534bb0cc17aa05bc.xyz +4f42f3b6338a06de4c.xyz +telemax-net.ru +ural.ru +coldfilm.ws +coldfilm.cc +coldfilm.ru +rt.pornhub.com +ru.pornhub.com +www.pornhub.com diff --git a/config/exclude-ips-custom.txt b/config/exclude-ips-custom.txt new file mode 100644 index 0000000..8d1c8b6 --- /dev/null +++ b/config/exclude-ips-custom.txt @@ -0,0 +1 @@ + diff --git a/config/exclude-ips-dist.txt b/config/exclude-ips-dist.txt new file mode 100644 index 0000000..45a8d8a --- /dev/null +++ b/config/exclude-ips-dist.txt @@ -0,0 +1,15 @@ +127.0.0.1 +0.0.0. +192.168.0. +192.168.1. +192.168.2. +192.168.44. +192.168.88. +192.168.100. +1.1.1.1 +1.2.3.4 +fe80:: +172.16.0. +10.0.0. 
+178.248.233.33 +82.192.95.170 diff --git a/config/exclude-regexp-dist.awk b/config/exclude-regexp-dist.awk new file mode 100644 index 0000000..b8d6f87 --- /dev/null +++ b/config/exclude-regexp-dist.awk @@ -0,0 +1,185 @@ +(/duckdns/) {next} +(/linode\.com/) {next} +(/upcloud\.com/) {next} +(/googleusercontent\.com/) {next} +(/\.sl\.pt/) {next} +(/\.biz\.ski/) {next} +(/\.sloat\.biz/) {next} +(/\.new-rutor\.org/) {next} +(/\.traderc\.biz/) {next} +(/\.o-q\.biz/) {next} +(/\.dcge\.biz/) {next} +(/fonbet/) {next} +(/betcity/) {next} +(/zerkalo/ && !/zerkalo\.io/) {next} +(/zigzag/) {next} +(/zenitbet/) {next} +(/winline/) {next} +(/ttrcasino/) {next} +(/paripartners/) {next} +(/parimatch/) {next} +(/ligastavok/) {next} +(/liga-stavok/) {next} +(/baltplay/) {next} +(/azino777/) {next} +(/azino.*777/) {next} +(/777.*azino/) {next} +(/vulkan/) {next} +(/leonbet/) {next} +(/ru\.leon/) {next} +(/ru\.adleon/) {next} +(/leonaccess/) {next} +(/leon-[0-9]{3}/) {next} +(/pm-[0-9]{2,3}\./) {next} +(/mf-[0-9]{2,3}\.online/) {next} +(/fon-[0-9]{2,3}\./) {next} +(/most.{3}\./) {next} +(/casino\-/) {next} +(/bcity\-/) {next} +(/1x\-/) {next} +(/^1xbet[^.]/) {next} +(/^1xbet-/) {next} +(/1xmob/) {next} +#(/melbet/) {next} +(/bk\-info/) {next} +(/bkinfo/) {next} +(/marathon/) {next} +(/gaminator/) {next} +(/joycasino/) {next} +(/goldenstar/) {next} +/marafon/ {next} +/olimp-tv\.org/ {print} +/olimp/ {next} +/kasino/ {next} +/depozit/ {next} +/kazino/ {next} +#/777/ {next} +/casino/ {next} +/admiral/ {next} +/zerkala/ {next} +/avtomat/ {next} +/igrat/ {next} +/azart/ {next} +#/besplatno/ {next} +/sloty/ {next} +/bet-boom/ {next} +/betsbc/ {next} +/^bk-/ {next} +/^bkr/ {next} +/bkinf0/ {next} +/bukmeker/ {next} +/ruletka/ {next} +/vulcan/ {next} +/vylkan/ {next} +/wulcan/ {next} +/wulkan/ {next} +/vullkan/ {next} +/volcan/ {next} +/^vlc/ {next} +/^vlk/ {next} +/eldorado/ {next} +/lotto/ {next} +/lottery/ {next} +/fbmetrix/ {next} +/^diplom-/ {next} +/^dosug-/ {next} +/^dosug[0-9]{2}/ 
{next} +/fon-bet/ {next} +/^hydra[0-9]{2}/ {next} +/^intim[0-9]{2}/ {next} +/^livetv[0-9]{2}/ {next} +/marafon/ {next} +#/^melb/ {next} +/^melm/ {next} +/^mf-[0-9]{2}/ {next} +/^most/ {next} +#/^new-/ {next} +/^pari-/ {next} +/^pokerdom/ {next} +/prostitutki/ {next} +/spravka/ {next} +/mossst/ {next} +/mostbet/ {next} +/diplom/ {next} +/pharaon/ {next} +/fortuna/ {next} +/^rotate/ {next} +/^ref.{5}\./ {next} +/play\-/ {next} +/^1w.{3,4}\./ {next} +/^mylove[0-9]{2,3}\./ {next} +#/^1x.{3,4}\./ {next} +/^mirror[0-9]{2,3}\./ {next} +/^mob.{3,4}\./ {next} +#/^777/ {next} +/hydra/ {next} +/spravok/ {next} +/spravka/ {next} +/zenit/ {next} +/zakladki/ {next} +/vullcan/ {next} +/vulslots/ {next} +/vulwinners/ {next} +/slots/ {next} +/traffaccess/ {next} +/tide24/ {next} +/swleon/ {next} +/sportingbull/ {next} +/sokol-24/ {next} +/silmag/ {next} +/faraon/ {next} +/marbet/ {next} +/joycazino/ {next} +/joy-cazino/ {next} +/jackpot/ {next} +/semyanich/ {next} +/semenarnia/ {next} +/prostitutki/ {next} +/shishkin-semena/ {next} +/vulkanstavka/ {next} +/bukvaved/ {next} +/rastarasha/ {next} +/errors-seeds/ {next} +/casino-x/ {next} +/kinogb/ {next} +/vulkanstars/ {next} +/vulcanwin/ {next} +/vlk-slots/ {next} +/rutorg/ {next} +/leonbets/ {next} +/parimatch/ {next} +/azartplay/ {next} +/bbplay2017/ {next} +/baltplay2017/ {next} +/hiwager/ {next} +/seedbanda/ {next} +#/gidonline/ {next} +/^alco/ {next} +/^www\.deviantart\.com$/ {next} +(/\.r\.cloudfront\.net/) {next} +(/\*/) {next} +(/\\/) {next} +/multikland\.net/ {next} +/synchroncode\.com/ {next} +/placehere\.link/ {next} +/delivembed\.cc/ {next} +/svetacdn\.in/ {next} +/^a[bdfk]-[0-9]{5}/ {next} +/^azimob[0-9]{5}/ {next} +/^bets-.{2}-.{5}\./ {next} +/^betwinner-/ {next} +/^gg[0-9]{2,3}\.bet/ {next} +/^goldfishka[0-9]{2,3}\./ {next} +/^kinovod[0-9]{2,3}\.cc/ {next} +/^lite-1x/ {next} +/^livetv[0-9]{2,3}.me/ {next} +/^loot[0-9]{2,3}\.bet/ {next} +/^melbet-/ {next} +/^ox-[0-9]{5}/ {next} +/^partypoker[0-9]{3,5}\.com/ {next} 
#!/bin/bash
#
# doall.sh - run the complete pipeline in order:
#   update.sh   download the registry dump into temp/
#   parse.sh    turn the dump into host/zone lists and dnsmasq rules
#   process.sh  install the rules and restart dnsmasq/firewall

# Abort on the first failing stage; every stage needs the previous one's output.
set -e

# The stage scripts use paths relative to the repository root.
HERE="$(dirname "$(readlink -f "${0}")")"
cd "$HERE"

# The stage scripts are committed without the executable bit (mode 100644),
# so "./update.sh" would fail with "Permission denied" on a fresh clone.
# Starting them through bash works whether or not the bit is set.
for stage in update.sh parse.sh process.sh; do
    bash "./$stage"
done
#!/bin/bash
#
# parse.sh - turn the downloaded registry dump (temp/list.csv) into:
#   result/hostlist_original.txt  every listed host name, IDN-encoded
#   result/hostlist_zones.txt     hosts collapsed to zones, minus exclusions
#   result/blocked-ranges.txt     CIDR ranges found in the address column
#   result/dnsmasq-ipset.conf     one server=/ipset= rule pair per zone
#
# Expects temp/list.csv to exist (produced by update.sh).

echo "Parsing antizapret lists. This might take a while due to CPU limitations."

set -e

# Switch to the script directory BEFORE touching any relative path (the config
# file, the awk scripts and their @include) so the script also works when it
# is started from another working directory.
HERE="$(dirname "$(readlink -f "${0}")")"
cd "$HERE"

source config/config.sh

# Collapse the host list into zones and drop every zone that is listed in
# temp/exclude-hosts.txt. Run again after the NXDOMAIN pass has extended the
# exclusion list.
generate_zones() {
    gawk -f scripts/getzones.awk temp/hostlist_original_with_include.txt \
        | grep -v -F -x -f temp/exclude-hosts.txt \
        | sort -u > result/hostlist_zones.txt
}

# Extract domains from list
echo "Stage: Extracting domains from list"
gawk -F ';' '{print $2}' temp/list.csv | sort -u | gawk '/^$/ {next} /\\/ {next} /^[а-яА-Яa-zA-Z0-9\-_\.\*]*+$/ {gsub(/\*\./, ""); gsub(/\.$/, ""); print}' | CHARSET=UTF-8 idn > result/hostlist_original.txt

# Generate zones from domains
# FIXME: nxdomain list parsing is disabled due to its instability on z-i
###cat exclude.txt temp/nxdomain.txt > temp/exclude.txt
echo "Stage: Generate zones from domains"

echo "Created temp include/exclude files"
sort -u config/exclude-hosts-{dist,custom}.txt > temp/exclude-hosts.txt
sort -u config/exclude-ips-{dist,custom}.txt > temp/exclude-ips.txt
sort -u config/include-hosts-{dist,custom}.txt > temp/include-hosts.txt
sort -u config/include-ips-{dist,custom}.txt > temp/include-ips.txt
sort -u temp/include-hosts.txt result/hostlist_original.txt > temp/hostlist_original_with_include.txt

# Hosts whose listed address matches one of the patterns in
# exclude-hosts-by-ips-dist.txt are excluded as well: emit one "ip;host" pair
# per address of each record, keep the matching pairs, append their host part.
echo "Adding distributed excluded hosts to preferences file"
gawk -F ';' '{split($1, a, /\|/); for (i in a) {print a[i]";"$2}}' temp/list.csv | \
    grep -f config/exclude-hosts-by-ips-dist.txt | gawk -F ';' '{print $2}' >> temp/exclude-hosts.txt

echo "Removing excluded hosts from total hostlist"
generate_zones


if [[ "$RESOLVE_NXDOMAIN" == "yes" ]];
then
    echo "Resolving NXDomain zones"
    # The resolver script is committed without the executable bit, so it is
    # started through the interpreter instead of being executed directly.
    python3 scripts/resolve-dns-nxdomain.py result/hostlist_zones.txt >> temp/exclude-hosts.txt
    echo "NXDomain zones exclusion "
    generate_zones
fi

# Generate a list of IP addresses
echo "Stage: Generate a list of IP addresses"

# echo "generating iplist_special_range.txt"
# gawk -F';' '$1 ~ /\// {print $1}' temp/list.csv | grep -P '([0-9]{1,3}\.){3}[0-9]{1,3}\/[0-9]{1,2}' -o | sort -Vu > result/iplist_special_range.txt
#
# echo "generating iplist_all.txt"
# gawk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) {gsub(/\|/, RS, $1); print $1}' temp/list.csv | \
#     gawk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_all.txt
#
# echo "generating iplist_blockedbyip.txt"
# gawk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/\|/, RS); print $1}' temp/list.csv | \
#     gawk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_blockedbyip.txt
#
# echo "generating iplist_blockedbyip_noid2971.txt"
# grep -F -v '33-4/2018' temp/list.csv | grep -F -v '33а-5536/2019' | \
#     gawk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/\|/, RS); print $1}' | \
#     gawk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_blockedbyip_noid2971.txt

echo "generating blocked-ranges.txt"
# grep -E replaces the obsolescent egrep alias.
gawk -F ';' '$1 ~ /\// {print $1}' temp/list.csv | grep -E -o '([0-9]{1,3}\.){3}[0-9]{1,3}\/[0-9]{1,2}' | sort -u > result/blocked-ranges.txt

# Generate dnsmasq aliases
echo "Generating dnsmasq-ipset configuration"
# The whole loop writes through a single redirection, so the output file is
# truncated and opened once instead of being reopened twice per zone.
while read -r line
do
    echo "server=/$line/127.0.0.1"
    echo "ipset=/$line/vpn_ipsum"
done < result/hostlist_zones.txt > result/dnsmasq-ipset.conf


# Print results
# NOTE: the iplist_* files are only produced by the commented-out steps above,
# so their counts reflect whatever is already present in result/.
echo "Blocked domains: $(wc -l result/hostlist_zones.txt)" >&2
echo "iplist_all: $(wc -l result/iplist_all.txt)" >&2
echo "iplist_special_range: $(wc -l result/iplist_special_range.txt)" >&2
echo "iplist_blockedbyip: $(wc -l result/iplist_blockedbyip.txt)" >&2
echo "iplist_blockedbyip_noid2971: $(wc -l result/iplist_blockedbyip_noid2971.txt)" >&2

exit 0
#!/usr/bin/env python3
"""Drop blocked-by-IP addresses that are already covered by a special range.

Reads the whitespace-separated address list and the special-range list from
``result/``, removes every address that overlaps any special range, and writes
the remaining addresses (one per line, without a ``/32`` suffix) to
``result/iplist_blockedbyip_noid2971_collapsed.txt``.
"""
import ipaddress

ADDR_LIST_PATH = 'result/iplist_blockedbyip_noid2971.txt'
SPECIAL_LIST_PATH = 'result/iplist_special_range.txt'
OUTPUT_PATH = 'result/iplist_blockedbyip_noid2971_collapsed.txt'


def read_networks(path):
    """Return the whitespace-separated entries of *path* as IPv4Network objects.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if an entry is not a valid IPv4 address or network.
    """
    with open(path, 'r') as listfile:
        return [ipaddress.IPv4Network(entry) for entry in listfile.read().split()]


def remove_special_ranges(networks, special_ranges):
    """Return the members of *networks* that overlap none of *special_ranges*.

    A new list is built instead of deleting from *networks* while iterating
    over it: deleting in place shifts the remaining items left, so the element
    right after every removed one was never checked and could survive.
    """
    return [
        net for net in networks
        if not any(special.overlaps(net) for special in special_ranges)
    ]


def main():
    """Filter the address list and write the result file."""
    nlist = read_networks(ADDR_LIST_PATH)
    slist = read_networks(SPECIAL_LIST_PATH)
    print('IP Addresses before collapsing:', len(nlist))

    nlist = remove_special_ranges(nlist, slist)
    print('IP Addresses after removing special ranges:', len(nlist))

    # The context manager guarantees the output is flushed and closed.
    with open(OUTPUT_PATH, 'w') as collapsed_file:
        for addr in nlist:
            # Single hosts are written as bare addresses, real ranges keep
            # their prefix length.
            print(str(addr).replace('/32', ''), file=collapsed_file)


if __name__ == '__main__':
    main()
# getzones.awk - reduce a list of host names (one per line) to a
# de-duplicated list of zones, printed one per line from the END rule.
# Needs gawk: it uses @include and gensub().
#
# The exclusion rules are pulled in first. They run before every rule below,
# so a line they skip with "next" never reaches the zone logic.
@include "config/exclude-regexp-dist.awk"

# Skipping empty strings
(!$1) {next}

# Exclude some domains
(/duckdns/) {next}
(/\.r\.cloudfront\.net/) {next}

# Skipping IP addresses
(/^([0-9]{1,3}\.){3}[0-9]{1,3}$/) {next}

# Removing leading "www."
{sub(/^www\./, "", $1)}

# Removing ending dot
{sub(/\.$/, "", $1)}

# Collapse the host to its zone. When the second-to-last label is one of the
# listed names (second-level registries such as "co"/"com", or shared hosting
# and CDN names), the last THREE labels are kept; otherwise the last two.
# Hosts that already have that many labels or fewer are left unchanged,
# because the gensub() pattern needs at least one extra leading label.
{
    if (/\.(ru|co|cu|com|info|net|org|gov|edu|int|mil|biz|pp|ne|msk|spb|nnov|od|in|ho|cc|dn|i|tut|v|dp|sl|ddns|dyndns|livejournal|herokuapp|azurewebsites|cloudfront|ucoz|3dn|nov|linode|amazonaws|sl-reverse|kiev|beget|kirov|akadns|scaleway|fastly|hldns|appspot|my1|hwcdn|deviantart|wixmp|netdna-ssl|brightcove|berlogovo|edgecastcdn|trafficmanager|pximg|github|hopto|u-stream|google|keenetic|eu)\.[^.]+$/)
    {$1 = gensub(/(.+)\.([^.]+\.[^.]+\.[^.]+$)/, "\\2", 1)}
    else
    {$1 = gensub(/(.+)\.([^.]+\.[^.]+$)/, "\\2", 1)}
}

# Collect the zones; using the zone as the array key removes duplicates.
# (No sorting happens here - "for (i in arr)" order is unspecified.)
{d_other[$1] = $1}

# Print every key of arr, one per line. arrname is accepted but not used.
function printarray(arrname, arr) {
    for (i in arr) {
        print i
    }
}

# Final function
END {
    printarray("d_other", d_other)
}
# DNS timeout (in seconds) for the final (second) DNS resolving pass
FINAL_PASS_TIMEOUT = 10
# Number of concurrent resolving 'threads' for final pass
FINAL_PASS_CONCURRENCY = 35


class AZResolver(dns.asyncresolver.Resolver):
    """Async resolver that caps the number of queries in flight."""

    def __init__(self, *args, **kwargs):
        self.limitConcurrency(25)  # default limit
        super().__init__(*args, **kwargs)

    def limitConcurrency(self, count):
        """Allow at most *count* concurrent nxresolve() calls from now on."""
        self.limitingsemaphore = asyncio.Semaphore(count)

    async def nxresolve(self, domain):
        """Resolve *domain* and report whether it looks broken.

        Returns:
            *domain* when the lookup times out, the name does not exist, or no
            name server answers; ``None`` when the name resolves or exists
            but has no record of the queried type.
        """
        async with self.limitingsemaphore:
            try:
                await self.resolve(domain)
            except (dns.exception.Timeout, dns.resolver.NXDOMAIN,
                    dns.resolver.YXDOMAIN, dns.resolver.NoNameservers):
                return domain
            except dns.resolver.NoAnswer:
                # Do not treat the domain as broken if the answer is empty
                pass
        return None


async def runTasksWithProgress(tasks):
    """Await all *tasks* and return their results in completion order.

    A percentage is written to stderr each time it increases, so stdout stays
    reserved for the final domain list.
    """
    progress = 0
    old_progress = 0
    ret = []

    for task in asyncio.as_completed(tasks):
        ret.append(await task)
        progress = int(len(ret) / len(tasks) * 100)
        if old_progress < progress:
            print("{}%...".format(progress), end='\r', file=sys.stderr, flush=True)
            old_progress = progress
    print(file=sys.stderr)
    return ret


async def main():
    """Print to stdout every domain from the file in argv[1] that fails twice.

    Domains are first resolved with a short timeout and high concurrency; the
    ones that fail are retried with a longer timeout before being reported.
    Exits with status 1 on wrong usage and 2 when the file cannot be opened.
    """
    if len(sys.argv) != 2:
        # stdout carries the resulting domain list (the caller appends it to
        # its exclusion file), so diagnostics must go to stderr.
        print("Incorrect arguments!", file=sys.stderr)
        sys.exit(1)

    r = AZResolver()
    r.limitConcurrency(INITIAL_PASS_CONCURRENCY)
    r.timeout = INITIAL_PASS_TIMEOUT
    r.lifetime = INITIAL_PASS_TIMEOUT

    # Load domain file list and schedule resolving
    tasks = []
    try:
        with open(sys.argv[1], 'rb') as domainlist:
            for rawline in domainlist:
                domain = rawline.decode().strip()
                if not domain:
                    # A blank line is not a domain; do not query it.
                    continue
                tasks.append(asyncio.ensure_future(r.nxresolve(domain)))
    except OSError as e:
        print("Can't open file", sys.argv[1], e, file=sys.stderr)
        sys.exit(2)

    print("Loaded list of {} elements, resolving NXDOMAINS".format(len(tasks)), file=sys.stderr)

    try:
        # Resolve domains, first try
        nxresolved_first = await runTasksWithProgress(tasks)
        nxresolved_first = list(filter(None, nxresolved_first))

        print("Got {} broken domains, trying to resolve them again "
              "to make sure".format(len(nxresolved_first)), file=sys.stderr)

        # Second try
        tasks = []
        r.limitConcurrency(FINAL_PASS_CONCURRENCY)
        r.timeout = FINAL_PASS_TIMEOUT
        r.lifetime = FINAL_PASS_TIMEOUT

        for domain in nxresolved_first:
            tasks.append(asyncio.ensure_future(r.nxresolve(domain)))
        nxresolved_second = await runTasksWithProgress(tasks)
        nxresolved_second = list(filter(None, nxresolved_second))

        print("Finally, got {} broken domains".format(len(nxresolved_second)), file=sys.stderr)
        for domain in nxresolved_second:
            print(domain)

    except (SystemExit, KeyboardInterrupt):
        for task in tasks:
            task.cancel()
        # Re-raise so the handler around asyncio.run() turns the interruption
        # into a non-zero exit status instead of a silent "no broken domains".
        raise


if __name__ == '__main__':
    if dns.__version__ == '2.0.0':
        # Monkey-patch dnspython 2.0.0 bug #572
        # https://github.com/rthalley/dnspython/issues/572
        class monkeypatched_DatagramProtocol(dns._asyncio_backend._DatagramProtocol):
            def error_received(self, exc):  # pragma: no cover
                if self.recvfrom and not self.recvfrom.done():
                    self.recvfrom.set_exception(exc)

            def connection_lost(self, exc):
                if self.recvfrom and not self.recvfrom.done():
                    self.recvfrom.set_exception(exc)

        dns._asyncio_backend._DatagramProtocol = monkeypatched_DatagramProtocol

    try:
        asyncio.run(main())
    except (SystemExit, KeyboardInterrupt):
        sys.exit(3)
temp/nxdomain.txt "$NXDOMAINLINK" || exit 1 + +LISTSIZE="$(curl -sI "$LISTLINK" | gawk 'BEGIN {IGNORECASE=1;} /content-length/ {sub(/[ \t\r\n]+$/, "", $2); print $2}')" +[[ "$LISTSIZE" != "$(stat -c '%s' temp/list_orig.csv)" ]] && echo "List 1 size differs" && exit 2 +LISTSIZE="$(curl -sI "$NXDOMAINLINK" | gawk 'BEGIN {IGNORECASE=1;} /content-length/ {sub(/[ \t\r\n]+$/, "", $2); print $2}')" +[[ "$LISTSIZE" != "$(stat -c '%s' temp/nxdomain.txt)" ]] && echo "List 2 size differs" && exit 2 + +exit 0