Initial commit
This commit is contained in:
19
scripts/collapse_blockedbyip_noid2971.py
Normal file
19
scripts/collapse_blockedbyip_noid2971.py
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env python3
"""Remove special-range addresses from the blocked-by-IP list.

Reads whitespace-separated IPv4 addresses/networks from
``result/iplist_blockedbyip_noid2971.txt``, drops every entry that overlaps
a network listed in ``result/iplist_special_range.txt`` and writes the
remaining entries, one per line, to
``result/iplist_blockedbyip_noid2971_collapsed.txt``.

Note: despite the "collapsed" file name, this script does not merge adjacent
networks; it only filters the list.
"""

import ipaddress

SOURCE_LIST = 'result/iplist_blockedbyip_noid2971.txt'
SPECIAL_LIST = 'result/iplist_special_range.txt'
OUTPUT_LIST = 'result/iplist_blockedbyip_noid2971_collapsed.txt'


def read_networks(path):
    """Return the whitespace-separated entries of *path* as IPv4Network objects.

    Raises ``OSError`` if the file cannot be read and ``ValueError`` if an
    entry is not a valid IPv4 address or network.
    """
    with open(path, 'r') as listfile:
        return [ipaddress.IPv4Network(addr) for addr in listfile.read().split()]


def remove_special_ranges(networks, special):
    """Return the members of *networks* that overlap none of *special*.

    A new list is built instead of deleting from *networks* while iterating
    over it: deleting in place shifts the following element into the slot
    that was just visited, so that element is never checked.
    """
    return [
        net for net in networks
        if not any(rng.overlaps(net) for rng in special)
    ]


def main():
    """Filter the blocked-by-IP list and write the result file."""
    nlist = read_networks(SOURCE_LIST)
    slist = read_networks(SPECIAL_LIST)
    print('IP Addresses before collapsing:', len(nlist))

    nlist = remove_special_ranges(nlist, slist)
    print('IP Addresses after removing special ranges:', len(nlist))

    with open(OUTPUT_LIST, 'w') as collapsed_file:
        for addr in nlist:
            # Single hosts are written as bare addresses, without "/32".
            print(str(addr).replace('/32', ''), file=collapsed_file)


if __name__ == '__main__':
    main()
|
||||
40
scripts/generate-pac-domains.awk
Normal file
40
scripts/generate-pac-domains.awk
Normal file
@@ -0,0 +1,40 @@
|
||||
# For every input record: split it at its last dot into a name part and a
# zone part, then file the name under domainarray[zone][length of name].
# A record without a splittable last dot is used unchanged as both parts.
{
    if (match($0, /\.[^.]+$/)) {
        domainname = substr($0, 1, RSTART - 1)
        domainzone = substr($0, RSTART + 1)
    } else {
        domainname = domainzone = $0
    }

    domainlength = length(domainname)
    domainarray[domainzone][domainlength][domainname] = domainname
    #print "adding", $0, ":", domainzone, domainlength, domainname
}
|
||||
|
||||
|
||||
# Print the three-level array ARR (zone -> name length -> names) as an
# object literal assigned to ARRNAME:
#
#   ARRNAME = {
#   "zone":{
#    LENGTH:"name1name2...",
#    LENGTH:"..."
#   },
#   ...
#   };
#
# All names of one length are written back to back without a separator.
# The parameters after ARR are local variables (awk has no other way to
# declare them); callers pass only ARRNAME and ARR.
function printarray(arrname, arr,    zone, len, name, zonesep, lensep) {
    print arrname, "= {"

    zonesep = ""
    for (zone in arr) {
        # Separator goes before every zone except the first one.
        printf "%s", zonesep
        zonesep = ",\n"

        print "\"" zone "\":{"

        lensep = ""
        for (len in arr[zone]) {
            # Separator goes before every length bucket except the first.
            printf "%s", lensep
            lensep = ",\n"

            printf " %s:\"", len
            for (name in arr[zone][len]) {
                printf "%s", name
            }
            printf "\""
        }

        printf "\n}"
    }
    print "};"
}
|
||||
|
||||
# After the last record: print everything collected in domainarray as the
# object named "domains".
END { printarray("domains", domainarray) }
|
||||
49
scripts/generate-pac-ipaddrs.awk
Normal file
49
scripts/generate-pac-ipaddrs.awk
Normal file
@@ -0,0 +1,49 @@
|
||||
#BEGIN {PROCINFO["sorted_in"] = "@unsorted"}

# Make "for (i in array)" loops walk arrays in ascending numeric index
# order (gawk feature) and start the record counter at zero.
BEGIN {
    PROCINFO["sorted_in"] = "@ind_num_asc"
    qq = 0
}

# Skip records whose first field is empty (or evaluates to zero).
!$1 { next }

# Store the first field of every remaining record under the next index.
{ d_ip[qq++] = $1 }
|
||||
|
||||
# Convert the dotted-quad string V ("a.b.c.d") to its numeric value
# a*2^24 + b*2^16 + c*2^8 + d.
# OCTETS is a local scratch array, so the call no longer leaves a global
# array behind; callers pass only V.
function iptodec(v,    octets) {
    split(v, octets, ".")
    return octets[4] + octets[3]*256 + octets[2]*65536 + octets[1]*16777216
}
|
||||
|
||||
# Return the non-negative integer R written in base 36, using the digits
# 0-9 followed by a-z. Zero is returned as "0".
# DIGITS, RADIX and RR are local variables; callers pass only R. The digit
# is picked straight out of the alphabet string, so no lookup array has to
# be rebuilt on every call.
function ipdecto36(r,    digits, radix, rr) {
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    radix = length(digits)

    rr = ""
    do {
        # Prepend the least significant remaining digit.
        rr = substr(digits, (r % radix) + 1, 1) rr
        r = int(r / radix)
    } while (r)
    return rr
}
|
||||
|
||||
# Print the addresses in ARR as one JavaScript statement:
#
#   var ARRNAME = "\
#   <delta> <delta> ... \
#   ".split(" ");
#
# Each <delta> is the base-36 form of the difference between an address's
# numeric value and that of the previously printed address (the first one
# is relative to 0). A line continuation is written after every element
# whose index is a multiple of 40, and once more at the end if the last
# element did not already produce one.
# The parameters after ARR are local variables; callers pass only ARRNAME
# and ARR.
function printarray_hex(arrname, arr,    i, current_dec, previous_dec, d_printed_end) {
    d_printed_end = 0
    previous_dec = 0

    print "var", arrname, "= \"\\"
    for (i in arr) {
        # Convert once and reuse the value for both the delta and the
        # next iteration's baseline.
        current_dec = iptodec(arr[i])
        printf "%s ", ipdecto36(current_dec - previous_dec)
        previous_dec = current_dec

        d_printed_end = (i % 40 == 0)
        if (d_printed_end) {
            print "\\"
        }
    }
    if (!d_printed_end) {
        print "\\"
    }
    print "\".split(\" \");"
    print ""
}
|
||||
|
||||
# After the last record: print the collected addresses as "d_ipaddr".
# (An explicit asort(d_ip) was tried here and is left disabled.)
END {
    #asort(d_ip)
    printarray_hex("d_ipaddr", d_ip)
}
|
||||
38
scripts/getzones.awk
Normal file
38
scripts/getzones.awk
Normal file
@@ -0,0 +1,38 @@
|
||||
@include "config/exclude-regexp-dist.awk"
|
||||
|
||||
# Skip records whose first field is empty (or evaluates to zero).
!$1 { next }

# Skip excluded domains.
/duckdns/ || /\.r\.cloudfront\.net/ { next }

# Skip bare IPv4 addresses.
/^([0-9]{1,3}\.){3}[0-9]{1,3}$/ { next }

{
    # Strip a leading "www." and a trailing dot.
    sub(/^www\./, "", $1)
    sub(/\.$/, "", $1)

    # Names whose second-to-last label is one of the listed zones are cut
    # down to their last three labels; every other name is cut down to its
    # last two labels. Names that are already short enough stay unchanged.
    if ($0 ~ /\.(ru|co|cu|com|info|net|org|gov|edu|int|mil|biz|pp|ne|msk|spb|nnov|od|in|ho|cc|dn|i|tut|v|dp|sl|ddns|dyndns|livejournal|herokuapp|azurewebsites|cloudfront|ucoz|3dn|nov|linode|amazonaws|sl-reverse|kiev|beget|kirov|akadns|scaleway|fastly|hldns|appspot|my1|hwcdn|deviantart|wixmp|netdna-ssl|brightcove|berlogovo|edgecastcdn|trafficmanager|pximg|github|hopto|u-stream|google|keenetic|eu)\.[^.]+$/)
        $1 = gensub(/(.+)\.([^.]+\.[^.]+\.[^.]+$)/, "\\2", 1)
    else
        $1 = gensub(/(.+)\.([^.]+\.[^.]+$)/, "\\2", 1)

    # Keying the array by the name keeps one entry per distinct name.
    d_other[$1] = $1
}
|
||||
|
||||
# Print every index of ARR on its own line.
# ARRNAME is accepted but not used. I is a local loop variable (declared
# as an extra parameter so it does not become a global); callers pass
# only ARRNAME and ARR.
function printarray(arrname, arr,    i) {
    for (i in arr) {
        print i
    }
}
|
||||
|
||||
# After the last record: print every collected name, one per line.
END { printarray("d_other", d_other) }
|
||||
126
scripts/resolve-dns-nxdomain.py
Normal file
126
scripts/resolve-dns-nxdomain.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os
|
||||
import asyncio
|
||||
import dns.resolver
|
||||
import dns.asyncresolver
|
||||
import dns.exception
|
||||
import dns._asyncio_backend
|
||||
|
||||
# --- Initial (first) resolving pass ---
# Per-query DNS timeout, in seconds.
INITIAL_PASS_TIMEOUT = 3
# How many lookups may be in flight at the same time.
INITIAL_PASS_CONCURRENCY = 100

# --- Final (second) resolving pass ---
# Per-query DNS timeout, in seconds.
FINAL_PASS_TIMEOUT = 10
# How many lookups may be in flight at the same time.
FINAL_PASS_CONCURRENCY = 35
|
||||
|
||||
|
||||
class AZResolver(dns.asyncresolver.Resolver):
    """Async resolver that bounds the number of concurrent lookups.

    Every call to :meth:`nxresolve` holds ``self.limitingsemaphore`` for the
    duration of the query, so at most as many queries run at once as the
    semaphore allows.
    """

    def __init__(self, *args, **kwargs):
        # Install the default limit first, then let the base class set
        # itself up with whatever arguments were given.
        self.limitConcurrency(25)
        super().__init__(*args, **kwargs)

    def limitConcurrency(self, count):
        """Replace the limiting semaphore with a new one allowing *count* holders."""
        self.limitingsemaphore = asyncio.Semaphore(count)

    async def nxresolve(self, domain):
        """Try to resolve *domain*; return it if the lookup fails.

        Returns *domain* when the query times out, or raises NXDOMAIN,
        YXDOMAIN or NoNameservers. Returns ``None`` when the query succeeds
        or when the answer is empty (NoAnswer). Any other exception
        propagates to the caller.
        """
        async with self.limitingsemaphore:
            try:
                #print(domain, file=sys.stderr)
                await self.resolve(domain)
            except (
                dns.exception.Timeout,
                dns.resolver.NXDOMAIN,
                dns.resolver.YXDOMAIN,
                dns.resolver.NoNameservers,
            ):
                return domain
            except dns.resolver.NoAnswer:
                # Do not treat the domain as broken if the answer is empty
                return None
        return None
|
||||
|
||||
async def runTasksWithProgress(tasks):
    """Await every awaitable in *tasks*, reporting progress on stderr.

    Results are collected in completion order, not submission order. Each
    time the integer percentage of finished tasks grows, it is rewritten in
    place on stderr (carriage return, no newline); a final newline is
    written once everything has finished.

    Returns the list of results. An exception raised by a task propagates.
    """
    results = []
    shown = 0

    for finished in asyncio.as_completed(tasks):
        results.append(await finished)
        percent = int(len(results) / len(tasks) * 100)
        if percent > shown:
            print("{}%...".format(percent), end='\r', file=sys.stderr, flush=True)
            shown = percent

    print(file=sys.stderr)
    return results
|
||||
|
||||
async def main():
    """Print the domains from the file in ``sys.argv[1]`` that fail to resolve.

    Every non-empty line of the file is resolved once with the initial-pass
    timeout and concurrency. The domains that failed are resolved a second
    time with the final-pass settings, and those that fail again are printed
    to stdout, one per line. Progress and diagnostics go to stderr.

    Exits with status 1 on wrong arguments and status 2 if the file cannot
    be opened or read.
    """
    if len(sys.argv) != 2:
        # stdout carries the result list, so diagnostics belong on stderr.
        print("Incorrect arguments!", file=sys.stderr)
        sys.exit(1)

    r = AZResolver()
    r.limitConcurrency(INITIAL_PASS_CONCURRENCY)
    r.timeout = INITIAL_PASS_TIMEOUT
    r.lifetime = INITIAL_PASS_TIMEOUT

    # Load domain file list and schedule resolving
    tasks = []
    try:
        with open(sys.argv[1], 'rb') as domainlist:
            for line in domainlist:
                domain = line.decode().strip()
                if not domain:
                    # Blank lines are not domains; do not schedule them.
                    continue
                tasks.append(asyncio.ensure_future(r.nxresolve(domain)))
    except OSError as e:
        print("Can't open file", sys.argv[1], e, file=sys.stderr)
        sys.exit(2)

    print("Loaded list of {} elements, resolving NXDOMAINS".format(len(tasks)), file=sys.stderr)
    #sys.exit(0)

    try:
        # Resolve domains, first try
        nxresolved_first = await runTasksWithProgress(tasks)
        nxresolved_first = list(filter(None, nxresolved_first))

        print("Got {} broken domains, trying to resolve them again "
              "to make sure".format(len(nxresolved_first)), file=sys.stderr)

        # Second try
        tasks = []
        r.limitConcurrency(FINAL_PASS_CONCURRENCY)
        r.timeout = FINAL_PASS_TIMEOUT
        r.lifetime = FINAL_PASS_TIMEOUT

        for domain in nxresolved_first:
            tasks.append(asyncio.ensure_future(r.nxresolve(domain)))
        nxresolved_second = await runTasksWithProgress(tasks)
        nxresolved_second = list(filter(None, nxresolved_second))

        print("Finally, got {} broken domains".format(len(nxresolved_second)), file=sys.stderr)
        for domain in nxresolved_second:
            print(domain)

    except (SystemExit, KeyboardInterrupt):
        for task in tasks:
            task.cancel()
        # Re-raise so an aborted run is not reported as a successful one.
        raise
|
||||
|
||||
|
||||
if __name__ == '__main__':
    if dns.__version__ == '2.0.0':
        # Monkey-patch dnspython 2.0.0 bug #572
        # https://github.com/rthalley/dnspython/issues/572
        #
        # Both callbacks only set the exception on the pending recvfrom
        # future when that future exists and has not completed yet.
        class monkeypatched_DatagramProtocol(dns._asyncio_backend._DatagramProtocol):
            def error_received(self, exc):  # pragma: no cover
                if self.recvfrom and not self.recvfrom.done():
                    self.recvfrom.set_exception(exc)

            def connection_lost(self, exc):
                if self.recvfrom and not self.recvfrom.done():
                    self.recvfrom.set_exception(exc)

        dns._asyncio_backend._DatagramProtocol = monkeypatched_DatagramProtocol

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Interrupted by the user. SystemExit is deliberately not caught
        # here, so the exit statuses chosen inside main() (1: bad
        # arguments, 2: unreadable file) reach the caller unchanged
        # instead of being replaced by 3.
        sys.exit(3)
|
||||
Reference in New Issue
Block a user