Initial commit

This commit is contained in:
2022-07-12 09:21:47 +05:00
commit 26631eab29
34 changed files with 812 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
#!/usr/bin/env python3
import ipaddress
# Load the blocked-by-IP list and the special-purpose ranges. Both files are
# read as whitespace-separated IPv4 addresses / CIDR networks.
with open('result/iplist_blockedbyip_noid2971.txt', 'r') as addr_file:
    addrlist = addr_file.read()
with open('result/iplist_special_range.txt', 'r') as special_file:
    speciallist = special_file.read()

nlist = [ipaddress.IPv4Network(addr) for addr in addrlist.split()]
slist = [ipaddress.IPv4Network(addr) for addr in speciallist.split()]
print('IP Addresses before collapsing:', len(nlist))

# Drop every network that overlaps a special range. A new list is built
# instead of deleting from nlist while iterating over it: deleting in place
# shifts the remaining items left, so the element right after each removed
# one would be skipped and could wrongly survive the filter.
nlist = [
    net for net in nlist
    if not any(special.overlaps(net) for special in slist)
]
print('IP Addresses after removing special ranges:', len(nlist))

# Write one entry per line; single hosts are written without the "/32" suffix.
with open('result/iplist_blockedbyip_noid2971_collapsed.txt', 'w') as collapsed_file_prefix:
    for addr in nlist:
        print(str(addr).replace('/32', ''), file=collapsed_file_prefix)

View File

@@ -0,0 +1,40 @@
# For every input record: split it at the last dot into a name part and a
# zone part, then file it under domainarray[zone][length of name][name].
{
    if (match($0, /(.*)\.([^.]+$)/, parts)) {
        domainname = parts[1]
        domainzone = parts[2]
    } else {
        # No "<name>.<zone>" shape in the record: both parts are the whole
        # record (the "" concatenation forces a string value)
        domainname = $0 ""
        domainzone = $0 ""
    }
    domainlength = length(domainname)
    domainarray[domainzone][domainlength][domainname] = domainname
}
# Print the three-level array arr (zone -> name length -> name) as a literal
# assigned to arrname. The emitted text has this layout:
#   <arrname> = {
#   "<zone>":{
#    <length>:"<name><name>...",
#    <length>:"<name>..."
#   },
#   "<zone>":{
#    ...
#   }};
# Names sharing a zone and a length are written back to back with no
# separator (presumably the reader of this output slices them using the
# length key -- verify against the consumer).
# None of the loop variables or flags are declared as extra parameters, so
# they are global awk variables.
function printarray(arrname, arr) {
# firsttime_N == 1 means nothing has been printed yet at that nesting level,
# so no ",\n" separator is needed before the next entry
firsttime_1 = 1
firsttime_2 = 1
print arrname, "= {"
for (domainzone in arr) {
if (firsttime_1 == 0) {printf ",\n"} firsttime_1 = 0;
print "\"" domainzone "\":{"
for (domainlength in arr[domainzone]) {
if (firsttime_2 == 0) {printf ",\n"} firsttime_2 = 0;
# Opens the string value for this length: ' <length>:"'
printf " %s", "" domainlength ":\""
for (domainname in arr[domainzone][domainlength]) {
printf "%s", domainname
}
printf "\""
}
# Reset the inner flag so the next zone starts without a leading separator
firsttime_2 = 1;
printf "\n}"
}
print "};"
}
# After the last record: dump everything collected in domainarray
END { printarray("domains", domainarray) }

View File

@@ -0,0 +1,49 @@
# Traverse arrays by ascending numeric index, so d_ip is later walked in the
# order its entries were stored; qq is the next free index.
BEGIN {
    PROCINFO["sorted_in"] = "@ind_num_asc"
    qq = 0
}

# Skip records whose first field is empty (or evaluates to zero)
!$1 { next }

# Store the first field of each remaining record under a sequential index
{ d_ip[qq++] = $1 }
# Convert a dotted-quad string v ("a.b.c.d") to its numeric value
# a*2^24 + b*2^16 + c*2^8 + d. The pieces are split into the global array s.
function iptodec(v) {
    split(v, s, ".")
    # Horner form of s[1]*16777216 + s[2]*65536 + s[3]*256 + s[4]
    return ((s[1] * 256 + s[2]) * 256 + s[3]) * 256 + s[4]
}
# Return the base-36 representation (digits 0-9 then a-z) of the number r.
# At least one digit is always produced, so 0 yields "0".
# baselen, base and rr are global variables.
function ipdecto36(r) {
    baselen = split("0123456789abcdefghijklmnopqrstuvwxyz", base, "")
    rr = ""
    while (1) {
        # Prepend the digit for the current least significant position
        rr = base[(r % baselen) + 1] rr
        r = int(r / baselen)
        if (!r) {
            break
        }
    }
    return rr
}
# Print the addresses in arr as one statement of the form
#   var <arrname> = "<d> <d> ... ".split(" ");
# where each <d> is the base-36 encoded difference between an address and
# the one printed before it (the first is relative to 0). Despite the
# function name the encoding is base 36, not hexadecimal.
# The string literal is continued across lines with a trailing backslash.
# previous_dec, d_printed_end and i are global variables.
function printarray_hex(arrname, arr) {
    previous_dec = 0
    d_printed_end = 0
    print "var", arrname, "= \"\\"
    for (i in arr) {
        printf "%s ", ipdecto36(iptodec(arr[i]) - previous_dec)
        previous_dec = iptodec(arr[i])
        # Break the output line after every index divisible by 40
        d_printed_end = (i % 40 == 0) ? 1 : 0
        if (d_printed_end) {
            print "\\"
        }
    }
    # Make sure the last data line also ends with a continuation backslash
    if (!d_printed_end) {
        print "\\"
    }
    print "\".split(\" \");"
    print ""
}
# After the last record: emit the collected addresses as "d_ipaddr".
# asort(d_ip) is left commented out, so entries are emitted in stored order.
END {
    #asort(d_ip)
    printarray_hex("d_ipaddr", d_ip)
}

38
scripts/getzones.awk Normal file
View File

@@ -0,0 +1,38 @@
@include "config/exclude-regexp-dist.awk"
# The rules below run in order for every input record; "next" drops the
# record before any later rule sees it.
# Skip records whose first field is empty
(!$1) {next}
# Exclude records mentioning duckdns or *.r.cloudfront.net
(/duckdns/) {next}
(/\.r\.cloudfront\.net/) {next}
# Skip records that are a bare dotted-quad IPv4 address
(/^([0-9]{1,3}\.){3}[0-9]{1,3}$/) {next}
# Remove a leading "www." from the first field
{sub(/^www\./, "", $1)}
# Remove a trailing dot from the first field
{sub(/\.$/, "", $1)}
# Reduce the name to its registrable-looking suffix: when the second-to-last
# label is one of the listed second-level zones or hosting/service names,
# keep the last three labels, otherwise keep the last two. The regex test and
# gensub() (called without a target) both work on $0; names with too few
# labels are left unchanged because gensub() finds no match.
{
if (/\.(ru|co|cu|com|info|net|org|gov|edu|int|mil|biz|pp|ne|msk|spb|nnov|od|in|ho|cc|dn|i|tut|v|dp|sl|ddns|dyndns|livejournal|herokuapp|azurewebsites|cloudfront|ucoz|3dn|nov|linode|amazonaws|sl-reverse|kiev|beget|kirov|akadns|scaleway|fastly|hldns|appspot|my1|hwcdn|deviantart|wixmp|netdna-ssl|brightcove|berlogovo|edgecastcdn|trafficmanager|pximg|github|hopto|u-stream|google|keenetic|eu)\.[^.]+$/)
{$1 = gensub(/(.+)\.([^.]+\.[^.]+\.[^.]+$)/, "\\2", 1)}
else
{$1 = gensub(/(.+)\.([^.]+\.[^.]+$)/, "\\2", 1)}
}
# Collect the result as an array key, which removes duplicates (no sorting
# is performed here)
{d_other[$1] = $1}
# Print every index of arr on its own line. arrname is accepted but not used;
# the loop variable i is a global variable.
function printarray(arrname, arr) {
    for (i in arr)
        print i
}
# After the last record: print the collected unique domains
END { printarray("d_other", d_other) }

View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python3
import sys
import os
import asyncio
import dns.resolver
import dns.asyncresolver
import dns.exception
import dns._asyncio_backend
# Settings for the two resolving passes performed by main(). The values are
# applied to the resolver's `timeout` and `lifetime` attributes and to its
# concurrency-limiting semaphore.

# DNS timeout (in seconds) for the initial DNS resolving pass
INITIAL_PASS_TIMEOUT = 3
# Number of concurrent resolving 'threads' (lookups allowed to run at once)
# for the initial pass
INITIAL_PASS_CONCURRENCY = 100
# DNS timeout (in seconds) for the final (second) DNS resolving pass
FINAL_PASS_TIMEOUT = 10
# Number of concurrent resolving 'threads' (lookups allowed to run at once)
# for the final pass
FINAL_PASS_CONCURRENCY = 35
class AZResolver(dns.asyncresolver.Resolver):
    """Asynchronous resolver that limits how many lookups run at once.

    Every call to :meth:`nxresolve` has to acquire ``limitingsemaphore``
    before it queries, so at most ``count`` lookups are in flight.
    """

    def __init__(self, *args, **kwargs):
        # Install the default limit (25) first, then initialise the base class.
        self.limitConcurrency(25)
        super().__init__(*args, **kwargs)

    def limitConcurrency(self, count):
        """Replace the limiting semaphore with a fresh one of size *count*."""
        self.limitingsemaphore = asyncio.Semaphore(count)

    async def nxresolve(self, domain):
        """Resolve *domain* and report whether the lookup failed.

        Returns *domain* when resolving timed out, the name does not exist
        (NXDOMAIN / YXDOMAIN) or no nameserver could answer; returns ``None``
        when resolving succeeded or the answer was merely empty.
        """
        async with self.limitingsemaphore:
            try:
                await self.resolve(domain)
            except dns.resolver.NoAnswer:
                # Do not treat the domain as broken if the answer is empty
                return None
            except (
                dns.exception.Timeout,
                dns.resolver.NXDOMAIN,
                dns.resolver.YXDOMAIN,
                dns.resolver.NoNameservers,
            ):
                return domain
        return None
async def runTasksWithProgress(tasks):
    """Await all *tasks*, printing a percentage indicator to stderr.

    Results are collected in completion order (not submission order). The
    indicator is rewritten in place whenever the whole-number percentage
    grows, and a newline is printed once everything has finished.

    Returns the list of task results.
    """
    results = []
    shown_percent = 0
    for finished in asyncio.as_completed(tasks):
        results.append(await finished)
        percent = int(len(results) / len(tasks) * 100)
        if percent > shown_percent:
            print("{}%...".format(percent), end='\r', file=sys.stderr, flush=True)
            shown_percent = percent
    # Terminate the in-place progress line
    print(file=sys.stderr)
    return results
async def main():
    """Find domains from the file named in ``sys.argv[1]`` that do not resolve.

    Every domain is resolved once with the initial-pass settings; the ones
    reported as broken are resolved a second time with the final-pass
    settings. Domains still broken after the second pass are printed to
    stdout, one per line. All diagnostics go to stderr so that stdout
    carries only the resulting list.

    Exits with status 1 on a wrong argument count and 2 when the domain
    file cannot be opened or read.
    """
    if len(sys.argv) != 2:
        # Report on stderr like every other diagnostic in this script
        print("Incorrect arguments!", file=sys.stderr)
        sys.exit(1)

    r = AZResolver()
    r.limitConcurrency(INITIAL_PASS_CONCURRENCY)
    r.timeout = INITIAL_PASS_TIMEOUT
    r.lifetime = INITIAL_PASS_TIMEOUT

    # Load domain file list and schedule resolving
    tasks = []
    try:
        with open(sys.argv[1], 'rb') as domainlist:
            for line in domainlist:
                domain = line.decode().strip()
                if not domain:
                    # Blank lines carry no domain; do not schedule a lookup
                    continue
                tasks.append(asyncio.ensure_future(r.nxresolve(domain)))
    except OSError as e:
        print("Can't open file", sys.argv[1], e, file=sys.stderr)
        sys.exit(2)

    print("Loaded list of {} elements, resolving NXDOMAINS".format(len(tasks)), file=sys.stderr)

    try:
        # Resolve domains, first try
        nxresolved_first = await runTasksWithProgress(tasks)
        nxresolved_first = list(filter(None, nxresolved_first))

        print("Got {} broken domains, trying to resolve them again "
              "to make sure".format(len(nxresolved_first)), file=sys.stderr)

        # Second try, with a longer timeout and lower concurrency
        tasks = []
        r.limitConcurrency(FINAL_PASS_CONCURRENCY)
        r.timeout = FINAL_PASS_TIMEOUT
        r.lifetime = FINAL_PASS_TIMEOUT

        for domain in nxresolved_first:
            tasks.append(asyncio.ensure_future(r.nxresolve(domain)))

        nxresolved_second = await runTasksWithProgress(tasks)
        nxresolved_second = list(filter(None, nxresolved_second))

        print("Finally, got {} broken domains".format(len(nxresolved_second)), file=sys.stderr)

        for domain in nxresolved_second:
            print(domain)

    except (SystemExit, KeyboardInterrupt):
        # Cancel whatever is still pending in the current pass. The exception
        # is not re-raised here, matching the original behaviour.
        for task in tasks:
            task.cancel()
# Script entry point: optionally patch dnspython, then run main() in a new
# event loop. Exits with status 3 when SystemExit or KeyboardInterrupt
# escapes asyncio.run().
if __name__ == '__main__':
    if dns.__version__ == '2.0.0':
        # Monkey-patch dnspython 2.0.0 bug #572
        # https://github.com/rthalley/dnspython/issues/572
        # Both callbacks below only set the exception on the pending
        # `recvfrom` future when that future exists and is not done yet.
        # NOTE(review): relies on the private dns._asyncio_backend module;
        # confirm against the dnspython version actually deployed.
        class monkeypatched_DatagramProtocol(dns._asyncio_backend._DatagramProtocol):
            def error_received(self, exc):  # pragma: no cover
                if self.recvfrom and not self.recvfrom.done():
                    self.recvfrom.set_exception(exc)

            def connection_lost(self, exc):
                if self.recvfrom and not self.recvfrom.done():
                    self.recvfrom.set_exception(exc)

        # Swap the class in so code that looks it up on the module afterwards
        # gets the patched version
        dns._asyncio_backend._DatagramProtocol = monkeypatched_DatagramProtocol

    try:
        asyncio.run(main())
    except (SystemExit, KeyboardInterrupt):
        sys.exit(3)