This repository has been archived on 2025-04-04. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
antizapret-openwrt/parse.sh
2022-07-12 09:21:47 +05:00

84 lines
3.8 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Entry point of the list parser: announces the run, enables exit-on-error,
# switches to the script's own directory and loads the configuration.
echo "Parsing antizapret lists. This might take a while due to CPU limitations."
# Abort on the first failing command. pipefail is intentionally NOT enabled:
# the grep stages used further down may legitimately match nothing.
set -e
# Resolve the real location of this script (following symlinks) and work from
# there BEFORE loading the config, so that config/config.sh and every other
# relative path resolve correctly regardless of the caller's working directory.
HERE="$(dirname "$(readlink -f "${0}")")"
cd "$HERE"
source config/config.sh
# Stage: pull the host column out of the CSV dump.
echo "Stage: Extracting domains from list"
# Field 2 of every ';'-separated record is de-duplicated and filtered by the
# awk rules below; the surviving names are piped through idn and written to
# result/hostlist_original.txt.
gawk -F ';' '{print $2}' temp/list.csv \
  | sort -u \
  | gawk '
      # drop empty lines
      /^$/ {next}
      # drop anything containing a backslash
      /\\/ {next}
      # keep names made only of the allowed characters; remove every "*."
      # and a single trailing dot before printing
      /^[а-яА-Яa-zA-Z0-9\-_\.\*]*+$/ {
        gsub(/\*\./, "")
        gsub(/\.$/, "")
        print
      }' \
  | CHARSET=UTF-8 idn > result/hostlist_original.txt
# Stage: build the zone list from the extracted domains.
# FIXME: nxdomain list parsing is disabled due to its instability on z-i
###cat exclude.txt temp/nxdomain.txt > temp/exclude.txt
echo "Stage: Generate zones from domains"
echo "Created temp include/exclude files"
# Merge each dist/custom pair of config lists into one sorted, de-duplicated
# file under temp/ (dist file first, then custom).
for list_name in exclude-hosts exclude-ips include-hosts include-ips; do
  sort -u "config/${list_name}-dist.txt" "config/${list_name}-custom.txt" \
    > "temp/${list_name}.txt"
done
# Combine the explicitly included hosts with the hosts extracted from the CSV.
sort -u temp/include-hosts.txt result/hostlist_original.txt \
  > temp/hostlist_original_with_include.txt

echo "Adding distributed excluded hosts to preferences file"
# Emit one "<entry>;<field 2>" line per '|'-separated entry of field 1, keep
# the lines matching any pattern from exclude-hosts-by-ips-dist.txt, and
# append their second field to the host exclusion list.
gawk -F ';' '{
    split($1, a, /\|/)
    for (i in a) {
      print a[i]";"$2
    }
  }' temp/list.csv \
  | grep -f config/exclude-hosts-by-ips-dist.txt \
  | gawk -F ';' '{print $2}' >> temp/exclude-hosts.txt

echo "Removing excluded hosts from total hostlist"
# Run the host list through getzones.awk, then drop every line that exactly
# equals (-F -x) an entry of the exclusion list.
gawk -f scripts/getzones.awk temp/hostlist_original_with_include.txt \
  | grep -v -F -x -f temp/exclude-hosts.txt \
  | sort -u > result/hostlist_zones.txt
# Optional second pass, enabled when RESOLVE_NXDOMAIN is set to "yes":
# whatever the resolver script prints for the current zone list is appended
# to the exclusion list, and the zone list is then rebuilt against it.
if [[ "${RESOLVE_NXDOMAIN}" == "yes" ]]; then
  echo "Resolving NXDomain zones"
  scripts/resolve-dns-nxdomain.py result/hostlist_zones.txt \
    >> temp/exclude-hosts.txt

  echo "NXDomain zones exclusion "
  gawk -f scripts/getzones.awk temp/hostlist_original_with_include.txt \
    | grep -v -F -x -f temp/exclude-hosts.txt \
    | sort -u > result/hostlist_zones.txt
fi
# Generate a list of IP addresses
# Only result/blocked-ranges.txt is produced by this stage; the iplist_*
# generators below are kept for reference but are commented out.
echo "Stage: Generate a list of IP addresses"
# echo "generating iplist_special_range.txt"
# gawk -F';' '$1 ~ /\// {print $1}' temp/list.csv | grep -P '([0-9]{1,3}\.){3}[0-9]{1,3}\/[0-9]{1,2}' -o | sort -Vu > result/iplist_special_range.txt
#
# echo "generating iplist_all.txt"
# gawk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) {gsub(/\|/, RS, $1); print $1}' temp/list.csv | \
# gawk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_all.txt
#
# echo "generating iplist_blockedbyip.txt"
# gawk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/\|/, RS); print $1}' temp/list.csv | \
# gawk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_blockedbyip.txt
#
# echo "generating iplist_blockedbyip_noid2971.txt"
# grep -F -v '33-4/2018' temp/list.csv | grep -F -v '33а-5536/2019' | \
# gawk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/\|/, RS); print $1}' | \
# gawk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_blockedbyip_noid2971.txt
echo "generating blocked-ranges.txt"
# Take field 1 of every record that contains a '/', extract each
# dotted-quad/prefix-length token from it and write the unique set.
# `grep -E` replaces the deprecated `egrep` wrapper (which prints a warning
# on current GNU grep); a plain '/' needs no escaping in an ERE.
gawk -F ';' '$1 ~ /\// {print $1}' temp/list.csv \
  | grep -E -o '([0-9]{1,3}\.){3}[0-9]{1,3}/[0-9]{1,2}' \
  | sort -u > result/blocked-ranges.txt
# Generate dnsmasq aliases
# For every zone in result/hostlist_zones.txt write two dnsmasq directives:
# a server= line pointing the zone at 127.0.0.1 and an ipset= line naming the
# vpn_ipsum set.
echo "Generating dnsmasq-ipset configuration"
# The output file is opened (and truncated) once for the whole loop instead of
# being reopened in append mode twice per zone, which matters on large lists.
# The output redirection is listed first so the file is still truncated even
# if the input list cannot be opened.
while read -r line; do
  printf 'server=/%s/127.0.0.1\nipset=/%s/vpn_ipsum\n' "$line" "$line"
done > result/dnsmasq-ipset.conf < result/hostlist_zones.txt
# Print results
# Summary of the generated lists, written to stderr as "<label>: <wc -l output>".

# Print the line count of a result file, skipping files that do not exist.
# Arguments: $1 - label to print, $2 - path of the file to count
print_count() {
  local label=$1 file=$2
  if [[ -f "$file" ]]; then
    echo "${label}: $(wc -l "$file")" >&2
  fi
}

print_count "Blocked domains" result/hostlist_zones.txt
print_count "blocked-ranges" result/blocked-ranges.txt
# The generators for the iplist_* files are commented out in this script, so
# these files normally do not exist; they are reported only when present
# rather than letting wc print "No such file or directory" for each of them.
print_count "iplist_all" result/iplist_all.txt
print_count "iplist_special_range" result/iplist_special_range.txt
print_count "iplist_blockedbyip" result/iplist_blockedbyip.txt
print_count "iplist_blockedbyip_noid2971" result/iplist_blockedbyip_noid2971.txt
exit 0