mirror of
https://github.com/bol-van/zapret.git
synced 2025-01-23 01:40:40 +00:00
ipset: use awk instead of grep to avoid wrong ip matches
This commit is contained in:
parent
2bf47f4fff
commit
8adf03603c
@ -26,7 +26,7 @@ file_extract_lines()
|
||||
# $2 - from line (starting with 0)
|
||||
# $3 - line count
|
||||
# awk "{ err=1 } NR < $(($2+1)) { next } { print; err=0 } NR == $(($2+$3)) { exit err } END {exit err}" "$1"
|
||||
awk "NR < $(($2+1)) { next } { print } NR == $(($2+$3)) { exit }" "$1"
|
||||
$AWK "NR < $(($2+1)) { next } { print } NR == $(($2+$3)) { exit }" "$1"
|
||||
}
|
||||
ipset_restore_chunked()
|
||||
{
|
||||
@ -160,7 +160,7 @@ elif exists ipset; then
|
||||
# only /tmp is considered tmpfs. other locations mean tmpdir was redirected to a disk
|
||||
SAVERAM=0
|
||||
[ "$TMPDIR" = "/tmp" ] && {
|
||||
RAMSIZE=$($GREP MemTotal /proc/meminfo | awk '{print $2}')
|
||||
RAMSIZE=$($GREP MemTotal /proc/meminfo | $AWK '{print $2}')
|
||||
[ "$RAMSIZE" -lt "110000" ] && SAVERAM=1
|
||||
}
|
||||
print_reloading_backend ipset
|
||||
|
19
ipset/def.sh
19
ipset/def.sh
@ -60,6 +60,12 @@ else
|
||||
GREP=$(which grep)
|
||||
fi
|
||||
|
||||
# GNU awk is faster
# Choose the awk binary name and store it in AWK: "gawk" when `exists gawk`
# succeeds, plain "awk" otherwise.
# NOTE(review): `exists` is a helper defined outside this view; presumably it
# tests whether the named command is available - confirm.
|
||||
if exists gawk; then
|
||||
AWK=gawk
|
||||
else
|
||||
AWK=awk
|
||||
fi
|
||||
|
||||
grep_supports_b()
|
||||
{
|
||||
@ -68,17 +74,16 @@ grep_supports_b()
|
||||
}
|
||||
# get_ip_regex: set the variables REG_IPV4 and REG_IPV6 to regular expressions
# that match an IPv4 / IPv6 address with an optional "/prefix-length" suffix.
# Takes no arguments; the result is returned through those two variables.
# NOTE(review): this span comes from a rendered diff without +/- markers. Each
# assignment appears twice (first with a bare '/', then with '\/'), and the
# `grep_supports_b` wrapper appears both active and commented out. Presumably
# the first of each pair is the removed revision - confirm against the repository.
get_ip_regex()
|
||||
{
|
||||
# IPv4: four dot-separated octets (0-255), optional prefix length 0-32.
REG_IPV4='((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(/([0-9]|[12][0-9]|3[012]))?'
|
||||
# IPv6: deliberately loose - one hex group, ':', then any mix of hex groups
# and ':' characters; optional prefix length 0-128.
REG_IPV6='[0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}|:)+(/([0-9][0-9]?|1[01][0-9]|12[0-8]))?'
|
||||
# Same patterns with the prefix slash written as '\/'.
# presumably so they can be placed inside an awk /.../ regex literal - confirm.
REG_IPV4='((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\/([0-9]|[12][0-9]|3[012]))?'
|
||||
REG_IPV6='[0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}|:)+(\/([0-9][0-9]?|1[01][0-9]|12[0-8]))?'
|
||||
# good but too slow
|
||||
# REG_IPV6='([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,7}:(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(/[0-9]+)?|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(/[0-9]+)?|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(/[0-9]+)?|:((:[0-9a-fA-F]{1,4}){1,7}|:)(/([0-9][0-9]?|1[01][0-9]|12[0-8]))?'
|
||||
# When grep_supports_b succeeds, wrap both patterns in \b ... \b.
grep_supports_b && {
|
||||
REG_IPV4="\b$REG_IPV4\b"
|
||||
REG_IPV6="\b$REG_IPV6\b"
|
||||
}
|
||||
# grep_supports_b && {
|
||||
# REG_IPV4="\b$REG_IPV4\b"
|
||||
# REG_IPV6="\b$REG_IPV6\b"
|
||||
# }
|
||||
}
|
||||
|
||||
|
||||
ip2net4()
|
||||
{
|
||||
if [ -x "$IP2NET" ]; then
|
||||
|
@ -9,6 +9,11 @@ ZREESTR="$TMPDIR/reestr.txt"
|
||||
#ZURL_REESTR=https://reestr.rublacklist.net/api/current
|
||||
ZURL_REESTR=https://raw.githubusercontent.com/zapret-info/z-i/master/dump.csv
|
||||
|
||||
# awkgrep: filter stdin through $AWK and print, one per output line, every
# substring that matches the given pattern AND is immediately followed by a
# space, '|' or ';'. The trailing delimiter is required for a match but is
# not part of the printed text. The awk process runs under `nice -n 5`.
awkgrep()
|
||||
{
|
||||
# $1 - pattern. It is pasted verbatim between /.../ in the awk program,
#      so any '/' inside it must already be escaped as '\/'.
|
||||
# Per input line: match() locates the next "pattern + delimiter"; the match
# minus its last character (RLENGTH-1, i.e. without the delimiter) is printed,
# then $0 is cut down to the text after the match and the search repeats.
nice -n 5 $AWK "{while ( match(\$0,/($1[ |;])/) ) { print substr(\$0,RSTART,RLENGTH-1); \$0=substr(\$0,RSTART+RLENGTH) } }"
|
||||
}
|
||||
|
||||
dig_reestr()
|
||||
{
|
||||
@ -25,12 +30,12 @@ dig_reestr()
|
||||
# find entries with https or without domain name - they should be banned by IP
|
||||
# 2971-18 is TELEGRAM. lots of proxy IPs banned, list grows very large
|
||||
(nice -n 5 $GREP -avE "$DOMMASK" "$ZREESTR" ; $GREP -a "https://" "$ZREESTR") |
|
||||
nice -n 5 $GREP -oE "$1" | cut_local | sort -u >$TMP
|
||||
awkgrep "$1" | cut_local | sort -u >$TMP
|
||||
|
||||
ip2net$4 <"$TMP" | zz "$3"
|
||||
|
||||
# other IPs go to regular zapret list
|
||||
tail -n +2 "$ZREESTR" | nice -n 5 $GREP -oE "$1" | cut_local | nice -n 5 $GREP -xvFf "$TMP" | ip2net$4 | zz "$2"
|
||||
tail -n +2 "$ZREESTR" | awkgrep "$1" | cut_local | nice -n 5 $GREP -xvFf "$TMP" | ip2net$4 | zz "$2"
|
||||
|
||||
rm -f "$TMP"
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ if test $dlsize -lt 204800; then
|
||||
echo list file is too small. can be bad.
|
||||
exit 2
|
||||
fi
|
||||
(LANG=C cut -s -f2 -d';' "$ZREESTR" | LANG=C sed -Ee 's/^\*\.(.+)$/\1/' -ne 's/^[a-z0-9A-Z._-]+$/&/p' | awk '{ print tolower($0) }' ; cat "$ZUSERLIST" ) | sort -u | zz "$ZHOSTLIST"
|
||||
(LANG=C cut -s -f2 -d';' "$ZREESTR" | LANG=C sed -Ee 's/^\*\.(.+)$/\1/' -ne 's/^[a-z0-9A-Z._-]+$/&/p' | $AWK '{ print tolower($0) }' ; cat "$ZUSERLIST" ) | sort -u | zz "$ZHOSTLIST"
|
||||
rm -f "$ZREESTR"
|
||||
|
||||
hup_zapret_daemons
|
||||
|
@ -10,6 +10,12 @@ ZREESTR="$TMPDIR/reestr.txt"
|
||||
ZURL_REESTR=https://raw.githubusercontent.com/zapret-info/z-i/master/dump.csv
|
||||
|
||||
|
||||
# awkgrep: filter stdin through $AWK and print, one per output line, every
# substring that matches the given pattern AND is immediately followed by a
# space, '|' or ';'. The trailing delimiter is required for a match but is
# not part of the printed text. The awk process runs under `nice -n 5`.
awkgrep()
|
||||
{
|
||||
# $1 - pattern. It is pasted verbatim between /.../ in the awk program,
#      so any '/' inside it must already be escaped as '\/'.
|
||||
# Per input line: match() locates the next "pattern + delimiter"; the match
# minus its last character (RLENGTH-1, i.e. without the delimiter) is printed,
# then $0 is cut down to the text after the match and the search repeats.
nice -n 5 $AWK "{while ( match(\$0,/($1[ |;])/) ) { print substr(\$0,RSTART,RLENGTH-1); \$0=substr(\$0,RSTART+RLENGTH) } }"
|
||||
}
|
||||
|
||||
dig_reestr()
|
||||
{
|
||||
# $1 - grep ipmask
|
||||
@ -18,10 +24,9 @@ dig_reestr()
|
||||
|
||||
echo processing reestr list $2
|
||||
|
||||
tail -n +2 "$ZREESTR" | nice -n 5 $GREP -oE "$1" | cut_local | ip2net$3 | zz "$2"
|
||||
tail -n +2 "$ZREESTR" | awkgrep "$1" | cut_local | ip2net$3 | zz "$2"
|
||||
}
|
||||
|
||||
|
||||
getuser && {
|
||||
# assume all https banned by ip
|
||||
curl -k --fail --max-time 600 --connect-timeout 5 --retry 3 --max-filesize 251658240 "$ZURL_REESTR" -o "$ZREESTR" ||
|
||||
|
Loading…
Reference in New Issue
Block a user