#!/bin/bash
set -e

# Automatic guard against concurrent runs; kept disabled in favour of the
# manual check below.
#tmp=`pgrep checkdomain | wc -l`
#(( tmp > 1 )) && echo something is already going on && pgrep -a checkdomain && exit 1
#unset tmp

# Manual guard: list every process whose name matches "checkdomain" and let
# the operator decide whether another run is already in progress.
echo
# pgrep exits non-zero when nothing matches (for instance when this script was
# started as "bash <script>"); that must not abort the run under `set -e`.
pgrep -a checkdomain || true
echo
echo 'press enter if you do not see more than one of those'
# Block until the operator confirms; the typed text itself is not used.
read -r

#I just hope the multiple background processes each have their own environment
#and that their variables do not overlap with one another. The background
#processes here show up as checkdomain.bash, so it seems the fork is good enough
#######################################
# Derive the parent domains of every name listed in a file.
#
# Each name is shortened one leading label at a time until a single dot is
# left. Every intermediate result that has three, two or one dot(s) is written
# to "<file>.domains"; parents with more than three dots are not written. A
# name that already has exactly one dot is written unchanged. A name with more
# dots is never written itself, only its parents are.
#
# A parent is skipped when it equals the one written last at the same depth,
# which only removes all repeats when the input is sorted. Names given with a
# single dot are not part of that comparison. Remaining duplicates have to be
# removed by a later pass (e.g. sort -u).
#
# Example: for the two input lines
#   181-183-130-205.genericrev.telcel.net.ve
#   181-183-130-206.genericrev.telcel.net.ve
# the output file contains
#   genericrev.telcel.net.ve
#   telcel.net.ve
#   net.ve
#
# Arguments: $1 - input file, one name per line, no leading dot; blank lines
#                 are skipped
# Outputs:   progress messages on stdout, errors on stderr,
#            results in "$1.domains"
# Returns:   exits the current (sub)shell with status 1 when the argument is
#            missing, the files cannot be opened, or a name contains no dot
#######################################
secondlevel() {
  local infile=$1
  local outfile domain dots depth
  # last_seen[n] is the most recently handled parent that has n dots.
  local -a last_seen=()

  if [[ -z $infile ]]; then
    printf 'function %s requires file argument\n' "${FUNCNAME[0]}" >&2
    exit 1
  fi
  outfile=$infile.domains

  printf 'writing to %s\n' "$outfile"
  while read -r domain || [[ -n $domain ]]; do
    [[ -n $domain ]] || continue

    # Keep only the dots: the length of the result is the depth of the name.
    dots=${domain//[^.]/}
    if [[ -z $dots ]]; then
      # stdout is the output file inside this loop, so report on stderr.
      printf 'Error domain %s level noexist\n' "$domain" >&2
      exit 1
    fi

    # Already a second-level name: pass it through as it is.
    if [[ $dots == . ]]; then
      printf '%s\n' "$domain"
    fi

    until [[ $dots == . ]]; do
      domain=${domain#*.}
      dots=${domain//[^.]/}
      depth=${#dots}
      if (( depth >= 1 && depth <= 3 )); then
        # Quoted right-hand side: compare literally, never as a glob pattern.
        [[ $domain == "${last_seen[depth]-}" ]] || printf '%s\n' "$domain"
        last_seen[depth]=$domain
      fi
    done
  done < "$infile" > "$outfile" || {
    printf 'Error cannot read %s or write %s\n' "$infile" "$outfile" >&2
    exit 1
  }
  printf 'wrote to %s\n' "$outfile"
}
# Fetching the public suffix list is currently disabled.
#[[ ! -f public_suffix_list.dat ]] && wget https://publicsuffix.org/list/public_suffix_list.dat
#[[ ! -f public_suffix_list.dat ]] && echo Error could not fetch public_suffix_list.dat && exit 1

# Work inside an emptied domains/ directory. Each step is a plain command
# rather than "cmd && echo done", so that a failure is not hidden from
# `set -e`; the cd is checked explicitly because everything below writes
# relative to the current directory.
printf '%s' 'entering domains/ and cleaning-up...'
mkdir -p domains/
rm -f domains/*
cd domains/ || exit 1
echo done

#getting rid of rev.sfr.net and others who have this terrible manner of dealing
#with reverse dns naming
# The filter drops every line that starts with four dot-separated groups of
# digits followed by another dot.
printf '%s' 'writing to ptr.unique.nomadness while avoiding ip.subdomain.madness...'
sed -r '/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\./d' ../ptr.unique > ptr.unique.nomadness
echo done

#preparing for domain identification
# Seven line-aligned chunks named ptr.unique.nomadness00 .. ptr.unique.nomadness06.
printf '%s' 'splitting ptr.unique.nomadness into 7 files...'
split -a2 -d -nl/7 ptr.unique.nomadness ptr.unique.nomadness
echo done
#ls -lhF ptr.unique.nomadness[0-9][0-9]

#let's get second-level domains and dereference anything that's above second-level
#note that second-level public suffixes will be dealt with as an aftermath
# One background worker per chunk. The PIDs are kept so that the exit status
# of every worker can be checked afterwards.
pids=()
for f in ptr.unique.nomadness[0-9][0-9]; do
  # An unmatched glob stays literal; do not start a worker on a missing file.
  [[ -e $f ]] || continue
  secondlevel "$f" &
  pids+=("$!")
done; unset f
jobs
cat <<EOF

you may want to watch it live

ps auxfww | grep checkdomain
watch ls -lF domains/

this program will proceed once all processes are done

EOF

# `wait` without operands always reports success, so each worker is waited
# for individually and any failure stops the script before results are merged.
failed=0
time for pid in "${pids[@]}"; do
  wait "$pid" || failed=1
done
unset pid pids
if (( failed )); then
  echo 'Error at least one secondlevel worker failed' >&2
  exit 1
fi
unset failed

# Collect the names that consist of exactly one label in front of a suffix
# listed in ~/masspie/SLDs. The whitespace-separated suffixes are loaded into
# a lookup table and the name list is read once; each suffix is compared
# literally, so its dots are not treated as regex wildcards. Matches come out
# in the order of the name list; the sort below makes that order irrelevant.
# NOTE(review): assumes SLDs holds plain suffixes such as "co.uk", not regular
# expressions -- confirm against the file.
echo 'writing to ptr.unique.nomadness.sld'
awk '
  FILENAME == ARGV[1] { for (i = 1; i <= NF; i++) suffix[$i]; next }
  { dot = index($0, ".") }
  dot > 1 && (substr($0, dot + 1) in suffix)
' ~/masspie/SLDs ptr.unique.nomadness > ptr.unique.nomadness.sld
echo done

#no need to uniq this one as we're doing one more shot of this sorting hereby
#echo writing to ptr.unique.nomadness.sld.unique
#sort -u ptr.unique.nomadness.sld > ptr.unique.nomadness.sld.unique && echo done

# Merge the per-chunk results with the suffix matches and drop duplicates.
# Kept as separate commands so that a failing sort is not hidden from `set -e`.
printf '%s' 'merging back and sorting into domains.unique ...'
sort --version-sort -u ptr.unique.nomadness[0-9][0-9].domains ptr.unique.nomadness.sld > domains.unique
echo done

#we don't need the following as we're going to deal with second-level+fix only
#echo -n getting rid of duplicates between PTR FQDNs and last results into ptr.unique.nomadness.unique.wodupl ...
#grep -vf ptr.unique ptr.unique.nomadness.unique > ptr.unique.nomadness.unique.wodupl && echo done
#--> grep: memory exhausted
#wc -l ptr.unique.nomadness.unique
#wc -l ptr.unique.nomadness.unique.wodupl