Go through down sites faster: limit wget to one try with a one-second timeout

This commit is contained in:
xfnw 2020-12-15 12:31:20 -05:00
parent 6b7002417d
commit be5b250319

10
urls.sh
View file

@@ -1,9 +1,15 @@
# urls.sh: collect the URLs wget visits when spidering the sites given as
# arguments, drop static-asset URLs, then pass the rest to crawl.php.
#
# NOTE(review): this span looks like a diff hunk whose +/- markers were lost.
# The two wget lines, and the two `tee` lines that end the grep pipeline,
# appear to be removed/added pairs rather than consecutive commands - confirm
# against the real file before running this text as a script.
#
# wget runs in --spider mode, recursing one level (-r -l1) and spanning hosts
# (-H) with the 'searplbot/1.0' user agent. Its log (stderr merged into
# stdout) is echoed and saved to the scratch file `wg`. The second variant
# adds `--tries 1 --timeout 1` and appends to `wg` (`tee -a`) instead of
# overwriting it.
# NOTE(review): `$@` is unquoted, so arguments are subject to word splitting.
wget --spider --force-html -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee wg
wget --spider --force-html --tries 1 --timeout 1 -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee -a wg
# Take the third whitespace-separated field of every log line starting with
# `--` (presumably wget's "--DATE TIME--  URL" request lines - verify), drop
# entries ending in css/js/png/gif/jpg/txt/ico/ttf/svg, and save the result
# to the scratch file `ur` (overwrite with `tee`, append with `tee -a`).
grep '^--' wg | awk '{ print $3 }' \
| grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$' \
| tee ur
| tee -a ur
# The wget log is no longer needed once the URLs have been extracted.
rm wg
# Pause for 10 seconds before crawling (reason not stated here; possibly
# rate limiting - TODO confirm).
sleep 10
# Pass the collected URLs to crawl.php in random order; the unquoted command
# substitution splits them into separate arguments on whitespace.
php crawl.php $(cat ur | shuf)
# Remove the URL scratch file.
rm ur