searpl/urls.sh

17 lines
322 B
Bash
Raw Normal View History

2021-10-20 19:19:56 +00:00
#######################################
# Spider the given start URLs with wget and emit its log.
# wget runs in --spider mode (pages are checked, not saved), recursing one
# level (-r -l1) and following links onto other hosts (-H).
# Arguments: start URLs, handed to wget unchanged
# Outputs:   wget's combined stdout/stderr on stdout
#######################################
spider_links() {
  # "$@" must stay quoted: an unquoted $@ is word-split and glob-expanded,
  # so a URL containing '?', '*' or a space would reach wget mangled.
  wget -w 2 --random-wait --spider --force-html --tries 1 --timeout 2 \
    -r -l1 -H -U 'searplbot/1.0' "$@" 2>&1
}

# Show the log on the terminal while appending it to the scratch file "wg".
spider_links "$@" | tee -a wg
2020-12-15 15:17:28 +00:00
#######################################
# Extract the unique URLs worth crawling from a wget log.
# Arguments: $1 - path to the wget log
# Outputs:   sorted, de-duplicated URLs on stdout, one per line
#######################################
extract_urls() {
  # Keep only lines starting with "--" and print their third
  # whitespace-separated field, which is where the URL sits on those lines.
  # Then drop URLs ending in a static-asset or feed extension.
  awk '/^--/ { print $3 }' "$1" \
    | grep -Ev '\.(css|js|png|gif|jpg|txt|ico|ttf|svg|rss|atom)$' \
    | sort -u
}

# Show the URL list while appending it to the scratch file "ur",
# then discard the wget log it was derived from.
extract_urls wg | tee -a ur
rm wg
2020-12-15 15:17:28 +00:00
# Pause for 10 seconds before the crawl step that follows.
sleep 10
2020-12-15 15:59:23 +00:00
#######################################
# Run crawl.php once with every URL from a list file, in random order.
# Arguments: $1 - file holding one URL per line
# Returns:   exit status of php
#######################################
crawl_shuffled() (
  # The function body is a subshell so that noglob does not leak out.
  # Word splitting is still wanted (one argument per URL), but '?', '*'
  # and '[' inside a URL must not be expanded against files in the
  # current directory.
  set -f
  # shellcheck disable=SC2046  # splitting into separate arguments is intended
  php crawl.php $(shuf "$1")
)

crawl_shuffled ur
# The URL list is scratch data; remove it once the crawl has run.
rm ur