2020-12-15 17:31:20 +00:00
|
|
|
# Spider the URLs given as script arguments without saving any pages:
# one attempt and a 1-second timeout per request, recursing one level deep
# (-r -l1) and following links onto other hosts (-H), with a custom
# User-Agent. wget writes its log to stderr, so stderr is merged into stdout
# and appended to the scratch file "wg" while still being shown on screen.
# "$@" is quoted so every argument reaches wget as exactly one word, even if
# it contains spaces or glob characters such as '?' or '*'.
wget --spider --force-html --tries 1 --timeout 1 -r -l1 -H \
  -U 'searplbot/1.0' "$@" 2>&1 | tee -a wg
|
2020-12-15 15:17:28 +00:00
|
|
|
|
|
|
|
# Print the unique request URLs recorded in a wget log.
# Arguments: $1 - path to the wget log file
# Outputs:   one URL per line on stdout, sorted and de-duplicated; URLs
#            ending in one of the listed file extensions are dropped
extract_urls() {
  # Lines starting with "--" carry the URL in the third whitespace-separated
  # field. The log is fed on stdin so the path is never interpreted by awk
  # as an option or a var=value assignment.
  # grep -E is used because "\|" alternation in a basic regex is a GNU
  # extension; the extended form is portable.
  awk '/^--/ { print $3 }' <"$1" \
    | grep -Ev '\.(css|js|png|gif|jpg|txt|ico|ttf|svg)$' \
    | sort -u
}

# Append the filtered URL list to "ur" (echoing it as well), then discard
# the wget log it was derived from.
extract_urls wg | tee -a ur
rm wg
|
2020-12-15 15:17:28 +00:00
|
|
|
|
|
|
|
# Pause for 10 seconds before handing the collected URLs to the PHP crawler.
sleep 10

# Run crawl.php with the URLs from "ur" as arguments, in random order.
# The command substitution is left unquoted on purpose: each URL must become
# its own argument. Globbing is switched off first so that URLs containing
# '?', '*' or '[' are passed through literally rather than being replaced by
# matching file names; the subshell keeps that setting from leaking into the
# rest of the script.
(
  set -f
  # shellcheck disable=SC2046 -- word splitting is intended here
  php crawl.php $(shuf ur)
)

# The URL list has been consumed; remove the scratch file.
rm ur
|
|
|
|
|
|
|
|
|