Go through down sites faster (wget limited to one try with a 1-second timeout)
This commit is contained in:
parent
6b7002417d
commit
be5b250319
1 changed file with 8 additions and 2 deletions
10
urls.sh
10
urls.sh
|
@ -1,9 +1,15 @@
|
||||||
wget --spider --force-html -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee wg
|
wget --spider --force-html --tries 1 --timeout 1 -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee -a wg
|
||||||
|
|
||||||
grep '^--' wg | awk '{ print $3 }' \
|
grep '^--' wg | awk '{ print $3 }' \
|
||||||
| grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$' \
|
| grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$' \
|
||||||
| tee ur
|
| tee -a ur
|
||||||
|
|
||||||
|
rm wg
|
||||||
|
|
||||||
sleep 10
|
sleep 10
|
||||||
|
|
||||||
php crawl.php $(cat ur | shuf)
|
php crawl.php $(cat ur | shuf)
|
||||||
|
|
||||||
|
rm ur
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue