From be5b2503194d4395ff971ce77af3e72d8ee06704 Mon Sep 17 00:00:00 2001
From: xfnw
Date: Tue, 15 Dec 2020 12:31:20 -0500
Subject: [PATCH] go through down sites faster

---
 urls.sh | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/urls.sh b/urls.sh
index acd0229..0494843 100755
--- a/urls.sh
+++ b/urls.sh
@@ -1,9 +1,15 @@
-wget --spider --force-html -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee wg
+wget --spider --force-html --tries 1 --timeout 1 -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee -a wg
 
 grep '^--' wg | awk '{ print $3 }' \
   | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$' \
-  | tee ur
+  | tee -a ur
+
+rm wg
 
 sleep 10
 
 php crawl.php $(cat ur | shuf)
+
+rm ur
+
+