From 8c4421108b6b070a146ec2990d1f3e93dd7d5953 Mon Sep 17 00:00:00 2001 From: xfnw Date: Tue, 15 Dec 2020 09:59:19 -0500 Subject: [PATCH] dont track svgs and drop the / from the end of urls so they wont be duplicated --- crawl.php | 1 + urls.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/crawl.php b/crawl.php index b9ca7bd..72bdb04 100644 --- a/crawl.php +++ b/crawl.php @@ -28,6 +28,7 @@ $arg = $argv; array_shift($arg); foreach ($arg as $url) { + $url = preg_replace('/\/$/','',$url); $file = file_get_contents($url); if (!$file) continue; diff --git a/urls.sh b/urls.sh index fe0f851..1e26497 100755 --- a/urls.sh +++ b/urls.sh @@ -1,3 +1,3 @@ wget --spider --force-html -r -l1 -H $@ 2>&1 \ | grep '^--' | awk '{ print $3 }' \ - | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\)$' + | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$'