unique urls

This commit is contained in:
xfnw 2020-12-14 17:22:28 -05:00
parent 69de9f49dc
commit c62b48ae1f
2 changed files with 3 additions and 3 deletions

View file

@@ -1,2 +1,2 @@
-CREATE TABLE indexed (id INTEGER PRIMARY KEY, title VARCHAR(255), url VARCHAR(512), content TEXT)
+CREATE TABLE indexed (id INTEGER PRIMARY KEY, title VARCHAR(255), url VARCHAR(512) UNIQUE, content TEXT)

View file

@@ -1,3 +1,3 @@
-wget --spider --force-html -r -l2 -H $@ 2>&1 \
+wget --spider --force-html -r -l1 -H $@ 2>&1 \
 | grep '^--' | awk '{ print $3 }' \
-| grep -v '\.\(css\|js\|png\|gif\|jpg\)$'
+| grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\)$'