diff --git a/crawl.php b/crawl.php index ce55148..d19e0a4 100644 --- a/crawl.php +++ b/crawl.php @@ -5,7 +5,15 @@ error_reporting(E_ALL); $db = new PDO("sqlite:db.sqlite"); - +$context = stream_context_create( + array( + 'http' => array( + 'follow_location' => false, + 'timeout' => 2, + 'user_agent' => 'searplbot/1.0' + ) + ) +); $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING ); //$stmt = $db->prepare($sql); @@ -29,14 +37,13 @@ array_shift($arg); foreach ($arg as $url) { echo "\n"; - $url = preg_replace('/\/$/','',$url); echo $url."\n"; $stmt = $db->prepare('DELETE FROM indexed WHERE url = ?'); - $stmt->execute([$url]); + $stmt->execute([htmlspecialchars(htmlspecialchars_decode($url))]); - $file = file_get_contents($url); - if (!$file) + $file = file_get_contents($url, false, $context); + if (!$file || strpos($http_response_header[0],'200 OK') === false) continue; $title = page_title($file); $document = preg_replace('/[ \t]+/', ' ', preg_replace('/[\r\n]+/', " ", strip_tags(preg_replace('/<(script|style)>(.*)<\/\1>/siU', ' ',$file)))); @@ -48,5 +55,5 @@ foreach ($arg as $url) { echo "title: ".$title."\n"; $stmt = $db->prepare('INSERT INTO indexed (title, url, content) VALUES (?, ?, ?)'); - $stmt->execute([$title, $url, $document]); + $stmt->execute([htmlspecialchars(htmlspecialchars_decode($title)), htmlspecialchars(htmlspecialchars_decode($url)), htmlspecialchars(htmlspecialchars_decode($document))]); } diff --git a/create.sql b/create.sql index f1d23b7..f215edf 100644 --- a/create.sql +++ b/create.sql @@ -1,2 +1,2 @@ -CREATE TABLE indexed (id INTEGER PRIMARY KEY, title VARCHAR(255), url VARCHAR(512) UNIQUE, content TEXT) +CREATE VIRTUAL TABLE indexed USING FTS5(title, url, content); diff --git a/index.php b/index.php index a0902bc..0b6f2f3 100644 --- a/index.php +++ b/index.php @@ -26,62 +26,27 @@ $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING ); if (isset($_GET['q']) && preg_replace('/\s+/', '', $_GET['q']) != '') { - $sql = 'SELECT * FROM indexed WHERE 1=1'; - - $terms = explode(' ', trim(preg_replace('/\s+/', ' ', $_GET['q']))); - $params = array(); - foreach ($terms as $term) { - if (substr($term, 0, 1) == '-') { - - $sql = $sql . ' AND content NOT LIKE ?'; - array_push($params,'%'.substr($term,1).'%'); - } else { - - $sql = $sql . ' AND content LIKE ?'; - array_push($params,'%'.$term.'%'); - } - } - $sql = $sql . ';'; + $sql = "SELECT title,url,snippet(indexed,2,'','','...',15) as snippet FROM indexed WHERE indexed MATCH ? ORDER BY bm25(indexed,2,2,1)"; + $params = [$_GET['q']]; $stmt = $db->prepare($sql); + + set_error_handler(function ($_,$msg) {echo '