This commit is contained in:
xfnw 2020-12-14 16:59:16 -05:00
commit 69de9f49dc
5 changed files with 192 additions and 0 deletions

65
assets/style.css Normal file
View file

@ -0,0 +1,65 @@
/* Page-wide defaults: near-black background with light grey monospace text. */
body {
background: #141415;
font-family: 'Liberation Mono', monospace;
color: #cdcdcd;
}
/* Shared panel look: padded boxes on a slightly lighter background,
   separated from whatever precedes them by a 10px top margin. */
.preview, .box {
padding: 10px;
margin-top: 10px;
background-color: #222;
}
/* Keeps the content in a horizontally centred column at most 700px wide. */
.wrapper {
max-width: 700px;
margin: 0 auto;
}
/* Link colour used for all anchors unless overridden below. */
a {
color: #aac;
}
/* Links inside a top-level heading look like plain heading text:
   no underline, same colour as the body text. */
h1 a {
text-decoration: none;
color: #cdcdcd;
}
/* Prefixes heading links with a "#" marker drawn in the link colour. */
h1 a::before {
content: "#";
color: #aac;
margin-right: 10px;
}
/* Only the "#" marker is underlined while the heading link is hovered. */
h1 a:hover::before {
text-decoration: underline;
}
/* Headings inside a .preview panel drop their margins and padding,
   keeping just a 5px gap below. */
.preview h1 {
margin: 0;
padding: 0;
margin-bottom: 5px;
}
/* Search text field. The width leaves 60px of the row free --
   presumably for the submit button floated to the right; confirm if the
   button's size changes. */
.search-container input[type=text] {
padding: 6px;
margin-top: 3px;
font-size: 17px;
color: #cdcdcd;
background: #333;
border: 0;
width: calc(100% - 60px);
}
/* Search submit button: same colours and font size as the text field,
   floated to the right edge of the container. */
.search-container button {
padding: 6px 10px;
margin-top: 3px;
background: #333;
float: right;
font-size: 17px;
border: none;
cursor: pointer;
color:#cdcdcd;
}

42
crawl.php Normal file
View file

@ -0,0 +1,42 @@
<?php
// Crawler bootstrap: report every PHP notice, warning and error, including
// those raised during startup, directly on the console.
error_reporting(E_ALL);
ini_set('display_startup_errors', '1');
ini_set('display_errors', '1');

// Open the SQLite index through PDO (the DSN uses a relative path).
// Database errors are reported as PHP warnings rather than exceptions.
$db = new PDO(
    'sqlite:db.sqlite',
    null,
    null,
    [PDO::ATTR_ERRMODE => PDO::ERRMODE_WARNING]
);
/**
 * Extract the text of the first <title> element from an HTML document.
 *
 * The title is normalised for storage as plain text: runs of whitespace
 * (including line breaks) collapse to a single space, HTML entities such as
 * "&amp;" are decoded to the characters they stand for, and leading/trailing
 * whitespace is removed. Decoding matters because the title is HTML-escaped
 * again when it is displayed; storing "&amp;" would show up as "&amp;" on
 * the page instead of "&".
 *
 * @param string $fp Raw HTML source (a string, despite the parameter name).
 *
 * @return string|null The cleaned title (possibly an empty string), or null
 *                     when the document contains no <title> element.
 */
function page_title($fp) {
    // "s" lets "." span line breaks, "i" matches <TITLE> as well, the lazy
    // quantifier stops at the first closing tag, and "[^>]*" tolerates
    // attributes on the opening tag.
    if (!preg_match('/<title\b[^>]*>(.*?)<\/title\s*>/si', $fp, $title_matches)) {
        return null;
    }

    // Clean up the title: remove EOLs and excessive whitespace.
    $title = preg_replace('/\s+/', ' ', $title_matches[1]);
    $title = html_entity_decode($title, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    return trim($title);
}
// Every command-line argument after the script name is a URL to index.
$arg = array_slice($argv, 1);

// Prepare the INSERT once and reuse it for every URL instead of re-preparing
// the same statement on each iteration.
$insert = $db->prepare('INSERT INTO indexed (title, url, content) VALUES (?, ?, ?)');
if ($insert === false) {
    // prepare() can return false instead of throwing (for example when the
    // "indexed" table is missing); stop here rather than crash on execute().
    fwrite(STDERR, "Could not prepare the INSERT statement; has the table been created?" . PHP_EOL);
    exit(1);
}

foreach ($arg as $url) {
    $file = file_get_contents($url);
    if (!$file) {
        // Fetch failed or the response was empty: nothing to index.
        continue;
    }

    $title = page_title($file);

    // Drop <script> and <style> elements together with their bodies;
    // strip_tags() alone would keep the JavaScript/CSS source as "content".
    $markup = preg_replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/si', ' ', $file);
    if ($markup === null) {
        // The regex engine gave up (e.g. backtrack limit); index the raw page.
        $markup = $file;
    }

    // Reduce the page to plain text: strip the tags, decode entities so that
    // "&amp;" is searchable as "&", then collapse every run of whitespace --
    // line breaks included -- to ONE space. Deleting line breaks outright
    // would glue the last word of one line to the first word of the next.
    $text = html_entity_decode(strip_tags($markup), ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $document = trim((string) preg_replace('/[ \t\r\n]+/', ' ', $text));

    if ($title === null || $title === '' || $document === '') {
        continue;
    }

    // Progress output, one item per line.
    echo $title, PHP_EOL, $document, PHP_EOL;

    $insert->execute([$title, $url, $document]);
}

2
create.php Normal file
View file

@ -0,0 +1,2 @@
-- Search index: one row per crawled page.
--   id      integer primary key
--   title   page title
--   url     address the page was fetched from
--   content page text that searches are matched against
-- IF NOT EXISTS makes the script safe to run again, and the trailing
-- semicolon terminates the statement for clients that require one.
CREATE TABLE IF NOT EXISTS indexed (
    id INTEGER PRIMARY KEY,
    title VARCHAR(255),
    url VARCHAR(512),
    content TEXT
);

80
index.php Normal file
View file

@ -0,0 +1,80 @@
<?php
// Show every PHP notice, warning and error -- including startup errors --
// directly in the rendered page.
error_reporting(E_ALL);
ini_set('display_startup_errors', '1');
ini_set('display_errors', '1');
?>
<!DOCTYPE HTML>
<html lang="en">
<meta charset="utf-8">
<link rel="stylesheet" type="text/css" href="assets/style.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="description" content="a search engine">
<title>searpl</title>
<div class='wrapper'>
<h1>searpl</h1>
<div class='box search-container'>
<form action="./">
<?php
// Echo the submitted query back into the search field. Only a plain string
// is echoed: a request such as ?q[]=x makes $_GET['q'] an array, which
// htmlspecialchars() cannot handle. ENT_QUOTES escapes both quote styles for
// the attribute context, and ENT_SUBSTITUTE keeps malformed UTF-8 from
// blanking the whole value.
$previousQuery = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
?>
<input type="text" placeholder="Search.." name="q" value="<?php echo htmlspecialchars($previousQuery, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>">
<button type="submit" aria-label="Search"><i class="fa fa-search"></i></button>
</form>
</div>
<?php
/*
 * Search results.
 *
 * The query is split on whitespace into terms. Every term must occur in a
 * page's content (SQL LIKE); a term prefixed with "-" must NOT occur. For
 * each matching row a link is printed, followed by a short excerpt around
 * the first occurrence of every positive term.
 */
$rawTerms = [];
if (isset($_GET['q']) && is_string($_GET['q'])) {
    // Split on runs of whitespace. Stripping the whitespace first would
    // merge a multi-word query into a single term.
    $rawTerms = preg_split('/\s+/', $_GET['q'], -1, PREG_SPLIT_NO_EMPTY) ?: [];
}

if ($rawTerms) {
    // HTML-escape helper. ENT_SUBSTITUTE matters because excerpts are cut at
    // byte offsets and may end inside a multibyte character; without it
    // htmlspecialchars() returns an empty string for such input.
    $escape = static function ($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    $sql = 'SELECT * FROM indexed WHERE 1=1';
    $params = [];
    $highlight = []; // positive terms only; these are the ones excerpted

    foreach ($rawTerms as $term) {
        $negated = substr($term, 0, 1) === '-';
        $needle = $negated ? (string) substr($term, 1) : $term;
        if ($needle === '') {
            // A bare "-" carries no search text; as NOT LIKE '%%' it would
            // exclude every row.
            continue;
        }

        // Escape LIKE wildcards so "%" and "_" in the query match literally.
        $pattern = '%' . strtr($needle, ['\\' => '\\\\', '%' => '\\%', '_' => '\\_']) . '%';
        $sql .= $negated
            ? " AND content NOT LIKE ? ESCAPE '\\'"
            : " AND content LIKE ? ESCAPE '\\'";
        $params[] = $pattern;

        if (!$negated) {
            $highlight[] = $needle;
        }
    }

    $results = false;

    // Without any usable term the WHERE clause would match every row, so the
    // query is only run when at least one condition was added.
    if ($params) {
        $db = new PDO('sqlite:db.sqlite', null, null, [PDO::ATTR_ERRMODE => PDO::ERRMODE_WARNING]);
        $stmt = $db->prepare($sql . ';');

        // With ERRMODE_WARNING, prepare() and execute() signal failure by
        // returning false rather than throwing.
        if ($stmt !== false && $stmt->execute($params)) {
            while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
                $results = true;
?>
<div class='box'>
<a href="<?php echo $escape($row['url']); ?>"><?php echo $escape($row['title']); ?></a>
<br>
...<?php
                $content = (string) $row['content'];
                foreach ($highlight as $needle) {
                    // LIKE ignores ASCII case, so the excerpt search must too.
                    $pos = stripos($content, $needle);
                    if ($pos === false) {
                        continue;
                    }
                    $length = strlen($needle);
                    // Clamp the start to 0: a negative offset would make
                    // substr() count from the END of the content.
                    $start = max(0, $pos - 50);
                    echo $escape(substr($content, $start, $pos - $start));
                    // Show the text as it appears in the page, not as typed.
                    echo '<strong>' . $escape(substr($content, $pos, $length)) . '</strong>';
                    echo $escape(substr($content, $pos + $length, 50)) . '...';
                }
?>
</div>
<?php
            }
        }
    }

    if (!$results) {
        echo '<div class="box">No results.</div>';
    }
}
?>
</div>

3
urls.sh Executable file
View file

@ -0,0 +1,3 @@
#!/bin/sh
# List the URLs reachable from the given start URL(s).
#
# Usage: urls.sh [wget options] URL...
#
# wget crawls in spider mode, up to two levels deep and across hosts. Its log
# (written to stderr, hence 2>&1) contains one line starting with "--" per
# request, with the URL as the third field. awk extracts that field and
# prints each URL only the first time it is seen, so a page requested more
# than once is listed once. Stylesheets, scripts and common image types are
# filtered out at the end.
#
# "$@" is quoted so arguments containing spaces or shell metacharacters
# (e.g. "?" or "&" in a URL) reach wget unchanged.
wget --spider --force-html -r -l2 -H "$@" 2>&1 \
| awk '/^--/ && !seen[$3]++ { print $3 }' \
| grep -v '\.\(css\|js\|png\|gif\|jpg\)$'