better crawl.php logging

This commit is contained in:
xfnw 2021-01-08 16:54:16 -05:00
parent 3fdad57505
commit 459c295488

View file

@@ -28,14 +28,19 @@ $arg = $argv;
array_shift($arg); array_shift($arg);
foreach ($arg as $url) { foreach ($arg as $url) {
echo "\n\n";
$url = preg_replace('/\/$/','',$url); $url = preg_replace('/\/$/','',$url);
echo $url."\n";
$file = file_get_contents($url); $file = file_get_contents($url);
if (!$file) if (!$file)
continue; continue;
$title = page_title($file); $title = page_title($file);
$document = preg_replace('/[ \t]+/', ' ', preg_replace('/[\r\n]+/', "", strip_tags($file))); $document = preg_replace('/[ \t]+/', ' ', preg_replace('/[\r\n]+/', "", strip_tags($file)));
if (!$title || !$document) if (!$title || !$document) {
echo "no title!\n";
continue; continue;
}
echo $title; echo $title;
echo $document; echo $document;