better crawl.php logging
This commit is contained in:
parent
3fdad57505
commit
459c295488
1 changed file with 6 additions and 1 deletion
|
@@ -28,14 +28,19 @@ $arg = $argv;
|
|||
array_shift($arg);
|
||||
|
||||
foreach ($arg as $url) {
|
||||
echo "\n\n";
|
||||
$url = preg_replace('/\/$/','',$url);
|
||||
echo $url."\n";
|
||||
$file = file_get_contents($url);
|
||||
if (!$file)
|
||||
continue;
|
||||
$title = page_title($file);
|
||||
$document = preg_replace('/[ \t]+/', ' ', preg_replace('/[\r\n]+/', "", strip_tags($file)));
|
||||
if (!$title || !$document)
|
||||
if (!$title || !$document) {
|
||||
echo "no title!\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
echo $title;
|
||||
echo $document;
|
||||
|
||||
|
|
Loading…
Reference in a new issue