better crawl.php logging
This commit is contained in:
parent
3fdad57505
commit
459c295488
1 changed file with 6 additions and 1 deletion
|
@@ -28,14 +28,19 @@ $arg = $argv;
|
||||||
array_shift($arg);
|
array_shift($arg);
|
||||||
|
|
||||||
foreach ($arg as $url) {
|
foreach ($arg as $url) {
|
||||||
|
echo "\n\n";
|
||||||
$url = preg_replace('/\/$/','',$url);
|
$url = preg_replace('/\/$/','',$url);
|
||||||
|
echo $url."\n";
|
||||||
$file = file_get_contents($url);
|
$file = file_get_contents($url);
|
||||||
if (!$file)
|
if (!$file)
|
||||||
continue;
|
continue;
|
||||||
$title = page_title($file);
|
$title = page_title($file);
|
||||||
$document = preg_replace('/[ \t]+/', ' ', preg_replace('/[\r\n]+/', "", strip_tags($file)));
|
$document = preg_replace('/[ \t]+/', ' ', preg_replace('/[\r\n]+/', "", strip_tags($file)));
|
||||||
if (!$title || !$document)
|
if (!$title || !$document) {
|
||||||
|
echo "no title!\n";
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
echo $title;
|
echo $title;
|
||||||
echo $document;
|
echo $document;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue