<?php

/**
 * Spider-style page analyser: fetch a page the way a crawler would and analyse it.
 *
 * Reads a URL from the POST field "message", downloads the page, and echoes its
 * <title>, the contents of its "keywords" and "description" <meta> tags, and the
 * tag-stripped <body> text, separated by "<br><br>".
 *
 * NOTE(review): the response is always declared as GBK, whatever encoding the
 * fetched page actually uses — confirm this matches the pages being analysed.
 */

declare(strict_types=1);

header('Content-Type:text/html;charset=gbk');

/**
 * Tells whether the submitted value is an absolute http:// or https:// URL.
 *
 * The value comes straight from the request, so anything else (local paths,
 * php://, file://, ftp://, ...) must never reach file_get_contents().
 */
function spiderIsHttpUrl(string $url): bool
{
    return preg_match('~^https?://[^\s/]~i', $url) === 1;
}

/**
 * Downloads a page, retrying with a browser User-Agent when the plain request is refused.
 *
 * @param string $url Address submitted by the user.
 *
 * @return string The response body, or '' when the URL is rejected or nothing could be fetched.
 */
function spiderFetch(string $url): string
{
    if (!spiderIsHttpUrl($url)) {
        return '';
    }

    // NOTE(review): http(s) URLs pointing at internal hosts are still fetched;
    // add a host allow-list if this script is reachable by untrusted users.

    // "@" is deliberate: failure is handled through the false return value below,
    // and the warning would otherwise be printed into the generated page.
    $contents = @file_get_contents($url);

    // A refused request shows up either as a failed read (HTTP 4xx makes
    // file_get_contents() return false) or as a literal "Forbidden" body.
    $refused = $contents === false || trim($contents) === 'Forbidden';
    if (!$refused) {
        return $contents;
    }

    if (!function_exists('curl_init')) {
        return $contents === false ? '' : $contents;
    }

    $ch = curl_init();
    if ($ch === false) {
        return $contents === false ? '' : $contents;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)');
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    // Keep cURL on the same protocols the URL check allows.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $retry = curl_exec($ch);
    curl_close($ch);

    return is_string($retry) ? $retry : '';
}

/**
 * Extracts the title, meta keywords, meta description and body text from an HTML document.
 *
 * Every value is passed through strip_tags(); parts that are missing from the
 * document are returned as ''.
 *
 * @param string $html Raw HTML source.
 *
 * @return array{title: string, keywords: string, description: string, body: string}
 */
function spiderAnalyze(string $html): array
{
    $report = ['title' => '', 'keywords' => '', 'description' => '', 'body' => ''];

    // "~" delimiters so the "/" of the closing tags needs no escaping.
    if (preg_match('~<title\b(.*?)</title>~is', $html, $titleMatch) === 1) {
        $report['title'] = strip_tags($titleMatch[0]);
    }

    if (preg_match_all('~<meta\b(.*?)>~is', $html, $metaTags) > 0) {
        foreach ($metaTags[0] as $tag) {
            // Accept both content="..." and content='...'.
            if (preg_match('~\bcontent\s*=\s*(["\'])(.*?)\1~is', $tag, $attr) !== 1) {
                continue;
            }

            // Look for the marker outside the content attribute, so a description
            // that merely mentions "keywords" is not taken for the keywords tag.
            $descriptor = str_replace($attr[0], '', $tag);
            $value = strip_tags($attr[2]);

            // As in a plain top-to-bottom scan, a later matching tag overrides an earlier one.
            if (stripos($descriptor, 'keywords') !== false) {
                $report['keywords'] = $value;
            }
            if (stripos($descriptor, 'description') !== false) {
                $report['description'] = $value;
            }
        }
    }

    // Anchor on the real closing tag: a bare "body>" would also end at "<tbody>".
    if (preg_match('~<body\b(.*?)</body>~is', $html, $bodyMatch) === 1) {
        $report['body'] = strip_tags($bodyMatch[0]);
    }

    return $report;
}

/**
 * Formats an analysis report as the HTML fragment sent to the browser.
 *
 * @param array{title: string, keywords: string, description: string, body: string} $report
 */
function spiderRender(array $report): string
{
    return 'title:' . $report['title'] . '<br><br>'
        . 'keywords:' . $report['keywords'] . '<br><br>'
        . 'description:' . $report['description'] . '<br><br>'
        . 'body:' . $report['body'];
}

// A repeated field (message[]=...) arrives as an array; treat that as "no URL".
$message = $_POST['message'] ?? '';
$url = is_string($message) ? trim($message) : '';

echo spiderRender(spiderAnalyze(spiderFetch($url)));