|
今天突然想知道搜索引擎蜘蛛每天爬行了网站的哪些页面、爬了几次,但本人网站都是静态页面,而且空间商的日志要每天设置保留,第二天才会保留。因此想看看能不能通过伪静态的逆向来实现,研究了半个小时左右弄出了这个东西来,部分代码是网上找的,能节省精力就尽量节省嘛。php 文件本人命名为 bot.php
View Code
<?php
/**
 * bot.php - crawler visit logger and static-page front controller.
 *
 * The rewrite rules route /article/... and /list/... requests to this script.
 * It (1) appends one line to bot.txt when the User-Agent looks like a search
 * engine crawler, and (2) outputs the requested static .html file, answering
 * 404 when the request does not resolve to such a file.
 *
 * The originally requested path is read from $_SERVER['HTTP_X_REWRITE_URL'].
 * The code deliberately avoids PHP 7+ only syntax so that it runs on old
 * PHP 5 hosts as well as on PHP 8.
 */

/**
 * Map a User-Agent string to a crawler label.
 *
 * @param string $userAgent Raw User-Agent header value (may be empty).
 * @return string|null Label written to the log, or null when the
 *                     User-Agent matches no known crawler signature.
 */
function bot_detect_name($userAgent)
{
    $userAgent = strtolower((string) $userAgent);

    // First match wins, so order matters: a specific signature must come
    // before any shorter signature it contains ('msnbot' before 'msn',
    // everything before the catch-all 'bot'). All needles are lowercase
    // because the haystack has been lowercased.
    $signatures = array(
        'googlebot'            => 'Google',
        'mediapartners-google' => 'Google Adsense',
        'baiduspider'          => 'Baidu',
        'sogou spider'         => 'Sogou',
        'sogou web'            => 'Sogou web',
        'sosospider'           => 'SOSO',
        'yahoo'                => 'Yahoo',
        'msnbot'               => 'msnbot',
        'msn'                  => 'MSN',
        'sohu'                 => 'Sohu',
        'yodaobot'             => 'Yodao',
        'twiceler'             => 'Twiceler',
        'ia_archiver'          => 'Alexa_',
        'iaarchiver'           => 'Alexa',
        'slurp'                => '雅虎',
        'bot'                  => '其它蜘蛛',
    );

    foreach ($signatures as $needle => $label) {
        if (strpos($userAgent, $needle) !== false) {
            return $label;
        }
    }

    return null;
}

/**
 * Append one tab-separated visit record to the log file.
 *
 * Logging is best-effort: a failure is reported through error_log() and
 * never interrupts page delivery.
 *
 * @param string $logFile    Path of the log file to append to.
 * @param string $botName    Crawler label from bot_detect_name().
 * @param string $remoteAddr Client IP address.
 * @param string $url        Full URL that was requested.
 * @return bool True when the line was written, false otherwise.
 */
function bot_log_visit($logFile, $botName, $remoteAddr, $url)
{
    $fields = array(date('Y-m-d H:i:s'), $remoteAddr, $botName, $url);

    // Strip control characters (including tabs) so that a crafted request
    // value cannot add columns or otherwise corrupt the record layout.
    foreach ($fields as $i => $field) {
        $fields[$i] = (string) preg_replace('/[\x00-\x1F\x7F]/', '', (string) $field);
    }

    $line = implode("\t", $fields) . "\r\n";

    // '@' keeps a write warning from being printed into the page (and from
    // breaking the header() calls that may follow); the failure is still
    // reported explicitly just below.
    if (@file_put_contents($logFile, $line, FILE_APPEND) === false) {
        error_log('bot.php: unable to append to ' . $logFile);
        return false;
    }

    return true;
}

/**
 * Resolve a request path to a readable .html file inside $baseDir.
 *
 * @param string $requestUri Requested path, optionally followed by a query string.
 * @param string $baseDir    Directory the static pages are served from.
 * @return string|null Absolute path of the file, or null when the request
 *                     does not name an existing, readable .html file
 *                     located inside $baseDir.
 */
function bot_resolve_static_file($requestUri, $baseDir)
{
    $path = (string) $requestUri;

    // The query string is not part of the file name.
    $queryStart = strpos($path, '?');
    if ($queryStart !== false) {
        $path = substr($path, 0, $queryStart);
    }

    if (substr($path, -5) !== '.html' || strpos($path, "\0") !== false) {
        return null;
    }

    $root = realpath($baseDir);
    $file = realpath($baseDir . $path);
    if ($root === false || $file === false || !is_file($file) || !is_readable($file)) {
        return null;
    }

    // realpath() has collapsed any "../" segments; refuse anything that
    // ended up outside the served directory.
    $rootPrefix = rtrim($root, '/\\') . DIRECTORY_SEPARATOR;
    if (strncmp($file, $rootPrefix, strlen($rootPrefix)) !== 0) {
        return null;
    }

    return $file;
}

$userAgent  = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
$requestUri = isset($_SERVER['HTTP_X_REWRITE_URL']) ? $_SERVER['HTTP_X_REWRITE_URL'] : '';

$bot = bot_detect_name($userAgent);
if ($bot !== null) {
    $serverName = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';
    $remoteAddr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    bot_log_visit('bot.txt', $bot, $remoteAddr, 'http://' . $serverName . $requestUri);
}

$file     = bot_resolve_static_file($requestUri, '.');
$contents = ($file !== null) ? file_get_contents($file) : false;

if ($contents !== false) {
    echo $contents;
} else {
    header('HTTP/1.1 404 Not Found');
    header("status: 404 Not Found");
    echo "该页面无法找到";
}
50 ?>
伪静态文件代码:
1 [ISAPI_Rewrite]
2 # The rules below map /index.html to /index.php and send every /article/... and /list/... URL to /bot.php.
3 # 3600 = 1 hour
4 CacheClockRate 3600
5
6 RepeatLimit 32
7 # NOTE(review): the next two comment lines talk about protecting httpd.ini / httpd.parse.errors, but no rule in this listing does that - confirm whether a rule was dropped.
8 # Protect httpd.ini and httpd.parse.errors files
9 # from accessing through HTTP
10 RewriteRule /index.html /index.php
11 RewriteRule ^/article/(.*) /bot.php [L]
12 RewriteRule ^/list/(.*) /bot.php [L] |
|
|