搜诶符合你 发表于 2018-8-4 13:59:25

Python 用HTMLParser解析HTML文件

from HTMLParser import HTMLParser  

  
class MyHTMLParser(HTMLParser):
    """Collect the ``href`` value of every ``<a>`` start tag fed to the parser.

    After ``feed()`` has been called, ``self.links`` holds the collected
    values in the order the tags were encountered.
    """

    def __init__(self):
        # The base initializer is called explicitly so the parser's internal
        # state is set up before any data is fed.
        HTMLParser.__init__(self)
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` attribute value(s) of an ``<a>`` start tag.

        Args:
            tag: Tag name as reported by HTMLParser (the library documents
                that tag and attribute names are passed in lower case, so
                ``<A HREF=...>`` is matched as well).
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        # Guard clause: only anchor tags are of interest.
        if tag != "a":
            return
        # An empty attrs list simply contributes nothing, so no special case
        # is needed for attribute-less tags.
        self.links.extend(value for name, value in attrs if name == "href")
  

  
# Demo entry point: parse a small HTML snippet and print the links found.
if __name__ == "__main__":
    # Sample markup with mixed-case tag and attribute names, plus whitespace
    # around "=", to exercise the parser. The quotes are literal double-quote
    # characters; the pasted source had them escaped as HTML entities, which
    # also broke the closing triple quote of this literal.
    html_code = """
  <a href="www.google.com"> google.com</a>
  <A Href="www.pythonclub.org"> PythonClub </a>
  <A HREF = "www.sina.com.cn"> Sina </a>
  """

    hp = MyHTMLParser()
    hp.feed(html_code)
    # close() forces processing of any buffered data before reading results.
    hp.close()

    print(hp.links)
页: [1]
查看完整版本: Python 用HTMLParser解析HTML文件