水莹儿 发表于 2017-5-4 09:17:29

python 面向对象入门

该博文涵盖了如下内容:
1.正则表达式 re
2.url库 urllib
3.debug 方法
4.面向对象封装方法
#encoding=utf-8
'''
Learning Python regular expressions
url : http://docs.python.org/library/re.html
parse html url : http://www.boddie.org.uk/python/HTML.html
author : liuzheng
'''
import re
import urllib
# Parse the JavaEye (iteye) blog channel page.
class ParseHTML(object):
    """Fetch an HTML page and print the anchor tags found in it.

    Only anchors written as ``<a href='...' title=... target=...>text</a>``
    (single-quoted href, attributes in exactly that order) are matched.
    The title and target values are captured raw, including any quotes.
    """

    # Capture groups, in order: href, title, target, link text.
    # Compiled once at class creation instead of on every call.
    _LINK_RE = re.compile(
        r"<a href='([\w./:\\]+?)'[\s]*title=([^<>]+?)[\s]*target=([^<>]+?)>([^<>]+?)</a>",
        re.I,
    )

    def __init__(self, url):
        """Remember the address of the page to analyse.

        url -- address handed to ``urllib.urlopen`` by ``parse``.
        """
        self.url = url

    def parse(self):
        """Download ``self.url`` and print every anchor the pattern matches."""
        # NOTE: urllib.urlopen is the Python 2 spelling this file was written for.
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Release the connection even if reading fails.
            sock.close()
        self.__puts(html)

    def __puts(self, html):
        """Print the list of (href, title, target, text) tuples found in html."""
        matches = self._LINK_RE.findall(html)
        # The original author asked whether there is an encoding problem here.
        # Printing a list shows the repr() of each element, so under Python 2
        # non-ASCII bytes are displayed as \x.. escapes; print the individual
        # strings instead if readable text is wanted.
        # A single parenthesised argument prints the same on Python 2 and 3.
        print(matches)

if __name__ == '__main__':
    # Fetch the blog channel page and print the links found in it.
    url = "http://www.iteye.com/blogs"
    p = ParseHTML(url)
    p.parse()

    # Small tour of the re module; skipped when Python runs with -O,
    # because __debug__ is False in that mode.
    if __debug__:
        # Each print below takes one parenthesised argument, so the output is
        # the same as with the Python 2 print statement.
        print("debuging is %s" % __debug__)
        print("regular" + "* " * 30)

        # match: split a phone number into its three digit groups.
        # Named `phone` rather than `str` so the builtin is not shadowed.
        phone = "800-820-8800"
        m = re.match(r"(\d{3})-(\d{3})-(\d{4})", phone)
        # Wrapped in a 1-tuple so the groups tuple is formatted as one value.
        print("result :  %s" % (m.groups(),))

        # split: break the sentence on every non-word character.
        # Raw string, so \W reaches the regex engine as written.
        print("split : %s" % re.split(r'\W', 'Words, words, words.'))

        # findall: collect every word ending in "ly".
        text = "He was carefully disguised but captured quickly by police."
        print("findall:%s" % re.findall(r"\w+ly", text))

        # sub: replace each run of whitespace with "--".
        text = "hello world!"
        print("sub:%s" % re.sub(r"\s+", "--", text))
页: [1]
查看完整版本: python 面向对象入门