|
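Python 正则表达式中 re.search、re.findall 与 re.finditer 的区别:search 只返回第一个匹配(Match 对象,无匹配时返回 None);findall 一次性返回所有匹配结果组成的列表;finditer 返回一个迭代器,逐个产生 Match 对象。示例代码: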
import sys;
import re;
# Demo script: run the same pattern through re.search, re.findall and
# re.finditer and print what each one returns, to show how they differ.
if __name__ == '__main__':
    # Sample input: four anchor tags, one per line.
    html_text = '''<a href="http://www.andylin02.com" target="_blank" class="dsdfv">aaaa</a>
<a href="http://www.congfeng.com" target="_blank" class="tdsfv">bbbbsdf</a>
<a href="http://www.st.com" target="_blank" class="txx">ccccccc</a>
<a href="http://www.qs.com" target="_blank" class="xxx">ddddd</a>
'''
    print(html_text)

    # A single capturing group running from "<a ... href=" to the end of the
    # line. "." does not match a newline here (re.DOTALL is not set), so each
    # match stays within one line of the input.
    anchor_pattern = r"(<\s*a\s.*href\s*=.*)"

    # search: only the first match, returned as a match object.
    first_match = re.search(anchor_pattern, html_text)
    print("======================= search result =======================")
    if first_match is None:
        # re.search returns None when nothing matches; calling .groups() on
        # it would raise AttributeError.
        print("no match")
    else:
        groups = first_match.groups()
        print("%s ==> %d" % (groups, len(groups)))

    # findall: all non-overlapping matches at once. With exactly one capturing
    # group in the pattern, the result is a list of that group's strings.
    all_matches = re.findall(anchor_pattern, html_text)
    print("\n===================== findall result ========================")
    print("%s ==> %d" % (all_matches, len(all_matches)))

    # finditer: the same matches, yielded one match object at a time.
    print("\n===================== finditer result =======================")
    for anchor_match in re.finditer(anchor_pattern, html_text):
        print(anchor_match.groups())
        print("------")
结果:
<a href="http://www.andylin02.com" target="_blank" class="dsdfv">aaaa</a>
<a href="http://www.congfeng.com" target="_blank" class="tdsfv">bbbbsdf</a>
<a href="http://www.st.com" target="_blank" class="txx">ccccccc</a>
<a href="http://www.qs.com" target="_blank" class="xxx">ddddd</a>
======================= search result =======================
('<a href="http://www.andylin02.com" target="_blank" class="dsdfv">aaaa</a>',) ==> 1
===================== findall result ========================
['<a href="http://www.andylin02.com" target="_blank" class="dsdfv">aaaa</a>', '<a href="http://www.congfeng.com" target="_blank" class="tdsfv">bbbbsdf</a>', '<a href="http://www.st.com" target="_blank" class="txx">ccccccc</a>', '<a href="http://www.qs.com" target="_blank" class="xxx">ddddd</a>'] ==> 4
===================== finditer result =======================
('<a href="http://www.andylin02.com" target="_blank" class="dsdfv">aaaa</a>',)
------
('<a href="http://www.congfeng.com" target="_blank" class="tdsfv">bbbbsdf</a>',)
------
('<a href="http://www.st.com" target="_blank" class="txx">ccccccc</a>',)
------
('<a href="http://www.qs.com" target="_blank" class="xxx">ddddd</a>',)
------ |
|
|