|
原文地址:http://www.peterbe.com/plog/uniqifiers-benchmark
Fastest way to uniqify a list in Python
Suppose you have a list in python that looks like this:
['a','b','a']
#orlikethis:
[1,2,2,2,3,4,5,6,6,6,6]
and you want to remove all duplicates so you get this result:
['a','b']
#or
[1,2,3,4,5,6]
from random import shuffle, randintimport refrom sets import Setdef f1(seq): # Raymond Hettinger# not order preservingset = {}map(set.__setitem__, seq, [])return set.keys()def f2(seq): # *********# order preservingchecked = []for e in seq:if e not in checked:checked.append(e)return checkeddef f3(seq):# Not order preservingkeys = {}for e in seq:keys[e] = 1return keys.keys()def f4(seq): # ********** order preservingnoDupes = [][noDupes.append(i) for i in seq if not noDupes.count(i)]return noDupesdef f5(seq, idfun=None): # Alex Martelli ******* order preservingif idfun is None:def idfun(x): return xseen = {}result = []for item in seq:marker = idfun(item)# in old Python versions:# if seen.has_key(marker)# but in new ones:if marker in seen: continueseen[marker] = 1result.append(item)return resultdef f5b(seq, idfun=None): # Alex Martelli ******* order preservingif idfun is None:def idfun(x): return xseen = {}result = []for item in seq:marker = idfun(item)# in old Python versions:# if seen.has_key(marker)# but in new ones:if marker not in seen:seen[marker] = 1result.append(item)return resultdef f6(seq):# Not order preservingreturn list(Set(seq))def f7(seq):# Not order preservingreturn list(set(seq))def f8(seq): # Dave Kirby# Order preservingseen = set()return [x for x in seq if x not in seen and not seen.add(x)]def f9(seq):# Not order preservingreturn {}.fromkeys(seq).keys()def f10(seq, idfun=None): # Andrew Dalke# Order preservingreturn list(_f10(seq, idfun))def _f10(seq, idfun=None):seen = set()if idfun is None:for x in seq:if x in seen:continueseen.add(x)yield xelse:for x in seq:x = idfun(x)if x in seen:continueseen.add(x)yield xdef f11(seq): # f10 but simpler# Order preservingreturn list(_f10(seq))def _f11(seq):seen = set()for x in seq:if x in seen:continueseen.add(x)yield ximport timedef timing(f, n, a):print f.__name__,r = range(n)t1 = time.clock()for i in r:f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a)t2 = time.clock()print round(t2-t1, 3)def getRandomString(length=10, loweronly=1, numbersonly=0,lettersonly=0):""" return a very random string """_letters = 'abcdefghijklmnopqrstuvwxyz'if numbersonly:l = list('0123456789')elif lettersonly:l = list(_letters + _letters.upper())else:lowercase = _letters+'0123456789'*2l = list(lowercase + lowercase.upper())shuffle(l)s = ''.join(l)if len(s) < length:s = s + getRandomString(loweronly=1)s = s[:length]if loweronly:return s.lower()else:return stestdata = {}for i in range(35):k = getRandomString(5, lettersonly=1)v = getRandomString(100 )testdata[k] = vtestdata = [int(x) for x in list('21354612')]testdata += list('abcceeaa5efm')class X:def __init__(self, n):self.foo = ndef __repr__(self):return "<foo %r>"%self.foodef __cmp__(self, e):return cmp(self.foo, e.foo)testdata = []for i in range(10000):testdata.append(getRandomString(3, loweronly=True))#testdata = ['f','g','c','d','b','a','a']order_preserving = f2, f4, f5, f5b, f8, f10, f11order_preserving = f5, f5b, f8, f10, f11not_order_preserving = f1, f3, f6, f7, f9testfuncs = order_preserving + not_order_preservingfor f in testfuncs:if f in order_preserving:print "*",timing(f, 100, testdata) |
|