|
########################################################################
### Counter
########################################################################
def _count_elements(mapping, iterable):
'Tally elements from the iterable.'
mapping_get = mapping.get
for elem in iterable:
mapping[elem] = mapping_get(elem, 0) + 1
try: # Load C helper function if available
from _collections import _count_elements
except ImportError:
pass
'''
如果C的帮助函数可用的话,则加载; (Python3新增)
'''
class Counter(dict):
'''
Dict子类用于计算哈希项,有时称为包或多集,元素存储为字典键,它们的计数存储为字典值;
>>> c = Counter('abcdeabcdabcaba') # count elements from a string
>>> c.most_common(3) # three most common elements
[('a', 5), ('b', 4), ('c', 3)]
>>> sorted(c) # list all unique elements
['a', 'b', 'c', 'd', 'e']
>>> ''.join(sorted(c.elements())) # list elements with repetitions
'aaaaabbbbcccdde'
>>> sum(c.values()) # total of all counts
15
>>> c['a'] # count of letter 'a'
5
>>> for elem in 'shazam': # update counts from an iterable
... c[elem] += 1 # by adding 1 to each element's count
>>> c['a'] # now there are seven 'a'
7
>>> del c['b'] # remove all 'b'
>>> c['b'] # now there are zero 'b'
0
>>> d = Counter('simsalabim') # make another counter
>>> c.update(d) # add in the second counter
>>> c['a'] # now there are nine 'a'
9
>>> c.clear() # empty the counter
>>> c
Counter()
Note: If a count is set to zero or reduced to zero, it will remain
in the counter until the entry is deleted or the counter is cleared:
>>> c = Counter('aaabbc')
>>> c['b'] -= 2 # reduce the count of 'b' by two
>>> c.most_common() # 'b' is still in, but its count is zero
[('a', 3), ('c', 1), ('b', 0)]
'''
# References:
# http://en.wikipedia.org/wiki/Multiset
# http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
# http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
# http://code.activestate.com/recipes/259174/
# Knuth, TAOCP Vol. II section 4.6.3
def __init__(*args, **kwds):
'''
创建一个新的空Counter对象,可对输入可迭代元素进行计数,也可以对另外一个元素映射过来的元素进行计数;
主要是先调用父类(dict)的初始化,然后使用update函数来更新参数;
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
>>> c = Counter(a=4, b=2) # a new counter from keyword args
'''
if not args:
raise TypeError("descriptor '__init__' of 'Counter' object "
"needs an argument")
self, *args = args
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
super(Counter, self).__init__()
self.update(*args, **kwds)
def __missing__(self, key):
'The count of elements not in the Counter is zero.'
# Needed so that self[missing_item] does not raise KeyError
return 0
'''
对于不存在的元素,返回计数器为0;
总结一下就是dict本身没有这个方法,但是如果当前类为dict的子类的话;
会在缺失的情况下查看有没有实现__missing__方法,如果有的话,就返回__miss__方法的值;
所以Counter作为dict的子类实现了__missing__方法,在缺失的时候返回0;
这也就是为什么在Counter类中,如果找不到key,会返回0而不是产生一个KeyError;
例如:
>>> import collections
>>> c = collections.Counter('abbcc')
>>> c['a']
2
>>> c['b']
2
>>> c['d']
0
'''
def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
# Emulate Bag.sortedByCount from Smalltalk
if n is None:
return sorted(self.items(), key=_itemgetter(1), reverse=True)
return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
'''
数量从大到写排列,返回一个TopN列表,如果n没有被指定,则返回所有元素;
当多个元素计数值相同时,按照字母序排列;
例如:
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
def elements(self):
'''Iterator over elements repeating each as many times as its count.
>>> c = Counter('ABCABC')
>>> sorted(c.elements())
['A', 'A', 'B', 'B', 'C', 'C']
# Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
>>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
>>> product = 1
>>> for factor in prime_factors.elements(): # loop over factors
... product *= factor # and multiply them
>>> product
1836
Note, if an element's count has been set to zero or is a negative
number, elements() will ignore it.
'''
# Emulate Bag.do from Smalltalk and Multiset.begin from C++.
return _chain.from_iterable(_starmap(_repeat, self.items()))
'''
返回一个迭代器。元素被重复了多少次,在该迭代器中就包含多少个该元素;
所有元素按照字母序排序,个数小于1的元素不被包含;
注:此处非所有元素集合,而是包含所有元素集合的迭代器;
例如:
>>> import collections
>>> c = collections.Counter(a=4, b=2, c=0, d=-2)
>>> list(c.elements())
['a', 'a', 'a', 'a', 'b', 'b']
'''
# Override dict methods where necessary
@classmethod
def fromkeys(cls, iterable, v=None):
# There is no equivalent method for counters because setting v=1
# means that no element can have a count greater than one.
raise NotImplementedError(
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
'''
未实现的类方法;
'''
def update(*args, **kwds):
'''Like dict.update() but add counts instead of replacing them.
Source can be an iterable, a dictionary, or another Counter instance.
>>> c = Counter('which')
>>> c.update('witch') # add elements from another iterable
>>> d = Counter('watch')
>>> c.update(d) # add elements from another counter
>>> c['h'] # four 'h' in which, witch, and watch
4
'''
# The regular dict.update() operation makes no sense here because the
# replace behavior results in the some of original untouched counts
# being mixed-in with all of the other counts for a mismash that
# doesn't have a straight-forward interpretation in most counting
# contexts. Instead, we implement straight-addition. Both the inputs
# and outputs are allowed to contain zero and negative counts.
if not args:
raise TypeError("descriptor 'update' of 'Counter' object "
"needs an argument")
self, *args = args
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
iterable = args[0] if args else None
if iterable is not None:
if isinstance(iterable, Mapping):
if self:
self_get = self.get
for elem, count in iterable.items():
self[elem] = count + self_get(elem, 0)
else:
super(Counter, self).update(iterable) # fast path when counter is empty
else:
_count_elements(self, iterable)
if kwds:
self.update(kwds)
'''
更新计数器,其实就是增加;如果原来没有,则新建,如果有则加一;
例如:
>>> from collections import Counter
>>> c = Counter('which')
>>> c
Counter({'h': 2, 'i': 1, 'c': 1, 'w': 1})
>>> c.update('witch')
>>> c
Counter({'h': 3, 'i': 2, 'c': 2, 'w': 2, 't': 1})
>>> c['h']
3
>>> d = Counter('watch')
>>> d
Counter({'a': 1, 'h': 1, 'c': 1, 't': 1, 'w': 1})
>>> c.update(d)
>>> c
Counter({'h': 4, 'c': 3, 'w': 3, 'i': 2, 't': 2, 'a': 1})
>>> c['h']
4
'''
def subtract(*args, **kwds):
'''Like dict.update() but subtracts counts instead of replacing them.
Counts can be reduced below zero. Both the inputs and outputs are
allowed to contain zero and negative counts.
Source can be an iterable, a dictionary, or another Counter instance.
>>> c = Counter('which')
>>> c.subtract('witch') # subtract elements from another iterable
>>> c.subtract(Counter('watch')) # subtract elements from another counter
>>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
0
>>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
-1
'''
if not args:
raise TypeError("descriptor 'subtract' of 'Counter' object "
"needs an argument")
self, *args = args
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
iterable = args[0] if args else None
if iterable is not None:
self_get = self.get
if isinstance(iterable, Mapping):
for elem, count in iterable.items():
self[elem] = self_get(elem, 0) - count
else:
for elem in iterable:
self[elem] = self_get(elem, 0) - 1
if kwds:
self.subtract(kwds)
'''
相减,原来的计数器中的每一个元素的数量减去后添加的元素的数量;
例如:
>>> from collections import Counter
>>> c = Counter('which')
>>> c.subtract('witch')
>>> c
Counter({'h': 1, 'i': 0, 'c': 0, 'w': 0, 't': -1})
>>> c['h']
1
>>> d = Counter('watch')
>>> c.subtract(d)
>>> c['a']
-1
'''
def copy(self):
'Return a shallow copy.'
return self.__class__(self)
'''
浅拷贝;
例如:
>>> from collections import Counter
>>> c = Counter('abcdcba')
>>> c
Counter({'a': 2, 'c': 2, 'b': 2, 'd': 1})
>>> d = c.copy()
>>> d
Counter({'a': 2, 'c': 2, 'b': 2, 'd': 1})
'''
def __reduce__(self):
return self.__class__, (dict(self),)
'''
返回一个元组(类型,元组);
例如:
>>> c = Counter('abcdcba')
>>> c.__reduce__()
(<class 'collections.Counter'>, ({'a': 2, 'c': 2, 'b': 2, 'd': 1},))
>>> d = c.__reduce__()
>>> type(d)
<type 'tuple'>
'''
def __delitem__(self, elem):
'Like dict.__delitem__() but does not raise KeyError for missing values.'
if elem in self:
super().__delitem__(elem)
'''
删除元素,等同于del;
本质上就是一个不抛出KeyError的dict类的__delitem()__;
>>> c = Counter('abcdcba')
>>> c
Counter({'a': 2, 'c': 2, 'b': 2, 'd': 1})
>>> c['b'] = 0
>>> c
Counter({'a': 2, 'c': 2, 'd': 1, 'b': 0})
>>> c.__delitem__('a')
>>> c
Counter({'c': 2, 'd': 1, 'b': 0})
>>> del c['b']
>>> c
Counter({'c': 2, 'd': 1})
'''
def __repr__(self):
if not self:
return '%s()' % self.__class__.__name__
try:
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
return '%s({%s})' % (self.__class__.__name__, items)
except TypeError:
# handle case where values are not orderable
return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
'''
如果没有对象就返回类的名字,否则返回类的名字并且返回利用most_common()方法得到类中的信息;
例如:
>>> from collections import Counter
>>> c = Counter('aabbccdd')
>>> c.__repr__()
"Counter({'a': 2, 'c': 2, 'b': 2, 'd': 2})"
>>> c = Counter()
>>> c.__repr__()
'Counter()'
'''
# Multiset-style mathematical operations discussed in:
# Knuth TAOCP Volume II section 4.6.3 exercise 19
# and at http://en.wikipedia.org/wiki/Multiset
#
# Outputs guaranteed to only include positive counts.
#
# To strip negative and zero counts, add-in an empty counter:
# c += Counter()
def __add__(self, other):
'''Add counts from two counters.
>>> Counter('abbb') + Counter('bcc')
Counter({'b': 4, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
newcount = count + other[elem]
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count > 0:
result[elem] = count
return result
'''
加法运算,相当于+,结果中只会出现计数count大于0的元素;
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c2 = Counter({'b':2,'c':1})
>>> c1.__add__(c2)
Counter({'c': 4, 'b': 3})
>>> c1 + c2
Counter({'c': 4, 'b': 3})
'''
def __sub__(self, other):
''' Subtract count, but keep only results with positive counts.
>>> Counter('abbbc') - Counter('bccd')
Counter({'b': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
newcount = count - other[elem]
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count < 0:
result[elem] = 0 - count
return result
'''
减法运算,相当于-,结果中只会出现计数count大于0的元素;
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c2 = Counter({'b':2,'c':1})
>>> c1.__sub__(c2)
Counter({'c': 2})
>>> c1 - c2
Counter({'c': 2})
'''
def __or__(self, other):
'''Union is the maximum of value in either of the input counters.
>>> Counter('abbb') | Counter('bcc')
Counter({'b': 3, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
other_count = other[elem]
newcount = other_count if count < other_count else count
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count > 0:
result[elem] = count
return result
'''
并集运算,相当于|,结果中只会出现计数count大于0的元素及主要是选相同元素中count最大的一个;
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c2 = Counter({'b':2,'c':1})
>>> c1.__or__(c2)
Counter({'c': 3, 'b': 2})
>>> c1 | c2
Counter({'c': 3, 'b': 2})
'''
def __and__(self, other):
''' Intersection is the minimum of corresponding counts.
>>> Counter('abbb') & Counter('bcc')
Counter({'b': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
other_count = other[elem]
newcount = count if count < other_count else other_count
if newcount > 0:
result[elem] = newcount
return result
'''
交集运算,相当于&,结果中只会出现计数count大于0的元素及主要是选相同元素中count最小的一个;
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c2 = Counter({'b':2,'c':1})
>>> c1.__and__(c2)
Counter({'c': 1, 'b': 1})
>>> c1 & c2
Counter({'c': 1, 'b': 1})
'''
def __pos__(self):
'Adds an empty counter, effectively stripping negative and zero counts'
result = Counter()
for elem, count in self.items():
if count > 0:
result[elem] = count
return result
'''
用于清除值为负数和零的计数; (Python3新增)
例如:
>>> from collections import Counter
>>> c1 = Counter({'a':0,'b':1,'c':-3})
>>> c1.__pos__()
Counter({'b': 1})
'''
def __neg__(self):
'''Subtracts from an empty counter. Strips positive and zero counts,
and flips the sign on negative counts.
'''
result = Counter()
for elem, count in self.items():
if count < 0:
result[elem] = 0 - count
return result
'''
用于清除值为正数或者零的计数,并将值为负数的计数,转换为正数; (Python3新增)
例如:
>>> c1 = Counter({'a':0,'b':1,'c':-3})
>>> c1.__neg__()
Counter({'c': 3})
'''
def _keep_positive(self):
'''Internal method to strip elements with a negative or zero count'''
nonpositive = [elem for elem, count in self.items() if not count > 0]
for elem in nonpositive:
del self[elem]
return self
def __iadd__(self, other):
'''Inplace add from another counter, keeping only positive counts.
>>> c = Counter('abbb')
>>> c += Counter('bcc')
>>> c
Counter({'b': 4, 'c': 2, 'a': 1})
'''
for elem, count in other.items():
self[elem] += count
return self._keep_positive()
'''
自加,相当于+=,结果中只会出现计数count大于0的元素; (Python3新增)
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c1 += Counter({'b':2,'c':1})
>>> c1
Counter({'c': 4, 'b': 3})
'''
def __isub__(self, other):
'''Inplace subtract counter, but keep only results with positive counts.
>>> c = Counter('abbbc')
>>> c -= Counter('bccd')
>>> c
Counter({'b': 2, 'a': 1})
'''
for elem, count in other.items():
self[elem] -= count
return self._keep_positive()
'''
自减,相当于-=,结果中只会出现计数count大于0的元素; (Python3新增)
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c1 -= Counter({'b':1,'c':1})
>>> c1
Counter({'c': 2})
'''
def __ior__(self, other):
'''Inplace union is the maximum of value from either counter.
>>> c = Counter('abbb')
>>> c |= Counter('bcc')
>>> c
Counter({'b': 3, 'c': 2, 'a': 1})
'''
for elem, other_count in other.items():
count = self[elem]
if other_count > count:
self[elem] = other_count
return self._keep_positive()
'''
自并集运算,相当于|=,结果中只会出现计数count大于0的元素及主要是选相同元素中count最大的一个; (Python3新增)
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c1 |= Counter({'b':1,'d':2})
>>> c1
Counter({'c': 3, 'd': 2, 'b': 1})
'''
def __iand__(self, other):
'''Inplace intersection is the minimum of corresponding counts.
>>> c = Counter('abbb')
>>> c &= Counter('bcc')
>>> c
Counter({'b': 1})
'''
for elem, count in self.items():
other_count = other[elem]
if other_count < count:
self[elem] = other_count
return self._keep_positive()
'''
自交集运算,相当于&=,结果中只会出现计数count大于0的元素及主要是选相同元素中count最小的一个; (Python3新增)
例如:
>>> c1 = Counter({'a':0,'b':1,'c':3})
>>> c1 &= Counter({'b':1,'d':2})
>>> c1
Counter({'b': 1})
''' |
|