spell 发表于 2018-8-6 12:10:08

python编码转换实验

  Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
   on linux2
  Type "help", "copyright", "credits" or "license" for more information.
  >>> print ord('A')
  65
  >>>
  ...
  >>> a = {"a":"1","b","2"}
  File "<stdin>", line 1
  a = {"a":"1","b","2"}
  ^
  SyntaxError: invalid syntax
  >>> a = {"a":"1","b":"2"}
  >>> str(a)
  "{'a': '1', 'b': '2'}"
  >>> print a
  {'a': '1', 'b': '2'}
  >>> print type(a)
  <type 'dict'>
  >>> print type(str(a))
  <type 'str'>
  >>> b =
  >>> print type(b)
  <type 'list'>
  >>> print type(str(b))
  <type 'str'>
  >>> str(b)
  ''
  >>> b.__class__
  <type 'list'>
  >>> str(b).__class__
  <type 'str'>
  >>> isinstance(a, str)
  False
  >>> isinstance(a, dict)
  True
  >>> isinstance(a, unicode)
  False
  >>> isinstance(a, utf-8)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'utf' is not defined
  >>> isinstance(a, 'utf-8')
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>

  TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
  >>> isinstance(a, type)
  False
  >>> isinstance(a, unicode)
  False
  >>> isinstance(a, unicode)
  False
  >>> import chardet
  >>> chardet.detect(a)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
  u.feed(aBuf)
  File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 74, in feed
  if aBuf[:3] == codecs.BOM:
  TypeError: unhashable type
  >>> chardet.detect(str(a))
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> chardet.detect(str(b))
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> c = ["我","是"]
  >>> chardet.detect(str(c))
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> print c
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> c.encode('unicode')
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'list' object has no attribute 'encode'
  >>> str(c).encode('unicode')
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  LookupError: unknown encoding: unicode
  >>> str(c).encode('utf-8')
  "['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
  >>> d = str(c)
  >>> chardet.detect(d)
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> chardet.detect(c)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
  u.feed(aBuf)
  File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed
  if self._highBitDetector.search(aBuf):
  TypeError: expected string or buffer
  >>> chardet.detect(d)
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> print d
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print dc
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'dc' is not defined
  >>> print c
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print d.decode('ascii')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print type(d.decode('ascii'))
  <type 'unicode'>
  >>> print d.decode('ascii')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> chardet.detect(c.decode('ascii')
  ... )
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'list' object has no attribute 'decode'
  >>> chardet.detect(d.decode('ascii'))
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
  raise ValueError('Expected a bytes object, not a unicode object')
  ValueError: Expected a bytes object, not a unicode object
  >>> type(d)
  <type 'str'>
  >>> print type(d.decode('ascii'))
  <type 'unicode'>
  >>>  print d.decode('ascii')
  File "<stdin>", line 1
  print d.decode('ascii')
  ^
  IndentationError: unexpected indent
  >>> print d.decode('ascii')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print d.decode('ascii').encode('utf-8')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print d.decode('ascii').encode('utf-8')[0]
  [
  >>> print d.decode('ascii')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> e = d.decode('ascii')
  >>> print e
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> type(e)
  <type 'unicode'>
  >>> f = e.encode('utf-8')
  >>> f
  "['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
  >>> print f
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> type(f)
  <type 'str'>
  >>> print f.decode("unicode_escape")
  ['', 'ˉ']
  >>> print f.encode("raw_unicode_escape")
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print f.encode("raw_unicode_escape").decode('utf-8')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print b
  
  >>> print c
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print type(c)
  <type 'list'>
  >>> print type(d)
  <type 'str'>
  >>> print d
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> import syss
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  ImportError: No module named syss
  >>> import sys

  >>> reload(sys)
  <module 'sys' (built-in)>
  >>> sys.setdefaultencoding('utf-8')
  >>> print d
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print type(c)
  <type 'list'>
  >>> print type(d)
  <type 'str'>
  >>> cc = ["我","是"]
  >>> print cc
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print type(cc)
  <type 'list'>
  >>> dd = str(cc)
  >>> pirnt dd
  File "<stdin>", line 1
  pirnt dd
  ^
  SyntaxError: invalid syntax
  >>> print dd
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print type(dd)
  <type 'str'>
  >>> chardet.detect(d)
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> chardet.detect(dd)
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> sys.defaultencoding()
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'module' object has no attribute 'defaultencoding'
  >>> sys.defaultencoding
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'module' object has no attribute 'defaultencoding'
  >>> sys.defaultencode
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'module' object has no attribute 'defaultencode'
  >>> sys.defaultencode()
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'module' object has no attribute 'defaultencode'
  >>> sys.defaultencoding()
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'module' object has no attribute 'defaultencoding'
  >>> sys.defaultencode
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  AttributeError: 'module' object has no attribute 'defaultencode'
  >>> q = '中国'
  >>> type(q)
  <type 'str'>
  >>> chardet.detect(q0
  ... )
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'q0' is not defined
  >>> chardet.detect(q)
  {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
  >>> p = ['中国', '复兴']
  >>> chardet.detect(p)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
  u.feed(aBuf)
  File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed
  if self._highBitDetector.search(aBuf):
  TypeError: expected string or buffer
  >>> chardet.detect(str(p))
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> print type(dd)
  <type 'str'>
  >>> print dd.decode('unicode_escape')
  ['', 'ˉ']
  >>> print type(dd.decode('unicode_escape'))
  <type 'unicode'>
  >>> dd
  "['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
  >>> print dd
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print dd.encode('raw_unicode_escape')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print type(dd.encode('raw_unicode_escape'))
  <type 'str'>
  >>> print type(dd.encode('raw_unicode_escape').decode('utf-8'))
  <type 'unicode'>
  >>> print type(dd.encode('raw_unicode_escape').decode('utf-8')
  ... )
  <type 'unicode'>
  >>> print dd
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print dd, type(dd)
  ['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>
  >>> print dd.encode('raw_unicode_escape'), type(dd.encode('raw_unicode_escape'))
  ['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>
  >>> print dd.decode('utf-8'), type(dd.decode('utf-8')
  ... )
  ['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'unicode'>
  >>> print dd.decode('utf-8')
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print dd
  ['\xe6\x88\x91', '\xe6\x98\xaf']
  >>> print ee
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'ee' is not defined
  >>> ee = u"dd"
  >>> ee = u"['\xe6\x88\x91', '\xe6\x98\xaf']"
  >>> print ee
  ['', 'ˉ']
  >>> ee
  u"['\xe6\x88\x91', '\xe6\x98\xaf']"
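  >>> ee = [u'中国', u'复兴']
  >>> type(ee)
  <type 'list'>
  >>> print ee
  [u'\u4e2d\u56fd', u'\u590d\u5174']
  >>> print str(ee)
  [u'\u4e2d\u56fd', u'\u590d\u5174']
  >>> printee
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'printee' is not defined
  >>> print ee
  [u'\u4e2d\u56fd', u'\u590d\u5174']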
  >>> print json.dumps(ee).decode('unicode_escape')
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'json' is not defined
  >>> import json
  >>> print json.dumps(ee).decode('unicode_escape')
  ["中国", "复兴"]
  >>> print str(ee).decode('unicode_escape')
  [u'中国', u'复兴']
  >>> x = '中国'
  >>> print x
  中国
  >>> x
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> type(x)
  <type 'str'>
  >>> chardet.detect(x)
  {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
  >>> y = x.decode('utf-8')
  >>> y
  u'\u4e2d\u56fd'
  >>> print y
  中国
  >>> chardet.detect(y)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
  raise ValueError('Expected a bytes object, not a unicode object')
  ValueError: Expected a bytes object, not a unicode object
  >>> x
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> x = '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> print x
  中国
  >>> x
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> x = u'\xe4\xb8\xad\xe5\x9b\xbd'
  >>> print x
  -
  >>> x.decode('utf-8')
  u'\xe4\xb8\xad\xe5\x9b\xbd'
  >>> print x.decode('utf-8')
  -
  >>> chardet.detect(x)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
  raise ValueError('Expected a bytes object, not a unicode object')
  ValueError: Expected a bytes object, not a unicode object
  >>> print type(x)
  <type 'unicode'>
  >>> x
  u'\xe4\xb8\xad\xe5\x9b\xbd'
  >>> pirnt x
  File "<stdin>", line 1
  pirnt x
  ^
  SyntaxError: invalid syntax
  >>> print x
  -
  >>> print x.encode('raw_unicode_escape')
  中国
  >>> y = x.encode('raw_unicode_escape')
  >>> y
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> type y
  File "<stdin>", line 1
  type y
  ^
  SyntaxError: invalid syntax
  >>> type(y)
  <type 'str'>
  >>> print y
  中国
  >>> chardet.detect(y)
  {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
  >>> z = y.encode('utf-8')
  >>> print z
  中国
  >>> z
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> y
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> type(z)
  <type 'str'>
  >>> type(y)
  <type 'str'>
  >>> chardet.detect(y)
  {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
  >>> y
  '\xe4\xb8\xad\xe5\x9b\xbd'
  >>> z = y.encode('utf-8')
  >>> z = y.decode('utf-8')
  >>> z
  u'\u4e2d\u56fd'
  >>> print z
  中国
  >>> type(z)
  <type 'unicode'>
  >>> x
  u'\xe4\xb8\xad\xe5\x9b\xbd'
  >>> f='\u53eb\u6211'
  >>> print f
  \u53eb\u6211
  >>> f
  '\\u53eb\\u6211'
  >>> type(f)
  <type 'str'>
  >>> chardet.detect(f)
  {'confidence': 1.0, 'encoding': 'ascii'}
  >>> f.decode('ascii')
  u'\\u53eb\\u6211'
  >>> print f.decode('ascii')
  \u53eb\u6211
  >>> f.decode('unicode_escape')
  u'\u53eb\u6211'
  >>> print f.decode('unicode_escape')
  叫我
  >>> sys.getdefaultencoding()
  'utf-8'
  >>> dd = { 'name': u'功夫熊猫' }
  >>> print dd
  {'name': u'\u529f\u592b\u718a\u732b'}
  >>> dd
  {'name': u'\u529f\u592b\u718a\u732b'}
  >>> dd2 = { 'name': '功夫熊猫' }
  >>> dd2
  {'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}
  >>> print simplejson.dumps(dd, ensure_ascii=False)
  Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  NameError: name 'simplejson' is not defined
  >>> print json.dumps(dd, ensure_ascii=False)
  {"name": "功夫熊猫"}
  >>> print json.dumps(dd2, ensure_ascii=False)
  {"name": "功夫熊猫"}
  >>> print dd2
  {'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}
  >>>
页: [1]
查看完整版本: python编码转换实验