python编码转换实验

spell 发表于 2018-8-6 12:10:08

　　Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
　　 on linux2
　　Type "help", "copyright", "credits" or "license" for more information.
　　>>> print ord('A')
　　65
　　>>>
　　...
　　>>> a = {"a":"1","b","2"}
　　File "<stdin>", line 1
　　a = {"a":"1","b","2"}
　　^
　　SyntaxError: invalid syntax
　　>>> a = {"a":"1","b":"2"}
　　>>> str(a)
　　"{'a': '1', 'b': '2'}"
　　>>> print a
　　{'a': '1', 'b': '2'}
　　>>> print type(a)
　　<type 'dict'>
　　>>> print type(str(a))
　　<type 'str'>
　　>>> b =
　　>>> print type(b)
　　<type 'list'>
　　>>> print type(str(b))
　　<type 'str'>
　　>>> str(b)
　　''
　　>>> b.__class__
　　<type 'list'>
　　>>> str(b).__class__
　　<type 'str'>
　　>>> isinstance(a, str)
　　False
　　>>> isinstance(a, dict)
　　True
　　>>> isinstance(a, unicode)
　　False
　　>>> isinstance(a, utf-8)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'utf' is not defined
　　>>> isinstance(a, 'utf-8')
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>

　　TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
　　>>> isinstance(a, type)
　　False
　　>>> isinstance(a, unicode)
　　False
　　>>> isinstance(a, unicode)
　　False
　　>>> import chardet
　　>>> chardet.detect(a)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
　　u.feed(aBuf)
　　File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 74, in feed
　　if aBuf[:3] == codecs.BOM:
　　TypeError: unhashable type
　　>>> chardet.detect(str(a))
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> chardet.detect(str(b))
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> c = ["我","是"]
　　>>> chardet.detect(str(c))
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> print c
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> c.encode('unicode')
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'list' object has no attribute 'encode'
　　>>> str(c).encode('unicode')
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　LookupError: unknown encoding: unicode
　　>>> str(c).encode('utf-8')
　　"['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
　　>>> d = str(c)
　　>>> chardet.detect(d)
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> chardet.detect(c)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
　　u.feed(aBuf)
　　File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed
　　if self._highBitDetector.search(aBuf):
　　TypeError: expected string or buffer
　　>>> chardet.detect(d)
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> print d
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print dc
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'dc' is not defined
　　>>> print c
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print d.decode('ascii')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print type(d.decode('ascii'))
　　<type 'unicode'>
　　>>> print d.decode('ascii')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> chardet.detect(c.decode('ascii')
　　... )
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'list' object has no attribute 'decode'
　　>>> chardet.detect(d.decode('ascii'))
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
　　raise ValueError('Expected a bytes object, not a unicode object')
　　ValueError: Expected a bytes object, not a unicode object
　　>>> type(d)
　　<type 'str'>
　　>>> print type(d.decode('ascii'))
　　<type 'unicode'>
　　>>>   print d.decode('ascii')
　　File "<stdin>", line 1
　　print d.decode('ascii')
　　^
　　IndentationError: unexpected indent
　　>>> print d.decode('ascii')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print d.decode('ascii').encode('utf-8')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print d.decode('ascii').encode('utf-8')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print d.decode('ascii')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> e = d.decode('ascii')
　　>>> print e
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> type(e)
　　<type 'unicode'>
　　>>> f = e.encode('utf-8')
　　>>> f
　　"['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
　　>>> print f
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> type(f)
　　<type 'str'>
　　>>> print f.decode("unicode_escape")
　　['', 'ˉ']
　　>>> print f.encode("raw_unicode_escape")
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print f.encode("raw_unicode_escape").decode('utf-8')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print b
　　
　　>>> print c
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print type(c)
　　<type 'list'>
　　>>> print type(d)
　　<type 'str'>
　　>>> print d
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> import syss
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　ImportError: No module named syss
　　>>> import sys

　　>>> reload(sys)
　　<module 'sys' (built-in)>
　　>>> sys.setdefaultencoding('utf-8')
　　>>> print d
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print type(c)
　　<type 'list'>
　　>>> print type(d)
　　<type 'str'>
　　>>> cc = ["我","是"]
　　>>> print cc
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print type(cc)
　　<type 'list'>
　　>>> dd = str(cc)
　　>>> pirnt dd
　　File "<stdin>", line 1
　　pirnt dd
　　^
　　SyntaxError: invalid syntax
　　>>> print dd
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print type(dd)
　　<type 'str'>
　　>>> chardet.detect(d)
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> chardet.detect(dd)
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> sys.defaultencoding()
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'module' object has no attribute 'defaultencoding'
　　>>> sys.defaultencoding
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'module' object has no attribute 'defaultencoding'
　　>>> sys.defaultencode
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'module' object has no attribute 'defaultencode'
　　>>> sys.defaultencode()
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'module' object has no attribute 'defaultencode'
　　>>> sys.defaultencoding()
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'module' object has no attribute 'defaultencoding'
　　>>> sys.defaultencode
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　AttributeError: 'module' object has no attribute 'defaultencode'
　　>>> q = '中国'
　　>>> type(q)
　　<type 'str'>
　　>>> chardet.detect(q0
　　... )
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'q0' is not defined
　　>>> chardet.detect(q)
　　{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
　　>>> p = ['中国', '复兴']
　　>>> chardet.detect(p)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
　　u.feed(aBuf)
　　File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed
　　if self._highBitDetector.search(aBuf):
　　TypeError: expected string or buffer
　　>>> chardet.detect(str(p))
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> print type(dd)
　　<type 'str'>
　　>>> print dd.decode('unicode_escape')
　　['', 'ˉ']
　　>>> print type(dd.decode('unicode_escape'))
　　<type 'unicode'>
　　>>> dd
　　"['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
　　>>> print dd
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print dd.encode('raw_unicode_escape')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print type(dd.encode('raw_unicode_escape'))
　　<type 'str'>
　　>>> print type(dd.encode('raw_unicode_escape').decode('utf-8'))
　　<type 'unicode'>
　　>>> print type(dd.encode('raw_unicode_escape').decode('utf-8')
　　... )
　　<type 'unicode'>
　　>>> print dd
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print dd, type(dd)
　　['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>
　　>>> print dd.encode('raw_unicode_escape'), type(dd.encode('raw_unicode_escape'))
　　['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>
　　>>> print dd.decode('utf-8'), type(dd.decode('utf-8')
　　... )
　　['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'unicode'>
　　>>> print dd.decode('utf-8')
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print dd
　　['\xe6\x88\x91', '\xe6\x98\xaf']
　　>>> print ee
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'ee' is not defined
　　>>> ee = u"dd"
　　>>> ee = u"['\xe6\x88\x91', '\xe6\x98\xaf']"
　　>>> print ee
　　['', 'ˉ']
　　>>> ee
　　u"['\xe6\x88\x91', '\xe6\x98\xaf']"
　　>>> ee = ['中国', '复兴']
　　>>> type(ee)
　　<type 'list'>
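　　>>> print ee
　　['\xe4\xb8\xad\xe5\x9b\xbd', '\xe5\xa4\x8d\xe5\x85\xb4']
　　>>> print str(ee)
　　['\xe4\xb8\xad\xe5\x9b\xbd', '\xe5\xa4\x8d\xe5\x85\xb4']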
　　>>> printee
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'printee' is not defined
　　>>> print ee
　　['\xe4\xb8\xad\xe5\x9b\xbd', '\xe5\xa4\x8d\xe5\x85\xb4']
　　>>> print json.dumps(ee).decode('unicode_escape')
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'json' is not defined
　　>>> import json
　　>>> print json.dumps(ee).decode('unicode_escape')
　　["中国", "复兴"]
　　>>> print str(ee).decode('unicode_escape')
　　
　　>>> x = '中国'
　　>>> print x
　　中国
　　>>> x
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> type(x)
　　<type 'str'>
　　>>> chardet.detect(x)
　　{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
　　>>> y = x.decode('utf-8')
　　>>> y
　　u'\u4e2d\u56fd'
　　>>> print y
　　中国
　　>>> chardet.detect(y)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
　　raise ValueError('Expected a bytes object, not a unicode object')
　　ValueError: Expected a bytes object, not a unicode object
　　>>> x
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> x = '\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> print x
　　中国
　　>>> x
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> x = u'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> print x
　　-
　　>>> x.decode('utf-8')
　　u'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> print x.decode('utf-8')
　　-
　　>>> chardet.detect(x)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
　　raise ValueError('Expected a bytes object, not a unicode object')
　　ValueError: Expected a bytes object, not a unicode object
　　>>> print type(x)
　　<type 'unicode'>
　　>>> x
　　u'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> pirnt x
　　File "<stdin>", line 1
　　pirnt x
　　^
　　SyntaxError: invalid syntax
　　>>> print x
　　-
　　>>> print x.encode('raw_unicode_escape')
　　中国
　　>>> y = x.encode('raw_unicode_escape')
　　>>> y
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> type y
　　File "<stdin>", line 1
　　type y
　　^
　　SyntaxError: invalid syntax
　　>>> type(y)
　　<type 'str'>
　　>>> print y
　　中国
　　>>> chardet.detect(y)
　　{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
　　>>> z = y.encode('utf-8')
　　>>> print z
　　中国
　　>>> z
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> y
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> type(z)
　　<type 'str'>
　　>>> type(y)
　　<type 'str'>
　　>>> chardet.detect(y)
　　{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
　　>>> y
　　'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> z = y.encode('utf-8')
　　>>> z = y.decode('utf-8')
　　>>> z
　　u'\u4e2d\u56fd'
　　>>> print z
　　中国
　　>>> type(z)
　　<type 'unicode'>
　　>>> x
　　u'\xe4\xb8\xad\xe5\x9b\xbd'
　　>>> f='\u53eb\u6211'
　　>>> print f
　　\u53eb\u6211
　　>>> f
　　'\\u53eb\\u6211'
　　>>> type(f)
　　<type 'str'>
　　>>> chardet.detect(f)
　　{'confidence': 1.0, 'encoding': 'ascii'}
　　>>> f.decode('ascii')
　　u'\\u53eb\\u6211'
　　>>> print f.decode('ascii')
　　\u53eb\u6211
　　>>> f.decode('unicode_escape')
　　u'\u53eb\u6211'
　　>>> print f.decode('unicode_escape')
　　叫我
　　>>> sys.getdefaultencoding()
　　'utf-8'
　　>>> dd = { 'name': u'功夫熊猫' }
　　>>> print dd
　　{'name': u'\u529f\u592b\u718a\u732b'}
　　>>> dd
　　{'name': u'\u529f\u592b\u718a\u732b'}
　　>>> dd2 = { 'name': '功夫熊猫' }
　　>>> dd2
　　{'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}
　　>>> print simplejson.dumps(dd, ensure_ascii=False)
　　Traceback (most recent call last):
　　File "<stdin>", line 1, in <module>
　　NameError: name 'simplejson' is not defined
　　>>> print json.dumps(dd, ensure_ascii=False)
　　{"name": "功夫熊猫"}
　　>>> print json.dumps(dd2, ensure_ascii=False)
　　{"name": "功夫熊猫"}
　　>>> print dd2
　　{'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}
　　>>>

页: [1]

运维网's Archiver

python编码转换实验