Python 编码转换实验
Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-11)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> print ord('A')
65
>>>
...
>>> a = {"a":"1","b","2"}
File "<stdin>", line 1
a = {"a":"1","b","2"}
^
SyntaxError: invalid syntax
>>> a = {"a":"1","b":"2"}
>>> str(a)
"{'a': '1', 'b': '2'}"
>>> print a
{'a': '1', 'b': '2'}
>>> print type(a)
<type 'dict'>
>>> print type(str(a))
<type 'str'>
>>> b = [1,2,3]
>>> print type(b)
<type 'list'>
>>> print type(str(b))
<type 'str'>
>>> str(b)
'[1, 2, 3]'
>>> b.__class__
<type 'list'>
>>> str(b).__class__
<type 'str'>
>>> isinstance(a, str)
False
>>> isinstance(a, dict)
True
>>> isinstance(a, unicode)
False
>>> isinstance(a, utf-8)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'utf' is not defined
>>> isinstance(a, 'utf-8')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
>>> isinstance(a, type)
False
>>> isinstance(a, unicode)
False
>>> isinstance(a, unicode)
False
>>> import chardet
>>> chardet.detect(a)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
u.feed(aBuf)
File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 74, in feed
if aBuf[:3] == codecs.BOM:
TypeError: unhashable type
>>> chardet.detect(str(a))
{'confidence': 1.0, 'encoding': 'ascii'}
>>> chardet.detect(str(b))
{'confidence': 1.0, 'encoding': 'ascii'}
>>> c = ["我","是"]
>>> chardet.detect(str(c))
{'confidence': 1.0, 'encoding': 'ascii'}
>>> print c
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> c.encode('unicode')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'encode'
>>> str(c).encode('unicode')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
LookupError: unknown encoding: unicode
>>> str(c).encode('utf-8')
"['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
>>> d = str(c)
>>> chardet.detect(d)
{'confidence': 1.0, 'encoding': 'ascii'}
>>> chardet.detect(c)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
u.feed(aBuf)
File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed
if self._highBitDetector.search(aBuf):
TypeError: expected string or buffer
>>> chardet.detect(d)
{'confidence': 1.0, 'encoding': 'ascii'}
>>> print d
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print dc
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'dc' is not defined
>>> print c
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print d.decode('ascii')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print type(d.decode('ascii'))
<type 'unicode'>
>>> print d.decode('ascii')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> chardet.detect(c.decode('ascii')
... )
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'decode'
>>> chardet.detect(d.decode('ascii'))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
raise ValueError('Expected a bytes object, not a unicode object')
ValueError: Expected a bytes object, not a unicode object
>>> type(d)
<type 'str'>
>>> print type(d.decode('ascii'))
<type 'unicode'>
>>>  print d.decode('ascii')
  File "<stdin>", line 1
    print d.decode('ascii')
    ^
IndentationError: unexpected indent
>>> print d.decode('ascii')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print d.decode('ascii').encode('utf-8')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print d.decode('ascii').encode('utf-8')[0]
[
>>> print d.decode('ascii')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> e = d.decode('ascii')
>>> print e
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> type(e)
<type 'unicode'>
>>> f = e.encode('utf-8')
>>> f
"['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
>>> print f
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> type(f)
<type 'str'>
>>> print f.decode("unicode_escape")
['', 'ˉ']
>>> print f.encode("raw_unicode_escape")
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print f.encode("raw_unicode_escape").decode('utf-8')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print b
[1, 2, 3]
>>> print c
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print type(c)
<type 'list'>
>>> print type(d)
<type 'str'>
>>> print d
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> import syss
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: No module named syss
>>> import sys
>>> reload(sys)
<module 'sys' (built-in)>
>>> sys.setdefaultencoding('utf-8')
>>> print d
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print type(c)
<type 'list'>
>>> print type(d)
<type 'str'>
>>> cc = ["我","是"]
>>> print cc
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print type(cc)
<type 'list'>
>>> dd = str(cc)
>>> pirnt dd
File "<stdin>", line 1
pirnt dd
^
SyntaxError: invalid syntax
>>> print dd
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print type(dd)
<type 'str'>
>>> chardet.detect(d)
{'confidence': 1.0, 'encoding': 'ascii'}
>>> chardet.detect(dd)
{'confidence': 1.0, 'encoding': 'ascii'}
>>> sys.defaultencoding()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'defaultencoding'
>>> sys.defaultencoding
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'defaultencoding'
>>> sys.defaultencode
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'defaultencode'
>>> sys.defaultencode()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'defaultencode'
>>> sys.defaultencoding()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'defaultencoding'
>>> sys.defaultencode
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'defaultencode'
>>> q = '中国'
>>> type(q)
<type 'str'>
>>> chardet.detect(q0
... )
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'q0' is not defined
>>> chardet.detect(q)
{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
>>> p = ['中国', '复兴']
>>> chardet.detect(p)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect
u.feed(aBuf)
File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed
if self._highBitDetector.search(aBuf):
TypeError: expected string or buffer
>>> chardet.detect(str(p))
{'confidence': 1.0, 'encoding': 'ascii'}
>>> print type(dd)
<type 'str'>
>>> print dd.decode('unicode_escape')
['', 'ˉ']
>>> print type(dd.decode('unicode_escape'))
<type 'unicode'>
>>> dd
"['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"
>>> print dd
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print dd.encode('raw_unicode_escape')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print type(dd.encode('raw_unicode_escape'))
<type 'str'>
>>> print type(dd.encode('raw_unicode_escape').decode('utf-8'))
<type 'unicode'>
>>> print type(dd.encode('raw_unicode_escape').decode('utf-8')
... )
<type 'unicode'>
>>> print dd
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print dd, type(dd)
['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>
>>> print dd.encode('raw_unicode_escape'), type(dd.encode('raw_unicode_escape'))
['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>
>>> print dd.decode('utf-8'), type(dd.decode('utf-8')
... )
['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'unicode'>
>>> print dd.decode('utf-8')
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print dd
['\xe6\x88\x91', '\xe6\x98\xaf']
>>> print ee
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'ee' is not defined
>>> ee = u"dd"
>>> ee = u"['\xe6\x88\x91', '\xe6\x98\xaf']"
>>> print ee
['', 'ˉ']
>>> ee
u"['\xe6\x88\x91', '\xe6\x98\xaf']"
>>> ee = [u'中国', u'复兴']
>>> type(ee)
<type 'list'>
>>> print ee
[u'\u4e2d\u56fd', u'\u590d\u5174']
>>> print str(ee)
[u'\u4e2d\u56fd', u'\u590d\u5174']
>>> printee
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'printee' is not defined
>>> print ee
[u'\u4e2d\u56fd', u'\u590d\u5174']
>>> print json.dumps(ee).decode('unicode_escape')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'json' is not defined
>>> import json
>>> print json.dumps(ee).decode('unicode_escape')
["中国", "复兴"]
>>> print str(ee).decode('unicode_escape')
[u'中国', u'复兴']
>>> x = '中国'
>>> print x
中国
>>> x
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> type(x)
<type 'str'>
>>> chardet.detect(x)
{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
>>> y = x.decode('utf-8')
>>> y
u'\u4e2d\u56fd'
>>> print y
中国
>>> chardet.detect(y)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
raise ValueError('Expected a bytes object, not a unicode object')
ValueError: Expected a bytes object, not a unicode object
>>> x
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> x = '\xe4\xb8\xad\xe5\x9b\xbd'
>>> print x
中国
>>> x
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> x = u'\xe4\xb8\xad\xe5\x9b\xbd'
>>> print x
-
>>> x.decode('utf-8')
u'\xe4\xb8\xad\xe5\x9b\xbd'
>>> print x.decode('utf-8')
-
>>> chardet.detect(x)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect
raise ValueError('Expected a bytes object, not a unicode object')
ValueError: Expected a bytes object, not a unicode object
>>> print type(x)
<type 'unicode'>
>>> x
u'\xe4\xb8\xad\xe5\x9b\xbd'
>>> pirnt x
File "<stdin>", line 1
pirnt x
^
SyntaxError: invalid syntax
>>> print x
-
>>> print x.encode('raw_unicode_escape')
中国
>>> y = x.encode('raw_unicode_escape')
>>> y
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> type y
File "<stdin>", line 1
type y
^
SyntaxError: invalid syntax
>>> type(y)
<type 'str'>
>>> print y
中国
>>> chardet.detect(y)
{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
>>> z = y.encode('utf-8')
>>> print z
中国
>>> z
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> y
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> type(z)
<type 'str'>
>>> type(y)
<type 'str'>
>>> chardet.detect(y)
{'confidence': 0.75249999999999995, 'encoding': 'utf-8'}
>>> y
'\xe4\xb8\xad\xe5\x9b\xbd'
>>> z = y.encode('utf-8')
>>> z = y.decode('utf-8')
>>> z
u'\u4e2d\u56fd'
>>> print z
中国
>>> type(z)
<type 'unicode'>
>>> x
u'\xe4\xb8\xad\xe5\x9b\xbd'
>>> f='\u53eb\u6211'
>>> print f
\u53eb\u6211
>>> f
'\\u53eb\\u6211'
>>> type(f)
<type 'str'>
>>> chardet.detect(f)
{'confidence': 1.0, 'encoding': 'ascii'}
>>> f.decode('ascii')
u'\\u53eb\\u6211'
>>> print f.decode('ascii')
\u53eb\u6211
>>> f.decode('unicode_escape')
u'\u53eb\u6211'
>>> print f.decode('unicode_escape')
叫我
>>> sys.getdefaultencoding()
'utf-8'
>>> dd = { 'name': u'功夫熊猫' }
>>> print dd
{'name': u'\u529f\u592b\u718a\u732b'}
>>> dd
{'name': u'\u529f\u592b\u718a\u732b'}
>>> dd2 = { 'name': '功夫熊猫' }
>>> dd2
{'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}
>>> print simplejson.dumps(dd, ensure_ascii=False)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'simplejson' is not defined
>>> print json.dumps(dd, ensure_ascii=False)
{"name": "功夫熊猫"}
>>> print json.dumps(dd2, ensure_ascii=False)
{"name": "功夫熊猫"}
>>> print dd2
{'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}
>>>
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341