我正在尝试使用python读取avro文件。如何在python 3.5.2中读取avro文件
我按照下面链接中的说明成功安装了 Apache Avro(我认为安装成功了,因为我可以在 Python shell 中执行 `import avro`):
https://avro.apache.org/docs/1.8.1/gettingstartedpython.html
但是,当我按照上述说明中的代码尝试读取 avro 文件时,只要导入 avro 相关的模块,就一直收到错误信息。
>>> import avro.schema
Traceback (most recent call last):
File "<pyshell#6>", line 1, in <module>
import avro.schema
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 954, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 896, in _find_spec
File "<frozen importlib._bootstrap_external>", line 1139, in find_spec
File "<frozen importlib._bootstrap_external>", line 1115, in _get_spec
File "<frozen importlib._bootstrap_external>", line 1096, in _legacy_get_spec
File "<frozen importlib._bootstrap>", line 444, in spec_from_loader
File "<frozen importlib._bootstrap_external>", line 533, in spec_from_file_location
File "I:\Program Files\lib\site-packages\avro-_avro_version_-py3.5.egg\avro\schema.py", line 340
except Exception, e:
^
SyntaxError: invalid syntax
>>> from avro.datafile import DataFileReader, DataFileWriter
Traceback (most recent call last):
File "I:\Program Files\lib\site-packages\avro-_avro_version_-py3.5.egg\avro\datafile.py", line 21, in <module>
from cStringIO import StringIO
ImportError: No module named 'cStringIO'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<pyshell#7>", line 1, in <module>
from avro.datafile import DataFileReader, DataFileWriter
File "I:\Program Files\lib\site-packages\avro-_avro_version_-py3.5.egg\avro\datafile.py", line 23, in <module>
from StringIO import StringIO
ImportError: No module named 'StringIO'
>>> from avro.io import DatumReader, DatumWriter
Traceback (most recent call last):
File "<pyshell#19>", line 1, in <module>
from avro.io import DatumReader, DatumWriter
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 954, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 896, in _find_spec
File "<frozen importlib._bootstrap_external>", line 1139, in find_spec
File "<frozen importlib._bootstrap_external>", line 1115, in _get_spec
File "<frozen importlib._bootstrap_external>", line 1096, in _legacy_get_spec
File "<frozen importlib._bootstrap>", line 444, in spec_from_loader
File "<frozen importlib._bootstrap_external>", line 533, in spec_from_file_location
File "I:\Program Files\lib\site-packages\avro-_avro_version_-py3.5.egg\avro\io.py", line 200
bits = (((ord(self.read(1)) & 0xffL)) |
^
SyntaxError: invalid syntax
那么我是否成功安装avro?为什么我收到这些错误?我在Windows 7上使用python 3.5.2。
编辑:按照 Stephane Martin 的建议,我解决了上述导入问题。然后我尝试在 Python 中读取 avro 文件。我有一批 avro 文件,它们所在的目录已经被正确设置为 Python 的路径。下面是我的代码:
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
reader = DataFileReader(open("part-00000-of-01733.avro", "r"), DatumReader())
for user in reader:
print (user)
reader.close()
运行后返回如下错误:
Traceback (most recent call last):
File "I:\DJ data\read avro.py", line 5, in <module>
reader = DataFileReader(open("part-00000-of-01733.avro", "r"), DatumReader())
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\datafile.py", line 349, in __init__
self._read_header()
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\datafile.py", line 459, in _read_header
META_SCHEMA, META_SCHEMA, self.raw_decoder)
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\io.py", line 525, in read_data
return self.read_record(writer_schema, reader_schema, decoder)
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\io.py", line 725, in read_record
field_val = self.read_data(field.type, readers_field.type, decoder)
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\io.py", line 515, in read_data
return self.read_fixed(writer_schema, reader_schema, decoder)
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\io.py", line 568, in read_fixed
return decoder.read(writer_schema.size)
File "I:\Program Files\lib\site-packages\avro_python3-1.8.1-py3.5.egg\avro\io.py", line 170, in read
input_bytes = self.reader.read(n)
File "I:\Program Files\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 863: character maps to <undefined>
我确实注意到,在说明文档的示例中,首先创建了一个模式(schema)。但 avsc 文件是什么?就我的情况而言,我应该如何创建它以及相应的模式?
`except Exception, e` 是仅适用于 Python 2 的语法。该库可能不兼容 Python 3 –