0
我有以下python2.7代码,其中预测值测试(阿拉伯字),但结果在数给出没有字符串如下:打印utf-8的numpy数组?
['\xd8\xa7\xd9\x84\xd9\x85\xd8\xa7\xd9\x84'
'\xd8\xa7\xd9\x84\xd9\x85\xd8\xa7\xd9\x84']
我的Python代码:
# -*- coding: utf-8 -*-
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesClassifier
class MeanEmbeddingVectorizer(object):
def __init__(self, word2vec):
self.word2vec = word2vec
# this line is different from python2 version - no more itervalues
self.dim = len(list(word2vec.values())[0])
def fit(self, X, y):
return self
def transform(self, X):
return np.array([
np.mean([self.word2vec[w] for w in words if w in self.word2vec]
or [np.zeros(self.dim)], axis=0)
for words in X
])
w2v = {
'من': [1, 1],
'العراق': [1.01, 1.01],
'مصر': [1.02, 1.02],
'مال': [-1, -1],
'حرف جر': [-1.01, -1.01],
'السودان': [-1.02, -1.02],
'فلوس': [1, -1],
'دولة': [1.01, -1.01],
'مصاري': [1.02, -1.02]
}
model = Pipeline([
("word2vec vectorizer", MeanEmbeddingVectorizer(w2v)),
("extra trees", ExtraTreesClassifier(n_estimators=200))])
X = [['في'],
['عقود']]
y = ['حرف جر', 'المال']
model.fit(X, y)
# never before seen words!!!
test_X = [['من'], ['فلوس']]
print(model.predict(test_X))
所以我如何打印阿拉伯文字!
我试图检查结果阵起诉:
arr = model.predict(test_X)
print(np.info(arr))
,并得到了以下结果:
class: ndarray
shape: (2,)
strides: (11,)
itemsize: 11
aligned: True
contiguous: True
fortran: True
data pointer: 0x1189760
byteorder: little
byteswap: False
type: |S11
None