2013-06-12 54 views
0

我使用的是 Python 2.7,并且已经安装了 sklearn。(标签:python、sgdclassifier、sklearn)

这是 data_io 的代码:

import csv 
import json 
import os 
import pickle 
import psycopg2 

def paper_ids_to_string(ids):
    """Join paper ids into a single space-separated string.

    Each element of ``ids`` is converted with ``str`` before joining, so the
    ids may be integers or strings. An empty iterable yields ``""``.
    """
    return " ".join(str(x) for x in ids)

# Module-level cache of the PostgreSQL connection string; populated on the
# first call to get_db_conn().
conn_string = None

def get_db_conn():
    """Open and return a new psycopg2 connection.

    The connection string is read once from the "postgres_conn_string"
    entry returned by get_paths() and cached in the module-level
    ``conn_string``. If it contains the "##AskForPassword##" placeholder,
    the user is prompted for the password and the placeholder is replaced
    in the cached string, so the prompt appears at most once per process.

    Every call opens a new connection; the caller is responsible for
    closing it.
    """
    global conn_string
    if conn_string is None:
        conn_string = get_paths()["postgres_conn_string"]
    if "##AskForPassword##" in conn_string:
        # raw_input is the Python 2 built-in; this file targets Python 2.7.
        password = raw_input("PostgreSQL Password: ")
        conn_string = conn_string.replace("##AskForPassword##", password)
    return psycopg2.connect(conn_string)

def get_paths():
    """Load SETTINGS.json and expand environment variables in its values.

    The file is looked up relative to the current working directory.

    Returns:
        dict mapping each setting name to its value after
        ``os.path.expandvars`` has been applied.
    """
    # A context manager guarantees the settings file handle is closed.
    with open("SETTINGS.json") as settings_file:
        paths = json.load(settings_file)
    for key in paths:
        paths[key] = os.path.expandvars(paths[key])
    return paths

def save_model(model):
    """Pickle ``model`` to the "model_path" location from get_paths().

    The file is opened in binary mode: pickle output is a byte stream, and
    text mode can alter it through newline translation on some platforms.
    The ``with`` block ensures the data is flushed and the handle closed.
    """
    out_path = get_paths()["model_path"]
    with open(out_path, "wb") as model_file:
        pickle.dump(model, model_file)

def load_model():
    """Unpickle and return the model stored at the "model_path" setting.

    The file is read in binary mode so the bytes reach pickle unmodified,
    and the handle is closed as soon as loading finishes.
    """
    in_path = get_paths()["model_path"]
    # NOTE(review): unpickling can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open(in_path, "rb") as model_file:
        return pickle.load(model_file)

def write_submission(predictions):
    """Write the predictions as a CSV file at the "submission_path" setting.

    Args:
        predictions: mapping of author id to an iterable of paper ids.

    The output has an ("AuthorId", "PaperIds") header row followed by one
    row per author, with the paper ids joined by spaces. Rows follow the
    iteration order of ``predictions``.
    """
    submission_path = get_paths()["submission_path"]
    rows = [
        (author_id, paper_ids_to_string(predictions[author_id]))
        for author_id in predictions
    ]
    # Closing the file via ``with`` guarantees every row is flushed to disk.
    with open(submission_path, "w") as submission_file:
        writer = csv.writer(submission_file, lineterminator="\n")
        writer.writerow(("AuthorId", "PaperIds"))
        writer.writerows(rows)

def get_features_db(table_name):
    """Run the feature query for ``table_name`` and return all result rows.

    Args:
        table_name: name substituted into the query template by
            get_features_query().

    Returns:
        The value of ``cursor.fetchall()`` for the executed query.

    A connection is opened through get_db_conn() for this call only; the
    cursor and the connection are closed before returning, including when
    building or executing the query raises.
    """
    conn = get_db_conn()
    try:
        query = get_features_query(table_name)
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

def get_features_query(table_name):
    """Return the SQL from feature_query.sql with the table name filled in.

    The template is read from the current working directory, stripped of
    leading/trailing whitespace, and every "##DataTable##" placeholder is
    replaced by ``table_name``.

    NOTE(review): the name is substituted textually, so ``table_name`` must
    come from trusted code, never from user input.
    """
    with open("feature_query.sql") as query_file:
        query = query_file.read().strip()
    return query.replace("##DataTable##", table_name)

这是训练(train)代码:

import data_io 
from sklearn.linear_model import SGDClassifier 

def main():
    """Train an SGDClassifier on the deleted/confirmed paper rows and save it.

    Rows from "TrainDeleted" are labelled 0 and rows from "TrainConfirmed"
    are labelled 1. The fitted classifier is stored via data_io.save_model().
    """
    print("Getting features for deleted papers from the database")
    deleted_rows = data_io.get_features_db("TrainDeleted")

    print("Getting features for confirmed papers from the database")
    confirmed_rows = data_io.get_features_db("TrainConfirmed")

    # The first two columns of every row are dropped (presumably the author
    # and paper ids -- confirm against the feature query); the rest are the
    # feature values.
    all_rows = deleted_rows + confirmed_rows
    features = [row[2:] for row in all_rows]
    target = [0] * len(deleted_rows) + [1] * len(confirmed_rows)

    print("Training the Classifier")
    classifier = SGDClassifier(
        alpha=0.0001,
        class_weight='auto',
        epsilon=0.1,
        eta0=0.0,
        fit_intercept=True,
        l1_ratio=0.15,
        learning_rate='optimal',
        loss='log',
        n_iter=5,
        n_jobs=1,
        penalty='l2',
        power_t=0.5,
        random_state=None,
        rho=None,
        shuffle=False,
        verbose=0,
        warm_start=False,
    )
    classifier.fit(features, target)

    print("Saving the classifier")
    data_io.save_model(classifier)

if __name__ == "__main__":
    main()

这是预测代码:

from collections import defaultdict 
import data_io 

def main():
    """Score the "ValidPaper" rows and write a ranked submission file.

    For every author, the paper ids are ordered by descending predicted
    probability (ties broken by descending paper id) and passed to
    data_io.write_submission().
    """
    print("Getting features for valid papers from the database")
    data = data_io.get_features_db("ValidPaper")
    # Each row starts with (author id, paper id); the rest are features.
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    # predict_proba is a method and must be called with the feature rows;
    # subscripting the method object itself raises a TypeError. Column 1 of
    # the result is the probability of the second class (presumably label 1,
    # "confirmed", as assigned by the training script -- confirm).
    predictions = list(classifier.predict_proba(features)[:, 1])

    author_predictions = defaultdict(list)
    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    paper_predictions = {}
    for author_id in sorted(author_predictions):
        # Sorting (probability, paper id) pairs in reverse puts the most
        # likely papers first.
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions)

if __name__ == "__main__":
    main()

当我尝试运行时,出现了以下类型错误: predictions = classifier.predict_proba[:,1] TypeError: 'instancemethod' 对象不支持下标操作(is not subscriptable)

我该怎么办?

+1

请改为发布一个最小化的复现脚本(我相信你可以把这个例子精简成一个 10 到 20 行的脚本)。如果您希望其他人能快速提供帮助,请务必阅读并提供错误的完整回溯(traceback)。回溯通常包含错误发生时所涉及的所有函数的文件名和行号。请设身处地为那些可能在 StackOverflow 上回答您问题的人着想。这种问题看起来就像是在说“请替我调试我的代码”。 – ogrisel

回答

0

predict_proba 是一个方法,不能直接对它使用下标。您需要以特征矩阵或向量作为参数来调用它,例如: predictions = classifier.predict_proba(features)[:, 1]