0
我目前正在使用 TensorFlow,按照其“Wide”(线性模型)教程实现一个逻辑回归分类器:https://www.tensorflow.org/tutorials/wide
我的代码与教程基本一致,但是,当我评估模型时,它每次都会预测负类,而负类大约占数据的77%。我怎样才能让我的模型敢于做出正类的预测?我没有使用正则化,所以方差应该已经是最大的了。该文档宣称准确率可以达到84%,而我使用的是完全相同的数据集。可能是哪里出了问题?这里是训练代码(问题标题:TensorFlow LinearClassifier() 总是预测负类):
def train_logistic_model(training_path, response, predictors, num_labels):
    """Train a ``tf.contrib.learn.LinearClassifier`` on a CSV file.

    The first 90% of the rows are used for fitting and the remaining rows
    are held out for testing. The model is evaluated on both partitions; the
    training-set metrics are printed and the test-set metrics are returned.

    Args:
        training_path: Path of the CSV file. The first row is the header.
        response: Positional index of the response column.
        predictors: Mapping of positional column index to the string
            ``'Categorical'`` or ``'Continuous'``. Entries with any other
            value are not used as features.
        num_labels: Number of classes, forwarded as ``n_classes``.
            NOTE(review): the label built here is always binary (1 when the
            response contains ``">50K"``, else 0), whatever value is passed.

    Returns:
        A tuple ``(eval_result_test, model_dir)``: the result of evaluating
        the model on the held-out rows, and the temporary directory the
        model was created in.
    """
    df = pd.read_csv(training_path, header=0)

    # Reduce the column names to letters and digits only. Renaming in place
    # avoids parsing the whole file a second time just to change the header.
    column_names = [
        re.sub('[^A-Za-z0-9]+', '', col) for col in df.columns.values
    ]
    df.columns = column_names

    # Positional split at an integer boundary. Slicing with `.loc` and a
    # float bound is label-based and inclusive at both ends, so the boundary
    # row can end up in both the training and the test set.
    split_at = int(len(df.index) * 0.9)

    # Rows with any missing value are dropped before the label is derived,
    # so a missing response cannot break the substring test below. The
    # explicit copy makes the column assignments below operate on
    # independent frames instead of slices of `df`.
    df_train = df.iloc[:split_at].dropna(how='any', axis=0).copy()
    df_test = df.iloc[split_at:].dropna(how='any', axis=0).copy()

    response_name = column_names[response]
    label_column = "label"
    for frame in (df_train, df_test):
        # Literal substring match; regex=False keeps ">50K" from being
        # interpreted as a pattern.
        is_positive = frame[response_name].str.contains(">50K", regex=False)
        frame[label_column] = is_positive.astype(int)
        del frame[response_name]

    categorical_columns = [
        column_names[key] for key, value in predictors.items()
        if value == 'Categorical'
    ]
    continuous_columns = [
        column_names[key] for key, value in predictors.items()
        if value == 'Continuous'
    ]

    def input_fn(frame):
        """Build the (features, label) tensors for one data frame."""
        continuous_cols = {
            k: tf.constant(frame[k].values) for k in continuous_columns
        }
        # Each categorical column becomes an [n_rows, 1] sparse tensor with
        # exactly one value per row.
        categorical_cols = {
            k: tf.SparseTensor(
                indices=[[i, 0] for i in range(frame[k].size)],
                values=frame[k].values,
                dense_shape=[frame[k].size, 1])
            for k in categorical_columns
        }
        feature_cols = {**continuous_cols, **categorical_cols}
        label = tf.constant(frame[label_column].values)
        return feature_cols, label

    def train_input_fn():
        return input_fn(df_train)

    def eval_input_fn_test():
        return input_fn(df_test)

    cat_tensors = [
        tf.contrib.layers.sparse_column_with_hash_bucket(
            column_name=col, hash_bucket_size=100)
        for col in categorical_columns
    ]
    cont_tensors = [
        tf.contrib.layers.real_valued_column(col)
        for col in continuous_columns
    ]
    feature_columns = cat_tensors + cont_tensors

    model_dir = tempfile.mkdtemp()
    logistic_model = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=num_labels,
        model_dir=model_dir)
    logistic_model.fit(input_fn=train_input_fn, steps=200)

    # Evaluate on the held-out rows.
    eval_result_test = logistic_model.evaluate(
        input_fn=eval_input_fn_test, steps=1)

    # Evaluate on the training rows. The original referenced an undefined
    # `eval_input_fn_train`; the training input function is the one that
    # feeds `df_train`.
    eval_result_train = logistic_model.evaluate(
        input_fn=train_input_fn, steps=1)
    for key in sorted(eval_result_train):
        print("%s: %s" % (key, eval_result_train[key]))

    return eval_result_test, model_dir