2017-09-04 26 views
0

我正在使用Long Short Term Memory算法处理时间序列项目。我需要将我的预测数据可视化

我需要使用其他列作为功能预测out_meteo.csv的最后一列。在算法结束时,我无法绘制我的数据的正确值:它给我提供了不切实际的和小的值,我认为它与MinMaxScaler和inverse_transform属性有关。

这里是Python代码我用来预测我的变量不使用其他功能(正常工作)

import numpy 
import matplotlib.pyplot as plt 
from pandas import read_csv 
import math 
from keras.models import Sequential 
from keras.layers import Dense 
from keras.layers import LSTM 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_squared_error 

# convert an array of values into a dataset matrix 
def create_dataset(dataset, look_back=1): 
    dataX, dataY = [], [] 
    for i in range(len(dataset)-look_back-1): 
     a = dataset[i:(i+look_back), :] 
     dataX.append(a) 
     dataY.append(dataset[i + look_back, 0]) 
    return numpy.array(dataX), numpy.array(dataY) 

# fix random seed for reproducibility 
numpy.random.seed(7) 

# load the dataset 
dataframe = read_csv('out_meteo.csv', usecols=[5], engine='python', header=0) 
dataset = dataframe.values 
dataset = dataset.astype('float32') 

# normalize the dataset 
scaler = MinMaxScaler(feature_range=(0, 1)) 
dataset = scaler.fit_transform(dataset) 

# split into train and test sets 
train_size = int(len(dataset) * 0.7) 
test_size = len(dataset) - train_size 
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:] 

# reshape into X=t and Y=t+1 
look_back = 3 
trainX, trainY = create_dataset(train, look_back) 
testX, testY = create_dataset(test, look_back) 

# reshape input to be [samples, time steps, features] 
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1])) 

# create and fit the LSTM network 
model = Sequential() 
model.add(LSTM(4, input_shape=(1, look_back))) 
model.add(Dense(1)) 
model.compile(loss='mean_squared_error', optimizer='adam') 
model.fit(trainX, trainY, epochs=15, batch_size=15, verbose=2) 

# make predictions 
trainPredict = model.predict(trainX) 
testPredict = model.predict(testX) 

# invert predictions 
trainPredict = scaler.inverse_transform(trainPredict) 
trainY = scaler.inverse_transform([trainY]) 
testPredict = scaler.inverse_transform(testPredict) 
testY = scaler.inverse_transform([testY]) 

# calculate root mean squared error 
trainScore = math.sqrt (mean_squared_error(trainY[0], trainPredict[:,:])) 
print('Train Score: %.2f RMSE' % (trainScore)) 
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,:])) 
print('Test Score: %.2f RMSE' % (testScore)) 

# shift train predictions for plotting 
trainPredictPlot = numpy.empty_like(dataset) 
trainPredictPlot[:, :] = numpy.nan 
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict 

# shift test predictions for plotting 
testPredictPlot = numpy.empty_like(dataset) 
testPredictPlot[:, :] = numpy.nan 
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict 
# plot baseline and predictions 
plt.plot(scaler.inverse_transform(dataset)) 
plt.plot(trainPredictPlot) 
plt.plot(testPredictPlot) 
plt.show() 

我已经修改了它读取所有的功能(我所有的CSV列),所以这是最后一个给我一个错误的阴谋

import numpy 
import matplotlib.pyplot as plt 
from pandas import read_csv 
import math 
from keras.models import Sequential 
from keras.layers import Dense 
from keras.layers import LSTM 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_squared_error 

# convert an array of values into a dataset matrix 
def create_dataset(dataset, look_back=1): 
    dataX, dataY = [], [] 
    for i in range(len(dataset)-look_back-1): 
     a = dataset[i:(i+look_back), :] 
     dataX.append(a) 
     dataY.append(dataset[i + look_back, 0]) 
    return numpy.array(dataX), numpy.array(dataY) 

# fix random seed for reproducibility 
numpy.random.seed(7) 

# load the dataset 
dataframe = read_csv('out_meteo.csv', usecols=[5], engine='python', header=0) 
dataset = dataframe.values 
dataset = dataset.astype('float32') 

# normalize the dataset 
scaler = MinMaxScaler(feature_range=(0, 1)) 
dataset = scaler.fit_transform(dataset) 

# split into train and test sets 
train_size = int(len(dataset) * 0.7) 
test_size = len(dataset) - train_size 
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:] 

# reshape into X=t and Y=t+1 
look_back = 3 
trainX, trainY = create_dataset(train, look_back) 
testX, testY = create_dataset(test, look_back) 

# reshape input to be [samples, time steps, features] 
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1])) 

# create and fit the LSTM network 
model = Sequential() 
model.add(LSTM(4, input_shape=(1, look_back))) 
model.add(Dense(1)) 
model.compile(loss='mean_squared_error', optimizer='adam') 
model.fit(trainX, trainY, epochs=15, batch_size=15, verbose=2) 

# make predictions 
trainPredict = model.predict(trainX) 
testPredict = model.predict(testX) 

# invert predictions 
trainPredict = scaler.inverse_transform(trainPredict) 
trainY = scaler.inverse_transform([trainY]) 
testPredict = scaler.inverse_transform(testPredict) 
testY = scaler.inverse_transform([testY]) 

# calculate root mean squared error 
trainScore = math.sqrt (mean_squared_error(trainY[0], trainPredict[:,:])) 
print('Train Score: %.2f RMSE' % (trainScore)) 
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,:])) 
print('Test Score: %.2f RMSE' % (testScore)) 

# shift train predictions for plotting 
trainPredictPlot = numpy.empty_like(dataset) 
trainPredictPlot[:, :] = numpy.nan 
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict 

# shift test predictions for plotting 
testPredictPlot = numpy.empty_like(dataset) 
testPredictPlot[:, :] = numpy.nan 
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict 
# plot baseline and predictions 
plt.plot(scaler.inverse_transform(dataset)) 
plt.plot(trainPredictPlot) 
plt.plot(testPredictPlot) 
plt.show() 

回答

0

你不需要规模回归ANN的Y值。在回看功能后调用你的比例。

look_back = 3 
trainX, trainY = create_dataset(train, look_back) 
testX, testY = create_dataset(test, look_back) 

scaler = MinMaxScaler(feature_range=(0, 1)) 
trainX = scaler.fit_transform(trainX) 
testX = scaler.transform(testX) 

然后绘制实际VS预测

+0

感谢您的答复,我已经编辑我的问题。这个问题更关系到使用几个功能并绘制最终结果 –

+0

非常混乱。你正在尝试绘制这些特征还是仅仅测试'testY'和'testPredict'?我说的是在网络之前对数据进行缩放,然后在训练之后进行缩放 – DJK