"LSTM:주가예측"의 두 판 사이의 차이
1 Overview
HUSEIN ZOLKEPLI's github repository (https://github.com/huseinzol05/Stock-Prediction-Models) is well known, so it is used as the basis for this explanation; an improved version of it, found through the linked material, is summarized here.
The model predicts the following value from the most recent 50 trading days of data, including the current day. OHLCV (open, high, low, close, volume) data is typically used as the input.
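For orientation, the model input is therefore a 3-D array of shape (number of samples, 50, 5): 50 trading days by 5 OHLCV features per sample. The following is a minimal sketch of how such windows are cut from a price table, using toy numbers rather than real data:

import numpy as np

history_points = 50
prices = np.random.rand(300, 5)   # toy OHLCV table: 300 trading days x 5 features

# each sample is the previous 50 days; the target is the following day's open price (column 0)
windows = np.array([prices[i:i + history_points] for i in range(len(prices) - history_points)])
targets = prices[history_points:, 0]
print(windows.shape, targets.shape)   # (250, 50, 5) (250,)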
2 Usage
Each step below gives a short description followed by the corresponding code.

2.1 Import

Load the required libraries.

import pandas as pd
import numpy as np
import FinanceDataReader as fdr    # library that provides stock price data
from sklearn import preprocessing  # use scikit-learn's data preprocessing utilities
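Before running the data-preparation step, it can help to confirm what FinanceDataReader actually returns. The snippet below is a minimal check, assuming a KRX ticker for which the returned DataFrame typically carries Open, High, Low, Close, Volume and Change columns (the last column is dropped in the next step so that only OHLCV remains):

import FinanceDataReader as fdr

sample = fdr.DataReader('005930', '2021-01-01', '2021-01-31')  # Samsung Electronics, short sample range
print(sample.columns.tolist())   # expected to include Open, High, Low, Close, Volume, Change
print(sample.head())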
2.2 Data preparation

Load the data and slice it into 50-day windows to build the training set.

def call_dataset(ticker='005930', stt='2015-01-01', end='2021-03-30', history_points=50):
    data = fdr.DataReader(ticker, stt, end)
    data = data.iloc[:, 0:-1]   # drop the last column so that only OHLCV remains
    print('data: ', data.shape)
    data = data.values          # keep only the values as a NumPy array
    data_normalizer = preprocessing.MinMaxScaler()          # scaler that maps the data into the 0-1 range
    data_normalized = data_normalizer.fit_transform(data)   # apply the 0-1 scaling
    print('data_normalized: ', data_normalized.shape)

    # using the last {history_points} open close high low volume data points, predict the next open value
    ohlcv_histories_normalized = np.array([data_normalized[i:i + history_points].copy() for i in range(len(data_normalized) - history_points)])  # stack the OHLCV rows into windows of history_points observations each
    print('ohlcv_histories_normalized: ', ohlcv_histories_normalized.shape)
    next_day_open_values_normalized = np.array([data_normalized[:, 0][i + history_points].copy() for i in range(len(data_normalized) - history_points)])
    next_day_open_values_normalized = np.expand_dims(next_day_open_values_normalized, -1)  # 1xN vector -> Nx1 vector
    next_day_open_values = np.array([data[:, 0][i + history_points].copy() for i in range(len(data) - history_points)])
    next_day_open_values = np.expand_dims(next_day_open_values, -1)                        # 1xN vector -> Nx1 vector
    y_normalizer = preprocessing.MinMaxScaler()
    y_normalizer.fit(next_day_open_values)

    # input X  : the preceding OHLCV window (from T = -50 to T = -1)
    # output y : the price to predict at T = 0

    def calc_ema(values, time_period):
        '''Computes a technical indicator (EMA); it is not actually used below.'''
        # https://www.investopedia.com/ask/answers/122314/what-exponential-moving-average-ema-formula-and-how-ema-calculated.asp
        sma = np.mean(values[:, 3])
        ema_values = [sma]
        k = 2 / (1 + time_period)
        for i in range(len(values) - time_period, len(values)):
            close = values[i][3]
            ema_values.append(close * k + ema_values[-1] * (1 - k))
        return ema_values[-1]

    technical_indicators = []
    for his in ohlcv_histories_normalized:
        # note: since we use his[:, 3] we are taking the SMA of the closing price
        # print('his: ', his)
        # print('his[:, 3]: ', his[:, 3])
        sma = np.mean(his[:, 3])                      # mean Close price of each window
        macd = calc_ema(his, 12) - calc_ema(his, 26)  # 12-day EMA - 26-day EMA
        technical_indicators.append(np.array([sma]))
        # technical_indicators.append(np.array([sma, macd]))
    technical_indicators = np.array(technical_indicators)
    tech_ind_scaler = preprocessing.MinMaxScaler()
    technical_indicators_normalized = tech_ind_scaler.fit_transform(technical_indicators)
    assert ohlcv_histories_normalized.shape[0] == next_day_open_values_normalized.shape[0] == technical_indicators_normalized.shape[0]
    print('ohlcv_histories_normalized.shape[0]: ', ohlcv_histories_normalized.shape[0])
    return ohlcv_histories_normalized, technical_indicators_normalized, next_day_open_values_normalized, next_day_open_values, y_normalizer
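A quick way to sanity-check call_dataset is to run it once and look at the shapes it returns (a minimal usage sketch; the default ticker and date range from the function definition are used):

ohlcv, tech_ind, y_norm, y_raw, y_scaler = call_dataset()
print(ohlcv.shape)      # (n_samples, 50, 5): 50-day OHLCV windows
print(tech_ind.shape)   # (n_samples, 1): normalized SMA per window
print(y_norm.shape)     # (n_samples, 1): normalized next-day open price
print(y_raw.shape)      # (n_samples, 1): next-day open price in original units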
2.3 Initial setup and loading Keras

Set the hyperparameters, import Keras/TensorFlow, build the dataset, and split it into training and test sets.

history_points = 50  # how many days of history to use
ticker = '000660'    # example ticker: SK hynix
import keras
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation
from keras import optimizers
import numpy as np
np.random.seed(4)
# from tensorflow import set_random_seed
import tensorflow
print(tensorflow.__version__)
tensorflow.random.set_seed(44)
# from util import csv_to_dataset, history_points

# dataset
ohlcv_histories, _, next_day_open_values, unscaled_y, y_normaliser = call_dataset(ticker=ticker)
train_ratio = 0.7
n = int(ohlcv_histories.shape[0] * train_ratio)
# note: the most recent n windows are used for training and the remaining (older) windows for testing
ohlcv_train = ohlcv_histories[-n:-1]
y_train = next_day_open_values[-n:-1]
ohlcv_test = ohlcv_histories[:ohlcv_histories.shape[0] - n]
y_test = next_day_open_values[:ohlcv_histories.shape[0] - n]
unscaled_y_test = unscaled_y[:ohlcv_histories.shape[0] - n]
print('ohlcv_train.shape: ', ohlcv_train.shape)
print('ohlcv_test.shape: ', ohlcv_test.shape)
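Note that the split above trains on the most recent n windows and tests on the older remainder. If a more conventional time-ordered split is preferred (train on the past, test on the most recent data), a minimal alternative sketch is:

# alternative chronological split (sketch): oldest 70% for training, newest 30% for testing
n = int(ohlcv_histories.shape[0] * train_ratio)
ohlcv_train, y_train = ohlcv_histories[:n], next_day_open_values[:n]
ohlcv_test, y_test = ohlcv_histories[n:], next_day_open_values[n:]
unscaled_y_test = unscaled_y[n:]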
2.4 Building the model

Define the LSTM network, train it on the training windows, and evaluate the prediction error.

# model architecture
lstm_input = Input(shape=(history_points, 5), name='lstm_input')
x = LSTM(50, name='lstm_0')(lstm_input)
x = Dropout(0.2, name='lstm_dropout_0')(x)
x = Dense(64, name='dense_0')(x)
x = Activation('sigmoid', name='sigmoid_0')(x)
x = Dense(1, name='dense_1')(x)
output = Activation('linear', name='linear_output')(x)
model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam(learning_rate=0.0005)  # recent Keras versions use learning_rate instead of lr
model.compile(optimizer=adam, loss='mse')
model.fit(x=ohlcv_train, y=y_train, batch_size=32, epochs=50, shuffle=True, validation_split=0.1)

# evaluation
y_test_predicted = model.predict(ohlcv_test)
y_test_predicted = y_normaliser.inverse_transform(y_test_predicted)  # back to the original price scale
y_predicted = model.predict(ohlcv_histories)
y_predicted = y_normaliser.inverse_transform(y_predicted)
assert unscaled_y_test.shape == y_test_predicted.shape
real_mse = np.mean(np.square(unscaled_y_test - y_test_predicted))
scaled_mse = real_mse / (np.max(unscaled_y_test) - np.min(unscaled_y_test)) * 100
print(scaled_mse)

from datetime import datetime  # not used below; kept from the original script
model.save('basic_model.h5')
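To reuse the trained network later without retraining, the saved file can be loaded back with Keras's load_model (a minimal sketch; the file name matches the save call above):

from keras.models import load_model

restored = load_model('basic_model.h5')                                        # reload the trained model from disk
restored_pred = y_normaliser.inverse_transform(restored.predict(ohlcv_test))   # predictions in original price units
print(restored_pred[:5])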
2.5 Validation

Plot the real and predicted prices to check the fit visually.

import matplotlib.pyplot as plt
plt.gcf().set_size_inches(22, 15, forward=True)
start = 0
end = -1
# real = plt.plot(unscaled_y_test[start:end], label='real')
# pred = plt.plot(y_test_predicted[start:end], label='predicted')
real = plt.plot(unscaled_y[start:end], label='real')
pred = plt.plot(y_predicted[start:end], label='predicted')
plt.legend(['Real', 'Predicted'])
plt.title('SK Hynix Using LSTM by TGG')
plt.show()
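The plot above compares predictions against the full history, which includes the training windows. For a stricter check, simple error metrics can also be reported on the held-out test windows (a minimal sketch with NumPy; the choice of MAE/MAPE is just an example):

mae = np.mean(np.abs(unscaled_y_test - y_test_predicted))                             # mean absolute error (in KRW)
mape = np.mean(np.abs((unscaled_y_test - y_test_predicted) / unscaled_y_test)) * 100  # mean absolute percentage error
print(f'MAE:  {mae:.2f}')
print(f'MAPE: {mape:.2f}%')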