"LSTM:주가예측"의 두 판 사이의 차이
1 Overview
HUSEIN ZOLKEPLI's github repository (https://github.com/huseinzol05/Stock-Prediction-Models) is well known, so it is used as the basis for this explanation; an improved version of it, found through the linked material, is summarized here.
The model predicts the following value from the most recent 50 trading days of data, including the current day. OHLCV (open, high, low, close, volume) data is typically used as the input.
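For orientation, the model input is therefore a 3-D array of shape (number of samples, 50, 5): 50 trading days by 5 OHLCV features per sample. The following is a minimal sketch of how such windows are cut from a price table, using toy numbers rather than real data:

import numpy as np

history_points = 50
prices = np.random.rand(300, 5)   # toy OHLCV table: 300 trading days x 5 features

# each sample is the previous 50 days; the target is the following day's open price (column 0)
windows = np.array([prices[i:i + history_points] for i in range(len(prices) - history_points)])
targets = prices[history_points:, 0]
print(windows.shape, targets.shape)   # (250, 50, 5) (250,)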
2 Usage
Each step below gives a short description followed by the corresponding code.

2.1 Import

Load the required libraries.

import pandas as pd
import numpy as np
import FinanceDataReader as fdr    # library that provides stock price data
from sklearn import preprocessing  # use scikit-learn's data preprocessing utilities
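Before running the data-preparation step, it can help to confirm what FinanceDataReader actually returns. The snippet below is a minimal check, assuming a KRX ticker for which the returned DataFrame typically carries Open, High, Low, Close, Volume and Change columns (the last column is dropped in the next step so that only OHLCV remains):

import FinanceDataReader as fdr

sample = fdr.DataReader('005930', '2021-01-01', '2021-01-31')  # Samsung Electronics, short sample range
print(sample.columns.tolist())   # expected to include Open, High, Low, Close, Volume, Change
print(sample.head())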
2.2 Data preparation

Load the data and slice it into 50-day windows to build the training set.

def call_dataset(ticker='005930', stt='2015-01-01', end='2021-03-30', history_points=50):
    data = fdr.DataReader(ticker, stt, end)
    data = data.iloc[:, 0:-1]   # drop the last column so that only OHLCV remains
    print('data: ', data.shape)
    data = data.values          # keep only the values as a NumPy array
    data_normalizer = preprocessing.MinMaxScaler()          # scaler that maps the data into the 0-1 range
    data_normalized = data_normalizer.fit_transform(data)   # apply the 0-1 scaling
    print('data_normalized: ', data_normalized.shape)

    # using the last {history_points} open close high low volume data points, predict the next open value
    ohlcv_histories_normalized = np.array([data_normalized[i:i + history_points].copy() for i in range(len(data_normalized) - history_points)])  # stack the OHLCV rows into windows of history_points observations each
    print('ohlcv_histories_normalized: ', ohlcv_histories_normalized.shape)
    next_day_open_values_normalized = np.array([data_normalized[:, 0][i + history_points].copy() for i in range(len(data_normalized) - history_points)])
    next_day_open_values_normalized = np.expand_dims(next_day_open_values_normalized, -1)  # 1xN vector -> Nx1 vector
    next_day_open_values = np.array([data[:, 0][i + history_points].copy() for i in range(len(data) - history_points)])
    next_day_open_values = np.expand_dims(next_day_open_values, -1)                        # 1xN vector -> Nx1 vector
    y_normalizer = preprocessing.MinMaxScaler()
    y_normalizer.fit(next_day_open_values)

    # input X  : the preceding OHLCV window (from T = -50 to T = -1)
    # output y : the price to predict at T = 0

    def calc_ema(values, time_period):
        '''Computes a technical indicator (EMA); it is not actually used below.'''
        # https://www.investopedia.com/ask/answers/122314/what-exponential-moving-average-ema-formula-and-how-ema-calculated.asp
        sma = np.mean(values[:, 3])
        ema_values = [sma]
        k = 2 / (1 + time_period)
        for i in range(len(values) - time_period, len(values)):
            close = values[i][3]
            ema_values.append(close * k + ema_values[-1] * (1 - k))
        return ema_values[-1]

    technical_indicators = []
    for his in ohlcv_histories_normalized:
        # note: since we use his[:, 3] we are taking the SMA of the closing price
        # print('his: ', his)
        # print('his[:, 3]: ', his[:, 3])
        sma = np.mean(his[:, 3])                      # mean Close price of each window
        macd = calc_ema(his, 12) - calc_ema(his, 26)  # 12-day EMA - 26-day EMA
        technical_indicators.append(np.array([sma]))
        # technical_indicators.append(np.array([sma, macd]))
    technical_indicators = np.array(technical_indicators)
    tech_ind_scaler = preprocessing.MinMaxScaler()
    technical_indicators_normalized = tech_ind_scaler.fit_transform(technical_indicators)
    assert ohlcv_histories_normalized.shape[0] == next_day_open_values_normalized.shape[0] == technical_indicators_normalized.shape[0]
    print('ohlcv_histories_normalized.shape[0]: ', ohlcv_histories_normalized.shape[0])
    return ohlcv_histories_normalized, technical_indicators_normalized, next_day_open_values_normalized, next_day_open_values, y_normalizer
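A quick way to sanity-check call_dataset is to run it once and look at the shapes it returns (a minimal usage sketch; the default ticker and date range from the function definition are used):

ohlcv, tech_ind, y_norm, y_raw, y_scaler = call_dataset()
print(ohlcv.shape)      # (n_samples, 50, 5): 50-day OHLCV windows
print(tech_ind.shape)   # (n_samples, 1): normalized SMA per window
print(y_norm.shape)     # (n_samples, 1): normalized next-day open price
print(y_raw.shape)      # (n_samples, 1): next-day open price in original units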
2.3 Initial setup and loading Keras

Set the hyperparameters, import Keras/TensorFlow, build the dataset, and split it into training and test sets.

history_points = 50  # how many days of history to use
ticker = '000660'    # example ticker: SK hynix
import keras
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation
from keras import optimizers
import numpy as np
np.random.seed(4)
# from tensorflow import set_random_seed
import tensorflow
print(tensorflow.__version__)
tensorflow.random.set_seed(44)
# from util import csv_to_dataset, history_points

# dataset
ohlcv_histories, _, next_day_open_values, unscaled_y, y_normaliser = call_dataset(ticker=ticker)
train_ratio = 0.7
n = int(ohlcv_histories.shape[0] * train_ratio)
# note: the most recent n windows are used for training and the remaining (older) windows for testing
ohlcv_train = ohlcv_histories[-n:-1]
y_train = next_day_open_values[-n:-1]
ohlcv_test = ohlcv_histories[:ohlcv_histories.shape[0] - n]
y_test = next_day_open_values[:ohlcv_histories.shape[0] - n]
unscaled_y_test = unscaled_y[:ohlcv_histories.shape[0] - n]
print('ohlcv_train.shape: ', ohlcv_train.shape)
print('ohlcv_test.shape: ', ohlcv_test.shape)
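Note that the split above trains on the most recent n windows and tests on the older remainder. If a more conventional time-ordered split is preferred (train on the past, test on the most recent data), a minimal alternative sketch is:

# alternative chronological split (sketch): oldest 70% for training, newest 30% for testing
n = int(ohlcv_histories.shape[0] * train_ratio)
ohlcv_train, y_train = ohlcv_histories[:n], next_day_open_values[:n]
ohlcv_test, y_test = ohlcv_histories[n:], next_day_open_values[n:]
unscaled_y_test = unscaled_y[n:]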
2.4 Building the model

Define the LSTM network, train it on the training windows, and evaluate the prediction error.

# model architecture
lstm_input = Input(shape=(history_points, 5), name='lstm_input')
x = LSTM(50, name='lstm_0')(lstm_input)
x = Dropout(0.2, name='lstm_dropout_0')(x)
x = Dense(64, name='dense_0')(x)
x = Activation('sigmoid', name='sigmoid_0')(x)
x = Dense(1, name='dense_1')(x)
output = Activation('linear', name='linear_output')(x)
model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam(learning_rate=0.0005)  # recent Keras versions use learning_rate instead of lr
model.compile(optimizer=adam, loss='mse')
model.fit(x=ohlcv_train, y=y_train, batch_size=32, epochs=50, shuffle=True, validation_split=0.1)

# evaluation
y_test_predicted = model.predict(ohlcv_test)
y_test_predicted = y_normaliser.inverse_transform(y_test_predicted)  # back to the original price scale
y_predicted = model.predict(ohlcv_histories)
y_predicted = y_normaliser.inverse_transform(y_predicted)
assert unscaled_y_test.shape == y_test_predicted.shape
real_mse = np.mean(np.square(unscaled_y_test - y_test_predicted))
scaled_mse = real_mse / (np.max(unscaled_y_test) - np.min(unscaled_y_test)) * 100
print(scaled_mse)

from datetime import datetime  # not used below; kept from the original script
model.save('basic_model.h5')
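To reuse the trained network later without retraining, the saved file can be loaded back with Keras's load_model (a minimal sketch; the file name matches the save call above):

from keras.models import load_model

restored = load_model('basic_model.h5')                                        # reload the trained model from disk
restored_pred = y_normaliser.inverse_transform(restored.predict(ohlcv_test))   # predictions in original price units
print(restored_pred[:5])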
2.5 Validation

Plot the real and predicted prices to check the fit visually.

import matplotlib.pyplot as plt
plt.gcf().set_size_inches(22, 15, forward=True)
start = 0
end = -1
# real = plt.plot(unscaled_y_test[start:end], label='real')
# pred = plt.plot(y_test_predicted[start:end], label='predicted')
real = plt.plot(unscaled_y[start:end], label='real')
pred = plt.plot(y_predicted[start:end], label='predicted')
plt.legend(['Real', 'Predicted'])
plt.title('SK Hynix Using LSTM by TGG')
plt.show()
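The plot above compares predictions against the full history, which includes the training windows. For a stricter check, simple error metrics can also be reported on the held-out test windows (a minimal sketch with NumPy; the choice of MAE/MAPE is just an example):

mae = np.mean(np.abs(unscaled_y_test - y_test_predicted))                             # mean absolute error (in KRW)
mape = np.mean(np.abs((unscaled_y_test - y_test_predicted) / unscaled_y_test)) * 100  # mean absolute percentage error
print(f'MAE:  {mae:.2f}')
print(f'MAPE: {mape:.2f}%')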