Using one day's worth of BTCFX execution data (1-minute bar data), I want to train an agent how to trade. By writing your own Env for OpenAI Gym, you can build a custom training environment.
The Envs that OpenAI Gym provides can be used as easily as env = gym.make('CartPole-v0'), and a custom Env can be made just as easy to load. Use the directory structure below.
Directory structure for the env
```
bf
├── __init__.py
└── env.py
```
__init__.py
__init__.py looks like this:
```python
from gym.envs.registration import register

register(
    id='bf-v0',
    entry_point='bf.env:BfEnv'
)
```
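Once this registration has run, gym.make can resolve the id just like a built-in environment. A quick sanity check (a sketch, assuming the bf package is importable and the JSON data file is present, since BfEnv.__init__ reads it):

```python
import gym
import bf  # importing the package runs bf/__init__.py, which registers 'bf-v0'

env = gym.make('bf-v0')
print(env.action_space)  # Discrete(3): hold / buy / sell
```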
env.py
env.py holds the custom environment itself. It loads the execution data from a JSON file and defines the environment's state, the reward handed out at each step, and so on.
```python
import math
import gym
import numpy as np
import gym.spaces
import json
from random import randint


class BfEnv(gym.Env):
    def __init__(self):
        super(BfEnv, self).__init__()
        # 0: hold, 1: buy, 2: sell
        self.action_space = gym.spaces.Discrete(3)
        # Observation: the last 300 rows of execution data, 5 features per row
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(300, 5), dtype=np.float32)
        self.executions = self._readJsonFile()
        self._reset()

    def _readJsonFile(self):
        with open('executions_0213.json') as f:
            return np.array(json.load(f))

    def _resetIdx(self):
        # 300 rows = 5 minutes; each episode runs through 5 minutes of data
        max_idx = self.executions.shape[0] - 300 + 1
        # keep the first 5 minutes in reserve for the initial observation
        min_idx = 300
        self.idx = randint(min_idx, max_idx)
        self.startIdx = self.idx
        self.endIdx = self.idx + 300 - 1

    def _printResetState(self):
        print('startIdx: ', self.startIdx, 'endIdx: ', self.endIdx)

    def _printState(self, reward):
        print('nowIdx: ', self.idx, 'reward: ', reward, 'done: ', self.done)

    def _nextIdx(self):
        self.idx += 5

    def _reset(self):
        self._resetIdx()
        self.done = False
        self.positions = []
        self.orders = []
        self._observe()
        # self._printResetState()
        self.steps = 0
        return self.observation

    def _observe(self):
        # the observation is the 300 rows preceding the current index,
        # restricted to the first 5 columns
        start = self.idx - 300
        executions = self.executions[start:self.idx]
        observation = [exe[0:5] for exe in executions]
        self.observation = np.array(observation)

    def _step(self, action):
        if action == 0:  # hold
            pass
        elif action == 1:  # buy: column 5 of the current row is used as the order price
            self.orders.append([self.idx, 'BUY', self.executions[self.idx][5]])
        elif action == 2:  # sell
            self.orders.append([self.idx, 'SELL', self.executions[self.idx][5]])
        self._nextIdx()
        self._checkOrderTimeLimit()
        reward = self._checkPosition()
        self.done = self.idx >= self.endIdx
        self._observe()
        # self._printState(reward)
        return self.observation, reward, self.done, {}

    def _checkOrderTimeLimit(self):
        # expire orders that have not filled within 59 rows of data
        self.orders = [order for order in self.orders if self.idx < order[0] + 59]

    def _checkPosition(self):
        executed = []
        noExecuted = []
        for order in self.orders:
            for exe in self.executions[self.idx - 5:self.idx]:
                # the order fills if its price lies within the row's [exe[6], exe[7]] range
                if exe[6] <= order[2] <= exe[7]:
                    executed.append(order)
                    break
            else:
                # only keep orders that did not fill (the original appended every
                # order here, so filled orders stayed in the book; fixed with for-else)
                noExecuted.append(order)
        self.orders = noExecuted
        reward = 0
        for exe in executed:
            isPosi = len(self.positions) > 0
            if isPosi:
                side = self.positions[0][1]
                if exe[1] == side:
                    self.positions.append(exe)
                    # don't let the position grow too large
                    if len(self.positions) > 10:
                        reward += self._lossCut()
                else:
                    # close out a position
                    reward += self._posiFin(exe)
            else:
                self.positions.append(exe)
        return reward

    def _posiFin(self, exe):
        posi = self.positions.pop(0)
        side = posi[1]
        price = posi[2]
        nowPrice = exe[2]
        if side == 'BUY':
            return nowPrice - price
        else:
            return price - nowPrice

    def _lossCut(self):
        # close everything at the current price and charge a penalty
        totalPrice = 0
        for posi in self.positions:
            totalPrice += posi[2]
        avePrice = math.floor(totalPrice / len(self.positions))
        side = self.positions[0][1]
        if side == 'BUY':
            nowPrice = self.executions[self.idx][6]
            reward = nowPrice - avePrice
        else:
            nowPrice = self.executions[self.idx][7]
            reward = avePrice - nowPrice
        reward -= math.floor(nowPrice * 0.05 / 100)  # 0.05%
        self.positions = []
        return reward

    def _render(self, mode='human', close=False):
        pass

    def _close(self):
        pass

    def _seed(self, seed=None):
        pass
```
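Before handing the environment to keras-rl, it can help to drive it with a random agent first. The following is a minimal smoke test, assuming the old-style gym API this class targets (where reset()/step() dispatch to the _reset()/_step() methods above) and that executions_0213.json is in the working directory:

```python
import gym
import bf

env = gym.make('bf-v0')
obs = env.reset()
print(obs.shape)  # (300, 5): the previous 5 minutes of data, 5 features per row

done = False
total_reward = 0
while not done:
    # random actions: 0 = hold, 1 = buy, 2 = sell
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
print('episode reward:', total_reward)
```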
bf.py
Once the environment is built, load it and run the training. The training script is below. That said, it's pretty much copied as-is from some other site; most OpenAI Gym study articles use this code, which seems to be the sample from the official Keras site.
```python
import gym
from keras.layers import Dense, Activation, Flatten
from keras.models import Sequential
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy

import bf

ENV_NAME = 'bf-v0'

env = gym.make(ENV_NAME)
nb_actions = env.action_space.n
print(nb_actions)

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=80000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.003)
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    gamma=0.99,
    memory=memory,
    enable_double_dqn=True,
    enable_dueling_network=False,
    nb_steps_warmup=3,
    target_model_update=1e-2,
    policy=policy
)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualization slows training down quite a lot,
# so it's disabled here. You can always safely abort the training prematurely using Ctrl + C.
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 30 episodes.
print('TEST')
dqn.test(env, nb_episodes=30, visualize=False)
```
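Because the weights are saved at the end, a later session can skip fit() and evaluate directly. A minimal sketch: rebuild model, memory, policy, and dqn exactly as above, then restore the weights instead of training:

```python
# After constructing `dqn` as above, load the saved weights and evaluate only.
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
dqn.test(env, nb_episodes=5, visualize=False)
```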
Results
It learned not to trade.