mirror of https://github.com/dupenf/stock-transformer.git
synced 2024-11-25 16:08:34 +08:00

Add files via upload

This commit is contained in:
parent c9dfdf2f9d
commit 8d81cd6b4e
0  __init__.py  Normal file
8  a0_config.py  Normal file
@@ -0,0 +1,8 @@
import torch

USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')

input_window = 7   # number of input time steps
output_window = 1  # number of prediction steps (fixed at one)
batch_size = 100
56  d0_download.py  Normal file
@@ -0,0 +1,56 @@
import baostock as bs
# http://baostock.com/baostock/index.php/%E9%A6%96%E9%A1%B5

# import akshare as ak
# https://akshare.akfamily.xyz/introduction.html

import pandas as pd


def download_code_hist(
    save_path="./datasets",
    code="sh.600000",
    start_date="2018-09-01",
    end_date="2024-06-30",
    freq="d",
    adjustflag="2",
):
    lg = bs.login()
    #### Fetch historical K-line data for Shanghai/Shenzhen A-shares ####
    # For the detailed field list, see the "historical quotation fields" section of
    # the baostock docs; minute-bar fields differ from daily-bar fields, and minute
    # bars do not cover indices.
    # Minute-bar fields: date,time,code,open,high,low,close,volume,amount,adjustflag
    # Weekly/monthly fields: date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
    fields = "date,time,code,open,high,low,close,volume,adjustflag"
    if freq == "d":
        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
    rs = bs.query_history_k_data_plus(
        code,
        fields=fields,
        start_date=start_date,
        end_date=end_date,
        frequency=freq,
        adjustflag=adjustflag,  # "1" = post-adjusted (hfq), "2" = pre-adjusted (qfq), "3" = unadjusted
    )

    #### Collect the result set ####
    data_list = []
    while (rs.error_code == "0") and rs.next():
        # Fetch one record at a time and accumulate them
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    #### Write the result set to a CSV file ####
    filename = save_path + "/" + code + ".csv"
    result.to_csv(filename, index=True)

    print(result.head())

    bs.logout()


if __name__ == "__main__":
    download_code_hist()
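A minimal usage sketch of the downloader, assuming the module-level call above stays behind the __main__ guard so importing has no side effects. The ticker sz.000001 is only an illustration, and the output directory must exist before baostock writes the CSV:

import os
from d0_download import download_code_hist

os.makedirs("./datasets", exist_ok=True)   # ensure the output directory exists
for code in ["sh.600000", "sz.000001"]:    # hypothetical ticker selection
    download_code_hist(save_path="./datasets", code=code)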
44  d1_features.py  Normal file
@@ -0,0 +1,44 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def log_features(plot=False):
    df = pd.read_csv("./datasets/sh.600000.csv")
    close = df['close']
    log_prices = np.diff(np.log(close))    # one-step log returns
    log_prices_csum = log_prices.cumsum()  # cumulative sum of the log returns
    print(log_prices_csum)
    print("------------")
    print(log_prices)

    # optional diagnostic plots
    if plot:
        fig1, ax1 = plt.subplots(2, 1)
        ax1[0].plot(close, color='red')
        ax1[0].set_title('Close Price')
        ax1[0].set_xlabel('Time Steps')

        ax1[1].plot(log_prices_csum, color='blue')
        ax1[1].set_title('Cumulative Sum of Log Returns')
        ax1[1].set_xlabel('Time Steps')

        fig1.tight_layout()
        plt.show()

    return log_prices


if __name__ == "__main__":
    log_features(plot=True)
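To see why log_features returns log returns, a minimal numeric sketch (hypothetical prices): the cumulative sum of np.diff(np.log(close)) recovers the log-price curve up to its starting value, which is what the second subplot visualizes.

import numpy as np

close = np.array([10.0, 10.5, 10.2, 11.0])  # hypothetical closing prices
log_returns = np.diff(np.log(close))        # what log_features() returns
# telescoping sum: log(close[t]) = log(close[0]) + sum of the returns up to t
reconstructed = np.log(close[0]) + log_returns.cumsum()
assert np.allclose(reconstructed, np.log(close[1:]))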
56  d2_datasets.py  Normal file
@@ -0,0 +1,56 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import numpy as np
import torch

from a0_config import device, output_window, input_window, batch_size, USE_CUDA


def create_inout_sequences(input_data, input_window):
    # Slide a window over the series: each sample pairs input_window steps
    # with the same window shifted forward by output_window steps.
    inout_seq = []
    L = len(input_data)
    for i in range(L - input_window):
        train_seq = input_data[i:i + input_window]
        train_label = input_data[i + output_window: i + input_window + output_window]
        inout_seq.append((train_seq, train_label))

    return torch.FloatTensor(np.array(inout_seq))


def get_data(data_raw, split):
    split = round(split * len(data_raw))
    train_data = data_raw[:split]
    test_data = data_raw[split:]

    train_data = train_data.cumsum()
    train_data = 2 * train_data  # training-data scaling

    test_data = test_data.cumsum()

    train_sequence = create_inout_sequences(train_data, input_window)
    train_sequence = train_sequence[:-output_window]

    test_sequence = create_inout_sequences(test_data, input_window)
    test_sequence = test_sequence[:-output_window]

    return train_sequence.to(device), test_sequence.to(device)


def get_batch(source, i, batch_size):
    seq_len = min(batch_size, len(source) - 1 - i)
    data = source[i:i + seq_len]
    # Reshape from (batch, input_window) to (input_window, batch, 1),
    # the (seq_len, batch, feature) layout the Transformer expects.
    data_in = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1))
    target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1))
    return data_in, target
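A minimal shape check with a synthetic series, assuming the a0_config defaults (input_window=7, output_window=1) and that the repo's modules import cleanly: get_batch yields tensors in the (seq_len, batch, 1) layout consumed by the model.

import numpy as np
from d2_datasets import get_data, get_batch

log_prices = np.sin(np.arange(300) / 10.0)  # synthetic stand-in for log returns
train_data, test_data = get_data(log_prices, 0.9)
data_in, target = get_batch(train_data, 0, 100)
print(data_in.shape, target.shape)          # expected: torch.Size([7, 100, 1]) each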
1411  datasets/features.csv  Normal file
File diff suppressed because it is too large

1411  datasets/sh.600000.csv  Normal file
File diff suppressed because it is too large
37  m0_position.py  Normal file
@@ -0,0 +1,37 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import math

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout_p=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(dropout_p)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions: sine
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions: cosine
        pe = pe.unsqueeze(0).transpose(0, 1)          # -> (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x: (seq_len, batch, d_model); add the encodings for the first seq_len positions
        return self.dropout(x + self.pe[:x.size(0), :])
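A quick sanity check with synthetic input: the module takes (seq_len, batch, d_model) tensors and preserves the shape.

import torch
from m0_position import PositionalEncoding

pos = PositionalEncoding(d_model=200)
x = torch.zeros(7, 32, 200)  # (seq_len, batch, d_model)
print(pos(x).shape)          # torch.Size([7, 32, 200])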
58  m1_transformer.py  Normal file
@@ -0,0 +1,58 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import torch
import torch.nn as nn

from torch.nn import TransformerEncoder, TransformerEncoderLayer
from m0_position import PositionalEncoding


class Transformer(nn.Module):
    def __init__(self, feature_size=200, num_layers=2, dropout=0.1):
        # feature_size equals the embedding dimension (d_model)
        super().__init__()
        self.model_type = 'Transformer'

        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)

        # Apply nhead-way multi-head attention;
        # d_key = d_query = d_value = d_model // nhead
        self.encoder_layer = TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)

        # Stack num_layers encoder layers
        self.transformer_encoder = TransformerEncoder(self.encoder_layer, num_layers=num_layers)

        # For simple time-series prediction, the decoder is just a fully connected layer
        self.decoder = nn.Linear(feature_size, 1)
        self._init_weights()

    def _init_weights(self):
        init_range = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-init_range, init_range)

    def forward(self, src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return output

    def _generate_square_subsequent_mask(self, size):
        mask = torch.tril(torch.ones(size, size) == 1)     # lower-triangular matrix
        mask = mask.float()
        mask = mask.masked_fill(mask == 0, float('-inf'))  # convert zeros to -inf
        mask = mask.masked_fill(mask == 1, float(0.0))     # convert ones to 0
        return mask
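A minimal smoke test with random input (hypothetical sizes). Note that get_batch feeds tensors with feature dimension 1; broadcasting against the (seq_len, 1, feature_size) positional-encoding buffer lifts them to d_model inside forward, so the linear decoder maps back to one value per step.

import torch
from m1_transformer import Transformer

model = Transformer()       # feature_size=200, num_layers=2
src = torch.rand(7, 16, 1)  # (input_window, batch, 1)
print(model(src).shape)     # torch.Size([7, 16, 1])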
74  m3_train.py  Normal file
@@ -0,0 +1,74 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import time

import torch
import torch.nn as nn

from d2_datasets import get_batch


# def train(model, train_data, optimizer, scheduler, batch_size):
#     model.train()  # turn on training mode
#     total_loss = 0.
#     start_time = time.time()

#     for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
#         data, targets = get_batch(train_data, i, batch_size)
#         optimizer.zero_grad()
#         output = model(data)
#         loss = criterion(output, targets)
#         loss.backward()
#         nn.utils.clip_grad_norm_(model.parameters(), 0.7)
#         optimizer.step()

#         total_loss = total_loss + loss.item()
#         log_interval = int(len(train_data) / batch_size / 5)
#         if batch % log_interval == 0 and batch > 0:
#             cur_loss = total_loss / log_interval
#             elapsed = time.time() - start_time
#             print('| epoch {:3d} | {:5d}/{:5d} batches | '
#                   'lr {:02.10f} | {:5.2f} ms | '
#                   'loss {:5.7f}'.format(
#                       epoch, batch, len(train_data) // batch_size, scheduler.get_lr()[0],
#                       elapsed * 1000 / log_interval,
#                       cur_loss))
#             total_loss = 0
#             start_time = time.time()


def evaluate(model, data_source, criterion):
    model.eval()  # turn on evaluation mode
    total_loss = 0.
    eval_batch_size = 1000
    with torch.no_grad():
        for i in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, i, eval_batch_size)
            output = model(data)
            total_loss = total_loss + len(data[0]) * criterion(output, targets).cpu().item()
    return total_loss / len(data_source)


def predict(model, sequences):
    start_timer = time.time()
    model.eval()
    predicted_seq = torch.Tensor(0)
    real_seq = torch.Tensor(0)
    with torch.no_grad():
        for i in range(0, len(sequences) - 1):
            data, target = get_batch(sequences, i, 1)
            output = model(data)
            predicted_seq = torch.cat((predicted_seq, output[-1].view(-1).cpu()), 0)
            real_seq = torch.cat((real_seq, target[-1].view(-1).cpu()), 0)
    timed = time.time() - start_timer
    print(f"{timed:.2f} sec")

    return predicted_seq, real_seq
90  m4_training.py  Normal file
@@ -0,0 +1,90 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import time

import torch
import torch.nn as nn

from m1_transformer import Transformer
from m3_train import evaluate

from a0_config import device, output_window, input_window, batch_size, USE_CUDA
from d2_datasets import get_batch, get_data
from d1_features import log_features

################################################################################
model = Transformer().to(device)
criterion = nn.MSELoss()
lr = 0.00005
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
################################################################################

log_prices = log_features()
train_data, test_data = get_data(log_prices, 0.9)
################################################################################

N_EPOCHS = 150
for epoch in range(1, N_EPOCHS + 1):
    epoch_start_time = time.time()
    model.train()  # turn on training mode
    total_loss = 0.0
    start_time = time.time()

    for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
        data, targets = get_batch(train_data, i, batch_size)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, targets)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 0.7)
        optimizer.step()

        total_loss = total_loss + loss.item()
        log_interval = int(len(train_data) / batch_size / 5)
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print(
                "| epoch {:3d} | {:5d}/{:5d} batches | "
                "lr {:02.10f} | {:5.2f} ms | "
                "loss {:5.7f}".format(
                    epoch,
                    batch,
                    len(train_data) // batch_size,
                    scheduler.get_last_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss,
                )
            )
            total_loss = 0
            start_time = time.time()

    if epoch % N_EPOCHS == 0:  # validate after the final training epoch
        val_loss = evaluate(model, test_data, criterion=criterion)
        print("-" * 80)
        print(
            "| end of epoch {:3d} | time: {:5.2f}s | valid loss: {:5.7f}".format(
                epoch, (time.time() - epoch_start_time), val_loss
            )
        )
        print("-" * 80)

    else:
        print("-" * 80)
        print(
            "| end of epoch {:3d} | time: {:5.2f}s".format(
                epoch, (time.time() - epoch_start_time)
            )
        )
        print("-" * 80)

    scheduler.step()

torch.save(model, "saved_weights.pt")
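torch.save(model, ...) pickles the whole module, which ties the checkpoint to this file's import path. A hedged alternative sketch, run after the training loop above (the filename saved_state.pt is hypothetical), saves only the state_dict:

import torch
from a0_config import device
from m1_transformer import Transformer

torch.save(model.state_dict(), "saved_state.pt")  # `model` is the trained net from above

restored = Transformer().to(device)
restored.load_state_dict(torch.load("saved_state.pt", map_location=device))
restored.eval()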
32  m5_predict.py  Normal file
@@ -0,0 +1,32 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import torch
import matplotlib.pyplot as plt

from m3_train import predict
from d2_datasets import get_batch, get_data
from d1_features import log_features


model = torch.load("saved_weights.pt")
log_prices = log_features()
train_data, test_data = get_data(log_prices, 0.9)
predicted_seq, real_seq = predict(model, test_data)

fig2, ax2 = plt.subplots(1, 1)

ax2.plot(predicted_seq, color='red', alpha=0.7)
ax2.plot(real_seq, color='blue', linewidth=0.7)
ax2.legend(['Forecast', 'Actual'])  # red = forecast, blue = actual, matching plot order
ax2.set_xlabel('Time Steps')
ax2.set_ylabel('Log Prices')

fig2.tight_layout()
plt.show()
22  readme.txt  Normal file
@@ -0,0 +1,22 @@
Step 1: download the data
Step 2: run feature processing on the data
Step 3: build the dataloader to prepare for training
Step 4: build the model (a Transformer is used here)
Step 5: train
Step 6: test or evaluate (predict, evaluate) on the test set.


Usage:

python m4_training.py
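Judging from the imports, the full pipeline would presumably be run in this order (d1/d2/m0/m1/m3 are library modules, imported rather than run directly):

python d0_download.py
python m4_training.py
python m5_predict.py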
3  requirements.txt  Normal file
@@ -0,0 +1,3 @@
# Required libraries
torch-summary
yfinance
# Also needed by the scripts (inferred from the imports):
torch
baostock
pandas
numpy
matplotlib
BIN  saved_weights.pt  Normal file
Binary file not shown.