Add files via upload

杜鹏飞 2024-08-05 14:15:06 +08:00 committed by GitHub
parent c9dfdf2f9d
commit 8d81cd6b4e
15 changed files with 3302 additions and 0 deletions

0
__init__.py Normal file

8
a0_config.py Normal file

@@ -0,0 +1,8 @@
import torch

USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')

input_window = 7   # number of input time steps
output_window = 1  # number of prediction steps (fixed at one)
batch_size = 100

56
d0_download.py Normal file

@@ -0,0 +1,56 @@
import baostock as bs
# http://baostock.com/baostock/index.php/%E9%A6%96%E9%A1%B5
# import akshare as ak
# https://akshare.akfamily.xyz/introduction.html
import pandas as pd


def download_code_hist(
    save_path="./datasets",
    code="sh.600000",
    start_date="2018-09-01",
    end_date="2024-06-30",
    freq="d",
    adjustflag="2",
):
    lg = bs.login()
    #### Fetch historical K-line data for Shanghai/Shenzhen A shares ####
    # For the full field list, see the "historical quotation fields" section of
    # the baostock docs; "minute-line" fields differ from "day-line" fields,
    # and minute lines do not cover indices.
    # Minute-line fields: date,time,code,open,high,low,close,volume,amount,adjustflag
    # Week/month-line fields: date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
    fields = "date,time,code,open,high,low,close,volume,adjustflag"  # no trailing comma, which would turn this into a tuple
    if freq == "d":
        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
    rs = bs.query_history_k_data_plus(
        code,
        fields=fields,
        start_date=start_date,
        end_date=end_date,
        frequency=freq,
        adjustflag=adjustflag,  # "1" = backward-adjusted (hfq), "2" = forward-adjusted (qfq), "3" = unadjusted
    )
    #### Collect the result set ####
    data_list = []
    while (rs.error_code == "0") & rs.next():
        # Fetch one record at a time and accumulate them
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)
    #### Write the result set to a CSV file ####
    filename = save_path + "/" + code + ".csv"
    result.to_csv(filename, index=True)
    print(result.head())
    bs.logout()


if __name__ == "__main__":
    download_code_hist()
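Other symbols or frequencies can be fetched with the same helper; for example (illustrative call, any baostock-supported code works):

download_code_hist(code="sz.000001", start_date="2020-01-01", end_date="2024-06-30", freq="d")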

44
d1_features.py Normal file

@@ -0,0 +1,44 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def log_features():
    df = pd.read_csv("./datasets/sh.600000.csv")
    close = df['close']
    # Log returns: first differences of the log close prices
    log_prices = np.diff(np.log(close))
    log_prices_csum = log_prices.cumsum()  # cumulative sum of the log returns
    print(log_prices_csum)
    print("------------")
    print(log_prices)
    # Draw the raw close prices and the cumulated log returns
    fig1, ax1 = plt.subplots(2, 1)
    ax1[0].plot(close, color='red')
    ax1[0].set_title('Close Price')
    ax1[0].set_xlabel('Time Steps')
    ax1[1].plot(log_prices_csum, color='blue')
    ax1[1].set_title('Cumulative Sum of Log Returns')
    ax1[1].set_xlabel('Time Steps')
    fig1.tight_layout()
    return log_prices  # moved after the plotting so the drawing code is reachable


if __name__ == "__main__":
    log_features()
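As a sanity check on this feature construction (a minimal sketch, assuming the same CSV as above): the cumulative sum of the log returns recovers the log-price path up to its initial value, i.e. log(close[t]) - log(close[0]).

import numpy as np
import pandas as pd

df = pd.read_csv("./datasets/sh.600000.csv")
log_close = np.log(df["close"].to_numpy())
log_returns = np.diff(log_close)  # what log_features() returns
assert np.allclose(log_returns.cumsum(), log_close[1:] - log_close[0])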

56
d2_datasets.py Normal file

@@ -0,0 +1,56 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import numpy as np
import torch

from a0_config import device, output_window, input_window


def create_inout_sequences(input_data, input_window):
    # Build (input, target) pairs where the target is the input sequence
    # shifted forward by output_window steps.
    inout_seq = []
    L = len(input_data)
    for i in range(L - input_window):
        train_seq = input_data[i:i + input_window]
        train_label = input_data[i + output_window: i + input_window + output_window]
        inout_seq.append((train_seq, train_label))
    return torch.FloatTensor(np.array(inout_seq))


def get_data(data_raw, split):
    split = round(split * len(data_raw))
    train_data = data_raw[:split]
    test_data = data_raw[split:]
    # Cumulate the log returns back into a (log-)price path
    train_data = train_data.cumsum()
    train_data = 2 * train_data  # simple amplitude scaling of the training series
    test_data = test_data.cumsum()
    train_sequence = create_inout_sequences(train_data, input_window)
    train_sequence = train_sequence[:-output_window]
    test_sequence = create_inout_sequences(test_data, input_window)
    test_sequence = test_sequence[:-output_window]
    return train_sequence.to(device), test_sequence.to(device)


def get_batch(source, i, batch_size):
    seq_len = min(batch_size, len(source) - 1 - i)
    data = source[i:i+seq_len]
    # Reshape from (batch, 2, input_window) pairs to (input_window, batch, 1)
    data_in = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1))
    target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1))
    return data_in, target
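A minimal shape check (illustrative, using a toy series): with input_window=7 from a0_config, both tensors come out as (input_window, batch, 1), which is the layout the Transformer below expects.

import numpy as np
from d2_datasets import get_data, get_batch

series = np.arange(100, dtype=np.float32)  # toy stand-in for the log returns
train_seq, test_seq = get_data(series, 0.9)
data, target = get_batch(train_seq, 0, batch_size=4)
print(data.shape, target.shape)  # torch.Size([7, 4, 1]) for both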

1411
datasets/features.csv Normal file

File diff suppressed because it is too large.

1411
datasets/sh.600000.csv Normal file

File diff suppressed because it is too large.

37
m0_position.py Normal file

@@ -0,0 +1,37 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import math

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout_p=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(dropout_p)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        # Shape (max_len, 1, d_model) so it broadcasts over the batch dimension
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x: (seq_len, batch, feature); the addition broadcasts pe across the batch
        return self.dropout(x + self.pe[:x.size(0), :])
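Note the broadcasting at work in forward (a quick check, not part of the module): pe is stored as (max_len, 1, d_model), so adding it to an input of shape (seq_len, batch, 1) broadcasts the scalar feature up to d_model.

pe_demo = PositionalEncoding(d_model=200)
x = torch.zeros(7, 100, 1)  # (seq_len, batch, feature=1)
print(pe_demo(x).shape)     # torch.Size([7, 100, 200])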

58
m1_transformer.py Normal file

@@ -0,0 +1,58 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

from m0_position import PositionalEncoding


class Transformer(nn.Module):
    def __init__(self, feature_size=200, num_layers=2, dropout=0.1):
        # feature_size equals the embedding dimension (d_model)
        super().__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        # Multi-head attention with nhead heads;
        # d_key = d_query = d_value = d_model // nhead
        self.encoder_layer = TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)
        # Stack num_layers encoder layers
        self.transformer_encoder = TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        # For simple time-series prediction the decoder is just a linear layer
        self.decoder = nn.Linear(feature_size, 1)
        self._init_weights()

    def _init_weights(self):
        init_range = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-init_range, init_range)

    def forward(self, src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return output

    def _generate_square_subsequent_mask(self, size):
        # Causal mask: position i may only attend to positions <= i
        mask = torch.tril(torch.ones(size, size) == 1)  # lower-triangular boolean matrix
        mask = mask.float()
        mask = mask.masked_fill(mask == 0, float('-inf'))  # blocked positions -> -inf
        mask = mask.masked_fill(mask == 1, float(0.0))     # allowed positions -> 0
        return mask
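A quick smoke test (illustrative): a dummy batch shaped (input_window, batch, 1) is broadcast up to feature_size by the positional encoding and mapped back down to one output per step by the linear decoder.

model = Transformer()
src = torch.randn(7, 100, 1)  # (seq_len, batch, 1)
print(model(src).shape)       # torch.Size([7, 100, 1])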

74
m3_train.py Normal file

@@ -0,0 +1,74 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import time

import torch

from d2_datasets import get_batch

# The training loop itself lives in m4_training.py.


def evaluate(model, data_source, criterion):
    model.eval()  # Turn on the evaluation mode
    total_loss = 0.
    eval_batch_size = 1000
    with torch.no_grad():
        for i in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, i, eval_batch_size)
            output = model(data)
            # Weight each batch loss by its batch size (len(data[0]))
            total_loss = total_loss + len(data[0]) * criterion(output, targets).cpu().item()
    return total_loss / len(data_source)


def predict(model, sequences):
    start_timer = time.time()
    model.eval()
    predicted_seq = torch.Tensor(0)
    real_seq = torch.Tensor(0)
    with torch.no_grad():
        # One-step-ahead prediction over the whole sequence, batch size 1
        for i in range(0, len(sequences) - 1):
            data, target = get_batch(sequences, i, 1)
            output = model(data)
            predicted_seq = torch.cat((predicted_seq, output[-1].view(-1).cpu()), 0)
            real_seq = torch.cat((real_seq, target[-1].view(-1).cpu()), 0)
    timed = time.time() - start_timer
    print(f"{timed:.2f} sec")
    return predicted_seq, real_seq

90
m4_training.py Normal file

@@ -0,0 +1,90 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import time

import torch
import torch.nn as nn

from a0_config import device, batch_size
from d1_features import log_features
from d2_datasets import get_batch, get_data
from m1_transformer import Transformer
from m3_train import evaluate

################################################################################
model = Transformer().to(device)
criterion = nn.MSELoss()
lr = 0.00005
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)
################################################################################
log_prices = log_features()
train_data, test_data = get_data(log_prices, 0.9)
################################################################################
N_EPOCHS = 150
for epoch in range(1, N_EPOCHS + 1):
    epoch_start_time = time.time()
    model.train()  # Turn on the training mode
    total_loss = 0.0
    start_time = time.time()
    for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
        data, targets = get_batch(train_data, i, batch_size)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, targets)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 0.7)
        optimizer.step()
        total_loss = total_loss + loss.item()
        log_interval = max(1, int(len(train_data) / batch_size / 5))  # guard against zero
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print(
                "| epoch {:3d} | {:5d}/{:5d} batches | "
                "lr {:02.10f} | {:5.2f} ms | "
                "loss {:5.7f}".format(
                    epoch,
                    batch,
                    len(train_data) // batch_size,
                    scheduler.get_last_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss,
                )
            )
            total_loss = 0
            start_time = time.time()
    if epoch % N_EPOCHS == 0:  # Validate after the final training epoch
        val_loss = evaluate(model, test_data, criterion=criterion)
        print("-" * 80)
        print(
            "| end of epoch {:3d} | time: {:5.2f}s | valid loss: {:5.7f}".format(
                epoch, (time.time() - epoch_start_time), val_loss
            )
        )
        print("-" * 80)
    else:
        print("-" * 80)
        print(
            "| end of epoch {:3d} | time: {:5.2f}s".format(
                epoch, (time.time() - epoch_start_time)
            )
        )
        print("-" * 80)
    scheduler.step()
torch.save(model, "saved_weights.pt")
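Note that torch.save(model, ...) pickles the whole module, tying the checkpoint to this exact class layout. A common alternative (a sketch, not what this script does; m5_predict.py would need the matching change) is to save only the weights:

torch.save(model.state_dict(), "saved_weights.pt")
# and later: model = Transformer(); model.load_state_dict(torch.load("saved_weights.pt"))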

32
m5_predict.py Normal file

@@ -0,0 +1,32 @@
# Reference: https://github.com/ctxj/Time-Series-Transformer-Pytorch/tree/main
import torch
import matplotlib.pyplot as plt

from a0_config import device
from d1_features import log_features
from d2_datasets import get_data
from m3_train import predict

model = torch.load("saved_weights.pt", map_location=device)
log_prices = log_features()
train_data, test_data = get_data(log_prices, 0.9)
predicted_seq, real_seq = predict(model, test_data)

fig2, ax2 = plt.subplots(1, 1)
ax2.plot(predicted_seq, color='red', alpha=0.7)
ax2.plot(real_seq, color='blue', linewidth=0.7)
ax2.legend(['Forecast', 'Actual'])  # legend order now matches the plot order
ax2.set_xlabel('Time Steps')
ax2.set_ylabel('Log Prices')
fig2.tight_layout()
plt.show()

22
readme.txt Normal file

@@ -0,0 +1,22 @@
Step 1: download the data (d0_download.py)
Step 2: engineer features from the data (d1_features.py)
Step 3: build the dataloader to prepare for training (d2_datasets.py)
Step 4: build the model, here a Transformer (m0_position.py, m1_transformer.py)
Step 5: train (m3_train.py, m4_training.py)
Step 6: test or evaluate (predict/evaluate on the test set; m5_predict.py)

Usage:
python m4_training.py
python m5_predict.py

3
requirements.txt Normal file

@@ -0,0 +1,3 @@
# Libraries required by this project (see the imports in the source files)
torch
baostock
pandas
numpy
matplotlib
torch-summary
yfinance

BIN
saved_weights.pt Normal file
