Mirror of https://github.com/dupenf/stock-lstm.git (synced 2024-11-25 16:22:36 +08:00)

Commit 1d4a445d4c (parent 91c1780e07) · v1.0
This commit contains the following files:
__init__.py · 0 lines · new file
d0_download.py · 51 lines · new file
@@ -0,0 +1,51 @@
import os

import baostock as bs
import pandas as pd


def download_code_hist(
    save_path="./datasets",
    code="sh.600000",
    start_date="2018-09-01",
    end_date="2024-06-30",
    freq="d",
    adjustflag="2",
):
    lg = bs.login()
    #### Fetch historical A-share K-line data ####
    # For the full field list, see the "historical quotation fields" section of
    # the baostock docs. Minute-line fields differ from day-line fields, and
    # minute lines do not cover indices.
    # Minute-line fields: date,time,code,open,high,low,close,volume,amount,adjustflag
    # Weekly/monthly-line fields: date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
    fields = "date,time,code,open,high,low,close,volume,adjustflag"
    if freq == "d":
        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
    rs = bs.query_history_k_data_plus(
        code,
        fields=fields,
        start_date=start_date,
        end_date=end_date,
        frequency=freq,
        adjustflag=adjustflag,  # adjustment type: "1" = post-adjusted, "2" = pre-adjusted, "3" = unadjusted
    )

    #### Collect the result set ####
    data_list = []
    while (rs.error_code == "0") and rs.next():
        # fetch one record and append it to the list
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    #### Write the result set to a CSV file ####
    os.makedirs(save_path, exist_ok=True)  # make sure the target directory exists
    filename = save_path + "/" + code + ".csv"
    result.to_csv(filename, index=True)

    print(result.head())

    bs.logout()


download_code_hist()
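Since the downloader logs in and out of baostock on every call, looping over several codes works as-is. A minimal sketch; the extra tickers below are hypothetical examples, not part of this commit:

# Sketch only: illustrative ticker list.
for c in ["sh.600000", "sh.600036", "sz.000001"]:
    download_code_hist(code=c, start_date="2018-09-01", end_date="2024-06-30")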
d1_showCand.py · 45 lines · new file
@@ -0,0 +1,45 @@
# import required libraries and tools
import pandas as pd
import plotly.graph_objects as go


df = pd.read_csv("./datasets/sh.600000.csv")


# Create a trace for the candlestick chart
candlestick_trace = go.Candlestick(
    x=df.index,
    open=df['open'],
    high=df['high'],
    low=df['low'],
    close=df['close'],
    name='Candlestick'
)

# Create the layout
layout = go.Layout(
    title='sh.600000 Candlestick Chart',
    xaxis=dict(title='date'),
    yaxis=dict(title='price', rangemode='normal')
)

# Create the figure and add the candlestick trace and layout
fig = go.Figure(data=[candlestick_trace], layout=layout)

# Update the layout of the figure
fig.update_layout(xaxis_rangeslider_visible=False)

# Show the figure
fig.show()
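The trace above uses the row number for x; since the day-line CSV carries a date column, switching to calendar dates is a small change. A sketch, assuming df was read as above:

# Sketch: put calendar dates on the x-axis instead of row numbers.
df['date'] = pd.to_datetime(df['date'])
fig.update_traces(x=df['date'], selector=dict(type='candlestick'))
fig.show()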
d2_viewer.py · 67 lines · new file
@@ -0,0 +1,67 @@
# import required libraries and tools
import pandas as pd
import matplotlib.pyplot as plt


df = pd.read_csv("./datasets/sh.600000.csv")

# Parse dates and use them as the index; asfreq() below needs a DatetimeIndex
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

# Move column 'close' to the first position
col_close = df.pop('close')
df.insert(0, 'close', col_close)
print(df.head())
print(df.tail())

print(df.shape)
df.info()

print(df.describe().T)
print(df.duplicated().sum())

df.plot(subplots=True, figsize=(15, 15))
plt.suptitle('Stock attributes over time (daily)', y=0.91)
plt.show()

df.asfreq('W', method='ffill').plot(subplots=True, figsize=(15, 15), style='-')
plt.suptitle('Stock attributes over time (weekly frequency)', y=0.91)
plt.show()

df.asfreq('M', method='ffill').plot(subplots=True, figsize=(15, 15), style='-')
plt.suptitle('Stock attributes over time (monthly frequency)', y=0.91)
plt.show()

print(df[['close']])


# computing moving averages (MA)
ma_day = [10, 20, 50]

for ma in ma_day:
    col_name = f'MA for {ma} days'
    df[col_name] = df['close'].rolling(ma).mean()

df[['close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']].plot(figsize=(15, 5))
plt.title('Comparison of moving averages and the close price')
plt.show()


# use pct_change to find the percent change for each day
df['Daily_Return'] = df['close'].pct_change()
# plot the daily return percentage
df.Daily_Return.plot(legend=True, figsize=(15, 5))
plt.title('Daily return percentage of stock')
plt.show()
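A distribution view complements the daily-return time series; a small sketch using the Daily_Return column computed above:

# Sketch: histogram of daily returns.
df['Daily_Return'].hist(bins=50, figsize=(10, 5))
plt.title('Distribution of daily returns')
plt.xlabel('daily return')
plt.show()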
d3_prepareddata.py · 72 lines · new file
@@ -0,0 +1,72 @@
# import required libraries and tools
import numpy as np
import pandas as pd
import torch

from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader


def get_datasets(batch_size=32, shuffle=False):
    df = pd.read_csv("./datasets/sh.600000.csv")

    # normalize data: fit a single scaler (on the 'low' column) and apply it to
    # all four OHLC columns so they stay on one common scale
    df2 = df.copy(deep=True)
    scaler = MinMaxScaler(feature_range=(0, 15)).fit(df2.low.values.reshape(-1, 1))
    df2['open'] = scaler.transform(df2.open.values.reshape(-1, 1))
    df2['high'] = scaler.transform(df2.high.values.reshape(-1, 1))
    df2['low'] = scaler.transform(df2.low.values.reshape(-1, 1))
    df2['close'] = scaler.transform(df2.close.values.reshape(-1, 1))
    df2.to_csv("./datasets/features.csv")
    data = df2[['open', 'high', 'low', 'close']].values

    # slice the series into overlapping windows, then divide them into three
    # parts: 80% training set, 10% validation set, 10% test set
    seq_len = 11  # 11 days per window: 10 as input, the 11th as target
    sequences = []
    for index in range(len(data) - seq_len + 1):
        sequences.append(data[index: index + seq_len])
    sequences = np.array(sequences)

    valid_set_size_percentage = 10
    test_set_size_percentage = 10

    valid_set_size = int(np.round(valid_set_size_percentage / 100 * sequences.shape[0]))
    test_set_size = int(np.round(test_set_size_percentage / 100 * sequences.shape[0]))
    train_set_size = sequences.shape[0] - (valid_set_size + test_set_size)

    # the first 10 days of each window are features, the last day is the target
    x_train = sequences[:train_set_size, :-1, :]
    y_train = sequences[:train_set_size, -1, :]

    x_valid = sequences[train_set_size:train_set_size + valid_set_size, :-1, :]
    y_valid = sequences[train_set_size:train_set_size + valid_set_size, -1, :]

    # the remainder is the test set
    x_test = sequences[train_set_size + valid_set_size:, :-1, :]
    y_test = sequences[train_set_size + valid_set_size:, -1, :]

    x_train = torch.tensor(x_train).float()
    y_train = torch.tensor(y_train).float()

    x_valid = torch.tensor(x_valid).float()
    y_valid = torch.tensor(y_valid).float()

    train_dataset = TensorDataset(x_train, y_train)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)

    valid_dataset = TensorDataset(x_valid, y_valid)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=shuffle)

    return train_dataloader, valid_dataloader, sequences, scaler


# get_datasets()
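A quick shape check makes the windowing concrete; with seq_len=11 and four OHLC features, each batch should look as follows (a sketch):

# Sketch: inspect one training batch.
train_dl, valid_dl, seqs, sc = get_datasets(batch_size=32)
xb, yb = next(iter(train_dl))
print(xb.shape)  # torch.Size([32, 10, 4]) -- 10-day input windows
print(yb.shape)  # torch.Size([32, 4])     -- day-11 OHLC targets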
datasets/features.csv · 1411 lines · new file
(file diff suppressed because it is too large)
datasets/sh.600000.csv · 1411 lines · new file
(file diff suppressed because it is too large)
m1_model.py · 19 lines · new file
@@ -0,0 +1,19 @@
from torch import nn


class NeuralNetwork(nn.Module):
    def __init__(self, num_feature):
        super(NeuralNetwork, self).__init__()
        self.lstm = nn.LSTM(num_feature, 64, batch_first=True)
        self.fc = nn.Linear(64, num_feature)

    def forward(self, x):
        # hidden has shape (num_layers=1, batch, 64); the linear head maps it
        # to (1, batch, num_feature), which is why callers index pred[0]
        output, (hidden, cell) = self.lstm(x)
        x = self.fc(hidden)
        return x
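A minimal sanity check of the forward pass, assuming four features and a 10-step window; it shows why the training code indexes pred[0]:

# Sketch: the model returns the LSTM hidden state mapped through the head,
# so the output shape is (num_layers=1, batch, num_feature).
import torch
m = NeuralNetwork(4)
out = m(torch.randn(2, 10, 4))  # batch of 2 windows, 10 days, 4 features
print(out.shape)  # torch.Size([1, 2, 4])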
m2_test.py · 70 lines · new file
@@ -0,0 +1,70 @@
# Test scaffold, kept commented out in this commit: it assumes the x_test /
# y_test split produced in d3_prepareddata, the original df, and a trained
# 'saved_weights.pt'.

# # import required libraries and tools
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import torch
# import plotly.graph_objects as go


# # def test():
# #     model = torch.load('saved_weights.pt')
# #     x_test = torch.tensor(x_test).float()
# #     with torch.no_grad():
# #         y_test_pred = model(x_test)
# #         y_test_pred = y_test_pred.numpy()[0]

# #     idx = 0
# #     plt.plot(np.arange(y_train.shape[0], y_train.shape[0] + y_test.shape[0]),
# #              y_test[:, idx], color='black', label='test target')

# #     plt.plot(np.arange(y_train.shape[0], y_train.shape[0] + y_test_pred.shape[0]),
# #              y_test_pred[:, idx], color='green', label='test prediction')

# #     plt.title('future stock prices')
# #     plt.xlabel('time [days]')
# #     plt.ylabel('normalized price')
# #     plt.legend(loc='best')


# #     index_values = df[len(df) - len(y_test):].index
# #     col_values = ['Open', 'Low', 'High', 'Close']
# #     df_results = pd.DataFrame(data=y_test_pred, index=index_values, columns=col_values)


# #     # Create a trace for the candlestick chart
# #     candlestick_trace = go.Candlestick(
# #         x=df_results.index,
# #         open=df_results['Open'],
# #         high=df_results['High'],
# #         low=df_results['Low'],
# #         close=df_results['Close'],
# #         name='Candlestick'
# #     )

# #     # Create the layout
# #     layout = go.Layout(
# #         title='Candlestick chart of test predictions',
# #         xaxis=dict(title='Date'),
# #         yaxis=dict(title='Price', rangemode='normal')
# #     )

# #     # Create the figure and add the candlestick trace and layout
# #     fig = go.Figure(data=[candlestick_trace], layout=layout)

# #     # Update the layout of the figure
# #     fig.update_layout(xaxis_rangeslider_visible=False)

# #     # Show the figure
# #     fig.show()
m3_train.py · 71 lines · new file
@@ -0,0 +1,71 @@
# import required libraries and tools
import torch
import torch.optim as optim
import torch.nn as nn
from m1_model import NeuralNetwork
from torchsummary import summary
from d3_prepareddata import get_datasets

# fall back to CPU when no GPU is present
device = "cuda" if torch.cuda.is_available() else "cpu"


def train(dataloader, model, optimizer, mse):
    epoch_loss = 0
    model.train()

    for batch in dataloader:
        optimizer.zero_grad()
        x, y = batch
        x = x.to(device)
        y = y.to(device)

        pred = model(x)

        # the model returns (1, batch, num_feature); pred[0] is (batch, num_feature)
        loss = mse(pred[0], y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # average loss per batch, comparable with evaluate()
    return epoch_loss / len(dataloader)


def evaluate(dataloader, model, mse):
    epoch_loss = 0
    model.eval()

    with torch.no_grad():
        for batch in dataloader:
            x, y = batch
            x = x.to(device)
            y = y.to(device)
            pred = model(x)
            loss = mse(pred[0], y)
            epoch_loss += loss.item()

    return epoch_loss / len(dataloader)


def main():
    m = NeuralNetwork(4).to(device)
    # summary(m, (4, ))
    optimizer = optim.Adam(m.parameters())
    mse = nn.MSELoss()

    n_epochs = 50
    best_valid_loss = float('inf')
    train_dataloader, valid_dataloader, _, _ = get_datasets()
    for epoch in range(1, n_epochs + 1):
        train_loss = train(train_dataloader, m, mse=mse, optimizer=optimizer)
        valid_loss = evaluate(valid_dataloader, m, mse=mse)
        # save the best model
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(m, 'saved_weights.pt')
        print(f'Epoch {epoch:02d} | Train Loss: {train_loss:.5f} | Val Loss: {valid_loss:.5f}')


if __name__ == "__main__":
    main()
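Because torch.save stores the whole module here, reloading for inference is a single call; a sketch (map_location keeps it portable to CPU-only machines):

# Sketch: reload the best checkpoint for CPU inference.
# Note: PyTorch >= 2.6 defaults torch.load to weights_only=True, so loading a
# full pickled module there may require weights_only=False.
import torch
model = torch.load('saved_weights.pt', map_location='cpu')
model.eval()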
m5_predict.py · 46 lines · new file
@@ -0,0 +1,46 @@
# import required libraries and tools
import torch
import pandas as pd

from m1_model import NeuralNetwork  # keeps the saved module's class importable for torch.load
from d3_prepareddata import get_datasets

# fall back to CPU when no GPU is present
device = "cuda" if torch.cuda.is_available() else "cpu"


def predict():
    model = torch.load('saved_weights.pt').to(device)
    model.eval()
    _, _, sequences, scaler = get_datasets()
    # Take the last 10 days of the final window as the seed for prediction
    last_sequence = sequences[-1:, 1:, :]
    print(last_sequence)
    last_sequence = torch.from_numpy(last_sequence).float()

    # Generate predictions for the next 10 days autoregressively: append each
    # prediction to the window, then drop the oldest day
    PRED_DAYS = 10
    with torch.no_grad():
        for i in range(PRED_DAYS):
            last_sequence = last_sequence.to(device)
            pred_i = model(last_sequence)  # shape (1, 1, 4)
            last_sequence = torch.cat((last_sequence, pred_i), dim=1)
            last_sequence = last_sequence[:, 1:, :]

    # after 10 steps the 10-day window holds exactly the 10 predicted days
    last_sequence = last_sequence.to("cpu")
    pred_days = last_sequence.reshape(PRED_DAYS, 4).numpy()

    # inverse-transform the predicted values back to the price scale
    pred_days = scaler.inverse_transform(pred_days)

    df_pred = pd.DataFrame(
        data=pred_days,
        columns=['open', 'high', 'low', 'close']
    )

    print(df_pred)
    return df_pred


predict()
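To eyeball the forecast against recent history, the predicted closes can be appended after the real ones; a sketch, assuming predict() returns df_pred as in the version above:

# Sketch: last 60 real closes followed by the 10 predicted ones.
import matplotlib.pyplot as plt
hist = pd.read_csv("./datasets/sh.600000.csv")['close'].tail(60).reset_index(drop=True)
df_pred = predict()
plt.plot(range(len(hist)), hist.values, label='history')
plt.plot(range(len(hist), len(hist) + len(df_pred)), df_pred['close'].values, label='prediction')
plt.legend()
plt.show()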
saved_weights.pt · BIN · new file
(binary file not shown)