This commit is contained in:
dupenf 2024-08-04 19:05:34 +08:00 committed by GitHub
parent 91c1780e07
commit 1d4a445d4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 3263 additions and 0 deletions

0
__init__.py Normal file
View File

51
d0_download.py Normal file
View File

@ -0,0 +1,51 @@
import baostock as bs
import pandas as pd
def download_code_hist(
    save_path="./datasets",
    code="sh.600000",
    start_date="2018-09-01",
    end_date="2024-06-30",
    freq="d",
    adjustflag="2",
):
    """Download historical K-line data for one security and save it as CSV.

    Logs in to baostock, queries ``code`` between ``start_date`` and
    ``end_date``, writes ``<save_path>/<code>.csv`` and prints the first rows.

    Args:
        save_path: Directory the CSV is written to (created if missing).
        code: Security code passed to baostock, e.g. ``"sh.600000"``.
        start_date: First date of the query, ``YYYY-MM-DD``.
        end_date: Last date of the query, ``YYYY-MM-DD``.
        freq: ``"d"`` for daily bars, ``"w"``/``"m"`` for weekly/monthly
            bars; any other value is treated as a minute-bar frequency.
        adjustflag: Price-adjustment flag forwarded to baostock unchanged.
            NOTE(review): the baostock documentation lists 1 = backward
            adjusted, 2 = forward adjusted, 3 = unadjusted, so the default
            "2" does not look like "hfq" as the old inline note said -
            confirm which adjustment is intended.

    Returns:
        The downloaded rows as a ``pandas.DataFrame`` (all values are the
        strings returned by baostock).

    Raises:
        RuntimeError: If the baostock login or the query reports an error.
    """
    import os  # local import so this block does not depend on file-level imports

    # The available indicator fields differ per frequency: minute bars carry a
    # "time" column and no turnover/ST information, weekly/monthly bars have
    # no "time", "preclose", "tradestatus" or "isST".
    if freq == "d":
        fields = (
            "date,code,open,high,low,close,preclose,volume,amount,"
            "adjustflag,turn,tradestatus,pctChg,isST"
        )
    elif freq in ("w", "m"):
        fields = "date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg"
    else:
        fields = "date,time,code,open,high,low,close,volume,adjustflag"

    lg = bs.login()
    if lg.error_code != "0":
        raise RuntimeError(f"baostock login failed: {lg.error_msg}")

    try:
        rs = bs.query_history_k_data_plus(
            code,
            fields=fields,
            start_date=start_date,
            end_date=end_date,
            frequency=freq,
            adjustflag=adjustflag,
        )

        # Collect the result set one record at a time.
        data_list = []
        while rs.error_code == "0" and rs.next():
            data_list.append(rs.get_row_data())
        if rs.error_code != "0":
            raise RuntimeError(f"baostock query failed: {rs.error_msg}")
        columns = rs.fields
    finally:
        # Always release the session, even when the query fails.
        bs.logout()

    result = pd.DataFrame(data_list, columns=columns)

    # Write the result set to a CSV file.
    os.makedirs(save_path, exist_ok=True)
    filename = os.path.join(save_path, code + ".csv")
    result.to_csv(filename, index=True)
    print(result.head())
    return result


if __name__ == "__main__":
    # Only hit the network when run as a script, not when imported.
    download_code_hist()

45
d1_showCand.py Normal file
View File

@ -0,0 +1,45 @@
# import requirement libraries and tools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import yfinance as yf
import torch.nn as nn
import torch.functional as F
import plotly.graph_objects as go
from tqdm.notebook import tqdm
from torchsummary import summary
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
# Load the daily K-line data for sh.600000 from the local datasets folder.
df = pd.read_csv("./datasets/sh.600000.csv")

# Create a trace for the candlestick chart. The x values are the trading
# dates so that the axis matches its "date" title (the row counter was used
# before).
candlestick_trace = go.Candlestick(
    x=df['date'],
    open=df['open'],
    high=df['high'],
    low=df['low'],
    close=df['close'],
    name='Candlestick'
)

# Create the layout. A categorical x axis keeps the candles evenly spaced
# instead of leaving gaps for days without a row in the CSV.
layout = go.Layout(
    title='sh.600000 Candlestick Chart',
    xaxis=dict(title='date', type='category'),
    yaxis=dict(title='price', rangemode='normal')
)

# Create the figure and add the candlestick trace and layout
fig = go.Figure(data=[candlestick_trace], layout=layout)

# Hide the range slider that candlestick charts show by default
fig.update_layout(xaxis_rangeslider_visible=False)

# Show the figure
fig.show()

67
d2_viewer.py Normal file
View File

@ -0,0 +1,67 @@
# import requirement libraries and tools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import yfinance as yf
import torch.nn as nn
import torch.functional as F
import plotly.graph_objects as go
from tqdm.notebook import tqdm
from torchsummary import summary
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
# Load the daily K-line data and index it by trading date: the weekly/monthly
# asfreq() calls below need a datetime index, not the default row counter.
df = pd.read_csv(
    "./datasets/sh.600000.csv",
    parse_dates=['date'],
    index_col='date',
)
# Drop any unnamed column left over from a CSV that was saved with its index.
df = df.drop(columns=[c for c in df.columns if str(c).startswith('Unnamed')])

# Move column 'close' to the first position
col_close = df.pop('close')
df.insert(0, 'close', col_close)

# Quick overview of the data. These were bare notebook expressions, which
# display nothing when the file is run as a script, so print them explicitly.
print(df.head())
print(df.tail())
print(df.shape)
df.info()
print(df.describe().T)
print(df.duplicated().sum())

df.plot(subplots=True, figsize=(15, 15))
plt.suptitle(
    f'Stock attributes from {df.index.min():%Y} to {df.index.max():%Y}',
    y=0.91,
)
plt.show()

# Offset objects are used instead of the 'w'/'m' string aliases, whose
# spelling has changed between pandas releases.
df.asfreq(pd.offsets.Week(weekday=6), method='ffill').plot(
    subplots=True, figsize=(15, 15), style='-'
)
plt.suptitle('Stock attributes over time(Weekly frequency)', y=0.91)
plt.show()

df.asfreq(pd.offsets.MonthEnd(), method='ffill').plot(
    subplots=True, figsize=(15, 15), style='-'
)
plt.suptitle('Stock attributes over time(Monthly frequency)', y=0.91)
plt.show()

print(df[['close']])

# computing moving average(ma)
ma_day = [10, 20, 50]
for ma in ma_day:
    col_name = f'MA for {ma} days'
    df[col_name] = df['close'].rolling(ma).mean()

df[['close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']].plot(figsize=(15, 5))
plt.title('Comparison of moving averages and close price of sh.600000')
plt.show()

# use pct_change to find the percent change for each day
df['Daily_Return'] = df['close'].pct_change()

# plot the daily return percentage
df.Daily_Return.plot(legend=True, figsize=(15, 5))
plt.title('Daily return percentage of stock')
plt.show()

72
d3_prepareddata.py Normal file
View File

@ -0,0 +1,72 @@
# import requirement libraries and tools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import yfinance as yf
import torch.nn as nn
import torch.functional as F
import plotly.graph_objects as go
from tqdm.notebook import tqdm
from torchsummary import summary
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
def get_datasets(
    batch_size=32,
    shuffle=False,
    csv_path="./datasets/sh.600000.csv",
    seq_len=11,
):
    """Build training/validation loaders of sliding price windows.

    Reads the K-line CSV, rescales the open/high/low/close columns with one
    shared ``MinMaxScaler`` (fit on the ``low`` column only, range 0-15),
    writes the scaled frame to ``./datasets/features.csv`` and cuts the four
    price columns into overlapping windows of ``seq_len`` rows. In every
    window the first ``seq_len - 1`` rows are the model input and the last
    row is the target.

    Args:
        batch_size: Batch size of both returned loaders.
        shuffle: Whether both loaders shuffle their samples.
        csv_path: CSV file to read; must contain open/high/low/close columns.
        seq_len: Rows per window (inputs plus the one target row).

    Returns:
        ``(train_dataloader, valid_dataloader, sequences, scaler)`` where
        ``sequences`` is the full ``(n_windows, seq_len, 4)`` array (train,
        validation and the trailing test windows) and ``scaler`` is the
        fitted scaler needed to undo the normalisation.

    Raises:
        ValueError: If ``seq_len`` is smaller than 2 or the CSV has fewer
            than ``seq_len`` rows.
    """
    if seq_len < 2:
        raise ValueError("seq_len must be at least 2 (inputs plus one target row)")

    price_cols = ['open', 'high', 'low', 'close']
    df = pd.read_csv(csv_path)

    # normalize data: one scaler, fit on 'low', is applied to all four price
    # columns so they stay on a common scale and can be inverted together.
    df2 = df.copy(deep=True)
    scaler = MinMaxScaler(feature_range=(0, 15)).fit(df2.low.values.reshape(-1, 1))
    for col in price_cols:
        df2[col] = scaler.transform(df2[col].values.reshape(-1, 1)).ravel()
    df2.to_csv("./datasets/features.csv")

    data = df2[price_cols].values
    if len(data) < seq_len:
        raise ValueError(
            f"need at least {seq_len} rows to build a window, got {len(data)}"
        )

    # Sliding windows with stride 1 over the price rows.
    sequences = np.array(
        [data[start:start + seq_len] for start in range(len(data) - seq_len + 1)]
    )

    # Split the windows chronologically: 80% training, 10% validation and the
    # remaining 10% test. The test windows are not wrapped in a loader; they
    # are the tail of the returned `sequences`.
    valid_set_size_percentage = 10
    test_set_size_percentage = 10
    n_windows = sequences.shape[0]
    valid_set_size = int(np.round(valid_set_size_percentage / 100 * n_windows))
    test_set_size = int(np.round(test_set_size_percentage / 100 * n_windows))
    train_set_size = n_windows - (valid_set_size + test_set_size)
    valid_end = train_set_size + valid_set_size

    x_train = torch.tensor(sequences[:train_set_size, :-1, :]).float()
    y_train = torch.tensor(sequences[:train_set_size, -1, :]).float()
    x_valid = torch.tensor(sequences[train_set_size:valid_end, :-1, :]).float()
    y_valid = torch.tensor(sequences[train_set_size:valid_end, -1, :]).float()

    train_dataloader = DataLoader(
        TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=shuffle
    )
    valid_dataloader = DataLoader(
        TensorDataset(x_valid, y_valid), batch_size=batch_size, shuffle=shuffle
    )

    return train_dataloader, valid_dataloader, sequences, scaler
# get_datasets()

1411
datasets/features.csv Normal file

File diff suppressed because it is too large Load Diff

1411
datasets/sh.600000.csv Normal file

File diff suppressed because it is too large Load Diff

19
m1_model.py Normal file
View File

@ -0,0 +1,19 @@
from torch import nn
from torchsummary import summary
class NeuralNetwork(nn.Module):
    """LSTM encoder followed by a linear layer mapping back to the features.

    The network reads a batch-first sequence of ``num_feature``-wide rows and
    projects the LSTM's final hidden state to ``num_feature`` outputs.
    """

    def __init__(self, num_feature):
        """Create the layers for inputs with ``num_feature`` values per step."""
        super().__init__()
        hidden_size = 64
        self.lstm = nn.LSTM(
            input_size=num_feature,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_feature)

    def forward(self, x):
        """Return the projection of the LSTM's final hidden state.

        The per-step outputs and the cell state are discarded. The hidden
        state keeps the leading axis nn.LSTM puts in front of the batch axis,
        so the result is not squeezed here.
        """
        _, (final_hidden, _) = self.lstm(x)
        return self.fc(final_hidden)

70
m2_test.py Normal file
View File

@ -0,0 +1,70 @@
# # import requirement libraries and tools
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import torch
# import torch.optim as optim
# import yfinance as yf
# import torch.nn as nn
# import torch.functional as F
# import plotly.graph_objects as go
# from tqdm.notebook import tqdm
# from sklearn.preprocessing import MinMaxScaler
# from torch.utils.data import TensorDataset, DataLoader
# # def test():
# # model=torch.load('saved_weights.pt')
# # x_test= torch.tensor(x_test).float()
# # with torch.no_grad():
# # y_test_pred = model(x_test)
# # y_test_pred = y_test_pred.numpy()[0]
# # idx=0
# # plt.plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
# # y_test[:,idx], color='black', label='test target')
# # plt.plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
# # y_test_pred[:,idx], color='green', label='test prediction')
# # plt.title('future stock prices')
# # plt.xlabel('time [days]')
# # plt.ylabel('normalized price')
# # plt.legend(loc='best')
# # index_values = df[len(df) - len(y_test):].index
# # col_values = ['Open', 'Low', 'High', 'Close']
# # df_results = pd.DataFrame(data=y_test_pred, index=index_values, columns=col_values)
# # # Create a trace for the candlestick chart
# # candlestick_trace = go.Candlestick(
# # x=df_results.index,
# # open=df_results['Open'],
# # high=df_results['High'],
# # low=df_results['Low'],
# # close=df_results['Close'],
# # name='Candlestick'
# # )
# # # Create the layout
# # layout = go.Layout(
# # title='GOOG Candlestick Chart',
# # xaxis=dict(title='Date'),
# # yaxis=dict(title='Price', rangemode='normal')
# # )
# # # Create the figure and add the candlestick trace and layout
# # fig = go.Figure(data=[candlestick_trace], layout=layout)
# # # Update the layout of the figure
# # fig.update_layout(xaxis_rangeslider_visible=False)
# # # Show the figure
# # fig.show()

71
m3_train.py Normal file
View File

@ -0,0 +1,71 @@
# import requirement libraries and tools
import torch
import torch.optim as optim
import torch.nn as nn
import torch.functional as F
from m1_model import NeuralNetwork
from torchsummary import summary
from d3_prepareddata import get_datasets
def train(dataloader, model, optimizer, mse):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Batches are moved to whatever device the model's parameters live on, so
    the function works on CPU as well as on GPU. The loss is computed between
    ``model(x)[0]`` and ``y``.

    Args:
        dataloader: Iterable yielding ``(x, y)`` tensor pairs.
        model: Module to train; its output is indexed with ``[0]`` before
            being compared with ``y``.
        optimizer: Optimizer bound to ``model``'s parameters.
        mse: Loss callable taking ``(prediction, target)``.

    Returns:
        The loss averaged over the batches (same convention as ``evaluate``,
        so the two numbers are directly comparable).

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    device = next(model.parameters()).device
    model.train()

    epoch_loss = 0.0
    n_batches = 0
    for x, y in dataloader:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        pred = model(x)
        loss = mse(pred[0], y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        n_batches += 1

    if n_batches == 0:
        raise ValueError("dataloader yielded no batches")
    return epoch_loss / n_batches
def evaluate(dataloader, model, mse):
    """Return the mean loss of ``model`` over ``dataloader`` without training.

    The model is switched to eval mode and gradients are disabled. Batches
    are moved to the device of the model's parameters, so the function works
    on CPU as well as on GPU. The loss is computed between ``model(x)[0]``
    and ``y``.

    Args:
        dataloader: Iterable yielding ``(x, y)`` tensor pairs.
        model: Module to evaluate; its output is indexed with ``[0]`` before
            being compared with ``y``.
        mse: Loss callable taking ``(prediction, target)``.

    Returns:
        The loss averaged over the batches.

    Raises:
        ValueError: If ``dataloader`` yields no batches (previously this
            surfaced as a bare ZeroDivisionError).
    """
    device = next(model.parameters()).device
    model.eval()

    epoch_loss = 0.0
    n_batches = 0
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)
            pred = model(x)
            epoch_loss += mse(pred[0], y).item()
            n_batches += 1

    if n_batches == 0:
        raise ValueError("dataloader yielded no batches")
    return epoch_loss / n_batches
def main():
    """Train the LSTM model and keep the checkpoint with the best validation loss.

    Builds a 4-feature ``NeuralNetwork``, trains it with Adam and MSE loss
    for 50 epochs on the loaders from ``get_datasets()`` and, whenever the
    validation loss improves, saves the whole module to ``saved_weights.pt``.
    """
    # Use the GPU when one is present, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    m = NeuralNetwork(4).to(device)

    optimizer = optim.Adam(m.parameters())
    mse = nn.MSELoss()
    n_epochs = 50
    best_valid_loss = float('inf')
    train_dataloader, valid_dataloader, _, _ = get_datasets()

    for epoch in range(1, n_epochs + 1):
        train_loss = train(train_dataloader, m, mse=mse, optimizer=optimizer)
        valid_loss = evaluate(valid_dataloader, m, mse=mse)
        print(f"Epoch {epoch}/{n_epochs}")
        print("train_loss>", train_loss)
        print("valid_loss>", valid_loss)

        # Save the best model. The whole module (not just a state_dict) is
        # stored because m5_predict.py loads it back with a plain torch.load().
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(m, 'saved_weights.pt')

        print(f'\tTrain Loss: {train_loss:.5f} | ' + f'\tVal Loss: {valid_loss:.5f}\n')


if __name__ == "__main__":
    # Only start training when run as a script, not when imported.
    main()

46
m5_predict.py Normal file
View File

@ -0,0 +1,46 @@
# import requirement libraries and tools
import torch
import torch.optim as optim
import torch.nn as nn
import torch.functional as F
from m1_model import NeuralNetwork
from torchsummary import summary
from d3_prepareddata import get_datasets
import pandas as pd
def predict():
    """Forecast the next 10 rows of open/high/low/close prices.

    Loads the module saved in ``saved_weights.pt``, seeds a rolling window
    with the most recent rows from ``get_datasets()`` and repeatedly feeds
    the model its own prediction to step forward one row at a time. The
    predictions are mapped back to price scale, printed and returned.

    Returns:
        A ``pandas.DataFrame`` with one row per predicted day and the columns
        ``open``, ``high``, ``low``, ``close``.
    """
    # Use the GPU when one is present, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(security): the checkpoint is a fully pickled module, so loading it
    # executes pickle code - only load files you produced yourself.
    # `weights_only=False` is required for that on recent PyTorch releases
    # (the keyword needs PyTorch >= 1.13); `map_location` lets a checkpoint
    # saved on a GPU be opened on a CPU-only machine.
    model = torch.load('saved_weights.pt', map_location=device, weights_only=False)
    model = model.to(device)
    model.eval()

    _, _, sequences, scaler = get_datasets()

    # Take the newest window and drop its oldest row, so the window has the
    # same length as the inputs used in training.
    last_sequence = sequences[-1:, 1:, :]
    print(last_sequence)
    window = torch.from_numpy(last_sequence).float().to(device)

    # Generate predictions for the next 10 days
    PRED_DAYS = 10
    predictions = []
    with torch.no_grad():
        for _ in range(PRED_DAYS):
            # The model output has a leading axis in front of the batch axis;
            # [0] selects the (batch, features) prediction.
            next_row = model(window)[0]
            predictions.append(next_row)
            # Slide the window: drop the oldest row, append the prediction.
            window = torch.cat((window[:, 1:, :], next_row.unsqueeze(1)), dim=1)

    # Collect the predictions explicitly rather than reading them back out of
    # the window, which only worked while PRED_DAYS equalled the window length.
    pred_days = torch.cat(predictions, dim=0).cpu().numpy()

    # Inverse transform the predicted values. The scaler was fit on a single
    # column, so feed it one column and restore the shape afterwards.
    pred_days = scaler.inverse_transform(pred_days.reshape(-1, 1)).reshape(pred_days.shape)

    df_pred = pd.DataFrame(
        data=pred_days,
        columns=['open', 'high', 'low', 'close']
    )
    print(df_pred)
    return df_pred


if __name__ == "__main__":
    # Only run the forecast when executed as a script, not when imported.
    predict()

BIN
saved_weights.pt Normal file

Binary file not shown.