v1.0

2024-11-25 16:22:36 +08:00 · 2024-08-04 19:05:34 +08:00 · 2024-08-04 19:05:34 +08:00 · 1d4a445d4c
commit 1d4a445d4c
parent 91c1780e07
12 changed files with 3263 additions and 0 deletions
--- a/init.py
+++ b/init.py
--- a/d0_download.py
+++ b/d0_download.py
@ -0,0 +1,51 @@
+import baostock as bs
+import pandas as pd
+
+
+def download_code_hist(
+    save_path="./datasets",
+    code="sh.600000",
+    start_date="2018-09-01",
+    end_date="2024-06-30",
+    freq="d",
+    adjustflag="2"
+):
+    """Download historical K-line data for one security and save it as CSV.
+
+    Logs in to baostock, queries ``code`` between ``start_date`` and
+    ``end_date``, writes the rows to ``<save_path>/<code>.csv`` (the
+    DataFrame index is written as the first column) and prints the head of
+    the result.
+
+    Args:
+        save_path: Directory that receives the CSV file; created if missing.
+        code: Security code passed to baostock, e.g. ``"sh.600000"``.
+        start_date: First day to fetch, e.g. ``"2018-09-01"``.
+        end_date: Last day to fetch, e.g. ``"2024-06-30"``.
+        freq: ``"d"`` selects the daily field list, ``"w"``/``"m"`` the
+            weekly/monthly field list, anything else the minute-line list.
+        adjustflag: Price-adjustment mode forwarded to baostock. Per the
+            baostock documentation "1" is backward-adjusted, "2" is
+            forward-adjusted and "3" is unadjusted.
+
+    Returns:
+        The downloaded rows as a ``pandas.DataFrame``.
+
+    Raises:
+        RuntimeError: If the login or the query reports a non-zero error code.
+    """
+    import os
+
+    # The available columns depend on the bar frequency (see the baostock
+    # "historical quote indicator parameters" chapter). Minute bars carry a
+    # ``time`` column and do not cover indices.
+    if freq == "d":
+        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
+    elif freq in ("w", "m"):
+        fields = "date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg"
+    else:
+        # Must be a plain string: a trailing comma here would turn it into a
+        # one-element tuple and break the query.
+        fields = "date,time,code,open,high,low,close,volume,adjustflag"
+
+    lg = bs.login()
+    if lg.error_code != "0":
+        raise RuntimeError(f"baostock login failed: {lg.error_msg}")
+
+    try:
+        rs = bs.query_history_k_data_plus(
+            code,
+            fields=fields,
+            start_date=start_date,
+            end_date=end_date,
+            frequency=freq,
+            adjustflag=adjustflag,
+        )
+
+        # Collect the result set row by row. ``and`` short-circuits, so
+        # ``rs.next()`` is not called once an error has been reported.
+        data_list = []
+        while rs.error_code == "0" and rs.next():
+            data_list.append(rs.get_row_data())
+
+        # Fail loudly instead of overwriting the CSV with an empty frame.
+        if rs.error_code != "0":
+            raise RuntimeError(f"baostock query failed: {rs.error_msg}")
+
+        result = pd.DataFrame(data_list, columns=rs.fields)
+    finally:
+        # Always release the session, even when the query raised.
+        bs.logout()
+
+    # Write the result set to a CSV file.
+    os.makedirs(save_path, exist_ok=True)
+    filename = save_path + "/" + code + ".csv"
+    result.to_csv(filename, index=True)
+
+    print(result.head())
+
+    return result
+
+
+# Run the download only when executed as a script, not when imported.
+if __name__ == "__main__":
+    download_code_hist()
--- a/d1_showCand.py
+++ b/d1_showCand.py
@ -0,0 +1,45 @@
+# import requirement libraries and tools
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import torch
+import torch.optim as optim
+import yfinance as yf
+import torch.nn as nn
+import torch.functional as F
+import plotly.graph_objects as go
+
+from tqdm.notebook import tqdm
+from torchsummary import summary
+from sklearn.preprocessing import MinMaxScaler
+from torch.utils.data import TensorDataset, DataLoader
+
+
+# Load the daily bars for sh.600000 from the local dataset directory.
+df = pd.read_csv("./datasets/sh.600000.csv")
+
+
+# Create a trace for the candlestick chart. The x axis uses the 'date'
+# column so that it matches the axis title instead of showing row numbers.
+candlestick_trace = go.Candlestick(
+    x=df['date'],
+    open=df['open'],
+    high=df['high'],
+    low=df['low'],
+    close=df['close'],
+    name='Candlestick'
+)
+
+# Create the layout; the title names the security that is actually plotted.
+layout = go.Layout(
+    title='sh.600000 Candlestick Chart',
+    xaxis=dict(title='date'),
+    yaxis=dict(title='price', rangemode='normal')
+)
+
+# Create the figure and add the candlestick trace and layout
+fig = go.Figure(data=[candlestick_trace], layout=layout)
+
+# Hide the range slider that plotly adds below candlestick charts by default
+fig.update_layout(xaxis_rangeslider_visible=False)
+
+# Show the figure
+fig.show()
--- a/d2_viewer.py
+++ b/d2_viewer.py
@ -0,0 +1,67 @@
+# import requirement libraries and tools
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import torch
+import torch.optim as optim
+import yfinance as yf
+import torch.nn as nn
+import torch.functional as F
+import plotly.graph_objects as go
+
+from tqdm.notebook import tqdm
+from torchsummary import summary
+from sklearn.preprocessing import MinMaxScaler
+from torch.utils.data import TensorDataset, DataLoader
+
+
+# Load the daily bars and index them by trading date: the asfreq() calls
+# below need a DatetimeIndex and raise on the default integer index.
+df = pd.read_csv("./datasets/sh.600000.csv", parse_dates=["date"])
+df = df.set_index("date")
+# Move column 'close' to the first position
+col_close = df.pop('close')
+df.insert(0, 'close', col_close)
+
+# Quick inspection. Outside a notebook a bare expression shows nothing, so
+# the results are printed explicitly (df.info() prints on its own).
+print(df.head())
+print(df.tail())
+
+print(df.shape)
+df.info()
+
+print(df.describe().T)
+print(df.duplicated().sum())
+
+# One subplot per numeric column over the whole period.
+df.plot(subplots=True, figsize=(15, 15))
+plt.suptitle(f'Stock attributes from {df.index.min().year} to {df.index.max().year}', y=0.91)
+plt.show()
+
+# Weekly view (weeks ending on Sunday, same anchor as the 'W' alias); missing
+# days are forward-filled. Offset objects avoid deprecated string aliases.
+df.asfreq(pd.offsets.Week(weekday=6), method='ffill').plot(subplots=True, figsize=(15,15), style='-')
+plt.suptitle('Stock attributes over time(Weekly frequency)', y=0.91)
+plt.show()
+
+
+# Month-end view, forward-filled the same way.
+df.asfreq(pd.offsets.MonthEnd(), method='ffill').plot(subplots=True, figsize=(15,15), style='-')
+plt.suptitle('Stock attributes over time(Monthly frequency)', y=0.91)
+plt.show()
+
+print(df[['close']])
+
+
+
+# computing moving average(ma)
+ma_day = [10, 20, 50]
+
+for ma in ma_day:
+    col_name = f'MA for {ma} days'
+    # The first (ma - 1) rows are NaN because the window is not yet full.
+    df[col_name] = df['close'].rolling(ma).mean()
+
+df[['close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']].plot(figsize=(15,5))
+plt.title('Comparison of moving averages and close price')
+plt.show()
+
+
+
+# use pct_change to find the percent change for each day
+df['Daily_Return'] = df['close'].pct_change()
+# plot the daily return percentage
+df.Daily_Return.plot(legend=True, figsize=(15,5))
+plt.title('Daily return percentage of stock')
+plt.show()
+
--- a/d3_prepareddata.py
+++ b/d3_prepareddata.py
@ -0,0 +1,72 @@
+# import requirement libraries and tools
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import torch
+import torch.optim as optim
+import yfinance as yf
+import torch.nn as nn
+import torch.functional as F
+import plotly.graph_objects as go
+
+from tqdm.notebook import tqdm
+from torchsummary import summary
+from sklearn.preprocessing import MinMaxScaler
+from torch.utils.data import TensorDataset, DataLoader
+
+
+
+def get_datasets(batch_size=32, shuffle=False, csv_path="./datasets/sh.600000.csv", seq_len=11):
+    """Build training/validation loaders of sliding price windows.
+
+    Reads the CSV at ``csv_path``, rescales the open/high/low/close columns
+    with one shared ``MinMaxScaler``, writes the scaled frame to
+    ``./datasets/features.csv`` and cuts the four price columns into
+    overlapping windows of ``seq_len`` consecutive rows. In every window the
+    first ``seq_len - 1`` rows are the model input and the last row is the
+    target.
+
+    Args:
+        batch_size: Batch size of both returned loaders.
+        shuffle: Whether the loaders shuffle their samples.
+        csv_path: CSV file to read; must contain open/high/low/close columns.
+        seq_len: Rows per window (inputs plus the one target row).
+
+    Returns:
+        ``(train_dataloader, valid_dataloader, sequences, scaler)`` where
+        ``sequences`` is the full NumPy array of windows with shape
+        ``(n_windows, seq_len, 4)`` and ``scaler`` is the fitted scaler.
+
+    Raises:
+        ValueError: If ``seq_len`` is below 2 or the CSV has fewer than
+            ``seq_len`` rows.
+    """
+    if seq_len < 2:
+        raise ValueError("seq_len must be at least 2 (one input row plus one target row)")
+
+    df = pd.read_csv(csv_path)
+
+    # Normalize the data. The scaler is fitted on the 'low' column only and
+    # then applied to all four price columns, so they share one scale and a
+    # single inverse_transform maps any of them back to prices. Values above
+    # the highest 'low' therefore land above the nominal upper bound of 15.
+    df2 = df.copy(deep=True)
+    price_cols = ['open', 'high', 'low', 'close']
+    scaler = MinMaxScaler(feature_range=(0,15)).fit(df2.low.values.reshape(-1,1))
+    for col in price_cols:
+        df2[col] = scaler.transform(df2[col].values.reshape(-1,1)).ravel()
+    df2.to_csv("./datasets/features.csv")
+    data = df2[price_cols].values
+
+    if len(data) < seq_len:
+        raise ValueError(
+            f"need at least {seq_len} rows to build one window, got {len(data)}"
+        )
+
+    # Sliding windows with stride 1: window i covers rows i .. i + seq_len - 1.
+    sequences = np.array(
+        [data[start: start + seq_len] for start in range(len(data) - seq_len + 1)]
+    )
+
+    # Chronological split: 80% training, 10% validation, and the remaining
+    # 10% is the test portion.
+    valid_set_size_percentage = 10
+    test_set_size_percentage = 10
+
+    valid_set_size = int(np.round(valid_set_size_percentage/100*sequences.shape[0]))
+    test_set_size  = int(np.round(test_set_size_percentage/100*sequences.shape[0]))
+    train_set_size = sequences.shape[0] - (valid_set_size + test_set_size)
+    valid_end = train_set_size + valid_set_size
+
+    x_train = sequences[:train_set_size,:-1,:]
+    y_train = sequences[:train_set_size,-1,:]
+
+    x_valid = sequences[train_set_size:valid_end,:-1,:]
+    y_valid = sequences[train_set_size:valid_end,-1,:]
+
+    # Everything from valid_end onwards is the test portion. It is not
+    # materialized here; callers can slice it from the returned `sequences`.
+
+    x_train = torch.tensor(x_train).float()
+    y_train = torch.tensor(y_train).float()
+
+    x_valid = torch.tensor(x_valid).float()
+    y_valid = torch.tensor(y_valid).float()
+
+    train_dataset = TensorDataset(x_train,y_train)
+    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
+
+    valid_dataset = TensorDataset(x_valid,y_valid)
+    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=shuffle)
+
+
+    return train_dataloader, valid_dataloader, sequences, scaler
+
+
+# get_datasets()
--- a/datasets/features.csv
+++ b/datasets/features.csv
--- a/datasets/sh.600000.csv
+++ b/datasets/sh.600000.csv
--- a/m1_model.py
+++ b/m1_model.py
@ -0,0 +1,19 @@
+from torch import nn
+from torchsummary import summary
+
+
+class NeuralNetwork(nn.Module):
+    """Single-layer LSTM followed by a linear head that predicts the next step.
+
+    Args:
+        num_feature: Number of features per time step; also the size of the
+            predicted vector.
+        hidden_size: Width of the LSTM hidden state (defaults to 64).
+    """
+
+    def __init__(self, num_feature, hidden_size=64):
+        super().__init__()
+        self.lstm  = nn.LSTM(num_feature, hidden_size, batch_first=True)
+        self.fc    = nn.Linear(hidden_size, num_feature)
+
+    def forward(self, x):
+        """Map a batch of sequences to one predicted feature vector each.
+
+        ``x`` is batch-first: ``(batch, seq_len, num_feature)``. The result is
+        computed from the LSTM's final hidden state, which per the PyTorch
+        docs has shape ``(num_layers, batch, hidden_size)``; the output
+        therefore keeps a leading axis of size 1:
+        ``(1, batch, num_feature)``.
+        """
+        # Only the final hidden state is needed; the per-step outputs and the
+        # cell state are discarded.
+        _, (hidden, _) = self.lstm(x)
+        return self.fc(hidden)
+
+
+
+    
+    
--- a/m2_test.py
+++ b/m2_test.py
@ -0,0 +1,70 @@
+
+# # import requirement libraries and tools
+# import numpy as np
+# import pandas as pd
+# import matplotlib.pyplot as plt
+# import torch
+# import torch.optim as optim
+# import yfinance as yf
+# import torch.nn as nn
+# import torch.functional as F
+# import plotly.graph_objects as go
+
+# from tqdm.notebook import tqdm
+
+# from sklearn.preprocessing import MinMaxScaler
+# from torch.utils.data import TensorDataset, DataLoader
+
+
+
+# # def test():
+# #   model=torch.load('saved_weights.pt')
+# #   x_test= torch.tensor(x_test).float()
+# #   with torch.no_grad():
+# #     y_test_pred = model(x_test)
+# #   y_test_pred = y_test_pred.numpy()[0]
+
+# #   idx=0
+# #   plt.plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
+# #           y_test[:,idx], color='black', label='test target')
+
+# #   plt.plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
+# #           y_test_pred[:,idx], color='green', label='test prediction')
+
+# #   plt.title('future stock prices')
+# #   plt.xlabel('time [days]')
+# #   plt.ylabel('normalized price')
+# #   plt.legend(loc='best')
+
+
+# #   index_values = df[len(df) - len(y_test):].index
+# #   col_values = ['Open', 'Low', 'High', 'Close']
+# #   df_results = pd.DataFrame(data=y_test_pred, index=index_values, columns=col_values)
+
+
+
+# #   # Create a trace for the candlestick chart
+# #   candlestick_trace = go.Candlestick(
+# #       x=df_results.index,
+# #       open=df_results['Open'],
+# #       high=df_results['High'],
+# #       low=df_results['Low'],
+# #       close=df_results['Close'],
+# #       name='Candlestick'
+# #   )
+
+# #   # Create the layout
+# #   layout = go.Layout(
+# #       title='GOOG Candlestick Chart',
+# #       xaxis=dict(title='Date'),
+# #       yaxis=dict(title='Price', rangemode='normal')
+# #   )
+
+# #   # Create the figure and add the candlestick trace and layout
+# #   fig = go.Figure(data=[candlestick_trace], layout=layout)
+
+# #   # Update the layout of the figure
+# #   fig.update_layout(xaxis_rangeslider_visible=False)
+
+# #   # Show the figure
+# #   fig.show()
--- a/m3_train.py
+++ b/m3_train.py
@ -0,0 +1,71 @@
+# import requirement libraries and tools
+import torch
+import torch.optim as optim
+import torch.nn as nn
+import torch.functional as F
+from m1_model import NeuralNetwork
+from torchsummary import summary
+from d3_prepareddata import get_datasets
+
+
+
+def train(dataloader, model,optimizer,mse):
+    """Run one training epoch and return the mean loss per batch.
+
+    Args:
+        dataloader: Iterable of ``(x, y)`` tensor batches.
+        model: Network to optimize; batches are moved to the device that
+            holds its parameters.
+        optimizer: Optimizer stepping ``model``'s parameters.
+        mse: Loss callable invoked as ``mse(prediction, target)``.
+
+    Returns:
+        The loss averaged over the batches, so it is directly comparable to
+        the value returned by ``evaluate``; ``0.0`` for an empty dataloader.
+    """
+    model.train()
+    # Follow the model's device instead of hard-coding "cuda".
+    device = next(model.parameters()).device
+    epoch_loss = 0.0
+
+    for x, y in dataloader:
+        x = x.to(device)
+        y = y.to(device)
+
+        optimizer.zero_grad()
+        pred = model(x)
+
+        # pred[0] drops the leading axis of the model output so that it lines
+        # up with y (assumes the (1, batch, features) output of NeuralNetwork
+        # in m1_model -- confirm before using another model).
+        loss = mse(pred[0], y)
+        loss.backward()
+        optimizer.step()
+        epoch_loss += loss.item()
+
+    num_batches = len(dataloader)
+    return epoch_loss / num_batches if num_batches else 0.0
+
+
+def evaluate(dataloader,model,mse):
+    """Compute the mean loss per batch without updating the model.
+
+    Args:
+        dataloader: Iterable of ``(x, y)`` tensor batches.
+        model: Network to evaluate; batches are moved to the device that
+            holds its parameters.
+        mse: Loss callable invoked as ``mse(prediction, target)``.
+
+    Returns:
+        The loss averaged over the batches.
+
+    Raises:
+        ZeroDivisionError: If ``dataloader`` yields no batches.
+    """
+    model.eval()
+    # Follow the model's device instead of hard-coding "cuda".
+    device = next(model.parameters()).device
+    epoch_loss = 0.0
+
+    # No gradients are needed for evaluation.
+    with torch.no_grad():
+        for x, y in dataloader:
+            x = x.to(device)
+            y = y.to(device)
+            pred = model(x)
+            # pred[0] drops the leading axis of the model output so that it
+            # lines up with y (assumes the (1, batch, features) output of
+            # NeuralNetwork in m1_model -- confirm before using another model).
+            loss = mse(pred[0], y)
+            epoch_loss += loss.item()
+
+    return epoch_loss / len(dataloader)
+
+
+
+def main():
+    """Train the 4-feature LSTM for 50 epochs and keep the best checkpoint.
+
+    After every epoch the validation loss is compared with the best value
+    seen so far; on improvement the whole model object is saved to
+    ``saved_weights.pt``.
+    """
+    # Use the GPU when one is present; otherwise place the model on the CPU
+    # instead of failing on a hard-coded "cuda".
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    m = NeuralNetwork(4).to(device)
+    optimizer = optim.Adam(m.parameters())
+    mse = nn.MSELoss()
+
+    n_epochs = 50
+    best_valid_loss = float('inf')
+    train_dataloader, valid_dataloader, _, _= get_datasets()
+    for epoch in range(1, n_epochs + 1):
+        train_loss = train(train_dataloader,m,mse=mse,optimizer=optimizer)
+        valid_loss = evaluate(valid_dataloader,m,mse=mse)
+        print("train_loss>",train_loss)
+        print("valid_loss>",valid_loss)
+        # Save the best model. torch.save(m, ...) pickles the full module (not
+        # just a state_dict), which is the format m5_predict.py loads.
+        if valid_loss < best_valid_loss:
+            best_valid_loss = valid_loss
+            torch.save(m, 'saved_weights.pt')
+        print(f'\tTrain Loss: {train_loss:.5f} | ' + f'\tVal Loss: {valid_loss:.5f}\n')
+        
+        
+# Start training only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/m5_predict.py
+++ b/m5_predict.py
@ -0,0 +1,46 @@
+
+
+# import requirement libraries and tools
+import torch
+import torch.optim as optim
+import torch.nn as nn
+import torch.functional as F
+from m1_model import NeuralNetwork
+from torchsummary import summary
+from d3_prepareddata import get_datasets
+import pandas as pd
+
+def predict(n_days=10):
+    """Forecast the next ``n_days`` price rows with the saved model.
+
+    Loads ``saved_weights.pt``, seeds the model with the most recent window
+    from ``get_datasets`` and predicts autoregressively: each prediction is
+    appended to the window and the oldest step is dropped. The predictions
+    are mapped back to price scale and printed.
+
+    Args:
+        n_days: Number of future steps to predict (defaults to 10).
+
+    Returns:
+        A ``pandas.DataFrame`` with columns open/high/low/close and one row
+        per predicted day.
+
+    Raises:
+        ValueError: If ``n_days`` is smaller than 1.
+    """
+    if n_days < 1:
+        raise ValueError("n_days must be at least 1")
+
+    # Use the GPU when one is present, otherwise fall back to the CPU.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # The checkpoint is a fully pickled module, so weights_only must be off.
+    # NOTE(security): unpickling executes arbitrary code -- only load
+    # checkpoint files you trust.
+    model = torch.load('saved_weights.pt', map_location=device, weights_only=False).to(device)
+    model.eval()
+
+    _, _, sequences,scaler = get_datasets()
+    # The last window without its oldest step: the most recent rows, used as
+    # the input for the first prediction.
+    last_sequence = sequences[-1:, 1:, :]
+    print(last_sequence)
+    last_sequence = torch.from_numpy(last_sequence).float().to(device)
+
+    # Collect every prediction explicitly rather than reading them back out
+    # of the rolling window, which only works when the window length happens
+    # to equal the number of predicted days.
+    predictions = []
+    with torch.no_grad():
+        for _ in range(n_days):
+            # With a batch of one the model output is (1, 1, features), so it
+            # can be appended along the time axis as the newest step.
+            pred_i = model(last_sequence)
+            predictions.append(pred_i)
+            last_sequence = torch.cat((last_sequence[:, 1:, :], pred_i), dim=1)
+
+    pred_days = torch.cat(predictions, dim=1).reshape(n_days, -1).to("cpu").numpy()
+
+    # Inverse transform the predicted values. The scaler comes from
+    # get_datasets and is applied to all four columns at once.
+    pred_days = scaler.inverse_transform(pred_days)
+
+    df_pred = pd.DataFrame(
+        data=pred_days,
+        columns=['open', 'high', 'low', 'close']
+    )
+
+    print(df_pred)
+
+    return df_pred
+    
+# Run the forecast only when executed as a script, not when imported.
+if __name__ == "__main__":
+    predict()
--- a/saved_weights.pt
+++ b/saved_weights.pt