Add files via upload

Commit 652f2378e8 by 杜鹏飞 (committed via GitHub), 2024-08-06 11:08:31 +08:00
18 changed files with 1493 additions and 0 deletions

0_csvpath.py (new file)
@@ -0,0 +1,61 @@
import os

# base_dir = os.path.join(os.path.dirname(__file__), "../datasets")


def csv_paths(dataset_path=""):
    qfq_weekly = os.path.join(dataset_path, "weekly_qfq")
    qfq_daily = os.path.join(dataset_path, "daily_qfq")
    hfq_weekly = os.path.join(dataset_path, "weekly_hfq")
    hfq_daily = os.path.join(dataset_path, "daily_hfq")
    features_weekly = os.path.join(dataset_path, "weekly_features")
    features_daily = os.path.join(dataset_path, "daily_features")
    templates_weekly = os.path.join(dataset_path, "weekly_templates")
    templates_daily = os.path.join(dataset_path, "daily_templates")
    results_weekly = os.path.join(dataset_path, "weekly_results")
    results_daily = os.path.join(dataset_path, "daily_results")
    # Create any directory that does not exist yet.
    for p in (
        qfq_weekly, qfq_daily, hfq_weekly, hfq_daily,
        features_weekly, features_daily,
        templates_weekly, templates_daily,
        results_weekly, results_daily,
    ):
        os.makedirs(p, exist_ok=True)
    week = {
        "qfq": qfq_weekly,
        "hfq": hfq_weekly,
        "features": features_weekly,
        "templates": templates_weekly,
        "results": results_weekly,
    }
    daily = {
        "qfq": qfq_daily,
        "hfq": hfq_daily,
        "features": features_daily,
        "templates": templates_daily,
        "results": results_daily,
    }
    all_codes_path = os.path.join(dataset_path, "0all.csv")
    return all_codes_path, week, daily
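
A minimal usage sketch (not part of the commit) of what csv_paths returns; "./datasets" is an assumed root, and importlib is needed because the module name starts with a digit:

# Hypothetical usage of csv_paths.
from importlib import import_module

csvpath = import_module("0_csvpath")
all_codes_path, week, daily = csvpath.csv_paths(dataset_path="./datasets")
print(all_codes_path)    # ./datasets/0all.csv
print(week["features"])  # ./datasets/weekly_features
print(daily["results"])  # ./datasets/daily_results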

0utils.py (new file)
@@ -0,0 +1,87 @@
import pandas as pd
from bson.json_util import dumps
from io import StringIO
import json
from datetime import datetime
import time


def download_to_column(df):
    ##################################################################
    # df = stock_hfq_df
    # aa = df.loc[:, "收盘"]
    # df["前收盘"] = aa.shift()
    # df.at[0, "前收盘"] = df.at[0, "收盘"]
    #
    # tt = df.loc[:, "换手率"]
    # df["前换手率"] = tt.shift()
    # df.at[0, "前换手率"] = df.at[0, "换手率"]
    # df["换手率"] = df["换手率"].astype(float)
    # df["前换手率"] = df["换手率"].pct_change()
    # df["收盘"] = df["收盘"].astype(float)
    # df["前收盘"] = df["收盘"].pct_change()
    ##################################################################
    stock = pd.DataFrame()
    # stock["d"] = time.strftime("%Y-%m-%d", time.localtime(df.loc[:, "日期"]))
    stock["dateT"] = df.loc[:, "日期"].astype(str)        # date
    stock["open"] = df.loc[:, "开盘"].astype(float)       # open
    stock["close"] = df.loc[:, "收盘"].astype(float)      # close
    # stock["pre_close"] = df.loc[:, "前收盘"]
    stock["high"] = df.loc[:, "最高"].astype(float)       # high
    stock["low"] = df.loc[:, "最低"].astype(float)        # low
    stock["turnover"] = df.loc[:, "换手率"].astype(float)  # turnover rate
    # stock['pre_turnover'] = df.loc[:, "前换手率"]
    stock["zf"] = df.loc[:, "振幅"].astype(float)         # amplitude
    stock["zdf"] = df.loc[:, "涨跌幅"].astype(float)      # percent change
    # # kdj9(stock)
    # # kdj45(stock)
    # # ma(stock)
    # data = stock.replace(np.nan, 0)
    # data = data.replace(np.inf, 0)
    # data = data.fillna(0)
    return stock


def jsonl_2_dp(jsonl):
    json_data = dumps(jsonl, indent=2)
    pd_stocks = pd.read_json(StringIO(json_data))
    return pd_stocks


def dp_2_jsonl(dataset):
    docs = dataset.to_json(orient="records")
    docs = json.loads(docs)
    # collection.insert_many(docs)
    return docs


def timestamp2time(timestamp=1707769336):
    # `datetime` is imported as the class, so call fromtimestamp on it directly.
    dateArray = datetime.fromtimestamp(timestamp)
    # otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
    otherStyleTime = dateArray.strftime("%Y-%m-%d")
    print(otherStyleTime)
    return otherStyleTime


def time2timestamp(t="2024-02-13 04:22:16"):
    # The time arrives as a string.
    # Convert it to a struct_time...
    timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
    print(timeArray)
    # timeArray exposes fields such as tm_year.
    # ...then to a Unix timestamp.
    timeStamp = int(time.mktime(timeArray))
    print(timeStamp)
    return timeStamp


######################################################
######################################################
######################################################
def data_clean(df):
    return


######################################################
######################################################
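
A short sketch (a toy DataFrame, not real quote data) of the DataFrame/JSON round trip and the time helpers in this file:

# Hypothetical round trip through the 0utils.py helpers.
import pandas as pd
from importlib import import_module

u = import_module("0utils")  # module name starts with a digit

df = pd.DataFrame({"close": [10.0, 10.5], "volume": [100, 120]})
docs = u.dp_2_jsonl(df)   # -> list of dicts, one per row
df2 = u.jsonl_2_dp(docs)  # -> back to a DataFrame
assert df2.shape == df.shape

ts = u.time2timestamp("2024-02-13 04:22:16")  # -> Unix seconds (local time)
day = u.timestamp2time(ts)                    # -> "2024-02-13"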

__init__.py (new file, empty)
m0_download.py (new file)
@@ -0,0 +1,120 @@
import baostock as bs
import pandas as pd

#### Log in ####
# lg = bs.login()
# Show the login response
# print("login respond error_code:" + lg.error_code)
# print("login respond error_msg:" + lg.error_msg)


def get_all_codes(save_path="./datasets"):
    # s = bs.query_all_stock()
    # s_df = s.get_data()
    # s_df.to_csv("./datasets/all_codes.csv", encoding="utf-8", index=False)
    #### Log in ####
    lg = bs.login()
    # Show the login response
    print("login respond error_code:" + lg.error_code)
    print("login respond error_msg:" + lg.error_msg)
    #### Query the security list ####
    rs = bs.query_all_stock(day="2024-06-20")
    print("query_all_stock respond error_code:" + rs.error_code)
    print("query_all_stock respond error_msg:" + rs.error_msg)
    #### Collect the result set ####
    data_list = []
    while (rs.error_code == "0") & rs.next():
        # Fetch one record at a time and accumulate them
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)
    #### Write the result set to csv ####
    result.to_csv(save_path + "/all_codes.csv", encoding="utf-8", index=False)
    print(result)
    #### Log out ####
    bs.logout()


def download_code_hist(
    save_path="./datasets/origins",
    code="sh.600000",
    start_date="1999-09-01",
    end_date="2024-06-30",
    freq="d",
    adjustflag="1",
):
    #### Fetch historical A-share K-line data ####
    # For the full field list see the "historical quote fields" docs; "minute"
    # bars take different fields than "daily" bars, and minute bars exclude indices.
    # Minute fields: date,time,code,open,high,low,close,volume,amount,adjustflag
    # Week/month fields: date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
    fields = "date,time,code,open,high,low,close,volume,adjustflag"
    if freq == "d":
        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
    rs = bs.query_history_k_data_plus(
        code,
        fields=fields,
        start_date=start_date,
        end_date=end_date,
        frequency=freq,
        adjustflag=adjustflag,  # "1" = hfq (backward-adjusted)
    )
    #### Collect the result set ####
    data_list = []
    while (rs.error_code == "0") & rs.next():
        # Fetch one record at a time and accumulate them
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)
    # print(result)
    #### Write the result set to csv ####
    filename = save_path + "/" + code + ".csv"
    result.to_csv(filename, index=True)
    # print(result)


def download_codes(save_path="./datasets/origins", filters=[], freq="d", adjustflag="1"):
    codes = pd.read_csv("./datasets/all_codes.csv")
    codes = codes["code"].tolist()
    lg = bs.login()
    # Show the login response
    print("login respond error_code:" + lg.error_code)
    print("login respond error_msg:" + lg.error_msg)
    for code in codes:
        ignore = True
        for f in filters:
            if code.startswith(f):
                ignore = False
        if ignore:
            continue
        print(code)
        # if code < "sz.300138":
        #     continue
        download_code_hist(save_path=save_path, code=code, freq=freq, adjustflag=adjustflag)
    #### Log out ####
    bs.logout()


# get_all_codes(save_path="./datasets")
download_codes(
    save_path="./datasets/cyday",
    filters=["sh.68", "sz.3"],
    # filters=["sz.301"]
    freq="d",
    adjustflag="1",  # hfq
)
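
For reference, baostock's adjustflag takes "1" for backward-adjusted (hfq), "2" for forward-adjusted (qfq), and "3" for unadjusted prices. A one-off sketch of fetching a single code (the dates and paths are assumed examples, and query_history_k_data_plus needs an active login):

# Hypothetical single-code download; the output directory must exist.
import os
import baostock as bs

os.makedirs("./datasets/origins", exist_ok=True)
bs.login()
download_code_hist(
    save_path="./datasets/origins",
    code="sh.600000",
    start_date="2023-01-01",
    end_date="2023-12-31",
    freq="d",
    adjustflag="2",  # "2" = qfq (forward-adjusted)
)
bs.logout()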

m10_predict.py (new file)
@@ -0,0 +1,4 @@
import torch

# Loading a whole pickled model requires the class that defines it
# (ViT3D in m6_vit.py) to be importable when unpickling.
from m6_vit import ViT3D

model = torch.load("./model.pt")

m1_data_clean.py (new file)
@@ -0,0 +1,44 @@
import pandas as pd
import torch
import os
import numpy as np


def clean_volume_0(df):
    # Replace nulls with 0
    df = df.replace(np.nan, 0)
    df = df.replace(np.inf, 0)
    df = df.fillna(0)
    df["volume"] = df["volume"].astype(int)
    # Drop rows with zero volume or zero close. Boolean masks avoid the
    # label/position mixup of df.drop(df.index[index]) after the first drop.
    df = df[df["volume"] != 0]
    df = df[df["close"] != 0]
    df = df.reset_index(drop=True)  # reset_index returns a copy; assign it back
    return df


def process_features(in_dir, out_dir):
    # codes = pd.read_csv("./datasets/all_codes.csv")
    # codes = codes["code"].tolist()
    # codes = ["sz.300001"]
    file_dir = in_dir  # "./datasets/origins"
    a_s = [a for a in sorted(os.listdir(file_dir), key=lambda x: str(x[5:]))]
    for a in a_s:
        file = os.path.join(file_dir, a)
        df = pd.read_csv(file)
        df = clean_volume_0(df)
        df.to_csv(os.path.join(out_dir, a))
        print(a)


process_features(in_dir="./datasets/cyday", out_dir="./datasets/cleaned/")
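
A toy check (synthetic rows, not market data) of what clean_volume_0 drops:

# Hypothetical demo: rows with zero volume or zero close are removed.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "close":  [10.0, 0.0, 11.0, np.nan],
    "volume": [100,  200,  0,    300],
})
out = clean_volume_0(df)
print(out)  # only the first row survives (the NaN close becomes 0 and is dropped)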

m2_features.py (new file)
@@ -0,0 +1,286 @@
import akshare as ak
from io import StringIO
from bson.json_util import dumps
import json
import os
import numpy as np
import pandas as pd

X_Length = 30

pd.set_option("display.width", 1000)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 1000)


def calc_pre_high_list(df):
    # Trace back the three previous high points; if there is only one,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    df["ph"] = None  # object columns so a list can be stored per row
    df["pl"] = None
    ph = []
    pl = []
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.at[df.index[i], "ph"] = h
            df.at[df.index[i], "pl"] = l
            continue
        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if ma20 > ma60:
            # Uptrend: maintain the list of prior highs.
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
                else:
                    ph.append(h)
        else:
            # Downtrend: maintain the list of prior lows.
            r = all(v < l for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < l:
                        pre_v = v
                if pre_v > 0:
                    pl.append(pre_v)
                else:
                    pl.append(l)
        df.at[df.index[i], "ph"] = ph[:-1]
        df.at[df.index[i], "pl"] = pl[:-1]
    return df


def calc_pre_high_list2(df):
    # Trace back the three previous high points; if there is only one,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    df["ph"] = None
    df["pl"] = None
    ph = []
    pl = []
    h_i = 0
    l_i = 0
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.at[df.index[i], "ph"] = h
            df.at[df.index[i], "pl"] = l
            continue
        c = df.iloc[i]["close"]
        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if c > ma20:
            # Above the 20-day mean: maintain the list of prior highs.
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
                else:
                    ph.append(h)
        else:
            # Below the 20-day mean: maintain the list of prior lows.
            r = all(v < l for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < l:
                        pre_v = v
                if pre_v > 0:
                    pl.append(pre_v)
                else:
                    pl.append(l)
        df.at[df.index[i], "ph"] = ph[:-1]
        df.at[df.index[i], "pl"] = pl[:-1]
    return df


def kdj_window(df, window=160, m1=60, m2=60, low="low", high="high", close="close"):
    low_list = df[low].rolling(window).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)
    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    k_col, d_col, j_col = "k" + str(window), "d" + str(window), "j" + str(window)
    df[k_col] = rsv.ewm(alpha=1 / m1, adjust=False).mean()
    df[d_col] = df[k_col].ewm(alpha=1 / m2, adjust=False).mean()
    df[j_col] = 3 * df[k_col] - 2 * df[d_col]


def kdj4(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=4).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=4).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)
    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df["k4"] = rsv.ewm(com=3).mean()
    df["d4"] = df["k4"].ewm(com=3).mean()
    df["j4"] = 3 * df["k4"] - 2 * df["d4"]


def kdj160(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=160).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=160).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)
    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df["k160"] = rsv.ewm(com=60).mean()
    df["d160"] = df["k160"].ewm(com=60).mean()
    df["j160"] = 3 * df["k160"] - 2 * df["d160"]


def kdj9(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=9).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=9).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)
    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df["k9"] = rsv.ewm(com=3).mean()
    df["d9"] = df["k9"].ewm(com=3).mean()
    df["j9"] = 3 * df["k9"] - 2 * df["d9"]


def kdj45(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=45).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=45).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)
    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df["k45"] = rsv.ewm(com=15).mean()
    df["d45"] = df["k45"].ewm(com=15).mean()
    df["j45"] = 3 * df["k45"] - 2 * df["d45"]


def ma(df, close="close"):
    df["ma5"] = df[close].rolling(window=5).mean().dropna()
    df["ma20"] = df[close].rolling(window=20).mean().dropna()
    df["ma60"] = df[close].rolling(window=60).mean().dropna()
    # df['ma1000'] = df[close].rolling(window=1000).mean().dropna()


# def zdf(data):
#     if data.shape[0] % 16 != 0:
#         print("error================================> not 16 ")
#     zdf = pd.DataFrame()
#     zdf["zdf"] = -1
#     for i in range(int(data.shape[0] / 16) - 1):
#         s = int(i * 16 + 16)
#         e = int(s + 16)
#         print(e)
#         # print(e)
#         p1 = data[s - 1 : s]["close"].values[0]
#         p2 = data[e - 1 : e]["close"].values[0]
#         print(p1)
#         print(p2)
#         zdf[e - 1 : e]["zdf"] = p2 / p1
#         # break
#     print(zdf["zdf"])
#     data["zdf"] = zdf["zdf"]
#     return data


def get_features(data):
    lines = data.shape[0]
    if lines < 120:
        return None
    ma(data)
    # data = zdf(data)
    data = data.loc[60:, :]
    # data = data.copy()
    # data.reset_index(drop=True)
    # data = calc_pre_high_list(data)
    dataset = pd.DataFrame()
    close = data.iloc[0:1]["close"].values[0]
    volume = data.iloc[0:1]["volume"].values[0]
    # print(volume)
    dataset["date"] = data["date"]
    # (normalized variants, kept for reference)
    # dataset["high"] = data["high"].astype(float) / close
    # dataset["low"] = data["low"].astype(float) / close
    # dataset["open"] = data["open"].astype(float) / close
    # dataset["close"] = data["close"].astype(float) / close
    # dataset["ma5"] = data["ma5"].astype(float) / close
    # dataset["ma20"] = data["ma20"].astype(float) / close
    # dataset["ma60"] = data["ma60"].astype(float) / close
    # dataset["volume"] = data["volume"].astype(float) / volume
    dataset["zdf"] = data["close"].astype(float) / data["preclose"]
    dataset["high"] = data["high"].astype(float)
    dataset["low"] = data["low"].astype(float)
    dataset["open"] = data["open"].astype(float)
    dataset["close"] = data["close"].astype(float)
    dataset["ma5"] = data["ma5"].astype(float)
    dataset["ma20"] = data["ma20"].astype(float)
    dataset["ma60"] = data["ma60"].astype(float)
    dataset["volume"] = data["volume"].astype(float)
    # Keep only series whose daily change ratio stays within [0.7, 1.3).
    if all(dataset["zdf"] >= 0.7) and all(dataset["zdf"] < 1.3):
        return dataset
    return None
    # (unreachable cleanup kept from the original)
    # dataset = dataset.replace(np.nan, 0)
    # dataset = dataset.replace(np.inf, 0)
    # dataset = dataset.fillna(0)
    # return dataset


def process_features(in_dir, out_dir):
    # codes = pd.read_csv("./datasets/all_codes.csv")
    # codes = codes["code"].tolist()
    # codes = ["sz.300001"]
    file_dir = in_dir  # "./datasets/origins"
    a_s = [a for a in sorted(os.listdir(file_dir), key=lambda x: str(x[5:]))]
    for a in a_s:
        # if a.startswith("sz.30"):
        #     continue
        file = os.path.join(file_dir, a)
        df = pd.read_csv(file)
        df = get_features(df)
        if df is not None:
            # file = "./datasets/features/" + a
            file = out_dir + a
            df.to_csv(file, index=False)
        print(a)
        # break


process_features(in_dir="./datasets/cleaned", out_dir="./datasets/features/")
# process_features(in_dir="./02stocks/vit15minutes/datasets/origins",
#                  out_dir="./02stocks/vit15minutes/datasets/features/")
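
The kdj* helpers all follow the same recipe: RSV = (close − rolling low) / (rolling high − rolling low) × 100, then K and D as exponential means of RSV and K, and J = 3K − 2D. A synthetic-data sketch of the 9-period variant:

# Hypothetical KDJ demo on a random walk (not market data).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
close = 10 + np.cumsum(rng.normal(0, 0.1, 200))
df = pd.DataFrame({"close": close, "high": close + 0.1, "low": close - 0.1})
kdj9(df)  # adds k9, d9, j9 columns in place
print(df[["k9", "d9", "j9"]].tail())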

m3_loaddatasets.py (new file)
@@ -0,0 +1,192 @@
import pandas as pd
import torch
import os
import math
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

max_y = 0
min_y = 99999
const_y_days = 5


def y_feature_extract(y1):
    y = y1["zdf"].values[0]
    y = y - 1 + 0.3
    y = int(y * 1000)  # zdf in [0.7, 1.3) maps to [0, 600); num_classes = 600
    return y


def y_feature_extract_1_0(y1):
    y = y1["zdf"].max()
    if y > 1.1:
        return 1
    else:
        return 0


def feature_extract_x_7(x1):
    close = x1[0:1]["close"].values[0]
    volume = x1[0:1]["volume"].values[0]
    # print(x)
    # print(x.shape)
    factor = 1
    x = pd.DataFrame()
    x["high"] = x1["high"] / close * factor
    x["low"] = x1["low"] / close * factor
    x["open"] = x1["open"] / close * factor
    x["close"] = x1["close"] / close * factor
    x["ma5"] = x1["ma5"] / close * factor
    x["ma20"] = x1["ma20"] / close * factor
    x["ma60"] = x1["ma60"] / close * factor
    # x["volume"] = x1["volume"] / volume * factor
    return x


def feature_extract_x_2(x1):
    close = x1[0:1]["close"].values[0]
    volume = x1[0:1]["volume"].values[0]
    # print(x)
    # print(x.shape)
    factor = 10
    x = pd.DataFrame()
    x["high"] = (x1["high"] - x1["close"]) / close * factor
    x["low"] = (x1["low"] - x1["close"]) / close * factor
    x["open"] = (x1["open"] - x1["close"]) / close * factor
    # x["close"] = (x1["close"] - x["close"]) / close * factor
    x["zdf"] = x1["zdf"] - 1
    x["ma5"] = (x1["ma5"] - x1["close"]) / close * factor
    x["ma20"] = (x1["ma20"] - x1["close"]) / close * factor
    x["ma60"] = (x1["ma60"] - x1["close"]) / close * factor
    x["volume"] = x1["volume"] / volume * factor
    return x


def feature_extract_x(x1):
    close = x1[0:1]["close"].values[0]
    volume = x1[0:1]["volume"].values[0]
    # print(x)
    # print(x.shape)
    factor = 10
    x = pd.DataFrame()
    x["high"] = x1["high"] / close * factor
    x["low"] = x1["low"] / close * factor
    x["open"] = x1["open"] / close * factor
    x["close"] = x1["close"] / close * factor
    x["ma5"] = x1["ma5"] / close * factor
    x["ma20"] = x1["ma20"] / close * factor
    x["ma60"] = x1["ma60"] / close * factor
    x["volume"] = x1["volume"] / volume * factor
    return x


def features(x1, y1):
    y = y_feature_extract_1_0(y1)
    # if y >= 600 or y <= 0:
    #     print("error--------------------y > 600")
    #     print(y)
    #     print(y1)
    x = feature_extract_x_2(x1)
    return x, [y]


class DatasetsDay(Dataset):
    def __init__(self, df, day_length=40):
        self.df = df
        self.day_length = day_length

    def __len__(self):
        d = self.df.shape[0]
        d = d - self.day_length - const_y_days
        return 0 if d < 0 else d

    def __getitem__(self, i):
        a = i
        e = a + self.day_length
        if e >= self.df.shape[0]:
            print(
                "error================================================datasets_loader?"
            )
            return torch.ones(0, 0), torch.ones(0, 0)
        x1 = self.df.iloc[a:e, :]
        y1 = self.df.iloc[e : e + const_y_days, :]
        # print(x)
        # print(y)
        ############################################
        x, y = features(x1, y1)
        ############################################
        x = torch.Tensor(x.values)
        # x = torch.unsqueeze(x, dim=0)
        # print(x.shape)
        # print("-----------------------x.shape")
        y = torch.LongTensor(y)
        y = torch.squeeze(y)
        return x, y

    def get_last_item(self):
        s = self.df.shape[0] - self.day_length
        e = self.df.shape[0]
        x1 = self.df.iloc[s:e, :]
        x = feature_extract_x(x1)
        x = torch.Tensor(x.values)
        return x


def test(features_dir="./datasets/features1/"):
    seq_length = 50
    files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))]
    for file in files:
        print(file)
        d = pd.read_csv(os.path.join(features_dir, file))
        a = DatasetsDay(d, day_length=seq_length)
        # print(len(a))
        global max_y
        global min_y
        max_y = 0
        min_y = 99999
        loader = DataLoader(a, batch_size=1, shuffle=True)
        for step, (x, y) in enumerate(loader):
            # print(step)
            print(x.shape)
            print(y.shape)
            # print("-----------------------x.shape")
            # print(x)
            # print(y)
            break
        print(max_y)
        print(min_y)


# files = [print(a) for a in sorted(os.listdir("./datasets/features"), key=lambda x: x[5:])]
# test(features_dir="./02stocks/vit15minutes/datasets/features")
# test(features_dir="./datasets/features")
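
A sketch of DatasetsDay on fabricated feature rows (the columns match what feature_extract_x_2 and y_feature_extract_1_0 read; the values are made up):

# Hypothetical walk-through of one sample from DatasetsDay.
import numpy as np
import pandas as pd

n = 60
rng = np.random.default_rng(0)
base = 10 + np.cumsum(rng.normal(0, 0.05, n))
df = pd.DataFrame({
    "zdf": 1 + rng.normal(0, 0.01, n),
    "high": base + 0.1, "low": base - 0.1,
    "open": base, "close": base,
    "ma5": base, "ma20": base, "ma60": base,
    "volume": rng.integers(1_000, 2_000, n).astype(float),
})
ds = DatasetsDay(df, day_length=40)  # len(ds) == 60 - 40 - const_y_days == 15
x, y = ds[0]
print(x.shape, y)  # torch.Size([40, 8]) and a 0/1 tensor label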

m4_embedding.py (new file, empty)
m5_position.py (new file)
@@ -0,0 +1,15 @@
import json
import os
import time
import datetime
import pandas as pd
import akshare as ak


def day_zt_xiao_k(df):
    shape = df.shape

m6_vit.py (new file)
@@ -0,0 +1,133 @@
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
import torch
from einops import repeat


class Attention(nn.Module):
    def __init__(self, dim, n_heads, dropout):
        super().__init__()
        self.n_heads = n_heads
        # batch_first=True because inputs arrive as (batch, seq, dim)
        self.att = torch.nn.MultiheadAttention(
            embed_dim=dim, num_heads=n_heads, dropout=dropout, batch_first=True
        )
        self.q = torch.nn.Linear(dim, dim)
        self.k = torch.nn.Linear(dim, dim)
        self.v = torch.nn.Linear(dim, dim)

    def forward(self, x):
        # Use the learned projections (passing x directly would ignore q/k/v).
        q = self.q(x)
        k = self.k(x)
        v = self.v(x)
        attn_output, attn_output_weights = self.att(q, k, v)
        return attn_output


class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)


class FeedForward(nn.Sequential):
    def __init__(self, dim, hidden_dim, dropout=0.0):
        super().__init__(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )


# Quick shape check:
# ff = FeedForward(dim=128, hidden_dim=256)
# ff(torch.ones((1, 5, 128))).shape


class ResidualAdd(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        res = x
        x = self.fn(x, **kwargs)
        x += res
        return x


class ViT3D(nn.Module):
    def __init__(
        self,
        features_len=8,
        seq_len=20,
        # img_size=144,
        # patch_size=4,
        emb_dim=512,
        n_layers=24,
        num_classes=600,
        dropout=0.1,
        heads=8,
    ):
        super(ViT3D, self).__init__()
        # self.feature_embedding = nn.Sequential(
        #     # break the image into s1 x s2 patches and flatten them
        #     # Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
        #     # nn.Linear(patch_size * patch_size * in_channels, emb_size)
        #     nn.Linear(features_len, emb_dim)
        # )
        self.feature_embedding = nn.Linear(features_len, emb_dim)
        self.pos_embedding = nn.Parameter(torch.randn(1, seq_len + 1, emb_dim))
        self.cls_token = nn.Parameter(torch.rand(1, 1, emb_dim))
        # Transformer encoder
        self.n_layers = n_layers
        self.layers = nn.ModuleList([])
        for _ in range(n_layers):
            transformer_block = nn.Sequential(
                ResidualAdd(
                    PreNorm(emb_dim, Attention(emb_dim, n_heads=heads, dropout=dropout))
                ),
                ResidualAdd(
                    PreNorm(emb_dim, FeedForward(emb_dim, emb_dim, dropout=dropout))
                ),
            )
            self.layers.append(transformer_block)
        # Classification head
        self.head = nn.Sequential(
            nn.LayerNorm(emb_dim),
            nn.Linear(emb_dim, num_classes),
        )

    def forward(self, x):
        # Embed the per-step feature vectors
        x = self.feature_embedding(x)
        b, n, _ = x.shape
        # Prepend the cls token
        cls_tokens = repeat(self.cls_token, "1 1 d -> b 1 d", b=b)
        x = torch.cat([cls_tokens, x], dim=1)
        x += self.pos_embedding[:, : (n + 1)]
        # Transformer layers
        for i in range(self.n_layers):
            x = self.layers[i](x)
        # Classify from the cls token. Raw logits are returned because
        # nn.CrossEntropyLoss in m7_train.py applies softmax internally.
        x = self.head(x[:, 0, :])
        return x


# model = ViT3D()
# print(model)
# print(model(torch.ones((1, 50, 8))))

m6_vit_1d.py (new file)
@@ -0,0 +1,153 @@
import torch
from torch import nn
from einops import rearrange, repeat, pack, unpack
from einops.layers.torch import Rearrange


# classes
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.0):
        super().__init__()
        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)


class Attention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=64, dropout=0.0):
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == dim)
        self.heads = heads
        self.scale = dim_head**-0.5
        self.norm = nn.LayerNorm(dim)
        self.attend = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
        self.to_out = (
            nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
            if project_out
            else nn.Identity()
        )

    def forward(self, x):
        x = self.norm(x)
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=self.heads), qkv)
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn = self.attend(dots)
        attn = self.dropout(attn)
        out = torch.matmul(attn, v)
        out = rearrange(out, "b h n d -> b n (h d)")
        return self.to_out(out)


class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.0):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(
                nn.ModuleList(
                    [
                        Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout),
                        FeedForward(dim, mlp_dim, dropout=dropout),
                    ]
                )
            )

    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


class ViT1D(nn.Module):
    def __init__(
        self,
        *,
        seq_len,
        patch_size,
        num_classes,
        dim,
        depth,
        heads,
        mlp_dim,
        channels=3,
        dim_head=64,
        dropout=0.0,
        emb_dropout=0.0
    ):
        super().__init__()
        assert (seq_len % patch_size) == 0
        num_patches = seq_len // patch_size
        patch_dim = channels * patch_size
        self.to_patch_embedding = nn.Sequential(
            Rearrange("b c (n p) -> b n (p c)", p=patch_size),
            nn.LayerNorm(patch_dim),
            nn.Linear(patch_dim, dim),
            nn.LayerNorm(dim),
        )
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(dim))
        self.dropout = nn.Dropout(emb_dropout)
        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
        self.mlp_head = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, num_classes))

    def forward(self, series):
        x = self.to_patch_embedding(series)
        b, n, _ = x.shape
        cls_tokens = repeat(self.cls_token, "d -> b d", b=b)
        x, ps = pack([cls_tokens, x], "b * d")
        x += self.pos_embedding[:, : (n + 1)]
        x = self.dropout(x)
        x = self.transformer(x)
        cls_tokens, _ = unpack(x, ps, "b * d")
        return self.mlp_head(cls_tokens)


if __name__ == "__main__":
    v = ViT1D(
        seq_len=256,
        patch_size=16,  # i.e., this is the number of features
        num_classes=1000,
        dim=1024,
        depth=6,
        heads=8,
        mlp_dim=2048,
        dropout=0.1,
        emb_dropout=0.1,
    )
    time_series = torch.randn(4, 3, 256)
    logits = v(time_series)  # (4, 1000)

m7_train.py (new file)
@@ -0,0 +1,107 @@
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
import torch.optim as optim
import numpy as np
import torch
import os
import pandas as pd
from m6_vit import ViT3D
from m3_loaddatasets import DatasetsDay
import matplotlib.pyplot as plt

torch.set_printoptions(threshold=float("inf"))
################################################################################
seq_length = 30
features_len = 8
device = "cuda"
model = ViT3D(
    seq_len=seq_length,
    features_len=features_len,
    num_classes=2,
).to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-2)
criterion = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.1, patience=3, verbose=True
)  # .to(device)
################################################################################
features_files = "./datasets/features"
# features_files = "./02stocks/vitday/datasets/feature1"
files = [
    os.path.join(features_files, a)
    for a in sorted(os.listdir(features_files), key=lambda x: (x[4:]))
]
################################################################################
################################################################################
for epoch in range(1000):
    model.train()
    ############################################################################
    for file in files:
        print(file)
        epoch_losses = []
        d = pd.read_csv(file)
        a = DatasetsDay(d, day_length=seq_length)
        loader = DataLoader(a, batch_size=128, shuffle=False)
        for step, (x, y) in enumerate(loader):
            x, y = x.to(device), y.to(device)
            # print(x)
            # print(f">>> Y: ", y)
            optimizer.zero_grad()
            outputs = model(x)
            # print(f">>> outputs:", outputs.shape)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())
            print(loss.item())
            #################################################################
            #################################################################
            # print(outputs)
            # o = torch.argmax(outputs, dim=1)
            # if torch.equal(y, o):
            #     print(f"succeedsssssssssssssssssssssssss->", o)
            # else:
            #     print(f"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee:", y, o)
            # p1 = outputs[0:1][:]
            # p2 = outputs[1:1][:]
            # p1 = p1.squeeze()
            # p2 = p2.squeeze()
            # print(p1)
            # plt.plot(p1.detach().cpu().numpy())
            # plt.plot(p2.detach().cpu().numpy())
            # plt.show()
            #################################################################
            #################################################################
            # break
        mean_loss = np.mean(epoch_losses)
        print(f">>> Epoch {epoch} train loss: ", mean_loss)
        # epoch_losses = []
        # # Something was strange when using this?
        # # model.eval()
        # for step, (inputs, labels) in enumerate(test_dataloader):
        #     inputs, labels = inputs.to(device), labels.to(device)
        #     outputs = model(inputs)
        #     loss = criterion(outputs, labels)
        #     epoch_losses.append(loss.item())
        # print(f">>> Epoch {epoch} test loss: ", np.mean(epoch_losses))
        # if epoch % 100 == 0:
        #     # models_save = os.path.join(
        #     #     models_path, "./{}-{}-{}.pt".format(m_name, epoch, mean_loss)
        #     # )
        scheduler.step(mean_loss)
        torch.save(model, "./model.pt")
        # break
    # break
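
random_split is imported above but never used; a hedged sketch of how a per-file held-out split could look (the helper name and fractions are assumptions; note that a random split leaks temporally adjacent windows between train and test, so a chronological split may be preferable for series data):

# Hypothetical helper: an 80/20 split of one file's dataset.
def make_loaders(csv_file, seq_length=30, batch_size=128, test_frac=0.2):
    d = pd.read_csv(csv_file)
    ds = DatasetsDay(d, day_length=seq_length)
    n_test = max(1, int(test_frac * len(ds)))
    train_set, test_set = random_split(ds, [len(ds) - n_test, n_test])
    return (
        DataLoader(train_set, batch_size=batch_size, shuffle=True),
        DataLoader(test_set, batch_size=batch_size, shuffle=False),
    )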

m8_evaluate.py (new file)
@@ -0,0 +1,40 @@
import torch
import os
from torch.utils.data import DataLoader
import pandas as pd
from m3_loaddatasets import DatasetsDay
import numpy as np
from m6_vit import ViT3D  # needed so torch.load can unpickle the model class

np.set_printoptions(threshold=10000)
################################################################################
seq_length = 50
device = "cuda"
model = torch.load("./model1.pt").to(device)
################################################################################
features_files = "./datasets/feature1"
# features_files = "./02stocks/vitday/datasets/feature1"
files = [
    os.path.join(features_files, a)
    for a in sorted(os.listdir(features_files), key=lambda x: (x[4:]))
]
################################################################################
model.eval()
################################################################################
for epoch in range(1000):
    ############################################################################
    for file in files:
        print(file)
        d = pd.read_csv(file)
        a = DatasetsDay(d, day_length=seq_length)
        loader = DataLoader(a, batch_size=1, shuffle=True)
        for step, (x, y) in enumerate(loader):
            x, y = x.to(device), y.to(device)
            print(f">>> Y: ", y)
            outputs = model(x)
            print(f">>> outputs:", outputs)
            print(torch.argmax(outputs, dim=1))
            break
        break
    break

m9_infer.py (new file)
@@ -0,0 +1,39 @@
import torch
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
import torch.optim as optim
import numpy as np
import os
import pandas as pd
from m3_loaddatasets import DatasetsDay
from m6_vit import ViT3D  # needed so torch.load can unpickle the model class

#############################################################################
seq_length = 50
features_len = 8
device = "cuda"
m = torch.load("./model1.pt").to(device)
#############################################################################
features_files = "./datasets/features1"
files = [
    os.path.join(features_files, a)
    for a in sorted(os.listdir(features_files), key=lambda x: (x[4:]))
]
zdfs = []
for file in files:
    print(file)
    d = pd.read_csv(file)
    a = DatasetsDay(d, day_length=seq_length)
    x = a.get_last_item()  # call on the instance, not the class
    x = x.unsqueeze(0).to(device)  # the model expects a batch dimension
    outputs = m(x)
    cls = outputs.argmax()
    # Invert the label mapping from y_feature_extract: y = int((zdf - 1 + 0.3) * 1000)
    zdf = cls / 1000 - 0.3 + 1
    print(zdf)
    zdfs.append(zdf)

t1_create_sample.py (new file)
@@ -0,0 +1,33 @@
import os
import pandas as pd


def create_1():
    features_dir = "./datasets/features"
    files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))]
    for file in files:
        print(file)
        d = pd.read_csv(os.path.join(features_dir, file))
        a = d.iloc[0:51][:]
        p = os.path.join(features_dir, "test" + file)
        a.to_csv(p)


# create_1()


def create_2():
    code = "sz.300001.csv"
    file = "./datasets/features/" + code
    d = pd.read_csv(file)
    a = d.iloc[0:52][:]
    p = os.path.join("./datasets/feature1", code)
    a.to_csv(p)


create_2()

z-predict.py (new file, empty)
zutils.py (new file)
@@ -0,0 +1,179 @@
import pandas as pd
from bson.json_util import dumps
from io import StringIO
import json
from datetime import datetime
import time


def download_to_column(df):
    ##################################################################
    # df = stock_hfq_df
    # aa = df.loc[:, "收盘"]
    # df["前收盘"] = aa.shift()
    # df.at[0, "前收盘"] = df.at[0, "收盘"]
    #
    # tt = df.loc[:, "换手率"]
    # df["前换手率"] = tt.shift()
    # df.at[0, "前换手率"] = df.at[0, "换手率"]
    # df["换手率"] = df["换手率"].astype(float)
    # df["前换手率"] = df["换手率"].pct_change()
    # df["收盘"] = df["收盘"].astype(float)
    # df["前收盘"] = df["收盘"].pct_change()
    ##################################################################
    stock = pd.DataFrame()
    # stock["d"] = time.strftime("%Y-%m-%d", time.localtime(df.loc[:, "日期"]))
    stock["dateT"] = df.loc[:, "日期"].astype(str)        # date
    stock["open"] = df.loc[:, "开盘"].astype(float)       # open
    stock["close"] = df.loc[:, "收盘"].astype(float)      # close
    # stock["pre_close"] = df.loc[:, "前收盘"]
    stock["high"] = df.loc[:, "最高"].astype(float)       # high
    stock["low"] = df.loc[:, "最低"].astype(float)        # low
    stock["turnover"] = df.loc[:, "换手率"].astype(float)  # turnover rate
    # stock['pre_turnover'] = df.loc[:, "前换手率"]
    stock["zf"] = df.loc[:, "振幅"].astype(float)         # amplitude
    stock["zdf"] = df.loc[:, "涨跌幅"].astype(float)      # percent change
    # # kdj9(stock)
    # # kdj45(stock)
    # # ma(stock)
    # data = stock.replace(np.nan, 0)
    # data = data.replace(np.inf, 0)
    # data = data.fillna(0)
    return stock


def jsonl_2_dp(jsonl):
    json_data = dumps(jsonl, indent=2)
    pd_stocks = pd.read_json(StringIO(json_data))
    return pd_stocks


def dp_2_jsonl(dataset):
    docs = dataset.to_json(orient="records")
    docs = json.loads(docs)
    # collection.insert_many(docs)
    return docs


def timestamp2time(timestamp=1707769336):
    # `datetime` is imported as the class, so call fromtimestamp on it directly.
    dateArray = datetime.fromtimestamp(timestamp)
    # otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
    otherStyleTime = dateArray.strftime("%Y-%m-%d")
    print(otherStyleTime)
    return otherStyleTime


def time2timestamp(t="2024-02-13 04:22:16"):
    # The time arrives as a string.
    # Convert it to a struct_time...
    timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
    print(timeArray)
    # timeArray exposes fields such as tm_year.
    # ...then to a Unix timestamp.
    timeStamp = int(time.mktime(timeArray))
    print(timeStamp)
    return timeStamp


######################################################
######################################################
######################################################
def data_clean(df):
    return


######################################################
######################################################
def trend_high_price(df):
    # Trace back the three previous high points; if there is only one,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    df["ph"] = None  # object columns so a list can be stored per row
    df["pl"] = None
    ph = []
    pl = []
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.at[df.index[i], "ph"] = h
            df.at[df.index[i], "pl"] = l
            continue
        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if ma20 > ma60:
            # Uptrend: maintain the list of prior highs.
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
                else:
                    ph.append(h)
        else:
            # Downtrend: maintain the list of prior lows.
            r = all(v < l for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < l:
                        pre_v = v
                if pre_v > 0:
                    pl.append(pre_v)
                else:
                    pl.append(l)
        df.at[df.index[i], "ph"] = ph[:-1]
        df.at[df.index[i], "pl"] = pl[:-1]
    return df


def trend_high_price2(df):
    # Trace back the three previous high points; if there is only one,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    df["ph"] = None
    df["pl"] = None
    ph = []
    pl = []
    h_i = 0
    l_i = 0
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.at[df.index[i], "ph"] = h
            df.at[df.index[i], "pl"] = l
            continue
        c = df.iloc[i]["close"]
        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if c > ma20:
            # Above the 20-day mean: maintain the list of prior highs.
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
                else:
                    ph.append(h)
        else:
            # Below the 20-day mean: maintain the list of prior lows.
            r = all(v < l for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < l:
                        pre_v = v
                if pre_v > 0:
                    pl.append(pre_v)
                else:
                    pl.append(l)
        df.at[df.index[i], "ph"] = ph[:-1]
        df.at[df.index[i], "pl"] = pl[:-1]
    return df