mirror of https://github.com/dupenf/stock-vit.git
synced 2024-11-25 16:35:10 +08:00
Add files via upload
This commit is contained in:
commit
652f2378e8
61
0_csvpath.py
Normal file
@@ -0,0 +1,61 @@
import os


# base_dir = os.path.join(os.path.dirname(__file__), "../datasets")


def csv_paths(dataset_path=""):
    qfq_weekly = os.path.join(dataset_path, "weekly_qfq")
    qfq_daily = os.path.join(dataset_path, "daily_qfq")
    hfq_weekly = os.path.join(dataset_path, "weekly_hfq")
    hfq_daily = os.path.join(dataset_path, "daily_hfq")

    features_weekly = os.path.join(dataset_path, "weekly_features")
    features_daily = os.path.join(dataset_path, "daily_features")

    templates_weekly = os.path.join(dataset_path, "weekly_templates")
    templates_daily = os.path.join(dataset_path, "daily_templates")

    results_weekly = os.path.join(dataset_path, "weekly_results")
    results_daily = os.path.join(dataset_path, "daily_results")

    if not os.path.exists(qfq_daily):
        os.makedirs(qfq_daily)
    if not os.path.exists(qfq_weekly):
        os.makedirs(qfq_weekly)

    if not os.path.exists(hfq_weekly):
        os.makedirs(hfq_weekly)
    if not os.path.exists(hfq_daily):
        os.makedirs(hfq_daily)

    if not os.path.exists(features_weekly):
        os.makedirs(features_weekly)
    if not os.path.exists(features_daily):
        os.makedirs(features_daily)

    if not os.path.exists(templates_weekly):
        os.makedirs(templates_weekly)
    if not os.path.exists(templates_daily):
        os.makedirs(templates_daily)

    if not os.path.exists(results_weekly):
        os.makedirs(results_weekly)
    if not os.path.exists(results_daily):
        os.makedirs(results_daily)

    week = {
        "qfq": qfq_weekly,
        "hfq": hfq_weekly,
        "features": features_weekly,
        "templates": templates_weekly,
        "results": results_weekly
    }
    daily = {
        "qfq": qfq_daily,
        "hfq": hfq_daily,
        "features": features_daily,
        "templates": templates_daily,
        "results": results_daily
    }
    all_codes_path = os.path.join(dataset_path, "0all.csv")
    return all_codes_path, week, daily
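A minimal usage sketch, not part of the commit, showing how the return values of csv_paths are laid out (assuming a local ./datasets directory):

# Hedged example: illustrates the return values only.
all_codes_path, week, daily = csv_paths(dataset_path="./datasets")
print(all_codes_path)      # ./datasets/0all.csv
print(week["features"])    # ./datasets/weekly_features
print(daily["results"])    # ./datasets/daily_results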
87
0utils.py
Normal file
@@ -0,0 +1,87 @@
import pandas as pd
from bson.json_util import dumps
from io import StringIO
import json
from datetime import datetime
import time


def download_to_column(df):
    ##################################################################
    # df = stock_hfq_df
    # aa = df.loc[:, "收盘"]
    # df["前收盘"] = aa.shift()
    # df.at[0, "前收盘"] = df.at[0, "收盘"]
    #
    # tt = df.loc[:, "换手率"]
    # df["前换手率"] = tt.shift()
    # df.at[0, "前换手率"] = df.at[0, "换手率"]

    # df["换手率"] = df["换手率"].astype(float)
    # df["前换手率"] = df["换手率"].pct_change()
    # df["收盘"] = df["收盘"].astype(float)
    # df["前收盘"] = df["收盘"].pct_change()
    ##################################################################
    stock = pd.DataFrame()
    # stock["d"] = time.strftime("%Y-%m-%d", time.localtime(df.loc[:, "日期"]))
    stock["dateT"] = df.loc[:, "日期"].astype(str)         # date
    stock["open"] = df.loc[:, "开盘"].astype(float)        # open
    stock["close"] = df.loc[:, "收盘"].astype(float)       # close
    # stock["pre_close"] = df.loc[:, "前收盘"]
    stock["high"] = df.loc[:, "最高"].astype(float)        # high
    stock["low"] = df.loc[:, "最低"].astype(float)         # low
    stock["turnover"] = df.loc[:, "换手率"].astype(float)   # turnover rate
    # stock['pre_turnover'] = df.loc[:, "前换手率"]
    stock["zf"] = df.loc[:, "振幅"].astype(float)          # amplitude
    stock["zdf"] = df.loc[:, "涨跌幅"].astype(float)        # percent change
    # # kdj9(stock)
    # # kdj45(stock)
    # # ma(stock)
    # data = stock.replace(np.nan, 0)
    # data = data.replace(np.inf, 0)
    # data = data.fillna(0)

    return stock


def jsonl_2_dp(jsonl):
    json_data = dumps(jsonl, indent=2)
    pd_stocks = pd.read_json(StringIO(json_data))
    return pd_stocks


def dp_2_jsonl(dataset):
    docs = dataset.to_json(orient="records")
    docs = json.loads(docs)
    # collection.insert_many(docs)
    return docs


def timestamp2time(timestamp=1707769336):
    dateArray = datetime.fromtimestamp(timestamp)
    # otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
    otherStyleTime = dateArray.strftime("%Y-%m-%d")
    print(otherStyleTime)
    return otherStyleTime


def time2timestamp(t="2024-02-13 04:22:16"):
    # time given as a string
    # parse it into a struct_time
    timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
    print(timeArray)
    # struct_time exposes fields such as tm_year
    # convert to a Unix timestamp
    timeStamp = int(time.mktime(timeArray))
    print(timeStamp)
    return timeStamp


######################################################
######################################################
######################################################
def data_clean(df):
    return


######################################################
######################################################
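A short round-trip sketch for the two time helpers above (standard-library behavior only; the exact timestamp depends on the local timezone):

# Illustrative only; the values shown assume a UTC+8 machine.
ts = time2timestamp("2024-02-13 04:22:16")   # -> 1707769336 on UTC+8
day = timestamp2time(ts)                     # -> "2024-02-13"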
0
__init__.py
Normal file
120
m0_download.py
Normal file
@@ -0,0 +1,120 @@
import baostock as bs
import pandas as pd


#### log in ####
# lg = bs.login()
# print the login response
# print("login respond error_code:" + lg.error_code)
# print("login respond error_msg:" + lg.error_msg)


def get_all_codes(save_path="./datasets"):
    # s = bs.query_all_stock()
    # s_df = s.get_data()
    # s_df.to_csv("./datasets/all_codes.csv", encoding="utf-8", index=False)

    #### log in ####
    lg = bs.login()
    # print the login response
    print("login respond error_code:" + lg.error_code)
    print("login respond error_msg:" + lg.error_msg)

    #### fetch the list of securities ####
    rs = bs.query_all_stock(day="2024-06-20")
    print("query_all_stock respond error_code:" + rs.error_code)
    print("query_all_stock respond error_msg:" + rs.error_msg)

    #### print the result set ####
    data_list = []
    while (rs.error_code == "0") & rs.next():
        # fetch one record at a time and merge them together
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    #### write the result set to a csv file ####
    result.to_csv(save_path + "/all_codes.csv", encoding="utf-8", index=False)
    print(result)

    #### log out ####
    bs.logout()


def download_code_hist(
    save_path="./datasets/origins",
    code="sh.600000",
    start_date="1999-09-01",
    end_date="2024-06-30",
    freq="d",
    adjustflag="1"
):
    #### fetch historical K-line data for Shanghai/Shenzhen A shares ####
    # For the full field list see the "历史行情指标参数" chapter of the baostock docs;
    # minute bars take different fields from daily bars and do not include indices.
    # minute-bar fields: date,time,code,open,high,low,close,volume,amount,adjustflag
    # weekly/monthly fields: date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
    fields = "date,time,code,open,high,low,close,volume,adjustflag"
    if freq == "d":
        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
    rs = bs.query_history_k_data_plus(
        code,
        # "date,time,code,open,high,low,close,volume,adjustflag",
        # "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST",
        fields=fields,
        start_date=start_date,
        end_date=end_date,
        frequency=freq,
        adjustflag=adjustflag,  # "1" = hfq
    )

    #### print the result set ####
    data_list = []
    while (rs.error_code == "0") & rs.next():
        # fetch one record at a time and merge them together
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    # print(result)

    #### write the result set to a csv file ####
    filename = save_path + "/" + code + ".csv"
    result.to_csv(filename, index=True)
    # print(result)


def download_codes(save_path="./datasets/origins", filters=[], freq="d", adjustflag="1"):
    codes = pd.read_csv("./datasets/all_codes.csv")
    codes = codes["code"].tolist()

    lg = bs.login()
    # print the login response
    print("login respond error_code:" + lg.error_code)
    print("login respond error_msg:" + lg.error_msg)

    for code in codes:
        ignore = True
        for f in filters:
            if code.startswith(f):
                ignore = False
        if ignore:
            continue

        print(code)
        # if code < "sz.300138":
        #     continue

        download_code_hist(save_path=save_path, code=code, freq=freq, adjustflag=adjustflag)
    #### log out ####
    bs.logout()


# get_all_codes(save_path="./datasets")
download_codes(
    save_path="./datasets/cyday",
    filters=["sh.68", "sz.3"],
    # filters=["sz.301"]
    freq="d",
    adjustflag="1"  # hfq
)
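For reference, a hedged sketch of the two-step workflow this module drives (it already calls download_codes at import time); baostock's adjustflag convention is "1" = hfq (backward-adjusted), "2" = qfq (forward-adjusted), "3" = unadjusted:

# Illustrative only; mirrors the calls at the bottom of this file.
get_all_codes(save_path="./datasets")        # writes ./datasets/all_codes.csv
download_codes(
    save_path="./datasets/cyday",
    filters=["sh.68", "sz.3"],               # keep only codes with these prefixes
    freq="d",
    adjustflag="1",                          # hfq, matching the default above
)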
4
m10_predict.py
Normal file
@@ -0,0 +1,4 @@
import torch


model = torch.load("./model.pt")
44
m1_data_clean.py
Normal file
@@ -0,0 +1,44 @@
import pandas as pd
import torch
import os
import numpy as np


def clean_volume_0(df):
    # replace nulls/inf with 0
    df = df.replace(np.nan, 0)
    df = df.replace(np.inf, 0)
    df = df.fillna(0)

    df["volume"] = df["volume"].astype(int)

    # drop rows with zero volume or zero close (drop by index label)
    index = df.loc[df["volume"] == 0].index
    df = df.drop(index)

    index = df.loc[df["close"] == 0].index
    df = df.drop(index)
    df = df.reset_index(drop=True)

    return df


def process_features(in_dir, out_dir):
    # codes = pd.read_csv("./datasets/all_codes.csv")
    # codes = codes["code"].tolist()

    # codes = ["sz.300001"]
    file_dir = in_dir  # "./datasets/origins"

    a_s = [a for a in sorted(os.listdir(file_dir), key=lambda x: str(x[5:]))]
    for a in a_s:
        file = os.path.join(file_dir, a)
        df = pd.read_csv(file)

        df = clean_volume_0(df)

        df.to_csv(os.path.join(out_dir, a))
        print(a)


process_features(in_dir="./datasets/cyday", out_dir="./datasets/cleaned/")
286
m2_features.py
Normal file
@@ -0,0 +1,286 @@
import akshare as ak
from io import StringIO
from bson.json_util import dumps
import json
import os
import numpy as np
import pandas as pd


X_Length = 30


pd.set_option("display.width", 1000)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 1000)


def calc_pre_high_list(df):
    # Trace back the previous 3 high points; if only one exists,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    ph = []
    pl = []
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.iloc[i]["ph"] = h
            df.iloc[i]["pl"] = l
            continue

        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if ma20 > ma60:
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(h)
        else:
            r = all(v < h for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(l)

        df.iloc[i]["ph"] = ph[:-1]
        df.iloc[i]["pl"] = pl[:-1]

    return df


def calc_pre_high_list2(df):
    # Trace back the previous 3 high points; if only one exists,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    ph = []
    pl = []
    h_i = 0
    l_i = 0
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.iloc[i]["ph"] = h
            df.iloc[i]["pl"] = l
            continue
        c = df.iloc[i]["close"]
        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if c > ma20:
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(h)
        else:
            r = all(v < h for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(l)

        df.iloc[i]["ph"] = ph[:-1]
        df.iloc[i]["pl"] = pl[:-1]

    return df


def kdj_window(df, window=160, m1=60, m2=60, low="low", high="high", close="close"):
    low_list = df[low].rolling(window).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)

    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df['k' + str(window)] = rsv.ewm(alpha=1 / m1, adjust=False).mean()
    df['d' + str(window)] = df['k' + str(window)].ewm(alpha=1 / m2, adjust=False).mean()
    df['j' + str(window)] = 3 * df['k' + str(window)] - 2 * df['d' + str(window)]


def kdj4(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=4).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=4).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)

    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df['k4'] = rsv.ewm(com=3).mean()
    df['d4'] = df['k4'].ewm(com=3).mean()
    df['j4'] = 3 * df['k4'] - 2 * df['d4']


def kdj160(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=160).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=160).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)

    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df['k160'] = rsv.ewm(com=60).mean()
    df['d160'] = df['k160'].ewm(com=60).mean()
    df['j160'] = 3 * df['k160'] - 2 * df['d160']


def kdj9(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=9).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=9).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)

    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df['k9'] = rsv.ewm(com=3).mean()
    df['d9'] = df['k9'].ewm(com=3).mean()
    df['j9'] = 3 * df['k9'] - 2 * df['d9']


def kdj45(df, low="low", high="high", close="close"):
    low_list = df[low].rolling(window=45).min()
    low_list.fillna(value=df[low].expanding().min(), inplace=True)
    high_list = df[high].rolling(window=45).max()
    high_list.fillna(value=df[high].expanding().max(), inplace=True)

    rsv = (df[close] - low_list) / (high_list - low_list) * 100
    df['k45'] = rsv.ewm(com=15).mean()
    df['d45'] = df['k45'].ewm(com=15).mean()
    df['j45'] = 3 * df['k45'] - 2 * df['d45']


def ma(df, close="close"):
    df['ma5'] = df[close].rolling(window=5).mean().dropna()
    df['ma20'] = df[close].rolling(window=20).mean().dropna()
    df["ma60"] = df[close].rolling(window=60).mean().dropna()
    # df['ma1000'] = df[close].rolling(window=1000).mean().dropna()


# def zdf(data):
#     if data.shape[0] % 16 != 0:
#         print("error================================> not 16 ")

#     zdf = pd.DataFrame()
#     zdf["zdf"] = -1
#     for i in range(int(data.shape[0] / 16) - 1):
#         s = int(i * 16 + 16)
#         e = int(s + 16)
#         print(e)
#         # print(e)
#         p1 = data[s-1:s]["close"].values[0]
#         p2 = data[e-1:e]["close"].values[0]
#         print(p1)
#         print(p2)
#         zdf[e-1:e]["zdf"] = p2 / p1
#         # break
#     print(zdf["zdf"])
#     data["zdf"] = zdf["zdf"]

#     return data


def get_features(data):
    lines = data.shape[0]
    if lines < 120:
        return None

    ma(data)
    # data = zdf(data)
    data = data.loc[60:, :]
    # data = data.copy()
    # data.reset_index(drop=True)

    # data = calc_pre_high_list(data)
    dataset = pd.DataFrame()

    close = data.iloc[0:1]["close"].values[0]
    volume = data.iloc[0:1]["volume"].values[0]
    # print(volume)
    dataset["date"] = data["date"]

    # dataset["high"] = data["high"].astype(float) / close
    # dataset["low"] = data["low"].astype(float) / close
    # dataset["open"] = data["open"].astype(float) / close
    # dataset["close"] = data["close"].astype(float) / close

    # dataset["ma5"] = data["ma5"].astype(float) / close
    # dataset["ma20"] = data["ma20"].astype(float) / close
    # dataset["ma60"] = data["ma60"].astype(float) / close

    # dataset["volume"] = data["volume"].astype(float) / volume

    dataset["zdf"] = data["close"].astype(float) / data["preclose"]

    dataset["high"] = data["high"].astype(float)
    dataset["low"] = data["low"].astype(float)
    dataset["open"] = data["open"].astype(float)
    dataset["close"] = data["close"].astype(float)

    dataset["ma5"] = data["ma5"].astype(float)
    dataset["ma20"] = data["ma20"].astype(float)
    dataset["ma60"] = data["ma60"].astype(float)
    dataset["volume"] = data["volume"].astype(float)

    # keep only stocks whose daily close/preclose ratio stays within [0.7, 1.3)
    if all(dataset['zdf'] >= 0.7) and all(dataset['zdf'] < 1.3):
        return dataset

    return None

    # null replace to 0
    # dataset = dataset.replace(np.nan, 0)
    # dataset = dataset.replace(np.inf, 0)
    # dataset = dataset.fillna(0)

    # return dataset


def process_features(in_dir, out_dir):
    # codes = pd.read_csv("./datasets/all_codes.csv")
    # codes = codes["code"].tolist()

    # codes = ["sz.300001"]
    file_dir = in_dir  # "./datasets/origins"

    a_s = [a for a in sorted(os.listdir(file_dir), key=lambda x: str(x[5:]))]
    for a in a_s:
        # if a.startswith("sz.30"):
        #     continue
        file = os.path.join(file_dir, a)
        df = pd.read_csv(file)
        df = get_features(df)
        if df is not None:
            # file = "./datasets/features/" + a
            file = out_dir + a
            df.to_csv(file, index=False)
            print(a)
        # break


process_features(in_dir="./datasets/cleaned", out_dir="./datasets/features/")


# process_features(in_dir="./02stocks/vit15minutes/datasets/origins",
#                  out_dir="./02stocks/vit15minutes/datasets/features/")
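For reference, the kdj* helpers above all follow the standard KDJ construction over an n-bar window: RSV = (close - lowest low) / (highest high - lowest low) * 100, K = smoothed RSV, D = smoothed K, and J = 3*K - 2*D; the variants differ only in the window length and the ewm smoothing constant.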
192
m3_loaddatasets.py
Normal file
@@ -0,0 +1,192 @@
import pandas as pd
import torch
import os
import math
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

max_y = 0
min_y = 99999

const_y_days = 5


def y_feature_extract(y1):
    y = y1["zdf"].values[0]
    y = y - 1 + 0.3
    y = int(y * 1000)  # [0, 4500] num_classes = 4500

    return y


def y_feature_extract_1_0(y1):
    # binary label: 1 if any of the next const_y_days bars has zdf (close / preclose) above 1.1
    y = y1["zdf"].max()
    if y > 1.1:
        return 1
    else:
        return 0


def feature_extract_x_7(x1):
    close = x1[0:1]["close"].values[0]
    volume = x1[0:1]["volume"].values[0]

    # print(x)
    # print(x.shape)
    factor = 1
    x = pd.DataFrame()
    x["high"] = x1["high"] / close * factor
    x["low"] = x1["low"] / close * factor
    x["open"] = x1["open"] / close * factor
    x["close"] = x1["close"] / close * factor

    x["ma5"] = x1["ma5"] / close * factor
    x["ma20"] = x1["ma20"] / close * factor
    x["ma60"] = x1["ma60"] / close * factor

    # x["volume"] = x1["volume"] / volume * factor

    return x


def feature_extract_x_2(x1):
    close = x1[0:1]["close"].values[0]
    volume = x1[0:1]["volume"].values[0]

    # print(x)
    # print(x.shape)
    factor = 10
    x = pd.DataFrame()
    x["high"] = (x1["high"] - x1["close"]) / close * factor
    x["low"] = (x1["low"] - x1["close"]) / close * factor
    x["open"] = (x1["open"] - x1["close"]) / close * factor
    # x["close"] = (x1["close"] - x["close"]) / close * factor
    x["zdf"] = x1["zdf"] - 1

    x["ma5"] = (x1["ma5"] - x1["close"]) / close * factor
    x["ma20"] = (x1["ma20"] - x1["close"]) / close * factor
    x["ma60"] = (x1["ma60"] - x1["close"]) / close * factor

    x["volume"] = x1["volume"] / volume * factor

    return x


def feature_extract_x(x1):
    close = x1[0:1]["close"].values[0]
    volume = x1[0:1]["volume"].values[0]

    # print(x)
    # print(x.shape)
    factor = 10
    x = pd.DataFrame()
    x["high"] = x1["high"] / close * factor
    x["low"] = x1["low"] / close * factor
    x["open"] = x1["open"] / close * factor
    x["close"] = x1["close"] / close * factor

    x["ma5"] = x1["ma5"] / close * factor
    x["ma20"] = x1["ma20"] / close * factor
    x["ma60"] = x1["ma60"] / close * factor

    x["volume"] = x1["volume"] / volume * factor

    return x


def features(x1, y1):
    y = y_feature_extract_1_0(y1)
    # if y >= 600 or y <= 0:
    #     print("error--------------------y > 600")
    #     print(y)
    #     print(y1)

    x = feature_extract_x_2(x1)

    return x, [y]


# Sliding-window dataset: x = day_length consecutive rows of features,
# y = label built from the following const_y_days rows.
class DatasetsDay(Dataset):
    def __init__(self, df, day_length=40):
        self.df = df
        self.day_length = day_length

    def __len__(self):
        d = self.df.shape[0]
        d = d - self.day_length - const_y_days
        return 0 if d < 0 else d

    def __getitem__(self, i):
        a = i
        e = a + self.day_length

        if e >= self.df.shape[0]:
            print(
                "error================================================datasets_loader?"
            )
            return torch.ones(0, 0), torch.ones(0, 0)

        x1 = self.df.iloc[a:e, :]
        y1 = self.df.iloc[e : e + const_y_days, :]
        # print(x)
        # print(y)
        ############################################
        x, y = features(x1, y1)
        ############################################
        #
        x = torch.Tensor(x.values)
        # x = torch.unsqueeze(x, dim=0)
        # print(x.shape)
        # print("-----------------------x.shape")

        y = torch.LongTensor(y)
        y = torch.squeeze(y)

        return x, y

    def get_last_item(self):
        s = self.df.shape[0] - self.day_length
        e = self.df.shape[0]
        x1 = self.df.iloc[s:e, :]
        x = feature_extract_x(x1)
        x = torch.Tensor(x.values)

        return x


def test(features_dir="./datasets/features1/"):
    seq_length = 50
    files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))]
    for file in files:
        print(file)
        d = pd.read_csv(os.path.join(features_dir, file))
        a = DatasetsDay(d, day_length=seq_length)
        # print(len(a))

        global max_y
        global min_y
        max_y = 0
        min_y = 99999

        loader = DataLoader(a, batch_size=1, shuffle=True)
        for step, (x, y) in enumerate(loader):
            # print(step)
            print(x.shape)
            print(y.shape)
            # print("-----------------------x.shape")
            # print(x)
            # print(y)
            # print(x.shape)
            # print(y.shape)
            break
        pass
        print(max_y)
        print(min_y)


# files = [print(a) for a in sorted(os.listdir("./datasets/features"), key=lambda x: x[5:])]

# test(features_dir="./02stocks/vit15minutes/datasets/features")
# test(features_dir="./datasets/features")
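A hedged illustration, not part of the commit, of how DatasetsDay slices a features CSV produced by m2_features.py: each item is a day_length-row window of engineered columns plus a binary label taken from the following const_y_days rows (the file path below is hypothetical):

# Illustrative only.
df = pd.read_csv("./datasets/features/sz.300001.csv")   # hypothetical features file
ds = DatasetsDay(df, day_length=30)
x, y = ds[0]
print(x.shape)   # torch.Size([30, 8]): high, low, open, zdf, ma5, ma20, ma60, volume
print(y)         # tensor(1) if max zdf over the next 5 rows exceeds 1.1, else tensor(0)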
0
m4_embedding.py
Normal file
15
m5_position.py
Normal file
@@ -0,0 +1,15 @@
import json
import os
import time
import datetime
import pandas as pd
import akshare as ak


def day_zt_xiao_k(df):
    shape = df.shape
133
m6_vit.py
Normal file
@@ -0,0 +1,133 @@
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
import torch
from einops import repeat


class Attention(nn.Module):
    def __init__(self, dim, n_heads, dropout):
        super().__init__()
        self.n_heads = n_heads
        self.att = torch.nn.MultiheadAttention(
            embed_dim=dim, num_heads=n_heads, dropout=dropout
        )
        self.q = torch.nn.Linear(dim, dim)
        self.k = torch.nn.Linear(dim, dim)
        self.v = torch.nn.Linear(dim, dim)

    def forward(self, x):
        # note: the q/k/v projections below are computed but not passed on;
        # MultiheadAttention is called on the raw input directly.
        q = self.q(x)
        k = self.k(x)
        v = self.v(x)
        attn_output, attn_output_weights = self.att(x, x, x)
        return attn_output


class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)


class FeedForward(nn.Sequential):
    def __init__(self, dim, hidden_dim, dropout=0.0):
        super().__init__(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )


# leftover quick shape check from development (kept for reference):
# ff = FeedForward(dim=128, hidden_dim=256)
# ff(torch.ones((1, 5, 128))).shape


class ResidualAdd(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        res = x
        x = self.fn(x, **kwargs)
        x += res
        return x


class ViT3D(nn.Module):
    def __init__(
        self,
        features_len=8,
        seq_len=20,
        # img_size=144,
        # patch_size=4,
        emb_dim=512,
        n_layers=24,
        num_classes=600,
        dropout=0.1,
        heads=8,
    ):
        super(ViT3D, self).__init__()

        # self.feature_embedding = nn.Sequential(
        #     # break-down the image in s1 x s2 patches and flat them
        #     # Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
        #     # nn.Linear(patch_size * patch_size * in_channels, emb_size)
        #     nn.Linear(features_len, emb_dim)
        # )
        self.feature_embedding = nn.Linear(features_len, emb_dim)

        self.pos_embedding = nn.Parameter(torch.randn(1, seq_len + 1, emb_dim))
        self.cls_token = nn.Parameter(torch.rand(1, 1, emb_dim))

        # Transformer Encoder
        self.n_layers = n_layers
        self.layers = nn.ModuleList([])
        for _ in range(n_layers):
            transformer_block = nn.Sequential(
                ResidualAdd(
                    PreNorm(emb_dim, Attention(emb_dim, n_heads=heads, dropout=dropout))
                ),
                ResidualAdd(
                    PreNorm(emb_dim, FeedForward(emb_dim, emb_dim, dropout=dropout))
                ),
            )
            self.layers.append(transformer_block)

        # Classification head
        self.head = nn.Sequential(
            nn.LayerNorm(emb_dim),
            nn.Linear(emb_dim, num_classes)
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Get patch embedding vectors
        x = self.feature_embedding(x)
        b, n, _ = x.shape

        # Add cls token to inputs
        cls_tokens = repeat(self.cls_token, "1 1 d -> b 1 d", b=b)
        x = torch.cat([cls_tokens, x], dim=1)
        x += self.pos_embedding[:, : (n + 1)]

        # Transformer layers
        for i in range(self.n_layers):
            x = self.layers[i](x)

        # Output based on classification token
        x = self.head(x[:, 0, :])
        x = self.softmax(x)
        return x


# model = ViT3D()
# print(model)
# print(model(torch.ones((1,50,8))))
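A small shape-check sketch, not part of the commit, using the same arguments m7_train.py passes to ViT3D:

# Illustrative only; mirrors the commented-out check above.
model = ViT3D(seq_len=30, features_len=8, num_classes=2)
x = torch.ones((4, 30, 8))   # (batch, seq_len, features_len)
out = model(x)               # softmax scores over num_classes
print(out.shape)             # torch.Size([4, 2])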
153
m6_vit_1d.py
Normal file
@@ -0,0 +1,153 @@
import torch
from torch import nn

from einops import rearrange, repeat, pack, unpack
from einops.layers.torch import Rearrange

# classes


class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.0):
        super().__init__()
        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)


class Attention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=64, dropout=0.0):
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head**-0.5

        self.norm = nn.LayerNorm(dim)
        self.attend = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)

        self.to_out = (
            nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
            if project_out
            else nn.Identity()
        )

    def forward(self, x):
        x = self.norm(x)
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=self.heads), qkv)

        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale

        attn = self.attend(dots)
        attn = self.dropout(attn)

        out = torch.matmul(attn, v)
        out = rearrange(out, "b h n d -> b n (h d)")
        return self.to_out(out)


class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.0):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(
                nn.ModuleList(
                    [
                        Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout),
                        FeedForward(dim, mlp_dim, dropout=dropout),
                    ]
                )
            )

    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


class ViT1D(nn.Module):
    def __init__(
        self,
        *,
        seq_len,
        patch_size,
        num_classes,
        dim,
        depth,
        heads,
        mlp_dim,
        channels=3,
        dim_head=64,
        dropout=0.0,
        emb_dropout=0.0
    ):
        super().__init__()
        assert (seq_len % patch_size) == 0

        num_patches = seq_len // patch_size
        patch_dim = channels * patch_size

        self.to_patch_embedding = nn.Sequential(
            Rearrange("b c (n p) -> b n (p c)", p=patch_size),
            nn.LayerNorm(patch_dim),
            nn.Linear(patch_dim, dim),
            nn.LayerNorm(dim),
        )

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.mlp_head = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, num_classes))

    def forward(self, series):
        x = self.to_patch_embedding(series)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, "d -> b d", b=b)

        x, ps = pack([cls_tokens, x], "b * d")

        x += self.pos_embedding[:, : (n + 1)]
        x = self.dropout(x)

        x = self.transformer(x)

        cls_tokens, _ = unpack(x, ps, "b * d")

        return self.mlp_head(cls_tokens)


if __name__ == "__main__":

    v = ViT1D(
        seq_len=256,
        patch_size=16,  # i.e., the number of features here
        num_classes=1000,
        dim=1024,
        depth=6,
        heads=8,
        mlp_dim=2048,
        dropout=0.1,
        emb_dropout=0.1,
    )

    time_series = torch.randn(4, 3, 256)
    logits = v(time_series)  # (4, 1000)
107
m7_train.py
Normal file
@@ -0,0 +1,107 @@
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
import torch.optim as optim
import numpy as np
import torch
import os
import pandas as pd
from m6_vit import ViT3D
from m3_loaddatasets import DatasetsDay
import matplotlib.pyplot as plt


torch.set_printoptions(threshold=float("inf"))

################################################################################
seq_length = 30
features_len = 8
device = "cuda"

model = ViT3D(
    seq_len=seq_length,
    features_len=features_len,
    num_classes=2
).to(device)

optimizer = optim.AdamW(model.parameters(), lr=1e-2)
criterion = nn.CrossEntropyLoss()  # note: expects raw logits; ViT3D.forward currently applies a softmax first
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.1, patience=3, verbose=True
)  # .to(device)
################################################################################
features_files = "./datasets/features"
# features_files = "./02stocks/vitday/datasets/feature1"

files = [os.path.join(features_files, a)
         for a in sorted(os.listdir(features_files), key=lambda x: (x[4:]))]
################################################################################

################################################################################
for epoch in range(1000):
    model.train()
    ################################################################################
    for file in files:
        print(file)
        epoch_losses = []
        d = pd.read_csv(file)
        a = DatasetsDay(d, day_length=seq_length)
        loader = DataLoader(a, batch_size=128, shuffle=False)
        for step, (x, y) in enumerate(loader):
            x, y = x.to(device), y.to(device)
            # print(x)
            # print(f">>> Y: ", y)

            optimizer.zero_grad()
            outputs = model(x)
            # print(f">>> outputs:", outputs.shape)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())
            print(loss.item())
            #################################################################
            #################################################################
            # print(outputs)
            # o = torch.argmax(outputs,dim=1)
            # if torch.equal(y,o):
            #     print(f"succeedsssssssssssssssssssssssss->",o)
            # else:
            #     print(f"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee:",y,o)
            # p1 = outputs[0:1][:]
            # p2 = outputs[1:1][:]
            # p1 = p1.squeeze()
            # p2 = p2.squeeze()
            # print(p1)
            # plt.plot(p1.detach().cpu().numpy())
            # plt.plot(p2.detach().cpu().numpy())
            # plt.show()
            #################################################################
            #################################################################
            # break

        mean_loss = np.mean(epoch_losses)
        print(f">>> Epoch {epoch} train loss: ", mean_loss)
        # epoch_losses = []
        # # Something was strange when using this?
        # # model.eval()
        # for step, (inputs, labels) in enumerate(test_dataloader):
        #     inputs, labels = inputs.to(device), labels.to(device)
        #     outputs = model(inputs)
        #     loss = criterion(outputs, labels)
        #     epoch_losses.append(loss.item())

        # print(f">>> Epoch {epoch} test loss: ", np.mean(epoch_losses))

        # if epoch % 100 == 0:
        #     # models_save = os.path.join(
        #     #     models_path, "./{}-{}-{}.pt".format(m_name, epoch, mean_loss)
        #     # )
        scheduler.step(mean_loss)
        torch.save(model, "./model.pt")

        # break
    # break
40
m8_evaluate.py
Normal file
@@ -0,0 +1,40 @@
import torch
import os
from torch.utils.data import DataLoader
import pandas as pd
from m3_loaddatasets import DatasetsDay
import numpy as np
from m6_vit import ViT3D


np.set_printoptions(threshold=10000)
################################################################################
seq_length = 50
device = "cuda"
model = torch.load("./model1.pt").to(device)
################################################################################
features_files = "./datasets/feature1"
# features_files = "./02stocks/vitday/datasets/feature1"
files = [os.path.join(features_files, a)
         for a in sorted(os.listdir(features_files), key=lambda x: (x[4:]))]
################################################################################
model.eval()
################################################################################
for epoch in range(1000):
    ################################################################################
    for file in files:
        print(file)
        d = pd.read_csv(file)
        a = DatasetsDay(d, day_length=seq_length)
        loader = DataLoader(a, batch_size=1, shuffle=True)
        for step, (x, y) in enumerate(loader):
            x, y = x.to(device), y.to(device)
            print(f">>> Y: ", y)
            outputs = model(x)
            print(f">>> outputs:", outputs)
            print(torch.argmax(outputs, dim=1))
            break
        break
    break
39
m9_infer.py
Normal file
@@ -0,0 +1,39 @@
import torch
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
import torch.optim as optim
import numpy as np
import os
import pandas as pd
from m3_loaddatasets import DatasetsDay
from m6_vit import ViT3D

#############################################################################
seq_length = 50
features_len = 8
device = "cuda"
m = torch.load("./model1.pt").to(device)
#############################################################################
features_files = "./datasets/features1"
files = [os.path.join(features_files, a)
         for a in sorted(os.listdir(features_files), key=lambda x: (x[4:]))]

zdfs = []
for file in files:
    print(file)
    epoch_losses = []

    d = pd.read_csv(file)
    a = DatasetsDay(d, day_length=seq_length)
    x = a.get_last_item()
    x = x.unsqueeze(0)  # add a batch dimension: (1, day_length, features)
    x = x.to(device)
    outputs = m(x)
    cls = outputs.argmax()
    zdf = cls / 1000 - 0.3 + 1
    print(zdf)
    zdfs.append(zdf)
t1_create_sample.py
Normal file
33
t1_create_sample.py
Normal file
@ -0,0 +1,33 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
def create_1():
|
||||
features_dir="./datasets/features"
|
||||
files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))]
|
||||
for file in files:
|
||||
print(file)
|
||||
d = pd.read_csv(os.path.join(features_dir, file))
|
||||
|
||||
a = pd.DataFrame()
|
||||
a = d.iloc[0:51][:]
|
||||
|
||||
p = os.path.join(features_dir, "test"+file)
|
||||
a.to_csv(p)
|
||||
|
||||
|
||||
|
||||
# create_1()
|
||||
|
||||
|
||||
def create_2():
|
||||
code = "sz.300001.csv"
|
||||
file="./datasets/features/" + code
|
||||
d = pd.read_csv(file)
|
||||
|
||||
a = pd.DataFrame()
|
||||
a = d.iloc[0:52][:]
|
||||
|
||||
p = os.path.join("./datasets/feature1", code)
|
||||
a.to_csv(p)
|
||||
|
||||
create_2()
|
0
z-predict.py
Normal file
179
zutils.py
Normal file
@@ -0,0 +1,179 @@
import pandas as pd
from bson.json_util import dumps
from io import StringIO
import json
from datetime import datetime
import time


def download_to_column(df):
    ##################################################################
    # df = stock_hfq_df
    # aa = df.loc[:, "收盘"]
    # df["前收盘"] = aa.shift()
    # df.at[0, "前收盘"] = df.at[0, "收盘"]
    #
    # tt = df.loc[:, "换手率"]
    # df["前换手率"] = tt.shift()
    # df.at[0, "前换手率"] = df.at[0, "换手率"]

    # df["换手率"] = df["换手率"].astype(float)
    # df["前换手率"] = df["换手率"].pct_change()
    # df["收盘"] = df["收盘"].astype(float)
    # df["前收盘"] = df["收盘"].pct_change()
    ##################################################################
    stock = pd.DataFrame()
    # stock["d"] = time.strftime("%Y-%m-%d", time.localtime(df.loc[:, "日期"]))
    stock["dateT"] = df.loc[:, "日期"].astype(str)         # date
    stock["open"] = df.loc[:, "开盘"].astype(float)        # open
    stock["close"] = df.loc[:, "收盘"].astype(float)       # close
    # stock["pre_close"] = df.loc[:, "前收盘"]
    stock["high"] = df.loc[:, "最高"].astype(float)        # high
    stock["low"] = df.loc[:, "最低"].astype(float)         # low
    stock["turnover"] = df.loc[:, "换手率"].astype(float)   # turnover rate
    # stock['pre_turnover'] = df.loc[:, "前换手率"]
    stock["zf"] = df.loc[:, "振幅"].astype(float)          # amplitude
    stock["zdf"] = df.loc[:, "涨跌幅"].astype(float)        # percent change
    # # kdj9(stock)
    # # kdj45(stock)
    # # ma(stock)
    # data = stock.replace(np.nan, 0)
    # data = data.replace(np.inf, 0)
    # data = data.fillna(0)

    return stock


def jsonl_2_dp(jsonl):
    json_data = dumps(jsonl, indent=2)
    pd_stocks = pd.read_json(StringIO(json_data))
    return pd_stocks


def dp_2_jsonl(dataset):
    docs = dataset.to_json(orient="records")
    docs = json.loads(docs)
    # collection.insert_many(docs)
    return docs


def timestamp2time(timestamp=1707769336):
    dateArray = datetime.fromtimestamp(timestamp)
    # otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
    otherStyleTime = dateArray.strftime("%Y-%m-%d")
    print(otherStyleTime)
    return otherStyleTime


def time2timestamp(t="2024-02-13 04:22:16"):
    # time given as a string
    # parse it into a struct_time
    timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
    print(timeArray)
    # struct_time exposes fields such as tm_year
    # convert to a Unix timestamp
    timeStamp = int(time.mktime(timeArray))
    print(timeStamp)
    return timeStamp


######################################################
######################################################
######################################################
def data_clean(df):
    return


######################################################
######################################################
def trend_high_price(df):
    # Trace back the previous 3 high points; if only one exists,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    ph = []
    pl = []
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.iloc[i]["ph"] = h
            df.iloc[i]["pl"] = l
            continue

        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if ma20 > ma60:
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(h)
        else:
            r = all(v < h for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(l)

        df.iloc[i]["ph"] = ph[:-1]
        df.iloc[i]["pl"] = pl[:-1]

    return df


def trend_high_price2(df):
    # Trace back the previous 3 high points; if only one exists,
    # the other two are set equal to the first.
    # df["high"] = df["high"].astype(float)
    ph = []
    pl = []
    h_i = 0
    l_i = 0
    for i in range(len(df)):
        h = df.iloc[i]["high"]
        l = df.iloc[i]["low"]
        if len(ph) < 1:
            ph.append(h)
            pl.append(l)
            df.iloc[i]["ph"] = h
            df.iloc[i]["pl"] = l
            continue
        c = df.iloc[i]["close"]
        ma20 = df.iloc[i]["ma20"]
        ma60 = df.iloc[i]["ma60"]
        if c > ma20:
            r = all(v > h for v in ph)
            if not r:
                pre_v = -1
                for v in reversed(ph):
                    if v > h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(h)
        else:
            r = all(v < h for v in pl)
            if not r:
                pre_v = -1
                for v in reversed(pl):
                    if v < h:
                        pre_v = v
                if pre_v > 0:
                    ph.append(pre_v)
            else:
                ph.append(l)

        df.iloc[i]["ph"] = ph[:-1]
        df.iloc[i]["pl"] = pl[:-1]

    return df