From 652f2378e8df0bce4cfc47f7b23d870ad02dbc2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=9C=E9=B9=8F=E9=A3=9E?=
Date: Tue, 6 Aug 2024 11:08:31 +0800
Subject: [PATCH] Add files via upload

---
 0_csvpath.py        |  61 ++++++++++
 0utils.py           |  87 ++++++++++++++
 __init__.py         |   0
 m0_download.py      | 120 +++++++++++++++++++
 m10_predict.py      |   4 +
 m1_data_clean.py    |  44 +++++++
 m2_features.py      | 286 ++++++++++++++++++++++++++++++++++++++++++++
 m3_loaddatasets.py  | 192 +++++++++++++++++++++++++++++
 m4_embedding.py     |   0
 m5_position.py      |  15 +++
 m6_vit.py           | 133 ++++++++++++++++++++
 m6_vit_1d.py        | 153 ++++++++++++++++++++++++
 m7_train.py         | 107 +++++++++++++++++
 m8_evaluate.py      |  40 +++++++
 m9_infer.py         |  39 ++++++
 t1_create_sample.py |  33 +++++
 z-predict.py        |   0
 zutils.py           | 179 +++++++++++++++++++++++++++
 18 files changed, 1493 insertions(+)
 create mode 100644 0_csvpath.py
 create mode 100644 0utils.py
 create mode 100644 __init__.py
 create mode 100644 m0_download.py
 create mode 100644 m10_predict.py
 create mode 100644 m1_data_clean.py
 create mode 100644 m2_features.py
 create mode 100644 m3_loaddatasets.py
 create mode 100644 m4_embedding.py
 create mode 100644 m5_position.py
 create mode 100644 m6_vit.py
 create mode 100644 m6_vit_1d.py
 create mode 100644 m7_train.py
 create mode 100644 m8_evaluate.py
 create mode 100644 m9_infer.py
 create mode 100644 t1_create_sample.py
 create mode 100644 z-predict.py
 create mode 100644 zutils.py

diff --git a/0_csvpath.py b/0_csvpath.py
new file mode 100644
index 0000000..afc16a6
--- /dev/null
+++ b/0_csvpath.py
@@ -0,0 +1,61 @@
+import os
+
+# base_dir = os.path.join(os.path.dirname(__file__), "../datasets")
+
+
+def csv_paths(dataset_path=""):
+    qfq_weekly = os.path.join(dataset_path, "weekly_qfq")
+    qfq_daily = os.path.join(dataset_path, "daily_qfq")
+    hfq_weekly = os.path.join(dataset_path, "weekly_hfq")
+    hfq_daily = os.path.join(dataset_path, "daily_hfq")
+
+    features_weekly = os.path.join(dataset_path, "weekly_features")
+    features_daily = os.path.join(dataset_path, "daily_features")
+
+    templates_weekly = os.path.join(dataset_path, "weekly_templates")
+    templates_daily = os.path.join(dataset_path, "daily_templates")
+
+    results_weekly = os.path.join(dataset_path, "weekly_results")
+    results_daily = os.path.join(dataset_path, "daily_results")
+
+    # Create the whole tree up front; exist_ok makes the repeated
+    # os.path.exists checks unnecessary.
+    for d in (qfq_weekly, qfq_daily, hfq_weekly, hfq_daily,
+              features_weekly, features_daily,
+              templates_weekly, templates_daily,
+              results_weekly, results_daily):
+        os.makedirs(d, exist_ok=True)
+
+    week = {
+        "qfq": qfq_weekly,
+        "hfq": hfq_weekly,
+        "features": features_weekly,
+        "templates": templates_weekly,
+        "results": results_weekly
+    }
+    daily = {
+        "qfq": qfq_daily,
+        "hfq": hfq_daily,
+        "features": features_daily,
+        "templates": templates_daily,
+        "results": results_daily
+    }
+    all_codes_path = os.path.join(dataset_path, "0all.csv")
+    return all_codes_path, week, daily
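csv_paths builds the whole directory tree and hands back the paths grouped by frequency. A minimal usage sketch (the ./datasets root is an assumption, matching the defaults used elsewhere in this patch); note that the leading digit in the filename blocks a plain import statement:

    import importlib

    csvpath = importlib.import_module("0_csvpath")  # "import 0_csvpath" is a SyntaxError
    all_codes_path, week, daily = csvpath.csv_paths("./datasets")
    print(all_codes_path)       # ./datasets/0all.csv
    print(week["features"])     # ./datasets/weekly_features
    print(daily["results"])     # ./datasets/daily_results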
diff --git a/0utils.py b/0utils.py
new file mode 100644
index 0000000..c4db582
--- /dev/null
+++ b/0utils.py
@@ -0,0 +1,87 @@
+import pandas as pd
+from bson.json_util import dumps
+from io import StringIO
+import json
+from datetime import datetime
+import time
+
+
+def download_to_column(df):
+    ##################################################################
+    # df = stock_hfq_df
+    # aa = df.loc[:, "收盘"]
+    # df["前收盘"] = aa.shift()
+    # df.at[0, "前收盘"] = df.at[0, "收盘"]
+    #
+    # tt = df.loc[:, "换手率"]
+    # df["前换手率"] = tt.shift()
+    # df.at[0, "前换手率"] = df.at[0, "换手率"]
+
+    # df["换手率"] = df["换手率"].astype(float)
+    # df["前换手率"] = df["换手率"].pct_change()
+    # df["收盘"] = df["收盘"].astype(float)
+    # df["前收盘"] = df["收盘"].pct_change()
+    ##################################################################
+    stock = pd.DataFrame()
+    # stock["d"] = time.strftime("%Y-%m-%d", time.localtime(df.loc[:, "日期"]))
+    stock["dateT"] = df.loc[:, "日期"].astype(str)
+    stock["open"] = df.loc[:, "开盘"].astype(float)
+    stock["close"] = df.loc[:, "收盘"].astype(float)
+    # stock["pre_close"] = df.loc[:, "前收盘"]
+    stock["high"] = df.loc[:, "最高"].astype(float)
+    stock["low"] = df.loc[:, "最低"].astype(float)
+    stock["turnover"] = df.loc[:, "换手率"].astype(float)
+    # stock['pre_turnover'] = df.loc[:, "前换手率"]
+    stock["zf"] = df.loc[:, "振幅"].astype(float)
+    stock["zdf"] = df.loc[:, "涨跌幅"].astype(float)
+    # # kdj9(stock)
+    # # kdj45(stock)
+    # # ma(stock)
+    # data = stock.replace(np.nan, 0)
+    # data = data.replace(np.inf, 0)
+    # data = data.fillna(0)
+
+    return stock
+
+
+def jsonl_2_dp(jsonl):
+    json_data = dumps(jsonl, indent=2)
+    pd_stocks = pd.read_json(StringIO(json_data))
+    return pd_stocks
+
+
+def dp_2_jsonl(dataset):
+    docs = dataset.to_json(orient="records")
+    docs = json.loads(docs)
+    # collection.insert_many(docs)
+    return docs
+
+
+def timestamp2time(timestamp=1707769336):
+    # "from datetime import datetime" is in scope, so fromtimestamp is called
+    # on the class directly; datetime.datetime... would raise AttributeError
+    dateArray = datetime.fromtimestamp(timestamp)
+    # otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
+    otherStyleTime = dateArray.strftime("%Y-%m-%d")
+    print(otherStyleTime)
+    return otherStyleTime
+
+
+def time2timestamp(t="2024-02-13 04:22:16"):
+    # time given as a string
+    # parse it into a time struct
+    timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
+    print(timeArray)
+    # timeArray exposes tm_year and friends
+    # convert the struct to an epoch timestamp
+    timeStamp = int(time.mktime(timeArray))
+    print(timeStamp)
+    return timeStamp
+
+
+######################################################
+######################################################
+######################################################
+def data_clean(df):
+    # placeholder, not implemented yet
+    return
+
+######################################################
+######################################################
\ No newline at end of file
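The two time helpers are near inverses: time2timestamp parses a full datetime string into epoch seconds (via the local timezone), while timestamp2time deliberately keeps only the date. A round-trip sketch; importlib is needed again because the module name starts with a digit:

    import importlib

    u = importlib.import_module("0utils")
    ts = u.time2timestamp("2024-02-13 04:22:16")  # epoch seconds; exact value depends on the local tz
    day = u.timestamp2time(ts)                    # "2024-02-13", the time of day is dropped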
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/m0_download.py b/m0_download.py
new file mode 100644
index 0000000..91ba8f8
--- /dev/null
+++ b/m0_download.py
@@ -0,0 +1,120 @@
+import baostock as bs
+import pandas as pd
+
+#### log in ####
+# lg = bs.login()
+# print the login response
+# print("login respond error_code:" + lg.error_code)
+# print("login respond error_msg:" + lg.error_msg)
+
+
+def get_all_codes(save_path="./datasets"):
+    # s = bs.query_all_stock()
+    # s_df = s.get_data()
+    # s_df.to_csv("./datasets/all_codes.csv", encoding="utf-8", index=False)
+    #### log in ####
+    lg = bs.login()
+    # print the login response
+    print("login respond error_code:" + lg.error_code)
+    print("login respond error_msg:" + lg.error_msg)
+
+    #### fetch the security list ####
+    rs = bs.query_all_stock(day="2024-06-20")
+    print("query_all_stock respond error_code:" + rs.error_code)
+    print("query_all_stock respond error_msg:" + rs.error_msg)
+
+    #### collect the result set ####
+    data_list = []
+    while (rs.error_code == "0") & rs.next():
+        # fetch one record at a time and merge them
+        data_list.append(rs.get_row_data())
+    result = pd.DataFrame(data_list, columns=rs.fields)
+
+    #### write the result set to a csv file ####
+    result.to_csv(save_path + "/all_codes.csv", encoding="utf-8", index=False)
+    print(result)
+
+    #### log out ####
+    bs.logout()
+
+
+def download_code_hist(
+    save_path="./datasets/origins",
+    code="sh.600000",
+    start_date="1999-09-01",
+    end_date="2024-06-30",
+    freq="d",
+    adjustflag="1"
+):
+
+    #### fetch A-share historical K-line data ####
+    # For the full field list see the "historical quote fields" section of the
+    # baostock docs; minute-line fields differ from daily fields, and minute
+    # lines do not cover indices.
+    # minute-line fields: date,time,code,open,high,low,close,volume,amount,adjustflag
+    # weekly/monthly fields: date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
+    # no trailing comma here: it would turn the string into a one-element tuple
+    fields = "date,time,code,open,high,low,close,volume,adjustflag"
+    if freq == "d":
+        fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST"
+    rs = bs.query_history_k_data_plus(
+        code,
+        # "date,time,code,open,high,low,close,volume,adjustflag",
+        # "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST",
+        fields=fields,
+        start_date=start_date,
+        end_date=end_date,
+        frequency=freq,
+        adjustflag=adjustflag,  # hfq
+    )
+
+    #### collect the result set ####
+    data_list = []
+    while (rs.error_code == "0") & rs.next():
+        # fetch one record at a time and merge them
+        data_list.append(rs.get_row_data())
+    result = pd.DataFrame(data_list, columns=rs.fields)
+
+    # print(result)
+
+    #### write the result set to a csv file ####
+    filename = save_path + "/" + code + ".csv"
+    result.to_csv(filename, index=True)
+    # print(result)
+
+
+def download_codes(save_path="./datasets/origins", filters=[], freq="d", adjustflag="1"):
+    codes = pd.read_csv("./datasets/all_codes.csv")
+    codes = codes["code"].tolist()
+
+    lg = bs.login()
+    # print the login response
+    print("login respond error_code:" + lg.error_code)
+    print("login respond error_msg:" + lg.error_msg)
+
+    for code in codes:
+        ignore = True
+        for f in filters:
+            if code.startswith(f):
+                ignore = False
+        if ignore:
+            continue
+
+        print(code)
+        # if code < "sz.300138":
+        #     continue
+
+        download_code_hist(save_path=save_path, code=code, freq=freq, adjustflag=adjustflag)
+    #### log out ####
+    bs.logout()
+
+
+# get_all_codes(save_path="./datasets")
+download_codes(
+    save_path="./datasets/cyday",
+    filters=["sh.68", "sz.3"],
+    # filters=["sz.301"]
+    freq="d",
+    adjustflag="1"  # hfq
+)
diff --git a/m10_predict.py b/m10_predict.py
new file mode 100644
index 0000000..4621a49
--- /dev/null
+++ b/m10_predict.py
@@ -0,0 +1,4 @@
+import torch
+
+# restores the full pickled model saved by m7_train.py
+model = torch.load("./model.pt")
+
diff --git a/m1_data_clean.py b/m1_data_clean.py
new file mode 100644
index 0000000..ace1c7b
--- /dev/null
+++ b/m1_data_clean.py
@@ -0,0 +1,44 @@
+import pandas as pd
+import torch
+import os
+import numpy as np
+
+
+def clean_volume_0(df):
+
+    # replace NaN/inf with 0
+    df = df.replace(np.nan, 0)
+    df = df.replace(np.inf, 0)
+    df = df.fillna(0)
+
+    df["volume"] = df["volume"].astype(int)
+
+    # Drop zero-volume and zero-close rows with boolean masks; positional
+    # double-indexing (df.drop(df.index[index])) goes stale after the first
+    # drop has shifted row positions.
+    df = df[df["volume"] != 0]
+    df = df[df["close"] != 0]
+    df = df.reset_index(drop=True)  # reset_index returns a copy, so reassign it
+
+    return df
+
+
+def process_features(in_dir, out_dir):
+    # codes = pd.read_csv("./datasets/all_codes.csv")
+    # codes = codes["code"].tolist()
+
+    # codes = ["sz.300001"]
+    file_dir = in_dir  # "./datasets/origins"
+
+    a_s = [a for a in sorted(os.listdir(file_dir), key=lambda x: str(x[5:]))]
+    for a in a_s:
+        file = os.path.join(file_dir, a)
+        df = pd.read_csv(file)
+
+        df = clean_volume_0(df)
+
+        df.to_csv(os.path.join(out_dir, a))
+        print(a)
+
+
+process_features(in_dir="./datasets/cyday",
+                 out_dir="./datasets/cleaned/")
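The kdj4/kdj9/kdj45/kdj160 functions in m2_features.py below differ only in the rolling window and smoothing constant. If they ever need to stay in sync, one parameterized helper covers all of them; a sketch, assuming the same column conventions as the file below:

    def kdj(df, window, com, low="low", high="high", close="close"):
        # rolling extremes, backfilled with expanding extremes for the warm-up rows
        low_list = df[low].rolling(window).min()
        low_list.fillna(value=df[low].expanding().min(), inplace=True)
        high_list = df[high].rolling(window).max()
        high_list.fillna(value=df[high].expanding().max(), inplace=True)

        rsv = (df[close] - low_list) / (high_list - low_list) * 100
        k, d = "k" + str(window), "d" + str(window)
        df[k] = rsv.ewm(com=com).mean()
        df[d] = df[k].ewm(com=com).mean()
        df["j" + str(window)] = 3 * df[k] - 2 * df[d]

    # kdj(df, 9, 3) reproduces kdj9; kdj(df, 45, 15) reproduces kdj45, and so on.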
diff --git a/m2_features.py b/m2_features.py
new file mode 100644
index 0000000..29070b2
--- /dev/null
+++ b/m2_features.py
@@ -0,0 +1,286 @@
+import akshare as ak
+from io import StringIO
+from bson.json_util import dumps
+import json
+import os
+import numpy as np
+import pandas as pd
+
+X_Length = 30
+
+pd.set_option("display.width", 1000)
+pd.set_option("display.max_rows", None)
+pd.set_option("display.max_columns", None)
+pd.set_option("display.max_colwidth", 1000)
+
+
+def calc_pre_high_list(df):
+    # Trace back the previous 3 highs; if there is only one,
+    # the other two are taken to equal the first.
+    # df["high"] = df["high"].astype(float)
+    ph = []
+    pl = []
+    # Collect per-row snapshots and assign whole columns at the end;
+    # chained df.iloc[i]["ph"] = ... assignment would be a silent no-op.
+    ph_col = []
+    pl_col = []
+    for i in range(len(df)):
+        h = df.iloc[i]["high"]
+        l = df.iloc[i]["low"]
+        if len(ph) < 1:
+            ph.append(h)
+            pl.append(l)
+            ph_col.append(h)
+            pl_col.append(l)
+            continue
+
+        ma20 = df.iloc[i]["ma20"]
+        ma60 = df.iloc[i]["ma60"]
+        if ma20 > ma60:
+            r = all(v > h for v in ph)
+            if not r:
+                pre_v = -1
+                for v in reversed(ph):
+                    if v > h:
+                        pre_v = v
+                if pre_v > 0:
+                    ph.append(pre_v)
+                else:
+                    ph.append(h)
+        else:
+            # mirror of the branch above, against the lows
+            r = all(v < l for v in pl)
+            if not r:
+                pre_v = -1
+                for v in reversed(pl):
+                    if v < l:
+                        pre_v = v
+                if pre_v > 0:
+                    pl.append(pre_v)
+                else:
+                    pl.append(l)
+
+        ph_col.append(list(ph[:-1]))
+        pl_col.append(list(pl[:-1]))
+
+    df["ph"] = ph_col
+    df["pl"] = pl_col
+
+    return df
+
+
+def calc_pre_high_list2(df):
+    # Same tracing as above, but the trend test is close vs. ma20.
+    ph = []
+    pl = []
+    ph_col = []
+    pl_col = []
+    for i in range(len(df)):
+        h = df.iloc[i]["high"]
+        l = df.iloc[i]["low"]
+        if len(ph) < 1:
+            ph.append(h)
+            pl.append(l)
+            ph_col.append(h)
+            pl_col.append(l)
+            continue
+        c = df.iloc[i]["close"]
+        ma20 = df.iloc[i]["ma20"]
+        ma60 = df.iloc[i]["ma60"]
+        if c > ma20:
+            r = all(v > h for v in ph)
+            if not r:
+                pre_v = -1
+                for v in reversed(ph):
+                    if v > h:
+                        pre_v = v
+                if pre_v > 0:
+                    ph.append(pre_v)
+                else:
+                    ph.append(h)
+        else:
+            r = all(v < l for v in pl)
+            if not r:
+                pre_v = -1
+                for v in reversed(pl):
+                    if v < l:
+                        pre_v = v
+                if pre_v > 0:
+                    pl.append(pre_v)
+                else:
+                    pl.append(l)
+
+        ph_col.append(list(ph[:-1]))
+        pl_col.append(list(pl[:-1]))
+
+    df["ph"] = ph_col
+    df["pl"] = pl_col
+
+    return df
+
+
+def kdj_window(df, window=160, m1=60, m2=60, low="low", high="high", close="close"):
+    low_list = df[low].rolling(window).min()
+    low_list.fillna(value=df[low].expanding().min(), inplace=True)
+    high_list = df[high].rolling(window).max()
+    high_list.fillna(value=df[high].expanding().max(), inplace=True)
+
+    rsv = (df[close] - low_list) / (high_list - low_list) * 100
+    k = 'k' + str(window)
+    d = 'd' + str(window)
+    # derive d and j from this window's own k column, not the hard-coded k9/d9
+    df[k] = rsv.ewm(alpha=1 / m1, adjust=False).mean()
+    df[d] = df[k].ewm(alpha=1 / m2, adjust=False).mean()
+    df['j' + str(window)] = 3 * df[k] - 2 * df[d]
+
+
+def kdj4(df, low="low", high="high", close="close"):
+    low_list = df[low].rolling(window=4).min()
+    low_list.fillna(value=df[low].expanding().min(), inplace=True)
+    high_list = df[high].rolling(window=4).max()
+    high_list.fillna(value=df[high].expanding().max(), inplace=True)
+
+    rsv = (df[close] - low_list) / (high_list - low_list) * 100
+    df['k4'] = rsv.ewm(com=3).mean()
+    df['d4'] = df['k4'].ewm(com=3).mean()
+    df['j4'] = 3 * df['k4'] - 2 * df['d4']
+def kdj160(df, low="low", high="high", close="close"):
+    low_list = df[low].rolling(window=160).min()
+    low_list.fillna(value=df[low].expanding().min(), inplace=True)
+    high_list = df[high].rolling(window=160).max()
+    high_list.fillna(value=df[high].expanding().max(), inplace=True)
+
+    rsv = (df[close] - low_list) / (high_list - low_list) * 100
+    df['k160'] = rsv.ewm(com=60).mean()
+    df['d160'] = df['k160'].ewm(com=60).mean()
+    df['j160'] = 3 * df['k160'] - 2 * df['d160']
+
+
+def kdj9(df, low="low", high="high", close="close"):
+    low_list = df[low].rolling(window=9).min()
+    low_list.fillna(value=df[low].expanding().min(), inplace=True)
+    high_list = df[high].rolling(window=9).max()
+    high_list.fillna(value=df[high].expanding().max(), inplace=True)
+
+    rsv = (df[close] - low_list) / (high_list - low_list) * 100
+    df['k9'] = rsv.ewm(com=3).mean()
+    df['d9'] = df['k9'].ewm(com=3).mean()
+    df['j9'] = 3 * df['k9'] - 2 * df['d9']
+
+
+def kdj45(df, low="low", high="high", close="close"):
+    low_list = df[low].rolling(window=45).min()
+    low_list.fillna(value=df[low].expanding().min(), inplace=True)
+    high_list = df[high].rolling(window=45).max()
+    high_list.fillna(value=df[high].expanding().max(), inplace=True)
+
+    rsv = (df[close] - low_list) / (high_list - low_list) * 100
+    df['k45'] = rsv.ewm(com=15).mean()
+    df['d45'] = df['k45'].ewm(com=15).mean()
+    df['j45'] = 3 * df['k45'] - 2 * df['d45']
+
+
+def ma(df, close="close"):
+    # rolling means keep NaN in the warm-up rows; a trailing .dropna() is a
+    # no-op here because column assignment realigns on the index anyway
+    df['ma5'] = df[close].rolling(window=5).mean()
+    df['ma20'] = df[close].rolling(window=20).mean()
+    df["ma60"] = df[close].rolling(window=60).mean()
+    # df['ma1000'] = df[close].rolling(window=1000).mean()
+
+
+# def zdf(data):
+#     if data.shape[0] % 16 != 0:
+#         print("error================================> not 16 ")
+
+#     zdf = pd.DataFrame()
+#     zdf["zdf"] = -1
+#     for i in range(int(data.shape[0] / 16) - 1):
+#         s = int(i * 16 + 16)
+#         e = int(s + 16)
+#         print(e)
+#         p1 = data[s-1:s]["close"].values[0]
+#         p2 = data[e-1:e]["close"].values[0]
+#         print(p1)
+#         print(p2)
+#         zdf[e-1:e]["zdf"] = p2 / p1
+#     print(zdf["zdf"])
+#     data["zdf"] = zdf["zdf"]
+
+#     return data
+
+
+def get_features(data):
+    lines = data.shape[0]
+    if lines < 120:
+        return None
+
+    ma(data)
+    # data = zdf(data)
+    data = data.loc[60:, :]
+    # data = data.copy()
+    # data.reset_index(drop=True)
+
+    # data = calc_pre_high_list(data)
+    dataset = pd.DataFrame()
+
+    close = data.iloc[0:1]["close"].values[0]
+    volume = data.iloc[0:1]["volume"].values[0]
+    # print(volume)
+    dataset["date"] = data["date"]
+
+    # dataset["high"] = data["high"].astype(float) / close
+    # dataset["low"] = data["low"].astype(float) / close
+    # dataset["open"] = data["open"].astype(float) / close
+    # dataset["close"] = data["close"].astype(float) / close
+
+    # dataset["ma5"] = data["ma5"].astype(float) / close
+    # dataset["ma20"] = data["ma20"].astype(float) / close
+    # dataset["ma60"] = data["ma60"].astype(float) / close
+
+    # dataset["volume"] = data["volume"].astype(float) / volume
+
+    dataset["zdf"] = data["close"].astype(float) / data["preclose"].astype(float)
+
+    dataset["high"] = data["high"].astype(float)
+    dataset["low"] = data["low"].astype(float)
+    dataset["open"] = data["open"].astype(float)
+    dataset["close"] = data["close"].astype(float)
+
+    dataset["ma5"] = data["ma5"].astype(float)
+    dataset["ma20"] = data["ma20"].astype(float)
+    dataset["ma60"] = data["ma60"].astype(float)
+    dataset["volume"] = data["volume"].astype(float)
+
+    # keep only series whose daily moves all stay within [-30%, +30%)
+    if all(dataset['zdf'] >= 0.7) and all(dataset['zdf'] < 1.3):
+        return dataset
+
+    return None
+
+    # null replace to 0
+    # dataset = dataset.replace(np.nan, 0)
+    # dataset = dataset.replace(np.inf, 0)
+    # dataset = dataset.fillna(0)
+
+    # return dataset
+
+
+def process_features(in_dir, out_dir):
+    # codes = pd.read_csv("./datasets/all_codes.csv")
+    # codes = codes["code"].tolist()
+
+    # codes = ["sz.300001"]
+    file_dir = in_dir  # "./datasets/origins"
+
+    a_s = [a for a in sorted(os.listdir(file_dir), key=lambda x: str(x[5:]))]
+    for a in a_s:
+        # if a.startswith("sz.30"):
+        #     continue
+        file = os.path.join(file_dir, a)
+        df = pd.read_csv(file)
+        df = get_features(df)
+        if df is not None:
+            # file = "./datasets/features/" + a
+            file = os.path.join(out_dir, a)
+            df.to_csv(file, index=False)
+            print(a)
+        # break
+
+
+process_features(in_dir="./datasets/cleaned", out_dir="./datasets/features/")
+
+# process_features(in_dir="./02stocks/vit15minutes/datasets/origins",
+#                  out_dir="./02stocks/vit15minutes/datasets/features/")
\ No newline at end of file
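get_features silently returns None for short series or for any window containing a move outside [-30%, +30%); a small smoke test makes the expected output shape visible. This is a sketch on synthetic prices (all values invented), run in the same module so get_features is in scope:

    import numpy as np
    import pandas as pd

    n = 200
    rng = np.random.default_rng(0)
    close = 10 + np.cumsum(rng.normal(0, 0.05, n))       # gentle random walk
    df = pd.DataFrame({
        "date": pd.date_range("2023-01-01", periods=n).astype(str),
        "open": close, "high": close * 1.01, "low": close * 0.99,
        "close": close, "preclose": np.r_[close[0], close[:-1]],
        "volume": rng.integers(1_000, 10_000, n),
    })
    out = get_features(df)
    print(None if out is None else out.columns.tolist())
    # ['date', 'zdf', 'high', 'low', 'open', 'close', 'ma5', 'ma20', 'ma60', 'volume']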
diff --git a/m3_loaddatasets.py b/m3_loaddatasets.py
new file mode 100644
index 0000000..6a9b3a3
--- /dev/null
+++ b/m3_loaddatasets.py
@@ -0,0 +1,192 @@
+import pandas as pd
+import torch
+import os
+import math
+from torch.utils.data import Dataset
+from torch.utils.data import DataLoader
+
+max_y = 0
+min_y = 99999
+
+const_y_days = 5
+
+
+def y_feature_extract(y1):
+    y = y1["zdf"].values[0]
+    y = y - 1 + 0.3
+    y = int(y * 1000)  # zdf is filtered to [0.7, 1.3), so y lands in [0, 600) and num_classes = 600
+
+    return y
+
+
+def y_feature_extract_1_0(y1):
+    y = y1["zdf"].max()
+    if y > 1.1:
+        return 1
+    else:
+        return 0
+
+
+def feature_extract_x_7(x1):
+
+    close = x1[0:1]["close"].values[0]
+    volume = x1[0:1]["volume"].values[0]
+
+    # print(x)
+    # print(x.shape)
+    factor = 1
+    x = pd.DataFrame()
+    x["high"] = x1["high"] / close * factor
+    x["low"] = x1["low"] / close * factor
+    x["open"] = x1["open"] / close * factor
+    x["close"] = x1["close"] / close * factor
+
+    x["ma5"] = x1["ma5"] / close * factor
+    x["ma20"] = x1["ma20"] / close * factor
+    x["ma60"] = x1["ma60"] / close * factor
+
+    # x["volume"] = x1["volume"] / volume * factor
+
+    return x
+
+
+def feature_extract_x_2(x1):
+    close = x1[0:1]["close"].values[0]
+    volume = x1[0:1]["volume"].values[0]
+
+    # print(x)
+    # print(x.shape)
+    factor = 10
+    x = pd.DataFrame()
+    x["high"] = (x1["high"] - x1["close"]) / close * factor
+    x["low"] = (x1["low"] - x1["close"]) / close * factor
+    x["open"] = (x1["open"] - x1["close"]) / close * factor
+    # x["close"] = (x1["close"] - x["close"]) / close * factor
+    x["zdf"] = x1["zdf"] - 1
+
+    x["ma5"] = (x1["ma5"] - x1["close"]) / close * factor
+    x["ma20"] = (x1["ma20"] - x1["close"]) / close * factor
+    x["ma60"] = (x1["ma60"] - x1["close"]) / close * factor
+
+    x["volume"] = x1["volume"] / volume * factor
+
+    return x
+
+
+def feature_extract_x(x1):
+
+    close = x1[0:1]["close"].values[0]
+    volume = x1[0:1]["volume"].values[0]
+
+    # print(x)
+    # print(x.shape)
+    factor = 10
+    x = pd.DataFrame()
+    x["high"] = x1["high"] / close * factor
+    x["low"] = x1["low"] / close * factor
+    x["open"] = x1["open"] / close * factor
+    x["close"] = x1["close"] / close * factor
+
+    x["ma5"] = x1["ma5"] / close * factor
+    x["ma20"] = x1["ma20"] / close * factor
+    x["ma60"] = x1["ma60"] / close * factor
+
+    x["volume"] = x1["volume"] / volume * factor
+
+    return x
+
+
+def features(x1, y1):
+    y = y_feature_extract_1_0(y1)
+    # if y >= 600 or y <= 0:
+    #     print("error--------------------y > 600")
+    #     print(y)
+    #     print(y1)
+
+    x = feature_extract_x_2(x1)
+
+    return x, [y]
+
+
+class DatasetsDay(Dataset):
+    def __init__(self, df, day_length=40):
+        self.df = df
+        self.day_length = day_length
+
+    def __len__(self):
+        d = self.df.shape[0]
+        d = d - self.day_length - const_y_days
+        return 0 if d < 0 else d
+
+    def __getitem__(self, i):
+        a = i
+        e = a + self.day_length
+
+        if e >= self.df.shape[0]:
+            print("error================================================datasets_loader?")
+            return torch.ones(0, 0), torch.ones(0, 0)
+
+        x1 = self.df.iloc[a:e, :]
+        y1 = self.df.iloc[e : e + const_y_days, :]
+        # print(x)
+        # print(y)
+        ############################################
+        x, y = features(x1, y1)
+        ############################################
+        x = torch.Tensor(x.values)
+        # x = torch.unsqueeze(x, dim=0)
+        # print(x.shape)
+        # print("-----------------------x.shape")
+
+        y = torch.LongTensor(y)
+        y = torch.squeeze(y)
+
+        return x, y
+
+    def get_last_item(self):
+        s = self.df.shape[0] - self.day_length
+        e = self.df.shape[0]
+        x1 = self.df.iloc[s:e, :]
+        x = feature_extract_x(x1)
+        x = torch.Tensor(x.values)
+
+        return x
+
+
+def test(features_dir="./datasets/features1/"):
+    seq_length = 50
+    files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))]
+    for file in files:
+        print(file)
+        d = pd.read_csv(os.path.join(features_dir, file))
+        a = DatasetsDay(d, day_length=seq_length)
+        # print(len(a))
+
+        global max_y
+        global min_y
+        max_y = 0
+        min_y = 99999
+
+        loader = DataLoader(a, batch_size=1, shuffle=True)
+        for step, (x, y) in enumerate(loader):
+            # print(step)
+            print(x.shape)
+            print(y.shape)
+            # print("-----------------------x.shape")
+            # print(x)
+            # print(y)
+            break
+        print(max_y)
+        print(min_y)
+
+
+# files = [print(a) for a in sorted(os.listdir("./datasets/features"), key=lambda x: x[5:])]
+
+# test(features_dir="./02stocks/vit15minutes/datasets/features")
+# test(features_dir="./datasets/features")
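Each DatasetsDay item is a sliding window: x covers day_length rows and the label is derived from the const_y_days rows immediately after it, so a file with R rows yields R - day_length - const_y_days samples (e.g. R = 100 and day_length = 40 gives 55). A sketch; the file path is an assumption:

    d = pd.read_csv("./datasets/features/sz.300001.csv")
    ds = DatasetsDay(d, day_length=40)
    x, y = ds[0]                      # rows [0, 40) as input, rows [40, 45) for the label
    print(len(ds), x.shape, y.shape)  # x: (40, 8) via feature_extract_x_2, y: scalar class index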
"error================================================datasets_loader?" + ) + return torch.ones(0, 0), torch.ones(0, 0) + + x1 = self.df.iloc[a:e, :] + y1 = self.df.iloc[e : e + const_y_days, :] + # print(x) + # print(y) + ############################################ + x, y = features(x1, y1) + ############################################ + # + x = torch.Tensor(x.values) + # x = torch.unsqueeze(x, dim=0) + # print(x.shape) + # print("-----------------------x.shape") + + y = torch.LongTensor(y) + y = torch.squeeze(y) + + return x, y + + def get_last_item(self): + s = self.df.shape[0] - self.day_length + e = self.df.shape[0] + x1 = self.df.iloc[s:e, :] + x = feature_extract_x(x1) + x = torch.Tensor(x.values) + + return x + + +def test(features_dir="./datasets/features1/"): + seq_length = 50 + files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))] + for file in files: + print(file) + d = pd.read_csv(os.path.join(features_dir, file)) + a = DatasetsDay(d, day_length=seq_length) + # print(len(a)) + + global max_y + global min_y + max_y = 0 + min_y = 99999 + + loader = DataLoader(a, batch_size=1,shuffle=True) + for step, (x, y) in enumerate(loader): + + + + # print(step) + print(x.shape) + print(y.shape) + # print("-----------------------x.shape") + # print(x) + # print(y) + # print(x.shape) + # print(y.shape) + break + pass + print(max_y) + print(min_y) + + +# files = [print(a) for a in sorted(os.listdir("./datasets/features"), key=lambda x: x[5:])] + +# test(features_dir="./02stocks/vit15minutes/datasets/features") +# test(features_dir="./datasets/features") diff --git a/m4_embedding.py b/m4_embedding.py new file mode 100644 index 0000000..e69de29 diff --git a/m5_position.py b/m5_position.py new file mode 100644 index 0000000..ff51f75 --- /dev/null +++ b/m5_position.py @@ -0,0 +1,15 @@ + + +import json +import os +import time +import datetime +import pandas as pd +import akshare as ak + + + +def day_zt_xiao_k(df): + shape = df.shape + + diff --git a/m6_vit.py b/m6_vit.py new file mode 100644 index 0000000..70f7e0a --- /dev/null +++ b/m6_vit.py @@ -0,0 +1,133 @@ +from torch import nn +from einops.layers.torch import Rearrange +from torch import Tensor +import torch +from einops import repeat + + +class Attention(nn.Module): + def __init__(self, dim, n_heads, dropout): + super().__init__() + self.n_heads = n_heads + self.att = torch.nn.MultiheadAttention( + embed_dim=dim, num_heads=n_heads, dropout=dropout + ) + self.q = torch.nn.Linear(dim, dim) + self.k = torch.nn.Linear(dim, dim) + self.v = torch.nn.Linear(dim, dim) + + def forward(self, x): + q = self.q(x) + k = self.k(x) + v = self.v(x) + attn_output, attn_output_weights = self.att(x, x, x) + return attn_output + + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + + +class FeedForward(nn.Sequential): + def __init__(self, dim, hidden_dim, dropout=0.0): + super().__init__( + nn.Linear(dim, hidden_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout), + ) + + +ff = FeedForward(dim=128, hidden_dim=256) +ff(torch.ones((1, 5, 128))).shape + + +class ResidualAdd(nn.Module): + def __init__(self, fn): + super().__init__() + self.fn = fn + + def forward(self, x, **kwargs): + res = x + x = self.fn(x, **kwargs) + x += res + return x + + +class ViT3D(nn.Module): + def __init__( + self, + features_len=8, + seq_len=20, + # img_size=144, + # 
diff --git a/m6_vit_1d.py b/m6_vit_1d.py
new file mode 100644
index 0000000..f9bfab0
--- /dev/null
+++ b/m6_vit_1d.py
@@ -0,0 +1,153 @@
+import torch
+from torch import nn
+
+from einops import rearrange, repeat, pack, unpack
+from einops.layers.torch import Rearrange
+
+# classes
+
+
+class FeedForward(nn.Module):
+    def __init__(self, dim, hidden_dim, dropout=0.0):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.LayerNorm(dim),
+            nn.Linear(dim, hidden_dim),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_dim, dim),
+            nn.Dropout(dropout),
+        )
+
+    def forward(self, x):
+        return self.net(x)
+
+
+class Attention(nn.Module):
+    def __init__(self, dim, heads=8, dim_head=64, dropout=0.0):
+        super().__init__()
+        inner_dim = dim_head * heads
+        project_out = not (heads == 1 and dim_head == dim)
+
+        self.heads = heads
+        self.scale = dim_head**-0.5
+
+        self.norm = nn.LayerNorm(dim)
+        self.attend = nn.Softmax(dim=-1)
+        self.dropout = nn.Dropout(dropout)
+
+        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
+
+        self.to_out = (
+            nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
+            if project_out
+            else nn.Identity()
+        )
+
+    def forward(self, x):
+        x = self.norm(x)
+        qkv = self.to_qkv(x).chunk(3, dim=-1)
+        q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=self.heads), qkv)
+
+        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
+
+        attn = self.attend(dots)
+        attn = self.dropout(attn)
+
+        out = torch.matmul(attn, v)
+        out = rearrange(out, "b h n d -> b n (h d)")
+        return self.to_out(out)
+
+
+class Transformer(nn.Module):
+    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.0):
+        super().__init__()
+        self.layers = nn.ModuleList([])
+        for _ in range(depth):
+            self.layers.append(
+                nn.ModuleList(
+                    [
+                        Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout),
+                        FeedForward(dim, mlp_dim, dropout=dropout),
+                    ]
+                )
+            )
+
+    def forward(self, x):
+        for attn, ff in self.layers:
+            x = attn(x) + x
+            x = ff(x) + x
+        return x
+
+
+class ViT1D(nn.Module):
+    def __init__(
+        self,
+        *,
+        seq_len,
+        patch_size,
+        num_classes,
+        dim,
+        depth,
+        heads,
+        mlp_dim,
+        channels=3,
+        dim_head=64,
+        dropout=0.0,
+        emb_dropout=0.0
+    ):
+        super().__init__()
+        assert (seq_len % patch_size) == 0
+
+        num_patches = seq_len // patch_size
+        patch_dim = channels * patch_size
+
+        self.to_patch_embedding = nn.Sequential(
+            Rearrange("b c (n p) -> b n (p c)", p=patch_size),
+            nn.LayerNorm(patch_dim),
+            nn.Linear(patch_dim, dim),
+            nn.LayerNorm(dim),
+        )
+
+        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
+        self.cls_token = nn.Parameter(torch.randn(dim))
+        self.dropout = nn.Dropout(emb_dropout)
+
+        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
+
+        self.mlp_head = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, num_classes))
+
+    def forward(self, series):
+        x = self.to_patch_embedding(series)
+        b, n, _ = x.shape
+
+        cls_tokens = repeat(self.cls_token, "d -> b d", b=b)
+
+        x, ps = pack([cls_tokens, x], "b * d")
+
+        x += self.pos_embedding[:, : (n + 1)]
+        x = self.dropout(x)
+
+        x = self.transformer(x)
+
+        cls_tokens, _ = unpack(x, ps, "b * d")
+
+        return self.mlp_head(cls_tokens)
+
+
+if __name__ == "__main__":
+
+    v = ViT1D(
+        seq_len=256,
+        patch_size=16,  # i.e. the number of features per patch here
+        num_classes=1000,
+        dim=1024,
+        depth=6,
+        heads=8,
+        mlp_dim=2048,
+        dropout=0.1,
+        emb_dropout=0.1,
+    )
+
+    time_series = torch.randn(4, 3, 256)
+    logits = v(time_series)  # (4, 1000)
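The Rearrange("b c (n p) -> b n (p c)") layer is the whole 1-D "patchify" step: it chops the length-256 series into 16 patches of 16 steps each and folds the channels into every patch. In isolation, with the same shapes as the demo above:

    import torch
    from einops import rearrange

    x = torch.randn(4, 3, 256)                        # (batch, channels, seq_len)
    p = rearrange(x, "b c (n p) -> b n (p c)", p=16)
    print(p.shape)                                    # torch.Size([4, 16, 48]) = (b, num_patches, patch_dim)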
diff --git a/m7_train.py b/m7_train.py
new file mode 100644
index 0000000..10e082d
--- /dev/null
+++ b/m7_train.py
@@ -0,0 +1,107 @@
+from torch.utils.data import DataLoader
+from torch.utils.data import random_split
+from torch import nn
+from einops.layers.torch import Rearrange
+from torch import Tensor
+import torch.optim as optim
+import numpy as np
+import torch
+import os
+import pandas as pd
+from m6_vit import ViT3D
+from m3_loaddatasets import DatasetsDay
+import matplotlib.pyplot as plt
+
+
+torch.set_printoptions(threshold=float("inf"))
+
+################################################################################
+seq_length = 30
+features_len = 8
+device = "cuda"
+
+model = ViT3D(
+    seq_len=seq_length,
+    features_len=features_len,
+    num_classes=2
+).to(device)
+
+optimizer = optim.AdamW(model.parameters(), lr=1e-2)
+criterion = nn.CrossEntropyLoss()
+scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+    optimizer, factor=0.1, patience=3, verbose=True
+)
+################################################################################
+features_files = "./datasets/features"
+# features_files = "./02stocks/vitday/datasets/feature1"
+
+files = [os.path.join(features_files, a) for a in sorted(os.listdir(features_files),
+                                                         key=lambda x: (x[4:]))]
+################################################################################
+
+################################################################################
+for epoch in range(1000):
+    model.train()
+    ############################################################################
+    for file in files:
+        print(file)
+        epoch_losses = []
+        d = pd.read_csv(file)
+        a = DatasetsDay(d, day_length=seq_length)
+        loader = DataLoader(a, batch_size=128, shuffle=False)
+        for step, (x, y) in enumerate(loader):
+            x, y = x.to(device), y.to(device)
+            # print(x)
+            # print(f">>> Y: ", y)
+
+            optimizer.zero_grad()
+            outputs = model(x)
+            # print(f">>> outputs:", outputs.shape)
+            loss = criterion(outputs, y)
+            loss.backward()
+            optimizer.step()
+            epoch_losses.append(loss.item())
+            print(loss.item())
+            #################################################################
+            #################################################################
+            # print(outputs)
+            # o = torch.argmax(outputs, dim=1)
+            # if torch.equal(y, o):
+            #     print(f"succeedsssssssssssssssssssssssss->", o)
+            # else:
+            #     print(f"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee:", y, o)
+            # p1 = outputs[0:1][:]
+            # p2 = outputs[1:1][:]
+            # p1 = p1.squeeze()
+            # p2 = p2.squeeze()
+            # print(p1)
+            # plt.plot(p1.detach().cpu().numpy())
+            # plt.plot(p2.detach().cpu().numpy())
+            # plt.show()
+            #################################################################
+            #################################################################
+            # break
+
+        mean_loss = np.mean(epoch_losses)
+        print(f">>> Epoch {epoch} train loss: ", mean_loss)
+        # epoch_losses = []
+        # # Something was strange when using this?
+        # # model.eval()
+        # for step, (inputs, labels) in enumerate(test_dataloader):
+        #     inputs, labels = inputs.to(device), labels.to(device)
+        #     outputs = model(inputs)
+        #     loss = criterion(outputs, labels)
+        #     epoch_losses.append(loss.item())
+
+        # print(f">>> Epoch {epoch} test loss: ", np.mean(epoch_losses))
+
+        # if epoch % 100 == 0:
+        #     models_save = os.path.join(
+        #         models_path, "./{}-{}-{}.pt".format(m_name, epoch, mean_loss)
+        #     )
+        scheduler.step(mean_loss)
+        torch.save(model, "./model.pt")
+
+        # break
+    # break
\ No newline at end of file
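The commented-out test loop notes that "something was strange when using model.eval()"; the usual recipe is to switch modes and disable autograd together, so dropout is off and no graph is built, then switch back before the next training pass. A sketch for that block, assuming a test_dataloader built the same way as the train loader (it is only named in the comments above, not defined):

    model.eval()
    test_losses = []
    with torch.no_grad():                       # no autograd graph during evaluation
        for inputs, labels in test_dataloader:  # hypothetical held-out loader
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            test_losses.append(criterion(outputs, labels).item())
    print(f">>> Epoch {epoch} test loss: ", np.mean(test_losses))
    model.train()                               # back to training mode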
diff --git a/m8_evaluate.py b/m8_evaluate.py
new file mode 100644
index 0000000..00566c5
--- /dev/null
+++ b/m8_evaluate.py
@@ -0,0 +1,40 @@
+import torch
+import os
+from torch.utils.data import DataLoader
+import pandas as pd
+from m3_loaddatasets import DatasetsDay
+import numpy as np
+from m6_vit import ViT3D  # the class is ViT3D; it must be importable for torch.load to unpickle the model
+
+
+np.set_printoptions(threshold=10000)
+################################################################################
+seq_length = 50
+device = "cuda"
+model = torch.load("./model1.pt").to(device)
+################################################################################
+features_files = "./datasets/feature1"
+# features_files = "./02stocks/vitday/datasets/feature1"
+files = [os.path.join(features_files, a) for a in sorted(os.listdir(features_files),
+                                                         key=lambda x: (x[4:]))]
+################################################################################
+model.eval()
+################################################################################
+for epoch in range(1000):
+    ############################################################################
+    for file in files:
+        print(file)
+        d = pd.read_csv(file)
+        a = DatasetsDay(d, day_length=seq_length)
+        loader = DataLoader(a, batch_size=1, shuffle=True)
+        for step, (x, y) in enumerate(loader):
+            x, y = x.to(device), y.to(device)
+            print(f">>> Y: ", y)
+            outputs = model(x)
+            print(f">>> outputs:", outputs)
+            print(torch.argmax(outputs, dim=1))
+            break
+        break
+    break
+
+
diff --git a/m9_infer.py b/m9_infer.py
new file mode 100644
index 0000000..a6668a6
--- /dev/null
+++ b/m9_infer.py
@@ -0,0 +1,39 @@
+import torch
+from torch.utils.data import DataLoader
+from torch.utils.data import random_split
+from torch import nn
+from einops.layers.torch import Rearrange
+from torch import Tensor
+import torch.optim as optim
+import numpy as np
+import os
+import pandas as pd
+from m3_loaddatasets import DatasetsDay
+from m6_vit import ViT3D
+
+#############################################################################
+seq_length = 50
+features_len = 8
+device = "cuda"
+m = torch.load("./model1.pt").to(device)
+#############################################################################
+features_files = "./datasets/features1"
+files = [os.path.join(features_files, a) for a in sorted(os.listdir(features_files),
+                                                         key=lambda x: (x[4:]))]
+
+zdfs = []
+for file in files:
+    print(file)
+
+    d = pd.read_csv(file)
+    a = DatasetsDay(d, day_length=seq_length)
+    x = a.get_last_item()          # instance method, not DatasetsDay.get_last_item()
+    x = x.unsqueeze(0).to(device)  # the model expects (batch, seq_len, features)
+    outputs = m(x)
+    cls = outputs.argmax()
+    zdf = cls / 1000 - 0.3 + 1     # inverse of the y encoding in m3_loaddatasets
+    print(zdf)
+    zdfs.append(zdf)
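torch.load on a whole pickled model only works while the ViT3D class stays importable under the same module path it was saved from; the more portable pattern is saving and loading the state_dict. A sketch under that assumption (the ./model_state.pt filename is hypothetical):

    # saving side (in m7_train.py):
    torch.save(model.state_dict(), "./model_state.pt")

    # loading side (here):
    m = ViT3D(seq_len=seq_length, features_len=features_len, num_classes=2)
    m.load_state_dict(torch.load("./model_state.pt", map_location=device))
    m = m.to(device).eval()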
diff --git a/t1_create_sample.py b/t1_create_sample.py
new file mode 100644
index 0000000..2946758
--- /dev/null
+++ b/t1_create_sample.py
@@ -0,0 +1,33 @@
+import os
+import pandas as pd
+
+
+def create_1():
+    features_dir = "./datasets/features"
+    files = [a for a in sorted(os.listdir(features_dir), key=lambda x: str(x[5:]))]
+    for file in files:
+        print(file)
+        d = pd.read_csv(os.path.join(features_dir, file))
+
+        # first 51 rows of each features file
+        a = d.iloc[0:51][:]
+
+        p = os.path.join(features_dir, "test" + file)
+        a.to_csv(p)
+
+
+# create_1()
+
+
+def create_2():
+    code = "sz.300001.csv"
+    file = "./datasets/features/" + code
+    d = pd.read_csv(file)
+
+    # first 52 rows of a single file
+    a = d.iloc[0:52][:]
+
+    p = os.path.join("./datasets/feature1", code)
+    a.to_csv(p)
+
+
+create_2()
\ No newline at end of file
diff --git a/z-predict.py b/z-predict.py
new file mode 100644
index 0000000..e69de29
diff --git a/zutils.py b/zutils.py
new file mode 100644
index 0000000..37cfded
--- /dev/null
+++ b/zutils.py
@@ -0,0 +1,179 @@
+import pandas as pd
+from bson.json_util import dumps
+from io import StringIO
+import json
+from datetime import datetime
+import time
+
+
+def download_to_column(df):
+    ##################################################################
+    # df = stock_hfq_df
+    # aa = df.loc[:, "收盘"]
+    # df["前收盘"] = aa.shift()
+    # df.at[0, "前收盘"] = df.at[0, "收盘"]
+    #
+    # tt = df.loc[:, "换手率"]
+    # df["前换手率"] = tt.shift()
+    # df.at[0, "前换手率"] = df.at[0, "换手率"]
+
+    # df["换手率"] = df["换手率"].astype(float)
+    # df["前换手率"] = df["换手率"].pct_change()
+    # df["收盘"] = df["收盘"].astype(float)
+    # df["前收盘"] = df["收盘"].pct_change()
+    ##################################################################
+    stock = pd.DataFrame()
+    # stock["d"] = time.strftime("%Y-%m-%d", time.localtime(df.loc[:, "日期"]))
+    stock["dateT"] = df.loc[:, "日期"].astype(str)
+    stock["open"] = df.loc[:, "开盘"].astype(float)
+    stock["close"] = df.loc[:, "收盘"].astype(float)
+    # stock["pre_close"] = df.loc[:, "前收盘"]
+    stock["high"] = df.loc[:, "最高"].astype(float)
+    stock["low"] = df.loc[:, "最低"].astype(float)
+    stock["turnover"] = df.loc[:, "换手率"].astype(float)
+    # stock['pre_turnover'] = df.loc[:, "前换手率"]
+    stock["zf"] = df.loc[:, "振幅"].astype(float)
+    stock["zdf"] = df.loc[:, "涨跌幅"].astype(float)
+    # # kdj9(stock)
+    # # kdj45(stock)
+    # # ma(stock)
+    # data = stock.replace(np.nan, 0)
+    # data = data.replace(np.inf, 0)
+    # data = data.fillna(0)
+
+    return stock
+
+
+def jsonl_2_dp(jsonl):
+    json_data = dumps(jsonl, indent=2)
+    pd_stocks = pd.read_json(StringIO(json_data))
+    return pd_stocks
+
+
+def dp_2_jsonl(dataset):
+    docs = dataset.to_json(orient="records")
+    docs = json.loads(docs)
+    # collection.insert_many(docs)
+    return docs
+
+
+def timestamp2time(timestamp=1707769336):
+    # as in 0utils.py: datetime here is the class, not the module
+    dateArray = datetime.fromtimestamp(timestamp)
+    # otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
+    otherStyleTime = dateArray.strftime("%Y-%m-%d")
+    print(otherStyleTime)
+    return otherStyleTime
+
+
+def time2timestamp(t="2024-02-13 04:22:16"):
+    # time given as a string
+    # parse it into a time struct
+    timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
+    print(timeArray)
+    # timeArray exposes tm_year and friends
+    # convert the struct to an epoch timestamp
+    timeStamp = int(time.mktime(timeArray))
+    print(timeStamp)
+    return timeStamp
+
+
+######################################################
+######################################################
+######################################################
+def data_clean(df):
+    # placeholder, not implemented yet
+    return
+
+######################################################
+######################################################
+def trend_high_price(df):
+    # Trace back the previous 3 highs; if there is only one,
+    # the other two are taken to equal the first.
+    # df["high"] = df["high"].astype(float)
+    ph = []
+    pl = []
+    # Collect per-row snapshots and assign whole columns at the end;
+    # chained df.iloc[i]["ph"] = ... assignment would be a silent no-op.
+    ph_col = []
+    pl_col = []
+    for i in range(len(df)):
+        h = df.iloc[i]["high"]
+        l = df.iloc[i]["low"]
+        if len(ph) < 1:
+            ph.append(h)
+            pl.append(l)
+            ph_col.append(h)
+            pl_col.append(l)
+            continue
+
+        ma20 = df.iloc[i]["ma20"]
+        ma60 = df.iloc[i]["ma60"]
+        if ma20 > ma60:
+            r = all(v > h for v in ph)
+            if not r:
+                pre_v = -1
+                for v in reversed(ph):
+                    if v > h:
+                        pre_v = v
+                if pre_v > 0:
+                    ph.append(pre_v)
+                else:
+                    ph.append(h)
+        else:
+            # mirror of the branch above, against the lows
+            r = all(v < l for v in pl)
+            if not r:
+                pre_v = -1
+                for v in reversed(pl):
+                    if v < l:
+                        pre_v = v
+                if pre_v > 0:
+                    pl.append(pre_v)
+                else:
+                    pl.append(l)
+
+        ph_col.append(list(ph[:-1]))
+        pl_col.append(list(pl[:-1]))
+
+    df["ph"] = ph_col
+    df["pl"] = pl_col
+
+    return df
+
+
+def trend_high_price2(df):
+    # Same tracing as above, but the trend test is close vs. ma20.
+    ph = []
+    pl = []
+    ph_col = []
+    pl_col = []
+    for i in range(len(df)):
+        h = df.iloc[i]["high"]
+        l = df.iloc[i]["low"]
+        if len(ph) < 1:
+            ph.append(h)
+            pl.append(l)
+            ph_col.append(h)
+            pl_col.append(l)
+            continue
+        c = df.iloc[i]["close"]
+        ma20 = df.iloc[i]["ma20"]
+        ma60 = df.iloc[i]["ma60"]
+        if c > ma20:
+            r = all(v > h for v in ph)
+            if not r:
+                pre_v = -1
+                for v in reversed(ph):
+                    if v > h:
+                        pre_v = v
+                if pre_v > 0:
+                    ph.append(pre_v)
+                else:
+                    ph.append(h)
+        else:
+            r = all(v < l for v in pl)
+            if not r:
+                pre_v = -1
+                for v in reversed(pl):
+                    if v < l:
+                        pre_v = v
+                if pre_v > 0:
+                    pl.append(pre_v)
+                else:
+                    pl.append(l)
+
+        ph_col.append(list(ph[:-1]))
+        pl_col.append(list(pl[:-1]))
+
+    df["ph"] = ph_col
+    df["pl"] = pl_col
+
+    return df
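With the column assignment done in one pass at the end, trend_high_price can be sanity-checked on a handful of hand-made rows. A sketch with invented values (ma20/ma60 would normally come from ma() in m2_features.py):

    import pandas as pd

    df = pd.DataFrame({
        "high": [10.0, 11.0, 10.5, 12.0],
        "low":  [9.0, 9.5, 9.8, 10.2],
        "ma20": [9.6, 10.0, 10.2, 10.6],
        "ma60": [9.4, 9.6, 9.8, 10.0],
    })
    out = trend_high_price(df)
    print(out[["high", "low", "ph", "pl"]])
    # row 0 records the seed high/low; later rows record the snapshot lists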