mirror of
https://github.com/dupenf/stock-vit.git
synced 2024-11-25 16:35:10 +08:00
45 lines
1.0 KiB
Python
45 lines
1.0 KiB
Python
|
import pandas as pd
|
||
|
import torch
|
||
|
import os
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
def clean_volume_0(df):
    """Return a copy of *df* without rows whose volume or close is zero.

    Missing and infinite values (both signs) are first replaced by 0, and the
    ``volume`` column is cast to ``int``. Rows where ``volume == 0`` or
    ``close == 0`` are then removed and the index is renumbered from 0.

    Args:
        df: DataFrame containing at least the columns ``volume`` and
            ``close``. The caller's frame is not modified.

    Returns:
        A new DataFrame with the offending rows removed and a fresh
        0..n-1 index.
    """
    # Null / infinite values are replaced by 0 so that the integer cast
    # below cannot fail and such rows are filtered out with the zeros.
    df = df.replace([np.inf, -np.inf], 0).fillna(0)

    df["volume"] = df["volume"].astype(int)

    # Filter with a boolean mask rather than dropping by position: after one
    # drop the index labels no longer match row positions, so positional
    # lookups of labels would remove the wrong rows.
    keep = (df["volume"] != 0) & (df["close"] != 0)

    # reset_index returns a new frame; its result must be the one returned.
    return df.loc[keep].reset_index(drop=True)
|
||
|
|
||
|
|
||
|
def process_features(in_dir, out_dir):
    """Clean every CSV file found in *in_dir* and write it to *out_dir*.

    Each file is read with ``pd.read_csv``, passed through
    ``clean_volume_0`` and written under the same file name in *out_dir*.
    The name of each processed file is printed as progress output.

    Args:
        in_dir: Directory containing the input CSV files. Every directory
            entry is treated as a CSV file.
        out_dir: Directory that receives the cleaned files; it is created
            if it does not exist yet.
    """
    # to_csv does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    # Files are ordered by their name with the first five characters ignored.
    names = sorted(os.listdir(in_dir), key=lambda x: x[5:])
    for a in names:
        df = pd.read_csv(os.path.join(in_dir, a))

        df = clean_volume_0(df)

        df.to_csv(os.path.join(out_dir, a))
        print(a)
|
||
|
|
||
|
|
||
|
# Script entry point. NOTE: this call is at module level, so it also runs
# whenever this file is imported. It reads from ./datasets/cyday and writes
# the processed files to ./datasets/cleaned/.
process_features(in_dir="./datasets/cyday", out_dir="./datasets/cleaned/")
|