Files
NewStock/main/train/Classify/Classify2.py
2025-11-29 00:23:12 +08:00

66 lines
2.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import numpy as np
import pandas as pd
from qlib.data.dataset import DatasetH
# 1. Build a small synthetic panel indexed by (datetime, instrument).
instruments = ["SH600000", "SH600001"]
dates = pd.to_datetime(pd.date_range(start="2020-01-01", end="2020-01-10"))
index = pd.MultiIndex.from_product(
    [dates, instruments],
    names=["datetime", "instrument"],
)
row_count = len(index)

# Columns are drawn in a fixed order (feature_1, feature_2, label) so the
# sequence of random draws matches the column order.
data = {}
data["feature_1"] = np.random.randn(row_count)
data["feature_2"] = np.random.randn(row_count)
data["label"] = 0.01 * np.random.randn(row_count)

my_df = pd.DataFrame(data=data, index=index)
my_df.iat[1, 0] = np.nan  # inject a missing feature value on purpose
my_df.iat[5, 2] = np.nan  # inject a missing label value on purpose

print("----------- 原始 DataFrame -----------")
print(my_df.head())
# 2. Assemble the full handler config: a StaticDataLoader plus processors.
# Core piece: the data loader, which receives the in-memory DataFrame directly.
static_loader_config = {
    "class": "StaticDataLoader",
    "module_path": "qlib.data.dataset.loader",
    "kwargs": {"config": my_df},  # <--- the DataFrame is passed in here
}
# The only processor configured: replace missing values with 0 for learn data.
fillna_processor_config = {
    "class": "Fillna",
    "module_path": "qlib.data.dataset.processor",
    "kwargs": {"fill_value": 0},
}
data_handler_config = {
    "class": "DataHandlerLP",
    "module_path": "qlib.data.dataset.handler",
    "kwargs": {
        "data_loader": static_loader_config,
        "shared_processors": [],
        # No infer processors are configured (a DropnaLabel entry from
        # qlib.data.dataset.processor was previously commented out here).
        "infer_processors": [],
        "learn_processors": [fillna_processor_config],
    },
}
from qlib.utils import init_instance_by_config
# 3. Initialise the DataHandler from the config.
# This step automatically loads the StaticDataLoader data and runs all of the
# configured processors.
dh = init_instance_by_config(data_handler_config)

# NOTE(review): the train and valid windows overlap (20220101-20221231), and
# only the train window covers the 2020-01 sample dates — confirm the valid/test
# ranges are intended before they are used.
dataset_segments = {
    "train": ("20190101", "20221231"),
    "valid": ("20220101", "20231231"),
    "test": ("20240101", "20250101"),
}
ds = DatasetH(handler=dh, segments=dataset_segments)
# 4. Inspect the result.
# The "learn" (DK_L) view of the train segment is requested. The only processor
# configured in this script is Fillna(fill_value=0), so the injected NaNs are
# expected to show up as 0 in the output.
#
# `segments` is the first parameter of DatasetH.prepare and must be given
# exactly once: the previous call passed "all" positionally AND
# segments='train' by keyword, which raises
# "TypeError: prepare() got multiple values for argument 'segments'".
# col_set is left at its default, which selects all columns.
learn_data = ds.prepare(segments="train", data_key="learn")
print("----------- train DataFrame -----------")
print(learn_data)