(exception)勉强赚钱rank
This commit is contained in:
@@ -1,21 +1,8 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
def read_and_merge_h5_data(h5_filename, key, columns, df=None):
|
||||
"""
|
||||
读取 HDF5 文件中的数据,根据指定的 columns 筛选数据,
|
||||
如果传入 df 参数,则将其与读取的数据根据 ts_code 和 trade_date 合并。
|
||||
|
||||
参数:
|
||||
- h5_filename: HDF5 文件名
|
||||
- key: 数据存储在 HDF5 文件中的 key
|
||||
- columns: 要读取的列名列表
|
||||
- df: 需要合并的 DataFrame(如果为空,则不进行合并)
|
||||
|
||||
返回:
|
||||
- 合并后的 DataFrame
|
||||
"""
|
||||
# 处理 _ 开头的列名
|
||||
def read_and_merge_h5_data(h5_filename, key, columns, df=None, join='left', on=['ts_code', 'trade_date'], prefix=None):
|
||||
processed_columns = []
|
||||
for col in columns:
|
||||
if col.startswith('_'):
|
||||
@@ -32,14 +19,22 @@ def read_and_merge_h5_data(h5_filename, key, columns, df=None):
|
||||
new_col = f'_{col}'
|
||||
data.rename(columns={col: new_col}, inplace=True)
|
||||
|
||||
if prefix is not None:
|
||||
for col in data.columns:
|
||||
if col not in ['ts_code', 'trade_date']: # 只有不在 columns 中的列才需要加下划线
|
||||
new_col = f'{prefix}_{col}'
|
||||
data.rename(columns={col: new_col}, inplace=True)
|
||||
|
||||
# 如果传入的 df 不为空,则进行合并
|
||||
if df is not None and not df.empty:
|
||||
# 确保两个 DataFrame 都有 ts_code 和 trade_date 列
|
||||
df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')
|
||||
data['trade_date'] = pd.to_datetime(data['trade_date'], format='%Y%m%d')
|
||||
print(f'{join} merge on {on}')
|
||||
if 'trade_date' in on:
|
||||
# Convert trade_date in both DataFrames to datetime so the merge key has the same dtype on both sides
|
||||
df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')
|
||||
data['trade_date'] = pd.to_datetime(data['trade_date'], format='%Y%m%d')
|
||||
|
||||
# Merge on the key columns given by `on`, using the join type given by `join`
|
||||
merged_df = pd.merge(df, data, on=['ts_code', 'trade_date'], how='left')
|
||||
merged_df = pd.merge(df, data, on=on, how=join)
|
||||
else:
|
||||
# 如果 df 为空,则直接返回读取的数据
|
||||
merged_df = data
|
||||
@@ -84,4 +79,42 @@ def calculate_risk_adjusted_return(df, days=1, method='ratio', lambda_=0.5, eps=
|
||||
else:
|
||||
raise ValueError("Invalid method. Use 'ratio' or 'difference'.")
|
||||
|
||||
return df
|
||||
return df
|
||||
|
||||
|
||||
# import polars as pl
|
||||
#
|
||||
# def read_and_merge_h5_data_polars(h5_filename, key, columns, df=None, join='left', on=['ts_code', 'trade_date']):
|
||||
# processed_columns = []
|
||||
# for col in columns:
|
||||
# if col.startswith('_'):
|
||||
# processed_columns.append(col[1:]) # 去掉下划线
|
||||
# else:
|
||||
# processed_columns.append(col)
|
||||
#
|
||||
# # 从 HDF5 文件读取数据,选择需要的列
|
||||
# pd_df = pd.read_hdf(h5_filename, key=key, columns=processed_columns)
|
||||
#
|
||||
# # 将 Pandas DataFrame 转换为 Polars DataFrame
|
||||
# data = pl.from_pandas(pd_df)
|
||||
#
|
||||
# # 修改列名,如果列名以前有 _,加上 _
|
||||
# data = data.rename({col: f'_{col}' for col in data.columns if col not in columns})
|
||||
#
|
||||
# # 如果传入的 df 不为空,则进行合并
|
||||
# if df is not None and not df.is_empty():
|
||||
# print(f'{join} merge on {on}')
|
||||
#
|
||||
# # 确保两个 DataFrame 都有 ts_code 和 trade_date 列
|
||||
# # df = df.with_columns(pl.col('trade_date').str.strptime(pl.Datetime, format='%Y%m%d'))
|
||||
# # data = data.with_columns(pl.col('trade_date').str.strptime(pl.Datetime, format='%Y%m%d'))
|
||||
#
|
||||
# # 根据 ts_code 和 trade_date 合并
|
||||
# merged_df = df.join(data, on=on, how=join)
|
||||
# else:
|
||||
# # 如果 df 为空,则直接返回读取的数据
|
||||
# merged_df = data
|
||||
#
|
||||
# return merged_df
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user