2025-06-18 10:25:05 +08:00
|
|
|
|
import traceback
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
import pandas as pd
|
2025-06-23 22:21:59 +08:00
|
|
|
|
from tqsdk import (
|
|
|
|
|
|
TqApi,
|
|
|
|
|
|
TqAuth,
|
|
|
|
|
|
TqBacktest,
|
|
|
|
|
|
TqSim,
|
|
|
|
|
|
BacktestFinished,
|
|
|
|
|
|
) # 确保导入所有需要的回测/模拟类
|
2025-06-18 10:25:05 +08:00
|
|
|
|
import os
|
|
|
|
|
|
import datetime
|
|
|
|
|
|
from datetime import date # 导入 datetime.date
|
|
|
|
|
|
|
|
|
|
|
|
# --- 配置您的天勤账号信息 ---
|
|
|
|
|
|
# 请替换为您的实盘账号或模拟账号信息
|
|
|
|
|
|
# 如果您没有天勤账号,可以注册并获取测试Token:https://www.shinnytech.com/tqsdk/doc/quickstart/
|
|
|
|
|
|
TQ_USER_NAME = "emanresu" # 例如: "123456"
|
|
|
|
|
|
TQ_PASSWORD = "dfgvfgdfgg" # 例如: "your_password"
|
|
|
|
|
|
|
2025-06-23 22:21:59 +08:00
|
|
|
|
BEIJING_TZ = "Asia/Shanghai"
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def collect_and_save_tqsdk_data_stream(
|
2025-06-23 22:21:59 +08:00
|
|
|
|
symbol: str,
|
|
|
|
|
|
freq: str,
|
|
|
|
|
|
start_date_str: str,
|
|
|
|
|
|
end_date_str: str,
|
|
|
|
|
|
mode: str = "backtest", # 默认为回测模式,因为获取历史数据通常用于回测
|
|
|
|
|
|
output_dir: str = "../data",
|
|
|
|
|
|
tq_user: str = TQ_USER_NAME,
|
|
|
|
|
|
tq_pwd: str = TQ_PASSWORD,
|
2025-06-22 23:03:50 +08:00
|
|
|
|
) -> pd.DataFrame:
|
2025-06-18 10:25:05 +08:00
|
|
|
|
"""
|
|
|
|
|
|
通过 TqSdk 在指定模式下(回测或模拟)运行,监听并收集指定品种、频率、日期范围的K线数据流,
|
|
|
|
|
|
并将其保存到本地CSV文件。此函数会模拟 TqSdk 的时间流运行。
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
symbol (str): 交易品种代码,例如 "SHFE.rb2405", "KQ.i9999"。
|
|
|
|
|
|
freq (str): 数据频率,例如 "1min", "5min", "day"。注意:tick数据量过大不推荐此方法直接收集。
|
|
|
|
|
|
start_date_str (str): 数据流开始日期,格式 'YYYY-MM-DD'。
|
|
|
|
|
|
end_date_str (str): 数据流结束日期,格式 'YYYY-MM-DD'。
|
|
|
|
|
|
mode (str): 运行模式,可选 "sim" (模拟/实盘) 或 "backtest" (回测)。默认为 "backtest"。
|
|
|
|
|
|
output_dir (str): 数据保存的根目录,默认为 "./data"。
|
|
|
|
|
|
tq_user (str): 天勤量化账号。
|
|
|
|
|
|
tq_pwd (str): 天勤量化密码。
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
pd.DataFrame or None: 收集到的K线数据DataFrame,如果获取失败则返回 None。
|
|
|
|
|
|
请注意,对于非常大的数据量,直接返回DataFrame可能消耗大量内存。
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not tq_user or not tq_pwd:
|
|
|
|
|
|
print("错误: 请在代码中配置您的天勤量化账号和密码。")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
api = None
|
|
|
|
|
|
collected_data = [] # 用于收集每一根完整K线的数据
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
2025-06-23 22:21:59 +08:00
|
|
|
|
start_dt_data_obj = datetime.datetime.strptime(start_date_str, "%Y-%m-%d")
|
|
|
|
|
|
end_dt_data_obj = datetime.datetime.strptime(end_date_str, "%Y-%m-%d")
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
|
|
|
|
|
if mode == "backtest":
|
|
|
|
|
|
backtest_start_date = start_dt_data_obj.date()
|
|
|
|
|
|
backtest_end_date = end_dt_data_obj.date()
|
2025-06-23 22:21:59 +08:00
|
|
|
|
print(
|
|
|
|
|
|
f"初始化天勤回测API,回测日期范围:{backtest_start_date} 至 {backtest_end_date}"
|
|
|
|
|
|
)
|
2025-06-18 10:25:05 +08:00
|
|
|
|
api = TqApi(
|
2025-06-23 22:21:59 +08:00
|
|
|
|
backtest=TqBacktest(
|
|
|
|
|
|
start_dt=backtest_start_date, end_dt=backtest_end_date
|
|
|
|
|
|
),
|
|
|
|
|
|
auth=TqAuth(tq_user, tq_pwd),
|
2025-06-18 10:25:05 +08:00
|
|
|
|
)
|
|
|
|
|
|
elif mode == "sim":
|
|
|
|
|
|
print("初始化天勤模拟/实盘API")
|
|
|
|
|
|
api = TqApi(account=TqSim(), auth=TqAuth(tq_user, tq_pwd))
|
|
|
|
|
|
# 如果您有实盘账户,可以使用:
|
|
|
|
|
|
# api = TqApi(account=TqAccount(tq_user, tq_pwd), auth=TqAuth(tq_user, tq_pwd))
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"错误: 不支持的模式 '{mode}'。请使用 'sim' 或 'backtest'。")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
# K线数据获取的duration_seconds
|
|
|
|
|
|
duration_seconds = 0
|
|
|
|
|
|
if "min" in freq:
|
|
|
|
|
|
duration_seconds = int(freq.replace("min", "")) * 60
|
|
|
|
|
|
elif freq == "day":
|
|
|
|
|
|
duration_seconds = 24 * 60 * 60
|
|
|
|
|
|
elif freq == "week":
|
|
|
|
|
|
duration_seconds = 7 * 24 * 60 * 60
|
|
|
|
|
|
elif freq == "month":
|
|
|
|
|
|
duration_seconds = 30 * 24 * 60 * 60 # 大约一个月
|
|
|
|
|
|
else:
|
2025-06-23 22:21:59 +08:00
|
|
|
|
print(
|
|
|
|
|
|
f"错误: 不支持的数据频率 '{freq}'。目前支持 '1min', '5min', 'day', 'week', 'month'。"
|
|
|
|
|
|
)
|
2025-06-18 10:25:05 +08:00
|
|
|
|
print("注意:Tick数据量巨大,不建议用此方法直接收集,因为它会耗尽内存。")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
# 获取K线序列,这里获取的是指定频率的K线,天勤会根据模式从历史或实时流中推送
|
|
|
|
|
|
klines = api.get_kline_serial(symbol, duration_seconds)
|
2025-06-23 22:21:59 +08:00
|
|
|
|
quote = api.get_quote(symbol=symbol)
|
|
|
|
|
|
underlying_symbol = quote.underlying_symbol
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
2025-06-23 22:21:59 +08:00
|
|
|
|
print(
|
|
|
|
|
|
f"开始在 '{mode}' 模式下收集 {symbol} 从 {start_date_str} 到 {end_date_str} 的 {freq} 数据..."
|
|
|
|
|
|
)
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
|
|
|
|
|
last_kline_datetime = None # 用于跟踪上一根已完成K线的时间
|
2025-06-29 12:03:43 +08:00
|
|
|
|
swap_month_dt = None
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
|
|
|
|
|
while api.wait_update():
|
2025-06-23 22:21:59 +08:00
|
|
|
|
if underlying_symbol is None:
|
|
|
|
|
|
underlying_symbol = quote.underlying_symbol
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
|
|
|
|
|
# 检查是否有新的完整K线生成,或者当前K线是最后一次更新 (在回测结束时)
|
|
|
|
|
|
# TqSdk会在K线完成时发送最后一次更新,或者在回测结束时强制更新
|
2025-06-23 22:21:59 +08:00
|
|
|
|
if api.is_changing(quote, "underlying_symbol"):
|
2025-06-29 12:03:43 +08:00
|
|
|
|
swap_month_dt = pd.to_datetime(
|
|
|
|
|
|
quote.datetime, unit="ns", utc=True
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-06-18 10:25:05 +08:00
|
|
|
|
if api.is_changing(klines):
|
|
|
|
|
|
# 只有当K线序列发生变化时才处理
|
|
|
|
|
|
# 关注最新一根 K 线(即 klines.iloc[-1])
|
|
|
|
|
|
current_kline = klines.iloc[-2]
|
|
|
|
|
|
|
|
|
|
|
|
# TqSdk 会在K线结束后,或者回测结束时,确保K线为最终状态。
|
|
|
|
|
|
# 判断当前K线是否已经结束 (is_last=True) 并且与上一次保存的K线不同
|
|
|
|
|
|
# 或者,在回测模式下,回测结束时,最后一根K线也会被视为“完成”
|
|
|
|
|
|
# 判断条件:K线时间戳不是 None 且 大于上一次记录的 K线时间
|
2025-06-23 22:21:59 +08:00
|
|
|
|
if not pd.isna(current_kline["datetime"]) and (
|
|
|
|
|
|
last_kline_datetime is None
|
|
|
|
|
|
or (
|
|
|
|
|
|
last_kline_datetime is not None
|
|
|
|
|
|
and current_kline["datetime"] > last_kline_datetime
|
|
|
|
|
|
)
|
|
|
|
|
|
):
|
2025-06-18 10:25:05 +08:00
|
|
|
|
# 将datetime (微秒) 转换为可读格式
|
|
|
|
|
|
|
|
|
|
|
|
# 检查K线的时间戳是否在我们要获取的日期范围内
|
|
|
|
|
|
# 注意:get_kline_serial 会获取指定范围前后的一小段数据,我们需要过滤
|
|
|
|
|
|
|
2025-06-23 22:21:59 +08:00
|
|
|
|
kline_dt = pd.to_datetime(
|
|
|
|
|
|
current_kline["datetime"], unit="ns", utc=True
|
2025-06-29 12:03:43 +08:00
|
|
|
|
).tz_convert(BEIJING_TZ)
|
|
|
|
|
|
|
|
|
|
|
|
if swap_month_dt is not None and kline_dt.hour == swap_month_dt.hour:
|
|
|
|
|
|
underlying_symbol = quote.underlying_symbol
|
|
|
|
|
|
|
|
|
|
|
|
kline_dt = kline_dt.strftime(
|
2025-06-23 22:21:59 +08:00
|
|
|
|
"%Y-%m-%d %H:%M:%S"
|
|
|
|
|
|
)
|
2025-06-29 12:03:43 +08:00
|
|
|
|
|
2025-06-18 10:25:05 +08:00
|
|
|
|
kline_data_to_save = {
|
2025-06-23 22:21:59 +08:00
|
|
|
|
"datetime": kline_dt,
|
|
|
|
|
|
"open": current_kline["open"],
|
|
|
|
|
|
"high": current_kline["high"],
|
|
|
|
|
|
"low": current_kline["low"],
|
|
|
|
|
|
"close": current_kline["close"],
|
|
|
|
|
|
"volume": current_kline["volume"],
|
|
|
|
|
|
"open_oi": current_kline["open_oi"],
|
|
|
|
|
|
"close_oi": current_kline["close_oi"],
|
|
|
|
|
|
"underlying_symbol": underlying_symbol,
|
2025-06-18 10:25:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
collected_data.append(kline_data_to_save)
|
2025-06-23 22:21:59 +08:00
|
|
|
|
last_kline_datetime = current_kline["datetime"]
|
2025-06-18 10:25:05 +08:00
|
|
|
|
# print(f"收集到 K线: {kline_dt}, close: {current_kline['close']}") # 用于调试
|
|
|
|
|
|
|
|
|
|
|
|
# 在回测模式下,当回测结束时,api.wait_update() 会抛出异常,此时我们可以退出循环
|
2025-06-23 22:21:59 +08:00
|
|
|
|
if api.is_changing(api.get_account()) or api.is_changing(
|
|
|
|
|
|
api.get_position()
|
|
|
|
|
|
):
|
2025-06-18 10:25:05 +08:00
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
# TqBacktest 在数据结束时会抛出 "api已关闭" 或类似的异常,这是正常现象。
|
|
|
|
|
|
# 我们在这里捕获并判断是否是正常结束。
|
|
|
|
|
|
if "api已关闭" in str(e) or "数据已全部输出" in str(e):
|
|
|
|
|
|
print("数据流已结束 (TqSdk API 关闭或数据全部输出)。")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"数据收集过程中发生错误: {e}")
|
|
|
|
|
|
traceback.print_exc()
|
|
|
|
|
|
# 无论如何,都尝试处理剩余数据并保存
|
|
|
|
|
|
finally:
|
|
|
|
|
|
if collected_data:
|
2025-06-23 22:21:59 +08:00
|
|
|
|
df = pd.DataFrame(collected_data).set_index("datetime")
|
2025-06-18 10:25:05 +08:00
|
|
|
|
df = df.sort_index() # 确保数据按时间排序
|
|
|
|
|
|
|
|
|
|
|
|
# 构造保存路径
|
|
|
|
|
|
freq_folder = freq.replace("min", "m") if "min" in freq else freq
|
2025-06-23 22:21:59 +08:00
|
|
|
|
if freq == "day":
|
|
|
|
|
|
freq_folder = "daily"
|
|
|
|
|
|
if freq == "week":
|
|
|
|
|
|
freq_folder = "weekly"
|
|
|
|
|
|
if freq == "month":
|
|
|
|
|
|
freq_folder = "monthly"
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
2025-06-23 22:21:59 +08:00
|
|
|
|
safe_symbol = symbol.replace(".", "_")
|
2025-06-18 10:25:05 +08:00
|
|
|
|
|
|
|
|
|
|
save_folder = os.path.join(output_dir, safe_symbol)
|
|
|
|
|
|
os.makedirs(save_folder, exist_ok=True)
|
|
|
|
|
|
|
2025-06-19 15:28:26 +08:00
|
|
|
|
file_name = f"{safe_symbol}_{freq}.csv"
|
2025-06-18 10:25:05 +08:00
|
|
|
|
file_path = os.path.join(save_folder, file_name)
|
|
|
|
|
|
|
2025-06-23 22:21:59 +08:00
|
|
|
|
print(df.head())
|
2025-06-18 10:25:05 +08:00
|
|
|
|
df.to_csv(file_path, index=True)
|
|
|
|
|
|
print(f"数据已成功保存到: {file_path}, 共 {len(df)} 条记录。")
|
|
|
|
|
|
|
|
|
|
|
|
if api:
|
|
|
|
|
|
api.close()
|
|
|
|
|
|
return df
|
|
|
|
|
|
else:
|
|
|
|
|
|
print("没有收集到任何数据。")
|
|
|
|
|
|
if api:
|
|
|
|
|
|
api.close()
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
2025-06-23 22:21:59 +08:00
|
|
|
|
|
2025-06-18 10:25:05 +08:00
|
|
|
|
# --- 示例用法 ---
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
current_dir = os.getcwd()
|
|
|
|
|
|
print("当前工作目录:", current_dir)
|
|
|
|
|
|
|
|
|
|
|
|
# !!!重要:请先在这里替换成您的天勤账号和密码!!!
|
|
|
|
|
|
# 否则程序无法运行。
|
|
|
|
|
|
TQ_USER_NAME = "emanresu" # 例如: "123456"
|
|
|
|
|
|
TQ_PASSWORD = "dfgvfgdfgg" # 例如: "your_password"
|
|
|
|
|
|
|
|
|
|
|
|
# 这种方式适合获取相对较短或中等长度的历史K线数据。
|
|
|
|
|
|
df_if_backtest_daily = collect_and_save_tqsdk_data_stream(
|
2025-07-28 14:36:58 +08:00
|
|
|
|
symbol="KQ.m@SHFE.rb",
|
2025-06-23 22:21:59 +08:00
|
|
|
|
# symbol='SHFE.rb2510',
|
|
|
|
|
|
# symbol='KQ.i@SHFE.bu',
|
2025-09-16 09:59:38 +08:00
|
|
|
|
freq="min5",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
start_date_str="2021-01-01",
|
2025-09-16 09:59:38 +08:00
|
|
|
|
end_date_str="2025-09-20",
|
2025-06-18 10:25:05 +08:00
|
|
|
|
mode="backtest", # 指定为回测模式
|
|
|
|
|
|
tq_user=TQ_USER_NAME,
|
2025-06-23 22:21:59 +08:00
|
|
|
|
tq_pwd=TQ_PASSWORD,
|
2025-06-18 10:25:05 +08:00
|
|
|
|
)
|
|
|
|
|
|
if df_if_backtest_daily is not None:
|
|
|
|
|
|
print(df_if_backtest_daily.tail())
|