"""GTJA 公式转换器。 将 GTJA 原始公式转换为框架可识别的 DSL 字符串表达式。 转换过程中会验证公式是否能被正确解析为 DSL 节点。 """ import re from pathlib import Path from typing import Any from src.factors.dsl import Node, FunctionNode from src.factors.api import ( close, open, high, low, vol, amount, pre_close, change, pct_chg, ts_mean, ts_std, ts_max, ts_min, ts_sum, ts_delay, ts_delta, ts_corr, ts_cov, ts_var, ts_skew, ts_kurt, ts_pct_change, ts_ema, ts_atr, ts_rsi, ts_obv, ts_rank, ts_sma, ts_wma, ts_decay_linear, ts_argmax, ts_argmin, ts_count, ts_prod, ts_sumac, cs_rank, cs_zscore, cs_neutralize, cs_winsorize, cs_demean, log, exp, sqrt, sign, cos, sin, abs, max_, min_, clip, atan, log1p, if_, where, ) # 动态补充缺失的 cs_mean try: from src.factors.api import cs_mean except ImportError: def cs_mean(x): return FunctionNode("cs_mean", x) try: from .preprocessor import clean_gtja_formula, filter_unsupported_formulas except ImportError: from preprocessor import clean_gtja_formula, filter_unsupported_formulas class GtjaConverter: # 安全的函数命名空间,用于验证公式语法的合理性 SAFE_NAMESPACE: dict[str, Any] = { "close": close, "open": open, "high": high, "low": low, "vol": vol, "volume": vol, "amount": amount, "pre_close": pre_close, "change": change, "pct_chg": pct_chg, "ts_mean": ts_mean, "ts_std": ts_std, "ts_max": ts_max, "ts_min": ts_min, "ts_sum": ts_sum, "ts_delay": ts_delay, "ts_delta": ts_delta, "ts_corr": ts_corr, "ts_cov": ts_cov, "ts_var": ts_var, "ts_skew": ts_skew, "ts_kurt": ts_kurt, "ts_pct_change": ts_pct_change, "ts_ema": ts_ema, "ts_atr": ts_atr, "ts_rsi": ts_rsi, "ts_obv": ts_obv, "ts_rank": ts_rank, "ts_sma": ts_sma, "ts_wma": ts_wma, "ts_decay_linear": ts_decay_linear, "ts_argmax": ts_argmax, "ts_argmin": ts_argmin, "ts_count": ts_count, "ts_prod": ts_prod, "ts_sumac": ts_sumac, "cs_rank": cs_rank, "cs_zscore": cs_zscore, "cs_neutralize": cs_neutralize, "cs_winsorize": cs_winsorize, "cs_demean": cs_demean, "cs_mean": cs_mean, "log": log, "exp": exp, "sqrt": sqrt, "sign": sign, "cos": cos, "sin": sin, "abs": abs, "max_": max_, "min_": min_, "clip": clip, "atan": atan, "log1p": log1p, "if_": if_, "where": where, } def __init__(self): self.errors: list[str] = [] self.warnings: list[str] = [] self._registration_results: list[dict[str, Any]] = [] def convert(self, formula: str) -> str | None: if not filter_unsupported_formulas(formula): self.warnings.append( f"包含暂不支持的算子/循环依赖,已跳过: {formula[:50]}..." ) return None clean_formula = clean_gtja_formula(formula) try: # 使用 AST api 执行,验证所有函数的输入/参数类型是否有效 self._validate_formula(clean_formula) return clean_formula except Exception as e: self.errors.append( f"语法节点构建失败: {formula[:50]}... \n\t-> 解析所得: {clean_formula}\n\t-> 错误: {e}" ) return None def _validate_formula(self, formula: str) -> Node: # __builtins__: {} 禁止任何外部危险执行,彻底保证安全 return eval(formula, {"__builtins__": {}}, self.SAFE_NAMESPACE) def convert_batch( self, formulas: dict[str, str], auto_register: bool = False, output_path: Path | None = None, ) -> dict[str, str | None]: """批量转换公式。 Args: formulas: 公式字典,key 为因子名(如 "Alpha1"),value 为公式字符串 auto_register: 是否自动注册成功的因子到因子库 output_path: 因子库文件路径,默认使用 data/factors.jsonl Returns: 转换结果字典,key 为因子名,value 为 DSL 表达式或 None """ results = {} self._registration_results = [] for name, formula in formulas.items(): result = self.convert(formula) results[name] = result # 自动注册成功的因子 if auto_register and result is not None: reg_result = register_gtja_factor(name, result, output_path) self._registration_results.append({"alpha_name": name, **reg_result}) return results def get_registration_report(self) -> dict[str, Any]: """获取注册报告。 Returns: 包含注册统计信息的字典 """ if not hasattr(self, "_registration_results"): return { "total": 0, "success": 0, "skipped": 0, "failed": 0, "details": [], } success = sum(1 for r in self._registration_results if r["status"] == "success") skipped = sum(1 for r in self._registration_results if r["status"] == "skipped") failed = sum(1 for r in self._registration_results if r["status"] == "failed") return { "total": len(self._registration_results), "success": success, "skipped": skipped, "failed": failed, "details": self._registration_results, } def get_stats(self) -> dict[str, Any]: return { "errors": len(self.errors), "warnings": len(self.warnings), "error_details": self.errors, "warning_details": self.warnings, } def convert_to_dsl(formula_str: str) -> str | None: converter = GtjaConverter() return converter.convert(formula_str) def parse_multiline_formulas(text: str) -> dict[str, str]: formulas = {} for line in text.strip().split("\n"): line = line.strip() if not line: continue if ":" in line: name, expr = line.split(":", 1) if name.strip() and expr.strip(): formulas[name.strip()] = expr.strip() return formulas def get_next_factor_id(filepath: Path) -> str: """生成下一个 factor_id。 从现有文件中提取最大序号,生成新的 F_XXX 格式 ID。 Args: filepath: JSONL 文件路径 Returns: 新的 factor_id,如 "F_001" """ import builtins import json if not filepath.exists(): return "F_001" try: with builtins.open(filepath, "r", encoding="utf-8") as f: lines = f.readlines() except Exception: return "F_001" max_num = 0 pattern = re.compile(r"^F_(\d+)$") for line in lines: line = line.strip() if not line: continue try: data = json.loads(line) factor_id = data.get("factor_id", "") match = pattern.match(factor_id) if match: num = int(match.group(1)) max_num = max(max_num, num) except (json.JSONDecodeError, ValueError): continue return f"F_{max_num + 1:03d}" def extract_alpha_number(alpha_name: str) -> int | None: """从 Alpha 名称中提取数字。 Args: alpha_name: 如 "Alpha1", "Alpha123" Returns: 数字部分,如 1, 123;如果无法解析返回 None """ match = re.match(r"[Aa]lpha(\d+)", alpha_name) if match: return int(match.group(1)) return None def register_gtja_factor( alpha_name: str, dsl_expr: str, output_path: Path | None = None, ) -> dict[str, Any]: """注册单个 GTJA 因子到因子库。 Args: alpha_name: 原始 Alpha 名称,如 "Alpha1" dsl_expr: DSL 表达式字符串 output_path: 因子库文件路径,默认使用 data/factors.jsonl Returns: 注册结果字典,包含 status 和 message """ from src.factors.metadata import FactorManager from src.factors.metadata.exceptions import DuplicateFactorError, ValidationError from src.config.settings import settings # 提取数字并构建标准化名称 alpha_num = extract_alpha_number(alpha_name) if alpha_num is None: return { "status": "failed", "message": f"无法从 '{alpha_name}' 提取数字编号", } # 标准化名称: GTJA_alpha001, GTJA_alpha123 factor_name = f"GTJA_alpha{alpha_num:03d}" # 确定输出路径 if output_path is None: output_path = settings.data_path_resolved / "factors.jsonl" # 初始化 FactorManager manager = FactorManager(str(output_path)) try: # 检查是否已存在(处理空文件的情况) try: existing = manager.get_factors_by_name(factor_name) if len(existing) > 0: return { "status": "skipped", "message": f"因子 '{factor_name}' 已存在", } except Exception: # 如果查询失败(如文件为空),继续尝试注册 pass # 生成 factor_id factor_id = get_next_factor_id(output_path) # 构建因子记录 factor_record = { "factor_id": factor_id, "name": factor_name, "desc": f"GTJA {alpha_name} 因子", "dsl": dsl_expr, "category": "gtja_alpha", "source": "GTJA191", } # 注册因子 manager.add_factor(factor_record) return { "status": "success", "message": f"{factor_id}: {factor_name}", "factor_id": factor_id, "factor_name": factor_name, } except DuplicateFactorError as e: return { "status": "failed", "message": f"因子 ID 重复: {e}", } except ValidationError as e: return { "status": "failed", "message": f"验证失败: {e}", } except Exception as e: return { "status": "failed", "message": f"注册失败: {e}", } if __name__ == "__main__": # 使用示例:多行字符串输入 converter = GtjaConverter() # 多行字符串,格式为 "因子名: 表达式",支持空行 test_input = """ Alpha1: (-1 * CORR(RANK(DELTA(LOG(VOLUME), 1)), RANK(((CLOSE -OPEN) / OPEN)), 6)) Alpha2: (-1 * DELTA((((CLOSE -LOW) -(HIGH -CLOSE)) / (HIGH -LOW)), 1)) Alpha3: SUM((CLOSE=DELAY(CLOSE,1)?0:CLOSE-(CLOSE>DELAY(CLOSE,1)?MIN(LOW,DELAY(CLOSE,1)):MAX(HIGH,DELAY(CLOSE,1)))),6) Alpha4: ((((SUM(CLOSE, 8) / 8) + STD(CLOSE, 8)) < (SUM(CLOSE, 2) / 2)) ? (-1 * 1) : (((SUM(CLOSE, 2) / 2) <((SUM(CLOSE, 8) / 8) -STD(CLOSE, 8))) ? 1 : (((1 < (VOLUME / MEAN(VOLUME,20))) || ((VOLUME /MEAN(VOLUME,20)) == 1)) ? 1 : (-1 * 1)))) Alpha5: (-1 * TSMAX(CORR(TSRANK(VOLUME, 5), TSRANK(HIGH, 5), 5), 3)) Alpha6: (RANK(SIGN(DELTA((((OPEN * 0.85) + (HIGH * 0.15))), 4)))* -1) Alpha7: ((RANK(MAX((VWAP -CLOSE), 3)) + RANK(MIN((VWAP -CLOSE), 3))) * RANK(DELTA(VOLUME, 3))) Alpha8: RANK(DELTA(((((HIGH + LOW) / 2) * 0.2) + (VWAP * 0.8)), 4) * -1) Alpha9: SMA(((HIGH+LOW)/2-(DELAY(HIGH,1)+DELAY(LOW,1))/2)*(HIGH-LOW)/VOLUME,7,2) Alpha10: (RANK(MAX(((RET < 0) ? STD(RET, 20) : CLOSE)^2),5)) Alpha11: SUM(((CLOSE-LOW)-(HIGH-CLOSE))./(HIGH-LOW).*VOLUME,6) Alpha12: (RANK((OPEN -(SUM(VWAP, 10) / 10)))) * (-1 * (RANK(ABS((CLOSE -VWAP))))) Alpha13: (((HIGH * LOW)^0.5) -VWAP) Alpha14: CLOSE-DELAY(CLOSE,5) Alpha15: OPEN/DELAY(CLOSE,1)-1 Alpha16: (-1 * TSMAX(RANK(CORR(RANK(VOLUME), RANK(VWAP), 5)), 5)) Alpha17: RANK((VWAP -MAX(VWAP, 15)))^DELTA(CLOSE, 5) Alpha18: CLOSE/DELAY(CLOSE,5) Alpha19: (CLOSEDELAY(CLOSE,1)?STD(CLOSE:20),0),20,1)/(SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1)+SMA((CLOSE<=DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1))*100 Alpha24: SMA(CLOSE-DELAY(CLOSE,5),5,1) Alpha25: ((-1 * RANK((DELTA(CLOSE, 7) * (1 -RANK(DECAYLINEAR((VOLUME/MEAN(VOLUME,20)), 9)))))) * (1 +RANK(SUM(RET, 250)))) Alpha26: ((((SUM(CLOSE, 7) / 7) -CLOSE)) + ((CORR(VWAP, DELAY(CLOSE, 5), 230)))) Alpha27: WMA((CLOSE-DELAY(CLOSE,3))/DELAY(CLOSE,3)*100+(CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*100,12) Alpha28: 3*SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1)-2*SMA(SMA((CLOSE-TSMIN(LOW,9))/(MAX(HIGH,9)-TSMAX(LOW,9))*100,3,1),3,1) Alpha29: (CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*VOLUME Alpha30: WMA((REGRESI(CLOSE/DELAY(CLOSE)-1,MKT,SMB,HML,60))^2,20) Alpha31: (CLOSE-MEAN(CLOSE,12))/MEAN(CLOSE,12)*100 Alpha32: (-1 * SUM(RANK(CORR(RANK(HIGH), RANK(VOLUME), 3)), 3)) Alpha33: ((((-1 * TSMIN(LOW, 5)) + DELAY(TSMIN(LOW, 5), 5)) * RANK(((SUM(RET, 240) -SUM(RET, 20)) / 220))) *TSRANK(VOLUME, 5)) Alpha34: MEAN(CLOSE,12)/CLOSE Alpha35: (MIN(RANK(DECAYLINEAR(DELTA(OPEN, 1), 15)), RANK(DECAYLINEAR(CORR((VOLUME), ((OPEN * 0.65) +(OPEN *0.35)), 17),7))) * -1) Alpha36: RANK(SUM(CORR(RANK(VOLUME), RANK(VWAP)), 6), 2) Alpha37: (-1 * RANK(((SUM(OPEN, 5) * SUM(RET, 5)) -DELAY((SUM(OPEN,5) * SUM(RET, 5)), 10)))) Alpha38: (((SUM(HIGH, 20) / 20) < HIGH) ? (-1 * DELTA(HIGH, 2)) : 0) Alpha39: ((RANK(DECAYLINEAR(DELTA((CLOSE), 2),8)) -RANK(DECAYLINEAR(CORR(((VWAP * 0.3) + (OPEN * 0.7)),SUM(MEAN(VOLUME,180), 37), 14), 12))) * -1) Alpha40: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:0),26)/SUM((CLOSE<=DELAY(CLOSE,1)?VOLUME:0),26)*100 Alpha41: (RANK(MAX(DELTA((VWAP), 3), 5))* -1) Alpha42: ((-1 * RANK(STD(HIGH, 10))) * CORR(HIGH, VOLUME, 10)) Alpha43: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)) Alpha50: SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))-SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)) Alpha51: SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)) Alpha52: SUM(MAX(0,HIGH-DELAY((HIGH+LOW+CLOSE)/3,1)),26)/SUM(MAX(0,DELAY((HIGH+LOW+CLOSE)/3,1)-L),26)*100 Alpha53: COUNT(CLOSE>DELAY(CLOSE,1),12)/12*100 Alpha54: (-1 * RANK((STD(ABS(CLOSE -OPEN)) + (CLOSE -OPEN)) + CORR(CLOSE, OPEN,10))) Alpha55: SUM(16*(CLOSE-DELAY(CLOSE,1)+(CLOSE-OPEN)/2+DELAY(CLOSE,1)-DELAY(OPEN,1))/((ABS(HIGH-DELAY(CLOSE,1))>ABS(LOW-DELAY(CLOSE,1))&ABS(HIGH-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))?ABS(HIGH-DELAY(CLOSE,1))+ABS(LOW-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:(ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))&ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(CLOSE,1))?ABS(LOW-DELAY(CLOSE,1))+ABS(HIGH-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:ABS(HIGH-DELAY(LOW,1))+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4)))*MAX(ABS(HIGH-DELAY(CLOSE,1)),ABS(LOW-DELAY(CLOSE,1))),20) Alpha56: (RANK((OPEN -TSMIN(OPEN, 12))) < RANK((RANK(CORR(SUM(((HIGH + LOW) / 2), 19), SUM(MEAN(VOLUME,40), 19), 13))^5))) Alpha57: SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1) Alpha58: COUNT(CLOSE>DELAY(CLOSE,1),20)/20*100 Alpha59: SUM((CLOSE=DELAY(CLOSE,1)?0:CLOSE-(CLOSE>DELAY(CLOSE,1)?MIN(LOW,DELAY(CLOSE,1)):MAX(HIGH,DELAY(CLOSE,1)))),20) Alpha60: SUM(((CLOSE-LOW)-(HIGH-CLOSE))./(HIGH-LOW).*VOLUME,20) Alpha61: (MAX(RANK(DECAYLINEAR(DELTA(VWAP, 1), 12)),RANK(DECAYLINEAR(RANK(CORR((LOW),MEAN(VOLUME,80), 8)), 17))) * -1) Alpha62: (-1 * CORR(HIGH, RANK(VOLUME), 5)) Alpha63: SMA(MAX(CLOSE-DELAY(CLOSE,1),0),6,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),6,1)*100 Alpha64: (MAX(RANK(DECAYLINEAR(CORR(RANK(VWAP), RANK(VOLUME), 4), 4)),RANK(DECAYLINEAR(MAX(CORR(RANK(CLOSE), RANK(MEAN(VOLUME,60)), 4), 13), 14))) * -1) Alpha65: MEAN(CLOSE,6)/CLOSE Alpha66: (CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6)*100 Alpha67: SMA(MAX(CLOSE-DELAY(CLOSE,1),0),24,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),24,1)*100 Alpha68: SMA(((HIGH+LOW)/2-(DELAY(HIGH,1)+DELAY(LOW,1))/2)*(HIGH-LOW)/VOLUME,15,2) Alpha69: (SUM(DTM,20)>SUM(DBM,20)?(SUM(DTM,20)-SUM(DBM,20))/SUM(DTM,20): (SUM(DTM,20)=SUM(DBM,20)? 0: (SUM(DTM,20)-SUM(DBM,20))/SUM(DBM,20))) Alpha70: STD(AMOUNT,6) Alpha71: (CLOSE-MEAN(CLOSE,24))/MEAN(CLOSE,24)*100 Alpha72: SMA((TSMAX(HIGH,6)-CLOSE)/(TSMAX(HIGH,6)-TSMIN(LOW,6))*100,15,1) Alpha73: ((TSRANK(DECAYLINEAR(DECAYLINEAR(CORR((CLOSE), VOLUME, 10), 16), 4), 5) - RANK(DECAYLINEAR(CORR(VWAP, MEAN(VOLUME,30), 4),3))) * -1) Alpha74: (RANK(CORR(SUM(((LOW * 0.35) + (VWAP * 0.65)), 20), SUM(MEAN(VOLUME,40), 20), 7)) + RANK(CORR(RANK(VWAP), RANK(VOLUME), 6))) Alpha75: COUNT(CLOSE>OPEN &BANCHMARKINDEXCLOSEDELAY(CLOSE,1)?VOLUME:(CLOSE=DELAY(OPEN,1)?0:MAX((OPEN-LOW),(OPEN-DELAY(OPEN,1)))),20) Alpha94: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE0?CLOSE-DELAY(CLOSE,1):0),12)-SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12))/(SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12)+SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12))*100 Alpha113: (-1 * ((RANK((SUM(DELAY(CLOSE, 5), 20) / 20)) * CORR(CLOSE, VOLUME, 2)) *RANK(CORR(SUM(CLOSE, 5),SUM(CLOSE, 20), 2)))) Alpha114: ((RANK(DELAY(((HIGH -LOW) / (SUM(CLOSE, 5) / 5)), 2)) * RANK(RANK(VOLUME))) / (((HIGH -LOW) /(SUM(CLOSE, 5) / 5)) / (VWAP -CLOSE))) Alpha115: (RANK(CORR(((HIGH * 0.9) + (CLOSE * 0.1)), MEAN(VOLUME,30), 10))^RANK(CORR(TSRANK(((HIGH + LOW) /2), 4), TSRANK(VOLUME, 10), 7))) Alpha116: REGBETA(CLOSE,SEQUENCE,20) Alpha117: ((TSRANK(VOLUME, 32) * (1 -TSRANK(((CLOSE + HIGH) -LOW), 16))) * (1 -TSRANK(RET, 32))) Alpha118: SUM(HIGH-OPEN,20)/SUM(OPEN-LOW,20)*100 Alpha119: (RANK(DECAYLINEAR(CORR(VWAP, SUM(MEAN(VOLUME,5), 26), 5), 7)) -RANK(DECAYLINEAR(TSRANK(MIN(CORR(RANK(OPEN), RANK(MEAN(VOLUME,15)), 21), 9), 7), 8))) Alpha120: (RANK((VWAP -CLOSE)) / RANK((VWAP + CLOSE))) Alpha121: ((RANK((VWAP -MIN(VWAP, 12)))^TSRANK(CORR(TSRANK(VWAP, 20), TSRANK(MEAN(VOLUME,60), 2), 18), 3)) * -1) Alpha122: (SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2)-DELAY(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2),1))/DELAY(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2),1) Alpha123: ((RANK(CORR(SUM(((HIGH + LOW) / 2), 20), SUM(MEAN(VOLUME,60), 20), 9))DELAY((HIGH+LOW+CLOSE)/3,1)?(HIGH+LOW+CLOSE)/3*VOLUME:0),14)/SUM(((HIGH+LOW+CLOSE)/3ABS(LOW-DELAY(CLOSE,1)) &ABS(HIGH-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))?ABS(HIGH-DELAY(CLOSE,1))+ABS(LOW-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:(ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1)) &ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(CLOSE,1))?ABS(LOW-DELAY(CLOSE,1))+ABS(HIGH-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:ABS(HIGH-DELAY(LOW,1))+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4)))*MAX(ABS(HIGH-DELAY(CLOSE,1)),ABS(LOW-DELAY(CLOSE,1))) Alpha138: ((RANK(DECAYLINEAR(DELTA((((LOW * 0.7) + (VWAP *0.3))), 3), 20)) -TSRANK(DECAYLINEAR(TSRANK(CORR(TSRANK(LOW, 8), TSRANK(MEAN(VOLUME,60), 17), 5), 19), 16), 7)) * -1) Alpha139: (-1 * CORR(OPEN, VOLUME, 10)) Alpha140: MIN(RANK(DECAYLINEAR(((RANK(OPEN) + RANK(LOW)) -(RANK(HIGH) + RANK(CLOSE))), 8)),TSRANK(DECAYLINEAR(CORR(TSRANK(CLOSE, 8), TSRANK(MEAN(VOLUME,60), 20), 8), 7), 3)) Alpha141: (RANK(CORR(RANK(HIGH), RANK(MEAN(VOLUME,15)), 9))* -1) Alpha142: (((-1 * RANK(TSRANK(CLOSE, 10))) * RANK(DELTA(DELTA(CLOSE, 1), 1))) *RANK(TSRANK((VOLUME/MEAN(VOLUME,20)), 5))) Alpha143: CLOSE>DELAY(CLOSE,1)?(CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)*SELF:SELF Alpha144: SUMIF(ABS(CLOSE/DELAY(CLOSE,1)-1)/AMOUNT,20,CLOSEDELAY(CLOSE,1))?1/(CLOSE-DELAY(CLOSE,1)):1)-MIN(((CLOSE>DELAY(CLOSE,1))?1/(CLOSE-DELAY(CLOSE,1)):1),12))/(HIGH-LOW)*100,13,2) Alpha165: MAX(SUMAC(CLOSE-MEAN(CLOSE,48)))-MIN(SUMAC(CLOSE-MEAN(CLOSE,48)))/STD(CLOSE,48) Alpha166: -20*(20-1 )^1.5*SUM(CLOSE/DELAY(CLOSE,1)-1-MEAN(CLOSE/DELAY(CLOSE,1)-1,20),20)/((20-1)*(20-2)(SUM((CLOSE/DELAY(CLOSE,1),20)^2,20))^1.5) Alpha167: SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12) Alpha168: (-1*VOLUME/MEAN(VOLUME,20)) Alpha169: SMA(MEAN(DELAY(SMA(CLOSE-DELAY(CLOSE,1),9,1),1),12)-MEAN(DELAY(SMA(CLOSE-DELAY(CLOSE,1),9,1),1),26),10,1) Alpha170: ((((RANK((1 / CLOSE)) * VOLUME) / MEAN(VOLUME,20)) * ((HIGH * RANK((HIGH -CLOSE))) / (SUM(HIGH, 5) /5))) -RANK((VWAP -DELAY(VWAP, 5)))) Alpha171: ((-1 * ((LOW -CLOSE) * (OPEN^5))) / ((CLOSE -HIGH) * (CLOSE^5))) Alpha172: MEAN(ABS(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6) Alpha173: 3*SMA(CLOSE,13,2)-2*SMA(SMA(CLOSE,13,2),13,2)+SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2) Alpha174: SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1) Alpha175: MEAN(MAX(MAX((HIGH-LOW),ABS(DELAY(CLOSE,1)-HIGH)),ABS(DELAY(CLOSE,1)-LOW)),6) Alpha176: CORR(RANK(((CLOSE -TSMIN(LOW, 12)) / (TSMAX(HIGH, 12) -TSMIN(LOW,12)))),RANK(VOLUME), 6) Alpha177: ((20-HIGHDAY(HIGH,20))/20)*100 Alpha178: (CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)*VOLUME Alpha179: (RANK(CORR(VWAP, VOLUME, 4)) *RANK(CORR(RANK(LOW),RANK(MEAN(VOLUME,50)), 12))) Alpha180: ((MEAN(VOLUME,20)OPEN & BANCHMARKINDEXCLOSE>BANCHMARKINDEXOPEN)OR(CLOSE0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 &LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6)+DELAY(MEAN(ABS(SUM((LD>0 &LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0&LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6),6))/2 Alpha187: SUM((OPEN<=DELAY(OPEN,1)?0:MAX((HIGH-OPEN),(OPEN-DELAY(OPEN,1)))),20) Alpha188: ((HIGH-LOW–SMA(HIGH-LOW,11,2))/SMA(HIGH-LOW,11,2))*100 Alpha189: MEAN(ABS(CLOSE-MEAN(CLOSE,6)),6) Alpha190: LOG((COUNT(CLOSE/DELAY(CLOSE)-1>((CLOSE/DELAY(CLOSE,19))^(1/20)-1),20)-1)*(SUMIF(((CLOSE/DELAY(CLOSE)-1-(CLOSE/DELAY(CLOSE,19))^(1/20)-1))^2,20,CLOSE/DELAY(CLOSE)-1<(CLOSE/DELAY(CLOSE,19))^(1/20)-1))/((COUNT((CLOSE/DELAY(CLOSE)-1<(CLOSE/DELAY(CLOSE,19))^(1/20)-1),20))*(SUMIF((CLOSE/DELAY(CLOSE)-1-((CLOSE/DELAY(CLOSE,19))^(1/20)-1))^2,20,CLOSE/DELAY(CLOSE)-1>(CLOSE/DELAY(CLOSE,19))^(1/20)-1)))) Alpha191: ((CORR(MEAN(VOLUME,20), LOW, 5) + ((HIGH + LOW) / 2)) -CLOSE) """ print("=" * 60) print("GTJA Alpha191 因子转换测试(带自动注册)") print("=" * 60) # 解析多行字符串 formulas = parse_multiline_formulas(test_input) print(f"\n共解析到 {len(formulas)} 个因子\n") # 使用批量转换并自动注册 # auto_register=True 会自动将转换成功的因子注册到因子库 results = converter.convert_batch( formulas, auto_register=True, # 启用自动注册 ) # 显示每个因子的转换和注册结果 for name, dsl_str in results.items(): print(f"因子名称: {name}") if dsl_str: print(f"DSL 表达式: {dsl_str}") else: print("转换失败或包含不支持的算子") print() # 打印转换统计 stats = converter.get_stats() print("\n" + "=" * 60) print("转换统计:") print(f" 错误: {stats['errors']}") print(f" 警告: {stats['warnings']}(暂不支持的因子)") if stats["errors"] > 0: print("\n错误详情:") for error in stats["error_details"]: print(f" - {error}") if stats["warnings"] > 0: print("\n警告详情(这些因子不会被注册):") for warning in stats["warning_details"]: print(f" - {warning}") # 打印注册报告 reg_report = converter.get_registration_report() if reg_report["total"] > 0: print("\n" + "=" * 60) print("因子注册报告:") print(f" 总计尝试: {reg_report['total']}") print(f" 成功注册: {reg_report['success']}") print(f" 已存在跳过: {reg_report['skipped']}") print(f" 注册失败: {reg_report['failed']}") # 打印成功的因子 success_items = [d for d in reg_report["details"] if d["status"] == "success"] if success_items: print("\n成功注册的因子:") for item in success_items: print(f" - {item['message']}")