feat(factors): 添加 cs_mean 函数并增强 max_/min_ 单参数支持
- 新增 cs_mean 截面均值函数,支持 GTJA Alpha127 等因子转换 - max_/min_ 支持单参数调用,默认使用 252 天(约 1 年)滚动窗口
This commit is contained in:
@@ -86,7 +86,9 @@ SELECTED_FACTORS = [
|
||||
]
|
||||
|
||||
# Factor definition dictionary: the complete factor library, used to hold
# factors that have not been registered in the metadata store yet.
FACTOR_DEFINITIONS = {
    "test": (
        '[([(col("close")) - (col("close").shift([dyn int: 5]).over([col("ts_code")]))]) '
        '/ (col("close").shift([dyn int: 5]).over([col("ts_code")]))]'
    ),
}
|
||||
|
||||
|
||||
def get_label_factor(label_name: str) -> dict:
|
||||
|
||||
@@ -418,6 +418,26 @@ def cs_demean(x: Union[Node, str]) -> FunctionNode:
|
||||
return FunctionNode("cs_demean", x)
|
||||
|
||||
|
||||
def cs_mean(x: Union[Node, str]) -> FunctionNode:
    """Build the DSL node for a cross-sectional mean.

    This function only constructs the expression node; the mean itself is
    computed later by whichever translator handles the "cs_mean" function
    name.

    Args:
        x: Input factor expression, or a field name given as a string.

    Returns:
        FunctionNode: A function-call node named "cs_mean" wrapping ``x``.

    Example:
        >>> from src.factors.api import close, cs_mean
        >>> expr = cs_mean((close - 100) ** 2)
        >>> print(expr)
        cs_mean(((close - 100) ** 2))
    """
    mean_node = FunctionNode("cs_mean", x)
    return mean_node
|
||||
|
||||
|
||||
# ==================== 数学函数 ====================
|
||||
|
||||
|
||||
@@ -507,41 +527,53 @@ def abs(x: Union[Node, str]) -> FunctionNode:
|
||||
return FunctionNode("abs", x)
|
||||
|
||||
|
||||
def max_(
    x: Union[Node, str], y: Union[Node, str, int, float, None] = None
) -> FunctionNode:
    """Maximum, dispatched on the type of ``y``.

    Dispatch rules:
    - ``y`` omitted (``None``): rolling maximum ``ts_max(x, 252)``; 252 is
      used as the default window (about one year of trading days).
    - ``y`` is a positive ``int``: rolling maximum ``ts_max(x, y)``.
    - anything else: element-wise ``max(x, y)``.

    Because only strictly positive integers are treated as a window, a call
    such as ``MAX(CLOSE - DELAY(CLOSE, 1), 0)`` stays element-wise instead of
    being routed to ``ts_max``.

    Args:
        x: First factor expression or field name; the input series when the
            function is called with a single argument.
        y: Optional second factor expression, field name, or positive
            integer window size.

    Returns:
        FunctionNode: The resulting function-call node.
    """
    # A missing second argument behaves exactly like an explicit 252 window.
    window = 252 if y is None else y
    if isinstance(window, int) and window > 0:
        return ts_max(x, window)
    return FunctionNode("max", x, _ensure_node(y))
|
||||
|
||||
|
||||
def min_(
    x: Union[Node, str], y: Union[Node, str, int, float, None] = None
) -> FunctionNode:
    """Minimum, dispatched on the type of ``y``.

    Dispatch rules:
    - ``y`` omitted (``None``): rolling minimum ``ts_min(x, 252)``; 252 is
      used as the default window (about one year of trading days).
    - ``y`` is a positive ``int``: rolling minimum ``ts_min(x, y)``.
    - anything else: element-wise ``min(x, y)``.

    Args:
        x: First factor expression or field name; the input series when the
            function is called with a single argument.
        y: Optional second factor expression, field name, or positive
            integer window size.

    Returns:
        FunctionNode: The resulting function-call node.
    """
    # A missing second argument behaves exactly like an explicit 252 window.
    window = 252 if y is None else y
    if isinstance(window, int) and window > 0:
        return ts_min(x, window)
    return FunctionNode("min", x, _ensure_node(y))
|
||||
|
||||
@@ -88,6 +88,7 @@ class PolarsTranslator:
|
||||
self.register_handler("cs_rank", self._handle_cs_rank)
|
||||
self.register_handler("cs_zscore", self._handle_cs_zscore)
|
||||
self.register_handler("cs_neutral", self._handle_cs_neutral)
|
||||
self.register_handler("cs_mean", self._handle_cs_mean)
|
||||
|
||||
# 元素级数学函数 (element_wise)
|
||||
self.register_handler("abs", self._handle_abs)
|
||||
@@ -681,6 +682,18 @@ class PolarsTranslator:
|
||||
# 简单实现:减去截面均值(可在未来扩展为分组中性化)
|
||||
return expr - expr.mean()
|
||||
|
||||
@cross_section
def _handle_cs_mean(self, node: FunctionNode) -> pl.Expr:
    """Translate ``cs_mean(expr)`` into the mean of the translated operand.

    Used for formulas such as GTJA Alpha127, e.g. the ``MEAN`` in
    ``MEAN((100*(CLOSE-MAX(CLOSE,12))/(MAX(CLOSE,12)))^2)``.

    NOTE(review): this method only returns ``expr.mean()``; the
    cross-sectional grouping is presumably applied by the
    ``@cross_section`` decorator — confirm against its definition.

    Args:
        node: The ``cs_mean`` call node; it must carry exactly one argument.

    Returns:
        The mean of the translated argument expression.

    Raises:
        ValueError: If the node does not have exactly one argument.
    """
    call_args = node.args
    if len(call_args) != 1:
        raise ValueError("cs_mean 需要 1 个参数: (expr)")
    return self.translate(call_args[0]).mean()
|
||||
|
||||
# ==================== 元素级数学函数 (element_wise) ====================
|
||||
# 这些函数对每个元素独立计算,不添加 over
|
||||
|
||||
|
||||
26
src/scripts/GtjaConvertor/__init__.py
Normal file
26
src/scripts/GtjaConvertor/__init__.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""GTJA Alpha191 因子转换器。
|
||||
|
||||
将国泰君安的 Alpha191 因子公式转换为框架可识别的 DSL 字符串表达式。
|
||||
|
||||
模块结构:
|
||||
- preprocessor: GTJA 语法清洗工具
|
||||
- converter: 转换主程序
|
||||
|
||||
使用示例:
|
||||
>>> from src.scripts.GtjaConvertor import GtjaConverter
|
||||
>>> converter = GtjaConverter()
|
||||
>>> # 输入 GTJA 原始表达式
|
||||
>>> dsl_str = converter.convert("(-1 * CORR(RANK(DELTA(LOG(VOLUME), 1)), RANK(((CLOSE - OPEN) / OPEN)), 6))")
|
||||
>>> print(dsl_str)
|
||||
(-1 * ts_corr(cs_rank(ts_delta(log(vol), 1)), cs_rank(((close - open) / open)), 6))
|
||||
"""
|
||||
|
||||
from .preprocessor import clean_gtja_formula
|
||||
from .converter import convert_to_dsl, GtjaConverter, parse_multiline_formulas
|
||||
|
||||
# Public names re-exported by the GtjaConvertor package.
__all__ = ["clean_gtja_formula", "convert_to_dsl", "GtjaConverter", "parse_multiline_formulas"]
|
||||
849
src/scripts/GtjaConvertor/converter.py
Normal file
849
src/scripts/GtjaConvertor/converter.py
Normal file
@@ -0,0 +1,849 @@
|
||||
"""GTJA 公式转换器。
|
||||
|
||||
将 GTJA 原始公式转换为框架可识别的 DSL 字符串表达式。
|
||||
转换过程中会验证公式是否能被正确解析为 DSL 节点。
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from src.factors.dsl import Node, FunctionNode
|
||||
from src.factors.api import (
|
||||
close,
|
||||
open,
|
||||
high,
|
||||
low,
|
||||
vol,
|
||||
amount,
|
||||
pre_close,
|
||||
change,
|
||||
pct_chg,
|
||||
ts_mean,
|
||||
ts_std,
|
||||
ts_max,
|
||||
ts_min,
|
||||
ts_sum,
|
||||
ts_delay,
|
||||
ts_delta,
|
||||
ts_corr,
|
||||
ts_cov,
|
||||
ts_var,
|
||||
ts_skew,
|
||||
ts_kurt,
|
||||
ts_pct_change,
|
||||
ts_ema,
|
||||
ts_atr,
|
||||
ts_rsi,
|
||||
ts_obv,
|
||||
ts_rank,
|
||||
ts_sma,
|
||||
ts_wma,
|
||||
ts_decay_linear,
|
||||
ts_argmax,
|
||||
ts_argmin,
|
||||
ts_count,
|
||||
ts_prod,
|
||||
ts_sumac,
|
||||
cs_rank,
|
||||
cs_zscore,
|
||||
cs_neutralize,
|
||||
cs_winsorize,
|
||||
cs_demean,
|
||||
log,
|
||||
exp,
|
||||
sqrt,
|
||||
sign,
|
||||
cos,
|
||||
sin,
|
||||
abs,
|
||||
max_,
|
||||
min_,
|
||||
clip,
|
||||
atan,
|
||||
log1p,
|
||||
if_,
|
||||
where,
|
||||
)
|
||||
|
||||
# 动态补充缺失的 cs_mean
|
||||
try:
    from src.factors.api import cs_mean
except ImportError:
    # Fallback for API versions that do not export cs_mean: build the same
    # "cs_mean" function node locally.
    def cs_mean(x):
        """Return a ``FunctionNode`` named "cs_mean" wrapping ``x``."""
        fallback_node = FunctionNode("cs_mean", x)
        return fallback_node
|
||||
|
||||
|
||||
try:
|
||||
from .preprocessor import clean_gtja_formula, filter_unsupported_formulas
|
||||
except ImportError:
|
||||
from preprocessor import clean_gtja_formula, filter_unsupported_formulas
|
||||
|
||||
|
||||
class GtjaConverter:
    """Convert raw GTJA formulas into DSL expression strings.

    A formula is first screened by ``filter_unsupported_formulas``, then
    rewritten by ``clean_gtja_formula``, and finally evaluated against
    ``SAFE_NAMESPACE`` to check that it builds a DSL node without raising.

    Attributes are documented inline in ``__init__``.
    """

    # Names a cleaned formula may reference while it is evaluated for
    # validation. "volume" is an alias of the ``vol`` field.
    SAFE_NAMESPACE: dict[str, Any] = {
        "close": close,
        "open": open,
        "high": high,
        "low": low,
        "vol": vol,
        "volume": vol,
        "amount": amount,
        "pre_close": pre_close,
        "change": change,
        "pct_chg": pct_chg,
        "ts_mean": ts_mean,
        "ts_std": ts_std,
        "ts_max": ts_max,
        "ts_min": ts_min,
        "ts_sum": ts_sum,
        "ts_delay": ts_delay,
        "ts_delta": ts_delta,
        "ts_corr": ts_corr,
        "ts_cov": ts_cov,
        "ts_var": ts_var,
        "ts_skew": ts_skew,
        "ts_kurt": ts_kurt,
        "ts_pct_change": ts_pct_change,
        "ts_ema": ts_ema,
        "ts_atr": ts_atr,
        "ts_rsi": ts_rsi,
        "ts_obv": ts_obv,
        "ts_rank": ts_rank,
        "ts_sma": ts_sma,
        "ts_wma": ts_wma,
        "ts_decay_linear": ts_decay_linear,
        "ts_argmax": ts_argmax,
        "ts_argmin": ts_argmin,
        "ts_count": ts_count,
        "ts_prod": ts_prod,
        "ts_sumac": ts_sumac,
        "cs_rank": cs_rank,
        "cs_zscore": cs_zscore,
        "cs_neutralize": cs_neutralize,
        "cs_winsorize": cs_winsorize,
        "cs_demean": cs_demean,
        "cs_mean": cs_mean,
        "log": log,
        "exp": exp,
        "sqrt": sqrt,
        "sign": sign,
        "cos": cos,
        "sin": sin,
        "abs": abs,
        "max_": max_,
        "min_": min_,
        "clip": clip,
        "atan": atan,
        "log1p": log1p,
        "if_": if_,
        "where": where,
    }

    def __init__(self):
        # Messages for formulas that failed validation (accumulated across calls).
        self.errors: list[str] = []
        # Messages for formulas skipped as unsupported (accumulated across calls).
        self.warnings: list[str] = []
        # Registration outcomes of the most recent convert_batch() call.
        self._registration_results: list[dict[str, Any]] = []

    def convert(self, formula: str) -> str | None:
        """Convert one raw GTJA formula into a DSL expression string.

        Args:
            formula: The raw GTJA formula text.

        Returns:
            The cleaned DSL string when it validates; ``None`` when the
            formula is filtered out as unsupported (a warning is recorded)
            or when validation raises (an error is recorded).
        """
        if not filter_unsupported_formulas(formula):
            self.warnings.append(
                f"包含暂不支持的算子/循环依赖,已跳过: {formula[:50]}..."
            )
            return None

        clean_formula = clean_gtja_formula(formula)

        try:
            # Build the expression through the DSL API so that invalid
            # function names or argument types surface as exceptions.
            self._validate_formula(clean_formula)
        except Exception as e:
            self.errors.append(
                f"语法节点构建失败: {formula[:50]}... \n\t-> 解析所得: {clean_formula}\n\t-> 错误: {e}"
            )
            return None
        return clean_formula

    def _validate_formula(self, formula: str) -> Node:
        """Evaluate ``formula`` against ``SAFE_NAMESPACE`` and return the result.

        SECURITY NOTE: this uses ``eval``. Emptying ``__builtins__`` is NOT a
        sandbox on its own: attribute chains such as
        ``().__class__.__base__.__subclasses__()`` still reach arbitrary
        objects. Double-underscore names are therefore rejected before
        evaluation. This narrows the attack surface but does not make
        ``eval`` safe for hostile input; only feed it trusted formula text.

        Args:
            formula: The cleaned DSL expression string.

        Returns:
            Whatever object the expression evaluates to.

        Raises:
            ValueError: If the formula contains a double underscore.
            Exception: Anything raised while evaluating the expression.
        """
        if "__" in formula:
            raise ValueError("公式中不允许出现双下划线 '__'")
        return eval(formula, {"__builtins__": {}}, self.SAFE_NAMESPACE)

    def convert_batch(
        self,
        formulas: dict[str, str],
        auto_register: bool = False,
        output_path: Path | None = None,
    ) -> dict[str, str | None]:
        """Convert a batch of formulas, optionally registering the successes.

        Args:
            formulas: Mapping of factor name (e.g. "Alpha1") to formula text.
            auto_register: When true, every successfully converted formula
                is passed to ``register_gtja_factor``.
            output_path: Factor library file forwarded to
                ``register_gtja_factor``; ``None`` lets that function pick
                its default.

        Returns:
            Mapping of factor name to the DSL expression, or ``None`` for
            formulas that could not be converted.
        """
        results: dict[str, str | None] = {}
        # Each batch starts with a fresh registration log.
        self._registration_results = []

        for name, formula in formulas.items():
            result = self.convert(formula)
            results[name] = result

            if auto_register and result is not None:
                reg_result = register_gtja_factor(name, result, output_path)
                self._registration_results.append({"alpha_name": name, **reg_result})

        return results

    def get_registration_report(self) -> dict[str, Any]:
        """Summarize the registration outcomes of the last batch.

        Returns:
            A dict with the keys "total", "success", "skipped", "failed"
            (counts by the "status" field of each outcome) and "details"
            (the list of outcome dicts).
        """
        # getattr keeps the report usable on instances created without
        # running __init__, which the previous hasattr() branch tolerated.
        outcomes = getattr(self, "_registration_results", [])
        counts = {
            status: sum(1 for item in outcomes if item["status"] == status)
            for status in ("success", "skipped", "failed")
        }
        return {
            "total": len(outcomes),
            "success": counts["success"],
            "skipped": counts["skipped"],
            "failed": counts["failed"],
            "details": outcomes,
        }

    def get_stats(self) -> dict[str, Any]:
        """Return the error and warning counts together with their messages."""
        return {
            "errors": len(self.errors),
            "warnings": len(self.warnings),
            "error_details": self.errors,
            "warning_details": self.warnings,
        }
|
||||
|
||||
|
||||
def convert_to_dsl(formula_str: str) -> str | None:
    """Convert a single GTJA formula using a throwaway ``GtjaConverter``.

    Args:
        formula_str: The raw GTJA formula text.

    Returns:
        Whatever ``GtjaConverter.convert`` returns for the formula: the DSL
        string, or ``None`` when the conversion did not succeed. The
        converter's error and warning lists are discarded with it.
    """
    return GtjaConverter().convert(formula_str)
|
||||
|
||||
|
||||
def parse_multiline_formulas(text: str) -> dict[str, str]:
    """Parse ``name: expression`` lines into a name -> expression mapping.

    Each line is split at its first colon only, so colons inside the
    expression (e.g. the GTJA ``a ? b : c`` form) are preserved. Blank
    lines, lines without a colon, and lines whose name or expression is
    empty after trimming are ignored. A repeated name keeps its last
    expression.

    Args:
        text: Multi-line text with one ``name: expression`` entry per line.

    Returns:
        Mapping of trimmed name to trimmed expression.
    """
    parsed: dict[str, str] = {}
    for raw_line in text.strip().split("\n"):
        label, colon, body = raw_line.strip().partition(":")
        label, body = label.strip(), body.strip()
        if colon and label and body:
            parsed[label] = body
    return parsed
|
||||
|
||||
|
||||
def get_next_factor_id(filepath: Path) -> str:
    """Generate the next ``F_XXX`` factor id for a JSONL factor file.

    The file is scanned line by line for JSON objects whose "factor_id" is
    a string of the form ``F_<digits>``; the result is the largest number
    found plus one, zero-padded to at least three digits.

    Lines that are blank, are not valid JSON, are not JSON objects, or carry
    a missing or non-string "factor_id" are skipped instead of raising.

    Args:
        filepath: Path of the JSONL file.

    Returns:
        The new factor id, e.g. "F_001". "F_001" is also returned when the
        file does not exist or cannot be read or decoded.
    """
    import json

    if not filepath.exists():
        return "F_001"

    id_pattern = re.compile(r"^F_(\d+)$")
    highest = 0

    try:
        # Path.open is used rather than the bare name ``open`` because this
        # module imports a DSL field called ``open`` that shadows the builtin.
        with filepath.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry:
                    continue
                try:
                    record = json.loads(entry)
                    if not isinstance(record, dict):
                        continue
                    candidate = record.get("factor_id", "")
                    if not isinstance(candidate, str):
                        continue
                    found = id_pattern.match(candidate)
                    if found:
                        highest = max(highest, int(found.group(1)))
                except ValueError:
                    # Covers json.JSONDecodeError and int() conversion errors.
                    continue
    except (OSError, UnicodeError):
        # Best effort: an unreadable file is treated like a missing one.
        return "F_001"

    return f"F_{highest + 1:03d}"
|
||||
|
||||
|
||||
def extract_alpha_number(alpha_name: str) -> int | None:
|
||||
"""从 Alpha 名称中提取数字。
|
||||
|
||||
Args:
|
||||
alpha_name: 如 "Alpha1", "Alpha123"
|
||||
|
||||
Returns:
|
||||
数字部分,如 1, 123;如果无法解析返回 None
|
||||
"""
|
||||
match = re.match(r"[Aa]lpha(\d+)", alpha_name)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
return None
|
||||
|
||||
|
||||
def register_gtja_factor(
    alpha_name: str,
    dsl_expr: str,
    output_path: Path | None = None,
) -> dict[str, Any]:
    """Register one GTJA factor in the factor library.

    The factor is stored under the normalized name ``GTJA_alpha<NNN>`` with
    an id obtained from ``get_next_factor_id``. A factor whose normalized
    name is already present is skipped.

    Args:
        alpha_name: Original Alpha name, e.g. "Alpha1".
        dsl_expr: DSL expression string of the factor.
        output_path: Factor library file; defaults to ``factors.jsonl``
            under ``settings.data_path_resolved``.

    Returns:
        A result dict with "status" ("success", "skipped" or "failed") and
        "message"; on success it also carries "factor_id" and "factor_name".
    """
    from src.factors.metadata import FactorManager
    from src.factors.metadata.exceptions import DuplicateFactorError, ValidationError
    from src.config.settings import settings

    def _failure(message: str) -> dict[str, Any]:
        # Every failure result has the same two-key shape.
        return {"status": "failed", "message": message}

    number = extract_alpha_number(alpha_name)
    if number is None:
        return _failure(f"无法从 '{alpha_name}' 提取数字编号")

    # Normalized name, e.g. GTJA_alpha001 or GTJA_alpha123.
    factor_name = f"GTJA_alpha{number:03d}"

    if output_path is None:
        output_path = settings.data_path_resolved / "factors.jsonl"

    manager = FactorManager(str(output_path))

    try:
        # A failing lookup (e.g. on an empty file) is deliberately treated
        # as "not registered yet" so that registration is still attempted.
        try:
            already_registered = len(manager.get_factors_by_name(factor_name)) > 0
        except Exception:
            already_registered = False

        if already_registered:
            return {
                "status": "skipped",
                "message": f"因子 '{factor_name}' 已存在",
            }

        factor_id = get_next_factor_id(output_path)
        manager.add_factor(
            {
                "factor_id": factor_id,
                "name": factor_name,
                "desc": f"GTJA {alpha_name} 因子",
                "dsl": dsl_expr,
                "category": "gtja_alpha",
                "source": "GTJA191",
            }
        )
    except DuplicateFactorError as exc:
        return _failure(f"因子 ID 重复: {exc}")
    except ValidationError as exc:
        return _failure(f"验证失败: {exc}")
    except Exception as exc:
        return _failure(f"注册失败: {exc}")

    return {
        "status": "success",
        "message": f"{factor_id}: {factor_name}",
        "factor_id": factor_id,
        "factor_name": factor_name,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 使用示例:多行字符串输入
|
||||
converter = GtjaConverter()
|
||||
|
||||
# 多行字符串,格式为 "因子名: 表达式",支持空行
|
||||
test_input = """
|
||||
Alpha1: (-1 * CORR(RANK(DELTA(LOG(VOLUME), 1)), RANK(((CLOSE -OPEN) / OPEN)), 6))
|
||||
|
||||
Alpha2: (-1 * DELTA((((CLOSE -LOW) -(HIGH -CLOSE)) / (HIGH -LOW)), 1))
|
||||
|
||||
Alpha3: SUM((CLOSE=DELAY(CLOSE,1)?0:CLOSE-(CLOSE>DELAY(CLOSE,1)?MIN(LOW,DELAY(CLOSE,1)):MAX(HIGH,DELAY(CLOSE,1)))),6)
|
||||
|
||||
Alpha4: ((((SUM(CLOSE, 8) / 8) + STD(CLOSE, 8)) < (SUM(CLOSE, 2) / 2)) ? (-1 * 1) : (((SUM(CLOSE, 2) / 2) <((SUM(CLOSE, 8) / 8) -STD(CLOSE, 8))) ? 1 : (((1 < (VOLUME / MEAN(VOLUME,20))) || ((VOLUME /MEAN(VOLUME,20)) == 1)) ? 1 : (-1 * 1))))
|
||||
|
||||
Alpha5: (-1 * TSMAX(CORR(TSRANK(VOLUME, 5), TSRANK(HIGH, 5), 5), 3))
|
||||
|
||||
Alpha6: (RANK(SIGN(DELTA((((OPEN * 0.85) + (HIGH * 0.15))), 4)))* -1)
|
||||
|
||||
Alpha7: ((RANK(MAX((VWAP -CLOSE), 3)) + RANK(MIN((VWAP -CLOSE), 3))) * RANK(DELTA(VOLUME, 3)))
|
||||
|
||||
Alpha8: RANK(DELTA(((((HIGH + LOW) / 2) * 0.2) + (VWAP * 0.8)), 4) * -1)
|
||||
|
||||
Alpha9: SMA(((HIGH+LOW)/2-(DELAY(HIGH,1)+DELAY(LOW,1))/2)*(HIGH-LOW)/VOLUME,7,2)
|
||||
|
||||
Alpha10: (RANK(MAX(((RET < 0) ? STD(RET, 20) : CLOSE)^2),5))
|
||||
|
||||
Alpha11: SUM(((CLOSE-LOW)-(HIGH-CLOSE))./(HIGH-LOW).*VOLUME,6)
|
||||
|
||||
Alpha12: (RANK((OPEN -(SUM(VWAP, 10) / 10)))) * (-1 * (RANK(ABS((CLOSE -VWAP)))))
|
||||
|
||||
Alpha13: (((HIGH * LOW)^0.5) -VWAP)
|
||||
|
||||
Alpha14: CLOSE-DELAY(CLOSE,5)
|
||||
|
||||
Alpha15: OPEN/DELAY(CLOSE,1)-1
|
||||
|
||||
Alpha16: (-1 * TSMAX(RANK(CORR(RANK(VOLUME), RANK(VWAP), 5)), 5))
|
||||
|
||||
Alpha17: RANK((VWAP -MAX(VWAP, 15)))^DELTA(CLOSE, 5)
|
||||
|
||||
Alpha18: CLOSE/DELAY(CLOSE,5)
|
||||
|
||||
Alpha19: (CLOSE<DELAY(CLOSE,5)?(CLOSE-DELAY(CLOSE,5))/DELAY(CLOSE,5):(CLOSE=DELAY(CLOSE,5)?0:(CLOSE-DELAY(CLOSE,5))/CLOSE))
|
||||
|
||||
Alpha20: (CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*100
|
||||
|
||||
Alpha21: REGBETA(MEAN(CLOSE,6),SEQUENCE(6))
|
||||
|
||||
Alpha22: SMEAN(((CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6)-DELAY((CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6),3)),12,1)
|
||||
|
||||
Alpha23: SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE:20),0),20,1)/(SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1)+SMA((CLOSE<=DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1))*100
|
||||
|
||||
Alpha24: SMA(CLOSE-DELAY(CLOSE,5),5,1)
|
||||
|
||||
Alpha25: ((-1 * RANK((DELTA(CLOSE, 7) * (1 -RANK(DECAYLINEAR((VOLUME/MEAN(VOLUME,20)), 9)))))) * (1 +RANK(SUM(RET, 250))))
|
||||
|
||||
Alpha26: ((((SUM(CLOSE, 7) / 7) -CLOSE)) + ((CORR(VWAP, DELAY(CLOSE, 5), 230))))
|
||||
|
||||
Alpha27: WMA((CLOSE-DELAY(CLOSE,3))/DELAY(CLOSE,3)*100+(CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*100,12)
|
||||
|
||||
Alpha28: 3*SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1)-2*SMA(SMA((CLOSE-TSMIN(LOW,9))/(MAX(HIGH,9)-TSMAX(LOW,9))*100,3,1),3,1)
|
||||
|
||||
Alpha29: (CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*VOLUME
|
||||
|
||||
Alpha30: WMA((REGRESI(CLOSE/DELAY(CLOSE)-1,MKT,SMB,HML,60))^2,20)
|
||||
|
||||
Alpha31: (CLOSE-MEAN(CLOSE,12))/MEAN(CLOSE,12)*100
|
||||
|
||||
Alpha32: (-1 * SUM(RANK(CORR(RANK(HIGH), RANK(VOLUME), 3)), 3))
|
||||
|
||||
Alpha33: ((((-1 * TSMIN(LOW, 5)) + DELAY(TSMIN(LOW, 5), 5)) * RANK(((SUM(RET, 240) -SUM(RET, 20)) / 220))) *TSRANK(VOLUME, 5))
|
||||
|
||||
Alpha34: MEAN(CLOSE,12)/CLOSE
|
||||
|
||||
Alpha35: (MIN(RANK(DECAYLINEAR(DELTA(OPEN, 1), 15)), RANK(DECAYLINEAR(CORR((VOLUME), ((OPEN * 0.65) +(OPEN *0.35)), 17),7))) * -1)
|
||||
|
||||
Alpha36: RANK(SUM(CORR(RANK(VOLUME), RANK(VWAP)), 6), 2)
|
||||
|
||||
Alpha37: (-1 * RANK(((SUM(OPEN, 5) * SUM(RET, 5)) -DELAY((SUM(OPEN,5) * SUM(RET, 5)), 10))))
|
||||
|
||||
Alpha38: (((SUM(HIGH, 20) / 20) < HIGH) ? (-1 * DELTA(HIGH, 2)) : 0)
|
||||
|
||||
Alpha39: ((RANK(DECAYLINEAR(DELTA((CLOSE), 2),8)) -RANK(DECAYLINEAR(CORR(((VWAP * 0.3) + (OPEN * 0.7)),SUM(MEAN(VOLUME,180), 37), 14), 12))) * -1)
|
||||
|
||||
Alpha40: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:0),26)/SUM((CLOSE<=DELAY(CLOSE,1)?VOLUME:0),26)*100
|
||||
|
||||
Alpha41: (RANK(MAX(DELTA((VWAP), 3), 5))* -1)
|
||||
|
||||
Alpha42: ((-1 * RANK(STD(HIGH, 10))) * CORR(HIGH, VOLUME, 10))
|
||||
|
||||
Alpha43: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE<DELAY(CLOSE,1)?-VOLUME:0)),6)
|
||||
|
||||
Alpha44: (TSRANK(DECAYLINEAR(CORR(((LOW )), MEAN(VOLUME,10), 7), 6),4) + TSRANK(DECAYLINEAR(DELTA((VWAP),3), 10), 15))
|
||||
|
||||
Alpha45: (RANK(DELTA((((CLOSE * 0.6) + (OPEN *0.4))), 1)) * RANK(CORR(VWAP, MEAN(VOLUME,150), 15)))
|
||||
|
||||
Alpha46: (MEAN(CLOSE,3)+MEAN(CLOSE,6)+MEAN(CLOSE,12)+MEAN(CLOSE,24))/(4*CLOSE)
|
||||
|
||||
Alpha47: SMA((TSMAX(HIGH,6)-CLOSE)/(TSMAX(HIGH,6)-TSMIN(LOW,6))*100,9,1)
|
||||
|
||||
Alpha48: (-1*((RANK(((SIGN((CLOSE -DELAY(CLOSE, 1))) + SIGN((DELAY(CLOSE, 1) -DELAY(CLOSE, 2)))) +SIGN((DELAY(CLOSE, 2) -DELAY(CLOSE, 3)))))) * SUM(VOLUME, 5)) / SUM(VOLUME, 20))
|
||||
|
||||
Alpha49: SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))
|
||||
|
||||
Alpha50: SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))-SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))
|
||||
|
||||
Alpha51: SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))
|
||||
|
||||
Alpha52: SUM(MAX(0,HIGH-DELAY((HIGH+LOW+CLOSE)/3,1)),26)/SUM(MAX(0,DELAY((HIGH+LOW+CLOSE)/3,1)-L),26)*100
|
||||
|
||||
Alpha53: COUNT(CLOSE>DELAY(CLOSE,1),12)/12*100
|
||||
|
||||
Alpha54: (-1 * RANK((STD(ABS(CLOSE -OPEN)) + (CLOSE -OPEN)) + CORR(CLOSE, OPEN,10)))
|
||||
|
||||
Alpha55: SUM(16*(CLOSE-DELAY(CLOSE,1)+(CLOSE-OPEN)/2+DELAY(CLOSE,1)-DELAY(OPEN,1))/((ABS(HIGH-DELAY(CLOSE,1))>ABS(LOW-DELAY(CLOSE,1))&ABS(HIGH-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))?ABS(HIGH-DELAY(CLOSE,1))+ABS(LOW-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:(ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))&ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(CLOSE,1))?ABS(LOW-DELAY(CLOSE,1))+ABS(HIGH-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:ABS(HIGH-DELAY(LOW,1))+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4)))*MAX(ABS(HIGH-DELAY(CLOSE,1)),ABS(LOW-DELAY(CLOSE,1))),20)
|
||||
|
||||
Alpha56: (RANK((OPEN -TSMIN(OPEN, 12))) < RANK((RANK(CORR(SUM(((HIGH + LOW) / 2), 19), SUM(MEAN(VOLUME,40), 19), 13))^5)))
|
||||
|
||||
Alpha57: SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1)
|
||||
|
||||
Alpha58: COUNT(CLOSE>DELAY(CLOSE,1),20)/20*100
|
||||
|
||||
Alpha59: SUM((CLOSE=DELAY(CLOSE,1)?0:CLOSE-(CLOSE>DELAY(CLOSE,1)?MIN(LOW,DELAY(CLOSE,1)):MAX(HIGH,DELAY(CLOSE,1)))),20)
|
||||
|
||||
Alpha60: SUM(((CLOSE-LOW)-(HIGH-CLOSE))./(HIGH-LOW).*VOLUME,20)
|
||||
|
||||
Alpha61: (MAX(RANK(DECAYLINEAR(DELTA(VWAP, 1), 12)),RANK(DECAYLINEAR(RANK(CORR((LOW),MEAN(VOLUME,80), 8)), 17))) * -1)
|
||||
|
||||
Alpha62: (-1 * CORR(HIGH, RANK(VOLUME), 5))
|
||||
|
||||
Alpha63: SMA(MAX(CLOSE-DELAY(CLOSE,1),0),6,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),6,1)*100
|
||||
|
||||
Alpha64: (MAX(RANK(DECAYLINEAR(CORR(RANK(VWAP), RANK(VOLUME), 4), 4)),RANK(DECAYLINEAR(MAX(CORR(RANK(CLOSE), RANK(MEAN(VOLUME,60)), 4), 13), 14))) * -1)
|
||||
|
||||
Alpha65: MEAN(CLOSE,6)/CLOSE
|
||||
|
||||
Alpha66: (CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6)*100
|
||||
|
||||
Alpha67: SMA(MAX(CLOSE-DELAY(CLOSE,1),0),24,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),24,1)*100
|
||||
|
||||
Alpha68: SMA(((HIGH+LOW)/2-(DELAY(HIGH,1)+DELAY(LOW,1))/2)*(HIGH-LOW)/VOLUME,15,2)
|
||||
|
||||
Alpha69: (SUM(DTM,20)>SUM(DBM,20)?(SUM(DTM,20)-SUM(DBM,20))/SUM(DTM,20): (SUM(DTM,20)=SUM(DBM,20)? 0: (SUM(DTM,20)-SUM(DBM,20))/SUM(DBM,20)))
|
||||
|
||||
Alpha70: STD(AMOUNT,6)
|
||||
|
||||
Alpha71: (CLOSE-MEAN(CLOSE,24))/MEAN(CLOSE,24)*100
|
||||
|
||||
Alpha72: SMA((TSMAX(HIGH,6)-CLOSE)/(TSMAX(HIGH,6)-TSMIN(LOW,6))*100,15,1)
|
||||
|
||||
Alpha73: ((TSRANK(DECAYLINEAR(DECAYLINEAR(CORR((CLOSE), VOLUME, 10), 16), 4), 5) - RANK(DECAYLINEAR(CORR(VWAP, MEAN(VOLUME,30), 4),3))) * -1)
|
||||
|
||||
Alpha74: (RANK(CORR(SUM(((LOW * 0.35) + (VWAP * 0.65)), 20), SUM(MEAN(VOLUME,40), 20), 7)) + RANK(CORR(RANK(VWAP), RANK(VOLUME), 6)))
|
||||
|
||||
Alpha75: COUNT(CLOSE>OPEN &BANCHMARKINDEXCLOSE<BANCHMARKINDEXOPEN,50)/COUNT(BANCHMARKINDEXCLOSE<BANCHMARKINDEXOPEN,50)
|
||||
|
||||
Alpha76: STD(ABS((CLOSE/DELAY(CLOSE,1)-1))/VOLUME,20)/MEAN(ABS((CLOSE/DELAY(CLOSE,1)-1))/VOLUME,20)
|
||||
|
||||
Alpha77: MIN(RANK(DECAYLINEAR(((((HIGH + LOW) / 2) + HIGH) -(VWAP + HIGH)), 20)),RANK(DECAYLINEAR(CORR(((HIGH + LOW) / 2), MEAN(VOLUME,40), 3), 6)))
|
||||
|
||||
Alpha78: ((HIGH+LOW+CLOSE)/3-MA((HIGH+LOW+CLOSE)/3,12))/(0.015*MEAN(ABS(CLOSE-MEAN((HIGH+LOW+CLOSE)/3,12)),12))
|
||||
|
||||
Alpha79: SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100
|
||||
|
||||
Alpha80: (VOLUME-DELAY(VOLUME,5))/DELAY(VOLUME,5)*100
|
||||
|
||||
Alpha81: SMA(VOLUME,21,2)
|
||||
|
||||
Alpha82: SMA((TSMAX(HIGH,6)-CLOSE)/(TSMAX(HIGH,6)-TSMIN(LOW,6))*100,20,1)
|
||||
|
||||
Alpha83: (-1 * RANK(COVIANCE(RANK(HIGH), RANK(VOLUME), 5)))
|
||||
|
||||
Alpha84: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE<DELAY(CLOSE,1)?-VOLUME:0)),20)
|
||||
|
||||
Alpha85: (TSRANK((VOLUME / MEAN(VOLUME,20)), 20) * TSRANK((-1 * DELTA(CLOSE, 7)), 8))
|
||||
|
||||
Alpha86: ((0.25 < (((DELAY(CLOSE, 20) -DELAY(CLOSE, 10)) / 10) -((DELAY(CLOSE, 10) -CLOSE) / 10))) ? (-1 * 1):(((((DELAY(CLOSE, 20) -DELAY(CLOSE, 10)) / 10) -((DELAY(CLOSE, 10) -CLOSE) / 10)) < 0) ? 1 : ((-1 * 1) *(CLOSE -DELAY(CLOSE, 1)))))
|
||||
|
||||
Alpha87: ((RANK(DECAYLINEAR(DELTA(VWAP, 4), 7)) + TSRANK(DECAYLINEAR(((((LOW * 0.9) + (LOW * 0.1)) -VWAP) /(OPEN -((HIGH + LOW) / 2))), 11), 7)) * -1)
|
||||
|
||||
Alpha88: (CLOSE-DELAY(CLOSE,20))/DELAY(CLOSE,20)*100
|
||||
|
||||
Alpha89: 2*(SMA(CLOSE,13,2)-SMA(CLOSE,27,2)-SMA(SMA(CLOSE,13,2)-SMA(CLOSE,27,2),10,2))
|
||||
|
||||
Alpha90: ( RANK(CORR(RANK(VWAP), RANK(VOLUME), 5)) * -1)
|
||||
|
||||
Alpha91: ((RANK((CLOSE -MAX(CLOSE, 5)))*RANK(CORR((MEAN(VOLUME,40)), LOW, 5))) * -1)
|
||||
|
||||
Alpha92: (MAX(RANK(DECAYLINEAR(DELTA(((CLOSE * 0.35) + (VWAP *0.65)), 2), 3)),TSRANK(DECAYLINEAR(ABS(CORR((MEAN(VOLUME,180)), CLOSE, 13)), 5), 15)) * -1)
|
||||
|
||||
Alpha93: SUM((OPEN>=DELAY(OPEN,1)?0:MAX((OPEN-LOW),(OPEN-DELAY(OPEN,1)))),20)
|
||||
|
||||
Alpha94: SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE<DELAY(CLOSE,1)?-VOLUME:0)),30)
|
||||
|
||||
Alpha95: STD(AMOUNT,20)
|
||||
|
||||
Alpha96: SMA(SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1),3,1)
|
||||
|
||||
Alpha97: STD(VOLUME,10)
|
||||
|
||||
Alpha98: ((((DELTA((SUM(CLOSE, 100) / 100), 100) / DELAY(CLOSE, 100)) < 0.05) || ((DELTA((SUM(CLOSE, 100) / 100), 100) /DELAY(CLOSE, 100)) == 0.05)) ? (-1 * (CLOSE -TSMIN(CLOSE, 100))) : (-1 * DELTA(CLOSE, 3)))
|
||||
|
||||
Alpha99: (-1 * RANK(COVIANCE(RANK(CLOSE), RANK(VOLUME), 5)))
|
||||
|
||||
Alpha100: STD(VOLUME,20)
|
||||
|
||||
Alpha101: ((RANK(CORR(CLOSE, SUM(MEAN(VOLUME,30), 37), 15)) < RANK(CORR(RANK(((HIGH * 0.1) + (VWAP * 0.9))),RANK(VOLUME), 11))) * -1)
|
||||
|
||||
Alpha102: SMA(MAX(VOLUME-DELAY(VOLUME,1),0),6,1)/SMA(ABS(VOLUME-DELAY(VOLUME,1)),6,1)*100
|
||||
|
||||
Alpha103: ((20-LOWDAY(LOW,20))/20)*100
|
||||
|
||||
Alpha104: (-1 * (DELTA(CORR(HIGH, VOLUME, 5), 5) * RANK(STD(CLOSE, 20))))
|
||||
|
||||
Alpha105: (-1 * CORR(RANK(OPEN), RANK(VOLUME), 10))
|
||||
|
||||
Alpha106: CLOSE-DELAY(CLOSE,20)
|
||||
|
||||
Alpha107: (((-1 * RANK((OPEN -DELAY(HIGH, 1)))) * RANK((OPEN -DELAY(CLOSE, 1)))) *RANK((OPEN -DELAY(LOW, 1))))
|
||||
|
||||
Alpha108: ((RANK((HIGH -MIN(HIGH, 2)))^RANK(CORR((VWAP), (MEAN(VOLUME,120)), 6))) * -1)
|
||||
|
||||
Alpha109: SMA(HIGH-LOW,10,2)/SMA(SMA(HIGH-LOW,10,2),10,2)
|
||||
|
||||
Alpha110: SUM(MAX(0,HIGH-DELAY(CLOSE,1)),20)/SUM(MAX(0,DELAY(CLOSE,1)-LOW),20)*100
|
||||
|
||||
Alpha111: SMA(VOL*((CLOSE-LOW)-(HIGH-CLOSE))/(HIGH-LOW),11,2)-SMA(VOL*((CLOSE-LOW)-(HIGH-CLOSE))/(HIGH-LOW),4,2)
|
||||
|
||||
Alpha112: (SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12)-SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12))/(SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12)+SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12))*100
|
||||
|
||||
Alpha113: (-1 * ((RANK((SUM(DELAY(CLOSE, 5), 20) / 20)) * CORR(CLOSE, VOLUME, 2)) *RANK(CORR(SUM(CLOSE, 5),SUM(CLOSE, 20), 2))))
|
||||
|
||||
Alpha114: ((RANK(DELAY(((HIGH -LOW) / (SUM(CLOSE, 5) / 5)), 2)) * RANK(RANK(VOLUME))) / (((HIGH -LOW) /(SUM(CLOSE, 5) / 5)) / (VWAP -CLOSE)))
|
||||
|
||||
Alpha115: (RANK(CORR(((HIGH * 0.9) + (CLOSE * 0.1)), MEAN(VOLUME,30), 10))^RANK(CORR(TSRANK(((HIGH + LOW) /2), 4), TSRANK(VOLUME, 10), 7)))
|
||||
|
||||
Alpha116: REGBETA(CLOSE,SEQUENCE,20)
|
||||
|
||||
Alpha117: ((TSRANK(VOLUME, 32) * (1 -TSRANK(((CLOSE + HIGH) -LOW), 16))) * (1 -TSRANK(RET, 32)))
|
||||
|
||||
Alpha118: SUM(HIGH-OPEN,20)/SUM(OPEN-LOW,20)*100
|
||||
|
||||
Alpha119: (RANK(DECAYLINEAR(CORR(VWAP, SUM(MEAN(VOLUME,5), 26), 5), 7)) -RANK(DECAYLINEAR(TSRANK(MIN(CORR(RANK(OPEN), RANK(MEAN(VOLUME,15)), 21), 9), 7), 8)))
|
||||
|
||||
Alpha120: (RANK((VWAP -CLOSE)) / RANK((VWAP + CLOSE)))
|
||||
|
||||
Alpha121: ((RANK((VWAP -MIN(VWAP, 12)))^TSRANK(CORR(TSRANK(VWAP, 20), TSRANK(MEAN(VOLUME,60), 2), 18), 3)) * -1)
|
||||
|
||||
Alpha122: (SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2)-DELAY(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2),1))/DELAY(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2),1)
|
||||
|
||||
Alpha123: ((RANK(CORR(SUM(((HIGH + LOW) / 2), 20), SUM(MEAN(VOLUME,60), 20), 9))<RANK(CORR(LOW, VOLUME,6))) * -1)
|
||||
|
||||
Alpha124: (CLOSE -VWAP) / DECAYLINEAR(RANK(TSMAX(CLOSE, 30)),2)
|
||||
|
||||
Alpha125: (RANK(DECAYLINEAR(CORR((VWAP), MEAN(VOLUME,80),17), 20)) / RANK(DECAYLINEAR(DELTA(((CLOSE * 0.5)+ (VWAP * 0.5)), 3), 16)))
|
||||
|
||||
Alpha126: (CLOSE+HIGH+LOW)/3
|
||||
|
||||
Alpha127: (MEAN((100*(CLOSE-MAX(CLOSE,12))/(MAX(CLOSE,12)))^2))^(1/2)
|
||||
|
||||
Alpha128: 100-(100/(1+SUM(((HIGH+LOW+CLOSE)/3>DELAY((HIGH+LOW+CLOSE)/3,1)?(HIGH+LOW+CLOSE)/3*VOLUME:0),14)/SUM(((HIGH+LOW+CLOSE)/3<DELAY((HIGH+LOW+CLOSE)/3,1)?(HIGH+LOW+CLOSE)/3*VOLUME:0),14)))
|
||||
|
||||
Alpha129: SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12)
|
||||
|
||||
Alpha130: (RANK(DECAYLINEAR(CORR(((HIGH + LOW) / 2), MEAN(VOLUME,40), 9), 10)) /RANK(DECAYLINEAR(CORR(RANK(VWAP), RANK(VOLUME), 7),3)))
|
||||
|
||||
Alpha131: (RANK(DELAT(VWAP, 1))^TSRANK(CORR(CLOSE,MEAN(VOLUME,50), 18), 18))
|
||||
|
||||
Alpha132: MEAN(AMOUNT,20)
|
||||
|
||||
Alpha133: ((20-HIGHDAY(HIGH,20))/20)*100-((20-LOWDAY(LOW,20))/20)*100
|
||||
|
||||
Alpha134: (CLOSE-DELAY(CLOSE,12))/DELAY(CLOSE,12)*VOLUME
|
||||
|
||||
Alpha135: SMA(DELAY(CLOSE/DELAY(CLOSE,20),1),20,1)
|
||||
|
||||
Alpha136: ((-1 * RANK(DELTA(RET, 3))) * CORR(OPEN, VOLUME, 10))
|
||||
|
||||
Alpha137: 16*(CLOSE-DELAY(CLOSE,1)+(CLOSE-OPEN)/2+DELAY(CLOSE,1)-DELAY(OPEN,1))/((ABS(HIGH-DELAY(CLOSE,1))>ABS(LOW-DELAY(CLOSE,1)) &ABS(HIGH-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))?ABS(HIGH-DELAY(CLOSE,1))+ABS(LOW-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:(ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1)) &ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(CLOSE,1))?ABS(LOW-DELAY(CLOSE,1))+ABS(HIGH-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:ABS(HIGH-DELAY(LOW,1))+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4)))*MAX(ABS(HIGH-DELAY(CLOSE,1)),ABS(LOW-DELAY(CLOSE,1)))
|
||||
|
||||
Alpha138: ((RANK(DECAYLINEAR(DELTA((((LOW * 0.7) + (VWAP *0.3))), 3), 20)) -TSRANK(DECAYLINEAR(TSRANK(CORR(TSRANK(LOW, 8), TSRANK(MEAN(VOLUME,60), 17), 5), 19), 16), 7)) * -1)
|
||||
|
||||
Alpha139: (-1 * CORR(OPEN, VOLUME, 10))
|
||||
|
||||
Alpha140: MIN(RANK(DECAYLINEAR(((RANK(OPEN) + RANK(LOW)) -(RANK(HIGH) + RANK(CLOSE))), 8)),TSRANK(DECAYLINEAR(CORR(TSRANK(CLOSE, 8), TSRANK(MEAN(VOLUME,60), 20), 8), 7), 3))
|
||||
|
||||
Alpha141: (RANK(CORR(RANK(HIGH), RANK(MEAN(VOLUME,15)), 9))* -1)
|
||||
|
||||
Alpha142: (((-1 * RANK(TSRANK(CLOSE, 10))) * RANK(DELTA(DELTA(CLOSE, 1), 1))) *RANK(TSRANK((VOLUME/MEAN(VOLUME,20)), 5)))
|
||||
|
||||
Alpha143: CLOSE>DELAY(CLOSE,1)?(CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)*SELF:SELF
|
||||
|
||||
Alpha144: SUMIF(ABS(CLOSE/DELAY(CLOSE,1)-1)/AMOUNT,20,CLOSE<DELAY(CLOSE,1))/COUNT(CLOSE<DELAY(CLOSE,1),20)
|
||||
|
||||
Alpha145: (MEAN(VOLUME,9)-MEAN(VOLUME,26))/MEAN(VOLUME,12)*100
|
||||
|
||||
Alpha146: MEAN((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)-SMA((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1),61,2),20)*((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)-SMA((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1),61,2))/SMA(((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)-((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)-SMA((CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1),61,2)))^2,60)
|
||||
|
||||
Alpha147: REGBETA(MEAN(CLOSE,12),SEQUENCE(12))
|
||||
|
||||
Alpha148: ((RANK(CORR((OPEN), SUM(MEAN(VOLUME,60), 9), 6)) < RANK((OPEN -TSMIN(OPEN, 14)))) * -1)
|
||||
|
||||
Alpha149: REGBETA(FILTER(CLOSE/DELAY(CLOSE,1)-1,BANCHMARKINDEXCLOSE<DELAY(BANCHMARKINDEXCLOSE,1)),FILTER(BANCHMARKINDEXCLOSE/DELAY(BANCHMARKINDEXCLOSE,1)-1,BANCHMARKINDEXCLOSE<DELAY(BANCHMARKINDEXCLOSE,1)),252)
|
||||
|
||||
Alpha150: (CLOSE+HIGH+LOW)/3*VOLUME
|
||||
|
||||
Alpha151: SMA(CLOSE-DELAY(CLOSE,20),20,1)
|
||||
|
||||
Alpha152: SMA(MEAN(DELAY(SMA(DELAY(CLOSE/DELAY(CLOSE,9),1),9,1),1),12)-MEAN(DELAY(SMA(DELAY(CLOSE/DELAY(CLOSE,9),1),9,1),1),26),9,1)
|
||||
|
||||
Alpha153: (MEAN(CLOSE,3)+MEAN(CLOSE,6)+MEAN(CLOSE,12)+MEAN(CLOSE,24))/4
|
||||
|
||||
Alpha154: (((VWAP -MIN(VWAP, 16))) < (CORR(VWAP, MEAN(VOLUME,180), 18)))
|
||||
|
||||
Alpha155: SMA(VOLUME,13,2)-SMA(VOLUME,27,2)-SMA(SMA(VOLUME,13,2)-SMA(VOLUME,27,2),10,2)
|
||||
|
||||
Alpha156: (MAX(RANK(DECAYLINEAR(DELTA(VWAP, 5), 3)), RANK(DECAYLINEAR(((DELTA(((OPEN * 0.15) + (LOW *0.85)),2) / ((OPEN * 0.15) + (LOW * 0.85))) * -1), 3))) * -1)
|
||||
|
||||
Alpha157: (MIN(PROD(RANK(RANK(LOG(SUM(TSMIN(RANK(RANK((-1 * RANK(DELTA((CLOSE -1), 5))))), 2), 1)))), 1), 5) +TSRANK(DELAY((-1 * RET), 6), 5))
|
||||
|
||||
Alpha158: ((HIGH-SMA(CLOSE,15,2))-(LOW-SMA(CLOSE,15,2)))/CLOSE
|
||||
|
||||
Alpha159: ((CLOSE-SUM(MIN(LOW,DELAY(CLOSE,1)),6))/SUM(MAX(HGIH,DELAY(CLOSE,1))-MIN(LOW,DELAY(CLOSE,1)),6)*12*24+(CLOSE-SUM(MIN(LOW,DELAY(CLOSE,1)),12))/SUM(MAX(HGIH,DELAY(CLOSE,1))-MIN(LOW,DELAY(CLOSE,1)),12)*6*24+(CLOSE-SUM(MIN(LOW,DELAY(CLOSE,1)),24))/SUM(MAX(HGIH,DELAY(CLOSE,1))-MIN(LOW,DELAY(CLOSE,1)),24)*6*24)*100/(6*12+6*24+12*24)
|
||||
|
||||
Alpha160: SMA((CLOSE<=DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1)
|
||||
|
||||
Alpha161: MEAN(MAX(MAX((HIGH-LOW),ABS(DELAY(CLOSE,1)-HIGH)),ABS(DELAY(CLOSE,1)-LOW)),12)
|
||||
|
||||
Alpha162: (SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100-MIN(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12))/(MAX(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12)-MIN(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12))
|
||||
|
||||
Alpha163: RANK(((((-1 * RET) * MEAN(VOLUME,20)) * VWAP) * (HIGH -CLOSE)))
|
||||
|
||||
Alpha164: SMA((((CLOSE>DELAY(CLOSE,1))?1/(CLOSE-DELAY(CLOSE,1)):1)-MIN(((CLOSE>DELAY(CLOSE,1))?1/(CLOSE-DELAY(CLOSE,1)):1),12))/(HIGH-LOW)*100,13,2)
|
||||
|
||||
Alpha165: MAX(SUMAC(CLOSE-MEAN(CLOSE,48)))-MIN(SUMAC(CLOSE-MEAN(CLOSE,48)))/STD(CLOSE,48)
|
||||
|
||||
Alpha166: -20*(20-1 )^1.5*SUM(CLOSE/DELAY(CLOSE,1)-1-MEAN(CLOSE/DELAY(CLOSE,1)-1,20),20)/((20-1)*(20-2)(SUM((CLOSE/DELAY(CLOSE,1),20)^2,20))^1.5)
|
||||
|
||||
Alpha167: SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12)
|
||||
|
||||
Alpha168: (-1*VOLUME/MEAN(VOLUME,20))
|
||||
|
||||
Alpha169: SMA(MEAN(DELAY(SMA(CLOSE-DELAY(CLOSE,1),9,1),1),12)-MEAN(DELAY(SMA(CLOSE-DELAY(CLOSE,1),9,1),1),26),10,1)
|
||||
|
||||
Alpha170: ((((RANK((1 / CLOSE)) * VOLUME) / MEAN(VOLUME,20)) * ((HIGH * RANK((HIGH -CLOSE))) / (SUM(HIGH, 5) /5))) -RANK((VWAP -DELAY(VWAP, 5))))
|
||||
|
||||
Alpha171: ((-1 * ((LOW -CLOSE) * (OPEN^5))) / ((CLOSE -HIGH) * (CLOSE^5)))
|
||||
|
||||
Alpha172: MEAN(ABS(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6)
|
||||
|
||||
Alpha173: 3*SMA(CLOSE,13,2)-2*SMA(SMA(CLOSE,13,2),13,2)+SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2)
|
||||
|
||||
Alpha174: SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1)
|
||||
|
||||
Alpha175: MEAN(MAX(MAX((HIGH-LOW),ABS(DELAY(CLOSE,1)-HIGH)),ABS(DELAY(CLOSE,1)-LOW)),6)
|
||||
|
||||
Alpha176: CORR(RANK(((CLOSE -TSMIN(LOW, 12)) / (TSMAX(HIGH, 12) -TSMIN(LOW,12)))),RANK(VOLUME), 6)
|
||||
|
||||
Alpha177: ((20-HIGHDAY(HIGH,20))/20)*100
|
||||
|
||||
Alpha178: (CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)*VOLUME
|
||||
|
||||
Alpha179: (RANK(CORR(VWAP, VOLUME, 4)) *RANK(CORR(RANK(LOW),RANK(MEAN(VOLUME,50)), 12)))
|
||||
|
||||
Alpha180: ((MEAN(VOLUME,20)<VOLUME)?((-1*TSRANK(ABS(DELTA(CLOSE,7)),60))*SIGN(DELTA(CLOSE,7)):(-1*VOLUME)))
|
||||
|
||||
Alpha181: SUM(((CLOSE/DELAY(CLOSE,1)-1)-MEAN((CLOSE/DELAY(CLOSE,1)-1),20))-(BANCHMARKINDEXCLOSE-MEAN(BANCHMARKINDEXCLOSE,20))^2,20)/SUM((BANCHMARKINDEXCLOSE-MEAN(BANCHMARKINDEXCLOSE,20))^3)
|
||||
|
||||
Alpha182: COUNT((CLOSE>OPEN & BANCHMARKINDEXCLOSE>BANCHMARKINDEXOPEN)OR(CLOSE<OPEN &BANCHMARKINDEXCLOSE<BANCHMARKINDEXOPEN),20)/20
|
||||
|
||||
Alpha183: MAX(SUMAC(CLOSE-MEAN(CLOSE,24)))-MIN(SUMAC(CLOSE-MEAN(CLOSE,24)))/STD(CLOSE,24)
|
||||
|
||||
Alpha184: (RANK(CORR(DELAY((OPEN -CLOSE), 1), CLOSE, 200)) + RANK((OPEN -CLOSE)))
|
||||
|
||||
Alpha185: RANK((-1 * ((1 -(OPEN / CLOSE))^2)))
|
||||
|
||||
Alpha186: (MEAN(ABS(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 &LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6)+DELAY(MEAN(ABS(SUM((LD>0 &LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0&LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6),6))/2
|
||||
|
||||
Alpha187: SUM((OPEN<=DELAY(OPEN,1)?0:MAX((HIGH-OPEN),(OPEN-DELAY(OPEN,1)))),20)
|
||||
|
||||
Alpha188: ((HIGH-LOW–SMA(HIGH-LOW,11,2))/SMA(HIGH-LOW,11,2))*100
|
||||
|
||||
Alpha189: MEAN(ABS(CLOSE-MEAN(CLOSE,6)),6)
|
||||
|
||||
Alpha190: LOG((COUNT(CLOSE/DELAY(CLOSE)-1>((CLOSE/DELAY(CLOSE,19))^(1/20)-1),20)-1)*(SUMIF(((CLOSE/DELAY(CLOSE)-1-(CLOSE/DELAY(CLOSE,19))^(1/20)-1))^2,20,CLOSE/DELAY(CLOSE)-1<(CLOSE/DELAY(CLOSE,19))^(1/20)-1))/((COUNT((CLOSE/DELAY(CLOSE)-1<(CLOSE/DELAY(CLOSE,19))^(1/20)-1),20))*(SUMIF((CLOSE/DELAY(CLOSE)-1-((CLOSE/DELAY(CLOSE,19))^(1/20)-1))^2,20,CLOSE/DELAY(CLOSE)-1>(CLOSE/DELAY(CLOSE,19))^(1/20)-1))))
|
||||
|
||||
Alpha191: ((CORR(MEAN(VOLUME,20), LOW, 5) + ((HIGH + LOW) / 2)) -CLOSE)
|
||||
"""
|
||||
|
||||
print("=" * 60)
|
||||
print("GTJA Alpha191 因子转换测试(带自动注册)")
|
||||
print("=" * 60)
|
||||
|
||||
# 解析多行字符串
|
||||
formulas = parse_multiline_formulas(test_input)
|
||||
print(f"\n共解析到 {len(formulas)} 个因子\n")
|
||||
|
||||
# 使用批量转换并自动注册
|
||||
# auto_register=True 会自动将转换成功的因子注册到因子库
|
||||
results = converter.convert_batch(
|
||||
formulas,
|
||||
auto_register=True, # 启用自动注册
|
||||
)
|
||||
|
||||
# 显示每个因子的转换和注册结果
|
||||
for name, dsl_str in results.items():
|
||||
print(f"因子名称: {name}")
|
||||
if dsl_str:
|
||||
print(f"DSL 表达式: {dsl_str}")
|
||||
else:
|
||||
print("转换失败或包含不支持的算子")
|
||||
print()
|
||||
|
||||
# 打印转换统计
|
||||
stats = converter.get_stats()
|
||||
print("\n" + "=" * 60)
|
||||
print("转换统计:")
|
||||
print(f" 错误: {stats['errors']}")
|
||||
print(f" 警告: {stats['warnings']}(暂不支持的因子)")
|
||||
|
||||
if stats["errors"] > 0:
|
||||
print("\n错误详情:")
|
||||
for error in stats["error_details"]:
|
||||
print(f" - {error}")
|
||||
|
||||
if stats["warnings"] > 0:
|
||||
print("\n警告详情(这些因子不会被注册):")
|
||||
for warning in stats["warning_details"]:
|
||||
print(f" - {warning}")
|
||||
|
||||
# 打印注册报告
|
||||
reg_report = converter.get_registration_report()
|
||||
if reg_report["total"] > 0:
|
||||
print("\n" + "=" * 60)
|
||||
print("因子注册报告:")
|
||||
print(f" 总计尝试: {reg_report['total']}")
|
||||
print(f" 成功注册: {reg_report['success']}")
|
||||
print(f" 已存在跳过: {reg_report['skipped']}")
|
||||
print(f" 注册失败: {reg_report['failed']}")
|
||||
|
||||
# 打印成功的因子
|
||||
success_items = [d for d in reg_report["details"] if d["status"] == "success"]
|
||||
if success_items:
|
||||
print("\n成功注册的因子:")
|
||||
for item in success_items:
|
||||
print(f" - {item['message']}")
|
||||
273
src/scripts/GtjaConvertor/preprocessor.py
Normal file
273
src/scripts/GtjaConvertor/preprocessor.py
Normal file
@@ -0,0 +1,273 @@
|
||||
"""GTJA 公式预处理器。
|
||||
|
||||
将 GTJA 原始语法清洗为框架可识别的 DSL 语法。
|
||||
修复了原版公式中的拼写错误、歧义重载、嵌套三元运算符等问题。
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# Full-width (CJK) punctuation and the en dash, mapped to the ASCII characters
# that every later regex in the pipeline expects.
_PUNCTUATION_TABLE = str.maketrans(
    {
        "\uff08": "(",  # full-width left parenthesis
        "\uff09": ")",  # full-width right parenthesis
        "\uff0c": ",",  # full-width comma
        "\u2013": "-",  # en dash (used as a minus sign in Alpha 188)
        "\u3010": "[",  # left black lenticular bracket
        "\u3011": "]",  # right black lenticular bracket
    }
)

# Literal operator rewrites, applied in this order.
_OPERATOR_REPLACEMENTS = (
    ("./", "/"),
    (".*", "*"),
    ("^", "**"),
    ("||", "|"),
    ("&&", "&"),
)


def _compile_rules(rules):
    """Compile ``(pattern, replacement)`` pairs into case-insensitive regex rules."""
    return tuple(
        (re.compile(pattern, flags=re.IGNORECASE), replacement)
        for pattern, replacement in rules
    )


# Derived macros expanded into base fields.
_MACRO_RULES = _compile_rules(
    (
        (r"\bRET\b", "(CLOSE / DELAY(CLOSE, 1) - 1)"),
        (r"\bVWAP\b", "(AMOUNT / VOLUME)"),
    )
)

# Known typos in the published GTJA formula list. Order matters: the specific
# Alpha 23 rule must run before the generic "CLOSE:20" rule.
_TYPO_RULES = _compile_rules(
    (
        (r"\bHGIH\b", "HIGH"),  # Alpha 159: misspelling
        (r"\bDELAT\b", "DELTA"),  # Alpha 131: misspelling
        # Alpha 23: colon and comma swapped
        (r"\?STD\(CLOSE\s*:\s*20\)\s*,\s*0", "? STD(CLOSE, 20) : 0"),
        (r"CLOSE\s*:\s*20", "CLOSE, 20"),  # other stray colons
        (r"(?<=-)L\b", "LOW"),  # Alpha 52: abbreviated "-L)"
        (r"\)\(", ")*("),  # Alpha 166: missing multiplication sign
        # Alpha 166: superfluous argument / garbled formatting
        (r"\(CLOSE/DELAY\(CLOSE,1\),20\)", "(CLOSE/DELAY(CLOSE,1)-1)"),
        # Alpha 180: misplaced parenthesis
        (
            r"\*SIGN\(DELTA\(CLOSE,\s*7\)\)\s*:\s*\(-1\s*\*VOLUME\)\)\)",
            "*SIGN(DELTA(CLOSE, 7))) : (-1 * VOLUME))",
        ),
        (r"\bOR\b", "|"),  # Alpha 182: textual logical operator
        (r"\bAND\b", "&"),  # textual logical operator
    )
)

# A lone "=" (not part of <=, >=, !=, ==) is a comparison, not an assignment.
_ASSIGNMENT_RE = re.compile(r"(?<![=<>!])=(?![=])")

# GTJA function name -> DSL function name.
_FUNCTION_RULES = _compile_rules(
    (
        (r"\bDELAY\s*\(", "ts_delay("),
        (r"\bDELTA\s*\(", "ts_delta("),
        (r"\bSTD\s*\(", "ts_std("),
        (r"\bMAX\s*\(", "max_("),
        (r"\bMIN\s*\(", "min_("),
        (r"\bSUM\s*\(", "ts_sum("),
        (r"\bVAR\s*\(", "ts_var("),
        (r"\bCOV\s*\(", "ts_cov("),
        (r"\bCOVIANCE\s*\(", "ts_cov("),
        (r"\bCORR\s*\(", "ts_corr("),
        (r"\bSMA\s*\(", "ts_sma("),
        (r"\bSMEAN\s*\(", "ts_sma("),
        (r"\bMA\s*\(", "ts_mean("),
        (r"\bWMA\s*\(", "ts_wma("),
        (r"\bDECAYLINEAR\s*\(", "ts_decay_linear("),
        (r"\bHIGHDAY\s*\(", "ts_argmax("),
        (r"\bLOWDAY\s*\(", "ts_argmin("),
        (r"\bCOUNT\s*\(", "ts_count("),
        (r"\bPROD\s*\(", "ts_prod("),
        (r"\bSUMAC\s*\(", "ts_sumac("),
        (r"\bTSRANK\s*\(", "ts_rank("),
        (r"\bTSMAX\s*\(", "ts_max("),
        (r"\bTSMIN\s*\(", "ts_min("),
        (r"\bLOG\s*\(", "log("),
        (r"\bEXP\s*\(", "exp("),
        (r"\bSQRT\s*\(", "sqrt("),
        (r"\bSIGN\s*\(", "sign("),
        (r"\bABS\s*\(", "abs("),
        (r"\bATAN\s*\(", "atan("),
    )
)

# GTJA field name -> DSL field name.
_FIELD_RULES = _compile_rules(
    (
        (r"\bCLOSE\b", "close"),
        (r"\bOPEN\b", "open"),
        (r"\bHIGH\b", "high"),
        (r"\bLOW\b", "low"),
        (r"\bVOLUME\b", "vol"),
        (r"\bVOL\b", "vol"),
        (r"\bAMOUNT\b", "amount"),
        (r"\bPRE_CLOSE\b", "pre_close"),
        (r"\bCHANGE\b", "change"),
        (r"\bPCT_CHG\b", "pct_chg"),
    )
)

# (DSL function, default value to append, number of arguments it should have).
_DEFAULT_ARGS = (
    ("ts_delay", "1", 2),
    ("ts_delta", "1", 2),
    ("ts_std", "20", 2),
    ("ts_corr", "5", 3),
    ("ts_sma", "1", 3),
)


def _apply_rules(text: str, rules) -> str:
    """Apply each compiled ``(regex, replacement)`` rule to *text* in order."""
    for regex, replacement in rules:
        text = regex.sub(replacement, text)
    return text


def _find_matching_paren(text: str, open_idx: int) -> int:
    """Return the index of the ')' closing the '(' at *open_idx*, or -1."""
    depth = 1
    for idx in range(open_idx + 1, len(text)):
        ch = text[idx]
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return idx
    return -1


def _count_top_level_args(args: str) -> int:
    """Count comma-separated arguments in *args*, ignoring nested parentheses."""
    if not args.strip():
        return 0
    depth = 0
    commas = 0
    for ch in args:
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        elif ch == "," and depth == 0:
            commas += 1
    return commas + 1


def _resolve_overloaded_funcs(f: str) -> str:
    """Rewrite RANK/MEAN to ``cs_*`` (at most one argument) or ``ts_*`` (several)."""
    for target in ("RANK", "MEAN"):
        finder = re.compile(
            rf"(?<!cs_)(?<!ts_)\b{target}\s*\(", flags=re.IGNORECASE
        )
        while True:
            match = finder.search(f)
            if not match:
                break
            paren_start = match.end() - 1
            paren_end = _find_matching_paren(f, paren_start)
            if paren_end == -1:
                # Unbalanced call: leave the remaining text untouched.
                break
            arg_count = _count_top_level_args(f[paren_start + 1:paren_end])
            prefix = "ts_" if arg_count > 1 else "cs_"
            f = f[:match.start()] + prefix + target.lower() + f[paren_start:]
    return f


def _ternary_to_if(f: str, max_iterations: int = 100) -> str:
    """Convert ``cond ? a : b`` into ``if_(cond, a, b)``, rightmost '?' first.

    The condition and the else-branch extend to the nearest enclosing
    parenthesis, top-level comma, or top-level ':' / '?' belonging to an outer
    ternary, so an unparenthesised chain ``a ? b : c ? d : e`` nests to the
    right: ``if_(a, b, if_(c, d, e))``.
    """
    iteration = 0
    while iteration < max_iterations:
        q_idx = f.rfind("?")
        if q_idx == -1:
            break

        # Find the ':' paired with this '?'. It is the rightmost '?', so no
        # other ternary can start between it and its colon.
        depth = 0
        c_idx = -1
        for i in range(q_idx + 1, len(f)):
            ch = f[i]
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
            elif ch == ":" and depth == 0:
                c_idx = i
                break

        if c_idx == -1:
            # No matching colon: neutralise the '?' so the loop terminates.
            f = f[:q_idx] + "_" + f[q_idx + 1:]
            continue

        # Walk left to find where the condition starts.
        depth = 0
        a_start = 0
        for i in range(q_idx - 1, -1, -1):
            ch = f[i]
            if ch == ")":
                depth += 1
            elif ch == "(":
                depth -= 1
                if depth < 0:
                    a_start = i + 1
                    break
            elif depth == 0 and ch in ",:?":
                a_start = i + 1
                break

        # Walk right to find where the else-branch ends.
        depth = 0
        c_end = len(f)
        for i in range(c_idx + 1, len(f)):
            ch = f[i]
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
                if depth < 0:
                    c_end = i
                    break
            elif depth == 0 and ch in ",:":
                c_end = i
                break

        cond = f[a_start:q_idx].strip()
        when_true = f[q_idx + 1:c_idx].strip()
        when_false = f[c_idx + 1:c_end].strip()
        f = f[:a_start] + f"if_({cond}, {when_true}, {when_false})" + f[c_end:]
        iteration += 1

    return f


def _add_default_args(
    f: str, func_name: str, default_val: str, required_args: int
) -> str:
    """Append *default_val* to calls of *func_name* that have too few arguments.

    Arguments are processed recursively so nested calls to the same function
    are completed as well. A call with an empty argument list is left as is.
    """
    pattern = f"{func_name}("
    pieces = []
    i = 0
    while i < len(f):
        if not f.startswith(pattern, i):
            pieces.append(f[i])
            i += 1
            continue

        paren_start = i + len(pattern) - 1
        paren_end = _find_matching_paren(f, paren_start)
        if paren_end == -1:
            # Unbalanced call: copy one character and keep scanning.
            pieces.append(f[i])
            i += 1
            continue

        args_content = _add_default_args(
            f[paren_start + 1:paren_end], func_name, default_val, required_args
        )
        arg_count = _count_top_level_args(args_content)
        if 0 < arg_count < required_args:
            pieces.append(f"{func_name}({args_content}, {default_val})")
        else:
            pieces.append(f"{func_name}({args_content})")
        i = paren_end + 1
    return "".join(pieces)


def clean_gtja_formula(formula: str) -> str:
    """Rewrite a raw GTJA formula into the framework's DSL syntax.

    The steps run in a fixed order:

    0. Normalise full-width punctuation and the en dash to ASCII.
    1. Rewrite arithmetic/logical operators (``^`` -> ``**``, ``&&`` -> ``&`` ...).
    2. Expand the ``RET`` and ``VWAP`` macros.
    3. Repair known typos of the published formula list.
    4. Turn a lone ``=`` into ``==``.
    5. Resolve overloaded ``RANK``/``MEAN`` to ``cs_*`` or ``ts_*`` by arity.
    6. Convert ternaries ``c ? a : b`` into ``if_(c, a, b)``.
    7. Map GTJA function names to DSL function names.
    8. Map GTJA field names to DSL field names.
    9. Append default arguments to calls that omit them.

    Args:
        formula: Raw GTJA formula text.

    Returns:
        The cleaned formula string.
    """
    formula = formula.strip().translate(_PUNCTUATION_TABLE)

    for old, new in _OPERATOR_REPLACEMENTS:
        formula = formula.replace(old, new)

    formula = _apply_rules(formula, _MACRO_RULES)
    formula = _apply_rules(formula, _TYPO_RULES)
    formula = _ASSIGNMENT_RE.sub("==", formula)
    formula = _resolve_overloaded_funcs(formula)
    formula = _ternary_to_if(formula)
    formula = _apply_rules(formula, _FUNCTION_RULES)
    formula = _apply_rules(formula, _FIELD_RULES)

    for func_name, default_val, required_args in _DEFAULT_ARGS:
        formula = _add_default_args(formula, func_name, default_val, required_args)

    return formula
|
||||
|
||||
|
||||
def filter_unsupported_formulas(formula: str) -> bool:
    """Tell whether *formula* is free of functions/operators that are not supported.

    Args:
        formula: Formula text to inspect (matching is case-insensitive).

    Returns:
        ``False`` as soon as any unsupported token is found, ``True`` otherwise.
    """
    blocked_tokens = (
        r"\bREGBETA\b",  # OLS beta
        r"\bREGRESI\b",  # OLS residual
        r"\bSEQUENCE\b",  # generated time sequence (used as a regressor)
        r"\bSELF\b",  # recursive self-reference
        r"\bBANCHMARK\w*\b",  # benchmark index, incl. run-together names such as BANCHMARKINDEXCLOSE
        r"\bINDEX\b",  # externally supplied index variable
        r"\bMKT\b",  # Fama-French factor
        r"\bSMB\b",  # Fama-French factor
        r"\bHML\b",  # Fama-French factor
        r"\bDTM\b",  # externally defined macro
        r"\bDBM\b",  # externally defined macro
        r"\bTR\b",  # externally defined macro
        r"\bHD\b",  # externally defined macro
        r"\bLD\b",  # externally defined macro
        r"\bFILTER\b",  # conditional filter function
        r"\bSUMIF\b",  # conditional sum function
    )
    has_blocked_token = any(
        re.search(token, formula, re.IGNORECASE) for token in blocked_tokens
    )
    return not has_blocked_token
|
||||
|
||||
Reference in New Issue
Block a user