refactor(factor): 完全重构因子计算框架 - 引入DSL表达式系统

- 删除旧因子框架:移除 base.py、composite.py、data_loader.py、data_spec.py
  及所有子模块(momentum、financial、quality、sentiment等)
- 新增DSL表达式系统:实现 factor DSL 编译器和翻译器
  - dsl.py: 领域特定语言定义
  - compiler.py: AST编译与优化
  - translator.py: Polars表达式翻译
  - api.py: 统一API接口
- 新增数据路由层:data_router.py 实现字段到表的动态路由
- 新增API封装:api_pro_bar.py 提供pro_bar数据接口
- 更新执行引擎:engine.py 适配新的DSL架构
- 重构测试体系:删除旧测试,新增 test_dsl_promotion.py、
  test_factor_integration.py、test_pro_bar.py
- 清理文档:删除8个过时文档(factor_design、db_sync_guide等)
This commit is contained in:
2026-02-27 22:22:23 +08:00
parent c3c20ed7ea
commit a56433e440
51 changed files with 667 additions and 11287 deletions

View File

@@ -1,35 +1,35 @@
"""Token bucket rate limiter implementation.
"""API 速率限制器实现。
This module provides a thread-safe token bucket algorithm for rate limiting.
提供基于固定时间窗口的速率限制,适合 Tushare 等按分钟计费的 API。
"""
import time
import threading
from typing import Optional
from dataclasses import dataclass, field
from dataclasses import dataclass
@dataclass
class RateLimiterStats:
    """Counters describing how a rate limiter has been used.

    Attributes:
        total_requests: Number of acquire attempts, granted or denied.
        successful_requests: Number of attempts that were granted.
        denied_requests: Number of attempts that were refused.
        total_wait_time: Accumulated seconds spent waiting by granted
            requests.
        current_tokens: Token count from the token-bucket variant of the
            limiter; ``None`` when it is not tracked.
        current_window_requests: Requests already granted in the current
            time window.
        window_start_time: ``time.monotonic()`` reading taken when the
            current window started.
    """

    total_requests: int = 0
    successful_requests: int = 0
    denied_requests: int = 0
    total_wait_time: float = 0.0
    # NOTE(review): the window-based limiter methods maintain
    # current_window_requests rather than this field -- confirm whether any
    # caller still reads it before removing it.
    current_tokens: Optional[float] = None
    current_window_requests: int = 0
    window_start_time: float = 0.0
class TokenBucketRateLimiter:
"""Thread-safe token bucket rate limiter.
"""基于固定时间窗口的速率限制器。
Implements a token bucket algorithm for controlling request rate.
Tokens are added at a fixed rate up to the bucket capacity.
适合 Tushare 等按时间窗口(如每分钟)限制请求数的 API 场景。
在窗口期内,请求数达到上限后将阻塞或等待下一个窗口。
Attributes:
capacity: Maximum number of tokens in the bucket
refill_rate: Number of tokens added per second
initial_tokens: Initial number of tokens (default: capacity)
capacity: 每个时间窗口内允许的最大请求数
window_seconds: 时间窗口长度(秒)
"""
def __init__(
@@ -38,155 +38,157 @@ class TokenBucketRateLimiter:
refill_rate_per_second: float = 1.67,
initial_tokens: Optional[int] = None,
) -> None:
"""Initialize the token bucket rate limiter.
"""初始化速率限制器。
Args:
capacity: Maximum token capacity
refill_rate_per_second: Token refill rate per second
initial_tokens: Initial token count (default: capacity)
capacity: 每个时间窗口内允许的最大请求数
refill_rate_per_second: 保留参数(向后兼容),实际使用 window_seconds=60
initial_tokens: 保留参数(向后兼容)
"""
self.capacity = capacity
self.refill_rate = refill_rate_per_second
self.tokens = float(initial_tokens if initial_tokens is not None else capacity)
self.last_refill_time = time.monotonic()
# Tushare 通常按分钟限制,所以固定使用 60 秒窗口
self.window_seconds = 60.0
self._requests_in_window = 0
self._window_start = time.monotonic()
self._lock = threading.RLock()
self._stats = RateLimiterStats()
self._stats.current_tokens = self.tokens
self._stats.window_start_time = self._window_start
def _is_new_window(self) -> bool:
    """Return True once the current time window has fully elapsed.

    The window is considered over when at least ``window_seconds`` have
    passed on the monotonic clock since ``_window_start``.
    """
    elapsed = time.monotonic() - self._window_start
    return elapsed >= self.window_seconds
def _reset_window(self) -> None:
    """Start a fresh time window beginning now.

    Resets the per-window request counter to zero and records the new
    window start both on the limiter and in its statistics object.
    """
    self._window_start = time.monotonic()
    self._requests_in_window = 0
    self._stats.window_start_time = self._window_start
def acquire(self, timeout: float = float("inf")) -> tuple[bool, float]:
    """Acquire permission for one request, waiting for a later window if needed.

    If the current window still has spare capacity the request is granted
    immediately. Otherwise the caller sleeps (outside the lock) until the
    window ends and then tries again. The retry is a loop, so a caller that
    loses the race for the next window's capacity keeps waiting instead of
    being refused while it still has time left.

    Args:
        timeout: Maximum number of seconds to wait. The default (infinity)
            waits until permission is granted.

    Returns:
        ``(success, wait_time)``. ``wait_time`` is ``0.0`` when the request
        is granted without sleeping, and the elapsed seconds when it is
        granted (or refused) after sleeping. When the request is refused
        before any sleep because the next window starts later than the
        timeout allows, ``wait_time`` is ``timeout`` itself.
    """
    start_time = time.monotonic()
    unbounded = timeout == float("inf")
    has_waited = False

    while True:
        with self._lock:
            # Roll over to a new window if the previous one has expired.
            if self._is_new_window():
                self._reset_window()

            if self._requests_in_window < self.capacity:
                self._requests_in_window += 1
                wait_time = time.monotonic() - start_time if has_waited else 0.0
                self._stats.total_requests += 1
                self._stats.successful_requests += 1
                self._stats.total_wait_time += wait_time
                self._stats.current_window_requests = self._requests_in_window
                return True, wait_time

            # Window is full: work out how long until the next one opens.
            now = time.monotonic()
            time_to_next_window = self.window_seconds - (now - self._window_start)
            elapsed = now - start_time

            # Refuse when the next window starts after the caller's deadline.
            if not unbounded and time_to_next_window > timeout - elapsed:
                self._stats.total_requests += 1
                self._stats.denied_requests += 1
                return False, (elapsed if has_waited else timeout)

        # Sleep without holding the lock so other threads are not blocked.
        if time_to_next_window > 0:
            time.sleep(time_to_next_window)
            has_waited = True
def acquire_nonblocking(self) -> tuple[bool, float]:
    """Try to acquire permission for one request without ever sleeping.

    Returns:
        ``(success, wait_time)``. On success ``wait_time`` is ``0.0``. On
        refusal it is the number of seconds (never negative) until the
        current window ends.
    """
    with self._lock:
        # Roll over to a new window if the previous one has expired.
        if self._is_new_window():
            self._reset_window()

        if self._requests_in_window < self.capacity:
            self._requests_in_window += 1
            self._stats.total_requests += 1
            self._stats.successful_requests += 1
            self._stats.current_window_requests = self._requests_in_window
            return True, 0.0

        # Window is full: report how long the caller would have to wait.
        time_to_next_window = self.window_seconds - (
            time.monotonic() - self._window_start
        )
        self._stats.total_requests += 1
        self._stats.denied_requests += 1
        return False, max(0.0, time_to_next_window)
def get_available_tokens(self) -> float:
    """Return how many more requests the current window can still grant.

    If the stored window has already expired, the full ``capacity`` is
    reported. This call only reads state; it does not reset the window.

    Returns:
        Remaining request allowance for the current window, as a float.
    """
    with self._lock:
        if self._is_new_window():
            return float(self.capacity)
        return float(self.capacity - self._requests_in_window)
def get_stats(self) -> RateLimiterStats:
"""Get rate limiter statistics.
"""获取速率限制器统计信息。
Returns:
RateLimiterStats instance
RateLimiterStats 实例
"""
with self._lock:
self._refill()
self._stats.current_tokens = self.tokens
self._stats.current_window_requests = self._requests_in_window
return self._stats