refactor: 清理代码日志、重构速率限制器、切换存储方案

- 移除 client.py 和 daily.py 中的调试日志
- 重构 rate_limiter 支持无限超时和更精确的令牌获取
- 变更 stock_basic 存储方案 HDF5 → CSV
- 更新项目规则：强制使用 uv、禁止读取 config/ 目录
- 新增数据同步模块 sync.py 和测试
- .gitignore 添加 !data/ 允许跟踪数据文件
2026-02-01 02:29:54 +08:00
parent 38e78a5326
commit ec08a2578c
13 changed files with 710 additions and 47 deletions
--- a/src/data/rate_limiter.py
+++ b/src/data/rate_limiter.py
@@ -2,6 +2,7 @@

 This module provides a thread-safe token bucket algorithm for rate limiting.
 """
+
 import time
 import threading
 from typing import Optional
@@ -11,14 +12,12 @@ from dataclasses import dataclass, field
@dataclass
 class RateLimiterStats:
    """Statistics for rate limiter."""
+
    total_requests: int = 0
    successful_requests: int = 0
    denied_requests: int = 0
    total_wait_time: float = 0.0
-    current_tokens: float = field(default=None, init=False)
-
-    def __post_init__(self):
-        self.current_tokens = field(default=None)
+    current_tokens: Optional[float] = None


 class TokenBucketRateLimiter:
@@ -54,13 +53,13 @@ class TokenBucketRateLimiter:
        self._stats = RateLimiterStats()
        self._stats.current_tokens = self.tokens

-    def acquire(self, timeout: float = 30.0) -> tuple[bool, float]:
+    def acquire(self, timeout: float = float("inf")) -> tuple[bool, float]:
        """Acquire a token from the bucket.

        Blocks until a token is available or timeout expires.

        Args:
-            timeout: Maximum time to wait for a token in seconds
+            timeout: Maximum time to wait for a token in seconds (default: inf)

        Returns:
            Tuple of (success, wait_time):
@@ -84,32 +83,58 @@ class TokenBucketRateLimiter:
            tokens_needed = 1 - self.tokens
            time_to_refill = tokens_needed / self.refill_rate

-            if time_to_refill > timeout:
+            # Check if we can wait for the token within timeout
+            # Handle infinite timeout specially
+            is_infinite_timeout = timeout == float("inf")
+            if not is_infinite_timeout and time_to_refill > timeout:
                self._stats.total_requests += 1
                self._stats.denied_requests += 1
                return False, timeout

-            # Wait for tokens
-            self._lock.release()
-            time.sleep(time_to_refill)
-            self._lock.acquire()
+            # Wait for tokens - loop until we get one or timeout
+            while True:
+                # Calculate remaining time we can wait
+                elapsed = time.monotonic() - start_time
+                remaining_timeout = (
+                    timeout - elapsed if not is_infinite_timeout else float("inf")
+                )

-            wait_time = time.monotonic() - start_time
+                # Check if we've exceeded timeout
+                if not is_infinite_timeout and remaining_timeout <= 0:
+                    self._stats.total_requests += 1
+                    self._stats.denied_requests += 1
+                    return False, elapsed

-            with self._lock:
+                # Calculate wait time for next token
+                tokens_needed = max(0, 1 - self.tokens)
+                time_to_wait = (
+                    tokens_needed / self.refill_rate if tokens_needed > 0 else 0.1
+                )
+
+                # If we can't wait long enough, fail
+                if not is_infinite_timeout and time_to_wait > remaining_timeout:
+                    self._stats.total_requests += 1
+                    self._stats.denied_requests += 1
+                    return False, elapsed
+
+                # Wait outside the lock to allow other threads to refill
+                self._lock.release()
+                time.sleep(
+                    min(time_to_wait, 0.1)
+                )  # Cap wait to 100ms to check frequently
+                self._lock.acquire()
+
+                # Refill and check again
                self._refill()
                if self.tokens >= 1:
                    self.tokens -= 1
+                    wait_time = time.monotonic() - start_time
                    self._stats.total_requests += 1
                    self._stats.successful_requests += 1
                    self._stats.total_wait_time += wait_time
                    self._stats.current_tokens = self.tokens
                    return True, wait_time

-            self._stats.total_requests += 1
-            self._stats.denied_requests += 1
-            return False, wait_time
-
    def acquire_nonblocking(self) -> tuple[bool, float]:
        """Try to acquire a token without blocking.