210 lines
7.3 KiB
Python
210 lines
7.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
极简重启守护进程 - 不持有策略状态,只监控和重启
|
|
目标:崩溃后不影响策略,重启后可无缝接管
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
import psutil
|
|
import logging
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import threading
|
|
import subprocess
|
|
|
|
|
|
class RestartDaemon:
    """Minimal restart daemon that restarts strategy subprocesses on a schedule.

    The daemon holds no strategy state. At every time point listed in
    ``RESTART_TIMES`` it scans ``pid_dir`` for ``*.pid`` files, stops the
    processes they name and launches ``launcher.py`` again for each of them.
    """

    # Daily restart time points, compared against local time as "HH:MM".
    RESTART_TIMES = ["08:50", "20:50"]

    # Seconds between schedule checks. Deliberately well below 60 so that
    # timer drift can never step over a whole "HH:MM" minute, and so that
    # stop() is honoured quickly.
    CHECK_INTERVAL_SECONDS = 20

    # Seconds to wait for a process to exit after terminate() / kill().
    TERMINATE_TIMEOUT_SECONDS = 30
    KILL_TIMEOUT_SECONDS = 5

    def __init__(self, pid_dir="pids", log_dir="logs"):
        """Prepare the working directories and the logger.

        Args:
            pid_dir: Directory that is scanned for ``*.pid`` files.
            log_dir: Directory that receives ``restart_daemon.log``.
        """
        self.pid_dir = Path(pid_dir)
        self.log_dir = Path(log_dir)

        # The directories must exist before the logger is configured:
        # logging.FileHandler opens its file immediately and fails if the
        # parent directory is missing.
        self.pid_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir.mkdir(parents=True, exist_ok=True)

        self.logger = self._setup_logger()
        self.running = False
        self.thread = None
        # Lets stop() wake the check loop instead of waiting out a sleep.
        self._stop_event = threading.Event()

    def _setup_logger(self):
        """Configure root logging (file + stdout) and return the daemon logger.

        ``self.log_dir`` must already exist when this is called.
        """
        log_file = self.log_dir / "restart_daemon.log"

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s [%(levelname)s] %(message)s',
            handlers=[
                logging.FileHandler(log_file, encoding='utf-8'),
                logging.StreamHandler(sys.stdout),
            ],
            force=True,  # replace any handlers configured earlier
        )
        logger = logging.getLogger("RestartDaemon")
        logger.info("=" * 80)
        logger.info("📝 日志系统初始化完成")
        logger.info("📂 日志文件: %s", log_file.absolute())
        return logger

    def start(self):
        """Start the background check thread and block until stopped.

        Returns immediately (after logging a warning) when the daemon is
        already running. Otherwise the calling thread stays in a sleep loop
        until ``running`` becomes false or Ctrl+C is pressed.
        """
        if self.running:
            self.logger.warning("⚠️ 守护进程已在运行")
            return

        self._stop_event.clear()
        self.running = True
        self.thread = threading.Thread(target=self._check_loop, daemon=True)
        self.thread.start()

        self.logger.info("=" * 80)
        self.logger.info("✅ 重启守护进程已启动")
        self.logger.info("⏰ 监控时间点: %s", ", ".join(self.RESTART_TIMES))
        self.logger.info("📂 PID目录: %s", self.pid_dir.absolute())
        self.logger.info("=" * 80)

        # Block the calling thread so the process stays alive.
        try:
            self.logger.info("📌 进程已常驻,按 Ctrl+C 退出...")
            while self.running:
                time.sleep(1)
        except KeyboardInterrupt:
            self.logger.info("\n⏹️ 收到退出信号,正在停止...")
            self.stop()

    def stop(self):
        """Signal the check loop to exit and wait up to 5 seconds for it."""
        self.running = False
        self._stop_event.set()
        worker = self.thread
        # A thread cannot join itself; skip the join in that case.
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=5)
        self.logger.info("✅ 重启守护进程已停止")

    def _claim_due_time_point(self, now, last_restart_date):
        """Return the restart time point due at ``now`` and mark it as done.

        Args:
            now: The ``datetime`` to evaluate.
            last_restart_date: Dict mapping "HH:MM" to the date on which that
                time point last fired; updated in place when one is claimed.

        Returns:
            The matching "HH:MM" string, or ``None`` when ``now`` is not a
            restart minute or that time point has already fired today.
        """
        time_point = now.strftime("%H:%M")
        if time_point not in self.RESTART_TIMES:
            return None

        today = now.date()
        if last_restart_date.get(time_point) == today:
            return None  # already handled today

        # Mark before restarting so a failing restart is not retried on
        # every poll within the same minute.
        last_restart_date[time_point] = today
        return time_point

    def _check_loop(self):
        """Poll the clock and trigger a restart at each configured time point."""
        last_restart_date = {t: None for t in self.RESTART_TIMES}

        while self.running:
            try:
                due = self._claim_due_time_point(datetime.now(), last_restart_date)
                if due is not None:
                    self._perform_restart(due)
            except Exception as e:
                # Keep the daemon alive whatever a single cycle does.
                self.logger.error("❌ 检查循环出错: %s", e, exc_info=True)
                self.logger.error("=" * 80)

            # Interruptible sleep: wait() returns True as soon as stop() runs.
            if self._stop_event.wait(self.CHECK_INTERVAL_SECONDS):
                break

    def _stop_strategy(self, pid_file):
        """Stop the process named in ``pid_file``.

        Sends terminate() first and falls back to kill() when the process
        has not exited within ``TERMINATE_TIMEOUT_SECONDS``.

        Returns:
            ``True`` when a running process was stopped, ``False`` when the
            PID in the file no longer refers to a process.

        Raises:
            Exception: when the PID file cannot be parsed or the process
                could not be stopped (e.g. access denied, still alive after
                kill()). The caller treats this as "do not relaunch".
        """
        pid = int(pid_file.read_text().strip())

        try:
            proc = psutil.Process(pid)
            self.logger.info("⏹️ 停止策略 PID %d: %s", pid, proc.name())
            proc.terminate()
        except psutil.NoSuchProcess:
            self.logger.warning("⚠️ PID文件存在但进程已死: %d", pid)
            return False

        try:
            proc.wait(timeout=self.TERMINATE_TIMEOUT_SECONDS)
            self.logger.info("✅ 已优雅停止 PID %d", pid)
        except psutil.TimeoutExpired:
            try:
                proc.kill()
            except psutil.NoSuchProcess:
                pass  # exited on its own right after the timeout
            # Confirm the process is really gone before reporting success;
            # a second TimeoutExpired propagates as a stop failure.
            proc.wait(timeout=self.KILL_TIMEOUT_SECONDS)
            self.logger.info("🔥 强制终止 PID %d", pid)
        return True

    @staticmethod
    def _config_path_for(pid_file):
        """Derive the strategy config path from a PID file name.

        ``<Strategy>_<Symbol>.pid`` maps to ``strategies/<Strategy>/<Symbol>.py``
        (split on the first underscore). Returns ``None`` when the file stem
        contains no underscore.
        """
        strategy_name, separator, symbol = Path(pid_file).stem.partition('_')
        if not separator:
            return None
        return Path("strategies") / strategy_name / "{}.py".format(symbol)

    def _start_strategy(self, pid_file):
        """Launch ``launcher.py`` for the strategy that owns ``pid_file``.

        Returns:
            ``True`` when a new process was spawned, ``False`` when the PID
            file name is malformed or the derived config file is missing.
        """
        config_file = self._config_path_for(pid_file)
        if config_file is None:
            self.logger.error("❌ PID文件名格式错误: %s", pid_file.stem)
            return False

        if not config_file.exists():
            self.logger.error("❌ 配置文件不存在: %s", config_file)
            return False

        # Popen returns immediately; output is discarded here because the
        # launcher is expected to handle its own logging.
        process = subprocess.Popen(
            [sys.executable, "launcher.py", "--config", str(config_file)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            cwd=Path.cwd(),
        )
        self.logger.info("✅ 启动新进程 PID %d: %s", process.pid, config_file.name)
        return True

    def _perform_restart(self, time_point: str):
        """Stop every strategy that has a PID file and launch it again.

        Args:
            time_point: The "HH:MM" schedule entry that triggered the
                restart; used for logging only.

        Strategies whose stop attempt failed are NOT relaunched, so a second
        instance is never started next to one that may still be running.
        Nothing is relaunched when no running strategy could be stopped.
        """
        self.logger.info("\n" + "=" * 80)
        self.logger.info("⏰ 到达重启时间: %s", time_point)
        self.logger.info("=" * 80)

        # 1. Discover the strategies through their PID files.
        pid_files = sorted(self.pid_dir.glob("*.pid"))
        if not pid_files:
            self.logger.info("⚠️ 未发现运行中的策略")
            return

        self.logger.info("📋 发现 %d 个策略需要重启", len(pid_files))

        # 2. Stop them; remember which ones are safe to relaunch.
        stopped_count = 0
        restartable = []
        for pid_file in pid_files:
            try:
                if self._stop_strategy(pid_file):
                    stopped_count += 1
                restartable.append(pid_file)
            except Exception as e:
                self.logger.error("❌ 停止失败 %s: %s", pid_file, e, exc_info=True)

        if stopped_count == 0:
            self.logger.warning("⚠️ 未成功停止任何策略")
            return

        # 3. Give the stopped processes a moment to release resources.
        self.logger.info("\n⏳ 等待2秒资源释放...")
        time.sleep(2)

        # 4. Relaunch.
        self.logger.info("\n🚀 重新启动所有策略...")
        restarted_count = 0
        for pid_file in restartable:
            try:
                if self._start_strategy(pid_file):
                    restarted_count += 1
            except Exception as e:
                self.logger.error("❌ 启动失败: %s", e, exc_info=True)

        # 5. Summary. Success means every discovered PID file ended up with
        # a freshly started process (stale PID files included).
        self.logger.info("\n" + "=" * 80)
        self.logger.info("📊 重启统计:")
        self.logger.info(" 停止成功: %d个", stopped_count)
        self.logger.info(" 启动成功: %d个", restarted_count)

        if restarted_count == len(pid_files):
            self.logger.info("✅ 所有策略重启成功")
        else:
            self.logger.warning("⚠️ 部分策略重启失败")

        self.logger.info("=" * 80)
|
|
|
|
|
|
def main():
    """Entry point: build a RestartDaemon with default directories and run it."""
    RestartDaemon().start()


if __name__ == "__main__":
    main()