diff --git a/README.md b/README.md index 0d90622..f7feafa 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,8 @@ 系统将自动注册名为idc-monitor的system服务 +注意:若您的服务器必须要求禁ping,程序可能无法正常运行,在安装脚本前,请手动将config.yml中的WAY改为http,然后再启动安装脚本 + **卸载:** 1. chmod +x ./uninstall.sh 2. ./uninstall.sh - -### 配置信息 \ No newline at end of file diff --git a/app/config.yml b/app/config.yml new file mode 100644 index 0000000..629d18c --- /dev/null +++ b/app/config.yml @@ -0,0 +1,7 @@ +ACCOUNT: '' # 填写核云IDC账号(手机号或邮箱) +API_KEY: '' # 填写核云IDC API密钥 +WAY: ping # 填写检测方式:ping 或 http,默认为ping +DOMAIN: '' # 当WAY为http时,填写要检测的域名(多个域名用英文逗号分隔) +SPAN: 300 # 监控间隔时间(秒),默认300秒(5分钟) +EXCEPTION_IPS: [] # 例外IP列表,这些IP关机时不会自动开机(例如:["1.2.3.4", "5.6.7.8"]) +JWT: '' # JWT Token(自动管理,无需手动填写) \ No newline at end of file diff --git a/app/install.sh b/app/install.sh index a2bfe0b..9e7b849 100644 --- a/app/install.sh +++ b/app/install.sh @@ -1,21 +1,30 @@ #!/bin/bash # 核云IDC服务商VPS自动监测重启程序 - 安装脚本 -# 该脚本会创建一个systemd服务,持续化运行当前目录下的python main.py命令 +# 该脚本会检查配置并创建systemd服务,持续化运行monitor.py + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color # 获取当前脚本所在目录的绝对路径 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_DIR="$(dirname "$SCRIPT_DIR")" # 定义服务名称 SERVICE_NAME="idc-monitor" SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service" +CONFIG_FILE="${SCRIPT_DIR}/config.yml" -echo "正在安装 ${SERVICE_NAME} 服务..." +echo "==========================================" +echo " 核云IDC VPS监控程序 - 安装向导" +echo "==========================================" +echo "" # 检查是否以root权限运行 if [ "$EUID" -ne 0 ]; then - echo "错误: 请以root权限运行此脚本 (sudo ./install.sh)" + echo -e "${RED}错误: 请以root权限运行此脚本 (sudo ./install.sh)${NC}" exit 1 fi @@ -24,123 +33,276 @@ if ! command -v python3 &> /dev/null; then if command -v python &> /dev/null; then PYTHON_CMD="python" else - echo "错误: 未找到Python,请先安装Python" + echo -e "${RED}错误: 未找到Python,请先安装Python 3${NC}" exit 1 fi else PYTHON_CMD="python3" fi -echo "检测到Python: ${PYTHON_CMD}" - -# 检查并安装pip -install_pip() { - echo "正在安装pip..." - - # 尝试使用系统包管理器安装pip - if command -v apt-get &> /dev/null; then - apt-get update && apt-get install -y python3-pip - PIP_CMD="pip3" - elif command -v yum &> /dev/null; then - yum install -y python3-pip - PIP_CMD="pip3" - elif command -v dnf &> /dev/null; then - dnf install -y python3-pip - PIP_CMD="pip3" - else - # 如果包管理器不可用,使用get-pip.py - echo "未检测到常用包管理器,尝试使用get-pip.py安装..." - curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py - ${PYTHON_CMD} get-pip.py - rm -f get-pip.py - PIP_CMD="pip" - fi - - # 验证pip安装 - if command -v pip3 &> /dev/null; then - PIP_CMD="pip3" - elif command -v pip &> /dev/null; then - PIP_CMD="pip" - else - echo "错误: pip安装失败" - return 1 - fi - - echo "✅ pip安装成功: ${PIP_CMD}" - return 0 -} +echo -e "${GREEN}✅${NC} 检测到Python: ${PYTHON_CMD}" # 检查pip是否存在 if command -v pip3 &> /dev/null; then PIP_CMD="pip3" - echo "检测到pip: ${PIP_CMD}" elif command -v pip &> /dev/null; then PIP_CMD="pip" - echo "检测到pip: ${PIP_CMD}" else - echo "警告: 未检测到pip" + echo -e "${YELLOW}警告: 未检测到pip${NC}" read -p "是否现在安装pip?(Y/n): " -n 1 -r echo if [[ $REPLY =~ ^[Nn]$ ]]; then - echo "跳过pip安装" - PIP_CMD="" + echo -e "${RED}错误: pip不可用,无法继续${NC}" + exit 1 else - if ! install_pip; then - echo "错误: pip安装失败,无法继续" + # 安装pip + if command -v apt-get &> /dev/null; then + apt-get update && apt-get install -y python3-pip + elif command -v yum &> /dev/null; then + yum install -y python3-pip + elif command -v dnf &> /dev/null; then + dnf install -y python3-pip + else + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py + ${PYTHON_CMD} get-pip.py + rm -f get-pip.py + fi + + if command -v pip3 &> /dev/null; then + PIP_CMD="pip3" + elif command -v pip &> /dev/null; then + PIP_CMD="pip" + else + echo -e "${RED}错误: pip安装失败${NC}" exit 1 fi fi fi -# 安装Python依赖 -if [ -f "${PROJECT_DIR}/requirements.txt" ]; then - if [ -n "${PIP_CMD}" ]; then - echo "正在安装Python依赖包..." - cd ${PROJECT_DIR} - if ${PIP_CMD} install -r requirements.txt; then - echo "✅ Python依赖包安装成功" - else - echo "❌ Python依赖包安装失败" - read -p "是否继续安装服务?(y/N): " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo "安装已取消" - exit 1 - fi - fi - else - echo "警告: 跳过依赖包安装(pip不可用)" - fi -else - echo "警告: ${PROJECT_DIR}/requirements.txt 文件不存在,跳过依赖安装" +echo -e "${GREEN}✅${NC} 检测到pip: ${PIP_CMD}" + +# 检查配置文件是否存在 +if [ ! -f "${CONFIG_FILE}" ]; then + echo -e "${RED}错误: 配置文件不存在: ${CONFIG_FILE}${NC}" + exit 1 fi -# 检查main.py是否存在 -if [ ! -f "${PROJECT_DIR}/main.py" ]; then - echo "警告: ${PROJECT_DIR}/main.py 文件不存在" - echo "请确保main.py文件位于项目根目录下" - read -p "是否继续安装?(y/N): " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo "安装已取消" +# 读取现有配置 +echo "" +echo "正在检查配置文件..." + +# 使用grep解析YAML配置 +ACCOUNT=$(grep "^ACCOUNT:" "${CONFIG_FILE}" | sed 's/ACCOUNT:[[:space:]]*//' | sed 's/[[:space:]]*#.*//' | tr -d "'" | tr -d '"') +API_KEY=$(grep "^API_KEY:" "${CONFIG_FILE}" | sed 's/API_KEY:[[:space:]]*//' | sed 's/[[:space:]]*#.*//' | tr -d "'" | tr -d '"') +WAY=$(grep "^WAY:" "${CONFIG_FILE}" | sed 's/WAY:[[:space:]]*//' | sed 's/[[:space:]]*#.*//' | tr -d "'" | tr -d '"') +DOMAIN=$(grep "^DOMAIN:" "${CONFIG_FILE}" | sed 's/DOMAIN:[[:space:]]*//' | sed 's/[[:space:]]*#.*//' | tr -d "'" | tr -d '"') +SPAN=$(grep "^SPAN:" "${CONFIG_FILE}" | sed 's/SPAN:[[:space:]]*//' | sed 's/[[:space:]]*#.*//' | tr -d "'" | tr -d '"') +EXCEPTION_IPS=$(grep "^EXCEPTION_IPS:" "${CONFIG_FILE}" | sed 's/EXCEPTION_IPS:[[:space:]]*//' | sed 's/[[:space:]]*#.*//' | tr -d "'" | tr -d '"') + +NEED_UPDATE=false + +# 检查ACCOUNT +if [ -z "$ACCOUNT" ]; then + echo "" + echo "==================================================" + echo -e -n "${RED}请输入核云IDC账号(手机号或邮箱): ${NC}" + read ACCOUNT + if [ -z "$ACCOUNT" ]; then + echo -e "${RED}错误: 账号不能为空${NC}" exit 1 fi + NEED_UPDATE=true fi -# ... existing code ... +# 检查API_KEY +if [ -z "$API_KEY" ]; then + echo "" + echo "==================================================" + echo -e -n "${RED}请输入核云IDC API密钥: ${NC}" + read API_KEY + if [ -z "$API_KEY" ]; then + echo -e "${RED}错误: API密钥不能为空${NC}" + exit 1 + fi + NEED_UPDATE=true +fi + +# 检查WAY +if [ -z "$WAY" ]; then + echo "" + echo "==================================================" + echo -e "${RED}请选择检测方式:${NC}" + echo " 1. ping - Ping检测IP地址(默认)" + echo " 2. http - HTTP检测域名" + echo -e -n "${RED}请输入选项 1/2,直接回车默认为1: ${NC}" + read WAY_CHOICE + + if [ "$WAY_CHOICE" = "2" ]; then + WAY="http" + else + WAY="ping" + fi + NEED_UPDATE=true +fi + +# 如果WAY为http,检查DOMAIN +if [ "$WAY" = "http" ] && [ -z "$DOMAIN" ]; then + echo "" + echo "==================================================" + echo -e "${RED}请输入要检测的域名(多个域名用英文逗号分隔):${NC}" + echo "例如: example.com,test.com,demo.com" + echo -e -n "${RED}域名: ${NC}" + read DOMAIN + if [ -z "$DOMAIN" ]; then + echo -e "${RED}错误: 域名不能为空${NC}" + exit 1 + fi + NEED_UPDATE=true +fi + +# 检查SPAN +if [ -z "$SPAN" ]; then + echo "" + echo "==================================================" + echo -e -n "${RED}请输入监控间隔时间(秒,直接回车默认300秒): ${NC}" + read SPAN_INPUT + if [ -n "$SPAN_INPUT" ]; then + if [[ "$SPAN_INPUT" =~ ^[0-9]+$ ]]; then + if [ "$SPAN_INPUT" -lt 60 ]; then + echo -e "${RED}警告: 间隔时间过短,建议至少60秒${NC}" + fi + SPAN=$SPAN_INPUT + else + echo -e "${RED}警告: 输入无效,使用默认值300秒${NC}" + SPAN=300 + fi + else + SPAN=300 + fi + NEED_UPDATE=true +fi + +# 询问是否设置例外IP +echo "" +echo "==================================================" +echo -e "${RED}是否设置例外IP列表?(这些IP关机时不会自动开机)${NC}" +echo -e -n "${RED}请输入例外IP,多个IP用英文逗号分隔,直接回车跳过: ${NC}" +read EXCEPTION_INPUT + +if [ -n "$EXCEPTION_INPUT" ]; then + EXCEPTION_IPS="$EXCEPTION_INPUT" + NEED_UPDATE=true +else + if [ -z "$EXCEPTION_IPS" ]; then + EXCEPTION_IPS="" + fi +fi + +# 保存配置 +if [ "$NEED_UPDATE" = true ]; then + echo "" + echo "==================================================" + + # 使用Python更新YAML文件 + ${PYTHON_CMD} << PYEOF +import yaml + +config_file = "${CONFIG_FILE}" + +try: + with open(config_file, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) +except: + config = {} + +config['ACCOUNT'] = "${ACCOUNT}" +config['API_KEY'] = "${API_KEY}" +config['WAY'] = "${WAY}" +if "${WAY}" == "http": + config['DOMAIN'] = "${DOMAIN}" +config['SPAN'] = ${SPAN} + +# 处理例外IP +exception_ips_str = "${EXCEPTION_IPS}" +if exception_ips_str: + config['EXCEPTION_IPS'] = [ip.strip() for ip in exception_ips_str.split(',') if ip.strip()] +else: + config['EXCEPTION_IPS'] = [] + +# 清除JWT字段(重新生成) +if 'JWT' in config: + del config['JWT'] + +with open(config_file, 'w', encoding='utf-8') as f: + yaml.dump(config, f, allow_unicode=True, default_flow_style=False) + +print("\033[32m✅ 配置已保存\033[0m") +PYEOF + + if [ $? -ne 0 ]; then + echo -e "${RED}错误: 配置保存失败${NC}" + exit 1 + fi +else + echo -e "${GREEN}✅${NC} 配置检查完成,无需更新" +fi + +echo "" +echo "当前配置:" +echo " 账号: ${ACCOUNT}" +echo " 检测方式: ${WAY}" +if [ "$WAY" = "http" ]; then + echo " 域名: ${DOMAIN}" +fi +echo " 监控间隔: ${SPAN}秒" +if [ -n "$EXCEPTION_IPS" ]; then + echo " 例外IP: ${EXCEPTION_IPS}" +else + echo " 例外IP: 无" +fi +echo "" + +# 安装Python依赖 +if [ -f "${SCRIPT_DIR}/requirements.txt" ]; then + echo "正在安装Python依赖包..." + cd ${SCRIPT_DIR} + if ${PIP_CMD} install -r requirements.txt; then + echo -e "${GREEN}✅${NC} Python依赖包安装成功" + else + echo -e "${RED}❌${NC} Python依赖包安装失败" + exit 1 + fi +else + echo -e "${YELLOW}警告: requirements.txt 文件不存在${NC}" + exit 1 +fi + +# 检查monitor.py是否存在 +if [ ! -f "${SCRIPT_DIR}/monitor.py" ]; then + echo -e "${RED}错误: monitor.py 文件不存在${NC}" + exit 1 +fi + +# 创建日志目录 +LOG_DIR="${SCRIPT_DIR}/logs" +mkdir -p ${LOG_DIR} +chmod 755 ${LOG_DIR} + +echo "" +echo "正在创建systemd服务..." # 创建systemd服务文件 cat > ${SERVICE_FILE} << EOF [Unit] -Description=Heyun IDC Monitor Service +Description=Heyun IDC VPS Monitor Service After=network.target Wants=network-online.target [Service] Type=simple User=root -WorkingDirectory=${PROJECT_DIR} -ExecStart=${PYTHON_CMD} ${PROJECT_DIR}/main.py +WorkingDirectory=${SCRIPT_DIR} +ExecStart=${PYTHON_CMD} ${SCRIPT_DIR}/monitor.py Restart=always RestartSec=10 StandardOutput=journal @@ -153,14 +315,12 @@ LimitNOFILE=65536 # 安全设置 NoNewPrivileges=true ProtectSystem=strict -ReadWritePaths=${PROJECT_DIR} +ReadWritePaths=${SCRIPT_DIR} [Install] WantedBy=multi-user.target EOF -# ... existing code ... - # 重新加载systemd配置 systemctl daemon-reload @@ -168,24 +328,37 @@ systemctl daemon-reload systemctl enable ${SERVICE_NAME} # 启动服务 +echo "" +echo "正在启动服务..." systemctl start ${SERVICE_NAME} # 检查服务状态 +sleep 2 if systemctl is-active --quiet ${SERVICE_NAME}; then - echo "✅ ${SERVICE_NAME} 服务已成功安装并启动" - echo "服务状态: 运行中" + echo -e "${GREEN}✅${NC} ${SERVICE_NAME} 服务已成功安装并启动" + echo -e "${GREEN}✅${NC} 服务状态: 运行中" else - echo "❌ ${SERVICE_NAME} 服务启动失败" - echo "请检查日志: journalctl -u ${SERVICE_NAME} -f" + echo -e "${RED}❌${NC} ${SERVICE_NAME} 服务启动失败" + echo "" + echo "请检查日志:" + echo " journalctl -u ${SERVICE_NAME} -n 50 --no-pager" exit 1 fi echo "" -echo "常用命令:" -echo " 查看服务状态: systemctl status ${SERVICE_NAME}" -echo " 查看实时日志: journalctl -u ${SERVICE_NAME} -f" -echo " 停止服务: systemctl stop ${SERVICE_NAME}" -echo " 重启服务: systemctl restart ${SERVICE_NAME}" -echo " 卸载服务: ./uninstall.sh" +echo "==========================================" +echo -e "${GREEN}安装完成!${NC}" +echo "==========================================" echo "" -echo "安装完成!" \ No newline at end of file +echo "常用命令:" +echo -e " ${YELLOW}查看服务状态:${NC} systemctl status ${SERVICE_NAME}" +echo -e " ${YELLOW}查看实时日志:${NC} journalctl -u ${SERVICE_NAME} -f" +echo -e " ${YELLOW}查看正常日志:${NC} tail -f ${LOG_DIR}/monitor.log" +echo -e " ${YELLOW}查看异常日志:${NC} tail -f ${LOG_DIR}/error.log" +echo -e " ${YELLOW}停止服务:${NC} systemctl stop ${SERVICE_NAME}" +echo -e " ${YELLOW}重启服务:${NC} systemctl restart ${SERVICE_NAME}" +echo -e " ${YELLOW}卸载服务:${NC} cd ${SCRIPT_DIR} && ./uninstall.sh" +echo "" +echo "配置文件位置: ${CONFIG_FILE}" +echo "日志文件位置: ${LOG_DIR}" +echo "" \ No newline at end of file diff --git a/app/monitor.py b/app/monitor.py index e69de29..97263b0 100644 --- a/app/monitor.py +++ b/app/monitor.py @@ -0,0 +1,564 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +核云IDC VPS自动监测重启程序 +功能:实时监测服务器状态,发现关机自动开机 +""" + +import os +import sys +import time +import yaml +import json +import logging +import subprocess +import requests +from datetime import datetime +from pathlib import Path + + +class IDCMonitor: + """核云IDC监控器""" + + def __init__(self, config_path=None): + """初始化监控器""" + # 获取配置文件路径 + if config_path is None: + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yml') + + self.config_path = config_path + self.config = {} + self.base_url = "https://www.heyunidc.cn/v1" + self.jwt_token = None + self.retry_count = 0 # 开机重试次数 + + # 创建日志目录 + log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logs') + os.makedirs(log_dir, exist_ok=True) + + # 配置日志 + self.setup_logging(log_dir) + + # 加载配置 + self.load_config() + + self.logger.info("=" * 60) + self.logger.info("核云IDC VPS监控程序启动") + self.logger.info(f"监控方式: {self.config.get('WAY', 'ping')}") + self.logger.info(f"监控间隔: {self.config.get('SPAN', 300)}秒") + self.logger.info("=" * 60) + + def setup_logging(self, log_dir): + """配置日志系统""" + # 创建logger + self.logger = logging.getLogger('IDCMonitor') + self.logger.setLevel(logging.DEBUG) + + # 清除已有handler + self.logger.handlers.clear() + + # 正常日志文件 + normal_log = os.path.join(log_dir, 'monitor.log') + normal_handler = logging.FileHandler(normal_log, encoding='utf-8') + normal_handler.setLevel(logging.INFO) + normal_format = logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + normal_handler.setFormatter(normal_format) + self.logger.addHandler(normal_handler) + + # 异常日志文件 + error_log = os.path.join(log_dir, 'error.log') + error_handler = logging.FileHandler(error_log, encoding='utf-8') + error_handler.setLevel(logging.WARNING) + error_format = logging.Formatter( + '%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + error_handler.setFormatter(error_format) + self.logger.addHandler(error_handler) + + # 控制台输出 + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s', + datefmt='%H:%M:%S' + ) + console_handler.setFormatter(console_format) + self.logger.addHandler(console_handler) + + def load_config(self): + """加载配置文件""" + try: + with open(self.config_path, 'r', encoding='utf-8') as f: + self.config = yaml.safe_load(f) + + # 验证必要配置 + required_keys = ['ACCOUNT', 'API_KEY', 'WAY'] + for key in required_keys: + if key not in self.config or not self.config[key]: + raise ValueError(f"配置文件中缺少必要项: {key}") + + # 设置默认值 + self.config.setdefault('SPAN', 300) + self.config.setdefault('JWT', '') + self.config.setdefault('EXCEPTION_IPS', []) + + # 如果WAY为http,检查DOMAIN + if self.config['WAY'] == 'http': + if 'DOMAIN' not in self.config or not self.config['DOMAIN']: + raise ValueError("WAY为http时,必须配置DOMAIN") + + # 加载JWT token + if self.config.get('JWT'): + self.jwt_token = self.config['JWT'] + self.logger.info("已加载缓存的JWT Token") + + # 加载例外IP列表 + self.exception_ips = self.config.get('EXCEPTION_IPS', []) + if self.exception_ips: + self.logger.info(f"已加载 {len(self.exception_ips)} 个例外IP") + + self.logger.info("配置文件加载成功") + + except FileNotFoundError: + self.logger.error(f"配置文件不存在: {self.config_path}") + sys.exit(1) + except Exception as e: + self.logger.error(f"配置文件加载失败: {str(e)}") + sys.exit(1) + + def save_jwt_token(self, token): + """保存JWT Token到配置文件""" + try: + self.config['JWT'] = token + with open(self.config_path, 'w', encoding='utf-8') as f: + yaml.dump(self.config, f, allow_unicode=True, default_flow_style=False) + self.jwt_token = token + self.logger.debug("JWT Token已保存到配置文件") + except Exception as e: + self.logger.error(f"保存JWT Token失败: {str(e)}") + + def get_login_token(self): + """获取登录Token""" + try: + url = f"{self.base_url}/login_api" + data = { + 'account': self.config['ACCOUNT'], + 'password': self.config['API_KEY'] + } + + response = requests.post(url, data=data, timeout=10) + + if response.status_code == 200: + result = response.json() + if result.get('status') == 200 and 'jwt' in result: + token = result['jwt'] + self.save_jwt_token(token) + self.logger.info("成功获取新的JWT Token") + return token + else: + self.logger.error(f"登录失败: {result.get('msg', '未知错误')}") + return None + else: + self.logger.error(f"登录请求失败,HTTP状态码: {response.status_code}") + return None + + except Exception as e: + self.logger.error(f"获取Token异常: {str(e)}") + return None + + def get_headers(self): + """获取请求头(包含JWT)""" + if not self.jwt_token: + self.jwt_token = self.get_login_token() + if not self.jwt_token: + return None + + return { + 'Authorization': f'JWT {self.jwt_token}', + 'Content-Type': 'application/json' + } + + def get_vps_list(self): + """获取VPS列表""" + try: + headers = self.get_headers() + if not headers: + return None + + url = f"{self.base_url}/hosts?page=1&limit=100" + response = requests.get(url, headers=headers, timeout=10) + + if response.status_code == 200: + result = response.json() + if result.get('status') == 200: + self.logger.debug(f"成功获取VPS列表,共{result['data']['total']}台") + return result['data'] + elif result.get('status') == 405: + self.logger.warning("Token失效,重新获取Token") + self.jwt_token = None + return self.get_vps_list() # 递归调用重试 + else: + self.logger.error(f"获取VPS列表失败: {result.get('msg', '未知错误')}") + return None + else: + self.logger.error(f"获取VPS列表请求失败,HTTP状态码: {response.status_code}") + return None + + except Exception as e: + self.logger.error(f"获取VPS列表异常: {str(e)}") + return None + + def get_vps_status(self, host_id): + """获取指定VPS的状态""" + try: + headers = self.get_headers() + if not headers: + return None + + url = f"{self.base_url}/hosts/{host_id}/module/status?type=host" + response = requests.get(url, headers=headers, timeout=10) + + if response.status_code == 200: + result = response.json() + if result.get('status') == 200: + return result['data'] + elif result.get('status') == 405: + self.logger.warning("Token失效,重新获取Token") + self.jwt_token = None + return self.get_vps_status(host_id) # 递归调用重试 + else: + self.logger.error(f"获取VPS {host_id} 状态失败: {result.get('msg', '未知错误')}") + return None + else: + self.logger.error(f"获取VPS {host_id} 状态请求失败,HTTP状态码: {response.status_code}") + return None + + except Exception as e: + self.logger.error(f"获取VPS {host_id} 状态异常: {str(e)}") + return None + + def power_on_vps(self, host_id): + """开机指定VPS""" + try: + headers = self.get_headers() + if not headers: + return False + + url = f"{self.base_url}/hosts/{host_id}/module/on" + response = requests.put(url, headers=headers, timeout=10) + + if response.status_code == 200: + result = response.json() + if result.get('status') == 200: + self.logger.info(f"VPS {host_id} 开机指令发送成功") + return True + elif result.get('status') == 405: + self.logger.warning("Token失效,重新获取Token") + self.jwt_token = None + return self.power_on_vps(host_id) # 递归调用重试 + else: + self.logger.error(f"VPS {host_id} 开机失败: {result.get('msg', '未知错误')}") + return False + else: + self.logger.error(f"VPS {host_id} 开机请求失败,HTTP状态码: {response.status_code}") + return False + + except Exception as e: + self.logger.error(f"VPS {host_id} 开机异常: {str(e)}") + return False + + def ping_host(self, ip_address): + """Ping检测主机是否存活""" + try: + # Windows和Linux的ping命令参数不同 + if sys.platform == 'win32': + cmd = ['ping', '-n', '1', '-w', '2000', ip_address] + else: + cmd = ['ping', '-c', '1', '-W', '2', ip_address] + + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=5 + ) + + return result.returncode == 0 + + except Exception as e: + self.logger.debug(f"Ping {ip_address} 异常: {str(e)}") + return False + + def check_http_host(self, domain): + """HTTP HEAD检测域名是否存活""" + try: + response = requests.head( + f"http://{domain}", + timeout=5, + allow_redirects=True + ) + return response.status_code < 400 + except Exception as e: + self.logger.debug(f"HTTP检测 {domain} 异常: {str(e)}") + return False + + def detect_hosts(self): + """检测所有主机存活状态""" + way = self.config.get('WAY', 'ping') + + if way == 'ping': + return self.detect_by_ping() + elif way == 'http': + return self.detect_by_http() + else: + self.logger.error(f"不支持的检测方式: {way}") + return [] + + def detect_by_ping(self): + """通过Ping检测主机""" + self.logger.info("开始Ping检测所有VPS...") + + vps_data = self.get_vps_list() + if not vps_data or 'host' not in vps_data: + self.logger.error("无法获取VPS列表") + return [] + + unreachable_hosts = [] + + for host in vps_data['host']: + host_id = host['id'] + ip = host.get('dedicatedip', '') + + if not ip: + self.logger.warning(f"VPS {host_id} 没有IP地址,跳过") + continue + + self.logger.debug(f"正在Ping检测: {ip} (ID: {host_id})") + + if not self.ping_host(ip): + # 检查是否为例外IP + if ip in self.exception_ips: + self.logger.info(f"VPS {host_id} ({ip}) Ping不通,但属于例外IP,跳过") + continue + + self.logger.warning(f"VPS {host_id} ({ip}) Ping不通") + unreachable_hosts.append({ + 'id': host_id, + 'ip': ip, + 'domain': host.get('domain', ''), + 'product_name': host.get('product_name', '') + }) + + if unreachable_hosts: + self.logger.warning(f"发现 {len(unreachable_hosts)} 台VPS无法Ping通") + else: + self.logger.info("所有VPS Ping检测正常") + + return unreachable_hosts + + def detect_by_http(self): + """通过HTTP检测域名""" + domains_str = self.config.get('DOMAIN', '') + if not domains_str: + self.logger.error("未配置DOMAIN") + return [] + + domains = [d.strip() for d in domains_str.split(',') if d.strip()] + + self.logger.info(f"开始HTTP检测 {len(domains)} 个域名...") + + # 获取VPS列表建立域名到ID的映射 + vps_data = self.get_vps_list() + if not vps_data or 'host' not in vps_data: + self.logger.error("无法获取VPS列表") + return [] + + # 建立域名到VPS ID的映射 + domain_to_vps = {} + for host in vps_data['host']: + domain = host.get('domain', '') + if domain: + domain_to_vps[domain] = host + + unreachable_hosts = [] + + for domain in domains: + self.logger.debug(f"正在HTTP检测: {domain}") + + if not self.check_http_host(domain): + self.logger.warning(f"域名 {domain} HTTP检测失败") + + # 查找对应的VPS + if domain in domain_to_vps: + host = domain_to_vps[domain] + ip = host.get('dedicatedip', '') + + # 检查是否为例外IP + if ip and ip in self.exception_ips: + self.logger.info(f"域名 {domain} 对应的VPS {host['id']} ({ip}) 属于例外IP,跳过") + continue + + unreachable_hosts.append({ + 'id': host['id'], + 'ip': ip, + 'domain': domain, + 'product_name': host.get('product_name', '') + }) + else: + self.logger.warning(f"域名 {domain} 未找到对应的VPS") + + if unreachable_hosts: + self.logger.warning(f"发现 {len(unreachable_hosts)} 个域名访问异常") + else: + self.logger.info("所有域名HTTP检测正常") + + return unreachable_hosts + + def check_and_power_on(self, unreachable_hosts): + """检查并开机无法访问的VPS""" + if not unreachable_hosts: + self.logger.info("未发现需要开机的VPS") + return + + self.logger.info(f"开始检查 {len(unreachable_hosts)} 台VPS的实际状态...") + + need_power_on = [] + all_are_on = True + + for host_info in unreachable_hosts: + host_id = host_info['id'] + + self.logger.info(f"检查VPS {host_id} ({host_info.get('domain', '')}) 的实际状态...") + status_data = self.get_vps_status(host_id) + + if status_data: + status = status_data.get('status', 'unknown') + des = status_data.get('des', '未知') + + if status == 'on': + self.logger.info(f"VPS {host_id} 实际状态: {des} (开机中)") + else: + self.logger.warning(f"VPS {host_id} 实际状态: {des} (关机)") + need_power_on.append(host_info) + all_are_on = False + else: + self.logger.error(f"无法获取VPS {host_id} 的状态") + all_are_on = False + + # 如果所有VPS都是开机状态,记录日志 + if all_are_on: + self.logger.info( + "检测到所有VPS均为开机状态,可能是禁Ping或网站临时异常,无需操作" + ) + return + + # 对需要开机的VPS执行开机操作 + if need_power_on: + self.logger.info(f"开始对 {len(need_power_on)} 台VPS执行开机操作...") + + for host_info in need_power_on: + host_id = host_info['id'] + self.logger.info(f"正在开启VPS {host_id} ({host_info.get('domain', '')})...") + self.power_on_vps(host_id) + + # 等待60秒后验证开机结果 + self.logger.info("等待60秒后验证开机结果...") + time.sleep(60) + + # 验证开机结果,最多尝试2次 + self.verify_power_on_result(need_power_on, max_retries=2) + + def verify_power_on_result(self, hosts_to_verify, max_retries=2): + """验证开机结果""" + self.retry_count += 1 + + if self.retry_count > max_retries: + self.logger.warning(f"已达到最大重试次数({max_retries}),仍有VPS未成功开机") + for host_info in hosts_to_verify: + self.logger.warning( + f"VPS {host_info['id']} ({host_info.get('domain', '')}) 开机失败" + ) + self.retry_count = 0 + return + + still_off = [] + + for host_info in hosts_to_verify: + host_id = host_info['id'] + + self.logger.info(f"验证VPS {host_id} 开机状态...") + status_data = self.get_vps_status(host_id) + + if status_data and status_data.get('status') == 'on': + self.logger.info(f"✅ VPS {host_id} 开机成功") + else: + self.logger.warning(f"❌ VPS {host_id} 仍未开机") + still_off.append(host_info) + + if still_off: + self.logger.info(f"还有 {len(still_off)} 台VPS未开机,进行第{self.retry_count}次重试...") + for host_info in still_off: + self.power_on_vps(host_info['id']) + + self.logger.info("等待60秒后再次验证...") + time.sleep(60) + + self.verify_power_on_result(still_off, max_retries) + else: + self.logger.info("所有VPS开机验证完成") + self.retry_count = 0 + + def run_once(self): + """执行一次监控循环""" + try: + self.logger.info("\n" + "=" * 60) + self.logger.info(f"开始第 {int(time.time())} 时间戳的监控循环") + self.logger.info("=" * 60) + + # 检测主机 + unreachable_hosts = self.detect_hosts() + + # 检查并开机 + self.check_and_power_on(unreachable_hosts) + + self.logger.info("本次监控循环完成\n") + + except Exception as e: + self.logger.error(f"监控循环异常: {str(e)}", exc_info=True) + + def run(self): + """运行监控程序(主循环)""" + span = self.config.get('SPAN', 300) + + self.logger.info(f"监控程序开始运行,间隔 {span} 秒") + + try: + while True: + self.run_once() + + self.logger.info(f"等待 {span} 秒后进行下一次检测...") + time.sleep(span) + + except KeyboardInterrupt: + self.logger.info("收到中断信号,程序退出") + except Exception as e: + self.logger.error(f"程序运行异常: {str(e)}", exc_info=True) + sys.exit(1) + + +def main(): + """主函数""" + try: + monitor = IDCMonitor() + monitor.run() + except Exception as e: + print(f"程序启动失败: {str(e)}") + sys.exit(1) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/app/requirements.txt b/app/requirements.txt new file mode 100644 index 0000000..fda3075 --- /dev/null +++ b/app/requirements.txt @@ -0,0 +1,2 @@ +requests +pyyaml \ No newline at end of file diff --git a/app/uninstall.sh b/app/uninstall.sh index ea413fc..135891b 100644 --- a/app/uninstall.sh +++ b/app/uninstall.sh @@ -1,23 +1,35 @@ #!/bin/bash # 核云IDC服务商VPS自动监测重启程序 - 卸载脚本 -# 该脚本会移除systemd服务并清理相关文件 +# 该脚本会移除systemd服务 + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# 获取当前脚本所在目录的绝对路径 +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # 定义服务名称 SERVICE_NAME="idc-monitor" SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service" -echo "正在卸载 ${SERVICE_NAME} 服务..." +echo "==========================================" +echo " 核云IDC VPS监控程序 - 卸载向导" +echo "==========================================" +echo "" # 检查是否以root权限运行 if [ "$EUID" -ne 0 ]; then - echo "错误: 请以root权限运行此脚本 (sudo ./uninstall.sh)" + echo -e "${RED}错误: 请以root权限运行此脚本 (sudo ./uninstall.sh)${NC}" exit 1 fi # 检查服务是否存在 if [ ! -f "${SERVICE_FILE}" ]; then - echo "警告: 服务文件 ${SERVICE_FILE} 不存在" + echo -e "${YELLOW}警告: 服务文件 ${SERVICE_FILE} 不存在${NC}" echo "可能服务未安装或已被卸载" exit 0 fi @@ -26,18 +38,18 @@ fi echo "正在停止服务..." if systemctl is-active --quiet ${SERVICE_NAME}; then systemctl stop ${SERVICE_NAME} - echo "✅ 服务已停止" + echo -e "${GREEN}✅${NC} 服务已停止" else - echo "服务未运行,跳过停止步骤" + echo -e "${YELLOW}⚠️${NC} 服务未运行,跳过停止步骤" fi # 禁用服务(取消开机自启) echo "正在禁用服务..." if systemctl is-enabled --quiet ${SERVICE_NAME} 2>/dev/null; then systemctl disable ${SERVICE_NAME} - echo "✅ 服务已禁用" + echo -e "${GREEN}✅${NC} 服务已禁用" else - echo "服务未启用,跳过禁用步骤" + echo -e "${YELLOW}⚠️${NC} 服务未启用,跳过禁用步骤" fi # 重新加载systemd配置 @@ -48,25 +60,62 @@ systemctl reset-failed ${SERVICE_NAME} 2>/dev/null # 删除服务文件 echo "正在删除服务文件..." rm -f ${SERVICE_FILE} -echo "✅ 服务文件已删除: ${SERVICE_FILE}" +echo -e "${GREEN}✅${NC} 服务文件已删除: ${SERVICE_FILE}" -# 清理journal日志(可选) -echo "" -read -p "是否同时清理该服务的历史日志?(y/N): " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]]; then - journalctl --rotate - journalctl --vacuum-time=1s 2>/dev/null - echo "✅ 日志已清理" -else - echo "跳过日志清理" -fi +# 询问是否清理日志 +LOG_DIR="${SCRIPT_DIR}/logs" echo "" -echo "✅ ${SERVICE_NAME} 服务已成功卸载" +echo "==================================================" +echo -e "${YELLOW}请选择要清理的内容:${NC}" +echo " 1. 仅清理日志文件" +echo " 2. 清理日志文件和配置文件" +echo " 3. 不清理任何文件(保留所有数据)" +echo -e -n "${YELLOW}请输入选项 1/2/3,直接回车默认为3: ${NC}" +read CLEAN_CHOICE + +case "$CLEAN_CHOICE" in + 1) + if [ -d "${LOG_DIR}" ]; then + rm -rf ${LOG_DIR} + echo -e "${GREEN}✅${NC} 日志目录已清理: ${LOG_DIR}" + else + echo -e "${YELLOW}⚠️${NC} 日志目录不存在,跳过清理" + fi + echo -e "${YELLOW}⚠️${NC} 配置文件保留在: ${SCRIPT_DIR}/config.yml" + ;; + 2) + # 清理日志 + if [ -d "${LOG_DIR}" ]; then + rm -rf ${LOG_DIR} + echo -e "${GREEN}✅${NC} 日志目录已清理: ${LOG_DIR}" + else + echo -e "${YELLOW}⚠️${NC} 日志目录不存在,跳过清理" + fi + + # 清理配置文件 + CONFIG_FILE="${SCRIPT_DIR}/config.yml" + if [ -f "${CONFIG_FILE}" ]; then + rm -f ${CONFIG_FILE} + echo -e "${GREEN}✅${NC} 配置文件已删除: ${CONFIG_FILE}" + else + echo -e "${YELLOW}⚠️${NC} 配置文件不存在,跳过清理" + fi + ;; + *) + echo -e "${YELLOW}⚠️${NC} 跳过文件清理" + echo " 日志文件保留在: ${LOG_DIR}" + echo " 配置文件保留在: ${SCRIPT_DIR}/config.yml" + ;; +esac + +echo "" +echo "==========================================" +echo -e "${GREEN}✅ ${SERVICE_NAME} 服务已成功卸载${NC}" +echo "==========================================" echo "" echo "注意:" echo " - Python依赖包未被卸载,如需清理请手动执行: pip uninstall -r requirements.txt" -echo " - 项目文件未被删除,如需删除请手动清理项目目录" +echo " - 项目文件未被删除,如需删除请手动清理项目目录: ${SCRIPT_DIR}" echo "" echo "卸载完成!" \ No newline at end of file diff --git a/app/开发文档.md b/app/开发文档.md new file mode 100644 index 0000000..67334b9 --- /dev/null +++ b/app/开发文档.md @@ -0,0 +1,39 @@ +### 程序运行 + +1. 用户需先向程序config内添加以下几项定义量: + ACCOUNT,API_KEY,WAY + +2. 用户运行install.sh脚本 + 脚本会先检测config.yml中的配置文件 + 缺少ACCOUNT,API_KEY,WAY向用户询问填写什么,同时WAY默认为ping(问句以红色展示) + 若用户填写http,则让用户填写域名(可以访问的路径),通过英文逗号分隔域名 + 随后用户可添加例外参数,脚本检测到例外内的IP主机关机时,直接跳过,不进行开机 + +3. 脚本运行 + 创建名为idc_monitor的systemd服务,持续化运行当前路径下的main.py + +### 程序逻辑 +1. 1. 隔一段时间(SPAN)便根据WAY来探测目标存活 + 若为ping,则直接ping一遍所有的IP地址 + 若为域名,则使用HEAD方法探测相应域名是否正常响应 + +1. 2. 若存在不通的情况,进行如下操作 + 使用config.yml中的JWT请求VPS列表并挨个查询VPS状态是否为on(开机) + 若返回响应码为405,即响应报文为 + ``` + { + "status": 405, + "msg": "请登陆后再试" + } + ``` + 时,重新请求JWT并存储,随后再次请求VPS列表并查询状态 + +1. 3. 若均为开机状态,记录日志:什么时间点 - 发生了什么情况,实际没有机器关机,可能是禁ping或者服务器网站状态异常 + +1. 4. 若发现有机器关机,则使用开机接口(on)进行开机 + +1. 5. 所有机器都操作完成后隔60秒,再次查询刚才尝试开机操作的几台机器是否开机成功,若不超过则再来一遍 + +1. 6. 尝试开机操作两次后还是非开机状态,则中断本次循环,进入SPAN间隔准备下一次循环 + +1. 7. 以上所有操作均需记录为日志,分为正常日志和异常日志(例如:未发现存在机器关机,开机失败等) \ No newline at end of file diff --git a/web/index.php b/web/index.php index 4c4f95b..88747fa 100644 --- a/web/index.php +++ b/web/index.php @@ -97,6 +97,11 @@ if ($needConfig) { ✅ 保存配置并开始使用 + +