#!/bin/bash
# cf-vps-monitor - VPS monitoring script for a Cloudflare Worker backend
# Version: 1.1.0
# Supports all common Linux systems; root privileges are not required
# Strict mode: exit on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail
# Initialise the system-type variable with the kernel name reported by uname
OS=$(uname -s)
export OS
# Color definitions (escape sequences, rendered by `echo -e` in print_message)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Global variables - every file the script manages lives under SCRIPT_DIR
SCRIPT_DIR="$HOME/.cf-vps-monitor"
CONFIG_FILE="$SCRIPT_DIR/config/config"
LOG_FILE="$SCRIPT_DIR/logs/monitor.log"
PID_FILE="$SCRIPT_DIR/run/monitor.pid"
SERVICE_FILE="$SCRIPT_DIR/bin/vps-monitor-service.sh"
INSTALL_MANIFEST="$SCRIPT_DIR/system/install.manifest"
# Default configuration (used by load_config when no config file exists)
DEFAULT_INTERVAL=10
DEFAULT_WORKER_URL=""
DEFAULT_SERVER_ID=""
DEFAULT_API_KEY=""
# Print a message wrapped in a color escape sequence.
# Globals:   NC (read) - the "reset color" escape sequence
# Arguments: $1 - color escape sequence, $2 - message text
# Outputs:   the colored message followed by the reset sequence, on stdout
print_message() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Append a timestamped message to LOG_FILE and, outside service mode, echo it.
# Globals:   LOG_FILE (read), SERVICE_MODE (read, optional)
# Arguments: $1 - message text
# Outputs:   "[YYYY-mm-dd HH:MM:SS] message" on stdout unless SERVICE_MODE=true
# Notes:     File logging is best-effort: the log directory is created on
#            demand and a failed write never aborts the caller. Without this,
#            a call made before the directory tree exists would fail (and
#            terminate the script under `set -e`).
log() {
  local message="$1"
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S') || timestamp="unknown-time"
  local entry="[$timestamp] $message"

  if mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null; then
    # Group the write so that a failed redirection is silenced as well.
    { printf '%s\n' "$entry" >> "$LOG_FILE"; } 2>/dev/null || true
  fi

  # Only print to the console outside service mode (avoids duplicate log lines)
  if [[ "${SERVICE_MODE:-false}" != "true" ]]; then
    printf '%s\n' "$entry"
  fi
}
# Report a fatal error and terminate the script.
# Globals:   RED (read)
# Arguments: $1 - error description
# Outputs:   colored error line via print_message, plus a log entry via log
# Returns:   never returns; exits with status 1
error_exit() {
  local reason=$1
  print_message "$RED" "错误: ${reason}"
  log "ERROR: ${reason}"
  exit 1
}
# Check whether a command name can be resolved by the shell.
# Arguments: $1 - command name
# Returns:   0 when `command -v` resolves the name, non-zero otherwise
command_exists() {
  command -v "$1" &> /dev/null
}
# ==================== System compatibility layer ====================
# Detect whether systemd is usable.
# Returns: 0 when systemctl is on PATH and `systemctl --version` succeeds;
#          otherwise the status of whichever check failed.
is_systemd_available() {
  command_exists systemctl || return
  systemctl --version > /dev/null 2>&1
}
# Detect whether systemd can be used for the current user.
# For root this is the plain systemd check. For other users it additionally
# requires a non-empty XDG_RUNTIME_DIR and a working `systemctl --user`.
# Returns: 0 when usable, non-zero otherwise
is_user_systemd_available() {
  if ! is_root_user; then
    is_systemd_available || return
    [[ -n "${XDG_RUNTIME_DIR:-}" ]] || return
    systemctl --user --version > /dev/null 2>&1
    return
  fi
  is_systemd_available
}
# In-place sed that works with both BSD and GNU sed.
# Globals:   OS (read)
# Arguments: $1 - sed expression, $2 - file to edit in place
# Returns:   always 0; sed errors are discarded on purpose (best-effort edit)
safe_sed() {
  local expr=$1
  local target=$2
  case "$OS" in
    FreeBSD | Darwin)
      # BSD sed requires an explicit (here: empty) backup suffix after -i
      sed -i '' "$expr" "$target" 2>/dev/null || true
      ;;
    *)
      sed -i "$expr" "$target" 2>/dev/null || true
      ;;
  esac
}
# Run systemctl with the given arguments when systemd is available.
# Arguments: passed through to systemctl unchanged
# Returns:   1 when systemd is unavailable; otherwise 0 (systemctl failures
#            and its stderr output are deliberately ignored)
safe_systemctl() {
  is_systemd_available || return 1
  systemctl "$@" 2>/dev/null || true
}
# Check system resources (guards against fork failures).
# Compares the number of lines printed by `ps aux` with 80% of the process
# limit reported by `ulimit -u`.
# Globals:   OS, YELLOW, CYAN (read)
# Outputs:   a warning via print_message when the count exceeds the threshold
# Returns:   0 when there is headroom or no numeric limit, 1 when near the limit
check_system_resources() {
  local max_proc current_proc

  # Process limit (matters mostly on FreeBSD); fall back to 1024 on failure.
  max_proc=$(ulimit -u 2>/dev/null || echo "1024")
  # `ulimit -u` prints "unlimited" when no limit is set. That is not a number:
  # in arithmetic it would evaluate to 0 (or abort under `set -u`) and every
  # run would be reported as over the limit. No numeric limit means no risk.
  if ! [[ "$max_proc" =~ ^[0-9]+$ ]]; then
    return 0
  fi

  current_proc=$(ps aux 2>/dev/null | wc -l) || current_proc="100"
  current_proc=${current_proc//[[:space:]]/} # BSD wc pads its output
  [[ "$current_proc" =~ ^[0-9]+$ ]] || current_proc="100"

  if ((current_proc > max_proc * 80 / 100)); then
    print_message "$YELLOW" "警告: 进程数接近限制 ($current_proc/$max_proc)"
    if [[ "$OS" == "FreeBSD" ]]; then
      print_message "$CYAN" "FreeBSD建议: 增加用户进程限制或稍后重试"
    fi
    return 1
  fi
  return 0
}
# Validate a PID.
# Arguments: $1 - candidate PID
# Returns:   0 only when the value is all digits, is not this shell's own PID,
#            and `kill -0` can signal it; non-zero otherwise
validate_pid() {
  local candidate=$1
  [[ "$candidate" =~ ^[0-9]+$ ]] || return 1
  [[ "$candidate" != "$$" ]] || return 1
  kill -0 "$candidate" 2>/dev/null
}
# Print the command line of a process.
# Globals:   OS (read)
# Arguments: $1 - PID
# Outputs:   the command line on stdout, or "unknown" when ps fails
get_process_command() {
  local pid="$1"
  if [[ "$OS" == "FreeBSD" ]]; then
    # FreeBSD: print the column with its header, then drop the header line
    ps -p "$pid" -o command 2>/dev/null | tail -n +2 | head -1 || echo "unknown"
  else
    # Every other platform, Darwin included, reaches this branch. `args` is
    # the POSIX-specified keyword for the full command line; `cmd` is a
    # procps alias that BSD-derived ps implementations reject. The trailing
    # "=" suppresses the header line.
    ps -p "$pid" -o args= 2>/dev/null || echo "unknown"
  fi
}
# Locate running monitor processes.
# Globals:   PID_FILE, SERVICE_FILE, OS (read)
# Outputs:   space-separated list of confirmed PIDs on stdout (may be empty)
# Strategy:
#   1. PID file (most reliable).
#   2. If that gave nothing, scan `ps` output for the exact service script path.
#   3. Keep only PIDs that pass validate_pid and whose command line mentions
#      vps-monitor-service or cf-vps-monitor.
find_monitor_processes() {
  # All working variables are local so the loop does not clobber a caller's
  # `pid` (or similar) variable.
  local pids="" file_pid="" pid cmd valid_pids=""

  # Tier 1: PID file
  if [[ -f "$PID_FILE" ]]; then
    file_pid=$(cat "$PID_FILE" 2>/dev/null) || file_pid=""
    if validate_pid "$file_pid"; then
      pids="$file_pid"
    fi
  fi

  # Tier 2: exact script path match. -F treats the path as a literal string
  # (dots in the path are not regex wildcards). `|| true` keeps an empty
  # result from tripping `set -e` + pipefail.
  if [[ -z "$pids" ]] && [[ -f "${SERVICE_FILE:-}" ]]; then
    if [[ "$OS" == "FreeBSD" ]]; then
      pids=$(ps axww | grep -F -- "$SERVICE_FILE" | grep -v grep | awk '{print $1}') || true
    else
      pids=$(ps aux | grep -F -- "$SERVICE_FILE" | grep -v grep | awk '{print $2}') || true
    fi
  fi

  # Tier 3: validate every PID and confirm its command line.
  # $pids is intentionally unquoted: it is a whitespace-separated list.
  for pid in $pids; do
    if validate_pid "$pid"; then
      cmd=$(get_process_command "$pid") || cmd=""
      if [[ "$cmd" =~ (vps-monitor-service|cf-vps-monitor) ]]; then
        valid_pids="$valid_pids $pid"
      fi
    fi
  done

  # Each PID was appended as " <pid>", so only one leading space needs removal.
  echo "${valid_pids# }"
}
# Check whether the monitor service is running.
# Returns: 0 when find_monitor_processes reports at least one PID, 1 otherwise
is_monitor_running() {
  [[ -n "$(find_monitor_processes)" ]]
}
# Describe the current user type.
# Outputs: "系统管理员" when is_root_user succeeds, "普通用户" otherwise
get_user_type_description() {
  local label="普通用户"
  if is_root_user; then
    label="系统管理员"
  fi
  echo "$label"
}
# Print a short diagnostic report for the monitor service.
# Globals:   SERVICE_FILE, CONFIG_FILE, LOG_FILE, OS and the color variables (read)
# Outputs:   presence of the key files, then any `ps` lines matching
#            "monitor" or "vps" (or a "none found" notice)
diagnose_monitor_service() {
  print_message "$CYAN" "=== 监控服务诊断 ==="

  # Key files
  print_message "$BLUE" "文件状态:"
  if [[ -f "$SERVICE_FILE" ]]; then
    print_message "$GREEN" " ✓ 服务脚本存在"
  else
    print_message "$RED" " ✗ 服务脚本不存在"
  fi
  if [[ -f "$CONFIG_FILE" ]]; then
    print_message "$GREEN" " ✓ 配置文件存在"
  else
    print_message "$RED" " ✗ 配置文件不存在"
  fi
  if [[ -d "$(dirname "$LOG_FILE")" ]]; then
    print_message "$GREEN" " ✓ 日志目录存在"
  else
    print_message "$RED" " ✗ 日志目录不存在"
  fi

  # Related processes (FreeBSD needs different ps flags)
  print_message "$BLUE" "相关进程:"
  local ps_flags="aux"
  if [[ "$OS" == "FreeBSD" ]]; then
    ps_flags="axww"
  fi
  local matches
  # An empty match makes the pipeline fail; that is expected, not an error.
  matches=$(ps "$ps_flags" | grep -E "(monitor|vps)" | grep -v grep | grep -v diagnose) || true
  if [[ -z "$matches" ]]; then
    print_message "$YELLOW" " 无相关进程"
  else
    echo "$matches"
  fi
  print_message "$CYAN" "===================="
}
# Remove every auto-start hook this tool may have installed.
# Globals:   HOME, OS and the color variables (read)
# Outputs:   progress messages via print_message
# Hooks handled:
#   1. systemd unit (system-wide for root, per-user otherwise)
#   2. crontab entries mentioning cf-vps-monitor
#   3. the marked auto-start block in the first of .bashrc / .bash_profile /
#      .profile that contains it
remove_autostart_settings() {
  print_message "$BLUE" "移除自启动设置..."
  local removed_count=0

  # 1. systemd service. Decide on is_root_user's exit status: testing its
  #    captured stdout is always "empty", which would pick --user for root too.
  local service_path
  local -a systemd_cmd
  if is_root_user; then
    service_path="/etc/systemd/system/cf-vps-monitor.service"
    systemd_cmd=(systemctl)
  else
    service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    systemd_cmd=(systemctl --user)
  fi
  if [[ -f "$service_path" ]] && is_systemd_available; then
    "${systemd_cmd[@]}" stop cf-vps-monitor.service 2>/dev/null || true
    "${systemd_cmd[@]}" disable cf-vps-monitor.service 2>/dev/null || true
    rm -f "$service_path"
    "${systemd_cmd[@]}" daemon-reload 2>/dev/null || true
    print_message "$GREEN" " ✓ systemd服务已移除"
    removed_count=$((removed_count + 1))
  fi

  # 2. crontab entries
  if command_exists crontab; then
    local current_crontab
    current_crontab=$(crontab -l 2>/dev/null || echo "")
    if grep -q "cf-vps-monitor" <<< "$current_crontab"; then
      # grep -v exits 1 when it filters out every line (the crontab held only
      # our entry). Under `set -e` + pipefail that would abort the script, so
      # its status is neutralised; an empty crontab is then installed.
      if printf '%s\n' "$current_crontab" \
        | { grep -v "cf-vps-monitor" || true; } \
        | crontab - 2>/dev/null; then
        print_message "$GREEN" " ✓ crontab自启动已移除"
        removed_count=$((removed_count + 1))
      else
        print_message "$YELLOW" " ⚠ 无法更新crontab"
      fi
    fi
  fi

  # 3. shell profile auto-start block
  local profile profile_path
  local -a profile_files=(".bashrc" ".bash_profile" ".profile")
  for profile in "${profile_files[@]}"; do
    profile_path="$HOME/$profile"
    if [[ -f "$profile_path" ]] && grep -q "cf-vps-monitor auto-start" "$profile_path" 2>/dev/null; then
      # safe_sed already picks the right `sed -i` syntax for BSD vs GNU
      safe_sed '/# === cf-vps-monitor auto-start BEGIN ===/,/# === cf-vps-monitor auto-start END ===/d' "$profile_path"
      # safe_sed swallows errors, so confirm the marker is actually gone
      if grep -q "cf-vps-monitor auto-start" "$profile_path" 2>/dev/null; then
        print_message "$YELLOW" " ⚠ 无法从 $profile 移除自启动代码"
        continue
      fi
      print_message "$GREEN" " ✓ 已从 $profile 移除自启动代码"
      removed_count=$((removed_count + 1))
      break
    fi
  done

  # Summary
  if [[ $removed_count -gt 0 ]]; then
    print_message "$GREEN" "✓ 已移除 $removed_count 种自启动设置"
  else
    print_message "$YELLOW" "未找到需要移除的自启动设置"
  fi
}
# Install auto-start hooks for the monitor service.
# Globals:   HOME, USER, SERVICE_FILE and the color variables (read)
# Outputs:   progress messages via print_message
# Hooks installed (each only if not already present):
#   1. systemd unit (system-wide for root, per-user otherwise)
#   2. an @reboot crontab entry
#   3. a marked block appended to ~/.bashrc (last-resort start on login)
add_autostart_settings() {
  print_message "$BLUE" "配置自启动设置..."
  local added_count=0

  # 1. systemd service. Decide on is_root_user's exit status: testing its
  #    captured stdout is always "empty", which would pick --user for root too.
  local service_path
  local run_as_root="false"
  local -a systemd_cmd
  if is_root_user; then
    run_as_root="true"
    service_path="/etc/systemd/system/cf-vps-monitor.service"
    systemd_cmd=(systemctl)
  else
    service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    systemd_cmd=(systemctl --user)
  fi
  if is_systemd_available && [[ ! -f "$service_path" ]]; then
    local -a unit_lines=(
      "[Unit]"
      "Description=CF VPS Monitor Service"
      "After=network.target"
      "[Service]"
      "Type=simple"
      "ExecStart=$SERVICE_FILE"
      "Restart=always"
      "RestartSec=10"
    )
    # User= belongs only in system units. A per-user manager already runs as
    # that user and cannot switch identities, so the directive is at best
    # redundant there and can stop the service from starting.
    if [[ "$run_as_root" == "true" ]]; then
      unit_lines+=("User=${USER:-root}")
    fi
    unit_lines+=(
      "WorkingDirectory=$HOME"
      "[Install]"
      "WantedBy=default.target"
    )
    if mkdir -p "$(dirname "$service_path")" 2>/dev/null \
      && printf '%s\n' "${unit_lines[@]}" > "$service_path"; then
      "${systemd_cmd[@]}" daemon-reload 2>/dev/null || true
      "${systemd_cmd[@]}" enable cf-vps-monitor.service 2>/dev/null || true
      print_message "$GREEN" " ✓ systemd服务已配置"
      added_count=$((added_count + 1))
    else
      print_message "$YELLOW" " ⚠ 无法写入systemd服务文件: $service_path"
    fi
  fi

  # 2. crontab @reboot entry
  if command_exists crontab; then
    local current_crontab
    current_crontab=$(crontab -l 2>/dev/null || echo "")
    if ! grep -q "cf-vps-monitor" <<< "$current_crontab"; then
      local crontab_entry="@reboot sleep 30 && pgrep -f 'cf-vps-monitor|vps-monitor-service' >/dev/null || $SERVICE_FILE"
      # Report success only when crontab accepted the new table; skip the
      # empty first line when there was no previous crontab.
      if {
        [[ -z "$current_crontab" ]] || printf '%s\n' "$current_crontab"
        printf '%s\n' "$crontab_entry"
      } | crontab - 2>/dev/null; then
        print_message "$GREEN" " ✓ crontab自启动已配置"
        added_count=$((added_count + 1))
      else
        print_message "$YELLOW" " ⚠ 无法更新crontab"
      fi
    fi
  fi

  # 3. shell profile hook. $SERVICE_FILE is expanded now; the escaped \$PS1
  #    and \$TERM are evaluated later, when the profile is sourced.
  local profile="$HOME/.bashrc"
  if [[ -f "$profile" ]] && ! grep -q "cf-vps-monitor auto-start" "$profile" 2>/dev/null; then
    cat >> "$profile" << EOF
# === cf-vps-monitor auto-start BEGIN ===
# VPS监控服务自启动检测 (最后保障)
if [ -n "\$PS1" ] && [ "\$TERM" != "dumb" ]; then
if ! pgrep -f 'cf-vps-monitor|vps-monitor-service' >/dev/null 2>&1; then
(sleep 5 && nohup "$SERVICE_FILE" >/dev/null 2>&1 &) &
fi
fi
# === cf-vps-monitor auto-start END ===
EOF
    print_message "$GREEN" " ✓ shell profile自启动已配置"
    added_count=$((added_count + 1))
  fi

  # Summary
  if [[ $added_count -gt 0 ]]; then
    print_message "$GREEN" "✓ 已配置 $added_count 种自启动设置"
  else
    print_message "$YELLOW" "自启动设置已存在,无需重复配置"
  fi
}
# Pick the data source to use for a given kind of system information.
# Globals:   OS (read)
# Arguments: $1 - one of memory_info, disk_usage, network_stats,
#                 process_info, cpu_info
#            $2 - fallback value printed when nothing suitable is found
#                 (optional, defaults to the empty string)
# Outputs:   the chosen method name (e.g. "sysctl", "proc", "df") on stdout
get_system_command() {
  local kind="$1"
  local default_method="${2:-}"
  # Start from the fallback; a branch below overrides it when a source exists.
  local chosen="$default_method"

  local bsd_family="no"
  case "$OS" in
    FreeBSD | OpenBSD | NetBSD) bsd_family="yes" ;;
  esac

  case "$kind" in
    "memory_info")
      if [[ "$bsd_family" == "yes" ]]; then
        chosen="sysctl"
      elif [[ -f /proc/meminfo ]]; then
        chosen="proc"
      elif command_exists free; then
        chosen="free"
      fi
      ;;
    "disk_usage")
      if command_exists df; then
        chosen="df"
      elif command_exists du; then
        chosen="du"
      fi
      ;;
    "network_stats")
      if [[ -f /proc/net/dev ]]; then
        chosen="proc"
      elif command_exists netstat; then
        chosen="netstat"
      elif command_exists ss; then
        chosen="ss"
      fi
      ;;
    "process_info")
      if command_exists ps; then
        chosen="ps"
      elif [[ -d /proc ]]; then
        chosen="proc"
      fi
      ;;
    "cpu_info")
      if [[ "$bsd_family" == "yes" ]]; then
        chosen="sysctl"
      elif [[ -f /proc/stat ]]; then
        chosen="proc"
      elif command_exists top; then
        chosen="top"
      elif command_exists vmstat; then
        chosen="vmstat"
      fi
      ;;
  esac

  echo "$chosen"
}
# Run the command behind a (kind, method) pair chosen by get_system_command.
# Arguments: $1 - information kind (e.g. memory_info)
#            $2 - method (e.g. proc, sysctl, df)
#            $3… - extra arguments, used only by disk_usage:df
# Outputs:   raw output of the underlying command on stdout
# Returns:   status of the underlying command; 1 for an unsupported pair
execute_system_command() {
  local selector="$1:$2"
  shift 2

  case "$selector" in
    "memory_info:sysctl")
      if ! sysctl vm.stats.vm 2>/dev/null; then
        sysctl hw.physmem hw.usermem 2>/dev/null
      fi
      ;;
    "memory_info:proc")
      cat /proc/meminfo 2>/dev/null
      ;;
    "memory_info:free")
      # Prefer byte units; fall back to the default output when -b is rejected
      if ! free -b 2>/dev/null; then
        free 2>/dev/null
      fi
      ;;
    "disk_usage:df")
      if ! df -B1 "$@" 2>/dev/null; then
        df "$@" 2>/dev/null
      fi
      ;;
    "network_stats:proc")
      cat /proc/net/dev 2>/dev/null
      ;;
    "network_stats:netstat")
      netstat -i 2>/dev/null
      ;;
    "cpu_info:sysctl")
      sysctl kern.cp_time 2>/dev/null
      ;;
    "cpu_info:proc")
      cat /proc/stat 2>/dev/null
      ;;
    "cpu_info:top")
      timeout 3 top -bn1 2>/dev/null | head -10
      ;;
    "cpu_info:vmstat")
      timeout 3 vmstat 1 2 2>/dev/null | tail -1
      ;;
    *)
      return 1
      ;;
  esac
}
# Detect basic system information (kept light on forks).
# Globals written and exported: OS, ARCH, KERNEL_VERSION, VER, DISTRO_ID,
#   DISTRO_NAME, IS_CONTAINER, CONTAINER_TYPE, VIRTUALIZATION
# Outputs:   one "detected system" line via print_message
detect_system() {
  # A single uname call yields kernel name, release and machine type.
  # Declared and assigned together so a uname failure does not trip errexit.
  local uname_fields=$(uname -srm)
  IFS=' ' read -r OS KERNEL_VERSION ARCH <<< "$uname_fields"

  # Defaults shared by every platform; only the generic branch refines them.
  IS_CONTAINER="false"
  CONTAINER_TYPE="none"
  VIRTUALIZATION="none"

  case "$OS" in
    FreeBSD)
      # Release strings look like "13.2-RELEASE"; keep the part before "-"
      VER=${KERNEL_VERSION%%-*}
      DISTRO_ID="freebsd"
      DISTRO_NAME="FreeBSD"
      print_message "$GREEN" "检测到系统: FreeBSD $VER"
      ;;
    Darwin)
      VER=$(sw_vers -productVersion 2>/dev/null || echo "$KERNEL_VERSION")
      DISTRO_ID="macos"
      DISTRO_NAME="macOS"
      print_message "$GREEN" "检测到系统: macOS $VER"
      ;;
    *)
      # Simplified container detection: only the obvious Docker marker file
      if [[ -f /.dockerenv ]]; then
        IS_CONTAINER="true"
        CONTAINER_TYPE="docker"
      fi
      # Simplified distribution detection from os-release
      if [[ -f /etc/os-release ]]; then
        local release_text=$(cat /etc/os-release 2>/dev/null)
        DISTRO_ID=$(echo "$release_text" | grep '^ID=' | cut -d= -f2 | tr -d '"' || echo "linux")
        VER=$(echo "$release_text" | grep '^VERSION_ID=' | cut -d= -f2 | tr -d '"' || echo "unknown")
        DISTRO_NAME=$(echo "$release_text" | grep '^NAME=' | cut -d= -f2 | tr -d '"' || echo "Linux")
      else
        DISTRO_ID="linux"
        VER="unknown"
        DISTRO_NAME="Linux"
      fi
      print_message "$GREEN" "检测到系统: $DISTRO_NAME $VER"
      ;;
  esac

  # Make the results visible to child processes as well
  export OS ARCH KERNEL_VERSION VER DISTRO_ID DISTRO_NAME
  export IS_CONTAINER CONTAINER_TYPE VIRTUALIZATION
}
# Detect the platform's package manager.
# Globals:   OS (read); PKG_MANAGER, PKG_INSTALL, PKG_UPDATE, PKG_SEARCH,
#            PKG_INFO (written and exported; all empty when nothing is found)
# Outputs:   the detected manager, or a warning, via print_message
detect_package_manager() {
  PKG_MANAGER=""
  PKG_INSTALL=""
  PKG_UPDATE=""
  PKG_SEARCH=""
  PKG_INFO=""

  # Candidate table, highest priority first. Row format:
  #   probe-command|manager-name|install|update|search|info
  local -a candidates
  case "$OS" in
    FreeBSD | OpenBSD | NetBSD)
      candidates=("pkg|pkg|pkg install -y|pkg update|pkg search|pkg info")
      # pkg_add is only considered on OpenBSD
      if [[ "$OS" == "OpenBSD" ]]; then
        candidates+=("pkg_add|pkg_add|pkg_add|pkg_add -u|pkg_info -Q|pkg_info")
      fi
      ;;
    Darwin)
      candidates=(
        "brew|brew|brew install|brew update|brew search|brew info"
        "port|port|port install|port selfupdate|port search|port info"
      )
      ;;
    *)
      # Linux and anything unrecognised
      candidates=(
        "apt-get|apt-get|apt-get install -y|apt-get update|apt-cache search|apt-cache show"
        "apt|apt|apt install -y|apt update|apt search|apt show"
        "dnf|dnf|dnf install -y|dnf update -y|dnf search|dnf info"
        "yum|yum|yum install -y|yum update -y|yum search|yum info"
        "zypper|zypper|zypper install -y|zypper refresh|zypper search|zypper info"
        "pacman|pacman|pacman -S --noconfirm|pacman -Sy|pacman -Ss|pacman -Si"
        "apk|apk|apk add|apk update|apk search|apk info"
        "emerge|emerge|emerge|emerge --sync|emerge --search|emerge --info"
        "xbps-install|xbps|xbps-install -y|xbps-install -S|xbps-query -Rs|xbps-query -R"
        "swupd|swupd|swupd bundle-add|swupd update|swupd search|swupd bundle-info"
        "nix-env|nix|nix-env -i|nix-channel --update|nix-env -qa|nix-env -qa --description"
        "snap|snap|snap install|snap refresh|snap find|snap info"
        "flatpak|flatpak|flatpak install -y|flatpak update -y|flatpak search|flatpak info"
      )
      ;;
  esac

  # The first candidate whose probe command exists wins.
  local row probe mgr install_cmd update_cmd search_cmd info_cmd
  for row in "${candidates[@]}"; do
    IFS='|' read -r probe mgr install_cmd update_cmd search_cmd info_cmd <<< "$row"
    if command_exists "$probe"; then
      PKG_MANAGER="$mgr"
      PKG_INSTALL="$install_cmd"
      PKG_UPDATE="$update_cmd"
      PKG_SEARCH="$search_cmd"
      PKG_INFO="$info_cmd"
      break
    fi
  done

  if [[ -z "$PKG_MANAGER" ]]; then
    print_message "$YELLOW" "警告: 未检测到支持的包管理器,将尝试手动安装依赖"
  else
    print_message "$GREEN" "检测到包管理器: $PKG_MANAGER"
  fi

  # Export for use by other functions and child processes
  export PKG_MANAGER PKG_INSTALL PKG_UPDATE PKG_SEARCH PKG_INFO
}
# Check required/optional tools and try to install the missing required ones.
# Globals:   PKG_MANAGER, PKG_INSTALL, PKG_UPDATE and the color variables (read)
# Outputs:   progress and guidance messages via print_message
# Returns:   0 normally; 1 when neither curl nor wget is available afterwards
# Notes:     Required: curl, bc. Optional (reported only): ifstat, jq.
#            Packages are installed directly when running as root, through
#            passwordless sudo when that works, and otherwise the manual
#            command is printed.
install_dependencies() {
  print_message "$BLUE" "检查系统依赖..."

  local dep
  local -a missing_deps=() optional_missing=()
  for dep in curl bc; do
    command_exists "$dep" || missing_deps+=("$dep")
  done
  for dep in ifstat jq; do
    command_exists "$dep" || optional_missing+=("$dep")
  done

  # Report optional dependencies
  if [[ ${#optional_missing[@]} -gt 0 ]]; then
    print_message "$YELLOW" "可选依赖未安装: ${optional_missing[*]}"
    print_message "$YELLOW" "这些依赖缺失不会影响基本功能,但可能限制某些特性"
  fi

  if [[ ${#missing_deps[@]} -eq 0 ]]; then
    print_message "$GREEN" "所有必需依赖已安装"
    return 0
  fi
  print_message "$YELLOW" "缺少必需依赖: ${missing_deps[*]}"

  if [[ -n "${PKG_MANAGER:-}" ]]; then
    # Work out how installs can be run. Root needs no sudo, and minimal
    # server images often ship without it.
    local sudo_prefix="" can_install="false"
    if [[ "$(id -u 2>/dev/null)" == "0" ]]; then
      can_install="true"
    elif command_exists sudo && sudo -n true 2>/dev/null; then
      sudo_prefix="sudo"
      can_install="true"
    fi

    if [[ "$can_install" == "true" ]]; then
      if [[ -n "$sudo_prefix" ]]; then
        print_message "$BLUE" "尝试使用sudo安装依赖..."
      else
        print_message "$BLUE" "以root身份安装依赖..."
      fi
      # $sudo_prefix / $PKG_UPDATE / $PKG_INSTALL are command strings that
      # must be word-split, so they are intentionally left unquoted.
      # Refresh the package index first for apt-based systems; a failed
      # refresh is reported but does not stop the installation attempt.
      if [[ "$PKG_MANAGER" == "apt-get" ]] || [[ "$PKG_MANAGER" == "apt" ]]; then
        # shellcheck disable=SC2086
        $sudo_prefix $PKG_UPDATE || print_message "$YELLOW" "警告: 无法更新包列表"
      fi
      for dep in "${missing_deps[@]}"; do
        print_message "$BLUE" "安装 $dep..."
        # shellcheck disable=SC2086
        if ! $sudo_prefix $PKG_INSTALL "$dep"; then
          print_message "$YELLOW" "警告: 无法安装 $dep"
        fi
      done
    else
      print_message "$YELLOW" "需要sudo权限安装依赖,请手动执行:"
      print_message "$CYAN" " sudo $PKG_INSTALL ${missing_deps[*]}"
    fi
  else
    print_message "$YELLOW" "未检测到包管理器,请手动安装依赖"
    print_message "$CYAN" "常见安装命令:"
    print_message "$CYAN" " Ubuntu/Debian: sudo apt-get install ${missing_deps[*]}"
    print_message "$CYAN" " CentOS/RHEL: sudo yum install ${missing_deps[*]}"
    print_message "$CYAN" " Fedora: sudo dnf install ${missing_deps[*]}"
    print_message "$CYAN" " Alpine: sudo apk add ${missing_deps[*]}"
  fi

  # Final check: an HTTP client is mandatory, bc is only recommended
  if ! command_exists curl && ! command_exists wget; then
    print_message "$RED" "错误: curl和wget都不可用"
    print_message "$CYAN" "请安装curl或wget后重试"
    return 1
  fi
  if ! command_exists bc; then
    print_message "$YELLOW" "警告: bc未安装,某些计算功能可能受限"
  fi
  print_message "$GREEN" "依赖检查完成"
}
# Create the centralised directory layout under SCRIPT_DIR.
# Globals:   SCRIPT_DIR, INSTALL_MANIFEST (read); TMPDIR (written, exported)
# Outputs:   progress messages via print_message
# Notes:     calls error_exit when the directories cannot be created
create_directories() {
  print_message "$BLUE" "创建集中式目录结构..."

  # Every sub-directory of the layout; mkdir -p creates SCRIPT_DIR on the way
  local -a layout=(
    "$SCRIPT_DIR/bin"
    "$SCRIPT_DIR/config"
    "$SCRIPT_DIR/logs"
    "$SCRIPT_DIR/tmp"
    "$SCRIPT_DIR/cache"
    "$SCRIPT_DIR/run"
    "$SCRIPT_DIR/system/templates"
    "$SCRIPT_DIR/system/backups"
  )
  if ! mkdir -p "${layout[@]}"; then
    error_exit "无法创建目录结构"
  fi

  # Make sure the install manifest exists
  touch "$INSTALL_MANIFEST"

  # Point temporary files at the private tmp directory
  export TMPDIR="$SCRIPT_DIR/tmp"

  print_message "$GREEN" "✓ 集中式目录结构创建完成"
  print_message "$CYAN" " 主目录: $SCRIPT_DIR"
}
# Load the configuration.
# Globals:   CONFIG_FILE, DEFAULT_* (read);
#            WORKER_URL, SERVER_ID, API_KEY, INTERVAL (written)
# Notes:     When CONFIG_FILE exists it is sourced as shell code. Otherwise
#            the DEFAULT_* values are used, with spaces, newlines and carriage
#            returns stripped from the three string settings.
load_config() {
  if [[ ! -f "$CONFIG_FILE" ]]; then
    WORKER_URL=$(echo "$DEFAULT_WORKER_URL" | tr -d ' \n\r')
    SERVER_ID=$(echo "$DEFAULT_SERVER_ID" | tr -d ' \n\r')
    API_KEY=$(echo "$DEFAULT_API_KEY" | tr -d ' \n\r')
    INTERVAL="$DEFAULT_INTERVAL"
    return
  fi
  source "$CONFIG_FILE"
}
# Append one entry to the install manifest.
# Globals:   INSTALL_MANIFEST (read; the file is appended to)
# Arguments: $1 - item type, $2 - item path,
#            $3 - action performed, $4 - backup information
# Outputs:   one "type:path:action:backup" line appended to the manifest
record_installation() {
  local kind="$1" target="$2" operation="$3" backup_info="$4"
  printf '%s:%s:%s:%s\n' "$kind" "$target" "$operation" "$backup_info" >> "$INSTALL_MANIFEST"
}
# Save the configuration to CONFIG_FILE.
# Globals:   CONFIG_FILE, INTERVAL (read);
#            WORKER_URL, SERVER_ID, API_KEY (read and rewritten with spaces,
#            newlines and carriage returns stripped)
# Outputs:   confirmation message via print_message
# Notes:     The file holds the API key, so it is created under a restrictive
#            umask and forced to mode 600 (owner read/write only).
save_config() {
  # Strip whitespace before saving
  WORKER_URL=$(echo "$WORKER_URL" | tr -d ' \n\r')
  SERVER_ID=$(echo "$SERVER_ID" | tr -d ' \n\r')
  API_KEY=$(echo "$API_KEY" | tr -d ' \n\r')

  # The subshell confines the umask change to this write.
  (
    umask 077
    cat > "$CONFIG_FILE" << EOF
# VPS监控配置文件
WORKER_URL="$WORKER_URL"
SERVER_ID="$SERVER_ID"
API_KEY="$API_KEY"
INTERVAL="$INTERVAL"
EOF
  )
  # A pre-existing file keeps its old mode, so tighten it explicitly.
  chmod 600 "$CONFIG_FILE" 2>/dev/null || true

  print_message "$GREEN" "配置已保存到 $CONFIG_FILE"
}
# Get CPU usage.
# Prints one JSON object on stdout:
#   {"usage_percent":<number>,"load_avg":[<load1>,<load5>,<load15>]}
# Globals:   OS (read) - selects the FreeBSD path or the default path
# Depends on sanitize_integer / sanitize_number, which are defined outside this
# block; presumably they return their first argument when it is a valid number
# and the second argument otherwise - TODO confirm.
get_cpu_usage() {
local cpu_usage
local cpu_load
# FreeBSD
if [[ "$OS" == "FreeBSD" ]]; then
# Derive CPU usage from sysctl
if command_exists sysctl; then
# kern.cp_time is read twice: field 5 is taken as the idle counter, and the
# sum of all fields as the total.
local cpu_idle=$(sysctl -n kern.cp_time 2>/dev/null | awk '{print $5}' 2>/dev/null || echo "0")
local cpu_total=$(sysctl -n kern.cp_time 2>/dev/null | awk '{sum=0; for(i=1;i<=NF;i++) sum+=$i; print sum}' 2>/dev/null || echo "0")
# Make sure valid numbers were obtained
cpu_idle=$(sanitize_integer "$cpu_idle" "0")
cpu_total=$(sanitize_integer "$cpu_total" "0")
if [[ $cpu_total -gt 0 && $cpu_idle -le $cpu_total ]]; then
cpu_usage=$(echo "scale=1; 100 - ($cpu_idle * 100 / $cpu_total)" | bc 2>/dev/null || echo "0")
# Make sure cpu_usage is a valid number
cpu_usage=$(sanitize_number "$cpu_usage" "0")
else
cpu_usage="0"
fi
else
cpu_usage="0"
fi
# FreeBSD load averages
local load1="0" load5="0" load15="0"
if command_exists sysctl; then
# Fields 2-4 of vm.loadavg are used (field 1 is skipped)
load1=$(sysctl -n vm.loadavg 2>/dev/null | awk '{print $2}' 2>/dev/null || echo "0")
load5=$(sysctl -n vm.loadavg 2>/dev/null | awk '{print $3}' 2>/dev/null || echo "0")
load15=$(sysctl -n vm.loadavg 2>/dev/null | awk '{print $4}' 2>/dev/null || echo "0")
# Clean up the load values
load1=$(sanitize_number "$load1" "0")
load5=$(sanitize_number "$load5" "0")
load15=$(sanitize_number "$load15" "0")
fi
cpu_load="$load1,$load5,$load15"
else
# Linux (and every other non-FreeBSD system) - several methods for compatibility
cpu_usage="0"
# Method 1: /proc/stat (most accurate)
# NOTE(review): the counters in /proc/stat appear to be cumulative, so this
# looks like an average since boot rather than a current reading - confirm
# that this is intended.
if [[ -f /proc/stat ]]; then
local cpu_line=$(head -n1 /proc/stat 2>/dev/null)
if [[ -n "$cpu_line" ]]; then
# Intentional word splitting: index 0 is the "cpu" label, 1.. are counters
local cpu_times=($cpu_line)
if [[ ${#cpu_times[@]} -ge 8 ]]; then
local idle=${cpu_times[4]}
local iowait=${cpu_times[5]:-0}
local total=0
# Total CPU time (user + nice + system + idle + iowait + irq + softirq + steal)
# NOTE(review): the loop covers indices 1..7 only, so the eighth counter
# (steal, per the list above) is not added - confirm whether that is intended.
# NOTE(review): `i` is not declared local and leaks into the caller's scope.
for i in {1..7}; do
if [[ -n "${cpu_times[i]}" && "${cpu_times[i]}" =~ ^[0-9]+$ ]]; then
total=$((total + cpu_times[i]))
fi
done
if [[ $total -gt 0 ]]; then
# idle and iowait together count as "not busy"
cpu_usage=$(echo "scale=1; 100 - (($idle + $iowait) * 100 / $total)" | bc 2>/dev/null || echo "0")
fi
fi
fi
fi
# Method 2: top (used only while cpu_usage is still exactly "0")
if [[ "$cpu_usage" == "0" ]] && command_exists top; then
# Try the different top output formats
local top_output=$(timeout 3 top -bn1 2>/dev/null | head -10)
if [[ -n "$top_output" ]]; then
# Match the CPU line in its different formats
if [[ "$top_output" =~ %Cpu\(s\):[[:space:]]*([0-9.]+)[[:space:]]*us.*[[:space:]]+([0-9.]+)[[:space:]]*id ]]; then
# Format: %Cpu(s): 12.5 us, 2.1 sy, 0.0 ni, 85.4 id
local idle_percent="${BASH_REMATCH[2]}"
cpu_usage=$(echo "scale=1; 100 - $idle_percent" | bc 2>/dev/null || echo "0")
elif [[ "$top_output" =~ CPU:[[:space:]]*([0-9.]+)%[[:space:]]*us.*[[:space:]]+([0-9.]+)%[[:space:]]*id ]]; then
# Format: CPU: 12.5% us, 2.1% sy, 85.4% id
local idle_percent="${BASH_REMATCH[2]}"
cpu_usage=$(echo "scale=1; 100 - $idle_percent" | bc 2>/dev/null || echo "0")
fi
fi
fi
# Method 3: vmstat (fallback, again only while cpu_usage is exactly "0")
if [[ "$cpu_usage" == "0" ]] && command_exists vmstat; then
local vmstat_output=$(timeout 3 vmstat 1 2 2>/dev/null | tail -1)
if [[ -n "$vmstat_output" ]]; then
# The third-from-last column is taken as the idle percentage
local idle_percent=$(echo "$vmstat_output" | awk '{print $(NF-2)}' 2>/dev/null || echo "100")
if [[ "$idle_percent" =~ ^[0-9]+$ ]]; then
cpu_usage=$((100 - idle_percent))
fi
fi
fi
# Make sure cpu_usage is a valid number
cpu_usage=$(sanitize_number "$cpu_usage" "0")
# Load averages - several methods
local load1="0" load5="0" load15="0"
if [[ -f /proc/loadavg ]]; then
local load_data=$(cat /proc/loadavg 2>/dev/null | awk '{print $1" "$2" "$3}' || echo "0 0 0")
read -r load1 load5 load15 <<< "$load_data"
elif command_exists uptime; then
# Try to take the load from uptime's output
local uptime_output=$(uptime 2>/dev/null)
if [[ "$uptime_output" =~ load[[:space:]]+average:[[:space:]]*([0-9.]+),[[:space:]]*([0-9.]+),[[:space:]]*([0-9.]+) ]]; then
load1="${BASH_REMATCH[1]}"
load5="${BASH_REMATCH[2]}"
load15="${BASH_REMATCH[3]}"
elif [[ "$uptime_output" =~ ([0-9.]+)[[:space:]]+([0-9.]+)[[:space:]]+([0-9.]+)$ ]]; then
# Alternative layout: three space-separated numbers at the end of the line
load1="${BASH_REMATCH[1]}"
load5="${BASH_REMATCH[2]}"
load15="${BASH_REMATCH[3]}"
fi
fi
# Clean and validate each load value
load1=$(sanitize_number "$load1" "0")
load5=$(sanitize_number "$load5" "0")
load15=$(sanitize_number "$load15" "0")
cpu_load="$load1,$load5,$load15"
fi
echo "{\"usage_percent\":$cpu_usage,\"load_avg\":[$cpu_load]}"
}
# Get memory usage.
# Prints one JSON object on stdout:
#   {"total":<KB>,"used":<KB>,"free":<KB>,"usage_percent":<number>}
# total/used/free are in kilobytes on every path (page counts and cgroup byte
# values are divided down to KB; free -k and /proc/meminfo are read as-is).
# Globals:   OS (read) - selects the FreeBSD path or the default path
# Depends on sanitize_integer, which is defined outside this block; presumably
# it returns its first argument when it is a valid integer and the second
# argument otherwise - TODO confirm.
get_memory_usage() {
local total used free usage_percent
# FreeBSD
if [[ "$OS" == "FreeBSD" ]]; then
if command_exists sysctl; then
# FreeBSD memory information (page size and page counts)
local page_size=$(sysctl -n hw.pagesize 2>/dev/null || echo "4096")
local total_pages=$(sysctl -n vm.stats.vm.v_page_count 2>/dev/null || echo "0")
local free_pages=$(sysctl -n vm.stats.vm.v_free_count 2>/dev/null || echo "0")
local inactive_pages=$(sysctl -n vm.stats.vm.v_inactive_count 2>/dev/null || echo "0")
local cache_pages=$(sysctl -n vm.stats.vm.v_cache_count 2>/dev/null || echo "0")
# Clean and validate the values
page_size=$(sanitize_integer "$page_size" "4096")
total_pages=$(sanitize_integer "$total_pages" "0")
free_pages=$(sanitize_integer "$free_pages" "0")
inactive_pages=$(sanitize_integer "$inactive_pages" "0")
cache_pages=$(sanitize_integer "$cache_pages" "0")
# Compute memory (converted to KB); free, inactive and cache pages all count as free
if [[ $page_size -gt 0 && $total_pages -gt 0 ]]; then
total=$(( (total_pages * page_size) / 1024 ))
free=$(( ((free_pages + inactive_pages + cache_pages) * page_size) / 1024 ))
used=$((total - free))
# Keep the values sane
if [[ $used -lt 0 ]]; then
used=0
fi
if [[ $free -lt 0 ]]; then
free=0
fi
else
total=0
used=0
free=0
fi
else
total=0
used=0
free=0
fi
else
# Linux (and every other non-FreeBSD system) - computed so that used + free = total
total=0
used=0
free=0
# Method 1: the free command (most common and most accurate)
if command_exists free; then
local mem_info=$(free -k 2>/dev/null | grep "^Mem:")
if [[ -n "$mem_info" ]]; then
total=$(echo "$mem_info" | awk '{print $2}')
# Try the "available" column (column 7 on modern Linux systems)
local available=$(echo "$mem_info" | awk '{print $7}' 2>/dev/null || echo "")
if [[ "$available" =~ ^[0-9]+$ ]]; then
# When present, "available" is used as the real free memory
free=$available
used=$((total - free))
else
# Without an "available" column, fall back to the traditional calculation
local mem_free=$(echo "$mem_info" | awk '{print $4}' 2>/dev/null || echo "0")
local buff_cache=$(echo "$mem_info" | awk '{print $6}' 2>/dev/null || echo "0")
# Validate the data
if [[ "$mem_free" =~ ^[0-9]+$ ]] && [[ "$buff_cache" =~ ^[0-9]+$ ]]; then
free=$((mem_free + buff_cache))
used=$((total - free))
else
# If parsing failed, take column 3 as used and derive free from it
local raw_used=$(echo "$mem_info" | awk '{print $3}' 2>/dev/null || echo "0")
if [[ "$raw_used" =~ ^[0-9]+$ ]]; then
used=$raw_used
free=$((total - used))
fi
fi
fi
fi
fi
# Method 2: read /proc/meminfo directly (only when method 1 left total at "0")
if [[ "$total" == "0" ]] && [[ -f /proc/meminfo ]]; then
total=$(grep "^MemTotal:" /proc/meminfo | awk '{print $2}' 2>/dev/null || echo "0")
local mem_free=$(grep "^MemFree:" /proc/meminfo | awk '{print $2}' 2>/dev/null || echo "0")
local buffers=$(grep "^Buffers:" /proc/meminfo | awk '{print $2}' 2>/dev/null || echo "0")
local cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}' 2>/dev/null || echo "0")
local sreclaimable=$(grep "^SReclaimable:" /proc/meminfo | awk '{print $2}' 2>/dev/null || echo "0")
# Actually available memory, including reclaimable memory
free=$((mem_free + buffers + cached + sreclaimable))
used=$((total - free))
fi
# Method 3: special handling for containers (cgroup v1 and v2)
# Even without a CONTAINER_ENV variable, a detected and plausible cgroup limit takes priority
local cgroup_limit="0"
local cgroup_usage="0"
local cgroup_found=false
# Try cgroup v2 first
if [[ -f /sys/fs/cgroup/memory.max ]]; then
local max_raw=$(cat /sys/fs/cgroup/memory.max 2>/dev/null)
# The literal "max" means no limit is set
if [[ "$max_raw" != "max" && "$max_raw" =~ ^[0-9]+$ ]]; then
cgroup_limit="$max_raw"
if [[ -f /sys/fs/cgroup/memory.current ]]; then
cgroup_usage=$(cat /sys/fs/cgroup/memory.current 2>/dev/null || echo "0")
cgroup_found=true
fi
fi
fi
# Try cgroup v1 (if v2 found nothing)
if [[ "$cgroup_found" == "false" && -f /sys/fs/cgroup/memory/memory.limit_in_bytes ]]; then
local limit_raw=$(cat /sys/fs/cgroup/memory/memory.limit_in_bytes 2>/dev/null || echo "0")
# Ignore huge values (meaning: unlimited)
if [[ "$limit_raw" =~ ^[0-9]+$ && "$limit_raw" -lt 9223372036854771712 ]]; then
cgroup_limit="$limit_raw"
cgroup_usage=$(cat /sys/fs/cgroup/memory/memory.usage_in_bytes 2>/dev/null || echo "0")
cgroup_found=true
fi
fi
# If a valid cgroup limit was found and it is below the host's physical memory (total), use the cgroup data.
# The cgroup data is also used directly when total is 0 (the lookups above failed).
if [[ "$cgroup_found" == "true" ]]; then
# cgroup values are in bytes; convert to KB to match total/used/free
local cgroup_total_kb=$((cgroup_limit / 1024))
# Only use the cgroup limit when it is clearly effective (for example below
# the host memory, or when we are sure to be inside a container).
# Here: adopt it when the host memory is unknown or the cgroup limit is below it.
if [[ "$total" == "0" || "$cgroup_total_kb" -lt "$total" ]]; then
total=$cgroup_total_kb
used=$((cgroup_usage / 1024))
free=$((total - used))
fi
fi
# Make sure every value is a valid number
total=$(sanitize_integer "$total" "0")
used=$(sanitize_integer "$used" "0")
free=$(sanitize_integer "$free" "0")
# Consistency check and correction
if [[ $total -gt 0 ]]; then
# Make sure every value is a valid number (repeats the sanitising just above)
total=$(sanitize_integer "$total" "0")
used=$(sanitize_integer "$used" "0")
free=$(sanitize_integer "$free" "0")
# Enforce used + free = total
local sum=$((used + free))
local diff=$((sum - total))
# A deviation above 1% means the data is inconsistent and must be corrected
local tolerance=$((total / 100))
if [[ $tolerance -lt 1024 ]]; then
tolerance=1024 # minimum tolerance: 1 MB (values are in KB)
fi
# ${diff#-} strips a leading minus sign, i.e. compares the absolute difference
if [[ ${diff#-} -gt $tolerance ]]; then
# Inconsistent data: trust total first
if [[ $free -gt $total ]]; then
# free is too large: reset it to total
free=$total
used=0
elif [[ $used -gt $total ]]; then
# used is too large: reset it
used=$total
free=0
else
# Recompute used so the values stay consistent
used=$((total - free))
fi
# Final safety checks
if [[ $used -lt 0 ]]; then
used=0
free=$total
fi
if [[ $free -lt 0 ]]; then
free=0
used=$total
fi
fi
else
# No data was obtained: use defaults
total=0
used=0
free=0
fi
fi
# Compute the usage percentage
if [[ $total -gt 0 ]]; then
usage_percent=$(echo "scale=1; $used * 100 / $total" | bc 2>/dev/null || echo "0")
# Make sure usage_percent is a valid number
if ! [[ "$usage_percent" =~ ^[0-9]+\.?[0-9]*$ ]]; then
usage_percent="0"
fi
else
usage_percent="0"
fi
echo "{\"total\":$total,\"used\":$used,\"free\":$free,\"usage_percent\":$usage_percent}"
}
# Get disk usage of the root filesystem.
# Prints one JSON object on stdout:
#   {"total":<GB>,"used":<GB>,"free":<GB>,"usage_percent":<integer>}
# total/used/free are gigabytes with two decimals (df KB values / 1024 / 1024).
# Globals:   CONTAINER_ENV (read, optional) - when "true" and "/" yielded a
#            total of "0", the current directory's filesystem is tried as well
# Depends on command_exists, sanitize_number and sanitize_integer (defined
# elsewhere in the file).
get_disk_usage() {
  local total="0" used="0" free="0" usage_percent="0"
  local target line

  # Paths to probe, in order. "." is only a fallback for container setups.
  local -a targets=("/")
  if [[ "${CONTAINER_ENV:-false}" == "true" ]]; then
    targets+=(".")
  fi

  if command_exists df; then
    for target in "${targets[@]}"; do
      # -k fixes the unit (KB). -P forces the POSIX one-line-per-filesystem
      # layout: without it a long device name can wrap onto its own line, and
      # `tail -1` would then return a line whose columns are shifted.
      # `|| true` keeps a df failure from tripping `set -e` + pipefail.
      line=$(df -kP "$target" 2>/dev/null | tail -1) || true
      [[ -n "$line" ]] || continue

      # One awk pass converts KB to GB and passes the capacity column through.
      read -r total used free usage_percent < <(
        awk '{printf "%.2f %.2f %.2f %s\n", $2 / 1024 / 1024, $3 / 1024 / 1024, $4 / 1024 / 1024, $5}' \
          <<< "$line" 2>/dev/null
      ) || true
      usage_percent=${usage_percent//%/}

      # Validate the parsed values
      total=$(sanitize_number "${total:-0}" "0")
      used=$(sanitize_number "${used:-0}" "0")
      free=$(sanitize_number "${free:-0}" "0")
      usage_percent=$(sanitize_integer "${usage_percent:-0}" "0")

      # Move on to the next candidate only while nothing usable was found
      [[ "$total" == "0" ]] || break
    done
  fi

  echo "{\"total\":$total,\"used\":$used,\"free\":$free,\"usage_percent\":$usage_percent}"
}
# 获取网络使用情况
get_network_usage() {
local upload_speed=0
local download_speed=0
local total_upload=0
local total_download=0
# FreeBSD系统
if [[ "$OS" == "FreeBSD" ]]; then
# 获取默认网络接口
local interface=""
# FreeBSD使用不同的route命令格式
if command_exists route; then
# 获取默认路由的接口
interface=$(route -n get default 2>/dev/null | grep 'interface:' | awk '{print $2}')
fi
# 如果没有找到,尝试查找活跃接口
if [[ -z "$interface" ]] && command_exists netstat; then
# 尝试1: 查找有流量的接口(排除lo)
interface=$(netstat -i -b | awk 'NR>1 && $1 !~ /^lo/ && ($8 > 0 || $11 > 0) {print $1; exit}')
# 尝试2: 如果没有找到活跃接口,使用第一个非lo接口
if [[ -z "$interface" ]]; then
interface=$(netstat -i -b | awk 'NR>1 && $1 !~ /^lo/ {print $1; exit}')
fi
fi
# 如果还是没找到,使用第一个非lo接口
if [[ -z "$interface" ]] && command_exists ifconfig; then
interface=$(ifconfig -l | tr ' ' '\n' | grep -v '^lo' | head -1)
fi
if [[ -n "$interface" ]] && command_exists netstat; then
# 使用netstat获取网络统计
# FreeBSD netstat -i -b 输出格式:
# Name Mtu Network Address Ipkts Ierrs Idrop Ibytes Opkts Oerrs Obytes Coll
# 同一接口可能有多行,我们只取第一行(Link层的统计)
local net_stats=$(netstat -i -b 2>/dev/null | grep "^$interface" | grep "/dev/null || echo "")
if [[ -n "$net_stats" ]]; then
local raw_download=$(echo "$net_stats" | awk '{print $8}' 2>/dev/null || echo "0") # Ibytes
local raw_upload=$(echo "$net_stats" | awk '{print $11}' 2>/dev/null || echo "0") # Obytes
# 清理和验证数值
total_download=$(sanitize_integer "$raw_download" "0")
total_upload=$(sanitize_integer "$raw_upload" "0")
else
# 如果没有找到Link统计,尝试其他方法
local net_stats_alt=$(netstat -i -b 2>/dev/null | grep "^$interface" | head -1 2>/dev/null || echo "")
if [[ -n "$net_stats_alt" ]]; then
local raw_download=$(echo "$net_stats_alt" | awk '{print $8}' 2>/dev/null || echo "0")
local raw_upload=$(echo "$net_stats_alt" | awk '{print $11}' 2>/dev/null || echo "0")
total_download=$(sanitize_integer "$raw_download" "0")
total_upload=$(sanitize_integer "$raw_upload" "0")
fi
fi
# 计算速度(简单方法)
local speed_file="${TMPDIR:-/tmp}/vps_monitor_net_${interface}_$(whoami)"
mkdir -p "$(dirname "$speed_file")" 2>/dev/null
local current_time=$(date +%s)
if [[ -f "$speed_file" ]]; then
local last_data=$(cat "$speed_file")
local last_time=$(echo "$last_data" | cut -d' ' -f1)
local last_rx=$(echo "$last_data" | cut -d' ' -f2)
local last_tx=$(echo "$last_data" | cut -d' ' -f3)
local time_diff=$((current_time - last_time))
if [[ $time_diff -gt 0 ]]; then
download_speed=$(( (total_download - last_rx) / time_diff ))
upload_speed=$(( (total_upload - last_tx) / time_diff ))
# 确保速度不为负数
[[ $download_speed -lt 0 ]] && download_speed=0
[[ $upload_speed -lt 0 ]] && upload_speed=0
fi
fi
# 保存当前数据供下次使用
echo "$current_time $total_download $total_upload" > "$speed_file"
fi
else
# Linux系统
# 获取默认网络接口 - 多种方法提高兼容性
local interface=""
# 方法1: 使用ip命令(现代Linux)
if command_exists ip; then
interface=$(ip route show default 2>/dev/null | awk '/default/ {print $5}' | head -1)
fi
# 方法2: 使用route命令(传统方法)
if [[ -z "$interface" ]] && command_exists route; then
interface=$(route -n 2>/dev/null | awk '/^0.0.0.0/ {print $8}' | head -1)
fi
# 方法3: 检查/proc/net/route(直接读取内核路由表)
if [[ -z "$interface" && -f "/proc/net/route" ]]; then
interface=$(awk '/^[^I]/ && $2 == "00000000" {print $1; exit}' /proc/net/route 2>/dev/null)
fi
# 方法4: 查找活跃的网络接口(改进版)
if [[ -z "$interface" && -f "/proc/net/dev" ]]; then
# 查找有流量的接口(排除lo和虚拟接口)
# 查找有流量的接口(排除lo和虚拟接口)
interface=$(awk '/^ *[^:]*:/ {
gsub(/:/, "", $1)
# 排除回环和虚拟接口
if ($1 != "lo" && $1 !~ /^(docker|br-|veth|tun|tap|virbr|vmnet)/) {
# 只要不是排除的接口,就认为是活动接口,不设置流量阈值
print $1
exit
}
}' /proc/net/dev)
fi
# 方法5: 如果还是没找到,使用第一个物理网络接口
if [[ -z "$interface" && -f "/proc/net/dev" ]]; then
# 优先选择常见的物理接口名称
for prefix in eth ens enp eno wlan wlp; do
interface=$(awk -v prefix="$prefix" '/^ *[^:]*:/ {
gsub(/:/, "", $1)
if ($1 ~ "^" prefix) {
print $1
exit
}
}' /proc/net/dev)
if [[ -n "$interface" ]]; then
break
fi
done
fi
# 方法6: 最后的备选方案
if [[ -z "$interface" && -f "/proc/net/dev" ]]; then
interface=$(awk '/^ *[^:]*:/ {
gsub(/:/, "", $1)
if ($1 != "lo" && $1 !~ /^(docker|br-|veth|tun|tap|virbr|vmnet)/) {
print $1
exit
}
}' /proc/net/dev)
fi
if [[ -n "$interface" && -f "/proc/net/dev" ]]; then
# 获取总流量
local net_line=$(grep "^ *$interface:" /proc/net/dev 2>/dev/null)
if [[ -n "$net_line" ]]; then
# 解析网络统计数据
# 格式: interface: bytes packets errs drop fifo frame compressed multicast
local stats=($net_line)
total_download=${stats[1]} # 接收字节数
total_upload=${stats[9]} # 发送字节数
# 确保是数字
if ! [[ "$total_download" =~ ^[0-9]+$ ]]; then
total_download=0
fi
if ! [[ "$total_upload" =~ ^[0-9]+$ ]]; then
total_upload=0
fi
fi
# 尝试获取实时速度
if command_exists ifstat && [[ -n "$interface" ]]; then
# 使用ifstat获取实时速度
local network_speed=$(timeout 3 ifstat -i "$interface" 1 1 2>/dev/null | tail -1)
if [[ -n "$network_speed" && "$network_speed" != *"no statistics"* ]]; then
download_speed=$(echo "$network_speed" | awk '{printf "%.0f", $1 * 1024}' 2>/dev/null || echo "0")
upload_speed=$(echo "$network_speed" | awk '{printf "%.0f", $2 * 1024}' 2>/dev/null || echo "0")
fi
else
# 如果没有ifstat,使用简单的方法计算速度
local speed_file="${TMPDIR:-/tmp}/vps_monitor_net_${interface}_$(whoami)"
mkdir -p "$(dirname "$speed_file")" 2>/dev/null
local current_time=$(date +%s)
if [[ -f "$speed_file" ]]; then
local last_data=$(cat "$speed_file")
local last_time=$(echo "$last_data" | cut -d' ' -f1)
local last_rx=$(echo "$last_data" | cut -d' ' -f2)
local last_tx=$(echo "$last_data" | cut -d' ' -f3)
local time_diff=$((current_time - last_time))
if [[ $time_diff -gt 0 ]]; then
download_speed=$(( (total_download - last_rx) / time_diff ))
upload_speed=$(( (total_upload - last_tx) / time_diff ))
fi
fi
# 保存当前数据供下次使用
echo "$current_time $total_download $total_upload" > "$speed_file"
fi
fi
fi
# 确保所有值都是数字
[[ "$upload_speed" =~ ^[0-9]+$ ]] || upload_speed=0
[[ "$download_speed" =~ ^[0-9]+$ ]] || download_speed=0
[[ "$total_upload" =~ ^[0-9]+$ ]] || total_upload=0
[[ "$total_download" =~ ^[0-9]+$ ]] || total_download=0
echo "{\"upload_speed\":$upload_speed,\"download_speed\":$download_speed,\"total_upload\":$total_upload,\"total_download\":$total_download}"
}
# Convert one line of `uptime` output into whole seconds.
# Understands "N day(s)", "H:MM", "N hr(s)", "N min(s)" and "N sec(s)".
# Arguments: $1 - text printed by `uptime`
# Outputs:   seconds on stdout (0 when nothing could be parsed)
_parse_uptime_output() {
  local text="$1"
  local seconds=0
  local re_days='([0-9]+)[[:space:]]+day'
  local re_clock='(^|[^0-9:])([0-9]+):([0-9]+)'
  local re_hours='([0-9]+)[[:space:]]+h(ou)?rs?'
  local re_mins='([0-9]+)[[:space:]]+min'
  local re_secs='([0-9]+)[[:space:]]+sec'

  # Without the "up" marker there is nothing to parse.
  if [[ "$text" != *"up "* ]]; then
    echo 0
    return 0
  fi

  # Keep only the span between "up" and the user count / load averages,
  # so the wall-clock time and the load figures cannot be mistaken for uptime.
  text="${text#*up }"
  text="${text%%user*}"
  text="${text%%load*}"

  # 10# forces base 10 so values such as "08" are not read as octal.
  if [[ "$text" =~ $re_days ]]; then
    seconds=$((seconds + 10#${BASH_REMATCH[1]} * 86400))
  fi
  if [[ "$text" =~ $re_clock ]]; then
    seconds=$((seconds + 10#${BASH_REMATCH[2]} * 3600 + 10#${BASH_REMATCH[3]} * 60))
  fi
  if [[ "$text" =~ $re_hours ]]; then
    seconds=$((seconds + 10#${BASH_REMATCH[1]} * 3600))
  fi
  if [[ "$text" =~ $re_mins ]]; then
    seconds=$((seconds + 10#${BASH_REMATCH[1]} * 60))
  fi
  if [[ "$text" =~ $re_secs ]]; then
    seconds=$((seconds + 10#${BASH_REMATCH[1]}))
  fi
  echo "$seconds"
}

# Print the system uptime in whole seconds.
# Globals:  OS (read)
# Outputs:  uptime in seconds on stdout; 0 when it cannot be determined
get_uptime() {
  local uptime_seconds=0
  if [[ "$OS" == "FreeBSD" ]]; then
    if command_exists sysctl; then
      # kern.boottime looks like "{ sec = 1700000000, usec = 0 } ...";
      # field 4 is the boot epoch followed by a comma.
      local boot_time_raw boot_time current_time
      boot_time_raw=$(sysctl -n kern.boottime 2>/dev/null | awk '{print $4}' | tr -d ',' 2>/dev/null || echo "0")
      boot_time=$(sanitize_integer "$boot_time_raw" "0")
      current_time=$(date +%s)
      if [[ $boot_time -gt 0 && $current_time -gt $boot_time ]]; then
        uptime_seconds=$((current_time - boot_time))
      elif command_exists uptime; then
        # Boot time unavailable: fall back to parsing `uptime`.
        uptime_seconds=$(_parse_uptime_output "$(uptime 2>/dev/null || true)")
      fi
    fi
  else
    # Linux: /proc/uptime holds "<seconds>.<fraction> <idle>".
    if [[ -f /proc/uptime ]]; then
      uptime_seconds=$(cut -d. -f1 /proc/uptime)
    elif command_exists uptime; then
      uptime_seconds=$(_parse_uptime_output "$(uptime 2>/dev/null || true)")
    fi
  fi
  echo "$uptime_seconds"
}
# Reduce a value to a plain decimal number.
# Arguments: $1 - raw value
#            $2 - fallback printed when $1 holds no usable number (default 0)
# Outputs:   the cleaned number, padded so it never starts or ends with "."
sanitize_number() {
  local raw="$1"
  local fallback="${2:-0}"
  local numeric_re='^([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)$'
  local cleaned
  # Drop everything except digits and dots.
  cleaned=$(echo "$raw" | sed 's/[^0-9.]//g')
  if ! [[ "$cleaned" =~ $numeric_re ]]; then
    echo "$fallback"
    return
  fi
  # ".5" -> "0.5"
  case "$cleaned" in
    .*) cleaned="0$cleaned" ;;
  esac
  # "5." -> "5.0"
  case "$cleaned" in
    *.) cleaned="${cleaned}0" ;;
  esac
  echo "$cleaned"
}
# Reduce a value to its digits only.
# Arguments: $1 - raw value
#            $2 - fallback printed when no digits remain (default 0)
# Outputs:   the digit string, or the fallback
sanitize_integer() {
  local raw="$1"
  local fallback="${2:-0}"
  local digits
  digits=$(echo "$raw" | sed 's/[^0-9]//g')
  if [[ "$digits" =~ ^[0-9]+$ ]]; then
    echo "$digits"
  else
    echo "$fallback"
  fi
}
# Strip control bytes (0x00-0x1F) and every byte from 0x7F upward from a string.
# Arguments: $1 - text to clean
# Outputs:   the cleaned text without a trailing newline (newline is a control byte)
clean_json_string() {
  local text="$1"
  # One tr pass removes both byte ranges.
  echo "$text" | tr -d '\000-\037\177-\377'
}
# Fetch the server-side configuration, retrying up to three times.
# On HTTP 200 the response body is cached and, when it carries a positive
# "interval" different from the current one, INTERVAL is updated and saved.
# Globals:  WORKER_URL, SERVER_ID, API_KEY (read); INTERVAL (read/written)
# Returns:  0 on success, 1 when every attempt failed
get_config() {
  local max_attempts=3
  local attempt=1
  # Matches "interval":30 and "interval" : 30 alike; a bash regex avoids the
  # GNU-only \+ that BSD sed does not understand.
  local interval_re='"interval"[[:space:]]*:[[:space:]]*([0-9]+)'
  local response http_code response_body new_interval

  while [[ $attempt -le $max_attempts ]]; do
    log "正在获取服务器配置... (第 $attempt/$max_attempts 次)"
    # -w appends the status code, so the last three characters are the code.
    response=$(curl -s -w "%{http_code}" -X GET "$WORKER_URL/api/config/$SERVER_ID" \
      -H "X-API-Key: $API_KEY" 2>/dev/null || echo "000")
    http_code="${response: -3}"
    response_body="${response%???}"

    if [[ "$http_code" == "200" ]]; then
      log "配置获取成功"
      new_interval=""
      if [[ "$response_body" =~ $interval_re ]]; then
        # 10# keeps values with leading zeros from being read as octal.
        new_interval=$((10#${BASH_REMATCH[1]}))
      fi
      if [[ -n "$new_interval" && "$new_interval" -gt 0 ]]; then
        if [[ "$new_interval" != "$INTERVAL" ]]; then
          log "服务器返回新的上报间隔: ${new_interval}秒 (当前: ${INTERVAL}秒)"
          INTERVAL="$new_interval"
          save_config
          log "上报间隔已更新为: ${INTERVAL}秒"
        fi
      fi
      save_config_cache "$response_body"
      return 0
    fi

    log "配置获取失败 (HTTP $http_code)"
    case "$http_code" in
      "401") log "认证失败 - 请检查API密钥" ;;
      "404") log "服务器不存在 - 请检查服务器ID" ;;
      "000") log "网络连接失败" ;;
    esac
    if [[ $attempt -lt $max_attempts ]]; then
      log "等待2秒后重试..."
      sleep 2
    fi
    attempt=$((attempt + 1))
  done

  log "配置获取最终失败"
  return 1
}
# Local cache management: cache locations, all under $SCRIPT_DIR/cache
CACHE_DIR="$SCRIPT_DIR/cache"
CONFIG_CACHE_FILE="$CACHE_DIR/config.json"
METRICS_CACHE_FILE="$CACHE_DIR/last_metrics.json"
# Create the cache directory the first time it is needed.
# Globals: CACHE_DIR (read)
init_cache() {
  # Nothing to do when the directory is already there.
  if [[ -d "$CACHE_DIR" ]]; then
    return 0
  fi
  mkdir -p "$CACHE_DIR"
  log "创建缓存目录: $CACHE_DIR"
}
# Write a configuration payload to the local cache file.
# Globals:   CONFIG_CACHE_FILE (read)
# Arguments: $1 - configuration text to store
save_config_cache() {
  local payload="$1"
  # Make sure the cache directory exists before writing into it.
  init_cache
  echo "$payload" > "$CONFIG_CACHE_FILE"
  log "配置已缓存到本地"
}
# Print the cached configuration, if there is one.
# Globals:  CONFIG_CACHE_FILE (read)
# Outputs:  the cached configuration on stdout; the log notice goes to stderr
#           so a caller capturing stdout receives the configuration only
# Returns:  0 when a non-empty cache was printed, 1 otherwise
load_config_cache() {
  if [[ ! -f "$CONFIG_CACHE_FILE" ]]; then
    return 1
  fi
  local cached_config
  cached_config=$(cat "$CONFIG_CACHE_FILE" 2>/dev/null) || return 1
  if [[ -z "$cached_config" ]]; then
    return 1
  fi
  # log echoes to stdout outside service mode; keep that out of the result.
  log "从缓存加载配置" >&2
  printf '%s\n' "$cached_config"
  return 0
}
# Collect the current metrics and POST them to the worker.
# When the response carries a new positive "interval", INTERVAL is updated,
# the config is saved and $SCRIPT_DIR/restart_needed is created.
# Globals:  WORKER_URL, SERVER_ID, API_KEY, SCRIPT_DIR (read); INTERVAL (read/written)
# Returns:  0 on success
#           1 on an error that should not be retried (400/401/403/404/413/unknown)
#           2 on an error that may be retried (429/500/502/503/504/000)
report_metrics() {
  local timestamp cpu_raw memory_raw disk_raw network_raw uptime_raw uptime
  timestamp=$(date +%s)
  # Collection is best effort: a failing collector yields the zero fallback below.
  cpu_raw=$(get_cpu_usage) || true
  memory_raw=$(get_memory_usage) || true
  disk_raw=$(get_disk_usage) || true
  network_raw=$(get_network_usage) || true
  uptime_raw=$(get_uptime) || true
  uptime=$(sanitize_integer "$uptime_raw" "0")

  # Strip control characters, then fall back to zeroed objects for anything
  # that does not at least look like a JSON object.
  cpu_raw=$(clean_json_string "$cpu_raw")
  memory_raw=$(clean_json_string "$memory_raw")
  disk_raw=$(clean_json_string "$disk_raw")
  network_raw=$(clean_json_string "$network_raw")
  if [[ ! "$cpu_raw" =~ ^\{.*\}$ ]]; then
    cpu_raw='{"usage_percent":0,"load_avg":[0,0,0]}'
  fi
  if [[ ! "$memory_raw" =~ ^\{.*\}$ ]]; then
    memory_raw='{"total":0,"used":0,"free":0,"usage_percent":0}'
  fi
  if [[ ! "$disk_raw" =~ ^\{.*\}$ ]]; then
    disk_raw='{"total":0,"used":0,"free":0,"usage_percent":0}'
  fi
  if [[ ! "$network_raw" =~ ^\{.*\}$ ]]; then
    network_raw='{"upload_speed":0,"download_speed":0,"total_upload":0,"total_download":0}'
  fi

  local data="{\"timestamp\":$timestamp,\"cpu\":$cpu_raw,\"memory\":$memory_raw,\"disk\":$disk_raw,\"network\":$network_raw,\"uptime\":$uptime}"

  # Remove stray spaces and line breaks from the credentials.
  local clean_api_key clean_server_id
  clean_api_key=$(echo "$API_KEY" | tr -d ' \n\r')
  clean_server_id=$(echo "$SERVER_ID" | tr -d ' \n\r')

  log "正在上报数据到 $WORKER_URL/api/report/$clean_server_id"
  local response http_code response_body
  # -w appends the status code, so the last three characters are the code.
  response=$(curl -s -w "%{http_code}" -X POST "$WORKER_URL/api/report/$clean_server_id" \
    -H "Content-Type: application/json" \
    -H "X-API-Key: $clean_api_key" \
    -d "$data" 2>/dev/null || echo "000")
  http_code="${response: -3}"
  response_body="${response%???}"

  if [[ "$http_code" == "200" ]]; then
    log "数据上报成功"
    # Look for a new reporting interval: jq when present, otherwise a regex
    # that tolerates whitespace around the colon and needs no GNU sed.
    local new_interval=""
    local interval_re='"interval"[[:space:]]*:[[:space:]]*([0-9]+)'
    if command_exists jq; then
      new_interval=$(echo "$response_body" | jq -r '.interval // empty' 2>/dev/null) || new_interval=""
    elif [[ "$response_body" =~ $interval_re ]]; then
      new_interval="${BASH_REMATCH[1]}"
    fi
    if [[ "$new_interval" =~ ^[0-9]+$ ]]; then
      # 10# keeps values with leading zeros from being read as octal.
      new_interval=$((10#$new_interval))
      if [[ "$new_interval" -gt 0 && "$new_interval" != "$INTERVAL" ]]; then
        log "服务器返回新的上报间隔: ${new_interval}秒 (当前: ${INTERVAL}秒)"
        INTERVAL="$new_interval"
        save_config
        log "上报间隔已更新为: ${INTERVAL}秒"
        # Marker file telling the main loop to apply the new interval.
        touch "$SCRIPT_DIR/restart_needed"
      fi
    fi
    return 0
  fi

  # Classify the failure for the caller.
  case "$http_code" in
    "400"|"413")
      log "数据上报失败 (HTTP $http_code): 数据格式或大小问题"
      return 1
      ;;
    "401"|"403")
      log "数据上报失败 (HTTP $http_code): 认证失败"
      return 1
      ;;
    "404")
      log "数据上报失败 (HTTP $http_code): 服务器不存在"
      return 1
      ;;
    "429"|"500"|"502"|"503"|"504"|"000")
      log "数据上报失败 (HTTP $http_code): 可重试的错误"
      return 2
      ;;
    *)
      log "数据上报失败 (HTTP $http_code): 未知错误"
      return 1
      ;;
  esac
}
# Generate the background service script at $SERVICE_FILE and make it executable.
# The script is written from the unquoted heredoc below: $SCRIPT_DIR and the
# main script path are expanded now, while every \$ stays a variable reference
# in the generated file. The generated script sources the main script with
# FUNCTIONS_ONLY=true, overrides clean_json_string/report_metrics/get_config,
# and runs the reporting loop.
# Note: the zeroed fallback JSON objects are single-quoted in the generated
# script, so their double quotes must NOT be backslash-escaped here; escaped
# quotes would reach the server as literal backslashes and break the JSON.
# Globals: SERVICE_FILE, SCRIPT_DIR (read)
create_service_script() {
  # Absolute path of the script currently running
  local main_script_path=$(realpath "$0")
  cat > "$SERVICE_FILE" << EOF
#!/bin/bash
# cf-vps-monitor服务脚本 - 集中式文件管理
SCRIPT_DIR="$SCRIPT_DIR"
CONFIG_FILE="\$SCRIPT_DIR/config/config"
LOG_FILE="\$SCRIPT_DIR/logs/monitor.log"
PID_FILE="\$SCRIPT_DIR/run/monitor.pid"
MAIN_SCRIPT="$main_script_path"
# 设置服务模式标志(避免日志重复)
export SERVICE_MODE=true
# 确保日志目录存在
mkdir -p "\$(dirname "\$LOG_FILE")" 2>/dev/null
# 加载配置
if [[ -f "\$CONFIG_FILE" ]]; then
source "\$CONFIG_FILE"
else
echo "配置文件不存在: \$CONFIG_FILE"
exit 1
fi
# 从主脚本加载监控函数(简化版)
source_monitoring_functions() {
# 直接source主脚本,但设置标志避免执行主程序
if [[ -f "\$MAIN_SCRIPT" ]]; then
# 设置标志表示只加载函数
export FUNCTIONS_ONLY=true
source "\$MAIN_SCRIPT"
unset FUNCTIONS_ONLY
else
log "错误: 找不到主脚本 \$MAIN_SCRIPT"
exit 1
fi
}
# 加载监控函数
source_monitoring_functions
# 清理JSON字符串
clean_json_string() {
local input="\$1"
# 移除可能的控制字符和非打印字符
echo "\$input" | tr -d '\\000-\\037' | tr -d '\\177-\\377'
}
# 上报监控数据
report_metrics() {
local timestamp=\$(date +%s)
local cpu_raw=\$(get_cpu_usage)
local memory_raw=\$(get_memory_usage)
local disk_raw=\$(get_disk_usage)
local network_raw=\$(get_network_usage)
local uptime_raw=\$(get_uptime)
# 验证运行时间
local uptime=\$(sanitize_integer "\$uptime_raw" "0")
# 清理JSON数据
cpu_raw=\$(clean_json_string "\$cpu_raw")
memory_raw=\$(clean_json_string "\$memory_raw")
disk_raw=\$(clean_json_string "\$disk_raw")
network_raw=\$(clean_json_string "\$network_raw")
# 验证各个JSON组件(使用更宽松的验证)
if [[ -z "\$cpu_raw" || "\$cpu_raw" == "{}" || ! "\$cpu_raw" =~ ^\{.*\}\$ ]]; then
cpu_raw='{"usage_percent":0,"load_avg":[0,0,0]}'
fi
if [[ -z "\$memory_raw" || "\$memory_raw" == "{}" || ! "\$memory_raw" =~ ^\{.*\}\$ ]]; then
memory_raw='{"total":0,"used":0,"free":0,"usage_percent":0}'
fi
if [[ -z "\$disk_raw" || "\$disk_raw" == "{}" || ! "\$disk_raw" =~ ^\{.*\}\$ ]]; then
disk_raw='{"total":0,"used":0,"free":0,"usage_percent":0}'
fi
if [[ -z "\$network_raw" || "\$network_raw" == "{}" || ! "\$network_raw" =~ ^\{.*\}\$ ]]; then
network_raw='{"upload_speed":0,"download_speed":0,"total_upload":0,"total_download":0}'
fi
# 构建JSON数据
local data="{\\"timestamp\\":\$timestamp,\\"cpu\\":\$cpu_raw,\\"memory\\":\$memory_raw,\\"disk\\":\$disk_raw,\\"network\\":\$network_raw,\\"uptime\\":\$uptime}"
# 确保API KEY和ID没有多余的空格或换行
local clean_api_key=\$(echo "\$API_KEY" | tr -d ' \\n\\r')
local clean_server_id=\$(echo "\$SERVER_ID" | tr -d ' \\n\\r')
log "正在上报数据..."
local response=\$(curl -s -w "%{http_code}" -X POST "\$WORKER_URL/api/report/\$clean_server_id" \\
-H "Content-Type: application/json" \\
-H "X-API-Key: \$clean_api_key" \\
-d "\$data" 2>/dev/null || echo "000")
local http_code="\${response: -3}"
local response_body="\${response%???}"
if [[ "\$http_code" == "200" ]]; then
log "数据上报成功"
# 尝试从响应中解析新的间隔设置
# 使用简单的文本解析(避免依赖jq)
local new_interval=\$(echo "\$response_body" | sed -n 's/.*"interval":\\([0-9]\\+\\).*/\\1/p')
if [[ -n "\$new_interval" && "\$new_interval" =~ ^[0-9]+\$ && "\$new_interval" -gt 0 ]]; then
if [[ "\$new_interval" != "\$INTERVAL" ]]; then
log "服务器返回新的上报间隔: \${new_interval}秒 (当前: \${INTERVAL}秒)"
INTERVAL="\$new_interval"
# 更新配置文件
cat > "\$CONFIG_FILE" << EOL
# VPS监控配置文件
WORKER_URL="\$WORKER_URL"
SERVER_ID="\$SERVER_ID"
API_KEY="\$API_KEY"
INTERVAL="\$INTERVAL"
EOL
log "上报间隔已更新为: \${INTERVAL}秒"
# 创建重启标记文件,让主循环重新加载配置
touch "\$SCRIPT_DIR/restart_needed"
fi
fi
return 0
else
log "数据上报失败 (HTTP \$http_code): \$response_body"
# 简化的错误处理
case "\$http_code" in
"400") log "数据格式错误" ;;
"401") log "认证失败 - 请检查API密钥" ;;
"404") log "服务器不存在 - 请检查服务器ID" ;;
"429") log "请求过于频繁 - 将自动重试" ;;
"500"|"503") log "服务器错误 - 将在下个周期重试" ;;
"000") log "网络连接失败" ;;
*) log "未知错误 (HTTP \$http_code)" ;;
esac
return 1
fi
}
# 获取服务器配置
get_config() {
log "正在获取服务器配置..."
local response=\$(curl -s -w "%{http_code}" -X GET "\$WORKER_URL/api/config/\$SERVER_ID" \\
-H "X-API-Key: \$API_KEY" 2>/dev/null || echo "000")
local http_code="\${response: -3}"
local response_body="\${response%???}"
if [[ "\$http_code" == "200" ]]; then
log "配置获取成功"
# 尝试解析配置
local new_interval=""
# 使用改进的文本解析(避免依赖jq)
# 方法1: 使用grep + cut
new_interval=\$(echo "\$response_body" | grep -o '"report_interval":[0-9]*' | cut -d':' -f2 2>/dev/null)
# 方法2: 如果方法1失败,使用awk备用方案
if [[ -z "\$new_interval" ]]; then
new_interval=\$(echo "\$response_body" | awk -F'"report_interval":' '{if(NF>1) print \$2}' | awk -F',' '{print \$1}' | tr -d ' ' 2>/dev/null)
fi
# 验证并更新间隔设置
if [[ -n "\$new_interval" && "\$new_interval" =~ ^[0-9]+\$ && "\$new_interval" -gt 0 ]]; then
if [[ "\$new_interval" != "\$INTERVAL" ]]; then
log "检测到新的上报间隔: \${new_interval}秒 (当前: \${INTERVAL}秒)"
INTERVAL="\$new_interval"
# 更新配置文件
cat > "\$CONFIG_FILE" << EOL
# VPS监控配置文件
WORKER_URL="\$WORKER_URL"
SERVER_ID="\$SERVER_ID"
API_KEY="\$API_KEY"
INTERVAL="\$INTERVAL"
EOL
log "上报间隔已更新为: \${INTERVAL}秒"
return 0
else
log "配置无变化,当前间隔: \${INTERVAL}秒"
return 0
fi
else
log "警告: 无法解析配置中的上报间隔,保持当前设置"
return 1
fi
else
log "配置获取失败 (HTTP \$http_code): \$response_body"
# 简化的错误处理
case "\$http_code" in
"401") log "认证失败 - 请检查API密钥" ;;
"404") log "服务器不存在 - 请检查服务器ID" ;;
"429") log "请求过于频繁 - 将稍后重试" ;;
"500"|"503") log "服务器错误 - 将稍后重试" ;;
"000") log "网络连接失败" ;;
*) log "未知错误 (HTTP \$http_code)" ;;
esac
return 1
fi
}
# 主循环
main() {
log "VPS监控服务启动 (PID: \$\$)"
echo \$\$ > "\$PID_FILE"
# 信号处理
trap 'log "收到终止信号,正在停止..."; rm -f "\$PID_FILE"; exit 0' TERM INT
# 启动时获取一次配置
log "启动时获取服务器配置..."
get_config || log "启动时配置获取失败,使用当前配置"
local config_check_counter=0
local config_check_interval=10 # 每10个周期检查一次配置(约10分钟)
while true; do
# 定期检查配置更新
if [[ \$config_check_counter -ge \$config_check_interval ]]; then
log "定期检查配置更新..."
get_config || log "配置检查失败,继续使用当前配置"
config_check_counter=0
else
config_check_counter=\$((config_check_counter + 1))
fi
if ! report_metrics; then
log "上报失败,将在下个周期重试"
fi
# 检查是否需要重启以应用新的间隔设置
if [[ -f "\$SCRIPT_DIR/restart_needed" ]]; then
log "检测到间隔设置变更,正在重新加载配置..."
rm -f "\$SCRIPT_DIR/restart_needed"
# 重新加载配置
if [[ -f "\$CONFIG_FILE" ]]; then
source "\$CONFIG_FILE"
log "已重新加载配置,新的上报间隔: \${INTERVAL}秒"
fi
fi
sleep "\$INTERVAL"
done
}
# 启动主函数
main
EOF
  chmod +x "$SERVICE_FILE"
  print_message "$GREEN" "监控服务脚本创建完成: $SERVICE_FILE"
}
# ==================== 用户类型检测和systemd命令选择机制 ====================
# Print "root" when running with effective UID 0, otherwise "user".
detect_user_type() {
  case "$EUID" in
    0) echo "root" ;;
    *) echo "user" ;;
  esac
}
# Succeed (status 0) only when the effective UID is 0. Prints nothing.
is_root_user() {
  (( EUID == 0 ))
}
# Probe whether the systemd instance matching the current user responds.
# Returns: 1 when systemctl is missing; otherwise the status of
#          `systemctl status` (root) or `systemctl --user status` (other users)
check_systemd_availability() {
  command_exists systemctl || return 1
  if ! is_root_user; then
    # Regular users talk to their per-user manager.
    systemctl --user status >/dev/null 2>&1
    return
  fi
  # root talks to the system manager.
  systemctl status >/dev/null 2>&1
}
# Write and enable the cf-vps-monitor systemd unit.
# root gets a system unit in /etc/systemd/system (User/Group=root,
# WantedBy=multi-user.target); other users get a per-user unit under
# ~/.config/systemd/user (WantedBy=default.target).
# Globals:  SCRIPT_DIR, SERVICE_FILE, HOME (read)
# Returns:  0 when the unit file is in place, 1 when systemd is unavailable
#           or the unit file could not be written
create_systemd_service() {
  print_message "$BLUE" "配置systemd服务..."
  if ! is_user_systemd_available; then
    print_message "$YELLOW" "systemd不可用,跳过systemd服务配置"
    return 1
  fi

  # Everything that differs between the system unit and the user unit.
  local service_path identity_lines wanted_by
  local -a systemctl_scope=()
  if is_root_user; then
    service_path="/etc/systemd/system/cf-vps-monitor.service"
    identity_lines=$'User=root\nGroup=root\n'
    wanted_by="multi-user.target"
  else
    service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    identity_lines=""
    wanted_by="default.target"
    systemctl_scope=(--user)
    if ! mkdir -p "$(dirname "$service_path")" 2>/dev/null; then
      print_message "$RED" "✗ 无法创建systemd服务目录: $(dirname "$service_path")"
      return 1
    fi
  fi

  print_message "$CYAN" " 生成systemd服务文件: $service_path"

  # Render the unit into the template directory first, then copy it into place.
  local template="$SCRIPT_DIR/system/templates/systemd.service"
  if ! mkdir -p "$SCRIPT_DIR/system/templates" 2>/dev/null; then
    print_message "$RED" "✗ 无法创建模板目录: $SCRIPT_DIR/system/templates"
    return 1
  fi
  if ! cat > "$template" << EOF
[Unit]
Description=cf-vps-monitor Service - VPS Monitoring Agent
Documentation=https://github.com/kadidalax/cf-vps-monitor
After=network.target network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=$SERVICE_FILE
Restart=always
RestartSec=10
${identity_lines}WorkingDirectory=$SCRIPT_DIR
[Install]
WantedBy=$wanted_by
EOF
  then
    print_message "$RED" "✗ 无法写入模板文件: $template"
    return 1
  fi

  # Report failure instead of claiming success when the copy did not happen.
  if ! cp "$template" "$service_path" 2>/dev/null; then
    print_message "$RED" "✗ 无法写入systemd服务文件: $service_path"
    return 1
  fi

  # Record the file in the install manifest.
  record_installation "systemd" "$service_path" "create" "none"

  # Reload systemd and enable the unit; both are best effort.
  safe_systemctl "${systemctl_scope[@]}" daemon-reload || true
  safe_systemctl "${systemctl_scope[@]}" enable cf-vps-monitor.service || true

  print_message "$GREEN" "✓ systemd服务创建完成: $service_path"
  return 0
}
# ==================== systemd lingering支持 ====================
# Ask systemd-logind to enable lingering for the current user (best effort).
# Returns: 0 for root (nothing to do) and after the attempt was made,
#          1 when loginctl is not installed
enable_lingering() {
  # root does not need lingering.
  if is_root_user; then
    return 0
  fi
  if ! command_exists loginctl; then
    return 1
  fi
  # USER can be unset (cron, systemd units); a bare $USER would abort the
  # script under `set -u`, so fall back to asking id.
  local linger_user="${USER:-}"
  if [[ -z "$linger_user" ]]; then
    linger_user=$(id -un 2>/dev/null || true)
  fi
  # Failures are deliberately ignored.
  if [[ -n "$linger_user" ]]; then
    loginctl enable-linger "$linger_user" 2>/dev/null || true
  else
    # With no argument loginctl targets the caller's own user.
    loginctl enable-linger 2>/dev/null || true
  fi
  return 0
}
# Start the monitor, preferring systemd and falling back to a background process.
# Globals:  PID_FILE, LOG_FILE, SERVICE_FILE, HOME (read)
# Returns:  0 when the service is running afterwards (or already was),
#           1 when the service script is missing or the process died on start
start_service() {
  local user_desc
  user_desc=$(get_user_type_description) || true
  print_message "$BLUE" "启动监控服务 ($user_desc)..."

  # 1. Nothing to do when a monitor process is already running.
  if is_monitor_running; then
    local pids first_pid
    pids=$(find_monitor_processes) || true
    first_pid=$(echo "$pids" | awk '{print $1}')
    print_message "$YELLOW" "监控服务已在运行 (PID: $first_pid)"
    return 0
  fi

  # 2. Remove a stale PID file.
  rm -f "$PID_FILE" 2>/dev/null || true

  # 3. Try systemd. is_root_user reports through its exit status and prints
  #    nothing, so it must be tested as a command, not via $(...).
  local service_path
  local -a systemd_cmd=(systemctl)
  if is_root_user; then
    service_path="/etc/systemd/system/cf-vps-monitor.service"
  else
    service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    systemd_cmd+=(--user)
  fi
  if [[ -f "$service_path" ]] && is_systemd_available; then
    if "${systemd_cmd[@]}" start cf-vps-monitor.service 2>/dev/null; then
      "${systemd_cmd[@]}" enable cf-vps-monitor.service 2>/dev/null || true
      print_message "$GREEN" "✓ 监控服务已启动 (systemd)"
      # Also set up the other autostart mechanisms.
      echo
      add_autostart_settings
      echo
      print_message "$CYAN" "提示: 已配置自启动设置,重启后监控服务会自动启动"
      return 0
    fi
  fi

  # 4. Fallback: run the service script as a background process.
  print_message "$BLUE" "使用传统方式启动服务..."
  if [[ ! -f "$SERVICE_FILE" ]]; then
    print_message "$RED" "✗ 服务脚本不存在: $SERVICE_FILE"
    print_message "$CYAN" "请先运行安装命令"
    return 1
  fi
  chmod +x "$SERVICE_FILE" 2>/dev/null || true
  if command_exists nohup; then
    nohup "$SERVICE_FILE" >> "$LOG_FILE" 2>&1 &
  else
    "$SERVICE_FILE" >> "$LOG_FILE" 2>&1 &
  fi
  local pid=$!
  echo "$pid" > "$PID_FILE"

  # 5. Give the process a moment, then confirm it is still alive.
  sleep 2
  if kill -0 "$pid" 2>/dev/null; then
    print_message "$GREEN" "✓ 监控服务已启动 (PID: $pid)"
    print_message "$CYAN" "日志文件: $LOG_FILE"
    echo
    add_autostart_settings
    echo
    print_message "$CYAN" "提示: 已配置自启动设置,重启后监控服务会自动启动"
    return 0
  fi

  print_message "$RED" "✗ 监控服务启动失败"
  if [[ -f "$LOG_FILE" ]]; then
    print_message "$YELLOW" "查看日志: tail -f $LOG_FILE"
  fi
  rm -f "$PID_FILE"
  return 1
}
# Stop one process, escalating from SIGTERM to SIGKILL.
# Arguments: $1 - PID to stop
# Returns:   0 when the process is gone afterwards, 1 otherwise
stop_single_process() {
  local target_pid="$1"

  # Refuse PIDs that do not pass validation.
  if ! validate_pid "$target_pid"; then
    print_message "$YELLOW" " ⚠ PID $target_pid 无效或进程不存在"
    return 1
  fi

  local cmdline
  cmdline=$(get_process_command "$target_pid") || true
  print_message "$BLUE" "停止进程: $cmdline (PID: $target_pid)"

  # Step 1: polite request (SIGTERM).
  if ! kill "$target_pid" 2>/dev/null; then
    print_message "$YELLOW" " ⚠ 无法发送信号"
    return 1
  fi
  sleep 2

  # Step 2: done if the process went away on its own.
  if ! kill -0 "$target_pid" 2>/dev/null; then
    print_message "$GREEN" " ✓ 进程已正常停止"
    return 0
  fi

  # Step 3: force it (SIGKILL), then check one last time.
  if kill -9 "$target_pid" 2>/dev/null; then
    sleep 1
    if kill -0 "$target_pid" 2>/dev/null; then
      print_message "$RED" " ✗ 进程无法停止"
      return 1
    fi
    print_message "$GREEN" " ✓ 进程已强制停止"
    return 0
  fi

  print_message "$YELLOW" " ⚠ 无法发送信号"
  return 1
}
# Stop the monitor: the systemd unit first, then any remaining processes,
# and remove the autostart settings when something was actually stopped.
# Globals: PID_FILE, HOME (read)
stop_service() {
  local user_desc
  user_desc=$(get_user_type_description) || true
  print_message "$BLUE" "停止监控服务 ($user_desc)..."
  local stopped=false

  # 1. systemd. is_root_user reports through its exit status and prints
  #    nothing, so it must be tested as a command, not via $(...).
  local service_path
  local -a systemd_cmd=(systemctl)
  if is_root_user; then
    service_path="/etc/systemd/system/cf-vps-monitor.service"
  else
    service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    systemd_cmd+=(--user)
  fi
  if [[ -f "$service_path" ]] && is_systemd_available; then
    if "${systemd_cmd[@]}" is-active cf-vps-monitor.service >/dev/null 2>&1; then
      # Best effort: leftover processes are handled in step 2.
      "${systemd_cmd[@]}" stop cf-vps-monitor.service 2>/dev/null || true
      "${systemd_cmd[@]}" disable cf-vps-monitor.service 2>/dev/null || true
      stopped=true
    fi
  fi

  # 2. Stop every remaining monitor process.
  local pids pid
  pids=$(find_monitor_processes) || true
  if [[ -n "$pids" ]]; then
    local stopped_count=0
    local total_count=0
    # $pids is a whitespace-separated list; splitting is intended.
    for pid in $pids; do
      total_count=$((total_count + 1))
      if stop_single_process "$pid"; then
        stopped_count=$((stopped_count + 1))
        stopped=true
      fi
    done
    if [[ $stopped_count -gt 0 ]]; then
      print_message "$GREEN" "✓ 已停止 $stopped_count/$total_count 个监控进程"
    fi
  fi

  # 3. Remove the PID file.
  rm -f "$PID_FILE" 2>/dev/null || true

  # 4. Report, and drop the autostart settings after a real stop.
  if [[ "$stopped" == "true" ]]; then
    print_message "$GREEN" "✓ 监控服务已停止"
    echo
    remove_autostart_settings
    echo
    print_message "$CYAN" "提示: 已移除自启动设置,重启后监控服务不会自动启动"
    print_message "$CYAN" "如需重新启用监控,请使用启动功能"
  else
    print_message "$YELLOW" "没有发现运行中的监控服务"
  fi
}
# Print a status report: running processes, systemd unit, PID file,
# configuration, log file and the configured autostart mechanisms.
# When autostart is configured but nothing is running, offers (10 s timeout)
# to run the detailed diagnosis.
# Globals: PID_FILE, CONFIG_FILE, LOG_FILE, HOME (read);
#          WORKER_URL, SERVER_ID, API_KEY, INTERVAL (read after load_config)
check_service_status() {
  local user_desc
  user_desc=$(get_user_type_description) || true
  print_message "$BLUE" "检查监控服务状态 ($user_desc)..."
  echo

  # 1. Running processes.
  local pids pid pid_count cmd
  if is_monitor_running; then
    pids=$(find_monitor_processes) || true
    pid_count=$(echo "$pids" | wc -w)
    print_message "$GREEN" "✓ 监控服务正在运行"
    if [[ $pid_count -eq 1 ]]; then
      pid=$(echo "$pids" | awk '{print $1}')
      cmd=$(get_process_command "$pid") || true
      print_message "$CYAN" " 进程信息: PID $pid"
      print_message "$CYAN" " 命令行: $cmd"
    else
      print_message "$YELLOW" " 发现多个进程实例 ($pid_count 个):"
      # $pids is a whitespace-separated list; splitting is intended.
      for pid in $pids; do
        cmd=$(get_process_command "$pid") || true
        print_message "$CYAN" " PID $pid: $cmd"
      done
    fi
  else
    print_message "$RED" "✗ 监控服务未运行"
  fi

  # Unit path, systemctl scope and label for the current user.
  # is_root_user reports through its exit status and prints nothing, so it
  # must be tested as a command, not via $(...).
  local service_path role_label
  local running_as_root=false
  local -a systemd_cmd=(systemctl)
  if is_root_user; then
    running_as_root=true
    service_path="/etc/systemd/system/cf-vps-monitor.service"
    role_label="系统管理员"
  else
    service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    role_label="普通用户"
    systemd_cmd+=(--user)
  fi

  # 2. systemd unit state, when a unit file exists.
  if [[ -f "$service_path" ]] && is_systemd_available; then
    echo
    print_message "$BLUE" "systemd服务状态:"
    if "${systemd_cmd[@]}" is-active cf-vps-monitor.service >/dev/null 2>&1; then
      print_message "$GREEN" " ✓ systemd服务活跃"
      "${systemd_cmd[@]}" status cf-vps-monitor.service --no-pager -l 2>/dev/null || true
    else
      print_message "$YELLOW" " ✗ systemd服务未活跃"
    fi
  fi

  # 3. PID file.
  echo
  print_message "$BLUE" "PID文件状态:"
  if [[ -f "$PID_FILE" ]]; then
    local file_pid
    file_pid=$(cat "$PID_FILE" 2>/dev/null) || true
    if [[ -z "$file_pid" ]]; then
      print_message "$YELLOW" " ⚠ PID文件为空"
    elif kill -0 "$file_pid" 2>/dev/null; then
      print_message "$GREEN" " ✓ PID文件有效 (PID: $file_pid)"
    else
      # The recorded process is gone; remove the stale file.
      print_message "$YELLOW" " ⚠ PID文件存在但进程不存在 (清理中...)"
      rm -f "$PID_FILE"
    fi
  else
    print_message "$YELLOW" " ✗ PID文件不存在"
  fi

  # 4. Configuration (only the first 8 characters of the API key are shown).
  echo
  print_message "$BLUE" "配置信息:"
  if [[ -f "$CONFIG_FILE" ]]; then
    load_config
    print_message "$CYAN" " Worker URL: $WORKER_URL"
    print_message "$CYAN" " Server ID: $SERVER_ID"
    print_message "$CYAN" " API Key: ${API_KEY:0:8}..."
    print_message "$CYAN" " 上报间隔: ${INTERVAL}秒"
  else
    print_message "$YELLOW" " ✗ 配置文件不存在"
  fi

  # 5. Log file.
  echo
  print_message "$BLUE" "日志文件:"
  if [[ -f "$LOG_FILE" ]]; then
    local log_size log_lines
    log_size=$(du -h "$LOG_FILE" 2>/dev/null | cut -f1) || true
    log_lines=$(wc -l < "$LOG_FILE" 2>/dev/null || echo "0")
    print_message "$CYAN" " 文件: $LOG_FILE"
    print_message "$CYAN" " 大小: $log_size"
    print_message "$CYAN" " 行数: $log_lines"
  else
    print_message "$YELLOW" " ✗ 日志文件不存在"
  fi

  # 6. Autostart mechanisms; active_count counts the ones in place.
  echo
  print_message "$CYAN" "自启动配置状态:"
  local active_count=0

  print_message "$CYAN" " systemd服务 ($role_label):"
  if [[ -f "$service_path" ]] && command_exists systemctl; then
    if "${systemd_cmd[@]}" is-enabled cf-vps-monitor.service >/dev/null 2>&1; then
      print_message "$GREEN" " ✓ 服务已启用"
      active_count=$((active_count + 1))
      if [[ "$running_as_root" == "true" ]]; then
        print_message "$GREEN" " ✓ 系统级服务 (重启后自动运行)"
      fi
    else
      print_message "$YELLOW" " ✗ 服务未启用"
    fi
  else
    print_message "$YELLOW" " ✗ systemd服务文件不存在"
  fi

  print_message "$CYAN" " crontab自启动:"
  if check_crontab_autostart; then
    print_message "$GREEN" " ✓ 已配置 (重启后自动运行)"
    active_count=$((active_count + 1))
  else
    print_message "$YELLOW" " ✗ 未配置"
  fi

  print_message "$CYAN" " shell profile自启动:"
  if [[ -f "$HOME/.bashrc" ]] && grep -q "cf-vps-monitor auto-start" "$HOME/.bashrc" 2>/dev/null; then
    print_message "$GREEN" " ✓ 已配置 (登录时自动运行)"
    active_count=$((active_count + 1))
  else
    print_message "$YELLOW" " ✗ 未配置"
  fi

  # Summary of the autostart coverage.
  echo
  print_message "$BLUE" "自启动保障总结:"
  echo " 活跃方案数: $active_count / 3"
  case "$active_count" in
    0)
      print_message "$RED" " 状态: 无自启动保障"
      print_message "$YELLOW" " 建议: 重新安装服务以配置自启动"
      ;;
    1)
      print_message "$YELLOW" " 状态: 基本保障"
      print_message "$CYAN" " 建议: 重新安装服务以配置完整保障"
      ;;
    2)
      print_message "$GREEN" " 状态: 良好保障"
      ;;
    *)
      print_message "$GREEN" " 状态: 完全保障 (推荐)"
      ;;
  esac

  # Offer the diagnosis when autostart exists but nothing is running.
  if ! is_monitor_running && [[ $active_count -gt 0 ]]; then
    echo
    print_message "$YELLOW" "提示: 配置了自启动但服务未运行,输入 'd' 查看详细诊断"
    echo -n "是否查看诊断信息? (d/N): "
    local diag_choice=""
    # read fails on timeout or EOF; treat that as "no" instead of letting
    # `set -e` abort the whole script.
    read -r -t 10 diag_choice || diag_choice=""
    if [[ "$diag_choice" =~ ^[Dd]$ ]]; then
      echo
      diagnose_monitor_service
    fi
  fi
}
# ==================== crontab自启动方案 ====================
# Add an @reboot crontab entry that starts the service script unless a
# monitor process is already running.
# Globals:  SCRIPT_DIR, SERVICE_FILE (read)
# Returns:  0 when the entry exists afterwards, 1 when crontab is missing
#           or the new crontab could not be installed
setup_crontab_autostart() {
  print_message "$BLUE" "配置crontab自启动..."
  if ! command_exists crontab; then
    return 1
  fi

  # `crontab -l` fails when the user has no crontab yet; treat that as empty.
  local current_crontab
  current_crontab=$(crontab -l 2>/dev/null || echo "")

  # Already configured: nothing to add.
  if echo "$current_crontab" | grep -q "cf-vps-monitor"; then
    print_message "$GREEN" "✓ crontab自启动已存在"
    return 0
  fi

  # Back up the current crontab (best effort), creating the directory first.
  local backup_file="$SCRIPT_DIR/system/backups/crontab_backup"
  mkdir -p "$(dirname "$backup_file")" 2>/dev/null || true
  printf '%s\n' "$current_crontab" > "$backup_file" 2>/dev/null || true

  local crontab_entry="@reboot sleep 30 && pgrep -f 'cf-vps-monitor|vps-monitor-service' >/dev/null || $SERVICE_FILE"

  # Append to the existing entries; an empty crontab must not produce a
  # leading blank line.
  local new_crontab="$crontab_entry"
  if [[ -n "$current_crontab" ]]; then
    new_crontab="${current_crontab}"$'\n'"${crontab_entry}"
  fi

  if printf '%s\n' "$new_crontab" | crontab - 2>/dev/null; then
    # USER can be unset in non-login contexts; avoid tripping `set -u`.
    local owner="${USER:-}"
    if [[ -z "$owner" ]]; then
      owner=$(id -un 2>/dev/null || true)
    fi
    record_installation "crontab" "$owner" "add" "$backup_file"
    print_message "$GREEN" "✓ crontab自启动已配置"
    return 0
  fi
  return 1
}
# Check whether the user's crontab references the service script.
# Globals:  SERVICE_FILE (read)
# Returns:  0 when an entry mentions $SERVICE_FILE, 1 otherwise
#           (also when crontab is missing or the user has no crontab)
check_crontab_autostart() {
  if ! command_exists crontab; then
    return 1
  fi
  # Capture first: with `pipefail`, `crontab -l | grep -q` can report failure
  # even on a match when grep exits early and crontab gets SIGPIPE.
  local entries
  entries=$(crontab -l 2>/dev/null) || return 1
  # -F: the path is a literal string, not a regex (its dots must not act
  # as wildcards).
  if grep -qF -- "$SERVICE_FILE" <<< "$entries"; then
    return 0
  fi
  return 1
}
# ==================== shell profile自启动方案 ====================
# Append an auto-start snippet to ~/.bashrc, once.
# The snippet starts the service script from an interactive shell when no
# monitor process is found.
# Globals:  HOME, SCRIPT_DIR, SERVICE_FILE (read)
# Returns:  0 (also when the snippet was already present)
setup_profile_autostart() {
  print_message "$BLUE" "配置shell profile自启动..."
  local rc_file="$HOME/.bashrc"
  local saved_copy="$SCRIPT_DIR/system/backups/bashrc_backup"

  # The marker text tells us the snippet is already installed.
  if grep -q "cf-vps-monitor auto-start" "$rc_file" 2>/dev/null; then
    return 0
  fi

  # Keep a copy of the original file; an empty placeholder when there is none.
  if ! cp "$rc_file" "$saved_copy" 2>/dev/null; then
    touch "$saved_copy"
  fi

  # \$ keeps PS1/TERM as variables in the profile; $SERVICE_FILE is expanded now.
  cat >> "$rc_file" << EOF
# === cf-vps-monitor auto-start BEGIN ===
# VPS监控服务自启动检测 (最后保障)
if [ -n "\$PS1" ] && [ "\$TERM" != "dumb" ]; then
if ! pgrep -f 'cf-vps-monitor|vps-monitor-service' >/dev/null 2>&1; then
(sleep 5 && nohup "$SERVICE_FILE" >/dev/null 2>&1 &) &
fi
fi
# === cf-vps-monitor auto-start END ===
EOF

  # Record the change in the install manifest.
  record_installation "profile" "$rc_file" "modify" "$saved_copy"
  print_message "$GREEN" "✓ shell profile自启动已配置"
  return 0
}
# ==================== 多重自启动方案协调器 ====================
# Configure every available autostart mechanism, in priority order:
# systemd unit, crontab @reboot entry, shell profile snippet.
# Globals:  SERVICE_FILE (read)
# Returns:  0 when at least one mechanism was configured, 1 when the service
#           script is missing or every mechanism failed
setup_auto_start() {
  print_message "$BLUE" "配置优先级自启动机制..."
  echo

  # The service script must exist before anything can start it.
  if [[ ! -f "$SERVICE_FILE" ]]; then
    print_message "$RED" "✗ 服务脚本不存在,请先运行安装"
    return 1
  fi

  local configured=0
  local planned=3

  # Priority 1: systemd (not counted as planned when unavailable).
  if ! is_user_systemd_available; then
    print_message "$YELLOW" " - systemd不可用,跳过"
    planned=$((planned - 1))
  else
    print_message "$CYAN" "优先级1: systemd服务"
    if create_systemd_service; then
      configured=$((configured + 1))
      print_message "$GREEN" " ✓ systemd服务已配置"
      # Per-user units need lingering to run without a login session.
      if ! is_root_user; then
        enable_lingering >/dev/null 2>&1
      fi
    else
      print_message "$YELLOW" " ✗ systemd服务配置失败"
    fi
  fi

  # Priority 2: crontab fallback.
  print_message "$CYAN" "优先级2: crontab备用"
  if setup_crontab_autostart; then
    configured=$((configured + 1))
    print_message "$GREEN" " ✓ crontab备用已配置"
  else
    print_message "$YELLOW" " ✗ crontab备用配置失败"
  fi

  # Priority 3: shell profile as the last resort.
  print_message "$CYAN" "优先级3: shell profile保障"
  if setup_profile_autostart; then
    configured=$((configured + 1))
    print_message "$GREEN" " ✓ profile保障已配置"
  else
    print_message "$YELLOW" " ✗ profile保障配置失败"
  fi

  echo
  if [[ $configured -eq 0 ]]; then
    print_message "$RED" "✗ 所有自启动方案配置失败"
    return 1
  fi

  # Summarise how much coverage was achieved.
  print_message "$GREEN" "✓ 配置了 $configured/$planned 种自启动方案"
  if [[ $configured -eq $planned ]]; then
    print_message "$GREEN" "完全保障"
  elif [[ $configured -ge 2 ]]; then
    print_message "$GREEN" "良好保障"
  else
    print_message "$YELLOW" "基本保障"
  fi
  print_message "$CYAN" "优先级: systemd > crontab > profile"
  return 0
}
# Print the last 50 lines of the log file between two separator rules.
# Globals: LOG_FILE (read)
view_logs() {
  local rule="----------------------------------------"
  if [[ -f "$LOG_FILE" ]]; then
    print_message "$BLUE" "显示最近50行日志:"
    echo "$rule"
    tail -n 50 "$LOG_FILE"
    echo "$rule"
    print_message "$CYAN" "日志文件位置: $LOG_FILE"
  else
    print_message "$YELLOW" "日志文件不存在: $LOG_FILE"
  fi
}
# Exercise the connection to the monitoring server: fetch the configuration
# (failure is only a warning) and send one metrics report (failure is fatal).
# Globals:  WORKER_URL, SERVER_ID, API_KEY (read after load_config)
# Returns:  1 when the configuration is incomplete or the report failed
test_connection() {
  print_message "$BLUE" "测试连接到监控服务器..."
  load_config

  # All three settings are required.
  if [[ -z "$WORKER_URL" || -z "$SERVER_ID" || -z "$API_KEY" ]]; then
    print_message "$RED" "配置不完整,请先配置监控参数"
    return 1
  fi

  print_message "$BLUE" "正在测试配置获取..."
  if ! get_config; then
    print_message "$YELLOW" "⚠ 配置获取测试失败,但不影响基本功能"
  else
    print_message "$GREEN" "✓ 配置获取测试成功"
  fi

  print_message "$BLUE" "正在测试数据上报..."
  if ! report_metrics; then
    print_message "$RED" "✗ 数据上报测试失败,请检查配置和网络"
    return 1
  fi
  print_message "$GREEN" "✓ 数据上报测试成功"
  print_message "$GREEN" "✓ 连接测试完成"
}
# Interactively ask for Server ID, API key and Worker URL, then save them.
# An empty answer keeps the current value. The answers are read into the
# variables input_server_id, input_api_key, input_url and test_choice.
# Globals:  SERVER_ID, API_KEY, WORKER_URL, INTERVAL (read/written)
# Returns:  1 when a required setting is still empty after the prompts
configure_monitor() {
  print_message "$BLUE" "配置监控参数"
  echo
  load_config

  local prompt

  # Server ID
  prompt="请输入Server ID"
  if [[ -n "$SERVER_ID" ]]; then
    prompt+=" (当前: $SERVER_ID)"
  fi
  echo -n "${prompt}: "
  read -r input_server_id
  if [[ -n "$input_server_id" ]]; then
    SERVER_ID="$input_server_id"
  fi

  # API key (only the first 8 characters of the current one are shown)
  prompt="请输入API Key"
  if [[ -n "$API_KEY" ]]; then
    prompt+=" (当前: ${API_KEY:0:8}...)"
  fi
  echo -n "${prompt}: "
  read -r input_api_key
  if [[ -n "$input_api_key" ]]; then
    API_KEY="$input_api_key"
  fi

  # Worker URL
  prompt="请输入Worker URL"
  if [[ -n "$WORKER_URL" ]]; then
    prompt+=" (当前: $WORKER_URL)"
  fi
  echo -n "${prompt}: "
  read -r input_url
  if [[ -n "$input_url" ]]; then
    WORKER_URL="$input_url"
  fi

  # Default reporting interval when none is set yet.
  if [[ -z "$INTERVAL" ]]; then
    INTERVAL="10"
  fi
  print_message "$CYAN" "上报间隔设置为: ${INTERVAL}秒 (脚本运行后会自动从服务器获取最新配置)"

  # Every required setting must be present before saving.
  if [[ -z "$WORKER_URL" || -z "$SERVER_ID" || -z "$API_KEY" ]]; then
    print_message "$RED" "配置不完整,请确保所有必需参数都已填写"
    return 1
  fi

  save_config
  print_message "$GREEN" "配置保存成功"

  # Optionally run the connection test right away.
  echo
  echo -n "是否测试连接? (y/N): "
  read -r test_choice
  if [[ "$test_choice" =~ ^[Yy]$ ]]; then
    test_connection
  fi
}
# Interactive installation of the monitoring service.
#
# Steps: resource check, system/package-manager detection, dependency
# installation, directory creation, interactive configuration, service script
# creation, optional systemd unit, auto-start setup, service start, summary.
#
# Globals:  SCRIPT_DIR, CONFIG_FILE, LOG_FILE, SERVICE_FILE, HOME (read)
# Outputs:  progress messages and an installation summary on stdout
# Returns:  0 on success. Calls error_exit (which terminates the script) when
#           configuration or service start fails.
install_monitor() {
  local systemd_available=false
  local autostart_ok=false
  local service_path

  print_message "$BLUE" "开始安装VPS监控服务..."
  echo

  # Resource check (guards against fork errors); a failure is only reported.
  if ! check_system_resources; then
    print_message "$YELLOW" "系统资源紧张,启用简化模式"
  fi

  detect_system
  detect_package_manager
  install_dependencies
  create_directories

  if ! configure_monitor; then
    error_exit "配置失败,安装中止"
  fi

  create_service_script

  # A systemd unit is optional; remember whether it could be created.
  if create_systemd_service; then
    systemd_available=true
  fi

  echo
  print_message "$BLUE" "配置自启动机制..."
  # Remember the outcome so the summary below does not promise an automatic
  # restart after reboot when no auto-start method could be configured.
  if setup_auto_start; then
    autostart_ok=true
    print_message "$GREEN" "✓ 自启动机制配置完成"
  else
    print_message "$YELLOW" "⚠ 自启动配置部分失败,但不影响基本功能"
  fi

  echo
  if start_service; then
    print_message "$GREEN" "✓ VPS监控服务安装并启动成功"
    echo
    print_message "$CYAN" "安装信息:"
    echo " 安装目录: $SCRIPT_DIR"
    echo " 配置文件: $CONFIG_FILE"
    echo " 日志文件: $LOG_FILE"
    echo " 服务脚本: $SERVICE_FILE"
    if [[ "$systemd_available" == "true" ]]; then
      # Root installs use the system unit directory, others the per-user one.
      if is_root_user; then
        service_path="/etc/systemd/system/cf-vps-monitor.service"
      else
        service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
      fi
      echo " systemd服务: $service_path"
      print_message "$GREEN" " 启动方式: systemd服务"
    else
      print_message "$GREEN" " 启动方式: 传统后台进程"
    fi
    echo
    if [[ "$autostart_ok" == "true" ]]; then
      print_message "$GREEN" "✓ 已配置多重自启动保障,VPS重启后将自动运行"
    else
      print_message "$YELLOW" "⚠ 自启动未配置成功,VPS重启后需手动运行 '$0 start'"
    fi
    echo
    print_message "$YELLOW" "提示: 使用 '$0 status' 检查服务状态和自启动状态"
    print_message "$YELLOW" "提示: 使用 '$0 logs' 查看运行日志"
  else
    error_exit "服务启动失败"
  fi
}
# Completely uninstall the monitoring service after user confirmation.
#
# Steps: stop the service, back up this script if it lives inside the install
# directory, undo every integration recorded in the install manifest
# (systemd unit, crontab entries, shell-profile block), delete the install
# directory and report the result.
#
# Manifest lines have the form "type:path[:...]"; only the first two fields
# are used here.
#
# Globals:  SCRIPT_DIR, INSTALL_MANIFEST, HOME, TMPDIR (read)
# Inputs:   one confirmation line on stdin (y/Y to proceed)
# Returns:  0 when cancelled or finished (even if the directory could not be
#           removed, which is reported); 1 when the script backup fails.
uninstall_monitor() {
  local confirm="" script_path="" backup_script="" need_backup=false
  local entry_type="" entry_path="" entry_rest=""
  local -a systemctl_cmd=()

  print_message "$YELLOW" "警告: 这将删除VPS监控服务及其数据"
  printf '%s' "确认卸载? (y/N): "
  # EOF counts as "no" instead of aborting the script under `set -e`.
  read -r confirm || true
  if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
    print_message "$BLUE" "取消卸载"
    return 0
  fi
  print_message "$BLUE" "开始集中式卸载VPS监控服务..."

  # 1. Stop the service.
  stop_service

  # 2. Protect the running script: if it is stored inside the install
  #    directory it is about to be deleted, so keep a copy. Fall back to the
  #    raw $0 when realpath is unavailable or fails.
  script_path=$(realpath "$0" 2>/dev/null) || script_path="$0"
  if [[ "$script_path" == "$SCRIPT_DIR"/* ]]; then
    # mktemp gives an unpredictable name, so an existing file or symlink at a
    # fixed /tmp path can never be overwritten.
    if backup_script=$(mktemp "${TMPDIR:-/tmp}/cf-vps-monitor-backup.XXXXXX") \
      && cp -- "$script_path" "$backup_script" \
      && chmod +x "$backup_script"; then
      need_backup=true
      print_message "$CYAN" "已备份脚本到: $backup_script"
    else
      print_message "$RED" "✗ 无法备份脚本,卸载中止"
      return 1
    fi
  fi

  # 3. Undo system integration recorded in the manifest.
  if [[ -f "$INSTALL_MANIFEST" ]]; then
    print_message "$CYAN" "清理系统集成文件..."
    # The manifest is read on fd 3 so commands inside the loop cannot consume
    # it through stdin. `|| [[ -n ... ]]` keeps a last line that has no
    # trailing newline.
    while IFS=':' read -r -u 3 entry_type entry_path entry_rest \
      || [[ -n "$entry_type" ]]; do
      case "$entry_type" in
        systemd)
          print_message "$CYAN" " 移除systemd服务: $entry_path"
          systemctl_cmd=(systemctl)
          if ! is_root_user; then
            systemctl_cmd+=(--user)
          fi
          # Disable first so no enablement symlink is left pointing at the
          # removed unit file (best effort).
          if [[ -n "$entry_path" ]] && command_exists systemctl; then
            "${systemctl_cmd[@]}" disable "${entry_path##*/}" >/dev/null 2>&1 || true
          fi
          rm -f -- "$entry_path" 2>/dev/null || true
          # Reload only where systemctl exists.
          if command_exists systemctl; then
            "${systemctl_cmd[@]}" daemon-reload 2>/dev/null || true
          fi
          ;;
        crontab)
          print_message "$CYAN" " 清理crontab条目"
          (crontab -l 2>/dev/null || echo "") | grep -v "cf-vps-monitor" | crontab - 2>/dev/null || true
          ;;
        profile)
          print_message "$CYAN" " 清理profile修改: $entry_path"
          # safe_sed picks the BSD or GNU form of `sed -i`.
          safe_sed '/# === cf-vps-monitor auto-start BEGIN ===/,/# === cf-vps-monitor auto-start END ===/d' "$entry_path"
          ;;
      esac
    done 3< "$INSTALL_MANIFEST" 2>/dev/null || true
  fi

  # 4. Remove the install directory.
  print_message "$BLUE" "删除安装目录: $SCRIPT_DIR"
  # Never delete the directory we are standing in.
  cd / 2>/dev/null || cd "$HOME" 2>/dev/null || true
  # ${SCRIPT_DIR:?} aborts rather than running `rm -rf` on an empty path.
  if ! rm -rf -- "${SCRIPT_DIR:?}" 2>/dev/null; then
    print_message "$YELLOW" "标准删除失败,尝试强制删除..."
    if [[ -d "$SCRIPT_DIR" ]]; then
      # Remove entries one by one, then directories deepest-first so that
      # parents are empty by the time rmdir reaches them.
      find "$SCRIPT_DIR" ! -type d -exec rm -f -- {} + 2>/dev/null || true
      find "$SCRIPT_DIR" -depth -type d -exec rmdir -- {} \; 2>/dev/null || true
      if [[ -d "$SCRIPT_DIR" ]]; then
        rmdir -- "$SCRIPT_DIR" 2>/dev/null || rm -rf -- "$SCRIPT_DIR" 2>/dev/null || true
      fi
    fi
  fi

  # 5. Verify and report.
  if [[ -d "$SCRIPT_DIR" ]]; then
    print_message "$YELLOW" "⚠ 安装目录仍然存在: $SCRIPT_DIR"
    print_message "$CYAN" "可能原因: 文件被占用或权限不足"
    print_message "$CYAN" "手动删除: rm -rf '$SCRIPT_DIR'"
    # List what is left to help diagnose the failure.
    if [[ -r "$SCRIPT_DIR" ]]; then
      print_message "$CYAN" "目录内容:"
      ls -la "$SCRIPT_DIR" 2>/dev/null || true
    fi
  else
    print_message "$GREEN" "✓ VPS监控服务已彻底卸载"
    if [[ "$need_backup" == "true" ]]; then
      print_message "$YELLOW" "注意: 当前脚本已被删除,但备份在: $backup_script"
    else
      print_message "$CYAN" "当前脚本未被删除,可以重新安装"
    fi
  fi
}
# Print the command-line usage text.
# Outputs:  the help text on stdout; "$0" is substituted into the usage and
#           example lines
# Returns:  0
show_help() {
  # One array element per output line; empty elements are blank lines.
  local -a help_lines=(
    "VPS监控脚本 v2.0"
    ""
    "用法: $0 [选项] [参数]"
    ""
    "基本选项:"
    " install 安装监控服务"
    " uninstall 彻底卸载监控服务"
    " start 启动监控服务"
    " stop 停止监控服务"
    " restart 重启监控服务"
    " status 查看服务状态"
    " logs 查看运行日志"
    " config 配置监控参数"
    " test 测试连接"
    " menu 显示交互菜单"
    " help 显示此帮助信息"
    ""
    "一键安装参数:"
    " -i, --install 一键安装模式"
    " -s, --server-id ID 服务器ID"
    " -k, --api-key KEY API密钥"
    " -u, --worker-url URL Worker地址"
    ""
    "示例:"
    " $0 install # 交互式安装"
    " $0 status # 查看服务状态"
    " $0 logs # 查看日志"
    ""
    "一键安装示例:"
    " $0 -i -s server123 -k abc123 -u https://worker.example.com"
    ""
    "注意: 上报间隔会自动从服务器获取,无需手动设置"
  )
  printf '%s\n' "${help_lines[@]}"
}
# Restart helper for the menu: stop, wait a second, start again.
_menu_restart_service() {
  stop_service
  sleep 1
  start_service
}

# Run one menu action in isolation, then wait for Enter before returning.
# Arguments: $@ - the command (function name plus arguments) to run
# Returns:   always 0, so a failed action never ends the menu loop
#
# The action runs in a subshell with errexit re-enabled there: inside, it
# still stops at its first failing step, but neither that failure nor an
# `exit` (e.g. via error_exit) terminates the menu. Because of the subshell,
# shell variables changed by the action are not visible to the menu
# afterwards.
_menu_run_action() {
  local errexit_was_on=false
  if [[ $- == *e* ]]; then
    errexit_was_on=true
  fi

  echo
  set +e
  (
    if [[ "$errexit_was_on" == "true" ]]; then
      set -e
    fi
    "$@"
  )
  if [[ "$errexit_was_on" == "true" ]]; then
    set -e
  fi
  echo
  print_message "$BLUE" "按任意键继续..."
  # EOF here must not abort the menu either.
  read -r || true
  return 0
}

# Interactive management menu.
# Inputs:   menu choices (0-9) read line by line from stdin
# Returns:  0 when stdin reaches EOF; choosing 0 exits the script with
#           status 0. Invalid choices are reported and the menu is redrawn.
show_menu() {
  local choice=""

  while true; do
    # `clear` can fail without a usable terminal; that is not fatal.
    clear 2>/dev/null || true
    print_message "$CYAN" "=================================="
    print_message "$CYAN" " VPS监控服务管理菜单"
    print_message "$CYAN" "=================================="
    echo
    echo "1. 安装监控服务"
    echo "2. 启动监控服务"
    echo
    echo "3. 停止监控服务"
    echo "4. 重启监控服务"
    echo
    echo "5. 查看服务状态"
    echo "6. 查看运行日志"
    echo
    echo "7. 配置监控参数"
    echo "8. 测试连接"
    echo
    print_message "$CYAN" "特殊操作:"
    echo "9. 彻底卸载服务"
    echo "0. 退出"
    echo
    print_message "$YELLOW" "请选择操作 (0-9): "

    # No more input: leave the menu instead of aborting or spinning forever.
    if ! read -r choice; then
      echo
      return 0
    fi

    case "$choice" in
      1) _menu_run_action install_monitor ;;
      2) _menu_run_action start_service ;;
      3) _menu_run_action stop_service ;;
      4) _menu_run_action _menu_restart_service ;;
      5) _menu_run_action check_service_status ;;
      6) _menu_run_action view_logs ;;
      7) _menu_run_action configure_monitor ;;
      8) _menu_run_action test_connection ;;
      9) _menu_run_action uninstall_monitor ;;
      0)
        print_message "$GREEN" "感谢使用VPS监控服务!"
        exit 0
        ;;
      *)
        print_message "$RED" "无效选择,请重新输入"
        sleep 1
        ;;
    esac
  done
}
# Parse the one-click-install command-line options.
#
# Recognised: -i/--install, -s/--server-id ID, -k/--api-key KEY,
# -u/--worker-url URL, -h/--help. Parsing stops at the first argument that is
# not one of these (a plain sub-command such as "install").
#
# Returns:  1 whenever the caller should go on handling the arguments itself
#           (this function never returns 0).
# Exits:    0 after printing help; with the status of one_click_install when
#           -i/--install was given; 1 when an option is missing its value.
parse_arguments() {
  local install_mode=false
  local server_id=""
  local api_key=""
  local worker_url=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -i|--install)
        install_mode=true
        shift
        ;;
      -s|--server-id|-k|--api-key|-u|--worker-url)
        # Without this check a trailing option would expand an unset "$2"
        # (fatal under `set -u`) and `shift 2` would fail.
        if [[ $# -lt 2 ]]; then
          print_message "$RED" "错误: 选项 $1 缺少参数值"
          echo "使用 '$0 --help' 查看详细帮助"
          exit 1
        fi
        case "$1" in
          -s|--server-id) server_id="$2" ;;
          -k|--api-key) api_key="$2" ;;
          -u|--worker-url) worker_url="$2" ;;
        esac
        shift 2
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        # A basic sub-command: hand it back to the caller.
        return 1
        ;;
    esac
  done

  # One-click install mode.
  if [[ "$install_mode" == "true" ]]; then
    one_click_install "$server_id" "$api_key" "$worker_url"
    exit $?
  fi
  return 1
}
# Non-interactive ("one-click") installation.
#
# Arguments: $1 - server ID, $2 - API key, $3 - Worker URL (all required)
# Globals:   WORKER_URL, SERVER_ID, API_KEY, INTERVAL (written);
#            DEFAULT_INTERVAL, SCRIPT_DIR, CONFIG_FILE, LOG_FILE,
#            SERVICE_FILE, HOME (read)
# Returns:   0 when the service was installed and started; 1 when a required
#            argument is missing, a critical step (directories, config file,
#            service script) fails, or the service cannot be started.
#
# This function is reached through `if parse_arguments ...`, a context in
# which `set -e` is ignored, so the critical steps are checked explicitly.
one_click_install() {
  local server_id="${1:-}"
  local api_key="${2:-}"
  local worker_url="${3:-}"
  local interval="${DEFAULT_INTERVAL:-10}"
  local systemd_available=false
  local autostart_ok=false
  local service_path

  print_message "$BLUE" "开始一键安装VPS监控服务..."
  echo

  # All three parameters are mandatory.
  if [[ -z "$server_id" || -z "$api_key" || -z "$worker_url" ]]; then
    print_message "$RED" "错误: 缺少必需参数"
    echo "必需参数: -s <服务器ID> -k <API密钥> -u <Worker地址>"
    echo "使用 '$0 --help' 查看详细帮助"
    return 1
  fi

  # The initial interval is only a starting value; the running service
  # fetches the current one from the server.
  print_message "$CYAN" "安装参数:"
  echo " 服务器ID: $server_id"
  echo " API密钥: ${api_key:0:8}..."
  echo " Worker地址: $worker_url"
  echo " 初始上报间隔: ${interval}秒 (运行后会自动从服务器获取最新配置)"
  echo

  detect_system
  detect_package_manager
  install_dependencies

  if ! create_directories; then
    print_message "$RED" "✗ 创建目录结构失败"
    return 1
  fi

  WORKER_URL="$worker_url"
  SERVER_ID="$server_id"
  API_KEY="$api_key"
  INTERVAL="$interval"

  # Only announce success once the config file was really written.
  if ! save_config; then
    print_message "$RED" "✗ 配置保存失败"
    return 1
  fi
  print_message "$GREEN" "配置保存成功"

  # A failed connection test is a warning, not a reason to stop.
  print_message "$BLUE" "测试连接..."
  if ! report_metrics; then
    print_message "$YELLOW" "警告: 连接测试失败,但将继续安装"
    print_message "$YELLOW" "请检查网络连接和配置参数"
  else
    print_message "$GREEN" "✓ 连接测试成功"
  fi

  if ! create_service_script; then
    print_message "$RED" "✗ 创建服务脚本失败"
    return 1
  fi

  # A systemd unit is optional; remember whether it could be created.
  if create_systemd_service; then
    systemd_available=true
  fi

  echo
  print_message "$BLUE" "配置自启动机制..."
  # Remember the outcome so the summary does not promise an automatic
  # restart after reboot when no auto-start method could be configured.
  if setup_auto_start; then
    autostart_ok=true
    print_message "$GREEN" "✓ 自启动机制配置完成"
  else
    print_message "$YELLOW" "⚠ 自启动配置部分失败,但不影响基本功能"
  fi

  echo
  if ! start_service; then
    print_message "$RED" "✗ 服务启动失败"
    return 1
  fi

  print_message "$GREEN" "✓ VPS监控服务一键安装成功"
  echo
  print_message "$CYAN" "安装信息:"
  echo " 安装目录: $SCRIPT_DIR"
  echo " 配置文件: $CONFIG_FILE"
  echo " 日志文件: $LOG_FILE"
  echo " 服务脚本: $SERVICE_FILE"
  if [[ "$systemd_available" == "true" ]]; then
    # Root installs use the system unit directory, others the per-user one.
    if is_root_user; then
      service_path="/etc/systemd/system/cf-vps-monitor.service"
    else
      service_path="$HOME/.config/systemd/user/cf-vps-monitor.service"
    fi
    echo " systemd服务: $service_path"
    print_message "$GREEN" " 启动方式: systemd服务"
  else
    print_message "$GREEN" " 启动方式: 传统后台进程"
  fi
  echo
  if [[ "$autostart_ok" == "true" ]]; then
    print_message "$GREEN" "✓ 已配置多重自启动保障,VPS重启后将自动运行"
  else
    print_message "$YELLOW" "⚠ 自启动未配置成功,VPS重启后需手动运行 '$0 start'"
  fi
  echo
  print_message "$YELLOW" "提示: 使用 '$0 status' 检查服务状态和自启动状态"
  print_message "$YELLOW" "提示: 使用 '$0 logs' 查看运行日志"
  return 0
}
# Script entry point: dispatch on the command-line arguments.
# Arguments: $@ - the script's command-line arguments
# Returns:   the status of the selected action; exits 1 on an unknown option
main() {
  # One-click options are tried first; a zero status means they were handled.
  if parse_arguments "$@"; then
    return
  fi

  # Without arguments fall back to the interactive menu.
  if (( $# == 0 )); then
    show_menu
    return
  fi

  local subcommand="$1"
  case "$subcommand" in
    install)   install_monitor ;;
    uninstall) uninstall_monitor ;;
    start)     start_service ;;
    stop)      stop_service ;;
    restart)
      stop_service
      sleep 1
      start_service
      ;;
    status)    check_service_status ;;
    logs)      view_logs ;;
    config)    configure_monitor ;;
    test)      test_connection ;;
    menu)      show_menu ;;
    help | --help | -h)
      show_help
      ;;
    *)
      print_message "$RED" "未知选项: $subcommand"
      echo
      show_help
      exit 1
      ;;
  esac
}
# Function-loading mode (used by the service script): with FUNCTIONS_ONLY=true
# only the definitions above are loaded and the main program is skipped.
# `return` succeeds when the file is sourced; when it is executed directly
# `return` fails and the script exits with status 0 instead.
case "${FUNCTIONS_ONLY:-false}" in
  true)
    return 0 2>/dev/null || exit 0
    ;;
esac

# Script entry point: run main only when executed directly, not when sourced.
if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
  main "$@"
fi