注册
`nc`(Netcat)命令向指定端口发送数据包测试网络是否存在波动。
技术分享/ 文章详情 /

使用 `nc`(Netcat)命令向指定端口发送数据包,测试网络是否存在波动。

wuran 2025/11/14 47 0 0

bash dameng_monitor.sh

#!/bin/bash

# dameng_monitor.sh - Dameng database monitoring script (Ctrl+C handling fixed)

# Default configuration; every value can be overridden on the command line.
DB_HOST="10.56.0.98"
DB_PORT="5236"
CHECK_INTERVAL="2"
LOG_FILE="/var/log/dameng_monitor.log"
PID_FILE="/var/run/dameng_monitor.pid"

# ANSI colour escape sequences (expanded by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Global state.
# Sub-command selected on the command line. It is initialised explicitly so
# that a COMMAND variable inherited from the caller's environment can never
# select an action the user did not ask for.
COMMAND=""
declare -i check_count=0
declare -i consecutive_failures=0
declare -i total_failures=0
declare -i total_success=0
start_time=$(date +%s)
DAEMON_MODE=false

# Print the help text (commands, options, examples) to stdout, then exit 0.
# The defaults shown are the current values of the configuration globals
# DB_HOST, DB_PORT, CHECK_INTERVAL, LOG_FILE and PID_FILE.
show_usage() {
    cat <<EOF
达梦数据库监控脚本

用法: $0 [选项] [命令]

命令:
  start     启动后台监控(守护进程)
  stop      停止监控
  status    查看状态
  restart   重启监控
  log       实时查看日志
  (无命令)  前台运行模式(Ctrl+C可停止)

选项:
  -h, --host HOST        数据库主机地址 (默认: $DB_HOST)
  -p, --port PORT        数据库端口 (默认: $DB_PORT)
  -i, --interval SECONDS 检查间隔秒数 (默认: $CHECK_INTERVAL)
  -l, --log FILE         日志文件路径 (默认: $LOG_FILE)
  --pid FILE             PID文件路径 (默认: $PID_FILE)
  --help                 显示此帮助信息

示例:
  $0 start                                   # 后台启动守护进程
  $0 -h 192.168.1.100 -p 5236 start          # 指定IP和端口后台启动
  $0 -h 10.1.1.1 -p 32141                   # 前台运行(Ctrl+C可停止)
  $0 stop                                   # 停止后台监控
EOF
    exit 0
}

# Parse command-line options and the optional sub-command.
# Globals written: DB_HOST, DB_PORT, CHECK_INTERVAL, LOG_FILE, PID_FILE, COMMAND
# Arguments:       the script's positional parameters ("$@")
# Exits:           0 after printing usage for --help;
#                  1 for an unknown argument or an option missing its value.
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--host|-p|--port|-i|--interval|-l|--log|--pid)
                # All of these take a value. Without this guard `shift 2`
                # fails when the value is missing, leaves $# unchanged and
                # the loop never terminates.
                if [[ $# -lt 2 ]]; then
                    echo "错误: 选项 $1 需要一个参数值" >&2
                    exit 1
                fi
                case "$1" in
                    -h|--host)     DB_HOST="$2" ;;
                    -p|--port)     DB_PORT="$2" ;;
                    -i|--interval) CHECK_INTERVAL="$2" ;;
                    -l|--log)      LOG_FILE="$2" ;;
                    --pid)         PID_FILE="$2" ;;
                esac
                shift 2
                ;;
            --help)
                show_usage
                exit 0
                ;;
            start|stop|status|restart|log)
                COMMAND="$1"
                shift
                ;;
            *)
                echo "错误: 未知参数 $1" >&2
                # show_usage itself exits 0; run it in a subshell so that a
                # bad invocation is still reported with a failing status.
                ( show_usage ) >&2
                exit 1
                ;;
        esac
    done
}

# Validate the configuration globals and exit 1 on the first invalid value.
# Globals read: DB_HOST, DB_PORT, CHECK_INTERVAL, LOG_FILE
# Checks:
#   - DB_HOST is a dotted-quad IPv4 address with every octet in 0-255
#   - DB_PORT is an integer in 1-65535
#   - CHECK_INTERVAL is a positive integer (at most 9 digits)
#   - the log directory is writable, or can at least be created
validate_parameters() {
    local octet

    # IPv4 format: four groups of 1-3 digits ...
    if ! [[ $DB_HOST =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$ ]]; then
        echo "错误: 无效的IP地址格式: $DB_HOST" >&2
        exit 1
    fi
    # ... each of which must not exceed 255. `10#` forces base 10 so that a
    # leading zero (e.g. "08") is not parsed as an invalid octal number.
    for octet in "${BASH_REMATCH[@]:1}"; do
        if (( 10#$octet > 255 )); then
            echo "错误: 无效的IP地址格式: $DB_HOST" >&2
            exit 1
        fi
    done

    # Port range. The digit count is bounded first: an over-long number makes
    # `[ -lt ]` fail with an error instead of "true", which used to let the
    # bogus value through.
    if ! [[ $DB_PORT =~ ^[0-9]{1,5}$ ]] || (( 10#$DB_PORT < 1 || 10#$DB_PORT > 65535 )); then
        echo "错误: 无效的端口号: $DB_PORT" >&2
        exit 1
    fi

    # Check interval in seconds (bounded for the same overflow reason).
    if ! [[ $CHECK_INTERVAL =~ ^[0-9]{1,9}$ ]] || (( 10#$CHECK_INTERVAL < 1 )); then
        echo "错误: 无效的检查间隔: $CHECK_INTERVAL" >&2
        exit 1
    fi

    # Log directory: warn when it is not writable and try to create it.
    local log_dir
    log_dir=$(dirname "$LOG_FILE")
    if [ ! -w "$log_dir" ]; then
        echo "警告: 日志目录可能不可写: $log_dir"
        echo "尝试创建目录..."
        mkdir -p "$log_dir" 2>/dev/null || {
            echo "错误: 无法创建日志目录" >&2
            exit 1
        }
    fi
}

# Signal handler: record the shutdown in the log, tell the user when running
# in the foreground, remove the PID file and exit 0.
# Globals read: LOG_FILE, PID_FILE, DAEMON_MODE, YELLOW, NC
cleanup() {
    local now
    now=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s\n' "[$now] 收到停止信号,正在关闭监控..." >> "$LOG_FILE"

    # Terminal feedback only makes sense when attached to a terminal.
    [ "$DAEMON_MODE" != false ] || echo -e "\n${YELLOW}正在停止监控...${NC}"

    rm -f "$PID_FILE"
    exit 0
}

# Refuse to start when the PID file points at a live process.
# A PID file whose process is gone is treated as stale and removed.
# Globals read: PID_FILE
# Exits: 1 when a monitor process is already running.
check_pid() {
    # No PID file: nothing is running.
    [ -f "$PID_FILE" ] || return 0

    local running_pid
    running_pid=$(cat "$PID_FILE")

    if ! ps -p "$running_pid" > /dev/null 2>&1; then
        # Stale file left behind by a process that no longer exists.
        rm -f "$PID_FILE"
        return
    fi

    echo "错误: 监控进程已在运行 (PID: $running_pid)"
    echo "使用: $0 stop 来停止运行中的进程"
    exit 1
}

# Write the PID of the running script ($$) to PID_FILE, replacing any
# previous content.
create_pid_file() {
    printf '%s\n' "$$" > "$PID_FILE"
}

# Create the log and PID directories if needed and append a start-up banner
# (target, interval, log path, PID, run mode) to the log file.
# Globals read: LOG_FILE, PID_FILE, DB_HOST, DB_PORT, CHECK_INTERVAL, DAEMON_MODE
init_logging() {
    local mode_label="前台模式"
    if [ "$DAEMON_MODE" = true ]; then
        mode_label="后台守护进程"
    fi

    mkdir -p "$(dirname "$LOG_FILE")"
    mkdir -p "$(dirname "$PID_FILE")"

    local -a banner=(
        "=== 达梦数据库监控启动于 $(date) ==="
        "目标主机: $DB_HOST"
        "目标端口: $DB_PORT"
        "检查间隔: ${CHECK_INTERVAL}秒"
        "日志文件: $LOG_FILE"
        "进程PID: $$"
        "运行模式: $mode_label"
    )

    local entry
    for entry in "${banner[@]}"; do
        echo "$entry" >> "$LOG_FILE"
    done
}

# Append one timestamped record to the log file.
# Arguments: $1 - level tag (e.g. NETWORK, PORT, STATUS, ALERT)
#            $2 - message text
# Globals read: LOG_FILE
# Record format: [YYYY-MM-DD HH:MM:SS] [LEVEL] message
log() {
    local level=$1 message=$2 stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] [%s] %s\n' "$stamp" "$level" "$message" >> "$LOG_FILE"
}

# 1. Basic network check: send a single ICMP echo to DB_HOST (1 s timeout).
# In foreground mode the result is printed without a trailing newline so the
# port check can finish the line; the result is always written to the log.
# Globals read: DB_HOST, DAEMON_MODE
# Returns: 0 when the ping succeeds, 1 otherwise.
check_basic_network() {
    local verdict banner rc

    if ping -c 1 -W 1 "$DB_HOST" &>/dev/null; then
        verdict="成功"
        banner="✅ Ping正常 | "
        rc=0
    else
        verdict="失败"
        banner="❌ Ping失败 | "
        rc=1
    fi

    if [ "$DAEMON_MODE" = false ]; then
        echo -n "$banner"
    fi
    log "NETWORK" "Ping检查: $verdict - 主机 $DB_HOST"

    return "$rc"
}

# 2. Port connectivity check: probe DB_HOST:DB_PORT with netcat in scan mode
# (-z, no data sent) and a 1 s timeout. Prints the result in foreground mode
# and always writes it to the log.
# Globals read: DB_HOST, DB_PORT, DAEMON_MODE
# Returns: 0 when the port accepts a connection, 1 otherwise.
check_port_connectivity() {
    local rc=0
    nc -zv -w 1 "$DB_HOST" "$DB_PORT" &>/dev/null || rc=1

    case "$rc" in
        0)
            if [ "$DAEMON_MODE" = false ]; then
                echo "✅ 端口${DB_PORT}正常"
            fi
            log "PORT" "端口检查: 成功 - $DB_HOST:$DB_PORT"
            ;;
        *)
            if [ "$DAEMON_MODE" = false ]; then
                echo "❌ 端口${DB_PORT}失败"
            fi
            log "PORT" "端口检查: 失败 - $DB_HOST:$DB_PORT"
            ;;
    esac

    return "$rc"
}

# Run one monitoring cycle: ping check plus port check, then update the
# counters and the log.
# Globals written: check_count, consecutive_failures, total_failures,
#                  total_success
# Globals read:    DAEMON_MODE
# A cycle counts as successful only when both checks pass.
perform_check() {
    local check_start
    check_start=$(date +%s)
    check_count=$((check_count + 1))

    if [ "$DAEMON_MODE" = false ]; then
        echo -n "[#$check_count] "
    fi

    # Run BOTH checks unconditionally. Chaining them with && skipped the port
    # probe whenever the ping failed, which lost the port diagnostic (hosts
    # that drop ICMP may still serve the port) and left the foreground output
    # line without its terminating newline.
    local net_rc=0 port_rc=0
    check_basic_network || net_rc=$?
    check_port_connectivity || port_rc=$?

    if (( net_rc == 0 && port_rc == 0 )); then
        consecutive_failures=0
        total_success=$((total_success + 1))
        log "STATUS" "检查通过: 所有项目正常"
    else
        consecutive_failures=$((consecutive_failures + 1))
        total_failures=$((total_failures + 1))
        log "STATUS" "检查失败: 发现异常"

        # Raise an alert once three or more cycles in a row have failed.
        if (( consecutive_failures >= 3 )); then
            log "ALERT" "连续 $consecutive_failures 次检查失败"
        fi
    fi

    # Record cycles that took noticeably long (more than one second).
    local check_duration=$(( $(date +%s) - check_start ))
    if (( check_duration > 1 )); then
        log "PERF" "检查耗时较长: ${check_duration}秒"
    fi
}

# Print a statistics summary to the terminal (foreground mode only; does
# nothing in daemon mode).
# Globals read: DAEMON_MODE, start_time, check_count, total_success,
#               total_failures, consecutive_failures, DB_HOST, DB_PORT,
#               LOG_FILE, BLUE, NC
show_statistics() {
    [ "$DAEMON_MODE" = false ] || return 0

    local run_time=$(( $(date +%s) - start_time ))
    local success_rate=0
    if (( check_count > 0 )); then
        success_rate=$((total_success * 100 / check_count))
    fi

    # Format the elapsed time by hand: `date -d @N +%T` wraps back to
    # 00:00:00 after 24 hours and requires GNU date.
    local elapsed
    printf -v elapsed '%02d:%02d:%02d' \
        $((run_time / 3600)) $((run_time % 3600 / 60)) $((run_time % 60))

    echo -e "\n${BLUE}=== 监控统计 ===${NC}"
    echo -e "目标: $DB_HOST:$DB_PORT"
    echo -e "运行: $elapsed"
    echo -e "检查: $check_count 次"
    echo -e "成功: $total_success 次"
    echo -e "失败: $total_failures 次"
    echo -e "成功率: ${success_rate}%"
    echo -e "连续失败: $consecutive_failures 次"
    echo -e "日志文件: $LOG_FILE"
}

# Start the monitor as a detached background daemon.
# Re-executes this script with --daemon under nohup + setsid so that it gets
# its own session and survives the terminal, then verifies after 2 seconds
# that the child is still alive.
# Globals read: DB_HOST, DB_PORT, CHECK_INTERVAL, LOG_FILE, PID_FILE
# Exits: 1 when a monitor is already running (via check_pid) or when the
#        daemon did not stay up.
start_daemon() {
    echo "启动后台守护进程..."
    echo "目标: $DB_HOST:$DB_PORT"
    echo "间隔: ${CHECK_INTERVAL}秒"
    echo "日志: $LOG_FILE"
    echo "PID文件: $PID_FILE"
    echo ""
    echo "注意: 守护进程将在后台运行,不受Ctrl+C影响"
    echo ""
    echo "管理命令:"
    echo "  $0 stop                   # 停止监控"
    echo "  $0 status                 # 查看状态"
    echo "  $0 log                    # 查看日志"
    echo ""

    # Abort if an instance is already running.
    check_pid

    # - The script is run through `bash` explicitly, so it also works when the
    #   file is not executable or was started as `sh dameng_monitor.sh`
    #   (a bare "$0" is then not resolvable as a command).
    # - --pid is forwarded as well; otherwise a custom PID file given on the
    #   command line is ignored by the daemon and stop/status cannot find it.
    nohup setsid bash "$0" --daemon \
        -h "$DB_HOST" -p "$DB_PORT" -i "$CHECK_INTERVAL" \
        -l "$LOG_FILE" --pid "$PID_FILE" > /dev/null 2>&1 &

    local daemon_pid=$!
    sleep 2

    # The daemon exits immediately on invalid parameters; detect that here.
    if ps -p "$daemon_pid" > /dev/null 2>&1; then
        echo "✅ 守护进程启动成功 (PID: $daemon_pid)"
        echo "📋 使用 '$0 status' 查看运行状态"
    else
        echo "❌ 守护进程启动失败"
        exit 1
    fi
}

# Daemon main loop: run checks forever, unaffected by Ctrl+C or terminal
# hang-up, and log a statistics line every 50 checks.
# Globals written: DAEMON_MODE
# Globals read:    DB_HOST, DB_PORT, CHECK_INTERVAL, check_count,
#                  total_success, start_time
# Does not return; the process ends through the SIGTERM handler.
daemon_main_loop() {
    # Ignore the terminal-related signals only. SIGTERM must remain usable:
    # it is what the stop command sends, and ignoring it forced every stop to
    # wait for the 10 s timeout and fall back to SIGKILL, skipping the
    # shutdown log entry written by cleanup.
    trap '' SIGINT SIGHUP
    trap cleanup SIGTERM

    DAEMON_MODE=true
    init_logging
    create_pid_file

    log "DAEMON" "守护进程启动 - 目标: $DB_HOST:$DB_PORT, 间隔: ${CHECK_INTERVAL}秒"

    local success_rate run_time elapsed
    while true; do
        perform_check

        # Periodic statistics record.
        if (( check_count % 50 == 0 )); then
            success_rate=0
            if (( check_count > 0 )); then
                success_rate=$((total_success * 100 / check_count))
            fi
            run_time=$(( $(date +%s) - start_time ))
            # Formatted by hand: `date -d @N +%T` wraps after 24 hours, which
            # a long-running daemon will exceed.
            printf -v elapsed '%02d:%02d:%02d' \
                $((run_time / 3600)) $((run_time % 3600 / 60)) $((run_time % 60))
            log "STATISTICS" "运行统计 - 次数: $check_count, 成功率: ${success_rate}%, 运行时间: $elapsed"
        fi

        sleep "$CHECK_INTERVAL"
    done
}

# Foreground main loop: run checks on the terminal until interrupted.
# SIGINT (Ctrl+C) and SIGTERM are routed to cleanup; a statistics summary is
# printed every 20 checks.
# Globals written: DAEMON_MODE
# Globals read:    DB_HOST, DB_PORT, CHECK_INTERVAL, LOG_FILE, check_count,
#                  GREEN, YELLOW, RED, BLUE, NC
foreground_main_loop() {
    trap cleanup INT TERM

    DAEMON_MODE=false
    init_logging
    create_pid_file

    # Start-up banner.
    local -a banner=(
        "${GREEN}🚀 启动达梦数据库监控(前台模式)${NC}"
        "目标: ${YELLOW}$DB_HOST:$DB_PORT${NC}"
        "间隔: ${YELLOW}${CHECK_INTERVAL}秒${NC}"
        "日志: ${YELLOW}$LOG_FILE${NC}"
        "提示: ${RED}Ctrl+C${NC} 可以停止此监控"
        "${BLUE}────────────────────────────────${NC}"
    )
    local row
    for row in "${banner[@]}"; do
        echo -e "$row"
    done

    while :; do
        perform_check

        # Periodic on-screen statistics.
        if (( check_count % 20 == 0 )); then
            show_statistics
            echo ""
        fi

        sleep "$CHECK_INTERVAL"
    done
}

# Print the effective configuration and whether a monitor process is running.
# A PID file whose process no longer exists is reported and removed.
# Globals read: DB_HOST, DB_PORT, CHECK_INTERVAL, LOG_FILE, PID_FILE
show_status() {
    echo "监控配置:"
    echo "  目标主机: $DB_HOST"
    echo "  目标端口: $DB_PORT"
    echo "  检查间隔: ${CHECK_INTERVAL}秒"
    echo "  日志文件: $LOG_FILE"
    echo "  PID文件: $PID_FILE"
    echo ""

    if [ ! -f "$PID_FILE" ]; then
        echo "监控状态: 🔴 未运行"
        return 0
    fi

    local pid
    pid=$(cat "$PID_FILE" 2>/dev/null)
    if ! ps -p "$pid" > /dev/null 2>&1; then
        echo "监控状态: 🔴 未运行(残留PID文件)"
        rm -f "$PID_FILE"
        return 0
    fi

    echo "监控状态: 🟢 运行中(守护进程)"
    echo "进程PID: $pid"
    echo "启动时间: $(ps -p "$pid" -o lstart= 2>/dev/null || echo "未知")"
    echo "运行用户: $(ps -p "$pid" -o user= 2>/dev/null || echo "未知")"

    # Most recent results. Every check writes at least three log lines
    # (NETWORK, PORT, STATUS), so the last 5 lines can never contain three
    # STATUS records; scan a larger window and match the level tag exactly.
    if [ -f "$LOG_FILE" ]; then
        echo ""
        echo "最近状态:"
        tail -n 50 "$LOG_FILE" | grep -E '\[(STATUS|ALERT)\]' | tail -n 3
    fi
    return 0
}

# Stop the running monitor: send SIGTERM, wait up to 10 seconds, fall back to
# SIGKILL, then remove the PID file.
# Globals read: DB_HOST, DB_PORT, PID_FILE
stop_monitor() {
    echo "监控配置: $DB_HOST:$DB_PORT"
    echo ""

    if [ ! -f "$PID_FILE" ]; then
        echo "监控未运行(未找到PID文件)"
        return 0
    fi

    local pid pgid i
    pid=$(cat "$PID_FILE" 2>/dev/null)
    if ! ps -p "$pid" > /dev/null 2>&1; then
        echo "进程不存在,清理PID文件"
        rm -f "$PID_FILE"
        return 0
    fi

    echo "停止监控守护进程 (PID: $pid)..."
    # Signal the whole process group only when the monitor is its own group
    # leader (true for the setsid-started daemon). A foreground monitor that
    # was launched from another script shares that script's group, and
    # killing the group would take down unrelated processes.
    pgid=$(ps -o pgid= -p "$pid" 2>/dev/null | tr -d '[:space:]')
    if [[ -n "$pgid" && "$pgid" == "$pid" ]]; then
        kill -- "-$pgid" 2>/dev/null || kill "$pid" 2>/dev/null
    else
        kill "$pid" 2>/dev/null
    fi

    # Give the process up to 10 seconds to exit on its own.
    for i in {1..10}; do
        if ! ps -p "$pid" > /dev/null 2>&1; then
            break
        fi
        echo "等待进程结束... ($i/10)"
        sleep 1
    done

    # Last resort when it is still alive.
    if ps -p "$pid" > /dev/null 2>&1; then
        echo "强制停止进程..."
        kill -9 "$pid" 2>/dev/null
    fi

    rm -f "$PID_FILE"
    echo "✅ 监控守护进程已停止"
}

# Entry point: dispatch on the first argument / parsed sub-command.
# Arguments: the script's command-line arguments.
#   --daemon as first argument : internal mode used by start_daemon; runs the
#                                daemon loop with the remaining options
#   start|stop|status|restart|log : management commands
#   (no command)               : run the monitor in the foreground
main() {
    # Internal daemon mode.
    if [[ "${1:-}" == "--daemon" ]]; then
        shift
        parse_arguments "$@"
        validate_parameters
        daemon_main_loop
        exit 0
    fi

    # Regular invocation.
    parse_arguments "$@"
    validate_parameters

    local action="${COMMAND:-}"
    if [[ "$action" == "start" ]]; then
        start_daemon
    elif [[ "$action" == "stop" ]]; then
        stop_monitor
    elif [[ "$action" == "status" ]]; then
        show_status
    elif [[ "$action" == "restart" ]]; then
        stop_monitor
        sleep 2
        start_daemon
    elif [[ "$action" == "log" ]]; then
        if [[ ! -f "$LOG_FILE" ]]; then
            echo "日志文件不存在: $LOG_FILE"
        else
            echo "实时日志: $LOG_FILE"
            echo "按 Ctrl+C 退出日志查看"
            echo "────────────────────────────────"
            tail -f "$LOG_FILE"
        fi
    else
        # No command: run in the foreground (Ctrl+C stops it).
        check_pid
        foreground_main_loop
    fi
}

# Script entry point: run main with all command-line arguments.
main "$@"
评论
后发表回复

作者

文章

阅读量

获赞

扫一扫
联系客服