Linux网络连接故障全面排查:从端口检测到路由追踪的完整解决方案

Linux网络连接故障是运维工作中最棘手的问题之一,涉及DNS解析、端口连通性、防火墙配置、路由追踪等多个层面。本文通过真实生产环境案例,提供从基础检测到深度诊断的完整网络排查方案,帮助企业快速定位和解决网络连接问题。

[图片1] Linux网络连接故障排查:快速解决端口不通、DNS解析失败和路由问题

一、网络基础连通性诊断

1. 基础网络状态检查

(1)全方位网络诊断脚本

#!/bin/bash
# network_basic_diagnosis.sh
#
# First-pass connectivity check. Prints, in order: the link state of every
# interface, the default route, DNS resolution results for a few well-known
# domains, ICMP reachability of a few public resolvers, and the socket
# summary reported by `ss -s`.

echo "====== 网络基础连通性诊断 ======"
echo "诊断时间: $(date)"
echo ""

# 1. Interface state. Only the numbered header lines of `ip addr show`
#    (e.g. "2: eth0: <...> state UP ...") carry the name and the link state.
echo "1. 网络接口状态:"
ip addr show | grep -E '^[0-9]+:' | while IFS= read -r line; do
    # Splitting on ": " instead of ":" keeps the leading blank out of the name.
    interface=$(awk -F': ' '{print $2}' <<<"$line")
    state=$(grep -o 'state [A-Z]*' <<<"$line" | awk '{print $2}')
    echo "接口: $interface, 状态: $state"
done

# 2. Default gateway: warn when no default route is present.
echo -e "\n2. 路由表信息:"
ip route show | grep default || echo "警告: 未找到默认路由"

# 3. DNS resolution: relies on the exit status of nslookup.
echo -e "\n3. DNS解析测试:"
test_domains=("google.com" "baidu.com" "github.com")
for domain in "${test_domains[@]}"; do
    if nslookup "$domain" >/dev/null 2>&1; then
        echo "✅ $domain 解析成功"
    else
        echo "❌ $domain 解析失败"
    fi
done

# 4. Basic reachability: two ICMP probes per target, one second timeout each.
echo -e "\n4. 网络连通性测试:"
test_ips=("8.8.8.8" "114.114.114.114" "1.1.1.1")
for ip in "${test_ips[@]}"; do
    if ping -c 2 -W 1 "$ip" >/dev/null 2>&1; then
        echo "✅ $ip 可达"
    else
        echo "❌ $ip 不可达"
    fi
done

# 5. Socket summary: first three lines of `ss -s`.
echo -e "\n5. 网络连接统计:"
ss -s | head -3

(2)网络接口深度分析

#!/bin/bash
# network_interface_analysis.sh
# Prints per-interface details, the network configuration files found on the
# host, and the state of the common network management services.

echo "====== 网络接口深度分析 ======"
echo ""

#######################################
# Print state, IPv4 address(es), MAC address and RX counters for every
# network interface reported by `ip`.
# Arguments: none
# Outputs:   one indented report per interface on stdout
#######################################
analyze_interfaces() {
    local interface state ip_addr mac_addr stat
    local -a interfaces

    echo "1. 网络接口详细状态:"

    # `ip -o` prints one line per interface, so the name is always field 2.
    mapfile -t interfaces < <(ip -o link show | awk -F': ' '{print $2}')

    for interface in "${interfaces[@]}"; do
        # veth/VLAN links are listed as "name@peer"; `ip ... show` only
        # accepts the part before the "@".
        interface=${interface%%@*}
        [ -n "$interface" ] || continue
        echo "接口: $interface"

        # Link state
        state=$(ip link show "$interface" | grep -o "state [A-Z]*" | awk '{print $2}')
        echo "  状态: $state"

        # IPv4 addresses (joined on one line when there are several)
        ip_addr=$(ip addr show "$interface" | awk '/inet /{print $2}' | paste -sd' ' -)
        if [ -n "$ip_addr" ]; then
            echo "  IP地址: $ip_addr"
        else
            echo "  IP地址: 未配置"
        fi

        # MAC address (absent for loopback and some virtual links)
        mac_addr=$(ip link show "$interface" | awk '/link\/ether/{print $2}')
        if [ -n "$mac_addr" ]; then
            echo "  MAC地址: $mac_addr"
        fi

        # Counters: the "RX:" header line plus the two lines that follow it
        echo "  接口统计:"
        ip -s link show "$interface" | grep -A 3 "RX:" | head -3 | while read -r stat; do
            echo "    $stat"
        done

        echo ""
    done
}

#######################################
# List the network configuration files present on the host and flag any
# whose permission bits are neither 600 nor 644.
# Arguments: optional glob patterns to inspect; when none are given the
#            usual Debian / RHEL / netplan / systemd-networkd locations
#            are used.
# Outputs:   one line per file found, plus a warning line for unusual modes
#######################################
check_network_config() {
    local -a config_files=("$@")
    local pattern file mode

    if (( ${#config_files[@]} == 0 )); then
        config_files=(
            "/etc/network/interfaces"
            "/etc/sysconfig/network-scripts/ifcfg-*"
            "/etc/netplan/*.yaml"
            "/etc/systemd/network/*.network"
        )
    fi

    echo "2. 网络配置文件检查:"

    for pattern in "${config_files[@]}"; do
        # Deliberately unquoted: the pattern has to undergo pathname
        # expansion. An unmatched pattern stays literal and is rejected by
        # the -f test below.
        for file in $pattern; do
            [ -f "$file" ] || continue
            echo "配置文件: $file"
            mode=$(stat -c %a "$file")
            case "$mode" in
                600 | 644) ;;
                *) echo "  警告: 文件权限异常 - $mode" ;;
            esac
        done
    done
}

#######################################
# Report whether each common network management service is running,
# enabled-but-stopped, or not enabled, as seen by systemctl.
# Arguments: none
# Outputs:   one status line per service on stdout
#######################################
check_network_services() {
    local service
    local -a services=("NetworkManager" "network" "systemd-networkd" "networking")

    echo -e "\n3. 网络服务状态:"

    for service in "${services[@]}"; do
        if systemctl is-active "$service" >/dev/null 2>&1; then
            echo "✅ $service: 运行中"
        elif systemctl is-enabled "$service" >/dev/null 2>&1; then
            echo "⚠️  $service: 已启用但未运行"
        else
            echo "❌ $service: 未启用"
        fi
    done
}

# Run the three checks in order: interfaces, configuration files, services.
analyze_interfaces
check_network_config
check_network_services

二、端口连通性与服务诊断

1. 端口监听与连接检测

(1)全方位端口扫描诊断

#!/bin/bash
# port_connectivity_diagnosis.sh
# Usage: port_connectivity_diagnosis.sh [target_host]
# Inspects local listening ports, probes common ports on target_host,
# reports the state of common services and looks for doubly-bound ports.

echo "====== 端口连通性深度诊断 ======"
echo ""

# 检查本地端口监听
check_local_listening_ports() {
    echo "1. 本地监听端口检查:"
    
    echo "TCP监听端口:"
    ss -tlnp | while read line; do
        if echo "$line" | grep -q "LISTEN"; then
            port=$(echo "$line" | awk '{print $4}' | awk -F: '{print $NF}')
            process=$(echo "$line" | awk '{print $7}')
            echo "  端口: $port, 进程: $process"
        fi
    done | sort -n | head -20
    
    echo -e "\nUDP监听端口:"
    ss -ulnp | while read line; do
        if echo "$line" | grep -q "UNCONN"; then
            port=$(echo "$line" | awk '{print $4}' | awk -F: '{print $NF}')
            process=$(echo "$line" | awk '{print $7}')
            echo "  端口: $port, 进程: $process"
        fi
    done | sort -n | head -20
}

#######################################
# Probe TCP ports on a remote host with `nc -z`.
# Arguments: $1  - target host
#            $2+ - optional ports to probe; defaults to a set of common
#                  service ports (22 80 443 53 3306 5432 6379)
# Outputs:   one open/closed line per port, or a single notice when nc is
#            not installed
#######################################
check_port_connectivity() {
    local target_host=$1
    local -a ports=("${@:2}")
    local port

    if (( ${#ports[@]} == 0 )); then
        ports=("22" "80" "443" "53" "3306" "5432" "6379")
    fi

    echo -e "\n2. 远程端口连通性测试 ($target_host):"

    # Checked once up front: without nc none of the probes can run.
    if ! command -v nc >/dev/null 2>&1; then
        echo "⚠️  nc命令不可用,跳过端口测试"
        return
    fi

    for port in "${ports[@]}"; do
        if nc -z -w 2 "$target_host" "$port" >/dev/null 2>&1; then
            echo "✅ 端口 $port: 开放"
        else
            echo "❌ 端口 $port: 关闭"
        fi
    done
}

#######################################
# Report the state of common network-facing services. A service counts as
# running when systemctl reports it active; if it is not, a matching
# process found by pgrep is reported as running outside service management.
# Arguments: none
# Outputs:   one status line per service on stdout
#######################################
check_service_status() {
    local service_info service port
    # Entries are "unit-name:conventional-port"; the port is informational.
    local -a services=(
        "sshd:22"
        "nginx:80"
        "apache2:80"
        "httpd:80"
        "mysql:3306"
        "postgresql:5432"
        "redis:6379"
    )

    echo -e "\n3. 常见网络服务状态:"

    for service_info in "${services[@]}"; do
        service=${service_info%%:*}
        port=${service_info##*:}

        if systemctl is-active "$service" >/dev/null 2>&1; then
            echo "✅ $service: 运行中 (端口: $port)"
        elif pgrep "$service" >/dev/null; then
            echo "⚠️  $service: 进程存在但服务未管理 (端口: $port)"
        else
            echo "❌ $service: 未运行"
        fi
    done
}

# 检查端口冲突
check_port_conflicts() {
    echo -e "\n4. 端口冲突检查:"
    
    ss -tulnp | awk '{print $5}' | grep -E ":[0-9]+" | awk -F: '{print $NF}' | sort -n | uniq -d | while read port; do
        echo "⚠️  端口冲突: $port"
        echo "占用进程:"
        ss -tulnp | grep ":$port " | awk '{print "  " $7}'
    done
}

#######################################
# Entry point of the port diagnosis script: runs every check in sequence.
# Arguments: $1 - host whose ports are probed (default: google.com)
#######################################
main() {
    local target_host
    target_host="${1:-google.com}"

    # Local view first, then the remote probe, then services and conflicts.
    check_local_listening_ports
    check_port_connectivity "${target_host}"
    check_service_status
    check_port_conflicts
}

# Forward the command-line arguments (optional target host) to main.
main "$@"

2. 网络服务故障排查

(1)服务级网络诊断

#!/bin/bash
# service_level_network_diagnosis.sh
# Usage: service_level_network_diagnosis.sh [web_url]
# Checks a web endpoint, local database servers, DNS and SMTP.

echo "====== 网络服务级深度诊断 ======"
echo ""

#######################################
# Fetch a URL once with curl and report HTTP status code, total response
# time and, for https URLs, curl's certificate verification result.
# Arguments: $1 - URL to test (default: http://localhost)
# Outputs:   report on stdout; a notice when curl is not installed
#######################################
diagnose_web_service() {
    local url=${1:-"http://localhost"}
    local metrics http_code response_time ssl_result

    echo "1. Web服务诊断 ($url):"

    if ! command -v curl >/dev/null 2>&1; then
        echo "⚠️  curl不可用,跳过Web服务测试"
        return
    fi

    # A single request yields all three metrics, so they describe the same
    # exchange. --max-time keeps an unresponsive server from hanging the
    # whole diagnosis.
    metrics=$(curl -s -o /dev/null --max-time 10 \
        -w '%{http_code} %{time_total} %{ssl_verify_result}' "$url")
    read -r http_code response_time ssl_result <<<"$metrics"

    echo "HTTP响应测试:"
    echo "  状态码: $http_code"
    echo "  响应时间: ${response_time}s"

    # Certificate verification is only meaningful for HTTPS.
    if [[ "$url" == https* ]]; then
        echo "SSL证书检查:"
        echo "SSL验证: $ssl_result"
    fi
}

#######################################
# Try a trivial query against MySQL, PostgreSQL and Redis on localhost,
# for whichever client programs are installed.
# Arguments: none
# Outputs:   one success/failure line per installed client
#######################################
diagnose_database_connection() {
    echo -e "\n2. 数据库连接诊断:"

    # MySQL: bounded connect time so a filtered port cannot stall the run.
    if command -v mysql >/dev/null 2>&1; then
        if mysql --connect-timeout=5 -h localhost -u root -e "SELECT 1;" >/dev/null 2>&1; then
            echo "✅ MySQL: 连接正常"
        else
            echo "❌ MySQL: 连接失败"
        fi
    fi

    # PostgreSQL: -w forbids the interactive password prompt, which would
    # otherwise block a non-interactive diagnosis.
    if command -v psql >/dev/null 2>&1; then
        if PGCONNECT_TIMEOUT=5 psql -h localhost -U postgres -w -c "SELECT 1;" >/dev/null 2>&1; then
            echo "✅ PostgreSQL: 连接正常"
        else
            echo "❌ PostgreSQL: 连接失败"
        fi
    fi

    # Redis
    if command -v redis-cli >/dev/null 2>&1; then
        if redis-cli ping >/dev/null 2>&1; then
            echo "✅ Redis: 连接正常"
        else
            echo "❌ Redis: 连接失败"
        fi
    fi
}

#######################################
# Show the resolver setup (systemd-resolved and /etc/resolv.conf) and test
# name resolution for localhost, this host's name and google.com.
# Arguments: none
# Outputs:   report on stdout
#######################################
diagnose_dns_service() {
    local domain answer
    local -a test_domains

    echo -e "\n3. DNS服务诊断:"

    # Local stub resolver, when present
    if systemctl is-active systemd-resolved >/dev/null 2>&1; then
        echo "systemd-resolved: 运行中"
        resolvectl status | grep -A 5 "DNS Servers"
    fi

    # Configured name servers
    echo -e "\nDNS服务器配置:"
    grep nameserver /etc/resolv.conf

    # Resolution test. `dig +short` exits 0 even when the name does not
    # exist, so success requires a non-empty answer as well. getent is the
    # fallback when dig is not installed.
    echo -e "\nDNS解析测试:"
    test_domains=("localhost" "$(hostname)" "google.com")
    for domain in "${test_domains[@]}"; do
        if command -v dig >/dev/null 2>&1; then
            answer=$(dig +short "$domain" 2>/dev/null) || answer=""
        else
            answer=$(getent hosts "$domain" 2>/dev/null) || answer=""
        fi

        if [ -n "$answer" ]; then
            echo "✅ $domain: 解析成功"
        else
            echo "❌ $domain: 解析失败"
        fi
    done
}

#######################################
# Check whether something accepts connections on local TCP port 25 and,
# when postfix is installed, print its status.
# Arguments: none
# Outputs:   report on stdout
#######################################
diagnose_mail_service() {
    echo -e "\n4. 邮件服务诊断:"

    # Without nc the probe cannot run; say so instead of claiming that
    # SMTP is down.
    if ! command -v nc >/dev/null 2>&1; then
        echo "⚠️  nc命令不可用,跳过SMTP端口测试"
    elif nc -z -w 2 localhost 25 >/dev/null 2>&1; then
        echo "✅ SMTP (25): 服务运行中"
    else
        echo "❌ SMTP (25): 服务未运行"
    fi

    # Postfix, when installed
    if command -v postfix >/dev/null 2>&1; then
        echo "Postfix状态:"
        postfix status 2>/dev/null || echo "Postfix未运行"
    fi
}

#######################################
# Entry point of the service-level diagnosis: web, databases, DNS, mail.
# Arguments: $1 - URL handed to the web check (default: http://localhost)
#######################################
main() {
    local web_url
    web_url="${1:-http://localhost}"

    diagnose_web_service "${web_url}"
    diagnose_database_connection
    diagnose_dns_service
    diagnose_mail_service
}

# Forward the command-line arguments (optional web URL) to main.
main "$@"

三、防火墙与安全策略排查

1. 防火墙规则深度分析

(1)多防火墙系统检测

#!/bin/bash
# firewall_comprehensive_check.sh
# Shows the state of iptables, firewalld, UFW and SELinux, plus the sockets
# listening on all IPv4 addresses.

echo "====== 防火墙全面检查 ======"
echo ""

#######################################
# Print the first lines of the IPv4 rule listing and, when ip6tables is
# installed, of the IPv6 rule listing.
# Arguments: none
# Outputs:   rule listings on stdout, or a notice when iptables is missing
#######################################
check_iptables() {
    echo "1. iptables规则检查:"

    if ! command -v iptables >/dev/null 2>&1; then
        echo "iptables不可用"
        return
    fi

    echo "IPv4规则:"
    iptables -L -n --line-numbers | head -20

    if command -v ip6tables >/dev/null 2>&1; then
        echo -e "\nIPv6规则:"
        ip6tables -L -n --line-numbers | head -10
    fi
}

#######################################
# When firewalld is active, print its default zone, active zones, allowed
# services and open ports; otherwise report that it is not running.
# Arguments: none
# Outputs:   report on stdout
#######################################
check_firewalld() {
    echo -e "\n2. firewalld检查:"

    if ! systemctl is-active firewalld >/dev/null 2>&1; then
        echo "firewalld: 未运行"
        return
    fi

    echo "firewalld状态: 运行中"
    echo "默认区域: $(firewall-cmd --get-default-zone)"
    echo "活跃区域: $(firewall-cmd --get-active-zones | head -5)"

    echo -e "\n开放服务:"
    firewall-cmd --list-services

    echo -e "\n开放端口:"
    firewall-cmd --list-ports
}

#######################################
# Print `ufw status verbose` when ufw is installed.
# Arguments: none
# Outputs:   ufw status on stdout, or a notice when ufw is not installed
#######################################
check_ufw() {
    echo -e "\n3. UFW检查:"

    if command -v ufw >/dev/null 2>&1; then
        ufw status verbose
    else
        echo "UFW: 未安装"
    fi
}

#######################################
# Print the first three lines of `sestatus` and the current enforcement
# mode, for whichever of the two tools is installed.
# Arguments: none
# Outputs:   report on stdout
#######################################
check_selinux() {
    echo -e "\n4. SELinux检查:"

    if command -v sestatus >/dev/null 2>&1; then
        sestatus | head -3
    else
        echo "SELinux: 未安装"
    fi

    # getenforce is checked separately: it can be present without sestatus.
    if command -v getenforce >/dev/null 2>&1; then
        echo "当前模式: $(getenforce)"
    fi
}

#######################################
# List up to ten IPv4 sockets that listen on every address (0.0.0.0).
# These are the sockets reachable from outside unless a firewall blocks
# them. This does NOT show rejected connections; ss has no such record.
# Arguments: none
# Outputs:   one "监听: <ss line>" entry per matching socket
#######################################
check_network_blocks() {
    echo -e "\n5. 网络连接阻止检查:"

    echo "监听在所有IPv4地址(0.0.0.0)上的套接字:"
    # Match on the local-address column only: every IPv4 listener has
    # 0.0.0.0:* as its peer, so matching the whole line filters nothing.
    # -n avoids slow service-name lookups; older ss prints the wildcard
    # address as "*".
    ss -tuln4 \
        | awk '($2 == "LISTEN" || $2 == "UNCONN") && $5 ~ /^(0\.0\.0\.0|\*):/ { print "监听: " $0 }' \
        | head -10
}

# Run every firewall/security check in order.
check_iptables
check_firewalld
check_ufw
check_selinux
check_network_blocks

四、网络性能与路由分析

1. 网络质量深度测试

(1)全方位网络性能诊断

#!/bin/bash
# network_performance_diagnosis.sh
# Usage: network_performance_diagnosis.sh [target_host]
# Measures latency, traces the route to target_host, summarises TCP
# connection health and times DNS queries.

echo "====== 网络性能深度诊断 ======"
echo ""

#######################################
# Measure average ICMP round-trip time to a few public resolvers and tell
# the user whether iperf3 is available for bandwidth testing.
# Arguments: none
# Outputs:   per-host latency (or "unreachable") on stdout
#######################################
perform_bandwidth_test() {
    local host ping_output avg_latency
    local -a test_hosts=("8.8.8.8" "1.1.1.1" "114.114.114.114")

    echo "1. 网络带宽和延迟测试:"

    for host in "${test_hosts[@]}"; do
        echo "测试目标: $host"

        # One ping run provides both the reachability verdict and the
        # summary line ("rtt min/avg/max/mdev = a/b/c/d ms"); split on "/"
        # the average is field 5.
        if ping_output=$(ping -c 4 -W 1 "$host" 2>/dev/null); then
            avg_latency=$(awk -F'/' '/avg/ {print $5}' <<<"$ping_output")
            echo "  平均延迟: ${avg_latency}ms"
        else
            echo "  延迟: 不可达"
        fi

        # Bandwidth: only availability is reported here; an actual iperf3
        # run needs a cooperating server, which this script does not have.
        if command -v iperf3 >/dev/null 2>&1; then
            echo "  运行iperf3测试..."
        else
            echo "  安装iperf3进行带宽测试: apt-get install iperf3"
        fi
    done
}

#######################################
# Trace the network path to a host with traceroute, falling back to
# tracepath; output is capped at 20 lines.
# Arguments: $1 - destination host (default: google.com)
# Outputs:   hop list on stdout, or an install hint when neither tool exists
#######################################
perform_traceroute_analysis() {
    local target_host=${1:-"google.com"}

    echo -e "\n2. 路由追踪分析 ($target_host):"

    if command -v traceroute >/dev/null 2>&1; then
        echo "路由路径:"
        # At most 15 hops, one second per probe.
        traceroute -m 15 -w 1 "$target_host" 2>/dev/null | head -20
    elif command -v tracepath >/dev/null 2>&1; then
        echo "路由路径 (tracepath):"
        tracepath "$target_host" | head -20
    else
        echo "安装traceroute或tracepath进行路由分析"
    fi
}

#######################################
# Print the number of established TCP connections and the retransmission /
# receive-error counters reported by netstat.
# Arguments: none
# Outputs:   report on stdout
#######################################
monitor_connection_quality() {
    local established

    echo -e "\n3. 网络连接质量监控:"

    # Established TCP connections. The first line of ss output is the
    # column header and must not be counted.
    echo "TCP连接状态统计:"
    established=$(ss -t -o state established | awk 'NR > 1 { n++ } END { print n + 0 }')
    echo "${established} 个已建立连接"

    # Retransmissions and receive errors. netstat comes from net-tools,
    # which is not installed everywhere.
    echo -e "\n网络错误统计:"
    if command -v netstat >/dev/null 2>&1; then
        netstat -s | grep -E "segments retransmitted|packet receive errors" | head -5
    else
        echo "⚠️  netstat不可用,跳过错误统计"
    fi
}

#######################################
# Report the query time dig measures for a few well-known domains.
# Arguments: none
# Outputs:   per-domain query time on stdout; only the domain headings are
#            printed when dig is not installed
#######################################
test_dns_performance() {
    local domain query_time have_dig=0
    local -a test_domains=("google.com" "github.com" "baidu.com")

    echo -e "\n4. DNS性能测试:"

    # Tool availability does not change between iterations.
    if command -v dig >/dev/null 2>&1; then
        have_dig=1
    fi

    for domain in "${test_domains[@]}"; do
        echo "测试域名: $domain"
        (( have_dig )) || continue

        # dig prints ";; Query time: N msec"; N is the 4th field.
        query_time=$(dig "$domain" | awk '/Query time:/ {print $4}')
        if [ -n "$query_time" ]; then
            echo "  查询时间: ${query_time}ms"
        else
            echo "  查询时间: 查询失败"
        fi
    done
}

#######################################
# Entry point of the performance diagnosis: latency, route, connection
# quality, DNS timing.
# Arguments: $1 - host used for the route trace (default: google.com)
#######################################
main() {
    local target_host
    target_host="${1:-google.com}"

    perform_bandwidth_test
    perform_traceroute_analysis "${target_host}"
    monitor_connection_quality
    test_dns_performance
}

# Forward the command-line arguments (optional target host) to main.
main "$@"

五、网络故障应急处理

1. 网络服务快速恢复

(1)网络故障应急脚本

#!/bin/bash
# network_emergency_recovery.sh
# Interactive recovery helper: restarts network services and interfaces,
# flushes caches and can optionally open the firewall for testing.

echo "====== 网络故障应急处理 ======"
echo ""

#######################################
# Restart whichever network management services are active, then bounce
# (down, wait 2s, up) every interface whose link state is UP.
# WARNING: bouncing the interface that carries the current SSH session
# will interrupt that session.
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
restart_network_services() {
    local service interface
    local -a up_interfaces

    echo "1. 重启网络服务..."

    # Only services that are currently active are restarted, so this works
    # across distributions with different network stacks.
    for service in NetworkManager network systemd-networkd; do
        systemctl is-active "$service" >/dev/null 2>&1 || continue
        if [ "$service" = "network" ]; then
            echo "重启network服务..."
        else
            echo "重启${service}..."
        fi
        systemctl restart "$service"
    done

    # Snapshot the list before touching any link so the loop is not
    # affected by the state changes it causes.
    echo "重启网络接口..."
    mapfile -t up_interfaces < <(ip -o link show | awk -F': ' '/state UP/ {print $2}')

    for interface in "${up_interfaces[@]}"; do
        # veth/VLAN links are listed as "name@peer"; `ip link set` needs
        # the bare name.
        interface=${interface%%@*}
        [ -n "$interface" ] || continue
        echo "重启接口: $interface"
        ip link set "$interface" down
        sleep 2
        ip link set "$interface" up
    done
}

#######################################
# Flush the neighbour (ARP) table and the route cache, restart
# systemd-resolved when it is active, and invalidate nscd's hosts cache
# when nscd is installed.
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
clear_network_cache() {
    echo -e "\n2. 清除网络缓存..."

    # Neighbour (ARP/NDP) table
    echo "清除ARP缓存..."
    ip neigh flush all

    # Route cache
    echo "清除路由缓存..."
    ip route flush cache

    # Restarting the stub resolver also drops its cache.
    if systemctl is-active systemd-resolved >/dev/null 2>&1; then
        echo "重启systemd-resolved..."
        systemctl restart systemd-resolved
    fi

    # nscd hosts cache
    echo "清除DNS缓存..."
    if command -v nscd >/dev/null 2>&1; then
        nscd -i hosts
    fi
}

#######################################
# After an explicit "y" from the user, stop firewalld and set iptables to
# accept everything (policies ACCEPT, all rules flushed). The current
# iptables rules are saved to a temporary file first so that the change
# can be undone with iptables-restore.
# Arguments: none (reads one character from stdin)
# Outputs:   progress messages and the backup file location on stdout
#######################################
adjust_firewall_temporary() {
    local backup_file

    echo -e "\n3. 临时防火墙调整..."

    read -p "是否临时禁用防火墙进行测试? (y/N): " -n 1 -r
    echo
    # Anything other than a single y/Y leaves the firewall untouched.
    [[ $REPLY =~ ^[Yy]$ ]] || return 0

    if systemctl is-active firewalld >/dev/null 2>&1; then
        echo "临时停止firewalld..."
        systemctl stop firewalld
    fi

    if command -v iptables >/dev/null 2>&1; then
        # `iptables -F` is irreversible on its own; keep a copy of the
        # rules so "temporary" can actually be undone.
        if command -v iptables-save >/dev/null 2>&1 \
            && backup_file=$(mktemp "${TMPDIR:-/tmp}/iptables-backup.XXXXXX") \
            && iptables-save >"$backup_file"; then
            echo "已备份当前iptables规则到: $backup_file (恢复: iptables-restore < $backup_file)"
        else
            echo "⚠️  无法备份当前iptables规则,清空后需手动恢复"
        fi

        echo "设置临时iptables策略(允许所有)..."
        # Policies go first so that flushing never leaves a DROP policy
        # with no rules to let traffic through.
        iptables -P INPUT ACCEPT
        iptables -P FORWARD ACCEPT
        iptables -P OUTPUT ACCEPT
        iptables -F
    fi

    echo "防火墙已临时禁用,记得重新启用!"
}

#######################################
# Dump the current addressing, routing table and resolver configuration
# so the post-recovery state can be reviewed.
# Arguments: none
# Outputs:   `ip addr show`, `ip route show` and /etc/resolv.conf on stdout
#######################################
check_network_configuration() {
    printf '\n%s\n' "4. 网络配置检查..."

    printf '%s\n' "当前IP配置:"
    ip addr show

    printf '\n%s\n' "路由表:"
    ip route show

    printf '\n%s\n' "DNS配置:"
    cat /etc/resolv.conf
}

#######################################
# Entry point of the emergency recovery script. Asks for confirmation,
# then runs the recovery steps in order. Exits with status 1 when the
# user does not answer y/Y.
# Arguments: none (reads one character from stdin)
#######################################
main() {
    echo "开始网络故障应急处理..."
    echo "警告: 这将重启网络服务和调整防火墙"

    read -p "确认继续? (y/N): " -n 1 -r
    echo
    case "$REPLY" in
        [Yy]) ;;
        *)
            echo "操作已取消"
            exit 1
            ;;
    esac

    # Services and links first, caches next, firewall last (it prompts
    # again), then show the resulting configuration.
    restart_network_services
    clear_network_cache
    adjust_firewall_temporary
    check_network_configuration

    printf '\n%s\n' "应急处理完成,请测试网络连接"
    printf '%s\n' "如果问题仍然存在,请检查物理连接和ISP状态"
}

# Start the interactive recovery; main takes no arguments.
main

总结

Linux网络连接故障排查需要系统化的方法和专业的工具链。通过本文提供的诊断脚本和排查方案,可以从基础连通性测试到深度性能分析,快速定位DNS解析、端口连通、防火墙策略、路由问题等网络故障。建议建立完善的网络监控体系,定期进行网络健康检查,并制定详细的应急处理流程。

© 版权声明
THE END
喜欢就支持一下吧
点赞12 分享
评论 抢沙发

请登录后发表评论

    暂无评论内容