Docker生产环境深度实战:镜像优化、安全加固与K8s集成指南

容器技术已成为现代应用部署的标准,但在生产环境中,Docker的使用远不止于简单的docker run。本文将深入探讨生产级Docker的最佳实践,从镜像安全到运行时防护,提供完整的解决方案。

图片[1]-Docker生产环境深度实战:镜像优化、安全加固与K8s集成指南

一、生产级Docker镜像构建与优化

(1) 安全加固的多阶段构建模式

# Production-grade hardened Dockerfile (multi-stage build)

# Stage 1: vulnerability scan of the build context
# NOTE(review): ":latest" is unpinned; pin a specific Trivy release for reproducible builds.
FROM aquasec/trivy:latest AS security-scanner
WORKDIR /src
COPY . .
# Scan the sources copied to /src; scanning "/" would audit the scanner image itself.
# The marker file lets a later stage depend on this one. BuildKit skips stages
# that nothing depends on, so without it the scan would never run.
RUN trivy filesystem --exit-code 1 --no-progress /src && \
    touch /scan-passed

# Stage 2: dependency installation and application build
FROM node:18-alpine AS builder

# Make the build depend on (and therefore execute) the scan stage.
COPY --from=security-scanner /scan-passed /tmp/scan-passed

# Native toolchain for dependencies that compile addons during install/build
RUN apk add --no-cache \
    python3 \
    make \
    g++ \
    git

# glibc compatibility layer for Alpine (third-party sgerrand package; this is
# not a security tool). The downloaded .apk is removed once installed.
RUN wget -q -O /etc/apk/keys/sgerrand.rsa.pub https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub && \
    wget -q https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.35-r0/glibc-2.35-r0.apk && \
    apk add --no-cache glibc-2.35-r0.apk && \
    rm glibc-2.35-r0.apk

WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached
COPY package*.json ./
COPY yarn.lock ./

# Dependency audit; "1234567" is a placeholder advisory id in the allowlist
RUN npx audit-ci --critical --allowlist 1234567

RUN yarn install --frozen-lockfile --production=false --ignore-scripts

# Copy the application sources
COPY . .

# NOTE(review): "npm audit" reads an npm lockfile (package-lock.json) while this
# stage installs from yarn.lock — confirm the lockfile exists or drop this step.
RUN npm audit --audit-level=high

# Build the application
RUN yarn build

# Stage 3: production runtime
FROM node:18-alpine AS production

# Unprivileged user; -G puts it in the nodejs group used by the --chown flags below
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nextjs -u 1001 -G nodejs && \
    mkdir -p /app && \
    chown -R nextjs:nodejs /app

# Security updates plus the few runtime tools needed (curl for the health check,
# tini as init). --no-cache avoids leaving an apk index in the layer.
RUN apk upgrade --no-cache && \
    apk add --no-cache \
    curl \
    tini \
    ca-certificates

# tini runs as PID 1 to forward signals and reap zombies
ENTRYPOINT ["/sbin/tini", "--"]

WORKDIR /app

# Copy build artefacts with the final ownership
COPY --from=builder --chown=nextjs:nodejs /app/.next ./.next
COPY --from=builder --chown=nextjs:nodejs /app/public ./public
COPY --from=builder --chown=nextjs:nodejs /app/package.json ./
COPY --from=builder --chown=nextjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nextjs:nodejs /app/next.config.js ./

# Tighten permissions while still root. Capital "X" keeps the execute bit only on
# directories and files that already had it, so node_modules binaries stay runnable
# (a blanket "chmod 644" on every file would break "yarn start").
# A read-only root filesystem is a runtime option (--read-only / readOnlyRootFilesystem)
# and cannot be set from inside the image.
RUN chmod -R u=rwX,go=rX /app

# Drop privileges
USER nextjs

# Application port
EXPOSE 3000

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:3000/api/health || exit 1

# Start the application
CMD ["yarn", "start"]

(2) 自动化安全扫描与CI/CD集成

# .github/workflows/docker-security.yml
# Builds the image, runs security scanners against it, and on main pushes the
# image and rolls it out to Kubernetes.
name: Docker Security Scan

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

# Least privilege by default; each job widens it only where needed.
permissions:
  contents: read

jobs:
  security-scan:
    name: Security Scanning
    runs-on: ubuntu-latest
    permissions:
      contents: read
      security-events: write   # needed to upload SARIF to the Security tab

    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0   # full history so the secret scan can inspect every commit

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Build Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        push: false
        # Export the result into the local Docker daemon; otherwise the image
        # only lives in the Buildx cache and the scanner cannot find it.
        load: true
        tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:security-scan
        cache-from: type=gha
        cache-to: type=gha,mode=max

    # NOTE(review): "@master" is a moving ref; pin a release tag or commit SHA.
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@master
      with:
        image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:security-scan
        format: 'sarif'
        output: 'trivy-results.sarif'
        severity: 'CRITICAL,HIGH'

    - name: Upload Trivy scan results to GitHub Security tab
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
        sarif_file: 'trivy-results.sarif'

    - name: Run Hadolint Dockerfile linter
      uses: hadolint/hadolint-action@v3.1.0
      with:
        dockerfile: ./Dockerfile
        failure-threshold: warning

    # gitleaks-action v2 is configured through environment variables.
    - name: Check for secrets in code
      uses: gitleaks/gitleaks-action@v2
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITLEAKS_CONFIG: .gitleaks.toml

    - name: Dependency audit
      run: |
        npm audit --audit-level=high
        npm fund

  build-and-push:
    name: Build and Push
    runs-on: ubuntu-latest
    needs: security-scan
    if: github.ref == 'refs/heads/main'
    permissions:
      contents: read
      packages: write   # needed to push to ghcr.io with GITHUB_TOKEN

    steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        # The raw full-SHA tag is the one the deploy steps below reference.
        tags: |
          type=ref,event=branch
          type=ref,event=pr
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=sha,prefix={{branch}}-
          type=raw,value=${{ github.sha }}

    - name: Build and push Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=gha
        cache-to: type=gha,mode=max
        platforms: linux/amd64,linux/arm64

    # The action passes "command" to kubectl as its arguments, so each kubectl
    # invocation gets its own step. Deployment and container names match
    # k8s/secure-deployment.yaml.
    # NOTE(review): confirm the kubeconfig encoding this action expects for "config".
    - name: Update deployment image
      uses: steebchen/kubectl@v2
      with:
        config: ${{ secrets.KUBECONFIG }}
        command: set image deployment/webapp-secure webapp=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} --namespace production

    - name: Wait for rollout
      uses: steebchen/kubectl@v2
      with:
        config: ${{ secrets.KUBECONFIG }}
        command: rollout status deployment/webapp-secure --namespace production

二、容器运行时安全与监控

(1) Docker守护进程安全配置

#!/bin/bash
# docker-security-hardening.sh
# Hardens a Docker host for production use. Must be run as root.

set -euo pipefail

# ANSI colour escapes, interpreted by the logging helpers
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset

# Print an informational message with a green timestamp prefix.
# Arguments: $1 - message text (backslash escapes are interpreted)
log() {
    printf '%b\n' "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
}

# Print a warning in yellow. Written to stderr so warnings never mix into
# output that a caller captures from stdout.
# Arguments: $1 - message text
warn() {
    echo -e "${YELLOW}[WARNING] $1${NC}" >&2
}

# Print an error in red. Written to stderr, the conventional stream for
# diagnostics; does not exit on its own.
# Arguments: $1 - message text
error() {
    echo -e "${RED}[ERROR] $1${NC}" >&2
}

# Back up the Docker daemon configuration before it is overwritten.
# Arguments: $1 - config file to back up (default: /etc/docker/daemon.json)
# Outputs:   creates "<file>.backup.<YYYYmmdd_HHMMSS>" next to the original;
#            does nothing when the file does not exist.
backup_docker_config() {
    local config_file="${1:-/etc/docker/daemon.json}"
    local backup_file

    if [[ -f "$config_file" ]]; then
        backup_file="${config_file}.backup.$(date +%Y%m%d_%H%M%S)"
        # -p keeps mode, ownership and timestamps of the original
        cp -p -- "$config_file" "$backup_file"
        log "Docker配置已备份"
    fi
}

# Write a hardened Docker daemon configuration.
# Arguments: $1 - target file (default: /etc/docker/daemon.json)
# Changes from the previous revision:
#   - "apparmor-profile" removed: it is not a dockerd option, and dockerd refuses
#     to start when daemon.json contains an unknown directive. Select AppArmor
#     profiles per container with --security-opt apparmor=<profile>.
#   - "metrics-addr" bound to loopback; the metrics endpoint is unauthenticated.
# NOTE(review): "hosts" here conflicts with the "-H fd://" flag of the stock
#   systemd unit; the unit's ExecStart must drop -H for the daemon to start.
# NOTE(review): the authorization plugin name looks like a placeholder; dockerd
#   will not start if the plugin is not installed — confirm or remove.
# NOTE(review): TLS cert/key paths must exist before Docker is restarted.
configure_docker_daemon() {
    local daemon_json="${1:-/etc/docker/daemon.json}"

    log "配置安全的Docker守护进程..."

    # The directory is absent on hosts where Docker never had a config file
    mkdir -p -- "$(dirname -- "$daemon_json")"

    # Quoted delimiter: the JSON is written verbatim, with no shell expansion
    cat > "$daemon_json" << 'EOF'
{
    "userns-remap": "default",
    "log-driver": "json-file",
    "log-opts": {
        "max-size": "10m",
        "max-file": "3"
    },
    "live-restore": true,
    "icc": false,
    "userland-proxy": false,
    "no-new-privileges": true,
    "selinux-enabled": true,
    "cgroup-parent": "docker.slice",
    "storage-driver": "overlay2",
    "storage-opts": [
        "overlay2.override_kernel_check=true"
    ],
    "default-ulimits": {
        "nofile": {
            "Name": "nofile",
            "Hard": 65536,
            "Soft": 65536
        },
        "nproc": {
            "Name": "nproc",
            "Hard": 2048,
            "Soft": 1024
        }
    },
    "tls": true,
    "tlsverify": true,
    "tlscacert": "/etc/docker/ca.pem",
    "tlscert": "/etc/docker/server-cert.pem",
    "tlskey": "/etc/docker/server-key.pem",
    "hosts": [
        "fd://",
        "tcp://0.0.0.0:2376"
    ],
    "experimental": false,
    "metrics-addr": "127.0.0.1:9323",
    "authorization-plugins": ["docker.io/harbor/authorization-plugin:latest"]
}
EOF

    log "Docker守护进程安全配置完成"
}

# Append the dockremap range to one subordinate-id file unless the user
# already has an entry there.
# Arguments: $1 - path of the subuid/subgid file
_ensure_dockremap_range() {
    local id_file=$1

    # Anchored match: only a line for the user "dockremap" itself counts, not a
    # name that merely contains it. -s silences the error when the file does not
    # exist yet; the append below then creates it.
    if ! grep -qs '^dockremap:' "$id_file"; then
        echo "dockremap:100000:65536" >> "$id_file"
    fi
}

# Ensure subordinate UID/GID ranges exist for the user-namespace remap user.
# Arguments: $1 - subuid file (default: /etc/subuid)
#            $2 - subgid file (default: /etc/subgid)
# Idempotent: running it again adds nothing.
configure_user_namespace() {
    local subuid_file="${1:-/etc/subuid}"
    local subgid_file="${2:-/etc/subgid}"

    log "配置用户命名空间映射..."

    _ensure_dockremap_range "$subuid_file"
    _ensure_dockremap_range "$subgid_file"

    log "用户命名空间映射配置完成"
}

# Install and load the "docker-containers" AppArmor profile.
# Arguments: $1 - profile path (default: /etc/apparmor.d/docker-containers)
# Returns:   1 when apparmor_parser is not available; otherwise the parser's status.
# The profile now carries "file," and "umount,". AppArmor denies whatever a
# profile does not allow, so without a file rule a confined container could not
# open or execute any file. The deny rules still take precedence over it.
configure_security_policies() {
    local profile_path="${1:-/etc/apparmor.d/docker-containers}"

    log "配置容器安全策略..."

    if ! command -v apparmor_parser >/dev/null 2>&1; then
        error "apparmor_parser未安装"
        return 1
    fi

    mkdir -p -- "$(dirname -- "$profile_path")"

    # Quoted delimiter: @{PROC} and the glob patterns must reach the file verbatim
    cat > "$profile_path" << 'EOF'
#include <tunables/global>

profile docker-containers flags=(attach_disconnected,mediate_deleted) {
  #include <abstractions/base>
  
  network inet stream,
  network inet6 stream,
  
  file,
  umount,
  
  capability chown,
  capability dac_override,
  capability setuid,
  capability setgid,
  capability net_bind_service,
  
  deny @{PROC}/sysrq-trigger rwklx,
  deny @{PROC}/mem rwklx,
  deny @{PROC}/kmem rwklx,
  deny @{PROC}/kcore rwklx,
  
  deny mount,
  
  deny /sys/[^f]*/** wklx,
  deny /sys/f[^s]*/** wklx,
  deny /sys/fs/[^c]*/** wklx,
  deny /sys/fs/c[^g]*/** wklx,
  
  deny /sys/kernel/security/** rwklx,
}
EOF

    # -r replaces the profile if an older version is already loaded
    apparmor_parser -r "$profile_path"

    log "AppArmor安全策略配置完成"
}

# Install a systemd drop-in with resource limits for docker.service and reload
# systemd so it takes effect on the next restart.
# Arguments: $1 - drop-in directory (default: /etc/systemd/system/docker.service.d)
# The drop-in also resets ExecStart without the "-H fd://" flag of the stock
# unit. dockerd refuses to start when "hosts" is set both as a flag and in
# daemon.json, and this script writes "hosts" to daemon.json.
# NOTE(review): assumes dockerd is at /usr/bin/dockerd and containerd listens on
#   /run/containerd/containerd.sock, as in the packaged unit — confirm on the host.
configure_docker_service() {
    local dropin_dir="${1:-/etc/systemd/system/docker.service.d}"

    log "配置Docker系统服务..."

    # The drop-in directory does not exist on a fresh install
    mkdir -p -- "$dropin_dir"

    cat > "${dropin_dir}/security.conf" << 'EOF'
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd --containerd=/run/containerd/containerd.sock
LimitNOFILE=1048576
LimitNPROC=2048
LimitCORE=0
TasksMax=2048
Delegate=yes
CPUAccounting=yes
MemoryAccounting=yes
BlockIOAccounting=yes
EOF

    systemctl daemon-reload
    log "Docker系统服务配置完成"
}

# Install Falco (container runtime security monitoring) from the upstream apt
# repository, add two local rules and start the service.
# Requires: a Debian/Ubuntu host with curl, gpg, apt-get and systemd; network access.
# Changes from the previous revision:
#   - apt-key is deprecated; the key goes into a dedicated keyring that the
#     source entry references with signed-by.
#   - the source list is overwritten instead of appended to, so re-running the
#     script does not add duplicate entries.
#   - curl -f makes an HTTP error fail the pipeline instead of feeding an error
#     page to gpg.
# NOTE(review): key URL and keyring path follow the current Falco install guide —
#   confirm against the Falco version you deploy.
install_security_tools() {
    local keyring=/usr/share/keyrings/falco-archive-keyring.gpg

    log "安装容器安全监控工具..."

    # Install Falco
    curl -fsSL https://falco.org/repo/falcosecurity-packages.asc \
        | gpg --dearmor --yes -o "$keyring"
    echo "deb [signed-by=${keyring}] https://download.falco.org/packages/deb stable main" \
        > /etc/apt/sources.list.d/falcosecurity.list
    apt-get update
    # Both variables suppress interactive prompts during package configuration
    DEBIAN_FRONTEND=noninteractive FALCO_FRONTEND=noninteractive \
        apt-get install -y falco

    # Local Falco rules. Quoted delimiter keeps the %field placeholders literal.
    mkdir -p /etc/falco
    cat > /etc/falco/falco_rules.local.yaml << 'EOF'
- rule: Write below binary dir
  desc: 尝试在二进制目录中写入文件
  condition: >
    bin_dir and evt.dir = < and open_write
    and not user_known_write_below_binary_dir_activities
  output: >
    文件 below binary dir written (user=%user.name command=%proc.cmdline file=%fd.name)
  priority: ERROR
  tags: [filesystem]
  
- rule: Launch privileged container
  desc: 检测特权容器启动
  condition: >
    container_started and container.privileged=true
  output: >
    特权容器启动 (user=%user.name command=%proc.cmdline %container.info)
  priority: ERROR
  tags: [container]
EOF

    systemctl enable falco
    systemctl start falco

    log "容器安全监控工具安装完成"
}

# Create the hardened bridge network "security-net" (inter-container
# communication disabled, IP masquerading enabled).
# Idempotent: when the network already exists it is left untouched. Previously
# "docker network create" failed in that case and, under "set -e", aborted the
# whole script on a second run.
configure_network_security() {
    log "配置容器网络安全性..."

    if docker network inspect security-net >/dev/null 2>&1; then
        warn "网络 security-net 已存在,跳过创建"
    else
        docker network create \
            --driver bridge \
            --opt com.docker.network.bridge.name=docker-security \
            --opt com.docker.network.bridge.enable_icc=false \
            --opt com.docker.network.bridge.enable_ip_masquerade=true \
            security-net
    fi

    log "安全网络配置完成"
}

# Entry point: verify root privileges, then run every hardening step in order.
# Exits with status 1 when not run as root.
main() {
    log "开始Docker生产环境安全加固..."

    # The steps below write system configuration, so root is required
    if [[ "$EUID" -ne 0 ]]; then
        error "请使用root权限运行此脚本"
        exit 1
    fi

    local step
    for step in \
        backup_docker_config \
        configure_user_namespace \
        configure_docker_daemon \
        configure_security_policies \
        configure_docker_service \
        install_security_tools \
        configure_network_security; do
        "$step"
    done

    log "Docker安全加固完成,请重启Docker服务: systemctl restart docker"
    warn "注意:重启Docker服务会影响正在运行的容器"
}

# Run the entry point, forwarding all command-line arguments.
main "$@"

(2) 容器运行时安全监控系统

# docker-compose.security.yml
# Hardened application container plus runtime monitoring (Falco), periodic image
# scanning (Trivy) and an alert handler.
version: '3.8'

# Shared hardening defaults; merged into a service with "<<: *default-security".
x-security: &default-security
  security_opt:
    - no-new-privileges:true
  cap_drop:
    - ALL
  cap_add:
    - CHOWN
    - DAC_OVERRIDE
    - SETGID
    - SETUID
    - NET_BIND_SERVICE
  read_only: true
  tmpfs:
    - /tmp:rw,noexec,nosuid,size=64m

services:
  # Main application container
  webapp:
    build: .
    image: my-registry/webapp:${TAG:-latest}
    container_name: webapp-secure
    restart: unless-stopped

    <<: *default-security

    # Overrides the merged security_opt to add the AppArmor profile
    security_opt:
      - no-new-privileges:true
      - apparmor:docker-containers
    privileged: false
    # network_mode only accepts bridge/host/none/service:/container:. A
    # user-defined network is attached through "networks"; "default" is mapped
    # to the external security-net at the bottom of this file.
    networks:
      - default

    environment:
      - NODE_ENV=production
      - READONLY_FILESYSTEM=true

    volumes:
      - app-data:/app/data:rw
      - /etc/localtime:/etc/localtime:ro

    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        labels: "production,security"

    deploy:
      resources:
        limits:
          memory: 512M
          cpus: '1.0'
        reservations:
          memory: 256M
          cpus: '0.5'

  # Falco runtime security monitoring
  falco:
    image: falcosecurity/falco:latest
    container_name: falco-security
    privileged: true
    restart: unless-stopped

    # NOTE(review): ./falco/alerts.log must exist as a file before "up";
    # Docker creates a directory for a missing bind-mount source.
    volumes:
      - /var/run/docker.sock:/host/var/run/docker.sock
      - /dev:/host/dev:ro
      - /proc:/host/proc:ro
      - /boot:/host/boot:ro
      - /lib/modules:/host/lib/modules:ro
      - /usr:/host/usr:ro
      - /etc:/host/etc:ro
      - ./falco/rules.yaml:/etc/falco/falco_rules.yaml:ro
      - ./falco/alerts.log:/var/log/falco.log:rw

    # Mapping form so the empty value is really empty; in list form the two
    # quote characters would become part of the value.
    environment:
      HOST_ROOT: /host   # the host filesystem is mounted under /host above
      FALCO_BPF_PROBE: ""

    cap_add:
      - SYS_PTRACE
      - SYS_ADMIN

    # NOTE(review): these flags are kept from the original; verify each one
    # against "falco --help" for the image version in use. The Docker socket
    # is mounted at /host/var/run/docker.sock, not /var/run/docker.sock.
    command:
      - /usr/bin/falco
      - --cri
      - /var/run/docker.sock
      - -K
      - /var/run/docker.sock
      - --pkgsource=auto

    logging:
      driver: "json-file"
      options:
        max-size: "100m"
        max-file: "5"

  # Periodic image vulnerability scanner
  trivy:
    image: aquasec/trivy:latest
    container_name: trivy-scanner
    restart: on-failure

    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./trivy/cache:/root/.cache:rw
      - ./trivy/reports:/reports:rw

    # The image's entrypoint is the trivy binary, so a shell loop needs the
    # entrypoint replaced. "$$" is a literal "$" for Compose; a single "$"
    # would be taken as Compose variable interpolation.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        while true; do
          trivy image --format json --output "/reports/scan-$$(date +%Y%m%d-%H%M%S).json" my-registry/webapp:${TAG:-latest}
          sleep 3600
        done

    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Security event handler
  security-handler:
    image: python:3.9-alpine
    container_name: security-handler
    restart: unless-stopped

    volumes:
      - ./security-handler:/app
      - ./falco/alerts.log:/var/log/falco.log:ro

    working_dir: /app
    command: python security_handler.py

    environment:
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL}
      - EMAIL_SERVER=${EMAIL_SERVER}
      - LOG_LEVEL=INFO

    depends_on:
      - falco

volumes:
  app-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/app/data

# The project's default network is the pre-created "security-net"
networks:
  default:
    name: security-net
    external: true

三、Kubernetes生产级Docker集成

(1) 安全的Kubernetes部署配置

# k8s/secure-deployment.yaml
# Hardened Deployment for the web application.
# NOTE: kubectl does not expand ${IMAGE_TAG}; substitute it (e.g. with envsubst
# or a templating tool) before applying this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: webapp-secure
  namespace: production
  labels:
    app: webapp
    security-tier: high
spec:
  replicas: 3
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: webapp
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: webapp
        version: v1.0.0
      annotations:
        # Security annotation. The seccomp profile is set through the
        # seccompProfile fields below, so the legacy seccomp annotation is dropped.
        container.apparmor.security.beta.kubernetes.io/webapp: runtime/default
        # Monitoring annotations
        prometheus.io/scrape: "true"
        prometheus.io/port: "3000"
        prometheus.io/path: "/metrics"
    spec:
      # Pod-level security context. allowPrivilegeEscalation and
      # readOnlyRootFilesystem exist only on the container-level
      # securityContext, where this manifest already sets them.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1001
        runAsGroup: 1001
        fsGroup: 1001
        supplementalGroups: [1001]
        seccompProfile:
          type: RuntimeDefault
      # Service account
      serviceAccountName: webapp-sa
      automountServiceAccountToken: false
      # Node selector
      nodeSelector:
        kubernetes.io/arch: amd64
        security: enforced
      # Tolerations
      tolerations:
      - key: "security"
        operator: "Equal"
        value: "enforced"
        effect: "NoSchedule"
      # Affinity: prefer spreading replicas across nodes
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app
                  operator: In
                  values:
                  - webapp
              topologyKey: "kubernetes.io/hostname"
      containers:
      - name: webapp
        image: my-registry/webapp:${IMAGE_TAG}
        imagePullPolicy: IfNotPresent
        # Container-level security context
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
              - ALL
            add:
              - CHOWN
              - DAC_OVERRIDE
              - SETGID
              - SETUID
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1001
          runAsGroup: 1001
          seccompProfile:
            type: RuntimeDefault
        ports:
        - containerPort: 3000
          protocol: TCP
        env:
        - name: NODE_ENV
          value: "production"
        - name: READONLY_FILESYSTEM
          value: "true"
        # Values read from a Secret
        - name: DATABASE_URL
          valueFrom:
            secretKeyRef:
              name: app-secrets
              key: database-url
        - name: JWT_SECRET
          valueFrom:
            secretKeyRef:
              name: app-secrets
              key: jwt-secret
        resources:
          requests:
            memory: "256Mi"
            cpu: "250m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        livenessProbe:
          httpGet:
            path: /health
            port: 3000
            scheme: HTTP
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        readinessProbe:
          httpGet:
            path: /ready
            port: 3000
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 1
        startupProbe:
          httpGet:
            path: /health
            port: 3000
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          failureThreshold: 10
        volumeMounts:
        - name: app-data
          mountPath: /app/data
          readOnly: false
        # Writable scratch space, needed because the root filesystem is read-only
        - name: tmp-volume
          mountPath: /tmp
        - name: config-volume
          mountPath: /app/config
          readOnly: true
        # Lifecycle hook: delay shutdown by 10 seconds before the container is stopped
        lifecycle:
          preStop:
            exec:
              command: ["/bin/sh", "-c", "sleep 10"]
      volumes:
      - name: app-data
        persistentVolumeClaim:
          claimName: webapp-data-pvc
      - name: tmp-volume
        emptyDir:
          medium: Memory
          sizeLimit: 64Mi
      - name: config-volume
        configMap:
          name: webapp-config
          defaultMode: 0644
---
# Pod security policy
# NOTE: PodSecurityPolicy (policy/v1beta1) was removed in Kubernetes 1.25. This
# object only applies on older clusters; newer clusters use Pod Security
# Admission (namespace labels) or a policy engine instead.
# NOTE(review): this policy drops ALL capabilities and lists no
# allowedCapabilities, while the webapp Deployment adds CHOWN, DAC_OVERRIDE,
# SETGID and SETUID — confirm the pod is admitted under this policy.
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: restricted-psp
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'runtime/default'
    apparmor.security.beta.kubernetes.io/allowedProfileNames: 'runtime/default'
    seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default'
    apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
spec:
  privileged: false
  allowPrivilegeEscalation: false
  requiredDropCapabilities:
    - ALL
  volumes:
    - 'configMap'
    - 'emptyDir'
    - 'projected'
    - 'secret'
    - 'downwardAPI'
    - 'persistentVolumeClaim'
  hostNetwork: false
  hostIPC: false
  hostPID: false
  runAsUser:
    rule: 'MustRunAsNonRoot'
  seLinux:
    rule: 'RunAsAny'
  supplementalGroups:
    rule: 'MustRunAs'
    ranges:
      - min: 1
        max: 65535
  fsGroup:
    rule: 'MustRunAs'
    ranges:
      - min: 1
        max: 65535
  readOnlyRootFilesystem: true
---
# Network policy: default-deny for the webapp pods with explicit allow rules.
# NOTE(review): the namespaceSelectors below match a custom "name" label; the
# namespaces must carry that label (Kubernetes itself only sets
# kubernetes.io/metadata.name).
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: webapp-network-policy
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: webapp
  policyTypes:
  - Ingress
  - Egress
  ingress:
  # Traffic from the ingress controller namespace or from monitoring pods
  - from:
    - namespaceSelector:
        matchLabels:
          name: ingress-nginx
    - podSelector:
        matchLabels:
          app: monitoring
    ports:
    - protocol: TCP
      port: 3000
  egress:
  # DNS. With an Egress allow-list in place, name resolution is blocked unless
  # it is allowed explicitly, and the database/redis Services could not be resolved.
  # NOTE(review): assumes cluster DNS runs in kube-system with label k8s-app=kube-dns.
  - to:
    - namespaceSelector:
        matchLabels:
          kubernetes.io/metadata.name: kube-system
      podSelector:
        matchLabels:
          k8s-app: kube-dns
    ports:
    - protocol: UDP
      port: 53
    - protocol: TCP
      port: 53
  # Database
  - to:
    - podSelector:
        matchLabels:
          app: database
    ports:
    - protocol: TCP
      port: 5432
  # Redis
  - to:
    - podSelector:
        matchLabels:
          app: redis
    ports:
    - protocol: TCP
      port: 6379
  # Monitoring namespace
  - to:
    - namespaceSelector:
        matchLabels:
          name: monitoring
    ports:
    - protocol: TCP
      port: 9090
---
# Service account for the webapp pods; the API token is not mounted automatically.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: webapp-sa
  namespace: production
automountServiceAccountToken: false
---
# Role binding: grants the Role "webapp-role" to the webapp service account
# within the production namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: production
  name: webapp-rolebinding
subjects:
  - kind: ServiceAccount
    namespace: production
    name: webapp-sa
roleRef:
  kind: Role
  name: webapp-role
  apiGroup: rbac.authorization.k8s.io

(2) 持续安全监控与合规检查

#!/bin/bash
# k8s-security-audit.sh
# Security audit of a Kubernetes cluster. Requires kubectl and jq.

set -euo pipefail

# ANSI colour escapes, interpreted by the logging helpers
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset

# Print an informational message with a green timestamp prefix.
# Arguments: $1 - message text (backslash escapes are interpreted)
log() {
    printf '%b\n' "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
}

# Print a warning in yellow. Written to stderr so warnings never mix into
# output that a caller captures from stdout.
# Arguments: $1 - message text
warn() {
    echo -e "${YELLOW}[WARNING] $1${NC}" >&2
}

# Print an error in red. Written to stderr, the conventional stream for
# diagnostics; does not exit on its own.
# Arguments: $1 - message text
error() {
    echo -e "${RED}[ERROR] $1${NC}" >&2
}

# Verify that kubectl is installed and can reach the cluster.
# Exits the script with status 1 when either check fails.
check_kubectl() {
    command -v kubectl >/dev/null 2>&1 || {
        error "kubectl未安装"
        exit 1
    }

    kubectl cluster-info >/dev/null 2>&1 || {
        error "无法连接到Kubernetes集群"
        exit 1
    }

    log "Kubernetes集群连接正常"
}

# Audit pod security settings across all namespaces: privileged containers,
# containers not forced to run as non-root, and writable root filesystems.
# Outputs: findings as "namespace/pod" lines, one line per pod.
# Changes from the previous revision:
#   - the pod list is fetched once instead of three times;
#   - a pod is reported once even when several of its containers match
#     (select() on a per-container stream emitted it once per container);
#   - runAsNonRoot set on the pod-level securityContext is honoured for
#     containers that do not set it themselves;
#   - declaration and assignment are split so a kubectl/jq failure is not
#     hidden by "local".
# Only .spec.containers is inspected; init containers are not covered.
audit_pod_security() {
    log "检查Pod安全配置..."

    local pods_json
    pods_json=$(kubectl get pods --all-namespaces -o json)

    # Privileged containers
    local privileged_pods
    privileged_pods=$(jq -r '.items[]
        | select(any(.spec.containers[]?; .securityContext.privileged == true))
        | "\(.metadata.namespace)/\(.metadata.name)"' <<<"$pods_json")

    if [ -n "$privileged_pods" ]; then
        warn "发现特权容器:"
        echo "$privileged_pods"
    else
        log "✓ 无特权容器"
    fi

    # Containers that may run as root. The container-level value wins when
    # present; otherwise the pod-level value applies.
    local root_pods
    root_pods=$(jq -r '.items[]
        | . as $pod
        | select(any($pod.spec.containers[]?;
            (if .securityContext.runAsNonRoot != null
             then .securityContext.runAsNonRoot
             else $pod.spec.securityContext.runAsNonRoot end) != true))
        | "\(.metadata.namespace)/\(.metadata.name)"' <<<"$pods_json")

    if [ -n "$root_pods" ]; then
        warn "发现以root用户运行的容器:"
        echo "$root_pods"
    else
        log "✓ 所有容器均以非root用户运行"
    fi

    # Writable root filesystems
    local writable_root_pods
    writable_root_pods=$(jq -r '.items[]
        | select(any(.spec.containers[]?; .securityContext.readOnlyRootFilesystem != true))
        | "\(.metadata.namespace)/\(.metadata.name)"' <<<"$pods_json")

    if [ -n "$writable_root_pods" ]; then
        warn "发现可写根文件系统的容器:"
        echo "$writable_root_pods"
    else
        log "✓ 所有容器均使用只读根文件系统"
    fi
}

# Report namespaces that have no NetworkPolicy. kube-system and kube-public
# are skipped.
# Outputs: one namespace name per line for each uncovered namespace.
# Uses two API calls in total (previously one call per namespace), and
# assignments are split from "local" so a kubectl failure is not hidden.
audit_network_policies() {
    log "检查网络策略..."

    local namespaces
    namespaces=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}')

    # One line per existing policy, holding the namespace it belongs to
    local covered
    covered=$(kubectl get networkpolicies --all-namespaces \
        -o jsonpath='{range .items[*]}{.metadata.namespace}{"\n"}{end}')

    local no_network_policy_ns=()
    local ns
    # Word splitting is intended: the jsonpath output is space separated
    for ns in $namespaces; do
        case "$ns" in
            kube-system | kube-public) continue ;;
        esac
        # -x -F: exact, literal match of the whole line
        if ! grep -qxF -- "$ns" <<<"$covered"; then
            no_network_policy_ns+=("$ns")
        fi
    done

    if [ ${#no_network_policy_ns[@]} -gt 0 ]; then
        warn "以下命名空间缺少网络策略:"
        printf '%s\n' "${no_network_policy_ns[@]}"
    else
        log "✓ 所有命名空间均配置了网络策略"
    fi
}

# Audit RBAC: service accounts bound to cluster-admin, and bindings whose role
# allows creating pods.
# Outputs: "namespace/serviceaccount" lines for cluster-admin subjects, then the
#          names of bindings that grant pod creation.
# The pod-creation query used to read ".roleRef.rules". A roleRef only names a
# role (apiGroup/kind/name) and carries no rules, so that query could never
# match. The rules are now read from the Role/ClusterRole objects and joined to
# the bindings that reference them. Wildcard verbs/resources ("*") count as a grant.
audit_rbac() {
    log "检查RBAC配置..."

    # Service accounts with cluster-admin
    local cluster_admin_bindings
    cluster_admin_bindings=$(kubectl get clusterrolebindings -o json | \
        jq -r '.items[] | select(.roleRef.name == "cluster-admin") | .subjects[]? | select(.kind == "ServiceAccount") | "\(.namespace)/\(.name)"')

    if [ -n "$cluster_admin_bindings" ]; then
        warn "发现ServiceAccount具有cluster-admin权限:"
        echo "$cluster_admin_bindings"
    else
        log "✓ 无ServiceAccount具有cluster-admin权限"
    fi

    # Bindings that grant pod creation. Roles are keyed "Kind/namespace/name";
    # a ClusterRole has an empty namespace, and a Role referenced by a
    # RoleBinding lives in the binding's own namespace.
    local pod_creators
    pod_creators=$(kubectl get clusterroles,roles,clusterrolebindings,rolebindings --all-namespaces -o json | \
        jq -r '
            def grants_pod_create:
                any(.rules[]?;
                    ((.verbs // []) | any(.[]; . == "create" or . == "*"))
                    and ((.resources // []) | any(.[]; . == "pods" or . == "*")));
            [ .items[]
              | select(.kind == "ClusterRole" or .kind == "Role")
              | select(grants_pod_create)
              | "\(.kind)/\(.metadata.namespace // "")/\(.metadata.name)" ] as $roles
            | .items[]
            | select(.kind == "ClusterRoleBinding" or .kind == "RoleBinding")
            | "\(.roleRef.kind)/\(if .roleRef.kind == "Role" then .metadata.namespace else "" end)/\(.roleRef.name)" as $key
            | select(any($roles[]; . == $key))
            | .metadata.name')

    if [ -n "$pod_creators" ]; then
        log "具有Pod创建权限的RoleBinding:"
        echo "$pod_creators"
    fi
}

# Scan every distinct image referenced by running pods with Trivy, reporting
# HIGH and CRITICAL vulnerabilities that have a fix available.
# Trivy runs in a container with the host Docker socket mounted.
# A failing scan does not stop the loop.
audit_image_security() {
    log "检查镜像安全..."

    # Distinct image references, one per line. A failure of this pipeline is
    # ignored and simply leaves the list empty.
    local scan_targets
    scan_targets=$(kubectl get pods --all-namespaces \
        -o jsonpath='{.items[*].spec.containers[*].image}' \
        | tr ' ' '\n' \
        | sort -u) || true

    local target
    for target in $scan_targets; do
        log "扫描镜像: $target"

        # Best effort: findings or scanner errors must not abort the audit
        if ! docker run --rm \
            -v /var/run/docker.sock:/var/run/docker.sock \
            aquasec/trivy:latest \
            image --severity HIGH,CRITICAL --ignore-unfixed "$target"; then
            true
        fi
    done
}

# 检查资源限制
audit_resource_limits() {
    log "检查资源限制..."
    
    local no_limits_pods=$(kubectl get pods --all-namespaces -o json | \
        jq -r '.items[] | select(.spec.containers[]?.resources?.limits == null) | "\(.metadata.namespace)/\(.metadata.name)"')
    
    if [ -n "$no_limits_pods" ]; then
        warn "发现未设置资源限制的Pod:"
        echo "$no_limits_pods"
    else
        log "✓ 所有Pod均设置了资源限制"
    fi
}

# Run the audits again and save their combined output (stdout and stderr) to a
# timestamped report file in the current directory.
# Outputs: k8s-security-audit-<YYYYmmdd-HHMMSS>.txt
# Fixes: the "2>&1" redirections had been mangled by HTML markup, and
# echo "="*50 printed the literal text "=*50" instead of a 50-character rule.
generate_security_report() {
    log "生成安全审计报告..."

    local report_file
    report_file="k8s-security-audit-$(date +%Y%m%d-%H%M%S).txt"

    # Report header
    {
        echo "Kubernetes安全审计报告"
        echo "生成时间: $(date)"
        echo "集群: $(kubectl config current-context)"
        # The format consumes one argument per "=", printing 50 of them
        printf '=%.0s' {1..50}
        echo
        echo
    } > "$report_file"

    # Audit results, separated by blank lines
    {
        audit_pod_security
        echo
        audit_network_policies
        echo
        audit_rbac
        echo
        audit_resource_limits
    } >> "$report_file" 2>&1

    log "安全审计报告已保存至: $report_file"
}

# Entry point: check cluster connectivity, run each audit with output on the
# terminal, then write the report file.
main() {
    log "开始Kubernetes集群安全审计..."

    local stage
    for stage in \
        check_kubectl \
        audit_pod_security \
        audit_network_policies \
        audit_rbac \
        audit_resource_limits \
        generate_security_report; do
        "$stage"
    done

    log "安全审计完成"
}

# Run the entry point, forwarding all command-line arguments.
main "$@"

总结

Docker在生产环境中的安全使用需要从镜像构建、运行时防护到Kubernetes集成的全方位考虑。通过实施安全的多阶段构建、配置安全的守护进程、集成持续安全扫描,以及在Kubernetes中实施严格的安全策略,可以构建出既高效又安全的容器化应用。

核心安全实践:

  1. 镜像安全:多阶段构建、非root用户、漏洞扫描
  2. 运行时安全:用户命名空间、安全策略、资源限制
  3. 网络安全:网络策略、服务网格、TLS加密
  4. Kubernetes集成:Pod安全上下文、RBAC、网络策略
  5. 持续监控:安全审计、合规检查、事件响应

【进阶方向】
探索零信任架构在容器环境的应用、机密容器的使用,以及基于策略的自动化安全防护,进一步提升容器环境的安全水位。

© 版权声明
THE END
喜欢就支持一下吧
点赞10 分享
评论 抢沙发

请登录后发表评论

    暂无评论内容