容器技术已成为现代应用部署的标准,但在生产环境中,Docker的使用远不止于简单的docker run。本文将深入探讨生产级Docker的最佳实践,从镜像安全到运行时防护,提供完整的解决方案。
![图片[1]-Docker生产环境深度实战:镜像优化、安全加固与K8s集成指南](https://blogimg.vcvcc.cc/2025/11/20251119122718985-1024x768.png?imageView2/0/format/webp/q/75)
一、生产级Docker镜像构建与优化
(1) 安全加固的多阶段构建模式
# Production-grade, security-hardened Dockerfile

# Stage 1: security scan of the build context.
# NOTE: no later stage depends on this one, so BuildKit skips it unless it is
# built explicitly, e.g. `docker build --target security-scanner .`
FROM aquasec/trivy:latest AS security-scanner
WORKDIR /src
COPY . .
# Scan the project sources copied to /src; scanning "/" would only inspect
# the scanner image's own filesystem.
RUN trivy filesystem --exit-code 1 --no-progress /src

# Stage 2: dependency / build environment
FROM node:18-alpine AS builder
RUN apk add --no-cache \
    python3 \
    make \
    g++ \
    git
# Install the glibc compatibility package
RUN wget -q -O /etc/apk/keys/sgerrand.rsa.pub https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub && \
    wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.35-r0/glibc-2.35-r0.apk && \
    apk add glibc-2.35-r0.apk
WORKDIR /app
# Copy the package manifests first so the dependency layer can be cached
COPY package*.json ./
COPY yarn.lock ./
# Audit the declared dependencies
RUN npx audit-ci --critical --allowlist 1234567
RUN yarn install --frozen-lockfile --production=false --ignore-scripts
# Copy the application sources
COPY . .
# Run the security audit
RUN npm audit --audit-level=high
# Build the application
RUN yarn build

# Stage 3: production runtime
FROM node:18-alpine AS production
# Create the non-root user and hand it the application directory
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nextjs -u 1001 && \
    mkdir -p /app && \
    chown -R nextjs:nodejs /app
# Apply pending security updates
RUN apk update && apk upgrade --no-cache
# Install the required runtime tools
RUN apk add --no-cache \
    curl \
    tini \
    ca-certificates
# Use tini as the init process so signals are forwarded and zombies reaped
ENTRYPOINT ["/sbin/tini", "--"]
WORKDIR /app
# Copy the build output from the builder stage with the right ownership
COPY --from=builder --chown=nextjs:nodejs /app/.next ./.next
COPY --from=builder --chown=nextjs:nodejs /app/public ./public
COPY --from=builder --chown=nextjs:nodejs /app/package.json ./
COPY --from=builder --chown=nextjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nextjs:nodejs /app/next.config.js ./
# Drop to the non-root user
USER nextjs
# Application port
EXPOSE 3000
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:3000/api/health || exit 1
# Tighten file permissions: remove group/other write access.
# The capital X keeps execute bits on directories and on files that already
# have them; a blanket `chmod 644` would strip them from the node_modules/.bin
# targets that `yarn start` executes.
# (A read-only root filesystem is a runtime setting, not something chmod provides.)
RUN chmod -R u=rwX,go=rX /app
# Start the application
CMD ["yarn", "start"]
(2) 自动化安全扫描与CI/CD集成
# .github/workflows/docker-security.yml
name: Docker Security Scan

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  security-scan:
    name: Security Scanning
    runs-on: ubuntu-latest
    permissions:
      contents: read
      security-events: write   # needed by the SARIF upload step
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: false
          # Load the result into the local Docker daemon so the Trivy step
          # below can find the image by tag.
          load: true
          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:security-scan
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:security-scan
          format: 'sarif'
          output: 'trivy-results.sarif'
          severity: 'CRITICAL,HIGH'

      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v3
        if: always()
        with:
          sarif_file: 'trivy-results.sarif'

      - name: Run Hadolint Dockerfile linter
        uses: hadolint/hadolint-action@v3.1.0
        with:
          dockerfile: ./Dockerfile
          failure-threshold: warning

      - name: Check for secrets in code
        uses: gitleaks/gitleaks-action@v2
        with:
          config-path: .gitleaks.toml

      - name: Dependency audit
        run: |
          npm audit --audit-level=high
          npm fund

  build-and-push:
    name: Build and Push
    runs-on: ubuntu-latest
    needs: security-scan
    if: github.ref == 'refs/heads/main'
    permissions:
      contents: read
      packages: write   # needed to push to ghcr.io with GITHUB_TOKEN
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-
            type=raw,value=${{ github.sha }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64

      # The kubectl action takes kubectl arguments, not a shell script, so the
      # rollout is split into one step per kubectl invocation. The image tag is
      # the full commit SHA published by the type=raw tag above.
      - name: Deploy to Kubernetes
        uses: steebchen/kubectl@v2
        with:
          config: ${{ secrets.KUBECONFIG }}
          command: set image deployment/my-app app=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}

      - name: Wait for rollout
        uses: steebchen/kubectl@v2
        with:
          config: ${{ secrets.KUBECONFIG }}
          command: rollout status deployment/my-app
二、容器运行时安全与监控
(1) Docker守护进程安全配置
#!/bin/bash
# docker-security-hardening.sh
# Security-hardening script for a production Docker host.
set -euo pipefail

# ANSI colour codes. They are stored as the literal text "\033[...m" and only
# turned into escape bytes when printed with `echo -e` by the log helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Print an informational message preceded by a coloured timestamp.
# Arguments: $1 - message text (backslash escapes are interpreted)
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%b\n' "${GREEN}[${stamp}]${NC} $1"
}

# Print a warning message.
# Arguments: $1 - message text
warn() {
  printf '%b\n' "${YELLOW}[WARNING] $1${NC}"
}

# Print an error message (to stdout, like the other helpers).
# Arguments: $1 - message text
error() {
  printf '%b\n' "${RED}[ERROR] $1${NC}"
}
# Back up the existing daemon configuration, if there is one, to a
# timestamped copy next to it.
backup_docker_config() {
  local config=/etc/docker/daemon.json
  # Nothing to back up on a host that has no daemon.json yet.
  [ -f "$config" ] || return 0
  cp "$config" "$config.backup.$(date +%Y%m%d_%H%M%S)"
  log "Docker配置已备份"
}
# Write the hardened daemon configuration to /etc/docker/daemon.json,
# overwriting any existing file (main runs backup_docker_config beforehand).
# NOTE(review): "hosts" is set in this file; Docker's documentation describes a
# startup failure when the systemd unit also passes -H — confirm the unit used
# on the target host.
# NOTE(review): the API (tcp 2376) and metrics (9323) listeners bind to
# 0.0.0.0 — confirm that this exposure is intended.
# NOTE(review): verify that every key below (e.g. "apparmor-profile",
# "overlay2.override_kernel_check") and the authorization plugin name are
# accepted by the installed Docker version.
configure_docker_daemon() {
log "配置安全的Docker守护进程..."
# The heredoc body contains no $ or backticks, so it is written verbatim even
# though the delimiter is unquoted.
cat > /etc/docker/daemon.json << EOF
{
"userns-remap": "default",
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3"
},
"live-restore": true,
"icc": false,
"userland-proxy": false,
"no-new-privileges": true,
"selinux-enabled": true,
"apparmor-profile": "docker-default",
"cgroup-parent": "docker.slice",
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
],
"default-ulimits": {
"nofile": {
"Name": "nofile",
"Hard": 65536,
"Soft": 65536
},
"nproc": {
"Name": "nproc",
"Hard": 2048,
"Soft": 1024
}
},
"tls": true,
"tlsverify": true,
"tlscacert": "/etc/docker/ca.pem",
"tlscert": "/etc/docker/server-cert.pem",
"tlskey": "/etc/docker/server-key.pem",
"hosts": [
"fd://",
"tcp://0.0.0.0:2376"
],
"experimental": false,
"metrics-addr": "0.0.0.0:9323",
"authorization-plugins": ["docker.io/harbor/authorization-plugin:latest"]
}
EOF
log "Docker守护进程安全配置完成"
}
# Make sure /etc/subuid and /etc/subgid each contain a subordinate-ID range
# for the "dockremap" user; a file that already mentions it is left alone.
configure_user_namespace() {
  log "配置用户命名空间映射..."
  local id_file
  for id_file in /etc/subuid /etc/subgid; do
    if ! grep -q "dockremap" "$id_file"; then
      echo "dockremap:100000:65536" >> "$id_file"
    fi
  done
  log "用户命名空间映射配置完成"
}
# Install the "docker-containers" AppArmor profile and load it into the kernel.
configure_security_policies() {
  local profile_path=/etc/apparmor.d/docker-containers
  log "配置容器安全策略..."
  # Quoted delimiter: the profile text (including @{PROC}) is written literally.
  cat > "$profile_path" << 'EOF'
#include <tunables/global>
profile docker-containers flags=(attach_disconnected,mediate_deleted) {
#include <abstractions/base>
network inet stream,
network inet6 stream,
capability chown,
capability dac_override,
capability setuid,
capability setgid,
capability net_bind_service,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/kcore rwklx,
deny mount,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
deny /sys/fs/[^c]*/** wklx,
deny /sys/fs/c[^g]*/** wklx,
deny /sys/kernel/security/** rwklx,
}
EOF
  # -r replaces the profile if a previous version is already loaded.
  apparmor_parser -r "$profile_path"
  log "AppArmor安全策略配置完成"
}
# Install a systemd drop-in with resource limits for docker.service and
# reload systemd so it is picked up.
# Arguments: $1 - drop-in directory (optional; defaults to
#                 /etc/systemd/system/docker.service.d)
configure_docker_service() {
  local dropin_dir=${1:-/etc/systemd/system/docker.service.d}
  log "配置Docker系统服务..."
  # The drop-in directory does not exist on a fresh install; without this the
  # redirection below fails and `set -e` aborts the whole script.
  mkdir -p "$dropin_dir"
  cat > "$dropin_dir/security.conf" << 'EOF'
[Service]
LimitNOFILE=1048576
LimitNPROC=2048
LimitCORE=0
TasksMax=2048
Delegate=yes
CPUAccounting=yes
MemoryAccounting=yes
BlockIOAccounting=yes
EOF
  systemctl daemon-reload
  log "Docker系统服务配置完成"
}
# Install Falco (container runtime security monitoring) from the upstream apt
# repository, add two local rules, and enable/start the service.
install_security_tools() {
  local keyring=/usr/share/keyrings/falco-archive-keyring.gpg
  local repo_list=/etc/apt/sources.list.d/falcosecurity.list

  log "安装容器安全监控工具..."

  # Store the repository key in a dedicated keyring instead of the deprecated
  # `apt-key`. -f makes curl fail on HTTP errors so an error page is never
  # imported as a key; --yes lets a re-run overwrite the keyring.
  curl -fsSL https://falco.org/repo/falcosecurity-packages.asc \
    | gpg --dearmor --yes -o "$keyring"

  # Overwrite (not append) so repeated runs do not duplicate the entry.
  echo "deb [signed-by=${keyring}] https://download.falco.org/packages/deb stable main" \
    | tee "$repo_list"

  apt-get update
  apt-get install -y falco

  # Local Falco rules. The delimiter is quoted so the YAML is written
  # literally; the nesting below is significant.
  cat > /etc/falco/falco_rules.local.yaml << 'EOF'
- rule: Write below binary dir
  desc: 尝试在二进制目录中写入文件
  condition: >
    bin_dir and evt.dir = < and open_write
    and not user_known_write_below_binary_dir_activities
  output: >
    文件 below binary dir written (user=%user.name command=%proc.cmdline file=%fd.name)
  priority: ERROR
  tags: [filesystem]

- rule: Launch privileged container
  desc: 检测特权容器启动
  condition: >
    container_started and container.privileged=true
  output: >
    特权容器启动 (user=%user.name command=%proc.cmdline %container.info)
  priority: ERROR
  tags: [container]
EOF

  systemctl enable falco
  systemctl start falco
  log "容器安全监控工具安装完成"
}
# Create the "security-net" bridge network with inter-container communication
# disabled. Safe to re-run: an existing network is left untouched.
configure_network_security() {
  log "配置容器网络安全性..."

  # `docker network create` fails when the name is taken, which would abort
  # the script under `set -e` on a second run.
  if docker network inspect security-net > /dev/null 2>&1; then
    log "安全网络已存在,跳过创建"
    return 0
  fi

  docker network create \
    --driver bridge \
    --opt com.docker.network.bridge.name=docker-security \
    --opt com.docker.network.bridge.enable_icc=false \
    --opt com.docker.network.bridge.enable_ip_masquerade=true \
    security-net

  log "安全网络配置完成"
}
# Entry point: verify root privileges, then run every hardening step in order.
main() {
  log "开始Docker生产环境安全加固..."

  # Every step writes under /etc or talks to the Docker daemon.
  if (( EUID != 0 )); then
    error "请使用root权限运行此脚本"
    exit 1
  fi

  local step
  for step in \
    backup_docker_config \
    configure_user_namespace \
    configure_docker_daemon \
    configure_security_policies \
    configure_docker_service \
    install_security_tools \
    configure_network_security; do
    "$step"
  done

  log "Docker安全加固完成,请重启Docker服务: systemctl restart docker"
  warn "注意:重启Docker服务会影响正在运行的容器"
}
# 执行主函数
main "$@"
(2) 容器运行时安全监控系统
# docker-compose.security.yml
version: '3.8'

# Shared hardening defaults, merged into services with `<<: *default-security`.
x-security: &default-security
  security_opt:
    - no-new-privileges:true
  cap_drop:
    - ALL
  cap_add:
    - CHOWN
    - DAC_OVERRIDE
    - SETGID
    - SETUID
    - NET_BIND_SERVICE
  read_only: true
  tmpfs:
    - /tmp:rw,noexec,nosuid,size=64m

services:
  # Main application container
  webapp:
    build: .
    image: my-registry/webapp:${TAG:-latest}
    container_name: webapp-secure
    restart: unless-stopped
    <<: *default-security
    # Additional security settings (this list replaces the merged security_opt)
    security_opt:
      - no-new-privileges:true
      - apparmor:docker-containers
    privileged: false
    # No network_mode here: it only accepts bridge/host/none/service:/container:
    # values. The service joins the external "security-net" network through
    # the `default` network declared at the bottom of this file.
    environment:
      - NODE_ENV=production
      - READONLY_FILESYSTEM=true
    volumes:
      - app-data:/app/data:rw
      - /etc/localtime:/etc/localtime:ro
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        labels: "production,security"
    deploy:
      resources:
        limits:
          memory: 512M
          cpus: '1.0'
        reservations:
          memory: 256M
          cpus: '0.5'

  # Falco runtime security monitoring
  falco:
    image: falcosecurity/falco:latest
    container_name: falco-security
    privileged: true
    restart: unless-stopped
    volumes:
      - /var/run/docker.sock:/host/var/run/docker.sock
      - /dev:/host/dev:ro
      - /proc:/host/proc:ro
      - /boot:/host/boot:ro
      - /lib/modules:/host/lib/modules:ro
      - /usr:/host/usr:ro
      - /etc:/host/etc:ro
      - ./falco/rules.yaml:/etc/falco/falco_rules.yaml:ro
      - ./falco/alerts.log:/var/log/falco.log:rw
    environment:
      - HOST_ROOT=/
      - FALCO_BPF_PROBE=""
    cap_add:
      - SYS_PTRACE
      - SYS_ADMIN
    command:
      - /usr/bin/falco
      - --cri
      - /var/run/docker.sock
      - -K
      - /var/run/docker.sock
      - --pkgsource=auto
    logging:
      driver: "json-file"
      options:
        max-size: "100m"
        max-file: "5"

  # Periodic image vulnerability scanner
  trivy:
    image: aquasec/trivy:latest
    container_name: trivy-scanner
    restart: on-failure
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./trivy/cache:/root/.cache:rw
      - ./trivy/reports:/reports:rw
    # The image's entrypoint is the trivy binary, so a shell loop needs the
    # entrypoint overridden. `$$` keeps Compose from interpolating `$(date …)`;
    # the shell inside the container receives a single `$`.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        while true; do
          trivy image --format json --output "/reports/scan-$$(date +%Y%m%d-%H%M%S).json" my-registry/webapp:latest
          sleep 3600
        done
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Security event handler
  security-handler:
    image: python:3.9-alpine
    container_name: security-handler
    restart: unless-stopped
    volumes:
      - ./security-handler:/app
      - ./falco/alerts.log:/var/log/falco.log:ro
    working_dir: /app
    command: python security_handler.py
    environment:
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL}
      - EMAIL_SERVER=${EMAIL_SERVER}
      - LOG_LEVEL=INFO
    depends_on:
      - falco

volumes:
  app-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/app/data

networks:
  default:
    name: security-net
    external: true
三、Kubernetes生产级Docker集成
(1) 安全的Kubernetes部署配置
# k8s/secure-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: webapp-secure
  namespace: production
  labels:
    app: webapp
    security-tier: high
spec:
  replicas: 3
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: webapp
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: webapp
        version: v1.0.0
      annotations:
        # Security annotations
        container.apparmor.security.beta.kubernetes.io/webapp: runtime/default
        seccomp.security.alpha.kubernetes.io/pod: runtime/default
        # Monitoring annotations
        prometheus.io/scrape: "true"
        prometheus.io/port: "3000"
        prometheus.io/path: "/metrics"
    spec:
      # Pod-level security context.
      # allowPrivilegeEscalation and readOnlyRootFilesystem are container-level
      # fields only; they are set on the container's securityContext below.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1001
        runAsGroup: 1001
        fsGroup: 1001
        supplementalGroups: [1001]
        seccompProfile:
          type: RuntimeDefault
      # Service account
      serviceAccountName: webapp-sa
      automountServiceAccountToken: false
      # Node selector
      nodeSelector:
        kubernetes.io/arch: amd64
        security: enforced
      # Tolerations
      tolerations:
        - key: "security"
          operator: "Equal"
          value: "enforced"
          effect: "NoSchedule"
      # Affinity: prefer spreading replicas across nodes
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - webapp
                topologyKey: "kubernetes.io/hostname"
      containers:
        - name: webapp
          # ${IMAGE_TAG} is a placeholder: kubectl does not expand it, so
          # substitute it (e.g. with envsubst) before applying.
          image: my-registry/webapp:${IMAGE_TAG}
          imagePullPolicy: IfNotPresent
          # Container-level security context
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
              add:
                - CHOWN
                - DAC_OVERRIDE
                - SETGID
                - SETUID
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1001
            runAsGroup: 1001
            seccompProfile:
              type: RuntimeDefault
          ports:
            - containerPort: 3000
              protocol: TCP
          env:
            - name: NODE_ENV
              value: "production"
            - name: READONLY_FILESYSTEM
              value: "true"
            # Values read from a Secret
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: database-url
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: jwt-secret
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: 3000
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 1
          startupProbe:
            httpGet:
              path: /health
              port: 3000
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 10
            failureThreshold: 10
          volumeMounts:
            - name: app-data
              mountPath: /app/data
              readOnly: false
            - name: tmp-volume
              mountPath: /tmp
            - name: config-volume
              mountPath: /app/config
              readOnly: true
          # Lifecycle hook: delay shutdown by 10 seconds before the container
          # is stopped.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh", "-c", "sleep 10"]
      volumes:
        - name: app-data
          persistentVolumeClaim:
            claimName: webapp-data-pvc
        - name: tmp-volume
          emptyDir:
            medium: Memory
            sizeLimit: 64Mi
        - name: config-volume
          configMap:
            name: webapp-config
            defaultMode: 0644
---
# Pod security policy
# NOTE: PodSecurityPolicy (policy/v1beta1) was removed in Kubernetes v1.25.
# This manifest only applies to older clusters; newer ones need Pod Security
# Admission or a policy engine instead.
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: restricted-psp
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'runtime/default'
    apparmor.security.beta.kubernetes.io/allowedProfileNames: 'runtime/default'
    seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default'
    apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
spec:
  privileged: false
  allowPrivilegeEscalation: false
  requiredDropCapabilities:
    - ALL
  volumes:
    - 'configMap'
    - 'emptyDir'
    - 'projected'
    - 'secret'
    - 'downwardAPI'
    - 'persistentVolumeClaim'
  hostNetwork: false
  hostIPC: false
  hostPID: false
  runAsUser:
    rule: 'MustRunAsNonRoot'
  seLinux:
    rule: 'RunAsAny'
  supplementalGroups:
    rule: 'MustRunAs'
    ranges:
      - min: 1
        max: 65535
  fsGroup:
    rule: 'MustRunAs'
    ranges:
      - min: 1
        max: 65535
  readOnlyRootFilesystem: true
---
# Network policy
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: webapp-network-policy
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: webapp
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Either source is allowed: the ingress-nginx namespace, or monitoring
    # pods in this namespace.
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
        - podSelector:
            matchLabels:
              app: monitoring
      ports:
        - protocol: TCP
          port: 3000
  egress:
    # DNS: once Egress is restricted, name resolution must be allowed
    # explicitly or lookups of the database/redis services time out.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    - to:
        - podSelector:
            matchLabels:
              app: database
      ports:
        - protocol: TCP
          port: 5432
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379
    - to:
        - namespaceSelector:
            matchLabels:
              name: monitoring
      ports:
        - protocol: TCP
          port: 9090
---
# Service account (API token is not auto-mounted into pods)
apiVersion: v1
kind: ServiceAccount
metadata:
  name: webapp-sa
  namespace: production
automountServiceAccountToken: false
---
# Role binding: grants the "webapp-role" Role to the webapp service account.
# NOTE(review): the Role itself is not defined in this manifest — confirm it
# is created elsewhere.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: webapp-rolebinding
  namespace: production
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: webapp-role
subjects:
  - kind: ServiceAccount
    name: webapp-sa
    namespace: production
(2) 持续安全监控与合规检查
#!/bin/bash
# k8s-security-audit.sh
# Security audit script for a Kubernetes cluster.
set -euo pipefail

# ANSI colour codes. They are stored as the literal text "\033[...m" and only
# turned into escape bytes when printed with `echo -e` by the log helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Print an informational message preceded by a coloured timestamp.
# Arguments: $1 - message text (backslash escapes are interpreted)
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%b\n' "${GREEN}[${stamp}]${NC} $1"
}

# Print a warning message.
# Arguments: $1 - message text
warn() {
  printf '%b\n' "${YELLOW}[WARNING] $1${NC}"
}

# Print an error message (to stdout, like the other helpers).
# Arguments: $1 - message text
error() {
  printf '%b\n' "${RED}[ERROR] $1${NC}"
}
# Verify that kubectl is available and can reach the cluster.
# Exits the script with status 1 if either check fails.
check_kubectl() {
  command -v kubectl > /dev/null 2>&1 || {
    error "kubectl未安装"
    exit 1
  }
  kubectl cluster-info > /dev/null 2>&1 || {
    error "无法连接到Kubernetes集群"
    exit 1
  }
  log "Kubernetes集群连接正常"
}
# Audit pod security settings across all namespaces: privileged containers,
# containers not forced to run as non-root, and writable root filesystems.
# Each offending pod is reported once as "namespace/name".
audit_pod_security() {
  log "检查Pod安全配置..."

  # Fetch the pod list once and reuse it for all three checks. Declaration and
  # assignment are separate so a kubectl/jq failure is not masked by `local`.
  local pods_json
  pods_json=$(kubectl get pods --all-namespaces -o json)

  # Privileged containers. any(...) yields a pod once, however many of its
  # containers match.
  local privileged_pods
  privileged_pods=$(jq -r '
    .items[]
    | select(any(.spec.containers[]?; .securityContext.privileged == true))
    | "\(.metadata.namespace)/\(.metadata.name)"' <<< "$pods_json")
  if [ -n "$privileged_pods" ]; then
    warn "发现特权容器:"
    echo "$privileged_pods"
  else
    log "✓ 无特权容器"
  fi

  # Containers that may run as root. A container without its own runAsNonRoot
  # inherits the pod-level securityContext value.
  local root_pods
  root_pods=$(jq -r '
    .items[]
    | .spec.securityContext.runAsNonRoot as $pod_level
    | select(any(.spec.containers[]?;
        (.securityContext.runAsNonRoot
         | if . == null then $pod_level else . end) != true))
    | "\(.metadata.namespace)/\(.metadata.name)"' <<< "$pods_json")
  if [ -n "$root_pods" ]; then
    warn "发现以root用户运行的容器:"
    echo "$root_pods"
  else
    log "✓ 所有容器均以非root用户运行"
  fi

  # Writable root filesystems (container-level setting only).
  local writable_root_pods
  writable_root_pods=$(jq -r '
    .items[]
    | select(any(.spec.containers[]?;
        .securityContext.readOnlyRootFilesystem != true))
    | "\(.metadata.namespace)/\(.metadata.name)"' <<< "$pods_json")
  if [ -n "$writable_root_pods" ]; then
    warn "发现可写根文件系统的容器:"
    echo "$writable_root_pods"
  else
    log "✓ 所有容器均使用只读根文件系统"
  fi
}
# Report every namespace (other than kube-system and kube-public) that has no
# NetworkPolicy objects.
audit_network_policies() {
  log "检查网络策略..."

  local ns_list
  # A failed lookup does not abort the audit; the list is simply empty.
  ns_list=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}') || true

  local unprotected=()
  local ns policy_count
  # Intentional word splitting: the jsonpath output is space-separated.
  for ns in $ns_list; do
    case "$ns" in
      kube-system | kube-public) continue ;;
    esac
    # A failed lookup does not abort the audit; wc still reports 0 lines.
    policy_count=$(kubectl get networkpolicies -n "$ns" -o name | wc -l) || true
    if [ "$policy_count" -eq 0 ]; then
      unprotected+=("$ns")
    fi
  done

  if [ ${#unprotected[@]} -eq 0 ]; then
    log "✓ 所有命名空间均配置了网络策略"
  else
    warn "以下命名空间缺少网络策略:"
    printf '%s\n' "${unprotected[@]}"
  fi
}
# Audit RBAC: list ServiceAccounts bound to cluster-admin, and list the
# (Cluster)RoleBindings whose referenced role allows creating pods.
audit_rbac() {
  log "检查RBAC配置..."

  # ServiceAccounts bound to the cluster-admin ClusterRole.
  local cluster_admin_bindings
  cluster_admin_bindings=$(kubectl get clusterrolebindings -o json | jq -r '
    .items[]
    | select(.roleRef.name == "cluster-admin")
    | .subjects[]?
    | select(.kind == "ServiceAccount")
    | "\(.namespace)/\(.name)"')
  if [ -n "$cluster_admin_bindings" ]; then
    warn "发现ServiceAccount具有cluster-admin权限:"
    echo "$cluster_admin_bindings"
  else
    log "✓ 无ServiceAccount具有cluster-admin权限"
  fi

  # Pod-creation permission. A binding's roleRef carries only apiGroup, kind
  # and name, never the rules, so the roles are resolved first: collect every
  # Role/ClusterRole with a rule granting "create" on core-group "pods"
  # (wildcards included) ...
  local pod_creator_roles
  pod_creator_roles=$(kubectl get clusterroles,roles --all-namespaces -o json | jq -c '
    [ .items[]
      | select(any(.rules[]?;
          any(.apiGroups[]?; . == "" or . == "*")
          and any(.resources[]?; . == "pods" or . == "*")
          and any(.verbs[]?; . == "create" or . == "*")))
      | {kind: .kind, namespace: (.metadata.namespace // ""), name: .metadata.name} ]')

  # ... then keep the bindings that reference one of them. A Role only matches
  # a binding in its own namespace; a ClusterRole matches by name alone.
  local pod_creators
  pod_creators=$(kubectl get clusterrolebindings,rolebindings --all-namespaces -o json \
    | jq -r --argjson roles "$pod_creator_roles" '
      .items[]
      | . as $binding
      | select(any($roles[];
          .kind == $binding.roleRef.kind
          and .name == $binding.roleRef.name
          and (.kind == "ClusterRole"
               or .namespace == ($binding.metadata.namespace // ""))))
      | .metadata.name')
  if [ -n "$pod_creators" ]; then
    log "具有Pod创建权限的RoleBinding:"
    echo "$pod_creators"
  fi
}
# Scan every distinct container image currently used by pods in the cluster
# with Trivy (run through Docker), reporting HIGH and CRITICAL findings.
audit_image_security() {
  log "检查镜像安全..."

  # Fixed part of the scanner invocation; the image name is appended per scan.
  local trivy_cmd=(
    docker run --rm
    -v /var/run/docker.sock:/var/run/docker.sock
    aquasec/trivy:latest
    image --severity HIGH,CRITICAL --ignore-unfixed
  )

  local image_list
  # A failed lookup does not abort the audit; the list is simply empty.
  image_list=$(kubectl get pods --all-namespaces \
    -o jsonpath='{.items[*].spec.containers[*].image}' | tr ' ' '\n' | sort -u) || true

  local image
  # Intentional word splitting: one image reference per word.
  for image in $image_list; do
    log "扫描镜像: $image"
    # A scan failure for one image must not stop the remaining scans.
    "${trivy_cmd[@]}" "$image" || true
  done
}
# Report pods that have at least one container without resource limits.
# Each offending pod is listed once as "namespace/name".
audit_resource_limits() {
  log "检查资源限制..."

  # Declaration and assignment are separate so a kubectl/jq failure is not
  # masked by `local`. any(...) yields a pod once even when several of its
  # containers lack limits.
  local no_limits_pods
  no_limits_pods=$(kubectl get pods --all-namespaces -o json | jq -r '
    .items[]
    | select(any(.spec.containers[]?; .resources.limits == null))
    | "\(.metadata.namespace)/\(.metadata.name)"')

  if [ -n "$no_limits_pods" ]; then
    warn "发现未设置资源限制的Pod:"
    echo "$no_limits_pods"
  else
    log "✓ 所有Pod均设置了资源限制"
  fi
}
# Run the audits again and save their combined output (stdout and stderr) to
# a timestamped report file in the current directory.
generate_security_report() {
  log "生成安全审计报告..."

  local report_file
  report_file="k8s-security-audit-$(date +%Y%m%d-%H%M%S).txt"

  # Report header.
  {
    echo "Kubernetes安全审计报告"
    echo "生成时间: $(date)"
    echo "集群: $(kubectl config current-context)"
    # A 50-character "=" rule: each of the 50 arguments prints "=" followed by
    # a zero-width string.
    printf '=%.0s' {1..50}
    echo
    echo
  } > "$report_file"

  # Append each audit section, separated by a blank line.
  audit_pod_security >> "$report_file" 2>&1
  echo >> "$report_file"
  audit_network_policies >> "$report_file" 2>&1
  echo >> "$report_file"
  audit_rbac >> "$report_file" 2>&1
  echo >> "$report_file"
  audit_resource_limits >> "$report_file" 2>&1

  log "安全审计报告已保存至: $report_file"
}
# Entry point: check connectivity, print every audit to the terminal, then
# write the report file.
main() {
  log "开始Kubernetes集群安全审计..."

  local stage
  for stage in \
    check_kubectl \
    audit_pod_security \
    audit_network_policies \
    audit_rbac \
    audit_resource_limits \
    generate_security_report; do
    "$stage"
  done

  log "安全审计完成"
}
# 执行主函数
main "$@"
总结
Docker在生产环境中的安全使用需要从镜像构建、运行时防护到Kubernetes集成的全方位考虑。通过实施安全的多阶段构建、配置安全的守护进程、集成持续安全扫描,以及在Kubernetes中实施严格的安全策略,可以构建出既高效又安全的容器化应用。
核心安全实践:
- 镜像安全:多阶段构建、非root用户、漏洞扫描
- 运行时安全:用户命名空间、安全策略、资源限制
- 网络安全:网络策略、服务网格、TLS加密
- Kubernetes集成:Pod安全上下文、RBAC、网络策略
- 持续监控:安全审计、合规检查、事件响应
【进阶方向】
探索零信任架构在容器环境的应用、机密容器的使用,以及基于策略的自动化安全防护,进一步提升容器环境的安全水位。
© 版权声明
THE END














暂无评论内容