containerization
初级
未知
未知作者
更新于 2025-06-14
容器化与部署
容器化概述
容器化是现代应用部署的标准做法,为LLM应用提供了一致的运行环境、简化的部署流程和良好的可扩展性。
Docker容器化
1. Dockerfile最佳实践
DOCKERFILE
# Multi-stage build: install dependencies in a throwaway "builder" stage,
# then copy only the installed packages into a slim runtime image.
FROM python:3.11-slim AS builder

# Working directory for the build stage
WORKDIR /app

# Build-time system dependencies (compilers for packages with C extensions).
# --no-install-recommends keeps the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first so this layer stays cached
# until requirements.txt actually changes.
COPY requirements.txt .

# Install Python dependencies into the user site (/root/.local)
RUN pip install --no-cache-dir --user -r requirements.txt

# ---------- Production stage ----------
FROM python:3.11-slim

# Create an unprivileged user to run the app
RUN groupadd -r appuser && useradd -r -g appuser appuser

WORKDIR /app

# Runtime-only system dependencies (curl is required by HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy the installed Python packages from the builder stage
COPY --from=builder /root/.local /home/appuser/.local

# Copy application code, owned by the unprivileged user
COPY --chown=appuser:appuser . .

# Environment: put user-site scripts on PATH; PYTHONUNBUFFERED flushes
# stdout/stderr immediately so container logs appear in real time.
ENV PATH=/home/appuser/.local/bin:$PATH
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1

# Drop root privileges
USER appuser

# Container-level health check against the app's /health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

EXPOSE 8000

# Start the ASGI server
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
2. Docker Compose配置
YAML
# docker-compose.yml
# NOTE(review): the top-level `version` key is obsolete in Compose V2 and is
# ignored there; kept for compatibility with older tooling.
version: '3.8'

services:
  llm-app:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    environment:
      # SECURITY(review): credentials are hard-coded for demo purposes only —
      # inject them via an .env file or a secrets manager in production.
      - DATABASE_URL=postgresql://user:password@postgres:5432/llmdb
      - REDIS_URL=redis://redis:6379
      - LLM_API_KEY=${LLM_API_KEY}
    depends_on:
      - postgres
      - redis
    volumes:
      - ./logs:/app/logs
      - ./data:/app/data
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  postgres:
    image: postgres:15
    environment:
      - POSTGRES_DB=llmdb
      - POSTGRES_USER=user
      - POSTGRES_PASSWORD=password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Seed script runs only on first initialization of the data volume
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql
    ports:
      # SECURITY(review): publishing 5432 exposes the database on the host;
      # drop this mapping unless external access is genuinely required.
      - "5432:5432"
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    restart: unless-stopped
    # AOF persistence so cached state survives restarts
    command: redis-server --appendonly yes

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
    depends_on:
      - llm-app
    restart: unless-stopped

volumes:
  postgres_data:
  redis_data:

networks:
  default:
    driver: bridge
3. 应用配置管理
PYTHON
import os  # NOTE(review): unused in this snippet; kept in case other code relies on it
from typing import Optional

# NOTE(review): `BaseSettings` was moved to the separate `pydantic-settings`
# package in pydantic v2 — this import is valid only for pydantic v1.
from pydantic import BaseSettings, Field


class Settings(BaseSettings):
    """Application configuration, loaded from environment variables / .env."""

    # Basic application settings
    app_name: str = Field(default="LLM Application", env="APP_NAME")
    app_version: str = Field(default="1.0.0", env="APP_VERSION")
    debug: bool = Field(default=False, env="DEBUG")

    # Server settings
    host: str = Field(default="0.0.0.0", env="HOST")
    port: int = Field(default=8000, env="PORT")
    workers: int = Field(default=1, env="WORKERS")

    # Database settings (required — pydantic raises at construction if missing)
    database_url: str = Field(env="DATABASE_URL")
    redis_url: str = Field(env="REDIS_URL")

    # LLM settings
    llm_api_key: str = Field(env="LLM_API_KEY")
    llm_model: str = Field(default="gpt-3.5-turbo", env="LLM_MODEL")
    llm_temperature: float = Field(default=0.7, env="LLM_TEMPERATURE")

    # Security settings
    secret_key: str = Field(env="SECRET_KEY")
    allowed_hosts: list = Field(default=["*"], env="ALLOWED_HOSTS")

    # Logging settings
    log_level: str = Field(default="INFO", env="LOG_LEVEL")
    log_file: Optional[str] = Field(default=None, env="LOG_FILE")

    # Monitoring settings
    enable_metrics: bool = Field(default=True, env="ENABLE_METRICS")
    metrics_port: int = Field(default=9090, env="METRICS_PORT")

    class Config:
        env_file = ".env"
        case_sensitive = False


# Module-level singleton; constructing it already validates required fields.
settings = Settings()


def validate_config():
    """Check that all required settings are non-empty.

    NOTE(review): pydantic rejects *missing* required fields when
    ``Settings()`` is constructed above, so these checks mainly guard
    against empty-string values.

    Returns:
        True when every required value is present.

    Raises:
        ValueError: listing every missing configuration value.
    """
    errors = []
    if not settings.database_url:
        errors.append("DATABASE_URL is required")
    if not settings.redis_url:
        errors.append("REDIS_URL is required")
    if not settings.llm_api_key:
        errors.append("LLM_API_KEY is required")
    if not settings.secret_key:
        errors.append("SECRET_KEY is required")
    if errors:
        raise ValueError(f"Configuration errors: {', '.join(errors)}")
    return True


# Usage example
if __name__ == "__main__":
    try:
        validate_config()
        print("配置验证通过")
        print(f"应用: {settings.app_name} v{settings.app_version}")
        print(f"服务器: {settings.host}:{settings.port}")
        print(f"调试模式: {settings.debug}")
    except ValueError as e:
        print(f"配置错误: {e}")
Kubernetes部署
1. 基础部署配置
YAML
# deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-app
  labels:
    app: llm-app
    version: v1
spec:
  replicas: 3
  selector:
    matchLabels:
      app: llm-app
  template:
    metadata:
      labels:
        app: llm-app
        version: v1
    spec:
      containers:
        - name: llm-app
          # NOTE(review): pin an immutable tag or digest instead of `latest`
          # so rollouts are reproducible and rollbacks are possible.
          image: your-registry/llm-app:latest
          ports:
            - containerPort: 8000
          env:
            # Sensitive values come from the app-secrets Secret below
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: database-url
            - name: LLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: llm-api-key
            - name: REDIS_URL
              value: "redis://redis-service:6379"
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          # Liveness: restart the container if /health stops answering
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
          # Readiness: only route traffic once /ready answers
          readinessProbe:
            httpGet:
              path: /ready
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
          volumeMounts:
            - name: app-config
              mountPath: /app/config
            - name: logs
              mountPath: /app/logs
      volumes:
        - name: app-config
          configMap:
            name: app-config
        - name: logs
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: llm-app-service
spec:
  selector:
    app: llm-app
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000
  type: ClusterIP
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-config
data:
  app.yaml: |
    app:
      name: "LLM Application"
      version: "1.0.0"
      debug: false
    llm:
      model: "gpt-3.5-turbo"
      temperature: 0.7
      max_tokens: 1000
    cache:
      ttl: 3600
      max_size: 1000
---
apiVersion: v1
kind: Secret
metadata:
  name: app-secrets
type: Opaque
data:
  # Values must be base64-encoded, e.g. `echo -n '<value>' | base64`
  database-url: <base64-encoded-database-url>
  llm-api-key: <base64-encoded-api-key>
  secret-key: <base64-encoded-secret-key>
2. 自动扩缩容配置
YAML
# hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-app
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      # Require 5 minutes of sustained low load before scaling in,
      # and remove at most 10% of pods per minute.
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      # Scale out faster than in: up to 50% more pods per minute
      stabilizationWindowSeconds: 60
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
---
# vpa.yaml (optional; requires the VPA add-on to be installed in the cluster)
# NOTE(review): running VPA in "Auto" mode alongside an HPA that scales on the
# same CPU/memory metrics can conflict — confirm before enabling both.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: llm-app-vpa
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-app
  updatePolicy:
    updateMode: "Auto"
  resourcePolicy:
    containerPolicies:
      - containerName: llm-app
        maxAllowed:
          cpu: 2
          memory: 4Gi
        minAllowed:
          cpu: 100m
          memory: 256Mi
3. Ingress配置
YAML
# ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llm-app-ingress
  annotations:
    # NOTE(review): the `kubernetes.io/ingress.class` annotation is deprecated;
    # prefer `spec.ingressClassName: nginx` on current clusters.
    kubernetes.io/ingress.class: nginx
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    nginx.ingress.kubernetes.io/rate-limit: "100"
    nginx.ingress.kubernetes.io/rate-limit-window: "1m"
    # cert-manager issues and renews the TLS certificate automatically
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  tls:
    - hosts:
        - api.yourapp.com
      secretName: llm-app-tls
  rules:
    - host: api.yourapp.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llm-app-service
                port:
                  number: 80
部署脚本
1. 自动化部署脚本
BASH
#!/bin/bash
# deploy.sh — build, push and roll out the LLM application to Kubernetes.
#
# Usage: ./deploy.sh [VERSION] [NAMESPACE]
#   VERSION    image tag to build and deploy (default: latest)
#   NAMESPACE  target Kubernetes namespace   (default: default)

# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

# Configuration
REGISTRY="your-registry.com"
IMAGE_NAME="llm-app"
VERSION="${1:-latest}"
NAMESPACE="${2:-default}"
# Export so kubectl actually sees the override (a plain shell variable would
# not be visible to child processes).
KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}"
export KUBECONFIG

echo "开始部署 LLM 应用..."
echo "版本: $VERSION"
echo "命名空间: $NAMESPACE"

# Build the image
echo "构建 Docker 镜像..."
docker build -t "$REGISTRY/$IMAGE_NAME:$VERSION" .

# Push the image to the registry
echo "推送镜像到注册表..."
docker push "$REGISTRY/$IMAGE_NAME:$VERSION"

# Update the image reference in the manifest.
# NOTE(review): `sed -i` edits the tracked file in place and is not portable
# to BSD/macOS sed; consider `kubectl set image` or kustomize instead.
echo "更新 Kubernetes 配置..."
sed -i "s|image: .*|image: $REGISTRY/$IMAGE_NAME:$VERSION|g" k8s/deployment.yaml

# Apply all manifests
echo "应用 Kubernetes 配置..."
kubectl apply -f k8s/ -n "$NAMESPACE"

# Block until the rollout finishes (or fail after 5 minutes)
echo "等待部署完成..."
kubectl rollout status deployment/llm-app -n "$NAMESPACE" --timeout=300s

# Show the resulting pods
echo "验证部署..."
kubectl get pods -l app=llm-app -n "$NAMESPACE"

# Report the external address if the LoadBalancer has assigned one
SERVICE_IP=$(kubectl get service llm-app-service -n "$NAMESPACE" -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
if [ -n "$SERVICE_IP" ]; then
    echo "服务地址: http://$SERVICE_IP"
else
    echo "服务正在获取外部IP..."
fi

echo "部署完成!"
2. 健康检查脚本
PYTHON
#!/usr/bin/env python3
# health_check.py — poll an application's /health and /ready endpoints
# until they answer HTTP 200 or a timeout expires.
import requests
import sys
import time
import argparse


def check_health(url, timeout=30, interval=5):
    """Poll ``{url}/health`` until it returns 200 or *timeout* seconds elapse.

    Prints status/version/uptime reported by the endpoint on success.

    Args:
        url: Base URL of the application (no trailing slash expected).
        timeout: Total seconds to keep retrying.
        interval: Seconds to sleep between attempts.

    Returns:
        True if the endpoint answered 200 in time, False otherwise.
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            # Short per-request timeout so one hung request cannot eat the budget
            response = requests.get(f"{url}/health", timeout=5)
            if response.status_code == 200:
                health_data = response.json()
                print(f"✅ 应用健康检查通过")
                print(f"状态: {health_data.get('status', 'unknown')}")
                print(f"版本: {health_data.get('version', 'unknown')}")
                print(f"启动时间: {health_data.get('uptime', 'unknown')}")
                return True
            else:
                print(f"❌ 健康检查失败,状态码: {response.status_code}")
        except requests.exceptions.RequestException as e:
            # Connection refused / DNS failure / timeout — keep retrying
            print(f"⏳ 连接失败: {e}")
        time.sleep(interval)
    print(f"❌ 健康检查超时 ({timeout}秒)")
    return False


def check_readiness(url, timeout=30, interval=5):
    """Poll ``{url}/ready`` until it returns 200 or *timeout* seconds elapse.

    Args:
        url: Base URL of the application.
        timeout: Total seconds to keep retrying.
        interval: Seconds to sleep between attempts.

    Returns:
        True if the endpoint answered 200 in time, False otherwise.
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            response = requests.get(f"{url}/ready", timeout=5)
            if response.status_code == 200:
                print(f"✅ 应用就绪检查通过")
                return True
            else:
                print(f"❌ 就绪检查失败,状态码: {response.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"⏳ 连接失败: {e}")
        time.sleep(interval)
    print(f"❌ 就绪检查超时 ({timeout}秒)")
    return False


def main():
    """CLI entry point: parse arguments, run the checks, exit 0/1 accordingly."""
    parser = argparse.ArgumentParser(description="应用健康检查")
    parser.add_argument("url", help="应用URL")
    parser.add_argument("--timeout", type=int, default=30, help="超时时间(秒)")
    parser.add_argument("--interval", type=int, default=5, help="检查间隔(秒)")
    parser.add_argument("--check-ready", action="store_true", help="检查就绪状态")
    args = parser.parse_args()

    print(f"检查应用: {args.url}")

    # Health check (always runs)
    health_ok = check_health(args.url, args.timeout, args.interval)

    # Readiness check (only with --check-ready)
    ready_ok = True
    if args.check_ready:
        ready_ok = check_readiness(args.url, args.timeout, args.interval)

    if health_ok and ready_ok:
        print("🎉 所有检查通过!")
        sys.exit(0)
    else:
        print("💥 检查失败!")
        sys.exit(1)


if __name__ == "__main__":
    main()
小结
在本章中,我们学习了:
- Docker容器化:多阶段构建、最佳实践、Docker Compose
- Kubernetes部署:部署配置、服务发现、自动扩缩容
- 配置管理:环境变量、ConfigMap、Secret管理
- 自动化部署:部署脚本、健康检查、CI/CD集成
容器化和Kubernetes部署为LLM应用提供了可靠、可扩展的运行环境。
思考题
- 如何设计一个支持蓝绿部署的策略?
- 在什么情况下应该使用StatefulSet而不是Deployment?
- 如何处理容器化应用的数据持久化?
- 如何优化容器镜像的大小和构建时间?
下一章我们将学习生产环境的监控和日志管理。