Python 实现 AI 图像生成:调用 Stable Diffusion API
编程语言 · 技术分享
从零开始学习使用 Python 调用 Stable Diffusion API 生成图像,涵盖本地部署、API 调用、ControlNet、图生图等进阶技巧。
1. 技术架构 Python 客户端Stable Diffusion API本地部署SD WebUI / ComfyUI云端 APIReplicate / Stability AIStable Diffusion 模型文生图txt2img图生图img2img局部重绘inpainting超分辨率upscale输出图像后处理管道存储本地/OSS 2. 图像生成方式对比 50%25%15%10%各生成方式使用占比统计文生图 (txt2img)图生图 (img2img)局部重绘 (inpainting)超分辨率 (upscale) 3. 环境准备 3.1 本地部署 Stable Diffusion WebUI # 克隆 Stable Diffusion WebUI git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git cd stable-diffusion-webui # 启动(开启 API 模式) ./webui.sh --api --listen # Windows 用户 webui.bat --api --listen 3.2 安装依赖 pip install requests Pillow io base64 4. 核心代码实现 4.1 SD API 客户端封装 # sd_client.py import requests import base64 import io import json import time from pathlib import Path from PIL import Image from dataclasses import dataclass, field from typing import Optional @dataclass class GenerationConfig: """图像生成配置""" prompt: str = "" negative_prompt: str = "low quality, blurry, deformed" width: int = 512 height: int = 512 steps: int = 30 cfg_scale: float = 7.0 sampler_name: str = "DPM++ 2M Karras" seed: int = -1 # -1 表示随机 batch_size: int = 1 n_iter: int = 1 # 迭代次数 model: Optional[str] = None class StableDiffusionClient: """Stable Diffusion API 客户端""" def __init__(self, base_url: str = "http://127.0.0.1:7860"): self.base_url = base_url self.api_url = f"{base_url}/sdapi/v1" def _save_base64_image(self, b64_str: str, output_path: str) -> str: """将 base64 图片保存到文件""" img_data = base64.b64decode(b64_str) img = Image.open(io.BytesIO(img_data)) img.save(output_path) return output_path # ---- 文生图 ---- def txt2img(self, config: GenerationConfig, output_dir: str = "./output") -> list[str]: """文生图:从文本描述生成图像""" payload = { "prompt": config.prompt, "negative_prompt": config.negative_prompt, "width": config.width, "height": config.height, "steps": config.steps, "cfg_scale": config.cfg_scale, "sampler_name": config.sampler_name, "seed": config.seed, "batch_size": config.batch_size, "n_iter": config.n_iter, } if config.model: self._switch_model(config.model) response = 
requests.post(f"{self.api_url}/txt2img", json=payload) response.raise_for_status() data = response.json() Path(output_dir).mkdir(exist_ok=True) saved_paths = [] for i, img_b64 in enumerate(data["images"]): path = f"{output_dir}/txt2img_{int(time.time())}_{i}.png" self._save_base64_image(img_b64, path) saved_paths.append(path) print(f"已保存: {path}") return saved_paths # ---- 图生图 ---- def img2img(self, init_image_path: str, prompt: str, denoising_strength: float = 0.75, config: GenerationConfig = None, output_dir: str = "./output") -> list[str]: """图生图:基于参考图 + 提示词生成新图""" config = config or GenerationConfig() # 读取初始图片并转 base64 with open(init_image_path, "rb") as f: init_images = [base64.b64encode(f.read()).decode()] payload = { "init_images": init_images, "prompt": prompt, "negative_prompt": config.negative_prompt, "width": config.width, "height": config.height, "steps": config.steps, "cfg_scale": config.cfg_scale, "sampler_name": config.sampler_name, "denoising_strength": denoising_strength, "seed": config.seed, } response = requests.post(f"{self.api_url}/img2img", json=payload) response.raise_for_status() data = response.json() Path(output_dir).mkdir(exist_ok=True) saved_paths = [] for i, img_b64 in enumerate(data["images"]): path = f"{output_dir}/img2img_{int(time.time())}_{i}.png" self._save_base64_image(img_b64, path) saved_paths.append(path) print(f"已保存: {path}") return saved_paths # ---- 局部重绘 ---- def inpaint(self, init_image_path: str, mask_image_path: str, prompt: str, denoising_strength: float = 0.85, output_dir: str = "./output") -> list[str]: """局部重绘:只修改 mask 区域""" with open(init_image_path, "rb") as f: init_images = [base64.b64encode(f.read()).decode()] with open(mask_image_path, "rb") as f: mask = base64.b64encode(f.read()).decode() payload = { "init_images": init_images, "mask": mask, "prompt": prompt, "negative_prompt": "low quality, blurry", "denoising_strength": denoising_strength, "inpainting_fill": 1, # 0=fill, 1=original, 2=latent noise 
"inpaint_full_res": True, "steps": 30, "cfg_scale": 7.0, "sampler_name": "DPM++ 2M Karras", "width": 512, "height": 512, } response = requests.post(f"{self.api_url}/img2img", json=payload) response.raise_for_status() data = response.json() Path(output_dir).mkdir(exist_ok=True) saved_paths = [] for i, img_b64 in enumerate(data["images"]): path = f"{output_dir}/inpaint_{int(time.time())}_{i}.png" self._save_base64_image(img_b64, path) saved_paths.append(path) return saved_paths # ---- 超分辨率 ---- def upscale(self, image_path: str, scale: int = 2, output_dir: str = "./output") -> str: """使用 ESRGAN 进行超分辨率放大""" with open(image_path, "rb") as f: img_b64 = base64.b64encode(f.read()).decode() payload = { "image": img_b64, "upscaler_1": "R-ESRGAN 4x+", "upscaling_resize": scale, } response = requests.post(f"{self.api_url}/extra-single-image", json=payload) response.raise_for_status() data = response.json() Path(output_dir).mkdir(exist_ok=True) path = f"{output_dir}/upscaled_{int(time.time())}.png" self._save_base64_image(data["image"], path) print(f"超分辨率完成: {path}") return path # ---- 模型管理 ---- def _switch_model(self, model_name: str): """切换模型""" response = requests.post( f"{self.api_url}/options", json={"sd_model_checkpoint": model_name}, ) response.raise_for_status() time.sleep(3) # 等待模型加载 def list_models(self) -> list[str]: """列出可用模型""" response = requests.get(f"{self.api_url}/sd-models") return [m["title"] for m in response.json()] def list_samplers(self) -> list[str]: """列出可用采样器""" response = requests.get(f"{self.api_url}/samplers") return [s["name"] for s in response.json()] 4.2 批量生成示例 # batch_generate.py from sd_client import StableDiffusionClient, GenerationConfig def batch_generate_portraits(): """批量生成人物肖像""" sd = StableDiffusionClient() # 查看可用模型和采样器 print("可用模型:", sd.list_models()[:5]) print("可用采样器:", sd.list_samplers()) # 风格列表 styles = [ "cyberpunk neon city", "watercolor painting", "oil painting renaissance", "anime style", "photorealistic 8k", ] base_prompt = ( 
"portrait of a young woman, detailed face, beautiful eyes, " "dramatic lighting, masterpiece, best quality" ) for style in styles: config = GenerationConfig( prompt=f"{base_prompt}, {style}", negative_prompt="lowres, bad anatomy, bad hands, text, error", width=512, height=768, steps=30, cfg_scale=7.5, ) paths = sd.txt2img(config, output_dir=f"./output/{style.replace(' ', '_')}") print(f"风格 [{style}] -> {paths}") if __name__ == "__main__": batch_generate_portraits() 4.3 调用 Stability AI 云端 API # stability_cloud.py import requests import base64 from pathlib import Path from PIL import Image from io import BytesIO class StabilityAIClient: """Stability AI 官方云端 API""" def __init__(self, api_key: str): self.api_key = api_key self.base_url = "https://api.stability.ai/v2beta" def generate(self, prompt: str, aspect_ratio: str = "1:1", style: str = "photographic", output_path: str = "output.png") -> str: """调用 Stable Diffusion 3 生成图像""" response = requests.post( f"{self.base_url}/stable-image/generate/sd3", headers={ "Authorization": f"Bearer {self.api_key}", "Accept": "image/*", }, files={"none": ""}, data={ "prompt": prompt, "aspect_ratio": aspect_ratio, "style_preset": style, "output_format": "png", }, ) if response.status_code != 200: raise Exception(f"API 错误: {response.status_code} - {response.text}") with open(output_path, "wb") as f: f.write(response.content) print(f"已生成: {output_path}") return output_path # 使用示例 if __name__ == "__main__": client = StabilityAIClient(api_key="sk-your-api-key") client.generate( prompt="A majestic dragon flying over a neon-lit cyberpunk city at night, " "highly detailed, cinematic lighting, 8k", aspect_ratio="16:9", style="cinematic", output_path="dragon_city.png", ) 4.4 图像后处理管道 # postprocess.py from PIL import Image, ImageEnhance, ImageFilter from pathlib import Path class ImagePostProcessor: """图像后处理:调整色彩、锐化、添加水印""" @staticmethod def enhance(image_path: str, brightness: float = 1.1, contrast: float = 1.15, sharpness: float = 1.3, 
output_path: str = None) -> str: """综合增强""" img = Image.open(image_path) img = ImageEnhance.Brightness(img).enhance(brightness) img = ImageEnhance.Contrast(img).enhance(contrast) img = ImageEnhance.Sharpness(img).enhance(sharpness) output_path = output_path or image_path.replace(".", "_enhanced.") img.save(output_path, quality=95) return output_path @staticmethod def add_watermark(image_path: str, text: str = "AI Generated", output_path: str = None) -> str: """添加水印""" from PIL import ImageDraw, ImageFont img = Image.open(image_path).convert("RGBA") overlay = Image.new("RGBA", img.size, (0, 0, 0, 0)) draw = ImageDraw.Draw(overlay) # 半透明白色文字 draw.text( (img.width - 200, img.height - 40), text, fill=(255, 255, 255, 128), ) img = Image.alpha_composite(img, overlay).convert("RGB") output_path = output_path or image_path.replace(".", "_wm.") img.save(output_path, quality=95) return output_path @staticmethod def create_grid(image_paths: list[str], cols: int = 3, output_path: str = "grid.png") -> str: """将多张图片拼成网格""" images = [Image.open(p) for p in image_paths] w, h = images[0].size rows = (len(images) + cols - 1) // cols grid = Image.new("RGB", (w * cols, h * rows), "white") for i, img in enumerate(images): row, col = divmod(i, cols) grid.paste(img, (col * w, row * h)) grid.save(output_path, quality=95) print(f"网格图已保存: {output_path}") return output_path 5. 
Prompt 工程技巧 Prompt 结构主体描述风格关键词质量修饰词负面提示词 高质量 Prompt 模板 PROMPT_TEMPLATES = { "人物肖像": ( "{subject}, {style}, detailed face, expressive eyes, " "dramatic lighting, masterpiece, best quality, ultra detailed" ), "风景": ( "{scene}, {mood}, volumetric lighting, god rays, " "landscape photography, 8k uhd, cinematic composition" ), "产品设计": ( "{product}, minimalist design, studio lighting, " "white background, product photography, professional, 4k" ), "动漫": ( "{character}, anime style, vibrant colors, " "detailed illustration, cel shading, masterpiece" ), } NEGATIVE_PROMPTS = { "通用": "lowres, bad anatomy, bad hands, text, error, missing fingers, " "extra digit, cropped, worst quality, low quality, blurry", "写实": "illustration, painting, drawing, art, sketch, anime, cartoon, " "CG, render, 3D, watermark, text, font, signature", "动漫": "photo, realistic, 3d, western, ugly, duplicate, morbid, " "deformed, bad anatomy, blurry", } 6. 关键参数影响 35%20%15%15%10%5%不同参数对生成质量的影响权重Prompt 质量采样步数 (steps)CFG Scale采样器选择模型选择分辨率 参数推荐值说明steps25-35步数越多细节越好,但边际递减且更慢cfg_scale7-12越高越遵循 prompt,过高会过饱和samplerDPM++ 2M Karras兼顾速度与质量denoising_strength0.5-0.8图生图降噪强度,越高变化越大seed-1随机种子,固定可复现7. 
完整使用流程 # complete_demo.py from sd_client import StableDiffusionClient, GenerationConfig from stability_cloud import StabilityAIClient from postprocess import ImagePostProcessor def main(): # ===== 方式一:本地 SD WebUI ===== sd = StableDiffusionClient("http://127.0.0.1:7860") # 文生图 config = GenerationConfig( prompt="A serene Japanese garden with cherry blossoms, " "koi pond, stone bridge, golden hour, cinematic, 8k", negative_prompt="lowres, blurry, text, watermark", width=768, height=512, steps=30, cfg_scale=7.5, ) paths = sd.txt2img(config) print(f"生成完成: {paths}") # 图生图 if paths: new_paths = sd.img2img( init_image_path=paths[0], prompt="same scene but in autumn, orange and red leaves, snow", denoising_strength=0.6, ) print(f"图生图完成: {new_paths}") # 超分辨率 if paths: upscaled = sd.upscale(paths[0], scale=2) print(f"超分辨率完成: {upscaled}") # 后处理 pp = ImagePostProcessor() if paths: enhanced = pp.enhance(paths[0]) watermarked = pp.add_watermark(enhanced, text="AI Art") print(f"后处理完成: {watermarked}") # ===== 方式二:云端 API ===== # cloud = StabilityAIClient("sk-xxx") # cloud.generate("A futuristic cityscape at sunset", "16:9", "cinematic") if __name__ == "__main__": main() 8. 总结 本文覆盖了 Stable Diffusion 图像生成的完整链路: 本地部署 SD WebUI 并开启 API 模式封装 Python 客户端 支持文生图、图生图、局部重绘、超分辨率云端 API 作为无 GPU 环境的替代方案Prompt 工程 模板化的提示词编写技巧后处理管道 增强色彩、添加水印、拼图网格 生成速度参考:RTX 4090 生成 512x512 约 3-5 秒,512x768 约 5-8 秒。
云端 API 约 10-20 秒。
内容整理自CSDN博客,仅供技术交流参考。
注册
登录控制台
