#!/usr/bin/env python
"""Gradio demo that synthesizes a short video from a text prompt.

The script loads the ``damo-vilab/text-to-video-ms-1.7b`` diffusers pipeline
at import time, builds a Gradio Blocks UI around it, and launches the app.

Environment variables read by this module:

* ``MAX_NUM_FRAMES`` -- upper bound of the "Number of frames" slider
  (default ``200``).
* ``DEFAULT_NUM_FRAMES`` -- capped at ``MAX_NUM_FRAMES`` (default ``16``).
* ``SYSTEM`` -- example outputs are cached only when this equals ``spaces``.
"""

from __future__ import annotations

import os
import random
import tempfile

import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from gradio.themes.utils import sizes

# Square-cornered variant of the default theme with a blue accent color.
theme = gr.themes.Default(radius_size=sizes.radius_none).set(
    block_label_text_color='#4D63FF',
    block_title_text_color='#4D63FF',
    button_primary_text_color='#4D63FF',
    button_primary_background_fill='#FFFFFF',
    button_primary_border_color='#4D63FF',
    button_primary_background_fill_hover='#EDEFFF',
)

# Hide the page footer.
css = "footer {visibility: hidden}"

MAX_NUM_FRAMES = int(os.getenv('MAX_NUM_FRAMES', '200'))
# NOTE(review): DEFAULT_NUM_FRAMES is not referenced anywhere else in this
# file; the frame-count slider below hard-codes 16 -- confirm whether the
# slider default was meant to come from this constant.
DEFAULT_NUM_FRAMES = min(MAX_NUM_FRAMES,
                         int(os.getenv('DEFAULT_NUM_FRAMES', '16')))

# Frame rate used when encoding generated frames to MP4.
OUTPUT_FPS = 8

# Load the fp16 weights, swap in the DPM-Solver multistep scheduler, and turn
# on the pipeline's CPU-offload and VAE-slicing options.
pipe = DiffusionPipeline.from_pretrained('damo-vilab/text-to-video-ms-1.7b',
                                         torch_dtype=torch.float16,
                                         variant='fp16')
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()


def to_video(frames: list[np.ndarray], fps: int) -> str:
    """Encode *frames* into a temporary MP4 file and return its path.

    Args:
        frames: Frames to append to the video, in playback order.
        fps: Frame rate passed to the FFMPEG writer.

    Returns:
        Path of the newly created ``.mp4`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    # Only the unique path is needed. Close the handle right away so the file
    # descriptor is not leaked on every call and the writer can reopen the
    # path on platforms that forbid opening a file twice.
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as out_file:
        out_path = out_file.name

    # The context manager closes the writer even if appending a frame fails.
    with imageio.get_writer(out_path, format='FFMPEG', fps=fps) as writer:
        for frame in frames:
            writer.append_data(frame)
    return out_path


def generate(prompt: str, seed: int, num_frames: int,
             num_inference_steps: int) -> str:
    """Run the text-to-video pipeline and return the path of the result.

    Args:
        prompt: Text description of the video to synthesize.
        seed: Seed for the random generator; ``-1`` selects a random seed
            in ``[0, 1000000]``.
        num_frames: Number of frames to generate.
        num_inference_steps: Number of denoising steps.

    Returns:
        Path of an MP4 file encoded at ``OUTPUT_FPS`` frames per second.
    """
    # UI widgets may deliver whole numbers as floats (assumption -- depends on
    # the Gradio version); manual_seed() only accepts an int.
    seed = int(seed)
    if seed == -1:
        seed = random.randint(0, 1000000)
    generator = torch.Generator().manual_seed(seed)

    frames = pipe(prompt,
                  num_inference_steps=int(num_inference_steps),
                  num_frames=int(num_frames),
                  generator=generator).frames
    return to_video(frames, OUTPUT_FPS)


# Each row is [prompt, seed, num_frames, num_inference_steps].
examples = [
    ['An astronaut riding a horse.', 0, 16, 25],
    ['A panda eating bamboo on a rock.', 0, 16, 25],
    ['Spiderman is surfing.', 0, 16, 25],
]

# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-collapsed source -- confirm it matches the intended UI.
with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown("""
通过文本合成视频
""")
    with gr.Group():
        with gr.Box():
            with gr.Row(elem_id='prompt-container').style(equal_height=True):
                prompt = gr.Text(
                    label='Prompt',
                    show_label=False,
                    max_lines=1,
                    placeholder='输入提示',
                    elem_id='prompt-text-input').style(container=False)
                run_button = gr.Button('生成视频').style(full_width=False)
        result = gr.Video(label='Result', show_label=False, elem_id='gallery')
        with gr.Accordion('高级选项', open=False):
            seed = gr.Slider(
                label='Seed',
                minimum=-1,
                maximum=1000000,
                step=1,
                value=-1,
                info='If set to -1, a different seed will be used each time.')
            num_frames = gr.Slider(
                label='Number of frames',
                minimum=16,
                maximum=MAX_NUM_FRAMES,
                step=1,
                value=16,
                info=
                'Note that the content of the video also changes when you change the number of frames.'
            )
            num_inference_steps = gr.Slider(label='Number of inference steps',
                                            minimum=10,
                                            maximum=50,
                                            step=1,
                                            value=25)

    # Order must match the positional parameters of generate().
    inputs = [
        prompt,
        seed,
        num_frames,
        num_inference_steps,
    ]
    gr.Examples(examples=examples,
                inputs=inputs,
                outputs=result,
                fn=generate,
                cache_examples=os.getenv('SYSTEM') == 'spaces',
                label="示例")

    # Pressing Enter in the prompt box and clicking the button both generate.
    prompt.submit(fn=generate, inputs=inputs, outputs=result)
    run_button.click(fn=generate, inputs=inputs, outputs=result)

demo.queue(api_open=False, max_size=15).launch(server_name="0.0.0.0")