流式传输
This commit is contained in:
@ -5,7 +5,15 @@
|
||||
"Bash(python run.py:*)",
|
||||
"Bash(tree:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(dir:*)"
|
||||
"Bash(dir:*)",
|
||||
"Bash(python:*)",
|
||||
"Bash(cat:*)",
|
||||
"Bash(netstat:*)",
|
||||
"Bash(findstr:*)",
|
||||
"Bash(taskkill:*)",
|
||||
"Bash(where python:*)",
|
||||
"Bash(source:*)",
|
||||
"Bash(curl:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,6 +87,15 @@ animator.playAnimation();
|
||||
**`loadAnimationFrames(frames)`**
|
||||
加载动画帧数据
|
||||
|
||||
**`appendAnimationFrames(frames)`**
|
||||
追加动画帧数据(流式场景)
|
||||
|
||||
**`startStreaming()`**
|
||||
开启流式模式,允许边接收边播放
|
||||
|
||||
**`endStreaming()`**
|
||||
结束流式模式
|
||||
|
||||
**`playAnimation()`**
|
||||
播放动画
|
||||
|
||||
|
||||
@ -10,6 +10,10 @@ class BlendShapeAnimator {
|
||||
this.idleAnimations = {};
|
||||
this.blendShapeScale = config.blendShapeScale || 1.0;
|
||||
this.dataFps = config.dataFps || 30;
|
||||
this.isStreaming = false;
|
||||
this.streamingComplete = true;
|
||||
this.streamingWaitStart = null;
|
||||
this.streamingStallMs = 0;
|
||||
|
||||
// 空闲动画参数
|
||||
this.blinkParams = config.blinkParams || {
|
||||
@ -36,6 +40,14 @@ class BlendShapeAnimator {
|
||||
this.enabledExpressions = new Set();
|
||||
this.expressionDurations = {};
|
||||
|
||||
// 播放动画时禁用的 blendshape 列表(由空闲动画控制)
|
||||
this.disabledShapesInAnimation = config.disabledShapesInAnimation || [
|
||||
'eyeblinkleft', 'eyeblinkright', // 眨眼
|
||||
'browdownleft', 'browdownright', // 眉毛下
|
||||
'browinnerup', // 眉毛内上
|
||||
'browouterupleft', 'browouterupright' // 眉毛外上
|
||||
];
|
||||
|
||||
// 状态标志
|
||||
this.isBlinkEnabled = false;
|
||||
this.isEyeLookEnabled = false;
|
||||
@ -63,6 +75,43 @@ class BlendShapeAnimator {
|
||||
this.animationShapeNames = this._collectAnimationShapeNames(this.animationFrames);
|
||||
}
|
||||
|
||||
appendAnimationFrames(frames) {
|
||||
if (!Array.isArray(frames) || frames.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.animationFrames.push(...frames);
|
||||
const newNames = this._collectAnimationShapeNames(frames);
|
||||
|
||||
if (newNames.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const existingNames = new Set(this.animationShapeNames);
|
||||
newNames.forEach(name => {
|
||||
if (!existingNames.has(name)) {
|
||||
existingNames.add(name);
|
||||
this.animationShapeNames.push(name);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
startStreaming() {
|
||||
console.log('Starting streaming mode');
|
||||
this.isStreaming = true;
|
||||
this.streamingComplete = false;
|
||||
this.streamingWaitStart = null;
|
||||
this.streamingStallMs = 0;
|
||||
}
|
||||
|
||||
endStreaming() {
|
||||
console.log('Ending streaming mode');
|
||||
this.streamingComplete = true;
|
||||
this.isStreaming = false;
|
||||
this.streamingWaitStart = null;
|
||||
this.streamingStallMs = 0;
|
||||
}
|
||||
|
||||
// 播放动画
|
||||
playAnimation() {
|
||||
if (this.animationFrames.length === 0) {
|
||||
@ -75,15 +124,26 @@ class BlendShapeAnimator {
|
||||
return;
|
||||
}
|
||||
|
||||
// 停止随机表情
|
||||
// 停止并立即重置眼球移动
|
||||
if (this.isEyeLookEnabled) {
|
||||
this._stopRandomEyeLook();
|
||||
this._immediateResetEyeLook();
|
||||
}
|
||||
|
||||
// 停止并立即重置随机表情
|
||||
if (this.isExpressionEnabled && window.ExpressionLibrary) {
|
||||
window.ExpressionLibrary.randomPlayer.stop();
|
||||
this._immediateResetExpressions();
|
||||
}
|
||||
|
||||
// 注意:不停止眨眼,让眨眼继续运行
|
||||
|
||||
this.stopAnimation(false);
|
||||
this.isPlaying = true;
|
||||
this.currentFrameIndex = 0;
|
||||
this.animationStartTime = performance.now();
|
||||
this.streamingWaitStart = null;
|
||||
this.streamingStallMs = 0;
|
||||
|
||||
this._animateFrame();
|
||||
this.onStatusChange('info', '播放中...');
|
||||
@ -94,6 +154,11 @@ class BlendShapeAnimator {
|
||||
this.isPlaying = false;
|
||||
this._resetAnimationInfluences();
|
||||
|
||||
// 恢复眼球移动
|
||||
if (resumeExpressions && this.isEyeLookEnabled) {
|
||||
this._startRandomEyeLook();
|
||||
}
|
||||
|
||||
// 恢复随机表情
|
||||
if (resumeExpressions && this.isExpressionEnabled && window.ExpressionLibrary) {
|
||||
window.ExpressionLibrary.randomPlayer.start();
|
||||
@ -130,12 +195,35 @@ class BlendShapeAnimator {
|
||||
if (!this.isPlaying) return;
|
||||
|
||||
const now = performance.now();
|
||||
if (this.streamingWaitStart !== null && this.animationFrames.length > this.currentFrameIndex + 1) {
|
||||
this.streamingStallMs += now - this.streamingWaitStart;
|
||||
this.streamingWaitStart = null;
|
||||
}
|
||||
|
||||
const frameDuration = 1000 / this.dataFps;
|
||||
const elapsed = now - this.animationStartTime;
|
||||
const elapsed = now - this.animationStartTime - this.streamingStallMs;
|
||||
const exactFrame = elapsed / frameDuration;
|
||||
const targetFrameIndex = Math.floor(exactFrame);
|
||||
|
||||
if (targetFrameIndex >= this.animationFrames.length) {
|
||||
if (this.isStreaming && !this.streamingComplete) {
|
||||
if (this.streamingWaitStart === null) {
|
||||
this.streamingWaitStart = now;
|
||||
console.log(`Waiting for more frames... (current: ${this.animationFrames.length}, target: ${targetFrameIndex})`);
|
||||
}
|
||||
const waitTime = now - this.streamingWaitStart;
|
||||
if (waitTime > 30000) {
|
||||
console.warn('Streaming timeout after 30s, stopping animation');
|
||||
this.stopAnimation();
|
||||
return;
|
||||
}
|
||||
if (waitTime > 1000 && Math.floor(waitTime / 1000) !== Math.floor((waitTime - 16) / 1000)) {
|
||||
console.log(`Still waiting... ${Math.floor(waitTime / 1000)}s`);
|
||||
}
|
||||
requestAnimationFrame(() => this._animateFrame());
|
||||
return;
|
||||
}
|
||||
console.log(`Animation complete. Total frames: ${this.animationFrames.length}`);
|
||||
this.stopAnimation();
|
||||
return;
|
||||
}
|
||||
@ -152,6 +240,11 @@ class BlendShapeAnimator {
|
||||
: Object.keys(currentBlendShapes);
|
||||
|
||||
for (const key of shapeNames) {
|
||||
// 跳过禁用列表中的 blendshape,让空闲动画继续控制它们
|
||||
if (this.disabledShapesInAnimation.includes(key.toLowerCase())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const currentValue = currentBlendShapes[key] || 0;
|
||||
const nextValue = nextBlendShapes[key] || 0;
|
||||
const interpolatedValue = this._lerp(currentValue, nextValue, smoothProgress);
|
||||
@ -357,6 +450,38 @@ class BlendShapeAnimator {
|
||||
});
|
||||
}
|
||||
|
||||
_immediateResetEyeLook() {
|
||||
const eyeLookShapes = [
|
||||
'eyelookupleft', 'eyelookupright',
|
||||
'eyelookdownleft', 'eyelookdownright',
|
||||
'eyelookinleft', 'eyelookinright',
|
||||
'eyelookoutleft', 'eyelookoutright'
|
||||
];
|
||||
|
||||
if (!this.morphTargetAdapter) return;
|
||||
|
||||
eyeLookShapes.forEach(name => {
|
||||
this.morphTargetAdapter.setInfluence(name, 0);
|
||||
delete this.idleAnimations[name];
|
||||
});
|
||||
}
|
||||
|
||||
_immediateResetExpressions() {
|
||||
if (!this.morphTargetAdapter || !window.ExpressionLibrary) return;
|
||||
|
||||
// 获取所有表情的 blendshape 名称并立即重置
|
||||
const expressions = window.ExpressionLibrary.expressions;
|
||||
for (const exprKey in expressions) {
|
||||
const expr = expressions[exprKey];
|
||||
if (expr.shapes) {
|
||||
for (const shapeName in expr.shapes) {
|
||||
this.morphTargetAdapter.setInfluence(shapeName, 0);
|
||||
delete this.idleAnimations[shapeName];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 随机表情控制
|
||||
toggleRandomExpression(enabled) {
|
||||
if (!window.ExpressionLibrary) {
|
||||
|
||||
@ -30,6 +30,13 @@
|
||||
<input type="text" id="apiUrl" value="http://localhost:5001/text-to-blendshapes">
|
||||
</div>
|
||||
|
||||
<div class="input-group toggle-group">
|
||||
<label>
|
||||
<input type="checkbox" id="streamEnabled" checked>
|
||||
启用流式传输
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="input-group">
|
||||
<label>形态键强度: <span id="scaleValue">1.0</span></label>
|
||||
<input type="range" id="scaleSlider" min="0" max="2" step="0.1" value="1.0"
|
||||
|
||||
@ -60,6 +60,7 @@ async function generateAnimation() {
|
||||
const text = document.getElementById('textInput').value.trim();
|
||||
const apiUrl = document.getElementById('apiUrl').value;
|
||||
const btn = document.getElementById('generateBtn');
|
||||
const streamEnabled = document.getElementById('streamEnabled')?.checked;
|
||||
|
||||
if (!text) {
|
||||
showStatus("请输入文字", "error");
|
||||
@ -70,22 +71,11 @@ async function generateAnimation() {
|
||||
showStatus("生成中...", "info");
|
||||
|
||||
try {
|
||||
const response = await fetch(apiUrl, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ text: text, language: 'zh-CN' })
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (!response.ok || !data.success) {
|
||||
throw new Error(data.error || '请求失败');
|
||||
if (streamEnabled) {
|
||||
await generateAnimationStream(text, apiUrl);
|
||||
} else {
|
||||
await generateAnimationBatch(text, apiUrl);
|
||||
}
|
||||
|
||||
animator.loadAnimationFrames(data.frames);
|
||||
console.log("动画数据:", data.frames);
|
||||
showStatus(`动画生成成功!共 ${data.frames.length} 帧`, "success");
|
||||
|
||||
} catch (err) {
|
||||
showStatus("错误: " + err.message, "error");
|
||||
} finally {
|
||||
@ -93,6 +83,190 @@ async function generateAnimation() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Request the full animation in one shot (non-streaming endpoint) and
 * load the returned frames into the animator.
 * @param {string} text - input text to animate
 * @param {string} apiUrl - user-supplied API URL (normalized internally)
 * @throws {Error} when the HTTP call fails or the API reports failure
 */
async function generateAnimationBatch(text, apiUrl) {
    const requestInit = {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text, language: 'zh-CN', first_sentence_split_size: 1 }),
    };
    const response = await fetch(normalizeApiUrl(apiUrl, false), requestInit);
    const payload = await response.json();

    if (!response.ok || !payload.success) {
        throw new Error(payload.error || '请求失败');
    }

    animator.loadAnimationFrames(payload.frames);
    console.log("动画数据:", payload.frames);
    showStatus(`动画生成成功!共 ${payload.frames.length} 帧`, "success");
}
|
||||
|
||||
/**
 * Request the animation as an NDJSON stream and play it while frames are
 * still arriving. Falls back to the batch endpoint when the environment
 * exposes no readable response body.
 * @param {string} text - input text to animate
 * @param {string} apiUrl - user-supplied API URL (normalized internally)
 * @throws {Error} on HTTP failure or an in-band stream `error` message
 */
async function generateAnimationStream(text, apiUrl) {
    const response = await fetch(normalizeApiUrl(apiUrl, true), {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: text, language: 'zh-CN', first_sentence_split_size: 1 })
    });

    if (!response.ok) {
        // Prefer the server-provided error text when the body is JSON.
        let errorMessage = `请求失败 (${response.status})`;
        try {
            const data = await response.json();
            if (data?.error) {
                errorMessage = data.error;
            }
        } catch (err) {
            // ignore json parse errors
        }
        throw new Error(errorMessage);
    }

    // No ReadableStream support: degrade to the one-shot endpoint.
    if (!response.body) {
        await generateAnimationBatch(text, apiUrl);
        return;
    }

    animator.stopAnimation();
    animator.loadAnimationFrames([]);
    animator.startStreaming();

    const reader = response.body.getReader();
    const textDecoder = new TextDecoder();
    let textBuffer = '';
    let playbackStarted = false;
    const frameQueue = [];
    const streamBufferMs = 300; // warm-up buffer before playback starts
    const flushBatchMs = 50;    // batch granularity for appends
    const minStartFrames = Math.max(1, Math.round(animator.dataFps * (streamBufferMs / 1000)));
    const frameBatchSize = Math.max(1, Math.round(animator.dataFps * (flushBatchMs / 1000)));

    // Move queued frames into the animator and kick off playback once the
    // warm-up buffer is filled. `force` flushes regardless of batch size.
    const flushFrames = (force = false) => {
        if (frameQueue.length === 0) {
            return;
        }
        if (!force && frameQueue.length < frameBatchSize) {
            return;
        }
        const batch = frameQueue.splice(0, frameQueue.length);
        animator.appendAnimationFrames(batch);
        console.log(`Flushed ${batch.length} frames, total: ${animator.animationFrames.length}`);
        if (!playbackStarted && animator.animationFrames.length >= minStartFrames) {
            console.log(`Starting animation with ${animator.animationFrames.length} frames (min: ${minStartFrames})`);
            animator.playAnimation();
            playbackStarted = true;
        }
    };

    // Dispatch one NDJSON message by its `type` field. Unknown types are
    // silently ignored, matching the original behavior.
    const handleMessage = (message) => {
        switch (message.type) {
            case 'frame':
                frameQueue.push(message.frame);
                flushFrames();
                return;
            case 'status':
                showStatus(message.message || 'Streaming', 'info');
                console.log('Stream status:', message);
                return;
            case 'error':
                throw new Error(message.message || 'Streaming error');
            case 'end': {
                console.log('Stream ended, flushing remaining frames');
                flushFrames(true);
                animator.endStreaming();
                if (!playbackStarted && animator.animationFrames.length > 0) {
                    animator.playAnimation();
                    playbackStarted = true;
                }
                const totalFrames = message.frames ?? animator.animationFrames.length;
                console.log(`Total frames received: ${totalFrames}, in animator: ${animator.animationFrames.length}`);
                showStatus(`流式动画接收完成,共 ${totalFrames} 帧`, "success");
            }
        }
    };

    let streamError = null;
    try {
        while (true) {
            const { value, done } = await reader.read();
            if (done) {
                break;
            }

            textBuffer += textDecoder.decode(value, { stream: true });
            const lines = textBuffer.split('\n');
            textBuffer = lines.pop() || '';

            for (const line of lines) {
                if (!line.trim()) {
                    continue;
                }

                let message;
                try {
                    message = JSON.parse(line);
                } catch (err) {
                    continue; // skip malformed NDJSON lines
                }

                handleMessage(message);
            }
        }
        // The stream may end without a trailing newline; any error thrown
        // while handling this leftover (parse or in-band) is ignored, as
        // in the original implementation.
        if (textBuffer.trim()) {
            try {
                handleMessage(JSON.parse(textBuffer));
            } catch (err) {
                // ignore trailing parse errors
            }
        }
    } catch (err) {
        streamError = err;
        throw err;
    } finally {
        if (!streamError) {
            flushFrames(true);
            if (!playbackStarted && animator.animationFrames.length > 0) {
                animator.playAnimation();
            }
        }
        animator.endStreaming();
    }
}
|
||||
|
||||
/**
 * Normalize the user-supplied API URL so it ends with the right endpoint
 * for the chosen transfer mode: `/text-to-blendshapes/stream` when
 * streaming, `/text-to-blendshapes` otherwise. Trailing slashes are
 * stripped; a falsy input is returned unchanged.
 * @param {string} apiUrl - user-supplied URL (may already carry either path)
 * @param {boolean} streamEnabled - whether the streaming endpoint is wanted
 * @returns {string} the normalized URL
 */
function normalizeApiUrl(apiUrl, streamEnabled) {
    if (!apiUrl) {
        return apiUrl;
    }

    const base = apiUrl.replace(/\/+$/, '');
    const batchPath = '/text-to-blendshapes';
    const streamPath = `${batchPath}/stream`;

    const hasStreamPath = base.endsWith(streamPath);
    const hasBatchPath = base.endsWith(batchPath);

    if (streamEnabled) {
        if (hasStreamPath) return base;
        return hasBatchPath ? base + '/stream' : base + streamPath;
    }

    if (hasStreamPath) return base.slice(0, -'/stream'.length);
    return hasBatchPath ? base : base + batchPath;
}
|
||||
|
||||
/** Button handler: delegate to the animator's playback entry point. */
function playAnimation() {
    animator.playAnimation();
}
|
||||
|
||||
@ -59,6 +59,21 @@ body {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
.toggle-group label {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
margin-bottom: 0;
|
||||
color: #ddd;
|
||||
}
|
||||
|
||||
.toggle-group input[type="checkbox"] {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
input[type="text"],
|
||||
textarea {
|
||||
width: 100%;
|
||||
|
||||
@ -55,6 +55,23 @@ python api.py
|
||||
}
|
||||
```
|
||||
|
||||
### POST /text-to-blendshapes/stream
|
||||
|
||||
**说明:** 使用 NDJSON 流式返回,便于边收边播放。
|
||||
|
||||
**响应:** 每行一个 JSON 对象,`type` 字段取值如下:
|
||||
- `status` - 阶段提示
|
||||
- `frame` - 单帧数据
|
||||
- `end` - 完成信息
|
||||
- `error` - 错误信息
|
||||
|
||||
**示例:**
|
||||
```json
|
||||
{"type":"status","stage":"tts","message":"Generating audio"}
|
||||
{"type":"frame","frame":{"timeCode":0.0,"blendShapes":{"JawOpen":0.1}}}
|
||||
{"type":"end","frames":900,"audio_path":"...","csv_path":"..."}
|
||||
```
|
||||
|
||||
## 文件说明
|
||||
|
||||
- `tts_service.py` - 文字转音频服务
|
||||
|
||||
BIN
services/a2f_api/__pycache__/api.cpython-311.pyc
Normal file
BIN
services/a2f_api/__pycache__/api.cpython-311.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,34 +1,73 @@
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import json
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from text_to_blendshapes_service import TextToBlendShapesService
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
app = FastAPI()
|
||||
|
||||
@app.route('/health', methods=['GET'])
|
||||
def health():
|
||||
return jsonify({'status': 'ok'})
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
@app.route('/text-to-blendshapes', methods=['POST'])
|
||||
def text_to_blendshapes():
|
||||
class TextRequest(BaseModel):
    """Request body shared by the batch and streaming endpoints.

    Optional fields are annotated ``X | None`` instead of the original
    bare ``str = None`` / ``int = None``: with the bare annotation a
    client sending an explicit JSON ``null`` is rejected by validation
    even though omitting the field is accepted. The defaults are
    unchanged, so existing callers are unaffected.
    """
    text: str                      # text to synthesize and animate
    language: str = 'zh-CN'        # TTS language code
    segment: bool = False          # split the text into sentences first
    split_punctuations: str | None = None        # forwarded to sentence splitting
    max_sentence_length: int | None = None       # forwarded to sentence splitting
    first_sentence_split_size: int | None = None  # forwarded to sentence splitting (streaming)
||||
|
||||
@app.get('/health')
async def health():
    """Liveness probe: always returns a static OK payload."""
    return {'status': 'ok'}
|
||||
|
||||
@app.post('/text-to-blendshapes')
|
||||
async def text_to_blendshapes(request: TextRequest):
|
||||
try:
|
||||
data = request.get_json()
|
||||
if not data or 'text' not in data:
|
||||
return jsonify({'success': False, 'error': 'Missing text'}), 400
|
||||
|
||||
text = data['text']
|
||||
language = data.get('language', 'zh-CN')
|
||||
|
||||
service = TextToBlendShapesService(lang=language)
|
||||
result = service.text_to_blend_shapes(text)
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
service = TextToBlendShapesService(lang=request.language)
|
||||
result = service.text_to_blend_shapes(
|
||||
request.text,
|
||||
segment=request.segment,
|
||||
split_punctuations=request.split_punctuations,
|
||||
max_sentence_length=request.max_sentence_length
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
return {'success': False, 'error': str(e)}
|
||||
|
||||
@app.post('/text-to-blendshapes/stream')
async def text_to_blendshapes_stream(request: TextRequest):
    """Stream blendshape frames as NDJSON (one JSON object per line).

    Each line carries a ``type`` field (status / frame / end / error).
    Exceptions raised while iterating are reported in-band as an
    ``error`` message, since the HTTP status has already been sent.

    NOTE(review): the underlying iterator appears synchronous, so long
    steps block the event loop — confirm acceptable under load.
    """
    async def ndjson():
        service = TextToBlendShapesService(lang=request.language)
        try:
            for message in service.iter_text_to_blend_shapes_stream(
                request.text,
                split_punctuations=request.split_punctuations,
                max_sentence_length=request.max_sentence_length,
                first_sentence_split_size=request.first_sentence_split_size
            ):
                yield json.dumps(message) + "\n"
        except Exception as e:
            yield json.dumps({'type': 'error', 'message': str(e)}) + "\n"

    headers = {
        'Cache-Control': 'no-cache',   # keep proxies from caching the stream
        'X-Accel-Buffering': 'no'      # disable nginx response buffering
    }
    return StreamingResponse(ndjson(), media_type='application/x-ndjson', headers=headers)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import uvicorn
|
||||
print("Text to BlendShapes API: http://localhost:5001")
|
||||
app.run(host='0.0.0.0', port=5001, debug=True)
|
||||
uvicorn.run(app, host='0.0.0.0', port=5001)
|
||||
|
||||
@ -17,9 +17,11 @@ class BlendShapeParser:
|
||||
|
||||
@staticmethod
|
||||
def csv_to_blend_shapes(csv_path: str):
|
||||
frames = []
|
||||
with open(csv_path, 'r') as f:
|
||||
return list(BlendShapeParser.iter_csv_to_blend_shapes(csv_path))
|
||||
|
||||
@staticmethod
|
||||
def iter_csv_to_blend_shapes(csv_path: str):
|
||||
with open(csv_path, 'r') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
frame = {'timeCode': float(row['timeCode']), 'blendShapes': {}}
|
||||
@ -27,5 +29,4 @@ class BlendShapeParser:
|
||||
col_name = f'blendShapes.{key}'
|
||||
if col_name in row:
|
||||
frame['blendShapes'][key] = float(row[col_name])
|
||||
frames.append(frame)
|
||||
return frames
|
||||
yield frame
|
||||
|
||||
282
services/a2f_api/test.html
Normal file
282
services/a2f_api/test.html
Normal file
@ -0,0 +1,282 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Text to BlendShapes 测试</title>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
min-height: 100vh;
|
||||
padding: 20px;
|
||||
}
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
background: white;
|
||||
border-radius: 12px;
|
||||
padding: 30px;
|
||||
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
|
||||
}
|
||||
h1 {
|
||||
color: #333;
|
||||
margin-bottom: 10px;
|
||||
font-size: 28px;
|
||||
}
|
||||
.subtitle {
|
||||
color: #666;
|
||||
margin-bottom: 30px;
|
||||
font-size: 14px;
|
||||
}
|
||||
.input-group {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 8px;
|
||||
color: #555;
|
||||
font-weight: 500;
|
||||
}
|
||||
input, textarea, select {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
border: 2px solid #e0e0e0;
|
||||
border-radius: 6px;
|
||||
font-size: 14px;
|
||||
transition: border-color 0.3s;
|
||||
}
|
||||
input:focus, textarea:focus, select:focus {
|
||||
outline: none;
|
||||
border-color: #667eea;
|
||||
}
|
||||
textarea {
|
||||
resize: vertical;
|
||||
min-height: 100px;
|
||||
font-family: inherit;
|
||||
}
|
||||
button {
|
||||
width: 100%;
|
||||
padding: 14px;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 16px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: transform 0.2s, box-shadow 0.2s;
|
||||
}
|
||||
button:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3);
|
||||
}
|
||||
button:active {
|
||||
transform: translateY(0);
|
||||
}
|
||||
button:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
transform: none;
|
||||
}
|
||||
.loading {
|
||||
display: none;
|
||||
text-align: center;
|
||||
margin: 20px 0;
|
||||
color: #667eea;
|
||||
}
|
||||
.loading.show {
|
||||
display: block;
|
||||
}
|
||||
.result {
|
||||
margin-top: 30px;
|
||||
padding: 20px;
|
||||
background: #f8f9fa;
|
||||
border-radius: 6px;
|
||||
display: none;
|
||||
}
|
||||
.result.show {
|
||||
display: block;
|
||||
}
|
||||
.result h3 {
|
||||
color: #333;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.stats {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
||||
gap: 15px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.stat-card {
|
||||
background: white;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
text-align: center;
|
||||
}
|
||||
.stat-value {
|
||||
font-size: 24px;
|
||||
font-weight: bold;
|
||||
color: #667eea;
|
||||
}
|
||||
.stat-label {
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
margin-top: 5px;
|
||||
}
|
||||
.frames-preview {
|
||||
max-height: 300px;
|
||||
overflow-y: auto;
|
||||
background: white;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
font-family: monospace;
|
||||
font-size: 12px;
|
||||
}
|
||||
.error {
|
||||
background: #fee;
|
||||
color: #c33;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
margin-top: 20px;
|
||||
display: none;
|
||||
}
|
||||
.error.show {
|
||||
display: block;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>Text to BlendShapes</h1>
|
||||
<p class="subtitle">将文字转换为 52 个 ARKit 形态键</p>
|
||||
|
||||
<div class="input-group">
|
||||
<label for="text">输入文字</label>
|
||||
<textarea id="text" placeholder="请输入要转换的文字...">你好世界,这是一个测试。</textarea>
|
||||
</div>
|
||||
|
||||
<div class="input-group">
|
||||
<label for="language">语言</label>
|
||||
<select id="language">
|
||||
<option value="zh-CN">中文</option>
|
||||
<option value="en">English</option>
|
||||
<option value="ja">日本語</option>
|
||||
<option value="ko">한국어</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="input-group">
|
||||
<label for="apiUrl">API 地址</label>
|
||||
<input type="text" id="apiUrl" value="http://localhost:5001/text-to-blendshapes">
|
||||
</div>
|
||||
|
||||
<button id="submitBtn" onclick="convert()">转换</button>
|
||||
|
||||
<div class="loading" id="loading">
|
||||
<p>⏳ 处理中,请稍候...</p>
|
||||
</div>
|
||||
|
||||
<div class="error" id="error"></div>
|
||||
|
||||
<div class="result" id="result">
|
||||
<h3>转换结果</h3>
|
||||
<div class="stats">
|
||||
<div class="stat-card">
|
||||
<div class="stat-value" id="frameCount">0</div>
|
||||
<div class="stat-label">总帧数</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value" id="duration">0s</div>
|
||||
<div class="stat-label">时长</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">52</div>
|
||||
<div class="stat-label">形态键数量</div>
|
||||
</div>
|
||||
</div>
|
||||
<h4 style="margin-bottom: 10px;">帧数据预览</h4>
|
||||
<div class="frames-preview" id="framesPreview"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
/**
 * Read the form, POST the text to the API, and render the result.
 * Disables the submit button and shows the spinner while in flight;
 * both are restored in the finally block regardless of outcome.
 */
async function convert() {
    const text = document.getElementById('text').value.trim();
    const language = document.getElementById('language').value;
    const apiUrl = document.getElementById('apiUrl').value;

    if (!text) {
        showError('请输入文字');
        return;
    }

    const submitBtn = document.getElementById('submitBtn');
    const loading = document.getElementById('loading');
    const result = document.getElementById('result');
    const error = document.getElementById('error');

    // Enter the "busy" UI state.
    submitBtn.disabled = true;
    loading.classList.add('show');
    result.classList.remove('show');
    error.classList.remove('show');

    try {
        const response = await fetch(apiUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text, language })
        });

        const data = await response.json();
        if (!response.ok || !data.success) {
            throw new Error(data.error || '请求失败');
        }

        displayResult(data);
    } catch (err) {
        showError(err.message);
    } finally {
        // Always restore the idle UI state.
        submitBtn.disabled = false;
        loading.classList.remove('show');
    }
}
|
||||
|
||||
/**
 * Fill in the stats cards and the frame preview, then reveal the result
 * panel.
 * @param {{frames?: Array}} data - API response payload
 */
function displayResult(data) {
    const frames = data.frames || [];
    const byId = (id) => document.getElementById(id);

    byId('frameCount').textContent = frames.length;

    // Duration = timecode of the final frame (left untouched when empty).
    if (frames.length > 0) {
        byId('duration').textContent = frames[frames.length - 1].timeCode.toFixed(2) + 's';
    }

    // Preview only the first three frames; summarize the rest.
    let preview = JSON.stringify(frames.slice(0, 3), null, 2);
    if (frames.length > 3) {
        preview += '\n\n... 共 ' + frames.length + ' 帧';
    }
    byId('framesPreview').textContent = preview;

    byId('result').classList.add('show');
}
|
||||
|
||||
/**
 * Show the error banner with the given message (prefixed "错误: ").
 * @param {string} message - human-readable error text
 */
function showError(message) {
    const banner = document.getElementById('error');
    banner.textContent = '错误: ' + message;
    banner.classList.add('show');
}
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@ -1,23 +1,39 @@
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import concurrent.futures
|
||||
import queue
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from tts_service import TTSService
|
||||
from a2f_service import A2FService
|
||||
from blend_shape_parser import BlendShapeParser
|
||||
|
||||
class TextToBlendShapesService:
|
||||
DEFAULT_SPLIT_PUNCTUATIONS = '。!?;!?;,,'
|
||||
|
||||
def __init__(self, lang='zh-CN', a2f_url="192.168.1.39:52000"):
|
||||
self.tts = TTSService(lang=lang)
|
||||
self.a2f = A2FService(a2f_url=a2f_url)
|
||||
self.parser = BlendShapeParser()
|
||||
|
||||
def text_to_blend_shapes(self, text: str, output_dir: str = None):
|
||||
if output_dir is None:
|
||||
output_dir = tempfile.gettempdir()
|
||||
def text_to_blend_shapes(
|
||||
self,
|
||||
text: str,
|
||||
output_dir: str = None,
|
||||
segment: bool = False,
|
||||
split_punctuations: str = None,
|
||||
max_sentence_length: int = None
|
||||
):
|
||||
if segment:
|
||||
return self._text_to_blend_shapes_segmented(
|
||||
text,
|
||||
output_dir,
|
||||
split_punctuations=split_punctuations,
|
||||
max_sentence_length=max_sentence_length
|
||||
)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
|
||||
audio_path = os.path.join(output_dir, f'tts_{timestamp}.wav')
|
||||
output_dir, audio_path = self._prepare_output_paths(output_dir)
|
||||
|
||||
self.tts.text_to_audio(text, audio_path)
|
||||
csv_path = self.a2f.audio_to_csv(audio_path)
|
||||
@ -29,3 +45,235 @@ class TextToBlendShapesService:
|
||||
'audio_path': audio_path,
|
||||
'csv_path': csv_path
|
||||
}
|
||||
|
||||
def iter_text_to_blend_shapes_stream(
    self,
    text: str,
    output_dir: str = None,
    split_punctuations: str = None,
    max_sentence_length: int = None,
    first_sentence_split_size: int = None
):
    """Stream blend-shape frames for `text`, sentence by sentence.

    The text is split into sentences, each sentence is synthesized
    concurrently (TTS -> A2F -> CSV parse), and messages are yielded as
    soon as the next in-order sentence is ready:

      {'type': 'status', ...}                          progress updates
      {'type': 'frame', 'frame': {...}}                one blend-shape frame
      {'type': 'error', 'message': str}                terminal failure
      {'type': 'end', 'frames': int,
       'audio_paths': [...], 'csv_paths': [...]}       terminal success

    Frames of "continuation" sentences (created by the first-sentence
    split optimisation) get their timeCode shifted so playback is
    seamless across chunks.
    """
    output_dir = output_dir or tempfile.gettempdir()
    os.makedirs(output_dir, exist_ok=True)

    sentences = self.split_sentences(
        text,
        split_punctuations=split_punctuations,
        max_sentence_length=max_sentence_length,
        first_sentence_split_size=first_sentence_split_size
    )
    if not sentences:
        yield {'type': 'error', 'message': '文本为空'}
        return

    yield {'type': 'status', 'stage': 'split', 'sentences': len(sentences), 'message': f'已拆分为 {len(sentences)} 个句子'}

    # Workers push finished sentences here; the main loop re-orders them.
    result_queue = queue.Queue()

    def process_and_queue(index, sentence):
        """Process one sentence in a worker thread and enqueue the result."""
        try:
            print(f"[工作线程 {index}] 开始处理: {sentence[:30]}...")
            frames, audio_path, csv_path = self._process_sentence(sentence, output_dir, index)
            result_queue.put((index, 'success', frames, audio_path, csv_path, None))
            print(f"[工作线程 {index}] 完成!已生成 {len(frames)} 帧并加入队列")
        except Exception as e:
            print(f"[工作线程 {index}] 失败: {str(e)}")
            import traceback
            traceback.print_exc()
            result_queue.put((index, 'error', None, None, None, str(e)))

    # BUGFIX: collect generated artifact paths per sentence so the final
    # 'end' message can report them (``_text_to_blend_shapes_segmented``
    # reads 'audio_paths'/'csv_paths' from it, previously always empty).
    audio_paths = [None] * len(sentences)
    csv_paths = [None] * len(sentences)

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        for index, sentence in enumerate(sentences):
            executor.submit(process_and_queue, index, sentence)

        # Consume results INSIDE the executor context so frames stream out
        # while later sentences are still being synthesized.
        completed = {}
        next_index = 0
        total_frames = 0
        cumulative_time = 0.0  # running clock for seamless continuation chunks

        while next_index < len(sentences):
            if next_index not in completed:
                yield {
                    'type': 'status',
                    'stage': 'processing',
                    'sentence_index': next_index,
                    'sentences': len(sentences),
                    'message': f'正在处理 {next_index + 1}/{len(sentences)}'
                }

                # Block until the next in-order sentence has arrived.
                while next_index not in completed:
                    try:
                        index, status, frames, audio_path, csv_path, error = result_queue.get(timeout=1)
                        completed[index] = (status, frames, error)
                        audio_paths[index] = audio_path
                        csv_paths[index] = csv_path
                        print(f"[主线程] 收到句子 {index} 的处理结果")
                    except queue.Empty:
                        continue

            status, frames, error = completed[next_index]
            if status == 'error':
                yield {'type': 'error', 'message': f'句子 {next_index} 处理失败: {error}'}
                return

            # Continuation chunks keep accumulating time; others restart at 0.
            is_continuation = self.is_continuation[next_index] if next_index < len(self.is_continuation) else False

            print(f"[主线程] 正在推送句子 {next_index} 的 {len(frames)} 帧 {'(连续)' if is_continuation else ''}")

            if not is_continuation and next_index > 0:
                cumulative_time = 0.0

            for frame in frames:
                # Shift the time code onto the running clock.
                frame['timeCode'] = cumulative_time + frame['timeCode']
                frame['sentenceIndex'] = next_index
                frame['isContinuation'] = is_continuation
                total_frames += 1
                yield {'type': 'frame', 'frame': frame}

            # Advance the clock to the last emitted frame's (shifted) time.
            if frames:
                cumulative_time = frames[-1]['timeCode']

            next_index += 1

    print(f"[主线程] 流式传输完成,共 {total_frames} 帧")
    yield {
        'type': 'end',
        'frames': total_frames,
        'audio_paths': [p for p in audio_paths if p],
        'csv_paths': [p for p in csv_paths if p]
    }
|
||||
|
||||
def _process_sentence(self, sentence, output_dir, index):
|
||||
"""处理单个句子: TTS -> A2F -> 解析"""
|
||||
import time
|
||||
start_time = time.time()
|
||||
|
||||
print(f"[线程 {index}] 开始处理: {sentence[:30]}...")
|
||||
_, audio_path = self._prepare_output_paths(output_dir, suffix=f's{index:03d}')
|
||||
|
||||
print(f"[线程 {index}] TTS 开始...")
|
||||
tts_start = time.time()
|
||||
self.tts.text_to_audio(sentence, audio_path)
|
||||
tts_time = time.time() - tts_start
|
||||
print(f"[线程 {index}] TTS 完成,耗时 {tts_time:.2f}秒,A2F 开始...")
|
||||
|
||||
a2f_start = time.time()
|
||||
csv_path = self.a2f.audio_to_csv(audio_path)
|
||||
a2f_time = time.time() - a2f_start
|
||||
print(f"[线程 {index}] A2F 完成,耗时 {a2f_time:.2f}秒,解析中...")
|
||||
|
||||
parse_start = time.time()
|
||||
frames = list(self.parser.iter_csv_to_blend_shapes(csv_path))
|
||||
parse_time = time.time() - parse_start
|
||||
|
||||
total_time = time.time() - start_time
|
||||
print(f"[线程 {index}] 完成!生成了 {len(frames)} 帧 | 总耗时: {total_time:.2f}秒 (TTS: {tts_time:.2f}s, A2F: {a2f_time:.2f}s, 解析: {parse_time:.2f}s)")
|
||||
|
||||
return frames, audio_path, csv_path
|
||||
|
||||
def _text_to_blend_shapes_segmented(
|
||||
self,
|
||||
text: str,
|
||||
output_dir: str = None,
|
||||
split_punctuations: str = None,
|
||||
max_sentence_length: int = None
|
||||
):
|
||||
frames = []
|
||||
audio_paths = []
|
||||
csv_paths = []
|
||||
|
||||
for message in self.iter_text_to_blend_shapes_stream(
|
||||
text,
|
||||
output_dir,
|
||||
split_punctuations=split_punctuations,
|
||||
max_sentence_length=max_sentence_length
|
||||
):
|
||||
if message.get('type') == 'frame':
|
||||
frames.append(message['frame'])
|
||||
elif message.get('type') == 'error':
|
||||
return {
|
||||
'success': False,
|
||||
'error': message.get('message', 'Unknown error')
|
||||
}
|
||||
elif message.get('type') == 'end':
|
||||
audio_paths = message.get('audio_paths', [])
|
||||
csv_paths = message.get('csv_paths', [])
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'frames': frames,
|
||||
'audio_paths': audio_paths,
|
||||
'csv_paths': csv_paths
|
||||
}
|
||||
|
||||
def split_sentences(self, text: str, split_punctuations: str = None, max_sentence_length: int = None, first_sentence_split_size: int = None):
    """Split text into sentences, optimising the first sentence for latency.

    Splits on strong punctuation, optionally subdivides the first sentence
    so the very first frames arrive sooner, and optionally caps sentence
    length.  Side effect: sets ``self.is_continuation``, a flag list
    aligned one-to-one with the returned sentences; True marks a chunk
    that must play back seamlessly after its predecessor.
    """
    if not text:
        # Keep the flag list consistent even for empty input.
        self.is_continuation = []
        return []

    # Newlines act as hard sentence boundaries.
    normalized = re.sub(r'[\r\n]+', '。', text.strip())
    punctuations = split_punctuations or self.DEFAULT_SPLIT_PUNCTUATIONS
    if punctuations:
        escaped = re.escape(punctuations)
        # Lookbehind split keeps the punctuation attached to its sentence.
        split_re = re.compile(rf'(?<=[{escaped}])')
        chunks = split_re.split(normalized)
    else:
        chunks = [normalized]

    sentences = [chunk.strip() for chunk in chunks if chunk.strip()]

    # Per-sentence flag: True means "play seamlessly after previous chunk".
    self.is_continuation = [False] * len(sentences)

    # Optional: split the first sentence so first frames arrive sooner.
    if first_sentence_split_size and sentences:
        first = sentences[0]
        length = len(first)

        if length <= 12:
            # Up to 12 chars: split into two halves.
            mid = length // 2
            parts = [first[:mid], first[mid:]]
        else:
            # Longer: 6 chars, 6 chars, then the remainder.
            parts = [first[:6], first[6:12], first[12:]]
        # BUGFIX: drop empty pieces (a 1-char first sentence would otherwise
        # produce an empty part and a pointless TTS call).
        parts = [p for p in parts if p]

        sentences = parts + sentences[1:]
        # All parts after the first continue seamlessly.
        self.is_continuation = [False] + [True] * (len(parts) - 1) + [False] * (len(sentences) - len(parts))
        print(f"[拆分优化] 第一句({length}字)拆分为{len(parts)}部分: {[len(p) for p in parts]} - 连续播放")

    if not max_sentence_length or max_sentence_length <= 0:
        return sentences

    limited = []
    limited_flags = []
    for sentence, flag in zip(sentences, self.is_continuation):
        if len(sentence) <= max_sentence_length:
            limited.append(sentence)
            limited_flags.append(flag)
            continue

        start = 0
        while start < len(sentence):
            limited.append(sentence[start:start + max_sentence_length])
            # BUGFIX: previously the flags were not rebuilt here, so
            # self.is_continuation went out of sync with the returned list.
            # Pieces after the first continue seamlessly.
            limited_flags.append(flag if start == 0 else True)
            start += max_sentence_length
    self.is_continuation = limited_flags
    return limited
|
||||
|
||||
def _prepare_output_paths(self, output_dir: str = None, suffix: str = None):
|
||||
if output_dir is None:
|
||||
output_dir = tempfile.gettempdir()
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
|
||||
suffix_part = f'_{suffix}' if suffix else ''
|
||||
audio_path = os.path.join(output_dir, f'tts_{timestamp}{suffix_part}.wav')
|
||||
return output_dir, audio_path
|
||||
|
||||
@ -1,20 +1,35 @@
|
||||
import os
|
||||
import threading
|
||||
import pyttsx3
|
||||
|
||||
class TTSService:
    """Text-to-speech service that renders text to WAV files with pyttsx3."""

    # The streaming pipeline calls text_to_audio from several worker
    # threads; pyttsx3 engines are not reliably thread-safe, so every
    # synthesis run is serialized by this class-wide lock.
    _lock = threading.Lock()

    def __init__(self, lang='zh-CN'):
        # Target language; 'zh-CN' selects a Chinese voice when available.
        self.lang = lang
        self.engine = pyttsx3.init()

        if lang == 'zh-CN':
            voices = self.engine.getProperty('voices')
            for voice in voices:
                if 'chinese' in voice.name.lower() or 'zh' in voice.id.lower():
                    self.engine.setProperty('voice', voice.id)
                    break

    def text_to_audio(self, text: str, output_path: str) -> str:
        """Render `text` to a WAV file at `output_path` (via pyttsx3).

        BUGFIX: a stale early `return` left over from a merge previously
        made the lock-protected implementation below unreachable; the
        method now always runs the thread-safe path.  A fresh engine is
        created per call under the class lock because a shared engine
        misbehaves when driven from multiple threads.

        Returns `output_path`.
        """
        parent = os.path.dirname(output_path)
        if parent:  # dirname is '' for bare filenames; makedirs('') raises
            os.makedirs(parent, exist_ok=True)

        with self._lock:
            engine = pyttsx3.init()
            try:
                # Select a Chinese voice when configured and available.
                if self.lang == 'zh-CN':
                    voices = engine.getProperty('voices')
                    for voice in voices:
                        if 'chinese' in voice.name.lower() or 'zh' in voice.id.lower():
                            engine.setProperty('voice', voice.id)
                            break

                # Moderate speaking rate (words per minute).
                engine.setProperty('rate', 150)

                # Synthesize to WAV.
                engine.save_to_file(text, output_path)
                engine.runAndWait()

                return output_path
            finally:
                # Always release engine resources, even on failure.
                engine.stop()
                del engine
|
||||
13
test_tts.py
Normal file
13
test_tts.py
Normal file
@ -0,0 +1,13 @@
|
||||
import requests
import json

# Smoke test: send a short Chinese phrase to the local
# text-to-blendshapes endpoint and print the raw response.
url = "http://localhost:5001/text-to-blendshapes"
data = {
    "text": "你好",
    "language": "zh-CN"
}

print("发送测试请求...")
# BUGFIX: always pass a timeout — requests.post without one can hang
# forever if the service is unresponsive.
response = requests.post(url, json=data, timeout=30)
print(f"状态码: {response.status_code}")
print(f"响应: {response.json()}")
|
||||
176
需求.md
Normal file
176
需求.md
Normal file
@ -0,0 +1,176 @@
|
||||
我现在已经跑通了a2f,能够将音频转化成csv格式文件,我想让你帮我实现一个项目:用py实现三个函数:文字转音频文件/音频文件转csv文件/csv文件转52个形态键,最后再暴露出一个文字输入的接口,输出52个形态键的数据
|
||||
python_services项目你可以作为参考,# Babylon.js + A2F 低延迟实时嘴型方案设计文档
|
||||
|
||||
## 1. 文档目的
|
||||
|
||||
本文档用于指导在 **Babylon.js(Web)** 环境下,基于 **Audio2Face(A2F)** 实现“尽可能低延迟”的数字人嘴型驱动方案。目标并非严格意义上的零延迟实时,而是在 Web 约束下实现 **准实时(400–600ms 首帧延迟)** 且稳定可上线的工程方案。
|
||||
|
||||
---
|
||||
|
||||
## 2. 设计约束与前提
|
||||
|
||||
### 2.1 技术约束
|
||||
|
||||
* A2F 本身为 **非严格流式模型**,需要一定音频前瞻(lookahead)
|
||||
* Babylon.js MorphTarget 为 **CPU 驱动 + GPU 顶点更新**,性能敏感
|
||||
|
||||
|
||||
|
||||
## 3. 总体方案概述
|
||||
|
||||
### 3.1 核心思想
|
||||
|
||||
* **文本按句拆分**,缩短首帧等待时间
|
||||
* **句级流水线处理**,而非整段阻塞
|
||||
* **音频与嘴型数据流式推送**
|
||||
* 前端仅负责 **插值播放**,不做重计算
|
||||
|
||||
### 3.2 总体架构
|
||||
|
||||
```
|
||||
文本输入
|
||||
↓
|
||||
句子拆分(强停顿标点)
|
||||
↓
|
||||
句子队列(Pipeline)
|
||||
↓
|
||||
┌──────────────┐
|
||||
│ 流式 TTS │
|
||||
└──────────────┘
|
||||
↓ PCM chunk
|
||||
┌──────────────┐
|
||||
│ A2F(句级) │
|
||||
└──────────────┘
|
||||
↓ BlendShape Frames
|
||||
┌──────────────┐
|
||||
│ 二进制传输 │
|
||||
└──────────────┘
|
||||
↓
|
||||
Babylon.js 插值播放
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 文本拆句策略
|
||||
|
||||
### 4.1 拆分规则
|
||||
|
||||
* 仅按 **强停顿标点** 拆分:
|
||||
|
||||
* `。` `!` `?` `;`
|
||||
|
||||
|
||||
|
||||
### 4.3 句尾处理
|
||||
|
||||
* 每句音频结尾 **补 150–300ms 静音**
|
||||
* 用于嘴型自然回到 neutral
|
||||
|
||||
---
|
||||
|
||||
## 5. 后端处理流程
|
||||
|
||||
### 5.1 流水线调度
|
||||
|
||||
* 同时最多处理 **2–3 句**
|
||||
* 始终保证:
|
||||
|
||||
* 当前句播放中
|
||||
* 下一句已 ready
|
||||
|
||||
### 5.2 TTS 要求
|
||||
|
||||
* 必须支持 **流式 / chunk 输出**
|
||||
* chunk 大小建议:100–200ms PCM
|
||||
|
||||
### 5.3 A2F 调用策略
|
||||
|
||||
* 不等待整段文本
|
||||
* 以 **句为最小单元**调用
|
||||
|
||||
---
|
||||
|
||||
## 6. 数据格式设计(替代 CSV)
|
||||
|
||||
### 6.1 为什么不用 CSV
|
||||
|
||||
* 文本解析慢
|
||||
* 数据冗余大
|
||||
* 不支持流式 append
|
||||
|
||||
### 6.2 推荐二进制结构
|
||||
|
||||
```
|
||||
Frame {
|
||||
uint16 timestamp_ms;
|
||||
uint8 shape_count;
|
||||
uint8 shape_indices[shape_count];
|
||||
int8 shape_values[shape_count]; // -127 ~ 127
|
||||
}
|
||||
```
|
||||
|
||||
### 6.3 优点
|
||||
|
||||
* 数据量减少 60–80%
|
||||
* WebSocket 直传
|
||||
* JS 解析成本极低
|
||||
|
||||
---
|
||||
|
||||
## 7. 前端(Babylon.js)播放方案
|
||||
|
||||
### 7.1 核心原则
|
||||
|
||||
* **不用 onBeforeRender 逐帧 setInfluence**
|
||||
* 使用 **Animation / AnimationGroup**
|
||||
* 前端只负责:
|
||||
|
||||
* buffer
|
||||
* 时间对齐
|
||||
* 插值
|
||||
|
||||
### 7.2 帧率与形态键
|
||||
|
||||
| 项目 | 建议 |
|
||||
| ------------ | --------- |
|
||||
| 嘴型帧率 | 15–20 fps |
|
||||
| 形态键 | 20–30 个 |
|
||||
| Morph Normal | 关闭 |
|
||||
|
||||
### 7.3 句间过渡
|
||||
|
||||
* 句尾:morph → neutral(100ms lerp)
|
||||
* 句首:neutral → first frame(100ms fade in)
|
||||
|
||||
---
|
||||
|
||||
## 8. 延迟评估
|
||||
|
||||
| 环节 | 典型延迟 |
|
||||
| --------- | ---------- |
|
||||
| 流式 TTS | 100–200 ms |
|
||||
| A2F 计算 | 200–300 ms |
|
||||
| 网络 | 20–50 ms |
|
||||
| 前端 buffer | ~100 ms |
|
||||
|
||||
**总首帧延迟:≈ 400–600 ms**
|
||||
|
||||
---
|
||||
|
||||
## 9. 风险与边界
|
||||
|
||||
* A2F 不适合 <200ms 的强实时场景
|
||||
* 高并发时需限流(TTS / A2F GPU)
|
||||
* 超长文本必须强制拆句
|
||||
|
||||
---
|
||||
|
||||
## 10. 结论
|
||||
|
||||
* **拆句 + 流水线** 是 Web + A2F 的最优解
|
||||
* CSV 必须淘汰,二进制流是必选项
|
||||
* 在 Babylon.js 中可实现稳定、可上线的准实时数字人嘴型系统
|
||||
|
||||
---
|
||||
正常流式传输逻辑是后端将文字拆分成句,每个句调用TTS生成音频,再调用A2F生成blendshape数据,最后将blendshape数据发送给前端。要有并发处理能力,能够同时处理多句。优先将当前句的blendshape数据发送给前端,等下一句blendshape数据 ready 后再发送。FLASK后端可以使用异步处理,用FlaskAPI库的asyncio支持。前端可以使用WebSocket接收blendshape数据,实时播放。
|
||||
Reference in New Issue
Block a user