Server Py update

2025-03-29 21:18:19 -04:00
parent 5da627097d
commit 99a6c7d413
1 changed file with 73 additions and 52 deletions
@@ -52,12 +52,15 @@ manager = ConnectionManager()
 # Helper function to convert audio data
 async def decode_audio_data(audio_data: str) -> torch.Tensor:
    """Decode base64 audio data to a torch tensor"""
    try:
        # Decode base64 audio data
        binary_data = base64.b64decode(audio_data.split(',')[1] if ',' in audio_data else audio_data)
-    # Load audio from binary data
+        # Wrap the decoded bytes in an in-memory buffer (nothing is written to disk)
-    buf = BytesIO(binary_data)
+        temp_file = BytesIO(binary_data)
-    audio_tensor, sample_rate = torchaudio.load(buf)
+        
        # Load audio from binary data, explicitly specifying the format
        audio_tensor, sample_rate = torchaudio.load(temp_file, format="wav")
        # Resample if needed
        if sample_rate != generator.sample_rate:
@@ -70,6 +73,10 @@ async def decode_audio_data(audio_data: str) -> torch.Tensor:
            audio_tensor = audio_tensor.squeeze(0)
        return audio_tensor
    except Exception as e:
        print(f"Error decoding audio: {str(e)}")
        # Return a small silent audio segment as fallback
        return torch.zeros(generator.sample_rate // 2)  # 0.5 seconds of silence
 async def encode_audio_data(audio_tensor: torch.Tensor) -> str:
@@ -95,6 +102,7 @@ async def websocket_endpoint(websocket: WebSocket):
            action = request.get("action")
            if action == "generate":
                try:
                    text = request.get("text", "")
                    speaker_id = request.get("speaker", 0)
@@ -116,8 +124,15 @@ async def websocket_endpoint(websocket: WebSocket):
                        "type": "audio_response",
                        "audio": audio_base64
                    })
                except Exception as e:
                    print(f"Error generating audio: {str(e)}")
                    await websocket.send_json({
                        "type": "error",
                        "message": f"Error generating audio: {str(e)}"
                    })
            elif action == "add_to_context":
                try:
                    text = request.get("text", "")
                    speaker_id = request.get("speaker", 0)
                    audio_data = request.get("audio", "")
@@ -132,6 +147,12 @@ async def websocket_endpoint(websocket: WebSocket):
                        "type": "context_updated",
                        "message": "Audio added to context"
                    })
                except Exception as e:
                    print(f"Error adding to context: {str(e)}")
                    await websocket.send_json({
                        "type": "error",
                        "message": f"Error processing audio: {str(e)}"
                    })
            elif action == "clear_context":
                context_segments = []