diff --git a/cookbook/tools/elevenlabs_tools.py b/cookbook/tools/elevenlabs_tools.py index 5a2fd3e960..6f49da916a 100644 --- a/cookbook/tools/elevenlabs_tools.py +++ b/cookbook/tools/elevenlabs_tools.py @@ -30,5 +30,4 @@ audio_agent.print_response("Generate a very long audio of history of french revolution") - audio_agent.print_response("Generate a kick sound effect") diff --git a/libs/agno/agno/agent/agent.py b/libs/agno/agno/agent/agent.py index cbfa810a73..c1a7f3b6d7 100644 --- a/libs/agno/agno/agent/agent.py +++ b/libs/agno/agno/agent/agent.py @@ -536,11 +536,13 @@ def _run( if model_response_chunk.event == ModelResponseEvent.assistant_response.value: if model_response_chunk.content is not None and model_response.content is not None: model_response.content += model_response_chunk.content + # Update the run_response with the content self.run_response.content = model_response_chunk.content self.run_response.created_at = model_response_chunk.created_at yield self.create_run_response( - content=model_response_chunk.content, created_at=model_response_chunk.created_at + content=model_response_chunk.content, + created_at=model_response_chunk.created_at ) # If the model response is a tool_call_started, add the tool call to the run_response elif model_response_chunk.event == ModelResponseEvent.tool_call_started.value: @@ -1299,8 +1301,10 @@ def create_run_response( agent_id=self.agent_id, content=content, tools=self.run_response.tools, + audio=self.run_response.audio, images=self.run_response.images, videos=self.run_response.videos, + response_audio=self.run_response.response_audio, model=self.run_response.model, messages=self.run_response.messages, extra_data=self.run_response.extra_data, @@ -3325,6 +3329,7 @@ def print_response( if stream: _response_content: str = "" reasoning_steps: List[ReasoningStep] = [] + with Live(console=console) as live_log: status = Status("Thinking...", spinner="aesthetic", speed=0.4, refresh_per_second=10) live_log.update(status) @@ -3347,7 +3352,6 @@ def print_response( panels.append(message_panel) if render: live_log.update(Group(*panels)) - for resp in self.run( message=message, messages=messages, audio=audio, images=images, videos=videos, stream=True, **kwargs ): @@ -3356,7 +3360,6 @@ def print_response( _response_content += resp.content if resp.extra_data is not None and resp.extra_data.reasoning_steps is not None: reasoning_steps = resp.extra_data.reasoning_steps - response_content_stream: Union[str, Markdown] = _response_content # Escape special tags before markdown conversion if self.markdown: diff --git a/libs/agno/agno/models/base.py b/libs/agno/agno/models/base.py index e9120aa1fd..cd8b0b0d8f 100644 --- a/libs/agno/agno/models/base.py +++ b/libs/agno/agno/models/base.py @@ -462,8 +462,6 @@ def response_stream(self, messages: List[Message]) -> Iterator[ModelResponse]: # Handle tool calls if present if assistant_message.tool_calls is not None: - yield ModelResponse(content="\n\n") - # Prepare function calls function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(assistant_message, messages) function_call_results: List[Message] = [] diff --git a/libs/agno/agno/run/response.py b/libs/agno/agno/run/response.py index 7a20c3c89e..3b0829aa69 100644 --- a/libs/agno/agno/run/response.py +++ b/libs/agno/agno/run/response.py @@ -66,7 +66,7 @@ class RunResponse: tools: Optional[List[Dict[str, Any]]] = None images: Optional[List[ImageArtifact]] = None # Images attached to the response videos: Optional[List[VideoArtifact]] = None # Videos attached to the response - audio: Optional[List[AudioArtifact]] = None # AudioArtifact attached to the response + audio: Optional[List[AudioArtifact]] = None # Audio attached to the response response_audio: Optional[AudioOutput] = None # Model audio response extra_data: Optional[RunResponseExtraData] = None created_at: int = field(default_factory=lambda: int(time())) @@ -80,16 +80,17 @@ def to_dict(self) -> Dict[str, Any]: _dict["extra_data"] = self.extra_data.to_dict() if self.images is not None: - _dict["images"] = [img.model_dump() for img in self.images] + _dict["images"] = [img.model_dump(exclude_none=True) for img in self.images] if self.videos is not None: - _dict["videos"] = [vid.model_dump() for vid in self.videos] + _dict["videos"] = [vid.model_dump(exclude_none=True) for vid in self.videos] if self.audio is not None: - _dict["audio"] = [aud.model_dump() for aud in self.audio] + _dict["audio"] = [aud.model_dump(exclude_none=True) for aud in self.audio] if isinstance(self.content, BaseModel): _dict["content"] = self.content.model_dump(exclude_none=True) + return _dict def to_json(self) -> str: diff --git a/libs/agno/agno/tools/eleven_labs.py b/libs/agno/agno/tools/eleven_labs.py index 6b41b0ef6e..d12732822d 100644 --- a/libs/agno/agno/tools/eleven_labs.py +++ b/libs/agno/agno/tools/eleven_labs.py @@ -123,7 +123,7 @@ def generate_sound_effect(self, agent: Agent, prompt: str, duration_seconds: Opt Args: prompt (str): Text to generate audio from. - duration_seconds (Optional[float]): Duration in seconds to generate audio from. + duration_seconds (Optional[float]): Duration in seconds to generate audio from. Has to be between 0.5 and 22. Returns: str: Return the path to the generated audio file. """ @@ -155,7 +155,6 @@ def text_to_speech(self, agent: Agent, prompt: str) -> str: Args: prompt (str): Text to generate audio from. - voice_id (Optional[str]): The ID of the voice to use for audio generation. Uses default if none is specified. Returns: str: Return the path to the generated audio file. """