llm.voiceChat

Send voice messages to an external LLM service and receive voice responses.

The llm.voiceChat() function is a method returned by the useLLM hook. Use it to initiate a voice conversation with the external Large Language Model (LLM) service: it accepts audio input and returns the LLM's response as audio.

Syntax

const { audioUrl } = await llm.voiceChat(options: LLMVoiceChatOptions);

Parameters

  • options: LLMVoiceChatOptions - An object with the following properties:

    • transcribeAudioUrl: string (required) - A base64-encoded data URL containing the audio to transcribe (e.g. the audioUrl returned by llm.stopRecording()).
    • transcribeLanguage?: string (optional) - A string specifying the language of the audio.
    • transcribePrompt?: string (optional) - A string specifying a prompt to guide the transcription model.
    • chatMessages?: OpenAIMessage[] (optional) - An array of message objects to start the conversation. Each message object should have a role (either "system", "user", or "assistant") and a content string.
    • chatTemplate?: string (optional) - A string defining a model's template to structure the conversation.
    • chatInputs?: object (optional) - An object containing additional inputs to the model.
    • speakModelId?: string (optional) - The ID of the model to be used for the TTS operation. Defaults to the latest model.
    • speechVoiceId?: string (optional) - The ID of the voice to be used for the TTS operation. Defaults to the latest voice.
    • speechVoiceSettings?: { stability: number; similarity_boost: number } (optional) - Fine-tuning parameters for the voice.
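
For illustration, here's a sketch of a call that sets most of these options. The model and voice IDs below are hypothetical placeholders (substitute the IDs supported by your service), and recordedAudioUrl is assumed to come from llm.stopRecording():

const { audioUrl, messages } = await llm.voiceChat({
  transcribeAudioUrl: recordedAudioUrl, // base64-encoded audio data URL
  transcribeLanguage: "en",
  transcribePrompt: "",
  chatMessages: [
    { role: "system", content: "Reply briefly and conversationally." },
  ],
  speakModelId: "tts-model-id", // hypothetical model ID
  speechVoiceId: "voice-id", // hypothetical voice ID
  speechVoiceSettings: { stability: 0.5, similarity_boost: 0.75 },
});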

Returns

A Promise that resolves to an object:

  • { audioUrl: string, messages: OpenAIMessage[] }

    • audioUrl: string - A URL to the generated audio response.

    • messages: OpenAIMessage[] - An array containing two messages: the transcribed message from the user and the reply from the assistant. For example:

    messages: 
          [
            { role: "user", content: "Hello"},
            { role: "assistant", content: "Hello! How can I assist you today?" },
          ]
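
A common pattern (used in the full example below) is to append the returned messages to your local conversation history, so that subsequent calls receive the whole conversation via chatMessages. Here, history and setHistory are assumed to be React state holding OpenAIMessage[]:

const { audioUrl, messages } = await llm.voiceChat({
  transcribeAudioUrl: recordedAudioUrl,
  chatMessages: history,
});
// Append both the transcribed user turn and the assistant's reply,
// so the next voiceChat() call sees the full conversation.
setHistory([...history, ...messages]);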

Example

Here's an example of how to use llm.voiceChat() in a React component (live demo):

"use client";
 
import useLLM, { OpenAIMessage } from "usellm";
import { useState } from "react";
 
export default function VoiceChat() {
  const [status, setStatus] = useState<Status>("idle");
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const llm = useLLM({
    serviceUrl: "https://usellm.org/api/llm", // For testing only. Follow this guide to create your own service URL: https://usellm.org/docs/api-reference/create-llm-service
  });
  const [history, setHistory] = useState<OpenAIMessage[]>([
    {
      role: "system",
      content:
        "You're a voice chatbot powered by the ChatGPT API and developed using useLLM. Reply in a conversational tone, keep answers brief!",
    },
  ]);
 
  async function handleClick() {
    if (status === "idle") {
      // Start a new recording and clear any previous response
      setAudioUrl(null);
      await llm.record();
      setStatus("recording");
    } else if (status === "recording") {
      // Stop recording, send the audio to the LLM, and play the reply
      setStatus("thinking");
      const { audioUrl } = await llm.stopRecording();
      const { audioUrl: responseAudioUrl, messages } = await llm.voiceChat({
        transcribeAudioUrl: audioUrl,
        transcribeLanguage: "en",
        transcribePrompt: "",
        chatMessages: history,
      });
      setHistory([...history, ...messages]); // keep the full conversation
      setAudioUrl(responseAudioUrl);
      setStatus("idle");
    }
  }
 
  const Icon = status === "recording" ? Square : Mic;
 
  return (
    <div className="p-4 flex flex-col items-start overflow-y-scroll">
      <h2 className="font-semibold text-2xl">AI Voice Chat</h2>
      <button
        className="p-2 border rounded bg-gray-100 hover:bg-gray-200 active:bg-gray-300 dark:bg-white dark:text-black font-medium mt-4"
        onClick={handleClick}
      >
        <Icon />
      </button>
      {status !== "idle" && (
        <div className="text-center mt-4 text-lg">{capitalize(status)}...</div>
      )}
      {audioUrl && <audio autoPlay className="mt-4" controls src={audioUrl} />}
    </div>
  );
}
 
function capitalize(word: string) {
  return word.charAt(0).toUpperCase() + word.substring(1);
}
 
const Mic = () => (
  // you can also use an icon library like `react-icons` here
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="24"
    height="24"
    viewBox="0 0 24 24"
    fill="none"
    stroke="currentColor"
    strokeWidth={2}
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"></path>
    <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
    <line x1="12" x2="12" y1="19" y2="22"></line>
  </svg>
);
 
const Square = () => (
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="24"
    height="24"
    viewBox="0 0 24 24"
    fill="none"
    stroke="currentColor"
    strokeWidth={2}
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <rect width="18" height="18" x="3" y="3" rx="2" ry="2"></rect>
  </svg>
);
 
type Status =
  | "idle"
  | "recording"
  | "transcribing"
  | "understanding"
  | "thinking"
  | "speaking";
"use client";
 
import useLLM, { OpenAIMessage } from "usellm";
import { useState } from "react";
 
export default function VoiceChat() {
  const [status, setStatus] = useState<Status>("idle");
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const llm = useLLM({
    serviceUrl: "https://usellm.org/api/llm", // For testing only. Follow this guide to create your own service URL: https://usellm.org/docs/api-reference/create-llm-service
  });
  const [history, setHistory] = useState<OpenAIMessage[]>([
    {
      role: "system",
      content:
        "You're a voice chatbot powered by the ChatGPT API and developed using useLLM. Reply in a conversational tone, keep answers brief!",
    },
  ]);
 
  async function handleClick() {
    if (status === "idle") {
      setAudioUrl(null);
      await llm.record();
      setStatus("recording");
    } else if (status === "recording") {
      setStatus("thinking");
      const { audioUrl } = await llm.stopRecording();
      const { audioUrl: responseAudioUrl, messages } = await llm.voiceChat({
        transcribeAudioUrl: audioUrl,
        transcribeLanguage: "en",
        transcribePrompt: "",
        chatMessages: history,
      });
      setHistory([...history, ...messages]);
      setAudioUrl(responseAudioUrl);
      setStatus("idle");
    }
  }
 
  const Icon = status === "recording" ? Square : Mic;
 
  return (
    <div className="p-4 flex flex-col items-start overflow-y-scroll">
      <h2 className="font-semibold text-2xl">AI Voice Chat</h2>
      <button
        className="p-2 border rounded bg-gray-100 hover:bg-gray-200 active:bg-gray-300 dark:bg-white dark:text-black font-medium mt-4"
        onClick={handleClick}
      >
        <Icon />
      </button>
      {status !== "idle" && (
        <div className="text-center mt-4 text-lg">{capitalize(status)}...</div>
      )}
      {audioUrl && <audio autoPlay className="mt-4" controls src={audioUrl} />}
    </div>
  );
}
 
function capitalize(word: string) {
  return word.charAt(0).toUpperCase() + word.substring(1);
}
 
const Mic = () => (
  // you can also use an icon library like `react-icons` here
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="24"
    height="24"
    viewBox="0 0 24 24"
    fill="none"
    stroke="currentColor"
    strokeWidth={2}
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"></path>
    <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
    <line x1="12" x2="12" y1="19" y2="22"></line>
  </svg>
);
 
const Square = () => (
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="24"
    height="24"
    viewBox="0 0 24 24"
    fill="none"
    stroke="currentColor"
    strokeWidth={2}
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <rect width="18" height="18" x="3" y="3" rx="2" ry="2"></rect>
  </svg>
);
 
type Status =
  | "idle"
  | "recording"
  | "transcribing"
  | "understanding"
  | "thinking"
  | "speaking";

In the example above, the user starts recording a voice message by clicking the mic icon and stops the recording by clicking the stop icon. The recorded audio is then sent to llm.voiceChat(), and the user can listen to the response because the audio URL it returns is used as the source of the <audio> HTML element.