Skip to content

Latest commit

 

History

History
198 lines (150 loc) · 6.23 KB

text-to-speech-javascript.md

File metadata and controls

198 lines (150 loc) · 6.23 KB
ms.topic manager ms.service ms.date ms.reviewer ms.author author recommendations
include
nitinme
azure-ai-openai
09/12/2024
eur
eur
eric-urban
false

Source code | Package (npm) | Samples

Prerequisites

Microsoft Entra ID prerequisites

For the recommended keyless authentication with Microsoft Entra ID, you need to:

  • Install the Azure CLI used for keyless authentication with Microsoft Entra ID.
  • Assign the Cognitive Services User role to your user account. You can assign roles in the Azure portal under Access control (IAM) > Add role assignment.

Set up

  1. Create a new folder synthesis-quickstart and go to the quickstart folder with the following command:

    mkdir synthesis-quickstart && cd synthesis-quickstart
  2. Create the package.json with the following command:

    npm init -y
  3. Install the OpenAI client library for JavaScript with:

    npm install openai
  4. For the recommended passwordless authentication:

    npm install @azure/identity

Retrieve resource information

[!INCLUDE resource authentication]

Caution

To use the recommended keyless authentication with the SDK, make sure that the AZURE_OPENAI_API_KEY environment variable isn't set.

Create a speech file

  1. Create the index.js file with the following code:

    const { writeFile } = require("fs/promises");
    const { AzureOpenAI } = require("openai");
    const { DefaultAzureCredential, getBearerTokenProvider } = require("@azure/identity");
    require("openai/shims/node");
    
    // You will need to set these environment variables or edit the following values
    const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "Your endpoint";
    const speechFilePath = "<path to save the speech file>";
    
    // Required Azure OpenAI deployment name and API version
    const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "tts";
    const apiVersion = process.env.OPENAI_API_VERSION || "2024-08-01-preview";
    
    // keyless authentication    
    const credential = new DefaultAzureCredential();
    const scope = "https://cognitiveservices.azure.com/.default";
    const azureADTokenProvider = getBearerTokenProvider(credential, scope);
    
    function getClient() {
      return new AzureOpenAI({
        endpoint,
        azureADTokenProvider,
        apiVersion,
        deployment: deploymentName,
      });
    }
    
    async function generateAudioStream(
      client,
      params
    ) {
      const response = await client.audio.speech.create(params);
      if (response.ok) return response.body;
      throw new Error(`Failed to generate audio stream: ${response.statusText}`);
    }
    export async function main() {
      console.log("== Text to Speech Sample ==");
    
      const client = getClient();
      const streamToRead = await generateAudioStream(client, {
        model: deploymentName,
        voice: "alloy",
        input: "the quick brown chicken jumped over the lazy dogs",
      });
    
      console.log(`Streaming response to ${speechFilePath}`);
      await writeFile(speechFilePath, streamToRead);
      console.log("Finished streaming");
    }
    
    main().catch((err) => {
      console.error("The sample encountered an error:", err);
    });
  2. Sign in to Azure with the following command:

    az login
  3. Run the JavaScript file.

    node index.js
  1. Create the index.js file with the following code:

    const { writeFile } = require("fs/promises");
    const { AzureOpenAI } = require("openai");
    require("openai/shims/node");
    
    // You will need to set these environment variables or edit the following values
    const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "Your endpoint";
    const apiKey = process.env.AZURE_OPENAI_API_KEY || "Your API key";
    const speechFilePath = "<path to save the speech file>";
    
    // Required Azure OpenAI deployment name and API version
    const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "tts";
    const apiVersion = process.env.OPENAI_API_VERSION || "2024-08-01-preview";
    
    function getClient() {
      return new AzureOpenAI({
        endpoint,
        apiKey,
        apiVersion,
        deployment: deploymentName,
      });
    }
    
    async function generateAudioStream(
      client,
      params
    ) {
      const response = await client.audio.speech.create(params);
      if (response.ok) return response.body;
      throw new Error(`Failed to generate audio stream: ${response.statusText}`);
    }
    export async function main() {
      console.log("== Text to Speech Sample ==");
    
      const client = getClient();
      const streamToRead = await generateAudioStream(client, {
        model: deploymentName,
        voice: "alloy",
        input: "the quick brown chicken jumped over the lazy dogs",
      });
    
      console.log(`Streaming response to ${speechFilePath}`);
      await writeFile(speechFilePath, streamToRead);
      console.log("Finished streaming");
    }
    
    main().catch((err) => {
      console.error("The sample encountered an error:", err);
    });
  2. Run the JavaScript file.

    node index.js