🔀 Merge pull request #1403 from jovotech/v4/dev

🔖 Prepare latest release
jovotech · Aug 18, 2022 · fa7378c · fa7378c
2 parents 85f3bac + 9219790
commit fa7378c
Show file tree

Hide file tree

Showing 16 changed files with 379 additions and 9 deletions.
diff --git a/docs/tts.md b/docs/tts.md
@@ -27,7 +27,7 @@ Learn more about Jovo TTS integrations in the following sections:
 
 Currently, the following integrations are available with Jovo `v4`:
 
-- Polly TTS (_work in progress_)
+- [Polly TTS](https://www.jovo.tech/marketplace/tts-polly)
 
 ## Configuration
 
@@ -57,13 +57,13 @@ new PollyTts({
   // ...
   cache: new S3TtsCache({ /* ... */ }),
   fallbackLocale: 'en',
-  fileExtension: 'mp3',
+  outputFormat: 'mp3',
 }),
 ```
 
 - `cache`: Initialize a TTS Cache integration here to store converted audio files on a cloud service, for example AWS S3. See [custom TTS cache](#custom-tts-cache) for more information.
 - `fallbackLocale`: The locale that gets used for the creation of the audio files in case no locale can be found in the [request](./request.md).
-- `fileExtension`: The desired format of the resulting audio, for example `mp3`.
+- `outputFormat`: The desired format of the resulting audio, for example `mp3`.
 
 ## Custom Implementation
 

diff --git a/framework/src/plugins/TtsPlugin.ts b/framework/src/plugins/TtsPlugin.ts
@@ -18,7 +18,7 @@ export enum TtsTextType {
 export interface TtsPluginConfig extends PluginConfig {
   cache?: TtsCachePlugin;
   fallbackLocale: string;
-  fileExtension: string;
+  outputFormat: string;
 }
 
 // Provide basic functionality that will then be used by all TTS plugins
@@ -105,7 +105,7 @@ export abstract class TtsPlugin<
     let ttsResponse;
 
     if (this.config.cache) {
-      ttsResponse = await this.config.cache.getItem(audioKey, locale, this.config.fileExtension);
+      ttsResponse = await this.config.cache.getItem(audioKey, locale, this.config.outputFormat);
       if (ttsResponse) {
         if (!ttsResponse.text) {
           ttsResponse.text = text;
@@ -134,10 +134,8 @@ export abstract class TtsPlugin<
   private buildAudioTag(data?: TtsData): string | undefined {
     if (data?.url) {
       return SsmlUtilities.buildAudioTag(data.url);
-    } else if (data?.encodedAudio && data?.contentType) {
-      return SsmlUtilities.buildAudioTag(
-        AudioUtilities.buildBase64Uri(data.encodedAudio, data.contentType),
-      );
+    } else if (data?.encodedAudio) {
+      return SsmlUtilities.buildAudioTag(data.encodedAudio);
     }
   }
 

diff --git a/integrations/tts-polly/.npmignore b/integrations/tts-polly/.npmignore
@@ -0,0 +1,5 @@
+.idea
+node_modules
+npm-debug.log
+/src
+/test
diff --git a/integrations/tts-polly/CHANGELOG.md b/integrations/tts-polly/CHANGELOG.md
@@ -0,0 +1 @@
+# Changelog
diff --git a/integrations/tts-polly/README.md b/integrations/tts-polly/README.md
@@ -0,0 +1,20 @@
+# Jovo Polly TTS Integration
+
+[![Jovo Framework](https://www.jovo.tech/img/github-header.png)](https://www.jovo.tech)
+
+<p>
+<a href="https://www.jovo.tech" target="_blank">Website</a> -  <a href="https://www.jovo.tech/docs" target="_blank">Docs</a> - <a href="https://www.jovo.tech/marketplace" target="_blank">Marketplace</a> - <a href="https://github.com/jovotech/jovo-v4-template" target="_blank">Template</a>   
+</p>
+
+<p>
+<a href="https://www.npmjs.com/package/@jovotech/tts-polly" target="_blank"><img src="https://badge.fury.io/js/@jovotech%2Ftts-polly.svg"></a>      
+<a href="https://opencollective.com/jovo-framework" target="_blank"><img src="https://opencollective.com/jovo-framework/tiers/badge.svg"></a>
+</p>
+
+This package enables you to integrate your Jovo app with Amazon Polly TTS.
+
+```bash
+$ npm install @jovotech/tts-polly
+```
+
+> Learn more in the docs: https://www.jovo.tech/marketplace/tts-polly
diff --git a/integrations/tts-polly/docs/README.md b/integrations/tts-polly/docs/README.md
@@ -0,0 +1,133 @@
+---
+title: 'Amazon Polly TTS Integration'
+excerpt: 'Turn text into audio files with the Jovo Framework text to speech (TTS) integration for Amazon Polly.'
+---
+
+# Amazon Polly TTS Integration
+
+Turn text into audio files with the Jovo Framework text to speech (TTS) integration for Amazon Polly.
+
+## Introduction
+
+[Polly](https://aws.amazon.com/polly/) is a [text to speech (TTS)](https://www.jovo.tech/docs/tts) service that turns text into lifelike speech with dozens of voices across a broad set of languages.
+
+Learn more in the following sections:
+
+- [Installation](#installation)
+- [Configuration](#configuration)
+
+
+## Installation
+
+You can install the plugin like this:
+
+```sh
+$ npm install @jovotech/tts-polly
+```
+
+TTS plugins can be added to Jovo platform integrations. Here is an example of how it can be added to the [Jovo Core Platform](https://www.jovo.tech/marketplace/platform-core) in your `app.ts` [app configuration](https://www.jovo.tech/docs/app-config):
+
+```typescript
+import { CorePlatform } from '@jovotech/platform-core';
+import { PollyTts } from '@jovotech/tts-polly';
+// ...
+
+app.configure({
+  plugins: [
+    new CorePlatform({
+      plugins: [new PollyTts()],
+    }),
+    // ...
+  ],
+});
+```
+
+If you are running your Jovo app on [AWS Lambda](https://www.jovo.tech/marketplace/server-lambda), there is no need to add configurations if you want to stick to the [default options](#configuration). For apps outside AWS Lambda, you need to add a `region` and `credentials` to the [`libraryConfig`](#libraryconfig) like this:
+
+```typescript
+new PollyTts({
+  libraryConfig: {
+    region: 'us-east-1',
+    credentials: {
+      accessKeyId: '<YOUR-ACCESS-KEY-ID>',
+      secretAccessKey: '<YOUR-SECRET-ACCESS-KEY>'
+    },
+    // ...
+  },
+  // ...
+}),
+```
+
+Learn more about all configurations in the [configuration section](#configuration).
+
+## Configuration
+
+The following configurations can be added:
+
+```typescript
+new PollyTts({
+  outputFormat: 'mp3',
+  fallbackLocale: 'en-US',
+  voiceId: 'Matthew',
+  sampleRate: '16000',
+  engine: 'standard',
+  lexiconNames: [],
+  languageCode: 'en-IN',
+  speechMarkTypes: [],
+  cache: new SampleTtsCache({/* ... */}),
+  libraryConfig: {
+    region: 'us-east-1',
+    // ...
+  }
+}),
+```
+
+- `outputFormat`: The format in which the returned output will be encoded. See [`outputFormat` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#outputformat) for more information. Default: `mp3`.
+- `fallbackLocale`: The locale that is used to create the audio files if no locale can be found in the request. Default: `en-US`.
+- `voiceId`: Voice ID to use for the synthesis. See [`voiceId` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#voiceid) for more information. Default: `Matthew`.
+- `sampleRate`: The audio frequency specified in Hz. See [`sampleRate` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#samplerate) for more information. Default: `16000`.
+- `engine`: Specifies the engine (standard or neural) for Amazon Polly to use when processing input text for speech synthesis. See [`engine` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#engine) for more information. Default: `standard`.
+- `lexiconNames`: List of one or more pronunciation lexicon names you want the service to apply during synthesis. See [`lexiconNames` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#lexiconnames) for more information. Optional.
+- `languageCode`: Language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN). See [`languageCode` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#languagecode) for more information. Optional.
+- `speechMarkTypes`: The type of speech marks returned for the input text. See [`speechMarkTypes` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#speechmarktypes) for more information. Optional.
+- `cache`: [TTS Cache](#tts-cache) integration. Optional.
+- [`libraryConfig`](#libraryconfig): [`PollyClientConfig` object](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/pollyclientconfig.html) that is passed to the Polly client. Use this for configurations like `region` or `credentials`. Optional.
+
+### TTS Cache
+
+Without a TTS cache, each time text is passed to Polly, you will incur the cost and time of generating the TTS response.
+
+Use a TTS cache to reduce costs and save time.
+
+See [TTS](https://www.jovo.tech/docs/tts) for more information and a list of TTS cache implementations.
+
+
+### libraryConfig
+
+The `libraryConfig` property can be used to pass configurations to the AWS Polly SDK that is used by this integration.
+
+```typescript
+new PollyTts({
+  libraryConfig: { /* ... */ },
+  // ...
+}),
+```
+
+You can learn more about all config options in the [official `PollyClientConfig` reference](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/pollyclientconfig.html).
+
+For example, you can add a `region` and `credentials` as shown below. This is necessary if you are hosting your Jovo app outside of an AWS environment.
+
+```typescript
+new PollyTts({
+  libraryConfig: {
+    region: 'us-east-1',
+    credentials: {
+      accessKeyId: '<YOUR-ACCESS-KEY-ID>',
+      secretAccessKey: '<YOUR-SECRET-ACCESS-KEY>'
+    },
+    // ...
+  },
+  // ...
+}),
+```
+
diff --git a/integrations/tts-polly/jest.config.js b/integrations/tts-polly/jest.config.js
@@ -0,0 +1 @@
+module.exports = require('../../jest.config');
diff --git a/integrations/tts-polly/package.json b/integrations/tts-polly/package.json
@@ -0,0 +1,50 @@
+{
+  "name": "@jovotech/tts-polly",
+  "version": "4.2.19",
+  "description": "Polly TTS Integration",
+  "main": "dist/cjs/index.js",
+  "module": "dist/esm5/index.js",
+  "es2015": "dist/esm2015/index.js",
+  "types": "dist/types/index.d.ts",
+  "sideEffects": false,
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "prebuild": "rimraf dist",
+    "build": "tsc -b tsconfig.build.cjs.json tsconfig.build.esm5.json tsconfig.build.esm2015.json tsconfig.build.types.json",
+    "watch": "tsc -b tsconfig.build.cjs.json tsconfig.build.esm5.json tsconfig.build.esm2015.json tsconfig.build.types.json --watch",
+    "prettier": "prettier -w -l src test package.json",
+    "eslint": "eslint src test --fix --ext .ts",
+    "rimraf": "rimraf dist",
+    "test": "jest --runInBand"
+  },
+  "author": "jovotech",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "@aws-sdk/client-polly": "^3.100.0"
+  },
+  "devDependencies": {
+    "@jovotech/framework": "^4.2.19",
+    "@types/jest": "^26.0.20",
+    "@types/node": "^12.20.37",
+    "@typescript-eslint/eslint-plugin": "^4.12.0",
+    "@typescript-eslint/parser": "^4.12.0",
+    "eslint": "^7.17.0",
+    "eslint-config-prettier": "^7.1.0",
+    "eslint-plugin-prettier": "^3.3.1",
+    "jest": "^27.3.1",
+    "prettier": "^2.4.1",
+    "rimraf": "^3.0.2",
+    "ts-jest": "^27.0.7",
+    "typescript": "~4.4.4"
+  },
+  "peerDependencies": {
+    "@jovotech/framework": "4.2.19"
+  },
+  "gitHead": "5a9ba2fe3100f807f627f55117e7b2ad4bbce9c1",
+  "prettier": "../../.prettierrc.js",
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/integrations/tts-polly/src/PollyTts.ts b/integrations/tts-polly/src/PollyTts.ts
@@ -0,0 +1,107 @@
+import {
+  PollyClient,
+  PollyClientConfig,
+  SynthesizeSpeechCommand,
+  SynthesizeSpeechCommandInput,
+} from '@aws-sdk/client-polly';
+
+import { Readable } from 'stream';
+
+import {
+  TtsPluginConfig,
+  TtsPlugin,
+  Jovo,
+  AudioUtilities,
+  TtsTextType,
+  TtsData,
+  DeepPartial,
+} from '@jovotech/framework';
+
+export interface PollyTtsConfig extends TtsPluginConfig {
+  lexiconNames?: string[];
+  voiceId: string;
+  sampleRate: string;
+  languageCode?: string;
+  speechMarkTypes?: string[];
+  engine: string;
+  libraryConfig?: PollyClientConfig; // @see https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/pollyclientconfig.html
+}
+
+export type PollyTtsInitConfig = DeepPartial<PollyTtsConfig>;
+
+export class PollyTts extends TtsPlugin<PollyTtsConfig> {
+  readonly client: PollyClient;
+  supportedSsmlTags: string[] = [
+    'break',
+    'emphasis',
+    'lang',
+    'mark',
+    'p',
+    'phoneme',
+    'prosody',
+    's',
+    'say-as',
+    'speak',
+    'sub',
+    'w',
+    'amazon:breath',
+    'amazon:domain',
+    'amazon:effect',
+  ];
+
+  constructor(config?: PollyTtsInitConfig) {
+    super(config);
+
+    this.client = new PollyClient({
+      ...this.config.libraryConfig
+    });
+  }
+
+  getDefaultConfig(): PollyTtsConfig {
+    return {
+      outputFormat: 'mp3',
+      voiceId: 'Matthew',
+      sampleRate: '16000',
+      engine: 'standard',
+      fallbackLocale: 'en-US',
+    };
+  }
+
+  getKeyPrefix(): string | undefined {
+    return `polly-${this.config.voiceId.toLowerCase()}`;
+  }
+
+  async processTts(jovo: Jovo, text: string, textType: TtsTextType): Promise<TtsData | undefined> {
+    const params: SynthesizeSpeechCommandInput = {
+      Text: text,
+      TextType: textType,
+      OutputFormat: this.config.outputFormat,
+      VoiceId: this.config.voiceId,
+      SampleRate: this.config.sampleRate,
+      LanguageCode: this.config.languageCode,
+      SpeechMarkTypes: this.config.speechMarkTypes,
+      Engine: this.config.engine,
+      LexiconNames: this.config.lexiconNames,
+    };
+
+    const command = new SynthesizeSpeechCommand(params);
+
+    try {
+      const response = await this.client.send(command);
+      if (!response.AudioStream) {
+        return;
+      }
+
+      const result: TtsData = {
+        contentType: response.ContentType,
+        text,
+        fileExtension: this.config.outputFormat,
+        encodedAudio: await AudioUtilities.getBase64Audio(response.AudioStream as Readable),
+      };
+      return result;
+    } catch (error) {
+      console.log((error as Error).message);
+    }
+    return;
+  }
+}
diff --git a/integrations/tts-polly/src/index.ts b/integrations/tts-polly/src/index.ts
@@ -0,0 +1 @@
+export * from './PollyTts';
diff --git a/integrations/tts-polly/test/dummy.test.ts b/integrations/tts-polly/test/dummy.test.ts
@@ -0,0 +1,3 @@
+test('dummy test', () => {
+  expect(true).toBe(true);
+});
diff --git a/integrations/tts-polly/tsconfig.build.cjs.json b/integrations/tts-polly/tsconfig.build.cjs.json
@@ -0,0 +1,10 @@
+{
+  "extends": "../../tsconfig.build.json",
+  "compilerOptions": {
+    "outDir": "dist/cjs",
+    "module": "commonjs"
+  },
+  "include": [
+    "src"
+  ]
+}