diff --git a/docs/tts.md b/docs/tts.md index 953ae5d0fe..1f7b7356aa 100644 --- a/docs/tts.md +++ b/docs/tts.md @@ -27,7 +27,7 @@ Learn more about Jovo TTS integrations in the following sections: Currently, the following integrations are available with Jovo `v4`: -- Polly TTS (_work in progress_) +- [Polly TTS](https://www.jovo.tech/marketplace/tts-polly) ## Configuration @@ -57,13 +57,13 @@ new PollyTts({ // ... cache: new S3TtsCache({ /* ... */ }), fallbackLocale: 'en', - fileExtension: 'mp3', + outputFormat: 'mp3', }), ``` - `cache`: Initialize a TTS Cache integration here to store converted audio files on a cloud service, for example AWS S3. See [custom TTS cache](#custom-tts-cache) for more information. - `fallbackLocale`: The locale that gets used for the creation of the audio files in case no locale can be found in the [request](./request.md). -- `fileExtension`: The desired format of the resulting audio, for example `mp3`. +- `outputFormat`: The desired format of the resulting audio, for example `mp3`. 
## Custom Implementation diff --git a/framework/src/plugins/TtsPlugin.ts b/framework/src/plugins/TtsPlugin.ts index ea8abbf4b3..cc8c0d62e5 100644 --- a/framework/src/plugins/TtsPlugin.ts +++ b/framework/src/plugins/TtsPlugin.ts @@ -18,7 +18,7 @@ export enum TtsTextType { export interface TtsPluginConfig extends PluginConfig { cache?: TtsCachePlugin; fallbackLocale: string; - fileExtension: string; + outputFormat: string; } // Provide basic functionality that will then be used by all TTS plugins @@ -105,7 +105,7 @@ export abstract class TtsPlugin< let ttsResponse; if (this.config.cache) { - ttsResponse = await this.config.cache.getItem(audioKey, locale, this.config.fileExtension); + ttsResponse = await this.config.cache.getItem(audioKey, locale, this.config.outputFormat); if (ttsResponse) { if (!ttsResponse.text) { ttsResponse.text = text; @@ -134,10 +134,8 @@ export abstract class TtsPlugin< private buildAudioTag(data?: TtsData): string | undefined { if (data?.url) { return SsmlUtilities.buildAudioTag(data.url); - } else if (data?.encodedAudio && data?.contentType) { - return SsmlUtilities.buildAudioTag( - AudioUtilities.buildBase64Uri(data.encodedAudio, data.contentType), - ); + } else if (data?.encodedAudio) { + return SsmlUtilities.buildAudioTag(data.encodedAudio); } } diff --git a/integrations/tts-polly/.npmignore b/integrations/tts-polly/.npmignore new file mode 100644 index 0000000000..c7fd7297eb --- /dev/null +++ b/integrations/tts-polly/.npmignore @@ -0,0 +1,5 @@ +.idea +node_modules +npm-debug.log +/src +/test diff --git a/integrations/tts-polly/CHANGELOG.md b/integrations/tts-polly/CHANGELOG.md new file mode 100644 index 0000000000..825c32f0d0 --- /dev/null +++ b/integrations/tts-polly/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/integrations/tts-polly/README.md b/integrations/tts-polly/README.md new file mode 100644 index 0000000000..652644c389 --- /dev/null +++ b/integrations/tts-polly/README.md @@ -0,0 +1,20 @@ +# Jovo Polly TTS Integration + 
+[![Jovo Framework](https://www.jovo.tech/img/github-header.png)](https://www.jovo.tech) + +

+Website - Docs - Marketplace - Template +

+ +

+ + +

+ +This package enables you to integrate your Jovo app with Amazon Polly TTS. + +```bash +$ npm install @jovotech/tts-polly +``` + +> Learn more in the docs: https://www.jovo.tech/marketplace/tts-polly diff --git a/integrations/tts-polly/docs/README.md b/integrations/tts-polly/docs/README.md new file mode 100644 index 0000000000..1f94d822a7 --- /dev/null +++ b/integrations/tts-polly/docs/README.md @@ -0,0 +1,133 @@ +--- +title: 'Amazon Polly TTS Integration' +excerpt: 'Turn text into audio files with the Jovo Framework text to speech (TTS) integration for Amazon Polly.' +--- + +# Amazon Polly TTS Integration + +Turn text into audio files with the Jovo Framework text to speech (TTS) integration for Amazon Polly. + +## Introduction + +[Polly](https://aws.amazon.com/polly/) is a [text to speech (TTS)](https://www.jovo.tech/docs/tts) service that turns text into lifelike speech with dozens of voices across a broad set of languages. + +Learn more in the following sections: + +- [Installation](#installation) +- [Configuration](#configuration) + + +## Installation + +You can install the plugin like this: + +```sh +$ npm install @jovotech/tts-polly +``` + +TTS plugins can be added to Jovo platform integrations. Here is an example of how it can be added to the [Jovo Core Platform](https://www.jovo.tech/marketplace/platform-core) in your `app.ts` [app configuration](https://www.jovo.tech/docs/app-config): + +```typescript +import { CorePlatform } from '@jovotech/platform-core'; +import { PollyTts } from '@jovotech/tts-polly'; +// ... + +app.configure({ + plugins: [ + new CorePlatform({ + plugins: [new PollyTts()], + }), + // ... + ], +}); +``` + +If you are running your Jovo app on [AWS Lambda](https://www.jovo.tech/marketplace/server-lambda), there is no need to add configurations if you want to stick to the [default options](#configuration). 
For apps outside AWS Lambda, you need to add a `region` and `credentials` to the [`libraryConfig`](#libraryconfig) like this: + +```typescript +new PollyTts({ + libraryConfig: { + region: 'us-east-1', + credentials: { + accessKeyId: '', + secretAccessKey: '' + }, + // ... + }, + // ... +}), +``` + +Learn more about all configurations in the [configuration section](#configuration). + +## Configuration + +The following configurations can be added: + +```typescript +new PollyTts({ + outputFormat: 'mp3', + fallbackLocale: 'en-US', + voiceId: 'Matthew', + sampleRate: '16000', + engine: 'standard', + lexiconNames: [], + languageCode: 'en-IN', + speechMarkTypes: [], + cache: new SampleTtsCache({/* ... */}), + libraryConfig: { + region: 'us-east-1', + // ... + } +}), +``` + +- `outputFormat`: The format in which the returned output will be encoded. See [`outputFormat` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#outputformat) for more information. Default: `mp3`. +- `fallbackLocale`: Used as a fallback if the locale from Jovo is not found. Default: `en-US`. +- `voiceId`: Voice ID to use for the synthesis. See [`voiceId` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#voiceid) for more information. Default: `Matthew`. +- `sampleRate`: The audio frequency specified in Hz. See [`sampleRate` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#samplerate) for more information. Default: `16000`. +- `engine`: Specifies the engine (standard or neural) for Amazon Polly to use when processing input text for speech synthesis. See [`engine` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#engine) for more information. Default: `standard`. 
+- `lexiconNames`: List of one or more pronunciation lexicon names you want the service to apply during synthesis. See [`lexiconNames` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#lexiconnames) for more information. Optional. +- `languageCode`: Language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN). See [`languageCode` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#languagecode) for more information. Optional. +- `speechMarkTypes`: The type of speech marks returned for the input text. See [`speechMarkTypes` Polly docs](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/synthesizespeechcommandinput.html#speechmarktypes) for more information. Optional. +- `cache`: [TTS Cache](#tts-cache) integration. Optional. +- [`libraryConfig`](#libraryconfig): [`PollyClientConfig` object](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/pollyclientconfig.html) that is passed to the Polly client. Use this for configurations like `region` or `credentials`. Optional. + +### TTS Cache + +Without a TTS cache, each time text is passed to Polly, you will incur the cost and time of generating the TTS response. + +Use a TTS cache to reduce costs and save time. + +See [TTS](https://www.jovo.tech/docs/tts) for more information and a list of TTS cache implementations. + + +### libraryConfig + +The `libraryConfig` property can be used to pass configurations to the AWS Polly SDK that is used by this integration. + +```typescript +new PollyTts({ + libraryConfig: { /* ... */ }, + // ... 
+}), +``` + +You can learn more about all config options in the [official `PollyClientConfig` reference](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/pollyclientconfig.html). + +For example, you can add a `region` and `credentials` as shown below. This is necessary if you are hosting your Jovo app outside of an AWS environment. + +```typescript +new PollyTts({ + libraryConfig: { + region: 'us-east-1', + credentials: { + accessKeyId: '', + secretAccessKey: '' + }, + // ... + }, + // ... +}), +``` + diff --git a/integrations/tts-polly/jest.config.js b/integrations/tts-polly/jest.config.js new file mode 100644 index 0000000000..990bd44280 --- /dev/null +++ b/integrations/tts-polly/jest.config.js @@ -0,0 +1 @@ +module.exports = require('../../jest.config'); diff --git a/integrations/tts-polly/package.json b/integrations/tts-polly/package.json new file mode 100644 index 0000000000..76064c4a80 --- /dev/null +++ b/integrations/tts-polly/package.json @@ -0,0 +1,50 @@ +{ + "name": "@jovotech/tts-polly", + "version": "4.2.19", + "description": "Polly TTS Integration", + "main": "dist/cjs/index.js", + "module": "dist/esm5/index.js", + "es2015": "dist/esm2015/index.js", + "types": "dist/types/index.d.ts", + "sideEffects": false, + "files": [ + "dist" + ], + "scripts": { + "prebuild": "rimraf dist", + "build": "tsc -b tsconfig.build.cjs.json tsconfig.build.esm5.json tsconfig.build.esm2015.json tsconfig.build.types.json", + "watch": "tsc -b tsconfig.build.cjs.json tsconfig.build.esm5.json tsconfig.build.esm2015.json tsconfig.build.types.json --watch", + "prettier": "prettier -w -l src test package.json", + "eslint": "eslint src test --fix --ext .ts", + "rimraf": "rimraf dist", + "test": "jest --runInBand" + }, + "author": "jovotech", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/client-polly": "^3.100.0" + }, + "devDependencies": { + "@jovotech/framework": "^4.2.19", + "@types/jest": "^26.0.20", + "@types/node": 
"^12.20.37", + "@typescript-eslint/eslint-plugin": "^4.12.0", + "@typescript-eslint/parser": "^4.12.0", + "eslint": "^7.17.0", + "eslint-config-prettier": "^7.1.0", + "eslint-plugin-prettier": "^3.3.1", + "jest": "^27.3.1", + "prettier": "^2.4.1", + "rimraf": "^3.0.2", + "ts-jest": "^27.0.7", + "typescript": "~4.4.4" + }, + "peerDependencies": { + "@jovotech/framework": "4.2.19" + }, + "gitHead": "5a9ba2fe3100f807f627f55117e7b2ad4bbce9c1", + "prettier": "../../.prettierrc.js", + "publishConfig": { + "access": "public" + } +} diff --git a/integrations/tts-polly/src/PollyTts.ts b/integrations/tts-polly/src/PollyTts.ts new file mode 100644 index 0000000000..b9fd1c7c65 --- /dev/null +++ b/integrations/tts-polly/src/PollyTts.ts @@ -0,0 +1,111 @@ +import { + PollyClient, + PollyClientConfig, + SynthesizeSpeechCommand, + SynthesizeSpeechCommandInput, +} from '@aws-sdk/client-polly'; + +import { Readable } from 'stream'; + +import { + TtsPluginConfig, + TtsPlugin, + Jovo, + AudioUtilities, + TtsTextType, + TtsData, + DeepPartial, +} from '@jovotech/framework'; + +/** Options for the Polly TTS plugin. The synthesis options are passed to the SynthesizeSpeech request built in processTts; libraryConfig is passed to the PollyClient constructor. */ +export interface PollyTtsConfig extends TtsPluginConfig { + lexiconNames?: string[]; + voiceId: string; + sampleRate: string; + languageCode?: string; + speechMarkTypes?: string[]; + engine: string; + libraryConfig?: PollyClientConfig; // @see https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-polly/interfaces/pollyclientconfig.html +} + +/** Deep-partial variant of PollyTtsConfig accepted by the PollyTts constructor. */ +export type PollyTtsInitConfig = DeepPartial<PollyTtsConfig>; + +/** TTS plugin that sends text to Amazon Polly with SynthesizeSpeechCommand and returns the audio as TtsData. */ +export class PollyTts extends TtsPlugin<PollyTtsConfig> { + readonly client: PollyClient; + supportedSsmlTags: string[] = [ + 'break', + 'emphasis', + 'lang', + 'mark', + 'p', + 'phoneme', + 'prosody', + 's', + 'say-as', + 'speak', + 'sub', + 'w', + 'amazon:breath', + 'amazon:domain', + 'amazon:effect', + ]; + + constructor(config?: PollyTtsInitConfig) { + super(config); + + this.client = new PollyClient({ + ...this.config.libraryConfig + }); + } + + getDefaultConfig(): PollyTtsConfig { + return { + outputFormat: 'mp3', + 
voiceId: 'Matthew', + sampleRate: '16000', + engine: 'standard', + fallbackLocale: 'en-US', + }; + } + + getKeyPrefix(): string | undefined { + return `polly-${this.config.voiceId.toLowerCase()}`; + } + + /** Requests speech synthesis for the given text from Polly. Resolves to a TtsData holding the encoded audio, or to undefined when the response has no AudioStream or the request throws (only the error message is logged). */ + async processTts(jovo: Jovo, text: string, textType: TtsTextType): Promise<TtsData | undefined> { + const params: SynthesizeSpeechCommandInput = { + Text: text, + TextType: textType, + OutputFormat: this.config.outputFormat, + VoiceId: this.config.voiceId, + SampleRate: this.config.sampleRate, + LanguageCode: this.config.languageCode, + SpeechMarkTypes: this.config.speechMarkTypes, + Engine: this.config.engine, + LexiconNames: this.config.lexiconNames, + }; + + const command = new SynthesizeSpeechCommand(params); + + try { + const response = await this.client.send(command); + if (!response.AudioStream) { + return; + } + + const result: TtsData = { + contentType: response.ContentType, + text, + fileExtension: this.config.outputFormat, + encodedAudio: await AudioUtilities.getBase64Audio(response.AudioStream as Readable), + }; + return result; + } catch (error) { + console.log((error as Error).message); + } + return; + } +} diff --git a/integrations/tts-polly/src/index.ts b/integrations/tts-polly/src/index.ts new file mode 100644 index 0000000000..9b83c11605 --- /dev/null +++ b/integrations/tts-polly/src/index.ts @@ -0,0 +1 @@ +export * from './PollyTts'; diff --git a/integrations/tts-polly/test/dummy.test.ts b/integrations/tts-polly/test/dummy.test.ts new file mode 100644 index 0000000000..8ca53d8876 --- /dev/null +++ b/integrations/tts-polly/test/dummy.test.ts @@ -0,0 +1,3 @@ +test('dummy test', () => { + expect(true).toBe(true); +}); diff --git a/integrations/tts-polly/tsconfig.build.cjs.json b/integrations/tts-polly/tsconfig.build.cjs.json new file mode 100644 index 0000000000..1ea595949d --- /dev/null +++ b/integrations/tts-polly/tsconfig.build.cjs.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.build.json", + "compilerOptions": { + "outDir": "dist/cjs", + "module": "commonjs" 
+ }, + "include": [ + "src" + ] +} \ No newline at end of file diff --git a/integrations/tts-polly/tsconfig.build.esm2015.json b/integrations/tts-polly/tsconfig.build.esm2015.json new file mode 100644 index 0000000000..a1cc657c06 --- /dev/null +++ b/integrations/tts-polly/tsconfig.build.esm2015.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.build.json", + "compilerOptions": { + "outDir": "dist/esm2015", + "module": "es2015" + }, + "include": [ + "src" + ] +} \ No newline at end of file diff --git a/integrations/tts-polly/tsconfig.build.esm5.json b/integrations/tts-polly/tsconfig.build.esm5.json new file mode 100644 index 0000000000..7fda8d8cf2 --- /dev/null +++ b/integrations/tts-polly/tsconfig.build.esm5.json @@ -0,0 +1,11 @@ +{ + "extends": "../../tsconfig.build.json", + "compilerOptions": { + "outDir": "dist/esm5", + "module": "es2015", + "target": "es5" + }, + "include": [ + "src" + ] +} \ No newline at end of file diff --git a/integrations/tts-polly/tsconfig.build.types.json b/integrations/tts-polly/tsconfig.build.types.json new file mode 100644 index 0000000000..9c42be2002 --- /dev/null +++ b/integrations/tts-polly/tsconfig.build.types.json @@ -0,0 +1,11 @@ +{ + "extends": "../../tsconfig.build.json", + "compilerOptions": { + "outDir": "dist/types", + "declaration": true, + "emitDeclarationOnly": true + }, + "include": [ + "src" + ] +} \ No newline at end of file diff --git a/integrations/tts-polly/tsconfig.json b/integrations/tts-polly/tsconfig.json new file mode 100644 index 0000000000..402616d3f9 --- /dev/null +++ b/integrations/tts-polly/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "./dist/cjs" + }, + "include": [ + "src" + ] +}