From 5e76aace293ae77c74e273e143e750381f9c439e Mon Sep 17 00:00:00 2001 From: Raylan LIN Date: Fri, 17 Apr 2026 18:39:09 +0800 Subject: [PATCH 1/2] fix(speech): correctly enable and download subtitles for TTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix request parameter: `subtitle` → `subtitle_enable` per official API spec - Fix response parsing: API returns `data.subtitle_file` (URL), not inline `subtitle_info` - Download subtitle JSON from URL and convert to SRT format alongside audio file - Remove dead `SubtitleInfo` type from api.ts - Add dry-run test for `--subtitles` flag - Update skill documentation with subtitle usage examples Fixes #102 --- skill/SKILL.md | 5 ++- src/commands/speech/synthesize.ts | 51 ++++++++++++++++++++++++- src/types/api.ts | 12 +----- test/commands/speech/synthesize.test.ts | 43 +++++++++++++++++++++ 4 files changed, 99 insertions(+), 12 deletions(-) diff --git a/skill/SKILL.md b/skill/SKILL.md index c128e19..cf6af88 100644 --- a/skill/SKILL.md +++ b/skill/SKILL.md @@ -178,7 +178,7 @@ mmx speech synthesize --text [flags] | `--bitrate ` | number | Bitrate (default: 128000) | | `--channels ` | number | Audio channels (default: 1) | | `--language ` | string | Language boost | -| `--subtitles` | boolean | Include subtitle timing data | +| `--subtitles` | boolean | Download and save subtitles as an `.srt` file (alongside the `--out` audio file). The API must support subtitles for the selected model. | | `--pronunciation ` | string, repeatable | Custom pronunciation | | `--sound-effect ` | string | Add sound effect | | `--out ` | string | Save audio to file | @@ -188,6 +188,9 @@ mmx speech synthesize --text [flags] mmx speech synthesize --text "Hello world" --out hello.mp3 --quiet # stdout: hello.mp3 +mmx speech synthesize --text "Hello" --subtitles --out hello.mp3 +# saves hello.mp3 + hello.srt (SRT subtitle file) + echo "Breaking news."
| mmx speech synthesize --text-file - --out news.mp3 ``` diff --git a/src/commands/speech/synthesize.ts b/src/commands/speech/synthesize.ts index f5f3630..756eff0 100644 --- a/src/commands/speech/synthesize.ts +++ b/src/commands/speech/synthesize.ts @@ -6,6 +6,7 @@ import { speechEndpoint } from '../../client/endpoints'; import { parseSSE } from '../../client/stream'; import { detectOutputFormat, formatOutput } from '../../output/formatter'; import { saveAudioOutput } from '../../output/audio'; +import { writeFileSync } from 'fs'; import { readTextFromPathOrStdin } from '../../utils/fs'; import type { Config } from '../../config/schema'; import type { GlobalFlags } from '../../types/flags'; @@ -37,6 +38,7 @@ export default defineCommand({ examples: [ 'mmx speech synthesize --text "Hello, world!"', 'mmx speech synthesize --text "Hello, world!" --out hello.mp3', + 'mmx speech synthesize --text "Hello" --subtitles --out hello.mp3', 'echo "Breaking news." | mmx speech synthesize --text-file - --out news.mp3', 'mmx speech synthesize --text "Stream" --stream | mpv --no-terminal -', ], @@ -85,7 +87,7 @@ export default defineCommand({ }; if (flags.language) body.language_boost = flags.language as string; - if (flags.subtitles) body.subtitle = true; + if (flags.subtitles) body.subtitle_enable = true; // Correct API parameter name if (flags.pronunciation) { body.pronunciation_dict = (flags.pronunciation as string[]).map(p => { @@ -122,5 +124,52 @@ export default defineCommand({ if (!config.quiet) process.stderr.write(`[Model: ${model}]\n`); saveAudioOutput(response, outPath, format, config.quiet); + + // Download and save subtitle file when --subtitles is requested + if (flags.subtitles && response.data.subtitle_file) { + try { + // Download the subtitle JSON file from the URL + const subtitleRes = await fetch(response.data.subtitle_file); + if (!subtitleRes.ok) { + throw new CLIError(`Failed to download subtitle file: ${subtitleRes.status}`, ExitCode.GENERAL); + } + // API 
returns a flat array, not { subtitles: [...] } + const subtitleArray = await subtitleRes.json() as Array<{ text: string; time_begin: number; time_end: number }>; + + if (subtitleArray?.length) { + // Convert to SRT format (API returns time in milliseconds) + const subtitlePath = outPath.replace(/\.[^.]+$/, '') + '.srt'; + const srtContent = subtitleArray + .map((s, i) => { + // API already returns milliseconds, use directly + const fmt = (ms: number) => { + const h = String(Math.floor(ms / 3600000)).padStart(2, '0'); + const m = String(Math.floor((ms % 3600000) / 60000)).padStart(2, '0'); + const sec = String(Math.floor((ms % 60000) / 1000)).padStart(2, '0'); + const mil = String(Math.round(ms % 1000)).padStart(3, '0'); + return `${h}:${m}:${sec},${mil}`; + }; + return `${i + 1}\n${fmt(s.time_begin)} --> ${fmt(s.time_end)}\n${s.text}`; + }) + .join('\n\n'); + writeFileSync(subtitlePath, srtContent, 'utf-8'); + if (!config.quiet) { + console.log(formatOutput({ subtitles: subtitlePath }, format)); + } else { + console.log(subtitlePath); + } + } + } catch (err) { + // Non-fatal: log warning but don't fail the whole synthesis + if (!config.quiet) { + process.stderr.write(`Warning: failed to download subtitles: ${(err as Error).message}\n`); + } + } + } else if (flags.subtitles && !response.data.subtitle_file) { + // Warn if --subtitles was requested but API didn't return subtitle_file + if (!config.quiet) { + process.stderr.write(`Warning: subtitles requested but not returned by API\n`); + } + } }, }); diff --git a/src/types/api.ts b/src/types/api.ts index b2b0578..fdb956c 100644 --- a/src/types/api.ts +++ b/src/types/api.ts @@ -108,7 +108,7 @@ export interface SpeechRequest { pronunciation_dict?: Array<{ tone: string; text: string }>; output_format?: 'url' | 'hex'; stream?: boolean; - subtitle?: boolean; + subtitle_enable?: boolean; // Correct API parameter name (not 'subtitle') } export interface SpeechResponse { @@ -116,7 +116,7 @@ export interface SpeechResponse { 
data: { audio?: string; // hex-encoded audio data audio_url?: string; - subtitle_info?: SubtitleInfo; + subtitle_file?: string; // URL to download subtitle JSON file (when subtitle_enable=true) status: number; }; extra_info?: { @@ -129,14 +129,6 @@ export interface SpeechResponse { }; } -export interface SubtitleInfo { - subtitles: Array<{ - text: string; - start_time: number; - end_time: number; - }>; -} - // ---- Voice List ---- export interface SystemVoiceInfo { diff --git a/test/commands/speech/synthesize.test.ts b/test/commands/speech/synthesize.test.ts index 360e8ac..38dc4f7 100644 --- a/test/commands/speech/synthesize.test.ts +++ b/test/commands/speech/synthesize.test.ts @@ -159,4 +159,47 @@ describe('speech synthesize command', () => { console.log = originalLog; } }); + + it('--subtitles sets subtitle_enable in dry-run output', async () => { + const config = { + apiKey: 'test-key', + region: 'global' as const, + baseUrl: 'https://api.mmx.io', + output: 'json' as const, + timeout: 10, + verbose: false, + quiet: false, + noColor: true, + yes: false, + dryRun: true, + nonInteractive: true, + async: false, + }; + + const originalLog = console.log; + let output = ''; + console.log = (msg: string) => { output += msg; }; + + try { + await synthesizeCommand.execute(config, { + text: 'Hello', + subtitles: true, + quiet: false, + verbose: false, + noColor: true, + yes: false, + dryRun: true, + help: false, + nonInteractive: true, + async: false, + }); + + const parsed = JSON.parse(output); + expect(parsed.request.subtitle_enable).toBe(true); + // Verify the old incorrect parameter name is NOT used + expect(parsed.request.subtitle).toBeUndefined(); + } finally { + console.log = originalLog; + } + }); }); From 7aea012dafff8f3795c18ddcef2a3ac53eeec557 Mon Sep 17 00:00:00 2001 From: Raylan LIN Date: Fri, 17 Apr 2026 20:04:30 +0800 Subject: [PATCH 2/2] test: fix globalThis.fetch type error in timeout-fix.test.ts Newer Bun versions add 'preconnect' to typeof fetch, 
breaking direct assignment to globalThis.fetch. Cast through any to satisfy TypeScript. This is a pre-existing CI failure, not related to the subtitle fix. --- test/auth/timeout-fix.test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/auth/timeout-fix.test.ts b/test/auth/timeout-fix.test.ts index baeaad1..2ee4976 100644 --- a/test/auth/timeout-fix.test.ts +++ b/test/auth/timeout-fix.test.ts @@ -124,7 +124,8 @@ describe('refreshAccessToken: timeout and error handling', () => { // We test the real function against a mock server via a wrapper // that overrides the fetch to hit our local server instead. const origFetch = globalThis.fetch; - globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async (input: RequestInfo | URL, init?: RequestInit) => { const url = typeof input === 'string' ? input : input.toString(); if (url.includes('oauth/token')) { return origFetch(`${server.url}/v1/oauth/token`, init); @@ -156,7 +157,8 @@ describe('refreshAccessToken: timeout and error handling', () => { const mod = await import('../../src/auth/refresh'); const origFetch = globalThis.fetch; - globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async (input: RequestInfo | URL, init?: RequestInit) => { const url = typeof input === 'string' ? input : input.toString(); if (url.includes('oauth/token')) { return origFetch(`${server.url}/v1/oauth/token`, init);