From 335eed0663c3217842f51cd93e2be799a2db2b13 Mon Sep 17 00:00:00 2001 From: "a.mostovenko" Date: Wed, 6 May 2026 10:03:14 +0200 Subject: [PATCH] feat: add --add-fuzzy flag and # ai-translated marker Every entry msgai translates now gets a "# ai-translated" translator comment so AI translations are visible in the .po file. The new --add-fuzzy flag additionally marks fresh translations with the gettext fuzzy flag, forcing human review before they ship. Co-Authored-By: Claude Opus 4.7 --- README.md | 7 +- src/cli/index.ts | 8 ++ src/cli/runTranslate.ts | 12 +- src/config.ts | 5 + src/po.ts | 66 ++++++++++ .../cli.translate.integration.test.ts.snap | 1 + test/cli/add-fuzzy.test.ts | 65 ++++++++++ test/cli/model.test.ts | 1 + test/config.test.ts | 4 + test/po.test.ts | 113 ++++++++++++++++++ 10 files changed, 280 insertions(+), 2 deletions(-) create mode 100644 test/cli/add-fuzzy.test.ts diff --git a/README.md b/README.md index 1094aa2..a654b33 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Main features: - `🏷️` Respects gettext context (`msgctxt`) when translating entries - `🔁` Supports singular and plural translations - `⚠️` Skips fuzzy entries by default +- `🪪` Marks every AI translation with a `# ai-translated` translator comment - `🧭` Can infer source language or use `--source-lang` - `💻` Runs as a small CLI that updates files in place @@ -30,6 +31,8 @@ Any OpenAI model that supports `json_schema` structured outputs can be used via By default, entries marked as `fuzzy` are skipped. If you use `--include-fuzzy`, `msgai` will translate those entries too and remove the fuzzy flag after applying the result. +Every entry that `msgai` translates gets a `# ai-translated` translator comment so you can tell AI translations apart from human ones. Existing translator comments are preserved. Use `--add-fuzzy` to additionally mark fresh translations with the gettext `fuzzy` flag — useful when you want a human to review every AI translation before it ships. + ## 📦 Install Install the CLI globally: @@ -57,13 +60,14 @@ msgai messages.po --api-key sk-... Usage: ```bash -msgai [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug] +msgai [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--add-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug] ``` Options: - `--dry-run`: list untranslated `msgid` values only, with no API calls and no file changes - `--include-fuzzy`: include fuzzy entries for translation and clear their fuzzy flag after translation +- `--add-fuzzy`: mark every newly translated entry with the gettext `fuzzy` flag (so a human reviews it before it ships). Independent of `--include-fuzzy` - `--source-lang LANG`: set the source language of `msgid` strings as an ISO 639-1 code such as `en` or `uk` - `--model MODEL`: set the OpenAI model used for translation; default is `gpt-5.4`. Only models with `json_schema` structured outputs are supported. - `--api-key KEY`: pass the OpenAI API key directly instead of using `OPENAI_API_KEY` @@ -91,6 +95,7 @@ Example `msgai.config.yml`: source-lang: en model: gpt-5.4 include-fuzzy: false +add-fuzzy: false fold-length: 80 context: "use formal tone" debug: false diff --git a/src/cli/index.ts b/src/cli/index.ts index 689a3cf..d0ea99f 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -36,6 +36,11 @@ function parseArgs(argv: string[]): CliArgs { type: 'boolean', description: 'Include fuzzy entries for translation (re-translate and clear fuzzy flag)', }) + .option('add-fuzzy', { + type: 'boolean', + description: + 'Mark all newly translated entries with the gettext fuzzy flag (for human review)', + }) .option('api-key', { type: 'string', description: 'OpenAI API key (otherwise read from OPENAI_API_KEY env)', @@ -111,6 +116,7 @@ function parseArgs(argv: string[]): CliArgs { sourceLang, model, includeFuzzy: parsedArgs['include-fuzzy'], + addFuzzy: parsedArgs['add-fuzzy'], foldLength, context, debug: parsedArgs.debug, @@ -175,6 +181,7 @@ function main(argv: string[]): number | undefined { // Default remaining undefined booleans after config merge const dryRun = args.dryRun ?? false; const includeFuzzy = args.includeFuzzy ?? false; + const addFuzzy = args.addFuzzy ?? false; const debug = args.debug ?? false; debugLogger.log('cli.main', 'Dispatching runTranslateCommand'); @@ -185,6 +192,7 @@ function main(argv: string[]): number | undefined { sourceLang: args.sourceLang, model: args.model, includeFuzzy, + addFuzzy, foldLength: args.foldLength, context: args.context, debug, diff --git a/src/cli/runTranslate.ts b/src/cli/runTranslate.ts index 6c30c19..c3904b5 100644 --- a/src/cli/runTranslate.ts +++ b/src/cli/runTranslate.ts @@ -6,6 +6,8 @@ import { getEntriesToTranslate, applyTranslations, clearFuzzyFromEntries, + addFuzzyToEntries, + markEntriesAsAiTranslated, compilePo, getLanguage, getPluralForms, @@ -63,6 +65,7 @@ export type TranslateCommandArgs = { sourceLang?: string; model?: string; includeFuzzy?: boolean; + addFuzzy?: boolean; foldLength?: number; debug?: boolean; context?: string; @@ -74,6 +77,7 @@ export async function runTranslate( sourceLang?: string, model?: string, includeFuzzy?: boolean, + addFuzzy?: boolean, foldLength?: number, debug?: boolean, context?: string, @@ -86,6 +90,7 @@ export async function runTranslate( sourceLang, model: model ?? 'gpt-5.4', includeFuzzy: includeFuzzy === true, + addFuzzy: addFuzzy === true, }); const poContent = fs.readFileSync(poFilePath, 'utf8'); debugLogger.log('cli.runTranslate', 'Read PO file', { @@ -158,6 +163,10 @@ export async function runTranslate( if (includeFuzzy) { clearFuzzyFromEntries(parsedPo, batchResults); } + markEntriesAsAiTranslated(parsedPo, batchResults); + if (addFuzzy) { + addFuzzyToEntries(parsedPo, batchResults); + } fs.writeFileSync(poFilePath, compilePo(parsedPo, { foldLength })); debugLogger.log('cli.runTranslate', 'Wrote translated batch back to PO file', { batch: batchNum, @@ -182,7 +191,7 @@ export async function runTranslate( } export const USAGE = - 'Usage: msgai [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug]'; + 'Usage: msgai [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--add-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug]'; export function runTranslateCommand(args: TranslateCommandArgs): number | Promise { initDebugLogger(args.debug); @@ -228,6 +237,7 @@ export function runTranslateCommand(args: TranslateCommandArgs): number | Promis args.sourceLang, args.model, args.includeFuzzy, + args.addFuzzy, args.foldLength, args.debug, args.context, diff --git a/src/config.ts b/src/config.ts index ae0aff9..e4eb9c6 100644 --- a/src/config.ts +++ b/src/config.ts @@ -4,6 +4,7 @@ import { parse as parseYaml } from 'yaml'; const KEBAB_TO_CAMEL: Record = { 'source-lang': 'sourceLang', 'include-fuzzy': 'includeFuzzy', + 'add-fuzzy': 'addFuzzy', 'fold-length': 'foldLength', 'api-key': 'apiKey', 'dry-run': 'dryRun', @@ -23,6 +24,7 @@ export const configFileSchema = z sourceLang: z.string().optional(), model: z.string().optional(), includeFuzzy: z.boolean().optional(), + addFuzzy: z.boolean().optional(), foldLength: z .number() .int('foldLength must be an integer') @@ -56,6 +58,7 @@ export type ConfigFile = { sourceLang?: string; model?: string; includeFuzzy?: boolean; + addFuzzy?: boolean; foldLength?: number; context?: string; debug?: boolean; @@ -80,6 +83,7 @@ export type CliArgs = { sourceLang?: string; model?: string; includeFuzzy?: boolean; + addFuzzy?: boolean; foldLength?: number; context?: string; debug?: boolean; @@ -96,6 +100,7 @@ export function mergeConfigWithArgs( sourceLang: cliArgs.sourceLang !== undefined ? cliArgs.sourceLang : config.sourceLang, model: cliArgs.model !== undefined ? cliArgs.model : config.model, includeFuzzy: cliArgs.includeFuzzy !== undefined ? cliArgs.includeFuzzy : config.includeFuzzy, + addFuzzy: cliArgs.addFuzzy !== undefined ? cliArgs.addFuzzy : config.addFuzzy, foldLength: cliArgs.foldLength !== undefined ? cliArgs.foldLength : config.foldLength, context: cliArgs.context !== undefined ? cliArgs.context : config.context, debug: cliArgs.debug !== undefined ? cliArgs.debug : config.debug, diff --git a/src/po.ts b/src/po.ts index 261506a..c800927 100644 --- a/src/po.ts +++ b/src/po.ts @@ -137,6 +137,72 @@ export function applyTranslations(parsedPo: GetTextTranslations, results: PoEntr } } +const AI_TRANSLATED_MARKER = 'ai-translated'; + +/** + * Adds an "ai-translated" translator comment to entries corresponding to the given results. + * Idempotent: skips entries that already contain the marker. Preserves existing translator + * comments by appending the marker on a new line. + */ +export function markEntriesAsAiTranslated( + parsedPo: GetTextTranslations, + results: Array<{ msgid: string; msgctxt?: string }>, +): void { + for (const result of results) { + const context = result.msgctxt ?? ''; + const contextEntries = parsedPo.translations[context]; + if (contextEntries == null) continue; + const entry = contextEntries[result.msgid]; + if (entry == null) continue; + + if (entry.comments == null) { + entry.comments = {}; + } + const existing = entry.comments.translator; + if (typeof existing === 'string' && existing.length > 0) { + const lines = existing.split(/\r?\n|\r/).map((s) => s.trim()); + if (lines.includes(AI_TRANSLATED_MARKER)) continue; + entry.comments.translator = `${existing}\n${AI_TRANSLATED_MARKER}`; + } else { + entry.comments.translator = AI_TRANSLATED_MARKER; + } + } +} + +/** + * Adds the "fuzzy" flag to entries corresponding to the given results (mutates parsedPo.translations). + * Idempotent: does not duplicate the flag. Preserves other flags (e.g. "c-format"). + * Lookup is by result.msgctxt (default '') and result.msgid. + */ +export function addFuzzyToEntries( + parsedPo: GetTextTranslations, + results: Array<{ msgid: string; msgctxt?: string }>, +): void { + for (const result of results) { + const context = result.msgctxt ?? ''; + const contextEntries = parsedPo.translations[context]; + if (contextEntries == null) continue; + const entry = contextEntries[result.msgid]; + if (entry == null) continue; + + if (entry.comments == null) { + entry.comments = {}; + } + const existing = entry.comments.flag; + if (typeof existing === 'string' && existing.length > 0) { + const flags = existing + .split(',') + .map((s) => s.trim()) + .filter((s) => s !== ''); + if (flags.some((f) => f.toLowerCase() === 'fuzzy')) continue; + flags.push('fuzzy'); + entry.comments.flag = flags.join(', '); + } else { + entry.comments.flag = 'fuzzy'; + } + } +} + /** * Removes the "fuzzy" flag from entries corresponding to the given results (mutates parsedPo.translations). * Lookup is by result.msgctxt (default '') and result.msgid. diff --git a/test-integration/__snapshots__/cli.translate.integration.test.ts.snap b/test-integration/__snapshots__/cli.translate.integration.test.ts.snap index 369b258..a7cf3c3 100644 --- a/test-integration/__snapshots__/cli.translate.integration.test.ts.snap +++ b/test-integration/__snapshots__/cli.translate.integration.test.ts.snap @@ -7,6 +7,7 @@ msgstr "" "Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\\n" "Content-Type: text/plain; charset=utf-8\\n" +# ai-translated msgid "Hello" msgstr "Привіт" " diff --git a/test/cli/add-fuzzy.test.ts b/test/cli/add-fuzzy.test.ts new file mode 100644 index 0000000..e2d2914 --- /dev/null +++ b/test/cli/add-fuzzy.test.ts @@ -0,0 +1,65 @@ +import { test, expect, jest, beforeEach } from '@jest/globals'; +import fs from 'node:fs'; +import { getTmpPo } from '../test-utils/getTmpPo'; +import { runTranslate } from '../../src/cli/runTranslate'; +import { translateStrings } from '../../src/translate'; + +jest.mock('../../src/translate', () => { + const actual = jest.requireActual('../../src/translate'); + return { + ...actual, + translateStrings: jest.fn(), + }; +}); + +const translateStringsMock = jest.mocked(translateStrings); + +beforeEach(() => { + translateStringsMock.mockReset(); +}); + +test('runTranslate always writes "# ai-translated" comment for translated entries', async () => { + translateStringsMock.mockResolvedValue([{ msgid: 'Hello', msgstr: 'Привіт' }]); + const tempPo = getTmpPo(` +msgid "Hello" +msgstr "" +`); + + try { + const code = await runTranslate(tempPo.poFilePath, 'fake-key', 'en'); + expect(code).toBe(0); + + const content = fs.readFileSync(tempPo.poFilePath, 'utf8'); + expect(content).toContain('# ai-translated'); + expect(content).toContain('msgstr "Привіт"'); + expect(content).not.toMatch(/#,\s*fuzzy/); + } finally { + tempPo.cleanup(); + } +}); + +test('runTranslate with addFuzzy=true marks translated entries as fuzzy', async () => { + translateStringsMock.mockResolvedValue([{ msgid: 'Hello', msgstr: 'Привіт' }]); + const tempPo = getTmpPo(` +msgid "Hello" +msgstr "" +`); + + try { + const code = await runTranslate( + tempPo.poFilePath, + 'fake-key', + 'en', + undefined, + undefined, + true, + ); + expect(code).toBe(0); + + const content = fs.readFileSync(tempPo.poFilePath, 'utf8'); + expect(content).toContain('# ai-translated'); + expect(content).toMatch(/#,\s*fuzzy/); + } finally { + tempPo.cleanup(); + } +}); diff --git a/test/cli/model.test.ts b/test/cli/model.test.ts index d7fc9e8..5c5203a 100644 --- a/test/cli/model.test.ts +++ b/test/cli/model.test.ts @@ -57,6 +57,7 @@ msgstr "" undefined, undefined, undefined, + undefined, true, ); diff --git a/test/config.test.ts b/test/config.test.ts index cd957fd..1c835f0 100644 --- a/test/config.test.ts +++ b/test/config.test.ts @@ -7,6 +7,7 @@ describe('parseConfigFile', () => { sourceLang: en model: gpt-4o includeFuzzy: true +addFuzzy: true foldLength: 80 context: "use formal tone" debug: true @@ -16,6 +17,7 @@ debug: true sourceLang: 'en', model: 'gpt-4o', includeFuzzy: true, + addFuzzy: true, foldLength: 80, context: 'use formal tone', debug: true, @@ -26,12 +28,14 @@ debug: true const yaml = ` source-lang: uk include-fuzzy: false +add-fuzzy: true fold-length: 0 `; const result = parseConfigFile(yaml); expect(result).toEqual({ sourceLang: 'uk', includeFuzzy: false, + addFuzzy: true, foldLength: 0, }); }); diff --git a/test/po.test.ts b/test/po.test.ts index 0a19665..abdb080 100644 --- a/test/po.test.ts +++ b/test/po.test.ts @@ -7,6 +7,8 @@ import { getEntriesToTranslate, applyTranslations, clearFuzzyFromEntries, + addFuzzyToEntries, + markEntriesAsAiTranslated, compilePo, isEntryFuzzy, } from '../src/po'; @@ -297,6 +299,117 @@ msgstr "" expect(keys.map((k) => k.msgid)).toEqual(['Hello', 'World']); }); +test('markEntriesAsAiTranslated adds translator comment to entry without comments', () => { + const tempPo = getTmpPo(` +msgid "Hello" +msgstr "" +`); + const parsed = parsePoContent(tempPo.poContent); + applyTranslations(parsed, [{ msgid: 'Hello', msgstr: 'Привіт' }]); + markEntriesAsAiTranslated(parsed, [{ msgid: 'Hello' }]); + expect(parsed.translations['']['Hello'].comments?.translator).toBe('ai-translated'); + + const compiled = compilePo(parsed).toString('utf8'); + expect(compiled).toContain('# ai-translated'); + tempPo.cleanup(); +}); + +test('markEntriesAsAiTranslated preserves existing translator comment, appends marker on new line', () => { + const tempPo = getTmpPo(` +# TODO: review wording +msgid "Hello" +msgstr "" +`); + const parsed = parsePoContent(tempPo.poContent); + markEntriesAsAiTranslated(parsed, [{ msgid: 'Hello' }]); + expect(parsed.translations['']['Hello'].comments?.translator).toBe( + 'TODO: review wording\nai-translated', + ); + + const compiled = compilePo(parsed).toString('utf8'); + expect(compiled).toContain('# TODO: review wording'); + expect(compiled).toContain('# ai-translated'); + tempPo.cleanup(); +}); + +test('markEntriesAsAiTranslated is idempotent (running twice does not duplicate marker)', () => { + const tempPo = getTmpPo(` +msgid "Hello" +msgstr "" +`); + const parsed = parsePoContent(tempPo.poContent); + markEntriesAsAiTranslated(parsed, [{ msgid: 'Hello' }]); + markEntriesAsAiTranslated(parsed, [{ msgid: 'Hello' }]); + expect(parsed.translations['']['Hello'].comments?.translator).toBe('ai-translated'); + tempPo.cleanup(); +}); + +test('markEntriesAsAiTranslated respects msgctxt when looking up entry', () => { + const parsed = po.parse( + Buffer.from( + ` +msgid "" +msgstr "" +"Language: uk\\n" +"Content-Type: text/plain; charset=UTF-8\\n" + +msgctxt "auth" +msgid "Hello" +msgstr "Вітаємо" + +msgid "Hello" +msgstr "Привіт" +`, + 'utf8', + ), + ); + markEntriesAsAiTranslated(parsed, [{ msgid: 'Hello', msgctxt: '' }]); + expect(parsed.translations['']['Hello'].comments?.translator).toBe('ai-translated'); + expect(parsed.translations['auth']['Hello'].comments?.translator).toBeUndefined(); +}); + +test('addFuzzyToEntries adds fuzzy flag to entry without flags', () => { + const tempPo = getTmpPo(` +msgid "Hello" +msgstr "" +`); + const parsed = parsePoContent(tempPo.poContent); + applyTranslations(parsed, [{ msgid: 'Hello', msgstr: 'Привіт' }]); + addFuzzyToEntries(parsed, [{ msgid: 'Hello' }]); + expect(isEntryFuzzy(parsed.translations['']['Hello'])).toBe(true); + + const compiled = compilePo(parsed).toString('utf8'); + expect(compiled).toMatch(/#,\s*fuzzy/); + tempPo.cleanup(); +}); + +test('addFuzzyToEntries preserves other flags (e.g. c-format)', () => { + const tempPo = getTmpPo(` +#, c-format +msgid "Hello %s" +msgstr "" +`); + const parsed = parsePoContent(tempPo.poContent); + addFuzzyToEntries(parsed, [{ msgid: 'Hello %s' }]); + const flag = parsed.translations['']['Hello %s'].comments?.flag ?? ''; + expect(flag).toMatch(/c-format/); + expect(flag).toMatch(/fuzzy/); +}); + +test('addFuzzyToEntries is idempotent (does not duplicate fuzzy flag)', () => { + const tempPo = getTmpPo(` +#, fuzzy +msgid "Hello" +msgstr "Старий" +`); + const parsed = parsePoContent(tempPo.poContent); + addFuzzyToEntries(parsed, [{ msgid: 'Hello' }]); + const flag = parsed.translations['']['Hello'].comments?.flag ?? ''; + const fuzzyCount = flag.split(',').filter((s) => s.trim() === 'fuzzy').length; + expect(fuzzyCount).toBe(1); + tempPo.cleanup(); +}); + test('clearFuzzyFromEntries removes fuzzy flag from .po after translation', () => { const tempPo = getTmpPo(` #, fuzzy