Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Main features:
- `🏷️` Respects gettext context (`msgctxt`) when translating entries
- `🔁` Supports singular and plural translations
- `⚠️` Skips fuzzy entries by default
- `🪪` Marks every AI translation with a `# ai-translated` translator comment
- `🧭` Can infer source language or use `--source-lang`
- `💻` Runs as a small CLI that updates files in place

Expand All @@ -30,6 +31,8 @@ Any OpenAI model that supports `json_schema` structured outputs can be used via

By default, entries marked as `fuzzy` are skipped. If you use `--include-fuzzy`, `msgai` will translate those entries too and remove the fuzzy flag after applying the result.

Every entry that `msgai` translates gets a `# ai-translated` translator comment so you can tell AI translations apart from human ones. Existing translator comments are preserved. Use `--add-fuzzy` to additionally mark fresh translations with the gettext `fuzzy` flag — useful when you want a human to review every AI translation before it ships.

## 📦 Install

Install the CLI globally:
Expand Down Expand Up @@ -57,13 +60,14 @@ msgai messages.po --api-key sk-...
Usage:

```bash
msgai <file.po> [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug]
msgai <file.po> [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--add-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug]
```

Options:

- `--dry-run`: list untranslated `msgid` values only, with no API calls and no file changes
- `--include-fuzzy`: include fuzzy entries for translation and clear their fuzzy flag after translation
- `--add-fuzzy`: mark every newly translated entry with the gettext `fuzzy` flag so a human can review it before it ships; independent of `--include-fuzzy` (when both are set, re-translated entries end up flagged `fuzzy` again)
- `--source-lang LANG`: set the source language of `msgid` strings as an ISO 639-1 code such as `en` or `uk`
- `--model MODEL`: set the OpenAI model used for translation; default is `gpt-5.4`. Only models with `json_schema` structured outputs are supported.
- `--api-key KEY`: pass the OpenAI API key directly instead of using `OPENAI_API_KEY`
Expand Down Expand Up @@ -91,6 +95,7 @@ Example `msgai.config.yml`:
source-lang: en
model: gpt-5.4
include-fuzzy: false
add-fuzzy: false
fold-length: 80
context: "use formal tone"
debug: false
Expand Down
8 changes: 8 additions & 0 deletions src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ function parseArgs(argv: string[]): CliArgs {
type: 'boolean',
description: 'Include fuzzy entries for translation (re-translate and clear fuzzy flag)',
})
.option('add-fuzzy', {
type: 'boolean',
description:
'Mark all newly translated entries with the gettext fuzzy flag (for human review)',
})
.option('api-key', {
type: 'string',
description: 'OpenAI API key (otherwise read from OPENAI_API_KEY env)',
Expand Down Expand Up @@ -111,6 +116,7 @@ function parseArgs(argv: string[]): CliArgs {
sourceLang,
model,
includeFuzzy: parsedArgs['include-fuzzy'],
addFuzzy: parsedArgs['add-fuzzy'],
foldLength,
context,
debug: parsedArgs.debug,
Expand Down Expand Up @@ -175,6 +181,7 @@ function main(argv: string[]): number | undefined {
// Default remaining undefined booleans after config merge
const dryRun = args.dryRun ?? false;
const includeFuzzy = args.includeFuzzy ?? false;
const addFuzzy = args.addFuzzy ?? false;
const debug = args.debug ?? false;

debugLogger.log('cli.main', 'Dispatching runTranslateCommand');
Expand All @@ -185,6 +192,7 @@ function main(argv: string[]): number | undefined {
sourceLang: args.sourceLang,
model: args.model,
includeFuzzy,
addFuzzy,
foldLength: args.foldLength,
context: args.context,
debug,
Expand Down
12 changes: 11 additions & 1 deletion src/cli/runTranslate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import {
getEntriesToTranslate,
applyTranslations,
clearFuzzyFromEntries,
addFuzzyToEntries,
markEntriesAsAiTranslated,
compilePo,
getLanguage,
getPluralForms,
Expand Down Expand Up @@ -63,6 +65,7 @@ export type TranslateCommandArgs = {
sourceLang?: string;
model?: string;
includeFuzzy?: boolean;
addFuzzy?: boolean;
foldLength?: number;
debug?: boolean;
context?: string;
Expand All @@ -74,6 +77,7 @@ export async function runTranslate(
sourceLang?: string,
model?: string,
includeFuzzy?: boolean,
addFuzzy?: boolean,
foldLength?: number,
debug?: boolean,
context?: string,
Expand All @@ -86,6 +90,7 @@ export async function runTranslate(
sourceLang,
model: model ?? 'gpt-5.4',
includeFuzzy: includeFuzzy === true,
addFuzzy: addFuzzy === true,
});
const poContent = fs.readFileSync(poFilePath, 'utf8');
debugLogger.log('cli.runTranslate', 'Read PO file', {
Expand Down Expand Up @@ -158,6 +163,10 @@ export async function runTranslate(
if (includeFuzzy) {
clearFuzzyFromEntries(parsedPo, batchResults);
}
markEntriesAsAiTranslated(parsedPo, batchResults);
if (addFuzzy) {
addFuzzyToEntries(parsedPo, batchResults);
}
fs.writeFileSync(poFilePath, compilePo(parsedPo, { foldLength }));
debugLogger.log('cli.runTranslate', 'Wrote translated batch back to PO file', {
batch: batchNum,
Expand All @@ -182,7 +191,7 @@ export async function runTranslate(
}

export const USAGE =
'Usage: msgai <file.po> [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug]';
'Usage: msgai <file.po> [--dry-run] [--api-key KEY] [--source-lang LANG] [--model MODEL] [--include-fuzzy] [--add-fuzzy] [--fold-length N] [--context TEXT] [--config PATH] [--debug]';

export function runTranslateCommand(args: TranslateCommandArgs): number | Promise<number> {
initDebugLogger(args.debug);
Expand Down Expand Up @@ -228,6 +237,7 @@ export function runTranslateCommand(args: TranslateCommandArgs): number | Promis
args.sourceLang,
args.model,
args.includeFuzzy,
args.addFuzzy,
args.foldLength,
args.debug,
args.context,
Expand Down
5 changes: 5 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { parse as parseYaml } from 'yaml';
const KEBAB_TO_CAMEL: Record<string, string> = {
'source-lang': 'sourceLang',
'include-fuzzy': 'includeFuzzy',
'add-fuzzy': 'addFuzzy',
'fold-length': 'foldLength',
'api-key': 'apiKey',
'dry-run': 'dryRun',
Expand All @@ -23,6 +24,7 @@ export const configFileSchema = z
sourceLang: z.string().optional(),
model: z.string().optional(),
includeFuzzy: z.boolean().optional(),
addFuzzy: z.boolean().optional(),
foldLength: z
.number()
.int('foldLength must be an integer')
Expand Down Expand Up @@ -56,6 +58,7 @@ export type ConfigFile = {
sourceLang?: string;
model?: string;
includeFuzzy?: boolean;
addFuzzy?: boolean;
foldLength?: number;
context?: string;
debug?: boolean;
Expand All @@ -80,6 +83,7 @@ export type CliArgs = {
sourceLang?: string;
model?: string;
includeFuzzy?: boolean;
addFuzzy?: boolean;
foldLength?: number;
context?: string;
debug?: boolean;
Expand All @@ -96,6 +100,7 @@ export function mergeConfigWithArgs(
sourceLang: cliArgs.sourceLang !== undefined ? cliArgs.sourceLang : config.sourceLang,
model: cliArgs.model !== undefined ? cliArgs.model : config.model,
includeFuzzy: cliArgs.includeFuzzy !== undefined ? cliArgs.includeFuzzy : config.includeFuzzy,
addFuzzy: cliArgs.addFuzzy !== undefined ? cliArgs.addFuzzy : config.addFuzzy,
foldLength: cliArgs.foldLength !== undefined ? cliArgs.foldLength : config.foldLength,
context: cliArgs.context !== undefined ? cliArgs.context : config.context,
debug: cliArgs.debug !== undefined ? cliArgs.debug : config.debug,
Expand Down
66 changes: 66 additions & 0 deletions src/po.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,72 @@ export function applyTranslations(parsedPo: GetTextTranslations, results: PoEntr
}
}

const AI_TRANSLATED_MARKER = 'ai-translated';

/**
 * Tags each entry referenced by `results` with the "ai-translated" translator comment
 * (mutates parsedPo.translations in place).
 *
 * Entries are located by result.msgctxt (default '') and result.msgid; results that match
 * no entry are ignored. An entry whose translator comment already has a line equal to the
 * marker (after trimming, any newline style) is left untouched, so repeated calls are safe.
 * Any other existing translator text is kept and the marker is appended after a "\n".
 */
export function markEntriesAsAiTranslated(
  parsedPo: GetTextTranslations,
  results: Array<{ msgid: string; msgctxt?: string }>,
): void {
  for (const { msgid, msgctxt } of results) {
    const target = parsedPo.translations[msgctxt ?? '']?.[msgid];
    if (target == null) continue;

    // Create the comments container on demand so the marker always has a home.
    const comments = target.comments ?? (target.comments = {});
    const current = comments.translator;

    // No usable translator text yet: the marker becomes the whole comment.
    if (typeof current !== 'string' || current.length === 0) {
      comments.translator = AI_TRANSLATED_MARKER;
      continue;
    }

    const alreadyMarked = current
      .split(/\r?\n|\r/)
      .some((line) => line.trim() === AI_TRANSLATED_MARKER);
    if (!alreadyMarked) {
      comments.translator = `${current}\n${AI_TRANSLATED_MARKER}`;
    }
  }
}

/**
 * Flags each entry referenced by `results` as "fuzzy" (mutates parsedPo.translations in place).
 *
 * Entries are located by result.msgctxt (default '') and result.msgid; results that match
 * no entry are ignored. If the entry's comma-separated flag list already contains "fuzzy"
 * (compared case-insensitively) the flag string is left exactly as it was. Otherwise the
 * existing flags (e.g. "c-format") are kept, "fuzzy" is added at the end, and the list is
 * written back joined with ", ".
 */
export function addFuzzyToEntries(
  parsedPo: GetTextTranslations,
  results: Array<{ msgid: string; msgctxt?: string }>,
): void {
  for (const { msgid, msgctxt } of results) {
    const target = parsedPo.translations[msgctxt ?? '']?.[msgid];
    if (target == null) continue;

    // Create the comments container on demand so the flag always has a home.
    const comments = target.comments ?? (target.comments = {});
    const current = comments.flag;

    // No flags recorded yet: "fuzzy" becomes the only one.
    if (typeof current !== 'string' || current.length === 0) {
      comments.flag = 'fuzzy';
      continue;
    }

    // Collect the non-empty, trimmed flag names and note whether fuzzy is among them.
    const flagNames: string[] = [];
    let hasFuzzy = false;
    for (const piece of current.split(',')) {
      const name = piece.trim();
      if (name === '') continue;
      flagNames.push(name);
      if (name.toLowerCase() === 'fuzzy') hasFuzzy = true;
    }

    if (!hasFuzzy) {
      comments.flag = [...flagNames, 'fuzzy'].join(', ');
    }
  }
}

/**
* Removes the "fuzzy" flag from entries corresponding to the given results (mutates parsedPo.translations).
* Lookup is by result.msgctxt (default '') and result.msgid.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ msgstr ""
"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\\n"
"Content-Type: text/plain; charset=utf-8\\n"

# ai-translated
msgid "Hello"
msgstr "Привіт"
"
Expand Down
65 changes: 65 additions & 0 deletions test/cli/add-fuzzy.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { test, expect, jest, beforeEach } from '@jest/globals';
import fs from 'node:fs';
import { getTmpPo } from '../test-utils/getTmpPo';
import { runTranslate } from '../../src/cli/runTranslate';
import { translateStrings } from '../../src/translate';

// Partially mock the translate module: every export keeps its real implementation
// except translateStrings, which is replaced by a jest mock function.
jest.mock('../../src/translate', () => {
const actual = jest.requireActual<typeof import('../../src/translate')>('../../src/translate');
return {
...actual,
translateStrings: jest.fn(),
};
});

// Typed handle on the mocked translateStrings so each test can set its resolved value.
const translateStringsMock = jest.mocked(translateStrings);

// Reset the mock before every test so configured results do not carry over between tests.
beforeEach(() => {
translateStringsMock.mockReset();
});

// Default run (addFuzzy not passed): the translated entry must carry the
// "# ai-translated" translator comment and must not be flagged fuzzy.
test('runTranslate always writes "# ai-translated" comment for translated entries', async () => {
  translateStringsMock.mockResolvedValue([{ msgid: 'Hello', msgstr: 'Привіт' }]);
  const fixture = getTmpPo(`
msgid "Hello"
msgstr ""
`);

  try {
    const exitCode = await runTranslate(fixture.poFilePath, 'fake-key', 'en');
    expect(exitCode).toBe(0);

    // Inspect what was written back to the PO file on disk.
    const written = fs.readFileSync(fixture.poFilePath, 'utf8');
    expect(written).toContain('# ai-translated');
    expect(written).toContain('msgstr "Привіт"');
    expect(written).not.toMatch(/#,\s*fuzzy/);
  } finally {
    fixture.cleanup();
  }
});

// With addFuzzy enabled, the translated entry must carry both the
// "# ai-translated" translator comment and the "#, fuzzy" flag.
test('runTranslate with addFuzzy=true marks translated entries as fuzzy', async () => {
  translateStringsMock.mockResolvedValue([{ msgid: 'Hello', msgstr: 'Привіт' }]);
  const fixture = getTmpPo(`
msgid "Hello"
msgstr ""
`);

  try {
    // The two `undefined` arguments skip model and includeFuzzy; the trailing `true` is addFuzzy.
    const exitCode = await runTranslate(
      fixture.poFilePath,
      'fake-key',
      'en',
      undefined,
      undefined,
      true,
    );
    expect(exitCode).toBe(0);

    // Inspect what was written back to the PO file on disk.
    const written = fs.readFileSync(fixture.poFilePath, 'utf8');
    expect(written).toContain('# ai-translated');
    expect(written).toMatch(/#,\s*fuzzy/);
  } finally {
    fixture.cleanup();
  }
});
1 change: 1 addition & 0 deletions test/cli/model.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ msgstr ""
undefined,
undefined,
undefined,
undefined,
true,
);

Expand Down
4 changes: 4 additions & 0 deletions test/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ describe('parseConfigFile', () => {
sourceLang: en
model: gpt-4o
includeFuzzy: true
addFuzzy: true
foldLength: 80
context: "use formal tone"
debug: true
Expand All @@ -16,6 +17,7 @@ debug: true
sourceLang: 'en',
model: 'gpt-4o',
includeFuzzy: true,
addFuzzy: true,
foldLength: 80,
context: 'use formal tone',
debug: true,
Expand All @@ -26,12 +28,14 @@ debug: true
const yaml = `
source-lang: uk
include-fuzzy: false
add-fuzzy: true
fold-length: 0
`;
const result = parseConfigFile(yaml);
expect(result).toEqual({
sourceLang: 'uk',
includeFuzzy: false,
addFuzzy: true,
foldLength: 0,
});
});
Expand Down
Loading
Loading