rpfm_extensions/translator/mod.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Mod translation and localization support.
12//!
13//! This module provides tools for managing translations of mod content, making it
14//! easier to localize mods for different languages. It tracks translation status,
15//! detects changes in source text, and supports auto-translation from vanilla data.
16//!
17//! # Overview
18//!
19//! The translation system works by:
20//!
21//! 1. Extracting all translatable strings from a pack's Loc files
22//! 2. Storing translations in a separate JSON file alongside the pack
23//! 3. Tracking which translations need updating when source text changes
24//! 4. Auto-translating from vanilla localisation data where possible
25//!
26//! # Translation Files
27//!
28//! Translations are stored in separate JSON files. Each file contains all source
29//! strings and their translations, along with metadata about translation status.
30//!
31//! # Auto-Translation
32//!
33//! The system can automatically translate strings that exist in the game's vanilla
34//! localisation files. This is useful for mods that reference vanilla content or
35//! use similar terminology.
36//!
37//! # Workflow
38//!
39//! 1. Create a [`PackTranslation`] from a pack
40//! 2. Export to a translation file for external editing
41//! 3. Import completed translations
42//! 4. Generate the final translated Loc file for the pack
43//!
44//! # Output
45//!
46//! Translated strings are output to a Loc file that overrides the original mod's
47//! entries. The filename depends on the game:
48//!
49//! - **Warhammer 1 and newer** (except Thrones): `!!!!!!translated_locs.loc` - loads
50//! first due to its naming, allowing translations to override the original entries
51//! - **Thrones of Britannia and older games**: `localisation.loc`
52//!
53//! # Example
54//!
55//! ```ignore
56//! use rpfm_extensions::translator::PackTranslation;
57//!
58//! // Create translation from pack
59//! let mut translation = PackTranslation::new(
60//! &[translations_path],
61//! &pack,
62//! "warhammer_3",
63//! "es", // Spanish
64//! &dependencies,
65//! &english_base,
66//! &local_fixes,
67//! )?;
68//!
//! // Save translation file (written to `<output_path>/<game_key>/<pack_name>/<language>.json`)
//! translation.save(&output_path, "warhammer_3")?;
71//!
72//! // Generate translated Loc file for the pack
73//! let loc_file = translation.generate_loc()?;
74//! ```
75
76use getset::{Getters, MutGetters, Setters};
77use itertools::Itertools;
78use rayon::prelude::*;
79use serde::{Serialize as SerdeSerialize, Serializer};
80use serde_derive::{Serialize, Deserialize};
81
82use std::collections::{BTreeMap, HashMap, HashSet};
83use std::fs::{DirBuilder, File};
84use std::io::{BufReader, BufWriter, Read, Write};
85use std::path::{Path, PathBuf};
86
87use rpfm_lib::error::{Result, RLibError};
88use rpfm_lib::files::{Container, FileType, loc::Loc, pack::Pack, RFile, RFileDecoded, table::{DecodedData, local::TableInMemory, Table}};
89use rpfm_lib::schema::*;
90
91use crate::dependencies::Dependencies;
92
/// Filename for the generated translated Loc file.
///
/// The leading exclamation marks ensure this file loads before other Loc files,
/// allowing translations to override the original mod's strings.
///
/// Loc files with this name are skipped when gathering the strings to translate,
/// so a previously generated translation is never used as translation source.
pub const TRANSLATED_FILE_NAME: &str = "!!!!!!translated_locs.loc";

/// Full path for the translated Loc file within a pack.
///
/// This is [`TRANSLATED_FILE_NAME`] inside the `text/` folder. The file name is
/// repeated literally here, so both constants must be kept in sync.
pub const TRANSLATED_PATH: &str = "text/!!!!!!translated_locs.loc";

/// Legacy path for translated Loc files (for backwards compatibility).
pub const TRANSLATED_PATH_OLD: &str = "text/localisation.loc";
104
105//-------------------------------------------------------------------------------//
106// Enums & Structs
107//-------------------------------------------------------------------------------//
108
109/// Translation data for an entire pack.
110///
111/// Contains all translatable strings from a pack along with their translations
112/// and metadata about translation status.
113///
114/// # Persistence
115///
116/// This struct is serialized to JSON files for storage and can be loaded back
117/// when continuing translation work.
118///
119/// # Parent Translations
120///
121/// When a pack has dependencies, translations from parent mods are also loaded
122/// and used for auto-translation, ensuring consistent terminology across
123/// dependent mods.
124#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
125#[getset(get = "pub", get_mut = "pub", set = "pub")]
126pub struct PackTranslation {
127
128 /// Target language code for translations (e.g., "es", "de", "fr").
129 language: String,
130
131 /// Name of the pack these translations belong to.
132 pack_name: String,
133
134 /// Map of Loc keys to their translation data.
135 ///
136 /// Keys are the original Loc entry keys from the pack.
137 #[serde(serialize_with = "ordered_map_translations")]
138 translations: HashMap<String, Translation>,
139}
140
141/// Translation entry for a single localizable string.
142///
143/// Tracks both the original and translated text, along with status flags
144/// indicating whether the translation is up-to-date.
145#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
146#[getset(get = "pub", get_mut = "pub", set = "pub")]
147pub struct Translation {
148
149 /// The Loc key identifying this string.
150 key: String,
151
152 /// Original text in the base language (typically English).
153 ///
154 /// This is used to detect when the source text changes, requiring
155 /// re-translation.
156 value_original: String,
157
158 /// Translated text in the target language.
159 ///
160 /// May be empty if not yet translated.
161 value_translated: String,
162
163 /// Whether this translation needs review.
164 ///
165 /// Set to `true` when the original text changes after translation,
166 /// indicating the translation may be outdated.
167 needs_retranslation: bool,
168
169 /// Whether this string has been removed from the source pack.
170 ///
171 /// Translations for removed strings are kept for reference but marked
172 /// as removed. If the string reappears, it will be flagged for re-translation.
173 removed: bool,
174}
175
176//-------------------------------------------------------------------------------//
177// Implementations
178//-------------------------------------------------------------------------------//
179
180impl PackTranslation {
181
182 pub fn new(paths: &[PathBuf], pack: &Pack, game_key: &str, language: &str, dependencies: &Dependencies, base_english: &HashMap<String, String>, base_local_fixes: &HashMap<String, String>) -> Result<Self> {
183 let mut translations = Self::load(paths, &pack.disk_file_name(), game_key, language).unwrap_or_else(|_| {
184 Self {
185 language: language.to_owned(),
186 pack_name: pack.disk_file_name(),
187 ..Default::default()
188 }
189 });
190
191 // If the pack has dependencies, we have to try to load their translations too, then patch the live dependencies with them.
192 // Otherwise, we'll have a situation where data is compared and imported from the wrong language.
193 let mut parent_tr = vec![];
194 for (_, pack_name) in pack.dependencies() {
195 if let Ok(ptr) = Self::load(paths, pack_name, game_key, language) {
196 parent_tr.push(ptr);
197 }
198 }
199
200 // Once we got the previous translation loaded, get the files to translate from the Pack, updating our translation.
201 let mut locs = pack.files_by_type(&[FileType::Loc]);
202 let merged_loc = Self::sort_and_merge_locs_for_translation(&mut locs)?;
203 let merged_loc_data = merged_loc.data();
204 let merged_loc_hash = merged_loc_data
205 .par_iter()
206 .map(|x| (x[0].data_to_string(), x[1].data_to_string()))
207 .collect::<HashMap<_,_>>();
208
209 // Once we have the clean list of loc entries we have in our Pack, we need to update the translation with it.
210 // First we do a pass to mark all removed translations as such. This is separated from the rest because this pass is way slower than the rest.
211 for (tr_key, tr) in translations.translations_mut() {
212 let was_removed = tr.removed;
213 tr.removed = !merged_loc_hash.contains_key(&**tr_key);
214
215 // If the line has been removed, unmark it for translation.
216 // If the line has been re-added, only flag for retranslation if the original value changed or there's no translation yet.
217 if tr.removed {
218 tr.needs_retranslation = false;
219 } else if was_removed {
220 if let Some(current_value) = merged_loc_hash.get(&**tr_key) {
221 tr.needs_retranslation = tr.value_translated.is_empty() || *current_value != tr.value_original;
222 }
223 }
224 }
225
226 // Next, we update the translations data with the loc data of the merged loc.
227 for row in merged_loc.data().iter() {
228 let key = row[0].data_to_string();
229 let value = row[1].data_to_string();
230
231 match translations.translations.get_mut(&*key) {
232 Some(tr) => {
233 if value != tr.value_original {
234 tr.value_original = value.to_string();
235 tr.needs_retranslation = true;
236 }
237 },
238
239 None => {
240 let tr = Translation {
241 key: key.to_string(),
242 value_original: value.to_string(),
243 value_translated: String::new(),
244 needs_retranslation: true,
245 removed: false,
246 };
247
248 translations.translations.insert(key.to_string(), tr);
249 }
250 }
251 }
252
253 // Lastly, we do an auto-translation pass. We have two copies of base local: one normal and one patched with parent translations.
254 // This is needed because the base localisation data doesn't have the translation data for parent mods included.
255 let mut base_local_tr = dependencies.localisation_data().clone();
256 for ptr in parent_tr {
257 for (key, val) in ptr.translations() {
258 if !*val.needs_retranslation() && !val.value_translated().is_empty() {
259 if let Some(ptr_val) = base_local_tr.get_mut(key) {
260 *ptr_val = val.value_translated().to_string();
261 }
262 }
263 }
264 }
265
266 let tr_copy = translations.translations().clone();
267 translations.translations_mut().par_iter_mut().for_each(|(tr_key, tr)| {
268 if !tr.removed {
269
270 // Mark empty lines as translated.
271 if tr.value_original().trim().is_empty() && tr.value_translated().trim().is_empty() {
272 tr.value_translated = tr.value_original.to_owned();
273 tr.needs_retranslation = false;
274 }
275
276 // If the value is unchanged from english, just copy the vanilla translation.
277 //
278 // NOTE: This is really a patch for packs not using optimizing pass, because the optimizer actually removes these entries.
279 else if let Some(vanilla_data) = base_english.get(tr_key) {
280 if tr.value_original() == vanilla_data {
281 if let Some(vanilla_data) = base_local_fixes.get(tr_key) {
282 tr.value_translated = vanilla_data.to_owned();
283 tr.needs_retranslation = false;
284 } else if let Some(vanilla_data) = base_local_tr.get(tr_key) {
285 tr.value_translated = vanilla_data.to_owned();
286 tr.needs_retranslation = false;
287 }
288 }
289 }
290
291 // If the value is equal to another value in the english translation (but with a different key), we may be able to reuse it.
292 //
293 // Note that this is prone to give wrong translations as it doesn't have any context, so we only do it for lines that are not yet translated.
294 else if tr.value_translated().trim().is_empty() || *tr.needs_retranslation() {
295 if let Some((key, _)) = base_english.iter().find(|(_, value)| *value == tr.value_original()) {
296 if let Some(value_tr) = base_local_fixes.get(key) {
297 tr.value_translated = value_tr.to_owned();
298 tr.needs_retranslation = false;
299 } else if let Some(value_tr) = base_local_tr.get(key) {
300 tr.value_translated = value_tr.to_owned();
301 tr.needs_retranslation = false;
302 }
303 } else if let Some((_, value_tr)) = tr_copy.iter()
304 .find(|(_, tr_copy)| *tr_copy.value_original() == *tr.value_original() && !*tr_copy.needs_retranslation() && *tr.needs_retranslation()) {
305 tr.value_translated = value_tr.value_translated().to_owned();
306 tr.needs_retranslation = false;
307 }
308 }
309 }
310 });
311
312 Ok(translations)
313 }
314
315 // TODO: Move this to the normal merge functions.
316 pub fn sort_and_merge_locs_for_translation(locs: &mut [&RFile]) -> Result<Loc> {
317
318 // We need them in a specific order so the file priority removes unused loc entries from the translation.
319 locs.sort_by(|a, b| a.path_in_container_raw().cmp(b.path_in_container_raw()));
320 let locs = locs.iter()
321 .filter(|file| {
322 if let Some(name) = file.file_name() {
323 !name.is_empty() && name != TRANSLATED_FILE_NAME
324 } else {
325 false
326 }
327 })
328 .filter_map(|file| if let Ok(RFileDecoded::Loc(loc)) = file.decoded() { Some(loc) } else { None })
329 .collect::<Vec<_>>();
330
331 // Once we merge all the locs in the correct order, remove duplicated keys except the first one.
332 let mut merged_loc = Loc::merge(&locs)?;
333 let mut keys_found = HashSet::new();
334 let mut rows_to_delete = vec![];
335 for (index, row) in merged_loc.data().iter().enumerate() {
336 if keys_found.contains(&row[0].data_to_string()) {
337 rows_to_delete.push(index);
338 } else {
339 keys_found.insert(row[0].data_to_string());
340 }
341 }
342
343 rows_to_delete.reverse();
344 for row in &rows_to_delete {
345 merged_loc.data_mut().remove(*row);
346 }
347
348 Ok(merged_loc)
349 }
350
    /// This function applies a [PackTranslation] to a Pack.
    ///
    /// # Panics
    ///
    /// Not implemented yet: the body is a `todo!()`, so calling this function always panics.
    pub fn apply(&self, _pack: &mut Pack) -> Result<()> {
        todo!()
    }
355
356 /// This function loads a [PackTranslation] to memory from either a local json file, or a remote one.
357 pub fn load(paths: &[PathBuf], pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
358 for path in paths {
359 match Self::load_json(path, pack_name, game_key, language) {
360 Ok(mut tr) => return {
361 for trad in tr.translations_mut() {
362 trad.1.value_translated = trad.1.value_translated.replace("\n||\n", "||");
363 trad.1.value_translated = trad.1.value_translated.replace("\r", "\\\\r");
364 trad.1.value_translated = trad.1.value_translated.replace("\n", "\\\\n");
365 trad.1.value_translated = trad.1.value_translated.replace("\t", "\\\\t");
366 }
367 Ok(tr)
368 },
369 Err(_) => continue,
370 }
371 }
372
373 Err(RLibError::TranslatorCouldNotLoadTranslation)
374 }
375
376 fn load_json(path: &Path, pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
377 let path = path.join(format!("{game_key}/{pack_name}/{language}.json"));
378 let mut file = BufReader::new(File::open(path)?);
379 let mut data = Vec::with_capacity(file.get_ref().metadata()?.len() as usize);
380 file.read_to_end(&mut data)?;
381 serde_json::from_slice(&data).map_err(From::from)
382 }
383
384 /// This function saves a [PackTranslation] from memory to a `.json` file with the provided path.
385 pub fn save(&mut self, path: &Path, game_key: &str) -> Result<()> {
386 let path = path.join(format!("{}/{}/{}.json", game_key, self.pack_name, self.language));
387
388 // Make sure the path exists to avoid problems with updating schemas.
389 if let Some(parent_folder) = path.parent() {
390 DirBuilder::new().recursive(true).create(parent_folder)?;
391 }
392
393 let mut file = BufWriter::new(File::create(&path)?);
394 file.write_all(serde_json::to_string_pretty(&self)?.as_bytes())?;
395 Ok(())
396 }
397
398 pub fn definition() -> Definition {
399 let mut definition = Definition::default();
400
401 // We put the booleans first because they may act as a kind of filter.
402 definition.fields_mut().push(Field { name: "key".to_string(), field_type: FieldType::StringU8, is_key: true, ..Default::default() });
403 definition.fields_mut().push(Field { name: "needs_retranslation".to_string(), field_type: FieldType::Boolean, ..Default::default() });
404 definition.fields_mut().push(Field { name: "removed".to_string(), field_type: FieldType::Boolean, ..Default::default() });
405 definition.fields_mut().push(Field { name: "value_original".to_string(), field_type: FieldType::StringU8, ..Default::default() });
406 definition.fields_mut().push(Field { name: "value_translated".to_string(), field_type: FieldType::StringU8, ..Default::default() });
407
408 definition
409 }
410
411 pub fn from_table(&mut self, table: &TableInMemory) -> Result<()> {
412 self.translations_mut().clear();
413
414 for row in table.data().iter() {
415 let mut tr = Translation::default();
416
417 if let DecodedData::StringU8(ref data) = row[0] {
418 tr.set_key(data.to_owned());
419 }
420
421 if let DecodedData::Boolean(data) = row[1] {
422 tr.set_needs_retranslation(data);
423 }
424
425 if let DecodedData::Boolean(data) = row[2] {
426 tr.set_removed(data);
427 }
428
429 if let DecodedData::StringU8(ref data) = row[3] {
430 tr.set_value_original(data.to_owned());
431 }
432
433 if let DecodedData::StringU8(ref data) = row[4] {
434 tr.set_value_translated(data.to_owned());
435 }
436
437 self.translations_mut().insert(tr.key.to_owned(), tr);
438 }
439
440 Ok(())
441 }
442
443 pub fn to_table(&self) -> Result<TableInMemory> {
444 let definition = Self::definition();
445 let mut table = TableInMemory::new(&definition, None, "");
446
447 // Due to bugs in the table filters, we pre-sort the data by putting stuff that needs to be retranslated at the start.
448 let data = self.translations()
449 .iter()
450 .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr1.key(), tr2.key()))
451 .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr2.needs_retranslation(), tr1.needs_retranslation()))
452 .map(|(_, tr)| vec![
453 DecodedData::StringU8(tr.key().to_owned()),
454 DecodedData::Boolean(*tr.needs_retranslation()),
455 DecodedData::Boolean(*tr.removed()),
456 DecodedData::StringU8(tr.value_original().to_owned()),
457 DecodedData::StringU8(tr.value_translated().to_owned()),
458 ]).collect::<Vec<_>>();
459
460 table.set_data(&data)?;
461 Ok(table)
462 }
463}
464
465/// Special serializer function to sort the translations HashMap before serializing.
466fn ordered_map_translations<S>(value: &HashMap<String, Translation>, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, {
467 let ordered: BTreeMap<_, _> = value.iter().collect();
468 ordered.serialize(serializer)
469}