rpfm_extensions/translator/mod.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Mod translation and localization support.
12//!
13//! This module provides tools for managing translations of mod content, making it
14//! easier to localize mods for different languages. It tracks translation status,
15//! detects changes in source text, and supports auto-translation from vanilla data.
16//!
17//! # Overview
18//!
19//! The translation system works by:
20//!
21//! 1. Extracting all translatable strings from a pack's Loc files
22//! 2. Storing translations in a separate JSON file alongside the pack
23//! 3. Tracking which translations need updating when source text changes
24//! 4. Auto-translating from vanilla localisation data where possible
25//!
26//! # Translation Files
27//!
28//! Translations are stored in separate JSON files. Each file contains all source
29//! strings and their translations, along with metadata about translation status.
30//!
31//! # Auto-Translation
32//!
33//! The system can automatically translate strings that exist in the game's vanilla
34//! localisation files. This is useful for mods that reference vanilla content or
35//! use similar terminology.
36//!
37//! # Workflow
38//!
39//! 1. Create a [`PackTranslation`] from a pack
40//! 2. Export to a translation file for external editing
41//! 3. Import completed translations
42//! 4. Generate the final translated Loc file for the pack
43//!
44//! # Output
45//!
46//! Translated strings are output to a Loc file that overrides the original mod's
47//! entries. The filename depends on the game:
48//!
49//! - **Warhammer 1 and newer** (except Thrones): `!!!!!!translated_locs.loc` - loads
50//! first due to its naming, allowing translations to override the original entries
51//! - **Thrones of Britannia and older games**: `localisation.loc`
52//!
//! # Example
//!
//! ```ignore
//! use rpfm_extensions::translator::PackTranslation;
//!
//! // Create translation from pack
//! let mut translation = PackTranslation::new(
//!     &[translations_path],
//!     &pack,
//!     "warhammer_3",
//!     "es", // Spanish
//!     &dependencies,
//!     &english_base,
//!     &local_fixes,
//! )?;
//!
//! // Save translation file. The game key selects the subfolder the JSON is
//! // written to: `<output_path>/<game_key>/<pack_name>/<language>.json`.
//! translation.save(&output_path, "warhammer_3")?;
//!
//! // Generate translated Loc file for the pack
//! let loc_file = translation.generate_loc()?;
//! ```
75
76use getset::{Getters, MutGetters, Setters};
77use itertools::Itertools;
78use rayon::prelude::*;
79use serde::{Serialize as SerdeSerialize, Serializer};
80use serde_derive::{Serialize, Deserialize};
81
82use std::collections::{BTreeMap, HashMap, HashSet};
83use std::fs::{DirBuilder, File};
84use std::io::{BufReader, BufWriter, Read, Write};
85use std::path::{Path, PathBuf};
86
87use rpfm_lib::error::{Result, RLibError};
88use rpfm_lib::files::{Container, FileType, loc::Loc, pack::Pack, RFile, RFileDecoded, table::{DecodedData, local::TableInMemory, Table}};
89use rpfm_lib::schema::*;
90
91use crate::dependencies::Dependencies;
92
/// File name of the Loc file that holds the generated translations.
///
/// The run of leading `!` characters is intentional: per the module
/// documentation it makes the file load first, so its entries override the
/// strings of the mod being translated. Loc files with this name are also
/// skipped when collecting the strings to translate.
pub const TRANSLATED_FILE_NAME: &str = "!!!!!!translated_locs.loc";

/// In-pack path of the generated translations Loc file
/// ([`TRANSLATED_FILE_NAME`] inside the `text/` folder).
pub const TRANSLATED_PATH: &str = "text/!!!!!!translated_locs.loc";

/// Older in-pack path for the translated Loc file, kept for backwards
/// compatibility.
///
/// The module documentation names `localisation.loc` as the output file for
/// Thrones of Britannia and older games.
pub const TRANSLATED_PATH_OLD: &str = "text/localisation.loc";
104
105//-------------------------------------------------------------------------------//
106// Enums & Structs
107//-------------------------------------------------------------------------------//
108
109/// Translation data for an entire pack.
110///
111/// Contains all translatable strings from a pack along with their translations
112/// and metadata about translation status.
113///
114/// # Persistence
115///
116/// This struct is serialized to JSON files for storage and can be loaded back
117/// when continuing translation work.
118///
119/// # Parent Translations
120///
121/// When a pack has dependencies, translations from parent mods are also loaded
122/// and used for auto-translation, ensuring consistent terminology across
123/// dependent mods.
124#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
125#[getset(get = "pub", get_mut = "pub", set = "pub")]
126pub struct PackTranslation {
127
128 /// Target language code for translations (e.g., "es", "de", "fr").
129 language: String,
130
131 /// Name of the pack these translations belong to.
132 pack_name: String,
133
134 /// Map of Loc keys to their translation data.
135 ///
136 /// Keys are the original Loc entry keys from the pack.
137 #[serde(serialize_with = "ordered_map_translations")]
138 translations: HashMap<String, Translation>,
139}
140
141/// Translation entry for a single localizable string.
142///
143/// Tracks both the original and translated text, along with status flags
144/// indicating whether the translation is up-to-date.
145#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
146#[getset(get = "pub", get_mut = "pub", set = "pub")]
147pub struct Translation {
148
149 /// The Loc key identifying this string.
150 key: String,
151
152 /// Original text in the base language (typically English).
153 ///
154 /// This is used to detect when the source text changes, requiring
155 /// re-translation.
156 value_original: String,
157
158 /// Translated text in the target language.
159 ///
160 /// May be empty if not yet translated.
161 value_translated: String,
162
163 /// Whether this translation needs review.
164 ///
165 /// Set to `true` when the original text changes after translation,
166 /// indicating the translation may be outdated.
167 needs_retranslation: bool,
168
169 /// Whether this string has been removed from the source pack.
170 ///
171 /// Translations for removed strings are kept for reference but marked
172 /// as removed. If the string reappears, it will be flagged for re-translation.
173 removed: bool,
174}
175
176//-------------------------------------------------------------------------------//
177// Implementations
178//-------------------------------------------------------------------------------//
179
180impl PackTranslation {
181
182 pub fn new(paths: &[PathBuf], pack: &Pack, game_key: &str, language: &str, dependencies: &Dependencies, base_english: &HashMap<String, String>, base_local_fixes: &HashMap<String, String>) -> Result<Self> {
183 let mut translations = Self::load(paths, &pack.disk_file_name(), game_key, language).unwrap_or_else(|_| {
184 let mut tr = Self::default();
185 tr.language = language.to_owned();
186 tr.pack_name = pack.disk_file_name();
187 tr
188 });
189
190 // If the pack has dependencies, we have to try to load their translations too, then patch the live dependencies with them.
191 // Otherwise, we'll have a situation where data is compared and imported from the wrong language.
192 let mut parent_tr = vec![];
193 for (_, pack_name) in pack.dependencies() {
194 if let Ok(ptr) = Self::load(paths, pack_name, game_key, language) {
195 parent_tr.push(ptr);
196 }
197 }
198
199 // Once we got the previous translation loaded, get the files to translate from the Pack, updating our translation.
200 let mut locs = pack.files_by_type(&[FileType::Loc]);
201 let merged_loc = Self::sort_and_merge_locs_for_translation(&mut locs)?;
202 let merged_loc_data = merged_loc.data();
203 let merged_loc_hash = merged_loc_data
204 .par_iter()
205 .map(|x| (x[0].data_to_string(), x[1].data_to_string()))
206 .collect::<HashMap<_,_>>();
207
208 // Once we have the clean list of loc entries we have in our Pack, we need to update the translation with it.
209 // First we do a pass to mark all removed translations as such. This is separated from the rest because this pass is way slower than the rest.
210 for (tr_key, tr) in translations.translations_mut() {
211 let was_removed = tr.removed;
212 tr.removed = !merged_loc_hash.contains_key(&**tr_key);
213
214 // If the line has been removed, unmark it for translation.
215 // If the line has been re-added, only flag for retranslation if the original value changed or there's no translation yet.
216 if tr.removed {
217 tr.needs_retranslation = false;
218 } else if was_removed {
219 if let Some(current_value) = merged_loc_hash.get(&**tr_key) {
220 tr.needs_retranslation = tr.value_translated.is_empty() || *current_value != tr.value_original;
221 }
222 }
223 }
224
225 // Next, we update the translations data with the loc data of the merged loc.
226 for row in merged_loc.data().iter() {
227 let key = row[0].data_to_string();
228 let value = row[1].data_to_string();
229
230 match translations.translations.get_mut(&*key) {
231 Some(tr) => {
232 if value != tr.value_original {
233 tr.value_original = value.to_string();
234 tr.needs_retranslation = true;
235 }
236 },
237
238 None => {
239 let tr = Translation {
240 key: key.to_string(),
241 value_original: value.to_string(),
242 value_translated: String::new(),
243 needs_retranslation: true,
244 removed: false,
245 };
246
247 translations.translations.insert(key.to_string(), tr);
248 }
249 }
250 }
251
252 // Lastly, we do an auto-translation pass. We have two copies of base local: one normal and one patched with parent translations.
253 // This is needed because the base localisation data doesn't have the translation data for parent mods included.
254 let mut base_local_tr = dependencies.localisation_data().clone();
255 for ptr in parent_tr {
256 for (key, val) in ptr.translations() {
257 if !*val.needs_retranslation() && !val.value_translated().is_empty() {
258 if let Some(ptr_val) = base_local_tr.get_mut(key) {
259 *ptr_val = val.value_translated().to_string();
260 }
261 }
262 }
263 }
264
265 let tr_copy = translations.translations().clone();
266 translations.translations_mut().par_iter_mut().for_each(|(tr_key, tr)| {
267 if !tr.removed {
268
269 // Mark empty lines as translated.
270 if tr.value_original().trim().is_empty() && tr.value_translated().trim().is_empty() {
271 tr.value_translated = tr.value_original.to_owned();
272 tr.needs_retranslation = false;
273 }
274
275 // If the value is unchanged from english, just copy the vanilla translation.
276 //
277 // NOTE: This is really a patch for packs not using optimizing pass, because the optimizer actually removes these entries.
278 else if let Some(vanilla_data) = base_english.get(tr_key) {
279 if tr.value_original() == vanilla_data {
280 if let Some(vanilla_data) = base_local_fixes.get(tr_key) {
281 tr.value_translated = vanilla_data.to_owned();
282 tr.needs_retranslation = false;
283 } else if let Some(vanilla_data) = base_local_tr.get(tr_key) {
284 tr.value_translated = vanilla_data.to_owned();
285 tr.needs_retranslation = false;
286 }
287 }
288 }
289
290 // If the value is equal to another value in the english translation (but with a different key), we may be able to reuse it.
291 //
292 // Note that this is prone to give wrong translations as it doesn't have any context, so we only do it for lines that are not yet translated.
293 else if tr.value_translated().trim().is_empty() || *tr.needs_retranslation() {
294 if let Some((key, _)) = base_english.iter().find(|(_, value)| *value == tr.value_original()) {
295 if let Some(value_tr) = base_local_fixes.get(key) {
296 tr.value_translated = value_tr.to_owned();
297 tr.needs_retranslation = false;
298 } else if let Some(value_tr) = base_local_tr.get(key) {
299 tr.value_translated = value_tr.to_owned();
300 tr.needs_retranslation = false;
301 }
302 } else if let Some((_, value_tr)) = tr_copy.iter()
303 .find(|(_, tr_copy)| *tr_copy.value_original() == *tr.value_original() && !*tr_copy.needs_retranslation() && *tr.needs_retranslation()) {
304 tr.value_translated = value_tr.value_translated().to_owned();
305 tr.needs_retranslation = false;
306 }
307 }
308 }
309 });
310
311 Ok(translations)
312 }
313
314 // TODO: Move this to the normal merge functions.
315 pub fn sort_and_merge_locs_for_translation(locs: &mut [&RFile]) -> Result<Loc> {
316
317 // We need them in a specific order so the file priority removes unused loc entries from the translation.
318 locs.sort_by(|a, b| a.path_in_container_raw().cmp(b.path_in_container_raw()));
319 let locs = locs.iter()
320 .filter(|file| {
321 if let Some(name) = file.file_name() {
322 !name.is_empty() && name != TRANSLATED_FILE_NAME
323 } else {
324 false
325 }
326 })
327 .filter_map(|file| if let Ok(RFileDecoded::Loc(loc)) = file.decoded() { Some(loc) } else { None })
328 .collect::<Vec<_>>();
329
330 // Once we merge all the locs in the correct order, remove duplicated keys except the first one.
331 let mut merged_loc = Loc::merge(&locs)?;
332 let mut keys_found = HashSet::new();
333 let mut rows_to_delete = vec![];
334 for (index, row) in merged_loc.data().iter().enumerate() {
335 if keys_found.contains(&row[0].data_to_string()) {
336 rows_to_delete.push(index);
337 } else {
338 keys_found.insert(row[0].data_to_string());
339 }
340 }
341
342 rows_to_delete.reverse();
343 for row in &rows_to_delete {
344 merged_loc.data_mut().remove(*row);
345 }
346
347 Ok(merged_loc)
348 }
349
350 /// This function applies a [PackTranslation] to a Pack.
351 pub fn apply(&self, _pack: &mut Pack) -> Result<()> {
352 todo!()
353 }
354
355 /// This function loads a [PackTranslation] to memory from either a local json file, or a remote one.
356 pub fn load(paths: &[PathBuf], pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
357 for path in paths {
358 match Self::load_json(path, pack_name, game_key, language) {
359 Ok(mut tr) => return {
360 for trad in tr.translations_mut() {
361 trad.1.value_translated = trad.1.value_translated.replace("\n||\n", "||");
362 trad.1.value_translated = trad.1.value_translated.replace("\r", "\\\\r");
363 trad.1.value_translated = trad.1.value_translated.replace("\n", "\\\\n");
364 trad.1.value_translated = trad.1.value_translated.replace("\t", "\\\\t");
365 }
366 Ok(tr)
367 },
368 Err(_) => continue,
369 }
370 }
371
372 Err(RLibError::TranslatorCouldNotLoadTranslation)
373 }
374
375 fn load_json(path: &Path, pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
376 let path = path.join(format!("{game_key}/{pack_name}/{language}.json"));
377 let mut file = BufReader::new(File::open(path)?);
378 let mut data = Vec::with_capacity(file.get_ref().metadata()?.len() as usize);
379 file.read_to_end(&mut data)?;
380 serde_json::from_slice(&data).map_err(From::from)
381 }
382
383 /// This function saves a [PackTranslation] from memory to a `.json` file with the provided path.
384 pub fn save(&mut self, path: &Path, game_key: &str) -> Result<()> {
385 let path = path.join(format!("{}/{}/{}.json", game_key, self.pack_name, self.language));
386
387 // Make sure the path exists to avoid problems with updating schemas.
388 if let Some(parent_folder) = path.parent() {
389 DirBuilder::new().recursive(true).create(parent_folder)?;
390 }
391
392 let mut file = BufWriter::new(File::create(&path)?);
393 file.write_all(serde_json::to_string_pretty(&self)?.as_bytes())?;
394 Ok(())
395 }
396
397 pub fn definition() -> Definition {
398 let mut definition = Definition::default();
399
400 // We put the booleans first because they may act as a kind of filter.
401 definition.fields_mut().push(Field::new("key".to_string(), FieldType::StringU8, true, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
402 definition.fields_mut().push(Field::new("needs_retranslation".to_string(), FieldType::Boolean, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
403 definition.fields_mut().push(Field::new("removed".to_string(), FieldType::Boolean, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
404 definition.fields_mut().push(Field::new("value_original".to_string(), FieldType::StringU8, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
405 definition.fields_mut().push(Field::new("value_translated".to_string(), FieldType::StringU8, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
406
407 definition
408 }
409
410 pub fn from_table(&mut self, table: &TableInMemory) -> Result<()> {
411 self.translations_mut().clear();
412
413 for row in table.data().iter() {
414 let mut tr = Translation::default();
415
416 if let DecodedData::StringU8(ref data) = row[0] {
417 tr.set_key(data.to_owned());
418 }
419
420 if let DecodedData::Boolean(data) = row[1] {
421 tr.set_needs_retranslation(data);
422 }
423
424 if let DecodedData::Boolean(data) = row[2] {
425 tr.set_removed(data);
426 }
427
428 if let DecodedData::StringU8(ref data) = row[3] {
429 tr.set_value_original(data.to_owned());
430 }
431
432 if let DecodedData::StringU8(ref data) = row[4] {
433 tr.set_value_translated(data.to_owned());
434 }
435
436 self.translations_mut().insert(tr.key.to_owned(), tr);
437 }
438
439 Ok(())
440 }
441
442 pub fn to_table(&self) -> Result<TableInMemory> {
443 let definition = Self::definition();
444 let mut table = TableInMemory::new(&definition, None, "");
445
446 // Due to bugs in the table filters, we pre-sort the data by putting stuff that needs to be retranslated at the start.
447 let data = self.translations()
448 .iter()
449 .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr1.key(), tr2.key()))
450 .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr2.needs_retranslation(), tr1.needs_retranslation()))
451 .map(|(_, tr)| vec![
452 DecodedData::StringU8(tr.key().to_owned()),
453 DecodedData::Boolean(*tr.needs_retranslation()),
454 DecodedData::Boolean(*tr.removed()),
455 DecodedData::StringU8(tr.value_original().to_owned()),
456 DecodedData::StringU8(tr.value_translated().to_owned()),
457 ]).collect::<Vec<_>>();
458
459 table.set_data(&data)?;
460 Ok(table)
461 }
462}
463
464/// Special serializer function to sort the translations HashMap before serializing.
465fn ordered_map_translations<S>(value: &HashMap<String, Translation>, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, {
466 let ordered: BTreeMap<_, _> = value.iter().collect();
467 ordered.serialize(serializer)
468}