Skip to main content

rpfm_extensions/translator/
mod.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Mod translation and localization support.
12//!
13//! This module provides tools for managing translations of mod content, making it
14//! easier to localize mods for different languages. It tracks translation status,
15//! detects changes in source text, and supports auto-translation from vanilla data.
16//!
17//! # Overview
18//!
19//! The translation system works by:
20//!
21//! 1. Extracting all translatable strings from a pack's Loc files
22//! 2. Storing translations in a separate JSON file alongside the pack
23//! 3. Tracking which translations need updating when source text changes
24//! 4. Auto-translating from vanilla localisation data where possible
25//!
26//! # Translation Files
27//!
28//! Translations are stored in separate JSON files. Each file contains all source
29//! strings and their translations, along with metadata about translation status.
30//!
31//! # Auto-Translation
32//!
33//! The system can automatically translate strings that exist in the game's vanilla
34//! localisation files. This is useful for mods that reference vanilla content or
35//! use similar terminology.
36//!
37//! # Workflow
38//!
39//! 1. Create a [`PackTranslation`] from a pack
40//! 2. Export to a translation file for external editing
41//! 3. Import completed translations
42//! 4. Generate the final translated Loc file for the pack
43//!
44//! # Output
45//!
46//! Translated strings are output to a Loc file that overrides the original mod's
47//! entries. The filename depends on the game:
48//!
49//! - **Warhammer 1 and newer** (except Thrones): `!!!!!!translated_locs.loc` - loads
50//!   first due to its naming, allowing translations to override the original entries
51//! - **Thrones of Britannia and older games**: `localisation.loc`
52//!
53//! # Example
54//!
55//! ```ignore
56//! use rpfm_extensions::translator::PackTranslation;
57//!
58//! // Create translation from pack
59//! let mut translation = PackTranslation::new(
60//!     &[translations_path],
61//!     &pack,
62//!     "warhammer_3",
63//!     "es",  // Spanish
64//!     &dependencies,
65//!     &english_base,
66//!     &local_fixes,
67//! )?;
68//!
69//! // Save translation file
//! translation.save(&output_path, "warhammer_3")?;
71//!
72//! // Generate translated Loc file for the pack
73//! let loc_file = translation.generate_loc()?;
74//! ```
75
76use getset::{Getters, MutGetters, Setters};
77use itertools::Itertools;
78use rayon::prelude::*;
79use serde::{Serialize as SerdeSerialize, Serializer};
80use serde_derive::{Serialize, Deserialize};
81
82use std::collections::{BTreeMap, HashMap, HashSet};
83use std::fs::{DirBuilder, File};
84use std::io::{BufReader, BufWriter, Read, Write};
85use std::path::{Path, PathBuf};
86
87use rpfm_lib::error::{Result, RLibError};
88use rpfm_lib::files::{Container, FileType, loc::Loc, pack::Pack, RFile, RFileDecoded, table::{DecodedData, local::TableInMemory, Table}};
89use rpfm_lib::schema::*;
90
91use crate::dependencies::Dependencies;
92
/// Filename for the generated translated Loc file.
///
/// The leading exclamation marks ensure this file loads before other Loc files,
/// allowing translations to override the original mod's strings.
///
/// Loc files with this exact name are skipped when merging a pack's Loc files
/// for translation (see `PackTranslation::sort_and_merge_locs_for_translation`).
pub const TRANSLATED_FILE_NAME: &str = "!!!!!!translated_locs.loc";

/// Full path for the translated Loc file within a pack.
///
/// This is [`TRANSLATED_FILE_NAME`] placed inside the `text/` folder.
pub const TRANSLATED_PATH: &str = "text/!!!!!!translated_locs.loc";

/// Legacy path for translated Loc files (for backwards compatibility).
///
/// NOTE(review): the module docs list `localisation.loc` as the output name for
/// Thrones of Britannia and older games; presumably this is that path. Confirm
/// against the code that picks the output file.
pub const TRANSLATED_PATH_OLD: &str = "text/localisation.loc";
104
105//-------------------------------------------------------------------------------//
106//                              Enums & Structs
107//-------------------------------------------------------------------------------//
108
/// Translation data for an entire pack.
///
/// Contains all translatable strings from a pack along with their translations
/// and metadata about translation status.
///
/// # Persistence
///
/// This struct is serialized to JSON files for storage and can be loaded back
/// when continuing translation work.
///
/// # Parent Translations
///
/// When a pack has dependencies, translations from parent mods are also loaded
/// and used for auto-translation, ensuring consistent terminology across
/// dependent mods.
///
/// # Accessors
///
/// Public getters, mutable getters and setters are generated for every field
/// through the `getset` derives.
#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", get_mut = "pub", set = "pub")]
pub struct PackTranslation {

    /// Target language code for translations (e.g., "es", "de", "fr").
    ///
    /// Also used as the file name (without extension) of the JSON file on save.
    language: String,

    /// Name of the pack these translations belong to.
    ///
    /// Used as a folder name in the path of the JSON file on save.
    pack_name: String,

    /// Map of Loc keys to their translation data.
    ///
    /// Keys are the original Loc entry keys from the pack.
    ///
    /// Serialized through `ordered_map_translations`, so the entries are written
    /// sorted by key instead of in the map's iteration order.
    #[serde(serialize_with = "ordered_map_translations")]
    translations: HashMap<String, Translation>,
}
140
/// Translation entry for a single localizable string.
///
/// Tracks both the original and translated text, along with status flags
/// indicating whether the translation is up-to-date.
///
/// Public getters, mutable getters and setters are generated for every field
/// through the `getset` derives.
#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", get_mut = "pub", set = "pub")]
pub struct Translation {

    /// The Loc key identifying this string.
    key: String,

    /// Original text in the base language (typically English).
    ///
    /// This is used to detect when the source text changes, requiring
    /// re-translation.
    value_original: String,

    /// Translated text in the target language.
    ///
    /// May be empty if not yet translated.
    value_translated: String,

    /// Whether this translation needs review.
    ///
    /// Set to `true` for newly found strings and when the original text changes
    /// after translation, indicating the translation is missing or may be outdated.
    /// Cleared when the string is removed from the pack or gets auto-translated.
    needs_retranslation: bool,

    /// Whether this string has been removed from the source pack.
    ///
    /// Translations for removed strings are kept for reference but marked
    /// as removed. If the string reappears, it is only flagged for re-translation
    /// when its original text changed or it has no translation yet.
    removed: bool,
}
175
176//-------------------------------------------------------------------------------//
177//                             Implementations
178//-------------------------------------------------------------------------------//
179
180impl PackTranslation {
181
182    pub fn new(paths: &[PathBuf], pack: &Pack, game_key: &str, language: &str, dependencies: &Dependencies, base_english: &HashMap<String, String>, base_local_fixes: &HashMap<String, String>) -> Result<Self> {
183        let mut translations = Self::load(paths, &pack.disk_file_name(), game_key, language).unwrap_or_else(|_| {
184            let mut tr = Self::default();
185            tr.language = language.to_owned();
186            tr.pack_name = pack.disk_file_name();
187            tr
188        });
189
190        // If the pack has dependencies, we have to try to load their translations too, then patch the live dependencies with them.
191        // Otherwise, we'll have a situation where data is compared and imported from the wrong language.
192        let mut parent_tr = vec![];
193        for (_, pack_name) in pack.dependencies() {
194            if let Ok(ptr) = Self::load(paths, pack_name, game_key, language) {
195                parent_tr.push(ptr);
196            }
197        }
198
199        // Once we got the previous translation loaded, get the files to translate from the Pack, updating our translation.
200        let mut locs = pack.files_by_type(&[FileType::Loc]);
201        let merged_loc = Self::sort_and_merge_locs_for_translation(&mut locs)?;
202        let merged_loc_data = merged_loc.data();
203        let merged_loc_hash = merged_loc_data
204            .par_iter()
205            .map(|x| (x[0].data_to_string(), x[1].data_to_string()))
206            .collect::<HashMap<_,_>>();
207
208        // Once we have the clean list of loc entries we have in our Pack, we need to update the translation with it.
209        // First we do a pass to mark all removed translations as such. This is separated from the rest because this pass is way slower than the rest.
210        for (tr_key, tr) in translations.translations_mut() {
211            let was_removed = tr.removed;
212            tr.removed = !merged_loc_hash.contains_key(&**tr_key);
213
214            // If the line has been removed, unmark it for translation.
215            // If the line has been re-added, only flag for retranslation if the original value changed or there's no translation yet.
216            if tr.removed {
217                tr.needs_retranslation = false;
218            } else if was_removed {
219                if let Some(current_value) = merged_loc_hash.get(&**tr_key) {
220                    tr.needs_retranslation = tr.value_translated.is_empty() || *current_value != tr.value_original;
221                }
222            }
223        }
224
225        // Next, we update the translations data with the loc data of the merged loc.
226        for row in merged_loc.data().iter() {
227            let key = row[0].data_to_string();
228            let value = row[1].data_to_string();
229
230            match translations.translations.get_mut(&*key) {
231                Some(tr) => {
232                    if value != tr.value_original {
233                        tr.value_original = value.to_string();
234                        tr.needs_retranslation = true;
235                    }
236                },
237
238                None => {
239                    let tr = Translation {
240                        key: key.to_string(),
241                        value_original: value.to_string(),
242                        value_translated: String::new(),
243                        needs_retranslation: true,
244                        removed: false,
245                    };
246
247                    translations.translations.insert(key.to_string(), tr);
248                }
249            }
250        }
251
252        // Lastly, we do an auto-translation pass. We have two copies of base local: one normal and one patched with parent translations.
253        // This is needed because the base localisation data doesn't have the translation data for parent mods included.
254        let mut base_local_tr = dependencies.localisation_data().clone();
255        for ptr in parent_tr {
256            for (key, val) in ptr.translations() {
257                if !*val.needs_retranslation() && !val.value_translated().is_empty() {
258                    if let Some(ptr_val) = base_local_tr.get_mut(key) {
259                        *ptr_val = val.value_translated().to_string();
260                    }
261                }
262            }
263        }
264
265        let tr_copy = translations.translations().clone();
266        translations.translations_mut().par_iter_mut().for_each(|(tr_key, tr)| {
267            if !tr.removed {
268
269                // Mark empty lines as translated.
270                if tr.value_original().trim().is_empty() && tr.value_translated().trim().is_empty() {
271                    tr.value_translated = tr.value_original.to_owned();
272                    tr.needs_retranslation = false;
273                }
274
275                // If the value is unchanged from english, just copy the vanilla translation.
276                //
277                // NOTE: This is really a patch for packs not using optimizing pass, because the optimizer actually removes these entries.
278                else if let Some(vanilla_data) = base_english.get(tr_key) {
279                    if tr.value_original() == vanilla_data {
280                        if let Some(vanilla_data) = base_local_fixes.get(tr_key) {
281                            tr.value_translated = vanilla_data.to_owned();
282                            tr.needs_retranslation = false;
283                        } else if let Some(vanilla_data) = base_local_tr.get(tr_key) {
284                            tr.value_translated = vanilla_data.to_owned();
285                            tr.needs_retranslation = false;
286                        }
287                    }
288                }
289
290                // If the value is equal to another value in the english translation (but with a different key), we may be able to reuse it.
291                //
292                // Note that this is prone to give wrong translations as it doesn't have any context, so we only do it for lines that are not yet translated.
293                else if tr.value_translated().trim().is_empty() || *tr.needs_retranslation() {
294                    if let Some((key, _)) = base_english.iter().find(|(_, value)| *value == tr.value_original()) {
295                        if let Some(value_tr) = base_local_fixes.get(key) {
296                            tr.value_translated = value_tr.to_owned();
297                            tr.needs_retranslation = false;
298                        } else if let Some(value_tr) = base_local_tr.get(key) {
299                            tr.value_translated = value_tr.to_owned();
300                            tr.needs_retranslation = false;
301                        }
302                    } else if let Some((_, value_tr)) = tr_copy.iter()
303                        .find(|(_, tr_copy)| *tr_copy.value_original() == *tr.value_original() && !*tr_copy.needs_retranslation() && *tr.needs_retranslation()) {
304                        tr.value_translated = value_tr.value_translated().to_owned();
305                        tr.needs_retranslation = false;
306                    }
307                }
308            }
309        });
310
311        Ok(translations)
312    }
313
314    // TODO: Move this to the normal merge functions.
315    pub fn sort_and_merge_locs_for_translation(locs: &mut [&RFile]) -> Result<Loc> {
316
317        // We need them in a specific order so the file priority removes unused loc entries from the translation.
318        locs.sort_by(|a, b| a.path_in_container_raw().cmp(b.path_in_container_raw()));
319        let locs = locs.iter()
320            .filter(|file| {
321                if let Some(name) = file.file_name() {
322                    !name.is_empty() && name != TRANSLATED_FILE_NAME
323                } else {
324                    false
325                }
326            })
327            .filter_map(|file| if let Ok(RFileDecoded::Loc(loc)) = file.decoded() { Some(loc) } else { None })
328            .collect::<Vec<_>>();
329
330        // Once we merge all the locs in the correct order, remove duplicated keys except the first one.
331        let mut merged_loc = Loc::merge(&locs)?;
332        let mut keys_found = HashSet::new();
333        let mut rows_to_delete = vec![];
334        for (index, row) in merged_loc.data().iter().enumerate() {
335            if keys_found.contains(&row[0].data_to_string()) {
336                rows_to_delete.push(index);
337            } else {
338                keys_found.insert(row[0].data_to_string());
339            }
340        }
341
342        rows_to_delete.reverse();
343        for row in &rows_to_delete {
344            merged_loc.data_mut().remove(*row);
345        }
346
347        Ok(merged_loc)
348    }
349
    /// This function applies a [PackTranslation] to a Pack.
    ///
    /// # Panics
    ///
    /// Not implemented yet: the body is a `todo!()`, so calling this always panics.
    pub fn apply(&self, _pack: &mut Pack) -> Result<()> {
        todo!()
    }
354
355    /// This function loads a [PackTranslation] to memory from either a local json file, or a remote one.
356    pub fn load(paths: &[PathBuf], pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
357        for path in paths {
358            match Self::load_json(path, pack_name, game_key, language) {
359                Ok(mut tr) => return {
360                    for trad in tr.translations_mut() {
361                        trad.1.value_translated = trad.1.value_translated.replace("\n||\n", "||");
362                        trad.1.value_translated = trad.1.value_translated.replace("\r", "\\\\r");
363                        trad.1.value_translated = trad.1.value_translated.replace("\n", "\\\\n");
364                        trad.1.value_translated = trad.1.value_translated.replace("\t", "\\\\t");
365                    }
366                    Ok(tr)
367                },
368                Err(_) => continue,
369            }
370        }
371
372        Err(RLibError::TranslatorCouldNotLoadTranslation)
373    }
374
375    fn load_json(path: &Path, pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
376        let path = path.join(format!("{game_key}/{pack_name}/{language}.json"));
377        let mut file = BufReader::new(File::open(path)?);
378        let mut data = Vec::with_capacity(file.get_ref().metadata()?.len() as usize);
379        file.read_to_end(&mut data)?;
380        serde_json::from_slice(&data).map_err(From::from)
381    }
382
383    /// This function saves a [PackTranslation] from memory to a `.json` file with the provided path.
384    pub fn save(&mut self, path: &Path, game_key: &str) -> Result<()> {
385        let path = path.join(format!("{}/{}/{}.json", game_key, self.pack_name, self.language));
386
387        // Make sure the path exists to avoid problems with updating schemas.
388        if let Some(parent_folder) = path.parent() {
389            DirBuilder::new().recursive(true).create(parent_folder)?;
390        }
391
392        let mut file = BufWriter::new(File::create(&path)?);
393        file.write_all(serde_json::to_string_pretty(&self)?.as_bytes())?;
394        Ok(())
395    }
396
397    pub fn definition() -> Definition {
398        let mut definition = Definition::default();
399
400        // We put the booleans first because they may act as a kind of filter.
401        definition.fields_mut().push(Field::new("key".to_string(), FieldType::StringU8, true, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
402        definition.fields_mut().push(Field::new("needs_retranslation".to_string(), FieldType::Boolean, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
403        definition.fields_mut().push(Field::new("removed".to_string(), FieldType::Boolean, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
404        definition.fields_mut().push(Field::new("value_original".to_string(), FieldType::StringU8, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
405        definition.fields_mut().push(Field::new("value_translated".to_string(), FieldType::StringU8, false, None, false, None, None, None, String::new(), -1, 0, BTreeMap::new(), None));
406
407        definition
408    }
409
410    pub fn from_table(&mut self, table: &TableInMemory) -> Result<()> {
411        self.translations_mut().clear();
412
413        for row in table.data().iter() {
414            let mut tr = Translation::default();
415
416            if let DecodedData::StringU8(ref data) = row[0] {
417                tr.set_key(data.to_owned());
418            }
419
420            if let DecodedData::Boolean(data) = row[1] {
421                tr.set_needs_retranslation(data);
422            }
423
424            if let DecodedData::Boolean(data) = row[2] {
425                tr.set_removed(data);
426            }
427
428            if let DecodedData::StringU8(ref data) = row[3] {
429                tr.set_value_original(data.to_owned());
430            }
431
432            if let DecodedData::StringU8(ref data) = row[4] {
433                tr.set_value_translated(data.to_owned());
434            }
435
436            self.translations_mut().insert(tr.key.to_owned(), tr);
437        }
438
439        Ok(())
440    }
441
442    pub fn to_table(&self) -> Result<TableInMemory> {
443        let definition = Self::definition();
444        let mut table = TableInMemory::new(&definition, None, "");
445
446        // Due to bugs in the table filters, we pre-sort the data by putting stuff that needs to be retranslated at the start.
447        let data = self.translations()
448            .iter()
449            .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr1.key(), tr2.key()))
450            .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr2.needs_retranslation(), tr1.needs_retranslation()))
451            .map(|(_, tr)| vec![
452                DecodedData::StringU8(tr.key().to_owned()),
453                DecodedData::Boolean(*tr.needs_retranslation()),
454                DecodedData::Boolean(*tr.removed()),
455                DecodedData::StringU8(tr.value_original().to_owned()),
456                DecodedData::StringU8(tr.value_translated().to_owned()),
457            ]).collect::<Vec<_>>();
458
459        table.set_data(&data)?;
460        Ok(table)
461    }
462}
463
464/// Special serializer function to sort the translations HashMap before serializing.
465fn ordered_map_translations<S>(value: &HashMap<String, Translation>, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, {
466    let ordered: BTreeMap<_, _> = value.iter().collect();
467    ordered.serialize(serializer)
468}