Skip to main content

rpfm_extensions/translator/
mod.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Mod translation and localization support.
12//!
13//! This module provides tools for managing translations of mod content, making it
14//! easier to localize mods for different languages. It tracks translation status,
15//! detects changes in source text, and supports auto-translation from vanilla data.
16//!
17//! # Overview
18//!
19//! The translation system works by:
20//!
21//! 1. Extracting all translatable strings from a pack's Loc files
22//! 2. Storing translations in a separate JSON file alongside the pack
23//! 3. Tracking which translations need updating when source text changes
24//! 4. Auto-translating from vanilla localisation data where possible
25//!
26//! # Translation Files
27//!
28//! Translations are stored in separate JSON files. Each file contains all source
29//! strings and their translations, along with metadata about translation status.
30//!
31//! # Auto-Translation
32//!
33//! The system can automatically translate strings that exist in the game's vanilla
34//! localisation files. This is useful for mods that reference vanilla content or
35//! use similar terminology.
36//!
37//! # Workflow
38//!
39//! 1. Create a [`PackTranslation`] from a pack
40//! 2. Export to a translation file for external editing
41//! 3. Import completed translations
42//! 4. Generate the final translated Loc file for the pack
43//!
44//! # Output
45//!
46//! Translated strings are output to a Loc file that overrides the original mod's
47//! entries. The filename depends on the game:
48//!
49//! - **Warhammer 1 and newer** (except Thrones): `!!!!!!translated_locs.loc` - loads
50//!   first due to its naming, allowing translations to override the original entries
51//! - **Thrones of Britannia and older games**: `localisation.loc`
52//!
53//! # Example
54//!
55//! ```ignore
56//! use rpfm_extensions::translator::PackTranslation;
57//!
58//! // Create translation from pack
59//! let mut translation = PackTranslation::new(
60//!     &[translations_path],
61//!     &pack,
62//!     "warhammer_3",
63//!     "es",  // Spanish
64//!     &dependencies,
65//!     &english_base,
66//!     &local_fixes,
67//! )?;
68//!
69//! // Save translation file
//! translation.save(&output_path, "warhammer_3")?;
71//!
72//! // Generate translated Loc file for the pack
73//! let loc_file = translation.generate_loc()?;
74//! ```
75
76use getset::{Getters, MutGetters, Setters};
77use itertools::Itertools;
78use rayon::prelude::*;
79use serde::{Serialize as SerdeSerialize, Serializer};
80use serde_derive::{Serialize, Deserialize};
81
82use std::collections::{BTreeMap, HashMap, HashSet};
83use std::fs::{DirBuilder, File};
84use std::io::{BufReader, BufWriter, Read, Write};
85use std::path::{Path, PathBuf};
86
87use rpfm_lib::error::{Result, RLibError};
88use rpfm_lib::files::{Container, FileType, loc::Loc, pack::Pack, RFile, RFileDecoded, table::{DecodedData, local::TableInMemory, Table}};
89use rpfm_lib::schema::*;
90
91use crate::dependencies::Dependencies;
92
/// Filename for the generated translated Loc file.
///
/// The leading exclamation marks ensure this file loads before other Loc files,
/// allowing translations to override the original mod's strings.
///
/// Loc files with this exact name are skipped when the pack's Loc files are merged
/// for translation, so the generated file is not treated as translatable source.
pub const TRANSLATED_FILE_NAME: &str = "!!!!!!translated_locs.loc";

/// Full path for the translated Loc file within a pack.
///
/// This is [`TRANSLATED_FILE_NAME`] placed under the `text/` folder.
pub const TRANSLATED_PATH: &str = "text/!!!!!!translated_locs.loc";

/// Legacy path for translated Loc files (for backwards compatibility).
///
/// Per the module documentation, this is the `localisation.loc` output used for
/// Thrones of Britannia and older games.
pub const TRANSLATED_PATH_OLD: &str = "text/localisation.loc";
104
105//-------------------------------------------------------------------------------//
106//                              Enums & Structs
107//-------------------------------------------------------------------------------//
108
/// Translation data for an entire pack.
///
/// Contains all translatable strings from a pack along with their translations
/// and metadata about translation status.
///
/// # Persistence
///
/// This struct is serialized to JSON files for storage and can be loaded back
/// when continuing translation work. Files are stored under a base folder as
/// `<game_key>/<pack_name>/<language>.json`.
///
/// # Parent Translations
///
/// When a pack has dependencies, translations from parent mods are also loaded
/// and used for auto-translation, ensuring consistent terminology across
/// dependent mods.
#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", get_mut = "pub", set = "pub")]
pub struct PackTranslation {

    /// Target language code for translations (e.g., "es", "de", "fr").
    language: String,

    /// Name of the pack these translations belong to.
    pack_name: String,

    /// Map of Loc keys to their translation data.
    ///
    /// Keys are the original Loc entry keys from the pack.
    ///
    /// On serialization the entries are written sorted by key, so the order of
    /// the output does not depend on the `HashMap` iteration order.
    #[serde(serialize_with = "ordered_map_translations")]
    translations: HashMap<String, Translation>,
}
140
/// Translation entry for a single localizable string.
///
/// Tracks both the original and translated text, along with status flags
/// indicating whether the translation is up-to-date.
#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", get_mut = "pub", set = "pub")]
pub struct Translation {

    /// The Loc key identifying this string.
    key: String,

    /// Original text in the base language (typically English).
    ///
    /// This is used to detect when the source text changes, requiring
    /// re-translation.
    value_original: String,

    /// Translated text in the target language.
    ///
    /// May be empty if not yet translated.
    value_translated: String,

    /// Whether this translation needs review.
    ///
    /// Set to `true` for new entries and when the original text changes after
    /// translation, indicating the translation may be missing or outdated.
    needs_retranslation: bool,

    /// Whether this string has been removed from the source pack.
    ///
    /// Translations for removed strings are kept for reference but marked
    /// as removed, and their `needs_retranslation` flag is cleared. If the string
    /// reappears, it is only flagged for re-translation when its original text
    /// changed or it has no translation yet.
    removed: bool,
}
175
176//-------------------------------------------------------------------------------//
177//                             Implementations
178//-------------------------------------------------------------------------------//
179
180impl PackTranslation {
181
    /// Builds the up-to-date [`PackTranslation`] of `pack` for the given `language`.
    ///
    /// The work is done in this order:
    ///
    /// 1. The stored translation of the pack is loaded from `paths` (see [`Self::load`]). If that fails
    ///    for any reason, an empty translation for the pack and language is used instead.
    /// 2. The stored translations of the packs listed in `pack.dependencies()` are loaded too. The ones
    ///    that fail to load are silently skipped.
    /// 3. The Loc files of the pack are merged with [`Self::sort_and_merge_locs_for_translation`].
    /// 4. Stored entries are synced with the merged Loc: keys no longer present are marked as removed,
    ///    new keys are added flagged for retranslation, and entries whose source text changed are
    ///    updated and flagged for retranslation.
    /// 5. An auto-translation pass fills in entries from `base_local_fixes`, from the localisation data
    ///    of `dependencies` (patched with the parent translations), or from other already-translated
    ///    entries with the same source text.
    ///
    /// # Arguments
    ///
    /// * `paths` - Base folders searched, in order, for stored translation files.
    /// * `pack` - Pack to translate.
    /// * `game_key` - Game key, used as part of the path of the stored translation files.
    /// * `language` - Target language code.
    /// * `dependencies` - Source of the localisation data used for auto-translation.
    /// * `base_english` - Map of Loc keys to text that entries are compared against (presumably the
    ///   vanilla English localisation; confirm with the caller).
    /// * `base_local_fixes` - Map of Loc keys to translated text that takes priority over the
    ///   localisation data of `dependencies`.
    ///
    /// # Errors
    ///
    /// Returns an error if merging the Loc files of the pack fails.
    pub fn new(paths: &[PathBuf], pack: &Pack, game_key: &str, language: &str, dependencies: &Dependencies, base_english: &HashMap<String, String>, base_local_fixes: &HashMap<String, String>) -> Result<Self> {

        // Any failure to load the stored translation (missing file, broken json,...) results in starting from scratch.
        let mut translations = Self::load(paths, &pack.disk_file_name(), game_key, language).unwrap_or_else(|_| {
            Self {
                language: language.to_owned(),
                pack_name: pack.disk_file_name(),
                ..Default::default()
            }
        });

        // If the pack has dependencies, we have to try to load their translations too, then patch the live dependencies with them.
        // Otherwise, we'll have a situation where data is compared and imported from the wrong language.
        let mut parent_tr = vec![];
        for (_, pack_name) in pack.dependencies() {
            if let Ok(ptr) = Self::load(paths, pack_name, game_key, language) {
                parent_tr.push(ptr);
            }
        }

        // Once we got the previous translation loaded, get the files to translate from the Pack, updating our translation.
        let mut locs = pack.files_by_type(&[FileType::Loc]);
        let merged_loc = Self::sort_and_merge_locs_for_translation(&mut locs)?;
        let merged_loc_data = merged_loc.data();

        // Key -> text lookup of the merged loc, built from the first two columns of each row.
        let merged_loc_hash = merged_loc_data
            .par_iter()
            .map(|x| (x[0].data_to_string(), x[1].data_to_string()))
            .collect::<HashMap<_,_>>();

        // Once we have the clean list of loc entries we have in our Pack, we need to update the translation with it.
        // First we do a pass to mark all removed translations as such. This is separated from the rest because this pass is way slower than the rest.
        for (tr_key, tr) in translations.translations_mut() {
            let was_removed = tr.removed;
            tr.removed = !merged_loc_hash.contains_key(&**tr_key);

            // If the line has been removed, unmark it for translation.
            // If the line has been re-added, only flag for retranslation if the original value changed or there's no translation yet.
            if tr.removed {
                tr.needs_retranslation = false;
            } else if was_removed {
                if let Some(current_value) = merged_loc_hash.get(&**tr_key) {
                    tr.needs_retranslation = tr.value_translated.is_empty() || *current_value != tr.value_original;
                }
            }
        }

        // Next, we update the translations data with the loc data of the merged loc.
        for row in merged_loc.data().iter() {
            let key = row[0].data_to_string();
            let value = row[1].data_to_string();

            match translations.translations.get_mut(&*key) {

                // Known key: only touch it if the source text changed since the translation was stored.
                Some(tr) => {
                    if value != tr.value_original {
                        tr.value_original = value.to_string();
                        tr.needs_retranslation = true;
                    }
                },

                // New key: add it untranslated and flagged for retranslation.
                None => {
                    let tr = Translation {
                        key: key.to_string(),
                        value_original: value.to_string(),
                        value_translated: String::new(),
                        needs_retranslation: true,
                        removed: false,
                    };

                    translations.translations.insert(key.to_string(), tr);
                }
            }
        }

        // Lastly, we do an auto-translation pass. We have two copies of base local: one normal and one patched with parent translations.
        // This is needed because the base localisation data doesn't have the translation data for parent mods included.
        //
        // Only keys already present in the localisation data are patched, and only with parent entries that are translated and up-to-date.
        let mut base_local_tr = dependencies.localisation_data().clone();
        for ptr in parent_tr {
            for (key, val) in ptr.translations() {
                if !*val.needs_retranslation() && !val.value_translated().is_empty() {
                    if let Some(ptr_val) = base_local_tr.get_mut(key) {
                        *ptr_val = val.value_translated().to_string();
                    }
                }
            }
        }

        // Snapshot of the translations taken before the parallel pass, so each entry can check the rest of the entries
        // while the real map is being mutated. Changes done during the pass are not reflected in it.
        let tr_copy = translations.translations().clone();
        translations.translations_mut().par_iter_mut().for_each(|(tr_key, tr)| {
            if !tr.removed {

                // Mark empty lines as translated.
                if tr.value_original().trim().is_empty() && tr.value_translated().trim().is_empty() {
                    tr.value_translated = tr.value_original.to_owned();
                    tr.needs_retranslation = false;
                }

                // If the value is unchanged from english, just copy the vanilla translation.
                //
                // NOTE: This is really a patch for packs not using optimizing pass, because the optimizer actually removes these entries.
                //
                // NOTE: If the key exists in the english data but the text differs, none of the fallbacks below are tried.
                else if let Some(vanilla_data) = base_english.get(tr_key) {
                    if tr.value_original() == vanilla_data {

                        // The local fixes take priority over the (patched) localisation data.
                        if let Some(vanilla_data) = base_local_fixes.get(tr_key) {
                            tr.value_translated = vanilla_data.to_owned();
                            tr.needs_retranslation = false;
                        } else if let Some(vanilla_data) = base_local_tr.get(tr_key) {
                            tr.value_translated = vanilla_data.to_owned();
                            tr.needs_retranslation = false;
                        }
                    }
                }

                // If the value is equal to another value in the english translation (but with a different key), we may be able to reuse it.
                //
                // Note that this is prone to give wrong translations as it doesn't have any context, so we only do it for lines that are
                // not yet translated or that are flagged for retranslation.
                else if tr.value_translated().trim().is_empty() || *tr.needs_retranslation() {
                    if let Some((key, _)) = base_english.iter().find(|(_, value)| *value == tr.value_original()) {
                        if let Some(value_tr) = base_local_fixes.get(key) {
                            tr.value_translated = value_tr.to_owned();
                            tr.needs_retranslation = false;
                        } else if let Some(value_tr) = base_local_tr.get(key) {
                            tr.value_translated = value_tr.to_owned();
                            tr.needs_retranslation = false;
                        }

                    // Otherwise, try to reuse the translation of another entry of this same pack with the same source text,
                    // as long as that entry is up-to-date and this one is flagged for retranslation.
                    } else if let Some((_, value_tr)) = tr_copy.iter()
                        .find(|(_, tr_copy)| *tr_copy.value_original() == *tr.value_original() && !*tr_copy.needs_retranslation() && *tr.needs_retranslation()) {
                        tr.value_translated = value_tr.value_translated().to_owned();
                        tr.needs_retranslation = false;
                    }
                }
            }
        });

        Ok(translations)
    }
314
315    // TODO: Move this to the normal merge functions.
316    pub fn sort_and_merge_locs_for_translation(locs: &mut [&RFile]) -> Result<Loc> {
317
318        // We need them in a specific order so the file priority removes unused loc entries from the translation.
319        locs.sort_by(|a, b| a.path_in_container_raw().cmp(b.path_in_container_raw()));
320        let locs = locs.iter()
321            .filter(|file| {
322                if let Some(name) = file.file_name() {
323                    !name.is_empty() && name != TRANSLATED_FILE_NAME
324                } else {
325                    false
326                }
327            })
328            .filter_map(|file| if let Ok(RFileDecoded::Loc(loc)) = file.decoded() { Some(loc) } else { None })
329            .collect::<Vec<_>>();
330
331        // Once we merge all the locs in the correct order, remove duplicated keys except the first one.
332        let mut merged_loc = Loc::merge(&locs)?;
333        let mut keys_found = HashSet::new();
334        let mut rows_to_delete = vec![];
335        for (index, row) in merged_loc.data().iter().enumerate() {
336            if keys_found.contains(&row[0].data_to_string()) {
337                rows_to_delete.push(index);
338            } else {
339                keys_found.insert(row[0].data_to_string());
340            }
341        }
342
343        rows_to_delete.reverse();
344        for row in &rows_to_delete {
345            merged_loc.data_mut().remove(*row);
346        }
347
348        Ok(merged_loc)
349    }
350
    /// This function applies a [PackTranslation] to a Pack.
    ///
    /// # Panics
    ///
    /// This is not implemented yet: the body is just a `todo!()`, so calling it always panics.
    pub fn apply(&self, _pack: &mut Pack) -> Result<()> {
        todo!()
    }
355
356    /// This function loads a [PackTranslation] to memory from either a local json file, or a remote one.
357    pub fn load(paths: &[PathBuf], pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
358        for path in paths {
359            match Self::load_json(path, pack_name, game_key, language) {
360                Ok(mut tr) => return {
361                    for trad in tr.translations_mut() {
362                        trad.1.value_translated = trad.1.value_translated.replace("\n||\n", "||");
363                        trad.1.value_translated = trad.1.value_translated.replace("\r", "\\\\r");
364                        trad.1.value_translated = trad.1.value_translated.replace("\n", "\\\\n");
365                        trad.1.value_translated = trad.1.value_translated.replace("\t", "\\\\t");
366                    }
367                    Ok(tr)
368                },
369                Err(_) => continue,
370            }
371        }
372
373        Err(RLibError::TranslatorCouldNotLoadTranslation)
374    }
375
376    fn load_json(path: &Path, pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
377        let path = path.join(format!("{game_key}/{pack_name}/{language}.json"));
378        let mut file = BufReader::new(File::open(path)?);
379        let mut data = Vec::with_capacity(file.get_ref().metadata()?.len() as usize);
380        file.read_to_end(&mut data)?;
381        serde_json::from_slice(&data).map_err(From::from)
382    }
383
384    /// This function saves a [PackTranslation] from memory to a `.json` file with the provided path.
385    pub fn save(&mut self, path: &Path, game_key: &str) -> Result<()> {
386        let path = path.join(format!("{}/{}/{}.json", game_key, self.pack_name, self.language));
387
388        // Make sure the path exists to avoid problems with updating schemas.
389        if let Some(parent_folder) = path.parent() {
390            DirBuilder::new().recursive(true).create(parent_folder)?;
391        }
392
393        let mut file = BufWriter::new(File::create(&path)?);
394        file.write_all(serde_json::to_string_pretty(&self)?.as_bytes())?;
395        Ok(())
396    }
397
398    pub fn definition() -> Definition {
399        let mut definition = Definition::default();
400
401        // We put the booleans first because they may act as a kind of filter.
402        definition.fields_mut().push(Field { name: "key".to_string(), field_type: FieldType::StringU8, is_key: true, ..Default::default() });
403        definition.fields_mut().push(Field { name: "needs_retranslation".to_string(), field_type: FieldType::Boolean, ..Default::default() });
404        definition.fields_mut().push(Field { name: "removed".to_string(), field_type: FieldType::Boolean, ..Default::default() });
405        definition.fields_mut().push(Field { name: "value_original".to_string(), field_type: FieldType::StringU8, ..Default::default() });
406        definition.fields_mut().push(Field { name: "value_translated".to_string(), field_type: FieldType::StringU8, ..Default::default() });
407
408        definition
409    }
410
411    pub fn from_table(&mut self, table: &TableInMemory) -> Result<()> {
412        self.translations_mut().clear();
413
414        for row in table.data().iter() {
415            let mut tr = Translation::default();
416
417            if let DecodedData::StringU8(ref data) = row[0] {
418                tr.set_key(data.to_owned());
419            }
420
421            if let DecodedData::Boolean(data) = row[1] {
422                tr.set_needs_retranslation(data);
423            }
424
425            if let DecodedData::Boolean(data) = row[2] {
426                tr.set_removed(data);
427            }
428
429            if let DecodedData::StringU8(ref data) = row[3] {
430                tr.set_value_original(data.to_owned());
431            }
432
433            if let DecodedData::StringU8(ref data) = row[4] {
434                tr.set_value_translated(data.to_owned());
435            }
436
437            self.translations_mut().insert(tr.key.to_owned(), tr);
438        }
439
440        Ok(())
441    }
442
443    pub fn to_table(&self) -> Result<TableInMemory> {
444        let definition = Self::definition();
445        let mut table = TableInMemory::new(&definition, None, "");
446
447        // Due to bugs in the table filters, we pre-sort the data by putting stuff that needs to be retranslated at the start.
448        let data = self.translations()
449            .iter()
450            .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr1.key(), tr2.key()))
451            .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr2.needs_retranslation(), tr1.needs_retranslation()))
452            .map(|(_, tr)| vec![
453                DecodedData::StringU8(tr.key().to_owned()),
454                DecodedData::Boolean(*tr.needs_retranslation()),
455                DecodedData::Boolean(*tr.removed()),
456                DecodedData::StringU8(tr.value_original().to_owned()),
457                DecodedData::StringU8(tr.value_translated().to_owned()),
458            ]).collect::<Vec<_>>();
459
460        table.set_data(&data)?;
461        Ok(table)
462    }
463}
464
465/// Special serializer function to sort the translations HashMap before serializing.
466fn ordered_map_translations<S>(value: &HashMap<String, Translation>, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, {
467    let ordered: BTreeMap<_, _> = value.iter().collect();
468    ordered.serialize(serializer)
469}