Skip to main content

rpfm_extensions/translator/
mod.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Mod translation and localization support.
12//!
13//! This module provides tools for managing translations of mod content, making it
14//! easier to localize mods for different languages. It tracks translation status,
15//! detects changes in source text, and supports auto-translation from vanilla data.
16//!
17//! # Overview
18//!
19//! The translation system works by:
20//!
21//! 1. Extracting all translatable strings from a pack's Loc files
22//! 2. Storing translations in a separate JSON file alongside the pack
23//! 3. Tracking which translations need updating when source text changes
24//! 4. Auto-translating from vanilla localisation data where possible
25//!
26//! # Translation Files
27//!
28//! Translations are stored in separate JSON files. Each file contains all source
29//! strings and their translations, along with metadata about translation status.
30//!
31//! # Auto-Translation
32//!
33//! The system can automatically translate strings that exist in the game's vanilla
34//! localisation files. This is useful for mods that reference vanilla content or
35//! use similar terminology.
36//!
37//! # Workflow
38//!
39//! 1. Create a [`PackTranslation`] from a pack
40//! 2. Export to a translation file for external editing
41//! 3. Import completed translations
42//! 4. Generate the final translated Loc file for the pack
43//!
44//! # Output
45//!
46//! Translated strings are output to a Loc file that overrides the original mod's
47//! entries. The filename depends on the game:
48//!
49//! - **Warhammer 1 and newer** (except Thrones): `!!!!!!translated_locs.loc` - loads
50//!   first due to its naming, allowing translations to override the original entries
51//! - **Thrones of Britannia and older games**: `localisation.loc`
52//!
53//! # Example
54//!
55//! ```ignore
56//! use rpfm_extensions::translator::PackTranslation;
57//!
58//! // Create translation from pack
59//! let mut translation = PackTranslation::new(
60//!     &[translations_path],
61//!     &pack,
62//!     "warhammer_3",
63//!     "es",  // Spanish
64//!     &dependencies,
65//!     &english_base,
66//!     &local_fixes,
67//! )?;
68//!
69//! // Save translation file
//! translation.save(&output_path, "warhammer_3")?;
71//!
72//! // Generate translated Loc file for the pack
73//! let loc_file = translation.generate_loc()?;
74//! ```
75
76use getset::{Getters, MutGetters, Setters};
77use itertools::Itertools;
78use rayon::prelude::*;
79use serde::{Serialize as SerdeSerialize, Serializer};
80use serde_derive::{Serialize, Deserialize};
81
82use std::collections::{BTreeMap, HashMap, HashSet};
83use std::fs::{DirBuilder, File};
84use std::io::{BufReader, BufWriter, Read, Write};
85use std::path::{Path, PathBuf};
86
87use rpfm_lib::error::{Result, RLibError};
88use rpfm_lib::files::{Container, FileType, loc::Loc, pack::Pack, RFile, RFileDecoded, table::{DecodedData, local::TableInMemory, Table}};
89use rpfm_lib::schema::*;
90
91use crate::dependencies::Dependencies;
92
/// Filename for the generated translated Loc file.
///
/// The leading exclamation marks ensure this file loads before other Loc files,
/// allowing translations to override the original mod's strings.
///
/// Files with this name are skipped when merging the Loc files of a pack for translation.
pub const TRANSLATED_FILE_NAME: &str = "!!!!!!translated_locs.loc";

/// Full path for the translated Loc file within a pack.
///
/// This is [`TRANSLATED_FILE_NAME`] placed inside the `text/` folder.
pub const TRANSLATED_PATH: &str = "text/!!!!!!translated_locs.loc";

/// Legacy path for translated Loc files (for backwards compatibility).
///
/// According to the module documentation, `localisation.loc` is the file name used
/// for Thrones of Britannia and older games.
pub const TRANSLATED_PATH_OLD: &str = "text/localisation.loc";
104
105//-------------------------------------------------------------------------------//
106//                              Enums & Structs
107//-------------------------------------------------------------------------------//
108
/// Translation data for an entire pack.
///
/// Contains all translatable strings from a pack along with their translations
/// and metadata about translation status.
///
/// # Persistence
///
/// This struct is serialized to JSON files for storage and can be loaded back
/// when continuing translation work. The file is located at
/// `<base path>/<game key>/<pack name>/<language>.json`.
///
/// # Parent Translations
///
/// When a pack has dependencies, translations from parent mods are also loaded
/// and used for auto-translation, ensuring consistent terminology across
/// dependent mods. This happens while building the translation in `new`: the parent
/// translations themselves are not stored in this struct.
///
/// # Accessors
///
/// Public getters, mutable getters and setters for every field are generated through
/// the `getset` derives.
#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", get_mut = "pub", set = "pub")]
pub struct PackTranslation {

    /// Target language code for translations (e.g., "es", "de", "fr").
    ///
    /// It's also used as the name of the JSON file when saving.
    language: String,

    /// Name of the pack these translations belong to.
    ///
    /// It's also used as the name of the folder containing the JSON file when saving.
    pack_name: String,

    /// Map of Loc keys to their translation data.
    ///
    /// Keys are the original Loc entry keys from the pack. On serialization the
    /// entries are written sorted by key, so the output doesn't depend on the
    /// iteration order of the map.
    #[serde(serialize_with = "ordered_map_translations")]
    translations: HashMap<String, Translation>,
}
140
/// Translation entry for a single localizable string.
///
/// Tracks both the original and translated text, along with status flags
/// indicating whether the translation is up-to-date.
///
/// Public getters, mutable getters and setters for every field are generated through
/// the `getset` derives.
#[derive(Debug, Clone, Default, Getters, MutGetters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", get_mut = "pub", set = "pub")]
pub struct Translation {

    /// The Loc key identifying this string.
    key: String,

    /// Original text in the base language (typically English).
    ///
    /// This is used to detect when the source text changes, requiring
    /// re-translation.
    value_original: String,

    /// Translated text in the target language.
    ///
    /// May be empty if not yet translated.
    value_translated: String,

    /// Whether this translation needs review.
    ///
    /// Set to `true` for new entries and when the original text changes after
    /// translation, indicating the translation may be missing or outdated.
    needs_retranslation: bool,

    /// Whether this string has been removed from the source pack.
    ///
    /// Translations for removed strings are kept for reference but marked
    /// as removed, and are no longer flagged for re-translation. If the string
    /// reappears, it is only flagged for re-translation when its original text
    /// changed or when it has no translation yet.
    removed: bool,
}
175
176//-------------------------------------------------------------------------------//
177//                             Implementations
178//-------------------------------------------------------------------------------//
179
180impl PackTranslation {
181
    /// This function builds the [PackTranslation] of a Pack, reusing any previously saved translation for it.
    ///
    /// The process is:
    ///
    /// 1. Try to load an existing translation for the Pack from `paths`. If none can be loaded, start from an empty one.
    /// 2. Try to load the translations of the Packs listed in `pack.dependencies()`. The ones that fail to load are skipped.
    /// 3. Merge the Loc files of the Pack and sync the stored entries with them: keys no longer present are marked
    ///    as removed, and new or changed entries are flagged for retranslation.
    /// 4. Run an auto-translation pass over the non-removed entries using `base_english`, `base_local_fixes`, the
    ///    localisation data of `dependencies` (patched with the parent translations) and the entries of this same
    ///    translation that are already translated.
    ///
    /// # Errors
    ///
    /// Returns an error if merging the Loc files of the Pack fails.
    pub fn new(paths: &[PathBuf], pack: &Pack, game_key: &str, language: &str, dependencies: &Dependencies, base_english: &HashMap<String, String>, base_local_fixes: &HashMap<String, String>) -> Result<Self> {

        // Any error while loading (missing file, invalid json,...) results in starting from an empty translation.
        let mut translations = Self::load(paths, &pack.disk_file_name(), game_key, language).unwrap_or_else(|_| {
            Self {
                language: language.to_owned(),
                pack_name: pack.disk_file_name(),
                ..Default::default()
            }
        });

        // If the pack has dependencies, we have to try to load their translations too, then patch the live dependencies with them.
        // Otherwise, we'll have a situation where data is compared and imported from the wrong language.
        let mut parent_tr = vec![];
        for (_, pack_name) in pack.dependencies() {
            if let Ok(ptr) = Self::load(paths, pack_name, game_key, language) {
                parent_tr.push(ptr);
            }
        }

        // Once we got the previous translation loaded, get the files to translate from the Pack, updating our translation.
        let mut locs = pack.files_by_type(&[FileType::Loc]);
        let merged_loc = Self::sort_and_merge_locs_for_translation(&mut locs)?;
        let merged_loc_data = merged_loc.data();

        // Key -> text map of the merged loc (columns 0 and 1 of each row), for fast lookups by key.
        let merged_loc_hash = merged_loc_data
            .par_iter()
            .map(|x| (x[0].data_to_string(), x[1].data_to_string()))
            .collect::<HashMap<_,_>>();

        // Once we have the clean list of loc entries we have in our Pack, we need to update the translation with it.
        // First we do a pass to mark all removed translations as such. This is separated from the rest because this pass is way slower than the rest.
        for (tr_key, tr) in translations.translations_mut() {
            let was_removed = tr.removed;
            tr.removed = !merged_loc_hash.contains_key(&**tr_key);

            // If the line has been removed, unmark it for translation.
            // If the line has been re-added, only flag for retranslation if the original value changed or there's no translation yet.
            if tr.removed {
                tr.needs_retranslation = false;
            } else if was_removed {
                if let Some(current_value) = merged_loc_hash.get(&**tr_key) {
                    tr.needs_retranslation = tr.value_translated.is_empty() || *current_value != tr.value_original;
                }
            }
        }

        // Next, we update the translations data with the loc data of the merged loc.
        for row in merged_loc.data().iter() {
            let key = row[0].data_to_string();
            let value = row[1].data_to_string();

            match translations.translations.get_mut(&*key) {

                // Known entry: only touch it if the source text changed.
                Some(tr) => {
                    if value != tr.value_original {
                        tr.value_original = value.to_string();
                        tr.needs_retranslation = true;
                    }
                },

                // New entry: add it untranslated and flagged for translation.
                None => {
                    let tr = Translation {
                        key: key.to_string(),
                        value_original: value.to_string(),
                        value_translated: String::new(),
                        needs_retranslation: true,
                        removed: false,
                    };

                    translations.translations.insert(key.to_string(), tr);
                }
            }
        }

        // Lastly, we do an auto-translation pass. We work on a copy of the dependencies' localisation data patched with the parent translations.
        // This is needed because the base localisation data doesn't have the translation data for parent mods included.
        // Only parent entries that are translated and not pending retranslation are used, and only for keys already present in the copy.
        let mut base_local_tr = dependencies.localisation_data().clone();
        for ptr in parent_tr {
            for (key, val) in ptr.translations() {
                if !*val.needs_retranslation() && !val.value_translated().is_empty() {
                    if let Some(ptr_val) = base_local_tr.get_mut(key) {
                        *ptr_val = val.value_translated().to_string();
                    }
                }
            }
        }

        // Snapshot of the entries before the pass, so we can look for already translated lines while mutating the entries in parallel.
        let tr_copy = translations.translations().clone();
        translations.translations_mut().par_iter_mut().for_each(|(tr_key, tr)| {
            if !tr.removed {

                // NOTE: The checks below are a single if/else-if chain, so only the first matching case is applied to each entry.

                // Fix incorrectly translated lines.
                if !tr.value_original().trim().is_empty() && tr.value_translated().trim().is_empty() && !tr.needs_retranslation() {
                    tr.needs_retranslation = true;
                }

                // Mark empty lines as translated.
                else if tr.value_original().trim().is_empty() && tr.value_translated().trim().is_empty() {
                    tr.value_translated = tr.value_original.to_owned();
                    tr.needs_retranslation = false;
                }

                // If the value is unchanged from english, just copy the vanilla translation.
                // The local fixes take priority over the (patched) localisation data, and empty translations are ignored.
                //
                // NOTE: This is really a patch for packs not using optimizing pass, because the optimizer actually removes these entries.
                else if let Some(vanilla_data) = base_english.get(tr_key) {
                    if tr.value_original() == vanilla_data {
                        if let Some(vanilla_data) = base_local_fixes.get(tr_key).filter(|v| !v.trim().is_empty()) {
                            tr.value_translated = vanilla_data.to_owned();
                            tr.needs_retranslation = false;
                        } else if let Some(vanilla_data) = base_local_tr.get(tr_key).filter(|v| !v.trim().is_empty()) {
                            tr.value_translated = vanilla_data.to_owned();
                            tr.needs_retranslation = false;
                        }
                    }
                }

                // If the value is equal to another value in the english translation (but with a different key), we may be able to reuse it.
                //
                // Note that this is prone to give wrong translations as it doesn't have any context, so we only do it for lines that are not yet translated.
                //
                // NOTE(review): both searches below are linear scans done per entry, and which match is picked depends on the iteration order of the maps.
                else if tr.value_translated().trim().is_empty() || *tr.needs_retranslation() {
                    if let Some((key, _)) = base_english.iter().find(|(_, value)| *value == tr.value_original()) {
                        if let Some(value_tr) = base_local_fixes.get(key).filter(|v| !v.trim().is_empty()) {
                            tr.value_translated = value_tr.to_owned();
                            tr.needs_retranslation = false;
                        } else if let Some(value_tr) = base_local_tr.get(key).filter(|v| !v.trim().is_empty()) {
                            tr.value_translated = value_tr.to_owned();
                            tr.needs_retranslation = false;
                        }

                    // If there's no match in the english data, try to reuse an already translated entry of this same translation with the same original text.
                    } else if let Some((_, value_tr)) = tr_copy.iter()
                        .find(|(_, tr_copy)| *tr_copy.value_original() == *tr.value_original() && !*tr_copy.needs_retranslation() && *tr.needs_retranslation() && !tr_copy.value_translated().trim().is_empty()) {
                        tr.value_translated = value_tr.value_translated().to_owned();
                        tr.needs_retranslation = false;
                    }
                }
            }
        });

        Ok(translations)
    }
319
320    // TODO: Move this to the normal merge functions.
321    pub fn sort_and_merge_locs_for_translation(locs: &mut [&RFile]) -> Result<Loc> {
322
323        // We need them in a specific order so the file priority removes unused loc entries from the translation.
324        locs.sort_by(|a, b| a.path_in_container_raw().cmp(b.path_in_container_raw()));
325        let locs = locs.iter()
326            .filter(|file| {
327                if let Some(name) = file.file_name() {
328                    !name.is_empty() && name != TRANSLATED_FILE_NAME
329                } else {
330                    false
331                }
332            })
333            .filter_map(|file| if let Ok(RFileDecoded::Loc(loc)) = file.decoded() { Some(loc) } else { None })
334            .collect::<Vec<_>>();
335
336        // Once we merge all the locs in the correct order, remove duplicated keys except the first one.
337        let mut merged_loc = Loc::merge(&locs)?;
338        let mut keys_found = HashSet::new();
339        let mut rows_to_delete = vec![];
340        for (index, row) in merged_loc.data().iter().enumerate() {
341            if keys_found.contains(&row[0].data_to_string()) {
342                rows_to_delete.push(index);
343            } else {
344                keys_found.insert(row[0].data_to_string());
345            }
346        }
347
348        rows_to_delete.reverse();
349        for row in &rows_to_delete {
350            merged_loc.data_mut().remove(*row);
351        }
352
353        Ok(merged_loc)
354    }
355
    /// This function is meant to apply a [PackTranslation] to a Pack.
    ///
    /// It's not implemented yet: the provided Pack is not used.
    ///
    /// # Panics
    ///
    /// Always panics, as the body is still a `todo!()` stub.
    pub fn apply(&self, _pack: &mut Pack) -> Result<()> {
        todo!()
    }
360
361    /// This function loads a [PackTranslation] to memory from either a local json file, or a remote one.
362    pub fn load(paths: &[PathBuf], pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
363        for path in paths {
364            match Self::load_json(path, pack_name, game_key, language) {
365                Ok(mut tr) => return {
366                    for trad in tr.translations_mut() {
367                        trad.1.value_translated = trad.1.value_translated.replace("\n||\n", "||");
368                        trad.1.value_translated = trad.1.value_translated.replace("\r", "\\\\r");
369                        trad.1.value_translated = trad.1.value_translated.replace("\n", "\\\\n");
370                        trad.1.value_translated = trad.1.value_translated.replace("\t", "\\\\t");
371                    }
372                    Ok(tr)
373                },
374                Err(_) => continue,
375            }
376        }
377
378        Err(RLibError::TranslatorCouldNotLoadTranslation)
379    }
380
381    fn load_json(path: &Path, pack_name: &str, game_key: &str, language: &str) -> Result<Self> {
382        let path = path.join(format!("{game_key}/{pack_name}/{language}.json"));
383        let mut file = BufReader::new(File::open(path)?);
384        let mut data = Vec::with_capacity(file.get_ref().metadata()?.len() as usize);
385        file.read_to_end(&mut data)?;
386        serde_json::from_slice(&data).map_err(From::from)
387    }
388
389    /// This function saves a [PackTranslation] from memory to a `.json` file with the provided path.
390    pub fn save(&mut self, path: &Path, game_key: &str) -> Result<()> {
391        let path = path.join(format!("{}/{}/{}.json", game_key, self.pack_name, self.language));
392
393        // Make sure the path exists to avoid problems with updating schemas.
394        if let Some(parent_folder) = path.parent() {
395            DirBuilder::new().recursive(true).create(parent_folder)?;
396        }
397
398        let mut file = BufWriter::new(File::create(&path)?);
399        file.write_all(serde_json::to_string_pretty(&self)?.as_bytes())?;
400        Ok(())
401    }
402
403    pub fn definition() -> Definition {
404        let mut definition = Definition::default();
405
406        // We put the booleans first because they may act as a kind of filter.
407        definition.fields_mut().push(Field { name: "key".to_string(), field_type: FieldType::StringU8, is_key: true, ..Default::default() });
408        definition.fields_mut().push(Field { name: "needs_retranslation".to_string(), field_type: FieldType::Boolean, ..Default::default() });
409        definition.fields_mut().push(Field { name: "removed".to_string(), field_type: FieldType::Boolean, ..Default::default() });
410        definition.fields_mut().push(Field { name: "value_original".to_string(), field_type: FieldType::StringU8, ..Default::default() });
411        definition.fields_mut().push(Field { name: "value_translated".to_string(), field_type: FieldType::StringU8, ..Default::default() });
412
413        definition
414    }
415
416    pub fn from_table(&mut self, table: &TableInMemory) -> Result<()> {
417        self.translations_mut().clear();
418
419        for row in table.data().iter() {
420            let mut tr = Translation::default();
421
422            if let DecodedData::StringU8(ref data) = row[0] {
423                tr.set_key(data.to_owned());
424            }
425
426            if let DecodedData::Boolean(data) = row[1] {
427                tr.set_needs_retranslation(data);
428            }
429
430            if let DecodedData::Boolean(data) = row[2] {
431                tr.set_removed(data);
432            }
433
434            if let DecodedData::StringU8(ref data) = row[3] {
435                tr.set_value_original(data.to_owned());
436            }
437
438            if let DecodedData::StringU8(ref data) = row[4] {
439                tr.set_value_translated(data.to_owned());
440            }
441
442            self.translations_mut().insert(tr.key.to_owned(), tr);
443        }
444
445        Ok(())
446    }
447
448    pub fn to_table(&self) -> Result<TableInMemory> {
449        let definition = Self::definition();
450        let mut table = TableInMemory::new(&definition, None, "");
451
452        // Due to bugs in the table filters, we pre-sort the data by putting stuff that needs to be retranslated at the start.
453        let data = self.translations()
454            .iter()
455            .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr1.key(), tr2.key()))
456            .sorted_by(|(_, tr1), (_, tr2)| Ord::cmp(tr2.needs_retranslation(), tr1.needs_retranslation()))
457            .map(|(_, tr)| vec![
458                DecodedData::StringU8(tr.key().to_owned()),
459                DecodedData::Boolean(*tr.needs_retranslation()),
460                DecodedData::Boolean(*tr.removed()),
461                DecodedData::StringU8(tr.value_original().to_owned()),
462                DecodedData::StringU8(tr.value_translated().to_owned()),
463            ]).collect::<Vec<_>>();
464
465        table.set_data(&data)?;
466        Ok(table)
467    }
468}
469
470/// Special serializer function to sort the translations HashMap before serializing.
471fn ordered_map_translations<S>(value: &HashMap<String, Translation>, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, {
472    let ordered: BTreeMap<_, _> = value.iter().collect();
473    ordered.serialize(serializer)
474}