Skip to main content

rpfm_lib/files/loc/
mod.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Localisation table files for Total War games.
12//!
13//! Loc files store key-value pairs for text localisation, enabling translation of in-game
14//! strings. Each entry consists of a unique key, the localised text, and a boolean flag
15//! (purpose unknown, commonly called "tooltip").
16//!
17//! # Overview
18//!
19//! Unlike DB tables which require schema definitions, Loc files have a fixed structure:
20//! - **Key**: Unique identifier for the text entry (UTF-16 string)
21//! - **Text**: The localised string content (UTF-16 string)
22//! - **Tooltip**: Boolean flag of unknown purpose
23//!
24//! Loc files are used in all Total War games since Empire. In games prior to Troy, when
25//! using a non-English language, only the main `localisation.loc` file is loaded -
26//! individual loc files are ignored.
27//!
28//! # Binary Structure
29//!
30//! ## Header (14 bytes)
31//!
32//! | Bytes | Type            | Data                                           |
33//! | ----- | --------------- | ---------------------------------------------- |
//! | 2     | [u16]           | Byte order mark. Bytes `FF FE` (`0xFEFF` LE).  |
35//! | 3     | UTF-8 String    | File type identifier. Always `"LOC"`.          |
36//! | 1     | [u8]            | Unknown, always `0`. Possibly padding.         |
37//! | 4     | [i32]           | Version. Always `1` in known files.            |
38//! | 4     | [u32]           | Number of entries in the table.                |
39//!
40//! ## Data (per entry)
41//!
42//! | Bytes | Type            | Data                                           |
43//! | ----- | --------------- | ---------------------------------------------- |
44//! | 2 + * | Sized StringU16 | Localisation key (u16 length prefix + UTF-16). |
45//! | 2 + * | Sized StringU16 | Localised text (u16 length prefix + UTF-16).   |
46//! | 1     | [bool]          | Tooltip flag (unknown purpose).                |
47
48use csv::{StringRecordsIter, Writer};
49use getset::{Getters, Setters};
50use rayon::prelude::*;
51use serde_derive::{Serialize, Deserialize};
52
53use std::borrow::Cow;
54use std::collections::{BTreeMap, HashMap};
55use std::fs::File;
56
57use crate::binary::{ReadBytes, WriteBytes};
58use crate::error::{RLibError, Result};
59use crate::files::{DecodeableExtraData, Decodeable, EncodeableExtraData, Encodeable, table::{DecodedData, local::TableInMemory, Table}};
60use crate::schema::*;
61use crate::utils::check_size_mismatch;
62
/// Byte order mark expected in the first 2 bytes of every Loc file.
///
/// The integer value is `0xFEFF` (65279), which shows up as the byte sequence `FF FE`.
const BYTEORDER_MARK: u16 = 0xFEFF; // FF FE

/// File type identifier stored in the 3 bytes that follow the byte order mark.
/// The byte right after it (the sixth byte of the file) is always a 0.
const FILE_TYPE: &str = "LOC";

/// Size in bytes of the header of a Loc file:
/// byte order mark (2) + file type (3) + unknown byte (1) + version (4) + entry count (4).
const HEADER_SIZE: usize = 2 + 3 + 1 + 4 + 4;

/// Name used in TSV-exported Loc files to identify them as Loc files.
pub(crate) const TSV_NAME_LOC: &str = "Loc";

/// Older variant of the name used to identify TSV-exported Loc files.
pub(crate) const TSV_NAME_LOC_OLD: &str = "Loc PackedFile";

/// File extension of Loc files, including the leading dot.
pub const EXTENSION: &str = ".loc";

/// Version written for Loc files. Only version 1 has been seen so far, so that is the one used.
const VERSION: i32 = 1;
81
82#[cfg(test)] mod loc_test;
83
84//---------------------------------------------------------------------------//
85//                              Enum & Structs
86//---------------------------------------------------------------------------//
87
88/// In-memory representation of a decoded Loc (localisation) file.
89///
90/// Wraps a [`TableInMemory`] with a fixed three-column schema: key, text, and tooltip.
91/// Unlike DB tables, Loc files don't require external schema definitions.
92///
93/// # Structure
94///
95/// Each row contains:
96/// - `key` (StringU16): Unique identifier for the localised text
97/// - `text` (StringU16): The localised string content
98/// - `tooltip` (Boolean): Flag of unknown purpose
99///
100/// # Example
101///
102/// ```ignore
103/// use rpfm_lib::files::{Decodeable, loc::Loc};
104/// use std::io::Cursor;
105///
106/// # let loc_data = vec![];
107/// let mut reader = Cursor::new(loc_data);
108/// let loc = Loc::decode(&mut reader, &None).unwrap();
109///
110/// // Access entries
111/// for row in loc.data().iter() {
112///     // row[0] = key, row[1] = text, row[2] = tooltip
113/// }
114/// ```
115#[derive(PartialEq, Clone, Debug, Getters, Setters, Serialize, Deserialize)]
116#[getset(get = "pub", set = "pub")]
117pub struct Loc {
118
119    /// The underlying table data with key, text, and tooltip columns.
120    table: TableInMemory,
121}
122
123//---------------------------------------------------------------------------//
124//                           Implementation of Loc
125//---------------------------------------------------------------------------//
126
127impl Default for Loc {
128    fn default() -> Self {
129        Self::new()
130    }
131}
132
133/// Implementation of `Loc`.
134impl Loc {
135
136    /// Creates a new empty Loc table.
137    ///
138    /// Initializes with the standard three-column schema (key, text, tooltip)
139    /// but no data rows.
140    pub fn new() -> Self {
141        let definition = Self::new_definition();
142
143        Self {
144            table: TableInMemory::new(&definition, None, TSV_NAME_LOC),
145        }
146    }
147
148    /// Returns the fixed schema definition for Loc tables.
149    ///
150    /// The definition contains three fields:
151    /// - `key` (StringU16, primary key)
152    /// - `text` (StringU16)
153    /// - `tooltip` (Boolean)
154    pub(crate) fn new_definition() -> Definition {
155        let mut definition = Definition::new(VERSION, None);
156        let fields = vec![
157            Field::new("key".to_owned(), FieldType::StringU16, true, Some("PLACEHOLDER".to_owned()), false, None, None, None, String::new(), 0, 0, BTreeMap::new(), None),
158            Field::new("text".to_owned(), FieldType::StringU16, false, Some("PLACEHOLDER".to_owned()), false, None, None, None, String::new(), 0, 0, BTreeMap::new(), None),
159            Field::new("tooltip".to_owned(), FieldType::Boolean, false, Some("PLACEHOLDER".to_owned()), false, None, None, None, String::new(), 0, 0, BTreeMap::new(), None),
160        ];
161        definition.set_fields(fields);
162        definition
163    }
164
165    /// Returns the schema definition used by this Loc table.
166    pub fn definition(&self) -> &Definition {
167        self.table.definition()
168    }
169
170    /// Returns the table rows as a slice of decoded data.
171    pub fn data(&'_ self) -> Cow<'_, [Vec<DecodedData>]> {
172        self.table.data()
173    }
174
175    /// Returns a mutable reference to the table rows.
176    ///
177    /// Ensure modifications maintain valid structure (3 columns per row).
178    pub fn data_mut(&mut self) -> &mut Vec<Vec<DecodedData>> {
179        self.table.data_mut()
180    }
181
182    /// Creates a new row with default placeholder values.
183    pub fn new_row(&self) -> Vec<DecodedData> {
184        self.table().new_row()
185    }
186
187    /// Replaces all table data with the provided rows.
188    ///
189    /// # Errors
190    ///
191    /// Returns an error if rows don't match the expected 3-column structure.
192    pub fn set_data(&mut self, data: &[Vec<DecodedData>]) -> Result<()> {
193        self.table.set_data(data)
194    }
195
196    /// Returns the column index for a given column name, or `None` if not found.
197    ///
198    /// Valid column names: `"key"` (0), `"text"` (1), `"tooltip"` (2).
199    pub fn column_position_by_name(&self, column_name: &str) -> Option<usize> {
200        self.table().column_position_by_name(column_name)
201    }
202
203    /// Returns the number of entries in the Loc table.
204    pub fn len(&self) -> usize {
205        self.table.len()
206    }
207
208    /// Returns `true` if the Loc table has no entries.
209    pub fn is_empty(&self) -> bool {
210        self.table.is_empty()
211    }
212
213    /// Replaces the table definition and migrates existing data to match.
214    ///
215    /// Typically not needed for Loc files since the definition is fixed.
216    pub fn set_definition(&mut self, new_definition: &Definition) {
217        self.table.set_definition(new_definition);
218    }
219
220    /// Reads and validates the Loc file header.
221    ///
222    /// # Returns
223    ///
224    /// A tuple of `(version, entry_count)`. Version is always 1 in known files.
225    ///
226    /// # Errors
227    ///
228    /// Returns [`RLibError::DecodingLocNotALocTable`] if the header is invalid
229    /// (wrong byte order mark, wrong file type, or insufficient data).
230    pub fn read_header<R: ReadBytes>(data: &mut R) -> Result<(i32, u32)> {
231
232        // A valid Loc PackedFile has at least 14 bytes. This ensures they exists before anything else.
233        if data.len()? < HEADER_SIZE as u64 {
234            return Err(RLibError::DecodingLocNotALocTable)
235        }
236
237        // More checks to ensure this is a valid Loc file.
238        if BYTEORDER_MARK != data.read_u16()? {
239            return Err(RLibError::DecodingLocNotALocTable)
240        }
241
242        if FILE_TYPE != data.read_string_u8(3)? {
243            return Err(RLibError::DecodingLocNotALocTable)
244        }
245
246        let _ = data.read_u8()?;
247        let version = data.read_i32()?;
248        let entry_count = data.read_u32()?;
249
250        Ok((version, entry_count))
251    }
252
253    /// Merges multiple Loc tables into a single new table.
254    ///
255    /// Combines all rows from the source tables. Duplicate keys are preserved
256    /// (not deduplicated).
257    pub fn merge(sources: &[&Self]) -> Result<Self> {
258        let mut new_table = Self::new();
259        let sources = sources.par_iter()
260            .map(|table| {
261                let mut table = table.table().clone();
262                table.set_definition(new_table.definition());
263                table
264            })
265            .collect::<Vec<_>>();
266
267        let new_data = sources.par_iter()
268            .map(|table| table.data().to_vec())
269            .flatten()
270            .collect::<Vec<_>>();
271        new_table.set_data(&new_data)?;
272
273        Ok(new_table)
274    }
275
276    /// Imports a Loc table from TSV (tab-separated values) format.
277    ///
278    /// # Arguments
279    ///
280    /// * `records` - CSV reader iterator over TSV records.
281    /// * `field_order` - Mapping of column positions to field names.
282    pub fn tsv_import(records: StringRecordsIter<File>, field_order: &HashMap<u32, String>) -> Result<Self> {
283        let definition = Self::new_definition();
284        let table = TableInMemory::tsv_import(records, &definition, field_order, TSV_NAME_LOC, None)?;
285        let loc = Loc::from(table);
286        Ok(loc)
287    }
288
289    /// Exports the Loc table to TSV (tab-separated values) format.
290    ///
291    /// # Arguments
292    ///
293    /// * `writer` - CSV writer for the output file.
294    /// * `table_path` - Path used in the TSV metadata header.
295    pub fn tsv_export(&self, writer: &mut Writer<File>, table_path: &str) -> Result<()> {
296        self.table.tsv_export(writer, table_path, true)
297    }
298}
299
300impl Decodeable for Loc {
301
302    fn decode<R: ReadBytes>(data: &mut R, _extra_data: &Option<DecodeableExtraData>) -> Result<Self> {
303
304        // Version is always 1, so we ignore it.
305        let (_version, entry_count) = Self::read_header(data)?;
306
307        let definition = Self::new_definition();
308        let table = TableInMemory::decode(data, &definition, &HashMap::new(), Some(entry_count), false, TSV_NAME_LOC)?;
309
310        // If we are not in the last byte, it means we didn't parse the entire file, which means this file is corrupt.
311        check_size_mismatch(data.stream_position()? as usize, data.len()? as usize)?;
312
313        Ok(Self {
314            table,
315        })
316    }
317}
318
319impl Encodeable for Loc {
320
321    fn encode<W: WriteBytes>(&mut self, buffer: &mut W, _extra_data: &Option<EncodeableExtraData>) -> Result<()> {
322        buffer.write_u16(BYTEORDER_MARK)?;
323        buffer.write_string_u8(FILE_TYPE)?;
324        buffer.write_u8(0)?;
325        buffer.write_i32(*self.table.definition().version())?;
326        buffer.write_u32(self.table.len() as u32)?;
327
328        self.table.encode(buffer)
329    }
330}
331
332/// Implementation to create a `Loc` from a `Table` directly.
333impl From<TableInMemory> for Loc {
334    fn from(mut table: TableInMemory) -> Self {
335        table.set_table_name(TSV_NAME_LOC.to_owned());
336        Self {
337            table,
338        }
339    }
340}