rpfm_lib/files/loc/mod.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Localisation table files for Total War games.
12//!
13//! Loc files store key-value pairs for text localisation, enabling translation of in-game
14//! strings. Each entry consists of a unique key, the localised text, and a boolean flag
15//! (purpose unknown, commonly called "tooltip").
16//!
17//! # Overview
18//!
19//! Unlike DB tables which require schema definitions, Loc files have a fixed structure:
20//! - **Key**: Unique identifier for the text entry (UTF-16 string)
21//! - **Text**: The localised string content (UTF-16 string)
22//! - **Tooltip**: Boolean flag of unknown purpose
23//!
24//! Loc files are used in all Total War games since Empire. In games prior to Troy, when
25//! using a non-English language, only the main `localisation.loc` file is loaded -
26//! individual loc files are ignored.
27//!
28//! # Binary Structure
29//!
30//! ## Header (14 bytes)
31//!
32//! | Bytes | Type | Data |
33//! | ----- | --------------- | ---------------------------------------------- |
//! | 2 | [u16] | Byte order mark. Always bytes `FF FE` (`0xFEFF` read as a little-endian u16). |
35//! | 3 | UTF-8 String | File type identifier. Always `"LOC"`. |
36//! | 1 | [u8] | Unknown, always `0`. Possibly padding. |
37//! | 4 | [i32] | Version. Always `1` in known files. |
38//! | 4 | [u32] | Number of entries in the table. |
39//!
40//! ## Data (per entry)
41//!
42//! | Bytes | Type | Data |
43//! | ----- | --------------- | ---------------------------------------------- |
44//! | 2 + * | Sized StringU16 | Localisation key (u16 length prefix + UTF-16). |
45//! | 2 + * | Sized StringU16 | Localised text (u16 length prefix + UTF-16). |
46//! | 1 | [bool] | Tooltip flag (unknown purpose). |
47
48use csv::{StringRecordsIter, Writer};
49use getset::{Getters, Setters};
50use rayon::prelude::*;
51use serde_derive::{Serialize, Deserialize};
52
53use std::borrow::Cow;
54use std::collections::{BTreeMap, HashMap};
55use std::fs::File;
56
57use crate::binary::{ReadBytes, WriteBytes};
58use crate::error::{RLibError, Result};
59use crate::files::{DecodeableExtraData, Decodeable, EncodeableExtraData, Encodeable, table::{DecodedData, local::TableInMemory, Table}};
60use crate::schema::*;
61use crate::utils::check_size_mismatch;
62
/// Byte order mark found in the first 2 bytes of every Loc file.
///
/// Equal to `65279`. Its little-endian byte representation is `FF FE`.
const BYTEORDER_MARK: u16 = 0xFEFF;

/// File type identifier: the 3 bytes that follow the byte order mark (bytes 3 to 5 of the file).
/// The sixth byte of the file is always a 0.
const FILE_TYPE: &str = "LOC";

/// Size in bytes of the header of a Loc file:
/// byte order mark (2) + file type (3) + zero byte (1) + version (4) + entry count (4).
const HEADER_SIZE: usize = 14;

/// Table name written in TSV-exported Loc files, so they can be recognised as Loc files.
pub(crate) const TSV_NAME_LOC: &str = "Loc";

/// Alternative TSV table name. The `_OLD` suffix suggests it is the name used by earlier
/// exports (assumption: confirm against the TSV import code).
pub(crate) const TSV_NAME_LOC_OLD: &str = "Loc PackedFile";

/// File extension of Loc files, including the leading dot.
pub const EXTENSION: &str = ".loc";

/// Version written to and expected from Loc files. Only version 1 has been seen so far.
const VERSION: i32 = 1;
81
82#[cfg(test)] mod loc_test;
83
84//---------------------------------------------------------------------------//
85// Enum & Structs
86//---------------------------------------------------------------------------//
87
/// In-memory representation of a decoded Loc (localisation) file.
///
/// This is a thin wrapper around a [`TableInMemory`] that always starts out with the fixed
/// three-column definition built by `Loc::new_definition`: key, text, and tooltip.
/// Unlike DB tables, Loc files don't require external schema definitions.
///
/// The `getset` attribute on the struct generates the public `table()` getter and
/// `set_table()` setter for the wrapped table.
///
/// # Structure
///
/// Each row contains:
/// - `key` (StringU16): Unique identifier for the localised text
/// - `text` (StringU16): The localised string content
/// - `tooltip` (Boolean): Flag of unknown purpose
///
/// # Example
///
/// ```ignore
/// use rpfm_lib::files::{Decodeable, loc::Loc};
/// use std::io::Cursor;
///
/// # let loc_data = vec![];
/// let mut reader = Cursor::new(loc_data);
/// let loc = Loc::decode(&mut reader, &None).unwrap();
///
/// // Access entries
/// for row in loc.data().iter() {
///     // row[0] = key, row[1] = text, row[2] = tooltip
/// }
/// ```
#[derive(PartialEq, Clone, Debug, Getters, Setters, Serialize, Deserialize)]
#[getset(get = "pub", set = "pub")]
pub struct Loc {

    /// The underlying table holding the rows, its definition and the table name.
    table: TableInMemory,
}
122
123//---------------------------------------------------------------------------//
124// Implementation of Loc
125//---------------------------------------------------------------------------//
126
127impl Default for Loc {
128 fn default() -> Self {
129 Self::new()
130 }
131}
132
133/// Implementation of `Loc`.
134impl Loc {
135
136 /// Creates a new empty Loc table.
137 ///
138 /// Initializes with the standard three-column schema (key, text, tooltip)
139 /// but no data rows.
140 pub fn new() -> Self {
141 let definition = Self::new_definition();
142
143 Self {
144 table: TableInMemory::new(&definition, None, TSV_NAME_LOC),
145 }
146 }
147
148 /// Returns the fixed schema definition for Loc tables.
149 ///
150 /// The definition contains three fields:
151 /// - `key` (StringU16, primary key)
152 /// - `text` (StringU16)
153 /// - `tooltip` (Boolean)
154 pub(crate) fn new_definition() -> Definition {
155 let mut definition = Definition::new(VERSION, None);
156 let fields = vec![
157 Field::new("key".to_owned(), FieldType::StringU16, true, Some("PLACEHOLDER".to_owned()), false, None, None, None, String::new(), 0, 0, BTreeMap::new(), None),
158 Field::new("text".to_owned(), FieldType::StringU16, false, Some("PLACEHOLDER".to_owned()), false, None, None, None, String::new(), 0, 0, BTreeMap::new(), None),
159 Field::new("tooltip".to_owned(), FieldType::Boolean, false, Some("PLACEHOLDER".to_owned()), false, None, None, None, String::new(), 0, 0, BTreeMap::new(), None),
160 ];
161 definition.set_fields(fields);
162 definition
163 }
164
165 /// Returns the schema definition used by this Loc table.
166 pub fn definition(&self) -> &Definition {
167 self.table.definition()
168 }
169
170 /// Returns the table rows as a slice of decoded data.
171 pub fn data(&'_ self) -> Cow<'_, [Vec<DecodedData>]> {
172 self.table.data()
173 }
174
175 /// Returns a mutable reference to the table rows.
176 ///
177 /// Ensure modifications maintain valid structure (3 columns per row).
178 pub fn data_mut(&mut self) -> &mut Vec<Vec<DecodedData>> {
179 self.table.data_mut()
180 }
181
182 /// Creates a new row with default placeholder values.
183 pub fn new_row(&self) -> Vec<DecodedData> {
184 self.table().new_row()
185 }
186
187 /// Replaces all table data with the provided rows.
188 ///
189 /// # Errors
190 ///
191 /// Returns an error if rows don't match the expected 3-column structure.
192 pub fn set_data(&mut self, data: &[Vec<DecodedData>]) -> Result<()> {
193 self.table.set_data(data)
194 }
195
196 /// Returns the column index for a given column name, or `None` if not found.
197 ///
198 /// Valid column names: `"key"` (0), `"text"` (1), `"tooltip"` (2).
199 pub fn column_position_by_name(&self, column_name: &str) -> Option<usize> {
200 self.table().column_position_by_name(column_name)
201 }
202
203 /// Returns the number of entries in the Loc table.
204 pub fn len(&self) -> usize {
205 self.table.len()
206 }
207
208 /// Returns `true` if the Loc table has no entries.
209 pub fn is_empty(&self) -> bool {
210 self.table.is_empty()
211 }
212
213 /// Replaces the table definition and migrates existing data to match.
214 ///
215 /// Typically not needed for Loc files since the definition is fixed.
216 pub fn set_definition(&mut self, new_definition: &Definition) {
217 self.table.set_definition(new_definition);
218 }
219
220 /// Reads and validates the Loc file header.
221 ///
222 /// # Returns
223 ///
224 /// A tuple of `(version, entry_count)`. Version is always 1 in known files.
225 ///
226 /// # Errors
227 ///
228 /// Returns [`RLibError::DecodingLocNotALocTable`] if the header is invalid
229 /// (wrong byte order mark, wrong file type, or insufficient data).
230 pub fn read_header<R: ReadBytes>(data: &mut R) -> Result<(i32, u32)> {
231
232 // A valid Loc PackedFile has at least 14 bytes. This ensures they exists before anything else.
233 if data.len()? < HEADER_SIZE as u64 {
234 return Err(RLibError::DecodingLocNotALocTable)
235 }
236
237 // More checks to ensure this is a valid Loc file.
238 if BYTEORDER_MARK != data.read_u16()? {
239 return Err(RLibError::DecodingLocNotALocTable)
240 }
241
242 if FILE_TYPE != data.read_string_u8(3)? {
243 return Err(RLibError::DecodingLocNotALocTable)
244 }
245
246 let _ = data.read_u8()?;
247 let version = data.read_i32()?;
248 let entry_count = data.read_u32()?;
249
250 Ok((version, entry_count))
251 }
252
253 /// Merges multiple Loc tables into a single new table.
254 ///
255 /// Combines all rows from the source tables. Duplicate keys are preserved
256 /// (not deduplicated).
257 pub fn merge(sources: &[&Self]) -> Result<Self> {
258 let mut new_table = Self::new();
259 let sources = sources.par_iter()
260 .map(|table| {
261 let mut table = table.table().clone();
262 table.set_definition(new_table.definition());
263 table
264 })
265 .collect::<Vec<_>>();
266
267 let new_data = sources.par_iter()
268 .map(|table| table.data().to_vec())
269 .flatten()
270 .collect::<Vec<_>>();
271 new_table.set_data(&new_data)?;
272
273 Ok(new_table)
274 }
275
276 /// Imports a Loc table from TSV (tab-separated values) format.
277 ///
278 /// # Arguments
279 ///
280 /// * `records` - CSV reader iterator over TSV records.
281 /// * `field_order` - Mapping of column positions to field names.
282 pub fn tsv_import(records: StringRecordsIter<File>, field_order: &HashMap<u32, String>) -> Result<Self> {
283 let definition = Self::new_definition();
284 let table = TableInMemory::tsv_import(records, &definition, field_order, TSV_NAME_LOC, None)?;
285 let loc = Loc::from(table);
286 Ok(loc)
287 }
288
289 /// Exports the Loc table to TSV (tab-separated values) format.
290 ///
291 /// # Arguments
292 ///
293 /// * `writer` - CSV writer for the output file.
294 /// * `table_path` - Path used in the TSV metadata header.
295 pub fn tsv_export(&self, writer: &mut Writer<File>, table_path: &str) -> Result<()> {
296 self.table.tsv_export(writer, table_path, true)
297 }
298}
299
300impl Decodeable for Loc {
301
302 fn decode<R: ReadBytes>(data: &mut R, _extra_data: &Option<DecodeableExtraData>) -> Result<Self> {
303
304 // Version is always 1, so we ignore it.
305 let (_version, entry_count) = Self::read_header(data)?;
306
307 let definition = Self::new_definition();
308 let table = TableInMemory::decode(data, &definition, &HashMap::new(), Some(entry_count), false, TSV_NAME_LOC)?;
309
310 // If we are not in the last byte, it means we didn't parse the entire file, which means this file is corrupt.
311 check_size_mismatch(data.stream_position()? as usize, data.len()? as usize)?;
312
313 Ok(Self {
314 table,
315 })
316 }
317}
318
319impl Encodeable for Loc {
320
321 fn encode<W: WriteBytes>(&mut self, buffer: &mut W, _extra_data: &Option<EncodeableExtraData>) -> Result<()> {
322 buffer.write_u16(BYTEORDER_MARK)?;
323 buffer.write_string_u8(FILE_TYPE)?;
324 buffer.write_u8(0)?;
325 buffer.write_i32(*self.table.definition().version())?;
326 buffer.write_u32(self.table.len() as u32)?;
327
328 self.table.encode(buffer)
329 }
330}
331
332/// Implementation to create a `Loc` from a `Table` directly.
333impl From<TableInMemory> for Loc {
334 fn from(mut table: TableInMemory) -> Self {
335 table.set_table_name(TSV_NAME_LOC.to_owned());
336 Self {
337 table,
338 }
339 }
340}