rpfm_lib/integrations/assembly_kit/table_data.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Assembly Kit table data parsing and conversion.
12//!
13//! This module handles the parsing of Assembly Kit sample table data files and their
14//! conversion to RPFM's internal table format. These files contain actual row data
15//! that can be used for testing, lookup generation, and schema validation.
16//!
17//! # Overview
18//!
19//! Assembly Kit provides not only table structure definitions (see the `table_definition` module)
20//! but also sample data files containing actual table rows. These XML files are useful for:
21//!
22//! - **Schema validation**: Verifying field types match actual data
23//! - **Lookup generation**: Extracting hardcoded enum/lookup values from descriptions
24//! - **Testing**: Ensuring RPFM can correctly parse real game data
25//! - **Reference**: Understanding what values appear in specific fields
26//!
27//! # File Format
28//!
29//! Table data files are XML files with the same name as their corresponding definition
30//! files (without the `TWaD_` prefix). For example:
31//! - Definition: `TWaD_units_tables.xml`
32//! - Data: `units_tables.xml`
33//!
34//! Each file contains rows of data in XML format:
35//! ```xml
36//! <dataroot>
37//! <units_tables>
38//! <key>unit_1</key>
39//! <category>infantry</category>
40//! <is_naval>false</is_naval>
41//! </units_tables>
42//! <units_tables>
43//! <key>unit_2</key>
44//! <category>cavalry</category>
45//! <is_naval>false</is_naval>
46//! </units_tables>
47//! </dataroot>
48//! ```
49//!
50//! # Main Types
51//!
52//! - [`RawTable`]: Complete table with definition and all row data
53//! - [`RawTableRow`]: Single row of data
54//! - [`RawTableField`]: Individual field value within a row
55//!
56//! # Functionality
57//!
58//! The primary operations are:
59//!
60//! 1. **Batch Reading**: [`RawTable::read_all()`] reads all table data files from a directory
61//! 2. **Individual Reading**: [`RawTable::read()`] parses a single table data file
62//! 3. **Conversion to DB**: [`RawTable::to_db()`] converts to RPFM's [`DB`] format
63//! 4. **Conversion to Table**: [`RawTable::to_table()`] converts to in-memory table format
64//!
65//! # Workarounds and Special Handling
66//!
67//! ## Missing Fields
68//!
69//! Some games (Thrones, Attila, Rome 2, Shogun 2) omit fields from rows when the field
70//! value is empty. RPFM handles this by inserting default values for missing fields.
71//!
72//! ## Empty Field Markers
73//!
74//! Due to XML parser limitations, empty fields are temporarily filled with placeholder
75//! text (`"Frodo Best Waifu"`) which is removed after parsing.
76//!
77//! ## Field Renaming
78//!
79//! The XML parser requires uniform field names, so table-specific field names are
80//! replaced with generic `<datafield>` tags before parsing, with the original name
81//! stored as an attribute.
82
83use rayon::prelude::*;
84use regex::Regex;
85use serde_derive::Deserialize;
86use serde_xml_rs::from_reader;
87
88use std::fs::File;
89use std::io::{BufReader, Read};
90use std::path::Path;
91
92use crate::error::{Result, RLibError};
93use crate::files::{db::DB, table::{DecodedData, local::TableInMemory, Table}};
94use crate::schema::{Definition, FieldType};
95
96use super::table_definition::RawDefinition;
97
98//---------------------------------------------------------------------------//
99// Types for parsing the Assembly Kit DB Files into.
100//---------------------------------------------------------------------------//
101
102/// Complete table data parsed from Assembly Kit XML files.
103///
104/// This represents an entire table including its structure definition and all row data.
105/// Corresponds to a `.xml` data file in the Assembly Kit (e.g., `units_tables.xml`).
106///
107/// # Structure
108///
109/// The table contains:
110/// - An optional definition (field structure) - typically populated during parsing
111/// - All rows of data from the XML file
112///
113/// # Usage
114///
115/// After parsing with [`RawTable::read()`] or [`RawTable::read_all()`], the table
116/// can be converted to RPFM's internal formats:
117/// - [`RawTable::to_db()`] - Convert to DB format for saving as a PackFile table
118/// - [`RawTable::to_table()`] - Convert to in-memory table for manipulation
119#[derive(Debug, Default, Deserialize)]
120#[serde(rename = "dataroot")]
121pub struct RawTable {
122 /// Table structure definition (fields, types, relationships).
123 ///
124 /// This is populated by combining the parsed data structure with the
125 /// corresponding `TWaD_` definition file.
126 pub definition: Option<RawDefinition>,
127
128 /// All rows of data in the table.
129 pub rows: Vec<RawTableRow>,
130}
131
132/// Single row of data from an Assembly Kit table.
133///
134/// Each row contains a collection of field values. In the XML, this corresponds
135/// to one `<tablename>` element containing multiple field elements.
136#[derive(Debug, Default, Deserialize)]
137#[serde(rename = "datarow")]
138pub struct RawTableRow {
139
140 /// All field values in this row.
141 #[serde(rename = "datafield")]
142 pub fields: Vec<RawTableField>,
143}
144
145/// Individual field value within a table row.
146///
147/// This is the raw equivalent to RPFM's [`DecodedData`]. Each field has a name,
148/// a string value, and optionally a "state" flag marking localisable fields.
149///
150/// # XML Representation
151///
152/// In the original Assembly Kit XML, fields appear as:
153/// ```xml
154/// <field_name>value</field_name>
155/// <other_field some_attribute="...">value with attributes</other_field>
156/// ```
157///
158/// During parsing, these are normalized to:
159/// ```xml
160/// <datafield field_name="field_name">value</datafield>
161/// <datafield field_name="other_field" state="1">value with attributes</datafield>
162/// ```
163///
164/// # State Attribute for Localisable Fields
165///
166/// The `state` attribute is set to `"1"` when the original XML field tag had any
167/// attributes. In Assembly Kit files, fields with attributes are localisable fields
168/// (fields containing translatable text). These fields are filtered out when extracting
169/// non-localisable field definitions, ensuring that regular data fields and translation
170/// fields are processed separately.
171#[derive(Debug, Default, Deserialize)]
172#[serde(rename = "datafield")]
173pub struct RawTableField {
174 /// Name of the field (column name).
175 #[serde(rename = "@field_name")]
176 pub field_name: String,
177
178 /// String representation of the field value.
179 ///
180 /// All values are stored as strings in XML and must be parsed to their
181 /// actual types during conversion.
182 #[serde(rename = "#text")]
183 pub field_data: String,
184
185 /// State flag marking localisable (translatable) fields.
186 ///
187 /// Set to `"1"` when the original Assembly Kit XML field tag had any attributes,
188 /// which indicates the field is localisable (contains translatable text).
189 /// Such fields are filtered out during non-localisable field extraction to ensure
190 /// translation fields are handled separately from regular data fields.
191 #[serde(rename = "@state")]
192 pub state: Option<String>,
193}
194
195//---------------------------------------------------------------------------//
196// Implementations
197//---------------------------------------------------------------------------//
198
199/// Implementation of `RawTable`.
200impl RawTable {
201
202 /// Reads all table data files from an Assembly Kit directory.
203 ///
204 /// This function scans the directory for table data XML files and parses them
205 /// into [`RawTable`] instances. It first reads all table definitions, then
206 /// reads the corresponding data files.
207 ///
208 /// # Arguments
209 ///
210 /// * `raw_tables_folder` - Directory containing both definition and data files
211 /// * `version` - Assembly Kit version (0-2)
212 /// * `tables_to_skip` - Table names to exclude from parsing
213 ///
214 /// # Returns
215 ///
216 /// Returns a vector of successfully parsed tables. Tables that fail to parse
217 /// or are in the skip list are excluded.
218 ///
219 /// # Errors
220 ///
221 /// Returns an error if:
222 /// - The version is unsupported (not 0, 1, or 2)
223 /// - The directory cannot be read
224 /// - Definition files cannot be parsed
225 ///
226 /// # Note
227 ///
228 /// Individual table data files that fail to parse are silently skipped rather
229 /// than causing the entire operation to fail.
230 pub fn read_all(raw_tables_folder: &Path, version: i16, tables_to_skip: &[&str]) -> Result<Vec<Self>> {
231
232 // First, we try to read all `RawDefinitions` from the same folder.
233 let definitions = RawDefinition::read_all(raw_tables_folder, version, tables_to_skip)?;
234
235 // Then, depending on the version, we have to use one logic or another.
236 match version {
237
238 // Version 2 is Rome 2+. Version 1 is Shogun 2. Almost the same format, but we have to
239 // provide a different path for Shogun 2, so it has his own version.
240 // Version 0 is Napoleon and Empire. These two don't have an assembly kit, but CA released years ago their table files.
241 0..=2 => Ok(definitions.par_iter().filter_map(|definition| Self::read(definition, raw_tables_folder, version).ok()).collect()),
242 _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
243 }
244 }
245
246 /// Parses a single Assembly Kit table data file.
247 ///
248 /// Reads the XML data file corresponding to the provided definition and parses
249 /// it into a [`RawTable`]. The data file must have the same name as the definition
250 /// (without the `TWaD_` prefix).
251 ///
252 /// # Arguments
253 ///
254 /// * `raw_definition` - Table structure definition
255 /// * `raw_table_data_folder` - Directory containing the data XML files
256 /// * `version` - Assembly Kit version (0-2)
257 ///
258 /// # Returns
259 ///
260 /// Returns a [`RawTable`] with the definition and all parsed row data.
261 ///
262 /// # Errors
263 ///
264 /// Returns an error if:
265 /// - The version is unsupported (not 0, 1, or 2)
266 /// - The data file cannot be opened
267 /// - The XML is malformed
268 /// - The table is `translated_texts.xml` (returns [`RLibError::AssemblyKitTableTableIgnored`])
269 ///
270 /// # Special Cases
271 ///
272 /// ## translated_texts.xml
273 ///
274 /// This file (present in Rome 2, Attila, Thrones) is ~400MB and not needed for
275 /// schema processing, so it's explicitly ignored.
276 ///
277 /// ## XML Preprocessing
278 ///
279 /// Before parsing, the XML undergoes several transformations to work around
280 /// `serde_xml_rs` limitations:
281 /// 1. Table-specific row tags are renamed to generic `<rows>`
282 /// 2. Field tags are renamed to `<datafield>` with the name as an attribute
283 /// 3. Empty fields are filled with placeholder text (removed after parsing)
284 pub fn read(raw_definition: &RawDefinition, raw_table_data_folder: &Path, version: i16) -> Result<Self> {
285 match version {
286 0..=2 => {
287 let name_no_xml = raw_definition.name.as_ref().unwrap().split_at(raw_definition.name.as_ref().unwrap().len() - 4).0;
288
289 // This file is present in Rome 2, Attila and Thrones. It's almost 400mb. And we don't need it.
290 if raw_definition.name.as_ref().unwrap() == "translated_texts.xml" {
291 return Err(RLibError::AssemblyKitTableTableIgnored)
292 }
293
294 let raw_table_data_path = raw_table_data_folder.join(raw_definition.name.as_ref().unwrap());
295 let mut raw_table_data_file = BufReader::new(File::open(raw_table_data_path)?);
296
297 // Before deserializing the data, due to limitations of serde_xml_rs, we have to rename all rows, because unique names for
298 // rows in each file is not supported for deserializing. Same for the fields, we have to change them to something more generic.
299 let mut buffer = String::new();
300 raw_table_data_file.read_to_string(&mut buffer)?;
301 buffer = buffer.replace(&format!("<{name_no_xml} record_uuid"), "<rows record_uuid");
302 buffer = buffer.replace(&format!("<{name_no_xml}>"), "<rows>");
303 buffer = buffer.replace(&format!("</{name_no_xml}>"), "</rows>");
304 for field in &raw_definition.fields {
305 let field_name_regex = Regex::new(&format!("\n<{}>", field.name)).unwrap();
306 let field_name_regex2 = Regex::new(&format!("\n<{} .+?\">", field.name)).unwrap();
307 buffer = field_name_regex.replace_all(&buffer, &*format!("\n<datafield field_name=\"{}\">", field.name)).to_string();
308 buffer = field_name_regex2.replace_all(&buffer, &*format!("\n<datafield field_name=\"{}\" state=\"1\">", field.name)).to_string();
309 buffer = buffer.replace(&format!("</{}>", field.name), "</datafield>");
310 }
311
312 // Serde shits itself if it sees an empty field, so we have to work around that.
313 buffer = buffer.replace("\"></datafield>", "\">Frodo Best Waifu</datafield>");
314 buffer = buffer.replace("\"> </datafield>", "\"> Frodo Best Waifu</datafield>");
315 buffer = buffer.replace("\"> </datafield>", "\"> Frodo Best Waifu</datafield>");
316 buffer = buffer.replace("\"> </datafield>", "\"> Frodo Best Waifu</datafield>");
317 buffer = buffer.replace("\"> </datafield>", "\"> Frodo Best Waifu</datafield>");
318
319 // Only if the table has data we deserialize it. If not, we just create an empty one.
320 let mut raw_table = if buffer.contains("</rows>\r\n</dataroot>") || buffer.contains("</rows>\n</dataroot>") {
321 from_reader(buffer.as_bytes())?
322 } else {
323 Self::default()
324 };
325
326 // Remove the best waifus, because they end up appearing in lookups!!!
327 for row in &mut raw_table.rows {
328 for field in &mut row.fields {
329 field.field_data = field.field_data.replace("Frodo Best Waifu", "").trim().to_owned();
330 }
331 }
332
333 raw_table.definition = Some(raw_definition.clone());
334 Ok(raw_table)
335 }
336 _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
337 }
338 }
339
340 /// Converts the raw table to RPFM's DB format.
341 ///
342 /// This is a convenience wrapper around [`RawTable::to_table()`] that converts
343 /// the result to a [`DB`] struct suitable for saving as a PackFile table.
344 ///
345 /// # Arguments
346 ///
347 /// * `definition` - Optional RPFM schema definition for type validation and patching
348 ///
349 /// # Returns
350 ///
351 /// Returns a [`DB`] instance containing the converted table data.
352 ///
353 /// # Errors
354 ///
355 /// Returns an error if:
356 /// - The table has no definition
357 /// - Field types cannot be determined
358 /// - Data conversion fails (e.g., invalid number format)
359 pub fn to_db(&self, definition: Option<&Definition>) -> Result<DB> {
360 let table = Self::to_table(self, definition)?;
361 Ok(DB::from(table))
362 }
363
364 /// Converts the raw table to RPFM's in-memory table format.
365 ///
366 /// This function performs the main conversion from Assembly Kit's XML representation
367 /// to RPFM's internal table structure, including type conversion and handling of
368 /// missing fields.
369 ///
370 /// # Arguments
371 ///
372 /// * `definition` - Optional RPFM schema definition used for:
373 /// - Type validation and patching (e.g., fixing string types on empty fields)
374 /// - Providing default values for missing fields
375 ///
376 /// # Returns
377 ///
378 /// Returns a [`TableInMemory`] with all data converted to proper types.
379 ///
380 /// # Errors
381 ///
382 /// Returns an error if:
383 /// - The raw table has no definition (returns [`RLibError::RawTableMissingDefinition`])
384 /// - Field data cannot be parsed to the expected type
385 /// - The table structure is invalid
386 ///
387 /// # Type Conversion
388 ///
389 /// String values from XML are converted to typed data:
390 /// - `"true"`, `"1"` → `Boolean(true)`
391 /// - `"123"` → `I32(123)`, `F32(123.0)`, etc.
392 /// - `""` → Appropriate default value for the type
393 ///
394 /// # Missing Field Handling
395 ///
396 /// Some games (Thrones, Attila, Rome 2, Shogun 2) omit empty fields from rows.
397 /// This function inserts default values for any missing fields based on their type.
398 pub fn to_table(&self, definition: Option<&Definition>) -> Result<TableInMemory> {
399 let mut raw_definition = self.definition.as_ref().cloned().ok_or(RLibError::RawTableMissingDefinition)?;
400 let table_name = if let Some(ref raw_definition) = raw_definition.name {
401
402 // Remove the .xml of the name in the most awesome way there is.
403 let mut x = raw_definition.to_owned();
404 x.pop();
405 x.pop();
406 x.pop();
407 x.pop();
408
409 format!("{x}_tables")
410 } else { String::new() };
411
412 // We need to pre-patch some of the raw definition fields to avoid the "0 on empty fields" bug.
413 if let Some(definition) = definition {
414 for field in definition.fields_processed() {
415 if let Some(raw_field) = raw_definition.fields.iter_mut().find(|x| x.name == field.name()) {
416 match field.field_type() {
417 FieldType::StringU8 |
418 FieldType::OptionalStringU8 => {
419 if raw_field.field_type == "integer" {
420 raw_field.field_type = "text".to_owned();
421 }
422 },
423 _ => continue,
424 }
425 }
426 }
427 }
428
429 let mut table = TableInMemory::new(&From::from(&raw_definition), None, &table_name);
430 let mut entries = vec![];
431 for row in &self.rows {
432 let mut entry = vec![];
433
434 // Some games (Thrones, Attila, Rome 2 and Shogun 2) may have missing fields when said field is empty.
435 // To compensate it, if we don't find a field from the definition in the table, we add it empty.
436 for field_def in table.definition().fields() {
437 let mut exists = false;
438 for field in &row.fields {
439 if field_def.name() == field.field_name {
440 exists = true;
441
442 entry.push(match field_def.field_type() {
443 FieldType::Boolean => DecodedData::Boolean(field.field_data == "true" || field.field_data == "1"),
444 FieldType::F32 => DecodedData::F32(field.field_data.parse::<f32>().unwrap_or_default()),
445 FieldType::F64 => DecodedData::F64(field.field_data.parse::<f64>().unwrap_or_default()),
446 FieldType::I16 => DecodedData::I16(field.field_data.parse::<i16>().unwrap_or_default()),
447 FieldType::I32 => DecodedData::I32(field.field_data.parse::<i32>().unwrap_or_default()),
448 FieldType::I64 => DecodedData::I64(field.field_data.parse::<i64>().unwrap_or_default()),
449 FieldType::OptionalI16 => DecodedData::OptionalI16(field.field_data.parse::<i16>().unwrap_or_default()),
450 FieldType::OptionalI32 => DecodedData::OptionalI32(field.field_data.parse::<i32>().unwrap_or_default()),
451 FieldType::OptionalI64 => DecodedData::OptionalI64(field.field_data.parse::<i64>().unwrap_or_default()),
452 FieldType::ColourRGB => DecodedData::ColourRGB(field.field_data.to_string()),
453 FieldType::StringU8 => DecodedData::StringU8(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
454 FieldType::StringU16 => DecodedData::StringU16(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
455 FieldType::OptionalStringU8 => DecodedData::OptionalStringU8(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
456 FieldType::OptionalStringU16 => DecodedData::OptionalStringU16(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
457
458 // This type is not used in the raw tables so, if we find it, we skip it.
459 FieldType::SequenceU16(_) | FieldType::SequenceU32(_) => continue,
460 });
461 break;
462 }
463 }
464
465 // If the field doesn't exist, we create it empty.
466 if !exists {
467 entry.push(match field_def.field_type() {
468 FieldType::Boolean => DecodedData::Boolean(false),
469 FieldType::F32 => DecodedData::F32(0.0),
470 FieldType::F64 => DecodedData::F64(0.0),
471 FieldType::I16 => DecodedData::I16(0),
472 FieldType::I32 => DecodedData::I32(0),
473 FieldType::I64 => DecodedData::I64(0),
474 FieldType::OptionalI16 => DecodedData::OptionalI16(0),
475 FieldType::OptionalI32 => DecodedData::OptionalI32(0),
476 FieldType::OptionalI64 => DecodedData::OptionalI64(0),
477 FieldType::ColourRGB => DecodedData::ColourRGB(String::new()),
478 FieldType::StringU8 => DecodedData::StringU8(String::new()),
479 FieldType::StringU16 => DecodedData::StringU16(String::new()),
480 FieldType::OptionalStringU8 => DecodedData::OptionalStringU8(String::new()),
481 FieldType::OptionalStringU16 => DecodedData::OptionalStringU16(String::new()),
482
483 // This type is not used in the raw tables so, if we find it, we skip it.
484 FieldType::SequenceU16(_) | FieldType::SequenceU32(_) => unimplemented!("Does this ever happen?"),
485 });
486 }
487 }
488 entries.push(entry);
489 }
490
491 table.set_data(&entries)?;
492 Ok(table)
493 }
494}