Skip to main content

rpfm_lib/integrations/assembly_kit/
table_data.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Assembly Kit table data parsing and conversion.
12//!
13//! This module handles the parsing of Assembly Kit sample table data files and their
14//! conversion to RPFM's internal table format. These files contain actual row data
15//! that can be used for testing, lookup generation, and schema validation.
16//!
17//! # Overview
18//!
19//! Assembly Kit provides not only table structure definitions (see the `table_definition` module)
20//! but also sample data files containing actual table rows. These XML files are useful for:
21//!
22//! - **Schema validation**: Verifying field types match actual data
23//! - **Lookup generation**: Extracting hardcoded enum/lookup values from descriptions
24//! - **Testing**: Ensuring RPFM can correctly parse real game data
25//! - **Reference**: Understanding what values appear in specific fields
26//!
27//! # File Format
28//!
//! Table data files are XML files with the same name as their corresponding definition
//! files (without the `TWaD_` prefix). For example:
//! - Definition: `TWaD_units.xml`
//! - Data: `units.xml`
//!
//! Each file contains rows of data in XML format. Every row is an element named after the
//! data file (without the `.xml` extension), and the converted table is named after that
//! same stem plus a `_tables` suffix (`units_tables` in this example). The example below is
//! indented for readability only: the parser expects each field tag at the start of a line.
//! ```xml
//! <dataroot>
//!   <units>
//!     <key>unit_1</key>
//!     <category>infantry</category>
//!     <is_naval>false</is_naval>
//!   </units>
//!   <units>
//!     <key>unit_2</key>
//!     <category>cavalry</category>
//!     <is_naval>false</is_naval>
//!   </units>
//! </dataroot>
48//! ```
49//!
50//! # Main Types
51//!
52//! - [`RawTable`]: Complete table with definition and all row data
53//! - [`RawTableRow`]: Single row of data
54//! - [`RawTableField`]: Individual field value within a row
55//!
56//! # Functionality
57//!
58//! The primary operations are:
59//!
60//! 1. **Batch Reading**: [`RawTable::read_all()`] reads all table data files from a directory
61//! 2. **Individual Reading**: [`RawTable::read()`] parses a single table data file
62//! 3. **Conversion to DB**: [`RawTable::to_db()`] converts to RPFM's [`DB`] format
63//! 4. **Conversion to Table**: [`RawTable::to_table()`] converts to in-memory table format
64//!
65//! # Workarounds and Special Handling
66//!
67//! ## Missing Fields
68//!
69//! Some games (Thrones, Attila, Rome 2, Shogun 2) omit fields from rows when the field
70//! value is empty. RPFM handles this by inserting default values for missing fields.
71//!
72//! ## Empty Field Markers
73//!
74//! Due to XML parser limitations, empty fields are temporarily filled with placeholder
75//! text (`"Frodo Best Waifu"`) which is removed after parsing.
76//!
77//! ## Field Renaming
78//!
79//! The XML parser requires uniform field names, so table-specific field names are
80//! replaced with generic `<datafield>` tags before parsing, with the original name
81//! stored as an attribute.
82
83use rayon::prelude::*;
84use regex::Regex;
85use serde_derive::Deserialize;
86use serde_xml_rs::from_reader;
87
88use std::fs::File;
89use std::io::{BufReader, Read};
90use std::path::Path;
91
92use crate::error::{Result, RLibError};
93use crate::files::{db::DB, table::{DecodedData, local::TableInMemory, Table}};
94use crate::schema::{Definition, FieldType};
95
96use super::table_definition::RawDefinition;
97
98//---------------------------------------------------------------------------//
99// Types for parsing the Assembly Kit DB Files into.
100//---------------------------------------------------------------------------//
101
/// Complete table data parsed from an Assembly Kit XML data file.
///
/// This represents an entire table: its structure definition plus all of its row data.
/// Corresponds to a `.xml` data file in the Assembly Kit.
///
/// # Structure
///
/// The table contains:
/// - An optional definition (field structure), filled in by [`RawTable::read()`]
/// - All rows of data from the XML file
///
/// # Deserialization
///
/// The struct is deserialized from the `<dataroot>` element of the (preprocessed) XML.
/// A default-constructed table has no definition and no rows; [`RawTable::read()`] uses
/// that as the starting point for data files without rows.
///
/// # Usage
///
/// After parsing with [`RawTable::read()`] or [`RawTable::read_all()`], the table
/// can be converted to RPFM's internal formats:
/// - [`RawTable::to_db()`] - Convert to DB format
/// - [`RawTable::to_table()`] - Convert to in-memory table for manipulation
#[derive(Debug, Default, Deserialize)]
#[serde(rename = "dataroot")]
pub struct RawTable {
    /// Table structure definition (fields, types, relationships).
    ///
    /// [`RawTable::read()`] stores here a clone of the definition it was given.
    /// It is `None` on a default-constructed table, in which case
    /// [`RawTable::to_table()`] fails with `RLibError::RawTableMissingDefinition`.
    pub definition: Option<RawDefinition>,

    /// All rows of data in the table, in the order they appear in the XML.
    pub rows: Vec<RawTableRow>,
}
131
/// Single row of data from an Assembly Kit table.
///
/// Each row contains a collection of field values. In the original XML, this corresponds
/// to one `<tablename>` element containing multiple field elements; [`RawTable::read()`]
/// renames those elements to `<rows>` before deserializing, which is how they end up in
/// [`RawTable::rows`].
#[derive(Debug, Default, Deserialize)]
#[serde(rename = "datarow")]
pub struct RawTableRow {

    /// All field values in this row, one per `<datafield>` element.
    ///
    /// Fields omitted from the XML are simply absent here; [`RawTable::to_table()`]
    /// fills them in with defaults.
    #[serde(rename = "datafield")]
    pub fields: Vec<RawTableField>,
}
144
/// Individual field value within a table row.
///
/// This is the raw equivalent to RPFM's [`DecodedData`]. Each field has a name,
/// a string value, and optionally a "state" flag.
///
/// # XML Representation
///
/// In the original Assembly Kit XML, fields appear as:
/// ```xml
/// <field_name>value</field_name>
/// <other_field some_attribute="...">value with attributes</other_field>
/// ```
///
/// Before deserializing, [`RawTable::read()`] normalizes these to:
/// ```xml
/// <datafield field_name="field_name">value</datafield>
/// <datafield field_name="other_field" state="1">value with attributes</datafield>
/// ```
///
/// # State Attribute
///
/// [`RawTable::read()`] sets `state="1"` on every field whose original tag carried
/// attributes, and leaves it out otherwise. The original attributes themselves are discarded.
///
/// NOTE(review): fields with attributes are understood to be the localisable
/// (translatable) ones, and consumers elsewhere reportedly use this flag to keep them
/// apart from regular data fields. That filtering does not happen in this file.
#[derive(Debug, Default, Deserialize)]
#[serde(rename = "datafield")]
pub struct RawTableField {
    /// Name of the field (column name), read from the `field_name` attribute.
    #[serde(rename = "@field_name")]
    pub field_name: String,

    /// String representation of the field value (the element's text content).
    ///
    /// All values are stored as strings in XML and are only converted to their
    /// actual types by [`RawTable::to_table()`]. [`RawTable::read()`] strips the
    /// empty-field placeholder and trims surrounding whitespace from this value.
    #[serde(rename = "#text")]
    pub field_data: String,

    /// State flag, read from the optional `state` attribute.
    ///
    /// `Some("1")` when the original Assembly Kit field tag had any attributes,
    /// `None` when it had none.
    #[serde(rename = "@state")]
    pub state: Option<String>,
}
194
195//---------------------------------------------------------------------------//
196// Implementations
197//---------------------------------------------------------------------------//
198
199/// Implementation of `RawTable`.
200impl RawTable {
201
202    /// Reads all table data files from an Assembly Kit directory.
203    ///
204    /// This function scans the directory for table data XML files and parses them
205    /// into [`RawTable`] instances. It first reads all table definitions, then
206    /// reads the corresponding data files.
207    ///
208    /// # Arguments
209    ///
210    /// * `raw_tables_folder` - Directory containing both definition and data files
211    /// * `version` - Assembly Kit version (0-2)
212    /// * `tables_to_skip` - Table names to exclude from parsing
213    ///
214    /// # Returns
215    ///
216    /// Returns a vector of successfully parsed tables. Tables that fail to parse
217    /// or are in the skip list are excluded.
218    ///
219    /// # Errors
220    ///
221    /// Returns an error if:
222    /// - The version is unsupported (not 0, 1, or 2)
223    /// - The directory cannot be read
224    /// - Definition files cannot be parsed
225    ///
226    /// # Note
227    ///
228    /// Individual table data files that fail to parse are silently skipped rather
229    /// than causing the entire operation to fail.
230    pub fn read_all(raw_tables_folder: &Path, version: i16, tables_to_skip: &[&str]) -> Result<Vec<Self>> {
231
232        // First, we try to read all `RawDefinitions` from the same folder.
233        let definitions = RawDefinition::read_all(raw_tables_folder, version, tables_to_skip)?;
234
235        // Then, depending on the version, we have to use one logic or another.
236        match version {
237
238            // Version 2 is Rome 2+. Version 1 is Shogun 2. Almost the same format, but we have to
239            // provide a different path for Shogun 2, so it has his own version.
240            // Version 0 is Napoleon and Empire. These two don't have an assembly kit, but CA released years ago their table files.
241            0..=2 => Ok(definitions.par_iter().filter_map(|definition| Self::read(definition, raw_tables_folder, version).ok()).collect()),
242            _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
243        }
244    }
245
246    /// Parses a single Assembly Kit table data file.
247    ///
248    /// Reads the XML data file corresponding to the provided definition and parses
249    /// it into a [`RawTable`]. The data file must have the same name as the definition
250    /// (without the `TWaD_` prefix).
251    ///
252    /// # Arguments
253    ///
254    /// * `raw_definition` - Table structure definition
255    /// * `raw_table_data_folder` - Directory containing the data XML files
256    /// * `version` - Assembly Kit version (0-2)
257    ///
258    /// # Returns
259    ///
260    /// Returns a [`RawTable`] with the definition and all parsed row data.
261    ///
262    /// # Errors
263    ///
264    /// Returns an error if:
265    /// - The version is unsupported (not 0, 1, or 2)
266    /// - The data file cannot be opened
267    /// - The XML is malformed
268    /// - The table is `translated_texts.xml` (returns [`RLibError::AssemblyKitTableTableIgnored`])
269    ///
270    /// # Special Cases
271    ///
272    /// ## translated_texts.xml
273    ///
274    /// This file (present in Rome 2, Attila, Thrones) is ~400MB and not needed for
275    /// schema processing, so it's explicitly ignored.
276    ///
277    /// ## XML Preprocessing
278    ///
279    /// Before parsing, the XML undergoes several transformations to work around
280    /// `serde_xml_rs` limitations:
281    /// 1. Table-specific row tags are renamed to generic `<rows>`
282    /// 2. Field tags are renamed to `<datafield>` with the name as an attribute
283    /// 3. Empty fields are filled with placeholder text (removed after parsing)
284    pub fn read(raw_definition: &RawDefinition, raw_table_data_folder: &Path, version: i16) -> Result<Self> {
285        match version {
286            0..=2 => {
287                let name_no_xml = raw_definition.name.as_ref().unwrap().split_at(raw_definition.name.as_ref().unwrap().len() - 4).0;
288
289                // This file is present in Rome 2, Attila and Thrones. It's almost 400mb. And we don't need it.
290                if raw_definition.name.as_ref().unwrap() == "translated_texts.xml" {
291                    return Err(RLibError::AssemblyKitTableTableIgnored)
292                }
293
294                let raw_table_data_path = raw_table_data_folder.join(raw_definition.name.as_ref().unwrap());
295                let mut raw_table_data_file = BufReader::new(File::open(raw_table_data_path)?);
296
297                // Before deserializing the data, due to limitations of serde_xml_rs, we have to rename all rows, because unique names for
298                // rows in each file is not supported for deserializing. Same for the fields, we have to change them to something more generic.
299                let mut buffer = String::new();
300                raw_table_data_file.read_to_string(&mut buffer)?;
301                buffer = buffer.replace(&format!("<{name_no_xml} record_uuid"), "<rows record_uuid");
302                buffer = buffer.replace(&format!("<{name_no_xml}>"), "<rows>");
303                buffer = buffer.replace(&format!("</{name_no_xml}>"), "</rows>");
304                for field in &raw_definition.fields {
305                    let field_name_regex = Regex::new(&format!("\n<{}>", field.name)).unwrap();
306                    let field_name_regex2 = Regex::new(&format!("\n<{} .+?\">", field.name)).unwrap();
307                    buffer = field_name_regex.replace_all(&buffer, &*format!("\n<datafield field_name=\"{}\">", field.name)).to_string();
308                    buffer = field_name_regex2.replace_all(&buffer, &*format!("\n<datafield field_name=\"{}\" state=\"1\">", field.name)).to_string();
309                    buffer = buffer.replace(&format!("</{}>", field.name), "</datafield>");
310                }
311
312                // Serde shits itself if it sees an empty field, so we have to work around that.
313                buffer = buffer.replace("\"></datafield>", "\">Frodo Best Waifu</datafield>");
314                buffer = buffer.replace("\"> </datafield>", "\"> Frodo Best Waifu</datafield>");
315                buffer = buffer.replace("\">  </datafield>", "\">  Frodo Best Waifu</datafield>");
316                buffer = buffer.replace("\">   </datafield>", "\">   Frodo Best Waifu</datafield>");
317                buffer = buffer.replace("\">    </datafield>", "\">    Frodo Best Waifu</datafield>");
318
319                // Only if the table has data we deserialize it. If not, we just create an empty one.
320                let mut raw_table = if buffer.contains("</rows>\r\n</dataroot>") || buffer.contains("</rows>\n</dataroot>") {
321                    from_reader(buffer.as_bytes())?
322                } else {
323                    Self::default()
324                };
325
326                // Remove the best waifus, because they end up appearing in lookups!!!
327                for row in &mut raw_table.rows {
328                    for field in &mut row.fields {
329                        field.field_data = field.field_data.replace("Frodo Best Waifu", "").trim().to_owned();
330                    }
331                }
332
333                raw_table.definition = Some(raw_definition.clone());
334                Ok(raw_table)
335            }
336            _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
337        }
338    }
339
340    /// Converts the raw table to RPFM's DB format.
341    ///
342    /// This is a convenience wrapper around [`RawTable::to_table()`] that converts
343    /// the result to a [`DB`] struct suitable for saving as a PackFile table.
344    ///
345    /// # Arguments
346    ///
347    /// * `definition` - Optional RPFM schema definition for type validation and patching
348    ///
349    /// # Returns
350    ///
351    /// Returns a [`DB`] instance containing the converted table data.
352    ///
353    /// # Errors
354    ///
355    /// Returns an error if:
356    /// - The table has no definition
357    /// - Field types cannot be determined
358    /// - Data conversion fails (e.g., invalid number format)
359    pub fn to_db(&self, definition: Option<&Definition>) -> Result<DB> {
360        let table = Self::to_table(self, definition)?;
361        Ok(DB::from(table))
362    }
363
364    /// Converts the raw table to RPFM's in-memory table format.
365    ///
366    /// This function performs the main conversion from Assembly Kit's XML representation
367    /// to RPFM's internal table structure, including type conversion and handling of
368    /// missing fields.
369    ///
370    /// # Arguments
371    ///
372    /// * `definition` - Optional RPFM schema definition used for:
373    ///   - Type validation and patching (e.g., fixing string types on empty fields)
374    ///   - Providing default values for missing fields
375    ///
376    /// # Returns
377    ///
378    /// Returns a [`TableInMemory`] with all data converted to proper types.
379    ///
380    /// # Errors
381    ///
382    /// Returns an error if:
383    /// - The raw table has no definition (returns [`RLibError::RawTableMissingDefinition`])
384    /// - Field data cannot be parsed to the expected type
385    /// - The table structure is invalid
386    ///
387    /// # Type Conversion
388    ///
389    /// String values from XML are converted to typed data:
390    /// - `"true"`, `"1"` → `Boolean(true)`
391    /// - `"123"` → `I32(123)`, `F32(123.0)`, etc.
392    /// - `""` → Appropriate default value for the type
393    ///
394    /// # Missing Field Handling
395    ///
396    /// Some games (Thrones, Attila, Rome 2, Shogun 2) omit empty fields from rows.
397    /// This function inserts default values for any missing fields based on their type.
398    pub fn to_table(&self, definition: Option<&Definition>) -> Result<TableInMemory> {
399        let mut raw_definition = self.definition.as_ref().cloned().ok_or(RLibError::RawTableMissingDefinition)?;
400        let table_name = if let Some(ref raw_definition) = raw_definition.name {
401
402            // Remove the .xml of the name in the most awesome way there is.
403            let mut x = raw_definition.to_owned();
404            x.pop();
405            x.pop();
406            x.pop();
407            x.pop();
408
409            format!("{x}_tables")
410        } else { String::new() };
411
412        // We need to pre-patch some of the raw definition fields to avoid the "0 on empty fields" bug.
413        if let Some(definition) = definition {
414            for field in definition.fields_processed() {
415                if let Some(raw_field) = raw_definition.fields.iter_mut().find(|x| x.name == field.name()) {
416                    match field.field_type() {
417                        FieldType::StringU8 |
418                        FieldType::OptionalStringU8 => {
419                            if raw_field.field_type == "integer" {
420                                raw_field.field_type = "text".to_owned();
421                            }
422                        },
423                        _ => continue,
424                    }
425                }
426            }
427        }
428
429        let mut table = TableInMemory::new(&From::from(&raw_definition), None, &table_name);
430        let mut entries = vec![];
431        for row in &self.rows {
432            let mut entry = vec![];
433
434            // Some games (Thrones, Attila, Rome 2 and Shogun 2) may have missing fields when said field is empty.
435            // To compensate it, if we don't find a field from the definition in the table, we add it empty.
436            for field_def in table.definition().fields() {
437                let mut exists = false;
438                for field in &row.fields {
439                    if field_def.name() == field.field_name {
440                        exists = true;
441
442                        entry.push(match field_def.field_type() {
443                            FieldType::Boolean => DecodedData::Boolean(field.field_data == "true" || field.field_data == "1"),
444                            FieldType::F32 => DecodedData::F32(field.field_data.parse::<f32>().unwrap_or_default()),
445                            FieldType::F64 => DecodedData::F64(field.field_data.parse::<f64>().unwrap_or_default()),
446                            FieldType::I16 => DecodedData::I16(field.field_data.parse::<i16>().unwrap_or_default()),
447                            FieldType::I32 => DecodedData::I32(field.field_data.parse::<i32>().unwrap_or_default()),
448                            FieldType::I64 => DecodedData::I64(field.field_data.parse::<i64>().unwrap_or_default()),
449                            FieldType::OptionalI16 => DecodedData::OptionalI16(field.field_data.parse::<i16>().unwrap_or_default()),
450                            FieldType::OptionalI32 => DecodedData::OptionalI32(field.field_data.parse::<i32>().unwrap_or_default()),
451                            FieldType::OptionalI64 => DecodedData::OptionalI64(field.field_data.parse::<i64>().unwrap_or_default()),
452                            FieldType::ColourRGB => DecodedData::ColourRGB(field.field_data.to_string()),
453                            FieldType::StringU8 => DecodedData::StringU8(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
454                            FieldType::StringU16 => DecodedData::StringU16(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
455                            FieldType::OptionalStringU8 => DecodedData::OptionalStringU8(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
456                            FieldType::OptionalStringU16 => DecodedData::OptionalStringU16(if field.field_data == "Frodo Best Waifu" { String::new() } else { field.field_data.to_string() }),
457
458                            // This type is not used in the raw tables so, if we find it, we skip it.
459                            FieldType::SequenceU16(_) | FieldType::SequenceU32(_) => continue,
460                        });
461                        break;
462                    }
463                }
464
465                // If the field doesn't exist, we create it empty.
466                if !exists {
467                    entry.push(match field_def.field_type() {
468                        FieldType::Boolean => DecodedData::Boolean(false),
469                        FieldType::F32 => DecodedData::F32(0.0),
470                        FieldType::F64 => DecodedData::F64(0.0),
471                        FieldType::I16 => DecodedData::I16(0),
472                        FieldType::I32 => DecodedData::I32(0),
473                        FieldType::I64 => DecodedData::I64(0),
474                        FieldType::OptionalI16 => DecodedData::OptionalI16(0),
475                        FieldType::OptionalI32 => DecodedData::OptionalI32(0),
476                        FieldType::OptionalI64 => DecodedData::OptionalI64(0),
477                        FieldType::ColourRGB => DecodedData::ColourRGB(String::new()),
478                        FieldType::StringU8 => DecodedData::StringU8(String::new()),
479                        FieldType::StringU16 => DecodedData::StringU16(String::new()),
480                        FieldType::OptionalStringU8 => DecodedData::OptionalStringU8(String::new()),
481                        FieldType::OptionalStringU16 => DecodedData::OptionalStringU16(String::new()),
482
483                        // This type is not used in the raw tables so, if we find it, we skip it.
484                        FieldType::SequenceU16(_) | FieldType::SequenceU32(_) => unimplemented!("Does this ever happen?"),
485                    });
486                }
487            }
488            entries.push(entry);
489        }
490
491        table.set_data(&entries)?;
492        Ok(table)
493    }
494}