Skip to main content

rpfm_lib/integrations/assembly_kit/
table_definition.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Assembly Kit table definition parsing and schema generation.
12//!
13//! This module handles the parsing of Assembly Kit schema files (table structure definitions)
14//! and their conversion to RPFM's internal schema format. It supports three different Assembly
15//! Kit versions used across Total War games.
16//!
17//! # Assembly Kit Schema Formats
18//!
19//! Different Total War games use different schema file formats:
20//!
21//! - **Version 0** (Empire, Napoleon): `.xsd` XML schema files with basic type and constraint information
22//! - **Version 1** (Shogun 2): `TWaD_*.xml` files with enhanced metadata
23//! - **Version 2** (Rome 2+): `TWaD_*.xml` files with full relationship data and field descriptions
24//!
25//! # Main Types
26//!
27//! ## Version 1 & 2 Formats
28//!
29//! - [`RawDefinition`]: Represents a complete table definition with all fields
30//! - [`RawField`]: Individual field definition with type, constraints, and relationship info
31//! - [`RawRelationshipsTable`]: Foreign key relationships between tables
32//! - [`RawRelationship`]: Single foreign key relationship
33//!
34//! ## Version 0 Format (Legacy)
35//!
36//! - [`RawDefinitionV0`]: XSD schema root structure
37//! - [`Element`]: XSD element with type and constraint information
38//! - [`Index`]: Database index definition (used to derive relationships)
39//!
40//! # Functionality
41//!
42//! The main operations this module provides:
43//!
44//! 1. **Batch Reading**: [`RawDefinition::read_all()`] reads all table definitions from a directory
45//! 2. **Individual Reading**: [`RawDefinition::read()`] parses a single definition file
46//! 3. **Field Filtering**: [`RawDefinition::get_non_localisable_fields()`] separates translatable fields
47//! 4. **Schema Conversion**: `From<&RawDefinition>` for [`Definition`] converts to RPFM format
48//!
49//! # Version 0 Processing
50//!
51//! Version 0 (Empire/Napoleon) uses a two-pass approach:
52//! 1. First pass: Parse XSD files and extract basic field information and primary keys
53//! 2. Second pass: Analyze index definitions to derive foreign key relationships
54//!
55//! This is necessary because Version 0 uses database-style indexes rather than explicit
56//! foreign key declarations.
57//!
58//! # Type Mapping
59//!
60//! Assembly Kit types are mapped to RPFM field types:
61//! - `yesno` → `Boolean`
62//! - `single` → `F32`, `double` → `F64`
63//! - `integer` → `I32`, `autonumber`/`card64` → `I64`
64//! - `colour` → `ColourRGB`
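65//! - `text`/`expression` → `StringU8` (`StringU16` for old games), or the `Optional*` variants when the field is not required
//! - any other type name → `StringU8` (`StringU16` for old games)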
66
67use itertools::Itertools;
68use rayon::prelude::*;
69use serde_derive::Deserialize;
70use serde_xml_rs::from_reader;
71
72use std::collections::BTreeMap;
73use std::fs::File;
74use std::io::{BufReader, Read};
75use std::path::Path;
76
77use crate::error::{Result, RLibError};
78
79use super::*;
80use super::get_raw_definition_paths;
81use super::localisable_fields::RawLocalisableField;
82use super::table_data::RawTableRow;
83
84//---------------------------------------------------------------------------//
85// Types for parsing the Assembly Kit Schema Files into.
86//---------------------------------------------------------------------------//
87
88/// Raw table definition parsed from Assembly Kit schema files.
89///
90/// This is the raw equivalent to RPFM's [`Definition`] struct. In Assembly Kit files,
91/// this corresponds to a `TWaD_*.xml` file (versions 1-2) or `.xsd` file (version 0).
92///
93/// # Fields
94///
95/// * `name` - Table file name without its leading `TWaD_` prefix (e.g., `"units_tables.xml"`)
96/// * `fields` - All field definitions for this table, one per `<field>` element
97///
98/// # Example Structure
99///
100/// A `TWaD_units_tables.xml` file contains field definitions like:
101/// ```xml
102/// <root>
103///   <field primary_key="1" name="key" field_type="text" required="1"/>
104///   <field primary_key="0" name="category" field_type="text" required="0"
105///          column_source_table="unit_categories_tables"
106///          column_source_column="key"/>
107/// </root>
108/// ```
109#[derive(Clone, Debug, Default, Deserialize)]
110#[serde(rename = "root")]
111pub struct RawDefinition {
112
113    /// Table name with `.xml` extension (e.g., `"units_tables.xml"`) and without the 'TWaD_' prefix.
    ///
    /// For versions 1 and 2 this is overwritten by [`RawDefinition::read()`] after deserialization,
    /// using the definition's file name with its first five characters removed.
    /// [`RawDefinition::get_non_localisable_fields()`] expects it to be set.
114    pub name: Option<String>,
115
116    /// All the field definitions within this table definition.
    ///
    /// Deserialized from the repeated `field` elements of the document.
117    #[serde(rename = "field")]
118    pub fields: Vec<RawField>,
119}
120
121/// Individual field definition from Assembly Kit schema.
122///
123/// This is the raw equivalent to RPFM's [`Field`] struct, containing all metadata
124/// about a single table column.
125///
126/// # Type Information
127///
128/// Assembly Kit uses string-based type names:
129/// - `"yesno"` - Boolean value
130/// - `"single"`, `"double"` - Floating point numbers
131/// - `"integer"` - 32-bit integer
132/// - `"autonumber"`, `"card64"` - 64-bit integer (often auto-incrementing)
133/// - `"text"`, `"expression"` - String data
134/// - `"colour"` - RGB color value
///
/// Any other type name is converted to a (non-optional) string type by the
/// `From<&RawField>` conversion into [`Field`].
135///
136/// # Foreign Key Relationships
137///
138/// Relationships are defined via `column_source_table` and `column_source_column`:
139/// - First element in `column_source_column` is the referenced primary key
140/// - Additional elements (if present) are lookup columns for concatenated display
141#[derive(Clone, Debug, Default, Deserialize)]
142#[serde(rename = "field")]
143pub struct RawField {
144
145    /// Primary key flag (`"1"` = true, `"0"` = false).
146    pub primary_key: String,
147
148    /// Field name (column name in the table).
149    pub name: String,
150
151    /// Assembly Kit type name (see struct documentation for type mapping).
152    pub field_type: String,
153
154    /// Required field flag (`"1"` = required, `"0"` = optional).
    ///
    /// For `text`/`expression` fields, any value other than `"1"` makes the
    /// converted [`Field`] use the optional string type.
155    pub required: String,
156
157    /// Default value for this field when creating new rows.
158    pub default_value: Option<String>,
159
160    /// Maximum allowed string length for text fields.
161    pub max_length: Option<String>,
162
163    /// Filename flag - indicates this field contains a game file path.
164    pub is_filename: Option<String>,
165
166    /// Relative path where referenced files should be located.
167    ///
168    /// Multiple paths can be specified. The conversion into [`Field`] splits this value on commas,
    /// trims the whitespace around each entry and re-joins the entries with semicolons.
169    pub filename_relative_path: Option<String>,
170
171    /// Fragment path (internal use, not useful for modders).
172    pub fragment_path: Option<String>,
173
174    /// Referenced column names for foreign key relationships.
175    ///
176    /// First element is the referenced primary key column.
177    /// Additional elements are lookup columns for composite display.
    /// The conversion into [`Field`] only uses this when `column_source_table` is also set.
178    pub column_source_column: Option<Vec<String>>,
179
180    /// Referenced table name for foreign key relationships.
181    pub column_source_table: Option<String>,
182
183    /// Human-readable description of the field's purpose.
184    pub field_description: Option<String>,
185
186    /// Encyclopaedia export flag (`"1"` = export, `"0"` = don't export).
187    ///
188    /// Indicates if this field should be included in game encyclopaedia exports.
189    pub encyclopaedia_export: Option<String>,
190
191    /// Highlight color flag for marking unused/deprecated fields.
192    ///
193    /// `"#c8c8c8"` (gray) indicates an unused field in Warhammer 3.
194    pub highlight_flag: Option<String>,
195
196    /// Custom flag for old game (Empire/Napoleon/Shogun 2) type handling.
197    ///
198    /// When true, uses UTF-16 strings instead of UTF-8. A missing value is treated as `false`.
199    pub is_old_game: Option<bool>,
200}
201
202/// Version 0 (Empire/Napoleon) XSD schema root structure.
203///
204/// Empire and Napoleon use `.xsd` XML Schema Definition files instead of
205/// the `TWaD_` format used in later games. This struct represents the root
206/// of such a schema file.
///
/// [`RawDefinition::read_all()`] converts each parsed schema into a [`RawDefinition`]
/// and then uses the index annotations of the schema to fill in the references.
207#[derive(Clone, Debug, Default, Deserialize)]
208#[serde(rename = "xsd_schema")]
209pub struct RawDefinitionV0 {
210    /// XSD elements defining the table structure.
    ///
    /// [`RawDefinition::read_all()`] reads the element at index 1 to obtain the
    /// table name and the index definitions of the table.
211    pub xsd_element: Vec<Element>,
212}
213
214/// Represents an XSD element definition from Assembly Kit v0 schema files.
215///
216/// Elements are the core building blocks of XSD schemas, representing individual
217/// fields in database tables. Each element can have type constraints (via `SimpleType`),
218/// nested structures (via `ComplexType`), and metadata annotations.
219///
220/// # Field Mapping
221///
222/// - `name`: Column name in the database table (or the table name, for the table-level element)
223/// - `jet_type`: Microsoft Jet database type (e.g., "Text", "Long", "Boolean")
224/// - `min_occurs`: Minimum occurrences (0 = optional, 1 = required)
225/// - `xsd_annotation`: Contains metadata like index definitions
226/// - `xsd_simple_type`: Type constraints (e.g., string max length)
227/// - `xsd_complex_type`: Nested element sequences for complex types
228#[derive(Clone, Debug, Default, Deserialize)]
229#[serde(rename = "xsd_element")]
230pub struct Element {
231    /// The name of this element (field/column name).
    ///
    /// For the second root element of a schema, [`RawDefinition::read_all()`]
    /// treats this value as the name of the table itself.
232    #[serde(rename = "@name")]
233    pub name: Option<String>,
234
235    /// Microsoft Jet database type identifier.
236    ///
237    /// Common values: "Text" (string), "Long" (i32), "Boolean", "Single" (f32), "Double" (f64).
238    #[serde(rename = "@od_jetType")]
239    pub jet_type: Option<String>,
240
241    /// Minimum number of occurrences for this element.
242    ///
243    /// - `0`: Field is optional
244    /// - `1` or higher: Field is required
245    #[serde(rename = "@minOccurs")]
246    pub min_occurs: Option<i32>,
247
248    /// Annotation containing metadata like index definitions.
    ///
    /// The indexes found here (through `xsd_appinfo` and `od_index`) are what
    /// [`RawDefinition::read_all()`] uses to derive references for version 0.
249    #[serde(rename = "xsd_annotation")]
250    pub xsd_annotation: Option<Annotation>,
251
252    /// Simple type definition with constraints (e.g., max string length).
253    #[serde(rename = "xsd_simpleType")]
254    pub xsd_simple_type: Option<Vec<SimpleType>>,
255
256    /// Complex type definition for nested element sequences.
257    #[serde(rename = "xsd_complexType")]
258    pub xsd_complex_type: Option<Vec<ComplexType>>,
259}
260
261/// Defines a simple type with restrictions in XSD schemas.
262///
263/// Simple types are used to apply constraints to basic data types, such as
264/// limiting the maximum length of a string field.
265#[derive(Clone, Debug, Default, Deserialize)]
266#[serde(rename = "xsd_simpleType")]
267pub struct SimpleType {
268    /// The restriction applied to this simple type (e.g., max length).
    ///
    /// `None` when the simple type carries no `xsd_restriction` child.
269    pub xsd_restriction: Option<Restriction>,
270}
271
272/// Defines a complex type containing nested element sequences.
273///
274/// Complex types are used when a field contains multiple sub-elements organized
275/// in a specific order. In Assembly Kit schemas, these are typically used for
276/// nested table structures, though most tables use simple flat structures.
277#[derive(Clone, Debug, Default, Deserialize)]
278#[serde(rename = "xsd_complexType")]
279pub struct ComplexType {
280    /// The ordered sequence of elements within this complex type.
    ///
    /// Unlike most members of the version 0 types, this one is not wrapped in an `Option`.
281    #[serde(rename = "xsd_sequence")]
282    pub xsd_sequence: Sequence,
283}
284
285/// Represents an ordered sequence of XSD elements.
286///
287/// Sequences define the order in which child elements must appear within
288/// a complex type. Each element in the sequence can itself be a simple or
289/// complex type.
290#[derive(Clone, Debug, Default, Deserialize)]
291#[serde(rename = "xsd_sequence")]
292pub struct Sequence {
293    /// The ordered list of elements in this sequence.
    ///
    /// These use the same [`Element`] type as the schema root, so sequences can nest.
294    pub xsd_element: Vec<Element>,
295}
296
297/// Defines restrictions/constraints on an XSD simple type.
298///
299/// Restrictions are used to constrain the values of a simple type, such as
300/// limiting the maximum length of a string. The `base` field specifies which
301/// base type the restriction applies to.
302#[derive(Clone, Debug, Default, Deserialize)]
303#[serde(rename = "xsd_restriction")]
304pub struct Restriction {
305    /// The base XSD type being restricted (e.g., "xsd:string", "xsd:int").
306    #[serde(rename = "@base")]
307    pub base: String,
308
309    /// Maximum length constraint for string types.
    ///
    /// NOTE: the Rust field name is misspelled (`max_lenght`). It is public, so renaming it
    /// would break its users; the XML name it is read from (`xsd_maxLength`) is unaffected.
310    #[serde(rename = "xsd_maxLength")]
311    pub max_lenght: Option<MaxLength>
312}
313
314/// Specifies the maximum length constraint for a string field.
315///
316/// This constraint limits how many characters a string field can contain.
317/// Used in XSD restrictions to define database column size limits.
318#[derive(Clone, Debug, Default, Deserialize)]
319#[serde(rename = "xsd_maxLength")]
320pub struct MaxLength {
321    /// The maximum number of characters allowed.
    ///
    /// Read from the `value` attribute of the element.
322    #[serde(rename = "@value")]
323    pub value: i32
324}
325
326/// Contains annotation metadata for XSD elements.
327///
328/// Annotations provide additional information about schema elements that isn't
329/// part of the core validation rules. In Assembly Kit schemas, annotations are
330/// primarily used to store database index definitions via the `AppInfo` structure.
331#[derive(Clone, Debug, Default, Deserialize)]
332#[serde(rename = "xsd_annotation")]
333pub struct Annotation {
334    /// Application-specific information, containing index definitions.
    ///
    /// `None` when the annotation has no `xsd_appinfo` child; such annotations
    /// contribute no indexes to the reference detection of [`RawDefinition::read_all()`].
335    #[serde(rename = "xsd_appinfo")]
336    pub xsd_appinfo: Option<AppInfo>
337}
338
339/// Contains application-specific information within XSD annotations.
340///
341/// This structure holds database-specific metadata that extends the base XSD schema.
342/// In Assembly Kit schemas, it primarily contains index definitions that describe
343/// primary keys, foreign keys, and unique constraints on table columns.
344#[derive(Clone, Debug, Default, Deserialize)]
345#[serde(rename = "xsd_appinfo")]
346pub struct AppInfo {
347    /// List of database index definitions for this element.
    ///
    /// [`RawDefinition::read_all()`] walks this list for the table-level element
    /// to work out which columns reference other tables.
348    #[serde(rename = "od_index")]
349    pub od_index: Option<Vec<Index>>
350}
351
352/// Defines a database index on a table column.
353///
354/// Indexes are used to derive foreign key relationships in Assembly Kit v0 schemas.
355/// Since v0 schemas don't explicitly define relationships between tables,
356/// [`RawDefinition::read_all()`] infers them from the name of each index.
357///
358/// # Relationship Inference
359///
360/// Index names are expected to be the remote table name followed by the local table name,
361/// limited to 61 characters. Removing the local table name from the end gives the remote table:
362///
363/// - Illustrative example: table "units" has index "buildingsunits" on column "building_key"
364/// - Stripping the local table name "units" leaves the remote table "buildings"
365/// - Result: units.building_key → the primary key column(s) of "buildings"
///
/// When the index name is exactly 61 characters long, the local table name may have been
/// cut short, so progressively shorter prefixes of it are tried as the suffix to remove.
/// Indexes named "PrimaryKey", or named like their own key column, are ignored.
366///
367/// # Boolean String Fields
368///
369/// The `primary`, `unique`, and `clustered` fields use string values "true"/"false"
370/// instead of booleans due to the XSD format.
371#[derive(Clone, Debug, Default, Deserialize)]
372#[serde(rename = "od_index")]
373pub struct Index {
374    /// The name of this index.
375    ///
376    /// For relationship indexes this encodes the remote and the local table names,
377    /// which is what the reference detection relies on (see the struct documentation).
378    #[serde(rename = "@index-name")]
379    pub name: String,
380
381    /// The column(s) this index applies to.
382    ///
383    /// Multiple columns are separated by semicolons (e.g., "col1;col2").
    /// Reference detection trims this value and compares it as a whole against a single
    /// field name, so only single-column keys can produce a reference.
384    #[serde(rename = "@index-key")]
385    pub key: String,
386
387    /// Whether this is a primary key index ("true"/"false").
388    #[serde(rename = "@primary")]
389    pub primary: String,
390
391    /// Whether this index enforces uniqueness ("true"/"false").
392    #[serde(rename = "@unique")]
393    pub unique: String,
394
395    /// Whether this is a clustered index ("true"/"false").
396    #[serde(rename = "@clustered")]
397    pub clustered: String,
398}
399
400/// Foreign key relationships table from Assembly Kit.
401///
402/// This corresponds to the `TWaD_relationships.xml` file found in Version 2
403/// Assembly Kits (Rome 2+). It defines all foreign key relationships between tables.
404#[derive(Clone, Debug, Default, Deserialize)]
405#[serde(rename = "root")]
406pub struct RawRelationshipsTable {
407    /// Table name (should be "relationships").
    ///
    /// Optional: `None` when the parsed document provides no name.
408    pub name: Option<String>,
409
410    /// All foreign key relationships defined in the Assembly Kit.
    ///
    /// Deserialized from the repeated `relationship` elements of the document.
411    #[serde(rename = "relationship")]
412    pub relationships: Vec<RawRelationship>,
413}
414
415/// Single foreign key relationship definition.
416///
417/// Defines a foreign key constraint from one table's column to another table's column.
/// All four members are mandatory strings; none of them is wrapped in an `Option`.
418///
419/// # Example
420///
421/// A relationship from `units_tables.category` to `unit_categories_tables.key`:
422/// ```xml
423/// <relationship>
424///   <table_name>units_tables</table_name>
425///   <column_name>category</column_name>
426///   <foreign_table_name>unit_categories_tables</foreign_table_name>
427///   <foreign_column_name>key</foreign_column_name>
428/// </relationship>
429/// ```
430#[derive(Clone, Debug, Default, Deserialize)]
431pub struct RawRelationship {
432    /// Source table name containing the foreign key column.
433    pub table_name: String,
434
435    /// Source column name (the foreign key field).
436    pub column_name: String,
437
438    /// Referenced table name.
439    pub foreign_table_name: String,
440
441    /// Referenced column name (typically a primary key).
442    pub foreign_column_name: String
443}
444
445//---------------------------------------------------------------------------//
446// Implementations
447//---------------------------------------------------------------------------//
448
449/// Implementation of `RawDefinition`.
450impl RawDefinition {
451
452    /// Reads all table definitions from an Assembly Kit directory.
453    ///
454    /// This function scans the provided directory for Assembly Kit definition files
455    /// and parses them into [`RawDefinition`] structs. The parsing logic varies
456    /// significantly by version.
457    ///
458    /// # Version-Specific Behavior
459    ///
460    /// ## Version 1 & 2 (Shogun 2, Rome 2+)
461    /// - Reads `TWaD_*.xml` files directly
462    /// - Each file is a complete, self-contained definition
463    ///
464    /// ## Version 0 (Empire, Napoleon)
465    /// - Reads `.xsd` XML Schema files
466    /// - Uses two-pass processing:
467    ///   1. Parse all XSD files and extract field info + primary keys
468    ///   2. Analyze index definitions to derive foreign key relationships
469    /// - This is necessary because Version 0 uses database-style indexes rather than
470    ///   explicit foreign key declarations
471    ///
472    /// # Arguments
473    ///
474    /// * `raw_definitions_folder` - Directory containing Assembly Kit definition files
475    /// * `version` - Assembly Kit version (0 = Empire/Napoleon, 1 = Shogun 2, 2 = Rome 2+)
476    /// * `tables_to_skip` - Table names (without extension) to exclude from parsing
477    ///
478    /// # Returns
479    ///
480    /// Returns a vector of successfully parsed table definitions. Tables in the
481    /// blacklist or skip list are excluded.
482    ///
483    /// # Errors
484    ///
485    /// Returns an error if:
486    /// - The version is unsupported (not 0, 1, or 2)
487    /// - The directory cannot be read
488    /// - Any definition file has malformed XML
489    pub fn read_all(raw_definitions_folder: &Path, version: i16, tables_to_skip: &[&str]) -> Result<Vec<Self>> {
490        let definitions = get_raw_definition_paths(raw_definitions_folder, version)?;
491        match version {
492            2 | 1 => {
493                definitions.iter()
494                    .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
495                    .filter(|x| {
496                        let table_name = x.file_stem().unwrap().to_str().unwrap().split_at(5).1;
497                        !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
498                    })
499                    .map(|x| Self::read(x, version))
500                    .collect::<Result<Vec<Self>>>()
501            }
502            0 => {
503                let v0s = definitions.iter()
504                    .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
505                    .filter(|x| {
506                        let table_name = x.file_stem().unwrap().to_str().unwrap();
507                        !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
508                    })
509                    .filter_map(|x| RawDefinitionV0::read(x).transpose())
510                    .map(|def_v0| {
511
512                        // NOTE: This from processes the primary keys already.
513                        let raw = match def_v0 {
514                            Ok(ref def_v0) => Self::from(def_v0),
515                            Err(_) => Self::default(),
516                        };
517                        def_v0.map(|def_v0| (def_v0, raw))
518                    })
519                    .collect::<Result<Vec<(RawDefinitionV0, RawDefinition)>>>()?;
520
521                // We need to do a second pass because without the entire set available we cannot figure out the references.
522                Ok(v0s.iter()
523                    .map(|(def_v0, new_def)| {
524                        let mut new_def = new_def.clone();
525
526                        if let Some(elements) = def_v0.xsd_element.get(1) {
527                            if let Some(ref table_name) = elements.name {
528                                if let Some(ref ann) = elements.xsd_annotation {
529                                    if let Some(ref app) = ann.xsd_appinfo {
530                                        if let Some(ref od_index) = app.od_index {
531                                            for index in od_index {
532
533                                                // Ignore indexes of unused fields, the primary key, and field-specific indexes.
534                                                if index.name == "PrimaryKey" || index.name == index.key.trim() {
535                                                    continue;
536                                                }
537
538                                                // Indexes follow the format "remotetablelocaltable", with a 61 char limit. To find the remote table,
539                                                // we need to remove the local one, and to do so, we need to find what part of the local one is actually in the index name.
540                                                let remote_table_name = if index.name.chars().count() == 61 {
541                                                    let mut table_name = table_name.clone();
542                                                    let mut remote_table_name = String::new();
543                                                    loop {
544                                                        if index.name.ends_with(&*table_name) {
545                                                            remote_table_name = index.name.clone();
546                                                            if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
547                                                                remote_table_name.truncate(sub);
548                                                            } else {
549                                                                remote_table_name = String::new();
550                                                            }
551                                                            break;
552                                                        } else {
553                                                            if table_name.is_empty() {
554                                                                break;
555                                                            }
556
557                                                            table_name.pop();
558                                                        }
559                                                    }
560
561                                                    remote_table_name
562                                                } else {
563                                                    let mut remote_table_name = index.name.clone();
564                                                    if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
565                                                        remote_table_name.truncate(sub);
566                                                    } else {
567                                                        remote_table_name = String::new();
568                                                    }
569                                                    remote_table_name
570                                                };
571
572                                                // Now we need to find the primary key of the remote table, if any.
573                                                if !remote_table_name.is_empty() {
574                                                    if let Some(remote_def) = v0s.par_iter().find_map_first(|(def_v0, new_def)| {
575                                                        if let Some(elements) = def_v0.xsd_element.get(1) {
576                                                            if let Some(ref table_name) = elements.name {
577                                                                if table_name == &remote_table_name {
578                                                                    Some(new_def)
579                                                                } else { None }
580                                                            } else { None }
581                                                        } else { None }
582                                                    }) {
583
584                                                        // No fucking clue if ANY reference is to a multikey table, but if is, we'll use the first key as ref key, and the rest as lookups.
585                                                        let primary_keys = remote_def.fields.iter().filter(|x| x.primary_key == "1" || x.name == "key").collect::<Vec<_>>();
586                                                        if !primary_keys.is_empty() {
587                                                            for field in &mut new_def.fields {
588                                                                if field.name == index.key.trim() {
589                                                                    field.column_source_table = Some(remote_table_name.to_string());
590                                                                    field.column_source_column = Some(primary_keys.iter().map(|x| x.name.to_string()).collect());
591                                                                }
592                                                            }
593                                                        }
594                                                    }
595                                                }
596                                            }
597                                        }
598                                    }
599                                }
600                            }
601                        }
602                        new_def
603                    })
604                    .collect())
605            }
606            _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
607        }
608    }
609
610    /// Parses a single Assembly Kit definition file.
611    ///
612    /// Reads and parses one table definition file from the Assembly Kit.
613    ///
614    /// # Arguments
615    ///
616    /// * `raw_definition_path` - Path to the definition file (e.g., `TWaD_units_tables.xml`)
617    /// * `version` - Assembly Kit version (1 = Shogun 2, 2 = Rome 2+)
618    ///
619    /// # Returns
620    ///
621    /// Returns the parsed [`RawDefinition`] with the table name set to the filename
622    /// without the `TWaD_` prefix (e.g., `"units_tables.xml"`).
623    ///
624    /// # Errors
625    ///
626    /// Returns an error if:
627    /// - The version is not 1 or 2 (use [`RawDefinitionV0::read()`] for version 0)
628    /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
629    /// - The XML is malformed
630    ///
631    /// # Note
632    ///
633    /// For Version 0 (Empire/Napoleon), use [`RawDefinitionV0::read()`] instead as the
634    /// file format is completely different (.xsd vs .xml).
635    pub fn read(raw_definition_path: &Path, version: i16) -> Result<Self> {
636        match version {
637            2 | 1 => {
638                let definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
639                let mut definition: Self = from_reader(definition_file)?;
640                definition.name = Some(raw_definition_path.file_name().unwrap().to_str().unwrap().split_at(5).1.to_string());
641                Ok(definition)
642            }
643
644            _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
645        }
646    }
647
648    /// Filters out localisable fields from the definition.
649    ///
650    /// Returns only the fields that are not marked as localisable (translatable) and
651    /// are present in the test row data. This is used when processing Assembly Kit
652    /// table data to separate regular fields from translation fields.
653    ///
654    /// # Arguments
655    ///
656    /// * `raw_localisable_fields` - List of all localisable fields from `TExc_LocalisableFields.xml`
657    /// * `test_row` - Sample row data used to verify field presence
658    ///
659    /// # Returns
660    ///
661    /// Returns a vector of [`Field`] instances for non-localisable fields that exist
662    /// in the test data.
663    ///
664    /// # Note
665    ///
666    /// Fields are excluded if:
667    /// - They're listed in `raw_localisable_fields` for this table
668    /// - They don't appear in the test row
669    /// - They have a "state" attribute (marked as modified/deprecated)
670    pub fn get_non_localisable_fields(&self, raw_localisable_fields: &[RawLocalisableField], test_row: &RawTableRow) -> Vec<Field> {
671        let raw_table_name = &self.name.as_ref().unwrap()[..self.name.as_ref().unwrap().len() - 4];
672        let localisable_fields_names = raw_localisable_fields.iter()
673            .filter(|x| x.table_name == raw_table_name)
674            .map(|x| &*x.field)
675            .collect::<Vec<&str>>();
676
677        self.fields.iter()
678            .filter(|x| match test_row.fields.iter().find(|y| x.name == y.field_name) {
679                Some(y) => y.state.is_none(),
680                None => false,
681            })
682            .filter(|x| !localisable_fields_names.contains(&&*x.name))
683            .map(From::from)
684            .collect::<Vec<Field>>()
685    }
686}
687
688impl From<&RawDefinition> for Definition {
689    fn from(raw_definition: &RawDefinition) -> Self {
690        let fields = raw_definition.fields.iter().map(From::from).collect::<Vec<_>>();
691        Self::new_with_fields(-100, &fields, &[], None)
692    }
693}
694
695
696impl From<&RawField> for Field {
697    fn from(raw_field: &RawField) -> Self {
698
699        let is_old_game = raw_field.is_old_game.unwrap_or(false);
700
701        let field_type = match &*raw_field.field_type {
702            "yesno" => FieldType::Boolean,
703            "single" => FieldType::F32,
704            "double" => FieldType::F64,
705            "integer" => FieldType::I32,
706            "autonumber" | "card64" => FieldType::I64,
707            "colour" => FieldType::ColourRGB,
708            "expression" | "text" => {
709                if raw_field.required == "1" {
710                    if is_old_game {
711                        FieldType::StringU16
712                    } else {
713                        FieldType::StringU8
714                    }
715                }
716                else if is_old_game {
717                    FieldType::OptionalStringU16
718                } else {
719                    FieldType::OptionalStringU8
720                }
721            },
722            _ => if is_old_game {
723                FieldType::StringU16
724            } else {
725                FieldType::StringU8
726            },
727        };
728
729        let (is_reference, lookup) = if let Some(x) = &raw_field.column_source_table {
730            if let Some(y) = &raw_field.column_source_column {
731                if y.len() > 1 { (Some((x.to_owned(), y[0].to_owned())), Some(y[1..].to_vec()))}
732                else { (Some((x.to_owned(), y[0].to_owned())), None) }
733            } else { (None, None) }
734        }
735        else { (None, None) };
736
737        // CA sometimes uses comma as separator, and has random spaces between paths.
738        let filename_relative_path = raw_field.filename_relative_path.clone().map(|x| {
739            x.split(',').map(|y| y.trim()).join(";")
740        });
741
742        // Some fields are marked as filename, but only have fragment paths, which do not seem to correlate to game file paths.
743        // We need to disable those to avoid false positives on diagnostics.
744        let is_filename = match raw_field.is_filename {
745            Some(_) => !(raw_field.fragment_path.is_some() && raw_field.filename_relative_path.is_none()),
746            None => false,
747        };
748
749        Self::new(
750            raw_field.name.to_owned(),
751            field_type,
752            raw_field.primary_key == "1",
753            raw_field.default_value.clone(),
754            is_filename,
755            filename_relative_path,
756            is_reference,
757            lookup,
758            if let Some(x) = &raw_field.field_description { x.to_owned() } else { String::new() },
759            0,
760            0,
761            BTreeMap::new(),
762            None
763        )
764    }
765}
766
767impl RawDefinitionV0 {
768
769    /// Parses a Version 0 (Empire/Napoleon) XSD schema file.
770    ///
771    /// Reads and parses an XSD (XML Schema Definition) file from the Empire or
772    /// Napoleon Assembly Kit. The XSD format is significantly different from the
773    /// `TWaD_` format used in later games.
774    ///
775    /// # Arguments
776    ///
777    /// * `raw_definition_path` - Path to the `.xsd` file
778    ///
779    /// # Returns
780    ///
781    /// Returns `Ok(Some(definition))` if the file was parsed successfully, `Ok(None)`
782    /// if the file was empty, or an error if parsing failed.
783    ///
784    /// # Errors
785    ///
786    /// Returns an error if:
787    /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
788    /// - The XML/XSD is malformed
789    ///
790    /// # Implementation Note
791    ///
792    /// Due to limitations in `serde_xml_rs`, this function performs extensive string
793    /// replacements on the XSD content before parsing to normalize XML namespace
794    /// prefixes (`xsd:` and `xs:` → `xsd_`, `od:` → `od_`).
795    pub fn read(raw_definition_path: &Path) -> Result<Option<Self>> {
796        let mut definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
797
798        // Before deserializing the data, due to limitations of serde_xml_rs, we have to rename all rows, because unique names for
799        // rows in each file is not supported for deserializing. Same for the fields, we have to change them to something more generic.
800        let mut buffer = String::new();
801        definition_file.read_to_string(&mut buffer)?;
802
803        if buffer.is_empty() {
804            return Ok(None)
805        }
806
807        // Rust doesn't like : in variable names when deserializing.
808        buffer = buffer.replace("xsd:schema", "xsd_schema");
809        buffer = buffer.replace("xsd:element", "xsd_element");
810        buffer = buffer.replace("xsd:complexType", "xsd_complexType");
811        buffer = buffer.replace("xsd:sequence", "xsd_sequence");
812        buffer = buffer.replace("xsd:attribute", "xsd_attribute");
813        buffer = buffer.replace("xsd:annotation", "xsd_annotation");
814        buffer = buffer.replace("xsd:appinfo", "xsd_appinfo");
815        buffer = buffer.replace("od:index", "od_index");
816        buffer = buffer.replace("xsd:sequence", "xsd_sequence");
817        buffer = buffer.replace("xsd:simpleType", "xsd_simpleType");
818        buffer = buffer.replace("xsd:restriction", "xsd_restriction");
819        buffer = buffer.replace("xsd:maxLength", "xsd_maxLength");
820        buffer = buffer.replace("od:jetType", "od_jetType");
821
822        buffer = buffer.replace("xs:schema", "xsd_schema");
823        buffer = buffer.replace("xs:element", "xsd_element");
824        buffer = buffer.replace("xs:complexType", "xsd_complexType");
825        buffer = buffer.replace("xs:sequence", "xsd_sequence");
826        buffer = buffer.replace("xs:attribute", "xsd_attribute");
827        buffer = buffer.replace("xs:annotation", "xsd_annotation");
828        buffer = buffer.replace("xs:appinfo", "xsd_appinfo");
829        buffer = buffer.replace("xs:sequence", "xsd_sequence");
830        buffer = buffer.replace("xs:simpleType", "xsd_simpleType");
831        buffer = buffer.replace("xs:restriction", "xsd_restriction");
832        buffer = buffer.replace("xs:maxLength", "xsd_maxLength");
833
834        // Only if the table has data we deserialize it. If not, we just create an empty one.
835        let definition: RawDefinitionV0 = from_reader(buffer.as_bytes())?;
836
837        //dbg!(&definition);
838        Ok(Some(definition))
839    }
840}
841
842/// Old games don't use references, but rather indexes like a database. This means we're unable to find
843/// the referenced column without having the reference definition. So ref data needs to be calculated after this.
844impl From<&RawDefinitionV0> for RawDefinition {
845    fn from(value: &RawDefinitionV0) -> Self {
846        let mut definition = Self::default();
847
848        // Second element has the fields.
849        if let Some(elements) = value.xsd_element.get(1) {
850            definition.name = elements.name.clone().map(|x| format!("{x}.xml"));
851
852            // Try to get the indexes to check what do we need to mark as key.
853            let primary_keys = if let Some(ref ann) = elements.xsd_annotation {
854                if let Some(ref app) = ann.xsd_appinfo {
855                    if let Some(ref od_index) = app.od_index {
856                        od_index.iter().find_map(|index| {
857                            if index.name == "PrimaryKey" {
858
859                                // Always trim to remove the final space, then split by space to find all the keys of the table.
860                                let keys = index.key.trim().split(' ').collect::<Vec<_>>();
861                                if keys.is_empty() {
862                                    None
863                                } else {
864                                    Some(keys)
865                                }
866                            } else {
867                                None
868                            }
869                        }).unwrap_or(vec![])
870                    } else { vec![] }
871                } else { vec![] }
872            } else { vec![] };
873
874            if let Some(complex) = &elements.xsd_complex_type {
875                if let Some(elements) = complex.first() {
876                    for element in &elements.xsd_sequence.xsd_element {
877
878                        // For a field to be valid we need name and type.
879                        if let Some(ref name) = element.name {
880                            if let Some(ref jet_type) = element.jet_type {
881
882                                let mut field = RawField::default();
883                                field.name = name.to_owned();
884
885                                field.field_type = match &**jet_type {
886                                    "yesno" => "yesno".to_owned(),
887                                    "integer" => "integer".to_owned(),
888                                    "longinteger" | "autonumber" => "autonumber".to_owned(),
889                                    "decimal" | "single" => "single".to_owned(),
890                                    "double" => "double".to_owned(),
891                                    "text" | "memo" | "oleobject" | "replicationid" => "text".to_owned(),
892
893                                    // These are dates as in a DateTime format. Treat them as text for now.
894                                    "datetime" => "text".to_owned(),
895
896                                    _ => todo!("{}", jet_type),
897                                };
898
899                                if primary_keys.contains(&&*field.name) {
900                                    field.primary_key = "1".to_owned();
901                                } else {
902                                    field.primary_key = "0".to_owned();
903                                }
904
905                                field.is_old_game = Some(true);
906
907                                definition.fields.push(field);
908                            }
909                        }
910                    }
911                }
912            }
913        }
914
915        definition
916    }
917}