Skip to main content

rpfm_lib/integrations/assembly_kit/
table_definition.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Assembly Kit table definition parsing and schema generation.
12//!
13//! This module handles the parsing of Assembly Kit schema files (table structure definitions)
14//! and their conversion to RPFM's internal schema format. It supports three different Assembly
15//! Kit versions used across Total War games.
16//!
17//! # Assembly Kit Schema Formats
18//!
19//! Different Total War games use different schema file formats:
20//!
21//! - **Version 0** (Empire, Napoleon): `.xsd` XML schema files with basic type and constraint information
22//! - **Version 1** (Shogun 2): `TWaD_*.xml` files with enhanced metadata
23//! - **Version 2** (Rome 2+): `TWaD_*.xml` files with full relationship data and field descriptions
24//!
25//! # Main Types
26//!
27//! ## Version 1 & 2 Formats
28//!
29//! - [`RawDefinition`]: Represents a complete table definition with all fields
30//! - [`RawField`]: Individual field definition with type, constraints, and relationship info
31//! - [`RawRelationshipsTable`]: Foreign key relationships between tables
32//! - [`RawRelationship`]: Single foreign key relationship
33//!
34//! ## Version 0 Format (Legacy)
35//!
36//! - [`RawDefinitionV0`]: XSD schema root structure
37//! - [`Element`]: XSD element with type and constraint information
38//! - [`Index`]: Database index definition (used to derive relationships)
39//!
40//! # Functionality
41//!
42//! The main operations this module provides:
43//!
44//! 1. **Batch Reading**: [`RawDefinition::read_all()`] reads all table definitions from a directory
45//! 2. **Individual Reading**: [`RawDefinition::read()`] parses a single definition file
46//! 3. **Field Filtering**: [`RawDefinition::get_non_localisable_fields()`] separates translatable fields
47//! 4. **Schema Conversion**: `From<&RawDefinition>` for [`Definition`] converts to RPFM format
48//!
49//! # Version 0 Processing
50//!
51//! Version 0 (Empire/Napoleon) uses a two-pass approach:
52//! 1. First pass: Parse XSD files and extract basic field information and primary keys
53//! 2. Second pass: Analyze index definitions to derive foreign key relationships
54//!
55//! This is necessary because Version 0 uses database-style indexes rather than explicit
56//! foreign key declarations.
57//!
58//! # Type Mapping
59//!
60//! Assembly Kit types are mapped to RPFM field types:
61//! - `yesno` → `Boolean`
62//! - `single` → `F32`, `double` → `F64`
63//! - `integer` → `I32`, `autonumber`/`card64` → `I64`
64//! - `colour` → `ColourRGB`
65//! - `text`/`expression` → `StringU8`/`StringU16` (or optional variants)
66
67use itertools::Itertools;
68use rayon::prelude::*;
69use serde_derive::Deserialize;
70use serde_xml_rs::from_reader;
71
72use std::fs::File;
73use std::io::{BufReader, Read};
74use std::path::Path;
75
76use crate::error::{Result, RLibError};
77
78use super::*;
79use super::get_raw_definition_paths;
80use super::localisable_fields::RawLocalisableField;
81use super::table_data::RawTableRow;
82
83//---------------------------------------------------------------------------//
84// Types for parsing the Assembly Kit Schema Files into.
85//---------------------------------------------------------------------------//
86
/// Raw table definition parsed from Assembly Kit schema files.
///
/// This is the raw equivalent to RPFM's [`Definition`] struct. In Assembly Kit files,
/// this corresponds to a `TWaD_*.xml` file (versions 1-2) or `.xsd` file (version 0).
///
/// It is deserialized from a `<root>` element, collecting every `<field>` child into a
/// [`RawField`]. A [`Definition`] can be built from a reference to this type through the
/// `From<&RawDefinition>` implementation in this module.
///
/// # Fields
///
/// * `name` - Table name with `.xml` extension (e.g., `"units_tables.xml"`)
/// * `fields` - All field definitions for this table
///
/// # Example Structure
///
/// A `TWaD_units_tables.xml` file contains field definitions like:
/// ```xml
/// <root>
///   <field primary_key="1" name="key" field_type="text" required="1"/>
///   <field primary_key="0" name="category" field_type="text" required="0"
///          column_source_table="unit_categories_tables"
///          column_source_column="key"/>
/// </root>
/// ```
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "root")]
pub struct RawDefinition {

    /// Table name with `.xml` extension (e.g., `"units_tables.xml"`) and without the 'TWaD_' prefix.
    ///
    /// [`RawDefinition::read()`] overwrites this after parsing with the file name minus its
    /// first five bytes (the `TWaD_` prefix). It is `None` on a default-constructed value.
    pub name: Option<String>,

    /// All the field definitions within this table definition.
    ///
    /// Mapped from the repeated `<field>` elements of the schema file.
    #[serde(rename = "field")]
    pub fields: Vec<RawField>,
}
119
/// Individual field definition from Assembly Kit schema.
///
/// This is the raw equivalent to RPFM's [`Field`] struct, containing all metadata
/// about a single table column.
///
/// # Type Information
///
/// Assembly Kit uses string-based type names:
/// - `"yesno"` - Boolean value
/// - `"single"`, `"double"` - Floating point numbers
/// - `"integer"` - 32-bit integer
/// - `"autonumber"`, `"card64"` - 64-bit integer (often auto-incrementing)
/// - `"text"`, `"expression"` - String data
/// - `"colour"` - RGB color value
///
/// Any other type name is converted to a plain (non-optional) string type by the
/// `From<&RawField>` implementation for [`Field`].
///
/// # Foreign Key Relationships
///
/// Relationships are defined via `column_source_table` and `column_source_column`:
/// - First element in `column_source_column` is the referenced primary key
/// - Additional elements (if present) are lookup columns for concatenated display
///
/// Both values must be present for the conversion to [`Field`] to produce a reference.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "field")]
pub struct RawField {

    /// Primary key flag (`"1"` = true, `"0"` = false).
    pub primary_key: String,

    /// Field name (column name in the table).
    pub name: String,

    /// Assembly Kit type name (see struct documentation for type mapping).
    pub field_type: String,

    /// Required field flag (`"1"` = required, `"0"` = optional).
    ///
    /// For `text`/`expression` fields, any value other than `"1"` makes the converted
    /// [`Field`] use an optional string type.
    pub required: String,

    /// Default value for this field when creating new rows.
    pub default_value: Option<String>,

    /// Maximum allowed string length for text fields.
    pub max_length: Option<String>,

    /// Filename flag - indicates this field contains a game file path.
    pub is_filename: Option<String>,

    /// Relative path where referenced files should be located.
    ///
    /// Multiple paths can be specified. The raw value is not normalised: the conversion
    /// to [`Field`] splits it on commas, trims each entry and re-joins them with semicolons.
    pub filename_relative_path: Option<String>,

    /// Fragment path (internal use, not useful for modders).
    pub fragment_path: Option<String>,

    /// Referenced column names for foreign key relationships.
    ///
    /// First element is the referenced primary key column.
    /// Additional elements are lookup columns for composite display.
    pub column_source_column: Option<Vec<String>>,

    /// Referenced table name for foreign key relationships.
    pub column_source_table: Option<String>,

    /// Human-readable description of the field's purpose.
    pub field_description: Option<String>,

    /// Encyclopaedia export flag (`"1"` = export, `"0"` = don't export).
    ///
    /// Indicates if this field should be included in game encyclopaedia exports.
    pub encyclopaedia_export: Option<String>,

    /// Highlight color flag for marking unused/deprecated fields.
    ///
    /// `"#c8c8c8"` (gray) indicates an unused field in Warhammer 3.
    pub highlight_flag: Option<String>,

    /// Custom flag for old game (Empire/Napoleon/Shogun 2) type handling.
    ///
    /// When true, uses UTF-16 strings instead of UTF-8. A missing value (`None`) is
    /// treated as `false` when converting to [`Field`].
    pub is_old_game: Option<bool>,
}
200
/// Version 0 (Empire/Napoleon) XSD schema root structure.
///
/// Empire and Napoleon use `.xsd` XML Schema Definition files instead of
/// the `TWaD_` format used in later games. This struct represents the root
/// of such a schema file.
///
/// [`RawDefinition::read_all()`] takes the element at index 1 of `xsd_element` as the
/// one describing the table: its name is used as the table name and its annotation
/// provides the index definitions used to derive references.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_schema")]
pub struct RawDefinitionV0 {
    /// XSD elements defining the table structure.
    pub xsd_element: Vec<Element>,
}
212
/// Represents an XSD element definition from Assembly Kit v0 schema files.
///
/// Elements are the core building blocks of XSD schemas, representing individual
/// fields in database tables. Each element can have type constraints (via `SimpleType`),
/// nested structures (via `ComplexType`), and metadata annotations.
///
/// The type is recursive: a [`ComplexType`] holds a [`Sequence`], which in turn holds
/// more `Element`s.
///
/// # Field Mapping
///
/// - `name`: Column name in the database table
/// - `jet_type`: Microsoft Jet database type (e.g., "Text", "Long", "Boolean")
/// - `min_occurs`: Minimum occurrences (0 = optional, 1 = required)
/// - `xsd_annotation`: Contains metadata like index definitions
/// - `xsd_simple_type`: Type constraints (e.g., string max length)
/// - `xsd_complex_type`: Nested element sequences for complex types
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_element")]
pub struct Element {
    /// The name of this element (field/column name).
    ///
    /// For the element describing the table itself, this is the table name.
    #[serde(rename = "@name")]
    pub name: Option<String>,

    /// Microsoft Jet database type identifier.
    ///
    /// Common values: "Text" (string), "Long" (i32), "Boolean", "Single" (f32), "Double" (f64).
    #[serde(rename = "@od_jetType")]
    pub jet_type: Option<String>,

    /// Minimum number of occurrences for this element.
    ///
    /// - `0`: Field is optional
    /// - `1` or higher: Field is required
    #[serde(rename = "@minOccurs")]
    pub min_occurs: Option<i32>,

    /// Annotation containing metadata like index definitions.
    #[serde(rename = "xsd_annotation")]
    pub xsd_annotation: Option<Annotation>,

    /// Simple type definition with constraints (e.g., max string length).
    #[serde(rename = "xsd_simpleType")]
    pub xsd_simple_type: Option<Vec<SimpleType>>,

    /// Complex type definition for nested element sequences.
    #[serde(rename = "xsd_complexType")]
    pub xsd_complex_type: Option<Vec<ComplexType>>,
}
259
/// Defines a simple type with restrictions in XSD schemas.
///
/// Simple types are used to apply constraints to basic data types, such as
/// limiting the maximum length of a string field.
///
/// Stored inside [`Element::xsd_simple_type`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_simpleType")]
pub struct SimpleType {
    /// The restriction applied to this simple type (e.g., max length), if one is present.
    pub xsd_restriction: Option<Restriction>,
}
270
/// Defines a complex type containing nested element sequences.
///
/// Complex types are used when a field contains multiple sub-elements organized
/// in a specific order. In Assembly Kit schemas, these are typically used for
/// nested table structures, though most tables use simple flat structures.
///
/// Stored inside [`Element::xsd_complex_type`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_complexType")]
pub struct ComplexType {
    /// The ordered sequence of elements within this complex type.
    ///
    /// Unlike most members of the v0 types this one is not optional.
    #[serde(rename = "xsd_sequence")]
    pub xsd_sequence: Sequence,
}
283
/// Represents an ordered sequence of XSD elements.
///
/// Sequences define the order in which child elements must appear within
/// a complex type. Each element in the sequence can itself be a simple or
/// complex type.
///
/// Stored inside [`ComplexType::xsd_sequence`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_sequence")]
pub struct Sequence {
    /// The ordered list of elements in this sequence.
    pub xsd_element: Vec<Element>,
}
295
/// Defines restrictions/constraints on an XSD simple type.
///
/// Restrictions are used to constrain the values of a simple type, such as
/// limiting the maximum length of a string. The `base` field specifies which
/// base type the restriction applies to.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_restriction")]
pub struct Restriction {
    /// The base XSD type being restricted (e.g., "xsd:string", "xsd:int").
    #[serde(rename = "@base")]
    pub base: String,

    /// Maximum length constraint for string types.
    ///
    /// NOTE: the field name is misspelled (`max_lenght`). It is public, so renaming it
    /// would break existing users; the XML name it maps to (`xsd_maxLength`) is unaffected.
    #[serde(rename = "xsd_maxLength")]
    pub max_lenght: Option<MaxLength>
}
312
/// Specifies the maximum length constraint for a string field.
///
/// This constraint limits how many characters a string field can contain.
/// Used in XSD restrictions to define database column size limits.
///
/// Stored inside [`Restriction::max_lenght`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_maxLength")]
pub struct MaxLength {
    /// The maximum number of characters allowed, read from the `value` attribute.
    #[serde(rename = "@value")]
    pub value: i32
}
324
/// Contains annotation metadata for XSD elements.
///
/// Annotations provide additional information about schema elements that isn't
/// part of the core validation rules. In Assembly Kit schemas, annotations are
/// primarily used to store database index definitions via the `AppInfo` structure.
///
/// Stored inside [`Element::xsd_annotation`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_annotation")]
pub struct Annotation {
    /// Application-specific information, containing index definitions.
    #[serde(rename = "xsd_appinfo")]
    pub xsd_appinfo: Option<AppInfo>
}
337
/// Contains application-specific information within XSD annotations.
///
/// This structure holds database-specific metadata that extends the base XSD schema.
/// In Assembly Kit schemas, it primarily contains index definitions that describe
/// primary keys, foreign keys, and unique constraints on table columns.
///
/// Stored inside [`Annotation::xsd_appinfo`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_appinfo")]
pub struct AppInfo {
    /// List of database index definitions for this element.
    ///
    /// [`RawDefinition::read_all()`] walks this list to derive references between
    /// Version 0 tables.
    #[serde(rename = "od_index")]
    pub od_index: Option<Vec<Index>>
}
350
/// Defines a database index on a table column.
///
/// Indexes are used to derive foreign key relationships in Assembly Kit v0 schemas.
/// Since v0 schemas don't explicitly define relationships between tables, RPFM
/// infers them from the index names.
///
/// # Relationship Inference
///
/// [`RawDefinition::read_all()`] processes the indexes of each table as follows:
///
/// - Indexes named `PrimaryKey`, and indexes whose name equals their own (trimmed)
///   key, are ignored.
/// - Any other index name is read as the remote table name followed by the local table
///   name. Names are limited to 61 characters, so the local part may be cut short.
/// - The local part is removed to obtain the remote table name. If a table with that name
///   was read and it has primary key columns, the local column named by `key` is set to
///   reference them.
///
/// Illustrative example (names are made up):
///
/// - Table `units` has an index named `unit_categoriesunits` with key `category`
/// - Removing the local table name leaves `unit_categories`
/// - RPFM infers: units.category → primary key of unit_categories
///
/// # Boolean String Fields
///
/// The `primary`, `unique`, and `clustered` fields use string values "true"/"false"
/// instead of booleans due to the XSD format.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "od_index")]
pub struct Index {
    /// The name of this index.
    ///
    /// For relationship indexes this is the remote table name followed by the local
    /// table name (see the struct documentation).
    #[serde(rename = "@index-name")]
    pub name: String,

    /// The column(s) this index applies to.
    ///
    /// Multiple columns are separated by semicolons (e.g., "col1;col2").
    ///
    /// NOTE(review): `read_all` compares the whole trimmed value against a single column
    /// name, so a multi-column key would not match any field there — confirm whether
    /// multi-column keys actually occur in relationship indexes.
    #[serde(rename = "@index-key")]
    pub key: String,

    /// Whether this is a primary key index ("true"/"false").
    #[serde(rename = "@primary")]
    pub primary: String,

    /// Whether this index enforces uniqueness ("true"/"false").
    #[serde(rename = "@unique")]
    pub unique: String,

    /// Whether this is a clustered index ("true"/"false").
    #[serde(rename = "@clustered")]
    pub clustered: String,
}
398
/// Foreign key relationships table from Assembly Kit.
///
/// This corresponds to the `TWaD_relationships.xml` file found in Version 2
/// Assembly Kits (Rome 2+). It defines all foreign key relationships between tables.
///
/// Deserialized from a `<root>` element whose `<relationship>` children become
/// [`RawRelationship`] entries.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "root")]
pub struct RawRelationshipsTable {
    /// Table name (should be "relationships").
    pub name: Option<String>,

    /// All foreign key relationships defined in the Assembly Kit.
    #[serde(rename = "relationship")]
    pub relationships: Vec<RawRelationship>,
}
413
/// Single foreign key relationship definition.
///
/// Defines a foreign key constraint from one table's column to another table's column.
/// All four members are mandatory when deserializing.
///
/// # Example
///
/// A relationship from `units_tables.category` to `unit_categories_tables.key`:
/// ```xml
/// <relationship>
///   <table_name>units_tables</table_name>
///   <column_name>category</column_name>
///   <foreign_table_name>unit_categories_tables</foreign_table_name>
///   <foreign_column_name>key</foreign_column_name>
/// </relationship>
/// ```
#[derive(Clone, Debug, Default, Deserialize)]
pub struct RawRelationship {
    /// Source table name containing the foreign key column.
    pub table_name: String,

    /// Source column name (the foreign key field).
    pub column_name: String,

    /// Referenced table name.
    pub foreign_table_name: String,

    /// Referenced column name (typically a primary key).
    pub foreign_column_name: String
}
443
444//---------------------------------------------------------------------------//
445// Implementations
446//---------------------------------------------------------------------------//
447
448/// Implementation of `RawDefinition`.
449impl RawDefinition {
450
451    /// Reads all table definitions from an Assembly Kit directory.
452    ///
453    /// This function scans the provided directory for Assembly Kit definition files
454    /// and parses them into [`RawDefinition`] structs. The parsing logic varies
455    /// significantly by version.
456    ///
457    /// # Version-Specific Behavior
458    ///
459    /// ## Version 1 & 2 (Shogun 2, Rome 2+)
460    /// - Reads `TWaD_*.xml` files directly
461    /// - Each file is a complete, self-contained definition
462    ///
463    /// ## Version 0 (Empire, Napoleon)
464    /// - Reads `.xsd` XML Schema files
465    /// - Uses two-pass processing:
466    ///   1. Parse all XSD files and extract field info + primary keys
467    ///   2. Analyze index definitions to derive foreign key relationships
468    /// - This is necessary because Version 0 uses database-style indexes rather than
469    ///   explicit foreign key declarations
470    ///
471    /// # Arguments
472    ///
473    /// * `raw_definitions_folder` - Directory containing Assembly Kit definition files
474    /// * `version` - Assembly Kit version (0 = Empire/Napoleon, 1 = Shogun 2, 2 = Rome 2+)
475    /// * `tables_to_skip` - Table names (without extension) to exclude from parsing
476    ///
477    /// # Returns
478    ///
479    /// Returns a vector of successfully parsed table definitions. Tables in the
480    /// blacklist or skip list are excluded.
481    ///
482    /// # Errors
483    ///
484    /// Returns an error if:
485    /// - The version is unsupported (not 0, 1, or 2)
486    /// - The directory cannot be read
487    /// - Any definition file has malformed XML
488    pub fn read_all(raw_definitions_folder: &Path, version: i16, tables_to_skip: &[&str]) -> Result<Vec<Self>> {
489        let definitions = get_raw_definition_paths(raw_definitions_folder, version)?;
490        match version {
491            2 | 1 => {
492                definitions.iter()
493                    .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
494                    .filter(|x| {
495                        let table_name = x.file_stem().unwrap().to_str().unwrap().split_at(5).1;
496                        !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
497                    })
498                    .map(|x| Self::read(x, version))
499                    .collect::<Result<Vec<Self>>>()
500            }
501            0 => {
502                let v0s = definitions.iter()
503                    .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
504                    .filter(|x| {
505                        let table_name = x.file_stem().unwrap().to_str().unwrap();
506                        !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
507                    })
508                    .filter_map(|x| RawDefinitionV0::read(x).transpose())
509                    .map(|def_v0| {
510
511                        // NOTE: This from processes the primary keys already.
512                        let raw = match def_v0 {
513                            Ok(ref def_v0) => Self::from(def_v0),
514                            Err(_) => Self::default(),
515                        };
516                        def_v0.map(|def_v0| (def_v0, raw))
517                    })
518                    .collect::<Result<Vec<(RawDefinitionV0, RawDefinition)>>>()?;
519
520                // We need to do a second pass because without the entire set available we cannot figure out the references.
521                Ok(v0s.iter()
522                    .map(|(def_v0, new_def)| {
523                        let mut new_def = new_def.clone();
524
525                        if let Some(elements) = def_v0.xsd_element.get(1) {
526                            if let Some(ref table_name) = elements.name {
527                                if let Some(ref ann) = elements.xsd_annotation {
528                                    if let Some(ref app) = ann.xsd_appinfo {
529                                        if let Some(ref od_index) = app.od_index {
530                                            for index in od_index {
531
532                                                // Ignore indexes of unused fields, the primary key, and field-specific indexes.
533                                                if index.name == "PrimaryKey" || index.name == index.key.trim() {
534                                                    continue;
535                                                }
536
537                                                // Indexes follow the format "remotetablelocaltable", with a 61 char limit. To find the remote table,
538                                                // we need to remove the local one, and to do so, we need to find what part of the local one is actually in the index name.
539                                                let remote_table_name = if index.name.chars().count() == 61 {
540                                                    let mut table_name = table_name.clone();
541                                                    let mut remote_table_name = String::new();
542                                                    loop {
543                                                        if index.name.ends_with(&*table_name) {
544                                                            remote_table_name = index.name.clone();
545                                                            if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
546                                                                remote_table_name.truncate(sub);
547                                                            } else {
548                                                                remote_table_name = String::new();
549                                                            }
550                                                            break;
551                                                        } else {
552                                                            if table_name.is_empty() {
553                                                                break;
554                                                            }
555
556                                                            table_name.pop();
557                                                        }
558                                                    }
559
560                                                    remote_table_name
561                                                } else {
562                                                    let mut remote_table_name = index.name.clone();
563                                                    if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
564                                                        remote_table_name.truncate(sub);
565                                                    } else {
566                                                        remote_table_name = String::new();
567                                                    }
568                                                    remote_table_name
569                                                };
570
571                                                // Now we need to find the primary key of the remote table, if any.
572                                                if !remote_table_name.is_empty() {
573                                                    if let Some(remote_def) = v0s.par_iter().find_map_first(|(def_v0, new_def)| {
574                                                        if let Some(elements) = def_v0.xsd_element.get(1) {
575                                                            if let Some(ref table_name) = elements.name {
576                                                                if table_name == &remote_table_name {
577                                                                    Some(new_def)
578                                                                } else { None }
579                                                            } else { None }
580                                                        } else { None }
581                                                    }) {
582
583                                                        // No fucking clue if ANY reference is to a multikey table, but if is, we'll use the first key as ref key, and the rest as lookups.
584                                                        let primary_keys = remote_def.fields.iter().filter(|x| x.primary_key == "1" || x.name == "key").collect::<Vec<_>>();
585                                                        if !primary_keys.is_empty() {
586                                                            for field in &mut new_def.fields {
587                                                                if field.name == index.key.trim() {
588                                                                    field.column_source_table = Some(remote_table_name.to_string());
589                                                                    field.column_source_column = Some(primary_keys.iter().map(|x| x.name.to_string()).collect());
590                                                                }
591                                                            }
592                                                        }
593                                                    }
594                                                }
595                                            }
596                                        }
597                                    }
598                                }
599                            }
600                        }
601                        new_def
602                    })
603                    .collect())
604            }
605            _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
606        }
607    }
608
609    /// Parses a single Assembly Kit definition file.
610    ///
611    /// Reads and parses one table definition file from the Assembly Kit.
612    ///
613    /// # Arguments
614    ///
615    /// * `raw_definition_path` - Path to the definition file (e.g., `TWaD_units_tables.xml`)
616    /// * `version` - Assembly Kit version (1 = Shogun 2, 2 = Rome 2+)
617    ///
618    /// # Returns
619    ///
620    /// Returns the parsed [`RawDefinition`] with the table name set to the filename
621    /// without the `TWaD_` prefix (e.g., `"units_tables.xml"`).
622    ///
623    /// # Errors
624    ///
625    /// Returns an error if:
626    /// - The version is not 1 or 2 (use [`RawDefinitionV0::read()`] for version 0)
627    /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
628    /// - The XML is malformed
629    ///
630    /// # Note
631    ///
632    /// For Version 0 (Empire/Napoleon), use [`RawDefinitionV0::read()`] instead as the
633    /// file format is completely different (.xsd vs .xml).
634    pub fn read(raw_definition_path: &Path, version: i16) -> Result<Self> {
635        match version {
636            2 | 1 => {
637                let definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
638                let mut definition: Self = from_reader(definition_file)?;
639                definition.name = Some(raw_definition_path.file_name().unwrap().to_str().unwrap().split_at(5).1.to_string());
640                Ok(definition)
641            }
642
643            _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
644        }
645    }
646
647    /// Filters out localisable fields from the definition.
648    ///
649    /// Returns only the fields that are not marked as localisable (translatable) and
650    /// are present in the test row data. This is used when processing Assembly Kit
651    /// table data to separate regular fields from translation fields.
652    ///
653    /// # Arguments
654    ///
655    /// * `raw_localisable_fields` - List of all localisable fields from `TExc_LocalisableFields.xml`
656    /// * `test_row` - Sample row data used to verify field presence
657    ///
658    /// # Returns
659    ///
660    /// Returns a vector of [`Field`] instances for non-localisable fields that exist
661    /// in the test data.
662    ///
663    /// # Note
664    ///
665    /// Fields are excluded if:
666    /// - They're listed in `raw_localisable_fields` for this table
667    /// - They don't appear in the test row
668    /// - They have a "state" attribute (marked as modified/deprecated)
669    pub fn get_non_localisable_fields(&self, raw_localisable_fields: &[RawLocalisableField], test_row: &RawTableRow) -> Vec<Field> {
670        let raw_table_name = &self.name.as_ref().unwrap()[..self.name.as_ref().unwrap().len() - 4];
671        let localisable_fields_names = raw_localisable_fields.iter()
672            .filter(|x| x.table_name == raw_table_name)
673            .map(|x| &*x.field)
674            .collect::<Vec<&str>>();
675
676        self.fields.iter()
677            .filter(|x| match test_row.fields.iter().find(|y| x.name == y.field_name) {
678                Some(y) => y.state.is_none(),
679                None => false,
680            })
681            .filter(|x| !localisable_fields_names.contains(&&*x.name))
682            .map(From::from)
683            .collect::<Vec<Field>>()
684    }
685}
686
687impl From<&RawDefinition> for Definition {
688    fn from(raw_definition: &RawDefinition) -> Self {
689        let fields = raw_definition.fields.iter().map(From::from).collect::<Vec<_>>();
690        Self::new_with_fields(-100, &fields, &[], None)
691    }
692}
693
694
695impl From<&RawField> for Field {
696    fn from(raw_field: &RawField) -> Self {
697
698        let is_old_game = raw_field.is_old_game.unwrap_or(false);
699
700        let field_type = match &*raw_field.field_type {
701            "yesno" => FieldType::Boolean,
702            "single" => FieldType::F32,
703            "double" => FieldType::F64,
704            "integer" => FieldType::I32,
705            "autonumber" | "card64" => FieldType::I64,
706            "colour" => FieldType::ColourRGB,
707            "expression" | "text" => {
708                if raw_field.required == "1" {
709                    if is_old_game {
710                        FieldType::StringU16
711                    } else {
712                        FieldType::StringU8
713                    }
714                }
715                else if is_old_game {
716                    FieldType::OptionalStringU16
717                } else {
718                    FieldType::OptionalStringU8
719                }
720            },
721            _ => if is_old_game {
722                FieldType::StringU16
723            } else {
724                FieldType::StringU8
725            },
726        };
727
728        let (is_reference, lookup) = if let Some(x) = &raw_field.column_source_table {
729            if let Some(y) = &raw_field.column_source_column {
730                if y.len() > 1 { (Some((x.to_owned(), y[0].to_owned())), Some(y[1..].to_vec()))}
731                else { (Some((x.to_owned(), y[0].to_owned())), None) }
732            } else { (None, None) }
733        }
734        else { (None, None) };
735
736        // CA sometimes uses comma as separator, and has random spaces between paths.
737        let filename_relative_path = raw_field.filename_relative_path.clone().map(|x| {
738            x.split(',').map(|y| y.trim()).join(";")
739        });
740
741        // Some fields are marked as filename, but only have fragment paths, which do not seem to correlate to game file paths.
742        // We need to disable those to avoid false positives on diagnostics.
743        let is_filename = match raw_field.is_filename {
744            Some(_) => !(raw_field.fragment_path.is_some() && raw_field.filename_relative_path.is_none()),
745            None => false,
746        };
747
748        Self {
749            name: raw_field.name.to_owned(),
750            field_type,
751            is_key: raw_field.primary_key == "1",
752            default_value: raw_field.default_value.clone(),
753            is_filename,
754            filename_relative_path,
755            is_reference,
756            lookup,
757            description: if let Some(x) = &raw_field.field_description { x.to_owned() } else { String::new() },
758            ..Default::default()
759        }
760    }
761}
762
763impl RawDefinitionV0 {
764
765    /// Parses a Version 0 (Empire/Napoleon) XSD schema file.
766    ///
767    /// Reads and parses an XSD (XML Schema Definition) file from the Empire or
768    /// Napoleon Assembly Kit. The XSD format is significantly different from the
769    /// `TWaD_` format used in later games.
770    ///
771    /// # Arguments
772    ///
773    /// * `raw_definition_path` - Path to the `.xsd` file
774    ///
775    /// # Returns
776    ///
777    /// Returns `Ok(Some(definition))` if the file was parsed successfully, `Ok(None)`
778    /// if the file was empty, or an error if parsing failed.
779    ///
780    /// # Errors
781    ///
782    /// Returns an error if:
783    /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
784    /// - The XML/XSD is malformed
785    ///
786    /// # Implementation Note
787    ///
788    /// Due to limitations in `serde_xml_rs`, this function performs extensive string
789    /// replacements on the XSD content before parsing to normalize XML namespace
790    /// prefixes (`xsd:` and `xs:` → `xsd_`, `od:` → `od_`).
791    pub fn read(raw_definition_path: &Path) -> Result<Option<Self>> {
792        let mut definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
793
794        // Before deserializing the data, due to limitations of serde_xml_rs, we have to rename all rows, because unique names for
795        // rows in each file is not supported for deserializing. Same for the fields, we have to change them to something more generic.
796        let mut buffer = String::new();
797        definition_file.read_to_string(&mut buffer)?;
798
799        if buffer.is_empty() {
800            return Ok(None)
801        }
802
803        // Rust doesn't like : in variable names when deserializing.
804        buffer = buffer.replace("xsd:schema", "xsd_schema");
805        buffer = buffer.replace("xsd:element", "xsd_element");
806        buffer = buffer.replace("xsd:complexType", "xsd_complexType");
807        buffer = buffer.replace("xsd:sequence", "xsd_sequence");
808        buffer = buffer.replace("xsd:attribute", "xsd_attribute");
809        buffer = buffer.replace("xsd:annotation", "xsd_annotation");
810        buffer = buffer.replace("xsd:appinfo", "xsd_appinfo");
811        buffer = buffer.replace("od:index", "od_index");
812        buffer = buffer.replace("xsd:sequence", "xsd_sequence");
813        buffer = buffer.replace("xsd:simpleType", "xsd_simpleType");
814        buffer = buffer.replace("xsd:restriction", "xsd_restriction");
815        buffer = buffer.replace("xsd:maxLength", "xsd_maxLength");
816        buffer = buffer.replace("od:jetType", "od_jetType");
817
818        buffer = buffer.replace("xs:schema", "xsd_schema");
819        buffer = buffer.replace("xs:element", "xsd_element");
820        buffer = buffer.replace("xs:complexType", "xsd_complexType");
821        buffer = buffer.replace("xs:sequence", "xsd_sequence");
822        buffer = buffer.replace("xs:attribute", "xsd_attribute");
823        buffer = buffer.replace("xs:annotation", "xsd_annotation");
824        buffer = buffer.replace("xs:appinfo", "xsd_appinfo");
825        buffer = buffer.replace("xs:sequence", "xsd_sequence");
826        buffer = buffer.replace("xs:simpleType", "xsd_simpleType");
827        buffer = buffer.replace("xs:restriction", "xsd_restriction");
828        buffer = buffer.replace("xs:maxLength", "xsd_maxLength");
829
830        // Only if the table has data we deserialize it. If not, we just create an empty one.
831        let definition: RawDefinitionV0 = from_reader(buffer.as_bytes())?;
832
833        //dbg!(&definition);
834        Ok(Some(definition))
835    }
836}
837
838/// Old games don't use references, but rather indexes like a database. This means we're unable to find
839/// the referenced column without having the reference definition. So ref data needs to be calculated after this.
840impl From<&RawDefinitionV0> for RawDefinition {
841    fn from(value: &RawDefinitionV0) -> Self {
842        let mut definition = Self::default();
843
844        // Second element has the fields.
845        if let Some(elements) = value.xsd_element.get(1) {
846            definition.name = elements.name.clone().map(|x| format!("{x}.xml"));
847
848            // Try to get the indexes to check what do we need to mark as key.
849            let primary_keys = if let Some(ref ann) = elements.xsd_annotation {
850                if let Some(ref app) = ann.xsd_appinfo {
851                    if let Some(ref od_index) = app.od_index {
852                        od_index.iter().find_map(|index| {
853                            if index.name == "PrimaryKey" {
854
855                                // Always trim to remove the final space, then split by space to find all the keys of the table.
856                                let keys = index.key.trim().split(' ').collect::<Vec<_>>();
857                                if keys.is_empty() {
858                                    None
859                                } else {
860                                    Some(keys)
861                                }
862                            } else {
863                                None
864                            }
865                        }).unwrap_or(vec![])
866                    } else { vec![] }
867                } else { vec![] }
868            } else { vec![] };
869
870            if let Some(complex) = &elements.xsd_complex_type {
871                if let Some(elements) = complex.first() {
872                    for element in &elements.xsd_sequence.xsd_element {
873
874                        // For a field to be valid we need name and type.
875                        if let Some(ref name) = element.name {
876                            if let Some(ref jet_type) = element.jet_type {
877
878                                let mut field = RawField {
879                                    name: name.to_owned(),
880                                    field_type: match &**jet_type {
881                                        "yesno" => "yesno".to_owned(),
882                                        "integer" => "integer".to_owned(),
883                                        "longinteger" | "autonumber" => "autonumber".to_owned(),
884                                        "decimal" | "single" => "single".to_owned(),
885                                        "double" => "double".to_owned(),
886                                        "text" | "memo" | "oleobject" | "replicationid" => "text".to_owned(),
887
888                                        // These are dates as in a DateTime format. Treat them as text for now.
889                                        "datetime" => "text".to_owned(),
890
891                                        _ => todo!("{}", jet_type),
892                                    },
893                                    ..Default::default()
894                                };
895
896                                if primary_keys.contains(&&*field.name) {
897                                    field.primary_key = "1".to_owned();
898                                } else {
899                                    field.primary_key = "0".to_owned();
900                                }
901
902                                field.is_old_game = Some(true);
903
904                                definition.fields.push(field);
905                            }
906                        }
907                    }
908                }
909            }
910        }
911
912        definition
913    }
914}