rpfm_lib/integrations/assembly_kit/table_definition.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Assembly Kit table definition parsing and schema generation.
12//!
13//! This module handles the parsing of Assembly Kit schema files (table structure definitions)
14//! and their conversion to RPFM's internal schema format. It supports three different Assembly
15//! Kit versions used across Total War games.
16//!
17//! # Assembly Kit Schema Formats
18//!
19//! Different Total War games use different schema file formats:
20//!
21//! - **Version 0** (Empire, Napoleon): `.xsd` XML schema files with basic type and constraint information
22//! - **Version 1** (Shogun 2): `TWaD_*.xml` files with enhanced metadata
23//! - **Version 2** (Rome 2+): `TWaD_*.xml` files with full relationship data and field descriptions
24//!
25//! # Main Types
26//!
27//! ## Version 1 & 2 Formats
28//!
29//! - [`RawDefinition`]: Represents a complete table definition with all fields
30//! - [`RawField`]: Individual field definition with type, constraints, and relationship info
31//! - [`RawRelationshipsTable`]: Foreign key relationships between tables
32//! - [`RawRelationship`]: Single foreign key relationship
33//!
34//! ## Version 0 Format (Legacy)
35//!
36//! - [`RawDefinitionV0`]: XSD schema root structure
37//! - [`Element`]: XSD element with type and constraint information
38//! - [`Index`]: Database index definition (used to derive relationships)
39//!
40//! # Functionality
41//!
42//! The main operations this module provides:
43//!
44//! 1. **Batch Reading**: [`RawDefinition::read_all()`] reads all table definitions from a directory
45//! 2. **Individual Reading**: [`RawDefinition::read()`] parses a single definition file
46//! 3. **Field Filtering**: [`RawDefinition::get_non_localisable_fields()`] separates translatable fields
47//! 4. **Schema Conversion**: `From<&RawDefinition>` for [`Definition`] converts to RPFM format
48//!
49//! # Version 0 Processing
50//!
51//! Version 0 (Empire/Napoleon) uses a two-pass approach:
52//! 1. First pass: Parse XSD files and extract basic field information and primary keys
53//! 2. Second pass: Analyze index definitions to derive foreign key relationships
54//!
55//! This is necessary because Version 0 uses database-style indexes rather than explicit
56//! foreign key declarations.
57//!
58//! # Type Mapping
59//!
60//! Assembly Kit types are mapped to RPFM field types:
61//! - `yesno` → `Boolean`
62//! - `single` → `F32`, `double` → `F64`
63//! - `integer` → `I32`, `autonumber`/`card64` → `I64`
64//! - `colour` → `ColourRGB`
65//! - `text`/`expression` → `StringU8`/`StringU16` (or optional variants)
66
67use itertools::Itertools;
68use rayon::prelude::*;
69use serde_derive::Deserialize;
70use serde_xml_rs::from_reader;
71
72use std::collections::BTreeMap;
73use std::fs::File;
74use std::io::{BufReader, Read};
75use std::path::Path;
76
77use crate::error::{Result, RLibError};
78
79use super::*;
80use super::get_raw_definition_paths;
81use super::localisable_fields::RawLocalisableField;
82use super::table_data::RawTableRow;
83
84//---------------------------------------------------------------------------//
85// Types for parsing the Assembly Kit Schema Files into.
86//---------------------------------------------------------------------------//
87
/// Raw table definition parsed from Assembly Kit schema files.
///
/// This is the raw equivalent to RPFM's [`Definition`] struct. In Assembly Kit files,
/// this corresponds to a `TWaD_*.xml` file (versions 1-2) or `.xsd` file (version 0).
///
/// # Fields
///
/// * `name` - Table name with `.xml` extension (e.g., `"units_tables.xml"`)
/// * `fields` - All field definitions for this table
///
/// # Example Structure
///
/// A `TWaD_units_tables.xml` file contains field definitions like:
/// ```xml
/// <root>
///     <field primary_key="1" name="key" field_type="text" required="1"/>
///     <field primary_key="0" name="category" field_type="text" required="0"
///            column_source_table="unit_categories_tables"
///            column_source_column="key"/>
/// </root>
/// ```
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "root")]
pub struct RawDefinition {

    /// Table name with `.xml` extension (e.g., `"units_tables.xml"`) and without the 'TWaD_' prefix.
    ///
    /// [`RawDefinition::read()`] overwrites this after parsing with the file name minus its
    /// first five bytes, and the Version 0 conversion builds it as `"<table element name>.xml"`.
    /// A default-constructed value has `None` here.
    pub name: Option<String>,

    /// All the field definitions within this table definition.
    ///
    /// Deserialized from the repeated `field` entries of the document.
    #[serde(rename = "field")]
    pub fields: Vec<RawField>,
}
120
/// Individual field definition from Assembly Kit schema.
///
/// This is the raw equivalent to RPFM's [`Field`] struct, containing all metadata
/// about a single table column.
///
/// # Type Information
///
/// Assembly Kit uses string-based type names:
/// - `"yesno"` - Boolean value
/// - `"single"`, `"double"` - Floating point numbers
/// - `"integer"` - 32-bit integer
/// - `"autonumber"`, `"card64"` - 64-bit integer (often auto-incrementing)
/// - `"text"`, `"expression"` - String data
/// - `"colour"` - RGB color value
///
/// Any other type name is converted to a (non-optional) string field by the
/// `From<&RawField>` conversion to [`Field`].
///
/// # Foreign Key Relationships
///
/// Relationships are defined via `column_source_table` and `column_source_column`:
/// - First element in `column_source_column` is the referenced primary key
/// - Additional elements (if present) are lookup columns for concatenated display
///
/// A reference is only produced when both values are present.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "field")]
pub struct RawField {

    /// Primary key flag (`"1"` = true, `"0"` = false).
    ///
    /// The conversion to [`Field`] treats only the exact string `"1"` as true.
    pub primary_key: String,

    /// Field name (column name in the table).
    pub name: String,

    /// Assembly Kit type name (see struct documentation for type mapping).
    pub field_type: String,

    /// Required field flag (`"1"` = required, `"0"` = optional).
    ///
    /// Only consulted for `"text"`/`"expression"` fields, where anything other than `"1"`
    /// selects the optional string type.
    pub required: String,

    /// Default value for this field when creating new rows.
    pub default_value: Option<String>,

    /// Maximum allowed string length for text fields.
    pub max_length: Option<String>,

    /// Filename flag - indicates this field contains a game file path.
    ///
    /// The conversion to [`Field`] only checks for presence: any value counts as set,
    /// unless the field has a `fragment_path` but no `filename_relative_path`.
    pub is_filename: Option<String>,

    /// Relative path where referenced files should be located.
    ///
    /// Multiple paths can be specified. The raw value separates them with commas (sometimes
    /// with stray spaces); the conversion to [`Field`] trims each one and joins them with
    /// semicolons.
    pub filename_relative_path: Option<String>,

    /// Fragment path (internal use, not useful for modders).
    ///
    /// Only used to discard the filename flag when no `filename_relative_path` is present.
    pub fragment_path: Option<String>,

    /// Referenced column names for foreign key relationships.
    ///
    /// First element is the referenced primary key column.
    /// Additional elements are lookup columns for composite display.
    pub column_source_column: Option<Vec<String>>,

    /// Referenced table name for foreign key relationships.
    pub column_source_table: Option<String>,

    /// Human-readable description of the field's purpose.
    pub field_description: Option<String>,

    /// Encyclopaedia export flag (`"1"` = export, `"0"` = don't export).
    ///
    /// Indicates if this field should be included in game encyclopaedia exports.
    pub encyclopaedia_export: Option<String>,

    /// Highlight color flag for marking unused/deprecated fields.
    ///
    /// `"#c8c8c8"` (gray) indicates an unused field in Warhammer 3.
    pub highlight_flag: Option<String>,

    /// Custom flag for old game (Empire/Napoleon/Shogun 2) type handling.
    ///
    /// When true, uses UTF-16 strings instead of UTF-8. A missing value is treated as `false`.
    /// The Version 0 conversion sets it to `Some(true)` on every field it creates.
    pub is_old_game: Option<bool>,
}
201
/// Version 0 (Empire/Napoleon) XSD schema root structure.
///
/// Empire and Napoleon use `.xsd` XML Schema Definition files instead of
/// the `TWaD_` format used in later games. This struct represents the root
/// of such a schema file.
///
/// The root tag is matched as `xsd_schema` because [`RawDefinitionV0::read()`] replaces the
/// `xsd:`/`xs:` namespace prefixes with `xsd_` before deserializing.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_schema")]
pub struct RawDefinitionV0 {
    /// XSD elements defining the table structure.
    ///
    /// The conversions in this module read the element at index 1 (the second one) as the
    /// table itself: its name is the table name and its first complex type lists the columns.
    pub xsd_element: Vec<Element>,
}
213
/// Represents an XSD element definition from Assembly Kit v0 schema files.
///
/// Elements are the core building blocks of XSD schemas, representing individual
/// fields in database tables. Each element can have type constraints (via `SimpleType`),
/// nested structures (via `ComplexType`), and metadata annotations.
///
/// The same struct is used both for the table-level element (whose `name` is the table name)
/// and for the column elements nested inside its complex type.
///
/// # Field Mapping
///
/// - `name`: Column name in the database table (or the table name, for the table-level element)
/// - `jet_type`: Microsoft Jet database type (e.g., "text", "integer", "yesno")
/// - `min_occurs`: Minimum occurrences (0 = optional, 1 = required)
/// - `xsd_annotation`: Contains metadata like index definitions
/// - `xsd_simple_type`: Type constraints (e.g., string max length)
/// - `xsd_complex_type`: Nested element sequences for complex types
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_element")]
pub struct Element {
    /// The name of this element (field/column name).
    #[serde(rename = "@name")]
    pub name: Option<String>,

    /// Microsoft Jet database type identifier.
    ///
    /// The values recognised by the conversion to [`RawDefinition`] are lowercase: "yesno",
    /// "integer", "longinteger", "autonumber", "decimal", "single", "double", "text", "memo",
    /// "oleobject", "replicationid" and "datetime". Elements without a type are skipped by
    /// that conversion.
    #[serde(rename = "@od_jetType")]
    pub jet_type: Option<String>,

    /// Minimum number of occurrences for this element.
    ///
    /// - `0`: Field is optional
    /// - `1` or higher: Field is required
    ///
    /// NOTE(review): the `From<&RawDefinitionV0>` conversion does not read this value.
    #[serde(rename = "@minOccurs")]
    pub min_occurs: Option<i32>,

    /// Annotation containing metadata like index definitions.
    #[serde(rename = "xsd_annotation")]
    pub xsd_annotation: Option<Annotation>,

    /// Simple type definition with constraints (e.g., max string length).
    #[serde(rename = "xsd_simpleType")]
    pub xsd_simple_type: Option<Vec<SimpleType>>,

    /// Complex type definition for nested element sequences.
    ///
    /// For the table-level element, the first entry holds the sequence of column elements.
    #[serde(rename = "xsd_complexType")]
    pub xsd_complex_type: Option<Vec<ComplexType>>,
}
260
/// Defines a simple type with restrictions in XSD schemas.
///
/// Simple types are used to apply constraints to basic data types, such as
/// limiting the maximum length of a string field.
///
/// Matched against `xsd_simpleType` tags, i.e. `xsd:simpleType`/`xs:simpleType` after the
/// prefix renaming done by [`RawDefinitionV0::read()`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_simpleType")]
pub struct SimpleType {
    /// The restriction applied to this simple type (e.g., max length), if any.
    pub xsd_restriction: Option<Restriction>,
}
271
/// Defines a complex type containing nested element sequences.
///
/// Complex types are used when a field contains multiple sub-elements organized
/// in a specific order. In Assembly Kit schemas, these are typically used for
/// nested table structures, though most tables use simple flat structures.
///
/// The conversion to [`RawDefinition`] takes the first complex type of the table-level
/// element and turns each element of its sequence into a field.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_complexType")]
pub struct ComplexType {
    /// The ordered sequence of elements within this complex type.
    ///
    /// Not optional: a complex type without a sequence fails to deserialize.
    #[serde(rename = "xsd_sequence")]
    pub xsd_sequence: Sequence,
}
284
/// Represents an ordered sequence of XSD elements.
///
/// Sequences define the order in which child elements must appear within
/// a complex type. Each element in the sequence can itself be a simple or
/// complex type.
///
/// Fields produced by the conversion to [`RawDefinition`] keep the order of this list.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_sequence")]
pub struct Sequence {
    /// The ordered list of elements in this sequence.
    pub xsd_element: Vec<Element>,
}
296
/// Defines restrictions/constraints on an XSD simple type.
///
/// Restrictions are used to constrain the values of a simple type, such as
/// limiting the maximum length of a string. The `base` field specifies which
/// base type the restriction applies to.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_restriction")]
pub struct Restriction {
    /// The base XSD type being restricted (e.g., "xsd:string", "xsd:int").
    ///
    /// Required: a restriction without a `base` attribute fails to deserialize.
    #[serde(rename = "@base")]
    pub base: String,

    /// Maximum length constraint for string types.
    ///
    /// NOTE: the field name is misspelt (`max_lenght`). It is public, so it is left as is to
    /// avoid breaking users of this struct; the XML tag it maps to is `xsd_maxLength`.
    #[serde(rename = "xsd_maxLength")]
    pub max_lenght: Option<MaxLength>
}
313
/// Specifies the maximum length constraint for a string field.
///
/// This constraint limits how many characters a string field can contain.
/// Used in XSD restrictions to define database column size limits.
///
/// NOTE(review): the `From<&RawDefinitionV0>` conversion does not read this value, so it is
/// not carried over into [`RawField::max_length`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_maxLength")]
pub struct MaxLength {
    /// The maximum number of characters allowed, read from the `value` attribute.
    #[serde(rename = "@value")]
    pub value: i32
}
325
/// Contains annotation metadata for XSD elements.
///
/// Annotations provide additional information about schema elements that isn't
/// part of the core validation rules. In Assembly Kit schemas, annotations are
/// primarily used to store database index definitions via the `AppInfo` structure.
///
/// Only the annotation of the table-level element is read by the code in this module.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_annotation")]
pub struct Annotation {
    /// Application-specific information, containing index definitions.
    #[serde(rename = "xsd_appinfo")]
    pub xsd_appinfo: Option<AppInfo>
}
338
/// Contains application-specific information within XSD annotations.
///
/// This structure holds database-specific metadata that extends the base XSD schema.
/// In Assembly Kit schemas, it primarily contains index definitions that describe
/// primary keys, foreign keys, and unique constraints on table columns.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_appinfo")]
pub struct AppInfo {
    /// List of database index definitions for this element.
    ///
    /// Matched against `od_index` tags (`od:index` before the prefix renaming done by
    /// [`RawDefinitionV0::read()`]). `None` when the element declares no indexes.
    #[serde(rename = "od_index")]
    pub od_index: Option<Vec<Index>>
}
351
/// Defines a database index on a table column.
///
/// Indexes are used to derive primary keys and foreign key relationships in Assembly Kit v0
/// schemas. Since v0 schemas don't explicitly define relationships between tables, RPFM
/// infers them from the index names.
///
/// # Relationship Inference
///
/// [`RawDefinition::read_all()`] interprets the indexes of a table as follows:
///
/// - The index named `"PrimaryKey"` lists the primary key column(s) of the table.
/// - Indexes whose name equals their (trimmed) key are field-specific and ignored.
/// - Any other index name is treated as `<remote table><local table>`, apparently capped at
///   61 characters. Removing the local table name from the end gives the remote table, and
///   the column in `key` becomes a reference to that table's primary key.
///
/// For example, in table `units` an index named `buildingsunits` with key `building_key`
/// makes `units.building_key` reference the primary key of `buildings`.
///
/// # Boolean String Fields
///
/// The `primary`, `unique`, and `clustered` fields use string values "true"/"false"
/// instead of booleans due to the XSD format.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "od_index")]
pub struct Index {
    /// The name of this index.
    ///
    /// Either `"PrimaryKey"`, the name of the indexed column, or the remote and local table
    /// names concatenated (see the struct documentation).
    #[serde(rename = "@index-name")]
    pub name: String,

    /// The column(s) this index applies to.
    ///
    /// The code in this module trims the value before use and, for the primary key index,
    /// splits it on spaces to get the individual columns (e.g., `"col1 col2 "`).
    #[serde(rename = "@index-key")]
    pub key: String,

    /// Whether this is a primary key index ("true"/"false").
    ///
    /// NOTE(review): not read by this module; primary keys are detected through the
    /// `"PrimaryKey"` index name instead.
    #[serde(rename = "@primary")]
    pub primary: String,

    /// Whether this index enforces uniqueness ("true"/"false").
    #[serde(rename = "@unique")]
    pub unique: String,

    /// Whether this is a clustered index ("true"/"false").
    #[serde(rename = "@clustered")]
    pub clustered: String,
}
399
/// Foreign key relationships table from Assembly Kit.
///
/// This corresponds to the `TWaD_relationships.xml` file found in Version 2
/// Assembly Kits (Rome 2+). It defines all foreign key relationships between tables.
///
/// NOTE(review): nothing in this file reads or builds this type; presumably it is consumed
/// elsewhere in the crate.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "root")]
pub struct RawRelationshipsTable {
    /// Table name (should be "relationships").
    pub name: Option<String>,

    /// All foreign key relationships defined in the Assembly Kit.
    ///
    /// Deserialized from the repeated `relationship` entries of the document.
    #[serde(rename = "relationship")]
    pub relationships: Vec<RawRelationship>,
}
414
/// Single foreign key relationship definition.
///
/// Defines a foreign key constraint from one table's column to another table's column.
/// All four values are required for an entry to deserialize.
///
/// # Example
///
/// A relationship from `units_tables.category` to `unit_categories_tables.key`:
/// ```xml
/// <relationship>
///     <table_name>units_tables</table_name>
///     <column_name>category</column_name>
///     <foreign_table_name>unit_categories_tables</foreign_table_name>
///     <foreign_column_name>key</foreign_column_name>
/// </relationship>
/// ```
#[derive(Clone, Debug, Default, Deserialize)]
pub struct RawRelationship {
    /// Source table name containing the foreign key column.
    pub table_name: String,

    /// Source column name (the foreign key field).
    pub column_name: String,

    /// Referenced table name.
    pub foreign_table_name: String,

    /// Referenced column name (typically a primary key).
    pub foreign_column_name: String
}
444
445//---------------------------------------------------------------------------//
446// Implementations
447//---------------------------------------------------------------------------//
448
449/// Implementation of `RawDefinition`.
450impl RawDefinition {
451
452 /// Reads all table definitions from an Assembly Kit directory.
453 ///
454 /// This function scans the provided directory for Assembly Kit definition files
455 /// and parses them into [`RawDefinition`] structs. The parsing logic varies
456 /// significantly by version.
457 ///
458 /// # Version-Specific Behavior
459 ///
460 /// ## Version 1 & 2 (Shogun 2, Rome 2+)
461 /// - Reads `TWaD_*.xml` files directly
462 /// - Each file is a complete, self-contained definition
463 ///
464 /// ## Version 0 (Empire, Napoleon)
465 /// - Reads `.xsd` XML Schema files
466 /// - Uses two-pass processing:
467 /// 1. Parse all XSD files and extract field info + primary keys
468 /// 2. Analyze index definitions to derive foreign key relationships
469 /// - This is necessary because Version 0 uses database-style indexes rather than
470 /// explicit foreign key declarations
471 ///
472 /// # Arguments
473 ///
474 /// * `raw_definitions_folder` - Directory containing Assembly Kit definition files
475 /// * `version` - Assembly Kit version (0 = Empire/Napoleon, 1 = Shogun 2, 2 = Rome 2+)
476 /// * `tables_to_skip` - Table names (without extension) to exclude from parsing
477 ///
478 /// # Returns
479 ///
480 /// Returns a vector of successfully parsed table definitions. Tables in the
481 /// blacklist or skip list are excluded.
482 ///
483 /// # Errors
484 ///
485 /// Returns an error if:
486 /// - The version is unsupported (not 0, 1, or 2)
487 /// - The directory cannot be read
488 /// - Any definition file has malformed XML
489 pub fn read_all(raw_definitions_folder: &Path, version: i16, tables_to_skip: &[&str]) -> Result<Vec<Self>> {
490 let definitions = get_raw_definition_paths(raw_definitions_folder, version)?;
491 match version {
492 2 | 1 => {
493 definitions.iter()
494 .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
495 .filter(|x| {
496 let table_name = x.file_stem().unwrap().to_str().unwrap().split_at(5).1;
497 !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
498 })
499 .map(|x| Self::read(x, version))
500 .collect::<Result<Vec<Self>>>()
501 }
502 0 => {
503 let v0s = definitions.iter()
504 .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
505 .filter(|x| {
506 let table_name = x.file_stem().unwrap().to_str().unwrap();
507 !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
508 })
509 .filter_map(|x| RawDefinitionV0::read(x).transpose())
510 .map(|def_v0| {
511
512 // NOTE: This from processes the primary keys already.
513 let raw = match def_v0 {
514 Ok(ref def_v0) => Self::from(def_v0),
515 Err(_) => Self::default(),
516 };
517 def_v0.map(|def_v0| (def_v0, raw))
518 })
519 .collect::<Result<Vec<(RawDefinitionV0, RawDefinition)>>>()?;
520
521 // We need to do a second pass because without the entire set available we cannot figure out the references.
522 Ok(v0s.iter()
523 .map(|(def_v0, new_def)| {
524 let mut new_def = new_def.clone();
525
526 if let Some(elements) = def_v0.xsd_element.get(1) {
527 if let Some(ref table_name) = elements.name {
528 if let Some(ref ann) = elements.xsd_annotation {
529 if let Some(ref app) = ann.xsd_appinfo {
530 if let Some(ref od_index) = app.od_index {
531 for index in od_index {
532
533 // Ignore indexes of unused fields, the primary key, and field-specific indexes.
534 if index.name == "PrimaryKey" || index.name == index.key.trim() {
535 continue;
536 }
537
538 // Indexes follow the format "remotetablelocaltable", with a 61 char limit. To find the remote table,
539 // we need to remove the local one, and to do so, we need to find what part of the local one is actually in the index name.
540 let remote_table_name = if index.name.chars().count() == 61 {
541 let mut table_name = table_name.clone();
542 let mut remote_table_name = String::new();
543 loop {
544 if index.name.ends_with(&*table_name) {
545 remote_table_name = index.name.clone();
546 if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
547 remote_table_name.truncate(sub);
548 } else {
549 remote_table_name = String::new();
550 }
551 break;
552 } else {
553 if table_name.is_empty() {
554 break;
555 }
556
557 table_name.pop();
558 }
559 }
560
561 remote_table_name
562 } else {
563 let mut remote_table_name = index.name.clone();
564 if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
565 remote_table_name.truncate(sub);
566 } else {
567 remote_table_name = String::new();
568 }
569 remote_table_name
570 };
571
572 // Now we need to find the primary key of the remote table, if any.
573 if !remote_table_name.is_empty() {
574 if let Some(remote_def) = v0s.par_iter().find_map_first(|(def_v0, new_def)| {
575 if let Some(elements) = def_v0.xsd_element.get(1) {
576 if let Some(ref table_name) = elements.name {
577 if table_name == &remote_table_name {
578 Some(new_def)
579 } else { None }
580 } else { None }
581 } else { None }
582 }) {
583
584 // No fucking clue if ANY reference is to a multikey table, but if is, we'll use the first key as ref key, and the rest as lookups.
585 let primary_keys = remote_def.fields.iter().filter(|x| x.primary_key == "1" || x.name == "key").collect::<Vec<_>>();
586 if !primary_keys.is_empty() {
587 for field in &mut new_def.fields {
588 if field.name == index.key.trim() {
589 field.column_source_table = Some(remote_table_name.to_string());
590 field.column_source_column = Some(primary_keys.iter().map(|x| x.name.to_string()).collect());
591 }
592 }
593 }
594 }
595 }
596 }
597 }
598 }
599 }
600 }
601 }
602 new_def
603 })
604 .collect())
605 }
606 _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
607 }
608 }
609
610 /// Parses a single Assembly Kit definition file.
611 ///
612 /// Reads and parses one table definition file from the Assembly Kit.
613 ///
614 /// # Arguments
615 ///
616 /// * `raw_definition_path` - Path to the definition file (e.g., `TWaD_units_tables.xml`)
617 /// * `version` - Assembly Kit version (1 = Shogun 2, 2 = Rome 2+)
618 ///
619 /// # Returns
620 ///
621 /// Returns the parsed [`RawDefinition`] with the table name set to the filename
622 /// without the `TWaD_` prefix (e.g., `"units_tables.xml"`).
623 ///
624 /// # Errors
625 ///
626 /// Returns an error if:
627 /// - The version is not 1 or 2 (use [`RawDefinitionV0::read()`] for version 0)
628 /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
629 /// - The XML is malformed
630 ///
631 /// # Note
632 ///
633 /// For Version 0 (Empire/Napoleon), use [`RawDefinitionV0::read()`] instead as the
634 /// file format is completely different (.xsd vs .xml).
635 pub fn read(raw_definition_path: &Path, version: i16) -> Result<Self> {
636 match version {
637 2 | 1 => {
638 let definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
639 let mut definition: Self = from_reader(definition_file)?;
640 definition.name = Some(raw_definition_path.file_name().unwrap().to_str().unwrap().split_at(5).1.to_string());
641 Ok(definition)
642 }
643
644 _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
645 }
646 }
647
648 /// Filters out localisable fields from the definition.
649 ///
650 /// Returns only the fields that are not marked as localisable (translatable) and
651 /// are present in the test row data. This is used when processing Assembly Kit
652 /// table data to separate regular fields from translation fields.
653 ///
654 /// # Arguments
655 ///
656 /// * `raw_localisable_fields` - List of all localisable fields from `TExc_LocalisableFields.xml`
657 /// * `test_row` - Sample row data used to verify field presence
658 ///
659 /// # Returns
660 ///
661 /// Returns a vector of [`Field`] instances for non-localisable fields that exist
662 /// in the test data.
663 ///
664 /// # Note
665 ///
666 /// Fields are excluded if:
667 /// - They're listed in `raw_localisable_fields` for this table
668 /// - They don't appear in the test row
669 /// - They have a "state" attribute (marked as modified/deprecated)
670 pub fn get_non_localisable_fields(&self, raw_localisable_fields: &[RawLocalisableField], test_row: &RawTableRow) -> Vec<Field> {
671 let raw_table_name = &self.name.as_ref().unwrap()[..self.name.as_ref().unwrap().len() - 4];
672 let localisable_fields_names = raw_localisable_fields.iter()
673 .filter(|x| x.table_name == raw_table_name)
674 .map(|x| &*x.field)
675 .collect::<Vec<&str>>();
676
677 self.fields.iter()
678 .filter(|x| match test_row.fields.iter().find(|y| x.name == y.field_name) {
679 Some(y) => y.state.is_none(),
680 None => false,
681 })
682 .filter(|x| !localisable_fields_names.contains(&&*x.name))
683 .map(From::from)
684 .collect::<Vec<Field>>()
685 }
686}
687
688impl From<&RawDefinition> for Definition {
689 fn from(raw_definition: &RawDefinition) -> Self {
690 let fields = raw_definition.fields.iter().map(From::from).collect::<Vec<_>>();
691 Self::new_with_fields(-100, &fields, &[], None)
692 }
693}
694
695
696impl From<&RawField> for Field {
697 fn from(raw_field: &RawField) -> Self {
698
699 let is_old_game = raw_field.is_old_game.unwrap_or(false);
700
701 let field_type = match &*raw_field.field_type {
702 "yesno" => FieldType::Boolean,
703 "single" => FieldType::F32,
704 "double" => FieldType::F64,
705 "integer" => FieldType::I32,
706 "autonumber" | "card64" => FieldType::I64,
707 "colour" => FieldType::ColourRGB,
708 "expression" | "text" => {
709 if raw_field.required == "1" {
710 if is_old_game {
711 FieldType::StringU16
712 } else {
713 FieldType::StringU8
714 }
715 }
716 else if is_old_game {
717 FieldType::OptionalStringU16
718 } else {
719 FieldType::OptionalStringU8
720 }
721 },
722 _ => if is_old_game {
723 FieldType::StringU16
724 } else {
725 FieldType::StringU8
726 },
727 };
728
729 let (is_reference, lookup) = if let Some(x) = &raw_field.column_source_table {
730 if let Some(y) = &raw_field.column_source_column {
731 if y.len() > 1 { (Some((x.to_owned(), y[0].to_owned())), Some(y[1..].to_vec()))}
732 else { (Some((x.to_owned(), y[0].to_owned())), None) }
733 } else { (None, None) }
734 }
735 else { (None, None) };
736
737 // CA sometimes uses comma as separator, and has random spaces between paths.
738 let filename_relative_path = raw_field.filename_relative_path.clone().map(|x| {
739 x.split(',').map(|y| y.trim()).join(";")
740 });
741
742 // Some fields are marked as filename, but only have fragment paths, which do not seem to correlate to game file paths.
743 // We need to disable those to avoid false positives on diagnostics.
744 let is_filename = match raw_field.is_filename {
745 Some(_) => !(raw_field.fragment_path.is_some() && raw_field.filename_relative_path.is_none()),
746 None => false,
747 };
748
749 Self::new(
750 raw_field.name.to_owned(),
751 field_type,
752 raw_field.primary_key == "1",
753 raw_field.default_value.clone(),
754 is_filename,
755 filename_relative_path,
756 is_reference,
757 lookup,
758 if let Some(x) = &raw_field.field_description { x.to_owned() } else { String::new() },
759 0,
760 0,
761 BTreeMap::new(),
762 None
763 )
764 }
765}
766
767impl RawDefinitionV0 {
768
769 /// Parses a Version 0 (Empire/Napoleon) XSD schema file.
770 ///
771 /// Reads and parses an XSD (XML Schema Definition) file from the Empire or
772 /// Napoleon Assembly Kit. The XSD format is significantly different from the
773 /// `TWaD_` format used in later games.
774 ///
775 /// # Arguments
776 ///
777 /// * `raw_definition_path` - Path to the `.xsd` file
778 ///
779 /// # Returns
780 ///
781 /// Returns `Ok(Some(definition))` if the file was parsed successfully, `Ok(None)`
782 /// if the file was empty, or an error if parsing failed.
783 ///
784 /// # Errors
785 ///
786 /// Returns an error if:
787 /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
788 /// - The XML/XSD is malformed
789 ///
790 /// # Implementation Note
791 ///
792 /// Due to limitations in `serde_xml_rs`, this function performs extensive string
793 /// replacements on the XSD content before parsing to normalize XML namespace
794 /// prefixes (`xsd:` and `xs:` → `xsd_`, `od:` → `od_`).
795 pub fn read(raw_definition_path: &Path) -> Result<Option<Self>> {
796 let mut definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
797
798 // Before deserializing the data, due to limitations of serde_xml_rs, we have to rename all rows, because unique names for
799 // rows in each file is not supported for deserializing. Same for the fields, we have to change them to something more generic.
800 let mut buffer = String::new();
801 definition_file.read_to_string(&mut buffer)?;
802
803 if buffer.is_empty() {
804 return Ok(None)
805 }
806
807 // Rust doesn't like : in variable names when deserializing.
808 buffer = buffer.replace("xsd:schema", "xsd_schema");
809 buffer = buffer.replace("xsd:element", "xsd_element");
810 buffer = buffer.replace("xsd:complexType", "xsd_complexType");
811 buffer = buffer.replace("xsd:sequence", "xsd_sequence");
812 buffer = buffer.replace("xsd:attribute", "xsd_attribute");
813 buffer = buffer.replace("xsd:annotation", "xsd_annotation");
814 buffer = buffer.replace("xsd:appinfo", "xsd_appinfo");
815 buffer = buffer.replace("od:index", "od_index");
816 buffer = buffer.replace("xsd:sequence", "xsd_sequence");
817 buffer = buffer.replace("xsd:simpleType", "xsd_simpleType");
818 buffer = buffer.replace("xsd:restriction", "xsd_restriction");
819 buffer = buffer.replace("xsd:maxLength", "xsd_maxLength");
820 buffer = buffer.replace("od:jetType", "od_jetType");
821
822 buffer = buffer.replace("xs:schema", "xsd_schema");
823 buffer = buffer.replace("xs:element", "xsd_element");
824 buffer = buffer.replace("xs:complexType", "xsd_complexType");
825 buffer = buffer.replace("xs:sequence", "xsd_sequence");
826 buffer = buffer.replace("xs:attribute", "xsd_attribute");
827 buffer = buffer.replace("xs:annotation", "xsd_annotation");
828 buffer = buffer.replace("xs:appinfo", "xsd_appinfo");
829 buffer = buffer.replace("xs:sequence", "xsd_sequence");
830 buffer = buffer.replace("xs:simpleType", "xsd_simpleType");
831 buffer = buffer.replace("xs:restriction", "xsd_restriction");
832 buffer = buffer.replace("xs:maxLength", "xsd_maxLength");
833
834 // Only if the table has data we deserialize it. If not, we just create an empty one.
835 let definition: RawDefinitionV0 = from_reader(buffer.as_bytes())?;
836
837 //dbg!(&definition);
838 Ok(Some(definition))
839 }
840}
841
842/// Old games don't use references, but rather indexes like a database. This means we're unable to find
843/// the referenced column without having the reference definition. So ref data needs to be calculated after this.
844impl From<&RawDefinitionV0> for RawDefinition {
845 fn from(value: &RawDefinitionV0) -> Self {
846 let mut definition = Self::default();
847
848 // Second element has the fields.
849 if let Some(elements) = value.xsd_element.get(1) {
850 definition.name = elements.name.clone().map(|x| format!("{x}.xml"));
851
852 // Try to get the indexes to check what do we need to mark as key.
853 let primary_keys = if let Some(ref ann) = elements.xsd_annotation {
854 if let Some(ref app) = ann.xsd_appinfo {
855 if let Some(ref od_index) = app.od_index {
856 od_index.iter().find_map(|index| {
857 if index.name == "PrimaryKey" {
858
859 // Always trim to remove the final space, then split by space to find all the keys of the table.
860 let keys = index.key.trim().split(' ').collect::<Vec<_>>();
861 if keys.is_empty() {
862 None
863 } else {
864 Some(keys)
865 }
866 } else {
867 None
868 }
869 }).unwrap_or(vec![])
870 } else { vec![] }
871 } else { vec![] }
872 } else { vec![] };
873
874 if let Some(complex) = &elements.xsd_complex_type {
875 if let Some(elements) = complex.first() {
876 for element in &elements.xsd_sequence.xsd_element {
877
878 // For a field to be valid we need name and type.
879 if let Some(ref name) = element.name {
880 if let Some(ref jet_type) = element.jet_type {
881
882 let mut field = RawField::default();
883 field.name = name.to_owned();
884
885 field.field_type = match &**jet_type {
886 "yesno" => "yesno".to_owned(),
887 "integer" => "integer".to_owned(),
888 "longinteger" | "autonumber" => "autonumber".to_owned(),
889 "decimal" | "single" => "single".to_owned(),
890 "double" => "double".to_owned(),
891 "text" | "memo" | "oleobject" | "replicationid" => "text".to_owned(),
892
893 // These are dates as in a DateTime format. Treat them as text for now.
894 "datetime" => "text".to_owned(),
895
896 _ => todo!("{}", jet_type),
897 };
898
899 if primary_keys.contains(&&*field.name) {
900 field.primary_key = "1".to_owned();
901 } else {
902 field.primary_key = "0".to_owned();
903 }
904
905 field.is_old_game = Some(true);
906
907 definition.fields.push(field);
908 }
909 }
910 }
911 }
912 }
913 }
914
915 definition
916 }
917}