rpfm_lib/integrations/assembly_kit/table_definition.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Assembly Kit table definition parsing and schema generation.
12//!
13//! This module handles the parsing of Assembly Kit schema files (table structure definitions)
14//! and their conversion to RPFM's internal schema format. It supports three different Assembly
15//! Kit versions used across Total War games.
16//!
17//! # Assembly Kit Schema Formats
18//!
19//! Different Total War games use different schema file formats:
20//!
21//! - **Version 0** (Empire, Napoleon): `.xsd` XML schema files with basic type and constraint information
22//! - **Version 1** (Shogun 2): `TWaD_*.xml` files with enhanced metadata
23//! - **Version 2** (Rome 2+): `TWaD_*.xml` files with full relationship data and field descriptions
24//!
25//! # Main Types
26//!
27//! ## Version 1 & 2 Formats
28//!
29//! - [`RawDefinition`]: Represents a complete table definition with all fields
30//! - [`RawField`]: Individual field definition with type, constraints, and relationship info
31//! - [`RawRelationshipsTable`]: Foreign key relationships between tables
32//! - [`RawRelationship`]: Single foreign key relationship
33//!
34//! ## Version 0 Format (Legacy)
35//!
36//! - [`RawDefinitionV0`]: XSD schema root structure
37//! - [`Element`]: XSD element with type and constraint information
38//! - [`Index`]: Database index definition (used to derive relationships)
39//!
40//! # Functionality
41//!
42//! The main operations this module provides:
43//!
44//! 1. **Batch Reading**: [`RawDefinition::read_all()`] reads all table definitions from a directory
45//! 2. **Individual Reading**: [`RawDefinition::read()`] parses a single definition file
46//! 3. **Field Filtering**: [`RawDefinition::get_non_localisable_fields()`] separates translatable fields
47//! 4. **Schema Conversion**: `From<&RawDefinition>` for [`Definition`] converts to RPFM format
48//!
49//! # Version 0 Processing
50//!
51//! Version 0 (Empire/Napoleon) uses a two-pass approach:
52//! 1. First pass: Parse XSD files and extract basic field information and primary keys
53//! 2. Second pass: Analyze index definitions to derive foreign key relationships
54//!
55//! This is necessary because Version 0 uses database-style indexes rather than explicit
56//! foreign key declarations.
57//!
58//! # Type Mapping
59//!
60//! Assembly Kit types are mapped to RPFM field types:
61//! - `yesno` → `Boolean`
62//! - `single` → `F32`, `double` → `F64`
63//! - `integer` → `I32`, `autonumber`/`card64` → `I64`
64//! - `colour` → `ColourRGB`
65//! - `text`/`expression` → `StringU8`/`StringU16` (or optional variants)
66
67use itertools::Itertools;
68use rayon::prelude::*;
69use serde_derive::Deserialize;
70use serde_xml_rs::from_reader;
71
72use std::fs::File;
73use std::io::{BufReader, Read};
74use std::path::Path;
75
76use crate::error::{Result, RLibError};
77
78use super::*;
79use super::get_raw_definition_paths;
80use super::localisable_fields::RawLocalisableField;
81use super::table_data::RawTableRow;
82
83//---------------------------------------------------------------------------//
84// Types for parsing the Assembly Kit Schema Files into.
85//---------------------------------------------------------------------------//
86
/// Raw table definition parsed from Assembly Kit schema files.
///
/// This is the raw equivalent to RPFM's [`Definition`] struct. In Assembly Kit files,
/// this corresponds to a `TWaD_*.xml` file (versions 1-2) or `.xsd` file (version 0).
///
/// # Fields
///
/// * `name` - Table name with `.xml` extension (e.g., `"units_tables.xml"`). The readers in this
///   module fill it in themselves: [`RawDefinition::read()`] derives it from the file name, and the
///   version 0 conversion builds it from the table element's name plus `".xml"`.
/// * `fields` - All field definitions for this table, one per `<field>` element.
///
/// # Example Structure
///
/// Illustrative sketch of a `TWaD_units_tables.xml` file. The layout (properties as child
/// elements of `<field>`) is inferred from the serde mapping of [`RawField`], which uses no
/// attribute (`@`) renames — NOTE(review): confirm against a real Assembly Kit file.
/// ```xml
/// <root>
///   <field>
///     <primary_key>1</primary_key>
///     <name>key</name>
///     <field_type>text</field_type>
///     <required>1</required>
///   </field>
///   <field>
///     <primary_key>0</primary_key>
///     <name>category</name>
///     <field_type>text</field_type>
///     <required>0</required>
///     <column_source_table>unit_categories_tables</column_source_table>
///     <column_source_column>key</column_source_column>
///   </field>
/// </root>
/// ```
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "root")]
pub struct RawDefinition {

    /// Table name with `.xml` extension (e.g., `"units_tables.xml"`) and without the 'TWaD_' prefix.
    ///
    /// `None` only on a default-constructed value or when a version 0 schema has no table name.
    pub name: Option<String>,

    /// All the field definitions within this table definition.
    #[serde(rename = "field")]
    pub fields: Vec<RawField>,
}
119
/// Individual field definition from Assembly Kit schema.
///
/// This is the raw equivalent to RPFM's [`Field`] struct, containing all metadata
/// about a single table column. All flags are kept as the raw strings found in the
/// file; interpretation happens in `From<&RawField> for Field`.
///
/// # Type Information
///
/// Assembly Kit uses string-based type names:
/// - `"yesno"` - Boolean value
/// - `"single"`, `"double"` - Floating point numbers
/// - `"integer"` - 32-bit integer
/// - `"autonumber"`, `"card64"` - 64-bit integer
/// - `"text"`, `"expression"` - String data
/// - `"colour"` - RGB color value
///
/// Any other type name is converted as a (non-optional) string.
///
/// # Foreign Key Relationships
///
/// Relationships are defined via `column_source_table` and `column_source_column`:
/// - First element in `column_source_column` is the referenced column
/// - Additional elements (if present) are lookup columns
///
/// Both must be present for the field to be converted as a reference.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "field")]
pub struct RawField {

    /// Primary key flag. Only the exact value `"1"` is treated as "is a key".
    pub primary_key: String,

    /// Field name (column name in the table).
    pub name: String,

    /// Assembly Kit type name (see struct documentation for type mapping).
    pub field_type: String,

    /// Required field flag. Only the exact value `"1"` is treated as "required";
    /// for text fields anything else selects the optional string type.
    pub required: String,

    /// Default value for this field when creating new rows.
    pub default_value: Option<String>,

    /// Maximum allowed string length for text fields.
    ///
    /// Not consulted by the conversion code in this file.
    pub max_length: Option<String>,

    /// Filename flag - indicates this field contains a game file path.
    ///
    /// The conversion only checks whether the value is present, not what it contains.
    pub is_filename: Option<String>,

    /// Relative path where referenced files should be located.
    ///
    /// The raw value may list several paths separated by commas, with stray spaces around
    /// them; the conversion to [`Field`] trims each entry and re-joins them with semicolons.
    pub filename_relative_path: Option<String>,

    /// Fragment path (internal use, not useful for modders).
    ///
    /// A field flagged as filename that has a fragment path but no relative path is
    /// converted as a non-filename field, to avoid false positives in diagnostics.
    pub fragment_path: Option<String>,

    /// Referenced column names for foreign key relationships.
    ///
    /// First element is the referenced column.
    /// Additional elements are lookup columns.
    pub column_source_column: Option<Vec<String>>,

    /// Referenced table name for foreign key relationships.
    pub column_source_table: Option<String>,

    /// Human-readable description of the field's purpose.
    pub field_description: Option<String>,

    /// Encyclopaedia export flag (`"1"` = export, `"0"` = don't export).
    ///
    /// Not consulted by the conversion code in this file.
    pub encyclopaedia_export: Option<String>,

    /// Highlight color flag for marking unused/deprecated fields.
    ///
    /// `"#c8c8c8"` (gray) indicates an unused field in Warhammer 3.
    /// Not consulted by the conversion code in this file.
    pub highlight_flag: Option<String>,

    /// Custom flag for old game type handling.
    ///
    /// When `Some(true)`, string fields are converted to the UTF-16 string types instead of
    /// the UTF-8 ones. In this file it is set by the version 0 (`.xsd`) conversion.
    /// NOTE(review): whether other callers also set it (e.g. for Shogun 2) is not visible here.
    pub is_old_game: Option<bool>,
}
200
/// Version 0 (Empire/Napoleon) XSD schema root structure.
///
/// Empire and Napoleon use `.xsd` XML Schema Definition files instead of
/// the `TWaD_` format used in later games. This struct represents the root
/// of such a schema file.
///
/// The serde names (`xsd_schema`, `xsd_element`, ...) are not the tag names found on disk:
/// [`RawDefinitionV0::read()`] rewrites the namespaced tags (`xsd:`/`xs:`/`od:`) to these
/// underscore forms before deserializing.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_schema")]
pub struct RawDefinitionV0 {
    /// Top-level XSD elements of the schema.
    ///
    /// The code in this module treats the element at index 1 (the second one) as the table
    /// definition: its name is the table name and its complex type holds the columns.
    pub xsd_element: Vec<Element>,
}
212
/// Represents an XSD element definition from Assembly Kit v0 schema files.
///
/// The same struct is used for the table-level element (which carries the annotation with
/// index definitions and a complex type listing the columns) and for each column element
/// inside that complex type's sequence.
///
/// # Field Mapping
///
/// - `name`: Table name (table-level element) or column name (column element)
/// - `jet_type`: Jet database type name of the column
/// - `min_occurs`: Minimum occurrences (0 = optional, 1 = required)
/// - `xsd_annotation`: Contains metadata like index definitions
/// - `xsd_simple_type`: Type constraints (e.g., string max length)
/// - `xsd_complex_type`: Nested element sequences for complex types
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_element")]
pub struct Element {
    /// The name of this element (table or field/column name).
    #[serde(rename = "@name")]
    pub name: Option<String>,

    /// Jet database type identifier (the `od:jetType` attribute).
    ///
    /// The conversion to [`RawField`] matches these exact lowercase values: `yesno`, `integer`,
    /// `longinteger`, `autonumber`, `decimal`, `single`, `double`, `text`, `memo`, `oleobject`,
    /// `replicationid` and `datetime`. Elements without a type are skipped by that conversion.
    #[serde(rename = "@od_jetType")]
    pub jet_type: Option<String>,

    /// Minimum number of occurrences for this element.
    ///
    /// - `0`: Field is optional
    /// - `1` or higher: Field is required
    ///
    /// Not consulted by the conversion code in this file.
    #[serde(rename = "@minOccurs")]
    pub min_occurs: Option<i32>,

    /// Annotation containing metadata like index definitions.
    #[serde(rename = "xsd_annotation")]
    pub xsd_annotation: Option<Annotation>,

    /// Simple type definition with constraints (e.g., max string length).
    ///
    /// Not consulted by the conversion code in this file.
    #[serde(rename = "xsd_simpleType")]
    pub xsd_simple_type: Option<Vec<SimpleType>>,

    /// Complex type definition for nested element sequences.
    ///
    /// For the table-level element, only the first entry is used to list the columns.
    #[serde(rename = "xsd_complexType")]
    pub xsd_complex_type: Option<Vec<ComplexType>>,
}
259
/// Defines a simple type with restrictions in XSD schemas.
///
/// Simple types are used to apply constraints to basic data types, such as
/// limiting the maximum length of a string field. The data is parsed but not
/// consulted by the conversion code in this file.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_simpleType")]
pub struct SimpleType {
    /// The restriction applied to this simple type (e.g., max length), if any.
    pub xsd_restriction: Option<Restriction>,
}
270
/// Defines a complex type containing nested element sequences.
///
/// In this module, the complex type of the table-level element is where the columns
/// of the table are listed (through its sequence).
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_complexType")]
pub struct ComplexType {
    /// The ordered sequence of elements within this complex type. Mandatory when deserializing.
    #[serde(rename = "xsd_sequence")]
    pub xsd_sequence: Sequence,
}
283
/// Represents an ordered sequence of XSD elements.
///
/// Sequences define the order in which child elements must appear within
/// a complex type. For a table, each child element describes one column.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_sequence")]
pub struct Sequence {
    /// The ordered list of elements in this sequence.
    pub xsd_element: Vec<Element>,
}
295
/// Defines restrictions/constraints on an XSD simple type.
///
/// Restrictions are used to constrain the values of a simple type, such as
/// limiting the maximum length of a string. The `base` field specifies which
/// base type the restriction applies to.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_restriction")]
pub struct Restriction {
    /// The base XSD type being restricted (the `base` attribute). Mandatory when deserializing.
    #[serde(rename = "@base")]
    pub base: String,

    /// Maximum length constraint for string types.
    ///
    /// NOTE: the field name is misspelled (`max_lenght`). It is part of the public API, so
    /// renaming it would break existing users of this struct.
    #[serde(rename = "xsd_maxLength")]
    pub max_lenght: Option<MaxLength>
}
312
/// Specifies the maximum length constraint for a string field.
///
/// Used inside an XSD restriction; the limit is read from the element's `value` attribute.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_maxLength")]
pub struct MaxLength {
    /// The maximum length allowed, as written in the `value` attribute.
    #[serde(rename = "@value")]
    pub value: i32
}
324
/// Contains annotation metadata for XSD elements.
///
/// In this module the annotation of the table-level element is only used as the
/// path to the index definitions stored in its [`AppInfo`].
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_annotation")]
pub struct Annotation {
    /// Application-specific information, containing index definitions.
    #[serde(rename = "xsd_appinfo")]
    pub xsd_appinfo: Option<AppInfo>
}
337
/// Contains application-specific information within XSD annotations.
///
/// Holds the index definitions of a table. This module reads them to find the
/// primary key columns and to derive references to other tables.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "xsd_appinfo")]
pub struct AppInfo {
    /// List of database index definitions for this element.
    #[serde(rename = "od_index")]
    pub od_index: Option<Vec<Index>>
}
350
/// Defines a database index on a table column.
///
/// Indexes are used to derive primary keys and foreign key relationships in Assembly Kit
/// v0 schemas, as those schemas don't explicitly define relationships between tables.
///
/// # How this module interprets indexes
///
/// - An index named `"PrimaryKey"` lists the key columns of the table in `key`.
/// - An index whose name equals its (trimmed) `key` is treated as a plain per-column index
///   and ignored for relationship purposes.
/// - Any other index is assumed to be named `<remote table><local table>`. The local table
///   name is removed from the end of the index name to obtain the remote table, and the
///   column in `key` becomes a reference to the key column(s) of that table. Index names
///   that are exactly 61 characters long are assumed to be truncated, so only a leading part
///   of the local table name may be present at their end.
///
/// For example, in a table called `units`, an index named `unit_categoriesunits` with key
/// `category` makes `units.category` reference the key of `unit_categories`.
///
/// # Boolean String Fields
///
/// The `primary`, `unique`, and `clustered` fields are kept as raw strings
/// (`"true"`/`"false"`). They are not consulted by the code in this file.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "od_index")]
pub struct Index {
    /// The name of this index (the `index-name` attribute).
    ///
    /// Either `"PrimaryKey"`, the name of the indexed column, or a
    /// `<remote table><local table>` concatenation (see struct documentation).
    #[serde(rename = "@index-name")]
    pub name: String,

    /// The column(s) this index applies to (the `index-key` attribute).
    ///
    /// The value is trimmed before use, as it usually ends in a space. For the primary key
    /// index, multiple columns are separated by spaces (e.g., `"col1 col2 "`).
    #[serde(rename = "@index-key")]
    pub key: String,

    /// Whether this is a primary key index ("true"/"false").
    #[serde(rename = "@primary")]
    pub primary: String,

    /// Whether this index enforces uniqueness ("true"/"false").
    #[serde(rename = "@unique")]
    pub unique: String,

    /// Whether this is a clustered index ("true"/"false").
    #[serde(rename = "@clustered")]
    pub clustered: String,
}
398
/// Foreign key relationships table from Assembly Kit.
///
/// This corresponds to the `TWaD_relationships.xml` file found in Version 2
/// Assembly Kits (Rome 2+). It defines all foreign key relationships between tables.
///
/// This file only declares the type; nothing here reads or consumes it.
#[derive(Clone, Debug, Default, Deserialize)]
#[serde(rename = "root")]
pub struct RawRelationshipsTable {
    /// Table name (should be "relationships").
    pub name: Option<String>,

    /// All foreign key relationships defined in the Assembly Kit, one per `<relationship>` element.
    #[serde(rename = "relationship")]
    pub relationships: Vec<RawRelationship>,
}
413
/// Single foreign key relationship definition.
///
/// Defines a foreign key constraint from one table's column to another table's column.
/// All four values are mandatory when deserializing.
///
/// # Example
///
/// A relationship from `units_tables.category` to `unit_categories_tables.key`:
/// ```xml
/// <relationship>
///   <table_name>units_tables</table_name>
///   <column_name>category</column_name>
///   <foreign_table_name>unit_categories_tables</foreign_table_name>
///   <foreign_column_name>key</foreign_column_name>
/// </relationship>
/// ```
#[derive(Clone, Debug, Default, Deserialize)]
pub struct RawRelationship {
    /// Source table name containing the foreign key column.
    pub table_name: String,

    /// Source column name (the foreign key field).
    pub column_name: String,

    /// Referenced table name.
    pub foreign_table_name: String,

    /// Referenced column name (typically a primary key).
    pub foreign_column_name: String
}
443
444//---------------------------------------------------------------------------//
445// Implementations
446//---------------------------------------------------------------------------//
447
448/// Implementation of `RawDefinition`.
449impl RawDefinition {
450
451 /// Reads all table definitions from an Assembly Kit directory.
452 ///
453 /// This function scans the provided directory for Assembly Kit definition files
454 /// and parses them into [`RawDefinition`] structs. The parsing logic varies
455 /// significantly by version.
456 ///
457 /// # Version-Specific Behavior
458 ///
459 /// ## Version 1 & 2 (Shogun 2, Rome 2+)
460 /// - Reads `TWaD_*.xml` files directly
461 /// - Each file is a complete, self-contained definition
462 ///
463 /// ## Version 0 (Empire, Napoleon)
464 /// - Reads `.xsd` XML Schema files
465 /// - Uses two-pass processing:
466 /// 1. Parse all XSD files and extract field info + primary keys
467 /// 2. Analyze index definitions to derive foreign key relationships
468 /// - This is necessary because Version 0 uses database-style indexes rather than
469 /// explicit foreign key declarations
470 ///
471 /// # Arguments
472 ///
473 /// * `raw_definitions_folder` - Directory containing Assembly Kit definition files
474 /// * `version` - Assembly Kit version (0 = Empire/Napoleon, 1 = Shogun 2, 2 = Rome 2+)
475 /// * `tables_to_skip` - Table names (without extension) to exclude from parsing
476 ///
477 /// # Returns
478 ///
479 /// Returns a vector of successfully parsed table definitions. Tables in the
480 /// blacklist or skip list are excluded.
481 ///
482 /// # Errors
483 ///
484 /// Returns an error if:
485 /// - The version is unsupported (not 0, 1, or 2)
486 /// - The directory cannot be read
487 /// - Any definition file has malformed XML
488 pub fn read_all(raw_definitions_folder: &Path, version: i16, tables_to_skip: &[&str]) -> Result<Vec<Self>> {
489 let definitions = get_raw_definition_paths(raw_definitions_folder, version)?;
490 match version {
491 2 | 1 => {
492 definitions.iter()
493 .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
494 .filter(|x| {
495 let table_name = x.file_stem().unwrap().to_str().unwrap().split_at(5).1;
496 !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
497 })
498 .map(|x| Self::read(x, version))
499 .collect::<Result<Vec<Self>>>()
500 }
501 0 => {
502 let v0s = definitions.iter()
503 .filter(|x| !BLACKLISTED_TABLES.contains(&x.file_name().unwrap().to_str().unwrap()))
504 .filter(|x| {
505 let table_name = x.file_stem().unwrap().to_str().unwrap();
506 !tables_to_skip.par_iter().any(|vanilla_name| vanilla_name == &table_name)
507 })
508 .filter_map(|x| RawDefinitionV0::read(x).transpose())
509 .map(|def_v0| {
510
511 // NOTE: This from processes the primary keys already.
512 let raw = match def_v0 {
513 Ok(ref def_v0) => Self::from(def_v0),
514 Err(_) => Self::default(),
515 };
516 def_v0.map(|def_v0| (def_v0, raw))
517 })
518 .collect::<Result<Vec<(RawDefinitionV0, RawDefinition)>>>()?;
519
520 // We need to do a second pass because without the entire set available we cannot figure out the references.
521 Ok(v0s.iter()
522 .map(|(def_v0, new_def)| {
523 let mut new_def = new_def.clone();
524
525 if let Some(elements) = def_v0.xsd_element.get(1) {
526 if let Some(ref table_name) = elements.name {
527 if let Some(ref ann) = elements.xsd_annotation {
528 if let Some(ref app) = ann.xsd_appinfo {
529 if let Some(ref od_index) = app.od_index {
530 for index in od_index {
531
532 // Ignore indexes of unused fields, the primary key, and field-specific indexes.
533 if index.name == "PrimaryKey" || index.name == index.key.trim() {
534 continue;
535 }
536
537 // Indexes follow the format "remotetablelocaltable", with a 61 char limit. To find the remote table,
538 // we need to remove the local one, and to do so, we need to find what part of the local one is actually in the index name.
539 let remote_table_name = if index.name.chars().count() == 61 {
540 let mut table_name = table_name.clone();
541 let mut remote_table_name = String::new();
542 loop {
543 if index.name.ends_with(&*table_name) {
544 remote_table_name = index.name.clone();
545 if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
546 remote_table_name.truncate(sub);
547 } else {
548 remote_table_name = String::new();
549 }
550 break;
551 } else {
552 if table_name.is_empty() {
553 break;
554 }
555
556 table_name.pop();
557 }
558 }
559
560 remote_table_name
561 } else {
562 let mut remote_table_name = index.name.clone();
563 if let Some(sub) = index.name.len().checked_sub(table_name.len()) {
564 remote_table_name.truncate(sub);
565 } else {
566 remote_table_name = String::new();
567 }
568 remote_table_name
569 };
570
571 // Now we need to find the primary key of the remote table, if any.
572 if !remote_table_name.is_empty() {
573 if let Some(remote_def) = v0s.par_iter().find_map_first(|(def_v0, new_def)| {
574 if let Some(elements) = def_v0.xsd_element.get(1) {
575 if let Some(ref table_name) = elements.name {
576 if table_name == &remote_table_name {
577 Some(new_def)
578 } else { None }
579 } else { None }
580 } else { None }
581 }) {
582
583 // No fucking clue if ANY reference is to a multikey table, but if is, we'll use the first key as ref key, and the rest as lookups.
584 let primary_keys = remote_def.fields.iter().filter(|x| x.primary_key == "1" || x.name == "key").collect::<Vec<_>>();
585 if !primary_keys.is_empty() {
586 for field in &mut new_def.fields {
587 if field.name == index.key.trim() {
588 field.column_source_table = Some(remote_table_name.to_string());
589 field.column_source_column = Some(primary_keys.iter().map(|x| x.name.to_string()).collect());
590 }
591 }
592 }
593 }
594 }
595 }
596 }
597 }
598 }
599 }
600 }
601 new_def
602 })
603 .collect())
604 }
605 _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
606 }
607 }
608
609 /// Parses a single Assembly Kit definition file.
610 ///
611 /// Reads and parses one table definition file from the Assembly Kit.
612 ///
613 /// # Arguments
614 ///
615 /// * `raw_definition_path` - Path to the definition file (e.g., `TWaD_units_tables.xml`)
616 /// * `version` - Assembly Kit version (1 = Shogun 2, 2 = Rome 2+)
617 ///
618 /// # Returns
619 ///
620 /// Returns the parsed [`RawDefinition`] with the table name set to the filename
621 /// without the `TWaD_` prefix (e.g., `"units_tables.xml"`).
622 ///
623 /// # Errors
624 ///
625 /// Returns an error if:
626 /// - The version is not 1 or 2 (use [`RawDefinitionV0::read()`] for version 0)
627 /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
628 /// - The XML is malformed
629 ///
630 /// # Note
631 ///
632 /// For Version 0 (Empire/Napoleon), use [`RawDefinitionV0::read()`] instead as the
633 /// file format is completely different (.xsd vs .xml).
634 pub fn read(raw_definition_path: &Path, version: i16) -> Result<Self> {
635 match version {
636 2 | 1 => {
637 let definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
638 let mut definition: Self = from_reader(definition_file)?;
639 definition.name = Some(raw_definition_path.file_name().unwrap().to_str().unwrap().split_at(5).1.to_string());
640 Ok(definition)
641 }
642
643 _ => Err(RLibError::AssemblyKitUnsupportedVersion(version))
644 }
645 }
646
647 /// Filters out localisable fields from the definition.
648 ///
649 /// Returns only the fields that are not marked as localisable (translatable) and
650 /// are present in the test row data. This is used when processing Assembly Kit
651 /// table data to separate regular fields from translation fields.
652 ///
653 /// # Arguments
654 ///
655 /// * `raw_localisable_fields` - List of all localisable fields from `TExc_LocalisableFields.xml`
656 /// * `test_row` - Sample row data used to verify field presence
657 ///
658 /// # Returns
659 ///
660 /// Returns a vector of [`Field`] instances for non-localisable fields that exist
661 /// in the test data.
662 ///
663 /// # Note
664 ///
665 /// Fields are excluded if:
666 /// - They're listed in `raw_localisable_fields` for this table
667 /// - They don't appear in the test row
668 /// - They have a "state" attribute (marked as modified/deprecated)
669 pub fn get_non_localisable_fields(&self, raw_localisable_fields: &[RawLocalisableField], test_row: &RawTableRow) -> Vec<Field> {
670 let raw_table_name = &self.name.as_ref().unwrap()[..self.name.as_ref().unwrap().len() - 4];
671 let localisable_fields_names = raw_localisable_fields.iter()
672 .filter(|x| x.table_name == raw_table_name)
673 .map(|x| &*x.field)
674 .collect::<Vec<&str>>();
675
676 self.fields.iter()
677 .filter(|x| match test_row.fields.iter().find(|y| x.name == y.field_name) {
678 Some(y) => y.state.is_none(),
679 None => false,
680 })
681 .filter(|x| !localisable_fields_names.contains(&&*x.name))
682 .map(From::from)
683 .collect::<Vec<Field>>()
684 }
685}
686
687impl From<&RawDefinition> for Definition {
688 fn from(raw_definition: &RawDefinition) -> Self {
689 let fields = raw_definition.fields.iter().map(From::from).collect::<Vec<_>>();
690 Self::new_with_fields(-100, &fields, &[], None)
691 }
692}
693
694
695impl From<&RawField> for Field {
696 fn from(raw_field: &RawField) -> Self {
697
698 let is_old_game = raw_field.is_old_game.unwrap_or(false);
699
700 let field_type = match &*raw_field.field_type {
701 "yesno" => FieldType::Boolean,
702 "single" => FieldType::F32,
703 "double" => FieldType::F64,
704 "integer" => FieldType::I32,
705 "autonumber" | "card64" => FieldType::I64,
706 "colour" => FieldType::ColourRGB,
707 "expression" | "text" => {
708 if raw_field.required == "1" {
709 if is_old_game {
710 FieldType::StringU16
711 } else {
712 FieldType::StringU8
713 }
714 }
715 else if is_old_game {
716 FieldType::OptionalStringU16
717 } else {
718 FieldType::OptionalStringU8
719 }
720 },
721 _ => if is_old_game {
722 FieldType::StringU16
723 } else {
724 FieldType::StringU8
725 },
726 };
727
728 let (is_reference, lookup) = if let Some(x) = &raw_field.column_source_table {
729 if let Some(y) = &raw_field.column_source_column {
730 if y.len() > 1 { (Some((x.to_owned(), y[0].to_owned())), Some(y[1..].to_vec()))}
731 else { (Some((x.to_owned(), y[0].to_owned())), None) }
732 } else { (None, None) }
733 }
734 else { (None, None) };
735
736 // CA sometimes uses comma as separator, and has random spaces between paths.
737 let filename_relative_path = raw_field.filename_relative_path.clone().map(|x| {
738 x.split(',').map(|y| y.trim()).join(";")
739 });
740
741 // Some fields are marked as filename, but only have fragment paths, which do not seem to correlate to game file paths.
742 // We need to disable those to avoid false positives on diagnostics.
743 let is_filename = match raw_field.is_filename {
744 Some(_) => !(raw_field.fragment_path.is_some() && raw_field.filename_relative_path.is_none()),
745 None => false,
746 };
747
748 Self {
749 name: raw_field.name.to_owned(),
750 field_type,
751 is_key: raw_field.primary_key == "1",
752 default_value: raw_field.default_value.clone(),
753 is_filename,
754 filename_relative_path,
755 is_reference,
756 lookup,
757 description: if let Some(x) = &raw_field.field_description { x.to_owned() } else { String::new() },
758 ..Default::default()
759 }
760 }
761}
762
763impl RawDefinitionV0 {
764
765 /// Parses a Version 0 (Empire/Napoleon) XSD schema file.
766 ///
767 /// Reads and parses an XSD (XML Schema Definition) file from the Empire or
768 /// Napoleon Assembly Kit. The XSD format is significantly different from the
769 /// `TWaD_` format used in later games.
770 ///
771 /// # Arguments
772 ///
773 /// * `raw_definition_path` - Path to the `.xsd` file
774 ///
775 /// # Returns
776 ///
777 /// Returns `Ok(Some(definition))` if the file was parsed successfully, `Ok(None)`
778 /// if the file was empty, or an error if parsing failed.
779 ///
780 /// # Errors
781 ///
782 /// Returns an error if:
783 /// - The file cannot be opened (returns [`RLibError::AssemblyKitNotFound`])
784 /// - The XML/XSD is malformed
785 ///
786 /// # Implementation Note
787 ///
788 /// Due to limitations in `serde_xml_rs`, this function performs extensive string
789 /// replacements on the XSD content before parsing to normalize XML namespace
790 /// prefixes (`xsd:` and `xs:` → `xsd_`, `od:` → `od_`).
791 pub fn read(raw_definition_path: &Path) -> Result<Option<Self>> {
792 let mut definition_file = BufReader::new(File::open(raw_definition_path).map_err(|_| RLibError::AssemblyKitNotFound)?);
793
794 // Before deserializing the data, due to limitations of serde_xml_rs, we have to rename all rows, because unique names for
795 // rows in each file is not supported for deserializing. Same for the fields, we have to change them to something more generic.
796 let mut buffer = String::new();
797 definition_file.read_to_string(&mut buffer)?;
798
799 if buffer.is_empty() {
800 return Ok(None)
801 }
802
803 // Rust doesn't like : in variable names when deserializing.
804 buffer = buffer.replace("xsd:schema", "xsd_schema");
805 buffer = buffer.replace("xsd:element", "xsd_element");
806 buffer = buffer.replace("xsd:complexType", "xsd_complexType");
807 buffer = buffer.replace("xsd:sequence", "xsd_sequence");
808 buffer = buffer.replace("xsd:attribute", "xsd_attribute");
809 buffer = buffer.replace("xsd:annotation", "xsd_annotation");
810 buffer = buffer.replace("xsd:appinfo", "xsd_appinfo");
811 buffer = buffer.replace("od:index", "od_index");
812 buffer = buffer.replace("xsd:sequence", "xsd_sequence");
813 buffer = buffer.replace("xsd:simpleType", "xsd_simpleType");
814 buffer = buffer.replace("xsd:restriction", "xsd_restriction");
815 buffer = buffer.replace("xsd:maxLength", "xsd_maxLength");
816 buffer = buffer.replace("od:jetType", "od_jetType");
817
818 buffer = buffer.replace("xs:schema", "xsd_schema");
819 buffer = buffer.replace("xs:element", "xsd_element");
820 buffer = buffer.replace("xs:complexType", "xsd_complexType");
821 buffer = buffer.replace("xs:sequence", "xsd_sequence");
822 buffer = buffer.replace("xs:attribute", "xsd_attribute");
823 buffer = buffer.replace("xs:annotation", "xsd_annotation");
824 buffer = buffer.replace("xs:appinfo", "xsd_appinfo");
825 buffer = buffer.replace("xs:sequence", "xsd_sequence");
826 buffer = buffer.replace("xs:simpleType", "xsd_simpleType");
827 buffer = buffer.replace("xs:restriction", "xsd_restriction");
828 buffer = buffer.replace("xs:maxLength", "xsd_maxLength");
829
830 // Only if the table has data we deserialize it. If not, we just create an empty one.
831 let definition: RawDefinitionV0 = from_reader(buffer.as_bytes())?;
832
833 //dbg!(&definition);
834 Ok(Some(definition))
835 }
836}
837
838/// Old games don't use references, but rather indexes like a database. This means we're unable to find
839/// the referenced column without having the reference definition. So ref data needs to be calculated after this.
840impl From<&RawDefinitionV0> for RawDefinition {
841 fn from(value: &RawDefinitionV0) -> Self {
842 let mut definition = Self::default();
843
844 // Second element has the fields.
845 if let Some(elements) = value.xsd_element.get(1) {
846 definition.name = elements.name.clone().map(|x| format!("{x}.xml"));
847
848 // Try to get the indexes to check what do we need to mark as key.
849 let primary_keys = if let Some(ref ann) = elements.xsd_annotation {
850 if let Some(ref app) = ann.xsd_appinfo {
851 if let Some(ref od_index) = app.od_index {
852 od_index.iter().find_map(|index| {
853 if index.name == "PrimaryKey" {
854
855 // Always trim to remove the final space, then split by space to find all the keys of the table.
856 let keys = index.key.trim().split(' ').collect::<Vec<_>>();
857 if keys.is_empty() {
858 None
859 } else {
860 Some(keys)
861 }
862 } else {
863 None
864 }
865 }).unwrap_or(vec![])
866 } else { vec![] }
867 } else { vec![] }
868 } else { vec![] };
869
870 if let Some(complex) = &elements.xsd_complex_type {
871 if let Some(elements) = complex.first() {
872 for element in &elements.xsd_sequence.xsd_element {
873
874 // For a field to be valid we need name and type.
875 if let Some(ref name) = element.name {
876 if let Some(ref jet_type) = element.jet_type {
877
878 let mut field = RawField {
879 name: name.to_owned(),
880 field_type: match &**jet_type {
881 "yesno" => "yesno".to_owned(),
882 "integer" => "integer".to_owned(),
883 "longinteger" | "autonumber" => "autonumber".to_owned(),
884 "decimal" | "single" => "single".to_owned(),
885 "double" => "double".to_owned(),
886 "text" | "memo" | "oleobject" | "replicationid" => "text".to_owned(),
887
888 // These are dates as in a DateTime format. Treat them as text for now.
889 "datetime" => "text".to_owned(),
890
891 _ => todo!("{}", jet_type),
892 },
893 ..Default::default()
894 };
895
896 if primary_keys.contains(&&*field.name) {
897 field.primary_key = "1".to_owned();
898 } else {
899 field.primary_key = "0".to_owned();
900 }
901
902 field.is_old_game = Some(true);
903
904 definition.fields.push(field);
905 }
906 }
907 }
908 }
909 }
910 }
911
912 definition
913 }
914}