Skip to main content

rpfm_lib/compression/
mod.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! This module contains the code to compress/decompress data for Total War games.
12//!
//! The traits [`Compressible`] and [`Decompressible`] within this module contain functions to compress and decompress
//! data to/from CA's different supported compression formats. Implementations for byte slices ([`Compressible`] for `[u8]`, [`Decompressible`] for `&[u8]`) are provided within this module.
15//!
16//! # Supported Formats
17//!
18//! See [`CompressionFormat`] for details on the supported compression formats (LZMA1, Lz4, Zstd) and their file structure.
19//!
20//! # Important Notes
21//!
//! * Due to a game bug, compressing tables tends to cause crashes on startup for some people. This bug seems to have been fixed in WH3, but all games before WH3
//!   may still suffer from it, so unless manually forced to, this lib will not compress tables in those games. Tables will only be compressed in WH3 and newer games.
24//!
25//! * Compressed files are **only supported on PFH5 Packs** (Since Total War: Warhammer 2).
26
27use lz4_flex::frame::{FrameDecoder, FrameEncoder};
28use lzma_rs::{lzma_compress, lzma_decompress};
29use serde_derive::{Serialize, Deserialize};
30
31use std::fmt::Display;
32use std::io::{Cursor, Read, Seek, Write};
33
34use crate::binary::{ReadBytes, WriteBytes};
35use crate::error::{RLibError, Result};
36
37#[cfg(test)]
38mod test;
39
// LZMA Alone doesn't have a defined magic number, but it always starts with one of these, depending on the compression level.
//
// These values are compared against the second `u32` of a compressed file (the one right after the uncompressed size).
// The low byte (`0x5D`) is the usual LZMA properties byte; the remaining bytes are presumably the first three bytes
// of the dictionary size, which is why there is one entry per dictionary size - TODO confirm against the LZMA spec.
//
// NOTE(review): `0x1000005D` is listed twice (2nd and 4th entries). This is harmless for a `contains` lookup, but one
// of them may have been meant to be a different value - confirm.
const MAGIC_NUMBERS_LZMA: [u32; 9] = [
    0x0100005D,
    0x1000005D,
    0x0800005D,
    0x1000005D,
    0x2000005D,
    0x4000005D,
    0x8000005D,
    0x0000005D,
    0x0400005D,
];

// Magic number expected at the start of the Lz4 data, right after the uncompressed size.
const MAGIC_NUMBER_LZ4: u32 = 0x184D2204;

// Magic number expected at the start of the Zstd data, right after the uncompressed size.
const MAGIC_NUMBER_ZSTD: u32 = 0xfd2fb528;
54
55//---------------------------------------------------------------------------//
56//                                  Traits
57//---------------------------------------------------------------------------//
58
/// Internal trait to implement compression over a data type.
///
/// See also [`Decompressible`] for the reverse operation.
pub trait Compressible {

    /// This function compresses the data of a file, returning the compressed data.
    ///
    /// # Arguments
    ///
    /// * `format` - The [`CompressionFormat`] to use for compression.
    ///
    /// # Returns
    ///
    /// A [`Vec<u8>`] containing the compressed data, or an error if compression failed.
    ///
    /// # Errors
    ///
    /// What counts as a failure is up to each implementation of this trait.
    fn compress(&self, format: CompressionFormat) -> Result<Vec<u8>>;
}
75
/// Internal trait to implement decompression over a data type.
///
/// See also [`Compressible`] for the reverse operation.
pub trait Decompressible {

    /// This function decompresses the provided data, returning the decompressed data, or an error if the decompression failed.
    ///
    /// Compression format is auto-detected using each format's magic numbers. See [`CompressionFormat`] for details
    /// on the supported formats.
    ///
    /// # Returns
    ///
    /// A [`Vec<u8>`] containing the decompressed data, or an error if decompression failed.
    fn decompress(&self) -> Result<Vec<u8>>;
}
91
/// Compression formats supported by TW Games.
///
/// Not all games support all formats. Check their game info to know what formats each game supports.
///
/// The default value is [`CompressionFormat::None`], meaning "do not compress".
#[derive(Debug, Copy, Clone, Default, PartialEq, Serialize, Deserialize)]
pub enum CompressionFormat {

    /// Dummy variant to disable compression.
    #[default]None,

    /// Legacy format. Supported by all PFH5 games (all Post-WH2 games).
    ///
    /// Specifically, Total War games use the Non-Streamed LZMA1 format with the following custom header:
    ///
    /// | Bytes | Type     | Data                                                                                |
    /// | ----- | -------- | ----------------------------------------------------------------------------------- |
    /// |  4    | [`u32`]  | Uncompressed size (as u32, max at 4GB).                                             |
    /// |  1    | [`u8`]   | LZMA model properties (lc, lp, pb) in encoded form... I think. Usually it's `0x5D`. |
    /// |  4    | [`u32`]  | Dictionary size (as u32)... I think. It's usually `[0x00, 0x00, 0x40, 0x00]`.       |
    ///
    /// For reference, a normal Non-Streamed LZMA1 header (from the original spec) contains:
    ///
    /// | Bytes | Type     | Data                                                        |
    /// | ----- | -------- | ----------------------------------------------------------- |
    /// |  1    | [`u8`]   | LZMA model properties (lc, lp, pb) in encoded form.         |
    /// |  4    | [`u32`]  | Dictionary size (32-bit unsigned integer, little-endian).   |
    /// |  8    | [`u64`]  | Uncompressed size (64-bit unsigned integer, little-endian). |
    ///
    /// This means that, to read a compressed file, one has to move the uncompressed size back to its standard place in the header,
    /// and, to write a file the game can read, one has to remove the uncompressed size from the standard header and prepend it to the file.
    Lzma1,

    /// New format introduced in WH3 6.2.
    ///
    /// This is a standard Lz4 implementation, with the following tweaks:
    ///
    /// | Bytes | Type      | Data                                          |
    /// | ----- | --------- | --------------------------------------------- |
    /// |  4    | [`u32`]   | Uncompressed size (as u32, max at 4GB).       |
    /// |  *    | &[[`u8`]] | Lz4 data, starting with the Lz4 Magic Number. |
    Lz4,

    /// New format introduced in WH3 6.2.
    ///
    /// This is a standard Zstd implementation, with the following tweaks:
    ///
    /// | Bytes | Type      | Data                                            |
    /// | ----- | --------- | ----------------------------------------------- |
    /// |  4    | [`u32`]   | Uncompressed size (as u32, max at 4GB).         |
    /// |  *    | &[[`u8`]] | Zstd data, starting with the Zstd Magic Number. |
    ///
    /// By default the Zstd compression is done with the checksum and content size flags enabled.
    Zstd,
}
145
146//---------------------------------------------------------------------------//
147//                              Implementations
148//---------------------------------------------------------------------------//
149
150impl Compressible for [u8] {
151    fn compress(&self, format: CompressionFormat) -> Result<Vec<u8>> {
152        match format {
153            CompressionFormat::None => Ok(self.to_vec()),
154            CompressionFormat::Lzma1 => {
155                let mut dst = vec![];
156                dst.write_i32(self.len() as i32)?;
157
158                let mut compressed_data = vec![];
159                let mut src = Cursor::new(self);
160                lzma_compress(&mut src, &mut compressed_data).unwrap();
161
162                if compressed_data.len() < 13 {
163                    return Err(RLibError::DataCannotBeCompressed);
164                }
165
166                dst.extend_from_slice(&compressed_data[..5]);
167                dst.extend_from_slice(&compressed_data[13..]);
168
169                Ok(dst)
170            },
171            CompressionFormat::Lz4 => {
172                let mut dst = vec![];
173                dst.write_u32(self.len() as u32)?;
174
175                let mut encoder = FrameEncoder::new(&mut dst);
176                encoder.write_all(self)?;
177                encoder.finish()?;
178
179                Ok(dst)
180            },
181            CompressionFormat::Zstd => {
182                let mut dst = vec![];
183                dst.write_u32(self.len() as u32)?;
184
185                let mut encoder = zstd::Encoder::new(&mut dst, 3)?;
186                encoder.include_checksum(true)?;
187                encoder.include_contentsize(true)?;
188                encoder.set_pledged_src_size(Some(self.len() as u64))?;
189
190                let mut src = Cursor::new(self.to_vec());
191                std::io::copy(&mut src, &mut encoder)?;
192                encoder.finish()?;
193                Ok(dst)
194            },
195        }
196    }
197}
198
199impl Decompressible for &[u8] {
200    fn decompress(&self) -> Result<Vec<u8>> {
201        if self.is_empty() {
202            return Ok(vec![]);
203        }
204
205        // We use the magic numbers to know in what format are the files compressed.
206        let mut src = Cursor::new(self);
207        let u_size = src.read_u32()?;
208        let magic_number = src.read_u32()?;
209
210        let format = if magic_number == MAGIC_NUMBER_ZSTD {
211            CompressionFormat::Zstd
212        } else if magic_number == MAGIC_NUMBER_LZ4 {
213            CompressionFormat::Lz4
214        } else if MAGIC_NUMBERS_LZMA.contains(&magic_number) {
215            CompressionFormat::Lzma1
216        }
217
218        // Special case files marked as compressed but not being compressed. This allows fixing them so they're readable again.
219        else {
220            CompressionFormat::None
221        };
222
223        // Fix the starting position of the file before processing it.
224        src.seek_relative(-4)?;
225
226        match format {
227            CompressionFormat::None => Ok(self.to_vec()),
228            CompressionFormat::Lzma1 => {
229
230                // LZMA1 headers have 13 bytes, but we only have 9 due to using a u32 size.
231                if self.len() < 9 {
232                    return Err(RLibError::DataCannotBeDecompressed);
233                }
234
235                // Unlike other formats, in this one we need to inject the uncompressed size in the file header. Otherwise it won't be a valid lzma file.
236                let mut fixed_data: Vec<u8> = Vec::with_capacity(self.len() + 4);
237                fixed_data.extend_from_slice(&src.read_slice(5, false)?);
238                fixed_data.write_u64(u_size as u64)?;
239                src.read_to_end(&mut fixed_data)?;
240
241                // Vanilla compressed files are LZMA Alone (or legacy) level 3 compressed files, reproducible by compressing them
242                // with default settings with 7-Zip. This should do the trick to get them decoded.
243                let mut dst = Vec::with_capacity(u_size as usize);
244                let mut reader = Cursor::new(fixed_data);
245                let result = lzma_decompress(&mut reader, &mut dst);
246
247                // Ok, history lesson. That method breaks sometimes due to difference in program's behavior when reading LZMA1 files with uncompressed size set.
248                // If that fails, we try passing a unknown size (u64::MAX) instead. This usually deals with the errors.
249                if result.is_err() {
250                    src.set_position(4);
251
252                    let mut fixed_data = Vec::with_capacity(self.len() + 4);
253                    fixed_data.extend_from_slice(&src.read_slice(5, false)?);
254                    fixed_data.write_u64(u64::MAX)?;
255                    src.read_to_end(&mut fixed_data)?;
256
257                    let mut dst = Vec::with_capacity(u_size as usize);
258                    let mut reader = Cursor::new(fixed_data);
259                    lzma_decompress(&mut reader, &mut dst)?;
260
261                    Ok(dst)
262                } else {
263                    Ok(dst)
264                }
265            },
266            CompressionFormat::Lz4 => {
267                let mut dst = Vec::with_capacity(u_size as usize);
268                let mut reader = FrameDecoder::new(src);
269                std::io::copy(&mut reader, &mut dst)?;
270                Ok(dst)
271            },
272            CompressionFormat::Zstd => {
273                let mut dst = Vec::with_capacity(u_size as usize);
274                zstd::stream::copy_decode(src, &mut dst)?;
275                Ok(dst)
276            },
277        }
278    }
279}
280
281impl From<&str> for CompressionFormat {
282    fn from(value: &str) -> Self {
283        match value {
284            "Lzma1" => Self::Lzma1,
285            "Lz4" => Self::Lz4,
286            "Zstd" => Self::Zstd,
287            _ => Self::None,
288        }
289    }
290}
291
292impl Display for CompressionFormat {
293    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294        match self {
295            Self::Lzma1 => write!(f, "Lzma1"),
296            Self::Lz4 => write!(f, "Lz4"),
297            Self::Zstd => write!(f, "Zstd"),
298            Self::None => write!(f, "None"),
299        }
300    }
301}