rpfm_lib/compression/mod.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! This module contains the code to compress/decompress data for Total War games.
12//!
//! The traits [`Compressible`] and [`Decompressible`] within this module contain functions to compress and decompress
//! data to/from CA's different supported compression formats. Implementations for byte slices ([`Compressible`] for `[u8]`
//! and [`Decompressible`] for `&[u8]`) are provided within this module.
15//!
16//! # Supported Formats
17//!
18//! See [`CompressionFormat`] for details on the supported compression formats (LZMA1, Lz4, Zstd) and their file structure.
19//!
20//! # Important Notes
21//!
//! * Due to a game bug, compressing tables tends to cause crashes on startup for some people. This bug seems to have been fixed in WH3, but all games before WH3
//! may still suffer from it, so unless manually forced to, this lib will not compress tables in those games. Tables will only be compressed in WH3 and newer games.
24//!
25//! * Compressed files are **only supported on PFH5 Packs** (Since Total War: Warhammer 2).
26
27use lz4_flex::frame::{FrameDecoder, FrameEncoder};
28use lzma_rs::{lzma_compress, lzma_decompress};
29use serde_derive::{Serialize, Deserialize};
30
31use std::fmt::Display;
32use std::io::{Cursor, Read, Seek, Write};
33
34use crate::binary::{ReadBytes, WriteBytes};
35use crate::error::{RLibError, Result};
36
37#[cfg(test)]
38mod test;
39
// LZMA Alone has no dedicated magic number. What we match against instead is the `u32` that follows the
// uncompressed size in the game's header: the properties byte (`0x5D`, the lowest byte of each value as read
// by `read_u32`, presumably little-endian) plus the first three bytes of the dictionary size, which depends
// on the compression level used.
//
// NOTE(review): `0x1000_005D` is listed twice. That's harmless for a `contains` lookup, but it may be a typo
// for another dictionary size. Confirm before touching it.
const MAGIC_NUMBERS_LZMA: [u32; 9] = [
    0x0100_005D,
    0x1000_005D,
    0x0800_005D,
    0x1000_005D,
    0x2000_005D,
    0x4000_005D,
    0x8000_005D,
    0x0000_005D,
    0x0400_005D,
];

// Magic number found at the start of Lz4 frame data.
const MAGIC_NUMBER_LZ4: u32 = 0x184D_2204;

// Magic number found at the start of Zstd data.
const MAGIC_NUMBER_ZSTD: u32 = 0xFD2F_B528;
54
55//---------------------------------------------------------------------------//
56// Traits
57//---------------------------------------------------------------------------//
58
59/// Internal trait to implement compression over a data type.
60///
61/// See also [`Decompressible`] for the reverse operation.
62pub trait Compressible {
63
64 /// This function compress the data of a file, returning the compressed data.
65 ///
66 /// # Arguments
67 ///
68 /// * `format` - The [`CompressionFormat`] to use for compression.
69 ///
70 /// # Returns
71 ///
72 /// A [`Vec<u8>`] containing the compressed data, or an error if compression failed.
73 fn compress(&self, format: CompressionFormat) -> Result<Vec<u8>>;
74}
75
76/// Internal trait to implement decompression over a data type.
77///
78/// See also [`Compressible`] for the reverse operation.
79pub trait Decompressible {
80
81 /// This function decompress the provided data, returning the decompressed data, or an error if the decompression failed.
82 ///
83 /// Compression format is auto-detected using each format's magic numbers. See [`CompressionFormat`] for details
84 /// on the supported formats.
85 ///
86 /// # Returns
87 ///
88 /// A [`Vec<u8>`] containing the decompressed data, or an error if decompression failed.
89 fn decompress(&self) -> Result<Vec<u8>>;
90}
91
92/// Compression formats supported by TW Games.
93///
94/// Not all games support all formats. Check their game info to know what formats each game support.
95#[derive(Debug, Copy, Clone, Default, PartialEq, Serialize, Deserialize)]
96pub enum CompressionFormat {
97
98 /// Dummy variant to disable compression.
99 #[default]None,
100
101 /// Legacy format. Supported by all PFH5 games (all Post-WH2 games).
102 ///
103 /// Specifically, Total War games use the Non-Streamed LZMA1 format with the following custom header:
104 ///
105 /// | Bytes | Type | Data |
106 /// | ----- | -------- | ----------------------------------------------------------------------------------- |
107 /// | 4 | [`u32`] | Uncompressed size (as u32, max at 4GB). |
108 /// | 1 | [`u8`] | LZMA model properties (lc, lp, pb) in encoded form... I think. Usually it's `0x5D`. |
109 /// | 4 | [`u32`] | Dictionary size (as u32)... I think. It's usually `[0x00, 0x00, 0x40, 0x00]`. |
110 ///
111 /// For reference, a normal Non-Streamed LZMA1 header (from the original spec) contains:
112 ///
113 /// | Bytes | Type | Data |
114 /// | ----- | -------- | ----------------------------------------------------------- |
115 /// | 1 | [`u8`] | LZMA model properties (lc, lp, pb) in encoded form. |
116 /// | 4 | [`u32`] | Dictionary size (32-bit unsigned integer, little-endian). |
117 /// | 8 | [`u64`] | Uncompressed size (64-bit unsigned integer, little-endian). |
118 ///
119 /// This means one has to move the uncompressed size to the correct place in order for a compressed file to be readable,
120 /// and one has to remove the uncompressed size and prepend it to the file in order for the game to read the compressed file.
121 Lzma1,
122
123 /// New format introduced in WH3 6.2.
124 ///
125 /// This is a standard Lz4 implementation, with the following tweaks:
126 ///
127 /// | Bytes | Type | Data |
128 /// | ----- | --------- | --------------------------------------------- |
129 /// | 4 | [`u32`] | Uncompressed size (as u32, max at 4GB). |
130 /// | * | &[[`u8`]] | Lz4 data, starting with the Lz4 Magic Number. |
131 Lz4,
132
133 /// New format introduced in WH3 6.2.
134 ///
135 /// This is a standard Zstd implementation, with the following tweaks:
136 ///
137 /// | Bytes | Type | Data |
138 /// | ----- | --------- | ----------------------------------------------- |
139 /// | 4 | [`u32`] | Uncompressed size (as u32, max at 4GB). |
140 /// | * | &[[`u8`]] | Zstd data, starting with the Zstd Magic Number. |
141 ///
142 /// By default the Zstd compression is done with the checksum and content size flags enabled.
143 Zstd,
144}
145
146//---------------------------------------------------------------------------//
147// Implementations
148//---------------------------------------------------------------------------//
149
150impl Compressible for [u8] {
151 fn compress(&self, format: CompressionFormat) -> Result<Vec<u8>> {
152 match format {
153 CompressionFormat::None => Ok(self.to_vec()),
154 CompressionFormat::Lzma1 => {
155 let mut dst = vec![];
156 dst.write_i32(self.len() as i32)?;
157
158 let mut compressed_data = vec![];
159 let mut src = Cursor::new(self);
160 lzma_compress(&mut src, &mut compressed_data).unwrap();
161
162 if compressed_data.len() < 13 {
163 return Err(RLibError::DataCannotBeCompressed);
164 }
165
166 dst.extend_from_slice(&compressed_data[..5]);
167 dst.extend_from_slice(&compressed_data[13..]);
168
169 Ok(dst)
170 },
171 CompressionFormat::Lz4 => {
172 let mut dst = vec![];
173 dst.write_u32(self.len() as u32)?;
174
175 let mut encoder = FrameEncoder::new(&mut dst);
176 encoder.write_all(self)?;
177 encoder.finish()?;
178
179 Ok(dst)
180 },
181 CompressionFormat::Zstd => {
182 let mut dst = vec![];
183 dst.write_u32(self.len() as u32)?;
184
185 let mut encoder = zstd::Encoder::new(&mut dst, 3)?;
186 encoder.include_checksum(true)?;
187 encoder.include_contentsize(true)?;
188 encoder.set_pledged_src_size(Some(self.len() as u64))?;
189
190 let mut src = Cursor::new(self.to_vec());
191 std::io::copy(&mut src, &mut encoder)?;
192 encoder.finish()?;
193 Ok(dst)
194 },
195 }
196 }
197}
198
199impl Decompressible for &[u8] {
200 fn decompress(&self) -> Result<Vec<u8>> {
201 if self.is_empty() {
202 return Ok(vec![]);
203 }
204
205 // We use the magic numbers to know in what format are the files compressed.
206 let mut src = Cursor::new(self);
207 let u_size = src.read_u32()?;
208 let magic_number = src.read_u32()?;
209
210 let format = if magic_number == MAGIC_NUMBER_ZSTD {
211 CompressionFormat::Zstd
212 } else if magic_number == MAGIC_NUMBER_LZ4 {
213 CompressionFormat::Lz4
214 } else if MAGIC_NUMBERS_LZMA.contains(&magic_number) {
215 CompressionFormat::Lzma1
216 }
217
218 // Special case files marked as compressed but not being compressed. This allows fixing them so they're readable again.
219 else {
220 CompressionFormat::None
221 };
222
223 // Fix the starting position of the file before processing it.
224 src.seek_relative(-4)?;
225
226 match format {
227 CompressionFormat::None => Ok(self.to_vec()),
228 CompressionFormat::Lzma1 => {
229
230 // LZMA1 headers have 13 bytes, but we only have 9 due to using a u32 size.
231 if self.len() < 9 {
232 return Err(RLibError::DataCannotBeDecompressed);
233 }
234
235 // Unlike other formats, in this one we need to inject the uncompressed size in the file header. Otherwise it won't be a valid lzma file.
236 let mut fixed_data: Vec<u8> = Vec::with_capacity(self.len() + 4);
237 fixed_data.extend_from_slice(&src.read_slice(5, false)?);
238 fixed_data.write_u64(u_size as u64)?;
239 src.read_to_end(&mut fixed_data)?;
240
241 // Vanilla compressed files are LZMA Alone (or legacy) level 3 compressed files, reproducible by compressing them
242 // with default settings with 7-Zip. This should do the trick to get them decoded.
243 let mut dst = Vec::with_capacity(u_size as usize);
244 let mut reader = Cursor::new(fixed_data);
245 let result = lzma_decompress(&mut reader, &mut dst);
246
247 // Ok, history lesson. That method breaks sometimes due to difference in program's behavior when reading LZMA1 files with uncompressed size set.
248 // If that fails, we try passing a unknown size (u64::MAX) instead. This usually deals with the errors.
249 if result.is_err() {
250 src.set_position(4);
251
252 let mut fixed_data = Vec::with_capacity(self.len() + 4);
253 fixed_data.extend_from_slice(&src.read_slice(5, false)?);
254 fixed_data.write_u64(u64::MAX)?;
255 src.read_to_end(&mut fixed_data)?;
256
257 let mut dst = Vec::with_capacity(u_size as usize);
258 let mut reader = Cursor::new(fixed_data);
259 lzma_decompress(&mut reader, &mut dst)?;
260
261 Ok(dst)
262 } else {
263 Ok(dst)
264 }
265 },
266 CompressionFormat::Lz4 => {
267 let mut dst = Vec::with_capacity(u_size as usize);
268 let mut reader = FrameDecoder::new(src);
269 std::io::copy(&mut reader, &mut dst)?;
270 Ok(dst)
271 },
272 CompressionFormat::Zstd => {
273 let mut dst = Vec::with_capacity(u_size as usize);
274 zstd::stream::copy_decode(src, &mut dst)?;
275 Ok(dst)
276 },
277 }
278 }
279}
280
281impl From<&str> for CompressionFormat {
282 fn from(value: &str) -> Self {
283 match value {
284 "Lzma1" => Self::Lzma1,
285 "Lz4" => Self::Lz4,
286 "Zstd" => Self::Zstd,
287 _ => Self::None,
288 }
289 }
290}
291
292impl Display for CompressionFormat {
293 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294 match self {
295 Self::Lzma1 => write!(f, "Lzma1"),
296 Self::Lz4 => write!(f, "Lz4"),
297 Self::Zstd => write!(f, "Zstd"),
298 Self::None => write!(f, "None"),
299 }
300 }
301}