rpfm_lib/utils.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Generic utility functions for the crate.
12//!
13//! This module contains miscellaneous utility functions that don't fit into more specific modules.
14//! Functions are organized into categories:
15//!
16//! - **Generic utils**: String parsing, case-insensitive operations, line/column calculations
17//! - **Path utils**: File and folder enumeration, path sanitization, absolute path conversion
18//! - **Time utils**: File modification time queries, current time helpers
19//! - **Pelite utils**: PE (Windows executable) file inspection
20//! - **VWise utils**: WWise audio hash generation
21//! - **Filename sanitization**: Windows-compatible filename cleaning
22//! - **Decoder utils**: Size mismatch validation for binary decoders
23
24use pelite::pe64;
25use pelite::resources::{FindError, Resources, version_info::VersionInfo};
26use rayon::prelude::*;
27
28use std::cmp::Ordering;
29use std::fs::{canonicalize, read_dir, File};
30use std::path::{Path, PathBuf};
31use std::time::{SystemTime, UNIX_EPOCH};
32
33use crate::error::{RLibError, Result};
34
35//--------------------------------------------------------//
36// Generic utils.
37//--------------------------------------------------------//
38
39/// Parses a string to a boolean value.
40///
41/// Accepts common boolean representations (case-insensitive):
42/// - `true` or `"1"` → `true`
43/// - `false` or `"0"` → `false`
44///
45/// # Arguments
46///
47/// * `string` - The string to parse
48///
49/// # Returns
50///
51/// Returns `Ok(bool)` if the string is a valid boolean representation, or
52/// `Err` if the string cannot be parsed.
53///
54/// # Examples
55///
56/// ```
57/// # use rpfm_lib::utils::parse_str_as_bool;
58/// assert_eq!(parse_str_as_bool("true").unwrap(), true);
59/// assert_eq!(parse_str_as_bool("1").unwrap(), true);
60/// assert_eq!(parse_str_as_bool("FALSE").unwrap(), false);
61/// assert_eq!(parse_str_as_bool("0").unwrap(), false);
62/// assert!(parse_str_as_bool("maybe").is_err());
63/// ```
64pub fn parse_str_as_bool(string: &str) -> Result<bool> {
65 let str_lower_case = string.to_lowercase();
66 if str_lower_case == "true" || str_lower_case == "1" {
67 Ok(true)
68 }
69 else if str_lower_case == "false" || str_lower_case == "0" {
70 Ok(false)
71 }
72 else {
73 Err(RLibError::ParseBoolError(string.to_owned()))
74 }
75}
76
77/// Checks if a string starts with another string (case-insensitive).
78///
79/// This function performs a case-insensitive prefix check, handling UTF-8 strings correctly
80/// by working with character boundaries rather than byte boundaries.
81///
82/// # Arguments
83///
84/// * `full_str` - The string to check
85/// * `partial_str` - The prefix to look for
86///
87/// # Returns
88///
89/// Returns `true` if `full_str` starts with `partial_str` (ignoring case), `false` otherwise.
90///
91/// # Examples
92///
93/// ```
94/// # use rpfm_lib::utils::starts_with_case_insensitive;
95/// assert!(starts_with_case_insensitive("Hello World", "hello"));
96/// assert!(starts_with_case_insensitive("RPFM", "rpf"));
97/// assert!(!starts_with_case_insensitive("Short", "ThisIsLonger"));
98/// ```
99pub fn starts_with_case_insensitive(full_str: &str, partial_str: &str) -> bool {
100 let full_str_chars = full_str.chars().count();
101 let partial_str_chars = partial_str.chars().count();
102 if full_str_chars > partial_str_chars {
103 let partial_str_len_in_bytes = partial_str.len();
104
105 let full_str_max_index = full_str.char_indices().map(|(index, _)| index).find(|index| index >= &partial_str_len_in_bytes).unwrap_or(full_str.len());
106 let full_str_base = &full_str[..full_str_max_index];
107 caseless::default_caseless_match_str(full_str_base, partial_str)
108 } else {
109 false
110 }
111}
112
/// Finds the closest valid UTF-8 character boundary at or after the given byte position.
///
/// When working with byte indices in UTF-8 strings, you may land in the middle of a
/// multi-byte character. This function moves forward to the next index that can be
/// used to slice the string safely.
///
/// The end of the string (`string.len()`) counts as a valid boundary, so an index
/// inside the last character, or exactly at the end, resolves to `string.len()`.
///
/// # Arguments
///
/// * `string` - The string to search in
/// * `start_byte` - The byte index to start from
///
/// # Returns
///
/// Returns the byte index of the next valid character boundary (may be `start_byte` itself
/// if it's already at a boundary).
///
/// # Panics
///
/// Panics if `start_byte` is greater than `string.len()`, as there is no boundary
/// at or after such an index.
pub fn closest_valid_char_byte(string: &str, start_byte: usize) -> usize {

    // `string.len()` is always a boundary, so the search only comes back empty
    // when `start_byte` is already past the end of the string.
    (start_byte..=string.len())
        .find(|&index| string.is_char_boundary(index))
        .unwrap_or_else(|| panic!(
            "start_byte {} is past the end of a string of {} bytes",
            start_byte,
            string.len()
        ))
}
141
/// Translates a byte position within a string into a `(line, column)` pair.
///
/// Handy for error reporting and editor-like features, where a raw byte offset
/// has to be shown as a human-readable location.
///
/// # Arguments
///
/// * `string` - The text the position refers to
/// * `pos` - The byte position within `string`
///
/// # Returns
///
/// Returns a tuple of `(line, column)`, both 0-indexed. The column is the byte
/// offset from the start of the line. If `pos` lies beyond the last line,
/// `(0, 0)` is returned.
///
/// # Note
///
/// Line endings are detected once for the whole string: if it contains any `\r\n`,
/// every line is assumed to end in `\r\n` (Windows); otherwise `\n` (Unix) is assumed.
pub fn line_column_from_string_pos(string: &str, pos: u64) -> (u64, u64) {
    let line_ending_len: u64 = if string.contains("\r\n") { 2 } else { 1 };

    // Byte offset at which the line currently being inspected starts.
    let mut line_start: u64 = 0;

    for (row, line) in string.lines().enumerate() {
        let line_len = line.len() as u64;

        // The position falls within this line (or on the terminator of the previous one,
        // in which case the column saturates to 0).
        if pos <= line_start + line_len {
            return (row as u64, pos.saturating_sub(line_start));
        }

        line_start += line_len + line_ending_len;
    }

    // Position past the end of the text.
    (0, 0)
}
183
184//--------------------------------------------------------//
185// Path utils.
186//--------------------------------------------------------//
187
188/// Returns all files in a directory, optionally scanning subdirectories recursively.
189///
190/// # Arguments
191///
192/// * `current_path` - The directory to scan
193/// * `scan_subdirs` - If `true`, recursively scans subdirectories; if `false`, only scans the top level
194///
195/// # Returns
196///
197/// Returns a vector of paths to all files found, or an error if the directory cannot be read.
198///
199/// # Examples
200///
201/// ```no_run
202/// # use std::path::Path;
203/// # use rpfm_lib::utils::files_from_subdir;
204/// // Get only files in the current directory
205/// let files = files_from_subdir(Path::new("./data"), false)?;
206///
207/// // Get all files recursively
208/// let all_files = files_from_subdir(Path::new("./data"), true)?;
209/// # Ok::<(), rpfm_lib::error::RLibError>(())
210/// ```
211pub fn files_from_subdir(current_path: &Path, scan_subdirs: bool) -> Result<Vec<PathBuf>> {
212
213 // Fast path. Takes a few ms less than the other one.
214 if !scan_subdirs {
215 return Ok(read_dir(current_path)?
216 .flatten()
217 .filter(|file| {
218 if let Ok(metadata) = file.metadata() {
219 metadata.is_file()
220 } else { false }
221 })
222 .map(|file| file.path()).collect());
223 }
224
225 // Slow path. Can scan subdirs.
226 let mut file_list: Vec<PathBuf> = vec![];
227 match read_dir(current_path) {
228 Ok(files_in_current_path) => {
229 for file in files_in_current_path {
230
231 // Get his path and continue, or return an error if it can't be read.
232 match file {
233 Ok(file) => {
234 let file_path = file.path();
235
236 // If it's a file, add it to the list.
237 if file_path.is_file() {
238 file_list.push(file_path);
239 }
240
241 // If it's a folder, add his files to the list.
242 else if file_path.is_dir() && scan_subdirs {
243 let mut subfolder_files_path = files_from_subdir(&file_path, scan_subdirs)?;
244 file_list.append(&mut subfolder_files_path);
245 }
246 }
247 Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
248 }
249 }
250 }
251
252 // In case of reading error, report it.
253 Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
254 }
255
256 // Return the list of paths.
257 Ok(file_list)
258}
259
260/// Returns all leaf directories (folders with no subfolders) in a directory tree.
261///
262/// This function recursively scans a directory and returns all directories that don't
263/// contain any subdirectories (leaf nodes in the directory tree).
264///
265/// # Arguments
266///
267/// * `current_path` - The directory to start scanning from
268/// * `ignore_empty_folders` - If `true`, only includes folders that contain files; if `false`, includes all leaf folders
269///
270/// # Returns
271///
272/// Returns a vector of paths to all leaf directories, or an error if any directory cannot be read.
273///
274/// # Examples
275///
276/// ```no_run
277/// # use std::path::Path;
278/// # use rpfm_lib::utils::final_folders_from_subdir;
279/// // Get all leaf folders, including empty ones
280/// let leaves = final_folders_from_subdir(Path::new("./project"), false)?;
281///
282/// // Get only leaf folders that contain files
283/// let non_empty_leaves = final_folders_from_subdir(Path::new("./project"), true)?;
284/// # Ok::<(), rpfm_lib::error::RLibError>(())
285/// ```
286pub fn final_folders_from_subdir(current_path: &Path, ignore_empty_folders: bool) -> Result<Vec<PathBuf>> {
287 let mut folder_list: Vec<PathBuf> = vec![];
288 match read_dir(current_path) {
289 Ok(dir_entry_in_current_path) => {
290 let mut has_subfolders = false;
291 let mut has_files = false;
292 for dir_entry in dir_entry_in_current_path {
293
294 // Get his path and continue, or return an error if it can't be read.
295 match dir_entry {
296 Ok(dir_entry) => {
297 let path = dir_entry.path();
298
299 // If it's a file, skip it.
300 if path.is_file() {
301 has_files = true;
302 continue;
303 }
304
305 if path.is_dir() {
306 // If it's a folder, check it..
307 let mut subfolder_files_path = final_folders_from_subdir(&path, ignore_empty_folders)?;
308 folder_list.append(&mut subfolder_files_path);
309 has_subfolders = true;
310 }
311 }
312 Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
313 }
314 }
315
316 if !has_subfolders && (!ignore_empty_folders || has_files) {
317 folder_list.push(current_path.to_path_buf());
318 }
319 }
320
321 // In case of reading error, report it.
322 Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
323 }
324
325 // Return the list of paths.
326 Ok(folder_list)
327}
328
329/// Returns the oldest file in a directory based on modification time.
330///
331/// # Arguments
332///
333/// * `current_path` - The directory to search
334///
335/// # Returns
336///
337/// Returns `Some(PathBuf)` pointing to the oldest file, or `None` if the directory is empty.
338pub fn oldest_file_in_folder(current_path: &Path) -> Result<Option<PathBuf>> {
339 let files = files_in_folder_from_newest_to_oldest(current_path)?;
340 Ok(files.last().cloned())
341}
342
343/// Returns all files in a directory sorted by modification time (newest first).
344///
345/// # Arguments
346///
347/// * `current_path` - The directory to search (non-recursive)
348///
349/// # Returns
350///
351/// Returns a vector of file paths sorted from newest to oldest by modification time.
352pub fn files_in_folder_from_newest_to_oldest(current_path: &Path) -> Result<Vec<PathBuf>> {
353 let mut files = files_from_subdir(current_path, false)?;
354 files.sort();
355 files.sort_by(|a, b| {
356 if let Ok(a) = File::open(a) {
357 if let Ok(b) = File::open(b) {
358 if let Ok(a) = last_modified_time_from_file(&a) {
359 if let Ok(b) = last_modified_time_from_file(&b) {
360 b.cmp(&a)
361 } else { Ordering::Equal}
362 } else { Ordering::Equal}
363 } else { Ordering::Equal}
364 } else { Ordering::Equal}
365 });
366
367 Ok(files)
368}
369
/// Turns a path into an absolute path string, without the Windows `\\?\` prefix.
///
/// The path is canonicalized first. If canonicalization fails (e.g. the path doesn't
/// exist), the path is used as provided. In both cases a leading `\\?\` is removed
/// from the resulting string, if present.
///
/// # Arguments
///
/// * `path` - The path to convert
///
/// # Returns
///
/// Returns the resulting path as a string (lossily converted if it's not valid Unicode),
/// with the `\\?\` prefix removed.
pub fn path_to_absolute_string(path: &Path) -> String {

    // Canonicalization usually fails because the path doesn't exist, or because it's already canonical.
    let resolved = match canonicalize(path) {
        Ok(canonical_path) => canonical_path.to_string_lossy().into_owned(),
        Err(_) => path.to_string_lossy().into_owned(),
    };

    match resolved.strip_prefix("\\\\?\\") {
        Some(without_prefix) => without_prefix.to_owned(),
        None => resolved,
    }
}
405
/// Turns a path into an absolute [`PathBuf`], optionally dropping the Windows `\\?\` prefix.
///
/// The path is canonicalized first. If canonicalization fails (e.g. the path doesn't
/// exist), the path is used as provided. When `strip_prefix` is set, a leading `\\?\`
/// is then removed, if present.
///
/// # Arguments
///
/// * `path` - The path to convert
/// * `strip_prefix` - If `true`, removes the Windows `\\?\` prefix
///
/// # Returns
///
/// Returns the resulting path, with the `\\?\` prefix removed if `strip_prefix` is `true`.
pub fn path_to_absolute_path(path: &Path, strip_prefix: bool) -> PathBuf {

    // Canonicalization usually fails because the path doesn't exist, or because it's already canonical.
    let resolved = canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    if !strip_prefix {
        return resolved;
    }

    // The path is only rebuilt from its string form when there is a prefix to remove.
    let without_prefix = resolved.to_string_lossy().strip_prefix("\\\\?\\").map(PathBuf::from);
    without_prefix.unwrap_or(resolved)
}
450
451
452//--------------------------------------------------------//
453// Time utils.
454//--------------------------------------------------------//
455
456/// Returns the current Unix timestamp in seconds.
457///
458/// # Returns
459///
460/// Returns the number of seconds since the Unix epoch (January 1, 1970 UTC).
461pub fn current_time() -> Result<u64> {
462 Ok(SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs())
463}
464
465/// Returns the last modification time of a file as a Unix timestamp.
466///
467/// # Arguments
468///
469/// * `file` - The file handle to query
470///
471/// # Returns
472///
473/// Returns the number of seconds since the Unix epoch when the file was last modified.
474pub fn last_modified_time_from_file(file: &File) -> Result<u64> {
475 Ok(file.metadata()?.modified()?.duration_since(UNIX_EPOCH)?.as_secs())
476}
477
478/// Returns the most recent modification time from a list of file paths.
479///
480/// This function checks all provided paths in parallel and returns the newest
481/// modification timestamp. Files that cannot be opened are silently ignored.
482///
483/// # Arguments
484///
485/// * `paths` - Slice of file paths to check
486///
487/// # Returns
488///
489/// Returns the newest modification time in seconds since Unix epoch, or `0` if no files could be read.
490pub fn last_modified_time_from_files(paths: &[PathBuf]) -> Result<u64> {
491 Ok(paths
492 .par_iter()
493 .filter_map(|path| File::open(path).ok())
494 .filter_map(|file| last_modified_time_from_file(&file).ok())
495 .max().unwrap_or(0)
496 )
497}
498
499//--------------------------------------------------------//
500// Pelite utils.
501//--------------------------------------------------------//
502
503/// Extracts version information from a Windows PE (Portable Executable) file.
504///
505/// This function parses the PE resources to extract version information embedded
506/// in the executable. Courtesy of the TES Loot team.
507///
508/// # Arguments
509///
510/// * `bytes` - The raw PE file bytes
511///
512/// # Returns
513///
514/// Returns the version information structure, or an error if parsing fails.
515pub(crate) fn pe_version_info(bytes: &'_ [u8]) -> std::result::Result<VersionInfo<'_>, FindError> {
516 pe_resources(bytes)?.version_info()
517}
518
519/// Extracts the resource section from a Windows PE file.
520///
521/// This function parses a PE file (32-bit or 64-bit) and extracts its resource section.
522/// Courtesy of the TES Loot team.
523///
524/// # Arguments
525///
526/// * `bytes` - The raw PE file bytes
527///
528/// # Returns
529///
530/// Returns the resources structure, or an error if parsing fails.
531pub(crate) fn pe_resources(bytes: &'_ [u8]) -> std::result::Result<Resources<'_>, pelite::Error> {
532 match pe64::PeFile::from_bytes(bytes) {
533 Ok(file) => {
534 use pelite::pe64::Pe;
535
536 file.resources()
537 }
538 Err(pelite::Error::PeMagic) => {
539 use pelite::pe32::{Pe, PeFile};
540
541 PeFile::from_bytes(bytes)?.resources()
542 }
543 Err(e) => Err(e),
544 }
545}
546
547//--------------------------------------------------------//
548// VWise utils.
549//--------------------------------------------------------//
550
/// Initial hash value (the 32-bit FNV offset basis).
const VWISE_HASH_VALUE: u32 = 0x811C9DC5;

/// Value the hash is multiplied by for every byte (the 32-bit FNV prime).
const VWISE_MULT_VALUE: u32 = 0x01000193;

/// Mask that keeps the hash within 32 bits.
const VWISE_AND_VALUE: u32 = 0xFFFFFFFF;

/// Generates a WWise audio hash from a filename.
///
/// This function implements the hash the WWise audio engine uses for identifying
/// audio files and events: a 32-bit FNV-1 hash (multiply, then XOR each byte) of the
/// trimmed, lowercased name.
///
/// # Arguments
///
/// * `name` - The filename to hash (will be trimmed and lowercased)
///
/// # Returns
///
/// Returns the 32-bit WWise hash value. An empty (or whitespace-only) name hashes to
/// the initial value, `0x811C9DC5`.
///
/// # Examples
///
/// ```
/// # use rpfm_lib::utils::hash_vwise;
/// assert_eq!(hash_vwise("a"), 0x050C5D7E);
/// assert_eq!(hash_vwise(" A "), hash_vwise("a"));
/// ```
///
/// # Note
///
/// Implementation courtesy of Asset Editor.
pub fn hash_vwise(name: &str) -> u32 {
    name.trim()
        .to_lowercase()
        .bytes()
        .fold(VWISE_HASH_VALUE, |hash, byte| {

            // The multiplication is expected to overflow: the algorithm works modulo 2^32,
            // so it has to wrap instead of panicking in builds with overflow checks.
            // The mask is a no-op for a `u32`; it only keeps the 32-bit truncation step explicit.
            (hash.wrapping_mul(VWISE_MULT_VALUE) ^ u32::from(byte)) & VWISE_AND_VALUE
        })
}
583
584//--------------------------------------------------------//
585// Filename sanitization utils.
586//--------------------------------------------------------//
587
/// Characters that are not allowed in Windows filenames: `< > : " / \ | ? *`
pub const INVALID_CHARACTERS_WINDOWS: [char; 9] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*'];

/// Name given to a file when sanitizing its filename leaves nothing behind.
pub const DEFAULT_FILENAME: &str = "unnamed_file";
605
606/// Sanitizes a file path by cleaning the filename component.
607///
608/// This function applies filename sanitization to the filename part of a path while
609/// preserving the directory structure. Invalid Windows characters are replaced with underscores.
610///
611/// # Arguments
612///
613/// * `path` - The path to sanitize
614///
615/// # Returns
616///
617/// Returns a new path with a sanitized filename.
618///
619/// # Examples
620///
621/// ```
622/// # use std::path::Path;
623/// # use rpfm_lib::utils::sanitize_path;
624/// let bad_path = Path::new("data/my:file?.txt");
625/// let clean_path = sanitize_path(bad_path);
626/// assert_eq!(clean_path, Path::new("data/my_file_.txt"));
627/// ```
628pub fn sanitize_path(path: &Path) -> PathBuf {
629 if let Some(file_name) = path.file_name() {
630 let sanitized_name = sanitize_filename(file_name.to_string_lossy().as_ref());
631 let mut sanitized_path = path.to_path_buf();
632 sanitized_path.set_file_name(sanitized_name);
633 sanitized_path
634 } else {
635 path.to_path_buf()
636 }
637}
638
639/// Sanitizes a filename by replacing invalid Windows characters.
640///
641/// This function ensures filenames are valid on Windows by:
642/// - Replacing invalid characters (`< > : " / \ | ? *`) with underscores
643/// - Removing leading/trailing whitespace and dots
644/// - Using a default name if the result is empty
645///
646/// # Arguments
647///
648/// * `filename` - The filename to sanitize
649///
650/// # Returns
651///
652/// Returns a Windows-compatible filename.
653///
654/// # Examples
655///
656/// ```
657/// # use rpfm_lib::utils::sanitize_filename;
658/// assert_eq!(sanitize_filename("my:file?.txt"), "my_file_.txt");
659/// assert_eq!(sanitize_filename(" .hidden "), "hidden");
660/// assert_eq!(sanitize_filename("<<<"), "___");
661/// assert_eq!(sanitize_filename("..."), "unnamed_file");
662/// ```
663pub fn sanitize_filename(filename: &str) -> String {
664 let mut sanitized = filename.to_string();
665
666 // Replace invalid characters with underscores.
667 for &ch in &INVALID_CHARACTERS_WINDOWS {
668 sanitized = sanitized.replace(ch, "_");
669 }
670
671 // Remove leading/trailing spaces and dots.
672 sanitized = sanitized.trim().trim_matches('.').to_string();
673
674 // If the filename becomes empty after sanitization, use a default name.
675 if sanitized.is_empty() {
676 sanitized = DEFAULT_FILENAME.to_string();
677 }
678
679 sanitized
680}
681
682//--------------------------------------------------------//
683// Decoder utils.
684//--------------------------------------------------------//
685
686/// Validates that a decoder cursor is at the expected position.
687///
688/// This function is used internally by binary decoders to verify that parsing ended
689/// at the expected byte position, helping detect format mismatches or decoding errors.
690///
691/// # Arguments
692///
693/// * `curr_pos` - The current cursor position
694/// * `expected_pos` - The expected cursor position
695///
696/// # Returns
697///
698/// Returns [`Ok`] if positions match, or an error if there's a size mismatch.
699///
700/// # Errors
701///
702/// Returns [`RLibError::DecodingMismatchSizeError`] if the positions don't match.
703pub(crate) fn check_size_mismatch(curr_pos: usize, expected_pos: usize) -> Result<()> {
704 if curr_pos != expected_pos {
705 return Err(RLibError::DecodingMismatchSizeError(expected_pos, curr_pos));
706 }
707
708 Ok(())
709}