Skip to main content

rpfm_lib/
utils.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2026 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! Generic utility functions for the crate.
12//!
13//! This module contains miscellaneous utility functions that don't fit into more specific modules.
14//! Functions are organized into categories:
15//!
16//! - **Generic utils**: String parsing, case-insensitive operations, line/column calculations
17//! - **Path utils**: File and folder enumeration, path sanitization, absolute path conversion
18//! - **Time utils**: File modification time queries, current time helpers
19//! - **Pelite utils**: PE (Windows executable) file inspection
20//! - **VWise utils**: WWise audio hash generation
21//! - **Filename sanitization**: Windows-compatible filename cleaning
22//! - **Decoder utils**: Size mismatch validation for binary decoders
23
24use pelite::pe64;
25use pelite::resources::{FindError, Resources, version_info::VersionInfo};
26use rayon::prelude::*;
27
28use std::cmp::Ordering;
29use std::fs::{canonicalize, read_dir, File};
30use std::path::{Path, PathBuf};
31use std::time::{SystemTime, UNIX_EPOCH};
32
33use crate::error::{RLibError, Result};
34
35//--------------------------------------------------------//
36// Generic utils.
37//--------------------------------------------------------//
38
39/// Parses a string to a boolean value.
40///
41/// Accepts common boolean representations (case-insensitive):
42/// - `true` or `"1"` → `true`
43/// - `false` or `"0"` → `false`
44///
45/// # Arguments
46///
47/// * `string` - The string to parse
48///
49/// # Returns
50///
51/// Returns `Ok(bool)` if the string is a valid boolean representation, or
52/// `Err` if the string cannot be parsed.
53///
54/// # Examples
55///
56/// ```
57/// # use rpfm_lib::utils::parse_str_as_bool;
58/// assert_eq!(parse_str_as_bool("true").unwrap(), true);
59/// assert_eq!(parse_str_as_bool("1").unwrap(), true);
60/// assert_eq!(parse_str_as_bool("FALSE").unwrap(), false);
61/// assert_eq!(parse_str_as_bool("0").unwrap(), false);
62/// assert!(parse_str_as_bool("maybe").is_err());
63/// ```
64pub fn parse_str_as_bool(string: &str) -> Result<bool> {
65    let str_lower_case = string.to_lowercase();
66    if str_lower_case == "true" || str_lower_case == "1" {
67        Ok(true)
68    }
69    else if str_lower_case == "false" || str_lower_case == "0" {
70        Ok(false)
71    }
72    else {
73        Err(RLibError::ParseBoolError(string.to_owned()))
74    }
75}
76
77/// Checks if a string starts with another string (case-insensitive).
78///
79/// This function performs a case-insensitive prefix check, handling UTF-8 strings correctly
80/// by working with character boundaries rather than byte boundaries.
81///
82/// # Arguments
83///
84/// * `full_str` - The string to check
85/// * `partial_str` - The prefix to look for
86///
87/// # Returns
88///
89/// Returns `true` if `full_str` starts with `partial_str` (ignoring case), `false` otherwise.
90///
91/// # Examples
92///
93/// ```
94/// # use rpfm_lib::utils::starts_with_case_insensitive;
95/// assert!(starts_with_case_insensitive("Hello World", "hello"));
96/// assert!(starts_with_case_insensitive("RPFM", "rpf"));
97/// assert!(!starts_with_case_insensitive("Short", "ThisIsLonger"));
98/// ```
99pub fn starts_with_case_insensitive(full_str: &str, partial_str: &str) -> bool {
100    let full_str_chars = full_str.chars().count();
101    let partial_str_chars = partial_str.chars().count();
102    if full_str_chars > partial_str_chars {
103        let partial_str_len_in_bytes = partial_str.len();
104
105        let full_str_max_index = full_str.char_indices().map(|(index, _)| index).find(|index| index >= &partial_str_len_in_bytes).unwrap_or(full_str.len());
106        let full_str_base = &full_str[..full_str_max_index];
107        caseless::default_caseless_match_str(full_str_base, partial_str)
108    } else {
109        false
110    }
111}
112
/// Finds the closest valid UTF-8 character boundary at or after the given byte position.
///
/// When working with byte indices in UTF-8 strings, you may land in the middle of a
/// multi-byte character. This function finds the next valid character boundary.
///
/// # Arguments
///
/// * `string` - The string to search in
/// * `start_byte` - The byte index to start from
///
/// # Returns
///
/// Returns the byte index of the next valid character boundary. This is `start_byte` itself
/// if it's already at a boundary, and may be `string.len()` (the end of the string is always
/// a valid boundary).
///
/// # Panics
///
/// Panics if `start_byte` is greater than `string.len()`, as there is no boundary at or after
/// a position outside the string.
///
/// # Examples
///
/// ```
/// # use rpfm_lib::utils::closest_valid_char_byte;
/// // 'é' takes bytes 1 and 2, so byte 2 is in the middle of it.
/// assert_eq!(closest_valid_char_byte("héllo", 2), 3);
/// assert_eq!(closest_valid_char_byte("héllo", 1), 1);
/// assert_eq!(closest_valid_char_byte("é", 1), 2);
/// ```
pub fn closest_valid_char_byte(string: &str, start_byte: usize) -> usize {

    // `string.len()` is always a boundary, so the search only fails for out-of-range positions.
    (start_byte..=string.len())
        .find(|&index| string.is_char_boundary(index))
        .unwrap_or_else(|| panic!(
            "start_byte {} is out of bounds for a string of {} bytes",
            start_byte,
            string.len()
        ))
}
141
/// Converts a byte position in a string to a line and column number.
///
/// This function is useful for error reporting and text editor-like functionality,
/// where you need to display human-readable positions.
///
/// # Arguments
///
/// * `string` - The string to analyze
/// * `pos` - The byte position in the string
///
/// # Returns
///
/// Returns a tuple of `(line, column)` where both are 0-indexed. The column is the byte offset
/// of `pos` from the start of its line, not a character count.
///
/// A position pointing at the `\n` of a `\r\n` terminator is reported as column 0 of the
/// following line. A position that is not covered by any line (for example, past the end of
/// the string) is reported as `(0, 0)`.
///
/// # Note
///
/// Works with `\r\n` (Windows) and `\n` (Unix) line endings, including strings that mix both:
/// the length of each line terminator is measured per line.
pub fn line_column_from_string_pos(string: &str, pos: u64) -> (u64, u64) {

    // Byte offset at which the line being inspected starts.
    let mut line_start = 0u64;

    for (row, raw_line) in string.split_inclusive('\n').enumerate() {

        // Visible content of the line, without its terminator (`\n` or `\r\n`).
        let content = match raw_line.strip_suffix('\n') {
            Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
            None => raw_line,
        };

        // The position belongs to this line if it's not past the end of its content.
        if pos <= line_start + content.len() as u64 {

            // A position inside the previous terminator is before this line's start: clamp to 0.
            return (row as u64, pos.saturating_sub(line_start));
        }

        // Otherwise, skip the whole line, terminator included.
        line_start += raw_line.len() as u64;
    }

    (0, 0)
}
183
184//--------------------------------------------------------//
185// Path utils.
186//--------------------------------------------------------//
187
188/// Returns all files in a directory, optionally scanning subdirectories recursively.
189///
190/// # Arguments
191///
192/// * `current_path` - The directory to scan
193/// * `scan_subdirs` - If `true`, recursively scans subdirectories; if `false`, only scans the top level
194///
195/// # Returns
196///
197/// Returns a vector of paths to all files found, or an error if the directory cannot be read.
198///
199/// # Examples
200///
201/// ```no_run
202/// # use std::path::Path;
203/// # use rpfm_lib::utils::files_from_subdir;
204/// // Get only files in the current directory
205/// let files = files_from_subdir(Path::new("./data"), false)?;
206///
207/// // Get all files recursively
208/// let all_files = files_from_subdir(Path::new("./data"), true)?;
209/// # Ok::<(), rpfm_lib::error::RLibError>(())
210/// ```
211pub fn files_from_subdir(current_path: &Path, scan_subdirs: bool) -> Result<Vec<PathBuf>> {
212
213    // Fast path. Takes a few ms less than the other one.
214    if !scan_subdirs {
215        return Ok(read_dir(current_path)?
216            .flatten()
217            .filter(|file| {
218                if let Ok(metadata) = file.metadata() {
219                    metadata.is_file()
220                } else { false }
221            })
222            .map(|file| file.path()).collect());
223    }
224
225    // Slow path. Can scan subdirs.
226    let mut file_list: Vec<PathBuf> = vec![];
227    match read_dir(current_path) {
228        Ok(files_in_current_path) => {
229            for file in files_in_current_path {
230
231                // Get his path and continue, or return an error if it can't be read.
232                match file {
233                    Ok(file) => {
234                        let file_path = file.path();
235
236                        // If it's a file, add it to the list.
237                        if file_path.is_file() {
238                            file_list.push(file_path);
239                        }
240
241                        // If it's a folder, add his files to the list.
242                        else if file_path.is_dir() && scan_subdirs {
243                            let mut subfolder_files_path = files_from_subdir(&file_path, scan_subdirs)?;
244                            file_list.append(&mut subfolder_files_path);
245                        }
246                    }
247                    Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
248                }
249            }
250        }
251
252        // In case of reading error, report it.
253        Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
254    }
255
256    // Return the list of paths.
257    Ok(file_list)
258}
259
260/// Returns all leaf directories (folders with no subfolders) in a directory tree.
261///
262/// This function recursively scans a directory and returns all directories that don't
263/// contain any subdirectories (leaf nodes in the directory tree).
264///
265/// # Arguments
266///
267/// * `current_path` - The directory to start scanning from
268/// * `ignore_empty_folders` - If `true`, only includes folders that contain files; if `false`, includes all leaf folders
269///
270/// # Returns
271///
272/// Returns a vector of paths to all leaf directories, or an error if any directory cannot be read.
273///
274/// # Examples
275///
276/// ```no_run
277/// # use std::path::Path;
278/// # use rpfm_lib::utils::final_folders_from_subdir;
279/// // Get all leaf folders, including empty ones
280/// let leaves = final_folders_from_subdir(Path::new("./project"), false)?;
281///
282/// // Get only leaf folders that contain files
283/// let non_empty_leaves = final_folders_from_subdir(Path::new("./project"), true)?;
284/// # Ok::<(), rpfm_lib::error::RLibError>(())
285/// ```
286pub fn final_folders_from_subdir(current_path: &Path, ignore_empty_folders: bool) -> Result<Vec<PathBuf>> {
287    let mut folder_list: Vec<PathBuf> = vec![];
288    match read_dir(current_path) {
289        Ok(dir_entry_in_current_path) => {
290            let mut has_subfolders = false;
291            let mut has_files = false;
292            for dir_entry in dir_entry_in_current_path {
293
294                // Get his path and continue, or return an error if it can't be read.
295                match dir_entry {
296                    Ok(dir_entry) => {
297                        let path = dir_entry.path();
298
299                        // If it's a file, skip it.
300                        if path.is_file() {
301                            has_files = true;
302                            continue;
303                        }
304
305                        if path.is_dir() {
306                        // If it's a folder, check it..
307                            let mut subfolder_files_path = final_folders_from_subdir(&path, ignore_empty_folders)?;
308                            folder_list.append(&mut subfolder_files_path);
309                            has_subfolders = true;
310                        }
311                    }
312                    Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
313                }
314            }
315
316            if !has_subfolders && (!ignore_empty_folders || has_files) {
317                folder_list.push(current_path.to_path_buf());
318            }
319        }
320
321        // In case of reading error, report it.
322        Err(_) => return Err(RLibError::ReadFileFolderError(current_path.to_string_lossy().to_string())),
323    }
324
325    // Return the list of paths.
326    Ok(folder_list)
327}
328
329/// Returns the oldest file in a directory based on modification time.
330///
331/// # Arguments
332///
333/// * `current_path` - The directory to search
334///
335/// # Returns
336///
337/// Returns `Some(PathBuf)` pointing to the oldest file, or `None` if the directory is empty.
338pub fn oldest_file_in_folder(current_path: &Path) -> Result<Option<PathBuf>> {
339    let files = files_in_folder_from_newest_to_oldest(current_path)?;
340    Ok(files.last().cloned())
341}
342
343/// Returns all files in a directory sorted by modification time (newest first).
344///
345/// # Arguments
346///
347/// * `current_path` - The directory to search (non-recursive)
348///
349/// # Returns
350///
351/// Returns a vector of file paths sorted from newest to oldest by modification time.
352pub fn files_in_folder_from_newest_to_oldest(current_path: &Path) -> Result<Vec<PathBuf>> {
353    let mut files = files_from_subdir(current_path, false)?;
354    files.sort();
355    files.sort_by(|a, b| {
356        if let Ok(a) = File::open(a) {
357            if let Ok(b) = File::open(b) {
358                if let Ok(a) = last_modified_time_from_file(&a) {
359                    if let Ok(b) = last_modified_time_from_file(&b) {
360                        b.cmp(&a)
361                    } else { Ordering::Equal}
362                } else { Ordering::Equal}
363            } else { Ordering::Equal}
364        } else { Ordering::Equal}
365    });
366
367    Ok(files)
368}
369
/// Converts a path to an absolute path string, without the Windows `\\?\` prefix.
///
/// The path is canonicalized first. If canonicalization fails (e.g., the path doesn't exist),
/// the path is used as provided. In both cases, if the resulting string starts with the
/// `\\?\` prefix (the one Windows uses for verbatim/extended-length paths), the prefix is removed.
///
/// Non-UTF-8 sequences in the path are converted lossily.
///
/// # Arguments
///
/// * `path` - The path to convert
///
/// # Returns
///
/// Returns the canonicalized path (or the original one, if it couldn't be canonicalized)
/// as a string, with the `\\?\` prefix removed.
pub fn path_to_absolute_string(path: &Path) -> String {
    const VERBATIM_PREFIX: &str = "\\\\?\\";

    // Canonicalization usually fails because the path is already canonical, or it doesn't exist.
    // In that case we work with the path as we received it.
    let resolved = match canonicalize(path) {
        Ok(canonical_path) => canonical_path.to_string_lossy().into_owned(),
        Err(_) => path.to_string_lossy().into_owned(),
    };

    match resolved.strip_prefix(VERBATIM_PREFIX) {
        Some(without_prefix) => without_prefix.to_owned(),
        None => resolved,
    }
}
405
/// Converts a path to an absolute [`PathBuf`], optionally removing the Windows `\\?\` prefix.
///
/// The path is canonicalized first. If canonicalization fails (e.g., the path doesn't exist),
/// the path is used as provided. Then, if requested, the `\\?\` prefix (the one Windows uses
/// for verbatim/extended-length paths) is removed from the result.
///
/// The prefix is detected on a lossy string conversion of the path, so a path that gets its
/// prefix removed is rebuilt from that lossy string.
///
/// # Arguments
///
/// * `path` - The path to convert
/// * `strip_prefix` - If `true`, removes the Windows `\\?\` prefix
///
/// # Returns
///
/// Returns the canonicalized path (or the original one, if it couldn't be canonicalized),
/// with the `\\?\` prefix removed if `strip_prefix` is `true`.
pub fn path_to_absolute_path(path: &Path, strip_prefix: bool) -> PathBuf {

    // Canonicalization usually fails because the path is already canonical, or it doesn't exist.
    // In that case we work with the path as we received it.
    let resolved = canonicalize(path).unwrap_or_else(|_| path.to_owned());
    if !strip_prefix {
        return resolved;
    }

    let without_prefix = resolved
        .to_string_lossy()
        .strip_prefix("\\\\?\\")
        .map(PathBuf::from);

    // No prefix found means the resolved path is already what we want.
    without_prefix.unwrap_or(resolved)
}
450
451
452//--------------------------------------------------------//
453// Time utils.
454//--------------------------------------------------------//
455
456/// Returns the current Unix timestamp in seconds.
457///
458/// # Returns
459///
460/// Returns the number of seconds since the Unix epoch (January 1, 1970 UTC).
461pub fn current_time() -> Result<u64> {
462    Ok(SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs())
463}
464
465/// Returns the last modification time of a file as a Unix timestamp.
466///
467/// # Arguments
468///
469/// * `file` - The file handle to query
470///
471/// # Returns
472///
473/// Returns the number of seconds since the Unix epoch when the file was last modified.
474pub fn last_modified_time_from_file(file: &File) -> Result<u64> {
475    Ok(file.metadata()?.modified()?.duration_since(UNIX_EPOCH)?.as_secs())
476}
477
478/// Returns the most recent modification time from a list of file paths.
479///
480/// This function checks all provided paths in parallel and returns the newest
481/// modification timestamp. Files that cannot be opened are silently ignored.
482///
483/// # Arguments
484///
485/// * `paths` - Slice of file paths to check
486///
487/// # Returns
488///
489/// Returns the newest modification time in seconds since Unix epoch, or `0` if no files could be read.
490pub fn last_modified_time_from_files(paths: &[PathBuf]) -> Result<u64> {
491    Ok(paths
492        .par_iter()
493        .filter_map(|path| File::open(path).ok())
494        .filter_map(|file| last_modified_time_from_file(&file).ok())
495        .max().unwrap_or(0)
496    )
497}
498
499//--------------------------------------------------------//
500// Pelite utils.
501//--------------------------------------------------------//
502
503/// Extracts version information from a Windows PE (Portable Executable) file.
504///
505/// This function parses the PE resources to extract version information embedded
506/// in the executable. Courtesy of the TES Loot team.
507///
508/// # Arguments
509///
510/// * `bytes` - The raw PE file bytes
511///
512/// # Returns
513///
514/// Returns the version information structure, or an error if parsing fails.
515pub(crate) fn pe_version_info(bytes: &'_ [u8]) -> std::result::Result<VersionInfo<'_>, FindError> {
516    pe_resources(bytes)?.version_info()
517}
518
519/// Extracts the resource section from a Windows PE file.
520///
521/// This function parses a PE file (32-bit or 64-bit) and extracts its resource section.
522/// Courtesy of the TES Loot team.
523///
524/// # Arguments
525///
526/// * `bytes` - The raw PE file bytes
527///
528/// # Returns
529///
530/// Returns the resources structure, or an error if parsing fails.
531pub(crate) fn pe_resources(bytes: &'_ [u8]) -> std::result::Result<Resources<'_>, pelite::Error> {
532    match pe64::PeFile::from_bytes(bytes) {
533        Ok(file) => {
534            use pelite::pe64::Pe;
535
536            file.resources()
537        }
538        Err(pelite::Error::PeMagic) => {
539            use pelite::pe32::{Pe, PeFile};
540
541            PeFile::from_bytes(bytes)?.resources()
542        }
543        Err(e) => Err(e),
544    }
545}
546
547//--------------------------------------------------------//
548// VWise utils.
549//--------------------------------------------------------//
550
/// Initial value of the hash (the 32-bit FNV offset basis).
const VWISE_HASH_VALUE: u32 = 0x811C9DC5;

/// Value the hash is multiplied by on each step (the 32-bit FNV prime).
const VWISE_MULT_VALUE: u32 = 0x01000193;

/// Mask applied to the hash on each step to keep it within 32 bits.
const VWISE_AND_VALUE: u32 = 0xFFFFFFFF;

/// Generates a WWise audio hash from a filename.
///
/// This function implements the WWise audio engine's hash algorithm for identifying
/// audio files and events. The algorithm performs 32-bit FNV-1 hashing (multiply first,
/// then XOR) over the bytes of the lowercase, trimmed filename.
///
/// The multiplication wraps around on overflow, so this function never panics, regardless
/// of the build having overflow checks enabled or not.
///
/// # Arguments
///
/// * `name` - The filename to hash (will be trimmed and lowercased)
///
/// # Returns
///
/// Returns the 32-bit WWise hash value. The hash of an empty name is the initial hash value.
///
/// # Note
///
/// Implementation courtesy of Asset Editor.
///
/// # Examples
///
/// ```
/// # use rpfm_lib::utils::hash_vwise;
/// assert_eq!(hash_vwise("a"), 0x050C5D7E);
/// assert_eq!(hash_vwise("  A  "), hash_vwise("a"));
/// ```
pub fn hash_vwise(name: &str) -> u32 {
    name.trim()
        .to_lowercase()
        .bytes()
        .fold(VWISE_HASH_VALUE, |hash_value, byte| {

            // The hash is expected to overflow: wrap around instead of panicking on debug builds.
            // The mask is a no-op on a u32, and it's only kept for parity with the reference algorithm.
            (hash_value.wrapping_mul(VWISE_MULT_VALUE) ^ u32::from(byte)) & VWISE_AND_VALUE
        })
}
583
584//--------------------------------------------------------//
585// Filename sanitization utils.
586//--------------------------------------------------------//
587
/// Windows-invalid filename characters.
///
/// These characters cannot be used in Windows filenames: `< > : " / \ | ? *`
pub const INVALID_CHARACTERS_WINDOWS: [char; 9] = [
    '<',
    '>',
    ':',
    '"',
    '/',
    '\\',
    '|',
    '?',
    '*',
];

/// Default filename used when sanitization results in an empty name.
pub const DEFAULT_FILENAME: &str = "unnamed_file";

/// Sanitizes a file path by cleaning the filename component.
///
/// This function applies [`sanitize_filename`] to the last component of a path while
/// preserving the rest of the path. Non-UTF-8 sequences in the filename are converted lossily.
///
/// # Arguments
///
/// * `path` - The path to sanitize
///
/// # Returns
///
/// Returns a new path with a sanitized filename. Paths without a filename component
/// (for example, paths ending in `..`) are returned unchanged.
///
/// # Examples
///
/// ```
/// # use std::path::Path;
/// # use rpfm_lib::utils::sanitize_path;
/// let bad_path = Path::new("data/my:file?.txt");
/// let clean_path = sanitize_path(bad_path);
/// assert_eq!(clean_path, Path::new("data/my_file_.txt"));
/// ```
pub fn sanitize_path(path: &Path) -> PathBuf {
    match path.file_name() {
        Some(file_name) => path.with_file_name(sanitize_filename(&file_name.to_string_lossy())),
        None => path.to_path_buf(),
    }
}

/// Sanitizes a filename by replacing invalid Windows characters.
///
/// This function ensures filenames are valid on Windows by:
/// - Replacing invalid characters (`< > : " / \ | ? *`) with underscores
/// - Removing leading/trailing whitespace and dots, in any combination
/// - Using a default name if the result is empty
///
/// Control characters and reserved device names (like `CON` or `NUL`) are not handled.
///
/// # Arguments
///
/// * `filename` - The filename to sanitize
///
/// # Returns
///
/// Returns the sanitized filename, or [`DEFAULT_FILENAME`] if nothing is left after sanitizing.
///
/// # Examples
///
/// ```
/// # use rpfm_lib::utils::sanitize_filename;
/// assert_eq!(sanitize_filename("my:file?.txt"), "my_file_.txt");
/// assert_eq!(sanitize_filename("   .hidden   "), "hidden");
/// assert_eq!(sanitize_filename(". name ."), "name");
/// assert_eq!(sanitize_filename("<<<"), "___");
/// assert_eq!(sanitize_filename("..."), "unnamed_file");
/// ```
pub fn sanitize_filename(filename: &str) -> String {

    // Replace invalid characters with underscores, in a single pass.
    let replaced: String = filename
        .chars()
        .map(|ch| if INVALID_CHARACTERS_WINDOWS.contains(&ch) { '_' } else { ch })
        .collect();

    // Remove leading/trailing spaces and dots together, so interleaved runs like ". ." are fully removed.
    let trimmed = replaced.trim_matches(|ch: char| ch.is_whitespace() || ch == '.');

    // If the filename becomes empty after sanitization, use a default name.
    if trimmed.is_empty() {
        DEFAULT_FILENAME.to_owned()
    } else {
        trimmed.to_owned()
    }
}
681
682//--------------------------------------------------------//
683// Decoder utils.
684//--------------------------------------------------------//
685
686/// Validates that a decoder cursor is at the expected position.
687///
688/// This function is used internally by binary decoders to verify that parsing ended
689/// at the expected byte position, helping detect format mismatches or decoding errors.
690///
691/// # Arguments
692///
693/// * `curr_pos` - The current cursor position
694/// * `expected_pos` - The expected cursor position
695///
696/// # Returns
697///
698/// Returns [`Ok`] if positions match, or an error if there's a size mismatch.
699///
700/// # Errors
701///
702/// Returns [`RLibError::DecodingMismatchSizeError`] if the positions don't match.
703pub(crate) fn check_size_mismatch(curr_pos: usize, expected_pos: usize) -> Result<()> {
704    if curr_pos != expected_pos {
705        return Err(RLibError::DecodingMismatchSizeError(expected_pos, curr_pos));
706    }
707
708    Ok(())
709}