std\sys\path/
windows.rs

1use crate::ffi::{OsStr, OsString};
2use crate::path::{Path, PathBuf, Prefix};
3use crate::sys::api::utf16;
4use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};
5use crate::{io, ptr};
6
7#[cfg(test)]
8mod tests;
9
/// The primary path separator for this platform, as a single character.
pub const MAIN_SEP: char = '\\';
/// The primary path separator for this platform, as a string slice.
pub const MAIN_SEP_STR: &str = "\\";
12
/// Returns `true` if `b` is one of the two separator bytes accepted in
/// ordinary (non-verbatim) paths: `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
17
/// Returns `true` if `b` is the backslash byte, the only byte treated as a
/// separator by the verbatim-path handling in this module.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
22
23pub fn is_verbatim(path: &[u16]) -> bool {
24    path.starts_with(utf16!(r"\\?\")) || path.starts_with(utf16!(r"\??\"))
25}
26
27/// Returns true if `path` looks like a lone filename.
28pub(crate) fn is_file_name(path: &OsStr) -> bool {
29    !path.as_encoded_bytes().iter().copied().any(is_sep_byte)
30}
31pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
32    let is_verbatim = path.as_encoded_bytes().starts_with(br"\\?\");
33    let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
34    if let Some(&c) = path.as_encoded_bytes().last() { is_separator(c) } else { false }
35}
36
/// Appends `suffix` verbatim to the end of `path`.
///
/// Nothing is stripped or inserted, so this can be used to add an extension
/// while keeping any extension the path already has.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut joined: OsString = path.into_os_string();
    joined.push(suffix);
    PathBuf::from(joined)
}
45
/// Holds a path together with a separator-normalized copy of its first
/// `LEN` bytes, so prefixes can be matched regardless of whether `/` or `\`
/// was used.
struct PrefixParser<'a, const LEN: usize> {
    /// The path being parsed, untouched.
    path: &'a OsStr,
    /// Up to `LEN` leading bytes of `path` with every `/` replaced by `\`;
    /// zero-filled past the end of a shorter path.
    prefix: [u8; LEN],
}

impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
    /// Copies at most `LEN` leading bytes of `path`, rewriting `/` to `\`.
    #[inline]
    fn get_prefix(path: &OsStr) -> [u8; LEN] {
        let mut prefix = [0; LEN];
        // Only the ASCII byte `/` is ever rewritten; every other byte is
        // copied through unchanged.
        for (slot, &byte) in prefix.iter_mut().zip(path.as_encoded_bytes()) {
            *slot = match byte {
                b'/' => b'\\',
                other => other,
            };
        }
        prefix
    }

    /// Creates a parser for `path`.
    fn new(path: &'a OsStr) -> Self {
        let prefix = Self::get_prefix(path);
        Self { path, prefix }
    }

    /// Returns a cursor positioned at the start of the path whose view of
    /// the normalized prefix is limited to the bytes that really exist.
    fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
        let visible = self.path.len().min(LEN);
        PrefixParserSlice { path: self.path, prefix: &self.prefix[..visible], index: 0 }
    }
}

/// A cursor over a [`PrefixParser`]: `index` counts how many leading bytes
/// have been consumed so far.
struct PrefixParserSlice<'a, 'b> {
    /// The original, unmodified path.
    path: &'a OsStr,
    /// The separator-normalized leading bytes of `path`.
    prefix: &'b [u8],
    /// Number of bytes already consumed from the start.
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// If the unconsumed part of the normalized prefix starts with `prefix`,
    /// returns a new cursor advanced past it; otherwise returns `None`.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let remaining = &self.prefix[self.index..];
        if remaining.starts_with(prefix.as_bytes()) {
            Some(Self { path: self.path, prefix: self.prefix, index: self.index + prefix.len() })
        } else {
            None
        }
    }

    /// Returns the consumed bytes as they appear in the original path (that
    /// is, without separator normalization).
    fn prefix_bytes(&self) -> &'a [u8] {
        let bytes = self.path.as_encoded_bytes();
        &bytes[..self.index]
    }

    /// Consumes the cursor and returns the not-yet-consumed tail of the
    /// original path.
    fn finish(self) -> &'a OsStr {
        let tail = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(tail) }
    }
}
100
101pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
102    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
103
104    let parser = PrefixParser::<8>::new(path);
105    let parser = parser.as_slice();
106    if let Some(parser) = parser.strip_prefix(r"\\") {
107        // \\
108
109        // The meaning of verbatim paths can change when they use a different
110        // separator.
111        if let Some(parser) = parser.strip_prefix(r"?\")
112            && !parser.prefix_bytes().iter().any(|&x| x == b'/')
113        {
114            // \\?\
115            if let Some(parser) = parser.strip_prefix(r"UNC\") {
116                // \\?\UNC\server\share
117
118                let path = parser.finish();
119                let (server, path) = parse_next_component(path, true);
120                let (share, _) = parse_next_component(path, true);
121
122                Some(VerbatimUNC(server, share))
123            } else {
124                let path = parser.finish();
125
126                // in verbatim paths only recognize an exact drive prefix
127                if let Some(drive) = parse_drive_exact(path) {
128                    // \\?\C:
129                    Some(VerbatimDisk(drive))
130                } else {
131                    // \\?\prefix
132                    let (prefix, _) = parse_next_component(path, true);
133                    Some(Verbatim(prefix))
134                }
135            }
136        } else if let Some(parser) = parser.strip_prefix(r".\") {
137            // \\.\COM42
138            let path = parser.finish();
139            let (prefix, _) = parse_next_component(path, false);
140            Some(DeviceNS(prefix))
141        } else {
142            let path = parser.finish();
143            let (server, path) = parse_next_component(path, false);
144            let (share, _) = parse_next_component(path, false);
145
146            if !server.is_empty() && !share.is_empty() {
147                // \\server\share
148                Some(UNC(server, share))
149            } else {
150                // no valid prefix beginning with "\\" recognized
151                None
152            }
153        }
154    } else {
155        // If it has a drive like `C:` then it's a disk.
156        // Otherwise there is no prefix.
157        parse_drive(path).map(Disk)
158    }
159}
160
// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// Returns the drive letter converted to ASCII uppercase, or `None` if `path`
// does not start with an ASCII letter immediately followed by `:`.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let [letter, b':', ..] = path.as_encoded_bytes() else {
        return None;
    };
    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    letter.is_ascii_alphabetic().then(|| letter.to_ascii_uppercase())
}
174
175// Parses a drive prefix exactly, e.g. "C:"
176fn parse_drive_exact(path: &OsStr) -> Option<u8> {
177    // only parse two bytes: the drive letter and the drive separator
178    if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
179        parse_drive(path)
180    } else {
181        None
182    }
183}
184
185// Parse the next path component.
186//
187// Returns the next component and the rest of the path excluding the component and separator.
188// Does not recognize `/` as a separator character if `verbatim` is true.
189fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
190    let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
191
192    match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
193        Some(separator_start) => {
194            let separator_end = separator_start + 1;
195
196            let component = &path.as_encoded_bytes()[..separator_start];
197
198            // Panic safe
199            // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
200            let path = &path.as_encoded_bytes()[separator_end..];
201
202            // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
203            // is encoded in a single byte, therefore `bytes[separator_start]` and
204            // `bytes[separator_end]` must be code point boundaries and thus
205            // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
206            unsafe {
207                (
208                    OsStr::from_encoded_bytes_unchecked(component),
209                    OsStr::from_encoded_bytes_unchecked(path),
210                )
211            }
212        }
213        None => (path, OsStr::new("")),
214    }
215}
216
217/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
218///
219/// This path may or may not have a verbatim prefix.
220pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
221    let path = to_u16s(path)?;
222    get_long_path(path, true)
223}
224
225/// Gets a normalized absolute path that can bypass path length limits.
226///
227/// Setting prefer_verbatim to true suggests a stronger preference for verbatim
228/// paths even when not strictly necessary. This allows the Windows API to avoid
229/// repeating our work. However, if the path may be given back to users or
230/// passed to other application then it's preferable to use non-verbatim paths
231/// when possible. Non-verbatim paths are better understood by users and handled
232/// by more software.
233pub(crate) fn get_long_path(mut path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> {
234    // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
235    // However, for APIs such as CreateDirectory[1], the limit is 248.
236    //
237    // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
238    const LEGACY_MAX_PATH: usize = 248;
239    // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
240    // All of these are in the ASCII range so they can be cast directly to `u16`.
241    const SEP: u16 = b'\\' as _;
242    const ALT_SEP: u16 = b'/' as _;
243    const QUERY: u16 = b'?' as _;
244    const COLON: u16 = b':' as _;
245    const DOT: u16 = b'.' as _;
246    const U: u16 = b'U' as _;
247    const N: u16 = b'N' as _;
248    const C: u16 = b'C' as _;
249
250    // \\?\
251    const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
252    // \??\
253    const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
254    // \\?\UNC\
255    const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];
256
257    if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == [0] {
258        // Early return for paths that are already verbatim or empty.
259        return Ok(path);
260    } else if path.len() < LEGACY_MAX_PATH {
261        // Early return if an absolute path is less < 260 UTF-16 code units.
262        // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily.
263        match path.as_slice() {
264            // Starts with `D:`, `D:\`, `D:/`, etc.
265            // Does not match if the path starts with a `\` or `/`.
266            [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..]
267                if *drive != SEP && *drive != ALT_SEP =>
268            {
269                return Ok(path);
270            }
271            // Starts with `\\`, `//`, etc
272            [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path),
273            _ => {}
274        }
275    }
276
277    // Firstly, get the absolute path using `GetFullPathNameW`.
278    // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
279    let lpfilename = path.as_ptr();
280    fill_utf16_buf(
281        // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
282        // `lpfilename` is a pointer to a null terminated string that is not
283        // invalidated until after `GetFullPathNameW` returns successfully.
284        |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
285        |mut absolute| {
286            path.clear();
287
288            // Only prepend the prefix if needed.
289            if prefer_verbatim || absolute.len() + 1 >= LEGACY_MAX_PATH {
290                // Secondly, add the verbatim prefix. This is easier here because we know the
291                // path is now absolute and fully normalized (e.g. `/` has been changed to `\`).
292                let prefix = match absolute {
293                    // C:\ => \\?\C:\
294                    [_, COLON, SEP, ..] => VERBATIM_PREFIX,
295                    // \\.\ => \\?\
296                    [SEP, SEP, DOT, SEP, ..] => {
297                        absolute = &absolute[4..];
298                        VERBATIM_PREFIX
299                    }
300                    // Leave \\?\ and \??\ as-is.
301                    [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[],
302                    // \\ => \\?\UNC\
303                    [SEP, SEP, ..] => {
304                        absolute = &absolute[2..];
305                        UNC_PREFIX
306                    }
307                    // Anything else we leave alone.
308                    _ => &[],
309                };
310
311                path.reserve_exact(prefix.len() + absolute.len() + 1);
312                path.extend_from_slice(prefix);
313            } else {
314                path.reserve_exact(absolute.len() + 1);
315            }
316            path.extend_from_slice(absolute);
317            path.push(0);
318        },
319    )?;
320    Ok(path)
321}
322
323/// Make a Windows path absolute.
324pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
325    let path = path.as_os_str();
326    let prefix = parse_prefix(path);
327    // Verbatim paths should not be modified.
328    if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
329        // NULs in verbatim paths are rejected for consistency.
330        if path.as_encoded_bytes().contains(&0) {
331            return Err(io::const_error!(
332                io::ErrorKind::InvalidInput,
333                "strings passed to WinAPI cannot contain NULs",
334            ));
335        }
336        return Ok(path.to_owned().into());
337    }
338
339    let path = to_u16s(path)?;
340    let lpfilename = path.as_ptr();
341    fill_utf16_buf(
342        // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
343        // `lpfilename` is a pointer to a null terminated string that is not
344        // invalidated until after `GetFullPathNameW` returns successfully.
345        |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
346        os2path,
347    )
348}
349
350pub(crate) fn is_absolute(path: &Path) -> bool {
351    path.has_root() && path.prefix().is_some()
352}