Skip to content

Commit 209a0d2

Browse files
authored
Merge pull request #13153 from squeek502/iterator-filename-limits
Windows: Fix Iterator name buffer size not handling all possible file name components
2 parents 5f5a20e + db80225 commit 209a0d2

File tree

6 files changed

+93
-1
lines changed

6 files changed

+93
-1
lines changed

lib/std/c/darwin.zig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,7 @@ pub const vm_machine_attribute_val_t = isize;
10141014
pub const CALENDAR_CLOCK = 1;
10151015

10161016
pub const PATH_MAX = 1024;
1017+
pub const NAME_MAX = 255;
10171018
pub const IOV_MAX = 16;
10181019

10191020
pub const STDIN_FILENO = 0;

lib/std/c/dragonfly.zig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ pub const SA = struct {
234234
};
235235

236236
pub const PATH_MAX = 1024;
237+
pub const NAME_MAX = 255;
237238
pub const IOV_MAX = KERN.IOV_MAX;
238239

239240
pub const ino_t = c_ulong;

lib/std/c/haiku.zig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ pub const area_info = extern struct {
266266
};
267267

268268
pub const MAXPATHLEN = PATH_MAX;
269+
pub const MAXNAMLEN = NAME_MAX;
269270

270271
pub const image_info = extern struct {
271272
id: u32,
@@ -371,6 +372,9 @@ pub const KERN = struct {};
371372
pub const IOV_MAX = 1024;
372373

373374
pub const PATH_MAX = 1024;
375+
/// NOTE: Contains room for the terminating null character (despite the POSIX
376+
/// definition saying that NAME_MAX does not include the terminating null).
377+
pub const NAME_MAX = 256; // limits.h
374378

375379
pub const STDIN_FILENO = 0;
376380
pub const STDOUT_FILENO = 1;

lib/std/fs.zig

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,30 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
4848
@compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)),
4949
};
5050

51+
/// This represents the maximum size of a UTF-8 encoded file name component that
52+
/// the platform's common file systems support. File name components returned by file system
53+
/// operations are likely to fit into a UTF-8 encoded array of this length, but
54+
/// (depending on the platform) this assumption may not hold for every configuration.
55+
/// The byte count does not include a null sentinel byte.
56+
pub const MAX_NAME_BYTES = switch (builtin.os.tag) {
57+
.linux, .macos, .ios, .freebsd, .dragonfly => os.NAME_MAX,
58+
// Haiku's NAME_MAX includes the null terminator, so subtract one.
59+
.haiku => os.NAME_MAX - 1,
60+
.netbsd, .openbsd, .solaris => os.MAXNAMLEN,
61+
// Each UTF-16LE code unit may be expanded to at most 3 UTF-8 bytes.
62+
// A code point that requires 4 UTF-8 bytes is a surrogate pair (two code
63+
// units) in UTF-16LE, so budgeting 3 bytes per code unit over-accounts for it.
64+
.windows => os.windows.NAME_MAX * 3,
65+
// For WASI, the maximum name length depends on the host OS, so this needs to be
66+
// as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS.
67+
// TODO: determine if this is a reasonable approach.
68+
.wasi => os.windows.NAME_MAX * 3,
69+
else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX"))
70+
root.os.NAME_MAX
71+
else
72+
@compileError("NAME_MAX not implemented for " ++ @tagName(builtin.os.tag)),
73+
};
74+
5175
pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*;
5276

5377
/// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem.
@@ -680,7 +704,7 @@ pub const IterableDir = struct {
680704
index: usize,
681705
end_index: usize,
682706
first_iter: bool,
683-
name_data: [256]u8,
707+
name_data: [MAX_NAME_BYTES]u8,
684708

685709
const Self = @This();
686710

lib/std/fs/test.zig

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,50 @@ test "makePath in a directory that no longer exists" {
703703
try testing.expectError(error.FileNotFound, tmp.dir.makePath("sub-path"));
704704
}
705705

706+
fn testFilenameLimits(iterable_dir: IterableDir, maxed_filename: []const u8) !void {
707+
// Setup: create a directory and, inside it, a file that both use the maxed filename, then walk from iterable_dir; exactly those two entries are expected, each with the maxed basename.
708+
{
709+
var maxed_dir = try iterable_dir.dir.makeOpenPath(maxed_filename, .{});
710+
defer maxed_dir.close();
711+
712+
try maxed_dir.writeFile(maxed_filename, "");
713+
714+
var walker = try iterable_dir.walk(testing.allocator);
715+
defer walker.deinit();
716+
717+
var count: usize = 0;
718+
while (try walker.next()) |entry| {
719+
try testing.expectEqualStrings(maxed_filename, entry.basename);
720+
count += 1;
721+
}
722+
try testing.expectEqual(@as(usize, 2), count);
723+
}
724+
725+
// Ensure that the tree with maxed-out names can also be deleted.
726+
try iterable_dir.dir.deleteTree(maxed_filename);
727+
}
728+
729+
test "max file name component lengths" {
730+
var tmp = tmpIterableDir(.{});
731+
defer tmp.cleanup();
732+
733+
if (builtin.os.tag == .windows) {
734+
// € (U+20AC) needs 3 bytes in UTF-8 (the most a single UTF-16 code unit can need) but is encoded as a single u16 in UTF-16,
735+
// so Windows allows for NAME_MAX of them, i.e. NAME_MAX * 3 bytes of UTF-8.
736+
const maxed_windows_filename = ("€".*) ** std.os.windows.NAME_MAX;
737+
try testFilenameLimits(tmp.iterable_dir, &maxed_windows_filename);
738+
} else if (builtin.os.tag == .wasi) {
739+
// On WASI, the maxed filename depends on the host OS, so in order for this test to
740+
// work on any host, we need to use a length that will work for all platforms
741+
// (i.e. the minimum MAX_NAME_BYTES of all supported platforms).
742+
const maxed_wasi_filename = [_]u8{'1'} ** 255;
743+
try testFilenameLimits(tmp.iterable_dir, &maxed_wasi_filename);
744+
} else {
745+
const maxed_ascii_filename = [_]u8{'1'} ** std.fs.MAX_NAME_BYTES;
746+
try testFilenameLimits(tmp.iterable_dir, &maxed_ascii_filename);
747+
}
748+
}
749+
706750
test "writev, readv" {
707751
var tmp = tmpDir(.{});
708752
defer tmp.cleanup();

lib/std/os/windows.zig

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2977,6 +2977,24 @@ pub const PMEMORY_BASIC_INFORMATION = *MEMORY_BASIC_INFORMATION;
29772977
/// from https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file#maximum-path-length-limitation
29782978
pub const PATH_MAX_WIDE = 32767;
29792979

2980+
/// > [Each file name component can be] up to the value returned in the
2981+
/// > lpMaximumComponentLength parameter of the GetVolumeInformation function
2982+
/// > (this value is commonly 255 characters)
2983+
/// from https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
2984+
///
2985+
/// > The value that is stored in the variable that *lpMaximumComponentLength points to is
2986+
/// > used to indicate that a specified file system supports long names. For example, for
2987+
/// > a FAT file system that supports long names, the function stores the value 255, rather
2988+
/// > than the previous 8.3 indicator. Long names can also be supported on systems that use
2989+
/// > the NTFS file system.
2990+
/// from https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getvolumeinformationw
2991+
///
2992+
/// The assumption being made here is that while lpMaximumComponentLength may vary, it will never
2993+
/// be larger than 255. The limit is counted in UTF-16 code units, not UTF-8 bytes.
2994+
///
2995+
/// TODO: More verification of this assumption.
2996+
pub const NAME_MAX = 255;
2997+
29802998
pub const FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100;
29812999
pub const FORMAT_MESSAGE_ARGUMENT_ARRAY = 0x00002000;
29823000
pub const FORMAT_MESSAGE_FROM_HMODULE = 0x00000800;

0 commit comments

Comments
 (0)