diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 0dfe7a2500b0..17a4957e94d1 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -1014,6 +1014,7 @@ pub const vm_machine_attribute_val_t = isize; pub const CALENDAR_CLOCK = 1; pub const PATH_MAX = 1024; +pub const NAME_MAX = 255; pub const IOV_MAX = 16; pub const STDIN_FILENO = 0; diff --git a/lib/std/c/dragonfly.zig b/lib/std/c/dragonfly.zig index 35436d7017a2..ddc0db270983 100644 --- a/lib/std/c/dragonfly.zig +++ b/lib/std/c/dragonfly.zig @@ -234,6 +234,7 @@ pub const SA = struct { }; pub const PATH_MAX = 1024; +pub const NAME_MAX = 255; pub const IOV_MAX = KERN.IOV_MAX; pub const ino_t = c_ulong; diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig index ba7e55ccb1a6..30ab3542b988 100644 --- a/lib/std/c/haiku.zig +++ b/lib/std/c/haiku.zig @@ -266,6 +266,7 @@ pub const area_info = extern struct { }; pub const MAXPATHLEN = PATH_MAX; +pub const MAXNAMLEN = NAME_MAX; pub const image_info = extern struct { id: u32, @@ -371,6 +372,9 @@ pub const KERN = struct {}; pub const IOV_MAX = 1024; pub const PATH_MAX = 1024; +/// NOTE: Contains room for the terminating null character (despite the POSIX +/// definition saying that NAME_MAX does not include the terminating null). +pub const NAME_MAX = 256; // limits.h pub const STDIN_FILENO = 0; pub const STDOUT_FILENO = 1; diff --git a/lib/std/fs.zig b/lib/std/fs.zig index dfadb144eb09..8dfd8119302d 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -48,6 +48,30 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) { @compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)), }; +/// This represents the maximum size of a UTF-8 encoded file name component that +/// the platform's common file systems support. File name components returned by file system +/// operations are likely to fit into a UTF-8 encoded array of this length, but +/// (depending on the platform) this assumption may not hold for every configuration. 
+/// The byte count does not include a null sentinel byte. +pub const MAX_NAME_BYTES = switch (builtin.os.tag) { + .linux, .macos, .ios, .freebsd, .dragonfly => os.NAME_MAX, + // Haiku's NAME_MAX includes the null terminator, so subtract one. + .haiku => os.NAME_MAX - 1, + .netbsd, .openbsd, .solaris => os.MAXNAMLEN, + // Each UTF-16LE code unit may be expanded to at most 3 UTF-8 bytes. + // A code point that requires 4 UTF-8 bytes is encoded as a surrogate + // pair (two code units) in UTF-16LE, so 3 bytes per code unit (over)accounts for it. + .windows => os.windows.NAME_MAX * 3, + // For WASI, the maximum name length depends on the host OS, so it needs to be + // as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS. + // TODO determine if this is a reasonable approach + .wasi => os.windows.NAME_MAX * 3, + else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) + root.os.NAME_MAX + else + @compileError("NAME_MAX not implemented for " ++ @tagName(builtin.os.tag)), +}; + pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*; /// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. 
@@ -680,7 +704,7 @@ pub const IterableDir = struct { index: usize, end_index: usize, first_iter: bool, - name_data: [256]u8, + name_data: [MAX_NAME_BYTES]u8, const Self = @This(); diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 028110ff9c33..f6168054b64e 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -703,6 +703,50 @@ test "makePath in a directory that no longer exists" { try testing.expectError(error.FileNotFound, tmp.dir.makePath("sub-path")); } +fn testFilenameLimits(iterable_dir: IterableDir, maxed_filename: []const u8) !void { + // setup, create a dir and a nested file both with maxed filenames, and walk the dir + { + var maxed_dir = try iterable_dir.dir.makeOpenPath(maxed_filename, .{}); + defer maxed_dir.close(); + + try maxed_dir.writeFile(maxed_filename, ""); + + var walker = try iterable_dir.walk(testing.allocator); + defer walker.deinit(); + + var count: usize = 0; + while (try walker.next()) |entry| { + try testing.expectEqualStrings(maxed_filename, entry.basename); + count += 1; + } + try testing.expectEqual(@as(usize, 2), count); + } + + // ensure that we can delete the tree + try iterable_dir.dir.deleteTree(maxed_filename); +} + +test "max file name component lengths" { + var tmp = tmpIterableDir(.{}); + defer tmp.cleanup(); + + if (builtin.os.tag == .windows) { + // € (U+20AC) is a single u16 in UTF-16 but takes 3 bytes in UTF-8 (the worst-case expansion), + // so Windows allows for NAME_MAX of them and the UTF-8 name is NAME_MAX * 3 bytes long + const maxed_windows_filename = ("€".*) ** std.os.windows.NAME_MAX; + try testFilenameLimits(tmp.iterable_dir, &maxed_windows_filename); + } else if (builtin.os.tag == .wasi) { + // On WASI, the maxed filename depends on the host OS, so in order for this test to + // work on any host, we need to use a length that will work for all platforms + // (i.e. the minimum MAX_NAME_BYTES of all supported platforms). 
+ const maxed_wasi_filename = [_]u8{'1'} ** 255; + try testFilenameLimits(tmp.iterable_dir, &maxed_wasi_filename); + } else { + const maxed_ascii_filename = [_]u8{'1'} ** std.fs.MAX_NAME_BYTES; + try testFilenameLimits(tmp.iterable_dir, &maxed_ascii_filename); + } +} + test "writev, readv" { var tmp = tmpDir(.{}); defer tmp.cleanup(); diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index d68a66ed872c..71dfc70d3773 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2977,6 +2977,24 @@ pub const PMEMORY_BASIC_INFORMATION = *MEMORY_BASIC_INFORMATION; /// from https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file#maximum-path-length-limitation pub const PATH_MAX_WIDE = 32767; +/// > [Each file name component can be] up to the value returned in the +/// > lpMaximumComponentLength parameter of the GetVolumeInformation function +/// > (this value is commonly 255 characters) +/// from https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation +/// +/// > The value that is stored in the variable that *lpMaximumComponentLength points to is +/// > used to indicate that a specified file system supports long names. For example, for +/// > a FAT file system that supports long names, the function stores the value 255, rather +/// > than the previous 8.3 indicator. Long names can also be supported on systems that use +/// > the NTFS file system. +/// from https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getvolumeinformationw +/// +/// The assumption being made here is that while lpMaximumComponentLength may vary, it will never +/// be larger than 255. +/// +/// TODO: More verification of this assumption. +pub const NAME_MAX = 255; + pub const FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100; pub const FORMAT_MESSAGE_ARGUMENT_ARRAY = 0x00002000; pub const FORMAT_MESSAGE_FROM_HMODULE = 0x00000800;