From c24434c5801b66dcd5430046220f1559a27eb173 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Wed, 27 Mar 2024 19:39:44 +0200 Subject: [PATCH 1/4] Add `split_windows_volume_prefix()` function. This patch adds a new public function for splitting a Windows path into its volume prefix and the rest of the path. --- src/filepath.gleam | 119 +++++++++++++++++++++++++++++++++ test/filepath_test.gleam | 141 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 260 insertions(+) diff --git a/src/filepath.gleam b/src/filepath.gleam index 3dae367..0dd25e9 100644 --- a/src/filepath.gleam +++ b/src/filepath.gleam @@ -167,6 +167,125 @@ fn pop_windows_drive_specifier(path: String) -> #(Option(String), String) { } } +/// Splits the Windows volume prefix from a given Windows path, +/// returning a tuple of two Strings with the value of the volume +/// prefix (if any) first, and the rest of the path (if any) second. +/// +/// Works with paths featuring `/`, `\`, or both, as long as the +/// volume prefix uses the same one consistently. +/// The orientation of the slashes in the volume prefix and the rest +/// of the path is preserved in the resulting tuple elements. +/// The separator between the prefix and the rest of the path is discarded. 
+/// +/// Full details on possible volume prefix syntax can be found at: +/// https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats +/// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html +/// +/// ## Examples +/// +/// ```gleam +/// // Normal drive-lettered absolute path with either slashes or backslashes: +/// split_windows_volume_prefix("C:\\Users\\Administrator\\AppData") +/// // -> #("C:", "Users\\Administrator\\AppData") +/// ``` +/// +/// ```gleam +/// // DOS Local Device ("//./DEV/..."): +/// split_windows_volume_prefix("//./pipe/testpipe") +/// // -> #("//./pipe", "testpipe") +/// ``` +/// +/// ```gleam +/// // DOS Root Local Device ("//?/DEV/./..."): +/// split_windows_volume_prefix("//?/C:/Users/Administrator") +/// // -> #("//?/C:", "Users/Administrator") +/// ``` +/// +/// ```gleam +/// // UNC paths will include the IP/hostname and sharename portions: +/// split_windows_volume_prefix("//DESKTOP-123/MyShare/subdir/file.txt") +/// // -> #("//DESKTOP-123/MyShare", "subdir/file.txt") +/// ``` +/// +pub fn split_windows_volume_prefix(path path: String) -> #(String, String) { + case path { + // NOTE: DOS device paths may include ":" too, so we must match + // for them before matching for regular drives: + // DOS device paths: + "//." as start <> rest | "//?" as start <> rest -> { + split_rest_once(start, "/", rest) + } + "\\\\." as start <> rest | "\\\\?" 
as start <> rest -> { + split_rest_once(start, "\\", rest) + } + + // UNC paths where both the IP/hostname and share/drive name count + // as part of the volume prefix: + "//" as start <> rest -> { + split_rest_twice(start, "/", rest) + } + "\\\\" as start <> rest -> { + split_rest_twice(start, "\\", rest) + } + + // Check for normal absolute paths and drive-relative paths: + _ -> case string.split_once(path, on: ":") { + Ok(#(precolon, postcolon)) -> { + case precolon { + // The colon is the first character in the string + // so there is no drive to speak of: + "" -> #("", ":" <> postcolon) + + precolon -> case postcolon { + "/" <> rest -> #(precolon <> ":", rest) + "\\" <> rest -> #(precolon <> ":", rest) + // Path is a current-drive-relative path: + _ -> #(precolon <> ":", postcolon) + } + } + } + // Path has no colon and is likely a relative or absolute path: + Error(_) -> #("", path) + } + } +} + +// Helper function to extract one more path element from the `rest` of the +// path and form the final result for `split_windows_volume_prefix`. +fn split_rest_once(start: String, sep: String, rest: String) -> #(String, String) { + case string.split_once(rest, on: sep) { + Ok(#(drive, rest2)) -> { + case drive { + // The `rest` started with multiple redundant separators, + // which is acceptable, and we must recurse: + // eg: //./////pipe/testpipe + "" -> split_rest_once(start <> sep, sep, rest2) + _ -> #(start <> drive, rest2) + } + } + Error(_) -> case rest { + "" -> #("", start <> rest) + // NOTE: if the `rest` wasn't initially empty, it counts + // even if it doesn't have any `sep` in it: + _ -> #(start <> rest, "") + } + } +} + +// Helper function to extract two more path elements from the `rest` of the +// path and form the final result for `split_windows_volume_prefix`. 
+fn split_rest_twice(start: String, sep: String, rest: String) -> #(String, String) { + case split_rest_once(start, sep, rest) { + #("", _) -> #("", start <> rest) + // Avoid extraneous call to `split_rest_once` with the added separator + // if the `rest` is already empty after the first split: + #(_, "") -> #("", start <> rest) + #(drive1, rest1) -> { + split_rest_once(drive1 <> sep, sep, rest1) + } + } +} + /// Get the file extension of a path. /// /// ## Examples diff --git a/test/filepath_test.gleam b/test/filepath_test.gleam index 475c97b..2ff336c 100644 --- a/test/filepath_test.gleam +++ b/test/filepath_test.gleam @@ -1,3 +1,6 @@ +import gleam/list +import gleam/string + import filepath import gleeunit import gleeunit/should @@ -107,6 +110,144 @@ pub fn split_windows_6_test() { |> should.equal(["::", "one", "two"]) } +pub fn split_windows_volume_prefix_multi_test() { + let testfn = fn(testcase: #(String, #(String, String))) { + let #(path, expected_split) = testcase + + // Run test case as provided: + filepath.split_windows_volume_prefix(path) + |> should.equal(expected_split) + + // Invert path separators in test case and expected and re-test: + let #(current_separator, other_separator) = case string.contains(path, "/") { + True -> #("/", "\\") + False -> #("\\", "/") + } + let invert_separator_char = fn(c) { + case c { + c if c == current_separator -> other_separator + c if c == other_separator -> current_separator + c -> c + } + } + let invert_separators = fn(s) { + s + |> string.to_graphemes + |> list.map(invert_separator_char) + |> string.join("") + } + + let #(expected_volume, expected_rest) = expected_split + path + |> invert_separators + |> filepath.split_windows_volume_prefix + |> should.equal( + #(invert_separators(expected_volume), + invert_separators(expected_rest))) + } + + let testcases: List(#(String, #(String, String))) = [ + // Unix paths: + #("/", #("", "/")), + #("/usr/local/bin", #("", "/usr/local/bin")), + + // Base Windows cases: + #("", 
#("", "")), + #("/", #("", "/")), + #("\\", #("", "\\")), + #("file", #("", "file")), + #("dir1/dir2/file.txt", #("", "dir1/dir2/file.txt")), + #("::/one/two", #("", "::/one/two")), + #("::\\one\\two", #("", "::\\one\\two")), + #("C:", #("C:", "")), + #("c:", #("c:", "")), + #("C:/", #("C:", "")), + #("c:\\", #("c:", "")), + #("C:/one/two", #("C:", "one/two")), + #("c:/one/two", #("c:", "one/two")), + #("C:\\one\\two", #("C:", "one\\two")), + #("c:\\one\\two", #("c:", "one\\two")), + #("C:\\one\\two/three", #("C:", "one\\two/three")), + #("c:/one/two\\three", #("c:", "one/two\\three")), + + // Current-drive absolute paths: + #("/dir1/dir2/file.txt", #("", "/dir1/dir2/file.txt")), + #("/dir1/dir2\\file.txt", #("", "/dir1/dir2\\file.txt")), + #("\\dir1\\dir2\\file.txt", #("", "\\dir1\\dir2\\file.txt")), + + // Drive-relative paths: + #("C:dir1/dir2/file.txt", #("C:", "dir1/dir2/file.txt")), + #("C:dir1/dir2\\file.txt", #("C:", "dir1/dir2\\file.txt")), + #("C:dir1\\dir2\\file.txt", #("C:", "dir1\\dir2\\file.txt")), + + // Specialized Windows paths: + #("HKLM:", #("HKLM:", "")), + #("HKLM:/", #("HKLM:", "")), + #("//./pipe", #("//./pipe", "")), + #("//./pipe/", #("//./pipe", "")), + #("//./pipe/testpipe", #("//./pipe", "testpipe")), + #( + "HKLM:/SOFTWARE/Microsoft/Windows/CurrentVersion", + #("HKLM:", "SOFTWARE/Microsoft/Windows/CurrentVersion") + ), + #( + "//./Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt", + #("//./Volume{b75e2c83-0000-0000-0000-602f00000000}", "Test/Foo.txt") + ), + #( + "//LOCALHOST/c$/temp/test-file.txt", + #("//LOCALHOST/c$", "temp/test-file.txt") + ), + #( + "//./c:/temp/test-file.txt", + #("//./c:", "temp/test-file.txt") + ), + #( + "//?/c:/temp/test-file.txt", + #("//?/c:", "temp/test-file.txt") + ), + #( + "//./UNC/LOCALHOST/c$/temp/test-file.txt", + #("//./UNC", "LOCALHOST/c$/temp/test-file.txt") + ), + #( + "//?/UNC/LOCALHOST/c$/temp/test-file.txt", + #("//?/UNC", "LOCALHOST/c$/temp/test-file.txt") + ), + #( + 
"//127.0.0.1/c$/temp/test-file.txt", + #("//127.0.0.1/c$", "temp/test-file.txt") + ), + #( + "//DESKTOP-123/MyShare/subdir/file.txt", + #("//DESKTOP-123/MyShare", "subdir/file.txt") + ), + + // Incomplete special paths which are interpreted as current-drive-relative: + #("//", #("", "//")), + #("//.", #("", "//.")), + #("//./", #("", "//./")), + + // Incomplete special paths: + #("//?", #("", "//?")), + #("//?/", #("", "//?/")), + #("//.///", #("", "//.///")), + #("//?///", #("", "//?///")), + #("//127.0.0.1", #("", "//127.0.0.1")), + #("//127.0.0.1/", #("", "//127.0.0.1/")), + + // Redundant slashes in special volume paths: + #("//./////pipe///testpipe", #("//./////pipe", "//testpipe")), + #("//?///////pipe///testpipe", #("//?///////pipe", "//testpipe")), + #( + "//127.0.0.1/////c$/temp/test-file.txt", + #("//127.0.0.1/////c$", "temp/test-file.txt") + ), + ] + + list.map(testcases, testfn) +} + pub fn join_0_test() { filepath.join("/one", "two") |> should.equal("/one/two") From b6ca38ed8550d3fdd5692dcc64564a4b1c263b3b Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Mon, 1 Apr 2024 19:58:30 +0300 Subject: [PATCH 2/4] Apply `gleam format` on `split_windows_volume_prefix()` updates. --- src/filepath.gleam | 137 +++++++++++---------- test/filepath_test.gleam | 257 +++++++++++++++++++-------------------- 2 files changed, 197 insertions(+), 197 deletions(-) diff --git a/src/filepath.gleam b/src/filepath.gleam index 0dd25e9..4d47de9 100644 --- a/src/filepath.gleam +++ b/src/filepath.gleam @@ -208,82 +208,93 @@ fn pop_windows_drive_specifier(path: String) -> #(Option(String), String) { /// ``` /// pub fn split_windows_volume_prefix(path path: String) -> #(String, String) { - case path { - // NOTE: DOS device paths may include ":" too, so we must match - // for them before matching for regular drives: - // DOS device paths: - "//." as start <> rest | "//?" as start <> rest -> { - split_rest_once(start, "/", rest) - } - "\\\\." as start <> rest | "\\\\?" 
as start <> rest -> { - split_rest_once(start, "\\", rest) - } + case path { + // NOTE: DOS device paths may include ":" too, so we must match + // for them before matching for regular drives: + // DOS device paths: + "//." as start <> rest | "//?" as start <> rest -> { + split_rest_once(start, "/", rest) + } + "\\\\." as start <> rest | "\\\\?" as start <> rest -> { + split_rest_once(start, "\\", rest) + } - // UNC paths where both the IP/hostname and share/drive name count - // as part of the volume prefix: - "//" as start <> rest -> { - split_rest_twice(start, "/", rest) - } - "\\\\" as start <> rest -> { - split_rest_twice(start, "\\", rest) - } + // UNC paths where both the IP/hostname and share/drive name count + // as part of the volume prefix: + "//" as start <> rest -> { + split_rest_twice(start, "/", rest) + } + "\\\\" as start <> rest -> { + split_rest_twice(start, "\\", rest) + } - // Check for normal absolute paths and drive-relative paths: - _ -> case string.split_once(path, on: ":") { - Ok(#(precolon, postcolon)) -> { - case precolon { - // The colon is the first character in the string - // so there is no drive to speak of: - "" -> #("", ":" <> postcolon) - - precolon -> case postcolon { - "/" <> rest -> #(precolon <> ":", rest) - "\\" <> rest -> #(precolon <> ":", rest) - // Path is a current-drive-relative path: - _ -> #(precolon <> ":", postcolon) - } - } - } - // Path has no colon and is likely a relative or absolute path: - Error(_) -> #("", path) + // Check for normal absolute paths and drive-relative paths: + _ -> + case string.split_once(path, on: ":") { + Ok(#(precolon, postcolon)) -> { + case precolon { + // The colon is the first character in the string + // so there is no drive to speak of: + "" -> #("", ":" <> postcolon) + + precolon -> + case postcolon { + "/" <> rest -> #(precolon <> ":", rest) + "\\" <> rest -> #(precolon <> ":", rest) + // Path is a current-drive-relative path: + _ -> #(precolon <> ":", postcolon) + } + } } - } + // 
Path has no colon and is likely a relative or absolute path: + Error(_) -> #("", path) + } + } } // Helper function to extract one more path element from the `rest` of the // path and form the final result for `split_windows_volume_prefix`. -fn split_rest_once(start: String, sep: String, rest: String) -> #(String, String) { - case string.split_once(rest, on: sep) { - Ok(#(drive, rest2)) -> { - case drive { - // The `rest` started with multiple redundant separators, - // which is acceptable, and we must recurse: - // eg: //./////pipe/testpipe - "" -> split_rest_once(start <> sep, sep, rest2) - _ -> #(start <> drive, rest2) - } - } - Error(_) -> case rest { - "" -> #("", start <> rest) - // NOTE: if the `rest` wasn't initially empty, it counts - // even if it doesn't have any `sep` in it: - _ -> #(start <> rest, "") - } +fn split_rest_once( + start: String, + sep: String, + rest: String, +) -> #(String, String) { + case string.split_once(rest, on: sep) { + Ok(#(drive, rest2)) -> { + case drive { + // The `rest` started with multiple redundant separators, + // which is acceptable, and we must recurse: + // eg: //./////pipe/testpipe + "" -> split_rest_once(start <> sep, sep, rest2) + _ -> #(start <> drive, rest2) + } } + Error(_) -> + case rest { + "" -> #("", start <> rest) + // NOTE: if the `rest` wasn't initially empty, it counts + // even if it doesn't have any `sep` in it: + _ -> #(start <> rest, "") + } + } } // Helper function to extract two more path elements from the `rest` of the // path and form the final result for `split_windows_volume_prefix`. 
-fn split_rest_twice(start: String, sep: String, rest: String) -> #(String, String) { - case split_rest_once(start, sep, rest) { - #("", _) -> #("", start <> rest) - // Avoid extraneous call to `split_rest_once` with the added separator - // if the `rest` is already empty after the first split: - #(_, "") -> #("", start <> rest) - #(drive1, rest1) -> { - split_rest_once(drive1 <> sep, sep, rest1) - } +fn split_rest_twice( + start: String, + sep: String, + rest: String, +) -> #(String, String) { + case split_rest_once(start, sep, rest) { + #("", _) -> #("", start <> rest) + // Avoid extraneous call to `split_rest_once` with the added separator + // if the `rest` is already empty after the first split: + #(_, "") -> #("", start <> rest) + #(drive1, rest1) -> { + split_rest_once(drive1 <> sep, sep, rest1) } + } } /// Get the file extension of a path. diff --git a/test/filepath_test.gleam b/test/filepath_test.gleam index 2ff336c..86e3217 100644 --- a/test/filepath_test.gleam +++ b/test/filepath_test.gleam @@ -1,6 +1,5 @@ import gleam/list import gleam/string - import filepath import gleeunit import gleeunit/should @@ -111,141 +110,131 @@ pub fn split_windows_6_test() { } pub fn split_windows_volume_prefix_multi_test() { - let testfn = fn(testcase: #(String, #(String, String))) { - let #(path, expected_split) = testcase - - // Run test case as provided: - filepath.split_windows_volume_prefix(path) - |> should.equal(expected_split) - - // Invert path separators in test case and expected and re-test: - let #(current_separator, other_separator) = case string.contains(path, "/") { - True -> #("/", "\\") - False -> #("\\", "/") - } - let invert_separator_char = fn(c) { - case c { - c if c == current_separator -> other_separator - c if c == other_separator -> current_separator - c -> c - } - } - let invert_separators = fn(s) { - s - |> string.to_graphemes - |> list.map(invert_separator_char) - |> string.join("") - } - - let #(expected_volume, expected_rest) = expected_split - 
path - |> invert_separators - |> filepath.split_windows_volume_prefix - |> should.equal( - #(invert_separators(expected_volume), - invert_separators(expected_rest))) + let testfn = fn(testcase: #(String, #(String, String))) { + let #(path, expected_split) = testcase + + // Run test case as provided: + filepath.split_windows_volume_prefix(path) + |> should.equal(expected_split) + + // Invert path separators in test case and expected and re-test: + let #(current_separator, other_separator) = case + string.contains(path, "/") + { + True -> #("/", "\\") + False -> #("\\", "/") + } + let invert_separator_char = fn(c) { + case c { + c if c == current_separator -> other_separator + c if c == other_separator -> current_separator + c -> c + } + } + let invert_separators = fn(s) { + s + |> string.to_graphemes + |> list.map(invert_separator_char) + |> string.join("") } - let testcases: List(#(String, #(String, String))) = [ - // Unix paths: - #("/", #("", "/")), - #("/usr/local/bin", #("", "/usr/local/bin")), - - // Base Windows cases: - #("", #("", "")), - #("/", #("", "/")), - #("\\", #("", "\\")), - #("file", #("", "file")), - #("dir1/dir2/file.txt", #("", "dir1/dir2/file.txt")), - #("::/one/two", #("", "::/one/two")), - #("::\\one\\two", #("", "::\\one\\two")), - #("C:", #("C:", "")), - #("c:", #("c:", "")), - #("C:/", #("C:", "")), - #("c:\\", #("c:", "")), - #("C:/one/two", #("C:", "one/two")), - #("c:/one/two", #("c:", "one/two")), - #("C:\\one\\two", #("C:", "one\\two")), - #("c:\\one\\two", #("c:", "one\\two")), - #("C:\\one\\two/three", #("C:", "one\\two/three")), - #("c:/one/two\\three", #("c:", "one/two\\three")), - - // Current-drive absolute paths: - #("/dir1/dir2/file.txt", #("", "/dir1/dir2/file.txt")), - #("/dir1/dir2\\file.txt", #("", "/dir1/dir2\\file.txt")), - #("\\dir1\\dir2\\file.txt", #("", "\\dir1\\dir2\\file.txt")), - - // Drive-relative paths: - #("C:dir1/dir2/file.txt", #("C:", "dir1/dir2/file.txt")), - #("C:dir1/dir2\\file.txt", #("C:", 
"dir1/dir2\\file.txt")), - #("C:dir1\\dir2\\file.txt", #("C:", "dir1\\dir2\\file.txt")), - - // Specialized Windows paths: - #("HKLM:", #("HKLM:", "")), - #("HKLM:/", #("HKLM:", "")), - #("//./pipe", #("//./pipe", "")), - #("//./pipe/", #("//./pipe", "")), - #("//./pipe/testpipe", #("//./pipe", "testpipe")), - #( - "HKLM:/SOFTWARE/Microsoft/Windows/CurrentVersion", - #("HKLM:", "SOFTWARE/Microsoft/Windows/CurrentVersion") - ), - #( - "//./Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt", - #("//./Volume{b75e2c83-0000-0000-0000-602f00000000}", "Test/Foo.txt") - ), - #( - "//LOCALHOST/c$/temp/test-file.txt", - #("//LOCALHOST/c$", "temp/test-file.txt") - ), - #( - "//./c:/temp/test-file.txt", - #("//./c:", "temp/test-file.txt") - ), - #( - "//?/c:/temp/test-file.txt", - #("//?/c:", "temp/test-file.txt") - ), - #( - "//./UNC/LOCALHOST/c$/temp/test-file.txt", - #("//./UNC", "LOCALHOST/c$/temp/test-file.txt") - ), - #( - "//?/UNC/LOCALHOST/c$/temp/test-file.txt", - #("//?/UNC", "LOCALHOST/c$/temp/test-file.txt") - ), - #( - "//127.0.0.1/c$/temp/test-file.txt", - #("//127.0.0.1/c$", "temp/test-file.txt") - ), - #( - "//DESKTOP-123/MyShare/subdir/file.txt", - #("//DESKTOP-123/MyShare", "subdir/file.txt") - ), - - // Incomplete special paths which are interpreted as current-drive-relative: - #("//", #("", "//")), - #("//.", #("", "//.")), - #("//./", #("", "//./")), - - // Incomplete special paths: - #("//?", #("", "//?")), - #("//?/", #("", "//?/")), - #("//.///", #("", "//.///")), - #("//?///", #("", "//?///")), - #("//127.0.0.1", #("", "//127.0.0.1")), - #("//127.0.0.1/", #("", "//127.0.0.1/")), - - // Redundant slashes in special volume paths: - #("//./////pipe///testpipe", #("//./////pipe", "//testpipe")), - #("//?///////pipe///testpipe", #("//?///////pipe", "//testpipe")), - #( - "//127.0.0.1/////c$/temp/test-file.txt", - #("//127.0.0.1/////c$", "temp/test-file.txt") - ), - ] - - list.map(testcases, testfn) + let #(expected_volume, expected_rest) = 
expected_split + path + |> invert_separators + |> filepath.split_windows_volume_prefix + |> should.equal(#( + invert_separators(expected_volume), + invert_separators(expected_rest), + )) + } + + let testcases: List(#(String, #(String, String))) = [ + // Unix paths: + #("/", #("", "/")), + #("/usr/local/bin", #("", "/usr/local/bin")), + // Base Windows cases: + #("", #("", "")), + #("/", #("", "/")), + #("\\", #("", "\\")), + #("file", #("", "file")), + #("dir1/dir2/file.txt", #("", "dir1/dir2/file.txt")), + #("::/one/two", #("", "::/one/two")), + #("::\\one\\two", #("", "::\\one\\two")), + #("C:", #("C:", "")), + #("c:", #("c:", "")), + #("C:/", #("C:", "")), + #("c:\\", #("c:", "")), + #("C:/one/two", #("C:", "one/two")), + #("c:/one/two", #("c:", "one/two")), + #("C:\\one\\two", #("C:", "one\\two")), + #("c:\\one\\two", #("c:", "one\\two")), + #("C:\\one\\two/three", #("C:", "one\\two/three")), + #("c:/one/two\\three", #("c:", "one/two\\three")), + // Current-drive absolute paths: + #("/dir1/dir2/file.txt", #("", "/dir1/dir2/file.txt")), + #("/dir1/dir2\\file.txt", #("", "/dir1/dir2\\file.txt")), + #("\\dir1\\dir2\\file.txt", #("", "\\dir1\\dir2\\file.txt")), + // Drive-relative paths: + #("C:dir1/dir2/file.txt", #("C:", "dir1/dir2/file.txt")), + #("C:dir1/dir2\\file.txt", #("C:", "dir1/dir2\\file.txt")), + #("C:dir1\\dir2\\file.txt", #("C:", "dir1\\dir2\\file.txt")), + // Specialized Windows paths: + #("HKLM:", #("HKLM:", "")), + #("HKLM:/", #("HKLM:", "")), + #("//./pipe", #("//./pipe", "")), + #("//./pipe/", #("//./pipe", "")), + #("//./pipe/testpipe", #("//./pipe", "testpipe")), + #("HKLM:/SOFTWARE/Microsoft/Windows/CurrentVersion", #( + "HKLM:", + "SOFTWARE/Microsoft/Windows/CurrentVersion", + )), + #("//./Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt", #( + "//./Volume{b75e2c83-0000-0000-0000-602f00000000}", + "Test/Foo.txt", + )), + #("//LOCALHOST/c$/temp/test-file.txt", #( + "//LOCALHOST/c$", + "temp/test-file.txt", + )), + 
#("//./c:/temp/test-file.txt", #("//./c:", "temp/test-file.txt")), + #("//?/c:/temp/test-file.txt", #("//?/c:", "temp/test-file.txt")), + #("//./UNC/LOCALHOST/c$/temp/test-file.txt", #( + "//./UNC", + "LOCALHOST/c$/temp/test-file.txt", + )), + #("//?/UNC/LOCALHOST/c$/temp/test-file.txt", #( + "//?/UNC", + "LOCALHOST/c$/temp/test-file.txt", + )), + #("//127.0.0.1/c$/temp/test-file.txt", #( + "//127.0.0.1/c$", + "temp/test-file.txt", + )), + #("//DESKTOP-123/MyShare/subdir/file.txt", #( + "//DESKTOP-123/MyShare", + "subdir/file.txt", + )), + // Incomplete special paths which are interpreted as current-drive-relative: + #("//", #("", "//")), + #("//.", #("", "//.")), + #("//./", #("", "//./")), + // Incomplete special paths: + #("//?", #("", "//?")), + #("//?/", #("", "//?/")), + #("//.///", #("", "//.///")), + #("//?///", #("", "//?///")), + #("//127.0.0.1", #("", "//127.0.0.1")), + #("//127.0.0.1/", #("", "//127.0.0.1/")), + // Redundant slashes in special volume paths: + #("//./////pipe///testpipe", #("//./////pipe", "//testpipe")), + #("//?///////pipe///testpipe", #("//?///////pipe", "//testpipe")), + #("//127.0.0.1/////c$/temp/test-file.txt", #( + "//127.0.0.1/////c$", + "temp/test-file.txt", + )), + ] + + list.map(testcases, testfn) } pub fn join_0_test() { From f9125e1bb43d4e2965f0f35613e9e07c22834488 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Thu, 28 Mar 2024 23:24:42 +0200 Subject: [PATCH 3/4] Switch `split_windows()` to use `split_windows_volume_prefix()`. Note this introduces the breaking change of `split_windows()` and `split()` no longer lowercasing the drive letter, nor keeping a trailing slash in the first element representing the drive. 
--- src/filepath.gleam | 55 +++++++++++----------------------------- test/filepath_test.gleam | 4 +-- 2 files changed, 17 insertions(+), 42 deletions(-) diff --git a/src/filepath.gleam b/src/filepath.gleam index 4d47de9..f8e6708 100644 --- a/src/filepath.gleam +++ b/src/filepath.gleam @@ -14,7 +14,6 @@ import gleam/list import gleam/bool import gleam/string import gleam/result -import gleam/option.{type Option, None, Some} @external(erlang, "filepath_ffi", "is_windows") @external(javascript, "./filepath_ffi.mjs", "is_windows") @@ -62,9 +61,11 @@ fn remove_trailing_slash(path: String) -> String { // TODO: Windows support /// Split a path into its segments. /// -/// When running on Windows both `/` and `\` are treated as path separators, and -/// if the path starts with a drive letter then the drive letter then it is -/// lowercased. +/// If the path is an absolute Unix path, the first element will be a `/`. +/// +/// When running on Windows, both `/` and `\` are treated as path separators, +/// and the function will split the Windows volume prefix based on the rules +/// implemented by the `split_windows_volume_prefix()` function. 
/// /// ## Examples /// @@ -73,6 +74,12 @@ fn remove_trailing_slash(path: String) -> String { /// // -> ["/", "usr", "local", "bin"] /// ``` /// +/// ```gleam +/// // Windows-only behavior: +/// split("C:\\Users\\Administrator\\AppData") +/// // -> ["C:", "Users", "Administrator", "AppData"] +/// ``` +/// pub fn split(path: String) -> List(String) { case is_windows() { True -> split_windows(path) @@ -117,15 +124,15 @@ pub fn split_unix(path: String) -> List(String) { /// ``` /// pub fn split_windows(path: String) -> List(String) { - let #(drive, path) = pop_windows_drive_specifier(path) + let #(drive, postdrive) = split_windows_volume_prefix(path) let segments = - string.split(path, "/") + string.split(postdrive, "/") |> list.flat_map(string.split(_, "\\")) let segments = case drive { - Some(drive) -> [drive, ..segments] - None -> segments + "" -> segments + drive -> [drive, ..segments] } case segments { @@ -135,38 +142,6 @@ pub fn split_windows(path: String) -> List(String) { } } -const codepoint_slash = 47 - -const codepoint_backslash = 92 - -const codepoint_colon = 58 - -const codepoint_a = 65 - -const codepoint_z = 90 - -const codepoint_a_up = 97 - -const codepoint_z_up = 122 - -fn pop_windows_drive_specifier(path: String) -> #(Option(String), String) { - let start = string.slice(from: path, at_index: 0, length: 3) - let codepoints = string.to_utf_codepoints(start) - case list.map(codepoints, string.utf_codepoint_to_int) { - [drive, colon, slash] if { - slash == codepoint_slash || slash == codepoint_backslash - } && colon == codepoint_colon && { - drive >= codepoint_a && drive <= codepoint_z || drive >= codepoint_a_up && drive <= codepoint_z_up - } -> { - let drive_letter = string.slice(from: path, at_index: 0, length: 1) - let drive = string.lowercase(drive_letter) <> ":/" - let path = string.drop_left(path, 3) - #(Some(drive), path) - } - _ -> #(None, path) - } -} - /// Splits the Windows volume prefix from a given Windows path, /// returning a tuple of two 
Strings with the value of the volume /// prefix (if any) first, and the rest of the path (if any) second. diff --git a/test/filepath_test.gleam b/test/filepath_test.gleam index 86e3217..9e327d5 100644 --- a/test/filepath_test.gleam +++ b/test/filepath_test.gleam @@ -91,12 +91,12 @@ pub fn split_windows_2_test() { pub fn split_windows_3_test() { filepath.split_windows("C:\\one\\two") - |> should.equal(["c:/", "one", "two"]) + |> should.equal(["C:", "one", "two"]) } pub fn split_windows_4_test() { filepath.split_windows("C:/one/two") - |> should.equal(["c:/", "one", "two"]) + |> should.equal(["C:", "one", "two"]) } pub fn split_windows_5_test() { From c00fb1763abb0ba499d235c0426dcf7e6f1695c8 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Fri, 29 Mar 2024 00:09:34 +0200 Subject: [PATCH 4/4] Separate `is_absolute_{unix,windows}` implementations. --- src/filepath.gleam | 54 +++++++++++++++++++++++++++++-- test/filepath_test.gleam | 68 +++++++++++++++++++++++++++++++--------- 2 files changed, 106 insertions(+), 16 deletions(-) diff --git a/src/filepath.gleam b/src/filepath.gleam index f8e6708..2e3a5ef 100644 --- a/src/filepath.gleam +++ b/src/filepath.gleam @@ -375,8 +375,13 @@ fn get_directory_name( } } -// TODO: windows support -/// Check if a path is absolute. +/// Check whether a given path counts as an absolute path on the +/// operating system which it's currently being run on. +/// +/// On Unix systems, absolute paths start with a `/`. +/// +/// On Windows systems, absolute paths must contain a volume prefix +/// as dictated by the `split_windows_volume_prefix()` function. /// /// ## Examples /// @@ -391,9 +396,54 @@ fn get_directory_name( /// ``` /// pub fn is_absolute(path: String) -> Bool { + case is_windows() { + True -> is_absolute_windows(path) + False -> is_absolute_unix(path) + } +} + +/// Check whether a given Unix path is absolute. 
+/// +/// ## Examples +/// +/// ```gleam +/// is_absolute_unix("/usr/local/bin") +/// // -> True +/// ``` +/// +/// ```gleam +/// is_absolute_unix("usr/local/bin") +/// // -> False +/// ``` +/// +pub fn is_absolute_unix(path: String) -> Bool { string.starts_with(path, "/") } +/// Check whether a given Windows path is absolute. +/// +/// Paths on Windows only count as absolute if they have a proper volume +/// specifier as a prefix, as dictated by `split_windows_volume_prefix()`. +/// +/// ## Examples +/// +/// ```gleam +/// is_absolute_windows("C:\\dir1\\dir2\\file.txt") +/// // -> True +/// ``` +/// +/// ```gleam +/// is_absolute_windows("\\dir1\\dir2\\file.txt") +/// // -> False +/// ``` +/// +pub fn is_absolute_windows(path: String) -> Bool { + case split_windows_volume_prefix(path) { + #("", _) -> False + _ -> True + } +} + //TODO: windows support /// Expand `..` and `.` segments in a path. /// diff --git a/test/filepath_test.gleam b/test/filepath_test.gleam index 9e327d5..168f1d4 100644 --- a/test/filepath_test.gleam +++ b/test/filepath_test.gleam @@ -392,38 +392,78 @@ pub fn directory_name_7_test() { |> should.equal("one/two/three") } -pub fn is_absolute_0_test() { - filepath.is_absolute("") +pub fn is_absolute_unix_0_test() { + filepath.is_absolute_unix("") |> should.equal(False) } -pub fn is_absolute_1_test() { - filepath.is_absolute("file") +pub fn is_absolute_unix_1_test() { + filepath.is_absolute_unix("file") |> should.equal(False) } -pub fn is_absolute_2_test() { - filepath.is_absolute("/usr/local/bin") +pub fn is_absolute_unix_2_test() { + filepath.is_absolute_unix("/usr/local/bin") |> should.equal(True) } -pub fn is_absolute_3_test() { - filepath.is_absolute("usr/local/bin") +pub fn is_absolute_unix_3_test() { + filepath.is_absolute_unix("usr/local/bin") |> should.equal(False) } -pub fn is_absolute_4_test() { - filepath.is_absolute("../usr/local/bin") +pub fn is_absolute_unix_4_test() { + filepath.is_absolute_unix("../usr/local/bin") |> 
should.equal(False) } -pub fn is_absolute_5_test() { - filepath.is_absolute("./usr/local/bin") +pub fn is_absolute_unix_5_test() { + filepath.is_absolute_unix("./usr/local/bin") |> should.equal(False) } -pub fn is_absolute_6_test() { - filepath.is_absolute("/") +pub fn is_absolute_unix_6_test() { + filepath.is_absolute_unix("/") + |> should.equal(True) +} + +pub fn is_absolute_windows_0_test() { + filepath.is_absolute_windows("") + |> should.equal(False) +} + +pub fn is_absolute_windows_1_test() { + filepath.is_absolute_windows("\\") + |> should.equal(False) +} + +pub fn is_absolute_windows_2_test() { + filepath.is_absolute_windows("/") + |> should.equal(False) +} + +pub fn is_absolute_windows_3_test() { + filepath.is_absolute_windows("") + |> should.equal(False) +} + +pub fn is_absolute_windows_4_test() { + filepath.is_absolute_windows("C:\\Program Files") + |> should.equal(True) +} + +pub fn is_absolute_windows_5_test() { + filepath.is_absolute_windows("C:/Program Files") + |> should.equal(True) +} + +pub fn is_absolute_windows_6_test() { + filepath.is_absolute_windows("\\\\DESKTOP-123\\MyShare\\subdir\\file.txt") + |> should.equal(True) +} + +pub fn is_absolute_windows_7_test() { + filepath.is_absolute_windows("//DESKTOP-123/MyShare/subdir/file.txt") |> should.equal(True) }