Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/core/uri/accessors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,9 @@ auto URI::userinfo() const -> std::optional<std::string_view> {
return this->userinfo_;
}

// Report whether this URI and `other` carry the same authority, i.e. the
// same user information, host, and port. The stored members are compared
// with `==` as-is: no normalisation is applied here, and the scheme is not
// consulted.
auto URI::has_same_authority(const URI &other) const noexcept -> bool {
  // Bail out on the first component that differs
  if (!(this->userinfo_ == other.userinfo_)) {
    return false;
  }

  if (!(this->host_ == other.host_)) {
    return false;
  }

  return this->port_ == other.port_;
}

} // namespace sourcemeta::core
29 changes: 26 additions & 3 deletions src/core/uri/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cctype> // std::tolower
#include <filesystem> // std::filesystem
#include <iterator> // std::advance, std::next
#include <optional> // std::optional
#include <string> // std::string
#include <string_view> // std::string_view

Expand All @@ -19,6 +20,28 @@ auto is_localhost_host(const std::string_view host) -> bool {
});
}

// Join `segment` onto `path` verbatim (no escaping, no validation), making
// sure exactly one '/' separates the existing path from the new segment.
//
// - An empty `segment` is a no-op.
// - If `path` holds no value yet, it becomes a copy of `segment`.
// - Otherwise the segment is appended, inserting a '/' when neither side
//   provides one and dropping the segment's leading '/' when both do.
auto append_raw_segment(std::optional<std::string> &path,
                        const std::string_view segment) -> void {
  if (segment.empty()) {
    return;
  }

  if (!path.has_value()) {
    path.emplace(segment);
    return;
  }

  std::string &joined = *path;
  const bool left_has_slash = !joined.empty() && joined.back() == '/';
  // Safe: `segment` is known to be non-empty at this point
  const bool right_has_slash = segment.front() == '/';

  if (left_has_slash != right_has_slash) {
    // Exactly one side already supplies the separator
    joined.append(segment);
  } else if (left_has_slash) {
    // Both sides supply a separator: keep only the existing one
    joined.append(segment.substr(1));
  } else {
    // Neither side supplies a separator: add it ourselves
    joined.push_back('/');
    joined.append(segment);
  }
}

} // namespace

namespace sourcemeta::core {
Expand Down Expand Up @@ -97,17 +120,17 @@ auto URI::from_path(const std::filesystem::path &path) -> URI {
// Process remaining path segments
for (; iterator != final_path.end(); ++iterator) {
if (iterator->empty()) {
result.append_path("/");
append_raw_segment(result.path_, "/");
} else if (*iterator == "/") {
if (std::next(iterator) == final_path.end()) {
result.append_path("/");
append_raw_segment(result.path_, "/");
}
} else {
// Store raw segment - escaping will happen during recompose()
const auto segment = iterator->string();

if (result.path_.has_value()) {
result.append_path(segment);
append_raw_segment(result.path_, segment);
} else {
// First segment: file:// URIs need leading slash
result.path_ = "/" + segment;
Expand Down
98 changes: 90 additions & 8 deletions src/core/uri/include/sourcemeta/core/uri.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <sourcemeta/core/uri_error.h>
// NOLINTEND(misc-include-cleaner)

#include <concepts> // std::convertible_to
#include <concepts> // std::convertible_to, std::same_as
#include <cstddef> // std::size_t, std::ptrdiff_t
#include <cstdint> // std::uint32_t
#include <filesystem> // std::filesystem
Expand All @@ -20,8 +20,8 @@
#include <span> // std::span
#include <string> // std::string
#include <string_view> // std::string_view
#include <type_traits> // std::is_same_v
#include <utility> // std::pair
#include <type_traits> // std::is_same_v, std::remove_cvref_t
#include <utility> // std::pair, std::forward
#include <vector> // std::vector

/// @defgroup uri URI
Expand Down Expand Up @@ -64,7 +64,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
template <typename T>
requires std::convertible_to<T, std::string_view> &&
(!std::is_same_v<std::decay_t<T>, URI>)
URI(T &&input) {
explicit URI(T &&input) {
this->parse(std::string_view{std::forward<T>(input)});
}

Expand Down Expand Up @@ -266,7 +266,9 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
auto path(std::string &&path) -> URI &;

/// Append a path to the existing URI path or set a path if such component
/// does not exist in the URI. For example:
/// does not exist in the URI. The argument is treated as a path component
/// to merge in. Authority prefixes and `?` or `#` delimiters throw
/// `URIError`. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
Expand All @@ -275,7 +277,38 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
/// sourcemeta::core::URI uri{"https://www.sourcemeta.com/foo"};
/// uri.append_path("bar/baz");
/// assert(uri.recompose() == "https://www.sourcemeta.com/foo/bar/baz");
/// ```
auto append_path(const std::string &path) -> URI &;
auto append_path(std::string_view path) -> URI &;

/// Append a path to the existing URI from a parsed reference. The
/// reference must contain only a path. A scheme, authority, query, or
/// fragment throws `URIError`. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
///
/// sourcemeta::core::URI uri{"https://www.sourcemeta.com/foo"};
/// const sourcemeta::core::URI reference{"bar/baz"};
/// uri.append_path(reference);
/// assert(uri.recompose() == "https://www.sourcemeta.com/foo/bar/baz");
/// ```
auto append_path(const URI &reference) -> URI &;

/// Append a path to the existing URI from a parsed reference, moving the
/// path out of the reference rather than copying it. The reference must
/// contain only a path. A scheme, authority, query, or fragment throws
/// `URIError`. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
/// #include <utility>
///
/// sourcemeta::core::URI uri{"https://www.sourcemeta.com/foo"};
/// sourcemeta::core::URI reference{"bar/baz"};
/// uri.append_path(std::move(reference));
/// assert(uri.recompose() == "https://www.sourcemeta.com/foo/bar/baz");
/// ```
auto append_path(URI &&reference) -> URI &;

/// If the URI has a path, this method sets or replaces the extension in the
/// path. For example:
Expand Down Expand Up @@ -466,6 +499,21 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
/// ```
[[nodiscard]] auto recompose() const -> std::string;

/// Recompose the path, query, and fragment of a URI as an RFC 3986
/// Section 4.2 relative reference. Scheme and authority are omitted. The
/// result only resolves back to the original URI when used against a base
/// that shares the same scheme and authority. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
///
/// const sourcemeta::core::URI
/// uri{"https://www.sourcemeta.com/foo?x=1#bar"};
/// assert(uri.recompose_relative() == "/foo?x=1#bar");
/// ```
[[nodiscard]] auto recompose_relative() const -> std::string;

/// Recompose a URI as established by RFC 3986, but without including the
/// fragment component. The result is an optional to handle the case where the
/// input URI only consists of a fragment. For example:
Expand Down Expand Up @@ -535,8 +583,8 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
/// ```
auto relative_to(const URI &base) -> URI &;

/// Attempt to change the base of a URI . If the URI is not
/// relative to the former, leave the URI intact. For example:
/// Attempt to change the base of a URI. If the URI is not relative to
/// the former, leave the URI intact. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
Expand All @@ -550,6 +598,40 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
/// ```
auto rebase(const URI &base, const URI &new_base) -> URI &;

/// Attempt to change the base of a URI, moving components out of
/// `new_base` rather than copying them. If the URI is not relative to
/// the former base, leave the URI intact. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
/// #include <utility>
///
/// sourcemeta::core::URI uri{"https://example.com/foo/bar/baz"};
/// const sourcemeta::core::URI base{"https://example.com/foo"};
/// sourcemeta::core::URI new_base{"/qux"};
/// uri.rebase(base, std::move(new_base));
/// assert(uri.recompose() == "/qux/bar/baz");
/// ```
auto rebase(const URI &base, URI &&new_base) -> URI &;

/// Check whether two URIs share the same authority component. The authority
/// is the user information, host, and port per RFC 3986 Section 3.2. The
/// scheme is not part of the authority and is not compared. Comparison is
/// byte-exact on the stored values, so call `canonicalize()` first if you
/// want host case-insensitivity or default-port elision. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
///
/// const sourcemeta::core::URI left{"https://example.com/foo"};
/// const sourcemeta::core::URI right{"https://example.com/bar"};
/// assert(left.has_same_authority(right));
/// ```
[[nodiscard]] auto has_same_authority(const URI &other) const noexcept
-> bool;

/// Get the user information part of the URI, if any. For example:
///
/// ```cpp
Expand Down
58 changes: 58 additions & 0 deletions src/core/uri/recompose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,64 @@ auto URI::recompose() const -> std::string {
return result;
}

// Serialise only the path, query, and fragment of this URI, in that order,
// leaving out the scheme and authority. Each present component is passed
// through `escape_component_to_string`; the query is prefixed with '?' and
// the fragment with '#'.
auto URI::recompose_relative() const -> std::string {
  std::string result;
  result.reserve(128);

  const auto current_path{this->path()};
  if (current_path.has_value()) {
    const auto &raw_path = current_path.value();

    // Only a rootless path (no leading slash) needs special care: per
    // RFC 3986 Section 3.3 (segment-nz-nc) its first segment must not hold
    // a ':', otherwise a re-parser would read the prefix as a scheme name.
    const bool rootless = !raw_path.starts_with('/');
    const auto slash_position =
        rootless ? raw_path.find('/') : std::string::npos;
    const bool has_more_segments = slash_position != std::string::npos;
    const std::string_view head{raw_path.data(), has_more_segments
                                                     ? slash_position
                                                     : raw_path.size()};

    if (rootless && head.find(':') != std::string_view::npos) {
      // Swap every ':' of the first segment for its percent-encoded form
      // NOTE(review): this relies on the escaping helper leaving the
      // injected "%3A" triplets untouched - confirm against its definition
      std::string encoded_head;
      encoded_head.reserve(head.size() + 4);
      for (const char character : head) {
        if (character == ':') {
          encoded_head.append("%3A");
        } else {
          encoded_head.push_back(character);
        }
      }

      escape_component_to_string(result, encoded_head, URIEscapeMode::Path);

      // Whatever follows the first segment is escaped as-is
      if (has_more_segments) {
        escape_component_to_string(
            result, std::string_view{raw_path}.substr(slash_position),
            URIEscapeMode::Path);
      }
    } else {
      // Absolute path, or a first segment without ':': escape it whole
      escape_component_to_string(result, raw_path, URIEscapeMode::Path);
    }
  }

  const auto current_query{this->query()};
  if (current_query.has_value()) {
    result += '?';
    escape_component_to_string(result, current_query.value().raw(),
                               URIEscapeMode::Fragment);
  }

  if (this->fragment_.has_value()) {
    result += '#';
    escape_component_to_string(result, this->fragment_.value(),
                               URIEscapeMode::Fragment);
  }

  return result;
}

auto URI::recompose_without_fragment() const -> std::optional<std::string> {
std::string result;
result.reserve(256);
Expand Down
Loading
Loading