-
Notifications
You must be signed in to change notification settings - Fork 7
Backend: Add BackendWithFallback to retry IO #215
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
299a72b
c77c747
4c8aa69
722d723
8d2cf82
b3a3330
62db64a
c8a987d
71e168f
3ddd9b0
89a9a9d
1fe38c8
10a624a
98be51e
db3549e
64e3ea5
fd1bb79
bb13421
6f8ba67
1ed46a5
eb69578
747b724
a6412b5
3cc8443
bc68811
b1dc5b7
761e2b6
d52ac09
7ab7add
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,65 @@ | ||||||||||||||||||||||||||||||||
| /* Copyright (c) Advanced Micro Devices, Inc. All rights reserved. | ||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||
| * SPDX-License-Identifier: MIT | ||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| #include "backend.h" | ||||||||||||||||||||||||||||||||
| #include "buffer.h" | ||||||||||||||||||||||||||||||||
| #include "file.h" | ||||||||||||||||||||||||||||||||
| #include "io.h" | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| #include <cstddef> | ||||||||||||||||||||||||||||||||
| #include <exception> | ||||||||||||||||||||||||||||||||
| #include <memory> | ||||||||||||||||||||||||||||||||
| #include <stdexcept> | ||||||||||||||||||||||||||||||||
| #include <sys/types.h> | ||||||||||||||||||||||||||||||||
| #include <system_error> | ||||||||||||||||||||||||||||||||
| #include <unistd.h> | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| using namespace hipFile; | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| ssize_t | ||||||||||||||||||||||||||||||||
| Backend::io(IoType type, std::shared_ptr<IFile> file, std::shared_ptr<IBuffer> buffer, size_t size, | ||||||||||||||||||||||||||||||||
| hoff_t file_offset, hoff_t buffer_offset) | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
| return _io_impl(type, file, buffer, size, file_offset, buffer_offset); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| ssize_t | ||||||||||||||||||||||||||||||||
| BackendWithFallback::io(IoType type, std::shared_ptr<IFile> file, std::shared_ptr<IBuffer> buffer, | ||||||||||||||||||||||||||||||||
| size_t size, hoff_t file_offset, hoff_t buffer_offset) | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
| ssize_t nbytes{0}; | ||||||||||||||||||||||||||||||||
| try { | ||||||||||||||||||||||||||||||||
| nbytes = _io_impl(type, file, buffer, size, file_offset, buffer_offset); | ||||||||||||||||||||||||||||||||
| if (nbytes < 0) { | ||||||||||||||||||||||||||||||||
| // Typically we should not reach this point. But in case we do, throw | ||||||||||||||||||||||||||||||||
| // an exception to use the fallback backend. | ||||||||||||||||||||||||||||||||
| throw std::system_error(-static_cast<int>(nbytes), std::generic_category()); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| catch (...) { | ||||||||||||||||||||||||||||||||
| std::exception_ptr e_ptr = std::current_exception(); | ||||||||||||||||||||||||||||||||
| if (fallback_backend && is_fallback_eligible(e_ptr, nbytes) && | ||||||||||||||||||||||||||||||||
| fallback_backend->score(file, buffer, size, file_offset, buffer_offset) >= 0) { | ||||||||||||||||||||||||||||||||
| nbytes = fallback_backend->io(type, file, buffer, size, file_offset, buffer_offset); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| else { | ||||||||||||||||||||||||||||||||
| throw; | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| return nbytes; | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
riley-dixon marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||||
| return nbytes; | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| void | ||||||||||||||||||||||||||||||||
| BackendWithFallback::register_fallback_backend(std::shared_ptr<Backend> backend) | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
| { | |
| { | |
| // Prevent self-registration, which can cause unbounded recursion on fallback. | |
| if (backend.get() == this) { | |
| return; | |
| } | |
| // Optionally prevent a simple two-node cycle: this -> backend -> this. | |
| if (backend) { | |
| if (auto backend_with_fallback = std::dynamic_pointer_cast<BackendWithFallback>(backend)) { | |
| if (backend_with_fallback->fallback_backend.get() == this) { | |
| return; | |
| } | |
| } | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I /mostly/ agree with Copilot: register_fallback_backend() should throw an exception if self-registration is attempted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a check for self-registration & nullptr. I don't think we need something more comprehensive right now.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,11 +13,16 @@ | |
| #include "io.h" | ||
| #include "stats.h" | ||
|
|
||
| #include <cerrno> | ||
| #include <cstddef> | ||
| #include <cstdint> | ||
| #include <exception> | ||
| #include <fcntl.h> | ||
| #include <hip/hip_runtime_api.h> | ||
| #include <linux/stat.h> | ||
| #include <memory> | ||
| #include <stdexcept> | ||
| #include <system_error> | ||
|
|
||
| using namespace hipFile; | ||
| using namespace std; | ||
|
|
@@ -158,8 +163,8 @@ Fastpath::score(shared_ptr<IFile> file, shared_ptr<IBuffer> buffer, size_t size, | |
| } | ||
|
|
||
| ssize_t | ||
| Fastpath::io(IoType type, shared_ptr<IFile> file, shared_ptr<IBuffer> buffer, size_t size, hoff_t file_offset, | ||
| hoff_t buffer_offset) | ||
| Fastpath::_io_impl(IoType type, shared_ptr<IFile> file, shared_ptr<IBuffer> buffer, size_t size, | ||
| hoff_t file_offset, hoff_t buffer_offset) | ||
| { | ||
| if (!Context<Configuration>::get()->fastpath()) { | ||
| throw BackendDisabled(); | ||
|
|
@@ -193,22 +198,38 @@ Fastpath::io(IoType type, shared_ptr<IFile> file, shared_ptr<IBuffer> buffer, si | |
| switch (type) { | ||
| case IoType::Read: | ||
| nbytes = Context<Hip>::get()->hipAmdFileRead(handle, devptr, size, file_offset); | ||
| break; | ||
| case IoType::Write: | ||
| nbytes = Context<Hip>::get()->hipAmdFileWrite(handle, devptr, size, file_offset); | ||
| break; | ||
| default: | ||
| throw std::runtime_error("Invalid IoType"); | ||
| } | ||
| switch (type) { | ||
| case IoType::Read: | ||
| statsAddFastPathRead(nbytes); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be better if If I don't think a Same goes for the write stats.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to keep updating stats outside of
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updating stats shouldn't fail/throw. Those functions are just bumping a counter. stats should be fixed.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'll update all the stats calls to noexcept.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Stats changes have been reverted with the assumption that stats won't throw. |
||
| break; | ||
| case IoType::Write: | ||
| nbytes = Context<Hip>::get()->hipAmdFileWrite(handle, devptr, size, file_offset); | ||
| statsAddFastPathWrite(nbytes); | ||
| break; | ||
| default: | ||
| break; | ||
| throw std::runtime_error("Invalid IoType"); | ||
| } | ||
| return static_cast<ssize_t>(nbytes); | ||
| } | ||
|
|
||
| bool | ||
| Fastpath::is_fallback_eligible(std::exception_ptr e_ptr, ssize_t nbytes) const | ||
| { | ||
| (void)nbytes; | ||
| try { | ||
| std::rethrow_exception(e_ptr); | ||
| } | ||
| catch (const std::system_error &sys_err) { | ||
| switch (sys_err.code().value()) { | ||
| case ENODEV: | ||
| return true; | ||
| case EREMOTEIO: | ||
| return true; | ||
| default: | ||
| // System error not eligible for fallback. | ||
| return false; | ||
| } | ||
| } | ||
| catch (...) { | ||
| // Thrown exception not eligible for fallback. | ||
| return false; | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.