Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/beam_search_softmax.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.

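// Hackathon 10th Spring No.46 — compilation guards: the POSIX-only headers
// <fcntl.h>, <sys/mman.h> and <unistd.h> are included only when _WIN32 is not
// defined.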
#ifdef __NVCC__
#include <cub/cub.cuh>
#endif
#ifdef __HIPCC__
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;
#endif
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "helper.h"
#include "stdint.h"
Expand Down
3 changes: 3 additions & 0 deletions custom_ops/gpu_ops/custom_ftok.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// Hackathon 10th Spring No.46 — compilation guards: the includes and
// custom_ftok() in this header are compiled only when _WIN32 is not defined.
#pragma once

#ifndef _WIN32
#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
Expand All @@ -35,3 +37,4 @@ inline key_t custom_ftok(const char* path, int id) {
return static_cast<key_t>(((st.st_dev & 0x0f) << 28) |
((st.st_ino & 0xff) << 20) | (id & 0xfffff));
}
#endif
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/dequant_int8.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fcntl.h>
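// Hackathon 10th Spring No.46 — compilation guards: the POSIX-only headers
// <fcntl.h>, <sys/mman.h> and <unistd.h> are included only when _WIN32 is not
// defined.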
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "helper.h"

Expand Down
4 changes: 4 additions & 0 deletions custom_ops/gpu_ops/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

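// Hackathon 10th Spring No.46 — compilation fix: include <cstdlib> (for
// std::getenv) and <string> explicitly instead of relying on transitive
// includes.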
#pragma once

#include <cstdlib>
#include <string>

inline uint32_t get_decoder_block_shape_q() {
static const char* decoder_block_shape_q_env =
std::getenv("FLAGS_dec_block_shape_q");
Expand Down
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/fused_get_rotary_embedding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fcntl.h>
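// Hackathon 10th Spring No.46 — compilation guards: the POSIX-only headers
// <fcntl.h>, <sys/mman.h> and <unistd.h> are included only when _WIN32 is not
// defined.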
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "paddle/extension.h"

Expand Down
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/fused_hadamard_quant_fp8.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fcntl.h>
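// Hackathon 10th Spring No.46 — compilation guards: the POSIX-only headers
// <fcntl.h>, <sys/mman.h> and <unistd.h> are included only when _WIN32 is not
// defined.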
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "helper.h"

Expand Down
9 changes: 9 additions & 0 deletions custom_ops/gpu_ops/get_data_ptr_ipc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

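// Hackathon 10th Spring No.46 — compilation guards: sharedMemoryOpen2() (which
// calls shm_open) is compiled only when _WIN32 is not defined; when _WIN32 is
// defined, GetDataPtrIpc() reports "not supported" via PD_THROW instead.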
#include <cstring>
#include "cuda_multiprocess.h"
#include "helper.h"

namespace {
#ifndef _WIN32
int sharedMemoryOpen2(const char *name, size_t sz, sharedMemoryInfo *info) {
info->size = sz;
info->shmFd = shm_open(name, O_RDWR, 0777);
Expand All @@ -31,10 +33,16 @@ int sharedMemoryOpen2(const char *name, size_t sz, sharedMemoryInfo *info) {

return 0;
}
#endif
} // namespace

std::vector<paddle::Tensor> GetDataPtrIpc(const paddle::Tensor &tmp_input,
const std::string &shm_name) {
#ifdef _WIN32
PD_THROW(
"GetDataPtrIpc is not supported on Windows "
"(POSIX shared memory required).");
#else
auto out_data_ptr_tensor =
paddle::full({1}, 0, paddle::DataType::INT64, paddle::CPUPlace());
auto out_data_ptr_tensor_ptr = out_data_ptr_tensor.data<int64_t>();
Expand All @@ -53,6 +61,7 @@ std::vector<paddle::Tensor> GetDataPtrIpc(const paddle::Tensor &tmp_input,

out_data_ptr_tensor_ptr[0] = reinterpret_cast<int64_t>(ptr);
return {out_data_ptr_tensor};
#endif
}

PD_BUILD_STATIC_OP(get_data_ptr_ipc)
Expand Down
9 changes: 8 additions & 1 deletion custom_ops/gpu_ops/get_output.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

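// Hackathon 10th Spring No.46 — compilation guards: <sys/ipc.h> and
// <sys/msg.h> are included only when _WIN32 is not defined; when _WIN32 is
// defined, GetOutput() reports "not supported" via PD_THROW instead.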
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#ifndef _WIN32
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "paddle/extension.h"

Expand All @@ -36,6 +39,9 @@ void GetOutput(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag,
int msg_queue_id) {
#ifdef _WIN32
PD_THROW("GetOutput is not supported on Windows (POSIX IPC required).");
#else
if (rank_id > 0) {
return;
}
Expand Down Expand Up @@ -81,6 +87,7 @@ void GetOutput(const paddle::Tensor& x,
#endif

return;
#endif
}

void GetOutputStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
Expand Down
14 changes: 13 additions & 1 deletion custom_ops/gpu_ops/get_output_ep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

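// Hackathon 10th Spring No.46 — compilation guards: <sys/ipc.h> and
// <sys/msg.h> are included only when _WIN32 is not defined; when _WIN32 is
// defined, GetOutputKVSignal() and GetOutputEp() report "not supported" via
// PD_THROW instead.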
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#ifndef _WIN32
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "msg_utils.h"
#include "paddle/extension.h"
Expand All @@ -29,6 +32,10 @@
void GetOutputKVSignal(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag) {
#ifdef _WIN32
PD_THROW(
"GetOutputKVSignal is not supported on Windows (POSIX IPC required).");
#else
int msg_queue_id = 1024;
if (const char* msg_que_str_tmp = std::getenv("INFERENCE_MSG_QUEUE_ID")) {
std::string msg_que_str(msg_que_str_tmp);
Expand Down Expand Up @@ -57,12 +64,16 @@ void GetOutputKVSignal(const paddle::Tensor& x,
out_data[i] = msg_rcv.mtext[i];
}
return;
#endif
}

void GetOutputEp(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag,
int msg_queue_id) {
#ifdef _WIN32
PD_THROW("GetOutputEp is not supported on Windows (POSIX IPC required).");
#else
static struct msgdata msg_rcv;
if (const char* inference_msg_queue_id_env_p =
std::getenv("INFERENCE_MSG_QUEUE_ID")) {
Expand Down Expand Up @@ -108,6 +119,7 @@ void GetOutputEp(const paddle::Tensor& x,
#endif

return;
#endif
}

void GetOutputEPStatic(const paddle::Tensor& x,
Expand Down
9 changes: 8 additions & 1 deletion custom_ops/gpu_ops/get_output_msg_with_topk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

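// Hackathon 10th Spring No.46 — compilation guards: <sys/ipc.h> and
// <sys/msg.h> are included only when _WIN32 is not defined; when _WIN32 is
// defined, GetOutputTopK() reports "not supported" via PD_THROW instead.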
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#ifndef _WIN32
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "paddle/extension.h"

Expand All @@ -40,6 +43,9 @@ void GetOutputTopK(const paddle::Tensor& x,
int k,
int64_t rank_id,
bool wait_flag) {
#ifdef _WIN32
PD_THROW("GetOutputTopK is not supported on Windows (POSIX IPC required).");
#else
static struct msgdata msg_rcv;
int msg_queue_id = 1;

Expand Down Expand Up @@ -101,6 +107,7 @@ void GetOutputTopK(const paddle::Tensor& x,
ranks_data[i] = (int64_t)msg_rcv.mtext_ranks[i];
}
return;
#endif
}

PD_BUILD_STATIC_OP(get_output_topk)
Expand Down
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.

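// Hackathon 10th Spring No.46 — compilation guards: the POSIX-only headers
// <fcntl.h>, <sys/mman.h> and <unistd.h> are included only when _WIN32 is not
// defined.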
#pragma once

#include <cuda_fp8.h>

#ifndef PADDLE_WITH_COREX
#include "glog/logging.h"
#endif
#include <fcntl.h>
#include <nvml.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <cassert>
#include <cstdlib>
#include <cstring>
Expand Down
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/msg_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.

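// Hackathon 10th Spring No.46 — compilation guards: <fcntl.h>, <sys/ipc.h>,
// <sys/mman.h>, <sys/msg.h> and <unistd.h> are included only when _WIN32 is
// not defined.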
#pragma once

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifndef _WIN32
#include <fcntl.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/msg.h>
#include <unistd.h>
#endif
#include "paddle/extension.h"

#define MAX_BSZ 512
Expand Down
7 changes: 7 additions & 0 deletions custom_ops/gpu_ops/remote_cache_kv_ipc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

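// Hackathon 10th Spring No.46 — compilation guards: when _WIN32 is defined,
// open_shm_and_get_complete_signal_meta_data() reports "not supported" via
// PD_THROW instead of compiling its shared-memory implementation.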
#include "remote_cache_kv_ipc.h"

RemoteCacheKvIpc::save_cache_kv_complete_signal_layerwise_meta_data
Expand All @@ -24,6 +25,11 @@ bool RemoteCacheKvIpc::kv_complete_signal_shmem_opened = false;
RemoteCacheKvIpc::save_cache_kv_complete_signal_layerwise_meta_data
RemoteCacheKvIpc::open_shm_and_get_complete_signal_meta_data(
const int rank_id, const int device_id, const bool keep_pd_step_flag) {
#ifdef _WIN32
PD_THROW(
"open_shm_and_get_complete_signal_meta_data is not supported on "
"Windows (POSIX shared memory required).");
#else
if (RemoteCacheKvIpc::kv_complete_signal_shmem_opened) {
if (keep_pd_step_flag) {
return RemoteCacheKvIpc::kv_complete_signal_meta_data;
Expand Down Expand Up @@ -103,6 +109,7 @@ RemoteCacheKvIpc::open_shm_and_get_complete_signal_meta_data(
RemoteCacheKvIpc::kv_complete_signal_identity_ptr = identity_ptr;
RemoteCacheKvIpc::kv_complete_signal_shmem_opened = true;
return meta_data;
#endif
}

void CUDART_CB
Expand Down
Loading
Loading