From 942b7bec4a06c207c7f5bb59127ecde7417e9ffa Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 6 Sep 2024 13:33:10 +0200 Subject: [PATCH 01/60] 3.0.0: Implement fabric connector --- configure.ac | 26 +- include/base/fabric.h | 81 ++++ include/base/socket.h | 2 +- .../nfi_fabric_server_comm.h | 57 +++ .../nfi/nfi_xpn_server/nfi_xpn_server.h | 7 +- .../fabric_server/fabric_server_comm.h | 61 +++ include/xpn_server/xpn_server_comm.h | 2 +- include/xpn_server/xpn_server_conf.h | 1 + include/xpn_server/xpn_server_params.h | 7 + scripts/compile/build-me-xpn.sh | 16 +- scripts/compile/software/xpn.sh | 15 +- scripts/execute/xpn.sh | 2 +- src/base/Makefile.am | 5 + src/base/fabric.c | 454 ++++++++++++++++++ src/base/socket.c | 8 +- src/bypass/Makefile.in | 2 +- src/xpn_client/Makefile.am | 24 +- .../nfi_fabric_server_comm.c | 143 ++++++ .../nfi/nfi_xpn_server/nfi_xpn_server.c | 3 +- .../nfi/nfi_xpn_server/nfi_xpn_server_comm.c | 54 ++- .../xpn/xpn_simple/policy/xpn_policy_init.c | 10 + src/xpn_server/Makefile.am | 10 + .../fabric_server/fabric_server_comm.c | 182 +++++++ src/xpn_server/xpn_server.c | 61 ++- src/xpn_server/xpn_server_comm.c | 54 ++- src/xpn_server/xpn_server_params.c | 24 +- test/integrity/xpn_metadata/Makefile.in | 2 +- .../xpn-fault-tolerant/Makefile.in | 2 +- test/performance/xpn/Makefile.in | 2 +- 29 files changed, 1277 insertions(+), 40 deletions(-) create mode 100644 include/base/fabric.h create mode 100644 include/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.h create mode 100644 include/xpn_server/fabric_server/fabric_server_comm.h create mode 100644 src/base/fabric.c create mode 100644 src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c create mode 100644 src/xpn_server/fabric_server/fabric_server_comm.c diff --git a/configure.ac b/configure.ac index 1b3b4affc..c2ef023d5 100644 --- a/configure.ac +++ b/configure.ac @@ -184,7 +184,8 @@ INCLUDEFLAGS="\ -I\$(top_srcdir)/include/xpn_client/nfi/nfi_local \ 
-I\$(top_srcdir)/include/xpn_server/ \ -I\$(top_srcdir)/include/xpn_server/mpi_server/ \ --I\$(top_srcdir)/include/xpn_server/sck_server/" +-I\$(top_srcdir)/include/xpn_server/sck_server/ \ +-I\$(top_srcdir)/include/xpn_server/fabric_server/" @@ -270,6 +271,29 @@ AM_CONDITIONAL([ENABLE_SCK_SERVER], [test "$NFI_SCK_SERVER" = "nfi_sck_server"]) ### END OF SCK_SERVER BLOCK. Do not remove this line. ### +### BEGIN OF FABRIC_SERVER BLOCK. Do not remove this line. ### +# +# Defines the necessary variables if fabric_server is enabled. +# +AC_ARG_ENABLE( [fabric_server], + [AS_HELP_STRING([--enable-fabric_server@<:@=/path/to/fabric/@:>@ (Don't use '~')],[Enable fabric_server module.])], + [ + [CDEFS="$CDEFS -DENABLE_FABRIC_SERVER"] + [INCLUDEFLAGS="$INCLUDEFLAGS -I$enableval/include -I\$(top_srcdir)/include/xpn_client/nfi/nfi_fabric_server"] + [NFI_FABRIC_SERVER="nfi_fabric_server"] + [NFI_FABRIC_SERVER_OBJECTS="\$(NFI_FABRIC_SERVER_OBJECTS)"] + [LIBLINK+=" -lfabric"] + [LIBS=$LFLAGS" "$LIBS" -L$enableval/lib -lfabric"] + ] +) +AC_SUBST(NFI_FABRIC_SERVER) +AC_SUBST(NFI_FABRIC_SERVER_OBJECTS) +AC_SUBST(LIBLINK) +AC_SUBST(LIBS) +AM_CONDITIONAL([ENABLE_FABRIC_SERVER], [test "$NFI_FABRIC_SERVER" = "nfi_fabric_server"]) +### END OF FABRIC_SERVER BLOCK. Do not remove this line. ### + + ### BEGIN OF MQTT BLOCK. Do not remove this line. ### # # Defines the necessary variables if gridftp is enabled. diff --git a/include/base/fabric.h b/include/base/fabric.h new file mode 100644 index 000000000..308c4a6c4 --- /dev/null +++ b/include/base/fabric.h @@ -0,0 +1,81 @@ + +/* + * Copyright 2000-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Expand is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Expand. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+
+#ifndef _FABRIC_H_
+#define _FABRIC_H_
+
+  #ifdef __cplusplus
+    extern "C" {
+  #endif
+
+  /* ... Include / Inclusion ........................................... */
+
+  #include "all_system.h"
+  #include "debug_msg.h"
+  /*
+   * NOTE(review): the four system header names were missing from the
+   * directives below; they are restored from the libfabric calls made in
+   * src/base/fabric.c (fi_getinfo/fi_fabric, fi_domain/fi_cq_open/fi_av_open,
+   * fi_endpoint/fi_send/fi_recv, fi_getname). Confirm against the tree.
+   */
+  #include <rdma/fabric.h>
+  #include <rdma/fi_domain.h>
+  #include <rdma/fi_endpoint.h>
+  #include <rdma/fi_cm.h>
+
+
+  /* ... Const / Const ................................................. */
+
+
+  /* ... Data structures / Estructuras de datos ........................ */
+
+  /* Resources shared by every communicator opened on one fabric. */
+  struct fabric_domain
+  {
+    struct fi_info *hints, *info;  /* hints handed to fi_getinfo / list it returns */
+    struct fid_fabric *fabric;     /* handle opened with fi_fabric */
+    struct fid_domain *domain;     /* handle opened with fi_domain */
+  };
+
+  /* One endpoint plus the completion queue and address vector bound to it. */
+  struct fabric_comm
+  {
+    struct fabric_domain * fabric_domain;  /* domain the endpoint was opened on (not owned) */
+    struct fid_ep *ep;                     /* endpoint used by fabric_send / fabric_recv */
+    struct fid_av *av;                     /* address vector bound to ep */
+    struct fid_cq *cq;                     /* completion queue bound to ep for FI_SEND | FI_RECV */
+    fi_addr_t fi_addr;                     /* peer address filled by fabric_register_addr */
+  };
+
+  /* ... Functions / Funciones ......................................... */
+
+  /* Open the fabric and the domain. Returns 0, or the libfabric error code. */
+  int fabric_init ( struct fabric_domain *fabric );
+
+  /* Open and enable an endpoint (with its CQ and AV) on domain.
+     Returns 0, -1 when domain is NULL, or the libfabric error code. */
+  int fabric_new_comm ( struct fabric_domain *domain, struct fabric_comm *out_fabric_comm );
+
+  /* Copy the local endpoint name into out_addr (capacity size_addr bytes).
+     Returns the fi_getname result: 0 on success. */
+  int fabric_get_addr( struct fabric_comm *fabric_comm, char * out_addr, size_t size_addr );
+  /* Insert the peer address addr_buf into the AV and store it in fi_addr.
+     Returns 1 on success, -FI_ENOSYS on failure. */
+  int fabric_register_addr( struct fabric_comm *fabric_comm, char * addr_buf );
+  /* Blocking send / receive of size bytes. Return size on success, -1 on error. */
+  int fabric_send ( struct fabric_comm *fabric, void * buffer, size_t size );
+  int fabric_recv ( struct fabric_comm *fabric, void * buffer, size_t size );
+  /* NOTE(review): no definition of fabric_close is visible in src/base/fabric.c
+     in this patch; confirm it is provided elsewhere or drop the prototype. */
+  int fabric_close ( struct fabric_comm *fabric );
+  /* Close the endpoint, address vector and completion queue of fabric_comm. */
+  int fabric_close_comm ( struct fabric_comm *fabric_comm );
+  /* Close the domain and the fabric and free the fi_info structures. */
+  int fabric_destroy ( struct fabric_domain *domain );
+
+  /* ... Macros / Macros .................................................. 
*/ + + #ifdef __cplusplus + } + #endif + +#endif diff --git a/include/base/socket.h b/include/base/socket.h index 88cd43f23..9b29489c8 100644 --- a/include/base/socket.h +++ b/include/base/socket.h @@ -48,7 +48,7 @@ int socket_send ( int socket, void * buffer, int size ); int socket_recv ( int socket, void * buffer, int size ); int socket_server_create ( int *out_socket ); - int socket_server_accept ( int socket, int *out_conection_socket ); + int socket_server_accept ( int socket, int *out_conection_socket, char *addr); int socket_client_connect ( char * srv_name, int *out_socket ); int socket_close ( int socket ); diff --git a/include/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.h b/include/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.h new file mode 100644 index 000000000..a7b6badc4 --- /dev/null +++ b/include/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.h @@ -0,0 +1,57 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + + +#ifndef _NFI_FABRIC_SERVER_COMM_H_ +#define _NFI_FABRIC_SERVER_COMM_H_ + + #ifdef __cplusplus + extern "C" { + #endif + + /* ... Include / Inclusion ........................................... 
*/ + + #include "all_system.h" + #include "base/utils.h" + #include "base/ns.h" + #include "socket.h" + #include "fabric.h" + #include "xpn_server/xpn_server_ops.h" + + + /* ... Const / Const ................................................. */ + + + /* ... Data structures / Estructuras de datos ........................ */ + + + /* ... Functions / Funciones ......................................... */ + + int nfi_fabric_server_comm_connect ( struct fabric_domain *fabric_domain, char * srv_name, char * port_name, struct fabric_comm *out_fabric_comm ); + + int nfi_fabric_server_comm_disconnect ( struct fabric_comm *fabric_comm ); + /* ................................................................... */ + + #ifdef __cplusplus + } + #endif + +#endif diff --git a/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.h b/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.h index 4c234c784..1125008f8 100644 --- a/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.h +++ b/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.h @@ -38,11 +38,13 @@ #include "nfi.h" #include "xpn_server/xpn_server_conf.h" #include "xpn_server/xpn_server_ops.h" - #include "nfi.h" #include "nfi_worker.h" #ifdef ENABLE_MPI_SERVER #include "mpi.h" #endif + #ifdef ENABLE_FABRIC_SERVER + #include "base/fabric.h" + #endif /* ... Const / Const ................................................. 
*/ @@ -69,6 +71,9 @@ #ifdef ENABLE_SCK_SERVER int server_socket; // For sck_server #endif + #ifdef ENABLE_FABRIC_SERVER + struct fabric_comm fabric_comm; // For fabric_server + #endif // server port char port_name [XPN_SERVER_MAX_PORT_NAME]; char srv_name [XPN_SERVER_MAX_PORT_NAME]; diff --git a/include/xpn_server/fabric_server/fabric_server_comm.h b/include/xpn_server/fabric_server/fabric_server_comm.h new file mode 100644 index 000000000..554e3ff89 --- /dev/null +++ b/include/xpn_server/fabric_server/fabric_server_comm.h @@ -0,0 +1,61 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + + +#ifndef _FABRIC_SERVER_COMM_H_ +#define _FABRIC_SERVER_COMM_H_ + + #ifdef __cplusplus + extern "C" { + #endif + + /* ... Include / Inclusion ........................................... */ + + #include "all_system.h" + #include "base/utils.h" + #include "base/time_misc.h" + #include "base/fabric.h" + + /* ... Const / Const ................................................. */ + + + /* ... Data structures / Estructuras de datos ........................ */ + + + /* ... Functions / Funciones ......................................... 
*/ + + // int fabric_server_comm_init ( int argc, char *argv[], int thread_mode, char * port_name ); + // int fabric_server_comm_destroy ( char * port_name ); + + int fabric_server_comm_accept ( struct fabric_domain *fabric_domain, char * dest_addr, char * port_name, struct fabric_comm **new_sd ); + int fabric_server_comm_disconnect ( struct fabric_comm *fd ); + + // ssize_t fabric_server_comm_read_operation ( struct fabric_comm *fd, int *op, int *rank_client_id, int *tag_client_id ); + // ssize_t fabric_server_comm_write_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ); + // ssize_t fabric_server_comm_read_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ); + + /* ................................................................... */ + + #ifdef __cplusplus + } + #endif + +#endif diff --git a/include/xpn_server/xpn_server_comm.h b/include/xpn_server/xpn_server_comm.h index aab8c0b76..77672f442 100644 --- a/include/xpn_server/xpn_server_comm.h +++ b/include/xpn_server/xpn_server_comm.h @@ -46,7 +46,7 @@ int xpn_server_comm_init ( xpn_server_param_st *params ); int xpn_server_comm_destroy ( xpn_server_param_st *params ); - int xpn_server_comm_accept ( xpn_server_param_st *params, void **new_sd ); + int xpn_server_comm_accept ( xpn_server_param_st *params, char *addr, char *port, void **new_sd ); int xpn_server_comm_disconnect ( xpn_server_param_st *params, void *sd ); ssize_t xpn_server_comm_read_operation ( xpn_server_param_st *params, void *sd, int *op, int *rank_client_id, int *tag_client_id ); diff --git a/include/xpn_server/xpn_server_conf.h b/include/xpn_server/xpn_server_conf.h index 380f4caff..3e2f91ac3 100644 --- a/include/xpn_server/xpn_server_conf.h +++ b/include/xpn_server/xpn_server_conf.h @@ -39,6 +39,7 @@ #define XPN_SERVER_TYPE_MPI 0 #define XPN_SERVER_TYPE_SCK 1 + #define XPN_SERVER_TYPE_FABRIC 2 /* MAX_BUFFER_SIZE */ diff --git 
a/include/xpn_server/xpn_server_params.h b/include/xpn_server/xpn_server_params.h index 44eb9583b..68af08d5e 100644 --- a/include/xpn_server/xpn_server_params.h +++ b/include/xpn_server/xpn_server_params.h @@ -37,6 +37,9 @@ #include "base/utils.h" #include "base/workers.h" #include "xpn_server_conf.h" + #ifdef ENABLE_FABRIC_SERVER + #include "base/fabric.h" + #endif /* ... Const / Const ................................................. */ @@ -62,6 +65,10 @@ int server_socket; // For sck_server #endif + #ifdef ENABLE_FABRIC_SERVER + struct fabric_domain fabric_domain; // For fabric_server + #endif + int await_stop; // server arguments diff --git a/scripts/compile/build-me-xpn.sh b/scripts/compile/build-me-xpn.sh index 9c4da6756..1cdc90d9b 100755 --- a/scripts/compile/build-me-xpn.sh +++ b/scripts/compile/build-me-xpn.sh @@ -24,9 +24,10 @@ function usage { echo "" echo " Usage:" - echo " $0 -m -i " + echo " $0 -m -l -i " echo " Where:" echo " * = full path where the mpicc is installed." + echo " * = full path where the libfabric is installed." echo " * = full path where XPN is going to be installed." echo "" } @@ -43,12 +44,14 @@ echo " Begin." ## base path BASE_PATH="$(dirname "$(readlink -f "$0")")" - +LIBFABRIC_PATH="" ## get arguments -while getopts "m:i:" opt; do +while getopts "m:l:i:" opt; do case "${opt}" in m) MPICC_PATH=${OPTARG} ;; + l) LIBFABRIC_PATH=${OPTARG} + ;; i) INSTALL_PATH=${OPTARG} ;; *) echo " Error:" @@ -75,6 +78,11 @@ fi # 2) XPN and dependencies... -"$BASE_PATH"/software/xpn.sh -m "$MPICC_PATH" -i "$INSTALL_PATH" -s "$BASE_PATH"/../../../xpn +if [ "$LIBFABRIC_PATH" == "" ]; then + "$BASE_PATH"/software/xpn.sh -m "$MPICC_PATH" -i "$INSTALL_PATH" -s "$BASE_PATH"/../../../xpn +else + "$BASE_PATH"/software/xpn.sh -m "$MPICC_PATH" -l "$LIBFABRIC_PATH" -i "$INSTALL_PATH" -s "$BASE_PATH"/../../../xpn +fi + echo " End." 
diff --git a/scripts/compile/software/xpn.sh b/scripts/compile/software/xpn.sh index b3629cb0f..8359fd48a 100755 --- a/scripts/compile/software/xpn.sh +++ b/scripts/compile/software/xpn.sh @@ -24,20 +24,23 @@ function usage { echo "" echo " Usage:" - echo " $0 -m -i -s " + echo " $0 -m -l -i -s " echo " Where:" echo " * = full path where the mpicc is installed." + echo " * = full path where the libfabric is installed." echo " * = full path where XPN is going to be installed." echo " * = full path to the source code XPN." echo "" } - +LIBFABRIC_PATH="" ## get arguments -while getopts "m:i:s:" opt; do +while getopts "m:l:i:s:" opt; do case "${opt}" in m) MPICC_PATH=${OPTARG} ;; + l) LIBFABRIC_PATH=${OPTARG} + ;; i) INSTALL_PATH=${OPTARG} ;; s) SRC_PATH=${OPTARG} @@ -86,7 +89,11 @@ echo " * XPN: compiling and installing..." pushd . cd "$SRC_PATH" ACLOCAL_FLAGS="-I /usr/share/aclocal/" autoreconf -v -i -s -W all -./configure --prefix="${INSTALL_PATH}/xpn" --enable-sck_server --enable-mpi_server="${MPICC_PATH}" +if [ "$LIBFABRIC_PATH" == "" ]; then + ./configure --prefix="${INSTALL_PATH}/xpn" --enable-sck_server --enable-mpi_server="${MPICC_PATH}" +else + ./configure --prefix="${INSTALL_PATH}/xpn" --enable-sck_server --enable-mpi_server="${MPICC_PATH}" --enable-fabric_server="${LIBFABRIC_PATH}" +fi make clean make -j 8 #doxygen doc/doxygen-XPN.cfg diff --git a/scripts/execute/xpn.sh b/scripts/execute/xpn.sh index 7657961b9..acf81eaf9 100755 --- a/scripts/execute/xpn.sh +++ b/scripts/execute/xpn.sh @@ -118,7 +118,7 @@ start_xpn_servers() { -hostfile "${HOSTFILE}" \ mkdir -p ${XPN_STORAGE_PATH} - if [[ ${SERVER_TYPE} == "sck" ]]; then + if [[ ${SERVER_TYPE} == "sck" || ${SERVER_TYPE} == "fabric" ]]; then mpiexec -np "${NODE_NUM}" \ -hostfile "${HOSTFILE}" \ "${BASE_DIR}"/../../src/xpn_server/xpn_server -s ${SERVER_TYPE} -t pool "${ARGS}" & diff --git a/src/base/Makefile.am b/src/base/Makefile.am index c7de020e7..5bdc8bca4 100644 --- a/src/base/Makefile.am +++ 
b/src/base/Makefile.am @@ -43,6 +43,11 @@ BASE_SOURCE= @top_srcdir@/src/base/darray.c \ @top_srcdir@/src/base/utils.c +if ENABLE_FABRIC_SERVER +BASE_HEADER+= @top_srcdir@/include/base/fabric.h +BASE_SOURCE+= @top_srcdir@/src/base/fabric.c +endif + ############# # LIBRARIES # ############# diff --git a/src/base/fabric.c b/src/base/fabric.c new file mode 100644 index 000000000..8c6a6fb7b --- /dev/null +++ b/src/base/fabric.c @@ -0,0 +1,454 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Luis Miguel Sanchez Garcia, Borja Bergua Guerra, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + + +/* ... Include / Inclusion ........................................... */ + +#include "base/fabric.h" + + +/* ... Const / Const ................................................. */ + +// Because we use FI_THREAD_SAFE the threads need a mutex in inicialization and destruction +// But because we asociate a thread per endpoint the send/recv does not need the mutex +pthread_mutex_t fabric_init_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* ... Global variables / Variables globales ........................ */ + + +/* ... Functions / Funciones ......................................... 
*/
+
+/*
+ * Fill a caller-allocated fi_info with the hints that fabric_init hands to
+ * fi_getinfo.
+ *
+ * hints: structure obtained from fi_allocinfo(); its ep_attr, tx_attr and
+ *        domain_attr sub-structures are written through.
+ *
+ * Returns 0 on success, -FI_EINVAL when hints is NULL.
+ *
+ * The pointer is received by value, so a structure allocated in here could
+ * never reach the caller; allocation is therefore done by fabric_init.
+ */
+int set_hints( struct fi_info * hints)
+{
+  if (!hints)
+    return -FI_EINVAL;
+
+  /*
+   * Request FI_EP_RDM (reliable datagram) endpoint which will allow us
+   * to reliably send messages to peers without having to
+   * listen/connect/accept.
+   */
+  hints->ep_attr->type = FI_EP_RDM;
+
+  /*
+   * Request basic messaging capabilities from the provider (no tag
+   * matching, no RMA, no atomic operations)
+   */
+  hints->caps = FI_MSG;
+
+  /*
+   * Default to FI_DELIVERY_COMPLETE which will make sure completions do
+   * not get generated until our message arrives at the destination.
+   * Otherwise, the client might get a completion and exit before the
+   * server receives the message.
+   */
+  hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE;
+
+  /*
+   * Set the mode bit to 0. Mode bits are used to convey requirements
+   * that an application must adhere to when using the fabric interfaces.
+   * On input to fi_getinfo, applications set the mode bits that they
+   * support.
+   */
+  hints->mode = 0;
+
+  /*
+   * Set mr_mode to 0. mr_mode is used to specify the type of memory
+   * registration capabilities the application requires. No memory
+   * registration is used here, so it is left at 0.
+   */
+  hints->domain_attr->mr_mode = 0;
+
+  hints->domain_attr->threading = FI_THREAD_SAFE;
+
+  /* Done setting hints */
+
+  return 0;
+}
+
+/*
+ * Ask the kernel for a TCP port that is unused right now.
+ *
+ * A temporary IPv4 socket is bound to port 0, the port chosen by the kernel
+ * is read back with getsockname and the socket is closed again so that it is
+ * not leaked and the port is actually available to the caller.
+ *
+ * Returns the port in host byte order, or -1 on error.
+ */
+int fabric_try_free_port()
+{
+  int server_socket;
+  int port;
+  struct sockaddr_in server_addr = {0};
+  socklen_t addr_len = sizeof(server_addr);
+
+  // Create the socket
+  server_socket = socket(AF_INET, SOCK_STREAM, 0);
+  if (server_socket < 0) {
+    perror("Error al crear el socket");
+    return -1;
+  }
+
+  // Server address: IPv4, any local interface, port 0 = let the kernel choose
+  server_addr.sin_family = AF_INET;
+  server_addr.sin_addr.s_addr = INADDR_ANY;
+  server_addr.sin_port = htons(0);
+
+  // Bind the address to the socket
+  if (bind(server_socket, (struct sockaddr*)&server_addr, sizeof(server_addr)) < 0) {
+    perror("Error en bind");
+    close(server_socket);
+    return -1;
+  }
+
+  if (getsockname(server_socket, (struct sockaddr*)&server_addr, &addr_len) == -1) {
+    perror("Error en getsockname");
+    close(server_socket);
+    return -1;
+  }
+
+  port = ntohs(server_addr.sin_port);
+  debug_info("[FABRIC] [fabric_try_free_port] Socket free %d\n", port);
+
+  // The socket was only needed to discover the port number
+  close(server_socket);
+
+  return port;
+}
+
+/*
+ * Close the domain and the fabric and free both fi_info structures, skipping
+ * whatever is still NULL, and reset every pointer.
+ * The caller must hold fabric_init_mutex.
+ *
+ * Returns 0, or the first fi_close error found.
+ */
+static int fabric_domain_release ( struct fabric_domain *domain )
+{
+  int ret = 0;
+  int aux;
+
+  debug_info("[FABRIC] [fabric_destroy] Close domain\n");
+  if (domain->domain) {
+    aux = fi_close(&domain->domain->fid);
+    if (aux) {
+      printf("warning: error closing domain (%d)\n", aux);
+      if (!ret) ret = aux;
+    }
+    domain->domain = NULL;
+  }
+
+  debug_info("[FABRIC] [fabric_destroy] Close fabric\n");
+  if (domain->fabric) {
+    aux = fi_close(&domain->fabric->fid);
+    if (aux) {
+      printf("warning: error closing fabric (%d)\n", aux);
+      if (!ret) ret = aux;
+    }
+    domain->fabric = NULL;
+  }
+
+  debug_info("[FABRIC] [fabric_destroy] Free hints \n");
+  if (domain->hints) {
+    fi_freeinfo(domain->hints);
+    domain->hints = NULL;
+  }
+
+  debug_info("[FABRIC] [fabric_destroy] Free info \n");
+  if (domain->info) {
+    fi_freeinfo(domain->info);
+    domain->info = NULL;
+  }
+
+  return ret;
+}
+
+/*
+ * Initialise a fabric_domain: allocate and fill the hints, query the
+ * providers with fi_getinfo and open the fabric and the domain described by
+ * the first fi_info returned.
+ *
+ * Returns 0 on success or a negative libfabric error code. On failure
+ * everything opened so far is released and the pointers in *fabric are NULL.
+ * The whole sequence runs under fabric_init_mutex.
+ */
+int fabric_init ( struct fabric_domain *fabric )
+{
+  int ret;
+
+  debug_info("[FABRIC] [fabric_init] Start\n");
+
+  if (fabric == NULL)
+    return -FI_EINVAL;
+
+  pthread_mutex_lock(&fabric_init_mutex);
+
+  // Known state first, so the error path can release unconditionally
+  fabric->hints  = NULL;
+  fabric->info   = NULL;
+  fabric->fabric = NULL;
+  fabric->domain = NULL;
+
+  fabric->hints = fi_allocinfo();
+  if (!fabric->hints) {
+    printf("fi_allocinfo error\n");
+    ret = -FI_ENOMEM;
+    goto error;
+  }
+
+  ret = set_hints(fabric->hints);
+  if (ret) {
+    printf("set_hints error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * The first libfabric call to happen for initialization is fi_getinfo
+   * which queries libfabric and returns any appropriate providers that
+   * fulfill the hints requirements. Any applicable providers will be
+   * returned as a list of fi_info structs (&info). The first fi_info
+   * struct is the one used below. No node, service or flags are passed.
+   */
+  ret = fi_getinfo(fi_version(), NULL, NULL, 0,
+                   fabric->hints, &fabric->info);
+
+  debug_info("[FABRIC] [fabric_init] fi_getinfo = %d\n", ret);
+  if (ret) {
+    printf("fi_getinfo error (%d)\n", ret);
+    goto error;
+  }
+
+  #ifdef DEBUG
+  debug_info("[FABRIC] [fabric_init] %s", fi_tostr(fabric->info, FI_TYPE_INFO));
+  #endif
+
+  /*
+   * Initialize our fabric. The fabric network represents a collection of
+   * hardware and software resources that access a single physical or
+   * virtual network.
+   */
+  ret = fi_fabric(fabric->info->fabric_attr, &fabric->fabric, NULL);
+  debug_info("[FABRIC] [fabric_init] fi_fabric = %d\n", ret);
+  if (ret) {
+    printf("fi_fabric error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * Initialize our domain (associated with our fabric). A domain defines
+   * the boundary for associating different resources together.
+   */
+  ret = fi_domain(fabric->fabric, fabric->info, &fabric->domain, NULL);
+  debug_info("[FABRIC] [fabric_init] fi_domain = %d\n", ret);
+  if (ret) {
+    printf("fi_domain error (%d)\n", ret);
+    goto error;
+  }
+
+  pthread_mutex_unlock(&fabric_init_mutex);
+  return 0;
+
+error:
+  // Do not leak hints/info/fabric when a later step fails
+  fabric_domain_release(fabric);
+  pthread_mutex_unlock(&fabric_init_mutex);
+  return ret;
+}
+
+/*
+ * Close the endpoint, the address vector and the completion queue of a
+ * communicator, skipping whatever is still NULL, and reset the pointers.
+ * The endpoint goes first because the other two are bound to it.
+ * The caller must hold fabric_init_mutex.
+ *
+ * Returns 0, or the first fi_close error found.
+ */
+static int fabric_comm_release ( struct fabric_comm *fabric_comm )
+{
+  int ret = 0;
+  int aux;
+
+  debug_info("[FABRIC] [fabric_close_comm] Close endpoint\n");
+  if (fabric_comm->ep) {
+    aux = fi_close(&fabric_comm->ep->fid);
+    if (aux) {
+      printf("warning: error closing EP (%d)\n", aux);
+      if (!ret) ret = aux;
+    }
+    fabric_comm->ep = NULL;
+  }
+
+  debug_info("[FABRIC] [fabric_close_comm] Close address vector\n");
+  if (fabric_comm->av) {
+    aux = fi_close(&fabric_comm->av->fid);
+    if (aux) {
+      printf("warning: error closing AV (%d)\n", aux);
+      if (!ret) ret = aux;
+    }
+    fabric_comm->av = NULL;
+  }
+
+  debug_info("[FABRIC] [fabric_close_comm] Close completion queue\n");
+  if (fabric_comm->cq) {
+    aux = fi_close(&fabric_comm->cq->fid);
+    if (aux) {
+      printf("warning: error closing CQ (%d)\n", aux);
+      if (!ret) ret = aux;
+    }
+    fabric_comm->cq = NULL;
+  }
+
+  return ret;
+}
+
+/*
+ * Create a communicator on an initialised domain: open an endpoint, a
+ * completion queue and an address vector, bind both to the endpoint and
+ * enable it.
+ *
+ * Returns 0 on success, -1 when an argument is NULL, or the libfabric error
+ * code of the step that failed; in that case everything opened so far is
+ * closed again. The whole sequence runs under fabric_init_mutex.
+ */
+int fabric_new_comm ( struct fabric_domain *domain, struct fabric_comm *out_fabric_comm )
+{
+  struct fi_cq_attr cq_attr = {0};
+  struct fi_av_attr av_attr = {0};
+  int ret;
+
+  debug_info("[FABRIC] [fabric_new_comm] Start\n");
+
+  if (domain == NULL || out_fabric_comm == NULL) {
+    return -1;
+  }
+
+  pthread_mutex_lock(&fabric_init_mutex);
+
+  // First assign the domain to the fabric_comm
+  out_fabric_comm->fabric_domain = domain;
+  out_fabric_comm->ep = NULL;
+  out_fabric_comm->av = NULL;
+  out_fabric_comm->cq = NULL;
+
+  /*
+   * Initialize our endpoint. Endpoints are transport level communication
+   * portals which are used to initiate and drive communication. There
+   * are three main types of endpoints:
+   * FI_EP_MSG - connected, reliable
+   * FI_EP_RDM - unconnected, reliable
+   * FI_EP_DGRAM - unconnected, unreliable
+   * The type of endpoint is requested in hints/fi_getinfo.
+   */
+  ret = fi_endpoint(domain->domain, domain->info, &out_fabric_comm->ep, NULL);
+  debug_info("[FABRIC] [fabric_new_comm] fi_endpoint = %d\n", ret);
+  if (ret) {
+    printf("fi_endpoint error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * Initialize our completion queue. Completion queues are used to
+   * report events associated with data transfers. One CQ tracks both
+   * sends and receives here.
+   */
+  cq_attr.size = 128;
+  cq_attr.format = FI_CQ_FORMAT_MSG;
+  cq_attr.wait_obj = FI_WAIT_UNSPEC;
+  ret = fi_cq_open(domain->domain, &cq_attr, &out_fabric_comm->cq, NULL);
+  debug_info("[FABRIC] [fabric_new_comm] fi_cq_open = %d\n", ret);
+  if (ret) {
+    printf("fi_cq_open error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * Bind our CQ to our endpoint to track any sends and receives that
+   * come in or out on that endpoint.
+   */
+  ret = fi_ep_bind(out_fabric_comm->ep, &out_fabric_comm->cq->fid, FI_SEND | FI_RECV);
+  debug_info("[FABRIC] [fabric_new_comm] fi_ep_bind = %d\n", ret);
+  if (ret) {
+    printf("fi_ep_bind cq error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * Initialize our address vector. Address vectors are used to map
+   * higher level addresses into fabric specific addresses. An AV_TABLE
+   * av will map these addresses to indexed addresses, starting with
+   * fi_addr 0. These addresses are used in data transfer calls to
+   * specify which peer to send to/recv from.
+   */
+  av_attr.type = FI_AV_TABLE;
+  av_attr.count = 1;
+  ret = fi_av_open(domain->domain, &av_attr, &out_fabric_comm->av, NULL);
+  debug_info("[FABRIC] [fabric_new_comm] fi_av_open = %d\n", ret);
+  if (ret) {
+    printf("fi_av_open error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * Bind the AV to the EP. The EP can only send data to a peer in its
+   * AV.
+   */
+  ret = fi_ep_bind(out_fabric_comm->ep, &out_fabric_comm->av->fid, 0);
+  debug_info("[FABRIC] [fabric_new_comm] fi_ep_bind = %d\n", ret);
+  if (ret) {
+    printf("fi_ep_bind av error (%d)\n", ret);
+    goto error;
+  }
+
+  /*
+   * Once we have all our resources initialized and ready to go, we can
+   * enable our EP in order to send/receive data.
+   */
+  ret = fi_enable(out_fabric_comm->ep);
+  debug_info("[FABRIC] [fabric_new_comm] fi_enable = %d\n", ret);
+  if (ret) {
+    printf("fi_enable error (%d)\n", ret);
+    goto error;
+  }
+
+  pthread_mutex_unlock(&fabric_init_mutex);
+  return 0;
+
+error:
+  // Do not leak the EP/CQ/AV opened before the failing step
+  fabric_comm_release(out_fabric_comm);
+  pthread_mutex_unlock(&fabric_init_mutex);
+  return ret;
+}
+
+/*
+ * Copy the name (address) of the local endpoint into out_addr, a buffer of
+ * size_addr bytes.
+ *
+ * Returns the fi_getname result: 0 on success, a libfabric error code
+ * otherwise.
+ */
+int fabric_get_addr( struct fabric_comm *fabric_comm, char * out_addr, size_t size_addr )
+{
+  int ret = fi_getname(&fabric_comm->ep->fid, out_addr, &size_addr);
+  if (ret) {
+    printf("fi_getname error %d\n", ret);
+  }
+  return ret;
+}
+
+/*
+ * Insert the peer address stored in addr_buf into the address vector and
+ * keep the resulting fi_addr_t in fabric_comm->fi_addr.
+ *
+ * Returns 1 (the number of addresses inserted) on success, -FI_ENOSYS when
+ * the insertion fails.
+ */
+int fabric_register_addr( struct fabric_comm *fabric_comm, char * addr_buf )
+{
+  int ret = fi_av_insert(fabric_comm->av, addr_buf, 1, &fabric_comm->fi_addr, 0, NULL);
+  if (ret != 1) {
+    printf("av insert error\n");
+    return -FI_ENOSYS;
+  }
+  return ret;
+}
+
+/*
+ * Block until one completion can be read from the CQ of the communicator.
+ *
+ * Returns the fi_cq_sreadfrom result: the number of completions read, or a
+ * negative libfabric error code. On -FI_EAVAIL the pending error entry is
+ * read with fi_cq_readerr, so that it is reported and removed from the queue
+ * instead of failing every later read as well.
+ *
+ * NOTE(review): the source address reported by fi_cq_sreadfrom is written
+ * into fabric_comm->fi_addr, the same field fabric_send uses as destination.
+ * Confirm this overwrite is intended.
+ */
+int fabric_wait ( struct fabric_comm *fabric_comm )
+{
+  struct fi_cq_err_entry comp;
+  int ret;
+
+  debug_info("[FABRIC] [fabric_wait] Start\n");
+
+  ret = fi_cq_sreadfrom(fabric_comm->cq, &comp, 1, &fabric_comm->fi_addr, NULL, -1);
+  debug_info("[FABRIC] [fabric_wait] fi_cq_sread = %d\n", ret);
+
+  if (ret < 0){
+    printf("error reading cq (%d)\n", ret);
+
+    if (ret == -FI_EAVAIL) {
+      struct fi_cq_err_entry err_entry = {0};
+      if (fi_cq_readerr(fabric_comm->cq, &err_entry, 0) > 0) {
+        printf("cq error entry: %s (prov_errno %d)\n", fi_strerror(err_entry.err), err_entry.prov_errno);
+      }
+    }
+  }
+
+  return ret;
+}
+
+/*
+ * Send size bytes from buffer to the registered peer and wait for the
+ * completion.
+ *
+ * Returns size on success, -1 on error. The value goes through an int, so
+ * sizes above INT_MAX cannot be reported correctly.
+ */
+int fabric_send ( struct fabric_comm *fabric_comm, void * buffer, size_t size )
+{
+  int ret;
+
+  debug_info("[FABRIC] [fabric_send] Start\n");
+  do {
+    ret = fi_send(fabric_comm->ep, buffer, size, NULL, fabric_comm->fi_addr, buffer);
+
+    // Queue full: poll the CQ so the provider makes progress, then retry
+    if (ret == -FI_EAGAIN)
+      (void) fi_cq_read(fabric_comm->cq, NULL, 0);
+  } while (ret == -FI_EAGAIN);
+
+  if (ret){
+    printf("error posting send buffer (%d)\n", ret);
+    return -1;
+  }
+  ret = fabric_wait(fabric_comm);
+  if (ret < 0){
+    printf("error waiting send buffer (%d)\n", ret);
+    return -1;
+  }
+
+  debug_info("[FABRIC] [fabric_send] fi_send %zu\n", size);
+  return (int) size;
+}
+
+/*
+ * Post a receive of size bytes into buffer and wait for the completion.
+ *
+ * Returns size on success, -1 on error. The value goes through an int, so
+ * sizes above INT_MAX cannot be reported correctly.
+ */
+int fabric_recv ( struct fabric_comm *fabric_comm, void * buffer, size_t size )
+{
+  int ret;
+
+  debug_info("[FABRIC] [fabric_recv] Start\n");
+  do {
+    ret = fi_recv(fabric_comm->ep, buffer, size, NULL, fabric_comm->fi_addr, buffer);
+
+    // Queue full: poll the CQ so the provider makes progress, then retry
+    if (ret == -FI_EAGAIN)
+      (void) fi_cq_read(fabric_comm->cq, NULL, 0);
+  } while (ret == -FI_EAGAIN);
+
+  if (ret){
+    printf("error posting recv buffer (%d)\n", ret);
+    return -1;
+  }
+  ret = fabric_wait(fabric_comm);
+  if (ret < 0){
+    printf("error waiting recv buffer (%d)\n", ret);
+    return -1;
+  }
+
+  debug_info("[FABRIC] [fabric_recv] fi_recv %zu\n", size);
+  return (int) size;
+}
+
+/*
+ * Close the endpoint, address vector and completion queue of a communicator
+ * under fabric_init_mutex. Handles that are NULL are skipped.
+ *
+ * Returns 0 on success, -1 when fabric_comm is NULL, or the first fi_close
+ * error found.
+ */
+int fabric_close_comm ( struct fabric_comm *fabric_comm )
+{
+  int ret;
+
+  debug_info("[FABRIC] [fabric_close_comm] Start\n");
+
+  if (fabric_comm == NULL)
+    return -1;
+
+  pthread_mutex_lock(&fabric_init_mutex);
+  ret = fabric_comm_release(fabric_comm);
+  pthread_mutex_unlock(&fabric_init_mutex);
+
+  return ret;
+}
+
+/*
+ * Close the domain and the fabric and free the hints and info structures
+ * under fabric_init_mutex. Members that are NULL are skipped; hints and info
+ * are each freed exactly once.
+ *
+ * Returns 0 on success, -1 when domain is NULL, or the first fi_close error
+ * found.
+ */
+int fabric_destroy ( struct fabric_domain *domain )
+{
+  int ret;
+
+  debug_info("[FABRIC] [fabric_destroy] Start\n");
+
+  if (domain == NULL)
+    return -1;
+
+  pthread_mutex_lock(&fabric_init_mutex);
+  ret = fabric_domain_release(domain);
+  pthread_mutex_unlock(&fabric_init_mutex);
+
+  return ret;
+}
diff --git a/src/base/socket.c b/src/base/socket.c index 030068e1c..72f874617 100644 --- a/src/base/socket.c +++ b/src/base/socket.c @@ -149,7 +149,7 @@ int socket_server_create ( int *out_socket ) return 0; } -int socket_server_accept ( int socket, int *out_conection_socket ) +int socket_server_accept ( int socket, int *out_conection_socket, char *addr) { struct sockaddr_in client_addr; socklen_t sock_size = sizeof(struct sockaddr_in); @@ -158,6 +158,12 @@ int socket_server_accept ( int socket, int *out_conection_socket ) printf("[SOCKET] [socket_accept_send] ERROR: socket accept\n"); return -1; } + + if (addr != NULL){ + char *aux_addr = inet_ntoa(client_addr.sin_addr); + strcpy(addr, aux_addr); + } + *out_conection_socket = new_socket; return 0; } diff --git a/src/bypass/Makefile.in b/src/bypass/Makefile.in index 9464b48fd..c2429ad72 100644 --- a/src/bypass/Makefile.in +++ b/src/bypass/Makefile.in @@ -8,7 +8,7 @@ FLAGS=-Wall -Wextra -fpic -std=c11 @CFLAGS@ INCLUDE=-I../../include -I../../include/base -I../../include/xpn_client -I../../include/bypass -DNOT_TO_USE_FCNTL_H -DNOT_TO_USE_STDLIB_H - LIBS=-L../../../xpn/src/xpn_client -L../../src/xpn_client -lxpn -lpthread -ldl + LIBS=-L../../../xpn/src/xpn_client -L../../src/xpn_client -lxpn -lpthread -ldl @LDFLAGS@ all: $(CC) $(FLAGS) $(INCLUDE) -c xpn_bypass.c diff --git a/src/xpn_client/Makefile.am b/src/xpn_client/Makefile.am index edda26de8..35eb35ed4 100644 --- a/src/xpn_client/Makefile.am +++ b/src/xpn_client/Makefile.am @@ -45,15 +45,18 @@ NFI_NFS3_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_nfs3/nfi_nfs3_err.h \ ### END
OF NFI_NFS3_HEADER BLOCK. Do not remove this line. ### ### BEGIN OF NFI_XPN_SERVER_HEADER BLOCK. Do not remove this line. ### NFI_XPN_SERVER_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server_comm.h \ - @top_srcdir@/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.h + @top_srcdir@/include/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.h \ + @top_srcdir@/include/xpn_server/xpn_server_conf.h ### END OF NFI_XPN_SERVER_HEADER BLOCK. Do not remove this line. ### ### BEGIN OF NFI_MPI_SERVER_HEADER BLOCK. Do not remove this line. ### NFI_MPI_SERVER_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_mpi_server/nfi_mpi_server_comm.h ### END OF NFI_MPI_SERVER_HEADER BLOCK. Do not remove this line. ### ### BEGIN OF NFI_SCK_SERVER_HEADER BLOCK. Do not remove this line. ### -NFI_SCK_SERVER_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_sck_server/nfi_sck_server_comm.h \ - @top_srcdir@/include/xpn_server/xpn_server_conf.h +NFI_SCK_SERVER_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_sck_server/nfi_sck_server_comm.h ### END OF NFI_SCK_SERVER_HEADER BLOCK. Do not remove this line. ### +### BEGIN OF NFI_FABRIC_SERVER_HEADER BLOCK. Do not remove this line. ### +NFI_FABRIC_SERVER_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.h +### END OF NFI_FABRIC_SERVER_HEADER BLOCK. Do not remove this line. ### ### BEGIN OF NFI_TCP_SERVER_HEADER BLOCK. Do not remove this line. ### NFI_TCP_SERVER_HEADER= @top_srcdir@/include/xpn_client/nfi/nfi_tcp_server/tcp_server_comm.h \ @top_srcdir@/include/xpn_client/nfi/nfi_tcp_server/tcp_server_conf.h \ @@ -90,6 +93,12 @@ if ENABLE_SCK_SERVER NFI_HEADER+=$(NFI_SCK_SERVER_HEADER) endif ### END OF ENABLE_SCK_SERVER_HEADER BLOCK. Do not remove this line. ### +### BEGIN OF ENABLE_FABRIC_SERVER_HEADER BLOCK. Do not remove this line.
### +if ENABLE_FABRIC_SERVER +NFI_HEADER+= $(NFI_FABRIC_SERVER_HEADER) +BASE_HEADER+= @top_srcdir@/include/base/fabric.h +endif +### END OF ENABLE_FABRIC_SERVER_HEADER BLOCK. Do not remove this line. ### ### BEGIN OF ENABLE_TCP_SERVER_HEADER BLOCK. Do not remove this line. ### if ENABLE_TCP_SERVER NFI_HEADER+=$(NFI_TCP_SERVER_HEADER) @@ -169,6 +178,9 @@ NFI_MPI_SERVER_OBJECTS= @top_srcdir@/src/xpn_client/nfi/nfi_mpi_server/nfi_mpi_s ### BEGIN OF NFI_SCK_SERVER_OBJECTS BLOCK. Do not remove this line. ### NFI_SCK_SERVER_OBJECTS= @top_srcdir@/src/xpn_client/nfi/nfi_sck_server/nfi_sck_server_comm.c ### END OF NFI_SCK_SERVER_OBJECTS BLOCK. Do not remove this line. ### +### BEGIN OF NFI_FABRIC_SERVER_OBJECTS BLOCK. Do not remove this line. ### +NFI_FABRIC_SERVER_OBJECTS= @top_srcdir@/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c +### END OF NFI_FABRIC_SERVER_OBJECTS BLOCK. Do not remove this line. ### ### BEGIN OF NFI_TCP_SERVER_OBJECTS BLOCK. Do not remove this line. ### NFI_TCP_SERVER_OBJECTS= @top_srcdir@/src/xpn_client/nfi/nfi_tcp_server/nfi_tcp_server.c \ @top_srcdir@/src/xpn_client/nfi/nfi_tcp_server/tcp_server_comm.c \ @@ -199,6 +211,12 @@ if ENABLE_SCK_SERVER NFI_OBJECTS+=$(NFI_SCK_SERVER_OBJECTS) endif ### END OF ENABLE_SCK_SERVER_OBJECTS BLOCK. Do not remove this line. ### +### BEGIN OF ENABLE_FABRIC_SERVER_OBJECTS BLOCK. Do not remove this line. ### +if ENABLE_FABRIC_SERVER +NFI_OBJECTS+=$(NFI_FABRIC_SERVER_OBJECTS) +BASE_OBJECTS+= @top_srcdir@/src/base/fabric.c +endif +### END OF ENABLE_FABRIC_SERVER_OBJECTS BLOCK. Do not remove this line. ### ### BEGIN OF ENABLE_TCP_SERVER_OBJECTS BLOCK. Do not remove this line. 
### if ENABLE_TCP_SERVER NFI_OBJECTS+=$(NFI_TCP_SERVER_OBJECTS) diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c new file mode 100644 index 000000000..2361e22a0 --- /dev/null +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c @@ -0,0 +1,143 @@ +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + + +/* ... Include / Inclusion ........................................... */ + +#include "nfi_fabric_server_comm.h" +#include "socket.h" +#include "fabric.h" +#include "xpn_server/xpn_server_conf.h" + + +/* ... Const / Const ................................................. */ + + +/* ... Global variables / Variables globales ........................ */ + + +/* ... Functions / Funciones ......................................... 
*/ + +int nfi_fabric_server_comm_connect ( struct fabric_domain *fabric_domain, char * srv_name, char * port_name, struct fabric_comm *out_fabric_comm ) +{ + int ret; + int connection_socket; + char addr_buf[64]; + size_t addr_buf_size = 64, addr_buf_size_aux = 64; + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] >> Begin\n"); + + ret = fabric_new_comm(fabric_domain, out_fabric_comm); + if (ret < 0){ + printf("Error: fabric_new_comm %d\n", ret); + return ret; + } + + // Lookup port name + ret = socket_client_connect(srv_name, &connection_socket); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket connect\n"); + return -1; + } + int buffer = SOCKET_ACCEPT_CODE; + ret = socket_send(connection_socket, &buffer, sizeof(buffer)); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket send\n"); + socket_close(connection_socket); + return -1; + } + ret = socket_recv(connection_socket, port_name, XPN_SERVER_MAX_PORT_NAME); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket read\n"); + socket_close(connection_socket); + return -1; + } + + ret = socket_recv(connection_socket, addr_buf, addr_buf_size); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket read\n"); + socket_close(connection_socket); + return -1; + } + + ret = fabric_register_addr(out_fabric_comm, addr_buf); + + ret = fabric_get_addr(out_fabric_comm, addr_buf, addr_buf_size_aux); + if (ret < 0){ + printf("Error: fabric_get_addr\n"); + return ret; + } + + ret = socket_send(connection_socket, addr_buf, addr_buf_size); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket send\n"); + socket_close(connection_socket); + return -1; + } + + socket_close(connection_socket); + + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] 
[nfi_fabric_server_comm_connect] ERROR: Lookup %s Port %s\n", srv_name, port_name); + return -1; + } + + debug_info("[NFI_FABRIC_SERVER_COMM] ----SERVER = %s PORT = %s\n", srv_name, port_name); + + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] << End\n"); + + return ret; +} + +int nfi_fabric_server_comm_disconnect(struct fabric_comm *fabric_comm) +{ + int ret; + int code = XPN_SERVER_DISCONNECT; + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] >> Begin\n"); + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Send disconnect message\n"); + ret = fabric_send(fabric_comm, &code, sizeof(code)); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: nfi_fabric_server_comm_write_operation fails\n"); + return ret; + } + + // Disconnect + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Disconnect\n"); + + ret = fabric_close_comm(fabric_comm); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails\n"); + return ret; + } + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] << End\n"); + + // Return OK + return ret; +} +/* ................................................................... */ diff --git a/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.c b/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.c index 1f2d61b7b..ad13b80e7 100644 --- a/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.c +++ b/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server.c @@ -19,6 +19,7 @@ * */ + /* ... Include / Inclusion ........................................... */ #include "nfi_xpn_server.h" @@ -373,7 +374,7 @@ int nfi_xpn_server_init ( char *url, struct nfi_server *serv, int server_type ) debug_info("[SERV_ID=%d] [NFI_XPN] [nfi_xpn_server_init] Locality enable: %d\n", serv->id, server_aux->xpn_locality); // Initialize MPI Client communication side... 
- debug_info("[SERV_ID=%d] [NFI_XPN] [nfi_xpn_server_init] Initialize MPI Client communication side\n", serv->id); + debug_info("[SERV_ID=%d] [NFI_XPN] [nfi_xpn_server_init] Initialize Client communication side\n", serv->id); ret = nfi_xpn_server_comm_init(server_aux); if (ret < 0) diff --git a/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server_comm.c b/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server_comm.c index 603a508c0..e85383dc3 100644 --- a/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server_comm.c +++ b/src/xpn_client/nfi/nfi_xpn_server/nfi_xpn_server_comm.c @@ -30,12 +30,19 @@ #ifdef ENABLE_SCK_SERVER #include "nfi_sck_server_comm.h" #endif - +#ifdef ENABLE_FABRIC_SERVER +#include "nfi_fabric_server_comm.h" +#include "fabric.h" +#endif /* ... Const / Const ................................................. */ /* ... Global variables / Variables globales ........................ */ +#ifdef ENABLE_FABRIC_SERVER +int fabric_initialiced = 0; +struct fabric_domain fabric_domain; +#endif /* ... Functions / Funciones ......................................... 
*/ @@ -56,6 +63,16 @@ int nfi_xpn_server_comm_init ( struct nfi_xpn_server *params ) ret = 0; break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + if (fabric_initialiced == 0){ + ret = fabric_init( &fabric_domain ); + fabric_initialiced = 1; + }else{ + ret = 0; + } + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_init] server_type '%d' not recognized\n",params->server_type); @@ -83,6 +100,16 @@ int nfi_xpn_server_comm_destroy ( struct nfi_xpn_server *params ) ret = 0; break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + if (fabric_initialiced == 1){ + ret = fabric_destroy( &fabric_domain ); + fabric_initialiced = 0; + }else{ + ret = 0; + } + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_destroy] server_type '%d' not recognized\n",params->server_type); @@ -110,6 +137,11 @@ int nfi_xpn_server_comm_connect ( struct nfi_xpn_server *params ) ret = nfi_sck_server_comm_connect(params->srv_name, params->port_name, &params->server_socket); break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = nfi_fabric_server_comm_connect(&fabric_domain, params->srv_name, params->port_name, &params->fabric_comm); + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_connect] server_type '%d' not recognized\n",params->server_type); @@ -137,6 +169,11 @@ int nfi_xpn_server_comm_disconnect ( struct nfi_xpn_server *params ) ret = nfi_sck_server_comm_disconnect(params->server_socket); break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = nfi_fabric_server_comm_disconnect(&params->fabric_comm); + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_disconnect] server_type '%d' not recognized\n",params->server_type); @@ -164,6 +201,11 @@ int nfi_xpn_server_comm_write_operation ( struct nfi_xpn_server *params, int op) ret = socket_send(params->server_socket, &op, sizeof(op)); break; #endif + #ifdef ENABLE_FABRIC_SERVER +
case XPN_SERVER_TYPE_FABRIC: + ret = fabric_send(&params->fabric_comm, &op, sizeof(op)); + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_write_operation] server_type '%d' not recognized\n",params->server_type); @@ -191,6 +233,11 @@ ssize_t nfi_xpn_server_comm_write_data ( struct nfi_xpn_server *params, char *da ret = socket_send(params->server_socket, data, size); break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_send(&params->fabric_comm, data, size); + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_write_data] server_type '%d' not recognized\n",params->server_type); @@ -218,6 +265,11 @@ ssize_t nfi_xpn_server_comm_read_data ( struct nfi_xpn_server *params, char *dat ret = socket_recv(params->server_socket, data, size); break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_recv(&params->fabric_comm, data, size); + break; + #endif default: printf("[NFI_XPN_SERVER] [nfi_xpn_server_comm_read_data] server_type '%d' not recognized\n",params->server_type); diff --git a/src/xpn_client/xpn/xpn_simple/policy/xpn_policy_init.c b/src/xpn_client/xpn/xpn_simple/policy/xpn_policy_init.c index 6ee9a7631..8858847b8 100644 --- a/src/xpn_client/xpn/xpn_simple/policy/xpn_policy_init.c +++ b/src/xpn_client/xpn/xpn_simple/policy/xpn_policy_init.c @@ -313,6 +313,16 @@ int XpnInitServer(struct conf_file_data *conf_data, struct xpn_partition * part, } #endif + #ifdef ENABLE_FABRIC_SERVER + else if (strcmp(prt, "fabric_server") == 0) { + ret = nfi_xpn_server_init(url_buf, serv, XPN_SERVER_TYPE_FABRIC); + if (ret < 0) { + errno = ESRCH; + return -1; + } + } + #endif + #ifdef ENABLE_NFS else if ((strcmp(prt, "nfs") == 0) || (strcmp(prt, "nfs2") == 0)) { //printf("[XPN]nfi_nfs_init: %s\n",url); diff --git a/src/xpn_server/Makefile.am b/src/xpn_server/Makefile.am index 0ef0e515a..901079648 100644 --- a/src/xpn_server/Makefile.am +++ b/src/xpn_server/Makefile.am @@ -15,6 +15,7 @@
XPN_SERVER_HEADER= @top_srcdir@/include/xpn_server/xpn_server_params.h \ @top_srcdir@/include/xpn_server/xpn_server_comm.h MPI_SERVER_HEADER= @top_srcdir@/include/xpn_server/mpi_server/mpi_server_comm.h SCK_SERVER_HEADER= @top_srcdir@/include/xpn_server/sck_server/sck_server_comm.h +FABRIC_SERVER_HEADER= @top_srcdir@/include/xpn_server/fabric_server/fabric_server_comm.h SERVER_HEADER=$(XPN_SERVER_HEADER) SERVER_HEADER+=$(COMMON_HEADER) @@ -28,6 +29,10 @@ if ENABLE_SCK_SERVER SERVER_HEADER+=$(SCK_SERVER_HEADER) endif +if ENABLE_FABRIC_SERVER +SERVER_HEADER+=$(FABRIC_SERVER_HEADER) +endif + ########### # OBJECTS # @@ -58,6 +63,7 @@ XPN_SERVER_OBJECTS= @top_srcdir@/src/xpn_server/xpn_server.c \ MPI_SERVER_OBJECTS= @top_srcdir@/src/xpn_server/mpi_server/mpi_server_comm.c SCK_SERVER_OBJECTS= @top_srcdir@/src/xpn_server/sck_server/sck_server_comm.c +FABRIC_SERVER_OBJECTS= @top_srcdir@/src/xpn_server/fabric_server/fabric_server_comm.c SERVER_OBJECTS=$(XPN_SERVER_OBJECTS) SERVER_OBJECTS+=$(BASE_OBJECTS) @@ -70,6 +76,10 @@ if ENABLE_SCK_SERVER SERVER_OBJECTS+=$(SCK_SERVER_OBJECTS) endif +if ENABLE_FABRIC_SERVER +SERVER_OBJECTS+=$(FABRIC_SERVER_OBJECTS) +BASE_OBJECTS+= @top_srcdir@/src/base/fabric.c +endif ############ diff --git a/src/xpn_server/fabric_server/fabric_server_comm.c b/src/xpn_server/fabric_server/fabric_server_comm.c new file mode 100644 index 000000000..2898f057f --- /dev/null +++ b/src/xpn_server/fabric_server/fabric_server_comm.c @@ -0,0 +1,182 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + + +/* ... Include / Inclusion ........................................... */ + +#include "fabric_server_comm.h" + + +/* ... Const / Const ................................................. */ + + +/* ... Global variables / Variables globales ........................ */ + + +/* ... Functions / Funciones ......................................... */ + +// init, destroy + +// accept, disconnect +int fabric_server_comm_accept ( struct fabric_domain *fabric_domain, char *dest_addr, char *port_name, struct fabric_comm **new_sd ) +{ + int ret; + + *new_sd = malloc(sizeof(struct fabric_comm)); + if (*new_sd == NULL) { + printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] ERROR: Memory allocation\n", 0); + return -1; + } + + debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] >> Begin\n", 0); + + // Accept + debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] Accept %s %s\n", 0, dest_addr, port_name); + + ret = fabric_new_comm(fabric_domain, *new_sd); + if (ret < 0) + { + printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] ERROR: fabric_init fails\n", 0); + return -1; + } + + debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] << End %p\n", 0, (void *)*new_sd); + + return 0; +} + +int fabric_server_comm_disconnect ( struct fabric_comm *fd ) +{ + int ret; + + debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] >> Begin\n", 0); + + if (fd == NULL) + { + printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] ERROR: The fabric_comm is NULL\n", 0); + return -1; + } + + //
Disconnect + debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] Disconnect\n", 0); + + ret = fabric_close_comm(fd); + if (ret < 0) + { + printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails\n", 0); + return -1; + } + free(fd); + debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] << End\n", 0); + + // Return OK + return 0; +} + +// ssize_t fabric_server_comm_read_operation ( struct fabric_comm *fd, int *op, int *rank_client_id, int *tag_client_id ) +// { +// int ret; +// int msg[2]; + +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] >> Begin\n", 0); + +// // Get message +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] Read operation %p\n", 0, fd); + +// ret = fabric_recv(fd, msg, sizeof(msg)); +// if (ret < 0) { +// debug_warning("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] ERROR: fabric_recv fails\n", 0); +// } + +// *rank_client_id = 0; +// *tag_client_id = msg[0]; +// *op = msg[1]; + +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] fabric_recv (SOURCE %d, TAG %d, OP %d, ERROR %d)\n", 0, *rank_client_id, *rank_client_id, *op, ret); +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] << End\n", 0); + +// // Return OK +// return 0; +// } + + +// ssize_t fabric_server_comm_write_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ) +// { +// int ret; + +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] >> Begin\n", 0); + +// if (size == 0) { +// return 0; +// } +// if (size < 0) +// { +// printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] ERROR: size < 0\n", 0); +// return -1; +// } + +// // Send message +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] Write data tag %d\n", 0, 
tag_client_id); + +// ret = fabric_send(fd, data, size); +// if (ret < 0) { +// debug_warning("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] ERROR: fabric_send fails\n", 0); +// } + +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] << End\n", 0); + +// // Return bytes written +// return size; +// } + +// ssize_t fabric_server_comm_read_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ) +// { +// int ret; + +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] >> Begin\n", 0); + +// if (size == 0) { +// return 0; +// } +// if (size < 0) +// { +// printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] ERROR: size < 0\n", 0); +// return -1; +// } + +// // Get message +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] Read data tag %d\n", 0, tag_client_id); + +// ret = fabric_recv(fd, data, size); +// if (ret < 0) { +// debug_warning("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] ERROR: fabric_recv fails\n", 0); +// } + +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] fabric_recv (ERROR %d)\n", 0, ret); +// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] << End\n", 0); + +// // Return bytes read +// return size; +// } + + +/* ................................................................... */ + diff --git a/src/xpn_server/xpn_server.c b/src/xpn_server/xpn_server.c index 8947cbea9..eaefa67a2 100644 --- a/src/xpn_server/xpn_server.c +++ b/src/xpn_server/xpn_server.c @@ -19,6 +19,7 @@ * */ + /* ... Include / Inclusion ........................................... 
*/ #include "all_system.h" @@ -112,18 +113,21 @@ void xpn_server_dispatcher ( struct st_th th ) debug_info("[TH_ID=%d] [XPN_SERVER] [xpn_server_dispatcher] End\n", th.id); } -void xpn_server_accept ( void ) +void xpn_server_accept ( char *addr, char *port, void **comm ) { debug_info("[TH_ID=%d] [XPN_SERVER] [xpn_server_up] Start accepting\n", 0); int ret; - void *comm = NULL; - struct st_th th_arg; - ret = xpn_server_comm_accept(&params, &comm); + ret = xpn_server_comm_accept(&params, addr, port, comm); if (ret < 0) { return; } - debug_info("[TH_ID=%d] [XPN_SERVER] [xpn_server_up] Accept received\n", 0); +} + +void xpn_server_dispatch(void *comm) +{ + struct st_th th_arg; + debug_info("[TH_ID=%d] [XPN_SERVER] [xpn_server_dispatch] Accept received\n", 0); // Launch dispatcher per aplication th_arg.params = &params; @@ -160,6 +164,8 @@ int xpn_server_up ( void ) int connection_socket; int recv_code = 0; int await_stop = 0; + char str_addr[INET_ADDRSTRLEN]; + void *comm = NULL; debug_info("[TH_ID=%d] [XPN_SERVER] [xpn_server_up] >> Begin\n", 0); @@ -207,7 +213,7 @@ int xpn_server_up ( void ) while (!the_end) { debug_info("[TH_ID=%d] [XPN_SERVER] [xpn_server_up] Listening to conections\n", 0); - ret = socket_server_accept(server_socket, &connection_socket); + ret = socket_server_accept(server_socket, &connection_socket, str_addr); if (ret < 0) continue; ret = socket_recv(connection_socket, &recv_code, sizeof(recv_code)); @@ -217,8 +223,47 @@ int xpn_server_up ( void ) switch (recv_code) { case SOCKET_ACCEPT_CODE: - socket_send(connection_socket, params.port_name, MPI_MAX_PORT_NAME); - xpn_server_accept(); + if (params.server_type == XPN_SERVER_TYPE_FABRIC){ + char aux_port[256] = {0}; + char addr_buf[64]; + size_t addr_buf_size = 64, addr_buf_size_aux = 64; + + xpn_server_accept(str_addr, aux_port, &comm); + socket_send(connection_socket, aux_port, MPI_MAX_PORT_NAME); + + struct fabric_comm *fabric_comm = (struct fabric_comm*)comm; + debug_info("fabric_get_addr\n"); + ret =
fabric_get_addr(fabric_comm, addr_buf, addr_buf_size_aux); + if (ret < 0){ + printf("Error: fabric_get_addr\n"); + return ret; + } + + debug_info("socket_send addr_buf\n"); + ret = socket_send(connection_socket, addr_buf, addr_buf_size); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket send\n"); + socket_close(connection_socket); + return -1; + } + + debug_info("socket_recv addr_buf\n"); + ret = socket_recv(connection_socket, addr_buf, addr_buf_size); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket read\n"); + socket_close(connection_socket); + return -1; + } + debug_info("fabric_register_addr addr_buf\n"); + fabric_register_addr(fabric_comm, addr_buf); + xpn_server_dispatch(comm); + }else{ + socket_send(connection_socket, params.port_name, MPI_MAX_PORT_NAME); + xpn_server_accept(str_addr, params.port_name, &comm); + xpn_server_dispatch(comm); + } break; case SOCKET_FINISH_CODE: diff --git a/src/xpn_server/xpn_server_comm.c b/src/xpn_server/xpn_server_comm.c index 03918e618..b6a5e61b0 100644 --- a/src/xpn_server/xpn_server_comm.c +++ b/src/xpn_server/xpn_server_comm.c @@ -28,9 +28,13 @@ #ifdef ENABLE_MPI_SERVER #include "mpi_server_comm.h" #endif -#ifdef ENABLE_MPI_SERVER +#ifdef ENABLE_SCK_SERVER #include "sck_server_comm.h" #endif +#ifdef ENABLE_FABRIC_SERVER +#include "fabric_server_comm.h" +#include "fabric.h" +#endif /* ... Functions / Funciones ......................................... 
*/ @@ -53,6 +57,12 @@ int xpn_server_comm_init ( xpn_server_param_st *params ) ret = sck_server_comm_init( &params->server_socket, params->port_name ); break; #endif + + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_init( &params->fabric_domain ); + break; + #endif default: printf("[XPN_SERVER] [xpn_server_comm_init] server_type '%d' not recognized\n", params->server_type); @@ -79,6 +89,12 @@ int xpn_server_comm_destroy ( xpn_server_param_st *params ) ret = socket_close( params->server_socket ); break; #endif + + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_destroy( &params->fabric_domain ); + break; + #endif default: printf("[XPN_SERVER] [xpn_server_comm_destroy] server_type '%d' not recognized\n", params->server_type); @@ -88,7 +104,7 @@ int xpn_server_comm_destroy ( xpn_server_param_st *params ) return ret; } -int xpn_server_comm_accept ( xpn_server_param_st *params, void **new_sd ) +int xpn_server_comm_accept ( xpn_server_param_st *params, char *addr, char *port, void **new_sd ) { int ret = -1; @@ -105,6 +121,12 @@ int xpn_server_comm_accept ( xpn_server_param_st *params, void **new_sd ) ret = sck_server_comm_accept( params->server_socket, (int **)new_sd ); break; #endif + + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_server_comm_accept( &params->fabric_domain, addr, port, (struct fabric_comm **)new_sd); + break; + #endif default: printf("[XPN_SERVER] [xpn_server_comm_accept] server_type '%d' not recognized\n", params->server_type); @@ -132,6 +154,12 @@ int xpn_server_comm_disconnect ( xpn_server_param_st *params, void *sd ) break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_server_comm_disconnect((struct fabric_comm *)sd); + break; + #endif + default: printf("[XPN_SERVER] [xpn_server_comm_disconnect] server_type '%d' not recognized\n", params->server_type); break; @@ -157,7 +185,13 @@ ssize_t xpn_server_comm_read_operation ( xpn_server_param_st
*params, void *sd, ret = socket_recv(*(int*)sd, op, sizeof(*op)); break; #endif - + + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_recv((struct fabric_comm *)sd, op, sizeof(*op)); + break; + #endif + default: printf("[XPN_SERVER] [xpn_server_comm_read_operation] server_type '%d' not recognized\n", params->server_type); break; @@ -184,6 +218,12 @@ ssize_t xpn_server_comm_write_data ( xpn_server_param_st *params, void *sd, char break; #endif + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_send((struct fabric_comm *)sd, data, size); + break; + #endif + default: printf("[XPN_SERVER] [xpn_server_comm_write_data] server_type '%d' not recognized\n", params->server_type); break; @@ -209,7 +249,13 @@ ssize_t xpn_server_comm_read_data ( xpn_server_param_st *params, void *sd, char ret = socket_recv(*(int*)sd, data, size); break; #endif - + + #ifdef ENABLE_FABRIC_SERVER + case XPN_SERVER_TYPE_FABRIC: + ret = fabric_recv((struct fabric_comm *)sd, data, size); + break; + #endif + default: printf("[XPN_SERVER] [xpn_server_comm_read_data] server_type '%d' not recognized\n", params->server_type); break; diff --git a/src/xpn_server/xpn_server_params.c b/src/xpn_server/xpn_server_params.c index 0d3734ee1..35bd6d50c 100644 --- a/src/xpn_server/xpn_server_params.c +++ b/src/xpn_server/xpn_server_params.c @@ -42,6 +42,10 @@ void xpn_server_params_show ( xpn_server_param_st *params ) if (params->server_type == XPN_SERVER_TYPE_SCK) { printf(" |\t-s :\tsck_server\n"); } + else + if (params->server_type == XPN_SERVER_TYPE_FABRIC) { + printf(" |\t-s :\tfabric_server\n"); + } else { printf(" |\t-s :\tError: unknown\n"); } @@ -79,7 +83,7 @@ void xpn_server_params_show_usage ( void ) debug_info("[Server=%d] [XPN_SERVER_PARAMS] [xpn_server_params_show_usage] >> Begin\n", -1) ; printf("Usage:\n") ; - printf("\t-s : mpi (for mpi server); sck (for sck server)\n") ; + printf("\t-s : mpi (for mpi server); sck (for sck server); fabric (for 
fabric server)\n") ; printf("\t-t : 0 (without thread); 1 (thread pool); 2 (on demand)\n") ; printf("\t-f : file of servers to be shutdown\n") ; printf("\t-h : host server to be shutdown\n") ; @@ -103,6 +107,9 @@ int xpn_server_params_get ( xpn_server_param_st *params, int argc, char *argv[] #ifdef ENABLE_MPI_SERVER params->server_type = XPN_SERVER_TYPE_MPI; #endif + #ifdef ENABLE_FABRIC_SERVER + params->server_type = XPN_SERVER_TYPE_FABRIC; + #endif params->await_stop = 0; strcpy(params->port_name, ""); strcpy(params->srv_name, ""); @@ -164,12 +171,19 @@ int xpn_server_params_get ( xpn_server_param_st *params, int argc, char *argv[] case 's': if ((i+1) < argc) { - if (strcmp("mpi", argv[i+1]) == 0) { + if (strcmp("sck", argv[i+1]) == 0) { + params->server_type = XPN_SERVER_TYPE_SCK; + } + #ifdef ENABLE_MPI_SERVER + else if (strcmp("mpi", argv[i+1]) == 0) { params->server_type = XPN_SERVER_TYPE_MPI; } - else if (strcmp("sck", argv[i+1]) == 0) { - params->server_type = XPN_SERVER_TYPE_SCK; + #endif + #ifdef ENABLE_FABRIC_SERVER + else if (strcmp("fabric", argv[i+1]) == 0) { + params->server_type = XPN_SERVER_TYPE_FABRIC; } + #endif else { printf("ERROR: unknown option %s\n", argv[i+1]); } @@ -196,7 +210,7 @@ int xpn_server_params_get ( xpn_server_param_st *params, int argc, char *argv[] // In sck_server worker for operations has to be sequential because you don't want to have to make a socket per operation. 
// It can be done because it is not reentrant - if (params->server_type == XPN_SERVER_TYPE_SCK) { + if (params->server_type == XPN_SERVER_TYPE_SCK || params->server_type == XPN_SERVER_TYPE_FABRIC) { params->thread_mode_operations = TH_NOT; } diff --git a/test/integrity/xpn_metadata/Makefile.in b/test/integrity/xpn_metadata/Makefile.in index 0ada0af3a..fdd0e6b52 100644 --- a/test/integrity/xpn_metadata/Makefile.in +++ b/test/integrity/xpn_metadata/Makefile.in @@ -8,7 +8,7 @@ CC = @CC@ MYHEADER = -I../../../include/ -I../../../include/base -I../../../include/xpn_client/ -I../../../include/xpn_client/nfi -I../../../include/xpn_client/nfi/nfi_local -I../../../include/xpn_client/nfi/nfi_xpn_server -I../../../include/xpn_client/xpn/xpn_simple MYLIBPATH = -L../../../src/base -L../../../src/xpn_client - LIBRARIES = -lxpn -lpthread -ldl -lmosquitto + LIBRARIES = -lxpn -lpthread -ldl -lmosquitto @LDFLAGS@ MYFLAGS = -O2 -Wall -D_REENTRANT -DPOSIX_THREADS -DHAVE_CONFIG_H diff --git a/test/performance/xpn-fault-tolerant/Makefile.in b/test/performance/xpn-fault-tolerant/Makefile.in index f7fabedc0..dc1a722aa 100644 --- a/test/performance/xpn-fault-tolerant/Makefile.in +++ b/test/performance/xpn-fault-tolerant/Makefile.in @@ -8,7 +8,7 @@ CC = @CC@ MYHEADER = -I../../../include/ -I../../../include/base -I../../../include/xpn_client/ MYLIBPATH = -L../../../src/base -L../../../src/xpn_client - LIBRARIES = -lxpn -lpthread -ldl -lmosquitto + LIBRARIES = -lxpn -lpthread -ldl -lmosquitto @LDFLAGS@ MYFLAGS = -O2 -Wall -D_REENTRANT -DPOSIX_THREADS -DHAVE_CONFIG_H -D_GNU_SOURCE diff --git a/test/performance/xpn/Makefile.in b/test/performance/xpn/Makefile.in index 009e9327f..091002c4a 100644 --- a/test/performance/xpn/Makefile.in +++ b/test/performance/xpn/Makefile.in @@ -8,7 +8,7 @@ CC = @CC@ MYHEADER = -I../../../include/ -I../../../include/base -I../../../include/xpn_client/ MYLIBPATH = -L../../../src/base -L../../../src/xpn_client - LIBRARIES = -lxpn -lpthread -ldl -lmosquitto + 
LIBRARIES = -lxpn -lpthread -ldl -lmosquitto @LDFLAGS@ MYFLAGS = -O2 -Wall -D_REENTRANT -DPOSIX_THREADS -DHAVE_CONFIG_H -D_GNU_SOURCE From 40a7b09a2072e36cd9f9e18dbb496fa1ee65d38d Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Mon, 28 Oct 2024 11:09:05 +0100 Subject: [PATCH 02/60] Implement fabric_server in cpp --- CMakeLists.txt | 13 + build.sh | 2 +- .../fabric_server/fabric_server_comm.h | 61 --- src/base_cpp/CMakeLists.txt | 19 +- src/base_cpp/fabric.cpp | 401 ++++++++++++++++++ src/base_cpp/fabric.hpp | 65 +++ src/xpn_client/nfi/CMakeLists.txt | 1 + .../nfi_fabric_server_comm.c | 143 ------- .../nfi_fabric_server_comm.cpp | 259 +++++++++++ .../nfi_fabric_server_comm.hpp | 58 +++ src/xpn_client/nfi/nfi_server.cpp | 3 +- src/xpn_client/nfi/nfi_xpn_server_comm.cpp | 5 + src/xpn_client/nfi/nfi_xpn_server_comm.hpp | 1 + .../fabric_server/fabric_server_comm.c | 182 -------- .../fabric_server/fabric_server_comm.cpp | 251 +++++++++++ .../fabric_server/fabric_server_comm.hpp | 58 +++ src/xpn_server/mpi_server/mpi_server_comm.cpp | 9 +- src/xpn_server/mpi_server/mpi_server_comm.hpp | 2 +- src/xpn_server/sck_server/sck_server_comm.cpp | 18 +- src/xpn_server/sck_server/sck_server_comm.hpp | 2 +- src/xpn_server/xpn_server.cpp | 7 +- src/xpn_server/xpn_server.hpp | 2 +- src/xpn_server/xpn_server_comm.cpp | 3 + src/xpn_server/xpn_server_comm.hpp | 2 +- src/xpn_server/xpn_server_params.cpp | 6 +- src/xpn_server/xpn_server_params.hpp | 1 + 26 files changed, 1167 insertions(+), 407 deletions(-) delete mode 100644 include/xpn_server/fabric_server/fabric_server_comm.h create mode 100644 src/base_cpp/fabric.cpp create mode 100644 src/base_cpp/fabric.hpp delete mode 100644 src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c create mode 100644 src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp create mode 100644 src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp delete mode 100644 src/xpn_server/fabric_server/fabric_server_comm.c create mode 100644 
src/xpn_server/fabric_server/fabric_server_comm.cpp create mode 100644 src/xpn_server/fabric_server/fabric_server_comm.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 04a4410ff..0f8b3f3b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,9 @@ set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_SKIP_RPATH TRUE) + + add_compile_options("-D_GNU_SOURCE" "-DMPICH_SKIP_MPICXX" "-D_REENTRANT") add_compile_options("-fPIC" "-O2" "-g" "-g3" "-ggdb" "-Wall" "-Wextra") @@ -27,6 +30,16 @@ else() endif(ENABLE_MPI_SERVER) unset(ENABLE_MPI_SERVER CACHE) +option(ENABLE_FABRIC_SERVER "Enable the fabric_server module" OFF) +if(ENABLE_FABRIC_SERVER) + message(STATUS "ENABLE_FABRIC_SERVER : ${ENABLE_FABRIC_SERVER}") + add_compile_options("-DENABLE_FABRIC_SERVER") + set(FABRIC_PATH ${ENABLE_FABRIC_SERVER}) +else() + message(STATUS "ENABLE_FABRIC_SERVER : false") +endif(ENABLE_FABRIC_SERVER) +unset(ENABLE_FABRIC_SERVER CACHE) + link_libraries("pthread" "dl") add_subdirectory(src/base_c) diff --git a/build.sh b/build.sh index b77b6658c..d802b7655 100755 --- a/build.sh +++ b/build.sh @@ -4,7 +4,7 @@ set -e cd build -cmake -S .. -B . -D BUILD_TESTS=true -D CMAKE_INSTALL_PREFIX=$1 -D CMAKE_C_COMPILER=/home/lab/bin/mpich/bin/mpicc -D CMAKE_CXX_COMPILER=/home/lab/bin/mpich/bin/mpic++ +cmake -S .. -B . -D BUILD_TESTS=true -D CMAKE_INSTALL_PREFIX=$1 -D CMAKE_C_COMPILER=/home/lab/bin/mpich/bin/mpicc -D CMAKE_CXX_COMPILER=/home/lab/bin/mpich/bin/mpic++ -D ENABLE_FABRIC_SERVER=/home/lab/bin/libfabric cmake --build . 
-j 8 diff --git a/include/xpn_server/fabric_server/fabric_server_comm.h b/include/xpn_server/fabric_server/fabric_server_comm.h deleted file mode 100644 index 554e3ff89..000000000 --- a/include/xpn_server/fabric_server/fabric_server_comm.h +++ /dev/null @@ -1,61 +0,0 @@ - -/* - * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz - * - * This file is part of Expand. - * - * Expand is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Expand is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Expand. If not, see . - * - */ - - -#ifndef _FABRIC_SERVER_COMM_H_ -#define _FABRIC_SERVER_COMM_H_ - - #ifdef __cplusplus - extern "C" { - #endif - - /* ... Include / Inclusion ........................................... */ - - #include "all_system.h" - #include "base/utils.h" - #include "base/time_misc.h" - #include "base/fabric.h" - - /* ... Const / Const ................................................. */ - - - /* ... Data structures / Estructuras de datos ........................ */ - - - /* ... Functions / Funciones ......................................... 
*/ - - // int fabric_server_comm_init ( int argc, char *argv[], int thread_mode, char * port_name ); - // int fabric_server_comm_destroy ( char * port_name ); - - int fabric_server_comm_accept ( struct fabric_domain *fabric_domain, char * dest_addr, char * port_name, struct fabric_comm **new_sd ); - int fabric_server_comm_disconnect ( struct fabric_comm *fd ); - - // ssize_t fabric_server_comm_read_operation ( struct fabric_comm *fd, int *op, int *rank_client_id, int *tag_client_id ); - // ssize_t fabric_server_comm_write_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ); - // ssize_t fabric_server_comm_read_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ); - - /* ................................................................... */ - - #ifdef __cplusplus - } - #endif - -#endif diff --git a/src/base_cpp/CMakeLists.txt b/src/base_cpp/CMakeLists.txt index 9d612737c..8fd1df559 100644 --- a/src/base_cpp/CMakeLists.txt +++ b/src/base_cpp/CMakeLists.txt @@ -1,13 +1,26 @@ file(GLOB XPN_BASE_CPP_HEADERS - "*.hpp" + "*.hpp" ) file(GLOB XPN_BASE_CPP_SOURCE - "*.cpp" + "*.cpp" ) +if(${FABRIC_PATH} STREQUAL "") + list(FILTER XPN_BASE_CPP_HEADERS EXCLUDE REGEX "fabric") + list(FILTER XPN_BASE_CPP_SOURCE EXCLUDE REGEX "fabric") +endif() + add_library(xpn_base_cpp OBJECT ${XPN_BASE_CPP_SOURCE} ${XPN_BASE_CPP_HEADERS}) target_include_directories(xpn_base_cpp PRIVATE "${PROJECT_SOURCE_DIR}/src" -) \ No newline at end of file +) + +if(NOT ${FABRIC_PATH} STREQUAL "") + target_link_libraries(xpn_base_cpp PUBLIC fabric) + target_link_directories(xpn_base_cpp PUBLIC ${FABRIC_PATH}/lib) + target_include_directories(xpn_base_cpp PUBLIC + ${FABRIC_PATH}/include + ) +endif() \ No newline at end of file diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp new file mode 100644 index 000000000..5bb60c4e7 --- /dev/null +++ b/src/base_cpp/fabric.cpp @@ -0,0 +1,401 @@ + +/* + * Copyright 
2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + +#include "base_cpp/fabric.hpp" +#include "base_cpp/debug.hpp" + +namespace XPN +{ + +std::mutex fabric::s_mutex; + +int set_hints( struct ::fi_info * hints) +{ + hints = fi_allocinfo(); + if (!hints) + return -FI_ENOMEM; + + /* + * Request FI_EP_RDM (reliable datagram) endpoint which will allow us + * to reliably send messages to peers without having to + * listen/connect/accept. + */ + hints->ep_attr->type = FI_EP_RDM; + + /* + * Request basic messaging capabilities from the provider (no tag + * matching, no RMA, no atomic operations) + */ + hints->caps = FI_MSG; + + /* + * Default to FI_DELIVERY_COMPLETE which will make sure completions do + * not get generated until our message arrives at the destination. + * Otherwise, the client might get a completion and exit before the + * server receives the message. This is to make the test simpler. + */ + hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; + + /* + * Set the mode bit to 0. Mode bits are used to convey requirements + * that an application must adhere to when using the fabric interfaces. + * Modes specify optimal ways of accessing the reported endpoint or + * domain. 
On input to fi_getinfo, applications set the mode bits that + * they support. + */ + hints->mode = 0; + + /* + * Set mr_mode to 0. mr_mode is used to specify the type of memory + * registration capabilities the application requires. In this example + * we are not using memory registration so this bit will be set to 0. + */ + hints->domain_attr->mr_mode = 0; + + hints->domain_attr->threading = FI_THREAD_SAFE; + + /* Done setting hints */ + + return 0; +} + +int fabric::init ( domain &fabric ) +{ + int ret; + + debug_info("[FABRIC] [fabric_init] Start\n"); + + std::unique_lock lock(s_mutex); + /* + * The first libfabric call to happen for initialization is fi_getinfo + * which queries libfabric and returns any appropriate providers that + * fulfill the hints requirements. Any applicable providers will be + * returned as a list of fi_info structs (&info). Any info can be + * selected. In this test we select the first fi_info struct. Assuming + * all hints were set appropriately, the first fi_info should be most + * appropriate. The flag FI_SOURCE is set for the server to indicate + * that the address/port refer to source information. This is not set + * for the client because the fields refer to the server, not the + * caller (client). + */ + set_hints(fabric.hints); + + ret = fi_getinfo(fi_version(), NULL, NULL, 0, + fabric.hints, &fabric.info); + + debug_info("[FABRIC] [fabric_init] fi_getinfo = "<fabric_attr, &fabric.fabric, NULL); + debug_info("[FABRIC] [fabric_init] fi_fabric = "< lock(s_mutex); + + // First asing the domain to the fabric_comm + out_fabric_comm.fabric_domain = &domain; + + /* + * Initialize our endpoint. Endpoints are transport level communication + * portals which are used to initiate and drive communication. There + * are three main types of endpoints: + * FI_EP_MSG - connected, reliable + * FI_EP_RDM - unconnected, reliable + * FI_EP_DGRAM - unconnected, unreliable + * The type of endpoint will be requested in hints/fi_getinfo. 
+ * Different providers support different types of endpoints. + */ + + ret = fi_endpoint(out_fabric_comm.fabric_domain->domain, out_fabric_comm.fabric_domain->info, &out_fabric_comm.ep, NULL); + debug_info("[FABRIC] [fabric_new_comm] fi_endpoint = "<domain, &cq_attr, &out_fabric_comm.cq, NULL); + debug_info("[FABRIC] [fabric_new_comm] fi_cq_open = "<fid, FI_SEND | FI_RECV); + debug_info("[FABRIC] [fabric_new_comm] fi_ep_bind = "<domain, &av_attr, &out_fabric_comm.av, NULL); + debug_info("[FABRIC] [fabric_new_comm] fi_av_open = "<fid, 0); + debug_info("[FABRIC] [fabric_new_comm] fi_ep_bind = "<fid, out_addr, &size_addr); + if (ret) { + printf("fi_getname error %d\n", ret); + return ret; + } + debug_info("[FABRIC] [fabric_end_addr] Start\n"); + return ret; +} + +int fabric::register_addr( comm &fabric_comm, char * addr_buf ) +{ + int ret = -1; + debug_info("[FABRIC] [fabric_register_addr] Start\n"); + ret = fi_av_insert(fabric_comm.av, addr_buf, 1, &fabric_comm.fi_addr, 0, NULL); + if (ret != 1) { + printf("av insert error\n"); + return -FI_ENOSYS; + } + debug_info("[FABRIC] [fabric_register_addr] End\n"); + return ret; +} + +int fabric::wait ( comm &fabric_comm ) +{ + struct fi_cq_err_entry comp; + int ret; + + debug_info("[FABRIC] [fabric_wait] Start\n"); + + ret = fi_cq_sreadfrom(fabric_comm.cq, &comp, 1, &fabric_comm.fi_addr, NULL, -1); + debug_info("[FABRIC] [fabric_wait] fi_cq_sread = "< lock(s_mutex); + + debug_info("[FABRIC] [fabric_close_comm] Close endpoint\n"); + ret = fi_close(&fabric_comm.ep->fid); + if (ret) + printf("warning: error closing EP (%d)\n", ret); + + debug_info("[FABRIC] [fabric_close_comm] Close address vector\n"); + ret = fi_close(&fabric_comm.av->fid); + if (ret) + printf("warning: error closing AV (%d)\n", ret); + + debug_info("[FABRIC] [fabric_close_comm] Close completion queue\n"); + ret = fi_close(&fabric_comm.cq->fid); + if (ret) + printf("warning: error closing CQ (%d)\n", ret); + + return ret; +} + +int fabric::destroy ( domain 
&domain ) +{ + int ret; + + debug_info("[FABRIC] [fabric_destroy] Start\n"); + + std::unique_lock lock(s_mutex); + + debug_info("[FABRIC] [fabric_close_comm] Close domain\n"); + ret = fi_close(&domain.domain->fid); + if (ret) + printf("warning: error closing domain (%d)\n", ret); + + debug_info("[FABRIC] [fabric_close_comm] Close fabric\n"); + ret = fi_close(&domain.fabric->fid); + if (ret) + printf("warning: error closing fabric (%d)\n", ret); + + debug_info("[FABRIC] [fabric_close_comm] Free hints \n"); + if (domain.hints) + fi_freeinfo(domain.info); + + debug_info("[FABRIC] [fabric_close_comm] Free info \n"); + if (domain.info) + fi_freeinfo(domain.info); + + return ret; +} + +} // namespace XPN \ No newline at end of file diff --git a/src/base_cpp/fabric.hpp b/src/base_cpp/fabric.hpp new file mode 100644 index 000000000..2a116f402 --- /dev/null +++ b/src/base_cpp/fabric.hpp @@ -0,0 +1,65 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . 
+ * + */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace XPN { + +class fabric { +public: + struct domain { + struct fi_info *hints, *info; + struct fid_fabric *fabric; + struct fid_domain *domain; + }; + + struct comm { + struct domain *fabric_domain; + struct fid_ep *ep; + struct fid_av *av; + struct fid_cq *cq; + fi_addr_t fi_addr; + }; + +public: + static int init(domain &fabric); + + static int new_comm(domain &domain, comm &out_fabric_comm); + + static int get_addr(comm &fabric_comm, char *out_addr, size_t &size_addr); + static int register_addr(comm &fabric_comm, char *addr_buf); + static int wait(comm &fabric_comm); + static int send(comm &fabric, const void *buffer, size_t size); + static int recv(comm &fabric, void *buffer, size_t size); + static int close(comm &fabric); + static int destroy(domain &domain); + + static std::mutex s_mutex; +}; + +} // namespace XPN \ No newline at end of file diff --git a/src/xpn_client/nfi/CMakeLists.txt b/src/xpn_client/nfi/CMakeLists.txt index e38a49836..6b0ecafb2 100644 --- a/src/xpn_client/nfi/CMakeLists.txt +++ b/src/xpn_client/nfi/CMakeLists.txt @@ -8,6 +8,7 @@ file(GLOB_RECURSE XPN_NFI_SOURCE ) add_library(xpn_nfi OBJECT ${XPN_NFI_HEADERS} ${XPN_NFI_SOURCE}) +target_link_libraries(xpn_nfi PRIVATE xpn_base_cpp) target_include_directories(xpn_nfi PRIVATE "${PROJECT_SOURCE_DIR}/src" diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c deleted file mode 100644 index 2361e22a0..000000000 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz - * - * This file is part of Expand. 
- * - * Expand is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Expand is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Expand. If not, see . - * - */ - - -/* ... Include / Inclusion ........................................... */ - -#include "nfi_fabric_server_comm.h" -#include "socket.h" -#include "fabric.h" -#include "xpn_server/xpn_server_conf.h" - - -/* ... Const / Const ................................................. */ - - -/* ... Global variables / Variables globales ........................ */ - - -/* ... Functions / Funciones ......................................... 
*/ - -int nfi_fabric_server_comm_connect ( struct fabric_domain *fabric_domain, char * srv_name, char * port_name, struct fabric_comm *out_fabric_comm ) -{ - int ret; - int connection_socket; - char addr_buf[64]; - size_t addr_buf_size = 64, addr_buf_size_aux = 64; - - debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] >> Begin\n"); - - ret = fabric_new_comm(fabric_domain, out_fabric_comm); - if (ret < 0){ - printf("Error: fabric_new_comm %d\n", ret); - return ret; - } - - // Lookup port name - ret = socket_client_connect(srv_name, &connection_socket); - if (ret < 0) - { - debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket connect\n"); - return -1; - } - int buffer = SOCKET_ACCEPT_CODE; - ret = socket_send(connection_socket, &buffer, sizeof(buffer)); - if (ret < 0) - { - debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket send\n"); - socket_close(connection_socket); - return -1; - } - ret = socket_recv(connection_socket, port_name, XPN_SERVER_MAX_PORT_NAME); - if (ret < 0) - { - debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket read\n"); - socket_close(connection_socket); - return -1; - } - - ret = socket_recv(connection_socket, addr_buf, addr_buf_size); - if (ret < 0) - { - debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket read\n"); - socket_close(connection_socket); - return -1; - } - - ret = fabric_register_addr(out_fabric_comm, addr_buf); - - ret = fabric_get_addr(out_fabric_comm, addr_buf, addr_buf_size_aux); - if (ret < 0){ - printf("Error: fabric_get_addr\n"); - return ret; - } - - ret = socket_send(connection_socket, addr_buf, addr_buf_size); - if (ret < 0) - { - debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket send\n"); - socket_close(connection_socket); - return -1; - } - - socket_close(connection_socket); - - if (ret < 0) { - printf("[NFI_FABRIC_SERVER_COMM] 
[nfi_fabric_server_comm_connect] ERROR: Lookup %s Port %s\n", srv_name, port_name); - return -1; - } - - debug_info("[NFI_FABRIC_SERVER_COMM] ----SERVER = %s PORT = %s\n", srv_name, port_name); - - - debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] << End\n"); - - return ret; -} - -int nfi_fabric_server_comm_disconnect(struct fabric_comm *fabric_comm) -{ - int ret; - int code = XPN_SERVER_DISCONNECT; - - debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] >> Begin\n"); - - debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Send disconnect message\n"); - ret = fabric_send(fabric_comm, &code, sizeof(code)); - if (ret < 0) { - printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: nfi_fabric_server_comm_write_operation fails\n"); - return ret; - } - - // Disconnect - debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Disconnect\n"); - - ret = fabric_close_comm(fabric_comm); - if (ret < 0) { - printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails\n"); - return ret; - } - - debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] << End\n"); - - // Return OK - return ret; -} -/* ................................................................... */ diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp new file mode 100644 index 000000000..afedaa112 --- /dev/null +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp @@ -0,0 +1,259 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. 
+ * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . + * + */ + +#include "nfi_fabric_server_comm.hpp" +#include "xpn_server/xpn_server_params.hpp" +#include "base_cpp/debug.hpp" +#include "base_cpp/socket.hpp" +#include "base_cpp/ns.hpp" +#include +#include + +namespace XPN { + +nfi_fabric_server_control_comm::nfi_fabric_server_control_comm () +{ + debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> Begin"); + + fabric::init(m_domain); + + debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> End"); +} + +nfi_fabric_server_control_comm::~nfi_fabric_server_control_comm() +{ + debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [~nfi_fabric_server_control_comm] >> Begin"); + + fabric::destroy(m_domain); + + debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [~nfi_fabric_server_control_comm] >> End"); +} + +nfi_xpn_server_comm* nfi_fabric_server_control_comm::connect ( const std::string &srv_name ) +{ + int ret; + int connection_socket; + fabric::comm new_fabric_comm; + + char port_name[MAX_PORT_NAME]; + + debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] >> Begin\n"); + + ret = fabric::new_comm(m_domain, new_fabric_comm); + if (ret < 0){ + printf("Error: fabric_new_comm %d\n", ret); + return nullptr; + } + + // Lookup port name + ret = socket::client_connect(srv_name, socket::get_xpn_port() ,connection_socket); + if 
(ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] ERROR: socket connect\n"); + return nullptr; + } + int buffer = socket::ACCEPT_CODE; + ret = socket::send(connection_socket, &buffer, sizeof(buffer)); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] ERROR: socket send\n"); + socket::close(connection_socket); + return nullptr; + } + ret = socket::recv(connection_socket, port_name, MAX_PORT_NAME); + if (ret < 0) + { + debug_error("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] ERROR: socket read\n"); + socket::close(connection_socket); + return nullptr; + } + + // First recv the server address + size_t ad_len = MAX_PORT_NAME; + char ad_buff[MAX_PORT_NAME]; + ret = socket::recv(connection_socket, &ad_len, sizeof(ad_len)); + if (ret < 0){ + print("[Server="<(comm); + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] >> Begin"); + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Send disconnect message"); + ret = in_comm->write_operation(xpn_server_ops::DISCONNECT); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: nfi_fabric_server_comm_write_operation fails"); + } + + // Disconnect + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Disconnect"); + + ret = fabric::close(in_comm->m_comm); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails"); + } + + delete comm; + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] << End"); +} + +int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { + int ret; + int msg[2]; + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] >> Begin"); + + // Message generation + msg[0] = (int)(pthread_self() % 32450) + 1; + msg[1] = (int)op; + + // Send message 
+ debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] Write operation send tag "<< msg[0]); + + ret = fabric::send(m_comm, msg, sizeof(msg)); + if (ret < 0) { + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] ERROR: socket::send < 0 : "<< ret); + return -1; + } + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] << End"); + + // Return OK + return 0; +} + +int64_t nfi_fabric_server_comm::write_data(const void *data, int64_t size) { + int ret; + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] >> Begin"); + + // Check params + if (size == 0) { + return 0; + } + if (size < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] ERROR: size < 0"); + return -1; + } + + // Send message + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] Write data"); + + ret = fabric::send(m_comm, data, size); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] ERROR: MPI_Send fails"); + size = 0; + } + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] << End"); + + // Return bytes written + return size; +} + +int64_t nfi_fabric_server_comm::read_data(void *data, ssize_t size) { + int ret; + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] >> Begin"); + + // Check params + if (size == 0) { + return 0; + } + if (size < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] ERROR: size < 0"); + return -1; + } + + // Get message + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] Read data"); + + ret = fabric::recv(m_comm, data, size); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] ERROR: MPI_Recv fails"); + size = 0; + } + + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] << End"); + + // Return bytes read + return size; +} + +} //namespace XPN 
diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp new file mode 100644 index 000000000..fe95e4a91 --- /dev/null +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp @@ -0,0 +1,58 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . 
+ * + */ + +#pragma once + +#include +#include + +#include "nfi/nfi_xpn_server_comm.hpp" +#include "base_cpp/fabric.hpp" + +namespace XPN +{ + + class nfi_fabric_server_comm : public nfi_xpn_server_comm + { + public: + nfi_fabric_server_comm(fabric::comm comm) : m_comm(comm) {} + + int64_t write_operation(xpn_server_ops op) override; + int64_t read_data(void *data, int64_t size) override; + int64_t write_data(const void *data, int64_t size) override; + public: + fabric::comm m_comm; + }; + + class nfi_fabric_server_control_comm : public nfi_xpn_server_control_comm + { + public: + nfi_fabric_server_control_comm(); + ~nfi_fabric_server_control_comm(); + + nfi_xpn_server_comm* connect(const std::string &srv_name) override; + void disconnect(nfi_xpn_server_comm* comm) override; + + private: + fabric::domain m_domain; + }; + +} // namespace XPN diff --git a/src/xpn_client/nfi/nfi_server.cpp b/src/xpn_client/nfi/nfi_server.cpp index f1c75195b..d7df2d060 100644 --- a/src/xpn_client/nfi/nfi_server.cpp +++ b/src/xpn_client/nfi/nfi_server.cpp @@ -78,7 +78,8 @@ namespace XPN return std::make_unique(parser); } if (url.find(server_protocols::mpi_server) == 0 || - url.find(server_protocols::sck_server) == 0){ + url.find(server_protocols::sck_server) == 0 || + url.find(server_protocols::fabric_server) == 0 ){ return std::make_unique(parser); } diff --git a/src/xpn_client/nfi/nfi_xpn_server_comm.cpp b/src/xpn_client/nfi/nfi_xpn_server_comm.cpp index 1d7876ad4..58f714d06 100644 --- a/src/xpn_client/nfi/nfi_xpn_server_comm.cpp +++ b/src/xpn_client/nfi/nfi_xpn_server_comm.cpp @@ -26,6 +26,7 @@ #include "nfi_xpn_server_comm.hpp" #include "nfi_mpi_server/nfi_mpi_server_comm.hpp" #include "nfi_sck_server/nfi_sck_server_comm.hpp" +#include "nfi_fabric_server/nfi_fabric_server_comm.hpp" namespace XPN { @@ -39,6 +40,10 @@ namespace XPN { return std::make_unique(); }else + if (server_protocol == server_protocols::fabric_server) + { + return std::make_unique(); + }else if (server_protocol == 
server_protocols::file) { // return std::make_unique(params.get_argc(), params.get_argv(), params.have_threads()); diff --git a/src/xpn_client/nfi/nfi_xpn_server_comm.hpp b/src/xpn_client/nfi/nfi_xpn_server_comm.hpp index ea705dfcd..6d05eb94d 100644 --- a/src/xpn_client/nfi/nfi_xpn_server_comm.hpp +++ b/src/xpn_client/nfi/nfi_xpn_server_comm.hpp @@ -32,6 +32,7 @@ namespace XPN { constexpr const char * mpi_server = "mpi_server"; constexpr const char * sck_server = "sck_server"; + constexpr const char * fabric_server = "fabric_server"; constexpr const char * file = "file"; } class nfi_xpn_server_comm diff --git a/src/xpn_server/fabric_server/fabric_server_comm.c b/src/xpn_server/fabric_server/fabric_server_comm.c deleted file mode 100644 index 2898f057f..000000000 --- a/src/xpn_server/fabric_server/fabric_server_comm.c +++ /dev/null @@ -1,182 +0,0 @@ - -/* - * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz - * - * This file is part of Expand. - * - * Expand is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Expand is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Expand. If not, see . - * - */ - - -/* ... Include / Inclusion ........................................... */ - -#include "fabric_server_comm.h" - - -/* ... Const / Const ................................................. */ - - -/* ... Global variables / Variables globales ........................ */ - - -/* ... 
Functions / Funciones ......................................... */ - -// init, destroy - -// accept, disconnect -int fabric_server_comm_accept ( struct fabric_domain *fabric_domain, char *dest_addr, char *port_name, struct fabric_comm **new_sd ) -{ - int ret; - - *new_sd = malloc(sizeof(struct fabric_comm)); - if (*new_sd == NULL) { - printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] ERROR: Memory allocation\n", 0); - return -1; - } - - debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] >> Begin\n", 0); - - // Accept - debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] Accept %s %s\n", 0, dest_addr, port_name); - - ret = fabric_new_comm(fabric_domain, *new_sd); - if (ret < 0) - { - printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] ERROR: fabric_init fails\n", 0); - return -1; - } - - debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_accept] << End %ld\n", 0, **new_sd); - - return 0; -} - -int fabric_server_comm_disconnect ( struct fabric_comm *fd ) -{ - int ret; - - debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] >> Begin\n", 0); - - if (fd == NULL) - { - printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] ERROR: The fabric_comm is NULL\n", 0); - return -1; - } - - // Disconnect - debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] Disconnect\n", 0); - - ret = fabric_close_comm(fd); - if (ret < 0) - { - printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails\n", 0); - return -1; - } - free(fd); - debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_disconnect] << End\n", 0); - - // Return OK - return 0; -} - -// ssize_t fabric_server_comm_read_operation ( struct fabric_comm *fd, int *op, int *rank_client_id, int *tag_client_id ) -// { -// int ret; -// int msg[2]; - -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] 
[fabric_server_comm_read_operation] >> Begin\n", 0); - -// // Get message -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] Read operation %p\n", 0, fd); - -// ret = fabric_recv(fd, msg, sizeof(msg)); -// if (ret < 0) { -// debug_warning("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] ERROR: fabric_recv fails\n", 0); -// } - -// *rank_client_id = 0; -// *tag_client_id = msg[0]; -// *op = msg[1]; - -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] fabric_recv (SOURCE %d, TAG %d, OP %d, ERROR %d)\n", 0, *rank_client_id, *rank_client_id, *op, ret); -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_operation] << End\n", 0); - -// // Return OK -// return 0; -// } - - -// ssize_t fabric_server_comm_write_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ) -// { -// int ret; - -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] >> Begin\n", 0); - -// if (size == 0) { -// return 0; -// } -// if (size < 0) -// { -// printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] ERROR: size < 0\n", 0); -// return -1; -// } - -// // Send message -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] Write data tag %d\n", 0, tag_client_id); - -// ret = fabric_send(fd, data, size); -// if (ret < 0) { -// debug_warning("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] ERROR: fabric_send fails\n", 0); -// } - -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_write_data] << End\n", 0); - -// // Return bytes written -// return size; -// } - -// ssize_t fabric_server_comm_read_data ( struct fabric_comm *fd, char *data, ssize_t size, int rank_client_id, int tag_client_id ) -// { -// int ret; - -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] >> Begin\n", 0); - -// if (size == 0) { -// return 0; 
-// } -// if (size < 0) -// { -// printf("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] ERROR: size < 0\n", 0); -// return -1; -// } - -// // Get message -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] Read data tag %d\n", 0, tag_client_id); - -// ret = fabric_recv(fd, data, size); -// if (ret < 0) { -// debug_warning("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] ERROR: fabric_recv fails\n", 0); -// } - -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] fabric_recv (ERROR %d)\n", 0, ret); -// debug_info("[Server=%d] [FABRIC_SERVER_COMM] [fabric_server_comm_read_data] << End\n", 0); - -// // Return bytes read -// return size; -// } - - -/* ................................................................... */ - diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp new file mode 100644 index 000000000..154f10a8e --- /dev/null +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -0,0 +1,251 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . 
+ * + */ + +#include "fabric_server_comm.hpp" +#include "base_cpp/debug.hpp" +#include "base_cpp/timer.hpp" +#include "base_cpp/ns.hpp" +#include "base_cpp/socket.hpp" +#include "base_c/filesystem.h" +#include + +namespace XPN +{ +fabric_server_control_comm::fabric_server_control_comm () +{ + debug_info("[Server="<> Begin"); + + fabric::init(m_domain); + + debug_info("[Server="<> End"); +} + +fabric_server_control_comm::~fabric_server_control_comm() +{ + debug_info("[Server="<> Begin"); + + fabric::destroy(m_domain); + + debug_info("[Server="<> End"); +} + +xpn_server_comm* fabric_server_control_comm::accept ( int socket ) +{ + debug_info("[Server="<> Begin"); + + // // Accept + // debug_info("[Server="<> Begin"); + + fabric_server_comm *in_comm = static_cast(comm); + + fabric::close(in_comm->m_comm); + + delete comm; + + debug_info("[Server="<> Begin"); + + // Get message + debug_info("[Server="<(msg[1]); + + debug_info("[Server="<> Begin"); + + if (size == 0) { + return 0; + } + if (size < 0) + { + print("[Server="<> Begin"); + + if (size == 0) { + return 0; + } + if (size < 0) + { + print("[Server="<. 
+ * + */ + +#pragma once + +#include +#include + +#include "xpn_server/xpn_server_comm.hpp" +#include "base_cpp/fabric.hpp" + +namespace XPN +{ + + class fabric_server_comm : public xpn_server_comm + { + public: + fabric_server_comm(fabric::comm comm) : m_comm(comm) {} + ~fabric_server_comm() override {} + + int64_t read_operation(xpn_server_ops &op, int &rank_client_id, int &tag_client_id) override; + int64_t read_data(void *data, int64_t size, int rank_client_id, int tag_client_id) override; + int64_t write_data(const void *data, int64_t size, int rank_client_id, int tag_client_id) override; + public: + fabric::comm m_comm; + }; + + class fabric_server_control_comm : public xpn_server_control_comm + { + public: + fabric_server_control_comm(); + ~fabric_server_control_comm() override; + + xpn_server_comm* accept(int socket) override; + void disconnect(xpn_server_comm *comm) override; + private: + fabric::domain m_domain; + }; + +} // namespace XPN diff --git a/src/xpn_server/mpi_server/mpi_server_comm.cpp b/src/xpn_server/mpi_server/mpi_server_comm.cpp index 159f914a0..5eb932bfc 100644 --- a/src/xpn_server/mpi_server/mpi_server_comm.cpp +++ b/src/xpn_server/mpi_server/mpi_server_comm.cpp @@ -20,6 +20,7 @@ */ #include "mpi_server_comm.hpp" #include "base_cpp/debug.hpp" +#include "base_cpp/socket.hpp" #include "base_cpp/ns.hpp" namespace XPN @@ -122,7 +123,7 @@ mpi_server_control_comm::~mpi_server_control_comm() } // accept, disconnect -xpn_server_comm* mpi_server_control_comm::accept ( ) +xpn_server_comm* mpi_server_control_comm::accept ( int socket ) { int ret; @@ -130,6 +131,12 @@ xpn_server_comm* mpi_server_control_comm::accept ( ) debug_info("[Server="<> Begin"); + ret = socket::send(socket, m_port_name.data(), MAX_PORT_NAME); + if (ret < 0){ + print("[Server="<> Begin"); + debug_info("[Server="<> Begin"); + + ret = socket::send(socket, m_port_name.data(), MAX_PORT_NAME); + if (ret < 0){ + print("[Server="<accept(); + xpn_server_comm* comm = 
m_control_comm->accept(connection_socket); debug_info("[TH_ID="<m_port_name.data(), MAX_PORT_NAME); - accept(); + accept(connection_socket); break; case socket::STATS_wINDOW_CODE: diff --git a/src/xpn_server/xpn_server.hpp b/src/xpn_server/xpn_server.hpp index 7bd442cd0..8a0170005 100644 --- a/src/xpn_server/xpn_server.hpp +++ b/src/xpn_server/xpn_server.hpp @@ -39,7 +39,7 @@ namespace XPN int stop(); int print_stats(); - void accept(); + void accept(int socket); void dispatcher(xpn_server_comm *comm); void do_operation(xpn_server_comm *comm, xpn_server_ops op, int rank_client_id, int tag_client_id, timer timer); void finish(); diff --git a/src/xpn_server/xpn_server_comm.cpp b/src/xpn_server/xpn_server_comm.cpp index 3a1f7e640..002a824f6 100644 --- a/src/xpn_server/xpn_server_comm.cpp +++ b/src/xpn_server/xpn_server_comm.cpp @@ -25,6 +25,7 @@ #include "xpn_server_comm.hpp" #include "mpi_server/mpi_server_comm.hpp" #include "sck_server/sck_server_comm.hpp" +#include "fabric_server/fabric_server_comm.hpp" namespace XPN { @@ -36,6 +37,8 @@ namespace XPN return std::make_unique(params); case XPN_SERVER_TYPE_SCK: return std::make_unique(); + case XPN_SERVER_TYPE_FABRIC: + return std::make_unique(); default: fprintf(stderr, "[XPN_SERVER] [xpn_server_control_comm] server_type '%d' not recognized\n", params.server_type); } return nullptr; diff --git a/src/xpn_server/xpn_server_comm.hpp b/src/xpn_server/xpn_server_comm.hpp index 0bf770a67..f4cb11290 100644 --- a/src/xpn_server/xpn_server_comm.hpp +++ b/src/xpn_server/xpn_server_comm.hpp @@ -44,7 +44,7 @@ namespace XPN xpn_server_control_comm() = default; virtual ~xpn_server_control_comm() = default; - virtual xpn_server_comm* accept() = 0; + virtual xpn_server_comm* accept(int socket) = 0; virtual void disconnect(xpn_server_comm *comm) = 0; static std::unique_ptr Create(xpn_server_params ¶ms); diff --git a/src/xpn_server/xpn_server_params.cpp b/src/xpn_server/xpn_server_params.cpp index 66b2348c1..3bedff5f4 100644 --- 
a/src/xpn_server/xpn_server_params.cpp +++ b/src/xpn_server/xpn_server_params.cpp @@ -41,6 +41,8 @@ void xpn_server_params::show() { printf(" |\t-s :\tmpi_server\n"); } else if (server_type == XPN_SERVER_TYPE_SCK) { printf(" |\t-s :\tsck_server\n"); + } else if (server_type == XPN_SERVER_TYPE_FABRIC) { + printf(" |\t-s :\tfabric_server\n"); } else { printf(" |\t-s :\tError: unknown\n"); } @@ -148,6 +150,8 @@ xpn_server_params::xpn_server_params(int _argc, char *_argv[]) { server_type = XPN_SERVER_TYPE_MPI; } else if (strcmp("sck", argv[i + 1]) == 0) { server_type = XPN_SERVER_TYPE_SCK; + } else if (strcmp("fabric", argv[i + 1]) == 0) { + server_type = XPN_SERVER_TYPE_FABRIC; } else { printf("ERROR: unknown option %s\n", argv[i + 1]); show_usage(); @@ -177,7 +181,7 @@ xpn_server_params::xpn_server_params(int _argc, char *_argv[]) { // In sck_server worker for operations has to be sequential because you don't want to have to make a socket per // operation. It can be done because it is not reentrant - if (server_type == XPN_SERVER_TYPE_SCK) { + if (server_type == XPN_SERVER_TYPE_SCK || server_type == XPN_SERVER_TYPE_FABRIC) { thread_mode_operations = workers_mode::sequential; } diff --git a/src/xpn_server/xpn_server_params.hpp b/src/xpn_server/xpn_server_params.hpp index 0815dc622..ffc1db3b7 100644 --- a/src/xpn_server/xpn_server_params.hpp +++ b/src/xpn_server/xpn_server_params.hpp @@ -38,6 +38,7 @@ #define XPN_SERVER_TYPE_MPI 0 #define XPN_SERVER_TYPE_SCK 1 + #define XPN_SERVER_TYPE_FABRIC 2 namespace XPN { From 734cd9139f7fd77d6f56b6d5fc22f67e45bbc5e3 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Wed, 30 Oct 2024 17:09:41 +0100 Subject: [PATCH 03/60] Add test fot libfabric --- src/base_cpp/fabric.cpp | 55 ++-- test/CMakeLists.txt | 1 + test/integrity/libfabric/CMakeLists.txt | 5 + test/integrity/libfabric/fabric_test.cpp | 347 +++++++++++++++++++++++ 4 files changed, 382 insertions(+), 26 deletions(-) create mode 100644 test/integrity/libfabric/CMakeLists.txt 
create mode 100644 test/integrity/libfabric/fabric_test.cpp diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index 5bb60c4e7..73310c61c 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -81,7 +81,7 @@ int fabric::init ( domain &fabric ) { int ret; - debug_info("[FABRIC] [fabric_init] Start\n"); + debug_info("[FABRIC] [fabric_init] Start"); std::unique_lock lock(s_mutex); /* @@ -108,7 +108,7 @@ int fabric::init ( domain &fabric ) } #ifdef DEBUG - debug_info("[FABRIC] [fabric_init] %s", fi_tostr(fabric.info, FI_TYPE_INFO)); + debug_info("[FABRIC] [fabric_init] "< lock(s_mutex); // First asing the domain to the fabric_comm @@ -252,26 +252,26 @@ int fabric::new_comm ( domain &domain, comm &out_fabric_comm ) int fabric::get_addr( comm &fabric_comm, char * out_addr, size_t &size_addr ) { int ret = -1; - debug_info("[FABRIC] [fabric_get_addr] Start\n"); + debug_info("[FABRIC] [fabric_get_addr] Start"); ret = fi_getname(&fabric_comm.ep->fid, out_addr, &size_addr); if (ret) { printf("fi_getname error %d\n", ret); return ret; } - debug_info("[FABRIC] [fabric_end_addr] Start\n"); + debug_info("[FABRIC] [fabric_get_addr] End = "< lock(s_mutex); - debug_info("[FABRIC] [fabric_close_comm] Close endpoint\n"); + debug_info("[FABRIC] [fabric_close_comm] Close endpoint"); ret = fi_close(&fabric_comm.ep->fid); if (ret) printf("warning: error closing EP (%d)\n", ret); - debug_info("[FABRIC] [fabric_close_comm] Close address vector\n"); + debug_info("[FABRIC] [fabric_close_comm] Close address vector"); ret = fi_close(&fabric_comm.av->fid); if (ret) printf("warning: error closing AV (%d)\n", ret); - debug_info("[FABRIC] [fabric_close_comm] Close completion queue\n"); + debug_info("[FABRIC] [fabric_close_comm] Close completion queue"); ret = fi_close(&fabric_comm.cq->fid); if (ret) printf("warning: error closing CQ (%d)\n", ret); + + debug_info("[FABRIC] [fabric_close_comm] End = "< lock(s_mutex); - debug_info("[FABRIC] [fabric_close_comm] Close 
domain\n"); + debug_info("[FABRIC] [fabric_destroy] Close domain"); ret = fi_close(&domain.domain->fid); if (ret) printf("warning: error closing domain (%d)\n", ret); - debug_info("[FABRIC] [fabric_close_comm] Close fabric\n"); + debug_info("[FABRIC] [fabric_destroy] Close fabric"); ret = fi_close(&domain.fabric->fid); if (ret) printf("warning: error closing fabric (%d)\n", ret); - debug_info("[FABRIC] [fabric_close_comm] Free hints \n"); + debug_info("[FABRIC] [fabric_destroy] Free hints "); if (domain.hints) - fi_freeinfo(domain.info); + fi_freeinfo(domain.hints); - debug_info("[FABRIC] [fabric_close_comm] Free info \n"); + debug_info("[FABRIC] [fabric_destroy] Free info "); if (domain.info) fi_freeinfo(domain.info); + debug_info("[FABRIC] [fabric_destroy] End = "<. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace XPN; + +#define BUF_SIZE 1024 * 1024 + +char *src_addr = NULL, *dst_addr = NULL; +const char *oob_port = "9228"; +int listen_sock, oob_sock; + +std::vector buf(BUF_SIZE); +std::vector msg(BUF_SIZE); + +fabric::comm fabric_comm; +fabric::domain fabric_domain; + +static int sock_listen(char *node, const char *service) +{ + struct addrinfo *ai, hints; + int val, ret; + + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_PASSIVE; + + ret = getaddrinfo(node, service, &hints, &ai); + if (ret) { + printf("getaddrinfo() %s\n", gai_strerror(ret)); + return ret; + } + + listen_sock = socket(ai->ai_family, SOCK_STREAM, 0); + if (listen_sock < 0) { + printf("socket error"); + ret = listen_sock; + goto out; + } + + val = 1; + ret = setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, + (void *) &val, sizeof val); + if (ret) { + printf("setsockopt SO_REUSEADDR"); + goto out; + } + + ret = bind(listen_sock, ai->ai_addr, ai->ai_addrlen); + if (ret) { + printf("bind"); + goto out; + } + + ret = listen(listen_sock, 0); + if (ret) + printf("listen error"); + 
+out: + if (ret && listen_sock >= 0) + close(listen_sock); + freeaddrinfo(ai); + return ret; +} + +static int sock_setup(int sock) +{ + int ret, op; + long flags; + + op = 1; + ret = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, + (void *) &op, sizeof(op)); + if (ret) + return ret; + + flags = fcntl(sock, F_GETFL); + if (flags < 0) + return -errno; + + if (fcntl(sock, F_SETFL, flags)) + return -errno; + + return 0; +} + +static int init_oob(void) +{ + struct addrinfo *ai = NULL; + int ret; + + if (!dst_addr) { + ret = sock_listen(src_addr, oob_port); + if (ret) + return ret; + + oob_sock = accept(listen_sock, NULL, 0); + if (oob_sock < 0) { + printf("accept error"); + ret = oob_sock; + return ret; + } + + close(listen_sock); + } else { + ret = getaddrinfo(dst_addr, oob_port, NULL, &ai); + if (ret) { + printf("getaddrinfo error"); + return ret; + } + + oob_sock = socket(ai->ai_family, SOCK_STREAM, 0); + if (oob_sock < 0) { + printf("socket error"); + ret = oob_sock; + goto free; + } + + ret = connect(oob_sock, ai->ai_addr, ai->ai_addrlen); + if (ret) { + printf("connect error"); + close(oob_sock); + goto free; + } + sleep(1); + } + + ret = sock_setup(oob_sock); + +free: + if (ai) + freeaddrinfo(ai); + return ret; +} + +static int sock_send(int fd, void *msg, size_t len) +{ + size_t sent; + ssize_t ret, err = 0; + + for (sent = 0; sent < len; ) { + ret = send(fd, ((char *) msg) + sent, len - sent, 0); + if (ret > 0) { + sent += ret; + } else { + err = -errno; + break; + } + } + + return err ? err: 0; +} + +static int sock_recv(int fd, void *msg, size_t len) +{ + size_t rcvd; + ssize_t ret, err = 0; + + for (rcvd = 0; rcvd < len; ) { + ret = recv(fd, ((char *) msg) + rcvd, len - rcvd, 0); + if (ret > 0) { + rcvd += ret; + } else if (ret == 0) { + err = -FI_ENOTCONN; + break; + } else { + err = -errno; + break; + } + } + + return err ? 
err: 0; +} + +static int exchange_addresses(void) +{ + #define BUF_SIZE_AUX 64 + char addr_buf[BUF_SIZE_AUX]; + int ret; + size_t addrlen = BUF_SIZE_AUX; + + ret = fabric::get_addr(fabric_comm, addr_buf, addrlen); + if (ret) { + printf("fi_getname error %d\n", ret); + return ret; + } + + ret = sock_send(oob_sock, addr_buf, BUF_SIZE_AUX); + if (ret) { + printf("sock_send error %d\n", ret); + return ret; + } + + memset(addr_buf, 0, BUF_SIZE_AUX); + ret = sock_recv(oob_sock, addr_buf, BUF_SIZE_AUX); + if (ret) { + printf("sock_recv error %d\n", ret); + return ret; + } + + ret = fabric::register_addr(fabric_comm, addr_buf); + if (ret != 1) { + printf("av insert error\n"); + return -FI_ENOSYS; + } + + return 0; +} + +static int post_recv(void) +{ + int ret; + + ret = fabric::recv(fabric_comm, buf.data(), buf.size()*sizeof(buf[0])); + + return ret; +} + +static int post_send(void) +{ + + static int count = 0; + if (dst_addr){ + sprintf(msg.data(), "Hello, server! I am the client you've been waiting for! %d", count++); + }else{ + sprintf(msg.data(), "Hello, client! I am the server you've been waiting for! 
%d", count++); + } + int ret; + + ret = fabric::send(fabric_comm, msg.data(), msg.size()*sizeof(msg[0])); + + return ret; +} + +static int run(void) +{ + int ret; + + if (dst_addr) { + printf("Client: send to server %s\n", dst_addr); + + for (int i = 0; i < 10; i++) + { + printf("Client: send buffer and wait for the server to recv\n"); + ret = post_send(); + if (ret<0) + return ret; + + printf("Client: post buffer and wait for message from server\n"); + ret = post_recv(); + if (ret<0) + return ret; + + printf("This is the message I received: %s\n", buf.data()); + } + + + } else { + + printf("Server: send to client data\n"); + for (int i = 0; i < 10; i++) + { + printf("Server: post buffer and wait for message from client\n"); + ret = post_recv(); + if (ret<0) + return ret; + + printf("This is the message I received: %s\n", buf.data()); + + printf("Server: send buffer and wait for the client to recv\n"); + ret = post_send(); + if (ret<0) + return ret; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int ret; + + /* + * Server run with no args, client has server's address as an + * argument. + */ + if (argc > 1) + dst_addr = argv[1]; + + /* Init out-of-band addressing */ + ret = init_oob(); + if (ret) + return ret; + + /* + * Hints are used to request support for specific features from a + * provider. 
+ */ + + ret = fabric::init(fabric_domain); + if (ret) + goto out; + ret = fabric::new_comm(fabric_domain, fabric_comm); + if (ret) + goto out; + + ret = exchange_addresses(); + if (ret) + goto out; + + ret = run(); +out: + fabric::close(fabric_comm); + fabric::destroy(fabric_domain); + return ret; +} \ No newline at end of file From 3547c18379c7aedfad794ee10199d5ad43d33bf8 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Wed, 30 Oct 2024 17:12:21 +0100 Subject: [PATCH 04/60] Fix cmake base_cpp --- scripts/compile/platform/unito-dario.sh | 3 ++- scripts/compile/software/xpn.sh | 8 +++++--- src/base_cpp/CMakeLists.txt | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/compile/platform/unito-dario.sh b/scripts/compile/platform/unito-dario.sh index 743c21c47..ac545916e 100755 --- a/scripts/compile/platform/unito-dario.sh +++ b/scripts/compile/platform/unito-dario.sh @@ -32,11 +32,12 @@ spack load pkg-config #MPICC_PATH=$HOME/opt/spack/linux-ubuntu20.04-zen/gcc-9.4.0/mpich-4.0.2-a76rmlxbneoqdvemzjsyewp2akiiuxlj/bin/mpicc # MPICC_PATH=$HOME/dariomnz/bin/mpich/bin/mpicc MPICC_PATH=$HOME/dariomnz/bin/mpich/bin +FABRIC_PATH=/opt/libfabric INSTALL_PATH=$HOME/dariomnz/bin/ BASE_PATH=$(dirname $0) # 3) preconfigure build-me... 
-$BASE_PATH/../software/xpn.sh -m $MPICC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../xpn +$BASE_PATH/../software/xpn.sh -m $MPICC_PATH -f $FABRIC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../xpn $BASE_PATH/../software/ior.sh -m $MPICC_PATH/mpicc -i $INSTALL_PATH -s $BASE_PATH/../../../../ior # $BASE_PATH/../software/lz4.sh -m $MPICC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../io500/build/pfind/lz4/ # $BASE_PATH/../software/io500.sh -m $MPICC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../io500 diff --git a/scripts/compile/software/xpn.sh b/scripts/compile/software/xpn.sh index df422e5f5..951241663 100755 --- a/scripts/compile/software/xpn.sh +++ b/scripts/compile/software/xpn.sh @@ -35,11 +35,11 @@ function usage { LIBFABRIC_PATH="" ## get arguments -while getopts "m:l:i:s:" opt; do +while getopts "m:f:i:s:" opt; do case "${opt}" in m) MPICC_PATH=${OPTARG} ;; - l) LIBFABRIC_PATH=${OPTARG} + f) LIBFABRIC_PATH=${OPTARG} ;; i) INSTALL_PATH=${OPTARG} ;; @@ -85,13 +85,15 @@ echo " * XPN: preparing directories..." rm -fr "${INSTALL_PATH}/xpn" echo " * XPN: compiling and installing..." +echo " * XPN mpi: $MPICC_PATH" +echo " * XPN libfabric: $LIBFABRIC_PATH" pushd . cd "$SRC_PATH" rm -r build mkdir -p build cd build -cmake -S .. -B . -D BUILD_TESTS=ON -D CMAKE_INSTALL_PREFIX="${INSTALL_PATH}/xpn" -D CMAKE_C_COMPILER="${MPICC_PATH}"/mpicc -D CMAKE_CXX_COMPILER="${MPICC_PATH}"/mpicxx -D FABRIC_PATH="${LIBFABRIC_PATH}" +cmake -S .. -B . -D BUILD_TESTS=ON -D CMAKE_INSTALL_PREFIX="${INSTALL_PATH}/xpn" -D CMAKE_C_COMPILER="${MPICC_PATH}"/mpicc -D CMAKE_CXX_COMPILER="${MPICC_PATH}"/mpicxx -D ENABLE_FABRIC_SERVER="${LIBFABRIC_PATH}" cmake --build . 
-j diff --git a/src/base_cpp/CMakeLists.txt b/src/base_cpp/CMakeLists.txt index 8fd1df559..9d8a5cd49 100644 --- a/src/base_cpp/CMakeLists.txt +++ b/src/base_cpp/CMakeLists.txt @@ -7,7 +7,7 @@ file(GLOB XPN_BASE_CPP_SOURCE "*.cpp" ) -if(${FABRIC_PATH} STREQUAL "") +if("${FABRIC_PATH}" STREQUAL "") list(FILTER XPN_BASE_CPP_HEADERS EXCLUDE REGEX "fabric") list(FILTER XPN_BASE_CPP_SOURCE EXCLUDE REGEX "fabric") endif() @@ -17,7 +17,7 @@ target_include_directories(xpn_base_cpp PRIVATE "${PROJECT_SOURCE_DIR}/src" ) -if(NOT ${FABRIC_PATH} STREQUAL "") +if(NOT "${FABRIC_PATH}" STREQUAL "") target_link_libraries(xpn_base_cpp PUBLIC fabric) target_link_directories(xpn_base_cpp PUBLIC ${FABRIC_PATH}/lib) target_include_directories(xpn_base_cpp PUBLIC From 94665c47fd085ce4e27e9e75c9277b68e6d07196 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Wed, 30 Oct 2024 18:30:58 +0100 Subject: [PATCH 05/60] Fix fabric problem with hints --- src/base_cpp/fabric.cpp | 36 +++++++++++++++++------------------- src/base_cpp/fabric.hpp | 6 ++++-- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index 73310c61c..050d3770b 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -19,6 +19,7 @@ * */ +#define DEBUG #include "base_cpp/fabric.hpp" #include "base_cpp/debug.hpp" @@ -27,10 +28,10 @@ namespace XPN std::mutex fabric::s_mutex; -int set_hints( struct ::fi_info * hints) +int fabric::set_hints( domain &fabric_domain ) { - hints = fi_allocinfo(); - if (!hints) + fabric_domain.hints = fi_allocinfo(); + if (!fabric_domain.hints) return -FI_ENOMEM; /* @@ -38,13 +39,13 @@ int set_hints( struct ::fi_info * hints) * to reliably send messages to peers without having to * listen/connect/accept. 
*/ - hints->ep_attr->type = FI_EP_RDM; + fabric_domain.hints->ep_attr->type = FI_EP_RDM; /* * Request basic messaging capabilities from the provider (no tag * matching, no RMA, no atomic operations) */ - hints->caps = FI_MSG; + fabric_domain.hints->caps = FI_MSG; /* * Default to FI_DELIVERY_COMPLETE which will make sure completions do @@ -52,7 +53,7 @@ int set_hints( struct ::fi_info * hints) * Otherwise, the client might get a completion and exit before the * server receives the message. This is to make the test simpler. */ - hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; + fabric_domain.hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; /* * Set the mode bit to 0. Mode bits are used to convey requirements @@ -61,16 +62,16 @@ int set_hints( struct ::fi_info * hints) * domain. On input to fi_getinfo, applications set the mode bits that * they support. */ - hints->mode = 0; + fabric_domain.hints->mode = FI_CONTEXT; /* * Set mr_mode to 0. mr_mode is used to specify the type of memory * registration capabilities the application requires. In this example * we are not using memory registration so this bit will be set to 0. */ - hints->domain_attr->mr_mode = 0; + // hints->domain_attr->mr_mode = 0; - hints->domain_attr->threading = FI_THREAD_SAFE; + // hints->domain_attr->threading = FI_THREAD_SAFE; /* Done setting hints */ @@ -96,7 +97,7 @@ int fabric::init ( domain &fabric ) * for the client because the fields refer to the server, not the * caller (client). 
*/ - set_hints(fabric.hints); + set_hints(fabric); ret = fi_getinfo(fi_version(), NULL, NULL, 0, fabric.hints, &fabric.info); @@ -140,7 +141,7 @@ int fabric::init ( domain &fabric ) return 0; } -int fabric::new_comm ( domain &domain, comm &out_fabric_comm ) +int fabric::new_comm ( domain &fabric_domain, comm &out_fabric_comm ) { struct fi_cq_attr cq_attr = {}; struct fi_av_attr av_attr = {}; @@ -149,9 +150,6 @@ int fabric::new_comm ( domain &domain, comm &out_fabric_comm ) debug_info("[FABRIC] [fabric_new_comm] Start"); std::unique_lock lock(s_mutex); - // First asing the domain to the fabric_comm - out_fabric_comm.fabric_domain = &domain; - /* * Initialize our endpoint. Endpoints are transport level communication * portals which are used to initiate and drive communication. There @@ -163,7 +161,7 @@ int fabric::new_comm ( domain &domain, comm &out_fabric_comm ) * Different providers support different types of endpoints. */ - ret = fi_endpoint(out_fabric_comm.fabric_domain->domain, out_fabric_comm.fabric_domain->info, &out_fabric_comm.ep, NULL); + ret = fi_endpoint(fabric_domain.domain, fabric_domain.info, &out_fabric_comm.ep, NULL); debug_info("[FABRIC] [fabric_new_comm] fi_endpoint = "<domain, &cq_attr, &out_fabric_comm.cq, NULL); + ret = fi_cq_open(fabric_domain.domain, &cq_attr, &out_fabric_comm.cq, NULL); debug_info("[FABRIC] [fabric_new_comm] fi_cq_open = "<domain, &av_attr, &out_fabric_comm.av, NULL); + ret = fi_av_open(fabric_domain.domain, &av_attr, &out_fabric_comm.av, NULL); debug_info("[FABRIC] [fabric_new_comm] fi_av_open = "< Date: Mon, 4 Nov 2024 10:59:20 +0100 Subject: [PATCH 06/60] Fix Timer float time --- src/base_cpp/timer.hpp | 17 ++++++++++++++++- test/integrity/libfabric/fabric_test.cpp | 16 +++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/base_cpp/timer.hpp b/src/base_cpp/timer.hpp index ac81106a8..aebb6fe92 100644 --- a/src/base_cpp/timer.hpp +++ b/src/base_cpp/timer.hpp @@ -40,7 +40,7 @@ namespace XPN 
float elapsed() { - return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - m_Start).count(); + return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - m_Start).count() * 0.001f * 0.001f * 0.001f; } template @@ -49,6 +49,21 @@ namespace XPN return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - m_Start).count(); } + float elapsedNano() + { + return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - m_Start).count(); + } + + float elapsedMicro() + { + return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - m_Start).count() * 0.001f; + } + + float elapsedMilli() + { + return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - m_Start).count() * 0.001f * 0.001f; + } + std::chrono::time_point get_start() { return m_Start; diff --git a/test/integrity/libfabric/fabric_test.cpp b/test/integrity/libfabric/fabric_test.cpp index 885491eac..90026d1c8 100644 --- a/test/integrity/libfabric/fabric_test.cpp +++ b/test/integrity/libfabric/fabric_test.cpp @@ -31,7 +31,10 @@ #include #include #include +#include +#include #include +#include using namespace XPN; @@ -265,6 +268,7 @@ static int post_send(void) static int run(void) { int ret; + timer timer; if (dst_addr) { printf("Client: send to server %s\n", dst_addr); @@ -275,11 +279,14 @@ static int run(void) ret = post_send(); if (ret<0) return ret; - + std::cout<<"Send "< Date: Tue, 5 Nov 2024 17:17:19 +0100 Subject: [PATCH 07/60] Test new libfabric implementation --- src/base_cpp/fabric.cpp | 497 ++++++++++++++---- src/base_cpp/fabric.hpp | 89 +++- .../nfi_fabric_server_comm.cpp | 75 ++- .../nfi_fabric_server_comm.hpp | 6 +- .../fabric_server/fabric_server_comm.cpp | 108 ++-- .../fabric_server/fabric_server_comm.hpp | 6 +- test/integrity/libfabric/fabric_test.cpp | 32 +- 7 files changed, 573 insertions(+), 240 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index 
050d3770b..c580a80f9 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -19,7 +19,7 @@ * */ -#define DEBUG +// #define DEBUG #include "base_cpp/fabric.hpp" #include "base_cpp/debug.hpp" @@ -27,11 +27,77 @@ namespace XPN { std::mutex fabric::s_mutex; + + +void print_flags(uint64_t flags) { + debug_info(" Flags set:"); + + if (flags & FI_MSG) { debug_info(" FI_MSG"); } + if (flags & FI_RMA) { debug_info(" FI_RMA"); } + if (flags & FI_TAGGED) { debug_info(" FI_TAGGED"); } + if (flags & FI_ATOMIC) { debug_info(" FI_ATOMIC"); } + if (flags & FI_MULTICAST) { debug_info(" FI_MULTICAST"); } + if (flags & FI_COLLECTIVE) { debug_info(" FI_COLLECTIVE"); } + + if (flags & FI_READ) { debug_info(" FI_READ"); } + if (flags & FI_WRITE) { debug_info(" FI_WRITE"); } + if (flags & FI_RECV) { debug_info(" FI_RECV"); } + if (flags & FI_SEND) { debug_info(" FI_SEND"); } + if (flags & FI_REMOTE_READ) { debug_info(" FI_REMOTE_READ"); } + if (flags & FI_REMOTE_WRITE) { debug_info(" FI_REMOTE_WRITE"); } + + if (flags & FI_MULTI_RECV) { debug_info(" FI_MULTI_RECV"); } + if (flags & FI_REMOTE_CQ_DATA) { debug_info(" FI_REMOTE_CQ_DATA"); } + if (flags & FI_MORE) { debug_info(" FI_MORE"); } + if (flags & FI_PEEK) { debug_info(" FI_PEEK"); } + if (flags & FI_TRIGGER) { debug_info(" FI_TRIGGER"); } + if (flags & FI_FENCE) { debug_info(" FI_FENCE"); } + // if (flags & FI_PRIORITY) { debug_info(" FI_PRIORITY"); } + + if (flags & FI_COMPLETION) { debug_info(" FI_COMPLETION"); } + if (flags & FI_INJECT) { debug_info(" FI_INJECT"); } + if (flags & FI_INJECT_COMPLETE) { debug_info(" FI_INJECT_COMPLETE"); } + if (flags & FI_TRANSMIT_COMPLETE) { debug_info(" FI_TRANSMIT_COMPLETE"); } + if (flags & FI_DELIVERY_COMPLETE) { debug_info(" FI_DELIVERY_COMPLETE"); } + if (flags & FI_AFFINITY) { debug_info(" FI_AFFINITY"); } + if (flags & FI_COMMIT_COMPLETE) { debug_info(" FI_COMMIT_COMPLETE"); } + if (flags & FI_MATCH_COMPLETE) { debug_info(" FI_MATCH_COMPLETE"); } + + if (flags & FI_HMEM) { 
debug_info(" FI_HMEM"); } + if (flags & FI_VARIABLE_MSG) { debug_info(" FI_VARIABLE_MSG"); } + if (flags & FI_RMA_PMEM) { debug_info(" FI_RMA_PMEM"); } + if (flags & FI_SOURCE_ERR) { debug_info(" FI_SOURCE_ERR"); } + if (flags & FI_LOCAL_COMM) { debug_info(" FI_LOCAL_COMM"); } + if (flags & FI_REMOTE_COMM) { debug_info(" FI_REMOTE_COMM"); } + if (flags & FI_SHARED_AV) { debug_info(" FI_SHARED_AV"); } + if (flags & FI_PROV_ATTR_ONLY) { debug_info(" FI_PROV_ATTR_ONLY"); } + if (flags & FI_NUMERICHOST) { debug_info(" FI_NUMERICHOST"); } + if (flags & FI_RMA_EVENT) { debug_info(" FI_RMA_EVENT"); } + if (flags & FI_SOURCE) { debug_info(" FI_SOURCE"); } + if (flags & FI_NAMED_RX_CTX) { debug_info(" FI_NAMED_RX_CTX"); } + if (flags & FI_DIRECTED_RECV) { debug_info(" FI_DIRECTED_RECV"); } +} + +void print_fi_cq_err_entry(const fi_cq_err_entry& entry) { + debug_info("fi_cq_err_entry:"); + debug_info(" op_context: " << entry.op_context); + print_flags(entry.flags); + // debug_info(" flags: " << entry.flags); + debug_info(" len: " << entry.len); + debug_info(" buf: " << entry.buf); + debug_info(" data: " << entry.data); + debug_info(" tag: " << entry.tag); + debug_info(" olen: " << entry.olen); + debug_info(" err: " << entry.err); + debug_info(" prov_errno: " << entry.prov_errno); + debug_info(" err_data: " << entry.err_data); + debug_info(" err_data_size: " << entry.err_data_size); +} -int fabric::set_hints( domain &fabric_domain ) +int fabric::set_hints( fabric_ep &fabric_ep ) { - fabric_domain.hints = fi_allocinfo(); - if (!fabric_domain.hints) + fabric_ep.hints = fi_allocinfo(); + if (!fabric_ep.hints) return -FI_ENOMEM; /* @@ -39,13 +105,13 @@ int fabric::set_hints( domain &fabric_domain ) * to reliably send messages to peers without having to * listen/connect/accept. 
*/ - fabric_domain.hints->ep_attr->type = FI_EP_RDM; + fabric_ep.hints->ep_attr->type = FI_EP_RDM; /* * Request basic messaging capabilities from the provider (no tag * matching, no RMA, no atomic operations) */ - fabric_domain.hints->caps = FI_MSG; + fabric_ep.hints->caps = FI_MSG | FI_TAGGED; /* * Default to FI_DELIVERY_COMPLETE which will make sure completions do @@ -53,7 +119,7 @@ int fabric::set_hints( domain &fabric_domain ) * Otherwise, the client might get a completion and exit before the * server receives the message. This is to make the test simpler. */ - fabric_domain.hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; + fabric_ep.hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; /* * Set the mode bit to 0. Mode bits are used to convey requirements @@ -62,7 +128,7 @@ int fabric::set_hints( domain &fabric_domain ) * domain. On input to fi_getinfo, applications set the mode bits that * they support. */ - fabric_domain.hints->mode = FI_CONTEXT; + fabric_ep.hints->mode = FI_CONTEXT; /* * Set mr_mode to 0. mr_mode is used to specify the type of memory @@ -78,9 +144,11 @@ int fabric::set_hints( domain &fabric_domain ) return 0; } -int fabric::init ( domain &fabric ) +int fabric::init ( fabric_ep &fabric_ep ) { - int ret; + int ret; + struct fi_cq_attr cq_attr = {}; + struct fi_av_attr av_attr = {}; debug_info("[FABRIC] [fabric_init] Start"); @@ -97,10 +165,10 @@ int fabric::init ( domain &fabric ) * for the client because the fields refer to the server, not the * caller (client). */ - set_hints(fabric); + set_hints(fabric_ep); ret = fi_getinfo(fi_version(), NULL, NULL, 0, - fabric.hints, &fabric.info); + fabric_ep.hints, &fabric_ep.info); debug_info("[FABRIC] [fabric_init] fi_getinfo = "<fabric_attr, &fabric.fabric, NULL); + ret = fi_fabric(fabric_ep.info->fabric_attr, &fabric_ep.fabric, NULL); debug_info("[FABRIC] [fabric_init] fi_fabric = "< lock(s_mutex); - + /* * Initialize our endpoint. 
Endpoints are transport level communication * portals which are used to initiate and drive communication. There @@ -161,8 +218,8 @@ int fabric::new_comm ( domain &fabric_domain, comm &out_fabric_comm ) * Different providers support different types of endpoints. */ - ret = fi_endpoint(fabric_domain.domain, fabric_domain.info, &out_fabric_comm.ep, NULL); - debug_info("[FABRIC] [fabric_new_comm] fi_endpoint = "<fid, FI_SEND | FI_RECV); - debug_info("[FABRIC] [fabric_new_comm] fi_ep_bind = "<fid, FI_SEND | FI_RECV); + debug_info("[FABRIC] [fabric_init] fi_ep_bind = "<fid, 0); - debug_info("[FABRIC] [fabric_new_comm] fi_ep_bind = "<fid, 0); + debug_info("[FABRIC] [fabric_init] fi_ep_bind = "< lock(fabric_ep.thread_cq_mutex); + fabric_ep.thread_cq_is_running = false; + } + fabric_ep.thread_cq_cv.notify_one(); + fabric_ep.thread_cq.join(); + debug_info("[FABRIC] [destroy_thread_cq] End"); + return 0; +} + +int fabric::run_thread_cq(fabric_ep &fabric_ep) +{ + int ret = 0; + struct fi_cq_err_entry comp = {}; + std::unique_lock lock(fabric_ep.thread_cq_mutex); + + while (fabric_ep.thread_cq_is_running) { + if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { + break; + } + + if (fabric_ep.subs_to_wait == 0) { continue; } + { + std::unique_lock lock(fabric_ep.thread_fi_mutex); + ret = fi_cq_read(fabric_ep.cq, &comp, 1); + } + if (ret == -FI_EAGAIN){ continue; } + + fabric_context* context = static_cast(comp.op_context); + fabric_comm &comm = fabric_ep.m_comms[context->rank]; + context->entry = comp; + + { + std::unique_lock lock(comm.comm_mutex); + if (comp.flags & FI_SEND) { + debug_info("[FABRIC] [run_thread_cq] Send cq of rank_peer "<rank); + } + if (comp.flags & FI_RECV) { + debug_info("[FABRIC] [run_thread_cq] Recv cq of rank_peer "<rank); + } + + // print_fi_cq_err_entry(comp); + fabric_ep.subs_to_wait--; + comm.wait_context = false; + comm.comm_cv.notify_one(); + } + } + return ret; +} + 
+fabric::fabric_comm& fabric::new_comm ( fabric_ep &fabric_ep ) +{ + static uint32_t rank_counter = 0; + + debug_info("[FABRIC] [fabric_new_comm] Start"); + std::unique_lock lock(s_mutex); + + auto[key, inserted] = fabric_ep.m_comms.emplace(std::piecewise_construct, + std::forward_as_tuple(rank_counter), + std::forward_as_tuple()); + key->second.m_ep = &fabric_ep; + key->second.rank_peer = rank_counter; + rank_counter++; + debug_info("[FABRIC] [fabric_new_comm] rank_peer "<second.rank_peer); + debug_info("[FABRIC] [fabric_new_comm] End"); + return key->second; +} + +fabric::fabric_comm& fabric::any_comm ( fabric_ep &fabric_ep ) +{ + debug_info("[FABRIC] [any_comm] Start"); + // std::unique_lock lock(s_mutex); + + auto[key, inserted] = fabric_ep.m_comms.emplace(std::piecewise_construct, + std::forward_as_tuple(FABRIC_ANY_RANK), + std::forward_as_tuple()); + key->second.m_ep = &fabric_ep; + key->second.rank_peer = FABRIC_ANY_RANK; + key->second.fi_addr = FI_ADDR_UNSPEC; + debug_info("[FABRIC] [any_comm] End"); + return key->second; +} + +fabric::fabric_comm& fabric::get_any_rank_comm(fabric_ep &fabric_ep) +{ + return fabric_ep.m_comms[FABRIC_ANY_RANK]; +} + + +int fabric::get_addr( fabric_ep &fabric_ep, char * out_addr, size_t &size_addr ) { int ret = -1; debug_info("[FABRIC] [fabric_get_addr] Start"); - ret = fi_getname(&fabric_comm.ep->fid, out_addr, &size_addr); + ret = fi_getname(&fabric_ep.ep->fid, out_addr, &size_addr); if (ret) { printf("fi_getname error %d\n", ret); return ret; @@ -260,139 +424,252 @@ int fabric::get_addr( comm &fabric_comm, char * out_addr, size_t &size_addr ) return ret; } -int fabric::register_addr( comm &fabric_comm, char * addr_buf ) +int fabric::register_addr( fabric_ep &fabric_ep, fabric_comm& fabric_comm, char * addr_buf ) { int ret = -1; + fi_addr_t fi_addr; debug_info("[FABRIC] [fabric_register_addr] Start"); - ret = fi_av_insert(fabric_comm.av, addr_buf, 1, &fabric_comm.fi_addr, 0, NULL); + ret = fi_av_insert(fabric_ep.av, 
addr_buf, 1, &fi_addr, 0, NULL); if (ret != 1) { - printf("av insert error\n"); - return -FI_ENOSYS; + printf("av insert error %d\n", ret); + return ret; } + + fabric_comm.fi_addr = fi_addr; + debug_info("[FABRIC] [fabric_register_addr] End = "<> 32; + // msg.error = ret; + // debug_info("[FABRIC] [fabric_wait] End = "< lock(fabric_ep.thread_fi_mutex); + ret = fi_tsend(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_send, &fabric_comm.context); if (ret == -FI_EAGAIN) - (void) fi_cq_read(fabric_comm.cq, NULL, 0); + (void) fi_cq_read(fabric_ep.cq, NULL, 0); + + // debug_info("fi_tsend "< lock(fabric_comm.comm_mutex); + fabric_ep.subs_to_wait++; + fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); + fabric_comm.wait_context = true; + } + + msg.size = size; + msg.error = fabric_comm.context.entry.err; + msg.tag = tag_send & 0x0000'0000'0000'FFFF; + msg.rank_peer = (tag_send & 0xFFFF'FF00'0000'0000) >> 40; + msg.rank_self_in_peer = (tag_send & 0x0000'00FF'FFFF'0000) >> 16; + + debug_info("[FABRIC] [fabric_send] fabric_comm.context.entry.tag "< lock(fabric_ep.thread_fi_mutex); + ret = fi_trecv(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_recv, mask, &fabric_comm.context); if (ret == -FI_EAGAIN) - (void) fi_cq_read(fabric_comm.cq, NULL, 0); + (void) fi_cq_read(fabric_ep.cq, NULL, 0); + // debug_info("fi_trecv "< lock(fabric_comm.comm_mutex); + fabric_ep.subs_to_wait++; + fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); + fabric_comm.wait_context = true; } - ret = fabric::wait(fabric_comm); - if (ret < 0){ - printf("error waiting recv buffer (%d)\n", ret); - return -1; - } + msg.size = size; + msg.error = fabric_comm.context.entry.err; + + msg.tag = fabric_comm.context.entry.tag & 0x0000'0000'0000'FFFF; + msg.rank_self_in_peer = (fabric_comm.context.entry.tag & 0xFFFF'FF00'0000'0000) >> 40; + msg.rank_peer = (fabric_comm.context.entry.tag & 0x0000'00FF'FFFF'0000) >> 16; + + 
debug_info("[FABRIC] [fabric_recv] fabric_comm.context.entry.tag "< lock(s_mutex); - debug_info("[FABRIC] [fabric_close_comm] Close endpoint"); - ret = fi_close(&fabric_comm.ep->fid); - if (ret) - printf("warning: error closing EP (%d)\n", ret); - - debug_info("[FABRIC] [fabric_close_comm] Close address vector"); - ret = fi_close(&fabric_comm.av->fid); - if (ret) - printf("warning: error closing AV (%d)\n", ret); + remove_addr(fabric_ep, fabric_comm); - debug_info("[FABRIC] [fabric_close_comm] Close completion queue"); - ret = fi_close(&fabric_comm.cq->fid); - if (ret) - printf("warning: error closing CQ (%d)\n", ret); + fabric_ep.m_comms.erase(fabric_comm.rank_peer); debug_info("[FABRIC] [fabric_close_comm] End = "< lock(s_mutex); + + destroy_thread_cq(fabric_ep); + debug_info("[FABRIC] [fabric_close_comm] Close endpoint"); + if (fabric_ep.ep){ + ret = fi_close(&fabric_ep.ep->fid); + if (ret) + printf("warning: error closing EP (%d)\n", ret); + fabric_ep.ep = nullptr; + } + + debug_info("[FABRIC] [fabric_close_comm] Close address vector"); + if (fabric_ep.av){ + ret = fi_close(&fabric_ep.av->fid); + if (ret) + printf("warning: error closing AV (%d)\n", ret); + fabric_ep.av = nullptr; + } + + debug_info("[FABRIC] [fabric_close_comm] Close completion queue"); + if (fabric_ep.cq){ + ret = fi_close(&fabric_ep.cq->fid); + if (ret) + printf("warning: error closing CQ (%d)\n", ret); + fabric_ep.cq = nullptr; + } + debug_info("[FABRIC] [fabric_destroy] Close domain"); - ret = fi_close(&domain.domain->fid); - if (ret) - printf("warning: error closing domain (%d)\n", ret); + if (fabric_ep.domain){ + ret = fi_close(&fabric_ep.domain->fid); + if (ret) + printf("warning: error closing domain (%d)\n", ret); + fabric_ep.domain = nullptr; + } debug_info("[FABRIC] [fabric_destroy] Close fabric"); - ret = fi_close(&domain.fabric->fid); - if (ret) - printf("warning: error closing fabric (%d)\n", ret); + if (fabric_ep.fabric){ + ret = fi_close(&fabric_ep.fabric->fid); + if (ret) + 
printf("warning: error closing fabric (%d)\n", ret); + fabric_ep.fabric = nullptr; + } debug_info("[FABRIC] [fabric_destroy] Free hints "); - if (domain.hints) - fi_freeinfo(domain.hints); + if (fabric_ep.hints){ + fi_freeinfo(fabric_ep.hints); + fabric_ep.hints = nullptr; + } debug_info("[FABRIC] [fabric_destroy] Free info "); - if (domain.info) - fi_freeinfo(domain.info); + if (fabric_ep.info){ + fi_freeinfo(fabric_ep.info); + fabric_ep.info = nullptr; + } + debug_info("[FABRIC] [fabric_destroy] End = "< #include #include +#include #include +#include +#include +#include +#include namespace XPN { class fabric { public: - struct domain { - struct fi_info *hints, *info; - struct fid_fabric *fabric; - struct fid_domain *domain; + constexpr static const uint32_t FABRIC_ANY_RANK = 0xFFFFFFFF; + + struct fabric_ep; + + struct fabric_context{ + // context necesary for fabric interface + struct fi_context context; + uint32_t rank; + struct fi_cq_err_entry entry; }; - struct comm { - struct fid_ep *ep; - struct fid_av *av; - struct fid_cq *cq; + struct fabric_comm{ + uint32_t rank_peer; + uint32_t rank_self_in_peer; + fi_addr_t fi_addr; - struct fi_context recv_context; - struct fi_context send_context; + + fabric_ep *m_ep; + + std::mutex comm_mutex; + std::condition_variable comm_cv; + bool wait_context = true; + fabric_context context; }; + + struct fabric_ep { + struct fi_info *hints = nullptr; + struct fi_info *info = nullptr; + struct fid_fabric *fabric = nullptr; + struct fid_domain *domain = nullptr; + struct fid_ep *ep = nullptr; + struct fid_av *av = nullptr; + struct fid_cq *cq = nullptr; + std::unordered_map m_comms; + + std::thread thread_cq; + std::mutex thread_cq_mutex; + std::mutex thread_fi_mutex; + std::condition_variable thread_cq_cv; + bool thread_cq_is_running = true; + std::atomic_uint32_t subs_to_wait = 0; + }; + + struct fabric_msg{ + uint64_t size = 0; + uint32_t rank_peer = 0; + uint32_t rank_self_in_peer = 0; + uint32_t tag = 0; + int32_t error = 
0; + }; + private: - static int set_hints(domain &fabric_domain); + static int set_hints(fabric_ep &fabric_ep); + static int run_thread_cq(fabric_ep &fabric_ep); + static fabric_comm& any_comm(fabric_ep &fabric_ep); public: - static int init(domain &fabric); + static int init(fabric_ep &fabric); + static int init_thread_cq(fabric_ep &fabric_ep); + + static int destroy(fabric_ep &fabric_ep); + static int destroy_thread_cq(fabric_ep &fabric_ep); - static int new_comm(domain &domain, comm &out_fabric_comm); + static fabric_comm& new_comm(fabric_ep &fabric_ep); + static fabric_comm& get_any_rank_comm(fabric_ep &fabric_ep); + static int close (fabric_ep& fabric_ep, fabric_comm &fabric_comm); - static int get_addr(comm &fabric_comm, char *out_addr, size_t &size_addr); - static int register_addr(comm &fabric_comm, char *addr_buf); - static int wait(comm &fabric_comm); - static int send(comm &fabric, const void *buffer, size_t size); - static int recv(comm &fabric, void *buffer, size_t size); - static int close(comm &fabric); - static int destroy(domain &domain); + static int get_addr(fabric_ep &fabric_ep, char *out_addr, size_t &size_addr); + static int register_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm, char * addr_buf); + static int remove_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm); + // static fabric_msg wait(fabric_ep &fabric_ep); + static fabric_msg send(fabric_ep &fabric_ep, fabric_comm& fabric_comm, const void * buffer, size_t size, uint32_t tag); + static fabric_msg recv(fabric_ep &fabric_ep, fabric_comm& fabric_comm, void *buffer, size_t size, uint32_t tag); static std::mutex s_mutex; }; diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp index afedaa112..c3e944159 100644 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp @@ -29,11 +29,13 @@ namespace XPN { 
+fabric::fabric_ep nfi_fabric_server_control_comm::m_ep; + nfi_fabric_server_control_comm::nfi_fabric_server_control_comm () { debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> Begin"); - - fabric::init(m_domain); + if (m_ep.ep == nullptr) + fabric::init(m_ep); debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> End"); } @@ -42,7 +44,8 @@ nfi_fabric_server_control_comm::~nfi_fabric_server_control_comm() { debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [~nfi_fabric_server_control_comm] >> Begin"); - fabric::destroy(m_domain); + if (m_ep.ep != nullptr && m_ep.m_comms.size() == 1) + fabric::destroy(m_ep); debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [~nfi_fabric_server_control_comm] >> End"); } @@ -51,17 +54,12 @@ nfi_xpn_server_comm* nfi_fabric_server_control_comm::connect ( const std::string { int ret; int connection_socket; - fabric::comm new_fabric_comm; char port_name[MAX_PORT_NAME]; debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] >> Begin\n"); - ret = fabric::new_comm(m_domain, new_fabric_comm); - if (ret < 0){ - printf("Error: fabric_new_comm %d\n", ret); - return nullptr; - } + fabric::fabric_comm& new_fabric_comm = fabric::new_comm(m_ep); // Lookup port name ret = socket::client_connect(srv_name, socket::get_xpn_port() ,connection_socket); @@ -103,7 +101,7 @@ nfi_xpn_server_comm* nfi_fabric_server_control_comm::connect ( const std::string return nullptr; } - ret = fabric::register_addr(new_fabric_comm, ad_buff); + ret = fabric::register_addr(m_ep, new_fabric_comm, ad_buff); if (ret < 0){ print("[Server="<m_comm); + ret = fabric::close(m_ep, in_comm->m_comm); if (ret < 0) { printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails"); } @@ -174,7 +195,7 @@ void nfi_fabric_server_control_comm::disconnect(nfi_xpn_server_comm *comm) } int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { - int ret; + 
fabric::fabric_msg ret; int msg[2]; debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] >> Begin"); @@ -186,8 +207,8 @@ int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { // Send message debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] Write operation send tag "<< msg[0]); - ret = fabric::send(m_comm, msg, sizeof(msg)); - if (ret < 0) { + ret = fabric::send(*m_comm.m_ep, m_comm, msg, sizeof(msg), 0); + if (ret.error < 0) { debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] ERROR: socket::send < 0 : "<< ret); return -1; } @@ -199,7 +220,7 @@ int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { } int64_t nfi_fabric_server_comm::write_data(const void *data, int64_t size) { - int ret; + fabric::fabric_msg ret; debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] >> Begin"); @@ -212,23 +233,24 @@ int64_t nfi_fabric_server_comm::write_data(const void *data, int64_t size) { return -1; } + int tag = (int)(pthread_self() % 32450) + 1; + // Send message debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] Write data"); - ret = fabric::send(m_comm, data, size); - if (ret < 0) { + ret = fabric::send(*m_comm.m_ep, m_comm, data, size, tag); + if (ret.error < 0) { printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] ERROR: MPI_Send fails"); - size = 0; } debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] << End"); // Return bytes written - return size; + return ret.size; } int64_t nfi_fabric_server_comm::read_data(void *data, ssize_t size) { - int ret; + fabric::fabric_msg ret; debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] >> Begin"); @@ -241,19 +263,20 @@ int64_t nfi_fabric_server_comm::read_data(void *data, ssize_t size) { return -1; } + int tag = (int)(pthread_self() % 32450) + 1; + // Get message debug_info("[NFI_FABRIC_SERVER_COMM] 
[nfi_fabric_server_comm_read_data] Read data"); - ret = fabric::recv(m_comm, data, size); - if (ret < 0) { + ret = fabric::recv(*m_comm.m_ep, m_comm, data, size, tag); + if (ret.error < 0) { printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] ERROR: MPI_Recv fails"); - size = 0; } debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] << End"); // Return bytes read - return size; + return ret.size; } } //namespace XPN diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp index fe95e4a91..fc8c1547c 100644 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp @@ -33,13 +33,13 @@ namespace XPN class nfi_fabric_server_comm : public nfi_xpn_server_comm { public: - nfi_fabric_server_comm(fabric::comm comm) : m_comm(comm) {} + nfi_fabric_server_comm(fabric::fabric_comm& comm) : m_comm(comm) {} int64_t write_operation(xpn_server_ops op) override; int64_t read_data(void *data, int64_t size) override; int64_t write_data(const void *data, int64_t size) override; public: - fabric::comm m_comm; + fabric::fabric_comm& m_comm; }; class nfi_fabric_server_control_comm : public nfi_xpn_server_control_comm @@ -52,7 +52,7 @@ namespace XPN void disconnect(nfi_xpn_server_comm* comm) override; private: - fabric::domain m_domain; + static fabric::fabric_ep m_ep; }; } // namespace XPN diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 154f10a8e..47826acd7 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -33,7 +33,7 @@ fabric_server_control_comm::fabric_server_control_comm () { debug_info("[Server="<> Begin"); - fabric::init(m_domain); + fabric::init(m_ep); debug_info("[Server="<> End"); } @@ -42,7 +42,7 @@ 
fabric_server_control_comm::~fabric_server_control_comm() { debug_info("[Server="<> Begin"); - fabric::destroy(m_domain); + fabric::destroy(m_ep); debug_info("[Server="<> End"); } @@ -51,46 +51,7 @@ xpn_server_comm* fabric_server_control_comm::accept ( int socket ) { debug_info("[Server="<> Begin"); - // // Accept - // debug_info("[Server="<(comm); - fabric::close(in_comm->m_comm); + fabric::close(m_ep, in_comm->m_comm); delete comm; @@ -166,19 +147,19 @@ void fabric_server_control_comm::disconnect ( xpn_server_comm* comm ) int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_client_id, int &tag_client_id ) { - int ret; - int msg[2]; + fabric::fabric_msg ret = {}; + int msg[2] = {}; debug_info("[Server="<> Begin"); // Get message debug_info("[Server="<(msg[1]); @@ -191,7 +172,7 @@ int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_clien int64_t fabric_server_comm::read_data ( void *data, int64_t size, [[maybe_unused]] int rank_client_id, [[maybe_unused]] int tag_client_id ) { - int ret; + fabric::fabric_msg ret = {}; debug_info("[Server="<> Begin"); @@ -206,9 +187,9 @@ int64_t fabric_server_comm::read_data ( void *data, int64_t size, [[maybe_unused // Get message debug_info("[Server="<> Begin"); @@ -237,15 +218,16 @@ int64_t fabric_server_comm::write_data ( const void *data, int64_t size, [[maybe // Send message debug_info("[Server="< buf(BUF_SIZE); std::vector msg(BUF_SIZE); -fabric::comm fabric_comm; -fabric::domain fabric_domain; +fabric::fabric_ep fabric_ep; +fabric::fabric_comm* fabric_comm; static int sock_listen(char *node, const char *service) { @@ -212,7 +212,7 @@ static int exchange_addresses(void) int ret; size_t addrlen = BUF_SIZE_AUX; - ret = fabric::get_addr(fabric_comm, addr_buf, addrlen); + ret = fabric::get_addr(fabric_ep, addr_buf, addrlen); if (ret) { printf("fi_getname error %d\n", ret); return ret; @@ -231,22 +231,24 @@ static int exchange_addresses(void) return ret; } - ret = 
fabric::register_addr(fabric_comm, addr_buf); + ret = fabric::register_addr(fabric_ep, *fabric_comm, addr_buf); if (ret != 1) { printf("av insert error\n"); return -FI_ENOSYS; } + + return 0; } static int post_recv(void) { - int ret; + fabric::fabric_msg ret; - ret = fabric::recv(fabric_comm, buf.data(), buf.size()*sizeof(buf[0])); + ret = fabric::recv(fabric_ep, *fabric_comm, buf.data(), buf.size()*sizeof(buf[0]), 0); - return ret; + return ret.size; } static int post_send(void) @@ -258,11 +260,11 @@ static int post_send(void) }else{ sprintf(msg.data(), "Hello, client! I am the server you've been waiting for! %d", count++); } - int ret; + fabric::fabric_msg ret; - ret = fabric::send(fabric_comm, msg.data(), msg.size()*sizeof(msg[0])); + ret = fabric::send(fabric_ep, *fabric_comm, msg.data(), msg.size()*sizeof(msg[0]), 0); - return ret; + return ret.size; } static int run(void) @@ -339,13 +341,13 @@ int main(int argc, char **argv) * provider. */ - ret = fabric::init(fabric_domain); + ret = fabric::init(fabric_ep); if (ret) goto out; - std::cout << "[FABRIC] [fabric_init] "< Date: Thu, 7 Nov 2024 09:48:48 +0100 Subject: [PATCH 08/60] Improvements in the libfabric implementation --- src/base_cpp/fabric.cpp | 179 +++++++++++++++++++++------------------- src/base_cpp/fabric.hpp | 7 +- 2 files changed, 100 insertions(+), 86 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index c580a80f9..ee62a8ace 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -232,9 +232,9 @@ int fabric::init ( fabric_ep &fabric_ep ) * will be separate CQs for sends and receives. 
*/ - cq_attr.size = 128; + // cq_attr.size = 128; cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.wait_obj = FI_WAIT_UNSPEC; + // cq_attr.wait_obj = FI_WAIT_UNSPEC; ret = fi_cq_open(fabric_ep.domain, &cq_attr, &fabric_ep.cq, NULL); debug_info("[FABRIC] [fabric_init] fi_cq_open = "< comp(comp_count); std::unique_lock lock(fabric_ep.thread_cq_mutex); while (fabric_ep.thread_cq_is_running) { - if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { - break; - } - - if (fabric_ep.subs_to_wait == 0) { continue; } - { - std::unique_lock lock(fabric_ep.thread_fi_mutex); - ret = fi_cq_read(fabric_ep.cq, &comp, 1); + // if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { + // break; + // } + if (fabric_ep.subs_to_wait == 0) { + fabric_ep.thread_cq_cv.wait(lock, [&fabric_ep]{ return fabric_ep.subs_to_wait != 0 || !fabric_ep.thread_cq_is_running; }); } + // if (!fabric_ep.thread_cq_is_running) break; + // if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { + // break; + // } + + // if (fabric_ep.subs_to_wait == 0) { continue; } + // { + // std::unique_lock lock(fabric_ep.thread_fi_mutex); + ret = fi_cq_read(fabric_ep.cq, comp.data(), comp_count); + // ret = fi_cq_read(fabric_ep.cq, &comp[0], 8); + // } if (ret == -FI_EAGAIN){ continue; } - fabric_context* context = static_cast(comp.op_context); - fabric_comm &comm = fabric_ep.m_comms[context->rank]; - context->entry = comp; + //TODO: handle error + if (ret < 0) { continue; } + // Handle the cq entries + for (int i = 0; i < ret; i++) { - std::unique_lock lock(comm.comm_mutex); - if (comp.flags & FI_SEND) { - debug_info("[FABRIC] [run_thread_cq] Send cq of rank_peer "<rank); + fabric_context* context = static_cast(comp[i].op_context); + fabric_comm &comm = fabric_ep.m_comms[context->rank]; + 
context->entry = comp[i]; + + { + std::unique_lock lock(comm.comm_mutex); + if (comp[i].flags & FI_SEND) { + debug_info("[FABRIC] [run_thread_cq] Send cq of rank_peer "<rank); + } + if (comp[i].flags & FI_RECV) { + debug_info("[FABRIC] [run_thread_cq] Recv cq of rank_peer "<rank); + } + + // print_fi_cq_err_entry(comp); + fabric_ep.subs_to_wait--; + comm.wait_context = false; + comm.comm_cv.notify_one(); } - if (comp.flags & FI_RECV) { - debug_info("[FABRIC] [run_thread_cq] Recv cq of rank_peer "<rank); - } - - // print_fi_cq_err_entry(comp); - fabric_ep.subs_to_wait--; - comm.wait_context = false; - comm.comm_cv.notify_one(); } } return ret; @@ -454,35 +471,14 @@ int fabric::remove_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm) return ret; } -// fabric::fabric_msg fabric::wait ( fabric_ep &fabric_ep ) -// { - // struct fi_cq_err_entry comp = {}; - // int ret; - // fabric_msg msg = {}; - - // debug_info("[FABRIC] [fabric_wait] Start"); - // // ret = fi_cq_sreadfrom(fabric_ep.cq, &comp, 1, &fabric_ep.fi_addr, NULL, -1); - // // ret = fi_cq_sread(fabric_ep.cq, &comp, 1, NULL, -1); - // do { - // ret = fi_cq_read(fabric_ep.cq, &comp, 1); - // if (ret < 0 && ret != -FI_EAGAIN) { - // printf("error reading cq (%d)\n", ret); - // msg.error = ret; - // return msg; - // } - // } while (ret != 1); - // if (ret < 0){ - // printf("error reading cq (%d)\n", ret); - // } - // print_fi_cq_err_entry(comp); - // msg.size = comp.len; - // msg.tag = comp.tag & 0x00000000FFFFFFFF; - // msg.rank = comp.tag >> 32; - // msg.error = ret; - // debug_info("[FABRIC] [fabric_wait] End = "< lock(fabric_comm.comm_mutex); + fabric_ep.subs_to_wait++; + fabric_ep.thread_cq_cv.notify_one(); + fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); + fabric_comm.wait_context = true; +} fabric::fabric_msg fabric::send ( fabric_ep &fabric_ep, fabric_comm& fabric_comm, const void * buffer, size_t size, uint32_t tag ) { @@ -499,32 +495,47 @@ fabric::fabric_msg 
fabric::send ( fabric_ep &fabric_ep, fabric_comm& fabric_comm debug_info("[FABRIC] [fabric_send] Start size "< lock(fabric_ep.thread_fi_mutex); - ret = fi_tsend(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_send, &fabric_comm.context); - - if (ret == -FI_EAGAIN) - (void) fi_cq_read(fabric_ep.cq, NULL, 0); + if (size > fabric_ep.info->tx_attr->inject_size){ - // debug_info("fi_tsend "< lock(fabric_comm.comm_mutex); - fabric_ep.subs_to_wait++; - fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); - fabric_comm.wait_context = true; + do { + // std::unique_lock lock(fabric_ep.thread_fi_mutex); + ret = fi_tsend(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_send, &fabric_comm.context); + + if (ret == -FI_EAGAIN) + (void) fi_cq_read(fabric_ep.cq, NULL, 0); + + // debug_info("fi_tsend "< lock(fabric_comm.comm_mutex); + // fabric_ep.subs_to_wait++; + // fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); + // fabric_comm.wait_context = true; + // } + // msg.error = fabric_comm.context.entry.err; + }else{ + + do { + ret = fi_tinject(fabric_ep.ep, buffer, size, fabric_comm.fi_addr, tag_send); + + if (ret == -FI_EAGAIN) + (void) fi_cq_read(fabric_ep.cq, NULL, 0); + } while (ret == -FI_EAGAIN); + debug_info("[FABRIC] [fabric_send] fi_tinject for rank_peer "<> 40; @@ -557,7 +568,7 @@ fabric::fabric_msg fabric::recv ( fabric_ep &fabric_ep, fabric_comm& fabric_comm debug_info("[FABRIC] [fabric_recv] Start size "< lock(fabric_ep.thread_fi_mutex); + // std::unique_lock lock(fabric_ep.thread_fi_mutex); ret = fi_trecv(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_recv, mask, &fabric_comm.context); if (ret == -FI_EAGAIN) @@ -572,15 +583,17 @@ fabric::fabric_msg fabric::recv ( fabric_ep &fabric_ep, fabric_comm& fabric_comm } debug_info("[FABRIC] [fabric_recv] Waiting on mutex of rank_peer "< lock(fabric_comm.comm_mutex); - fabric_ep.subs_to_wait++; - fabric_comm.comm_cv.wait(lock, 
[&fabric_comm]{ return !fabric_comm.wait_context; }); - fabric_comm.wait_context = true; - } + + wait(fabric_ep, fabric_comm); + // { + // std::unique_lock lock(fabric_comm.comm_mutex); + // fabric_ep.subs_to_wait++; + // fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); + // fabric_comm.wait_context = true; + // } msg.size = size; - msg.error = fabric_comm.context.entry.err; + // msg.error = fabric_comm.context.entry.err; msg.tag = fabric_comm.context.entry.tag & 0x0000'0000'0000'FFFF; msg.rank_self_in_peer = (fabric_comm.context.entry.tag & 0xFFFF'FF00'0000'0000) >> 40; diff --git a/src/base_cpp/fabric.hpp b/src/base_cpp/fabric.hpp index dcae0e90a..f5eda07e4 100644 --- a/src/base_cpp/fabric.hpp +++ b/src/base_cpp/fabric.hpp @@ -32,6 +32,7 @@ #include #include #include +#include namespace XPN { @@ -45,7 +46,7 @@ class fabric { // context necesary for fabric interface struct fi_context context; uint32_t rank; - struct fi_cq_err_entry entry; + struct fi_cq_tagged_entry entry; }; struct fabric_comm{ @@ -74,7 +75,7 @@ class fabric { std::thread thread_cq; std::mutex thread_cq_mutex; - std::mutex thread_fi_mutex; + // std::mutex thread_fi_mutex; std::condition_variable thread_cq_cv; bool thread_cq_is_running = true; std::atomic_uint32_t subs_to_wait = 0; @@ -106,7 +107,7 @@ class fabric { static int get_addr(fabric_ep &fabric_ep, char *out_addr, size_t &size_addr); static int register_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm, char * addr_buf); static int remove_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm); - // static fabric_msg wait(fabric_ep &fabric_ep); + static void wait(fabric_ep &fabric_ep, fabric_comm &fabric_comm); static fabric_msg send(fabric_ep &fabric_ep, fabric_comm& fabric_comm, const void * buffer, size_t size, uint32_t tag); static fabric_msg recv(fabric_ep &fabric_ep, fabric_comm& fabric_comm, void *buffer, size_t size, uint32_t tag); From 3863695430f753174f6a842a273906544dfd1cd7 Mon Sep 17 00:00:00 
2001 From: Dariomnz Date: Thu, 7 Nov 2024 11:08:53 +0100 Subject: [PATCH 09/60] Implement a threadless mode for fabric implementation --- src/base_cpp/fabric.cpp | 58 ++++++++++++++----- src/base_cpp/fabric.hpp | 7 ++- .../nfi_fabric_server_comm.cpp | 6 +- .../fabric_server/fabric_server_comm.cpp | 4 +- .../fabric_server/fabric_server_comm.hpp | 2 +- src/xpn_server/xpn_server_comm.cpp | 2 +- 6 files changed, 57 insertions(+), 22 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index ee62a8ace..94c720461 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -93,7 +93,7 @@ void print_fi_cq_err_entry(const fi_cq_err_entry& entry) { debug_info(" err_data: " << entry.err_data); debug_info(" err_data_size: " << entry.err_data_size); } - + int fabric::set_hints( fabric_ep &fabric_ep ) { fabric_ep.hints = fi_allocinfo(); @@ -144,7 +144,7 @@ int fabric::set_hints( fabric_ep &fabric_ep ) return 0; } -int fabric::init ( fabric_ep &fabric_ep ) +int fabric::init ( fabric_ep &fabric_ep, bool have_threads ) { int ret; struct fi_cq_attr cq_attr = {}; @@ -153,6 +153,8 @@ int fabric::init ( fabric_ep &fabric_ep ) debug_info("[FABRIC] [fabric_init] Start"); std::unique_lock lock(s_mutex); + + fabric_ep.have_thread = have_threads; /* * The first libfabric call to happen for initialization is fi_getinfo * which queries libfabric and returns any appropriate providers that @@ -311,6 +313,8 @@ int fabric::init ( fabric_ep &fabric_ep ) int fabric::init_thread_cq(fabric_ep &fabric_ep) { + if (!fabric_ep.have_thread) return 0; + debug_info("[FABRIC] [init_thread_cq] Start"); fabric_ep.thread_cq = std::thread([&fabric_ep](){ run_thread_cq(fabric_ep); @@ -321,6 +325,8 @@ int fabric::init_thread_cq(fabric_ep &fabric_ep) int fabric::destroy_thread_cq(fabric_ep &fabric_ep) { + if (!fabric_ep.have_thread) return 0; + debug_info("[FABRIC] [destroy_thread_cq] Start"); { std::lock_guard lock(fabric_ep.thread_cq_mutex); @@ -341,12 +347,12 @@ int 
fabric::run_thread_cq(fabric_ep &fabric_ep) std::unique_lock lock(fabric_ep.thread_cq_mutex); while (fabric_ep.thread_cq_is_running) { - // if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { - // break; - // } - if (fabric_ep.subs_to_wait == 0) { - fabric_ep.thread_cq_cv.wait(lock, [&fabric_ep]{ return fabric_ep.subs_to_wait != 0 || !fabric_ep.thread_cq_is_running; }); + if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { + break; } + // if (fabric_ep.subs_to_wait == 0) { + // fabric_ep.thread_cq_cv.wait(lock, [&fabric_ep]{ return fabric_ep.subs_to_wait != 0 || !fabric_ep.thread_cq_is_running; }); + // } // if (!fabric_ep.thread_cq_is_running) break; // if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { // break; @@ -380,7 +386,7 @@ int fabric::run_thread_cq(fabric_ep &fabric_ep) } // print_fi_cq_err_entry(comp); - fabric_ep.subs_to_wait--; + // fabric_ep.subs_to_wait--; comm.wait_context = false; comm.comm_cv.notify_one(); } @@ -473,11 +479,37 @@ int fabric::remove_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm) void fabric::wait ( fabric_ep& fabric_ep, fabric_comm &fabric_comm ) { - std::unique_lock lock(fabric_comm.comm_mutex); - fabric_ep.subs_to_wait++; - fabric_ep.thread_cq_cv.notify_one(); - fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); - fabric_comm.wait_context = true; + if (fabric_ep.have_thread){ + debug_info("[FABRIC] [wait] With threads"); + std::unique_lock lock(fabric_comm.comm_mutex); + // fabric_ep.subs_to_wait++; + // fabric_ep.thread_cq_cv.notify_one(); + fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); + fabric_comm.wait_context = true; + }else{ + debug_info("[FABRIC] [wait] Without threads"); + int ret = 0; + fi_cq_tagged_entry 
comp = {}; + do{ + ret = fi_cq_read(fabric_ep.cq, &comp, 1); + + if (ret == -FI_EAGAIN){ continue; } + + //TODO: handle error + if (ret < 0) { continue; } + + // Handle the cq entries + fabric_context* context = static_cast(comp.op_context); + context->entry = comp; + if (comp.flags & FI_SEND) { + debug_info("[FABRIC] [run_thread_cq] Send cq of rank_peer "<rank); + } + if (comp.flags & FI_RECV) { + debug_info("[FABRIC] [run_thread_cq] Recv cq of rank_peer "<rank); + } + // print_fi_cq_err_entry(comp); + }while (ret == -FI_EAGAIN); + } } fabric::fabric_msg fabric::send ( fabric_ep &fabric_ep, fabric_comm& fabric_comm, const void * buffer, size_t size, uint32_t tag ) diff --git a/src/base_cpp/fabric.hpp b/src/base_cpp/fabric.hpp index f5eda07e4..3fcecb50d 100644 --- a/src/base_cpp/fabric.hpp +++ b/src/base_cpp/fabric.hpp @@ -73,6 +73,7 @@ class fabric { struct fid_cq *cq = nullptr; std::unordered_map m_comms; + bool have_thread = true; std::thread thread_cq; std::mutex thread_cq_mutex; // std::mutex thread_fi_mutex; @@ -93,12 +94,12 @@ class fabric { static int set_hints(fabric_ep &fabric_ep); static int run_thread_cq(fabric_ep &fabric_ep); static fabric_comm& any_comm(fabric_ep &fabric_ep); -public: - static int init(fabric_ep &fabric); static int init_thread_cq(fabric_ep &fabric_ep); + static int destroy_thread_cq(fabric_ep &fabric_ep); +public: + static int init(fabric_ep &fabric, bool have_threads = true); static int destroy(fabric_ep &fabric_ep); - static int destroy_thread_cq(fabric_ep &fabric_ep); static fabric_comm& new_comm(fabric_ep &fabric_ep); static fabric_comm& get_any_rank_comm(fabric_ep &fabric_ep); diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp index c3e944159..852fa74b1 100644 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp @@ -34,8 +34,10 @@ fabric::fabric_ep 
nfi_fabric_server_control_comm::m_ep; nfi_fabric_server_control_comm::nfi_fabric_server_control_comm () { debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> Begin"); - if (m_ep.ep == nullptr) - fabric::init(m_ep); + if (m_ep.ep == nullptr){ + int xpn_thread = xpn_env::get_instance().xpn_thread; + fabric::init(m_ep, xpn_thread); + } debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> End"); } diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 47826acd7..ba55db68b 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -29,11 +29,11 @@ namespace XPN { -fabric_server_control_comm::fabric_server_control_comm () +fabric_server_control_comm::fabric_server_control_comm (xpn_server_params ¶ms) { debug_info("[Server="<> Begin"); - fabric::init(m_ep); + fabric::init(m_ep, params.have_threads()); debug_info("[Server="<> End"); } diff --git a/src/xpn_server/fabric_server/fabric_server_comm.hpp b/src/xpn_server/fabric_server/fabric_server_comm.hpp index 0f840501f..5f6f351c7 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.hpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.hpp @@ -46,7 +46,7 @@ namespace XPN class fabric_server_control_comm : public xpn_server_control_comm { public: - fabric_server_control_comm(); + fabric_server_control_comm(xpn_server_params ¶ms); ~fabric_server_control_comm() override; xpn_server_comm* accept(int socket) override; diff --git a/src/xpn_server/xpn_server_comm.cpp b/src/xpn_server/xpn_server_comm.cpp index 002a824f6..c111531f8 100644 --- a/src/xpn_server/xpn_server_comm.cpp +++ b/src/xpn_server/xpn_server_comm.cpp @@ -38,7 +38,7 @@ namespace XPN case XPN_SERVER_TYPE_SCK: return std::make_unique(); case XPN_SERVER_TYPE_FABRIC: - return std::make_unique(); + return std::make_unique(params); default: fprintf(stderr, "[XPN_SERVER] 
[xpn_server_control_comm] server_type '%d' not recognized\n", params.server_type); } return nullptr; From 67f1d7c939395a6ad6e304c6073aeb39503087c6 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 8 Nov 2024 11:24:08 +0100 Subject: [PATCH 10/60] Implement scalable endpoints --- src/base_cpp/fabric.cpp | 226 +++++++++++------- src/base_cpp/fabric.hpp | 4 +- .../nfi_fabric_server_comm.cpp | 7 + .../fabric_server/fabric_server_comm.cpp | 9 + src/xpn_server/xpn_server.cpp | 2 +- 5 files changed, 154 insertions(+), 94 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index 94c720461..519847e0a 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -78,8 +78,8 @@ void print_flags(uint64_t flags) { if (flags & FI_DIRECTED_RECV) { debug_info(" FI_DIRECTED_RECV"); } } -void print_fi_cq_err_entry(const fi_cq_err_entry& entry) { - debug_info("fi_cq_err_entry:"); +void print_fi_cq_tagged_entry(const fi_cq_tagged_entry& entry) { + debug_info("fi_cq_tagged_entry:"); debug_info(" op_context: " << entry.op_context); print_flags(entry.flags); // debug_info(" flags: " << entry.flags); @@ -87,11 +87,11 @@ void print_fi_cq_err_entry(const fi_cq_err_entry& entry) { debug_info(" buf: " << entry.buf); debug_info(" data: " << entry.data); debug_info(" tag: " << entry.tag); - debug_info(" olen: " << entry.olen); - debug_info(" err: " << entry.err); - debug_info(" prov_errno: " << entry.prov_errno); - debug_info(" err_data: " << entry.err_data); - debug_info(" err_data_size: " << entry.err_data_size); + // debug_info(" olen: " << entry.olen); + // debug_info(" err: " << entry.err); + // debug_info(" prov_errno: " << entry.prov_errno); + // debug_info(" err_data: " << entry.err_data); + // debug_info(" err_data_size: " << entry.err_data_size); } int fabric::set_hints( fabric_ep &fabric_ep ) @@ -220,41 +220,10 @@ int fabric::init ( fabric_ep &fabric_ep, bool have_threads ) * Different providers support different types of endpoints. 
*/ - ret = fi_endpoint(fabric_ep.domain, fabric_ep.info, &fabric_ep.ep, NULL); - debug_info("[FABRIC] [fabric_init] fi_endpoint = "<fid, FI_SEND | FI_RECV); - debug_info("[FABRIC] [fabric_init] fi_ep_bind = "<fid, 0); - debug_info("[FABRIC] [fabric_init] fi_ep_bind = "<fid, 0); + debug_info("[FABRIC] [fabric_init] fi_scalable_ep_bind = "<tx_attr->caps |= FI_MSG; + fabric_ep.info->tx_attr->caps |= FI_NAMED_RX_CTX; /* Required for scalable endpoints indexing */ + ret = fi_tx_context(fabric_ep.ep, 0, fabric_ep.info->tx_attr, &fabric_ep.tx_ep, NULL); + debug_info("[FABRIC] [fabric_init] fi_tx_context tx_ep = "<fid, FI_SEND); + debug_info("[FABRIC] [fabric_init] fi_ep_bind tx_ep = "<rx_attr->caps |= FI_MSG; + fabric_ep.info->rx_attr->caps |= FI_NAMED_RX_CTX; /* Required for scalable endpoints indexing */ + ret = fi_rx_context(fabric_ep.ep, 0, fabric_ep.info->rx_attr, &fabric_ep.rx_ep, NULL); + debug_info("[FABRIC] [fabric_init] fi_rx_context rx_ep = "<fid, FI_RECV); + debug_info("[FABRIC] [fabric_init] fi_ep_bind rx_ep = "< comp(comp_count); + const int comp_count = 8; + struct fi_cq_tagged_entry comp[comp_count] = {}; std::unique_lock lock(fabric_ep.thread_cq_mutex); while (fabric_ep.thread_cq_is_running) { @@ -361,7 +394,7 @@ int fabric::run_thread_cq(fabric_ep &fabric_ep) // if (fabric_ep.subs_to_wait == 0) { continue; } // { // std::unique_lock lock(fabric_ep.thread_fi_mutex); - ret = fi_cq_read(fabric_ep.cq, comp.data(), comp_count); + ret = fi_cq_read(fabric_ep.cq, comp, comp_count); // ret = fi_cq_read(fabric_ep.cq, &comp[0], 8); // } if (ret == -FI_EAGAIN){ continue; } @@ -482,33 +515,47 @@ void fabric::wait ( fabric_ep& fabric_ep, fabric_comm &fabric_comm ) if (fabric_ep.have_thread){ debug_info("[FABRIC] [wait] With threads"); std::unique_lock lock(fabric_comm.comm_mutex); - // fabric_ep.subs_to_wait++; - // fabric_ep.thread_cq_cv.notify_one(); fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); fabric_comm.wait_context = true; 
}else{ debug_info("[FABRIC] [wait] Without threads"); + std::unique_lock lock(fabric_comm.comm_mutex); + int ret = 0; - fi_cq_tagged_entry comp = {}; - do{ - ret = fi_cq_read(fabric_ep.cq, &comp, 1); + const int comp_count = 8; + fi_cq_tagged_entry comp[comp_count] = {}; + while (fabric_comm.wait_context) + { + ret = fi_cq_read(fabric_ep.cq, &comp, comp_count); - if (ret == -FI_EAGAIN){ continue; } + if (ret == -FI_EAGAIN){ + // std::this_thread::yield(); + continue; + } //TODO: handle error - if (ret < 0) { continue; } + if (ret < 0) { + print("Error in fi_cq_read "<(comp.op_context); - context->entry = comp; - if (comp.flags & FI_SEND) { - debug_info("[FABRIC] [run_thread_cq] Send cq of rank_peer "<rank); + for (int i = 0; i < ret; i++) + { + // Handle the cq entries + fabric_context* context = static_cast(comp[i].op_context); + context->entry = comp[i]; + if (comp[i].flags & FI_SEND) { + debug_info("[FABRIC] [wait] Send cq of rank_peer "<rank); + } + if (comp[i].flags & FI_RECV) { + debug_info("[FABRIC] [wait] Recv cq of rank_peer "<rank); + } + print_fi_cq_tagged_entry(comp[i]); + fabric_ep.m_comms[context->rank].wait_context = false; + // fabric_ep.m_comms[context->rank].comm_cv.notify_one(); } - if (comp.flags & FI_RECV) { - debug_info("[FABRIC] [run_thread_cq] Recv cq of rank_peer "<rank); - } - // print_fi_cq_err_entry(comp); - }while (ret == -FI_EAGAIN); + } + fabric_comm.wait_context = true; } } @@ -528,16 +575,11 @@ fabric::fabric_msg fabric::send ( fabric_ep &fabric_ep, fabric_comm& fabric_comm debug_info("[FABRIC] [fabric_send] Start size "< fabric_ep.info->tx_attr->inject_size){ - - do { - // std::unique_lock lock(fabric_ep.thread_fi_mutex); - ret = fi_tsend(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_send, &fabric_comm.context); + ret = fi_tsend(fabric_ep.tx_ep, buffer, size, NULL, fabric_comm.fi_addr, tag_send, &fabric_comm.context); if (ret == -FI_EAGAIN) (void) fi_cq_read(fabric_ep.cq, NULL, 0); - - // debug_info("fi_tsend "< 
lock(fabric_comm.comm_mutex); - // fabric_ep.subs_to_wait++; - // fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); - // fabric_comm.wait_context = true; - // } - // msg.error = fabric_comm.context.entry.err; }else{ do { - ret = fi_tinject(fabric_ep.ep, buffer, size, fabric_comm.fi_addr, tag_send); + ret = fi_tinject(fabric_ep.tx_ep, buffer, size, fabric_comm.fi_addr, tag_send); if (ret == -FI_EAGAIN) (void) fi_cq_read(fabric_ep.cq, NULL, 0); @@ -573,7 +608,6 @@ fabric::fabric_msg fabric::send ( fabric_ep &fabric_ep, fabric_comm& fabric_comm msg.rank_peer = (tag_send & 0xFFFF'FF00'0000'0000) >> 40; msg.rank_self_in_peer = (tag_send & 0x0000'00FF'FFFF'0000) >> 16; - debug_info("[FABRIC] [fabric_send] fabric_comm.context.entry.tag "< lock(fabric_ep.thread_fi_mutex); - ret = fi_trecv(fabric_ep.ep, buffer, size, NULL, fabric_comm.fi_addr, tag_recv, mask, &fabric_comm.context); + ret = fi_trecv(fabric_ep.rx_ep, buffer, size, NULL, fabric_comm.fi_addr, tag_recv, mask, &fabric_comm.context); if (ret == -FI_EAGAIN) (void) fi_cq_read(fabric_ep.cq, NULL, 0); - // debug_info("fi_trecv "< lock(fabric_comm.comm_mutex); - // fabric_ep.subs_to_wait++; - // fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); - // fabric_comm.wait_context = true; - // } msg.size = size; // msg.error = fabric_comm.context.entry.err; @@ -631,7 +658,6 @@ fabric::fabric_msg fabric::recv ( fabric_ep &fabric_ep, fabric_comm& fabric_comm msg.rank_self_in_peer = (fabric_comm.context.entry.tag & 0xFFFF'FF00'0000'0000) >> 40; msg.rank_peer = (fabric_comm.context.entry.tag & 0x0000'00FF'FFFF'0000) >> 16; - debug_info("[FABRIC] [fabric_recv] fabric_comm.context.entry.tag "<fid); + if (ret) + printf("warning: error closing tx_context (%d)\n", ret); + fabric_ep.tx_ep = nullptr; + } + + debug_info("[FABRIC] [fabric_close_comm] Close rx_context"); + if (fabric_ep.rx_ep){ + ret = fi_close(&fabric_ep.rx_ep->fid); + if (ret) + printf("warning: error 
closing rx_context (%d)\n", ret); + fabric_ep.rx_ep = nullptr; + } + debug_info("[FABRIC] [fabric_close_comm] Close endpoint"); if (fabric_ep.ep){ ret = fi_close(&fabric_ep.ep->fid); diff --git a/src/base_cpp/fabric.hpp b/src/base_cpp/fabric.hpp index 3fcecb50d..07acf316f 100644 --- a/src/base_cpp/fabric.hpp +++ b/src/base_cpp/fabric.hpp @@ -59,7 +59,7 @@ class fabric { std::mutex comm_mutex; std::condition_variable comm_cv; - bool wait_context = true; + std::atomic_bool wait_context = true; fabric_context context; }; @@ -69,6 +69,8 @@ class fabric { struct fid_fabric *fabric = nullptr; struct fid_domain *domain = nullptr; struct fid_ep *ep = nullptr; + struct fid_ep *rx_ep = nullptr; + struct fid_ep *tx_ep = nullptr; struct fid_av *av = nullptr; struct fid_cq *cq = nullptr; std::unordered_map m_comms; diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp index 852fa74b1..c09387852 100644 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp @@ -183,6 +183,13 @@ void nfi_fabric_server_control_comm::disconnect(nfi_xpn_server_comm *comm) printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: nfi_fabric_server_comm_write_operation fails"); } + // Sincronization with server + uint32_t buff = 0; + ret = in_comm->read_data(&buff, sizeof(buff)); + if (ret < 0) { + printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: nfi_fabric_server_comm_write_operation fails"); + } + // Disconnect debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Disconnect"); diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index ba55db68b..2b00c06eb 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -163,6 +163,15 @@ 
int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_clien tag_client_id = msg[0]; op = static_cast(msg[1]); + if (op == xpn_server_ops::DISCONNECT) [[unlikely]] { + // Sincronization with client in disconnect + uint32_t buff = static_cast(xpn_server_ops::DISCONNECT); + int64_t res = write_data(&buff, sizeof(buff), rank_client_id, tag_client_id); + if (res < 0) { + debug_warning("[Server="<(type_op)); if (type_op == xpn_server_ops::DISCONNECT) { debug_info("[TH_ID="< Date: Fri, 8 Nov 2024 12:11:52 +0100 Subject: [PATCH 11/60] New feature: without limits in threads on_demand --- src/base_cpp/workers.cpp | 4 ++-- src/base_cpp/workers.hpp | 2 +- src/base_cpp/workers_on_demand.cpp | 8 ++++++-- src/base_cpp/workers_on_demand.hpp | 2 +- src/xpn_server/xpn_server.cpp | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/base_cpp/workers.cpp b/src/base_cpp/workers.cpp index f0e2d92aa..eec663957 100644 --- a/src/base_cpp/workers.cpp +++ b/src/base_cpp/workers.cpp @@ -29,13 +29,13 @@ namespace XPN { - std::unique_ptr workers::Create(workers_mode mode) + std::unique_ptr workers::Create(workers_mode mode, bool with_limits) { switch (mode) { case workers_mode::sequential: return std::make_unique(); case workers_mode::thread_pool: return std::make_unique(); - case workers_mode::thread_on_demand: return std::make_unique(); + case workers_mode::thread_on_demand: return std::make_unique(with_limits); default: std::cerr<<"Error: workers mode '"<(mode)<<"' not defined"< task) = 0; virtual void wait_all() = 0; public: - static std::unique_ptr Create(workers_mode mode); + static std::unique_ptr Create(workers_mode mode, bool with_limits = true); }; } // namespace XPN diff --git a/src/base_cpp/workers_on_demand.cpp b/src/base_cpp/workers_on_demand.cpp index 4b0d830a3..484f1df4c 100644 --- a/src/base_cpp/workers_on_demand.cpp +++ b/src/base_cpp/workers_on_demand.cpp @@ -24,9 +24,13 @@ namespace XPN { - workers_on_demand::workers_on_demand() + 
workers_on_demand::workers_on_demand(bool with_limits) { - m_num_threads = std::thread::hardware_concurrency() * 2; + if (with_limits){ + m_num_threads = std::thread::hardware_concurrency() * 2; + }else{ + m_num_threads = INT32_MAX; + } } workers_on_demand::~workers_on_demand() { diff --git a/src/base_cpp/workers_on_demand.hpp b/src/base_cpp/workers_on_demand.hpp index b43912b9a..34b579272 100644 --- a/src/base_cpp/workers_on_demand.hpp +++ b/src/base_cpp/workers_on_demand.hpp @@ -30,7 +30,7 @@ namespace XPN class workers_on_demand : public workers { public: - workers_on_demand(); + workers_on_demand(bool with_limits); ~workers_on_demand(); std::future launch(std::function task) override; diff --git a/src/xpn_server/xpn_server.cpp b/src/xpn_server/xpn_server.cpp index f56510dc1..0eb0e0918 100644 --- a/src/xpn_server/xpn_server.cpp +++ b/src/xpn_server/xpn_server.cpp @@ -143,7 +143,7 @@ int xpn_server::run() // * Workers initialization debug_info("[TH_ID="< Date: Fri, 8 Nov 2024 12:12:14 +0100 Subject: [PATCH 12/60] Implement various thread to read cq in fabric --- src/base_cpp/fabric.cpp | 34 ++++++++++++++++++++++------------ src/base_cpp/fabric.hpp | 17 +++++++++++------ 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index 519847e0a..80cba2eb6 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -350,9 +350,12 @@ int fabric::init_thread_cq(fabric_ep &fabric_ep) if (!fabric_ep.have_thread) return 0; debug_info("[FABRIC] [init_thread_cq] Start"); - fabric_ep.thread_cq = std::thread([&fabric_ep](){ - run_thread_cq(fabric_ep); - }); + for (int i = 0; i < FABRIC_THREADS; i++) + { + fabric_ep.threads_cq[i].id = std::thread([&fabric_ep, i](){ + run_thread_cq(fabric_ep, i); + }); + } debug_info("[FABRIC] [init_thread_cq] End"); return 0; } @@ -362,25 +365,32 @@ int fabric::destroy_thread_cq(fabric_ep &fabric_ep) if (!fabric_ep.have_thread) return 0; debug_info("[FABRIC] 
[destroy_thread_cq] Start"); - { - std::lock_guard lock(fabric_ep.thread_cq_mutex); - fabric_ep.thread_cq_is_running = false; + + for (int i = 0; i < FABRIC_THREADS; i++) + { + auto& t = fabric_ep.threads_cq[i]; + { + std::lock_guard lock(t.thread_cq_mutex); + t.thread_cq_is_running = false; + } + t.thread_cq_cv.notify_one(); + t.id.join(); } - fabric_ep.thread_cq_cv.notify_one(); - fabric_ep.thread_cq.join(); + debug_info("[FABRIC] [destroy_thread_cq] End"); return 0; } -int fabric::run_thread_cq(fabric_ep &fabric_ep) +int fabric::run_thread_cq(fabric_ep &fabric_ep, uint32_t id) { int ret = 0; const int comp_count = 8; struct fi_cq_tagged_entry comp[comp_count] = {}; - std::unique_lock lock(fabric_ep.thread_cq_mutex); + auto& t = fabric_ep.threads_cq[id]; + std::unique_lock lock(t.thread_cq_mutex); - while (fabric_ep.thread_cq_is_running) { - if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { + while (t.thread_cq_is_running) { + if (t.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&t]{ return !t.thread_cq_is_running; })) { break; } // if (fabric_ep.subs_to_wait == 0) { diff --git a/src/base_cpp/fabric.hpp b/src/base_cpp/fabric.hpp index 07acf316f..fa37494c8 100644 --- a/src/base_cpp/fabric.hpp +++ b/src/base_cpp/fabric.hpp @@ -39,6 +39,7 @@ namespace XPN { class fabric { public: constexpr static const uint32_t FABRIC_ANY_RANK = 0xFFFFFFFF; + constexpr static const int FABRIC_THREADS = 10; struct fabric_ep; @@ -75,12 +76,16 @@ class fabric { struct fid_cq *cq = nullptr; std::unordered_map m_comms; + struct thread_cq{ + std::thread id; + std::mutex thread_cq_mutex; + std::condition_variable thread_cq_cv; + bool thread_cq_is_running = true; + }; + bool have_thread = true; - std::thread thread_cq; - std::mutex thread_cq_mutex; - // std::mutex thread_fi_mutex; - std::condition_variable thread_cq_cv; - bool thread_cq_is_running = true; + std::array threads_cq; + 
std::atomic_uint32_t subs_to_wait = 0; }; @@ -94,7 +99,7 @@ class fabric { private: static int set_hints(fabric_ep &fabric_ep); - static int run_thread_cq(fabric_ep &fabric_ep); + static int run_thread_cq(fabric_ep &fabric_ep, uint32_t id); static fabric_comm& any_comm(fabric_ep &fabric_ep); static int init_thread_cq(fabric_ep &fabric_ep); static int destroy_thread_cq(fabric_ep &fabric_ep); From 4f008138b320faf6481da2c54697a721634b82b6 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 8 Nov 2024 12:48:23 +0100 Subject: [PATCH 13/60] Set fabric threads with a env variable --- src/base_cpp/fabric.cpp | 10 +++++++--- src/base_cpp/fabric.hpp | 4 ++-- src/base_cpp/xpn_env.cpp | 6 ++++++ src/base_cpp/xpn_env.hpp | 1 + src/xpn_server/fabric_server/fabric_server_comm.cpp | 2 +- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp index 80cba2eb6..9a87d083f 100644 --- a/src/base_cpp/fabric.cpp +++ b/src/base_cpp/fabric.cpp @@ -350,7 +350,9 @@ int fabric::init_thread_cq(fabric_ep &fabric_ep) if (!fabric_ep.have_thread) return 0; debug_info("[FABRIC] [init_thread_cq] Start"); - for (int i = 0; i < FABRIC_THREADS; i++) + fabric_ep.threads_cq = std::vector(xpn_env::get_instance().xpn_fabric_threads); + + for (size_t i = 0; i < fabric_ep.threads_cq.size(); i++) { fabric_ep.threads_cq[i].id = std::thread([&fabric_ep, i](){ run_thread_cq(fabric_ep, i); @@ -366,7 +368,7 @@ int fabric::destroy_thread_cq(fabric_ep &fabric_ep) debug_info("[FABRIC] [destroy_thread_cq] Start"); - for (int i = 0; i < FABRIC_THREADS; i++) + for (size_t i = 0; i < fabric_ep.threads_cq.size(); i++) { auto& t = fabric_ep.threads_cq[i]; { @@ -376,7 +378,9 @@ int fabric::destroy_thread_cq(fabric_ep &fabric_ep) t.thread_cq_cv.notify_one(); t.id.join(); } - + + fabric_ep.threads_cq.clear(); + debug_info("[FABRIC] [destroy_thread_cq] End"); return 0; } diff --git a/src/base_cpp/fabric.hpp b/src/base_cpp/fabric.hpp index fa37494c8..5d1dc1ea1 100644 --- 
a/src/base_cpp/fabric.hpp +++ b/src/base_cpp/fabric.hpp @@ -39,7 +39,6 @@ namespace XPN { class fabric { public: constexpr static const uint32_t FABRIC_ANY_RANK = 0xFFFFFFFF; - constexpr static const int FABRIC_THREADS = 10; struct fabric_ep; @@ -84,7 +83,8 @@ class fabric { }; bool have_thread = true; - std::array threads_cq; + // std::array threads_cq; + std::vector threads_cq; std::atomic_uint32_t subs_to_wait = 0; }; diff --git a/src/base_cpp/xpn_env.cpp b/src/base_cpp/xpn_env.cpp index d52113888..3d89b8cc3 100644 --- a/src/base_cpp/xpn_env.cpp +++ b/src/base_cpp/xpn_env.cpp @@ -73,5 +73,11 @@ namespace XPN } // XPN_STATS_DIR xpn_stats_dir = std::getenv("XPN_STATS_DIR"); + + // XPN_FABRIC_THREADS + char *env_fabric_threads = std::getenv("XPN_FABRIC_THREADS"); + if ((env_fabric_threads != NULL) && (std::strlen(env_fabric_threads) > 0)){ + xpn_fabric_threads=atoi(env_fabric_threads); + } } } \ No newline at end of file diff --git a/src/base_cpp/xpn_env.hpp b/src/base_cpp/xpn_env.hpp index 4b8e8e7df..fa619c5c6 100644 --- a/src/base_cpp/xpn_env.hpp +++ b/src/base_cpp/xpn_env.hpp @@ -46,6 +46,7 @@ namespace XPN int xpn_session_connect = 1; int xpn_stats = 0; const char * xpn_stats_dir = nullptr; + int xpn_fabric_threads = 10; public: static xpn_env& get_instance() { diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 2b00c06eb..96d68045c 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -163,7 +163,7 @@ int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_clien tag_client_id = msg[0]; op = static_cast(msg[1]); - if (op == xpn_server_ops::DISCONNECT) [[unlikely]] { + [[unlikely]] if (op == xpn_server_ops::DISCONNECT) { // Sincronization with client in disconnect uint32_t buff = static_cast(xpn_server_ops::DISCONNECT); int64_t res = write_data(&buff, sizeof(buff), rank_client_id, tag_client_id); From 
61a5b4dd055d4732d24726ad5122d781e0d5bc75 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Tue, 26 Nov 2024 11:07:36 +0100 Subject: [PATCH 14/60] Integrate LFI library --- .gitmodules | 3 + CMakeLists.txt | 4 +- libs/lfi | 1 + src/base_cpp/CMakeLists.txt | 15 +- src/base_cpp/fabric.cpp | 780 ------------------ src/base_cpp/fabric.hpp | 125 --- .../nfi_fabric_server_comm.cpp | 155 +--- .../nfi_fabric_server_comm.hpp | 10 +- .../fabric_server/fabric_server_comm.cpp | 123 +-- .../fabric_server/fabric_server_comm.hpp | 9 +- src/xpn_server/xpn_server_comm.cpp | 2 +- test/CMakeLists.txt | 1 - test/integrity/libfabric/CMakeLists.txt | 5 - test/integrity/libfabric/fabric_test.cpp | 363 -------- 14 files changed, 79 insertions(+), 1517 deletions(-) create mode 160000 libs/lfi delete mode 100644 src/base_cpp/fabric.cpp delete mode 100644 src/base_cpp/fabric.hpp delete mode 100644 test/integrity/libfabric/CMakeLists.txt delete mode 100644 test/integrity/libfabric/fabric_test.cpp diff --git a/.gitmodules b/.gitmodules index 8b9f15888..abac4c6c5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "docker"] path = docker url = https://github.com/xpn-arcos/xpn-docker.git +[submodule "libs/lfi"] + path = libs/lfi + url = https://github.com/dariomnz/lfi.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f8b3f3b6..f29751733 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,9 @@ option(ENABLE_FABRIC_SERVER "Enable the fabric_server module" OFF) if(ENABLE_FABRIC_SERVER) message(STATUS "ENABLE_FABRIC_SERVER : ${ENABLE_FABRIC_SERVER}") add_compile_options("-DENABLE_FABRIC_SERVER") - set(FABRIC_PATH ${ENABLE_FABRIC_SERVER}) + set(LIBFABRIC_PATH ${ENABLE_FABRIC_SERVER}) + add_subdirectory(libs/lfi) + link_libraries(lfi) else() message(STATUS "ENABLE_FABRIC_SERVER : false") endif(ENABLE_FABRIC_SERVER) diff --git a/libs/lfi b/libs/lfi new file mode 160000 index 000000000..42e3dab13 --- /dev/null +++ b/libs/lfi @@ -0,0 +1 @@ +Subproject commit 
42e3dab1310c8c6e1c1f57cc590599dce4031299 diff --git a/src/base_cpp/CMakeLists.txt b/src/base_cpp/CMakeLists.txt index 9d8a5cd49..f387126ca 100644 --- a/src/base_cpp/CMakeLists.txt +++ b/src/base_cpp/CMakeLists.txt @@ -7,20 +7,7 @@ file(GLOB XPN_BASE_CPP_SOURCE "*.cpp" ) -if("${FABRIC_PATH}" STREQUAL "") - list(FILTER XPN_BASE_CPP_HEADERS EXCLUDE REGEX "fabric") - list(FILTER XPN_BASE_CPP_SOURCE EXCLUDE REGEX "fabric") -endif() - add_library(xpn_base_cpp OBJECT ${XPN_BASE_CPP_SOURCE} ${XPN_BASE_CPP_HEADERS}) target_include_directories(xpn_base_cpp PRIVATE "${PROJECT_SOURCE_DIR}/src" -) - -if(NOT "${FABRIC_PATH}" STREQUAL "") - target_link_libraries(xpn_base_cpp PUBLIC fabric) - target_link_directories(xpn_base_cpp PUBLIC ${FABRIC_PATH}/lib) - target_include_directories(xpn_base_cpp PUBLIC - ${FABRIC_PATH}/include - ) -endif() \ No newline at end of file +) \ No newline at end of file diff --git a/src/base_cpp/fabric.cpp b/src/base_cpp/fabric.cpp deleted file mode 100644 index 9a87d083f..000000000 --- a/src/base_cpp/fabric.cpp +++ /dev/null @@ -1,780 +0,0 @@ - -/* - * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz - * - * This file is part of Expand. - * - * Expand is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Expand is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Expand. If not, see . 
- * - */ - -// #define DEBUG -#include "base_cpp/fabric.hpp" -#include "base_cpp/debug.hpp" - -namespace XPN -{ - -std::mutex fabric::s_mutex; - - -void print_flags(uint64_t flags) { - debug_info(" Flags set:"); - - if (flags & FI_MSG) { debug_info(" FI_MSG"); } - if (flags & FI_RMA) { debug_info(" FI_RMA"); } - if (flags & FI_TAGGED) { debug_info(" FI_TAGGED"); } - if (flags & FI_ATOMIC) { debug_info(" FI_ATOMIC"); } - if (flags & FI_MULTICAST) { debug_info(" FI_MULTICAST"); } - if (flags & FI_COLLECTIVE) { debug_info(" FI_COLLECTIVE"); } - - if (flags & FI_READ) { debug_info(" FI_READ"); } - if (flags & FI_WRITE) { debug_info(" FI_WRITE"); } - if (flags & FI_RECV) { debug_info(" FI_RECV"); } - if (flags & FI_SEND) { debug_info(" FI_SEND"); } - if (flags & FI_REMOTE_READ) { debug_info(" FI_REMOTE_READ"); } - if (flags & FI_REMOTE_WRITE) { debug_info(" FI_REMOTE_WRITE"); } - - if (flags & FI_MULTI_RECV) { debug_info(" FI_MULTI_RECV"); } - if (flags & FI_REMOTE_CQ_DATA) { debug_info(" FI_REMOTE_CQ_DATA"); } - if (flags & FI_MORE) { debug_info(" FI_MORE"); } - if (flags & FI_PEEK) { debug_info(" FI_PEEK"); } - if (flags & FI_TRIGGER) { debug_info(" FI_TRIGGER"); } - if (flags & FI_FENCE) { debug_info(" FI_FENCE"); } - // if (flags & FI_PRIORITY) { debug_info(" FI_PRIORITY"); } - - if (flags & FI_COMPLETION) { debug_info(" FI_COMPLETION"); } - if (flags & FI_INJECT) { debug_info(" FI_INJECT"); } - if (flags & FI_INJECT_COMPLETE) { debug_info(" FI_INJECT_COMPLETE"); } - if (flags & FI_TRANSMIT_COMPLETE) { debug_info(" FI_TRANSMIT_COMPLETE"); } - if (flags & FI_DELIVERY_COMPLETE) { debug_info(" FI_DELIVERY_COMPLETE"); } - if (flags & FI_AFFINITY) { debug_info(" FI_AFFINITY"); } - if (flags & FI_COMMIT_COMPLETE) { debug_info(" FI_COMMIT_COMPLETE"); } - if (flags & FI_MATCH_COMPLETE) { debug_info(" FI_MATCH_COMPLETE"); } - - if (flags & FI_HMEM) { debug_info(" FI_HMEM"); } - if (flags & FI_VARIABLE_MSG) { debug_info(" FI_VARIABLE_MSG"); } - if (flags & FI_RMA_PMEM) { 
debug_info(" FI_RMA_PMEM"); } - if (flags & FI_SOURCE_ERR) { debug_info(" FI_SOURCE_ERR"); } - if (flags & FI_LOCAL_COMM) { debug_info(" FI_LOCAL_COMM"); } - if (flags & FI_REMOTE_COMM) { debug_info(" FI_REMOTE_COMM"); } - if (flags & FI_SHARED_AV) { debug_info(" FI_SHARED_AV"); } - if (flags & FI_PROV_ATTR_ONLY) { debug_info(" FI_PROV_ATTR_ONLY"); } - if (flags & FI_NUMERICHOST) { debug_info(" FI_NUMERICHOST"); } - if (flags & FI_RMA_EVENT) { debug_info(" FI_RMA_EVENT"); } - if (flags & FI_SOURCE) { debug_info(" FI_SOURCE"); } - if (flags & FI_NAMED_RX_CTX) { debug_info(" FI_NAMED_RX_CTX"); } - if (flags & FI_DIRECTED_RECV) { debug_info(" FI_DIRECTED_RECV"); } -} - -void print_fi_cq_tagged_entry(const fi_cq_tagged_entry& entry) { - debug_info("fi_cq_tagged_entry:"); - debug_info(" op_context: " << entry.op_context); - print_flags(entry.flags); - // debug_info(" flags: " << entry.flags); - debug_info(" len: " << entry.len); - debug_info(" buf: " << entry.buf); - debug_info(" data: " << entry.data); - debug_info(" tag: " << entry.tag); - // debug_info(" olen: " << entry.olen); - // debug_info(" err: " << entry.err); - // debug_info(" prov_errno: " << entry.prov_errno); - // debug_info(" err_data: " << entry.err_data); - // debug_info(" err_data_size: " << entry.err_data_size); -} - -int fabric::set_hints( fabric_ep &fabric_ep ) -{ - fabric_ep.hints = fi_allocinfo(); - if (!fabric_ep.hints) - return -FI_ENOMEM; - - /* - * Request FI_EP_RDM (reliable datagram) endpoint which will allow us - * to reliably send messages to peers without having to - * listen/connect/accept. - */ - fabric_ep.hints->ep_attr->type = FI_EP_RDM; - - /* - * Request basic messaging capabilities from the provider (no tag - * matching, no RMA, no atomic operations) - */ - fabric_ep.hints->caps = FI_MSG | FI_TAGGED; - - /* - * Default to FI_DELIVERY_COMPLETE which will make sure completions do - * not get generated until our message arrives at the destination. 
- * Otherwise, the client might get a completion and exit before the - * server receives the message. This is to make the test simpler. - */ - fabric_ep.hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; - - /* - * Set the mode bit to 0. Mode bits are used to convey requirements - * that an application must adhere to when using the fabric interfaces. - * Modes specify optimal ways of accessing the reported endpoint or - * domain. On input to fi_getinfo, applications set the mode bits that - * they support. - */ - fabric_ep.hints->mode = FI_CONTEXT; - - /* - * Set mr_mode to 0. mr_mode is used to specify the type of memory - * registration capabilities the application requires. In this example - * we are not using memory registration so this bit will be set to 0. - */ - // hints->domain_attr->mr_mode = 0; - - // hints->domain_attr->threading = FI_THREAD_SAFE; - - /* Done setting hints */ - - return 0; -} - -int fabric::init ( fabric_ep &fabric_ep, bool have_threads ) -{ - int ret; - struct fi_cq_attr cq_attr = {}; - struct fi_av_attr av_attr = {}; - - debug_info("[FABRIC] [fabric_init] Start"); - - std::unique_lock lock(s_mutex); - - fabric_ep.have_thread = have_threads; - /* - * The first libfabric call to happen for initialization is fi_getinfo - * which queries libfabric and returns any appropriate providers that - * fulfill the hints requirements. Any applicable providers will be - * returned as a list of fi_info structs (&info). Any info can be - * selected. In this test we select the first fi_info struct. Assuming - * all hints were set appropriately, the first fi_info should be most - * appropriate. The flag FI_SOURCE is set for the server to indicate - * that the address/port refer to source information. This is not set - * for the client because the fields refer to the server, not the - * caller (client). 
- */ - set_hints(fabric_ep); - - ret = fi_getinfo(fi_version(), NULL, NULL, 0, - fabric_ep.hints, &fabric_ep.info); - - debug_info("[FABRIC] [fabric_init] fi_getinfo = "<fabric_attr, &fabric_ep.fabric, NULL); - debug_info("[FABRIC] [fabric_init] fi_fabric = "<fid, 0); - debug_info("[FABRIC] [fabric_init] fi_scalable_ep_bind = "<tx_attr->caps |= FI_MSG; - fabric_ep.info->tx_attr->caps |= FI_NAMED_RX_CTX; /* Required for scalable endpoints indexing */ - ret = fi_tx_context(fabric_ep.ep, 0, fabric_ep.info->tx_attr, &fabric_ep.tx_ep, NULL); - debug_info("[FABRIC] [fabric_init] fi_tx_context tx_ep = "<fid, FI_SEND); - debug_info("[FABRIC] [fabric_init] fi_ep_bind tx_ep = "<rx_attr->caps |= FI_MSG; - fabric_ep.info->rx_attr->caps |= FI_NAMED_RX_CTX; /* Required for scalable endpoints indexing */ - ret = fi_rx_context(fabric_ep.ep, 0, fabric_ep.info->rx_attr, &fabric_ep.rx_ep, NULL); - debug_info("[FABRIC] [fabric_init] fi_rx_context rx_ep = "<fid, FI_RECV); - debug_info("[FABRIC] [fabric_init] fi_ep_bind rx_ep = "<(xpn_env::get_instance().xpn_fabric_threads); - - for (size_t i = 0; i < fabric_ep.threads_cq.size(); i++) - { - fabric_ep.threads_cq[i].id = std::thread([&fabric_ep, i](){ - run_thread_cq(fabric_ep, i); - }); - } - debug_info("[FABRIC] [init_thread_cq] End"); - return 0; -} - -int fabric::destroy_thread_cq(fabric_ep &fabric_ep) -{ - if (!fabric_ep.have_thread) return 0; - - debug_info("[FABRIC] [destroy_thread_cq] Start"); - - for (size_t i = 0; i < fabric_ep.threads_cq.size(); i++) - { - auto& t = fabric_ep.threads_cq[i]; - { - std::lock_guard lock(t.thread_cq_mutex); - t.thread_cq_is_running = false; - } - t.thread_cq_cv.notify_one(); - t.id.join(); - } - - fabric_ep.threads_cq.clear(); - - debug_info("[FABRIC] [destroy_thread_cq] End"); - return 0; -} - -int fabric::run_thread_cq(fabric_ep &fabric_ep, uint32_t id) -{ - int ret = 0; - const int comp_count = 8; - struct fi_cq_tagged_entry comp[comp_count] = {}; - auto& t = fabric_ep.threads_cq[id]; - 
std::unique_lock lock(t.thread_cq_mutex); - - while (t.thread_cq_is_running) { - if (t.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&t]{ return !t.thread_cq_is_running; })) { - break; - } - // if (fabric_ep.subs_to_wait == 0) { - // fabric_ep.thread_cq_cv.wait(lock, [&fabric_ep]{ return fabric_ep.subs_to_wait != 0 || !fabric_ep.thread_cq_is_running; }); - // } - // if (!fabric_ep.thread_cq_is_running) break; - // if (fabric_ep.thread_cq_cv.wait_for(lock, std::chrono::nanoseconds(1), [&fabric_ep]{ return !fabric_ep.thread_cq_is_running; })) { - // break; - // } - - // if (fabric_ep.subs_to_wait == 0) { continue; } - // { - // std::unique_lock lock(fabric_ep.thread_fi_mutex); - ret = fi_cq_read(fabric_ep.cq, comp, comp_count); - // ret = fi_cq_read(fabric_ep.cq, &comp[0], 8); - // } - if (ret == -FI_EAGAIN){ continue; } - - //TODO: handle error - if (ret < 0) { continue; } - - // Handle the cq entries - for (int i = 0; i < ret; i++) - { - fabric_context* context = static_cast(comp[i].op_context); - fabric_comm &comm = fabric_ep.m_comms[context->rank]; - context->entry = comp[i]; - - { - std::unique_lock lock(comm.comm_mutex); - if (comp[i].flags & FI_SEND) { - debug_info("[FABRIC] [run_thread_cq] Send cq of rank_peer "<rank); - } - if (comp[i].flags & FI_RECV) { - debug_info("[FABRIC] [run_thread_cq] Recv cq of rank_peer "<rank); - } - - // print_fi_cq_err_entry(comp); - // fabric_ep.subs_to_wait--; - comm.wait_context = false; - comm.comm_cv.notify_one(); - } - } - } - return ret; -} - -fabric::fabric_comm& fabric::new_comm ( fabric_ep &fabric_ep ) -{ - static uint32_t rank_counter = 0; - - debug_info("[FABRIC] [fabric_new_comm] Start"); - std::unique_lock lock(s_mutex); - - auto[key, inserted] = fabric_ep.m_comms.emplace(std::piecewise_construct, - std::forward_as_tuple(rank_counter), - std::forward_as_tuple()); - key->second.m_ep = &fabric_ep; - key->second.rank_peer = rank_counter; - rank_counter++; - debug_info("[FABRIC] [fabric_new_comm] rank_peer 
"<second.rank_peer); - debug_info("[FABRIC] [fabric_new_comm] End"); - return key->second; -} - -fabric::fabric_comm& fabric::any_comm ( fabric_ep &fabric_ep ) -{ - debug_info("[FABRIC] [any_comm] Start"); - // std::unique_lock lock(s_mutex); - - auto[key, inserted] = fabric_ep.m_comms.emplace(std::piecewise_construct, - std::forward_as_tuple(FABRIC_ANY_RANK), - std::forward_as_tuple()); - key->second.m_ep = &fabric_ep; - key->second.rank_peer = FABRIC_ANY_RANK; - key->second.fi_addr = FI_ADDR_UNSPEC; - debug_info("[FABRIC] [any_comm] End"); - return key->second; -} - -fabric::fabric_comm& fabric::get_any_rank_comm(fabric_ep &fabric_ep) -{ - return fabric_ep.m_comms[FABRIC_ANY_RANK]; -} - - -int fabric::get_addr( fabric_ep &fabric_ep, char * out_addr, size_t &size_addr ) -{ - int ret = -1; - debug_info("[FABRIC] [fabric_get_addr] Start"); - ret = fi_getname(&fabric_ep.ep->fid, out_addr, &size_addr); - if (ret) { - printf("fi_getname error %d\n", ret); - return ret; - } - debug_info("[FABRIC] [fabric_get_addr] End = "< lock(fabric_comm.comm_mutex); - fabric_comm.comm_cv.wait(lock, [&fabric_comm]{ return !fabric_comm.wait_context; }); - fabric_comm.wait_context = true; - }else{ - debug_info("[FABRIC] [wait] Without threads"); - std::unique_lock lock(fabric_comm.comm_mutex); - - int ret = 0; - const int comp_count = 8; - fi_cq_tagged_entry comp[comp_count] = {}; - while (fabric_comm.wait_context) - { - ret = fi_cq_read(fabric_ep.cq, &comp, comp_count); - - if (ret == -FI_EAGAIN){ - // std::this_thread::yield(); - continue; - } - - //TODO: handle error - if (ret < 0) { - print("Error in fi_cq_read "<(comp[i].op_context); - context->entry = comp[i]; - if (comp[i].flags & FI_SEND) { - debug_info("[FABRIC] [wait] Send cq of rank_peer "<rank); - } - if (comp[i].flags & FI_RECV) { - debug_info("[FABRIC] [wait] Recv cq of rank_peer "<rank); - } - print_fi_cq_tagged_entry(comp[i]); - fabric_ep.m_comms[context->rank].wait_context = false; - // 
fabric_ep.m_comms[context->rank].comm_cv.notify_one(); - } - } - fabric_comm.wait_context = true; - } -} - -fabric::fabric_msg fabric::send ( fabric_ep &fabric_ep, fabric_comm& fabric_comm, const void * buffer, size_t size, uint32_t tag ) -{ - int ret; - fabric_msg msg = {}; - - // tag format 24 bits rank_peer 24 bits rank_self_in_peer 16 bits tag - uint64_t aux_rank_peer = fabric_comm.rank_peer; - uint64_t aux_rank_self_in_peer = fabric_comm.rank_self_in_peer; - uint64_t aux_tag = tag; - uint64_t tag_send = (aux_rank_peer << 40) | (aux_rank_self_in_peer << 16) | aux_tag; - - fabric_comm.context.rank = fabric_comm.rank_peer; - - debug_info("[FABRIC] [fabric_send] Start size "< fabric_ep.info->tx_attr->inject_size){ - do { - ret = fi_tsend(fabric_ep.tx_ep, buffer, size, NULL, fabric_comm.fi_addr, tag_send, &fabric_comm.context); - - if (ret == -FI_EAGAIN) - (void) fi_cq_read(fabric_ep.cq, NULL, 0); - } while (ret == -FI_EAGAIN); - - if (ret){ - printf("error posting send buffer (%d)\n", ret); - msg.error = -1; - return msg; - } - - debug_info("[FABRIC] [fabric_send] Waiting on rank_peer "<> 40; - msg.rank_self_in_peer = (tag_send & 0x0000'00FF'FFFF'0000) >> 16; - - debug_info("[FABRIC] [fabric_send] msg size "<> 40; - msg.rank_peer = (fabric_comm.context.entry.tag & 0x0000'00FF'FFFF'0000) >> 16; - - debug_info("[FABRIC] [fabric_recv] msg size "< lock(s_mutex); - - remove_addr(fabric_ep, fabric_comm); - - fabric_ep.m_comms.erase(fabric_comm.rank_peer); - - debug_info("[FABRIC] [fabric_close_comm] End = "< lock(s_mutex); - - destroy_thread_cq(fabric_ep); - - debug_info("[FABRIC] [fabric_close_comm] Close tx_context"); - if (fabric_ep.tx_ep){ - ret = fi_close(&fabric_ep.tx_ep->fid); - if (ret) - printf("warning: error closing tx_context (%d)\n", ret); - fabric_ep.tx_ep = nullptr; - } - - debug_info("[FABRIC] [fabric_close_comm] Close rx_context"); - if (fabric_ep.rx_ep){ - ret = fi_close(&fabric_ep.rx_ep->fid); - if (ret) - printf("warning: error closing rx_context 
(%d)\n", ret); - fabric_ep.rx_ep = nullptr; - } - - debug_info("[FABRIC] [fabric_close_comm] Close endpoint"); - if (fabric_ep.ep){ - ret = fi_close(&fabric_ep.ep->fid); - if (ret) - printf("warning: error closing EP (%d)\n", ret); - fabric_ep.ep = nullptr; - } - - debug_info("[FABRIC] [fabric_close_comm] Close address vector"); - if (fabric_ep.av){ - ret = fi_close(&fabric_ep.av->fid); - if (ret) - printf("warning: error closing AV (%d)\n", ret); - fabric_ep.av = nullptr; - } - - debug_info("[FABRIC] [fabric_close_comm] Close completion queue"); - if (fabric_ep.cq){ - ret = fi_close(&fabric_ep.cq->fid); - if (ret) - printf("warning: error closing CQ (%d)\n", ret); - fabric_ep.cq = nullptr; - } - - debug_info("[FABRIC] [fabric_destroy] Close domain"); - if (fabric_ep.domain){ - ret = fi_close(&fabric_ep.domain->fid); - if (ret) - printf("warning: error closing domain (%d)\n", ret); - fabric_ep.domain = nullptr; - } - - debug_info("[FABRIC] [fabric_destroy] Close fabric"); - if (fabric_ep.fabric){ - ret = fi_close(&fabric_ep.fabric->fid); - if (ret) - printf("warning: error closing fabric (%d)\n", ret); - fabric_ep.fabric = nullptr; - } - - debug_info("[FABRIC] [fabric_destroy] Free hints "); - if (fabric_ep.hints){ - fi_freeinfo(fabric_ep.hints); - fabric_ep.hints = nullptr; - } - - debug_info("[FABRIC] [fabric_destroy] Free info "); - if (fabric_ep.info){ - fi_freeinfo(fabric_ep.info); - fabric_ep.info = nullptr; - } - - - debug_info("[FABRIC] [fabric_destroy] End = "<. 
- * - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace XPN { - -class fabric { -public: - constexpr static const uint32_t FABRIC_ANY_RANK = 0xFFFFFFFF; - - struct fabric_ep; - - struct fabric_context{ - // context necesary for fabric interface - struct fi_context context; - uint32_t rank; - struct fi_cq_tagged_entry entry; - }; - - struct fabric_comm{ - uint32_t rank_peer; - uint32_t rank_self_in_peer; - - fi_addr_t fi_addr; - - fabric_ep *m_ep; - - std::mutex comm_mutex; - std::condition_variable comm_cv; - std::atomic_bool wait_context = true; - fabric_context context; - }; - - struct fabric_ep { - struct fi_info *hints = nullptr; - struct fi_info *info = nullptr; - struct fid_fabric *fabric = nullptr; - struct fid_domain *domain = nullptr; - struct fid_ep *ep = nullptr; - struct fid_ep *rx_ep = nullptr; - struct fid_ep *tx_ep = nullptr; - struct fid_av *av = nullptr; - struct fid_cq *cq = nullptr; - std::unordered_map m_comms; - - struct thread_cq{ - std::thread id; - std::mutex thread_cq_mutex; - std::condition_variable thread_cq_cv; - bool thread_cq_is_running = true; - }; - - bool have_thread = true; - // std::array threads_cq; - std::vector threads_cq; - - std::atomic_uint32_t subs_to_wait = 0; - }; - - struct fabric_msg{ - uint64_t size = 0; - uint32_t rank_peer = 0; - uint32_t rank_self_in_peer = 0; - uint32_t tag = 0; - int32_t error = 0; - }; - -private: - static int set_hints(fabric_ep &fabric_ep); - static int run_thread_cq(fabric_ep &fabric_ep, uint32_t id); - static fabric_comm& any_comm(fabric_ep &fabric_ep); - static int init_thread_cq(fabric_ep &fabric_ep); - static int destroy_thread_cq(fabric_ep &fabric_ep); -public: - static int init(fabric_ep &fabric, bool have_threads = true); - - static int destroy(fabric_ep &fabric_ep); - - static fabric_comm& new_comm(fabric_ep &fabric_ep); - static fabric_comm& get_any_rank_comm(fabric_ep &fabric_ep); - static 
int close (fabric_ep& fabric_ep, fabric_comm &fabric_comm); - - static int get_addr(fabric_ep &fabric_ep, char *out_addr, size_t &size_addr); - static int register_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm, char * addr_buf); - static int remove_addr(fabric_ep &fabric_ep, fabric_comm& fabric_comm); - static void wait(fabric_ep &fabric_ep, fabric_comm &fabric_comm); - static fabric_msg send(fabric_ep &fabric_ep, fabric_comm& fabric_comm, const void * buffer, size_t size, uint32_t tag); - static fabric_msg recv(fabric_ep &fabric_ep, fabric_comm& fabric_comm, void *buffer, size_t size, uint32_t tag); - - static std::mutex s_mutex; -}; - -} // namespace XPN \ No newline at end of file diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp index c09387852..da339cbdd 100644 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.cpp @@ -26,148 +26,60 @@ #include "base_cpp/ns.hpp" #include #include +#include "lfi.h" namespace XPN { -fabric::fabric_ep nfi_fabric_server_control_comm::m_ep; - -nfi_fabric_server_control_comm::nfi_fabric_server_control_comm () -{ - debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> Begin"); - if (m_ep.ep == nullptr){ - int xpn_thread = xpn_env::get_instance().xpn_thread; - fabric::init(m_ep, xpn_thread); - } - - debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm] >> End"); -} - -nfi_fabric_server_control_comm::~nfi_fabric_server_control_comm() -{ - debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [~nfi_fabric_server_control_comm] >> Begin"); - - if (m_ep.ep != nullptr && m_ep.m_comms.size() == 1) - fabric::destroy(m_ep); - - debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [~nfi_fabric_server_control_comm] >> End"); -} - nfi_xpn_server_comm* nfi_fabric_server_control_comm::connect ( const std::string &srv_name ) { int ret; 
int connection_socket; - char port_name[MAX_PORT_NAME]; - debug_info("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] >> Begin\n"); - - fabric::fabric_comm& new_fabric_comm = fabric::new_comm(m_ep); + debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] >> Begin"); // Lookup port name - ret = socket::client_connect(srv_name, socket::get_xpn_port() ,connection_socket); + ret = socket::client_connect(srv_name, socket::get_xpn_port(), connection_socket); if (ret < 0) { - debug_error("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] ERROR: socket connect\n"); + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket connect\n"); return nullptr; } int buffer = socket::ACCEPT_CODE; ret = socket::send(connection_socket, &buffer, sizeof(buffer)); if (ret < 0) { - debug_error("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] ERROR: socket send\n"); + debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_connect] ERROR: socket send\n"); socket::close(connection_socket); return nullptr; } ret = socket::recv(connection_socket, port_name, MAX_PORT_NAME); if (ret < 0) { - debug_error("[NFI_FABRIC_SERVER_CONTROL_COMM] [nfi_fabric_server_control_comm_connect] ERROR: socket read\n"); - socket::close(connection_socket); - return nullptr; - } - - // First recv the server address - size_t ad_len = MAX_PORT_NAME; - char ad_buff[MAX_PORT_NAME]; - ret = socket::recv(connection_socket, &ad_len, sizeof(ad_len)); - if (ret < 0){ - print("[Server="<read_data(&buff, sizeof(buff)); - if (ret < 0) { - printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] ERROR: nfi_fabric_server_comm_write_operation fails"); - } - // Disconnect debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_disconnect] Disconnect"); - - ret = fabric::close(m_ep, in_comm->m_comm); + + ret = lfi_client_close(in_comm->m_comm); if (ret < 0) { printf("[NFI_FABRIC_SERVER_COMM] 
[nfi_fabric_server_comm_disconnect] ERROR: MPI_Comm_disconnect fails"); } @@ -204,7 +109,7 @@ void nfi_fabric_server_control_comm::disconnect(nfi_xpn_server_comm *comm) } int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { - fabric::fabric_msg ret; + int ret; int msg[2]; debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] >> Begin"); @@ -216,8 +121,8 @@ int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { // Send message debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] Write operation send tag "<< msg[0]); - ret = fabric::send(*m_comm.m_ep, m_comm, msg, sizeof(msg), 0); - if (ret.error < 0) { + ret = lfi_tsend(m_comm, msg, sizeof(msg), 0); + if (ret < 0) { debug_error("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_operation] ERROR: socket::send < 0 : "<< ret); return -1; } @@ -229,7 +134,7 @@ int64_t nfi_fabric_server_comm::write_operation(xpn_server_ops op) { } int64_t nfi_fabric_server_comm::write_data(const void *data, int64_t size) { - fabric::fabric_msg ret; + int ret; debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] >> Begin"); @@ -247,19 +152,20 @@ int64_t nfi_fabric_server_comm::write_data(const void *data, int64_t size) { // Send message debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] Write data"); - ret = fabric::send(*m_comm.m_ep, m_comm, data, size, tag); - if (ret.error < 0) { - printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] ERROR: MPI_Send fails"); + ret = lfi_tsend(m_comm, data, size, tag); + if (ret < 0) { + printf("[NFI_MPI_SERVER_COMM] [nfi_mpi_server_comm_write_data] ERROR: MPI_Send fails"); + size = 0; } debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_write_data] << End"); // Return bytes written - return ret.size; + return size; } int64_t nfi_fabric_server_comm::read_data(void *data, ssize_t size) { - fabric::fabric_msg ret; + int ret; 
debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] >> Begin"); @@ -277,15 +183,16 @@ int64_t nfi_fabric_server_comm::read_data(void *data, ssize_t size) { // Get message debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] Read data"); - ret = fabric::recv(*m_comm.m_ep, m_comm, data, size, tag); - if (ret.error < 0) { + ret = lfi_trecv(m_comm, data, size, tag); + if (ret < 0) { printf("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] ERROR: MPI_Recv fails"); + size = 0; } debug_info("[NFI_FABRIC_SERVER_COMM] [nfi_fabric_server_comm_read_data] << End"); // Return bytes read - return ret.size; + return size; } } //namespace XPN diff --git a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp index fc8c1547c..a24e83770 100644 --- a/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp +++ b/src/xpn_client/nfi/nfi_fabric_server/nfi_fabric_server_comm.hpp @@ -25,7 +25,6 @@ #include #include "nfi/nfi_xpn_server_comm.hpp" -#include "base_cpp/fabric.hpp" namespace XPN { @@ -33,26 +32,25 @@ namespace XPN class nfi_fabric_server_comm : public nfi_xpn_server_comm { public: - nfi_fabric_server_comm(fabric::fabric_comm& comm) : m_comm(comm) {} + nfi_fabric_server_comm(int& comm) : m_comm(comm) {} int64_t write_operation(xpn_server_ops op) override; int64_t read_data(void *data, int64_t size) override; int64_t write_data(const void *data, int64_t size) override; public: - fabric::fabric_comm& m_comm; + int m_comm; }; class nfi_fabric_server_control_comm : public nfi_xpn_server_control_comm { public: - nfi_fabric_server_control_comm(); - ~nfi_fabric_server_control_comm(); + nfi_fabric_server_control_comm() = default; + ~nfi_fabric_server_control_comm() = default; nfi_xpn_server_comm* connect(const std::string &srv_name) override; void disconnect(nfi_xpn_server_comm* comm) override; private: - static fabric::fabric_ep m_ep; }; } // namespace 
XPN diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 96d68045c..64d918c2b 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -26,14 +26,24 @@ #include "base_cpp/socket.hpp" #include "base_c/filesystem.h" #include +#include "lfi.h" namespace XPN { -fabric_server_control_comm::fabric_server_control_comm (xpn_server_params ¶ms) +fabric_server_control_comm::fabric_server_control_comm () { debug_info("[Server="<> Begin"); - fabric::init(m_ep, params.have_threads()); + int port = 0; + m_server_comm = lfi_server_create(NULL, &port); + + m_port_name = std::to_string(port); + + if (m_server_comm < 0) + { + print("[Server="<> End"); } @@ -42,7 +52,7 @@ fabric_server_control_comm::~fabric_server_control_comm() { debug_info("[Server="<> Begin"); - fabric::destroy(m_ep); + lfi_server_close(m_server_comm); debug_info("[Server="<> End"); } @@ -59,72 +69,12 @@ xpn_server_comm* fabric_server_control_comm::accept ( int socket ) return nullptr; } - fabric::fabric_comm &new_comm = fabric::new_comm(m_ep); - - // First send the server address - size_t ad_len = MAX_PORT_NAME; - char ad_buff[MAX_PORT_NAME]; - ret = fabric::get_addr(m_ep, ad_buff, ad_len); - if (ret < 0){ - print("[Server="<(comm); - fabric::close(m_ep, in_comm->m_comm); + lfi_client_close(in_comm->m_comm); delete comm; @@ -147,32 +97,23 @@ void fabric_server_control_comm::disconnect ( xpn_server_comm* comm ) int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_client_id, int &tag_client_id ) { - fabric::fabric_msg ret = {}; int msg[2] = {}; + int ret = 0; debug_info("[Server="<> Begin"); // Get message debug_info("[Server="<(msg[1]); - [[unlikely]] if (op == xpn_server_ops::DISCONNECT) { - // Sincronization with client in disconnect - uint32_t buff = static_cast(xpn_server_ops::DISCONNECT); - int64_t res = write_data(&buff, sizeof(buff), 
rank_client_id, tag_client_id); - if (res < 0) { - debug_warning("[Server="<> Begin"); @@ -196,22 +137,21 @@ int64_t fabric_server_comm::read_data ( void *data, int64_t size, [[maybe_unused // Get message debug_info("[Server="<> Begin"); @@ -227,16 +167,15 @@ int64_t fabric_server_comm::write_data ( const void *data, int64_t size, [[maybe // Send message debug_info("[Server="< #include "xpn_server/xpn_server_comm.hpp" -#include "base_cpp/fabric.hpp" namespace XPN { @@ -33,26 +32,26 @@ namespace XPN class fabric_server_comm : public xpn_server_comm { public: - fabric_server_comm(fabric::fabric_comm& comm) : m_comm(comm) {} + fabric_server_comm(int comm) : m_comm(comm) {} ~fabric_server_comm() override {} int64_t read_operation(xpn_server_ops &op, int &rank_client_id, int &tag_client_id) override; int64_t read_data(void *data, int64_t size, int rank_client_id, int tag_client_id) override; int64_t write_data(const void *data, int64_t size, int rank_client_id, int tag_client_id) override; public: - fabric::fabric_comm& m_comm; + int m_comm; }; class fabric_server_control_comm : public xpn_server_control_comm { public: - fabric_server_control_comm(xpn_server_params ¶ms); + fabric_server_control_comm(); ~fabric_server_control_comm() override; xpn_server_comm* accept(int socket) override; void disconnect(xpn_server_comm *comm) override; private: - fabric::fabric_ep m_ep; + int m_server_comm; }; } // namespace XPN diff --git a/src/xpn_server/xpn_server_comm.cpp b/src/xpn_server/xpn_server_comm.cpp index c111531f8..002a824f6 100644 --- a/src/xpn_server/xpn_server_comm.cpp +++ b/src/xpn_server/xpn_server_comm.cpp @@ -38,7 +38,7 @@ namespace XPN case XPN_SERVER_TYPE_SCK: return std::make_unique(); case XPN_SERVER_TYPE_FABRIC: - return std::make_unique(params); + return std::make_unique(); default: fprintf(stderr, "[XPN_SERVER] [xpn_server_control_comm] server_type '%d' not recognized\n", params.server_type); } return nullptr; diff --git a/test/CMakeLists.txt 
b/test/CMakeLists.txt index 78268ee2c..6b03c79e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,5 @@ add_subdirectory(integrity/bypass_c) add_subdirectory(integrity/xpn_metadata) -add_subdirectory(integrity/libfabric) add_subdirectory(performance/xpn) add_subdirectory(performance/xpn-fault-tolerant) \ No newline at end of file diff --git a/test/integrity/libfabric/CMakeLists.txt b/test/integrity/libfabric/CMakeLists.txt deleted file mode 100644 index 88907ec9c..000000000 --- a/test/integrity/libfabric/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ - -add_executable(fabric_test fabric_test.cpp) -target_link_libraries(fabric_test xpn) - -install(TARGETS fabric_test RUNTIME DESTINATION test/integrity/libfabric) \ No newline at end of file diff --git a/test/integrity/libfabric/fabric_test.cpp b/test/integrity/libfabric/fabric_test.cpp deleted file mode 100644 index 0f2cba5a8..000000000 --- a/test/integrity/libfabric/fabric_test.cpp +++ /dev/null @@ -1,363 +0,0 @@ - -/* - * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz - * - * This file is part of Expand. - * - * Expand is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Expand is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Expand. If not, see . 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace XPN; - -#define BUF_SIZE 1024 * 1024 - -char *src_addr = NULL, *dst_addr = NULL; -const char *oob_port = "9228"; -int listen_sock, oob_sock; - -std::vector buf(BUF_SIZE); -std::vector msg(BUF_SIZE); - -fabric::fabric_ep fabric_ep; -fabric::fabric_comm* fabric_comm; - -static int sock_listen(char *node, const char *service) -{ - struct addrinfo *ai, hints; - int val, ret; - - memset(&hints, 0, sizeof hints); - hints.ai_flags = AI_PASSIVE; - - ret = getaddrinfo(node, service, &hints, &ai); - if (ret) { - printf("getaddrinfo() %s\n", gai_strerror(ret)); - return ret; - } - - listen_sock = socket(ai->ai_family, SOCK_STREAM, 0); - if (listen_sock < 0) { - printf("socket error"); - ret = listen_sock; - goto out; - } - - val = 1; - ret = setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, - (void *) &val, sizeof val); - if (ret) { - printf("setsockopt SO_REUSEADDR"); - goto out; - } - - ret = bind(listen_sock, ai->ai_addr, ai->ai_addrlen); - if (ret) { - printf("bind"); - goto out; - } - - ret = listen(listen_sock, 0); - if (ret) - printf("listen error"); - -out: - if (ret && listen_sock >= 0) - close(listen_sock); - freeaddrinfo(ai); - return ret; -} - -static int sock_setup(int sock) -{ - int ret, op; - long flags; - - op = 1; - ret = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - (void *) &op, sizeof(op)); - if (ret) - return ret; - - flags = fcntl(sock, F_GETFL); - if (flags < 0) - return -errno; - - if (fcntl(sock, F_SETFL, flags)) - return -errno; - - return 0; -} - -static int init_oob(void) -{ - struct addrinfo *ai = NULL; - int ret; - - if (!dst_addr) { - ret = sock_listen(src_addr, oob_port); - if (ret) - return ret; - - oob_sock = accept(listen_sock, NULL, 0); - if (oob_sock < 0) { - printf("accept error"); - ret = oob_sock; - return ret; - } - - close(listen_sock); - } else { - 
ret = getaddrinfo(dst_addr, oob_port, NULL, &ai); - if (ret) { - printf("getaddrinfo error"); - return ret; - } - - oob_sock = socket(ai->ai_family, SOCK_STREAM, 0); - if (oob_sock < 0) { - printf("socket error"); - ret = oob_sock; - goto free; - } - - ret = connect(oob_sock, ai->ai_addr, ai->ai_addrlen); - if (ret) { - printf("connect error"); - close(oob_sock); - goto free; - } - sleep(1); - } - - ret = sock_setup(oob_sock); - -free: - if (ai) - freeaddrinfo(ai); - return ret; -} - -static int sock_send(int fd, void *msg, size_t len) -{ - size_t sent; - ssize_t ret, err = 0; - - for (sent = 0; sent < len; ) { - ret = send(fd, ((char *) msg) + sent, len - sent, 0); - if (ret > 0) { - sent += ret; - } else { - err = -errno; - break; - } - } - - return err ? err: 0; -} - -static int sock_recv(int fd, void *msg, size_t len) -{ - size_t rcvd; - ssize_t ret, err = 0; - - for (rcvd = 0; rcvd < len; ) { - ret = recv(fd, ((char *) msg) + rcvd, len - rcvd, 0); - if (ret > 0) { - rcvd += ret; - } else if (ret == 0) { - err = -FI_ENOTCONN; - break; - } else { - err = -errno; - break; - } - } - - return err ? 
err: 0; -} - -static int exchange_addresses(void) -{ - #define BUF_SIZE_AUX 64 - char addr_buf[BUF_SIZE_AUX]; - int ret; - size_t addrlen = BUF_SIZE_AUX; - - ret = fabric::get_addr(fabric_ep, addr_buf, addrlen); - if (ret) { - printf("fi_getname error %d\n", ret); - return ret; - } - - ret = sock_send(oob_sock, addr_buf, BUF_SIZE_AUX); - if (ret) { - printf("sock_send error %d\n", ret); - return ret; - } - - memset(addr_buf, 0, BUF_SIZE_AUX); - ret = sock_recv(oob_sock, addr_buf, BUF_SIZE_AUX); - if (ret) { - printf("sock_recv error %d\n", ret); - return ret; - } - - ret = fabric::register_addr(fabric_ep, *fabric_comm, addr_buf); - if (ret != 1) { - printf("av insert error\n"); - return -FI_ENOSYS; - } - - - - return 0; -} - -static int post_recv(void) -{ - fabric::fabric_msg ret; - - ret = fabric::recv(fabric_ep, *fabric_comm, buf.data(), buf.size()*sizeof(buf[0]), 0); - - return ret.size; -} - -static int post_send(void) -{ - - static int count = 0; - if (dst_addr){ - sprintf(msg.data(), "Hello, server! I am the client you've been waiting for! %d", count++); - }else{ - sprintf(msg.data(), "Hello, client! I am the server you've been waiting for! %d", count++); - } - fabric::fabric_msg ret; - - ret = fabric::send(fabric_ep, *fabric_comm, msg.data(), msg.size()*sizeof(msg[0]), 0); - - return ret.size; -} - -static int run(void) -{ - int ret; - timer timer; - - if (dst_addr) { - printf("Client: send to server %s\n", dst_addr); - - for (int i = 0; i < 10; i++) - { - printf("Client: send buffer and wait for the server to recv\n"); - ret = post_send(); - if (ret<0) - return ret; - std::cout<<"Send "< 1) - dst_addr = argv[1]; - - /* Init out-of-band addressing */ - ret = init_oob(); - if (ret) - return ret; - - /* - * Hints are used to request support for specific features from a - * provider. 
- */ - - ret = fabric::init(fabric_ep); - if (ret) - goto out; - - std::cout << "[FABRIC] [fabric_init] "< Date: Tue, 26 Nov 2024 11:11:22 +0100 Subject: [PATCH 15/60] Update LFI --- libs/lfi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/lfi b/libs/lfi index 42e3dab13..80e9b59ea 160000 --- a/libs/lfi +++ b/libs/lfi @@ -1 +1 @@ -Subproject commit 42e3dab1310c8c6e1c1f57cc590599dce4031299 +Subproject commit 80e9b59ea24dd2c09c5b509a77a1b0e780f7a943 From 1c34923e8a98eeb53be549ae16ed90c115da7161 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Wed, 18 Dec 2024 17:39:52 +0100 Subject: [PATCH 16/60] Fix some debug information --- CMakeLists.txt | 1 + src/base_c/debug_msg.c | 2 +- src/base_c/debug_msg.h | 4 ++-- src/utils/xpn_expand.cpp | 2 +- src/utils/xpn_server_stats.cpp | 4 ++-- src/utils/xpn_shrink.cpp | 30 +++++++++++++++--------------- src/xpn_client/nfi/nfi_server.hpp | 6 +++--- src/xpn_server/xpn_server_ops.cpp | 2 +- 8 files changed, 26 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f29751733..907a34d50 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_SKIP_RPATH TRUE) +# add_compile_options("-DDEBUG") add_compile_options("-D_GNU_SOURCE" "-DMPICH_SKIP_MPICXX" "-D_REENTRANT") add_compile_options("-fPIC" "-O2" "-g" "-g3" "-ggdb" "-Wall" "-Wextra") diff --git a/src/base_c/debug_msg.c b/src/base_c/debug_msg.c index d4b20ee98..9114c1d86 100644 --- a/src/base_c/debug_msg.c +++ b/src/base_c/debug_msg.c @@ -41,7 +41,7 @@ void debug_msg_init ( void ) setbuf(stderr, NULL); } -int debug_msg_printf ( int src_type, char *src_fname, long src_line, FILE *fd, const char *msg_fmt, ... ) +int debug_msg_printf ( int src_type, const char *src_fname, long src_line, FILE *fd, const char *msg_fmt, ... 
) { va_list valist; int ret; diff --git a/src/base_c/debug_msg.h b/src/base_c/debug_msg.h index 576997bff..e6d30a081 100644 --- a/src/base_c/debug_msg.h +++ b/src/base_c/debug_msg.h @@ -29,7 +29,7 @@ /* ... Include / Inclusion ........................................... */ - + #include #include "base_c/string_misc.h" @@ -62,7 +62,7 @@ // Debug API void debug_msg_init ( void ); - int debug_msg_printf ( int src_type, char *src_fname, long src_line, FILE *fd, const char *msg_fmt, ... ); + int debug_msg_printf ( int src_type, const char *src_fname, long src_line, FILE *fd, const char *msg_fmt, ... ); // Extra Debug API diff --git a/src/utils/xpn_expand.cpp b/src/utils/xpn_expand.cpp index 5ee7ff8cd..9e960a533 100644 --- a/src/utils/xpn_expand.cpp +++ b/src/utils/xpn_expand.cpp @@ -200,7 +200,7 @@ while(entry != NULL) { - debug_info("Rank "<d_name); + debug_info("Rank "<d_name); if (! strcmp(entry->d_name, ".")){ entry = readdir(dir); continue; diff --git a/src/utils/xpn_server_stats.cpp b/src/utils/xpn_server_stats.cpp index f19cd8fac..0586a4146 100644 --- a/src/utils/xpn_server_stats.cpp +++ b/src/utils/xpn_server_stats.cpp @@ -67,11 +67,11 @@ void print_stats(std::string hostfile, std::string action) debug_info("[TH_ID="<> Begin"); // Open host file - debug_info("[TH_ID="<= 0; j--) { new_file.map_offset_mdata(aux_actual_last_block*new_file.m_mdata.m_data.block_size, replication, local_offset, aux_serv); - if (rank == 0){ debug_info("aux_actual_last_block " "); + debug_info("[NFI_XPN] [nfi_write_operation] Execute operation: "<(op)<<" -> "); ret = m_comm->write_data((void *)&(msg), sizeof(msg)); debug_info("[NFI_XPN] [nfi_write_operation] >> End"); @@ -128,7 +128,7 @@ namespace XPN } // send request... - debug_info("[NFI_XPN] [nfi_server_do_request] Send operation: "<(op)); ret = nfi_write_operation(op, msg); if (ret < 0) { @@ -136,7 +136,7 @@ namespace XPN } // read response... 
- debug_info("[NFI_XPN] [nfi_server_do_request] Response operation: "<(op)); ret = m_comm->read_data((void *)&(req), sizeof(req)); if (ret < 0) { diff --git a/src/xpn_server/xpn_server_ops.cpp b/src/xpn_server/xpn_server_ops.cpp index 1b3e51068..2a0a3d649 100644 --- a/src/xpn_server/xpn_server_ops.cpp +++ b/src/xpn_server/xpn_server_ops.cpp @@ -42,7 +42,7 @@ namespace XPN void xpn_server::do_operation ( xpn_server_comm *comm, xpn_server_ops type_op, int rank, int tag, timer timer ) { debug_info("[TH_ID="<> Begin"); - debug_info("[TH_ID="<(type_op)); switch (type_op) { From 20c378fc016cc94e274ca7b33a26502946322141 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 20 Dec 2024 13:23:15 +0100 Subject: [PATCH 17/60] Change the fabric_server thread mode for better scaling --- libs/lfi | 2 +- .../fabric_server/fabric_server_comm.cpp | 18 +++- .../fabric_server/fabric_server_comm.hpp | 1 + src/xpn_server/xpn_server.cpp | 83 +++++++++++++++++-- src/xpn_server/xpn_server.hpp | 4 + src/xpn_server/xpn_server_params.cpp | 2 +- 6 files changed, 99 insertions(+), 11 deletions(-) diff --git a/libs/lfi b/libs/lfi index 80e9b59ea..82c27d0d4 160000 --- a/libs/lfi +++ b/libs/lfi @@ -1 +1 @@ -Subproject commit 80e9b59ea24dd2c09c5b509a77a1b0e780f7a943 +Subproject commit 82c27d0d4f52636b19e476c0179d8dd3d6009c88 diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 64d918c2b..1205e1fac 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -94,22 +94,32 @@ void fabric_server_control_comm::disconnect ( xpn_server_comm* comm ) debug_info("[Server="<> Begin"); + + lfi_client_close(id); + + debug_info("[Server="<> Begin"); // Get message debug_info("[Server="<(msg[1]); @@ -137,7 +147,7 @@ int64_t fabric_server_comm::read_data ( void *data, int64_t size, [[maybe_unused // Get message debug_info("[Server="<(type_op)); + debug_info("[TH_ID="<(type_op)<<" 
client_rank "<> Begin"); + xpn_server_ops type_op = xpn_server_ops::size; + int rank_client_id = 0, tag_client_id = 0; + + while (!m_disconnect) + { + while(m_clients == 0 && !m_disconnect){ + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + if (m_disconnect){ + break; + } + debug_info("[TH_ID="<read_operation(type_op, rank_client_id, tag_client_id); + if (ret < 0) { + debug_error("[TH_ID="<(type_op)<<" client_rank "<launch_no_future([this, timer, comm, type_op, rank_client_id, tag_client_id]{ + std::unique_ptr> op_stat; + if (xpn_env::get_instance().xpn_stats) { op_stat = std::make_unique>(m_stats.m_ops_stats[static_cast(type_op)], timer); } + do_operation(comm, type_op, rank_client_id, tag_client_id, timer); + }); + + debug_info("[TH_ID="<disconnect(comm); + + debug_info("[TH_ID="<accept(connection_socket); + m_clients++; debug_info("[TH_ID="<launch_no_future([this, comm]{ - this->dispatcher(comm); - }); + if (m_params.server_type == XPN_SERVER_TYPE_FABRIC){ + delete comm; + xpn_server_comm* general_comm = new fabric_server_comm(-1); + static bool only_one = true; + if (only_one){ + only_one = false; + auto fut = m_worker1->launch([this, general_comm]{ + this->fabric_dispatcher(general_comm); + return 0; + }); + } + }else{ + auto fut = m_worker1->launch([this, comm]{ + this->dispatcher(comm); + return 0; + }); + } } void xpn_server::finish ( void ) { // Wait and finalize for all current workers debug_info("[TH_ID="< m_window_stats; + std::atomic_bool m_disconnect = {false}; + std::atomic_int64_t m_clients = {0}; + public: // File operations void op_open ( xpn_server_comm &comm, st_xpn_server_path_flags &head, int rank_client_id, int tag_client_id ); diff --git a/src/xpn_server/xpn_server_params.cpp b/src/xpn_server/xpn_server_params.cpp index 3bedff5f4..c4e026cbe 100644 --- a/src/xpn_server/xpn_server_params.cpp +++ b/src/xpn_server/xpn_server_params.cpp @@ -181,7 +181,7 @@ xpn_server_params::xpn_server_params(int _argc, char *_argv[]) { // In 
sck_server worker for operations has to be sequential because you don't want to have to make a socket per // operation. It can be done because it is not reentrant - if (server_type == XPN_SERVER_TYPE_SCK || server_type == XPN_SERVER_TYPE_FABRIC) { + if (server_type == XPN_SERVER_TYPE_SCK) { thread_mode_operations = workers_mode::sequential; } From e398843e4fa0ed4c1dd6bbe7291ee654316d45ce Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 10 Jan 2025 12:57:06 +0100 Subject: [PATCH 18/60] Dev files change in paths --- scripts/compile/platform/unito-dario.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/compile/platform/unito-dario.sh b/scripts/compile/platform/unito-dario.sh index ac545916e..d26c048ea 100755 --- a/scripts/compile/platform/unito-dario.sh +++ b/scripts/compile/platform/unito-dario.sh @@ -31,11 +31,15 @@ spack load pkg-config #MPICC_PATH=$HOME/opt/spack/linux-ubuntu20.04-zen/gcc-9.4.0/openmpi-4.1.3-4bpvwm3lcbftmjki6en35c4i5od6wjbr/bin/mpicc #MPICC_PATH=$HOME/opt/spack/linux-ubuntu20.04-zen/gcc-9.4.0/mpich-4.0.2-a76rmlxbneoqdvemzjsyewp2akiiuxlj/bin/mpicc # MPICC_PATH=$HOME/dariomnz/bin/mpich/bin/mpicc -MPICC_PATH=$HOME/dariomnz/bin/mpich/bin +MPICC_PATH=$HOME/dariomnz/bin/mpich-ch4-fabric/bin FABRIC_PATH=/opt/libfabric +FABRIC_PATH=$HOME/dariomnz/bin/libfabric-2.0.0 INSTALL_PATH=$HOME/dariomnz/bin/ BASE_PATH=$(dirname $0) +export LD_LIBRARY_PATH=$HOME/dariomnz/bin/mpich-ch4-fabric/lib:$HOME/dariomnz/bin/libfabric-2.0.0/lib:$LD_LIBRARY_PATH +export PATH=$HOME/dariomnz/bin/mpich-ch4-fabric/bin:$PATH + # 3) preconfigure build-me... 
$BASE_PATH/../software/xpn.sh -m $MPICC_PATH -f $FABRIC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../xpn $BASE_PATH/../software/ior.sh -m $MPICC_PATH/mpicc -i $INSTALL_PATH -s $BASE_PATH/../../../../ior From cb6541e31cf3b1a0da780bade5a55ca1954b00e2 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 10 Jan 2025 12:58:17 +0100 Subject: [PATCH 19/60] Use two buffers in read_operation --- libs/lfi | 2 +- src/xpn_client/CMakeLists.txt | 4 +- src/xpn_server/CMakeLists.txt | 2 +- .../fabric_server/fabric_server_comm.cpp | 85 +++++++++++++++---- .../fabric_server/fabric_server_comm.hpp | 5 ++ src/xpn_server/xpn_server.cpp | 4 +- 6 files changed, 81 insertions(+), 21 deletions(-) diff --git a/libs/lfi b/libs/lfi index 82c27d0d4..44dc07a85 160000 --- a/libs/lfi +++ b/libs/lfi @@ -1 +1 @@ -Subproject commit 82c27d0d4f52636b19e476c0179d8dd3d6009c88 +Subproject commit 44dc07a854840f52392e69b26910b3d5b605bdb0 diff --git a/src/xpn_client/CMakeLists.txt b/src/xpn_client/CMakeLists.txt index 97174ba1c..08fd6671b 100644 --- a/src/xpn_client/CMakeLists.txt +++ b/src/xpn_client/CMakeLists.txt @@ -11,11 +11,11 @@ file(GLOB XPN_SOURCE ) add_library(xpn STATIC ${XPN_HEADERS} ${XPN_SOURCE}) -target_link_libraries(xpn xpn_base_c xpn_base_cpp xpn_core xpn_nfi) +target_link_libraries(xpn PRIVATE xpn_base_c xpn_base_cpp xpn_core xpn_nfi lfi) add_library(xpn_shared SHARED $) set_target_properties(xpn_shared PROPERTIES OUTPUT_NAME xpn) -target_link_libraries(xpn_shared PRIVATE xpn_base_c xpn_base_cpp xpn_core xpn_nfi) +target_link_libraries(xpn_shared PRIVATE xpn_base_c xpn_base_cpp xpn_core xpn_nfi lfi) target_include_directories(xpn PUBLIC "${PROJECT_SOURCE_DIR}/src" diff --git a/src/xpn_server/CMakeLists.txt b/src/xpn_server/CMakeLists.txt index 208d696b2..b26a8827e 100644 --- a/src/xpn_server/CMakeLists.txt +++ b/src/xpn_server/CMakeLists.txt @@ -9,7 +9,7 @@ file(GLOB_RECURSE XPN_SERVER_SOURCE add_executable(xpn_server ${XPN_SERVER_SOURCE}) -target_link_libraries(xpn_server PRIVATE 
xpn_base_c xpn_base_cpp) +target_link_libraries(xpn_server PRIVATE xpn_base_c xpn_base_cpp lfi) target_include_directories(xpn_server PRIVATE "${PROJECT_SOURCE_DIR}/src" diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 1205e1fac..dd15bef32 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -27,6 +27,8 @@ #include "base_c/filesystem.h" #include #include "lfi.h" +#include "impl/fabric.hpp" +#include namespace XPN { @@ -106,31 +108,84 @@ void fabric_server_control_comm::disconnect ( int id ) int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_client_id, int &tag_client_id ) { - int msg[2] = {}; - int ret = 0; - int source = -1; - debug_info("[Server="<> Begin"); - // Get message - debug_info("[Server="<(*shm_comm); + + LFI::fabric_msg msg = LFI::LFI::async_recv(shm_msg, sizeof(shm_msg), 0, *shm_request); + if (msg.error < 0){ + return -1; + } } - rank_client_id = source; - tag_client_id = msg[0]; - op = static_cast(msg[1]); + if (!peer_request){ + // Check if comm exists + LFI::fabric_comm *peer_comm = LFI::LFI::get_comm(LFI::LFI::LFI_ANY_COMM_PEER); + if (peer_comm == nullptr){ + return -1; + } + peer_request = std::make_unique(*peer_comm); + + LFI::fabric_msg msg = LFI::LFI::async_recv(peer_msg, sizeof(peer_msg), 0, *peer_request); + if (msg.error < 0){ + return -1; + } + } + + std::vector> requests = {*shm_request, *peer_request}; + + int completed = LFI::LFI::wait_num(requests, 1); + + debug_info("[Server="<entry.tag & 0x0000'00FF'FFFF'0000) >> 16)<<", TAG "<entry.tag & 0x0000'00FF'FFFF'0000) >> 16)<<", TAG "<entry.tag & 0x0000'00FF'FFFF'0000) >> 16; + tag_client_id = shm_msg[0]; + op = static_cast(shm_msg[1]); + + shm_msg[0] = -1; + shm_msg[1] = -1; + + shm_request.release(); + + // shm_request->reset(); + // LFI::fabric_msg msg = LFI::LFI::async_recv(shm_msg, sizeof(shm_msg), 0, 
*shm_request); + // if (msg.error < 0){ + // return -1; + // } + }else if (completed == 1){ + rank_client_id = (peer_request->entry.tag & 0x0000'00FF'FFFF'0000) >> 16; + tag_client_id = peer_msg[0]; + op = static_cast(peer_msg[1]); + + peer_msg[0] = -1; + peer_msg[1] = -1; + + peer_request.release(); + + // peer_request->reset(); + // LFI::fabric_msg msg = LFI::LFI::async_recv(peer_msg, sizeof(peer_msg), 0, *peer_request); + // if (msg.error < 0){ + // return -1; + // } + }else{ + return -1; + } - debug_info("[Server="< shm_request, peer_request; + int shm_msg[2] = {}; + int peer_msg[2] = {}; }; class fabric_server_control_comm : public xpn_server_control_comm diff --git a/src/xpn_server/xpn_server.cpp b/src/xpn_server/xpn_server.cpp index 2320ef305..e74f69d55 100644 --- a/src/xpn_server/xpn_server.cpp +++ b/src/xpn_server/xpn_server.cpp @@ -154,13 +154,13 @@ void xpn_server::accept ( int connection_socket ) static bool only_one = true; if (only_one){ only_one = false; - auto fut = m_worker1->launch([this, general_comm]{ + m_worker1->launch_no_future([this, general_comm]{ this->fabric_dispatcher(general_comm); return 0; }); } }else{ - auto fut = m_worker1->launch([this, comm]{ + auto fut = m_worker1->launch_no_future([this, comm]{ this->dispatcher(comm); return 0; }); From e14d10d2f26a246e977dfaea8ab74fa637df4525 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Fri, 10 Jan 2025 13:01:37 +0100 Subject: [PATCH 20/60] Fix return fut in launch --- src/xpn_server/xpn_server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xpn_server/xpn_server.cpp b/src/xpn_server/xpn_server.cpp index e74f69d55..919182087 100644 --- a/src/xpn_server/xpn_server.cpp +++ b/src/xpn_server/xpn_server.cpp @@ -160,7 +160,7 @@ void xpn_server::accept ( int connection_socket ) }); } }else{ - auto fut = m_worker1->launch_no_future([this, comm]{ + m_worker1->launch_no_future([this, comm]{ this->dispatcher(comm); return 0; }); From 9149136b234f1e363ff9350d65d787fd62785b5e Mon 
Sep 17 00:00:00 2001 From: Dariomnz Date: Mon, 13 Jan 2025 12:44:01 +0100 Subject: [PATCH 21/60] Update fabric_server_comm with ldi async API --- libs/lfi | 2 +- .../fabric_server/fabric_server_comm.cpp | 63 +++++++++---------- .../fabric_server/fabric_server_comm.hpp | 3 +- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/libs/lfi b/libs/lfi index 44dc07a85..fb8ae07f1 160000 --- a/libs/lfi +++ b/libs/lfi @@ -1 +1 @@ -Subproject commit 44dc07a854840f52392e69b26910b3d5b605bdb0 +Subproject commit fb8ae07f1c3982818dd8a66c430837fc2c7cdae4 diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index dd15bef32..2c66b1426 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -27,6 +27,7 @@ #include "base_c/filesystem.h" #include #include "lfi.h" +#include "lfi_async.h" #include "impl/fabric.hpp" #include @@ -111,69 +112,63 @@ int64_t fabric_server_comm::read_operation ( xpn_server_ops &op, int &rank_clien debug_info("[Server="<> Begin"); if (!shm_request){ - // Check if comm exists - LFI::fabric_comm *shm_comm = LFI::LFI::get_comm(LFI::LFI::LFI_ANY_COMM_SHM); - if (shm_comm == nullptr){ - return -1; + shm_request = {lfi_request_create(LFI_ANY_COMM_SHM), lfi_request_free}; + if (!shm_request){ + print("Error shm_request is null"); } - shm_request = std::make_unique(*shm_comm); - LFI::fabric_msg msg = LFI::LFI::async_recv(shm_msg, sizeof(shm_msg), 0, *shm_request); - if (msg.error < 0){ + if (lfi_trecv_async(shm_request.get(), shm_msg, sizeof(shm_msg), 0) < 0){ + print("Error in lfi_trecv_async") return -1; } } if (!peer_request){ - // Check if comm exists - LFI::fabric_comm *peer_comm = LFI::LFI::get_comm(LFI::LFI::LFI_ANY_COMM_PEER); - if (peer_comm == nullptr){ - return -1; + peer_request = {lfi_request_create(LFI_ANY_COMM_PEER), lfi_request_free}; + if (!peer_request){ + print("Error peer_request is null"); } - 
peer_request = std::make_unique(*peer_comm); - LFI::fabric_msg msg = LFI::LFI::async_recv(peer_msg, sizeof(peer_msg), 0, *peer_request); - if (msg.error < 0){ + if (lfi_trecv_async(peer_request.get(), peer_msg, sizeof(peer_msg), 0) < 0){ + print("Error in lfi_trecv_async") return -1; } } - std::vector> requests = {*shm_request, *peer_request}; + lfi_request *requests[2] = {shm_request.get(), peer_request.get()}; - int completed = LFI::LFI::wait_num(requests, 1); + int completed = lfi_wait_many(requests, 2, 1); - debug_info("[Server="<entry.tag & 0x0000'00FF'FFFF'0000) >> 16)<<", TAG "<entry.tag & 0x0000'00FF'FFFF'0000) >> 16)<<", TAG "<entry.tag & 0x0000'00FF'FFFF'0000) >> 16; + rank_client_id = lfi_request_source(shm_request.get()); tag_client_id = shm_msg[0]; op = static_cast(shm_msg[1]); shm_msg[0] = -1; shm_msg[1] = -1; - shm_request.release(); - - // shm_request->reset(); - // LFI::fabric_msg msg = LFI::LFI::async_recv(shm_msg, sizeof(shm_msg), 0, *shm_request); - // if (msg.error < 0){ - // return -1; - // } + // One option is to free the request and create another one or reuse the requets for a new recv + // shm_request.release(); + if (lfi_trecv_async(shm_request.get(), shm_msg, sizeof(shm_msg), 0) < 0){ + print("Error in lfi_trecv_async") + return -1; + } }else if (completed == 1){ - rank_client_id = (peer_request->entry.tag & 0x0000'00FF'FFFF'0000) >> 16; + rank_client_id = lfi_request_source(peer_request.get()); tag_client_id = peer_msg[0]; op = static_cast(peer_msg[1]); peer_msg[0] = -1; peer_msg[1] = -1; - peer_request.release(); - - // peer_request->reset(); - // LFI::fabric_msg msg = LFI::LFI::async_recv(peer_msg, sizeof(peer_msg), 0, *peer_request); - // if (msg.error < 0){ - // return -1; - // } + // One option is to free the request and create another one or reuse the requets for a new recv + // peer_request.release(); + if (lfi_trecv_async(peer_request.get(), peer_msg, sizeof(peer_msg), 0) < 0){ + print("Error in lfi_trecv_async") + return -1; + 
} }else{ return -1; } diff --git a/src/xpn_server/fabric_server/fabric_server_comm.hpp b/src/xpn_server/fabric_server/fabric_server_comm.hpp index c2f3c5d9d..2a454d5c3 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.hpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.hpp @@ -42,7 +42,8 @@ namespace XPN int64_t write_data(const void *data, int64_t size, int rank_client_id, int tag_client_id) override; public: int m_comm; - std::unique_ptr shm_request, peer_request; + std::unique_ptr shm_request = {nullptr, nullptr}; + std::unique_ptr peer_request = {nullptr, nullptr}; int shm_msg[2] = {}; int peer_msg[2] = {}; }; From 0d25a365db185b4974758f1b4cea5a7d2104d4f6 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Wed, 15 Jan 2025 18:25:31 +0100 Subject: [PATCH 22/60] Fix some imports in fabric_server_comm --- src/xpn_server/fabric_server/fabric_server_comm.cpp | 1 - src/xpn_server/fabric_server/fabric_server_comm.hpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/xpn_server/fabric_server/fabric_server_comm.cpp b/src/xpn_server/fabric_server/fabric_server_comm.cpp index 2c66b1426..49be1ff4c 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.cpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.cpp @@ -28,7 +28,6 @@ #include #include "lfi.h" #include "lfi_async.h" -#include "impl/fabric.hpp" #include namespace XPN diff --git a/src/xpn_server/fabric_server/fabric_server_comm.hpp b/src/xpn_server/fabric_server/fabric_server_comm.hpp index 2a454d5c3..842f2a29c 100644 --- a/src/xpn_server/fabric_server/fabric_server_comm.hpp +++ b/src/xpn_server/fabric_server/fabric_server_comm.hpp @@ -25,8 +25,7 @@ #include #include "xpn_server/xpn_server_comm.hpp" - -#include "impl/fabric.hpp" +#include "lfi_async.h" namespace XPN { From cb43ea2e06aa88ab93fa5bcda9b66093df6753a2 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Thu, 16 Jan 2025 16:50:45 +0100 Subject: [PATCH 23/60] Change storage of xpn_file to shared_ptr --- libs/lfi | 2 +- 
src/xpn_client/xpn/utils/xpn_file_table.cpp | 44 +++----------- src/xpn_client/xpn/xpn_api/xpn_api_dir.cpp | 20 +++---- src/xpn_client/xpn/xpn_api/xpn_api_f_file.cpp | 5 +- src/xpn_client/xpn/xpn_api/xpn_api_file.cpp | 60 +++++++++---------- src/xpn_client/xpn/xpn_api/xpn_api_rw.cpp | 58 +++++++++--------- src/xpn_client/xpn/xpn_api/xpn_api_stat.cpp | 12 ++-- src/xpn_client/xpn/xpn_file_table.hpp | 18 ++++-- 8 files changed, 95 insertions(+), 124 deletions(-) diff --git a/libs/lfi b/libs/lfi index fb8ae07f1..e1b4d41ca 160000 --- a/libs/lfi +++ b/libs/lfi @@ -1 +1 @@ -Subproject commit fb8ae07f1c3982818dd8a66c430837fc2c7cdae4 +Subproject commit e1b4d41ca106004dae483ca35600fa46e59093ab diff --git a/src/xpn_client/xpn/utils/xpn_file_table.cpp b/src/xpn_client/xpn/utils/xpn_file_table.cpp index fb148e2bc..46e363409 100644 --- a/src/xpn_client/xpn/utils/xpn_file_table.cpp +++ b/src/xpn_client/xpn/utils/xpn_file_table.cpp @@ -38,7 +38,7 @@ namespace XPN } } - int xpn_file_table::insert(const xpn_file& file) + int xpn_file_table::insert(std::shared_ptr file) { int fd; if (m_free_keys.empty()){ @@ -47,46 +47,16 @@ namespace XPN fd = m_free_keys.front(); m_free_keys.pop(); } - auto file_ptr = new (std::nothrow) xpn_file(file); - if (file_ptr == nullptr){ - return -1; - } - auto pair = std::make_pair(fd, file_ptr); - pair.second->m_links++; - m_files.insert(pair); - return fd; - } - - int xpn_file_table::insert(xpn_file* file) - { - int fd; - if (m_free_keys.empty()){ - fd = secuencial_key++; - }else{ - fd = m_free_keys.front(); - m_free_keys.pop(); - } - auto pair = std::make_pair(fd, file); - pair.second->m_links++; - m_files.insert(pair); + m_files.emplace(std::make_pair(fd, file)); return fd; } bool xpn_file_table::remove(int fd) { - bool has_fd = has(fd); - if (!has_fd){ - return false; - } - auto file = m_files.at(fd); int res = m_files.erase(fd); if (res == 1){ m_free_keys.push(fd); } - file->m_links--; - if (file->m_links <= 0){ - delete file; - } return res == 1 ? 
true : false; } @@ -94,16 +64,16 @@ namespace XPN int xpn_file_table::dup(int fd, int new_fd) { int ret = -1; - auto file = m_files.at(fd); - file->m_links++; + auto file = get(fd); + if (!file){ + return -1; + } if (new_fd != -1){ // Like posix dup2 close silently if its open if (has(new_fd)){ xpn_api::get_instance().close(new_fd); } - auto pair = std::make_pair(new_fd, file); - pair.second->m_links++; - m_files.insert(pair); + m_files.emplace(std::make_pair(new_fd, file)); ret = new_fd; }else{ ret = insert(file); diff --git a/src/xpn_client/xpn/xpn_api/xpn_api_dir.cpp b/src/xpn_client/xpn/xpn_api/xpn_api_dir.cpp index 4ca7c4324..ef22fbe5d 100644 --- a/src/xpn_client/xpn/xpn_api/xpn_api_dir.cpp +++ b/src/xpn_client/xpn/xpn_api/xpn_api_dir.cpp @@ -81,14 +81,14 @@ namespace XPN return -1; } - auto& file = m_file_table.get(dirp->fd); - XPN_DEBUG("Close : '"<> v_res(file.m_data_vfh.size()); - for (size_t i = 0; i < file.m_data_vfh.size(); i++) + auto file = m_file_table.get(dirp->fd); + XPN_DEBUG("Close : '"<m_path<<"'") + std::vector> v_res(file->m_data_vfh.size()); + for (size_t i = 0; i < file->m_data_vfh.size(); i++) { - if (file.m_data_vfh[i].is_initialized()){ + if (file->m_data_vfh[i].is_initialized()){ v_res[i] = m_worker->launch([i, &file](){ - return file.m_part.m_data_serv[i]->nfi_closedir(file.m_data_vfh[i]); + return file->m_part.m_data_serv[i]->nfi_closedir(file->m_data_vfh[i]); }); } } @@ -122,17 +122,17 @@ namespace XPN return nullptr; } - auto& file = m_file_table.get(dirp->fd); + auto file = m_file_table.get(dirp->fd); - int master_dir = file.m_mdata.master_dir(); - file.initialize_vfh_dir(master_dir); + int master_dir = file->m_mdata.master_dir(); + file->initialize_vfh_dir(master_dir); struct ::dirent * entry = new (std::nothrow) dirent; if (entry == nullptr){ return nullptr; } auto fut = m_worker->launch([master_dir, &file, &entry](){ - return file.m_part.m_data_serv[master_dir]->nfi_readdir(file.m_data_vfh[master_dir], *entry); + return 
file->m_part.m_data_serv[master_dir]->nfi_readdir(file->m_data_vfh[master_dir], *entry); }); res = fut.get(); diff --git a/src/xpn_client/xpn/xpn_api/xpn_api_f_file.cpp b/src/xpn_client/xpn/xpn_api/xpn_api_f_file.cpp index a2cc8a803..35f961aa7 100644 --- a/src/xpn_client/xpn/xpn_api/xpn_api_f_file.cpp +++ b/src/xpn_client/xpn/xpn_api/xpn_api_f_file.cpp @@ -122,8 +122,9 @@ namespace XPN { XPN_DEBUG_BEGIN; long res = 0; - if (m_file_table.has(stream->_fileno)){ - res = m_file_table.get(stream->_fileno).m_offset; + auto file = m_file_table.get(stream->_fileno); + if (file){ + res = file->m_offset; } XPN_DEBUG_END; return res; diff --git a/src/xpn_client/xpn/xpn_api/xpn_api_file.cpp b/src/xpn_client/xpn/xpn_api/xpn_api_file.cpp index ad71ecb41..2a3ca6306 100644 --- a/src/xpn_client/xpn/xpn_api/xpn_api_file.cpp +++ b/src/xpn_client/xpn/xpn_api/xpn_api_file.cpp @@ -58,30 +58,30 @@ namespace XPN return -1; } - xpn_file file(file_path, m_partitions.at(part_name)); + std::shared_ptr file = std::make_shared(file_path, m_partitions.at(part_name)); if ((O_DIRECTORY != (flags & O_DIRECTORY))) { - res = read_metadata(file.m_mdata); + res = read_metadata(file->m_mdata); if (res < 0 && O_CREAT != (flags & O_CREAT)){ XPN_DEBUG_END_CUSTOM(path<<", "<m_mdata.m_data.is_valid()){ + file->m_mdata.m_data.fill(file->m_mdata); } } if ((O_CREAT == (flags & O_CREAT))){ - std::vector> v_res(file.m_part.m_data_serv.size()); - for (size_t i = 0; i < file.m_part.m_data_serv.size(); i++) + std::vector> v_res(file->m_part.m_data_serv.size()); + for (size_t i = 0; i < file->m_part.m_data_serv.size(); i++) { - auto& serv = file.m_part.m_data_serv[i]; - if (file.exist_in_serv(i)){ + auto& serv = file->m_part.m_data_serv[i]; + if (file->exist_in_serv(i)){ v_res[i] = m_worker->launch([i, &serv, &file, flags, mode](){ - return serv->nfi_open(file.m_path, flags, mode, file.m_data_vfh[i]); + return serv->nfi_open(file->m_path, flags, mode, file->m_data_vfh[i]); }); } } @@ -101,18 +101,18 @@ namespace XPN 
if ((O_DIRECTORY != (flags & O_DIRECTORY))) { - write_metadata(file.m_mdata, false); + write_metadata(file->m_mdata, false); } }else{ - int master_file = file.m_mdata.master_file(); + int master_file = file->m_mdata.master_file(); std::future fut; if ((O_DIRECTORY == (flags & O_DIRECTORY))){ fut = m_worker->launch([&file, master_file, flags, mode](){ - return file.m_part.m_data_serv[master_file]->nfi_opendir(file.m_path, file.m_data_vfh[master_file]); + return file->m_part.m_data_serv[master_file]->nfi_opendir(file->m_path, file->m_data_vfh[master_file]); }); }else{ fut = m_worker->launch([&file, master_file, flags, mode](){ - return file.m_part.m_data_serv[master_file]->nfi_open(file.m_path, flags, mode, file.m_data_vfh[master_file]); + return file->m_part.m_data_serv[master_file]->nfi_open(file->m_path, flags, mode, file->m_data_vfh[master_file]); }); } res = fut.get(); @@ -120,12 +120,12 @@ namespace XPN if ((O_DIRECTORY == (flags & O_DIRECTORY))){ - file.m_type = file_type::dir; + file->m_type = file_type::dir; }else{ - file.m_type = file_type::file; + file->m_type = file_type::file; } - file.m_flags = flags; - file.m_mode = mode; + file->m_flags = flags; + file->m_mode = mode; res = m_file_table.insert(file); XPN_DEBUG_END_CUSTOM(path<<", "<> v_res(file.m_data_vfh.size()); - for (size_t i = 0; i < file.m_data_vfh.size(); i++) + std::vector> v_res(file->m_data_vfh.size()); + for (size_t i = 0; i < file->m_data_vfh.size(); i++) { - if (file.m_data_vfh[i].fd != -1) + if (file->m_data_vfh[i].fd != -1) { v_res[i] = m_worker->launch([i, &file](){ - return file.m_part.m_data_serv[i]->nfi_close(file.m_data_vfh[i]); + return file->m_part.m_data_serv[i]->nfi_close(file->m_data_vfh[i]); }); } } @@ -305,13 +305,10 @@ namespace XPN { XPN_DEBUG_BEGIN_CUSTOM(fd); int res = 0; - if (!m_file_table.has(fd)) - { + res = m_file_table.dup(fd); + if (res < 0){ errno = EBADF; - XPN_DEBUG_END_CUSTOM(fd); - return -1; } - res = m_file_table.dup(fd); XPN_DEBUG_END_CUSTOM(fd); return 
res; } @@ -320,13 +317,10 @@ namespace XPN { XPN_DEBUG_BEGIN_CUSTOM(fd<<", "<m_flags == O_WRONLY){ errno = EBADF; res = -1; XPN_DEBUG_END_CUSTOM(fd<<", "<m_type == file_type::dir){ errno = EISDIR; res = -1; XPN_DEBUG_END_CUSTOM(fd<<", "<(buffer), size); + xpn_rw_buffer rw_buff(*file.get(), file->m_offset, const_cast(buffer), size); rw_buff.calculate_reads(); std::vector> v_res(rw_buff.num_ops()); @@ -73,7 +73,7 @@ namespace XPN for (size_t i = 0; i < rw_buff.m_ops.size(); i++) { if (!rw_buff.m_ops[i].empty()){ - res = file.initialize_vfh(i); + res = file->initialize_vfh(i); if (res < 0){ break; } @@ -82,7 +82,7 @@ namespace XPN { v_res[index++] = m_worker->launch([i, &file, &op](){ XPN_DEBUG("Serv "<nfi_read(file.m_data_vfh[i], op.get_buffer(), op.offset_serv+xpn_metadata::HEADER_SIZE, op.get_size()); + return file->m_part.m_data_serv[i]->nfi_read(file->m_data_vfh[i], op.get_buffer(), op.offset_serv+xpn_metadata::HEADER_SIZE, op.get_size()); }); } } @@ -94,7 +94,7 @@ namespace XPN if (!fut.valid()) continue; aux_res = fut.get(); if (aux_res < 0){ - XPN_DEBUG_END_CUSTOM(file.m_path<<", "<m_path<<", "< 0){ - file.m_offset += res; + file->m_offset += res; } rw_buff.fix_ops_reads(); @@ -117,14 +117,14 @@ namespace XPN XPN_DEBUG_BEGIN_CUSTOM(fd<<", "<m_flags == O_RDONLY){ errno = EBADF; res = -1; XPN_DEBUG_END_CUSTOM(fd<<", "<m_type == file_type::dir){ errno = EISDIR; res = -1; XPN_DEBUG_END_CUSTOM(fd<<", "<(buffer), size); + xpn_rw_buffer rw_buff(*file.get(), file->m_offset, const_cast(buffer), size); rw_buff.calculate_writes(); std::vector> v_res(rw_buff.num_ops()); @@ -160,7 +160,7 @@ namespace XPN for (size_t i = 0; i < rw_buff.m_ops.size(); i++) { if (!rw_buff.m_ops[i].empty()){ - res = file.initialize_vfh(i); + res = file->initialize_vfh(i); if (res < 0){ break; } @@ -169,7 +169,7 @@ namespace XPN { v_res[index++] = m_worker->launch([i, &file, &op](){ XPN_DEBUG("Serv "<nfi_write(file.m_data_vfh[i], op.get_buffer(), op.offset_serv+xpn_metadata::HEADER_SIZE, 
op.get_size()); + return file->m_part.m_data_serv[i]->nfi_write(file->m_data_vfh[i], op.get_buffer(), op.offset_serv+xpn_metadata::HEADER_SIZE, op.get_size()); }); } } @@ -181,21 +181,21 @@ namespace XPN if (!fut.valid()) continue; aux_res = fut.get(); if (aux_res < 0){ - XPN_DEBUG_END_CUSTOM(file.m_path<<", ops: "<m_path<<", ops: "<m_part.m_replication_level+1); }else{ res = static_cast(rw_buff.m_size); - file.m_offset += res; + file->m_offset += res; // Update file_size in metadata - if (file.m_offset > static_cast(file.m_mdata.m_data.file_size)){ - file.m_mdata.m_data.file_size = file.m_offset; - write_metadata(file.m_mdata, true); + if (file->m_offset > static_cast(file->m_mdata.m_data.file_size)){ + file->m_mdata.m_data.file_size = file->m_offset; + write_metadata(file->m_mdata, true); } } @@ -209,13 +209,13 @@ namespace XPN off_t res = 0; struct ::stat st; - if (!m_file_table.has(fd)){ + auto file = m_file_table.get(fd); + if (!file){ errno = EBADF; res = -1; XPN_DEBUG_END_CUSTOM(fd<<", "<m_offset = offset; } break; case SEEK_CUR: - if (file.m_offset+offset<0) + if (file->m_offset+offset<0) { errno = EINVAL; res = -1; @@ -241,7 +241,7 @@ namespace XPN return res; } else { - file.m_offset += offset; + file->m_offset += offset; } break; @@ -261,7 +261,7 @@ namespace XPN return res; } else { - file.m_offset = st.st_size + offset; + file->m_offset = st.st_size + offset; } break; @@ -272,7 +272,7 @@ namespace XPN return res; } - res = file.m_offset; + res = file->m_offset; XPN_DEBUG_END_CUSTOM(fd<<", "<m_part.m_name + "/" + file->m_path; res = stat(file_path.c_str(), sb); XPN_DEBUG_END_CUSTOM(fd); @@ -204,16 +204,16 @@ namespace XPN XPN_DEBUG_BEGIN; int res = 0; - if (!m_file_table.has(fd)) + auto file = m_file_table.get(fd); + if (!file) { errno = EBADF; XPN_DEBUG_END_CUSTOM(fd); return -1; } - auto& file = m_file_table.get(fd); // Redirect to statvfs to not duplicate code - std::string file_path = file.m_part.m_name + "/" + file.m_path; + std::string file_path = 
file->m_part.m_name + "/" + file->m_path; res = statvfs(file_path.c_str(), buf); XPN_DEBUG_END; diff --git a/src/xpn_client/xpn/xpn_file_table.hpp b/src/xpn_client/xpn/xpn_file_table.hpp index 749796eb3..80395e6fb 100644 --- a/src/xpn_client/xpn/xpn_file_table.hpp +++ b/src/xpn_client/xpn/xpn_file_table.hpp @@ -41,13 +41,19 @@ namespace XPN xpn_file_table(xpn_file_table&&) = delete; // Delete move assignment operator xpn_file_table& operator=(xpn_file_table&&) = delete; - public: + public: bool has(int fd) {return m_files.find(fd) != m_files.end();} - // It must be checked if fd is in the file_table with has(fd) - xpn_file& get(int fd) {return *m_files.at(fd); } - int insert(const xpn_file& file); - int insert(xpn_file* file); + std::shared_ptr get(int fd) { + auto it = m_files.find(fd); + if (it == m_files.end()){ + return nullptr; + } + return it->second; + } + + // int insert(const xpn_file& file); + int insert(std::shared_ptr file); bool remove(int fd); // It must be checked if fd is in the file_table with has(fd) @@ -55,7 +61,7 @@ namespace XPN std::string to_string(); private: - std::unordered_map m_files; + std::unordered_map> m_files; std::queue m_free_keys; int secuencial_key = 1; }; From cbd9339ef3e9d23d706350fc5dee0a59cbb333fe Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Thu, 23 Jan 2025 14:20:58 +0100 Subject: [PATCH 24/60] Fix some scripts --- scripts/compile/platform/unito-dario.sh | 4 ++-- scripts/compile/software/xpn.sh | 5 +++-- scripts/execute/xpn.sh | 26 ++++++++++++------------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/scripts/compile/platform/unito-dario.sh b/scripts/compile/platform/unito-dario.sh index d26c048ea..21ae8e623 100755 --- a/scripts/compile/platform/unito-dario.sh +++ b/scripts/compile/platform/unito-dario.sh @@ -25,7 +25,7 @@ # 1) software (if needed)... #spack load openmpi #spack load mpich -spack load pkg-config +#spack load pkg-config # 2) working path... 
#MPICC_PATH=$HOME/opt/spack/linux-ubuntu20.04-zen/gcc-9.4.0/openmpi-4.1.3-4bpvwm3lcbftmjki6en35c4i5od6wjbr/bin/mpicc @@ -44,4 +44,4 @@ export PATH=$HOME/dariomnz/bin/mpich-ch4-fabric/bin:$PATH $BASE_PATH/../software/xpn.sh -m $MPICC_PATH -f $FABRIC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../xpn $BASE_PATH/../software/ior.sh -m $MPICC_PATH/mpicc -i $INSTALL_PATH -s $BASE_PATH/../../../../ior # $BASE_PATH/../software/lz4.sh -m $MPICC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../io500/build/pfind/lz4/ -# $BASE_PATH/../software/io500.sh -m $MPICC_PATH -i $INSTALL_PATH -s $BASE_PATH/../../../../io500 +# $BASE_PATH/../software/io500.sh -m $MPICC_PATH/mpicc -i $INSTALL_PATH -s $BASE_PATH/../../../../io500 diff --git a/scripts/compile/software/xpn.sh b/scripts/compile/software/xpn.sh index 951241663..016eb60fd 100755 --- a/scripts/compile/software/xpn.sh +++ b/scripts/compile/software/xpn.sh @@ -1,5 +1,6 @@ #!/bin/bash #set -x +set -e # # Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos @@ -89,13 +90,13 @@ echo " * XPN mpi: $MPICC_PATH" echo " * XPN libfabric: $LIBFABRIC_PATH" pushd . cd "$SRC_PATH" -rm -r build +# rm -r build mkdir -p build cd build cmake -S .. -B . -D BUILD_TESTS=ON -D CMAKE_INSTALL_PREFIX="${INSTALL_PATH}/xpn" -D CMAKE_C_COMPILER="${MPICC_PATH}"/mpicc -D CMAKE_CXX_COMPILER="${MPICC_PATH}"/mpicxx -D ENABLE_FABRIC_SERVER="${LIBFABRIC_PATH}" -cmake --build . -j +cmake --build . -j "$(nproc)" cmake --install . 
diff --git a/scripts/execute/xpn.sh b/scripts/execute/xpn.sh index c45effc51..71fa9ceef 100755 --- a/scripts/execute/xpn.sh +++ b/scripts/execute/xpn.sh @@ -97,16 +97,16 @@ start_xpn_servers() { if command -v srun &> /dev/null then # Create dir - srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ -w "${HOSTFILE}" \ mkdir -p ${XPN_STORAGE_PATH} if [[ ${SERVER_TYPE} == "sck" ]]; then - srun -n "${NODE_NUM}" -N "${NODE_NUM}"\ + srun -n "${NODE_NUM}" -N "${NODE_NUM}"\ -w "${HOSTFILE}" \ --export=ALL \ "${BASE_DIR_BUILD}"/xpn_server/xpn_server -s ${SERVER_TYPE} -t pool "${ARGS}" & else - srun -n "${NODE_NUM}" -N "${NODE_NUM}" --mpi=none \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" --mpi=none \ -w "${HOSTFILE}" \ --export=ALL \ "${BASE_DIR_BUILD}"/xpn_server/xpn_server -s ${SERVER_TYPE} -t pool "${ARGS}" & @@ -224,11 +224,11 @@ rebuild_xpn_servers() { if command -v srun &> /dev/null then # Create dir - srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ -w "${HOSTFILE}" \ mkdir -p ${XPN_STORAGE_PATH} hosts=$(cat ${DEATH_FILE} ${REBUILD_FILE} | sort | paste -sd "," -) - srun -n "${NODE_NUM_SUM}" \ + srun -n "${NODE_NUM_SUM}" \ -w "${hosts}" \ "${BASE_DIR_BUILD}"/utils/xpn_rebuild_active_reader "${XPN_STORAGE_PATH}" "${DEATH_FILE}" "${REBUILD_FILE}" 524288 "${XPN_REPLICATION_LEVEL}" else @@ -254,7 +254,7 @@ preload_xpn() { # 1. Copy if command -v srun &> /dev/null then - srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ -w "${HOSTFILE}" \ "${BASE_DIR_BUILD}"/utils/xpn_preload "${SOURCE_PATH}" "${XPN_STORAGE_PATH}" 524288 "${XPN_REPLICATION_LEVEL}" else @@ -274,7 +274,7 @@ flush_xpn() { # 1. 
Copy if command -v srun &> /dev/null then - srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ -w "${HOSTFILE}" \ "${BASE_DIR_BUILD}"/utils/xpn_flush "${XPN_STORAGE_PATH}" "${DEST_PATH}" 524288 "${XPN_REPLICATION_LEVEL}" else @@ -296,7 +296,7 @@ expand_xpn() { # 1. Copy if command -v srun &> /dev/null then - srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ -w "${HOSTFILE}" \ "${BASE_DIR_BUILD}"/utils/xpn_expand "${XPN_STORAGE_PATH}" "${NODE_NUM_REST}" else @@ -326,7 +326,7 @@ shrink_xpn() { # 1. Copy if command -v srun &> /dev/null then - srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ + srun -n "${NODE_NUM}" -N "${NODE_NUM}" \ -w "${HOSTFILE}" \ "${BASE_DIR_BUILD}"/utils/xpn_shrink "${XPN_STORAGE_PATH}" "${hostlist}" else @@ -367,7 +367,7 @@ usage_details() { echo "" echo " optional arguments:" echo " -h, --help Shows this help message and exits" - echo " -e, --execute Server type: mpi, sck or tcp." + echo " -S, --server_type Server type: mpi, sck or tcp." echo " -a, --args Add various additional daemon arguments." echo " -f, --foreground Starts the script in the foreground. Daemons are stopped by pressing 'q'." echo " -c, --config Path to configuration file." 
@@ -390,14 +390,14 @@ usage_details() { get_opts() { # Taken the general idea from https://stackoverflow.com/questions/70951038/how-to-use-getopt-long-option-in-bash-script mkconf_name=$(basename "$0") - mkconf_short_opt=e:r:w:s:t:x:d:k:p:n:a:c:m:l:b:fvh - mkconf_long_opt=execute:,rootdir:,workdir:,source_path:,destination_path:,xpn_storage_path:,numnodes:,args:,config:,deployment_file:,foreground_file,hostfile:,deathfile:,rebuildfile:,host:,replication_level:,verbose,help + mkconf_short_opt=S:r:w:s:t:x:d:k:p:n:a:c:m:l:b:fvh + mkconf_long_opt=server_type:,rootdir:,workdir:,source_path:,destination_path:,xpn_storage_path:,numnodes:,args:,config:,deployment_file:,foreground_file,hostfile:,deathfile:,rebuildfile:,host:,replication_level:,verbose,help TEMP=$(getopt -o $mkconf_short_opt --long $mkconf_long_opt --name "$mkconf_name" -- "$@") eval set -- "${TEMP}" while :; do case "${1}" in - -e | --execute ) SERVER_TYPE=$2; shift 2 ;; + -S | --server_type ) SERVER_TYPE=$2; shift 2 ;; -r | --rootdir ) DIR_ROOT=$2; shift 2 ;; -w | --workdir ) WORKDIR=$2; shift 2 ;; -s | --source_path ) SOURCE_PATH=$2; shift 2 ;; From 677a2faa8ef4f0398bd4806a6876b8c787529630 Mon Sep 17 00:00:00 2001 From: Dariomnz Date: Thu, 23 Jan 2025 14:39:36 +0100 Subject: [PATCH 25/60] Introduce the xpn_controller --- CMakeLists.txt | 1 + libs/lfi | 2 +- scripts/compile/software/xpn.sh | 2 +- scripts/execute/mk_conf.sh | 1 + src/base_cpp/CMakeLists.txt | 2 +- src/base_cpp/args.hpp | 193 +++++++++ src/base_cpp/debug.hpp | 3 + src/base_cpp/socket.cpp | 121 +++++- src/base_cpp/socket.hpp | 34 +- src/base_cpp/subprocess.hpp | 142 +++++++ src/base_cpp/xpn_env.cpp | 27 +- src/base_cpp/xpn_env.hpp | 3 +- src/utils/xpn_server_monitor.cpp | 4 +- src/utils/xpn_server_stats.cpp | 4 +- .../nfi_fabric_server_comm.cpp | 4 +- .../nfi_mpi_server/nfi_mpi_server_comm.cpp | 6 +- .../nfi_sck_server/nfi_sck_server_comm.cpp | 4 +- src/xpn_client/xpn/utils/xpn_conf.cpp | 12 +- src/xpn_client/xpn/xpn_conf.hpp | 14 +- 
src/xpn_controller/CMakeLists.txt | 19 + src/xpn_controller/xpn_controller.cpp | 60 +++ src/xpn_controller/xpn_controller.hpp | 174 ++++++++ src/xpn_controller/xpn_controller_ops.cpp | 377 ++++++++++++++++++ src/xpn_controller/xpn_controller_recv.cpp | 235 +++++++++++ src/xpn_controller/xpn_controller_send.cpp | 187 +++++++++ src/xpn_server/xpn_server.cpp | 30 +- test/CMakeLists.txt | 1 + test/integrity/xpn_reconnect/CMakeLists.txt | 5 + .../integrity/xpn_reconnect/xpn_reconnect.cpp | 104 +++++ 29 files changed, 1707 insertions(+), 64 deletions(-) create mode 100644 src/base_cpp/args.hpp create mode 100644 src/base_cpp/subprocess.hpp create mode 100644 src/xpn_controller/CMakeLists.txt create mode 100644 src/xpn_controller/xpn_controller.cpp create mode 100644 src/xpn_controller/xpn_controller.hpp create mode 100644 src/xpn_controller/xpn_controller_ops.cpp create mode 100644 src/xpn_controller/xpn_controller_recv.cpp create mode 100644 src/xpn_controller/xpn_controller_send.cpp create mode 100644 test/integrity/xpn_reconnect/CMakeLists.txt create mode 100644 test/integrity/xpn_reconnect/xpn_reconnect.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 907a34d50..620c7b235 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ add_subdirectory(src/base_cpp) add_subdirectory(src/bypass) add_subdirectory(src/xpn_client) add_subdirectory(src/xpn_server) +add_subdirectory(src/xpn_controller) add_subdirectory(src/utils) diff --git a/libs/lfi b/libs/lfi index e1b4d41ca..9e6bea9c3 160000 --- a/libs/lfi +++ b/libs/lfi @@ -1 +1 @@ -Subproject commit e1b4d41ca106004dae483ca35600fa46e59093ab +Subproject commit 9e6bea9c37a8a1412a013696885ff69f116a2394 diff --git a/scripts/compile/software/xpn.sh b/scripts/compile/software/xpn.sh index 016eb60fd..33b6ae5c7 100755 --- a/scripts/compile/software/xpn.sh +++ b/scripts/compile/software/xpn.sh @@ -1,5 +1,5 @@ #!/bin/bash -#set -x +# set -x set -e # diff --git a/scripts/execute/mk_conf.sh b/scripts/execute/mk_conf.sh 
index f06ceaba9..1ee9d3442 100755 --- a/scripts/execute/mk_conf.sh +++ b/scripts/execute/mk_conf.sh @@ -123,6 +123,7 @@ mk_conf_file_from_args() { echo "bsize = ${XPN_PARTITION_BSIZE}" >> ${CONFNAME} echo "replication_level = ${XPN_REPLICATION_LEVEL}" >> ${CONFNAME} echo "partition_name = ${XPN_PARTITION_NAME}" >> ${CONFNAME} + echo "controler_url = $(hostname)" >> ${CONFNAME} ITER=1 while read line diff --git a/src/base_cpp/CMakeLists.txt b/src/base_cpp/CMakeLists.txt index f387126ca..09d9351f6 100644 --- a/src/base_cpp/CMakeLists.txt +++ b/src/base_cpp/CMakeLists.txt @@ -8,6 +8,6 @@ file(GLOB XPN_BASE_CPP_SOURCE ) add_library(xpn_base_cpp OBJECT ${XPN_BASE_CPP_SOURCE} ${XPN_BASE_CPP_HEADERS}) -target_include_directories(xpn_base_cpp PRIVATE +target_include_directories(xpn_base_cpp PUBLIC "${PROJECT_SOURCE_DIR}/src" ) \ No newline at end of file diff --git a/src/base_cpp/args.hpp b/src/base_cpp/args.hpp new file mode 100644 index 000000000..ee9c0356d --- /dev/null +++ b/src/base_cpp/args.hpp @@ -0,0 +1,193 @@ + +/* + * Copyright 2020-2024 Felix Garcia Carballeira, Diego Camarmas Alonso, Alejandro Calderon Mateos, Dario Muñoz Muñoz + * + * This file is part of Expand. + * + * Expand is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Expand is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Expand. If not, see . 
+ * + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "debug.hpp" + +namespace XPN { + +class args { + public: + struct option { + enum class opt_type { value, flag }; + std::string sort_name; + std::string long_name; + std::string help; + opt_type type = opt_type::flag; + }; + + args(int argc, char* argv[], const std::vector