-
Notifications
You must be signed in to change notification settings - Fork 7
Open
Description
I got the previous 2 steps running OK but got stuck on step 3. Do I need a specific version of CUDA? I am using openmpi-4.0.7 - any idea what else I could try?
$ make MPI=1 MPI_HOME=$HOME/mpi CUDA_HOME=/usr/local/cuda-12.4 NCCL_HOME=$HOME/msccl/executor/msccl-executor-nccl/build/ -j
make -C src build BUILDDIR=/gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build
make[1]: Entering directory '/gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/src'
Compiling /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/verifiable/verifiable.o
Compiling all_reduce.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/all_reduce.o
Compiling common.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/common.o
Compiling all_gather.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/all_gather.o
Compiling broadcast.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/broadcast.o
Compiling reduce_scatter.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/reduce_scatter.o
Compiling reduce.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/reduce.o
Compiling alltoall.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/alltoall.o
Compiling scatter.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/scatter.o
Compiling gather.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/gather.o
Compiling sendrecv.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/sendrecv.o
Compiling hypercube.cu > /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/hypercube.o
alltoall.cu(60): error: identifier "ncclAllToAll" is undefined
do { ncclResult_t res = ncclAllToAll(sendbuff, recvbuff, count, type, comm, stream); if (res != ncclSuccess) { char hostname[1024]; getHostName(hostname, 1024); printf("%s: Test NCCL failure %s:%d " "'%s / %s'\n", hostname,"alltoall.cu",60, ncclGetErrorString(res), ncclGetLastError(
^
1 error detected in the compilation of "alltoall.cu".
make[1]: *** [Makefile:94: /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/alltoall.o] Error 2
make[1]: *** Waiting for unfinished jobs....
../verifiable/verifiable.cu(969): error: identifier "ncclFp8E4M3" is undefined
case ncclFp8E4M3: prepareInput2<<<block_n, 512, 0, stream>>>((__nv_fp8_e4m3*)elts, elt_n, op, rank_n, rank_me, seed, elt_ix0); break;
^
../verifiable/verifiable.cu(970): error: identifier "ncclFp8E5M2" is undefined
case ncclFp8E5M2: prepareInput2<<<block_n, 512, 0, stream>>>((__nv_fp8_e5m2*)elts, elt_n, op, rank_n, rank_me, seed, elt_ix0); break;
^
../verifiable/verifiable.cu(1049): error: identifier "ncclFp8E4M3" is undefined
case ncclFp8E4M3: prepareExpected2<<<block_n, 512, 0, stream>>>((__nv_fp8_e4m3*)elts, elt_n, op, rank_n, seed, elt_ix0); break;
^
../verifiable/verifiable.cu(1050): error: identifier "ncclFp8E5M2" is undefined
case ncclFp8E5M2: prepareExpected2<<<block_n, 512, 0, stream>>>((__nv_fp8_e5m2*)elts, elt_n, op, rank_n, seed, elt_ix0); break;
^
../verifiable/verifiable.cu(1123): error: identifier "ncclFp8E4M3" is undefined
case ncclFp8E4M3:
^
../verifiable/verifiable.cu(1124): error: identifier "ncclFp8E5M2" is undefined
case ncclFp8E5M2:
^
../verifiable/verifiable.cu(1252): error: identifier "ncclFp8E4M3" is undefined
floating |= elt_ty == ncclFp8E4M3;
^
../verifiable/verifiable.cu(1253): error: identifier "ncclFp8E5M2" is undefined
floating |= elt_ty == ncclFp8E5M2;
^
8 errors detected in the compilation of "../verifiable/verifiable.cu".
make[1]: *** [../verifiable/verifiable.mk:11: /gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/build/verifiable/verifiable.o] Error 2
make[1]: Leaving directory '/gpfs/users/aslom/github/azure-msccl/msccl/tests/msccl-tests-nccl/src'
make: *** [Makefile:20: src.build] Error 2
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels