diff --git a/CMakeLists.txt b/CMakeLists.txt index d0d55de2..e4ca2150 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -269,9 +269,9 @@ add_custom_command( # add_subdirectory(bridge) -add_subdirectory(examples) +# add_subdirectory(examples) add_subdirectory("test") -add_subdirectory(demos) +# add_subdirectory(demos) diff --git a/bridge/include/bridge.h b/bridge/include/bridge.h index d747c1e2..f96f36a4 100644 --- a/bridge/include/bridge.h +++ b/bridge/include/bridge.h @@ -72,6 +72,14 @@ bridge_tensor_t conv2d( int padding ); +bridge_tensor_t nll_loss( + bridge_tensor_t input, + bridge_tensor_t target, + bridge_tensor_t weight, + int ignoreIndex, + int reduction +); + bridge_tensor_t matmul(bridge_tensor_t a, bridge_tensor_t b); bridge_tensor_t max_pool2d( @@ -104,8 +112,8 @@ proto_bridge_simple(softsign); proto_bridge_simple(tanhshrink); -void split_loop(int64_t idx, int64_t n); -void split_loop_filler(int64_t n,int64_t* ret); +// void split_loop(int64_t idx, int64_t n); +// void split_loop_filler(int64_t n,int64_t* ret); void show_webcam(void); diff --git a/bridge/lib/bridge.cpp b/bridge/lib/bridge.cpp index b34db755..960fdb28 100644 --- a/bridge/lib/bridge.cpp +++ b/bridge/lib/bridge.cpp @@ -16,7 +16,7 @@ #include #include -#include +// #include #define def_bridge_simple(Name) \ @@ -279,6 +279,43 @@ extern "C" bridge_tensor_t add_two_arrays(bridge_tensor_t a, bridge_tensor_t b) return torch_to_bridge(output); } +extern "C" bridge_tensor_t nll_loss( + bridge_tensor_t input, + bridge_tensor_t target, + bridge_tensor_t weight, + int ignoreIndex, + int reduction +) { + // Convert bridge_tensor_t to torch::Tensor + at::Tensor t_input = bridge_to_torch(input).to(torch::kFloat32); + at::Tensor t_target = bridge_to_torch(target).to(torch::kLong); + at::Tensor t_weight = bridge_to_torch(weight).to(torch::kFloat32); + + // Map reduction int to torch::Reduction enum + torch::nn::functional::NLLLossFuncOptions::reduction_t reduction_enum; + switch (reduction) { + case 0: reduction_enum = torch::kNone; break; + case 1: reduction_enum = torch::kMean; break; + case 2: reduction_enum = torch::kSum; break; + default: reduction_enum = torch::kMean; break; + } + + at::Tensor output = torch::nn::functional::nll_loss( + t_input, + t_target, + torch::nn::functional::NLLLossFuncOptions() + .weight(t_weight) + .ignore_index(ignoreIndex) + .reduction(reduction_enum) + ); + + if (output.dim() == 0) { + output = output.unsqueeze(0); // Makes scalar result 1D with 1 element + } + + return torch_to_bridge(output); +} + // extern "C" bridge_tensor_t capture_webcam_bridge(int cam_index) { // torch::Tensor image = capture_webcam(cam_index); // return torch_to_bridge(image); @@ -388,53 +425,53 @@ extern "C" float sumArray(float* arr, int* sizes, int dim) { } -extern "C" void split_loop(int64_t idx, int64_t n) { - for (int i = 0; i < n; ++i) { - std::cout << "idx(" << idx << "," << n << ") = " << i << std::endl; - std::cout.flush(); - } -} +// extern "C" void split_loop(int64_t idx, int64_t n) { +// for (int i = 0; i < n; ++i) { +// std::cout << "idx(" << idx << "," << n << ") = " << i << std::endl; +// std::cout.flush(); +// } +// } -extern "C" void split_loop_filler(int64_t n,int64_t* ret) { - for (int i = 0; i < n; ++i) { - *ret = i; - std::this_thread::sleep_for(std::chrono::seconds(0)); - } -} +// extern "C" void split_loop_filler(int64_t n,int64_t* ret) { +// for (int i = 0; i < n; ++i) { +// *ret = i; +// std::this_thread::sleep_for(std::chrono::seconds(0)); +// } +// } -cv::VideoCapture open_camera(int cam_index) { - cv::VideoCapture cap(cam_index, cv::CAP_AVFOUNDATION); - if (!cap.isOpened()) { - std::cerr << "Could not open camera index " << cam_index << std::endl; - return cv::VideoCapture(); - } - cap.set(cv::CAP_PROP_BUFFERSIZE, 1); // minimal internal buffering - cap.set(cv::CAP_PROP_FPS, 60); // request higher FPS if possible - return cap; -} +// cv::VideoCapture open_camera(int cam_index) { +// cv::VideoCapture cap(cam_index, cv::CAP_AVFOUNDATION); +// if (!cap.isOpened()) { +// std::cerr << "Could not open camera index " << cam_index << std::endl; +// return cv::VideoCapture(); +// } +// cap.set(cv::CAP_PROP_BUFFERSIZE, 1); // minimal internal buffering +// cap.set(cv::CAP_PROP_FPS, 60); // request higher FPS if possible +// return cap; +// } extern "C" void show_webcam(void) { - cv::VideoCapture cap; - cap = open_camera(0); + // cv::VideoCapture cap; + // cap = open_camera(0); - cv::Mat frame_bgr; + // cv::Mat frame_bgr; - while (true) { - if (!cap.read(frame_bgr) || frame_bgr.empty()) { - std::cerr << "[WARN] Empty frame, exiting" << std::endl; - break; - } + // while (true) { + // if (!cap.read(frame_bgr) || frame_bgr.empty()) { + // std::cerr << "[WARN] Empty frame, exiting" << std::endl; + // break; + // } - cv::imshow("webcam", frame_bgr); + // cv::imshow("webcam", frame_bgr); - if (cv::waitKey(1) == 27) { // ESC key - break; - } - } + // if (cv::waitKey(1) == 27) { // ESC key + // break; + // } + // } - cap.release(); - cv::destroyAllWindows(); + // cap.release(); + // cv::destroyAllWindows(); } \ No newline at end of file diff --git a/examples/split_loop/CMakeLists.txt b/examples/split_loop/CMakeLists.txt index 0cca4d92..2d68c3a5 100644 --- a/examples/split_loop/CMakeLists.txt +++ b/examples/split_loop/CMakeLists.txt @@ -1,55 +1,55 @@ -find_package(OpenCV 4 REQUIRED) +# find_package(OpenCV 4 REQUIRED) -find_library(ACCELERATE Accelerate REQUIRED) -find_library(METAL Metal REQUIRED) -find_library(FOUNDATION Foundation REQUIRED) +# find_library(ACCELERATE Accelerate REQUIRED) +# find_library(METAL Metal REQUIRED) +# find_library(FOUNDATION Foundation REQUIRED) -add_library(bridge_cv OBJECT ${BRIDGE_DIR}/include/bridge.h ${BRIDGE_DIR}/lib/bridge.cpp) +# add_library(bridge_cv OBJECT ${BRIDGE_DIR}/include/bridge.h ${BRIDGE_DIR}/lib/bridge.cpp) -target_link_directories(bridge_cv PRIVATE ${LIBTORCH_DIR}/lib) +# target_link_directories(bridge_cv PRIVATE ${LIBTORCH_DIR}/lib) -target_link_libraries( - bridge_cv - PRIVATE - -ltorch - -ltorch_cpu - -lc10 - -ltorch_global_deps - ${OpenCV_LIBS} - # ${TORCH_LIBRARIES} - ${ACCELERATE} - ${METAL} - ${FOUNDATION} -) +# target_link_libraries( +# bridge_cv +# PRIVATE +# -ltorch +# -ltorch_cpu +# -lc10 +# -ltorch_global_deps +# ${OpenCV_LIBS} +# # ${TORCH_LIBRARIES} +# ${ACCELERATE} +# ${METAL} +# ${FOUNDATION} +# ) -target_include_directories( - bridge_cv - PRIVATE - ${BRIDGE_DIR}/include - ${LIBTORCH_DIR}/include - ${LIBTORCH_DIR}/include/torch/csrc/api/include - # ${BRIDGE_DIR}/util -) +# target_include_directories( +# bridge_cv +# PRIVATE +# ${BRIDGE_DIR}/include +# ${LIBTORCH_DIR}/include +# ${LIBTORCH_DIR}/include/torch/csrc/api/include +# # ${BRIDGE_DIR}/util +# ) -# if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") -# target_compile_options(bridge_cv PRIVATE -Ofast -flto -ffast-math) -# target_link_options(bridge_cv PRIVATE -flto) -# endif() +# # if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") +# # target_compile_options(bridge_cv PRIVATE -Ofast -flto -ffast-math) +# # target_link_options(bridge_cv PRIVATE -flto) +# # endif() -set(BRIDGE_CV_OBJECT_FILES $) +# set(BRIDGE_CV_OBJECT_FILES $) -set(CHAI_CV_LINKER_ARGS - -M ${PROJECT_ROOT_DIR}/lib - ${BRIDGE_DIR}/include/bridge.h - ${BRIDGE_CV_OBJECT_FILES} - -L ${LIBTORCH_DIR}/lib - ${LIBTORCH_LIBS_LINKER_ARGS} - --ldflags "-Wl,-rpath,${LIBTORCH_DIR}/lib" -) +# set(CHAI_CV_LINKER_ARGS +# -M ${PROJECT_ROOT_DIR}/lib +# ${BRIDGE_DIR}/include/bridge.h +# ${BRIDGE_CV_OBJECT_FILES} +# -L ${LIBTORCH_DIR}/lib +# ${LIBTORCH_LIBS_LINKER_ARGS} +# --ldflags "-Wl,-rpath,${LIBTORCH_DIR}/lib" +# ) @@ -58,29 +58,29 @@ set(CHAI_CV_LINKER_ARGS -add_executable(SplitLoop - ${CMAKE_CURRENT_SOURCE_DIR}/split_loop.chpl - ${CHAI_LIB_FILES} -) +# add_executable(SplitLoop +# ${CMAKE_CURRENT_SOURCE_DIR}/split_loop.chpl +# ${CHAI_LIB_FILES} +# ) -add_dependencies(SplitLoop bridge_cv) -# add_dependencies(SplitLoop ChAI) -target_link_options(SplitLoop - PRIVATE - ${CHAI_CV_LINKER_ARGS} -) +# add_dependencies(SplitLoop bridge_cv) +# # add_dependencies(SplitLoop ChAI) +# target_link_options(SplitLoop +# PRIVATE +# ${CHAI_CV_LINKER_ARGS} +# ) -cmake_print_variables(CHAI_CV_LINKER_ARGS) -cmake_print_variables(OpenCV_LIBS) -cmake_print_variables(ACCELERATE) -cmake_print_variables(METAL) -cmake_print_variables(FOUNDATION) +# cmake_print_variables(CHAI_CV_LINKER_ARGS) +# cmake_print_variables(OpenCV_LIBS) +# cmake_print_variables(ACCELERATE) +# cmake_print_variables(METAL) +# cmake_print_variables(FOUNDATION) -set_target_properties(SplitLoop PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} -) +# set_target_properties(SplitLoop PROPERTIES +# RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} +# ) -# if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") -# target_compile_options(SplitLoop PRIVATE -Ofast -flto -ffast-math) -# target_link_options(SplitLoop PRIVATE -flto) -# endif() \ No newline at end of file +# # if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") +# # target_compile_options(SplitLoop PRIVATE -Ofast -flto -ffast-math) +# # target_link_options(SplitLoop PRIVATE -flto) +# # endif() \ No newline at end of file diff --git a/lib/Autograd.chpl b/lib/Autograd.chpl index d00e91ad..fabf741a 100644 --- a/lib/Autograd.chpl +++ b/lib/Autograd.chpl @@ -1084,13 +1084,12 @@ record nllLossOp : serializable { var target: shared BaseTensorResource(?); var weight: shared BaseTensorResource(?); var ignoreIndex: int; - var red: bool; var reduction: string; proc children do return (input,target,weight); proc forward() do - return ndarray.nllLoss(input.array,target.array,weight.array,ignoreIndex,red,reduction); + return ndarray.nllLoss(input.array,target.array,weight.array,ignoreIndex,reduction); proc spec : GradOpSpec do return new dict(("operation", "nllLoss")); } diff --git a/lib/Bridge.chpl b/lib/Bridge.chpl index 06e90e52..08a6ec19 100644 --- a/lib/Bridge.chpl +++ b/lib/Bridge.chpl @@ -87,11 +87,18 @@ module Bridge { in a: bridge_tensor_t, in b: bridge_tensor_t): bridge_tensor_t; - extern "split_loop" proc splitLoop(idx: int(64), n: int(64)): void; + extern "nll_loss" proc nllLoss( + in input: bridge_tensor_t, + in target: bridge_tensor_t, + in weight: bridge_tensor_t, + in ignoreIndex: int, + in reduction: int): bridge_tensor_t; + + // extern "split_loop" proc splitLoop(idx: int(64), n: int(64)): void; - extern "split_loop_filler" proc splitLoopFiller(n: int(64),ret: c_ptr(int(64))): void; + // extern "split_loop_filler" proc splitLoopFiller(n: int(64),ret: c_ptr(int(64))): void; - extern "show_webcam" proc showWebcam(): void; + // extern "show_webcam" proc showWebcam(): void; // extern "capture_webcam_bridge" proc captureWebcam( // in cam_index: int(32)): bridge_tensor_t; diff --git a/lib/DynamicTensor.chpl b/lib/DynamicTensor.chpl index 892bd9ae..1fe0e9a0 100644 --- a/lib/DynamicTensor.chpl +++ b/lib/DynamicTensor.chpl @@ -733,25 +733,29 @@ proc type dynamicTensor.nllLoss( target: dynamicTensor(eltType), weight: dynamicTensor(eltType), ignoreIndex: int = -1, - red: bool = true, reduction: string = "mean" ) { - for param rankIn in 2..2 { - if input.checkRank(rankIn) { - for param rank in 1..1 { - if target.checkRank(rankIn) && weight.checkRank(rank) { - return staticTensor.nllLoss(input.forceRank(rankIn),target.forceRank(rank),weight.forceRank(rank),ignoreIndex,red,reduction); - } - } + param inRank: int = 2; + param targetRank: int = 1; + param weightRank: int = 1; + + if input.checkRank(inRank) { + if target.checkRank(targetRank) && weight.checkRank(weightRank) { + var stInput: staticTensor(inRank,eltType) = input.forceRank(inRank); + var stTarget: staticTensor(targetRank,eltType) = target.forceRank(targetRank); + var stWeights: staticTensor(weightRank,eltType) = weight.forceRank(weightRank); + return staticTensor.nllLoss(stInput,stTarget,stWeights,ignoreIndex,reduction); } } + + halt("Could not determine rank in dynamicTensor.nllLoss. "); + return staticTensor.zeros(eltType, 1); } proc type dynamicTensor.nllLoss( input: dynamicTensor(?eltType), target: dynamicTensor(eltType), ignoreIndex: int = -1, - red: bool = true, reduction: string = "mean" ) { param inRank: int = 2; @@ -762,7 +766,7 @@ proc type dynamicTensor.nllLoss( var stInput: staticTensor(inRank,eltType) = input.forceRank(inRank); var stTarget: staticTensor(targetRank,eltType) = target.forceRank(targetRank); var weights: staticTensor(1,eltType) = staticTensor.ones(eltType,3); - return staticTensor.nllLoss(stInput,stTarget,weights,ignoreIndex,red,reduction); + return staticTensor.nllLoss(stInput,stTarget,weights,ignoreIndex,reduction); } } diff --git a/lib/NDArray.chpl b/lib/NDArray.chpl index fb417c4c..014b67c2 100644 --- a/lib/NDArray.chpl +++ b/lib/NDArray.chpl @@ -1579,6 +1579,28 @@ proc type ndarray.conv2d( ) : ndarray(inputRank,eltType); } +proc type ndarray.nllLoss( + input: ndarray(2,?eltType), + target: ndarray(1,eltType), + weight: ndarray(1, eltType), + ignoreIndex: int = -1, + reduction: string = "mean" +): ndarray(1,real(32)) { + var reduction_int: int = 1; + if reduction == "sum" then reduction_int = 2; + if reduction == "none" then reduction_int = 0; + + var result = Bridge.nllLoss( + input: Bridge.tensorHandle(eltType), + target: Bridge.tensorHandle(eltType), + weight: Bridge.tensorHandle(eltType), + ignoreIndex, + reduction_int + ) : ndarray(1,real(32)); + + return result; +} + proc type ndarray.convolve(features: ndarray(3,?eltType),kernel: ndarray(4,eltType), stride: int) do return ndarray.convolve(features,kernel,stride,padding = (0,0)); @@ -2135,43 +2157,6 @@ inline proc type ndarray.fromRanges(type eltType = real, rngs: range...?rank): n return a; } -proc type ndarray.nllLoss( - input: ndarray(2,?eltType), - target: ndarray(1,eltType), - weight: ndarray(1, eltType), - ignoreIndex: int = -1, - red: bool = true, - reduction: string = "mean" -): ndarray(1,eltType) { - const (N,C) = input.shape; - assert(target.shape[0] == N, "Target shape must match batch size."); - assert(weight.shape[0] == C, "Weights shape must match number of classes."); - - const dom = util.domainFromShape(N); - var loss = new ndarray(dom, eltType); - ref x = input.data; - ref y = target.data; - ref w = weight.data; - ref lossD = loss.data; - var wynSum: real = 0.0; - - forall n in 0..