diff --git a/CMakeLists.txt b/CMakeLists.txt index 37f937fe489..ef599b68922 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,16 +16,13 @@ include(cmake/ConfigGen.cmake) # ---[ Options caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA -caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY) +caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY) caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON) caffe_option(BUILD_python "Build Python wrapper" ON) -set(python_version "2" CACHE STRING "Specify which Python version to use") +set(python_version "2" CACHE STRING "Specify which python version to use") caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE) caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) -caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON) -caffe_option(USE_LMDB "Build with lmdb" ON) -caffe_option(USE_LEVELDB "Build with levelDB" ON) -caffe_option(USE_OPENCV "Build with OpenCV support" ON) +caffe_option(BUILD_python_layer "Build the Caffe python layer" ON) # ---[ Dependencies include(cmake/Dependencies.cmake) diff --git a/Makefile b/Makefile index 5fb6394e947..287fa4ea52f 100644 --- a/Makefile +++ b/Makefile @@ -169,23 +169,9 @@ ifneq ($(CPU_ONLY), 1) LIBRARY_DIRS += $(CUDA_LIB_DIR) LIBRARIES := cudart cublas curand endif - -LIBRARIES += glog gflags protobuf boost_system m hdf5_hl hdf5 - -# handle IO dependencies -USE_LEVELDB ?= 1 -USE_LMDB ?= 1 -USE_OPENCV ?= 1 - -ifeq ($(USE_LEVELDB), 1) - LIBRARIES += leveldb snappy -endif -ifeq ($(USE_LMDB), 1) - LIBRARIES += lmdb -endif -ifeq ($(USE_OPENCV), 1) - LIBRARIES += opencv_core opencv_highgui opencv_imgproc -endif +LIBRARIES += glog gflags protobuf leveldb snappy \ + lmdb boost_system hdf5_hl hdf5 m \ + opencv_core opencv_highgui opencv_imgproc #opencv_imgcodecs PYTHON_LIBRARIES := boost_python python2.7 WARNINGS := -Wall -Wno-sign-compare @@ -304,17 
+290,6 @@ ifeq ($(USE_CUDNN), 1) COMMON_FLAGS += -DUSE_CUDNN endif -# configure IO libraries -ifeq ($(USE_OPENCV), 1) - COMMON_FLAGS += -DUSE_OPENCV -endif -ifeq ($(USE_LEVELDB), 1) - COMMON_FLAGS += -DUSE_LEVELDB -endif -ifeq ($(USE_LMDB), 1) - COMMON_FLAGS += -DUSE_LMDB -endif - # CPU-only configuration ifeq ($(CPU_ONLY), 1) OBJS := $(PROTO_OBJS) $(CXX_OBJS) @@ -354,9 +329,8 @@ else # OS X packages atlas as the vecLib framework LIBRARIES += cblas # 10.10 has accelerate while 10.9 has veclib - XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep 'version' | sed 's/[^0-9]*\([0-9]\).*/\1/') - XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1) - ifeq ($(XCODE_CLT_GEQ_6), 1) + XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep -o 'version: 6') + ifneq (,$(findstring version: 6,$(XCODE_CLT_VER))) BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ LDFLAGS += -framework Accelerate else @@ -498,7 +472,7 @@ runtest: $(TEST_ALL_BIN) pytest: py cd python; python -m unittest discover -s caffe/test - + mattest: mat cd matlab; $(MATLAB_DIR)/bin/matlab -nodisplay -r 'caffe.run_tests(), exit()' diff --git a/Makefile.config.example b/Makefile.config.example index a20bad2f5ce..a873502559f 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -7,11 +7,6 @@ # CPU-only switch (uncomment to build without GPU support). # CPU_ONLY := 1 -# uncomment to disable IO dependencies and corresponding data layers -# USE_LEVELDB := 0 -# USE_LMDB := 0 -# USE_OPENCV := 0 - # To customize your choice of compiler, uncomment and set the following. # N.B. 
the default for Linux is g++ and the default for OSX is clang++ # CUSTOM_CXX := g++ diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index 8b259965359..566d6ca0aa7 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -56,18 +56,6 @@ function(caffe_generate_export_configs) list(APPEND Caffe_DEFINITIONS -DCPU_ONLY) endif() - if(USE_OPENCV) - list(APPEND Caffe_DEFINITIONS -DUSE_OPENCV) - endif() - - if(USE_LMDB) - list(APPEND Caffe_DEFINITIONS -DUSE_LMDB) - endif() - - if(USE_LEVELDB) - list(APPEND Caffe_DEFINITIONS -DUSE_LEVELDB) - endif() - if(NOT HAVE_CUDNN) set(HAVE_CUDNN FALSE) else() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index d68d7bfba66..7c86dd55a30 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -29,27 +29,19 @@ include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) # ---[ LMDB -if(USE_LMDB) - find_package(LMDB REQUIRED) - include_directories(SYSTEM ${LMDB_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES}) - add_definitions(-DUSE_LMDB) -endif() +find_package(LMDB REQUIRED) +include_directories(SYSTEM ${LMDB_INCLUDE_DIR}) +list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES}) # ---[ LevelDB -if(USE_LEVELDB) - find_package(LevelDB REQUIRED) - include_directories(SYSTEM ${LevelDB_INCLUDE}) - list(APPEND Caffe_LINKER_LIBS ${LevelDB_LIBRARIES}) - add_definitions(-DUSE_LEVELDB) -endif() +find_package(LevelDB REQUIRED) +include_directories(SYSTEM ${LevelDB_INCLUDE}) +list(APPEND Caffe_LINKER_LIBS ${LevelDB_LIBRARIES}) # ---[ Snappy -if(USE_LEVELDB) - find_package(Snappy REQUIRED) - include_directories(SYSTEM ${Snappy_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${Snappy_LIBRARIES}) -endif() +find_package(Snappy REQUIRED) +include_directories(SYSTEM ${Snappy_INCLUDE_DIR}) +list(APPEND Caffe_LINKER_LIBS ${Snappy_LIBRARIES}) # ---[ CUDA include(cmake/Cuda.cmake) @@ -65,16 +57,13 @@ if(NOT HAVE_CUDA) endif() # ---[ OpenCV 
-if(USE_OPENCV) - find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) - if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found - find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) - endif() - include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) - list(APPEND Caffe_LINKER_LIBS ${OpenCV_LIBS}) - message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})") - add_definitions(-DUSE_OPENCV) +find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) +if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found + find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) endif() +include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) +list(APPEND Caffe_LINKER_LIBS ${OpenCV_LIBS}) +message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})") # ---[ BLAS if(NOT APPLE) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 3d12e81a130..e094ac0040e 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -114,9 +114,6 @@ function(caffe_print_configuration_summary) caffe_status(" BUILD_matlab : ${BUILD_matlab}") caffe_status(" BUILD_docs : ${BUILD_docs}") caffe_status(" CPU_ONLY : ${CPU_ONLY}") - caffe_status(" USE_LMDB : ${USE_LMDB}") - caffe_status(" USE_LEVELDB : ${USE_LEVELDB}") - caffe_status(" USE_OPENCV : ${USE_OPENCV}") caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") @@ -124,16 +121,10 @@ function(caffe_print_configuration_summary) caffe_status(" glog : Yes") caffe_status(" gflags : Yes") caffe_status(" protobuf : " PROTOBUF_FOUND THEN "Yes (ver. ${PROTOBUF_VERSION})" ELSE "No" ) - if(USE_LMDB) - caffe_status(" lmdb : " LMDB_FOUND THEN "Yes (ver. ${LMDB_VERSION})" ELSE "No") - endif() - if(USE_LEVELDB) - caffe_status(" LevelDB : " LEVELDB_FOUND THEN "Yes (ver. ${LEVELDB_VERSION})" ELSE "No") - caffe_status(" Snappy : " SNAPPY_FOUND THEN "Yes (ver. ${Snappy_VERSION})" ELSE "No" ) - endif() - if(USE_OPENCV) - caffe_status(" OpenCV : Yes (ver. 
${OpenCV_VERSION})") - endif() + caffe_status(" lmdb : " LMDB_FOUND THEN "Yes (ver. ${LMDB_VERSION})" ELSE "No") + caffe_status(" Snappy : " SNAPPY_FOUND THEN "Yes (ver. ${Snappy_VERSION})" ELSE "No" ) + caffe_status(" LevelDB : " LEVELDB_FOUND THEN "Yes (ver. ${LEVELDB_VERSION})" ELSE "No") + caffe_status(" OpenCV : Yes (ver. ${OpenCV_VERSION})") caffe_status(" CUDA : " HAVE_CUDA THEN "Yes (ver. ${CUDA_VERSION})" ELSE "No" ) caffe_status("") if(HAVE_CUDA) @@ -174,3 +165,4 @@ function(caffe_print_configuration_summary) caffe_status(" Install path : ${CMAKE_INSTALL_PREFIX}") caffe_status("") endfunction() + diff --git a/cmake/Templates/CaffeConfig.cmake.in b/cmake/Templates/CaffeConfig.cmake.in index 73f57ac2d74..8f23742e52e 100644 --- a/cmake/Templates/CaffeConfig.cmake.in +++ b/cmake/Templates/CaffeConfig.cmake.in @@ -17,24 +17,22 @@ # Caffe_HAVE_CUDNN - signals about cuDNN support -# OpenCV dependency (optional) +# OpenCV dependency -if(@USE_OPENCV@) - if(NOT OpenCV_FOUND) - set(Caffe_OpenCV_CONFIG_PATH "@OpenCV_CONFIG_PATH@") - if(Caffe_OpenCV_CONFIG_PATH) - get_filename_component(Caffe_OpenCV_CONFIG_PATH ${Caffe_OpenCV_CONFIG_PATH} ABSOLUTE) +if(NOT OpenCV_FOUND) + set(Caffe_OpenCV_CONFIG_PATH "@OpenCV_CONFIG_PATH@") + if(Caffe_OpenCV_CONFIG_PATH) + get_filename_component(Caffe_OpenCV_CONFIG_PATH ${Caffe_OpenCV_CONFIG_PATH} ABSOLUTE) - if(EXISTS ${Caffe_OpenCV_CONFIG_PATH} AND NOT TARGET opencv_core) - message(STATUS "Caffe: using OpenCV config from ${Caffe_OpenCV_CONFIG_PATH}") - include(${Caffe_OpenCV_CONFIG_PATH}/OpenCVModules.cmake) - endif() - - else() - find_package(OpenCV REQUIRED) + if(EXISTS ${Caffe_OpenCV_CONFIG_PATH} AND NOT TARGET opencv_core) + message(STATUS "Caffe: using OpenCV config from ${Caffe_OpenCV_CONFIG_PATH}") + include(${Caffe_OpenCV_CONFIG_PATH}/OpenCVModules.cmake) endif() - unset(Caffe_OpenCV_CONFIG_PATH) + + else() + find_package(OpenCV REQUIRED) endif() + unset(Caffe_OpenCV_CONFIG_PATH) endif() # Compute paths diff --git 
a/cmake/Templates/caffe_config.h.in b/cmake/Templates/caffe_config.h.in index 9302022d7da..6039e8f6b21 100644 --- a/cmake/Templates/caffe_config.h.in +++ b/cmake/Templates/caffe_config.h.in @@ -30,8 +30,3 @@ /* Matlab */ #cmakedefine HAVE_MATLAB - -/* IO libraries */ -#cmakedefine USE_OPENCV -#cmakedefine USE_LMDB -#cmakedefine USE_LEVELDB diff --git a/docs/installation.md b/docs/installation.md index 89a8c71c71a..d535c6d093d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -17,19 +17,16 @@ When updating Caffe, it's best to `make clean` before re-compiling. ## Prerequisites -Caffe has several dependencies: +Caffe has several dependencies. * [CUDA](https://developer.nvidia.com/cuda-zone) is required for GPU mode. * library version 7.0 and the latest driver version are recommended, but 6.* is fine too * 5.5, and 5.0 are compatible but considered legacy * [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) via ATLAS, MKL, or OpenBLAS. * [Boost](http://www.boost.org/) >= 1.55 -* `protobuf`, `glog`, `gflags`, `hdf5` - -Optional dependencies: - * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 -* IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) +* `protobuf`, `glog`, `gflags` +* IO libraries `hdf5`, `leveldb`, `snappy`, `lmdb` Pycaffe and Matcaffe interfaces have their own natural needs. diff --git a/docs/multigpu.md b/docs/multigpu.md deleted file mode 100644 index 01cfb8938b5..00000000000 --- a/docs/multigpu.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Multi-GPU Usage, Hardware Configuration Assumptions, and Performance ---- - -# Multi-GPU Usage - -Currently Multi-GPU is only supported via the C/C++ paths and only for training. - -The GPUs to be used for training can be set with the "-gpu" flag on the command line to the 'caffe' tool. e.g. "build/tools/caffe train --solver=models/bvlc_alexnet/solver.prototxt --gpu=0,1" will train on GPUs 0 and 1. 
- -**NOTE**: each GPU runs the batchsize specified in your train_val.prototxt. So if you go from 1 GPU to 2 GPU, your effective batchsize will double. e.g. if your train_val.prototxt specified a batchsize of 256, if you run 2 GPUs your effective batch size is now 512. So you need to adjust the batchsize when running multiple GPUs and/or adjust your solver params, specifically learning rate. - -# Hardware Configuration Assumptions - -The current implementation uses a tree reduction strategy. e.g. if there are 4 GPUs in the system, 0:1, 2:3 will exchange gradients, then 0:2 (top of the tree) will exchange gradients, 0 will calculate -updated model, 0\-\>2, and then 0\-\>1, 2\-\>3. - -For best performance, P2P DMA access between devices is needed. Without P2P access, for example crossing PCIe root complex, data is copied through host and effective exchange bandwidth is greatly reduced. - -Current implementation has a "soft" assumption that the devices being used are homogeneous. In practice, any devices of the same general class should work together, but performance and total size is limited by the smallest device being used. e.g. if you combine a TitanX and a GTX980, peformance will be limited by the 980. Mixing vastly different levels of boards, e.g. Kepler and Fermi, is not supported. - -"nvidia-smi topo -m" will show you the connectivity matrix. You can do P2P through PCIe bridges, but not across socket level links at this time, e.g. across CPU sockets on a multi-socket motherboard. - -# Scaling Performance - -Performance is **heavily** dependent on the PCIe topology of the system, the configuration of the neural network you are training, and the speed of each of the layers. Systems like the DIGITS DevBox have an optimized PCIe topology (X99-E WS chipset). In general, scaling on 2 GPUs tends to be ~1.8X on average for networks like AlexNet, CaffeNet, VGG, GoogleNet. 4 GPUs begins to have falloff in scaling. 
Generally with "weak scaling" where the batchsize increases with the number of GPUs you will see 3.5x scaling or so. With "strong scaling", the system can become communication bound, especially with layer performance optimizations like those in [cuDNNv3](http://nvidia.com/cudnn), and you will likely see closer to mid 2.x scaling in performance. Networks that have heavy computation compared to the number of parameters tend to have the best scaling performance. \ No newline at end of file diff --git a/examples/cpp_classification/classification.cpp b/examples/cpp_classification/classification.cpp index de48fb692c8..dc8b863f53f 100644 --- a/examples/cpp_classification/classification.cpp +++ b/examples/cpp_classification/classification.cpp @@ -1,9 +1,7 @@ #include -#ifdef USE_OPENCV #include #include #include -#endif // USE_OPENCV #include #include #include @@ -11,7 +9,6 @@ #include #include -#ifdef USE_OPENCV using namespace caffe; // NOLINT(build/namespaces) using std::string; @@ -258,8 +255,3 @@ int main(int argc, char** argv) { << p.first << "\"" << std::endl; } } -#else -int main(int argc, char** argv) { - LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV."; -} -#endif // USE_OPENCV diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index 8f29bafde85..54443f11dd3 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -9,13 +9,9 @@ #include #include #include - -#if defined(USE_LEVELDB) && defined(USE_LMDB) #include #include #include -#endif - #include #include @@ -24,8 +20,6 @@ #include "caffe/proto/caffe.pb.h" -#if defined(USE_LEVELDB) && defined(USE_LMDB) - using namespace caffe; // NOLINT(build/namespaces) using std::string; @@ -202,9 +196,3 @@ int main(int argc, char** argv) { } return 0; } -#else -int main(int argc, char** argv) { - LOG(FATAL) << "This example requires LevelDB and LMDB; " << - "compile with USE_LEVELDB and USE_LMDB."; -} -#endif // USE_LEVELDB 
and USE_LMDB diff --git a/examples/siamese/convert_mnist_siamese_data.cpp b/examples/siamese/convert_mnist_siamese_data.cpp index ad08036fb08..8008b4439c5 100644 --- a/examples/siamese/convert_mnist_siamese_data.cpp +++ b/examples/siamese/convert_mnist_siamese_data.cpp @@ -10,14 +10,12 @@ #include "glog/logging.h" #include "google/protobuf/text_format.h" +#include "leveldb/db.h" #include "stdint.h" #include "caffe/proto/caffe.pb.h" #include "caffe/util/math_functions.hpp" -#ifdef USE_LEVELDB -#include "leveldb/db.h" - uint32_t swap_endian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16); @@ -123,8 +121,3 @@ int main(int argc, char** argv) { } return 0; } -#else -int main(int argc, char** argv) { - LOG(FATAL) << "This example requires LevelDB; compile with USE_LEVELDB."; -} -#endif // USE_LEVELDB diff --git a/examples/triplet/3d_triplet.prototxt b/examples/triplet/3d_triplet.prototxt new file mode 100644 index 00000000000..076e0be5040 --- /dev/null +++ b/examples/triplet/3d_triplet.prototxt @@ -0,0 +1,110 @@ +name: "3d_triplet" +input: "data" +input_dim: 9720 +input_dim: 1 +input_dim: 64 +input_dim: 64 +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 16 + kernel_size: 8 + stride: 1 + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "pool1" + top: "pool1" +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 7 + kernel_size: 5 + stride: 1 + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + 
bottom: "pool2" + top: "pool2" +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 256 + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 3 + } +} diff --git a/examples/triplet/3d_triplet_solver.prototxt b/examples/triplet/3d_triplet_solver.prototxt new file mode 100644 index 00000000000..eea97da7603 --- /dev/null +++ b/examples/triplet/3d_triplet_solver.prototxt @@ -0,0 +1,25 @@ +# The train/test net protocol buffer definition +net: "examples/triplet/3d_triplet_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of 3d database, we have test batch size 250 and 250 test iterations: 50*(2+3)=250, +# covering the full 9720 testing images:162*6*10=9720. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 100 +# The base learning rate, momentum and the weight decay of the network. 
+base_lr: 0.001 +momentum: 0.9 +weight_decay: 0.0000 +# The learning rate policy +lr_policy: "inv" +gamma: 0.0001 +power: 0.75 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 80000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "examples/triplet/3d_triplet" +# solver mode: CPU or GPU +solver_mode: CPU diff --git a/examples/triplet/3d_triplet_train_test.prototxt b/examples/triplet/3d_triplet_train_test.prototxt new file mode 100644 index 00000000000..60637b1a66b --- /dev/null +++ b/examples/triplet/3d_triplet_train_test.prototxt @@ -0,0 +1,181 @@ +name: "3d_triplet_train_test" +layer { + name: "data" + type: "Data" + top: "data" + top: "sim" + include { + phase: TRAIN + } + transform_param { + scale: 0.00390625 + } + data_param { + source: "examples/triplet/3d_triplet_train_leveldb" + batch_size: 250 + } +} +layer { + name: "data" + type: "Data" + top: "data" + top: "sim" + include { + phase: TEST + } + transform_param { + scale: 0.00390625 + } + data_param { + source: "examples/triplet/3d_triplet_test_leveldb" + batch_size: 250 + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } + convolution_param { + num_output: 16 + kernel_size: 8 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "pool1" + top: "pool1" +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + name: "conv2_w" + lr_mult: 1 + } + param { + name: "conv2_b" + lr_mult: 2 + } + convolution_param { + num_output: 7 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: 
"pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "pool2" + top: "pool2" +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } + inner_product_param { + num_output: 256 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 + } + inner_product_param { + num_output: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "loss" + type: "TripletLoss" + bottom: "feat" + bottom: "sim" + top: "loss" + triplet_loss_param { + margin: 1 + losstype: 0 + num_triplets: 3 + } +} diff --git a/examples/triplet/convert_3d_triplet_data.cpp b/examples/triplet/convert_3d_triplet_data.cpp new file mode 100644 index 00000000000..943efd9965f --- /dev/null +++ b/examples/triplet/convert_3d_triplet_data.cpp @@ -0,0 +1,221 @@ +// Usage: +// convert_3d_data input_image_file input_label_file output_db_file +#include // NOLINT(readability/streams) +#include +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/math_functions.hpp" +#include "glog/logging.h" +#include "google/protobuf/text_format.h" +#include "leveldb/db.h" +#include "math.h" +#include "stdint.h" + +uint32_t swap_endian(uint32_t val) { + val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); + return (val << 16) | (val >> 16); +} + +void read_image(std::ifstream* image_file, std::ifstream* label_file, + uint32_t index, uint32_t rows, uint32_t cols, + char* pixels, char* label_temp, signed char* label, int rgb_use) { + if (rgb_use == 0) { + image_file->seekg(index * rows * cols + 
16); + image_file->read(pixels, rows * cols); + label_file->seekg(index * 4 + 8); + label_file->read(label_temp, 4); + for (int i = 0; i < 4; i++) + *(label+i) = (signed char)*(label_temp+i); + } else { + image_file->seekg(3 * index * rows * cols + 16); + image_file->read(pixels, 3 * rows * cols); + label_file->seekg(index * 4 + 8); + label_file->read(label_temp, 4); + for (int i = 0; i < 4; i++) + *(label+i) = (signed char)*(label_temp+i); + } +} + +void convert_dataset(const char* image_filename, const char* label_filename, + const char* db_filename, + const char* class_number, const char* rgb_use) { + int rgb_use1 = atoi(rgb_use); + int class_num = atoi(class_number); + // Open files + std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); + std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); + CHECK(image_file) << "Unable to open file " << image_filename; + CHECK(label_file) << "Unable to open file " << label_filename; + // Read the magic and the meta data + uint32_t magic; + uint32_t num_items; + uint32_t num_labels; + uint32_t rows; + uint32_t cols; + + image_file.read(reinterpret_cast(&magic), 4); + magic = swap_endian(magic); + CHECK_EQ(magic, 2051) << "Incorrect image file magic."; + label_file.read(reinterpret_cast(&magic), 4); + magic = swap_endian(magic); + CHECK_EQ(magic, 2050) << "Incorrect label file magic."; + image_file.read(reinterpret_cast(&num_items), 4); + num_items = swap_endian(num_items); + label_file.read(reinterpret_cast(&num_labels), 4); + num_labels = swap_endian(num_labels); + CHECK_EQ(num_items, num_labels); + image_file.read(reinterpret_cast(&rows), 4); + rows = swap_endian(rows); + image_file.read(reinterpret_cast(&cols), 4); + cols = swap_endian(cols); + + // Open leveldb + leveldb::DB* db; + leveldb::Options options; + options.create_if_missing = true; + options.error_if_exists = true; + leveldb::Status status = leveldb::DB::Open( + options, db_filename, &db); + CHECK(status.ok()) << 
"Failed to open leveldb " << db_filename + << ". Is it already existing?"; + + char* label_temp = new char[4]; // label for unsigned char* + signed char* label_i = new signed char[4]; // label for triplet + signed char* label_j = new signed char[4]; + signed char* label_k = new signed char[4]; + signed char* label_l = new signed char[4]; // label for pair wise + signed char* label_m = new signed char[4]; + int db_size; + if (rgb_use1 == 0) + db_size = rows * cols; + else + db_size = 3 * rows * cols; + char* pixels1 = new char[db_size]; + char* pixels2 = new char[db_size]; + char* pixels3 = new char[db_size]; + char* pixels4 = new char[db_size]; + char* pixels5 = new char[db_size]; + const int kMaxKeyLength = 10; + char key[kMaxKeyLength]; + std::string value; + caffe::Datum datum; + if (rgb_use1 == 0) + datum.set_channels(1); + else + datum.set_channels(3); + datum.set_height(rows); + datum.set_width(cols); + LOG(INFO) << "A total of " << num_items << " items."; + LOG(INFO) << "Rows: " << rows << " Cols: " << cols; + int counter = 0; + for (unsigned int times = 0; times < 10; ++times) { + // iteration in the samples of all class + for (unsigned int itemid = 0; itemid < num_items/class_num; ++itemid) { + // iteration in the samples in one class + for (unsigned int class_ind = 0; class_ind < class_num; ++class_ind) { + // use reference sample one by one at each iteration + int i = itemid % num_items + class_ind*num_items/class_num; + int j = caffe::caffe_rng_rand() % num_items; // pick triplet groups + int k = caffe::caffe_rng_rand() % num_items; + int l = caffe::caffe_rng_rand() % num_items; // pick pair wise groups + int m = caffe::caffe_rng_rand() % num_items; + read_image(&image_file, &label_file, i, rows, cols, // read triplet + pixels1, label_temp, label_i, rgb_use1); + read_image(&image_file, &label_file, j, rows, cols, + pixels2, label_temp, label_j, rgb_use1); + read_image(&image_file, &label_file, k, rows, cols, + pixels3, label_temp, label_k, rgb_use1); + 
read_image(&image_file, &label_file, l, rows, cols, // read pair wise + pixels4, label_temp, label_l, rgb_use1); + read_image(&image_file, &label_file, m, rows, cols, + pixels5, label_temp, label_m, rgb_use1); + + bool pair_pass = false; + bool triplet1_pass = false; + bool triplet2_pass = false; + bool triplet3_class_same = false; + bool triplet3_pass = false; + + int ij_diff_x = static_cast(*(label_i+1)-*(label_j+1)); + int ij_diff_y = static_cast(*(label_i+2)-*(label_j+2)); + int ij_diff_z = static_cast(*(label_i+3)-*(label_j+3)); + int im_diff_x = static_cast(*(label_i+1)-*(label_m+1)); + int im_diff_y = static_cast(*(label_i+2)-*(label_m+2)); + int im_diff_z = static_cast(*(label_i+3)-*(label_m+3)); + + int ij_x = ij_diff_x*ij_diff_x; + int ij_y = ij_diff_y*ij_diff_y; + int ij_z = ij_diff_z*ij_diff_z; + int im_x = im_diff_x*im_diff_x; + int im_y = im_diff_y*im_diff_y; + int im_z = im_diff_z*im_diff_z; + + float dist_ij = std::sqrt(ij_x + ij_y + ij_z); + float dist_im = std::sqrt(im_x + im_y + im_z); + if (*label_i == *label_j && dist_ij < 100/3 && dist_ij != 0) + pair_pass = true; + if (pair_pass && (*label_i != *label_k)) + triplet1_pass = true; + if (pair_pass && (*label_i != *label_l)) + triplet2_pass = true; + if (pair_pass && (*label_i == *label_m)) + triplet3_class_same = true; + if (triplet3_class_same && dist_im > 100/3) + triplet3_pass = true; + if (pair_pass && triplet1_pass && triplet2_pass && triplet3_pass) { + datum.set_data(pixels1, db_size); // set data + datum.set_label(static_cast(*label_i)); + datum.SerializeToString(&value); + snprintf(key, kMaxKeyLength, "%08d", counter); + db->Put(leveldb::WriteOptions(), std::string(key), value); + counter++; + datum.set_data(pixels2, db_size); // set data + datum.set_label(static_cast(*label_j)); + datum.SerializeToString(&value); + snprintf(key, kMaxKeyLength, "%08d", counter); + db->Put(leveldb::WriteOptions(), std::string(key), value); + counter++; + datum.set_data(pixels3, db_size); // set data + 
datum.set_label(static_cast(*label_k)); + datum.SerializeToString(&value); + snprintf(key, kMaxKeyLength, "%08d", counter); + db->Put(leveldb::WriteOptions(), std::string(key), value); + counter++; + datum.set_data(pixels4, db_size); // set data + datum.set_label(static_cast(*label_l)); + datum.SerializeToString(&value); + snprintf(key, kMaxKeyLength, "%08d", counter); + db->Put(leveldb::WriteOptions(), std::string(key), value); + counter++; + datum.set_data(pixels5, db_size); // set data + datum.set_label(static_cast(*label_m)); + datum.SerializeToString(&value); + snprintf(key, kMaxKeyLength, "%08d", counter); + db->Put(leveldb::WriteOptions(), std::string(key), value); + counter++; + } else { + class_ind--; + } + } // iteration in the samples of all class + } // iteration in the samples in one class + } // iteration in times + delete db; + delete pixels1; + delete pixels2; + delete pixels3; + delete pixels4; + delete pixels5; +} + +int main(int argc, char** argv) { + if (argc != 6) { + printf("This script converts the dataset to the leveldb format used\n" + "by caffe to train a triplet network.\n" + "Usage:\n" + " convert_3d_data input_image_file input_label_file " + "output_db_file class_number rgb_use \n"); + } else { + google::InitGoogleLogging(argv[0]); + convert_dataset(argv[1], argv[2], argv[3], argv[4], argv[5]); + } + return 0; +} diff --git a/examples/triplet/create_3d_triplet.sh b/examples/triplet/create_3d_triplet.sh new file mode 100755 index 00000000000..0fadd9b7e09 --- /dev/null +++ b/examples/triplet/create_3d_triplet.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env sh +# This script converts the mnist data into leveldb format. + +EXAMPLES=./build/examples/triplet +DATA=./data/linemod + +echo "Creating leveldb..." 
+ +rm -rf ./examples/triplet/3d_triplet_train_leveldb +rm -rf ./examples/triplet/3d_triplet_test_leveldb + +$EXAMPLES/convert_3d_triplet_data.bin \ + $DATA/binary_image_train \ + $DATA/binary_label_train \ + ./examples/triplet/3d_triplet_train_leveldb \ + 6 \ + 0 +$EXAMPLES/convert_3d_triplet_data.bin \ + $DATA/binary_image_test \ + $DATA/binary_label_test \ + ./examples/triplet/3d_triplet_test_leveldb \ + 6 \ + 0 +echo "Done." diff --git a/examples/triplet/readme.md b/examples/triplet/readme.md new file mode 100644 index 00000000000..6636808691a --- /dev/null +++ b/examples/triplet/readme.md @@ -0,0 +1,186 @@ +--- +title: Triplet Network Tutorial +description: Train and test a triplet network on data generated by 3D model. +category: example +include_in_docs: true +layout: default +priority: 100 +--- + +# Triplet Network Training with Caffe +This example shows how you can use weight sharing and a contrastive loss +function to learn a model using a triplet network in Caffe. + +We will assume that you have caffe successfully compiled. If not, please refer +to the [Installation page](../../installation.html). This example builds on the +[MNIST tutorial](mnist.html) so it would be a good idea to read that before +continuing. + +*The guide specifies all paths and assumes all commands are executed from the +root caffe directory* + +## Prepare Datasets + +You will first need to convert the data from some .ply models using the +opencv_contrib cnn_3donj module. After constructing the binary files including images and labels and putting them in the ./data/linemod folder, just run: + + ./examples/triplet/create_3d_triplet.sh + +After running the script there should be two datasets, +`./examples/triplet/3d_triplet_train_leveldb`, and +`./examples/triplet/3d_triplet_test_leveldb`. + +## The Model +First, we will define the model that we want to train using the triplet network. +We will use the convolutional net defined in +`./examples/triplet/3d_triplet.prototxt`. 
+ +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 4 + } +} + +## Define the triplet Network + +In this section we will define the triplet network used for training. The +resulting network is defined in +`./examples/triplet/3d_triplet_train_test.prototxt`. + +### Reading in the Triplet Data + +We start with a data layer that reads from the LevelDB database we created +earlier. Each entry in this database contains the image data for a triplet of +images (`triplet_data`) and the label (`sim`) is not necessary in our method. + + layers { + name: "triplet_data" + type: DATA + top: "triplet_data" + top: "sim" + data_param { + source: "examples/triplet/3d_triplet_train_leveldb" + scale: 0.00390625 + batch_size: 250 + } + include: { phase: TRAIN } + } + +In order to pack a triplet of images into the same blob in the database we pack one +image per channel. We want to be able to work with these three images separately, +so we add a slice layer after the data layer. This takes the `triplet_data` and +slices it along the channel dimension so that we have a single image in `data`, +its positive image in `data_pos`, and its negative image in `data_neg`; as described in the paper on 3D object classification and pose estimation, a pairwise term is also needed along with the triplet part. + +layer { + name: "slice_triplet" + type: "Slice" + bottom: "triplet_data" + top: "data" + top: "data_true" + top: "data_false" + top: "data_p1" + top: "data_p2" + slice_param { + slice_dim: 1 + slice_point: 1 + slice_point: 2 + slice_point: 3 + slice_point: 4 + } +} + +### Building the First part of the triplet Net + +Now we can specify the first side of the triplet net. This side operates on +`data` and produces `feat`. Starting from the net in +`./examples/triplet/3d_triplet.prototxt` we add default weight fillers.
Then +we name the parameters of the convolutional and inner product layers. Naming the +parameters allows Caffe to share the parameters between layers on three channels of +the triplet net. In the definition this looks like: + + ... + param: "conv1_w" + param: "conv1_b" + ... + param: "conv2_w" + param: "conv2_b" + ... + param: "ip1_w" + param: "ip1_b" + ... + param: "ip2_w" + param: "ip2_b" + ... + +### Building the Second Side of the triplet Net + +Now we need to create the second path that operates on `data_pos` and produces +`feat_pos`. This path is exactly the same as the first. So we can just copy and +paste it. Then we change the name of each layer, input, and output by appending +`_pos` to differentiate the "paired" layers from the originals. + +### Building the Third Side of the triplet Net + +Now we need to create the third path that operates on `data_neg` and produces +`feat_neg`. This path is exactly the same as the first. So we can just copy and +paste it. Then we change the name of each layer, input, and output by appending +`_neg` to differentiate the "paired" layers from the originals. + +### Adding the Triplet Loss Function + +To train the network we will optimize a triplet loss function proposed in: +This cost function is implemented with the `TRIPLET_LOSS` layer: + +layer { + name: "loss" + type: "TripletLoss" + bottom: "feat" + bottom: "feat_true" + bottom: "feat_false" + bottom: "feat_p1" + bottom: "feat_p2" + bottom: "sim" + top: "loss" + triplet_loss_param { + margin: 1 + losstype: 1 + } +} + +## Define the Solver + +Nothing special needs to be done to the solver besides pointing it at the +correct model file. The solver is defined in +`./examples/triplet/3d_triplet_solver.prototxt`. + +## Training and Testing the Model + +Training the model is simple after you have written the network definition +protobuf and solver protobuf files.
Simply run +`./examples/triplet/train_3d_triplet.sh`: + + ./examples/triplet/train_3d_triplet.sh + +# Plotting the results + +First, we can draw the model and triplet networks by running the following +commands that draw the DAGs defined in the .prototxt files: + + ./python/draw_net.py \ + ./examples/triplet/3d_triplet.prototxt \ + ./examples/triplet/3d_triplet.png + + ./python/draw_net.py \ + ./examples/triplet/3d_triplet_train_test.prototxt \ + ./examples/triplet/3d_triplet_train_test.png \ No newline at end of file diff --git a/examples/triplet/train_3d_triplet.sh b/examples/triplet/train_3d_triplet.sh new file mode 100755 index 00000000000..e421af54493 --- /dev/null +++ b/examples/triplet/train_3d_triplet.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train --solver=examples/triplet/3d_triplet_solver.prototxt diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index fea5117ef10..dda7b1f8372 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -219,7 +219,6 @@ class Blob { const Dtype* cpu_data() const; void set_cpu_data(Dtype* data); - const int* gpu_shape() const; const Dtype* gpu_data() const; const Dtype* cpu_diff() const; const Dtype* gpu_diff() const; @@ -269,7 +268,6 @@ class Blob { protected: shared_ptr data_; shared_ptr diff_; - shared_ptr shape_data_; vector shape_; int count_; int capacity_; diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 89bab8d6f3a..8e64b3e5dc5 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -85,7 +85,7 @@ class ConcatLayer : public Layer { const vector*>& top); virtual inline const char* type() const { return "Concat"; } - virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int MinBottomBlobs() const { return 2; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: @@ -625,7 +625,7 @@ class SliceLayer : public Layer { virtual inline const char* type() const {
return "Slice"; } virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 2; } protected: virtual void Forward_cpu(const vector*>& bottom, diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 90fd0d19917..552d814131e 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -4,6 +4,7 @@ #include #include #include + #include "hdf5.h" #include "caffe/blob.hpp" @@ -274,10 +275,8 @@ class MemoryDataLayer : public BaseDataLayer { virtual inline int ExactNumTopBlobs() const { return 2; } virtual void AddDatumVector(const vector& datum_vector); -#ifdef USE_OPENCV virtual void AddMatVector(const vector& mat_vector, const vector& labels); -#endif // USE_OPENCV // Reset should accept const pointers, but can't, because the memory // will be given to Blob, which is mutable diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp index 97b4ee6a8c4..0ad68c80216 100644 --- a/include/caffe/data_transformer.hpp +++ b/include/caffe/data_transformer.hpp @@ -50,7 +50,6 @@ class DataTransformer { void Transform(const vector & datum_vector, Blob* transformed_blob); -#ifdef USE_OPENCV /** * @brief Applies the transformation defined in the data layer's * transform_param block to a vector of Mat. @@ -75,7 +74,6 @@ class DataTransformer { * set_cpu_data() is used. See image_data_layer.cpp for an example. */ void Transform(const cv::Mat& cv_img, Blob* transformed_blob); -#endif // USE_OPENCV /** * @brief Applies the same transformation defined in the data layer's @@ -115,7 +113,6 @@ class DataTransformer { * @param mat_vector * A vector of Mat containing the data to be transformed. 
*/ -#ifdef USE_OPENCV vector InferBlobShape(const vector & mat_vector); /** * @brief Infers the shape of transformed_blob will have when @@ -125,7 +122,6 @@ class DataTransformer { * cv::Mat containing the data to be transformed. */ vector InferBlobShape(const cv::Mat& cv_img); -#endif // USE_OPENCV protected: /** @@ -152,3 +148,4 @@ class DataTransformer { } // namespace caffe #endif // CAFFE_DATA_TRANSFORMER_HPP_ + diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 8d41af34e88..0a513ae12a4 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -216,6 +216,73 @@ class ContrastiveLossLayer : public LossLayer { Blob summer_vec_; // tmp storage for gpu forward pass }; +template +class TripletLossLayer : public LossLayer { + public: + explicit TripletLossLayer(const LayerParameter& param) + : LossLayer(param), diff_() {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline int ExactNumBottomBlobs() const { return 2; } + virtual inline const char* type() const { return "TripletLoss"; } + /** + * Unlike most loss layers, in the TripletLossLayer we can backpropagate + * to the first three inputs. + */ + virtual inline bool AllowForceBackward(const int bottom_index) const { + return bottom_index != 1; + } + + protected: + /// @copydoc TripletLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the Triplet error gradient w.r.t. the inputs. + * + * Computes the gradients with respect to the two input vectors (bottom[0] and + * bottom[1]), but not the similarity label (bottom[2]). 
+ * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times 1 \times 1) @f$ + * the features @f$a@f$; Backward fills their diff with + * gradients if propagate_down[0] + * -# @f$ (N \times C \times 1 \times 1) @f$ + * the features @f$b@f$; Backward fills their diff with gradients if + * propagate_down[1] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob diff_; // cached for backward pass + Blob diff_pos; + Blob diff_neg; + Blob dist_sq_; // cached for backward pass + Blob dist_sq_pos; + Blob dist_sq_neg; + Blob diff_sq_; // tmp storage for gpu forward pass + Blob diff_sq_pos; + Blob diff_sq_neg; + Blob summer_vec_; // tmp storage for gpu forward pass +}; + /** * @brief Computes the Euclidean (L2) loss @f$ * E = \frac{1}{2N} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 2ecf539baef..aba3e036004 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -82,8 +82,6 @@ class Solver { callbacks_.push_back(value); } - void CheckSnapshotWritePermissions(); - protected: // Make and apply the update value for the current iteration. 
virtual void ApplyUpdate() = 0; @@ -283,19 +281,19 @@ Solver* GetSolver(const SolverParameter& param) { switch (type) { case SolverParameter_SolverType_SGD: - return new SGDSolver(param); + return new SGDSolver(param); case SolverParameter_SolverType_NESTEROV: - return new NesterovSolver(param); + return new NesterovSolver(param); case SolverParameter_SolverType_ADAGRAD: - return new AdaGradSolver(param); + return new AdaGradSolver(param); case SolverParameter_SolverType_RMSPROP: - return new RMSPropSolver(param); + return new RMSPropSolver(param); case SolverParameter_SolverType_ADADELTA: - return new AdaDeltaSolver(param); + return new AdaDeltaSolver(param); case SolverParameter_SolverType_ADAM: - return new AdamSolver(param); + return new AdamSolver(param); default: - LOG(FATAL) << "Unknown SolverType: " << type; + LOG(FATAL) << "Unknown SolverType: " << type; } return (Solver*) NULL; } diff --git a/include/caffe/util/db_leveldb.hpp b/include/caffe/util/db_leveldb.hpp index e9fa0d32b66..10623554b67 100644 --- a/include/caffe/util/db_leveldb.hpp +++ b/include/caffe/util/db_leveldb.hpp @@ -1,4 +1,3 @@ -#ifdef USE_LEVELDB #ifndef CAFFE_UTIL_DB_LEVELDB_HPP #define CAFFE_UTIL_DB_LEVELDB_HPP @@ -72,4 +71,3 @@ class LevelDB : public DB { } // namespace caffe #endif // CAFFE_UTIL_DB_LEVELDB_HPP -#endif // USE_LEVELDB diff --git a/include/caffe/util/db_lmdb.hpp b/include/caffe/util/db_lmdb.hpp index 4e1568ace50..cc7c90afc4c 100644 --- a/include/caffe/util/db_lmdb.hpp +++ b/include/caffe/util/db_lmdb.hpp @@ -1,4 +1,3 @@ -#ifdef USE_LMDB #ifndef CAFFE_UTIL_DB_LMDB_HPP #define CAFFE_UTIL_DB_LMDB_HPP @@ -90,4 +89,3 @@ class LMDB : public DB { } // namespace caffe #endif // CAFFE_UTIL_DB_LMDB_HPP -#endif // USE_LMDB diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp index 531fd29c57a..0051e2fa067 100644 --- a/include/caffe/util/im2col.hpp +++ b/include/caffe/util/im2col.hpp @@ -3,48 +3,24 @@ namespace caffe { -template -void im2col_nd_cpu(const Dtype* 
data_im, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col); - template void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_col); -template -void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im); - template void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int patch_h, const int patch_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_im); -template -void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, - const int col_size, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col); - template void im2col_gpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_col); -template -void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, - const int im_size, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im); - template void col2im_gpu(const Dtype* data_col, const int channels, const int height, const int width, const int patch_h, const int patch_w, diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 6070b4c7f3a..c0938ad0625 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -120,7 +120,6 @@ inline bool ReadImageToDatum(const string& filename, const int label, bool DecodeDatumNative(Datum* datum); bool DecodeDatum(Datum* 
datum, bool is_color); -#ifdef USE_OPENCV cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width, const bool is_color); @@ -136,7 +135,6 @@ cv::Mat DecodeDatumToCVMatNative(const Datum& datum); cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color); void CVMatToDatum(const cv::Mat& cv_img, Datum* datum); -#endif // USE_OPENCV } // namespace caffe diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 06bc0457e2d..211e3d9042d 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -58,110 +58,52 @@ class BaseConvolutionLayer : public Layer { void backward_gpu_bias(Dtype* bias, const Dtype* input); #endif - /// @brief The spatial dimensions of the input. - inline int input_shape(int i) { - return (*bottom_shape_)[channel_axis_ + i]; - } // reverse_dimensions should return true iff we are implementing deconv, so // that conv helpers know which dimensions are which. virtual bool reverse_dimensions() = 0; // Compute height_out_ and width_out_ from other parameters. virtual void compute_output_shape() = 0; - /// @brief The spatial dimensions of a filter kernel. - Blob kernel_shape_; - /// @brief The spatial dimensions of the stride. - Blob stride_; - /// @brief The spatial dimensions of the padding. - Blob pad_; - /// @brief The spatial dimensions of the convolution input. - Blob conv_input_shape_; - /// @brief The spatial dimensions of the col_buffer. - vector col_buffer_shape_; - /// @brief The spatial dimensions of the output. 
- vector output_shape_; - const vector* bottom_shape_; - - int num_spatial_axes_; - int bottom_dim_; - int top_dim_; - - int channel_axis_; + int kernel_h_, kernel_w_; + int stride_h_, stride_w_; int num_; int channels_; + int pad_h_, pad_w_; + int height_, width_; int group_; - int out_spatial_dim_; - int weight_offset_; int num_output_; + int height_out_, width_out_; bool bias_term_; bool is_1x1_; - bool force_nd_im2col_; private: // wrap im2col/col2im so we don't have to remember the (long) argument lists inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_cpu(data, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); - } else { - im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), - col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), col_buff); - } + im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_, + kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff); } inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_cpu(col_buff, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], data); - } else { - col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), - col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), data); - } + col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_, + kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data); } #ifndef CPU_ONLY inline void 
conv_im2col_gpu(const Dtype* data, Dtype* col_buff) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_gpu(data, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); - } else { - im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, - conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), - kernel_shape_.gpu_data(), pad_.gpu_data(), - stride_.gpu_data(), col_buff); - } + im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_, + kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff); } inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_gpu(col_buff, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], data); - } else { - col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, - conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), - kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - data); - } + col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_, + kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data); } #endif - int num_kernels_im2col_; - int num_kernels_col2im_; int conv_out_channels_; int conv_in_channels_; int conv_out_spatial_dim_; + int conv_in_height_; + int conv_in_width_; int kernel_dim_; + int weight_offset_; int col_offset_; int output_offset_; @@ -308,7 +250,7 @@ class CuDNNConvolutionLayer : public ConvolutionLayer { cudnnTensorDescriptor_t bias_desc_; cudnnFilterDescriptor_t filter_desc_; vector conv_descs_; - int bottom_offset_, top_offset_, bias_offset_; + int bottom_offset_, top_offset_, 
weight_offset_, bias_offset_; size_t workspaceSizeInBytes; void *workspace; }; @@ -345,22 +287,11 @@ class Im2colLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - /// @brief The spatial dimensions of a filter kernel. - Blob kernel_shape_; - /// @brief The spatial dimensions of the stride. - Blob stride_; - /// @brief The spatial dimensions of the padding. - Blob pad_; - - int num_spatial_axes_; - int bottom_dim_; - int top_dim_; - - int channel_axis_; - int num_; + int kernel_h_, kernel_w_; + int stride_h_, stride_w_; int channels_; - - bool force_nd_im2col_; + int height_, width_; + int pad_h_, pad_w_; }; // Forward declare PoolingLayer and SplitLayer for use in LRNLayer. diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index ccda1bcae4f..6cc44e729f4 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,4 +1,4 @@ -from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver +from .pycaffe import Net, SGDSolver from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index ccd5776ac40..cc49f60ab13 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -297,15 +297,6 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_, bp::bases >, shared_ptr >, boost::noncopyable>( "AdaGradSolver", bp::init()); - bp::class_, bp::bases >, - shared_ptr >, boost::noncopyable>( - "RMSPropSolver", bp::init()); - bp::class_, bp::bases >, - shared_ptr >, boost::noncopyable>( - "AdaDeltaSolver", bp::init()); - bp::class_, bp::bases >, - shared_ptr >, boost::noncopyable>( - "AdamSolver", bp::init()); bp::def("get_solver", &GetSolverFromFile, bp::return_value_policy()); diff --git a/python/caffe/io.py b/python/caffe/io.py index 
0cad7211291..fc96266085f 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -329,7 +329,7 @@ def resize_image(im, new_dims, interp_order=1): return ret else: # ndimage interpolates anything but more slowly. - scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2])) + scale = tuple(np.array(new_dims) / np.array(im.shape[:2])) resized_im = zoom(im, scale + (1,), order=interp_order) return resized_im.astype(np.float32) diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index 93fc01927db..77a0e0070ae 100644 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -56,14 +56,8 @@ def to_proto(*tops): def assign_proto(proto, name, val): """Assign a Python object to a protobuf message, based on the Python type (in recursive fashion). Lists become repeated fields/messages, dicts - become messages, and other types are assigned directly. For convenience, - repeated fields whose values are not lists are converted to single-element - lists; e.g., `my_repeated_int_field=3` is converted to - `my_repeated_int_field=[3]`.""" - - is_repeated_field = hasattr(getattr(proto, name), 'extend') - if is_repeated_field and not isinstance(val, list): - val = [val] + become messages, and other types are assigned directly.""" + if isinstance(val, list): if isinstance(val[0], dict): for item in val: diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 8ea24da4fdd..4f980a92c38 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -10,8 +10,7 @@ from itertools import zip_longest as izip_longest import numpy as np -from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \ - RMSPropSolver, AdaDeltaSolver, AdamSolver +from ._caffe import Net, SGDSolver import caffe.io # We directly update methods from Net here (rather than using composition or diff --git a/python/caffe/test/test_layer_type_list.py b/python/caffe/test/test_layer_type_list.py index 47f4cf6d008..7edc80df069 100644 --- 
a/python/caffe/test/test_layer_type_list.py +++ b/python/caffe/test/test_layer_type_list.py @@ -5,7 +5,6 @@ class TestLayerTypeList(unittest.TestCase): def test_standard_types(self): - #removing 'Data' from list for type_name in ['Data', 'Convolution', 'InnerProduct']: self.assertIn(type_name, caffe.layer_type_list(), '%s not in layer_type_list()' % type_name) diff --git a/python/caffe/test/test_net_spec.py b/python/caffe/test/test_net_spec.py index fee3c0aaebe..b4595e6531a 100644 --- a/python/caffe/test/test_net_spec.py +++ b/python/caffe/test/test_net_spec.py @@ -43,7 +43,8 @@ def anon_lenet(batch_size): def silent_net(): n = caffe.NetSpec() - n.data, n.data2 = L.DummyData(shape=dict(dim=3), ntop=2) + n.data, n.data2 = L.DummyData(shape=[dict(dim=[3]), dict(dim=[4, 2])], + ntop=2) n.silence_data = L.Silence(n.data, ntop=0) n.silence_data2 = L.Silence(n.data2, ntop=0) return n.to_proto() diff --git a/scripts/travis/travis_build_and_test.sh b/scripts/travis/travis_build_and_test.sh index 174f1ee5a0a..9ba737e28a9 100755 --- a/scripts/travis/travis_build_and_test.sh +++ b/scripts/travis/travis_build_and_test.sh @@ -1,6 +1,5 @@ #!/bin/bash -# Script called by Travis to build and test Caffe. -# Travis CI tests are CPU-only for lack of compatible hardware. +# Script called by Travis to do a CPU-only build of and test Caffe. set -e MAKE="make --jobs=$NUM_THREADS --keep-going" @@ -16,12 +15,7 @@ if $WITH_CMAKE; then if [ "$PYTHON_VERSION" = "3" ]; then PYTHON_ARGS="$PYTHON_ARGS -Dpython_version=3 -DBOOST_LIBRARYDIR=$CONDA_DIR/lib/" fi - if $WITH_IO; then - IO_ARGS="-DUSE_OPENCV=ON -DUSE_LMDB=ON -DUSE_LEVELDB=ON" - else - IO_ARGS="-DUSE_OPENCV=OFF -DUSE_LMDB=OFF -DUSE_LEVELDB=OFF" - fi - cmake -DBUILD_python=ON -DCMAKE_BUILD_TYPE=Release $CPU_ONLY $PYTHON_ARGS -DCMAKE_INCLUDE_PATH="$CONDA_DIR/include/" -DCMAKE_LIBRARY_PATH="$CONDA_DIR/lib/" $IO_ARGS .. 
+ cmake -DBUILD_python=ON -DCMAKE_BUILD_TYPE=Release $CPU_ONLY $PYTHON_ARGS -DCMAKE_INCLUDE_PATH="$CONDA_DIR/include/" -DCMAKE_LIBRARY_PATH="$CONDA_DIR/lib/" .. $MAKE $MAKE pytest if ! $WITH_CUDA; then @@ -34,11 +28,6 @@ else if ! $WITH_CUDA; then export CPU_ONLY=1 fi - if $WITH_IO; then - export USE_LMDB=1 - export USE_LEVELDB=1 - export USE_OPENCV=1 - fi $MAKE all test pycaffe warn lint || true if ! $WITH_CUDA; then $MAKE runtest diff --git a/scripts/travis/travis_setup_makefile_config.sh b/scripts/travis/travis_setup_makefile_config.sh index 83aacf11fb0..1440be2af8b 100755 --- a/scripts/travis/travis_setup_makefile_config.sh +++ b/scripts/travis/travis_setup_makefile_config.sh @@ -11,12 +11,6 @@ if $WITH_CUDA; then echo "CUDA_ARCH := $GENCODE" >> Makefile.config fi -# Remove IO library settings from Makefile.config -# to avoid conflicts with CI configuration -sed -i -e '/USE_LMDB/d' Makefile.config -sed -i -e '/USE_LEVELDB/d' Makefile.config -sed -i -e '/USE_OPENCV/d' Makefile.config - cat << 'EOF' >> Makefile.config # Travis' nvcc doesn't like newer boost versions NVCCFLAGS := -Xcudafe --diag_suppress=cc_clobber_ignored -Xcudafe --diag_suppress=useless_using_declaration -Xcudafe --diag_suppress=set_but_not_used diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index c86fd5d1d94..8450aa140be 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -24,16 +24,11 @@ void Blob::Reshape(const vector& shape) { CHECK_LE(shape.size(), kMaxBlobAxes); count_ = 1; shape_.resize(shape.size()); - if (!shape_data_ || shape_data_->size() < shape.size() * sizeof(int)) { - shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int))); - } - int* shape_data = static_cast(shape_data_->mutable_cpu_data()); for (int i = 0; i < shape.size(); ++i) { CHECK_GE(shape[i], 0); CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; count_ *= shape[i]; shape_[i] = shape[i]; - shape_data[i] = shape[i]; } if (count_ > capacity_) { capacity_ = count_; @@ -72,12 +67,6 @@ 
Blob::Blob(const vector& shape) Reshape(shape); } -template -const int* Blob::gpu_shape() const { - CHECK(shape_data_); - return (const int*)shape_data_->gpu_data(); -} - template const Dtype* Blob::cpu_data() const { CHECK(data_); diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 7189d67e289..4666d9bd881 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -1,6 +1,4 @@ -#ifdef USE_OPENCV #include -#endif // USE_OPENCV #include #include @@ -126,13 +124,11 @@ void DataTransformer::Transform(const Datum& datum, } } - template void DataTransformer::Transform(const Datum& datum, Blob* transformed_blob) { // If datum is encoded, decoded and transform the cv::image. if (datum.encoded()) { -#ifdef USE_OPENCV CHECK(!(param_.force_color() && param_.force_gray())) << "cannot set both force_color and force_gray"; cv::Mat cv_img; @@ -144,9 +140,6 @@ void DataTransformer::Transform(const Datum& datum, } // Transform the cv::image into blob. 
return Transform(cv_img, transformed_blob); -#else - LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; -#endif // USE_OPENCV } else { if (param_.force_color() || param_.force_gray()) { LOG(ERROR) << "force_color and force_gray only for encoded datum"; @@ -201,7 +194,6 @@ void DataTransformer::Transform(const vector & datum_vector, } } -#ifdef USE_OPENCV template void DataTransformer::Transform(const vector & mat_vector, Blob* transformed_blob) { @@ -323,7 +315,6 @@ void DataTransformer::Transform(const cv::Mat& cv_img, } } } -#endif // USE_OPENCV template void DataTransformer::Transform(Blob* input_blob, @@ -441,7 +432,6 @@ void DataTransformer::Transform(Blob* input_blob, template vector DataTransformer::InferBlobShape(const Datum& datum) { if (datum.encoded()) { -#ifdef USE_OPENCV CHECK(!(param_.force_color() && param_.force_gray())) << "cannot set both force_color and force_gray"; cv::Mat cv_img; @@ -453,10 +443,8 @@ vector DataTransformer::InferBlobShape(const Datum& datum) { } // InferBlobShape using the cv::image. 
return InferBlobShape(cv_img); -#else - LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; -#endif // USE_OPENCV } + const int crop_size = param_.crop_size(); const int datum_channels = datum.channels(); const int datum_height = datum.height(); @@ -486,7 +474,6 @@ vector DataTransformer::InferBlobShape( return shape; } -#ifdef USE_OPENCV template vector DataTransformer::InferBlobShape(const cv::Mat& cv_img) { const int crop_size = param_.crop_size(); @@ -517,7 +504,6 @@ vector DataTransformer::InferBlobShape( shape[0] = num; return shape; } -#endif // USE_OPENCV template void DataTransformer::InitRand() { diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index c6b47550292..ccb3adc7e89 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include "caffe/filler.hpp" @@ -12,97 +11,50 @@ namespace caffe { template void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; // Configure the kernel size, padding, stride, and inputs. ConvolutionParameter conv_param = this->layer_param_.convolution_param(); - force_nd_im2col_ = conv_param.force_nd_im2col(); - channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis()); - const int first_spatial_axis = channel_axis_ + 1; - const int num_axes = bottom[0]->num_axes(); - num_spatial_axes_ = num_axes - first_spatial_axis; - CHECK_GE(num_spatial_axes_, 0); - vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); - vector spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1)); - // Setup filter kernel dimensions (kernel_shape_). 
- kernel_shape_.Reshape(spatial_dim_blob_shape); - int* kernel_shape_data = kernel_shape_.mutable_cpu_data(); - if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "kernel_h & kernel_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.kernel_size_size()) - << "Either kernel_size or kernel_h/w should be specified; not both."; - kernel_shape_data[0] = conv_param.kernel_h(); - kernel_shape_data[1] = conv_param.kernel_w(); + CHECK(!conv_param.has_kernel_size() != + !(conv_param.has_kernel_h() && conv_param.has_kernel_w())) + << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; + CHECK(conv_param.has_kernel_size() || + (conv_param.has_kernel_h() && conv_param.has_kernel_w())) + << "For non-square filters both kernel_h and kernel_w are required."; + CHECK((!conv_param.has_pad() && conv_param.has_pad_h() + && conv_param.has_pad_w()) + || (!conv_param.has_pad_h() && !conv_param.has_pad_w())) + << "pad is pad OR pad_h and pad_w are required."; + CHECK((!conv_param.has_stride() && conv_param.has_stride_h() + && conv_param.has_stride_w()) + || (!conv_param.has_stride_h() && !conv_param.has_stride_w())) + << "Stride is stride OR stride_h and stride_w are required."; + if (conv_param.has_kernel_size()) { + kernel_h_ = kernel_w_ = conv_param.kernel_size(); } else { - const int num_kernel_dims = conv_param.kernel_size_size(); - CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) - << "kernel_size must be specified once, or once per spatial dimension " - << "(kernel_size specified " << num_kernel_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - for (int i = 0; i < num_spatial_axes_; ++i) { - kernel_shape_data[i] = - conv_param.kernel_size((num_kernel_dims == 1) ? 
0 : i); - } + kernel_h_ = conv_param.kernel_h(); + kernel_w_ = conv_param.kernel_w(); } - for (int i = 0; i < num_spatial_axes_; ++i) { - CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero."; - } - // Setup stride dimensions (stride_). - stride_.Reshape(spatial_dim_blob_shape); - int* stride_data = stride_.mutable_cpu_data(); - if (conv_param.has_stride_h() || conv_param.has_stride_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "stride_h & stride_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.stride_size()) - << "Either stride or stride_h/w should be specified; not both."; - stride_data[0] = conv_param.stride_h(); - stride_data[1] = conv_param.stride_w(); + CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; + CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; + if (!conv_param.has_pad_h()) { + pad_h_ = pad_w_ = conv_param.pad(); } else { - const int num_stride_dims = conv_param.stride_size(); - CHECK(num_stride_dims == 0 || num_stride_dims == 1 || - num_stride_dims == num_spatial_axes_) - << "stride must be specified once, or once per spatial dimension " - << "(stride specified " << num_stride_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - const int kDefaultStride = 1; - for (int i = 0; i < num_spatial_axes_; ++i) { - stride_data[i] = (num_stride_dims == 0) ? kDefaultStride : - conv_param.stride((num_stride_dims == 1) ? 0 : i); - CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero."; - } + pad_h_ = conv_param.pad_h(); + pad_w_ = conv_param.pad_w(); } - // Setup pad dimensions (pad_). 
- pad_.Reshape(spatial_dim_blob_shape); - int* pad_data = pad_.mutable_cpu_data(); - if (conv_param.has_pad_h() || conv_param.has_pad_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "pad_h & pad_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.pad_size()) - << "Either pad or pad_h/w should be specified; not both."; - pad_data[0] = conv_param.pad_h(); - pad_data[1] = conv_param.pad_w(); + if (!conv_param.has_stride_h()) { + stride_h_ = stride_w_ = conv_param.stride(); } else { - const int num_pad_dims = conv_param.pad_size(); - CHECK(num_pad_dims == 0 || num_pad_dims == 1 || - num_pad_dims == num_spatial_axes_) - << "pad must be specified once, or once per spatial dimension " - << "(pad specified " << num_pad_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - const int kDefaultPad = 0; - for (int i = 0; i < num_spatial_axes_; ++i) { - pad_data[i] = (num_pad_dims == 0) ? kDefaultPad : - conv_param.pad((num_pad_dims == 1) ? 0 : i); - } + stride_h_ = conv_param.stride_h(); + stride_w_ = conv_param.stride_w(); } // Special case: im2col is the identity for 1x1 convolution with stride 1 // and no padding, so flag for skipping the buffer and transformation. - is_1x1_ = true; - for (int i = 0; i < num_spatial_axes_; ++i) { - is_1x1_ &= - kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0; - if (!is_1x1_) { break; } - } + is_1x1_ = kernel_w_ == 1 && kernel_h_ == 1 + && stride_h_ == 1 && stride_w_ == 1 && pad_h_ == 0 && pad_w_ == 0; // Configure output channels and groups. - channels_ = bottom[0]->shape(channel_axis_); + channels_ = bottom[0]->channels(); num_output_ = this->layer_param_.convolution_param().num_output(); CHECK_GT(num_output_, 0); group_ = this->layer_param_.convolution_param().group(); @@ -119,29 +71,8 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, // Handle the parameters: weights and biases. 
// - blobs_[0] holds the filter weights // - blobs_[1] holds the biases (optional) - vector weight_shape(2); - weight_shape[0] = conv_out_channels_; - weight_shape[1] = conv_in_channels_ / group_; - for (int i = 0; i < num_spatial_axes_; ++i) { - weight_shape.push_back(kernel_shape_data[i]); - } bias_term_ = this->layer_param_.convolution_param().bias_term(); - vector bias_shape(bias_term_, num_output_); if (this->blobs_.size() > 0) { - CHECK_EQ(1 + bias_term_, this->blobs_.size()) - << "Incorrect number of weight blobs."; - if (weight_shape != this->blobs_[0]->shape()) { - Blob weight_shaped_blob(weight_shape); - LOG(FATAL) << "Incorrect weight shape: expected shape " - << weight_shaped_blob.shape_string() << "; instead, shape was " - << this->blobs_[0]->shape_string(); - } - if (bias_term_ && bias_shape != this->blobs_[1]->shape()) { - Blob bias_shaped_blob(bias_shape); - LOG(FATAL) << "Incorrect bias shape: expected shape " - << bias_shaped_blob.shape_string() << "; instead, shape was " - << this->blobs_[1]->shape_string(); - } LOG(INFO) << "Skipping parameter initialization"; } else { if (bias_term_) { @@ -151,20 +82,20 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, } // Initialize and fill the weights: // output channels x input channels per-group x kernel height x kernel width - this->blobs_[0].reset(new Blob(weight_shape)); + this->blobs_[0].reset(new Blob( + conv_out_channels_, conv_in_channels_ / group_, kernel_h_, kernel_w_)); shared_ptr > weight_filler(GetFiller( this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, initialize and fill the biases. 
if (bias_term_) { + vector bias_shape(1, num_output_); this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.convolution_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); } } - kernel_dim_ = this->blobs_[0]->count(1); - weight_offset_ = conv_out_channels_ * kernel_dim_ / group_; // Propagate gradients to the parameters (as directed by backward pass). this->param_propagate_down_.resize(this->blobs_.size(), true); } @@ -172,68 +103,52 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, template void BaseConvolutionLayer::Reshape(const vector*>& bottom, const vector*>& top) { - const int first_spatial_axis = channel_axis_ + 1; - CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_) - << "bottom num_axes may not change."; - num_ = bottom[0]->count(0, channel_axis_); - CHECK_EQ(bottom[0]->shape(channel_axis_), channels_) - << "Input size incompatible with convolution kernel."; + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; + num_ = bottom[0]->num(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with" + " convolution kernel."; // TODO: generalize to handle inputs of different shapes. for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) { - CHECK(bottom[0]->shape() == bottom[bottom_id]->shape()) - << "All inputs must have the same shape."; + CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num."; + CHECK_EQ(channels_, bottom[bottom_id]->channels()) + << "Inputs must have same channels."; + CHECK_EQ(height_, bottom[bottom_id]->height()) + << "Inputs must have same height."; + CHECK_EQ(width_, bottom[bottom_id]->width()) + << "Inputs must have same width."; } // Shape the tops. 
- bottom_shape_ = &bottom[0]->shape(); compute_output_shape(); - vector top_shape(bottom[0]->shape().begin(), - bottom[0]->shape().begin() + channel_axis_); - top_shape.push_back(num_output_); - for (int i = 0; i < num_spatial_axes_; ++i) { - top_shape.push_back(output_shape_[i]); - } for (int top_id = 0; top_id < top.size(); ++top_id) { - top[top_id]->Reshape(top_shape); + top[top_id]->Reshape(num_, num_output_, height_out_, width_out_); } if (reverse_dimensions()) { - conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis); + conv_in_height_ = height_out_; + conv_in_width_ = width_out_; + conv_out_spatial_dim_ = height_ * width_; } else { - conv_out_spatial_dim_ = top[0]->count(first_spatial_axis); + conv_in_height_ = height_; + conv_in_width_ = width_; + conv_out_spatial_dim_ = height_out_ * width_out_; } - col_offset_ = kernel_dim_ * conv_out_spatial_dim_; + kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_; + weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_; + col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_; output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_; - // Setup input dimensions (conv_input_shape_). - vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); - conv_input_shape_.Reshape(bottom_dim_blob_shape); - int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data(); - for (int i = 0; i < num_spatial_axes_ + 1; ++i) { - if (reverse_dimensions()) { - conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i); - } else { - conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i); - } - } // The im2col result buffer will only hold one image at a time to avoid // overly large memory usage. In the special case of 1x1 convolution // it goes lazily unused to save memory. 
- col_buffer_shape_.clear(); - col_buffer_shape_.push_back(kernel_dim_ * group_); - for (int i = 0; i < num_spatial_axes_; ++i) { - if (reverse_dimensions()) { - col_buffer_shape_.push_back(input_shape(i + 1)); - } else { - col_buffer_shape_.push_back(output_shape_[i]); - } + if (reverse_dimensions()) { + col_buffer_.Reshape(1, kernel_dim_, height_, width_); + } else { + col_buffer_.Reshape(1, kernel_dim_, height_out_, width_out_); } - col_buffer_.Reshape(col_buffer_shape_); - bottom_dim_ = bottom[0]->count(channel_axis_); - top_dim_ = top[0]->count(channel_axis_); - num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_; - num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_; // Set up the all ones "bias multiplier" for adding biases by BLAS - out_spatial_dim_ = top[0]->count(first_spatial_axis); if (bias_term_) { - vector bias_multiplier_shape(1, out_spatial_dim_); + vector bias_multiplier_shape(1, height_out_ * width_out_); bias_multiplier_.Reshape(bias_multiplier_shape); caffe_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_cpu_data()); @@ -252,7 +167,7 @@ void BaseConvolutionLayer::forward_cpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ / - group_, conv_out_spatial_dim_, kernel_dim_, + group_, conv_out_spatial_dim_, kernel_dim_ / group_, (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g, (Dtype)0., output + output_offset_ * g); } @@ -262,7 +177,7 @@ template void BaseConvolutionLayer::forward_cpu_bias(Dtype* output, const Dtype* bias) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(), + height_out_ * width_out_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(), (Dtype)1., output); } @@ -274,7 +189,7 @@ void BaseConvolutionLayer::backward_cpu_gemm(const Dtype* output, col_buff = input; } for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasTrans, 
CblasNoTrans, kernel_dim_, + caffe_cpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_ / group_, conv_out_spatial_dim_, conv_out_channels_ / group_, (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g, (Dtype)0., col_buff + col_offset_ * g); @@ -294,7 +209,7 @@ void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasNoTrans, CblasTrans, conv_out_channels_ / group_, - kernel_dim_, conv_out_spatial_dim_, + kernel_dim_ / group_, conv_out_spatial_dim_, (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g, (Dtype)1., weights + weight_offset_ * g); } @@ -303,7 +218,7 @@ void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, template void BaseConvolutionLayer::backward_cpu_bias(Dtype* bias, const Dtype* input) { - caffe_cpu_gemv(CblasNoTrans, num_output_, out_spatial_dim_, 1., + caffe_cpu_gemv(CblasNoTrans, num_output_, height_out_ * width_out_, 1., input, bias_multiplier_.cpu_data(), 1., bias); } @@ -321,7 +236,7 @@ void BaseConvolutionLayer::forward_gpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ / - group_, conv_out_spatial_dim_, kernel_dim_, + group_, conv_out_spatial_dim_, kernel_dim_ / group_, (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g, (Dtype)0., output + output_offset_ * g); } @@ -331,7 +246,7 @@ template void BaseConvolutionLayer::forward_gpu_bias(Dtype* output, const Dtype* bias) { caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(), + height_out_ * width_out_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(), (Dtype)1., output); } @@ -343,7 +258,7 @@ void BaseConvolutionLayer::backward_gpu_gemm(const Dtype* output, col_buff = input; } for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_, + caffe_gpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_ / group_, 
conv_out_spatial_dim_, conv_out_channels_ / group_, (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g, (Dtype)0., col_buff + col_offset_ * g); @@ -363,7 +278,7 @@ void BaseConvolutionLayer::weight_gpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasNoTrans, CblasTrans, conv_out_channels_ / group_, - kernel_dim_, conv_out_spatial_dim_, + kernel_dim_ / group_, conv_out_spatial_dim_, (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g, (Dtype)1., weights + weight_offset_ * g); } @@ -372,7 +287,7 @@ void BaseConvolutionLayer::weight_gpu_gemm(const Dtype* input, template void BaseConvolutionLayer::backward_gpu_bias(Dtype* bias, const Dtype* input) { - caffe_gpu_gemv(CblasNoTrans, num_output_, out_spatial_dim_, 1., + caffe_gpu_gemv(CblasNoTrans, num_output_, height_out_ * width_out_, 1., input, bias_multiplier_.gpu_data(), 1., bias); } diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 86b500de859..95fba105b9a 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -48,16 +48,11 @@ void ConcatLayer::Reshape(const vector*>& bottom, } top[0]->Reshape(top_shape); CHECK_EQ(bottom_count_sum, top[0]->count()); - if (bottom.size() == 1) { - top[0]->ShareData(*bottom[0]); - top[0]->ShareDiff(*bottom[0]); - } } template void ConcatLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - if (bottom.size() == 1) { return; } Dtype* top_data = top[0]->mutable_cpu_data(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); @@ -77,7 +72,6 @@ void ConcatLayer::Forward_cpu(const vector*>& bottom, template void ConcatLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - if (bottom.size() == 1) { return; } const Dtype* top_diff = top[0]->cpu_diff(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); diff --git 
a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 617701e2621..3c64c7ef224 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -28,7 +28,6 @@ __global__ void Concat(const int nthreads, const Dtype* in_data, template void ConcatLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - if (bottom.size() == 1) { return; } Dtype* top_data = top[0]->mutable_gpu_data(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); @@ -49,7 +48,6 @@ void ConcatLayer::Forward_gpu(const vector*>& bottom, template void ConcatLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - if (bottom.size() == 1) { return; } const Dtype* top_diff = top[0]->gpu_diff(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index fb50bb095ed..928ef5ee468 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -10,17 +10,10 @@ namespace caffe { template void ConvolutionLayer::compute_output_shape() { - const int* kernel_shape_data = this->kernel_shape_.cpu_data(); - const int* stride_data = this->stride_.cpu_data(); - const int* pad_data = this->pad_.cpu_data(); - this->output_shape_.clear(); - for (int i = 0; i < this->num_spatial_axes_; ++i) { - // i + 1 to skip channel axis - const int input_dim = this->input_shape(i + 1); - const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) - / stride_data[i] + 1; - this->output_shape_.push_back(output_dim); - } + this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_) + / this->stride_h_ + 1; + this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_) + / this->stride_w_ + 1; } template @@ -31,11 +24,11 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* 
top_data = top[i]->mutable_cpu_data(); for (int n = 0; n < this->num_; ++n) { - this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, - top_data + n * this->top_dim_); + this->forward_cpu_gemm(bottom_data + bottom[i]->offset(n), weight, + top_data + top[i]->offset(n)); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->cpu_data(); - this->forward_cpu_bias(top_data + n * this->top_dim_, bias); + this->forward_cpu_bias(top_data + top[i]->offset(n), bias); } } } @@ -54,20 +47,20 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_); + this->backward_cpu_bias(bias_diff, top_diff + top[i]->offset(n)); } } if (this->param_propagate_down_[0] || propagate_down[i]) { for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_, - top_diff + n * this->top_dim_, weight_diff); + this->weight_cpu_gemm(bottom_data + bottom[i]->offset(n), + top_diff + top[i]->offset(n), weight_diff); } // gradient w.r.t. bottom data, if necessary. 
if (propagate_down[i]) { - this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight, - bottom_diff + n * this->bottom_dim_); + this->backward_cpu_gemm(top_diff + top[i]->offset(n), weight, + bottom_diff + bottom[i]->offset(n)); } } } diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index b429d2b47d0..b8a98ff7cc9 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -16,11 +16,11 @@ void ConvolutionLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); for (int n = 0; n < this->num_; ++n) { - this->forward_gpu_gemm(bottom_data + n * this->bottom_dim_, weight, - top_data + n * this->top_dim_); + this->forward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight, + top_data + top[i]->offset(n)); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->gpu_data(); - this->forward_gpu_bias(top_data + n * this->top_dim_, bias); + this->forward_gpu_bias(top_data + top[i]->offset(n), bias); } } } @@ -37,7 +37,7 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_gpu_bias(bias_diff, top_diff + n * this->top_dim_); + this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n)); } } if (this->param_propagate_down_[0] || propagate_down[i]) { @@ -46,13 +46,13 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_gpu_gemm(bottom_data + n * this->bottom_dim_, - top_diff + n * this->top_dim_, weight_diff); + this->weight_gpu_gemm(bottom_data + bottom[i]->offset(n), + top_diff + top[i]->offset(n), weight_diff); } // gradient w.r.t. bottom data, if necessary. 
if (propagate_down[i]) { - this->backward_gpu_gemm(top_diff + n * this->top_dim_, weight, - bottom_diff + n * this->bottom_dim_); + this->backward_gpu_gemm(top_diff + top[i]->offset(n), weight, + bottom_diff + bottom[i]->offset(n)); } } } diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 3514fe2aba5..104d2b9d669 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -34,15 +34,14 @@ void CuDNNConvolutionLayer::LayerSetUp( } // Set the indexing parameters. + weight_offset_ = (this->num_output_ / this->group_) + * (this->channels_ / this->group_) * this->kernel_h_ * this->kernel_w_; bias_offset_ = (this->num_output_ / this->group_); // Create filter descriptor. - const int* kernel_shape_data = this->kernel_shape_.cpu_data(); - const int kernel_h = kernel_shape_data[0]; - const int kernel_w = kernel_shape_data[1]; cudnn::createFilterDesc(&filter_desc_, this->num_output_ / this->group_, this->channels_ / this->group_, - kernel_h, kernel_w); + this->kernel_h_, this->kernel_w_); // Create tensor descriptor(s) for data and corresponding convolution(s). for (int i = 0; i < bottom.size(); i++) { @@ -69,36 +68,29 @@ template void CuDNNConvolutionLayer::Reshape( const vector*>& bottom, const vector*>& top) { ConvolutionLayer::Reshape(bottom, top); - CHECK_EQ(2, this->num_spatial_axes_) - << "CuDNNConvolution input must have 2 spatial axes " - << "(e.g., height and width). 
" - << "Use 'engine: CAFFE' for general ND convolution."; - bottom_offset_ = this->bottom_dim_ / this->group_; - top_offset_ = this->top_dim_ / this->group_; - const int height = bottom[0]->shape(this->channel_axis_ + 1); - const int width = bottom[0]->shape(this->channel_axis_ + 2); - const int height_out = top[0]->shape(this->channel_axis_ + 1); - const int width_out = top[0]->shape(this->channel_axis_ + 2); - const int* pad_data = this->pad_.cpu_data(); - const int pad_h = pad_data[0]; - const int pad_w = pad_data[1]; - const int* stride_data = this->stride_.cpu_data(); - const int stride_h = stride_data[0]; - const int stride_w = stride_data[1]; + bottom_offset_ = (this->channels_ / this->group_) + * this->height_ * this->width_; + top_offset_ = (this->num_output_ / this->group_) + * this->height_out_ * this->width_out_; for (int i = 0; i < bottom.size(); i++) { cudnn::setTensor4dDesc(&bottom_descs_[i], this->num_, - this->channels_ / this->group_, height, width, - this->channels_ * height * width, - height * width, width, 1); + this->channels_ / this->group_, + this->height_, this->width_, + this->channels_ * this->height_ * this->width_, + this->height_ * this->width_, + this->width_, 1); cudnn::setTensor4dDesc(&top_descs_[i], this->num_, - this->num_output_ / this->group_, height_out, width_out, - this->num_output_ * this->out_spatial_dim_, - this->out_spatial_dim_, width_out, 1); + this->num_output_ / this->group_, + this->height_out_, this->width_out_, + this->num_output_ * this->height_out_ * this->width_out_, + this->height_out_ * this->width_out_, + this->width_out_, 1); cudnn::setConvolutionDesc(&conv_descs_[i], bottom_descs_[i], - filter_desc_, pad_h, pad_w, stride_h, stride_w); + filter_desc_, this->pad_h_, this->pad_w_, + this->stride_h_, this->stride_w_); } // Tensor descriptor for bias. 
diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 691152021a3..b4e802e13d1 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -14,15 +14,15 @@ __global__ void sync_conv_groups() { } template void CuDNNConvolutionLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { - const int* kernel_shape_data = this->kernel_shape_.cpu_data(); - const int kernel_h = kernel_shape_data[0]; - const int kernel_w = kernel_shape_data[1]; - const size_t workspace_limit_bytes = - kernel_h * kernel_w * this->channels_ * sizeof(int) + 1; - const Dtype* weight = this->blobs_[0]->gpu_data(); for (int i = 0; i < bottom.size(); ++i) { const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + + size_t workspace_limit_bytes = this->kernel_h_ * + this->kernel_w_ * + this->channels_ * + sizeof(int) + 1; // Forward through cuDNN in parallel over groups. for (int g = 0; g < this->group_; g++) { @@ -69,7 +69,7 @@ void CuDNNConvolutionLayer::Forward_gpu( CUDNN_CHECK(cudnnConvolutionForward(handle_[g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, - filter_desc_, weight + this->weight_offset_ * g, + filter_desc_, weight + weight_offset_ * g, conv_descs_[i], algo, workspace, workspaceSizeInBytes, cudnn::dataType::zero, @@ -128,7 +128,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, top_descs_[i], top_diff + top_offset_ * g, conv_descs_[i], cudnn::dataType::one, - filter_desc_, weight_diff + this->weight_offset_ * g)); + filter_desc_, weight_diff + weight_offset_ * g)); } // Gradient w.r.t. bottom data. 
@@ -139,7 +139,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g], cudnn::dataType::one, - filter_desc_, weight + this->weight_offset_ * g, + filter_desc_, weight + weight_offset_ * g, top_descs_[i], top_diff + top_offset_ * g, conv_descs_[i], cudnn::dataType::zero, diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 71f8cb099e8..0932d9feff3 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -1,6 +1,5 @@ -#ifdef USE_OPENCV #include -#endif // USE_OPENCV + #include #include diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index 91aabb315b2..a4612963b6b 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -10,17 +10,10 @@ namespace caffe { template void DeconvolutionLayer::compute_output_shape() { - const int* kernel_shape_data = this->kernel_shape_.cpu_data(); - const int* stride_data = this->stride_.cpu_data(); - const int* pad_data = this->pad_.cpu_data(); - this->output_shape_.clear(); - for (int i = 0; i < this->num_spatial_axes_; ++i) { - // i + 1 to skip channel axis - const int input_dim = this->input_shape(i + 1); - const int output_dim = stride_data[i] * (input_dim - 1) - + kernel_shape_data[i] - 2 * pad_data[i]; - this->output_shape_.push_back(output_dim); - } + this->height_out_ = this->stride_h_ * (this->height_ - 1) + this->kernel_h_ + - 2 * this->pad_h_; + this->width_out_ = this->stride_w_ * (this->width_ - 1) + this->kernel_w_ + - 2 * this->pad_w_; } template @@ -31,11 +24,11 @@ void DeconvolutionLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* top_data = top[i]->mutable_cpu_data(); for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, - top_data + n * this->top_dim_); + 
this->backward_cpu_gemm(bottom_data + bottom[i]->offset(n), weight, + top_data + top[i]->offset(n)); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->cpu_data(); - this->forward_cpu_bias(top_data + n * this->top_dim_, bias); + this->forward_cpu_bias(top_data + top[i]->offset(n), bias); } } } @@ -54,21 +47,21 @@ void DeconvolutionLayer::Backward_cpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_); + this->backward_cpu_bias(bias_diff, top_diff + top[i]->offset(n)); } } if (this->param_propagate_down_[0] || propagate_down[i]) { for (int n = 0; n < this->num_; ++n) { // Gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_cpu_gemm(top_diff + n * this->top_dim_, - bottom_data + n * this->bottom_dim_, weight_diff); + this->weight_cpu_gemm(top_diff + top[i]->offset(n), + bottom_data + bottom[i]->offset(n), weight_diff); } // Gradient w.r.t. bottom data, if necessary, reusing the column buffer // we might have just computed above. 
if (propagate_down[i]) { - this->forward_cpu_gemm(top_diff + n * this->top_dim_, weight, - bottom_diff + n * this->bottom_dim_, + this->forward_cpu_gemm(top_diff + top[i]->offset(n), weight, + bottom_diff + bottom[i]->offset(n), this->param_propagate_down_[0]); } } diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index 5dbdcc3149f..8a1eed8aa16 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -16,11 +16,11 @@ void DeconvolutionLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); for (int n = 0; n < this->num_; ++n) { - this->backward_gpu_gemm(bottom_data + n * this->bottom_dim_, weight, - top_data + n * this->top_dim_); + this->backward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight, + top_data + top[i]->offset(n)); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->gpu_data(); - this->forward_gpu_bias(top_data + n * this->top_dim_, bias); + this->forward_gpu_bias(top_data + top[i]->offset(n), bias); } } } @@ -39,20 +39,20 @@ void DeconvolutionLayer::Backward_gpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_gpu_bias(bias_diff, top_diff + n * this->top_dim_); + this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n)); } } if (this->param_propagate_down_[0] || propagate_down[i]) { for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_gpu_gemm(top_diff + n * this->top_dim_, - bottom_data + n * this->bottom_dim_, weight_diff); + this->weight_gpu_gemm(top_diff + top[i]->offset(n), + bottom_data + bottom[i]->offset(n), weight_diff); } // gradient w.r.t. bottom data, if necessary. 
if (propagate_down[i]) { - this->forward_gpu_gemm(top_diff + n * this->top_dim_, weight, - bottom_diff + n * this->bottom_dim_, + this->forward_gpu_gemm(top_diff + top[i]->offset(n), weight, + bottom_diff + bottom[i]->offset(n), this->param_propagate_down_[0]); } } diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 595c9dbbe5e..1c802714e33 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -11,106 +11,54 @@ template void Im2colLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { ConvolutionParameter conv_param = this->layer_param_.convolution_param(); - force_nd_im2col_ = conv_param.force_nd_im2col(); - const int input_num_dims = bottom[0]->shape().size(); - channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis()); - const int first_spatial_dim = channel_axis_ + 1; - num_spatial_axes_ = input_num_dims - first_spatial_dim; - CHECK_GE(num_spatial_axes_, 1); - vector dim_blob_shape(1, num_spatial_axes_); - // Setup filter kernel dimensions (kernel_shape_). 
- kernel_shape_.Reshape(dim_blob_shape); - int* kernel_shape_data = kernel_shape_.mutable_cpu_data(); - if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "kernel_h & kernel_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.kernel_size_size()) - << "Either kernel_size or kernel_h/w should be specified; not both."; - kernel_shape_data[0] = conv_param.kernel_h(); - kernel_shape_data[1] = conv_param.kernel_w(); + CHECK(!conv_param.has_kernel_size() != + !(conv_param.has_kernel_h() && conv_param.has_kernel_w())) + << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; + CHECK(conv_param.has_kernel_size() || + (conv_param.has_kernel_h() && conv_param.has_kernel_w())) + << "For non-square filters both kernel_h and kernel_w are required."; + CHECK((!conv_param.has_pad() && conv_param.has_pad_h() + && conv_param.has_pad_w()) + || (!conv_param.has_pad_h() && !conv_param.has_pad_w())) + << "pad is pad OR pad_h and pad_w are required."; + CHECK((!conv_param.has_stride() && conv_param.has_stride_h() + && conv_param.has_stride_w()) + || (!conv_param.has_stride_h() && !conv_param.has_stride_w())) + << "Stride is stride OR stride_h and stride_w are required."; + if (conv_param.has_kernel_size()) { + kernel_h_ = kernel_w_ = conv_param.kernel_size(); } else { - const int num_kernel_dims = conv_param.kernel_size_size(); - CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) - << "kernel_size must be specified once, or once per spatial dimension " - << "(kernel_size specified " << num_kernel_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - for (int i = 0; i < num_spatial_axes_; ++i) { - kernel_shape_data[i] = - conv_param.kernel_size((num_kernel_dims == 1) ? 
0 : i); - } + kernel_h_ = conv_param.kernel_h(); + kernel_w_ = conv_param.kernel_w(); } - for (int i = 0; i < num_spatial_axes_; ++i) { - CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero."; - } - // Setup stride dimensions (stride_). - stride_.Reshape(dim_blob_shape); - int* stride_data = stride_.mutable_cpu_data(); - if (conv_param.has_stride_h() || conv_param.has_stride_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "stride_h & stride_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.stride_size()) - << "Either stride or stride_h/w should be specified; not both."; - stride_data[0] = conv_param.stride_h(); - stride_data[1] = conv_param.stride_w(); + CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; + CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; + if (!conv_param.has_pad_h()) { + pad_h_ = pad_w_ = conv_param.pad(); } else { - const int num_stride_dims = conv_param.stride_size(); - CHECK(num_stride_dims == 0 || num_stride_dims == 1 || - num_stride_dims == num_spatial_axes_) - << "stride must be specified once, or once per spatial dimension " - << "(stride specified " << num_stride_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - const int kDefaultStride = 1; - for (int i = 0; i < num_spatial_axes_; ++i) { - stride_data[i] = (num_stride_dims == 0) ? kDefaultStride : - conv_param.stride((num_stride_dims == 1) ? 0 : i); - CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero."; - } + pad_h_ = conv_param.pad_h(); + pad_w_ = conv_param.pad_w(); } - // Setup pad dimensions (pad_). 
- pad_.Reshape(dim_blob_shape); - int* pad_data = pad_.mutable_cpu_data(); - if (conv_param.has_pad_h() || conv_param.has_pad_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "pad_h & pad_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.pad_size()) - << "Either pad or pad_h/w should be specified; not both."; - pad_data[0] = conv_param.pad_h(); - pad_data[1] = conv_param.pad_w(); + if (!conv_param.has_stride_h()) { + stride_h_ = stride_w_ = conv_param.stride(); } else { - const int num_pad_dims = conv_param.pad_size(); - CHECK(num_pad_dims == 0 || num_pad_dims == 1 || - num_pad_dims == num_spatial_axes_) - << "pad must be specified once, or once per spatial dimension " - << "(pad specified " << num_pad_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - const int kDefaultPad = 0; - for (int i = 0; i < num_spatial_axes_; ++i) { - pad_data[i] = (num_pad_dims == 0) ? kDefaultPad : - conv_param.pad((num_pad_dims == 1) ? 0 : i); - } + stride_h_ = conv_param.stride_h(); + stride_w_ = conv_param.stride_w(); } } template void Im2colLayer::Reshape(const vector*>& bottom, const vector*>& top) { - vector top_shape = bottom[0]->shape(); - const int* kernel_shape_data = kernel_shape_.cpu_data(); - const int* stride_data = stride_.cpu_data(); - const int* pad_data = pad_.cpu_data(); - for (int i = 0; i < num_spatial_axes_; ++i) { - top_shape[channel_axis_] *= kernel_shape_data[i]; - const int input_dim = bottom[0]->shape(channel_axis_ + i + 1); - const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) - / stride_data[i] + 1; - top_shape[channel_axis_ + i + 1] = output_dim; - } - top[0]->Reshape(top_shape); - num_ = bottom[0]->count(0, channel_axis_); - bottom_dim_ = bottom[0]->count(channel_axis_); - top_dim_ = top[0]->count(channel_axis_); - - channels_ = bottom[0]->shape(channel_axis_); + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; + channels_ = 
bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + top[0]->Reshape( + bottom[0]->num(), channels_ * kernel_h_ * kernel_w_, + (height_ + 2 * pad_h_ - kernel_h_) / stride_h_ + 1, + (width_ + 2 * pad_w_ - kernel_w_) / stride_w_ + 1); } template @@ -118,27 +66,10 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); - for (int n = 0; n < num_; ++n) { - DCHECK_EQ(bottom[0]->shape().size() - channel_axis_, num_spatial_axes_ + 1); - DCHECK_EQ(top[0]->shape().size() - channel_axis_, num_spatial_axes_ + 1); - DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_); - DCHECK_EQ(pad_.count(), num_spatial_axes_); - DCHECK_EQ(stride_.count(), num_spatial_axes_); - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_cpu(bottom_data + n * bottom_dim_, channels_, - bottom[0]->shape(channel_axis_ + 1), - bottom[0]->shape(channel_axis_ + 2), - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - top_data + n * top_dim_); - } else { - im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_, - bottom[0]->shape().data() + channel_axis_, - top[0]->shape().data() + channel_axis_, - kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), - top_data + n * top_dim_); - } + for (int n = 0; n < bottom[0]->num(); ++n) { + im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_h_, kernel_w_, pad_h_, pad_w_, + stride_h_, stride_w_, top_data + top[0]->offset(n)); } } @@ -147,22 +78,10 @@ void Im2colLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - for (int n = 0; n < num_; ++n) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_cpu(top_diff + 
n * top_dim_, channels_, - bottom[0]->shape(channel_axis_ + 1), - bottom[0]->shape(channel_axis_ + 2), - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - bottom_diff + n * bottom_dim_); - } else { - col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_, - bottom[0]->shape().data() + channel_axis_, - top[0]->shape().data() + channel_axis_, - kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), - bottom_diff + n * bottom_dim_); - } + for (int n = 0; n < top[0]->num(); ++n) { + col2im_cpu(top_diff + top[0]->offset(n), channels_, height_, width_, + kernel_h_, kernel_w_, pad_h_, pad_w_, + stride_h_, stride_w_, bottom_diff + bottom[0]->offset(n)); } } diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index cd507623c78..9c338b14cb7 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -12,23 +12,10 @@ void Im2colLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); - const int num_kernels = channels_ * top[0]->count(channel_axis_ + 1); - for (int n = 0; n < num_; ++n) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_gpu(bottom_data + n * bottom_dim_, channels_, - bottom[0]->shape(channel_axis_ + 1), - bottom[0]->shape(channel_axis_ + 2), - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - top_data + n * top_dim_); - } else { - im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_, - num_kernels, bottom[0]->gpu_shape() + channel_axis_, - top[0]->gpu_shape() + channel_axis_, - kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - top_data + n * top_dim_); - } + for (int n = 0; n < bottom[0]->num(); ++n) { + im2col_gpu(bottom_data + bottom[0]->offset(n), 
channels_, height_, + width_, kernel_h_, kernel_w_, pad_h_, pad_w_, + stride_h_, stride_w_, top_data + top[0]->offset(n)); } } @@ -37,22 +24,10 @@ void Im2colLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - for (int n = 0; n < num_; ++n) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_gpu(top_diff + n * top_dim_, channels_, - bottom[0]->shape(channel_axis_ + 1), - bottom[0]->shape(channel_axis_ + 2), - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - bottom_diff + n * bottom_dim_); - } else { - col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_, - bottom[0]->gpu_shape() + channel_axis_, - top[0]->gpu_shape() + channel_axis_, - kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - bottom_diff + n * bottom_dim_); - } + for (int n = 0; n < top[0]->num(); ++n) { + col2im_gpu(top_diff + top[0]->offset(n), channels_, height_, width_, + kernel_h_, kernel_w_, pad_h_, pad_w_, + stride_h_, stride_w_, bottom_diff + bottom[0]->offset(n)); } } diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 3d2190f8bbb..223ba3a75ca 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -1,4 +1,3 @@ -#ifdef USE_OPENCV #include #include // NOLINT(readability/streams) @@ -165,4 +164,3 @@ INSTANTIATE_CLASS(ImageDataLayer); REGISTER_LAYER_CLASS(ImageData); } // namespace caffe -#endif // USE_OPENCV diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index 2370aa04d3b..42de4198bc4 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -1,6 +1,4 @@ -#ifdef USE_OPENCV #include -#endif // USE_OPENCV #include @@ -55,7 +53,6 @@ void 
MemoryDataLayer::AddDatumVector(const vector& datum_vector) { has_new_data_ = true; } -#ifdef USE_OPENCV template void MemoryDataLayer::AddMatVector(const vector& mat_vector, const vector& labels) { @@ -79,7 +76,6 @@ void MemoryDataLayer::AddMatVector(const vector& mat_vector, Reset(top_data, top_label, num); has_new_data_ = true; } -#endif // USE_OPENCV template void MemoryDataLayer::Reset(Dtype* data, Dtype* labels, int n) { diff --git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index 0a059ae88fe..e4418c9cf9c 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -67,16 +67,11 @@ void SliceLayer::Reshape(const vector*>& bottom, } } CHECK_EQ(count, bottom[0]->count()); - if (top.size() == 1) { - top[0]->ShareData(*bottom[0]); - top[0]->ShareDiff(*bottom[0]); - } } template void SliceLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - if (top.size() == 1) { return; } int offset_slice_axis = 0; const Dtype* bottom_data = bottom[0]->cpu_data(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); @@ -97,7 +92,7 @@ void SliceLayer::Forward_cpu(const vector*>& bottom, template void SliceLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - if (!propagate_down[0] || top.size() == 1) { return; } + if (!propagate_down[0]) { return; } int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index e8dc6cd98fc..796841d3f52 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -28,7 +28,6 @@ __global__ void Slice(const int nthreads, const Dtype* in_data, template void SliceLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - if (top.size() == 1) { return; } int offset_slice_axis = 0; const Dtype* bottom_data = bottom[0]->gpu_data(); const int 
bottom_slice_axis = bottom[0]->shape(slice_axis_); @@ -49,7 +48,7 @@ void SliceLayer::Forward_gpu(const vector*>& bottom, template void SliceLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - if (!propagate_down[0] || top.size() == 1) { return; } + if (!propagate_down[0]) { return; } int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); diff --git a/src/caffe/layers/triplet_loss_layer.cpp b/src/caffe/layers/triplet_loss_layer.cpp new file mode 100644 index 00000000000..7d11ff59ce4 --- /dev/null +++ b/src/caffe/layers/triplet_loss_layer.cpp @@ -0,0 +1,397 @@ +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/loss_layers.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void TripletLossLayer::LayerSetUp( + const vector*>& bottom, const vector*>& top) { + LossLayer::LayerSetUp(bottom, top); + // number of triplet in a batch + int num_triplets = this->layer_param_.triplet_loss_param().num_triplets(); + // dimension of each descriptor + int dim = bottom[0]->count()/bottom[0]->num(); + CHECK_EQ(bottom[0]->channels(), dim); + CHECK_EQ(bottom[0]->height(), 1); + CHECK_EQ(bottom[0]->width(), 1); + CHECK_EQ(bottom[1]->channels(), 1); + CHECK_EQ(bottom[1]->height(), 1); + CHECK_EQ(bottom[1]->width(), 1); + // In each set, we have: + // the descriptor of reference sample, closest sample, and negative samples + // number of sets in the whole batch + int num_set = bottom[0]->num()/(2 + num_triplets); + dist_sq_.Reshape(num_set, 1, 1, 1); + diff_pos.Reshape(num_set, dim, 1, 1); + dist_sq_pos.Reshape(num_set, 1, 1, 1); + diff_neg.Reshape(num_set, dim, 1, 1); + dist_sq_neg.Reshape(num_set, 1, 1, 1); + // vector of ones used to sum along channels + summer_vec_.Reshape(bottom[0]->channels(), 1, 1, 1); + for (int i = 0; i < bottom[0]->channels(); ++i) + 
summer_vec_.mutable_cpu_data()[i] = Dtype(1); +} + +template +void TripletLossLayer::Forward_cpu( + const vector*>& bottom, + const vector*>& top) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_triplets = this->layer_param_.triplet_loss_param().num_triplets(); + CHECK_EQ(bottom[0]->num()%(2 + num_triplets), 0); + Dtype loss(0.0); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_triplets); + if (losstype == 0) { + for (int i = 0; i < num_set; ++i) { + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*i + 1)*dim, // positive + diff_pos.mutable_cpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + dist_sq_pos.mutable_cpu_data()[i] = caffe_cpu_dot(dim, + diff_pos.cpu_data() + i*dim, diff_pos.cpu_data() + i*dim); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + loss += dist_sq_pos.cpu_data()[i]; + for (int triplet = 0; triplet < num_triplets; ++triplet) { + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[i] = dist_sq_pos.cpu_data()[i]; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*i + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + i*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[i] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + i*dim, diff_neg.cpu_data() + i*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] -= dist_sq_neg.cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(margin + dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / 
static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } else { + for (int i = 0; i < num_set; ++i) { + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*i + 1)*dim, // positive + diff_pos.mutable_cpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + dist_sq_pos.mutable_cpu_data()[i] = caffe_cpu_dot(dim, + diff_pos.cpu_data() + i*dim, diff_pos.cpu_data() + i*dim); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + loss += dist_sq_pos.cpu_data()[i]; + for (int triplet = 0; triplet < num_triplets; ++triplet) { + dist_sq_.mutable_cpu_data()[i] = dist_sq_pos.mutable_cpu_data()[i]; + dist_sq_.mutable_cpu_data()[i] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*i + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + i*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[i] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + i*dim, diff_neg.cpu_data() + i*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] = 1 - \ + dist_sq_neg.cpu_data()[i] / dist_sq_.mutable_cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } +} + +template +void TripletLossLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_triplets = this->layer_param_.triplet_loss_param().num_triplets(); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = 
bottom[0]->num()/(2 + num_triplets); + if (losstype == 0) { + // BP for feat1(extracted from reference) + for (int i = 0; i < 1; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_neg.mutable_cpu_data()[j] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + // Loss component calculated from negative part + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + (2 + num_triplets)*j*dim); + // dissimilar pair in triplet + caffe_cpu_axpby( + dim, + -alpha, + diff_neg.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + // BP for feat2(extracted from the closest sample) + for (int i = 1; i < 2; ++i) { + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + caffe_cpu_axpby( + dim, + alpha, + 
diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_neg.mutable_cpu_data()[j] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + // BP for negative feature used in the num_triplets triplet part + for (int i = 2; i < 2 + num_triplets; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*j + i)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_neg.mutable_cpu_data()[j] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + if ((margin + dist_sq_.cpu_data()[j]) > 
Dtype(0.0)) { + // dissimilar pairs + caffe_cpu_axpby( + dim, + alpha, + diff_neg.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + } else { + caffe_set(dim, Dtype(0), bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } else { + for (int i = 0; i < 1; ++i) { + // BP for data1(feat1) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.mutable_cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.mutable_cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha*dist_sq_neg.mutable_cpu_data()[j]\ + /((dist_sq_pos.mutable_cpu_data()[j]+margin)\ + *(dist_sq_pos.mutable_cpu_data()[j]+margin)), + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + caffe_cpu_axpby( + dim, + -alpha/(dist_sq_pos.mutable_cpu_data()[j] + margin), + diff_neg.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + for (int i = 1; 
i < 2; ++i) { + // BP for positive data(feat2) + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.mutable_cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.mutable_cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha*dist_sq_neg.mutable_cpu_data()[j]\ + /((dist_sq_pos.mutable_cpu_data()[j]+margin)\ + *(dist_sq_pos.mutable_cpu_data()[j]+margin)), + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + for (int i = 2; i < 2 + num_triplets; ++i) { + // BP for negative data(feat3) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.mutable_cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + 
caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->cpu_data() + ((2 + num_triplets)*j + i)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.mutable_cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha/(dist_sq_pos.mutable_cpu_data()[j] + margin), + diff_neg.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + } else { + caffe_set(dim, Dtype(0), bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(TripletLossLayer); +#endif + +INSTANTIATE_CLASS(TripletLossLayer); +REGISTER_LAYER_CLASS(TripletLoss); + +} // namespace caffe diff --git a/src/caffe/layers/triplet_loss_layer.cu b/src/caffe/layers/triplet_loss_layer.cu new file mode 100644 index 00000000000..cd1fbb1201b --- /dev/null +++ b/src/caffe/layers/triplet_loss_layer.cu @@ -0,0 +1,392 @@ +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void TripletLossLayer::Forward_gpu( + const vector*>& bottom, + const vector*>& top) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_triplets = this->layer_param_.triplet_loss_param().num_triplets(); + CHECK_EQ(bottom[0]->num()%(2 + num_triplets), 0); + Dtype loss(0.0); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_triplets); + if (losstype == 0) { + for (int i = 0; i < num_set; ++i) { + caffe_gpu_sub( + dim, + 
bottom[0]->gpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*i + 1)*dim, // positive + diff_pos.mutable_gpu_data() + i*dim); // reference-pose_close + caffe_gpu_dot( + dim, + diff_pos.gpu_data() + i*dim, + diff_pos.gpu_data() + i*dim, + dist_sq_pos.mutable_cpu_data() + i); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + loss += dist_sq_pos.gpu_data()[i]; + for (int triplet = 0; triplet < num_triplets; ++triplet) { + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_gpu_data()[i] = dist_sq_pos.gpu_data()[i]; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*i + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + i*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + i*dim, + diff_neg.gpu_data() + i*dim, + dist_sq_neg.mutable_cpu_data() + i); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[i] -= dist_sq_neg.gpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(margin + dist_sq_.gpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_gpu_data()[0] = loss; + } else { + for (int i = 0; i < num_set; ++i) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*i + 1)*dim, // positive + diff_pos.mutable_gpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + caffe_gpu_dot( + dim, + diff_pos.gpu_data() + i*dim, + diff_pos.gpu_data() + i*dim, + dist_sq_pos.mutable_cpu_data() + i); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + loss += dist_sq_pos.gpu_data()[i]; + for (int triplet = 0; 
triplet < num_triplets; ++triplet) { + dist_sq_.mutable_gpu_data()[i] = dist_sq_pos.mutable_gpu_data()[i]; + dist_sq_.mutable_gpu_data()[i] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*i + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + i*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + i*dim, + diff_neg.gpu_data() + i*dim, + dist_sq_neg.mutable_cpu_data() + i); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[i] = 1 - \ + dist_sq_neg.gpu_data()[i] / dist_sq_.mutable_gpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(dist_sq_.gpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_gpu_data()[0] = loss; + } +} + +template +void TripletLossLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_triplets = this->layer_param_.triplet_loss_param().num_triplets(); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_triplets); + if (losstype == 0) { + // BP for feat1(extracted from reference) + for (int i = 0; i < 1; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->gpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*j*dim, // 
reference + bottom[0]->gpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_gpu_data()[j] = dist_sq_pos.gpu_data()[j]; + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[j] -= dist_sq_neg.gpu_data()[j]; + // Loss component calculated from negative part + if ((margin + dist_sq_.gpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + (2 + num_triplets)*j*dim); + // dissimilar pair in triplet + caffe_gpu_axpby( + dim, + -alpha, + diff_neg.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + // BP for feat2(extracted from the closest sample) + for (int i = 1; i < 2; ++i) { + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->gpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_gpu_data()[j] = dist_sq_pos.gpu_data()[j]; + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() 
+ j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[j] -= dist_sq_neg.gpu_data()[j]; + if ((margin + dist_sq_.gpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + // BP for negative feature used in the num_triplets triplet part + for (int i = 2; i < 2 + num_triplets; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->gpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*j + i)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_gpu_data()[j] = dist_sq_pos.gpu_data()[j]; + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[j] -= dist_sq_neg.gpu_data()[j]; + if ((margin + dist_sq_.gpu_data()[j]) > Dtype(0.0)) { + // dissimilar pairs + caffe_gpu_axpby( + dim, + alpha, + diff_neg.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + } else { + caffe_gpu_set(dim, Dtype(0), bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } else { + for (int i = 0; i < 1; ++i) { + // BP for data1(feat1) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->gpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + 
num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + dist_sq_.mutable_gpu_data()[j] = dist_sq_pos.mutable_gpu_data()[j]; + dist_sq_.mutable_gpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[j] = 1 - \ + dist_sq_neg.gpu_data()[j] / dist_sq_.mutable_gpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.gpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha*dist_sq_neg.mutable_gpu_data()[j]\ + /((dist_sq_pos.mutable_gpu_data()[j]+margin)\ + *(dist_sq_pos.mutable_gpu_data()[j]+margin)), + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + caffe_gpu_axpby( + dim, + -alpha/(dist_sq_pos.mutable_gpu_data()[j] + margin), + diff_neg.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + for (int i = 1; i < 2; ++i) { + // BP for positive data(feat2) + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->gpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + // the num_triplets triplet part + for (int triplet = 0; triplet < num_triplets; ++triplet) { + dist_sq_.mutable_gpu_data()[j] = dist_sq_pos.mutable_gpu_data()[j]; + dist_sq_.mutable_gpu_data()[j] += margin; + // Loss component 
calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[j] = 1 - \ + dist_sq_neg.gpu_data()[j] / dist_sq_.mutable_gpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.gpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha*dist_sq_neg.mutable_gpu_data()[j]\ + /((dist_sq_pos.mutable_gpu_data()[j]+margin)\ + *(dist_sq_pos.mutable_gpu_data()[j]+margin)), + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } + for (int i = 2; i < 2 + num_triplets; ++i) { + // BP for negative data(feat3) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->gpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + dist_sq_.mutable_gpu_data()[j] = dist_sq_pos.mutable_gpu_data()[j]; + dist_sq_.mutable_gpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_triplets)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_triplets)*j + i)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_gpu_data()[j] = 1 - \ + dist_sq_neg.gpu_data()[j] / dist_sq_.mutable_gpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.gpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + 
alpha/(dist_sq_pos.mutable_gpu_data()[j] + margin), + diff_neg.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_triplets)*j + i)*dim); + } else { + caffe_gpu_set(dim, Dtype(0), bout + ((2 + num_triplets)*j + i)*dim); + } + } + } + } + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(TripletLossLayer); + +} // namespace caffe diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index f8db61c9258..f637f2ec6d4 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -1,4 +1,3 @@ -#ifdef USE_OPENCV #include #include @@ -469,4 +468,3 @@ INSTANTIATE_CLASS(WindowDataLayer); REGISTER_LAYER_CLASS(WindowData); } // namespace caffe -#endif // USE_OPENCV diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index ebb8b5d28c2..89d14013dc9 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -810,11 +810,12 @@ void Net::Backward() { BackwardFromTo(layers_.size() - 1, 0); if (debug_info_) { Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0; - for (int i = 0; i < learnable_params_.size(); ++i) { - asum_data += learnable_params_[i]->asum_data(); - asum_diff += learnable_params_[i]->asum_diff(); - sumsq_data += learnable_params_[i]->sumsq_data(); - sumsq_diff += learnable_params_[i]->sumsq_diff(); + for (int i = 0; i < params_.size(); ++i) { + if (param_owners_[i] >= 0) { continue; } + asum_data += params_[i]->asum_data(); + asum_diff += params_[i]->asum_diff(); + sumsq_data += params_[i]->sumsq_data(); + sumsq_diff += params_[i]->sumsq_diff(); } const Dtype l2norm_data = std::sqrt(sumsq_data); const Dtype l2norm_diff = std::sqrt(sumsq_diff); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f52c941b05e..331d31ce8f8 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -385,6 +385,7 @@ message LayerParameter { optional ThresholdParameter threshold_param = 128; optional TileParameter tile_param = 138; optional WindowDataParameter 
window_data_param = 129; + optional TripletLossParameter triplet_loss_param = 139; } // Message that stores parameters used to apply transformation @@ -468,27 +469,28 @@ message ContrastiveLossParameter { optional bool legacy_version = 2 [default = false]; } +message TripletLossParameter { + //margin for negative triplet + optional float margin = 1 [default = 1.0]; + optional uint32 losstype = 2 [default = 1]; + optional uint32 num_triplets = 3 [default = 3]; +} + message ConvolutionParameter { optional uint32 num_output = 1; // The number of outputs for the layer optional bool bias_term = 2 [default = true]; // whether to have bias terms - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in all spatial dimensions, or once per spatial dimension. - repeated uint32 pad = 3; // The padding size; defaults to 0 - repeated uint32 kernel_size = 4; // The kernel size - repeated uint32 stride = 6; // The stride; defaults to 1 - - // For 2D convolution only, the *_h and *_w versions may also be used to - // specify both spatial dimensions. - optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) - optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) - optional uint32 kernel_h = 11; // The kernel height (2D only) - optional uint32 kernel_w = 12; // The kernel width (2D only) - optional uint32 stride_h = 13; // The stride height (2D only) - optional uint32 stride_w = 14; // The stride width (2D only) - + // dimensions in height and width or as Y, X pairs. 
+ optional uint32 pad = 3 [default = 0]; // The padding size (equal in Y, X) + optional uint32 pad_h = 9 [default = 0]; // The padding height + optional uint32 pad_w = 10 [default = 0]; // The padding width + optional uint32 kernel_size = 4; // The kernel size (square) + optional uint32 kernel_h = 11; // The kernel height + optional uint32 kernel_w = 12; // The kernel width optional uint32 group = 5 [default = 1]; // The group size for group conv - + optional uint32 stride = 6 [default = 1]; // The stride (equal in Y, X) + optional uint32 stride_h = 13; // The stride height + optional uint32 stride_w = 14; // The stride width optional FillerParameter weight_filler = 7; // The filler for the weight optional FillerParameter bias_filler = 8; // The filler for the bias enum Engine { @@ -497,24 +499,6 @@ message ConvolutionParameter { CUDNN = 2; } optional Engine engine = 15 [default = DEFAULT]; - - // The axis to interpret as "channels" when performing convolution. - // Preceding dimensions are treated as independent inputs; - // succeeding dimensions are treated as "spatial". - // With (N, C, H, W) inputs, and axis == 1 (the default), we perform - // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for - // groups g>1) filters across the spatial axes (H, W) of the input. - // With (N, C, D, H, W) inputs, and axis == 1, we perform - // N independent 3D convolutions, sliding (C/g)-channels - // filters across the spatial axes (D, H, W) of the input. - optional int32 axis = 16 [default = 1]; - - // Whether to force use of the general ND convolution, even if a specific - // implementation for blobs of the appropriate number of spatial dimensions - // is available. (Currently, there is only a 2D-specific convolution - // implementation; for input blobs with num_axes != 2, this option is - // ignored and the ND implementation will be used.) 
- optional bool force_nd_im2col = 17 [default = false]; } message DataParameter { @@ -1053,8 +1037,10 @@ message V1LayerParameter { SPLIT = 22; SLICE = 33; TANH = 23; + TRIPLET_LOSS = 40; WINDOW_DATA = 24; THRESHOLD = 31; + } optional LayerType type = 5; repeated BlobProto blobs = 6; @@ -1098,6 +1084,7 @@ message V1LayerParameter { optional TransformationParameter transform_param = 36; optional LossParameter loss_param = 42; optional V0LayerParameter layer = 1; + optional TripletLossParameter triplet_loss_param = 43; } // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 12c13dd8385..394ec3b3ad7 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -55,7 +55,6 @@ void Solver::Init(const SolverParameter& param) { << std::endl << param.DebugString(); param_ = param; CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative."; - CheckSnapshotWritePermissions(); if (Caffe::root_solver() && param_.random_seed() >= 0) { Caffe::set_random_seed(param_.random_seed()); } @@ -422,37 +421,19 @@ void Solver::Snapshot() { CHECK(Caffe::root_solver()); string model_filename; switch (param_.snapshot_format()) { - case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: - model_filename = SnapshotToBinaryProto(); - break; - case caffe::SolverParameter_SnapshotFormat_HDF5: - model_filename = SnapshotToHDF5(); - break; - default: - LOG(FATAL) << "Unsupported snapshot format."; + case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: + model_filename = SnapshotToBinaryProto(); + break; + case caffe::SolverParameter_SnapshotFormat_HDF5: + model_filename = SnapshotToHDF5(); + break; + default: + LOG(FATAL) << "Unsupported snapshot format."; } SnapshotSolverState(model_filename); } -template -void Solver::CheckSnapshotWritePermissions() { - if (Caffe::root_solver() && param_.snapshot()) { - CHECK(param_.has_snapshot_prefix()) - << "In solver params, snapshot is specified but 
snapshot_prefix is not"; - string probe_filename = SnapshotFilename(".tempfile"); - std::ofstream probe_ofs(probe_filename.c_str()); - if (probe_ofs.good()) { - probe_ofs.close(); - std::remove(probe_filename.c_str()); - } else { - LOG(FATAL) << "Cannot write to snapshot prefix '" - << param_.snapshot_prefix() << "'. Make sure " - << "that the directory exists and is writeable."; - } - } -} - template string Solver::SnapshotFilename(const string extension) { string filename(param_.snapshot_prefix()); @@ -751,7 +732,7 @@ void SGDSolver::SnapshotSolverStateToBinaryProto( } string snapshot_filename = Solver::SnapshotFilename(".solverstate"); LOG(INFO) - << "Snapshotting solver state to binary proto file " << snapshot_filename; + << "Snapshotting solver state to binary proto file" << snapshot_filename; WriteProtoToBinaryFile(state, snapshot_filename.c_str()); } diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index ef0e57a37a1..94e529b5eee 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -250,6 +250,7 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUTopK) { TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) { LayerParameter layer_param; + Caffe::set_mode(Caffe::CPU); AccuracyLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_); @@ -278,16 +279,16 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) { EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0), num_correct_labels / 100.0, 1e-4); for (int i = 0; i < num_class; ++i) { - TypeParam accuracy_per_class = (num_per_class[i] > 0 ? 
- static_cast(correct_per_class[i]) / num_per_class[i] : 0); EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0), - accuracy_per_class, 1e-4); + static_cast(correct_per_class[i]) / num_per_class[i], + 1e-4); } } TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) { LayerParameter layer_param; + Caffe::set_mode(Caffe::CPU); const TypeParam kIgnoreLabelValue = -1; layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue); AccuracyLayer layer(layer_param); @@ -328,10 +329,9 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) { EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0), num_correct_labels / TypeParam(count), 1e-4); for (int i = 0; i < 10; ++i) { - TypeParam accuracy_per_class = (num_per_class[i] > 0 ? - static_cast(correct_per_class[i]) / num_per_class[i] : 0); EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0), - accuracy_per_class, 1e-4); + TypeParam(correct_per_class[i]) / num_per_class[i], + 1e-4); } } diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index ccd97eb1d66..088e0a41685 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -99,19 +99,6 @@ TYPED_TEST(ConcatLayerTest, TestSetupChannelsNegativeIndexing) { EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } -TYPED_TEST(ConcatLayerTest, TestForwardTrivial) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConcatLayer layer(layer_param); - this->blob_bottom_vec_0_.resize(1); - layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); - layer.Forward(this->blob_bottom_vec_0_, this->blob_top_vec_); - for (int i = 0; i < this->blob_bottom_0_->count(); ++i) { - EXPECT_EQ(this->blob_bottom_0_->cpu_data()[i], - this->blob_top_->cpu_data()[i]); - } -} - TYPED_TEST(ConcatLayerTest, TestForwardNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -167,16 +154,6 @@ TYPED_TEST(ConcatLayerTest, 
TestForwardChannels) { } } -TYPED_TEST(ConcatLayerTest, TestGradientTrivial) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConcatLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-2); - this->blob_bottom_vec_0_.resize(1); - checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_0_, - this->blob_top_vec_); -} - TYPED_TEST(ConcatLayerTest, TestGradientNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index 9df979a2d27..67d41fff844 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -19,87 +19,54 @@ template void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, const vector > >& weights, Blob* out) { - const bool has_depth = (out->num_axes() == 5); - if (!has_depth) { CHECK_EQ(4, out->num_axes()); } // Kernel size, stride, and pad int kernel_h, kernel_w; - if (conv_param->has_kernel_h() || conv_param->has_kernel_w()) { + if (conv_param->has_kernel_size()) { + kernel_h = kernel_w = conv_param->kernel_size(); + } else { kernel_h = conv_param->kernel_h(); kernel_w = conv_param->kernel_w(); - } else { - kernel_h = kernel_w = conv_param->kernel_size(0); } int pad_h, pad_w; - if (conv_param->has_pad_h() || conv_param->has_pad_w()) { + if (!conv_param->has_pad_h()) { + pad_h = pad_w = conv_param->pad(); + } else { pad_h = conv_param->pad_h(); pad_w = conv_param->pad_w(); - } else { - pad_h = pad_w = conv_param->pad_size() ? conv_param->pad(0) : 0; } int stride_h, stride_w; - if (conv_param->has_stride_h() || conv_param->has_stride_w()) { + if (!conv_param->has_stride_h()) { + stride_h = stride_w = conv_param->stride(); + } else { stride_h = conv_param->stride_h(); stride_w = conv_param->stride_w(); - } else { - stride_h = stride_w = conv_param->stride_size() ? 
conv_param->stride(0) : 1; - } - int kernel_d, pad_d, stride_d; - if (has_depth) { - kernel_d = kernel_h; - stride_d = stride_h; - pad_d = pad_h; - } else { - kernel_d = stride_d = 1; - pad_d = 0; } // Groups int groups = conv_param->group(); - int o_g = out->shape(1) / groups; - int k_g = in->shape(1) / groups; + int o_g = out->channels() / groups; + int k_g = in->channels() / groups; int o_head, k_head; // Convolution - vector weight_offset(4 + has_depth); - vector in_offset(4 + has_depth); - vector out_offset(4 + has_depth); + const Dtype* in_data = in->cpu_data(); + const Dtype* weight_data = weights[0]->cpu_data(); Dtype* out_data = out->mutable_cpu_data(); - for (int n = 0; n < out->shape(0); n++) { + for (int n = 0; n < out->num(); n++) { for (int g = 0; g < groups; g++) { o_head = o_g * g; k_head = k_g * g; for (int o = 0; o < o_g; o++) { for (int k = 0; k < k_g; k++) { - for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { - for (int y = 0; y < out->shape(2 + has_depth); y++) { - for (int x = 0; x < out->shape(3 + has_depth); x++) { - for (int r = 0; r < kernel_d; r++) { - for (int p = 0; p < kernel_h; p++) { - for (int q = 0; q < kernel_w; q++) { - int in_z = z * stride_d - pad_d + r; - int in_y = y * stride_h - pad_h + p; - int in_x = x * stride_w - pad_w + q; - if (in_z >= 0 && in_z < (has_depth ? 
in->shape(2) : 1) - && in_y >= 0 && in_y < in->shape(2 + has_depth) - && in_x >= 0 && in_x < in->shape(3 + has_depth)) { - weight_offset[0] = o + o_head; - weight_offset[1] = k; - if (has_depth) { weight_offset[2] = r; } - weight_offset[2 + has_depth] = p; - weight_offset[3 + has_depth] = q; - in_offset[0] = n; - in_offset[1] = k + k_head; - if (has_depth) { in_offset[2] = in_z; } - in_offset[2 + has_depth] = in_y; - in_offset[3 + has_depth] = in_x; - out_offset[0] = n; - out_offset[1] = o + o_head; - if (has_depth) { out_offset[2] = z; } - out_offset[2 + has_depth] = y; - out_offset[3 + has_depth] = x; - out_data[out->offset(out_offset)] += - in->data_at(in_offset) - * weights[0]->data_at(weight_offset); - } - } + for (int y = 0; y < out->height(); y++) { + for (int x = 0; x < out->width(); x++) { + for (int p = 0; p < kernel_h; p++) { + for (int q = 0; q < kernel_w; q++) { + int in_y = y * stride_h - pad_h + p; + int in_x = x * stride_w - pad_w + q; + if (in_y >= 0 && in_y < in->height() + && in_x >= 0 && in_x < in->width()) { + out_data[out->offset(n, o + o_head, y, x)] += + in_data[in->offset(n, k + k_head, in_y, in_x)] + * weight_data[weights[0]->offset(o + o_head, k, p, q)]; } } } @@ -112,18 +79,11 @@ void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, // Bias if (conv_param->bias_term()) { const Dtype* bias_data = weights[1]->cpu_data(); - for (int n = 0; n < out->shape(0); n++) { - for (int o = 0; o < out->shape(1); o++) { - for (int z = 0; z < (has_depth ? 
out->shape(2) : 1); z++) { - for (int y = 0; y < out->shape(2 + has_depth); y++) { - for (int x = 0; x < out->shape(3 + has_depth); x++) { - out_offset[0] = n; - out_offset[1] = o; - if (has_depth) { out_offset[2] = z; } - out_offset[2 + has_depth] = y; - out_offset[3 + has_depth] = x; - out_data[out->offset(out_offset)] += bias_data[o]; - } + for (int n = 0; n < out->num(); n++) { + for (int o = 0; o < out->channels(); o++) { + for (int y = 0; y < out->height(); y++) { + for (int x = 0; x < out->width(); x++) { + out_data[out->offset(n, o, y, x)] += bias_data[o]; } } } @@ -190,8 +150,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(4); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); @@ -228,8 +188,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("constant"); @@ -257,98 +217,13 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) { } } -TYPED_TEST(ConvolutionLayerTest, Test0DConvolution) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - const int kNumOutput = 3; - convolution_param->set_num_output(kNumOutput); - convolution_param->set_axis(3); - 
convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - shared_ptr > layer( - new ConvolutionLayer(layer_param)); - vector top_shape = this->blob_bottom_->shape(); - top_shape[3] = kNumOutput; - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - EXPECT_EQ(top_shape, this->blob_top_->shape()); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - vector weight_offset(2); - const Blob* weight = layer->blobs()[0].get(); - const Blob* bias = layer->blobs()[1].get(); - const int num = this->blob_top_->count(3); - const int dim = this->blob_top_->shape(3); - const int bottom_dim = this->blob_bottom_->shape(3); - for (int n = 0; n < num; ++n) { - for (int d = 0; d < dim; ++d) { - weight_offset[0] = d; - Dtype value = bias->cpu_data()[d]; - for (int bottom_d = 0; bottom_d < bottom_dim; ++bottom_d) { - weight_offset[1] = bottom_d; - value += weight->data_at(weight_offset) * - this->blob_bottom_->cpu_data()[n * bottom_dim + bottom_d]; - } - EXPECT_NEAR(value, this->blob_top_->cpu_data()[n * dim + d], 1e-4); - } - } -} - -TYPED_TEST(ConvolutionLayerTest, TestSimple3DConvolution) { - typedef typename TypeParam::Dtype Dtype; - this->blob_bottom_vec_.push_back(this->blob_bottom_2_); - this->blob_top_vec_.push_back(this->blob_top_2_); - vector bottom_shape(5); - bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); - bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); - bottom_shape[2] = 5; - bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); - bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - 
layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(4); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - shared_ptr > layer( - new ConvolutionLayer(layer_param)); - layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Check against reference convolution. - const Dtype* top_data; - const Dtype* ref_top_data; - caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_)); - top_data = this->blob_top_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } - caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), - this->MakeReferenceTop(this->blob_top_2_)); - top_data = this->blob_top_2_->cpu_data(); - ref_top_data = this->ref_blob_top_->cpu_data(); - for (int i = 0; i < this->blob_top_->count(); ++i) { - EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); - } -} - TYPED_TEST(ConvolutionLayerTest, Test1x1Convolution) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(1); - convolution_param->add_stride(1); + convolution_param->set_kernel_size(1); + convolution_param->set_stride(1); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("constant"); @@ -374,8 +249,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolutionGroup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - 
convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); @@ -413,8 +288,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSobelConvolution) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(1); convolution_param->set_bias_term(false); shared_ptr > layer( @@ -475,11 +350,14 @@ TYPED_TEST(ConvolutionLayerTest, TestSobelConvolution) { convolution_param->set_bias_term(false); layer.reset(new ConvolutionLayer(layer_param)); layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 1, 1, 3)); + layer->blobs()[0].reset(new Blob(1, 3, 1, 3)); Dtype* weights_2 = layer->blobs()[0]->mutable_cpu_data(); - weights_2[0] = -1; - weights_2[1] = 0; - weights_2[2] = 1; + for (int c = 0; c < 3; ++c) { + int i = c * 3; // 1 x 3 filter + weights_2[i + 0] = -1; + weights_2[i + 1] = 0; + weights_2[i + 2] = 1; + } layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); // Test equivalence of full and separable filters. 
@@ -490,124 +368,6 @@ TYPED_TEST(ConvolutionLayerTest, TestSobelConvolution) { } } -TYPED_TEST(ConvolutionLayerTest, TestNDAgainst2D) { - typedef typename TypeParam::Dtype Dtype; - const int kernel_h = 11; - const int kernel_w = 13; - vector bottom_shape(4); - bottom_shape[0] = 15; - bottom_shape[1] = 18; - bottom_shape[2] = kernel_h * 2; - bottom_shape[3] = kernel_w * 2; - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->set_num_output(12); - convolution_param->set_bias_term(false); - convolution_param->set_group(6); - convolution_param->set_kernel_h(kernel_h); - convolution_param->set_kernel_w(kernel_w); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - Blob weights; - Blob top_diff; - // Shape and fill weights and top_diff. - bool copy_diff; - bool reshape; - { - ConvolutionLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - top_diff.ReshapeLike(*this->blob_top_); - filler.Fill(&top_diff); - ASSERT_EQ(1, layer.blobs().size()); - copy_diff = false; reshape = true; - weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); - } - vector propagate_down(1, true); - Blob result_2d; - Blob backward_result_2d; - Blob backward_weight_result_2d; - // Test with 2D im2col - { - caffe_set(this->blob_top_->count(), Dtype(0), - this->blob_top_->mutable_cpu_data()); - caffe_set(this->blob_bottom_->count(), Dtype(0), - this->blob_bottom_->mutable_cpu_diff()); - caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); - // Do SetUp and Forward; save Forward result in result_2d. 
- convolution_param->set_force_nd_im2col(false); - ConvolutionLayer layer_2d(layer_param); - layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - ASSERT_EQ(1, layer_2d.blobs().size()); - copy_diff = false; reshape = false; - layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); - layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); - copy_diff = false; reshape = true; - result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); - // Copy pre-generated top diff into actual top diff; - // do Backward and save result in backward_result_2d. - ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); - caffe_copy(top_diff.count(), top_diff.cpu_data(), - this->blob_top_->mutable_cpu_diff()); - layer_2d.Backward(this->blob_top_vec_, propagate_down, - this->blob_bottom_vec_); - copy_diff = true; reshape = true; - backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); - backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); - } - Blob result_nd; - Blob backward_result_nd; - Blob backward_weight_result_nd; - // Test with ND im2col - { - caffe_set(this->blob_top_->count(), Dtype(0), - this->blob_top_->mutable_cpu_data()); - caffe_set(this->blob_bottom_->count(), Dtype(0), - this->blob_bottom_->mutable_cpu_diff()); - caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); - // Do SetUp and Forward; save Forward result in result_nd. - convolution_param->set_force_nd_im2col(true); - ConvolutionLayer layer_nd(layer_param); - layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - ASSERT_EQ(1, layer_nd.blobs().size()); - copy_diff = false; reshape = false; - layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); - layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); - copy_diff = false; reshape = true; - result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); - // Copy pre-generated top diff into actual top diff; - // do Backward and save result in backward_result_nd. 
- ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); - caffe_copy(top_diff.count(), top_diff.cpu_data(), - this->blob_top_->mutable_cpu_diff()); - layer_nd.Backward(this->blob_top_vec_, propagate_down, - this->blob_bottom_vec_); - copy_diff = true; reshape = true; - backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); - backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); - } - ASSERT_EQ(result_nd.count(), result_2d.count()); - for (int i = 0; i < result_2d.count(); ++i) { - EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); - } - ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); - for (int i = 0; i < backward_result_2d.count(); ++i) { - EXPECT_EQ(backward_result_2d.cpu_diff()[i], - backward_result_nd.cpu_diff()[i]); - } - ASSERT_EQ(backward_weight_result_nd.count(), - backward_weight_result_2d.count()); - for (int i = 0; i < backward_weight_result_2d.count(); ++i) { - EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], - backward_weight_result_nd.cpu_diff()[i]); - } -} - TYPED_TEST(ConvolutionLayerTest, TestGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -615,36 +375,8 @@ TYPED_TEST(ConvolutionLayerTest, TestGradient) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_num_output(2); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - ConvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -TYPED_TEST(ConvolutionLayerTest, TestGradient3D) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - 
layer_param.mutable_convolution_param(); - vector bottom_shape(5); - bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); - bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); - bottom_shape[2] = 5; - bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); - bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(2); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -661,8 +393,8 @@ TYPED_TEST(ConvolutionLayerTest, Test1x1Gradient) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->add_kernel_size(1); - convolution_param->add_stride(1); + convolution_param->set_kernel_size(1); + convolution_param->set_stride(1); convolution_param->set_num_output(2); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -677,8 +409,8 @@ TYPED_TEST(ConvolutionLayerTest, TestGradientGroup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); @@ -740,8 +472,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSetupCuDNN) { LayerParameter layer_param; 
ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(4); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); @@ -777,8 +509,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSimpleConvolutionCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("constant"); @@ -810,8 +542,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSimpleConvolutionGroupCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); @@ -849,8 +581,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSobelConvolutionCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(1); convolution_param->set_bias_term(false); shared_ptr > layer( @@ -911,11 +643,14 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSobelConvolutionCuDNN) { convolution_param->set_bias_term(false); layer.reset(new 
CuDNNConvolutionLayer(layer_param)); layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 1, 1, 3)); + layer->blobs()[0].reset(new Blob(1, 3, 1, 3)); TypeParam* weights_2 = layer->blobs()[0]->mutable_cpu_data(); - weights_2[0] = -1; - weights_2[1] = 0; - weights_2[2] = 1; + for (int c = 0; c < 3; ++c) { + int i = c * 3; // 1 x 3 filter + weights_2[i + 0] = -1; + weights_2[i + 1] = 0; + weights_2[i + 2] = 1; + } layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); // Test equivalence of full and separable filters. @@ -932,8 +667,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestGradientCuDNN) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(2); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -947,8 +682,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestGradientGroupCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); diff --git a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index 9e03954a543..afe2a40d227 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -1,4 +1,3 @@ -#ifdef USE_OPENCV #include #include @@ -349,7 +348,6 @@ class DataLayerTest : public MultiDeviceTest { TYPED_TEST_CASE(DataLayerTest, TestDtypesAndDevices); 
-#ifdef USE_LEVELDB TYPED_TEST(DataLayerTest, TestReadLevelDB) { const bool unique_pixels = false; // all pixels the same; images different this->Fill(unique_pixels, DataParameter_DB_LEVELDB); @@ -387,9 +385,7 @@ TYPED_TEST(DataLayerTest, TestReadCropTestLevelDB) { this->Fill(unique_pixels, DataParameter_DB_LEVELDB); this->TestReadCrop(TEST); } -#endif // USE_LEVELDB -#ifdef USE_LMDB TYPED_TEST(DataLayerTest, TestReadLMDB) { const bool unique_pixels = false; // all pixels the same; images different this->Fill(unique_pixels, DataParameter_DB_LMDB); @@ -428,6 +424,4 @@ TYPED_TEST(DataLayerTest, TestReadCropTestLMDB) { this->TestReadCrop(TEST); } -#endif // USE_LMDB } // namespace caffe -#endif // USE_OPENCV diff --git a/src/caffe/test/test_data_transformer.cpp b/src/caffe/test/test_data_transformer.cpp index 8a1013744e8..16570e20356 100644 --- a/src/caffe/test/test_data_transformer.cpp +++ b/src/caffe/test/test_data_transformer.cpp @@ -1,4 +1,3 @@ -#ifdef USE_OPENCV #include #include @@ -354,4 +353,3 @@ TYPED_TEST(DataTransformTest, TestMeanFile) { } } // namespace caffe -#endif // USE_OPENCV diff --git a/src/caffe/test/test_db.cpp b/src/caffe/test/test_db.cpp index 1b487b14c58..5b2ac230a0b 100644 --- a/src/caffe/test/test_db.cpp +++ b/src/caffe/test/test_db.cpp @@ -1,4 +1,3 @@ -#if defined(USE_LEVELDB) && defined(USE_LMDB) && defined(USE_OPENCV) #include #include "boost/scoped_ptr.hpp" @@ -133,4 +132,3 @@ TYPED_TEST(DBTest, TestWrite) { } } // namespace caffe -#endif // USE_LEVELDB, USE_LMDB and USE_OPENCV diff --git a/src/caffe/test/test_deconvolution_layer.cpp b/src/caffe/test/test_deconvolution_layer.cpp index 770e7b277ee..fc63d5efbe3 100644 --- a/src/caffe/test/test_deconvolution_layer.cpp +++ b/src/caffe/test/test_deconvolution_layer.cpp @@ -58,8 +58,8 @@ TYPED_TEST(DeconvolutionLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - 
convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(4); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); @@ -96,8 +96,8 @@ TYPED_TEST(DeconvolutionLayerTest, TestSimpleDeconvolution) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("constant"); convolution_param->mutable_weight_filler()->set_value(1); @@ -144,8 +144,8 @@ TYPED_TEST(DeconvolutionLayerTest, TestGradient) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->add_kernel_size(2); - convolution_param->add_stride(1); + convolution_param->set_kernel_size(2); + convolution_param->set_stride(1); convolution_param->set_num_output(1); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -155,151 +155,4 @@ TYPED_TEST(DeconvolutionLayerTest, TestGradient) { this->blob_top_vec_); } -TYPED_TEST(DeconvolutionLayerTest, TestNDAgainst2D) { - typedef typename TypeParam::Dtype Dtype; - const int kernel_h = 11; - const int kernel_w = 13; - vector bottom_shape(4); - bottom_shape[0] = 15; - bottom_shape[1] = 12; - bottom_shape[2] = kernel_h * 2; - bottom_shape[3] = kernel_w * 2; - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - 
layer_param.mutable_convolution_param(); - convolution_param->set_num_output(18); - convolution_param->set_bias_term(false); - convolution_param->set_group(6); - convolution_param->set_kernel_h(kernel_h); - convolution_param->set_kernel_w(kernel_w); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - Blob weights; - Blob top_diff; - // Shape and fill weights and top_diff. - bool copy_diff; - bool reshape; - { - DeconvolutionLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - top_diff.ReshapeLike(*this->blob_top_); - filler.Fill(&top_diff); - ASSERT_EQ(1, layer.blobs().size()); - copy_diff = false; reshape = true; - weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); - } - vector propagate_down(1, true); - Blob result_2d; - Blob backward_result_2d; - Blob backward_weight_result_2d; - // Test with 2D im2col - { - caffe_set(this->blob_top_->count(), Dtype(0), - this->blob_top_->mutable_cpu_data()); - caffe_set(this->blob_bottom_->count(), Dtype(0), - this->blob_bottom_->mutable_cpu_diff()); - caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); - // Do SetUp and Forward; save Forward result in result_2d. - convolution_param->set_force_nd_im2col(false); - DeconvolutionLayer layer_2d(layer_param); - layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - ASSERT_EQ(1, layer_2d.blobs().size()); - copy_diff = false; reshape = false; - layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); - layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); - copy_diff = false; reshape = true; - result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); - // Copy pre-generated top diff into actual top diff; - // do Backward and save result in backward_result_2d. 
- ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); - caffe_copy(top_diff.count(), top_diff.cpu_data(), - this->blob_top_->mutable_cpu_diff()); - layer_2d.Backward(this->blob_top_vec_, propagate_down, - this->blob_bottom_vec_); - copy_diff = true; reshape = true; - backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); - backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); - } - Blob result_nd; - Blob backward_result_nd; - Blob backward_weight_result_nd; - // Test with ND im2col - { - caffe_set(this->blob_top_->count(), Dtype(0), - this->blob_top_->mutable_cpu_data()); - caffe_set(this->blob_bottom_->count(), Dtype(0), - this->blob_bottom_->mutable_cpu_diff()); - caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); - // Do SetUp and Forward; save Forward result in result_nd. - convolution_param->set_force_nd_im2col(true); - DeconvolutionLayer layer_nd(layer_param); - layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - ASSERT_EQ(1, layer_nd.blobs().size()); - copy_diff = false; reshape = false; - layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); - layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); - copy_diff = false; reshape = true; - result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); - // Copy pre-generated top diff into actual top diff; - // do Backward and save result in backward_result_nd. 
- ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); - caffe_copy(top_diff.count(), top_diff.cpu_data(), - this->blob_top_->mutable_cpu_diff()); - layer_nd.Backward(this->blob_top_vec_, propagate_down, - this->blob_bottom_vec_); - copy_diff = true; reshape = true; - backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); - backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); - } - ASSERT_EQ(result_nd.count(), result_2d.count()); - for (int i = 0; i < result_2d.count(); ++i) { - EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); - } - ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); - for (int i = 0; i < backward_result_2d.count(); ++i) { - EXPECT_EQ(backward_result_2d.cpu_diff()[i], - backward_result_nd.cpu_diff()[i]); - } - ASSERT_EQ(backward_weight_result_nd.count(), - backward_weight_result_2d.count()); - for (int i = 0; i < backward_weight_result_2d.count(); ++i) { - EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], - backward_weight_result_nd.cpu_diff()[i]); - } -} - -TYPED_TEST(DeconvolutionLayerTest, TestGradient3D) { - typedef typename TypeParam::Dtype Dtype; - vector bottom_shape(5); - bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); - bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); - bottom_shape[2] = 2; - bottom_shape[3] = 3; - bottom_shape[4] = 2; - FillerParameter filler_param; - GaussianFiller filler(filler_param); - for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { - this->blob_bottom_vec_[i]->Reshape(bottom_shape); - filler.Fill(this->blob_bottom_vec_[i]); - } - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(2); - convolution_param->add_stride(2); - convolution_param->add_pad(1); - convolution_param->set_num_output(2); - convolution_param->mutable_weight_filler()->set_type("gaussian"); - convolution_param->mutable_bias_filler()->set_type("gaussian"); - 
DeconvolutionLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - } // namespace caffe diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp index 8031f6e9022..be0c1347709 100644 --- a/src/caffe/test/test_eltwise_layer.cpp +++ b/src/caffe/test/test_eltwise_layer.cpp @@ -80,7 +80,7 @@ TYPED_TEST(EltwiseLayerTest, TestProd) { const Dtype* in_data_b = this->blob_bottom_b_->cpu_data(); const Dtype* in_data_c = this->blob_bottom_c_->cpu_data(); for (int i = 0; i < count; ++i) { - EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i] * in_data_c[i], 1e-4); + EXPECT_EQ(data[i], in_data_a[i] * in_data_b[i] * in_data_c[i]); } } @@ -99,7 +99,7 @@ TYPED_TEST(EltwiseLayerTest, TestSum) { const Dtype* in_data_b = this->blob_bottom_b_->cpu_data(); const Dtype* in_data_c = this->blob_bottom_c_->cpu_data(); for (int i = 0; i < count; ++i) { - EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i] + in_data_c[i], 1e-4); + EXPECT_EQ(data[i], in_data_a[i] + in_data_b[i] + in_data_c[i]); } } diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index f0b75fcc68d..0017ac23e69 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -22,12 +22,6 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, const int height_col, const int width_col, Dtype* data_col); -template -__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col); - extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; template @@ -36,18 +30,11 @@ class Im2colKernelTest : public GPUDeviceTest { Im2colKernelTest() // big so launches > 1024 threads : blob_bottom_(new Blob(5, 500, 10, 10)), - blob_kernel_shape_(new Blob()), - blob_stride_(new Blob()), - blob_pad_(new Blob()), 
blob_top_(new Blob()), blob_top_cpu_(new Blob()) { FillerParameter filler_param; GaussianFiller filler(filler_param); filler.Fill(this->blob_bottom_); - vector dim_blob_shape(1, 2); - blob_kernel_shape_->Reshape(dim_blob_shape); - blob_stride_->Reshape(dim_blob_shape); - blob_pad_->Reshape(dim_blob_shape); height_ = blob_bottom_->height(); width_ = blob_bottom_->width(); @@ -57,26 +44,14 @@ class Im2colKernelTest : public GPUDeviceTest { kernel_size_ = 3; height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; - - for (int i = 0; i < 2; ++i) { - blob_kernel_shape_->mutable_cpu_data()[i] = kernel_size_; - blob_stride_->mutable_cpu_data()[i] = stride_; - blob_pad_->mutable_cpu_data()[i] = pad_; - } } virtual ~Im2colKernelTest() { - delete blob_bottom_; - delete blob_top_; - delete blob_top_cpu_; - delete blob_kernel_shape_; - delete blob_stride_; - delete blob_pad_; + delete blob_bottom_; + delete blob_top_; + delete blob_top_cpu_; } - Blob* const blob_kernel_shape_; - Blob* const blob_stride_; - Blob* const blob_pad_; Blob* const blob_bottom_; Blob* const blob_top_; Blob* const blob_top_cpu_; @@ -92,7 +67,7 @@ class Im2colKernelTest : public GPUDeviceTest { TYPED_TEST_CASE(Im2colKernelTest, TestDtypes); -TYPED_TEST(Im2colKernelTest, Test2D) { +TYPED_TEST(Im2colKernelTest, TestGPU) { // Reshape the blobs to correct size for im2col output this->blob_top_->Reshape(this->blob_bottom_->num(), this->channels_ * this->kernel_size_ * this->kernel_size_, @@ -147,58 +122,4 @@ TYPED_TEST(Im2colKernelTest, Test2D) { } } -TYPED_TEST(Im2colKernelTest, TestND) { - // Reshape the blobs to correct size for im2col output - this->blob_top_->Reshape(this->blob_bottom_->num(), - this->channels_ * this->kernel_size_ * this->kernel_size_, - this->height_col_, - this->width_col_); - - this->blob_top_cpu_->ReshapeLike(*this->blob_top_); - - const TypeParam* bottom_data_cpu = this->blob_bottom_->cpu_data(); - TypeParam* 
top_data_cpu = this->blob_top_cpu_->mutable_cpu_data(); - - // CPU Version - for (int n = 0; n < this->blob_bottom_->num(); ++n) { - im2col_nd_cpu(bottom_data_cpu + this->blob_bottom_->offset(n), 2, - this->blob_bottom_->shape().data() + 1, - this->blob_top_cpu_->shape().data() + 1, - this->blob_kernel_shape_->cpu_data(), - this->blob_pad_->cpu_data(), this->blob_stride_->cpu_data(), - top_data_cpu + this->blob_top_cpu_->offset(n)); - } - - // GPU version - int num_kernels = this->channels_ * this->height_col_ * this->width_col_; - int default_grid_dim = CAFFE_GET_BLOCKS(num_kernels); - const TypeParam* bottom_data_gpu = this->blob_bottom_->gpu_data(); - - // Launch with different grid sizes - for (int grid_div = 2; grid_div <= 8; grid_div++) { - for (int n = 0; n < this->blob_bottom_->num(); ++n) { - const int grid_dim = default_grid_dim / grid_div; - TypeParam* top_data_gpu = this->blob_top_->mutable_gpu_data(); - // NOLINT_NEXT_LINE(whitespace/operators) - im2col_nd_gpu_kernel<<>>( - num_kernels, bottom_data_gpu + this->blob_bottom_->offset(n), - this->blob_bottom_->gpu_shape() + 1, this->blob_top_->gpu_shape() + 1, - this->blob_kernel_shape_->gpu_data(), this->blob_pad_->gpu_data(), - this->blob_stride_->gpu_data(), - top_data_gpu + this->blob_top_->offset(n)); - CUDA_POST_KERNEL_CHECK; - } - - // Compare results against CPU version - for (int i = 0; i < this->blob_top_->count(); ++i) { - TypeParam cpuval = top_data_cpu[i]; - TypeParam gpuval = this->blob_top_->cpu_data()[i]; - EXPECT_EQ(cpuval, gpuval); - if (cpuval != gpuval) { - break; - } - } - } -} - } // namespace caffe diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index 293aa262059..f50abe103f8 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -21,7 +21,6 @@ class Im2colLayerTest : public MultiDeviceTest { : blob_bottom_(new Blob(2, 3, 6, 5)), blob_top_(new Blob()) { // fill the values - Caffe::set_random_seed(1701); 
FillerParameter filler_param; GaussianFiller filler(filler_param); filler.Fill(this->blob_bottom_); @@ -42,8 +41,8 @@ TYPED_TEST(Im2colLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), 2); @@ -57,8 +56,8 @@ TYPED_TEST(Im2colLayerTest, TestForward) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -74,27 +73,14 @@ TYPED_TEST(Im2colLayerTest, TestGradient) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, this->blob_top_vec_); } -TYPED_TEST(Im2colLayerTest, TestGradientForceND) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ConvolutionParameter* convolution_param = - layer_param.mutable_convolution_param(); - convolution_param->add_kernel_size(3); - convolution_param->add_stride(2); - convolution_param->set_force_nd_im2col(true); - Im2colLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-2); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - 
this->blob_top_vec_); -} TYPED_TEST(Im2colLayerTest, TestRect) { typedef typename TypeParam::Dtype Dtype; @@ -103,7 +89,7 @@ TYPED_TEST(Im2colLayerTest, TestRect) { layer_param.mutable_convolution_param(); convolution_param->set_kernel_h(5); convolution_param->set_kernel_w(3); - convolution_param->add_stride(2); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -122,7 +108,7 @@ TYPED_TEST(Im2colLayerTest, TestRectGradient) { layer_param.mutable_convolution_param(); convolution_param->set_kernel_h(5); convolution_param->set_kernel_w(3); - convolution_param->add_stride(2); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, diff --git a/src/caffe/test/test_image_data_layer.cpp b/src/caffe/test/test_image_data_layer.cpp index 481fcef7b27..931a5ebf137 100644 --- a/src/caffe/test/test_image_data_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -1,4 +1,3 @@ -#ifdef USE_OPENCV #include #include #include @@ -178,4 +177,3 @@ TYPED_TEST(ImageDataLayerTest, TestShuffle) { } } // namespace caffe -#endif // USE_OPENCV diff --git a/src/caffe/test/test_io.cpp b/src/caffe/test/test_io.cpp index c2c919e90dc..4ab96311bbc 100644 --- a/src/caffe/test/test_io.cpp +++ b/src/caffe/test/test_io.cpp @@ -1,4 +1,3 @@ -#ifdef USE_OPENCV #include #include #include @@ -421,4 +420,3 @@ TEST_F(IOTest, TestDecodeDatumToCVMatContentNative) { } } // namespace caffe -#endif // USE_OPENCV diff --git a/src/caffe/test/test_layer_factory.cpp b/src/caffe/test/test_layer_factory.cpp index 7d5d39d8b91..c86fafd000c 100644 --- a/src/caffe/test/test_layer_factory.cpp +++ b/src/caffe/test/test_layer_factory.cpp @@ -31,16 +31,12 @@ TYPED_TEST(LayerFactoryTest, TestCreateLayer) { LayerParameter layer_param; // Data layers expect a DB if (iter->first 
== "Data") { -#ifdef USE_LEVELDB string tmp; MakeTempDir(&tmp); boost::scoped_ptr db(db::GetDB(DataParameter_DB_LEVELDB)); db->Open(tmp, db::NEW); db->Close(); layer_param.mutable_data_param()->set_source(tmp); -#else - continue; -#endif // USE_LEVELDB } layer_param.set_type(iter->first); layer = LayerRegistry::CreateLayer(layer_param); diff --git a/src/caffe/test/test_memory_data_layer.cpp b/src/caffe/test/test_memory_data_layer.cpp index 7269a4d441b..a79033f59f1 100644 --- a/src/caffe/test/test_memory_data_layer.cpp +++ b/src/caffe/test/test_memory_data_layer.cpp @@ -1,6 +1,4 @@ -#ifdef USE_OPENCV #include -#endif // USE_OPENCV #include #include @@ -115,7 +113,6 @@ TYPED_TEST(MemoryDataLayerTest, TestForward) { } } -#ifdef USE_OPENCV TYPED_TEST(MemoryDataLayerTest, AddDatumVectorDefaultTransform) { typedef typename TypeParam::Dtype Dtype; @@ -295,5 +292,5 @@ TYPED_TEST(MemoryDataLayerTest, TestSetBatchSize) { } } } -#endif // USE_OPENCV + } // namespace caffe diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index 2d2d0fdc005..ccd03646d19 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -88,21 +88,6 @@ TYPED_TEST(SliceLayerTest, TestSetupChannels) { EXPECT_EQ(this->blob_bottom_->width(), this->blob_top_0_->width()); } -TYPED_TEST(SliceLayerTest, TestTrivialSlice) { - // Test the trivial (single output) "slice" operation -- - // should be the identity. 
- typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - SliceLayer layer(layer_param); - this->blob_top_vec_0_.resize(1); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_0_); - ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_0_->shape()); - for (int i = 0; i < this->blob_bottom_->count(); ++i) { - EXPECT_EQ(this->blob_bottom_->cpu_data()[i], - this->blob_top_0_->cpu_data()[i]); - } -} - TYPED_TEST(SliceLayerTest, TestSliceAcrossNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -176,18 +161,6 @@ TYPED_TEST(SliceLayerTest, TestSliceAcrossChannels) { } } -TYPED_TEST(SliceLayerTest, TestGradientTrivial) { - // Test the trivial (single output) "slice" operation -- - // should be the identity. - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - SliceLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - this->blob_top_vec_0_.resize(1); - checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, - this->blob_top_vec_0_); -} - TYPED_TEST(SliceLayerTest, TestGradientAcrossNum) { typedef typename TypeParam::Dtype Dtype; // Gradient checks are slow; reduce blob size. 
diff --git a/src/caffe/test/test_triplet_loss_layer b/src/caffe/test/test_triplet_loss_layer new file mode 100644 index 00000000000..6c25ce9bd4b --- /dev/null +++ b/src/caffe/test/test_triplet_loss_layer @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class TripletLossLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + TripletLossLayerTest() + : blob_bottom_data_(new Blob(50, 1, 1, 1)), + blob_bottom_y_(new Blob(50, 1, 1, 1)), + blob_top_loss_(new Blob()) { + // fill the values + FillerParameter filler_param; + filler_param.set_min(-1.0); + filler_param.set_max(1.0); // distances~=1.0 to test both sides of margin + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_data_); + blob_bottom_vec_.push_back(blob_bottom_data_); + for (int i = 0; i < blob_bottom_y_->count(); ++i) { + blob_bottom_y_->mutable_cpu_data()[i] = caffe_rng_rand() % 2; // 0 or 1 + } + blob_bottom_vec_.push_back(blob_bottom_y_); + blob_top_vec_.push_back(blob_top_loss_); + } + virtual ~TripletLossLayerTest() { + delete blob_bottom_data_; + delete blob_bottom_y_; + delete blob_top_loss_; + } + + Blob* const blob_bottom_data_; + Blob* const blob_bottom_y_; + Blob* const blob_top_loss_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(TripletLossLayerTest, TestDtypesAndDevices); + +TYPED_TEST(TripletLossLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + TripletLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // manually compute to compare + const Dtype margin = 
layer_param.triplet_loss_param().margin(); + const Dtype losstype = 0;//layer_param.triplet_loss_param().losstype(); + const int num_triplets = 3; + const int num_set = this->blob_bottom_data_->num()/(2 + num_triplets); + const int channels = this->blob_bottom_data_->channels(); + Dtype loss(0); + if (losstype == 0) { + for (int i = 0; i < num_set; ++i) { + Dtype dist_par(0); + for (int j = 0; j < channels; ++j) { + Dtype diff_pos = this->blob_bottom_data_->cpu_data()[(2+num_triplets)*i*channels+j] - + this->blob_bottom_data_->cpu_data()[((2+num_triplets)*i+1)*channels+j]; + dist_par = diff_pos*diff_pos; + loss += dist_par; + } + for (int triplet = 0; triplet < num_triplets; ++triplet) { + Dtype dist_sq(0); + for (int j = 0; j < channels; ++j) { + Dtype diff_pos = this->blob_bottom_data_->cpu_data()[(2+num_triplets)*i*channels+j] - + this->blob_bottom_data_->cpu_data()[((2+num_triplets)*i+1)*channels+j]; + dist_sq += diff_pos*diff_pos; + Dtype diff_neg = this->blob_bottom_data_->cpu_data()[(2+num_triplets)*i*channels+j] - + this->blob_bottom_data_->cpu_data()[((2+num_triplets)*i+2+triplet)*channels+j]; + dist_sq -= diff_neg*diff_neg; + } + loss += std::max(margin + dist_sq, Dtype(0.0)); + } + } + } /*else { + for (int i = 0; i < num; ++i) { + Dtype dist_sq(0); + Dtype dist_par(0); + for (int j = 0; j < channels; ++j) { + Dtype diff_pos = this->blob_bottom_data_i_->cpu_data()[i*channels+j] - + this->blob_bottom_data_j_->cpu_data()[i*channels+j]; + dist_sq += diff_pos*diff_pos; + dist_sq += margin; + Dtype diff_neg = this->blob_bottom_data_i_->cpu_data()[i*channels+j] - + this->blob_bottom_data_k_->cpu_data()[i*channels+j]; + dist_sq = 1 - diff_neg*diff_neg/dist_sq; + Dtype diff_par = this->blob_bottom_data_l_->cpu_data()[i*channels+j] - + this->blob_bottom_data_m_->cpu_data()[i*channels+j]; + dist_par = diff_par*diff_par; + } + loss += std::max(dist_sq, Dtype(0.0)); + loss += dist_par; + } + }*/ + loss /= static_cast(num_set) * Dtype(2); + 
EXPECT_NEAR(this->blob_top_loss_->cpu_data()[0], loss, 1e-6); +} + +TYPED_TEST(TripletLossLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + TripletLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + GradientChecker checker(1e-2, 1e-2, 1701); + // check the gradient for the first 5 bottom layers + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} +} // namespace caffe diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index ee05b151e72..006720231a5 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -2892,7 +2892,6 @@ TEST_F(NetUpgradeTest, TestImageNet) { this->RunV1UpgradeTest(expected_v1_proto, expected_v2_proto); } // NOLINT(readability/fn_size) -#ifdef USE_OPENCV TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { LayerParameter layer_param; shared_ptr > layer; @@ -2907,25 +2906,16 @@ TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { layer_param.set_type(v2_layer_type); // Data layers expect a DB if (v2_layer_type == "Data") { - #ifdef USE_LEVELDB string tmp; MakeTempDir(&tmp); boost::scoped_ptr db(db::GetDB(DataParameter_DB_LEVELDB)); db->Open(tmp, db::NEW); db->Close(); layer_param.mutable_data_param()->set_source(tmp); - #else - continue; - #endif // USE_LEVELDB } - #ifndef USE_OPENCV - if (v2_layer_type == "ImageData" || v2_layer_type == "WindowData") { - continue; - } - #endif // !USE_OPENCV layer = LayerRegistry::CreateLayer(layer_param); EXPECT_EQ(v2_layer_type, layer->type()); } } -#endif // USE_OPENCV + } // NOLINT(readability/fn_size) // namespace caffe diff --git a/src/caffe/util/db.cpp b/src/caffe/util/db.cpp index ccda054d881..f55420e9840 100644 --- a/src/caffe/util/db.cpp +++ b/src/caffe/util/db.cpp @@ -8,31 +8,23 @@ namespace caffe { namespace db { DB* GetDB(DataParameter::DB backend) { switch (backend) { -#ifdef USE_LEVELDB case 
DataParameter_DB_LEVELDB: return new LevelDB(); -#endif // USE_LEVELDB -#ifdef USE_LMDB case DataParameter_DB_LMDB: return new LMDB(); -#endif // USE_LMDB default: LOG(FATAL) << "Unknown database backend"; } } DB* GetDB(const string& backend) { -#ifdef USE_LEVELDB if (backend == "leveldb") { return new LevelDB(); - } -#endif // USE_LEVELDB -#ifdef USE_LMDB - if (backend == "lmdb") { + } else if (backend == "lmdb") { return new LMDB(); + } else { + LOG(FATAL) << "Unknown database backend"; } -#endif // USE_LMDB - LOG(FATAL) << "Unknown database backend"; } } // namespace db diff --git a/src/caffe/util/db_leveldb.cpp b/src/caffe/util/db_leveldb.cpp index f5c4d8a660d..06c46627d31 100644 --- a/src/caffe/util/db_leveldb.cpp +++ b/src/caffe/util/db_leveldb.cpp @@ -1,4 +1,3 @@ -#ifdef USE_LEVELDB #include "caffe/util/db_leveldb.hpp" #include @@ -20,4 +19,3 @@ void LevelDB::Open(const string& source, Mode mode) { } // namespace db } // namespace caffe -#endif // USE_LEVELDB diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index 78dd880ac41..a054b796806 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -1,4 +1,3 @@ -#ifdef USE_LMDB #include "caffe/util/db_lmdb.hpp" #include @@ -50,4 +49,3 @@ void LMDBTransaction::Put(const string& key, const string& value) { } // namespace db } // namespace caffe -#endif // USE_LMDB diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index b0a7be50e5c..c48f31f35d4 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include "caffe/util/im2col.hpp" #include "caffe/util/math_functions.hpp" @@ -45,98 +44,6 @@ template void im2col_cpu(const double* data_im, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_col); -template -inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, - const int num_spatial_axes, const int* im_shape, const int* 
col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_output) { - if (!im2col) { - int im_size = im_shape[0]; - for (int i = 0; i < num_spatial_axes; ++i) { - im_size *= im_shape[1 + i]; - } - caffe_set(im_size, Dtype(0), data_output); - } - int kernel_size = 1; - for (int i = 0; i < num_spatial_axes; ++i) { - kernel_size *= kernel_shape[i]; - } - const int channels_col = col_shape[0]; - vector d_offset(num_spatial_axes, 0); - vector d_iter(num_spatial_axes, 0); - for (int c = 0; c < channels_col; ++c) { - // Loop over spatial axes in reverse order to compute a per-axis offset. - int offset = c; - for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { - if (d_i < num_spatial_axes - 1) { - offset /= kernel_shape[d_i + 1]; - } - d_offset[d_i] = offset % kernel_shape[d_i]; - } - for (bool incremented = true; incremented; ) { - // Loop over spatial axes in forward order to compute the indices in the - // image and column, and whether the index lies in the padding. - int index_col = c; - int index_im = c / kernel_size; - bool is_padding = false; - for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { - const int d = d_iter[d_i]; - const int d_pad = d * stride[d_i] - pad[d_i] + d_offset[d_i]; - is_padding |= d_pad < 0 || d_pad >= im_shape[d_i + 1]; - index_col *= col_shape[d_i + 1]; - index_col += d; - index_im *= im_shape[d_i + 1]; - index_im += d_pad; - } - if (im2col) { - if (is_padding) { - data_output[index_col] = 0; - } else { - data_output[index_col] = data_input[index_im]; - } - } else if (!is_padding) { // col2im - data_output[index_im] += data_input[index_col]; - } - // Loop over spatial axes in reverse order to choose an index, - // like counting. 
- incremented = false; - for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { - const int d_max = col_shape[d_i + 1]; - DCHECK_LT(d_iter[d_i], d_max); - if (d_iter[d_i] == d_max - 1) { - d_iter[d_i] = 0; - } else { // d_iter[d_i] < d_max - 1 - ++d_iter[d_i]; - incremented = true; - break; - } - } - } // while(incremented) { - } // for (int c = 0; c < channels_col; ++c) { -} - -template -void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col) { - const bool kIm2Col = true; - im2col_nd_core_cpu(data_im, kIm2Col, num_spatial_axes, im_shape, col_shape, - kernel_shape, pad, stride, data_col); -} - -// Explicit instantiation -template void im2col_nd_cpu(const float* data_im, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - float* data_col); -template void im2col_nd_cpu(const double* data_im, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - double* data_col); - template void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int patch_h, const int patch_w, @@ -173,27 +80,4 @@ template void col2im_cpu(const double* data_col, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_im); -template -void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im) { - const bool kIm2Col = false; - im2col_nd_core_cpu(data_col, kIm2Col, num_spatial_axes, im_shape, col_shape, - kernel_shape, pad, stride, data_im); -} - -// Explicit instantiation -template void col2im_nd_cpu(const float* data_col, - const int num_spatial_axes, - const int* 
im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - float* data_im); -template void col2im_nd_cpu(const double* data_col, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - double* data_im); - - } // namespace caffe diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index 5a478ba62d2..c90f93eb67b 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -59,6 +59,7 @@ void im2col_gpu(const Dtype* data_im, const int channels, CUDA_POST_KERNEL_CHECK; } + // Explicit instantiation template void im2col_gpu(const float* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, @@ -69,156 +70,6 @@ template void im2col_gpu(const double* data_im, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_col); -template -__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col) { - int d_temp[num_axes]; // NOLINT(runtime/arrays) - int d_iter[num_axes]; // NOLINT(runtime/arrays) - int i; - CUDA_KERNEL_LOOP(index, n) { - // Initialize channel_in, computed in the loop below, with intermediate - // computations used to compute the spatial indices. 
- int channel_in = index; - int channel_out = 1; - for (i = num_axes - 1; i >= 0; --i) { - d_temp[i] = channel_in % col_shape[i + 1]; - channel_in /= col_shape[i + 1]; - channel_out *= kernel_shape[i]; - } - channel_out *= channel_in; - int data_col_inc = 1; - for (i = 0; i < num_axes; ++i) { - channel_out *= col_shape[i + 1]; - channel_out += d_temp[i]; - d_temp[i] = d_temp[i] * stride[i] - pad[i]; - channel_in *= im_shape[i + 1]; - channel_in += d_temp[i]; - data_col_inc *= col_shape[i + 1]; - d_iter[i] = 0; - } - Dtype* data_col_ptr = data_col + channel_out; - const Dtype* data_im_ptr = data_im + channel_in; - bool incremented; - do { - bool in_range = true; - for (i = 0; i < num_axes; ++i) { - const int d_iter_im = d_iter[i] + d_temp[i]; - in_range &= d_iter_im >= 0 && d_iter_im < im_shape[i + 1]; - if (!in_range) { break; } - } - if (in_range) { - int data_im_offset = d_iter[0]; - for (i = 1; i < num_axes; ++i) { - data_im_offset *= im_shape[i + 1]; - data_im_offset += d_iter[i]; - } - *data_col_ptr = data_im_ptr[data_im_offset]; - } else { - *data_col_ptr = 0; - } - data_col_ptr += data_col_inc; - incremented = false; - for (i = num_axes - 1; i >= 0; --i) { - const int d_max = kernel_shape[i]; - if (d_iter[i] == d_max - 1) { - d_iter[i] = 0; - } else { // d_iter[i] < d_max - 1 - ++d_iter[i]; - incremented = true; - break; - } - } // for (int i = num_axes - 1; i >= 0; --i) - } while (incremented); // do - } // CUDA_KERNEL_LOOP(index, n) -} - -template -void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, - const int num_kernels, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col) { - switch (num_spatial_axes) { - case 1: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 2: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - 
num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 3: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 4: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 5: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 6: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 7: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 8: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 9: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - case 10: - im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); - break; - default: - LOG(FATAL) << "im2col_nd_gpu does not support computation with " - << num_spatial_axes << " spatial axes"; - } - CUDA_POST_KERNEL_CHECK; -} - -// Explicit instantiation -template void im2col_nd_gpu(const float* data_im, - const int num_spatial_axes, const int col_size, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - float* data_col); -template void im2col_nd_gpu(const double* data_im, - const int 
num_spatial_axes, const int col_size, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - double* data_col); - template __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, const int height, const int width, const int channels, @@ -290,159 +141,4 @@ template void col2im_gpu(const double* data_col, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_im); -template -__global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im) { - int d_im[num_axes]; // NOLINT(runtime/arrays) - int d_col_iter[num_axes]; // NOLINT(runtime/arrays) - int d_col_start[num_axes]; // NOLINT(runtime/arrays) - int d_col_end[num_axes]; // NOLINT(runtime/arrays) - CUDA_KERNEL_LOOP(index, n) { - // Initialize channel_in, computed in the loop below, with intermediate - // computations used to compute the spatial indices. - int channel_im = index; - // Calculate d_im (image dimensions). - for (int i = num_axes - 1; i >= 0; --i) { - d_im[i] = channel_im % im_shape[i + 1] + pad[i]; - channel_im /= im_shape[i + 1]; - } - // Calculate col start/end indices. - bool done = false; - for (int i = 0; i < num_axes; ++i) { - d_col_start[i] = d_col_iter[i] = - (d_im[i] < kernel_shape[i]) ? - 0 : (d_im[i] - kernel_shape[i]) / stride[i] + 1; - d_col_end[i] = min(d_im[i] / stride[i] + 1, col_shape[i + 1]); - if (d_col_start[i] >= d_col_end[i]) { - // Skip computation if the dimension is 0 at any spatial axis -- - // final val will be 0. - data_im[index] = 0; - done = true; - break; // for (int i = 0; i < num_axes; ++i) - } - } - if (done) { - continue; // CUDA_KERNEL_LOOP(index, n) - } - // Loop over the col to compute the output val. - Dtype val = 0; - bool incremented = true; - do { - // Compute the final offset. 
- int final_offset = 0; - int kernel_shape_prod = 1; - for (int i = num_axes - 1; i >= 0; --i) { - final_offset += - (d_im[i] - d_col_iter[i] * stride[i]) * kernel_shape_prod; - kernel_shape_prod *= kernel_shape[i]; - } - final_offset += kernel_shape_prod * channel_im; - for (int i = 0; i < num_axes; ++i) { - final_offset *= col_shape[i + 1]; - final_offset += d_col_iter[i]; - } - val += data_col[final_offset]; - incremented = false; - for (int i = num_axes - 1; i >= 0; --i) { - const int d_max = d_col_end[i]; - if (d_col_iter[i] == d_max - 1) { - d_col_iter[i] = d_col_start[i]; - } else { // d_col_iter[i] < d_max - 1 - ++d_col_iter[i]; - incremented = true; - break; // for (int i = num_axes - 1; i >= 0; --i) - } - } // for (int i = num_axes - 1; i >= 0; --i) - } while (incremented); - data_im[index] = val; - } // CUDA_KERNEL_LOOP(index, n) -} - -template -void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, - const int im_size, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im) { - switch (num_spatial_axes) { - case 1: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 2: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 3: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 4: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 5: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 6: - 
col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 7: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 8: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 9: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - case 10: - col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); - break; - default: - LOG(FATAL) << "col2im_nd_gpu does not support computation with " - << num_spatial_axes << " spatial axes"; - } - CUDA_POST_KERNEL_CHECK; -} - -// Explicit instantiation -template void col2im_nd_gpu(const float* data_col, - const int num_spatial_axes, const int im_size, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - float* data_im); -template void col2im_nd_gpu(const double* data_col, - const int num_spatial_axes, const int im_size, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - double* data_im); - } // namespace caffe diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index f2b1dd98423..6f03314202c 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -3,11 +3,9 @@ #include #include #include -#ifdef USE_OPENCV #include #include #include -#endif // USE_OPENCV #include #include @@ -69,7 +67,6 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) { CHECK(proto.SerializeToOstream(&output)); } -#ifdef USE_OPENCV cv::Mat ReadImageToCVMat(const string& 
filename, const int height, const int width, const bool is_color) { cv::Mat cv_img; @@ -101,7 +98,6 @@ cv::Mat ReadImageToCVMat(const string& filename, cv::Mat ReadImageToCVMat(const string& filename) { return ReadImageToCVMat(filename, 0, 0, true); } - // Do the file extension and encoding match? static bool matchExt(const std::string & fn, std::string en) { @@ -115,7 +111,6 @@ static bool matchExt(const std::string & fn, return true; return false; } - bool ReadImageToDatum(const string& filename, const int label, const int height, const int width, const bool is_color, const std::string & encoding, Datum* datum) { @@ -140,7 +135,6 @@ bool ReadImageToDatum(const string& filename, const int label, return false; } } -#endif // USE_OPENCV bool ReadFileToDatum(const string& filename, const int label, Datum* datum) { @@ -162,7 +156,6 @@ bool ReadFileToDatum(const string& filename, const int label, } } -#ifdef USE_OPENCV cv::Mat DecodeDatumToCVMatNative(const Datum& datum) { cv::Mat cv_img; CHECK(datum.encoded()) << "Datum not encoded"; @@ -234,5 +227,6 @@ void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) { } datum->set_data(buffer); } -#endif // USE_OPENCV + + } // namespace caffe diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index ac379e50f4f..4703eb4c1b4 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -193,7 +193,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, } if (v0_layer_param.has_pad()) { if (type == "conv") { - layer_param->mutable_convolution_param()->add_pad(v0_layer_param.pad()); + layer_param->mutable_convolution_param()->set_pad(v0_layer_param.pad()); } else if (type == "pool") { layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad()); } else { @@ -203,7 +203,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, } if (v0_layer_param.has_kernelsize()) { if (type == "conv") { - 
layer_param->mutable_convolution_param()->add_kernel_size( + layer_param->mutable_convolution_param()->set_kernel_size( v0_layer_param.kernelsize()); } else if (type == "pool") { layer_param->mutable_pooling_param()->set_kernel_size( @@ -224,7 +224,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, } if (v0_layer_param.has_stride()) { if (type == "conv") { - layer_param->mutable_convolution_param()->add_stride( + layer_param->mutable_convolution_param()->set_stride( v0_layer_param.stride()); } else if (type == "pool") { layer_param->mutable_pooling_param()->set_stride( @@ -816,6 +816,10 @@ bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, layer_param->mutable_threshold_param()->CopyFrom( v1_layer_param.threshold_param()); } + if (v1_layer_param.has_triplet_loss_param()) { + layer_param->mutable_triplet_loss_param()->CopyFrom( + v1_layer_param.triplet_loss_param()); + } if (v1_layer_param.has_window_data_param()) { layer_param->mutable_window_data_param()->CopyFrom( v1_layer_param.window_data_param()); @@ -913,6 +917,8 @@ const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) { return "Slice"; case V1LayerParameter_LayerType_TANH: return "TanH"; + case V1LayerParameter_LayerType_TRIPLET_LOSS: + return "TripletLoss"; case V1LayerParameter_LayerType_WINDOW_DATA: return "WindowData"; case V1LayerParameter_LayerType_THRESHOLD: diff --git a/tools/caffe.cpp b/tools/caffe.cpp index e3f684b5ab3..ff63860a3c1 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -174,7 +174,6 @@ int train() { vector gpus; get_gpus(&gpus); if (gpus.size() == 0) { - LOG(INFO) << "Use CPU."; Caffe::set_mode(Caffe::CPU); } else { ostringstream s; diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp index 2035d515195..b1fc7cae38f 100644 --- a/tools/compute_image_mean.cpp +++ b/tools/compute_image_mean.cpp @@ -24,7 +24,6 @@ DEFINE_string(backend, "lmdb", int main(int argc, char** argv) { 
::google::InitGoogleLogging(argv[0]); -#ifdef USE_OPENCV #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif @@ -116,8 +115,5 @@ int main(int argc, char** argv) { } LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim; } -#else - LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; -#endif // USE_OPENCV return 0; } diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index e51a2631077..aad1f1fe216 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -43,7 +43,6 @@ DEFINE_string(encode_type, "", "Optional: What type should we encode the image as ('png','jpg',...)."); int main(int argc, char** argv) { -#ifdef USE_OPENCV ::google::InitGoogleLogging(argv[0]); // Print output to stderr (while still logging) FLAGS_alsologtostderr = 1; @@ -151,8 +150,5 @@ int main(int argc, char** argv) { txn->Commit(); LOG(INFO) << "Processed " << count << " files."; } -#else - LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; -#endif // USE_OPENCV return 0; } diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index 084c9bf88df..365dd495bbf 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -42,7 +42,7 @@ int feature_extraction_pipeline(int argc, char** argv) { " save_feature_dataset_name1[,name2,...] num_mini_batches db_type" " [CPU/GPU] [DEVICE_ID=0]\n" "Note: you can extract multiple features in one pass by specifying" - " multiple feature blob names and dataset names separated by ','." + " multiple feature blob names and dataset names seperated by ','." " The names cannot contain white space characters and the number of blobs" " and datasets must be equal."; return 1;