From 7b01b3d4fad472e13d4ab3284ab95763f37eaf10 Mon Sep 17 00:00:00 2001 From: Yuncheng Li Date: Tue, 15 Sep 2015 09:11:01 -0400 Subject: [PATCH 1/5] merged pull#1271; and passed the tests --- examples/mnist/lenet_local_solver.prototxt | 25 + .../mnist/lenet_local_train_test.prototxt | 171 +++++++ examples/mnist/train_lenet.sh | 2 +- examples/mnist/train_lenet_local.sh | 3 + .../mnist_siamese_local_solver.prototxt | 25 + .../mnist_siamese_local_train_test.prototxt | 435 ++++++++++++++++++ examples/siamese/train_mnist_siamese_local.sh | 5 + include/caffe/filler.hpp | 25 + include/caffe/vision_layers.hpp | 50 ++ src/caffe/layers/local_layer.cpp | 212 +++++++++ src/caffe/layers/local_layer.cu | 185 ++++++++ src/caffe/proto/caffe.proto | 14 +- src/caffe/test/test_local_layer.cpp | 121 +++++ 13 files changed, 1271 insertions(+), 2 deletions(-) create mode 100644 examples/mnist/lenet_local_solver.prototxt create mode 100644 examples/mnist/lenet_local_train_test.prototxt create mode 100644 examples/mnist/train_lenet_local.sh create mode 100644 examples/siamese/mnist_siamese_local_solver.prototxt create mode 100644 examples/siamese/mnist_siamese_local_train_test.prototxt create mode 100644 examples/siamese/train_mnist_siamese_local.sh create mode 100644 src/caffe/layers/local_layer.cpp create mode 100644 src/caffe/layers/local_layer.cu create mode 100644 src/caffe/test/test_local_layer.cpp diff --git a/examples/mnist/lenet_local_solver.prototxt b/examples/mnist/lenet_local_solver.prototxt new file mode 100644 index 00000000000..a4b02d25e7e --- /dev/null +++ b/examples/mnist/lenet_local_solver.prototxt @@ -0,0 +1,25 @@ +# The train/test net protocol buffer definition +net: "examples/mnist/lenet_local_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. +base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0005 +# The learning rate policy +lr_policy: "inv" +gamma: 0.0001 +power: 0.75 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "examples/mnist/lenet" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/mnist/lenet_local_train_test.prototxt b/examples/mnist/lenet_local_train_test.prototxt new file mode 100644 index 00000000000..ff88b5f4919 --- /dev/null +++ b/examples/mnist/lenet_local_train_test.prototxt @@ -0,0 +1,171 @@ +name: "LeNet" +layers { + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { + source: "examples/mnist/mnist_train_lmdb" + backend: LMDB + batch_size: 64 + } + transform_param { + scale: 0.00390625 + } + include: { phase: TRAIN } +} +layers { + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { + source: "examples/mnist/mnist_test_lmdb" + backend: LMDB + batch_size: 100 + } + transform_param { + scale: 0.00390625 + } + include: { phase: TEST } +} + +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "local1" + type: LOCAL + bottom: "pool1" + top: "local1" + blobs_lr: 1 + blobs_lr: 1 + local_param { + num_output: 5 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layers { + name: "relu1" + type: RELU + bottom: "local1" + top: "local1" +} +layers { + name: "local2" + type: LOCAL + bottom: "local1" + top: "local2" + blobs_lr: 1 + blobs_lr: 1 + local_param { + num_output: 10 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layers { + name: "relu2" + type: RELU + bottom: "local2" + top: "local2" +} +layers { + name: "ip1" + type: INNER_PRODUCT + bottom: "local2" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "relu2" + type: RELU + bottom: "ip1" + top: "ip1" +} +layers { + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "accuracy" + type: ACCURACY + bottom: "ip2" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} +layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "ip2" + bottom: "label" + top: "loss" +} diff --git a/examples/mnist/train_lenet.sh b/examples/mnist/train_lenet.sh index 1b6bf7d978d..1f718825ded 100755 --- a/examples/mnist/train_lenet.sh +++ b/examples/mnist/train_lenet.sh @@ -1,3 +1,3 @@ #!/usr/bin/env sh -./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt +GLOG_logtostderr=0 GLOG_log_dir=examples/mnist/ ./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt diff --git a/examples/mnist/train_lenet_local.sh b/examples/mnist/train_lenet_local.sh new file mode 100644 index 00000000000..b9e29e5ceb8 --- /dev/null +++ b/examples/mnist/train_lenet_local.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +GLOG_logtostderr=0 GLOG_log_dir=examples/mnist/ ./build/tools/caffe train --solver=examples/mnist/lenet_local_solver.prototxt --gpu=1 diff --git a/examples/siamese/mnist_siamese_local_solver.prototxt b/examples/siamese/mnist_siamese_local_solver.prototxt new file mode 100644 index 00000000000..c85bb90fc87 --- /dev/null +++ b/examples/siamese/mnist_siamese_local_solver.prototxt @@ -0,0 +1,25 @@ +# The train/test net protocol buffer definition +net: "examples/siamese/mnist_siamese_local_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. +base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0000 +# The learning rate policy +lr_policy: "inv" +gamma: 0.0001 +power: 0.75 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 50000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "examples/siamese/mnist_siamese" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/siamese/mnist_siamese_local_train_test.prototxt b/examples/siamese/mnist_siamese_local_train_test.prototxt new file mode 100644 index 00000000000..4212217df15 --- /dev/null +++ b/examples/siamese/mnist_siamese_local_train_test.prototxt @@ -0,0 +1,435 @@ +name: "mnist_siamese_train_test" +layers { + name: "pair_data" + type: DATA + top: "pair_data" + top: "sim" + data_param { + source: "examples/siamese/mnist_siamese_train_leveldb" + scale: 0.00390625 + batch_size: 64 + } + include: { phase: TRAIN } +} +layers { + name: "pair_data" + type: DATA + top: "pair_data" + top: "sim" + data_param { + source: "examples/siamese/mnist_siamese_test_leveldb" + scale: 0.00390625 + batch_size: 100 + } + include: { phase: TEST } +} +layers { + name: "slice_pair" + type: SLICE + bottom: "pair_data" + top: "data" + top: "data_p" + slice_param { + slice_dim: 1 + slice_point: 1 + } +} + + + + +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "conv1_w" + param: "conv1_b" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} + + +layers { + name: "local1" + type: LOCAL + bottom: "pool1" + top: "local1" + blobs_lr: 1 + blobs_lr: 2 + local_param { + num_output: 5 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param: "local1_w" + param: "local1_b" +} +layers { + name: "relu1" + type: RELU + bottom: "local1" + top: "local1" +} +layers { + name: "local2" + type: LOCAL + bottom: "local1" + top: "local2" + blobs_lr: 1 + blobs_lr: 2 + local_param { + num_output: 10 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param: "local2_w" + param: "local2_b" +} +layers { + name: "relu2" + type: RELU + bottom: "local2" + top: "local2" +} + + + + + + + +layers { + name: "ip1" + type: INNER_PRODUCT + bottom: "local2" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "ip1_w" + param: "ip1_b" +} +layers { + name: "relu1" + type: RELU + bottom: "ip1" + top: "ip1" +} +layers { + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "ip2_w" + param: "ip2_b" +} + +layers { + name: "feat2" + type: INNER_PRODUCT + bottom: "ip2" + top: "feat2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "feat2_w" + param: "feat2_b" +} + +layers { + name: "feat1" + type: INNER_PRODUCT + bottom: "local1" + top: "feat1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "feat1_w" + param: "feat1_b" +} + + + +layers { + name: "conv1_p" + type: CONVOLUTION + bottom: "data_p" + top: "conv1_p" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "conv1_w" + param: "conv1_b" +} +layers { + name: "pool1_p" + type: POOLING + bottom: "conv1_p" + top: "pool1_p" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} + + +layers { + name: "local1_p" + type: LOCAL + bottom: "pool1_p" + top: "local1_p" + blobs_lr: 1 + blobs_lr: 2 + local_param { + num_output: 5 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param: "local1_w" + param: "local1_b" +} +layers { + name: "relu1_p" + type: RELU + bottom: "local1_p" + top: "local1_p" +} +layers { + name: "local2_p" + type: LOCAL + bottom: "local1_p" + top: "local2_p" + blobs_lr: 1 + blobs_lr: 2 + local_param { + num_output: 10 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param: "local2_w" + param: "local2_b" +} +layers { + name: "relu2_p" + type: RELU + bottom: "local2_p" + top: "local2_p" +} + + + + + +layers { + name: "ip1_p" + type: INNER_PRODUCT + bottom: "local2_p" + top: "ip1_p" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "ip1_w" + param: "ip1_b" +} +layers { + name: "relu1_p" + type: RELU + bottom: "ip1_p" + top: "ip1_p" +} +layers { + name: "ip2_p" + type: INNER_PRODUCT + bottom: "ip1_p" + top: "ip2_p" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "ip2_w" + param: "ip2_b" +} + +layers { + name: "feat2_p" + type: INNER_PRODUCT + bottom: "ip2_p" + top: "feat2_p" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "feat2_w" + param: "feat2_b" +} + + +layers { + name: "feat1_p" + type: INNER_PRODUCT + bottom: "local1_p" + top: "feat1_p" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + param: "feat1_w" + param: "feat1_b" +} + + +layers { + name: "loss2" + type: CONTRASTIVE_LOSS + contrastive_loss_param { + margin: 1.0 + } + bottom: "feat2" + bottom: "feat2_p" + bottom: "sim" + top: "loss2" +} + + +layers { + name: "loss1" + type: CONTRASTIVE_LOSS + contrastive_loss_param { + margin: 1.0 + } + bottom: "feat1" + bottom: "feat1_p" + bottom: "sim" + top: "loss1" +} + diff --git a/examples/siamese/train_mnist_siamese_local.sh b/examples/siamese/train_mnist_siamese_local.sh new file mode 100644 index 00000000000..83035bf2ccc --- /dev/null +++ b/examples/siamese/train_mnist_siamese_local.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +GLOG_logtostderr=0 GLOG_log_dir=examples/siamese/ $TOOLS/caffe train --solver=examples/siamese/mnist_siamese_local_solver.prototxt diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index 888f4a4ba3b..3f762920f87 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -166,6 +166,29 @@ class XavierFiller : public Filler { } }; +template +class TestLocalFiller : public Filler { + public: + explicit TestLocalFiller(const FillerParameter& param) + : Filler(param) {} + virtual void Fill(Blob* blob) { + LOG(INFO) << "Doing mutable cpu"; + LOG(INFO) << "blobs" << blob; + Dtype* data = blob->mutable_cpu_data(); + LOG(INFO) << "Done Doing mutable cpu"; + CHECK_EQ(blob->channels(), 1); + + for (int n=0; nnum(); n++) { + for (int j=0; jheight(); j++) { + for (int i=0; iwidth(); i++) { + *(data+blob->offset(n, 0, j, i)) = i; + } + } + } + } +}; + + /** * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming @@ -281,6 +304,8 @@ Filler* GetFiller(const FillerParameter& param) { return new UniformFiller(param); } else if (type == "xavier") { return new XavierFiller(param); + } else if (type == "test_local") { + return new TestLocalFiller(param); } else if (type == "msra") { return new MSRAFiller(param); } else if (type == "bilinear") { diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 211e3d9042d..a2adf43c9ba 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -211,6 +211,56 @@ class DeconvolutionLayer : public BaseConvolutionLayer { virtual void compute_output_shape(); }; + + +template +class LocalLayer : public Layer { + public: + explicit LocalLayer(const LayerParameter& param) + : Layer(param) {} + + virtual inline const char* type() const { return "Local"; } + + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline bool EqualNumBottomTopBlobs() const { return true; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + + int kernel_size_; + int stride_; + int num_; + int channels_; + int pad_; + int height_, width_; + int height_out_, width_out_; + int num_output_; + bool bias_term_; + + int M_; + int K_; + int N_; + + Blob col_buffer_; +}; + + + + #ifdef USE_CUDNN /* * @brief cuDNN implementation of ConvolutionLayer. diff --git a/src/caffe/layers/local_layer.cpp b/src/caffe/layers/local_layer.cpp new file mode 100644 index 00000000000..2c45e1765f9 --- /dev/null +++ b/src/caffe/layers/local_layer.cpp @@ -0,0 +1,212 @@ +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void LocalLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input."; + CHECK_EQ(top.size(), 1) << "Conv Layer takes a single blob as output."; + + kernel_size_ = this->layer_param_.local_param().kernel_size(); + stride_ = this->layer_param_.local_param().stride(); + pad_ = this->layer_param_.local_param().pad(); + num_ = bottom[0]->num(); + channels_ = bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + num_output_ = this->layer_param_.local_param().num_output(); + + height_out_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; + width_out_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; + + M_ = num_output_; + K_ = channels_ * kernel_size_ * kernel_size_; + N_ = height_out_ * width_out_; + + CHECK_GT(num_output_, 0); + CHECK_GE(height_, kernel_size_) << "height smaller than kernel size"; + CHECK_GE(width_, kernel_size_) << "width smaller than kernel size"; + // Set the parameters + bias_term_ = this->layer_param_.local_param().bias_term(); + + // Check if we need to set up the weights + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + if (bias_term_) { + this->blobs_.resize(2); + } else { + this->blobs_.resize(1); + } + // Intialize the weight + this->blobs_[0].reset(new Blob( + num_output_, 1, K_, N_)); + // fill the weights + shared_ptr > weight_filler(GetFiller( + this->layer_param_.local_param().weight_filler())); + weight_filler->Fill(this->blobs_[0].get()); + // If necessary, intiialize and fill the bias term + if (bias_term_) { + this->blobs_[1].reset(new Blob(1, 1, M_, N_)); + shared_ptr > bias_filler(GetFiller( + this->layer_param_.local_param().bias_filler())); + bias_filler->Fill(this->blobs_[1].get()); + } + } +} + +template +void LocalLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with" + " weights."; + // TODO: generalize to handle inputs of different shapes. + for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) { + CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num."; + CHECK_EQ(channels_, bottom[bottom_id]->channels()) + << "Inputs must have same channels."; + CHECK_EQ(height_, bottom[bottom_id]->height()) + << "Inputs must have same height."; + CHECK_EQ(width_, bottom[bottom_id]->width()) + << "Inputs must have same width."; + } + + // Shape the tops. + for (int top_id = 0; top_id < top.size(); ++top_id) { + top[top_id]->Reshape(num_, num_output_, height_out_, width_out_); + } + + // The im2col result buffer would only hold one image at a time to avoid + // overly large memory usage. + col_buffer_.Reshape( + 1, channels_ * kernel_size_ * kernel_size_, height_out_, width_out_); + + for (int top_id = 0; top_id < top.size(); ++top_id) { + top[top_id]->Reshape(num_, num_output_, height_out_, width_out_); + } +} + +template +void LocalLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + + Dtype* x_data = col_buffer_.mutable_cpu_data(); + const Dtype* weight = this->blobs_[0]->cpu_data(); + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + + Blob E; + E.Reshape(1, 1, 1, K_); + FillerParameter filler_param; + filler_param.set_value(1); + ConstantFiller filler(filler_param); + filler.Fill(&E); + + Blob intermediate; + intermediate.Reshape(1, 1, K_, N_); + for (int n=0; noffset(n), channels_, height_, + width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + + for (int m=0; mblobs_[0]->offset(m), + intermediate.mutable_cpu_data()); + + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, 1, N_, K_, + (Dtype)1., E.cpu_data(), + intermediate.cpu_data(), + (Dtype)0., top_data + top[0]->offset(n, m)); + } + + if (bias_term_) { + caffe_add(M_ * N_, this->blobs_[1]->cpu_data(), + top_data + top[0]->offset(n), + top_data + top[0]->offset(n)); + } + } +} + +template +void LocalLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + Dtype* x_data = col_buffer_.mutable_cpu_data(); + Dtype* x_diff = col_buffer_.mutable_cpu_diff(); + const Dtype* weight = this->blobs_[0]->cpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); + Dtype* bias_diff = NULL; + + Blob intermediate; + intermediate.Reshape(1, 1, 1, N_); + + Blob xt; + xt.Reshape(1, 1, K_, N_); + Dtype* xt_data = xt.mutable_cpu_data(); + + if (bias_term_) { + bias_diff = this->blobs_[1]->mutable_cpu_diff(); + memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); + for (int n = 0; n < num_; ++n) { + caffe_add(M_ * N_, bias_diff, + top_diff + top[0]->offset(n), + bias_diff); + } + } + + memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); + for (int n=0; noffset(n), channels_, height_, + width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + + // gradient wrt weight + for (int m=0; mblobs_[0]->offset(m); + for (int k=0; koffset(n, m), + x_data+col_buffer_.offset(0,k), xt_data+xt.offset(0,0,k)); + } + caffe_cpu_axpby(K_*N_, Dtype(1.0), xt_data, Dtype(1.0), filter_weight_diff); + } + + // gradient wrt bottom data + if (propagate_down[0]) { + memset(x_diff, 0, col_buffer_.count() * sizeof(Dtype)); + for (int m=0; moffset(n, m), + weight+this->blobs_[0]->offset(m,0,k), + intermediate.mutable_cpu_data()); + + caffe_cpu_axpby(N_, Dtype(1.0), + intermediate.cpu_data(), Dtype(1.0), + x_diff+col_buffer_.offset(0,k)); + } + } + + // col2im back to the data + col2im_cpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); + + } + } + +} + +#ifdef CPU_ONLY +STUB_GPU(LocalLayer); +#endif + +INSTANTIATE_CLASS(LocalLayer); +REGISTER_LAYER_CLASS(Local); + +} // namespace caffe diff --git a/src/caffe/layers/local_layer.cu b/src/caffe/layers/local_layer.cu new file mode 100644 index 00000000000..f393ac6e4d7 --- /dev/null +++ b/src/caffe/layers/local_layer.cu @@ -0,0 +1,185 @@ +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + + + +template +__global__ void local_update1_gpu_kernel(const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + int total = filter_num * location_num * output_num; + CUDA_KERNEL_LOOP(index, total) { + int p = index % location_num; + int n = (index / location_num) % filter_num; + int q = (index / location_num) / filter_num; + data_R[index] += data_A[q*location_num+p] * data_B[n*location_num+p]; + } +} + +template +void local_update1_gpu(const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + // data_A is output_num x location_num + // data_B is filter_num x location_num + // data_R is output_num x filter_num x location_num, the update performed is Rqnp += Aqp * Bnp + + // NOLINT_NEXT_LINE(whitespace/operators) + local_update1_gpu_kernel<<>>(data_A, data_B, data_R, filter_num, location_num, output_num); + CUDA_POST_KERNEL_CHECK; +} + +// Explicit instantiation +template void local_update1_gpu(const float* data_A, const float* data_B, + float* data_R, const int filter_num, + const int location_num, const int output_num); +template void local_update1_gpu(const double* data_A, const double* data_B, + double* data_R, const int filter_num, + const int location_num, const int output_num); + + +template +__global__ void local_update2_gpu_kernel(const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + int total = filter_num * location_num; + CUDA_KERNEL_LOOP(index, total) { + int p = index % location_num; + int n = (index / location_num); + for (int q=0; q +void local_update2_gpu(const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + // data_A is output_num x location_num + // data_B is output_num x filter_num x location_num + // data_R is filter_num x location_num, the update performed is Rnp += \sum_q(Aqp * Bqnp) + + // NOLINT_NEXT_LINE(whitespace/operators) + local_update2_gpu_kernel<<>>(data_A, data_B, data_R, filter_num, location_num, output_num); + CUDA_POST_KERNEL_CHECK; +} + +// Explicit instantiation +template void local_update2_gpu(const float* data_A, const float* data_B, + float* data_R, const int filter_num, + const int location_num, const int output_num); +template void local_update2_gpu(const double* data_A, const double* data_B, + double* data_R, const int filter_num, + const int location_num, const int output_num); + + + + +/// @brief refer to CPU forward -- the BLAS implementation is the same. +template +void LocalLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + + Dtype* x_data = col_buffer_.mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + + Blob E; + E.Reshape(1, 1, 1, K_); + FillerParameter filler_param; + filler_param.set_value(1); + ConstantFiller filler(filler_param); + filler.Fill(&E); + + Blob intermediate; + intermediate.Reshape(1, 1, K_, N_); + for (int n=0; noffset(n), channels_, height_, + width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + + for (int m=0; mblobs_[0]->offset(m), + intermediate.mutable_gpu_data()); + + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, 1, N_, K_, + (Dtype)1., E.gpu_data(), intermediate.gpu_data(), + (Dtype)0., top_data + top[0]->offset(n, m)); + } + + if (bias_term_) { + caffe_gpu_add(M_ * N_, this->blobs_[1]->gpu_data(), + top_data + top[0]->offset(n), + top_data + top[0]->offset(n)); + } + } + +} + +/// @brief refer to CPU backward -- the BLAS implementation is the same. +template +void LocalLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + Dtype* x_data = col_buffer_.mutable_gpu_data(); + Dtype* x_diff = col_buffer_.mutable_gpu_diff(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); + Dtype* bias_diff = NULL; + + Blob intermediate; + intermediate.Reshape(1, 1, 1, N_); + + Blob xt; + xt.Reshape(1, 1, K_, N_); + Dtype* xt_data = xt.mutable_gpu_data(); + if (bias_term_) { + bias_diff = this->blobs_[1]->mutable_gpu_diff(); + CUDA_CHECK(cudaMemset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count())); + for (int n = 0; n < num_; ++n) { + caffe_gpu_add(M_ * N_, bias_diff, + top_diff + top[0]->offset(n), + bias_diff); + } + } + + Blob buf; + buf.Reshape(1, 1, K_, N_); + Dtype* buf_data = buf.mutable_gpu_data(); + CUDA_CHECK(cudaMemset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count())); + for (int n=0; noffset(n), channels_, height_, + width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + + local_update1_gpu(top_diff+top[0]->offset(n), x_data, weight_diff, K_, N_, M_); + + if (propagate_down[0]) { + CUDA_CHECK(cudaMemset(x_diff, 0, col_buffer_.count() * sizeof(Dtype))); + local_update2_gpu(top_diff+top[0]->offset(n), weight, x_diff, K_, N_, M_); + + // col2im back to the data + col2im_gpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); + } + } +} + + + + +INSTANTIATE_LAYER_GPU_FUNCS(LocalLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index aa299f8660b..ae8b8e6a65a 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -301,7 +301,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 139 (last added: tile_param) +// LayerParameter next available layer-specific ID: 140 (last added: local_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -385,6 +385,7 @@ message LayerParameter { optional ThresholdParameter threshold_param = 128; optional TileParameter tile_param = 138; optional WindowDataParameter window_data_param = 129; + optional LocalParameter local_param = 139; } // Message that stores parameters used to apply transformation @@ -1179,3 +1180,14 @@ message PReLUParameter { // Whether or not slope paramters are shared across channels. optional bool channel_shared = 2 [default = false]; } + +// Message that stores parameters used by LocalLayer +message LocalParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional uint32 pad = 3 [default = 0]; // The padding size + optional uint32 kernel_size = 4; // The kernel size + optional uint32 stride = 6 [default = 1]; // The stride + optional FillerParameter weight_filler = 7; // The filler for the weight + optional FillerParameter bias_filler = 8; // The filler for the bias +} diff --git a/src/caffe/test/test_local_layer.cpp b/src/caffe/test/test_local_layer.cpp new file mode 100644 index 00000000000..880c1bb49aa --- /dev/null +++ b/src/caffe/test/test_local_layer.cpp @@ -0,0 +1,121 @@ +#include +#include + +#include "cuda_runtime.h" +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +template +class LocalLayerTest: public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + LocalLayerTest() + : blob_bottom_(new Blob()), + blob_top_(new Blob()) {} + virtual void SetUp() { + blob_bottom_->Reshape(2, 3, 6, 4); + // fill the values + FillerParameter filler_param; + filler_param.set_value(1.); + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + + virtual ~LocalLayerTest() { delete blob_bottom_; delete blob_top_; } + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(LocalLayerTest, TestDtypesAndDevices); + +TYPED_TEST(LocalLayerTest, TestSetup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LocalParameter* convolution_param = + layer_param.mutable_local_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(4); + shared_ptr > layer( + new LocalLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 4); + EXPECT_EQ(this->blob_top_->height(), 2); + EXPECT_EQ(this->blob_top_->width(), 1); + convolution_param->set_num_output(3); + layer.reset(new LocalLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 2); + EXPECT_EQ(this->blob_top_->width(), 1); +} + + +TYPED_TEST(LocalLayerTest, TestSimpleConvolution) { + typedef typename TypeParam::Dtype Dtype; + // We will simply see if the convolution layer carries out averaging well. + FillerParameter filler_param; + filler_param.set_value(1.); + ConstantFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + LayerParameter layer_param; + LocalParameter* convolution_param = + layer_param.mutable_local_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(1); + convolution_param->set_num_output(1); + convolution_param->mutable_weight_filler()->set_type("test_local"); + convolution_param->mutable_weight_filler()->set_value(1); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new LocalLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // After the convolution, the output should all have output values 27.1 + const Dtype* top_data = this->blob_top_->cpu_data(); + for (int n=0; nblob_top_->num(); n++) { + for (int k=0; kblob_top_->channels(); k++) { + for (int j=0; jblob_top_->height(); j++) { + for (int i=0; iblob_top_->width(); i++) { + int idx = j*this->blob_top_->width()+i; + EXPECT_NEAR(*(top_data+this->blob_top_->offset(n, k, j, i)), idx*27+0.1, 1e-4); + } + } + } + } +} + +TYPED_TEST(LocalLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LocalParameter* convolution_param = + layer_param.mutable_local_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + LocalLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe From 00d2cae8d88442aaf9ecd3d0e81885cf14979334 Mon Sep 17 00:00:00 2001 From: Yuncheng Li Date: Tue, 15 Sep 2015 09:29:01 -0400 Subject: [PATCH 2/5] fix cpu only compile --- src/caffe/test/test_local_layer.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/caffe/test/test_local_layer.cpp b/src/caffe/test/test_local_layer.cpp index 880c1bb49aa..49685697829 100644 --- a/src/caffe/test/test_local_layer.cpp +++ b/src/caffe/test/test_local_layer.cpp @@ -1,20 +1,18 @@ #include #include -#include "cuda_runtime.h" #include "gtest/gtest.h" + #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" #include "caffe/vision_layers.hpp" -#include "caffe/test/test_gradient_check_util.hpp" #include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" namespace caffe { -extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; - template class LocalLayerTest: public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; From 53e146609abfe7a56c0a3138878f7c5267321bb9 Mon Sep 17 00:00:00 2001 From: Yuncheng Li Date: Tue, 15 Sep 2015 13:25:27 -0400 Subject: [PATCH 3/5] fix lint errors --- include/caffe/filler.hpp | 6 +- src/caffe/layers/local_layer.cpp | 82 +++++++++--------- src/caffe/layers/local_layer.cu | 127 +++++++++++++++------------- src/caffe/test/test_local_layer.cpp | 22 ++--- 4 files changed, 127 insertions(+), 110 deletions(-) diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index 3f762920f87..ad4b88e32ac 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -178,9 +178,9 @@ class TestLocalFiller : public Filler { LOG(INFO) << "Done Doing mutable cpu"; CHECK_EQ(blob->channels(), 1); - for (int n=0; nnum(); n++) { - for (int j=0; jheight(); j++) { - for (int i=0; iwidth(); i++) { + for (int n = 0; n < blob->num(); n++) { + for (int j = 0; j < blob->height(); j++) { + for (int i = 0; i < blob->width(); i++) { *(data+blob->offset(n, 0, j, i)) = i; } } diff --git a/src/caffe/layers/local_layer.cpp b/src/caffe/layers/local_layer.cpp index 2c45e1765f9..1261fc67b9d 100644 --- a/src/caffe/layers/local_layer.cpp +++ b/src/caffe/layers/local_layer.cpp @@ -10,7 +10,7 @@ namespace caffe { template void LocalLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { + const vector*>& top) { CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input."; CHECK_EQ(top.size(), 1) << "Conv Layer takes a single blob as output."; @@ -47,16 +47,16 @@ void LocalLayer::LayerSetUp(const vector*>& bottom, } // Intialize the weight this->blobs_[0].reset(new Blob( - num_output_, 1, K_, N_)); + num_output_, 1, K_, N_)); // fill the weights shared_ptr > weight_filler(GetFiller( - this->layer_param_.local_param().weight_filler())); + this->layer_param_.local_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, intiialize and fill the bias term if (bias_term_) { this->blobs_[1].reset(new Blob(1, 1, M_, N_)); shared_ptr > bias_filler(GetFiller( - this->layer_param_.local_param().bias_filler())); + this->layer_param_.local_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); } } @@ -64,18 +64,18 @@ void LocalLayer::LayerSetUp(const vector*>& bottom, template void LocalLayer::Reshape(const vector*>& bottom, - const vector*>& top) { + const vector*>& top) { CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with" " weights."; // TODO: generalize to handle inputs of different shapes. for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) { CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num."; CHECK_EQ(channels_, bottom[bottom_id]->channels()) - << "Inputs must have same channels."; + << "Inputs must have same channels."; CHECK_EQ(height_, bottom[bottom_id]->height()) - << "Inputs must have same height."; + << "Inputs must have same height."; CHECK_EQ(width_, bottom[bottom_id]->width()) - << "Inputs must have same width."; + << "Inputs must have same width."; } // Shape the tops. @@ -95,7 +95,7 @@ void LocalLayer::Reshape(const vector*>& bottom, template void LocalLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { + const vector*>& top) { Dtype* x_data = col_buffer_.mutable_cpu_data(); const Dtype* weight = this->blobs_[0]->cpu_data(); @@ -111,31 +111,32 @@ void LocalLayer::Forward_cpu(const vector*>& bottom, Blob intermediate; intermediate.Reshape(1, 1, K_, N_); - for (int n=0; noffset(n), channels_, height_, - width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, x_data); - for (int m=0; mblobs_[0]->offset(m), - intermediate.mutable_cpu_data()); + intermediate.mutable_cpu_data()); caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, 1, N_, K_, - (Dtype)1., E.cpu_data(), - intermediate.cpu_data(), - (Dtype)0., top_data + top[0]->offset(n, m)); + (Dtype)1., E.cpu_data(), + intermediate.cpu_data(), + (Dtype)0., top_data + top[0]->offset(n, m)); } if (bias_term_) { caffe_add(M_ * N_, this->blobs_[1]->cpu_data(), - top_data + top[0]->offset(n), - top_data + top[0]->offset(n)); + top_data + top[0]->offset(n), + top_data + top[0]->offset(n)); } } } template void LocalLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { + const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->cpu_diff(); const Dtype* bottom_data = bottom[0]->cpu_data(); @@ -155,51 +156,52 @@ void LocalLayer::Backward_cpu(const vector*>& top, if (bias_term_) { bias_diff = this->blobs_[1]->mutable_cpu_diff(); - memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); + caffe_set(this->blobs_[1]->count(), Dtype(0.0), bias_diff); for (int n = 0; n < num_; ++n) { caffe_add(M_ * N_, bias_diff, - top_diff + top[0]->offset(n), - bias_diff); + top_diff + top[0]->offset(n), + bias_diff); } } - memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); - for (int n=0; nblobs_[0]->count(), Dtype(0.0), weight_diff); + for (int n = 0; n < num_; n++) { im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, x_data); // gradient wrt weight - for (int m=0; mblobs_[0]->offset(m); - for (int k=0; koffset(n, m), - x_data+col_buffer_.offset(0,k), xt_data+xt.offset(0,0,k)); + x_data+col_buffer_.offset(0, k), xt_data+xt.offset(0, 0, k)); } - caffe_cpu_axpby(K_*N_, Dtype(1.0), xt_data, Dtype(1.0), filter_weight_diff); + caffe_cpu_axpby(K_*N_, Dtype(1.0), xt_data, + Dtype(1.0), filter_weight_diff); } // gradient wrt bottom data if (propagate_down[0]) { - memset(x_diff, 0, col_buffer_.count() * sizeof(Dtype)); - for (int m=0; moffset(n, m), - weight+this->blobs_[0]->offset(m,0,k), - intermediate.mutable_cpu_data()); + weight+this->blobs_[0]->offset(m, 0, k), + intermediate.mutable_cpu_data()); caffe_cpu_axpby(N_, Dtype(1.0), - intermediate.cpu_data(), Dtype(1.0), - x_diff+col_buffer_.offset(0,k)); + intermediate.cpu_data(), Dtype(1.0), + x_diff+col_buffer_.offset(0, k)); } } // col2im back to the data - col2im_cpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); - + col2im_cpu(x_diff, channels_, height_, + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); } } - } #ifdef CPU_ONLY diff --git a/src/caffe/layers/local_layer.cu b/src/caffe/layers/local_layer.cu index f393ac6e4d7..da739c68e6a 100644 --- a/src/caffe/layers/local_layer.cu +++ b/src/caffe/layers/local_layer.cu @@ -11,9 +11,10 @@ namespace caffe { template -__global__ void local_update1_gpu_kernel(const Dtype* data_A, const Dtype* data_B, - Dtype* data_R, const int filter_num, - const int location_num, const int output_num) { +__global__ void local_update1_gpu_kernel( + const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { int total = filter_num * location_num * output_num; CUDA_KERNEL_LOOP(index, total) { int p = index % location_num; @@ -24,63 +25,75 @@ __global__ void local_update1_gpu_kernel(const Dtype* data_A, const Dtype* data_ } template -void local_update1_gpu(const Dtype* data_A, const Dtype* data_B, - Dtype* data_R, const int filter_num, - const int location_num, const int output_num) { +void local_update1_gpu( + const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { // data_A is output_num x location_num // data_B is filter_num x location_num - // data_R is output_num x filter_num x location_num, the update performed is Rqnp += Aqp * Bnp + // data_R is output_num x filter_num x location_num, + // the update performed is Rqnp += Aqp * Bnp + const int nthreads = filter_num * location_num * output_num; - // NOLINT_NEXT_LINE(whitespace/operators) - local_update1_gpu_kernel<<>>(data_A, data_B, data_R, filter_num, location_num, output_num); + local_update1_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + data_A, data_B, data_R, filter_num, location_num, output_num); CUDA_POST_KERNEL_CHECK; } // Explicit instantiation -template void local_update1_gpu(const float* data_A, const float* data_B, - float* data_R, const int filter_num, - const int location_num, const int output_num); -template void local_update1_gpu(const double* data_A, const double* data_B, - double* data_R, const int filter_num, - const int location_num, const int output_num); - +template void local_update1_gpu( + const float* data_A, const float* data_B, + float* data_R, const int filter_num, + const int location_num, const int output_num); +template void local_update1_gpu( + const double* data_A, const double* data_B, + double* data_R, const int filter_num, + const int location_num, const int output_num); template -__global__ void local_update2_gpu_kernel(const Dtype* data_A, const Dtype* data_B, - Dtype* data_R, const int filter_num, - const int location_num, const int output_num) { +__global__ void local_update2_gpu_kernel( + const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { int total = filter_num * location_num; CUDA_KERNEL_LOOP(index, total) { int p = index % location_num; int n = (index / location_num); - for (int q=0; q void local_update2_gpu(const Dtype* data_A, const Dtype* data_B, - Dtype* data_R, const int filter_num, - const int location_num, const int output_num) { + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { // data_A is output_num x location_num // data_B is output_num x filter_num x location_num - // data_R is filter_num x location_num, the update performed is Rnp += \sum_q(Aqp * Bqnp) + // data_R is filter_num x location_num, + // the update performed is Rnp += \sum_q(Aqp * Bqnp) + int nthreads = filter_num * location_num; + - // NOLINT_NEXT_LINE(whitespace/operators) - local_update2_gpu_kernel<<>>(data_A, data_B, data_R, filter_num, location_num, output_num); + local_update2_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + data_A, data_B, data_R, filter_num, + location_num, output_num); CUDA_POST_KERNEL_CHECK; } // Explicit instantiation -template void local_update2_gpu(const float* data_A, const float* data_B, - float* data_R, const int filter_num, - const int location_num, const int output_num); -template void local_update2_gpu(const double* data_A, const double* data_B, - double* data_R, const int filter_num, - const int location_num, const int output_num); +template void local_update2_gpu( + const float* data_A, const float* data_B, + float* data_R, const int filter_num, + const int location_num, const int output_num); +template void local_update2_gpu( + const double* data_A, const double* data_B, + double* data_R, const int filter_num, + const int location_num, const int output_num); @@ -88,7 +101,7 @@ template void local_update2_gpu(const double* data_A, const double* data /// @brief refer to CPU forward -- the BLAS implementation is the same. template void LocalLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { + const vector*>& top) { Dtype* x_data = col_buffer_.mutable_gpu_data(); const Dtype* weight = this->blobs_[0]->gpu_data(); @@ -104,32 +117,32 @@ void LocalLayer::Forward_gpu(const vector*>& bottom, Blob intermediate; intermediate.Reshape(1, 1, K_, N_); - for (int n=0; noffset(n), channels_, height_, - width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, x_data); - for (int m=0; mblobs_[0]->offset(m), - intermediate.mutable_gpu_data()); + intermediate.mutable_gpu_data()); caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, 1, N_, K_, - (Dtype)1., E.gpu_data(), intermediate.gpu_data(), - (Dtype)0., top_data + top[0]->offset(n, m)); + (Dtype)1., E.gpu_data(), intermediate.gpu_data(), + (Dtype)0., top_data + top[0]->offset(n, m)); } if (bias_term_) { caffe_gpu_add(M_ * N_, this->blobs_[1]->gpu_data(), - top_data + top[0]->offset(n), - top_data + top[0]->offset(n)); + top_data + top[0]->offset(n), + top_data + top[0]->offset(n)); } } - } /// @brief refer to CPU backward -- the BLAS implementation is the same. template void LocalLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { + const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); @@ -148,38 +161,38 @@ void LocalLayer::Backward_gpu(const vector*>& top, Dtype* xt_data = xt.mutable_gpu_data(); if (bias_term_) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); - CUDA_CHECK(cudaMemset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count())); + caffe_gpu_set(this->blobs_[1]->count(), Dtype(0.), bias_diff); for (int n = 0; n < num_; ++n) { caffe_gpu_add(M_ * N_, bias_diff, - top_diff + top[0]->offset(n), - bias_diff); + top_diff + top[0]->offset(n), + bias_diff); } } Blob buf; buf.Reshape(1, 1, K_, N_); Dtype* buf_data = buf.mutable_gpu_data(); - CUDA_CHECK(cudaMemset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count())); - for (int n=0; nblobs_[0]->count(), Dtype(0.), weight_diff); + for (int n = 0; n < num_; n++) { im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data); + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, x_data); - local_update1_gpu(top_diff+top[0]->offset(n), x_data, weight_diff, K_, N_, M_); + local_update1_gpu( + top_diff+top[0]->offset(n), x_data, + weight_diff, K_, N_, M_); if (propagate_down[0]) { - CUDA_CHECK(cudaMemset(x_diff, 0, col_buffer_.count() * sizeof(Dtype))); + caffe_gpu_set(col_buffer_.count(), Dtype(0.), x_diff); local_update2_gpu(top_diff+top[0]->offset(n), weight, x_diff, K_, N_, M_); // col2im back to the data col2im_gpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); + pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); } } } - - - INSTANTIATE_LAYER_GPU_FUNCS(LocalLayer); } // namespace caffe diff --git a/src/caffe/test/test_local_layer.cpp b/src/caffe/test/test_local_layer.cpp index 49685697829..1e16c117655 100644 --- a/src/caffe/test/test_local_layer.cpp +++ b/src/caffe/test/test_local_layer.cpp @@ -17,7 +17,7 @@ template class LocalLayerTest: public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; protected: - LocalLayerTest() + LocalLayerTest() : blob_bottom_(new Blob()), blob_top_(new Blob()) {} virtual void SetUp() { @@ -74,7 +74,7 @@ TYPED_TEST(LocalLayerTest, TestSimpleConvolution) { filler.Fill(this->blob_bottom_); LayerParameter layer_param; LocalParameter* convolution_param = - layer_param.mutable_local_param(); + layer_param.mutable_local_param(); convolution_param->set_kernel_size(3); convolution_param->set_stride(1); convolution_param->set_num_output(1); @@ -88,12 +88,13 @@ TYPED_TEST(LocalLayerTest, TestSimpleConvolution) { layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); // After the convolution, the output should all have output values 27.1 const Dtype* top_data = this->blob_top_->cpu_data(); - for (int n=0; nblob_top_->num(); n++) { - for (int k=0; kblob_top_->channels(); k++) { - for (int j=0; jblob_top_->height(); j++) { - for (int i=0; iblob_top_->width(); i++) { - int idx = j*this->blob_top_->width()+i; - EXPECT_NEAR(*(top_data+this->blob_top_->offset(n, k, j, i)), idx*27+0.1, 1e-4); + for (int n = 0; n < this->blob_top_->num(); n++) { + for (int k = 0; k < this->blob_top_->channels(); k++) { + for (int j = 0; j < this->blob_top_->height(); j++) { + for (int i = 0; i < this->blob_top_->width(); i++) { + int idx = j * this->blob_top_->width() + i; + EXPECT_NEAR(*(top_data + this->blob_top_->offset(n, k, j, i)), + idx * 27 + 0.1, 1e-4); } } } @@ -104,7 +105,7 @@ TYPED_TEST(LocalLayerTest, TestGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; LocalParameter* convolution_param = - layer_param.mutable_local_param(); + layer_param.mutable_local_param(); convolution_param->set_kernel_size(3); convolution_param->set_stride(2); convolution_param->set_num_output(2); @@ -112,7 +113,8 @@ TYPED_TEST(LocalLayerTest, TestGradient) { convolution_param->mutable_bias_filler()->set_type("gaussian"); LocalLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + checker.CheckGradientExhaustive(&layer, + this->blob_bottom_vec_, this->blob_top_vec_); } From 255d2d760b5f8edb7b6ebde06bbdf44e14a4e239 Mon Sep 17 00:00:00 2001 From: Yuncheng Li Date: Wed, 13 Jan 2016 18:19:34 -0500 Subject: [PATCH 4/5] merge with master; use new format for the example; --- .../mnist/lenet_local_train_test.prototxt | 137 +++++++++++------- examples/mnist/train_lenet_local.sh | 0 include/caffe/layers/local_layer.hpp | 4 +- src/caffe/layers/local_layer.cpp | 10 +- src/caffe/layers/local_layer.cu | 10 +- src/caffe/proto/caffe.proto | 2 +- src/caffe/test/test_local_layer.cpp | 2 +- 7 files changed, 106 insertions(+), 59 deletions(-) mode change 100644 => 100755 examples/mnist/train_lenet_local.sh diff --git a/examples/mnist/lenet_local_train_test.prototxt b/examples/mnist/lenet_local_train_test.prototxt index ff88b5f4919..81bb4787a58 100644 --- a/examples/mnist/lenet_local_train_test.prototxt +++ b/examples/mnist/lenet_local_train_test.prototxt @@ -1,42 +1,49 @@ name: "LeNet" -layers { +layer { name: "mnist" - type: DATA + type: "Data" top: "data" top: "label" - data_param { - source: "examples/mnist/mnist_train_lmdb" - backend: LMDB - batch_size: 64 + include { + phase: TRAIN } transform_param { scale: 0.00390625 } - include: { phase: TRAIN } + data_param { + source: "examples/mnist/mnist_train_lmdb" + batch_size: 64 + backend: LMDB + } } -layers { +layer { name: "mnist" - type: DATA + type: "Data" top: "data" top: "label" - data_param { - source: "examples/mnist/mnist_test_lmdb" - backend: LMDB - batch_size: 100 + include { + phase: TEST } transform_param { scale: 0.00390625 } - include: { phase: TEST } + data_param { + source: "examples/mnist/mnist_test_lmdb" + batch_size: 100 + backend: LMDB + } } - -layers { +layer { name: "conv1" - type: CONVOLUTION + type: "Convolution" bottom: "data" top: "conv1" - blobs_lr: 1 - blobs_lr: 2 + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } convolution_param { num_output: 20 kernel_size: 5 @@ -49,9 +56,9 @@ layers { } } } -layers { +layer { name: "pool1" - type: POOLING + type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { @@ -60,13 +67,17 @@ layers { stride: 2 } } -layers { +layer { name: "local1" - type: LOCAL + type: "Local" bottom: "pool1" top: "local1" - blobs_lr: 1 - blobs_lr: 1 + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } local_param { num_output: 5 kernel_size: 5 @@ -81,19 +92,23 @@ layers { } } } -layers { +layer { name: "relu1" - type: RELU + type: "ReLU" bottom: "local1" top: "local1" } -layers { +layer { name: "local2" - type: LOCAL + type: "Local" bottom: "local1" top: "local2" - blobs_lr: 1 - blobs_lr: 1 + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } local_param { num_output: 10 kernel_size: 5 @@ -108,19 +123,35 @@ layers { } } } -layers { +layer { name: "relu2" - type: RELU + type: "ReLU" bottom: "local2" top: "local2" } -layers { - name: "ip1" - type: INNER_PRODUCT + +layer { + name: "pool2" + type: "Pooling" bottom: "local2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" top: "ip1" - blobs_lr: 1 - blobs_lr: 2 + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } inner_product_param { num_output: 500 weight_filler { @@ -131,19 +162,23 @@ layers { } } } -layers { - name: "relu2" - type: RELU +layer { + name: "relu1" + type: "ReLU" bottom: "ip1" top: "ip1" } -layers { +layer { name: "ip2" - type: INNER_PRODUCT + type: "InnerProduct" bottom: "ip1" top: "ip2" - blobs_lr: 1 - blobs_lr: 2 + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } inner_product_param { num_output: 10 weight_filler { @@ -154,17 +189,19 @@ layers { } } } -layers { +layer { name: "accuracy" - type: ACCURACY + type: "Accuracy" bottom: "ip2" bottom: "label" top: "accuracy" - include: { phase: TEST } + include { + phase: TEST + } } -layers { +layer { name: "loss" - type: SOFTMAX_LOSS + type: "SoftmaxWithLoss" bottom: "ip2" bottom: "label" top: "loss" diff --git a/examples/mnist/train_lenet_local.sh b/examples/mnist/train_lenet_local.sh old mode 100644 new mode 100755 diff --git a/include/caffe/layers/local_layer.hpp b/include/caffe/layers/local_layer.hpp index 80cd1244c02..e2e4d526772 100644 --- a/include/caffe/layers/local_layer.hpp +++ b/include/caffe/layers/local_layer.hpp @@ -13,7 +13,7 @@ template class LocalLayer : public Layer { public: explicit LocalLayer(const LayerParameter& param) - : Layer(param) {} + : Layer(param), dilation_(1) {} virtual inline const char* type() const { return "Local"; } @@ -51,6 +51,8 @@ class LocalLayer : public Layer { int K_; int N_; + const int dilation_; + Blob col_buffer_; }; diff --git a/src/caffe/layers/local_layer.cpp b/src/caffe/layers/local_layer.cpp index c28ea065a5a..17e9c63de4a 100644 --- a/src/caffe/layers/local_layer.cpp +++ b/src/caffe/layers/local_layer.cpp @@ -2,6 +2,7 @@ #include "caffe/filler.hpp" #include "caffe/layers/local_layer.hpp" +#include "caffe/util/im2col.hpp" namespace caffe { @@ -111,7 +112,8 @@ void LocalLayer::Forward_cpu(const vector*>& bottom, for (int n = 0; n < num_; n++) { im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, x_data); + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); for (int m = 0; m < num_output_; m++) { caffe_mul(K_*N_, x_data, weight+this->blobs_[0]->offset(m), @@ -165,7 +167,8 @@ void LocalLayer::Backward_cpu(const vector*>& top, for (int n = 0; n < num_; n++) { im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, x_data); + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); // gradient wrt weight for (int m = 0; m < num_output_; m++) { @@ -196,7 +199,8 @@ void LocalLayer::Backward_cpu(const vector*>& top, // col2im back to the data col2im_cpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); + pad_, pad_, stride_, stride_, + dilation_, dilation_, bottom_diff + bottom[0]->offset(n)); } } } diff --git a/src/caffe/layers/local_layer.cu b/src/caffe/layers/local_layer.cu index 817076e5cc0..396a4ed0325 100644 --- a/src/caffe/layers/local_layer.cu +++ b/src/caffe/layers/local_layer.cu @@ -2,6 +2,7 @@ #include "caffe/filler.hpp" #include "caffe/layers/local_layer.hpp" +#include "caffe/util/im2col.hpp" namespace caffe { @@ -115,7 +116,8 @@ void LocalLayer::Forward_gpu(const vector*>& bottom, for (int n = 0; n < num_; n++) { im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, x_data); + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); for (int m = 0; m < num_output_; m++) { caffe_gpu_mul(K_*N_, x_data, weight+this->blobs_[0]->offset(m), @@ -171,7 +173,8 @@ void LocalLayer::Backward_gpu(const vector*>& top, for (int n = 0; n < num_; n++) { im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, x_data); + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); local_update1_gpu( top_diff+top[0]->offset(n), x_data, @@ -183,7 +186,8 @@ void LocalLayer::Backward_gpu(const vector*>& top, // col2im back to the data col2im_gpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, - pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n)); + pad_, pad_, stride_, stride_, + dilation_, dilation_, bottom_diff + bottom[0]->offset(n)); } } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index e2cee07e0f6..a1ca42a399b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -391,7 +391,7 @@ message LayerParameter { optional ThresholdParameter threshold_param = 128; optional TileParameter tile_param = 138; optional WindowDataParameter window_data_param = 129; - optional LocalParameter local_param = 134; + optional LocalParameter local_param = 140; } // Message that stores parameters used to apply transformation diff --git a/src/caffe/test/test_local_layer.cpp b/src/caffe/test/test_local_layer.cpp index 1e16c117655..a1e3a8ea1f8 100644 --- a/src/caffe/test/test_local_layer.cpp +++ b/src/caffe/test/test_local_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/local_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" From 96b20185f77d17e81efead841a18aa509f9f7c4f Mon Sep 17 00:00:00 2001 From: Yuncheng Li Date: Wed, 13 Jan 2016 18:32:10 -0500 Subject: [PATCH 5/5] remove unnecessary diff; upgrade the siamese examples --- examples/mnist/train_lenet.sh | 2 +- examples/mnist/train_lenet_local.sh | 2 +- .../mnist_siamese_local_train_test.prototxt | 400 +++++++++--------- examples/siamese/train_mnist_siamese_local.sh | 2 +- 4 files changed, 201 insertions(+), 205 deletions(-) mode change 100644 => 100755 examples/siamese/train_mnist_siamese_local.sh diff --git a/examples/mnist/train_lenet.sh b/examples/mnist/train_lenet.sh index 1f718825ded..1b6bf7d978d 100755 --- a/examples/mnist/train_lenet.sh +++ b/examples/mnist/train_lenet.sh @@ -1,3 +1,3 @@ #!/usr/bin/env sh -GLOG_logtostderr=0 GLOG_log_dir=examples/mnist/ ./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt +./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt diff --git a/examples/mnist/train_lenet_local.sh b/examples/mnist/train_lenet_local.sh index b9e29e5ceb8..c9b969c55e1 100755 --- a/examples/mnist/train_lenet_local.sh +++ b/examples/mnist/train_lenet_local.sh @@ -1,3 +1,3 @@ #!/usr/bin/env sh -GLOG_logtostderr=0 GLOG_log_dir=examples/mnist/ ./build/tools/caffe train --solver=examples/mnist/lenet_local_solver.prototxt --gpu=1 +./build/tools/caffe train --solver=examples/mnist/lenet_local_solver.prototxt --gpu=1 diff --git a/examples/siamese/mnist_siamese_local_train_test.prototxt b/examples/siamese/mnist_siamese_local_train_test.prototxt index 4212217df15..fc91301f457 100644 --- a/examples/siamese/mnist_siamese_local_train_test.prototxt +++ b/examples/siamese/mnist_siamese_local_train_test.prototxt @@ -1,50 +1,60 @@ name: "mnist_siamese_train_test" -layers { +layer { name: "pair_data" - type: DATA + type: "Data" top: "pair_data" top: "sim" + include { + phase: TRAIN + } + transform_param { + scale: 0.00390625 + } data_param { source: "examples/siamese/mnist_siamese_train_leveldb" - scale: 0.00390625 batch_size: 64 } - include: { phase: TRAIN } } -layers { +layer { name: "pair_data" - type: DATA + type: "Data" top: "pair_data" top: "sim" + include { + phase: TEST + } + transform_param { + scale: 0.00390625 + } data_param { source: "examples/siamese/mnist_siamese_test_leveldb" - scale: 0.00390625 batch_size: 100 } - include: { phase: TEST } } -layers { - name: "slice_pair" - type: SLICE - bottom: "pair_data" - top: "data" - top: "data_p" - slice_param { - slice_dim: 1 - slice_point: 1 - } +layer { + name: "slice_pair" + type: "Slice" + bottom: "pair_data" + top: "data" + top: "data_p" + slice_param { + slice_dim: 1 + slice_point: 1 + } } - - - - -layers { +layer { name: "conv1" - type: CONVOLUTION + type: "Convolution" bottom: "data" top: "conv1" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } convolution_param { num_output: 20 kernel_size: 5 @@ -56,12 +66,10 @@ layers { type: "constant" } } - param: "conv1_w" - param: "conv1_b" } -layers { +layer { name: "pool1" - type: POOLING + type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { @@ -70,15 +78,11 @@ layers { stride: 2 } } - - -layers { +layer { name: "local1" - type: LOCAL + type: "Local" bottom: "pool1" top: "local1" - blobs_lr: 1 - blobs_lr: 2 local_param { num_output: 5 kernel_size: 5 @@ -92,22 +96,26 @@ layers { value: 0.1 } } - param: "local1_w" - param: "local1_b" + param { + name: "local1_w" + lr_mult: 1 + } + param { + name: "local1_b" + lr_mult: 2 + } } -layers { +layer { name: "relu1" - type: RELU + type: "ReLU" bottom: "local1" top: "local1" } -layers { +layer { name: "local2" - type: LOCAL + type: "Local" bottom: "local1" top: "local2" - blobs_lr: 1 - blobs_lr: 2 local_param { num_output: 10 kernel_size: 5 @@ -121,29 +129,45 @@ layers { value: 0.1 } } - param: "local2_w" - param: "local2_b" + param { + name: "local2_w" + lr_mult: 1 + } + param { + name: "local2_b" + lr_mult: 2 + } } -layers { +layer { name: "relu2" - type: RELU + type: "ReLU" bottom: "local2" top: "local2" } - - - - - - - -layers { - name: "ip1" - type: INNER_PRODUCT +layer { + name: "pool2" + type: "Pooling" bottom: "local2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" top: "ip1" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } inner_product_param { num_output: 500 weight_filler { @@ -153,22 +177,26 @@ layers { type: "constant" } } - param: "ip1_w" - param: "ip1_b" } -layers { +layer { name: "relu1" - type: RELU + type: "ReLU" bottom: "ip1" top: "ip1" } -layers { +layer { name: "ip2" - type: INNER_PRODUCT + type: "InnerProduct" bottom: "ip1" top: "ip2" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "ip2_w" + lr_mult: 1 + } + param { + name: "ip2_b" + lr_mult: 2 + } inner_product_param { num_output: 10 weight_filler { @@ -178,37 +206,20 @@ layers { type: "constant" } } - param: "ip2_w" - param: "ip2_b" } - -layers { - name: "feat2" - type: INNER_PRODUCT +layer { + name: "feat" + type: "InnerProduct" bottom: "ip2" - top: "feat2" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } + top: "feat" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 } - param: "feat2_w" - param: "feat2_b" -} - -layers { - name: "feat1" - type: INNER_PRODUCT - bottom: "local1" - top: "feat1" - blobs_lr: 1 - blobs_lr: 2 inner_product_param { num_output: 2 weight_filler { @@ -218,19 +229,20 @@ layers { type: "constant" } } - param: "feat1_w" - param: "feat1_b" } - - - -layers { +layer { name: "conv1_p" - type: CONVOLUTION + type: "Convolution" bottom: "data_p" top: "conv1_p" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } convolution_param { num_output: 20 kernel_size: 5 @@ -242,12 +254,10 @@ layers { type: "constant" } } - param: "conv1_w" - param: "conv1_b" } -layers { +layer { name: "pool1_p" - type: POOLING + type: "Pooling" bottom: "conv1_p" top: "pool1_p" pooling_param { @@ -256,15 +266,11 @@ layers { stride: 2 } } - - -layers { +layer { name: "local1_p" - type: LOCAL + type: "Local" bottom: "pool1_p" top: "local1_p" - blobs_lr: 1 - blobs_lr: 2 local_param { num_output: 5 kernel_size: 5 @@ -278,22 +284,26 @@ layers { value: 0.1 } } - param: "local1_w" - param: "local1_b" + param { + name: "local1_w" + lr_mult: 1 + } + param { + name: "local1_b" + lr_mult: 2 + } } -layers { +layer { name: "relu1_p" - type: RELU + type: "ReLU" bottom: "local1_p" top: "local1_p" } -layers { +layer { name: "local2_p" - type: LOCAL + type: "Local" bottom: "local1_p" top: "local2_p" - blobs_lr: 1 - blobs_lr: 2 local_param { num_output: 10 kernel_size: 5 @@ -307,27 +317,45 @@ layers { value: 0.1 } } - param: "local2_w" - param: "local2_b" + param { + name: "local2_w" + lr_mult: 1 + } + param { + name: "local2_b" + lr_mult: 2 + } } -layers { +layer { name: "relu2_p" - type: RELU + type: "ReLU" bottom: "local2_p" top: "local2_p" } - - - - - -layers { - name: "ip1_p" - type: INNER_PRODUCT +layer { + name: "pool2_p" + type: "Pooling" bottom: "local2_p" + top: "pool2_p" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1_p" + type: "InnerProduct" + bottom: "pool2_p" top: "ip1_p" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } inner_product_param { num_output: 500 weight_filler { @@ -337,22 +365,26 @@ layers { type: "constant" } } - param: "ip1_w" - param: "ip1_b" } -layers { +layer { name: "relu1_p" - type: RELU + type: "ReLU" bottom: "ip1_p" top: "ip1_p" } -layers { +layer { name: "ip2_p" - type: INNER_PRODUCT + type: "InnerProduct" bottom: "ip1_p" top: "ip2_p" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "ip2_w" + lr_mult: 1 + } + param { + name: "ip2_b" + lr_mult: 2 + } inner_product_param { num_output: 10 weight_filler { @@ -362,38 +394,20 @@ layers { type: "constant" } } - param: "ip2_w" - param: "ip2_b" } - -layers { - name: "feat2_p" - type: INNER_PRODUCT +layer { + name: "feat_p" + type: "InnerProduct" bottom: "ip2_p" - top: "feat2_p" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } + top: "feat_p" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 } - param: "feat2_w" - param: "feat2_b" -} - - -layers { - name: "feat1_p" - type: INNER_PRODUCT - bottom: "local1_p" - top: "feat1_p" - blobs_lr: 1 - blobs_lr: 2 inner_product_param { num_output: 2 weight_filler { @@ -403,33 +417,15 @@ layers { type: "constant" } } - param: "feat1_w" - param: "feat1_b" } - - -layers { - name: "loss2" - type: CONTRASTIVE_LOSS - contrastive_loss_param { - margin: 1.0 - } - bottom: "feat2" - bottom: "feat2_p" - bottom: "sim" - top: "loss2" -} - - -layers { - name: "loss1" - type: CONTRASTIVE_LOSS - contrastive_loss_param { - margin: 1.0 - } - bottom: "feat1" - bottom: "feat1_p" - bottom: "sim" - top: "loss1" +layer { + name: "loss" + type: "ContrastiveLoss" + bottom: "feat" + bottom: "feat_p" + bottom: "sim" + top: "loss" + contrastive_loss_param { + margin: 1 + } } - diff --git a/examples/siamese/train_mnist_siamese_local.sh b/examples/siamese/train_mnist_siamese_local.sh old mode 100644 new mode 100755 index 83035bf2ccc..aa84bcb718a --- a/examples/siamese/train_mnist_siamese_local.sh +++ b/examples/siamese/train_mnist_siamese_local.sh @@ -2,4 +2,4 @@ TOOLS=./build/tools -GLOG_logtostderr=0 GLOG_log_dir=examples/siamese/ $TOOLS/caffe train --solver=examples/siamese/mnist_siamese_local_solver.prototxt +$TOOLS/caffe train --solver=examples/siamese/mnist_siamese_local_solver.prototxt