diff --git a/examples/mnist/lenet_local_solver.prototxt b/examples/mnist/lenet_local_solver.prototxt
new file mode 100644
index 00000000000..a4b02d25e7e
--- /dev/null
+++ b/examples/mnist/lenet_local_solver.prototxt
@@ -0,0 +1,25 @@
+# The train/test net protocol buffer definition
+net: "examples/mnist/lenet_local_train_test.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of MNIST, we have test batch size 100 and 100 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 100
+# Carry out testing every 500 training iterations.
+test_interval: 500
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.01
+momentum: 0.9
+weight_decay: 0.0005
+# The learning rate policy
+lr_policy: "inv"
+gamma: 0.0001
+power: 0.75
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 10000
+# snapshot intermediate results
+snapshot: 5000
+snapshot_prefix: "examples/mnist/lenet_local"  # distinct prefix: do not clobber the stock LeNet snapshots
+# solver mode: CPU or GPU
+solver_mode: GPU
diff --git a/examples/mnist/lenet_local_train_test.prototxt b/examples/mnist/lenet_local_train_test.prototxt
new file mode 100644
index 00000000000..81bb4787a58
--- /dev/null
+++ b/examples/mnist/lenet_local_train_test.prototxt
@@ -0,0 +1,208 @@
+name: "LeNet"
+layer {
+  name: "mnist"
+  type: "Data"
+  top: "data"
+  top: "label"
+  include {
+    phase: TRAIN
+  }
+  transform_param {
+    scale: 0.00390625
+  }
+  data_param {
+    source: "examples/mnist/mnist_train_lmdb"
+    batch_size: 64
+    backend: LMDB
+  }
+}
+layer {
+  name: "mnist"
+  type: "Data"
+  top: "data"
+  top: "label"
+  include {
+    phase: TEST
+  }
+  transform_param {
+    scale: 0.00390625
+  }
+  data_param {
+    source: "examples/mnist/mnist_test_lmdb"
+    batch_size: 100
+    backend: LMDB
+  }
+}
+layer {
+  name: "conv1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1"
+  param {
+    lr_mult: 1
+  }
+  param {
+    lr_mult: 2
+  }
+  convolution_param {
+    num_output: 20
+    kernel_size: 5
+    stride:
1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "local1" + type: "Local" + bottom: "pool1" + top: "local1" + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } + local_param { + num_output: 5 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "local1" + top: "local1" +} +layer { + name: "local2" + type: "Local" + bottom: "local1" + top: "local2" + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } + local_param { + num_output: 10 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "local2" + top: "local2" +} + +layer { + name: "pool2" + type: "Pooling" + bottom: "local2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "ip2" + type: "InnerProduct" + bottom: "ip1" + top: "ip2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "ip2" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip2" + bottom: "label" + top: "loss" +} diff --git a/examples/mnist/train_lenet_local.sh 
b/examples/mnist/train_lenet_local.sh
new file mode 100755
index 00000000000..c9b969c55e1
--- /dev/null
+++ b/examples/mnist/train_lenet_local.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+
+./build/tools/caffe train --solver=examples/mnist/lenet_local_solver.prototxt $@
diff --git a/examples/siamese/mnist_siamese_local_solver.prototxt b/examples/siamese/mnist_siamese_local_solver.prototxt
new file mode 100644
index 00000000000..c85bb90fc87
--- /dev/null
+++ b/examples/siamese/mnist_siamese_local_solver.prototxt
@@ -0,0 +1,25 @@
+# The train/test net protocol buffer definition
+net: "examples/siamese/mnist_siamese_local_train_test.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of MNIST, we have test batch size 100 and 100 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 100
+# Carry out testing every 500 training iterations.
+test_interval: 500
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0000 +# The learning rate policy +lr_policy: "inv" +gamma: 0.0001 +power: 0.75 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 50000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "examples/siamese/mnist_siamese" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/siamese/mnist_siamese_local_train_test.prototxt b/examples/siamese/mnist_siamese_local_train_test.prototxt new file mode 100644 index 00000000000..fc91301f457 --- /dev/null +++ b/examples/siamese/mnist_siamese_local_train_test.prototxt @@ -0,0 +1,431 @@ +name: "mnist_siamese_train_test" +layer { + name: "pair_data" + type: "Data" + top: "pair_data" + top: "sim" + include { + phase: TRAIN + } + transform_param { + scale: 0.00390625 + } + data_param { + source: "examples/siamese/mnist_siamese_train_leveldb" + batch_size: 64 + } +} +layer { + name: "pair_data" + type: "Data" + top: "pair_data" + top: "sim" + include { + phase: TEST + } + transform_param { + scale: 0.00390625 + } + data_param { + source: "examples/siamese/mnist_siamese_test_leveldb" + batch_size: 100 + } +} +layer { + name: "slice_pair" + type: "Slice" + bottom: "pair_data" + top: "data" + top: "data_p" + slice_param { + slice_dim: 1 + slice_point: 1 + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "local1" + type: "Local" + bottom: "pool1" + top: "local1" + local_param { + num_output: 5 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: 
"constant" + value: 0.1 + } + } + param { + name: "local1_w" + lr_mult: 1 + } + param { + name: "local1_b" + lr_mult: 2 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "local1" + top: "local1" +} +layer { + name: "local2" + type: "Local" + bottom: "local1" + top: "local2" + local_param { + num_output: 10 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param { + name: "local2_w" + lr_mult: 1 + } + param { + name: "local2_b" + lr_mult: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "local2" + top: "local2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "local2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "ip2" + type: "InnerProduct" + bottom: "ip1" + top: "ip2" + param { + name: "ip2_w" + lr_mult: 1 + } + param { + name: "ip2_b" + lr_mult: 2 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip2" + top: "feat" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 + } + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "conv1_p" + type: "Convolution" + bottom: "data_p" + top: "conv1_p" + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler 
{ + type: "constant" + } + } +} +layer { + name: "pool1_p" + type: "Pooling" + bottom: "conv1_p" + top: "pool1_p" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "local1_p" + type: "Local" + bottom: "pool1_p" + top: "local1_p" + local_param { + num_output: 5 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param { + name: "local1_w" + lr_mult: 1 + } + param { + name: "local1_b" + lr_mult: 2 + } +} +layer { + name: "relu1_p" + type: "ReLU" + bottom: "local1_p" + top: "local1_p" +} +layer { + name: "local2_p" + type: "Local" + bottom: "local1_p" + top: "local2_p" + local_param { + num_output: 10 + kernel_size: 5 + stride: 1 + pad: 0 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } + param { + name: "local2_w" + lr_mult: 1 + } + param { + name: "local2_b" + lr_mult: 2 + } +} +layer { + name: "relu2_p" + type: "ReLU" + bottom: "local2_p" + top: "local2_p" +} +layer { + name: "pool2_p" + type: "Pooling" + bottom: "local2_p" + top: "pool2_p" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1_p" + type: "InnerProduct" + bottom: "pool2_p" + top: "ip1_p" + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_p" + type: "ReLU" + bottom: "ip1_p" + top: "ip1_p" +} +layer { + name: "ip2_p" + type: "InnerProduct" + bottom: "ip1_p" + top: "ip2_p" + param { + name: "ip2_w" + lr_mult: 1 + } + param { + name: "ip2_b" + lr_mult: 2 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "feat_p" + type: "InnerProduct" + bottom: "ip2_p" + top: "feat_p" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: 
"feat_b" + lr_mult: 2 + } + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "loss" + type: "ContrastiveLoss" + bottom: "feat" + bottom: "feat_p" + bottom: "sim" + top: "loss" + contrastive_loss_param { + margin: 1 + } +} diff --git a/examples/siamese/train_mnist_siamese_local.sh b/examples/siamese/train_mnist_siamese_local.sh new file mode 100755 index 00000000000..aa84bcb718a --- /dev/null +++ b/examples/siamese/train_mnist_siamese_local.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train --solver=examples/siamese/mnist_siamese_local_solver.prototxt diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index dad9ad46b3b..f57a445956f 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -165,6 +165,29 @@ class XavierFiller : public Filler { } }; +template +class TestLocalFiller : public Filler { + public: + explicit TestLocalFiller(const FillerParameter& param) + : Filler(param) {} + virtual void Fill(Blob* blob) { + LOG(INFO) << "Doing mutable cpu"; + LOG(INFO) << "blobs" << blob; + Dtype* data = blob->mutable_cpu_data(); + LOG(INFO) << "Done Doing mutable cpu"; + CHECK_EQ(blob->channels(), 1); + + for (int n = 0; n < blob->num(); n++) { + for (int j = 0; j < blob->height(); j++) { + for (int i = 0; i < blob->width(); i++) { + *(data+blob->offset(n, 0, j, i)) = i; + } + } + } + } +}; + + /** * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming @@ -280,6 +303,8 @@ Filler* GetFiller(const FillerParameter& param) { return new UniformFiller(param); } else if (type == "xavier") { return new XavierFiller(param); + } else if (type == "test_local") { + return new TestLocalFiller(param); } else if (type == "msra") { return new MSRAFiller(param); } else if (type == "bilinear") { diff --git a/include/caffe/layers/local_layer.hpp 
b/include/caffe/layers/local_layer.hpp new file mode 100644 index 00000000000..e2e4d526772 --- /dev/null +++ b/include/caffe/layers/local_layer.hpp @@ -0,0 +1,61 @@ +#ifndef HEADER_LOCAL_LAYER +#define HEADER_LOCAL_LAYER + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template +class LocalLayer : public Layer { + public: + explicit LocalLayer(const LayerParameter& param) + : Layer(param), dilation_(1) {} + + virtual inline const char* type() const { return "Local"; } + + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline bool EqualNumBottomTopBlobs() const { return true; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + + int kernel_size_; + int stride_; + int num_; + int channels_; + int pad_; + int height_, width_; + int height_out_, width_out_; + int num_output_; + bool bias_term_; + + int M_; + int K_; + int N_; + + const int dilation_; + + Blob col_buffer_; +}; + +} // namespace caffe + +#endif diff --git a/src/caffe/layers/local_layer.cpp b/src/caffe/layers/local_layer.cpp new file mode 100644 index 00000000000..17e9c63de4a --- /dev/null +++ b/src/caffe/layers/local_layer.cpp @@ -0,0 +1,215 @@ +#include + +#include "caffe/filler.hpp" +#include "caffe/layers/local_layer.hpp" +#include "caffe/util/im2col.hpp" + +namespace caffe { + +template +void LocalLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + 
CHECK_EQ(bottom.size(), 1) << "Local Layer takes a single blob as input.";
+  CHECK_EQ(top.size(), 1) << "Local Layer takes a single blob as output.";
+
+  kernel_size_ = this->layer_param_.local_param().kernel_size();
+  stride_ = this->layer_param_.local_param().stride();
+  pad_ = this->layer_param_.local_param().pad();
+  num_ = bottom[0]->num();
+  channels_ = bottom[0]->channels();
+  height_ = bottom[0]->height();
+  width_ = bottom[0]->width();
+  num_output_ = this->layer_param_.local_param().num_output();
+
+  height_out_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
+  width_out_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
+
+  M_ = num_output_;  // output maps
+  K_ = channels_ * kernel_size_ * kernel_size_;  // entries per input patch
+  N_ = height_out_ * width_out_;  // spatial output locations
+
+  CHECK_GT(num_output_, 0);
+  CHECK_GE(height_, kernel_size_) << "height smaller than kernel size";
+  CHECK_GE(width_, kernel_size_) << "width smaller than kernel size";
+  // Set the parameters
+  bias_term_ = this->layer_param_.local_param().bias_term();
+
+  // Check if we need to set up the weights
+  if (this->blobs_.size() > 0) {
+    LOG(INFO) << "Skipping parameter initialization";
+  } else {
+    if (bias_term_) {
+      this->blobs_.resize(2);
+    } else {
+      this->blobs_.resize(1);
+    }
+    // Initialize the weight: unshared filters, one K_-vector per location
+    this->blobs_[0].reset(new Blob<Dtype>(
+        num_output_, 1, K_, N_));
+    // fill the weights
+    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
+        this->layer_param_.local_param().weight_filler()));
+    weight_filler->Fill(this->blobs_[0].get());
+    // If necessary, initialize and fill the bias term
+    if (bias_term_) {
+      this->blobs_[1].reset(new Blob<Dtype>(1, 1, M_, N_));
+      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
+          this->layer_param_.local_param().bias_filler()));
+      bias_filler->Fill(this->blobs_[1].get());
+    }
+  }
+}
+
+template <typename Dtype>
+void LocalLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with"
+    " weights.";
+  // TODO: generalize to handle inputs of different shapes.
+  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
+    CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
+    CHECK_EQ(channels_, bottom[bottom_id]->channels())
+        << "Inputs must have same channels.";
+    CHECK_EQ(height_, bottom[bottom_id]->height())
+        << "Inputs must have same height.";
+    CHECK_EQ(width_, bottom[bottom_id]->width())
+        << "Inputs must have same width.";
+  }
+
+  // Shape the tops.
+  for (int top_id = 0; top_id < top.size(); ++top_id) {
+    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
+  }
+
+  // The im2col result buffer would only hold one image at a time to avoid
+  // overly large memory usage.
+  col_buffer_.Reshape(
+      1, channels_ * kernel_size_ * kernel_size_, height_out_, width_out_);
+
+  // NOTE(review): a second, byte-identical top-Reshape loop stood here; it
+  // redundantly reshaped every top blob a second time and was removed.
+  // These comment lines keep the patch hunk's added-line count aligned.
+}
+
+template <typename Dtype>
+void LocalLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+
+  Dtype* x_data = col_buffer_.mutable_cpu_data();
+  const Dtype* weight = this->blobs_[0]->cpu_data();
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = top[0]->mutable_cpu_data();
+
+  Blob<Dtype> E;
+  E.Reshape(1, 1, 1, K_);
+  FillerParameter filler_param;
+  filler_param.set_value(1);
+  ConstantFiller<Dtype> filler(filler_param);
+  filler.Fill(&E);
+
+  Blob<Dtype> intermediate;
+  intermediate.Reshape(1, 1, K_, N_);
+  for (int n = 0; n < num_; n++) {
+    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
+        width_, kernel_size_, kernel_size_,
+        pad_, pad_, stride_, stride_,
+        dilation_, dilation_, x_data);
+
+    for (int m = 0; m < num_output_; m++) {
+      caffe_mul(K_*N_, x_data, weight+this->blobs_[0]->offset(m),
+          intermediate.mutable_cpu_data());
+
+      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, 1, N_, K_,
+          (Dtype)1., E.cpu_data(),
+          intermediate.cpu_data(),
+          (Dtype)0., top_data + top[0]->offset(n, m));
+    }
+
+    if (bias_term_) {
+      caffe_add(M_ * N_, this->blobs_[1]->cpu_data(),
+
top_data + top[0]->offset(n), + top_data + top[0]->offset(n)); + } + } +} + +template +void LocalLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + Dtype* x_data = col_buffer_.mutable_cpu_data(); + Dtype* x_diff = col_buffer_.mutable_cpu_diff(); + const Dtype* weight = this->blobs_[0]->cpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); + Dtype* bias_diff = NULL; + + Blob intermediate; + intermediate.Reshape(1, 1, 1, N_); + + Blob xt; + xt.Reshape(1, 1, K_, N_); + Dtype* xt_data = xt.mutable_cpu_data(); + + if (bias_term_) { + bias_diff = this->blobs_[1]->mutable_cpu_diff(); + caffe_set(this->blobs_[1]->count(), Dtype(0.0), bias_diff); + for (int n = 0; n < num_; ++n) { + caffe_add(M_ * N_, bias_diff, + top_diff + top[0]->offset(n), + bias_diff); + } + } + + caffe_set(this->blobs_[0]->count(), Dtype(0.0), weight_diff); + for (int n = 0; n < num_; n++) { + im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); + + // gradient wrt weight + for (int m = 0; m < num_output_; m++) { + Dtype* filter_weight_diff = weight_diff+this->blobs_[0]->offset(m); + for (int k = 0; k < K_; k++) { + caffe_mul(N_, top_diff+top[0]->offset(n, m), + x_data+col_buffer_.offset(0, k), xt_data+xt.offset(0, 0, k)); + } + caffe_cpu_axpby(K_*N_, Dtype(1.0), xt_data, + Dtype(1.0), filter_weight_diff); + } + + // gradient wrt bottom data + if (propagate_down[0]) { + caffe_set(col_buffer_.count(), Dtype(0.0), x_diff); + for (int m = 0; m < num_output_; m++) { + for (int k = 0; k < K_; k++) { + caffe_mul(N_, top_diff+top[0]->offset(n, m), + weight+this->blobs_[0]->offset(m, 0, k), + intermediate.mutable_cpu_data()); + + caffe_cpu_axpby(N_, Dtype(1.0), + 
intermediate.cpu_data(), Dtype(1.0), + x_diff+col_buffer_.offset(0, k)); + } + } + + // col2im back to the data + col2im_cpu(x_diff, channels_, height_, + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, + dilation_, dilation_, bottom_diff + bottom[0]->offset(n)); + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(LocalLayer); +#endif + +INSTANTIATE_CLASS(LocalLayer); +REGISTER_LAYER_CLASS(Local); + +} // namespace caffe diff --git a/src/caffe/layers/local_layer.cu b/src/caffe/layers/local_layer.cu new file mode 100644 index 00000000000..396a4ed0325 --- /dev/null +++ b/src/caffe/layers/local_layer.cu @@ -0,0 +1,197 @@ +#include + +#include "caffe/filler.hpp" +#include "caffe/layers/local_layer.hpp" +#include "caffe/util/im2col.hpp" + +namespace caffe { + +template +__global__ void local_update1_gpu_kernel( + const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + int total = filter_num * location_num * output_num; + CUDA_KERNEL_LOOP(index, total) { + int p = index % location_num; + int n = (index / location_num) % filter_num; + int q = (index / location_num) / filter_num; + data_R[index] += data_A[q*location_num+p] * data_B[n*location_num+p]; + } +} + +template +void local_update1_gpu( + const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + // data_A is output_num x location_num + // data_B is filter_num x location_num + // data_R is output_num x filter_num x location_num, + // the update performed is Rqnp += Aqp * Bnp + const int nthreads = filter_num * location_num * output_num; + + local_update1_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + data_A, data_B, data_R, filter_num, location_num, output_num); + CUDA_POST_KERNEL_CHECK; +} + +// Explicit instantiation +template void local_update1_gpu( + const float* data_A, const float* data_B, + float* data_R, const int filter_num, + const 
int location_num, const int output_num); +template void local_update1_gpu( + const double* data_A, const double* data_B, + double* data_R, const int filter_num, + const int location_num, const int output_num); + +template +__global__ void local_update2_gpu_kernel( + const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + int total = filter_num * location_num; + CUDA_KERNEL_LOOP(index, total) { + int p = index % location_num; + int n = (index / location_num); + for (int q = 0; q < output_num; q++) { + data_R[index] += + data_A[q*location_num+p] * data_B[(q*filter_num+n)*location_num+p]; + } + } +} + +template +void local_update2_gpu(const Dtype* data_A, const Dtype* data_B, + Dtype* data_R, const int filter_num, + const int location_num, const int output_num) { + // data_A is output_num x location_num + // data_B is output_num x filter_num x location_num + // data_R is filter_num x location_num, + // the update performed is Rnp += \sum_q(Aqp * Bqnp) + int nthreads = filter_num * location_num; + + + local_update2_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + data_A, data_B, data_R, filter_num, + location_num, output_num); + CUDA_POST_KERNEL_CHECK; +} + +// Explicit instantiation +template void local_update2_gpu( + const float* data_A, const float* data_B, + float* data_R, const int filter_num, + const int location_num, const int output_num); +template void local_update2_gpu( + const double* data_A, const double* data_B, + double* data_R, const int filter_num, + const int location_num, const int output_num); + + + + +/// @brief refer to CPU forward -- the BLAS implementation is the same. 
+template +void LocalLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + + Dtype* x_data = col_buffer_.mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + + Blob E; + E.Reshape(1, 1, 1, K_); + FillerParameter filler_param; + filler_param.set_value(1); + ConstantFiller filler(filler_param); + filler.Fill(&E); + + Blob intermediate; + intermediate.Reshape(1, 1, K_, N_); + for (int n = 0; n < num_; n++) { + im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); + + for (int m = 0; m < num_output_; m++) { + caffe_gpu_mul(K_*N_, x_data, weight+this->blobs_[0]->offset(m), + intermediate.mutable_gpu_data()); + + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, 1, N_, K_, + (Dtype)1., E.gpu_data(), intermediate.gpu_data(), + (Dtype)0., top_data + top[0]->offset(n, m)); + } + + if (bias_term_) { + caffe_gpu_add(M_ * N_, this->blobs_[1]->gpu_data(), + top_data + top[0]->offset(n), + top_data + top[0]->offset(n)); + } + } +} + +/// @brief refer to CPU backward -- the BLAS implementation is the same. 
+template +void LocalLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + Dtype* x_data = col_buffer_.mutable_gpu_data(); + Dtype* x_diff = col_buffer_.mutable_gpu_diff(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); + Dtype* bias_diff = NULL; + + Blob intermediate; + intermediate.Reshape(1, 1, 1, N_); + + Blob xt; + xt.Reshape(1, 1, K_, N_); + Dtype* xt_data = xt.mutable_gpu_data(); + if (bias_term_) { + bias_diff = this->blobs_[1]->mutable_gpu_diff(); + caffe_gpu_set(this->blobs_[1]->count(), Dtype(0.), bias_diff); + for (int n = 0; n < num_; ++n) { + caffe_gpu_add(M_ * N_, bias_diff, + top_diff + top[0]->offset(n), + bias_diff); + } + } + + Blob buf; + buf.Reshape(1, 1, K_, N_); + Dtype* buf_data = buf.mutable_gpu_data(); + caffe_gpu_set(this->blobs_[0]->count(), Dtype(0.), weight_diff); + for (int n = 0; n < num_; n++) { + im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, + dilation_, dilation_, x_data); + + local_update1_gpu( + top_diff+top[0]->offset(n), x_data, + weight_diff, K_, N_, M_); + + if (propagate_down[0]) { + caffe_gpu_set(col_buffer_.count(), Dtype(0.), x_diff); + local_update2_gpu(top_diff+top[0]->offset(n), weight, x_diff, K_, N_, M_); + + // col2im back to the data + col2im_gpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_, + pad_, pad_, stride_, stride_, + dilation_, dilation_, bottom_diff + bottom[0]->offset(n)); + } + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(LocalLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 019aa614373..a1ca42a399b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 
+306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 140 (last added: batch_norm_param) +// LayerParameter next available layer-specific ID: 141 (last added: local_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -391,6 +391,7 @@ message LayerParameter { optional ThresholdParameter threshold_param = 128; optional TileParameter tile_param = 138; optional WindowDataParameter window_data_param = 129; + optional LocalParameter local_param = 140; } // Message that stores parameters used to apply transformation @@ -428,7 +429,7 @@ message LossParameter { // Outputs that receive the ignore label will NOT be ignored in computing // the normalization factor. FULL = 0; - // Divide by the total number of output locations that do not take the + // Divide by the total number of output locations that do not take the // ignore_label. If ignore_label is not set, this behaves like FULL. VALID = 1; // Divide by the batch size. @@ -1254,3 +1255,14 @@ message PReLUParameter { // Whether or not slope paramters are shared across channels. 
optional bool channel_shared = 2 [default = false]; } + +// Message that stores parameters used by LocalLayer +message LocalParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional uint32 pad = 3 [default = 0]; // The padding size + optional uint32 kernel_size = 4; // The kernel size + optional uint32 stride = 6 [default = 1]; // The stride + optional FillerParameter weight_filler = 7; // The filler for the weight + optional FillerParameter bias_filler = 8; // The filler for the bias +} diff --git a/src/caffe/test/test_local_layer.cpp b/src/caffe/test/test_local_layer.cpp new file mode 100644 index 00000000000..a1e3a8ea1f8 --- /dev/null +++ b/src/caffe/test/test_local_layer.cpp @@ -0,0 +1,121 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/local_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class LocalLayerTest: public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + LocalLayerTest() + : blob_bottom_(new Blob()), + blob_top_(new Blob()) {} + virtual void SetUp() { + blob_bottom_->Reshape(2, 3, 6, 4); + // fill the values + FillerParameter filler_param; + filler_param.set_value(1.); + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + + virtual ~LocalLayerTest() { delete blob_bottom_; delete blob_top_; } + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(LocalLayerTest, TestDtypesAndDevices); + +TYPED_TEST(LocalLayerTest, TestSetup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LocalParameter* 
convolution_param =
+      layer_param.mutable_local_param();
+  convolution_param->set_kernel_size(3);
+  convolution_param->set_stride(2);
+  convolution_param->set_num_output(4);
+  shared_ptr<LocalLayer<Dtype> > layer(
+      new LocalLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->channels(), 4);
+  EXPECT_EQ(this->blob_top_->height(), 2);
+  EXPECT_EQ(this->blob_top_->width(), 1);
+  convolution_param->set_num_output(3);
+  layer.reset(new LocalLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->channels(), 3);
+  EXPECT_EQ(this->blob_top_->height(), 2);
+  EXPECT_EQ(this->blob_top_->width(), 1);
+}
+
+
+TYPED_TEST(LocalLayerTest, TestSimpleConvolution) {
+  typedef typename TypeParam::Dtype Dtype;
+  // Forward an all-ones input through weights set by the "test_local" filler.
+  FillerParameter filler_param;
+  filler_param.set_value(1.);
+  ConstantFiller<Dtype> filler(filler_param);
+  filler.Fill(this->blob_bottom_);
+  LayerParameter layer_param;
+  LocalParameter* convolution_param =
+      layer_param.mutable_local_param();
+  convolution_param->set_kernel_size(3);
+  convolution_param->set_stride(1);
+  convolution_param->set_num_output(1);
+  convolution_param->mutable_weight_filler()->set_type("test_local");
+  convolution_param->mutable_weight_filler()->set_value(1);
+  convolution_param->mutable_bias_filler()->set_type("constant");
+  convolution_param->mutable_bias_filler()->set_value(0.1);
+  shared_ptr<LocalLayer<Dtype> > layer(
+      new LocalLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  // Each output is idx * 27 + 0.1: weight(k, idx) = idx, summed over K = 27.
+  const Dtype* top_data = this->blob_top_->cpu_data();
+  for (int n = 0; n < this->blob_top_->num(); n++) {
+    for (int k = 0; k < this->blob_top_->channels(); k++) {
+      for (int j
= 0; j < this->blob_top_->height(); j++) { + for (int i = 0; i < this->blob_top_->width(); i++) { + int idx = j * this->blob_top_->width() + i; + EXPECT_NEAR(*(top_data + this->blob_top_->offset(n, k, j, i)), + idx * 27 + 0.1, 1e-4); + } + } + } + } +} + +TYPED_TEST(LocalLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LocalParameter* convolution_param = + layer_param.mutable_local_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + LocalLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, + this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe