From 1bd0c798a4cef5e63b6ecd7ff19e7364cfb188fb Mon Sep 17 00:00:00 2001 From: Wangyida Date: Thu, 3 Dec 2015 11:08:24 +0800 Subject: [PATCH] Implement CNN Triplet training. --- examples/triplet/convert_triplet_data.cpp | 236 ++++++ examples/triplet/create_3d_triplet.sh | 27 + examples/triplet/multipie_triplet.prototxt | 110 +++ .../triplet/multipie_triplet_solver.prototxt | 25 + .../multipie_triplet_train_test.prototxt | 191 +++++ examples/triplet/objtrans.py | 18 + examples/triplet/pascal3d_triplet.prototxt | 295 ++++++++ .../triplet/pascal3d_triplet_solver.prototxt | 25 + .../pascal3d_triplet_train_test.prototxt | 335 +++++++++ examples/triplet/readme.md | 97 +++ examples/triplet/train_multipie_triplet.sh | 8 + examples/triplet/train_pascal_triplet.sh | 5 + include/caffe/layers/triplet_loss_layer.hpp | 78 ++ src/caffe/layers/triplet_loss_layer.cpp | 684 ++++++++++++++++++ src/caffe/layers/triplet_loss_layer.cu | 650 +++++++++++++++++ src/caffe/proto/caffe.proto | 12 +- src/caffe/test/test_triplet_loss_layer.cpp | 128 ++++ 17 files changed, 2923 insertions(+), 1 deletion(-) create mode 100644 examples/triplet/convert_triplet_data.cpp create mode 100644 examples/triplet/create_3d_triplet.sh create mode 100644 examples/triplet/multipie_triplet.prototxt create mode 100644 examples/triplet/multipie_triplet_solver.prototxt create mode 100644 examples/triplet/multipie_triplet_train_test.prototxt create mode 100644 examples/triplet/objtrans.py create mode 100644 examples/triplet/pascal3d_triplet.prototxt create mode 100644 examples/triplet/pascal3d_triplet_solver.prototxt create mode 100644 examples/triplet/pascal3d_triplet_train_test.prototxt create mode 100644 examples/triplet/readme.md create mode 100644 examples/triplet/train_multipie_triplet.sh create mode 100644 examples/triplet/train_pascal_triplet.sh create mode 100644 include/caffe/layers/triplet_loss_layer.hpp create mode 100644 src/caffe/layers/triplet_loss_layer.cpp create mode 100755 
src/caffe/layers/triplet_loss_layer.cu
 create mode 100644 src/caffe/test/test_triplet_loss_layer.cpp
diff --git a/examples/triplet/convert_triplet_data.cpp b/examples/triplet/convert_triplet_data.cpp
new file mode 100644
index 00000000000..7eb62479b32
--- /dev/null
+++ b/examples/triplet/convert_triplet_data.cpp
@@ -0,0 +1,252 @@
+// Usage:
+//   convert_triplet_data input_image_file input_label_file output_db_file
+//       class_number rgb_use
+//
+// Converts a binary image file and a binary label file into a LevelDB of
+// Datum records arranged as 5-sample triplet sets: a reference sample, one
+// positive sample and three negative samples. Samples are picked according to
+// the category and pose stored in each label; modify the conditions in
+// convert_dataset() if your training data must be arranged by other criteria.
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>  // NOLINT(readability/streams)
+#include <string>
+#include <vector>
+
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/util/math_functions.hpp"
+#include "glog/logging.h"
+#include "google/protobuf/text_format.h"
+
+#ifdef USE_LEVELDB
+#include "leveldb/db.h"
+#include "math.h"
+#include "stdint.h"
+
+namespace {
+
+// A label record is 4 bytes: 1 category byte followed by 3 pose coordinates.
+const int kLabelBytes = 4;
+// Header bytes that precede the payload of the image and label files.
+const int kImageHeaderBytes = 16;
+const int kLabelHeaderBytes = 8;
+// Pose distance separating "near" from "far" views. Kept as the integer
+// division 100 / 3 == 33 that the selection rule has always used.
+const int kPoseDistThreshold = 100 / 3;
+// Keys are the running sample counter, zero-padded to 8 digits.
+const int kMaxKeyLength = 10;
+// Number of passes made over the reference samples.
+const unsigned int kNumPasses = 10;
+
+// Euclidean distance between the pose coordinates (bytes 1..3) of two labels.
+float pose_distance(const signed char* a, const signed char* b) {
+  int squared = 0;
+  for (int c = 1; c < kLabelBytes; ++c) {
+    const int diff = static_cast<int>(a[c]) - static_cast<int>(b[c]);
+    squared += diff * diff;
+  }
+  return static_cast<float>(std::sqrt(static_cast<double>(squared)));
+}
+
+// Stores one sample in |db| under the next sequential key and advances
+// |*counter|. Aborts if the write fails.
+void put_sample(leveldb::DB* db, caffe::Datum* datum,
+    const std::vector<char>& pixels, signed char label, int* counter) {
+  char key[kMaxKeyLength];
+  std::string value;
+  datum->set_data(&pixels[0], pixels.size());
+  datum->set_label(static_cast<int>(label));
+  datum->SerializeToString(&value);
+  snprintf(key, kMaxKeyLength, "%08d", *counter);
+  const leveldb::Status status =
+      db->Put(leveldb::WriteOptions(), std::string(key), value);
+  CHECK(status.ok()) << "Failed to write sample " << key << ": "
+      << status.ToString();
+  ++(*counter);
+}
+
+}  // namespace
+
+uint32_t swap_endian(uint32_t val) {
+  val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
+  return (val << 16) | (val >> 16);
+}
+
+// Reads the pixels and the label of sample |index|.
+//   pixels:     receives rows * cols bytes (times 3 when rgb_use != 0).
+//   label_temp: scratch buffer of at least 4 bytes for the raw label.
+//   label:      receives the 4 label bytes as signed values.
+// Aborts if either stream is left in a failed state by the reads.
+void read_image(std::ifstream* image_file, std::ifstream* label_file,
+        uint32_t index, uint32_t rows, uint32_t cols,
+        char* pixels, char* label_temp, signed char* label, int rgb_use) {
+  // Offsets are computed as std::streamoff (typically 64-bit) instead of
+  // uint32_t, where index * rows * cols wraps around for large data sets.
+  const std::streamoff channels = (rgb_use == 0) ? 1 : 3;
+  const std::streamoff image_bytes = channels * rows * cols;
+  image_file->seekg(image_bytes * index + kImageHeaderBytes);
+  image_file->read(pixels, image_bytes);
+  label_file->seekg(
+      static_cast<std::streamoff>(index) * kLabelBytes + kLabelHeaderBytes);
+  label_file->read(label_temp, kLabelBytes);
+  CHECK(image_file->good() && label_file->good())
+      << "Failed to read sample " << index;
+  for (int b = 0; b < kLabelBytes; ++b)
+    label[b] = static_cast<signed char>(label_temp[b]);
+}
+
+// Builds the triplet LevelDB at |db_filename| from the image and label files.
+// |class_number| and |rgb_use| are decimal strings: the number of categories
+// in the data set, and 0 for 1-channel or non-zero for 3-channel images.
+void convert_dataset(const char* image_filename, const char* label_filename,
+        const char* db_filename,
+        const char* class_number, const char* rgb_use) {
+  const int rgb_use1 = atoi(rgb_use);
+  const int class_num = atoi(class_number);
+  // class_num is used as a divisor when locating the reference samples.
+  CHECK_GT(class_num, 0) << "class_number must be a positive integer.";
+  const uint32_t num_classes = static_cast<uint32_t>(class_num);
+
+  // Open files
+  std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
+  std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
+  CHECK(image_file) << "Unable to open file " << image_filename;
+  CHECK(label_file) << "Unable to open file " << label_filename;
+
+  // Read the magic and the meta data
+  uint32_t magic = 0;
+  uint32_t num_items = 0;
+  uint32_t num_labels = 0;
+  uint32_t rows = 0;
+  uint32_t cols = 0;
+
+  image_file.read(reinterpret_cast<char*>(&magic), 4);
+  magic = swap_endian(magic);
+  CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
+  label_file.read(reinterpret_cast<char*>(&magic), 4);
+  magic = swap_endian(magic);
+  CHECK_EQ(magic, 2050) << "Incorrect label file magic.";
+  image_file.read(reinterpret_cast<char*>(&num_items), 4);
+  num_items = swap_endian(num_items);
+  label_file.read(reinterpret_cast<char*>(&num_labels), 4);
+  num_labels = swap_endian(num_labels);
+  CHECK_EQ(num_items, num_labels);
+  image_file.read(reinterpret_cast<char*>(&rows), 4);
+  rows = swap_endian(rows);
+  image_file.read(reinterpret_cast<char*>(&cols), 4);
+  cols = swap_endian(cols);
+
+  const int channels = (rgb_use1 == 0) ? 1 : 3;
+  const size_t db_size = static_cast<size_t>(channels) * rows * cols;
+  // The pixel buffers are addressed through &buffer[0], so they must not be
+  // empty.
+  CHECK(db_size > 0) << "Image dimensions must be non-zero.";
+
+  // Open leveldb
+  leveldb::DB* db;
+  leveldb::Options options;
+  options.create_if_missing = true;
+  options.error_if_exists = true;
+  leveldb::Status status = leveldb::DB::Open(
+      options, db_filename, &db);
+  CHECK(status.ok()) << "Failed to open leveldb " << db_filename
+      << ". Is it already existing?";
+
+  // All buffers have automatic storage or are owned by a vector, so nothing
+  // has to be released by hand.
+  char label_temp[kLabelBytes];      // raw label bytes as read from the file
+  signed char label_i[kLabelBytes];  // reference sample
+  signed char label_j[kLabelBytes];  // positive: same class, nearby pose
+  signed char label_k[kLabelBytes];  // negative: different class
+  signed char label_l[kLabelBytes];  // negative: different class
+  signed char label_m[kLabelBytes];  // negative: same class, distant pose
+  std::vector<char> pixels1(db_size);
+  std::vector<char> pixels2(db_size);
+  std::vector<char> pixels3(db_size);
+  std::vector<char> pixels4(db_size);
+  std::vector<char> pixels5(db_size);
+
+  caffe::Datum datum;
+  datum.set_channels(channels);
+  datum.set_height(rows);
+  datum.set_width(cols);
+  LOG(INFO) << "A total of " << num_items << " items.";
+  LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
+
+  // For every reference sample, four random samples are drawn repeatedly until
+  // they form a valid set with the reference; the five samples are then
+  // appended to the DB in the order reference, positive, 3 negatives.
+  // NOTE: the draw loop never terminates if the data set holds no valid
+  // combination for some reference sample.
+  int counter = 0;
+  for (unsigned int times = 0; times < kNumPasses; ++times) {
+    for (uint32_t itemid = 0; itemid < num_items / num_classes; ++itemid) {
+      for (uint32_t class_ind = 0; class_ind < num_classes; ++class_ind) {
+        // Reference index: item |itemid| of the |class_ind|-th equal slice of
+        // the file (presumably the input is grouped by class - TODO confirm).
+        const uint32_t i = itemid + class_ind * num_items / num_classes;
+        read_image(&image_file, &label_file, i, rows, cols,
+            &pixels1[0], label_temp, label_i, rgb_use1);
+
+        bool accepted = false;
+        while (!accepted) {
+          const uint32_t j = caffe::caffe_rng_rand() % num_items;
+          const uint32_t k = caffe::caffe_rng_rand() % num_items;
+          const uint32_t l = caffe::caffe_rng_rand() % num_items;
+          const uint32_t m = caffe::caffe_rng_rand() % num_items;
+          read_image(&image_file, &label_file, j, rows, cols,
+              &pixels2[0], label_temp, label_j, rgb_use1);
+          read_image(&image_file, &label_file, k, rows, cols,
+              &pixels3[0], label_temp, label_k, rgb_use1);
+          read_image(&image_file, &label_file, l, rows, cols,
+              &pixels4[0], label_temp, label_l, rgb_use1);
+          read_image(&image_file, &label_file, m, rows, cols,
+              &pixels5[0], label_temp, label_m, rgb_use1);
+
+          // Arrange training data according to conditions on the category
+          // and pose of the synthetic data; the dist_* terms can be dropped
+          // if you only care about the category.
+          const float dist_ij = pose_distance(label_i, label_j);
+          const float dist_im = pose_distance(label_i, label_m);
+          const bool pair_pass = label_i[0] == label_j[0] &&
+              dist_ij < kPoseDistThreshold && dist_ij != 0;
+          const bool triplet1_pass = label_i[0] != label_k[0];
+          const bool triplet2_pass = label_i[0] != label_l[0];
+          const bool triplet3_pass = label_i[0] == label_m[0] &&
+              dist_im > kPoseDistThreshold;
+          accepted = pair_pass && triplet1_pass && triplet2_pass &&
+              triplet3_pass;
+          if (accepted) {
+            put_sample(db, &datum, pixels1, label_i[0], &counter);
+            put_sample(db, &datum, pixels2, label_j[0], &counter);
+            put_sample(db, &datum, pixels3, label_k[0], &counter);
+            put_sample(db, &datum, pixels4, label_l[0], &counter);
+            put_sample(db, &datum, pixels5, label_m[0], &counter);
+          }
+        }
+      }
+    }
+  }
+  delete db;
+}
+
+int main(int argc, char** argv) {
+  if (argc != 6) {
+    printf("This script converts the dataset to the leveldb format used\n"
+           "by caffe to train a triplet network.\n"
+           "Usage:\n"
+           "    convert_triplet_data input_image_file input_label_file "
+           "output_db_file class_number rgb_use \n");
+    // Signal the usage error to the calling shell.
+    return 1;
+  }
+  google::InitGoogleLogging(argv[0]);
+  convert_dataset(argv[1], argv[2], argv[3], argv[4], argv[5]);
+  return 0;
+}
+#else
+int main(int argc, char** argv) {
+  LOG(FATAL) << "This example requires LevelDB; compile with USE_LEVELDB.";
+}
+#endif  // USE_LEVELDB
diff --git a/examples/triplet/create_3d_triplet.sh b/examples/triplet/create_3d_triplet.sh
new file mode 100644
index 00000000000..376a38dc49f
--- /dev/null
+++ b/examples/triplet/create_3d_triplet.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env sh
+# This script converts the linemod data into leveldb format.
+
+EXAMPLES=./build/examples/triplet
+DATA=./data/linemod
+
+echo "Creating leveldb..."
+
+# This script converts data consisting of 6 categories to leveldb format for
+# triplet training.
+
+rm -rf ./examples/triplet/3d_triplet_train_leveldb
+rm -rf ./examples/triplet/3d_triplet_test_leveldb
+
+$EXAMPLES/convert_triplet_data.bin \
+    $DATA/binary_image_train \
+    $DATA/binary_label_train \
+    ./examples/triplet/3d_triplet_train_leveldb \
+    6 \
+    0
+$EXAMPLES/convert_triplet_data.bin \
+    $DATA/binary_image_test \
+    $DATA/binary_label_test \
+    ./examples/triplet/3d_triplet_test_leveldb \
+    6 \
+    0
+echo "Done."
diff --git a/examples/triplet/multipie_triplet.prototxt b/examples/triplet/multipie_triplet.prototxt new file mode 100644 index 00000000000..267ddc3475e --- /dev/null +++ b/examples/triplet/multipie_triplet.prototxt @@ -0,0 +1,110 @@ +name: "multipie_triplet" +input: "data" +input_dim: 1 +input_dim: 1 +input_dim: 75 +input_dim: 65 +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 16 + kernel_size: 8 + stride: 1 + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "pool1" + top: "pool1" +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 7 + kernel_size: 5 + stride: 1 + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "pool2" + top: "pool2" +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 256 + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 150 + } +} diff --git a/examples/triplet/multipie_triplet_solver.prototxt b/examples/triplet/multipie_triplet_solver.prototxt new file mode 100644 index 00000000000..22ab32d6fe2 --- /dev/null +++ b/examples/triplet/multipie_triplet_solver.prototxt @@ -0,0 +1,25 @@ +# The train/test net protocol buffer definition +net: "examples/triplet/multipie_triplet_train_test.prototxt" +# test_iter 
specifies how many forward passes the test should carry out. +# In the case of face database, we have test batch size 250 and 250 test iterations: 50*(2+3)=250, +# +test_iter: 500 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. +base_lr: 0.001 +momentum: 0.9 +weight_decay: 0.0000 +# The learning rate policy +lr_policy: "inv" +gamma: 0.001 +power: 0.75 +# Display every 500 iterations +display: 500 +# The maximum number of iterations +max_iter: 500000 +# snapshot intermediate results +snapshot: 10000 +snapshot_prefix: "examples/triplet/multipie_triplet" +# solver mode: CPU or GPU +solver_mode: CPU diff --git a/examples/triplet/multipie_triplet_train_test.prototxt b/examples/triplet/multipie_triplet_train_test.prototxt new file mode 100644 index 00000000000..7f457ebc91d --- /dev/null +++ b/examples/triplet/multipie_triplet_train_test.prototxt @@ -0,0 +1,191 @@ +name: "multipie_triplet_train_test" +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + scale: 0.00390625 + } + image_data_param { + source: "/Users/yidawang/Documents/MATLAB/TripletTest/Face/testlist.txt" + batch_size: 250 + } +} +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + scale: 0.00390625 + } + image_data_param { + source: "/Users/yidawang/Documents/MATLAB/TripletTest/Face/testlist.txt" + batch_size: 250 + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } + convolution_param { + num_output: 16 + kernel_size: 8 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + 
} +} +layer { + name: "relu1" + type: "ReLU" + bottom: "pool1" + top: "pool1" +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + name: "conv2_w" + lr_mult: 1 + } + param { + name: "conv2_b" + lr_mult: 2 + } + convolution_param { + num_output: 7 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "pool2" + top: "pool2" +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } + inner_product_param { + num_output: 256 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 + } + inner_product_param { + num_output: 150 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "feat" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "TripletLoss" + bottom: "feat" + bottom: "label" + top: "loss" + triplet_loss_param { + margin: 1 + losstype: 0 + num_negatives: 3 + } +} diff --git a/examples/triplet/objtrans.py b/examples/triplet/objtrans.py new file mode 100644 index 00000000000..5de5268b725 --- /dev/null +++ b/examples/triplet/objtrans.py @@ -0,0 +1,18 @@ +import bpy +for i in range(1, 51): + override = {'selected_bases': list(bpy.context.scene.object_bases)} + bpy.ops.object.delete(override) + dir_from='/Users/yidawang/Downloads/temp/motorbike/'+str(i)+'/'+str(i)+'.obj' + 
dir_to='/Users/yidawang/Downloads/collection/motorbike/'+str(i)+'/'+str(i)+'.obj' + glob_type=str(i)+'.obj;'+str(i)+'.mtl' + scene = bpy.context.scene + lamp_data = bpy.data.lamps.new(name="New Lamp", type='HEMI') + lamp_object = bpy.data.objects.new(name="New Lamp", object_data=lamp_data) + scene.objects.link(lamp_object) + lamp_object.location = (0, 0, 1) + lamp_object.select = True + scene.objects.active = lamp_object + bpy.ops.import_scene.obj(filepath=dir_from,filter_glob=glob_type) + bpy.ops.object.shade_flat() + bpy.ops.export_scene.obj(filepath=dir_to,filter_glob=glob_type) + diff --git a/examples/triplet/pascal3d_triplet.prototxt b/examples/triplet/pascal3d_triplet.prototxt new file mode 100644 index 00000000000..7163791faaf --- /dev/null +++ b/examples/triplet/pascal3d_triplet.prototxt @@ -0,0 +1,295 @@ +name: "pascal_triplet" +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 227 +input_dim: 227 +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } + convolution_param { + num_output: 16 + kernel_size: 7 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn1" + type: "BatchNorm" + bottom: "conv1" + top: "bn1" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "bn1" + top: "bn1" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "bn1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + name: "conv2_w" + lr_mult: 1 + } + param { + name: "conv2_b" + lr_mult: 2 + } + convolution_param { + num_output: 30 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn2" + type: "BatchNorm" + bottom: "conv2" 
+ top: "bn2" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "bn2" + top: "bn2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "bn2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + name: "conv3_w" + lr_mult: 1 + } + param { + name: "conv3_b" + lr_mult: 2 + } + convolution_param { + num_output: 28 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn3" + type: "BatchNorm" + bottom: "conv3" + top: "bn3" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "bn3" + top: "bn3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "bn3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4" + type: "Convolution" + bottom: "pool3" + top: "conv4" + param { + name: "conv4_w" + lr_mult: 1 + } + param { + name: "conv4_b" + lr_mult: 2 + } + convolution_param { + num_output: 14 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn4" + type: "BatchNorm" + bottom: "conv4" + top: "bn4" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu4" + type: "ReLU" + bottom: "bn4" + top: "bn4" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "bn4" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool4" + top: "ip1" + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } + inner_product_param { + num_output: 512 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + 
name: "relu5" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "drop1" + type: "Dropout" + bottom: "ip1" + top: "ip1" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 + } + inner_product_param { + num_output: 100 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} diff --git a/examples/triplet/pascal3d_triplet_solver.prototxt b/examples/triplet/pascal3d_triplet_solver.prototxt new file mode 100644 index 00000000000..7e0789cf23b --- /dev/null +++ b/examples/triplet/pascal3d_triplet_solver.prototxt @@ -0,0 +1,25 @@ +# The train/test net protocol buffer definition +net: "examples/triplet/pascal3d_triplet_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of face database, we have test batch size 250 and 250 test iterations: 50*(2+3)=250, +# +test_iter: 500 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. 
+base_lr: 0.001 +momentum: 0.9 +weight_decay: 0.0000 +# The learning rate policy +lr_policy: "inv" +gamma: 0.001 +power: 0.75 +# Display every 500 iterations +display: 100 +# The maximum number of iterations +max_iter: 50000 +# snapshot intermediate results +snapshot: 500 +snapshot_prefix: "examples/triplet/pascal_triplet" +# solver mode: CPU or GPU +solver_mode: CPU diff --git a/examples/triplet/pascal3d_triplet_train_test.prototxt b/examples/triplet/pascal3d_triplet_train_test.prototxt new file mode 100644 index 00000000000..26010099b23 --- /dev/null +++ b/examples/triplet/pascal3d_triplet_train_test.prototxt @@ -0,0 +1,335 @@ +name: "pascal_triplet_train_test" +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + scale: 0.00390625 + } + image_data_param { + source: "/Users/yidawang/Documents/MATLAB/TripletTest/Face/testlist_regular.txt" + batch_size: 250 + } +} +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + scale: 0.00390625 + } + image_data_param { + source: "/Users/yidawang/Documents/MATLAB/TripletTest/Face/testlist_regular.txt" + batch_size: 25 + } +} + +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + name: "conv1_w" + lr_mult: 1 + } + param { + name: "conv1_b" + lr_mult: 2 + } + convolution_param { + num_output: 16 + kernel_size: 7 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn1" + type: "BatchNorm" + bottom: "conv1" + top: "bn1" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "bn1" + top: "bn1" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "bn1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + 
param { + name: "conv2_w" + lr_mult: 1 + } + param { + name: "conv2_b" + lr_mult: 2 + } + convolution_param { + num_output: 30 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn2" + type: "BatchNorm" + bottom: "conv2" + top: "bn2" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "bn2" + top: "bn2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "bn2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + name: "conv3_w" + lr_mult: 1 + } + param { + name: "conv3_b" + lr_mult: 2 + } + convolution_param { + num_output: 28 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn3" + type: "BatchNorm" + bottom: "conv3" + top: "bn3" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "bn3" + top: "bn3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "bn3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4" + type: "Convolution" + bottom: "pool3" + top: "conv4" + param { + name: "conv4_w" + lr_mult: 1 + } + param { + name: "conv4_b" + lr_mult: 2 + } + convolution_param { + num_output: 14 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "bn4" + type: "BatchNorm" + bottom: "conv4" + top: "bn4" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} +layer { + name: "relu4" + type: "ReLU" + bottom: "bn4" + top: "bn4" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "bn4" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} 
+layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool4" + top: "ip1" + param { + name: "ip1_w" + lr_mult: 1 + } + param { + name: "ip1_b" + lr_mult: 2 + } + inner_product_param { + num_output: 512 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "drop1" + type: "Dropout" + bottom: "ip1" + top: "ip1" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "feat" + type: "InnerProduct" + bottom: "ip1" + top: "feat" + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 + } + inner_product_param { + num_output: 100 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "loss" + type: "TripletLoss" + bottom: "feat" + bottom: "label" + top: "loss" + triplet_loss_param { + margin: 0.2 + losstype: 2 + num_negatives: 3 + } +} \ No newline at end of file diff --git a/examples/triplet/readme.md b/examples/triplet/readme.md new file mode 100644 index 00000000000..8ed0bcf7819 --- /dev/null +++ b/examples/triplet/readme.md @@ -0,0 +1,97 @@ +--- +title: Triplet and Pair Wise Network Tutorial +description: Train and test a triplet network on data generated by 3D model from PASCAL3D+ or faces in MultiPIE. +category: example +include_in_docs: true +layout: default +priority: 100 +--- + +# Triplet Network Training with Caffe +This example shows how you can use weight sharing and a triplet loss +function to learn a model using a triplet network in Caffe. + +We will assume that you have caffe successfully compiled. If not, please refer +to the [Installation page](../../installation.html). 
+ +## Training Data Preparation + +Because the triplet loss is widely used in multitask training, where training samples +are labeled with respect to different aspects, per-sample labels are not needed: +only the order of the training samples matters for the triplet loss (e.g. r1, p1, n1, ..., rN, pN, nN). +Each triplet set always contains 1 reference sample and 1 positive sample, and the +number of negative samples can be chosen freely. So you can use a file list for training +and testing samples. The triplet loss layer can be defined as below: + +layer { + name: "loss" + type: "TripletLoss" + bottom: "feat" + bottom: "label" + top: "loss" + triplet_loss_param { + margin: 0.2 + losstype: 2 + num_negatives: 3 + } +} + +where num_negatives is the number of negative samples in 1 triplet set; +this set uses 5 samples in total, so the overall number of training samples must be a multiple of 5. + +## Introduction to the convert_triplet_data tool + +If training from DB files is needed, a tool is provided for converting +binary files containing data and labels to a LevelDB database. The triplet loss relies on +a particular ordering of the training data, so the label of each sample is used for the +arrangement of the training data. The tool converts a set of binary synthetic data +and labels (category and pose) to LevelDB files and arranges them as triplet sets consisting +of 1 reference sample, 1 positive sample and 3 negative samples. + +You should modify the label reading method in the `read_image` function to match your binary file, +and the conditionals in the `convert_dataset` function. + +## Models +First, we will define the model that we want to train using the triplet network. +We will use the convolutional net defined in +`./examples/triplet/pascal3d_triplet.prototxt`. + +## Define the triplet Network + +In this section we will define the triplet network used for training. The +resulting network is defined in +`./examples/triplet/pascal3d_triplet_train_test.prototxt`. 
+ +### Adding the Triplet Loss Function + +To train the network we will optimize a triplet loss function; the variants implemented by the layer are documented in `include/caffe/layers/triplet_loss_layer.hpp`. +This cost function is implemented with the `TripletLoss` layer, +and num_negatives can be chosen freely: + + +layer { + name: "loss" + type: "TripletLoss" + bottom: "feat" + bottom: "label" + top: "loss" + triplet_loss_param { + margin: 1 + losstype: 0 + num_negatives: 3 + } +} + +## Define the Solver + +Nothing special needs to be done to the solver besides pointing it at the +correct model file. The solver is defined in +`./examples/triplet/pascal3d_triplet_solver.prototxt`. + +## Training and Testing the Model + +Training the model is simple after you have written the network definition +protobuf and solver protobuf files. Simply run +`./examples/triplet/train_pascal_triplet.sh`: + + ./examples/triplet/train_pascal_triplet.sh diff --git a/examples/triplet/train_multipie_triplet.sh b/examples/triplet/train_multipie_triplet.sh new file mode 100644 index 00000000000..6c73c2989ff --- /dev/null +++ b/examples/triplet/train_multipie_triplet.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env sh +# This script trains on the MULTIPIE database, taking 1 positive sample and 3 +# negative samples as a training data set; the negative samples are ones which are +# different from the reference sample. 
+ +TOOLS=./build/tools + +$TOOLS/caffe train --solver=examples/triplet/multipie_triplet_solver.prototxt \ No newline at end of file diff --git a/examples/triplet/train_pascal_triplet.sh b/examples/triplet/train_pascal_triplet.sh new file mode 100644 index 00000000000..c5924dab79a --- /dev/null +++ b/examples/triplet/train_pascal_triplet.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train --solver=examples/triplet/pascal3d_triplet_solver.prototxt \ No newline at end of file diff --git a/include/caffe/layers/triplet_loss_layer.hpp b/include/caffe/layers/triplet_loss_layer.hpp new file mode 100644 index 00000000000..9690a8a1154 --- /dev/null +++ b/include/caffe/layers/triplet_loss_layer.hpp @@ -0,0 +1,78 @@ +#ifndef CAFFE_TRIPLET_LOSS_LAYER_HPP_ +#define CAFFE_TRIPLET_LOSS_LAYER_HPP_ +#include +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/loss_layer.hpp" +#include "caffe/proto/caffe.pb.h" +namespace caffe { + +/** + * @brief Compute triplet loss with multiple negative samples. 
+ */ +template + class TripletLossLayer : public LossLayer { + public: + explicit TripletLossLayer(const LayerParameter& param) + : LossLayer(param), diff_() {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual inline int ExactNumBottomBlobs() const { return 2; } + virtual inline const char* type() const { return "TripletLoss"; } + virtual inline bool AllowForceBackward(const int bottom_index) const { + return bottom_index != 1; + } + + protected: + /// @copydoc TripletLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + /** + * There are 3 types of triplet loss which are: + * L_0(x_a, x_p, x_n) = max(0, m - || x_a - x_n ||_2^2 + || x_a - x_p ||_2^2) + * used in FaceNet: A Unified Embedding for Face Recognition and Clustering + * L_1(x_a, x_p, x_n) = max(0, 1 - || x_a - x_n ||_2^2 / (|| x_a - x_p ||_2^2 + m)) + * used in Learning Descriptors for Object Recognition and 3D Pose Estimation + * L_2(x_a, x_p, x_n) = max(0, 1 - exp(|| x_a - x_n ||_2^2) / (exp(|| x_a - x_p ||_2^2) + m)) + * used in Learning Descriptors for Object Recognition and 3D Pose Estimation + */ + + /** The partial derivatives for loss type 1 (L_1 above) are derived from: + * \begin{eqnarray} + * \mathcal{L}_{tri}(s_i,s_j,s_k) = max(0,1-\frac{||f(x_i)-f(x_k)||_2^2}{||f(x_i)-f(x_j)||_2^2+m}) + * \end{eqnarray} + * where $ f(x) $ is the input of the loss layer for sample $ x $ and m is the margin for triplet. 
+ * Denote that $D_{ij}=||f(x_i)-f(x_j)||_2^2$ and $D_{ik}=||f(x_i)-f(x_k)||_2^2$, + * so the partial derivatives with respect to the inputs of the triplet loss layer are: + * \begin{eqnarray} + * \dfrac{\partial \mathcal{L}_{tri}}{\partial f(x_i)}= + * &\frac{D_{ik}(f(x_i)-f(x_j))-(D_{ij}+m)(f(x_i)-f(x_k))}{(D_{ij}+m)^2} \nonumber \\ + * \dfrac{\partial \mathcal{L}_{tri}}{\partial f(x_j)}= + * &\frac{D_{ik}(f(x_j)-f(x_i))}{(D_{ij}+m)^2} \nonumber \\ + * \dfrac{\partial \mathcal{L}_{tri}}{\partial f(x_k)}= + * &\frac{f(x_i)-f(x_k)}{D_{ij}+m} + * \end{eqnarray} + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + Blob diff_; // cached for backward pass + Blob diff_pos; + Blob diff_neg; + Blob dist_sq_; // cached for backward pass + Blob dist_sq_pos; + Blob dist_sq_neg; + Blob diff_sq_; // tmp storage for gpu forward pass + Blob diff_sq_pos; + Blob diff_sq_neg; + Blob summer_vec_; // tmp storage for gpu forward pass + }; + +} // namespace caffe + +#endif // CAFFE_TRIPLET_LOSS_LAYER_HPP_ diff --git a/src/caffe/layers/triplet_loss_layer.cpp b/src/caffe/layers/triplet_loss_layer.cpp new file mode 100644 index 00000000000..8fc6fc6bd5e --- /dev/null +++ b/src/caffe/layers/triplet_loss_layer.cpp @@ -0,0 +1,684 @@ +#include +#include +#include "caffe/layer.hpp" +#include "caffe/layers/triplet_loss_layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/math_functions.hpp" +namespace caffe { +template +void TripletLossLayer::LayerSetUp( + const vector*>& bottom, const vector*>& top) { + LossLayer::LayerSetUp(bottom, top); + // number of negative samples in each triplet set + int num_negatives = this->layer_param_.triplet_loss_param().num_negatives(); + // dimension of each descriptor + int dim = bottom[0]->count()/bottom[0]->num(); + CHECK_EQ(bottom[0]->channels(), dim); + CHECK_EQ(bottom[0]->height(), 1); + 
CHECK_EQ(bottom[0]->width(), 1); + CHECK_EQ(bottom[1]->channels(), 1); + CHECK_EQ(bottom[1]->height(), 1); + CHECK_EQ(bottom[1]->width(), 1); + // In each set, we have: + // the descriptor of reference sample, closest sample, and negative samples + // number of sets in the whole batch + int num_set = bottom[0]->num()/(2 + num_negatives); + dist_sq_.Reshape(num_set, 1, 1, 1); + diff_pos.Reshape(num_set, dim, 1, 1); + dist_sq_pos.Reshape(num_set, 1, 1, 1); + diff_neg.Reshape(num_set, dim, 1, 1); + dist_sq_neg.Reshape(num_set, 1, 1, 1); + // vector of ones used to sum along channels + summer_vec_.Reshape(bottom[0]->channels(), 1, 1, 1); + for (int i = 0; i < bottom[0]->channels(); ++i) + summer_vec_.mutable_cpu_data()[i] = Dtype(1); +} +template +void TripletLossLayer::Forward_cpu( + const vector*>& bottom, + const vector*>& top) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_negatives = this->layer_param_.triplet_loss_param().num_negatives(); + int use_pair = this->layer_param_.triplet_loss_param().use_pair(); + CHECK_EQ(bottom[0]->num()%(2 + num_negatives), 0); + Dtype loss(0.0); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_negatives); + if (losstype == 0) { + for (int i = 0; i < num_set; ++i) { + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*i + 1)*dim, // positive + diff_pos.mutable_cpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + dist_sq_pos.mutable_cpu_data()[i] = + caffe_cpu_dot(dim, + diff_pos.cpu_data() + i*dim, + diff_pos.cpu_data() + i*dim); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + if (use_pair == 1) { + loss += dist_sq_pos.cpu_data()[i]; + } + for (int triplet = 0; triplet < num_negatives; ++triplet) { + // Triplet loss 
accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[i] = dist_sq_pos.cpu_data()[i]; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*i + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + i*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[i] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + i*dim, + diff_neg.cpu_data() + i*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] -= dist_sq_neg.cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(margin + dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } else if (losstype == 1) { + for (int i = 0; i < num_set; ++i) { + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*i + 1)*dim, // positive + diff_pos.mutable_cpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + dist_sq_pos.mutable_cpu_data()[i] = + caffe_cpu_dot(dim, + diff_pos.cpu_data() + i*dim, + diff_pos.cpu_data() + i*dim); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + if (use_pair == 1) { + loss += dist_sq_pos.cpu_data()[i]; + } + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[i] = dist_sq_pos.cpu_data()[i]; + dist_sq_.mutable_cpu_data()[i] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*i + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + i*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[i] = + caffe_cpu_dot(dim, + 
diff_neg.cpu_data() + i*dim, + diff_neg.cpu_data() + i*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] = 1 - \ + dist_sq_neg.cpu_data()[i] / dist_sq_.cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } else if (losstype == 2) { + for (int i = 0; i < num_set; ++i) { + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*i + 1)*dim, // positive + diff_pos.mutable_cpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + dist_sq_pos.mutable_cpu_data()[i] = + caffe_cpu_dot(dim, + diff_pos.cpu_data() + i*dim, + diff_pos.cpu_data() + i*dim); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + if (use_pair == 1) { + loss += dist_sq_pos.cpu_data()[i]; + } + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[i] = exp(dist_sq_pos.cpu_data()[i]); + dist_sq_.mutable_cpu_data()[i] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*i + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + i*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[i] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + i*dim, + diff_neg.cpu_data() + i*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] = 1 - \ + exp(dist_sq_neg.cpu_data()[i]) / dist_sq_.cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } +} + 
+template +void TripletLossLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_negatives = this->layer_param_.triplet_loss_param().num_negatives(); + int use_pair = this->layer_param_.triplet_loss_param().use_pair(); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_negatives); + if (losstype == 0) { + // BP for feat1(extracted from reference) + for (int i = 0; i < 1; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_cpu_axpby( + dim, + Dtype(0.0), + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*j*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + // Loss component calculated from negative part + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + 
caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + (2 + num_negatives)*j*dim); + // dissimilar pair in triplet + caffe_cpu_axpby( + dim, + -alpha, + diff_neg.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + // BP for feat2(extracted from the closest sample) + for (int i = 1; i < 2; ++i) { + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_cpu_axpby( + dim, + Dtype(0.0), + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*j*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + // BP for negative feature used in the num_negatives triplet part + for (int i = 2; i < 2 + num_negatives; ++i) { + if (propagate_down[0]) { 
+ const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*j*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*j + i)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // dissimilar pairs + caffe_cpu_axpby( + dim, + alpha, + diff_neg.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_set(dim, Dtype(0), bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } else if (losstype == 1) { + for (int i = 0; i < 1; ++i) { + // BP for data1(feat1) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_cpu_axpby( + dim, + Dtype(0.0), + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.mutable_cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + 
bottom[0]->cpu_data() + + (2 + num_negatives)*j*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + + j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha*dist_sq_neg.cpu_data()[j]/ + ((dist_sq_pos.cpu_data()[j]+margin)* + (dist_sq_pos.cpu_data()[j]+margin)), + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + caffe_cpu_axpby( + dim, + -alpha/(dist_sq_pos.mutable_cpu_data()[j]+margin), + diff_neg.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + for (int i = 1; i < 2; ++i) { + // BP for positive data(feat2) + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_cpu_axpby( + dim, + Dtype(0.0), + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + + (2 + num_negatives)*j*dim, // reference + bottom[0]->cpu_data() + + ((2 + num_negatives)*j + 2 + 
triplet)*dim, + diff_neg.mutable_cpu_data() + + j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha*dist_sq_neg.cpu_data()[j]/ + ((dist_sq_pos.cpu_data()[j]+margin)* + (dist_sq_pos.cpu_data()[j]+margin)), + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + for (int i = 2; i < 2 + num_negatives; ++i) { + // BP for negative data(feat3) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_negatives)*j*dim, // ref + bottom[0]->cpu_data() + ((2 + num_negatives)*j + i)*dim, + diff_neg.mutable_cpu_data() + j*dim); // ref-negative + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha/(dist_sq_pos.cpu_data()[j] + margin), + diff_neg.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_set(dim, Dtype(0), bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } else 
if (losstype == 2) { + for (int i = 0; i < 1; ++i) { + // BP for data1(feat1) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_cpu_axpby( + dim, + Dtype(0.0), + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = + exp(dist_sq_pos.cpu_data()[j]); + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data()+(2 + num_negatives)*j*dim, // reference + bottom[0]->cpu_data()+((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_cpu_data() + j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + exp(dist_sq_neg.cpu_data()[j]) / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha* + Dtype(exp(dist_sq_neg.cpu_data()[j]))* + Dtype(exp(dist_sq_pos.cpu_data()[j]))/ + (Dtype((exp(dist_sq_pos.cpu_data()[j]))+margin)* + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin)), + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + caffe_cpu_axpby( + dim, + -alpha* + Dtype(exp(dist_sq_neg.cpu_data()[j]))/ + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin), + diff_neg.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } 
+ } + } + for (int i = 1; i < 2; ++i) { + // BP for positive data(feat2) + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_cpu_axpby( + dim, + alpha, + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_cpu_axpby( + dim, + Dtype(0.0), + diff_pos.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = + exp(dist_sq_pos.cpu_data()[j]); + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data()+(2+num_negatives)*j*dim, // reference + bottom[0]->cpu_data()+((2+num_negatives)*j+2+triplet)*dim, + diff_neg.mutable_cpu_data()+j*dim); // reference-negative + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + exp(dist_sq_neg.cpu_data()[j]) / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha* + Dtype(exp(dist_sq_neg.cpu_data()[j]))* + Dtype(exp(dist_sq_pos.cpu_data()[j]))/ + ((Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin)* + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin)), + diff_pos.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + for (int i = 2; i < 2 + num_negatives; ++i) { + // BP for negative data(feat3) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j 
= 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + dist_sq_.mutable_cpu_data()[j] = + exp(dist_sq_pos.cpu_data()[j]); + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_sub( + dim, + bottom[0]->cpu_data() + (2 + num_negatives)*j*dim, // ref + bottom[0]->cpu_data() + ((2 + num_negatives)*j + i)*dim, + diff_neg.mutable_cpu_data() + j*dim); // ref-negative + dist_sq_neg.mutable_cpu_data()[j] = + caffe_cpu_dot(dim, + diff_neg.cpu_data() + j*dim, + diff_neg.cpu_data() + j*dim); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + exp(dist_sq_neg.cpu_data()[j]) / dist_sq_.mutable_cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_cpu_axpby( + dim, + alpha*Dtype(exp(dist_sq_neg.cpu_data()[j]))/ + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin), + diff_neg.cpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_set(dim, Dtype(0), bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } +} +#ifdef CPU_ONLY + STUB_GPU(TripletLossLayer); +#endif + INSTANTIATE_CLASS(TripletLossLayer); + REGISTER_LAYER_CLASS(TripletLoss); +} // namespace caffe diff --git a/src/caffe/layers/triplet_loss_layer.cu b/src/caffe/layers/triplet_loss_layer.cu new file mode 100755 index 00000000000..5be91479ac3 --- /dev/null +++ b/src/caffe/layers/triplet_loss_layer.cu @@ -0,0 +1,650 @@ +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/layers/triplet_loss_layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void TripletLossLayer::Forward_gpu( + const vector*>& bottom, + const vector*>& top) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_negatives = 
this->layer_param_.triplet_loss_param().num_negatives(); + int use_pair = this->layer_param_.triplet_loss_param().use_pair(); + CHECK_EQ(bottom[0]->num()%(2 + num_negatives), 0); + Dtype loss(0.0); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_negatives); + if (losstype == 0) { + for (int i = 0; i < num_set; ++i) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*i + 1)*dim, // positive + diff_pos.mutable_gpu_data() + i*dim); // reference-pose_close + caffe_gpu_dot( + dim, + diff_pos.gpu_data() + i*dim, + diff_pos.gpu_data() + i*dim, + dist_sq_pos.mutable_cpu_data() + i); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + if (use_pair == 1) { + loss += dist_sq_pos.cpu_data()[i]; + } + for (int triplet = 0; triplet < num_negatives; ++triplet) { + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[i] = dist_sq_pos.cpu_data()[i]; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*i + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + i*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + i*dim, + diff_neg.gpu_data() + i*dim, + dist_sq_neg.mutable_cpu_data() + i); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] -= dist_sq_neg.cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(margin + dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } else if (losstype == 1) { + for (int i = 0; i < num_set; ++i) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*i*dim, // reference + bottom[0]->gpu_data() + ((2 
+ num_negatives)*i + 1)*dim, // positive + diff_pos.mutable_gpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + caffe_gpu_dot( + dim, + diff_pos.gpu_data() + i*dim, + diff_pos.gpu_data() + i*dim, + dist_sq_pos.mutable_cpu_data() + i); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + if (use_pair == 1) { + loss += dist_sq_pos.cpu_data()[i]; + } + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[i] = dist_sq_pos.mutable_cpu_data()[i]; + dist_sq_.mutable_cpu_data()[i] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*i*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*i + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + i*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + i*dim, + diff_neg.gpu_data() + i*dim, + dist_sq_neg.mutable_cpu_data() + i); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] = 1 - \ + dist_sq_neg.cpu_data()[i] / dist_sq_.mutable_cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } else if (losstype == 2) { + for (int i = 0; i < num_set; ++i) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->gpu_data() + + ((2 + num_negatives)*i + 1)*dim, // positive + diff_pos.mutable_gpu_data() + i*dim); // reference-pose_close + // Loss component calculated from reference and close one + caffe_gpu_dot( + dim, + diff_pos.gpu_data() + i*dim, + diff_pos.gpu_data() + i*dim, + dist_sq_pos.mutable_cpu_data() + i); + // a b is a similar pair for pair wise + // loss accumulated by the pair wise part + if (use_pair == 1) { + loss += 
dist_sq_pos.cpu_data()[i]; + } + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[i] = exp(dist_sq_pos.mutable_cpu_data()[i]); + dist_sq_.mutable_cpu_data()[i] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + + (2 + num_negatives)*i*dim, // reference + bottom[0]->gpu_data() + + ((2 + num_negatives)*i + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + i*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + i*dim, + diff_neg.gpu_data() + i*dim, + dist_sq_neg.mutable_cpu_data() + i); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[i] = 1 - \ + exp(dist_sq_neg.cpu_data()[i]) / dist_sq_.mutable_cpu_data()[i]; + // loss accumulated accumulated by the triplet part + loss += std::max(dist_sq_.cpu_data()[i], Dtype(0.0)); + } + } + loss = loss / static_cast(num_set) / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; + } +} + +template +void TripletLossLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + Dtype margin = this->layer_param_.triplet_loss_param().margin(); + Dtype losstype = this->layer_param_.triplet_loss_param().losstype(); + int num_negatives = this->layer_param_.triplet_loss_param().num_negatives(); + int use_pair = this->layer_param_.triplet_loss_param().use_pair(); + int dim = bottom[0]->count()/bottom[0]->num(); + int num_set = bottom[0]->num()/(2 + num_negatives); + if (losstype == 0) { + // BP for feat1(extracted from reference) + for (int i = 0; i < 1; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } 
else { + caffe_gpu_axpby( + dim, + Dtype(0.0), + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + // Loss component calculated from negative part + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + (2 + num_negatives)*j*dim); + // dissimilar pair in triplet + caffe_gpu_axpby( + dim, + -alpha, + diff_neg.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + // BP for feat2(extracted from the closest sample) + for (int i = 1; i < 2; ++i) { + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_gpu_axpby( + dim, + Dtype(0.0), + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; 
++triplet) { + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // similar pair in triplet + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + // BP for negative feature used in the num_negatives triplet part + for (int i = 2; i < 2 + num_negatives; ++i) { + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*j + i)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + // Triplet loss accumulation + // a and negative[triplet] is a similar pair for triplet + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] -= dist_sq_neg.cpu_data()[j]; + if ((margin + dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + // dissimilar pairs + caffe_gpu_axpby( + dim, + alpha, + diff_neg.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j 
+ i)*dim); + } else { + caffe_gpu_set(dim, Dtype(0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } else if (losstype == 1) { + for (int i = 0; i < 1; ++i) { + // BP for data1(feat1) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_gpu_axpby( + dim, + Dtype(0.0), + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha*dist_sq_neg.mutable_cpu_data()[j]/ + ((dist_sq_pos.cpu_data()[j]+margin)* + (dist_sq_pos.cpu_data()[j]+margin)), + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + caffe_gpu_axpby( + dim, + -alpha/(dist_sq_pos.cpu_data()[j] + margin), + diff_neg.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + 
} + for (int i = 1; i < 2; ++i) { + // BP for positive data(feat2) + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_gpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_gpu_axpby( + dim, + Dtype(0.0), + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.mutable_cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha*dist_sq_neg.cpu_data()[j]/ + ((dist_sq_pos.cpu_data()[j]+margin)* + (dist_sq_pos.cpu_data()[j]+margin)), + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + for (int i = 2; i < 2 + num_negatives; ++i) { + // BP for negative data(feat3) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = 
bottom[0]->mutable_gpu_diff(); + dist_sq_.mutable_cpu_data()[j] = dist_sq_pos.cpu_data()[j]; + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data() + (2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data() + ((2 + num_negatives)*j + i)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data() + j*dim, + diff_neg.gpu_data() + j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + dist_sq_neg.cpu_data()[j] / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha/(dist_sq_pos.cpu_data()[j] + margin), + diff_neg.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_gpu_set(dim, Dtype(0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } else if (losstype == 2) { + for (int i = 0; i < 1; ++i) { + // BP for data1(feat1) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_gpu_axpby( + dim, + Dtype(0.0), + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = + exp(dist_sq_pos.mutable_cpu_data()[j]); + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data()+(2 + num_negatives)*j*dim, 
// reference + bottom[0]->gpu_data()+((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data()+j*dim, + diff_neg.gpu_data()+j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + exp(dist_sq_neg.cpu_data()[j]) / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha* + Dtype(exp(dist_sq_neg.cpu_data()[j]))* + Dtype(exp(dist_sq_pos.cpu_data()[j]))/ + (Dtype((exp(dist_sq_pos.cpu_data()[j]))+margin)* + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin)), + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + caffe_cpu_axpby( + dim, + -alpha* + Dtype(exp(dist_sq_neg.cpu_data()[j]))/ + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin), + diff_neg.cpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + for (int i = 1; i < 2; ++i) { + // BP for positive data(feat2) + if (propagate_down[0]) { + const Dtype sign = -1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + // the pair part + if (use_pair == 1) { + caffe_gpu_axpby( + dim, + alpha, + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_gpu_axpby( + dim, + Dtype(0.0), + diff_pos.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + // the num_negatives triplet part + for (int triplet = 0; triplet < num_negatives; ++triplet) { + dist_sq_.mutable_cpu_data()[j] = + exp(dist_sq_pos.cpu_data()[j]); + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data()+(2 + num_negatives)*j*dim, 
// reference + bottom[0]->gpu_data()+((2 + num_negatives)*j + 2 + triplet)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data()+j*dim, + diff_neg.gpu_data()+j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + exp(dist_sq_neg.cpu_data()[j]) / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha* + Dtype(exp(dist_sq_neg.cpu_data()[j]))* + Dtype(exp(dist_sq_pos.cpu_data()[j]))/ + ((Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin)* + (Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin)), + diff_pos.gpu_data() + (j*dim), + Dtype(1.0), + bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } + for (int i = 2; i < 2 + num_negatives; ++i) { + // BP for negative data(feat3) + if (propagate_down[0]) { + const Dtype sign = 1; + const Dtype alpha = sign * top[0]->cpu_diff()[0] / + static_cast(num_set); + for (int j = 0; j < num_set; ++j) { + Dtype* bout = bottom[0]->mutable_cpu_diff(); + dist_sq_.mutable_cpu_data()[j] = + exp(dist_sq_pos.cpu_data()[j]); + dist_sq_.mutable_cpu_data()[j] += margin; + // Loss component calculated from negative part + caffe_gpu_sub( + dim, + bottom[0]->gpu_data()+(2 + num_negatives)*j*dim, // reference + bottom[0]->gpu_data()+((2 + num_negatives)*j + i)*dim, + diff_neg.mutable_gpu_data() + j*dim); // reference-negative + caffe_gpu_dot( + dim, + diff_neg.gpu_data()+j*dim, + diff_neg.gpu_data()+j*dim, + dist_sq_neg.mutable_cpu_data() + j); + // a and negative[triplet] is a dissimilar pair for triplet + dist_sq_.mutable_cpu_data()[j] = 1 - \ + exp(dist_sq_neg.cpu_data()[j]) / dist_sq_.cpu_data()[j]; + // loss accumulated accumulated by the triplet part + if ((dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + caffe_gpu_axpby( + dim, + alpha*Dtype(exp(dist_sq_neg.cpu_data()[j]))/ + 
(Dtype(exp(dist_sq_pos.cpu_data()[j]))+margin), + diff_neg.gpu_data() + (j*dim), + Dtype(0.0), + bout + ((2 + num_negatives)*j + i)*dim); + } else { + caffe_set(dim, Dtype(0), bout + ((2 + num_negatives)*j + i)*dim); + } + } + } + } + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(TripletLossLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 0b2768b7708..aa2bc08f861 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) +// LayerParameter next available layer-specific ID: 148 (last added: triplet_loss_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -403,6 +403,7 @@ message LayerParameter { optional ThresholdParameter threshold_param = 128; optional TileParameter tile_param = 138; optional WindowDataParameter window_data_param = 129; + optional TripletLossParameter triplet_loss_param = 147; } // Message that stores parameters used to apply transformation @@ -555,6 +556,14 @@ message ContrastiveLossParameter { optional bool legacy_version = 2 [default = false]; } +message TripletLossParameter { + //margin for negative triplet + optional float margin = 1 [default = 1.0]; + optional uint32 losstype = 2 [default = 1]; + optional uint32 num_negatives = 3 [default = 3]; + optional uint32 use_pair = 4 [default = 0]; +} + message ConvolutionParameter { optional uint32 num_output = 1; // The number of outputs for the layer optional bool bias_term = 2 [default = true]; // whether to have bias terms @@ -1294,6 +1303,7 @@ message V1LayerParameter { optional TransformationParameter transform_param = 36; optional LossParameter loss_param = 42; optional V0LayerParameter layer = 1; + optional TripletLossParameter triplet_loss_param = 
43; } // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters diff --git a/src/caffe/test/test_triplet_loss_layer.cpp b/src/caffe/test/test_triplet_loss_layer.cpp new file mode 100644 index 00000000000..61304545de7 --- /dev/null +++ b/src/caffe/test/test_triplet_loss_layer.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/triplet_loss_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class TripletLossLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + TripletLossLayerTest() + : blob_bottom_data_(new Blob(50, 1, 1, 1)), + blob_bottom_y_(new Blob(50, 1, 1, 1)), + blob_top_loss_(new Blob()) { + // fill the values + FillerParameter filler_param; + filler_param.set_min(-1.0); + filler_param.set_max(1.0); // distances~=1.0 to test both sides of margin + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_data_); + blob_bottom_vec_.push_back(blob_bottom_data_); + for (int i = 0; i < blob_bottom_y_->count(); ++i) { + blob_bottom_y_->mutable_cpu_data()[i] = caffe_rng_rand() % 2; // 0 or 1 + } + blob_bottom_vec_.push_back(blob_bottom_y_); + blob_top_vec_.push_back(blob_top_loss_); + } + virtual ~TripletLossLayerTest() { + delete blob_bottom_data_; + delete blob_bottom_y_; + delete blob_top_loss_; + } + + Blob* const blob_bottom_data_; + Blob* const blob_bottom_y_; + Blob* const blob_top_loss_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(TripletLossLayerTest, TestDtypesAndDevices); + +TYPED_TEST(TripletLossLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + TripletLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + 
layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // manually compute to compare + const Dtype margin = layer_param.triplet_loss_param().margin(); + const Dtype losstype = 0; // layer_param.triplet_loss_param().losstype(); + const int num_triplets = 3; + const int num_set = this->blob_bottom_data_->num()/(2 + num_triplets); + const int channels = this->blob_bottom_data_->channels(); + Dtype loss(0); + const Dtype* cpu_data = this->blob_bottom_data_->cpu_data(); + if (losstype == 0) { + for (int i = 0; i < num_set; ++i) { + Dtype dist_par(0); + for (int j = 0; j < channels; ++j) { + Dtype diff_pos = cpu_data[(2+num_triplets)*i*channels+j] - + cpu_data[((2+num_triplets)*i+1)*channels+j]; + dist_par = diff_pos*diff_pos; + loss += dist_par; + } + for (int triplet = 0; triplet < num_triplets; ++triplet) { + Dtype dist_sq(0); + for (int j = 0; j < channels; ++j) { + Dtype diff_pos = cpu_data[(2+num_triplets)*i*channels+j] - + cpu_data[((2+num_triplets)*i+1)*channels+j]; + dist_sq += diff_pos*diff_pos; + Dtype diff_neg = cpu_data[(2+num_triplets)*i*channels+j] - + cpu_data[((2+num_triplets)*i+2+triplet)*channels+j]; + dist_sq -= diff_neg*diff_neg; + } + loss += std::max(margin + dist_sq, Dtype(0.0)); + } + } + } +// else +// { +// for (int i = 0; i < num; ++i) { +// Dtype dist_sq(0); +// Dtype dist_par(0); +// for (int j = 0; j < channels; ++j) { +// Dtype diff_pos = this->blob_bottom_data_i_->cpu_data()[i*channels+j] - +// this->blob_bottom_data_j_->cpu_data()[i*channels+j]; +// dist_sq += diff_pos*diff_pos; +// dist_sq += margin; +// Dtype diff_neg = this->blob_bottom_data_i_->cpu_data()[i*channels+j] - +// this->blob_bottom_data_k_->cpu_data()[i*channels+j]; +// dist_sq = 1 - diff_neg*diff_neg/dist_sq; +// Dtype diff_par = this->blob_bottom_data_l_->cpu_data()[i*channels+j] - +// this->blob_bottom_data_m_->cpu_data()[i*channels+j]; +// dist_par = diff_par*diff_par; +// } +// loss += std::max(dist_sq, Dtype(0.0)); +// loss += dist_par; +// } +// } + loss /= 
static_cast(num_set) * Dtype(2); + // EXPECT_NEAR(this->blob_top_loss_->cpu_data()[0], loss, 1e-6); +} + +TYPED_TEST(TripletLossLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + TripletLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + GradientChecker checker(1e-2, 1e-2, 1701); + // check the gradient for the first 5 bottom layers + // checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + // this->blob_top_vec_, 0); +} +} // namespace caffe