diff --git a/include/caffe/layers/triplet_loss_layer.hpp b/include/caffe/layers/triplet_loss_layer.hpp
new file mode 100644
index 00000000000..a052380e33e
--- /dev/null
+++ b/include/caffe/layers/triplet_loss_layer.hpp
@@ -0,0 +1,48 @@
+#ifndef CAFFE_TRIPLET_LOSS_LAYER_HPP_
+#define CAFFE_TRIPLET_LOSS_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/loss_layer.hpp"
+
+namespace caffe {
+
+// Triplet loss: for each (anchor, same-class, different-class) triple,
+// loss = max(0, alpha + ||a - p||^2 - ||a - n||^2), averaged over the batch.
+template <typename Dtype>
+class TripletLossLayer : public LossLayer<Dtype> {
+ public:
+  explicit TripletLossLayer(const LayerParameter& param)
+      : LossLayer<Dtype>(param), diff_same_class_(), diff_diff_class_() {}
+
+  void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  inline const char* type() const { return "TripletLoss"; }
+  inline int ExactNumBottomBlobs() const { return 3; }
+  inline bool AllowForceBackward(const int bottom_index) const { return true; }
+
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+ protected:
+  /// @copydoc TripletLossLayer
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down,
+      const vector<Blob<Dtype>*>& bottom);
+
+  Blob<Dtype> diff_same_class_;
+  Blob<Dtype> diff_diff_class_;
+  Dtype alpha_;
+  vector<Dtype> vec_loss_;
+  int batch_size_;
+  int vec_dimension_;
+};
+
+}  // namespace caffe
+
+#endif  // CAFFE_TRIPLET_LOSS_LAYER_HPP_
diff --git a/src/caffe/layers/triplet_loss_layer.cpp b/src/caffe/layers/triplet_loss_layer.cpp
new file mode 100644
index 00000000000..01f6bcdb690
--- /dev/null
+++ b/src/caffe/layers/triplet_loss_layer.cpp
@@ -0,0 +1,97 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layers/triplet_loss_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void TripletLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  CHECK(bottom[0]->shape() == bottom[1]->shape())
+      << "Inputs must have the same dimension.";
+  CHECK(bottom[0]->shape() == bottom[2]->shape())
+      << "Inputs must have the same dimension.";
+  diff_same_class_.ReshapeLike(*bottom[0]);
+  diff_diff_class_.ReshapeLike(*bottom[0]);
+
+  vector<int> loss_shape(0);  // Loss layers output a scalar; 0 axes.
+  top[0]->Reshape(loss_shape);
+  batch_size_ = bottom[0]->shape(0);
+  vec_dimension_ = bottom[0]->count() / batch_size_;
+  vec_loss_.resize(batch_size_);
+}
+
+template <typename Dtype>
+void TripletLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  LossLayer<Dtype>::LayerSetUp(bottom, top);
+  alpha_ = this->layer_param_.threshold_param().threshold();
+}
+
+template <typename Dtype>
+void TripletLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  int count = bottom[0]->count();
+
+  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
+      diff_same_class_.mutable_cpu_data());
+  caffe_sub(count, bottom[0]->cpu_data(), bottom[2]->cpu_data(),
+      diff_diff_class_.mutable_cpu_data());
+
+  Dtype loss = 0;
+  for (int v = 0; v < batch_size_; ++v) {
+    vec_loss_[v] =
+        alpha_ +
+        caffe_cpu_dot(vec_dimension_,
+            diff_same_class_.cpu_data() + v * vec_dimension_,
+            diff_same_class_.cpu_data() + v * vec_dimension_) -
+        caffe_cpu_dot(vec_dimension_,
+            diff_diff_class_.cpu_data() + v * vec_dimension_,
+            diff_diff_class_.cpu_data() + v * vec_dimension_);
+    vec_loss_[v] = std::max(Dtype(0), vec_loss_[v]);
+    loss += vec_loss_[v];
+  }
+
+  loss /= batch_size_ * Dtype(2);
+  top[0]->mutable_cpu_data()[0] = loss;
+}
+
+template <typename Dtype>
+void TripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down,
+    const vector<Blob<Dtype>*>& bottom) {
+  const Dtype scale = top[0]->cpu_diff()[0] / bottom[0]->num();
+  const int n = bottom[0]->count();
+
+  caffe_sub(n, diff_same_class_.cpu_data(), diff_diff_class_.cpu_data(),
+      bottom[0]->mutable_cpu_diff());
+  caffe_scal(n, scale, bottom[0]->mutable_cpu_diff());
+
+  caffe_cpu_scale(n, -scale, diff_same_class_.cpu_data(),
+      bottom[1]->mutable_cpu_diff());
+
+  caffe_cpu_scale(n, scale, diff_diff_class_.cpu_data(),
+      bottom[2]->mutable_cpu_diff());
+
+  for (int v = 0; v < batch_size_; ++v) {
+    if (vec_loss_[v] == 0) {
+      caffe_set(vec_dimension_, Dtype(0),
+          bottom[0]->mutable_cpu_diff() + v * vec_dimension_);
+      caffe_set(vec_dimension_, Dtype(0),
+          bottom[1]->mutable_cpu_diff() + v * vec_dimension_);
+      caffe_set(vec_dimension_, Dtype(0),
+          bottom[2]->mutable_cpu_diff() + v * vec_dimension_);
+    }
+  }
+}
+
+#ifdef CPU_ONLY
+// STUB_GPU(TripletLossLayer);
+#endif
+
+INSTANTIATE_CLASS(TripletLossLayer);
+REGISTER_LAYER_CLASS(TripletLoss);
+
+}  // namespace caffe
diff --git a/src/caffe/test/test_triplet_loss_layer.cpp b/src/caffe/test/test_triplet_loss_layer.cpp
new file mode 100644
index 00000000000..a262e0cea36
--- /dev/null
+++ b/src/caffe/test/test_triplet_loss_layer.cpp
@@ -0,0 +1,106 @@
+#include <cmath>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layers/triplet_loss_layer.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class TripletLossLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  TripletLossLayerTest()
+      : blob_bottom_anchor_(new Blob<Dtype>(10, 4, 5, 2)),
+        blob_bottom_same_(new Blob<Dtype>(10, 4, 5, 2)),
+        blob_bottom_diff_(new Blob<Dtype>(10, 4, 5, 2)),
+        blob_top_loss_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_anchor_);
+    blob_bottom_vec_.push_back(blob_bottom_anchor_);
+
+    filler.Fill(this->blob_bottom_same_);
+    blob_bottom_vec_.push_back(blob_bottom_same_);
+
+    filler.Fill(this->blob_bottom_diff_);
+    blob_bottom_vec_.push_back(blob_bottom_diff_);
+
+    blob_top_vec_.push_back(blob_top_loss_);
+  }
+  virtual ~TripletLossLayerTest() {
+    delete blob_bottom_anchor_;
+    delete blob_bottom_same_;
+    delete blob_bottom_diff_;
+    delete blob_top_loss_;
+  }
+
+  void TestForward() {
+    // Get the loss without a specified objective weight -- should be
+    // equivalent to explicitly specifiying a weight of 1.
+    LayerParameter layer_param;
+    TripletLossLayer<Dtype> layer_weight_1(layer_param);
+    layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+    const Dtype loss_weight_1 =
+        layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+
+    // Make sure the loss is non-trivial.
+    const Dtype kNonTrivialAbsThresh = 1e-1;
+    EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);
+
+    // Get the loss again with a different objective weight; check that it is
+    // scaled appropriately.
+    const Dtype kLossWeight = 3.7;
+    layer_param.add_loss_weight(kLossWeight);
+    TripletLossLayer<Dtype> layer_weight_2(layer_param);
+    layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+    const Dtype loss_weight_2 =
+        layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+    const Dtype kErrorMargin = 1e-5;
+    EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
+
+    // Get the loss again with a different alpha; check that it is changed
+    // appropriately.
+    const Dtype kAlpha = 0.314;
+    layer_param.mutable_threshold_param()->set_threshold(kAlpha);
+    TripletLossLayer<Dtype> layer_weight_2_alpha(layer_param);
+    layer_weight_2_alpha.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+    const Dtype loss_weight_2_alpha = layer_weight_2_alpha.Forward(
+        this->blob_bottom_vec_, this->blob_top_vec_);
+    EXPECT_GE(loss_weight_2_alpha, loss_weight_2);
+  }
+
+  Blob<Dtype>* const blob_bottom_anchor_;
+  Blob<Dtype>* const blob_bottom_same_;
+  Blob<Dtype>* const blob_bottom_diff_;
+  Blob<Dtype>* const blob_top_loss_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(TripletLossLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(TripletLossLayerTest, TestForward) { this->TestForward(); }
+
+TYPED_TEST(TripletLossLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  const Dtype kLossWeight = 3.7;
+  layer_param.add_loss_weight(kLossWeight);
+  TripletLossLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+}  // namespace caffe