Skip to content

Commit 93bfcb5

Browse files
fyulongjon
authored and committed Dec 28, 2015
add support for 2D dilated convolution
·
rc51.0
1 parent 03a84bf commit 93bfcb5

15 files changed

+170
-74
lines changed
 

‎include/caffe/layers/base_conv_layer.hpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
6868
Blob<int> stride_;
6969
/// @brief The spatial dimensions of the padding.
7070
Blob<int> pad_;
71+
/// @brief The spatial dimensions of the dilation.
72+
Blob<int> dilation_;
7173
/// @brief The spatial dimensions of the convolution input.
7274
Blob<int> conv_input_shape_;
7375
/// @brief The spatial dimensions of the col_buffer.
@@ -99,7 +101,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
99101
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
100102
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
101103
pad_.cpu_data()[0], pad_.cpu_data()[1],
102-
stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
104+
stride_.cpu_data()[0], stride_.cpu_data()[1],
105+
dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
103106
} else {
104107
im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
105108
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
@@ -112,7 +115,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
112115
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
113116
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
114117
pad_.cpu_data()[0], pad_.cpu_data()[1],
115-
stride_.cpu_data()[0], stride_.cpu_data()[1], data);
118+
stride_.cpu_data()[0], stride_.cpu_data()[1],
119+
dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
116120
} else {
117121
col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
118122
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
@@ -126,7 +130,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
126130
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
127131
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
128132
pad_.cpu_data()[0], pad_.cpu_data()[1],
129-
stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
133+
stride_.cpu_data()[0], stride_.cpu_data()[1],
134+
dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
130135
} else {
131136
im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_,
132137
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
@@ -140,7 +145,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
140145
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
141146
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
142147
pad_.cpu_data()[0], pad_.cpu_data()[1],
143-
stride_.cpu_data()[0], stride_.cpu_data()[1], data);
148+
stride_.cpu_data()[0], stride_.cpu_data()[1],
149+
dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
144150
} else {
145151
col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
146152
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),

‎include/caffe/layers/conv_layer.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
4444
* convolution, given by pad for equal dimensions or pad_h and pad_w for
4545
* different padding. Input padding is computed implicitly instead of
4646
* actually padding.
47+
* - dilation (\b optional, default 1). The filter
48+
* dilation, given by dilation_size for equal dimensions for different
49+
* dilation. By default the convolution has dilation 1.
4750
* - group (\b optional, default 1). The number of filter groups. Group
4851
* convolution is a method for reducing parameterization by selectively
4952
* connecting input and output channels. The input and output channel dimensions must be divisible

‎include/caffe/layers/im2col_layer.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class Im2colLayer : public Layer<Dtype> {
4646
Blob<int> stride_;
4747
/// @brief The spatial dimensions of the padding.
4848
Blob<int> pad_;
49+
/// @brief The spatial dimensions of the dilation.
50+
Blob<int> dilation_;
4951

5052
int num_spatial_axes_;
5153
int bottom_dim_;

‎include/caffe/util/im2col.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ template <typename Dtype>
1313
void im2col_cpu(const Dtype* data_im, const int channels,
1414
const int height, const int width, const int kernel_h, const int kernel_w,
1515
const int pad_h, const int pad_w, const int stride_h,
16-
const int stride_w, Dtype* data_col);
16+
const int stride_w, const int dilation_h, const int dilation_w,
17+
Dtype* data_col);
1718

1819
template <typename Dtype>
1920
void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,
@@ -25,7 +26,8 @@ template <typename Dtype>
2526
void col2im_cpu(const Dtype* data_col, const int channels,
2627
const int height, const int width, const int kernel_h, const int kernel_w,
2728
const int pad_h, const int pad_w, const int stride_h,
28-
const int stride_w, Dtype* data_im);
29+
const int stride_w, const int dilation_h, const int dilation_w,
30+
Dtype* data_im);
2931

3032
template <typename Dtype>
3133
void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes,
@@ -37,7 +39,8 @@ template <typename Dtype>
3739
void im2col_gpu(const Dtype* data_im, const int channels,
3840
const int height, const int width, const int kernel_h, const int kernel_w,
3941
const int pad_h, const int pad_w, const int stride_h,
40-
const int stride_w, Dtype* data_col);
42+
const int stride_w, const int dilation_h, const int dilation_w,
43+
Dtype* data_col);
4144

4245
template <typename Dtype>
4346
void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes,
@@ -49,7 +52,8 @@ template <typename Dtype>
4952
void col2im_gpu(const Dtype* data_col, const int channels,
5053
const int height, const int width, const int kernel_h, const int kernel_w,
5154
const int pad_h, const int pad_w, const int stride_h,
52-
const int stride_w, Dtype* data_im);
55+
const int stride_w, const int dilation_h, const int dilation_w,
56+
Dtype* data_im);
5357

5458
} // namespace caffe
5559

‎src/caffe/layer_factory.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,30 @@ namespace caffe {
3737
template <typename Dtype>
3838
shared_ptr<Layer<Dtype> > GetConvolutionLayer(
3939
const LayerParameter& param) {
40-
ConvolutionParameter_Engine engine = param.convolution_param().engine();
40+
ConvolutionParameter conv_param = param.convolution_param();
41+
ConvolutionParameter_Engine engine = conv_param.engine();
42+
bool use_dilation = false;
43+
for (int i = 0; i < conv_param.dilation_size(); ++i) {
44+
if (conv_param.dilation(i) > 1) {
45+
use_dilation = true;
46+
}
47+
}
4148
if (engine == ConvolutionParameter_Engine_DEFAULT) {
4249
engine = ConvolutionParameter_Engine_CAFFE;
4350
#ifdef USE_CUDNN
44-
engine = ConvolutionParameter_Engine_CUDNN;
51+
if (!use_dilation) {
52+
engine = ConvolutionParameter_Engine_CUDNN;
53+
}
4554
#endif
4655
}
4756
if (engine == ConvolutionParameter_Engine_CAFFE) {
4857
return shared_ptr<Layer<Dtype> >(new ConvolutionLayer<Dtype>(param));
4958
#ifdef USE_CUDNN
5059
} else if (engine == ConvolutionParameter_Engine_CUDNN) {
60+
if (use_dilation) {
61+
LOG(FATAL) << "CuDNN doesn't support the dilated convolution at Layer "
62+
<< param.name();
63+
}
5164
return shared_ptr<Layer<Dtype> >(new CuDNNConvolutionLayer<Dtype>(param));
5265
#endif
5366
} else {

‎src/caffe/layers/base_conv_layer.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
3636
CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_)
3737
<< "kernel_size must be specified once, or once per spatial dimension "
3838
<< "(kernel_size specified " << num_kernel_dims << " times; "
39-
<< num_spatial_axes_ << " spatial dims);";
39+
<< num_spatial_axes_ << " spatial dims).";
4040
for (int i = 0; i < num_spatial_axes_; ++i) {
4141
kernel_shape_data[i] =
4242
conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i);
@@ -61,7 +61,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
6161
num_stride_dims == num_spatial_axes_)
6262
<< "stride must be specified once, or once per spatial dimension "
6363
<< "(stride specified " << num_stride_dims << " times; "
64-
<< num_spatial_axes_ << " spatial dims);";
64+
<< num_spatial_axes_ << " spatial dims).";
6565
const int kDefaultStride = 1;
6666
for (int i = 0; i < num_spatial_axes_; ++i) {
6767
stride_data[i] = (num_stride_dims == 0) ? kDefaultStride :
@@ -85,13 +85,27 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
8585
num_pad_dims == num_spatial_axes_)
8686
<< "pad must be specified once, or once per spatial dimension "
8787
<< "(pad specified " << num_pad_dims << " times; "
88-
<< num_spatial_axes_ << " spatial dims);";
88+
<< num_spatial_axes_ << " spatial dims).";
8989
const int kDefaultPad = 0;
9090
for (int i = 0; i < num_spatial_axes_; ++i) {
9191
pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
9292
conv_param.pad((num_pad_dims == 1) ? 0 : i);
9393
}
9494
}
95+
// Setup dilation dimensions (dilation_).
96+
dilation_.Reshape(spatial_dim_blob_shape);
97+
int* dilation_data = dilation_.mutable_cpu_data();
98+
const int num_dilation_dims = conv_param.dilation_size();
99+
CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
100+
num_dilation_dims == num_spatial_axes_)
101+
<< "dilation must be specified once, or once per spatial dimension "
102+
<< "(dilation specified " << num_dilation_dims << " times; "
103+
<< num_spatial_axes_ << " spatial dims).";
104+
const int kDefaultDilation = 1;
105+
for (int i = 0; i < num_spatial_axes_; ++i) {
106+
dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
107+
conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
108+
}
95109
// Special case: im2col is the identity for 1x1 convolution with stride 1
96110
// and no padding, so flag for skipping the buffer and transformation.
97111
is_1x1_ = true;

‎src/caffe/layers/conv_layer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
99
const int* kernel_shape_data = this->kernel_shape_.cpu_data();
1010
const int* stride_data = this->stride_.cpu_data();
1111
const int* pad_data = this->pad_.cpu_data();
12+
const int* dilation_data = this->dilation_.cpu_data();
1213
this->output_shape_.clear();
1314
for (int i = 0; i < this->num_spatial_axes_; ++i) {
1415
// i + 1 to skip channel axis
1516
const int input_dim = this->input_shape(i + 1);
16-
const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
17+
const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
18+
const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
1719
/ stride_data[i] + 1;
1820
this->output_shape_.push_back(output_dim);
1921
}

‎src/caffe/layers/im2col_layer.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,20 @@ void Im2colLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
8787
conv_param.pad((num_pad_dims == 1) ? 0 : i);
8888
}
8989
}
90+
// Setup dilation dimensions (dilation_).
91+
dilation_.Reshape(dim_blob_shape);
92+
int* dilation_data = dilation_.mutable_cpu_data();
93+
const int num_dilation_dims = conv_param.dilation_size();
94+
CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
95+
num_dilation_dims == num_spatial_axes_)
96+
<< "dilation must be specified once, or once per spatial dimension "
97+
<< "(dilation specified " << num_dilation_dims << " times; "
98+
<< num_spatial_axes_ << " spatial dims).";
99+
const int kDefaultDilation = 1;
100+
for (int i = 0; i < num_spatial_axes_; ++i) {
101+
dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
102+
conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
103+
}
90104
}
91105

92106
template <typename Dtype>
@@ -96,10 +110,12 @@ void Im2colLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
96110
const int* kernel_shape_data = kernel_shape_.cpu_data();
97111
const int* stride_data = stride_.cpu_data();
98112
const int* pad_data = pad_.cpu_data();
113+
const int* dilation_data = dilation_.cpu_data();
99114
for (int i = 0; i < num_spatial_axes_; ++i) {
100115
top_shape[channel_axis_] *= kernel_shape_data[i];
101116
const int input_dim = bottom[0]->shape(channel_axis_ + i + 1);
102-
const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
117+
const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
118+
const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
103119
/ stride_data[i] + 1;
104120
top_shape[channel_axis_ + i + 1] = output_dim;
105121
}
@@ -122,13 +138,15 @@ void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
122138
DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_);
123139
DCHECK_EQ(pad_.count(), num_spatial_axes_);
124140
DCHECK_EQ(stride_.count(), num_spatial_axes_);
141+
DCHECK_EQ(dilation_.count(), num_spatial_axes_);
125142
if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
126143
im2col_cpu(bottom_data + n * bottom_dim_, channels_,
127144
bottom[0]->shape(channel_axis_ + 1),
128145
bottom[0]->shape(channel_axis_ + 2),
129146
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
130147
pad_.cpu_data()[0], pad_.cpu_data()[1],
131148
stride_.cpu_data()[0], stride_.cpu_data()[1],
149+
dilation_.cpu_data()[0], dilation_.cpu_data()[1],
132150
top_data + n * top_dim_);
133151
} else {
134152
im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
@@ -153,6 +171,7 @@ void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
153171
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
154172
pad_.cpu_data()[0], pad_.cpu_data()[1],
155173
stride_.cpu_data()[0], stride_.cpu_data()[1],
174+
dilation_.cpu_data()[0], dilation_.cpu_data()[1],
156175
bottom_diff + n * bottom_dim_);
157176
} else {
158177
col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_,

‎src/caffe/layers/im2col_layer.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
1919
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
2020
pad_.cpu_data()[0], pad_.cpu_data()[1],
2121
stride_.cpu_data()[0], stride_.cpu_data()[1],
22+
dilation_.cpu_data()[0], dilation_.cpu_data()[1],
2223
top_data + n * top_dim_);
2324
} else {
2425
im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
@@ -43,6 +44,7 @@ void Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
4344
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
4445
pad_.cpu_data()[0], pad_.cpu_data()[1],
4546
stride_.cpu_data()[0], stride_.cpu_data()[1],
47+
dilation_.cpu_data()[0], dilation_.cpu_data()[1],
4648
bottom_diff + n * bottom_dim_);
4749
} else {
4850
col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_,

‎src/caffe/proto/caffe.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ message ConvolutionParameter {
518518
repeated uint32 pad = 3; // The padding size; defaults to 0
519519
repeated uint32 kernel_size = 4; // The kernel size
520520
repeated uint32 stride = 6; // The stride; defaults to 1
521+
repeated uint32 dilation = 18; // The dilation; defaults to 1
521522

522523
// For 2D convolution only, the *_h and *_w versions may also be used to
523524
// specify both spatial dimensions.

‎src/caffe/test/test_convolution_layer.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,17 @@ void caffe_conv(const Blob<Dtype>* in, ConvolutionParameter* conv_param,
4646
} else {
4747
stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1;
4848
}
49-
int kernel_d, pad_d, stride_d;
49+
int dilation_h, dilation_w;
50+
dilation_h = dilation_w = conv_param->dilation_size() ?
51+
conv_param->dilation(0) : 1;
52+
int kernel_d, pad_d, stride_d, dilation_d;
5053
if (has_depth) {
5154
kernel_d = kernel_h;
5255
stride_d = stride_h;
5356
pad_d = pad_h;
57+
dilation_d = dilation_h;
5458
} else {
55-
kernel_d = stride_d = 1;
59+
kernel_d = stride_d = dilation_d = 1;
5660
pad_d = 0;
5761
}
5862
// Groups
@@ -77,9 +81,9 @@ void caffe_conv(const Blob<Dtype>* in, ConvolutionParameter* conv_param,
7781
for (int r = 0; r < kernel_d; r++) {
7882
for (int p = 0; p < kernel_h; p++) {
7983
for (int q = 0; q < kernel_w; q++) {
80-
int in_z = z * stride_d - pad_d + r;
81-
int in_y = y * stride_h - pad_h + p;
82-
int in_x = x * stride_w - pad_w + q;
84+
int in_z = z * stride_d - pad_d + r * dilation_d;
85+
int in_y = y * stride_h - pad_h + p * dilation_h;
86+
int in_x = x * stride_w - pad_w + q * dilation_w;
8387
if (in_z >= 0 && in_z < (has_depth ? in->shape(2) : 1)
8488
&& in_y >= 0 && in_y < in->shape(2 + has_depth)
8589
&& in_x >= 0 && in_x < in->shape(3 + has_depth)) {

‎src/caffe/test/test_im2col_kernel.cu

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
1818
const int height, const int width, const int kernel_h, const int kernel_w,
1919
const int pad_h, const int pad_w,
2020
const int stride_h, const int stride_w,
21+
const int dilation_h, const int dilation_w,
2122
const int height_col, const int width_col,
2223
Dtype* data_col);
2324

@@ -38,6 +39,7 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
3839
blob_kernel_shape_(new Blob<int>()),
3940
blob_stride_(new Blob<int>()),
4041
blob_pad_(new Blob<int>()),
42+
blob_dilation_(new Blob<int>()),
4143
blob_top_(new Blob<Dtype>()),
4244
blob_top_cpu_(new Blob<Dtype>()) {
4345
FillerParameter filler_param;
@@ -47,20 +49,25 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
4749
blob_kernel_shape_->Reshape(dim_blob_shape);
4850
blob_stride_->Reshape(dim_blob_shape);
4951
blob_pad_->Reshape(dim_blob_shape);
52+
blob_dilation_->Reshape(dim_blob_shape);
5053

5154
height_ = blob_bottom_->height();
5255
width_ = blob_bottom_->width();
5356
channels_ = blob_bottom_->channels();
5457
pad_ = 0;
5558
stride_ = 2;
59+
dilation_ = 1;
5660
kernel_size_ = 3;
57-
height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
58-
width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
61+
height_col_ = (height_ + 2 * pad_ -
62+
(dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1;
63+
width_col_ = (width_ + 2 * pad_ -
64+
(dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1;
5965

6066
for (int i = 0; i < 2; ++i) {
6167
blob_kernel_shape_->mutable_cpu_data()[i] = kernel_size_;
6268
blob_stride_->mutable_cpu_data()[i] = stride_;
6369
blob_pad_->mutable_cpu_data()[i] = pad_;
70+
blob_dilation_->mutable_cpu_data()[i] = dilation_;
6471
}
6572
}
6673

@@ -71,11 +78,13 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
7178
delete blob_kernel_shape_;
7279
delete blob_stride_;
7380
delete blob_pad_;
81+
delete blob_dilation_;
7482
}
7583

7684
Blob<int>* const blob_kernel_shape_;
7785
Blob<int>* const blob_stride_;
7886
Blob<int>* const blob_pad_;
87+
Blob<int>* const blob_dilation_;
7988
Blob<Dtype>* const blob_bottom_;
8089
Blob<Dtype>* const blob_top_;
8190
Blob<Dtype>* const blob_top_cpu_;
@@ -84,6 +93,7 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
8493
int channels_;
8594
int pad_;
8695
int stride_;
96+
int dilation_;
8797
int kernel_size_;
8898
int height_col_;
8999
int width_col_;
@@ -112,7 +122,7 @@ TYPED_TEST(Im2colKernelTest, Test2D) {
112122
im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n),
113123
this->channels_, this->height_, this->width_,
114124
this->kernel_size_, this->kernel_size_, this->pad_, this->pad_,
115-
this->stride_, this->stride_,
125+
this->stride_, this->stride_, this->dilation_, this->dilation_,
116126
cpu_data + this->blob_top_cpu_->offset(n));
117127
}
118128

@@ -129,6 +139,7 @@ TYPED_TEST(Im2colKernelTest, Test2D) {
129139
num_kernels, bottom_data + this->blob_bottom_->offset(n),
130140
this->height_, this->width_, this->kernel_size_, this->kernel_size_,
131141
this->pad_, this->pad_, this->stride_, this->stride_,
142+
this->dilation_, this->dilation_,
132143
this->height_col_, this->width_col_,
133144
top_data + this->blob_top_->offset(n));
134145
CUDA_POST_KERNEL_CHECK;

‎src/caffe/test/test_im2col_layer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class Im2colLayerTest : public MultiDeviceTest<TypeParam> {
1717
typedef typename TypeParam::Dtype Dtype;
1818
protected:
1919
Im2colLayerTest()
20-
: blob_bottom_(new Blob<Dtype>(2, 3, 6, 5)),
20+
: blob_bottom_(new Blob<Dtype>(2, 3, 10, 9)),
2121
blob_top_(new Blob<Dtype>()) {
2222
// fill the values
2323
Caffe::set_random_seed(1701);
@@ -75,6 +75,7 @@ TYPED_TEST(Im2colLayerTest, TestGradient) {
7575
layer_param.mutable_convolution_param();
7676
convolution_param->add_kernel_size(3);
7777
convolution_param->add_stride(2);
78+
convolution_param->add_dilation(3);
7879
Im2colLayer<Dtype> layer(layer_param);
7980
GradientChecker<Dtype> checker(1e-2, 1e-2);
8081
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,

‎src/caffe/util/im2col.cpp

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,21 @@ void im2col_cpu(const Dtype* data_im, const int channels,
1010
const int height, const int width, const int kernel_h, const int kernel_w,
1111
const int pad_h, const int pad_w,
1212
const int stride_h, const int stride_w,
13+
const int dilation_h, const int dilation_w,
1314
Dtype* data_col) {
14-
const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
15-
const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
15+
const int height_col = (height + 2 * pad_h -
16+
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
17+
const int width_col = (width + 2 * pad_w -
18+
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
1619
const int channels_col = channels * kernel_h * kernel_w;
1720
for (int c_col = 0; c_col < channels_col; ++c_col) {
1821
int w_offset = c_col % kernel_w;
1922
int h_offset = (c_col / kernel_w) % kernel_h;
2023
int c_im = c_col / kernel_h / kernel_w;
2124
for (int h_col = 0; h_col < height_col; ++h_col) {
2225
for (int w_col = 0; w_col < width_col; ++w_col) {
23-
int h_im = h_col * stride_h - pad_h + h_offset;
24-
int w_im = w_col * stride_w - pad_w + w_offset;
26+
int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
27+
int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
2528
data_col[(c_col * height_col + h_col) * width_col + w_col] =
2629
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
2730
data_im[(c_im * height + h_im) * width + w_im] : 0;
@@ -34,11 +37,13 @@ void im2col_cpu(const Dtype* data_im, const int channels,
3437
template void im2col_cpu<float>(const float* data_im, const int channels,
3538
const int height, const int width, const int kernel_h, const int kernel_w,
3639
const int pad_h, const int pad_w, const int stride_h,
37-
const int stride_w, float* data_col);
40+
const int stride_w, const int dilation_h, const int dilation_w,
41+
float* data_col);
3842
template void im2col_cpu<double>(const double* data_im, const int channels,
3943
const int height, const int width, const int kernel_h, const int kernel_w,
4044
const int pad_h, const int pad_w, const int stride_h,
41-
const int stride_w, double* data_col);
45+
const int stride_w, const int dilation_h, const int dilation_w,
46+
double* data_col);
4247

4348
template <typename Dtype>
4449
inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col,
@@ -137,19 +142,22 @@ void col2im_cpu(const Dtype* data_col, const int channels,
137142
const int height, const int width, const int kernel_h, const int kernel_w,
138143
const int pad_h, const int pad_w,
139144
const int stride_h, const int stride_w,
145+
const int dilation_h, const int dilation_w,
140146
Dtype* data_im) {
141147
caffe_set(height * width * channels, Dtype(0), data_im);
142-
const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
143-
const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
148+
const int height_col = (height + 2 * pad_h -
149+
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
150+
const int width_col = (width + 2 * pad_w -
151+
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
144152
const int channels_col = channels * kernel_h * kernel_w;
145153
for (int c_col = 0; c_col < channels_col; ++c_col) {
146154
int w_offset = c_col % kernel_w;
147155
int h_offset = (c_col / kernel_w) % kernel_h;
148156
int c_im = c_col / kernel_h / kernel_w;
149157
for (int h_col = 0; h_col < height_col; ++h_col) {
150158
for (int w_col = 0; w_col < width_col; ++w_col) {
151-
int h_im = h_col * stride_h - pad_h + h_offset;
152-
int w_im = w_col * stride_w - pad_w + w_offset;
159+
int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
160+
int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
153161
if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
154162
data_im[(c_im * height + h_im) * width + w_im] +=
155163
data_col[(c_col * height_col + h_col) * width_col + w_col];
@@ -162,11 +170,13 @@ void col2im_cpu(const Dtype* data_col, const int channels,
162170
template void col2im_cpu<float>(const float* data_col, const int channels,
163171
const int height, const int width, const int kernel_h, const int kernel_w,
164172
const int pad_h, const int pad_w, const int stride_h,
165-
const int stride_w, float* data_im);
173+
const int stride_w, const int dilation_h, const int dilation_w,
174+
float* data_im);
166175
template void col2im_cpu<double>(const double* data_col, const int channels,
167176
const int height, const int width, const int kernel_h, const int kernel_w,
168177
const int pad_h, const int pad_w, const int stride_h,
169-
const int stride_w, double* data_im);
178+
const int stride_w, const int dilation_h, const int dilation_w,
179+
double* data_im);
170180

171181
template <typename Dtype>
172182
void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,

‎src/caffe/util/im2col.cu

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
1010
const int height, const int width, const int kernel_h, const int kernel_w,
1111
const int pad_h, const int pad_w,
1212
const int stride_h, const int stride_w,
13+
const int dilation_h, const int dilation_w,
1314
const int height_col, const int width_col,
1415
Dtype* data_col) {
1516
CUDA_KERNEL_LOOP(index, n) {
@@ -26,11 +27,11 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
2627
data_im_ptr += (c_im * height + h_offset) * width + w_offset;
2728
for (int i = 0; i < kernel_h; ++i) {
2829
for (int j = 0; j < kernel_w; ++j) {
29-
int h_im = h_offset + i;
30-
int w_im = w_offset + j;
30+
int h_im = h_offset + i * dilation_h;
31+
int w_im = w_offset + j * dilation_w;
3132
*data_col_ptr =
3233
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
33-
data_im_ptr[i * width + j] : 0;
34+
data_im_ptr[i * dilation_h * width + j * dilation_w] : 0;
3435
data_col_ptr += height_col * width_col;
3536
}
3637
}
@@ -42,17 +43,20 @@ void im2col_gpu(const Dtype* data_im, const int channels,
4243
const int height, const int width, const int kernel_h, const int kernel_w,
4344
const int pad_h, const int pad_w,
4445
const int stride_h, const int stride_w,
46+
const int dilation_h, const int dilation_w,
4547
Dtype* data_col) {
4648
// We are going to launch channels * height_col * width_col kernels, each
4749
// kernel responsible for copying a single-channel grid.
48-
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
49-
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
50+
int height_col = (height + 2 * pad_h -
51+
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
52+
int width_col = (width + 2 * pad_w -
53+
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
5054
int num_kernels = channels * height_col * width_col;
5155
// NOLINT_NEXT_LINE(whitespace/operators)
5256
im2col_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels),
5357
CAFFE_CUDA_NUM_THREADS>>>(
5458
num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h,
55-
pad_w, stride_h, stride_w, height_col,
59+
pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col,
5660
width_col, data_col);
5761
CUDA_POST_KERNEL_CHECK;
5862
}
@@ -61,11 +65,11 @@ void im2col_gpu(const Dtype* data_im, const int channels,
6165
template void im2col_gpu<float>(const float* data_im, const int channels,
6266
const int height, const int width, const int kernel_h, const int kernel_w,
6367
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
64-
float* data_col);
68+
const int dilation_h, const int dilation_w, float* data_col);
6569
template void im2col_gpu<double>(const double* data_im, const int channels,
6670
const int height, const int width, const int kernel_h, const int kernel_w,
6771
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
68-
double* data_col);
72+
const int dilation_h, const int dilation_w, double* data_col);
6973

7074
template <typename Dtype, int num_axes>
7175
__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im,
@@ -223,40 +227,35 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
223227
const int kernel_h, const int kernel_w,
224228
const int pad_h, const int pad_w,
225229
const int stride_h, const int stride_w,
230+
const int dilation_h, const int dilation_w,
226231
const int height_col, const int width_col,
227232
Dtype* data_im) {
228233
CUDA_KERNEL_LOOP(index, n) {
229234
Dtype val = 0;
230235
const int w_im = index % width + pad_w;
231236
const int h_im = (index / width) % height + pad_h;
232237
const int c_im = index / (width * height);
238+
int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
239+
int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
233240
// compute the start and end of the output
234241
const int w_col_start =
235-
(w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1;
236-
const int w_col_end =
237-
min(w_im / stride_w + 1, width_col);
242+
(w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
243+
const int w_col_end = min(w_im / stride_w + 1, width_col);
238244
const int h_col_start =
239-
(h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1;
240-
const int h_col_end =
241-
min(h_im / stride_h + 1, height_col);
242-
/*
243-
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
244-
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
245-
// the col location: [c * width * height + h_out, w_out]
246-
int c_col = c_im * kernel_h * kernel_w
247-
+ (h_im - h_col * stride_h) * kernel_w + (w_im - w_col * stride_w);
248-
val += data_col[(c_col * height_col + h_col) * width_col + w_col];
249-
}
250-
}
251-
*/
252-
// equivalent implementation
253-
int offset = (c_im * kernel_h * kernel_w + h_im * kernel_w + w_im)
254-
* height_col * width_col;
255-
int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
256-
int coeff_w_col = (1 - stride_w * height_col * width_col);
257-
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
258-
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
259-
val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
245+
(h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
246+
const int h_col_end = min(h_im / stride_h + 1, height_col);
247+
// TODO: use LCM of stride and dilation to avoid unnecessary loops
248+
for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
249+
for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
250+
int h_k = (h_im - h_col * stride_h);
251+
int w_k = (w_im - w_col * stride_w);
252+
if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
253+
h_k /= dilation_h;
254+
w_k /= dilation_w;
255+
int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
256+
height_col + h_col) * width_col + w_col;
257+
val += data_col[data_col_index];
258+
}
260259
}
261260
}
262261
data_im[index] = val;
@@ -267,17 +266,20 @@ template <typename Dtype>
267266
void col2im_gpu(const Dtype* data_col, const int channels,
268267
const int height, const int width, const int kernel_h, const int kernel_w,
269268
const int pad_h, const int pad_w, const int stride_h,
270-
const int stride_w, Dtype* data_im) {
271-
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
272-
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
269+
const int stride_w, const int dilation_h, const int dilation_w,
270+
Dtype* data_im) {
271+
int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) /
272+
stride_h + 1;
273+
int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) /
274+
stride_w + 1;
273275
int num_kernels = channels * height * width;
274276
// To avoid involving atomic operations, we will launch one kernel per
275277
// bottom dimension, and then in the kernel add up the top dimensions.
276278
// NOLINT_NEXT_LINE(whitespace/operators)
277279
col2im_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels),
278280
CAFFE_CUDA_NUM_THREADS>>>(
279281
num_kernels, data_col, height, width, channels, kernel_h, kernel_w,
280-
pad_h, pad_w, stride_h, stride_w,
282+
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
281283
height_col, width_col, data_im);
282284
CUDA_POST_KERNEL_CHECK;
283285
}
@@ -286,11 +288,13 @@ void col2im_gpu(const Dtype* data_col, const int channels,
286288
template void col2im_gpu<float>(const float* data_col, const int channels,
287289
const int height, const int width, const int kernel_h, const int kernel_w,
288290
const int pad_h, const int pad_w, const int stride_h,
289-
const int stride_w, float* data_im);
291+
const int stride_w, const int dilation_h, const int dilation_w,
292+
float* data_im);
290293
template void col2im_gpu<double>(const double* data_col, const int channels,
291294
const int height, const int width, const int kernel_h, const int kernel_w,
292295
const int pad_h, const int pad_w, const int stride_h,
293-
const int stride_w, double* data_im);
296+
const int stride_w, const int dilation_h, const int dilation_w,
297+
double* data_im);
294298

295299
template <typename Dtype, int num_axes>
296300
__global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col,

0 commit comments

Comments
 (0)
Please sign in to comment.