Source code for npdl.layers.pooling

# -*- coding: utf-8 -*-

import numpy as np

from .base import Layer
from ..initializations import _zero


class MeanPooling(Layer):
    """Average pooling operation for spatial data.

    Parameters
    ----------
    pool_size : tuple of 2 integers
        Factors by which to downscale (vertical, horizontal).
        (2, 2) will halve the image in each dimension.

    Returns
    -------
    4D numpy.array with shape
    `(nb_samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th',
    or 4D tensor with shape
    `(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
    """

    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.out_shape = None
        self.input_shape = None
    def connect_to(self, prev_layer):
        assert 5 > len(prev_layer.out_shape) >= 3
        old_h, old_w = prev_layer.out_shape[-2:]
        pool_h, pool_w = self.pool_size
        new_h, new_w = old_h // pool_h, old_w // pool_w
        assert old_h % pool_h == old_w % pool_w == 0
        self.out_shape = prev_layer.out_shape[:-2] + (new_h, new_w)
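    # Worked shape example (illustrative, not from the original source):
    # with pool_size=(2, 2), an upstream out_shape of (None, 16, 28, 28)
    # becomes (None, 16, 14, 14). The asserts reject inputs whose height
    # or width is not an exact multiple of the pool size, since the
    # windows used below are non-overlapping and unpadded.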
    def forward(self, input, *args, **kwargs):
        # shape
        self.input_shape = input.shape
        pool_h, pool_w = self.pool_size
        new_h, new_w = self.out_shape[-2:]

        # forward
        outputs = _zero(self.input_shape[:-2] + self.out_shape[-2:])

        if np.ndim(input) == 4:
            nb_batch, nb_axis, _, _ = input.shape
            for a in np.arange(nb_batch):
                for b in np.arange(nb_axis):
                    for h in np.arange(new_h):
                        for w in np.arange(new_w):
                            # average over the (pool_h, pool_w) window
                            # anchored at (h * pool_h, w * pool_w)
                            h_shift, w_shift = h * pool_h, w * pool_w
                            outputs[a, b, h, w] = np.mean(
                                input[a, b, h_shift:h_shift + pool_h,
                                      w_shift:w_shift + pool_w])
        elif np.ndim(input) == 3:
            nb_batch, _, _ = input.shape
            for a in np.arange(nb_batch):
                for h in np.arange(new_h):
                    for w in np.arange(new_w):
                        h_shift, w_shift = h * pool_h, w * pool_w
                        outputs[a, h, w] = np.mean(
                            input[a, h_shift:h_shift + pool_h,
                                  w_shift:w_shift + pool_w])
        else:
            raise ValueError("Only 3D or 4D input is supported.")

        return outputs
    def backward(self, pre_grad, *args, **kwargs):
        new_h, new_w = self.out_shape[-2:]
        pool_h, pool_w = self.pool_size
        length = np.prod(self.pool_size)

        layer_grads = _zero(self.input_shape)

        if np.ndim(pre_grad) == 4:
            nb_batch, nb_axis, _, _ = pre_grad.shape
            for a in np.arange(nb_batch):
                for b in np.arange(nb_axis):
                    for h in np.arange(new_h):
                        for w in np.arange(new_w):
                            # spread the gradient evenly over the window
                            h_shift, w_shift = h * pool_h, w * pool_w
                            layer_grads[a, b,
                                        h_shift:h_shift + pool_h,
                                        w_shift:w_shift + pool_w] = \
                                pre_grad[a, b, h, w] / length
        elif np.ndim(pre_grad) == 3:
            nb_batch, _, _ = pre_grad.shape
            for a in np.arange(nb_batch):
                for h in np.arange(new_h):
                    for w in np.arange(new_w):
                        h_shift, w_shift = h * pool_h, w * pool_w
                        layer_grads[a,
                                    h_shift:h_shift + pool_h,
                                    w_shift:w_shift + pool_w] = \
                            pre_grad[a, h, w] / length
        else:
            raise ValueError("Only 3D or 4D gradients are supported.")

        return layer_grads
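# A minimal usage sketch (illustrative; `_PrevStub` is a hypothetical
# stand-in for any upstream layer, since `connect_to` only reads its
# `out_shape` attribute):
#
# >>> import numpy as np
# >>> class _PrevStub:
# ...     out_shape = (None, 1, 4, 4)  # (nb_batch, channels, height, width)
# >>> pool = MeanPooling(pool_size=(2, 2))
# >>> pool.connect_to(_PrevStub())
# >>> x = np.arange(16.0).reshape(1, 1, 4, 4)
# >>> pool.forward(x)[0, 0]                  # mean of each 2x2 window
# array([[ 2.5,  4.5],
#        [10.5, 12.5]])
# >>> pool.backward(np.ones((1, 1, 2, 2)))[0, 0, 0, 0]   # 1/4 per cell
# 0.25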
class MaxPooling(Layer):
    """Max pooling operation for spatial data.

    Parameters
    ----------
    pool_size : tuple of 2 integers
        Factors by which to downscale (vertical, horizontal).
        (2, 2) will halve the image in each dimension.

    Returns
    -------
    4D numpy.array with shape
    `(nb_samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th',
    or 4D tensor with shape
    `(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
    """

    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.input_shape = None
        self.out_shape = None
        self.last_input = None
    def connect_to(self, prev_layer):
        # prev_layer.out_shape: (nb_batch, ..., height, width)
        assert len(prev_layer.out_shape) >= 3
        old_h, old_w = prev_layer.out_shape[-2:]
        pool_h, pool_w = self.pool_size
        new_h, new_w = old_h // pool_h, old_w // pool_w
        assert old_h % pool_h == old_w % pool_w == 0
        self.out_shape = prev_layer.out_shape[:-2] + (new_h, new_w)
    def forward(self, input, *args, **kwargs):
        # shape
        self.input_shape = input.shape
        pool_h, pool_w = self.pool_size
        new_h, new_w = self.out_shape[-2:]

        # forward: keep the input around for the backward pass
        self.last_input = input
        outputs = _zero(self.input_shape[:-2] + self.out_shape[-2:])

        if np.ndim(input) == 4:
            nb_batch, nb_axis, _, _ = input.shape
            for a in np.arange(nb_batch):
                for b in np.arange(nb_axis):
                    for h in np.arange(new_h):
                        for w in np.arange(new_w):
                            # max over the (pool_h, pool_w) window
                            # anchored at (h * pool_h, w * pool_w)
                            h_shift, w_shift = h * pool_h, w * pool_w
                            outputs[a, b, h, w] = np.max(
                                input[a, b, h_shift:h_shift + pool_h,
                                      w_shift:w_shift + pool_w])
        elif np.ndim(input) == 3:
            nb_batch, _, _ = input.shape
            for a in np.arange(nb_batch):
                for h in np.arange(new_h):
                    for w in np.arange(new_w):
                        h_shift, w_shift = h * pool_h, w * pool_w
                        outputs[a, h, w] = np.max(
                            input[a, h_shift:h_shift + pool_h,
                                  w_shift:w_shift + pool_w])
        else:
            raise ValueError("Only 3D or 4D input is supported.")

        return outputs
    def backward(self, pre_grad, *args, **kwargs):
        new_h, new_w = self.out_shape[-2:]
        pool_h, pool_w = self.pool_size

        layer_grads = _zero(self.input_shape)

        if np.ndim(pre_grad) == 4:
            nb_batch, nb_axis, _, _ = pre_grad.shape
            for a in np.arange(nb_batch):
                for b in np.arange(nb_axis):
                    for h in np.arange(new_h):
                        for w in np.arange(new_w):
                            # route the gradient to the argmax of the window
                            h_shift, w_shift = h * pool_h, w * pool_w
                            patch = self.last_input[
                                a, b, h_shift:h_shift + pool_h,
                                w_shift:w_shift + pool_w]
                            max_idx = np.unravel_index(patch.argmax(),
                                                       patch.shape)
                            layer_grads[a, b,
                                        h_shift + max_idx[0],
                                        w_shift + max_idx[1]] = \
                                pre_grad[a, b, h, w]
        elif np.ndim(pre_grad) == 3:
            nb_batch, _, _ = pre_grad.shape
            for a in np.arange(nb_batch):
                for h in np.arange(new_h):
                    for w in np.arange(new_w):
                        h_shift, w_shift = h * pool_h, w * pool_w
                        patch = self.last_input[
                            a, h_shift:h_shift + pool_h,
                            w_shift:w_shift + pool_w]
                        max_idx = np.unravel_index(patch.argmax(),
                                                   patch.shape)
                        layer_grads[a,
                                    h_shift + max_idx[0],
                                    w_shift + max_idx[1]] = \
                            pre_grad[a, h, w]
        else:
            raise ValueError("Only 3D or 4D gradients are supported.")

        return layer_grads
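# A minimal usage sketch (illustrative; reusing the hypothetical
# `_PrevStub` idea from the MeanPooling example above):
#
# >>> import numpy as np
# >>> class _PrevStub:
# ...     out_shape = (None, 1, 4, 4)
# >>> pool = MaxPooling(pool_size=(2, 2))
# >>> pool.connect_to(_PrevStub())
# >>> x = np.arange(16.0).reshape(1, 1, 4, 4)
# >>> pool.forward(x)[0, 0]                  # max of each 2x2 window
# array([[ 5.,  7.],
#        [13., 15.]])
# >>> pool.backward(np.ones((1, 1, 2, 2)))[0, 0]   # ones at the argmaxes
# array([[0., 0., 0., 0.],
#        [0., 1., 0., 1.],
#        [0., 0., 0., 0.],
#        [0., 1., 0., 1.]])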