
Module kerod.core.learning_rate_schedule

ManualStepping is designed to be integrated into the computation graph and computes the learning rate at each step.

LearningRateScheduler, in contrast, is a Callback handled by `fit` in Keras.

View Source
"""

ManualStepping is designed to be integrated into the computation graph and computes the learning rate at each step.

LearningRateScheduler, in contrast, is a Callback handled by `fit` in Keras.

"""

from typing import List

import tensorflow as tf

from tensorflow.keras import backend as K

from tensorflow.keras.optimizers.schedules import LearningRateSchedule

from tensorflow.keras.callbacks import Callback

class LearningRateScheduler(Callback):

    """Warmup Learning rate scheduler. It will perform at the beginning of the training

    a linear warmup from `init_lr` to `base_lr`. The learning rate is decreased by 10 according

    to the schedule provided by `epochs`.

    Arguments:

    - *base_lr*: The target learning rate value after the linear warmup

    - *num_gpus*: Number of gpus used during the training

    - *epochs*: A list of epochs at which the learning rate should be reduced.

    - *use_warmup*: Perform the warmup strategy.

    - *init_lr*: Learning rate value from which the warmup will start.

    - *num_warmup_steps*: Number of training steps over which the warmup is performed.

    """

    def __init__(self,

                 base_lr: float,

                 num_gpus: int,

                 epochs: List[int],

                 use_warmup: bool = True,

                 init_lr: float = 1e-2 / 3,

                 num_warmup_steps: int = 1000):

        super().__init__()

        self._init_lr = init_lr * min(8 / num_gpus, 1)

        self.slope = (base_lr - self._init_lr) / num_warmup_steps

        self._epochs_to_lr = {epoch: base_lr * 1 / 10**(i + 1) for i, epoch in enumerate(epochs)}

        self._epochs = epochs

        self._num_gpus = num_gpus

        self._use_warmup = use_warmup

        self._num_warmup_steps = num_warmup_steps

    def on_train_batch_begin(self, batch, logs=None):

        global_step = K.get_value(self.model.optimizer.iterations)

        # During the first `num_warmup_steps` steps, move linearly from
        # `init_lr` towards `base_lr`.
        if global_step <= self._num_warmup_steps and global_step != 0 and self._use_warmup:

            lr = self._init_lr + global_step * self.slope

            K.set_value(self.model.optimizer.lr, lr)

    def on_epoch_begin(self, epoch, logs=None):

        if not hasattr(self.model.optimizer, 'lr'):

            raise ValueError('Optimizer must have a "lr" attribute.')

        if not hasattr(self.model.optimizer, 'iterations'):

            raise ValueError('Optimizer must have an "iterations" attribute.')

        global_step = K.get_value(self.model.optimizer.iterations)

        # Once the warmup is over, pick the most recent decay epoch that has
        # been reached and apply its learning rate.
        target_epochs = [

            e for e in self._epochs if epoch >= e and global_step > self._num_warmup_steps

        ]

        if target_epochs:

            lr = self._epochs_to_lr[max(target_epochs)]

            K.set_value(self.model.optimizer.lr, lr)

class ManualStepping(LearningRateSchedule):

    """Manually stepped learning rate schedule. (Taken and modified from Google object detection)

    This function provides fine-grained control over learning rates. One must

    specify a sequence of learning rates as well as a set of integer steps

    at which the current learning rate must transition to the next.  For example,

    if boundaries = [5, 10] and rates = [.1, .01, .001], then the learning

    rate returned by this function is .1 for step=0,...,4, .01 for

    step=5...9, and .001 for step=10 and onward.

    You can pass this schedule directly into a `tf.keras.optimizers.Optimizer`

    as the learning rate.

    ```python

    lr_schedule = ManualStepping(

        boundaries=[5, 10],

        rates=[.1, .01, .001],

        warmup=True)

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),

                loss='sparse_categorical_crossentropy',

                metrics=['accuracy'])

    model.fit(data, labels, epochs=5)

    ```

    The learning rate schedule is also serializable and deserializable using

    `tf.keras.optimizers.schedules.serialize` and

    `tf.keras.optimizers.schedules.deserialize`.

    Arguments:

    - *boundaries*: A List of scalar `int32` or `int64` or a `Tensor`. It is a

    list of global steps at which to switch learning

    rates.  This list is assumed to consist of increasing positive integers.

    - *rates*: a list of (float) learning rates corresponding to intervals between

    the boundaries.  The length of this list must be exactly

    len(boundaries) + 1.

    - *warmup*: Whether to linearly interpolate learning rate for steps in

    [0, boundaries[0]].

    - *name*: String. Optional name of the operation. Defaults to 'ManualStepping'.

    Return:

    A 1-arg callable learning rate schedule that takes the current optimizer

    step and outputs the decayed learning rate, a scalar `Tensor` of the same

    type as `rates`.

    """

    def __init__(self, boundaries, rates, warmup=False, name=None):

        super().__init__()

        self.name = name

        if warmup and boundaries:

            slope = (rates[1] - rates[0]) * 1.0 / boundaries[0]

            warmup_steps = list(range(boundaries[0]))

            warmup_rates = [rates[0] + slope * step for step in warmup_steps]

            boundaries = warmup_steps + boundaries

            rates = warmup_rates + rates[1:]

        else:

            boundaries = [0] + boundaries

        self.warmup = warmup

        self.rates = rates

        self.boundaries = boundaries

        self.num_boundaries = len(boundaries)

        self.dtype = tf.convert_to_tensor(rates[0]).dtype

    def __call__(self, step):

        with tf.name_scope(self.name or "ManualStepping"):

            boundaries = tf.convert_to_tensor(self.boundaries, self.dtype)

            rates = tf.convert_to_tensor(self.rates, self.dtype)

            step = tf.convert_to_tensor(step, self.dtype)

            rate_index = tf.reduce_max(

                tf.where(tf.greater_equal(step, boundaries), list(range(self.num_boundaries)),

                         [0] * self.num_boundaries))

            return tf.reduce_sum(rates * tf.one_hot(rate_index, depth=self.num_boundaries))

    def get_config(self):

        return {

            "boundaries": self.boundaries,

            "rates": self.rates,

            "warmup": self.warmup,

            "name": self.name

        }

Classes

LearningRateScheduler

class LearningRateScheduler(
    base_lr: float,
    num_gpus: int,
    epochs: List[int],
    use_warmup: bool = True,
    init_lr: float = 0.0033333333333333335,
    num_warmup_steps: int = 1000
)

Warmup learning rate scheduler. At the beginning of training it performs a linear warmup from init_lr to base_lr. The learning rate is then divided by 10 at each of the epochs listed in epochs.

Arguments:

  • base_lr: The target learning rate value after the linear warmup
  • num_gpus: Number of gpus used during the training
  • epochs: A list of epochs at which the learning rate should be reduced.
  • use_warmup: Perform the warmup strategy.
  • init_lr: Learning rate value from which the warmup will start.
  • num_warmup_steps: Number of training steps over which the warmup is performed.
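
For orientation, here is a minimal usage sketch. The toy model and the `train_ds` dataset are hypothetical placeholders, not part of kerod:

```python
import tensorflow as tf

from kerod.core.learning_rate_schedule import LearningRateScheduler

# Hypothetical toy model; any compiled Keras model works.
model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2),
              loss='sparse_categorical_crossentropy')

# Warm up linearly to base_lr over the first 1000 steps, then divide the
# learning rate by 10 at epoch 8 and again at epoch 10.
lr_callback = LearningRateScheduler(base_lr=0.02, num_gpus=1, epochs=[8, 10])

# train_ds is assumed to be a tf.data.Dataset of (features, labels) batches.
model.fit(train_ds, epochs=12, callbacks=[lr_callback])
```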

Ancestors (in MRO)

  • tensorflow.python.keras.callbacks.Callback

Methods

on_batch_begin

def on_batch_begin(
    self,
    batch,
    logs=None
)

A backwards compatibility alias for on_train_batch_begin.

View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_batch_begin(self, batch, logs=None):

    """A backwards compatibility alias for `on_train_batch_begin`."""

on_batch_end

def on_batch_end(
    self,
    batch,
    logs=None
)

A backwards compatibility alias for on_train_batch_end.

View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_batch_end(self, batch, logs=None):

    """A backwards compatibility alias for `on_train_batch_end`."""

on_epoch_begin

def on_epoch_begin(
    self,
    epoch,
    logs=None
)
View Source
    def on_epoch_begin(self, epoch, logs=None):

        if not hasattr(self.model.optimizer, 'lr'):

            raise ValueError('Optimizer must have a "lr" attribute.')

        if not hasattr(self.model.optimizer, 'iterations'):

            raise ValueError('Optimizer must have an "iterations" attribute.')

        global_step = K.get_value(self.model.optimizer.iterations)

        # Once the warmup is over, pick the most recent decay epoch that has
        # been reached and apply its learning rate.
        target_epochs = [

            e for e in self._epochs if epoch >= e and global_step > self._num_warmup_steps

        ]

        if target_epochs:

            lr = self._epochs_to_lr[max(target_epochs)]

            K.set_value(self.model.optimizer.lr, lr)

on_epoch_end

def on_epoch_end(
    self,
    epoch,
    logs=None
)

Called at the end of an epoch.

Subclasses should override for any actions to run. This function should only be called during TRAIN mode.

Parameters:

  • epoch: Integer, index of epoch.
  • logs: Dict, metric results for this training epoch, and for the validation epoch if validation is performed. Validation result keys are prefixed with val_. For training epoch, the values of the Model's metrics are returned. Example: {'loss': 0.2, 'acc': 0.7}.
View Source
  @doc_controls.for_subclass_implementers

  def on_epoch_end(self, epoch, logs=None):

    """Called at the end of an epoch.

    Subclasses should override for any actions to run. This function should only

    be called during TRAIN mode.

    Arguments:

        epoch: Integer, index of epoch.

        logs: Dict, metric results for this training epoch, and for the

          validation epoch if validation is performed. Validation result keys

          are prefixed with `val_`. For training epoch, the values of the

         `Model`'s metrics are returned. Example : `{'loss': 0.2, 'acc': 0.7}`.

    """

on_predict_batch_begin

def on_predict_batch_begin(
    self,
    batch,
    logs=None
)

Called at the beginning of a batch in predict methods.

Subclasses should override for any actions to run.

Note that if the steps_per_execution argument to compile in tf.keras.Model is set to N, this method will only be called every N batches.

Parameters:

  • batch: Integer, index of batch within the current epoch.
  • logs: Dict, contains the return value of model.predict_step; it typically returns a dict with a key 'outputs' containing the model's outputs.
View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_predict_batch_begin(self, batch, logs=None):

    """Called at the beginning of a batch in `predict` methods.

    Subclasses should override for any actions to run.

    Note that if the `steps_per_execution` argument to `compile` in

    `tf.keras.Model` is set to `N`, this method will only be called every `N`

    batches.

    Arguments:

        batch: Integer, index of batch within the current epoch.

        logs: Dict, contains the return value of `model.predict_step`,

          it typically returns a dict with a key 'outputs' containing

          the model's outputs.

    """

on_predict_batch_end

def on_predict_batch_end(
    self,
    batch,
    logs=None
)

Called at the end of a batch in predict methods.

Subclasses should override for any actions to run.

Note that if the steps_per_execution argument to compile in tf.keras.Model is set to N, this method will only be called every N batches.

Parameters:

  • batch: Integer, index of batch within the current epoch.
  • logs: Dict. Aggregated metric results up until this batch.
View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_predict_batch_end(self, batch, logs=None):

    """Called at the end of a batch in `predict` methods.

    Subclasses should override for any actions to run.

    Note that if the `steps_per_execution` argument to `compile` in

    `tf.keras.Model` is set to `N`, this method will only be called every `N`

    batches.

    Arguments:

        batch: Integer, index of batch within the current epoch.

        logs: Dict. Aggregated metric results up until this batch.

    """

on_predict_begin

def on_predict_begin(
    self,
    logs=None
)

Called at the beginning of prediction.

Subclasses should override for any actions to run.

Parameters:

  • logs: Dict. Currently no data is passed to this argument for this method but that may change in the future.
View Source
  @doc_controls.for_subclass_implementers

  def on_predict_begin(self, logs=None):

    """Called at the beginning of prediction.

    Subclasses should override for any actions to run.

    Arguments:

        logs: Dict. Currently no data is passed to this argument for this method

          but that may change in the future.

    """

on_predict_end

def on_predict_end(
    self,
    logs=None
)

Called at the end of prediction.

Subclasses should override for any actions to run.

Parameters:

  • logs: Dict. Currently no data is passed to this argument for this method but that may change in the future.
View Source
  @doc_controls.for_subclass_implementers

  def on_predict_end(self, logs=None):

    """Called at the end of prediction.

    Subclasses should override for any actions to run.

    Arguments:

        logs: Dict. Currently no data is passed to this argument for this method

          but that may change in the future.

    """

on_test_batch_begin

def on_test_batch_begin(
    self,
    batch,
    logs=None
)

Called at the beginning of a batch in evaluate methods.

Also called at the beginning of a validation batch in the fit methods, if validation data is provided.

Subclasses should override for any actions to run.

Note that if the steps_per_execution argument to compile in tf.keras.Model is set to N, this method will only be called every N batches.

Parameters:

  • batch: Integer, index of batch within the current epoch.
  • logs: Dict, contains the return value of model.test_step. Typically, the values of the Model's metrics are returned. Example: {'loss': 0.2, 'accuracy': 0.7}.
View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_test_batch_begin(self, batch, logs=None):

    """Called at the beginning of a batch in `evaluate` methods.

    Also called at the beginning of a validation batch in the `fit`

    methods, if validation data is provided.

    Subclasses should override for any actions to run.

    Note that if the `steps_per_execution` argument to `compile` in

    `tf.keras.Model` is set to `N`, this method will only be called every `N`

    batches.

    Arguments:

        batch: Integer, index of batch within the current epoch.

        logs: Dict, contains the return value of `model.test_step`. Typically,

          the values of the `Model`'s metrics are returned.  Example:

          `{'loss': 0.2, 'accuracy': 0.7}`.

    """

on_test_batch_end

def on_test_batch_end(
    self,
    batch,
    logs=None
)

Called at the end of a batch in evaluate methods.

Also called at the end of a validation batch in the fit methods, if validation data is provided.

Subclasses should override for any actions to run.

Note that if the steps_per_execution argument to compile in tf.keras.Model is set to N, this method will only be called every N batches.

Parameters:

  • batch: Integer, index of batch within the current epoch.
  • logs: Dict. Aggregated metric results up until this batch.
View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_test_batch_end(self, batch, logs=None):

    """Called at the end of a batch in `evaluate` methods.

    Also called at the end of a validation batch in the `fit`

    methods, if validation data is provided.

    Subclasses should override for any actions to run.

    Note that if the `steps_per_execution` argument to `compile` in

    `tf.keras.Model` is set to `N`, this method will only be called every `N`

    batches.

    Arguments:

        batch: Integer, index of batch within the current epoch.

        logs: Dict. Aggregated metric results up until this batch.

    """

on_test_begin

def on_test_begin(
    self,
    logs=None
)

Called at the beginning of evaluation or validation.

Subclasses should override for any actions to run.

Parameters:

  • logs: Dict. Currently no data is passed to this argument for this method but that may change in the future.
View Source
  @doc_controls.for_subclass_implementers

  def on_test_begin(self, logs=None):

    """Called at the beginning of evaluation or validation.

    Subclasses should override for any actions to run.

    Arguments:

        logs: Dict. Currently no data is passed to this argument for this method

          but that may change in the future.

    """

on_test_end

def on_test_end(
    self,
    logs=None
)

Called at the end of evaluation or validation.

Subclasses should override for any actions to run.

Parameters:

  • logs: Dict. Currently the output of the last call to on_test_batch_end() is passed to this argument for this method but that may change in the future.
View Source
  @doc_controls.for_subclass_implementers

  def on_test_end(self, logs=None):

    """Called at the end of evaluation or validation.

    Subclasses should override for any actions to run.

    Arguments:

        logs: Dict. Currently the output of the last call to

          `on_test_batch_end()` is passed to this argument for this method

          but that may change in the future.

    """

on_train_batch_begin

def on_train_batch_begin(
    self,
    batch,
    logs=None
)
View Source
    def on_train_batch_begin(self, batch, logs=None):

        global_step = K.get_value(self.model.optimizer.iterations)

        # During the first `num_warmup_steps` steps, move linearly from
        # `init_lr` towards `base_lr`.
        if global_step <= self._num_warmup_steps and global_step != 0 and self._use_warmup:

            lr = self._init_lr + global_step * self.slope

            K.set_value(self.model.optimizer.lr, lr)

on_train_batch_end

def on_train_batch_end(
    self,
    batch,
    logs=None
)

Called at the end of a training batch in fit methods.

Subclasses should override for any actions to run.

Note that if the steps_per_execution argument to compile in tf.keras.Model is set to N, this method will only be called every N batches.

Parameters:

  • batch: Integer, index of batch within the current epoch.
  • logs: Dict. Aggregated metric results up until this batch.
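
As an illustration of the steps_per_execution note above (a sketch with arbitrary values, reusing the hypothetical model from the earlier example):

```python
# With steps_per_execution=4, Keras executes 4 batches per inner tf.function
# call, so on_train_batch_begin/on_train_batch_end fire once per group of 4.
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              steps_per_execution=4)
```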
View Source
  @doc_controls.for_subclass_implementers

  @generic_utils.default

  def on_train_batch_end(self, batch, logs=None):

    """Called at the end of a training batch in `fit` methods.

    Subclasses should override for any actions to run.

    Note that if the `steps_per_execution` argument to `compile` in

    `tf.keras.Model` is set to `N`, this method will only be called every `N`

    batches.

    Arguments:

        batch: Integer, index of batch within the current epoch.

        logs: Dict. Aggregated metric results up until this batch.

    """

    # For backwards compatibility.

    self.on_batch_end(batch, logs=logs)

on_train_begin

def on_train_begin(
    self,
    logs=None
)

Called at the beginning of training.

Subclasses should override for any actions to run.

Parameters:

  • logs: Dict. Currently no data is passed to this argument for this method but that may change in the future.
View Source
  @doc_controls.for_subclass_implementers

  def on_train_begin(self, logs=None):

    """Called at the beginning of training.

    Subclasses should override for any actions to run.

    Arguments:

        logs: Dict. Currently no data is passed to this argument for this method

          but that may change in the future.

    """

on_train_end

def on_train_end(
    self,
    logs=None
)

Called at the end of training.

Subclasses should override for any actions to run.

Parameters:

  • logs: Dict. Currently the output of the last call to on_epoch_end() is passed to this argument for this method but that may change in the future.
View Source
  @doc_controls.for_subclass_implementers

  def on_train_end(self, logs=None):

    """Called at the end of training.

    Subclasses should override for any actions to run.

    Arguments:

        logs: Dict. Currently the output of the last call to `on_epoch_end()`

          is passed to this argument for this method but that may change in

          the future.

    """

set_model

def set_model(
    self,
    model
)
View Source
  def set_model(self, model):

    self.model = model

set_params

def set_params(
    self,
    params
)
View Source
  def set_params(self, params):

    self.params = params

ManualStepping

class ManualStepping(
    boundaries,
    rates,
    warmup=False,
    name=None
)

This function provides fine-grained control over learning rates. One must specify a sequence of learning rates as well as a set of integer steps at which the current learning rate must transition to the next. For example, if boundaries = [5, 10] and rates = [.1, .01, .001], then the learning rate returned by this function is .1 for step=0,...,4, .01 for step=5...9, and .001 for step=10 and onward.

You can pass this schedule directly into a tf.keras.optimizers.Optimizer as the learning rate.

lr_schedule = ManualStepping(
    boundaries=[5, 10],
    rates=[.1, .01, .001],
    warmup=True)
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
model.fit(data, labels, epochs=5)

The learning rate schedule is also serializable and deserializable using tf.keras.optimizers.schedules.serialize and tf.keras.optimizers.schedules.deserialize.

Arguments:

  • boundaries: A List of scalar int32 or int64 or a Tensor. It is a list of global steps at which to switch learning rates. This list is assumed to consist of increasing positive integers.
  • rates: a list of (float) learning rates corresponding to intervals between the boundaries. The length of this list must be exactly len(boundaries) + 1.
  • warmup: Whether to linearly interpolate learning rate for steps in [0, boundaries[0]].
  • name: String. Optional name of the operation. Defaults to 'ManualStepping'.

Return:

A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar Tensor of the same type as rates.
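
To make the warmup expansion concrete, here is a small sketch of the values the schedule produces; the numbers follow from the slope computed in `__init__` (up to float32 rounding):

```python
from kerod.core.learning_rate_schedule import ManualStepping

schedule = ManualStepping(boundaries=[5, 10], rates=[.1, .01, .001], warmup=True)

# slope = (.01 - .1) / 5 = -0.018, so steps 0..4 interpolate through
# .1, .082, .064, .046, .028; steps 5..9 use .01; steps >= 10 use .001.
for step in [0, 2, 4, 5, 9, 10, 20]:
    print(step, float(schedule(step)))
```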

Ancestors (in MRO)

  • tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule

Static methods

from_config

def from_config(
    config
)

Instantiates a LearningRateSchedule from its config.

Parameters:

  • config: Output of get_config().

Returns:

A LearningRateSchedule instance.
View Source
  @classmethod

  def from_config(cls, config):

    """Instantiates a `LearningRateSchedule` from its config.

    Args:

        config: Output of `get_config()`.

    Returns:

        A `LearningRateSchedule` instance.

    """

    return cls(**config)
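
As a usage note, the serialization round-trip mentioned earlier can be sketched as follows. `lr_schedule` is the instance from the example above, and `custom_objects` is needed because `ManualStepping` lives in kerod rather than in `tf.keras`:

```python
import tensorflow as tf

serialized = tf.keras.optimizers.schedules.serialize(lr_schedule)
restored = tf.keras.optimizers.schedules.deserialize(
    serialized, custom_objects={'ManualStepping': ManualStepping})
```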

Methods

get_config

def get_config(
    self
)
View Source
    def get_config(self):

        return {

            "boundaries": self.boundaries,

            "rates": self.rates,

            "warmup": self.warmup,

            "name": self.name

        }