Merge branch 'dev'
eliorc committed Oct 6, 2020
2 parents c2db3a3 + 7d94742 commit d7c3950
Showing 14 changed files with 445 additions and 408 deletions.
27 changes: 3 additions & 24 deletions .circleci/config.yml
@@ -3,32 +3,19 @@ workflows:
version: 2
test:
jobs:
- test-3.7:
filters:
branches:
only:
- master
- dev
- test-3.6:
filters:
branches:
only:
- master
- dev
- test-3.5:
filters:
branches:
only:
- master
- dev
jobs:
test-3.7: &test-template
test-3.5:
docker:
- image: circleci/python:3.7
- image: circleci/python:3.5
steps:
# Checkout repository
- checkout
# Install dependencies, uses cache if not updat
# Install dependencies, uses cache if not update
- restore_cache:
key: test-deps-{{ .Branch }}-{{ checksum "requirements/test.txt" }}
- run:
@@ -56,11 +43,3 @@ jobs:
source venv/bin/activate
pytest -s --cov=tavolo tests/
codecov
test-3.6:
<<: *test-template
docker:
- image: circleci/python:3.6
test-3.5:
<<: *test-template
docker:
- image: circleci/python:3.5
4 changes: 2 additions & 2 deletions README.rst
@@ -6,10 +6,10 @@

------------

.. image:: https://img.shields.io/badge/python-3.5%20%7C%203.6%20%7C%203.7-blue.svg
.. image:: https://img.shields.io/badge/python-3.5+-blue.svg
:alt: Supported Python versions

.. image:: https://img.shields.io/badge/tensorflow-2.0%20%7C%202.1-orange.svg
.. image:: https://img.shields.io/badge/tensorflow-2.2+-orange.svg
:alt: Supported TensorFlow versions

.. image:: https://codecov.io/gh/eliorc/tavolo/branch/master/graph/badge.svg
5 changes: 2 additions & 3 deletions docs/source/index.rst
@@ -14,8 +14,8 @@ Showcase
--------

| tavolo's API is straightforward and adopting its modules is as easy as it gets.
| In tavolo, you'll find implementations for basic layers like :ref:`positional_encoding` to complex modules like the Transformer's
:ref:`multi_headed_attention`. You'll also find non-layer implementations that can ease development, like the :ref:`learning_rate_finder`.
| In tavolo, you'll find implementations for layers like :ref:`positional_encoding` and non-layer implementations
that can ease development, like the :ref:`learning_rate_finder`.
| For example, if we wanted to add a Yang-style attention head to our model and look for the optimal learning rate, it would look something like:
.. code-block:: python3
@@ -55,7 +55,6 @@ Showcase

embeddings
learning
seq2seq
seq2vec


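The showcase example referenced above is collapsed in this diff view. Purely as an illustrative sketch (not the documentation's actual snippet; the layer and argument names ``YangAttention(n_units=...)`` and ``LearningRateFinder.scan(...)`` are assumptions about tavolo's API to be checked against the rendered docs), such usage might look like:

.. code-block:: python3

    import tensorflow as tf
    import tavolo as tvl

    VOCAB_SIZE = 10000  # Illustrative vocabulary size

    # A small text classifier with a Yang-style attention head (layer and argument names assumed)
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64),
        tvl.seq2vec.YangAttention(n_units=64),  # assumed signature
        tf.keras.layers.Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')

    # Run the LR range test to look for a good learning rate (assumed API)
    train_data, train_labels = ..., ...  # Training features and labels
    lr_finder = tvl.learning.LearningRateFinder(model=model)
    learning_rates, losses = lr_finder.scan(train_data, train_labels,
                                            min_lr=1e-4, max_lr=1.0, batch_size=128)
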
9 changes: 9 additions & 0 deletions docs/source/learning.rst
@@ -19,6 +19,15 @@ Modules for altering the learning process

-------

.. _`datamap`:

``DataMapCallback``
++++++++++++++++++++++++++++++

.. automodule:: learning.DataMapCallback

-------

.. _`learning_rate_finder`:

``LearningRateFinder``
18 changes: 0 additions & 18 deletions docs/source/seq2seq.rst

This file was deleted.

2 changes: 1 addition & 1 deletion requirements/docs.txt
@@ -4,5 +4,5 @@ sphinxcontrib-httpdomain
sphinx_rtd_theme
pygments
pygments-style-github
tensorflow==2.1.0
tensorflow==2.2.0
tavolo
2 changes: 1 addition & 1 deletion requirements/test.txt
@@ -2,4 +2,4 @@ pytest
pytest-cov
twine
codecov
tensorflow==2.1.0
tensorflow==2.2.0
2 changes: 1 addition & 1 deletion setup.py
@@ -1,6 +1,6 @@
from setuptools import setup

VERSION = '0.6.1'
VERSION = '0.7.0'

setup(name='tavolo',
version=VERSION,
3 changes: 1 addition & 2 deletions tavolo/__init__.py
@@ -1,7 +1,6 @@
__name__ = 'tavolo'
__version__ = '0.6.1'
__version__ = '0.7.0'

from . import embeddings
from . import seq2vec
from . import seq2seq
from . import learning
198 changes: 195 additions & 3 deletions tavolo/learning.py
@@ -1,10 +1,10 @@
import tempfile
import math
from typing import Optional, Callable, Tuple, List
import tempfile
from contextlib import suppress
from typing import Optional, Callable, Tuple, List, Any

import tensorflow as tf
import numpy as np
import tensorflow as tf


class CyclicLearningRateCallback(tf.keras.callbacks.Callback):
@@ -177,6 +177,198 @@ def _set_scale_scheme(self):
        self.scale_mode = 'iterations'


class DataMapCallback(tf.keras.callbacks.Callback):
"""
Gather training dynamics for data map generation. Assumes a binary or multi-class model, no support for multi label.
Arguments
---------
- `dataset` (``tf.data.: Dataset``): Usually, as the paper suggests, this is the training dataset. It should be:
- Non-shuffled, so each iteration over the dataset should yield samples in the same order
- Already batched, the ``.batch(n)`` method should already be applied on this dataset
- Should yield batches of ``(features, labels)``, sample weights are not supported
- `outputs_to_probabilities` (``Optional[Callable[[Any], tf.Tensor]]``): Callable to convert model's output to
probabilities. Use this if the model outputs logits, dictionary or any other form which is not a tensor
of probabilities. Defaults to ``None``.
- `sparse_labels` (``bool``): Set to ``True`` if the labels are given as integers (not one hot encoded). Defaults
to ``False``.
Attributes
----------
- `gold_labels_probabilities` (``np.ndarray``): Gold label predicted probabilities. With the shape of
``(n_samples, n_epochs)`` and ``(i, j)`` is the probability of the gold label for sample ``i`` at epoch ``j``.
- `confidence` (``np.ndarray``): Mean of true label probability across epochs.
- `variability` (``np.ndarray``): Standard deviation of true label probability across epochs.
- `correctness` (``np.ndarray``): Fraction of times correctly predicted across epochs
Examples
--------
Calculate training dynamics during training
.. code-block:: python3
import tensorflow as tf
import tavolo as tvl
# Load dataset
train = ... # Instance of dataset
train_unshuffled = ... # Instance of dataset, unshuffled so that each iteration over the dataset would yield
# samples in the same order
# Prepare
train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
train = train_unshuffled.batch(BATCH_SIZE * 10) # No gradient updates in data map, can use bigger batches
# Create the datamap callback
datamap = tvl.learning.DatMaCallback(dataset=train_unshuffled)
# Train
model.fit(train, epochs=N_EPOCHS, callbacks=[datamap])
# Get training dynamics
confidence, variability, correctness = datamap.confidence, datamap.variability, datamap.correctness
Calculate training dynamics from a model that outputs logits (and NOT probabilities)
.. code-block:: python3
import tensorflow as tf
import tavolo as tvl
# Create the datamap callback - using the outputs_to_predictions option
datamap = tvl.learning.DatMaCallback(dataset=train_unshuffled, outputs_to_probabilities=tf.nn.softmax)
# Train
model.fit(train, epochs=N_EPOCHS, callbacks=[datamap])
References
----------
- `Dataset Cartography: Mapping and Diagnosing Datasets with Training Dynamics`_
.. _`Dataset Cartography: Mapping and Diagnosing Datasets with Training Dynamics`: https://arxiv.org/pdf/2009.10795
"""

    # TODO - The implementation saves all the gold label probabilities across epochs for the training dynamics
    # computations. This can be optimized by calculating a running version of each training dynamic.
    # Once tfp.stats releases RunningVariance and RunningMean to the stable tfp versions - training dynamics
    # calculations should be reimplemented doing this, thus avoiding (n_epochs - 1) * n_samples floating points
    # memory usage.

    def __init__(self, dataset: tf.data.Dataset,
                 outputs_to_probabilities: Optional[Callable[[Any], tf.Tensor]] = None,
                 sparse_labels: bool = False):
        """
        :param dataset: Dataset. Usually, as the paper suggests, this is the training dataset. It should be:

            - Non-shuffled, so each iteration over the dataset should yield samples in the same order
            - Already batched, the ``.batch(n)`` method should already be applied on this dataset
            - Should yield batches of ``(features, labels)``, sample weights are not supported
        :param outputs_to_probabilities: Callable to convert model's output to probabilities. Use this if the model
            outputs logits, dictionary or any other form which is not a vector of probabilities.
        :param sparse_labels: Set to ``True`` if the labels are given as integers (not one hot encoded)
        """

        self._dataset = dataset
        self._outputs2probabilities = outputs_to_probabilities
        self._sparse_labels = sparse_labels

        # Stores the probabilities for the gold labels after each epoch,
        # e.g. self._gold_labels_probabilities[i] == gold label probabilities at the end of epoch i
        self._gold_labels_probabilities = None

    def on_epoch_end(self, epoch, logs=None):

        # Gather gold label probabilities over the dataset
        gold_label_probabilities = list()
        for x, y in self._dataset:
            probabilities = self.model.predict(x)

            # Convert outputs to probabilities if necessary
            if self._outputs2probabilities is not None:
                probabilities = self._outputs2probabilities(probabilities)

            # Convert labels to one-hot if they are sparse, e.g. [1, 2] -> [[0, 1, 0], [0, 0, 1]]
            if self._sparse_labels:
                y = tf.one_hot(y, depth=probabilities.shape[-1])

            # Extract the gold label probabilities from the predictions
            if tf.rank(tf.squeeze(y)) == 1:  # Binary labels, e.g. y = [1, 0, 0, 1, ...], len(y) == batch_size
                # Squeeze to remove redundant dimensions
                probabilities, y = tf.squeeze(probabilities), tf.squeeze(y)

                batch_gold_label_probabilities = tf.where(y == 0, 1 - probabilities, probabilities)
            elif tf.rank(tf.squeeze(y)) == 2:  # One-hot labels, e.g. y = [[1, 0, 0], [0, 1, 0], ...], len(y) == batch_size
                # Verify labels are NOT multi-label (e.g. [[1, 0, 1], [1, 1, 0], ...])
                if not tf.reduce_all(tf.reduce_sum(tf.cast(y == 1, tf.int32), axis=-1) == 1):
                    raise ValueError('DataMapCallback does not support multi-label classification')

                batch_gold_label_probabilities = tf.boolean_mask(probabilities, tf.cast(y, tf.bool)).numpy()
            else:
                raise ValueError(
                    'tf.squeeze(y) (y == labels from the dataset) must be of rank 1 for binary classification or '
                    '2 for multi-class. Instead got ({})'.format(tf.rank(tf.squeeze(y))))

            # Gather gold label probabilities
            gold_label_probabilities = np.append(gold_label_probabilities, [batch_gold_label_probabilities])

        # Gather probabilities
        if self._gold_labels_probabilities is None:  # Happens only on first iteration
            self._gold_labels_probabilities = np.expand_dims(gold_label_probabilities, axis=-1)
        else:
            stack = [self._gold_labels_probabilities, np.expand_dims(gold_label_probabilities, axis=-1)]
            self._gold_labels_probabilities = np.hstack(stack)

    @property
    def gold_labels_probabilities(self) -> np.ndarray:
        """
        Gold label predicted probabilities. With the shape of ``(n_samples, n_epochs)``, where ``(i, j)`` is the
        probability of the gold label for sample ``i`` at epoch ``j``

        :return: Gold label probabilities
        """

        return self._gold_labels_probabilities

    @property
    def confidence(self) -> np.ndarray:
        """
        Mean of true label probability across epochs

        :return: Confidence
        """

        return np.mean(self._gold_labels_probabilities, axis=-1)

    @property
    def variability(self) -> np.ndarray:
        """
        Standard deviation of true label probability across epochs

        :return: Variability
        """

        return np.std(self._gold_labels_probabilities, axis=-1)

    @property
    def correctness(self) -> np.ndarray:
        """
        Fraction of times correctly predicted across epochs

        :return: Correctness
        """

        return np.mean(self._gold_labels_probabilities > 0.5, axis=-1)


class LearningRateFinder:
"""
Learning rate finding utility for conducting the "LR range test", see article reference for more information
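
For context on how the new callback's outputs are meant to be used: the three training dynamics it exposes (confidence, variability, correctness) are what the referenced Dataset Cartography paper plots as a data map. A minimal sketch, assuming matplotlib is available and ``datamap`` is the fitted callback from the examples above (illustrative only, not part of this commit):

.. code-block:: python3

    import matplotlib.pyplot as plt

    # datamap is the DataMapCallback instance passed to model.fit(..., callbacks=[datamap])
    plt.scatter(datamap.variability,    # x: std of gold label probability across epochs
                datamap.confidence,     # y: mean gold label probability across epochs
                c=datamap.correctness,  # color: fraction of epochs predicted correctly
                cmap='viridis', s=4)
    plt.xlabel('variability')
    plt.ylabel('confidence')
    plt.colorbar(label='correctness')
    plt.show()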
