Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Juan Eiros committed May 24, 2018
1 parent 4c2040c commit cc0f6ad
Show file tree
Hide file tree
Showing 10 changed files with 2,024 additions and 995 deletions.
6 changes: 0 additions & 6 deletions .ipynb_checkpoints/Untitled1-checkpoint.ipynb

This file was deleted.

1,022 changes: 1,022 additions & 0 deletions .ipynb_checkpoints/ala-gan_2-checkpoint.ipynb

Large diffs are not rendered by default.

391 changes: 391 additions & 0 deletions .ipynb_checkpoints/dialanine-example-checkpoint.ipynb

Large diffs are not rendered by default.

25 changes: 22 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
# MD-GAN
# MDGAN
In this repo you will find some experiments I have done with a GAN to generate three-dimensional conformations
for a given protein.

The GAN is trained using conformations obtained from MD simulations. The Generator aims to get
better at faking conformations that look like the conformations that are seen during the simulations.
The Discriminator tries to discern if a given conformation comes from a simulation (real) or from the
Generator (fake).

## installation

## Installation


```bash
conda install -c omnia msmbuilder mdtraj msmexplorer
pip install tensorflow-gpu
pip install tensorflow-gpu # or tensorflow if no GPU available
pip install keras
```

## Example

```python
from msmbuilder.example_datasets import AlanineDipeptide
from utils import make_trajectory_trainable
from mdgan import MDGAN

trjs = AlanineDipeptide().get().trajectories
data, sc = make_trajectory_trainable(trjs)  # sc is the MinMaxScaler; we'll need it later
gan = MDGAN(n_atoms=22)
losses = gan.train(data, num_epochs=10) # That's it
```
Binary file added __pycache__/mdgan.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/utils.cpython-36.pyc
Binary file not shown.
959 changes: 0 additions & 959 deletions ala-gan.ipynb

This file was deleted.

391 changes: 391 additions & 0 deletions dialanine-example.ipynb

Large diffs are not rendered by default.

117 changes: 117 additions & 0 deletions mdgan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
MDGAN generates three-dimensional conformations that resemble the ones provided
as training data (MD simulations).
"""
import numpy as np
from keras.layers import Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D
from keras.models import Sequential
from keras.optimizers import Adam
from utils import make_trainable, make_latent_samples, make_labels
from sklearn.model_selection import train_test_split


class MDGAN:
    """Generative adversarial network over ``(n_atoms, 3, 1)`` conformations.

    Three Keras models are built on construction:

    * ``generator``: maps a noise vector of length ``noise_dim`` to a tensor
      of shape ``(n_atoms, 3, 1)``.
    * ``discriminator``: maps a ``(n_atoms, 3, 1)`` tensor to a single sigmoid
      output.
    * ``gan``: the generator followed by the discriminator, used to train the
      generator.

    Parameters
    ----------
    n_atoms: int
        Number of atoms per conformation.
    noise_dim: int
        Length of the latent noise vector fed to the generator.
    gen_lr, disc_lr, gan_lr: float
        Adam learning rates for the generator, the discriminator and the
        stacked model respectively.
    """

    def __init__(self, n_atoms, noise_dim=100, gen_lr=2e-4, disc_lr=1e-3,
                 gan_lr=2e-4):
        self.n_atoms = n_atoms
        self.noise_dim = noise_dim
        self.generator = self.build_generator(lr=gen_lr)
        self.discriminator = self.build_discriminator(lr=disc_lr)
        self.gan = self.build_GAN(lr=gan_lr)

    def __repr__(self):
        """Return the Keras summary of the stacked model as a string."""
        # ``summary()`` prints and returns None, which makes ``repr()`` raise
        # a TypeError; collect the lines through ``print_fn`` instead.
        lines = []
        self.gan.summary(print_fn=lines.append)
        return '\n'.join(lines)

    def build_generator(self, lr):
        """Build and compile the generator (noise -> ``(n_atoms, 3, 1)``)."""
        g = Sequential([
            Dense(2 * 2 * self.noise_dim, input_dim=self.noise_dim),
            BatchNormalization(),
            LeakyReLU(0.2),
            Dense(self.n_atoms * 3, input_dim=self.noise_dim * 2 * 2),
            Reshape((self.n_atoms, 3, 1))
        ], name='generator')
        adam = Adam(lr=lr)
        g.compile(adam, loss='binary_crossentropy')
        return g

    def build_discriminator(self, lr):
        """Build and compile the convolutional real/fake classifier."""
        d = Sequential([
            Conv2D(32, 3, padding='same', strides=2,
                   input_shape=(self.n_atoms, 3, 1)),
            LeakyReLU(0.2),
            Dropout(0.3),

            Conv2D(64, 3, padding='same', strides=1),
            LeakyReLU(0.2),
            Dropout(0.3),

            Conv2D(128, 3, padding='same', strides=1),
            LeakyReLU(0.2),
            Dropout(0.3),

            Flatten(),
            Dense(1, activation='sigmoid')
        ], name='discriminator')
        adam = Adam(lr=lr)
        d.compile(adam, 'binary_crossentropy')
        return d

    def build_GAN(self, lr):
        """Stack generator and discriminator into the generator-training model.

        Requires ``self.generator`` and ``self.discriminator`` to exist.
        """
        # Keras fixes the set of trainable weights when a model is compiled,
        # so the discriminator has to be frozen *before* compiling the stack.
        # Otherwise the generator step would also update the discriminator.
        # The discriminator model itself was compiled earlier, while still
        # trainable, and keeps learning through its own train_on_batch calls.
        make_trainable(self.discriminator, False)
        gan = Sequential([self.generator, self.discriminator])
        adam = Adam(lr=lr)
        gan.compile(adam, 'binary_crossentropy')
        return gan

    def train(self, data, batch_size=250, num_epochs=25, eval_size=200):
        """Train the GAN and report the losses after every epoch.

        Parameters
        ----------
        data: np.array
            Conformations; every batch is reshaped to
            ``(-1, n_atoms, 3, 1)`` before being fed to the discriminator.
        batch_size: int
            Number of real (and of fake) conformations per training step.
        num_epochs: int
            Number of passes over the training split.
        eval_size: int
            Number of held-out conformations used for the per-epoch
            evaluation. Capped at the size of the test split.

        Returns
        -------
        losses: list of (d_loss, g_loss) tuples, one per epoch.
        """
        losses = []
        train, test = train_test_split(data)

        # The label arrays only depend on the batch size: build them once.
        real_confs_labels, fake_confs_labels = make_labels(batch_size)

        # Sampling without replacement cannot draw more rows than exist.
        n_eval = min(eval_size, len(test))
        eval_real_labels, eval_fake_labels = make_labels(n_eval)

        for epoch in range(num_epochs):
            for i in range(len(train) // batch_size):
                # -------------------
                # Train Discriminator
                # -------------------
                make_trainable(self.discriminator, True)
                # Get some real conformations from the train data
                real_confs = train[i * batch_size:(i + 1) * batch_size]
                real_confs = real_confs.reshape(-1, self.n_atoms, 3, 1)

                # Sample high dimensional noise and generate fake conformations
                noise = make_latent_samples(batch_size, self.noise_dim)
                fake_confs = self.generator.predict_on_batch(noise)

                self.discriminator.train_on_batch(real_confs, real_confs_labels)
                self.discriminator.train_on_batch(fake_confs, fake_confs_labels)

                # --------------------------------------------------
                # Train Generator via GAN (switch off discriminator)
                # --------------------------------------------------
                noise = make_latent_samples(batch_size, self.noise_dim)
                make_trainable(self.discriminator, False)
                # Fakes are labelled as real: the generator is rewarded for
                # fooling the discriminator.
                self.gan.train_on_batch(noise, real_confs_labels)

            # Evaluate performance after epoch
            conf_eval_real = test[np.random.choice(len(test), n_eval, replace=False)]
            conf_eval_real = conf_eval_real.reshape(-1, self.n_atoms, 3, 1)
            noise = make_latent_samples(n_eval, self.noise_dim)
            conf_eval_fake = self.generator.predict_on_batch(noise)

            d_loss_r = self.discriminator.test_on_batch(conf_eval_real, eval_real_labels)
            d_loss_f = self.discriminator.test_on_batch(conf_eval_fake, eval_fake_labels)
            d_loss = (d_loss_r + d_loss_f) / 2

            # we want the fake to be realistic!
            g_loss = self.gan.test_on_batch(noise, eval_real_labels)

            print("Epoch: {:>3}/{} Discriminator Loss: {:>6.4f} Generator Loss: {:>6.4f}".format(epoch + 1, num_epochs, d_loss, g_loss))

            losses.append((d_loss, g_loss))
        return losses
108 changes: 81 additions & 27 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,61 @@
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from msmbuilder.preprocessing import MinMaxScaler
import mdtraj
from matplotlib import pyplot as plt


def plot_losses(losses):
    """Draw the discriminator and generator loss curves on a new figure.

    Parameters
    ----------
    losses: sequence of (discriminator_loss, generator_loss) pairs

    Returns
    -------
    fig, ax: the matplotlib figure and axes holding the plot
    """
    loss_table = np.array(losses)
    fig, ax = plt.subplots()
    # The freshly created axes are the current axes, so drawing on ``ax``
    # directly targets the same artist the pyplot state machine would.
    ax.plot(loss_table.T[0], label='Discriminator')
    ax.plot(loss_table.T[1], label='Generator')
    ax.set_title("Training Losses")
    ax.legend()
    ax.set(ylabel='BCE', xlabel='Epoch')
    return fig, ax


def make_trajectory_trainable(traj_list):
    """
    Turn a list of mdtraj.Trajectory objects into one scaled coordinate array.

    Every trajectory is superposed onto the first frame of the first
    trajectory, then each frame's coordinates are passed through a
    MinMaxScaler with ``feature_range=(-1, 1)``.

    Parameters
    ----------
    traj_list: list of mdtraj.Trajectory objects

    Returns
    -------
    data: np.array, shape=(frames, n_atoms, 3)
        XYZ coordinates of all the frames in all the trajectories, after
        scaling. Use ``sc.inverse_transform`` to map them back to the
        original space.
    sc: MinMaxScaler
        The fitted scaler.
    """
    reference = traj_list[0][0]
    aligned = [traj.superpose(reference) for traj in traj_list]
    sc = MinMaxScaler(feature_range=(-1, 1))
    per_frame_xyz = [
        frame.xyz.reshape(reference.n_atoms, 3)
        for traj in aligned
        for frame in traj
    ]
    scaled = sc.fit_transform(per_frame_xyz)
    # dstack puts the frame index last; move it to the front.
    data = np.dstack(scaled).transpose(2, 0, 1)
    return data, sc


def fake_traj_from_samples(samples, top, scaler):
    """
    Build an mdtraj.Trajectory from generated samples.

    Parameters
    ----------
    samples: 4-d array; the last axis is dropped by taking index 0
    top: topology passed on to mdtraj.Trajectory
    scaler: object whose ``inverse_transform`` is applied to every frame

    Returns
    -------
    fake_traj: mdtraj.Trajectory, centered and superposed onto its frame 0
    """
    unscaled_frames = [
        scaler.inverse_transform(frame) for frame in samples[:, :, :, 0]
    ]
    trajectory = mdtraj.Trajectory(unscaled_frames, topology=top)
    trajectory.center_coordinates()
    trajectory.superpose(trajectory, 0)
    return trajectory


def scatter(arr, ax=None, scatter_kws=None):
if ax is None:
Expand All @@ -20,79 +73,80 @@ def scatter(arr, ax=None, scatter_kws=None):
return ax


def make_latent_samples(n_samples, sample_dim):
    """
    Draw latent noise vectors from a standard normal distribution.

    Parameters
    ----------
    n_samples: int, number of noise vectors to draw
    sample_dim: int, length of each noise vector

    Returns
    -------
    np.array, shape=(n_samples, sample_dim)
    """
    return np.random.normal(loc=0, scale=1, size=(n_samples, sample_dim))


def make_trainable(model, trainable):
    """Set the ``trainable`` flag of every layer in ``model.layers``."""
    for current_layer in model.layers:
        setattr(current_layer, 'trainable', trainable)



def make_labels(size):
    """
    Build the label columns for one batch.

    Returns
    -------
    (ones, zeros): two np.array of shape (size, 1), the first filled with 1
    and the second filled with 0.
    """
    column_shape = [size, 1]
    ones_column = np.ones(column_shape)
    zeros_column = np.zeros(column_shape)
    return ones_column, zeros_column



def make_2dtraj_GAN(sample_size,
                    g_hidden_size,
                    d_hidden_size,
                    leaky_alpha,
                    g_learning_rate,
                    d_learning_rate):
    """
    Build a small dense GAN whose generator emits 2-dimensional points.

    The Keras backend session is cleared first.

    Parameters
    ----------
    sample_size: int, length of the generator's noise input
    g_hidden_size: int, units in the generator's hidden layer
    d_hidden_size: int, units in the discriminator's hidden layer
    leaky_alpha: float, ``alpha`` of both LeakyReLU activations
    g_learning_rate: float, Adam learning rate of the stacked model
    d_learning_rate: float, Adam learning rate of the discriminator

    Returns
    -------
    gan, generator, discriminator: the stacked model (compiled), the
    generator (not compiled on its own) and the discriminator (compiled).
    """
    K.clear_session()

    generator = Sequential([
        Dense(g_hidden_size, input_shape=(sample_size,)),
        LeakyReLU(alpha=leaky_alpha),
        Dense(2),
        Activation('tanh')
    ], name='generator')

    discriminator = Sequential([
        Dense(d_hidden_size, input_shape=(2,)),
        LeakyReLU(alpha=leaky_alpha),
        Dense(1),
        Activation('sigmoid')
    ], name='discriminator')

    gan = Sequential([
        generator,
        discriminator
    ])

    discriminator.compile(optimizer=Adam(lr=d_learning_rate), loss='binary_crossentropy')
    gan.compile(optimizer=Adam(lr=g_learning_rate), loss='binary_crossentropy')

    return gan, generator, discriminator


def make_3dtraj_GAN(sample_size,
                    g_hidden_size,
                    d_hidden_size,
                    leaky_alpha,
                    g_learning_rate,
                    d_learning_rate):
    """
    Build a small dense GAN (generator, discriminator and stacked model).

    The Keras backend session is cleared first.

    NOTE(review): despite the name, the layers are the same as in
    make_2dtraj_GAN: the generator ends in ``Dense(2)`` and the
    discriminator takes ``input_shape=(2,)``. Confirm whether 3 was intended.

    Parameters
    ----------
    sample_size: int, length of the generator's noise input
    g_hidden_size: int, units in the generator's hidden layer
    d_hidden_size: int, units in the discriminator's hidden layer
    leaky_alpha: float, ``alpha`` of both LeakyReLU activations
    g_learning_rate: float, Adam learning rate of the stacked model
    d_learning_rate: float, Adam learning rate of the discriminator

    Returns
    -------
    gan, generator, discriminator: the stacked model (compiled), the
    generator (not compiled on its own) and the discriminator (compiled).
    """
    K.clear_session()

    generator = Sequential([
        Dense(g_hidden_size, input_shape=(sample_size,)),
        LeakyReLU(alpha=leaky_alpha),
        Dense(2),
        Activation('tanh')
    ], name='generator')

    discriminator = Sequential([
        Dense(d_hidden_size, input_shape=(2,)),
        LeakyReLU(alpha=leaky_alpha),
        Dense(1),
        Activation('sigmoid')
    ], name='discriminator')

    gan = Sequential([
        generator,
        discriminator
    ])

    discriminator.compile(optimizer=Adam(lr=d_learning_rate), loss='binary_crossentropy')
    gan.compile(optimizer=Adam(lr=g_learning_rate), loss='binary_crossentropy')

    return gan, generator, discriminator

0 comments on commit cc0f6ad

Please sign in to comment.