Release 0.0.6.6
seliverstov authored Aug 11, 2018
2 parents c8a49fb + 0d0cb8d commit c7896c6
Showing 20 changed files with 85 additions and 116 deletions.
8 changes: 4 additions & 4 deletions Jenkinsfile
@@ -9,11 +9,11 @@ node('gpu') {
stage('Setup') {
env.CUDA_VISIBLE_DEVICES=0
sh """
virtualenv --python=python3 ".venv-$BUILD_NUMBER"
. .venv-$BUILD_NUMBER/bin/activate
sed -i "s/stream=True/stream=False/g" deeppavlov/core/data/utils.py
pip install -e .[tests]
virtualenv --python=python3 '.venv-$BUILD_NUMBER'
. '.venv-$BUILD_NUMBER/bin/activate'
pip install .[tests]
pip install -r dp_requirements/tf-gpu.txt
rm -rf `find . -mindepth 1 -maxdepth 1 ! -name tests ! -name Jenkinsfile ! -name '.venv-$BUILD_NUMBER'`
"""
}
stage('Tests') {
2 changes: 1 addition & 1 deletion deeppavlov/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = '0.0.6.5'
__version__ = '0.0.6.6'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
3 changes: 1 addition & 2 deletions deeppavlov/configs/odqa/ru_odqa_infer_wiki.json
@@ -54,8 +54,7 @@
},
"download": [
"http://files.deeppavlov.ai/datasets/wikipedia/ruwiki.tar.gz",
"http://files.deeppavlov.ai/deeppavlov_data/ru_odqa.tar.gz",
"http://files.deeppavlov.ai/deeppavlov_data/squad_model_ru.tar.gz"
"http://files.deeppavlov.ai/deeppavlov_data/ru_odqa.tar.gz"
]
}
}
7 changes: 6 additions & 1 deletion deeppavlov/core/commands/infer.py
@@ -13,6 +13,9 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from pathlib import Path

from deeppavlov.core.commands.utils import set_deeppavlov_root, import_packages
from deeppavlov.core.common.chainer import Chainer
from deeppavlov.core.common.file import read_json
@@ -25,7 +28,9 @@
log = get_logger(__name__)


def build_model_from_config(config, mode='infer', load_trained=False, as_component=False):
def build_model_from_config(config: [str, Path, dict], mode='infer', load_trained=False, as_component=False):
if isinstance(config, (str, Path)):
config = read_json(config)
set_deeppavlov_root(config)

import_packages(config.get('metadata', {}).get('imports', []))
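A minimal usage sketch of the updated build_model_from_config signature; the config path and query below are illustrative assumptions, not part of the commit:

from pathlib import Path

from deeppavlov.core.commands.infer import build_model_from_config

# A str or Path is now read with read_json internally, so passing a
# pre-loaded config dict is no longer the only option.
config_path = Path('deeppavlov/configs/odqa/ru_odqa_infer_wiki.json')
model = build_model_from_config(config_path)
predictions = model(['Sample question text'])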
3 changes: 1 addition & 2 deletions deeppavlov/core/models/keras_model.py
@@ -99,8 +99,7 @@ def init_model_from_scratch(self, model_name: str, optimizer_name: str,
Returns:
compiled model with given network and learning parameters
"""
log.info("[initializing `{}` from scratch]".format(self.__class__.__name__))
print(model_name)
log.info(f'[initializing `{self.__class__.__name__}` from scratch as {model_name}]')
model_func = getattr(self, model_name, None)
if callable(model_func):
model = model_func(**self.opt)
92 changes: 31 additions & 61 deletions deeppavlov/download.py
@@ -15,12 +15,12 @@
"""

import argparse
from collections import defaultdict
from pathlib import Path
import sys

root_path = (Path(__file__) / ".." / "..").resolve()
sys.path.append(str(root_path))

import deeppavlov
from deeppavlov.core.commands.utils import get_deeppavlov_root, set_deeppavlov_root, expand_path
from deeppavlov.core.common.file import read_json
from deeppavlov.core.data.utils import download, download_decompress, get_all_elems_from_json
from deeppavlov.core.common.log import get_logger
@@ -35,73 +35,52 @@
parser.add_argument('-all', action='store_true',
help="Download everything. Warning! There should be at least 10 GB space"
" available on disk.")
parser.add_argument('-test', action='store_true',
help="Turn test mode")


def get_config_downloads(config_path, config_downloads=None):
def get_config_downloads(config_path):
dp_root_back = get_deeppavlov_root()
config = read_json(config_path)
set_deeppavlov_root(config)

if config_downloads is None:
config_downloads = {}

downloads = set()
if 'metadata' in config and 'download' in config['metadata']:
for resource in config['metadata']['download']:
if isinstance(resource, str):
url = resource
sub_dir = ''
elif isinstance(resource, dict):
url = resource['url']
sub_dir = resource['subdir'] if 'subdir' in resource else ''
resource = {
'url': resource
}

if url in config_downloads:
config_downloads[url]['subdir'] = list(set(config_downloads[url]['subdir'] +
[sub_dir]))
else:
config_downloads[url] = {'url': url, 'subdir': [sub_dir]}
url = resource['url']
dest = expand_path(resource.get('subdir', ''))

config_references = get_all_elems_from_json(config, 'config_path')
config_references = [root_path.joinpath(config_ref.split('../', 1)[1]) for config_ref in config_references]
downloads.add((url, dest))

for config_ref in config_references:
config_downloads = get_config_downloads(config_ref, config_downloads)
config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]

return config_downloads
downloads |= {(url, dest) for config in config_references for url, dest in get_config_downloads(config)}

set_deeppavlov_root({'deeppavlov_root': dp_root_back})

def get_configs_downloads(config_path=None, test=None):
all_downloads = {}
return downloads

if test:
configs_path = root_path / 'tests' / 'deeppavlov' / 'configs'
else:
configs_path = root_path / 'deeppavlov' / 'configs'

def get_configs_downloads(config_path=None):
all_downloads = defaultdict(set)

if config_path:
configs = [config_path]
else:
configs = list(configs_path.glob('**/*.json'))
configs = list(Path(deeppavlov.__path__[0], 'configs').glob('**/*.json'))

for config_path in configs:
config_downloads = get_config_downloads(config_path)
for url in config_downloads:
if url in all_downloads:
all_downloads[url]['subdir'] = list(set(all_downloads[url]['subdir'] +
config_downloads[url]['subdir']))
else:
all_downloads[url] = config_downloads[url]
for url, dest in get_config_downloads(config_path):
all_downloads[url].add(dest)

return all_downloads


def download_resource(resource, download_path):
url = resource['url']
sub_dirs = resource['subdir']
dest_paths = []

for sub_dir in sub_dirs:
dest_path = download_path.joinpath(sub_dir)
dest_paths.append(dest_path)
def download_resource(url, dest_paths):
dest_paths = list(dest_paths)

if url.endswith(('.tar.gz', '.gz', '.zip')):
download_path = dest_paths[0].parent
@@ -113,31 +92,22 @@ def download_resource(resource, download_path):


def download_resources(args):
download_path = root_path / 'download'

if args.test:
download_path = root_path / 'tests' / 'download'
test = True
else:
test = False

if not args.all and not args.config:
log.error('You should provide either skill config path or -all flag')
sys.exit(1)
elif args.all:
downloads = get_configs_downloads(test=test)
downloads = get_configs_downloads()
else:
config_path = Path(args.config).resolve()
downloads = get_configs_downloads(config_path=config_path)

download_path.mkdir(exist_ok=True)

for url in downloads:
resource = downloads[url]
download_resource(resource, download_path)
for url, dest_paths in downloads.items():
download_resource(url, dest_paths)


def deep_download(args=None):
def deep_download(args: [str, Path, list]=None):
if isinstance(args, (str, Path)):
args = ['-c', str(args)] # if args is a path to config
args = parser.parse_args(args)
log.info("Downloading...")
download_resources(args)
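A hedged sketch of the reworked deep_download entry point; the config path is an assumed example, and the equivalence below follows from the str/Path branch added above:

from deeppavlov.download import deep_download

# A bare config path (str or Path) is wrapped into ['-c', str(path)]
# before argparse handles it, so both calls are expected to behave the same.
deep_download('deeppavlov/configs/odqa/ru_odqa_infer_wiki.json')
deep_download(['-c', 'deeppavlov/configs/odqa/ru_odqa_infer_wiki.json'])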
7 changes: 5 additions & 2 deletions deeppavlov/models/classifiers/keras_classification_model.py
@@ -115,7 +115,7 @@ def __init__(self, text_size: int,
"lear_rate": self.opt.get('lear_rate'),
"lear_rate_decay": self.opt.get('lear_rate_decay')}

self.model = self.load(**params)
self.model: Model = self.load(**params)
self._change_not_fixed_params(text_size=text_size, model_name=model_name,
optimizer=optimizer, loss=loss,
lear_rate=lear_rate, lear_rate_decay=lear_rate_decay,
@@ -133,7 +133,10 @@ def __init__(self, text_size: int,
if self.opt['fasttext_md5'] != current_fasttext_md5:
raise ConfigError(
"Given fasttext model does NOT match fasttext model used previously to train loaded model")
print("Model was successfully initialized!\nModel summary:\n{}".format(self.model.summary()))

summary = ['Model was successfully initialized!', 'Model summary:']
self.model.summary(print_fn=summary.append)
log.info('\n'.join(summary))

def _change_not_fixed_params(self, **kwargs) -> None:
"""
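The logging pattern above in isolation, as a standalone sketch with a toy Keras model (the model itself is an assumption; only the print_fn hook matters):

import logging

from keras.layers import Dense
from keras.models import Sequential

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Model.summary accepts a print_fn callback, so the summary lines can be
# collected and emitted through a logger instead of being printed to stdout.
model = Sequential([Dense(2, input_shape=(4,))])
summary = ['Model was successfully initialized!', 'Model summary:']
model.summary(print_fn=summary.append)
log.info('\n'.join(summary))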
3 changes: 1 addition & 2 deletions deeppavlov/models/classifiers/utils.py
@@ -104,8 +104,7 @@ def log_metrics(names: [list, np.ndarray], values: [list, np.ndarray],
Returns:
None
"""
sys.stdout.write("\r") # back to previous line
log.info("{} -->\t".format(mode))
log.info("\r{} -->\t".format(mode))
if updates is not None:
log.info("updates: {}\t".format(updates))

Empty file.
2 changes: 1 addition & 1 deletion deeppavlov/models/morpho_tagger/common.py
@@ -107,7 +107,7 @@ class TagOutputPrettifier(Component):
"""

def __init__(self, return_string: bool=True, begin: str="",
end: str ="", sep: str ="\n"):
end: str ="", sep: str ="\n", **kwargs):

self.return_string = return_string
self.begin = begin
19 changes: 5 additions & 14 deletions deeppavlov/models/morpho_tagger/network.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import json
from typing import List

import keras.layers as kl
@@ -110,7 +108,7 @@ def build(self):
self.model_ = Model(inputs, outputs)
self.model_.compile(**compile_args)
if self.verbose > 0:
log.info(str(self.model_.summary()))
self.model_.summary(print_fn=log.info)
return self

def build_word_cnn(self, inputs):
@@ -172,12 +170,10 @@ def build_basic_network(self, word_outputs):
return pre_outputs, lstm_outputs

def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
if len(self.word_vectorizers) > 0:
data, additional_data = data[0], data[1:]
data, additional_data = data[0], data[1:]
L = max(len(x) for x in data)
X = np.array([self._make_sent_vector(x, L) for x in data])
if len(self.word_vectorizers) > 0:
X = [X] + [np.array(x) for x in additional_data]
X = [X] + [np.array(x) for x in additional_data]
if labels is not None:
Y = np.array([self._make_tags_vector(y, L) for y in labels])
if transform_to_one_hot:
@@ -197,7 +193,7 @@ def train_on_batch(self, data, labels):
# TO_DO: add weights to deal with padded instances
return self.model_.train_on_batch(X, Y)

def predict_on_batch(self, data: List, return_indexes=False):
def predict_on_batch(self, data: [list, tuple], return_indexes=False):
"""
Makes predictions on a single batch
@@ -206,10 +202,7 @@
answer: a batch of label sequences
"""
X = self._transform_batch(data)
if len(self.word_vectorizers) > 0:
objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]]
else:
objects_number, lengths = len(X), [len(elem) for elem in data]
objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]]
Y = self.model_.predict_on_batch(X)
labels = np.argmax(Y, axis=-1)
answer: List[List[str]] = [None] * objects_number
@@ -245,5 +238,3 @@ def save(self, outfile):

def load(self, infile):
self.model_.load_weights(infile)


8 changes: 2 additions & 6 deletions deeppavlov/models/morpho_tagger/tagger.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import copy
import inspect

@@ -104,11 +103,8 @@ def train_on_batch(self, *args):
Returns:
"""
if len(args) > 2:
data, labels = [list(x) for x in args[:-1]], list(args[-1])
else:
data, labels = args
self._net.train_on_batch(data, labels, **self.train_parameters)
*data, labels = args
self._net.train_on_batch(data, labels)

def __call__(self, *x_batch, **kwargs):
"""
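A tiny sketch of the extended unpacking now used in train_on_batch; the batch values are purely illustrative:

# Every leading positional argument becomes one input stream, and the last
# positional argument is taken as the labels.
args = (['John', 'loves', 'Mary'], [[1.0], [0.5], [0.2]], ['PROPN', 'VERB', 'PROPN'])
*data, labels = args
assert data == [['John', 'loves', 'Mary'], [[1.0], [0.5], [0.2]]]
assert labels == ['PROPN', 'VERB', 'PROPN']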
2 changes: 1 addition & 1 deletion deeppavlov/models/preprocessors/squad_preprocessor.py
@@ -138,7 +138,7 @@ def convert_idx(text: str, tokens: List[str]) -> List[Tuple[int, int]]:
for token in tokens:
current = text.find(token, current)
if current < 0:
print("Token {} cannot be found".format(token))
logger.error("Token {} cannot be found".format(token))
raise Exception()
spans.append((current, current + len(token)))
current += len(token)
5 changes: 3 additions & 2 deletions deeppavlov/models/ranking/ranking_model.py
@@ -231,7 +231,8 @@ def make_hard_triplets(self, x, y, net):
if not no_samples:
break
if no_samples:
print("There is no negative examples with distances greater than positive examples distances.")
log.error("There are no negative examples with distances"
" greater than positive examples distances.")
exit(0)
else:
if self.num_hardest_negatives is not None:
@@ -285,7 +286,7 @@ def make_hard_triplets(self, x, y, net):
rp = [el[1] for el in triplets]
rn = [el[2] for el in triplets]
ratio = sum(hrds) / len(hrds)
print("Ratio of semi-hard negative samples is %f" % ratio)
log.info("Ratio of semi-hard negative samples is %f" % ratio)
return [(c, rp), (c, rn)]

def get_semi_hard_negative_ind(self, i, j, k, distances, anchor_negative_dist, batch_size, num_samples):
2 changes: 1 addition & 1 deletion deeppavlov/models/seq2seq_go_bot/bot.py
@@ -125,7 +125,7 @@ def _filter(tokens):
preds = [list(_filter(self.tgt_vocab(utter_idxs)))
for utter_idxs in pred_idxs]
if self.debug:
print("Dialog prediction = \"{}\"".format(preds[-1]))
log.debug("Dialog prediction = \"{}\"".format(preds[-1]))
return preds

def save(self):
1 change: 0 additions & 1 deletion deeppavlov/models/squad/utils.py
@@ -98,7 +98,6 @@ def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=
gru_fw, gru_bw = self.grus[layer]
init_fw, init_bw = self.inits[layer]
mask_fw, mask_bw = self.dropout_mask[layer]
print(outputs)
with tf.variable_scope('fw_{}'.format(layer), reuse=tf.AUTO_REUSE):
with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
out_fw, _ = tf.nn.dynamic_rnn(cell=gru_fw, inputs=outputs[-1] * mask_fw, time_major=True,