Skip to content

Commit

Permalink
Merge pull request #228 from floydhub/fix-internal
Browse files Browse the repository at this point in the history
Enforce users to specify mount_dir
  • Loading branch information
ReDeiPirati authored Dec 14, 2018
2 parents 10c0b35 + 17aa8f0 commit 78ac3c5
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 52 deletions.
35 changes: 11 additions & 24 deletions floyd/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from floyd.client.project import ProjectClient
from floyd.cli.utils import (
get_data_name, normalize_data_name, normalize_job_name
normalize_data_name, normalize_job_name
)
from floyd.client.experiment import ExperimentClient
from floyd.client.module import ModuleClient
Expand All @@ -42,20 +42,13 @@
# This is the same as r'[\w\-\.]'
ALLOWED_CHARSET = r'[a-zA-Z0-9\-_\.]'

# DEPRECATED pattern, but still available: <data_id>, <data_id>:<mount_dir>
# DATAID_PATTERN = '%s+(\:\/?%s+\/?)?' % (ALLOWED_CHARSET, ALLOWED_CHARSET)
DATAID_PATTERN = '%s+(:/?%s+/?)?' % (ALLOWED_CHARSET, ALLOWED_CHARSET)

# All the possible patterns:
# <dataset_name>
# or <dataset_name>:<mount_dir>
# or <namespace>/[projects|datasets]/<dataset_or_project_name>
# <dataset_or_project_name>:<mount_dir>
# or <namespace>/[projects|datasets]/<dataset_or_project_name>:<mount_dir>
# or <namespace>/[projects|datasets]/<dataset_or_project_name>/<version>
# or <namespace>/[projects|datasets]/<dataset_or_project_name>/<version>:<mount_dir>
DATANAME_PATTERN = '(%s+/datasets/)?%s+(/[0-9]+)?(:/?%s+/?)?' % (ALLOWED_CHARSET, ALLOWED_CHARSET, ALLOWED_CHARSET)
PRJFILE_PATTERN = '(%s+/projects/)?%s+(/[0-9]+(/output)?)?(:/?%s+/?)?' % (ALLOWED_CHARSET, ALLOWED_CHARSET, ALLOWED_CHARSET)
DATAMOUNT_PATTERN = '^(%s|%s|%s)$' % (DATAID_PATTERN, DATANAME_PATTERN, PRJFILE_PATTERN)
DATANAME_PATTERN = '(%s+/datasets/)?%s+(/?|/[0-9]+/?):/?%s+/?' % (ALLOWED_CHARSET, ALLOWED_CHARSET, ALLOWED_CHARSET)
PROJECT_OUTPUT_PATTERN = '(%s+/projects/)?%s+(/?|/[0-9]+(/?|/output/?)):/?%s+/?' % (ALLOWED_CHARSET, ALLOWED_CHARSET, ALLOWED_CHARSET)
DATAMOUNT_PATTERN = '^(%s|%s)$' % (DATANAME_PATTERN, PROJECT_OUTPUT_PATTERN)


def process_data_ids(data_ids):
Expand All @@ -77,13 +70,11 @@ def process_data_ids(data_ids):
"\tfloyd run --data <namespace>/datasets/<dataset_name>:<mount_dir>\n"
"\tfloyd run --data <namespace>/datasets/<dataset_name>/<version>:<mount_dir>\n"
"\tfloyd run --data <namespace>/projects/<project_name>/<version>:<mount_dir>\n"
"\tfloyd run --data <data_id>:<mounting_point> (DEPRECATED)\n"
"\tfloyd run --data <namespace>/projects/<project_name>/<version>/output:<mount_dir>\n"
"\n Note: Argument can only contain alphanumeric, hyphen-minus '-' , underscore '_' and dot '.' characters."
) % data_name_or_id)
path = None
if ':' in data_name_or_id:
data_name_or_id, path = data_name_or_id.split(':')

data_name_or_id, path = data_name_or_id.split(':')
data_obj = get_data_object(data_id=data_name_or_id, use_data_config=False)

if not data_obj:
Expand All @@ -103,12 +94,9 @@ def process_data_ids(data_ids):
)
return False, None

if path:
processed_data_ids.append("%s:%s" % (data_obj.id, path))
show_data_info.append([data_name_or_id, path if path.startswith('/') else '/' + path])
else:
processed_data_ids.append(data_obj.id)
show_data_info.append([data_name_or_id, '/' + data_obj.id])
processed_data_ids.append("%s:%s" % (data_obj.id, path))
show_data_info.append([data_name_or_id, path if path.startswith('/') else '/' + path])

return True, processed_data_ids, show_data_info


Expand Down Expand Up @@ -279,8 +267,7 @@ def run(ctx, cpu, gpu, env, message, data, mode, open_notebook, follow, tensorbo
sys.exit(2)

# Create module
default_name = 'input' if len(data_ids) <= 1 else None
module_inputs = [{'name': get_data_name(data_str, default_name),
module_inputs = [{'name': data_str.split(':')[1],
'type': 'dir'} for data_str in data_ids]

instance_type = None
Expand Down
13 changes: 0 additions & 13 deletions floyd/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,6 @@ def get_module_task_instance_id(task_instances):
return None


def get_data_name(data_str, default=None):
"""
If data_str is of the format <ID>:<NAME>, return <NAME>
Else return default if default is present
Otherwise return ID itself
"""
if ':' in data_str:
_, name = data_str.split(':')
else:
name = default if default else data_str
return name


def get_data_id(data_str):
"""
If data_str is of the format <ID>:<NAME>, or <URI>/<PATH>:<NAME>
Expand Down
18 changes: 3 additions & 15 deletions tests/cli/run/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_with_multiple_data_ids(self,
Experiment with multiple data ids
"""
data_get.return_value.id = 'data_id'
result = self.runner.invoke(run, ['command', '--data', 'data-id1', '--data', 'data-id2'], catch_exceptions=False)
result = self.runner.invoke(run, ['command', '--data', 'data-id1:bar', '--data', 'data-id2:foo'], catch_exceptions=False)
assert_exit_code(result, 0)

@patch('floyd.manager.data_config.DataConfigManager.get_config', side_effect=mock_data_config)
Expand All @@ -93,30 +93,18 @@ def test_regex_for_data_arg(self,
"""
Test regex patterns for data arg
"""
# PATTERN: <dataset_name>
result = self.runner.invoke(run, ['command', '--data', 'data-id1'], catch_exceptions=False)
assert_exit_code(result, 0)

# PATTERN: <dataset_name>:<mounting_point>
result = self.runner.invoke(run, ['command', '--data', 'data-id1:/bar'], catch_exceptions=False)
assert_exit_code(result, 0)

# PATTERN: <namespace>/[projects|datasets]/<dataset_or_project_name>
result = self.runner.invoke(run, ['command', '--data', 'mckay/datasets/data-id1'], catch_exceptions=False)
assert_exit_code(result, 0)

# PATTERN: <namespace>/[projects|datasets]/<dataset_or_project_name>:<mounting_point>
result = self.runner.invoke(run, ['command', '--data', 'mckay/datasets/data-id1:/bar'], catch_exceptions=False)
result = self.runner.invoke(run, ['command', '--data', 'mckay/datasets/data-id1/:bar'], catch_exceptions=False)
assert_exit_code(result, 0)

# PATTERN: <namespace>/projects/<project_name>/output:<mounting_point>
result = self.runner.invoke(run, ['command', '--data', 'mckay/projects/test/1/output:/bar'], catch_exceptions=False)
assert_exit_code(result, 0)

# PATTERN: <namespace>/[projects|datasets]/<dataset_or_project_name>/<version>
result = self.runner.invoke(run, ['command', '--data', 'mckay/datasets/data_id1/1'], catch_exceptions=False)
assert_exit_code(result, 0)

# PATTERN: <namespace>/[projects|datasets]/<dataset_or_project_name>/<version>:<mounting_point>
result = self.runner.invoke(run, ['command', '--data', 'mckay/datasets/data-id1/1:bar_2_foo'], catch_exceptions=False)
assert_exit_code(result, 0)
Expand Down Expand Up @@ -224,7 +212,7 @@ def test_mount_dataset_without_version(self,
"""
CLI should fail if more than one --env is passed
"""
result = self.runner.invoke(run, ['--env', 'foo', '--data', 'foo/datasets/bar', 'ls'])
result = self.runner.invoke(run, ['--env', 'foo', '--data', 'foo/datasets/bar:bar', 'ls'])
assert_exit_code(result, 0)

def test_resolve_final_instance_type(self):
Expand Down

0 comments on commit 78ac3c5

Please sign in to comment.