Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use run ID instead of date prefix for resource naming. #26

Merged
merged 1 commit into from
Sep 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions batch-setup/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def s3_policy(bucket, date_prefix, allow_write=False):
def find_or_create_s3_policy(iam, bucket_name, bucket, date_prefix,
allow_write=False):
"""
Finds a policy in the planet_date environment to access the given bucket,
Finds a policy in the run's environment to access the given bucket,
or creates one.

Returns the policy's ARN.
Expand All @@ -216,9 +216,9 @@ def kebab_to_camel(name):
return "".join(map(lambda s: s.capitalize(), name.split('-')))


def ensure_job_role_arn(iam, planet_date, name, buckets, date_prefixes):
def ensure_job_role_arn(iam, run_id, name, buckets, date_prefixes):
role_name = kebab_to_camel(
"batch-%s-%s" % (name, planet_date.strftime("%y%m%d")))
"batch-%s-%s" % (name, run_id))

arn = None
try:
Expand Down Expand Up @@ -259,7 +259,7 @@ def ensure_job_role_arn(iam, planet_date, name, buckets, date_prefixes):
def create_role(iam, image_name, role_name, buckets, date_prefixes):

"""
Create a role with the given role_name for the image in the planet_date
Create a role with the given role_name for the image in the run's
environment.
"""

Expand Down Expand Up @@ -317,7 +317,7 @@ def allow_s3_write(self, name, bucket, date_prefix):


def make_job_definitions(
iam, planet_date, region, repo_urls, databases, buckets,
iam, run_id, region, repo_urls, databases, buckets,
db_password, memory, vcpus, retry_attempts, date_prefixes,
check_metatile_exists):

Expand All @@ -327,13 +327,13 @@ def make_job_definitions(
definition_names = {}
for name, image in repo_urls.items():
job_role_arn = ensure_job_role_arn(
iam, planet_date, name, buckets, date_prefixes)
iam, run_id, name, buckets, date_prefixes)
memory_value = memory[name] if isinstance(memory, dict) else memory
vcpus_value = vcpus[name] if isinstance(vcpus, dict) else vcpus
retry_value = retry_attempts[name] \
if isinstance(retry_attempts, dict) else retry_attempts

job_name = "%s-%s" % (name, planet_date.strftime("%y%m%d"))
job_name = "%s-%s" % (name, run_id)
definition = {
'name': job_name,
'job-role-arn': job_role_arn,
Expand Down Expand Up @@ -424,7 +424,7 @@ def run_go(cmd, *args, **kwargs):
stdout.close()


def create_job_definitions(planet_date, region, repo_urls, databases, buckets,
def create_job_definitions(run_id, region, repo_urls, databases, buckets,
db_password, memory=1024, vcpus=1,
retry_attempts=5, date_prefix=None,
meta_date_prefix=None, check_metatile_exists=False):
Expand All @@ -437,7 +437,7 @@ def create_job_definitions(planet_date, region, repo_urls, databases, buckets,
not they will be passed directly to AWS.

If date_prefix is left as None (the default) it will be generated from
planet_date automatically.
run_id automatically.

If meta_date_prefix is specified, a different date prefix will be used in
the metatile and missing buckets.
Expand All @@ -448,15 +448,15 @@ def create_job_definitions(planet_date, region, repo_urls, databases, buckets,
iam = boto3.client('iam')

if date_prefix is None:
date_prefix = planet_date.strftime('%Y%m%d')
date_prefix = run_id

if meta_date_prefix is None:
meta_date_prefix = date_prefix

date_prefixes = Buckets(date_prefix, meta_date_prefix, meta_date_prefix)

job_definitions, definition_names = make_job_definitions(
iam, planet_date, region, repo_urls, databases, buckets,
iam, run_id, region, repo_urls, databases, buckets,
db_password, memory, vcpus, retry_attempts, date_prefixes,
check_metatile_exists)

Expand Down
27 changes: 13 additions & 14 deletions batch-setup/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,8 @@ def is_power_of_two(x):
'date prefix, defaults to planet date. You can '
'also set the environment variable '
'META_DATE_PREFIX.')
parser.add_argument('--run-id', help='Distinctive run ID to give to '
'this build. Defaults to planet date YYMMDD.')

args = parser.parse_args()
planet_date = datetime.strptime(args.date, '%y%m%d')
Expand All @@ -525,9 +527,8 @@ def is_power_of_two(x):
assert args.metatile_size > 0
assert args.metatile_size < 100

profile_name = args.profile_name
if profile_name is None:
profile_name = planet_date.strftime('tps-%y%m%d')
run_id = args.run_id or planet_date.strftime('%y%m%d')
profile_name = args.profile_name or ('tps-' + run_id)

def bucket_name(arg_name, bucket_function):
prop_name = arg_name.lstrip('-').replace('-', '_')
Expand All @@ -543,16 +544,15 @@ def bucket_name(arg_name, bucket_function):
rawr_bucket = bucket_name('--rawr-bucket', 'rawr-tiles')
missing_bucket = bucket_name('--missing-bucket', 'missing-tiles')
meta_bucket = bucket_name('--meta-bucket', 'meta-tiles')
date_prefix = planet_date.strftime('%y%m%d')
locations = Locations(
Bucket(assets_bucket, 'flat-nodes-' + date_prefix),
Bucket(rawr_bucket, date_prefix),
Bucket(meta_bucket, date_prefix),
Bucket(missing_bucket, date_prefix),
Bucket(assets_bucket, 'flat-nodes-' + run_id),
Bucket(rawr_bucket, run_id),
Bucket(meta_bucket, run_id),
Bucket(missing_bucket, run_id),
)
meta_date_prefix = (args.meta_date_prefix or
os.environ.get('META_DATE_PREFIX') or
date_prefix)
run_id)

iam = boto3.client('iam')

Expand All @@ -569,9 +569,8 @@ def bucket_name(arg_name, bucket_function):

smgr = boto3.client('secretsmanager')
smgr_name = (args.db_password_secret_name or
planet_date.strftime('TilesDatabasePassword%y%m%d'))
smgr_description = planet_date.strftime(
'Tiles database password for %Y-%m-%d import')
('TilesDatabasePassword' + run_id))
smgr_description = 'Tiles database password for %s import' % (run_id,)
db_password = generate_or_update_password(
smgr, args.db_password, smgr_name, smgr_description)

Expand All @@ -585,7 +584,7 @@ def bucket_name(arg_name, bucket_function):
meta_bucket=locations.meta.name,
missing_bucket=locations.missing.name,
date_iso=planet_date.strftime('%Y-%m-%d'),
planet_date=planet_date.strftime('%y%m%d'),
run_id=run_id,
raw_tiles_version=args.raw_tiles_version,
tilequeue_version=args.tilequeue_version,
vector_datasource_version=args.vector_datasource_version,
Expand Down Expand Up @@ -618,7 +617,7 @@ def bucket_name(arg_name, bucket_function):
ResourceType='instance',
Tags=[dict(
Key='tps-instance',
Value=planet_date.strftime('%Y-%m-%d'),
Value=run_id,
)],
)],
)
Expand Down
5 changes: 2 additions & 3 deletions batch-setup/ecr.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ def ensure_repo(ecr, repo_name):
return repo_uri


def ensure_ecr(planet_date):
def ensure_ecr(run_id):
ecr = boto3.client('ecr')
date_suffix = planet_date.strftime('%y%m%d')

repo_names = (
'meta-low-zoom-batch',
Expand All @@ -40,7 +39,7 @@ def ensure_ecr(planet_date):

repo_uris = {}
for repo_name in repo_names:
full_name = 'tilezen/%s-%s' % (repo_name, date_suffix)
full_name = 'tilezen/%s-%s' % (repo_name, run_id)
repo_uris[repo_name] = ensure_repo(ecr, full_name)

return repo_uris
12 changes: 6 additions & 6 deletions batch-setup/make_meta_tiles.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from batch import Buckets
from batch import run_go
from datetime import datetime
import yaml
from make_rawr_tiles import wait_for_jobs_to_finish
from make_rawr_tiles import wc_line
from run_id import assert_run_id_format
from contextlib import contextmanager
from collections import namedtuple
import boto3
Expand Down Expand Up @@ -197,9 +197,9 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists):
parser.add_argument('meta_bucket', help="Bucket with meta tiles in")
parser.add_argument('--missing-bucket', help="Bucket to store missing "
"tile logs in")
parser.add_argument('date', help='Planet date, YYMMDD')
parser.add_argument('run_id', help='Unique identifier for run.')
parser.add_argument('--date-prefix', help="Date prefix in bucket, "
"defaults to planet date.")
"defaults to run ID.")
parser.add_argument('--retries', default=5, type=int, help="Number "
"of times to retry enqueueing the remaining jobs "
"before giving up.")
Expand All @@ -225,11 +225,11 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists):
help='Metatile size (in 256px tiles).')

args = parser.parse_args()
planet_date = datetime.strptime(args.date, '%y%m%d')
assert_run_id_format(args.run_id)
buckets = Buckets(args.rawr_bucket, args.meta_bucket,
args.missing_bucket or args.meta_bucket)
date_prefix = args.date_prefix or planet_date.strftime('%y%m%d')
missing_bucket_date_prefix = planet_date.strftime('%y%m%d')
date_prefix = args.date_prefix or args.run_id
missing_bucket_date_prefix = args.run_id
assert args.key_format_type in ('prefix-hash', 'hash-prefix')

# TODO: split zoom and zoom max should come from config.
Expand Down
28 changes: 13 additions & 15 deletions batch-setup/make_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from batch import Buckets
from batch import create_job_definitions
from docker import build_and_upload_images
from datetime import datetime
from run_id import assert_run_id_format
import argparse
import os
import yaml
Expand All @@ -21,7 +21,8 @@ def vpc_of_sg(sg_id):


parser = argparse.ArgumentParser('Script to kick off tile creation.')
parser.add_argument('date', help='Planet date, YYMMDD')
parser.add_argument('run_id', help='Unique run identifier, used to name '
'resources and log out in the batch runs.')
parser.add_argument('--num-db-replicas', default=1, type=int,
help='Number of database replicas to create.')
parser.add_argument('rawr_bucket', help='S3 bucket for RAWR tiles')
Expand All @@ -30,11 +31,8 @@ def vpc_of_sg(sg_id):
parser.add_argument('--missing-bucket', default=None,
help='Bucket for missing meta tile lists. Defaults to the '
'meta bucket.')
parser.add_argument('--run-id', help='Identifying string to log out in the '
'batch runs.')
parser.add_argument('--date-prefix', default=None, help='Date prefix to use '
'in S3 buckets. By default, generated from the planet '
'date.')
'in S3 buckets. By default, generated from the run ID.')
parser.add_argument('--region', help='AWS region. If not provided, then the '
'AWS_DEFAULT_REGION environment variable must be set.')
parser.add_argument('--meta-date-prefix', help='Optional different date '
Expand All @@ -44,24 +42,24 @@ def vpc_of_sg(sg_id):
'metatile exists first before processing the batch job.')

args = parser.parse_args()
planet_date = datetime.strptime(args.date, '%y%m%d')
run_id = args.run_id or planet_date.strftime('%Y%m%d')
date_prefix = args.date_prefix or planet_date.strftime('%y%m%d')
run_id = args.run_id
assert_run_id_format(run_id)
date_prefix = args.date_prefix or run_id

region = args.region or os.environ.get('AWS_DEFAULT_REGION')
if region is None:
import sys
print "ERROR: Need environment variable AWS_DEFAULT_REGION to be set."
sys.exit(1)

repo_uris = ensure_ecr(planet_date)
repo_uris = ensure_ecr(run_id)

# start databases => db_sg & database hostnames
db_sg_id, database_ids = ensure_dbs(planet_date, args.num_db_replicas)
db_sg_id, database_ids = ensure_dbs(run_id, args.num_db_replicas)

# create batch environment and job queue
compute_env_name = planet_date.strftime('compute-env-%y%m%d')
job_queue_name = planet_date.strftime('job-queue-%y%m%d')
compute_env_name = 'compute-env-' + run_id
job_queue_name = 'job-queue-' + run_id
vpc_id = vpc_of_sg(db_sg_id)

batch_setup(region, vpc_id, [db_sg_id], compute_env_name, job_queue_name)
Expand Down Expand Up @@ -92,7 +90,7 @@ def vpc_of_sg(sg_id):

# create job definitions (references databases, batch setup)
job_def_names = create_job_definitions(
planet_date, region, repo_uris, database_ids, buckets, args.db_password,
run_id, region, repo_uris, database_ids, buckets, args.db_password,
memory=memory, vcpus=vcpus, retry_attempts=retry_attempts,
date_prefix=date_prefix, meta_date_prefix=args.meta_date_prefix,
check_metatile_exists=args.check_metatile_exists)
Expand All @@ -111,7 +109,7 @@ def vpc_of_sg(sg_id):
'run_id': run_id,
'retry-attempts': retry_attempts[name],
'queue-zoom': 7,
'job-name-prefix': name + planet_date.strftime('-%y%m%d'),
'job-name-prefix': ('%s-%s') % (name, run_id),
'vcpus': vcpus,
'job-queue': job_queue_name,
'job-definition': job_def_names[name],
Expand Down
9 changes: 4 additions & 5 deletions batch-setup/provision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ export META_BUCKET='%(meta_bucket)s'
export MISSING_BUCKET='%(missing_bucket)s'

export DATE='%(date_iso)s'
export PLANET_DATE='%(planet_date)s'
export DATE_PREFIX='%(planet_date)s'
export RUN_ID='%(run_id)s'
export META_DATE_PREFIX='%(meta_date_prefix)s'

export RAW_TILES_VERSION='%(raw_tiles_version)s'
Expand Down Expand Up @@ -82,12 +81,12 @@ set -x

python -u /usr/local/src/tileops/import/import.py --find-ip-address meta --date \$DATE \$TILE_ASSET_BUCKET \$AWS_DEFAULT_REGION \
\$TILE_ASSET_PROFILE_ARN \$DB_PASSWORD
python -u /usr/local/src/tileops/batch-setup/make_tiles.py --num-db-replicas 10 \$PLANET_DATE --missing-bucket \$MISSING_BUCKET \
python -u /usr/local/src/tileops/batch-setup/make_tiles.py --num-db-replicas 10 \$RUN_ID --missing-bucket \$MISSING_BUCKET \
--meta-date-prefix \$META_DATE_PREFIX \$RAWR_BUCKET \$META_BUCKET \$DB_PASSWORD
python -u /usr/local/src/tileops/batch-setup/make_rawr_tiles.py --config enqueue-rawr-batch.config.yaml --key-format-type hash-prefix \
\$RAWR_BUCKET \$DATE_PREFIX \$MISSING_BUCKET
\$RAWR_BUCKET \$RUN_ID \$MISSING_BUCKET
python -u /usr/local/src/tileops/batch-setup/make_meta_tiles.py --date-prefix \$META_DATE_PREFIX --missing-bucket \$MISSING_BUCKET \
--key-format-type hash-prefix --metatile-size \$METATILE_SIZE \$RAWR_BUCKET \$META_BUCKET \$DATE_PREFIX
--key-format-type hash-prefix --metatile-size \$METATILE_SIZE \$RAWR_BUCKET \$META_BUCKET \$RUN_ID
EOF
chmod +x /usr/local/bin/run.sh

Expand Down
4 changes: 2 additions & 2 deletions batch-setup/rds.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ def set_databases_security_group(rds, security_group_id, databases):
)


def ensure_dbs(planet_date, num_instances):
snapshot_id = planet_date.strftime('postgis-prod-%Y%m%d')
def ensure_dbs(run_id, num_instances):
snapshot_id = 'postgis-prod-' + run_id
rds = boto3.client('rds')

if not does_snapshot_exist(rds, snapshot_id):
Expand Down
18 changes: 18 additions & 0 deletions batch-setup/run_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import re
import sys


def assert_run_id_format(run_id):
    """
    Checks that the run ID has a format that means we can use it in resource
    names, directory and file names, etc... without any problems. Many AWS
    resources are quite restrictive in terms of the characters that they can
    contain.

    Run IDs may contain only ASCII letters, numbers and dashes, and a dash
    may not appear at the beginning or end. On failure, prints an
    explanation to stderr and exits the process with status 1; on success,
    returns None.
    """

    # One alphanumeric, optionally followed by a run of alphanumerics and
    # dashes that ends with an alphanumeric (so a dash can never be first
    # or last). Anchored at both ends to reject any surrounding junk.
    m = re.match('^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?$', run_id)
    if m is None:
        # Error text belongs on stderr, not stdout, so it is not mixed into
        # any output that calling scripts might be capturing or piping.
        sys.stderr.write(
            "Run ID %r is badly formed. Run IDs may only contain ASCII "
            "letters, numbers and dashes. Dashes may not appear at the "
            "beginning or end of the run ID.\n" % (run_id,))
        sys.exit(1)
Loading