Skip to content

Commit

Permalink
Merge pull request #26 from tilezen/zerebubuth/undate-prefix
Browse files Browse the repository at this point in the history
Use run ID instead of date prefix for resource naming.
  • Loading branch information
zerebubuth authored Sep 25, 2018
2 parents 1a5672a + 6b2297d commit b7d62be
Show file tree
Hide file tree
Showing 14 changed files with 157 additions and 122 deletions.
22 changes: 11 additions & 11 deletions batch-setup/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def s3_policy(bucket, date_prefix, allow_write=False):
def find_or_create_s3_policy(iam, bucket_name, bucket, date_prefix,
allow_write=False):
"""
Finds a policy in the planet_date environment to access the given bucket,
Finds a policy in the run's environment to access the given bucket,
or creates one.
Returns the policy's ARN.
Expand All @@ -216,9 +216,9 @@ def kebab_to_camel(name):
return "".join(map(lambda s: s.capitalize(), name.split('-')))


def ensure_job_role_arn(iam, planet_date, name, buckets, date_prefixes):
def ensure_job_role_arn(iam, run_id, name, buckets, date_prefixes):
role_name = kebab_to_camel(
"batch-%s-%s" % (name, planet_date.strftime("%y%m%d")))
"batch-%s-%s" % (name, run_id))

arn = None
try:
Expand Down Expand Up @@ -259,7 +259,7 @@ def ensure_job_role_arn(iam, planet_date, name, buckets, date_prefixes):
def create_role(iam, image_name, role_name, buckets, date_prefixes):

"""
Create a role with the given role_name for the image in the planet_date
Create a role with the given role_name for the image in the run's
environment.
"""

Expand Down Expand Up @@ -317,7 +317,7 @@ def allow_s3_write(self, name, bucket, date_prefix):


def make_job_definitions(
iam, planet_date, region, repo_urls, databases, buckets,
iam, run_id, region, repo_urls, databases, buckets,
db_password, memory, vcpus, retry_attempts, date_prefixes,
check_metatile_exists):

Expand All @@ -327,13 +327,13 @@ def make_job_definitions(
definition_names = {}
for name, image in repo_urls.items():
job_role_arn = ensure_job_role_arn(
iam, planet_date, name, buckets, date_prefixes)
iam, run_id, name, buckets, date_prefixes)
memory_value = memory[name] if isinstance(memory, dict) else memory
vcpus_value = vcpus[name] if isinstance(vcpus, dict) else vcpus
retry_value = retry_attempts[name] \
if isinstance(retry_attempts, dict) else retry_attempts

job_name = "%s-%s" % (name, planet_date.strftime("%y%m%d"))
job_name = "%s-%s" % (name, run_id)
definition = {
'name': job_name,
'job-role-arn': job_role_arn,
Expand Down Expand Up @@ -424,7 +424,7 @@ def run_go(cmd, *args, **kwargs):
stdout.close()


def create_job_definitions(planet_date, region, repo_urls, databases, buckets,
def create_job_definitions(run_id, region, repo_urls, databases, buckets,
db_password, memory=1024, vcpus=1,
retry_attempts=5, date_prefix=None,
meta_date_prefix=None, check_metatile_exists=False):
Expand All @@ -437,7 +437,7 @@ def create_job_definitions(planet_date, region, repo_urls, databases, buckets,
not they will be passed directly to AWS.
If date_prefix is left as None (the default) it will be generated from
planet_date automatically.
run_id automatically.
If meta_date_prefix is specified, a different date prefix will be used in
the metatile and missing buckets.
Expand All @@ -448,15 +448,15 @@ def create_job_definitions(planet_date, region, repo_urls, databases, buckets,
iam = boto3.client('iam')

if date_prefix is None:
date_prefix = planet_date.strftime('%Y%m%d')
date_prefix = run_id

if meta_date_prefix is None:
meta_date_prefix = date_prefix

date_prefixes = Buckets(date_prefix, meta_date_prefix, meta_date_prefix)

job_definitions, definition_names = make_job_definitions(
iam, planet_date, region, repo_urls, databases, buckets,
iam, run_id, region, repo_urls, databases, buckets,
db_password, memory, vcpus, retry_attempts, date_prefixes,
check_metatile_exists)

Expand Down
27 changes: 13 additions & 14 deletions batch-setup/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,8 @@ def is_power_of_two(x):
'date prefix, defaults to planet date. You can '
'also set the environment variable '
'META_DATE_PREFIX.')
parser.add_argument('--run-id', help='Distinctive run ID to give to '
'this build. Defaults to planet date YYMMDD.')

args = parser.parse_args()
planet_date = datetime.strptime(args.date, '%y%m%d')
Expand All @@ -525,9 +527,8 @@ def is_power_of_two(x):
assert args.metatile_size > 0
assert args.metatile_size < 100

profile_name = args.profile_name
if profile_name is None:
profile_name = planet_date.strftime('tps-%y%m%d')
run_id = args.run_id or planet_date.strftime('%y%m%d')
profile_name = args.profile_name or ('tps-' + run_id)

def bucket_name(arg_name, bucket_function):
prop_name = arg_name.lstrip('-').replace('-', '_')
Expand All @@ -543,16 +544,15 @@ def bucket_name(arg_name, bucket_function):
rawr_bucket = bucket_name('--rawr-bucket', 'rawr-tiles')
missing_bucket = bucket_name('--missing-bucket', 'missing-tiles')
meta_bucket = bucket_name('--meta-bucket', 'meta-tiles')
date_prefix = planet_date.strftime('%y%m%d')
locations = Locations(
Bucket(assets_bucket, 'flat-nodes-' + date_prefix),
Bucket(rawr_bucket, date_prefix),
Bucket(meta_bucket, date_prefix),
Bucket(missing_bucket, date_prefix),
Bucket(assets_bucket, 'flat-nodes-' + run_id),
Bucket(rawr_bucket, run_id),
Bucket(meta_bucket, run_id),
Bucket(missing_bucket, run_id),
)
meta_date_prefix = (args.meta_date_prefix or
os.environ.get('META_DATE_PREFIX') or
date_prefix)
run_id)

iam = boto3.client('iam')

Expand All @@ -569,9 +569,8 @@ def bucket_name(arg_name, bucket_function):

smgr = boto3.client('secretsmanager')
smgr_name = (args.db_password_secret_name or
planet_date.strftime('TilesDatabasePassword%y%m%d'))
smgr_description = planet_date.strftime(
'Tiles database password for %Y-%m-%d import')
('TilesDatabasePassword' + run_id))
smgr_description = 'Tiles database password for %s import' % (run_id,)
db_password = generate_or_update_password(
smgr, args.db_password, smgr_name, smgr_description)

Expand All @@ -585,7 +584,7 @@ def bucket_name(arg_name, bucket_function):
meta_bucket=locations.meta.name,
missing_bucket=locations.missing.name,
date_iso=planet_date.strftime('%Y-%m-%d'),
planet_date=planet_date.strftime('%y%m%d'),
run_id=run_id,
raw_tiles_version=args.raw_tiles_version,
tilequeue_version=args.tilequeue_version,
vector_datasource_version=args.vector_datasource_version,
Expand Down Expand Up @@ -618,7 +617,7 @@ def bucket_name(arg_name, bucket_function):
ResourceType='instance',
Tags=[dict(
Key='tps-instance',
Value=planet_date.strftime('%Y-%m-%d'),
Value=run_id,
)],
)],
)
Expand Down
5 changes: 2 additions & 3 deletions batch-setup/ecr.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ def ensure_repo(ecr, repo_name):
return repo_uri


def ensure_ecr(planet_date):
def ensure_ecr(run_id):
ecr = boto3.client('ecr')
date_suffix = planet_date.strftime('%y%m%d')

repo_names = (
'meta-low-zoom-batch',
Expand All @@ -40,7 +39,7 @@ def ensure_ecr(planet_date):

repo_uris = {}
for repo_name in repo_names:
full_name = 'tilezen/%s-%s' % (repo_name, date_suffix)
full_name = 'tilezen/%s-%s' % (repo_name, run_id)
repo_uris[repo_name] = ensure_repo(ecr, full_name)

return repo_uris
12 changes: 6 additions & 6 deletions batch-setup/make_meta_tiles.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from batch import Buckets
from batch import run_go
from datetime import datetime
import yaml
from make_rawr_tiles import wait_for_jobs_to_finish
from make_rawr_tiles import wc_line
from run_id import assert_run_id_format
from contextlib import contextmanager
from collections import namedtuple
import boto3
Expand Down Expand Up @@ -197,9 +197,9 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists):
parser.add_argument('meta_bucket', help="Bucket with meta tiles in")
parser.add_argument('--missing-bucket', help="Bucket to store missing "
"tile logs in")
parser.add_argument('date', help='Planet date, YYMMDD')
parser.add_argument('run_id', help='Unique identifier for run.')
parser.add_argument('--date-prefix', help="Date prefix in bucket, "
"defaults to planet date.")
"defaults to run ID.")
parser.add_argument('--retries', default=5, type=int, help="Number "
"of times to retry enqueueing the remaining jobs "
"before giving up.")
Expand All @@ -225,11 +225,11 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists):
help='Metatile size (in 256px tiles).')

args = parser.parse_args()
planet_date = datetime.strptime(args.date, '%y%m%d')
assert_run_id_format(args.run_id)
buckets = Buckets(args.rawr_bucket, args.meta_bucket,
args.missing_bucket or args.meta_bucket)
date_prefix = args.date_prefix or planet_date.strftime('%y%m%d')
missing_bucket_date_prefix = planet_date.strftime('%y%m%d')
date_prefix = args.date_prefix or args.run_id
missing_bucket_date_prefix = args.run_id
assert args.key_format_type in ('prefix-hash', 'hash-prefix')

# TODO: split zoom and zoom max should come from config.
Expand Down
28 changes: 13 additions & 15 deletions batch-setup/make_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from batch import Buckets
from batch import create_job_definitions
from docker import build_and_upload_images
from datetime import datetime
from run_id import assert_run_id_format
import argparse
import os
import yaml
Expand All @@ -21,7 +21,8 @@ def vpc_of_sg(sg_id):


parser = argparse.ArgumentParser('Script to kick off tile creation.')
parser.add_argument('date', help='Planet date, YYMMDD')
parser.add_argument('run_id', help='Unique run identifier, used to name '
'resources and log out in the batch runs.')
parser.add_argument('--num-db-replicas', default=1, type=int,
help='Number of database replicas to create.')
parser.add_argument('rawr_bucket', help='S3 bucket for RAWR tiles')
Expand All @@ -30,11 +31,8 @@ def vpc_of_sg(sg_id):
parser.add_argument('--missing-bucket', default=None,
help='Bucket for missing meta tile lists. Defaults to the '
'meta bucket.')
parser.add_argument('--run-id', help='Identifying string to log out in the '
'batch runs.')
parser.add_argument('--date-prefix', default=None, help='Date prefix to use '
'in S3 buckets. By default, generated from the planet '
'date.')
'in S3 buckets. By default, generated from the run ID.')
parser.add_argument('--region', help='AWS region. If not provided, then the '
'AWS_DEFAULT_REGION environment variable must be set.')
parser.add_argument('--meta-date-prefix', help='Optional different date '
Expand All @@ -44,24 +42,24 @@ def vpc_of_sg(sg_id):
'metatile exists first before processing the batch job.')

args = parser.parse_args()
planet_date = datetime.strptime(args.date, '%y%m%d')
run_id = args.run_id or planet_date.strftime('%Y%m%d')
date_prefix = args.date_prefix or planet_date.strftime('%y%m%d')
run_id = args.run_id
assert_run_id_format(run_id)
date_prefix = args.date_prefix or run_id

region = args.region or os.environ.get('AWS_DEFAULT_REGION')
if region is None:
import sys
print "ERROR: Need environment variable AWS_DEFAULT_REGION to be set."
sys.exit(1)

repo_uris = ensure_ecr(planet_date)
repo_uris = ensure_ecr(run_id)

# start databases => db_sg & database hostnames
db_sg_id, database_ids = ensure_dbs(planet_date, args.num_db_replicas)
db_sg_id, database_ids = ensure_dbs(run_id, args.num_db_replicas)

# create batch environment and job queue
compute_env_name = planet_date.strftime('compute-env-%y%m%d')
job_queue_name = planet_date.strftime('job-queue-%y%m%d')
compute_env_name = 'compute-env-' + run_id
job_queue_name = 'job-queue-' + run_id
vpc_id = vpc_of_sg(db_sg_id)

batch_setup(region, vpc_id, [db_sg_id], compute_env_name, job_queue_name)
Expand Down Expand Up @@ -92,7 +90,7 @@ def vpc_of_sg(sg_id):

# create job definitions (references databases, batch setup)
job_def_names = create_job_definitions(
planet_date, region, repo_uris, database_ids, buckets, args.db_password,
run_id, region, repo_uris, database_ids, buckets, args.db_password,
memory=memory, vcpus=vcpus, retry_attempts=retry_attempts,
date_prefix=date_prefix, meta_date_prefix=args.meta_date_prefix,
check_metatile_exists=args.check_metatile_exists)
Expand All @@ -111,7 +109,7 @@ def vpc_of_sg(sg_id):
'run_id': run_id,
'retry-attempts': retry_attempts[name],
'queue-zoom': 7,
'job-name-prefix': name + planet_date.strftime('-%y%m%d'),
'job-name-prefix': ('%s-%s') % (name, run_id),
'vcpus': vcpus,
'job-queue': job_queue_name,
'job-definition': job_def_names[name],
Expand Down
9 changes: 4 additions & 5 deletions batch-setup/provision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ export META_BUCKET='%(meta_bucket)s'
export MISSING_BUCKET='%(missing_bucket)s'
export DATE='%(date_iso)s'
export PLANET_DATE='%(planet_date)s'
export DATE_PREFIX='%(planet_date)s'
export RUN_ID='%(run_id)s'
export META_DATE_PREFIX='%(meta_date_prefix)s'
export RAW_TILES_VERSION='%(raw_tiles_version)s'
Expand Down Expand Up @@ -82,12 +81,12 @@ set -x
python -u /usr/local/src/tileops/import/import.py --find-ip-address meta --date \$DATE \$TILE_ASSET_BUCKET \$AWS_DEFAULT_REGION \
\$TILE_ASSET_PROFILE_ARN \$DB_PASSWORD
python -u /usr/local/src/tileops/batch-setup/make_tiles.py --num-db-replicas 10 \$PLANET_DATE --missing-bucket \$MISSING_BUCKET \
python -u /usr/local/src/tileops/batch-setup/make_tiles.py --num-db-replicas 10 \$RUN_ID --missing-bucket \$MISSING_BUCKET \
--meta-date-prefix \$META_DATE_PREFIX \$RAWR_BUCKET \$META_BUCKET \$DB_PASSWORD
python -u /usr/local/src/tileops/batch-setup/make_rawr_tiles.py --config enqueue-rawr-batch.config.yaml --key-format-type hash-prefix \
\$RAWR_BUCKET \$DATE_PREFIX \$MISSING_BUCKET
\$RAWR_BUCKET \$RUN_ID \$MISSING_BUCKET
python -u /usr/local/src/tileops/batch-setup/make_meta_tiles.py --date-prefix \$META_DATE_PREFIX --missing-bucket \$MISSING_BUCKET \
--key-format-type hash-prefix --metatile-size \$METATILE_SIZE \$RAWR_BUCKET \$META_BUCKET \$DATE_PREFIX
--key-format-type hash-prefix --metatile-size \$METATILE_SIZE \$RAWR_BUCKET \$META_BUCKET \$RUN_ID
EOF
chmod +x /usr/local/bin/run.sh

Expand Down
4 changes: 2 additions & 2 deletions batch-setup/rds.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ def set_databases_security_group(rds, security_group_id, databases):
)


def ensure_dbs(planet_date, num_instances):
snapshot_id = planet_date.strftime('postgis-prod-%Y%m%d')
def ensure_dbs(run_id, num_instances):
snapshot_id = 'postgis-prod-' + run_id
rds = boto3.client('rds')

if not does_snapshot_exist(rds, snapshot_id):
Expand Down
18 changes: 18 additions & 0 deletions batch-setup/run_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import re
import sys


def assert_run_id_format(run_id):
    """
    Validate that run_id is safe to embed in resource names.

    Run IDs get spliced into AWS resource names, directory names and file
    names, and many AWS resources accept only a restricted character set.
    If the run ID is badly formed, print an explanation and exit the
    process with a non-zero status.
    """

    # Only ASCII letters, digits and dashes; a dash may not be the first
    # or last character, and the run ID may not be empty.
    pattern = '^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?$'
    if not re.match(pattern, run_id):
        print("Run ID %r is badly formed. Run IDs may only contain ASCII "
              "letters, numbers and dashes. Dashes may not appear at the "
              "beginning or end of the run ID." % (run_id,))
        sys.exit(1)
Loading

0 comments on commit b7d62be

Please sign in to comment.