#
# APTrust Preservation Services config file
#
PRESERV_RELEASE=
#ENVIRONMENT=development
# APT_ENV is not required for Docker setups: we use .env as the default and bind-mount a populated .env over the default inside the container.
# APT_ENV=
DOCKER_DATA_PATH=./docker
DOCKER_RESTORE_PATH=./docker/restore
DOCKER_TAG_NAME=latest
DOCKER_DOMAIN=docker.localhost
BASE_WORKING_DIR=./data
PROFILES_DIR=./profiles
RESTORE_DIR=./data/restore
# Preservation buckets. These differ for live, demo, and staging.
# E.g., for staging:
#
# BUCKET_STANDARD_OR="aptrust.staging.preservation.oregon"
# BUCKET_STANDARD_VA="aptrust.staging.preservation"
# BUCKET_GLACIER_OH="aptrust.staging.preservation.glacier.oh"
# BUCKET_GLACIER_OR="aptrust.staging.preservation.glacier.or"
# BUCKET_GLACIER_VA="aptrust.staging.preservation.glacier.va"
# BUCKET_GLACIER_DEEP_OH="aptrust.staging.preservation.glacier-deep.oh"
# BUCKET_GLACIER_DEEP_OR="aptrust.staging.preservation.glacier-deep.or"
# BUCKET_GLACIER_DEEP_VA="aptrust.staging.preservation.glacier-deep.va"
# BUCKET_WASABI_OR="coming-soon"
# BUCKET_WASABI_VA="coming-soon"
BUCKET_STANDARD_OR="preservation-or"
BUCKET_STANDARD_VA="preservation-va"
BUCKET_GLACIER_OH="glacier-oh"
BUCKET_GLACIER_OR="glacier-or"
BUCKET_GLACIER_VA="glacier-va"
BUCKET_GLACIER_DEEP_OH="glacier-deep-oh"
BUCKET_GLACIER_DEEP_OR="glacier-deep-or"
BUCKET_GLACIER_DEEP_VA="glacier-deep-va"
BUCKET_WASABI_OR="wasabi-or"
BUCKET_WASABI_VA="wasabi-va"
INGEST_BUCKET_READER_INTERVAL="3m"
INGEST_TEMP_DIR="${BASE_WORKING_DIR}/tmp"
# To log to STDOUT, set LOG_DIR to "STDOUT"
# LOG_DIR="${BASE_WORKING_DIR}/logs"
LOG_DIR="STDOUT"
LOG_LEVEL=DEBUG
MAX_DAYS_SINCE_LAST_FIXITY=90
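# MAX_FILE_SIZE below works out to 5 TiB (5 * 1024^4 = 5,497,558,138,880
# bytes), which matches Amazon S3's maximum size for a single object.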
MAX_FILE_SIZE=5497558138880
MAX_FIXITY_ITEMS_PER_RUN=2500
MAX_WORKER_ATTEMPTS=3
# NSQ Settings
NSQ_LOOKUPD_TCP_ADDRESS="nsqlookupd:4160"
NSQ_LOOKUPD_HTTP_ADDRESS="nsqlookupd:4161"
#NSQ_DATA_PATH="${DOCKER_DATA_PATH}/nsq"
NSQ_BROADCAST_ADDRESS="nsqd"
NSQ_MSG_TIMEOUT="360m0s"
NSQ_MAX_MSG_TIMEOUT="180m0s"
NSQ_MAX_MSG_SIZE="31457280"
NSQ_MEM_QUEUE_SIZE="0"
NSQ_TCP_ADDRESS=0.0.0.0:4150
NSQ_MAX_REQ_TIMEOUT="180m0s"
NSQ_MAX_HEARTBEAT_INTERVAL="30s"
NSQ_LOOKUPD="${NSQ_LOOKUPD_HTTP_ADDRESS}"
NSQ_URL="http://${NSQ_BROADCAST_ADDRESS}:4151"
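# A quick way to confirm the NSQ containers are reachable from inside the
# Docker network (these are standard NSQ HTTP API endpoints; hostnames and
# ports match the settings above):
#
# curl http://nsqd:4151/ping          # nsqd liveness check, returns OK
# curl http://nsqlookupd:4161/ping    # nsqlookupd liveness check, returns OK
# curl http://nsqd:4151/stats         # per-topic/channel queue stats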
# Registry
PRESERV_REGISTRY_API_KEY="c3958c7b09e40af1d065020484dafa9b2a35cea0"
PRESERV_REGISTRY_API_USER="[email protected]"
PRESERV_REGISTRY_API_VERSION="v3"
PRESERV_REGISTRY_URL="http://registry"
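# A minimal sketch for checking that the Registry container is reachable
# from inside the Docker network (a plain HTTP connectivity check; it
# assumes nothing about the Registry API itself):
#
# curl -is "${PRESERV_REGISTRY_URL}" | head -n 1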
# For apt_queue_fixity: how often to run it.
QUEUE_FIXITY_INTERVAL="60m"
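# Note: the interval and timeout values in this file ("3m", "60m",
# "360m0s") appear to be Go duration strings (as parsed by
# time.ParseDuration), so forms like "90s", "45m", and "1h30m" should
# also be accepted.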
# REDIS
REDIS_DEFAULT_DB=0
REDIS_PASSWORD=""
REDIS_RETRIES=3
REDIS_RETRY_MS=250ms
REDIS_URL="redis:6379"
REDIS_USER=""
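# To confirm Redis is reachable with the settings above (assumes redis-cli
# is available inside the Docker network; a healthy server answers PONG):
#
# redis-cli -h redis -p 6379 -n 0 ping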
STAGING_BUCKET="staging"
STAGING_UPLOAD_RETRY_MS=250ms
VOLUME_SERVICE_URL="http://volume_service:8898"
# --------------------------------------------------------------------------
# Below are settings for our workers.
#
# BUFFER_SIZE is the size of the channel buffer for Go workers.
# Default is 20.
# WORKERS is the number of workers (goroutines) to do the work of
# copying files, recording metadata, etc.
# Default is 3, but see notes below.
# MAX_ATTEMPTS is the maximum number of times a service should try
# to complete a single WorkItem.
# Default is 5.
# --------------------------------------------------------------------------
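# As a purely illustrative example (not a recommendation), a host with
# ample bandwidth and RAM might scale up one of the I/O-heavy workers
# like this:
#
# INGEST_STAGING_UPLOADER_BUFFER_SIZE=40
# INGEST_STAGING_UPLOADER_WORKERS=6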
# apt_delete deletes files. It does not tax any resources, except
# possibly Pharos in cases where it's asked to delete large numbers
# of files.
APT_DELETE_BUFFER_SIZE=20
APT_DELETE_WORKERS=3
APT_DELETE_MAX_ATTEMPTS=3
# apt_fixity runs scheduled fixity checks. It can tax network I/O.
APT_FIXITY_BUFFER_SIZE=20
APT_FIXITY_WORKERS=3
APT_FIXITY_MAX_ATTEMPTS=3
# bag_restorer restores bags to the depositor's restoration bucket.
# It can be taxing on network I/O when restoring bags with many files
# or bags with large files.
BAG_RESTORER_BUFFER_SIZE=20
BAG_RESTORER_WORKERS=3
BAG_RESTORER_MAX_ATTEMPTS=3
# file_restorer restores individual files to a depositor's restoration
# bucket. It can use a lot of network I/O when restoring large files.
FILE_RESTORER_BUFFER_SIZE=20
FILE_RESTORER_WORKERS=3
FILE_RESTORER_MAX_ATTEMPTS=3
# glacier_restorer initiates restoration of files from Glacier. It
# uses very few resources (very light network I/O, CPU, and RAM).
GLACIER_RESTORER_BUFFER_SIZE=20
GLACIER_RESTORER_WORKERS=3
GLACIER_RESTORER_MAX_ATTEMPTS=3
# ingest_cleanup deletes interim processing data from the staging
# bucket, receiving bucket, and Redis. It uses few resources
# (very light network I/O, CPU, and RAM).
INGEST_CLEANUP_BUFFER_SIZE=20
INGEST_CLEANUP_WORKERS=3
INGEST_CLEANUP_MAX_ATTEMPTS=3
# format_identifier identifies the file format of items in the
# staging bucket. This one can be a memory hog, so we're setting
# workers to 2 instead of 3. Siegfried keeps large S3 data buffers
# in memory. With too many concurrent streams, the system will
# run out of memory.
INGEST_FORMAT_IDENTIFIER_BUFFER_SIZE=12
INGEST_FORMAT_IDENTIFIER_WORKERS=2
INGEST_FORMAT_IDENTIFIER_MAX_ATTEMPTS=3
# ingest_pre_fetch untars bags from receiving buckets, calculates multiple
# checksums for each file, and stores interim metadata for each file in Redis.
# This service can use a lot of bandwidth and CPU while pulling data from
# the S3 receiving bucket. It can also issue large numbers of writes to Redis,
# though that doesn't seem to be a problem.
INGEST_PRE_FETCH_BUFFER_SIZE=20
INGEST_PRE_FETCH_WORKERS=3
INGEST_PRE_FETCH_MAX_ATTEMPTS=3
# ingest_preservation_uploader copies files from the staging bucket
# to preservation buckets. It can use a lot of network I/O, memory,
# and CPU.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_PRESERVATION_UPLOADER_BUFFER_SIZE=20
INGEST_PRESERVATION_UPLOADER_WORKERS=3
INGEST_PRESERVATION_UPLOADER_MAX_ATTEMPTS=3
# ingest_preservation_verifier ensures that ingest_preservation_uploader
# correctly copied all files to all of the right preservation buckets.
# This worker uses light network I/O, CPU and RAM.
INGEST_PRESERVATION_VERIFIER_BUFFER_SIZE=20
INGEST_PRESERVATION_VERIFIER_WORKERS=3
INGEST_PRESERVATION_VERIFIER_MAX_ATTEMPTS=3
# ingest_recorder records metadata for all ingested items in Pharos.
# While this worker does not use many resources itself, it heavily
# taxes Pharos on both CPU and RAM. When Pharos is getting overwhelmed,
# we need to reduce the number of workers for this service.
#
# Note that this one defaults to 5 attempts instead of 3. When Pharos
# is busy, Nginx returns sporadic 502/Bad Gateway responses. The
# extra attempts account for those.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
INGEST_RECORDER_BUFFER_SIZE=20
INGEST_RECORDER_WORKERS=3
INGEST_RECORDER_MAX_ATTEMPTS=5
# ingest_staging_uploader copies individual (untarred) files from the
# tar file in the receiving bucket to a temporary staging bucket.
# This service can potentially use a lot of network I/O, RAM, and CPU
# when working with bags containing many files or large files.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_STAGING_UPLOADER_BUFFER_SIZE=20
INGEST_STAGING_UPLOADER_WORKERS=3
INGEST_STAGING_UPLOADER_MAX_ATTEMPTS=3
# ingest_validator validates metadata captured by ingest_pre_fetch.
# Although this worker can do huge numbers of reads and writes to
# Redis, Redis doesn't ever seem to mind.
INGEST_VALIDATOR_BUFFER_SIZE=20
INGEST_VALIDATOR_WORKERS=3
INGEST_VALIDATOR_MAX_ATTEMPTS=3
# reingest_manager checks the file metadata collected by ingest_pre_fetch
# against Pharos to see whether any files being ingested now have ever
# been ingested before. This typically runs a limited number of read
# requests against Pharos, but may occasionally issue thousands of Pharos
# reads in cases where we're re-ingesting large bags. Except in those cases,
# this service tends to use light network/RAM/CPU resources.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
REINGEST_MANAGER_BUFFER_SIZE=20
REINGEST_MANAGER_WORKERS=3
REINGEST_MANAGER_MAX_ATTEMPTS=3
# Connection info for the local S3 service. This is a MinIO server that
# runs in dev/test, but not in demo/production.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# MinIO signed URLs use the hostname, not the IP.
S3_LOCAL_HOST="minio:9000"
S3_LOCAL_KEY="minioadmin"
S3_LOCAL_SECRET="minioadmin"
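# To inspect the local MinIO server with the MinIO client (mc), if it's
# installed (run from a container on the same Docker network; the alias
# name "local" is arbitrary):
#
# mc alias set local http://minio:9000 minioadmin minioadmin
# mc ls local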
# AWS S3 connection info. In dev/test, point back to the local S3
# provider so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# MinIO signed URLs use the hostname, not the IP.
S3_AWS_HOST="minio:9000"
S3_AWS_KEY="minioadmin"
S3_AWS_SECRET="minioadmin"
# Wasabi S3 connection info. In dev/test, point back to the local S3
# provider so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# MinIO signed URLs use the hostname, not the IP.
S3_WASABI_HOST_OR="minio:9000"
S3_WASABI_HOST_VA="minio:9000"
S3_WASABI_KEY="minioadmin"
S3_WASABI_SECRET="minioadmin"