#
# APTrust Preservation Services config file
# for auditing the production repo.
#
# This file includes no credentials. The essential ones
# are commented out below so they will be read in
# from env vars.
#
# BASE_WORKING_DIR is the directory under which preservation services
# may create new folders and files. For test and dev, this should be
# ~/tmp. For demo and production, probably /mnt/lvm/apt or something
# similar.
BASE_WORKING_DIR="~/aptrust/audit"
# Preservation buckets. Be sure to set these correctly for each environment;
# they differ for live, demo, and staging. (In test configs, these point to
# a local Minio server, so we can call them whatever we want.)
# E.g., for staging:
#
# BUCKET_STANDARD_OR="aptrust.staging.preservation.oregon"
# BUCKET_STANDARD_VA="aptrust.staging.preservation"
# BUCKET_GLACIER_OH="aptrust.staging.preservation.glacier.oh"
# BUCKET_GLACIER_OR="aptrust.staging.preservation.glacier.or"
# BUCKET_GLACIER_VA="aptrust.staging.preservation.glacier.va"
# BUCKET_GLACIER_DEEP_OH="aptrust.staging.preservation.glacier-deep.oh"
# BUCKET_GLACIER_DEEP_OR="aptrust.staging.preservation.glacier-deep.or"
# BUCKET_GLACIER_DEEP_VA="aptrust.staging.preservation.glacier-deep.va"
# BUCKET_WASABI_OR="coming-soon"
# BUCKET_WASABI_VA="coming-soon"
#
BUCKET_STANDARD_OR="aptrust.preservation.oregon"
BUCKET_STANDARD_VA="aptrust.preservation.storage"
BUCKET_GLACIER_OH="aptrust.preservation.glacier.oh"
BUCKET_GLACIER_OR="aptrust.preservation.glacier.or"
BUCKET_GLACIER_VA="aptrust.preservation.glacier.va"
BUCKET_GLACIER_DEEP_OH="aptrust.preservation.glacier-deep.oh"
BUCKET_GLACIER_DEEP_OR="aptrust.preservation.glacier-deep.or"
BUCKET_GLACIER_DEEP_VA="aptrust.preservation.glacier-deep.va"
BUCKET_WASABI_OR="aptrust-production-wasabi-or"
BUCKET_WASABI_VA="aptrust-production-wasabi-va"
# INGEST_BUCKET_READER_INTERVAL describes how often the ingest bucket
# reader should scan the receiving buckets for new bags. The reader
# will wait this long after finishing a scan before starting the next
# scan. Use Golang duration strings, e.g. "90s" is 90 seconds, "5m" is
# five minutes, "1h" is one hour.
INGEST_BUCKET_READER_INTERVAL="10s"
# APT_QUEUE_INTERVAL describes how often apt_queue should check for new
# WorkItems.
APT_QUEUE_INTERVAL="20s"
# INGEST_TEMP_DIR is a directory in which preservation services can
# keep temporary files during the ingest process.
INGEST_TEMP_DIR="~/tmp/pres-serv/ingest"
# LOG_DIR is where preservation services writes its log files.
# To log to STDOUT, set LOG_DIR to "STDOUT"
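# For example, to send log output to the console instead of a file:
# LOG_DIR="STDOUT"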
LOG_DIR="~/aptrust/audit/logs"
# LOG_LEVEL should be one of: CRITICAL, ERROR, WARNING, NOTICE, INFO,
# or DEBUG. For dev and test, it's usually DEBUG. For demo and prod,
# it should usually be INFO.
LOG_LEVEL=DEBUG
# MAX_DAYS_SINCE_LAST_FIXITY is the maximum number of days allowed
# between fixity checks. Per agreement with depositors, this is 90.
# In dev and test, we occasionally set it lower to force fixity checks
# to run.
MAX_DAYS_SINCE_LAST_FIXITY=90
# MAX_FILE_SIZE is the maximum file size the system can handle. Since we're
# working with S3, this is 5TB, or 5497558138880 bytes. The limit is lower
# on the demo server, probably more like 5GB, or 5368709120 bytes.
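# (For reference: 5 TB = 5 * 1024^4 = 5497558138880 bytes;
#  5 GB = 5 * 1024^3 = 5368709120 bytes.)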
MAX_FILE_SIZE=5497558138880
# MAX_FIXITY_ITEMS_PER_RUN is the maximum number of files we should
# queue for fixity in each run of apt_queue_fixity. For production,
# this should be around 2500.
MAX_FIXITY_ITEMS_PER_RUN=2500
# MAX_WORKER_ATTEMPTS is the maximum number of times a worker should
# attempt its job.
MAX_WORKER_ATTEMPTS=3
# NSQ_LOOKUPD is the host name and port of the NSQ lookup daemon.
# By default, it runs on port 4161. Format should be "host:port"
NSQ_LOOKUPD="localhost:4161"
# NSQ_URL is the URL of the NSQ server. It typically runs on port 4151.
# Format should be "http(s)://host:port"
NSQ_URL="http://localhost:4151"
# Registry
#PRESERV_REGISTRY_API_KEY="password"
#PRESERV_REGISTRY_API_USER="[email protected]"
PRESERV_REGISTRY_API_VERSION="v3"
PRESERV_REGISTRY_URL="https://repo.aptrust.org"
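#
# The commented-out Registry credentials above are read from the process
# environment. For example (placeholder values only; the real user and API
# key are supplied by the deployment environment, never stored in this file):
#
# export PRESERV_REGISTRY_API_USER="<registry-user-email>"
# export PRESERV_REGISTRY_API_KEY="<registry-api-key>"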
# PROFILES_DIR is the directory that contains BagIt profiles and
# the default.sig signature file for file format identification.
# In dev and test configs, this can be set relative to the project
# root if it begins with "./". In other environments, it should be
# set to an absolute path.
PROFILES_DIR="./profiles"
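# For example, a non-dev deployment might use an absolute path such as
# PROFILES_DIR="/srv/preservation-services/profiles" (path is illustrative).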
# QUEUE_FIXITY_INTERVAL describes how often we should queue new items
# for fixity checks.
QUEUE_FIXITY_INTERVAL="60m"
# REDIS_DEFAULT_DB is the number of the Redis DB in which preservation
# services keeps its data. This should be 0 in most cases.
REDIS_DEFAULT_DB=0
# REDIS_PASSWORD is the password required to connect to the Redis
# server. In dev and test, this should be an empty string.
REDIS_PASSWORD=""
# REDIS_RETRIES is the number of times we should try to get a record
# from Redis. This is helpful in testing, as our test code often requests
# recently-inserted data before Redis is able to return it.
REDIS_RETRIES=3
# REDIS_RETRY_MS is the number of milliseconds between retries when
# we retry Redis requests.
REDIS_RETRY_MS=250ms
# REDIS_URL is the URL of the Redis server in the format "host:port".
# The default port is 6379. For dev and test, this should be localhost:6379.
REDIS_URL="localhost:6379"
# REDIS_USER is the user name required to connect to Redis. For dev and
# test, this should be an empty string.
REDIS_USER=""
# RESTORE_DIR is the directory in which preservation services should build
# the bags it's restoring.
RESTORE_DIR="~/tmp/pres-serv/restore"
# STAGING_BUCKET is the name of the bucket into which ingest workers copy
# files for staging, before they are fully ingested.
STAGING_BUCKET="staging"
# STAGING_UPLOAD_RETRY_MS is the number of milliseconds to wait before
# retrying an upload to the S3 staging bucket.
STAGING_UPLOAD_RETRY_MS=250ms
# VOLUME_SERVICE_URL is the URL for the local volume server. This will
# always be on localhost, typically http://localhost:8898
VOLUME_SERVICE_URL="http://localhost:8898"
# --------------------------------------------------------------------------
# Below are settings for our workers.
#
# BUFFER_SIZE is the size of the channel buffer for Go workers.
# Default is 20.
# WORKERS is the number of workers (Go routines) that do the
# work of copying files, recording metadata, etc.
# Default is 3, but see notes below.
# MAX_ATTEMPTS is the maximum number of times a service should try
# to complete a single WorkItem.
# Default is 5.
# --------------------------------------------------------------------------
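# Each worker below is configured with three variables that follow the same
# naming pattern, e.g. (SOME_WORKER is a placeholder, not a real setting):
#
# SOME_WORKER_BUFFER_SIZE=20   # channel buffer size
# SOME_WORKER_WORKERS=3        # number of Go routines
# SOME_WORKER_MAX_ATTEMPTS=3   # max attempts per WorkItem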
# apt_delete deletes files. It does not tax any resources, except
# possibly Pharos in cases where it's asked to delete large numbers
# of files.
APT_DELETE_BUFFER_SIZE=20
APT_DELETE_WORKERS=3
APT_DELETE_MAX_ATTEMPTS=3
# apt_fixity runs scheduled fixity checks. It can tax network I/O.
APT_FIXITY_BUFFER_SIZE=20
APT_FIXITY_WORKERS=3
APT_FIXITY_MAX_ATTEMPTS=3
# bag_restorer restores bags to the depositor's restoration bucket.
# It can be taxing on network I/O when restoring bags with many files
# or bags with large files.
BAG_RESTORER_BUFFER_SIZE=20
BAG_RESTORER_WORKERS=3
BAG_RESTORER_MAX_ATTEMPTS=3
# file_restorer restores individual files to a depositor's restoration
# bucket. It can use a lot of network I/O when restoring large files.
FILE_RESTORER_BUFFER_SIZE=20
FILE_RESTORER_WORKERS=3
FILE_RESTORER_MAX_ATTEMPTS=3
# glacier_restorer initiates restoration of files from Glacier. It
# uses very few resources (very light network I/O, CPU and RAM)
GLACIER_RESTORER_BUFFER_SIZE=20
GLACIER_RESTORER_WORKERS=3
GLACIER_RESTORER_MAX_ATTEMPTS=3
# ingest_cleanup deletes interim processing data from the staging
# bucket, receiving bucket, and Redis. It uses few resources
# (very light network I/O, CPU and RAM)
INGEST_CLEANUP_BUFFER_SIZE=20
INGEST_CLEANUP_WORKERS=3
INGEST_CLEANUP_MAX_ATTEMPTS=3
# format_identifier identifies the file format of items in the
# staging bucket. This one can be a memory hog, so we're setting
# workers to 2 instead of 3. Siegfried keeps large S3 data buffers
# in memory. With too many concurrent streams, the system will
# run out of memory.
INGEST_FORMAT_IDENTIFIER_BUFFER_SIZE=12
INGEST_FORMAT_IDENTIFIER_WORKERS=2
INGEST_FORMAT_IDENTIFIER_MAX_ATTEMPTS=3
# ingest_pre_fetch untars bags from receiving buckets, calculates multiple
# checksums for each file, and stores interim metadata for each file in Redis.
# This service can use a lot of bandwidth and CPU while pulling data from
# the S3 receiving bucket. It can also issue large numbers of writes to Redis,
# though that doesn't seem to be a problem.
INGEST_PRE_FETCH_BUFFER_SIZE=20
INGEST_PRE_FETCH_WORKERS=3
INGEST_PRE_FETCH_MAX_ATTEMPTS=3
# ingest_preservation_uploader copies files from the staging bucket
# to preservation buckets. It can use a lot of network I/O, memory,
# and CPU.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_PRESERVATION_UPLOADER_BUFFER_SIZE=20
INGEST_PRESERVATION_UPLOADER_WORKERS=3
INGEST_PRESERVATION_UPLOADER_MAX_ATTEMPTS=3
# ingest_preservation_verifier ensures that ingest_preservation_uploader
# correctly copied all files to all of the right preservation buckets.
# This worker uses light network I/O, CPU and RAM.
INGEST_PRESERVATION_VERIFIER_BUFFER_SIZE=20
INGEST_PRESERVATION_VERIFIER_WORKERS=3
INGEST_PRESERVATION_VERIFIER_MAX_ATTEMPTS=3
# ingest_recorder records metadata for all ingested items in Pharos.
# While this worker does not use many resources itself, it heavily
# taxes Pharos on both CPU and RAM. When Pharos is getting overwhelmed,
# we need to reduce the number of workers for this service.
#
# Note that this one defaults to 5 attempts instead of 3. When Pharos
# is busy, Nginx returns sporadic 502/Bad Gateway responses. The
# extra attempts account for those.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
INGEST_RECORDER_BUFFER_SIZE=20
INGEST_RECORDER_WORKERS=3
INGEST_RECORDER_MAX_ATTEMPTS=5
# ingest_staging_uploader copies individual (untarred) files from the
# tar file in the receiving bucket to a temporary staging bucket.
# This service can potentially use a lot of network I/O, RAM, and CPU
# when working with bags containing many files or large files.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_STAGING_UPLOADER_BUFFER_SIZE=20
INGEST_STAGING_UPLOADER_WORKERS=3
INGEST_STAGING_UPLOADER_MAX_ATTEMPTS=3
# ingest_validator validates metadata captured by ingest_pre_fetch.
# Although this worker can do huge numbers of reads and writes to
# Redis, Redis doesn't ever seem to mind.
INGEST_VALIDATOR_BUFFER_SIZE=20
INGEST_VALIDATOR_WORKERS=3
INGEST_VALIDATOR_MAX_ATTEMPTS=3
# reingest_manager checks the file metadata collected by ingest_pre_fetch
# against Pharos, checking to see whether any files being ingested now have
# ever been ingested before. This typically runs a limited number of read
# requests against Pharos, but may occasionally issue thousands of Pharos
# reads in cases where we're re-ingesting large bags. Except in those cases,
# this service tends to use light network/RAM/CPU resources.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
REINGEST_MANAGER_BUFFER_SIZE=20
REINGEST_MANAGER_WORKERS=3
REINGEST_MANAGER_MAX_ATTEMPTS=3
# Connection info for local S3 service. This is a minio server that
# runs in dev/test, but not demo/production.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# Minio signed URLs use hostname, not IP.
S3_LOCAL_HOST="localhost:9899"
S3_LOCAL_KEY="minioadmin"
S3_LOCAL_SECRET="minioadmin"
# AWS S3 connection info. In dev/test, point back to the local S3
# provider, so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# Minio signed URLs use hostname, not IP.
S3_AWS_HOST="s3.amazonaws.com"
#S3_AWS_KEY="minioadmin" <---- Read this from environment instead
#S3_AWS_SECRET="minioadmin" <---- Read this from environment instead
# Wasabi S3 connection info. In dev/test, point back to the local S3
# provider, so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# Minio signed URLs use hostname, not IP.
S3_WASABI_HOST_OR="s3.us-west-1.wasabisys.com"
S3_WASABI_HOST_VA="s3.us-east-1.wasabisys.com"
#S3_WASABI_KEY="minioadmin" <---- Read this from environment instead
#S3_WASABI_SECRET="minioadmin" <---- Read this from environment instead
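#
# The AWS and Wasabi keys/secrets above are likewise read from the process
# environment. For example (placeholder values only; real credentials come
# from the deployment environment and are never committed to this file):
#
# export S3_AWS_KEY="<aws-access-key-id>"
# export S3_AWS_SECRET="<aws-secret-access-key>"
# export S3_WASABI_KEY="<wasabi-access-key-id>"
# export S3_WASABI_SECRET="<wasabi-secret-access-key>"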