#
# APTrust Preservation Services config file
#
PRESERV_RELEASE=
#ENVIRONMENT=development
# APT_ENV is not required for Docker setups: we use .env as the default and bind-mount a populated .env over the default inside the container.
# APT_ENV=
DOCKER_DATA_PATH=./docker
DOCKER_RESTORE_PATH=./docker/restore
DOCKER_TAG_NAME=latest
DOCKER_DOMAIN=docker.localhost
BASE_WORKING_DIR=./data
PROFILES_DIR=./profiles
RESTORE_DIR=./data/restore
# Preservation buckets. These differ for live, demo, and staging.
# E.g., for staging:
#
# BUCKET_STANDARD_OR="aptrust.staging.preservation.oregon"
# BUCKET_STANDARD_VA="aptrust.staging.preservation"
# BUCKET_GLACIER_OH="aptrust.staging.preservation.glacier.oh"
# BUCKET_GLACIER_OR="aptrust.staging.preservation.glacier.or"
# BUCKET_GLACIER_VA="aptrust.staging.preservation.glacier.va"
# BUCKET_GLACIER_DEEP_OH="aptrust.staging.preservation.glacier-deep.oh"
# BUCKET_GLACIER_DEEP_OR="aptrust.staging.preservation.glacier-deep.or"
# BUCKET_GLACIER_DEEP_VA="aptrust.staging.preservation.glacier-deep.va"
# BUCKET_WASABI_OR="coming-soon"
# BUCKET_WASABI_VA="coming-soon"
BUCKET_STANDARD_OR="preservation-or"
BUCKET_STANDARD_VA="preservation-va"
BUCKET_GLACIER_OH="glacier-oh"
BUCKET_GLACIER_OR="glacier-or"
BUCKET_GLACIER_VA="glacier-va"
BUCKET_GLACIER_DEEP_OH="glacier-deep-oh"
BUCKET_GLACIER_DEEP_OR="glacier-deep-or"
BUCKET_GLACIER_DEEP_VA="glacier-deep-va"
BUCKET_WASABI_OR="wasabi-or"
BUCKET_WASABI_VA="wasabi-va"
INGEST_BUCKET_READER_INTERVAL="3m"
INGEST_TEMP_DIR="${BASE_WORKING_DIR}/tmp"
# To log to STDOUT, set LOG_DIR to "STDOUT"
# LOG_DIR="${BASE_WORKING_DIR}/logs"
LOG_DIR="STDOUT"
LOG_LEVEL=DEBUG
MAX_DAYS_SINCE_LAST_FIXITY=90
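# MAX_FILE_SIZE below works out to 5 TiB (5 * 1024^4 = 5,497,558,138,880
# bytes), which matches Amazon S3's maximum size for a single object.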
MAX_FILE_SIZE=5497558138880
MAX_FIXITY_ITEMS_PER_RUN=2500
MAX_WORKER_ATTEMPTS=3
# NSQ Settings
NSQ_LOOKUPD_TCP_ADDRESS="nsqlookupd:4160"
NSQ_LOOKUPD_HTTP_ADDRESS="nsqlookupd:4161"
#NSQ_DATA_PATH="${DOCKER_DATA_PATH}/nsq"
NSQ_BROADCAST_ADDRESS="nsqd"
NSQ_MSG_TIMEOUT="360m0s"
NSQ_MAX_MSG_TIMEOUT="180m0s"
NSQ_MAX_MSG_SIZE="31457280"
NSQ_MEM_QUEUE_SIZE="0"
NSQ_TCP_ADDRESS=0.0.0.0:4150
NSQ_MAX_REQ_TIMEOUT="180m0s"
NSQ_MAX_HEARTBEAT_INTERVAL="30s"
NSQ_LOOKUPD="${NSQ_LOOKUPD_HTTP_ADDRESS}"
NSQ_URL="http://${NSQ_BROADCAST_ADDRESS}:4151"
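# A quick way to confirm the NSQ containers are reachable from inside the
# Docker network (these are standard NSQ HTTP API endpoints; hostnames and
# ports match the settings above):
#
# curl http://nsqd:4151/ping          # nsqd liveness check, returns OK
# curl http://nsqlookupd:4161/ping    # nsqlookupd liveness check, returns OK
# curl http://nsqd:4151/stats         # per-topic/channel queue stats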
# Registry
PRESERV_REGISTRY_API_KEY="c3958c7b09e40af1d065020484dafa9b2a35cea0"
PRESERV_REGISTRY_API_USER="[email protected]"
PRESERV_REGISTRY_API_VERSION="v3"
PRESERV_REGISTRY_URL="http://registry"
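# A minimal sketch for checking that the Registry container is reachable
# from inside the Docker network (a plain HTTP connectivity check; it
# assumes nothing about the Registry API itself):
#
# curl -is "${PRESERV_REGISTRY_URL}" | head -n 1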
# For apt_queue_fixity: how often to run it.
QUEUE_FIXITY_INTERVAL="60m"
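# Note: the interval and timeout values in this file ("3m", "60m",
# "360m0s") appear to be Go duration strings (as parsed by
# time.ParseDuration), so forms like "90s", "45m", and "1h30m" should
# also be accepted.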
# REDIS
REDIS_DEFAULT_DB=0
REDIS_PASSWORD=""
REDIS_RETRIES=3
REDIS_RETRY_MS=250ms
REDIS_URL="redis:6379"
REDIS_USER=""
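# To confirm Redis is reachable with the settings above (assumes redis-cli
# is available inside the Docker network; a healthy server answers PONG):
#
# redis-cli -h redis -p 6379 -n 0 ping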
STAGING_BUCKET="staging"
STAGING_UPLOAD_RETRY_MS=250ms
VOLUME_SERVICE_URL="http://volume_service:8898"
# --------------------------------------------------------------------------
# Below are settings for our workers.
#
# BUFFER_SIZE is the size of the channel buffer for Go workers.
# Default is 20.
# WORKERS is the number of workers (goroutines) to do the work of
# copying files, recording metadata, etc.
# Default is 3, but see notes below.
# MAX_ATTEMPTS is the maximum number of times a service should try
# to complete a single WorkItem.
# Default is 5.
# --------------------------------------------------------------------------
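# As a purely illustrative example (not a recommendation), a host with
# ample bandwidth and RAM might scale up one of the I/O-heavy workers
# like this:
#
# INGEST_STAGING_UPLOADER_BUFFER_SIZE=40
# INGEST_STAGING_UPLOADER_WORKERS=6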
# apt_delete deletes files. It does not tax any resources, except
# possibly Pharos in cases where it's asked to delete large numbers
# of files.
APT_DELETE_BUFFER_SIZE=20
APT_DELETE_WORKERS=3
APT_DELETE_MAX_ATTEMPTS=3
# apt_fixity runs scheduled fixity checks. It can tax network I/O.
APT_FIXITY_BUFFER_SIZE=20
APT_FIXITY_WORKERS=3
APT_FIXITY_MAX_ATTEMPTS=3
# bag_restorer restores bags to the depositor's restoration bucket.
# It can be taxing on network I/O when restoring bags with many files
# or bags with large files.
BAG_RESTORER_BUFFER_SIZE=20
BAG_RESTORER_WORKERS=3
BAG_RESTORER_MAX_ATTEMPTS=3
# file_restorer restores individual files to a depositor's restoration
# bucket. It can use a lot of network I/O when restoring large files.
FILE_RESTORER_BUFFER_SIZE=20
FILE_RESTORER_WORKERS=3
FILE_RESTORER_MAX_ATTEMPTS=3
# glacier_restorer initiates restoration of files from Glacier. It
# uses very few resources (very light network I/O, CPU, and RAM).
GLACIER_RESTORER_BUFFER_SIZE=20
GLACIER_RESTORER_WORKERS=3
GLACIER_RESTORER_MAX_ATTEMPTS=3
# ingest_cleanup deletes interim processing data from the staging
# bucket, receiving bucket, and Redis. It uses few resources
# (very light network I/O, CPU, and RAM).
INGEST_CLEANUP_BUFFER_SIZE=20
INGEST_CLEANUP_WORKERS=3
INGEST_CLEANUP_MAX_ATTEMPTS=3
# format_identifier identifies the file format of items in the
# staging bucket. This one can be a memory hog, so we're setting
# workers to 2 instead of 3. Siegfried keeps large S3 data buffers
# in memory. With too many concurrent streams, the system will
# run out of memory.
INGEST_FORMAT_IDENTIFIER_BUFFER_SIZE=12
INGEST_FORMAT_IDENTIFIER_WORKERS=2
INGEST_FORMAT_IDENTIFIER_MAX_ATTEMPTS=3
# ingest_pre_fetch untars bags from receiving buckets, calculates multiple
# checksums for each file, and stores interim metadata for each file in Redis.
# This service can use a lot of bandwidth and CPU while pulling data from
# the S3 receiving bucket. It can also issue large numbers of writes to Redis,
# though that doesn't seem to be a problem.
INGEST_PRE_FETCH_BUFFER_SIZE=20
INGEST_PRE_FETCH_WORKERS=3
INGEST_PRE_FETCH_MAX_ATTEMPTS=3
# ingest_preservation_uploader copies files from the staging bucket
# to preservation buckets. It can use a lot of network I/O, memory,
# and CPU.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_PRESERVATION_UPLOADER_BUFFER_SIZE=20
INGEST_PRESERVATION_UPLOADER_WORKERS=3
INGEST_PRESERVATION_UPLOADER_MAX_ATTEMPTS=3
# ingest_preservation_verifier ensures that ingest_preservation_uploader
# correctly copied all files to all of the right preservation buckets.
# This worker uses light network I/O, CPU and RAM.
INGEST_PRESERVATION_VERIFIER_BUFFER_SIZE=20
INGEST_PRESERVATION_VERIFIER_WORKERS=3
INGEST_PRESERVATION_VERIFIER_MAX_ATTEMPTS=3
# ingest_recorder records metadata for all ingested items in Pharos.
# While this worker does not use many resources itself, it heavily
# taxes Pharos on both CPU and RAM. When Pharos is getting overwhelmed,
# we need to reduce the number of workers for this service.
#
# Note that this one defaults to 5 attempts instead of 3. When Pharos
# is busy, Nginx returns sporadic 502/Bad Gateway responses. The
# extra attempts account for those.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
INGEST_RECORDER_BUFFER_SIZE=20
INGEST_RECORDER_WORKERS=3
INGEST_RECORDER_MAX_ATTEMPTS=5
# ingest_staging_uploader copies individual (untarred) files from the
# tar file in the receiving bucket to a temporary staging bucket.
# This service can potentially use a lot of network I/O, RAM, and CPU
# when working with bags containing many files or large files.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_STAGING_UPLOADER_BUFFER_SIZE=20
INGEST_STAGING_UPLOADER_WORKERS=3
INGEST_STAGING_UPLOADER_MAX_ATTEMPTS=3
# ingest_validator validates metadata captured by ingest_pre_fetch.
# Although this worker can do huge numbers of reads and writes to
# Redis, Redis doesn't ever seem to mind.
INGEST_VALIDATOR_BUFFER_SIZE=20
INGEST_VALIDATOR_WORKERS=3
INGEST_VALIDATOR_MAX_ATTEMPTS=3
# reingest_manager checks the file metadata collected by ingest_pre_fetch
# against Pharos to see whether any files being ingested now have ever
# been ingested before. This typically runs a limited number of read
# requests against Pharos, but may occasionally issue thousands of Pharos
# reads in cases where we're re-ingesting large bags. Except in those cases,
# this service tends to use light network/RAM/CPU resources.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
REINGEST_MANAGER_BUFFER_SIZE=20
REINGEST_MANAGER_WORKERS=3
REINGEST_MANAGER_MAX_ATTEMPTS=3
# Connection info for the local S3 service. This is a MinIO server that
# runs in dev/test, but not in demo/production.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# MinIO signed URLs use the hostname, not the IP.
S3_LOCAL_HOST="minio:9000"
S3_LOCAL_KEY="minioadmin"
S3_LOCAL_SECRET="minioadmin"
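# To inspect the local MinIO server with the MinIO client (mc), if it's
# installed (run from a container on the same Docker network; the alias
# name "local" is arbitrary):
#
# mc alias set local http://minio:9000 minioadmin minioadmin
# mc ls local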
# AWS S3 connection info. In dev/test, point back to the local S3
# provider so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# MinIO signed URLs use the hostname, not the IP.
S3_AWS_HOST="minio:9000"
S3_AWS_KEY="minioadmin"
S3_AWS_SECRET="minioadmin"
# Wasabi S3 connection info. In dev/test, point back to the local S3
# provider so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# MinIO signed URLs use the hostname, not the IP.
S3_WASABI_HOST_OR="minio:9000"
S3_WASABI_HOST_VA="minio:9000"
S3_WASABI_KEY="minioadmin"
S3_WASABI_SECRET="minioadmin"