#
# APTrust Preservation Services config file
# for auditing the production repo.
#
# This file includes no credentials. The essential ones
# are commented out below so they will be read in
# from env vars.
#
# BASE_WORKING_DIR is the directory under which preservation services
# may create new folders and files. For test and dev, this should be
# ~/tmp. For demo and production, probably /mnt/lvm/apt or something
# similar.
BASE_WORKING_DIR="~/aptrust/audit"
# Preservation buckets. Be sure to set these correctly for each environment;
# they differ for live, demo, and staging. (In test configs, these point to
# a local Minio server, so we can call them whatever we want.)
# E.g., for staging:
#
# BUCKET_STANDARD_OR="aptrust.staging.preservation.oregon"
# BUCKET_STANDARD_VA="aptrust.staging.preservation"
# BUCKET_GLACIER_OH="aptrust.staging.preservation.glacier.oh"
# BUCKET_GLACIER_OR="aptrust.staging.preservation.glacier.or"
# BUCKET_GLACIER_VA="aptrust.staging.preservation.glacier.va"
# BUCKET_GLACIER_DEEP_OH="aptrust.staging.preservation.glacier-deep.oh"
# BUCKET_GLACIER_DEEP_OR="aptrust.staging.preservation.glacier-deep.or"
# BUCKET_GLACIER_DEEP_VA="aptrust.staging.preservation.glacier-deep.va"
# BUCKET_WASABI_OR="coming-soon"
# BUCKET_WASABI_VA="coming-soon"
#
BUCKET_STANDARD_OR="aptrust.preservation.oregon"
BUCKET_STANDARD_VA="aptrust.preservation.storage"
BUCKET_GLACIER_OH="aptrust.preservation.glacier.oh"
BUCKET_GLACIER_OR="aptrust.preservation.glacier.or"
BUCKET_GLACIER_VA="aptrust.preservation.glacier.va"
BUCKET_GLACIER_DEEP_OH="aptrust.preservation.glacier-deep.oh"
BUCKET_GLACIER_DEEP_OR="aptrust.preservation.glacier-deep.or"
BUCKET_GLACIER_DEEP_VA="aptrust.preservation.glacier-deep.va"
BUCKET_WASABI_OR="aptrust-production-wasabi-or"
BUCKET_WASABI_VA="aptrust-production-wasabi-va"
# INGEST_BUCKET_READER_INTERVAL describes how often the ingest bucket
# reader should scan the receiving buckets for new bags. The reader
# will wait this long after finishing a scan before starting the next
# scan. Use Golang duration strings, e.g. "90s" is 90 seconds, "5m" is
# five minutes, "1h" is one hour.
INGEST_BUCKET_READER_INTERVAL="10s"
# APT_QUEUE_INTERVAL describes how often apt_queue should check for new
# WorkItems.
APT_QUEUE_INTERVAL="20s"
# INGEST_TEMP_DIR is a directory in which preservation services can
# keep temporary files during the ingest process.
INGEST_TEMP_DIR="~/tmp/pres-serv/ingest"
# LOG_DIR is where preservation services writes its log files.
# To log to STDOUT, set LOG_DIR to "STDOUT"
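# For example, to send log output to the console instead of a file:
# LOG_DIR="STDOUT"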
LOG_DIR="~/aptrust/audit/logs"
# LOG_LEVEL should be one of: CRITICAL, ERROR, WARNING, NOTICE, INFO,
# or DEBUG. For dev and test, it's usually DEBUG. For demo and prod,
# it should usually be INFO.
LOG_LEVEL=DEBUG
# MAX_DAYS_SINCE_LAST_FIXITY is the maximum number of days allowed
# between fixity checks. Per agreement with depositors, this is 90.
# In dev and test, we occasionally set it lower to force fixity checks
# to run.
MAX_DAYS_SINCE_LAST_FIXITY=90
# MAX_FILE_SIZE is the maximum file size the system can handle. Since we're
# working with S3, this is 5TB, or 5497558138880 bytes. The limit is lower
# on the demo server, probably more like 5GB, or 5368709120 bytes.
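# (For reference: 5 TB = 5 * 1024^4 = 5497558138880 bytes;
#  5 GB = 5 * 1024^3 = 5368709120 bytes.)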
MAX_FILE_SIZE=5497558138880
# MAX_FIXITY_ITEMS_PER_RUN is the maximum number of files we should
# queue for fixity in each run of apt_queue_fixity. For production,
# this should be around 2500.
MAX_FIXITY_ITEMS_PER_RUN=2500
# MAX_WORKER_ATTEMPTS is the maximum number of times a worker should
# attempt its job.
MAX_WORKER_ATTEMPTS=3
# NSQ_LOOKUPD is the host name and port of the NSQ lookup daemon.
# By default, it runs on port 4161. Format should be "host:port"
NSQ_LOOKUPD="localhost:4161"
# NSQ_URL is the URL of the NSQ server. It typically runs on port 4151.
# Format should be "http(s)://host:port"
NSQ_URL="http://localhost:4151"
# Registry
#PRESERV_REGISTRY_API_KEY="password"
#PRESERV_REGISTRY_API_USER="[email protected]"
PRESERV_REGISTRY_API_VERSION="v3"
PRESERV_REGISTRY_URL="https://repo.aptrust.org"
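#
# The commented-out Registry credentials above are read from the process
# environment. For example (placeholder values only; the real user and API
# key are supplied by the deployment environment, never stored in this file):
#
# export PRESERV_REGISTRY_API_USER="<registry-user-email>"
# export PRESERV_REGISTRY_API_KEY="<registry-api-key>"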
# PROFILES_DIR is the directory that contains BagIt profiles and
# the default.sig signature file for file format identification.
# In dev and test configs, this can be set relative to the project
# root if it begins with "./". In other environments, it should be
# set to an absolute path.
PROFILES_DIR="./profiles"
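# For example, a non-dev deployment might use an absolute path such as
# PROFILES_DIR="/srv/preservation-services/profiles" (path is illustrative).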
# QUEUE_FIXITY_INTERVAL describes how often we should queue new items
# for fixity checks.
QUEUE_FIXITY_INTERVAL="60m"
# REDIS_DEFAULT_DB is the number of the Redis DB in which preservation
# services keeps its data. This should be 0 in most cases.
REDIS_DEFAULT_DB=0
# REDIS_PASSWORD is the password required to connect to the Redis
# server. In dev and test, this should be an empty string.
REDIS_PASSWORD=""
# REDIS_RETRIES is the number of times we should try to get a record
# from Redis. This is helpful in testing, as our test code often requests
# recently-inserted data before Redis is able to return it.
REDIS_RETRIES=3
# REDIS_RETRY_MS is the number of milliseconds between retries when
# we retry Redis requests.
REDIS_RETRY_MS=250ms
# REDIS_URL is the URL of the Redis server in the format "host:port".
# The default port is 6379. For dev and test, this should be localhost:6379.
REDIS_URL="localhost:6379"
# REDIS_USER is the user name required to connect to Redis. For dev and
# test, this should be an empty string.
REDIS_USER=""
# RESTORE_DIR is the directory in which preservation services should build
# the bags it's restoring.
RESTORE_DIR="~/tmp/pres-serv/restore"
# STAGING_BUCKET is the name of the bucket into which ingest workers copy
# files for staging, before they are fully ingested.
STAGING_BUCKET="staging"
# STAGING_UPLOAD_RETRY_MS is the number of milliseconds to wait before
# retrying an upload to the S3 staging bucket.
STAGING_UPLOAD_RETRY_MS=250ms
# VOLUME_SERVICE_URL is the URL for the local volume server. This will
# always be on localhost, typically http://localhost:8898
VOLUME_SERVICE_URL="http://localhost:8898"
# --------------------------------------------------------------------------
# Below are settings for our workers.
#
# BUFFER_SIZE is the size of the channel buffer for Go workers.
# Default is 20.
# WORKERS is the number of workers (Go routines) that do the
# work of copying files, recording metadata, etc.
# Default is 3, but see notes below.
# MAX_ATTEMPTS is the maximum number of times a service should try
# to complete a single WorkItem.
# Default is 5.
# --------------------------------------------------------------------------
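# Each worker below is configured with three variables that follow the same
# naming pattern, e.g. (SOME_WORKER is a placeholder, not a real setting):
#
# SOME_WORKER_BUFFER_SIZE=20   # channel buffer size
# SOME_WORKER_WORKERS=3        # number of Go routines
# SOME_WORKER_MAX_ATTEMPTS=3   # max attempts per WorkItem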
# apt_delete deletes files. It does not tax any resources, except
# possibly Pharos in cases where it's asked to delete large numbers
# of files.
APT_DELETE_BUFFER_SIZE=20
APT_DELETE_WORKERS=3
APT_DELETE_MAX_ATTEMPTS=3
# apt_fixity runs scheduled fixity checks. It can tax network I/O.
APT_FIXITY_BUFFER_SIZE=20
APT_FIXITY_WORKERS=3
APT_FIXITY_MAX_ATTEMPTS=3
# bag_restorer restores bags to the depositor's restoration bucket.
# It can be taxing on network I/O when restoring bags with many files
# or bags with large files.
BAG_RESTORER_BUFFER_SIZE=20
BAG_RESTORER_WORKERS=3
BAG_RESTORER_MAX_ATTEMPTS=3
# file_restorer restores individual files to a depositor's restoration
# bucket. It can use a lot of network I/O when restoring large files.
FILE_RESTORER_BUFFER_SIZE=20
FILE_RESTORER_WORKERS=3
FILE_RESTORER_MAX_ATTEMPTS=3
# glacier_restorer initiates restoration of files from Glacier. It
# uses very few resources (very light network I/O, CPU and RAM)
GLACIER_RESTORER_BUFFER_SIZE=20
GLACIER_RESTORER_WORKERS=3
GLACIER_RESTORER_MAX_ATTEMPTS=3
# ingest_cleanup deletes interim processing data from the staging
# bucket, receiving bucket, and Redis. It uses few resources
# (very light network I/O, CPU and RAM)
INGEST_CLEANUP_BUFFER_SIZE=20
INGEST_CLEANUP_WORKERS=3
INGEST_CLEANUP_MAX_ATTEMPTS=3
# format_identifier identifies the file format of items in the
# staging bucket. This one can be a memory hog, so we're setting
# workers to 2 instead of 3. Siegfried keeps large S3 data buffers
# in memory. With too many concurrent streams, the system will
# run out of memory.
INGEST_FORMAT_IDENTIFIER_BUFFER_SIZE=12
INGEST_FORMAT_IDENTIFIER_WORKERS=2
INGEST_FORMAT_IDENTIFIER_MAX_ATTEMPTS=3
# ingest_pre_fetch untars bags from receiving buckets, calculates multiple
# checksums for each file, and stores interim metadata for each file in Redis.
# This service can use a lot of bandwidth and CPU while pulling data from
# the S3 receiving bucket. It can also issue large numbers of writes to Redis,
# though that doesn't seem to be a problem.
INGEST_PRE_FETCH_BUFFER_SIZE=20
INGEST_PRE_FETCH_WORKERS=3
INGEST_PRE_FETCH_MAX_ATTEMPTS=3
# ingest_preservation_uploader copies files from the staging bucket
# to preservation buckets. It can use a lot of network I/O, memory,
# and CPU.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_PRESERVATION_UPLOADER_BUFFER_SIZE=20
INGEST_PRESERVATION_UPLOADER_WORKERS=3
INGEST_PRESERVATION_UPLOADER_MAX_ATTEMPTS=3
# ingest_preservation_verifier ensures that ingest_preservation_uploader
# correctly copied all files to all of the right preservation buckets.
# This worker uses light network I/O, CPU and RAM.
INGEST_PRESERVATION_VERIFIER_BUFFER_SIZE=20
INGEST_PRESERVATION_VERIFIER_WORKERS=3
INGEST_PRESERVATION_VERIFIER_MAX_ATTEMPTS=3
# ingest_recorder records metadata for all ingested items in Pharos.
# While this worker does not use many resources itself, it heavily
# taxes Pharos on both CPU and RAM. When Pharos is getting overwhelmed,
# we need to reduce the number of workers for this service.
#
# Note that this one defaults to 5 attempts instead of 3. When Pharos
# is busy, Nginx returns sporadic 502/Bad Gateway responses. The
# extra attempts account for those.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
INGEST_RECORDER_BUFFER_SIZE=20
INGEST_RECORDER_WORKERS=3
INGEST_RECORDER_MAX_ATTEMPTS=5
# ingest_staging_uploader copies individual (untarred) files from the
# tar file in the receiving bucket to a temporary staging bucket.
# This service can potentially use a lot of network I/O, RAM, and CPU
# when working with bags containing many files or large files.
#
# *** Change number of workers based on hardware's bandwidth, CPU,
# *** and RAM limits.
#
INGEST_STAGING_UPLOADER_BUFFER_SIZE=20
INGEST_STAGING_UPLOADER_WORKERS=3
INGEST_STAGING_UPLOADER_MAX_ATTEMPTS=3
# ingest_validator validates metadata captured by ingest_pre_fetch.
# Although this worker can do huge numbers of reads and writes to
# Redis, Redis doesn't ever seem to mind.
INGEST_VALIDATOR_BUFFER_SIZE=20
INGEST_VALIDATOR_WORKERS=3
INGEST_VALIDATOR_MAX_ATTEMPTS=3
# reingest_manager checks the file metadata collected by ingest_pre_fetch
# against Pharos, checking to see whether any files being ingested now have
# ever been ingested before. This typically runs a limited number of read
# requests against Pharos, but may occasionally issue thousands of Pharos
# reads in cases where we're re-ingesting large bags. Except in those cases,
# this service tends to use light network/RAM/CPU resources.
#
# *** Consider reducing number of workers when Pharos is overloaded.
#
REINGEST_MANAGER_BUFFER_SIZE=20
REINGEST_MANAGER_WORKERS=3
REINGEST_MANAGER_MAX_ATTEMPTS=3
# Connection info for local S3 service. This is a minio server that
# runs in dev/test, but not demo/production.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# Minio signed URLs use hostname, not IP.
S3_LOCAL_HOST="localhost:9899"
S3_LOCAL_KEY="minioadmin"
S3_LOCAL_SECRET="minioadmin"
# AWS S3 connection info. In dev/test, point back to the local S3
# provider, so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# Minio signed URLs use hostname, not IP.
S3_AWS_HOST="s3.amazonaws.com"
#S3_AWS_KEY="minioadmin" <---- Read this from environment instead
#S3_AWS_SECRET="minioadmin" <---- Read this from environment instead
# Wasabi S3 connection info. In dev/test, point back to the local S3
# provider, so we can't overwrite anything in demo/prod.
# For localhost testing, use 'localhost' instead of '127.0.0.1' because
# Minio signed URLs use hostname, not IP.
S3_WASABI_HOST_OR="s3.us-west-1.wasabisys.com"
S3_WASABI_HOST_VA="s3.us-east-1.wasabisys.com"
#S3_WASABI_KEY="minioadmin" <---- Read this from environment instead
#S3_WASABI_SECRET="minioadmin" <---- Read this from environment instead
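#
# The AWS and Wasabi keys/secrets above are likewise read from the process
# environment. For example (placeholder values only; real credentials come
# from the deployment environment and are never committed to this file):
#
# export S3_AWS_KEY="<aws-access-key-id>"
# export S3_AWS_SECRET="<aws-secret-access-key>"
# export S3_WASABI_KEY="<wasabi-access-key-id>"
# export S3_WASABI_SECRET="<wasabi-secret-access-key>"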