Skip to content

Commit

Permalink
WIP: Support Elasticsearch 7.x and OpenSearch 1.x
Browse files Browse the repository at this point in the history
  • Loading branch information
jraddaoui committed Mar 31, 2022
1 parent f757eb7 commit 1449386
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 12 deletions.
38 changes: 38 additions & 0 deletions hack/docker-compose.opensearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Docker Compose file that defines a single-node OpenSearch service and points
# the Archivematica MCPClient and Dashboard services at it.
# NOTE(review): the two archivematica-* entries only set `environment`, so this
# file is presumably meant to be layered on top of the main docker-compose.yml
# (e.g. with an additional `-f` flag) - confirm against the project docs.
version: "2.1"

volumes:
  # Named volume that holds the OpenSearch data directory.
  opensearch_data: {}

services:
  opensearch:
    image: "opensearchproject/opensearch:1.3.0"
    environment:
      - "discovery.type=single-node"
      - "cluster.name=am-cluster"
      - "cluster.routing.allocation.disk.threshold_enabled=false"
      - "node.name=am-node"
      - "bootstrap.memory_lock=true"
      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
      # NOTE(review): demo config and the security plugin are switched off,
      # which looks intended for local development only - confirm before
      # reusing this file anywhere else.
      - "DISABLE_INSTALL_DEMO_CONFIG=true"
      - "DISABLE_SECURITY_PLUGIN=true"
    ulimits:
      # -1 removes the memlock limit; paired with bootstrap.memory_lock=true
      # set in the environment above.
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - "opensearch_data:/usr/share/opensearch/data"
    ports:
      # Published on the loopback interface only.
      - "127.0.0.1:62092:9200"
      - "127.0.0.1:62096:9600"

  archivematica-mcp-client:
    environment:
      ARCHIVEMATICA_MCPCLIENT_MCPCLIENT_ELASTICSEARCHSERVER: "opensearch:9200"

  archivematica-dashboard:
    environment:
      ARCHIVEMATICA_DASHBOARD_DASHBOARD_ELASTICSEARCH_SERVER: "opensearch:9200"
4 changes: 3 additions & 1 deletion hack/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ services:
- "127.0.0.1:62001:3306"

elasticsearch:
image: "docker.elastic.co/elasticsearch/elasticsearch:6.5.4"
image: "docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2"
environment:
- "discovery.type=single-node"
- "cluster.name=am-cluster"
- "cluster.routing.allocation.disk.threshold_enabled=false"
- "node.name=am-node"
- "network.host=0.0.0.0"
- "bootstrap.memory_lock=true"
Expand Down
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ brotli==0.5.2
certifi==2021.5.30
# via
# -r requirements.txt
# elasticsearch
# requests
cffi==1.14.6
# via
Expand Down Expand Up @@ -77,7 +78,7 @@ django-shibboleth-remoteuser @ git+https://github.com/Brown-University-Library/d
# via -r requirements.txt
django-tastypie==0.13.2
# via -r requirements.txt
elasticsearch==6.8.2
elasticsearch==7.13.0
# via -r requirements.txt
filelock==3.3.1
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ django-extensions==1.7.9
django-forms-bootstrap>=3.0.0,<4.0.0
django-prometheus==1.0.15
django-tastypie==0.13.2
elasticsearch>=6.0.0,<7.0.0
elasticsearch==7.13.0
gearman3==0.2.1
gevent==1.3.6 # used by gunicorn's async workers
gunicorn==19.9.0
Expand Down
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ bagit==1.7.0
brotli==0.5.2
# via -r requirements.in
certifi==2021.5.30
# via requests
# via
# elasticsearch
# requests
cffi==1.14.6
# via cryptography
charset-normalizer==2.0.1
Expand Down Expand Up @@ -56,7 +58,7 @@ django-shibboleth-remoteuser @ git+https://github.com/Brown-University-Library/d
# via -r requirements.in
django-tastypie==0.13.2
# via -r requirements.in
elasticsearch==6.8.2
elasticsearch==7.13.0
# via -r requirements.in
future==0.18.2
# via metsrw
Expand Down
18 changes: 15 additions & 3 deletions src/archivematicaCommon/lib/elasticSearchFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def create_indexes_if_needed(client, indexes):
# Call get index body functions below for each index
body = getattr(sys.modules[__name__], "_get_%s_index_body" % index)()
logger.info('Creating "%s" index ...', index)
client.indices.create(index, body=body, ignore=400)
client.indices.create(index, body=body, ignore=400, include_type_name=True)
logger.info("Index created.")


Expand Down Expand Up @@ -1212,7 +1212,13 @@ def search_all_results(client, body, index):
if isinstance(index, list):
index = ",".join(index)

results = client.search(body=body, index=index, size=MAX_QUERY_SIZE)
results = client.search(
body=body,
index=index,
size=MAX_QUERY_SIZE,
rest_total_hits_as_int=True,
track_total_hits=True,
)

if results["hits"]["total"] > MAX_QUERY_SIZE:
logger.warning(
Expand Down Expand Up @@ -1284,7 +1290,13 @@ def get_file_tags(client, uuid):
"""
query = {"query": {"term": {"fileuuid": uuid}}}

results = client.search(body=query, index=TRANSFER_FILES_INDEX, _source="tags")
results = client.search(
body=query,
index=TRANSFER_FILES_INDEX,
_source="tags",
rest_total_hits_as_int=True,
track_total_hits=True,
)

count = results["hits"]["total"]
if count == 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags&rest_total_hits_as_int=true&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'}
headers:
Expand Down
4 changes: 2 additions & 2 deletions src/archivematicaCommon/tests/fixtures/test_set_get_tags.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?size=10000
uri: http://elasticsearch:9200/transferfiles/_search?rest_total_hits_as_int=true&size=10000&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.6931472,"hits":[{"_index":"transferfiles","_type":"_doc","_id":"mBsZBWgBn49OAVhMh-OV","_score":0.6931472,"_source":{"accessionid":"","status":"backlog","sipuuid":"17b168b6-cbba-4f43-8838-a53360238acb","tags":[],"file_extension":"jpg","relative_path":"test-17b168b6-cbba-4f43-8838-a53360238acb/objects/Landing_zone.jpg","bulk_extractor_reports":[],"origin":"1a14043f-68ef-4bfe-a129-e2e4cdbe391b","size":1.2982568740844727,"modification_date":"2018-12-11","created":1546273029.7313669,"format":[],"ingestdate":"2018-12-31","filename":"Landing_zone.jpg","fileuuid":"268421a7-a986-4fa0-95c1-54176e508210"}}]}}'}
headers:
Expand All @@ -31,7 +31,7 @@ interactions:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags&rest_total_hits_as_int=true&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.47000363,"hits":[{"_index":"transferfiles","_type":"_doc","_id":"mBsZBWgBn49OAVhMh-OV","_score":0.47000363,"_source":{"tags":["test"]}}]}}'}
headers:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?size=10000
uri: http://elasticsearch:9200/transferfiles/_search?rest_total_hits_as_int=true&size=10000&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'}
headers:
Expand Down
2 changes: 2 additions & 0 deletions src/dashboard/src/components/archival_storage/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ def search(request):
size=page_size,
sort=order_by + ":" + sort_direction if order_by else "",
_source=source,
rest_total_hits_as_int=True,
track_total_hits=True,
)

if file_mode:
Expand Down
2 changes: 2 additions & 0 deletions src/dashboard/src/components/backlog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ def search(request):
size=page_size,
sort=order_by + ":" + sort_direction if order_by else "",
_source=source,
rest_total_hits_as_int=True,
track_total_hits=True,
)
hit_count = hits["hits"]["total"]

Expand Down

0 comments on commit 1449386

Please sign in to comment.