Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add s3 head object function. #5020

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5315a6a
Add s3 head object function.
DailyDreaming Jul 15, 2024
eb16dfa
Review comments.
DailyDreaming Aug 19, 2024
3561801
Merge master into issues/4986-head-s3
github-actions[bot] Aug 19, 2024
a0db96c
Merge master into issues/4986-head-s3
github-actions[bot] Aug 19, 2024
4f3b712
Merge master into issues/4986-head-s3
github-actions[bot] Aug 21, 2024
8765646
Merge master into issues/4986-head-s3
github-actions[bot] Aug 22, 2024
89a1f11
Merge master into issues/4986-head-s3
github-actions[bot] Aug 22, 2024
9d8168f
Merge master into issues/4986-head-s3
github-actions[bot] Aug 22, 2024
04c552f
Merge master into issues/4986-head-s3
github-actions[bot] Aug 27, 2024
d368580
Merge master into issues/4986-head-s3
github-actions[bot] Aug 27, 2024
eee721c
Merge master into issues/4986-head-s3
github-actions[bot] Sep 3, 2024
9c6a7db
Rebase and fix conflicts.
DailyDreaming Oct 7, 2024
e799098
Merge master into issues/4986-head-s3
github-actions[bot] Oct 8, 2024
f118076
Merge master into issues/4986-head-s3
github-actions[bot] Oct 9, 2024
7331b8b
Merge master into issues/4986-head-s3
github-actions[bot] Oct 9, 2024
0479262
Merge master into issues/4986-head-s3
github-actions[bot] Oct 10, 2024
612c64b
Merge master into issues/4986-head-s3
github-actions[bot] Oct 14, 2024
c0d1127
Merge master into issues/4986-head-s3
github-actions[bot] Oct 21, 2024
61e9ca5
Merge master into issues/4986-head-s3
github-actions[bot] Oct 22, 2024
d1c1546
Merge master into issues/4986-head-s3
github-actions[bot] Oct 22, 2024
cbc0133
Merge master into issues/4986-head-s3
github-actions[bot] Oct 22, 2024
2efb60d
Merge master into issues/4986-head-s3
github-actions[bot] Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 17 additions & 22 deletions src/toil/jobStores/aws/jobStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
from toil.lib.aws import build_tag_dict_from_env
from toil.lib.aws.session import establish_boto3_session
from toil.lib.aws.s3 import head_s3_object
from toil.lib.aws.utils import (
NoBucketLocationError,
boto3_pager,
Expand Down Expand Up @@ -1418,15 +1419,11 @@ def readFrom(self, readable):

if info.version is None:
# Somehow we don't know the version. Try and get it.
for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, AssertionError)):
with attempt:
version = client.head_object(Bucket=bucket_name,
Key=compat_bytes(info.fileID),
**headerArgs).get('VersionId', None)
logger.warning('Loaded key for upload with no version and got version %s',
str(version))
info.version = version
assert info.version is not None
info.version = head_s3_object(
Bucket=bucket_name,
Key=compat_bytes(info.fileID),
**headerArgs
).get('VersionId', None)

# Make sure we actually wrote something, even if an empty file
assert (bool(info.version) or info.content is not None)
Expand Down Expand Up @@ -1467,23 +1464,21 @@ def readFrom(self, readable):
ExtraArgs=headerArgs)

# use head_object with the SSE headers to access versionId and content_length attributes
headObj = client.head_object(Bucket=bucket_name,
Key=compat_bytes(info.fileID),
**headerArgs)
assert dataLength == headObj.get('ContentLength', None)
info.version = headObj.get('VersionId', None)
resp = head_s3_object(
Bucket=bucket_name,
Key=compat_bytes(info.fileID),
**headerArgs
)
assert dataLength == resp.get('ContentLength', None)
info.version = resp.get('VersionId', None)
logger.debug('Upload received version %s', str(info.version))

if info.version is None:
# Somehow we don't know the version
for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, AssertionError)):
with attempt:
headObj = client.head_object(Bucket=bucket_name,
Key=compat_bytes(info.fileID),
**headerArgs)
info.version = headObj.get('VersionId', None)
logger.warning('Reloaded key with no version and got version %s', str(info.version))
assert info.version is not None
resp = head_s3_object(Bucket=bucket_name, Key=compat_bytes(info.fileID), header=headerArgs)
info.version = resp.get('VersionId', None)
logger.warning('Reloaded key with no version and got version %s', str(info.version))
DailyDreaming marked this conversation as resolved.
Show resolved Hide resolved
assert info.version is not None

# Make sure we actually wrote something, even if an empty file
assert (bool(info.version) or info.content is not None)
Expand Down
18 changes: 16 additions & 2 deletions src/toil/lib/aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import List
from typing import Dict, Any, Optional, List

from mypy_boto3_s3.type_defs import ListMultipartUploadsOutputTypeDef
from mypy_boto3_s3.type_defs import ListMultipartUploadsOutputTypeDef, HeadObjectOutputTypeDef

from toil.lib.aws import session, AWSServerErrors
from toil.lib.retry import retry
Expand All @@ -23,6 +23,20 @@


@retry(errors=[AWSServerErrors])
def head_s3_object(
    bucket: str,
    key: str,
    header: Optional[Dict[str, Any]] = None,
    region: Optional[str] = None,
) -> HeadObjectOutputTypeDef:
    """
    Attempt to HEAD an s3 object and return its response.

    The call is wrapped in ``retry`` for ``AWSServerErrors``.

    :param bucket: AWS bucket name
    :param key: AWS Key name for the s3 object
    :param header: Extra keyword arguments to include in the HEAD request
        (mostly for encryption). May be omitted or None when no extra
        arguments are needed.
        See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/head_object.html
    :param region: Region that we want to look for the bucket in
    :return: The response of the s3 client's ``head_object`` call.
    """
    s3_client = session.client("s3", region_name=region)
    # Treat a missing header mapping as "no extra arguments" so callers that
    # do not use encryption headers need not pass an empty dict.
    return s3_client.head_object(Bucket=bucket, Key=key, **(header or {}))


@retry(errors=[AWSServerErrors])
def list_multipart_uploads(bucket: str, region: str, prefix: str, max_uploads: int = 1) -> ListMultipartUploadsOutputTypeDef:
    """
    List multipart uploads in an s3 bucket and return the response.

    The call is wrapped in ``retry`` for ``AWSServerErrors``, consistent with
    the other s3 helper in this module.

    :param bucket: AWS bucket name
    :param region: Region used to create the s3 client
    :param prefix: Passed to the request as ``Prefix``
    :param max_uploads: Passed to the request as ``MaxUploads``
    :return: The response of the s3 client's ``list_multipart_uploads`` call.
    """
    s3_client = session.client("s3", region_name=region)
    return s3_client.list_multipart_uploads(Bucket=bucket, MaxUploads=max_uploads, Prefix=prefix)