Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[api] delete file #1122

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/integration/sda/rbac.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
"path": "/file/accession",
"action": "POST"
},
{
"role": "admin",
"path": "/file/*",
"action": "(DELETE)"
},
{
"role": "submission",
"path": "/users",
Expand Down
113 changes: 112 additions & 1 deletion .github/integration/tests/sda/60_api_admin_test.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,121 @@
#!/bin/sh
set -e
cd shared || true

token="$(curl http://oidc:8080/tokens | jq -r '.[0]')"
# Upload a file and make sure it's listed
result="$(curl -sk -L "http://api:8080/users/[email protected]/files" -H "Authorization: Bearer $token" | jq '. | length')"
if [ "$result" -ne 2 ]; then
echo "wrong number of files returned for user [email protected]"
echo "expected 4 got $result"
echo "expected 2 got $result"
exit 1
fi


# Reupload a file under a different name, test to delete it
s3cmd -c s3cfg put "NA12878.bam.c4gh" s3://test_dummy.org/NC12878.bam.c4gh

echo "waiting for upload to complete"
URI=http://rabbitmq:15672
if [ -n "$PGSSLCERT" ]; then
URI=https://rabbitmq:15671
fi
RETRY_TIMES=0
until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 4 ]; do
echo "waiting for upload to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 30 ]; then
echo "::error::Time out while waiting for upload to complete"
exit 1
fi
sleep 2
done

# get the fileId of the new file
fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')"

# delete it
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/[email protected]/$fileid")"
if [ "$resp" != "200" ]; then
echo "Error when deleting the file, expected 200 got: $resp"
exit 1
fi

last_event=$(psql -U postgres -h postgres -d sda -At -c "SELECT event FROM sda.file_event_log WHERE file_id='$fileid' order by started_at desc limit 1;")

if [ "$last_event" != "disabled" ]; then
echo "The file $fileid does not have the expected las event 'disabled', but $last_event."
fi


# Try to delete an unknown file
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/[email protected]/badfileid")"
if [ "$resp" != "404" ]; then
echo "Error when deleting the file, expected error got: $resp"
exit 1
fi


# Try to delete file of other user
fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/[email protected]/files" | jq -r '.[0]| .fileID')"
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/[email protected]/$fileid")"
if [ "$resp" != "404" ]; then
echo "Error when deleting the file, expected 404 got: $resp"
exit 1
fi


# Re-upload the file and use the api to ingest it, then try to delete it
s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NE12878.bam.c4gh

URI=http://rabbitmq:15672
if [ -n "$PGSSLCERT" ]; then
URI=https://rabbitmq:15671
fi
RETRY_TIMES=0
until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 6 ]; do
echo "waiting for upload to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 3 ]; then
echo "::error::Time out while waiting for upload to complete"
#exit 1
break
fi
sleep 2
done

# Ingest it
new_payload=$(
jq -c -n \
--arg filepath "test_dummy.org/NE12878.bam.c4gh" \
--arg user "[email protected]" \
'$ARGS.named'
)

resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d "$new_payload" "http://api:8080/file/ingest")"
if [ "$resp" != "200" ]; then
echo "Error when requesting to ingesting file, expected 200 got: $resp"
exit 1
fi

fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
# wait for the fail to get the correct status
RETRY_TIMES=0

until [ "$(psql -U postgres -h postgres -d sda -At -c "select id from sda.file_events e where e.title in (select event from sda.file_event_log where file_id = '$fileid' order by started_at desc limit 1);")" -gt 30 ]; do
echo "waiting for ingest to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 30 ]; then
echo "::error::Time out while waiting for upload to complete"
exit 1
fi
sleep 2
done

# Try to delete file not in inbox
fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/[email protected]/$fileid")"
if [ "$resp" != "404" ]; then
echo "Error when deleting the file, expected 404 got: $resp"
exit 1
fi
49 changes: 49 additions & 0 deletions charts/sda-svc/templates/api-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,40 @@ spec:
value: {{ required "A valid DB host is required" .Values.global.db.host | quote }}
- name: DB_PORT
value: {{ .Values.global.db.port | quote }}
- name: INBOX_TYPE
{{- if eq "s3" .Values.global.inbox.storageType }}
value: "s3"
- name: INBOX_BUCKET
value: {{ required "S3 inbox bucket missing" .Values.global.inbox.s3Bucket }}
{{- if and .Values.global.inbox.s3CaFile .Values.global.tls.enabled }}
- name: INBOX_CACERT
value: {{ template "tlsPath" . }}/ca.crt
{{- end }}
- name: INBOX_REGION
value: {{ default "us-east-1" .Values.global.inbox.s3Region }}
- name: INBOX_URL
value: {{ required "S3 inbox URL missing" .Values.global.inbox.s3Url }}
{{- if .Values.global.inbox.s3Port }}
- name: INBOX_PORT
value: {{ .Values.global.inbox.s3Port | quote }}
{{- end }}
{{- else }}
value: "posix"
- name: INBOX_LOCATION
value: "{{ .Values.global.inbox.path }}/"
{{- end }}
{{- if eq "s3" .Values.global.inbox.storageType }}
- name: INBOX_ACCESSKEY
valueFrom:
secretKeyRef:
name: {{ template "sda.fullname" . }}-s3inbox-keys
key: s3InboxAccessKey
- name: INBOX_SECRETKEY
valueFrom:
secretKeyRef:
name: {{ template "sda.fullname" . }}-s3inbox-keys
key: s3InboxSecretKey
{{- end }}
{{- if .Values.global.log.format }}
- name: LOG_FORMAT
value: {{ .Values.global.log.format | quote }}
Expand Down Expand Up @@ -204,6 +238,10 @@ spec:
{{- if .Values.global.tls.enabled }}
- name: tls
mountPath: {{ template "tlsPath" . }}
{{- end }}
{{- if eq "posix" .Values.global.inbox.storageType }}
- name: inbox
mountPath: {{ .Values.global.inbox.path | quote }}
{{- end }}
volumes:
{{- if not .Values.global.vaultSecrets }}
Expand Down Expand Up @@ -238,4 +276,15 @@ spec:
secretName: {{ required "An certificate issuer or a TLS secret name is required for api" .Values.api.tls.secretName }}
{{- end }}
{{- end }}
{{- if eq "posix" .Values.global.inbox.storageType }}
- name: inbox
{{- if .Values.global.inbox.existingClaim }}
persistentVolumeClaim:
claimName: {{ .Values.global.inbox.existingClaim }}
{{- else }}
nfs:
server: {{ required "An inbox NFS server is required" .Values.global.inbox.nfsServer | quote }}
path: {{ if .Values.global.inbox.nfsPath }}{{ .Values.global.inbox.nfsPath | quote }}{{ else }}{{ "/" }}{{ end }}
{{- end }}
{{- end }}
{{- end }}
3 changes: 2 additions & 1 deletion postgresql/initdb.d/01_main.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ VALUES (0, now(), 'Created with version'),
(10, now(), 'Create Inbox user'),
(11, now(), 'Grant select permission to download on dataset_event_log'),
(12, now(), 'Add key hash'),
(13, now(), 'Create API user');
(13, now(), 'Create API user'),
(14, now(), 'Give API user insert priviledge in logs table');

-- Datasets are used to group files, and permissions are set on the dataset
-- level
Expand Down
3 changes: 2 additions & 1 deletion postgresql/initdb.d/04_grants.sql
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,13 @@ GRANT USAGE ON SCHEMA sda TO api;
GRANT SELECT ON sda.files TO api;
GRANT SELECT ON sda.file_dataset TO api;
GRANT SELECT ON sda.checksums TO api;
GRANT SELECT ON sda.file_event_log TO api;
GRANT SELECT, INSERT ON sda.file_event_log TO api;
GRANT SELECT ON sda.encryption_keys TO api;
GRANT SELECT ON sda.datasets TO api;
GRANT SELECT ON sda.dataset_event_log TO api;
GRANT INSERT ON sda.encryption_keys TO api;
GRANT UPDATE ON sda.encryption_keys TO api;
GRANT USAGE, SELECT ON SEQUENCE sda.file_event_log_id_seq TO api;

-- legacy schema
GRANT USAGE ON SCHEMA local_ega TO api;
Expand Down
21 changes: 21 additions & 0 deletions postgresql/migratedb.d/14.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
DO
$$
DECLARE
-- The version we know how to do migration from, at the end of a successful migration
-- we will no longer be at this version.
sourcever INTEGER := 13;
changes VARCHAR := 'Give API user insert priviledge in logs table';
BEGIN
IF (select max(version) from sda.dbschema_version) = sourcever then
RAISE NOTICE 'Doing migration from schema version % to %', sourcever, sourcever+1;
RAISE NOTICE 'Changes: %', changes;
INSERT INTO sda.dbschema_version VALUES(sourcever+1, now(), changes);

GRANT INSERT ON sda.file_event_log TO api;
GRANT USAGE, SELECT ON SEQUENCE sda.file_event_log_id_seq TO api;

ELSE
RAISE NOTICE 'Schema migration from % to % does not apply now, skipping', sourcever, sourcever+1;
END IF;
END
$$
66 changes: 66 additions & 0 deletions sda/cmd/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"math"
"net/http"
"os"
"os/signal"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/database"
"github.com/neicnordic/sensitive-data-archive/internal/jsonadapter"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"
"github.com/neicnordic/sensitive-data-archive/internal/userauth"
log "github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -54,6 +56,10 @@ func main() {
if err != nil {
log.Fatal(err)
}
Conf.API.INBOX, err = storage.NewBackend(Conf.Inbox)
if err != nil {
log.Fatal(err)
}

if err := setupJwtAuth(); err != nil {
log.Fatalf("error when setting up JWT auth, reason %s", err.Error())
Expand Down Expand Up @@ -100,6 +106,7 @@ func setup(config *config.Config) *http.Server {
r.POST("/c4gh-keys/add", rbac(e), addC4ghHash) // Adds a key hash to the database
r.GET("/c4gh-keys/list", rbac(e), listC4ghHashes) // Lists key hashes in the database
r.POST("/c4gh-keys/deprecate/*keyHash", rbac(e), deprecateC4ghHash) // Deprecate a given key hash
r.DELETE("/file/:username/*file", rbac(e), deleteFile) // Delete a file from inbox
// submission endpoints below here
r.POST("/file/ingest", rbac(e), ingestFile) // start ingestion of a file
r.POST("/file/accession", rbac(e), setAccession) // assign accession ID to a file
Expand Down Expand Up @@ -283,6 +290,63 @@ func ingestFile(c *gin.Context) {
c.Status(http.StatusOK)
}

// The deleteFile function deletes files from the inbox and marks them as
// discarded in the db. Files are identified by their ids and the user id.
func deleteFile(c *gin.Context) {

inbox, err := storage.NewBackend(Conf.Inbox)
if err != nil {
log.Fatal(err)
}

submissionUser := c.Param("username")
log.Warn("submission user:", submissionUser)

fileID := c.Param("file")
fileID = strings.TrimPrefix(fileID, "/")
log.Warn("submission file:", fileID)
if fileID == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, "file ID is required")
}

filePath := ""
// Get the file path from the fileID and submission user
if filePath, err = Conf.API.DB.GetInboxFilePathFromID(submissionUser, fileID); err != nil {
log.Errorf("getting file from fileID failed, reason: (%v)", err)
c.AbortWithStatusJSON(http.StatusNotFound, "File could not be found in inbox")

return
}

// Requires a filepath instead of fileID
// Note: The remove fails randomly sometimes
var RetryTimes = 5
for count := 1; count <= RetryTimes; count++ {
log.Warn("trying to remove file from inbox, try", count)
err = inbox.RemoveFile(filePath)
if err != nil {
log.Errorf("Remove file from inbox failed, reason: %v", err)
}

// The GetFileSize returns zero in case of error after retrying a number of times
fileSize, _ := inbox.GetFileSize(filePath)
if fileSize == 0 {
break
}

time.Sleep(time.Duration(math.Pow(2, float64(count))) * time.Second)
}

if err := Conf.API.DB.UpdateFileEventLog(fileID, "disabled", fileID, "api", "{}", "{}"); err != nil {
log.Errorf("set status deleted failed, reason: (%v)", err)
c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())

return
}

c.Status(http.StatusOK)
}

func setAccession(c *gin.Context) {
var accession schema.IngestionAccession
if err := c.BindJSON(&accession); err != nil {
Expand Down Expand Up @@ -476,6 +540,8 @@ func listActiveUsers(c *gin.Context) {
c.JSON(http.StatusOK, users)
}

// listUserFiles returns a list of files for a specific user
// If the file has status disabled, the file will be skipped
func listUserFiles(c *gin.Context) {
username := c.Param("username")
username = strings.TrimPrefix(username, "/")
Expand Down
18 changes: 18 additions & 0 deletions sda/cmd/api/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,24 @@ Admin endpoints are only available to a set of whitelisted users specified in th
curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"accession_id": "my-id-01", "filepath": "/uploads/file.c4gh", "user": "testuser"}' https://HOSTNAME/file/accession
```

- `/file/:username/*fileid`
- accepts `DELETE` requests
- marks the file as `disabled` in the database, and deletes it from the inbox.
- The file is identfied by its id, returned by `users/:username/:files`

- Response codes
- `200` Query execute ok.
- `400` File id not provided
- `401` Token user is not in the list of admins.
- `404` File not found
- `500` Internal error due to Inbox, DB or MQ failures.

Example:
MalinAhlberg marked this conversation as resolved.
Show resolved Hide resolved

```bash
curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE https://HOSTNAME/file/[email protected]/123abc
```

- `/dataset/create`
- accepts `POST` requests with JSON data with the format: `{"accession_ids": ["<FILE_ACCESSION_01>", "<FILE_ACCESSION_02>"], "dataset_id": "<DATASET_01>", "user": "<SUBMISSION_USER>"}`
- creates a dataset from the list of accession IDs and the dataset ID.
Expand Down
Loading
Loading