{hasPagination && (
diff --git a/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx b/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx
index b9713539063..f0387219d4d 100644
--- a/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx
+++ b/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx
@@ -97,8 +97,10 @@ function WorkgroupsEmpty({ error }: WorkgroupsEmptyProps) {
Check{' '}
- Athena Queries docs on
- setup and correct usage
+
+ Athena Queries docs
+ {' '}
+ on setup and correct usage
>
diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts b/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts
index bf8a0793922..4faf6dbbcee 100644
--- a/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts
+++ b/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts
@@ -852,6 +852,48 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
unmount()
})
})
+
+ it('return "not ready" if database is not ready', async () => {
+ startQueryExecution.mockImplementation(
+ reqThen(() => ({})),
+ )
+ await act(async () => {
+ const { result, unmount, waitForNextUpdate } = renderHook(() =>
+ requests.useQueryRun({
+ workgroup: 'a',
+ catalogName: 'b',
+ database: Model.Loading,
+ queryBody: 'd',
+ }),
+ )
+ await waitForNextUpdate()
+ expect(result.current[0]).toBeUndefined()
+ unmount()
+ })
+ })
+
+ it('marks as ready to run but returns an error for confirmation if database is empty', async () => {
+ startQueryExecution.mockImplementation(
+ reqThen(() => ({})),
+ )
+ await act(async () => {
+ const { result, unmount, waitForValueToChange } = renderHook(() =>
+ requests.useQueryRun({
+ workgroup: 'a',
+ catalogName: 'b',
+ database: '',
+ queryBody: 'd',
+ }),
+ )
+ await waitForValueToChange(() => result.current)
+ await waitForValueToChange(() => result.current[0])
+ expect(result.current[0]).toBeNull()
+ const run = await result.current[1](false)
+ expect(run).toBeInstanceOf(Error)
+ expect(run).toBe(requests.NO_DATABASE)
+ unmount()
+ })
+ })
})
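The two tests above pin down a confirmation flow for an empty database. For orientation, here is a hedged consumer-side sketch: only `useQueryRun`'s tuple shape and `NO_DATABASE` come from the module under test; the helper name, the `confirmed` flag name, the callback, and the import path (following the catalog's app-rooted convention) are illustrative assumptions.

```ts
import * as requests from 'containers/Bucket/Queries/Athena/model/requests'

// Sketch only: `startRun` stands for the trigger returned as the second tuple
// element of `requests.useQueryRun(...)`; the `confirmed` flag name and the
// `confirmAndRetry` callback are assumptions, not part of the module.
async function runWithConfirmation(
  startRun: (confirmed: boolean) => Promise<unknown>,
  confirmAndRetry: () => void,
) {
  const outcome = await startRun(false)
  if (outcome === requests.NO_DATABASE) {
    // No database selected: ask the user to confirm, then call `startRun(true)`.
    confirmAndRetry()
    return
  }
  if (outcome instanceof Error) {
    throw outcome // any other failure is surfaced to the caller
  }
  // otherwise `outcome` describes the started query execution
}
```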
describe('useWorkgroup', () => {
@@ -1036,7 +1078,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
}
})
- it('does not change query if a valid query is already selected', async () => {
+ it('retains execution query when the list is changed', async () => {
const queries = {
list: [
{ key: 'foo', name: 'Foo', body: 'SELECT * FROM foo' },
@@ -1063,8 +1105,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
{
list: [
{ key: 'baz', name: 'Baz', body: 'SELECT * FROM baz' },
- { key: 'foo', name: 'Foo', body: 'SELECT * FROM foo' },
- { key: 'bar', name: 'Bar', body: 'SELECT * FROM bar' },
+ ...queries.list,
],
},
execution,
@@ -1077,6 +1118,45 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
throw new Error('No data')
}
})
+
+ it('does not change query when list is updated if a valid query is already selected', async () => {
+ const queries = {
+ list: [
+ { key: 'foo', name: 'Foo', body: 'SELECT * FROM foo' },
+ { key: 'bar', name: 'Bar', body: 'SELECT * FROM bar' },
+ ],
+ }
+ const execution = null
+ const { result, rerender, waitForNextUpdate } = renderHook(
+ (props: Parameters) => useWrapper(props),
+ {
+ initialProps: [queries, execution],
+ },
+ )
+
+ if (Model.hasData(result.current.value)) {
+ expect(result.current.value.body).toBe('SELECT * FROM foo')
+ } else {
+ throw new Error('No data')
+ }
+ await act(async () => {
+ rerender([
+ {
+ list: [
+ { key: 'baz', name: 'Baz', body: 'SELECT * FROM baz' },
+ ...queries.list,
+ ],
+ },
+ execution,
+ ])
+ await waitForNextUpdate()
+ })
+ if (Model.hasData(result.current.value)) {
+ expect(result.current.value.body).toBe('SELECT * FROM foo')
+ } else {
+ throw new Error('No data')
+ }
+ })
})
describe('useQueryBody', () => {
@@ -1086,7 +1166,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
it('sets query body from query if query is ready', () => {
const query = { name: 'Foo', key: 'foo', body: 'SELECT * FROM foo' }
- const execution = {}
+ const execution = null
const setQuery = jest.fn()
const { result } = renderHook(() => useWrapper([query, setQuery, execution]))
@@ -1098,7 +1178,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
}
})
- it('sets query body from execution if query is not ready', () => {
+ it('sets query body from execution if query is not selected', () => {
const query = null
const execution = { query: 'SELECT * FROM bar' }
const setQuery = jest.fn()
@@ -1127,8 +1207,8 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
})
it('does not change value if query and execution are both not ready', async () => {
- const query = null
- const execution = null
+ const query = undefined
+ const execution = undefined
const setQuery = jest.fn()
const { result, rerender, waitForNextUpdate } = renderHook(
@@ -1139,11 +1219,15 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
)
expect(result.current.value).toBeUndefined()
+ // That's not possible from UI now,
+ // but let's pretend UI is ready to handle user input
act(() => {
result.current.setValue('foo')
})
expect(result.current.value).toBe('foo')
+ // We re-rendered the hook, but the internal useEffect didn't rewrite the value
+ // to `undefined` as it would have done on the first render
await act(async () => {
rerender([query, setQuery, execution])
await waitForNextUpdate()
@@ -1166,7 +1250,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
expect(setQuery).toHaveBeenCalledWith(null)
})
- it('retains value when execution and query are initially empty but later updates', async () => {
+ it('obtains value when execution and query are initially empty but later update', async () => {
const initialQuery = null
const initialExecution = null
const setQuery = jest.fn()
@@ -1178,8 +1262,10 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
},
)
- expect(result.current.value).toBeUndefined()
+ expect(result.current.value).toBeNull()
+ // Query was loaded with some value
+ // Execution is ready but it's still null
await act(async () => {
rerender([
{ key: 'up', name: 'Updated', body: 'SELECT * FROM updated' },
@@ -1195,5 +1281,68 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => {
throw new Error('No data')
}
})
+
+ it('sets query body to null if query is null after being loaded', async () => {
+ const initialQuery = Model.Loading
+ const initialExecution = null
+ const setQuery = jest.fn()
+
+ const { result, rerender, waitForNextUpdate } = renderHook(
+ (props: Parameters) => useWrapper(props),
+ {
+ initialProps: [
+ initialQuery as Model.Value,
+ setQuery,
+ initialExecution,
+ ],
+ },
+ )
+
+ expect(result.current.value).toBe(Model.Loading)
+
+ await act(async () => {
+ rerender([null, setQuery, initialExecution])
+ await waitForNextUpdate()
+ })
+
+ if (Model.hasValue(result.current.value)) {
+ expect(result.current.value).toBeNull()
+ } else {
+ throw new Error('Unexpected state')
+ }
+ })
+
+ it('retains value if selected query is null and we switch from some execution', async () => {
+ // That's not ideal,
+ // but we don't know what changed the query body: the execution page or the user.
+ // So, at least, it is documented here.
+ const initialQuery = null
+ const initialExecution = { id: 'any', query: 'SELECT * FROM updated' }
+ const setQuery = jest.fn()
+
+ const { result, rerender, waitForNextUpdate } = renderHook(
+ (props: Parameters) => useWrapper(props),
+ {
+ initialProps: [
+ initialQuery as Model.Value,
+ setQuery,
+ initialExecution,
+ ],
+ },
+ )
+
+ expect(result.current.value).toBe('SELECT * FROM updated')
+
+ await act(async () => {
+ rerender([initialQuery, setQuery, null])
+ await waitForNextUpdate()
+ })
+
+ if (Model.hasValue(result.current.value)) {
+ expect(result.current.value).toBe('SELECT * FROM updated')
+ } else {
+ throw new Error('Unexpected state')
+ }
+ })
})
})
diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts b/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts
index 227e6ecbde2..8da2450012b 100644
--- a/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts
+++ b/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts
@@ -630,6 +630,9 @@ export function useQueryBody(
if (Model.isError(query)) return null
if (Model.hasData(query)) return query.body
if (Model.hasData(execution) && execution.query) return execution.query
+ if (!Model.isReady(v) && Model.isReady(query) && Model.isReady(execution)) {
+ return null
+ }
return v
})
}, [execution, query])
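To make the effect above easier to follow, here is a standalone restatement of the resolution order it applies, including the new "everything ready, nothing selected" branch. This is an illustrative sketch with simplified stand-in types and helpers; only the ordering and the `Model.*` helper names it mirrors come from the code.

```ts
// Simplified stand-ins for the model's request states; the real code uses the
// Model.* helpers (isError, hasData, isReady) shown in the diff above.
type Loading = { _tag: 'loading' }
type Value<T> = T | null | undefined | Error | Loading

const isError = (v: unknown): v is Error => v instanceof Error
const isLoading = (v: unknown): v is Loading =>
  typeof v === 'object' && v !== null && (v as Loading)._tag === 'loading'
const isReady = <T>(v: Value<T>) => v !== undefined && !isLoading(v)
const hasData = <T>(v: Value<T>): v is T => isReady(v) && v !== null && !isError(v)

// Resolution order applied by useQueryBody's effect:
function nextQueryBody(
  prev: Value<string>,
  query: Value<{ body: string }>,
  execution: Value<{ query?: string }>,
): Value<string> {
  if (isError(query)) return null // a failed query selection clears the editor
  if (hasData(query)) return query.body // a selected saved query wins
  if (hasData(execution) && execution.query) return execution.query // fall back to the viewed execution
  if (!isReady(prev) && isReady(query) && isReady(execution)) return null // everything ready, nothing selected
  return prev // otherwise keep whatever is already in the editor
}
```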
diff --git a/catalog/app/containers/Bucket/Successors.tsx b/catalog/app/containers/Bucket/Successors.tsx
index 3a0e5449b34..5fc1a51cc40 100644
--- a/catalog/app/containers/Bucket/Successors.tsx
+++ b/catalog/app/containers/Bucket/Successors.tsx
@@ -28,7 +28,7 @@ function EmptySlot({ bucket }: EmptySlotProps) {
Learn more
@@ -52,7 +52,7 @@ function ErrorSlot({ error }: ErrorSlotProps) {
{error instanceof ERRORS.WorkflowsConfigInvalid && (
Please fix the workflows config according to{' '}
-
+
the documentation
diff --git a/catalog/app/containers/Bucket/Summarize.tsx b/catalog/app/containers/Bucket/Summarize.tsx
index e644215263c..ebc2116b441 100644
--- a/catalog/app/containers/Bucket/Summarize.tsx
+++ b/catalog/app/containers/Bucket/Summarize.tsx
@@ -258,7 +258,7 @@ interface FilePreviewProps {
expanded?: boolean
file?: SummarizeFile
handle: LogicalKeyResolver.S3SummarizeHandle
- headingOverride: React.ReactNode
+ headingOverride?: React.ReactNode
packageHandle?: PackageHandle
}
@@ -270,7 +270,7 @@ export function FilePreview({
packageHandle,
}: FilePreviewProps) {
const description = file?.description ? : null
- const heading = headingOverride != null ? headingOverride :
+ const heading = headingOverride ??
const key = handle.logicalKey || handle.key
const props = React.useMemo(() => Preview.getRenderProps(key, file), [key, file])
@@ -566,7 +566,7 @@ interface SummaryRootProps {
s3: S3
bucket: string
inStack: boolean
- overviewUrl: string
+ overviewUrl?: string | null
}
export function SummaryRoot({ s3, bucket, inStack, overviewUrl }: SummaryRootProps) {
@@ -618,7 +618,9 @@ function SummaryFailed({ error }: SummaryFailedProps) {
Check your quilt_summarize.json file for errors.
See the{' '}
-
+
summarize docs
{' '}
for more.
diff --git a/catalog/app/containers/Bucket/errors.tsx b/catalog/app/containers/Bucket/errors.tsx
index fd45d3399ca..5a91f621431 100644
--- a/catalog/app/containers/Bucket/errors.tsx
+++ b/catalog/app/containers/Bucket/errors.tsx
@@ -124,7 +124,7 @@ const defaultHandlers: ErrorHandler[] = [
Learn how to configure the bucket for Quilt
@@ -167,7 +167,7 @@ const defaultHandlers: ErrorHandler[] = [
Learn about access control in Quilt
diff --git a/catalog/app/containers/Bucket/requests/requestsUntyped.js b/catalog/app/containers/Bucket/requests/requestsUntyped.js
index 2ba9722da61..5efb639049f 100644
--- a/catalog/app/containers/Bucket/requests/requestsUntyped.js
+++ b/catalog/app/containers/Bucket/requests/requestsUntyped.js
@@ -1,7 +1,6 @@
import { join as pathJoin } from 'path'
-import * as dateFns from 'date-fns'
-import * as FP from 'fp-ts'
+import * as Eff from 'effect'
import sampleSize from 'lodash/fp/sampleSize'
import * as R from 'ramda'
@@ -9,7 +8,6 @@ import quiltSummarizeSchema from 'schemas/quilt_summarize.json'
import { SUPPORTED_EXTENSIONS as IMG_EXTS } from 'components/Thumbnail'
import * as quiltConfigs from 'constants/quiltConfigs'
-import cfg from 'constants/config'
import * as Resource from 'utils/Resource'
import { makeSchemaValidator } from 'utils/json-schema'
import mkSearch from 'utils/mkSearch'
@@ -24,106 +22,6 @@ import { decodeS3Key } from './utils'
const promiseProps = (obj) =>
Promise.all(Object.values(obj)).then(R.zipObj(Object.keys(obj)))
-const MAX_BANDS = 10
-
-export const bucketAccessCounts = async ({ s3, bucket, today, window }) => {
- if (!cfg.analyticsBucket)
- throw new Error('bucketAccessCounts: "analyticsBucket" required')
-
- const dates = R.unfold(
- (daysLeft) => daysLeft >= 0 && [dateFns.subDays(today, daysLeft), daysLeft - 1],
- window,
- )
-
- try {
- const result = await s3Select({
- s3,
- Bucket: cfg.analyticsBucket,
- Key: `${ACCESS_COUNTS_PREFIX}/Exts.csv`,
- Expression: `
- SELECT ext, counts FROM s3object
- WHERE eventname = 'GetObject'
- AND bucket = '${sqlEscape(bucket)}'
- `,
- InputSerialization: {
- CSV: {
- FileHeaderInfo: 'Use',
- AllowQuotedRecordDelimiter: true,
- },
- },
- })
- return FP.function.pipe(
- result,
- R.map((r) => {
- const recordedCounts = JSON.parse(r.counts)
- const { counts, total } = dates.reduce(
- (acc, date) => {
- const value = recordedCounts[dateFns.format(date, 'yyyy-MM-dd')] || 0
- const sum = acc.total + value
- return {
- total: sum,
- counts: acc.counts.concat({ date, value, sum }),
- }
- },
- { total: 0, counts: [] },
- )
- return { ext: r.ext && `.${r.ext}`, total, counts }
- }),
- R.filter((i) => i.total),
- R.sort(R.descend(R.prop('total'))),
- R.applySpec({
- byExt: R.identity,
- byExtCollapsed: (bands) => {
- if (bands.length <= MAX_BANDS) return bands
- const [other, rest] = R.partition((b) => b.ext === '', bands)
- const [toKeep, toMerge] = R.splitAt(MAX_BANDS - 1, rest)
- const merged = [...other, ...toMerge].reduce((acc, band) => ({
- ext: '',
- total: acc.total + band.total,
- counts: R.zipWith(
- (a, b) => ({
- date: a.date,
- value: a.value + b.value,
- sum: a.sum + b.sum,
- }),
- acc.counts,
- band.counts,
- ),
- }))
- return R.sort(R.descend(R.prop('total')), toKeep.concat(merged))
- },
- combined: {
- total: R.reduce((sum, { total }) => sum + total, 0),
- counts: R.pipe(
- R.pluck('counts'),
- R.transpose,
- R.map(
- R.reduce(
- (acc, { date, value, sum }) => ({
- date,
- value: acc.value + value,
- sum: acc.sum + sum,
- }),
- { value: 0, sum: 0 },
- ),
- ),
- ),
- },
- }),
- )
- } catch (e) {
- // eslint-disable-next-line no-console
- console.log('Unable to fetch bucket access counts:')
- // eslint-disable-next-line no-console
- console.error(e)
- return {
- byExt: [],
- byExtCollapsed: [],
- combined: { total: 0, counts: [] },
- }
- }
-}
-
const parseDate = (d) => d && new Date(d)
const getOverviewBucket = (url) => s3paths.parseS3Url(url).bucket
@@ -373,7 +271,7 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) =
Key: getOverviewKey(overviewUrl, 'summary.json'),
})
.promise()
- return FP.function.pipe(
+ return Eff.pipe(
JSON.parse(r.Body.toString('utf-8')),
R.pathOr([], ['aggregations', 'other', 'keys', 'buckets']),
R.map((b) => ({
@@ -403,7 +301,7 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) =
try {
const qs = mkSearch({ action: 'sample', index: bucket })
const result = await req(`/search${qs}`)
- return FP.function.pipe(
+ return Eff.pipe(
result,
R.pathOr([], ['aggregations', 'objects', 'buckets']),
R.map((h) => {
@@ -425,7 +323,7 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) =
const result = await s3
.listObjectsV2({ Bucket: bucket, EncodingType: 'url' })
.promise()
- return FP.function.pipe(
+ return Eff.pipe(
result,
R.path(['Contents']),
R.map(R.evolve({ Key: decodeS3Key })),
@@ -477,7 +375,7 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => {
Key: getOverviewKey(overviewUrl, 'summary.json'),
})
.promise()
- return FP.function.pipe(
+ return Eff.pipe(
JSON.parse(r.Body.toString('utf-8')),
R.pathOr([], ['aggregations', 'images', 'keys', 'buckets']),
R.map((b) => ({
@@ -498,7 +396,7 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => {
try {
const qs = mkSearch({ action: 'images', index: bucket })
const result = await req(`/search${qs}`)
- return FP.function.pipe(
+ return Eff.pipe(
result,
R.pathOr([], ['aggregations', 'objects', 'buckets']),
R.map((h) => {
@@ -519,7 +417,7 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => {
const result = await s3
.listObjectsV2({ Bucket: bucket, EncodingType: 'url' })
.promise()
- return FP.function.pipe(
+ return Eff.pipe(
result,
R.path(['Contents']),
R.map(R.evolve({ Key: decodeS3Key })),
@@ -656,8 +554,6 @@ export const summarize = async ({ s3, handle: inputHandle, resolveLogicalKey })
}
}
-const MANIFESTS_PREFIX = '.quilt/packages/'
-
const withCalculatedRevisions = (s) => ({
scripted_metric: {
init_script: `
@@ -712,113 +608,33 @@ export const countPackageRevisions = ({ req, bucket, name }) =>
.then(R.path(['aggregations', 'revisions', 'value']))
.catch(errors.catchErrors())
-// TODO: Preview endpoint only allows up to 512 lines right now. Increase it to 1000.
-const MAX_PACKAGE_ENTRIES = 500
-
-// TODO: remove
-export const getRevisionData = async ({
- endpoint,
- sign,
- bucket,
- hash,
- maxKeys = MAX_PACKAGE_ENTRIES,
-}) => {
- const url = sign({ bucket, key: `${MANIFESTS_PREFIX}${hash}` })
- const maxLines = maxKeys + 2 // 1 for the meta and 1 for checking overflow
- const r = await fetch(
- `${endpoint}/preview?url=${encodeURIComponent(url)}&input=txt&line_count=${maxLines}`,
- )
- const [header, ...entries] = await r
- .json()
- .then((json) => json.info.data.head.map((l) => JSON.parse(l)))
- const files = Math.min(maxKeys, entries.length)
- const bytes = entries.slice(0, maxKeys).reduce((sum, i) => sum + i.size, 0)
- const truncated = entries.length > maxKeys
- return {
- stats: { files, bytes, truncated },
- message: header.message,
- header,
- }
-}
-
-const s3Select = ({
- s3,
- ExpressionType = 'SQL',
- InputSerialization = { JSON: { Type: 'LINES' } },
- ...rest
-}) =>
- s3
- .selectObjectContent({
- ExpressionType,
- InputSerialization,
- OutputSerialization: { JSON: {} },
- ...rest,
- })
- .promise()
- .then(
- R.pipe(
- R.prop('Payload'),
- R.reduce((acc, evt) => {
- if (!evt.Records) return acc
- const s = evt.Records.Payload.toString()
- return acc + s
- }, ''),
- R.trim,
- R.ifElse(R.isEmpty, R.always([]), R.pipe(R.split('\n'), R.map(JSON.parse))),
- ),
- )
-
-const sqlEscape = (arg) => arg.replace(/'/g, "''")
+// const MANIFESTS_PREFIX = '.quilt/packages/'
-const ACCESS_COUNTS_PREFIX = 'AccessCounts'
-
-const queryAccessCounts = async ({ s3, type, query, today, window = 365 }) => {
- try {
- const records = await s3Select({
- s3,
- Bucket: cfg.analyticsBucket,
- Key: `${ACCESS_COUNTS_PREFIX}/${type}.csv`,
- Expression: query,
- InputSerialization: {
- CSV: {
- FileHeaderInfo: 'Use',
- AllowQuotedRecordDelimiter: true,
- },
- },
- })
-
- const recordedCounts = records.length ? JSON.parse(records[0].counts) : {}
-
- const counts = R.times((i) => {
- const date = dateFns.subDays(today, window - i - 1)
- return {
- date,
- value: recordedCounts[dateFns.format(date, 'yyyy-MM-dd')] || 0,
- }
- }, window)
-
- const total = Object.values(recordedCounts).reduce(R.add, 0)
-
- return { counts, total }
- } catch (e) {
- // eslint-disable-next-line no-console
- console.log('queryAccessCounts: error caught')
- // eslint-disable-next-line no-console
- console.error(e)
- throw e
- }
-}
-
-export const objectAccessCounts = ({ s3, bucket, path, today }) =>
- queryAccessCounts({
- s3,
- type: 'Objects',
- query: `
- SELECT counts FROM s3object
- WHERE eventname = 'GetObject'
- AND bucket = '${sqlEscape(bucket)}'
- AND "key" = '${sqlEscape(path)}'
- `,
- today,
- window: 365,
- })
+// TODO: Preview endpoint only allows up to 512 lines right now. Increase it to 1000.
+// const MAX_PACKAGE_ENTRIES = 500
+
+// TODO: remove: only used in commented-out code in PackageList
+// export const getRevisionData = async ({
+// endpoint,
+// sign,
+// bucket,
+// hash,
+// maxKeys = MAX_PACKAGE_ENTRIES,
+// }) => {
+// const url = sign({ bucket, key: `${MANIFESTS_PREFIX}${hash}` })
+// const maxLines = maxKeys + 2 // 1 for the meta and 1 for checking overflow
+// const r = await fetch(
+// `${endpoint}/preview?url=${encodeURIComponent(url)}&input=txt&line_count=${maxLines}`,
+// )
+// const [header, ...entries] = await r
+// .json()
+// .then((json) => json.info.data.head.map((l) => JSON.parse(l)))
+// const files = Math.min(maxKeys, entries.length)
+// const bytes = entries.slice(0, maxKeys).reduce((sum, i) => sum + i.size, 0)
+// const truncated = entries.length > maxKeys
+// return {
+// stats: { files, bytes, truncated },
+// message: header.message,
+// header,
+// }
+// }
diff --git a/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx b/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx
index 4fa912fe30d..d459995f009 100644
--- a/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx
+++ b/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx
@@ -61,7 +61,10 @@ function SuggestionsList({ items, selected }: SuggestionsProps) {
))}
Learn the{' '}
-
+
advanced search syntax
{' '}
for query string queries in ElasticSearch {ES_V}.
diff --git a/catalog/app/embed/File.js b/catalog/app/embed/File.js
index bc47739202b..247a19ff163 100644
--- a/catalog/app/embed/File.js
+++ b/catalog/app/embed/File.js
@@ -1,7 +1,5 @@
import { basename } from 'path'
-import * as dateFns from 'date-fns'
-import * as R from 'ramda'
import * as React from 'react'
import { Link, useLocation, useParams } from 'react-router-dom'
import * as M from '@material-ui/core'
@@ -9,22 +7,21 @@ import * as M from '@material-ui/core'
import * as BreadCrumbs from 'components/BreadCrumbs'
import Message from 'components/Message'
import * as Preview from 'components/Preview'
-import Sparkline from 'components/Sparkline'
import cfg from 'constants/config'
import * as Notifications from 'containers/Notifications'
import * as AWS from 'utils/AWS'
import AsyncResult from 'utils/AsyncResult'
import { useData } from 'utils/Data'
import * as NamedRoutes from 'utils/NamedRoutes'
-import * as SVG from 'utils/SVG'
import { linkStyle } from 'utils/StyledLink'
import copyToClipboard from 'utils/clipboard'
import * as Format from 'utils/format'
import parseSearch from 'utils/parseSearch'
import * as s3paths from 'utils/s3paths'
-import { readableBytes, readableQuantity } from 'utils/string'
+import { readableBytes } from 'utils/string'
import FileCodeSamples from 'containers/Bucket/CodeSamples/File'
+import Analytics from 'containers/Bucket/File/Analytics'
import FileProperties from 'containers/Bucket/FileProperties'
import * as FileView from 'containers/Bucket/FileView'
import Section from 'containers/Bucket/Section'
@@ -229,74 +226,6 @@ function VersionInfo({ bucket, path, version }) {
)
}
-function Analytics({ bucket, path }) {
- const [cursor, setCursor] = React.useState(null)
- const s3 = AWS.S3.use()
- const today = React.useMemo(() => new Date(), [])
- const formatDate = (date) =>
- dateFns.format(
- date,
- today.getFullYear() === date.getFullYear() ? 'd MMM' : 'd MMM yyyy',
- )
- const data = useData(requests.objectAccessCounts, {
- s3,
- bucket,
- path,
- today,
- })
-
- const defaultExpanded = data.case({
- Ok: ({ total }) => !!total,
- _: () => false,
- })
-
- return (
-
- {data.case({
- Ok: ({ counts, total }) =>
- total ? (
-
-
- Downloads
-
- {readableQuantity(cursor === null ? total : counts[cursor].value)}
-
-
- {cursor === null
- ? `${counts.length} days`
- : formatDate(counts[cursor].date)}
-
-
-
-
-
-
- ,
- )}
- />
-
-
- ) : (
- No analytics available
- ),
- Err: () => No analytics available,
- _: () => ,
- })}
-
- )
-}
-
function CenteredProgress() {
return (
diff --git a/catalog/app/model/graphql/schema.generated.ts b/catalog/app/model/graphql/schema.generated.ts
index ba8ed87e3fd..be04791e97a 100644
--- a/catalog/app/model/graphql/schema.generated.ts
+++ b/catalog/app/model/graphql/schema.generated.ts
@@ -82,6 +82,41 @@ export default {
],
interfaces: [],
},
+ {
+ kind: 'OBJECT',
+ name: 'AccessCountsGroup',
+ fields: [
+ {
+ name: 'ext',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'SCALAR',
+ name: 'String',
+ ofType: null,
+ },
+ },
+ args: [],
+ },
+ {
+ name: 'counts',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'OBJECT',
+ name: 'AccessCounts',
+ ofType: null,
+ },
+ },
+ args: [],
+ },
+ ],
+ interfaces: [],
+ },
+ {
+ kind: 'SCALAR',
+ name: 'String',
+ },
{
kind: 'OBJECT',
name: 'AdminMutations',
@@ -208,10 +243,6 @@ export default {
],
interfaces: [],
},
- {
- kind: 'SCALAR',
- name: 'String',
- },
{
kind: 'OBJECT',
name: 'AdminQueries',
@@ -365,6 +396,52 @@ export default {
},
],
},
+ {
+ kind: 'OBJECT',
+ name: 'BucketAccessCounts',
+ fields: [
+ {
+ name: 'byExt',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'LIST',
+ ofType: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'OBJECT',
+ name: 'AccessCountsGroup',
+ ofType: null,
+ },
+ },
+ },
+ },
+ args: [
+ {
+ name: 'groups',
+ type: {
+ kind: 'SCALAR',
+ name: 'Int',
+ ofType: null,
+ },
+ },
+ ],
+ },
+ {
+ name: 'combined',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'OBJECT',
+ name: 'AccessCounts',
+ ofType: null,
+ },
+ },
+ args: [],
+ },
+ ],
+ interfaces: [],
+ },
{
kind: 'UNION',
name: 'BucketAddResult',
@@ -4188,6 +4265,81 @@ export default {
},
args: [],
},
+ {
+ name: 'bucketAccessCounts',
+ type: {
+ kind: 'OBJECT',
+ name: 'BucketAccessCounts',
+ ofType: null,
+ },
+ args: [
+ {
+ name: 'bucket',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'SCALAR',
+ name: 'String',
+ ofType: null,
+ },
+ },
+ },
+ {
+ name: 'window',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'SCALAR',
+ name: 'Int',
+ ofType: null,
+ },
+ },
+ },
+ ],
+ },
+ {
+ name: 'objectAccessCounts',
+ type: {
+ kind: 'OBJECT',
+ name: 'AccessCounts',
+ ofType: null,
+ },
+ args: [
+ {
+ name: 'bucket',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'SCALAR',
+ name: 'String',
+ ofType: null,
+ },
+ },
+ },
+ {
+ name: 'key',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'SCALAR',
+ name: 'String',
+ ofType: null,
+ },
+ },
+ },
+ {
+ name: 'window',
+ type: {
+ kind: 'NON_NULL',
+ ofType: {
+ kind: 'SCALAR',
+ name: 'Int',
+ ofType: null,
+ },
+ },
+ },
+ ],
+ },
{
name: 'admin',
type: {
diff --git a/catalog/app/model/graphql/types.generated.ts b/catalog/app/model/graphql/types.generated.ts
index 8ad7b159639..fb5d1b2a862 100644
--- a/catalog/app/model/graphql/types.generated.ts
+++ b/catalog/app/model/graphql/types.generated.ts
@@ -36,6 +36,12 @@ export interface AccessCounts {
readonly counts: ReadonlyArray
}
+export interface AccessCountsGroup {
+ readonly __typename: 'AccessCountsGroup'
+ readonly ext: Scalars['String']
+ readonly counts: AccessCounts
+}
+
export interface AdminMutations {
readonly __typename: 'AdminMutations'
readonly user: UserAdminMutations
@@ -89,6 +95,16 @@ export type BrowsingSessionDisposeResult = Ok | OperationError
export type BrowsingSessionRefreshResult = BrowsingSession | InvalidInput | OperationError
+export interface BucketAccessCounts {
+ readonly __typename: 'BucketAccessCounts'
+ readonly byExt: ReadonlyArray<AccessCountsGroup>
+ readonly combined: AccessCounts
+}
+
+export interface BucketAccessCountsbyExtArgs {
+ groups: Maybe<Scalars['Int']>
+}
+
export interface BucketAddInput {
readonly name: Scalars['String']
readonly title: Scalars['String']
@@ -864,6 +880,8 @@ export interface Query {
readonly searchMoreObjects: ObjectsSearchMoreResult
readonly searchMorePackages: PackagesSearchMoreResult
readonly subscription: SubscriptionState
+ readonly bucketAccessCounts: Maybe<BucketAccessCounts>
+ readonly objectAccessCounts: Maybe<AccessCounts>
readonly admin: AdminQueries
readonly policies: ReadonlyArray
readonly policy: Maybe
@@ -910,6 +928,17 @@ export interface QuerysearchMorePackagesArgs {
size?: Maybe
}
+export interface QuerybucketAccessCountsArgs {
+ bucket: Scalars['String']
+ window: Scalars['Int']
+}
+
+export interface QueryobjectAccessCountsArgs {
+ bucket: Scalars['String']
+ key: Scalars['String']
+ window: Scalars['Int']
+}
+
export interface QuerypolicyArgs {
id: Scalars['ID']
}
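Putting the generated pieces together, the new fields can be exercised with a query along these lines. This is an illustrative query document only: the `bucketAccessCounts`/`objectAccessCounts` field and argument names come from the generated schema above, while the nested `total`/`date`/`value` selections and the `groups: 10` value are assumptions inferred from the access-counts shape used by the removed `requestsUntyped.js` code.

```ts
// Illustrative only: not a query document that exists in the codebase.
const ACCESS_COUNTS_QUERY = `
  query AccessCounts($bucket: String!, $key: String!, $window: Int!) {
    bucketAccessCounts(bucket: $bucket, window: $window) {
      byExt(groups: 10) {
        ext
        counts { total counts { date value } }
      }
      combined { total counts { date value } }
    }
    objectAccessCounts(bucket: $bucket, key: $key, window: $window) {
      total
      counts { date value }
    }
  }
`
```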
diff --git a/catalog/app/utils/AWS/S3.js b/catalog/app/utils/AWS/S3.js
index a51c29b5566..6b052a52927 100644
--- a/catalog/app/utils/AWS/S3.js
+++ b/catalog/app/utils/AWS/S3.js
@@ -43,44 +43,28 @@ function useSmartS3() {
return useConstant(() => {
class SmartS3 extends S3 {
- getReqType(req) {
+ shouldSign(req) {
const bucket = req.params.Bucket
if (cfg.mode === 'LOCAL') {
- return 'signed'
+ return true
}
- if (isAuthenticated()) {
- if (
- // sign if operation is not bucket-specific
- // (not sure if there are any such operations that can be used from the browser)
- !bucket ||
- cfg.analyticsBucket === bucket ||
+ if (
+ isAuthenticated() &&
+ // sign if operation is not bucket-specific
+ // (not sure if there are any such operations that can be used from the browser)
+ (!bucket ||
cfg.serviceBucket === bucket ||
statusReportsBucket === bucket ||
- (cfg.mode !== 'OPEN' && isInStack(bucket))
- ) {
- return 'signed'
- }
- } else if (req.operation === 'selectObjectContent') {
- return 'select'
+ (cfg.mode !== 'OPEN' && isInStack(bucket)))
+ ) {
+ return true
}
- return 'unsigned'
- }
-
- populateURI(req) {
- if (req.service.getReqType(req) === 'select') {
- return
- }
- super.populateURI(req)
+ return false
}
customRequestHandler(req) {
- const b = req.params.Bucket
- const type = this.getReqType(req)
-
- if (b) {
- const endpoint = new AWS.Endpoint(
- type === 'select' ? `${cfg.apiGatewayEndpoint}/s3select/` : cfg.s3Proxy,
- )
+ if (req.params.Bucket) {
+ const endpoint = new AWS.Endpoint(cfg.s3Proxy)
req.on('sign', () => {
if (req.httpRequest[PRESIGN]) return
@@ -96,10 +80,7 @@ function useSmartS3() {
const basePath = endpoint.path.replace(/\/$/, '')
req.httpRequest.endpoint = endpoint
- req.httpRequest.path =
- type === 'select'
- ? `${basePath}${origPath}`
- : `${basePath}/${origEndpoint.host}${origPath}`
+ req.httpRequest.path = `${basePath}/${origEndpoint.host}${origPath}`
})
req.on(
'retry',
@@ -138,9 +119,8 @@ function useSmartS3() {
if (forceProxy) {
req.httpRequest[FORCE_PROXY] = true
}
- const type = this.getReqType(req)
- if (type !== 'signed') {
+ if (!this.shouldSign(req)) {
req.toUnauthenticated()
}
diff --git a/catalog/app/utils/AWS/Signer.js b/catalog/app/utils/AWS/Signer.js
index 0c0c24b1ac8..404fe0f4d73 100644
--- a/catalog/app/utils/AWS/Signer.js
+++ b/catalog/app/utils/AWS/Signer.js
@@ -25,7 +25,7 @@ export function useS3Signer({ urlExpiration: exp, forceProxy = false } = {}) {
const statusReportsBucket = useStatusReportsBucket()
const s3 = S3.use()
const inStackOrSpecial = React.useCallback(
- (b) => isInStack(b) || cfg.analyticsBucket === b || statusReportsBucket === b,
+ (b) => isInStack(b) || statusReportsBucket === b,
[isInStack, statusReportsBucket],
)
return React.useCallback(
diff --git a/catalog/app/utils/GraphQL/Provider.tsx b/catalog/app/utils/GraphQL/Provider.tsx
index 592b71e58e6..05c34cd7238 100644
--- a/catalog/app/utils/GraphQL/Provider.tsx
+++ b/catalog/app/utils/GraphQL/Provider.tsx
@@ -90,6 +90,8 @@ export default function GraphQLProvider({ children }: React.PropsWithChildren<{}
keys: {
AccessCountForDate: () => null,
AccessCounts: () => null,
+ AccessCountsGroup: () => null,
+ BucketAccessCounts: () => null,
BucketConfig: (b) => b.name as string,
Canary: (c) => c.name as string,
Collaborator: (c) => c.username as string,
diff --git a/docs/Catalog/SearchQuery.md b/docs/Catalog/SearchQuery.md
index 4b51655b4a4..1f4aaa7951e 100644
--- a/docs/Catalog/SearchQuery.md
+++ b/docs/Catalog/SearchQuery.md
run them. You must first set up your Athena workgroup and Saved queries per
[AWS's Athena documentation](https://docs.aws.amazon.com/athena/latest/ug/getting-started.html).
### Configuration
-You can hide the "Queries" tab by setting `ui > nav > queries: false` ([learn more](./Preferences.md)).
+You can hide the "Queries" tab by setting `ui > nav > queries: false`.
+You can also set the default workgroup with `ui > athena > defaultWorkgroup: 'your-default-workgroup'`.
+[Learn more](./Preferences.md).
+
+The tab remembers the last selected workgroup, catalog name, and database.
### Basics
"Run query" executes the selected query and waits for the result.
-![](../imgs/athena-ui.png)
-![](../imgs/athena-history.png)
+![Athena page](../imgs/athena-ui.png)
diff --git a/docs/README.md b/docs/README.md
index eb5afe0ba28..0582c038cb3 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,6 +7,30 @@ data integrity at scale.
---
+## How to Get Started
+
+Quilt consists of three main elements:
+
+- [Quilt Platform](#quilt-platform-overview) - a cloud platform, hosted in an
+ organization's AWS account, for interacting with, visualizing, searching, and
+ querying Quilt Packages.
+- [Quilt Python SDK](#quilt-python-sdk) - provides the ability to create, push,
+ install, and delete Quilt Packages.
+- [Quilt Ecosystem](#quilt-ecosystem-and-integrations) - extends the core Quilt
+ capabilities with typical elements of life sciences workflows, such as
+ incorporating orchestration data and connecting packages to Electronic Lab
+ Notebooks.
+
+To dive deeper into the capabilities of Quilt, start with our [Quick Start
+Guide](Quickstart.md) or explore the [Installation
+Instructions](Installation.md) for setting up your environment.
+
+If you have any questions or need help, join our [Slack
+community](https://slack.quiltdata.com/) or submit a support request to
+.
+
+---
+
## Navigating the Documentation
The Quilt documentation is structured to guide users through different layers of
@@ -24,8 +48,7 @@ capabilities like embeddable previews and metadata collection.
**Core Sections:**
- [Architecture](Architecture.md) - Learn how Quilt is architected.
-- [Mental Model](MentalModel.md) - Understand the guiding principles behind
- Quilt.
+- [Mental Model](MentalModel.md) - Understand the guiding principles behind Quilt.
- [Metadata Management](Catalog/Metadata.md) - Manage metadata at scale.
For users of the Quilt Platform (often referred to as the Catalog):
@@ -40,11 +63,9 @@ For users of the Quilt Platform (often referred to as the Catalog):
For administrators managing Quilt deployments:
-- [Admin Settings UI](Catalog/Admin.md) - Control platform settings and user
- access.
+- [Admin Settings UI](Catalog/Admin.md) - Control platform settings and user access.
- [Catalog Configuration](Catalog/Preferences.md) - Set platform preferences.
-- [Cross-Account Access](CrossAccount.md) - Manage multi-account access to S3
- data.
+- [Cross-Account Access](CrossAccount.md) - Manage multi-account access to S3 data.
### Quilt Python SDK
@@ -58,8 +79,7 @@ flexibility needed for deeper integrations.
managing data packages.
- [Editing and Uploading Packages](walkthrough/editing-a-package.md) - Learn how
to version, edit, and share data.
-- [API Reference](api-reference/api.md) - Detailed API documentation for
- developers.
+- [API Reference](api-reference/api.md) - Detailed API documentation for developers.
### Quilt Ecosystem and Integrations
@@ -67,9 +87,8 @@ The **Quilt Ecosystem** extends the platform with integrations and plugins to
fit your workflow. Whether you're managing scientific data or automating
packaging tasks, Quilt can be tailored to your needs with these tools:
-- [Benchling
- Packager](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager)
- - Package biological data from Benchling.
+- [Benchling Packager](examples/benchling.md) - Package electronic lab notebooks
+ from Benchling.
- [Nextflow Plugin](examples/nextflow.md) - Integrate with Nextflow pipelines
for bioinformatics.
@@ -89,18 +108,7 @@ administrator, Quilt helps streamline your data management workflows.
better insights.
- **Discover**: Use metadata and search tools to explore data relationships
across projects.
-- **Model**: Version and manage large data sets that don't fit traditional git
- repositories.
+- **Model**: Version and manage large data sets that don't fit traditional git repositories.
- **Decide**: Empower your team with auditable data for better decision-making.
---
-
-## How to Get Started
-
-To dive deeper into the capabilities of Quilt, start with our [Quick Start
-Guide](Quickstart.md) or explore the [Installation
-Instructions](Installation.md) for setting up your environment.
-
-If you have any questions or need help, join our [Slack
-community](https://slack.quiltdata.com/) or visit our full [documentation
-site](https://docs.quiltdata.com/).
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 0f1ca68efd6..43fe43fffa6 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -38,13 +38,7 @@
* [GxP for Security & Compliance](advanced-features/good-practice.md)
* [Organizing S3 Buckets](advanced-features/s3-bucket-organization.md)
-## Quilt Ecosystem Integrations
-
-* [Benchling Packager](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager)
-* [Event-Driven Packaging](advanced-features/event-driven-packaging.md)
-* [Nextflow Plugin](examples/nextflow.md)
-
-## Quilt Python SDK Developers
+## Quilt Python SDK
* [Installation](Installation.md)
* [Quick Start](Quickstart.md)
@@ -73,3 +67,9 @@
* [Contributing](CONTRIBUTING.md)
* [Frequently Asked Questions](FAQ.md)
* [Troubleshooting](Troubleshooting.md)
+
+## Quilt Ecosystem Integrations
+
+* [Benchling Packager](examples/benchling.md)
+* [Event-Driven Packaging](advanced-features/event-driven-packaging.md)
+* [Nextflow Plugin](examples/nextflow.md)
diff --git a/docs/advanced-features/athena.md b/docs/advanced-features/athena.md
index 993e8448a58..cb61b2d9312 100644
--- a/docs/advanced-features/athena.md
+++ b/docs/advanced-features/athena.md
@@ -1,4 +1,5 @@
+
# Querying package metadata with Athena
Quilt stores package data and metadata in S3. Metadata lives in a per-package manifest file
in each bucket's `.quilt/` directory.
@@ -9,9 +10,11 @@ using predicates based on package or object-level metadata.
Packages can be created from the resulting tabular data.
To be able to create a package,
-the table must contain the columns `logical_key`, `physical_keys` and `size` as shown below.
+the table must contain the columns `logical_key`, `physical_keys` (or `physical_key`) and `size`.
(See also [Mental Model](https://docs.quiltdata.com/mentalmodel))
+![Athena page with results ready to be packaged](../imgs/athena-package.png)
+
## Defining package tables and views in Athena
> This step is not required for users of Quilt enterprise, since tables and views
diff --git a/docs/examples/benchling.md b/docs/examples/benchling.md
new file mode 100644
index 00000000000..68a0a5a56bb
--- /dev/null
+++ b/docs/examples/benchling.md
@@ -0,0 +1,32 @@
+
+The Benchling Packager is a lambda you can deploy in your own AWS private cloud
+to process [Benchling](https://benchling.com/) events in order to create (and
+link back, if possible) a dedicated [Quilt](https://quiltdata.com/) package for
+every Benchling notebook.
+
+The CloudFormation template is available as a package on
+[open.quiltdata.com](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager).
+
+## Prerequisites
+
+In order to install the benchling packager, you will need to know, and have
+administrative access to:
+
+- Your Benchling tenant domain (e.g., `<tenant>` from `<tenant>.benchling.com`),
+ for configuring event subscriptions and metadata schemas.
+- The AWS Account ID (e.g., 123456789012) and AWS Region (e.g., us-west-2) used by
+ your Quilt stack, for configuring the CloudFormation stack and lambdas.
+
+## Installation
+
+Go to the [Benchling Packager
+package](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager)
+on open.quiltdata.com and follow the instructions in the README.
+
+## References
+
+- [AWS CloudFormation templates](https://aws.amazon.com/cloudformation/resources/templates/)
+- [AWS Lambda functions](https://aws.amazon.com/lambda/)
+- [Benchling EventBridge events](https://docs.benchling.com/docs/events-getting-started#event-types)
+- [Benchling Schemas](https://help.benchling.com/hc/en-us/articles/9684227216781)
diff --git a/docs/imgs/athena-history.png b/docs/imgs/athena-history.png
deleted file mode 100644
index 7ef0916506e..00000000000
Binary files a/docs/imgs/athena-history.png and /dev/null differ
diff --git a/docs/imgs/athena-package.png b/docs/imgs/athena-package.png
new file mode 100644
index 00000000000..bdae54c2776
Binary files /dev/null and b/docs/imgs/athena-package.png differ
diff --git a/docs/imgs/athena-ui.png b/docs/imgs/athena-ui.png
index 8c417e376cd..3f185385b8d 100644
Binary files a/docs/imgs/athena-ui.png and b/docs/imgs/athena-ui.png differ
diff --git a/lambdas/indexer/CHANGELOG.md b/lambdas/indexer/CHANGELOG.md
new file mode 100644
index 00000000000..c7ea99597d5
--- /dev/null
+++ b/lambdas/indexer/CHANGELOG.md
@@ -0,0 +1,21 @@
+
+# Changelog
+
+Changes are listed in reverse chronological order (newer entries at the top).
+The entry format is
+
+```markdown
+- [Verb] Change description ([#<PR number>](https://github.com/quiltdata/quilt/pull/<PR number>))
+```
+
+where verb is one of
+
+- Removed
+- Added
+- Fixed
+- Changed
+
+## Changes
+
+- [Changed] Stop using S3 select ([#4212](https://github.com/quiltdata/quilt/pull/4212))
+- [Added] Bootstrap the change log ([#4212](https://github.com/quiltdata/quilt/pull/4212))
diff --git a/lambdas/indexer/index.py b/lambdas/indexer/index.py
index 80b6861a11f..bb6a9422229 100644
--- a/lambdas/indexer/index.py
+++ b/lambdas/indexer/index.py
@@ -47,6 +47,7 @@
import datetime
+import functools
import json
import os
import pathlib
@@ -92,7 +93,6 @@
POINTER_PREFIX_V1,
get_available_memory,
get_quilt_logger,
- query_manifest_content,
separated_env_to_iter,
)
@@ -168,12 +168,7 @@
# currently only affects .parquet, TODO: extend to other extensions
assert 'SKIP_ROWS_EXTS' in os.environ
SKIP_ROWS_EXTS = separated_env_to_iter('SKIP_ROWS_EXTS')
-SELECT_PACKAGE_META = "SELECT * from S3Object o WHERE o.version IS NOT MISSING LIMIT 1"
-# No WHERE clause needed for aggregations since S3 Select skips missing fields for aggs
-SELECT_PACKAGE_STATS = (
- "SELECT COALESCE(SUM(obj['size']), 0) as total_bytes,"
- " COUNT(obj['size']) as total_files from S3Object obj"
-)
+DUCKDB_SELECT_LAMBDA_ARN = os.environ["DUCKDB_SELECT_LAMBDA_ARN"]
TEST_EVENT = "s3:TestEvent"
# we need to filter out GetObject and HeadObject calls generated by the present
# lambda in order to display accurate analytics in the Quilt catalog
@@ -182,6 +177,7 @@
logger = get_quilt_logger()
+s3_client = boto3.client("s3", config=botocore.config.Config(user_agent_extra=USER_AGENT_EXTRA))
def now_like_boto3():
@@ -247,13 +243,10 @@ def select_manifest_meta(s3_client, bucket: str, key: str):
wrapper for retry and returning a string
"""
try:
- raw = query_manifest_content(
- s3_client,
- bucket=bucket,
- key=key,
- sql_stmt=SELECT_PACKAGE_META
- )
- return json.load(raw)
+ body = s3_client.get_object(Bucket=bucket, Key=key)["Body"]
+ with body: # this *might* be needed to close the stream ASAP
+ for line in body.iter_lines():
+ return json.loads(line)
except (botocore.exceptions.ClientError, json.JSONDecodeError) as cle:
print(f"Unable to S3 select manifest: {cle}")
@@ -439,7 +432,7 @@ def get_pkg_data():
first = select_manifest_meta(s3_client, bucket, manifest_key)
if not first:
return
- stats = select_package_stats(s3_client, bucket, manifest_key)
+ stats = select_package_stats(bucket, manifest_key)
if not stats:
return
@@ -472,33 +465,54 @@ def get_pkg_data():
return True
-def select_package_stats(s3_client, bucket, manifest_key) -> str:
+@functools.lru_cache(maxsize=None)
+def get_bucket_region(bucket: str) -> str:
+ resp = s3_client.head_bucket(Bucket=bucket)
+ return resp["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
+
+
+@functools.lru_cache(maxsize=None)
+def get_presigner_client(bucket: str):
+ return boto3.client(
+ "s3",
+ region_name=get_bucket_region(bucket),
+ config=botocore.config.Config(signature_version="s3v4"),
+ )
+
+
+def select_package_stats(bucket, manifest_key) -> Optional[dict]:
"""use s3 select to generate file stats for package"""
logger_ = get_quilt_logger()
- try:
- raw_stats = query_manifest_content(
- s3_client,
- bucket=bucket,
- key=manifest_key,
- sql_stmt=SELECT_PACKAGE_STATS
- ).read()
-
- if raw_stats:
- stats = json.loads(raw_stats)
- assert isinstance(stats['total_bytes'], int)
- assert isinstance(stats['total_files'], int)
-
- return stats
-
- except (
- AssertionError,
- botocore.exceptions.ClientError,
- json.JSONDecodeError,
- KeyError,
- ) as err:
- logger_.exception("Unable to compute package stats via S3 select")
+ presigner_client = get_presigner_client(bucket)
+ url = presigner_client.generate_presigned_url(
+ ClientMethod="get_object",
+ Params={
+ "Bucket": bucket,
+ "Key": manifest_key,
+ },
+ )
+ lambda_ = make_lambda_client()
+ q = f"""
+ SELECT
+ COALESCE(SUM(size), 0) AS total_bytes,
+ COUNT(size) AS total_files FROM read_ndjson('{url}', columns={{size: 'UBIGINT'}}) obj
+ """
+ resp = lambda_.invoke(
+ FunctionName=DUCKDB_SELECT_LAMBDA_ARN,
+ Payload=json.dumps({"query": q, "user_agent": f"DuckDB Select {USER_AGENT_EXTRA}"}),
+ )
- return None
+ payload = resp["Payload"].read()
+ if "FunctionError" in resp:
+ logger_.error("DuckDB select unhandled error: %s", payload)
+ return None
+ parsed = json.loads(payload)
+ if "error" in parsed:
+ logger_.error("DuckDB select error: %s", parsed["error"])
+ return None
+
+ rows = parsed["rows"]
+ return rows[0] if rows else None
def extract_pptx(fileobj, max_size: int) -> str:
@@ -732,6 +746,11 @@ def make_s3_client():
return boto3.client("s3", config=configuration)
+@functools.lru_cache(maxsize=None)
+def make_lambda_client():
+ return boto3.client("lambda")
+
+
def map_event_name(event: dict):
"""transform eventbridge names into S3-like ones"""
input_ = event["eventName"]
diff --git a/lambdas/indexer/pytest.ini b/lambdas/indexer/pytest.ini
index dd07825516f..f9355a4fbaf 100644
--- a/lambdas/indexer/pytest.ini
+++ b/lambdas/indexer/pytest.ini
@@ -1,4 +1,6 @@
[pytest]
+env =
+ DUCKDB_SELECT_LAMBDA_ARN = "arn:aws:lambda:us-west-2:123456789012:function:select-lambda"
log_cli = True
# This is set above critical to prevent logger events from confusing output in CI
-log_level = 51
+log_level = 51
diff --git a/lambdas/indexer/test-requirements.txt b/lambdas/indexer/test-requirements.txt
index e75e43e319b..b8fc13134ea 100644
--- a/lambdas/indexer/test-requirements.txt
+++ b/lambdas/indexer/test-requirements.txt
@@ -5,4 +5,5 @@ pluggy==0.9
py==1.10.0
pytest==4.4.0
pytest-cov==2.6.1
+pytest-env==0.6.2
responses==0.10.14
diff --git a/lambdas/indexer/test/test_index.py b/lambdas/indexer/test/test_index.py
index c53e3bfa8de..05cc0c85a1f 100644
--- a/lambdas/indexer/test/test_index.py
+++ b/lambdas/indexer/test/test_index.py
@@ -23,7 +23,6 @@
import responses
from botocore import UNSIGNED
from botocore.client import Config
-from botocore.exceptions import ParamValidationError
from botocore.stub import Stubber
from dateutil.tz import tzutc
from document_queue import EVENT_PREFIX, RetryError
@@ -979,7 +978,7 @@ def test_index_if_package_select_stats_fail(self, append_mock, select_meta_mock,
)
select_meta_mock.assert_called_once_with(self.s3_client, bucket, manifest_key)
- select_stats_mock.assert_called_once_with(self.s3_client, bucket, manifest_key)
+ select_stats_mock.assert_called_once_with(bucket, manifest_key)
append_mock.assert_called_once_with({
"_index": bucket + PACKAGE_INDEX_SUFFIX,
"_id": key,
@@ -1023,7 +1022,7 @@ def test_index_if_package(self, append_mock, select_meta_mock, select_stats_mock
)
select_meta_mock.assert_called_once_with(self.s3_client, bucket, manifest_key)
- select_stats_mock.assert_called_once_with(self.s3_client, bucket, manifest_key)
+ select_stats_mock.assert_called_once_with(bucket, manifest_key)
append_mock.assert_called_once_with({
"_index": bucket + PACKAGE_INDEX_SUFFIX,
"_id": key,
@@ -1182,51 +1181,6 @@ def test_extension_overrides(self):
assert self._get_contents('foo.txt', '.txt') == ""
assert self._get_contents('foo.ipynb', '.ipynb') == ""
- @pytest.mark.xfail(
- raises=ParamValidationError,
- reason="boto bug https://github.com/boto/botocore/issues/1621",
- strict=True,
- )
- def test_stub_select_object_content(self):
- """Demonstrate that mocking S3 select with boto3 is broken"""
- sha_hash = "50f4d0fc2c22a70893a7f356a4929046ce529b53c1ef87e28378d92b884691a5"
- manifest_key = f"{MANIFEST_PREFIX_V1}{sha_hash}"
- # this SHOULD work, but due to botocore bugs it does not
- self.s3_stubber.add_response(
- method="select_object_content",
- service_response={
- "ResponseMetadata": ANY,
- # it is sadly not possible to mock S3 select responses because
- # boto incorrectly believes "Payload"'s value should be a dict
- # but it's really an iterable in realworld code
- # see https://github.com/boto/botocore/issues/1621
- "Payload": [
- {
- "Stats": {}
- },
- {
- "Records": {
- "Payload": json.dumps(MANIFEST_DATA).encode(),
- },
- },
- {
- "End": {}
- },
- ]
- },
- expected_params={
- "Bucket": "test-bucket",
- "Key": manifest_key,
- "Expression": index.SELECT_PACKAGE_META,
- "ExpressionType": "SQL",
- "InputSerialization": {
- 'JSON': {'Type': 'LINES'},
- 'CompressionType': 'NONE'
- },
- "OutputSerialization": {'JSON': {'RecordDelimiter': '\n'}}
- }
- )
-
def test_synthetic_copy_event(self):
"""check synthetic ObjectCreated:Copy event vs organic obtained on 26-May-2020
(bucket versioning on)
diff --git a/lambdas/pkgselect/.python-version b/lambdas/pkgselect/.python-version
deleted file mode 100644
index cc1923a40b1..00000000000
--- a/lambdas/pkgselect/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.8
diff --git a/lambdas/pkgselect/__init__.py b/lambdas/pkgselect/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/lambdas/pkgselect/index.py b/lambdas/pkgselect/index.py
deleted file mode 100644
index 5840cbe8c53..00000000000
--- a/lambdas/pkgselect/index.py
+++ /dev/null
@@ -1,274 +0,0 @@
-"""
-Provide a virtual-file-system view of a package's logical keys.
-"""
-
-import asyncio
-import dataclasses
-import functools
-import json
-import typing as T
-
-import boto3
-import pandas as pd
-
-from t4_lambda_shared.utils import query_manifest_content, sql_escape
-
-
-async def run_async(fn, executor=None, loop=None):
- if loop is None:
- loop = asyncio.get_running_loop()
- return await loop.run_in_executor(executor, fn)
-
-
-class PkgselectException(Exception):
- def __str__(self):
- s = self.__class__.__name__
- if self.args:
- s = f"{s}: {self.args[0]}"
- return s
-
-
-class BadInputParameters(PkgselectException):
- pass
-
-
-class AccessDenied(PkgselectException):
- pass
-
-
-class NotFound(PkgselectException):
- pass
-
-
-def validate(condition: T.Any, message: str):
- if not condition:
- raise BadInputParameters(message)
-
-
-def file_list_to_folder(df: pd.DataFrame, limit: int, offset: int) -> dict:
- """
- Post process a set of logical keys to return only the top-level folder view.
- """
- if {'physical_key', 'logical_key', 'size'}.issubset(df.columns):
- groups = df.groupby(df.logical_key.str.extract('([^/]+/?).*')[0], dropna=True)
- folder = groups.agg(
- size=('size', 'sum'),
- physical_key=('physical_key', 'first')
- )
- folder.reset_index(inplace=True) # move the logical_key from the index to column[0]
- folder.rename(columns={0: 'logical_key'}, inplace=True) # name the new column
-
- # Sort to ensure consistent paging
- folder.sort_values(by=['logical_key'], inplace=True)
-
- # Page response (folders and files) based on limit & offset
- total_results = len(folder.index)
- folder = folder.iloc[offset:offset+limit]
-
- # Do not return physical_key for prefixes
- prefixes = folder[folder.logical_key.str.contains('/')].drop(
- ['physical_key'],
- axis=1
- ).to_dict(orient='records')
- objects = folder[~folder.logical_key.str.contains('/')].to_dict(orient='records')
- else:
- # df might not have the expected columns if either:
- # (1) the package is empty (has zero package entries) or,
- # (2) zero package entries match the prefix filter.
- # In either case, the folder view is empty.
- prefixes = []
- objects = []
- total_results = 0
-
- return dict(
- total=total_results,
- prefixes=prefixes,
- objects=objects,
- )
-
-
-@functools.lru_cache(maxsize=None)
-def get_s3_client():
- return boto3.client("s3")
-
-
-async def select(bucket: str, key: str, stmt: str):
- s3 = get_s3_client()
- try:
- return await run_async(functools.partial(
- query_manifest_content,
- s3,
- bucket=bucket,
- key=key,
- sql_stmt=stmt,
- ))
- except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket):
- raise NotFound
- except s3.exceptions.ClientError as ex:
- if ex.response.get("Error", {}).get("Code") == "AccessDenied":
- raise AccessDenied
- raise ex
-
-
-async def select_meta(bucket: str, manifest: str, path: T.Optional[str] = None) -> dict:
- """
- Fetch package-level, directory-level or object-level metadata
- """
- if path:
- sql_stmt = f"SELECT s.meta FROM s3object s WHERE s.logical_key = '{sql_escape(path)}' LIMIT 1"
- else:
- sql_stmt = "SELECT s.* FROM s3object s WHERE s.logical_key is NULL LIMIT 1"
-
- result = await select(bucket, manifest, sql_stmt)
- return json.load(result) if result else {}
-
-
-@dataclasses.dataclass
-class FileView:
- physical_key: str
- size: int
- hash: str
- meta: T.Optional[dict]
-
-
-async def file_view(bucket: str, manifest: str, path: str) -> T.Optional[FileView]:
- """
- Get details of a single file in the package.
- """
- validate(
- isinstance(bucket, str) and bucket,
- f"file_view: bucket must be a non-empty string (given: {bucket!r})",
- )
- validate(
- isinstance(manifest, str) and manifest,
- f"file_view: manifest must be a non-empty string (given: {manifest!r})",
- )
- validate(
- isinstance(path, str) and path,
- f"file_view: path must be a non-empty string (given: {path!r})",
- )
-
- details = await select(
- bucket,
- manifest,
- f"""
- SELECT s.physical_keys[0] as physical_key, s."size", s.hash."value" as hash, s.meta
- FROM s3object s
- WHERE s.logical_key = '{sql_escape(path)}'
- LIMIT 1
- """,
- )
- return FileView(**json.load(details)) if details is not None else None
-
-
-@dataclasses.dataclass
-class DirView:
- total: int
- prefixes: T.List[dict] # {logical_key: str, size: float}
- objects: T.List[dict] # {logical_key: str, size: float, physical_key: str}
- meta: dict
-
-
-async def dir_view(
- bucket: str,
- manifest: str,
- path: T.Optional[str],
- limit: T.Optional[int] = None,
- offset: T.Optional[int] = None,
-) -> DirView:
- validate(
- isinstance(bucket, str) and bucket,
- f"dir_view: bucket must be a non-empty string (given: {bucket!r})",
- )
- validate(
- isinstance(manifest, str) and manifest,
- f"dir_view: manifest must be a non-empty string (given: {manifest!r})",
- )
- validate(
- path is None or isinstance(path, str),
- f"dir_view: path must be a string if provided (given: {path!r})",
- )
- validate(
- limit is None or isinstance(limit, int) and limit > 0,
- f"dir_view: limit must be a positive int if provided (given: {limit!r})",
- )
- validate(
- offset is None or isinstance(offset, int) and offset >= 0,
- f"dir_view: offset must be a non-negative int if provided (given: {offset!r})",
- )
-
- if limit is None:
- limit = 1000
- if offset is None:
- offset = 0
-
- path = path.rstrip("/")
- if path:
- path += "/"
-
- meta = asyncio.create_task(select_meta(bucket, manifest, path))
-
- # Call s3 select to fetch only logical keys matching the desired prefix (folder path)
- prefix_length = len(path) if path is not None else 0
- sql_stmt = \
- f"""
- SELECT
- SUBSTRING(s.logical_key, {prefix_length + 1}) as logical_key,
- s."size",
- s.physical_keys[0] as physical_key
- FROM s3object s
- """
-
- if path:
- sql_stmt += f" WHERE SUBSTRING(s.logical_key, 1, {prefix_length}) = '{sql_escape(path)}'"
-
- result = await select(bucket, manifest, sql_stmt)
-
- # Parse the response into a logical folder view
- if result is not None:
- df = pd.read_json(
- result,
- lines=True,
- dtype=dict(logical_key="string", physical_key="string"),
- )
- else:
- df = pd.DataFrame()
-
- return DirView(
- **file_list_to_folder(df, limit, offset),
- meta=await meta,
- )
-
-
-actions = {
- "file": file_view,
- "dir": dir_view,
-}
-
-
-def lambda_handler(evt, _ctx):
- """
- Parse a manifest to return a folder-like view of its contents (logical keys).
- Payload format:
- bucket: str
- manifest: str
- action: see actions mapping
- params: see *_view functions
- Returns: {result} or {error} (see *_view functions for result format)
- """
- try:
- action = evt.get("action")
- validate(
- action in actions,
- f"action must be one of: {', '.join(actions)} (given: {action!r})",
- )
-
- result = asyncio.run(actions[action](
- evt.get("bucket"),
- evt.get("manifest"),
- **evt.get("params", {}),
- ))
- return {"result": dataclasses.asdict(result) if result is not None else None}
-
- except PkgselectException as ex:
- return {"error": str(ex)}
diff --git a/lambdas/pkgselect/requirements.txt b/lambdas/pkgselect/requirements.txt
deleted file mode 100644
index 86849527514..00000000000
--- a/lambdas/pkgselect/requirements.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-boto3==1.17.100
-botocore==1.20.100
-jmespath==0.10.0
-numpy==1.22.0
-pandas==1.1.0
-python-dateutil==2.8.2
-pytz==2023.3
-s3transfer==0.4.2
-six==1.16.0
-urllib3==1.26.19
diff --git a/lambdas/pkgselect/setup.py b/lambdas/pkgselect/setup.py
deleted file mode 100644
index 1a58f98d407..00000000000
--- a/lambdas/pkgselect/setup.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from setuptools import setup
-
-setup(
- name='quilt3_package_browse',
- version='0.0.1',
- py_modules=['index'],
-)
diff --git a/lambdas/pkgselect/test-requirements.txt b/lambdas/pkgselect/test-requirements.txt
deleted file mode 100644
index c6d95fdf08a..00000000000
--- a/lambdas/pkgselect/test-requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-coverage==5.5
-pytest==4.3.0
-pytest-cov==2.6.1
-responses==0.10.5
diff --git a/lambdas/pkgselect/test/__init__.py b/lambdas/pkgselect/test/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/lambdas/pkgselect/test/test_pkgselect.py b/lambdas/pkgselect/test/test_pkgselect.py
deleted file mode 100644
index 2be9be05e73..00000000000
--- a/lambdas/pkgselect/test/test_pkgselect.py
+++ /dev/null
@@ -1,537 +0,0 @@
-"""
-Test functions for pkgselect endpoint
-"""
-
-import json
-import os
-from unittest import TestCase, skip
-from unittest.mock import patch
-
-import boto3
-import pandas as pd
-import responses
-
-from t4_lambda_shared.utils import buffer_s3response, read_body
-
-from .. import index as pkgselect
-
-
-@skip("TODO: fix tests")
-class TestPackageSelect(TestCase):
- """
- Unit tests for the PackageSelect API endpoint.
- """
-
- def make_s3response(self, payload_bytes):
- """
- Generate a mock s3 select response
- """
- return {
- 'Payload': [
- {
- 'Records': {
- 'Payload': payload_bytes
- }
- },
- {
- 'Progress': {
- 'Details': {
- 'BytesScanned': 123,
- 'BytesProcessed': 123,
- 'BytesReturned': 123
- }
- }
- },
- {
- 'Stats': {
- 'Details': {
- 'BytesScanned': 123,
- 'BytesProcessed': 123,
- 'BytesReturned': 123
- }
- }
- },
- {
- 'End': {}
- }
- ]
- }
-
- def make_s3response_empty(self):
- """
- Generate a mock s3 select response with no records
- """
- return {
- 'Payload': [
- {
- 'Stats': {
- 'Details': {
- 'BytesScanned': 123,
- 'BytesProcessed': 123,
- 'BytesReturned': 0
- }
- }
- },
- {
- 'End': {}
- }
- ]
- }
-
- def make_manifest_query(self, logical_keys):
- entries = []
- for key in logical_keys:
- entry = dict(
- logical_key=key,
- physical_key=f"{key}?versionid=1234",
- size=100
- )
- entries.append(json.dumps(entry))
- jsonl = "\n".join(entries)
- streambytes = jsonl.encode()
-
- return self.make_s3response(streambytes)
-
- def setUp(self):
- """
- Mocks for testing calls to S3 Select
- """
-
- logical_keys = [
- "foo.csv",
- "bar/file1.txt",
- "bar/file2.txt",
- "bar/baz/file3.txt",
- "bar/baz/file4.txt"
- ]
-
- manifest_row = dict(
- logical_key="bar/file1.txt",
- physical_keys=["s3://test-bucket/bar/file1.txt"],
- size=1234,
- hash={"type": "SHA256", "value": "0123456789ABCDEF"},
- meta={}
- )
- detailbytes = json.dumps(manifest_row).encode()
-
- self.s3response = self.make_manifest_query(logical_keys)
- self.s3response_detail = self.make_s3response(detailbytes)
- self.s3response_detail_empty = self.make_s3response_empty()
- self.s3response_incomplete = {
- 'Payload': [
- {
- 'Records': {
- 'Payload': self.s3response['Payload'][0]['Records']['Payload']
- }
- },
- {
- 'Stats': {
- 'Details': {
- 'BytesScanned': 123,
- 'BytesProcessed': 123,
- 'BytesReturned': 123
- }
- }
- }
- ]
- }
-
- meta = {
- "version": "v0",
- "user_meta": {
- "somefield": "somevalue"
- },
- "message": "Commit message"
- }
- metabytes = json.dumps(meta).encode()
- self.s3response_meta = self.make_s3response(metabytes)
-
- requests_mock = responses.RequestsMock(assert_all_requests_are_fired=False)
- requests_mock.start()
- self.addCleanup(requests_mock.stop)
-
- env_patcher = patch.dict(os.environ, {
- 'AWS_ACCESS_KEY_ID': 'test_key',
- 'AWS_SECRET_ACCESS_KEY': 'test_secret',
- })
- env_patcher.start()
- self.addCleanup(env_patcher.stop)
-
- def test_browse_top_level(self):
- """
- Test that the S3 Select response is parsed
- into the correct top-level folder view.
- """
- df = pd.read_json(buffer_s3response(self.s3response), lines=True)
- assert isinstance(df, pd.DataFrame)
-
- folder = pkgselect.file_list_to_folder(df, 1000, 0)
- assert len(folder['prefixes']) == 1
- assert len(folder['objects']) == 1
- assert folder['objects'][0]['logical_key'] == 'foo.csv'
- assert folder['prefixes'][0]['logical_key'] == 'bar/'
-
- def test_limit(self):
- """
- Test that the limit parameter caps the number of entries
- returned in the folder view.
- """
- df = pd.read_json(buffer_s3response(self.s3response), lines=True)
- assert isinstance(df, pd.DataFrame)
-
- folder = pkgselect.file_list_to_folder(df, 1, 0)
- assert len(folder['prefixes']) == 1
- assert len(folder['objects']) == 0
- assert folder['prefixes'][0]['logical_key'] == 'bar/'
-
- def test_offset(self):
- """
- Test that the offset parameter skips leading entries
- in the folder view.
- """
- df = pd.read_json(buffer_s3response(self.s3response), lines=True)
- assert isinstance(df, pd.DataFrame)
-
- folder = pkgselect.file_list_to_folder(df, 1000, 1)
- assert len(folder['prefixes']) == 0
- assert len(folder['objects']) == 1
- assert folder['objects'][0]['logical_key'] == 'foo.csv'
-
- def test_browse_subfolder(self):
- """
- Test that the S3 Select response is parsed
- into the correct sub-folder view.
- """
- prefix = "bar/"
- df = pd.read_json(buffer_s3response(self.s3response), lines=True)
- assert isinstance(df, pd.DataFrame)
- filtered_df = df[df['logical_key'].str.startswith(prefix)]
- stripped = filtered_df['logical_key'].str.slice(start=len(prefix))
- stripped_df = stripped.to_frame('logical_key')
- s3_df = pd.concat(
- [stripped_df['logical_key'], filtered_df['size'], filtered_df['physical_key']],
- axis=1,
- keys=['logical_key', 'size', 'physical_key']
- )
-
- folder = pkgselect.file_list_to_folder(s3_df, 1000, 0)
- assert len(folder['prefixes']) == 1
- assert len(folder['objects']) == 2
- object_keys = [obj['logical_key'] for obj in folder['objects']]
- assert "file1.txt" in object_keys
- assert "file2.txt" in object_keys
- assert folder['prefixes'][0]['logical_key'] == "baz/"
-
- def test_browse_subsubfolder(self):
- """
- Test that the S3 Select response is parsed
- into the correct sub-sub-folder view.
- """
- prefix = "bar/baz/"
- df = pd.read_json(buffer_s3response(self.s3response), lines=True)
- assert isinstance(df, pd.DataFrame)
- filtered_df = df[df['logical_key'].str.startswith(prefix)]
- stripped = filtered_df['logical_key'].str.slice(start=len(prefix))
- stripped_df = stripped.to_frame('logical_key')
- s3_df = pd.concat(
- [stripped_df['logical_key'], filtered_df['size'], filtered_df['physical_key']],
- axis=1,
- keys=['logical_key', 'size', 'physical_key']
- )
- folder = pkgselect.file_list_to_folder(s3_df, 1000, 0)
- assert "objects" in folder
- assert "prefixes" in folder
- assert not folder['prefixes']
- assert len(folder['objects']) == 2
- object_keys = [obj['logical_key'] for obj in folder['objects']]
- assert "file3.txt" in object_keys
- assert "file4.txt" in object_keys
-
- def test_folder_view(self):
- """
- End-to-end test (folder view without a prefix)
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
- params = dict(
- bucket=bucket,
- manifest=key,
- action="dir",
- )
-
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- side_effect=[
- self.s3response,
- self.s3response_meta,
- ]
- ) as client_patch, patch('boto3.Session.client', return_value=mock_s3):
- response = pkgselect.lambda_handler(params, None)
- print(response)
- folder = json.loads(read_body(response))['result']
- assert len(folder['prefixes']) == 1
- assert len(folder['objects']) == 1
- assert folder['objects'][0]['logical_key'] == 'foo.csv'
- assert folder['prefixes'][0]['logical_key'] == 'bar/'
-
- def test_folder_view_paging(self):
- """
- End-to-end test (top-level folder view with a limit & offset)
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
- params = dict(
- bucket=bucket,
- manifest=key,
- action="dir",
- params={
- "path": "paging_test/",
- "limit": 10,
- "offset": 10,
- },
- )
-
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- paging_logical_keys = [
- f"f{i:03d}.csv" for i in range(1000)
- ]
- s3response_paging = self.make_manifest_query(paging_logical_keys)
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- side_effect=[
- s3response_paging,
- self.s3response_meta
- ]
- ) as client_patch, patch(
- 'boto3.Session.client',
- return_value=mock_s3
- ):
- response = pkgselect.lambda_handler(params, None)
- print(response)
- folder = json.loads(read_body(response))['result']
- assert len(folder['prefixes']) == 0
- assert len(folder['objects']) == 10
- assert folder['total'] == 1000
- assert folder['objects'][0]['logical_key'] == 'f010.csv'
-
- def test_detail_view(self):
- """
- End-to-end test (detail view)
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
- logical_key = "bar/file1.txt"
- params = dict(
- bucket=bucket,
- manifest=key,
- action="file",
- params={"path": logical_key},
- )
-
- expected_sql = "SELECT s.* FROM s3object s WHERE s.logical_key = 'bar/file1.txt' LIMIT 1"
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- return_value=self.s3response_detail
- ) as client_patch, patch(
- 'boto3.Session.client',
- return_value=mock_s3
- ):
- response = pkgselect.lambda_handler(params, None)
- print(response)
- json.loads(read_body(response))['result']
-
- def test_non_existing_logical_key(self):
- """
- End-to-end test (detail view for a logical key that is not in the manifest)
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
- logical_key = "non-existing.txt"
- params = dict(
- bucket=bucket,
- manifest=key,
- action="file",
- params={"path": logical_key},
- )
-
- expected_sql = f"SELECT s.* FROM s3object s WHERE s.logical_key = '{logical_key}' LIMIT 1"
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- return_value=self.s3response_detail_empty
- ) as client_patch, patch(
- 'boto3.Session.client',
- return_value=mock_s3
- ):
- response = pkgselect.lambda_handler(params, None)
- print(response)
- assert response['statusCode'] == 404
-
- def test_non_string_keys(self):
- """
- End-to-end test (folder view where all logical keys are non-string values)
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
- params = dict(
- bucket=bucket,
- manifest=key,
- action="dir",
- )
-
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- # Return a response with keys that are not strings (integers here)
- # The important test case is where all members of a column are
- # non-string
- logical_keys = [
- "1",
- "2",
- "3",
- ]
- entries = []
- for key in logical_keys:
- entry = dict(
- logical_key=key,
- physical_key=key,
- size=100
- )
- entries.append(json.dumps(entry))
- jsonl = "\n".join(entries)
- streambytes = jsonl.encode()
- non_string_s3response = self.make_s3response(streambytes)
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- side_effect=[
- non_string_s3response,
- self.s3response_meta
- ]
- ) as client_patch, patch(
- 'boto3.Session.client',
- return_value=mock_s3
- ):
- response = pkgselect.lambda_handler(params, None)
- print(response)
- folder = json.loads(read_body(response))['result']
- assert not folder['prefixes']
- assert len(folder['objects']) == 3
- assert folder['objects'][0]['logical_key'] == '1'
- assert folder['objects'][1]['logical_key'] == '2'
- assert folder['objects'][2]['logical_key'] == '3'
-
- def test_empty_manifest(self):
- """
- End-to-end test (folder view without a prefix) for an
- empty package manifest
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
- params = dict(
- bucket=bucket,
- manifest=key,
- action="dir",
- )
-
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- # Empty manifest
- jsonl = '{"version": "v0", "message": null}'
- streambytes = jsonl.encode()
- non_string_s3response = self.make_s3response(streambytes)
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- side_effect=[
- non_string_s3response,
- self.s3response_meta
- ]
- ) as client_patch, patch(
- 'boto3.Session.client',
- return_value=mock_s3
- ):
- response = pkgselect.lambda_handler(params, None)
- print(response)
- folder = json.loads(read_body(response))['result']
- assert not folder['prefixes']
- assert not folder['objects']
- assert folder['total'] == 0
diff --git a/lambdas/s3select/.python-version b/lambdas/s3select/.python-version
deleted file mode 100644
index cc1923a40b1..00000000000
--- a/lambdas/s3select/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.8
diff --git a/lambdas/s3select/requirements.txt b/lambdas/s3select/requirements.txt
deleted file mode 100644
index d60d4aa053c..00000000000
--- a/lambdas/s3select/requirements.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-attrs==19.1.0
-botocore==1.21.44
-certifi==2024.7.4
-chardet==3.0.4
-docutils==0.14
-idna==3.7
-jmespath==0.9.4
-jsonschema==3.0.1
-pyrsistent==0.15.3
-python-dateutil==2.8.0
-requests==2.32.0
-six==1.12.0
-urllib3==1.26.19
diff --git a/lambdas/s3select/setup.py b/lambdas/s3select/setup.py
deleted file mode 100644
index 0e4b32174d8..00000000000
--- a/lambdas/s3select/setup.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from setuptools import find_packages, setup
-
-setup(
- name='t4_lambda_s3select',
- version='0.0.1',
- packages=find_packages(where="src"),
- package_dir={"": "src"},
-)
diff --git a/lambdas/s3select/src/t4_lambda_s3select/__init__.py b/lambdas/s3select/src/t4_lambda_s3select/__init__.py
deleted file mode 100644
index 2a13e51f1cb..00000000000
--- a/lambdas/s3select/src/t4_lambda_s3select/__init__.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""
-Sign S3 select requests (because S3 select does not allow anonymous access).
-
-The implementation doesn't care what the request is, and just signs it using
-the current AWS credentials.
-"""
-import os
-from urllib.parse import urlencode
-
-import requests
-from botocore.auth import SigV4Auth
-from botocore.awsrequest import AWSRequest
-from botocore.session import Session
-
-from t4_lambda_shared.decorator import api
-from t4_lambda_shared.utils import get_default_origins
-
-SERVICE = 's3'
-REGION = os.environ.get('AWS_REGION', '')
-
-REQUEST_HEADERS_TO_FORWARD = {'content-type', 'cache-control', 'pragma', 'x-amz-content-sha256', 'x-amz-user-agent'}
-REQUEST_HEADERS_TO_SIGN = {'host', 'x-amz-content-sha256', 'x-amz-user-agent'}
-RESPONSE_HEADERS_TO_FORWARD = {'content-type'}
-
-session = requests.Session()
-
-
-@api(cors_origins=get_default_origins())
-def lambda_handler(request):
- """
- Sign the request and forward it to S3.
- """
- if not (request.method == 'POST' and 'select' in request.args):
- return requests.codes.bad_request, 'Not an S3 select', {'content-type': 'text/plain'}
-
- bucket, key = request.pathParameters['proxy'].split('/', 1)
- host = f'{bucket}.s3.amazonaws.com'
-
- # Make an unsigned HEAD request to test anonymous access.
-
- object_url = f'https://{host}/{key}'
- head_response = session.head(object_url)
- if not head_response.ok:
- return requests.codes.forbidden, 'Not allowed', {'content-type': 'text/plain'}
-
- # Sign the full S3 select request.
-
- url = f'{object_url}?{urlencode(request.args)}'
-
- headers = {k: v for k, v in request.headers.items() if k in REQUEST_HEADERS_TO_FORWARD}
- headers['host'] = host
-
- aws_request = AWSRequest(
- method=request.method,
- url=url,
- data=request.data,
- headers={k: v for k, v in headers.items() if k in REQUEST_HEADERS_TO_SIGN}
- )
- credentials = Session().get_credentials()
- auth = SigV4Auth(credentials, SERVICE, REGION)
- auth.add_auth(aws_request)
-
- headers.update(aws_request.headers)
-
- response = session.post(
- url=url,
- data=request.data, # Forward the POST data.
- headers=headers,
- )
-
- response_headers = {k: v for k, v in response.headers.items() if k in RESPONSE_HEADERS_TO_FORWARD}
- # Add a default content type to prevent API Gateway from setting it to application/json.
- response_headers.setdefault('content-type', 'application/octet-stream')
-
- return response.status_code, response.content, response_headers
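The proxy deleted above boils down to a single SigV4 signing step; a standalone sketch of that step for reference (the URL, region, and body are illustrative placeholders, not part of this change):

    from botocore.auth import SigV4Auth
    from botocore.awsrequest import AWSRequest
    from botocore.session import Session

    url = "https://example-bucket.s3.amazonaws.com/object.csv?select&select-type=2"  # placeholder
    req = AWSRequest(
        method="POST",
        url=url,
        data=b"<s3 select request body>",                        # placeholder body
        headers={"host": "example-bucket.s3.amazonaws.com"},
    )
    SigV4Auth(Session().get_credentials(), "s3", "us-east-1").add_auth(req)
    # req.headers now carries Authorization and X-Amz-Date, ready to forward to S3.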
diff --git a/lambdas/s3select/test-requirements.txt b/lambdas/s3select/test-requirements.txt
deleted file mode 100644
index 7e23afb8f36..00000000000
--- a/lambdas/s3select/test-requirements.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-atomicwrites==1.3.0
-importlib-metadata==0.18
-more-itertools==7.1.0
-pluggy==0.13.1
-py==1.10.0
-pyparsing==2.4.0
-pytest==4.3.0
-pytest-cov==2.6.1
-responses==0.10.6
-wcwidth==0.1.7
-zipp==3.19.1
diff --git a/lambdas/s3select/tests/__init__.py b/lambdas/s3select/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/lambdas/s3select/tests/test_s3select.py b/lambdas/s3select/tests/test_s3select.py
deleted file mode 100644
index 13e61696508..00000000000
--- a/lambdas/s3select/tests/test_s3select.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-from base64 import b64decode
-from unittest import TestCase
-from unittest.mock import patch
-
-import responses
-
-import t4_lambda_s3select
-
-
-@patch('t4_lambda_s3select.REGION', 'us-east-1')
-class TestS3Select(TestCase):
- """Tests S3 Select"""
- def setUp(self):
- self.requests_mock = responses.RequestsMock(assert_all_requests_are_fired=False)
- self.requests_mock.start()
-
- self.env_patcher = patch.dict(os.environ, {
- 'AWS_ACCESS_KEY_ID': 'test_key',
- 'AWS_SECRET_ACCESS_KEY': 'test_secret',
- })
- self.env_patcher.start()
-
- def tearDown(self):
- self.env_patcher.stop()
- self.requests_mock.stop()
-
- @classmethod
- def _make_event(cls, path, query, headers, body):
- return {
- 'httpMethod': 'POST',
- 'path': f'/lambda/{path}',
- 'pathParameters': {
- 'proxy': path
- },
- 'queryStringParameters': query or None,
- 'headers': headers or None,
- 'body': body,
- 'isBase64Encoded': False,
- }
-
- def test_signature(self):
- url = 'https://bucket.s3.amazonaws.com/object.csv'
-
- self.requests_mock.add(
- responses.HEAD,
- url,
- status=200)
-
- def _callback(request):
- assert 'X-Amz-Date' in request.headers
- assert 'Authorization' in request.headers
- assert request.headers['content-type'] == 'application/octet-stream'
- assert request.headers['cache-control'] == 'no-cache'
- assert request.headers['pragma'] == 'no-cache'
- assert 'referer' not in request.headers
- return 200, {}, b'results'
-
- self.requests_mock.add_callback(
- responses.POST,
- url,
- _callback)
-
- query = {
- 'select': '',
- 'select-type': '2',
- }
- headers = {
- 'content-type': 'application/octet-stream',
- 'x-amz-content-sha256': '123456',
- 'x-amz-user-agent': 'test',
- 'cache-control': 'no-cache',
- 'pragma': 'no-cache',
- 'referer': 'http://example.com'
- }
- body = b's3 select request body'
-
- event = self._make_event('bucket/object.csv', query, headers, body)
- resp = t4_lambda_s3select.lambda_handler(event, None)
- assert resp['statusCode'] == 200
- assert resp['isBase64Encoded']
- assert b64decode(resp['body']) == b'results'
-
- def test_not_public(self):
- url = 'https://bucket.s3.amazonaws.com/object.csv'
-
- self.requests_mock.add(
- responses.HEAD,
- url,
- status=403)
-
- event = self._make_event('bucket/object.csv', {'select': None}, {}, b'test')
- resp = t4_lambda_s3select.lambda_handler(event, None)
- assert resp['statusCode'] == 403
-
- def test_bad_request(self):
- event = self._make_event('bucket/object.csv', {}, {}, b'test')
- resp = t4_lambda_s3select.lambda_handler(event, None)
- assert resp['statusCode'] == 400
-
- event = self._make_event('bucket/object.csv', {'select': None}, {}, b'test')
- event['httpMethod'] = 'PUT'
- resp = t4_lambda_s3select.lambda_handler(event, None)
- assert resp['statusCode'] == 400
diff --git a/lambdas/shared/t4_lambda_shared/utils.py b/lambdas/shared/t4_lambda_shared/utils.py
index 9fb161dc15b..8d94dc7cef8 100644
--- a/lambdas/shared/t4_lambda_shared/utils.py
+++ b/lambdas/shared/t4_lambda_shared/utils.py
@@ -2,7 +2,6 @@
Helper functions.
"""
import gzip
-import io
import json
import logging
import os
@@ -96,13 +95,6 @@ def read_body(resp):
return body
-class IncompleteResultException(Exception):
- """
- Exception indicating an incomplete response
- (e.g., from S3 Select)
- """
-
-
def sql_escape(s):
"""
Escape strings that might contain single quotes for use in Athena
@@ -110,60 +102,3 @@ def sql_escape(s):
"""
escaped = s or ""
return escaped.replace("'", "''")
-
-
-def buffer_s3response(s3response):
- """
- Read a streaming response (botocore.eventstream.EventStream) from s3 select
- into a BytesIO buffer
- """
- logger_ = logging.getLogger(LOGGER_NAME)
- response = io.BytesIO()
- end_event_received = False
- stats = None
- found_records = False
- for event in s3response['Payload']:
- if 'Records' in event:
- records = event['Records']['Payload']
- response.write(records)
- found_records = True
- elif 'Progress' in event:
- logger_.info("select progress: %s", event['Progress'].get('Details'))
- elif 'Stats' in event:
- logger_.info("select stats: %s", event['Stats'])
- elif 'End' in event:
- # End event indicates that the request finished successfully
- end_event_received = True
-
- if not end_event_received:
- raise IncompleteResultException("Error: Received an incomplete response from S3 Select.")
- response.seek(0)
- return response if found_records else None
-
-
-def query_manifest_content(
- s3_client,
- *,
- bucket: str,
- key: str,
- sql_stmt: str
-) -> io.BytesIO:
- """
- Call S3 Select to read only the logical keys from a
- package manifest that match the desired folder path
- prefix
- """
- logger_ = get_quilt_logger()
- logger_.debug("utils.py: manifest_select: %s", sql_stmt)
- response = s3_client.select_object_content(
- Bucket=bucket,
- Key=key,
- ExpressionType='SQL',
- Expression=sql_stmt,
- InputSerialization={
- 'JSON': {'Type': 'LINES'},
- 'CompressionType': 'NONE'
- },
- OutputSerialization={'JSON': {'RecordDelimiter': '\n'}}
- )
- return buffer_s3response(response)
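Note that sql_escape stays in t4_lambda_shared.utils; an illustrative check of its behavior (not part of the diff):

    from t4_lambda_shared.utils import sql_escape

    assert sql_escape("it's a test") == "it''s a test"  # single quotes are doubled for SQL
    assert sql_escape(None) == ""                       # falsy input collapses to an empty string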
diff --git a/lambdas/shared/tests/test_utils.py b/lambdas/shared/tests/test_utils.py
index b9a7a403db3..6cb0a4eea7d 100644
--- a/lambdas/shared/tests/test_utils.py
+++ b/lambdas/shared/tests/test_utils.py
@@ -7,16 +7,13 @@
from unittest import TestCase
from unittest.mock import patch
-import boto3
import pytest
from testfixtures import LogCapture
from t4_lambda_shared.utils import (
- IncompleteResultException,
get_available_memory,
get_default_origins,
make_json_response,
- query_manifest_content,
separated_env_to_iter,
)
@@ -143,76 +140,6 @@ def test_json_response(self):
assert json.loads(body) == {'foo': 'bar'}
assert headers == {'Content-Type': 'application/json', 'Content-Length': '123'}
- def test_call_s3select(self):
- """
- Test that parameters are correctly passed to
- S3 Select (without a prefix)
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
-
- expected_sql = "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s"
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': expected_sql,
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- return_value=self.s3response
- ) as patched:
- query_manifest_content(
- mock_s3,
- bucket=bucket,
- key=key,
- sql_stmt=expected_sql)
- patched.assert_called_once_with(**expected_args)
-
- def test_call_s3select_incomplete_response(self):
- """
- Test that an incomplete response from S3 Select is
- detected and an exception is raised.
- """
- bucket = "bucket"
- key = ".quilt/packages/manifest_hash"
-
- expected_sql = "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s"
- expected_args = {
- 'Bucket': bucket,
- 'Key': key,
- 'Expression': expected_sql,
- 'ExpressionType': 'SQL',
- 'InputSerialization': {
- 'CompressionType': 'NONE',
- 'JSON': {'Type': 'LINES'}
- },
- 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
- }
-
- mock_s3 = boto3.client('s3')
- with patch.object(
- mock_s3,
- 'select_object_content',
- return_value=self.s3response_incomplete
- ) as patched:
- with self.assertRaises(IncompleteResultException):
- query_manifest_content(
- mock_s3,
- bucket=bucket,
- key=key,
- sql_stmt=expected_sql
- )
- patched.assert_called_once_with(**expected_args)
-
@pytest.mark.parametrize(
"level, call, message, expected, name",
diff --git a/lambdas/tabular_preview/requirements.txt b/lambdas/tabular_preview/requirements.txt
index c787be65d4d..b10ea779083 100644
--- a/lambdas/tabular_preview/requirements.txt
+++ b/lambdas/tabular_preview/requirements.txt
@@ -4,7 +4,7 @@
#
# pip-compile --output-file=requirements.txt ../shared/setup.py setup.py
#
-aiohttp==3.10.2
+aiohttp==3.10.11
# via fsspec
aiosignal==1.2.0
# via aiohttp
diff --git a/s3-proxy/Dockerfile b/s3-proxy/Dockerfile
index adf17c7a6a8..72fe0690fb6 100644
--- a/s3-proxy/Dockerfile
+++ b/s3-proxy/Dockerfile
@@ -1,4 +1,4 @@
-FROM amazonlinux:2023.6.20241031.0
+FROM amazonlinux:2023.6.20241111.0
MAINTAINER Quilt Data, Inc. contact@quiltdata.io
# Based on:
diff --git a/shared/graphql/schema.graphql b/shared/graphql/schema.graphql
index 0bb997e7809..ea342cd5806 100644
--- a/shared/graphql/schema.graphql
+++ b/shared/graphql/schema.graphql
@@ -222,6 +222,16 @@ type AccessCounts {
counts: [AccessCountForDate!]!
}
+type AccessCountsGroup {
+ ext: String!
+ counts: AccessCounts!
+}
+
+type BucketAccessCounts {
+ byExt(groups: Int): [AccessCountsGroup!]!
+ combined: AccessCounts!
+}
+
type PackageDir {
path: String!
metadata: JsonRecord
@@ -556,6 +566,9 @@ type Query {
searchMorePackages(after: String!, size: Int = 30): PackagesSearchMoreResult!
subscription: SubscriptionState!
+ bucketAccessCounts(bucket: String!, window: Int!): BucketAccessCounts
+ objectAccessCounts(bucket: String!, key: String!, window: Int!): AccessCounts
+
admin: AdminQueries! @admin
policies: [Policy!]! @admin
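An illustrative query against the newly added fields (the bucket value and window are placeholders; the selection stops at fields visible in this schema diff, since AccessCountForDate is defined elsewhere):

    query {
      bucketAccessCounts(bucket: "example-bucket", window: 30) {
        byExt(groups: 5) {
          ext
        }
      }
    }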