diff --git a/.github/workflows/deploy-lambdas.yaml b/.github/workflows/deploy-lambdas.yaml index cd248e39cd9..8cf561a973b 100644 --- a/.github/workflows/deploy-lambdas.yaml +++ b/.github/workflows/deploy-lambdas.yaml @@ -17,10 +17,8 @@ jobs: - indexer - pkgevents - pkgpush - - pkgselect - preview - s3hash - - s3select - status_reports - tabular_preview - transcode diff --git a/.github/workflows/py-ci.yml b/.github/workflows/py-ci.yml index e8ad615dea6..a20c8342c6f 100644 --- a/.github/workflows/py-ci.yml +++ b/.github/workflows/py-ci.yml @@ -160,10 +160,8 @@ jobs: - molecule - pkgevents - pkgpush - - pkgselect - preview - s3hash - - s3select - shared - status_reports - tabular_preview diff --git a/catalog/CHANGELOG.md b/catalog/CHANGELOG.md index 8758f53dd0a..25f09cb8347 100644 --- a/catalog/CHANGELOG.md +++ b/catalog/CHANGELOG.md @@ -17,6 +17,10 @@ where verb is one of ## Changes +- [Fixed] Athena: fix minor UI bugs ([#4232](https://github.com/quiltdata/quilt/pull/4232)) +- [Fixed] Show Athena query editor when no named queries ([#4230](https://github.com/quiltdata/quilt/pull/4230)) +- [Fixed] Fix some doc URLs in catalog ([#4205](https://github.com/quiltdata/quilt/pull/4205)) +- [Changed] S3 Select -> GQL API calls for getting access counts ([#4218](https://github.com/quiltdata/quilt/pull/4218)) - [Changed] Athena: improve loading state and errors visuals; fix minor bugs; alphabetize and persist selection in workgroups, catalog names and databases ([#4208](https://github.com/quiltdata/quilt/pull/4208)) - [Changed] Show stack release version in footer ([#4200](https://github.com/quiltdata/quilt/pull/4200)) - [Added] Selective package downloading ([#4173](https://github.com/quiltdata/quilt/pull/4173)) diff --git a/catalog/Dockerfile b/catalog/Dockerfile index a58b9f30e70..ad288b8fe8a 100644 --- a/catalog/Dockerfile +++ b/catalog/Dockerfile @@ -1,4 +1,4 @@ -FROM amazonlinux:2023.6.20241031.0 +FROM amazonlinux:2023.6.20241111.0 MAINTAINER Quilt Data, Inc. 
contact@quiltdata.io ENV LC_ALL=C.UTF-8 diff --git a/catalog/app/components/FileEditor/QuiltConfigEditor/BucketPreferences.tsx b/catalog/app/components/FileEditor/QuiltConfigEditor/BucketPreferences.tsx index 67737f681f5..9abbd53a7a4 100644 --- a/catalog/app/components/FileEditor/QuiltConfigEditor/BucketPreferences.tsx +++ b/catalog/app/components/FileEditor/QuiltConfigEditor/BucketPreferences.tsx @@ -12,7 +12,10 @@ function Header() { return ( Configuration for Catalog UI: show and hide features, set default values. See{' '} - + the docs diff --git a/catalog/app/components/FileEditor/QuiltConfigEditor/Workflows.tsx b/catalog/app/components/FileEditor/QuiltConfigEditor/Workflows.tsx index 23175e61171..5288698cdeb 100644 --- a/catalog/app/components/FileEditor/QuiltConfigEditor/Workflows.tsx +++ b/catalog/app/components/FileEditor/QuiltConfigEditor/Workflows.tsx @@ -18,7 +18,7 @@ function Header() { return ( Configuration for data quality workflows. See{' '} - + the docs diff --git a/catalog/app/containers/Admin/Status/Status.tsx b/catalog/app/containers/Admin/Status/Status.tsx index 79cd6360655..c91d6b74d2c 100644 --- a/catalog/app/containers/Admin/Status/Status.tsx +++ b/catalog/app/containers/Admin/Status/Status.tsx @@ -54,7 +54,10 @@ export default function Status() { GxP and other compliance regimes. - + Learn more {' '} or contact sales. diff --git a/catalog/app/containers/Admin/UsersAndRoles/SsoConfig.tsx b/catalog/app/containers/Admin/UsersAndRoles/SsoConfig.tsx index b740034202e..26f292af45f 100644 --- a/catalog/app/containers/Admin/UsersAndRoles/SsoConfig.tsx +++ b/catalog/app/containers/Admin/UsersAndRoles/SsoConfig.tsx @@ -121,7 +121,10 @@ function Form({ Learn more about{' '} - + SSO permissions mapping . 
diff --git a/catalog/app/containers/Bucket/CodeSamples/Dir.tsx b/catalog/app/containers/Bucket/CodeSamples/Dir.tsx index bfd77c042be..5b3491691d1 100644 --- a/catalog/app/containers/Bucket/CodeSamples/Dir.tsx +++ b/catalog/app/containers/Bucket/CodeSamples/Dir.tsx @@ -14,9 +14,9 @@ const TEMPLATES = { dedent` import quilt3 as q3 b = q3.Bucket("s3://${bucket}") - # List files [[${docs}/api-reference/bucket#bucket.ls]] + # List files [[${docs}/quilt-python-sdk-developers/api-reference/bucket#bucket.ls]] b.ls("${path}") - # Download [[${docs}/api-reference/bucket#bucket.fetch]] + # Download [[${docs}/quilt-python-sdk-developers/api-reference/bucket#bucket.fetch]] b.fetch("${path}", "./${dest}") `, CLI: (bucket: string, path: string, dest: string) => diff --git a/catalog/app/containers/Bucket/CodeSamples/File.tsx b/catalog/app/containers/Bucket/CodeSamples/File.tsx index f1eea186809..006609ad393 100644 --- a/catalog/app/containers/Bucket/CodeSamples/File.tsx +++ b/catalog/app/containers/Bucket/CodeSamples/File.tsx @@ -14,7 +14,7 @@ const TEMPLATES = { dedent` import quilt3 as q3 b = q3.Bucket("s3://${bucket}") - # Download [[${docs}/api-reference/bucket#bucket.fetch]] + # Download [[${docs}/quilt-python-sdk-developers/api-reference/bucket#bucket.fetch]] b.fetch("${path}", "./${basename(path)}") `, CLI: (bucket: string, path: string) => diff --git a/catalog/app/containers/Bucket/CodeSamples/Package.tsx b/catalog/app/containers/Bucket/CodeSamples/Package.tsx index 7dbce6e97ee..c2763b3d750 100644 --- a/catalog/app/containers/Bucket/CodeSamples/Package.tsx +++ b/catalog/app/containers/Bucket/CodeSamples/Package.tsx @@ -19,16 +19,16 @@ const TEMPLATES = { const hashPy = hashDisplay && `, top_hash="${hashDisplay}"` return dedent` import quilt3 as q3 - # Browse [[${docs}/api-reference/package#package.browse]] + # Browse [[${docs}/quilt-python-sdk-developers/api-reference/package#package.browse]] p = q3.Package.browse("${name}"${hashPy}, registry="s3://${bucket}") - # make 
changes to package adding individual files [[${docs}/api-reference/package#package.set]] + # make changes to package adding individual files [[${docs}/quilt-python-sdk-developers/api-reference/package#package.set]] p.set("data.csv", "data.csv") - # or whole directories [[${docs}/api-reference/package#package.set_dir]] + # or whole directories [[${docs}/quilt-python-sdk-developers/api-reference/package#package.set_dir]] p.set_dir("subdir", "subdir") - # and push changes [[${docs}/api-reference/package#package.push]] + # and push changes [[${docs}/quilt-python-sdk-developers/api-reference/package#package.push]] p.push("${name}", registry="s3://${bucket}", message="Hello World") - # Download (be mindful of large packages) [[${docs}/api-reference/package#package.push]] + # Download (be mindful of large packages) [[${docs}/quilt-python-sdk-developers/api-reference/package#package.install]] q3.Package.install("${name}"${pathPy}${hashPy}, registry="s3://${bucket}", dest=".") ` }, @@ -36,13 +36,13 @@ const TEMPLATES = { const pathCli = path && ` --path "${s3paths.ensureNoSlash(path)}"` const hashCli = hashDisplay && ` --top-hash ${hashDisplay}` return dedent` - # Download package [[${docs}/api-reference/cli#install]] + # Download package [[${docs}/quilt-python-sdk-developers/api-reference/cli#install]] quilt3 install "${name}"${pathCli}${hashCli} --registry s3://${bucket} --dest . ` }, CLI_UPLOAD: (bucket: string, name: string) => dedent` - # Upload package [[${docs}/api-reference/cli#push]] + # Upload package [[${docs}/quilt-python-sdk-developers/api-reference/cli#push]] echo "Hello World" > README.md quilt3 push "${name}" --registry s3://${bucket} --dir . 
`, diff --git a/catalog/app/containers/Bucket/File/Analytics.tsx b/catalog/app/containers/Bucket/File/Analytics.tsx new file mode 100644 index 00000000000..4152b083953 --- /dev/null +++ b/catalog/app/containers/Bucket/File/Analytics.tsx @@ -0,0 +1,92 @@ +import * as dateFns from 'date-fns' +import * as Eff from 'effect' +import * as React from 'react' +import * as M from '@material-ui/core' + +import Sparkline from 'components/Sparkline' +import * as GQL from 'utils/GraphQL' +import log from 'utils/Logging' +import * as SVG from 'utils/SVG' +import { readableQuantity } from 'utils/string' + +import Section from '../Section' + +import ACCESS_COUNTS_QUERY from './gql/ObjectAccessCounts.generated' + +const currentYear = new Date().getFullYear() + +const formatDate = (date: Date) => + dateFns.format(date, currentYear === date.getFullYear() ? 'd MMM' : 'd MMM yyyy') + +interface AnalyticsProps { + bucket: string + path: string +} + +export default function Analytics({ bucket, path }: AnalyticsProps) { + const [cursor, setCursor] = React.useState(null) + + const result = GQL.useQuery(ACCESS_COUNTS_QUERY, { bucket, key: path }) + + const data = React.useMemo(() => { + if (result.fetching) return Eff.Option.none() + if (result.error) log.error('Error fetching object access counts:', result.error) + return Eff.Option.some(Eff.Option.fromNullable(result.data?.objectAccessCounts)) + }, [result.fetching, result.error, result.data]) + + const defaultExpanded = Eff.Option.match(data, { + onNone: () => false, + onSome: Eff.Option.match({ + onNone: () => false, + onSome: ({ total }) => !!total, + }), + }) + + return ( +
+ {Eff.Option.match(data, { + onNone: () => , + onSome: Eff.Option.match({ + onNone: () => No analytics available, + onSome: ({ counts, total }) => + total ? ( + + + Downloads + + {readableQuantity(cursor === null ? total : counts[cursor].value)} + + + {cursor === null + ? `${counts.length} days` + : formatDate(counts[cursor].date)} + + + + c.value)} + onCursor={setCursor} + width={1000} + height={60} + stroke={SVG.Paint.Server( + + + + , + )} + /> + + + ) : ( + No analytics available + ), + }), + })} +
+ ) +} diff --git a/catalog/app/containers/Bucket/FileAssistantContext.ts b/catalog/app/containers/Bucket/File/AssistantContext.ts similarity index 98% rename from catalog/app/containers/Bucket/FileAssistantContext.ts rename to catalog/app/containers/Bucket/File/AssistantContext.ts index 6f5876945f8..46f06c8ccf3 100644 --- a/catalog/app/containers/Bucket/FileAssistantContext.ts +++ b/catalog/app/containers/Bucket/File/AssistantContext.ts @@ -4,7 +4,7 @@ import * as React from 'react' import * as Assistant from 'components/Assistant' import * as XML from 'utils/XML' -import { ObjectExistence } from './requests' +import { ObjectExistence } from '../requests' interface VersionsContextProps { data: $TSFixMe diff --git a/catalog/app/containers/Bucket/File.js b/catalog/app/containers/Bucket/File/File.js similarity index 85% rename from catalog/app/containers/Bucket/File.js rename to catalog/app/containers/Bucket/File/File.js index a14259c510f..ed484e40d7c 100644 --- a/catalog/app/containers/Bucket/File.js +++ b/catalog/app/containers/Bucket/File/File.js @@ -1,6 +1,5 @@ import { basename } from 'path' -import * as dateFns from 'date-fns' import * as R from 'ramda' import * as React from 'react' import { Link, useHistory, useLocation, useParams } from 'react-router-dom' @@ -11,7 +10,6 @@ import * as Buttons from 'components/Buttons' import * as FileEditor from 'components/FileEditor' import Message from 'components/Message' import * as Preview from 'components/Preview' -import Sparkline from 'components/Sparkline' import cfg from 'constants/config' import * as Bookmarks from 'containers/Bookmarks' import * as Notifications from 'containers/Notifications' @@ -21,23 +19,24 @@ import * as BucketPreferences from 'utils/BucketPreferences' import { useData } from 'utils/Data' import MetaTitle from 'utils/MetaTitle' import * as NamedRoutes from 'utils/NamedRoutes' -import * as SVG from 'utils/SVG' import { linkStyle } from 'utils/StyledLink' import copyToClipboard from 
'utils/clipboard' import * as Format from 'utils/format' import parseSearch from 'utils/parseSearch' import { up, decode, handleToHttpsUri } from 'utils/s3paths' -import { readableBytes, readableQuantity } from 'utils/string' - -import AssistButton from './AssistButton' -import FileCodeSamples from './CodeSamples/File' -import * as AssistantContext from './FileAssistantContext' -import FileProperties from './FileProperties' -import * as FileView from './FileView' -import Section from './Section' -import renderPreview from './renderPreview' -import * as requests from './requests' -import { useViewModes, viewModeToSelectOption } from './viewModes' +import { readableBytes } from 'utils/string' + +import AssistButton from '../AssistButton' +import FileCodeSamples from '../CodeSamples/File' +import FileProperties from '../FileProperties' +import * as FileView from '../FileView' +import Section from '../Section' +import renderPreview from '../renderPreview' +import * as requests from '../requests' +import { useViewModes, viewModeToSelectOption } from '../viewModes' + +import Analytics from './Analytics' +import * as AssistantContext from './AssistantContext' const useVersionInfoStyles = M.makeStyles(({ typography }) => ({ version: { @@ -203,69 +202,6 @@ function VersionInfo({ bucket, path, version }) { ) } -function Analytics({ bucket, path }) { - const [cursor, setCursor] = React.useState(null) - const s3 = AWS.S3.use() - const today = React.useMemo(() => new Date(), []) - const formatDate = (date) => - dateFns.format( - date, - today.getFullYear() === date.getFullYear() ? 'd MMM' : 'd MMM yyyy', - ) - const data = useData(requests.objectAccessCounts, { s3, bucket, path, today }) - - const defaultExpanded = data.case({ - Ok: ({ total }) => !!total, - _: () => false, - }) - - return ( -
- {data.case({ - Ok: ({ counts, total }) => - total ? ( - - - Downloads - - {readableQuantity(cursor === null ? total : counts[cursor].value)} - - - {cursor === null - ? `${counts.length} days` - : formatDate(counts[cursor].date)} - - - - - - - , - )} - /> - - - ) : ( - No analytics available - ), - Err: () => No analytics available, - _: () => , - })} -
- ) -} - function CenteredProgress() { return ( diff --git a/catalog/app/containers/Bucket/File/gql/ObjectAccessCounts.generated.ts b/catalog/app/containers/Bucket/File/gql/ObjectAccessCounts.generated.ts new file mode 100644 index 00000000000..94875020cfe --- /dev/null +++ b/catalog/app/containers/Bucket/File/gql/ObjectAccessCounts.generated.ts @@ -0,0 +1,100 @@ +/* eslint-disable @typescript-eslint/naming-convention */ +import type { TypedDocumentNode as DocumentNode } from '@graphql-typed-document-node/core' +import * as Types from '../../../../model/graphql/types.generated' + +export type containers_Bucket_File_gql_ObjectAccessCountsQueryVariables = Types.Exact<{ + bucket: Types.Scalars['String'] + key: Types.Scalars['String'] +}> + +export type containers_Bucket_File_gql_ObjectAccessCountsQuery = { + readonly __typename: 'Query' +} & { + readonly objectAccessCounts: Types.Maybe< + { readonly __typename: 'AccessCounts' } & Pick & { + readonly counts: ReadonlyArray< + { readonly __typename: 'AccessCountForDate' } & Pick< + Types.AccessCountForDate, + 'date' | 'value' + > + > + } + > +} + +export const containers_Bucket_File_gql_ObjectAccessCountsDocument = { + kind: 'Document', + definitions: [ + { + kind: 'OperationDefinition', + operation: 'query', + name: { kind: 'Name', value: 'containers_Bucket_File_gql_ObjectAccessCounts' }, + variableDefinitions: [ + { + kind: 'VariableDefinition', + variable: { kind: 'Variable', name: { kind: 'Name', value: 'bucket' } }, + type: { + kind: 'NonNullType', + type: { kind: 'NamedType', name: { kind: 'Name', value: 'String' } }, + }, + }, + { + kind: 'VariableDefinition', + variable: { kind: 'Variable', name: { kind: 'Name', value: 'key' } }, + type: { + kind: 'NonNullType', + type: { kind: 'NamedType', name: { kind: 'Name', value: 'String' } }, + }, + }, + ], + selectionSet: { + kind: 'SelectionSet', + selections: [ + { + kind: 'Field', + name: { kind: 'Name', value: 'objectAccessCounts' }, + arguments: [ + { + kind: 
'Argument', + name: { kind: 'Name', value: 'bucket' }, + value: { kind: 'Variable', name: { kind: 'Name', value: 'bucket' } }, + }, + { + kind: 'Argument', + name: { kind: 'Name', value: 'key' }, + value: { kind: 'Variable', name: { kind: 'Name', value: 'key' } }, + }, + { + kind: 'Argument', + name: { kind: 'Name', value: 'window' }, + value: { kind: 'IntValue', value: '365' }, + }, + ], + selectionSet: { + kind: 'SelectionSet', + selections: [ + { kind: 'Field', name: { kind: 'Name', value: 'total' } }, + { + kind: 'Field', + name: { kind: 'Name', value: 'counts' }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { kind: 'Field', name: { kind: 'Name', value: 'date' } }, + { kind: 'Field', name: { kind: 'Name', value: 'value' } }, + ], + }, + }, + ], + }, + }, + ], + }, + }, + ], +} as unknown as DocumentNode< + containers_Bucket_File_gql_ObjectAccessCountsQuery, + containers_Bucket_File_gql_ObjectAccessCountsQueryVariables +> + +export { containers_Bucket_File_gql_ObjectAccessCountsDocument as default } diff --git a/catalog/app/containers/Bucket/File/gql/ObjectAccessCounts.graphql b/catalog/app/containers/Bucket/File/gql/ObjectAccessCounts.graphql new file mode 100644 index 00000000000..431f1cb2ee2 --- /dev/null +++ b/catalog/app/containers/Bucket/File/gql/ObjectAccessCounts.graphql @@ -0,0 +1,9 @@ +query ($bucket: String!, $key: String!) 
{ + objectAccessCounts(bucket: $bucket, key: $key, window: 365) { + total + counts { + date + value + } + } +} diff --git a/catalog/app/containers/Bucket/File/index.ts b/catalog/app/containers/Bucket/File/index.ts new file mode 100644 index 00000000000..d1590f6b882 --- /dev/null +++ b/catalog/app/containers/Bucket/File/index.ts @@ -0,0 +1 @@ +export { default } from './File' diff --git a/catalog/app/containers/Bucket/Overview.js b/catalog/app/containers/Bucket/Overview.js deleted file mode 100644 index 3acef4e2ccb..00000000000 --- a/catalog/app/containers/Bucket/Overview.js +++ /dev/null @@ -1,963 +0,0 @@ -import cx from 'classnames' -import * as dateFns from 'date-fns' -import * as R from 'ramda' -import * as React from 'react' -import { Link as RRLink, useParams } from 'react-router-dom' -import * as redux from 'react-redux' -import * as M from '@material-ui/core' -import { fade } from '@material-ui/core/styles' -import useComponentSize from '@rehooks/component-size' - -import Skeleton from 'components/Skeleton' -import StackedAreaChart from 'components/StackedAreaChart' -import cfg from 'constants/config' -import * as authSelectors from 'containers/Auth/selectors' -import * as APIConnector from 'utils/APIConnector' -import * as AWS from 'utils/AWS' -import AsyncResult from 'utils/AsyncResult' -import * as BucketPreferences from 'utils/BucketPreferences' -import Data, { useData } from 'utils/Data' -import { useQueryS } from 'utils/GraphQL' -import * as LinkedData from 'utils/LinkedData' -import * as NamedRoutes from 'utils/NamedRoutes' -import * as SVG from 'utils/SVG' -import { readableBytes, readableQuantity, formatQuantity } from 'utils/string' - -import * as Gallery from './Gallery' -import * as Summarize from './Summarize' -import * as requests from './requests' -import BUCKET_CONFIG_QUERY from './OverviewBucketConfig.generated' - -import bg from './Overview-bg.jpg' - -const RODA_LINK = 'https://registry.opendata.aws' -const RODA_BUCKET = 
'quilt-open-data-bucket' -const MAX_EXTS = 7 -// must have length >= MAX_EXTS -const COLOR_MAP = [ - '#8ad3cb', - '#d7ce69', - '#bfbadb', - '#f4806c', - '#83b0d1', - '#b2de67', - '#bc81be', - '#f0b5d3', - '#7ba39f', - '#9894ad', - '#be7265', - '#94ad6b', -] - -function mkKeyedPool(pool) { - const map = {} - let poolIdx = 0 - const get = (key) => { - if (!(key in map)) { - // eslint-disable-next-line no-plusplus - map[key] = pool[poolIdx++ % pool.length] - } - return map[key] - } - return { get } -} - -function useConst(cons) { - const ref = React.useRef(null) - if (!ref.current) ref.current = { value: cons() } - return ref.current.value -} - -const useObjectsByExtStyles = M.makeStyles((t) => ({ - root: { - display: 'grid', - gridAutoRows: 20, - gridColumnGap: t.spacing(1), - gridRowGap: t.spacing(0.25), - gridTemplateAreas: ` - ". heading heading" - `, - gridTemplateColumns: 'minmax(30px, max-content) 1fr minmax(30px, max-content)', - gridTemplateRows: 'auto', - [t.breakpoints.down('sm')]: { - gridTemplateAreas: ` - "heading heading heading" - `, - }, - }, - heading: { - ...t.typography.h6, - gridArea: 'heading', - marginBottom: t.spacing(1), - [t.breakpoints.down('sm')]: { - textAlign: 'center', - }, - }, - ext: { - color: t.palette.text.secondary, - gridColumn: 1, - fontSize: t.typography.overline.fontSize, - fontWeight: t.typography.fontWeightMedium, - letterSpacing: t.typography.subtitle2.letterSpacing, - lineHeight: t.typography.pxToRem(20), - textAlign: 'right', - }, - count: { - color: t.palette.text.secondary, - gridColumn: 3, - fontSize: t.typography.overline.fontSize, - fontWeight: t.typography.fontWeightMedium, - letterSpacing: t.typography.subtitle2.letterSpacing, - lineHeight: t.typography.pxToRem(20), - }, - bar: { - background: t.palette.action.hover, - gridColumn: 2, - }, - gauge: { - height: '100%', - position: 'relative', - }, - flip: {}, - size: { - color: t.palette.common.white, - fontSize: t.typography.overline.fontSize, - fontWeight: 
t.typography.fontWeightMedium, - letterSpacing: t.typography.subtitle2.letterSpacing, - lineHeight: t.typography.pxToRem(20), - position: 'absolute', - right: t.spacing(1), - '&$flip': { - color: t.palette.text.hint, - left: `calc(100% + ${t.spacing(1)}px)`, - right: 'auto', - }, - }, - skeleton: { - gridColumn: '1 / span 3', - }, - unavail: { - ...t.typography.body2, - alignItems: 'center', - display: 'flex', - gridColumn: '1 / span 3', - gridRow: `2 / span ${MAX_EXTS}`, - justifyContent: 'center', - }, -})) - -function ObjectsByExt({ data, colorPool, ...props }) { - const classes = useObjectsByExtStyles() - return ( - -
Objects by File Extension
- {AsyncResult.case( - { - Ok: (exts) => { - const capped = exts.slice(0, MAX_EXTS) - const maxBytes = capped.reduce((max, e) => Math.max(max, e.bytes), 0) - const max = Math.log(maxBytes + 1) - const scale = (x) => Math.log(x + 1) / max - return capped.map(({ ext, bytes, objects }, i) => { - const color = colorPool.get(ext) - return ( - -
- {ext || 'other'} -
-
-
-
- {readableBytes(bytes)} -
-
-
-
- {readableQuantity(objects)} -
-
- ) - }) - }, - _: (r) => ( - <> - {R.times( - (i) => ( - - ), - MAX_EXTS, - )} - {AsyncResult.Err.is(r) && ( -
Data unavailable
- )} - - ), - }, - data, - )} -
- ) -} - -const skelData = R.times( - R.pipe( - () => R.times(Math.random, 30), - R.scan(R.add, 0), - R.drop(1), - R.map((v) => Math.log(100 * v + 1)), - ), - 8, -) - -const skelColors = [ - [M.colors.grey[300], M.colors.grey[100]], - [M.colors.grey[400], M.colors.grey[200]], -] - -const mkPulsingGradient = ({ colors: [c1, c2], animate = false }) => - SVG.Paint.Server( - - - {animate && ( - - )} - - , - ) - -function ChartSkel({ - height, - width, - lines = skelData.length, - animate = false, - children, -}) { - const data = React.useMemo( - () => R.times((i) => skelData[i % skelData.length], lines), - [lines], - ) - const fills = React.useMemo( - () => - R.times( - (i) => mkPulsingGradient({ colors: skelColors[i % skelColors.length], animate }), - lines, - ), - [lines, animate], - ) - return ( - - - {children} - - ) -} - -const ANALYTICS_WINDOW_OPTIONS = [ - { value: 31, label: 'Last 1 month' }, - { value: 91, label: 'Last 3 months' }, - { value: 182, label: 'Last 6 months' }, - { value: 365, label: 'Last 12 months' }, -] - -function DownloadsRange({ value, onChange, bucket, rawData }) { - const [anchor, setAnchor] = React.useState(null) - - const open = React.useCallback( - (e) => { - setAnchor(e.target) - }, - [setAnchor], - ) - - const close = React.useCallback(() => { - setAnchor(null) - }, [setAnchor]) - - const choose = React.useCallback( - (e) => { - onChange(e.target.value) - close() - }, - [onChange, close], - ) - - const { label } = ANALYTICS_WINDOW_OPTIONS.find((o) => o.value === value) || {} - - return ( - <> - - - {label} expand_more - - - {ANALYTICS_WINDOW_OPTIONS.map((o) => ( - - {o.label} - - ))} - - - Download to file - - - - ) -} - -const useStatsTipStyles = M.makeStyles((t) => ({ - root: { - background: fade(t.palette.grey[700], 0.9), - color: t.palette.common.white, - padding: [[6, 8]], - }, - head: { - display: 'flex', - justifyContent: 'space-between', - marginBottom: 4, - }, - date: {}, - total: {}, - extsContainer: { - alignItems: 'center', 
- display: 'grid', - gridAutoRows: 'auto', - gridColumnGap: 4, - gridTemplateColumns: 'max-content max-content 1fr', - }, - ext: { - fontSize: 12, - lineHeight: '16px', - maxWidth: 70, - opacity: 0.6, - overflow: 'hidden', - textAlign: 'right', - textOverflow: 'ellipsis', - }, - color: { - borderRadius: '50%', - height: 8, - opacity: 0.6, - width: 8, - }, - number: { - fontSize: 12, - lineHeight: '16px', - opacity: 0.6, - }, - hl: { - opacity: 1, - }, -})) - -function StatsTip({ stats, colorPool, className, ...props }) { - const classes = useStatsTipStyles() - return ( - -
-
{dateFns.format(stats.date, 'd MMM')}
-
- {readableQuantity(stats.combined.sum)} (+ - {readableQuantity(stats.combined.value)}) -
-
-
- {stats.byExt.map((s) => { - const hl = stats.highlighted ? stats.highlighted.ext === s.ext : true - return ( - -
{s.ext || 'other'}
-
-
- {readableQuantity(s.sum)} (+ - {readableQuantity(s.value)}) -
- - ) - })} -
- - ) -} - -const Transition = ({ TransitionComponent = M.Grow, children, ...props }) => { - const contentsRef = React.useRef(null) - if (props.in) contentsRef.current = children() - return ( - contentsRef.current && ( - {contentsRef.current} - ) - ) -} - -// use the same height as the bar chart: 20px per bar with 2px margin -const CHART_H = 22 * MAX_EXTS - 2 - -const useDownloadsStyles = M.makeStyles((t) => ({ - root: { - display: 'grid', - gridRowGap: t.spacing(0.25), - gridTemplateAreas: ` - "heading period" - "chart chart" - `, - gridTemplateColumns: 'min-content 1fr', - gridTemplateRows: 'auto auto', - [t.breakpoints.down('sm')]: { - gridTemplateAreas: ` - "heading" - "chart" - "period" - `, - gridTemplateColumns: '1fr', - gridTemplateRows: 'auto auto auto', - }, - }, - heading: { - ...t.typography.h6, - gridArea: 'heading', - marginBottom: t.spacing(1), - whiteSpace: 'nowrap', - [t.breakpoints.down('sm')]: { - marginBottom: 0, - textAlign: 'center', - }, - }, - ext: { - display: 'inline-block', - maxWidth: 100, - overflow: 'hidden', - textOverflow: 'ellipsis', - verticalAlign: 'bottom', - }, - period: { - display: 'flex', - gridArea: 'period', - justifyContent: 'center', - alignItems: 'center', - [t.breakpoints.down('sm')]: { - paddingBottom: t.spacing(1), - paddingTop: t.spacing(2), - }, - [t.breakpoints.up('md')]: { - height: 37, - justifyContent: 'flex-end', - }, - }, - chart: { - gridArea: 'chart', - position: 'relative', - }, - left: {}, - right: {}, - dateStats: { - maxWidth: 180, - position: 'absolute', - top: 0, - width: 'calc(50% - 8px)', - zIndex: 1, - '&$left': { - left: 0, - }, - '&$right': { - right: 0, - }, - }, - unavail: { - ...t.typography.body2, - alignItems: 'center', - display: 'flex', - height: '100%', - justifyContent: 'center', - position: 'absolute', - top: 0, - width: '100%', - }, -})) - -function Downloads({ bucket, colorPool, ...props }) { - const s3 = AWS.S3.use() - const today = React.useMemo(() => new Date(), []) - const classes 
= useDownloadsStyles() - const ref = React.useRef(null) - const { width } = useComponentSize(ref) - const [window, setWindow] = React.useState(ANALYTICS_WINDOW_OPTIONS[0].value) - const [cursor, setCursor] = React.useState(null) - const cursorStats = (counts) => { - if (!cursor) return null - const { date, ...combined } = counts.combined.counts[cursor.j] - const byExt = counts.byExtCollapsed.map((e) => ({ - ext: e.ext, - ...e.counts[cursor.j], - })) - const highlighted = cursor.i == null ? null : counts.byExtCollapsed[cursor.i] - const firstHalf = cursor.j < counts.combined.counts.length / 2 - return { date, combined, byExt, highlighted, firstHalf } - } - - const mkRawData = AsyncResult.case({ - Ok: (data) => `data:application/json,${JSON.stringify(data)}`, - _: () => null, - }) - - if (!cfg.analyticsBucket) { - return ( - -
Requires CloudTrail
-
- ) - } - - return ( - - {(data) => ( - -
- -
-
- {AsyncResult.case( - { - Ok: (counts) => { - const stats = cursorStats(counts) - const hl = stats && stats.highlighted - const ext = hl ? hl.ext || 'other' : 'total' - const total = hl ? hl.total : counts.combined.total - if (!counts.byExtCollapsed.length) return 'Downloads' - return ( - <> - Downloads ({ext}):{' '} - {readableQuantity(total)} - - ) - }, - _: () => 'Downloads', - }, - data, - )} -
-
- {AsyncResult.case( - { - Ok: (counts) => { - if (!counts.byExtCollapsed.length) { - return ( - -
No Data
-
- ) - } - - const stats = cursorStats(counts) - return ( - <> - - e.counts.map((i) => Math.log(i.sum + 1)), - )} - onCursor={setCursor} - height={CHART_H} - width={width} - areaFills={counts.byExtCollapsed.map((e) => - SVG.Paint.Color(colorPool.get(e.ext)), - )} - lineStroke={SVG.Paint.Color(M.colors.grey[500])} - extendL - extendR - px={10} - /> - - {() => ( - - )} - - - {() => ( - - )} - - - ) - }, - _: () => , - }, - data, - )} -
-
- )} -
- ) -} - -const useStatDisplayStyles = M.makeStyles((t) => ({ - root: { - alignItems: 'baseline', - display: 'flex', - '& + &': { - marginLeft: t.spacing(1.5), - [t.breakpoints.up('sm')]: { - marginLeft: t.spacing(4), - }, - [t.breakpoints.up('md')]: { - marginLeft: t.spacing(6), - }, - }, - }, - value: { - fontSize: t.typography.h6.fontSize, - fontWeight: t.typography.fontWeightBold, - letterSpacing: 0, - lineHeight: '20px', - [t.breakpoints.up('sm')]: { - fontSize: t.typography.h4.fontSize, - lineHeight: '32px', - }, - }, - label: { - ...t.typography.body2, - color: t.palette.grey[300], - lineHeight: 1, - marginLeft: t.spacing(0.5), - [t.breakpoints.up('sm')]: { - marginLeft: t.spacing(1), - }, - }, - skeletonContainer: { - alignItems: 'center', - height: 20, - [t.breakpoints.up('sm')]: { - height: 32, - }, - }, - skeleton: { - borderRadius: t.shape.borderRadius, - height: t.typography.h6.fontSize, - width: 96, - [t.breakpoints.up('sm')]: { - height: t.typography.h4.fontSize, - width: 120, - }, - }, -})) - -function StatDisplay({ value, label, format, fallback }) { - const classes = useStatDisplayStyles() - return R.pipe( - AsyncResult.case({ - Ok: R.pipe(format || R.identity, AsyncResult.Ok), - Err: R.pipe(fallback || R.identity, AsyncResult.Ok), - _: R.identity, - }), - AsyncResult.case({ - Ok: (v) => - v != null && ( - - {v} - {!!label && {label}} - - ), - _: () => ( -
- -
- ), - }), - )(value) -} - -const useHeadStyles = M.makeStyles((t) => ({ - root: { - position: 'relative', - [t.breakpoints.down('xs')]: { - borderRadius: 0, - }, - [t.breakpoints.up('sm')]: { - marginTop: t.spacing(2), - }, - }, - top: { - background: `center / cover url(${bg}) ${t.palette.grey[700]}`, - borderTopLeftRadius: t.shape.borderRadius, - borderTopRightRadius: t.shape.borderRadius, - color: t.palette.common.white, - overflow: 'hidden', - paddingBottom: t.spacing(3), - paddingLeft: t.spacing(2), - paddingRight: t.spacing(2), - paddingTop: t.spacing(4), - position: 'relative', - [t.breakpoints.up('sm')]: { - padding: t.spacing(4), - }, - [t.breakpoints.down('xs')]: { - borderRadius: 0, - }, - }, - settings: { - color: t.palette.common.white, - position: 'absolute', - right: t.spacing(2), - top: t.spacing(2), - }, -})) - -function Head({ s3, overviewUrl, bucket, description }) { - const classes = useHeadStyles() - const req = APIConnector.use() - const isRODA = !!overviewUrl && overviewUrl.includes(`/${RODA_BUCKET}/`) - const colorPool = useConst(() => mkKeyedPool(COLOR_MAP)) - const statsData = useData(requests.bucketStats, { req, s3, bucket, overviewUrl }) - const pkgCountData = useData(requests.countPackageRevisions, { req, bucket }) - const { urls } = NamedRoutes.use() - const isAdmin = redux.useSelector(authSelectors.isAdmin) - return ( - - - {bucket} - {!!description && ( - - {description} - - )} - {isRODA && ( - - - From the{' '} - - Registry of Open Data on AWS - - - - )} - - '? 
B'} - /> - '?'} - /> - null} - /> - - {isAdmin && ( - - - settings - - - )} - - - - - - - - - - - - ) -} - -function Readmes({ s3, overviewUrl, bucket }) { - return ( - - {AsyncResult.case({ - Ok: (rs) => - (rs.discovered.length > 0 || !!rs.forced) && ( - <> - {!!rs.forced && ( - - )} - {rs.discovered.map((h) => ( - - ))} - - ), - _: () => , - })} - - ) -} - -function Imgs({ s3, overviewUrl, inStack, bucket }) { - const req = APIConnector.use() - return ( - - {AsyncResult.case({ - Ok: (images) => (images.length ? : null), - _: () => , - })} - - ) -} - -function ThumbnailsWrapper({ - s3, - overviewUrl, - inStack, - bucket, - preferences: galleryPrefs, -}) { - if (cfg.noOverviewImages || !galleryPrefs) return null - if (!galleryPrefs.overview) return null - return ( - - {AsyncResult.case({ - Ok: (h) => - (!h || galleryPrefs.summarize) && ( - - ), - Err: () => , - Pending: () => , - _: () => null, - })} - - ) -} - -export default function Overview() { - const { bucket } = useParams() - - const s3 = AWS.S3.use() - const { bucketConfig } = useQueryS(BUCKET_CONFIG_QUERY, { bucket }) - const inStack = !!bucketConfig - const overviewUrl = bucketConfig?.overviewUrl - const description = bucketConfig?.description - const prefs = BucketPreferences.use() - return ( - - {inStack && ( - - - - )} - {bucketConfig ? 
( - - ) : ( - - {bucket} - - )} - - {BucketPreferences.Result.match( - { - Ok: ({ ui: { blocks } }) => ( - - ), - Pending: () => , - Init: R.F, - }, - prefs, - )} - - - ) -} diff --git a/catalog/app/containers/Bucket/Overview/ColorPool.ts b/catalog/app/containers/Bucket/Overview/ColorPool.ts new file mode 100644 index 00000000000..6be8be62756 --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/ColorPool.ts @@ -0,0 +1,16 @@ +export interface ColorPool { + get: (key: string) => string +} + +export function makeColorPool(pool: string[]): ColorPool { + const map: Record = {} + let poolIdx = 0 + const get = (key: string): string => { + if (!(key in map)) { + // eslint-disable-next-line no-plusplus + map[key] = pool[poolIdx++ % pool.length] + } + return map[key] + } + return { get } +} diff --git a/catalog/app/containers/Bucket/Overview/Downloads.spec.ts b/catalog/app/containers/Bucket/Overview/Downloads.spec.ts new file mode 100644 index 00000000000..0611d0b7b54 --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/Downloads.spec.ts @@ -0,0 +1,194 @@ +import { processBucketAccessCounts } from './Downloads' + +jest.mock( + 'constants/config', + jest.fn(() => ({})), +) + +describe('containers/Bucket/Overview/Downloads', () => { + describe('processBucketAccessCounts', () => { + it('should normalize the data received from GQL and compute some missing data', () => { + expect( + processBucketAccessCounts({ + __typename: 'BucketAccessCounts', + byExt: [ + { + __typename: 'AccessCountsGroup', + ext: 'csv', + counts: { + __typename: 'AccessCounts', + total: 10, + counts: [ + { + __typename: 'AccessCountForDate', + value: 1, + date: new Date('2021-08-01'), + }, + { + __typename: 'AccessCountForDate', + value: 2, + date: new Date('2021-08-02'), + }, + { + __typename: 'AccessCountForDate', + value: 3, + date: new Date('2021-08-03'), + }, + { + __typename: 'AccessCountForDate', + value: 4, + date: new Date('2021-08-04'), + }, + ], + }, + }, + ], + byExtCollapsed: [ + { + 
__typename: 'AccessCountsGroup', + ext: 'csv', + counts: { + __typename: 'AccessCounts', + total: 10, + counts: [ + { + __typename: 'AccessCountForDate', + value: 1, + date: new Date('2021-08-01'), + }, + { + __typename: 'AccessCountForDate', + value: 2, + date: new Date('2021-08-02'), + }, + { + __typename: 'AccessCountForDate', + value: 3, + date: new Date('2021-08-03'), + }, + { + __typename: 'AccessCountForDate', + value: 4, + date: new Date('2021-08-04'), + }, + ], + }, + }, + ], + combined: { + __typename: 'AccessCounts', + total: 10, + counts: [ + { + __typename: 'AccessCountForDate', + value: 1, + date: new Date('2021-08-01'), + }, + { + __typename: 'AccessCountForDate', + value: 2, + date: new Date('2021-08-02'), + }, + { + __typename: 'AccessCountForDate', + value: 3, + date: new Date('2021-08-03'), + }, + { + __typename: 'AccessCountForDate', + value: 4, + date: new Date('2021-08-04'), + }, + ], + }, + }), + ).toEqual({ + byExt: [ + { + ext: '.csv', + counts: { + total: 10, + counts: [ + { + date: new Date('2021-08-01'), + value: 1, + sum: 1, + }, + { + date: new Date('2021-08-02'), + value: 2, + sum: 3, + }, + { + date: new Date('2021-08-03'), + value: 3, + sum: 6, + }, + { + date: new Date('2021-08-04'), + value: 4, + sum: 10, + }, + ], + }, + }, + ], + byExtCollapsed: [ + { + ext: '.csv', + counts: { + total: 10, + counts: [ + { + date: new Date('2021-08-01'), + value: 1, + sum: 1, + }, + { + date: new Date('2021-08-02'), + value: 2, + sum: 3, + }, + { + date: new Date('2021-08-03'), + value: 3, + sum: 6, + }, + { + date: new Date('2021-08-04'), + value: 4, + sum: 10, + }, + ], + }, + }, + ], + combined: { + total: 10, + counts: [ + { + date: new Date('2021-08-01'), + value: 1, + sum: 1, + }, + { + date: new Date('2021-08-02'), + value: 2, + sum: 3, + }, + { + date: new Date('2021-08-03'), + value: 3, + sum: 6, + }, + { + date: new Date('2021-08-04'), + value: 4, + sum: 10, + }, + ], + }, + }) + }) + }) +}) diff --git 
a/catalog/app/containers/Bucket/Overview/Downloads.tsx b/catalog/app/containers/Bucket/Overview/Downloads.tsx new file mode 100644 index 00000000000..065519d45b2 --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/Downloads.tsx @@ -0,0 +1,593 @@ +import cx from 'classnames' +import * as dateFns from 'date-fns' +import * as Eff from 'effect' +import * as React from 'react' +import * as M from '@material-ui/core' +import { fade } from '@material-ui/core/styles' +import useComponentSize from '@rehooks/component-size' + +import StackedAreaChart from 'components/StackedAreaChart' +import cfg from 'constants/config' +import * as GQL from 'utils/GraphQL' +import log from 'utils/Logging' +import * as SVG from 'utils/SVG' +import { readableQuantity } from 'utils/string' + +import { ColorPool } from './ColorPool' + +import BUCKET_ACCESS_COUNTS_QUERY from './gql/BucketAccessCounts.generated' + +type GQLBucketAccessCounts = NonNullable< + GQL.DataForDoc['bucketAccessCounts'] +> +type GQLAccessCountsGroup = GQLBucketAccessCounts['byExt'][0] +type GQLAccessCounts = GQLBucketAccessCounts['combined'] +type GQLAccessCountForDate = GQLAccessCounts['counts'][0] + +interface ProcessedAccessCountForDate { + date: Date + value: number + sum: number +} + +interface ProcessedAccessCounts { + total: number + counts: readonly ProcessedAccessCountForDate[] +} + +interface ProcessedAccessCountsGroup { + ext: string + counts: ProcessedAccessCounts +} + +interface ProcessedBucketAccessCounts { + byExt: readonly ProcessedAccessCountsGroup[] + byExtCollapsed: readonly ProcessedAccessCountsGroup[] + combined: ProcessedAccessCounts +} + +const processAccessCountForDateArr = ( + counts: readonly GQLAccessCountForDate[], +): readonly ProcessedAccessCountForDate[] => + // compute running sum + Eff.Array.mapAccum(counts, 0, (acc, { value, date }) => [ + acc + value, + { + value, + date, + sum: acc + value, + }, + ])[1] + +const processAccessCounts = (counts: GQLAccessCounts): 
ProcessedAccessCounts => ({ + total: counts.total, + counts: processAccessCountForDateArr(counts.counts), +}) + +const processAccessCountsGroup = ( + group: GQLAccessCountsGroup, +): ProcessedAccessCountsGroup => ({ + ext: group.ext && `.${group.ext}`, + counts: processAccessCounts(group.counts), +}) + +export const processBucketAccessCounts = ( + counts: GQLBucketAccessCounts, +): ProcessedBucketAccessCounts => ({ + byExt: counts.byExt.map(processAccessCountsGroup), + byExtCollapsed: counts.byExtCollapsed.map(processAccessCountsGroup), + combined: processAccessCounts(counts.combined), +}) + +interface Cursor { + i: number | null // ext + j: number // date +} + +interface CursorStats { + date: Date + combined: { + sum: number + value: number + } + byExt: { + ext: string + sum: number + value: number + date: Date + }[] + highlighted: { + ext: string + counts: ProcessedAccessCounts + } | null + firstHalf: boolean +} + +function getCursorStats( + counts: ProcessedBucketAccessCounts, + cursor: Cursor | null, +): CursorStats | null { + if (!cursor) return null + + const { date, ...combined } = counts.combined.counts[cursor.j] + const byExt = counts.byExtCollapsed.map((e) => ({ + ext: e.ext, + ...e.counts.counts[cursor.j], + })) + const highlighted = cursor.i == null ? 
null : counts.byExtCollapsed[cursor.i] + const firstHalf = cursor.j < counts.combined.counts.length / 2 + return { date, combined, byExt, highlighted, firstHalf } +} + +const skelData = Eff.Array.makeBy( + 8, + Eff.flow( + () => Eff.Array.makeBy(30, Math.random), + Eff.Array.scan(0, Eff.Number.sum), + Eff.Array.drop(1), + Eff.Array.map((v) => Math.log(100 * v + 1)), + ), +) + +const skelColors = [ + [M.colors.grey[300], M.colors.grey[100]], + [M.colors.grey[400], M.colors.grey[200]], +] as const + +const mkPulsingGradient = ([c1, c2]: readonly [string, string], animate: boolean) => + SVG.Paint.Server( + + + {animate && ( + + )} + + , + ) + +interface ChartSkelProps { + height: number + width: number + lines?: number + animate?: boolean + children?: React.ReactNode +} + +function ChartSkel({ + height, + width, + lines = skelData.length, + animate = false, + children, +}: ChartSkelProps) { + const data = React.useMemo( + () => Eff.Array.makeBy(lines, (i) => skelData[i % skelData.length]), + [lines], + ) + const fills = React.useMemo( + () => + Eff.Array.makeBy(lines, (i) => + mkPulsingGradient(skelColors[i % skelColors.length], animate), + ), + [lines, animate], + ) + return ( + + {/* @ts-expect-error */} + + {children} + + ) +} + +const ANALYTICS_WINDOW_OPTIONS = [ + { value: 31, label: 'Last 1 month' }, + { value: 91, label: 'Last 3 months' }, + { value: 182, label: 'Last 6 months' }, + { value: 365, label: 'Last 12 months' }, +] + +interface DownloadsRangeProps { + value: number + onChange: (value: number) => void + bucket: string + data: Eff.Option.Option +} + +function DownloadsRange({ value, onChange, bucket, data }: DownloadsRangeProps) { + const [anchor, setAnchor] = React.useState(null) + + const open = React.useCallback( + (e) => { + setAnchor(e.target) + }, + [setAnchor], + ) + + const close = React.useCallback(() => { + setAnchor(null) + }, [setAnchor]) + + const choose = React.useCallback( + (e) => { + onChange(e.target.value) + close() + }, + [onChange, 
close], + ) + + const { label } = ANALYTICS_WINDOW_OPTIONS.find((o) => o.value === value) || {} + + const jsonData = React.useMemo( + () => + Eff.Option.match(data, { + onNone: () => null, + onSome: (d) => `data:application/json,${JSON.stringify(d)}`, + }), + [data], + ) + + return ( + <> + + + {label} expand_more + + + {ANALYTICS_WINDOW_OPTIONS.map((o) => ( + + {o.label} + + ))} + + + Download to file + + + + ) +} + +const useStatsTipStyles = M.makeStyles((t) => ({ + root: { + background: fade(t.palette.grey[700], 0.9), + color: t.palette.common.white, + padding: '6px 8px', + }, + head: { + display: 'flex', + justifyContent: 'space-between', + marginBottom: 4, + }, + date: {}, + total: {}, + extsContainer: { + alignItems: 'center', + display: 'grid', + gridAutoRows: 'auto', + gridColumnGap: 4, + gridTemplateColumns: 'max-content max-content 1fr', + }, + ext: { + fontSize: 12, + lineHeight: '16px', + maxWidth: 70, + opacity: 0.6, + overflow: 'hidden', + textAlign: 'right', + textOverflow: 'ellipsis', + }, + color: { + borderRadius: '50%', + height: 8, + opacity: 0.6, + width: 8, + }, + number: { + fontSize: 12, + lineHeight: '16px', + opacity: 0.6, + }, + hl: { + opacity: 1, + }, +})) + +interface StatsTipProps { + stats: CursorStats | null + colorPool: ColorPool + className?: string +} + +function StatsTip({ stats, colorPool, className, ...props }: StatsTipProps) { + const classes = useStatsTipStyles() + if (!stats) return null + return ( + +
+
{dateFns.format(stats.date, 'd MMM')}
+
+ {readableQuantity(stats.combined.sum)} (+ + {readableQuantity(stats.combined.value)}) +
+
+
+ {stats.byExt.map((s) => { + const hl = stats.highlighted ? stats.highlighted.ext === s.ext : true + return ( + +
{s.ext || 'other'}
+
+
+ {readableQuantity(s.sum)} (+ + {readableQuantity(s.value)}) +
+ + ) + })} +
+ + ) +} + +interface TransitionProps { + children: JSX.Element + in: boolean +} + +function Transition({ children, ...props }: TransitionProps) { + const contentsRef = React.useRef(null) + // when `in` is false, we want to keep the last rendered contents + if (props.in) contentsRef.current = children + return contentsRef.current && {contentsRef.current} +} + +const useStyles = M.makeStyles((t) => ({ + root: { + display: 'grid', + gridRowGap: t.spacing(0.25), + gridTemplateAreas: ` + "heading period" + "chart chart" + `, + gridTemplateColumns: 'min-content 1fr', + gridTemplateRows: 'auto auto', + [t.breakpoints.down('sm')]: { + gridTemplateAreas: ` + "heading" + "chart" + "period" + `, + gridTemplateColumns: '1fr', + gridTemplateRows: 'auto auto auto', + }, + }, + heading: { + ...t.typography.h6, + gridArea: 'heading', + marginBottom: t.spacing(1), + whiteSpace: 'nowrap', + [t.breakpoints.down('sm')]: { + marginBottom: 0, + textAlign: 'center', + }, + }, + ext: { + display: 'inline-block', + maxWidth: 100, + overflow: 'hidden', + textOverflow: 'ellipsis', + verticalAlign: 'bottom', + }, + period: { + display: 'flex', + gridArea: 'period', + justifyContent: 'center', + alignItems: 'center', + [t.breakpoints.down('sm')]: { + paddingBottom: t.spacing(1), + paddingTop: t.spacing(2), + }, + [t.breakpoints.up('md')]: { + height: 37, + justifyContent: 'flex-end', + }, + }, + chart: { + gridArea: 'chart', + position: 'relative', + }, + left: {}, + right: {}, + dateStats: { + maxWidth: 180, + position: 'absolute', + top: 0, + width: 'calc(50% - 8px)', + zIndex: 1, + '&$left': { + left: 0, + }, + '&$right': { + right: 0, + }, + }, + unavail: { + ...t.typography.body2, + alignItems: 'center', + display: 'flex', + height: '100%', + justifyContent: 'center', + position: 'absolute', + top: 0, + width: '100%', + }, +})) + +interface DownloadsProps extends M.BoxProps { + bucket: string + colorPool: ColorPool + chartHeight: number +} + +export default function Downloads({ + bucket, 
+ colorPool, + chartHeight, + ...props +}: DownloadsProps) { + const classes = useStyles() + const ref = React.useRef(null) + const { width } = useComponentSize(ref) + const [window, setWindow] = React.useState(ANALYTICS_WINDOW_OPTIONS[0].value) + + const [cursor, setCursor] = React.useState(null) + + const result = GQL.useQuery( + BUCKET_ACCESS_COUNTS_QUERY, + { bucket, window }, + { pause: !cfg.analyticsBucket }, + ) + + const processed = React.useMemo( + () => + Eff.pipe( + result, + ({ fetching, data, error }) => { + if (fetching) return Eff.Option.none() + if (error) log.error('Error fetching bucket access counts:', error) + return Eff.Option.fromNullable(data?.bucketAccessCounts) + }, + Eff.Option.map(processBucketAccessCounts), + ), + [result], + ) + + const processedWithCursor = React.useMemo( + () => + Eff.pipe( + processed, + Eff.Option.map((counts) => ({ + counts, + cursorStats: getCursorStats(counts, cursor), + })), + ), + [processed, cursor], + ) + + if (!cfg.analyticsBucket) { + return ( + +
Requires CloudTrail
+
+ ) + } + + return ( + +
+ +
+
+ {Eff.Option.match(processedWithCursor, { + onSome: ({ counts, cursorStats: stats }) => { + if (!counts?.byExtCollapsed.length) return 'Downloads' + + const hl = stats?.highlighted + const ext = hl ? hl.ext || 'other' : 'total' + const total = hl ? hl.counts.total : counts.combined.total + return ( + <> + Downloads ({ext}):{' '} + {readableQuantity(total)} + + ) + }, + onNone: () => 'Downloads', + })} +
+
+ {Eff.Option.match(processedWithCursor, { + onSome: ({ counts, cursorStats: stats }) => { + if (!counts.byExtCollapsed.length) { + return ( + +
No Data
+
+ ) + } + + return ( + <> + {/* @ts-expect-error */} + + e.counts.counts.map((i) => Math.log(i.sum + 1)), + )} + onCursor={setCursor} + height={chartHeight} + width={width} + areaFills={counts.byExtCollapsed.map((e) => + SVG.Paint.Color(colorPool.get(e.ext)), + )} + lineStroke={SVG.Paint.Color(M.colors.grey[500])} + extendL + extendR + px={10} + /> + + + + + + + + ) + }, + onNone: () => , + })} +
+
+ ) +} diff --git a/catalog/app/containers/Bucket/Overview/Header.tsx b/catalog/app/containers/Bucket/Overview/Header.tsx new file mode 100644 index 00000000000..3c76edd1330 --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/Header.tsx @@ -0,0 +1,431 @@ +import type AWSSDK from 'aws-sdk' +import cx from 'classnames' +import * as Eff from 'effect' +import * as React from 'react' +import { Link as RRLink } from 'react-router-dom' +import * as redux from 'react-redux' +import * as M from '@material-ui/core' + +import Skeleton from 'components/Skeleton' +import * as authSelectors from 'containers/Auth/selectors' +import * as APIConnector from 'utils/APIConnector' +import AsyncResult from 'utils/AsyncResult' +import { useData } from 'utils/Data' +import * as NamedRoutes from 'utils/NamedRoutes' +import { readableBytes, readableQuantity, formatQuantity } from 'utils/string' +import useConst from 'utils/useConstant' + +import * as requests from '../requests' + +import { ColorPool, makeColorPool } from './ColorPool' +import Downloads from './Downloads' + +import bg from './Overview-bg.jpg' + +// interface StatsData { +// exts: ExtData[] +// totalObjects: number +// totalBytes: number +// } + +interface ExtData { + ext: string + bytes: number + objects: number +} + +const RODA_LINK = 'https://registry.opendata.aws' +const RODA_BUCKET = 'quilt-open-data-bucket' +const MAX_EXTS = 7 +// must have length >= MAX_EXTS +const COLOR_MAP = [ + '#8ad3cb', + '#d7ce69', + '#bfbadb', + '#f4806c', + '#83b0d1', + '#b2de67', + '#bc81be', + '#f0b5d3', + '#7ba39f', + '#9894ad', + '#be7265', + '#94ad6b', +] + +const useObjectsByExtStyles = M.makeStyles((t) => ({ + root: { + display: 'grid', + gridAutoRows: 20, + gridColumnGap: t.spacing(1), + gridRowGap: t.spacing(0.25), + gridTemplateAreas: ` + ". 
heading heading" + `, + gridTemplateColumns: 'minmax(30px, max-content) 1fr minmax(30px, max-content)', + gridTemplateRows: 'auto', + [t.breakpoints.down('sm')]: { + gridTemplateAreas: ` + "heading heading heading" + `, + }, + }, + heading: { + ...t.typography.h6, + gridArea: 'heading', + marginBottom: t.spacing(1), + [t.breakpoints.down('sm')]: { + textAlign: 'center', + }, + }, + ext: { + color: t.palette.text.secondary, + gridColumn: 1, + fontSize: t.typography.overline.fontSize, + fontWeight: t.typography.fontWeightMedium, + letterSpacing: t.typography.subtitle2.letterSpacing, + lineHeight: t.typography.pxToRem(20), + textAlign: 'right', + }, + count: { + color: t.palette.text.secondary, + gridColumn: 3, + fontSize: t.typography.overline.fontSize, + fontWeight: t.typography.fontWeightMedium, + letterSpacing: t.typography.subtitle2.letterSpacing, + lineHeight: t.typography.pxToRem(20), + }, + bar: { + background: t.palette.action.hover, + gridColumn: 2, + }, + gauge: { + height: '100%', + position: 'relative', + }, + flip: {}, + size: { + color: t.palette.common.white, + fontSize: t.typography.overline.fontSize, + fontWeight: t.typography.fontWeightMedium, + letterSpacing: t.typography.subtitle2.letterSpacing, + lineHeight: t.typography.pxToRem(20), + position: 'absolute', + right: t.spacing(1), + '&$flip': { + color: t.palette.text.hint, + left: `calc(100% + ${t.spacing(1)}px)`, + right: 'auto', + }, + }, + skeleton: { + gridColumn: '1 / span 3', + }, + unavail: { + ...t.typography.body2, + alignItems: 'center', + display: 'flex', + gridColumn: '1 / span 3', + gridRow: `2 / span ${MAX_EXTS}`, + justifyContent: 'center', + }, +})) + +interface ObjectsByExtProps extends M.BoxProps { + data: $TSFixMe // AsyncResult + colorPool: ColorPool +} + +function ObjectsByExt({ data, colorPool, ...props }: ObjectsByExtProps) { + const classes = useObjectsByExtStyles() + return ( + +
Objects by File Extension
+ {AsyncResult.case( + { + Ok: (exts: ExtData[]) => { + const capped = exts.slice(0, MAX_EXTS) + const maxBytes = capped.reduce((max, e) => Math.max(max, e.bytes), 0) + const max = Math.log(maxBytes + 1) + const scale = (x: number) => Math.log(x + 1) / max + return capped.map(({ ext, bytes, objects }, i) => { + const color = colorPool.get(ext) + return ( + +
+ {ext || 'other'} +
+
+
+
+ {readableBytes(bytes)} +
+
+
+
+ {readableQuantity(objects)} +
+
+ ) + }) + }, + _: (r: $TSFixMe) => ( + <> + {Eff.Array.makeBy(MAX_EXTS, (i) => ( + + ))} + {AsyncResult.Err.is(r) && ( +
Data unavailable
+ )} + + ), + }, + data, + )} +
+ ) +} + +const useStatDisplayStyles = M.makeStyles((t) => ({ + root: { + alignItems: 'baseline', + display: 'flex', + '& + &': { + marginLeft: t.spacing(1.5), + [t.breakpoints.up('sm')]: { + marginLeft: t.spacing(4), + }, + [t.breakpoints.up('md')]: { + marginLeft: t.spacing(6), + }, + }, + }, + value: { + fontSize: t.typography.h6.fontSize, + fontWeight: t.typography.fontWeightBold, + letterSpacing: 0, + lineHeight: '20px', + [t.breakpoints.up('sm')]: { + fontSize: t.typography.h4.fontSize, + lineHeight: '32px', + }, + }, + label: { + ...t.typography.body2, + color: t.palette.grey[300], + lineHeight: 1, + marginLeft: t.spacing(0.5), + [t.breakpoints.up('sm')]: { + marginLeft: t.spacing(1), + }, + }, + skeletonContainer: { + alignItems: 'center', + height: 20, + [t.breakpoints.up('sm')]: { + height: 32, + }, + }, + skeleton: { + borderRadius: t.shape.borderRadius, + height: t.typography.h6.fontSize, + width: 96, + [t.breakpoints.up('sm')]: { + height: t.typography.h4.fontSize, + width: 120, + }, + }, +})) + +interface StatDisplayProps { + value: $TSFixMe // AsyncResult + label?: string + format?: (v: any) => any + fallback?: (v: any) => any +} + +function StatDisplay({ value, label, format, fallback }: StatDisplayProps) { + const classes = useStatDisplayStyles() + return Eff.pipe( + value, + AsyncResult.case({ + Ok: Eff.flow(format || Eff.identity, AsyncResult.Ok), + Err: Eff.flow(fallback || Eff.identity, AsyncResult.Ok), + _: Eff.identity, + }), + AsyncResult.case({ + Ok: (v: $TSFixMe) => + v != null && ( + + {v} + {!!label && {label}} + + ), + _: () => ( +
+ +
+ ), + }), + ) as JSX.Element +} + +// use the same height as the bar chart: 20px per bar with 2px margin +const DOWNLOADS_CHART_H = 22 * MAX_EXTS - 2 + +const useStyles = M.makeStyles((t) => ({ + root: { + position: 'relative', + [t.breakpoints.down('xs')]: { + borderRadius: 0, + }, + [t.breakpoints.up('sm')]: { + marginTop: t.spacing(2), + }, + }, + top: { + background: `center / cover url(${bg}) ${t.palette.grey[700]}`, + borderTopLeftRadius: t.shape.borderRadius, + borderTopRightRadius: t.shape.borderRadius, + color: t.palette.common.white, + overflow: 'hidden', + paddingBottom: t.spacing(3), + paddingLeft: t.spacing(2), + paddingRight: t.spacing(2), + paddingTop: t.spacing(4), + position: 'relative', + [t.breakpoints.up('sm')]: { + padding: t.spacing(4), + }, + [t.breakpoints.down('xs')]: { + borderRadius: 0, + }, + }, + settings: { + color: t.palette.common.white, + position: 'absolute', + right: t.spacing(2), + top: t.spacing(2), + }, +})) + +interface HeaderProps { + s3: AWSSDK.S3 + bucket: string + overviewUrl: string | null | undefined + description: string | null | undefined +} + +export default function Header({ s3, overviewUrl, bucket, description }: HeaderProps) { + const classes = useStyles() + const req = APIConnector.use() + const isRODA = !!overviewUrl && overviewUrl.includes(`/${RODA_BUCKET}/`) + const colorPool = useConst(() => makeColorPool(COLOR_MAP)) + const statsData = useData(requests.bucketStats, { req, s3, bucket, overviewUrl }) + const pkgCountData = useData(requests.countPackageRevisions, { req, bucket }) + const { urls } = NamedRoutes.use() + const isAdmin = redux.useSelector(authSelectors.isAdmin) + return ( + + + {bucket} + {!!description && ( + + {description} + + )} + {isRODA && ( + + + From the{' '} + + Registry of Open Data on AWS + + + + )} + + '? 
B'} + /> + '?'} + /> + null} + /> + + {isAdmin && ( + + + settings + + + )} + + + + + + + + + + + + ) +} diff --git a/catalog/app/containers/Bucket/Overview-bg.jpg b/catalog/app/containers/Bucket/Overview/Overview-bg.jpg similarity index 100% rename from catalog/app/containers/Bucket/Overview-bg.jpg rename to catalog/app/containers/Bucket/Overview/Overview-bg.jpg diff --git a/catalog/app/containers/Bucket/Overview/Overview.tsx b/catalog/app/containers/Bucket/Overview/Overview.tsx new file mode 100644 index 00000000000..2eee868fd4f --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/Overview.tsx @@ -0,0 +1,163 @@ +import type AWSSDK from 'aws-sdk' +import * as React from 'react' +import { useParams } from 'react-router-dom' +import * as M from '@material-ui/core' + +import cfg from 'constants/config' +import type * as Model from 'model' +import * as APIConnector from 'utils/APIConnector' +import * as AWS from 'utils/AWS' +import AsyncResult from 'utils/AsyncResult' +import * as BucketPreferences from 'utils/BucketPreferences' +import Data from 'utils/Data' +import * as GQL from 'utils/GraphQL' +import * as LinkedData from 'utils/LinkedData' + +import * as Gallery from '../Gallery' +import * as Summarize from '../Summarize' +import * as requests from '../requests' + +import Header from './Header' +import BUCKET_CONFIG_QUERY from './gql/BucketConfig.generated' + +interface BucketReadmes { + forced?: Model.S3.S3ObjectLocation + discovered: Model.S3.S3ObjectLocation[] +} + +interface ReadmesProps { + s3: AWSSDK.S3 + bucket: string + overviewUrl: string | undefined | null +} + +function Readmes({ s3, overviewUrl, bucket }: ReadmesProps) { + return ( + // @ts-expect-error + + {AsyncResult.case({ + Ok: (rs: BucketReadmes) => + (rs.discovered.length > 0 || !!rs.forced) && ( + <> + {!!rs.forced && ( + + )} + {rs.discovered.map((h) => ( + + ))} + + ), + _: () => , + })} + + ) +} + +interface ImgsProps { + s3: AWSSDK.S3 + bucket: string + overviewUrl: string | undefined 
| null + inStack: boolean +} + +function Imgs({ s3, overviewUrl, inStack, bucket }: ImgsProps) { + const req = APIConnector.use() + return ( + // @ts-expect-error + + {AsyncResult.case({ + Ok: (images: Model.S3.S3ObjectLocation[]) => + images.length ? : null, + _: () => , + })} + + ) +} + +interface ThumbnailsWrapperProps extends ImgsProps { + preferences?: + | false + | { + overview: boolean + summarize: boolean + } +} + +function ThumbnailsWrapper({ + s3, + overviewUrl, + inStack, + bucket, + preferences: galleryPrefs, +}: ThumbnailsWrapperProps) { + if (cfg.noOverviewImages || !galleryPrefs) return null + if (!galleryPrefs.overview) return null + return ( + // @ts-expect-error + + {AsyncResult.case({ + Ok: (h?: Model.S3.S3ObjectLocation) => + (!h || galleryPrefs.summarize) && ( + + ), + Err: () => , + Pending: () => , + _: () => null, + })} + + ) +} + +export default function Overview() { + const { bucket } = useParams<{ bucket: string }>() + + const s3 = AWS.S3.use() + const { bucketConfig } = GQL.useQueryS(BUCKET_CONFIG_QUERY, { bucket }) + const inStack = !!bucketConfig + const overviewUrl = bucketConfig?.overviewUrl + const description = bucketConfig?.description + const prefs = BucketPreferences.use() + return ( + + {inStack && ( + + + + )} + {bucketConfig ? ( +
+ ) : ( + + {bucket} + + )} + + {BucketPreferences.Result.match( + { + Ok: ({ ui: { blocks } }) => ( + + ), + Pending: () => , + Init: () => null, + }, + prefs, + )} + + + ) +} diff --git a/catalog/app/containers/Bucket/Overview/gql/BucketAccessCounts.generated.ts b/catalog/app/containers/Bucket/Overview/gql/BucketAccessCounts.generated.ts new file mode 100644 index 00000000000..f7a763654e1 --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/gql/BucketAccessCounts.generated.ts @@ -0,0 +1,207 @@ +/* eslint-disable @typescript-eslint/naming-convention */ +import type { TypedDocumentNode as DocumentNode } from '@graphql-typed-document-node/core' +import * as Types from '../../../../model/graphql/types.generated' + +export type AccessCountsSelectionFragment = { + readonly __typename: 'AccessCounts' +} & Pick & { + readonly counts: ReadonlyArray< + { readonly __typename: 'AccessCountForDate' } & Pick< + Types.AccessCountForDate, + 'date' | 'value' + > + > + } + +export type containers_Bucket_Overview_gql_BucketAccessCountsQueryVariables = + Types.Exact<{ + bucket: Types.Scalars['String'] + window: Types.Scalars['Int'] + }> + +export type containers_Bucket_Overview_gql_BucketAccessCountsQuery = { + readonly __typename: 'Query' +} & { + readonly bucketAccessCounts: Types.Maybe< + { readonly __typename: 'BucketAccessCounts' } & { + readonly byExt: ReadonlyArray< + { readonly __typename: 'AccessCountsGroup' } & Pick< + Types.AccessCountsGroup, + 'ext' + > & { + readonly counts: { + readonly __typename: 'AccessCounts' + } & AccessCountsSelectionFragment + } + > + readonly byExtCollapsed: ReadonlyArray< + { readonly __typename: 'AccessCountsGroup' } & Pick< + Types.AccessCountsGroup, + 'ext' + > & { + readonly counts: { + readonly __typename: 'AccessCounts' + } & AccessCountsSelectionFragment + } + > + readonly combined: { + readonly __typename: 'AccessCounts' + } & AccessCountsSelectionFragment + } + > +} + +export const AccessCountsSelectionFragmentDoc = { + kind: 
'Document', + definitions: [ + { + kind: 'FragmentDefinition', + name: { kind: 'Name', value: 'AccessCountsSelection' }, + typeCondition: { kind: 'NamedType', name: { kind: 'Name', value: 'AccessCounts' } }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { kind: 'Field', name: { kind: 'Name', value: 'total' } }, + { + kind: 'Field', + name: { kind: 'Name', value: 'counts' }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { kind: 'Field', name: { kind: 'Name', value: 'date' } }, + { kind: 'Field', name: { kind: 'Name', value: 'value' } }, + ], + }, + }, + ], + }, + }, + ], +} as unknown as DocumentNode +export const containers_Bucket_Overview_gql_BucketAccessCountsDocument = { + kind: 'Document', + definitions: [ + { + kind: 'OperationDefinition', + operation: 'query', + name: { kind: 'Name', value: 'containers_Bucket_Overview_gql_BucketAccessCounts' }, + variableDefinitions: [ + { + kind: 'VariableDefinition', + variable: { kind: 'Variable', name: { kind: 'Name', value: 'bucket' } }, + type: { + kind: 'NonNullType', + type: { kind: 'NamedType', name: { kind: 'Name', value: 'String' } }, + }, + }, + { + kind: 'VariableDefinition', + variable: { kind: 'Variable', name: { kind: 'Name', value: 'window' } }, + type: { + kind: 'NonNullType', + type: { kind: 'NamedType', name: { kind: 'Name', value: 'Int' } }, + }, + }, + ], + selectionSet: { + kind: 'SelectionSet', + selections: [ + { + kind: 'Field', + name: { kind: 'Name', value: 'bucketAccessCounts' }, + arguments: [ + { + kind: 'Argument', + name: { kind: 'Name', value: 'bucket' }, + value: { kind: 'Variable', name: { kind: 'Name', value: 'bucket' } }, + }, + { + kind: 'Argument', + name: { kind: 'Name', value: 'window' }, + value: { kind: 'Variable', name: { kind: 'Name', value: 'window' } }, + }, + ], + selectionSet: { + kind: 'SelectionSet', + selections: [ + { + kind: 'Field', + name: { kind: 'Name', value: 'byExt' }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { kind: 
'Field', name: { kind: 'Name', value: 'ext' } }, + { + kind: 'Field', + name: { kind: 'Name', value: 'counts' }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { + kind: 'FragmentSpread', + name: { kind: 'Name', value: 'AccessCountsSelection' }, + }, + ], + }, + }, + ], + }, + }, + { + kind: 'Field', + alias: { kind: 'Name', value: 'byExtCollapsed' }, + name: { kind: 'Name', value: 'byExt' }, + arguments: [ + { + kind: 'Argument', + name: { kind: 'Name', value: 'groups' }, + value: { kind: 'IntValue', value: '10' }, + }, + ], + selectionSet: { + kind: 'SelectionSet', + selections: [ + { kind: 'Field', name: { kind: 'Name', value: 'ext' } }, + { + kind: 'Field', + name: { kind: 'Name', value: 'counts' }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { + kind: 'FragmentSpread', + name: { kind: 'Name', value: 'AccessCountsSelection' }, + }, + ], + }, + }, + ], + }, + }, + { + kind: 'Field', + name: { kind: 'Name', value: 'combined' }, + selectionSet: { + kind: 'SelectionSet', + selections: [ + { + kind: 'FragmentSpread', + name: { kind: 'Name', value: 'AccessCountsSelection' }, + }, + ], + }, + }, + ], + }, + }, + ], + }, + }, + ...AccessCountsSelectionFragmentDoc.definitions, + ], +} as unknown as DocumentNode< + containers_Bucket_Overview_gql_BucketAccessCountsQuery, + containers_Bucket_Overview_gql_BucketAccessCountsQueryVariables +> + +export { containers_Bucket_Overview_gql_BucketAccessCountsDocument as default } diff --git a/catalog/app/containers/Bucket/Overview/gql/BucketAccessCounts.graphql b/catalog/app/containers/Bucket/Overview/gql/BucketAccessCounts.graphql new file mode 100644 index 00000000000..c54990cda53 --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/gql/BucketAccessCounts.graphql @@ -0,0 +1,27 @@ +fragment AccessCountsSelection on AccessCounts { + total + counts { + date + value + } +} + +query ($bucket: String!, $window: Int!) 
{ + bucketAccessCounts(bucket: $bucket, window: $window) { + byExt { + ext + counts { + ...AccessCountsSelection + } + } + byExtCollapsed: byExt(groups: 10) { + ext + counts { + ...AccessCountsSelection + } + } + combined { + ...AccessCountsSelection + } + } +} diff --git a/catalog/app/containers/Bucket/OverviewBucketConfig.generated.ts b/catalog/app/containers/Bucket/Overview/gql/BucketConfig.generated.ts similarity index 75% rename from catalog/app/containers/Bucket/OverviewBucketConfig.generated.ts rename to catalog/app/containers/Bucket/Overview/gql/BucketConfig.generated.ts index 89a16de328f..293100b338b 100644 --- a/catalog/app/containers/Bucket/OverviewBucketConfig.generated.ts +++ b/catalog/app/containers/Bucket/Overview/gql/BucketConfig.generated.ts @@ -1,12 +1,12 @@ /* eslint-disable @typescript-eslint/naming-convention */ import type { TypedDocumentNode as DocumentNode } from '@graphql-typed-document-node/core' -import * as Types from '../../model/graphql/types.generated' +import * as Types from '../../../../model/graphql/types.generated' -export type containers_Bucket_OverviewBucketConfigQueryVariables = Types.Exact<{ +export type containers_Bucket_Overview_gql_BucketConfigQueryVariables = Types.Exact<{ bucket: Types.Scalars['String'] }> -export type containers_Bucket_OverviewBucketConfigQuery = { +export type containers_Bucket_Overview_gql_BucketConfigQuery = { readonly __typename: 'Query' } & { readonly bucketConfig: Types.Maybe< @@ -17,13 +17,13 @@ export type containers_Bucket_OverviewBucketConfigQuery = { > } -export const containers_Bucket_OverviewBucketConfigDocument = { +export const containers_Bucket_Overview_gql_BucketConfigDocument = { kind: 'Document', definitions: [ { kind: 'OperationDefinition', operation: 'query', - name: { kind: 'Name', value: 'containers_Bucket_OverviewBucketConfig' }, + name: { kind: 'Name', value: 'containers_Bucket_Overview_gql_BucketConfig' }, variableDefinitions: [ { kind: 'VariableDefinition', @@ -61,8 +61,8 @@ 
export const containers_Bucket_OverviewBucketConfigDocument = { }, ], } as unknown as DocumentNode< - containers_Bucket_OverviewBucketConfigQuery, - containers_Bucket_OverviewBucketConfigQueryVariables + containers_Bucket_Overview_gql_BucketConfigQuery, + containers_Bucket_Overview_gql_BucketConfigQueryVariables > -export { containers_Bucket_OverviewBucketConfigDocument as default } +export { containers_Bucket_Overview_gql_BucketConfigDocument as default } diff --git a/catalog/app/containers/Bucket/OverviewBucketConfig.graphql b/catalog/app/containers/Bucket/Overview/gql/BucketConfig.graphql similarity index 100% rename from catalog/app/containers/Bucket/OverviewBucketConfig.graphql rename to catalog/app/containers/Bucket/Overview/gql/BucketConfig.graphql diff --git a/catalog/app/containers/Bucket/Overview/index.tsx b/catalog/app/containers/Bucket/Overview/index.tsx new file mode 100644 index 00000000000..1de667af70e --- /dev/null +++ b/catalog/app/containers/Bucket/Overview/index.tsx @@ -0,0 +1 @@ +export { default } from './Overview' diff --git a/catalog/app/containers/Bucket/PackageDialog/DialogError.tsx b/catalog/app/containers/Bucket/PackageDialog/DialogError.tsx index 291cd6c3a75..3e04966ab87 100644 --- a/catalog/app/containers/Bucket/PackageDialog/DialogError.tsx +++ b/catalog/app/containers/Bucket/PackageDialog/DialogError.tsx @@ -42,7 +42,7 @@ const errorDisplay = R.cond([ Please fix the{' '} workflows config{' '} according to{' '} - + the documentation . 
diff --git a/catalog/app/containers/Bucket/PackageDialog/SelectWorkflow.tsx b/catalog/app/containers/Bucket/PackageDialog/SelectWorkflow.tsx index 55f14548fcb..d3a1bc8ca4b 100644 --- a/catalog/app/containers/Bucket/PackageDialog/SelectWorkflow.tsx +++ b/catalog/app/containers/Bucket/PackageDialog/SelectWorkflow.tsx @@ -71,7 +71,7 @@ export default function SelectWorkflow({ {!!error && {error}} - + Learn about data quality workflows , or edit{' '} diff --git a/catalog/app/containers/Bucket/Queries/Athena/Database.tsx b/catalog/app/containers/Bucket/Queries/Athena/Database.tsx index 7f73a8e36f6..990c27b68b1 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/Database.tsx +++ b/catalog/app/containers/Bucket/Queries/Athena/Database.tsx @@ -205,15 +205,8 @@ const useStyles = M.makeStyles((t) => ({ display: 'flex', }, field: { - cursor: 'pointer', flexBasis: '50%', marginRight: t.spacing(2), - '& input': { - cursor: 'pointer', - }, - '& > *': { - cursor: 'pointer', - }, }, button: { marginLeft: t.spacing(1), diff --git a/catalog/app/containers/Bucket/Queries/Athena/History.tsx b/catalog/app/containers/Bucket/Queries/Athena/History.tsx index 23de3ea9811..db8a869c3b6 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/History.tsx +++ b/catalog/app/containers/Bucket/Queries/Athena/History.tsx @@ -277,6 +277,13 @@ export default function History({ bucket, executions, onLoadMore }: HistoryProps const { workgroup } = Model.use() if (!Model.hasValue(workgroup)) return null + if (!executions.length) + return ( + + + + ) + return ( <> @@ -304,7 +311,6 @@ export default function History({ bucket, executions, onLoadMore }: HistoryProps /> ), )} - {!executions.length && } {(hasPagination || !!onLoadMore) && (
{hasPagination && ( diff --git a/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx b/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx index b9713539063..f0387219d4d 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx +++ b/catalog/app/containers/Bucket/Queries/Athena/Workgroups.tsx @@ -97,8 +97,10 @@ function WorkgroupsEmpty({ error }: WorkgroupsEmptyProps) { Check{' '} - Athena Queries docs on - setup and correct usage + + Athena Queries docs + {' '} + on setup and correct usage diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts b/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts index bf8a0793922..4faf6dbbcee 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts +++ b/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts @@ -852,6 +852,48 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { unmount() }) }) + + it('return "not ready" if database is not ready', async () => { + startQueryExecution.mockImplementation( + reqThen(() => ({})), + ) + await act(async () => { + const { result, unmount, waitForNextUpdate } = renderHook(() => + requests.useQueryRun({ + workgroup: 'a', + catalogName: 'b', + database: Model.Loading, + queryBody: 'd', + }), + ) + await waitForNextUpdate() + expect(result.current[0]).toBeUndefined() + unmount() + }) + }) + + it('mark as ready to run but return error for confirmation if database is empty', async () => { + startQueryExecution.mockImplementation( + reqThen(() => ({})), + ) + await act(async () => { + const { result, unmount, waitForValueToChange } = renderHook(() => + requests.useQueryRun({ + workgroup: 'a', + catalogName: 'b', + database: '', + queryBody: 'd', + }), + ) + await waitForValueToChange(() => result.current) + await waitForValueToChange(() => result.current[0]) + expect(result.current[0]).toBeNull() + const run = await result.current[1](false) + 
expect(run).toBeInstanceOf(Error) + expect(run).toBe(requests.NO_DATABASE) + unmount() + }) + }) }) describe('useWorkgroup', () => { @@ -1036,7 +1078,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { } }) - it('does not change query if a valid query is already selected', async () => { + it('retains execution query when the list is changed', async () => { const queries = { list: [ { key: 'foo', name: 'Foo', body: 'SELECT * FROM foo' }, @@ -1063,8 +1105,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { { list: [ { key: 'baz', name: 'Baz', body: 'SELECT * FROM baz' }, - { key: 'foo', name: 'Foo', body: 'SELECT * FROM foo' }, - { key: 'bar', name: 'Bar', body: 'SELECT * FROM bar' }, + ...queries.list, ], }, execution, @@ -1077,6 +1118,45 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { throw new Error('No data') } }) + + it('does not change query when list is updated if a valid query is already selected', async () => { + const queries = { + list: [ + { key: 'foo', name: 'Foo', body: 'SELECT * FROM foo' }, + { key: 'bar', name: 'Bar', body: 'SELECT * FROM bar' }, + ], + } + const execution = null + const { result, rerender, waitForNextUpdate } = renderHook( + (props: Parameters) => useWrapper(props), + { + initialProps: [queries, execution], + }, + ) + + if (Model.hasData(result.current.value)) { + expect(result.current.value.body).toBe('SELECT * FROM foo') + } else { + throw new Error('No data') + } + await act(async () => { + rerender([ + { + list: [ + { key: 'baz', name: 'Baz', body: 'SELECT * FROM baz' }, + ...queries.list, + ], + }, + execution, + ]) + await waitForNextUpdate() + }) + if (Model.hasData(result.current.value)) { + expect(result.current.value.body).toBe('SELECT * FROM foo') + } else { + throw new Error('No data') + } + }) }) describe('useQueryBody', () => { @@ -1086,7 +1166,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { it('sets query body from query if query 
is ready', () => { const query = { name: 'Foo', key: 'foo', body: 'SELECT * FROM foo' } - const execution = {} + const execution = null const setQuery = jest.fn() const { result } = renderHook(() => useWrapper([query, setQuery, execution])) @@ -1098,7 +1178,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { } }) - it('sets query body from execution if query is not ready', () => { + it('sets query body from execution if query is not selected', () => { const query = null const execution = { query: 'SELECT * FROM bar' } const setQuery = jest.fn() @@ -1127,8 +1207,8 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { }) it('does not change value if query and execution are both not ready', async () => { - const query = null - const execution = null + const query = undefined + const execution = undefined const setQuery = jest.fn() const { result, rerender, waitForNextUpdate } = renderHook( @@ -1139,11 +1219,15 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { ) expect(result.current.value).toBeUndefined() + // That's not possible from UI now, + // but let's pretend UI is ready to handle user input act(() => { result.current.setValue('foo') }) expect(result.current.value).toBe('foo') + // We rerenderd hook but internal useEffect didn't rewrite the value + // to `undefined` as it was supposed to do on the first render await act(async () => { rerender([query, setQuery, execution]) await waitForNextUpdate() @@ -1166,7 +1250,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { expect(setQuery).toHaveBeenCalledWith(null) }) - it('retains value when execution and query are initially empty but later updates', async () => { + it('obtains value when execution and query are initially empty but later update', async () => { const initialQuery = null const initialExecution = null const setQuery = jest.fn() @@ -1178,8 +1262,10 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { }, ) - 
expect(result.current.value).toBeUndefined() + expect(result.current.value).toBeNull() + // Query was loaded with some value + // Execution is ready but it's still null await act(async () => { rerender([ { key: 'up', name: 'Updated', body: 'SELECT * FROM updated' }, @@ -1195,5 +1281,68 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { throw new Error('No data') } }) + + it('sets query body to null if query is null after being loaded', async () => { + const initialQuery = Model.Loading + const initialExecution = null + const setQuery = jest.fn() + + const { result, rerender, waitForNextUpdate } = renderHook( + (props: Parameters) => useWrapper(props), + { + initialProps: [ + initialQuery as Model.Value, + setQuery, + initialExecution, + ], + }, + ) + + expect(result.current.value).toBe(Model.Loading) + + await act(async () => { + rerender([null, setQuery, initialExecution]) + await waitForNextUpdate() + }) + + if (Model.hasValue(result.current.value)) { + expect(result.current.value).toBeNull() + } else { + throw new Error('Unexpected state') + } + }) + + it('retains value if selected query is null and we switch from some execution', async () => { + // That's not ideal, + // but we don't know what chanded the query body: execution page or user. + // So, at least, it is documented here. 
+ const initialQuery = null + const initialExecution = { id: 'any', query: 'SELECT * FROM updated' } + const setQuery = jest.fn() + + const { result, rerender, waitForNextUpdate } = renderHook( + (props: Parameters) => useWrapper(props), + { + initialProps: [ + initialQuery as Model.Value, + setQuery, + initialExecution, + ], + }, + ) + + expect(result.current.value).toBe('SELECT * FROM updated') + + await act(async () => { + rerender([initialQuery, setQuery, null]) + await waitForNextUpdate() + }) + + if (Model.hasValue(result.current.value)) { + expect(result.current.value).toBe('SELECT * FROM updated') + } else { + throw new Error('Unexpected state') + } + }) }) }) diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts b/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts index 227e6ecbde2..8da2450012b 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts +++ b/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts @@ -630,6 +630,9 @@ export function useQueryBody( if (Model.isError(query)) return null if (Model.hasData(query)) return query.body if (Model.hasData(execution) && execution.query) return execution.query + if (!Model.isReady(v) && Model.isReady(query) && Model.isReady(execution)) { + return null + } return v }) }, [execution, query]) diff --git a/catalog/app/containers/Bucket/Successors.tsx b/catalog/app/containers/Bucket/Successors.tsx index 3a0e5449b34..5fc1a51cc40 100644 --- a/catalog/app/containers/Bucket/Successors.tsx +++ b/catalog/app/containers/Bucket/Successors.tsx @@ -28,7 +28,7 @@ function EmptySlot({ bucket }: EmptySlotProps) { Learn more @@ -52,7 +52,7 @@ function ErrorSlot({ error }: ErrorSlotProps) { {error instanceof ERRORS.WorkflowsConfigInvalid && ( Please fix the workflows config according to{' '} - + the documentation diff --git a/catalog/app/containers/Bucket/Summarize.tsx b/catalog/app/containers/Bucket/Summarize.tsx index e644215263c..ebc2116b441 100644 --- 
a/catalog/app/containers/Bucket/Summarize.tsx +++ b/catalog/app/containers/Bucket/Summarize.tsx @@ -258,7 +258,7 @@ interface FilePreviewProps { expanded?: boolean file?: SummarizeFile handle: LogicalKeyResolver.S3SummarizeHandle - headingOverride: React.ReactNode + headingOverride?: React.ReactNode packageHandle?: PackageHandle } @@ -270,7 +270,7 @@ export function FilePreview({ packageHandle, }: FilePreviewProps) { const description = file?.description ? : null - const heading = headingOverride != null ? headingOverride : + const heading = headingOverride ?? const key = handle.logicalKey || handle.key const props = React.useMemo(() => Preview.getRenderProps(key, file), [key, file]) @@ -566,7 +566,7 @@ interface SummaryRootProps { s3: S3 bucket: string inStack: boolean - overviewUrl: string + overviewUrl?: string | null } export function SummaryRoot({ s3, bucket, inStack, overviewUrl }: SummaryRootProps) { @@ -618,7 +618,9 @@ function SummaryFailed({ error }: SummaryFailedProps) { Check your quilt_summarize.json file for errors. See the{' '} - + summarize docs {' '} for more. diff --git a/catalog/app/containers/Bucket/errors.tsx b/catalog/app/containers/Bucket/errors.tsx index fd45d3399ca..5a91f621431 100644 --- a/catalog/app/containers/Bucket/errors.tsx +++ b/catalog/app/containers/Bucket/errors.tsx @@ -124,7 +124,7 @@ const defaultHandlers: ErrorHandler[] = [
Learn how to configure the bucket for Quilt @@ -167,7 +167,7 @@ const defaultHandlers: ErrorHandler[] = [
Learn about access control in Quilt diff --git a/catalog/app/containers/Bucket/requests/requestsUntyped.js b/catalog/app/containers/Bucket/requests/requestsUntyped.js index 2ba9722da61..5efb639049f 100644 --- a/catalog/app/containers/Bucket/requests/requestsUntyped.js +++ b/catalog/app/containers/Bucket/requests/requestsUntyped.js @@ -1,7 +1,6 @@ import { join as pathJoin } from 'path' -import * as dateFns from 'date-fns' -import * as FP from 'fp-ts' +import * as Eff from 'effect' import sampleSize from 'lodash/fp/sampleSize' import * as R from 'ramda' @@ -9,7 +8,6 @@ import quiltSummarizeSchema from 'schemas/quilt_summarize.json' import { SUPPORTED_EXTENSIONS as IMG_EXTS } from 'components/Thumbnail' import * as quiltConfigs from 'constants/quiltConfigs' -import cfg from 'constants/config' import * as Resource from 'utils/Resource' import { makeSchemaValidator } from 'utils/json-schema' import mkSearch from 'utils/mkSearch' @@ -24,106 +22,6 @@ import { decodeS3Key } from './utils' const promiseProps = (obj) => Promise.all(Object.values(obj)).then(R.zipObj(Object.keys(obj))) -const MAX_BANDS = 10 - -export const bucketAccessCounts = async ({ s3, bucket, today, window }) => { - if (!cfg.analyticsBucket) - throw new Error('bucketAccessCounts: "analyticsBucket" required') - - const dates = R.unfold( - (daysLeft) => daysLeft >= 0 && [dateFns.subDays(today, daysLeft), daysLeft - 1], - window, - ) - - try { - const result = await s3Select({ - s3, - Bucket: cfg.analyticsBucket, - Key: `${ACCESS_COUNTS_PREFIX}/Exts.csv`, - Expression: ` - SELECT ext, counts FROM s3object - WHERE eventname = 'GetObject' - AND bucket = '${sqlEscape(bucket)}' - `, - InputSerialization: { - CSV: { - FileHeaderInfo: 'Use', - AllowQuotedRecordDelimiter: true, - }, - }, - }) - return FP.function.pipe( - result, - R.map((r) => { - const recordedCounts = JSON.parse(r.counts) - const { counts, total } = dates.reduce( - (acc, date) => { - const value = recordedCounts[dateFns.format(date, 
'yyyy-MM-dd')] || 0 - const sum = acc.total + value - return { - total: sum, - counts: acc.counts.concat({ date, value, sum }), - } - }, - { total: 0, counts: [] }, - ) - return { ext: r.ext && `.${r.ext}`, total, counts } - }), - R.filter((i) => i.total), - R.sort(R.descend(R.prop('total'))), - R.applySpec({ - byExt: R.identity, - byExtCollapsed: (bands) => { - if (bands.length <= MAX_BANDS) return bands - const [other, rest] = R.partition((b) => b.ext === '', bands) - const [toKeep, toMerge] = R.splitAt(MAX_BANDS - 1, rest) - const merged = [...other, ...toMerge].reduce((acc, band) => ({ - ext: '', - total: acc.total + band.total, - counts: R.zipWith( - (a, b) => ({ - date: a.date, - value: a.value + b.value, - sum: a.sum + b.sum, - }), - acc.counts, - band.counts, - ), - })) - return R.sort(R.descend(R.prop('total')), toKeep.concat(merged)) - }, - combined: { - total: R.reduce((sum, { total }) => sum + total, 0), - counts: R.pipe( - R.pluck('counts'), - R.transpose, - R.map( - R.reduce( - (acc, { date, value, sum }) => ({ - date, - value: acc.value + value, - sum: acc.sum + sum, - }), - { value: 0, sum: 0 }, - ), - ), - ), - }, - }), - ) - } catch (e) { - // eslint-disable-next-line no-console - console.log('Unable to fetch bucket access counts:') - // eslint-disable-next-line no-console - console.error(e) - return { - byExt: [], - byExtCollapsed: [], - combined: { total: 0, counts: [] }, - } - } -} - const parseDate = (d) => d && new Date(d) const getOverviewBucket = (url) => s3paths.parseS3Url(url).bucket @@ -373,7 +271,7 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) = Key: getOverviewKey(overviewUrl, 'summary.json'), }) .promise() - return FP.function.pipe( + return Eff.pipe( JSON.parse(r.Body.toString('utf-8')), R.pathOr([], ['aggregations', 'other', 'keys', 'buckets']), R.map((b) => ({ @@ -403,7 +301,7 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) = try { const qs = mkSearch({ action: 
'sample', index: bucket }) const result = await req(`/search${qs}`) - return FP.function.pipe( + return Eff.pipe( result, R.pathOr([], ['aggregations', 'objects', 'buckets']), R.map((h) => { @@ -425,7 +323,7 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) = const result = await s3 .listObjectsV2({ Bucket: bucket, EncodingType: 'url' }) .promise() - return FP.function.pipe( + return Eff.pipe( result, R.path(['Contents']), R.map(R.evolve({ Key: decodeS3Key })), @@ -477,7 +375,7 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => { Key: getOverviewKey(overviewUrl, 'summary.json'), }) .promise() - return FP.function.pipe( + return Eff.pipe( JSON.parse(r.Body.toString('utf-8')), R.pathOr([], ['aggregations', 'images', 'keys', 'buckets']), R.map((b) => ({ @@ -498,7 +396,7 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => { try { const qs = mkSearch({ action: 'images', index: bucket }) const result = await req(`/search${qs}`) - return FP.function.pipe( + return Eff.pipe( result, R.pathOr([], ['aggregations', 'objects', 'buckets']), R.map((h) => { @@ -519,7 +417,7 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => { const result = await s3 .listObjectsV2({ Bucket: bucket, EncodingType: 'url' }) .promise() - return FP.function.pipe( + return Eff.pipe( result, R.path(['Contents']), R.map(R.evolve({ Key: decodeS3Key })), @@ -656,8 +554,6 @@ export const summarize = async ({ s3, handle: inputHandle, resolveLogicalKey }) } } -const MANIFESTS_PREFIX = '.quilt/packages/' - const withCalculatedRevisions = (s) => ({ scripted_metric: { init_script: ` @@ -712,113 +608,33 @@ export const countPackageRevisions = ({ req, bucket, name }) => .then(R.path(['aggregations', 'revisions', 'value'])) .catch(errors.catchErrors()) -// TODO: Preview endpoint only allows up to 512 lines right now. Increase it to 1000. 
-const MAX_PACKAGE_ENTRIES = 500 - -// TODO: remove -export const getRevisionData = async ({ - endpoint, - sign, - bucket, - hash, - maxKeys = MAX_PACKAGE_ENTRIES, -}) => { - const url = sign({ bucket, key: `${MANIFESTS_PREFIX}${hash}` }) - const maxLines = maxKeys + 2 // 1 for the meta and 1 for checking overflow - const r = await fetch( - `${endpoint}/preview?url=${encodeURIComponent(url)}&input=txt&line_count=${maxLines}`, - ) - const [header, ...entries] = await r - .json() - .then((json) => json.info.data.head.map((l) => JSON.parse(l))) - const files = Math.min(maxKeys, entries.length) - const bytes = entries.slice(0, maxKeys).reduce((sum, i) => sum + i.size, 0) - const truncated = entries.length > maxKeys - return { - stats: { files, bytes, truncated }, - message: header.message, - header, - } -} - -const s3Select = ({ - s3, - ExpressionType = 'SQL', - InputSerialization = { JSON: { Type: 'LINES' } }, - ...rest -}) => - s3 - .selectObjectContent({ - ExpressionType, - InputSerialization, - OutputSerialization: { JSON: {} }, - ...rest, - }) - .promise() - .then( - R.pipe( - R.prop('Payload'), - R.reduce((acc, evt) => { - if (!evt.Records) return acc - const s = evt.Records.Payload.toString() - return acc + s - }, ''), - R.trim, - R.ifElse(R.isEmpty, R.always([]), R.pipe(R.split('\n'), R.map(JSON.parse))), - ), - ) - -const sqlEscape = (arg) => arg.replace(/'/g, "''") +// const MANIFESTS_PREFIX = '.quilt/packages/' -const ACCESS_COUNTS_PREFIX = 'AccessCounts' - -const queryAccessCounts = async ({ s3, type, query, today, window = 365 }) => { - try { - const records = await s3Select({ - s3, - Bucket: cfg.analyticsBucket, - Key: `${ACCESS_COUNTS_PREFIX}/${type}.csv`, - Expression: query, - InputSerialization: { - CSV: { - FileHeaderInfo: 'Use', - AllowQuotedRecordDelimiter: true, - }, - }, - }) - - const recordedCounts = records.length ? 
JSON.parse(records[0].counts) : {} - - const counts = R.times((i) => { - const date = dateFns.subDays(today, window - i - 1) - return { - date, - value: recordedCounts[dateFns.format(date, 'yyyy-MM-dd')] || 0, - } - }, window) - - const total = Object.values(recordedCounts).reduce(R.add, 0) - - return { counts, total } - } catch (e) { - // eslint-disable-next-line no-console - console.log('queryAccessCounts: error caught') - // eslint-disable-next-line no-console - console.error(e) - throw e - } -} - -export const objectAccessCounts = ({ s3, bucket, path, today }) => - queryAccessCounts({ - s3, - type: 'Objects', - query: ` - SELECT counts FROM s3object - WHERE eventname = 'GetObject' - AND bucket = '${sqlEscape(bucket)}' - AND "key" = '${sqlEscape(path)}' - `, - today, - window: 365, - }) +// TODO: Preview endpoint only allows up to 512 lines right now. Increase it to 1000. +// const MAX_PACKAGE_ENTRIES = 500 + +// TODO: remove: used in a comented-out code in PackageList +// export const getRevisionData = async ({ +// endpoint, +// sign, +// bucket, +// hash, +// maxKeys = MAX_PACKAGE_ENTRIES, +// }) => { +// const url = sign({ bucket, key: `${MANIFESTS_PREFIX}${hash}` }) +// const maxLines = maxKeys + 2 // 1 for the meta and 1 for checking overflow +// const r = await fetch( +// `${endpoint}/preview?url=${encodeURIComponent(url)}&input=txt&line_count=${maxLines}`, +// ) +// const [header, ...entries] = await r +// .json() +// .then((json) => json.info.data.head.map((l) => JSON.parse(l))) +// const files = Math.min(maxKeys, entries.length) +// const bytes = entries.slice(0, maxKeys).reduce((sum, i) => sum + i.size, 0) +// const truncated = entries.length > maxKeys +// return { +// stats: { files, bytes, truncated }, +// message: header.message, +// header, +// } +// } diff --git a/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx b/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx index 4fa912fe30d..d459995f009 100644 --- 
a/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx +++ b/catalog/app/containers/NavBar/Suggestions/Suggestions.tsx @@ -61,7 +61,10 @@ function SuggestionsList({ items, selected }: SuggestionsProps) { ))}
Learn the{' '} - + advanced search syntax {' '} for query string queries in ElasticSearch {ES_V}. diff --git a/catalog/app/embed/File.js b/catalog/app/embed/File.js index bc47739202b..247a19ff163 100644 --- a/catalog/app/embed/File.js +++ b/catalog/app/embed/File.js @@ -1,7 +1,5 @@ import { basename } from 'path' -import * as dateFns from 'date-fns' -import * as R from 'ramda' import * as React from 'react' import { Link, useLocation, useParams } from 'react-router-dom' import * as M from '@material-ui/core' @@ -9,22 +7,21 @@ import * as M from '@material-ui/core' import * as BreadCrumbs from 'components/BreadCrumbs' import Message from 'components/Message' import * as Preview from 'components/Preview' -import Sparkline from 'components/Sparkline' import cfg from 'constants/config' import * as Notifications from 'containers/Notifications' import * as AWS from 'utils/AWS' import AsyncResult from 'utils/AsyncResult' import { useData } from 'utils/Data' import * as NamedRoutes from 'utils/NamedRoutes' -import * as SVG from 'utils/SVG' import { linkStyle } from 'utils/StyledLink' import copyToClipboard from 'utils/clipboard' import * as Format from 'utils/format' import parseSearch from 'utils/parseSearch' import * as s3paths from 'utils/s3paths' -import { readableBytes, readableQuantity } from 'utils/string' +import { readableBytes } from 'utils/string' import FileCodeSamples from 'containers/Bucket/CodeSamples/File' +import Analytics from 'containers/Bucket/File/Analytics' import FileProperties from 'containers/Bucket/FileProperties' import * as FileView from 'containers/Bucket/FileView' import Section from 'containers/Bucket/Section' @@ -229,74 +226,6 @@ function VersionInfo({ bucket, path, version }) { ) } -function Analytics({ bucket, path }) { - const [cursor, setCursor] = React.useState(null) - const s3 = AWS.S3.use() - const today = React.useMemo(() => new Date(), []) - const formatDate = (date) => - dateFns.format( - date, - today.getFullYear() === 
date.getFullYear() ? 'd MMM' : 'd MMM yyyy', - ) - const data = useData(requests.objectAccessCounts, { - s3, - bucket, - path, - today, - }) - - const defaultExpanded = data.case({ - Ok: ({ total }) => !!total, - _: () => false, - }) - - return ( -
- {data.case({ - Ok: ({ counts, total }) => - total ? ( - - - Downloads - - {readableQuantity(cursor === null ? total : counts[cursor].value)} - - - {cursor === null - ? `${counts.length} days` - : formatDate(counts[cursor].date)} - - - - - - - , - )} - /> - - - ) : ( - No analytics available - ), - Err: () => No analytics available, - _: () => , - })} -
- ) -} - function CenteredProgress() { return ( diff --git a/catalog/app/model/graphql/schema.generated.ts b/catalog/app/model/graphql/schema.generated.ts index ba8ed87e3fd..be04791e97a 100644 --- a/catalog/app/model/graphql/schema.generated.ts +++ b/catalog/app/model/graphql/schema.generated.ts @@ -82,6 +82,41 @@ export default { ], interfaces: [], }, + { + kind: 'OBJECT', + name: 'AccessCountsGroup', + fields: [ + { + name: 'ext', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'SCALAR', + name: 'String', + ofType: null, + }, + }, + args: [], + }, + { + name: 'counts', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'OBJECT', + name: 'AccessCounts', + ofType: null, + }, + }, + args: [], + }, + ], + interfaces: [], + }, + { + kind: 'SCALAR', + name: 'String', + }, { kind: 'OBJECT', name: 'AdminMutations', @@ -208,10 +243,6 @@ export default { ], interfaces: [], }, - { - kind: 'SCALAR', - name: 'String', - }, { kind: 'OBJECT', name: 'AdminQueries', @@ -365,6 +396,52 @@ export default { }, ], }, + { + kind: 'OBJECT', + name: 'BucketAccessCounts', + fields: [ + { + name: 'byExt', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'LIST', + ofType: { + kind: 'NON_NULL', + ofType: { + kind: 'OBJECT', + name: 'AccessCountsGroup', + ofType: null, + }, + }, + }, + }, + args: [ + { + name: 'groups', + type: { + kind: 'SCALAR', + name: 'Int', + ofType: null, + }, + }, + ], + }, + { + name: 'combined', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'OBJECT', + name: 'AccessCounts', + ofType: null, + }, + }, + args: [], + }, + ], + interfaces: [], + }, { kind: 'UNION', name: 'BucketAddResult', @@ -4188,6 +4265,81 @@ export default { }, args: [], }, + { + name: 'bucketAccessCounts', + type: { + kind: 'OBJECT', + name: 'BucketAccessCounts', + ofType: null, + }, + args: [ + { + name: 'bucket', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'SCALAR', + name: 'String', + ofType: null, + }, + }, + }, + { + name: 'window', + type: { + kind: 'NON_NULL', + ofType: { + kind: 
'SCALAR', + name: 'Int', + ofType: null, + }, + }, + }, + ], + }, + { + name: 'objectAccessCounts', + type: { + kind: 'OBJECT', + name: 'AccessCounts', + ofType: null, + }, + args: [ + { + name: 'bucket', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'SCALAR', + name: 'String', + ofType: null, + }, + }, + }, + { + name: 'key', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'SCALAR', + name: 'String', + ofType: null, + }, + }, + }, + { + name: 'window', + type: { + kind: 'NON_NULL', + ofType: { + kind: 'SCALAR', + name: 'Int', + ofType: null, + }, + }, + }, + ], + }, { name: 'admin', type: { diff --git a/catalog/app/model/graphql/types.generated.ts b/catalog/app/model/graphql/types.generated.ts index 8ad7b159639..fb5d1b2a862 100644 --- a/catalog/app/model/graphql/types.generated.ts +++ b/catalog/app/model/graphql/types.generated.ts @@ -36,6 +36,12 @@ export interface AccessCounts { readonly counts: ReadonlyArray } +export interface AccessCountsGroup { + readonly __typename: 'AccessCountsGroup' + readonly ext: Scalars['String'] + readonly counts: AccessCounts +} + export interface AdminMutations { readonly __typename: 'AdminMutations' readonly user: UserAdminMutations @@ -89,6 +95,16 @@ export type BrowsingSessionDisposeResult = Ok | OperationError export type BrowsingSessionRefreshResult = BrowsingSession | InvalidInput | OperationError +export interface BucketAccessCounts { + readonly __typename: 'BucketAccessCounts' + readonly byExt: ReadonlyArray + readonly combined: AccessCounts +} + +export interface BucketAccessCountsbyExtArgs { + groups: Maybe +} + export interface BucketAddInput { readonly name: Scalars['String'] readonly title: Scalars['String'] @@ -864,6 +880,8 @@ export interface Query { readonly searchMoreObjects: ObjectsSearchMoreResult readonly searchMorePackages: PackagesSearchMoreResult readonly subscription: SubscriptionState + readonly bucketAccessCounts: Maybe + readonly objectAccessCounts: Maybe readonly admin: AdminQueries readonly 
policies: ReadonlyArray readonly policy: Maybe @@ -910,6 +928,17 @@ export interface QuerysearchMorePackagesArgs { size?: Maybe } +export interface QuerybucketAccessCountsArgs { + bucket: Scalars['String'] + window: Scalars['Int'] +} + +export interface QueryobjectAccessCountsArgs { + bucket: Scalars['String'] + key: Scalars['String'] + window: Scalars['Int'] +} + export interface QuerypolicyArgs { id: Scalars['ID'] } diff --git a/catalog/app/utils/AWS/S3.js b/catalog/app/utils/AWS/S3.js index a51c29b5566..6b052a52927 100644 --- a/catalog/app/utils/AWS/S3.js +++ b/catalog/app/utils/AWS/S3.js @@ -43,44 +43,28 @@ function useSmartS3() { return useConstant(() => { class SmartS3 extends S3 { - getReqType(req) { + shouldSign(req) { const bucket = req.params.Bucket if (cfg.mode === 'LOCAL') { - return 'signed' + return true } - if (isAuthenticated()) { - if ( - // sign if operation is not bucket-specific - // (not sure if there are any such operations that can be used from the browser) - !bucket || - cfg.analyticsBucket === bucket || + if ( + isAuthenticated() && + // sign if operation is not bucket-specific + // (not sure if there are any such operations that can be used from the browser) + (!bucket || cfg.serviceBucket === bucket || statusReportsBucket === bucket || - (cfg.mode !== 'OPEN' && isInStack(bucket)) - ) { - return 'signed' - } - } else if (req.operation === 'selectObjectContent') { - return 'select' + (cfg.mode !== 'OPEN' && isInStack(bucket))) + ) { + return true } - return 'unsigned' - } - - populateURI(req) { - if (req.service.getReqType(req) === 'select') { - return - } - super.populateURI(req) + return false } customRequestHandler(req) { - const b = req.params.Bucket - const type = this.getReqType(req) - - if (b) { - const endpoint = new AWS.Endpoint( - type === 'select' ? 
`${cfg.apiGatewayEndpoint}/s3select/` : cfg.s3Proxy, - ) + if (req.params.Bucket) { + const endpoint = new AWS.Endpoint(cfg.s3Proxy) req.on('sign', () => { if (req.httpRequest[PRESIGN]) return @@ -96,10 +80,7 @@ function useSmartS3() { const basePath = endpoint.path.replace(/\/$/, '') req.httpRequest.endpoint = endpoint - req.httpRequest.path = - type === 'select' - ? `${basePath}${origPath}` - : `${basePath}/${origEndpoint.host}${origPath}` + req.httpRequest.path = `${basePath}/${origEndpoint.host}${origPath}` }) req.on( 'retry', @@ -138,9 +119,8 @@ function useSmartS3() { if (forceProxy) { req.httpRequest[FORCE_PROXY] = true } - const type = this.getReqType(req) - if (type !== 'signed') { + if (!this.shouldSign(req)) { req.toUnauthenticated() } diff --git a/catalog/app/utils/AWS/Signer.js b/catalog/app/utils/AWS/Signer.js index 0c0c24b1ac8..404fe0f4d73 100644 --- a/catalog/app/utils/AWS/Signer.js +++ b/catalog/app/utils/AWS/Signer.js @@ -25,7 +25,7 @@ export function useS3Signer({ urlExpiration: exp, forceProxy = false } = {}) { const statusReportsBucket = useStatusReportsBucket() const s3 = S3.use() const inStackOrSpecial = React.useCallback( - (b) => isInStack(b) || cfg.analyticsBucket === b || statusReportsBucket === b, + (b) => isInStack(b) || statusReportsBucket === b, [isInStack, statusReportsBucket], ) return React.useCallback( diff --git a/catalog/app/utils/GraphQL/Provider.tsx b/catalog/app/utils/GraphQL/Provider.tsx index 592b71e58e6..05c34cd7238 100644 --- a/catalog/app/utils/GraphQL/Provider.tsx +++ b/catalog/app/utils/GraphQL/Provider.tsx @@ -90,6 +90,8 @@ export default function GraphQLProvider({ children }: React.PropsWithChildren<{} keys: { AccessCountForDate: () => null, AccessCounts: () => null, + AccessCountsGroup: () => null, + BucketAccessCounts: () => null, BucketConfig: (b) => b.name as string, Canary: (c) => c.name as string, Collaborator: (c) => c.username as string, diff --git a/docs/Catalog/SearchQuery.md b/docs/Catalog/SearchQuery.md 
index 4b51655b4a4..1f4aaa7951e 100644 --- a/docs/Catalog/SearchQuery.md +++ b/docs/Catalog/SearchQuery.md @@ -128,10 +128,13 @@ run them. You must first set up you an Athena workgroup and Saved queries per [AWS's Athena documentation](https://docs.aws.amazon.com/athena/latest/ug/getting-started.html). ### Configuration -You can hide the "Queries" tab by setting `ui > nav > queries: false` ([learn more](./Preferences.md)). +You can hide the "Queries" tab by setting `ui > nav > queries: false`. +It is also possible to set the default workgroup in `ui > athena > defaultWorkgroup: 'your-default-workgroup'`. +[Learn more](./Preferences.md). + +The tab will remember the last workgroup, catalog name and database that was selected. ### Basics "Run query" executes the selected query and waits for the result. -![](../imgs/athena-ui.png) -![](../imgs/athena-history.png) +![Athena page](../imgs/athena-ui.png) diff --git a/docs/README.md b/docs/README.md index eb5afe0ba28..0582c038cb3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,6 +7,30 @@ data integrity at scale. --- +## How to Get Started + +Quilt consists of three main elements: + +- [Quilt Platform](#quilt-platform-overview) which is a cloud platform for + interacting with, visualizing, searching and querying Quilt Packages, which is + hosted in an organization's AWS Account. +- [Quilt Python SDK](#quilt-python-sdk) which provides the ability to create, + push, install and delete Quilt Packages. +- [Quilt Ecosystem](#quilt-ecosystem-and-integrations) which provide extension + of the core Quilt Capabilities to enable typical elements of life sciences + workflows, such as incorporating orchestration data, and connecting packages + to Electronic Lab Notebooks. + +To dive deeper into the capabilities of Quilt, start with our [Quick Start +Guide](Quickstart.md) or explore the [Installation +Instructions](Installation.md) for setting up your environment. 
+ +If you have any questions or need help, join our [Slack +community](https://slack.quiltdata.com/) or submit a support request to +. + +--- + ## Navigating the Documentation The Quilt documentation is structured to guide users through different layers of @@ -24,8 +48,7 @@ capabilities like embeddable previews and metadata collection. **Core Sections:** - [Architecture](Architecture.md) - Learn how Quilt is architected. -- [Mental Model](MentalModel.md) - Understand the guiding principles behind - Quilt. +- [Mental Model](MentalModel.md) - Understand the guiding principles behind Quilt. - [Metadata Management](Catalog/Metadata.md) - Manage metadata at scale. For users of the Quilt Platform (often referred to as the Catalog): @@ -40,11 +63,9 @@ For users of the Quilt Platform (often referred to as the Catalog): For administrators managing Quilt deployments: -- [Admin Settings UI](Catalog/Admin.md) - Control platform settings and user - access. +- [Admin Settings UI](Catalog/Admin.md) - Control platform settings and user access. - [Catalog Configuration](Catalog/Preferences.md) - Set platform preferences. -- [Cross-Account Access](CrossAccount.md) - Manage multi-account access to S3 - data. +- [Cross-Account Access](CrossAccount.md) - Manage multi-account access to S3 data. ### Quilt Python SDK @@ -58,8 +79,7 @@ flexibility needed for deeper integrations. managing data packages. - [Editing and Uploading Packages](walkthrough/editing-a-package.md) - Learn how to version, edit, and share data. -- [API Reference](api-reference/api.md) - Detailed API documentation for - developers. +- [API Reference](api-reference/api.md) - Detailed API documentation for developers. ### Quilt Ecosystem and Integrations @@ -67,9 +87,8 @@ The **Quilt Ecosystem** extends the platform with integrations and plugins to fit your workflow. 
Whether you're managing scientific data or automating packaging tasks, Quilt can be tailored to your needs with these tools: -- [Benchling - Packager](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager) - - Package biological data from Benchling. +- [Benchling Packager](examples/benchling.md) - Package electronic lab notebooks + from Benchling. - [Nextflow Plugin](examples/nextflow.md) - Integrate with Nextflow pipelines for bioinformatics. @@ -89,18 +108,7 @@ administrator, Quilt helps streamline your data management workflows. better insights. - **Discover**: Use metadata and search tools to explore data relationships across projects. -- **Model**: Version and manage large data sets that don't fit traditional git - repositories. +- **Model**: Version and manage large data sets that don't fit traditional git repositories. - **Decide**: Empower your team with auditable data for better decision-making. --- - -## How to Get Started - -To dive deeper into the capabilities of Quilt, start with our [Quick Start -Guide](Quickstart.md) or explore the [Installation -Instructions](Installation.md) for setting up your environment. - -If you have any questions or need help, join our [Slack -community](https://slack.quiltdata.com/) or visit our full [documentation -site](https://docs.quiltdata.com/). 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 0f1ca68efd6..43fe43fffa6 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -38,13 +38,7 @@ * [GxP for Security & Compliance](advanced-features/good-practice.md) * [Organizing S3 Buckets](advanced-features/s3-bucket-organization.md) -## Quilt Ecosystem Integrations - -* [Benchling Packager](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager) -* [Event-Driven Packaging](advanced-features/event-driven-packaging.md) -* [Nextflow Plugin](examples/nextflow.md) - -## Quilt Python SDK Developers +## Quilt Python SDK * [Installation](Installation.md) * [Quick Start](Quickstart.md) @@ -73,3 +67,9 @@ * [Contributing](CONTRIBUTING.md) * [Frequently Asked Questions](FAQ.md) * [Troubleshooting](Troubleshooting.md) + +## Quilt Ecosystem Integrations + +* [Benchling Packager](examples/benchling.md) +* [Event-Driven Packaging](advanced-features/event-driven-packaging.md) +* [Nextflow Plugin](examples/nextflow.md) diff --git a/docs/advanced-features/athena.md b/docs/advanced-features/athena.md index 993e8448a58..cb61b2d9312 100644 --- a/docs/advanced-features/athena.md +++ b/docs/advanced-features/athena.md @@ -1,4 +1,5 @@ + # Querying package metadata with Athena Quilt stores package data and metadata in S3. Metadata lives in a per-package manifest file in a each bucket's `.quilt/` directory. @@ -9,9 +10,11 @@ using predicates based on package or object-level metadata. Packages can be created from the resulting tabular data. To be able to create a package, -the table must contain the columns `logical_key`, `physical_keys` and `size` as shown below. +the table must contain the columns `logical_key`, `physical_keys` (or `physical_key`) and `size`. 
(See also [Mental Model](https://docs.quiltdata.com/mentalmodel)) +![Athena page with results ready to be packaged](../imgs/athena-package.png) + ## Defining package tables and views in Athena > This step is not required for users of Quilt enterprise, since tables and views diff --git a/docs/examples/benchling.md b/docs/examples/benchling.md new file mode 100644 index 00000000000..68a0a5a56bb --- /dev/null +++ b/docs/examples/benchling.md @@ -0,0 +1,32 @@ + +The Benchling Packager is a lambda you can deploy in your own AWS private cloud +to process [Benchling](https://benchling.com/) events in order to create (and +link back, if possible) a dedicated [Quilt](https://quiltdata.com/) package for +every Benchling notebook. + +The CloudFormation template is available as a package on +[open.quiltdata.com](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager). + +## Prerequisites + +In order to install the benchling packager, you will need to know, and have +administrative access to: + +- Your Benchling tenant domain (e.g., `<YOUR_TENANT>` from + `<YOUR_TENANT>.benchling.com`), for configuring event subscriptions and + metadata schemas. +- The AWS Account ID (e.g. 123456789012) and AWS Region (e.g., us-west-2) used by + your Quilt stack, for configuring the CloudFormation stack and lambdas. + +## Installation + +Go to the [Benchling Packager +package](https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager) +on open.quiltdata.com and follow the instructions in the README.
+ +## References + +- [AWS CloudFormation templates](https://aws.amazon.com/cloudformation/resources/templates/) +- [AWS Lambda functions](https://aws.amazon.com/lambda/) +- [Benchling EventBridge events](https://docs.benchling.com/docs/events-getting-started#event-types) +- [Benchling Schemas](https://help.benchling.com/hc/en-us/articles/9684227216781) diff --git a/docs/imgs/athena-history.png b/docs/imgs/athena-history.png deleted file mode 100644 index 7ef0916506e..00000000000 Binary files a/docs/imgs/athena-history.png and /dev/null differ diff --git a/docs/imgs/athena-package.png b/docs/imgs/athena-package.png new file mode 100644 index 00000000000..bdae54c2776 Binary files /dev/null and b/docs/imgs/athena-package.png differ diff --git a/docs/imgs/athena-ui.png b/docs/imgs/athena-ui.png index 8c417e376cd..3f185385b8d 100644 Binary files a/docs/imgs/athena-ui.png and b/docs/imgs/athena-ui.png differ diff --git a/lambdas/indexer/CHANGELOG.md b/lambdas/indexer/CHANGELOG.md new file mode 100644 index 00000000000..c7ea99597d5 --- /dev/null +++ b/lambdas/indexer/CHANGELOG.md @@ -0,0 +1,21 @@ + +# Changelog + +Changes are listed in reverse chronological order (newer entries at the top). 
+The entry format is + +```markdown +- [Verb] Change description ([#](https://github.com/quiltdata/quilt/pull/)) +``` + +where verb is one of + +- Removed +- Added +- Fixed +- Changed + +## Changes + +- [Changed] Stop using S3 select ([#4212](https://github.com/quiltdata/quilt/pull/4212)) +- [Added] Bootstrap the change log ([#4212](https://github.com/quiltdata/quilt/pull/4212)) diff --git a/lambdas/indexer/index.py b/lambdas/indexer/index.py index 80b6861a11f..bb6a9422229 100644 --- a/lambdas/indexer/index.py +++ b/lambdas/indexer/index.py @@ -47,6 +47,7 @@ import datetime +import functools import json import os import pathlib @@ -92,7 +93,6 @@ POINTER_PREFIX_V1, get_available_memory, get_quilt_logger, - query_manifest_content, separated_env_to_iter, ) @@ -168,12 +168,7 @@ # currently only affects .parquet, TODO: extend to other extensions assert 'SKIP_ROWS_EXTS' in os.environ SKIP_ROWS_EXTS = separated_env_to_iter('SKIP_ROWS_EXTS') -SELECT_PACKAGE_META = "SELECT * from S3Object o WHERE o.version IS NOT MISSING LIMIT 1" -# No WHERE clause needed for aggregations since S3 Select skips missing fields for aggs -SELECT_PACKAGE_STATS = ( - "SELECT COALESCE(SUM(obj['size']), 0) as total_bytes," - " COUNT(obj['size']) as total_files from S3Object obj" -) +DUCKDB_SELECT_LAMBDA_ARN = os.environ["DUCKDB_SELECT_LAMBDA_ARN"] TEST_EVENT = "s3:TestEvent" # we need to filter out GetObject and HeadObject calls generated by the present # lambda in order to display accurate analytics in the Quilt catalog @@ -182,6 +177,7 @@ logger = get_quilt_logger() +s3_client = boto3.client("s3", config=botocore.config.Config(user_agent_extra=USER_AGENT_EXTRA)) def now_like_boto3(): @@ -247,13 +243,10 @@ def select_manifest_meta(s3_client, bucket: str, key: str): wrapper for retry and returning a string """ try: - raw = query_manifest_content( - s3_client, - bucket=bucket, - key=key, - sql_stmt=SELECT_PACKAGE_META - ) - return json.load(raw) + body = s3_client.get_object(Bucket=bucket, 
Key=key)["Body"] + with body: # this *might* be needed to close the stream ASAP + for line in body.iter_lines(): + return json.loads(line) except (botocore.exceptions.ClientError, json.JSONDecodeError) as cle: print(f"Unable to S3 select manifest: {cle}") @@ -439,7 +432,7 @@ def get_pkg_data(): first = select_manifest_meta(s3_client, bucket, manifest_key) if not first: return - stats = select_package_stats(s3_client, bucket, manifest_key) + stats = select_package_stats(bucket, manifest_key) if not stats: return @@ -472,33 +465,54 @@ def get_pkg_data(): return True -def select_package_stats(s3_client, bucket, manifest_key) -> str: +@functools.lru_cache(maxsize=None) +def get_bucket_region(bucket: str) -> str: + resp = s3_client.head_bucket(Bucket=bucket) + return resp["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"] + + +@functools.lru_cache(maxsize=None) +def get_presigner_client(bucket: str): + return boto3.client( + "s3", + region_name=get_bucket_region(bucket), + config=botocore.config.Config(signature_version="s3v4"), + ) + + +def select_package_stats(bucket, manifest_key) -> Optional[dict]: """use s3 select to generate file stats for package""" logger_ = get_quilt_logger() - try: - raw_stats = query_manifest_content( - s3_client, - bucket=bucket, - key=manifest_key, - sql_stmt=SELECT_PACKAGE_STATS - ).read() - - if raw_stats: - stats = json.loads(raw_stats) - assert isinstance(stats['total_bytes'], int) - assert isinstance(stats['total_files'], int) - - return stats - - except ( - AssertionError, - botocore.exceptions.ClientError, - json.JSONDecodeError, - KeyError, - ) as err: - logger_.exception("Unable to compute package stats via S3 select") + presigner_client = get_presigner_client(bucket) + url = presigner_client.generate_presigned_url( + ClientMethod="get_object", + Params={ + "Bucket": bucket, + "Key": manifest_key, + }, + ) + lambda_ = make_lambda_client() + q = f""" + SELECT + COALESCE(SUM(size), 0) AS total_bytes, + COUNT(size) AS 
total_files FROM read_ndjson('{url}', columns={{size: 'UBIGINT'}}) obj + """ + resp = lambda_.invoke( + FunctionName=DUCKDB_SELECT_LAMBDA_ARN, + Payload=json.dumps({"query": q, "user_agent": f"DuckDB Select {USER_AGENT_EXTRA}"}), + ) - return None + payload = resp["Payload"].read() + if "FunctionError" in resp: + logger_.error("DuckDB select unhandled error: %s", payload) + return None + parsed = json.loads(payload) + if "error" in parsed: + logger_.error("DuckDB select error: %s", parsed["error"]) + return None + + rows = parsed["rows"] + return rows[0] if rows else None def extract_pptx(fileobj, max_size: int) -> str: @@ -732,6 +746,11 @@ def make_s3_client(): return boto3.client("s3", config=configuration) +@functools.lru_cache(maxsize=None) +def make_lambda_client(): + return boto3.client("lambda") + + def map_event_name(event: dict): """transform eventbridge names into S3-like ones""" input_ = event["eventName"] diff --git a/lambdas/indexer/pytest.ini b/lambdas/indexer/pytest.ini index dd07825516f..f9355a4fbaf 100644 --- a/lambdas/indexer/pytest.ini +++ b/lambdas/indexer/pytest.ini @@ -1,4 +1,6 @@ [pytest] +env = + DUCKDB_SELECT_LAMBDA_ARN = "arn:aws:lambda:us-west-2:123456789012:function:select-lambda" log_cli = True # This is set above critical to prevent logger events from confusing output in CI -log_level = 51 +log_level = 51 diff --git a/lambdas/indexer/test-requirements.txt b/lambdas/indexer/test-requirements.txt index e75e43e319b..b8fc13134ea 100644 --- a/lambdas/indexer/test-requirements.txt +++ b/lambdas/indexer/test-requirements.txt @@ -5,4 +5,5 @@ pluggy==0.9 py==1.10.0 pytest==4.4.0 pytest-cov==2.6.1 +pytest-env==0.6.2 responses==0.10.14 diff --git a/lambdas/indexer/test/test_index.py b/lambdas/indexer/test/test_index.py index c53e3bfa8de..05cc0c85a1f 100644 --- a/lambdas/indexer/test/test_index.py +++ b/lambdas/indexer/test/test_index.py @@ -23,7 +23,6 @@ import responses from botocore import UNSIGNED from botocore.client import Config -from 
botocore.exceptions import ParamValidationError from botocore.stub import Stubber from dateutil.tz import tzutc from document_queue import EVENT_PREFIX, RetryError @@ -979,7 +978,7 @@ def test_index_if_package_select_stats_fail(self, append_mock, select_meta_mock, ) select_meta_mock.assert_called_once_with(self.s3_client, bucket, manifest_key) - select_stats_mock.assert_called_once_with(self.s3_client, bucket, manifest_key) + select_stats_mock.assert_called_once_with(bucket, manifest_key) append_mock.assert_called_once_with({ "_index": bucket + PACKAGE_INDEX_SUFFIX, "_id": key, @@ -1023,7 +1022,7 @@ def test_index_if_package(self, append_mock, select_meta_mock, select_stats_mock ) select_meta_mock.assert_called_once_with(self.s3_client, bucket, manifest_key) - select_stats_mock.assert_called_once_with(self.s3_client, bucket, manifest_key) + select_stats_mock.assert_called_once_with(bucket, manifest_key) append_mock.assert_called_once_with({ "_index": bucket + PACKAGE_INDEX_SUFFIX, "_id": key, @@ -1182,51 +1181,6 @@ def test_extension_overrides(self): assert self._get_contents('foo.txt', '.txt') == "" assert self._get_contents('foo.ipynb', '.ipynb') == "" - @pytest.mark.xfail( - raises=ParamValidationError, - reason="boto bug https://github.com/boto/botocore/issues/1621", - strict=True, - ) - def test_stub_select_object_content(self): - """Demonstrate that mocking S3 select with boto3 is broken""" - sha_hash = "50f4d0fc2c22a70893a7f356a4929046ce529b53c1ef87e28378d92b884691a5" - manifest_key = f"{MANIFEST_PREFIX_V1}{sha_hash}" - # this SHOULD work, but due to botocore bugs it does not - self.s3_stubber.add_response( - method="select_object_content", - service_response={ - "ResponseMetadata": ANY, - # it is sadly not possible to mock S3 select responses because - # boto incorrectly believes "Payload"'s value should be a dict - # but it's really an iterable in realworld code - # see https://github.com/boto/botocore/issues/1621 - "Payload": [ - { - "Stats": {} - }, - { 
- "Records": { - "Payload": json.dumps(MANIFEST_DATA).encode(), - }, - }, - { - "End": {} - }, - ] - }, - expected_params={ - "Bucket": "test-bucket", - "Key": manifest_key, - "Expression": index.SELECT_PACKAGE_META, - "ExpressionType": "SQL", - "InputSerialization": { - 'JSON': {'Type': 'LINES'}, - 'CompressionType': 'NONE' - }, - "OutputSerialization": {'JSON': {'RecordDelimiter': '\n'}} - } - ) - def test_synthetic_copy_event(self): """check synthetic ObjectCreated:Copy event vs organic obtained on 26-May-2020 (bucket versioning on) diff --git a/lambdas/pkgselect/.python-version b/lambdas/pkgselect/.python-version deleted file mode 100644 index cc1923a40b1..00000000000 --- a/lambdas/pkgselect/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.8 diff --git a/lambdas/pkgselect/__init__.py b/lambdas/pkgselect/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/lambdas/pkgselect/index.py b/lambdas/pkgselect/index.py deleted file mode 100644 index 5840cbe8c53..00000000000 --- a/lambdas/pkgselect/index.py +++ /dev/null @@ -1,274 +0,0 @@ -""" -Provide a virtual-file-system view of a package's logical keys. 
-""" - -import asyncio -import dataclasses -import functools -import json -import typing as T - -import boto3 -import pandas as pd - -from t4_lambda_shared.utils import query_manifest_content, sql_escape - - -async def run_async(fn, executor=None, loop=None): - if loop is None: - loop = asyncio.get_running_loop() - return await loop.run_in_executor(executor, fn) - - -class PkgselectException(Exception): - def __str__(self): - s = self.__class__.__name__ - if self.args: - s = f"{s}: {self.args[0]}" - return s - - -class BadInputParameters(PkgselectException): - pass - - -class AccessDenied(PkgselectException): - pass - - -class NotFound(PkgselectException): - pass - - -def validate(condition: T.Any, message: str): - if not condition: - raise BadInputParameters(message) - - -def file_list_to_folder(df: pd.DataFrame, limit: int, offset: int) -> dict: - """ - Post process a set of logical keys to return only the top-level folder view. - """ - if {'physical_key', 'logical_key', 'size'}.issubset(df.columns): - groups = df.groupby(df.logical_key.str.extract('([^/]+/?).*')[0], dropna=True) - folder = groups.agg( - size=('size', 'sum'), - physical_key=('physical_key', 'first') - ) - folder.reset_index(inplace=True) # move the logical_key from the index to column[0] - folder.rename(columns={0: 'logical_key'}, inplace=True) # name the new column - - # Sort to ensure consistent paging - folder.sort_values(by=['logical_key'], inplace=True) - - # Page response (folders and files) based on limit & offset - total_results = len(folder.index) - folder = folder.iloc[offset:offset+limit] - - # Do not return physical_key for prefixes - prefixes = folder[folder.logical_key.str.contains('/')].drop( - ['physical_key'], - axis=1 - ).to_dict(orient='records') - objects = folder[~folder.logical_key.str.contains('/')].to_dict(orient='records') - else: - # df might not have the expected columns if either: - # (1) the package is empty (has zero package entries) or, - # (2) zero package entries 
match the prefix filter. - # In either case, the folder view is empty. - prefixes = [] - objects = [] - total_results = 0 - - return dict( - total=total_results, - prefixes=prefixes, - objects=objects, - ) - - -@functools.lru_cache(maxsize=None) -def get_s3_client(): - return boto3.client("s3") - - -async def select(bucket: str, key: str, stmt: str): - s3 = get_s3_client() - try: - return await run_async(functools.partial( - query_manifest_content, - s3, - bucket=bucket, - key=key, - sql_stmt=stmt, - )) - except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket): - raise NotFound - except s3.exceptions.ClientError as ex: - if ex.response.get("Error", {}).get("Code") == "AccessDenied": - raise AccessDenied - raise ex - - -async def select_meta(bucket: str, manifest: str, path: T.Optional[str] = None) -> dict: - """ - Fetch package-level, directory-level or object-level metadata - """ - if path: - sql_stmt = f"SELECT s.meta FROM s3object s WHERE s.logical_key = '{sql_escape(path)}' LIMIT 1" - else: - sql_stmt = "SELECT s.* FROM s3object s WHERE s.logical_key is NULL LIMIT 1" - - result = await select(bucket, manifest, sql_stmt) - return json.load(result) if result else {} - - -@dataclasses.dataclass -class FileView: - physical_key: str - size: int - hash: str - meta: T.Optional[dict] - - -async def file_view(bucket: str, manifest: str, path: str) -> T.Optional[FileView]: - """ - Get details of a single file in the package. 
- """ - validate( - isinstance(bucket, str) and bucket, - f"file_view: bucket must be a non-empty string (given: {bucket!r})", - ) - validate( - isinstance(manifest, str) and manifest, - f"file_view: manifest must be a non-empty string (given: {manifest!r})", - ) - validate( - isinstance(path, str) and path, - f"file_view: path must be a non-empty string (given: {path!r})", - ) - - details = await select( - bucket, - manifest, - f""" - SELECT s.physical_keys[0] as physical_key, s."size", s.hash."value" as hash, s.meta - FROM s3object s - WHERE s.logical_key = '{sql_escape(path)}' - LIMIT 1 - """, - ) - return FileView(**json.load(details)) if details is not None else None - - -@dataclasses.dataclass -class DirView: - total: int - prefixes: T.List[dict] # {logical_key: str, size: float} - objects: T.List[dict] # {logical_key: str, size: float, physical_key: str} - meta: dict - - -async def dir_view( - bucket: str, - manifest: str, - path: T.Optional[str], - limit: T.Optional[int] = None, - offset: T.Optional[int] = None, -) -> DirView: - validate( - isinstance(bucket, str) and bucket, - f"dir_view: bucket must be a non-empty string (given: {bucket!r})", - ) - validate( - isinstance(manifest, str) and manifest, - f"dir_view: manifest must be a non-empty string (given: {manifest!r})", - ) - validate( - path is None or isinstance(path, str), - f"dir_view: path must be a string if provided (given: {path!r})", - ) - validate( - limit is None or isinstance(limit, int) and limit > 0, - f"dir_view: limit must be a positive int if provided (given: {limit!r})", - ) - validate( - offset is None or isinstance(offset, int) and offset >= 0, - f"dir_view: offset must be a non-negative int if provided (given: {offset!r})", - ) - - if limit is None: - limit = 1000 - if offset is None: - offset = 0 - - path = path.rstrip("/") - if path: - path += "/" - - meta = asyncio.create_task(select_meta(bucket, manifest, path)) - - # Call s3 select to fetch only logical keys matching the 
desired prefix (folder path) - prefix_length = len(path) if path is not None else 0 - sql_stmt = \ - f""" - SELECT - SUBSTRING(s.logical_key, {prefix_length + 1}) as logical_key, - s."size", - s.physical_keys[0] as physical_key - FROM s3object s - """ - - if path: - sql_stmt += f" WHERE SUBSTRING(s.logical_key, 1, {prefix_length}) = '{sql_escape(path)}'" - - result = await select(bucket, manifest, sql_stmt) - - # Parse the response into a logical folder view - if result is not None: - df = pd.read_json( - result, - lines=True, - dtype=dict(logical_key="string", physical_key="string"), - ) - else: - df = pd.DataFrame() - - return DirView( - **file_list_to_folder(df, limit, offset), - meta=await meta, - ) - - -actions = { - "file": file_view, - "dir": dir_view, -} - - -def lambda_handler(evt, _ctx): - """ - Parse a manifest to return a folder-like view of its contents (logical keys). - Payload format: - bucket: str - manifest: str - action: see actions mapping - params: see *_view functions - Returns: {result} or {error} (see *_view functions for result format) - """ - try: - action = evt.get("action") - validate( - action in actions, - f"action must be one of: {', '.join(actions)} (given: {action!r})", - ) - - result = asyncio.run(actions[action]( - evt.get("bucket"), - evt.get("manifest"), - **evt.get("params", {}), - )) - return {"result": dataclasses.asdict(result) if result is not None else None} - - except PkgselectException as ex: - return {"error": str(ex)} diff --git a/lambdas/pkgselect/requirements.txt b/lambdas/pkgselect/requirements.txt deleted file mode 100644 index 86849527514..00000000000 --- a/lambdas/pkgselect/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -boto3==1.17.100 -botocore==1.20.100 -jmespath==0.10.0 -numpy==1.22.0 -pandas==1.1.0 -python-dateutil==2.8.2 -pytz==2023.3 -s3transfer==0.4.2 -six==1.16.0 -urllib3==1.26.19 diff --git a/lambdas/pkgselect/setup.py b/lambdas/pkgselect/setup.py deleted file mode 100644 index 1a58f98d407..00000000000 
--- a/lambdas/pkgselect/setup.py +++ /dev/null @@ -1,7 +0,0 @@ -from setuptools import setup - -setup( - name='quilt3_package_browse', - version='0.0.1', - py_modules=['index'], -) diff --git a/lambdas/pkgselect/test-requirements.txt b/lambdas/pkgselect/test-requirements.txt deleted file mode 100644 index c6d95fdf08a..00000000000 --- a/lambdas/pkgselect/test-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -coverage==5.5 -pytest==4.3.0 -pytest-cov==2.6.1 -responses==0.10.5 diff --git a/lambdas/pkgselect/test/__init__.py b/lambdas/pkgselect/test/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/lambdas/pkgselect/test/test_pkgselect.py b/lambdas/pkgselect/test/test_pkgselect.py deleted file mode 100644 index 2be9be05e73..00000000000 --- a/lambdas/pkgselect/test/test_pkgselect.py +++ /dev/null @@ -1,537 +0,0 @@ -""" -Test functions for pkgselect endpoint -""" - -import json -import os -from unittest import TestCase, skip -from unittest.mock import patch - -import boto3 -import pandas as pd -import responses - -from t4_lambda_shared.utils import buffer_s3response, read_body - -from .. import index as pkgselect - - -@skip("TODO: fix tests") -class TestPackageSelect(TestCase): - """ - Unit tests for the PackageSelect API endpoint. 
- """ - - def make_s3response(self, payload_bytes): - """ - Generate a mock s3 select response - """ - return { - 'Payload': [ - { - 'Records': { - 'Payload': payload_bytes - } - }, - { - 'Progress': { - 'Details': { - 'BytesScanned': 123, - 'BytesProcessed': 123, - 'BytesReturned': 123 - } - } - }, - { - 'Stats': { - 'Details': { - 'BytesScanned': 123, - 'BytesProcessed': 123, - 'BytesReturned': 123 - } - } - }, - { - 'End': {} - } - ] - } - - def make_s3response_empty(self): - """ - Generate a mock s3 select response - """ - return { - 'Payload': [ - { - 'Stats': { - 'Details': { - 'BytesScanned': 123, - 'BytesProcessed': 123, - 'BytesReturned': 0 - } - } - }, - { - 'End': {} - } - ] - } - - def make_manifest_query(self, logical_keys): - entries = [] - for key in logical_keys: - entry = dict( - logical_key=key, - physical_key=f"{key}?versionid=1234", - size=100 - ) - entries.append(json.dumps(entry)) - jsonl = "\n".join(entries) - streambytes = jsonl.encode() - - return self.make_s3response(streambytes) - - def setUp(self): - """ - Mocks to tests calls to S3 Select - """ - - logical_keys = [ - "foo.csv", - "bar/file1.txt", - "bar/file2.txt", - "bar/baz/file3.txt", - "bar/baz/file4.txt" - ] - - manifest_row = dict( - logical_key="bar/file1.txt", - physical_keys=["s3://test-bucket/bar/file1.txt"], - size=1234, - hash={"type": "SHA256", "value": "0123456789ABCDEF"}, - meta={} - ) - detailbytes = json.dumps(manifest_row).encode() - - self.s3response = self.make_manifest_query(logical_keys) - self.s3response_detail = self.make_s3response(detailbytes) - self.s3response_detail_empty = self.make_s3response_empty() - self.s3response_incomplete = { - 'Payload': [ - { - 'Records': { - 'Payload': self.s3response['Payload'][0]['Records']['Payload'] - } - }, - { - 'Stats': { - 'Details': { - 'BytesScanned': 123, - 'BytesProcessed': 123, - 'BytesReturned': 123 - } - } - } - ] - } - - meta = { - "version": "v0", - "user_meta": { - "somefield": "somevalue" - }, - "message": 
"Commit message" - } - metabytes = json.dumps(meta).encode() - self.s3response_meta = self.make_s3response(metabytes) - - requests_mock = responses.RequestsMock(assert_all_requests_are_fired=False) - requests_mock.start() - self.addCleanup(requests_mock.stop) - - env_patcher = patch.dict(os.environ, { - 'AWS_ACCESS_KEY_ID': 'test_key', - 'AWS_SECRET_ACCESS_KEY': 'test_secret', - }) - env_patcher.start() - self.addCleanup(env_patcher.stop) - - def test_browse_top_level(self): - """ - Test that the S3 Select response is parsed - into the correct top-level folder view. - """ - df = pd.read_json(buffer_s3response(self.s3response), lines=True) - assert isinstance(df, pd.DataFrame) - - folder = pkgselect.file_list_to_folder(df, 1000, 0) - assert len(folder['prefixes']) == 1 - assert len(folder['objects']) == 1 - assert folder['objects'][0]['logical_key'] == 'foo.csv' - assert folder['prefixes'][0]['logical_key'] == 'bar/' - - def test_limit(self): - """ - Test that the S3 Select response is parsed - into the correct top-level folder view. - """ - df = pd.read_json(buffer_s3response(self.s3response), lines=True) - assert isinstance(df, pd.DataFrame) - - folder = pkgselect.file_list_to_folder(df, 1, 0) - assert len(folder['prefixes']) == 1 - assert len(folder['objects']) == 0 - assert folder['prefixes'][0]['logical_key'] == 'bar/' - - def test_offset(self): - """ - Test that the S3 Select response is parsed - into the correct top-level folder view. - """ - df = pd.read_json(buffer_s3response(self.s3response), lines=True) - assert isinstance(df, pd.DataFrame) - - folder = pkgselect.file_list_to_folder(df, 1000, 1) - assert len(folder['prefixes']) == 0 - assert len(folder['objects']) == 1 - assert folder['objects'][0]['logical_key'] == 'foo.csv' - - def test_browse_subfolder(self): - """ - Test that the S3 Select response is parsed - into the correct sub-folder view. 
- """ - prefix = "bar/" - df = pd.read_json(buffer_s3response(self.s3response), lines=True) - assert isinstance(df, pd.DataFrame) - filtered_df = df[df['logical_key'].str.startswith(prefix)] - stripped = filtered_df['logical_key'].str.slice(start=len(prefix)) - stripped_df = stripped.to_frame('logical_key') - s3_df = pd.concat( - [stripped_df['logical_key'], filtered_df['size'], filtered_df['physical_key']], - axis=1, - keys=['logical_key', 'size', 'physical_key'] - ) - - folder = pkgselect.file_list_to_folder(s3_df, 1000, 0) - assert len(folder['prefixes']) == 1 - assert len(folder['objects']) == 2 - object_keys = [obj['logical_key'] for obj in folder['objects']] - assert "file1.txt" in object_keys - assert "file2.txt" in object_keys - assert folder['prefixes'][0]['logical_key'] == "baz/" - - def test_browse_subsubfolder(self): - """ - Test that the S3 Select response is parsed - into the correct sub-sub-folder view. - """ - prefix = "bar/baz/" - df = pd.read_json(buffer_s3response(self.s3response), lines=True) - assert isinstance(df, pd.DataFrame) - filtered_df = df[df['logical_key'].str.startswith(prefix)] - stripped = filtered_df['logical_key'].str.slice(start=len(prefix)) - stripped_df = stripped.to_frame('logical_key') - s3_df = pd.concat( - [stripped_df['logical_key'], filtered_df['size'], filtered_df['physical_key']], - axis=1, - keys=['logical_key', 'size', 'physical_key'] - ) - folder = pkgselect.file_list_to_folder(s3_df, 1000, 0) - assert "objects" in folder - assert "prefixes" in folder - assert not folder['prefixes'] - assert len(folder['objects']) == 2 - object_keys = [obj['logical_key'] for obj in folder['objects']] - assert "file3.txt" in object_keys - assert "file4.txt" in object_keys - - def test_folder_view(self): - """ - End-to-end test (folder view without a prefix) - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - params = dict( - bucket=bucket, - manifest=key, - action="dir", - ) - - expected_args = { - 'Bucket': bucket, - 
'Key': key, - 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s", - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - side_effect=[ - self.s3response, - self.s3response_meta, - ] - ) as client_patch, patch('boto3.Session.client', return_value=mock_s3): - response = pkgselect.lambda_handler(params, None) - print(response) - folder = json.loads(read_body(response))['result'] - assert len(folder['prefixes']) == 1 - assert len(folder['objects']) == 1 - assert folder['objects'][0]['logical_key'] == 'foo.csv' - assert folder['prefixes'][0]['logical_key'] == 'bar/' - - def test_folder_view_paging(self): - """ - End-to-end test (top-level folder view with a limit & offset) - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - params = dict( - bucket=bucket, - manifest=key, - action="dir", - params={ - "path": "paging_test/", - "limit": 10, - "offset": 10, - }, - ) - - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s", - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - paging_logical_keys = [ - f"f{i:03d}.csv" for i in range(1000) - ] - s3response_paging = self.make_manifest_query(paging_logical_keys) - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - side_effect=[ - s3response_paging, - self.s3response_meta - ] - ) as client_patch, patch( - 'boto3.Session.client', - return_value=mock_s3 - ): - response = pkgselect.lambda_handler(params, None) - print(response) - folder = json.loads(read_body(response))['result'] - assert len(folder['prefixes']) == 0 - 
assert len(folder['objects']) == 10 - assert folder['total'] == 1000 - assert folder['objects'][0]['logical_key'] == 'f010.csv' - - def test_detail_view(self): - """ - End-to-end test (detail view) - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - logical_key = "bar/file1.txt" - params = dict( - bucket=bucket, - manifest=key, - action="file", - params={"path": logical_key}, - ) - - expected_sql = "SELECT s.* FROM s3object s WHERE s.logical_key = 'bar/file1.txt' LIMIT 1" - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s", - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - return_value=self.s3response_detail - ) as client_patch, patch( - 'boto3.Session.client', - return_value=mock_s3 - ): - response = pkgselect.lambda_handler(params, None) - print(response) - json.loads(read_body(response))['result'] - - def test_non_existing_logical_key(self): - """ - End-to-end test (detail view) - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - logical_key = "non-existing.txt" - params = dict( - bucket=bucket, - manifest=key, - action="file", - params={"path": logical_key}, - ) - - expected_sql = f"SELECT s.* FROM s3object s WHERE s.logical_key = '{logical_key}' LIMIT 1" - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s", - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - return_value=self.s3response_detail_empty - ) as client_patch, 
patch( - 'boto3.Session.client', - return_value=mock_s3 - ): - response = pkgselect.lambda_handler(params, None) - print(response) - assert response['statusCode'] == 404 - - def test_non_string_keys(self): - """ - End-to-end test (folder view without a prefix) - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - params = dict( - bucket=bucket, - manifest=key, - action="dir", - ) - - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s", - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - # Return a response with keys that are not strings (integers here) - # The important test case is where all members of a column are - # non-string - logical_keys = [ - "1", - "2", - "3", - ] - entries = [] - for key in logical_keys: - entry = dict( - logical_key=key, - physical_key=key, - size=100 - ) - entries.append(json.dumps(entry)) - jsonl = "\n".join(entries) - streambytes = jsonl.encode() - non_string_s3response = self.make_s3response(streambytes) - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - side_effect=[ - non_string_s3response, - self.s3response_meta - ] - ) as client_patch, patch( - 'boto3.Session.client', - return_value=mock_s3 - ): - response = pkgselect.lambda_handler(params, None) - print(response) - folder = json.loads(read_body(response))['result'] - assert not folder['prefixes'] - assert len(folder['objects']) == 3 - assert folder['objects'][0]['logical_key'] == '1' - assert folder['objects'][1]['logical_key'] == '2' - assert folder['objects'][2]['logical_key'] == '3' - - def test_empty_manifest(self): - """ - End-to-end test (folder view without a prefix) for an - empty package manifest - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - params = dict( - bucket=bucket, - 
manifest=key, - action="dir", - ) - - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s", - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - # Empty manifest - jsonl = '{"version": "v0", "message": null}' - streambytes = jsonl.encode() - non_string_s3response = self.make_s3response(streambytes) - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - side_effect=[ - non_string_s3response, - self.s3response_meta - ] - ) as client_patch, patch( - 'boto3.Session.client', - return_value=mock_s3 - ): - response = pkgselect.lambda_handler(params, None) - print(response) - folder = json.loads(read_body(response))['result'] - assert not folder['prefixes'] - assert not folder['objects'] - assert folder['total'] == 0 diff --git a/lambdas/s3select/.python-version b/lambdas/s3select/.python-version deleted file mode 100644 index cc1923a40b1..00000000000 --- a/lambdas/s3select/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.8 diff --git a/lambdas/s3select/requirements.txt b/lambdas/s3select/requirements.txt deleted file mode 100644 index d60d4aa053c..00000000000 --- a/lambdas/s3select/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -attrs==19.1.0 -botocore==1.21.44 -certifi==2024.7.4 -chardet==3.0.4 -docutils==0.14 -idna==3.7 -jmespath==0.9.4 -jsonschema==3.0.1 -pyrsistent==0.15.3 -python-dateutil==2.8.0 -requests==2.32.0 -six==1.12.0 -urllib3==1.26.19 diff --git a/lambdas/s3select/setup.py b/lambdas/s3select/setup.py deleted file mode 100644 index 0e4b32174d8..00000000000 --- a/lambdas/s3select/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name='t4_lambda_s3select', - version='0.0.1', - packages=find_packages(where="src"), - package_dir={"": "src"}, -) diff --git 
a/lambdas/s3select/src/t4_lambda_s3select/__init__.py b/lambdas/s3select/src/t4_lambda_s3select/__init__.py deleted file mode 100644 index 2a13e51f1cb..00000000000 --- a/lambdas/s3select/src/t4_lambda_s3select/__init__.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Sign S3 select requests (because S3 select does not allow anonymous access). - -The implementation doesn't care what the request is, and just signs it using -the current AWS credentials. -""" -import os -from urllib.parse import urlencode - -import requests -from botocore.auth import SigV4Auth -from botocore.awsrequest import AWSRequest -from botocore.session import Session - -from t4_lambda_shared.decorator import api -from t4_lambda_shared.utils import get_default_origins - -SERVICE = 's3' -REGION = os.environ.get('AWS_REGION', '') - -REQUEST_HEADERS_TO_FORWARD = {'content-type', 'cache-control', 'pragma', 'x-amz-content-sha256', 'x-amz-user-agent'} -REQUEST_HEADERS_TO_SIGN = {'host', 'x-amz-content-sha256', 'x-amz-user-agent'} -RESPONSE_HEADERS_TO_FORWARD = {'content-type'} - -session = requests.Session() - - -@api(cors_origins=get_default_origins()) -def lambda_handler(request): - """ - Sign the request and forward it to S3. - """ - if not (request.method == 'POST' and 'select' in request.args): - return requests.codes.bad_request, 'Not an S3 select', {'content-type': 'text/plain'} - - bucket, key = request.pathParameters['proxy'].split('/', 1) - host = f'{bucket}.s3.amazonaws.com' - - # Make an unsigned HEAD request to test anonymous access. - - object_url = f'https://{host}/{key}' - head_response = session.head(object_url) - if not head_response.ok: - return requests.codes.forbidden, 'Not allowed', {'content-type': 'text/plain'} - - # Sign the full S3 select request. 
- - url = f'{object_url}?{urlencode(request.args)}' - - headers = {k: v for k, v in request.headers.items() if k in REQUEST_HEADERS_TO_FORWARD} - headers['host'] = host - - aws_request = AWSRequest( - method=request.method, - url=url, - data=request.data, - headers={k: v for k, v in headers.items() if k in REQUEST_HEADERS_TO_SIGN} - ) - credentials = Session().get_credentials() - auth = SigV4Auth(credentials, SERVICE, REGION) - auth.add_auth(aws_request) - - headers.update(aws_request.headers) - - response = session.post( - url=url, - data=request.data, # Forward the POST data. - headers=headers, - ) - - response_headers = {k: v for k, v in response.headers.items() if k in RESPONSE_HEADERS_TO_FORWARD} - # Add a default content type to prevent API Gateway from setting it to application/json. - response_headers.setdefault('content-type', 'application/octet-stream') - - return response.status_code, response.content, response_headers diff --git a/lambdas/s3select/test-requirements.txt b/lambdas/s3select/test-requirements.txt deleted file mode 100644 index 7e23afb8f36..00000000000 --- a/lambdas/s3select/test-requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -atomicwrites==1.3.0 -importlib-metadata==0.18 -more-itertools==7.1.0 -pluggy==0.13.1 -py==1.10.0 -pyparsing==2.4.0 -pytest==4.3.0 -pytest-cov==2.6.1 -responses==0.10.6 -wcwidth==0.1.7 -zipp==3.19.1 diff --git a/lambdas/s3select/tests/__init__.py b/lambdas/s3select/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/lambdas/s3select/tests/test_s3select.py b/lambdas/s3select/tests/test_s3select.py deleted file mode 100644 index 13e61696508..00000000000 --- a/lambdas/s3select/tests/test_s3select.py +++ /dev/null @@ -1,104 +0,0 @@ -import os -from base64 import b64decode -from unittest import TestCase -from unittest.mock import patch - -import responses - -import t4_lambda_s3select - - -@patch('t4_lambda_s3select.REGION', 'us-east-1') -class TestS3Select(TestCase): - """Tests S3 
Select""" - def setUp(self): - self.requests_mock = responses.RequestsMock(assert_all_requests_are_fired=False) - self.requests_mock.start() - - self.env_patcher = patch.dict(os.environ, { - 'AWS_ACCESS_KEY_ID': 'test_key', - 'AWS_SECRET_ACCESS_KEY': 'test_secret', - }) - self.env_patcher.start() - - def tearDown(self): - self.env_patcher.stop() - self.requests_mock.stop() - - @classmethod - def _make_event(cls, path, query, headers, body): - return { - 'httpMethod': 'POST', - 'path': f'/lambda/{path}', - 'pathParameters': { - 'proxy': path - }, - 'queryStringParameters': query or None, - 'headers': headers or None, - 'body': body, - 'isBase64Encoded': False, - } - - def test_signature(self): - url = 'https://bucket.s3.amazonaws.com/object.csv' - - self.requests_mock.add( - responses.HEAD, - url, - status=200) - - def _callback(request): - assert 'X-Amz-Date' in request.headers - assert 'Authorization' in request.headers - assert request.headers['content-type'] == 'application/octet-stream' - assert request.headers['cache-control'] == 'no-cache' - assert request.headers['pragma'] == 'no-cache' - assert 'referer' not in request.headers - return 200, {}, b'results' - - self.requests_mock.add_callback( - responses.POST, - url, - _callback) - - query = { - 'select': '', - 'select-type': '2', - } - headers = { - 'content-type': 'application/octet-stream', - 'x-amz-content-sha256': '123456', - 'x-amz-user-agent': 'test', - 'cache-control': 'no-cache', - 'pragma': 'no-cache', - 'referer': 'http://example.com' - } - body = b's3 select request body' - - event = self._make_event('bucket/object.csv', query, headers, body) - resp = t4_lambda_s3select.lambda_handler(event, None) - assert resp['statusCode'] == 200 - assert resp['isBase64Encoded'] - assert b64decode(resp['body']) == b'results' - - def test_not_public(self): - url = 'https://bucket.s3.amazonaws.com/object.csv' - - self.requests_mock.add( - responses.HEAD, - url, - status=403) - - event = 
self._make_event('bucket/object.csv', {'select': None}, {}, b'test') - resp = t4_lambda_s3select.lambda_handler(event, None) - assert resp['statusCode'] == 403 - - def test_bad_request(self): - event = self._make_event('bucket/object.csv', {}, {}, b'test') - resp = t4_lambda_s3select.lambda_handler(event, None) - assert resp['statusCode'] == 400 - - event = self._make_event('bucket/object.csv', {'select': None}, {}, b'test') - event['httpMethod'] = 'PUT' - resp = t4_lambda_s3select.lambda_handler(event, None) - assert resp['statusCode'] == 400 diff --git a/lambdas/shared/t4_lambda_shared/utils.py b/lambdas/shared/t4_lambda_shared/utils.py index 9fb161dc15b..8d94dc7cef8 100644 --- a/lambdas/shared/t4_lambda_shared/utils.py +++ b/lambdas/shared/t4_lambda_shared/utils.py @@ -2,7 +2,6 @@ Helper functions. """ import gzip -import io import json import logging import os @@ -96,13 +95,6 @@ def read_body(resp): return body -class IncompleteResultException(Exception): - """ - Exception indicating an incomplete response - (e.g., from S3 Select) - """ - - def sql_escape(s): """ Escape strings that might contain single quotes for use in Athena @@ -110,60 +102,3 @@ def sql_escape(s): """ escaped = s or "" return escaped.replace("'", "''") - - -def buffer_s3response(s3response): - """ - Read a streaming response (botocore.eventstream.EventStream) from s3 select - into a BytesIO buffer - """ - logger_ = logging.getLogger(LOGGER_NAME) - response = io.BytesIO() - end_event_received = False - stats = None - found_records = False - for event in s3response['Payload']: - if 'Records' in event: - records = event['Records']['Payload'] - response.write(records) - found_records = True - elif 'Progress' in event: - logger_.info("select progress: %s", event['Progress'].get('Details')) - elif 'Stats' in event: - logger_.info("select stats: %s", event['Stats']) - elif 'End' in event: - # End event indicates that the request finished successfully - end_event_received = True - - if not 
end_event_received: - raise IncompleteResultException("Error: Received an incomplete response from S3 Select.") - response.seek(0) - return response if found_records else None - - -def query_manifest_content( - s3_client: str, - *, - bucket: str, - key: str, - sql_stmt: str -) -> io.BytesIO: - """ - Call S3 Select to read only the logical keys from a - package manifest that match the desired folder path - prefix - """ - logger_ = get_quilt_logger() - logger_.debug("utils.py: manifest_select: %s", sql_stmt) - response = s3_client.select_object_content( - Bucket=bucket, - Key=key, - ExpressionType='SQL', - Expression=sql_stmt, - InputSerialization={ - 'JSON': {'Type': 'LINES'}, - 'CompressionType': 'NONE' - }, - OutputSerialization={'JSON': {'RecordDelimiter': '\n'}} - ) - return buffer_s3response(response) diff --git a/lambdas/shared/tests/test_utils.py b/lambdas/shared/tests/test_utils.py index b9a7a403db3..6cb0a4eea7d 100644 --- a/lambdas/shared/tests/test_utils.py +++ b/lambdas/shared/tests/test_utils.py @@ -7,16 +7,13 @@ from unittest import TestCase from unittest.mock import patch -import boto3 import pytest from testfixtures import LogCapture from t4_lambda_shared.utils import ( - IncompleteResultException, get_available_memory, get_default_origins, make_json_response, - query_manifest_content, separated_env_to_iter, ) @@ -143,76 +140,6 @@ def test_json_response(self): assert json.loads(body) == {'foo': 'bar'} assert headers == {'Content-Type': 'application/json', 'Content-Length': '123'} - def test_call_s3select(self): - """ - Test that parameters are correctly passed to - S3 Select (without a prefix) - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - - expected_sql = "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s" - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': expected_sql, - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 
'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - return_value=self.s3response - ) as patched: - query_manifest_content( - mock_s3, - bucket=bucket, - key=key, - sql_stmt=expected_sql) - patched.assert_called_once_with(**expected_args) - - def test_call_s3select_incomplete_response(self): - """ - Test that an incomplete response from S3 Select is - detected and an exception is raised. - """ - bucket = "bucket" - key = ".quilt/packages/manifest_hash" - - expected_sql = "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s" - expected_args = { - 'Bucket': bucket, - 'Key': key, - 'Expression': expected_sql, - 'ExpressionType': 'SQL', - 'InputSerialization': { - 'CompressionType': 'NONE', - 'JSON': {'Type': 'LINES'} - }, - 'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}}, - } - - mock_s3 = boto3.client('s3') - with patch.object( - mock_s3, - 'select_object_content', - return_value=self.s3response_incomplete - ) as patched: - with self.assertRaises(IncompleteResultException): - query_manifest_content( - mock_s3, - bucket=bucket, - key=key, - sql_stmt=expected_sql - ) - patched.assert_called_once_with(**expected_args) - @pytest.mark.parametrize( "level, call, message, expected, name", diff --git a/lambdas/tabular_preview/requirements.txt b/lambdas/tabular_preview/requirements.txt index c787be65d4d..b10ea779083 100644 --- a/lambdas/tabular_preview/requirements.txt +++ b/lambdas/tabular_preview/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --output-file=requirements.txt ../shared/setup.py setup.py # -aiohttp==3.10.2 +aiohttp==3.10.11 # via fsspec aiosignal==1.2.0 # via aiohttp diff --git a/s3-proxy/Dockerfile b/s3-proxy/Dockerfile index adf17c7a6a8..72fe0690fb6 100644 --- a/s3-proxy/Dockerfile +++ b/s3-proxy/Dockerfile @@ -1,4 +1,4 @@ -FROM amazonlinux:2023.6.20241031.0 +FROM amazonlinux:2023.6.20241111.0 MAINTAINER Quilt Data, Inc. 
contact@quiltdata.io # Based on: diff --git a/shared/graphql/schema.graphql b/shared/graphql/schema.graphql index 0bb997e7809..ea342cd5806 100644 --- a/shared/graphql/schema.graphql +++ b/shared/graphql/schema.graphql @@ -222,6 +222,16 @@ type AccessCounts { counts: [AccessCountForDate!]! } +type AccessCountsGroup { + ext: String! + counts: AccessCounts! +} + +type BucketAccessCounts { + byExt(groups: Int): [AccessCountsGroup!]! + combined: AccessCounts! +} + type PackageDir { path: String! metadata: JsonRecord @@ -556,6 +566,9 @@ type Query { searchMorePackages(after: String!, size: Int = 30): PackagesSearchMoreResult! subscription: SubscriptionState! + bucketAccessCounts(bucket: String!, window: Int!): BucketAccessCounts + objectAccessCounts(bucket: String!, key: String!, window: Int!): AccessCounts + admin: AdminQueries! @admin policies: [Policy!]! @admin