From d8e208ca5e0598ac46735d4915eb9f1a8bc6a26f Mon Sep 17 00:00:00 2001 From: Maksim Chervonnyi Date: Mon, 25 Nov 2024 13:10:28 +0100 Subject: [PATCH] Athena: check access for catalog (#4239) --- catalog/CHANGELOG.md | 1 + .../Queries/Athena/model/requests.spec.ts | 165 ++++++++++++++---- .../Bucket/Queries/Athena/model/requests.ts | 108 ++++++++---- .../Queries/Athena/model/state.spec.tsx | 9 + .../Bucket/Queries/Athena/model/state.tsx | 2 +- 5 files changed, 216 insertions(+), 69 deletions(-) diff --git a/catalog/CHANGELOG.md b/catalog/CHANGELOG.md index 7d1a15da7c0..dbdb686eaf1 100644 --- a/catalog/CHANGELOG.md +++ b/catalog/CHANGELOG.md @@ -17,6 +17,7 @@ where verb is one of ## Changes +- [Changed] Athena: hide data catalogs user doesn't have access to ([#4239](https://github.com/quiltdata/quilt/pull/4239)) - [Added] Enable MixPanel tracking in Embed mode ([#4237](https://github.com/quiltdata/quilt/pull/4237)) - [Fixed] Fix embed files listing ([#4236](https://github.com/quiltdata/quilt/pull/4236)) - [Changed] Qurator: switch to Claude 3.5 Sonnet **v2** ([#4234](https://github.com/quiltdata/quilt/pull/4234)) diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts b/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts index 4faf6dbbcee..60af764b2e3 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts +++ b/catalog/app/containers/Bucket/Queries/Athena/model/requests.spec.ts @@ -6,6 +6,15 @@ import Log from 'utils/Logging' import * as Model from './utils' import * as requests from './requests' +class AWSError extends Error { + code: string + + constructor(code: string, message?: string) { + super(message) + this.code = code + } +} + jest.mock( 'utils/Logging', jest.fn(() => ({ @@ -61,16 +70,17 @@ const reqThrowWith = (o: unknown) => }, })) +const batchGetNamedQuery = jest.fn() const batchGetQueryExecution = jest.fn() +const getDataCatalog = jest.fn() +const getQueryExecution = jest.fn() +const getQueryResults = jest.fn() const getWorkGroup = jest.fn() const listDataCatalogs = jest.fn() const listDatabases = jest.fn() +const listNamedQueries = jest.fn() const listQueryExecutions = jest.fn() const listWorkGroups = jest.fn() -const getQueryExecution = jest.fn() -const listNamedQueries = jest.fn() -const batchGetNamedQuery = jest.fn() -const getQueryResults = jest.fn() const startQueryExecution = jest.fn() jest.mock('utils/AWS', () => ({ @@ -78,6 +88,7 @@ jest.mock('utils/AWS', () => ({ use: () => ({ batchGetNamedQuery, batchGetQueryExecution, + getDataCatalog, getQueryExecution, getQueryResults, getWorkGroup, @@ -93,63 +104,143 @@ jest.mock('utils/AWS', () => ({ describe('containers/Bucket/Queries/Athena/model/requests', () => { describe('useCatalogNames', () => { + getDataCatalog.mockImplementation( + reqThen( + ({ Name }: { Name: string }) => ({ + DataCatalog: { + Name, + Type: 'any', + }, + }), + ), + ) it('return catalog names', async () => { - listDataCatalogs.mockImplementationOnce( - req({ + listDataCatalogs.mockImplementation( + reqThen(() => ({ DataCatalogsSummary: [{ CatalogName: 'foo' }, { CatalogName: 'bar' }], - }), + })), + ) + const { result, waitForValueToChange } = renderHook(() => + requests.useCatalogNames('any'), ) - const { result, waitForNextUpdate } = renderHook(() => requests.useCatalogNames()) expect(result.current.data).toBe(undefined) - await act(async () => { - await waitForNextUpdate() - }) - expect(result.current.data).toMatchObject({ list: ['foo', 'bar'] }) + await waitForValueToChange(() => result.current) + expect(result.current.data).toMatchObject({ list: ['bar', 'foo'] }) }) it('return empty list', async () => { - listDataCatalogs.mockImplementationOnce( - req({ + listDataCatalogs.mockImplementation( + reqThen(() => ({ DataCatalogsSummary: [], - }), + })), + ) + const { result, waitForValueToChange } = renderHook(() => + requests.useCatalogNames('any'), ) - const { result, waitForNextUpdate } = renderHook(() => requests.useCatalogNames()) - await act(async () => { - await waitForNextUpdate() - }) + await waitForValueToChange(() => result.current) expect(result.current.data).toMatchObject({ list: [] }) }) - it('return unknowns on invalid data', async () => { - listDataCatalogs.mockImplementationOnce( - req({ + it('return empty list on invalid catalog data', async () => { + listDataCatalogs.mockImplementation( + reqThen(() => ({ // @ts-expect-error DataCatalogsSummary: [{ Nonsense: true }, { Absurd: false }], - }), + })), + ) + const { result, waitForValueToChange } = renderHook(() => + requests.useCatalogNames('any'), + ) + + await waitForValueToChange(() => result.current) + expect(result.current.data).toMatchObject({ list: [] }) + }) + + it('return empty list on invalid list data', async () => { + listDataCatalogs.mockImplementation( + // @ts-expect-error + reqThen(() => ({ + Invalid: [], + })), + ) + const { result, waitForValueToChange } = renderHook(() => + requests.useCatalogNames('any'), ) - const { result, waitForNextUpdate } = renderHook(() => requests.useCatalogNames()) + await waitForValueToChange(() => result.current) + expect(result.current.data).toMatchObject({ list: [] }) + }) + + it('doesnt return catalogs with denied access', async () => { + listDataCatalogs.mockImplementation( + reqThen(() => ({ + DataCatalogsSummary: [{ CatalogName: 'foo' }, { CatalogName: 'bar' }], + })), + ) + getDataCatalog.mockImplementation( + reqThrowWith(new AWSError('AccessDeniedException')), + ) + const { result, waitForValueToChange } = renderHook(() => + requests.useCatalogNames('any'), + ) + + await waitForValueToChange(() => result.current) + expect(result.current.data).toMatchObject({ list: [] }) + }) + + it('doesnt return failed catalogs', async () => { + listDataCatalogs.mockImplementation( + reqThen(() => ({ + DataCatalogsSummary: [{ CatalogName: 'foo' }, { CatalogName: 'bar' }], + })), + ) + getDataCatalog.mockImplementation(reqThrow) + const { result, waitForValueToChange } = renderHook(() => + requests.useCatalogNames('any'), + ) + + await waitForValueToChange(() => result.current) + expect(result.current.data).toMatchObject({ list: [] }) + }) + + it('handle fail in requesting list', async () => { await act(async () => { - await waitForNextUpdate() + listDataCatalogs.mockImplementation(reqThrow) + const { result, unmount, waitFor } = renderHook(() => + requests.useCatalogNames('any'), + ) + await waitFor(() => result.current.data instanceof Error) + expect(Log.error).toBeCalledWith(expect.any(Error)) + expect(result.current.data).toBeInstanceOf(Error) + unmount() }) - expect(result.current.data).toMatchObject({ list: ['Unknown', 'Unknown'] }) }) - it('return empty list on invalid data', async () => { - listDataCatalogs.mockImplementationOnce( - req({ - // @ts-expect-error - Invalid: [], - }), + function useWrapper(props: Parameters) { + return requests.useCatalogNames(...props) + } + + it('wait until workgroup is ready', async () => { + const { result, rerender, waitForValueToChange, unmount } = renderHook( + (x: Parameters) => useWrapper(x), + { initialProps: [null] }, ) - const { result, waitForNextUpdate } = renderHook(() => requests.useCatalogNames()) await act(async () => { - await waitForNextUpdate() + rerender([Model.Loading]) + await waitForValueToChange(() => result.current) }) - expect(result.current.data).toMatchObject({ list: [] }) + expect(result.current.data).toBe(Model.Loading) + + const error = new Error('foo') + await act(async () => { + rerender([error]) + await waitForValueToChange(() => result.current) + }) + expect(result.current.data).toBe(error) + unmount() }) }) @@ -563,9 +654,7 @@ describe('containers/Bucket/Queries/Athena/model/requests', () => { it('handle access denied for workgroup list', async () => { await act(async () => { getWorkGroup.mockImplementation( - reqThrowWith({ - code: 'AccessDeniedException', - }), + reqThrowWith(new AWSError('AccessDeniedException')), ) const { result, unmount, waitFor } = renderHook(() => requests.useWorkgroups()) await waitFor(() => typeof result.current.data === 'object') diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts b/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts index 8da2450012b..a6c50ca9613 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts +++ b/catalog/app/containers/Bucket/Queries/Athena/model/requests.ts @@ -1,4 +1,4 @@ -import type Athena from 'aws-sdk/clients/athena' +import type { Athena, AWSError } from 'aws-sdk' import * as React from 'react' import * as Sentry from '@sentry/react' @@ -33,15 +33,10 @@ function listIncludes(list: string[], value: string): boolean { export type Workgroup = string -interface WorkgroupArgs { - athena: Athena - workgroup: Workgroup -} - -async function fetchWorkgroup({ - athena, - workgroup, -}: WorkgroupArgs): Promise { +async function fetchWorkgroup( + athena: Athena, + workgroup: Workgroup, +): Promise { try { const workgroupOutput = await athena.getWorkGroup({ WorkGroup: workgroup }).promise() if ( @@ -53,8 +48,8 @@ async function fetchWorkgroup({ } return null } catch (error) { - if ((error as $TSFixMe).code === 'AccessDeniedException') { - Log.info(`Fetching "${workgroup}" workgroup failed: ${(error as $TSFixMe).code}`) + if (isAwsErrorAccessDenied(error)) { + Log.info(`Fetching "${workgroup}" workgroup failed: ${error.code}`) } else { Log.error(`Fetching "${workgroup}" workgroup failed:`, error) } @@ -75,7 +70,7 @@ async function fetchWorkgroups( .filter(Boolean) .sort() const available = ( - await Promise.all(parsed.map((workgroup) => fetchWorkgroup({ athena, workgroup }))) + await Promise.all(parsed.map((workgroup) => fetchWorkgroup(athena, workgroup))) ).filter(Boolean) const list = (prev?.list || []).concat(available as Workgroup[]) return { @@ -536,27 +531,80 @@ export function useDatabase( return React.useMemo(() => Model.wrapValue(value, setValue), [value]) } -export function useCatalogNames(): Model.DataController> { +function isAwsErrorAccessDenied(e: unknown): e is AWSError { + return ( + e instanceof Error && + (e as Error & { code?: string }).code === 'AccessDeniedException' + ) +} + +async function fetchCatalogName( + athena: Athena, + workgroup: Workgroup, + catalogName: CatalogName, +): Promise { + try { + return ( + (await athena.getDataCatalog({ Name: catalogName, WorkGroup: workgroup }).promise()) + ?.DataCatalog?.Name || null + ) + } catch (error) { + if (isAwsErrorAccessDenied(error)) { + Log.info(`Fetching "${catalogName}" catalog name failed: ${error.code}`) + } else { + Log.error(`Fetching "${catalogName}" catalog name failed:`, error) + } + return null + } +} + +async function fetchCatalogNames( + athena: Athena, + workgroup: Workgroup, + prev: Model.List | null, +): Promise> { + try { + const catalogsOutput = await athena + .listDataCatalogs({ NextToken: prev?.next }) + .promise() + const parsed = (catalogsOutput.DataCatalogsSummary || []) + .map(({ CatalogName }) => CatalogName || '') + .filter(Boolean) + .sort() + const available = ( + await Promise.all(parsed.map((name) => fetchCatalogName(athena, workgroup, name))) + ).filter(Boolean) + const list = (prev?.list || []).concat(available as Workgroup[]) + return { + list, + next: catalogsOutput.NextToken, + } + } catch (e) { + Log.error(e) + throw e + } +} + +export function useCatalogNames( + workgroup: Model.Value, +): Model.DataController> { const athena = AWS.Athena.use() const [prev, setPrev] = React.useState | null>(null) const [data, setData] = React.useState>>() React.useEffect(() => { - const request = athena?.listDataCatalogs({ NextToken: prev?.next }, (error, d) => { - const { DataCatalogsSummary, NextToken: next } = d || {} - setData(Model.Loading) - if (error) { - Sentry.captureException(error) - setData(error) - return - } - const list = DataCatalogsSummary?.map(({ CatalogName }) => CatalogName || 'Unknown') - setData({ - list: (prev?.list || []).concat(list || []), - next, - }) - }) - return () => request?.abort() - }, [athena, prev]) + if (!Model.hasData(workgroup)) { + setData(workgroup || undefined) + return + } + let mounted = true + if (!athena) return + fetchCatalogNames(athena, workgroup, prev) + .then((d) => mounted && setData(d)) + .catch((d) => mounted && setData(d)) + return () => { + mounted = false + } + }, [athena, prev, workgroup]) return React.useMemo(() => Model.wrapData(data, setPrev), [data]) } diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/state.spec.tsx b/catalog/app/containers/Bucket/Queries/Athena/model/state.spec.tsx index 27bfad46eb6..67425051c39 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/model/state.spec.tsx +++ b/catalog/app/containers/Bucket/Queries/Athena/model/state.spec.tsx @@ -90,12 +90,21 @@ describe('app/containers/Queries/Athena/model/state', () => { }, }), })) + listNamedQueries.mockImplementation((_x, cb) => { + cb(undefined, { NamedQueryIds: [] }) + return { + abort: jest.fn(), + } + }) listQueryExecutions.mockImplementation((_x, cb) => { cb(undefined, { QueryExecutionIds: [] }) return { abort: jest.fn(), } }) + listDataCatalogs.mockImplementation(() => ({ + promise: () => Promise.resolve({ DataCatalogsSummary: [] }), + })) const wrapper = ({ children }: { children: React.ReactNode }) => ( {children} ) diff --git a/catalog/app/containers/Bucket/Queries/Athena/model/state.tsx b/catalog/app/containers/Bucket/Queries/Athena/model/state.tsx index a7bc4631fcb..77a378c31ee 100644 --- a/catalog/app/containers/Bucket/Queries/Athena/model/state.tsx +++ b/catalog/app/containers/Bucket/Queries/Athena/model/state.tsx @@ -89,7 +89,7 @@ export function Provider({ preferences, children }: ProviderProps) { const queries = requests.useQueries(workgroup.data) const query = requests.useQuery(queries.data, execution) const queryBody = requests.useQueryBody(query.value, query.setValue, execution) - const catalogNames = requests.useCatalogNames() + const catalogNames = requests.useCatalogNames(workgroup.data) const catalogName = requests.useCatalogName(catalogNames.data, execution) const databases = requests.useDatabases(catalogName.value) const database = requests.useDatabase(databases.data, execution)