Skip to content

Commit

Permalink
Merge pull request #29 from samansmink/switch-to-kv-secret
Browse files Browse the repository at this point in the history
Add support for secrets
  • Loading branch information
Mytherin authored Jan 2, 2024
2 parents 6b1f93f + 8360792 commit 72cbc1b
Show file tree
Hide file tree
Showing 12 changed files with 415 additions and 52 deletions.
17 changes: 5 additions & 12 deletions .github/workflows/Linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,11 @@ jobs:
matrix:
# Add commits/tags to build against other DuckDB versions
duckdb_version: [ '<submodule_version>' ]
arch: ['linux_amd64', 'linux_arm64', 'linux_amd64_gcc4']
arch: ['linux_amd64_gcc4']
vcpkg_version: [ '2023.04.15' ]
include:
- arch: 'linux_amd64_gcc4'
container: 'quay.io/pypa/manylinux2014_x86_64'
vcpkg_triplet: 'x64-linux'
- arch: 'linux_amd64'
container: 'ubuntu:18.04'
vcpkg_triplet: 'x64-linux'
- arch: 'linux_arm64'
container: 'ubuntu:18.04'
vcpkg_triplet: 'arm64-linux'
container: ['quay.io/pypa/manylinux2014_x86_64']
vcpkg_triplet: ['x64-linux']

env:
VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
GEN: Ninja
Expand Down Expand Up @@ -105,7 +98,7 @@ jobs:
- name: Setup vcpkg
uses: lukka/[email protected]
with:
vcpkgGitCommitId: 9edb1b8e590cc086563301d735cae4b6e732d2d2
vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

# Build extension
- name: Build extension
Expand Down
14 changes: 4 additions & 10 deletions .github/workflows/MacOS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,11 @@ jobs:
runs-on: macos-latest
strategy:
matrix:
# Add commits/tags to build against other DuckDB versions
duckdb_version: [ '<submodule_version>' ]
vcpkg_version: [ '2023.04.15' ]
vcpkg_triplet: [ 'x64-osx', 'arm64-osx' ]
include:
- vcpkg_triplet: 'x64-osx'
osx_build_arch: 'x86_64'
duckdb_arch: 'osx_amd64'
- vcpkg_triplet: 'arm64-osx'
osx_build_arch: 'arm64'
duckdb_arch: 'osx_arm64'
vcpkg_triplet: [ 'x64-osx']
osx_build_arch: ['x86_64']
duckdb_arch: ['osx_amd64']

env:
VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
Expand Down Expand Up @@ -54,7 +48,7 @@ jobs:
- name: Setup vcpkg
uses: lukka/run-vcpkg@v11
with:
vcpkgGitCommitId: 9edb1b8e590cc086563301d735cae4b6e732d2d2
vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

- name: Build extension
shell: bash
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,19 @@ concurrency:
jobs:
duckdb-stable-build:
name: Build extension binaries
uses: duckdb/duckdb/.github/workflows/_extension_distribution.yml@v0.9.2
uses: duckdb/duckdb/.github/workflows/_extension_distribution.yml@a491470b039c54fe2f0adfe1d161b14c7fe64642
with:
duckdb_version: v0.9.2
extension_name: azure
vcpkg_commit: 9edb1b8e590cc086563301d735cae4b6e732d2d2 # TODO: remove pinned vcpkg commit when updating duckdb version
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads' # Can't really work in wasm sandbox
duckdb_version: a491470b03

duckdb-stable-deploy:
name: Deploy extension binaries
needs: duckdb-stable-build
uses: ./.github/workflows/_extension_deploy.yml
secrets: inherit
with:
duckdb_version: v0.9.2
extension_name: azure
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads'
duckdb_version: a491470b03
deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ include_directories(src/include)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)

set(EXTENSION_SOURCES src/azure_extension.cpp)
set(EXTENSION_SOURCES src/azure_extension.cpp src/azure_secret.cpp)
add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES})

set(PARAMETERS "-warnings")
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 2344 files
103 changes: 88 additions & 15 deletions src/azure_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,35 @@

#include "azure_extension.hpp"

#include "azure_secret.hpp"
#include "duckdb.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/http_state.hpp"
#include "duckdb/common/file_opener.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/main/secret/secret.hpp"
#include "duckdb/main/secret/secret_manager.hpp"
#include "duckdb/function/scalar/string_functions.hpp"
#include "duckdb/function/scalar_function.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/main/client_data.hpp"
#include <azure/storage/blobs.hpp>
#include <azure/core/diagnostics/logger.hpp>
#include <azure/identity/default_azure_credential.hpp>
#include <azure/identity/azure_cli_credential.hpp>
#include <azure/identity/chained_token_credential.hpp>
#include <azure/identity/default_azure_credential.hpp>
#include <azure/identity/environment_credential.hpp>
#include <azure/identity/managed_identity_credential.hpp>
#include <azure/identity/azure_cli_credential.hpp>
#include <azure/storage/blobs.hpp>
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include <iostream>

namespace duckdb {

using namespace Azure::Core::Diagnostics;

// globals for collection Azure SDK logging information
mutex AzureStorageFileSystem::azure_log_lock = {};
weak_ptr<HTTPState> AzureStorageFileSystem::http_state = std::weak_ptr<HTTPState>();
bool AzureStorageFileSystem::listener_set = false;
Expand All @@ -33,7 +39,7 @@ bool AzureStorageFileSystem::listener_set = false;
static void Log(Logger::Level level, std::string const &message) {
auto http_state_ptr = AzureStorageFileSystem::http_state;
auto http_state = http_state_ptr.lock();
if (!http_state) {
if (!http_state && AzureStorageFileSystem::listener_set) {
throw std::runtime_error("HTTP state weak pointer failed to lock");
}
if (message.find("Request") != std::string::npos) {
Expand Down Expand Up @@ -71,9 +77,20 @@ CreateCredentialChainFromSetting(const string &credential_chain) {
return result;
}

static AzureAuthentication ParseAzureAuthSettings(FileOpener *opener) {
static AzureAuthentication ParseAzureAuthSettings(FileOpener *opener, const string &path) {
AzureAuthentication auth;

// Lookup Secret
auto context = opener->TryGetClientContext();
if (context) {
auto transaction = CatalogTransaction::GetSystemCatalogTransaction(*context);
auto secret_lookup = context->db->config.secret_manager->LookupSecret(transaction, path, "azure");
if (secret_lookup.HasMatch()) {
const auto &secret = secret_lookup.GetSecret();
auth.secret = &dynamic_cast<const KeyValueSecret &>(secret);
}
}

Value connection_string_val;
if (FileOpener::TryGetCurrentSetting(opener, "azure_storage_connection_string", connection_string_val)) {
auth.connection_string = connection_string_val.ToString();
Expand Down Expand Up @@ -122,28 +139,72 @@ static AzureReadOptions ParseAzureReadOptions(FileOpener *opener) {
}

static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthentication &auth, AzureParsedUrl &url) {
if (!auth.connection_string.empty()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(auth.connection_string,
url.container);
string connection_string;
bool use_secret = false;
string chain;
string account_name;
string endpoint;

// Firstly, try to use the auth from the secret
if (auth.secret) {
// If connection string, we're done heres
auto connection_string_value = auth.secret->TryGetValue("connection_string");
if (!connection_string_value.IsNull()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(
connection_string_value.ToString(), url.container);
}

// Account_name can be used both for unauthenticated
if (!auth.secret->TryGetValue("account_name").IsNull()) {
use_secret = true;
account_name = auth.secret->TryGetValue("account_name").ToString();
}

if (auth.secret->GetProvider() == "credential_chain") {
use_secret = true;
if (!auth.secret->TryGetValue("chain").IsNull()) {
chain = auth.secret->TryGetValue("chain").ToString();
}
if (chain.empty()) {
chain = "default";
}
if (!auth.secret->TryGetValue("endpoint").IsNull()) {
endpoint = auth.secret->TryGetValue("endpoint").ToString();
}
}
}

if (!use_secret) {
chain = auth.credential_chain;
account_name = auth.account_name;
endpoint = auth.endpoint;

if (!auth.connection_string.empty()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(auth.connection_string,
url.container);
}
}

if (endpoint.empty()) {
endpoint = "blob.core.windows.net";
}

// Build credential chain, from last to first
Azure::Identity::ChainedTokenCredential::Sources credential_chain;
if (!auth.credential_chain.empty()) {
credential_chain = CreateCredentialChainFromSetting(auth.credential_chain);
if (!chain.empty()) {
credential_chain = CreateCredentialChainFromSetting(chain);
}

auto accountURL = "https://" + auth.account_name + "." + auth.endpoint;
auto accountURL = "https://" + account_name + "." + endpoint;
if (!credential_chain.empty()) {
// A set of credentials providers was passed
auto chainedTokenCredential = std::make_shared<Azure::Identity::ChainedTokenCredential>(credential_chain);
Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential);
return blob_service_client.GetBlobContainerClient(url.container);
} else if (!auth.account_name.empty()) {
} else if (!account_name.empty()) {
return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container);
} else {
throw InvalidInputException(
"No valid Azure credentials found, use either the azure_connection_string or azure_account_name");
throw InvalidInputException("No valid Azure credentials found!");
}
}

Expand All @@ -155,7 +216,12 @@ BlobClientWrapper::BlobClientWrapper(AzureAuthentication &auth, AzureParsedUrl &
BlobClientWrapper::~BlobClientWrapper() = default;
Azure::Storage::Blobs::BlobClient *BlobClientWrapper::GetClient() {
return blob_client.get();
};
}

AzureStorageFileSystem::~AzureStorageFileSystem() {
Logger::SetListener(nullptr);
AzureStorageFileSystem::listener_set = false;
}

AzureStorageFileHandle::AzureStorageFileHandle(FileSystem &fs, string path_p, uint8_t flags, AzureAuthentication &auth,
const AzureReadOptions &read_options, AzureParsedUrl parsed_url)
Expand All @@ -169,6 +235,10 @@ AzureStorageFileHandle::AzureStorageFileHandle(FileSystem &fs, string path_p, ui
} catch (Azure::Storage::StorageException &e) {
throw IOException("AzureStorageFileSystem open file '" + path + "' failed with code'" + e.ErrorCode +
"',Reason Phrase: '" + e.ReasonPhrase + "', Message: '" + e.Message + "'");
} catch (std::exception &e) {
throw IOException("AzureStorageFileSystem could not open file: '%s', unknown error occured, this could mean "
"the credentials used were wrong. Original error message: '%s' ",
path, e.what());
}

if (flags & FileFlags::FILE_FLAGS_READ) {
Expand All @@ -183,7 +253,7 @@ unique_ptr<AzureStorageFileHandle> AzureStorageFileSystem::CreateHandle(const st
D_ASSERT(compression == FileCompressionType::UNCOMPRESSED);

auto parsed_url = ParseUrl(path);
auto azure_auth = ParseAzureAuthSettings(opener);
auto azure_auth = ParseAzureAuthSettings(opener, path);
auto read_options = ParseAzureReadOptions(opener);

return make_uniq<AzureStorageFileHandle>(*this, path, flags, azure_auth, read_options, parsed_url);
Expand Down Expand Up @@ -250,6 +320,9 @@ static void LoadInternal(DatabaseInstance &instance) {
auto &fs = instance.GetFileSystem();
fs.RegisterSubSystem(make_uniq<AzureStorageFileSystem>());

// Load Secret functions
CreateAzureSecretFunctions::Register(instance);

// Load extension config
auto &config = DBConfig::GetConfig(instance);
config.AddExtensionOption("azure_storage_connection_string",
Expand Down Expand Up @@ -329,7 +402,7 @@ vector<string> AzureStorageFileSystem::Glob(const string &path, FileOpener *open
throw InternalException("Cannot do Azure storage Glob without FileOpener");
}
auto azure_url = AzureStorageFileSystem::ParseUrl(path);
auto azure_auth = ParseAzureAuthSettings(opener);
auto azure_auth = ParseAzureAuthSettings(opener, path);

// Azure matches on prefix, not glob pattern, so we take a substring until the first wildcard
auto first_wildcard_pos = azure_url.path.find_first_of("*[\\");
Expand Down
Loading

0 comments on commit 72cbc1b

Please sign in to comment.