Skip to content

Commit

Permalink
feat: add support for abfs://
Browse files Browse the repository at this point in the history
  • Loading branch information
gdubya committed Sep 2, 2024
1 parent eddc484 commit 1c64ca0
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 6 deletions.
2 changes: 2 additions & 0 deletions src/azure_dfs_filesystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
namespace duckdb {
// Scheme name and full URL prefix for "abfss" paths.
const string AzureDfsStorageFileSystem::SCHEME {"abfss"};
const string AzureDfsStorageFileSystem::PATH_PREFIX {"abfss://"};
// Scheme name and full URL prefix for "abfs" paths (same spelling without the trailing 's').
const string AzureDfsStorageFileSystem::UNSECURE_SCHEME {"abfs"};
const string AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX {"abfs://"};

inline static bool IsDfsScheme(const string &fpath) {
return fpath.rfind("abfss://", 0) == 0;
Expand Down
17 changes: 11 additions & 6 deletions src/azure_parsed_url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ namespace duckdb {
AzureParsedUrl ParseUrl(const std::string &url) {
constexpr auto invalid_url_format =
"The URL %s does not match the expected formats: (azure|az)://<container>/[<path>] or the fully qualified one: "
"(abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>] "
"or abfss://<container>@<storage account>.<endpoint>/[<path>]";
"(abfs[s]|azure|az)://<storage account>.<endpoint>/<container>/[<path>] "
"or abfs[s]://<container>@<storage account>.<endpoint>/[<path>]";
bool is_fully_qualified;
std::string container, storage_account_name, endpoint, prefix, path;

if (url.rfind("azure://", 0) != 0 && url.rfind("az://", 0) != 0 &&
url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) != 0) {
throw IOException("URL needs to start with azure:// or az:// or %s", AzureDfsStorageFileSystem::PATH_PREFIX);
throw IOException("URL needs to start with azure:// or az:// or %s or %s",
AzureDfsStorageFileSystem::PATH_PREFIX,
AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX);
}
const auto prefix_end_pos = url.find("//") + 2;

Expand All @@ -31,9 +33,12 @@ AzureParsedUrl ParseUrl(const std::string &url) {
if (dot_pos != std::string::npos && dot_pos < slash_pos) {
is_fully_qualified = true;

if (url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 &&
if ((
url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 ||
url.rfind(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX, 0) == 0
) &&
at_pos != std::string::npos) {
// syntax is abfss://<container>@<storage account>.<endpoint>/[<path>]
// syntax is abfs[s]://<container>@<storage account>.<endpoint>/[<path>]
const auto path_slash_pos = url.find('/', prefix_end_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
Expand All @@ -44,7 +49,7 @@ AzureParsedUrl ParseUrl(const std::string &url) {
endpoint = url.substr(dot_pos + 1, path_slash_pos - dot_pos - 1);
path = url.substr(path_slash_pos + 1);
} else {
// syntax is (abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>]
// syntax is (abfs[s]|azure|az)://<storage account>.<endpoint>/<container>/[<path>]
const auto container_slash_pos = url.find('/', dot_pos);
if (container_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
Expand Down
2 changes: 2 additions & 0 deletions src/include/azure_dfs_filesystem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class AzureDfsStorageFileSystem : public AzureStorageFileSystem {
public:
// Scheme name "abfss" and its URL prefix "abfss://"; values are assigned in src/azure_dfs_filesystem.cpp.
static const string SCHEME;
static const string PATH_PREFIX;
// Scheme name "abfs" and its URL prefix "abfs://"; values are assigned in src/azure_dfs_filesystem.cpp.
// NOTE(review): presumably the non-TLS counterpart of "abfss" — confirm against the Azure driver docs.
static const string UNSECURE_SCHEME;
static const string UNSECURE_PATH_PREFIX;

protected:
// From AzureFilesystem
Expand Down

0 comments on commit 1c64ca0

Please sign in to comment.