Skip to content

Commit

Permalink
extension loading refactor and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed May 19, 2023
1 parent e21da95 commit 2896b34
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 163 deletions.
1 change: 1 addition & 0 deletions .github/actions/build_extensions/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ runs:
ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
cd ${{ inputs.build_dir}}
mkdir -p build/release/extension/out_of_tree
EXTENSION_CONFIGS=.github/config/extension_config.cmake make
${{ inputs.python_name}} scripts/build_out_of_tree_extensions.py ${{ inputs.aarch64_cross_compile == 1 && '--aarch64-cc' || '' }} --github-ref=$GITHUB_REF
- name: Run post-install scripts
Expand Down
14 changes: 0 additions & 14 deletions .github/config/extensions.csv

This file was deleted.

5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -341,4 +341,7 @@ zig-cache/*
*.zig

# .db files
*.db
*.db

# local config files
extension/extension_config_local.cmake
82 changes: 49 additions & 33 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ function(build_loadable_extension NAME PARAMETERS)
build_loadable_extension_directory(${NAME} "extension/${NAME}" "${PARAMETERS}" ${FILES})
endfunction()

# Internal extension register function
function(register_extension NAME DONT_LINK PATH INCLUDE_PATH)
string(TOLOWER ${NAME} EXTENSION_NAME_LOWERCASE)
string(TOUPPER ${NAME} EXTENSION_NAME_UPPERCASE)
Expand All @@ -724,7 +725,8 @@ function(register_extension NAME DONT_LINK PATH INCLUDE_PATH)

if (NOT ${DONT_LINK} AND NOT DISABLE_BUILTIN_EXTENSIONS)
set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK TRUE PARENT_SCOPE)
add_definitions(-DDUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_LINKED=true)
else()
set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK FALSE PARENT_SCOPE)
endif()

if ("${PATH}" STREQUAL "")
Expand All @@ -738,7 +740,7 @@ function(register_extension NAME DONT_LINK PATH INCLUDE_PATH)
set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_INCLUDE_PATH ${INCLUDE_PATH} PARENT_SCOPE)
endfunction()

# Similar to register_extension but for a remote git repo.
# Downloads the external extension repo at the specified commit and calls register_extension
macro(register_external_extension NAME URL COMMIT DONT_LINK PATH INCLUDE_PATH)
include(FetchContent)
FETCHCONTENT_DECLARE(
Expand All @@ -750,20 +752,27 @@ macro(register_external_extension NAME URL COMMIT DONT_LINK PATH INCLUDE_PATH)
FETCHCONTENT_POPULATE(${NAME}_EXTENSION_FC)

if ("${INCLUDE_PATH}" STREQUAL "")
set(INCLUDE_PATH_DEFAULT "src/include")
register_extension(${NAME} ${DONT_LINK} ${${NAME}_extension_fc_SOURCE_DIR}/${PATH} "${${NAME}_extension_fc_SOURCE_DIR}/src/include")
else()
set(INCLUDE_PATH_DEFAULT ${INCLUDE_PATH})
register_extension(${NAME} ${DONT_LINK} ${${NAME}_extension_fc_SOURCE_DIR}/${PATH} "${${NAME}_extension_fc_SOURCE_DIR}/${INCLUDE_PATH}")
endif()

register_extension(${NAME} ${DONT_LINK} ${${NAME}_extension_fc_SOURCE_DIR}/${PATH} ${${NAME}_extension_fc_SOURCE_DIR}/${INCLUDE_PATH_DEFAULT})
endmacro()

macro(duckdb_extension_load NAME)
function(duckdb_extension_load NAME)
# Parameter parsing
set(options DONT_LINK)
set(oneValueArgs SOURCE_DIR INCLUDE_DIR GIT_URL GIT_TAG)
cmake_parse_arguments(duckdb_extension_load "${options}" "${oneValueArgs}" "" ${ARGN})

string(TOLOWER ${NAME} EXTENSION_NAME_LOWERCASE)
string(TOUPPER ${NAME} EXTENSION_NAME_UPPERCASE)

# If extension was set already, we ignore subsequent calls
list (FIND DUCKDB_OOT_EXTENSION_NAMES ${EXTENSION_NAME_LOWERCASE} _index)
if (${_index} GREATER -1)
return()
endif()

# Remote Git extension
if (NOT "${duckdb_extension_load_GIT_URL}" STREQUAL "")
if (NOT "${duckdb_extension_load_GIT_COMMIT}" STREQUAL "")
Expand Down Expand Up @@ -791,10 +800,33 @@ macro(duckdb_extension_load NAME)
message(STATUS "Building extension '${NAME}' from '${CMAKE_SOURCE_DIR}/extensions'")
register_extension(${NAME} ${duckdb_extension_load_DONT_LINK} "${CMAKE_SOURCE_DIR}/extension/${NAME}" "${CMAKE_SOURCE_DIR}/extension/${NAME}/include")
endif()
endmacro()

# Register in tree extensions TODO: we may want to remove these specific cmake vars and use passing an extension_config instead.
set(INTERNAL_EXTENSION_ROOT_DIR ${CMAKE_SOURCE_DIR}/extension)
# Propagate variables set by register_extension
set(DUCKDB_OOT_EXTENSION_NAMES ${DUCKDB_OOT_EXTENSION_NAMES} PARENT_SCOPE)
set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK ${DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK} PARENT_SCOPE)
set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_PATH ${DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_PATH} PARENT_SCOPE)
set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_INCLUDE_PATH ${DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_INCLUDE_PATH} PARENT_SCOPE)
endfunction()

if(${EXPORT_DLL_SYMBOLS})
# For Windows DLL export symbols
add_definitions(-DDUCKDB_BUILD_LIBRARY)
endif()

# Extension config loading.
# Each config file is a plain CMake script that is include()d here and is expected to
# call duckdb_extension_load(). duckdb_extension_load() ignores a call for an extension
# name that was already registered, so the FIRST config that loads an extension decides
# how it is built. The include order below is therefore the override precedence:
# custom configs, then the local config, then the base config.

# 1) Custom extension configs passed in DUCKDB_EXTENSION_CONFIGS parameter
#    (treated as a CMake list; entries are included in list order)
foreach(DUCKDB_EXTENSION_CONFIG IN LISTS DUCKDB_EXTENSION_CONFIGS)
include(${DUCKDB_EXTENSION_CONFIG})
endforeach()

# 2) Local extension config (optional: only included when the file exists)
if (EXISTS ${CMAKE_SOURCE_DIR}/extension/extension_config_local.cmake)
include(${CMAKE_SOURCE_DIR}/extension/extension_config_local.cmake)
endif()

# 3) Load base extension config (always included; comes last so the configs above take precedence)
include(${CMAKE_SOURCE_DIR}/extension/extension_config.cmake)

# Load extensions passed through cmake config var TODO: make nicer
if (BUILD_HTTPFS_EXTENSION)
duckdb_extension_load(httpfs)
endif()
Expand Down Expand Up @@ -835,15 +867,6 @@ if (BUILD_AUTOCOMPLETE_EXTENSION)
duckdb_extension_load(autocomplete)
endif()

if(${EXPORT_DLL_SYMBOLS})
# For Windows DLL export symbols
add_definitions(-DDUCKDB_BUILD_LIBRARY)
endif()

foreach(DUCKDB_EXTENSION_CONFIG IN LISTS DUCKDB_EXTENSION_CONFIGS)
include(${DUCKDB_EXTENSION_CONFIG})
endforeach()

if (BUILD_MAIN_DUCKDB_LIBRARY)
add_subdirectory(src)
add_subdirectory(tools)
Expand All @@ -855,6 +878,12 @@ add_subdirectory(extension)
# Add Out-of-tree extensions
foreach(OOTE_NAME IN LISTS DUCKDB_OOT_EXTENSION_NAMES)
string(TOUPPER ${OOTE_NAME} OOTE_NAME_UPPERCASE)
if (DUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_SHOULD_LINK)
add_definitions(-DDUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_LINKED=true)
else()
add_definitions(-DDUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_LINKED=false)
endif()

if (DEFINED DUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_PATH)
add_subdirectory(${DUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_PATH} extension/${OOTE_NAME})
else()
Expand Down Expand Up @@ -1025,17 +1054,4 @@ if(EXISTS ${CMAKE_CONFIG_TEMPLATE} AND EXISTS ${CMAKE_CONFIG_VERSION_TEMPLATE})
DESTINATION "${INSTALL_CMAKE_DIR}")
endif()

endif()

# TODO: This is the old way of Loading OOTEs can be removed after all OOTES in extensions.csv play ball with the new way
# build out-of-tree extensions on demand
if(NOT "${EXTERNAL_EXTENSION_DIRECTORIES}" STREQUAL "")
separate_arguments(EXTERNAL_EXTENSION_DIRECTORIES)

foreach(EXTERNAL_EXTENSION_DIRECTORY IN LISTS EXTERNAL_EXTENSION_DIRECTORIES)

# the build path seems to get ignored on windows in just the right way. no idea why.
get_filename_component(EXTERNAL_EXTENSION_NAME ${EXTERNAL_EXTENSION_DIRECTORY} NAME)
add_subdirectory(${EXTERNAL_EXTENSION_DIRECTORY} "extension/${EXTERNAL_EXTENSION_NAME}")
endforeach()
endif()
endif()
116 changes: 100 additions & 16 deletions extension/README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,100 @@
# In-Tree Extensions
These are DuckDB's In-tree extensions, meaning that their code lives in the main DuckDB repository. These extensions
are considered fundamental to DuckDB and connect to DuckDB so deeply that changes to DuckDB will regularly break them.
We aim to keep the amount of in-tree extensions to a minimum and strive to move extensions out-of-tree where possible.

## Building extensions
Both in-tree extensions and out-of-tree extensions are built the same way. To build an extension, it needs to be registered
in the main DuckDB CMake build. This can be done in several ways.

### Extension config file
To configure which extensions are built using a config file, pass the path to `<your_config>.cmake` to the `DUCKDB_EXTENSION_CONFIGS`
CMake variable. This config file will be included in the DuckDB CMake build and allows configuring extensions through
the `register_extension` CMake function.

### Manually setting the extension variables
It's also possible to manually set the extension variables that are set by `register_extension`.
This document explains what types of extensions there are in DuckDB and how to build them.

# Extension Types
### In-tree extensions
In-tree extensions are extensions that live in the main DuckDB repository. These extensions are considered fundamental
to DuckDB and/or connect to DuckDB so deeply that changes to DuckDB are expected to regularly break them. We aim to
keep the amount of in-tree extensions to a minimum and strive to move extensions out-of-tree where possible.
### Out-of-tree Extensions (OOTEs)
Out-of-tree extensions live in separate repositories outside the main DuckDB repository. These extensions can be
distributed in two ways: Firstly, they can be distributed using the CI running in their own repository. In this case the
owner of the OOTE repository is responsible for ensuring the extension passes CI and is kept up to date with DuckDB.
Secondly, OOTEs can be pulled into the main DuckDB CI. In this case extensions are built and distributed by the main
DuckDB CI. Some examples here are the `sqlite_scanner` and `postgres_scanner` extensions. For the complete list of
extensions built using the main DuckDB repository CI, check out the extension configuration in
`.github/config/external_extension_config.cmake`.

# Building extensions
Under the hood, all types of extensions are built the same way: DuckDB's root `CMakeLists.txt` file is used as the root CMake file
and the extensions that should be built are passed to it. Configuring which extensions are built by DuckDB can be done in
different ways.

## Makefile environment variables
The simplest way to build an extension is to use the `BUILD_<extension name>` environment variables defined in the root
`Makefile` in this repository. For example, to build the JSON extension, simply run `BUILD_JSON=1 make`. Note that this
will only work for in-tree extensions, since out-of-tree extensions require extra configuration steps.

## CMake variables
TODO

## Config files
To build out-of-tree extensions or have more control over how in-tree extensions are built, extension config files should
be used. These config files are simply CMake files that are included by DuckDB's CMake build. There are 3 different places
that will be searched for config files:

1) The base configuration `extension/extension_config.cmake`. The extensions specified here will be built every time duckdb
is built.
2) The local configuration file `extension/extension_config_local.cmake`. This is where you would specify extensions you need
included in your local/custom/dev build of DuckDB.
3) Additional configuration files passed to the `DUCKDB_EXTENSION_CONFIGS` parameter. This can be used to point DuckDB
to config files stored anywhere on the machine.

Note that DuckDB will load these config files in reverse order and ignore subsequent calls to load an extension with the
same name. This allows overriding the base configuration of an extension by providing a different configuration
in the local config. For example, currently the parquet extension is always statically linked into DuckDB, because of this
line in `extension/extension_config.cmake`:
```cmake
duckdb_extension_load(parquet)
```
Now say we want to build DuckDB with our custom parquet extension, and we also don't want to link this statically in DuckDB,
but only produce the loadable binary. We can achieve this by creating the `extension/extension_config_local.cmake` file and adding:
```cmake
duckdb_extension_load(parquet
DONT_LINK
SOURCE_DIR /path/to/my/custom/parquet
)
```
Now when we run `make` cmake will output:
```shell
-- Building extension 'parquet' from '/path/to/my/custom/parquet'
-- Extensions built but not linked: parquet
```

## Loading extensions with config files
The `duckdb_extension_load` function is used in the configuration files to specify how an extension should
be loaded. There are 3 different ways this can be done. For some examples, check out `.github/config/extension_config.cmake`

### Automatic loading
The simplest way to load an extension is just passing the extension name. This will automatically try to load the extension.
Optionally, the DONT_LINK parameter can be passed to disable linking the extension into DuckDB.
```cmake
duckdb_extension_load(<extension_name> (DONT_LINK))
```
This configuration of `duckdb_extension_load` will search the `./extension` and `./extension_external` directories for
extensions and attempt to load them if possible. Note that the `extension_external` directory does not exist by default; it should
be created and populated with the out-of-tree extensions that should be built. Extensions based on the
[extension-template](https://github.com/duckdb/extension-template) should work out of the box using this automatic
loading when placed in the `extension_external` directory.

### Custom path
When extensions are located in a custom path or their project structure is different from that of the
[extension-template](https://github.com/duckdb/extension-template), the `SOURCE_DIR` and `INCLUDE_DIR` variables can
be used to tell DuckDB how to load the extension:
```cmake
duckdb_extension_load(<extension_name>
(DONT_LINK)
SOURCE_DIR <absolute_path_to_extension_root>
(INCLUDE_DIR <absolute_path_to_extension_header>)
)
```

### Remote GitHub repo
Directly installing extensions from GitHub repositories is also supported. This will download the extension to the current
cmake build directory and build it from there:
```cmake
duckdb_extension_load(postgres_scanner
(DONT_LINK)
GIT_URL https://github.com/duckdblabs/postgres_scanner
GIT_TAG cd043b49cdc9e0d3752535b8333c9433e1007a48
)
```
99 changes: 0 additions & 99 deletions scripts/build_out_of_tree_extensions.py

This file was deleted.

0 comments on commit 2896b34

Please sign in to comment.