diff --git a/.github/actions/build_extensions/action.yml b/.github/actions/build_extensions/action.yml index 88771fb7ad8d..5eea7ed19a15 100644 --- a/.github/actions/build_extensions/action.yml +++ b/.github/actions/build_extensions/action.yml @@ -138,6 +138,7 @@ runs: ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts cd ${{ inputs.build_dir}} mkdir -p build/release/extension/out_of_tree + EXTENSION_CONFIGS=.github/config/extension_config.cmake make ${{ inputs.python_name}} scripts/build_out_of_tree_extensions.py ${{ inputs.aarch64_cross_compile == 1 && '--aarch64-cc' || '' }} --github-ref=$GITHUB_REF - name: Run post-install scripts diff --git a/.github/config/extensions.csv b/.github/config/extensions.csv deleted file mode 100644 index f3400cbda91d..000000000000 --- a/.github/config/extensions.csv +++ /dev/null @@ -1,14 +0,0 @@ -name,url,commit,options -excel,,, -fts,,, -httpfs,,, -icu,,, -json,,, -parquet,,, -tpcds,,, -tpch,,, -visualizer,,, -sqlite_scanner,https://github.com/duckdblabs/sqlite_scanner,e607f30160260a5d3152087e001967ece39c36c0,no-link -postgres_scanner,https://github.com/duckdblabs/postgres_scanner,cd043b49cdc9e0d3752535b8333c9433e1007a48,no-link -substrait,https://github.com/duckdblabs/substrait,48d64b27c7a6985d6f6c3e65044038544608c03b,no-windows -arrow,https://github.com/duckdblabs/arrow,c80462e30b463c2391e033fe11d86668a5ac44c3,no-windows \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3fbd2b52df70..6762afad2eab 100644 --- a/.gitignore +++ b/.gitignore @@ -341,4 +341,7 @@ zig-cache/* *.zig # .db files -*.db \ No newline at end of file +*.db + +# local config files +extension/extension_config_local.cmake \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index e7926a7e9118..437da27d107b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -716,6 +716,7 @@ function(build_loadable_extension NAME PARAMETERS) build_loadable_extension_directory(${NAME} "extension/${NAME}" "${PARAMETERS}" ${FILES}) 
endfunction() +# Internal extension register function function(register_extension NAME DONT_LINK PATH INCLUDE_PATH) string(TOLOWER ${NAME} EXTENSION_NAME_LOWERCASE) string(TOUPPER ${NAME} EXTENSION_NAME_UPPERCASE) @@ -724,7 +725,8 @@ function(register_extension NAME DONT_LINK PATH INCLUDE_PATH) if (NOT ${DONT_LINK} AND NOT DISABLE_BUILTIN_EXTENSIONS) set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK TRUE PARENT_SCOPE) - add_definitions(-DDUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_LINKED=true) + else() + set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK FALSE PARENT_SCOPE) endif() if ("${PATH}" STREQUAL "") @@ -738,7 +740,7 @@ function(register_extension NAME DONT_LINK PATH INCLUDE_PATH) set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_INCLUDE_PATH ${INCLUDE_PATH} PARENT_SCOPE) endfunction() -# Similar to register_extension but for a remote git repo. +# Downloads the external extension repo at the specified commit and calls register_extension macro(register_external_extension NAME URL COMMIT DONT_LINK PATH INCLUDE_PATH) include(FetchContent) FETCHCONTENT_DECLARE( @@ -750,20 +752,27 @@ macro(register_external_extension NAME URL COMMIT DONT_LINK PATH INCLUDE_PATH) FETCHCONTENT_POPULATE(${NAME}_EXTENSION_FC) if ("${INCLUDE_PATH}" STREQUAL "") - set(INCLUDE_PATH_DEFAULT "src/include") + register_extension(${NAME} ${DONT_LINK} ${${NAME}_extension_fc_SOURCE_DIR}/${PATH} "${${NAME}_extension_fc_SOURCE_DIR}/src/include") else() - set(INCLUDE_PATH_DEFAULT ${INCLUDE_PATH}) + register_extension(${NAME} ${DONT_LINK} ${${NAME}_extension_fc_SOURCE_DIR}/${PATH} "${${NAME}_extension_fc_SOURCE_DIR}/${INCLUDE_PATH}") endif() - - register_extension(${NAME} ${DONT_LINK} ${${NAME}_extension_fc_SOURCE_DIR}/${PATH} ${${NAME}_extension_fc_SOURCE_DIR}/${INCLUDE_PATH_DEFAULT}) endmacro() -macro(duckdb_extension_load NAME) +function(duckdb_extension_load NAME) # Parameter parsing set(options DONT_LINK) set(oneValueArgs SOURCE_DIR INCLUDE_DIR GIT_URL 
GIT_TAG) cmake_parse_arguments(duckdb_extension_load "${options}" "${oneValueArgs}" "" ${ARGN}) + string(TOLOWER ${NAME} EXTENSION_NAME_LOWERCASE) + string(TOUPPER ${NAME} EXTENSION_NAME_UPPERCASE) + + # If extension was set already, we ignore subsequent calls + list (FIND DUCKDB_OOT_EXTENSION_NAMES ${EXTENSION_NAME_LOWERCASE} _index) + if (${_index} GREATER -1) + return() + endif() + # Remote Git extension if (NOT "${duckdb_extension_load_GIT_URL}" STREQUAL "") if (NOT "${duckdb_extension_load_GIT_COMMIT}" STREQUAL "") @@ -791,10 +800,33 @@ macro(duckdb_extension_load NAME) message(STATUS "Building extension '${NAME}' from '${CMAKE_SOURCE_DIR}/extensions'") register_extension(${NAME} ${duckdb_extension_load_DONT_LINK} "${CMAKE_SOURCE_DIR}/extension/${NAME}" "${CMAKE_SOURCE_DIR}/extension/${NAME}/include") endif() -endmacro() -# Register in tree extensions TODO: we may want to remove these specific cmake vars and use passing an extension_config instead. -set(INTERNAL_EXTENSION_ROOT_DIR ${CMAKE_SOURCE_DIR}/extension) + # Propagate variables set by register_extension + set(DUCKDB_OOT_EXTENSION_NAMES ${DUCKDB_OOT_EXTENSION_NAMES} PARENT_SCOPE) + set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK ${DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_SHOULD_LINK} PARENT_SCOPE) + set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_PATH ${DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_PATH} PARENT_SCOPE) + set(DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_INCLUDE_PATH ${DUCKDB_OOT_EXTENSION_${EXTENSION_NAME_UPPERCASE}_INCLUDE_PATH} PARENT_SCOPE) +endfunction() + +if(${EXPORT_DLL_SYMBOLS}) + # For Windows DLL export symbols + add_definitions(-DDUCKDB_BUILD_LIBRARY) +endif() + +# Custom extension configs passed in DUCKDB_EXTENSION_CONFIGS parameter +foreach(DUCKDB_EXTENSION_CONFIG IN LISTS DUCKDB_EXTENSION_CONFIGS) + include(${DUCKDB_EXTENSION_CONFIG}) +endforeach() + +# Local extension config +if (EXISTS 
${CMAKE_SOURCE_DIR}/extension/extension_config_local.cmake) + include(${CMAKE_SOURCE_DIR}/extension/extension_config_local.cmake) +endif() + +# Load base extension config +include(${CMAKE_SOURCE_DIR}/extension/extension_config.cmake) + +# Load extensions passed through cmake config var TODO: make nicer if (BUILD_HTTPFS_EXTENSION) duckdb_extension_load(httpfs) endif() @@ -835,15 +867,6 @@ if (BUILD_AUTOCOMPLETE_EXTENSION) duckdb_extension_load(autocomplete) endif() -if(${EXPORT_DLL_SYMBOLS}) - # For Windows DLL export symbols - add_definitions(-DDUCKDB_BUILD_LIBRARY) -endif() - -foreach(DUCKDB_EXTENSION_CONFIG IN LISTS DUCKDB_EXTENSION_CONFIGS) - include(${DUCKDB_EXTENSION_CONFIG}) -endforeach() - if (BUILD_MAIN_DUCKDB_LIBRARY) add_subdirectory(src) add_subdirectory(tools) @@ -855,6 +878,12 @@ add_subdirectory(extension) # Add Out-of-tree extensions foreach(OOTE_NAME IN LISTS DUCKDB_OOT_EXTENSION_NAMES) string(TOUPPER ${OOTE_NAME} OOTE_NAME_UPPERCASE) + if (DUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_SHOULD_LINK) + add_definitions(-DDUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_LINKED=true) + else() + add_definitions(-DDUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_LINKED=false) + endif() + if (DEFINED DUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_PATH) add_subdirectory(${DUCKDB_OOT_EXTENSION_${OOTE_NAME_UPPERCASE}_PATH} extension/${OOTE_NAME}) else() @@ -1025,17 +1054,4 @@ if(EXISTS ${CMAKE_CONFIG_TEMPLATE} AND EXISTS ${CMAKE_CONFIG_VERSION_TEMPLATE}) DESTINATION "${INSTALL_CMAKE_DIR}") endif() -endif() - -# TODO: This is the old way of Loading OOTEs can be removed after all OOTES in extensions.csv play ball with the new way -# build out-of-tree extensions on demand -if(NOT "${EXTERNAL_EXTENSION_DIRECTORIES}" STREQUAL "") - separate_arguments(EXTERNAL_EXTENSION_DIRECTORIES) - - foreach(EXTERNAL_EXTENSION_DIRECTORY IN LISTS EXTERNAL_EXTENSION_DIRECTORIES) - - # the build path seems to get ignored on windows in just the right way. no idea why. 
- get_filename_component(EXTERNAL_EXTENSION_NAME ${EXTERNAL_EXTENSION_DIRECTORY} NAME) - add_subdirectory(${EXTERNAL_EXTENSION_DIRECTORY} "extension/${EXTERNAL_EXTENSION_NAME}") - endforeach() -endif() +endif() \ No newline at end of file diff --git a/extension/README.md b/extension/README.md index f5f13557d9f1..c19fec2d1da9 100644 --- a/extension/README.md +++ b/extension/README.md @@ -1,16 +1,100 @@ -# In-Tree Extensions -These are DuckDB's In-tree extensions, meaning that their code lives in the main DuckDB repository. These extensions -are considered fundamental to DuckDB and connect to DuckDB so deeply that changes to DuckDB will regularly break them. -We aim to keep the amount of in-tree extensions to a minimum and strive to move extensions out-of-tree where possible. - -## Building extensions -Both in-tree extensions and out-of-tree extensions are built the same way. To build an extension, it needs to be registered -in the main DuckDB CMake build. This can be done in several ways. - -### Extension config file -To configure which extensions are built using a config file, pass the path to `.cmake` to the `DUCKDB_EXTENSION_CONFIG` -CMake variable. This config file will be included in the DuckDB CMake build and allows configuring extensions through -the `register_extension` CMake function. - -### Manually setting the extension variables -It's also possible to manually set the extension variables that are set by `register_extension`. \ No newline at end of file +This document explains what types of extensions there are in DuckDB and how to build them. + +# Extension Types +### In-tree extensions +In-tree extensions are extensions that live in the main DuckDB repository. These extensions are considered fundamental +to DuckDB and/or connect to DuckDB so deeply that changes to DuckDB are expected to regularly break them. We aim to +keep the amount of in-tree extensions to a minimum and strive to move extensions out-of-tree where possible. 
+### Out-of-tree Extensions (OOTEs) +Out-of-tree extensions live in separate repositories outside the main DuckDB repository. These extensions can be +distributed in two ways: Firstly, they can be distributed using the CI running in their own repository. In this case the +owner of the OOTE repository is responsible for ensuring the extension passes CI and is kept up to date with DuckDB. +Secondly, OOTEs can be pulled into the main DuckDB CI. In this case extensions are built and distributed by the main +DuckDB CI. Some examples here are the `sqlite_scanner` and `postgres_scanner` extensions. For the complete list of +extensions built using the main DuckDB repository CI, check out the extension configuration in +`.github/config/extension_config.cmake` + +# Building extensions +Under the hood, all types of extensions are built the same way, namely by using DuckDB's root `CMakeLists.txt` file as the root CMake file +and passing the extensions that should be built to it. Configuring which extensions are built by DuckDB can be done in +different ways. + +## Makefile environment variables +The simplest way to build an extension is to use the `BUILD_` environment variables defined in the root +`Makefile` in this repository. For example, to build the JSON extension, simply run `BUILD_JSON=1 make`. Note that this +will only work for in-tree extensions since out-of-tree extensions require extra configuration steps. + +## CMake variables +TODO + +## Config files +To build out-of-tree extensions or have more control over how in-tree extensions are built, extension config files should +be used. These config files are simply CMake files that are included by DuckDB's CMake build. There are 3 different places +that will be searched for config files: + +1) The base configuration `extension/extension_config.cmake`. The extensions specified here will be built every time DuckDB +is built.
+2) The local configuration file `extension/extension_config_local.cmake`. This is where you would specify extensions you need +included in your local/custom/dev build of DuckDB. +3) Additional configuration files passed to the `DUCKDB_EXTENSION_CONFIGS` parameter. This can be used to point DuckDB +to config files stored anywhere on the machine. + +Note that DuckDB will load these config files in reverse order and ignore subsequent calls to load an extension with the +same name. This allows overriding the base configuration of an extension by providing a different configuration +in the local config. For example, currently the parquet extension is always statically linked into DuckDB, because of this +line in `extension/extension_config.cmake`: +```cmake +duckdb_extension_load(parquet) +``` +Now say we want to build DuckDB with our custom parquet extension, and we also don't want to link this statically in DuckDB, +but only produce the loadable binary. We can achieve this by creating the `extension/extension_config_local.cmake` file and adding: +```cmake +duckdb_extension_load(parquet + DONT_LINK + SOURCE_DIR /path/to/my/custom/parquet +) +``` +Now when we run `make`, CMake will output: +```shell +-- Building extension 'parquet' from '/path/to/my/custom/parquet' +-- Extensions built but not linked: parquet +``` + +## Loading extensions with config files +The `duckdb_extension_load` function is used in the configuration files to specify how an extension should +be loaded. There are 3 different ways this can be done. For some examples, check out `.github/config/extension_config.cmake`. + +### Automatic loading +The simplest way to load an extension is to just pass the extension name. This will automatically try to load the extension. +Optionally, the DONT_LINK parameter can be passed to disable linking the extension into DuckDB.
+```cmake +duckdb_extension_load(<extension_name> (DONT_LINK)) +``` +This configuration of `duckdb_extension_load` will search the `./extension` and `./extension_external` directories for +extensions and attempt to load them if possible. Note that the `extension_external` directory does not exist but should +be created and populated with the out-of-tree extensions that should be built. Extensions based on the +[extension-template](https://github.com/duckdb/extension-template) should work out of the box using this automatic +loading when placed in the `extension_external` directory. + +### Custom path +When extensions are located in a custom path or their project structure is different from that of the +[extension-template](https://github.com/duckdb/extension-template), the `SOURCE_DIR` and `INCLUDE_DIR` variables can +be used to tell DuckDB how to load the extension: +```cmake +duckdb_extension_load(<extension_name> + (DONT_LINK) + SOURCE_DIR <path_to_extension> + (INCLUDE_DIR <path_to_extension_headers>) +) +``` + +### Remote GitHub repo +Directly installing extensions from GitHub repositories is also supported.
This will download the extension to the current +cmake build directory and build it from there: +```cmake +duckdb_extension_load(postgres_scanner + (DONT_LINK) + GIT_URL https://github.com/duckdblabs/postgres_scanner + GIT_TAG cd043b49cdc9e0d3752535b8333c9433e1007a48 +) +``` \ No newline at end of file diff --git a/scripts/build_out_of_tree_extensions.py b/scripts/build_out_of_tree_extensions.py deleted file mode 100644 index d386d4caa4a1..000000000000 --- a/scripts/build_out_of_tree_extensions.py +++ /dev/null @@ -1,99 +0,0 @@ -import argparse -import csv -import subprocess -import tempfile -import os -import glob -import pathlib -import shutil -import sys - -parser = argparse.ArgumentParser(description='Builds out-of-tree extensions for DuckDB') - -parser.add_argument('--extensions', action='store', - help='CSV file with DuckDB extensions to build', default=os.path.join(".github", "config", "extensions.csv")) -parser.add_argument('--aarch64-cc', help='Enables Linux aarch64 crosscompile build', action='store_true') -parser.add_argument('--github-ref', action='store', - help='The github ref this job is launched from', default='') - -args = parser.parse_args() - -tasks = [] - -def exec(cmd): - print(cmd) - sys.stdout.flush() - - res = subprocess.Popen(cmd.split(' ')) - res.wait() - if res.returncode != 0: - raise ValueError('failed to execute %s' % cmd) - - -reader = csv.reader(open(args.extensions)) -# This skips the first row (i.e., the header) of the CSV file. 
-next(reader) -for row in reader: - if len(row) != 4: - raise ValueError('Row malformed' + str(row)) - - name = row[0].strip() - url = row[1].strip() - commit = row[2].strip() - if not url: - # This is not an out-of-tree extension - continue - if len(name) == 0 or len(url) == 0 or len(commit) != 40: - raise ValueError('Row malformed' + str(row)) - - tasks+= [{'name' : row[0], 'url' : row[1], 'commit' : row[2], 'options' : row[3]}] - -def build_extension(task): - print(task) - if os.name == 'nt' and 'no-windows' in task['options']: - return False - if 'main-repo-only' in task['options'] and args.github_ref != 'refs/heads/master': - return False - return True - -cmake_config = os.path.join('extension_external', 'external_extension_config.cmake') -basedir = os.getcwd() -def init_cmake_config(): - os.makedirs(os.path.dirname(cmake_config), exist_ok=True) - with open(cmake_config, 'w+') as the_file: - the_file.write("# NOTE: Autogenerated by 'scripts/build_out_of_tree_extensions.py' edits may be overwritten\n") - the_file.write("#\n") - the_file.write("# This file contains the configuration for the out-of-tree extensions to build\n") - the_file.write("\n") - -def append_task_to_cmake_config(task): - with open(cmake_config, 'a') as the_file: - name = task['name'] - if 'no-link' in task['options']: - the_file.write(f'duckdb_extension_load({name} DONT_LINK)\n') - else: - the_file.write(f'duckdb_extension_load({name})\n') - -init_cmake_config() - -for task in tasks: - print(task) - if build_extension(task): - clonedir = os.path.join("extension_external", task['name']) - if not os.path.isdir(clonedir): - exec('git clone %s %s' % (task['url'], clonedir)) - os.chdir(clonedir) - exec('git checkout %s' % (task['commit'])) - os.chdir(basedir) - print(f"Building extension \"{task['name']}\" from URL \"{task['url']}\" at commit \"{task['commit']}\" at clonedir \"{clonedir}\"") - - append_task_to_cmake_config(task) - -# Now produce the cmake configuration to build the external 
extensions -os.environ['EXTENSION_CONFIGS'] = os.path.join("extension_external", "external_extension_config.cmake") - -if (args.aarch64_cc): - os.environ['CC'] = "aarch64-linux-gnu-gcc" - os.environ['CXX'] = "aarch64-linux-gnu-g++" -exec('make') -print("done")