From 1d70522e86d0ce375d64f8df883a076669022d87 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Tue, 5 Sep 2023 02:28:21 +0300 Subject: [PATCH] docs: start databricks page Part of #4724 --- content/docs/sidebar.json | 4 ++ .../user-guide/integrations/databricks.md | 47 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 content/docs/user-guide/integrations/databricks.md diff --git a/content/docs/sidebar.json b/content/docs/sidebar.json index af7e87129a6..dcdbe98e88c 100644 --- a/content/docs/sidebar.json +++ b/content/docs/sidebar.json @@ -213,6 +213,10 @@ { "label": "Amazon SageMaker", "slug": "sagemaker" + }, + { + "label": "Databricks", + "slug": "databricks" } ] }, diff --git a/content/docs/user-guide/integrations/databricks.md b/content/docs/user-guide/integrations/databricks.md new file mode 100644 index 00000000000..28c9bd4d8cb --- /dev/null +++ b/content/docs/user-guide/integrations/databricks.md @@ -0,0 +1,47 @@ +# Databricks + +As of September 2023, Databricks doesn't expose the underlying Git repo in your +project, so Git-related DVC functionality within the repo provided by Databricks +is not supported (e.g. [experiments], `--rev`, `--all-commits`, `--all-tags`, etc.). But +everything will operate as normal if you `git clone` a project yourself or use +remote projects with DVC directly. + +## Install + +```bash +%pip install dvc +``` + +## DVC API + +You can use your existing DVC projects through the [Python API] as normal. 
+ +### Secrets + +If you need to use secrets to access your data, first add them to +[Databricks secrets](https://docs.databricks.com/en/security/secrets/index.html) +and then use them with DVC, for example: + +```python +import dvc.api + +remote_config = { + "access_key_id": dbutils.secrets.get(scope="test_scope", key="aws_access_key_id"), + "secret_access_key": dbutils.secrets.get(scope="test_scope", key="aws_secret_access_key"), +} + +mydataregistry = "https://github.com/efiop/mydataregistry" + +with dvc.api.open("recent-grads.csv", repo=mydataregistry, remote_config=remote_config) as fobj: + ... +``` + +## Running DVC commands + +Databricks doesn't provide a classic terminal, so you'll need to use [magic +commands] to run them, e.g. `!dvc add data`. + +[experiments]: /doc/start/experiments +[Python API]: /doc/api-reference +[magic commands]: + https://ipython.readthedocs.io/en/stable/interactive/magics.html