diff --git a/00_notebooks/00_index.ipynb b/00_notebooks/00_index.ipynb
index 1fc0c4ed5..6a614462b 100644
--- a/00_notebooks/00_index.ipynb
+++ b/00_notebooks/00_index.ipynb
@@ -37,7 +37,7 @@
 "* [**Import into a lakeFS repository from multiple paths**](./import-multiple-buckets.ipynb) \n",
 "* [**ML Experimentation/Reproducibility 01 (Dogs)**](./ml-reproducibility.ipynb)\n",
 "* [**ML Experimentation 02 (Wine Quality)**](./ml-experimentation-wine-quality-prediction.ipynb) _See also the [accompanying blog](https://lakefs.io/blog/building-an-ml-experimentation-platform-for-easy-reproducibility-using-lakefs/)_\n",
- "* [**RBAC demo**](./rbac-demo.ipynb) ([lakefS Cloud](https://lakefs.cloud/register) only)\n",
+ "* [**RBAC demo**](./rbac-demo.ipynb) (requires [lakeFS Cloud](https://lakefs.cloud/register) or [lakeFS Enterprise](https://docs.lakefs.io/understand/enterprise/) on-prem)\n",
 "* [**Version Control of multi-buckets pipelines**](./version-control-of-multi-buckets-pipelines.ipynb) \n",
 "* [**Reprocess and Backfill Data with new ETL logic**](./reprocess-backfill-data.ipynb) \n",
 "* **lakeFS and Apache Iceberg**\n",
diff --git a/00_notebooks/data-lineage.ipynb b/00_notebooks/data-lineage.ipynb
index ec93f8405..75faa5edc 100644
--- a/00_notebooks/data-lineage.ipynb
+++ b/00_notebooks/data-lineage.ipynb
@@ -679,7 +679,7 @@
 },
 "outputs": [],
 "source": [
- "# The section below will only work on lakeFS cloud. \n",
+ "# The section below will only work on lakeFS Cloud or lakeFS Enterprise. \n",
 "# This cell will stop execution which is useful if the notebook has been \n",
 "# run from the top or is being run as part of automated testing.\n",
 "import sys\n",
@@ -700,7 +700,7 @@
 "id": "ba99c5a6-7df6-42b5-9a3f-74ba3d0f67c1",
 "metadata": {},
 "source": [
- "# Auditing (lakeFS Cloud only)\n",
+ "# Auditing (lakeFS Enterprise and lakeFS Cloud only)\n",
 "\n",
 "## Setup"
 ]
 }
diff --git a/00_notebooks/rbac-demo.ipynb b/00_notebooks/rbac-demo.ipynb
index 847be7dcd..a84af6a5b 100644
--- a/00_notebooks/rbac-demo.ipynb
+++ b/00_notebooks/rbac-demo.ipynb
@@ -19,8 +19,8 @@
 "source": [
 "## Prerequisites\n",
 "\n",
- "###### This Notebook requires connecting to lakeFS Cloud.\n",
- "###### Register for the lakeFS Cloud: https://lakefs.cloud/register"
+ "###### This Notebook requires connecting to lakeFS Cloud or lakeFS Enterprise.\n",
+ "###### Register for lakeFS Cloud: https://lakefs.cloud/register or contact us for a lakeFS Enterprise key: https://lakefs.io/contact-sales/"
 ]
 },
 {
@@ -62,7 +62,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "lakefsEndPoint = '127.0.0.1:8000' # e.g. 'https://username.aws_region_name.lakefscloud.io'\n",
+ "lakefsEndPoint = 'lakefs:8000' # e.g. 'https://username.aws_region_name.lakefscloud.io'\n",
 "lakefsAccessKey = 'AKIAIOSFOLKFSSAMPLES'\n",
 "lakefsSecretKey = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'"
 ]
 },
 {
@@ -83,7 +83,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "storageNamespace = 's3:///' # e.g. \"s3://username-lakefs-cloud/\""
+ "storageNamespace = 's3://example/' # e.g. \"s3://username-lakefs-cloud/\""
 ]
 },
 {
diff --git a/02_lakefs_enterprise/README.md b/02_lakefs_enterprise/README.md
new file mode 100644
index 000000000..a36a2ff59
--- /dev/null
+++ b/02_lakefs_enterprise/README.md
@@ -0,0 +1,53 @@
+# lakeFS Enterprise
+
+![lakeFS logo](../images/logo.png)
+
+**This sample repository captures a collection of notebooks, dockerized applications and code snippets that demonstrate how to use [lakeFS Enterprise](https://docs.lakefs.io/understand/enterprise/).**
+
+## Let's Get Started 👩🏻‍💻
+
+Clone this repository
+
+```bash
+git clone https://github.com/treeverse/lakeFS-samples.git
+cd lakeFS-samples/02_lakefs_enterprise
+```
+
+### **Run a lakeFS Enterprise server**
+
+Log in to [Treeverse Dockerhub](https://hub.docker.com/u/treeverse) with the token you have been granted so that the proprietary Fluffy image can be retrieved. [Contact Sales](https://lakefs.io/contact-sales/) to get the Fluffy token:
+
+```bash
+docker login -u externallakefs
+```
+
+Run the following command to provision a lakeFS Enterprise server, along with MinIO as the object store and a Jupyter environment:
+
+```bash
+docker compose up
+```
+
+Once the stack's up and running, open the Jupyter Notebook (http://localhost:8894) and check out the [catalog of sample notebooks](../00_notebooks/00_index.ipynb) to explore lakeFS.
+
+Once you've finished, run the following to remove all the containers:
+
+```bash
+docker compose down
+```
+
+## Environment Details
+
+* **Jupyter Notebook** is based on the [Jupyter PySpark notebook](https://hub.docker.com/r/jupyter/pyspark-notebook/) and provides an interactive environment in which to explore lakeFS using Python and PySpark.
+* **lakeFS Enterprise** is provisioned as part of this environment.
+* **MinIO** is provided as an S3-compatible object store. You can use other object stores instead, including S3, GCS, and Azure Blob Storage.
+
+### URLs and login details
+
+* Jupyter http://localhost:8894/
+* lakeFS http://localhost:8084/ (`AKIAIOSFOLKFSSAMPLES` / `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`)
+* MinIO http://localhost:9005/ (`minioadmin`/`minioadmin`)
+* Spark UI http://localhost:4044/
+
+## Got Questions or Want to Chat?
+
+👉🏻 Join the lakeFS Slack group - https://lakefs.io/slack
diff --git a/02_lakefs_enterprise/docker-compose.yml b/02_lakefs_enterprise/docker-compose.yml
new file mode 100644
index 000000000..136ec9f18
--- /dev/null
+++ b/02_lakefs_enterprise/docker-compose.yml
@@ -0,0 +1,128 @@
+version: "3.5"
+name: lakefs-enterprise-samples
+services:
+  jupyter-notebook:
+    build: ../jupyter
+    environment:
+      # log-level is set to WARN because of noisy stdout problem
+      # -> See https://github.com/jupyter-server/jupyter_server/issues/1279
+      - NOTEBOOK_ARGS=--log-level='WARN' --NotebookApp.token='' --NotebookApp.password='' --notebook-dir=/home/jovyan/notebooks
+    ports:
+      - 8894:8888 # Jupyter
+      - 4044:4040 # Spark
+    volumes:
+      - ../00_notebooks:/home/jovyan/notebooks
+      - ../data:/data
+
+  lakefs:
+    image: treeverse/lakefs:1
+    pull_policy: always
+    ports:
+      - "8084:8000"
+    depends_on:
+      postgres:
+        condition: service_healthy
+      minio-setup:
+        condition: service_completed_successfully
+    environment:
+      - LAKEFS_BLOCKSTORE_TYPE=s3
+      - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true
+      - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000
+      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=minioadmin
+      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=minioadmin
+      - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=some random secret string
+      - LAKEFS_LOGGING_LEVEL=INFO
+      - LAKEFS_STATS_ENABLED=${LAKEFS_STATS_ENABLED:-1}
+      - LAKECTL_CREDENTIALS_ACCESS_KEY_ID=AKIAIOSFOLKFSSAMPLES
+      - LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+      - LAKECTL_SERVER_ENDPOINT_URL=http://localhost:8000
+      - LAKEFS_DATABASE_TYPE=postgres
+      - LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgres://lakefs:lakefs@postgres/postgres?sslmode=disable
+      - LAKEFS_AUTH_API_ENDPOINT=http://fluffy:9006/api/v1
+      - LAKEFS_AUTH_UI_CONFIG_RBAC=internal
+    entrypoint: ["/bin/sh", "-c"]
+    command:
+      - |
+        lakefs setup --user-name everything-bagel --access-key-id "$$LAKECTL_CREDENTIALS_ACCESS_KEY_ID" --secret-access-key "$$LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY" || true
+        lakefs run &
+        echo "---- Creating repository ----"
+        wait-for -t 60 lakefs:8000 -- curl -u "$$LAKECTL_CREDENTIALS_ACCESS_KEY_ID":"$$LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY" -X POST -H "Content-Type: application/json" -d '{ "name": "quickstart", "storage_namespace": "s3://quickstart", "default_branch": "main", "sample_data": true }' http://localhost:8000/api/v1/repositories || true
+        echo ""
+        wait-for -t 60 minio:9000 && echo '------------------------------------------------
+
+        MinIO admin: http://127.0.0.1:9005/
+
+        Username : minioadmin
+        Password : minioadmin
+        '
+        echo "------------------------------------------------"
+        wait-for -t 60 jupyter-notebook:8888 && echo '
+
+        Jupyter: http://127.0.0.1:8894/
+        '
+        echo "------------------------------------------------"
+        echo ""
+        echo "lakeFS Web UI: http://127.0.0.1:8084/ >(._.)<"
+        echo "                                       ( )_ "
+        echo ""
+        echo " Access Key ID    : $$LAKECTL_CREDENTIALS_ACCESS_KEY_ID"
+        echo " Secret Access Key: $$LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY"
+        echo ""
+        echo "-------- Let's go and have axolotl fun! --------"
+        echo ""
+        wait
+
+  minio-setup:
+    image: minio/mc:RELEASE.2023-05-18T16-59-00Z
+    environment:
+      - MC_HOST_lakefs=http://minioadmin:minioadmin@minio:9000
+    depends_on:
+      - minio
+    volumes:
+      - ../data:/data
+    entrypoint: ["/bin/sh", "-c"]
+    command:
+      - |
+        mc mb lakefs/quickstart lakefs/example lakefs/sample-data
+        mc cp --recursive /data/* lakefs/sample-data 1>/dev/null # don't be so noisy 🤫
+
+  minio:
+    image: minio/minio:RELEASE.2023-05-18T00-05-36Z
+    ports:
+      - "9004:9000"
+      - "9005:9001"
+    entrypoint: ["minio", "server", "/data", "--console-address", ":9001"]
+
+  postgres:
+    image: postgres:14
+    ports:
+      - "5433:5432"
+    environment:
+      POSTGRES_USER: lakefs
+      POSTGRES_PASSWORD: lakefs
+    healthcheck:
+      test: ["CMD", "pg_isready", "-U", "lakefs"]
+      interval: 10s
+      retries: 5
+      start_period: 5s
+    restart: always
+
+  fluffy:
+    image: "${FLUFFY_REPO:-treeverse}/fluffy:${TAG:-0.4.0}"
+    command: "${COMMAND:-run}"
+    ports:
+      - "8085:8000"
+      - "9006:9000"
+    depends_on:
+      - "postgres"
+    environment:
+      - FLUFFY_LOGGING_LEVEL=INFO
+      - FLUFFY_DATABASE_TYPE=postgres
+      - FLUFFY_DATABASE_POSTGRES_CONNECTION_STRING=postgres://lakefs:lakefs@postgres/postgres?sslmode=disable
+      - FLUFFY_AUTH_ENCRYPT_SECRET_KEY=some random secret string
+      - FLUFFY_AUTH_SERVE_LISTEN_ADDRESS=0.0.0.0:9006
+    entrypoint: [ "/app/wait-for", "postgres:5432", "--", "/app/fluffy" ]
+
+networks:
+  default:
+    name: bagel
diff --git a/README.md b/README.md
index 45b4e2693..e4dbe4a37 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,9 @@ _Incorporating the Docker Compose formally known as **Everything Bagel**._
 
 _lakeFS is a popular open-source solution for managing data. It provides a consistent and scalable data management layer on top of cloud storage, such as Amazon S3, Azure Blob Storage, or Google Cloud Storage. It allows users to create and manage data in a version-controlled and immutable manner, and offers features such as data governance, data lineage, and data access controls. lakeFS is compatible with a wide range of data processing frameworks and tools._
 
+### **Go to the [lakefs_enterprise](./02_lakefs_enterprise/) folder if you want to use [lakeFS Enterprise](https://docs.lakefs.io/understand/enterprise/) instead of open-source lakeFS**
+
+
 ## Let's Get Started 👩🏻‍💻
 
 Clone this repository