Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into gen-pyi
Browse files Browse the repository at this point in the history
  • Loading branch information
trim21 committed Dec 27, 2024
2 parents cf17469 + 712b4f0 commit 8c37fb1
Show file tree
Hide file tree
Showing 9 changed files with 301 additions and 98 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/ci_odev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

name: ODev CI

on:
push:
branches:
- main
pull_request:
branches:
- main
paths:
- "**/*.rs"
- ".github/workflows/ci_odev.yml"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true

jobs:
check_clippy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Setup Rust toolchain
uses: ./.github/actions/setup
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Cargo clippy
working-directory: dev
run: cargo clippy --all-targets --all-features -- -D warnings

test_dev:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Setup Rust toolchain
uses: ./.github/actions/setup
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Cargo Test
working-directory: dev
run: cargo test
Empty file.
141 changes: 141 additions & 0 deletions core/src/docs/rfcs/5444_operator_from_uri.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
- Proposal Name: `operator_from_uri`
- Start Date: 2024-12-23
- RFC PR: [apache/opendal#5444](https://github.com/apache/opendal/pull/5444)
- Tracking Issue: [apache/opendal#5445](https://github.com/apache/opendal/issues/5445)

# Summary

This RFC proposes adding URI-based configuration support to OpenDAL, allowing users to create operators directly from URIs. The proposal introduces a new `from_uri` API in both the `Operator` and `Configurator` traits, along with an `OperatorRegistry` to manage operator factories.

# Motivation

Currently, creating an operator in OpenDAL requires explicit configuration through builder patterns. While this approach provides type safety and clear documentation, it can be verbose and inflexible for simple use cases. Many storage systems are naturally identified by URIs (e.g., `s3://bucket/path`, `fs:///path/to/dir`).

Adding URI-based configuration would:

- Simplify operator creation for common use cases
- Enable configuration via connection strings (common in many applications)
- Make OpenDAL more approachable for new users
- Allow dynamic operator creation based on runtime configuration

# Guide-level explanation

The new API allows creating operators directly from URIs:

```rust
// Create an operator using URI
let op = Operator::from_uri("s3://my-bucket/path", vec![
("endpoint".to_string(), "http://localhost:8080"to_string()),
])?;

// Users can pass options through the URI along with additional key-value pairs
// The extra options will override identical options specified in the URI
let op = Operator::from_uri("s3://my-bucket/path?region=us-east-1", vec![
("endpoint".to_string(), "http://localhost:8080"to_string()),
])?;

// Create a file system operator
let op = Operator::from_uri("fs:///tmp/test", vec![])?;
```

OpenDAL will, by default, register services enabled by features in a global `OperatorRegistry`. Users can also create custom operator registries to support their own schemes or additional options.

```
// Using with custom registry
let registry = OperatorRegistry::new();
registry.register("custom", my_factory);
let op = registry.parse("custom://endpoint", options)?;
```

# Reference-level explanation

The implementation consists of three main components:

1. The `OperatorFactory` and `OperatorRegistry`:

`OperatorFactory` is a function type that takes a URI and a map of options and returns an `Operator`. `OperatorRegistry` manages operator factories for different schemes.

```rust
type OperatorFactory = fn(http::Uri, HashMap<String, String>) -> Result<Operator>;

pub struct OperatorRegistry { ... }

impl OperatorRegistry {
fn register(&self, scheme: &str, factory: OperatorFactory) {
...
}

fn parse(&self, uri: &str, options: impl IntoIterator<Item = (String, String)>) -> Result<Operator> {
...
}
}
```

2. The `Configurator` trait extension:

`Configurator` will add a new API to create a configuration from a URI and options. OpenDAL will provide default implementations for common configurations. But services can override this method to support their own special needs.

For example, S3 might need to extract the `bucket` and `region` from the URI when possible.

```rust
impl Configurator for S3Config {
fn from_uri(uri: &str, options: impl IntoIterator<Item = (String, String)>) -> Result<Self> {
...
}
}
```

3. The `Operator` `from_uri` method:

The `Operator` trait will add a new `from_uri` method to create an operator from a URI and options. This method will use the global `OperatorRegistry` to find the appropriate factory for the scheme.

```rust
impl Operator {
pub fn from_uri(
uri: &str,
options: impl IntoIterator<Item = (String, String)>,
) -> Result<Self> {
...
}
}
```

We are intentionally using `&str` instead of `Scheme` here to simplify working with external components outside this crate. Additionally, we plan to remove `Scheme` from our public API soon to enable splitting OpenDAL into multiple crates.

# Drawbacks

- Increases API surface area
- Less type safety compared to builder patterns
- Potential for confusing error messages with invalid URIs
- Need to maintain backwards compatibility

# Rationale and alternatives

Alternatives considered:

1. Connection string format instead of URIs
2. Builder pattern with URI parsing
3. Macro-based configuration

URI-based configuration was chosen because:

- URIs are widely understood
- Natural fit for storage locations
- Extensible through custom schemes
- Common in similar tools

# Prior art

Similar patterns exist in:

- Database connection strings (PostgreSQL, MongoDB)
- [`object_store::parse_url`](https://docs.rs/object_store/latest/object_store/fn.parse_url.html)

# Unresolved questions

None

# Future possibilities

- Support for connection string format.
- Configuration presets like `r2` and `s3` with directory bucket enabled.
4 changes: 4 additions & 0 deletions core/src/docs/rfcs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,7 @@ pub mod rfc_4638_executor {}
/// Remove metakey
#[doc = include_str!("5314_remove_metakey.md")]
pub mod rfc_5314_remove_metakey {}

/// Operator from uri
#[doc = include_str!("5444_operator_from_uri.md")]
pub mod rfc_5444_operator_from_uri {}
47 changes: 0 additions & 47 deletions dev/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions dev/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,10 @@ version = "0.0.1"
[dependencies]
anyhow = "1.0.95"
clap = { version = "4.5.23", features = ["derive"] }
enquote = "1.1.0"
env_logger = "0.11.6"
itertools = "0.13.0"
log = "0.4.22"
proc-macro2 = { version = "1.0.91", features = ["span-locations"] }
rinja = "0.3.5"
syn = { version = "2.0.91", features = ['parsing', 'full', 'derive', 'visit', 'extra-traits'] }
syn = { version = "2.0.91", features = ["visit", "full", "extra-traits"] }

[dev-dependencies]
pretty_assertions = "1.4.1"
15 changes: 5 additions & 10 deletions dev/src/generate/binding_python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
use crate::generate::parser::Services;
use anyhow::Result;
use rinja::Template;
use itertools::Itertools;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
Expand Down Expand Up @@ -52,15 +51,11 @@ pub fn generate(project_root: PathBuf, services: &Services) -> Result<()> {

// move required options at beginning.
for srv in &mut v {
srv.1.config = srv
.1
.config
.clone()
.into_iter()
.enumerate()
.sorted_by_key(|(i, x)| (x.optional, *i))
.map(|(_, e)| e)
.collect();
let mut v = Vec::from_iter(srv.1.config.clone().into_iter().enumerate());

v.sort_by_key(|a| (a.1.optional, a.0));

srv.1.config = v.iter().map(|f| f.1.clone()).collect();
}

let tmpl = PythonTemplate { services: v };
Expand Down
Loading

0 comments on commit 8c37fb1

Please sign in to comment.