Skip to content

Commit

Permalink
Migrate documentation for all string functions from scalar_functions…
Browse files Browse the repository at this point in the history
….md to code (apache#12775)

* Added documentation for string and unicode functions.

* Fixed issues with aliases.

* Cargo fmt.

* Minor doc fixes.

* Update docs for var_pop/samp

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Omega359 and alamb authored Oct 7, 2024
1 parent ef227f4 commit 5360d20
Show file tree
Hide file tree
Showing 37 changed files with 2,076 additions and 827 deletions.
9 changes: 5 additions & 4 deletions datafusion/core/src/bin/print_functions_docs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,14 @@ fn print_docs(
.find(|f| f.get_name() == name || f.get_aliases().contains(&name))
.unwrap();

let name = f.get_name();
let aliases = f.get_aliases();
let documentation = f.get_documentation();

// if this name is an alias we need to display what it's an alias of
if aliases.contains(&name) {
let _ = write!(docs, "_Alias of [{name}](#{name})._");
let fname = f.get_name();
let _ = writeln!(docs, r#"### `{name}`"#);
let _ = writeln!(docs, "_Alias of [{fname}](#{fname})._");
continue;
}

Expand Down Expand Up @@ -183,10 +184,10 @@ fn print_docs(

// next, aliases
if !f.get_aliases().is_empty() {
let _ = write!(docs, "#### Aliases");
let _ = writeln!(docs, "#### Aliases");

for alias in f.get_aliases() {
let _ = writeln!(docs, "- {alias}");
let _ = writeln!(docs, "- {}", alias.replace("_", r#"\_"#));
}
}

Expand Down
51 changes: 33 additions & 18 deletions datafusion/functions/src/string/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,6 @@ use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::{Arc, OnceLock};

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `ascii` function.
///
/// The value is built on the first call and stored in the `DOCUMENTATION`
/// `OnceLock`; later calls hand back the same reference. The `unwrap`
/// panics if the builder fails to produce a `Documentation`.
fn get_ascii_doc() -> &'static Documentation {
    // Assembles the documentation; handed to the `OnceLock` as its initializer.
    fn build() -> Documentation {
        let summary = "Returns the ASCII value of the first character in a string.";
        let str_arg_description = "String expression to operate on. Can be a constant, column, or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View.";

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(summary)
            .with_syntax_example("ascii(str)")
            .with_argument("str", str_arg_description)
            .with_related_udf("chr")
            .build()
            .unwrap()
    }

    DOCUMENTATION.get_or_init(build)
}

#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
Expand Down Expand Up @@ -96,6 +78,39 @@ impl ScalarUDFImpl for AsciiFunc {
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `ascii` function.
///
/// The value is built on the first call and stored in the `DOCUMENTATION`
/// `OnceLock`; later calls hand back the same reference. The `unwrap`
/// panics if the builder fails to produce a `Documentation`.
fn get_ascii_doc() -> &'static Documentation {
    // Assembles the documentation; handed to the `OnceLock` as its initializer.
    fn build() -> Documentation {
        let summary =
            "Returns the Unicode character code of the first character in a string.";
        // Rendered verbatim as a fenced SQL block in the generated docs.
        let sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#;

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(summary)
            .with_syntax_example("ascii(str)")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_related_udf("chr")
            .build()
            .unwrap()
    }

    DOCUMENTATION.get_or_init(build)
}

fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
Expand Down
40 changes: 35 additions & 5 deletions datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;

use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::{ColumnarValue, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::utils::utf8_to_int_type;

#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -88,4 +88,34 @@ impl ScalarUDFImpl for BitLengthFunc {
},
}
}

/// Returns the documentation for `bit_length`.
///
/// Always yields `Some`, wrapping the reference returned by
/// `get_bit_length_doc`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_bit_length_doc())
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `bit_length` function.
///
/// The value is built on the first call and stored in the `DOCUMENTATION`
/// `OnceLock`; later calls hand back the same reference. The `unwrap`
/// panics if the builder fails to produce a `Documentation`.
fn get_bit_length_doc() -> &'static Documentation {
    // Assembles the documentation; handed to the `OnceLock` as its initializer.
    fn build() -> Documentation {
        // Rendered verbatim as a fenced SQL block in the generated docs.
        let sql_example = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#;

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description("Returns the bit length of a string.")
            .with_syntax_example("bit_length(str)")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_related_udf("length")
            .with_related_udf("octet_length")
            .build()
            .unwrap()
    }

    DOCUMENTATION.get_or_init(build)
}
41 changes: 35 additions & 6 deletions datafusion/functions/src/string/btrim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@
// specific language governing permissions and limitations
// under the License.

use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{ArrayRef, OffsetSizeTrait};
use arrow::datatypes::DataType;
use std::any::Any;

use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ColumnarValue, Volatility};
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use std::any::Any;
use std::sync::OnceLock;

/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed.
/// btrim('xyxtrimyyx', 'xyz') = 'trim'
Expand Down Expand Up @@ -109,6 +109,35 @@ impl ScalarUDFImpl for BTrimFunc {
/// Returns the alternative names for this function, borrowed from the
/// `aliases` field of `self`.
fn aliases(&self) -> &[String] {
&self.aliases
}

/// Returns the documentation for `btrim`.
///
/// Always yields `Some`, wrapping the reference returned by
/// `get_btrim_doc`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_btrim_doc())
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `btrim` function.
///
/// The value is built on the first call and stored in the `DOCUMENTATION`
/// `OnceLock`; later calls hand back the same reference. The `unwrap`
/// panics if the builder fails to produce a `Documentation`.
fn get_btrim_doc() -> &'static Documentation {
    // Assembles the documentation; handed to the `OnceLock` as its initializer.
    fn build() -> Documentation {
        let summary = "Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string.";
        // Rendered verbatim as a fenced SQL block in the generated docs.
        let sql_example = r#"```sql
> select btrim('__datafusion____', '_');
+-------------------------------------------+
| btrim(Utf8("__datafusion____"),Utf8("_")) |
+-------------------------------------------+
| datafusion |
+-------------------------------------------+
```"#;
        let trim_str_description = "String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._";

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(summary)
            .with_syntax_example("btrim(str[, trim_str])")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_argument("trim_str", trim_str_description)
            .with_related_udf("ltrim")
            .with_related_udf("rtrim")
            .build()
            .unwrap()
    }

    DOCUMENTATION.get_or_init(build)
}

#[cfg(test)]
Expand Down
39 changes: 35 additions & 4 deletions datafusion/functions/src/string/chr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@
// under the License.

use std::any::Any;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use arrow::array::ArrayRef;
use arrow::array::StringArray;
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Int64;
use arrow::datatypes::DataType::Utf8;

use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::{ColumnarValue, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::utils::make_scalar_function;

/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
/// chr(65) = 'A'
pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down Expand Up @@ -99,4 +99,35 @@ impl ScalarUDFImpl for ChrFunc {
/// Evaluates the function on `args`.
///
/// Wraps the `chr` kernel with `make_scalar_function` and immediately calls
/// the resulting function with `args`, returning its result unchanged.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
// NOTE(review): the empty vec is presumably the list of per-argument hints
// accepted by `make_scalar_function` — confirm against its definition.
make_scalar_function(chr, vec![])(args)
}

/// Returns the documentation for `chr`.
///
/// Always yields `Some`, wrapping the reference returned by `get_chr_doc`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_chr_doc())
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `chr` function.
///
/// The value is built on the first call and stored in the `DOCUMENTATION`
/// `OnceLock`; later calls hand back the same reference. The `unwrap`
/// panics if the builder fails to produce a `Documentation`.
fn get_chr_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(
                "Returns the character with the specified ASCII or Unicode code value.",
            )
            .with_syntax_example("chr(expression)")
            .with_sql_example(
                r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
            )
            // `chr` takes an integer code point (see the `Int64` example above),
            // so the argument must not be labelled as a string expression.
            .with_standard_argument("expression", "Integer")
            .with_related_udf("ascii")
            .build()
            .unwrap()
    })
}
40 changes: 35 additions & 5 deletions datafusion/functions/src/string/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@
use arrow::array::{as_largestring_array, Array};
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use crate::string::common::*;
use crate::string::concat;
use datafusion_common::cast::{as_string_array, as_string_view_array};
use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{lit, ColumnarValue, Expr, Volatility};
use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::string::common::*;
use crate::string::concat;

#[derive(Debug)]
pub struct ConcatFunc {
signature: Signature,
Expand Down Expand Up @@ -244,6 +244,36 @@ impl ScalarUDFImpl for ConcatFunc {
) -> Result<ExprSimplifyResult> {
simplify_concat(args)
}

/// Returns the documentation for `concat`.
///
/// Always yields `Some`, wrapping the reference returned by
/// `get_concat_doc`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_concat_doc())
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `concat` function.
///
/// The value is built on the first call and stored in the `DOCUMENTATION`
/// `OnceLock`; later calls hand back the same reference. The `unwrap`
/// panics if the builder fails to produce a `Documentation`.
fn get_concat_doc() -> &'static Documentation {
    // Assembles the documentation; handed to the `OnceLock` as its initializer.
    fn build() -> Documentation {
        // Rendered verbatim as a fenced SQL block in the generated docs.
        let sql_example = r#"```sql
> select concat('data', 'f', 'us', 'ion');
+-------------------------------------------------------+
| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
+-------------------------------------------------------+
| datafusion |
+-------------------------------------------------------+
```"#;
        let str_n_description = "Subsequent string expressions to concatenate.";

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description("Concatenates multiple strings together.")
            .with_syntax_example("concat(str[, ..., str_n])")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_argument("str_n", str_n_description)
            .with_related_udf("concat_ws")
            .build()
            .unwrap()
    }

    DOCUMENTATION.get_or_init(build)
}

pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
Expand Down
Loading

0 comments on commit 5360d20

Please sign in to comment.