# duckdb-query.yaml
# Flow: download a public salaries CSV, aggregate it with a DuckDB SQL query,
# and export the aggregated rows as a CSV file.
id: duckdb-query
namespace: company.team

tasks:
  # Step 1: fetch the raw dataset over HTTP. The downloaded file is referenced
  # by the next task through `outputs.download_csv.uri`.
  - id: download_csv
    type: io.kestra.plugin.core.http.Download
    description: salaries of data professionals from 2020 to 2023 (source ai-jobs.net)
    uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/salaries.csv

  # Step 2: compute the average salary per job title, keeping only titles that
  # appear more than 10 times, ordered from highest to lowest average.
  - id: avg_salary_by_job_title
    type: io.kestra.plugin.jdbc.duckdb.Query
    # Expose the downloaded file to the query as `data.csv`; the SQL below
    # reads it from `{{ workingDir }}/data.csv`.
    inputFiles:
      data.csv: "{{ outputs.download_csv.uri }}"
    sql: |
      SELECT
        job_title,
        ROUND(AVG(salary),2) AS avg_salary
      FROM read_csv_auto('{{ workingDir }}/data.csv', header=True)
      GROUP BY job_title
      HAVING COUNT(job_title) > 10
      ORDER BY avg_salary DESC;
    # NOTE(review): STORE presumably persists the result set as a file whose
    # location is published as `outputs.avg_salary_by_job_title.uri` (consumed
    # by the `result` task) - confirm against the plugin documentation.
    fetchType: STORE

  # Step 3: convert the stored query result into a CSV file.
  # NOTE(review): the task type name suggests the stored result is in Ion
  # format - confirm.
  - id: result
    type: io.kestra.plugin.serdes.csv.IonToCsv
    from: "{{ outputs.avg_salary_by_job_title.uri }}"

# Descriptive metadata for this flow (title, description, tags, flags).
# NOTE(review): presumably used only for catalog/blueprint listing and not at
# execution time - confirm.
extend:
  title: >-
    Analyze salaries of data professionals using a SQL query in DuckDB,
    extract the results to a CSV file
  description: >-
    Download a CSV file and query data with DuckDB. Then, extract the
    results to a CSV file.
  tags:
    - DuckDB
    - SQL
    - Outputs
  ee: false
  demo: true
  meta_description: >-
    This flow will download a CSV file and query data with DuckDB.
    Then, extract the results to a CSV file.