-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Andrew D.Gramigna
committed
Oct 27, 2023
1 parent
14ef730
commit ef1e286
Showing
1 changed file
with
184 additions
and
0 deletions.
There are no files selected for viewing
184 changes: 184 additions & 0 deletions
184
levergreen_dbt/models/staging/ashby/_ashby__sources.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
# dbt source definitions for the raw Ashby tables.
# Declares one source (`ashby`, schema `public`) with three tables and the
# schema/data tests that are run against each of them.
version: 2

sources:
  - name: ashby
    description: >
      Data from careers pages of companies who use ashby. Called from external Python script and cleaned slightly.
    schema: public
    loader: requests
    # `updated_at` is documented below as UNIX time, so it is converted to a
    # timestamp before being used for the freshness comparison.
    loaded_at_field: to_timestamp(updated_at)
    # Freshness thresholds: warn once data is older than 1 day, error after 2.
    freshness:
      warn_after: {count: 1, period: day}
      error_after: {count: 2, period: day}

    tables:
      # Department records; `parent_department_id` links a department to its
      # parent.
      - name: ashby_job_departments
        description: >
          Contains job department information from ashby
        tests:
          # Checks the table's columns against this exact set of names.
          - dbt_expectations.expect_table_columns_to_match_set:
              column_list:
                - id
                - levergreen_id
                - created_at
                - updated_at
                - company_name
                - ashby_job_board_source
                - run_hash
                - raw_json_file_location
                - existing_json_used
                - department_id
                - department_name
                - parent_department_id
        columns:
          - name: id
            description: serial id created by postgres upon insertion
            tests:
              - unique
              - not_null
          - name: levergreen_id
            description: Id from Levergreen scraper. Unique relative to the JSON file used
            tests:
              - not_null
          - name: created_at
            description: Timestamp of when the ashby site was scraped, in UNIX time. If we used existing JSON, this field is not updated.
            tests:
              - not_null
          - name: updated_at
            description: Timestamp of when the ashby site was scraped, in UNIX time.
            tests:
              - not_null
          - name: ashby_job_board_source
            description: Ashby careers page source.
            tests:
              - not_null
          - name: company_name
            description: Company name from ashby. Taken by grabbing the end of the source.
            tests:
              - not_null
          - name: run_hash
            description: Hashed value (generated with hash ids) identifying a particular scrape run
          - name: existing_json_used
            description: Boolean whether or not this was a fresh request
          - name: raw_json_file_location
            description: S3 bucket where the scraped data is stored
          - name: department_id
            description: Ashby specific id for the department
          - name: parent_department_id
            description: Parent id of the department.
          - name: department_name
            description: Name of the ashby Department

      # Secondary locations, one row per (opening, secondary location) pairing
      # -- presumably; confirm against the loader script.
      - name: ashby_job_locations
        description: >
          Contains job location information from ashby
        tests:
          # Checks the table's columns against this exact set of names.
          - dbt_expectations.expect_table_columns_to_match_set:
              column_list:
                - id
                - levergreen_id
                - created_at
                - updated_at
                - company_name
                - ashby_job_board_source
                - run_hash
                - raw_json_file_location
                - existing_json_used
                - opening_id
                - secondary_location_id
                - secondary_location_name
        columns:
          - name: id
            description: serial id created by postgres upon insertion
            tests:
              - unique
              - not_null
          - name: levergreen_id
            description: Id from Levergreen scraper. Unique relative to the JSON file used
            tests:
              - not_null
          - name: created_at
            description: Timestamp of when the ashby site was scraped, in UNIX time. If we used existing JSON, this field is not updated.
            tests:
              - not_null
          - name: updated_at
            description: Timestamp of when the ashby site was scraped, in UNIX time.
            tests:
              - not_null
          - name: ashby_job_board_source
            description: Ashby careers page source.
            tests:
              - not_null
          - name: company_name
            description: Company name from ashby. Taken by grabbing the end of the source.
            tests:
              - not_null
          - name: run_hash
            description: Hashed value (generated with hash ids) identifying a particular scrape run
          - name: existing_json_used
            description: Boolean whether or not this was a fresh request
          - name: raw_json_file_location
            description: S3 bucket where the scraped data is stored
          - name: secondary_location_id
            description: Secondary Location ids from ashby
          - name: secondary_location_name
            description: Name of the secondary location
          - name: opening_id
            description: Foreign key to the job opening that this secondary location belongs to

      # Job openings with their primary location, department and employment
      # details.
      - name: ashby_jobs_outline
        description: >
          Contains job outline information from ashby
        tests:
          # Checks the table's columns against this exact set of names.
          - dbt_expectations.expect_table_columns_to_match_set:
              column_list:
                - id
                - levergreen_id
                - created_at
                - updated_at
                - company_name
                - ashby_job_board_source
                - run_hash
                - raw_json_file_location
                - existing_json_used
                - opening_id
                - opening_name
                - department_id
                - location_id
                - location_name
                - employment_type
                - compensation_tier
                - opening_link
        columns:
          - name: id
            description: serial id created by postgres upon insertion
            tests:
              - unique
              - not_null
          - name: levergreen_id
            description: Id from Levergreen scraper. Unique relative to the JSON file used
            tests:
              - not_null
          - name: created_at
            description: Timestamp of when the ashby site was scraped, in UNIX time. If we used existing JSON, this field is not updated.
            tests:
              - not_null
          - name: updated_at
            description: Timestamp of when the ashby site was scraped, in UNIX time.
            tests:
              - not_null
          - name: ashby_job_board_source
            description: Ashby careers page source.
            tests:
              - not_null
          - name: company_name
            description: Company name from ashby. Taken by grabbing the end of the source.
            tests:
              - not_null
          - name: run_hash
            description: Hashed value (generated with hash ids) identifying a particular scrape run
          - name: existing_json_used
            description: Boolean whether or not this was a fresh request
          - name: raw_json_file_location
            description: S3 bucket where the scraped data is stored
          - name: opening_id
            description: Job opening id from ashby
            tests:
              - not_null
          - name: opening_name
            description: Name of the job opening
            tests:
              - not_null
          - name: department_id
            description: department_id of the job opening. Most specific department is included (furthest level down).
          - name: location_id
            description: Primary location id
            tests:
              - not_null
          - name: location_name
            description: Name of the primary location
            tests:
              - not_null
          - name: employment_type
            description: Type of employment
            tests:
              # Only these employment types are accepted.
              - accepted_values:
                  values: ["Intern", "Contract", "PartTime", "FullTime", "Temporary"]
          - name: compensation_tier
            description: Salary Range as text
          - name: opening_link
            description: Link of the job posting