Skip to content

Commit

Permalink
add source config for ashby dbt
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrew D.Gramigna committed Oct 27, 2023
1 parent 14ef730 commit ef1e286
Showing 1 changed file with 184 additions and 0 deletions.
184 changes: 184 additions & 0 deletions levergreen_dbt/models/staging/ashby/_ashby__sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# dbt source definitions for the raw Ashby tables.
# Declares one source (`ashby`, schema `public`) with three tables, a
# source-level freshness policy, and schema/column tests for each table.
version: 2

sources:
  - name: ashby
    description: >
      Data from careers pages of companies who use ashby. Called from external Python script and cleaned slightly.
    schema: public
    loader: requests
    # `updated_at` is documented below as UNIX time, so it is converted to a
    # timestamp before being used for the freshness comparison.
    loaded_at_field: to_timestamp(updated_at)
    # Freshness policy inherited by every table in this source:
    # warn after 1 day without new rows, error after 2 days.
    freshness:
      warn_after: {count: 1, period: day}
      error_after: {count: 2, period: day}

    tables:
      # NOTE(review): several column descriptions in this file mention "HTML"
      # while the columns themselves refer to JSON (`raw_json_file_location`,
      # `existing_json_used`) - possibly copied from another scraper's
      # source file; confirm the wording.
      # NOTE(review): the `run_hash` description ends mid-sentence
      # ("... of a particular scraped") - confirm the intended text.
      # NOTE(review): `existing_json_used` is described as "whether or not this
      # was a fresh request"; the column name suggests the opposite polarity -
      # verify which value means what.

      - name: ashby_job_departments
        description: >
          Contains job department information from ashby
        tests:
          # Pins the table's column set so schema drift in the loader is
          # caught by the test run (test from the dbt_expectations package).
          - dbt_expectations.expect_table_columns_to_match_set:
              column_list: [
                "id", "levergreen_id", "created_at", "updated_at",
                "company_name", "ashby_job_board_source", "run_hash",
                "raw_json_file_location", "existing_json_used",
                "department_id", "department_name", "parent_department_id"
              ]
        columns:
          # --- Scraper bookkeeping columns (shared by all ashby tables) ---
          - name: id
            description: serial id created by postgres upon insertion
            tests:
              - unique
              - not_null
          - name: levergreen_id
            description: Id from Levergreen scraper. Unique relative to the HTML file used
            tests:
              - not_null
          - name: created_at
            description: Timestamp of when the ashby site was scraped, in UNIX time. If we used existing HTML, this field is not updated.
            tests:
              - not_null
          - name: updated_at
            description: Timestamp of when the ashby site was scraped, in UNIX time.
            tests:
              - not_null
          - name: ashby_job_board_source
            description: Ashby careers page source.
            tests:
              - not_null
          - name: company_name
            description: Company name from ashby. Taken by grabbing the end of the source.
            tests:
              - not_null
          - name: run_hash
            description: Hashed value using hash ids to identify the id of a particular scraped
          - name: existing_json_used
            description: Boolean whether or not this was a fresh request
          - name: raw_json_file_location
            description: S3 bucket where the scraped data is stored
          # --- Department-specific columns ---
          - name: department_id
            description: Ashby specific id for the department
          - name: parent_department_id
            description: Parent id of the department.
          - name: department_name
            description: Name of the ashby Department

      - name: ashby_job_locations
        description: >
          Contains job location information from ashby
        tests:
          # Pins the table's column set so schema drift in the loader is
          # caught by the test run (test from the dbt_expectations package).
          - dbt_expectations.expect_table_columns_to_match_set:
              column_list: [
                "id", "levergreen_id", "created_at", "updated_at",
                "company_name", "ashby_job_board_source", "run_hash",
                "raw_json_file_location", "existing_json_used",
                "opening_id", "secondary_location_id",
                "secondary_location_name"
              ]
        columns:
          # --- Scraper bookkeeping columns (shared by all ashby tables) ---
          - name: id
            description: serial id created by postgres upon insertion
            tests:
              - unique
              - not_null
          - name: levergreen_id
            description: Id from Levergreen scraper. Unique relative to the HTML file used
            tests:
              - not_null
          - name: created_at
            description: Timestamp of when the ashby site was scraped, in UNIX time. If we used existing HTML, this field is not updated.
            tests:
              - not_null
          - name: updated_at
            description: Timestamp of when the ashby site was scraped, in UNIX time.
            tests:
              - not_null
          - name: ashby_job_board_source
            description: Ashby careers page source.
            tests:
              - not_null
          - name: company_name
            description: Company name from ashby. Taken by grabbing the end of the source.
            tests:
              - not_null
          - name: run_hash
            description: Hashed value using hash ids to identify the id of a particular scraped
          - name: existing_json_used
            description: Boolean whether or not this was a fresh request
          - name: raw_json_file_location
            description: S3 bucket where the scraped data is stored
          # --- Location-specific columns ---
          - name: secondary_location_id
            description: Secondary Location ids from ashby
          - name: secondary_location_name
            description: Name of the secondary location
          - name: opening_id
            description: foreign key to tie the secondary location to

      - name: ashby_jobs_outline
        description: >
          Contains job outline information from ashby
        tests:
          # Pins the table's column set so schema drift in the loader is
          # caught by the test run (test from the dbt_expectations package).
          - dbt_expectations.expect_table_columns_to_match_set:
              column_list: [
                "id", "levergreen_id", "created_at", "updated_at",
                "company_name", "ashby_job_board_source", "run_hash",
                "raw_json_file_location", "existing_json_used",
                "opening_id", "opening_name", "department_id",
                "location_id", "location_name", "employment_type",
                "compensation_tier", "opening_link"
              ]
        columns:
          # --- Scraper bookkeeping columns (shared by all ashby tables) ---
          - name: id
            description: serial id created by postgres upon insertion
            tests:
              - unique
              - not_null
          - name: levergreen_id
            description: Id from Levergreen scraper. Unique relative to the HTML file used
            tests:
              - not_null
          - name: created_at
            description: Timestamp of when the ashby site was scraped, in UNIX time. If we used existing HTML, this field is not updated.
            tests:
              - not_null
          - name: updated_at
            description: Timestamp of when the ashby site was scraped, in UNIX time.
            tests:
              - not_null
          - name: ashby_job_board_source
            description: Ashby careers page source.
            tests:
              - not_null
          - name: company_name
            description: Company name from ashby. Taken by grabbing the end of the source.
            tests:
              - not_null
          - name: run_hash
            description: Hashed value using hash ids to identify the id of a particular scraped
          - name: existing_json_used
            description: Boolean whether or not this was a fresh request
          - name: raw_json_file_location
            description: S3 bucket where the scraped data is stored
          # --- Job-opening columns ---
          - name: opening_id
            description: Job opening id from ashby
            tests:
              - not_null
          - name: opening_name
            description: Name of the job opening
            tests:
              - not_null
          - name: department_id
            description: department_id of the job opening. Most specific opening is included (furthest level down).
          - name: location_id
            description: Primary location id
            tests:
              - not_null
          - name: location_name
            description: Name of the primary location
            tests:
              - not_null
          - name: employment_type
            description: Type of employment
            tests:
              # Any value outside this list fails the test; no not_null test
              # is declared for this column.
              - accepted_values:
                  values: ["Intern", "Contract", "PartTime", "FullTime", "Temporary"]
          - name: compensation_tier
            description: Salary Range as text
          - name: opening_link
            description: Link of the job posting

0 comments on commit ef1e286

Please sign in to comment.