diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..198d0f1 --- /dev/null +++ b/dockerfile @@ -0,0 +1,22 @@ +FROM rust:latest AS builder + +WORKDIR /usr/src/app + +COPY Cargo.toml Cargo.lock ./ + +RUN mkdir src +RUN echo "fn main() {}" > src/main.rs +RUN cargo build --release +RUN rm -f target/release/deps/spooderman* + +COPY . . + +RUN cargo build --release + +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/src/app/target/release/spooderman /usr/local/bin/spooderman + +CMD ["/usr/local/bin/spooderman", "scrape_n_batch_insert"] diff --git a/sql/Classes/up.sql b/sql/Classes/up.sql index 4937161..f75cda9 100644 --- a/sql/Classes/up.sql +++ b/sql/Classes/up.sql @@ -13,5 +13,5 @@ CREATE TABLE Classes ( "consent" VARCHAR(255) NOT NULL, "mode" VARCHAR(255) NOT NULL, "class_notes" TEXT, - FOREIGN KEY ("course_id") REFERENCES Courses("subject_area_course_code") ON DELETE CASCADE + FOREIGN KEY ("course_id") REFERENCES Courses("course_code") ON DELETE CASCADE ); \ No newline at end of file diff --git a/sql/Courses/up.sql b/sql/Courses/up.sql index 2710b16..8ddb9e5 100644 --- a/sql/Courses/up.sql +++ b/sql/Courses/up.sql @@ -1,6 +1,6 @@ CREATE TABLE Courses ( - "subject_area_course_code" VARCHAR(8) PRIMARY KEY, --id - "subject_area_course_name" VARCHAR(255) NOT NULL, + "course_code" VARCHAR(8) PRIMARY KEY, --id + "course_name" VARCHAR(255) NOT NULL, "uoc" INT NOT NULL, "faculty" VARCHAR(255), "school" VARCHAR(255), diff --git a/sql/Times/up.sql b/sql/Times/up.sql index 426dc5e..e6c385c 100644 --- a/sql/Times/up.sql +++ b/sql/Times/up.sql @@ -8,5 +8,5 @@ CREATE TABLE Times ( "time" VARCHAR(100) NOT NULL, "weeks" VARCHAR(100) NOT NULL, FOREIGN KEY ("class_id") REFERENCES Classes("class_id") ON DELETE CASCADE, - FOREIGN KEY ("course_id") REFERENCES Courses("subject_area_course_code") ON DELETE CASCADE + FOREIGN KEY ("course_id") REFERENCES Courses("course_code") ON DELETE 
CASCADE ); diff --git a/src/class_scraper.rs b/src/class_scraper.rs index 5b5bcf0..f589d9f 100644 --- a/src/class_scraper.rs +++ b/src/class_scraper.rs @@ -8,8 +8,8 @@ use crate::{ #[derive(Debug)] pub struct Course { - pub subject_area_course_code: String, - pub subject_area_course_name: String, + pub course_code: String, + pub course_name: String, pub uoc: i32, pub faculty: Option, pub school: Option, @@ -49,15 +49,15 @@ pub struct Time { #[derive(Debug)] pub struct ClassScraper { - pub subject_area_course_code: String, - pub subject_area_course_name: String, + pub course_code: String, + pub course_name: String, pub uoc: i32, pub url: String, } impl ClassScraper { pub async fn scrape(&mut self) -> Result> { - println!("Currently working on {:?}", self.subject_area_course_code); + println!("Currently working on {:?}", self.course_code); let html = fetch_url(&self.url) .await .expect(&format!("Something was wrong with the URL: {}", self.url)); @@ -84,8 +84,8 @@ impl ClassScraper { .map(|course_name_words| String::from(course_name_words)) .collect(); let mut course_info = Course { - subject_area_course_code: self.subject_area_course_code.clone(), - subject_area_course_name: course_name_code_info.join(" "), + course_code: self.course_code.clone(), + course_name: course_name_code_info.join(" "), uoc: self.uoc, faculty: None, school: None, @@ -151,7 +151,7 @@ impl ClassScraper { course_info.classes = class_activity_information .into_par_iter() - .map(|class_data| parse_class_info(class_data, self.subject_area_course_code.clone())) + .map(|class_data| parse_class_info(class_data, self.course_code.clone())) .collect(); let _ = course_info .classes diff --git a/src/hasuragres_b_insert.rs b/src/hasuragres_b_insert.rs index ce91d69..7136e3c 100644 --- a/src/hasuragres_b_insert.rs +++ b/src/hasuragres_b_insert.rs @@ -87,8 +87,8 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box) -> Vec>, } @@ -51,9 +51,9 @@ impl SchoolAreaScraper { let document = 
scraper::Html::parse_document(&html); for row_node in document.select(&row_selector) { // Extract data from each row - let subject_area_course_code = + let course_code = extract_text(row_node.select(&code_selector).next().unwrap()); - let subject_area_course_name = + let course_name = extract_text(row_node.select(&name_selector).next().unwrap()); let url = get_html_link_to_page( row_node @@ -63,8 +63,8 @@ impl SchoolAreaScraper { ); let school = extract_text(row_node.select(&school_selector).next().unwrap()); let page = SchoolAreaPage { - subject_area_course_code, - subject_area_course_name, + course_code, + course_name, school, subject_area_scraper: Arc::new(Mutex::new(SubjectAreaScraper::new(url))), }; diff --git a/src/subject_area_scraper.rs b/src/subject_area_scraper.rs index b4baf1f..27bd9f7 100644 --- a/src/subject_area_scraper.rs +++ b/src/subject_area_scraper.rs @@ -32,9 +32,9 @@ impl SubjectAreaScraper { let document = scraper::Html::parse_document(&html); for row_node in document.select(&row_selector) { // Extract data from each row - let subject_area_course_code = + let course_code = extract_text(row_node.select(&code_selector).next().unwrap()); - let subject_area_course_name = + let course_name = extract_text(row_node.select(&name_selector).next().unwrap()); let url = get_html_link_to_page( row_node @@ -46,8 +46,8 @@ impl SubjectAreaScraper { .parse() .expect("Could not parse UOC!"); self.class_scrapers.push(Arc::new(Mutex::new(ClassScraper { - subject_area_course_code, - subject_area_course_name, + course_code, + course_name, uoc, url, })));