Skip to content

Commit

Permalink
Removed the `subject_area_` prefix from all identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
Rayahhhmed committed Sep 29, 2024
1 parent a60c4f8 commit 451220a
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 31 deletions.
22 changes: 22 additions & 0 deletions dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Multi-stage build: compile with the full Rust toolchain image, ship a slim runtime image.
FROM rust:latest AS builder

WORKDIR /usr/src/app

# Dependency-caching trick: build once against a dummy main so the compiled
# dependencies are cached in a layer that only invalidates when
# Cargo.toml / Cargo.lock change.
COPY Cargo.toml Cargo.lock ./

RUN mkdir src
RUN echo "fn main() {}" > src/main.rs
RUN cargo build --release
# Purge the dummy-built crate artifacts so the real sources get recompiled.
# The binary produced by this crate is `spooderman` (see the COPY below),
# so the stale artifacts live under deps/spooderman* — the previous
# `deps/app*` glob matched nothing and left the dummy build in place.
RUN rm -f target/release/deps/spooderman*

COPY . .

RUN cargo build --release

# Runtime stage. The base must ship a glibc at least as new as the builder's:
# rust:latest is Debian bookworm based, and binaries linked against its glibc
# will not start on buster (older glibc, also EOL) — use bookworm-slim.
FROM debian:bookworm-slim

# ca-certificates is needed for outbound HTTPS requests made by the scraper.
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/*

COPY --from=builder /usr/src/app/target/release/spooderman /usr/local/bin/spooderman

CMD ["/usr/local/bin/spooderman", "scrape_n_batch_insert"]
2 changes: 1 addition & 1 deletion sql/Classes/up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ CREATE TABLE Classes (
"consent" VARCHAR(255) NOT NULL,
"mode" VARCHAR(255) NOT NULL,
"class_notes" TEXT,
FOREIGN KEY ("course_id") REFERENCES Courses("subject_area_course_code") ON DELETE CASCADE
FOREIGN KEY ("course_id") REFERENCES Courses("course_code") ON DELETE CASCADE
);
4 changes: 2 additions & 2 deletions sql/Courses/up.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CREATE TABLE Courses (
"subject_area_course_code" VARCHAR(8) PRIMARY KEY, --id
"subject_area_course_name" VARCHAR(255) NOT NULL,
"course_code" VARCHAR(8) PRIMARY KEY, --id
"course_name" VARCHAR(255) NOT NULL,
"uoc" INT NOT NULL,
"faculty" VARCHAR(255),
"school" VARCHAR(255),
Expand Down
2 changes: 1 addition & 1 deletion sql/Times/up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ CREATE TABLE Times (
"time" VARCHAR(100) NOT NULL,
"weeks" VARCHAR(100) NOT NULL,
FOREIGN KEY ("class_id") REFERENCES Classes("class_id") ON DELETE CASCADE,
FOREIGN KEY ("course_id") REFERENCES Courses("subject_area_course_code") ON DELETE CASCADE
FOREIGN KEY ("course_id") REFERENCES Courses("course_code") ON DELETE CASCADE
);
16 changes: 8 additions & 8 deletions src/class_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use crate::{

#[derive(Debug)]
pub struct Course {
pub subject_area_course_code: String,
pub subject_area_course_name: String,
pub course_code: String,
pub course_name: String,
pub uoc: i32,
pub faculty: Option<String>,
pub school: Option<String>,
Expand Down Expand Up @@ -49,15 +49,15 @@ pub struct Time {

#[derive(Debug)]
pub struct ClassScraper {
pub subject_area_course_code: String,
pub subject_area_course_name: String,
pub course_code: String,
pub course_name: String,
pub uoc: i32,
pub url: String,
}

impl ClassScraper {
pub async fn scrape(&mut self) -> Result<Course, Box<ScrapeError>> {
println!("Currently working on {:?}", self.subject_area_course_code);
println!("Currently working on {:?}", self.course_code);
let html = fetch_url(&self.url)
.await
.expect(&format!("Something was wrong with the URL: {}", self.url));
Expand All @@ -84,8 +84,8 @@ impl ClassScraper {
.map(|course_name_words| String::from(course_name_words))
.collect();
let mut course_info = Course {
subject_area_course_code: self.subject_area_course_code.clone(),
subject_area_course_name: course_name_code_info.join(" "),
course_code: self.course_code.clone(),
course_name: course_name_code_info.join(" "),
uoc: self.uoc,
faculty: None,
school: None,
Expand Down Expand Up @@ -151,7 +151,7 @@ impl ClassScraper {

course_info.classes = class_activity_information
.into_par_iter()
.map(|class_data| parse_class_info(class_data, self.subject_area_course_code.clone()))
.map(|class_data| parse_class_info(class_data, self.course_code.clone()))
.collect();
let _ = course_info
.classes
Expand Down
4 changes: 2 additions & 2 deletions src/hasuragres_b_insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
metadata: Metadata {
table_name: "courses".to_string(),
columns: vec![
"subject_area_course_code".to_string(),
"subject_area_course_name".to_string(),
"course_code".to_string(),
"course_name".to_string(),
"uoc".to_string(),
"faculty".to_string(),
"school".to_string(),
Expand Down
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ pub use scraper::fetch_url;
pub use scraper::Scraper;
pub use text_manipulators::mutate_string_to_include_curr_year;
pub use url_invalid_error::UrlInvalidError;
// pub use subject_area_scraper::SubjectAreaScraper;
pub use class_scraper::{Class, ClassScraper, Course, Time};
pub use school_area_scraper::SchoolAreaScraper;
pub use subject_area_scraper::SubjectAreaScraper;
12 changes: 6 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ async fn run_school_courses_page_scraper_job(

// Iterate over the pages and create tasks for each scrape operation
for school_area_scrapers in &mut all_school_offered_courses_scraper.pages {
let subject_area_scraper = Arc::clone(&school_area_scrapers.subject_area_scraper);
let scraper = Arc::clone(&school_area_scrapers.scraper);
let task = tokio::spawn(async move {
let mut scraper = subject_area_scraper.lock().await;
let mut scraper = scraper.lock().await;
let _ = scraper.scrape().await;
});
tasks.push(task);
Expand All @@ -65,11 +65,11 @@ async fn run_course_classes_page_scraper_job(
let rate_limit_delay = Duration::from_millis(1); // delay between tasks

for school_area_scrapers in &mut all_school_offered_courses_scraper.pages {
let subject_area_scraper = Arc::clone(&school_area_scrapers.subject_area_scraper);
let scraper = Arc::clone(&school_area_scrapers.scraper);

// Lock the mutex to access the underlying data
let class_scrapers = {
let scraper = subject_area_scraper.lock().await;
let scraper = scraper.lock().await;
scraper.class_scrapers.clone()
};

Expand Down Expand Up @@ -113,8 +113,8 @@ fn convert_courses_to_json(course_vec: &mut Vec<Course>) -> Vec<serde_json::Valu
let mut json_courses = Vec::new();
for course in course_vec.iter() {
json_courses.push(json!({
"subject_area_course_code": course.subject_area_course_code,
"subject_area_course_name": course.subject_area_course_name,
"course_code": course.course_code,
"course_name": course.course_name,
"uoc": course.uoc,
"faculty": course.faculty,
"school": course.school,
Expand Down
12 changes: 6 additions & 6 deletions src/school_area_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use tokio::sync::Mutex;

#[derive(Debug)]
pub struct SchoolAreaPage {
pub subject_area_course_code: String,
pub subject_area_course_name: String,
pub course_code: String,
pub course_name: String,
pub school: String,
pub subject_area_scraper: Arc<Mutex<SubjectAreaScraper>>,
}
Expand Down Expand Up @@ -51,9 +51,9 @@ impl SchoolAreaScraper {
let document = scraper::Html::parse_document(&html);
for row_node in document.select(&row_selector) {
// Extract data from each row
let subject_area_course_code =
let course_code =
extract_text(row_node.select(&code_selector).next().unwrap());
let subject_area_course_name =
let course_name =
extract_text(row_node.select(&name_selector).next().unwrap());
let url = get_html_link_to_page(
row_node
Expand All @@ -63,8 +63,8 @@ impl SchoolAreaScraper {
);
let school = extract_text(row_node.select(&school_selector).next().unwrap());
let page = SchoolAreaPage {
subject_area_course_code,
subject_area_course_name,
course_code,
course_name,
school,
subject_area_scraper: Arc::new(Mutex::new(SubjectAreaScraper::new(url))),
};
Expand Down
8 changes: 4 additions & 4 deletions src/subject_area_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ impl SubjectAreaScraper {
let document = scraper::Html::parse_document(&html);
for row_node in document.select(&row_selector) {
// Extract data from each row
let subject_area_course_code =
let course_code =
extract_text(row_node.select(&code_selector).next().unwrap());
let subject_area_course_name =
let course_name =
extract_text(row_node.select(&name_selector).next().unwrap());
let url = get_html_link_to_page(
row_node
Expand All @@ -46,8 +46,8 @@ impl SubjectAreaScraper {
.parse()
.expect("Could not parse UOC!");
self.class_scrapers.push(Arc::new(Mutex::new(ClassScraper {
subject_area_course_code,
subject_area_course_name,
course_code,
course_name,
uoc,
url,
})));
Expand Down

0 comments on commit 451220a

Please sign in to comment.