Skip to content

Commit

Permalink
prelim fixes for 2025
Browse files Browse the repository at this point in the history
  • Loading branch information
Rayahhhmed committed Nov 12, 2024
1 parent 13e791a commit ccb864a
Show file tree
Hide file tree
Showing 10 changed files with 37,456 additions and 982,165 deletions.
631,566 changes: 22,975 additions & 608,591 deletions classes.json

Large diffs are not rendered by default.

97,732 changes: 13,396 additions & 84,336 deletions courses.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions sql/Classes/up.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CREATE TYPE status_enum AS ENUM ('Open', 'Closed', 'Full', 'On Hold');
CREATE TABLE Classes (
"class_id" VARCHAR(255) PRIMARY KEY,
"course_id" VARCHAR(8) NOT NULL,
"course_code" VARCHAR(8) NOT NULL,
"section" VARCHAR(255) NOT NULL,
"term" VARCHAR(50) NOT NULL,
"year" VARCHAR(4) NOT NULL,
Expand All @@ -14,5 +14,5 @@ CREATE TABLE Classes (
"consent" VARCHAR(255) NOT NULL,
"mode" VARCHAR(255) NOT NULL,
"class_notes" TEXT,
FOREIGN KEY ("course_id") REFERENCES Courses("course_code") ON DELETE CASCADE
FOREIGN KEY ("course_code") REFERENCES Courses("course_code") ON DELETE CASCADE
);
19 changes: 10 additions & 9 deletions sql/Courses/up.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
CREATE TABLE Courses (
"course_code" VARCHAR(8) PRIMARY KEY, --id
"course_name" VARCHAR(255) NOT NULL,
"uoc" INT NOT NULL,
"faculty" VARCHAR(255),
"school" VARCHAR(255),
"campus" VARCHAR(255),
"career" VARCHAR(255),
"terms" TEXT,
"modes" VARCHAR(255)[]
"course_code" VARCHAR(8) PRIMARY KEY, --id
"year" VARCHAR(4) NOT NULL,
"course_name" VARCHAR(255) NOT NULL,
"uoc" INT NOT NULL,
"faculty" VARCHAR(255),
"school" VARCHAR(255),
"campus" VARCHAR(255),
"career" VARCHAR(255),
"terms" TEXT,
"modes" VARCHAR(255)[]
);
32 changes: 16 additions & 16 deletions src/class_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,21 @@ use crate::{
#[derive(Debug)]
pub struct Course {
pub course_code: String,
pub year: String,
pub course_name: String,
pub uoc: i32,
pub faculty: Option<String>,
pub school: Option<String>,
pub career: Option<String>,
pub modes: HashSet<String>, // For Notangles.
pub campus: Option<String>,
pub terms: Vec<String>,
pub terms: HashSet<String>,
pub classes: Vec<Class>,
}

#[derive(Debug)]
pub struct Class {
pub course_id: String,
pub course_code: String,
pub class_id: String,
pub section: String,
pub term: String,
Expand Down Expand Up @@ -52,6 +53,7 @@ pub struct Time {
pub struct ClassScraper {
pub course_code: String,
pub course_name: String,
pub year: String,
pub uoc: i32,
pub url: String,
}
Expand All @@ -75,25 +77,23 @@ impl ClassScraper {
Selector::parse("td.formBody td.formBody table").unwrap();
let information_body = document.select(&form_bodies).next().unwrap();
let course_name: String = document
.select(&course_name_selector)
.map(|el| el.text().collect::<String>())
.collect();
let course_name_code_info: Vec<_> = course_name
.split(" ")
.into_iter()
.select(&course_name_selector).nth(0)
.map(|el| el.text().collect::<String>()).unwrap()
.split_whitespace()
.skip(1)
.map(|course_name_words| String::from(course_name_words))
.collect();
.collect::<Vec<&str>>()
.join(" ");
let mut course_info = Course {
course_code: self.course_code.clone(),
course_name: course_name_code_info.join(" "),
course_name,
year: self.year.clone(),
uoc: self.uoc,
faculty: None,
school: None,
campus: None,
career: None,
modes: HashSet::<String>::new(),
terms: vec![],
terms: HashSet::<String>::new(),
classes: vec![],
};

Expand Down Expand Up @@ -129,7 +129,7 @@ impl ClassScraper {
.map(|row| extract_text(row).trim().replace("\u{a0}", ""))
.collect::<Vec<_>>();

course_info.terms = term_data.clone();
course_info.terms = term_data.clone().into_iter().collect();

// Skip header and course info, and go to class details
let skip_count = 3 + term_data.len() + 3 * term_data.len();
Expand Down Expand Up @@ -163,7 +163,7 @@ impl ClassScraper {
}
}

fn parse_class_info(class_data: Vec<String>, course_id: String) -> Class {
fn parse_class_info(class_data: Vec<String>, course_code: String) -> Class {
let mut map = HashMap::new();
let mut i = 0;
let mut times_parsed = Vec::<Time>::new();
Expand Down Expand Up @@ -196,10 +196,10 @@ fn parse_class_info(class_data: Vec<String>, course_id: String) -> Class {
let date = split_offering_period_str.next().unwrap();
let year = date.split("/").nth(2).unwrap();
Class {
course_id: course_id.clone(),
course_code: course_code.clone(),
class_id: format!(
"{}-{}-{}-{}",
course_id,
course_code,
map.get("Class Nbr").unwrap_or(&String::new()),
map
.get("Teaching Period")
Expand Down
3 changes: 2 additions & 1 deletion src/hasuragres_b_insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
columns: vec![
"course_code".to_string(),
"course_name".to_string(),
"year".to_string(),
"uoc".to_string(),
"faculty".to_string(),
"school".to_string(),
Expand All @@ -110,7 +111,7 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
table_name: "classes".to_string(),
columns: vec![
"class_id".to_string(),
"course_id".to_string(),
"course_code".to_string(),
"section".to_string(),
"term".to_string(),
"activity".to_string(),
Expand Down
8 changes: 5 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ async fn run_all_school_offered_courses_scraper_job(curr_year: i32) -> Option<Sc
match std::env::var("TIMETABLE_API_URL") {
Ok(url) => {
let url_to_scrape = mutate_string_to_include_curr_year(&mut url.to_string(), curr_year.to_string());
let mut scraper = SchoolAreaScraper::new(url_to_scrape);
let mut scraper = SchoolAreaScraper::new(url_to_scrape, curr_year.to_string());
let _ = scraper.scrape().await;
return Some(scraper);
}
Expand Down Expand Up @@ -112,10 +112,12 @@ async fn run_course_classes_page_scraper_job(

fn convert_courses_to_json(course_vec: &mut Vec<Course>) -> Vec<serde_json::Value> {
let mut json_courses = Vec::new();

for course in course_vec.iter() {
json_courses.push(json!({
"course_code": course.course_code,
"course_name": course.course_name,
"year": course.year,
"uoc": course.uoc,
"faculty": course.faculty,
"school": course.school,
Expand Down Expand Up @@ -158,7 +160,7 @@ fn convert_classes_to_json(course_vec: &mut Vec<Course>) -> Vec<serde_json::Valu
for course in course_vec.iter() {
for class in course.classes.iter() {
json_classes.push(json!({
"course_id": class.course_id,
"course_code": class.course_code,
"class_id": class.class_id,
"section": class.section,
"term": class.term,
Expand All @@ -180,7 +182,7 @@ fn convert_classes_to_json(course_vec: &mut Vec<Course>) -> Vec<serde_json::Valu
}

async fn handle_scrape(course_vec: &mut Vec<Course>, start_year: i32) -> Result<(), Box<dyn Error>> {
for year in &[start_year, start_year + 1] {
for year in &[start_year + 1] { // TODO: also scrape start_year again; for now only start_year + 1 is handled
println!("Handling scrape for year: {year}");
let mut all_school_offered_courses_scraper = run_all_school_offered_courses_scraper_job(*year).await;
if let Some(all_school_offered_courses_scraper) = &mut all_school_offered_courses_scraper {
Expand Down
6 changes: 4 additions & 2 deletions src/school_area_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub struct SchoolAreaPage {
#[derive(Debug)]
pub struct SchoolAreaScraper {
pub url: Option<String>,
pub year: String,
pub pages: Vec<SchoolAreaPage>,
}

Expand Down Expand Up @@ -66,7 +67,7 @@ impl SchoolAreaScraper {
course_code,
course_name,
school,
subject_area_scraper: Arc::new(Mutex::new(SubjectAreaScraper::new(url_to_scrape_further))),
subject_area_scraper: Arc::new(Mutex::new(SubjectAreaScraper::new(url_to_scrape_further, self.year.clone()))),
};

self.pages.push(page);
Expand All @@ -82,8 +83,9 @@ impl SchoolAreaScraper {
}

impl SchoolAreaScraper {
pub fn new(url: String) -> Self {
pub fn new(url: String, year: String) -> Self {
SchoolAreaScraper {
year,
url: Some(url),
pages: Vec::new(),
}
Expand Down
5 changes: 4 additions & 1 deletion src/subject_area_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::{
#[derive(Debug)]
pub struct SubjectAreaScraper {
pub url: Option<String>,
pub year: String,
pub class_scrapers: Vec<Arc<Mutex<ClassScraper>>>,
}

Expand Down Expand Up @@ -49,6 +50,7 @@ impl SubjectAreaScraper {
course_code,
course_name,
uoc,
year: self.year.clone(),
url: url_to_scrape_further,
})));
}
Expand All @@ -61,9 +63,10 @@ impl SubjectAreaScraper {
}

impl SubjectAreaScraper {
pub fn new(url: String) -> Self {
pub fn new(url: String, year: String) -> Self {
Self {
url: Some(url),
year,
class_scrapers: vec![],
}
}
Expand Down
Loading

0 comments on commit ccb864a

Please sign in to comment.