Skip to content

Commit

Permalink
pg classes fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
Rayahhhmed committed Nov 15, 2024
1 parent 0e51364 commit 34d7e12
Show file tree
Hide file tree
Showing 10 changed files with 107,778 additions and 125,040 deletions.
147,270 changes: 66,449 additions & 80,821 deletions classes.json

Large diffs are not rendered by default.

15,636 changes: 1,853 additions & 13,783 deletions courses.json

Large diffs are not rendered by default.

10,832 changes: 10,695 additions & 137 deletions s.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions sql/Classes/up.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
CREATE TYPE status_enum AS ENUM ('Open', 'Closed', 'Full', 'On Hold');
CREATE TABLE Classes (
"class_id" VARCHAR(255) PRIMARY KEY,
-- "career" VARCHAR(255),
"course_id" VARCHAR(8) NOT NULL,
"section" VARCHAR(255) NOT NULL,
"term" VARCHAR(50) NOT NULL,
Expand Down
3 changes: 2 additions & 1 deletion sql/Courses/up.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CREATE TABLE Courses (
"course_code" VARCHAR(8) PRIMARY KEY, --id
"course_code" VARCHAR(8), --id
"course_name" VARCHAR(255) NOT NULL,
"uoc" INT NOT NULL,
"faculty" VARCHAR(255),
Expand All @@ -8,4 +8,5 @@ CREATE TABLE Courses (
"career" VARCHAR(255),
"terms" TEXT,
"modes" VARCHAR(255)[],
PRIMARY KEY ("course_code", "career")
);
19 changes: 10 additions & 9 deletions src/class_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub struct Course {
#[derive(Debug)]
pub struct Class {
pub course_id: String,
pub career: String,
pub class_id: String,
pub section: String,
pub term: String,
Expand All @@ -42,6 +43,7 @@ pub struct Class {

#[derive(Debug)]
pub struct Time {
pub career: String,
pub day: String,
pub time: String,
pub location: String,
Expand All @@ -61,7 +63,7 @@ pub struct ClassScraper {

impl ClassScraper {
pub async fn scrape(&mut self) -> Result<Course, Box<ScrapeError>> {
// println!("Currently working on {:?}", self.course_code);
println!("Currently working on {:?}", self.course_code);
let html = fetch_url(&self.url)
.await
.expect(&format!("Something was wrong with the URL: {}", self.url));
Expand Down Expand Up @@ -155,20 +157,17 @@ impl ClassScraper {

course_info.classes = class_activity_information
.into_par_iter()
.map(|class_data| parse_class_info(class_data, self.course_code.clone()))
.map(|class_data| parse_class_info(class_data, self.course_code.clone(), self.career.clone()))
.collect();
let _ = course_info
.classes
.iter_mut()
.map(|c| course_info.modes.insert(c.mode.to_string()))
.collect::<Vec<_>>();
if course_info.course_code == "COMP6441" {
println!("TESTTEST {:?}", course_info.career);
}
Ok(course_info)
}
}
fn parse_class_info(class_data: Vec<String>, course_id: String) -> Class {
fn parse_class_info(class_data: Vec<String>, course_id: String, career: String) -> Class {
let mut map = HashMap::new();
let mut i = 0;
let mut times_parsed = Vec::<Time>::new();
Expand All @@ -180,7 +179,7 @@ fn parse_class_info(class_data: Vec<String>, course_id: String) -> Class {
while j < class_data.len() && class_data[j] != "Class Notes" {
j += 1;
}
times_parsed = parse_meeting_info(&class_data[i + 1..j]);
times_parsed = parse_meeting_info(&class_data[i + 1..j], career.clone());
i = j + 1;
continue;
}
Expand Down Expand Up @@ -249,6 +248,7 @@ fn parse_class_info(class_data: Vec<String>, course_id: String) -> Class {
.unwrap_or(&"".to_string())
.to_string(),
consent: map.get("Consent").unwrap_or(&"".to_string()).to_string(),
career,
times: if times_parsed.is_empty() {
None
} else {
Expand All @@ -261,7 +261,7 @@ fn parse_class_info(class_data: Vec<String>, course_id: String) -> Class {
}
}

fn parse_meeting_info(vec: &[String]) -> Vec<Time> {
fn parse_meeting_info(vec: &[String], career: String) -> Vec<Time> {
let days = vec!["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];
let mut meetings = Vec::new();
let mut iter: Box<dyn Iterator<Item = &String>> = Box::new(vec.iter());
Expand Down Expand Up @@ -290,7 +290,7 @@ fn parse_meeting_info(vec: &[String]) -> Vec<Time> {
iter = Box::new(std::iter::once(instructor).chain(iter));
}
}

timeslot.career = career.clone();
meetings.push(timeslot);
}
}
Expand All @@ -300,6 +300,7 @@ fn parse_meeting_info(vec: &[String]) -> Vec<Time> {

fn get_blank_time_struct() -> Time {
Time {
career: "".to_string(),
day: "".to_string(),
time: "".to_string(),
location: "".to_string(),
Expand Down
4 changes: 4 additions & 0 deletions src/hasuragres_b_insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
let hasuragres_url = env::var("HASURAGRES_URL")?;
let api_key = env::var("HASURAGRES_API_KEY")?;
let client = Client::new();
println!("{:?} {:?}", hasuragres_url, api_key);
println!("Starting to insert into Hasuragres!");
let requests = vec![
BatchInsertRequest {
Expand Down Expand Up @@ -110,6 +111,7 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
table_name: "classes".to_string(),
columns: vec![
"class_id".to_string(),
"career".to_string(),
"course_id".to_string(),
"section".to_string(),
"term".to_string(),
Expand Down Expand Up @@ -139,6 +141,7 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
columns: vec![
"id".to_string(),
"class_id".to_string(),
"career".to_string(),
"day".to_string(),
"instructor".to_string(),
"location".to_string(),
Expand All @@ -155,6 +158,7 @@ pub async fn send_batch_data(hdata: &impl HasuragresData) -> Result<(), Box<dyn
payload: hdata.get_times(),
},
];

let response = client
.post(format!("{}/batch_insert", hasuragres_url))
.header("X-API-Key", api_key)
Expand Down
6 changes: 2 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ fn convert_classes_times_to_json(course_vec: &mut Vec<Course>) -> Vec<serde_json
"id": generate_time_id(class, time),
"class_id": class.class_id,
"day": time.day,
"career": time.career,
"instructor": time.instructor,
"location": time.location,
"time": time.time,
Expand All @@ -162,6 +163,7 @@ fn convert_classes_to_json(course_vec: &mut Vec<Course>) -> Vec<serde_json::Valu
"class_id": class.class_id,
"section": class.section,
"term": class.term,
"career": class.career,
"year": class.year,
"activity": class.activity,
"status": class.status,
Expand All @@ -188,10 +190,6 @@ async fn handle_scrape(course_vec: &mut Vec<Course>, start_year: i32) -> Result<
let course = run_course_classes_page_scraper_job(all_school_offered_courses_scraper).await;
course_vec.extend(course);
}
// let mut rc = ClassScraper { course_code: "COMP6420".to_string(), course_name: "Hardware Security".to_string(), career: "Undergraduate".to_string(), uoc: 6, url: "https://timetable.unsw.edu.au/2025/COMP1511.html".to_string() };
// rc.scrape().await;
// println!("{:?}", rc);

}

Ok(())
Expand Down
10 changes: 8 additions & 2 deletions src/subject_area_scraper.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::sync::Arc;
use std::{collections::HashSet, sync::Arc};

use scraper::Selector;
use tokio::sync::Mutex;
Expand Down Expand Up @@ -31,13 +31,18 @@ impl SubjectAreaScraper {
let link_selector = Selector::parse("td.data a").unwrap();
let uoc_selector = Selector::parse("td.data:nth-child(3)").unwrap();
let document = scraper::Html::parse_document(&html);
let mut course_code_career_set = HashSet::<String>::new();
for career_elem_ref in document.select(&career_selector) {
let career = extract_text(career_elem_ref);
if career.is_empty() {continue};
for row_node in document.select(&row_selector) {
// Extract data from each row
let course_code = extract_text(row_node.select(&code_selector).next().unwrap());
let course_name = extract_text(row_node.select(&name_selector).nth(1).unwrap());
let name_hash = course_code.to_string() + &career;
if course_code_career_set.contains(&name_hash) {
continue;
}
let year_to_scrape = extract_year(url).unwrap();
let url_to_scrape_further = get_html_link_to_page(
year_to_scrape as i32,
Expand All @@ -50,12 +55,13 @@ impl SubjectAreaScraper {
.parse()
.expect("Could not parse UOC!");
self.class_scrapers.push(Arc::new(Mutex::new(ClassScraper {
course_code,
course_code: course_code.clone(),
course_name,
career: career.trim().to_string(),
uoc,
url: url_to_scrape_further,
})));
course_code_career_set.insert(name_hash);
}
}

Expand Down
Loading

0 comments on commit 34d7e12

Please sign in to comment.