Skip to content

Commit

Permalink
Format code
Browse files Browse the repository at this point in the history
  • Loading branch information
lhvy committed Dec 23, 2024
1 parent d1f3385 commit 9d87777
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 55 deletions.
106 changes: 56 additions & 50 deletions src/class_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use rayon::prelude::*;
use scraper::Selector;
use std::collections::{HashMap, HashSet};


use crate::{
school_area_scraper::ScrapeError, scraper::fetch_url, text_manipulators::extract_text,
};
Expand Down Expand Up @@ -61,7 +60,6 @@ pub struct ClassScraper {
pub url: String,
}


impl ClassScraper {
pub async fn scrape(&mut self) -> Result<Course, Box<ScrapeError>> {
println!("Currently working on {:?}", self.course_code);
Expand All @@ -72,14 +70,12 @@ impl ClassScraper {

// Selectors
let form_bodies = Selector::parse("td.formBody td.formBody").unwrap();
let term_selector =
Selector::parse("table table:nth-of-type(3)").unwrap();
let table_selector =
Selector::parse("table table").unwrap();
let term_selector = Selector::parse("table table:nth-of-type(3)").unwrap();
let table_selector = Selector::parse("table table").unwrap();
let label_selector = Selector::parse("td.label").unwrap();
let data_selector = Selector::parse("td.data").unwrap();
let information_body = document.select(&form_bodies);

let mut course_info = Course {
course_id: self.course_code.clone() + &self.career.clone(),
course_code: self.course_code.clone(),
Expand All @@ -94,34 +90,34 @@ impl ClassScraper {
classes: vec![],
};
let mut skip_this_info_box = false;
let mut terms: Vec<String> = vec![];
let mut terms: Vec<String> = vec![];
let mut class_activity_information: Vec<Vec<String>> = vec![];
for info_box in information_body {
if let Some(label_info) = info_box.select(&label_selector).next() {

// Check if it is a form body with course information
if extract_text(label_info).trim() == "Faculty" {
let labels: Vec<_> = info_box
.select(&label_selector)
.map(|el|
extract_text(el).trim().replace("\u{a0}", ""))
.collect();

.select(&label_selector)
.map(|el| extract_text(el).trim().replace("\u{a0}", ""))
.collect();

let data: Vec<_> = info_box
.select(&data_selector)
.map(|el| extract_text(el).trim().replace("\u{a0}", ""))
.collect();
.select(&data_selector)
.map(|el| extract_text(el).trim().replace("\u{a0}", ""))
.collect();
for (label, data) in labels.iter().zip(data.iter()) {
match label.trim().to_lowercase().as_str() {
"faculty" => course_info.faculty = Some(data.clone()),
"school" => course_info.school = Some(data.clone()),
"campus" => course_info.campus = Some(data.clone()),
"career" => if course_info.career != Some(data.clone()) {
skip_this_info_box = true;
break;
} else {
skip_this_info_box = false;
},
"career" => {
if course_info.career != Some(data.clone()) {
skip_this_info_box = true;
break;
} else {
skip_this_info_box = false;
}
}
_ => {}
}
}
Expand All @@ -130,36 +126,46 @@ impl ClassScraper {
}
if let Some(terms_info_table) = info_box.select(&term_selector).next() {
for terms_table in terms_info_table.select(&table_selector) {
let curr_terms_row = terms_table.text().map(|e| e.trim().to_string()).filter(|s| !s.is_empty()).collect::<Vec<_>>();
let curr_terms_row = terms_table
.text()
.map(|e| e.trim().to_string())
.filter(|s| !s.is_empty())
.collect::<Vec<_>>();
if !curr_terms_row.is_empty() {
terms.extend(curr_terms_row);
}
}
}

} else if extract_text(label_info).trim() == "Class Nbr" && !skip_this_info_box {
// Extract class.
let info_map = info_box.select(&Selector::parse("td.label, td.data").unwrap())
.map(|cell| {
cell.text()
.collect::<String>()
.trim()
.replace("\u{a0}", "")
.to_string()
}).collect::<Vec<_>>();
let info_map = info_box
.select(&Selector::parse("td.label, td.data").unwrap())
.map(|cell| {
cell.text()
.collect::<String>()
.trim()
.replace("\u{a0}", "")
.to_string()
})
.collect::<Vec<_>>();
if !info_map.is_empty() {
class_activity_information.push(info_map);
}
}

}
}
}

course_info.terms = terms.clone();

course_info.classes = class_activity_information
.into_par_iter()
.map(|class_data| parse_class_info(class_data, self.course_code.clone() + &self.career.clone(), self.career.clone()))
.map(|class_data| {
parse_class_info(
class_data,
self.course_code.clone() + &self.career.clone(),
self.career.clone(),
)
})
.collect();
let _ = course_info
.classes
Expand Down Expand Up @@ -194,10 +200,10 @@ fn parse_class_info(class_data: Vec<String>, course_id: String, career: String)
map.insert(key, value);
i += 2;
}
let offering_period_str = map
.get("Offering Period")
.unwrap_or(&"".to_string())
.to_string();
let offering_period_str = map
.get("Offering Period")
.unwrap_or(&"".to_string())
.to_string();
let mut split_offering_period_str = offering_period_str.split(" - ");
let date = split_offering_period_str.next().unwrap();
let year = date.split("/").nth(2).unwrap();
Expand All @@ -206,15 +212,14 @@ fn parse_class_info(class_data: Vec<String>, course_id: String, career: String)
class_id: format!(
"{}-{}-{}-{}",
course_id,
map.get("Class Nbr").unwrap_or(&String::new()),
map
.get("Teaching Period")
.unwrap_or(&"".to_string())
.to_string()
.split(" - ")
.next()
.expect("Could not split teaching periods properly!")
.to_string(),
map.get("Class Nbr").unwrap_or(&String::new()),
map.get("Teaching Period")
.unwrap_or(&"".to_string())
.to_string()
.split(" - ")
.next()
.expect("Could not split teaching periods properly!")
.to_string(),
year,
),
section: map.get("Section").unwrap_or(&"".to_string()).to_string(),
Expand All @@ -232,7 +237,8 @@ fn parse_class_info(class_data: Vec<String>, course_id: String, career: String)
course_enrolment: map
.get("Enrols/Capacity")
.unwrap_or(&"".to_string())
.replace("*", "").to_string(),
.replace("*", "")
.to_string(),
offering_period: map
.get("Offering Period")
.unwrap_or(&"".to_string())
Expand Down
4 changes: 3 additions & 1 deletion src/school_area_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ impl SchoolAreaScraper {
course_code,
course_name,
school,
subject_area_scraper: Arc::new(Mutex::new(SubjectAreaScraper::new(url_to_scrape_further))),
subject_area_scraper: Arc::new(Mutex::new(SubjectAreaScraper::new(
url_to_scrape_further,
))),
};

self.pages.push(page);
Expand Down
9 changes: 5 additions & 4 deletions src/text_manipulators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ pub fn extract_year(url: &str) -> Option<u32> {

pub fn mutate_string_to_include_curr_year(curr_base_url: &mut String, year_str: String) -> String {
let pattern = Regex::new("year").unwrap();
pattern
.replace(&curr_base_url, year_str)
.to_string()
pattern.replace(&curr_base_url, year_str).to_string()
}

pub fn get_html_link_to_page(year: i32, html_fragment: &str) -> String {
match std::env::var("TIMETABLE_API_URL") {
Ok(url) => mutate_string_to_include_curr_year(&mut url.to_string(), year.to_string()) + html_fragment,
Ok(url) => {
mutate_string_to_include_curr_year(&mut url.to_string(), year.to_string())
+ html_fragment
}
Err(e) => {
warn!("Timetable URL has NOT been parsed properly from env file and error report: {e}");
return "".to_string();
Expand Down

0 comments on commit 9d87777

Please sign in to comment.