Skip to content

Commit

Permalink
adding async traits
Browse files Browse the repository at this point in the history
  • Loading branch information
Rayahhhmed committed Jan 14, 2024
1 parent a9ba7cd commit f5bef72
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 68 deletions.
46 changes: 24 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ chrono = "0.4.31"
tokio = { version = "1", features = ["full"] }
reqwest = "0.11"
select = "0.5"
scraper = "0.12"
scraper = "0.12"
log = "0.4.20"
serde = "1.0.195"
97 changes: 59 additions & 38 deletions src/class_scraper.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use scraper::Selector;

use crate::{
scraper::{fetch_url, Course, Page, Term, Status, Enrolment, DateBlock, ClassTimeBlock},
scraper::{fetch_url, ClassTimeBlock, Course, DateBlock, Enrolment, Page, Status, Term},
text_manipulators::{extract_text, get_html_link_to_page},
Scraper, UrlInvalidError,
};
Expand All @@ -15,8 +15,6 @@ pub struct ClassPage {
courses: Vec<Course>,
}



#[derive(Debug)]
pub struct Class {
class_id: u32,
Expand All @@ -30,8 +28,6 @@ pub struct Class {
times: Vec<ClassTimeBlock>,
}



impl Page for ClassPage {
fn view_page_details(&self) {
println!("{:?}", self)
Expand All @@ -40,7 +36,7 @@ impl Page for ClassPage {

#[derive(Debug)]

pub struct SubjectAreaScraper {
pub struct ClassScraper {
pub url: Option<String>,
pub pages: Vec<Box<dyn Page>>,
}
Expand All @@ -51,41 +47,66 @@ impl std::fmt::Debug for dyn Page {
}
}

impl SubjectAreaScraper {
impl ClassScraper {
pub async fn run_scraper_on_url(&mut self) -> Result<(), Box<dyn std::error::Error>> {
match &self.url {
Some(url) => {
let html = fetch_url(url).await?;
// println!("{}", html);
let form_body_selector = Selector::parse("td.formBody tbody tr td.formBody").unwrap();
let code_selector = Selector::parse("tr.label").unwrap();
let name_selector = Selector::parse("td.data a").unwrap();
let link_selector = Selector::parse("td.data a").unwrap();
let school_selector = Selector::parse("td.data:nth-child(3)").unwrap();
let form_body_selector =
Selector::parse("td.formBody tbody tr td.formBody").unwrap();
let course_summary = Selector::parse("tbody tr").unwrap();

// let name_selector = Selector::parse("td.data a").unwrap();
// let link_selector = Selector::parse("td.data a").unwrap();
// let school_selector = Selector::parse("td.data:nth-child(3)").unwrap();
let document = scraper::Html::parse_document(&html);
// for row_node in document.select(&row_selector) {
// // Extract data from each row
// let subject_area_course_code =
// extract_text(row_node.select(&code_selector).next().unwrap());
// let subject_area_course_name =
// extract_text(row_node.select(&name_selector).next().unwrap());
// let url = get_html_link_to_page(
// row_node
// .select(&link_selector)
// .next()
// .map_or("", |node| node.value().attr("href").unwrap_or("")),
// );
// let school = extract_text(row_node.select(&school_selector).next().unwrap());
// // Create a Course struct and push it to the vector
// let page = SubjectAreaPage {
// subject_area_course_code,
// subject_area_course_name,
// url,
// school,
// courses: Vec::new(),
// };

// self.add_page(Box::new(page));
for td in document.select(&form_body_selector) {
// Selector for tables within the td element
let table_selector = Selector::parse("table").unwrap();

// Iterate over tables
for table in td.select(&table_selector) {
// Selector for rows within the table
let row_selector = Selector::parse("tr").unwrap();

// Iterate over rows
for row in table.select(&row_selector) {
// Selector for cells within the row
let cell_selector = Selector::parse("td").unwrap();

// Extract and print data from each cell
for cell in row.select(&cell_selector) {
println!("{}", cell.text().collect::<String>());
}
}
}
}
// for row_node in document.select(&form_body_selector) {
// let course_summary = extract_text(row_node.select(&course_summary).next().unwrap());
// println!("{:?}", course_summary);
// // // Extract data from each row
// // let subject_area_course_code =
// // extract_text(row_node.select(&code_selector).next().unwrap());
// // let subject_area_course_name =
// // extract_text(row_node.select(&name_selector).next().unwrap());
// // let url = get_html_link_to_page(
// // row_node
// // .select(&link_selector)
// // .next()
// // .map_or("", |node| node.value().attr("href").unwrap_or("")),
// // );
// // let school = extract_text(row_node.select(&school_selector).next().unwrap());
// // // Create a Course struct and push it to the vector
// // let page = SubjectAreaPage {
// // subject_area_course_code,
// // subject_area_course_name,
// // url,
// // school,
// // courses: Vec::new(),
// // };

// // self.add_page(Box::new(page));
// }

println!("{:?}", self.pages);
Expand All @@ -95,16 +116,16 @@ impl SubjectAreaScraper {
}
}
}
impl Scraper for SubjectAreaScraper {
impl Scraper for ClassScraper {
fn new() -> Self {
SubjectAreaScraper {
ClassScraper {
url: None,
pages: Vec::new(),
}
}

fn set_url(&mut self, url: String) -> Self {
SubjectAreaScraper {
ClassScraper {
url: Some(url),
pages: Vec::new(),
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod class_scraper;
mod subject_area_scraper;
mod text_manipulators;

pub use class_scraper::ClassScraper;
pub use scraper::Scraper;
pub use subject_area_scraper::SubjectAreaScraper;
pub use url_invalid_error::UrlInvalidError;
8 changes: 5 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use spooderman::{Scraper, SubjectAreaScraper};
use spooderman::{ClassScraper, Scraper, SubjectAreaScraper};

#[tokio::main]
async fn main() {
let mut scraper = SubjectAreaScraper::new()
.set_url("https://timetable.unsw.edu.au/2024/subjectSearch.html".to_string());
// let mut scraper = SubjectAreaScraper::new()
// .set_url("https://timetable.unsw.edu.au/2024/subjectSearch.html".to_string());
let mut scraper: ClassScraper =
ClassScraper::new().set_url("https://timetable.unsw.edu.au/2024/COMP1511.html".to_string());
match scraper.run_scraper_on_url().await {
Ok(_) => {
println!("Scraping successful!\n");
Expand Down
6 changes: 4 additions & 2 deletions src/scraper.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use chrono::{DateTime, Utc};
use reqwest::ClientBuilder;
use tokio::sync::Mutex;
use std::ops::Add;

use crate::class_scraper::Class;


// static COURSE_LIST: Mutex<HashMap<String, >>

#[derive(Debug)]
pub enum Term {
T1,
Expand Down Expand Up @@ -74,7 +78,6 @@ pub struct ClassTimeBlock {
location: String,
}


#[derive(Debug)]
enum Career {
UG,
Expand All @@ -98,7 +101,6 @@ pub struct Course {
notes: String,
}


/// A scraped page that can report its own contents.
///
/// Scrapers hold their results as `Vec<Box<dyn Page>>`, so this trait is used
/// as a trait object.
pub trait Page {
/// Displays the details of this page.
///
/// The `ClassPage` implementation prints the page's `Debug` representation
/// to standard output.
fn view_page_details(&self);
}
Expand Down
2 changes: 0 additions & 2 deletions src/subject_area_scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ pub struct SubjectAreaScraper {
pub pages: Vec<Box<dyn Page>>,
}



impl SubjectAreaScraper {
pub async fn run_scraper_on_url(&mut self) -> Result<(), Box<dyn std::error::Error>> {
match &self.url {
Expand Down

0 comments on commit f5bef72

Please sign in to comment.