Skip to content

Commit

Permalink
Merge pull request #61 from Hugo-C/feature/GH-60
Browse files Browse the repository at this point in the history
 GH-60 # Remove Alexa related code
  • Loading branch information
Hugo-C authored Apr 23, 2024
2 parents 5cbfa7b + 456c46c commit b8e6979
Show file tree
Hide file tree
Showing 13 changed files with 11 additions and 259 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ version = "0.1.0"
features = ["deadpool_redis"]

[dev-dependencies]
rstest = "0.18"
rstest = "0.19"
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rust:1.76-bullseye as build-stage
FROM rust:1.77-bullseye as build-stage

ARG BUILD_TARGET="x86_64-unknown-linux-musl"
ARG BUILD_OPTIONS="--release --target $BUILD_TARGET"
Expand Down
10 changes: 0 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,6 @@ GET api/v1/jarm?host=<host>&port=<port>

It takes a required `host` parameter and an optional `port` parameter (defaults to 443).

### Retrieve domains from alexa top 1 million that match a jarm hash

**DEPRECATED** see `tranco-overlap` instead

````http request
GET api/v1/alexa-overlap?jarm_hash=<jarm-hash>
````

The returned list is ordered by top alexa rank first

### Retrieve domains from tranco top 1 million that match a jarm hash

````http request
Expand Down
2 changes: 0 additions & 2 deletions docker-compose.playwright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@ services:
service: jarm_online_api
environment:
- ROCKET_SENTRY_DSN=123 # this way sentry is disabled but does not crash
- ALEXA_TOP1M_RAW_DATA_PATH=/alexa_top1M.csv
- FORCE_TRANCO_TOP1M_RAW_DATA_PATH=/tranco_top1M.csv
volumes:
- ./tests/fixtures_data/alexa_top1M.csv:/alexa_top1M.csv
- ./tests/fixtures_data/tranco_top1M.csv:/tranco_top1M.csv
depends_on:
- redis
Expand Down
4 changes: 1 addition & 3 deletions docker-compose.yml.prod
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ services:
- ROCKET_DATABASES={redis_db={url="redis://redis"}}
- ROCKET_SENTRY_DSN=${ROCKET_SENTRY_DSN}
- ROCKET_SENTRY_TRACES_SAMPLE_RATE=1.0
- ALEXA_TOP1M_RAW_DATA_PATH=/alexa_top1M.csv
volumes:
- ./data/result_aggregated.csv:/alexa_top1M.csv
- TRANCO_TOP1M_S3_URL=https://storage.googleapis.com/tranco-jarm/jarm-tranco-top-1m.csv
restart: unless-stopped
depends_on:
- redis
Expand Down
29 changes: 0 additions & 29 deletions examples/alexa_top1m.hurl

This file was deleted.

55 changes: 0 additions & 55 deletions src/alexa_top1m/mod.rs

This file was deleted.

29 changes: 1 addition & 28 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@
extern crate rocket;

pub mod utils;
pub mod alexa_top1m;
pub mod tranco_top1m;

use rocket_db_pools::{Connection, deadpool_redis};
use crate::alexa_top1m::{AlexaTop1M, RankedDomain};
use crate::tranco_top1m::{TrancoTop1M};
use crate::tranco_top1m::RankedDomain as TrancoRankedDomain;

use std::env;
use std::path::Path;
use rocket::{Build, fairing, Rocket, State};
use rocket::{Build, fairing, Rocket};
use rocket::serde::json::Json;
use rust_jarm::Jarm;
use serde::Serialize;
Expand Down Expand Up @@ -67,11 +64,6 @@ struct LastScanListResponse {
last_scans: Vec<LastScanResponse>,
}

/// Serializable response body of the Alexa overlap route.
///
/// `overlapping_domains` holds the ranked domains looked up for the requested JARM hash;
/// the route fills it with an empty list when the lookup finds nothing.
#[derive(Serialize)]
struct AlexaOverlapResponse {
overlapping_domains: Vec<RankedDomain>,
}

#[derive(Serialize)]
struct TrancoOverlapResponse {
overlapping_domains: Vec<TrancoRankedDomain>,
Expand All @@ -84,12 +76,6 @@ pub fn scan_timeout_in_seconds() -> u64 {
.expect("Valid timeout value")
}

/// Returns the location of the Alexa top 1M raw data file, as configured through the
/// `ALEXA_TOP1M_RAW_DATA_PATH` environment variable.
///
/// # Panics
///
/// Panics when `ALEXA_TOP1M_RAW_DATA_PATH` cannot be read from the environment.
pub fn alexa_top1m_raw_data_path() -> Box<Path> {
    env::var("ALEXA_TOP1M_RAW_DATA_PATH")
        // Copy the borrowed path into an owned, boxed `Path` before the `String` is dropped.
        .map(|configured| Box::<Path>::from(Path::new(&configured)))
        .expect("ALEXA_TOP1M_RAW_DATA_PATH env var has to be set")
}

#[get("/?<host>&<port>")]
async fn jarm(host: String, port: Option<String>, mut redis_client: Connection<Db>) -> Json<JarmResponse> {
let _port = port.unwrap_or_else(|| "443".to_string());
Expand Down Expand Up @@ -136,15 +122,6 @@ async fn last_scans(mut redis_client: Connection<Db>) -> Json<LastScanListRespon
Json(LastScanListResponse { last_scans })
}

/// Looks up the given JARM hash in the managed `AlexaTop1M` state and returns the
/// matching ranked domains as JSON.
///
/// An unknown hash yields an empty `overlapping_domains` list rather than an error.
#[get("/?<jarm_hash>")]
fn alexa_overlap(alexa_top1m: &State<AlexaTop1M>, jarm_hash: String) -> Json<AlexaOverlapResponse> { // TODO try str
    let overlapping_domains = alexa_top1m
        .get(jarm_hash.as_str())
        // Copy the stored entries so the response owns its data.
        .map(|domains| domains.to_vec())
        .unwrap_or_default();
    Json(AlexaOverlapResponse { overlapping_domains })
}

#[get("/?<jarm_hash>")]
async fn tranco_overlap(redis_client: Connection<Db>, jarm_hash: String) -> Result<Json<TrancoOverlapResponse>, Custom<Json<ErrorResponse>>> {
let mut tranco = TrancoTop1M::from(redis_client);
Expand Down Expand Up @@ -177,15 +154,11 @@ fn build_error_json(jarm_error: JarmError) -> Json<JarmResponse> {
}

/// Builds the Rocket instance with every API route mounted, the `Db` fairing attached
/// and the Alexa top 1M data registered as managed state.
///
/// # Panics
///
/// Panics when the Alexa data path cannot be resolved or `AlexaTop1M::new` fails.
pub fn build_rocket_without_tranco_initialisation() -> Rocket<Build> {
    // Load the Alexa data first so a bad configuration aborts before Rocket is built.
    let alexa_index = AlexaTop1M::new(&alexa_top1m_raw_data_path())
        .expect("AlexaTop1M built correctly");

    let with_routes = rocket::build()
        .mount("/jarm", routes![jarm])
        .mount("/last-scans", routes![last_scans])
        .mount("/alexa-overlap", routes![alexa_overlap])
        .mount("/tranco-overlap", routes![tranco_overlap]);

    with_routes.attach(Db::init()).manage(alexa_index)
}

pub fn build_rocket() -> Rocket<Build> {
Expand Down
15 changes: 3 additions & 12 deletions tests/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@ lazy_static! {
static ref REDIS_MUTEX: Mutex<()> = Mutex::default(); // restrict redis parallel access
}


/// Test fixture providing the path of the Alexa top 1M CSV sample stored under
/// `tests/fixtures_data`.
// NOTE(review): `#[once]` presumably makes rstest evaluate this fixture a single time and
// share the reference between tests — confirm against the rstest documentation.
#[fixture]
#[once]
pub fn alexa_top1m_path() -> &'static Path {
Path::new("tests/fixtures_data/alexa_top1M.csv")
}

#[fixture]
#[once]
pub fn tranco_top1m_path() -> &'static Path {
Expand All @@ -33,22 +26,20 @@ pub fn tranco_top1m_path() -> &'static Path {

#[fixture]
#[once]
pub fn set_env_var_top1m_path(alexa_top1m_path: &'static Path, tranco_top1m_path: &'static Path) -> &'static Path {
env::set_var("ALEXA_TOP1M_RAW_DATA_PATH", alexa_top1m_path.to_str().expect("valid path"));
pub fn set_env_var_top1m_path(tranco_top1m_path: &'static Path) {
env::set_var("FORCE_TRANCO_TOP1M_RAW_DATA_PATH", tranco_top1m_path.to_str().expect("valid path"));
alexa_top1m_path
}

#[fixture]
#[allow(unused_variables)]
pub fn rocket_client(set_env_var_top1m_path: &'static Path) -> Client {
pub fn rocket_client(set_env_var_top1m_path: ()) -> Client {
let test_rocket = build_rocket();
Client::tracked(test_rocket).expect("valid rocket instance")
}

#[fixture]
#[allow(unused_variables)]
pub fn rocket_client_without_tranco_init(set_env_var_top1m_path: &'static Path) -> Client {
pub fn rocket_client_without_tranco_init(set_env_var_top1m_path: ()) -> Client {
let test_rocket = build_rocket_without_tranco_initialisation();
Client::tracked(test_rocket).expect("valid rocket instance")
}
Expand Down
12 changes: 0 additions & 12 deletions tests/fixtures_data/alexa_top1M.csv

This file was deleted.

61 changes: 0 additions & 61 deletions tests/test_alexa_top1m.rs

This file was deleted.

41 changes: 0 additions & 41 deletions tests/test_route_alexa_overlap.rs

This file was deleted.

0 comments on commit b8e6979

Please sign in to comment.