Skip to content

Commit

Permalink
feat: add --external to allow scanning external domains + improve t…
Browse files Browse the repository at this point in the history
…ree display in spider mode
  • Loading branch information
cestef committed Jun 15, 2024
1 parent 99aa8c5 commit db7cfa3
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
6 changes: 6 additions & 0 deletions src/cli/opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,12 @@ pub struct Opts {
#[serde(default)]
pub subdomains: bool,

/// Allow external domains to be scanned in spider mode (Warning: this can generate a lot of traffic)
#[clap(long, help_heading = Some("Spider"), env, hide_env=true)]
#[merge(strategy = merge::bool::overwrite_false)]
#[serde(default)]
pub external: bool,

/// Scripts to run after each request
#[clap(long, help_heading = Some("Scripts"), env, hide_env=true, visible_alias = "sc")]
#[merge(strategy = merge::vec::overwrite_empty)]
Expand Down
54 changes: 51 additions & 3 deletions src/runner/spider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ use super::{
filters::is_directory,
Runner,
};
use crate::runner::scripting::run_scripts;
use crate::{
cli::opts::Opts,
utils::{
constants::{DEFAULT_DEPTH, ERROR, PROGRESS_CHARS, PROGRESS_TEMPLATE, SUCCESS, WARNING},
tree::{Tree, TreeData, UrlType},
},
};
use crate::{runner::scripting::run_scripts, utils::tree::TreeNode};
use color_eyre::eyre::eyre;
use color_eyre::eyre::{Context, Ok, Result};
use colored::Colorize;
Expand Down Expand Up @@ -187,7 +187,7 @@ impl Runner for Spider {
.context(format!("Could not parse links from {}", url))?;

for link in links {
if link.link_type != LinkType::Internal {
if !self.opts.external && link.link_type == LinkType::External {
continue;
}

Expand All @@ -206,7 +206,7 @@ impl Runner for Spider {
let mut tree = self.tree.lock();
let root = tree.root.clone().unwrap();

if self.opts.subdomains {
if self.opts.subdomains || self.opts.external {
// We need to group the visited nodes by domain
let mut grouped: std::collections::HashMap<String, Vec<TreeData>> =
std::collections::HashMap::new();
Expand Down Expand Up @@ -281,6 +281,54 @@ impl Runner for Spider {
}
}
}

// Collapse chains of single-child nodes so the rendered tree is easier to read.
// For example, a branch such as:
//   /
//   |-> a
//       |-> b
//           |-> c
// becomes a single node whose path joins every segment of the chain, each
// segment prefixed with `/` (i.e. `/a/b/c`).
//
// Only chains that start at a direct child of a domain node are collapsed;
// deeper subtrees are left as they are.
for domain_node in root.lock().children.clone() {
    let mut domain = domain_node.lock();

    // Rewrite each slot in place. Removing by an index taken from a snapshot
    // and pushing the replacement at the end shifts every later element, so a
    // second collapse under the same domain would remove the wrong node.
    for slot in domain.children.iter_mut() {
        // Follow the chain for as long as the current node has exactly one child.
        let mut chain = vec![slot.clone()];
        let mut tail = slot.clone();
        loop {
            let next = {
                let node = tail.lock();
                if node.children.len() != 1 {
                    break;
                }
                node.children[0].clone()
            };
            chain.push(next.clone());
            tail = next;
        }

        // A chain made of a single node has nothing to merge.
        if chain.len() < 2 {
            continue;
        }

        // Join the segments of the chain, keeping the leading separator.
        let mut merged_path = String::new();
        for node in &chain {
            merged_path.push('/');
            merged_path.push_str(&node.lock().data.path);
        }

        // The merged node takes its data from the last node of the chain and
        // keeps that node's children, so a chain ending at a branching node
        // does not lose the subtree below it.
        let (merged_data, merged_children) = {
            let last = tail.lock();
            (
                TreeData {
                    path: merged_path,
                    ..last.data.clone()
                },
                last.children.clone(),
            )
        };

        *slot = Arc::new(Mutex::new(TreeNode {
            data: merged_data,
            children: merged_children,
        }));
    }
}
Ok(())
}
}

0 comments on commit db7cfa3

Please sign in to comment.