From db7cfa3e702552e5835872c97fc05af20043e5c0 Mon Sep 17 00:00:00 2001 From: cstef Date: Sat, 15 Jun 2024 12:12:28 +0200 Subject: [PATCH] feat: add `--external` to allow scanning external domains + improve tree display in spider mode --- src/cli/opts.rs | 6 +++++ src/runner/spider.rs | 54 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/src/cli/opts.rs b/src/cli/opts.rs index 584b466..0c68b07 100644 --- a/src/cli/opts.rs +++ b/src/cli/opts.rs @@ -251,6 +251,12 @@ pub struct Opts { #[serde(default)] pub subdomains: bool, + /// Allow external domains to be scanned in spider mode (Warning: this can generate a lot of traffic) + #[clap(long, help_heading = Some("Spider"), env, hide_env=true)] + #[merge(strategy = merge::bool::overwrite_false)] + #[serde(default)] + pub external: bool, + /// Scripts to run after each request #[clap(long, help_heading = Some("Scripts"), env, hide_env=true, visible_alias = "sc")] #[merge(strategy = merge::vec::overwrite_empty)] diff --git a/src/runner/spider.rs b/src/runner/spider.rs index 9288d37..b587ea1 100644 --- a/src/runner/spider.rs +++ b/src/runner/spider.rs @@ -3,7 +3,6 @@ use super::{ filters::is_directory, Runner, }; -use crate::runner::scripting::run_scripts; use crate::{ cli::opts::Opts, utils::{ @@ -11,6 +10,7 @@ use crate::{ tree::{Tree, TreeData, UrlType}, }, }; +use crate::{runner::scripting::run_scripts, utils::tree::TreeNode}; use color_eyre::eyre::eyre; use color_eyre::eyre::{Context, Ok, Result}; use colored::Colorize; @@ -187,7 +187,7 @@ impl Runner for Spider { .context(format!("Could not parse links from {}", url))?; for link in links { - if link.link_type != LinkType::Internal { + if !self.opts.external && link.link_type == LinkType::External { continue; } @@ -206,7 +206,7 @@ impl Runner for Spider { let mut tree = self.tree.lock(); let root = tree.root.clone().unwrap(); - if self.opts.subdomains { + if self.opts.subdomains || self.opts.external { // We need to group the visited nodes by domain let mut grouped: std::collections::HashMap> = std::collections::HashMap::new(); @@ -281,6 +281,54 @@ impl Runner for Spider { } } } + + // Reduce the paths that have only one child + // This is done to make the tree more readable + // For example, if we have a tree like this: + // / + // |-> a + // |-> b + // |-> c + // We can reduce it to: + // / + // |-> a/b/c + // This is done for all paths that have only one child + + for domain in root.lock().children.clone() { + let mut domain = domain.lock(); + for (i, child) in domain.children.clone().iter().enumerate() { + let mut current = child.clone(); + // Check until where we can reduce the path + // If we have a node with more than one child, we stop + let mut path = vec![current.clone()]; + while current.lock().children.len() == 1 { + let child = current.lock().children[0].clone(); + path.push(child.clone()); + current = child; + } + // If we can reduce the path, we do it + if path.len() > 1 { + // Create the new path + let new_path = path.iter().fold("".to_string(), |acc, node| { + format!("{}/{}", acc, node.lock().data.path) + }); + + // Create the new node + // We copy the data from the last node in the path + let new_node = TreeData { + path: new_path, + ..path.last().unwrap().lock().data.clone() + }; + + // Remove the old nodes + domain.children.remove(i); + domain.children.push(Arc::new(Mutex::new(TreeNode { + data: new_node, + children: vec![], + }))); + } + } + } Ok(()) } }