From 23ced073d5f21638811484487debf63315d0e3a8 Mon Sep 17 00:00:00 2001 From: Jarell <91372088+jarelllama@users.noreply.github.com> Date: Wed, 3 Apr 2024 12:26:06 +0800 Subject: [PATCH] Add error logging --- config/source_log.csv | 2 +- functions/retrieve_domains.sh | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/config/source_log.csv b/config/source_log.csv index 52651f27f..378c5f4ef 100644 --- a/config/source_log.csv +++ b/config/source_log.csv @@ -1,4 +1,4 @@ -Time,Source,Search Term,Raw Count,Final Count,Whitelisted,Dead,Redundant,Parked,Toplist Count,Toplist Domains,Query Count,Rate limited,Saved +Time,Source,Search Term,Raw Count,Final Count,Whitelisted,Dead,Redundant,Parked,Toplist Count,Toplist Domains,Query Count,Error,Saved 02:50:05 23-03-24,aa419.org,aa419.org,4255,5,1,499,0,0,0,,0,,no 02:50:05 23-03-24,guntab.com,guntab.com,1765,0,1,237,0,0,0,,0,,no 02:50:05 23-03-24,petscams.com,petscams.com,718,0,0,62,2,0,0,,0,,no diff --git a/functions/retrieve_domains.sh b/functions/retrieve_domains.sh index 949394b4f..17d473a23 100644 --- a/functions/retrieve_domains.sh +++ b/functions/retrieve_domains.sh @@ -57,9 +57,10 @@ source() { process_source() { [[ ! -f "$results_file" ]] && return - # Skip to next source if no results retrieved + # Check if any results were retrieved # [ -s ] does not seem to work well here if ! grep -q '[a-z]' "$results_file"; then + local empty=true log_source rm "$results_file" return @@ -252,20 +253,34 @@ decide_exit() { # otherwise, the default values are used. 
log_source() { local item - total_whitelisted_count="$(( whitelisted_count + whitelisted_tld_count ))" - excluded_count="$(( dead_count + redundant_count + parked_count ))" + local error if [[ "$source" == 'Google Search' ]]; then search_term="\"${search_term:0:100}...\"" item="$search_term" fi + if [[ "$rate_limited" == true ]]; then + error='rate_limited' + elif [[ "$empty" == true ]]; then + error='empty' + fi + + total_whitelisted_count="$(( whitelisted_count + whitelisted_tld_count ))" + excluded_count="$(( dead_count + redundant_count + parked_count ))" + echo "${TIME_FORMAT},${source},${search_term},${unfiltered_count:-0},\ ${filtered_count:-0},${total_whitelisted_count},${dead_count:-0},${redundant_count},\ ${parked_count:-0},${toplist_count:-0},$(printf "%s" "$domains_in_toplist" | tr '\n' ' '),\ -${query_count:-0},${rate_limited:-false},no" >> "$SOURCE_LOG" +${query_count:-0},${error},no" >> "$SOURCE_LOG" printf "\n\e[1mSource: %s\e[0m\n" "${item:-$source}" + + if [[ "$empty" == true ]]; then + printf "\e[1;31mNo results retrieved. Potential error occurred.\e[0m\n" + return + fi + printf "Raw:%4s Final:%4s Whitelisted:%4s Excluded:%4s Toplist:%4s\n" \ "${unfiltered_count:-0}" "${filtered_count:-0}" \ "$total_whitelisted_count" "$excluded_count" "${toplist_count:-0}" @@ -495,15 +510,14 @@ source_scamadviser() { [[ "$USE_EXISTING" == true ]] && { process_source; return; } + touch "$results_file" # Create results file to ensure proper logging + local url='https://www.scamadviser.com/articles' for page in {1..20}; do # Loop through pages page_results="$(curl -s "${url}?p=${page}")" # Trailing slash breaks curl # Stop if page has an error - if ! grep -qiF 'article' <<< "$page_results"; then - printf "\e[1mError retrieving results for scamadviser.com.\e[0m\n" - break - fi + ! grep -qiF 'article' <<< "$page_results" && break grep -oE '
.*
Read more
' <<< "$page_results" \ | grep -oE '[A-Z][[:alnum:].-]+\.[[:alnum:]-]{2,}' >> "$results_file"