From 23ced073d5f21638811484487debf63315d0e3a8 Mon Sep 17 00:00:00 2001 From: Jarell <91372088+jarelllama@users.noreply.github.com> Date: Wed, 3 Apr 2024 12:26:06 +0800 Subject: [PATCH] Add error logging --- config/source_log.csv | 2 +- functions/retrieve_domains.sh | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/config/source_log.csv b/config/source_log.csv index 52651f27f..378c5f4ef 100644 --- a/config/source_log.csv +++ b/config/source_log.csv @@ -1,4 +1,4 @@ -Time,Source,Search Term,Raw Count,Final Count,Whitelisted,Dead,Redundant,Parked,Toplist Count,Toplist Domains,Query Count,Rate limited,Saved +Time,Source,Search Term,Raw Count,Final Count,Whitelisted,Dead,Redundant,Parked,Toplist Count,Toplist Domains,Query Count,Error,Saved 02:50:05 23-03-24,aa419.org,aa419.org,4255,5,1,499,0,0,0,,0,,no 02:50:05 23-03-24,guntab.com,guntab.com,1765,0,1,237,0,0,0,,0,,no 02:50:05 23-03-24,petscams.com,petscams.com,718,0,0,62,2,0,0,,0,,no diff --git a/functions/retrieve_domains.sh b/functions/retrieve_domains.sh index 949394b4f..17d473a23 100644 --- a/functions/retrieve_domains.sh +++ b/functions/retrieve_domains.sh @@ -57,9 +57,10 @@ source() { process_source() { [[ ! -f "$results_file" ]] && return - # Skip to next source if no results retrieved + # Check if any results were retrieved # [ -s ] does not seem to work well here if ! grep -q '[a-z]' "$results_file"; then + local empty=true log_source rm "$results_file" return @@ -252,20 +253,34 @@ decide_exit() { # otherwise, the default values are used. 
log_source() { local item - total_whitelisted_count="$(( whitelisted_count + whitelisted_tld_count ))" - excluded_count="$(( dead_count + redundant_count + parked_count ))" + local error if [[ "$source" == 'Google Search' ]]; then search_term="\"${search_term:0:100}...\"" item="$search_term" fi + if [[ "$rate_limited" == true ]]; then + error='rate_limited' + elif [[ "$empty" == true ]]; then + error='empty' + fi + + total_whitelisted_count="$(( whitelisted_count + whitelisted_tld_count ))" + excluded_count="$(( dead_count + redundant_count + parked_count ))" + echo "${TIME_FORMAT},${source},${search_term},${unfiltered_count:-0},\ ${filtered_count:-0},${total_whitelisted_count},${dead_count:-0},${redundant_count},\ ${parked_count:-0},${toplist_count:-0},$(printf "%s" "$domains_in_toplist" | tr '\n' ' '),\ -${query_count:-0},${rate_limited:-false},no" >> "$SOURCE_LOG" +${query_count:-0},${error},no" >> "$SOURCE_LOG" printf "\n\e[1mSource: %s\e[0m\n" "${item:-$source}" + + if [[ "$empty" == true ]]; then + printf "\e[1;31mNo results retrieved. Potential error occurred.\e[0m\n" + return + fi + printf "Raw:%4s Final:%4s Whitelisted:%4s Excluded:%4s Toplist:%4s\n" \ "${unfiltered_count:-0}" "${filtered_count:-0}" \ "$total_whitelisted_count" "$excluded_count" "${toplist_count:-0}" @@ -495,15 +510,14 @@ source_scamadviser() { [[ "$USE_EXISTING" == true ]] && { process_source; return; } + touch "$results_file" # Create results file to ensure proper logging + local url='https://www.scamadviser.com/articles' for page in {1..20}; do # Loop through pages page_results="$(curl -s "${url}?p=${page}")" # Trailing slash breaks curl # Stop if page has an error - if ! grep -qiF 'article' <<< "$page_results"; then - printf "\e[1mError retrieving results for scamadviser.com.\e[0m\n" - break - fi + ! grep -qiF 'article' <<< "$page_results" && break grep -oE '
.*
Read more
' <<< "$page_results" \ | grep -oE '[A-Z][[:alnum:].-]+\.[[:alnum:]-]{2,}' >> "$results_file"