Skip to content

Commit

Permalink
Add email unsubscribe functionality with HTML parsing and link extrac…
Browse files Browse the repository at this point in the history
…tion
  • Loading branch information
Wambaforestin committed Nov 30, 2024
1 parent 7667e30 commit a6c3535
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 0 deletions.
122 changes: 122 additions & 0 deletions del_unsubscribe_email/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyderworkspace

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# txt file that contains the unsubscribe links
unsubscribe_links.txt
116 changes: 116 additions & 0 deletions del_unsubscribe_email/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from dotenv import load_dotenv
import imaplib # Library for connecting to email using IMAP
import email # Library for processing email content
from tqdm import tqdm # For showing a progress bar
from bs4 import BeautifulSoup # For parsing HTML content
import requests # For sending HTTP requests
import os

# Load environment variables (e.g., email credentials) from a .env file
load_dotenv()

# Function to connect to your email account
def connect_to_email():
    """Open an authenticated IMAP session on Gmail with the inbox selected.

    The credentials are read from the ``EMAIL_ADDRESS`` and
    ``EMAIL_PASSWORD`` environment variables.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection. The "inbox" mailbox
        is selected with ``readonly=False`` so callers may modify messages.

    Raises:
        ValueError: If either credential is missing or empty.
        imaplib.IMAP4.error: If the server rejects one of the IMAP commands
            (for example a failed login). The partially opened connection
            is closed before the error is re-raised.
    """
    email_address = os.getenv("EMAIL_ADDRESS")
    email_password = os.getenv("EMAIL_PASSWORD")

    # Fail early with a clear message instead of a confusing IMAP error.
    if not email_address or not email_password:
        raise ValueError("Email or password is not set. Check your .env file.")

    mail = None
    try:
        # One progress step per phase: connect, log in, select the mailbox.
        with tqdm(total=3, desc="Connecting to email") as pbar:
            mail = imaplib.IMAP4_SSL("imap.gmail.com")
            pbar.update(1)

            mail.login(email_address, email_password)
            pbar.update(1)

            mail.select("inbox", readonly=False)
            pbar.update(1)
    except imaplib.IMAP4.error as e:
        print(f"Failed to connect: {e}")
        if mail is not None:
            # Best-effort cleanup so a failed login does not leak the socket;
            # a failure here must not mask the original error.
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                pass
        raise

    print("Connected to email successfully!")
    return mail

# Function to extract unsubscribe links from HTML content in emails
def extract_emails_from_html(html_content):
    """Return the unsubscribe links found in an HTML email body.

    Despite its name, this function collects link targets (``href`` values),
    not e-mail addresses.

    Args:
        html_content: The decoded message body as a string.

    Returns:
        A list of ``href`` values, in document order, whose URL contains
        "unsubscribe" in any letter case. The list is empty when the content
        does not start with "<" (ignoring leading whitespace), which is the
        heuristic used here to recognise HTML.
    """
    # Debugging aid: show the start of the body that is about to be parsed.
    print("HTML Content to Parse:", html_content[:200])

    if not html_content.lstrip().startswith("<"):
        print("Skipped non-HTML content.")
        return []

    soup = BeautifulSoup(html_content, "html.parser")
    # Compare case-insensitively so URLs such as ".../Unsubscribe?id=1" are
    # not missed; the original href text is returned unchanged.
    return [
        anchor["href"]
        for anchor in soup.find_all("a", href=True)
        if "unsubscribe" in anchor["href"].lower()
    ]

def click_unsubscribe_link(link, timeout=10):
    """Visit an unsubscribe link with an HTTP GET and report the outcome.

    Args:
        link: The URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive server would stall the whole run.

    Returns:
        None. The result is printed: success for HTTP 200, otherwise the
        status code or the connection error.
    """
    # NOTE(review): the link comes from e-mail content, i.e. untrusted input.
    # Requesting it confirms to the sender that the address is active.
    try:
        response = requests.get(link, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Failed to connect to the link: {link}")
        print(e)
        return

    if response.status_code == 200:
        print(f"Unsubscribed successfully from: {link}")
    else:
        print(f"Failed to unsubscribe from: {link}, error code: {response.status_code}")

# Function to search for emails and extract unsubscribe links
def search_for_emails():
    """Collect unsubscribe links from every inbox message mentioning "unsubscribe".

    Connects through ``connect_to_email``, searches the selected mailbox for
    messages whose body contains "unsubscribe", and passes each text part to
    ``extract_emails_from_html``.

    Returns:
        A list with all links found, in the order the messages were
        processed. Duplicates are kept.
    """
    mail = connect_to_email()
    links = []

    try:
        _, search_data = mail.search(None, '(BODY "unsubscribe")')
        # An empty result comes back as an empty (or missing) first element.
        message_ids = search_data[0].split() if search_data and search_data[0] else []

        for number in message_ids:
            _, message_data = mail.fetch(number, "(RFC822)")
            # A successful fetch yields a (header, raw bytes) tuple first;
            # skip anything else rather than crash on indexing.
            if not message_data or not isinstance(message_data[0], tuple):
                continue
            message = email.message_from_bytes(message_data[0][1])

            if message.is_multipart():
                # Only the plain-text and HTML parts can carry links.
                parts = [
                    part
                    for part in message.walk()
                    if part.get_content_type() in ("text/plain", "text/html")
                ]
            else:
                # A single-part message is processed as-is.
                parts = [message]

            for part in parts:
                links.extend(extract_emails_from_html(_decode_text_part(part)))
    finally:
        # Always end the session, even when a message fails to process.
        mail.logout()

    return links


def _decode_text_part(part):
    """Return the decoded text of a message part, or "" if it has no payload."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""

    # Honour the charset declared by the part; many mails are not UTF-8.
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The declared charset is unknown to Python; fall back to UTF-8.
        return payload.decode("utf-8", errors="replace")

# Function to save the links to a file
def save_links_to_file(links, path="unsubscribe_links.txt"):
    """Write the links to a text file, one per line.

    Any existing file at ``path`` is overwritten.

    Args:
        links: Iterable of link strings to store.
        path: Destination file; defaults to "unsubscribe_links.txt" in the
            current working directory.
    """
    # An explicit encoding keeps the output identical across platforms and
    # avoids encode errors for non-ASCII URLs under a legacy default codec.
    with open(path, "w", encoding="utf-8") as file:
        file.writelines(f"{link}\n" for link in links)

# Testing the functions
# Script body: gather the links, visit each one, then persist the list.
result = search_for_emails()

# Report an empty result; otherwise request every collected link in order.
if not result:
    print("No unsubscribe links found.")
else:
    print("Unsubscribe Links Found:")
    for link in result:
        click_unsubscribe_link(link)

# Store whatever was collected (possibly nothing) on disk.
save_links_to_file(result)
Binary file added del_unsubscribe_email/requirements.txt
Binary file not shown.

0 comments on commit a6c3535

Please sign in to comment.