Skip to content

Commit

Permalink
Merge pull request #630 from ScrapeGraphAI/595-rate-limit-error
Browse files Browse the repository at this point in the history
feat(AbstractGraph): add adjustable rate limit
  • Loading branch information
VinciGit00 authored Sep 6, 2024
2 parents 4347afb + 2859fb7 commit c382b9d
Show file tree
Hide file tree
Showing 16 changed files with 699 additions and 0 deletions.
48 changes: 48 additions & 0 deletions examples/anthropic/rate_limit_haiku.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info


# required environment variables in .env
# ANTHROPIC_API_KEY
load_dotenv()

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

graph_config = {
"llm": {
"api_key": os.getenv("ANTHROPIC_API_KEY"),
"model": "anthropic/claude-3-haiku-20240307",
"rate_limit": {
"requests_per_second": 1
}
},
}

smart_scraper_graph = SmartScraperGraph(
prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
# also accepts a string with the already downloaded HTML code
source="https://www.hmhco.com/event",
config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
57 changes: 57 additions & 0 deletions examples/azure/rate_limit_azure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info


# required environment variable in .env
# AZURE_OPENAI_ENDPOINT
# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
# MODEL_NAME
# AZURE_OPENAI_API_KEY
# OPENAI_API_TYPE
# AZURE_OPENAI_API_VERSION
# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
load_dotenv()


# ************************************************
# Initialize the model instances
# ************************************************

graph_config = {
"llm": {
"api_key": os.environ["AZURE_OPENAI_KEY"],
"model": "azure_openai/gpt-3.5-turbo",
"rate_limit": {
"requests_per_second": 1
},
},
"verbose": True,
"headless": False
}

smart_scraper_graph = SmartScraperGraph(
prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
# also accepts a string with the already downloaded HTML code
source="https://www.hmhco.com/event",
config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
47 changes: 47 additions & 0 deletions examples/bedrock/rate_limit_bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
"llm": {
"client": "client_name",
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
"temperature": 0.0,
"rate_limit": {
"requests_per_second": 1
},
}
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
49 changes: 49 additions & 0 deletions examples/deepseek/rate_limit_deepseek.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

graph_config = {
"llm": {
"model": "deepseek/deepseek-chat",
"api_key": deepseek_key,
"rate_limit": {
"requests_per_second": 1
}
},
"verbose": True,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description.",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
49 changes: 49 additions & 0 deletions examples/ernie/rate_limit_ernie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
"llm": {
"model": "ernie/ernie-bot-turbo",
"ernie_client_id": "<ernie_client_id>",
"ernie_client_secret": "<ernie_client_secret>",
"temperature": 0.1,
"rate_limit": {
"requests_per_second": 1
},
},
"library": "beautifulsoup"
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
50 changes: 50 additions & 0 deletions examples/fireworks/rate_limit_fireworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""

import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

graph_config = {
"llm": {
"api_key": fireworks_api_key,
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct",
"rate_limit": {
"requests_per_second": 1
},
},
"verbose": True,
"headless": False,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
config=graph_config,
)

result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
47 changes: 47 additions & 0 deletions examples/google_genai/rate_limit_gemini.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""

import os
from dotenv import load_dotenv
from scrapegraphai.utils import prettify_exec_info
from scrapegraphai.graphs import SmartScraperGraph
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

gemini_key = os.getenv("GOOGLE_APIKEY")

graph_config = {
"llm": {
"api_key": gemini_key,
"model": "google_genai/gemini-pro",
"rate_limit": {
"requests_per_second": 1
}
},
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
prompt="List me all the news with their description.",
# also accepts a string with the already downloaded HTML code
source="https://www.wired.com",
config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Loading

0 comments on commit c382b9d

Please sign in to comment.