forked from TheBlewish/Automated-AI-Web-Researcher-Ollama
-
Notifications
You must be signed in to change notification settings - Fork 0
/
system_config.py
147 lines (134 loc) · 4.57 KB
/
system_config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
System-wide configuration settings for Web Scraper, Logging, and Research components
"""
import logging
import logging.handlers
# Web Scraper Configuration
# Consumed via get_scraper_config(); controls politeness, concurrency,
# and content-extraction limits for the web scraping component.
SCRAPER_CONFIG = {
    # Identifies this client in HTTP requests; the "+URL" points at the project
    # repo per the common crawler user-agent convention (placeholder username).
    "user_agent": "WebLLMAssistant/1.0 (+https://github.com/YourUsername/Web-LLM-Assistant-Llama-cpp)",
    "rate_limit": 1, # Seconds between requests to same domain
    "timeout": 10, # Request timeout in seconds
    "max_retries": 3, # Number of retry attempts for failed requests
    "max_workers": 5, # Maximum number of concurrent scraping threads
    "content_limits": {
        "max_content_length": 2400, # Maximum characters to extract from content
        "max_links": 10 # Maximum number of links to extract
    },
    "respect_robots_txt": False # Whether to respect robots.txt
}
# Search Provider Configuration
# Consumed via get_search_config(); selects the default search backend,
# the failover order, and per-provider request options.
SEARCH_CONFIG = {
    "default_provider": "duckduckgo", # Default search provider to use
    "fallback_order": [ # Order of providers to try if default fails
        "exa",
        "bing",
        "brave",
        "tavily",
        "duckduckgo" # Keep DuckDuckGo as final fallback
    ],
    # Options forwarded to each provider's client; keys here are
    # provider-specific and are not validated in this module.
    "provider_settings": {
        "tavily": {
            "search_depth": "basic",
            "max_results": 5,
            "include_answer": True,
            "include_images": False
        },
        "brave": {
            "max_results": 10
        },
        "bing": {
            "max_results": 10,
            "freshness": "Month" # Time range for results
        },
        "exa": {
            "max_results": 10,
            "use_highlights": True
        },
        "duckduckgo": {
            "max_results": 10,
            "region": "wt-wt", # Worldwide results
            "safesearch": "off"
        }
    },
    # Throttling applied across search requests (distinct from the
    # scraper's per-domain rate_limit above).
    "rate_limiting": {
        "requests_per_minute": 10,
        "cooldown_period": 60 # Seconds to wait after hitting rate limit
    }
}
# System-wide Logging Configuration
# Consumed by setup_logging(); "level" is the root logger level, while each
# handler carries its own threshold.
# NOTE(review): the file handler is set to DEBUG but the root "level" is INFO;
# DEBUG records only reach the file if the root logger is made at least as
# permissive — verify setup_logging() accounts for this.
LOGGING_CONFIG = {
    "level": logging.INFO,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
    "handlers": {
        "console": {
            "enabled": True,
            "level": logging.INFO
        },
        "file": {
            # Rotating file output: rolls over at max_bytes, keeping
            # backup_count old files.
            "enabled": True,
            "level": logging.DEBUG,
            "filename": "web_llm.log",
            "max_bytes": 1024 * 1024, # 1MB
            "backup_count": 3
        }
    }
}
# Research Configuration
# Consumed via get_research_config(); bounds the research loop's search
# volume, document/chunk sizes, persistence, and request throttling.
RESEARCH_CONFIG = {
    "search": {
        "max_searches_per_cycle": 5,
        "max_results_per_search": 10,
        # Results scoring below this relevance threshold are presumably
        # discarded by the consumer — TODO confirm against caller.
        "min_relevance_score": 0.6
    },
    "content": {
        "max_document_size": 12000, # Maximum size of research document in characters
        "max_chunk_size": 2000, # Maximum size of content chunks for processing
        "min_chunk_size": 100 # Minimum size of content chunks to process
    },
    "storage": {
        "auto_save": True,
        "auto_save_interval": 150, # Auto-save interval in seconds
        "backup_enabled": True,
        "max_backups": 2
    },
    "rate_limiting": {
        "requests_per_minute": 60,
        "concurrent_requests": 5,
        "cooldown_period": 60 # Seconds to wait after hitting rate limit
    }
}
def setup_logging():
    """Configure the root logger from LOGGING_CONFIG and return it.

    Replaces any existing root handlers with a console StreamHandler and/or
    a RotatingFileHandler, per the "handlers" section of LOGGING_CONFIG.

    Returns:
        logging.Logger: the configured root logger.
    """
    logger = logging.getLogger()
    # Remove handlers left over from earlier calls (or from basicConfig) so
    # this function is the single source of logging configuration.
    # (The previous version also called basicConfig first, which installed a
    # handler only to clear it here — dropped as redundant.)
    logger.handlers.clear()

    formatter = logging.Formatter(LOGGING_CONFIG["format"])
    handler_levels = []

    # Console handler
    console_cfg = LOGGING_CONFIG["handlers"]["console"]
    if console_cfg["enabled"]:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(console_cfg["level"])
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
        handler_levels.append(console_cfg["level"])

    # File handler (size-based rotation)
    file_cfg = LOGGING_CONFIG["handlers"]["file"]
    if file_cfg["enabled"]:
        file_handler = logging.handlers.RotatingFileHandler(
            file_cfg["filename"],
            maxBytes=file_cfg["max_bytes"],
            backupCount=file_cfg["backup_count"]
        )
        file_handler.setLevel(file_cfg["level"])
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        handler_levels.append(file_cfg["level"])

    # BUG FIX: the root logger's own level must be as permissive as the most
    # verbose enabled handler. Previously basicConfig pinned the root at
    # LOGGING_CONFIG["level"] (INFO), so the file handler's DEBUG setting was
    # silently ineffective — DEBUG records were filtered before reaching it.
    logger.setLevel(min(handler_levels, default=LOGGING_CONFIG["level"]))
    return logger
def get_scraper_config():
    """Return the module-level SCRAPER_CONFIG dictionary (not a copy)."""
    return SCRAPER_CONFIG
def get_research_config():
    """Return the module-level RESEARCH_CONFIG dictionary (not a copy)."""
    return RESEARCH_CONFIG
def get_search_config():
    """Return the module-level SEARCH_CONFIG dictionary (not a copy)."""
    return SEARCH_CONFIG