-
Notifications
You must be signed in to change notification settings - Fork 301
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #182 from manikumarreddyu/automated
Github-Automated-Analysis
- Loading branch information
Showing
5 changed files
with
447 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/#use-with-ide | ||
.pdm.toml | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ | ||
|
||
|
||
.vscode | ||
.streamlit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
import streamlit as st | ||
import requests | ||
import os | ||
from dotenv import load_dotenv | ||
import pandas as pd | ||
from langchain.embeddings.openai import OpenAIEmbeddings | ||
from langchain.prompts import PromptTemplate | ||
from langchain.document_loaders import CSVLoader,TextLoader,DirectoryLoader | ||
from langchain.text_splitter import CharacterTextSplitter | ||
from langchain.chains import RetrievalQA | ||
from langchain.llms import OpenAI | ||
from langchain.vectorstores import FAISS | ||
import utils.config as config | ||
from github import Github | ||
from utils.constants import * | ||
|
||
# load_dotenv() copies the variables defined in a local .env file into
# os.environ (existing variables are not overridden by default), which is
# where the OpenAI client and this module read OPENAI_API_KEY / GITHUB_TOKEN
# from.  Do NOT re-assign `os.environ[X] = os.getenv(X)` afterwards: it is a
# no-op when X is set and raises TypeError at import time when X is missing,
# which would crash the app for an absent (and unused) ACTIVELOOP_TOKEN.
load_dotenv()

# Page configuration for the Streamlit app (title, icon, wide layout, open sidebar).
st.set_page_config(
    page_title="GitHub Repositories List",
    page_icon=":computer:",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
||
|
||
|
||
|
||
# Function to fetch GitHub repositories | ||
@st.cache_data  # Cache data so that we don't have to fetch it again
def fetch_github_repos(username):
    """Fetch the repositories of a GitHub user through the REST API.

    Pages through ``/users/{username}/repos`` 50 entries at a time and stops
    at the first empty page.

    Args:
        username: GitHub login whose repositories are listed.

    Returns:
        A list of repository dicts exactly as returned by the API.  The list
        is empty when nothing could be fetched (unknown user, no
        repositories, HTTP error, network failure) and may be partial when a
        later page fails.
    """
    from urllib.parse import quote

    # Escape the login so stray characters typed by the user ("/", "?", spaces)
    # cannot change the path or query of the request.
    url = f"https://api.github.com/users/{quote(username, safe='')}/repos"

    repos = []
    page = 1
    while True:
        try:
            response = requests.get(
                url, params={"page": page, "per_page": 50}, timeout=10
            )
            if response.status_code != 200:
                # Error responses (404, rate limiting, ...) carry a JSON *object*
                # such as {"message": ...}.  Without this check the loop would
                # treat it as a non-empty page and never terminate.
                break
            data = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or a body that is not valid JSON.
            break
        if not isinstance(data, list) or not data:
            break
        repos.extend(data)
        page += 1
    return repos
|
||
# Function to display repositories | ||
def display_repos(repos):
    """Write one Markdown link per repository to the Streamlit page.

    Args:
        repos: Iterable of mappings that provide the ``"name"`` and
            ``"html_url"`` keys.
    """
    for entry in repos:
        label, link = entry["name"], entry["html_url"]
        st.write(f"[{label}]({link})")
|
||
|
||
|
||
def get_user_repos(username):
    """Collect a user's repositories and ask the LLM which one is most complex.

    The repository metadata is gathered with PyGithub, written to
    ``repo_data.csv``, embedded into a FAISS index and queried through a
    RetrievalQA chain.  The model's answer is written to the Streamlit page.

    Args:
        username: The username of the GitHub user.

    Returns:
        A list of dictionaries, where each dictionary contains the
        information of one repository (the rows written to the CSV file).
        The list is empty when the user has no repositories.

    Raises:
        github.GithubException: presumably propagated from PyGithub when the
            user cannot be looked up (unknown login, rate limit) - verify
            against the installed PyGithub version.
    """
    # Authenticate when a token is configured; anonymous clients are limited
    # to a small hourly request quota and this function issues several
    # requests per repository.
    client = Github(os.getenv("GITHUB_TOKEN") or None)

    repo_info = [
        _summarize_repo(repo) for repo in client.get_user(username).get_repos()
    ]
    if not repo_info:
        # Nothing to embed: building a vector index from zero documents fails.
        st.warning("No repositories found to analyse.")
        return repo_info

    # index=False keeps the meaningless row number out of the documents.
    pd.DataFrame(repo_info).to_csv("repo_data.csv", index=False)

    loader = CSVLoader(file_path="repo_data.csv", encoding="utf-8")
    csv_data = loader.load()
    csv_embeddings = OpenAIEmbeddings()
    vectors = FAISS.from_documents(csv_data, csv_embeddings)

    # Standing instructions for the model.  They are prepended to the prompt
    # template below; the template's {context} placeholder is filled by the
    # retriever with CSV rows, so the instructions must travel separately.
    instructions = """ You are Supersmart Github Repository AI system. You are a superintelligent AI that answers questions about Github Repositories and can understand the technical complexity if the repo.
You are:
- helpful & friendly
- good at answering complex questions in simple language
- an expert in all programming languages
- able to infer the intent of the user's question
Remember You are an inteelligent CSV Agent who can understand CSV files and their contents. You are given a CSV file with the following columns: Repository Name, Repository Link, Analysis. You are asked to find the most technically complex and challenging repository from the given CSV file.
To measure the technical complexity of a GitHub repository using the provided API endpoints, You will analyze various factors such as the number of commits, branches, pull requests, issues,contents , number of forks , stars , and contributors. Additionally, you will consider the programming languages used, the size of the codebase, and the frequency of updates.
You will Analyze the following GitHub repository factors to determine the technical complexity of the codebase and calculate a complexity score for each project:
1.Description
2.languages used in the repository
3.Number of stars
4.Number of forks
5.Labels of the repository
6.Description of the repository
7.Contents of the repository
You can consider other factors as well if you think they are relevant for determining the technical complexity of a GitHub repository.
Calculate the complexity score for each project by assigning weights to each factor and summing up the weighted scores.
The project with the highest complexity score will be considered the most technically complex.
Here is the approach or chain-of-thought process , you can use to reach to the solution :
Step 1: Analyze each row and it's contents in the CSV file , each Row represents a Github Repository
"""

    prompt_template = instructions + """
Understand the following to answer the question in an efficient way
{context}
Question: {question}
Now answer the question. Let's think step by step:"""
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # Create a question-answering chain using the index
    chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=vectors.as_retriever(),
        input_key="question",
        chain_type_kwargs={"prompt": PROMPT},
    )

    st.subheader("Most Technically Complex Github Repository is")

    query = f"""
Which is the most technically challenging repository from the given CSV file?
Return the name of the repository , the link to the repository and the analysis of the repository showing why it is the most technically challenging/Complex repository.Try to provide a detailed analysis to hold your answer strong
The output should be in the following format:
Repository Name: <name of the repository>
Repository Link: <link to the repository>
Analysis: <analysis of the repository>
Provide a clickable link to the repository as well like this:
To get the repo url , you can use this format :
The username is : "{username}"
"https://github.com/{username}/repository_name"
[Repository Name](Repository Link) --> This is Important.Don't skip it
Let's think step by step about how to answer this question:
"""
    result = chain({"question": query})
    if result is not None:
        st.write(result['result'])
    else:
        st.info("Please wait..")
        st.stop()

    return repo_info


def _summarize_repo(repo):
    """Flatten one PyGithub repository into a dict of plain, CSV-friendly values."""
    from github import GithubException

    try:
        # Listing the root directory fails for a repository without any
        # commits; one empty repository must not abort the whole analysis.
        root_paths = [item.path for item in repo.get_contents("")]
    except GithubException:
        root_paths = []

    try:
        # The issues endpoint can be rejected (e.g. issues disabled).
        issue_count = repo.get_issues(state="all").totalCount
    except GithubException:
        issue_count = 0

    # Labels, issues and contents are stored as plain text/numbers: the raw
    # PyGithub objects would only end up in the CSV as object reprs, which
    # carry no information for the model.
    return {
        "name": repo.name,
        "description": repo.description,
        "language": repo.language,
        "stars": repo.stargazers_count,
        "forks": repo.forks_count,
        "labels": ", ".join(label.name for label in repo.get_labels()),
        "issues": issue_count,
        "contents": ", ".join(root_paths),
    }
|
||
|
||
|
||
|
||
# Main app | ||
def main():
    """Render the Streamlit page and run the analysis for the entered user.

    The sidebar holds the username input plus Submit / Clear buttons.  On
    Submit the user's repositories are listed and handed to
    ``get_user_repos`` for the LLM analysis; Clear empties the input field.
    """
    config.init()
    # Set up the app title and sidebar
    st.title("GitHub Automated Analysis Tool")
    st.sidebar.title("GitHub Automated Analysis Tool")

    # Input field for GitHub username.  The key exposes the widget value in
    # st.session_state so the Clear button's callback can reset it.
    username = st.sidebar.text_input("Enter GitHub Username", key="username").strip()

    # Submit and clear buttons
    submit_button = st.sidebar.button("Submit")
    # Rebinding the local `username` cannot change the widget; the value has
    # to be reset through session state in a callback, which Streamlit runs
    # before the automatic rerun that follows the click.
    st.sidebar.button("Clear", on_click=_clear_username)
    st.sidebar.header("About")
    st.sidebar.info("This Python-based tool , when given a GitHub user's URL, returns the most technically complex and challenging repository from that user's profile. The tool will use GPT and LangChain to assess each repository individually before determining the most technically challenging one.")
    st.divider()
    st.sidebar.write("This tool is created by [MANI KUMAR REDDY U](https://github.com/manikumarreddyu).")

    # Display the repositories
    if submit_button:
        if not username:
            st.warning("Please enter a GitHub username.")
            return
        st.subheader(f"Repositories for {username}")
        repos = fetch_github_repos(username)
        if repos:
            display_repos(repos)
            st.info("Analysis of the repositories using LangChain and ChatGPT started. Please wait...")
            get_user_repos(username)
        else:
            # Only report a failure when nothing was fetched.
            st.error("Invalid username or unable to fetch repositories")


def _clear_username():
    """Reset the username text box; on_click callback of the Clear button."""
    st.session_state["username"] = ""
|
||
|
||
|
||
|
||
|
||
# Start the app only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Oops, something went wrong.