-
Notifications
You must be signed in to change notification settings - Fork 0
/
readme_generator.py
161 lines (128 loc) · 5.91 KB
/
readme_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import json
import os
from typing import Dict, List
from openai import OpenAI
# Shared OpenAI client used by every model call in this module.
# The key is taken from the OPENAI_API_KEY environment variable when it is
# set, so the script can run non-interactively and the secret does not have
# to be typed (input() echoes what is typed). When the variable is unset or
# empty, fall back to the original interactive prompt.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY") or input("Enter OpenAI API Key: "))
# PROMPTS
# System prompt for the file-selection call (used by decide_interesting_files).
# The reply is fed to json.loads, so the example object below must itself be
# valid JSON (no trailing comma) and the code fence must close right after it;
# otherwise the model is shown a malformed template to imitate.
FILE_IDENTIFICATION_SYSTEM_PROMPT = """# Task
You will be given a list of files in a code repository. Extract out a list of files that you would use the contents to write the different parts of a README (Project Title, Description, Installation, Usage, Configuration) for a code repository.
# Additional Info
For project title and description, look at the main files and any UI files that may hold information about what the project is.
Usage information should include any docker or other package manager instructions to build the project.
# Output
Your output will be a JSON object written in the following format. Do not write anything other than this JSON object
```json
{
"project_title": ["list", "of", "files"],
"project_description": ["list", "of", "other", "files"],
"installation": ["list", "of", "files"],
"usage": ["another", "list", "of", "files"],
"configuration": ["list", "of", "files"]
}
```
# CRITICAL INFORMATION
You must ENSURE that the correct paths are retained. Do not confuse any paths or else downstream tasks will fail."""
# System prompt for the per-section writing call (used by
# generate_readme_section). It tells the model to emit only the one requested
# section; the section name and file contents arrive in the user message.
SECTION_WRITING_SYSTEM_PROMPT = """# Task
You are an expert README writer. Your writing can be found in the best Open Source projects. For the given section of a README and the provided files, write the section as best as you can.
# Output
Just output the section, do not write anything else. Do NOT (!) write anything other than the specified section to write. DO NOT write other sections of a README.
For example, for the project title you would just write "<Project Title>", no markdown formatting or other text is necessary.
For the description section, you would not include information on installation, usage or configuration.
For the installation section, you would not include information on usage or configuration.
For the usage section, this section would be specific to how the user could use the project.
For the configuration section, you would just write about what the user needs to configure to run the project successfully (environment variables and .env files to create).
"""
# System prompt for the final assembly call (used by
# format_extracted_information). The {title}/{description}/... tokens are a
# template shown to the model: in the code visible here the string is passed
# as-is and never run through str.format, so the braces reach the model
# literally.
FORMAT_SYSTEM_PROMPT = """# Task
With the provided information about a code repository, format the text into a README file with the following sections.
```markdown
# {title}
## Description
{description}
## Installation
{installation}
## Usage
{usage}
## Configuration
{configuration}
```
Do not (!) write anything about the repo that you do not know for certain. Some of the sections may have oddities from the extraction portion of this pipeline, feel free to make slight corrections such as removing template text.
Write your output exactly as above. Do not write anything besides this output markdown
"""
# Directory names that list_files prunes from its walk; matched against the
# bare directory name, so a match at any depth is skipped.
DIRS_TO_IGNORE = ['.git', 'venv']
# File names that list_files leaves out of its result; matched against the
# bare file name, not the full path.
FILES_TO_IGNORE = ['.DS_Store']
# helper functions
def list_files(directory: str) -> List[str]:
    """Return absolute paths of every file found under *directory*.

    The tree is walked recursively. Directories whose name appears in
    DIRS_TO_IGNORE are not descended into, and files whose name appears in
    FILES_TO_IGNORE are left out of the result.

    Args:
        directory: Root of the walk; "." means the current working directory.

    Returns:
        The collected file paths, in the order os.walk yields them.
    """
    root_dir = os.getcwd() if directory == "." else os.path.abspath(directory)

    collected = []
    for current_dir, subdirs, filenames in os.walk(root_dir):
        # Prune in place: os.walk only descends into what is left in subdirs.
        subdirs[:] = [name for name in subdirs if name not in DIRS_TO_IGNORE]
        collected.extend(
            os.path.join(current_dir, name)
            for name in filenames
            if name not in FILES_TO_IGNORE
        )
    return collected
# OpenAI functions
def decide_interesting_files(file_list: List[str]) -> Dict:
    """Ask the model which files are relevant to each README section.

    Args:
        file_list: Paths of all candidate files in the repository; sent to
            the model as the string form of the list.

    Returns:
        The JSON value parsed from the model's reply. The system prompt asks
        for an object mapping section names to lists of file paths.

    Raises:
        json.JSONDecodeError: If no valid JSON can be recovered from the
            reply.
    """
    # NOTE(review): a very long file list could exceed the 500-token reply
    # budget and truncate the JSON - confirm this limit is large enough.
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[
            {"role": "system", "content": FILE_IDENTIFICATION_SYSTEM_PROMPT},
            {"role": "user", "content": f"{file_list}"}
        ],
        max_tokens=500
    )
    # message.content may be None; treat that as an empty reply so the
    # failure surfaces as a JSON error instead of an AttributeError.
    output = (response.choices[0].message.content or "").strip()
    output = output.replace("```json", "").replace("```", "")

    # Drop any prose the model put around the object by keeping only the
    # outermost {...} span when one exists.
    start, end = output.find("{"), output.rfind("}")
    if start != -1 and end > start:
        output = output[start:end + 1]

    try:
        return json.loads(output)
    except json.JSONDecodeError:
        # Models often leave a trailing comma before a closing bracket,
        # which strict JSON rejects; remove those and try once more.
        import re
        return json.loads(re.sub(r",\s*([}\]])", r"\1", output))
def generate_readme_section(section: str, file_list: List[str]) -> str:
    """Ask the model to write one README section from the given files.

    Args:
        section: Name of the section to write; sent to the model under a
            "# Section to Write" heading.
        file_list: Paths of the files whose contents are supplied as
            context. A bare string is treated as a single path, and a
            missing/empty value as no files.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text.
    """
    # In main() these paths are taken from a model reply, so be lenient
    # about their shape rather than iterating a string character by character.
    if isinstance(file_list, str):
        file_list = [file_list]

    parts = [f"# Section to Write\n{section}\n\n"]
    for file in file_list or []:
        try:
            # Decode explicitly as UTF-8 and replace undecodable bytes so the
            # result does not depend on the platform default encoding and a
            # binary file cannot abort the run.
            with open(file, 'r', encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError as err:
            # Missing path, directory, permission problem: skip this file
            # and keep the remaining context.
            print(f"skipping {file}: {err}")
            continue
        parts.append(f"## File name: {file}\nContent:\n{content}\n\n")
    file_context = "".join(parts)

    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[
            {"role": "system", "content": SECTION_WRITING_SYSTEM_PROMPT},
            {"role": "user", "content": file_context}
        ],
        max_tokens=500
    )
    # message.content may be None; return "" instead of raising.
    return (response.choices[0].message.content or "").strip()
def format_extracted_information(extracted_info: Dict) -> str:
    """Ask the model to assemble the written sections into one README text.

    Args:
        extracted_info: Mapping of section name to the text written for that
            section; sent to the model as the string form of the dict.

    Returns:
        The README text with surrounding whitespace removed. When the whole
        reply is wrapped in a single code fence, the fence lines are removed.
        An empty string is returned when the reply carries no text.
    """
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[
            {"role": "system", "content": FORMAT_SYSTEM_PROMPT},
            {"role": "user", "content": f"{extracted_info}"}
        ],
        max_tokens=2000
    )
    # message.content may be None; treat that as an empty reply.
    readme = (response.choices[0].message.content or "").strip()

    # The system prompt shows its template inside a ```markdown fence, so the
    # reply may come back wrapped the same way. Written to disk unchanged,
    # that would render the whole README as one code block.
    lines = readme.splitlines()
    if len(lines) >= 2 and lines[0].startswith("```") and lines[-1].strip() == "```":
        readme = "\n".join(lines[1:-1]).strip()
    return readme
def main():
    """Interactively generate GENERATED_README.md for a repository.

    Prompts for a repository path, lists its files, asks the model which
    files matter for each README section, has each section written, has the
    sections assembled, and writes the result to GENERATED_README.md inside
    the given path. Prints a message and stops when no files are found.
    """
    desired_path = input("Enter path to repo: ")
    files_at_path = list_files(desired_path)
    if not files_at_path:
        print("didn't find any files")
        return

    file_list = decide_interesting_files(files_at_path)
    output_sections = {
        section: generate_readme_section(section, section_files)
        for section, section_files in file_list.items()
    }
    output_readme = format_extracted_information(output_sections)

    output_file_path = os.path.join(desired_path, "GENERATED_README.md")
    # Write as UTF-8 explicitly: model output can contain non-ASCII text
    # that the platform default encoding may be unable to encode.
    with open(output_file_path, "w", encoding="utf-8") as output_file:
        output_file.write(output_readme)
    print(f"wrote readme to {output_file_path}")


# Run the interactive flow only when executed as a script, not on import.
if __name__ == "__main__":
    main()