From 081e87ac6e43a2f94e3377ab899cf984a0a9c9ad Mon Sep 17 00:00:00 2001
From: madjin <32600939+madjin@users.noreply.github.com>
Date: Tue, 17 Dec 2024 16:21:35 -0500
Subject: [PATCH] add daily cronjob

---
 .github/workflows/weekly-summaries.yml |  44 ++-
 scripts/summarize_daily.py             | 413 +++++++++++++++++++++++++
 2 files changed, 451 insertions(+), 6 deletions(-)
 create mode 100644 scripts/summarize_daily.py

diff --git a/.github/workflows/weekly-summaries.yml b/.github/workflows/weekly-summaries.yml
index 7c94e9e..07ba163 100644
--- a/.github/workflows/weekly-summaries.yml
+++ b/.github/workflows/weekly-summaries.yml
@@ -1,8 +1,9 @@
 name: Contributor Updates
 on:
   schedule:
-    - cron: '0 19 * * 5' # Weekly on Friday at 2:00 PM EST
-    - cron: '0 19 1 * *' # Monthly on 1st at 2:00 PM EST
+    - cron: '0 19 * * *' # Daily at 2:00 PM EST (19:00 UTC)
+    - cron: '0 19 * * 5' # Weekly on Friday at 2:00 PM EST (19:00 UTC)
+    - cron: '0 19 4 * *' # Monthly on the 4th at 2:00 PM EST (19:00 UTC)
   workflow_dispatch:
 
 permissions:
@@ -39,9 +40,23 @@ jobs:
       run: |
         echo "TIMESTAMP=$(date +'%Y_%m_%d')" >> $GITHUB_ENV
         echo "IS_MONTH_START=$(date +'%d')" >> $GITHUB_ENV
+        echo "IS_FRIDAY=$(date +'%u')" >> $GITHUB_ENV
+
+    - name: Fetch daily data
+      if: github.event.schedule != '0 19 * * 5' && github.event.schedule != '0 19 4 * *'
+      env:
+        GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
+      run: |
+        # Create directories
+        mkdir -p data/daily data/daily/history
+
+        # Fetch current data with timestamp
+        bash scripts/fetch_github.sh ai16z eliza --type prs --days 1 | tee data/daily/prs.json data/daily/history/prs_${TIMESTAMP}.json
+        bash scripts/fetch_github.sh ai16z eliza --type issues --days 1 | tee data/daily/issues.json data/daily/history/issues_${TIMESTAMP}.json
+        bash scripts/fetch_github.sh ai16z eliza --type commits --days 1 | tee data/daily/commits.json data/daily/history/commits_${TIMESTAMP}.json
 
     - name: Fetch weekly data
-      if: github.event.schedule != '0 19 1 * *'
+      if: github.event.schedule == '0 19 * * 5'
       env:
         GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
       run: |
@@ -54,7 +69,7 @@
         bash scripts/fetch_github.sh ai16z eliza --type commits --days 7 | tee data/weekly/commits.json data/weekly/history/commits_${TIMESTAMP}.json
 
     - name: Fetch monthly data
-      if: github.event.schedule == '0 19 1 * *'
+      if: github.event.schedule == '0 19 4 * *'
       env:
         GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
       run: |
@@ -65,23 +80,40 @@
         bash scripts/fetch_github.sh ai16z eliza --type prs --days 30 | tee data/monthly/prs.json data/monthly/history/prs_${TIMESTAMP}.json
         bash scripts/fetch_github.sh ai16z eliza --type issues --days 30 | tee data/monthly/issues.json data/monthly/history/issues_${TIMESTAMP}.json
         bash scripts/fetch_github.sh ai16z eliza --type commits --days 30 | tee data/monthly/commits.json data/monthly/history/commits_${TIMESTAMP}.json
+
+    - name: Process daily data
+      if: github.event.schedule != '0 19 * * 5' && github.event.schedule != '0 19 4 * *'
+      env:
+        GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      run: |
+        # Archive the previous day's outputs before regenerating them
+        [ -f data/daily/contributors.json ] && mv data/daily/contributors.json data/daily/history/contributors_${TIMESTAMP}.json || true
+        [ -f data/daily/summary.json ] && mv data/daily/summary.json data/daily/history/summary_${TIMESTAMP}.json || true
+        [ -f data/daily/summary.md ] && mv data/daily/summary.md data/daily/history/summary_${TIMESTAMP}.md || true
+        python scripts/combine.py -p data/daily/prs.json -i data/daily/issues.json -c data/daily/commits.json -o data/daily/combined.json
+        python scripts/calculate_scores.py data/daily/combined.json data/daily/scored.json
+        python scripts/summarize.py data/daily/scored.json data/daily/contributors.json --model openai
+        python scripts/summarize_daily.py data/daily/contributors.json -t json data/daily/summary.json --model openai
+        python scripts/summarize_daily.py data/daily/contributors.json -t md data/daily/summary.md --model openai
 
     - name: Process weekly data
-      if: github.event.schedule != '0 19 1 * *'
+      if: github.event.schedule == '0 19 * * 5'
       env:
         GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
+        [ -f data/weekly/contributors.json ] && mv data/weekly/contributors.json data/weekly/history/contributors_${TIMESTAMP}.json || true
         python scripts/combine.py -p data/weekly/prs.json -i data/weekly/issues.json -c data/weekly/commits.json -o data/weekly/combined.json
         python scripts/calculate_scores.py data/weekly/combined.json data/weekly/scored.json
         python scripts/summarize.py data/weekly/scored.json data/weekly/contributors.json --model openai
 
     - name: Process monthly data
-      if: github.event.schedule == '0 19 1 * *'
+      if: github.event.schedule == '0 19 4 * *'
       env:
         GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
+        [ -f data/monthly/contributors.json ] && mv data/monthly/contributors.json data/monthly/history/contributors_${TIMESTAMP}.json || true
         python scripts/combine.py -p data/monthly/prs.json -i data/monthly/issues.json -c data/monthly/combined.json -o data/monthly/combined.json
         python scripts/calculate_scores.py data/monthly/combined.json data/monthly/scored.json
         python scripts/summarize.py data/monthly/scored.json data/monthly/contributors.json --model openai
diff --git a/scripts/summarize_daily.py b/scripts/summarize_daily.py
new file mode 100644
index 0000000..f7b7bbf
--- /dev/null
+++ b/scripts/summarize_daily.py
@@ -0,0 +1,413 @@
+import json
+import os
+import argparse
+from datetime import datetime
+from collections import Counter, defaultdict
+from typing import List, Dict
+
+def analyze_activity_metrics(data: List[Dict]) -> Dict:
+    """Generate detailed activity metrics with file-level analysis"""
+    metrics = defaultdict(int)
+    file_changes = defaultdict(lambda: {'adds': 0, 'dels': 0, 'changes': 0})
+    pr_types = defaultdict(int)
+    issue_labels = defaultdict(int)
+
+    for contributor in data:
+        # Process PRs
+        for pr in contributor['activity']['code']['pull_requests']:
+            if pr.get('merged'):
+                metrics['merged_prs'] += 1
+                # Categorize PR types by conventional-commit prefix
+                title_lower = pr['title'].lower()
+                if 'feat:' in title_lower:
+                    pr_types['features'] += 1
+                elif 'fix:' in title_lower:
+                    pr_types['fixes'] += 1
+                elif 'chore:' in title_lower:
+                    pr_types['chores'] += 1
+                elif 'refactor:' in title_lower:
+                    pr_types['refactors'] += 1
+
+                # Process file changes, bucketed by top-level directory
+                for file in pr.get('files', []):
+                    path = file['path']
+                    category = path.split('/')[0] if '/' in path else 'root'
+                    file_changes[category]['adds'] += file.get('additions', 0)
+                    file_changes[category]['dels'] += file.get('deletions', 0)
+                    file_changes[category]['changes'] += 1
+
+        # Process Issues
+        for issue in contributor['activity']['issues']['opened']:
+            metrics['new_issues'] += 1
+            for label in issue.get('labels', []):
+                issue_labels[label.get('name', 'unlabeled')] += 1
+
+        # Process Commits
+        metrics['total_commits'] += len(contributor['activity']['code']['commits'])
+
+    return {
+        'basic_metrics': {
+            'contributors': len(data),
+            'commits': metrics['total_commits'],
+            'merged_prs': metrics['merged_prs'],
+            'new_issues': metrics['new_issues']
+        },
+        'pr_types': dict(pr_types),
+        'file_changes': dict(file_changes),
+        'issue_labels': dict(issue_labels)
+    }
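+
+# Illustrative input record for this script (shape inferred from the accesses
+# above; all names and values are hypothetical):
+#
+# {
+#   "contributor": "octocat",
+#   "score": 72,
+#   "summary": "octocat merged 3 PRs focused on plugin stability. ...",
+#   "activity": {
+#     "code": {
+#       "commits": ["..."],
+#       "pull_requests": [
+#         {"title": "feat: add daily summaries", "merged": true,
+#          "files": [{"path": "scripts/summarize.py", "additions": 10, "deletions": 2}]}
+#       ]
+#     },
+#     "issues": {"opened": [{"title": "...", "labels": [{"name": "bug"}]}]}
+#   }
+# }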
+
+def generate_overview(metrics: Dict, changes: List[Dict], data: List[Dict]) -> str:
+    """Generate a detailed overview of daily activities and key developments"""
+    # Get key features and changes
+    features = [c['title'].split(':', 1)[1].strip() for c in changes
+                if c.get('merged') and c['title'].lower().startswith('feat:')]
+
+    # Get key areas and what's being built
+    key_developments = []
+    if 'packages' in metrics['file_changes']:
+        pkg_changes = next((c['title'].split(':', 1)[1].strip() for c in changes
+                            if ':' in c['title'] and
+                            ('plugin' in c['title'].lower() or
+                             'client' in c['title'].lower())), None)
+        if pkg_changes:
+            key_developments.append(f"package improvements ({pkg_changes})")
+
+    if features:
+        key_developments.append(f"new features ({features[0]})")
+
+    if metrics['pr_types'].get('fixes', 0) > 0:
+        key_developments.append(f"{metrics['pr_types']['fixes']} bug fixes")
+
+    # Find major work summary from the highest-scored contributors
+    major_work = next(
+        (c['summary'].split('.')[0].lower()
+         for c in data if c['score'] > 50),
+        'various improvements'
+    )
+
+    overview = (
+        f"Development focused on {', '.join(key_developments)}, "
+        f"with {metrics['basic_metrics']['contributors']} contributors "
+        f"merging {metrics['basic_metrics']['merged_prs']} PRs. "
+        f"Major work included {major_work}."
+    )
+
+    return overview
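+
+# Illustrative result (all names and numbers hypothetical):
+# "Development focused on package improvements (add Slack client), new features
+# (add daily summaries), 3 bug fixes, with 9 contributors merging 14 PRs.
+# Major work included improving plugin reliability."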
+
+def get_contributor_details(data: List[Dict]) -> List[Dict]:
+    """Get detailed contributor information including summaries"""
+    top_contributors = []
+    for c in sorted(data, key=lambda x: x['score'], reverse=True)[:3]:
+        # Get their main merged PR
+        main_pr = next((pr['title'] for pr in c['activity']['code']['pull_requests']
+                        if pr.get('merged')), None)
+
+        # Get their activity summary (first sentence only)
+        summary = c['summary'].split('.')[0]
+
+        # Get their main areas of work
+        areas = set()
+        for pr in c['activity']['code']['pull_requests']:
+            if pr.get('merged') and pr.get('files'):
+                areas.update(f['path'].split('/')[0] for f in pr['files'])
+
+        top_contributors.append({
+            "name": c['contributor'],
+            "main_contribution": main_pr,
+            "summary": summary,
+            "areas": list(areas)[:3]  # Top 3 areas they worked in
+        })
+
+    return top_contributors
+
+def generate_json_summary(metrics: Dict, data: List[Dict]) -> Dict:
+    """Generate structured JSON summary of activity"""
+    changes = [pr for c in data for pr in c['activity']['code']['pull_requests'] if pr.get('merged')]
+    version = next((c['title'].split(':', 1)[1].strip() for c in changes
+                    if ':' in c['title'] and
+                    ('version' in c['title'].lower() or
+                     'bump' in c['title'].lower())), "")
+
+    # Collect all issues
+    all_issues = []
+    for c in data:
+        all_issues.extend(c['activity']['issues']['opened'])
+
+    # Get issues by type
+    bugs = [issue for issue in all_issues
+            if any(label.get('name') == 'bug' for label in issue.get('labels', []))]
+    enhancements = [issue for issue in all_issues
+                    if any(label.get('name') == 'enhancement' for label in issue.get('labels', []))]
+
+    # Generate issue summary
+    issue_summary = ""
+    if bugs or enhancements:
+        summaries = []
+        if bugs:
+            bug_titles = [f"'{issue['title']}'" for issue in bugs[:2]]
+            summaries.append(f"working on {len(bugs)} bugs including {', '.join(bug_titles)}")
+        if enhancements:
+            enhancement_titles = [f"'{issue['title']}'" for issue in enhancements[:2]]
+            summaries.append(f"implementing {len(enhancements)} feature requests including {', '.join(enhancement_titles)}")
+        issue_summary = " and ".join(summaries)
+
+    return {
+        "title": f"ai16z Eliza ({datetime.utcnow().strftime('%Y-%m-%d')})",
+        "version": version,
+        "overview": generate_overview(metrics, changes, data),
+        "metrics": {
+            "contributors": metrics['basic_metrics']['contributors'],
+            "merged_prs": metrics['basic_metrics']['merged_prs'],
+            "new_issues": metrics['basic_metrics']['new_issues'],
+            "lines_changed": sum(area['adds'] + area['dels']
+                                 for area in metrics['file_changes'].values())
+        },
+        "changes": {
+            "features": [c['title'].split(':', 1)[1].strip() for c in changes
+                         if c['title'].lower().startswith('feat:')][:3],
+            "fixes": [c['title'].split(':', 1)[1].strip() for c in changes
+                      if c['title'].lower().startswith('fix:')][:3],
+            "chores": [c['title'].split(':', 1)[1].strip() for c in changes
+                       if c['title'].lower().startswith('chore:')][:3]
+        },
+        "areas": [
+            {
+                "name": area,
+                "files": stats['changes'],
+                "additions": stats['adds'],
+                "deletions": stats['dels']
+            }
+            for area, stats in sorted(
+                metrics['file_changes'].items(),
+                key=lambda x: x[1]['changes'],
+                reverse=True
+            )[:3]
+        ],
+        "issues_summary": issue_summary,
+        "questions": [],
+        "top_contributors": [
+            {
+                "name": c['contributor'],
+                "summary": c['summary'].split('.')[0],
+                "areas": list(set(
+                    f['path'].split('/')[0]
+                    for pr in c['activity']['code']['pull_requests']
+                    if pr.get('merged') and pr.get('files')
+                    for f in pr['files']
+                ))[:3]
+            }
+            for c in sorted(data, key=lambda x: x['score'], reverse=True)[:3]
+        ]
+    }
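+
+# Illustrative JSON output (abridged; values hypothetical):
+#
+# {
+#   "title": "ai16z Eliza (2024-12-17)",
+#   "version": "v0.1.5",
+#   "overview": "Development focused on ...",
+#   "metrics": {"contributors": 9, "merged_prs": 14, "new_issues": 5, "lines_changed": 3120},
+#   "changes": {"features": ["add daily summaries"], "fixes": ["..."], "chores": ["..."]},
+#   "areas": [{"name": "packages", "files": 42, "additions": 2100, "deletions": 800}],
+#   "issues_summary": "working on 2 bugs including ...",
+#   "questions": [],
+#   "top_contributors": [{"name": "octocat", "summary": "...", "areas": ["packages"]}]
+# }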
+
+def generate_summary(data: List[Dict], model: str = "ollama", api_key: str = None) -> str:
+    """Generate a unified markdown summary with key sections"""
+    # NOTE: model/api_key are accepted for pipeline compatibility; the summary
+    # itself is assembled from templates and does not call a model.
+    metrics = analyze_activity_metrics(data)
+
+    # Get user-facing summary first
+    user_summary = generate_user_summary(metrics, data)
+
+    # Get top contributors with their main contribution
+    top_contributors = sorted(data, key=lambda x: x['score'], reverse=True)[:3]
+    contributor_summary = []
+    for c in top_contributors:
+        main_pr = next((pr['title'] for pr in c['activity']['code']['pull_requests']
+                        if pr.get('merged')), None)
+        if main_pr:
+            contributor_summary.append(f"- **{c['contributor']}**: {main_pr}")
+
+    # Remove the date from user_summary since it's now in the title
+    user_summary_lines = user_summary.split('\n')[1:]  # Skip the first line that had the old title
+    user_summary = '\n'.join(user_summary_lines)
+
+    # chr(10) is "\n"; f-string expressions cannot contain backslashes here
+    summary = f"""# ai16z Eliza ({datetime.utcnow().strftime("%Y-%m-%d")})
+{user_summary}
+
+## Top Contributors
+{chr(10).join(contributor_summary)}"""
+
+    return summary
+
+def generate_user_summary(metrics: Dict, data: List[Dict]) -> str:
+    """Generate thorough but concise user-facing summary with bullet points"""
+    changes = [pr for c in data for pr in c['activity']['code']['pull_requests'] if pr.get('merged')]
+
+    date = datetime.utcnow().strftime("%Y-%m-%d")
+    overview = generate_overview(metrics, changes, data)
+
+    # Count PR types
+    pr_types = Counter(
+        pr['title'].split(':')[0].lower()
+        for pr in changes
+        if ':' in pr['title']
+    )
+
+    # Get total commits
+    total_commits = sum(len(c['activity']['code']['commits']) for c in data)
+
+    # Format file changes
+    file_changes = []
+    for area, stats in sorted(
+        metrics['file_changes'].items(),
+        key=lambda x: x[1]['adds'] + x[1]['dels'],
+        reverse=True
+    )[:5]:  # Show top 5 areas
+        file_changes.append(
+            f"- **{area}**: {stats['changes']} files (+{stats['adds']}/-{stats['dels']} lines)"
+        )
+
+    # Get contributors with summaries
+    contributors = get_contributor_details(data)
+    contributor_details = []
+    for c in contributors:
+        contributor_details.append(
+            f"- **{c['name']}**: {c['summary']}"
+        )
+
+    # Count issue labels
+    label_counts = Counter()
+    for c in data:
+        for issue in c['activity']['issues']['opened']:
+            for label in issue.get('labels', []):
+                label_counts[label.get('name', 'unlabeled')] += 1
+
+    # Format notable changes
+    notable_changes = [f"- {pr['title']}" for pr in changes[:3]]
+
+    # Format labels
+    label_text = ', '.join(f'`{label}` ({count})' for label, count in label_counts.most_common(3))
+
+    # Collect all issues
+    all_issues = []
+    for c in data:
+        all_issues.extend(c['activity']['issues']['opened'])
+
+    # Generate rich issue summary
+    issue_summary = ""
+    if metrics['basic_metrics']['new_issues'] > 0:
+        bugs = [issue for issue in all_issues
+                if any(label.get('name') == 'bug' for label in issue.get('labels', []))]
+        enhancements = [issue for issue in all_issues
+                        if any(label.get('name') == 'enhancement' for label in issue.get('labels', []))]
+
+        summaries = []
+        if bugs:
+            bug_details = ", ".join(f"'{issue['title']}'" for issue in bugs[:2])
+            summaries.append(f"{len(bugs)} bugs reported (including {bug_details})")
+        if enhancements:
+            enhancement_details = ", ".join(f"'{issue['title']}'" for issue in enhancements[:2])
+            summaries.append(f"{len(enhancements)} feature requests (including {enhancement_details})")
+
+        if summaries:
+            issue_summary = " ".join(summaries) + "."
+
+    summary = f"""# ai16z/eliza Daily {date}
+
+## 📊 Overview
+{overview}
+
+## 📈 Key Metrics
+| Metric | Count |
+|---------|--------|
+| 👥 Contributors | {metrics['basic_metrics']['contributors']} |
+| 📝 Commits | {total_commits} |
+| 🔄 Merged PRs | {metrics['basic_metrics']['merged_prs']} |
+| ⚠️ New Issues | {metrics['basic_metrics']['new_issues']} |
+
+## 🔄 Pull Request Summary
+- 🧹 **Chores**: {pr_types.get('chore', 0)}
+- 🐛 **Fixes**: {pr_types.get('fix', 0)}
+- ✨ **Features**: {pr_types.get('feat', 0)}
+
+## 📁 File Changes
+{chr(10).join(file_changes)}
+
+## 🔥 Notable Changes
+{chr(10).join(notable_changes)}
+
+## 👥 Top Contributors
+{chr(10).join(contributor_details)}
+
+## ⚠️ Issues
+- **New Issues**: {metrics['basic_metrics']['new_issues']}
+- **Labels**: {label_text}
+- **Summary**: {issue_summary}"""
+
+    return summary
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate repository summary")
+    parser.add_argument("input_file", help="Input JSON file with contributor data")
+    parser.add_argument("output_file", help="Output file for summary")
+    parser.add_argument("-t", "--type", choices=["md", "json"], default="md",
+                        help="Output format type (markdown or json)")
+    # --model is accepted so the workflow can pass it, but the current
+    # implementation is template-based and does not call a model.
+    parser.add_argument("--model", choices=["openai", "ollama"], default="ollama",
+                        help="Model to use for summary generation")
+    args = parser.parse_args()
+
+    with open(args.input_file) as f:
+        data = json.load(f)
+
+    metrics = analyze_activity_metrics(data)
+
+    if args.type == "json":
+        summary = json.dumps(generate_json_summary(metrics, data), indent=2)
+    else:
+        summary = generate_summary(data, args.model)
+
+    # Normalize the extension to match the requested output type
+    base_output = os.path.splitext(args.output_file)[0]
+    output_file = f"{base_output}.{args.type}"
+
+    with open(output_file, 'w') as f:
+        f.write(summary)
+
+    print(f"\nSummary saved to {output_file}")
+    if args.type == "md":
+        print("\nUser-facing summary:")
+        print("-" * 50)
+        print(generate_user_summary(metrics, data))
+
+if __name__ == "__main__":
+    main()
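+
+# Example invocations (mirroring the workflow's "Process daily data" step):
+#   python scripts/summarize_daily.py data/daily/contributors.json -t md data/daily/summary.md
+#   python scripts/summarize_daily.py data/daily/contributors.json -t json data/daily/summary.json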