From 081e87ac6e43a2f94e3377ab899cf984a0a9c9ad Mon Sep 17 00:00:00 2001
From: madjin <32600939+madjin@users.noreply.github.com>
Date: Tue, 17 Dec 2024 16:21:35 -0500
Subject: [PATCH] add daily cronjob

---
 .github/workflows/weekly-summaries.yml |  44 ++-
 scripts/summarize_daily.py             | 413 +++++++++++++++++++++++++
 2 files changed, 451 insertions(+), 6 deletions(-)
 create mode 100644 scripts/summarize_daily.py

diff --git a/.github/workflows/weekly-summaries.yml b/.github/workflows/weekly-summaries.yml
index 7c94e9e..07ba163 100644
--- a/.github/workflows/weekly-summaries.yml
+++ b/.github/workflows/weekly-summaries.yml
@@ -1,8 +1,9 @@
 name: Contributor Updates
 on:
   schedule:
-    - cron: '0 19 * * 5' # Weekly on Friday at 2:00 PM EST
-    - cron: '0 19 1 * *' # Monthly on 1st at 2:00 PM EST
+    - cron: '0 19 * * *' # Daily at 2:00 PM EST (19:00 UTC)
+    - cron: '0 19 * * 5' # Weekly on Friday at 2:00 PM EST (19:00 UTC)
+    - cron: '0 19 4 * *' # Monthly on the 4th at 2:00 PM EST (19:00 UTC)
   workflow_dispatch:
 
 permissions:
@@ -39,9 +40,23 @@ jobs:
       run: |
         echo "TIMESTAMP=$(date +'%Y_%m_%d')" >> $GITHUB_ENV
         echo "IS_MONTH_START=$(date +'%d')" >> $GITHUB_ENV
+        echo "IS_FRIDAY=$(date +'%u')" >> $GITHUB_ENV
+
+    - name: Fetch daily data
+      if: github.event.schedule != '0 19 * * 5' && github.event.schedule != '0 19 4 * *'
+      env:
+        GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
+      run: |
+        # Create directories
+        mkdir -p data/daily data/daily/history
+
+        # Fetch current data with timestamp
+        bash scripts/fetch_github.sh ai16z eliza --type prs --days 1 | tee data/daily/prs.json data/daily/history/prs_${TIMESTAMP}.json
+        bash scripts/fetch_github.sh ai16z eliza --type issues --days 1 | tee data/daily/issues.json data/daily/history/issues_${TIMESTAMP}.json
+        bash scripts/fetch_github.sh ai16z eliza --type commits --days 1 | tee data/daily/commits.json data/daily/history/commits_${TIMESTAMP}.json
 
     - name: Fetch weekly data
-      if: github.event.schedule != '0 19 1 * *'
+      if: github.event.schedule == '0 19 * * 5'
       env:
         GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
       run: |
@@ -54,7 +69,7 @@
         bash scripts/fetch_github.sh ai16z eliza --type commits --days 7 | tee data/weekly/commits.json data/weekly/history/commits_${TIMESTAMP}.json
 
     - name: Fetch monthly data
-      if: github.event.schedule == '0 19 1 * *'
+      if: github.event.schedule == '0 19 4 * *'
       env:
         GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
       run: |
@@ -65,23 +80,40 @@
         bash scripts/fetch_github.sh ai16z eliza --type prs --days 30 | tee data/monthly/prs.json data/monthly/history/prs_${TIMESTAMP}.json
         bash scripts/fetch_github.sh ai16z eliza --type issues --days 30 | tee data/monthly/issues.json data/monthly/history/issues_${TIMESTAMP}.json
         bash scripts/fetch_github.sh ai16z eliza --type commits --days 30 | tee data/monthly/commits.json data/monthly/history/commits_${TIMESTAMP}.json
+
+    - name: Process daily data
+      if: github.event.schedule != '0 19 * * 5' && github.event.schedule != '0 19 4 * *'
+      env:
+        GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      run: |
+        # Archive the previous day's outputs before regenerating them
+        [ -f data/daily/contributors.json ] && mv data/daily/contributors.json data/daily/history/contributors_${TIMESTAMP}.json || true
+        [ -f data/daily/summary.json ] && mv data/daily/summary.json data/daily/history/summary_${TIMESTAMP}.json || true
+        [ -f data/daily/summary.md ] && mv data/daily/summary.md data/daily/history/summary_${TIMESTAMP}.md || true
+        python scripts/combine.py -p data/daily/prs.json -i data/daily/issues.json -c data/daily/commits.json -o data/daily/combined.json
+        python scripts/calculate_scores.py data/daily/combined.json data/daily/scored.json
+        python scripts/summarize.py data/daily/scored.json data/daily/contributors.json --model openai
+        python scripts/summarize_daily.py data/daily/contributors.json -t json data/daily/summary.json --model openai
+        python scripts/summarize_daily.py data/daily/contributors.json -t md data/daily/summary.md --model openai
 
     - name: Process weekly data
-      if: github.event.schedule != '0 19 1 * *'
+      if: github.event.schedule == '0 19 * * 5'
       env:
         GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
+        [ -f data/weekly/contributors.json ] && mv data/weekly/contributors.json data/weekly/history/contributors_${TIMESTAMP}.json || true
         python scripts/combine.py -p data/weekly/prs.json -i data/weekly/issues.json -c data/weekly/commits.json -o data/weekly/combined.json
         python scripts/calculate_scores.py data/weekly/combined.json data/weekly/scored.json
         python scripts/summarize.py data/weekly/scored.json data/weekly/contributors.json --model openai
 
     - name: Process monthly data
-      if: github.event.schedule == '0 19 1 * *'
+      if: github.event.schedule == '0 19 4 * *'
       env:
         GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
+        [ -f data/monthly/contributors.json ] && mv data/monthly/contributors.json data/monthly/history/contributors_${TIMESTAMP}.json || true
         python scripts/combine.py -p data/monthly/prs.json -i data/monthly/issues.json -c data/monthly/combined.json -o data/monthly/combined.json
         python scripts/calculate_scores.py data/monthly/combined.json data/monthly/scored.json
         python scripts/summarize.py data/monthly/scored.json data/monthly/contributors.json --model openai
diff --git a/scripts/summarize_daily.py b/scripts/summarize_daily.py
new file mode 100644
index 0000000..f7b7bbf
--- /dev/null
+++ b/scripts/summarize_daily.py
@@ -0,0 +1,413 @@
+import json
+import os
+import argparse
+from datetime import datetime
+from collections import Counter, defaultdict
+from typing import List, Dict
+
+def analyze_activity_metrics(data: List[Dict]) -> Dict:
+    """Generate detailed activity metrics with file-level analysis"""
+    metrics = defaultdict(int)
+    file_changes = defaultdict(lambda: {'adds': 0, 'dels': 0, 'changes': 0})
+    pr_types = defaultdict(int)
+    issue_labels = defaultdict(int)
+
+    for contributor in data:
+        # Process PRs
+        for pr in contributor['activity']['code']['pull_requests']:
+            if pr.get('merged'):
+                metrics['merged_prs'] += 1
+                # Categorize PR types by conventional-commit prefix
+                title_lower = pr['title'].lower()
+                if 'feat:' in title_lower:
+                    pr_types['features'] += 1
+                elif 'fix:' in title_lower:
+                    pr_types['fixes'] += 1
+                elif 'chore:' in title_lower:
+                    pr_types['chores'] += 1
+                elif 'refactor:' in title_lower:
+                    pr_types['refactors'] += 1
+
+                # Process file changes, bucketed by top-level directory
+                for file in pr.get('files', []):
+                    path = file['path']
+                    category = path.split('/')[0] if '/' in path else 'root'
+                    file_changes[category]['adds'] += file.get('additions', 0)
+                    file_changes[category]['dels'] += file.get('deletions', 0)
+                    file_changes[category]['changes'] += 1
+
+        # Process Issues
+        for issue in contributor['activity']['issues']['opened']:
+            metrics['new_issues'] += 1
+            for label in issue.get('labels', []):
+                issue_labels[label.get('name', 'unlabeled')] += 1
+
+        # Process Commits
+        metrics['total_commits'] += len(contributor['activity']['code']['commits'])
+
+    return {
+        'basic_metrics': {
+            'contributors': len(data),
+            'commits': metrics['total_commits'],
+            'merged_prs': metrics['merged_prs'],
+            'new_issues': metrics['new_issues']
+        },
+        'pr_types': dict(pr_types),
+        'file_changes': dict(file_changes),
+        'issue_labels': dict(issue_labels)
+    }
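+
+# Illustrative input record for this script (shape inferred from the accesses
+# above; all names and values are hypothetical):
+#
+# {
+#   "contributor": "octocat",
+#   "score": 72,
+#   "summary": "octocat merged 3 PRs focused on plugin stability. ...",
+#   "activity": {
+#     "code": {
+#       "commits": ["..."],
+#       "pull_requests": [
+#         {"title": "feat: add daily summaries", "merged": true,
+#          "files": [{"path": "scripts/summarize.py", "additions": 10, "deletions": 2}]}
+#       ]
+#     },
+#     "issues": {"opened": [{"title": "...", "labels": [{"name": "bug"}]}]}
+#   }
+# }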
+
+def generate_overview(metrics: Dict, changes: List[Dict], data: List[Dict]) -> str:
+    """Generate a detailed overview of daily activities and key developments"""
+    # Get key features and changes
+    features = [c['title'].split(':', 1)[1].strip() for c in changes
+                if c.get('merged') and c['title'].lower().startswith('feat:')]
+
+    # Get key areas and what's being built
+    key_developments = []
+    if 'packages' in metrics['file_changes']:
+        pkg_changes = next((c['title'].split(':', 1)[1].strip() for c in changes
+                            if ':' in c['title'] and
+                            ('plugin' in c['title'].lower() or
+                             'client' in c['title'].lower())), None)
+        if pkg_changes:
+            key_developments.append(f"package improvements ({pkg_changes})")
+
+    if features:
+        key_developments.append(f"new features ({features[0]})")
+
+    if metrics['pr_types'].get('fixes', 0) > 0:
+        key_developments.append(f"{metrics['pr_types']['fixes']} bug fixes")
+
+    # Find major work summary from the highest-scored contributors
+    major_work = next(
+        (c['summary'].split('.')[0].lower()
+         for c in data if c['score'] > 50),
+        'various improvements'
+    )
+
+    overview = (
+        f"Development focused on {', '.join(key_developments)}, "
+        f"with {metrics['basic_metrics']['contributors']} contributors "
+        f"merging {metrics['basic_metrics']['merged_prs']} PRs. "
+        f"Major work included {major_work}."
+    )
+
+    return overview
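+
+# Illustrative result (all names and numbers hypothetical):
+# "Development focused on package improvements (add Slack client), new features
+# (add daily summaries), 3 bug fixes, with 9 contributors merging 14 PRs.
+# Major work included improving plugin reliability."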
+
+def get_contributor_details(data: List[Dict]) -> List[Dict]:
+    """Get detailed contributor information including summaries"""
+    top_contributors = []
+    for c in sorted(data, key=lambda x: x['score'], reverse=True)[:3]:
+        # Get their main merged PR
+        main_pr = next((pr['title'] for pr in c['activity']['code']['pull_requests']
+                        if pr.get('merged')), None)
+
+        # Get their activity summary (first sentence only)
+        summary = c['summary'].split('.')[0]
+
+        # Get their main areas of work
+        areas = set()
+        for pr in c['activity']['code']['pull_requests']:
+            if pr.get('merged') and pr.get('files'):
+                areas.update(f['path'].split('/')[0] for f in pr['files'])
+
+        top_contributors.append({
+            "name": c['contributor'],
+            "main_contribution": main_pr,
+            "summary": summary,
+            "areas": list(areas)[:3]  # Top 3 areas they worked in
+        })
+
+    return top_contributors
+
+def generate_json_summary(metrics: Dict, data: List[Dict]) -> Dict:
+    """Generate structured JSON summary of activity"""
+    changes = [pr for c in data for pr in c['activity']['code']['pull_requests'] if pr.get('merged')]
+    version = next((c['title'].split(':', 1)[1].strip() for c in changes
+                    if ':' in c['title'] and
+                    ('version' in c['title'].lower() or
+                     'bump' in c['title'].lower())), "")
+
+    # Collect all issues
+    all_issues = []
+    for c in data:
+        all_issues.extend(c['activity']['issues']['opened'])
+
+    # Get issues by type
+    bugs = [issue for issue in all_issues
+            if any(label.get('name') == 'bug' for label in issue.get('labels', []))]
+    enhancements = [issue for issue in all_issues
+                    if any(label.get('name') == 'enhancement' for label in issue.get('labels', []))]
+
+    # Generate issue summary
+    issue_summary = ""
+    if bugs or enhancements:
+        summaries = []
+        if bugs:
+            bug_titles = [f"'{issue['title']}'" for issue in bugs[:2]]
+            summaries.append(f"working on {len(bugs)} bugs including {', '.join(bug_titles)}")
+        if enhancements:
+            enhancement_titles = [f"'{issue['title']}'" for issue in enhancements[:2]]
+            summaries.append(f"implementing {len(enhancements)} feature requests including {', '.join(enhancement_titles)}")
+        issue_summary = " and ".join(summaries)
+
+    return {
+        "title": f"ai16z Eliza ({datetime.utcnow().strftime('%Y-%m-%d')})",
+        "version": version,
+        "overview": generate_overview(metrics, changes, data),
+        "metrics": {
+            "contributors": metrics['basic_metrics']['contributors'],
+            "merged_prs": metrics['basic_metrics']['merged_prs'],
+            "new_issues": metrics['basic_metrics']['new_issues'],
+            "lines_changed": sum(area['adds'] + area['dels']
+                                 for area in metrics['file_changes'].values())
+        },
+        "changes": {
+            "features": [c['title'].split(':', 1)[1].strip() for c in changes
+                         if c['title'].lower().startswith('feat:')][:3],
+            "fixes": [c['title'].split(':', 1)[1].strip() for c in changes
+                      if c['title'].lower().startswith('fix:')][:3],
+            "chores": [c['title'].split(':', 1)[1].strip() for c in changes
+                       if c['title'].lower().startswith('chore:')][:3]
+        },
+        "areas": [
+            {
+                "name": area,
+                "files": stats['changes'],
+                "additions": stats['adds'],
+                "deletions": stats['dels']
+            }
+            for area, stats in sorted(
+                metrics['file_changes'].items(),
+                key=lambda x: x[1]['changes'],
+                reverse=True
+            )[:3]
+        ],
+        "issues_summary": issue_summary,
+        "questions": [],
+        "top_contributors": [
+            {
+                "name": c['contributor'],
+                "summary": c['summary'].split('.')[0],
+                "areas": list(set(
+                    f['path'].split('/')[0]
+                    for pr in c['activity']['code']['pull_requests']
+                    if pr.get('merged') and pr.get('files')
+                    for f in pr['files']
+                ))[:3]
+            }
+            for c in sorted(data, key=lambda x: x['score'], reverse=True)[:3]
+        ]
+    }
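+
+# Illustrative JSON output (abridged; values hypothetical):
+#
+# {
+#   "title": "ai16z Eliza (2024-12-17)",
+#   "version": "v0.1.5",
+#   "overview": "Development focused on ...",
+#   "metrics": {"contributors": 9, "merged_prs": 14, "new_issues": 5, "lines_changed": 3120},
+#   "changes": {"features": ["add daily summaries"], "fixes": ["..."], "chores": ["..."]},
+#   "areas": [{"name": "packages", "files": 42, "additions": 2100, "deletions": 800}],
+#   "issues_summary": "working on 2 bugs including ...",
+#   "questions": [],
+#   "top_contributors": [{"name": "octocat", "summary": "...", "areas": ["packages"]}]
+# }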
+
+def generate_summary(data: List[Dict], model: str = "ollama", api_key: str = None) -> str:
+    """Generate a unified markdown summary with key sections"""
+    # NOTE: model/api_key are accepted for pipeline compatibility; the summary
+    # itself is assembled from templates and does not call a model.
+    metrics = analyze_activity_metrics(data)
+
+    # Get user-facing summary first
+    user_summary = generate_user_summary(metrics, data)
+
+    # Get top contributors with their main contribution
+    top_contributors = sorted(data, key=lambda x: x['score'], reverse=True)[:3]
+    contributor_summary = []
+    for c in top_contributors:
+        main_pr = next((pr['title'] for pr in c['activity']['code']['pull_requests']
+                        if pr.get('merged')), None)
+        if main_pr:
+            contributor_summary.append(f"- **{c['contributor']}**: {main_pr}")
+
+    # Remove the date from user_summary since it's now in the title
+    user_summary_lines = user_summary.split('\n')[1:]  # Skip the first line that had the old title
+    user_summary = '\n'.join(user_summary_lines)
+
+    # chr(10) is "\n"; f-string expressions cannot contain backslashes here
+    summary = f"""# ai16z Eliza ({datetime.utcnow().strftime("%Y-%m-%d")})
+{user_summary}
+
+## Top Contributors
+{chr(10).join(contributor_summary)}"""
+
+    return summary
+
+def generate_user_summary(metrics: Dict, data: List[Dict]) -> str:
+    """Generate thorough but concise user-facing summary with bullet points"""
+    changes = [pr for c in data for pr in c['activity']['code']['pull_requests'] if pr.get('merged')]
+
+    date = datetime.utcnow().strftime("%Y-%m-%d")
+    overview = generate_overview(metrics, changes, data)
+
+    # Count PR types
+    pr_types = Counter(
+        pr['title'].split(':')[0].lower()
+        for pr in changes
+        if ':' in pr['title']
+    )
+
+    # Get total commits
+    total_commits = sum(len(c['activity']['code']['commits']) for c in data)
+
+    # Format file changes
+    file_changes = []
+    for area, stats in sorted(
+        metrics['file_changes'].items(),
+        key=lambda x: x[1]['adds'] + x[1]['dels'],
+        reverse=True
+    )[:5]:  # Show top 5 areas
+        file_changes.append(
+            f"- **{area}**: {stats['changes']} files (+{stats['adds']}/-{stats['dels']} lines)"
+        )
+
+    # Get contributors with summaries
+    contributors = get_contributor_details(data)
+    contributor_details = []
+    for c in contributors:
+        contributor_details.append(
+            f"- **{c['name']}**: {c['summary']}"
+        )
+
+    # Count issue labels
+    label_counts = Counter()
+    for c in data:
+        for issue in c['activity']['issues']['opened']:
+            for label in issue.get('labels', []):
+                label_counts[label.get('name', 'unlabeled')] += 1
+
+    # Format notable changes
+    notable_changes = [f"- {pr['title']}" for pr in changes[:3]]
+
+    # Format labels
+    label_text = ', '.join(f'`{label}` ({count})' for label, count in label_counts.most_common(3))
+
+    # Collect all issues
+    all_issues = []
+    for c in data:
+        all_issues.extend(c['activity']['issues']['opened'])
+
+    # Generate rich issue summary
+    issue_summary = ""
+    if metrics['basic_metrics']['new_issues'] > 0:
+        bugs = [issue for issue in all_issues
+                if any(label.get('name') == 'bug' for label in issue.get('labels', []))]
+        enhancements = [issue for issue in all_issues
+                        if any(label.get('name') == 'enhancement' for label in issue.get('labels', []))]
+
+        summaries = []
+        if bugs:
+            bug_details = ", ".join(f"'{issue['title']}'" for issue in bugs[:2])
+            summaries.append(f"{len(bugs)} bugs reported (including {bug_details})")
+        if enhancements:
+            enhancement_details = ", ".join(f"'{issue['title']}'" for issue in enhancements[:2])
+            summaries.append(f"{len(enhancements)} feature requests (including {enhancement_details})")
+
+        if summaries:
+            issue_summary = " ".join(summaries) + "."
+
+    summary = f"""# ai16z/eliza Daily {date}
+
+## 📊 Overview
+{overview}
+
+## 📈 Key Metrics
+| Metric | Count |
+|---------|--------|
+| 👥 Contributors | {metrics['basic_metrics']['contributors']} |
+| 📝 Commits | {total_commits} |
+| 🔄 Merged PRs | {metrics['basic_metrics']['merged_prs']} |
+| ⚠️ New Issues | {metrics['basic_metrics']['new_issues']} |
+
+## 🔄 Pull Request Summary
+- 🧹 **Chores**: {pr_types.get('chore', 0)}
+- 🐛 **Fixes**: {pr_types.get('fix', 0)}
+- ✨ **Features**: {pr_types.get('feat', 0)}
+
+## 📁 File Changes
+{chr(10).join(file_changes)}
+
+## 🔥 Notable Changes
+{chr(10).join(notable_changes)}
+
+## 👥 Top Contributors
+{chr(10).join(contributor_details)}
+
+## ⚠️ Issues
+- **New Issues**: {metrics['basic_metrics']['new_issues']}
+- **Labels**: {label_text}
+- **Summary**: {issue_summary}"""
+
+    return summary
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate repository summary")
+    parser.add_argument("input_file", help="Input JSON file with contributor data")
+    parser.add_argument("output_file", help="Output file for summary")
+    parser.add_argument("-t", "--type", choices=["md", "json"], default="md",
+                        help="Output format type (markdown or json)")
+    # --model is accepted so the workflow can pass it, but the current
+    # implementation is template-based and does not call a model.
+    parser.add_argument("--model", choices=["openai", "ollama"], default="ollama",
+                        help="Model to use for summary generation")
+    args = parser.parse_args()
+
+    with open(args.input_file) as f:
+        data = json.load(f)
+
+    metrics = analyze_activity_metrics(data)
+
+    if args.type == "json":
+        summary = json.dumps(generate_json_summary(metrics, data), indent=2)
+    else:
+        summary = generate_summary(data, args.model)
+
+    # Normalize the extension to match the requested output type
+    base_output = os.path.splitext(args.output_file)[0]
+    output_file = f"{base_output}.{args.type}"
+
+    with open(output_file, 'w') as f:
+        f.write(summary)
+
+    print(f"\nSummary saved to {output_file}")
+    if args.type == "md":
+        print("\nUser-facing summary:")
+        print("-" * 50)
+        print(generate_user_summary(metrics, data))
+
+if __name__ == "__main__":
+    main()
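+
+# Example invocations (mirroring the workflow's "Process daily data" step):
+#   python scripts/summarize_daily.py data/daily/contributors.json -t md data/daily/summary.md
+#   python scripts/summarize_daily.py data/daily/contributors.json -t json data/daily/summary.json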