Skip to content

Commit

Permalink
pypi llm
Browse files Browse the repository at this point in the history
  • Loading branch information
fpgmaas committed Jun 13, 2024
1 parent 60af63e commit dbc7d28
Show file tree
Hide file tree
Showing 6 changed files with 3,214 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
data
59 changes: 59 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,62 @@ For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/featu
---

Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).


---

## Download counts

```sql
-- Number of downloads per project over the last 28 complete UTC days,
-- most-downloaded projects first.
SELECT
    project,
    COUNT(*) AS download_count
FROM
    `bigquery-public-data.pypi.file_downloads`
WHERE
    -- Half-open range on the raw column: exactly 28 whole days, ending at the
    -- most recent UTC midnight so the still-incomplete current day is excluded.
    timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY))
    AND timestamp < TIMESTAMP(CURRENT_DATE())
GROUP BY
    project
ORDER BY
    download_count DESC,
    project;  -- tie-breaker keeps the row order deterministic
```

## Download counts with latest package metadata

```sql
-- Projects with at least 250 downloads in the last 28 complete UTC days, each
-- paired with the description and version of its most recent upload.

-- Download totals per project, restricted to projects above the threshold.
WITH recent_downloads AS (
    SELECT
        project,
        COUNT(*) AS download_count
    FROM
        `bigquery-public-data.pypi.file_downloads`
    WHERE
        -- Half-open range on the raw column: exactly 28 whole days, ending at
        -- the most recent UTC midnight (the current partial day is excluded).
        timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY))
        AND timestamp < TIMESTAMP(CURRENT_DATE())
    GROUP BY
        project
    HAVING
        COUNT(*) >= 250
),

-- Metadata rows keyed by the PEP 503 normalised project name (lower-cased,
-- runs of '-', '_' and '.' collapsed to a single '-').
normalized_metadata AS (
    SELECT
        REGEXP_REPLACE(LOWER(name), r'[-_.]+', '-') AS normalized_name,
        description,
        version,
        upload_time
    FROM
        `bigquery-public-data.pypi.distribution_metadata`
),

-- Rank every upload of a project from newest to oldest. ROW_NUMBER() yields
-- exactly one rank-1 row per project even when several distribution files
-- share the same upload_time.
ranked_metadata AS (
    SELECT
        normalized_name,
        description,
        version,
        ROW_NUMBER() OVER (
            PARTITION BY normalized_name
            ORDER BY upload_time DESC, version DESC
        ) AS upload_rank
    FROM
        normalized_metadata
)

SELECT
    rd.project AS project_name,
    rm.description AS project_description,
    rm.version AS latest_project_version,
    rd.download_count AS project_number_of_downloads
FROM
    recent_downloads AS rd
INNER JOIN
    ranked_metadata AS rm
    -- NOTE(review): file_downloads.project is presumably already normalised;
    -- normalising both sides makes the join independent of that assumption.
    ON REGEXP_REPLACE(LOWER(rd.project), r'[-_.]+', '-') = rm.normalized_name
WHERE
    rm.upload_rank = 1
ORDER BY
    rd.download_count DESC,
    rd.project;  -- tie-breaker keeps the row order deterministic
```
114 changes: 114 additions & 0 deletions notebooks/main.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4bb61e27-eacb-4404-be88-42b65a295c3f",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecd6821f-bf99-415e-8154-176eecda5cba",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"from bs4 import BeautifulSoup\n",
"\n",
"def get_package_list():\n",
" url = \"https://pypi.org/simple/\"\n",
" response = requests.get(url)\n",
" soup = BeautifulSoup(response.text, 'html.parser')\n",
" package_links = soup.find_all('a')\n",
" package_list = [link.text for link in package_links]\n",
" return package_list\n",
"\n",
"all_packages = get_package_list()\n",
"\n",
"def get_package_info(package_name):\n",
" url = f\"https://pypi.org/pypi/{package_name}/json\"\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" return response.json()\n",
" else:\n",
" return None\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a67e8d1-3b83-41d0-8ff3-b8f2d0c7aa78",
"metadata": {},
"outputs": [],
"source": [
"import polars as pl"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caf13777-d8db-41cf-87d4-d9d8b21fd987",
"metadata": {},
"outputs": [],
"source": [
"df = pl.read_csv(\"data/pypi_dataset.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5c1cc33-e506-4233-859a-3b52543b91c8",
"metadata": {},
"outputs": [],
"source": [
"df.tail(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4f3923d-8789-4bf7-bf5b-c5ce866ffd16",
"metadata": {},
"outputs": [],
"source": [
"list(df.head(3)['project_description'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "842be041-86d6-431a-9e33-c7cbc28a1228",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit dbc7d28

Please sign in to comment.