Skip to content

Commit

Permalink
Add members and orgs on same file
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-strange committed Jun 6, 2024
1 parent fa69fab commit dbc4488
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 269 deletions.
307 changes: 68 additions & 239 deletions pipelines/truenorth/analyse_members_list.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 208,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 209,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 210,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -193,7 +193,7 @@
"freq 21 "
]
},
"execution_count": 19,
"execution_count": 210,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -204,7 +204,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 211,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -279,270 +279,99 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 212,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>last_updated</th>\n",
" <th>Create Date</th>\n",
" <th>Do you feel the True North report identified the key challenges and opportunities facing the region?</th>\n",
" <th>Are you interested in attending future True North events?</th>\n",
" <th>Are you currently a B Corp or in the process of becoming a B Corp?</th>\n",
" <th>Would you be interested in hearing more from Brabners about the B Corp process?</th>\n",
" <th>Company name</th>\n",
" <th>City</th>\n",
" <th>Industry</th>\n",
" <th>sector</th>\n",
" <th>location</th>\n",
" <th>Which theme of the True North report do you most identify with and could support activity around?</th>\n",
" <th>How would you like to be involved with the True North network?</th>\n",
" <th>company_size</th>\n",
" <th>month</th>\n",
" <th>month_formatted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2024-04-26 16:12:00</td>\n",
" <td>2022-01-28 11:00:00</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Brabners LLP</td>\n",
" <td>Liverpool</td>\n",
" <td>Legal Services</td>\n",
" <td>Legal, Tech, IP</td>\n",
" <td>NaN</td>\n",
" <td>Innovation and change</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-01-28 11:00:00</td>\n",
" <td>2022-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>2024-04-09 14:00:00</td>\n",
" <td>2022-01-28 13:01:00</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Brabners LLP</td>\n",
" <td>Liverpool</td>\n",
" <td>Legal Services</td>\n",
" <td>Real estate</td>\n",
" <td>NaN</td>\n",
" <td>People, skills and the future</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-01-28 13:01:00</td>\n",
" <td>2022-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>2024-01-23 14:11:00</td>\n",
" <td>2022-01-28 15:01:00</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Brabners LLP</td>\n",
" <td>Liverpool</td>\n",
" <td>Legal Services</td>\n",
" <td>Legal</td>\n",
" <td>Cumbria; West Yorkshire; Greater Manchester; M...</td>\n",
" <td>Innovation and change</td>\n",
" <td>Keeping informed about the latest True North n...</td>\n",
" <td>500-999</td>\n",
" <td>2022-01-28 15:01:00</td>\n",
" <td>2022-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>2023-11-01 10:03:00</td>\n",
" <td>2022-01-31 20:01:00</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Brabners LLP</td>\n",
" <td>Liverpool</td>\n",
" <td>Legal Services</td>\n",
" <td>Agriculture and real estate</td>\n",
" <td>NaN</td>\n",
" <td>Sustainable development</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-01-31 20:01:00</td>\n",
" <td>2022-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2024-05-13 11:12:00</td>\n",
" <td>2022-03-15 11:01:00</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Brabners LLP</td>\n",
" <td>Liverpool</td>\n",
" <td>Legal Services</td>\n",
" <td>Construction and Engineering</td>\n",
" <td>NaN</td>\n",
" <td>People, skills and the future</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-03-15 11:01:00</td>\n",
" <td>2022-03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" last_updated Create Date \\\n",
"11 2024-04-26 16:12:00 2022-01-28 11:00:00 \n",
"24 2024-04-09 14:00:00 2022-01-28 13:01:00 \n",
"58 2024-01-23 14:11:00 2022-01-28 15:01:00 \n",
"88 2023-11-01 10:03:00 2022-01-31 20:01:00 \n",
"2 2024-05-13 11:12:00 2022-03-15 11:01:00 \n",
"\n",
" Do you feel the True North report identified the key challenges and opportunities facing the region? \\\n",
"11 Yes \n",
"24 Yes \n",
"58 Yes \n",
"88 Yes \n",
"2 Yes \n",
"\n",
" Are you interested in attending future True North events? \\\n",
"11 Yes \n",
"24 Yes \n",
"58 Yes \n",
"88 Yes \n",
"2 Yes \n",
"\n",
" Are you currently a B Corp or in the process of becoming a B Corp? \\\n",
"11 Yes \n",
"24 Yes \n",
"58 Yes \n",
"88 Yes \n",
"2 Yes \n",
"\n",
" Would you be interested in hearing more from Brabners about the B Corp process? \\\n",
"11 Yes \n",
"24 Yes \n",
"58 No \n",
"88 No \n",
"2 Yes \n",
"\n",
" Company name City Industry sector \\\n",
"11 Brabners LLP Liverpool Legal Services Legal, Tech, IP \n",
"24 Brabners LLP Liverpool Legal Services Real estate \n",
"58 Brabners LLP Liverpool Legal Services Legal \n",
"88 Brabners LLP Liverpool Legal Services Agriculture and real estate \n",
"2 Brabners LLP Liverpool Legal Services Construction and Engineering \n",
"\n",
" location \\\n",
"11 NaN \n",
"24 NaN \n",
"58 Cumbria; West Yorkshire; Greater Manchester; M... \n",
"88 NaN \n",
"2 NaN \n",
"\n",
" Which theme of the True North report do you most identify with and could support activity around? \\\n",
"11 Innovation and change \n",
"24 People, skills and the future \n",
"58 Innovation and change \n",
"88 Sustainable development \n",
"2 People, skills and the future \n",
"\n",
" How would you like to be involved with the True North network? \\\n",
"11 NaN \n",
"24 NaN \n",
"58 Keeping informed about the latest True North n... \n",
"88 NaN \n",
"2 NaN \n",
"\n",
" company_size month month_formatted \n",
"11 NaN 2022-01-28 11:00:00 2022-01 \n",
"24 NaN 2022-01-28 13:01:00 2022-01 \n",
"58 500-999 2022-01-28 15:01:00 2022-01 \n",
"88 NaN 2022-01-31 20:01:00 2022-01 \n",
"2 NaN 2022-03-15 11:01:00 2022-03 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# parse 'Create Date' into a datetime column called 'month'\n",
"data['month'] = pd.to_datetime(data['Create Date'])\n",
"data['month_formatted'] = data['month'].apply(datetime.strftime, format='%Y-%m').sort_index(ascending=True)\n",
"monthly_members = pd.DataFrame(data['month_formatted'].value_counts(ascending=False)).reset_index()\n",
"data.sort_values(by='Create Date').head()"
"\n",
"# format each join date as a yyyy-mm string using the vectorised .dt accessor.\n",
"# no sort is needed here: assigning a column aligns on the index, not on row order.\n",
"data['month_formatted'] = data['month'].dt.strftime('%Y-%m')"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 213,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# count how many members joined in each yyyy-mm month, one row per month.\n",
"members_per_month = data['month_formatted'].value_counts(ascending=False)\n",
"monthly_members = members_per_month.to_frame().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 214,
"metadata": {},
"outputs": [],
"source": [
"# monthly_members['decimal_date'] = monthly_members['month'].apply(lambda x: x[5:7])\n",
"# monthly_members['decimal_date']"
"# count each organisation once, in the month its earliest member signed up.\n",
"# sorting by 'month' (the parsed 'Create Date') before de-duplicating makes\n",
"# keep='first' pick that earliest row whatever order the rows were loaded in.\n",
"first_member_per_org = data.sort_values(by='month').drop_duplicates(subset='Company name', keep='first')\n",
"monthly_orgs = pd.DataFrame(first_member_per_org['month_formatted'].value_counts(ascending=False)).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 215,
"metadata": {},
"outputs": [],
"source": [
"monthly_members['timestamp'] = pd.to_datetime(monthly_members['month_formatted'], format='%Y-%m').astype(int) / 10**9\n",
"def decimal_date(data):\n",
"    '''Return a copy of a monthly count frame indexed by decimal year and month.\n",
"\n",
"    data: DataFrame with a 'month_formatted' column of 'yyyy-mm' strings.\n",
"\n",
"    Returns a new DataFrame whose existing index is extended with 'year'\n",
"    (decimal year, rounded to 2dp) and 'month_formatted' levels. The input\n",
"    frame is not modified, so the calling cell can be re-run safely.\n",
"    '''\n",
"    month_start = pd.to_datetime(data['month_formatted'], format='%Y-%m')\n",
"    # seconds since the unix epoch; dividing by a Timedelta avoids depending on\n",
"    # the platform's default int width or the datetime storage unit.\n",
"    epoch_seconds = (month_start - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)\n",
"    # make a decimal date (365.25-day years) and round to 2dp.\n",
"    decimal_year = epoch_seconds.div(86400 * 365.25).add(1970).round(2)\n",
"    # set year and formatted month as index levels so they aren't included in the cumsum.\n",
"    return data.assign(year=decimal_year).set_index(['year', 'month_formatted'], append=True)"
]
},
{
"cell_type": "code",
"execution_count": 216,
"metadata": {},
"outputs": [],
"source": [
"def calculate_cumsum(data, count_name):\n",
"    '''Return running totals by month with year and month restored as columns.\n",
"\n",
"    data: DataFrame carrying 'year' and 'month_formatted' index levels on top of\n",
"        its row index; every column is accumulated.\n",
"    count_name: new name for the 'count' column in the result.\n",
"    '''\n",
"    # order by month, then accumulate every column.\n",
"    running_total = data.sort_index(level='month_formatted').cumsum()\n",
"    # move year/month back to columns and discard the leftover row index,\n",
"    # whatever it is called, instead of relying on the generated name level_0.\n",
"    running_total = running_total.reset_index(level=['year', 'month_formatted']).reset_index(drop=True)\n",
"    return running_total.rename(columns={'count': f'{count_name}'})"
]
},
{
"cell_type": "code",
"execution_count": 217,
"metadata": {},
"outputs": [],
"source": [
"# index both monthly series by date, then turn each into a running total\n",
"members_by_date = decimal_date(monthly_members)\n",
"orgs_by_date = decimal_date(monthly_orgs)\n",
"cs_monthly_members = calculate_cumsum(members_by_date, count_name='individuals')\n",
"cs_monthly_orgs = calculate_cumsum(orgs_by_date, count_name='orgs')"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {},
"outputs": [],
"source": [
"# keep every month present in either series: an inner join drops months in which\n",
"# individuals joined but no new organisation did. A month missing from one series had\n",
"# no new sign-ups in it, so that cumulative total carries forward (0 before its first month).\n",
"cs_merged = (\n",
"    cs_monthly_members.merge(cs_monthly_orgs, how='outer', on=['year', 'month_formatted'])\n",
"    .sort_values(by='month_formatted')\n",
"    .ffill()\n",
"    .fillna(0)\n",
"    # the outer join introduces NaN and so floats; restore whole-number counts for the csv.\n",
"    .astype({'individuals': 'int64', 'orgs': 'int64'})\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 219,
"metadata": {},
"outputs": [],
"source": [
"monthly_members.drop(columns='timestamp', inplace=True)\n",
"monthly_members.set_index(['year', 'month_formatted'], inplace=True, append=True)\n",
"cumsum = pd.DataFrame(monthly_members.sort_index(level=2).cumsum().reset_index().drop(columns='level_0'))\n",
"cumsum.to_csv(os.path.join(SRC_DIR,'themes/true-north/membership/_data/cumsum_members.csv'), index=False)"
"# write the merged cumulative counts to a single csv; the per-series files\n",
"# cumsum_members.csv and cumsum_organisations.csv are not written by this cell.\n",
"cumsum_path = os.path.join(SRC_DIR, 'themes/true-north/membership/_data/cumsum.csv')\n",
"cs_merged.to_csv(cumsum_path, index=False)"
]
}
],
Expand Down
Loading

0 comments on commit dbc4488

Please sign in to comment.