diff --git a/images/plot1.png b/images/figure0.png similarity index 100% rename from images/plot1.png rename to images/figure0.png diff --git a/images/figure1.png b/images/figure1.png new file mode 100644 index 0000000..d763d45 Binary files /dev/null and b/images/figure1.png differ diff --git a/images/tagexample.png b/images/figure2.png similarity index 100% rename from images/tagexample.png rename to images/figure2.png diff --git a/images/plot3.png b/images/figure3.png similarity index 92% rename from images/plot3.png rename to images/figure3.png index 2705250..4220c66 100644 Binary files a/images/plot3.png and b/images/figure3.png differ diff --git a/images/figure4a.png b/images/figure4a.png new file mode 100644 index 0000000..01af931 Binary files /dev/null and b/images/figure4a.png differ diff --git a/images/figure4b.png b/images/figure4b.png new file mode 100644 index 0000000..2c2a231 Binary files /dev/null and b/images/figure4b.png differ diff --git a/images/plot4b.png b/images/figure5.png similarity index 100% rename from images/plot4b.png rename to images/figure5.png diff --git a/images/plot0.png b/images/plot0.png deleted file mode 100644 index 7528f74..0000000 Binary files a/images/plot0.png and /dev/null differ diff --git a/images/plot2.png b/images/plot2.png deleted file mode 100644 index a35476d..0000000 Binary files a/images/plot2.png and /dev/null differ diff --git a/images/plot2b.png b/images/plot2b.png deleted file mode 100644 index 5d870dd..0000000 Binary files a/images/plot2b.png and /dev/null differ diff --git a/images/plot4.png b/images/plot4.png deleted file mode 100644 index a405260..0000000 Binary files a/images/plot4.png and /dev/null differ diff --git a/notebooks/Assessment.ipynb b/notebooks/Assessment.ipynb index 4cc20e0..34cee4c 100644 --- a/notebooks/Assessment.ipynb +++ b/notebooks/Assessment.ipynb @@ -1,3048 +1,3050 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Overlap and assessment" - ] - }, 
- { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from matplotlib_venn import venn3, venn3_circles, venn2, venn2_circles\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Deterministic: 2052\n", - "Probabilistic: 7530\n" - ] - } - ], - "source": [ - "data_d = pd.read_csv('../data/tech.csv')\n", - "data_p = pd.read_csv('../data/tech2.csv')\n", - "print('Deterministic: {}\\nProbabilistic: {}'.format(len(data_d), len(data_p)))" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#print(len(table['link'].unique())) #all the unique links found in all the methods\n", - "plt.figure(figsize=(16, 12))\n", - "v = venn2([set(data_d.id), \n", - " set(data_p.id)], \n", - " set_labels = (\"Deterministic\", 'Probabilistic'),\n", - " set_colors=('#44b023', '#e8784f'), alpha=0.5)#\n", - "for text in v.set_labels:\n", - " text.set_fontsize(10)\n", - "for text in v.subset_labels:\n", - " text.set_fontsize(10)\n", - "c = venn2_circles([set(data_d.id), \n", - " set(data_p.id)], linestyle='solid', linewidth=0.6)\n", - "plt.title(\"Ovelap of Methods\")\n", - "#plt.savefig('../plots/venn-methods.png')\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [], - "source": [ - "data_inner = pd.merge(data_p,data_d,\n", - " how='inner',on='id',suffixes=('_prob', '_det'))" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1534\n", - "518\n", - "5996\n" - ] - } - ], - "source": [ - "print(len(data_inner))\n", - "print(len(list(set(list(data_d.id)) - set(list(data_p.id)))))\n", - "print(len(list(set(list(data_p.id)) - set(list(data_d.id)))))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "How does it look the ones that match on deterministic but not probabilistic? \n", - "How does it look all the way around?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "category\n", - "Building design nanomaterial 1\n", - "heat pump Vehicle design 1\n", - "heat pump energy storage 1\n", - "heat pump energy tower 1\n", - "heat pump renewable energy 1\n", - "Name: id, dtype: int64" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#data_inner.columns\n", - "inner_table = data_inner[['id','category']]\n", - "inner_table.groupby('category').count()['id'].sort_values().head()#.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [], - "source": [ - "#random choice\n", - "#sample_list = list(np.random.choice(list(inner_table.id), 91, replace=False))" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [], - "source": [ - "cordish2020 = pd.read_excel('../data/cordis-h2020projects.xlsx')" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [], - "source": [ - "#create a sample set to assess\n", - "#cordish2020[cordish2020.id.isin(sample_list)][['title','objective','coordinator']].to_csv('../data/sample.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [], - "source": [ - "#inner_table.to_csv('../data/inner_table.csv') #only once is need\n", - "inner_table = cordish2020[cordish2020.id.isin(list(inner_table.id))]" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rcnidacronymstatusprogrammetopicsframeworkProgrammetitlestartDateendDate...objectivetotalCostecMaxContributioncallfundingSchemecoordinatorcoordinatorCountryparticipantsparticipantCountriessubjects
52216738353983D-FOGRODSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Understanding forest growth dynamics using nov...2019-10-012021-09-30...Forest ecosystems are an essential terrestrial...178320.00178320.00H2020-MSCA-IF-2018MSCA-IF-EF-STUNIVERSITEIT GENTBENaNNaNNaN
16221633835541MOVESSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020MOnitoring VEgetation status and functioning a...2019-10-012021-09-30...Leaf Area Index (LAI), Fraction of green Veget...172932.48172932.48H2020-MSCA-IF-2018MSCA-IF-EF-STCENTRO DE INVESTIGACION ECOLOGICA Y APLICACION...ESNaNNaNNaN
33217649812602POLIPOCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020The first non-fermentative production process ...2018-06-012018-11-30...POLìPO S.r.l. has developed the first chemica...71429.0050000.00H2020-SMEInst-2018-2020-1SME-1POLIPO SRLITNaNNaNNaN
43217808811592NUTRI-NEEDSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Final development, clinical validation and lau...2018-06-012020-05-31...People want to live a long and healthy life an...3567000.002496900.00H2020-SMEInst-2018-2020-2SME-2NUTRILEADS BVNLNaNNaNNaN
45223666847641MICADOSIGNEDH2020-Euratom-1.7.;H2020-Euratom-1.1.NFRP-2018-10H2020Measurement and Instrumentation for Cleaning A...2019-06-012022-05-31...The goal of the MICADO project is to propose a...4986643.754442162.50NFRP-2018IACOSTRUZIONI APPARECCHIATURE ELETTRONICHE NUCLE...ITCOMMISSARIAT A L ENERGIE ATOMIQUE ET AUX ENERG...FR;DE;IT;BE;CZNaN
\n", - "

5 rows × 21 columns

\n", - "
" - ], - "text/plain": [ - " rcn id acronym status \\\n", - "5 221673 835398 3D-FOGROD SIGNED \n", - "16 221633 835541 MOVES SIGNED \n", - "33 217649 812602 POLIPO CLOSED \n", - "43 217808 811592 NUTRI-NEED SIGNED \n", - "45 223666 847641 MICADO SIGNED \n", - "\n", - " programme topics \\\n", - "5 H2020-EU.1.3.2. MSCA-IF-2018 \n", - "16 H2020-EU.1.3.2. MSCA-IF-2018 \n", - "33 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "43 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "45 H2020-Euratom-1.7.;H2020-Euratom-1.1. NFRP-2018-10 \n", - "\n", - " frameworkProgramme title \\\n", - "5 H2020 Understanding forest growth dynamics using nov... \n", - "16 H2020 MOnitoring VEgetation status and functioning a... \n", - "33 H2020 The first non-fermentative production process ... \n", - "43 H2020 Final development, clinical validation and lau... \n", - "45 H2020 Measurement and Instrumentation for Cleaning A... \n", - "\n", - " startDate endDate ... \\\n", - "5 2019-10-01 2021-09-30 ... \n", - "16 2019-10-01 2021-09-30 ... \n", - "33 2018-06-01 2018-11-30 ... \n", - "43 2018-06-01 2020-05-31 ... \n", - "45 2019-06-01 2022-05-31 ... \n", - "\n", - " objective totalCost \\\n", - "5 Forest ecosystems are an essential terrestrial... 178320.00 \n", - "16 Leaf Area Index (LAI), Fraction of green Veget... 172932.48 \n", - "33 POLìPO S.r.l. has developed the first chemica... 71429.00 \n", - "43 People want to live a long and healthy life an... 3567000.00 \n", - "45 The goal of the MICADO project is to propose a... 
4986643.75 \n", - "\n", - " ecMaxContribution call fundingScheme \\\n", - "5 178320.00 H2020-MSCA-IF-2018 MSCA-IF-EF-ST \n", - "16 172932.48 H2020-MSCA-IF-2018 MSCA-IF-EF-ST \n", - "33 50000.00 H2020-SMEInst-2018-2020-1 SME-1 \n", - "43 2496900.00 H2020-SMEInst-2018-2020-2 SME-2 \n", - "45 4442162.50 NFRP-2018 IA \n", - "\n", - " coordinator coordinatorCountry \\\n", - "5 UNIVERSITEIT GENT BE \n", - "16 CENTRO DE INVESTIGACION ECOLOGICA Y APLICACION... ES \n", - "33 POLIPO SRL IT \n", - "43 NUTRILEADS BV NL \n", - "45 COSTRUZIONI APPARECCHIATURE ELETTRONICHE NUCLE... IT \n", - "\n", - " participants participantCountries \\\n", - "5 NaN NaN \n", - "16 NaN NaN \n", - "33 NaN NaN \n", - "43 NaN NaN \n", - "45 COMMISSARIAT A L ENERGIE ATOMIQUE ET AUX ENERG... FR;DE;IT;BE;CZ \n", - "\n", - " subjects \n", - "5 NaN \n", - "16 NaN \n", - "33 NaN \n", - "43 NaN \n", - "45 NaN \n", - "\n", - "[5 rows x 21 columns]" - ] - }, - "execution_count": 96, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "inner_table.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### After assessment" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [], - "source": [ - "sample_lab = pd.read_csv('../data/sample4tag.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "93.4065934065934" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sample_lab.label.sum()*100/len(sample_lab)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "### Demanded technologies" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating1
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", - "
" - ], - "text/plain": [ - " sector technology_group technology \\\n", - "0 Energy Solar energy sources solar PV \n", - "1 Energy Solar energy sources concentrated solar power (CSP) \n", - "2 Energy Solar energy sources solar heating \n", - "3 Energy Solar energy sources solar dryer \n", - "4 Energy Solar energy sources solar water pump \n", - "\n", - " description \\\n", - "0 Also “solar photovoltaic”; technology of using... \n", - "1 Technology of producing electricity by concent... \n", - "2 Technology of capturing the sun's radiation an... \n", - "3 Technology of drying substances, especially fo... \n", - "4 Technology of powering electrical water pumps ... \n", - "\n", - " technology_term \\\n", - "0 photovoltaic, PV, “solar cell”  \n", - "1 “concentrated solar power”, CSP, “concentrated... \n", - "2 “solar heating” \n", - "3 “solar dryer” \n", - "4 “solar water pump” \n", - "\n", - " source priority \n", - "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", - "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", - "2 https://www.ctc-n.org/technologies/solar-heating 1 \n", - "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", - "4 https://www.ctc-n.org/technologies/solar-water... 
0 " - ] - }, - "execution_count": 92, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "categories = pd.read_excel('../data/categories_v2.xls')\n", - "categories = categories.fillna(method='ffill')\n", - "categories.columns = ['sector','technology_group','technology','description','technology_term','source','priority']\n", - "categories.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "97" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(categories.technology)" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "76" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "categories.priority.sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "78.35051546391753" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Percentage of priority from the catalogue\n", - "100*categories.priority.sum()/len(categories.technology)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "### Supplied technologies" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [], - "source": [ - "technology_table = pd.DataFrame()\n", - "# unnest the messy matches\n", - "list_technologies = data_inner['matches_technology_det']\n", - "number_matches, cleaned_lists = [], []\n", - "for l in list_technologies:\n", - " new_l = l.replace(\"[\",\"\").replace(\"]\",\"\").replace(\" '\",\"\").replace(\"'\",\"\").replace(\"'\",\"\").split(',')\n", - " cleaned_lists.append(new_l)\n", - " number_matches.append(len(new_l))\n", - "technology_table['technology_terms'] = [y for x in 
cleaned_lists for y in x] \n", - "#len(number_matches) == len(data_inner.id)\n", - "#create a list with project id that equals the terms\n", - "ids = [[i]*j for i,j in zip(inner_table.id, number_matches)] \n", - "technology_table['id'] = [y for x in ids for y in x]" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
technology_termsid
0forest management835398
1forest management835541
2bioplastic812602
3DSM811592
4waste management847641
\n", - "
" - ], - "text/plain": [ - " technology_terms id\n", - "0 forest management 835398\n", - "1 forest management 835541\n", - "2 bioplastic 812602\n", - "3 DSM 811592\n", - "4 waste management 847641" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "technology_table.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating1
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", - "
" - ], - "text/plain": [ - " sector technology_group technology \\\n", - "0 Energy Solar energy sources solar PV \n", - "1 Energy Solar energy sources concentrated solar power (CSP) \n", - "2 Energy Solar energy sources solar heating \n", - "3 Energy Solar energy sources solar dryer \n", - "4 Energy Solar energy sources solar water pump \n", - "\n", - " description \\\n", - "0 Also “solar photovoltaic”; technology of using... \n", - "1 Technology of producing electricity by concent... \n", - "2 Technology of capturing the sun's radiation an... \n", - "3 Technology of drying substances, especially fo... \n", - "4 Technology of powering electrical water pumps ... \n", - "\n", - " technology_term \\\n", - "0 photovoltaic, PV, “solar cell”  \n", - "1 “concentrated solar power”, CSP, “concentrated... \n", - "2 “solar heating” \n", - "3 “solar dryer” \n", - "4 “solar water pump” \n", - "\n", - " source priority \n", - "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", - "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", - "2 https://www.ctc-n.org/technologies/solar-heating 1 \n", - "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", - "4 https://www.ctc-n.org/technologies/solar-water... 
0 " - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "categories.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "metadata": {}, - "outputs": [], - "source": [ - "matrix = []\n", - "for term in categories['technology_term']:\n", - " row = [x.strip() for x in term.split(',')]\n", - " row = [i.replace('“', '').replace('”', '') for i in row]\n", - " matrix.append(row)" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [], - "source": [ - "tech_dict = {key: value for (key, value) in zip(categories.technology, matrix)}" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [], - "source": [ - "technology_table['category'] = None" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [], - "source": [ - "for i in technology_table.index:\n", - " for k,v in tech_dict.items():\n", - " for j in v:\n", - " if technology_table.loc[i,'technology_terms'] == j:\n", - " technology_table.loc[i, 'category'] = k" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
technology_termsidcategory
1405recycling633192recycling
1404efficient lighting633192low GHG lighting
845recycling633962recycling
844bioplastic633962bioplastics
935soil management635201soil management
\n", - "
" - ], - "text/plain": [ - " technology_terms id category\n", - "1405 recycling 633192 recycling\n", - "1404 efficient lighting 633192 low GHG lighting\n", - "845 recycling 633962 recycling\n", - "844 bioplastic 633962 bioplastics\n", - "935 soil management 635201 soil management" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "technology_table.sort_values('id').head() #repeated ids" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "category\n", - "low GHG steel 385\n", - "solar PV 302\n", - "recycling 211\n", - "energy storage 190\n", - "fuel cell 79\n", - "waste management 77\n", - "geothermal 70\n", - "carbon capture and storage 69\n", - "concentrated solar power (CSP) 68\n", - "grid management 67\n", - "combined heat and power (CHP) 58\n", - "electric vehicle design 43\n", - "biofuel 42\n", - "district heating and/or cooling 40\n", - "traffic management 39\n", - "heat pumps 37\n", - "offshore wind 37\n", - "wave energy 29\n", - "nanomaterial 28\n", - "forest management 27\n", - "biorefinery design 25\n", - "thermal storage 25\n", - "demand-side energy management 24\n", - "low GHG data centers 24\n", - "tidal energy 20\n", - "biopolymer 17\n", - "bioplastics 14\n", - "micro-grids 13\n", - "liquefied natural gas (LNG) 12\n", - "soil management 10\n", - " ... 
\n", - "afforestation 8\n", - "solar heating 8\n", - "combined cycle power plant 6\n", - "biochar 6\n", - "osmotic 5\n", - "energy-from-waste 5\n", - "nutrient management 5\n", - "compressed/converted natural gas 4\n", - "onshore wind 4\n", - "regenerative braking 4\n", - "ocean thermal energy conversion (OTEC) 4\n", - "compressed air energy storage (CAES) 3\n", - "off-grid systems 3\n", - "livestock management 3\n", - "nuclear fusion 3\n", - "agroforestry 3\n", - "updraft 2\n", - "non-motorised transport design 2\n", - "passive house 2\n", - "electron acceptor 2\n", - "reduced need for travel 1\n", - "waste heat reuse 1\n", - "inert anode 1\n", - "improved durability 1\n", - "downdraft 1\n", - "nuclear fission 1\n", - "new sources of protein 1\n", - "manure management 1\n", - "low GHG public transport / rapid transit design 1\n", - "direct seeding 1\n", - "Name: id, Length: 62, dtype: int64" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "technology_table.groupby('category').count()['id'].sort_values(ascending=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idecMaxContribution
083775019630411.18
18181846004252.50
28151474991323.75
38176425995904.00
48257314017817.50
\n", - "
" - ], - "text/plain": [ - " id ecMaxContribution\n", - "0 837750 19630411.18\n", - "1 818184 6004252.50\n", - "2 815147 4991323.75\n", - "3 817642 5995904.00\n", - "4 825731 4017817.50" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#put the money and the rest of features\n", - "cordish2020[['id','ecMaxContribution']].head() #'acronym','title','startDate','objective'," - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [], - "source": [ - "cost_per_category = pd.merge(technology_table, cordish2020[['id','ecMaxContribution']], how='left', on='id')\n", - "cost_per_category = cost_per_category.groupby('category').agg(\n", - " ['mean','sum'])['ecMaxContribution'].sort_values(['sum','mean'], ascending=False).reset_index()\n", - "cost_per_category = pd.merge(cost_per_category, \n", - " categories[['sector','technology','priority']], how='left',\n", - " left_on='category', right_on='technology')\n", - "cost_per_category = cost_per_category[['sector','technology','priority','mean','sum']]" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectortechnologyprioritymeansum
0Industrylow GHG steel12.351494e+069.053252e+08
1Energysolar PV11.950508e+065.890533e+08
2Human settlementsrecycling12.518069e+065.313126e+08
3Energyenergy storage12.533548e+064.813741e+08
4Energygeothermal14.934920e+063.454444e+08
5Transportfuel cell13.971767e+063.137696e+08
6Energygrid management14.049332e+062.713052e+08
7Energyconcentrated solar power (CSP)13.295332e+062.240826e+08
8Industrycarbon capture and storage13.016954e+062.081698e+08
9Human settlementswaste management12.354403e+061.812890e+08
\n", - "
" - ], - "text/plain": [ - " sector technology priority mean \\\n", - "0 Industry low GHG steel 1 2.351494e+06 \n", - "1 Energy solar PV 1 1.950508e+06 \n", - "2 Human settlements recycling 1 2.518069e+06 \n", - "3 Energy energy storage 1 2.533548e+06 \n", - "4 Energy geothermal 1 4.934920e+06 \n", - "5 Transport fuel cell 1 3.971767e+06 \n", - "6 Energy grid management 1 4.049332e+06 \n", - "7 Energy concentrated solar power (CSP) 1 3.295332e+06 \n", - "8 Industry carbon capture and storage 1 3.016954e+06 \n", - "9 Human settlements waste management 1 2.354403e+06 \n", - "\n", - " sum \n", - "0 9.053252e+08 \n", - "1 5.890533e+08 \n", - "2 5.313126e+08 \n", - "3 4.813741e+08 \n", - "4 3.454444e+08 \n", - "5 3.137696e+08 \n", - "6 2.713052e+08 \n", - "7 2.240826e+08 \n", - "8 2.081698e+08 \n", - "9 1.812890e+08 " - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cost_per_category.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [], - "source": [ - "def grouping(cost_per_category, category, agg):\n", - " agg_df = cost_per_category.groupby(category).sum()[str(agg)].reset_index()\n", - " agg_df['percentage'] = agg_df[str(agg)]*100/agg_df[str(agg)].sum()\n", - " agg_df = agg_df.sort_values('percentage', ascending=False)\n", - " return agg_df " - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [], - "source": [ - "tech_sum = grouping(cost_per_category, ['priority','technology'], 'sum') \n", - "sector_sum = grouping(cost_per_category, 'sector', 'sum') \n", - "priority_sum = grouping(cost_per_category, 'priority', 'sum') " - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
prioritytechnologysumpercentage
361low GHG steel9.053252e+0814.955627
531solar PV5.890533e+089.730936
501recycling5.313126e+088.777082
221energy storage4.813741e+087.952116
271geothermal3.454444e+085.706610
251fuel cell3.137696e+085.183354
281grid management2.713052e+084.481859
181concentrated solar power (CSP)2.240826e+083.701759
131carbon capture and storage2.081698e+083.438886
601waste management1.812890e+082.994826
201district heating and/or cooling1.796751e+082.968163
571traffic management1.739252e+082.873178
151combined heat and power (CHP)1.710958e+082.826438
121biorefinery design1.488818e+082.459471
471offshore wind1.397011e+082.307808
551thermal storage1.042644e+081.722408
291heat pumps1.022275e+081.688759
91biofuel1.016568e+081.679331
191demand-side energy management8.734684e+071.442936
561tidal energy8.708085e+071.438542
211electric vehicle design8.568994e+071.415565
391nanomaterial6.605166e+071.091148
611wave energy5.983902e+070.988518
30low GHG data centers5.005764e+070.826933
381micro-grids4.002983e+070.661278
81biochar3.684225e+070.608620
241forest management3.622698e+070.598456
321liquefied natural gas (LNG)2.576755e+070.425670
261gas turbine2.552590e+070.421678
50soil management2.518647e+070.416071
...............
341low GHG lighting2.268657e+070.374773
141combined cycle power plant2.117825e+070.349857
481osmotic2.104861e+070.347715
441nutrient management2.047841e+070.338296
541solar heating1.541945e+070.254723
451ocean thermal energy conversion (OTEC)1.103012e+070.182213
581updraft1.097947e+070.181377
231energy-from-waste9.231716e+060.152504
161compressed air energy storage (CAES)9.101826e+060.150359
521regenerative braking8.978962e+060.148329
331livestock management8.962547e+060.148058
351low GHG public transport / rapid transit design7.996591e+060.132101
71agroforestry6.371010e+060.105247
311inert anode5.232146e+060.086433
61afforestation5.114686e+060.084493
431nuclear fusion3.960661e+060.065429
411non-motorised transport design3.831696e+060.063298
491passive house3.770893e+060.062294
461off-grid systems2.843348e+060.046971
591waste heat reuse2.299104e+060.037980
401new sources of protein2.005463e+060.033129
171compressed/converted natural gas1.886744e+060.031168
511reduced need for travel1.217982e+060.020121
421nuclear fission5.000000e+050.008260
20electron acceptor4.111080e+050.006791
40onshore wind2.000000e+050.003304
371manure management5.000000e+040.000826
10downdraft5.000000e+040.000826
301improved durability5.000000e+040.000826
00direct seeding5.000000e+040.000826
\n", - "

62 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " priority technology sum \\\n", - "36 1 low GHG steel 9.053252e+08 \n", - "53 1 solar PV 5.890533e+08 \n", - "50 1 recycling 5.313126e+08 \n", - "22 1 energy storage 4.813741e+08 \n", - "27 1 geothermal 3.454444e+08 \n", - "25 1 fuel cell 3.137696e+08 \n", - "28 1 grid management 2.713052e+08 \n", - "18 1 concentrated solar power (CSP) 2.240826e+08 \n", - "13 1 carbon capture and storage 2.081698e+08 \n", - "60 1 waste management 1.812890e+08 \n", - "20 1 district heating and/or cooling 1.796751e+08 \n", - "57 1 traffic management 1.739252e+08 \n", - "15 1 combined heat and power (CHP) 1.710958e+08 \n", - "12 1 biorefinery design 1.488818e+08 \n", - "47 1 offshore wind 1.397011e+08 \n", - "55 1 thermal storage 1.042644e+08 \n", - "29 1 heat pumps 1.022275e+08 \n", - "9 1 biofuel 1.016568e+08 \n", - "19 1 demand-side energy management 8.734684e+07 \n", - "56 1 tidal energy 8.708085e+07 \n", - "21 1 electric vehicle design 8.568994e+07 \n", - "39 1 nanomaterial 6.605166e+07 \n", - "61 1 wave energy 5.983902e+07 \n", - "3 0 low GHG data centers 5.005764e+07 \n", - "38 1 micro-grids 4.002983e+07 \n", - "8 1 biochar 3.684225e+07 \n", - "24 1 forest management 3.622698e+07 \n", - "32 1 liquefied natural gas (LNG) 2.576755e+07 \n", - "26 1 gas turbine 2.552590e+07 \n", - "5 0 soil management 2.518647e+07 \n", - ".. ... ... ... 
\n", - "34 1 low GHG lighting 2.268657e+07 \n", - "14 1 combined cycle power plant 2.117825e+07 \n", - "48 1 osmotic 2.104861e+07 \n", - "44 1 nutrient management 2.047841e+07 \n", - "54 1 solar heating 1.541945e+07 \n", - "45 1 ocean thermal energy conversion (OTEC) 1.103012e+07 \n", - "58 1 updraft 1.097947e+07 \n", - "23 1 energy-from-waste 9.231716e+06 \n", - "16 1 compressed air energy storage (CAES) 9.101826e+06 \n", - "52 1 regenerative braking 8.978962e+06 \n", - "33 1 livestock management 8.962547e+06 \n", - "35 1 low GHG public transport / rapid transit design 7.996591e+06 \n", - "7 1 agroforestry 6.371010e+06 \n", - "31 1 inert anode 5.232146e+06 \n", - "6 1 afforestation 5.114686e+06 \n", - "43 1 nuclear fusion 3.960661e+06 \n", - "41 1 non-motorised transport design 3.831696e+06 \n", - "49 1 passive house 3.770893e+06 \n", - "46 1 off-grid systems 2.843348e+06 \n", - "59 1 waste heat reuse 2.299104e+06 \n", - "40 1 new sources of protein 2.005463e+06 \n", - "17 1 compressed/converted natural gas 1.886744e+06 \n", - "51 1 reduced need for travel 1.217982e+06 \n", - "42 1 nuclear fission 5.000000e+05 \n", - "2 0 electron acceptor 4.111080e+05 \n", - "4 0 onshore wind 2.000000e+05 \n", - "37 1 manure management 5.000000e+04 \n", - "1 0 downdraft 5.000000e+04 \n", - "30 1 improved durability 5.000000e+04 \n", - "0 0 direct seeding 5.000000e+04 \n", - "\n", - " percentage \n", - "36 14.955627 \n", - "53 9.730936 \n", - "50 8.777082 \n", - "22 7.952116 \n", - "27 5.706610 \n", - "25 5.183354 \n", - "28 4.481859 \n", - "18 3.701759 \n", - "13 3.438886 \n", - "60 2.994826 \n", - "20 2.968163 \n", - "57 2.873178 \n", - "15 2.826438 \n", - "12 2.459471 \n", - "47 2.307808 \n", - "55 1.722408 \n", - "29 1.688759 \n", - "9 1.679331 \n", - "19 1.442936 \n", - "56 1.438542 \n", - "21 1.415565 \n", - "39 1.091148 \n", - "61 0.988518 \n", - "3 0.826933 \n", - "38 0.661278 \n", - "8 0.608620 \n", - "24 0.598456 \n", - "32 0.425670 \n", - "26 0.421678 \n", - "5 0.416071 
\n", - ".. ... \n", - "34 0.374773 \n", - "14 0.349857 \n", - "48 0.347715 \n", - "44 0.338296 \n", - "54 0.254723 \n", - "45 0.182213 \n", - "58 0.181377 \n", - "23 0.152504 \n", - "16 0.150359 \n", - "52 0.148329 \n", - "33 0.148058 \n", - "35 0.132101 \n", - "7 0.105247 \n", - "31 0.086433 \n", - "6 0.084493 \n", - "43 0.065429 \n", - "41 0.063298 \n", - "49 0.062294 \n", - "46 0.046971 \n", - "59 0.037980 \n", - "40 0.033129 \n", - "17 0.031168 \n", - "51 0.020121 \n", - "42 0.008260 \n", - "2 0.006791 \n", - "4 0.003304 \n", - "37 0.000826 \n", - "1 0.000826 \n", - "30 0.000826 \n", - "0 0.000826 \n", - "\n", - "[62 rows x 4 columns]" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tech_sum#.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectorsumpercentage
2Energy2.395613e+0939.574619
4Industry1.469098e+0924.268933
3Human settlements1.037377e+0917.137079
5Transport7.247211e+0811.972115
1Buildings2.849001e+084.706441
\n", - "
" - ], - "text/plain": [ - " sector sum percentage\n", - "2 Energy 2.395613e+09 39.574619\n", - "4 Industry 1.469098e+09 24.268933\n", - "3 Human settlements 1.037377e+09 17.137079\n", - "5 Transport 7.247211e+08 11.972115\n", - "1 Buildings 2.849001e+08 4.706441" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sector_sum.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
prioritysumpercentage
115.977453e+0998.745249
007.595522e+071.254751
\n", - "
" - ], - "text/plain": [ - " priority sum percentage\n", - "1 1 5.977453e+09 98.745249\n", - "0 0 7.595522e+07 1.254751" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "priority_sum.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating1
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", - "
" - ], - "text/plain": [ - " sector technology_group technology \\\n", - "0 Energy Solar energy sources solar PV \n", - "1 Energy Solar energy sources concentrated solar power (CSP) \n", - "2 Energy Solar energy sources solar heating \n", - "3 Energy Solar energy sources solar dryer \n", - "4 Energy Solar energy sources solar water pump \n", - "\n", - " description \\\n", - "0 Also “solar photovoltaic”; technology of using... \n", - "1 Technology of producing electricity by concent... \n", - "2 Technology of capturing the sun's radiation an... \n", - "3 Technology of drying substances, especially fo... \n", - "4 Technology of powering electrical water pumps ... \n", - "\n", - " technology_term \\\n", - "0 photovoltaic, PV, “solar cell”  \n", - "1 “concentrated solar power”, CSP, “concentrated... \n", - "2 “solar heating” \n", - "3 “solar dryer” \n", - "4 “solar water pump” \n", - "\n", - " source priority \n", - "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", - "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", - "2 https://www.ctc-n.org/technologies/solar-heating 1 \n", - "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", - "4 https://www.ctc-n.org/technologies/solar-water... 
0 " - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "categories.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "extended_categories = pd.merge(technology_table, categories, \n", - " how = 'left', left_on = 'category', right_on = 'technology')" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "priority\n", - "0.0 42\n", - "1.0 2150\n", - "Name: id, dtype: int64" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "extended_categories.groupby('priority').count()['id']" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.506593906321055" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#categories v1\n", - "1114/(1085+1114)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9808394160583942" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#categories v2\n", - "2150/(42+2150)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "what about the money" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "#extended_categories.to_csv('../data/extended_categories_v2.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
technology_termsidcategorysectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0forest management835398forest managementAgricultureForest protectionforest managementTechnology of increasing carbon stocks of stan...“forest management”, “management of forest”https://www.ctc-n.org/technologies/forest-mana...1.0
1forest management835541forest managementAgricultureForest protectionforest managementTechnology of increasing carbon stocks of stan...“forest management”, “management of forest”https://www.ctc-n.org/technologies/forest-mana...1.0
2bioplastic812602bioplasticsIndustryUse of innovative materialsbioplasticsTechnology of using renewable biomass material...bioplastichttps://www.ctc-n.org/technologies/bioplastics1.0
3DSM811592demand-side energy managementBuildingsDecreased energy consumption design or practicesdemand-side energy managementTechnology inside buildings that help ensure t...“demand-side energy management”, “demand-side ...https://www.ctc-n.org/technologies/energy-mana...1.0
4waste management847641waste managementHuman settlementsWaste managementwaste managementTechnology of maximising efficiency of resourc...“waste management”, “management of waste”https://www.ctc-n.org/technologies/integrated-...1.0
\n", - "
" - ], - "text/plain": [ - " technology_terms id category \\\n", - "0 forest management 835398 forest management \n", - "1 forest management 835541 forest management \n", - "2 bioplastic 812602 bioplastics \n", - "3 DSM 811592 demand-side energy management \n", - "4 waste management 847641 waste management \n", - "\n", - " sector technology_group \\\n", - "0 Agriculture Forest protection \n", - "1 Agriculture Forest protection \n", - "2 Industry Use of innovative materials \n", - "3 Buildings Decreased energy consumption design or practices \n", - "4 Human settlements Waste management \n", - "\n", - " technology \\\n", - "0 forest management \n", - "1 forest management \n", - "2 bioplastics \n", - "3 demand-side energy management \n", - "4 waste management \n", - "\n", - " description \\\n", - "0 Technology of increasing carbon stocks of stan... \n", - "1 Technology of increasing carbon stocks of stan... \n", - "2 Technology of using renewable biomass material... \n", - "3 Technology inside buildings that help ensure t... \n", - "4 Technology of maximising efficiency of resourc... \n", - "\n", - " technology_term \\\n", - "0 “forest management”, “management of forest” \n", - "1 “forest management”, “management of forest” \n", - "2 bioplastic \n", - "3 “demand-side energy management”, “demand-side ... \n", - "4 “waste management”, “management of waste” \n", - "\n", - " source priority \n", - "0 https://www.ctc-n.org/technologies/forest-mana... 1.0 \n", - "1 https://www.ctc-n.org/technologies/forest-mana... 1.0 \n", - "2 https://www.ctc-n.org/technologies/bioplastics 1.0 \n", - "3 https://www.ctc-n.org/technologies/energy-mana... 1.0 \n", - "4 https://www.ctc-n.org/technologies/integrated-... 
1.0 " - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "extended_categories.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "categories_raw_minus_categories_extended = pd.merge(categories[['sector','technology_group','technology']], extended_categories, how='left', on='technology')" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2227" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(categories_raw_minus_categories_extended)" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "metadata": {}, - "outputs": [], - "source": [ - "not_invested = categories_raw_minus_categories_extended[categories_raw_minus_categories_extended.id.isnull()]\\\n", - " [['sector_x','technology_group_x','technology']]\\\n", - " .reset_index(drop=True)\\\n", - " .rename(columns={'sector_x':'sector','technology_group_x':'technology_group'})\\\n", - " .merge(categories[['technology','priority']], how='left',on='technology')\\\n", - " .drop_duplicates()" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "metadata": {}, - "outputs": [], - "source": [ - "not_invested.to_csv('../data/not_invested.csv',index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectortechnology_grouptechnologypriority
0EnergySolar energy sourcessolar dryer0
1EnergySolar energy sourcessolar water pump0
2EnergySystem innovationlow GHG transmission1
11TransportFuelelectric battery1
12TransportVehicle designlow GHG aircraft design1
\n", - "
" - ], - "text/plain": [ - " sector technology_group technology priority\n", - "0 Energy Solar energy sources solar dryer 0\n", - "1 Energy Solar energy sources solar water pump 0\n", - "2 Energy System innovation low GHG transmission 1\n", - "11 Transport Fuel electric battery 1\n", - "12 Transport Vehicle design low GHG aircraft design 1" - ] - }, - "execution_count": 140, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "not_invested.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "## Join calculation (parelell to tableau)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "cordis_data = pd.merge(extended_categories, inner_table, how='inner', on='id')" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['technology_terms', 'id', 'category', 'sector', 'technology_group',\n", - " 'technology', 'description', 'technology_term', 'source', 'priority',\n", - " 'rcn', 'acronym', 'status', 'programme', 'topics', 'frameworkProgramme',\n", - " 'title', 'startDate', 'endDate', 'projectUrl', 'objective', 'totalCost',\n", - " 'ecMaxContribution', 'call', 'fundingScheme', 'coordinator',\n", - " 'coordinatorCountry', 'participants', 'participantCountries',\n", - " 'subjects'],\n", - " dtype='object')" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cordis_data.columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "#### CleanTech Projects" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1534" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "len(cordis_data.id.unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "priority_table = cordis_data.groupby('id').mean()['priority'].reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
priorityid
00.00000036
10.5000004
20.6666671
31.0000001487
\n", - "
" - ], - "text/plain": [ - " priority id\n", - "0 0.000000 36\n", - "1 0.500000 4\n", - "2 0.666667 1\n", - "3 1.000000 1487" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "priority_summary = priority_table.groupby('priority').count().reset_index()\n", - "priority_summary" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is because you might have a project with more than one technology and those technologies might be priority or not, so we take all the 1 as priority 0 otherwise" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Priority projects: 1487, 96.9361147327249\n", - "Non-priority projects: 47, 3.0638852672751016\n" - ] - } - ], - "source": [ - "priority = priority_summary.loc[3,'id']\n", - "total = len(cordis_data.id.unique())\n", - "\n", - "print('Priority projects: {}, {}'.format(priority, priority/total*100))\n", - "print('Non-priority projects: {}, {}'.format(total-priority, (1-(priority/total))*100))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1528" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "priority_summary.id.sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "#### EU contribution" - ] - }, - { - "cell_type": "code", - "execution_count": 168, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
priorityecMaxContribution
00.0000007.570522e+07
10.5000004.000000e+05
20.6666671.500000e+05
31.0000005.978837e+09
\n", - "
" - ], - "text/plain": [ - " priority ecMaxContribution\n", - "0 0.000000 7.570522e+07\n", - "1 0.500000 4.000000e+05\n", - "2 0.666667 1.500000e+05\n", - "3 1.000000 5.978837e+09" - ] - }, - "execution_count": 168, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "priority_conrtibution_summary = pd.merge(priority_table, cordis_data[['id','ecMaxContribution']], how='left', on='id')\n", - "priority_conrtibution_summary = priority_conrtibution_summary.groupby('priority').sum()['ecMaxContribution'].reset_index()\n", - "priority_conrtibution_summary" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is because you might have a project with more than one technology and those technologies might be priority or not, so we take all the 1 as priority 0 otherwise" - ] - }, - { - "cell_type": "code", - "execution_count": 171, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Priority contribution: 5978.83690700002, 98.21835140837607\n", - "Non-priority contribution: 108.45413511998082, 1.78164859162393\n" - ] + "cells": [ + { + "cell_type": "markdown", + "source": [ + "### Overlap and assessment" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from matplotlib_venn import venn3, venn3_circles, venn2, venn2_circles\n", + "import matplotlib.pyplot as plt" + ], + "outputs": [], + "execution_count": 79, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "data_d = pd.read_csv('../data/tech.csv')\n", + "data_p = pd.read_csv('../data/tech2.csv')\n", + "print('Deterministic: {}\\nProbabilistic: {}'.format(len(data_d), len(data_p)))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Deterministic: 2052\n", + "Probabilistic: 7530\n" + ] + } + ], + "execution_count": 80, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + 
"#print(len(table['link'].unique())) #all the unique links found in all the methods\n", + "plt.figure(figsize=(16, 12))\n", + "v = venn2([set(data_d.id), \n", + " set(data_p.id)], \n", + " set_labels = (\"Deterministic\", 'Probabilistic'),\n", + " set_colors=('#44b023', '#e8784f'), alpha=0.5)#\n", + "for text in v.set_labels:\n", + " text.set_fontsize(10)\n", + "for text in v.subset_labels:\n", + " text.set_fontsize(10)\n", + "c = venn2_circles([set(data_d.id), \n", + " set(data_p.id)], linestyle='solid', linewidth=0.6)\n", + "plt.title(\"Ovelap of Methods\")\n", + "#plt.savefig('../plots/venn-methods.png')\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": [ + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 81, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "data_inner = pd.merge(data_p,data_d,\n", + " how='inner',on='id',suffixes=('_prob', '_det'))" + ], + "outputs": [], + "execution_count": 82, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "print(len(data_inner))\n", + "print(len(list(set(list(data_d.id)) - set(list(data_p.id)))))\n", + "print(len(list(set(list(data_p.id)) - set(list(data_d.id)))))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1534\n", + "518\n", + "5996\n" + ] + } + ], + "execution_count": 83, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "How does it look the ones that match on deterministic but not probabilistic? \n", + "How does it look all the way around?" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#data_inner.columns\n", + "inner_table = data_inner[['id','category']]\n", + "inner_table.groupby('category').count()['id'].sort_values().head()#.plot()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 84, + "data": { + "text/plain": [ + "category\n", + "Building design nanomaterial 1\n", + "heat pump Vehicle design 1\n", + "heat pump energy storage 1\n", + "heat pump energy tower 1\n", + "heat pump renewable energy 1\n", + "Name: id, dtype: int64" + ] + }, + "metadata": {} + } + ], + "execution_count": 84, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#random choice\n", + "#sample_list = list(np.random.choice(list(inner_table.id), 91, replace=False))" + ], + "outputs": [], + "execution_count": 86, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cordish2020 = pd.read_excel('../data/cordis-h2020projects.xlsx')" + ], + "outputs": [], + "execution_count": 87, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + 
"#create a sample set to assess\n", + "#cordish2020[cordish2020.id.isin(sample_list)][['title','objective','coordinator']].to_csv('../data/sample.csv')" + ], + "outputs": [], + "execution_count": 88, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#inner_table.to_csv('../data/inner_table.csv') #only once is need\n", + "inner_table = cordish2020[cordish2020.id.isin(list(inner_table.id))]" + ], + "outputs": [], + "execution_count": 89, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "inner_table.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 96, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rcnidacronymstatusprogrammetopicsframeworkProgrammetitlestartDateendDate...objectivetotalCostecMaxContributioncallfundingSchemecoordinatorcoordinatorCountryparticipantsparticipantCountriessubjects
52216738353983D-FOGRODSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Understanding forest growth dynamics using nov...2019-10-012021-09-30...Forest ecosystems are an essential terrestrial...178320.00178320.00H2020-MSCA-IF-2018MSCA-IF-EF-STUNIVERSITEIT GENTBENaNNaNNaN
16221633835541MOVESSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020MOnitoring VEgetation status and functioning a...2019-10-012021-09-30...Leaf Area Index (LAI), Fraction of green Veget...172932.48172932.48H2020-MSCA-IF-2018MSCA-IF-EF-STCENTRO DE INVESTIGACION ECOLOGICA Y APLICACION...ESNaNNaNNaN
33217649812602POLIPOCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020The first non-fermentative production process ...2018-06-012018-11-30...POLìPO S.r.l. has developed the first chemica...71429.0050000.00H2020-SMEInst-2018-2020-1SME-1POLIPO SRLITNaNNaNNaN
43217808811592NUTRI-NEEDSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Final development, clinical validation and lau...2018-06-012020-05-31...People want to live a long and healthy life an...3567000.002496900.00H2020-SMEInst-2018-2020-2SME-2NUTRILEADS BVNLNaNNaNNaN
45223666847641MICADOSIGNEDH2020-Euratom-1.7.;H2020-Euratom-1.1.NFRP-2018-10H2020Measurement and Instrumentation for Cleaning A...2019-06-012022-05-31...The goal of the MICADO project is to propose a...4986643.754442162.50NFRP-2018IACOSTRUZIONI APPARECCHIATURE ELETTRONICHE NUCLE...ITCOMMISSARIAT A L ENERGIE ATOMIQUE ET AUX ENERG...FR;DE;IT;BE;CZNaN
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " rcn id acronym status \\\n", + "5 221673 835398 3D-FOGROD SIGNED \n", + "16 221633 835541 MOVES SIGNED \n", + "33 217649 812602 POLIPO CLOSED \n", + "43 217808 811592 NUTRI-NEED SIGNED \n", + "45 223666 847641 MICADO SIGNED \n", + "\n", + " programme topics \\\n", + "5 H2020-EU.1.3.2. MSCA-IF-2018 \n", + "16 H2020-EU.1.3.2. MSCA-IF-2018 \n", + "33 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "43 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "45 H2020-Euratom-1.7.;H2020-Euratom-1.1. NFRP-2018-10 \n", + "\n", + " frameworkProgramme title \\\n", + "5 H2020 Understanding forest growth dynamics using nov... \n", + "16 H2020 MOnitoring VEgetation status and functioning a... \n", + "33 H2020 The first non-fermentative production process ... \n", + "43 H2020 Final development, clinical validation and lau... \n", + "45 H2020 Measurement and Instrumentation for Cleaning A... \n", + "\n", + " startDate endDate ... \\\n", + "5 2019-10-01 2021-09-30 ... \n", + "16 2019-10-01 2021-09-30 ... \n", + "33 2018-06-01 2018-11-30 ... \n", + "43 2018-06-01 2020-05-31 ... \n", + "45 2019-06-01 2022-05-31 ... \n", + "\n", + " objective totalCost \\\n", + "5 Forest ecosystems are an essential terrestrial... 178320.00 \n", + "16 Leaf Area Index (LAI), Fraction of green Veget... 172932.48 \n", + "33 POLìPO S.r.l. has developed the first chemica... 71429.00 \n", + "43 People want to live a long and healthy life an... 3567000.00 \n", + "45 The goal of the MICADO project is to propose a... 
4986643.75 \n", + "\n", + " ecMaxContribution call fundingScheme \\\n", + "5 178320.00 H2020-MSCA-IF-2018 MSCA-IF-EF-ST \n", + "16 172932.48 H2020-MSCA-IF-2018 MSCA-IF-EF-ST \n", + "33 50000.00 H2020-SMEInst-2018-2020-1 SME-1 \n", + "43 2496900.00 H2020-SMEInst-2018-2020-2 SME-2 \n", + "45 4442162.50 NFRP-2018 IA \n", + "\n", + " coordinator coordinatorCountry \\\n", + "5 UNIVERSITEIT GENT BE \n", + "16 CENTRO DE INVESTIGACION ECOLOGICA Y APLICACION... ES \n", + "33 POLIPO SRL IT \n", + "43 NUTRILEADS BV NL \n", + "45 COSTRUZIONI APPARECCHIATURE ELETTRONICHE NUCLE... IT \n", + "\n", + " participants participantCountries \\\n", + "5 NaN NaN \n", + "16 NaN NaN \n", + "33 NaN NaN \n", + "43 NaN NaN \n", + "45 COMMISSARIAT A L ENERGIE ATOMIQUE ET AUX ENERG... FR;DE;IT;BE;CZ \n", + "\n", + " subjects \n", + "5 NaN \n", + "16 NaN \n", + "33 NaN \n", + "43 NaN \n", + "45 NaN \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "metadata": {} + } + ], + "execution_count": 96, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### After assessment" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "sample_lab = pd.read_csv('../data/sample4tag.csv')" + ], + "outputs": [], + "execution_count": 90, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "sample_lab.label.sum()*100/len(sample_lab)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 91, + "data": { + "text/plain": [ + "93.4065934065934" + ] + }, + "metadata": {} + } + ], + "execution_count": 91, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "### Demanded technologies" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "categories = pd.read_excel('../data/categories_v2.xls')\n", + "categories = categories.fillna(method='ffill')\n", + "categories.columns = 
['sector','technology_group','technology','description','technology_term','source','priority']\n", + "categories.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 92, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating1
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", + "
" + ], + "text/plain": [ + " sector technology_group technology \\\n", + "0 Energy Solar energy sources solar PV \n", + "1 Energy Solar energy sources concentrated solar power (CSP) \n", + "2 Energy Solar energy sources solar heating \n", + "3 Energy Solar energy sources solar dryer \n", + "4 Energy Solar energy sources solar water pump \n", + "\n", + " description \\\n", + "0 Also “solar photovoltaic”; technology of using... \n", + "1 Technology of producing electricity by concent... \n", + "2 Technology of capturing the sun's radiation an... \n", + "3 Technology of drying substances, especially fo... \n", + "4 Technology of powering electrical water pumps ... \n", + "\n", + " technology_term \\\n", + "0 photovoltaic, PV, “solar cell”  \n", + "1 “concentrated solar power”, CSP, “concentrated... \n", + "2 “solar heating” \n", + "3 “solar dryer” \n", + "4 “solar water pump” \n", + "\n", + " source priority \n", + "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", + "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", + "2 https://www.ctc-n.org/technologies/solar-heating 1 \n", + "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", + "4 https://www.ctc-n.org/technologies/solar-water... 
0 " + ] + }, + "metadata": {} + } + ], + "execution_count": 92, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "len(categories.technology)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 93, + "data": { + "text/plain": [ + "97" + ] + }, + "metadata": {} + } + ], + "execution_count": 93, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "categories.priority.sum()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 94, + "data": { + "text/plain": [ + "76" + ] + }, + "metadata": {} + } + ], + "execution_count": 94, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#Percentage of priority from the catalogue\n", + "100*categories.priority.sum()/len(categories.technology)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 95, + "data": { + "text/plain": [ + "78.35051546391753" + ] + }, + "metadata": {} + } + ], + "execution_count": 95, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "### Supplied technologies" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "technology_table = pd.DataFrame()\n", + "# unnest the messy matches\n", + "list_technologies = data_inner['matches_technology_det']\n", + "number_matches, cleaned_lists = [], []\n", + "for l in list_technologies:\n", + " new_l = l.replace(\"[\",\"\").replace(\"]\",\"\").replace(\" '\",\"\").replace(\"'\",\"\").replace(\"'\",\"\").split(',')\n", + " cleaned_lists.append(new_l)\n", + " number_matches.append(len(new_l))\n", + "technology_table['technology_terms'] = [y for x in cleaned_lists for y in x] \n", + "#len(number_matches) == len(data_inner.id)\n", + "#create a list with project id that equals the terms\n", + "ids = [[i]*j for i,j in zip(inner_table.id, number_matches)] \n", + "technology_table['id'] = [y for x in ids for y in x]" + ], + "outputs": [], + "execution_count": 99, + "metadata": {} + }, + { + "cell_type": 
"code", + "source": [ + "technology_table.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 101, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
technology_termsid
0forest management835398
1forest management835541
2bioplastic812602
3DSM811592
4waste management847641
\n", + "
" + ], + "text/plain": [ + " technology_terms id\n", + "0 forest management 835398\n", + "1 forest management 835541\n", + "2 bioplastic 812602\n", + "3 DSM 811592\n", + "4 waste management 847641" + ] + }, + "metadata": {} + } + ], + "execution_count": 101, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "categories.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 102, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating1
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", + "
" + ], + "text/plain": [ + " sector technology_group technology \\\n", + "0 Energy Solar energy sources solar PV \n", + "1 Energy Solar energy sources concentrated solar power (CSP) \n", + "2 Energy Solar energy sources solar heating \n", + "3 Energy Solar energy sources solar dryer \n", + "4 Energy Solar energy sources solar water pump \n", + "\n", + " description \\\n", + "0 Also “solar photovoltaic”; technology of using... \n", + "1 Technology of producing electricity by concent... \n", + "2 Technology of capturing the sun's radiation an... \n", + "3 Technology of drying substances, especially fo... \n", + "4 Technology of powering electrical water pumps ... \n", + "\n", + " technology_term \\\n", + "0 photovoltaic, PV, “solar cell”  \n", + "1 “concentrated solar power”, CSP, “concentrated... \n", + "2 “solar heating” \n", + "3 “solar dryer” \n", + "4 “solar water pump” \n", + "\n", + " source priority \n", + "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", + "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", + "2 https://www.ctc-n.org/technologies/solar-heating 1 \n", + "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", + "4 https://www.ctc-n.org/technologies/solar-water... 
0 " + ] + }, + "metadata": {} + } + ], + "execution_count": 102, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "matrix = []\n", + "for term in categories['technology_term']:\n", + " row = [x.strip() for x in term.split(',')]\n", + " row = [i.replace('“', '').replace('”', '') for i in row]\n", + " matrix.append(row)" + ], + "outputs": [], + "execution_count": 103, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "tech_dict = {key: value for (key, value) in zip(categories.technology, matrix)}" + ], + "outputs": [], + "execution_count": 104, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "technology_table['category'] = None" + ], + "outputs": [], + "execution_count": 105, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "for i in technology_table.index:\n", + " for k,v in tech_dict.items():\n", + " for j in v:\n", + " if technology_table.loc[i,'technology_terms'] == j:\n", + " technology_table.loc[i, 'category'] = k" + ], + "outputs": [], + "execution_count": 106, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "technology_table.sort_values('id').head() #repeated ids" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 107, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
technology_termsidcategory
1405recycling633192recycling
1404efficient lighting633192low GHG lighting
845recycling633962recycling
844bioplastic633962bioplastics
935soil management635201soil management
\n", + "
" + ], + "text/plain": [ + " technology_terms id category\n", + "1405 recycling 633192 recycling\n", + "1404 efficient lighting 633192 low GHG lighting\n", + "845 recycling 633962 recycling\n", + "844 bioplastic 633962 bioplastics\n", + "935 soil management 635201 soil management" + ] + }, + "metadata": {} + } + ], + "execution_count": 107, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "technology_table.groupby('category').count()['id'].sort_values(ascending=False)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 108, + "data": { + "text/plain": [ + "category\n", + "low GHG steel 385\n", + "solar PV 302\n", + "recycling 211\n", + "energy storage 190\n", + "fuel cell 79\n", + "waste management 77\n", + "geothermal 70\n", + "carbon capture and storage 69\n", + "concentrated solar power (CSP) 68\n", + "grid management 67\n", + "combined heat and power (CHP) 58\n", + "electric vehicle design 43\n", + "biofuel 42\n", + "district heating and/or cooling 40\n", + "traffic management 39\n", + "heat pumps 37\n", + "offshore wind 37\n", + "wave energy 29\n", + "nanomaterial 28\n", + "forest management 27\n", + "biorefinery design 25\n", + "thermal storage 25\n", + "demand-side energy management 24\n", + "low GHG data centers 24\n", + "tidal energy 20\n", + "biopolymer 17\n", + "bioplastics 14\n", + "micro-grids 13\n", + "liquefied natural gas (LNG) 12\n", + "soil management 10\n", + " ... 
\n", + "afforestation 8\n", + "solar heating 8\n", + "combined cycle power plant 6\n", + "biochar 6\n", + "osmotic 5\n", + "energy-from-waste 5\n", + "nutrient management 5\n", + "compressed/converted natural gas 4\n", + "onshore wind 4\n", + "regenerative braking 4\n", + "ocean thermal energy conversion (OTEC) 4\n", + "compressed air energy storage (CAES) 3\n", + "off-grid systems 3\n", + "livestock management 3\n", + "nuclear fusion 3\n", + "agroforestry 3\n", + "updraft 2\n", + "non-motorised transport design 2\n", + "passive house 2\n", + "electron acceptor 2\n", + "reduced need for travel 1\n", + "waste heat reuse 1\n", + "inert anode 1\n", + "improved durability 1\n", + "downdraft 1\n", + "nuclear fission 1\n", + "new sources of protein 1\n", + "manure management 1\n", + "low GHG public transport / rapid transit design 1\n", + "direct seeding 1\n", + "Name: id, Length: 62, dtype: int64" + ] + }, + "metadata": {} + } + ], + "execution_count": 108, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#put the money and the rest of features\n", + "cordish2020[['id','ecMaxContribution']].head() #'acronym','title','startDate','objective'," + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 109, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idecMaxContribution
083775019630411.18
18181846004252.50
28151474991323.75
38176425995904.00
48257314017817.50
\n", + "
" + ], + "text/plain": [ + " id ecMaxContribution\n", + "0 837750 19630411.18\n", + "1 818184 6004252.50\n", + "2 815147 4991323.75\n", + "3 817642 5995904.00\n", + "4 825731 4017817.50" + ] + }, + "metadata": {} + } + ], + "execution_count": 109, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cost_per_category = pd.merge(technology_table, cordish2020[['id','ecMaxContribution']], how='left', on='id')\n", + "cost_per_category = cost_per_category.groupby('category').agg(\n", + " ['mean','sum'])['ecMaxContribution'].sort_values(['sum','mean'], ascending=False).reset_index()\n", + "cost_per_category = pd.merge(cost_per_category, \n", + " categories[['sector','technology','priority']], how='left',\n", + " left_on='category', right_on='technology')\n", + "cost_per_category = cost_per_category[['sector','technology','priority','mean','sum']]" + ], + "outputs": [], + "execution_count": 110, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cost_per_category.head(10)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 111, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectortechnologyprioritymeansum
0Industrylow GHG steel12.351494e+069.053252e+08
1Energysolar PV11.950508e+065.890533e+08
2Human settlementsrecycling12.518069e+065.313126e+08
3Energyenergy storage12.533548e+064.813741e+08
4Energygeothermal14.934920e+063.454444e+08
5Transportfuel cell13.971767e+063.137696e+08
6Energygrid management14.049332e+062.713052e+08
7Energyconcentrated solar power (CSP)13.295332e+062.240826e+08
8Industrycarbon capture and storage13.016954e+062.081698e+08
9Human settlementswaste management12.354403e+061.812890e+08
\n", + "
" + ], + "text/plain": [ + " sector technology priority mean \\\n", + "0 Industry low GHG steel 1 2.351494e+06 \n", + "1 Energy solar PV 1 1.950508e+06 \n", + "2 Human settlements recycling 1 2.518069e+06 \n", + "3 Energy energy storage 1 2.533548e+06 \n", + "4 Energy geothermal 1 4.934920e+06 \n", + "5 Transport fuel cell 1 3.971767e+06 \n", + "6 Energy grid management 1 4.049332e+06 \n", + "7 Energy concentrated solar power (CSP) 1 3.295332e+06 \n", + "8 Industry carbon capture and storage 1 3.016954e+06 \n", + "9 Human settlements waste management 1 2.354403e+06 \n", + "\n", + " sum \n", + "0 9.053252e+08 \n", + "1 5.890533e+08 \n", + "2 5.313126e+08 \n", + "3 4.813741e+08 \n", + "4 3.454444e+08 \n", + "5 3.137696e+08 \n", + "6 2.713052e+08 \n", + "7 2.240826e+08 \n", + "8 2.081698e+08 \n", + "9 1.812890e+08 " + ] + }, + "metadata": {} + } + ], + "execution_count": 111, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "def grouping(cost_per_category, category, agg):\n", + " agg_df = cost_per_category.groupby(category).sum()[str(agg)].reset_index()\n", + " agg_df['percentage'] = agg_df[str(agg)]*100/agg_df[str(agg)].sum()\n", + " agg_df = agg_df.sort_values('percentage', ascending=False)\n", + " return agg_df " + ], + "outputs": [], + "execution_count": 112, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "tech_sum = grouping(cost_per_category, ['priority','technology'], 'sum') \n", + "sector_sum = grouping(cost_per_category, 'sector', 'sum') \n", + "priority_sum = grouping(cost_per_category, 'priority', 'sum') " + ], + "outputs": [], + "execution_count": 113, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "tech_sum#.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 114, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
prioritytechnologysumpercentage
361low GHG steel9.053252e+0814.955627
531solar PV5.890533e+089.730936
501recycling5.313126e+088.777082
221energy storage4.813741e+087.952116
271geothermal3.454444e+085.706610
251fuel cell3.137696e+085.183354
281grid management2.713052e+084.481859
181concentrated solar power (CSP)2.240826e+083.701759
131carbon capture and storage2.081698e+083.438886
601waste management1.812890e+082.994826
201district heating and/or cooling1.796751e+082.968163
571traffic management1.739252e+082.873178
151combined heat and power (CHP)1.710958e+082.826438
121biorefinery design1.488818e+082.459471
471offshore wind1.397011e+082.307808
551thermal storage1.042644e+081.722408
291heat pumps1.022275e+081.688759
91biofuel1.016568e+081.679331
191demand-side energy management8.734684e+071.442936
561tidal energy8.708085e+071.438542
211electric vehicle design8.568994e+071.415565
391nanomaterial6.605166e+071.091148
611wave energy5.983902e+070.988518
30low GHG data centers5.005764e+070.826933
381micro-grids4.002983e+070.661278
81biochar3.684225e+070.608620
241forest management3.622698e+070.598456
321liquefied natural gas (LNG)2.576755e+070.425670
261gas turbine2.552590e+070.421678
50soil management2.518647e+070.416071
...............
341low GHG lighting2.268657e+070.374773
141combined cycle power plant2.117825e+070.349857
481osmotic2.104861e+070.347715
441nutrient management2.047841e+070.338296
541solar heating1.541945e+070.254723
451ocean thermal energy conversion (OTEC)1.103012e+070.182213
581updraft1.097947e+070.181377
231energy-from-waste9.231716e+060.152504
161compressed air energy storage (CAES)9.101826e+060.150359
521regenerative braking8.978962e+060.148329
331livestock management8.962547e+060.148058
351low GHG public transport / rapid transit design7.996591e+060.132101
71agroforestry6.371010e+060.105247
311inert anode5.232146e+060.086433
61afforestation5.114686e+060.084493
431nuclear fusion3.960661e+060.065429
411non-motorised transport design3.831696e+060.063298
491passive house3.770893e+060.062294
461off-grid systems2.843348e+060.046971
591waste heat reuse2.299104e+060.037980
401new sources of protein2.005463e+060.033129
171compressed/converted natural gas1.886744e+060.031168
511reduced need for travel1.217982e+060.020121
421nuclear fission5.000000e+050.008260
20electron acceptor4.111080e+050.006791
40onshore wind2.000000e+050.003304
371manure management5.000000e+040.000826
10downdraft5.000000e+040.000826
301improved durability5.000000e+040.000826
00direct seeding5.000000e+040.000826
\n", + "

62 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " priority technology sum \\\n", + "36 1 low GHG steel 9.053252e+08 \n", + "53 1 solar PV 5.890533e+08 \n", + "50 1 recycling 5.313126e+08 \n", + "22 1 energy storage 4.813741e+08 \n", + "27 1 geothermal 3.454444e+08 \n", + "25 1 fuel cell 3.137696e+08 \n", + "28 1 grid management 2.713052e+08 \n", + "18 1 concentrated solar power (CSP) 2.240826e+08 \n", + "13 1 carbon capture and storage 2.081698e+08 \n", + "60 1 waste management 1.812890e+08 \n", + "20 1 district heating and/or cooling 1.796751e+08 \n", + "57 1 traffic management 1.739252e+08 \n", + "15 1 combined heat and power (CHP) 1.710958e+08 \n", + "12 1 biorefinery design 1.488818e+08 \n", + "47 1 offshore wind 1.397011e+08 \n", + "55 1 thermal storage 1.042644e+08 \n", + "29 1 heat pumps 1.022275e+08 \n", + "9 1 biofuel 1.016568e+08 \n", + "19 1 demand-side energy management 8.734684e+07 \n", + "56 1 tidal energy 8.708085e+07 \n", + "21 1 electric vehicle design 8.568994e+07 \n", + "39 1 nanomaterial 6.605166e+07 \n", + "61 1 wave energy 5.983902e+07 \n", + "3 0 low GHG data centers 5.005764e+07 \n", + "38 1 micro-grids 4.002983e+07 \n", + "8 1 biochar 3.684225e+07 \n", + "24 1 forest management 3.622698e+07 \n", + "32 1 liquefied natural gas (LNG) 2.576755e+07 \n", + "26 1 gas turbine 2.552590e+07 \n", + "5 0 soil management 2.518647e+07 \n", + ".. ... ... ... 
\n", + "34 1 low GHG lighting 2.268657e+07 \n", + "14 1 combined cycle power plant 2.117825e+07 \n", + "48 1 osmotic 2.104861e+07 \n", + "44 1 nutrient management 2.047841e+07 \n", + "54 1 solar heating 1.541945e+07 \n", + "45 1 ocean thermal energy conversion (OTEC) 1.103012e+07 \n", + "58 1 updraft 1.097947e+07 \n", + "23 1 energy-from-waste 9.231716e+06 \n", + "16 1 compressed air energy storage (CAES) 9.101826e+06 \n", + "52 1 regenerative braking 8.978962e+06 \n", + "33 1 livestock management 8.962547e+06 \n", + "35 1 low GHG public transport / rapid transit design 7.996591e+06 \n", + "7 1 agroforestry 6.371010e+06 \n", + "31 1 inert anode 5.232146e+06 \n", + "6 1 afforestation 5.114686e+06 \n", + "43 1 nuclear fusion 3.960661e+06 \n", + "41 1 non-motorised transport design 3.831696e+06 \n", + "49 1 passive house 3.770893e+06 \n", + "46 1 off-grid systems 2.843348e+06 \n", + "59 1 waste heat reuse 2.299104e+06 \n", + "40 1 new sources of protein 2.005463e+06 \n", + "17 1 compressed/converted natural gas 1.886744e+06 \n", + "51 1 reduced need for travel 1.217982e+06 \n", + "42 1 nuclear fission 5.000000e+05 \n", + "2 0 electron acceptor 4.111080e+05 \n", + "4 0 onshore wind 2.000000e+05 \n", + "37 1 manure management 5.000000e+04 \n", + "1 0 downdraft 5.000000e+04 \n", + "30 1 improved durability 5.000000e+04 \n", + "0 0 direct seeding 5.000000e+04 \n", + "\n", + " percentage \n", + "36 14.955627 \n", + "53 9.730936 \n", + "50 8.777082 \n", + "22 7.952116 \n", + "27 5.706610 \n", + "25 5.183354 \n", + "28 4.481859 \n", + "18 3.701759 \n", + "13 3.438886 \n", + "60 2.994826 \n", + "20 2.968163 \n", + "57 2.873178 \n", + "15 2.826438 \n", + "12 2.459471 \n", + "47 2.307808 \n", + "55 1.722408 \n", + "29 1.688759 \n", + "9 1.679331 \n", + "19 1.442936 \n", + "56 1.438542 \n", + "21 1.415565 \n", + "39 1.091148 \n", + "61 0.988518 \n", + "3 0.826933 \n", + "38 0.661278 \n", + "8 0.608620 \n", + "24 0.598456 \n", + "32 0.425670 \n", + "26 0.421678 \n", + "5 0.416071 
\n", + ".. ... \n", + "34 0.374773 \n", + "14 0.349857 \n", + "48 0.347715 \n", + "44 0.338296 \n", + "54 0.254723 \n", + "45 0.182213 \n", + "58 0.181377 \n", + "23 0.152504 \n", + "16 0.150359 \n", + "52 0.148329 \n", + "33 0.148058 \n", + "35 0.132101 \n", + "7 0.105247 \n", + "31 0.086433 \n", + "6 0.084493 \n", + "43 0.065429 \n", + "41 0.063298 \n", + "49 0.062294 \n", + "46 0.046971 \n", + "59 0.037980 \n", + "40 0.033129 \n", + "17 0.031168 \n", + "51 0.020121 \n", + "42 0.008260 \n", + "2 0.006791 \n", + "4 0.003304 \n", + "37 0.000826 \n", + "1 0.000826 \n", + "30 0.000826 \n", + "0 0.000826 \n", + "\n", + "[62 rows x 4 columns]" + ] + }, + "metadata": {} + } + ], + "execution_count": 114, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "sector_sum.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 32, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectorsumpercentage
2Energy2.395613e+0939.574619
4Industry1.469098e+0924.268933
3Human settlements1.037377e+0917.137079
5Transport7.247211e+0811.972115
1Buildings2.849001e+084.706441
\n", + "
" + ], + "text/plain": [ + " sector sum percentage\n", + "2 Energy 2.395613e+09 39.574619\n", + "4 Industry 1.469098e+09 24.268933\n", + "3 Human settlements 1.037377e+09 17.137079\n", + "5 Transport 7.247211e+08 11.972115\n", + "1 Buildings 2.849001e+08 4.706441" + ] + }, + "metadata": {} + } + ], + "execution_count": 32, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priority_sum.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 33, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
prioritysumpercentage
115.977453e+0998.745249
007.595522e+071.254751
\n", + "
" + ], + "text/plain": [ + " priority sum percentage\n", + "1 1 5.977453e+09 98.745249\n", + "0 0 7.595522e+07 1.254751" + ] + }, + "metadata": {} + } + ], + "execution_count": 33, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "categories.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 34, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating1
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", + "
" + ], + "text/plain": [ + " sector technology_group technology \\\n", + "0 Energy Solar energy sources solar PV \n", + "1 Energy Solar energy sources concentrated solar power (CSP) \n", + "2 Energy Solar energy sources solar heating \n", + "3 Energy Solar energy sources solar dryer \n", + "4 Energy Solar energy sources solar water pump \n", + "\n", + " description \\\n", + "0 Also “solar photovoltaic”; technology of using... \n", + "1 Technology of producing electricity by concent... \n", + "2 Technology of capturing the sun's radiation an... \n", + "3 Technology of drying substances, especially fo... \n", + "4 Technology of powering electrical water pumps ... \n", + "\n", + " technology_term \\\n", + "0 photovoltaic, PV, “solar cell”  \n", + "1 “concentrated solar power”, CSP, “concentrated... \n", + "2 “solar heating” \n", + "3 “solar dryer” \n", + "4 “solar water pump” \n", + "\n", + " source priority \n", + "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", + "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", + "2 https://www.ctc-n.org/technologies/solar-heating 1 \n", + "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", + "4 https://www.ctc-n.org/technologies/solar-water... 
0 " + ] + }, + "metadata": {} + } + ], + "execution_count": 34, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "extended_categories = pd.merge(technology_table, categories, \n", + " how = 'left', left_on = 'category', right_on = 'technology')" + ], + "outputs": [], + "execution_count": 35, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "extended_categories.groupby('priority').count()['id']" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 36, + "data": { + "text/plain": [ + "priority\n", + "0.0 42\n", + "1.0 2150\n", + "Name: id, dtype: int64" + ] + }, + "metadata": {} + } + ], + "execution_count": 36, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#categories v1\n", + "1114/(1085+1114)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 37, + "data": { + "text/plain": [ + "0.506593906321055" + ] + }, + "metadata": {} + } + ], + "execution_count": 37, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#categories v2\n", + "2150/(42+2150)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 38, + "data": { + "text/plain": [ + "0.9808394160583942" + ] + }, + "metadata": {} + } + ], + "execution_count": 38, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "what about the money" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#extended_categories.to_csv('../data/extended_categories_v2.csv')" + ], + "outputs": [], + "execution_count": 64, + "metadata": {} + }, + { + "cell_type": "code", + "source": [], + "outputs": [], + "execution_count": null, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "extended_categories.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 67, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
technology_termsidcategorysectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0forest management835398forest managementAgricultureForest protectionforest managementTechnology of increasing carbon stocks of stan...“forest management”, “management of forest”https://www.ctc-n.org/technologies/forest-mana...1.0
1forest management835541forest managementAgricultureForest protectionforest managementTechnology of increasing carbon stocks of stan...“forest management”, “management of forest”https://www.ctc-n.org/technologies/forest-mana...1.0
2bioplastic812602bioplasticsIndustryUse of innovative materialsbioplasticsTechnology of using renewable biomass material...bioplastichttps://www.ctc-n.org/technologies/bioplastics1.0
3DSM811592demand-side energy managementBuildingsDecreased energy consumption design or practicesdemand-side energy managementTechnology inside buildings that help ensure t...“demand-side energy management”, “demand-side ...https://www.ctc-n.org/technologies/energy-mana...1.0
4waste management847641waste managementHuman settlementsWaste managementwaste managementTechnology of maximising efficiency of resourc...“waste management”, “management of waste”https://www.ctc-n.org/technologies/integrated-...1.0
\n", + "
" + ], + "text/plain": [ + " technology_terms id category \\\n", + "0 forest management 835398 forest management \n", + "1 forest management 835541 forest management \n", + "2 bioplastic 812602 bioplastics \n", + "3 DSM 811592 demand-side energy management \n", + "4 waste management 847641 waste management \n", + "\n", + " sector technology_group \\\n", + "0 Agriculture Forest protection \n", + "1 Agriculture Forest protection \n", + "2 Industry Use of innovative materials \n", + "3 Buildings Decreased energy consumption design or practices \n", + "4 Human settlements Waste management \n", + "\n", + " technology \\\n", + "0 forest management \n", + "1 forest management \n", + "2 bioplastics \n", + "3 demand-side energy management \n", + "4 waste management \n", + "\n", + " description \\\n", + "0 Technology of increasing carbon stocks of stan... \n", + "1 Technology of increasing carbon stocks of stan... \n", + "2 Technology of using renewable biomass material... \n", + "3 Technology inside buildings that help ensure t... \n", + "4 Technology of maximising efficiency of resourc... \n", + "\n", + " technology_term \\\n", + "0 “forest management”, “management of forest” \n", + "1 “forest management”, “management of forest” \n", + "2 bioplastic \n", + "3 “demand-side energy management”, “demand-side ... \n", + "4 “waste management”, “management of waste” \n", + "\n", + " source priority \n", + "0 https://www.ctc-n.org/technologies/forest-mana... 1.0 \n", + "1 https://www.ctc-n.org/technologies/forest-mana... 1.0 \n", + "2 https://www.ctc-n.org/technologies/bioplastics 1.0 \n", + "3 https://www.ctc-n.org/technologies/energy-mana... 1.0 \n", + "4 https://www.ctc-n.org/technologies/integrated-... 
1.0 " + ] + }, + "metadata": {} + } + ], + "execution_count": 67, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "categories_raw_minus_categories_extended = pd.merge(categories[['sector','technology_group','technology']], extended_categories, how='left', on='technology')" + ], + "outputs": [], + "execution_count": 70, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "len(categories_raw_minus_categories_extended)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 76, + "data": { + "text/plain": [ + "2227" + ] + }, + "metadata": {} + } + ], + "execution_count": 76, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "not_invested = categories_raw_minus_categories_extended[categories_raw_minus_categories_extended.id.isnull()]\\\n", + " [['sector_x','technology_group_x','technology']]\\\n", + " .reset_index(drop=True)\\\n", + " .rename(columns={'sector_x':'sector','technology_group_x':'technology_group'})\\\n", + " .merge(categories[['technology','priority']], how='left',on='technology')\\\n", + " .drop_duplicates()" + ], + "outputs": [], + "execution_count": 133, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "not_invested.to_csv('../data/not_invested.csv',index=False)" + ], + "outputs": [], + "execution_count": 141, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "not_invested.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 140, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectortechnology_grouptechnologypriority
0EnergySolar energy sourcessolar dryer0
1EnergySolar energy sourcessolar water pump0
2EnergySystem innovationlow GHG transmission1
11TransportFuelelectric battery1
12TransportVehicle designlow GHG aircraft design1
\n", + "
" + ], + "text/plain": [ + " sector technology_group technology priority\n", + "0 Energy Solar energy sources solar dryer 0\n", + "1 Energy Solar energy sources solar water pump 0\n", + "2 Energy System innovation low GHG transmission 1\n", + "11 Transport Fuel electric battery 1\n", + "12 Transport Vehicle design low GHG aircraft design 1" + ] + }, + "metadata": {} + } + ], + "execution_count": 140, + "metadata": {} + }, + { + "cell_type": "code", + "source": [], + "outputs": [], + "execution_count": null, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "----\n", + "## Join calculation (parallel to Tableau)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cordis_data = pd.merge(extended_categories, inner_table, how='inner', on='id')" + ], + "outputs": [], + "execution_count": 40, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cordis_data.columns" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 41, + "data": { + "text/plain": [ + "Index(['technology_terms', 'id', 'category', 'sector', 'technology_group',\n", + " 'technology', 'description', 'technology_term', 'source', 'priority',\n", + " 'rcn', 'acronym', 'status', 'programme', 'topics', 'frameworkProgramme',\n", + " 'title', 'startDate', 'endDate', 'projectUrl', 'objective', 'totalCost',\n", + " 'ecMaxContribution', 'call', 'fundingScheme', 'coordinator',\n", + " 'coordinatorCountry', 'participants', 'participantCountries',\n", + " 'subjects'],\n", + " dtype='object')" + ] + }, + "metadata": {} + } + ], + "execution_count": 41, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "#### CleanTech Projects" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "len(cordis_data.id.unique())" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 42, + "data": { + "text/plain": [ + "1534" + ] + }, + "metadata": {} + } + ], + "execution_count": 42, +
"metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priority_table = cordis_data.groupby('id').mean()['priority'].reset_index()" + ], + "outputs": [], + "execution_count": 45, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priority_summary = priority_table.groupby('priority').count().reset_index()\n", + "priority_summary" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 52, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
priorityid
00.00000036
10.5000004
20.6666671
31.0000001487
\n", + "
" + ], + "text/plain": [ + " priority id\n", + "0 0.000000 36\n", + "1 0.500000 4\n", + "2 0.666667 1\n", + "3 1.000000 1487" + ] + }, + "metadata": {} + } + ], + "execution_count": 52, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "A project can be tagged with more than one technology, and those technologies may or may not be priority ones, so the mean priority per project can fall between 0 and 1. We count a project as priority only when its mean priority is exactly 1 (every matched technology is a priority one) and as non-priority otherwise. Note that the grouped counts above sum to 1528 while there are 1534 unique projects, so the 6 projects missing from the grouped table (presumably those without a priority value) are also counted as non-priority." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priority = priority_summary.loc[3,'id']\n", + "total = len(cordis_data.id.unique())\n", + "\n", + "print('Priority projects: {}, {}'.format(priority, priority/total*100))\n", + "print('Non-priority projects: {}, {}'.format(total-priority, (1-(priority/total))*100))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Priority projects: 1487, 96.9361147327249\n", + "Non-priority projects: 47, 3.0638852672751016\n" + ] + } + ], + "execution_count": 146, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priority_summary.id.sum()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 59, + "data": { + "text/plain": [ + "1528" + ] + }, + "metadata": {} + } + ], + "execution_count": 59, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "#### EU contribution" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priority_conrtibution_summary = pd.merge(priority_table, cordis_data[['id','ecMaxContribution']], how='left', on='id')\n", + "priority_conrtibution_summary = 
priority_conrtibution_summary.groupby('priority').sum()['ecMaxContribution'].reset_index()\n", + "priority_conrtibution_summary" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 168, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
priorityecMaxContribution
00.0000007.570522e+07
10.5000004.000000e+05
20.6666671.500000e+05
31.0000005.978837e+09
\n", + "
" + ], + "text/plain": [ + " priority ecMaxContribution\n", + "0 0.000000 7.570522e+07\n", + "1 0.500000 4.000000e+05\n", + "2 0.666667 1.500000e+05\n", + "3 1.000000 5.978837e+09" + ] + }, + "metadata": {} + } + ], + "execution_count": 168, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "A project can be tagged with more than one technology, and those technologies may or may not be priority ones, so the mean priority per project can fall between 0 and 1. Only the contribution of projects whose mean priority is exactly 1 is counted as priority; all remaining contribution is counted as non-priority." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "priorityM = priority_conrtibution_summary.loc[3,'ecMaxContribution']\n", + "totalM = cordis_data.ecMaxContribution.sum()\n", + "\n", + "print('Priority contribution: {}, {}'.format(priorityM/1000000, priorityM/totalM*100))\n", + "print('Non-priority contribution: {}, {}'.format((totalM-priorityM)/1000000, (1-(priorityM/totalM))*100))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Priority contribution: 5978.83690700002, 98.21835140837607\n", + "Non-priority contribution: 108.45413511998082, 1.78164859162393\n" + ] + } + ], + "execution_count": 171, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "totalM" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 170, + "data": { + "text/plain": [ + "6087291042.120001" + ] + }, + "metadata": {} + } + ], + "execution_count": 170, + "metadata": {} } - ], - "source": [ - "priorityM = priority_conrtibution_summary.loc[3,'ecMaxContribution']\n", - "totalM = cordis_data.ecMaxContribution.sum()\n", - "\n", - "print('Priority contribution: {}, {}'.format(priorityM/1000000, priorityM/totalM*100))\n", - "print('Non-priority contribution: {}, {}'.format((totalM-priorityM)/1000000, (1-(priorityM/totalM))*100))" - ] - }, - { - "cell_type": "code", - "execution_count": 170, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6087291042.120001" - ] - }, - "execution_count": 170, - "metadata": {}, -
"output_type": "execute_result" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "nteract": { + "version": "0.15.0" } - ], - "source": [ - "totalM" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" }, - "nteract": { - "version": "0.15.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/notebooks/Clean-tech-probabilistic.ipynb b/notebooks/Clean-tech-probabilistic.ipynb index 8747f39..150eab8 100644 --- a/notebooks/Clean-tech-probabilistic.ipynb +++ b/notebooks/Clean-tech-probabilistic.ipynb @@ -1,1451 +1,1454 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clean Technologies - Probabilistic" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating0
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", - "
" + "cell_type": "markdown", + "source": [ + "## Clean Technologies - Probabilistic" ], - "text/plain": [ - " sector technology_group technology \\\n", - "0 Energy Solar energy sources solar PV \n", - "1 Energy Solar energy sources concentrated solar power (CSP) \n", - "2 Energy Solar energy sources solar heating \n", - "3 Energy Solar energy sources solar dryer \n", - "4 Energy Solar energy sources solar water pump \n", - "\n", - " description \\\n", - "0 Also “solar photovoltaic”; technology of using... \n", - "1 Technology of producing electricity by concent... \n", - "2 Technology of capturing the sun's radiation an... \n", - "3 Technology of drying substances, especially fo... \n", - "4 Technology of powering electrical water pumps ... \n", - "\n", - " technology_term \\\n", - "0 photovoltaic, PV, “solar cell”  \n", - "1 “concentrated solar power”, CSP, “concentrated... \n", - "2 “solar heating” \n", - "3 “solar dryer” \n", - "4 “solar water pump” \n", - "\n", - " source priority \n", - "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", - "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", - "2 https://www.ctc-n.org/technologies/solar-heating 0 \n", - "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", - "4 https://www.ctc-n.org/technologies/solar-water... 
0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "categories = pd.read_excel('../data/categories.xls')\n", - "categories = categories.fillna(method='ffill')\n", - "categories.columns = ['sector','technology_group','technology','description','technology_term','source','priority']\n", - "categories.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "matrix = []\n", - "for term in categories['technology_term']:\n", - " row = [x.strip() for x in term.split(',')]\n", - " row = [i.replace('“', '').replace('”', '') for i in row]\n", - " matrix.append(row)\n", - "categories['technology_term'] = matrix" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "#https://data.europa.eu/euodp/en/data/dataset/cordisH2020projects" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "cordish2020 = pd.read_excel('../data/cordis-h2020projects.xlsx')\n", - "#cordish2020 = pd.read_csv('../data/cordis-h2020projects.csv', sep=\";\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "cordish2020['totalCost'] = [float(str(i).replace(',', '.')) for i in cordish2020['totalCost']]\n", - "cordish2020['ecMaxContribution'] = [float(str(i).replace(',', '.')) for i in cordish2020['ecMaxContribution']]\n", - "cordish2020['startDate'] = cordish2020['startDate'].map(pd.Timestamp)\n", - "cordish2020['endDate'] = cordish2020['endDate'].map(pd.Timestamp)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ + "metadata": {} + }, { - "data": { - "text/plain": [ - "Index(['rcn', 'id', 'acronym', 'status', 'programme', 'topics',\n", - " 'frameworkProgramme', 'title', 'startDate', 
'endDate', 'projectUrl',\n", - " 'objective', 'totalCost', 'ecMaxContribution', 'call', 'fundingScheme',\n", - " 'coordinator', 'coordinatorCountry', 'participants',\n", - " 'participantCountries', 'subjects'],\n", - " dtype='object')" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cordish2020.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "%matplotlib inline" + ], + "outputs": [], + "execution_count": 1, + "metadata": {} + }, { - "data": { - "text/plain": [ - "24554" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Total of proyects\n", - "len(cordish2020)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "categories = pd.read_excel('../data/categories.xls')\n", + "categories = categories.fillna(method='ffill')\n", + "categories.columns = ['sector','technology_group','technology','description','technology_term','source','priority']\n", + "categories.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 4, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sectortechnology_grouptechnologydescriptiontechnology_termsourcepriority
0EnergySolar energy sourcessolar PVAlso “solar photovoltaic”; technology of using...photovoltaic, PV, “solar cell”https://www.ctc-n.org/technologies/solar-pv1
1EnergySolar energy sourcesconcentrated solar power (CSP)Technology of producing electricity by concent...“concentrated solar power”, CSP, “concentrated...https://setis.ec.europa.eu/technologies/concen...1
2EnergySolar energy sourcessolar heatingTechnology of capturing the sun's radiation an...“solar heating”https://www.ctc-n.org/technologies/solar-heating0
3EnergySolar energy sourcessolar dryerTechnology of drying substances, especially fo...“solar dryer”https://www.ctc-n.org/technologies/solar-dryer0
4EnergySolar energy sourcessolar water pumpTechnology of powering electrical water pumps ...“solar water pump”https://www.ctc-n.org/technologies/solar-water...0
\n", + "
" + ], + "text/plain": [ + " sector technology_group technology \\\n", + "0 Energy Solar energy sources solar PV \n", + "1 Energy Solar energy sources concentrated solar power (CSP) \n", + "2 Energy Solar energy sources solar heating \n", + "3 Energy Solar energy sources solar dryer \n", + "4 Energy Solar energy sources solar water pump \n", + "\n", + " description \\\n", + "0 Also “solar photovoltaic”; technology of using... \n", + "1 Technology of producing electricity by concent... \n", + "2 Technology of capturing the sun's radiation an... \n", + "3 Technology of drying substances, especially fo... \n", + "4 Technology of powering electrical water pumps ... \n", + "\n", + " technology_term \\\n", + "0 photovoltaic, PV, “solar cell”  \n", + "1 “concentrated solar power”, CSP, “concentrated... \n", + "2 “solar heating” \n", + "3 “solar dryer” \n", + "4 “solar water pump” \n", + "\n", + " source priority \n", + "0 https://www.ctc-n.org/technologies/solar-pv 1 \n", + "1 https://setis.ec.europa.eu/technologies/concen... 1 \n", + "2 https://www.ctc-n.org/technologies/solar-heating 0 \n", + "3 https://www.ctc-n.org/technologies/solar-dryer 0 \n", + "4 https://www.ctc-n.org/technologies/solar-water... 
0 " + ] + }, + "metadata": {} + } + ], + "execution_count": 4, + "metadata": {} + }, { - "data": { - "text/plain": [ - "223" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# unique list of technology term\n", - "technology_term = [y for x in categories['technology_term'] for y in x if y != '']\n", - "len(technology_term)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "all_technology_terms = [i for i in set([*list(categories['technology_group']),*list(technology_term)])]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "matrix = []\n", + "for term in categories['technology_term']:\n", + " row = [x.strip() for x in term.split(',')]\n", + " row = [i.replace('“', '').replace('”', '') for i in row]\n", + " matrix.append(row)\n", + "categories['technology_term'] = matrix" + ], + "outputs": [], + "execution_count": 5, + "metadata": {} + }, { - "data": { - "text/plain": [ - "235" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(all_technology_terms)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": {} + }, { - "data": { - "text/plain": [ - "['parabolic trough',\n", - " 'carbon capture and storage',\n", - " 'biorefinery design',\n", - " 'conservation tillage',\n", - " 'meat alternative',\n", - " 'solar water pump',\n", - " 'demand side management',\n", - " 'Improved durability',\n", - " 'renewable energy',\n", - " 'Storage']" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all_technology_terms[:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "### Syntactic probabilistic 
classification using spacy and fuzzywuzzy" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "import spacy\n", - "import numpy as np\n", - "from spacy import displacy\n", - "from collections import Counter\n", - "import en_core_web_sm\n", - "from fuzzywuzzy import process, fuzz\n", - "import re\n", - "nlp = en_core_web_sm.load()\n", - "import textdistance as tx\n", - "import unicodedata\n", - "from textdistance.algorithms import vector_based" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "def clean(string):\n", - " return re.sub(r'[-\\s]+', '-',\n", - " str(\n", - " re.sub(r'[^\\w\\s-]', '',\n", - " unicodedata.normalize('NFKD', string)\n", - " .strip()\n", - " )))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Objective example\n", - "\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "#https://data.europa.eu/euodp/en/data/dataset/cordisH2020projects" + ], + "outputs": [], + "execution_count": 6, + "metadata": {} + }, { - "data": { - "text/plain": [ - "'Offshore wind has long been identified as one of the most promising energy forms to improve the penetration of renewables in the European energy mix. Since most of offshore wind resources is available over deep waters at a considerable distance from the shore, it is inevitable that the campaign of the offshore wind exploitation would move from shallow waters to deep waters. As the conventional bottom-fixed offshore wind turbine is no longer economically viable over deep waters (>50m), the floating offshore wind turbine (FOWT) seems to be an appealing alternative to harvest the ampler deep-water wind. FOWTs are, however, threaten by the hostile deep offshore environment, which would induce unacceptable tilt motions and drastic vibrations of the floating system. 
The undesirable loadings on the blades, tower, floating foundations and other components, results in mechanical failures and electrical faults of FOWTs, both of which could lead to operation interruptions and cause disastrous economic losses. Overcoming the difficulties of effectiveness, robustness, integration and multi-scalability of optimal control and fault diagnosis system of the FOWTs is precisely the topic of the proposal, which would actively contribute to the implementation of the Economical Deep Offshore Wind Exploitation (EDOWE) by introducing the concept of an innovative distributed multi-scale control and monitoring system. Delft University of Technology owns top-level expertise in wind turbine/farm control and distributed multi-scale applications. Its world-leading experimental facilities provide a solid foundation for hosting a systematic study on the control and monitoring strategies of the FOWTs. Moreover, secondment at Politecnico di Milano and collaborating with our industrial partner 2B Energy on the action help developing an advanced solution to economically harvest deep offshore wind, and thus contributing to achieve the renewables consumption goal set by European Union.'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#cordish2020.loc[13]['objective']\n", - "cordish2020.loc[9631]['objective'][]" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "cordish2020 = pd.read_excel('../data/cordis-h2020projects.xlsx')\n", + "#cordish2020 = pd.read_csv('../data/cordis-h2020projects.csv', sep=\";\")" + ], + "outputs": [], + "execution_count": 7, + "metadata": {} + }, { - "data": { - "text/plain": [ - "['Offshore wind',\n", - " 'the most promising energy forms',\n", - " 'the penetration',\n", - " 'renewables',\n", - " 'the European energy mix',\n", - " 'offshore wind resources',\n", - " 'deep waters',\n", - " 
'a considerable distance',\n", - " 'the shore',\n", - " 'it',\n", - " 'the campaign',\n", - " 'the offshore wind exploitation',\n", - " 'shallow waters',\n", - " 'deep waters',\n", - " 'the conventional bottom-fixed offshore wind turbine',\n", - " 'deep waters',\n", - " 'the floating offshore wind turbine',\n", - " 'FOWT',\n", - " 'an appealing alternative',\n", - " 'the ampler deep-water wind']" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "text = cordish2020['objective'][9631]\n", - "doc = nlp(text)\n", - "docs = list(map(str, doc.noun_chunks))\n", - "docs[:20]" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "#find_best_matching_tech(technologies, docs)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "#technologies" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Get all the synonyms from the short descriptions**" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "technologies = all_technology_terms" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "cordish2020['totalCost'] = [float(str(i).replace(',', '.')) for i in cordish2020['totalCost']]\n", + "cordish2020['ecMaxContribution'] = [float(str(i).replace(',', '.')) for i in cordish2020['ecMaxContribution']]\n", + "cordish2020['startDate'] = cordish2020['startDate'].map(pd.Timestamp)\n", + "cordish2020['endDate'] = cordish2020['endDate'].map(pd.Timestamp)" + ], + "outputs": [], + "execution_count": 8, + "metadata": {} + }, { - "data": { - "text/plain": [ - "235" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"len(technologies)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "def find_best_matching_tech(techs, doc):\n", - " best_matches = []\n", - " best1 = [(\"\", 0)]\n", - " best2 = [(\"\", 0)]\n", - " val2=\"\"\n", - " best_tech = []\n", - " for val in techs:\n", - " best_curs = process.extract(val, doc, limit=1, scorer=fuzz.ratio)\n", - " terms = \"\"\n", - " for cat in best_curs:\n", - " terms = terms + cat[0] + \",\" \n", - " terms = terms[:-1]\n", - " if len(best_curs)==0:\n", - " avg = 0\n", - " else:\n", - " avg = sum(i for _, i in best_curs)/float(len(best_curs))\n", - " best_cur = [(terms, avg)]\n", - " best_matches.extend(best_cur)\n", - " if best_cur[0][1] > best1[0][1]:\n", - " best1 = best_cur\n", - " best_tech = [val]\n", - " elif best_cur[0][1] == best1[0][1]:\n", - " best1.extend(best_cur)\n", - " best_tech.append(val)\n", - " else:\n", - " if best_cur[0][1] > best2[0][1] and len(best1)<3:\n", - " best2 = best_cur\n", - " val2 = val\n", - " if len(best1)<3: \n", - " best1.extend(best2)\n", - " best_tech.append(val2)\n", - " return best_matches, best_tech, best1" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [], - "source": [ - "#process.extract(each tech term, each noun chunk in each objective)" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "cordish2020['category'] = pd.Series(np.random.randn(len(cordish2020)), index=cordish2020.index)\n", - "for idx in range(len(cordish2020['objective'])):\n", - " text = cordish2020['objective'][idx]\n", - " doc = nlp(text)\n", - " # doc = \" \".join([token.lemma_ for token in doc])\n", - " #doc = nlp(doc)\n", - " docs = list(map(str, doc.noun_chunks))\n", - " #docs = [clean(str(txt)) for txt in docs]\n", - " _, best_tech, best_match = find_best_matching_tech(technologies, docs)\n", - " #print(best_tech, best_match)\n", - " if 
round(best_match[0][1]) > 77:\n", - " cordish2020['category'][idx] = \" \".join(best_tech)\n", - " else:\n", - " cordish2020['category'][idx] = 'None'" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "#category is the column to store the best matches with technology" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "cordish2020.columns" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 9, + "data": { + "text/plain": [ + "Index(['rcn', 'id', 'acronym', 'status', 'programme', 'topics',\n", + " 'frameworkProgramme', 'title', 'startDate', 'endDate', 'projectUrl',\n", + " 'objective', 'totalCost', 'ecMaxContribution', 'call', 'fundingScheme',\n", + " 'coordinator', 'coordinatorCountry', 'participants',\n", + " 'participantCountries', 'subjects'],\n", + " dtype='object')" + ] + }, + "metadata": {} + } + ], + "execution_count": 9, + "metadata": {} + }, { - "data": { - "text/plain": [ - "17024" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cordish2020['category'].value_counts()['None']" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "cordish2020matches2 = cordish2020[cordish2020['category'] != \"None\" ]" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "# Total of proyects\n", + "len(cordish2020)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 10, + "data": { + "text/plain": [ + "24554" + ] + }, + "metadata": {} + } + ], + "execution_count": 10, + "metadata": {} + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.3066710108332655\n" - ] - } - ], - "source": [ - "print(len(cordish2020matches2)/len(cordish2020))" - ] - }, - { - 
"cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "# unique list of technology term\n", + "technology_term = [y for x in categories['technology_term'] for y in x if y != '']\n", + "len(technology_term)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 11, + "data": { + "text/plain": [ + "223" + ] + }, + "metadata": {} + } + ], + "execution_count": 11, + "metadata": {} + }, { - "data": { - "text/plain": [ - "0 837750\n", - "5 835398\n", - "6 838845\n", - "9 827561\n", - "10 823782\n", - "12 825435\n", - "16 835541\n", - "17 833088\n", - "22 835051\n", - "27 815279\n", - "29 810812\n", - "32 827826\n", - "33 812602\n", - "37 822897\n", - "40 828666\n", - "41 827565\n", - "43 811592\n", - "45 847641\n", - "48 817240\n", - "49 826588\n", - "Name: id, dtype: int64" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cordish2020matches2.id[:20]" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "source": [ + "all_technology_terms = [i for i in set([*list(categories['technology_group']),*list(technology_term)])]" + ], + "outputs": [], + "execution_count": 12, + "metadata": {} + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rcnidacronymstatusprogrammetopicsframeworkProgrammetitlestartDateendDate...participantCountriessubjectsmatches_groupcount_matches_groupcount_unique_matches_groupmatches_technologycount_matches_technologycount_unique_matches_technologycount_matchescategory
0222681837750FARMYNGSIGNEDH2020-EU.2.1.4.;H2020-EU.3.2.6.BBI.2018.SO3.F2H2020FlAgship demonstration of industrial scale pro...2019-06-012022-06-30...FR;NO;BE;ES;PL;CH;DE;NLNaN00000agrosylviculture agro-sylviculture
52216738353983D-FOGRODSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Understanding forest growth dynamics using nov...2019-10-012021-09-30...NaNNaN00[forest management]111reforestation distributed production
6222088838845SPIRSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Spasers in the infrared range2020-03-012022-02-28...NaNNaN00000nanomaterial reforestation
9217264827561UFineCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020An innovative ultra-fine bubble engineered noz...2018-09-012018-12-31...NaNNaN00000smelt reduction electric battery
10220938823782SSHOCSIGNEDH2020-EU.1.4.1.1.INFRAEOSC-04-2018H2020Social Sciences & Humanities Open Cloud2019-01-012022-04-30...UK;NL;FR;EL;AT;IT;DENaN00000System innovation Energy use innovation
12221878825435DECOMPACTSIGNEDH2020-EU.1.1.ERC-2018-PoCH2020Development of Collagenase Polymeric nanocapsu...2019-05-012020-10-31...NaNNaN00000soil treatment pump system
16221633835541MOVESSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020MOnitoring VEgetation status and functioning a...2019-10-012021-09-30...NaNNaN00[forest management]111forest management Waste management
17222602833088InfraStressSIGNEDH2020-EU.3.7.4.;H2020-EU.3.7.2.SU-INFRA01-2018-2019-2020H2020Improving resilience of sensitive industrial p...2019-06-012021-05-31...SI;CY;PT;EL;DE;IE;IT;FR;IL;PL;BENaN00000SPS Wind
22221963835051NucSatSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Satellites and nuclear information. Production...2019-07-012021-06-30...NaNNaN00000soil management grid management waste management
272185298152795G-VINNISIGNEDH2020-EU.2.1.1.ICT-17-2018H20205G Verticals INNovation Infrastructure2018-07-012021-06-30...LU;DE;NO;EL;PT;IE;ES;DK;FI;UK;ITNaN00000soil management grid management waste management
29216990810812FASTERSIGNEDH2020-EU.4.b.WIDESPREAD-05-2017H2020Farmers’ Adaptation Sustainability in Tunis...2018-11-012021-10-31...UK;SE;ES;TNNaN00000Waste management waste management straw manage...
32217204827826CreamOliveSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Industrial scale-up of the first all-natural s...2018-07-012018-12-31...NaNNaN00000meat alternative alternatives to meat
33217649812602POLIPOCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020The first non-fermentative production process ...2018-06-012018-11-30...NaNNaN00[bioplastic]414bioplastic protection of soil
37217413822897BEATIKSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Beatik- Collaborative Digital Scores Platform ...2018-09-012020-03-31...NaNNaN[Storage]11001Storage conservation tillage
40217195828666KERMACLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020A NEW VERIFICATION SYSTEM FOR COMPLEX RADIOTHE...2018-09-012018-12-31...NaNNaN[Storage]11001Storage soil treatment
41217265827565PTCCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Machine-Learning Technology for Digital Marketing2018-09-012019-01-31...NaNNaN00000grid management soil management
43217808811592NUTRI-NEEDSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Final development, clinical validation and lau...2018-06-012020-05-31...NaNNaN00[DSM]111DSM Cleaner product
45223666847641MICADOSIGNEDH2020-Euratom-1.7.;H2020-Euratom-1.1.NFRP-2018-10H2020Measurement and Instrumentation for Cleaning A...2019-06-012022-05-31...FR;DE;IT;BE;CZNaN[Waste management]21[waste management]214Waste management waste management nutrient man...
48216834817240POCCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020PieceOfCake: an AI-driven chatbot to manage co...2018-06-012018-11-30...NaNNaN00000RAPS Cleaner product
49223648826588APPLAUSESIGNEDH2020-EU.2.1.1.7.ECSEL-2018-1-IAH2020Advanced packaging for photonics, optics and e...2019-05-012022-04-30...DE;NO;NL;CH;AT;HU;FI;BE;FR;LV;ILNaN00000local manufacturing distributed manufacturing
\n", - "

20 rows × 29 columns

\n", - "
" + "cell_type": "code", + "source": [ + "len(all_technology_terms)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 13, + "data": { + "text/plain": [ + "235" + ] + }, + "metadata": {} + } ], - "text/plain": [ - " rcn id acronym status \\\n", - "0 222681 837750 FARMYNG SIGNED \n", - "5 221673 835398 3D-FOGROD SIGNED \n", - "6 222088 838845 SPIR SIGNED \n", - "9 217264 827561 UFine CLOSED \n", - "10 220938 823782 SSHOC SIGNED \n", - "12 221878 825435 DECOMPACT SIGNED \n", - "16 221633 835541 MOVES SIGNED \n", - "17 222602 833088 InfraStress SIGNED \n", - "22 221963 835051 NucSat SIGNED \n", - "27 218529 815279 5G-VINNI SIGNED \n", - "29 216990 810812 FASTER SIGNED \n", - "32 217204 827826 CreamOlive SIGNED \n", - "33 217649 812602 POLIPO CLOSED \n", - "37 217413 822897 BEATIK SIGNED \n", - "40 217195 828666 KERMA CLOSED \n", - "41 217265 827565 PTC CLOSED \n", - "43 217808 811592 NUTRI-NEED SIGNED \n", - "45 223666 847641 MICADO SIGNED \n", - "48 216834 817240 POC CLOSED \n", - "49 223648 826588 APPLAUSE SIGNED \n", - "\n", - " programme topics \\\n", - "0 H2020-EU.2.1.4.;H2020-EU.3.2.6. BBI.2018.SO3.F2 \n", - "5 H2020-EU.1.3.2. MSCA-IF-2018 \n", - "6 H2020-EU.1.3.2. MSCA-IF-2018 \n", - "9 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "10 H2020-EU.1.4.1.1. INFRAEOSC-04-2018 \n", - "12 H2020-EU.1.1. ERC-2018-PoC \n", - "16 H2020-EU.1.3.2. MSCA-IF-2018 \n", - "17 H2020-EU.3.7.4.;H2020-EU.3.7.2. SU-INFRA01-2018-2019-2020 \n", - "22 H2020-EU.1.3.2. MSCA-IF-2018 \n", - "27 H2020-EU.2.1.1. ICT-17-2018 \n", - "29 H2020-EU.4.b. WIDESPREAD-05-2017 \n", - "32 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "33 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "37 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "40 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "41 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. 
EIC-SMEInst-2018-2020 \n", - "43 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "45 H2020-Euratom-1.7.;H2020-Euratom-1.1. NFRP-2018-10 \n", - "48 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", - "49 H2020-EU.2.1.1.7. ECSEL-2018-1-IA \n", - "\n", - " frameworkProgramme title \\\n", - "0 H2020 FlAgship demonstration of industrial scale pro... \n", - "5 H2020 Understanding forest growth dynamics using nov... \n", - "6 H2020 Spasers in the infrared range \n", - "9 H2020 An innovative ultra-fine bubble engineered noz... \n", - "10 H2020 Social Sciences & Humanities Open Cloud \n", - "12 H2020 Development of Collagenase Polymeric nanocapsu... \n", - "16 H2020 MOnitoring VEgetation status and functioning a... \n", - "17 H2020 Improving resilience of sensitive industrial p... \n", - "22 H2020 Satellites and nuclear information. Production... \n", - "27 H2020 5G Verticals INNovation Infrastructure \n", - "29 H2020 Farmers’ Adaptation Sustainability in Tunis... \n", - "32 H2020 Industrial scale-up of the first all-natural s... \n", - "33 H2020 The first non-fermentative production process ... \n", - "37 H2020 Beatik- Collaborative Digital Scores Platform ... \n", - "40 H2020 A NEW VERIFICATION SYSTEM FOR COMPLEX RADIOTHE... \n", - "41 H2020 Machine-Learning Technology for Digital Marketing \n", - "43 H2020 Final development, clinical validation and lau... \n", - "45 H2020 Measurement and Instrumentation for Cleaning A... \n", - "48 H2020 PieceOfCake: an AI-driven chatbot to manage co... \n", - "49 H2020 Advanced packaging for photonics, optics and e... \n", - "\n", - " startDate endDate ... participantCountries subjects \\\n", - "0 2019-06-01 2022-06-30 ... FR;NO;BE;ES;PL;CH;DE;NL NaN \n", - "5 2019-10-01 2021-09-30 ... NaN NaN \n", - "6 2020-03-01 2022-02-28 ... NaN NaN \n", - "9 2018-09-01 2018-12-31 ... NaN NaN \n", - "10 2019-01-01 2022-04-30 ... UK;NL;FR;EL;AT;IT;DE NaN \n", - "12 2019-05-01 2020-10-31 ... 
NaN NaN \n", - "16 2019-10-01 2021-09-30 ... NaN NaN \n", - "17 2019-06-01 2021-05-31 ... SI;CY;PT;EL;DE;IE;IT;FR;IL;PL;BE NaN \n", - "22 2019-07-01 2021-06-30 ... NaN NaN \n", - "27 2018-07-01 2021-06-30 ... LU;DE;NO;EL;PT;IE;ES;DK;FI;UK;IT NaN \n", - "29 2018-11-01 2021-10-31 ... UK;SE;ES;TN NaN \n", - "32 2018-07-01 2018-12-31 ... NaN NaN \n", - "33 2018-06-01 2018-11-30 ... NaN NaN \n", - "37 2018-09-01 2020-03-31 ... NaN NaN \n", - "40 2018-09-01 2018-12-31 ... NaN NaN \n", - "41 2018-09-01 2019-01-31 ... NaN NaN \n", - "43 2018-06-01 2020-05-31 ... NaN NaN \n", - "45 2019-06-01 2022-05-31 ... FR;DE;IT;BE;CZ NaN \n", - "48 2018-06-01 2018-11-30 ... NaN NaN \n", - "49 2019-05-01 2022-04-30 ... DE;NO;NL;CH;AT;HU;FI;BE;FR;LV;IL NaN \n", - "\n", - " matches_group count_matches_group count_unique_matches_group \\\n", - "0 0 0 \n", - "5 0 0 \n", - "6 0 0 \n", - "9 0 0 \n", - "10 0 0 \n", - "12 0 0 \n", - "16 0 0 \n", - "17 0 0 \n", - "22 0 0 \n", - "27 0 0 \n", - "29 0 0 \n", - "32 0 0 \n", - "33 0 0 \n", - "37 [Storage] 1 1 \n", - "40 [Storage] 1 1 \n", - "41 0 0 \n", - "43 0 0 \n", - "45 [Waste management] 2 1 \n", - "48 0 0 \n", - "49 0 0 \n", - "\n", - " matches_technology count_matches_technology \\\n", - "0 0 \n", - "5 [forest management] 1 \n", - "6 0 \n", - "9 0 \n", - "10 0 \n", - "12 0 \n", - "16 [forest management] 1 \n", - "17 0 \n", - "22 0 \n", - "27 0 \n", - "29 0 \n", - "32 0 \n", - "33 [bioplastic] 4 \n", - "37 0 \n", - "40 0 \n", - "41 0 \n", - "43 [DSM] 1 \n", - "45 [waste management] 2 \n", - "48 0 \n", - "49 0 \n", - "\n", - " count_unique_matches_technology count_matches \\\n", - "0 0 0 \n", - "5 1 1 \n", - "6 0 0 \n", - "9 0 0 \n", - "10 0 0 \n", - "12 0 0 \n", - "16 1 1 \n", - "17 0 0 \n", - "22 0 0 \n", - "27 0 0 \n", - "29 0 0 \n", - "32 0 0 \n", - "33 1 4 \n", - "37 0 1 \n", - "40 0 1 \n", - "41 0 0 \n", - "43 1 1 \n", - "45 1 4 \n", - "48 0 0 \n", - "49 0 0 \n", - "\n", - " category \n", - "0 agrosylviculture agro-sylviculture \n", - "5 
reforestation distributed production \n", - "6 nanomaterial reforestation \n", - "9 smelt reduction electric battery \n", - "10 System innovation Energy use innovation \n", - "12 soil treatment pump system \n", - "16 forest management Waste management \n", - "17 SPS Wind \n", - "22 soil management grid management waste management \n", - "27 soil management grid management waste management \n", - "29 Waste management waste management straw manage... \n", - "32 meat alternative alternatives to meat \n", - "33 bioplastic protection of soil \n", - "37 Storage conservation tillage \n", - "40 Storage soil treatment \n", - "41 grid management soil management \n", - "43 DSM Cleaner product \n", - "45 Waste management waste management nutrient man... \n", - "48 RAPS Cleaner product \n", - "49 local manufacturing distributed manufacturing \n", - "\n", - "[20 rows x 29 columns]" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" + "execution_count": 13, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "all_technology_terms[:10]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 14, + "data": { + "text/plain": [ + "['parabolic trough',\n", + " 'carbon capture and storage',\n", + " 'biorefinery design',\n", + " 'conservation tillage',\n", + " 'meat alternative',\n", + " 'solar water pump',\n", + " 'demand side management',\n", + " 'Improved durability',\n", + " 'renewable energy',\n", + " 'Storage']" + ] + }, + "metadata": {} + } + ], + "execution_count": 14, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "### Syntactic probabilistic classification using spacy and fuzzywuzzy" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "import spacy\n", + "import numpy as np\n", + "from spacy import displacy\n", + "from collections import Counter\n", + "import en_core_web_sm\n", + "from fuzzywuzzy import process, fuzz\n", + "import re\n", + "nlp = 
en_core_web_sm.load()\n", + "import textdistance as tx\n", + "import unicodedata\n", + "from textdistance.algorithms import vector_based" + ], + "outputs": [], + "execution_count": 15, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "def clean(string):\n", + " return re.sub(r'[-\\s]+', '-',\n", + " str(\n", + " re.sub(r'[^\\w\\s-]', '',\n", + " unicodedata.normalize('NFKD', string)\n", + " .strip()\n", + " )))" + ], + "outputs": [], + "execution_count": 16, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Objective example\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#cordish2020.loc[13]['objective']\n", + "cordish2020.loc[9631]['objective'][]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 22, + "data": { + "text/plain": [ + "'Offshore wind has long been identified as one of the most promising energy forms to improve the penetration of renewables in the European energy mix. Since most of offshore wind resources is available over deep waters at a considerable distance from the shore, it is inevitable that the campaign of the offshore wind exploitation would move from shallow waters to deep waters. As the conventional bottom-fixed offshore wind turbine is no longer economically viable over deep waters (>50m), the floating offshore wind turbine (FOWT) seems to be an appealing alternative to harvest the ampler deep-water wind. FOWTs are, however, threaten by the hostile deep offshore environment, which would induce unacceptable tilt motions and drastic vibrations of the floating system. The undesirable loadings on the blades, tower, floating foundations and other components, results in mechanical failures and electrical faults of FOWTs, both of which could lead to operation interruptions and cause disastrous economic losses. 
Overcoming the difficulties of effectiveness, robustness, integration and multi-scalability of optimal control and fault diagnosis system of the FOWTs is precisely the topic of the proposal, which would actively contribute to the implementation of the Economical Deep Offshore Wind Exploitation (EDOWE) by introducing the concept of an innovative distributed multi-scale control and monitoring system. Delft University of Technology owns top-level expertise in wind turbine/farm control and distributed multi-scale applications. Its world-leading experimental facilities provide a solid foundation for hosting a systematic study on the control and monitoring strategies of the FOWTs. Moreover, secondment at Politecnico di Milano and collaborating with our industrial partner 2B Energy on the action help developing an advanced solution to economically harvest deep offshore wind, and thus contributing to achieve the renewables consumption goal set by European Union.'" + ] + }, + "metadata": {} + } + ], + "execution_count": 22, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "text = cordish2020['objective'][9631]\n", + "doc = nlp(text)\n", + "docs = list(map(str, doc.noun_chunks))\n", + "docs[:20]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 30, + "data": { + "text/plain": [ + "['Offshore wind',\n", + " 'the most promising energy forms',\n", + " 'the penetration',\n", + " 'renewables',\n", + " 'the European energy mix',\n", + " 'offshore wind resources',\n", + " 'deep waters',\n", + " 'a considerable distance',\n", + " 'the shore',\n", + " 'it',\n", + " 'the campaign',\n", + " 'the offshore wind exploitation',\n", + " 'shallow waters',\n", + " 'deep waters',\n", + " 'the conventional bottom-fixed offshore wind turbine',\n", + " 'deep waters',\n", + " 'the floating offshore wind turbine',\n", + " 'FOWT',\n", + " 'an appealing alternative',\n", + " 'the ampler deep-water wind']" + ] + }, + "metadata": {} + } + ], + 
def find_best_matching_tech(techs, doc):
    """Rank technology terms by how well each one fuzzy-matches ``doc``.

    For every term in ``techs`` the single closest entry of ``doc`` is looked
    up with ``process.extract(..., limit=1, scorer=fuzz.ratio)``. ``process``
    and ``fuzz`` are expected to be in scope already (fuzzywuzzy-style API,
    presumably imported earlier in the notebook -- confirm).

    Parameters
    ----------
    techs : iterable of str
        Technology terms to look for.
    doc : list of str
        Candidate phrases (the callers pass the noun chunks of one text).

    Returns
    -------
    best_matches : list of (str, number)
        One ``(matched_phrase, score)`` per technology, in input order.
        ``("", 0)`` when ``doc`` offered nothing to match against.
    best_tech : list of str
        The technologies tied for the highest non-zero score. When fewer
        than three are tied, the true runner-up technology (highest score
        strictly below the top) is appended, if there is one.
    best1 : list of (str, number)
        The ``(matched_phrase, score)`` entries aligned one-to-one with
        ``best_tech``. When nothing scored above zero this is the
        placeholder ``[("", 0)]`` so that ``best1[0][1]`` stays valid.

    Notes
    -----
    Compared with the previous implementation:
    * a leader that gets displaced by a better score is demoted to
      runner-up instead of being forgotten;
    * the ``("", 0)`` placeholder is never mixed into real results, so
      ``best_tech`` no longer receives an empty string and always lines up
      with ``best1``;
    * zero-score terms are treated as "no match" rather than as a tie.
    """
    best_matches = []   # (matched_phrase, score) for every technology
    best_tech = []      # technologies sharing the top score
    best1 = []          # their entries, index-aligned with best_tech
    runner_up = None    # (technology, entry) with the best score below the top
    top_score = 0

    for val in techs:
        best_curs = process.extract(val, doc, limit=1, scorer=fuzz.ratio)
        terms = ",".join(cat[0] for cat in best_curs)
        if best_curs:
            avg = sum(score for _, score in best_curs) / float(len(best_curs))
        else:
            avg = 0  # nothing in doc to compare against
        entry = (terms, avg)
        best_matches.append(entry)

        if avg <= 0:
            # A zero score is not a match; keep it out of the ranking.
            continue

        if avg > top_score:
            # The previous leader scored above every earlier runner-up,
            # so it becomes the new runner-up.
            if best_tech:
                runner_up = (best_tech[0], best1[0])
            top_score = avg
            best_tech = [val]
            best1 = [entry]
        elif avg == top_score:
            best_tech.append(val)
            best1.append(entry)
        elif runner_up is None or avg > runner_up[1][1]:
            runner_up = (val, entry)

    if not best1:
        # Keep best1[0][1] indexable for callers that threshold on it.
        return best_matches, [], [("", 0)]

    if len(best1) < 3 and runner_up is not None:
        best_tech.append(runner_up[0])
        best1.append(runner_up[1])
    return best_matches, best_tech, best1
+ }, + { + "cell_type": "code", + "source": [ + "cordish2020matches2.id[:20]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 69, + "data": { + "text/plain": [ + "0 837750\n", + "5 835398\n", + "6 838845\n", + "9 827561\n", + "10 823782\n", + "12 825435\n", + "16 835541\n", + "17 833088\n", + "22 835051\n", + "27 815279\n", + "29 810812\n", + "32 827826\n", + "33 812602\n", + "37 822897\n", + "40 828666\n", + "41 827565\n", + "43 811592\n", + "45 847641\n", + "48 817240\n", + "49 826588\n", + "Name: id, dtype: int64" + ] + }, + "metadata": {} + } + ], + "execution_count": 69, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cordish2020matches2.head(20)" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 70, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rcnidacronymstatusprogrammetopicsframeworkProgrammetitlestartDateendDate...participantCountriessubjectsmatches_groupcount_matches_groupcount_unique_matches_groupmatches_technologycount_matches_technologycount_unique_matches_technologycount_matchescategory
0222681837750FARMYNGSIGNEDH2020-EU.2.1.4.;H2020-EU.3.2.6.BBI.2018.SO3.F2H2020FlAgship demonstration of industrial scale pro...2019-06-012022-06-30...FR;NO;BE;ES;PL;CH;DE;NLNaN00000agrosylviculture agro-sylviculture
52216738353983D-FOGRODSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Understanding forest growth dynamics using nov...2019-10-012021-09-30...NaNNaN00[forest management]111reforestation distributed production
6222088838845SPIRSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Spasers in the infrared range2020-03-012022-02-28...NaNNaN00000nanomaterial reforestation
9217264827561UFineCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020An innovative ultra-fine bubble engineered noz...2018-09-012018-12-31...NaNNaN00000smelt reduction electric battery
10220938823782SSHOCSIGNEDH2020-EU.1.4.1.1.INFRAEOSC-04-2018H2020Social Sciences & Humanities Open Cloud2019-01-012022-04-30...UK;NL;FR;EL;AT;IT;DENaN00000System innovation Energy use innovation
12221878825435DECOMPACTSIGNEDH2020-EU.1.1.ERC-2018-PoCH2020Development of Collagenase Polymeric nanocapsu...2019-05-012020-10-31...NaNNaN00000soil treatment pump system
16221633835541MOVESSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020MOnitoring VEgetation status and functioning a...2019-10-012021-09-30...NaNNaN00[forest management]111forest management Waste management
17222602833088InfraStressSIGNEDH2020-EU.3.7.4.;H2020-EU.3.7.2.SU-INFRA01-2018-2019-2020H2020Improving resilience of sensitive industrial p...2019-06-012021-05-31...SI;CY;PT;EL;DE;IE;IT;FR;IL;PL;BENaN00000SPS Wind
22221963835051NucSatSIGNEDH2020-EU.1.3.2.MSCA-IF-2018H2020Satellites and nuclear information. Production...2019-07-012021-06-30...NaNNaN00000soil management grid management waste management
272185298152795G-VINNISIGNEDH2020-EU.2.1.1.ICT-17-2018H20205G Verticals INNovation Infrastructure2018-07-012021-06-30...LU;DE;NO;EL;PT;IE;ES;DK;FI;UK;ITNaN00000soil management grid management waste management
29216990810812FASTERSIGNEDH2020-EU.4.b.WIDESPREAD-05-2017H2020Farmers’ Adaptation Sustainability in Tunis...2018-11-012021-10-31...UK;SE;ES;TNNaN00000Waste management waste management straw manage...
32217204827826CreamOliveSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Industrial scale-up of the first all-natural s...2018-07-012018-12-31...NaNNaN00000meat alternative alternatives to meat
33217649812602POLIPOCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020The first non-fermentative production process ...2018-06-012018-11-30...NaNNaN00[bioplastic]414bioplastic protection of soil
37217413822897BEATIKSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Beatik- Collaborative Digital Scores Platform ...2018-09-012020-03-31...NaNNaN[Storage]11001Storage conservation tillage
40217195828666KERMACLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020A NEW VERIFICATION SYSTEM FOR COMPLEX RADIOTHE...2018-09-012018-12-31...NaNNaN[Storage]11001Storage soil treatment
41217265827565PTCCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Machine-Learning Technology for Digital Marketing2018-09-012019-01-31...NaNNaN00000grid management soil management
43217808811592NUTRI-NEEDSIGNEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020Final development, clinical validation and lau...2018-06-012020-05-31...NaNNaN00[DSM]111DSM Cleaner product
45223666847641MICADOSIGNEDH2020-Euratom-1.7.;H2020-Euratom-1.1.NFRP-2018-10H2020Measurement and Instrumentation for Cleaning A...2019-06-012022-05-31...FR;DE;IT;BE;CZNaN[Waste management]21[waste management]214Waste management waste management nutrient man...
48216834817240POCCLOSEDH2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.EIC-SMEInst-2018-2020H2020PieceOfCake: an AI-driven chatbot to manage co...2018-06-012018-11-30...NaNNaN00000RAPS Cleaner product
49223648826588APPLAUSESIGNEDH2020-EU.2.1.1.7.ECSEL-2018-1-IAH2020Advanced packaging for photonics, optics and e...2019-05-012022-04-30...DE;NO;NL;CH;AT;HU;FI;BE;FR;LV;ILNaN00000local manufacturing distributed manufacturing
\n", + "

20 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " rcn id acronym status \\\n", + "0 222681 837750 FARMYNG SIGNED \n", + "5 221673 835398 3D-FOGROD SIGNED \n", + "6 222088 838845 SPIR SIGNED \n", + "9 217264 827561 UFine CLOSED \n", + "10 220938 823782 SSHOC SIGNED \n", + "12 221878 825435 DECOMPACT SIGNED \n", + "16 221633 835541 MOVES SIGNED \n", + "17 222602 833088 InfraStress SIGNED \n", + "22 221963 835051 NucSat SIGNED \n", + "27 218529 815279 5G-VINNI SIGNED \n", + "29 216990 810812 FASTER SIGNED \n", + "32 217204 827826 CreamOlive SIGNED \n", + "33 217649 812602 POLIPO CLOSED \n", + "37 217413 822897 BEATIK SIGNED \n", + "40 217195 828666 KERMA CLOSED \n", + "41 217265 827565 PTC CLOSED \n", + "43 217808 811592 NUTRI-NEED SIGNED \n", + "45 223666 847641 MICADO SIGNED \n", + "48 216834 817240 POC CLOSED \n", + "49 223648 826588 APPLAUSE SIGNED \n", + "\n", + " programme topics \\\n", + "0 H2020-EU.2.1.4.;H2020-EU.3.2.6. BBI.2018.SO3.F2 \n", + "5 H2020-EU.1.3.2. MSCA-IF-2018 \n", + "6 H2020-EU.1.3.2. MSCA-IF-2018 \n", + "9 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "10 H2020-EU.1.4.1.1. INFRAEOSC-04-2018 \n", + "12 H2020-EU.1.1. ERC-2018-PoC \n", + "16 H2020-EU.1.3.2. MSCA-IF-2018 \n", + "17 H2020-EU.3.7.4.;H2020-EU.3.7.2. SU-INFRA01-2018-2019-2020 \n", + "22 H2020-EU.1.3.2. MSCA-IF-2018 \n", + "27 H2020-EU.2.1.1. ICT-17-2018 \n", + "29 H2020-EU.4.b. WIDESPREAD-05-2017 \n", + "32 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "33 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "37 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "40 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "41 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "43 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. EIC-SMEInst-2018-2020 \n", + "45 H2020-Euratom-1.7.;H2020-Euratom-1.1. NFRP-2018-10 \n", + "48 H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1. 
EIC-SMEInst-2018-2020 \n", + "49 H2020-EU.2.1.1.7. ECSEL-2018-1-IA \n", + "\n", + " frameworkProgramme title \\\n", + "0 H2020 FlAgship demonstration of industrial scale pro... \n", + "5 H2020 Understanding forest growth dynamics using nov... \n", + "6 H2020 Spasers in the infrared range \n", + "9 H2020 An innovative ultra-fine bubble engineered noz... \n", + "10 H2020 Social Sciences & Humanities Open Cloud \n", + "12 H2020 Development of Collagenase Polymeric nanocapsu... \n", + "16 H2020 MOnitoring VEgetation status and functioning a... \n", + "17 H2020 Improving resilience of sensitive industrial p... \n", + "22 H2020 Satellites and nuclear information. Production... \n", + "27 H2020 5G Verticals INNovation Infrastructure \n", + "29 H2020 Farmers’ Adaptation Sustainability in Tunis... \n", + "32 H2020 Industrial scale-up of the first all-natural s... \n", + "33 H2020 The first non-fermentative production process ... \n", + "37 H2020 Beatik- Collaborative Digital Scores Platform ... \n", + "40 H2020 A NEW VERIFICATION SYSTEM FOR COMPLEX RADIOTHE... \n", + "41 H2020 Machine-Learning Technology for Digital Marketing \n", + "43 H2020 Final development, clinical validation and lau... \n", + "45 H2020 Measurement and Instrumentation for Cleaning A... \n", + "48 H2020 PieceOfCake: an AI-driven chatbot to manage co... \n", + "49 H2020 Advanced packaging for photonics, optics and e... \n", + "\n", + " startDate endDate ... participantCountries subjects \\\n", + "0 2019-06-01 2022-06-30 ... FR;NO;BE;ES;PL;CH;DE;NL NaN \n", + "5 2019-10-01 2021-09-30 ... NaN NaN \n", + "6 2020-03-01 2022-02-28 ... NaN NaN \n", + "9 2018-09-01 2018-12-31 ... NaN NaN \n", + "10 2019-01-01 2022-04-30 ... UK;NL;FR;EL;AT;IT;DE NaN \n", + "12 2019-05-01 2020-10-31 ... NaN NaN \n", + "16 2019-10-01 2021-09-30 ... NaN NaN \n", + "17 2019-06-01 2021-05-31 ... SI;CY;PT;EL;DE;IE;IT;FR;IL;PL;BE NaN \n", + "22 2019-07-01 2021-06-30 ... NaN NaN \n", + "27 2018-07-01 2021-06-30 ... 
LU;DE;NO;EL;PT;IE;ES;DK;FI;UK;IT NaN \n", + "29 2018-11-01 2021-10-31 ... UK;SE;ES;TN NaN \n", + "32 2018-07-01 2018-12-31 ... NaN NaN \n", + "33 2018-06-01 2018-11-30 ... NaN NaN \n", + "37 2018-09-01 2020-03-31 ... NaN NaN \n", + "40 2018-09-01 2018-12-31 ... NaN NaN \n", + "41 2018-09-01 2019-01-31 ... NaN NaN \n", + "43 2018-06-01 2020-05-31 ... NaN NaN \n", + "45 2019-06-01 2022-05-31 ... FR;DE;IT;BE;CZ NaN \n", + "48 2018-06-01 2018-11-30 ... NaN NaN \n", + "49 2019-05-01 2022-04-30 ... DE;NO;NL;CH;AT;HU;FI;BE;FR;LV;IL NaN \n", + "\n", + " matches_group count_matches_group count_unique_matches_group \\\n", + "0 0 0 \n", + "5 0 0 \n", + "6 0 0 \n", + "9 0 0 \n", + "10 0 0 \n", + "12 0 0 \n", + "16 0 0 \n", + "17 0 0 \n", + "22 0 0 \n", + "27 0 0 \n", + "29 0 0 \n", + "32 0 0 \n", + "33 0 0 \n", + "37 [Storage] 1 1 \n", + "40 [Storage] 1 1 \n", + "41 0 0 \n", + "43 0 0 \n", + "45 [Waste management] 2 1 \n", + "48 0 0 \n", + "49 0 0 \n", + "\n", + " matches_technology count_matches_technology \\\n", + "0 0 \n", + "5 [forest management] 1 \n", + "6 0 \n", + "9 0 \n", + "10 0 \n", + "12 0 \n", + "16 [forest management] 1 \n", + "17 0 \n", + "22 0 \n", + "27 0 \n", + "29 0 \n", + "32 0 \n", + "33 [bioplastic] 4 \n", + "37 0 \n", + "40 0 \n", + "41 0 \n", + "43 [DSM] 1 \n", + "45 [waste management] 2 \n", + "48 0 \n", + "49 0 \n", + "\n", + " count_unique_matches_technology count_matches \\\n", + "0 0 0 \n", + "5 1 1 \n", + "6 0 0 \n", + "9 0 0 \n", + "10 0 0 \n", + "12 0 0 \n", + "16 1 1 \n", + "17 0 0 \n", + "22 0 0 \n", + "27 0 0 \n", + "29 0 0 \n", + "32 0 0 \n", + "33 1 4 \n", + "37 0 1 \n", + "40 0 1 \n", + "41 0 0 \n", + "43 1 1 \n", + "45 1 4 \n", + "48 0 0 \n", + "49 0 0 \n", + "\n", + " category \n", + "0 agrosylviculture agro-sylviculture \n", + "5 reforestation distributed production \n", + "6 nanomaterial reforestation \n", + "9 smelt reduction electric battery \n", + "10 System innovation Energy use innovation \n", + "12 soil treatment pump system \n", 
+ "16 forest management Waste management \n", + "17 SPS Wind \n", + "22 soil management grid management waste management \n", + "27 soil management grid management waste management \n", + "29 Waste management waste management straw manage... \n", + "32 meat alternative alternatives to meat \n", + "33 bioplastic protection of soil \n", + "37 Storage conservation tillage \n", + "40 Storage soil treatment \n", + "41 grid management soil management \n", + "43 DSM Cleaner product \n", + "45 Waste management waste management nutrient man... \n", + "48 RAPS Cleaner product \n", + "49 local manufacturing distributed manufacturing \n", + "\n", + "[20 rows x 29 columns]" + ] + }, + "metadata": {} + } + ], + "execution_count": 70, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "text = cordish2020['objective'][13]\n", + "doc = nlp(text)\n", + "#doc = \" \".join([token.lemma_ for token in doc])\n", + "#doc = nlp(doc)\n", + "docs = list(map(str, doc.noun_chunks))\n", + "_, best_tech, best_match = find_best_matching_tech(technologies, docs)\n", + "print(best_tech,best_match)" + ], + "outputs": [], + "execution_count": null, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "cordish2020matches2[['id','acronym','title','objective','startDate','endDate','ecMaxContribution','matches_group','matches_technology','count_matches','category']].to_csv('../data/tech2.csv')" + ], + "outputs": [], + "execution_count": 72, + "metadata": {} + }, + { + "cell_type": "code", + "source": [], + "outputs": [], + "execution_count": null, + "metadata": {} + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "nteract": { + "version": "0.15.0" } - ], - 
"source": [ - "cordish2020matches2.head(20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "text = cordish2020['objective'][13]\n", - "doc = nlp(text)\n", - "#doc = \" \".join([token.lemma_ for token in doc])\n", - "#doc = nlp(doc)\n", - "docs = list(map(str, doc.noun_chunks))\n", - "_, best_tech, best_match = find_best_matching_tech(technologies, docs)\n", - "print(best_tech,best_match)" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [], - "source": [ - "cordish2020matches2[['id','acronym','title','objective','startDate','endDate','ecMaxContribution','matches_group','matches_technology','count_matches','category']].to_csv('../data/tech2.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file