From 32966d6e06e33f6991547463010087f143de2afc Mon Sep 17 00:00:00 2001 From: Aidan Kelley Date: Sun, 20 Sep 2020 22:28:34 -0500 Subject: [PATCH] updated the search.ipnyb notebook --- search.ipynb | 222 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 187 insertions(+), 35 deletions(-) diff --git a/search.ipynb b/search.ipynb index e2c6ae9..8b56502 100644 --- a/search.ipynb +++ b/search.ipynb @@ -52,13 +52,93 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Searching the Knowledge Graph\n", + "# Interacting with the Knowledge Graph\n", + "Using the command below, we can search the graph and describe the results of searches. Try typing `search` and then choosing the `description` method. Then search for `adversarial machine learning`. Then, we can describe the first result by typing `describe` and then `1`.\n", + "\n", + "Normally, we would run the command below with `python -m sosen interactive`, but we run the script fully in Python for notebook compatibility." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Choose an action (search/describe/quit):> search\n", + "Which method (description/keyword/title)?> description\n", + "what is your query?> adversarial machine learning\n", + "\n", + "MATCHES:\n", + "| | result iri | matches | tf-idf sum |\n", + "|----|-------------------------------------------------------------------------------------|-----------|--------------|\n", + "| 1 | https://w3id.org/okn/i/Software/soorya19/sparsity-based-defenses | 3 | 1.95632 |\n", + "| 2 | https://w3id.org/okn/i/Software/mdoucet/refl_ml | 2 | 2.62688 |\n", + "| 3 | https://w3id.org/okn/i/Software/psesh/Effective-Quadratures | 2 | 2.62688 |\n", + "| 4 | https://w3id.org/okn/i/Software/BENR0/textory | 2 | 1.97016 |\n", + "| 5 | https://w3id.org/okn/i/Software/andre-wojtowicz/uci-ml-to-r | 2 | 1.97016 |\n", + "| 6 | 
https://w3id.org/okn/i/Software/fabiodasilva/mlwl | 2 | 1.97016 |\n", + "| 7 | https://w3id.org/okn/i/Software/masseyr/geosoupML | 2 | 1.97016 |\n", + "| 8 | https://w3id.org/okn/i/Software/JoshuaE1/supervised-classification-SSH-publications | 2 | 1.57613 |\n", + "| 9 | https://w3id.org/okn/i/Software/bcbi/PredictMD.jl | 2 | 1.57613 |\n", + "| 10 | https://w3id.org/okn/i/Software/bnpy/bnpy | 2 | 1.57613 |\n", + "| 11 | https://w3id.org/okn/i/Software/caiostringari/pywavelearn | 2 | 1.57613 |\n", + "| 12 | https://w3id.org/okn/i/Software/cisprague/Astro.IQ | 2 | 1.57613 |\n", + "| 13 | https://w3id.org/okn/i/Software/fqararyah/tensorflow-1 | 2 | 1.57613 |\n", + "| 14 | https://w3id.org/okn/i/Software/hls-fpga-machine-learning/hls4ml | 2 | 1.57613 |\n", + "| 15 | https://w3id.org/okn/i/Software/machine-learning-scala/mls | 2 | 1.57613 |\n", + "| 16 | https://w3id.org/okn/i/Software/msmbuilder/osprey | 2 | 1.57613 |\n", + "| 17 | https://w3id.org/okn/i/Software/muammar/ml4chem | 2 | 1.57613 |\n", + "| 18 | https://w3id.org/okn/i/Software/qzhu2017/PyXtal_FF | 2 | 1.57613 |\n", + "| 19 | https://w3id.org/okn/i/Software/raamana/confounds | 2 | 1.57613 |\n", + "| 20 | https://w3id.org/okn/i/Software/scikit-learn/scikit-learn | 2 | 1.57613 |\n", + "Choose an action (search/describe/quit):> describe\n", + "Enter a space-separated list of URIs\n", + "Alternatively, enter numbers 1-20, referring to the results of the previous search\n", + ">1\n", + "['https://w3id.org/okn/i/Software/soorya19/sparsity-based-defenses']\n", + "[]\n", + "['sparsity-based-defenses']\n", + "[]\n", + "['soorya19']\n", + "[]\n", + "['Sparsity-based defenses against adversarial attacks on machine learning classifiers']\n", + "[]\n", + "['Python']\n", + "[]\n", + "[]\n", + "[]\n", + "['https://api.github.com/licenses/bsd-3-clause']\n", + "[['name', ['sparsity-based-defenses']], ['author', ['soorya19']], ['description', ['Sparsity-based defenses against adversarial attacks on machine learning 
classifiers']], ['languages', ['Python']], ['download', []], ['license', ['https://api.github.com/licenses/bsd-3-clause']]]\n", + "| name | sparsity-based-defenses |\n", + "|-------------|----------------------------------------------|\n", + "| author | soorya19 |\n", + "| description | Sparsity-based defenses against adversarial |\n", + "| | attacks on machine learning classifiers |\n", + "| languages | Python |\n", + "| license | https://api.github.com/licenses/bsd-3-clause |\n", + "Choose an action (search/describe/quit):> quit\n" + ] + } + ], + "source": [ + "from sosen.cli import run_interactive\n", + "run_interactive()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Searching the Knowledge Graph (old)\n", "Currently, there are three methods for searching the Knowledge Graph via exact keyword matching. There are manual keywords from GitHub, and additional keywords that are extracted from the title and description of software objects, queried using the methods keyword, title, and description, respectively. After the `--method` input, everything else is interpreted as part of the search query. The first 20 matches are printed, ordered first by the number of keywords " ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -66,34 +146,30 @@ "output_type": "stream", "text": [ "SoSEn Command Line Interface\n", - "['adversarial', 'machine', 'learning']\n", - "\n", - "FOUND KEYWORDS:\n", - "keyword: https://w3id.org/okn/o/i/Keyword/adversarial, idf: 7.402451520818244\n", - "keyword: https://w3id.org/okn/o/i/Keyword/machine, idf: 3.915076442915036\n", - "keyword: https://w3id.org/okn/o/i/Keyword/learning, idf: 3.7312270019430285\n", "\n", "MATCHES:\n", - "1. https://w3id.org/okn/o/i/Software/soorya19/sparsity-based-defenses, # keyword matches: 3, tf-idf sum: 0.5094190135977307\n", - "2. 
https://w3id.org/okn/o/i/Software/mdoucet/refl_ml, # keyword matches: 2, tf-idf sum: 2.5487678149526882\n", - "3. https://w3id.org/okn/o/i/Software/raamana/confounds, # keyword matches: 2, tf-idf sum: 1.529260688971613\n", - "4. https://w3id.org/okn/o/i/Software/fqararyah/tensorflow-1, # keyword matches: 2, tf-idf sum: 1.529260688971613\n", - "5. https://w3id.org/okn/o/i/Software/cisprague/Astro.IQ, # keyword matches: 2, tf-idf sum: 1.529260688971613\n", - "6. https://w3id.org/okn/o/i/Software/smcclatchy/machine-learning-python, # keyword matches: 2, tf-idf sum: 1.529260688971613\n", - "7. https://w3id.org/okn/o/i/Software/JoshuaE1/supervised-classification-SSH-publications, # keyword matches: 2, tf-idf sum: 1.529260688971613\n", - "8. https://w3id.org/okn/o/i/Software/bbuelens/energy-balance, # keyword matches: 2, tf-idf sum: 1.529260688971613\n", - "9. https://w3id.org/okn/o/i/Software/indigo-dc/DEEPaaS, # keyword matches: 2, tf-idf sum: 1.4221913058501365\n", - "10. https://w3id.org/okn/o/i/Software/neelsoumya/butterfly_detector, # keyword matches: 2, tf-idf sum: 1.4221913058501365\n", - "11. https://w3id.org/okn/o/i/Software/JeanSunny999/Breast-Cancer-Wisconsin-Diagnostic-DataSet, # keyword matches: 2, tf-idf sum: 1.2743839074763441\n", - "12. https://w3id.org/okn/o/i/Software/mortezanear/Matlab-codes-for-vaccine-tracking, # keyword matches: 2, tf-idf sum: 1.2743839074763441\n", - "13. https://w3id.org/okn/o/i/Software/Nikeshbajaj/Machine_Learning_From_Scratch, # keyword matches: 2, tf-idf sum: 1.2743839074763441\n", - "14. https://w3id.org/okn/o/i/Software/bnpy/bnpy, # keyword matches: 2, tf-idf sum: 1.2743839074763441\n", - "15. https://w3id.org/okn/o/i/Software/gbotto/bats, # keyword matches: 2, tf-idf sum: 1.17635437613201\n", - "16. https://w3id.org/okn/o/i/Software/caiostringari/pywavelearn, # keyword matches: 2, tf-idf sum: 1.092329063551152\n", - "17. 
https://w3id.org/okn/o/i/Software/BENR0/textory, # keyword matches: 2, tf-idf sum: 1.092329063551152\n", - "18. https://w3id.org/okn/o/i/Software/machine-learning-scala/mls, # keyword matches: 2, tf-idf sum: 1.092329063551152\n", - "19. https://w3id.org/okn/o/i/Software/CCS-Lab/easyml, # keyword matches: 2, tf-idf sum: 1.092329063551152\n", - "20. https://w3id.org/okn/o/i/Software/psesh/Effective-Quadratures, # keyword matches: 2, tf-idf sum: 1.092329063551152\n" + "| | result iri | matches | tf-idf sum |\n", + "|----|-------------------------------------------------------------------------------------|-----------|--------------|\n", + "| 1 | https://w3id.org/okn/i/Software/soorya19/sparsity-based-defenses | 3 | 1.95632 |\n", + "| 2 | https://w3id.org/okn/i/Software/mdoucet/refl_ml | 2 | 2.62688 |\n", + "| 3 | https://w3id.org/okn/i/Software/psesh/Effective-Quadratures | 2 | 2.62688 |\n", + "| 4 | https://w3id.org/okn/i/Software/BENR0/textory | 2 | 1.97016 |\n", + "| 5 | https://w3id.org/okn/i/Software/andre-wojtowicz/uci-ml-to-r | 2 | 1.97016 |\n", + "| 6 | https://w3id.org/okn/i/Software/fabiodasilva/mlwl | 2 | 1.97016 |\n", + "| 7 | https://w3id.org/okn/i/Software/masseyr/geosoupML | 2 | 1.97016 |\n", + "| 8 | https://w3id.org/okn/i/Software/JoshuaE1/supervised-classification-SSH-publications | 2 | 1.57613 |\n", + "| 9 | https://w3id.org/okn/i/Software/bcbi/PredictMD.jl | 2 | 1.57613 |\n", + "| 10 | https://w3id.org/okn/i/Software/bnpy/bnpy | 2 | 1.57613 |\n", + "| 11 | https://w3id.org/okn/i/Software/caiostringari/pywavelearn | 2 | 1.57613 |\n", + "| 12 | https://w3id.org/okn/i/Software/cisprague/Astro.IQ | 2 | 1.57613 |\n", + "| 13 | https://w3id.org/okn/i/Software/fqararyah/tensorflow-1 | 2 | 1.57613 |\n", + "| 14 | https://w3id.org/okn/i/Software/hls-fpga-machine-learning/hls4ml | 2 | 1.57613 |\n", + "| 15 | https://w3id.org/okn/i/Software/machine-learning-scala/mls | 2 | 1.57613 |\n", + "| 16 | https://w3id.org/okn/i/Software/msmbuilder/osprey | 2 | 1.57613 
|\n", + "| 17 | https://w3id.org/okn/i/Software/muammar/ml4chem | 2 | 1.57613 |\n", + "| 18 | https://w3id.org/okn/i/Software/qzhu2017/PyXtal_FF | 2 | 1.57613 |\n", + "| 19 | https://w3id.org/okn/i/Software/raamana/confounds | 2 | 1.57613 |\n", + "| 20 | https://w3id.org/okn/i/Software/scikit-learn/scikit-learn | 2 | 1.57613 |\n" ] } ], @@ -104,9 +180,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SoSEn Command Line Interface\n", + "\n", + "MATCHES:\n", + "| | result iri | matches | tf-idf sum |\n", + "|----|----------------------------------------------------------------------|-----------|--------------|\n", + "| 1 | https://w3id.org/okn/i/Software/bcbi/PredictMD.jl | 3 | 2.5966 |\n", + "| 2 | https://w3id.org/okn/i/Software/smarie/python-azureml-client | 2 | 1.64152 |\n", + "| 3 | https://w3id.org/okn/i/Software/radtorch/radtorch | 2 | 0.80129 |\n", + "| 4 | https://w3id.org/okn/i/Software/neelsoumya/butterfly_detector | 2 | 0.615491 |\n", + "| 5 | https://w3id.org/okn/i/Software/christopher-beckham/weka-pyscript | 1 | 2.17881 |\n", + "| 6 | https://w3id.org/okn/i/Software/iml-wg/HEP-ML-Resources | 1 | 2.17881 |\n", + "| 7 | https://w3id.org/okn/i/Software/kjappelbaum/ml_molsim2020 | 1 | 2.17881 |\n", + "| 8 | https://w3id.org/okn/i/Software/rieck/harry | 1 | 2.17881 |\n", + "| 9 | https://w3id.org/okn/i/Software/rieck/sally | 1 | 2.17881 |\n", + "| 10 | https://w3id.org/okn/i/Software/SommerEngineering/blog-shitty-models | 1 | 1.59044 |\n", + "| 11 | https://w3id.org/okn/i/Software/CCS-Lab/easyml | 1 | 1.45254 |\n", + "| 12 | https://w3id.org/okn/i/Software/EducationalTestingService/skll | 1 | 1.45254 |\n", + "| 13 | https://w3id.org/okn/i/Software/SCCAF/sccaf | 1 | 1.45254 |\n", + "| 14 | https://w3id.org/okn/i/Software/YinLiLin/KAML | 1 | 1.45254 |\n", + "| 15 | https://w3id.org/okn/i/Software/asergiobranco/clara 
| 1 | 1.45254 |\n", + "| 16 | https://w3id.org/okn/i/Software/ds4dm/nectar | 1 | 1.45254 |\n", + "| 17 | https://w3id.org/okn/i/Software/hls-fpga-machine-learning/hls4ml | 1 | 1.45254 |\n", + "| 18 | https://w3id.org/okn/i/Software/jmbr/diffusion-maps | 1 | 1.45254 |\n", + "| 19 | https://w3id.org/okn/i/Software/lil-lab/spf | 1 | 1.45254 |\n", + "| 20 | https://w3id.org/okn/i/Software/maxibor/sourcepredict | 1 | 1.45254 |\n" + ] + } + ], "source": [ "%%bash\n", "python -m sosen search --method keyword machine learning" @@ -114,9 +222,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SoSEn Command Line Interface\n", + "\n", + "MATCHES:\n", + "| | result iri | matches | tf-idf sum |\n", + "|----|-------------------------------------------------|-----------|--------------|\n", + "| 1 | https://w3id.org/okn/i/Software/usc-isi-i2/kgtk | 1 | 9.56163 |\n" + ] + } + ], "source": [ "%%bash\n", "python -m sosen search --method title kgtk" @@ -132,12 +253,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SoSEn Command Line Interface\n", + "('https://w3id.org/okn/i/Software/usc-isi-i2/kgtk',)\n", + "[]\n", + "['kgtk']\n", + "[]\n", + "['usc-isi-i2']\n", + "[]\n", + "['Knowledge Graph Toolkit ']\n", + "[]\n", + "['Makefile', 'Dockerfile', 'Python', 'Shell']\n", + "[]\n", + "[]\n", + "[]\n", + "['https://api.github.com/licenses/mit']\n", + "[['name', ['kgtk']], ['author', ['usc-isi-i2']], ['description', ['Knowledge Graph Toolkit ']], ['languages', ['Dockerfile', 'Makefile', 'Python', 'Shell']], ['download', []], ['license', ['https://api.github.com/licenses/mit']]]\n", + "| name | kgtk |\n", + "|-------------|-------------------------------------|\n", + "| author | usc-isi-i2 |\n", + "| 
description | Knowledge Graph Toolkit |\n", + "| languages | Dockerfile |\n", + "| \" \" | Makefile |\n", + "| \" \" | Python |\n", + "| \" \" | Shell |\n", + "| license | https://api.github.com/licenses/mit |\n" + ] + } + ], "source": [ "%%bash\n", - "python -m sosen describe https://w3id.org/okn/o/i/Software/usc-isi-i2/kgtk" + "python -m sosen describe https://w3id.org/okn/i/Software/usc-isi-i2/kgtk" ] }, {