forked from simonjupp/zooma_recommender
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathefficient_apriori_pdx.py
executable file
·96 lines (72 loc) · 3.66 KB
/
efficient_apriori_pdx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python3
"""
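Mine association rules that link annotation properties to semantic tags.

Reads the annotations whose source is 'pdx-finder' from a Neo4j graph, builds
one transaction per (biological entity, semantic tag) pair, runs the Apriori
algorithm over those transactions, and writes the rules whose right-hand side
is a single tag to rules.json.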
"""
__author__ = "jupp"
__license__ = "Apache 2.0"
__date__ = "03/10/2018"
from efficient_apriori import apriori
from neo4j.v1 import GraphDatabase, basic_auth
import json
# Connection and output settings for the script.
NEO4J_URI = "bolt://scrappy.ebi.ac.uk:7687"
OUTPUT_PATH = "rules.json"

# Transaction items are flat strings: "<propertyType>|<propertyValue>" for a
# property and "TAG|<propertyType>|<propertyValue>|<semanticTag>" for a tag.
ITEM_SEPARATOR = "|"
TAG_PREFIX = "TAG" + ITEM_SEPARATOR

# NOTE(review): "-[HAS_PROVENANCE]->" in the first MATCH has no leading colon,
# so Cypher treats HAS_PROVENANCE as a relationship *variable* and the pattern
# matches a relationship of any type. "[:HAS_PROVENANCE]" was probably
# intended; the text is left untouched because the graph schema is not visible
# from this script -- confirm against the database before changing it.
CYPHER_QUERY = """
MATCH (a)-[HAS_PROVENANCE]->(s:Source)
WHERE s.name = 'pdx-finder'
WITH a
MATCH (be:BiologicalEntity)<-[:HAS_BIO_ENTITY]-(a:Annotation)-[:HAS_PROPERTY]->(p:Property)
OPTIONAL MATCH (a)-[:HAS_SEMANTIC_TAG]->(st:SemanticTag)
RETURN distinct be.bioEntity, p.propertyType, p.propertyValue, st.semanticTag
ORDER BY be.bioEntity
;"""


def fetch_rows(uri=NEO4J_URI, query=CYPHER_QUERY):
    """Run *query* against the Neo4j server at *uri* and return its rows.

    Each row is a 4-tuple taken from the first four columns of a record:
    ``(bioEntity, propertyType, propertyValue, semanticTag)``.

    The rows are copied into a list before the session is closed so that no
    lazily-fetched result outlives its connection, and both the session and
    the driver are always closed, even if the query fails.
    """
    driver = GraphDatabase.driver(uri)
    try:
        session = driver.session()
        try:
            return [
                (record[0], record[1], record[2], record[3])
                for record in session.run(query)
            ]
        finally:
            session.close()
    finally:
        driver.close()


def group_annotations(rows):
    """Group query rows by biological entity.

    Returns a dict mapping each entity to
    ``{'properties': [...], 'tags': [...]}``. A property is a dict with
    ``propertyType`` / ``propertyValue``; a tag additionally carries
    ``semanticTag``. Rows whose semantic tag is empty or ``None`` contribute a
    property only.

    The query's OPTIONAL MATCH yields one row per semantic tag, so the same
    property can arrive several times; it is recorded once per entity.
    """
    annotations = {}
    for bioentity, property_type, property_value, semantic_tag in rows:
        entity = annotations.setdefault(bioentity, {'properties': [], 'tags': []})
        prop = {'propertyType': property_type, 'propertyValue': property_value}
        if prop not in entity['properties']:
            entity['properties'].append(prop)
        if semantic_tag:
            entity['tags'].append({
                'propertyType': property_type,
                'propertyValue': property_value,
                'semanticTag': semantic_tag,
            })
    return annotations


def build_transactions(annotations):
    """Build one transaction per (entity, tag) pair.

    Every transaction lists all property items of the entity followed by a
    single tag item, for example::

        ['OriginTissue|Blood', 'TumorType|Primary',
         'TAG|OriginTissue|Blood|UBERON_0000178']

    Entities without tags produce no transaction.
    """
    transactions = []
    for entity in annotations.values():
        property_items = [
            ITEM_SEPARATOR.join((prop['propertyType'], prop['propertyValue']))
            for prop in entity['properties']
        ]
        for tag in entity['tags']:
            tag_item = ITEM_SEPARATOR.join(
                ("TAG", tag['propertyType'], tag['propertyValue'], tag['semanticTag'])
            )
            transactions.append(property_items + [tag_item])
    return transactions


def select_tag_rules(rules):
    """Return the rules that predict exactly one tag, sorted by ascending lift.

    A rule is kept when its right-hand side is a single item starting with
    ``TAG|`` and no left-hand item is itself a tag. The prefix test replaces a
    substring test, which also matched ordinary property items that merely
    contain the letters "TAG" (e.g. a value such as "STAGE IV").
    """
    selected = []
    for rule in rules:
        if len(rule.rhs) != 1 or not rule.rhs[0].startswith(TAG_PREFIX):
            continue
        # A tag on the left-hand side cannot be expressed as a property in the
        # output document, so skip the rule instead of mis-parsing it.
        if any(item.startswith(TAG_PREFIX) for item in rule.lhs):
            continue
        selected.append(rule)
    selected.sort(key=lambda rule: rule.lift)
    return selected


def rule_to_document(rule):
    """Convert one rule accepted by :func:`select_tag_rules` to a JSON-ready dict.

    The result holds the left-hand properties, the rule metrics (``conf``,
    ``support``, ``lift``, ``conviction``) and the predicted ``tag``.

    Splits are bounded so that a property value containing ``|`` no longer
    breaks the unpacking. This assumes property types and semantic tags do not
    themselves contain ``|`` -- TODO confirm against the data.
    """
    properties = []
    for item in rule.lhs:
        property_type, property_value = item.split(ITEM_SEPARATOR, 1)
        properties.append(
            {'propertyType': property_type, 'propertyValue': property_value}
        )

    # "TAG|<type>|<value>|<tag>": peel the marker and type from the left and
    # the tag from the right; whatever remains is the value.
    _marker, tag_type, remainder = rule.rhs[0].split(ITEM_SEPARATOR, 2)
    tag_value, semantic_tag = remainder.rsplit(ITEM_SEPARATOR, 1)

    return {
        'properties': properties,
        'conf': rule.confidence,
        'support': rule.support,
        'lift': rule.lift,
        'conviction': rule.conviction,
        'tag': {
            'propertyType': tag_type,
            'propertyValue': tag_value,
            'tag': semantic_tag,
        },
    }


def main():
    """Query the graph, mine the rules, and write them to ``rules.json``."""
    annotations = group_annotations(fetch_rows())
    transactions = build_transactions(annotations)
    for entry in transactions:
        print(entry)

    documents = []
    # With no transactions there is nothing to mine; still write an empty list
    # so the output file always exists and is valid JSON.
    if transactions:
        # Thresholds of 0 keep every rule; filtering happens afterwards.
        _itemsets, rules = apriori(transactions, min_support=0, min_confidence=0)
        for rule in select_tag_rules(rules):
            print(rule)  # Prints the rule and its confidence, support, lift, ...
            documents.append(rule_to_document(rule))

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as outfile:
        json.dump(documents, outfile, indent=4)


if __name__ == "__main__":
    main()