Skip to content

Commit

Permalink
[ALS-7452] Sort unfilterable vars to EOL
Browse files Browse the repository at this point in the history
  • Loading branch information
Luke Sikina committed Oct 8, 2024
1 parent 6bebc56 commit c0abd7d
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import edu.harvard.dbmi.avillach.dictionary.facet.Facet;
import edu.harvard.dbmi.avillach.dictionary.filter.Filter;
import edu.harvard.dbmi.avillach.dictionary.filter.QueryParamPair;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.domain.Pageable;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.stereotype.Component;
Expand All @@ -14,6 +16,22 @@

@Component
public class ConceptFilterQueryGenerator {
private final List<String> disallowedMetaFields;

/**
 * SQL fragment that defines the {@code allow_filtering} CTE.
 * <p>
 * The fragment starts with a comma because it is spliced directly after another CTE inside an
 * existing {@code WITH} clause (see {@code generateFilterQuery}).
 * <p>
 * For every concept node that has at least one meta row whose key is in
 * {@code :disallowed_meta_keys}, the values of those rows are concatenated, and
 * {@code rank_adjustment} is 0 when the concatenation contains {@code true} and 1 otherwise.
 * Concept nodes with no such meta rows produce no row here (inner join), so consumers must
 * handle the missing case themselves.
 * <p>
 * NOTE(review): LIKE is case-sensitive in PostgreSQL, so a meta value of {@code TRUE} would not
 * count as a match - confirm these flags are always stored in lowercase.
 */
private static final String RANK_ADJUSTMENTS = """
, allow_filtering AS (
SELECT
concept_node.concept_node_id AS concept_node_id,
(string_agg(concept_node_meta.value, ' ') NOT LIKE '%' || 'true' || '%')::int AS rank_adjustment
FROM
concept_node
JOIN concept_node_meta ON
concept_node.concept_node_id = concept_node_meta.concept_node_id
AND concept_node_meta.KEY IN (:disallowed_meta_keys)
GROUP BY
concept_node.concept_node_id
)
""";

private static final String CONSENT_QUERY = """
dataset.dataset_id IN (
Expand All @@ -36,6 +54,13 @@ dataset.dataset_id IN (
) AND
""";

/**
 * Creates the generator with the meta keys used to detect unfilterable concepts.
 *
 * @param disallowedMetaFields meta keys read from the {@code filtering.unfilterable_concepts}
 *                             property; bound to the {@code :disallowed_meta_keys} parameter of
 *                             every generated filter query
 * @throws NullPointerException if the list, or any element of it, is null
 */
@Autowired
public ConceptFilterQueryGenerator(
    @Value("${filtering.unfilterable_concepts}") List<String> disallowedMetaFields
) {
    // Take an unmodifiable snapshot so later changes to the caller's list cannot alter the
    // keys bound into generated queries, and so a null configuration fails fast here rather
    // than surfacing as a confusing SQL error at query time.
    this.disallowedMetaFields = List.copyOf(disallowedMetaFields);
}

/**
* This generates a query that will return a list of concept_node IDs for the given filter.
* <p>
Expand All @@ -50,6 +75,7 @@ dataset.dataset_id IN (
*/
public QueryParamPair generateFilterQuery(Filter filter, Pageable pageable) {
MapSqlParameterSource params = new MapSqlParameterSource();
params.addValue("disallowed_meta_keys", disallowedMetaFields);
List<String> clauses = new java.util.ArrayList<>(List.of());
if (!CollectionUtils.isEmpty(filter.facets())) {
clauses.addAll(createFacetFilter(filter, params));
Expand All @@ -68,11 +94,19 @@ public QueryParamPair generateFilterQuery(Filter filter, Pageable pageable) {
WITH q AS (
%s
)
SELECT concept_node_id
%s
SELECT q.concept_node_id AS concept_node_id
FROM q
GROUP BY concept_node_id
ORDER BY max(rank) DESC
""".formatted(query);
LEFT JOIN allow_filtering ON allow_filtering.concept_node_id = q.concept_node_id
GROUP BY q.concept_node_id
ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC, q.concept_node_id ASC
""".formatted(query, RANK_ADJUSTMENTS);
// Explanation of ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC:
// We want to sort the best matches first, BUT anything that is marked as unfilterable should be put last.
// rank_adjustment is 0 for unfilterable concepts and 1 otherwise. coalesce returns its first non-null value, so
// concepts with no row in allow_filtering (i.e. not marked either way) default to 1 and are treated as filterable.
// The adjustment is multiplied by (1 + rank) instead of just rank so that a rank value of 0 for a filterable var
// still scores 1 and is placed above an unfilterable var, which always scores 0.
// Finally, the concept node id is added to the sort to keep it stable for ties; otherwise pagination gets weird.

if (pageable.isPaged()) {
superQuery = superQuery + """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,23 @@ void shouldListAllConcepts() {
@Test
void shouldListFirstTwoConcepts() {
List<Concept> actual = subject.getConcepts(new Filter(List.of(), "", List.of()), Pageable.ofSize(2).first());
List<? extends Record> expected = List.of(
new ContinuousConcept("\\phs000007\\pht000021\\phv00003844\\FL200\\", "phv00003844", "FL200", "phs000007", "# 12 OZ CUPS OF CAFFEINATED COLA / DAY", true, 0F, 3F, "FHS", null),
new CategoricalConcept("\\Variant Data Type\\Low coverage WGS\\", "Low coverage WGS", "Low coverage WGS", "1", "Low coverage WGS", List.of("TRUE"), true, "GIC", null, null)
List<String> expectedPaths = List.of(
"\\ACT Diagnosis ICD-10\\J00-J99 Diseases of the respiratory system (J00-J99)\\J40-J47 Chronic lower respiratory diseases (J40-J47)\\J45 Asthma\\J45.5 Severe persistent asthma\\J45.52 Severe persistent asthma with status asthmaticus\\",
"\\ACT Diagnosis ICD-10\\J00-J99 Diseases of the respiratory system (J00-J99)\\J40-J47 Chronic lower respiratory diseases (J40-J47)\\J45 Asthma\\J45.9 Other and unspecified asthma\\J45.90 Unspecified asthma\\J45.901 Unspecified asthma with (acute) exacerbation\\"
);

Assertions.assertEquals(expected, actual);
Assertions.assertEquals(expectedPaths, actual.stream().map(Concept::conceptPath).toList());
}

@Test
void shouldListNextTwoConcepts() {
List<Concept> actual = subject.getConcepts(new Filter(List.of(), "", List.of()), Pageable.ofSize(2).first().next());
List<? extends Record> expected = List.of(
new CategoricalConcept("\\phs002385\\RACEG\\", "RACEG", "RACEG", "phs002385", "Race (regrouped)", List.of("Not Reported"), true, "HCT_for_SCD", null, null),
new CategoricalConcept("\\Variant Data Type\\Low coverage WGS\\", "Low coverage WGS", "Low coverage WGS", "1", "Low coverage WGS", List.of("TRUE"), true, "GIC", null, null)
List<String> expectedPaths = List.of(
"\\ACT Diagnosis ICD-10\\J00-J99 Diseases of the respiratory system (J00-J99)\\J40-J47 Chronic lower respiratory diseases (J40-J47)\\J45 Asthma\\J45.9 Other and unspecified asthma\\J45.90 Unspecified asthma\\J45.902 Unspecified asthma with status asthmaticus\\",
"\\Bio Specimens\\HumanFluid\\Blood (Whole)\\SPECIMENS:HF.BLD.000 Quantity\\"
);

Assertions.assertEquals(expected, actual);
Assertions.assertEquals(expectedPaths, actual.stream().map(Concept::conceptPath).toList());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ static void mySQLProperties(DynamicPropertyRegistry registry) {
@Autowired
NamedParameterJdbcTemplate template;

@Test
void shouldPutStigvarsLastForEmptySearch() {
    // Empty filter, unpaged: the generated query should cover the whole seeded data set.
    QueryParamPair generated =
        subject.generateFilterQuery(new Filter(List.of(), "", List.of()), Pageable.unpaged());
    String sql = "WITH " + generated.query() + "\n SELECT * FROM concepts_filtered_sorted;";

    List<Integer> sortedConceptIds = template.queryForList(sql, generated.params(), Integer.class);

    // Concept 246 is the seed row with stigmatized = true; presumably "stigmatized" is one of
    // the configured unfilterable keys, so it must be the very last result.
    Assertions.assertEquals(246, sortedConceptIds.getLast());
}

@Test
void shouldGenerateForHarmonizedConsents() {
Filter filter = new Filter(List.of(), "", List.of("phs001963.c1"));
Expand Down
44 changes: 22 additions & 22 deletions src/test/resources/seed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -405,24 +405,24 @@ COPY public.concept_node (concept_node_id, dataset_id, name, display, concept_ty

COPY public.concept_node_meta (concept_node_meta_id, concept_node_id, key, value) FROM stdin;
19 186 description Approximate Synonyms:\nSevere persistent allergic asthma in status asthmaticus\nSevere persistent allergic asthma with status asthmaticus\nSevere persistent asthma in status asthmaticus\nSevere persistent asthma with allergic rhinitis in status asthmaticus\nSevere persistent asthma with allergic rhinitis with status asthmaticus
20 186 values J45.52 Severe persistent asthma with status asthmaticus
20 186 values ["J45.52 Severe persistent asthma with status asthmaticus"]
21 189 description Approximate Synonyms:\nAcute exacerbation of asthma with allergic rhinitis\nAllergic asthma with acute exacerbation\nAsthma, with acute exacerbation (flare-up)\nAsthma, with allergic rhinitis with acute exacerbation\nExacerbation of asthma
22 189 values J45.901 Unspecified asthma with (acute) exacerbation
22 189 values ["J45.901 Unspecified asthma with (acute) exacerbation"]
23 190 description Approximate Synonyms:\nAsthma with allergic rhinitis in status asthmaticus\nAsthma with allergic rhinitis with status asthmaticus\nAsthma with status\nAsthma with status asthmaticus\nAsthma, allergic with status asthmaticus\nExtrinsic asthma with status asthmaticus
24 190 values J45.902 Unspecified asthma with status asthmaticus
24 190 values ["J45.902 Unspecified asthma with status asthmaticus"]
25 211 description GIC biosample: wholeblood
26 211 data_source Biosample
27 212 values TRUE
28 214 values TRUE
27 212 values ["TRUE"]
28 214 values ["TRUE"]
29 216 description Those patients who align with the IRB Phase 2 protocols
30 216 values GIC Consent
30 216 values ["GIC Consent"]
31 217 description Those patients who DO NOT align with the IRB Phase 2 protocols
32 217 values GIC Legacy Consent
32 217 values ["GIC Legacy Consent"]
33 218 description Patients who have waived consent
34 218 values Waiver of Consent
34 218 values ["Waiver of Consent"]
35 222 description Heart rate is taken by the automated blood pressure/heart rate monitor and captured directly into the computer system. In the event the heart rate is not captured automatically at the end of stage 1, the technician would manually enter the readings from the heart rate monitor.
37 225 description Including living and deceased, were any of {SP's/your} close biological that is, blood relatives including father, mother, sisters or brothers, ever told by a health professional that they had a heart attack or angina (an-gi-na) before the age of 50?
38 225 values Yes
38 225 values ["Yes"]
39 229 description # 12 OZ CUPS OF CAFFEINATED COLA / DAY
41 229 stigmatized false
42 229 unique_identifier false
Expand All @@ -439,7 +439,7 @@ COPY public.concept_node_meta (concept_node_meta_id, concept_node_id, key, value
55 235 free_text false
56 235 bdc_open_access true
57 239 description Most recent occupation (A)
58 239 values ACCOUNTANT
58 239 values ["ACCOUNTANT"]
59 241 description Age
61 241 stigmatized false
62 241 unique_identifier false
Expand All @@ -451,66 +451,66 @@ COPY public.concept_node_meta (concept_node_meta_id, concept_node_id, key, value
69 242 free_text false
70 242 bdc_open_access true
71 244 description Patient age at transplant, years
72 244 values 42
72 244 values [42]
73 244 stigmatized false
74 244 unique_identifier false
75 244 free_text false
76 244 bdc_open_access true
77 244 hct status pre-hct
78 244 computed variable yes
79 245 description Race (regrouped)
80 245 values Not Reported
80 245 values ["Not Reported"]
81 245 stigmatized false
82 245 unique_identifier false
83 245 free_text false
84 245 bdc_open_access true
85 246 description Transplant Number
86 246 values 1
86 246 values [1]
87 246 stigmatized true
88 246 unique_identifier false
89 246 free_text false
90 246 bdc_open_access false
91 246 hct status pre-hct
92 246 computed variable yes
93 248 description Participant's age (category)
94 248 values 21
94 248 values [21]
95 248 stigmatized false
96 248 unique_identifier false
97 248 free_text false
98 248 bdc_open_access true
99 249 description Smoker status
100 249 values true
100 249 values ["true"]
101 249 stigmatized false
102 249 unique_identifier false
103 249 free_text false
104 249 bdc_open_access true
105 253 description (AFC) Reason for ending future contact: Withdrew consent for future nuMoM2b contact
106 253 values No
106 253 values ["No"]
107 253 unique_identifier false
108 253 free_text false
109 253 bdc_open_access true
110 255 description (V5A) Which of the following problems have a doctor or health care professional told you that you have with your kidney?: Other - Specify Field
111 255 values infection
111 255 values ["infection"]
112 255 unique_identifier false
113 255 free_text true
114 255 bdc_open_access false
115 258 description (T01) Are you currently prescribed medication for your high blood pressure?
116 258 values Yes
116 258 values ["Yes"]
117 258 unique_identifier false
118 258 free_text false
119 258 bdc_open_access true
120 266 description Genotype array
121 266 values TRUE
121 266 values ["TRUE"]
122 267 description Low coverage WGS
123 267 values TRUE
123 267 values ["TRUE"]
124 268 description Whole exome sequencing
40 229 values [0, 3]
46 232 values [0, 1]
52 235 values [0.57,6.77]
60 241 values ["5E-21", "7E+33"]
125 268 values TRUE
125 268 values ["TRUE"]
126 269 description Whole genome sequencing
127 269 values TRUE
127 269 values ["TRUE"]
128 227 description Clinic Exam, Original Cohort Exam 19
129 230 description Clinic Exam, Original Cohort Exam 20
130 233 description Clinic Exam, Offspring Cohort Exam 4
Expand Down

0 comments on commit c0abd7d

Please sign in to comment.