Skip to content

Commit

Permalink
Use brief explain stats
Browse files Browse the repository at this point in the history
Use a more compact, easier-to-read format for the decompression and
arrow array cache stats in EXPLAIN output. Also simplify the code and
the tests by removing unnecessary parts.
  • Loading branch information
mkindahl committed Dec 17, 2024
1 parent 12f262c commit 1a43266
Show file tree
Hide file tree
Showing 20 changed files with 657 additions and 602 deletions.
103 changes: 40 additions & 63 deletions tsl/src/hypercore/arrow_cache_explain.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,43 +61,12 @@ standard_ExplainOneQuery(Query *query, int cursorOptions, IntoClause *into, Expl
}
#endif

static struct
static inline void
append_if_positive(StringInfo info, const char *key, long long val)
{
const char *hits_text; /* Number of cache hits */
const char *miss_text; /* Number of cache misses */
const char *evict_text; /* Number of cache evictions */
const char *decompress_text; /* Number of arrays decompressed */
const char *decompress_calls_text; /* Number of calls to decompress an array */
} format_texts[] = {
[EXPLAIN_FORMAT_TEXT] = {
.hits_text = "Array Cache Hits",
.miss_text = "Array Cache Misses",
.evict_text = "Array Cache Evictions",
.decompress_text = "Array Decompressions",
.decompress_calls_text = "Array Decompression Calls",
},
[EXPLAIN_FORMAT_XML]= {
.hits_text = "hits",
.miss_text = "misses",
.evict_text = "evictions",
.decompress_text = "decompressions",
.decompress_calls_text = "decompression calls",
},
[EXPLAIN_FORMAT_JSON] = {
.hits_text = "hits",
.miss_text = "misses",
.evict_text = "evictions",
.decompress_text = "decompressions",
.decompress_calls_text = "decompression calls",
},
[EXPLAIN_FORMAT_YAML] = {
.hits_text = "hits",
.miss_text = "misses",
.evict_text = "evictions",
.decompress_text = "decompressions",
.decompress_calls_text = "decompression calls",
},
};
if (val > 0)
appendStringInfo(info, " %s=%lld", key, val);
}

static void
explain_decompression(Query *query, int cursorOptions, IntoClause *into, ExplainState *es,
Expand All @@ -106,33 +75,41 @@ explain_decompression(Query *query, int cursorOptions, IntoClause *into, Explain
standard_ExplainOneQuery(query, cursorOptions, into, es, queryString, params, queryEnv);
if (decompress_cache_print)
{
Assert(es->format < sizeof(format_texts) / sizeof(*format_texts));

ExplainOpenGroup("Array cache", "Arrow Array Cache", true, es);
ExplainPropertyInteger(format_texts[es->format].hits_text,
NULL,
decompress_cache_stats.hits,
es);
ExplainPropertyInteger(format_texts[es->format].miss_text,
NULL,
decompress_cache_stats.misses,
es);
ExplainPropertyInteger(format_texts[es->format].evict_text,
NULL,
decompress_cache_stats.evictions,
es);
ExplainPropertyInteger(format_texts[es->format].decompress_text,
NULL,
decompress_cache_stats.decompressions,
es);

if (es->verbose)
ExplainPropertyInteger(format_texts[es->format].decompress_calls_text,
NULL,
decompress_cache_stats.decompress_calls,
es);

ExplainCloseGroup("Array cache", "Arrow Array Cache", true, es);
const bool has_decompress_data = decompress_cache_stats.decompressions > 0 ||
decompress_cache_stats.decompress_calls > 0;
const bool has_cache_data = decompress_cache_stats.hits > 0 ||
decompress_cache_stats.misses > 0 ||
decompress_cache_stats.evictions > 0;
if (has_decompress_data || has_cache_data)
{
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoString(es->str, "Array:");
if (has_cache_data)
appendStringInfoString(es->str, " cache");
append_if_positive(es->str, "hits", decompress_cache_stats.hits);
append_if_positive(es->str, "misses", decompress_cache_stats.misses);
append_if_positive(es->str, "evictions", decompress_cache_stats.evictions);
if (has_decompress_data)
appendStringInfoString(es->str, ", decompress");
append_if_positive(es->str, "count", decompress_cache_stats.decompressions);
append_if_positive(es->str, "calls", decompress_cache_stats.decompress_calls);
appendStringInfoChar(es->str, '\n');
}
else
{
ExplainOpenGroup("Array Cache", "Arrow Array Cache", true, es);
ExplainPropertyInteger("hits", NULL, decompress_cache_stats.hits, es);
ExplainPropertyInteger("misses", NULL, decompress_cache_stats.misses, es);
ExplainPropertyInteger("evictions", NULL, decompress_cache_stats.evictions, es);
ExplainCloseGroup("Array Cache", "Arrow Array Cache", true, es);

ExplainOpenGroup("Array Decompress", "Arrow Array Decompress", true, es);
ExplainPropertyInteger("count", NULL, decompress_cache_stats.decompressions, es);
ExplainPropertyInteger("calls", NULL, decompress_cache_stats.decompress_calls, es);
ExplainCloseGroup("Array Decompress", "Arrow Array Decompress", true, es);
}
}

decompress_cache_print = false;
memset(&decompress_cache_stats, 0, sizeof(struct DecompressCacheStats));
Expand Down
96 changes: 39 additions & 57 deletions tsl/test/expected/hypercore_columnar.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,23 @@
-- emitted plan. This is intended to be used when the structure of the
-- plan is important, but not the specific chunks scanned nor the
-- number of heap fetches, rows, loops, etc.
create function anonymize(ln text) returns text language plpgsql as
$$
begin
    -- Takes one line of EXPLAIN output and returns it with run-specific
    -- values replaced by placeholders.

    -- These substitutions are applied to every line. The chunk-name
    -- call passes start = 1 and N = 0; see the PostgreSQL
    -- regexp_replace documentation for the meaning of N = 0.
    ln := regexp_replace(ln, '_hyper_\d+_\d+_chunk', '_hyper_I_N_chunk', 1, 0);
    ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
    ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
    ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');

    -- The cache and decompression counters are only rewritten on the
    -- line that starts with "Array: " (ignoring surrounding whitespace).
    if trim(both from ln) like 'Array: %' then
        ln := regexp_replace(ln, 'hits=\d+', 'hits=N');
        ln := regexp_replace(ln, 'misses=\d+', 'misses=N');
        -- Evictions are printed on the same line whenever they are
        -- positive, so they have to be masked as well.
        ln := regexp_replace(ln, 'evictions=\d+', 'evictions=N');
        ln := regexp_replace(ln, 'count=\d+', 'count=N');
        ln := regexp_replace(ln, 'calls=\d+', 'calls=N');
    end if;
    return ln;
end
$$;
create function explain_analyze_anonymize(text) returns setof text
language plpgsql as
$$
Expand All @@ -18,17 +35,13 @@ begin
for ln in
execute format('explain (analyze, costs off, summary off, timing off, decompress_cache_stats) %s', $1)
loop
if trim(both from ln) like 'Group Key:%' then
-- Group keys are shown for plans in PG15 but not others, so
-- we remove these lines to avoid needing version-specific
-- tests.
if trim(both from ln) like 'Group Key:%' then
continue;
end if;
ln := regexp_replace(ln, 'Array Cache Hits: \d+', 'Array Cache Hits: N');
ln := regexp_replace(ln, 'Array Cache Misses: \d+', 'Array Cache Misses: N');
ln := regexp_replace(ln, 'Array Cache Evictions: \d+', 'Array Cache Evictions: N');
ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
ln := regexp_replace(ln, '_hyper_\d+_\d+_chunk', '_hyper_I_N_chunk', 1, 0);
return next ln;
return next anonymize(ln);
end loop;
end;
$$;
Expand All @@ -41,14 +54,7 @@ begin
for ln in
execute format('explain (costs off, summary off, timing off) %s', $1)
loop
ln := regexp_replace(ln, 'Array Cache Hits: \d+', 'Array Cache Hits: N');
ln := regexp_replace(ln, 'Array Cache Misses: \d+', 'Array Cache Misses: N');
ln := regexp_replace(ln, 'Array Cache Evictions: \d+', 'Array Cache Evictions: N');
ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
ln := regexp_replace(ln, '_hyper_\d+_\d+_chunk', '_hyper_I_N_chunk', 1, 0);
return next ln;
return next anonymize(ln);
end loop;
end;
$$;
Expand Down Expand Up @@ -101,11 +107,8 @@ $$, :'chunk'));
Scankey: (device < 4)
Vectorized Filter: (location = 2)
Rows Removed by Filter: 16
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 3
(9 rows)
Array: cache misses=N, decompress count=N calls=N
(6 rows)

-- Save away all data from the chunk so that we can compare.
create table saved as select * from :chunk;
Expand Down Expand Up @@ -136,11 +139,8 @@ $$, :'chunk'));
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
Vectorized Filter: (humidity > '110'::double precision)
Rows Removed by Filter: 204
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 30
(8 rows)
Array: cache misses=N, decompress count=N calls=N
(5 rows)

select count(*) from :chunk where humidity > 110;
count
Expand All @@ -159,11 +159,8 @@ $$, :'chunk'));
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
Vectorized Filter: (humidity > '50'::double precision)
Rows Removed by Filter: 87
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 30
(8 rows)
Array: cache misses=N, decompress count=N calls=N
(5 rows)

select lhs.count, rhs.count
from (select count(*) from :chunk where humidity > 50) lhs,
Expand Down Expand Up @@ -194,11 +191,8 @@ $$, :'chunk'));
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
Filter: (temp > '50'::numeric)
Rows Removed by Filter: 204
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 30
(8 rows)
Array: cache misses=N, decompress count=N calls=N
(5 rows)

select count(*) from :chunk where temp > 50;
count
Expand All @@ -216,11 +210,8 @@ $$, :'chunk'));
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
Filter: (temp > '20'::numeric)
Rows Removed by Filter: 98
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 30
(8 rows)
Array: cache misses=N, decompress count=N calls=N
(5 rows)

select lhs.count, rhs.count
from (select count(*) from :chunk where temp > 20) lhs,
Expand Down Expand Up @@ -251,11 +242,8 @@ select count(*) from :chunk where humidity > 40 and temp > 20;
Filter: (temp > '20'::numeric)
Rows Removed by Filter: 132
Vectorized Filter: (humidity > '40'::double precision)
Array Cache Hits: 0
Array Cache Misses: 30
Array Cache Evictions: 0
Array Decompressions: 60
(9 rows)
Array: cache misses=30, decompress count=60 calls=165
(6 rows)

select count(*) from :chunk where humidity > 40 and temp > 20;
count
Expand Down Expand Up @@ -284,11 +272,8 @@ $$, :'chunk'));
Rows Removed by Filter: 3
Scankey: (device = 3)
Vectorized Filter: (humidity > '40'::double precision)
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 2
(10 rows)
Array: cache misses=N, decompress count=N calls=N
(7 rows)

select count(*) from :chunk where humidity > 40 and temp > 20 and device = 3;
count
Expand Down Expand Up @@ -318,11 +303,8 @@ $$, :'chunk'));
-> Seq Scan on _hyper_I_N_chunk (actual rows=N loops=N)
Filter: (device < 4)
Rows Removed by Filter: 184
Array Cache Hits: N
Array Cache Misses: N
Array Cache Evictions: N
Array Decompressions: 96
(11 rows)
Array: cache misses=N, decompress count=N calls=N
(8 rows)

drop table readings;
drop table saved;
40 changes: 23 additions & 17 deletions tsl/test/expected/hypercore_constraints.out
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@
-- emitted plan. This is intended to be used when the structure of the
-- plan is important, but not the specific chunks scanned nor the
-- number of heap fetches, rows, loops, etc.
create function anonymize(ln text) returns text language plpgsql as
$$
begin
    -- Takes one line of EXPLAIN output and returns it with run-specific
    -- values replaced by placeholders.

    -- These substitutions are applied to every line. The chunk-name
    -- call passes start = 1 and N = 0; see the PostgreSQL
    -- regexp_replace documentation for the meaning of N = 0.
    ln := regexp_replace(ln, '_hyper_\d+_\d+_chunk', '_hyper_I_N_chunk', 1, 0);
    ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
    ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
    ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');

    -- The cache and decompression counters are only rewritten on the
    -- line that starts with "Array: " (ignoring surrounding whitespace).
    if trim(both from ln) like 'Array: %' then
        ln := regexp_replace(ln, 'hits=\d+', 'hits=N');
        ln := regexp_replace(ln, 'misses=\d+', 'misses=N');
        -- Evictions are printed on the same line whenever they are
        -- positive, so they have to be masked as well.
        ln := regexp_replace(ln, 'evictions=\d+', 'evictions=N');
        ln := regexp_replace(ln, 'count=\d+', 'count=N');
        ln := regexp_replace(ln, 'calls=\d+', 'calls=N');
    end if;
    return ln;
end
$$;
create function explain_analyze_anonymize(text) returns setof text
language plpgsql as
$$
Expand All @@ -24,17 +41,13 @@ begin
for ln in
execute format('explain (analyze, costs off, summary off, timing off, decompress_cache_stats) %s', $1)
loop
if trim(both from ln) like 'Group Key:%' then
-- Group keys are shown for plans in PG15 but not others, so
-- we remove these lines to avoid needing version-specific
-- tests.
if trim(both from ln) like 'Group Key:%' then
continue;
end if;
ln := regexp_replace(ln, 'Array Cache Hits: \d+', 'Array Cache Hits: N');
ln := regexp_replace(ln, 'Array Cache Misses: \d+', 'Array Cache Misses: N');
ln := regexp_replace(ln, 'Array Cache Evictions: \d+', 'Array Cache Evictions: N');
ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
ln := regexp_replace(ln, '_hyper_\d+_\d+_chunk', '_hyper_I_N_chunk', 1, 0);
return next ln;
return next anonymize(ln);
end loop;
end;
$$;
Expand All @@ -47,14 +60,7 @@ begin
for ln in
execute format('explain (costs off, summary off, timing off) %s', $1)
loop
ln := regexp_replace(ln, 'Array Cache Hits: \d+', 'Array Cache Hits: N');
ln := regexp_replace(ln, 'Array Cache Misses: \d+', 'Array Cache Misses: N');
ln := regexp_replace(ln, 'Array Cache Evictions: \d+', 'Array Cache Evictions: N');
ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
ln := regexp_replace(ln, '_hyper_\d+_\d+_chunk', '_hyper_I_N_chunk', 1, 0);
return next ln;
return next anonymize(ln);
end loop;
end;
$$;
Expand Down
Loading

0 comments on commit 1a43266

Please sign in to comment.