From 116bf04d33e886fdbc86fb7813437124569d19b6 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Sat, 4 Nov 2023 13:04:41 -0700 Subject: [PATCH] kvstreamer: increase default avg response multiple This commit increases the default value for `sql.distsql.streamer.avg_response_size_multiple` cluster setting from 1.5 to 3.0. This setting controls the factor by which the current "avg response size" estimate is multiplied and allows for TargetBytes parameter to grow over time. In the reproduction query from the previous commit it was determined that the growth might not be as quick as desirable. The effect of this change is as follows: - if we have responses of varying sizes, then we're now likely to be more effective since we'll end up issuing less BatchRequests - if we have responses of similar sizes, then we might pre-reserve too much budget upfront, so we'll end up with lower concurrency across ranges. Thus, we don't want to increase the multiple by too much; however, keeping it at 1.5 can be quite suboptimal in some cases - 3.0 seems like a decent middle ground. This number was chosen based on running TPCH queries (both via InOrder and OutOfOrder modes of the streamer) and the reproduction query. (For the latter this change reduces the number of gRPC calls by a factor of 3 or so.) Release note: None --- pkg/kv/kvclient/kvstreamer/avg_response_estimator.go | 5 +++-- pkg/kv/kvclient/kvstreamer/avg_response_estimator_test.go | 8 ++++---- pkg/kv/kvclient/kvstreamer/streamer_test.go | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pkg/kv/kvclient/kvstreamer/avg_response_estimator.go b/pkg/kv/kvclient/kvstreamer/avg_response_estimator.go index bc894b491e68..e88ce566a538 100644 --- a/pkg/kv/kvclient/kvstreamer/avg_response_estimator.go +++ b/pkg/kv/kvclient/kvstreamer/avg_response_estimator.go @@ -34,8 +34,9 @@ const ( // response. // TODO(yuzefovich): use the optimizer-driven estimates. InitialAvgResponseSize = 1 << 10 // 1KiB - // This value was determined using tpchvec/bench test on all TPC-H queries. - defaultAvgResponseSizeMultiple = 1.5 + // This value was determined using tpchvec/bench test on all TPC-H queries + // as well as the query in TestStreamerVaryingResponseSizes. + defaultAvgResponseSizeMultiple = 3.0 ) // streamerAvgResponseSizeMultiple determines the multiple used when calculating diff --git a/pkg/kv/kvclient/kvstreamer/avg_response_estimator_test.go b/pkg/kv/kvclient/kvstreamer/avg_response_estimator_test.go index 2708d95582c1..7b4c8d2016ba 100644 --- a/pkg/kv/kvclient/kvstreamer/avg_response_estimator_test.go +++ b/pkg/kv/kvclient/kvstreamer/avg_response_estimator_test.go @@ -85,10 +85,10 @@ func TestAvgResponseSizeForPartialResponses(t *testing.T) { } // We started with the TargetBytes equal to the initial estimate of 1KiB, // and then with each update the estimate should have grown. In particular, - // we expect 7 BatchRequests total that fetch 1, 1, 3, 7, 18, 45, 25 rows - // respectively (note that the growth is faster than 2x because we use 1.5 - // multiple on top of the average). - require.Equal(t, 7, batchRequestsCount) + // we expect 5 BatchRequests total that fetch 1, 3, 12, 48, 36 rows + // respectively (note that the growth is 3-4x because we use 3.0 multiple on + // top of the average). + require.Equal(t, 5, batchRequestsCount) // From the perspective of the response estimator, we received only one // response (that happened to be paginated across BatchRequests), so our // estimate should match exactly the total size. diff --git a/pkg/kv/kvclient/kvstreamer/streamer_test.go b/pkg/kv/kvclient/kvstreamer/streamer_test.go index 3af419802483..c1607082b22e 100644 --- a/pkg/kv/kvclient/kvstreamer/streamer_test.go +++ b/pkg/kv/kvclient/kvstreamer/streamer_test.go @@ -584,7 +584,7 @@ ALTER TABLE t SPLIT AT SELECT generate_series(1, 30000, 3000); // The meat of the test - run the query that performs an index join to fetch // all rows via the streamer, both in the OutOfOrder and InOrder modes. Each - // time assert that the number of BatchRequests issued is in low hundreds + // time assert that the number of BatchRequests issued is in double digits // (if not, then the streamer was extremely suboptimal). kvGRPCCallsRegex := regexp.MustCompile(`KV gRPC calls: (\d+,)`) for inOrder := range []bool{false, true} { @@ -600,7 +600,7 @@ ALTER TABLE t SPLIT AT SELECT generate_series(1, 30000, 3000); break } } - require.Greater(t, 300, gRPCCalls, rows) + require.Greater(t, 100, gRPCCalls, rows) } } }