Skip to content

Commit

Permalink
refactor: reduce default batch and partition sizes for improved stability
Browse files Browse the repository at this point in the history

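Reduces default batch and partition sizes to 250,000 rows across CDC and snapshot operations (previously 1,000,000 for the CDC batch size and the UI defaults, and 500,000 for the backend snapshot partition size):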
- Lower CDC batch size for incremental syncs
- Reduce snapshot partition size for initial loads
- Update UI defaults and tooltips to reflect new values

This change helps prevent memory pressure and timeout issues when processing large datasets.
  • Loading branch information
iskakaushik committed Nov 17, 2024
1 parent cd58315 commit f2c00b2
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
2 changes: 1 addition & 1 deletion flow/activities/flowable_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func syncCore[TPull connectors.CDCPullConnectorCore, TSync connectors.CDCSyncCon

batchSize := options.BatchSize
if batchSize == 0 {
batchSize = 1_000_000
batchSize = 250_000
}

lastOffset, err := func() (int64, error) {
Expand Down
2 changes: 1 addition & 1 deletion flow/workflows/snapshot_flow.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func (s *SnapshotFlowExecution) cloneTable(
numWorkers = s.config.SnapshotMaxParallelWorkers
}

numRowsPerPartition := uint32(500000)
numRowsPerPartition := uint32(250000)
if s.config.SnapshotNumRowsPerPartition > 0 {
numRowsPerPartition = s.config.SnapshotNumRowsPerPartition
}
Expand Down
12 changes: 6 additions & 6 deletions ui/app/mirrors/create/helpers/cdc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ export const cdcSettings: MirrorSetting[] = [
setter(
(curr: CDCConfig): CDCConfig => ({
...curr,
maxBatchSize: (value as number) || 1000000,
maxBatchSize: (value as number) || 250000,
})
),
tips: 'The number of rows PeerDB will pull from source at a time. If left empty, the default value is 1,000,000 rows.',
tips: 'The number of rows PeerDB will pull from source at a time. If left empty, the default value is 250,000 rows.',
type: 'number',
default: '1000000',
default: '250000',
advanced: AdvancedSettingType.ALL,
},
{
Expand Down Expand Up @@ -78,11 +78,11 @@ export const cdcSettings: MirrorSetting[] = [
setter(
(curr: CDCConfig): CDCConfig => ({
...curr,
snapshotNumRowsPerPartition: parseInt(value as string, 10) || 1000000,
snapshotNumRowsPerPartition: parseInt(value as string, 10) || 250000,
})
),
tips: 'PeerDB splits up table data into partitions for increased performance. This setting controls the number of rows per partition. The default value is 1000000.',
default: '1000000',
tips: 'PeerDB splits up table data into partitions for increased performance. This setting controls the number of rows per partition. The default value is 250000.',
default: '250000',
type: 'number',
advanced: AdvancedSettingType.ALL,
},
Expand Down
4 changes: 2 additions & 2 deletions ui/app/mirrors/create/helpers/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ export const blankCDCSetting: CDCConfig = {
destinationName: '',
flowJobName: '',
tableMappings: [],
maxBatchSize: 1000000,
maxBatchSize: 250000,
doInitialSnapshot: true,
publicationName: '',
snapshotNumRowsPerPartition: 1000000,
snapshotNumRowsPerPartition: 250000,
snapshotMaxParallelWorkers: 4,
snapshotNumTablesInParallel: 1,
snapshotStagingPath: '',
Expand Down

0 comments on commit f2c00b2

Please sign in to comment.