Skip to content

Commit

Permalink
save
Browse files Browse the repository at this point in the history
  • Loading branch information
dsmilkov committed Feb 28, 2024
1 parent 97b6cd4 commit 20d3fc2
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 1 deletion.
7 changes: 7 additions & 0 deletions lilac/data/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ def cluster_impl(
else:
raise ValueError('input must be provided.')

if use_garden and skip_noisy_assignment:
raise ValueError(
'`use_garden` and `skip_noisy_assignment` cannot both be True. '
'The garden implementation is heavily optimized and will always '
'assign noisy points to the nearest cluster.'
)

# Extract the text from the input path into a temporary column.
TEXT_COLUMN = 'text'
temp_text_path = (*cluster_output_path, TEXT_COLUMN)
Expand Down
6 changes: 6 additions & 0 deletions lilac/router_dataset_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ class ClusterOptions(BaseModel):
use_garden: bool = PydanticField(
default=False, description='Accelerate computation by running remotely on Lilac Garden.'
)
skip_noisy_assignment: bool = PydanticField(
default=False,
description='Skip assignment of noisy points to the nearest cluster to speed up clustering.',
)

overwrite: bool = False


Expand Down Expand Up @@ -145,6 +150,7 @@ def run() -> None:
use_garden=options.use_garden,
overwrite=options.overwrite,
task_id=task_id,
skip_noisy_assignment=options.skip_noisy_assignment,
)

launch_task(task_id, run)
Expand Down
18 changes: 17 additions & 1 deletion web/blueprint/src/lib/components/ComputeClusterModal.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
input: Path;
output_path?: Path;
use_garden?: boolean;
skip_noisy_assignment?: boolean;
overwrite?: boolean;
};
Expand Down Expand Up @@ -101,7 +102,8 @@
use_garden: options.use_garden,
output_path: outputColumn,
input_selector: selectedFormatSelector,
overwrite: options.overwrite
overwrite: options.overwrite,
skip_noisy_assignment: options.skip_noisy_assignment
}
]);
close();
Expand Down Expand Up @@ -173,6 +175,20 @@
</div>
{/if}
</div>

<div>
<div class="label mb-2 font-medium text-gray-700">Skip noisy assignment</div>
<div class="label text-sm text-gray-700">
Skip assignment of noisy points to the nearest cluster to speed up clustering.
</div>
<Toggle
labelA={'False'}
labelB={'True'}
bind:toggled={options.skip_noisy_assignment}
hideLabel
/>
</div>

<div>
<div class="label text-s mb-2 font-medium text-gray-700">Overwrite</div>
<Toggle labelA={'False'} labelB={'True'} bind:toggled={options.overwrite} hideLabel />
Expand Down
4 changes: 4 additions & 0 deletions web/lib/fastapi_client/models/ClusterOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ export type ClusterOptions = {
* Accelerate computation by running remotely on Lilac Garden.
*/
use_garden?: boolean;
/**
* Skip assignment of noisy points to the nearest cluster to speed up clustering.
*/
skip_noisy_assignment?: boolean;
overwrite?: boolean;
};

0 comments on commit 20d3fc2

Please sign in to comment.