Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
TolyaTalamanov committed Jan 2, 2025
1 parent 9f6aca4 commit 808d6b9
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 36 deletions.
52 changes: 26 additions & 26 deletions src/cpp/src/llm_pipeline_static.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2024 Intel Corporation
// Copyright (C) 2024-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "llm_pipeline_static.hpp"
Expand Down Expand Up @@ -635,6 +635,31 @@ void copy_columns_by_row_chunks(const ov::Tensor& src, ov::Tensor& dst) {
}
}

void stream_generated_tokens(std::shared_ptr<StreamerBase> streamer_ptr,
GenerationHandle& handle) {
if (streamer_ptr && handle->can_read()) {
std::unordered_map<uint64_t, GenerationOutput> token = handle->back();
for (const auto& gen_token : token.begin()->second.generated_ids) {
if (streamer_ptr->put(gen_token)) {
handle->drop();
break;
}
}
}
}

// Returns the most recently generated token of the group's single
// running sequence. Preconditions (asserted): exactly one running
// sequence and exactly one scheduled token.
int64_t get_last_token(SequenceGroup::Ptr sequence_group) {
    OPENVINO_ASSERT(sequence_group->get_num_scheduled_tokens() == 1u);

    const auto sequences = sequence_group->get_running_sequences();
    OPENVINO_ASSERT(sequences.size() == 1u);

    // generated_ids starts counting after the prompt, so the index of the
    // last produced token is processed-token count minus the prompt length.
    const auto last_idx = sequence_group->get_num_processed_tokens() - sequence_group->get_prompt_len();
    return sequences.front()->get_generated_ids()[last_idx];
}

} // anonymous namespace

namespace ov {
Expand Down Expand Up @@ -945,31 +970,6 @@ DecodedResults StaticLLMPipeline::generate(
return decoded_results;
}

// Pushes newly generated tokens from the generation handle into the
// user-provided streamer; a no-op when no streamer is set or nothing
// is readable yet.
void stream_generated_tokens(std::shared_ptr<StreamerBase> streamer_ptr,
                             GenerationHandle& handle) {
    if (streamer_ptr && handle->can_read()) {
        // back() returns the latest outputs keyed by sequence id.
        std::unordered_map<uint64_t, GenerationOutput> token = handle->back();
        for (const auto& gen_token : token.begin()->second.generated_ids) {
            // put() returns true when the streamer wants generation stopped.
            if (streamer_ptr->put(gen_token)) {
                handle->drop();
                break;
            }
        }
    }
}

// Returns the most recently generated token of the group's single
// running sequence. Preconditions (asserted): exactly one running
// sequence and exactly one scheduled token.
int64_t get_last_token(SequenceGroup::Ptr sequence_group) {
    const auto running_sequences = sequence_group->get_running_sequences();
    OPENVINO_ASSERT(running_sequences.size() == 1u);
    const auto sequence = running_sequences.front();

    size_t num_scheduled_tokens = sequence_group->get_num_scheduled_tokens();
    OPENVINO_ASSERT(num_scheduled_tokens == 1u);

    // generated_ids starts counting after the prompt, so the last produced
    // token sits at processed-token count minus the prompt length.
    const auto num_processed_tokens = sequence_group->get_num_processed_tokens();
    return sequence->get_generated_ids()[num_processed_tokens - sequence_group->get_prompt_len()];
}

EncodedResults StaticLLMPipeline::generate(
const EncodedInputs& inputs,
OptionalGenerationConfig generation_config,
Expand Down
3 changes: 1 addition & 2 deletions src/cpp/src/llm_pipeline_static.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
// Copyright (C) 2024 Intel Corporation
// Copyright (C) 2024-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <filesystem>
#include <random>

#include "llm_pipeline_base.hpp"
#include "sampler.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/sampler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2023-2024 Intel Corporation
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "sampler.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/sampler.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

// Copyright (C) 2023-2024 Intel Corporation
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/sequence_group.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2023-2024 Intel Corporation
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once
Expand Down
10 changes: 5 additions & 5 deletions tests/python_tests/test_llm_pipeline_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ def test_generation_compare_with_stateful(generation_config):
@pytest.mark.nightly
@pytest.mark.parametrize("generation_config", generation_configs)
def test_multinomial_sampling(generation_config):
# Multinomial sampling is highly sensitive to raw logits values. For fair comparison,
# a reference implementation producing identical logits (e.g., from StaticLLMPipeline)
# would be necessary. However, the CPU in StatefulPipeline and StaticLLMPipeline may apply
# Multinomial sampling is highly sensitive to raw logits values. For fair comparison,
# a reference implementation producing identical logits (e.g., from StaticLLMPipeline)
# would be necessary. However, the CPU in StatefulPipeline and StaticLLMPipeline may apply
# different optimizations due to differences in provided topologies, leading to slight
# variations in raw logits. Therefore, there is no reliable reference for validation,
# so only ensure that no exceptions are raised.
Expand Down Expand Up @@ -163,13 +163,13 @@ def test_batch_raise_error():


# TODO: For the further sampling support
generation_config = [
generation_configs = [
get_beam_search(),
# NB: Only num_return_sequences=1 is supported!
get_multinomial_all_parameters()
]
@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
@pytest.mark.parametrize("generation_config", generation_config)
@pytest.mark.parametrize("generation_config", generation_configs)
@pytest.mark.precommit
@pytest.mark.nightly
def test_unsupported_sampling_raise_error(generation_config):
Expand Down

0 comments on commit 808d6b9

Please sign in to comment.