Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embedding / rerank for windows #2937

Merged
merged 21 commits into from
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ci/lib_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def check_dir(start_dir):
'opencv_cmake_flags.txt',
'ovms-c/dist',
'requirements.txt',
'requirements_win.txt',
'resnet_images.txt',
"resnet_labels.txt",
'rest_sdk_v2.10.16.patch',
Expand Down Expand Up @@ -219,6 +220,7 @@ def check_func(start_dir):
'openvino.LICENSE.txt',
'ovms-c/dist',
'requirements.txt',
'requirements_win.txt',
'rest_sdk_v2.10.16.patch',
'summator.xml',
'tf.patch',
Expand Down
11 changes: 11 additions & 0 deletions demos/common/export_models/requirements_win.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Python dependencies installed by windows_prepare_llm_models.bat
# (pip install -U -r demos\common\export_models\requirements_win.txt)
# when preparing the LLM testing models on Windows.

# Additional package indexes: PyTorch CPU wheels and OpenVINO nightly wheels.
--extra-index-url "https://download.pytorch.org/whl/cpu"
--extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
# Let pip pick pre-release/development versions; the openvino bounds below are .dev builds.
--pre
# optimum-intel is taken directly from its GitHub repository instead of a released package.
optimum-intel@git+https://github.com/huggingface/optimum-intel.git
# openvino and its tokenizers share the same upper bound (2025.0.0.dev20241123).
openvino-tokenizers[transformers]<=2025.0.0.dev20241123
openvino<=2025.0.0.dev20241123
nncf>=2.11.0
sentence_transformers==3.1.1
openai
# transformers is capped below 4.45.
transformers<4.45
einops
10 changes: 2 additions & 8 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,7 @@ cc_test(
"test/custom_node_output_allocator_test.cpp",
"test/demultiplexer_node_test.cpp",
"test/deserialization_tests.cpp",
"test/embeddingsnode_test.cpp",
"test/ensemble_config_change_stress.cpp",
"test/ensemble_flow_custom_node_tests.cpp",
"test/ensemble_mapping_config_tests.cpp",
Expand Down Expand Up @@ -1849,6 +1850,7 @@ cc_test(
"test/predict_validation_test.cpp",
"test/prediction_service_test.cpp",
"test/rest_utils_test.cpp",
"test/reranknode_test.cpp",
"test/schema_test.cpp",
"test/sequence_manager_test.cpp",
"test/sequence_test.cpp",
Expand All @@ -1872,14 +1874,6 @@ cc_test(
"test/threadsafequeue_test.cpp",
"test/unit_tests.cpp",
] + select({
"//:is_windows_or_mediapipe_is_disabled_no_http": [
],
# Tests that require MediaPipe and HTTP (windows does not have HTTP yet)
"//conditions:default" : [
"test/reranknode_test.cpp", # TODO: Enable on windows
"test/embeddingsnode_test.cpp", # TODO: Enable on windows
],
}) + select({
"//:not_disable_cloud": [
"test/gcsfilesystem_test.cpp",
"test/azurefilesystem_test.cpp",
Expand Down
2 changes: 1 addition & 1 deletion src/modelinstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ enum : unsigned int {
} // namespace

namespace ov {
struct Meta; // pure fwd declaration in getRTInfo
class Meta; // pure fwd declaration in getRTInfo
}

namespace ovms {
Expand Down
24 changes: 18 additions & 6 deletions src/test/embeddingsnode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class EmbeddingsHttpTest : public V3HttpTest {
public:
static void SetUpTestSuite() {
std::string port = "9173";
std::string configPath = "/ovms/src/test/embeddings/config_embeddings.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/embeddings/config_embeddings.json");
SetUpSuite(port, configPath, t);
}

Expand Down Expand Up @@ -299,18 +299,21 @@ class EmbeddingsExtensionTest : public ::testing::Test {
ovms::HttpResponseComponents responseComponents;

static void SetUpTestSuite() {
#ifdef _WIN32
GTEST_SKIP() << "Skipping test because we have no custom extension built for Windows";
#endif
std::string port = "9173";
ovms::Server& server = ovms::Server::instance();
const char* configPath = "/ovms/src/test/embeddings/config_embeddings.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/embeddings/config_embeddings.json");
const char* extensionPath = std::filesystem::exists("/opt/libcustom_relu_cpu_extension.so") ? "/opt/libcustom_relu_cpu_extension.so" : "/ovms/src/example/SampleCpuExtension/libcustom_relu_cpu_extension.so";
server.setShutdownRequest(0);
randomizePort(port);
char* argv[] = {(char*)"ovms",
(char*)"--config_path",
(char*)configPath,
(char*)configPath.c_str(),
(char*)"--cpu_extension",
(char*)extensionPath,
(char*)"--port",
(char*)"--port ",
(char*)port.c_str()};
int argc = 5;
t.reset(new std::thread([&argc, &argv, &server]() {
Expand All @@ -324,20 +327,29 @@ class EmbeddingsExtensionTest : public ::testing::Test {
}

void SetUp() {
#ifdef _WIN32
    GTEST_SKIP() << "Skipping test because we have no custom extension built for Windows";
#endif
    // Per-test fixture state: a fresh mocked request writer and a REST handler
    // bound to the singleton server instance.
    writer = std::make_shared<MockedServerRequestInterface>();
    handler = std::make_unique<ovms::HttpRestApiHandler>(ovms::Server::instance(), 5);

    // Parse the embeddings endpoint up front; the fixture is only usable when
    // the request components were parsed successfully.
    ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpointEmbeddings, headers), ovms::StatusCode::OK);
}

static void TearDownTestSuite() {
#ifdef _WIN32
    GTEST_SKIP() << "Skipping test because we have no custom extension built for Windows";
#endif
    // Raise the shutdown request, wait for the server thread held in `t` to
    // finish, then lower the request flag again.
    auto& ovmsServer = ovms::Server::instance();
    ovmsServer.setShutdownRequest(1);
    t->join();
    ovmsServer.setShutdownRequest(0);
}

void TearDown() {
#ifdef _WIN32
    GTEST_SKIP() << "Skipping test because we have no custom extension built for Windows";
#endif
    // Release the REST handler that SetUp() created for this test.
    handler = nullptr;
}
};
Expand Down Expand Up @@ -371,7 +383,7 @@ class EmbeddingsInvalidConfigTest : public V3HttpTest {
public:
static void SetUpTestSuite() {
std::string port = "9173";
std::string configPath = "/ovms/src/test/embeddings/invalid_config_embeddings.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/embeddings/invalid_config_embeddings.json");
SetUpSuite(port, configPath, t);
}

Expand Down Expand Up @@ -399,7 +411,7 @@ class EmbeddingsInvalidTokenizerConfigTest : public V3HttpTest {
public:
static void SetUpTestSuite() {
std::string port = "9173";
std::string configPath = "/ovms/src/test/embeddings/invalid_config_tokenizer.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/embeddings/invalid_config_tokenizer.json");
SetUpSuite(port, configPath, t);
}

Expand Down
6 changes: 3 additions & 3 deletions src/test/reranknode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class RerankHttpTest : public V3HttpTest {
public:
static void SetUpTestSuite() {
std::string port = "9173";
std::string configPath = "/ovms/src/test/rerank/config.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/rerank/config.json");
SetUpSuite(port, configPath, t);
}

Expand Down Expand Up @@ -208,7 +208,7 @@ class RerankWithParamsHttpTest : public V3HttpTest {
And maximum number of documents or chunks (after chunking process) can be 4
Allowed space for chunk is 12-6-4=2 tokens
*/
std::string configPath = "/ovms/src/test/rerank/with_params/config.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/rerank/with_params/config.json");
SetUpSuite(port, configPath, t);
}

Expand Down Expand Up @@ -324,7 +324,7 @@ class RerankWithInvalidParamsHttpTest : public V3HttpTest {

This is an invalid setup: 4 tokens are reserved for special tokens and the query may take at most half of max_position_embeddings (4), which leaves 0 tokens of space for the document
*/
std::string configPath = "/ovms/src/test/rerank/with_params/invalid_config.json";
std::string configPath = getGenericFullPathForSrcTest("/ovms/src/test/rerank/with_params/invalid_config.json");
SetUpSuite(port, configPath, t);
}

Expand Down
14 changes: 11 additions & 3 deletions windows_prepare_llm_models.bat
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ if "%~1"=="" (
exit /b 1
)

:: Create a link to preexported models on CI workers.
:: When c:\opt\llm_testing exists, the target directory passed as the first
:: argument is replaced by a directory symlink to it and the script exits
:: before the download steps that follow.
IF /I EXIST c:\opt\llm_testing (
    rmdir /S /Q "%~1"
    mklink /d "%~1" c:\opt\llm_testing
    rem Stop with an error when the link could not be created, so the caller
    rem does not continue without any models in the target directory.
    if errorlevel 1 (
        echo Failed to create link to c:\opt\llm_testing
        exit /b 1
    )
    echo Created link to existing in c:\opt\llm_testing. Skipping downloading models.
    exit /b 0
)

set "EMBEDDING_MODEL=thenlper/gte-small"
set "RERANK_MODEL=BAAI/bge-reranker-base"
set "TEXT_GENERATION_MODEL=facebook/opt-125m"
Expand All @@ -32,10 +40,10 @@ if exist "%~1\%TEXT_GENERATION_MODEL%" if exist "%~1\%EMBEDDING_MODEL%" if exist

echo Downloading LLM testing models to directory %~1
set "PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly"
C:\opt\Python39\python.exe -m venv .venv
"C:\Program Files\Python310\python.exe" -m venv .venv
call .\.venv\Scripts\Activate.bat
pip install -U pip
pip install -U -r demos\common\export_models\requirements.txt
python -m pip install --upgrade pip
pip install -U -r demos\common\export_models\requirements_win.txt

if not exist "%~1" mkdir "%~1"

Expand Down