Multi cherry picks from master branch (#2349)

- Added grpc as a valid protocol for uri (#2090)
- build(deps): bump urllib3 from 1.26.18 to 1.26.19 (#2140)
- build(deps): bump certifi from 2023.7.22 to 2024.7.4 (#2170)
- feat(pymilvus/settings.py): Load configuration without altering the environment (#2192)
- feat: Add compact, get_server_version and flush api (#2326)
- Fix typo and correct grammar (#2333)
- Update return type of describe_role to Dict (#2337)
- enhance: Reorganize the examples (#2340)

Related: #2166, #2325, #2332

Signed-off-by: yangxuan <[email protected]>
Co-authored-by: Bruno Faria <[email protected]>
Co-authored-by: Bruno Faria <[email protected]>
Co-authored-by: dependabot[bot] <[email protected]>
Co-authored-by: -LAN- <[email protected]>
Co-authored-by: zhenshan.cao <[email protected]>
Co-authored-by: NamCaoHai <[email protected]>
milvus-io · Nov 25, 2024 · 29bef71 · 29bef71
1 parent 31cde42
commit 29bef71
Show file tree

Hide file tree

Showing 69 changed files with 608 additions and 908 deletions.
diff --git a/README.md b/README.md
@@ -27,7 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions
 | 2.1.\* | 2.1.3 |
 | 2.2.\* | 2.2.15 |
 | 2.3.\* | 2.3.7 |
-| 2.4.\* | 2.4.0 |
+| 2.4.\* | 2.4.9 |
 
 
 ## Installation
@@ -43,7 +43,7 @@ $ pip3 install pymilvus[bulk_writer] # for bulk_writer
 You can install a specific version of PyMilvus by:
 
 ```shell
-$ pip3 install pymilvus==2.3.7
+$ pip3 install pymilvus==2.4.9
 ```
 
 You can upgrade PyMilvus to the latest version by:

diff --git a/examples/README.md b/examples/README.md
@@ -0,0 +1 @@
+# Examples
diff --git a/examples/milvus_client/alias.py → examples/alias.py b/examples/milvus_client/alias.py → examples/alias.py
diff --git a/examples/example_bulkinsert_json.py → ...es/bulk_import/example_bulkinsert_json.py b/examples/example_bulkinsert_json.py → ...es/bulk_import/example_bulkinsert_json.py
diff --git a/examples/example_bulkinsert_numpy.py → ...s/bulk_import/example_bulkinsert_numpy.py b/examples/example_bulkinsert_numpy.py → ...s/bulk_import/example_bulkinsert_numpy.py
diff --git a/examples/example_bulkwriter.py → examples/bulk_import/example_bulkwriter.py b/examples/example_bulkwriter.py → examples/bulk_import/example_bulkwriter.py
diff --git a/examples/data/train_embeddings.csv → examples/bulk_import/train_embeddings.csv b/examples/data/train_embeddings.csv → examples/bulk_import/train_embeddings.csv
diff --git a/examples/example_tls1.py → examples/cert/example_tls1.py b/examples/example_tls1.py → examples/cert/example_tls1.py
diff --git a/examples/example_tls2.py → examples/cert/example_tls2.py b/examples/example_tls2.py → examples/cert/example_tls2.py
diff --git a/examples/compact.py b/examples/compact.py
@@ -0,0 +1,83 @@
+import time
+import numpy as np
+from pymilvus import (
+    MilvusClient,
+)
+
+fmt = "\n=== {:30} ===\n"
+dim = 8
+collection_name = "hello_milvus"
+milvus_client = MilvusClient("http://localhost:19530")
+
+has_collection = milvus_client.has_collection(collection_name, timeout=5)
+if has_collection:
+    milvus_client.drop_collection(collection_name)
+milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2")
+
+rng = np.random.default_rng(seed=19530)
+rows = [
+        {"id": 1, "vector": rng.random((1, dim))[0], "a": 100},
+        {"id": 2, "vector": rng.random((1, dim))[0], "b": 200},
+        {"id": 3, "vector": rng.random((1, dim))[0], "c": 300},
+        {"id": 4, "vector": rng.random((1, dim))[0], "d": 400},
+        {"id": 5, "vector": rng.random((1, dim))[0], "e": 500},
+        {"id": 6, "vector": rng.random((1, dim))[0], "f": 600},
+]
+
+print(fmt.format("Start inserting entities"))
+insert_result = milvus_client.insert(collection_name, rows)
+print(fmt.format("Inserting entities done"))
+print(insert_result)
+
+upsert_ret = milvus_client.upsert(collection_name, {"id": 2 , "vector": rng.random((1, dim))[0], "g": 100})
+print(upsert_ret)
+
+print(fmt.format("Start flush"))
+milvus_client.flush(collection_name)
+print(fmt.format("flush done"))
+
+result = milvus_client.query(collection_name, "", output_fields = ["count(*)"])
+print(f"final entities in {collection_name} is {result[0]['count(*)']}")
+
+rows = [
+        {"id": 7, "vector": rng.random((1, dim))[0], "g": 700},
+        {"id": 8, "vector": rng.random((1, dim))[0], "h": 800},
+        {"id": 9, "vector": rng.random((1, dim))[0], "i": 900},
+        {"id": 10, "vector": rng.random((1, dim))[0], "j": 1000},
+        {"id": 11, "vector": rng.random((1, dim))[0], "k": 1100},
+        {"id": 12, "vector": rng.random((1, dim))[0], "l": 1200},
+]
+
+print(fmt.format("Start inserting entities"))
+insert_result = milvus_client.insert(collection_name, rows)
+print(fmt.format("Inserting entities done"))
+print(insert_result)
+
+print(fmt.format("Start flush"))
+milvus_client.flush(collection_name)
+print(fmt.format("flush done"))
+
+result = milvus_client.query(collection_name, "", output_fields = ["count(*)"])
+print(f"final entities in {collection_name} is {result[0]['count(*)']}")
+
+print(fmt.format("Start compact"))
+job_id = milvus_client.compact(collection_name)
+print(f"job_id:{job_id}")
+
+cnt = 0
+state = milvus_client.get_compaction_state(job_id)
+while (state != "Completed" and cnt < 10):
+    time.sleep(1.0)
+    state = milvus_client.get_compaction_state(job_id) 
+    print(f"compaction state: {state}")
+    cnt += 1
+
+if state == "Completed":
+    print(fmt.format("compact done"))
+else:
+    print(fmt.format("compact timeout"))
+
+result = milvus_client.query(collection_name, "", output_fields = ["count(*)"])
+print(f"final entities in {collection_name} is {result[0]['count(*)']}")
+
+milvus_client.drop_collection(collection_name)
diff --git a/examples/multithreading_hello_milvus.py → ...oncurrency/multithreading_hello_milvus.py b/examples/multithreading_hello_milvus.py → ...oncurrency/multithreading_hello_milvus.py
diff --git a/examples/milvus_client/customize_schema.py → examples/customize_schema.py b/examples/milvus_client/customize_schema.py → examples/customize_schema.py
diff --git a/...milvus_client/customize_schema_auto_id.py → examples/customize_schema_auto_id.py b/...milvus_client/customize_schema_auto_id.py → examples/customize_schema_auto_id.py
diff --git a/examples/bfloat16_example.py → examples/datatypes/bfloat16_example.py b/examples/bfloat16_example.py → examples/datatypes/bfloat16_example.py
diff --git a/examples/binary_example.py → examples/datatypes/binary_example.py b/examples/binary_example.py → examples/datatypes/binary_example.py
diff --git a/examples/dynamic_field.py → examples/datatypes/dynamic_field.py b/examples/dynamic_field.py → examples/datatypes/dynamic_field.py
diff --git a/examples/example_str.py → examples/datatypes/example_str.py b/examples/example_str.py → examples/datatypes/example_str.py
diff --git a/examples/float16_example.py → examples/datatypes/float16_example.py b/examples/float16_example.py → examples/datatypes/float16_example.py
diff --git a/examples/fuzzy_match.py → examples/datatypes/fuzzy_match.py b/examples/fuzzy_match.py → examples/datatypes/fuzzy_match.py
diff --git a/examples/hello_milvus_array.py → examples/datatypes/hello_milvus_array.py b/examples/hello_milvus_array.py → examples/datatypes/hello_milvus_array.py
diff --git a/examples/hello_sparse.py → examples/datatypes/hello_sparse.py b/examples/hello_sparse.py → examples/datatypes/hello_sparse.py
diff --git a/examples/flush.py b/examples/flush.py
@@ -0,0 +1,57 @@
+import time
+import numpy as np
+from pymilvus import (
+    MilvusClient,
+)
+
+fmt = "\n=== {:30} ===\n"
+dim = 8
+collection_name = "hello_milvus"
+milvus_client = MilvusClient("http://localhost:19530")
+
+has_collection = milvus_client.has_collection(collection_name, timeout=5)
+if has_collection:
+    milvus_client.drop_collection(collection_name)
+milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2")
+
+rng = np.random.default_rng(seed=19530)
+rows = [
+        {"id": 1, "vector": rng.random((1, dim))[0], "a": 100},
+        {"id": 2, "vector": rng.random((1, dim))[0], "b": 200},
+        {"id": 3, "vector": rng.random((1, dim))[0], "c": 300},
+        {"id": 4, "vector": rng.random((1, dim))[0], "d": 400},
+        {"id": 5, "vector": rng.random((1, dim))[0], "e": 500},
+        {"id": 6, "vector": rng.random((1, dim))[0], "f": 600},
+]
+
+print(fmt.format("Start inserting entities"))
+insert_result = milvus_client.insert(collection_name, rows)
+print(fmt.format("Inserting entities done"))
+print(insert_result)
+
+upsert_ret = milvus_client.upsert(collection_name, {"id": 2 , "vector": rng.random((1, dim))[0], "g": 100})
+print(upsert_ret)
+
+print(fmt.format("Start flush"))
+milvus_client.flush(collection_name)
+print(fmt.format("flush done"))
+
+
+result = milvus_client.query(collection_name, "", output_fields = ["count(*)"])
+print(f"final entities in {collection_name} is {result[0]['count(*)']}")
+
+
+print(f"start to delete by specifying filter in collection {collection_name}")
+delete_result = milvus_client.delete(collection_name, ids=[6])
+print(delete_result)
+
+
+print(fmt.format("Start flush"))
+milvus_client.flush(collection_name)
+print(fmt.format("flush done"))
+
+
+result = milvus_client.query(collection_name, "", output_fields = ["count(*)"])
+print(f"final entities in {collection_name} is {result[0]['count(*)']}")
+
+milvus_client.drop_collection(collection_name)
diff --git a/examples/get_server_version.py b/examples/get_server_version.py
@@ -0,0 +1,8 @@
+from pymilvus import (
+    MilvusClient,
+)
+
+milvus_client = MilvusClient("http://localhost:19530")
+
+version = milvus_client.get_server_version()
+print(f"server version: {version}")
diff --git a/examples/example_gpu_brute_force.py → examples/gpu_indx/example_gpu_brute_force.py b/examples/example_gpu_brute_force.py → examples/gpu_indx/example_gpu_brute_force.py
diff --git a/examples/example_gpu_cagra.py → examples/gpu_indx/example_gpu_cagra.py b/examples/example_gpu_cagra.py → examples/gpu_indx/example_gpu_cagra.py
diff --git a/examples/hybrid_search.py b/examples/hybrid_search.py
@@ -1,35 +1,34 @@
 import numpy as np
 from pymilvus import (
-    connections,
-    utility,
-    FieldSchema, CollectionSchema, DataType,
-    Collection,
+    MilvusClient,
+    DataType,
     AnnSearchRequest, RRFRanker, WeightedRanker,
 )
 
 fmt = "\n=== {:30} ===\n"
 search_latency_fmt = "search latency = {:.4f}s"
 num_entities, dim = 3000, 8
 
-print(fmt.format("start connecting to Milvus"))
-connections.connect("default", host="localhost", port="19530")
+collection_name = "hello_milvus"
+milvus_client = MilvusClient("http://localhost:19530")
 
-has = utility.has_collection("hello_milvus")
-print(f"Does collection hello_milvus exist in Milvus: {has}")
-if has:
-    utility.drop_collection("hello_milvus")
+has_collection = milvus_client.has_collection(collection_name, timeout=5)
+if has_collection:
+    milvus_client.drop_collection(collection_name)
 
-fields = [
-    FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
-    FieldSchema(name="random", dtype=DataType.DOUBLE),
-    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
-    FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim)
-]
+schema = milvus_client.create_schema(auto_id=False, description="hello_milvus is the simplest demo to introduce the APIs")
+schema.add_field("pk", DataType.VARCHAR, is_primary=True, max_length=100)
+schema.add_field("random", DataType.DOUBLE)
+schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim)
+schema.add_field("embeddings2", DataType.FLOAT_VECTOR, dim=dim)
 
-schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")
+index_params = milvus_client.prepare_index_params()
+index_params.add_index(field_name = "embeddings", index_type = "IVF_FLAT", metric_type="L2", nlist=128)
+index_params.add_index(field_name = "embeddings2",index_type = "IVF_FLAT", metric_type="L2", nlist=128)
 
 print(fmt.format("Create collection `hello_milvus`"))
-hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4)
+
+milvus_client.create_collection(collection_name, schema=schema, index_params=index_params, consistency_level="Strong")
 
 print(fmt.format("Start inserting entities"))
 rng = np.random.default_rng(seed=19530)
@@ -41,29 +40,19 @@
     rng.random((num_entities, dim)),    # field embeddings2, supports numpy.ndarray and list
 ]
 
-insert_result = hello_milvus.insert(entities)
+rows = [ {"pk": entities[0][i], "random": entities[1][i], "embeddings": entities[2][i], "embeddings2": entities[3][i]} for i in range (num_entities)]
 
-hello_milvus.flush()
-print(f"Number of entities in Milvus: {hello_milvus.num_entities}")  # check the num_entities
+insert_result = milvus_client.insert(collection_name, rows)
 
-print(fmt.format("Start Creating index IVF_FLAT"))
-index = {
-    "index_type": "IVF_FLAT",
-    "metric_type": "L2",
-    "params": {"nlist": 128},
-}
-
-hello_milvus.create_index("embeddings", index)
-hello_milvus.create_index("embeddings2", index)
 
 print(fmt.format("Start loading"))
-hello_milvus.load()
+milvus_client.load_collection(collection_name)
 
 field_names = ["embeddings", "embeddings2"]
+field_names = ["embeddings"]
 
 req_list = []
 nq = 1
-weights = [0.2, 0.3]
 default_limit = 5
 vectors_to_search = []
 
@@ -79,15 +68,8 @@
     req = AnnSearchRequest(**search_param)
     req_list.append(req)
 
-hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])
-
-print("rank by WightedRanker")
-for hits in hybrid_res:
-    for hit in hits:
-        print(f" hybrid search hit: {hit}")
-
 print("rank by RRFRanker")
-hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
+hybrid_res = milvus_client.hybrid_search(collection_name, req_list, RRFRanker(), default_limit, output_fields=["random"])
 for hits in hybrid_res:
     for hit in hits:
         print(f" hybrid search hit: {hit}")
diff --git a/examples/hello_hybrid_sparse_dense.py → ...ybrid_search/hello_hybrid_sparse_dense.py b/examples/hello_hybrid_sparse_dense.py → ...ybrid_search/hello_hybrid_sparse_dense.py
diff --git a/examples/hybrid_search/hybrid_search.py b/examples/hybrid_search/hybrid_search.py
@@ -0,0 +1,93 @@
+import numpy as np
+from pymilvus import (
+    connections,
+    utility,
+    FieldSchema, CollectionSchema, DataType,
+    Collection,
+    AnnSearchRequest, RRFRanker, WeightedRanker,
+)
+
+fmt = "\n=== {:30} ===\n"
+search_latency_fmt = "search latency = {:.4f}s"
+num_entities, dim = 3000, 8
+
+print(fmt.format("start connecting to Milvus"))
+connections.connect("default", host="localhost", port="19530")
+
+has = utility.has_collection("hello_milvus")
+print(f"Does collection hello_milvus exist in Milvus: {has}")
+if has:
+    utility.drop_collection("hello_milvus")
+
+fields = [
+    FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
+    FieldSchema(name="random", dtype=DataType.DOUBLE),
+    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
+    FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim)
+]
+
+schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")
+
+print(fmt.format("Create collection `hello_milvus`"))
+hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4)
+
+print(fmt.format("Start inserting entities"))
+rng = np.random.default_rng(seed=19530)
+entities = [
+    # provide the pk field because `auto_id` is set to False
+    [str(i) for i in range(num_entities)],
+    rng.random(num_entities).tolist(),  # field random, only supports list
+    rng.random((num_entities, dim)),    # field embeddings, supports numpy.ndarray and list
+    rng.random((num_entities, dim)),    # field embeddings2, supports numpy.ndarray and list
+]
+
+insert_result = hello_milvus.insert(entities)
+
+hello_milvus.flush()
+print(f"Number of entities in Milvus: {hello_milvus.num_entities}")  # check the num_entities
+
+print(fmt.format("Start Creating index IVF_FLAT"))
+index = {
+    "index_type": "IVF_FLAT",
+    "metric_type": "L2",
+    "params": {"nlist": 128},
+}
+
+hello_milvus.create_index("embeddings", index)
+hello_milvus.create_index("embeddings2", index)
+
+print(fmt.format("Start loading"))
+hello_milvus.load()
+
+field_names = ["embeddings", "embeddings2"]
+
+req_list = []
+nq = 1
+weights = [0.2, 0.3]
+default_limit = 5
+vectors_to_search = []
+
+for i in range(len(field_names)):
+    # 4. generate search data
+    vectors_to_search = rng.random((nq, dim))
+    search_param = {
+        "data": vectors_to_search,
+        "anns_field": field_names[i],
+        "param": {"metric_type": "L2"},
+        "limit": default_limit,
+        "expr": "random > 0.5"}
+    req = AnnSearchRequest(**search_param)
+    req_list.append(req)
+
+hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])
+
+print("rank by WightedRanker")
+for hits in hybrid_res:
+    for hit in hits:
+        print(f" hybrid search hit: {hit}")
+
+print("rank by RRFRanker")
+hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
+for hits in hybrid_res:
+    for hit in hits:
+        print(f" hybrid search hit: {hit}")
diff --git a/examples/milvus_client/index.py → examples/index.py b/examples/milvus_client/index.py → examples/index.py
diff --git a/examples/milvus_client/index_params.py → examples/index_params.py b/examples/milvus_client/index_params.py → examples/index_params.py