Skip to content

Commit

Permalink
Multi cherry picks from master branch (#2349)
Browse files Browse the repository at this point in the history
- Added grpc as a valid protocol for uri (#2090)
- build(deps): bump urllib3 from 1.26.18 to 1.26.19 (#2140)
- build(deps): bump certifi from 2023.7.22 to 2024.7.4 (#2170)
- feat(pymilvus/settings.py): Load configuration without altering the
environment (#2192)
- feat: Add compact, get_server_version and flush api (#2326)
- Fix typo and correct grammar (#2333)
- Update return type of describe_role to Dict (#2337)
- enhance: Reorganize the examples (#2340)

Related: #2166, #2325, #2332

Signed-off-by: yangxuan <[email protected]>
Co-authored-by: Bruno Faria <[email protected]>
Co-authored-by: Bruno Faria <[email protected]>
Co-authored-by: dependabot[bot] <[email protected]>
Co-authored-by: -LAN- <[email protected]>
Co-authored-by: zhenshan.cao <[email protected]>
Co-authored-by: NamCaoHai <[email protected]>
  • Loading branch information
7 people authored Nov 25, 2024
1 parent 31cde42 commit 29bef71
Show file tree
Hide file tree
Showing 69 changed files with 608 additions and 908 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions
| 2.1.\* | 2.1.3 |
| 2.2.\* | 2.2.15 |
| 2.3.\* | 2.3.7 |
| 2.4.\* | 2.4.0 |
| 2.4.\* | 2.4.9 |


## Installation
Expand All @@ -43,7 +43,7 @@ $ pip3 install pymilvus[bulk_writer] # for bulk_writer
You can install a specific version of PyMilvus by:

```shell
$ pip3 install pymilvus==2.3.7
$ pip3 install pymilvus==2.4.9
```

You can upgrade PyMilvus to the latest version by:
Expand Down
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Examples
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
83 changes: 83 additions & 0 deletions examples/compact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import time
import numpy as np
from pymilvus import (
MilvusClient,
)

# Example script: insert, upsert and flush entities with MilvusClient, then
# trigger a compaction job and poll its state until it completes or the
# polling budget runs out.
# Requires a Milvus server reachable at http://localhost:19530.

fmt = "\n=== {:30} ===\n"
dim = 8
collection_name = "hello_milvus"
milvus_client = MilvusClient("http://localhost:19530")

# Start from a clean slate: drop a collection left over from an earlier run.
has_collection = milvus_client.has_collection(collection_name, timeout=5)
if has_collection:
    milvus_client.drop_collection(collection_name)
milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2")

# Fixed seed so the generated vectors are the same on every run.
rng = np.random.default_rng(seed=19530)
# Every row carries one extra key ("a", "b", ...) besides "id" and "vector".
# NOTE(review): presumably these are stored as dynamic fields -- confirm
# against the create_collection defaults.
rows = [
    {"id": 1, "vector": rng.random((1, dim))[0], "a": 100},
    {"id": 2, "vector": rng.random((1, dim))[0], "b": 200},
    {"id": 3, "vector": rng.random((1, dim))[0], "c": 300},
    {"id": 4, "vector": rng.random((1, dim))[0], "d": 400},
    {"id": 5, "vector": rng.random((1, dim))[0], "e": 500},
    {"id": 6, "vector": rng.random((1, dim))[0], "f": 600},
]

print(fmt.format("Start inserting entities"))
insert_result = milvus_client.insert(collection_name, rows)
print(fmt.format("Inserting entities done"))
print(insert_result)

# Upsert a row whose id (2) was already inserted above.
upsert_ret = milvus_client.upsert(collection_name, {"id": 2, "vector": rng.random((1, dim))[0], "g": 100})
print(upsert_ret)

print(fmt.format("Start flush"))
milvus_client.flush(collection_name)
print(fmt.format("flush done"))

# An empty filter combined with count(*) reports the collection's entity count.
result = milvus_client.query(collection_name, "", output_fields=["count(*)"])
print(f"final entities in {collection_name} is {result[0]['count(*)']}")

# Second batch of rows, inserted and flushed separately from the first one.
rows = [
    {"id": 7, "vector": rng.random((1, dim))[0], "g": 700},
    {"id": 8, "vector": rng.random((1, dim))[0], "h": 800},
    {"id": 9, "vector": rng.random((1, dim))[0], "i": 900},
    {"id": 10, "vector": rng.random((1, dim))[0], "j": 1000},
    {"id": 11, "vector": rng.random((1, dim))[0], "k": 1100},
    {"id": 12, "vector": rng.random((1, dim))[0], "l": 1200},
]

print(fmt.format("Start inserting entities"))
insert_result = milvus_client.insert(collection_name, rows)
print(fmt.format("Inserting entities done"))
print(insert_result)

print(fmt.format("Start flush"))
milvus_client.flush(collection_name)
print(fmt.format("flush done"))

result = milvus_client.query(collection_name, "", output_fields=["count(*)"])
print(f"final entities in {collection_name} is {result[0]['count(*)']}")

print(fmt.format("Start compact"))
job_id = milvus_client.compact(collection_name)
print(f"job_id:{job_id}")

# Poll the job once per second, giving up after 10 attempts so the example
# cannot hang forever if the job never reaches the "Completed" state.
cnt = 0
state = milvus_client.get_compaction_state(job_id)
while state != "Completed" and cnt < 10:
    time.sleep(1.0)
    state = milvus_client.get_compaction_state(job_id)
    print(f"compaction state: {state}")
    cnt += 1

if state == "Completed":
    print(fmt.format("compact done"))
else:
    print(fmt.format("compact timeout"))

result = milvus_client.query(collection_name, "", output_fields=["count(*)"])
print(f"final entities in {collection_name} is {result[0]['count(*)']}")

# Clean up so repeated runs do not accumulate state on the server.
milvus_client.drop_collection(collection_name)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
57 changes: 57 additions & 0 deletions examples/flush.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import time
import numpy as np
from pymilvus import (
MilvusClient,
)

# Example script: insert and upsert entities with MilvusClient, flush the
# collection, delete one entity by primary key, flush again, and print the
# entity count after each flush.
# Requires a Milvus server reachable at http://localhost:19530.

fmt = "\n=== {:30} ===\n"
dim = 8
collection_name = "hello_milvus"
milvus_client = MilvusClient("http://localhost:19530")

# Start from a clean slate: drop a collection left over from an earlier run.
has_collection = milvus_client.has_collection(collection_name, timeout=5)
if has_collection:
    milvus_client.drop_collection(collection_name)
milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2")

# Fixed seed so the generated vectors are the same on every run.
rng = np.random.default_rng(seed=19530)
# Every row carries one extra key ("a", "b", ...) besides "id" and "vector".
# NOTE(review): presumably these are stored as dynamic fields -- confirm
# against the create_collection defaults.
rows = [
    {"id": 1, "vector": rng.random((1, dim))[0], "a": 100},
    {"id": 2, "vector": rng.random((1, dim))[0], "b": 200},
    {"id": 3, "vector": rng.random((1, dim))[0], "c": 300},
    {"id": 4, "vector": rng.random((1, dim))[0], "d": 400},
    {"id": 5, "vector": rng.random((1, dim))[0], "e": 500},
    {"id": 6, "vector": rng.random((1, dim))[0], "f": 600},
]

print(fmt.format("Start inserting entities"))
insert_result = milvus_client.insert(collection_name, rows)
print(fmt.format("Inserting entities done"))
print(insert_result)

# Upsert a row whose id (2) was already inserted above.
upsert_ret = milvus_client.upsert(collection_name, {"id": 2, "vector": rng.random((1, dim))[0], "g": 100})
print(upsert_ret)

print(fmt.format("Start flush"))
milvus_client.flush(collection_name)
print(fmt.format("flush done"))

# An empty filter combined with count(*) reports the collection's entity count.
result = milvus_client.query(collection_name, "", output_fields=["count(*)"])
print(f"final entities in {collection_name} is {result[0]['count(*)']}")

# The delete below targets a primary key (ids=[6]), not a filter expression,
# so the progress message says so.
print(f"start to delete by specifying ids in collection {collection_name}")
delete_result = milvus_client.delete(collection_name, ids=[6])
print(delete_result)

print(fmt.format("Start flush"))
milvus_client.flush(collection_name)
print(fmt.format("flush done"))

result = milvus_client.query(collection_name, "", output_fields=["count(*)"])
print(f"final entities in {collection_name} is {result[0]['count(*)']}")

# Clean up so repeated runs do not accumulate state on the server.
milvus_client.drop_collection(collection_name)
8 changes: 8 additions & 0 deletions examples/get_server_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pymilvus import (
MilvusClient,
)

# Example script: connect to a local Milvus server and print the version
# string it reports.
milvus_client = MilvusClient("http://localhost:19530")
print(f"server version: {milvus_client.get_server_version()}")
File renamed without changes.
File renamed without changes.
62 changes: 22 additions & 40 deletions examples/hybrid_search.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
import numpy as np
from pymilvus import (
connections,
utility,
FieldSchema, CollectionSchema, DataType,
Collection,
MilvusClient,
DataType,
AnnSearchRequest, RRFRanker, WeightedRanker,
)

fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 3000, 8

print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")
collection_name = "hello_milvus"
milvus_client = MilvusClient("http://localhost:19530")

has = utility.has_collection("hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")
if has:
utility.drop_collection("hello_milvus")
has_collection = milvus_client.has_collection(collection_name, timeout=5)
if has_collection:
milvus_client.drop_collection(collection_name)

fields = [
FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
FieldSchema(name="random", dtype=DataType.DOUBLE),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = milvus_client.create_schema(auto_id=False, description="hello_milvus is the simplest demo to introduce the APIs")
schema.add_field("pk", DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field("random", DataType.DOUBLE)
schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field("embeddings2", DataType.FLOAT_VECTOR, dim=dim)

schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name = "embeddings", index_type = "IVF_FLAT", metric_type="L2", nlist=128)
index_params.add_index(field_name = "embeddings2",index_type = "IVF_FLAT", metric_type="L2", nlist=128)

print(fmt.format("Create collection `hello_milvus`"))
hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4)

milvus_client.create_collection(collection_name, schema=schema, index_params=index_params, consistency_level="Strong")

print(fmt.format("Start inserting entities"))
rng = np.random.default_rng(seed=19530)
Expand All @@ -41,29 +40,19 @@
rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list
]

insert_result = hello_milvus.insert(entities)
rows = [ {"pk": entities[0][i], "random": entities[1][i], "embeddings": entities[2][i], "embeddings2": entities[3][i]} for i in range (num_entities)]

hello_milvus.flush()
print(f"Number of entities in Milvus: {hello_milvus.num_entities}") # check the num_entities
insert_result = milvus_client.insert(collection_name, rows)

print(fmt.format("Start Creating index IVF_FLAT"))
index = {
"index_type": "IVF_FLAT",
"metric_type": "L2",
"params": {"nlist": 128},
}

hello_milvus.create_index("embeddings", index)
hello_milvus.create_index("embeddings2", index)

print(fmt.format("Start loading"))
hello_milvus.load()
milvus_client.load_collection(collection_name)

field_names = ["embeddings", "embeddings2"]
field_names = ["embeddings"]

req_list = []
nq = 1
weights = [0.2, 0.3]
default_limit = 5
vectors_to_search = []

Expand All @@ -79,15 +68,8 @@
req = AnnSearchRequest(**search_param)
req_list.append(req)

hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])

print("rank by WightedRanker")
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")

print("rank by RRFRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
hybrid_res = milvus_client.hybrid_search(collection_name, req_list, RRFRanker(), default_limit, output_fields=["random"])
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")
File renamed without changes.
93 changes: 93 additions & 0 deletions examples/hybrid_search/hybrid_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import numpy as np
from pymilvus import (
connections,
utility,
FieldSchema, CollectionSchema, DataType,
Collection,
AnnSearchRequest, RRFRanker, WeightedRanker,
)

# Example script (ORM-style API): create a collection with two float-vector
# fields, insert random data, index and load it, then run a hybrid search
# across both vector fields with WeightedRanker and with RRFRanker.
# Requires a Milvus server reachable at localhost:19530.

fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 3000, 8

print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")

# Start from a clean slate: drop a collection left over from an earlier run.
has = utility.has_collection("hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")
if has:
    utility.drop_collection("hello_milvus")

fields = [
    FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
    FieldSchema(name="random", dtype=DataType.DOUBLE),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
    FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim),
]

schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")

print(fmt.format("Create collection `hello_milvus`"))
hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards=4)

print(fmt.format("Start inserting entities"))
# Fixed seed so the generated data is the same on every run.
rng = np.random.default_rng(seed=19530)
# Column-based insert: one entry per field, in the order the schema declares them.
entities = [
    # provide the pk field because `auto_id` is set to False
    [str(i) for i in range(num_entities)],
    rng.random(num_entities).tolist(),  # field random, only supports list
    rng.random((num_entities, dim)),  # field embeddings, supports numpy.ndarray and list
    rng.random((num_entities, dim)),  # field embeddings2, supports numpy.ndarray and list
]

insert_result = hello_milvus.insert(entities)

hello_milvus.flush()
print(f"Number of entities in Milvus: {hello_milvus.num_entities}")  # check the num_entities

print(fmt.format("Start Creating index IVF_FLAT"))
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}

# Both vector fields get the same index definition.
hello_milvus.create_index("embeddings", index)
hello_milvus.create_index("embeddings2", index)

print(fmt.format("Start loading"))
hello_milvus.load()

field_names = ["embeddings", "embeddings2"]

req_list = []
nq = 1
# Two weights, matching the two search requests built below.
weights = [0.2, 0.3]
default_limit = 5

# Build one ANN search request per vector field, each with its own random
# query vectors and the same scalar filter.
for field_name in field_names:
    # generate search data
    vectors_to_search = rng.random((nq, dim))
    search_param = {
        "data": vectors_to_search,
        "anns_field": field_name,
        "param": {"metric_type": "L2"},
        "limit": default_limit,
        "expr": "random > 0.5",
    }
    req = AnnSearchRequest(**search_param)
    req_list.append(req)

hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])

print("rank by WeightedRanker")
for hits in hybrid_res:
    for hit in hits:
        print(f" hybrid search hit: {hit}")

print("rank by RRFRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
for hits in hybrid_res:
    for hit in hits:
        print(f" hybrid search hit: {hit}")
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 29bef71

Please sign in to comment.