From 78587f75bb7f9defc6b930272bd6857c169d5433 Mon Sep 17 00:00:00 2001 From: Jennifer Hamon Date: Tue, 16 Jan 2024 07:24:50 -0500 Subject: [PATCH] Update documentation for v3.0 release (#284) Update the docs and bring back the docs publishing workflow step. --- .github/actions/build-docs/action.yml | 2 +- .github/workflows/merge.yaml | 26 +- CONTRIBUTING.md | 17 + README.md | 281 +++++++++++---- pinecone/control/pinecone.py | 328 +++++++++++++++++- pinecone/grpc/__init__.py | 46 +++ pinecone/grpc/pinecone.py | 115 +++++- pinecone/models/collection_description.py | 13 +- pinecone/models/collection_list.py | 4 + pinecone/models/index_description.py | 27 ++ pinecone/models/pod_spec.py | 55 +++ .../control/test_configure_pod_index.py | 3 + 12 files changed, 814 insertions(+), 103 deletions(-) diff --git a/.github/actions/build-docs/action.yml b/.github/actions/build-docs/action.yml index e9d2f460..ae6e79de 100644 --- a/.github/actions/build-docs/action.yml +++ b/.github/actions/build-docs/action.yml @@ -21,4 +21,4 @@ runs: - name: Build html documentation shell: bash run: | - poetry run pdoc pinecone/ --favicon ./favicon-32x32.png --docformat google -o ./docs + poetry run pdoc pinecone '!pinecone.core' '!pinecone.utils' --favicon ./favicon-32x32.png --docformat google -o ./docs diff --git a/.github/workflows/merge.yaml b/.github/workflows/merge.yaml index 4e795304..8c2fdccc 100644 --- a/.github/workflows/merge.yaml +++ b/.github/workflows/merge.yaml @@ -1,4 +1,4 @@ -name: 'Merge to main' +name: 'CI (main)' on: push: @@ -18,15 +18,15 @@ jobs: with: python-version: 3.11 - # - name: Push documentation artifacts to sdk-docs - # uses: cpina/github-action-push-to-another-repository@main - # env: - # SSH_DEPLOY_KEY: ${{ secrets.SSH_DEPLOY_KEY }} - # with: - # source-directory: docs - # destination-github-username: pinecone-io - # destination-repository-name: sdk-docs - # user-email: clients@pinecone.io - # target-branch: main - # target-directory: python - # commit-message: 
'Python: automated documentation build - pinecone-python-client merge SHA: ${{ github.sha }}' + - name: Push documentation artifacts to sdk-docs + uses: cpina/github-action-push-to-another-repository@main + env: + SSH_DEPLOY_KEY: ${{ secrets.SSH_DEPLOY_KEY }} + with: + source-directory: docs + destination-github-username: pinecone-io + destination-repository-name: sdk-docs + user-email: clients@pinecone.io + target-branch: main + target-directory: python + commit-message: 'Python: automated documentation build - pinecone-python-client merge SHA: ${{ github.sha }}' diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1def52e7..f2b1b80d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,22 @@ # Contributing +## Installing development versions + +If you want to explore a potential code change, investigate +a bug, or just want to try unreleased features, you can also install +specific git shas. + +Some example commands: + +```shell +pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git +pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git@example-branch-name +pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git@44fc7ed + +poetry add git+https://github.com/pinecone-io/pinecone-python-client.git@44fc7ed +``` + + ## Developing locally with Poetry [Poetry](https://python-poetry.org/) is a tool that combines [virtualenv](https://virtualenv.pypa.io/en/latest/) usage with dependency management, to provide a consistent experience for project maintainers and contributors who need to develop the pinecone-python-client diff --git a/README.md b/README.md index bc9d9071..87d3a977 100644 --- a/README.md +++ b/README.md @@ -1,60 +1,182 @@ -# pinecone-client -The Pinecone python client +# Pinecone Python Client · ![License](https://img.shields.io/github/license/pinecone-io/pinecone-python-client?color=orange) 
[![CI](https://github.com/pinecone-io/pinecone-python-client/actions/workflows/merge.yaml/badge.svg)](https://github.com/pinecone-io/pinecone-python-client/actions/workflows/merge.yaml) + + +The official Pinecone Python client. For more information, see the docs at https://www.pinecone.io/docs/ +## Documentation + +- If you are upgrading from a `2.2.x` version of the client, check out the [**v3 Migration Guide**](https://canyon-quilt-082.notion.site/Pinecone-Python-SDK-v3-0-0-Migration-Guide-056d3897d7634bf7be399676a4757c7b#a21aff70b403416ba352fd30e300bce3). +- [**Reference Documentation**](https://sdk.pinecone.io/python/index.html) + + +### Example code + +Many of the brief examples shown in this README are using very small vectors to keep the documentation concise, but most real world usage will involve much larger embedding vectors. To see some more realistic examples of how this client can be used, explore some of our many Jupyter notebooks in the [examples](https://github.com/pinecone-io/examples) repository. + + +## Prerequisites + +The Pinecone Python client is compatible with Python 3.8 and greater. + + ## Installation -Install a released version from pip: +There are two flavors of the Pinecone python client. The default client installed from PyPI as `pinecone-client` has a minimal set of dependencies and interacts with Pinecone via HTTP requests. + +If you are aiming to maximize performance, you can install additional gRPC dependencies to access an alternate client implementation that relies on gRPC for data operations. See the guide on [tuning performance](https://docs.pinecone.io/docs/performance-tuning).
+ + +### Installing with pip + ```shell +# Install the latest version pip3 install pinecone-client + +# Install the latest version, with extra grpc dependencies +pip3 install "pinecone-client[grpc]" + +# Install a specific version +pip3 install pinecone-client==3.0.0 + +# Install a specific version, with grpc extras +pip3 install "pinecone-client[grpc]"==3.0.0 ``` -Or the gRPC version of the client for [tuning performance](https://docs.pinecone.io/docs/performance-tuning) +### Installing with poetry ```shell -pip3 install "pinecone-client[grpc]" +# Install the latest version +poetry add pinecone-client + +# Install the latest version, with grpc extras +poetry add pinecone-client --extras grpc + +# Install a specific version +poetry add pinecone-client==3.0.0 + +# Install a specific version, with grpc extras +poetry add pinecone-client==3.0.0 --extras grpc ``` -Or the latest development version: -```shell -pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git +## Usage + +### Initializing the client + +Before you can use the Pinecone SDK, you must sign up for an account and find your API key in the Pinecone console dashboard at [https://app.pinecone.io](https://app.pinecone.io). + +#### Using environment variables + +The `Pinecone` class is your main entry point into the Pinecone python SDK. If you have set your API Key in the `PINECONE_API_KEY` environment variable, you can instantiate the client with no other arguments.
+ +```python +from pinecone import Pinecone + +pc = Pinecone() # This reads the PINECONE_API_KEY env var ``` -Or a specific development version: -```shell -pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git -pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git@example-branch-name -pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git@259deff +#### Using a configuration object + +If you prefer to pass configuration in code, for example if you have a complex application that needs to interact with multiple different Pinecone projects, the constructor accepts a keyword argument for `api_key`. + +If you pass configuration in this way, you can have full control over what name to use for the environment variable, sidestepping any issues that would result +from two different client instances both needing to read the same `PINECONE_API_KEY` variable that the client implicitly checks for. + +Configuration passed with keyword arguments takes precedence over environment variables. + +```python +import os +from pinecone import Pinecone + +pc = Pinecone(api_key=os.environ.get('CUSTOM_VAR')) ``` -## Creating an index +### Working with GRPC (for improved performance) -The following example creates an index without a metadata -configuration. By default, Pinecone indexes all metadata. +If you've followed instructions above to install with optional `grpc` extras, you can unlock some performance improvements by working with an alternative version of the client imported from the `pinecone.grpc` subpackage. ```python +import os +from pinecone.grpc import PineconeGRPC + +pc = PineconeGRPC(api_key=os.environ.get('PINECONE_API_KEY')) + +# From here on, everything is identical to the REST-based client.
+index = pc.Index(host='my-index-8833ca1.svc.us-east1-gcp.pinecone.io') + +index.upsert(vectors=[]) +index.query(vector=[...], top_k=10) +``` -import pinecone +## Indexes +### Create Index -pinecone.init(api_key="YOUR_API_KEY", - environment="us-west1-gcp") +#### Create a serverless index -pinecone.create_index("example-index", dimension=1024) +> [!WARNING] +> Serverless indexes are in **public preview** and are available only on AWS in the +> `us-west-2` region. Check the [current limitations](https://docs.pinecone.io/docs/limits#serverless-index-limitations) and test thoroughly before using it in production. + +```python +from pinecone import Pinecone, ServerlessSpec + +pc = Pinecone(api_key='<>') +pc.create_index( + name='my-index', + dimension=1536, + metric='euclidean', + spec=ServerlessSpec( + cloud='aws', + region='us-west-2' + ) +) ``` -The following example creates an index that only indexes +#### Create a pod index + +The following example creates an index without a metadata +configuration. By default, Pinecone indexes all metadata. + +```python +from pinecone import Pinecone, PodSpec + +pc = Pinecone(api_key='<>') +pc.create_index( + name="example-index", + dimension=1536, + metric="cosine", + spec=PodSpec( + environment='us-west-2', + pod_type='p1.x1' + ) +) +``` + +Pod indexes support many optional configuration fields. For example, +the following example creates an index that only indexes the "color" metadata field. Queries against this index cannot filter based on any other metadata field.
```python +from pinecone import Pinecone, PodSpec + +pc = Pinecone(api_key='<>') + metadata_config = { "indexed": ["color"] } -pinecone.create_index("example-index-2", dimension=1024, - metadata_config=metadata_config) +pc.create_index( + "example-index-2", + dimension=1536, + spec=PodSpec( + environment='us-west-2', + pod_type='p1.x1', + metadata_config=metadata_config + ) +) ``` ## List indexes @@ -62,11 +184,11 @@ pinecone.create_index("example-index-2", dimension=1024, The following example returns all indexes in your project. ```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") - -active_indexes = pinecone.list_indexes() +pc = Pinecone(api_key='<>') +for index in pc.list_indexes(): + print(index['name']) ``` ## Describe index @@ -74,24 +196,23 @@ active_indexes = pinecone.list_indexes() The following example returns information about the index `example-index`. ```python -import pinecone - -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") +from pinecone import Pinecone -index_description = pinecone.describe_index("example-index") +pc = Pinecone(api_key='<>') +index_description = pc.describe_index("example-index") ``` ## Delete an index -The following example deletes `example-index`. +The following example deletes the index named `example-index`. ```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") +pc = Pinecone(api_key='<>') -pinecone.delete_index("example-index") +pc.delete_index("example-index") ``` ## Scale replicas @@ -99,12 +220,12 @@ pinecone.delete_index("example-index") The following example changes the number of replicas for `example-index`. 
```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") +pc = Pinecone(api_key='<>') new_number_of_replicas = 4 -pinecone.configure_index("example-index", replicas=new_number_of_replicas) +pc.configure_index("example-index", replicas=new_number_of_replicas) ``` ## Describe index statistics @@ -112,10 +233,11 @@ pinecone.configure_index("example-index", replicas=new_number_of_replicas) The following example returns statistics about the index `example-index`. ```python -import pinecone +import os +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") -index = pinecone.Index("example-index") +pc = Pinecone(api_key='<>') +index = pc.Index(host=os.environ.get('INDEX_HOST')) index_stats_response = index.describe_index_stats() ``` @@ -126,10 +248,11 @@ index_stats_response = index.describe_index_stats() The following example upserts vectors to `example-index`. ```python -import pinecone +import os +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") -index = pinecone.Index("example-index") +pc = Pinecone(api_key='<>') +index = pc.Index(host=os.environ.get('INDEX_HOST')) upsert_response = index.upsert( vectors=[ @@ -146,17 +269,21 @@ The following example queries the index `example-index` with metadata filtering. 
```python -import pinecone +import os +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") -index = pinecone.Index("example-index") +pc = Pinecone(api_key='<>') + +# Find your index host by calling describe_index +# through the Pinecone web console +index = pc.Index(host=os.environ.get('INDEX_HOST')) query_response = index.query( namespace="example-namespace", + vector=[0.1, 0.2, 0.3, 0.4], top_k=10, include_values=True, include_metadata=True, - vector=[0.1, 0.2, 0.3, 0.4], filter={ "genre": {"$in": ["comedy", "documentary", "drama"]} } @@ -168,10 +295,14 @@ query_response = index.query( The following example deletes vectors by ID. ```python -import pinecone +import os +from pinecone import Pinecone + +pc = Pinecone(api_key='<>') -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") -index = pinecone.Index("example-index") +# Find your index host by calling describe_index +# through the Pinecone web console +index = pc.Index(host=os.environ.get('INDEX_HOST')) delete_response = index.delete(ids=["vec1", "vec2"], namespace="example-namespace") ``` @@ -181,24 +312,30 @@ delete_response = index.delete(ids=["vec1", "vec2"], namespace="example-namespac The following example fetches vectors by ID. ```python -import pinecone +import os +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") -index = pinecone.Index("example-index") +pc = Pinecone(api_key='<>') + +# Find your index host by calling describe_index +# through the Pinecone web console +index = pc.Index(host=os.environ.get('INDEX_HOST')) fetch_response = index.fetch(ids=["vec1", "vec2"], namespace="example-namespace") ``` - ## Update vectors The following example updates vectors by ID. 
```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") -index = pinecone.Index("example-index") +pc = Pinecone(api_key='<>') + +# Find your index host by calling describe_index +# through the Pinecone web console +index = pc.Index(host=os.environ.get('INDEX_HOST')) update_response = index.update( id="vec1", @@ -214,12 +351,14 @@ The following example creates the collection `example-collection` from `example-index`. ```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", - environment="us-west1-gcp") +pc = Pinecone(api_key='<>') -pinecone.create_collection("example-collection", "example-index") +pc.create_collection( + name="example-collection", + source="example-index" +) ``` ## List collections @@ -227,11 +366,11 @@ pinecone.create_collection("example-collection", "example-index") The following example returns a list of the collections in the current project. ```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") +pc = Pinecone(api_key='<>') -active_collections = pinecone.list_collections() +active_collections = pc.list_collections() ``` ## Describe a collection @@ -240,11 +379,11 @@ The following example returns a description of the collection `example-collection`. ```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") +pc = Pinecone(api_key='<>') -collection_description = pinecone.describe_collection("example-collection") +collection_description = pc.describe_collection("example-collection") ``` ## Delete a collection @@ -252,11 +391,11 @@ collection_description = pinecone.describe_collection("example-collection") The following example deletes the collection `example-collection`. 
```python -import pinecone +from pinecone import Pinecone -pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") +pc = Pinecone(api_key='<>') -pinecone.delete_collection("example-collection") +pc.delete_collection("example-collection") ``` # Contributing diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index ad51b76b..ea14a7cc 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -5,7 +5,7 @@ from pinecone.config import PineconeConfig, Config -from pinecone.core.client.api.manage_indexes_api import ManageIndexesApi as IndexOperationsApi +from pinecone.core.client.api.manage_indexes_api import ManageIndexesApi from pinecone.core.client.api_client import ApiClient from pinecone.utils import get_user_agent, normalize_host from pinecone.core.client.models import ( @@ -20,6 +20,7 @@ from pinecone.data import Index class Pinecone: + def __init__( self, api_key: Optional[str] = None, @@ -27,9 +28,63 @@ def __init__( config: Optional[Config] = None, additional_headers: Optional[Dict[str, str]] = {}, pool_threads: Optional[int] = 1, - index_api: Optional[IndexOperationsApi] = None, + index_api: Optional[ManageIndexesApi] = None, **kwargs, ): + """ + The `Pinecone` class is the main entry point for interacting with Pinecone via this Python SDK. + It is used to create, delete, and manage your indexes and collections. + + :param api_key: The API key to use for authentication. If not passed via kwarg, the API key will be read from the environment variable `PINECONE_API_KEY`. + :type api_key: str, optional + :param host: The control plane host to connect to. + :type host: str, optional + :param config: A `pinecone.config.Config` object. If passed, the `api_key` and `host` parameters will be ignored. + :type config: pinecone.config.Config, optional + :param additional_headers: Additional headers to pass to the API. 
Default: `{}` + :type additional_headers: Dict[str, str], optional + :param pool_threads: The number of threads to use for the connection pool. Default: `1` + :type pool_threads: int, optional + :param index_api: An instance of `pinecone.core.client.api.manage_indexes_api.ManageIndexesApi`. If passed, the `host` parameter will be ignored. + :type index_api: pinecone.core.client.api.manage_indexes_api.ManageIndexesApi, optional + + + ### Configuration with environment variables + + If you instantiate the Pinecone client with no arguments, it will attempt to read the API key from the environment variable `PINECONE_API_KEY`. + + ```python + from pinecone import Pinecone + + pc = Pinecone() + ``` + + ### Configuration with keyword arguments + + If you prefer being more explicit in your code, you can also pass the API as a keyword argument. + + ```python + import os + from pinecone import Pinecone + + pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) + ``` + + ### Environment variables + + The Pinecone client supports the following environment variables: + + - `PINECONE_API_KEY`: The API key to use for authentication. If not passed via + kwarg, the API key will be read from the environment variable `PINECONE_API_KEY`. + + - `PINECONE_DEBUG_CURL`: When troubleshooting it can be very useful to run curl + commands against the control plane API to see exactly what data is being sent + and received without all the abstractions and transformations applied by the Python + SDK. If you set this environment variable to `true`, the Pinecone client will use + request parameters to print out an equivalent curl command that you can run yourself + or share with Pinecone support. **Be very careful with this option, as it will print out + your API key** which forms part of a required authentication header. 
Default: `false` + """ if config or kwargs.get("config"): configKwarg = config or kwargs.get("config") if not isinstance(configKwarg, Config): @@ -48,9 +103,10 @@ def __init__( extra_headers = self.config.additional_headers or {} for key, value in extra_headers.items(): api_client.set_default_header(key, value) - self.index_api = IndexOperationsApi(api_client) + self.index_api = ManageIndexesApi(api_client) self.index_host_store = IndexHostStore() + """ @private """ def create_index( self, @@ -62,21 +118,58 @@ def create_index( ): """Creates a Pinecone index. - :param name: the name of the index. + :param name: The name of the index to create. Must be unique within your project and + cannot be changed once created. Allowed characters are lowercase letters, numbers, + and hyphens and the name may not begin or end with hyphens. Maximum length is 45 characters. :type name: str - :param dimension: the dimension of vectors that would be inserted in the index + :param dimension: The dimension of vectors that will be inserted in the index. This should + match the dimension of the embeddings you will be inserting. For example, if you are using + OpenAI's `text-embedding-ada-002` model, you should use `dimension=1536`. :type dimension: int - :param metric: type of metric used in the vector index, one of `{"cosine", "dotproduct", "euclidean"}`, defaults to "cosine". - Use "cosine" for cosine similarity, - "dotproduct" for dot-product, - and "euclidean" for euclidean distance. + :param metric: Type of metric used in the vector index when querying, one of `{"cosine", "dotproduct", "euclidean"}`. Defaults to `"cosine"`. + Use `"cosine"` for cosine similarity, `"dotproduct"` for dot-product, and `"euclidean"` for euclidean distance. :type metric: str, optional :param spec: A dictionary containing configurations describing how the index should be deployed. For serverless indexes, specify region and cloud. For pod indexes, specify replicas, shards, pods, pod_type, metadata_config, and source_collection.
:type spec: Dict :type timeout: int, optional - :param timeout: Timeout for wait until index gets ready. If None, wait indefinitely; if >=0, time out after this many seconds; + :param timeout: Specify the number of seconds to wait until index gets ready. If None, wait indefinitely; if >=0, time out after this many seconds; if -1, return immediately and do not wait. Default: None + + ### Creating a serverless index + + ```python + import os + from pinecone import Pinecone, ServerlessSpec + + client = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) + + client.create_index( + name="my-index", + dimension=1536, + metric="cosine", + spec=ServerlessSpec(cloud="aws", region="us-west-2") + ) + ``` + + ### Creating a pod index + + ```python + import os + from pinecone import Pinecone, PodSpec + + client = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) + + client.create_index( + name="my-index", + dimension=1536, + metric="cosine", + spec=PodSpec( + environment="us-east1-gcp", + pod_type="p1.x1" + ) + ) + ``` """ api_instance = self.index_api @@ -116,9 +209,19 @@ def is_ready(): def delete_index(self, name: str, timeout: Optional[int] = None): """Deletes a Pinecone index. + Deleting an index is an irreversible operation. All data in the index will be lost. + When you use this command, a request is sent to the Pinecone control plane to delete + the index, but the termination is not synchronous because resources take a few moments to + be released. + + You can check the status of the index by calling the `describe_index()` command. + With repeated polling of the describe_index command, you will see the index transition to a + `Terminating` state before eventually resulting in a 404 after it has been removed. + :param name: the name of the index. :type name: str - :param timeout: Timeout for wait until index gets ready.
If None, wait indefinitely; if >=0, time out after this many seconds; + :param timeout: Number of seconds to poll status checking whether the index has been deleted. If None, + wait indefinitely; if >=0, time out after this many seconds; if -1, return immediately and do not wait. Default: None :type timeout: int, optional """ @@ -149,7 +252,49 @@ def get_remaining(): ) def list_indexes(self) -> IndexList: - """Lists all indexes.""" + """Lists all indexes. + + The results include a description of all indexes in your project, including the + index name, dimension, metric, status, and spec. + + :return: Returns an `IndexList` object, which is iterable and contains a + list of `IndexDescription` objects. It also has a convenience method `names()` + which returns a list of index names. + + ```python + from pinecone import Pinecone, ServerlessSpec + + client = Pinecone() + + index_name = "my-index" + if index_name not in client.list_indexes().names(): + print("Index does not exist, creating...") + client.create_index( + name=index_name, + dimension=768, + metric="cosine", + spec=ServerlessSpec(cloud="aws", region="us-west-2") + ) + ``` + + You can also use the `list_indexes()` method to iterate over all indexes in your project + and get other information besides just names. + + ```python + from pinecone import Pinecone + + client = Pinecone() + + for index in client.list_indexes(): + print(index.name) + print(index.dimension) + print(index.metric) + print(index.status) + print(index.host) + print(index.spec) + ``` + + """ response = self.index_api.list_indexes() return IndexList(response) @@ -158,6 +303,31 @@ def describe_index(self, name: str): :param name: the name of the index to describe. :return: Returns an `IndexDescription` object + which gives access to properties such as the + index name, dimension, metric, host url, status, + and spec.
+ + ### Getting your index host url + + In a real production situation, you probably want to + store the host url in an environment variable so you + don't have to call describe_index and re-fetch it + every time you want to use the index. But this example + shows how to get the value from the API using describe_index. + + ```python + from pinecone import Pinecone, Index + + client = Pinecone() + + description = client.describe_index("my_index") + + host = description.host + print(f"Your index is hosted at {description.host}") + + index = client.Index(name="my_index", host=host) + index.upsert(vectors=[...]) + ``` """ api_instance = self.index_api description = api_instance.describe_index(name) @@ -167,10 +337,27 @@ def describe_index(self, name: str): return description def configure_index(self, name: str, replicas: Optional[int] = None, pod_type: Optional[str] = None): - """Changes current configuration of the index. + """This method is used to scale configuration fields for your pod-based Pinecone index. + :param: name: the name of the Index :param: replicas: the desired number of replicas, lowest value is 0. - :param: pod_type: the new pod_type for the index. + :param: pod_type: the new pod_type for the index. To learn more about the + available pod types, please see [Understanding Indexes](https://docs.pinecone.io/docs/indexes) + + + ```python + from pinecone import Pinecone + + client = Pinecone() + + # Make a configuration change + client.configure_index(name="my_index", replicas=4) + + # Call describe_index to see the index status as the + # change is applied. 
+ client.describe_index("my_index") + ``` + """ api_instance = self.index_api config_args: Dict[str, Any] = {} @@ -186,7 +373,8 @@ def configure_index(self, name: str, replicas: Optional[int] = None, pod_type: O api_instance.configure_index(name, configure_index_request=configure_index_request) def create_collection(self, name: str, source: str): - """Create a collection + """Create a collection from a pod-based index + :param name: Name of the collection :param source: Name of the source index """ @@ -194,14 +382,40 @@ def create_collection(self, name: str, source: str): api_instance.create_collection(create_collection_request=CreateCollectionRequest(name=name, source=source)) def list_collections(self) -> CollectionList: - """List all collections""" + """List all collections + + ```python + from pinecone import Pinecone + + client = Pinecone() + + for collection in client.list_collections(): + print(collection.name) + print(collection.source) + + # You can also iterate specifically over the collection + # names with the .names() helper. + collection_name="my_collection" + for collection_name in client.list_collections().names(): + print(collection_name) + ``` + """ api_instance = self.index_api response = api_instance.list_collections() return CollectionList(response) def delete_collection(self, name: str): """Deletes a collection. + :param: name: The name of the collection + + Deleting a collection is an irreversible operation. All data + in the collection will be lost. + + This method tells Pinecone you would like to delete a collection, + but it takes a few moments to complete the operation. Use the + `describe_collection()` method to confirm that the collection + has been deleted. """ api_instance = self.index_api api_instance.delete_collection(name) @@ -210,6 +424,19 @@ def describe_collection(self, name: str): """Describes a collection. 
:param: The name of the collection :return: Description of the collection + + ```python + from pinecone import Pinecone + + client = Pinecone() + + description = client.describe_collection("my_collection") + print(description["name"]) + print(description["source"]) + print(description["status"]) + print(description["size"]) + print(description["dimension"]) + ``` """ api_instance = self.index_api return api_instance.describe_collection(name).to_dict() @@ -220,6 +447,75 @@ def _get_status(self, name: str): return response["status"] def Index(self, name: str = '', host: str = ''): + """ + Target an index for data operations. + + ### Target an index by host url + + In production situations, you want to upsert or query your data as quickly + as possible. If you know in advance the host url of your index, you can + eliminate a round trip to the Pinecone control plane by specifying the + host of the index. + + ```python + import os + from pinecone import Pinecone + + api_key = os.environ.get("PINECONE_API_KEY") + index_host = os.environ.get("PINECONE_INDEX_HOST") + + pc = Pinecone(api_key=api_key) + index = pc.Index(host=index_host) + + # Now you're ready to perform data operations + index.query(vector=[...], top_k=10) + ``` + + To find your host url, you can use the Pinecone control plane to describe + the index. The host url is returned in the response. Or, alternatively, the + host is displayed in the Pinecone web console. + + ```python + import os + from pinecone import Pinecone + + pc = Pinecone( + api_key=os.environ.get("PINECONE_API_KEY") + ) + + host = pc.describe_index('index-name').host + ``` + + ### Target an index by name (not recommended for production) + + For more casual usage, such as when you are playing and exploring with Pinecone + in a notebook setting, you can also target an index by name. If you use this + approach, the client may need to perform an extra call to the Pinecone control + plane on your behalf to look up the index host.
+ + The client will cache the index host for future use whenever it is seen, so you + will incur the overhead of only one call. But this approach is not + recommended for production usage. + + ```python + import os + from pinecone import Pinecone, ServerlessSpec + + api_key = os.environ.get("PINECONE_API_KEY") + + pc = Pinecone(api_key=api_key) + pc.create_index( + name='my-index', + dimension=1536, + metric='cosine', + spec=ServerlessSpec(cloud='aws', region='us-west-2') + ) + index = pc.Index('my-index') + + # Now you're ready to perform data operations + index.query(vector=[...], top_k=10) + ``` + """ if name == '' and host == '': raise ValueError("Either name or host must be specified") diff --git a/pinecone/grpc/__init__.py b/pinecone/grpc/__init__.py index c6505a36..67338779 100644 --- a/pinecone/grpc/__init__.py +++ b/pinecone/grpc/__init__.py @@ -1,3 +1,49 @@ +""" +Connecting to Pinecone with GRPC + +The `pinecone.grpc` submodule provides an alternative version of the Pinecone +client that uses gRPC instead of HTTP for data operations. This provides a +significant performance boost for data operations. + +### Installing the gRPC client + +You must install extra dependencies in order to use the gRPC client. + +#### Installing with pip + +```bash +# Install the latest version +pip3 install "pinecone-client[grpc]" + +# Install a specific version +pip3 install "pinecone-client[grpc]"==3.0.0 +``` + +#### Installing with poetry + +```bash +# Install the latest version +poetry add pinecone-client --extras grpc + +# Install a specific version +poetry add pinecone-client==3.0.0 --extras grpc +``` + +### Using the gRPC client + +```python +import os +from pinecone.grpc import PineconeGRPC + +client = PineconeGRPC(api_key=os.environ.get("PINECONE_API_KEY")) + +# From this point on, usage is identical to the HTTP client.
+index = client.Index(host=os.environ.get("PINECONE_INDEX_HOST")) +index.query(vector=[...], top_k=10) +``` + +""" + from .index_grpc import GRPCIndex from .pinecone import PineconeGRPC diff --git a/pinecone/grpc/pinecone.py b/pinecone/grpc/pinecone.py index 7279b107..bd68360b 100644 --- a/pinecone/grpc/pinecone.py +++ b/pinecone/grpc/pinecone.py @@ -3,7 +3,120 @@ from .index_grpc import GRPCIndex class PineconeGRPC(Pinecone): + """ + An alternative version of the Pinecone client that uses gRPC instead of HTTP for + data operations. + + ### Installing the gRPC client + + You must install extra dependencies in order to use the gRPC client. + + #### Installing with pip + + ```bash + # Install the latest version + pip3 install "pinecone-client[grpc]" + + # Install a specific version + pip3 install "pinecone-client[grpc]"==3.0.0 + ``` + + #### Installing with poetry + + ```bash + # Install the latest version + poetry add pinecone-client --extras grpc + + # Install a specific version + poetry add pinecone-client==3.0.0 --extras grpc + ``` + + ### Using the gRPC client + + ```python + import os + from pinecone.grpc import PineconeGRPC + + client = PineconeGRPC(api_key=os.environ.get("PINECONE_API_KEY")) + + # From this point on, usage is identical to the HTTP client. + index = client.Index("my-index", host=os.environ.get("PINECONE_INDEX_HOST")) + index.query(...) + ``` + + """ + def Index(self, name: str = '', host: str = ''): + """ + Target an index for data operations. + + ### Target an index by host url + + In production situations, you want to upsert or query your data as quickly + as possible. If you know in advance the host url of your index, you can + eliminate a round trip to the Pinecone control plane by specifying the + host of the index.
+ + ```python + import os + from pinecone.grpc import PineconeGRPC + + api_key = os.environ.get("PINECONE_API_KEY") + index_host = os.environ.get("PINECONE_INDEX_HOST") + + pc = PineconeGRPC(api_key=api_key) + index = pc.Index(host=index_host) + + # Now you're ready to perform data operations + index.query(vector=[...], top_k=10) + ``` + + To find your host url, you can use the Pinecone control plane to describe + the index. The host url is returned in the response. Or, alternatively, the + host is displayed in the Pinecone web console. + + ```python + import os + from pinecone import Pinecone + + pc = Pinecone( + api_key=os.environ.get("PINECONE_API_KEY") + ) + + host = pc.describe_index('index-name').host + ``` + + ### Target an index by name (not recommended for production) + + For more casual usage, such as when you are playing and exploring with Pinecone + in a notebook setting, you can also target an index by name. If you use this + approach, the client may need to perform an extra call to the Pinecone control + plane to look up the index host url on your behalf. + + The client will cache the index host for future use whenever it is seen, so you + will only incur the overhead of one call. But this approach is not + recommended for production usage.
+ + ```python + import os + from pinecone import ServerlessSpec + from pinecone.grpc import PineconeGRPC + + api_key = os.environ.get("PINECONE_API_KEY") + + pc = PineconeGRPC(api_key=api_key) + pc.create_index( + name='my-index', + dimension=1536, + metric='cosine', + spec=ServerlessSpec(cloud='aws', region='us-west-2') + ) + index = pc.Index('my-index') + + # Now you're ready to perform data operations + index.query(vector=[...], top_k=10) + ``` + """ if name == '' and host == '': raise ValueError("Either name or host must be specified") @@ -16,4 +129,4 @@ def Index(self, name: str = '', host: str = ''): # Otherwise, get host url from describe_index using the index name index_host = self.index_host_store.get_host(self.index_api, self.config, name) config = ConfigBuilder.build(api_key=self.config.api_key, host=index_host) - return GRPCIndex(index_name=name, config=config) + return GRPCIndex(index_name=name, config=config) \ No newline at end of file diff --git a/pinecone/models/collection_description.py b/pinecone/models/collection_description.py index 44c5f51e..533d9745 100644 --- a/pinecone/models/collection_description.py +++ b/pinecone/models/collection_description.py @@ -1,5 +1,16 @@ from typing import NamedTuple class CollectionDescription(NamedTuple): + """ + The description of a collection. + """ + name: str - source: str \ No newline at end of file + """ + The name of the collection. + """ + + source: str + """ + The name of the index used to create the collection. + """ \ No newline at end of file diff --git a/pinecone/models/collection_list.py b/pinecone/models/collection_list.py index 40fd8815..b7f4472c 100644 --- a/pinecone/models/collection_list.py +++ b/pinecone/models/collection_list.py @@ -1,6 +1,10 @@ from pinecone.core.client.models import CollectionList as OpenAPICollectionList class CollectionList: + """ + A list of collections. 
+ """ + def __init__(self, collection_list: OpenAPICollectionList): self.collection_list = collection_list self.current = 0 diff --git a/pinecone/models/index_description.py b/pinecone/models/index_description.py index 8135241d..8566cae5 100644 --- a/pinecone/models/index_description.py +++ b/pinecone/models/index_description.py @@ -23,9 +23,36 @@ class ServerlessSpecDefinition(NamedTuple): ServerlessSpec = Dict[ServerlessKey, ServerlessSpecDefinition] class IndexDescription(NamedTuple): + """ + The description of an index. This object is returned from the `describe_index()` method. + """ + name: str + """ + The name of the index + """ + dimension: int + """ + The dimension of the index. This corresponds to the length of the vectors stored in the index. + """ + metric: str + """ + One of 'cosine', 'euclidean', or 'dotproduct'. + """ + host: str + """ + The endpoint you will use to connect to this index for data operations such as upsert and query. + """ + spec: Union[PodSpec, ServerlessSpec] + """ + The spec describes how the index is being deployed. + """ + status: IndexStatus + """ + Status includes information on whether the index is ready to accept data operations. + """ diff --git a/pinecone/models/pod_spec.py b/pinecone/models/pod_spec.py index b126aefa..f72465fb 100644 --- a/pinecone/models/pod_spec.py +++ b/pinecone/models/pod_spec.py @@ -1,12 +1,67 @@ from typing import NamedTuple, Optional, Dict class PodSpec(NamedTuple): + """ + PodSpec represents the configuration used to deploy a pod-based index. + + To learn more about the options for each configuration, please see [Understanding Indexes](https://docs.pinecone.io/docs/indexes) + """ + environment: str + """ + The environment where the pod index will be deployed. Example: 'us-east1-gcp' + """ + replicas: Optional[int] = None + """ + The number of replicas to deploy for the pod index. Default: 1 + """ + shards: Optional[int] = None + """ + The number of shards to use. 
Shards are used to expand the number of vectors you can store beyond the capacity of a single pod. Default: 1 + """ + pods: Optional[int] = None + """ + Number of pods to deploy. Default: 1 + """ + pod_type: Optional[str] = "p1.x1" + """ + This value combines pod type and pod size into a single string. This configuration is your main lever for vertical scaling. + """ + metadata_config: Optional[Dict] = {} + """ + If you are storing a lot of metadata, you can use this configuration to limit the fields which are indexed for search. + + This configuration should be a dictionary with the key 'indexed' and the value as a list of fields to index. + + For example, if your vectors have metadata like this: + + ```python + from pinecone import Vector + + vector = Vector( + id='237438191', + values=[...], + metadata={ + 'productId': '237438191', + 'description': 'Stainless Steel Tumbler with Straw', + 'category': 'kitchen', + 'price': '19.99' + } + ) + ``` + + You might want to limit which fields are indexed with metadata config such as this: + + `{'indexed': ['field1', 'field2']}` + """ def asdict(self): + """ + Returns the PodSpec as a dictionary. + """ return {"pod": self._asdict()} \ No newline at end of file diff --git a/tests/integration/control/test_configure_pod_index.py b/tests/integration/control/test_configure_pod_index.py index 1167fc48..6c4411b7 100644 --- a/tests/integration/control/test_configure_pod_index.py +++ b/tests/integration/control/test_configure_pod_index.py @@ -1,4 +1,7 @@ +import pytest import time + +@pytest.mark.skip(reason='API bug reported') class TestCreatePodIndex(): def test_create_pod_index(self, client, ready_pod_index): time.sleep(30) # Wait a little more, just in case.