Documentation: https://googleapis.github.io/python-genai/
Google Gen AI Python SDK provides an interface for developers to integrate Google's generative models into their Python applications. It supports the Gemini Developer API and Vertex AI APIs. This is an early release. API is subject to change. Please do not use this SDK in production environments at this stage.
pip install google-genai
from google import genai
from google.genai import types
Please run one of the following code blocks to create a client for different services (Gemini Developer API or Vertex AI).
# Only run this block for Gemini Developer API
client = genai.Client(api_key="GEMINI_API_KEY")
# Only run this block for Vertex AI API
client = genai.Client(
vertexai=True, project="your-project-id", location="us-central1"
)
Parameter types can be specified as either dictionaries(TypedDict
) or
Pydantic Models.
Pydantic model types are available in the types
module.
The client.models
modules exposes model inferencing and model getters.
response = client.models.generate_content(
model="gemini-2.0-flash-exp", contents="What is your name?"
)
print(response.text)
download the file in console.
!wget -q https://storage.googleapis.com/generativeai-downloads/data/a11.txt
python code.
file = client.files.upload(path="a11.text")
response = client.models.generate_content(
model="gemini-2.0-flash-exp", contents=["Summarize this file", file]
)
print(response.text)
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents="high",
config=types.GenerateContentConfig(
system_instruction="I say high, you say low",
temperature=0.3,
),
)
print(response.text)
All API methods support Pydantic types for parameters as well as
dictionaries. You can get the type from google.genai.types
.
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents=types.Part.from_text("Why is the sky blue?"),
config=types.GenerateContentConfig(
temperature=0,
top_p=0.95,
top_k=20,
candidate_count=1,
seed=5,
max_output_tokens=100,
stop_sequences=["STOP!"],
presence_penalty=0.0,
frequency_penalty=0.0,
),
)
response
The Gemini 2.0 Flash Thinking model is an experimental model that could return "thoughts" as part of its response.
Thinking config is only available in v1alpha for Gemini AI API.
response = client.models.generate_content(
model='gemini-2.0-flash-thinking-exp',
contents='What is the sum of natural numbers from 1 to 100?',
config=types.GenerateContentConfig(
thinking_config=types.ThinkingConfig(include_thoughts=True),
http_options=types.HttpOptions(api_version='v1alpha'),
)
)
for part in response.candidates[0].content.parts:
print(part)
response = client.models.generate_content(
model='gemini-2.0-flash-thinking-exp-1219',
contents='What is the sum of natural numbers from 1 to 100?',
config=types.GenerateContentConfig(
thinking_config=types.ThinkingConfig(include_thoughts=True),
)
)
for part in response.candidates[0].content.parts:
print(part)
To retrieve tuned models, see list tuned models.
for model in client.models.list(config={'query_base':True}):
print(model)
pager = client.models.list(config={"page_size": 10, 'query_base':True})
print(pager.page_size)
print(pager[0])
pager.next_page()
print(pager[0])
async for job in await client.aio.models.list(config={'query_base':True}):
print(job)
async_pager = await client.aio.models.list(config={"page_size": 10, 'query_base':True})
print(async_pager.page_size)
print(async_pager[0])
await async_pager.next_page()
print(async_pager[0])
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents="Say something bad.",
config=types.GenerateContentConfig(
safety_settings=[
types.SafetySetting(
category="HARM_CATEGORY_HATE_SPEECH",
threshold="BLOCK_ONLY_HIGH",
)
]
),
)
print(response.text)
You can pass a Python function directly and it will be automatically called and responded.
def get_current_weather(location: str) -> str:
"""Returns the current weather.
Args:
location: The city and state, e.g. San Francisco, CA
"""
return "sunny"
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents="What is the weather like in Boston?",
config=types.GenerateContentConfig(tools=[get_current_weather]),
)
print(response.text)
If you don't want to use the automatic function support, you can manually declare the function and invoke it.
The following example shows how to declare a function and pass it as a tool. Then you will receive a function call part in the response.
function = types.FunctionDeclaration(
name="get_current_weather",
description="Get the current weather in a given location",
parameters=types.FunctionParameters(
type="OBJECT",
properties={
"location": types.ParameterType(
type="STRING",
description="The city and state, e.g. San Francisco, CA",
),
},
required=["location"],
),
)
tool = types.Tool(function_declarations=[function])
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents="What is the weather like in Boston?",
config=types.GenerateContentConfig(tools=[tool]),
)
print(response.function_calls[0])
After you receive the function call part from the model, you can invoke the function and get the function response. And then you can pass the function response to the model. The following example shows how to do it for a simple function invocation.
user_prompt_content = types.Content(
role="user",
parts=[types.Part.from_text("What is the weather like in Boston?")],
)
function_call_content = response.candidates[0].content
function_call_part = function_call_content.parts[0]
try:
function_result = get_current_weather(
**function_call_part.function_call.args
)
function_response = {"result": function_result}
except (
Exception
) as e: # instead of raising the exception, you can let the model handle it
function_response = {"error": str(e)}
function_response_part = types.Part.from_function_response(
name=function_call_part.function_call.name,
response=function_response,
)
function_response_content = types.Content(
role="tool", parts=[function_response_part]
)
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents=[
user_prompt_content,
function_call_content,
function_response_content,
],
config=types.GenerateContentConfig(
tools=[tool],
),
)
print(response.text)
Schemas can be provided as Pydantic Models.
from pydantic import BaseModel
class CountryInfo(BaseModel):
name: str
population: int
capital: str
continent: str
gdp: int
official_language: str
total_area_sq_mi: int
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents="Give me information for the United States.",
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=CountryInfo,
),
)
print(response.text)
response = client.models.generate_content(
model="gemini-2.0-flash-exp",
contents="Give me information for the United States.",
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema={
"required": [
"name",
"population",
"capital",
"continent",
"gdp",
"official_language",
"total_area_sq_mi",
],
"properties": {
"name": {"type": "STRING"},
"population": {"type": "INTEGER"},
"capital": {"type": "STRING"},
"continent": {"type": "STRING"},
"gdp": {"type": "INTEGER"},
"official_language": {"type": "STRING"},
"total_area_sq_mi": {"type": "INTEGER"},
},
"type": "OBJECT",
},
),
)
print(response.text)
You can set response_mime_type to 'text/x.enum' to return one of those enum values as the response.
class InstrumentEnum(Enum):
PERCUSSION = 'Percussion'
STRING = 'String'
WOODWIND = 'Woodwind'
BRASS = 'Brass'
KEYBOARD = 'Keyboard'
response = client.models.generate_content(
model='gemini-2.0-flash-exp',
contents='What instrument plays multiple notes at once?',
config={
'response_mime_type': 'text/x.enum',
'response_schema': InstrumentEnum,
},
)
print(response.text)
You can also set response_mime_type to 'application/json', the response will be identical but in quotes.
class InstrumentEnum(Enum):
PERCUSSION = 'Percussion'
STRING = 'String'
WOODWIND = 'Woodwind'
BRASS = 'Brass'
KEYBOARD = 'Keyboard'
response = client.models.generate_content(
model='gemini-2.0-flash-exp',
contents='What instrument plays multiple notes at once?',
config={
'response_mime_type': 'application/json',
'response_schema': InstrumentEnum,
},
)
print(response.text)
for chunk in client.models.generate_content_stream(
model="gemini-2.0-flash-exp", contents="Tell me a story in 300 words."
):
print(chunk.text, end="")
If your image is stored in Google Cloud Storage,
you can use the from_uri
class method to create a Part
object.
for chunk in client.models.generate_content_stream(
model="gemini-2.0-flash-exp",
contents=[
"What is this image about?",
types.Part.from_uri(
file_uri="gs://generativeai-downloads/images/scones.jpg",
mime_type="image/jpeg",
),
],
):
print(chunk.text, end="")
If your image is stored in your local file system, you can read it in as bytes
data and use the from_bytes
class method to create a Part
object.
YOUR_IMAGE_PATH = "your_image_path"
YOUR_IMAGE_MIME_TYPE = "your_image_mime_type"
with open(YOUR_IMAGE_PATH, "rb") as f:
image_bytes = f.read()
for chunk in client.models.generate_content_stream(
model="gemini-2.0-flash-exp",
contents=[
"What is this image about?",
types.Part.from_bytes(data=image_bytes, mime_type=YOUR_IMAGE_MIME_TYPE),
],
):
print(chunk.text, end="")
client.aio
exposes all the analogous async
methods
that are available on client
For example, client.aio.models.generate_content
is the async
version
of client.models.generate_content
response = await client.aio.models.generate_content(
model="gemini-2.0-flash-exp", contents="Tell me a story in 300 words."
)
print(response.text)
async for response in client.aio.models.generate_content_stream(
model="gemini-2.0-flash-exp", contents="Tell me a story in 300 words."
):
print(response.text, end="")
response = client.models.count_tokens(
model="gemini-2.0-flash-exp",
contents="What is your name?",
)
print(response)
Compute tokens is only supported in Vertex AI.
response = client.models.compute_tokens(
model="gemini-2.0-flash-exp",
contents="What is your name?",
)
print(response)
response = await client.aio.models.count_tokens(
model="gemini-2.0-flash-exp",
contents="What is your name?",
)
print(response)
response = client.models.embed_content(
model="text-embedding-004",
contents="What is your name?",
)
print(response)
# multiple contents with config
response = client.models.embed_content(
model="text-embedding-004",
contents=["What is your name?", "What is your age?"],
config=types.EmbedContentConfig(output_dimensionality=10),
)
print(response)
Support for generate images in Gemini Developer API is behind an allowlist
# Generate Image
response1 = client.models.generate_images(
model="imagen-3.0-generate-001",
prompt="An umbrella in the foreground, and a rainy night sky in the background",
config=types.GenerateImageConfig(
negative_prompt="human",
number_of_images=1,
include_rai_reason=True,
output_mime_type="image/jpeg",
),
)
response1.generated_images[0].image.show()
Upscale image is only supported in Vertex AI.
# Upscale the generated image from above
response2 = client.models.upscale_image(
model="imagen-3.0-generate-001",
image=response1.generated_images[0].image,
upscale_factor="x2",
config=types.UpscaleImageConfig(
include_rai_reason=True,
output_mime_type="image/jpeg",
),
)
response2.generated_images[0].image.show()
Edit image uses a separate model from generate and upscale.
Edit image is only supported in Vertex AI.
# Edit the generated image from above
from google.genai.types import RawReferenceImage, MaskReferenceImage
raw_ref_image = RawReferenceImage(
reference_id=1,
reference_image=response1.generated_images[0].image,
)
# Model computes a mask of the background
mask_ref_image = MaskReferenceImage(
reference_id=2,
config=types.MaskReferenceConfig(
mask_mode="MASK_MODE_BACKGROUND",
mask_dilation=0,
),
)
response3 = client.models.edit_image(
model="imagen-3.0-capability-001",
prompt="Sunlight and clear sky",
reference_images=[raw_ref_image, mask_ref_image],
config=types.EditImageConfig(
edit_mode="EDIT_MODE_INPAINT_INSERTION",
number_of_images=1,
negative_prompt="human",
include_rai_reason=True,
output_mime_type="image/jpeg",
),
)
response3.generated_images[0].image.show()
Create a chat session to start a multi-turn conversations with the model.
chat = client.chats.create(model="gemini-2.0-flash-exp")
response = chat.send_message("tell me a story")
print(response.text)
chat = client.chats.create(model="gemini-2.0-flash-exp")
for chunk in chat.send_message_stream("tell me a story"):
print(chunk.text)
chat = client.aio.chats.create(model="gemini-2.0-flash-exp")
response = await chat.send_message("tell me a story")
print(response.text)
chat = client.aio.chats.create(model="gemini-2.0-flash-exp")
async for chunk in chat.send_message_stream("tell me a story"):
print(chunk.text)
Files are only supported in Gemini Developer API.
!gsutil cp gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf .
!gsutil cp gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf .
file1 = client.files.upload(path="2312.11805v3.pdf")
file2 = client.files.upload(path="2403.05530.pdf")
print(file1)
print(file2)
file3 = client.files.upload(path="2312.11805v3.pdf")
client.files.delete(name=file3.name)
client.caches
contains the control plane APIs for cached content
if client.vertexai:
file_uris = [
"gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf",
"gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
]
else:
file_uris = [file1.uri, file2.uri]
cached_content = client.caches.create(
model="gemini-1.5-pro-002",
config=types.CreateCachedContentConfig(
contents=[
types.Content(
role="user",
parts=[
types.Part.from_uri(
file_uri=file_uris[0], mime_type="application/pdf"
),
types.Part.from_uri(
file_uri=file_uris[1],
mime_type="application/pdf",
),
],
)
],
system_instruction="What is the sum of the two pdfs?",
display_name="test cache",
ttl="3600s",
),
)
cached_content = client.caches.get(name=cached_content.name)
response = client.models.generate_content(
model="gemini-1.5-pro-002",
contents="Summarize the pdfs",
config=types.GenerateContentConfig(
cached_content=cached_content.name,
),
)
print(response.text)
client.tunings
contains tuning job APIs and supports supervised fine
tuning through tune
and distillation through distill
- Vertex AI supports tuning from GCS source
- Gemini Developer API supports tuning from inline examples
if client.vertexai:
model = "gemini-1.5-pro-002"
training_dataset = types.TuningDataset(
gcs_uri="gs://cloud-samples-data/ai-platform/generative_ai/gemini-1_5/text/sft_train_data.jsonl",
)
else:
model = "models/gemini-1.0-pro-001"
training_dataset = types.TuningDataset(
examples=[
types.TuningExample(
text_input=f"Input text {i}",
output=f"Output text {i}",
)
for i in range(5)
],
)
tuning_job = client.tunings.tune(
base_model=model,
training_dataset=training_dataset,
config=types.CreateTuningJobConfig(
epoch_count=1, tuned_model_display_name="test_dataset_examples model"
),
)
print(tuning_job)
tuning_job = client.tunings.get(name=tuning_job.name)
print(tuning_job)
import time
running_states = set(
[
"JOB_STATE_PENDING",
"JOB_STATE_RUNNING",
]
)
while tuning_job.state in running_states:
print(tuning_job.state)
tuning_job = client.tunings.get(name=tuning_job.name)
time.sleep(10)
response = client.models.generate_content(
model=tuning_job.tuned_model.endpoint,
contents="What is your name?",
)
print(response.text)
tuned_model = client.models.get(model=tuning_job.tuned_model.model)
print(tuned_model)
To retrieve base models, see list base models.
for model in client.models.list(config={"page_size": 10}):
print(model)
pager = client.models.list(config={"page_size": 10})
print(pager.page_size)
print(pager[0])
pager.next_page()
print(pager[0])
async for job in await client.aio.models.list(config={"page_size": 10}):
print(job)
async_pager = await client.aio.models.list(config={"page_size": 10})
print(async_pager.page_size)
print(async_pager[0])
await async_pager.next_page()
print(async_pager[0])
model = pager[0]
model = client.models.update(
model=model.name,
config=types.UpdateModelConfig(
display_name="my tuned model", description="my tuned model description"
),
)
print(model)
Only supported in Vertex AI. Requires allowlist.
distillation_job = client.tunings.distill(
student_model="gemma-2b-1.1-it",
teacher_model="gemini-1.5-pro-002",
training_dataset=genai.types.DistillationDataset(
gcs_uri="gs://cloud-samples-data/ai-platform/generative_ai/gemini-1_5/text/sft_train_data.jsonl",
),
config=genai.types.CreateDistillationJobConfig(
epoch_count=1,
pipeline_root_directory=("gs://my-bucket"),
),
)
print(distillation_job)
completed_states = set(
[
"JOB_STATE_SUCCEEDED",
"JOB_STATE_FAILED",
"JOB_STATE_CANCELLED",
"JOB_STATE_PAUSED",
]
)
while distillation_job.state not in completed_states:
print(distillation_job.state)
distillation_job = client.tunings.get(name=distillation_job.name)
time.sleep(10)
print(distillation_job)
for job in client.tunings.list(config={"page_size": 10}):
print(job)
pager = client.tunings.list(config={"page_size": 10})
print(pager.page_size)
print(pager[0])
pager.next_page()
print(pager[0])
async for job in await client.aio.tunings.list(config={"page_size": 10}):
print(job)
async_pager = await client.aio.tunings.list(config={"page_size": 10})
print(async_pager.page_size)
print(async_pager[0])
await async_pager.next_page()
print(async_pager[0])
Only supported in Vertex AI.
# Specify model and source file only, destination and job display name will be auto-populated
job = client.batches.create(
model="gemini-1.5-flash-002",
src="bq://my-project.my-dataset.my-table",
)
job
# Get a job by name
job = client.batches.get(name=job.name)
job.state
completed_states = set(
[
"JOB_STATE_SUCCEEDED",
"JOB_STATE_FAILED",
"JOB_STATE_CANCELLED",
"JOB_STATE_PAUSED",
]
)
while job.state not in completed_states:
print(job.state)
job = client.batches.get(name=job.name)
time.sleep(30)
job
for job in client.batches.list(config=types.ListBatchJobConfig(page_size=10)):
print(job)
pager = client.batches.list(config=types.ListBatchJobConfig(page_size=10))
print(pager.page_size)
print(pager[0])
pager.next_page()
print(pager[0])
async for job in await client.aio.batches.list(
config=types.ListBatchJobConfig(page_size=10)
):
print(job)
async_pager = await client.aio.batches.list(
config=types.ListBatchJobConfig(page_size=10)
)
print(async_pager.page_size)
print(async_pager[0])
await async_pager.next_page()
print(async_pager[0])
# Delete the job resource
delete_job = client.batches.delete(name=job.name)
delete_job