-- Migration: 20240502193159_v0.8.0_vector_stores.sql
-- Source repository: fork of defenseunicorns/leapfrogai
-- Some of this is from https://python.langchain.com/docs/integrations/vectorstores/supabase
-- Enable the pgvector extension to work with embedding vectors.
-- It provides the `vector` type used by vector_content.embedding and by the
-- match_vectors() search function later in this migration.
-- "if not exists" makes the statement a no-op when the extension is already installed.
create extension if not exists vector;
-- Vector Store objects (modelled on the OpenAI Vector Store object).
-- One row per store; every store belongs to exactly one auth.users row.
create table vector_store (
    id            uuid   default uuid_generate_v4() primary key,
    user_id       uuid   not null references auth.users,
    -- Overwritten by update_vector_store_usage_bytes() with the sum of the
    -- usage_bytes of this store's vector_store_file rows.
    usage_bytes   bigint,
    -- Seconds since the Unix epoch at insertion time.
    created_at    bigint not null default extract(epoch from now()),
    file_counts   jsonb,
    last_active_at bigint,
    metadata      jsonb,
    name          text,
    -- Only the literal 'vector_store' (or NULL) is accepted.
    object        text   check (object in ('vector_store')),
    status        text,
    expires_after jsonb,
    expires_at    bigint
);
-- Vector Store File objects (modelled on the OpenAI Vector Store File object).
-- Links a file_objects row to a vector_store; the pair (vector_store_id, id)
-- is the primary key, so one file may appear in several stores.
create table vector_store_file (
    -- Same id as the underlying file; removed automatically with that file.
    id                uuid   references file_objects (id) on delete cascade,
    user_id           uuid   not null references auth.users,
    -- Seconds since the Unix epoch at insertion time.
    created_at        bigint not null default extract(epoch from now()),
    last_error        jsonb,
    -- Only the literal 'vector_store.file' (or NULL) is accepted.
    object            text   check (object in ('vector_store.file')),
    status            text,
    -- Written by calculate_vector_store_file_usage_bytes().
    usage_bytes       bigint,
    chunking_strategy jsonb,
    -- Removed automatically when the parent vector store is deleted.
    vector_store_id   uuid   references vector_store (id) on delete cascade,
    primary key (vector_store_id, id)
);
-- Embedded document chunks. Each row ties a piece of content and its embedding
-- to a file inside a vector store; rows disappear automatically when either
-- the store or the file is deleted.
create table vector_content (
    id              uuid default uuid_generate_v4() primary key,
    user_id         uuid not null references auth.users,
    vector_store_id uuid references vector_store (id) on delete cascade,
    file_id         uuid references file_objects (id) on delete cascade,
    content         text,         -- corresponds to Document.pageContent
    metadata        jsonb,        -- corresponds to Document.metadata
    embedding       vector (768)  -- Instructor-XL produces 768-length embeddings
);
-- Trigger function: keeps vector_store_file.usage_bytes equal to the combined
-- stored size of the vector_content rows that belong to the same file within
-- the same vector store as the row that fired the trigger.
--
-- The total is recomputed from scratch on every call rather than incremented:
--   * an un-aggregated "select ... into" would read the size of one arbitrary
--     row of the file, not of the row that was just written;
--   * adding to the previous total would inflate the figure on every update.
-- Recomputing makes the result independent of how many times the trigger fires.
--
-- Returns `new`; the return value is ignored for AFTER row triggers.
create or replace function calculate_vector_store_file_usage_bytes() returns trigger as $$
declare
    file_size bigint;
begin
    -- Sum content, metadata and embedding sizes over every chunk of this file
    -- in this store. pg_column_size() yields NULL for a NULL value, so each
    -- column is coalesced individually; otherwise one NULL column would wipe
    -- out the contribution of the whole row.
    select coalesce(
               sum(
                   coalesce(pg_column_size(content), 0)
                   + coalesce(pg_column_size(metadata), 0)
                   + coalesce(pg_column_size(embedding), 0)
               ),
               0
           )
    into file_size
    from vector_content
    where file_id = new.file_id
      and vector_store_id = new.vector_store_id;

    -- Overwrite (not increment) the usage_bytes of the matching file entry.
    update vector_store_file
    set usage_bytes = file_size
    where id = new.file_id
      and vector_store_id = new.vector_store_id;

    return new;
end;
$$ language plpgsql;
-- Run calculate_vector_store_file_usage_bytes() once per affected row,
-- after each insert or update on vector_content.
create trigger calculate_vector_store_file_usage_bytes_trigger
    after insert or update
    on vector_content
    for each row
    execute function calculate_vector_store_file_usage_bytes();
-- Trigger function: sets vector_store.usage_bytes to the sum of usage_bytes
-- over all vector_store_file rows of the affected store (0 when none remain).
-- The store is taken from `new` when available and from `old` otherwise, so
-- the same function serves insert, update and delete events.
-- Returns `new`.
create or replace function update_vector_store_usage_bytes() returns trigger as $$
declare
    target_store_id uuid;
begin
    -- Identify the store whose total has to be refreshed.
    target_store_id := coalesce(new.vector_store_id, old.vector_store_id);

    -- Recompute the total and store it in a single statement.
    update vector_store
    set usage_bytes = (
        select coalesce(sum(vsf.usage_bytes), 0)
        from vector_store_file as vsf
        where vsf.vector_store_id = target_store_id
    )
    where id = target_store_id;

    return new;
end;
$$ language plpgsql;
-- Run update_vector_store_usage_bytes() once per affected row,
-- after each insert, update or delete on vector_store_file.
create trigger update_vector_store_usage_bytes_trigger
    after insert or update or delete
    on vector_store_file
    for each row
    execute function update_vector_store_usage_bytes();
-- Similarity search over vector_content.
--
-- Parameters:
--   query_embedding  embedding to compare against (768 dimensions)
--   vs_id            only rows of this vector store are considered
--   user_id          only rows owned by this user are considered
--   match_limit      maximum number of rows returned
--   filter           jsonb document that each row's metadata must contain
--                    (`@>`); the default '{}' matches any non-NULL metadata
--
-- Returns the matching rows ordered from most to least similar, where
-- similarity = 1 - (embedding <=> query_embedding) and `<=>` is pgvector's
-- cosine distance operator.
create function match_vectors (
    query_embedding vector (768), -- Instructor-XL produces 768-length embeddings
    vs_id uuid,
    user_id uuid,
    match_limit int,
    filter jsonb default '{}'
) returns table (
    id uuid,
    vector_store_id uuid,
    file_id uuid,
    content text,
    metadata jsonb,
    similarity float
) language plpgsql as $$
#variable_conflict use_column
begin
    return query
    select
        vc.id,
        vc.vector_store_id,
        vc.file_id,
        vc.content,
        vc.metadata,
        1 - (vc.embedding <=> query_embedding) as similarity
    from vector_content as vc
    where vc.vector_store_id = vs_id
      -- The parameter shares its name with the column. Under use_column an
      -- unqualified "user_id = user_id" compares the column with itself and
      -- filters nothing, so the parameter is qualified with the function name.
      and vc.user_id = match_vectors.user_id
      and vc.metadata @> filter
    order by vc.embedding <=> query_embedding
    limit match_limit;
end;
$$;
-- RLS policies
-- Turn on row level security for all three tables created above; the
-- per-table policies that follow then decide which rows each user may access.
alter table vector_store enable row level security;
alter table vector_store_file enable row level security;
alter table vector_content enable row level security;
-- Policies for vector_store: a user may read, create, modify and remove
-- only the rows whose user_id equals their own auth.uid().
create policy "Individuals can view their own vector_store."
    on vector_store
    for select
    using (auth.uid() = user_id);

create policy "Individuals can create vector_store."
    on vector_store
    for insert
    with check (auth.uid() = user_id);

create policy "Individuals can update their own vector_store."
    on vector_store
    for update
    using (auth.uid() = user_id);

create policy "Individuals can delete their own vector_store."
    on vector_store
    for delete
    using (auth.uid() = user_id);
-- Policies for vector_store_file: a user may read, create, modify and remove
-- only the rows whose user_id equals their own auth.uid().
create policy "Individuals can view their own vector_store_file."
    on vector_store_file
    for select
    using (auth.uid() = user_id);

create policy "Individuals can create vector_store_file."
    on vector_store_file
    for insert
    with check (auth.uid() = user_id);

create policy "Individuals can update their own vector_store_file."
    on vector_store_file
    for update
    using (auth.uid() = user_id);

create policy "Individuals can delete their own vector_store_file."
    on vector_store_file
    for delete
    using (auth.uid() = user_id);
-- Policies for vector_content: a user may read, create, modify and remove
-- only the rows whose user_id equals their own auth.uid().
create policy "Individuals can view their own vector_content."
    on vector_content
    for select
    using (auth.uid() = user_id);

create policy "Individuals can create vector_content."
    on vector_content
    for insert
    with check (auth.uid() = user_id);

create policy "Individuals can update their own vector_content."
    on vector_content
    for update
    using (auth.uid() = user_id);

create policy "Individuals can delete their own vector_content."
    on vector_content
    for delete
    using (auth.uid() = user_id);