forked from timescale/pgai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_entities.sql
88 lines (81 loc) · 3.07 KB
/
extract_entities.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
-- Extract entities example: https://github.com/anthropics/anthropic-cookbook/tree/main/tool_use
\getenv anthropic_api_key ANTHROPIC_API_KEY
CREATE OR REPLACE FUNCTION public.detect_entities(input_text text)
RETURNS TABLE(entity_name text, entity_type text, entity_context text)
AS $$
DECLARE
api_response jsonb;
entities_json jsonb;
BEGIN
SELECT ai.anthropic_generate(
'claude-3-5-sonnet-20240620',
jsonb_build_array(
jsonb_build_object(
'role', 'user',
'content', input_text
)
),
_max_tokens => 4096,
_tools => jsonb_build_array(
jsonb_build_object(
'name', 'print_entities',
'description', 'Prints extract named entities.',
'input_schema', jsonb_build_object(
'type', 'object',
'properties', jsonb_build_object(
'entities', jsonb_build_object(
'type', 'array',
'items', jsonb_build_object(
'type', 'object',
'properties', jsonb_build_object(
'name', jsonb_build_object('type', 'string', 'description', 'The extracted entity name.'),
'type', jsonb_build_object('type', 'string', 'description', 'The entity type (e.g., PERSON, ORGANIZATION, LOCATION).'),
'context', jsonb_build_object('type', 'string', 'description', 'The context in which the entity appears in the text.')
),
'required', jsonb_build_array('name', 'type', 'context')
)
)
),
'required', jsonb_build_array('entities')
)
)
)
) INTO api_response;
entities_json := jsonb_extract_path_text(api_response::jsonb, 'content', '1', 'input', 'entities')::jsonb;
RETURN QUERY
SELECT
e->>'name' AS entity_name,
e->>'type' AS entity_type,
e->>'context' AS entity_context
FROM jsonb_array_elements(entities_json) AS e;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'An error occurred: %', SQLERRM;
RAISE NOTICE 'API Response: %', api_response;
RETURN;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION public.anonymize_text(input_text text)
RETURNS text
AS $$
DECLARE
entity record;
anonymized text := input_text;
BEGIN
-- Replace entities with their types, starting with the longest entities
FOR entity IN (
SELECT entity_name, entity_type
FROM public.detect_entities(input_text)
ORDER BY length(entity_name) DESC
)
LOOP
anonymized := regexp_replace(
anonymized,
'\m' || regexp_replace(entity.entity_name, '([().\\*+?])', '\\\1', 'g') || '\M',
':' || entity.entity_type || ':',
'gi'
);
END LOOP;
RETURN anonymized;
END;
$$ LANGUAGE plpgsql;