Skip to content

Commit

Permalink
start working on a simple vault integration
Browse files Browse the repository at this point in the history
  • Loading branch information
akshaykarle committed Sep 18, 2024
1 parent 1cbcfe2 commit 2d4da8a
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 14 deletions.
22 changes: 20 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ presidio-anonymizer = "^2.2.354"
presidio-analyzer = {version = "^2.2.354", extras = ["transformers", "stanza"]}
pytest = "^8.2.1"
flask = "^3.0.3"
hvac = "^2.3.0"


[build-system]
Expand Down
30 changes: 18 additions & 12 deletions src/cli.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,19 @@
import argparse
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_anonymizer.anonymizer_engine import AnonymizerEngine
from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from text.text import text_analyzer, text_anonymizer
from presidio_anonymizer import BatchAnonymizerEngine
from config.nlp_engine_config import FlairNLPEngine

NLP_ENGINE = "flair/ner-english-large"


def analyze(args):
nlp_engine = FlairNLPEngine(NLP_ENGINE)
analyzer_results = None

if args.text:
nlp_engine, registry = nlp_engine.create_nlp_engine()
engine = AnalyzerEngine(registry=registry, nlp_engine=nlp_engine)

analyzer_results = engine.analyze(
text=args.text,
language=args.language
)
analyzer_results = text_analyzer(args.text, args.language)
else:
nlp_engine = FlairNLPEngine(NLP_ENGINE)
engine = CSVAnalyzerEngine(nlp_engine)

analyzer_results = engine.analyze_csv(
Expand All @@ -37,8 +30,7 @@ def anonymize(args):
anonymized_results = None

if args.text:
anonymizer = AnonymizerEngine()
anonymized_results = anonymizer.anonymize(args.text, analyzer_results)
anonymized_results = text_anonymizer(args.text, analyzer_results)
else:
anonymizer = BatchAnonymizerEngine()
anonymized_results = anonymizer.anonymize_dict(analyzer_results)
Expand Down Expand Up @@ -69,3 +61,17 @@ def main():

if __name__ == '__main__':
main()


# vault test:
from presidio_anonymizer.entities import OperatorConfig


def vault_encrypt(text):
    """Stand-in for the Vault-backed operator: return *text* with "x" appended."""
    return text + "x"


if __name__ == '__main__':
    # Manual smoke test for the custom-operator wiring. It is guarded so that
    # importing this module does not run the analyzer/anonymizer or print.
    operators = {"DEFAULT": OperatorConfig("custom", {"lambda": vault_encrypt})}

    t = "Hi my name is Qwerty and I live in London. My number is 07440 123456."
    res = text_analyzer(t, "en")
    anon_res = text_anonymizer(t, res, operators)

    print(anon_res)
Empty file added src/text/__init__.py
Empty file.
21 changes: 21 additions & 0 deletions src/text/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Optional
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_anonymizer.anonymizer_engine import AnonymizerEngine
from config.nlp_engine_config import FlairNLPEngine

NLP_ENGINE = "flair/ner-english-large"

def text_analyzer(text, language):
    """Analyze *text* with an AnalyzerEngine built on the Flair NLP engine.

    A ``FlairNLPEngine`` is created for ``NLP_ENGINE`` on every call; its
    ``create_nlp_engine()`` supplies the NLP engine and recognizer registry
    that the ``AnalyzerEngine`` is constructed from.

    Args:
        text: The text to analyze.
        language: Language code forwarded to ``AnalyzerEngine.analyze``.

    Returns:
        Whatever ``AnalyzerEngine.analyze`` returns for the given input.
    """
    flair_factory = FlairNLPEngine(NLP_ENGINE)
    engine_impl, recognizer_registry = flair_factory.create_nlp_engine()
    analyzer = AnalyzerEngine(
        registry=recognizer_registry,
        nlp_engine=engine_impl,
    )
    return analyzer.analyze(text=text, language=language)


def text_anonymizer(text: str, analyzer_results, operators: Optional[dict] = None):
    """Anonymize *text* using a freshly constructed ``AnonymizerEngine``.

    Args:
        text: The original text.
        analyzer_results: Analyzer findings for *text*, passed through to
            ``AnonymizerEngine.anonymize`` unchanged.
        operators: Optional operator configuration, passed through as the
            third positional argument; ``None`` when omitted.

    Returns:
        Whatever ``AnonymizerEngine.anonymize`` returns.
    """
    return AnonymizerEngine().anonymize(text, analyzer_results, operators)
43 changes: 43 additions & 0 deletions src/vault.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from text.text import text_analyzer, text_anonymizer
from presidio_anonymizer.entities import OperatorConfig
import base64
import hvac
import sys


def base64ify(bytes_or_str):
    """Return the URL-safe base64 encoding of *bytes_or_str* as an ASCII string.

    ``str`` input is encoded to UTF-8 first; any other input is handed to
    ``base64.urlsafe_b64encode`` as-is.
    """
    raw = bytes_or_str.encode('utf8') if isinstance(bytes_or_str, str) else bytes_or_str
    return base64.urlsafe_b64encode(raw).decode('ascii')

VAULT_URL = "https://127.0.0.1:8200"

def vault_encrypt(text, *, key_name='orders', client=None):
    """Encrypt *text* with Vault's transit secrets engine.

    Args:
        text: Plaintext string to encrypt.
        key_name: Name of the transit key to encrypt with. Defaults to
            ``'orders'``, the key this function previously hard-coded.
        client: Optional object exposing ``secrets.transit.encrypt_data``
            (for example an already configured ``hvac.Client``). When
            omitted, a new ``hvac.Client`` for ``VAULT_URL`` is created,
            as before.

    Returns:
        The ``ciphertext`` field from the ``data`` section of the response.
    """
    if client is None:
        client = hvac.Client(url=VAULT_URL)

    # Transit takes the plaintext as standard base64; the URL-safe alphabet
    # ('-' and '_') differs from it for some inputs, so encode with b64encode.
    encoded_plaintext = base64.b64encode(text.encode('utf8')).decode('ascii')

    # Deliberately no printing here: the plaintext is the sensitive value
    # this function exists to protect and must not end up on stdout.
    response = client.secrets.transit.encrypt_data(
        name=key_name,
        plaintext=encoded_plaintext,
    )
    return response['data']['ciphertext']


if __name__ == "__main__":
    # Manual smoke test. Guarded so that importing this module (e.g. to use
    # vault_encrypt as an anonymizer operator) does not send a request to Vault.
    vault_encrypt("PII")

# operators = {"DEFAULT": OperatorConfig("custom", {"lambda": vault_encrypt})}
# t = "Hi my name is Qwerty and I live in London. My number is 07440 123456."
# res = text_analyzer(t, "en")
# anon_res = text_anonymizer(t, res, operators)

# print(anon_res)

0 comments on commit 2d4da8a

Please sign in to comment.