Skip to content

Commit

Permalink
Merge pull request #30 from sahajsoft/poetry-improvements
Browse files Browse the repository at this point in the history
Poetry improvements
  • Loading branch information
akshaykarle authored Oct 17, 2024
2 parents ef60f43 + 30fa53a commit fbd891f
Show file tree
Hide file tree
Showing 21 changed files with 2,154 additions and 2,015 deletions.
40 changes: 13 additions & 27 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "Test and Build"
name: Test

on:
push:
Expand All @@ -9,30 +9,16 @@ on:
- main

jobs:
build:
runs-on: ubuntu-latest
test:
runs-on: ubuntu-22.04
permissions:
id-token: "write"
contents: "read"
steps:
- uses: actions/checkout@v4

- uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: "Setup poetry"
uses: abatilo/actions-poetry@v2

- name: "Load cached venv"
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ hashFiles('**/flake.lock') }}-${{ hashFiles('**/poetry.lock') }}

- name: "Install python dependencies"
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root
shell: bash

- name: "Test"
run: 'poetry run pytest'
shell: bash
- uses: actions/checkout@v4
- uses: DeterminateSystems/nix-installer-action@main
- uses: DeterminateSystems/magic-nix-cache-action@main
- uses: DeterminateSystems/flake-checker-action@main
- name: Run tests
run: pytest
shell: nix develop --command bash -e {0}
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ To run the CLI locally, run any of the following commands:

```sh
# alias for easier calls
alias pii='poetry run python src/cli.py'
alias pii='poetry run python -m pii_detection_and_anonymizer'
# alias for docker
alias pii='docker run --rm -i ghcr.io/sahajsoft/pii'

# text
echo "My name is Don Stark and my phone number is 212-555-5555" | pii analyze
echo "My name is Don Stark and my phone number is 212-555-5555" | pii analyze
echo "My name is Don Stark and my phone number is 212-555-5555" | pii analyze | pii anonymize

# text files
Expand All @@ -49,6 +49,7 @@ cat sample.csv | pii analyze --csv | pii anonymize | jq -r '.text'
cat sample.csv | pii analyze --csv | pii anonymize | jq -r '.text' > anonymized.csv

# vault integration
# NOTE: this won't work from docker unless you expose the vault server's port to the container. Make sure you either do that or run using poetry
./vault.sh # start and configure vault server and transit secret engine keys
echo "My name is Don Stark and my phone number is 212-555-5555" | pii anonymize --vaulturl "http://127.0.0.1:8200" --vaultkey "orders"

Expand All @@ -69,6 +70,8 @@ Run `./setup.sh` to install all dependencies. This will install [direnv](https:/

Alternatively, make sure you have [python 3.11](https://www.python.org/downloads/) and [poetry](https://python-poetry.org/docs/#installation) setup on your machine.

Install [vault](https://developer.hashicorp.com/vault/install) if you are planning to use vault for anonymization/deanonymization.

### Running the app

To get started, run the following:
Expand Down
71 changes: 20 additions & 51 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

108 changes: 76 additions & 32 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -1,53 +1,97 @@
{
description = "Application packaged using poetry2nix";

inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05-small";
poetry2nix = {
url = "github:nix-community/poetry2nix";
inputs.nixpkgs.follows = "nixpkgs";
inputs.flake-utils.follows = "flake-utils";
};
};

outputs = { self, nixpkgs, flake-utils, poetry2nix }:
flake-utils.lib.eachDefaultSystem (system:
outputs =
{
self,
nixpkgs,
flake-utils,
poetry2nix,
}:
flake-utils.lib.eachDefaultSystem (
system:
let

pypkgs-build-requirements = {
conllu = [ "setuptools" ];
janome = [ "setuptools" ];
pptree = [ "setuptools" ];
confection = [ "setuptools" ];
ftfy = [ "hatchling" ];
segtok = [ "setuptools" ];
wikipedia-api = [ "setuptools" ];
presidio-vault = [ "poetry" ];
safetensors = [ "maturin" ];
};
p2n-overrides = pkgs.poetry2nix.defaultPoetryOverrides.extend (
final: prev:
builtins.mapAttrs (
package: build-requirements:
(builtins.getAttr package prev).overridePythonAttrs (old: {
buildInputs =
(old.buildInputs or [ ])
++ (builtins.map (
pkg: if builtins.isString pkg then builtins.getAttr pkg prev else pkg
) build-requirements);
})
) pypkgs-build-requirements
);
myapp =
{ poetry2nix, lib }:
poetry2nix.mkPoetryApplication {
projectDir = self;
overrides = p2n-overrides;
preferWheels = true;
};
pkgs = import nixpkgs {
inherit system;
config.allowUnfree = true; # needed for vault
config.allowUnfree = true;
overlays = [
poetry2nix.overlays.default
(final: _: { myapp = final.callPackage myapp { }; })
];
};
nativeBuildInputs = with pkgs; [
stdenv
python311
poetry
zlib
tesseract
];
buildInputs = with pkgs; [ vault jq ];
in
{
packages.default = pkgs.myapp;

# see https://github.com/nix-community/poetry2nix/tree/master#api for more functions and examples.
inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; })
mkPoetryApplication;
in {
inherit nativeBuildInputs buildInputs;

packages = {
myapp = mkPoetryApplication {
projectDir = self;
python = pkgs.python311;
};
default = self.packages.${system}.myapp;
apps.default = {
type = "app";
program = "${pkgs.myapp}/bin/cli";
};

devShells = {
# Shell for app dependencies.
#
# nix develop
#
# Use this shell for developing your app.
default = pkgs.mkShell {
packages = nativeBuildInputs ++ buildInputs;
LD_LIBRARY_PATH = if pkgs.stdenv.isLinux then
"${
pkgs.lib.makeLibraryPath nativeBuildInputs
}:${pkgs.stdenv.cc.cc.lib}/lib:/run/opengl-driver/lib:/run/opengl-driver-32/lib"
else
"$LD_LIBRARY_PATH";
inputsFrom = [ pkgs.myapp ];
packages = [
pkgs.vault
pkgs.jq
];
};

# Shell for poetry.
#
# nix develop .#poetry
#
# Use this shell for changes to pyproject.toml and poetry.lock.
poetry = pkgs.mkShell { packages = [ pkgs.poetry ]; };
};
});
legacyPackages = pkgs;
}
);
}
File renamed without changes.
4 changes: 4 additions & 0 deletions pii_detection_and_anonymizer/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from pii_detection_and_anonymizer.cli import main

if __name__ == "__main__":
main()
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions src/app.py → pii_detection_and_anonymizer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
import os
import uuid
from typing import Tuple
import csv

from flask import Flask, request, jsonify, Response, send_file

import csv
from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from presidio_analyzer import AnalyzerEngine, DictAnalyzerResult, RecognizerResult
from presidio_anonymizer import AnonymizerEngine, BatchAnonymizerEngine
from config.nlp_engine_config import FlairNLPEngine
from presidio_vault.vault import Vault

from pii_detection_and_anonymizer.analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from pii_detection_and_anonymizer.config.nlp_engine_config import FlairNLPEngine

DEFAULT_PORT = "3000"
NLP_ENGINE = "flair/ner-english-large"
UPLOAD_DIR = "file_uploads"
Expand Down
14 changes: 6 additions & 8 deletions src/cli.py → pii_detection_and_anonymizer/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import argparse
import json
import logging
import sys

from presidio_analyzer import RecognizerResult
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_anonymizer.entities.engine.result.operator_result import OperatorResult
from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from presidio_anonymizer import AnonymizerEngine, BatchAnonymizerEngine
from config.nlp_engine_config import FlairNLPEngine
from presidio_vault.vault import Vault
import sys
import logging

from pii_detection_and_anonymizer.analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from pii_detection_and_anonymizer.config.nlp_engine_config import FlairNLPEngine


NLP_ENGINE = "flair/ner-english-large"

Expand Down Expand Up @@ -131,7 +133,3 @@ def main():

args = parser.parse_args()
args.func(args)


if __name__ == "__main__":
main()
File renamed without changes.
Loading

0 comments on commit fbd891f

Please sign in to comment.