Skip to content

Commit

Permalink
Getting spans into applied and identified lists appropriately (#127)
Browse files Browse the repository at this point in the history
#126 Working on conditionals being met appropriately.
  • Loading branch information
jzonthemtn authored Aug 14, 2024
1 parent bf89fed commit 4f51765
Show file tree
Hide file tree
Showing 30 changed files with 295 additions and 230 deletions.
14 changes: 10 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ jobs:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
packages: write
steps:
- uses: actions/checkout@v3
with:
lfs: true
lfs: true
- name: Cache Maven packages
uses: actions/cache@v1
with:
Expand All @@ -26,8 +26,14 @@ jobs:
server-id: philterd-repository-snapshots
server-username: MAVEN_USERNAME
server-password: MAVEN_PASSWORD
- name: Build and Deploy
run: mvn --batch-mode --update-snapshots test deploy
- name: Build
run: mvn --batch-mode --update-snapshots test
env:
MAVEN_USERNAME: ${{ secrets.PHILTERD_ARTIFACTS_USER }}
MAVEN_PASSWORD: ${{ secrets.PHILTERD_ARTIFACTS_TOKEN }}
- name: Deploy
if: ${{ github.ref == 'refs/heads/main' }}
run: mvn --batch-mode --update-snapshots package deploy
env:
MAVEN_USERNAME: ${{ secrets.PHILTERD_ARTIFACTS_USER }}
MAVEN_PASSWORD: ${{ secrets.PHILTERD_ARTIFACTS_TOKEN }}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public FilterResult filter(final Policy policy, final String context, final Stri
replacement.getReplacement(),
replacement.getSalt(),
isIgnored,
replacement.isApplied(),
window
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public FilterResult filter(final Policy policy, final String context, final Stri
replacement.getReplacement(),
replacement.getSalt(),
isIgnored,
replacement.isApplied(),
window
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ private Span createSpan(final Policy policy, final String context, final String
// Is this term ignored?
final boolean ignored = isIgnored(text);

return Span.make(start, end, FilterType.PERSON, context, documentId, confidence, text, replacement.getReplacement(), replacement.getSalt(), ignored, window);
return Span.make(start, end, FilterType.PERSON, context, documentId, confidence, text,
replacement.getReplacement(), replacement.getSalt(), ignored, replacement.isApplied(), window);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,12 @@ public FilterResult filter(final Policy policy, final String context, final Stri

final String[] window = getWindow(input, match.start(), match.end());
final String classification = "";
final Replacement replacement = getReplacement(policy, context, documentId, text, window, confidence, classification, attributes, null);
final Replacement replacement = getReplacement(policy, context, documentId, text, window, confidence,
classification, attributes, null);
final boolean isIgnored = ignored.contains(text);

spans.add(Span.make(match.start(), match.end(), getFilterType(), context, documentId, confidence, text, replacement.getReplacement(), replacement.getSalt(), isIgnored, window));
spans.add(Span.make(match.start(), match.end(), getFilterType(), context, documentId, confidence,
text, replacement.getReplacement(), replacement.getSalt(), isIgnored, replacement.isApplied(), window));

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,7 @@
import java.util.List;
import java.util.Properties;

import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPdfFilterWithPersonPolicy;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicy;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyJustCreditCard;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyJustIdentifier;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyJustStreetAddress;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyWithSentiment;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyZipCodeWithIgnored;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicyZipCodeWithIgnoredFromFile;
import static ai.philterd.test.phileas.services.EndToEndTestsHelper.*;

@Disabled("Some of these tests require a running philter-ner service")
public class EndToEndTests {
Expand Down Expand Up @@ -566,6 +559,42 @@ public void endToEnd17() throws Exception {

}

/**
 * End-to-end check of a policy that only enables the phone number filter.
 *
 * <p>The policy returned by {@code getPolicyJustPhoneNumber} is serialized to
 * {@code phonenumbers.json} in a fresh temporary directory, the filter service is
 * pointed at that directory, and a sentence containing three phone-number-like
 * values is filtered.
 *
 * <p>Expected outcome: three spans are identified but only one is applied, so only
 * {@code 123-456-7890} is replaced in the filtered text.
 * NOTE(review): presumably {@code 9999999999} is left alone because it does not meet
 * the policy's confidence condition and {@code 102-304-5678} because it is in the
 * policy's ignored set; confirm against the policy helper and the filter.
 *
 * @throws Exception if the policy file cannot be written or filtering fails
 */
@Test
public void endToEndJustPhoneNumbers() throws Exception {

    final String policyName = "phonenumbers";
    final String input = "his number is 123-456-7890. her number is 9999999999. her number is 102-304-5678.";
    final String expectedFilteredText = "his number is {{{REDACTED-phone-number}}}. her number is 9999999999. her number is 102-304-5678.";

    // Write the policy as JSON into its own temporary policies directory.
    final Path temp = Files.createTempDirectory("philter");
    final String policiesDirectory = temp.toFile().getAbsolutePath();
    final File policyFile = Paths.get(policiesDirectory, "phonenumbers.json").toFile();
    LOGGER.info("Writing policy to {}", policyFile.getAbsolutePath());
    final String policyJson = gson.toJson(getPolicyJustPhoneNumber(policyName));
    LOGGER.info(policyJson);
    FileUtils.writeStringToFile(policyFile, policyJson, Charset.defaultCharset());

    // Configure the service to load policies from that directory.
    final Properties settings = new Properties();
    settings.setProperty("indexes.directory", INDEXES_DIRECTORY);
    settings.setProperty("filter.policies.directory", policiesDirectory);
    final PhileasConfiguration configuration = new PhileasConfiguration(settings);
    final PhileasFilterService filterService = new PhileasFilterService(configuration);

    final FilterResponse result = filterService.filter(
            List.of(policyName), "context", "documentid", input, MimeType.TEXT_PLAIN);

    // Log the outcome to make failures easier to diagnose.
    LOGGER.info(result.filteredText());

    LOGGER.info("Identified spans:");
    showSpans(result.explanation().identifiedSpans());

    LOGGER.info("Applied spans:");
    showSpans(result.explanation().appliedSpans());

    Assertions.assertEquals("documentid", result.documentId());
    Assertions.assertEquals(1, result.explanation().appliedSpans().size());
    Assertions.assertEquals(3, result.explanation().identifiedSpans().size());
    Assertions.assertEquals(expectedFilteredText, result.filteredText().trim());

}

@Test
public void endToEndWithPolicyAsObject() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,12 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
AgeFilterStrategy ageFilterStrategy = new AgeFilterStrategy();

Age age = new Age();
age.setAgeFilterStrategies(Arrays.asList(ageFilterStrategy));
age.setAgeFilterStrategies(List.of(ageFilterStrategy));

CreditCardFilterStrategy creditCardFilterStrategy = new CreditCardFilterStrategy();

CreditCard creditCard = new CreditCard();
creditCard.setCreditCardFilterStrategies(Arrays.asList(creditCardFilterStrategy));
creditCard.setCreditCardFilterStrategies(List.of(creditCardFilterStrategy));

DateFilterStrategy dateFilterStrategy = new DateFilterStrategy();
dateFilterStrategy.setStrategy(AbstractFilterStrategy.SHIFT);
Expand All @@ -213,15 +213,15 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
dateFilterStrategy.setShiftDays(1);

Date date = new Date();
date.setDateFilterStrategies(Arrays.asList(dateFilterStrategy));
date.setDateFilterStrategies(List.of(dateFilterStrategy));

EmailAddressFilterStrategy emailAddressFilterStrategy = new EmailAddressFilterStrategy();

EmailAddress emailAddress = new EmailAddress();
emailAddress.setEmailAddressFilterStrategies(Arrays.asList(emailAddressFilterStrategy));
emailAddress.setEmailAddressFilterStrategies(List.of(emailAddressFilterStrategy));

Identifier identifier1 = new Identifier();
identifier1.setIdentifierFilterStrategies(Arrays.asList(new IdentifierFilterStrategy()));
identifier1.setIdentifierFilterStrategies(List.of(new IdentifierFilterStrategy()));
identifier1.setPattern("asdfasdfasdf");
identifier1.setCaseSensitive(true);

Expand All @@ -231,12 +231,12 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
Identifier identifier2 = new Identifier();
identifier2.setPattern("JEFF");
identifier2.setCaseSensitive(true);
identifier2.setIdentifierFilterStrategies(Arrays.asList(identifier2FilterStrategy));
identifier2.setIdentifierFilterStrategies(List.of(identifier2FilterStrategy));

IpAddressFilterStrategy ipAddressFilterStrategy = new IpAddressFilterStrategy();

IpAddress ipAddress = new IpAddress();
ipAddress.setIpAddressFilterStrategies(Arrays.asList(ipAddressFilterStrategy));
ipAddress.setIpAddressFilterStrategies(List.of(ipAddressFilterStrategy));

PhoneNumberFilterStrategy phoneNumberFilterStrategy = new PhoneNumberFilterStrategy();

Expand Down Expand Up @@ -271,13 +271,13 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE

PersonsFilterStrategy personsFilterStrategy = new PersonsFilterStrategy();

final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/model.onnx").toURI());
/*final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/vocab.txt").toURI());
PersonV2 personV2 = new PersonV2();
personV2.setModel(model.getAbsolutePath());
personV2.setVocab(vocab.getAbsolutePath());
personV2.setPersonFilterStrategies(List.of(personsFilterStrategy));
personV2.setPersonFilterStrategies(List.of(personsFilterStrategy));*/

// ----------------------------------------------------------------------------------

Expand Down Expand Up @@ -326,7 +326,7 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
identifiers.setEmailAddress(emailAddress);
identifiers.setIdentifiers(Arrays.asList(identifier1, identifier2));
identifiers.setIpAddress(ipAddress);
identifiers.setPersonV2(personV2);
//identifiers.setPersonV2(personV2);
identifiers.setPhoneNumber(phoneNumber);
identifiers.setSsn(ssn);
//identifiers.setStateAbbreviation(stateAbbreviation);
Expand Down Expand Up @@ -387,4 +387,24 @@ public static Policy getPolicyJustStreetAddress(String policyName) {

}

/**
 * Builds a policy whose identifiers contain only a phone number filter.
 *
 * <p>The phone number filter gets a single strategy with the condition
 * {@code confidence > 0.70} and an ignored set containing {@code 102-304-5678}.
 *
 * @param policyName the name to assign to the returned policy
 * @return a new policy with the given name and only the phone number identifier set
 */
public static Policy getPolicyJustPhoneNumber(String policyName) {

    // One strategy, restricted by a confidence condition.
    final PhoneNumberFilterStrategy strategy = new PhoneNumberFilterStrategy();
    strategy.setConditions("confidence > 0.70");

    // Phone number filter using that strategy plus one explicitly ignored value.
    final PhoneNumber phoneNumberFilter = new PhoneNumber();
    phoneNumberFilter.setPhoneNumberFilterStrategies(List.of(strategy));
    phoneNumberFilter.setIgnored(Set.of("102-304-5678"));

    final Identifiers onlyPhoneNumbers = new Identifiers();
    onlyPhoneNumbers.setPhoneNumber(phoneNumberFilter);

    final Policy result = new Policy();
    result.setName(policyName);
    result.setIdentifiers(onlyPhoneNumbers);
    return result;

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public void policyWithPlaceholder() throws IOException, URISyntaxException {
ignored.setTerms(Arrays.asList("john", "jeff", "${USER}"));

final Policy policy = getPolicy("placeholder");
policy.setIgnored(Arrays.asList(ignored));
policy.setIgnored(List.of(ignored));
final String json = gson.toJson(policy);
LOGGER.info(json);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

Expand All @@ -36,6 +37,7 @@
import java.util.List;
import java.util.Map;

@Disabled
public class PersonsV3FilterTest extends AbstractFilterTest {

private static final Logger LOGGER = LogManager.getLogger(PersonsV3FilterTest.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public void ignored1() {
policy.setIgnoredPatterns(Arrays.asList(ignoredPattern));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "AB01", "*****", "", false, new String[0]));
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "AB01", "*****", "", false, true, new String[0]));

final IgnoredPatternsFilter ignoredPatternsFilter = new IgnoredPatternsFilter(Arrays.asList(ignoredPattern));
final List<Span> filteredSpans = ignoredPatternsFilter.filter("ID is AB01.", spans);
Expand All @@ -60,7 +60,7 @@ public void notIgnored1() {
policy.setIgnoredPatterns(Arrays.asList(ignoredPattern));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "Ab01", "*****", "", false, new String[0]));
spans.add(Span.make(6, 10, FilterType.IDENTIFIER, "context", "docid", 0.80, "Ab01", "*****", "", false, true, new String[0]));

final IgnoredPatternsFilter ignoredPatternsFilter = new IgnoredPatternsFilter(Arrays.asList(ignoredPattern));
final List<Span> filteredSpans = ignoredPatternsFilter.filter("ID is Ab01.", spans);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public void ignored() throws IOException {
policy.setIgnored(Arrays.asList(ignored));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand All @@ -58,7 +58,7 @@ public void ignoredFile1() throws IOException {
ignored.setFiles(Arrays.asList(new File("src/test/resources/ignored-terms.txt").getAbsolutePath()));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in samuel.", spans);
Expand All @@ -74,7 +74,7 @@ public void ignoredFile2() throws IOException {
ignored.setFiles(Arrays.asList(new File("src/test/resources/ignored-terms.txt").getAbsolutePath()));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 18, FilterType.IDENTIFIER, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in samuel.", spans);
Expand Down Expand Up @@ -102,7 +102,7 @@ public void notIgnored() throws IOException {
ignored.setTerms(Arrays.asList("Seattle", "California", "Virginia"));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand All @@ -118,7 +118,7 @@ public void caseSensitive1Test() throws IOException {
ignored.setTerms(Arrays.asList("washington", "California", "Virginia"));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand All @@ -134,7 +134,7 @@ public void caseSensitive2Test() throws IOException {
ignored.setTerms(Arrays.asList("Washington", "California", "Virginia"));

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.LOCATION_STATE, "context", "docid", 0.80, "test", "*****", "", false, true, new String[0]));

final IgnoredTermsFilter ignoredTermsFilter = new IgnoredTermsFilter(ignored);
final List<Span> filteredSpans = ignoredTermsFilter.filter("He lived in Washington.", spans);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class TrailingNewLinePostFilterTest extends AbstractFilterTest {
public void test1() {

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n", "*****", "", false, true, new String[0]));

final TrailingNewLinePostFilter postFilter = TrailingNewLinePostFilter.getInstance();
final List<Span> filteredSpans = postFilter.filter("doesn't matter", spans);
Expand All @@ -47,7 +47,7 @@ public void test1() {
public void test2() {

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with", "*****", "", false, true, new String[0]));

final TrailingNewLinePostFilter postFilter = TrailingNewLinePostFilter.getInstance();
final List<Span> filteredSpans = postFilter.filter("doesn't matter", spans);
Expand All @@ -63,7 +63,7 @@ public void test2() {
public void test3() {

final List<Span> spans = new LinkedList<>();
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n\n", "*****", "", false, new String[0]));
spans.add(Span.make(12, 22, FilterType.URL, "context", "docid", 0.80, "ends with\n\n", "*****", "", false, true, new String[0]));

final TrailingNewLinePostFilter postFilter = TrailingNewLinePostFilter.getInstance();
final List<Span> filteredSpans = postFilter.filter("doesn't matter", spans);
Expand Down
Loading

0 comments on commit 4f51765

Please sign in to comment.