Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.0.19 #63

Merged
merged 4 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ on:
jobs:
doc_build:
runs-on: ubuntu-latest
permissions:
contents: write

steps:
- name: checkout and set up
Expand Down
329 changes: 168 additions & 161 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "type_infer"
version = "0.0.18"
version = "0.0.19"
description = "Automated type inference for Machine Learning pipelines."
authors = ["MindsDB Inc. <[email protected]>"]
license = "GPL-3.0"
Expand All @@ -19,7 +19,7 @@ psutil = "^5.9.0"
toml = "^0.10.2"

# rule based deps, part of core
langid = "^1.1.6"
py3langid = ">=0.2.2,<0.3"
nltk = "^3"

[build-system]
Expand Down
8 changes: 8 additions & 0 deletions tests/unit_tests/test_helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

from type_infer import helpers
from type_infer.rule_based.helpers import get_language_dist


class TestCastStringToPythonType(unittest.TestCase):
Expand Down Expand Up @@ -32,3 +33,10 @@ def test_nan_is_numeric(self):
def test_inf_is_numeric(self):
self.assertTrue(helpers.is_nan_numeric('inf'))
self.assertTrue(helpers.is_nan_numeric(float('inf')))


class TestLangid(unittest.TestCase):
    """Unit tests for ``get_language_dist``."""

    def test_get_language_dist(self):
        """One English and one Spanish sentence should each be counted once."""
        dist = get_language_dist(['This is clearly English', 'Y esto es claramente español'])
        # .get() yields None for a missing key, so assertEqual covers both the
        # "key present" and "count is 1" checks while reporting the actual
        # value on failure (assertTrue would only say "False is not true").
        self.assertEqual(dist.get('en'), 1)
        self.assertEqual(dist.get('es'), 1)
2 changes: 1 addition & 1 deletion type_infer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from type_infer import api
from type_infer import helpers

__version__ = '0.0.18'
__version__ = '0.0.19'


__all__ = [
Expand Down
6 changes: 3 additions & 3 deletions type_infer/rule_based/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

import numpy as np
import scipy.stats as st
from langid.langid import LanguageIdentifier
from langid.langid import model as langid_model
from py3langid.langid import LanguageIdentifier
from py3langid.langid import MODEL_FILE as LANGID_MODEL_FILE

from type_infer.dtype import dtype

Expand Down Expand Up @@ -109,7 +109,7 @@ def get_language_dist(data):
lang_dist = defaultdict(lambda: 0)
lang_dist['Unknown'] = 0
lang_probs_cache = dict()
identifier = LanguageIdentifier.from_modelstring(langid_model, norm_probs=True)
identifier = LanguageIdentifier.from_pickled_model(LANGID_MODEL_FILE, norm_probs=True)
for text in data:
text = str(text)
text = text.translate(str.maketrans('', '', string.punctuation))
Expand Down
Loading