From 25c464c399ae0f3e929f7ef1279e988bbae159b8 Mon Sep 17 00:00:00 2001 From: Weitang Li Date: Sat, 14 Oct 2017 11:58:53 +0800 Subject: [PATCH 1/6] python3 support --- .gitignore | 1 + MANIFEST.in | 2 +- README.md | 13 +++++++++ chembl_ikey/ikey.py | 31 +++++++-------------- chembl_ikey/ikey_base26.py | 55 +++++++++++++++----------------------- chembl_ikey/tests.py | 8 +++--- setup.py | 2 +- 7 files changed, 52 insertions(+), 60 deletions(-) create mode 100644 README.md diff --git a/.gitignore b/.gitignore index ded6067..d19baf2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ nosetests.xml .mr.developer.cfg .project .pydevproject +.idea/ \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 9c8317c..689e50f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ include LICENSE -include README.rst \ No newline at end of file +include README.md \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..158f82c --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# chembl_ikey + +This is pure python implementation of InChiKey generation algorithm based on the original C code. + +This is a improved version of the chembl_ikey package developed at Chembl group, EMBL-EBI, Cambridge, UK. + +The original version is only compatible with python2. And this version could run with python2 and python3. + +## How to use + +1. Download and unzip (or clone) the package ++ Install the package by `pip install setup.py` ++ diff --git a/chembl_ikey/ikey.py b/chembl_ikey/ikey.py index a7abf41..c01d8c6 100644 --- a/chembl_ikey/ikey.py +++ b/chembl_ikey/ikey.py @@ -1,29 +1,23 @@ -__author__ = 'mnowotka' - +#from __future__ import absolute_import import hashlib -from chembl_ikey import ikey_base26 -from ikey_base26 import base26_triplet_1 -from ikey_base26 import base26_triplet_2 -from ikey_base26 import base26_triplet_3 -from ikey_base26 import base26_triplet_4 -from ikey_base26 import base26_dublet_for_bits_56_to_64 -from ikey_base26 import base26_dublet_for_bits_28_to_36 +from .ikey_base26 import base26_triplet_1, base26_triplet_2, base26_triplet_3, base26_triplet_4, \ + base26_dublet_for_bits_56_to_64, base26_dublet_for_bits_28_to_36 -#----------------------------------------------------------------------------------------------------------------------- +__author__ = 'mnowotka' INCHI_STRING_PREFIX = "InChI=" LEN_INCHI_STRING_PREFIX = len(INCHI_STRING_PREFIX) -#----------------------------------------------------------------------------------------------------------------------- def get_sha256(text): hash = hashlib.sha256() - hash.update(text) - return hash.digest() + hash.update(text.encode('ascii')) + digest = hash.digest() + digest_bytes_list = [ord(digest_byte) for digest_byte in digest] if isinstance(digest, str) else list(digest) + return digest_bytes_list -#----------------------------------------------------------------------------------------------------------------------- -def inchiToInchiKey(szINCHISource): +def inchi_to_inchikey(szINCHISource): flagstd = 'S' flagnonstd = 'N' @@ -118,9 +112,4 @@ def inchiToInchiKey(szINCHISource): base26_triplet_4(digest_major) + base26_dublet_for_bits_56_to_64(digest_major) minor = base26_triplet_1(digest_minor) + base26_triplet_2(digest_minor) + \ base26_dublet_for_bits_28_to_36(digest_minor) - return "%s-%s%s%s-%s" % (major, minor, flag, flagver, flagproto) - - -#----------------------------------------------------------------------------------------------------------------------- - - + return "%s-%s%s%s-%s" % (major, minor, flag, flagver, flagproto) \ No newline at end of file diff --git a/chembl_ikey/ikey_base26.py b/chembl_ikey/ikey_base26.py index fbe71c7..09f8a9e 100644 --- a/chembl_ikey/ikey_base26.py +++ b/chembl_ikey/ikey_base26.py @@ -1,64 +1,53 @@ -__author__ = 'mnowotka' - from itertools import product from string import ascii_uppercase -#----------------------------------------------------------------------------------------------------------------------- +__author__ = 'mnowotka' -t26 = [ t for t in map(''.join, product(ascii_uppercase, repeat=3)) if t[0]!='E' and (t < 'TAA' or t > 'TTV')] -d26 = map(''.join, product(ascii_uppercase, repeat=2)) +t26 = [t for t in map(''.join, product(ascii_uppercase, repeat=3)) if t[0] != 'E' and (t < 'TAA' or t > 'TTV')] +d26 = list(map(''.join, product(ascii_uppercase, repeat=2))) -#----------------------------------------------------------------------------------------------------------------------- def base26_triplet_1(a): - b0 = ord(a[0]) - b1 = ord(a[1]) & 0x3f + b0 = a[0] + b1 = a[1] & 0x3f h = b0 | b1 << 8 return t26[h] -#----------------------------------------------------------------------------------------------------------------------- def base26_triplet_2(a): - b0 = ord(a[1]) & 0xc0 - b1 = ord(a[2]) - b2 = ord(a[3]) & 0x0f - h = (b0 | b1 << 8 | b2 << 16) >> 6 + b0 = a[1] & 0xc0 + b1 = a[2] + b2 = a[3] & 0x0f + h = (b0 | b1 << 8 | b2 << 16) >> 6 return t26[h] -#----------------------------------------------------------------------------------------------------------------------- def base26_triplet_3(a): - b0 = ord(a[3]) & 0xf0 - b1 = ord(a[4]) - b2 = ord(a[5]) & 0x03 - h = (b0 | b1 << 8 | b2 << 16) >> 4 + b0 = a[3] & 0xf0 + b1 = a[4] + b2 = a[5] & 0x03 + h = (b0 | b1 << 8 | b2 << 16) >> 4 return t26[h] -#----------------------------------------------------------------------------------------------------------------------- def base26_triplet_4(a): - b0 = ord(a[5]) & 0xfc - b1 = ord(a[6]) - h = (b0 | b1 << 8) >> 2 + b0 = a[5] & 0xfc + b1 = a[6] + h = (b0 | b1 << 8) >> 2 return t26[h] -#----------------------------------------------------------------------------------------------------------------------- def base26_dublet_for_bits_28_to_36(a): - b0 = ord(a[3]) & 0xf0 - b1 = ord(a[4]) & 0x1f - h = (b0 | b1 << 8) >> 4 + b0 = a[3] & 0xf0 + b1 = a[4] & 0x1f + h = (b0 | b1 << 8) >> 4 return d26[h] -#----------------------------------------------------------------------------------------------------------------------- def base26_dublet_for_bits_56_to_64(a): - b0 = ord(a[7]) - b1 = ord(a[8]) & 0x01 - h = b0 | b1 << 8 + b0 = a[7] + b1 = a[8] & 0x01 + h = b0 | b1 << 8 return d26[h] -#----------------------------------------------------------------------------------------------------------------------- - - diff --git a/chembl_ikey/tests.py b/chembl_ikey/tests.py index 4796990..725be96 100644 --- a/chembl_ikey/tests.py +++ b/chembl_ikey/tests.py @@ -1,11 +1,11 @@ -__author__ = 'mnowotka' - import unittest -from chembl_ikey.ikey import inchiToInchiKey +from chembl_ikey.ikey import inchi_to_inchikey + +__author__ = 'mnowotka' class TestIKey(unittest.TestCase): def test_morphineInChIKey(self): - key = inchiToInchiKey("InChI=1S/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13-,16-,17-/m0/s1") + key = inchi_to_inchikey("InChI=1S/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13-,16-,17-/m0/s1") self.assertEqual(key,'BQJCRHHNABKAKU-KBQPJGBKSA-N') diff --git a/setup.py b/setup.py index b35c15e..187d18c 100755 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ url='https://www.ebi.ac.uk/chembl/', license='CC BY-SA 3.0', packages=['chembl_ikey'], - long_description=open('README.rst').read(), + long_description=open('README.md').read(), include_package_data=False, classifiers=['Development Status :: 2 - Pre-Alpha', 'Intended Audience :: Developers', From be089cb6b3adbd13d3f5150ed4d19c6d8693bf76 Mon Sep 17 00:00:00 2001 From: Weitang Li Date: Sat, 14 Oct 2017 12:08:59 +0800 Subject: [PATCH 2/6] Delete README.rst --- README.rst | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 README.rst diff --git a/README.rst b/README.rst deleted file mode 100644 index 3139805..0000000 --- a/README.rst +++ /dev/null @@ -1,6 +0,0 @@ -chembl_ikey -====== - -This is chembl_ikey package developed at Chembl group, EMBL-EBI, Cambridge, UK. - -This is pure python implementation of InChiKey generation algorithm based on the original C code. \ No newline at end of file From 640edb253f9bc80a86cc54facb12b4a3872fbddb Mon Sep 17 00:00:00 2001 From: Weitang Li Date: Sat, 14 Oct 2017 12:08:04 +0800 Subject: [PATCH 3/6] update readme.md --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 158f82c..4bc5918 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,13 @@ This is pure python implementation of InChiKey generation algorithm based on the This is a improved version of the chembl_ikey package developed at Chembl group, EMBL-EBI, Cambridge, UK. -The original version is only compatible with python2. And this version could run with python2 and python3. +The original version is only compatible with python2, and this version could run with python2 or python3. ## How to use 1. Download and unzip (or clone) the package -+ Install the package by `pip install setup.py` -+ ++ Install the package by using `pip install setup.py` ++ Import the interface by using `from chembl_ikey.ikey import inchi_to_inchikey` ++ Convert inchi to inchikey by calling `inchi_to_inchikey(inchi)` + +A sample of how to use the code could be found in chembl_ikey/test.py From 1dbb0a737c246d89b8ae38b3c2d740d599f3506d Mon Sep 17 00:00:00 2001 From: Weitang Li Date: Sat, 14 Oct 2017 12:10:50 +0800 Subject: [PATCH 4/6] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4bc5918..e312dfe 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ The original version is only compatible with python2, and this version could run ## How to use 1. Download and unzip (or clone) the package -+ Install the package by using `pip install setup.py` -+ Import the interface by using `from chembl_ikey.ikey import inchi_to_inchikey` -+ Convert inchi to inchikey by calling `inchi_to_inchikey(inchi)` +2. Install the package by using `pip install setup.py` +3. Import the interface by using `from chembl_ikey.ikey import inchi_to_inchikey` +4. Convert inchi to inchikey by calling `inchi_to_inchikey(inchi)` A sample of how to use the code could be found in chembl_ikey/test.py From 4bf9b9700f1662b3bb1489c9b851b748eaf898e0 Mon Sep 17 00:00:00 2001 From: Weitang Li Date: Sat, 14 Oct 2017 15:24:34 +0800 Subject: [PATCH 5/6] bug fix fix bug when sminor does not exist --- README.md | 4 ++-- chembl_ikey/__init__.py | 4 +++- chembl_ikey/ikey.py | 11 +++++++---- chembl_ikey/ikey_base26.py | 2 +- chembl_ikey/tests.py | 4 ++-- setup.py | 6 +++--- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e312dfe..6db5e10 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ This is pure python implementation of InChiKey generation algorithm based on the This is a improved version of the chembl_ikey package developed at Chembl group, EMBL-EBI, Cambridge, UK. -The original version is only compatible with python2, and this version could run with python2 or python3. +The original version is only compatible with python2, and this version could run with python2 or python3. Some bugs are also fixed. ## How to use 1. Download and unzip (or clone) the package 2. Install the package by using `pip install setup.py` -3. Import the interface by using `from chembl_ikey.ikey import inchi_to_inchikey` +3. Import the interface by using `from chembl_ikey import inchi_to_inchikey` 4. Convert inchi to inchikey by calling `inchi_to_inchikey(inchi)` A sample of how to use the code could be found in chembl_ikey/test.py diff --git a/chembl_ikey/__init__.py b/chembl_ikey/__init__.py index d731da3..821916c 100644 --- a/chembl_ikey/__init__.py +++ b/chembl_ikey/__init__.py @@ -1 +1,3 @@ -__author__ = 'mnowotka' +__author__ = 'mnowotka, liwt31' + +from .ikey import inchi_to_inchikey diff --git a/chembl_ikey/ikey.py b/chembl_ikey/ikey.py index c01d8c6..7ffd59d 100644 --- a/chembl_ikey/ikey.py +++ b/chembl_ikey/ikey.py @@ -1,9 +1,8 @@ -#from __future__ import absolute_import import hashlib from .ikey_base26 import base26_triplet_1, base26_triplet_2, base26_triplet_3, base26_triplet_4, \ base26_dublet_for_bits_56_to_64, base26_dublet_for_bits_28_to_36 -__author__ = 'mnowotka' +__author__ = 'mnowotka, liwt31' INCHI_STRING_PREFIX = "InChI=" LEN_INCHI_STRING_PREFIX = len(INCHI_STRING_PREFIX) @@ -73,9 +72,11 @@ def inchi_to_inchikey(szINCHISource): return None end = idx break + else: + end = slen - if end == (slen - 1): - end += 1 + #if end == (slen - 1): + # end += 1 if not proto: smajor = aux[:end] @@ -112,4 +113,6 @@ def inchi_to_inchikey(szINCHISource): base26_triplet_4(digest_major) + base26_dublet_for_bits_56_to_64(digest_major) minor = base26_triplet_1(digest_minor) + base26_triplet_2(digest_minor) + \ base26_dublet_for_bits_28_to_36(digest_minor) + if szINCHISource == 'InChI=1S/C23H24N6O5S2/c1-34-27-16(14-11-36-23(24)25-14)19(30)26-17-20(31)29-18(22(32)33)13(10-35-21(17)29)9-28-8-4-6-12-5-2-3-7-15(12)28/h4,6,8,11,17,21H,2-3,5,7,9-10H2,1H3,(H3-,24,25,26,30,32,33)': + pass return "%s-%s%s%s-%s" % (major, minor, flag, flagver, flagproto) \ No newline at end of file diff --git a/chembl_ikey/ikey_base26.py b/chembl_ikey/ikey_base26.py index 09f8a9e..fab68dc 100644 --- a/chembl_ikey/ikey_base26.py +++ b/chembl_ikey/ikey_base26.py @@ -1,7 +1,7 @@ from itertools import product from string import ascii_uppercase -__author__ = 'mnowotka' +__author__ = 'mnowotka, liwt31' t26 = [t for t in map(''.join, product(ascii_uppercase, repeat=3)) if t[0] != 'E' and (t < 'TAA' or t > 'TTV')] d26 = list(map(''.join, product(ascii_uppercase, repeat=2))) diff --git a/chembl_ikey/tests.py b/chembl_ikey/tests.py index 725be96..5317f9e 100644 --- a/chembl_ikey/tests.py +++ b/chembl_ikey/tests.py @@ -1,7 +1,7 @@ import unittest -from chembl_ikey.ikey import inchi_to_inchikey +from chembl_ikey import inchi_to_inchikey -__author__ = 'mnowotka' +__author__ = 'mnowotka, weitangli' class TestIKey(unittest.TestCase): def test_morphineInChIKey(self): diff --git a/setup.py b/setup.py index 187d18c..b9c5401 100755 --- a/setup.py +++ b/setup.py @@ -12,9 +12,9 @@ setup( name='chembl_ikey', - version='0.0.1', - author='Michal Nowotka', - author_email='mnowotka@ebi.ac.uk', + version='0.0.2', + author='Michal Nowotka, Weitang Li', + author_email='mnowotka@ebi.ac.uk, liwt31@163.com', description='Pure python implementation of InChiKey generation algorithm based on the original C code', url='https://www.ebi.ac.uk/chembl/', license='CC BY-SA 3.0', From 43369daf1493b8b9127913e6510357619a065d40 Mon Sep 17 00:00:00 2001 From: Weitang Li Date: Sat, 14 Oct 2017 15:25:44 +0800 Subject: [PATCH 6/6] delete unnecessary debug --- chembl_ikey/ikey.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/chembl_ikey/ikey.py b/chembl_ikey/ikey.py index 7ffd59d..87c1132 100644 --- a/chembl_ikey/ikey.py +++ b/chembl_ikey/ikey.py @@ -113,6 +113,4 @@ def inchi_to_inchikey(szINCHISource): base26_triplet_4(digest_major) + base26_dublet_for_bits_56_to_64(digest_major) minor = base26_triplet_1(digest_minor) + base26_triplet_2(digest_minor) + \ base26_dublet_for_bits_28_to_36(digest_minor) - if szINCHISource == 'InChI=1S/C23H24N6O5S2/c1-34-27-16(14-11-36-23(24)25-14)19(30)26-17-20(31)29-18(22(32)33)13(10-35-21(17)29)9-28-8-4-6-12-5-2-3-7-15(12)28/h4,6,8,11,17,21H,2-3,5,7,9-10H2,1H3,(H3-,24,25,26,30,32,33)': - pass return "%s-%s%s%s-%s" % (major, minor, flag, flagver, flagproto) \ No newline at end of file