Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python 3 support #2

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject
.idea/
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
include LICENSE
include README.rst
include README.md
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# chembl_ikey

This is a pure Python implementation of the InChIKey generation algorithm, based on the original C code.

This is an improved version of the chembl_ikey package developed by the ChEMBL group at EMBL-EBI, Cambridge, UK.

The original version is only compatible with Python 2, whereas this version runs on both Python 2 and Python 3. Some bugs have also been fixed.

## How to use

1. Download and unzip (or clone) the package
2. Install the package by running `pip install .` from the package's root directory (the one containing `setup.py`)
3. Import the interface by using `from chembl_ikey import inchi_to_inchikey`
4. Convert an InChI string to its InChIKey by calling `inchi_to_inchikey(inchi)`

An example of how to use the code can be found in chembl_ikey/tests.py
6 changes: 0 additions & 6 deletions README.rst

This file was deleted.

4 changes: 3 additions & 1 deletion chembl_ikey/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
__author__ = 'mnowotka'
__author__ = 'mnowotka, liwt31'

from .ikey import inchi_to_inchikey
36 changes: 13 additions & 23 deletions chembl_ikey/ikey.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,22 @@
__author__ = 'mnowotka'

import hashlib
from chembl_ikey import ikey_base26
from ikey_base26 import base26_triplet_1
from ikey_base26 import base26_triplet_2
from ikey_base26 import base26_triplet_3
from ikey_base26 import base26_triplet_4
from ikey_base26 import base26_dublet_for_bits_56_to_64
from ikey_base26 import base26_dublet_for_bits_28_to_36
from .ikey_base26 import base26_triplet_1, base26_triplet_2, base26_triplet_3, base26_triplet_4, \
base26_dublet_for_bits_56_to_64, base26_dublet_for_bits_28_to_36

#-----------------------------------------------------------------------------------------------------------------------
__author__ = 'mnowotka, liwt31'

INCHI_STRING_PREFIX = "InChI="
LEN_INCHI_STRING_PREFIX = len(INCHI_STRING_PREFIX)

#-----------------------------------------------------------------------------------------------------------------------

def get_sha256(text):
hash = hashlib.sha256()
hash.update(text)
return hash.digest()
hash.update(text.encode('ascii'))
digest = hash.digest()
digest_bytes_list = [ord(digest_byte) for digest_byte in digest] if isinstance(digest, str) else list(digest)
return digest_bytes_list

#-----------------------------------------------------------------------------------------------------------------------

def inchiToInchiKey(szINCHISource):
def inchi_to_inchikey(szINCHISource):

flagstd = 'S'
flagnonstd = 'N'
Expand Down Expand Up @@ -79,9 +72,11 @@ def inchiToInchiKey(szINCHISource):
return None
end = idx
break
else:
end = slen

if end == (slen - 1):
end += 1
#if end == (slen - 1):
# end += 1

if not proto:
smajor = aux[:end]
Expand Down Expand Up @@ -118,9 +113,4 @@ def inchiToInchiKey(szINCHISource):
base26_triplet_4(digest_major) + base26_dublet_for_bits_56_to_64(digest_major)
minor = base26_triplet_1(digest_minor) + base26_triplet_2(digest_minor) + \
base26_dublet_for_bits_28_to_36(digest_minor)
return "%s-%s%s%s-%s" % (major, minor, flag, flagver, flagproto)


#-----------------------------------------------------------------------------------------------------------------------


return "%s-%s%s%s-%s" % (major, minor, flag, flagver, flagproto)
55 changes: 22 additions & 33 deletions chembl_ikey/ikey_base26.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,53 @@
__author__ = 'mnowotka'

from itertools import product
from string import ascii_uppercase

#-----------------------------------------------------------------------------------------------------------------------
__author__ = 'mnowotka, liwt31'

t26 = [ t for t in map(''.join, product(ascii_uppercase, repeat=3)) if t[0]!='E' and (t < 'TAA' or t > 'TTV')]
d26 = map(''.join, product(ascii_uppercase, repeat=2))
t26 = [t for t in map(''.join, product(ascii_uppercase, repeat=3)) if t[0] != 'E' and (t < 'TAA' or t > 'TTV')]
d26 = list(map(''.join, product(ascii_uppercase, repeat=2)))

#-----------------------------------------------------------------------------------------------------------------------

def base26_triplet_1(a):
b0 = ord(a[0])
b1 = ord(a[1]) & 0x3f
b0 = a[0]
b1 = a[1] & 0x3f
h = b0 | b1 << 8
return t26[h]

#-----------------------------------------------------------------------------------------------------------------------

def base26_triplet_2(a):
b0 = ord(a[1]) & 0xc0
b1 = ord(a[2])
b2 = ord(a[3]) & 0x0f
h = (b0 | b1 << 8 | b2 << 16) >> 6
b0 = a[1] & 0xc0
b1 = a[2]
b2 = a[3] & 0x0f
h = (b0 | b1 << 8 | b2 << 16) >> 6
return t26[h]

#-----------------------------------------------------------------------------------------------------------------------

def base26_triplet_3(a):
b0 = ord(a[3]) & 0xf0
b1 = ord(a[4])
b2 = ord(a[5]) & 0x03
h = (b0 | b1 << 8 | b2 << 16) >> 4
b0 = a[3] & 0xf0
b1 = a[4]
b2 = a[5] & 0x03
h = (b0 | b1 << 8 | b2 << 16) >> 4
return t26[h]

#-----------------------------------------------------------------------------------------------------------------------

def base26_triplet_4(a):
b0 = ord(a[5]) & 0xfc
b1 = ord(a[6])
h = (b0 | b1 << 8) >> 2
b0 = a[5] & 0xfc
b1 = a[6]
h = (b0 | b1 << 8) >> 2
return t26[h]

#-----------------------------------------------------------------------------------------------------------------------

def base26_dublet_for_bits_28_to_36(a):
b0 = ord(a[3]) & 0xf0
b1 = ord(a[4]) & 0x1f
h = (b0 | b1 << 8) >> 4
b0 = a[3] & 0xf0
b1 = a[4] & 0x1f
h = (b0 | b1 << 8) >> 4
return d26[h]

#-----------------------------------------------------------------------------------------------------------------------

def base26_dublet_for_bits_56_to_64(a):
b0 = ord(a[7])
b1 = ord(a[8]) & 0x01
h = b0 | b1 << 8
b0 = a[7]
b1 = a[8] & 0x01
h = b0 | b1 << 8
return d26[h]

#-----------------------------------------------------------------------------------------------------------------------



8 changes: 4 additions & 4 deletions chembl_ikey/tests.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
__author__ = 'mnowotka'

import unittest
from chembl_ikey.ikey import inchiToInchiKey
from chembl_ikey import inchi_to_inchikey

__author__ = 'mnowotka, weitangli'

class TestIKey(unittest.TestCase):
def test_morphineInChIKey(self):
key = inchiToInchiKey("InChI=1S/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13-,16-,17-/m0/s1")
key = inchi_to_inchikey("InChI=1S/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13-,16-,17-/m0/s1")
self.assertEqual(key,'BQJCRHHNABKAKU-KBQPJGBKSA-N')


Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@

setup(
name='chembl_ikey',
version='0.0.1',
author='Michal Nowotka',
author_email='[email protected]',
version='0.0.2',
author='Michal Nowotka, Weitang Li',
author_email='[email protected], [email protected]',
description='Pure python implementation of InChiKey generation algorithm based on the original C code',
url='https://www.ebi.ac.uk/chembl/',
license='CC BY-SA 3.0',
packages=['chembl_ikey'],
long_description=open('README.rst').read(),
long_description=open('README.md').read(),
include_package_data=False,
classifiers=['Development Status :: 2 - Pre-Alpha',
'Intended Audience :: Developers',
Expand Down