|
25 | 25 |
|
26 | 26 | """ |
27 | 27 |
|
| 28 | +import hashlib |
28 | 29 | import json |
29 | 30 | import operator |
30 | 31 | import pathlib |
@@ -750,11 +751,11 @@ def atom2(self): |
750 | 751 |
|
751 | 752 | @property |
752 | 753 | def atom1_index(self) -> int: |
753 | | - return self.molecule.atoms.index(self._atom1) |
| 754 | + return self._atom1.molecule_atom_index |
754 | 755 |
|
755 | 756 | @property |
756 | 757 | def atom2_index(self) -> int: |
757 | | - return self.molecule.atoms.index(self._atom2) |
| 758 | + return self._atom2.molecule_atom_index |
758 | 759 |
|
759 | 760 | @property |
760 | 761 | def atoms(self): |
@@ -1255,17 +1256,28 @@ def __hash__(self): |
1255 | 1256 | return hash(self.to_smiles()) |
1256 | 1257 |
|
1257 | 1258 | def ordered_connection_table_hash(self) -> int: |
1258 | | - """Compute an ordered hash of the atoms and bonds in the molecule""" |
| 1259 | + """ |
| 1260 | + Compute an ordered hash of the atoms and bonds in the molecule. |
| 1261 | +
|
| 1262 | + This hash method is intended for comparison of Molecule objects at |
| 1263 | + runtime, and hashes from one version of the software should not be |
| 1264 | + compared with hashes generated using different versions. |
| 1265 | + """ |
1259 | 1266 | if self._ordered_connection_table_hash is not None: |
1260 | 1267 | return self._ordered_connection_table_hash |
1261 | 1268 |
|
| 1269 | + # Pre-assign molecule atom indices in O(N) time to avoid use of List.index to get index of each one |
| 1270 | + # in O(N^2) time |
| 1271 | + for index, atom in enumerate(self.atoms): |
| 1272 | + atom._molecule_atom_index = index |
| 1273 | + |
1262 | 1274 | id = "" |
1263 | 1275 | for atom in self.atoms: |
1264 | | - id += f"{atom.symbol}_{atom.formal_charge}_{atom.stereochemistry}__" |
| 1276 | + id += f"{atom.atomic_number}_{atom.formal_charge.magnitude}_{atom.stereochemistry}__" |
1265 | 1277 | for bond in self.bonds: |
1266 | 1278 | id += f"{bond.bond_order}_{bond.stereochemistry}_{bond.atom1_index}_{bond.atom2_index}__" |
1267 | 1279 |
|
1268 | | - self._ordered_connection_table_hash = hash(id) |
| 1280 | + self._ordered_connection_table_hash = hashlib.sha3_224(id.encode("utf-8")).hexdigest() |
1269 | 1281 | return self._ordered_connection_table_hash |
1270 | 1282 |
|
1271 | 1283 | @classmethod |
@@ -1971,23 +1983,22 @@ def from_smiles( |
1971 | 1983 | return molecule |
1972 | 1984 |
|
1973 | 1985 | def _is_exactly_the_same_as(self, other): |
1974 | | - for atom1, atom2 in zip(self.atoms, other.atoms): |
1975 | | - if ( |
1976 | | - (atom1.atomic_number != atom2.atomic_number) |
1977 | | - or (atom1.formal_charge != atom2.formal_charge) |
1978 | | - or (atom1.is_aromatic != atom2.is_aromatic) |
1979 | | - or (atom1.stereochemistry != atom2.stereochemistry) |
1980 | | - ): |
1981 | | - return False |
1982 | | - for bond1, bond2 in zip(self.bonds, other.bonds): |
1983 | | - if ( |
1984 | | - (bond1.atom1_index != bond2.atom1_index) |
1985 | | - or (bond1.atom2_index != bond2.atom2_index) |
1986 | | - or (bond1.is_aromatic != bond2.is_aromatic) |
1987 | | - or (bond1.stereochemistry != bond2.stereochemistry) |
1988 | | - ): |
1989 | | - return False |
1990 | | - return True |
| 1986 | + # Pre-assign molecule atom indices in O(N) time to avoid use of List.index to get index of each one |
| 1987 | + # in O(N^2) time |
| 1988 | + for index, atom in enumerate(self.atoms): |
| 1989 | + atom._molecule_atom_index = index |
| 1990 | + for index, atom in enumerate(other.atoms): |
| 1991 | + atom._molecule_atom_index = index |
| 1992 | + |
| 1993 | + self_id = ( |
| 1994 | + tuple((atom.atomic_number, atom.formal_charge.magnitude, atom.stereochemistry) for atom in self.atoms), |
| 1995 | + tuple((bond.bond_order, bond.stereochemistry, bond.atom1_index, bond.atom2_index) for bond in self.bonds), |
| 1996 | + ) |
| 1997 | + other_id = ( |
| 1998 | + tuple((atom.atomic_number, atom.formal_charge.magnitude, atom.stereochemistry) for atom in other.atoms), |
| 1999 | + tuple((bond.bond_order, bond.stereochemistry, bond.atom1_index, bond.atom2_index) for bond in other.bonds), |
| 2000 | + ) |
| 2001 | + return self_id == other_id |
1991 | 2002 |
|
1992 | 2003 | @staticmethod |
1993 | 2004 | def are_isomorphic( |
|
0 commit comments