Source code for rxn.reaction_preprocessing.annotations.missing_annotation_detector

from typing import Callable, Generator, Iterable, Optional, Set, Union

from rxn.chemutils.reaction_equation import ReactionEquation

from rxn.reaction_preprocessing.annotations.annotation_criterion import (
    AnnotationCriterion,
)
from rxn.reaction_preprocessing.annotations.molecule_annotation import (
    MoleculeAnnotation,
)


class MissingAnnotationDetector:
    """
    Find reactions with molecules that should be annotated, taking into
    account a set of already-annotated molecules.
    """

    def __init__(
        self,
        annotated_molecules: Set[str],
        requires_annotation_fn: Optional[Callable[[str], bool]] = None,
    ):
        """
        Args:
            annotated_molecules: set of already-annotated molecules. The set
                is stored as given (no copy is made).
            requires_annotation_fn: function with which to decide whether a
                molecule needs an annotation. Defaults to AnnotationCriterion().
        """
        self.annotated_molecules = annotated_molecules
        if requires_annotation_fn is None:
            requires_annotation_fn = AnnotationCriterion()
        self.requires_annotation_fn = requires_annotation_fn

    def molecule_needs_annotation(self, smiles: str) -> bool:
        """
        Whether a molecule needs annotation.

        The criterion function is consulted first; only if it flags the
        molecule is the set of already-annotated molecules looked up.

        NOTE(review): the original documentation states that the default
        criterion checks the overlap between the elements of the molecule and
        the extended transition metals; that logic lives in
        AnnotationCriterion and is not visible here.

        Args:
            smiles: SMILES string of the molecule to check.

        Returns:
            True if the criterion requires an annotation and the molecule is
            not among the annotated molecules, False otherwise.
        """
        if not self.requires_annotation_fn(smiles):
            return False
        return smiles not in self.annotated_molecules

    def missing_in_reaction_equation(
        self, reaction_equation: ReactionEquation
    ) -> Generator[str, None, None]:
        """In a reaction equation, find the molecules requiring annotation."""
        for smiles in reaction_equation.iter_all_smiles():
            if self.molecule_needs_annotation(smiles):
                yield smiles

    def missing_in_reaction_equations(
        self, reaction_equations: Iterable[ReactionEquation]
    ) -> Generator[str, None, None]:
        """In multiple reaction equations, find the molecules requiring annotation."""
        for reaction_equation in reaction_equations:
            yield from self.missing_in_reaction_equation(reaction_equation)

    def missing_in_reaction_smiles(
        self,
        reaction_smiles: Union[Iterable[str], str],
        fragment_bond: Optional[str] = None,
    ) -> Generator[str, None, None]:
        """
        In one or multiple reaction SMILES, find the molecules requiring annotation.

        Args:
            reaction_smiles: One reaction SMILES (str), or multiple reaction SMILES.
            fragment_bond: fragment bond used in the reaction SMILES.

        Returns:
            Generator over the SMILES of the molecules requiring annotation.
            The reaction SMILES are converted lazily, as the generator is consumed.
        """
        # A single string is itself an iterable (of characters), so it must be
        # wrapped explicitly to be treated as one reaction SMILES.
        if isinstance(reaction_smiles, str):
            reaction_smiles = [reaction_smiles]

        reaction_equations = (
            ReactionEquation.from_string(reaction_smile, fragment_bond)
            for reaction_smile in reaction_smiles
        )
        return self.missing_in_reaction_equations(reaction_equations)

    @classmethod
    def from_molecule_annotations(
        cls,
        molecule_annotations: Iterable[MoleculeAnnotation],
        requires_annotation_fn: Optional[Callable[[str], bool]] = None,
    ) -> "MissingAnnotationDetector":
        """
        Create a MissingAnnotationDetector instance from existing molecule annotations.

        The annotations are traversed exactly once, so one-shot iterables
        (generators, iterators) are supported.

        Args:
            molecule_annotations: existing molecule annotations.
            requires_annotation_fn: function with which to decide whether a
                molecule needs an annotation. Defaults to AnnotationCriterion().

        Returns:
            A detector whose annotated molecules are the original SMILES of
            all the annotations, plus the updated SMILES of the annotations
            that were updated to exactly one molecule.
        """
        annotated_molecules: Set[str] = set()
        for annotation in molecule_annotations:
            annotated_molecules.add(annotation.original_without_fragment_bond)

            # Also consider the updated SMILES, but only if they consist in
            # exactly one molecule. The None check comes first so that the
            # updated molecules are never queried for un-updated annotations.
            if annotation.updated_smiles is None:
                continue
            updated_molecules = annotation.updated_without_fragment_bond
            if len(updated_molecules) == 1:
                annotated_molecules.add(updated_molecules[0])

        return cls(
            annotated_molecules=annotated_molecules,
            requires_annotation_fn=requires_annotation_fn,
        )