Source code for rxn.reaction_preprocessing.annotations.molecule_annotation

import json
from enum import auto
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Union

import attr
from rxn.chemutils.multicomponent_smiles import multicomponent_smiles_to_list
from rxn.utilities.types import RxnEnum


[docs]class AnnotationDecision(RxnEnum): ACCEPT = auto() REJECT = auto()
[docs]@attr.s(auto_attribs=True, init=False) class MoleculeAnnotation: """ Specifies a molecule annotation, i.e. a SMILES string that may have an updated SMILES, whether to keep it or not, etc. """ original_smiles: str updated_smiles: Optional[str] decision: AnnotationDecision categories: List[str] extra_info: Dict[str, Any]
[docs] def __init__( self, original_smiles: str, updated_smiles: Optional[str], decision: str, categories: List[str], **extra_info: Any ): """ Args: original_smiles: original SMILES that is potentially present in a data set. Fragment bonds are indicated by a tilde '~'. updated_smiles: if specified, SMILES with which to replace original_smiles. Also uses '~' for fragment bonds, and dots '.' may be used to separate compounds from the solvent in which they are solved. decision: "accept" or "reject". categories: categories to which the annotation belongs to. **extra_info: additional information not covered by the other variables. """ decision_enum = AnnotationDecision.from_string(decision) self.__attrs_init__( original_smiles=original_smiles, updated_smiles=updated_smiles, decision=decision_enum, categories=categories, extra_info=extra_info, )
@property def original_without_fragment_bond(self) -> str: """Get the original SMILES with dots instead of tildes to delimit fragments.""" return self.original_smiles.replace("~", ".") @property def updated_without_fragment_bond(self) -> List[str]: """ Get the updated SMILES with dots instead of tildes to delimit fragments. Since dots may be used to delimit solvents from compounds, a list must be returned. """ if self.updated_smiles is None: raise RuntimeError("No updated SMILES!") return multicomponent_smiles_to_list(self.updated_smiles, "~")
[docs]def load_annotations(json_file: Union[Path, str]) -> List[MoleculeAnnotation]: """ Load the molecule annotations from a JSON file. Args: json_file: path to the JSON file containing the annotations. Returns: List of annotations. """ with open(json_file, "rt") as f: json_content = json.load(f) return [MoleculeAnnotation(**block) for block in json_content]
[docs]def load_annotations_multiple( json_files: Iterable[Union[Path, str]] ) -> List[MoleculeAnnotation]: """ Load the molecule annotations from multiple JSON files. Args: json_files: paths to the JSON file containing the annotations. Returns: List of annotations. """ annotations = [] for json_file in json_files: annotations.extend(load_annotations(json_file)) return annotations