RXNBERT fingerprints

class RXNBERTFingerprintGenerator[source]

RXNBERTFingerprintGenerator(model:BertModel, tokenizer:SmilesTokenizer, force_no_cuda=False) :: FingerprintGenerator

Generate RXNBERT fingerprints from reaction SMILES

class RXNBERTMinhashFingerprintGenerator[source]

RXNBERTMinhashFingerprintGenerator(model:BertModel, tokenizer:SmilesTokenizer, permutations=256, seed=42, force_no_cuda=False) :: FingerprintGenerator

Generate RXNBERT minhash fingerprints from reaction SMILES

get_default_model_and_tokenizer[source]

get_default_model_and_tokenizer(model='bert_ft', force_no_cuda=False)

generate_fingerprints[source]

generate_fingerprints(rxns:List[str], fingerprint_generator:FingerprintGenerator, batch_size=1)

Usage

Convert reaction to fingerprint

model, tokenizer = get_default_model_and_tokenizer()

rxnfp_generator = RXNBERTFingerprintGenerator(model, tokenizer)

example_rxn = "Nc1cccc2cnccc12.O=C(O)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1>>O=C(Nc1cccc2cnccc12)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1"

fp = rxnfp_generator.convert(example_rxn)
print(len(fp))
print(fp[:5])
256
[-2.017495632171631, 1.7602037191390991, -1.3323537111282349, -1.109501838684082, 1.2254540920257568]

Convert a list of reactions to fingerprints

fps = rxnfp_generator.convert_batch([example_rxn, example_rxn])
print(len(fps), len(fps[0]))
2 256

Convert reaction to minhash fingerprint

model, tokenizer = get_default_model_and_tokenizer()

rxnmhfp_generator = RXNBERTMinhashFingerprintGenerator(model, tokenizer)

example_rxn = "Nc1cccc2cnccc12.O=C(O)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1>>O=C(Nc1cccc2cnccc12)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1"

fp = rxnmhfp_generator.convert(example_rxn)
print(len(fp))
print(fp[:5])
512
VectorUint[248, 1, 39, 1, 201]