gt4sd.training_pipelines.guacamol_baselines.smiles_lstm.core module

SMILES LSTM training pipeline from GuacaMol.

Summary

Classes:

GuacaMolLSTMModelArguments

Arguments related to the SMILES LSTM trainer.

GuacaMolLSTMTrainingArguments

Training arguments related to the SMILES LSTM trainer.

GuacaMolLSTMTrainingPipeline

GuacaMol SMILES LSTM training pipeline.

Reference

class GuacaMolLSTMTrainingPipeline[source]

Bases: GuacaMolBaselinesTrainingPipeline

GuacaMol SMILES LSTM training pipeline.

train(training_args, model_args, dataset_args)[source]

Generic training function for GuacaMol Baselines training.

Parameters
  • training_args (Dict[str, Any]) – training arguments passed to the configuration.

  • model_args (Dict[str, Any]) – model arguments passed to the configuration.

  • dataset_args (Dict[str, Any]) – dataset arguments passed to the configuration.

Raises

NotImplementedError – The generic trainer does not implement the pipeline.

Return type

None

__annotations__ = {}
__doc__ = 'GuacaMol SMILES LSTM training pipeline.'
__module__ = 'gt4sd.training_pipelines.guacamol_baselines.smiles_lstm.core'
class GuacaMolLSTMTrainingArguments(output_dir, batch_size=512, valid_every=1000, n_epochs=10, lr=0.001)[source]

Bases: TrainingPipelineArguments

Training arguments related to the SMILES LSTM trainer.

__name__ = 'GuacaMolLSTMTrainingArguments'
output_dir: str
batch_size: int = 512
valid_every: int = 1000
n_epochs: int = 10
lr: float = 0.001
__annotations__ = {'batch_size': <class 'int'>, 'lr': <class 'float'>, 'n_epochs': <class 'int'>, 'output_dir': <class 'str'>, 'valid_every': <class 'int'>}
__dataclass_fields__ = {'batch_size': Field(name='batch_size',type=<class 'int'>,default=512,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Size of a mini-batch for gradient descent.'}),kw_only=False,_field_type=_FIELD), 'lr': Field(name='lr',type=<class 'float'>,default=0.001,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'RNN learning rate.'}),kw_only=False,_field_type=_FIELD), 'n_epochs': Field(name='n_epochs',type=<class 'int'>,default=10,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of training epochs.'}),kw_only=False,_field_type=_FIELD), 'output_dir': Field(name='output_dir',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Output directory.'}),kw_only=False,_field_type=_FIELD), 'valid_every': Field(name='valid_every',type=<class 'int'>,default=1000,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Validate every so many batches.'}),kw_only=False,_field_type=_FIELD)}
__dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False)
__doc__ = 'Training Arguments related to SMILES LSTM trainer.'
__eq__(other)

Return self==value.

__hash__ = None
__init__(output_dir, batch_size=512, valid_every=1000, n_epochs=10, lr=0.001)
__match_args__ = ('output_dir', 'batch_size', 'valid_every', 'n_epochs', 'lr')
__module__ = 'gt4sd.training_pipelines.guacamol_baselines.smiles_lstm.core'
__repr__()

Return repr(self).

class GuacaMolLSTMModelArguments(hidden_size=512, n_layers=3, rnn_dropout=0.2, max_len=100)[source]

Bases: TrainingPipelineArguments

Arguments related to the SMILES LSTM trainer.

__name__ = 'GuacaMolLSTMModelArguments'
hidden_size: int = 512
n_layers: int = 3
rnn_dropout: float = 0.2
max_len: int = 100
__annotations__ = {'hidden_size': <class 'int'>, 'max_len': <class 'int'>, 'n_layers': <class 'int'>, 'rnn_dropout': <class 'float'>}
__dataclass_fields__ = {'hidden_size': Field(name='hidden_size',type=<class 'int'>,default=512,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Size of hidden layer.'}),kw_only=False,_field_type=_FIELD), 'max_len': Field(name='max_len',type=<class 'int'>,default=100,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Max length of a SMILES string.'}),kw_only=False,_field_type=_FIELD), 'n_layers': Field(name='n_layers',type=<class 'int'>,default=3,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of layers for training.'}),kw_only=False,_field_type=_FIELD), 'rnn_dropout': Field(name='rnn_dropout',type=<class 'float'>,default=0.2,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Dropout value for RNN.'}),kw_only=False,_field_type=_FIELD)}
__dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False)
__doc__ = 'Arguments related to SMILES LSTM trainer.'
__eq__(other)

Return self==value.

__hash__ = None
__init__(hidden_size=512, n_layers=3, rnn_dropout=0.2, max_len=100)
__match_args__ = ('hidden_size', 'n_layers', 'rnn_dropout', 'max_len')
__module__ = 'gt4sd.training_pipelines.guacamol_baselines.smiles_lstm.core'
__repr__()

Return repr(self).