gt4sd.training_pipelines.moses.organ.core module

Moses Organ training pipeline.

Summary

Classes:

MosesOrganModelArguments

Arguments related to Moses Organ model.

MosesOrganTrainingArguments

Arguments related to Moses Organ training.

MosesOrganTrainingPipeline

Moses Organ training pipelines.

Reference

class MosesOrganTrainingPipeline[source]

Bases: MosesTrainingPipeline

Moses Organ training pipelines.

train(training_args, model_args, dataset_args)[source]

Generic training function for Moses Organ training.

Parameters
  • training_args (Dict[str, Any]) – training arguments passed to the configuration.

  • model_args (Dict[str, Any]) – model arguments passed to the configuration.

  • dataset_args (Dict[str, Any]) – dataset arguments passed to the configuration.

Return type

None

__annotations__ = {}
__doc__ = 'Moses Organ training pipelines.'
__module__ = 'gt4sd.training_pipelines.moses.organ.core'
class MosesOrganTrainingArguments(model_save, log_file, config_save, vocab_save, save_frequency=1, seed=0, device='cpu', generator_pretrain_epochs=50, discriminator_pretrain_epochs=50, pg_iters=1000, n_batch=64, lr=0.0001, n_jobs=8, n_workers=8, clip_grad=5, rollouts=16, generator_updates=1, discriminator_updates=1, discriminator_epochs=10, reward_weight=0.7, addition_rewards='sa', max_length=100, n_ref_subsample=500)[source]

Bases: MosesTrainingArguments

Arguments related to Moses Organ training.

generator_pretrain_epochs: int = 50
discriminator_pretrain_epochs: int = 50
pg_iters: int = 1000
n_batch: int = 64
lr: float = 0.0001
n_jobs: int = 8
n_workers: int = 8
clip_grad: int = 5
rollouts: int = 16
generator_updates: int = 1
discriminator_updates: int = 1
discriminator_epochs: int = 10
reward_weight: float = 0.7
addition_rewards: str = 'sa'
max_length: int = 100
n_ref_subsample: int = 500
__annotations__ = {'addition_rewards': <class 'str'>, 'clip_grad': <class 'int'>, 'config_save': 'str', 'device': 'str', 'discriminator_epochs': <class 'int'>, 'discriminator_pretrain_epochs': <class 'int'>, 'discriminator_updates': <class 'int'>, 'generator_pretrain_epochs': <class 'int'>, 'generator_updates': <class 'int'>, 'log_file': 'str', 'lr': <class 'float'>, 'max_length': <class 'int'>, 'model_save': 'str', 'n_batch': <class 'int'>, 'n_jobs': <class 'int'>, 'n_ref_subsample': <class 'int'>, 'n_workers': <class 'int'>, 'pg_iters': <class 'int'>, 'reward_weight': <class 'float'>, 'rollouts': <class 'int'>, 'save_frequency': 'int', 'seed': 'int', 'vocab_save': 'str'}
__dataclass_fields__ = {'addition_rewards': Field(name='addition_rewards',type=<class 'str'>,default='sa',default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Comma separated list of rewards. Feasible values from: fcd,snn,fragments,scaffolds,internal_diversity,filters,logp,sa,qed,np,weight. Defaults to optimization of SA.'}),kw_only=False,_field_type=_FIELD), 'clip_grad': Field(name='clip_grad',type=<class 'int'>,default=5,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Clip PG generator gradients to this value.'}),kw_only=False,_field_type=_FIELD), 'config_save': Field(name='config_save',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Path for the config.'}),kw_only=False,_field_type=_FIELD), 'device': Field(name='device',type=<class 'str'>,default='cpu',default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': "Device to run: 'cpu' or 'cuda:<device number>'"}),kw_only=False,_field_type=_FIELD), 'discriminator_epochs': Field(name='discriminator_epochs',type=<class 'int'>,default=10,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of epochs of discriminator per iteration.'}),kw_only=False,_field_type=_FIELD), 'discriminator_pretrain_epochs': Field(name='discriminator_pretrain_epochs',type=<class 'int'>,default=50,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of epochs for discriminator pretraining.'}),kw_only=False,_field_type=_FIELD), 'discriminator_updates': Field(name='discriminator_updates',type=<class 
'int'>,default=1,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of updates of discriminator per iteration.'}),kw_only=False,_field_type=_FIELD), 'generator_pretrain_epochs': Field(name='generator_pretrain_epochs',type=<class 'int'>,default=50,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of epochs for generator pretraining.'}),kw_only=False,_field_type=_FIELD), 'generator_updates': Field(name='generator_updates',type=<class 'int'>,default=1,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of updates of generator per iteration.'}),kw_only=False,_field_type=_FIELD), 'log_file': Field(name='log_file',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Path where to save the logs.'}),kw_only=False,_field_type=_FIELD), 'lr': Field(name='lr',type=<class 'float'>,default=0.0001,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Learning rate.'}),kw_only=False,_field_type=_FIELD), 'max_length': Field(name='max_length',type=<class 'int'>,default=100,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Maximum length for sequence.'}),kw_only=False,_field_type=_FIELD), 'model_save': Field(name='model_save',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Path where the trained model is saved.'}),kw_only=False,_field_type=_FIELD), 'n_batch': Field(name='n_batch',type=<class 
'int'>,default=64,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Size of batch.'}),kw_only=False,_field_type=_FIELD), 'n_jobs': Field(name='n_jobs',type=<class 'int'>,default=8,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of threads.'}),kw_only=False,_field_type=_FIELD), 'n_ref_subsample': Field(name='n_ref_subsample',type=<class 'int'>,default=500,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of reference molecules (sampling from training data).'}),kw_only=False,_field_type=_FIELD), 'n_workers': Field(name='n_workers',type=<class 'int'>,default=8,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of workers.'}),kw_only=False,_field_type=_FIELD), 'pg_iters': Field(name='pg_iters',type=<class 'int'>,default=1000,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of iterations for policy gradient training.'}),kw_only=False,_field_type=_FIELD), 'reward_weight': Field(name='reward_weight',type=<class 'float'>,default=0.7,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Reward weight for policy gradient training.'}),kw_only=False,_field_type=_FIELD), 'rollouts': Field(name='rollouts',type=<class 'int'>,default=16,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of rollouts.'}),kw_only=False,_field_type=_FIELD), 'save_frequency': Field(name='save_frequency',type=<class 'int'>,default=1,default_factory=<dataclasses._MISSING_TYPE 
object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'How often to save the model.'}),kw_only=False,_field_type=_FIELD), 'seed': Field(name='seed',type=<class 'int'>,default=0,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Seed used for random number generation.'}),kw_only=False,_field_type=_FIELD), 'vocab_save': Field(name='vocab_save',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Path to save the model vocabulary.'}),kw_only=False,_field_type=_FIELD)}
__dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False)
__doc__ = 'Arguments related to Moses Organ training.'
__eq__(other)

Return self==value.

__hash__ = None
__init__(model_save, log_file, config_save, vocab_save, save_frequency=1, seed=0, device='cpu', generator_pretrain_epochs=50, discriminator_pretrain_epochs=50, pg_iters=1000, n_batch=64, lr=0.0001, n_jobs=8, n_workers=8, clip_grad=5, rollouts=16, generator_updates=1, discriminator_updates=1, discriminator_epochs=10, reward_weight=0.7, addition_rewards='sa', max_length=100, n_ref_subsample=500)
__match_args__ = ('model_save', 'log_file', 'config_save', 'vocab_save', 'save_frequency', 'seed', 'device', 'generator_pretrain_epochs', 'discriminator_pretrain_epochs', 'pg_iters', 'n_batch', 'lr', 'n_jobs', 'n_workers', 'clip_grad', 'rollouts', 'generator_updates', 'discriminator_updates', 'discriminator_epochs', 'reward_weight', 'addition_rewards', 'max_length', 'n_ref_subsample')
__module__ = 'gt4sd.training_pipelines.moses.organ.core'
__repr__()

Return repr(self).

class MosesOrganModelArguments(embedding_size=32, hidden_size=512, num_layers=2, dropout=0.0, discriminator_layers='[(100, 1), (200, 2), (200, 3), (200, 4), (200, 5), (100, 6), (100, 7), (100, 8), (100, 9), (100, 10), (160, 15), (160, 20)]', discriminator_dropout=0.0)[source]

Bases: TrainingPipelineArguments

Arguments related to Moses Organ model.

__name__ = 'MosesOrganModelArguments'
embedding_size: int = 32
hidden_size: int = 512
num_layers: int = 2
dropout: float = 0.0
discriminator_layers: str = '[(100, 1), (200, 2), (200, 3), (200, 4), (200, 5), (100, 6), (100, 7), (100, 8), (100, 9), (100, 10), (160, 15), (160, 20)]'
discriminator_dropout: float = 0.0
__annotations__ = {'discriminator_dropout': <class 'float'>, 'discriminator_layers': <class 'str'>, 'dropout': <class 'float'>, 'embedding_size': <class 'int'>, 'hidden_size': <class 'int'>, 'num_layers': <class 'int'>}
__dataclass_fields__ = {'discriminator_dropout': Field(name='discriminator_dropout',type=<class 'float'>,default=0.0,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Dropout probability for discriminator.'}),kw_only=False,_field_type=_FIELD), 'discriminator_layers': Field(name='discriminator_layers',type=<class 'str'>,default='[(100, 1), (200, 2), (200, 3), (200, 4), (200, 5), (100, 6), (100, 7), (100, 8), (100, 9), (100, 10), (160, 15), (160, 20)]',default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'String representation of numbers of features for convolutional layers in discriminator.'}),kw_only=False,_field_type=_FIELD), 'dropout': Field(name='dropout',type=<class 'float'>,default=0.0,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Dropout probability for lstm layers in generator.'}),kw_only=False,_field_type=_FIELD), 'embedding_size': Field(name='embedding_size',type=<class 'int'>,default=32,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Embedding size in generator and discriminator.'}),kw_only=False,_field_type=_FIELD), 'hidden_size': Field(name='hidden_size',type=<class 'int'>,default=512,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Size of hidden state for lstm layers in generator.'}),kw_only=False,_field_type=_FIELD), 'num_layers': Field(name='num_layers',type=<class 'int'>,default=2,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'help': 'Number of lstm layers in generator.'}),kw_only=False,_field_type=_FIELD)}
__dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False)
__doc__ = 'Arguments related to Moses Organ model.'
__eq__(other)

Return self==value.

__hash__ = None
__init__(embedding_size=32, hidden_size=512, num_layers=2, dropout=0.0, discriminator_layers='[(100, 1), (200, 2), (200, 3), (200, 4), (200, 5), (100, 6), (100, 7), (100, 8), (100, 9), (100, 10), (160, 15), (160, 20)]', discriminator_dropout=0.0)
__match_args__ = ('embedding_size', 'hidden_size', 'num_layers', 'dropout', 'discriminator_layers', 'discriminator_dropout')
__module__ = 'gt4sd.training_pipelines.moses.organ.core'
__repr__()

Return repr(self).