# Source code for kgeserver.algorithm

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# coding:utf-8
#
# Model class: create and train a model
# Copyright (C) 2016 Víctor Fernández Rico <vfrico@gmail.com>
# Copyright (C) 2016 Maximilian Nickel <mnick@mit.edu>
#
#   This file include original part of the holographic-embeddings
#   project, which is located in GitHub:
#   <https://github.com/mnick/holographic-embeddings/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import numpy as np
import threading
import itertools
import skge
import kgeserver.dataset as dataset
import kgeserver.experiment as experiment


class TransEEval(experiment.FilteredRankingEval):
    """Filtered ranking evaluation for translation (TransE-style) models."""

    def prepare(self, mdl, p):
        # Cache every entity embedding translated by relation p.
        self.ER = mdl.E + mdl.R[p]

    def scores_o(self, mdl, s, p):
        # Negative L1 distance from (s + p) to every candidate object.
        diff = self.ER[s] - mdl.E
        return -np.abs(diff).sum(axis=1)

    def scores_s(self, mdl, o, p):
        # Negative L1 distance from every translated subject to o.
        diff = self.ER - mdl.E[o]
        return -np.abs(diff).sum(axis=1)


class HolEEval(experiment.FilteredRankingEval):
    """Filtered ranking evaluation for holographic-embedding (HolE) models."""

    def prepare(self, mdl, p):
        # Circular correlation of relation p with every entity embedding.
        self.ER = skge.util.ccorr(mdl.R[p], mdl.E)

    def scores_o(self, mdl, s, p):
        # Score all candidate objects for subject s under relation p.
        return self.ER.dot(mdl.E[s])

    def scores_s(self, mdl, o, p):
        # Score all candidate subjects for object o under relation p.
        return mdl.E.dot(self.ER[o])


class ModelTrainer(experiment.Experiment):
    """Creates a Model from a dataset and trains it."""

    def __init__(self, dataset, ncomp=150, afs='sigmoid',
                 trainer_type=skge.PairwiseStochasticTrainer,
                 model_type=skge.TransE, eval_type=TransEEval, **kwargs):
        """Constructor method.

        :param Dataset dataset: The dataset to train
        :param int ncomp: Number of latent components
        :param string afs: Activation function
        :param skge.Trainer trainer_type: The class desired for the trainer
        :param skge.Model model_type: The Model used to train
        :param Class eval_type: The class used to evaluate the model
        :param float margin: Margin for loss function
        :param string init: Initialization method
        :param float lr: Learning rate
        :param int max_epochs: Maximum number of epochs
        :param int ne: Number of negative examples
        :param int nbatches: Number of batches
        :param string fout: Path to store model and results TODO->CHANGE
        :param string fin: Path to input data TODO->CHANGE
        :param int test_all: Evaluate the model after x epochs
        :param bool no_pairwise: If true, trainer used is no pairwise
        :param string mode:
        :param string sampler:
        """
        super().__init__(dataset, **kwargs)
        self.ncomp = ncomp
        self.evaluator = eval_type
        self.trainer_type = trainer_type
        self.model_type = model_type
        self.afs = afs
        # NOTE(review): debug dump of the instance state, kept as-is.
        print(self.__dict__)

    def setup_trainer(self, size, sampler):
        """Configures a model and a trainer to be used in train method

        :param tuple size: A tuple (X, Y, Z) with the size of tensor
        :param skge.Sampler sampler: A sampler used by trainer.
        :return: An instantiated trainer
        :rtype: skge.Trainer
        """
        # Build the embedding model with the activation function chosen
        # at construction time; rparam (regularization) is fixed to 0.
        activation = skge.activation_functions[self.afs]
        embedding_model = self.model_type(size, self.ncomp,
                                          init=self.init,
                                          rparam=0,
                                          af=activation)
        return self.trainer_type(embedding_model,
                                 nbatches=self.nb,
                                 margin=self.margin,
                                 max_epochs=self.me,
                                 learning_rate=self.lr,
                                 samplef=sampler.sample,
                                 post_epoch=[self.callback])

    def get_conf(self):
        """Returns a dict with all model configuration

        :return: The full training configuration of this instance
        :rtype: dict
        """
        conf = {
            'ncomp': self.ncomp,
            'afs': self.afs,
            'trainer_type': self.trainer_type,
            'model_type': self.model_type,
            'evaluator': self.evaluator,
            'margin': self.margin,
            'init': self.init,
            'lr': self.lr,
            'max_epochs': self.me,
            'ne': self.ne,
            'nbatches': self.nb,
            'test_all': self.test_all,
            'no_pairwise': self.no_pairwise,
            'mode': self.mode,
            'sampler': self.sampler,
        }
        return conf
class Algorithm():
    """Generate several models to test and choose the right one """

    def __init__(self, dataset, thread_limiter=4):
        """Constructor method.

        :param Dataset dataset: The dataset used on every training run
        :param int thread_limiter: Maximum number of concurrent trainings
        """
        self.dataset = dataset
        # Bounds how many trainings may run at the same time.
        self.th_semaphore = threading.Semaphore(thread_limiter)

    def find_best(self, margins=(0.2, 2.0), ncomps=range(50, 100, 20),
                  model_types=(skge.HolE, skge.TransE), **kwargs):
        """Find the best training params for a given dataset

        This method makes several trains with different models and
        parameters, and returns a ModelTrainer Instance. One training is
        launched for every combination of margin, latent-component count
        and model class (defaults are immutable tuples to avoid the
        shared-mutable-default pitfall; any iterable is accepted).

        :param margins: Margins to try
        :param ncomps: Latent-component counts to try
        :param model_types: Model classes to try
        :return: (all scores, best entry, new ModelTrainer built from it)
        :rtype: tuple
        """
        # Pool of worker threads and the trainers they will run.
        threads = []
        model_trainer_list = []
        model_trainer_scores = []

        for num, (margin, ncomp, model_type) in enumerate(
                itertools.product(margins, ncomps, model_types)):
            # HolE models are scored with their own evaluator; every
            # other model class uses the TransE evaluator.
            evaluator = HolEEval if model_type == skge.HolE else TransEEval
            modtr = ModelTrainer(self.dataset, model_type=model_type,
                                 margin=margin, ncomp=ncomp, th_num=num,
                                 eval_type=evaluator, **kwargs)
            model_trainer_list.append(modtr)

        def callbk_fn(modeltrainer):
            # Runs on the worker thread when its training finishes: free
            # a semaphore slot and record (score, epoch) pairs sorted by
            # score, best first.
            self.th_semaphore.release()
            print("[%d]Un model trainer ha terminado" % modeltrainer.th_num)
            # Extract evaluation data
            tuples = [(e['score'], e['epoch']) for e in modeltrainer.scores]
            sorted_scores = sorted(tuples, key=lambda t: t[0], reverse=True)
            print("[{}] {}".format(modeltrainer.th_num, sorted_scores))
            # list.append is atomic under CPython's GIL, so no extra lock.
            model_trainer_scores.append((modeltrainer, sorted_scores))

        # Launch threads for each model trainer; the semaphore blocks
        # here until a slot is free.
        for mt in model_trainer_list:
            self.th_semaphore.acquire()
            t = threading.Thread(target=mt.thread_start, args=(callbk_fn, ))
            threads.append(t)
            t.start()

        for th in threads:
            th.join()

        # NOTE(review): assumes every run produced at least one score —
        # an empty scores list would raise IndexError on t[1][0].
        best = sorted(model_trainer_scores,
                      key=lambda t: t[1][0],
                      reverse=True)[0]

        # Rebuild a trainer with the winning configuration, this time
        # training on the full data with per-epoch evaluation disabled.
        kwdict = best[0].get_conf()
        kwdict['train_all'] = True
        kwdict['test_all'] = -1
        new_model_trainer = ModelTrainer(self.dataset, **kwdict)

        return (model_trainer_scores, best, new_model_trainer)
if __name__ == '__main__':
    # Manual entry point: load a serialized dataset and run the search
    # for the best training configuration.
    loaded_dataset = dataset.Dataset()
    # dataset.load_from_binary("holographic-embeddings/data/wn18.bin")
    loaded_dataset.load_from_binary("wdata_15k.bin")
    algorithm = Algorithm(loaded_dataset)
    algorithm.find_best()
    # modeltrainer = ModelTrainer(dtset, model_type=skge.HolE, test_all=10,
    #                             max_epochs=200, margin=0.2, ncomp=50,
    #                             mode="rank")
    # modeltrained = modeltrainer.run()
    # print(modeltrainer.scores)