Source code for openmdao.lib.surrogatemodels.logistic_regression

"""Surrogate Model based on a logistic regression model, with regularization to  adjust for
overfitting. Based on work from  `Python logistic regression
<http://blog.smellthedata.com/2009/06/python-logistic-regression-with-l2.html>`_. 
"""
import logging
from random import seed
try:
    import numpy as np
    from numpy import log
    from scipy.optimize import fmin_bfgs
except ImportError as err:
    logging.warn("In %s: %r" % (__file__, err))

from openmdao.lib.datatypes.api import Float, Bool
from openmdao.main.api import Container
from openmdao.main.interfaces import implements, ISurrogate
from openmdao.util.decorators import stub_if_missing_deps

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


@stub_if_missing_deps('numpy', 'scipy')
class LogisticRegression(Container):
    implements(ISurrogate)

    alpha = Float(.1, low=0, iotype='in',
                  desc='L2 regularization strength')

    def __init__(self, X=None, Y=None, alpha=.1):
        # must call HasTraits init to set up Traits stuff
        super(LogisticRegression, self).__init__()

        self.m = None  # number of independents
        self.n = None  # number of training points
        self.alpha = alpha
        self.degenerate = False

        if X is not None and Y is not None:
            self.train(X, Y)
    def lik(self, betas):
        """ Likelihood of the data under the current settings of parameters. """

        # Data likelihood
        l = 0
        for i in range(self.n):
            l += log(sigmoid(self.Y[i] *
                             np.dot(betas, self.X[i, :])))

        # Prior likelihood
        for k in range(1, self.X.shape[1]):
            l -= (self.alpha / 2.0) * betas[k] ** 2

        # multiply by -1 so the optimizer will maximize
        return -1 * l
    def get_uncertain_value(self, value):
        """Returns the value itself. Logistic regressions don't have uncertainty."""
        return value
    def train(self, X, Y):
        """ Define the gradient and hand it off to a scipy gradient-based
        optimizer.
        """

        # normalize all Y data to be between -1 and 1
        low = min(Y)
        high = max(Y)

        # no spread in the training outputs, so just degenerate to
        # predicting that constant value all the time
        self.degenerate = False
        if high == low:
            self.degenerate = high
            return

        self.m = 2.0 / (high - low)
        self.b = (2.0 * low / (low - high)) - 1

        # constants for unscaling the output
        self.z = high - low
        self.w = low

        self.X = np.array(X)
        self.Y = self.m * np.array(Y) + self.b
        self.n = len(X)
        self.betas = np.zeros(len(X[0]))

        # Define the derivative of the likelihood with respect to beta_k.
        # Need to multiply by -1 because we will be minimizing.
        dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
            self.Y[i] * self.X[i, k] *
            sigmoid(-self.Y[i] *
                    np.dot(B, self.X[i, :]))
            for i in range(self.n)])

        # The full gradient is just an array of componentwise derivatives
        dB = lambda B: np.array([dB_k(B, k)
                                 for k in range(self.X.shape[1])])

        # Optimize
        self.betas = fmin_bfgs(self.lik, self.betas, fprime=dB, disp=False)
    def predict(self, new_x):
        """Calculates a predicted value of the response based on the current
        trained model for the supplied list of inputs.
        """
        if self.degenerate:
            return self.degenerate

        return self.z * sigmoid(np.dot(self.betas, np.array(new_x))) + self.w
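
Below is a minimal usage sketch, not part of the original module: it assumes an
OpenMDAO installation where the LogisticRegression class above can be imported,
and it uses a small made-up training set purely for illustration.

if __name__ == '__main__':
    import numpy as np

    # Hypothetical toy data: two inputs, responses spanning [0, 1].
    X = np.array([[0.0, 0.0],
                  [0.0, 1.0],
                  [1.0, 0.0],
                  [1.0, 1.0]])
    Y = [0.0, 0.0, 1.0, 1.0]

    surrogate = LogisticRegression(alpha=0.1)
    surrogate.train(X, Y)

    # Predictions are unscaled back into the range of the training
    # responses via self.z and self.w inside predict().
    print(surrogate.predict([0.9, 0.1]))
    print(surrogate.predict([0.1, 0.9]))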