diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d71f5d3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,112 @@ +glmnet_python/GLMnet.so + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json diff --git a/glmnet_python/GLMnet.so b/glmnet_python/GLMnet.so deleted file mode 100755 index 28ffdcc..0000000 Binary files a/glmnet_python/GLMnet.so and /dev/null differ diff --git a/glmnet_python/coxnet.py b/glmnet_python/coxnet.py index 30fb6e1..3388f2c 100644 --- a/glmnet_python/coxnet.py +++ b/glmnet_python/coxnet.py @@ -6,7 +6,7 @@ status -- column 1 """ # import packages/methods -import scipy +import numpy import ctypes from loadGlmLib import loadGlmLib @@ -20,7 +20,7 @@ def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm, # pre-process data ty = y[:, 0] tevent = y[:, 1] - if scipy.any(ty <= 0): + if numpy.any(ty <= 0): raise ValueError('negative event time not permitted for cox family') if len(offset) == 0: offset = ty*0 @@ -35,17 +35,17 @@ def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm, ###################################### # force inputs into fortran order and scipy float64 copyFlag = False - x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - ty = ty.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - tevent = tevent.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - offset = offset.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - ulam = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) + x = x.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + irs = irs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + pcs = pcs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + ty = ty.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + tevent = tevent.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + offset = offset.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + weights = weights.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + jd = jd.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + vp = vp.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + cl = cl.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + ulam = ulam.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- @@ -54,24 +54,24 @@ def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm, lmu = -1 lmu_r = ctypes.c_int(lmu) # ca - ca = scipy.zeros([nx, nlam], dtype = scipy.float64) - ca = ca.astype(dtype = scipy.float64, order = 'F', copy = False) + ca = numpy.zeros([nx, nlam], dtype = numpy.float64) + ca = ca.astype(dtype = numpy.float64, order = 'F', copy = False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia - ia = -1*scipy.ones([nx], dtype = scipy.int32) - ia = ia.astype(dtype = scipy.int32, order = 'F', copy = False) + ia = -1*numpy.ones([nx], dtype = numpy.int32) + ia = ia.astype(dtype = numpy.int32, order = 'F', copy = False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin - nin = -1*scipy.ones([nlam], dtype = scipy.int32) - nin = nin.astype(dtype = scipy.int32, order = 'F', copy = False) + nin = -1*numpy.ones([nlam], dtype = numpy.int32) + nin = nin.astype(dtype = numpy.int32, order = 'F', copy = False) nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # dev - dev = -1*scipy.ones([nlam], dtype = scipy.float64) - dev = dev.astype(dtype = scipy.float64, order = 'F', copy = False) + dev = -1*numpy.ones([nlam], dtype = numpy.float64) + dev = dev.astype(dtype = numpy.float64, order = 'F', copy = False) dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm - alm = -1*scipy.ones([nlam], dtype = scipy.float64) - alm = alm.astype(dtype = scipy.float64, order = 'F', copy = False) + alm = -1*numpy.ones([nlam], dtype = numpy.float64) + alm = alm.astype(dtype = numpy.float64, order = 'F', copy = False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 @@ -146,21 +146,21 @@ def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm, ninmax = max(nin) # fix first value of alm (from inf to correct value) if ulam[0] == 0.0: - t1 = scipy.log(alm[1]) - t2 = scipy.log(alm[2]) - alm[0] = scipy.exp(2*t1 - t2) + t1 = numpy.log(alm[1]) + t2 = numpy.log(alm[2]) + alm[0] = numpy.exp(2*t1 - t2) # create return fit dictionary if ninmax > 0: ca = ca[0:ninmax, :] - df = scipy.sum(scipy.absolute(ca) > 0, axis=0) + df = numpy.sum(numpy.absolute(ca) > 0, axis=0) ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran - oja = scipy.argsort(ja) + oja = numpy.argsort(ja) ja1 = ja[oja] - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) beta[ja1, :] = ca[oja, :] else: - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) - df = scipy.zeros([1, lmu], dtype = scipy.float64) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) + df = numpy.zeros([1, lmu], dtype = numpy.float64) fit = dict() fit['beta'] = beta @@ -170,7 +170,7 @@ def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm, fit['lambdau'] = alm fit['npasses'] = nlp_r.value fit['jerr'] = jerr_r.value - fit['dim'] = scipy.array([nvars, lmu], dtype = scipy.integer) + fit['dim'] = numpy.array([nvars, lmu], dtype = numpy.integer) fit['offset'] = is_offset fit['class'] = 'coxnet' diff --git a/glmnet_python/cvcompute.py b/glmnet_python/cvcompute.py index 7dcac66..7d96dea 100644 --- a/glmnet_python/cvcompute.py +++ b/glmnet_python/cvcompute.py @@ -4,24 +4,24 @@ Compute the weighted mean and SD within folds, and hence the SE of the mean """ -import scipy +import numpy from wtmean import wtmean def cvcompute(mat, weights, foldid, nlams): if len(weights.shape) > 1: - weights = scipy.reshape(weights, [weights.shape[0], ]) - wisum = scipy.bincount(foldid, weights = weights) - nfolds = scipy.amax(foldid) + 1 - outmat = scipy.ones([nfolds, mat.shape[1]])*scipy.NaN - good = scipy.zeros([nfolds, mat.shape[1]]) - mat[scipy.isinf(mat)] = scipy.NaN + weights = numpy.reshape(weights, [weights.shape[0], ]) + wisum = numpy.bincount(foldid, weights = weights) + nfolds = numpy.amax(foldid) + 1 + outmat = numpy.ones([nfolds, mat.shape[1]])*numpy.NaN + good = numpy.zeros([nfolds, mat.shape[1]]) + mat[numpy.isinf(mat)] = numpy.NaN for i in range(nfolds): tf = foldid == i mati = mat[tf, ] wi = weights[tf, ] outmat[i, :] = wtmean(mati, wi) good[i, 0:nlams[i]] = 1 - N = scipy.sum(good, axis = 0) + N = numpy.sum(good, axis = 0) cvcpt = dict() cvcpt['cvraw'] = outmat cvcpt['weights'] = wisum diff --git a/glmnet_python/cvelnet.py b/glmnet_python/cvelnet.py index e9f6349..ac89c38 100644 --- a/glmnet_python/cvelnet.py +++ b/glmnet_python/cvelnet.py @@ -3,7 +3,7 @@ Internal cvglmnet function. See also cvglmnet. """ -import scipy +import numpy from glmnetPredict import glmnetPredict from wtmean import wtmean from cvcompute import cvcompute @@ -31,29 +31,29 @@ def cvelnet(fit, \ if len(offset) > 0: y = y - offset - predmat = scipy.ones([y.size, lambdau.size])*scipy.NAN - nfolds = scipy.amax(foldid) + 1 + predmat = numpy.ones([y.size, lambdau.size])*numpy.NAN + nfolds = numpy.amax(foldid) + 1 nlams = [] for i in range(nfolds): which = foldid == i fitobj = fit[i].copy() fitobj['offset'] = False preds = glmnetPredict(fitobj, x[which, ]) - nlami = scipy.size(fit[i]['lambdau']) + nlami = numpy.size(fit[i]['lambdau']) predmat[which, 0:nlami] = preds nlams.append(nlami) # convert nlams to scipy array - nlams = scipy.array(nlams, dtype = scipy.integer) + nlams = numpy.array(nlams, dtype = numpy.integer) - N = y.shape[0] - scipy.sum(scipy.isnan(predmat), axis = 0) - yy = scipy.tile(y, [1, lambdau.size]) + N = y.shape[0] - numpy.sum(numpy.isnan(predmat), axis = 0) + yy = numpy.tile(y, [1, lambdau.size]) if ptype == 'mse': cvraw = (yy - predmat)**2 elif ptype == 'deviance': cvraw = (yy - predmat)**2 elif ptype == 'mae': - cvraw = scipy.absolute(yy - predmat) + cvraw = numpy.absolute(yy - predmat) if y.size/nfolds < 3 and grouped == True: print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold') @@ -67,7 +67,7 @@ def cvelnet(fit, \ cvm = wtmean(cvraw, weights) sqccv = (cvraw - cvm)**2 - cvsd = scipy.sqrt(wtmean(sqccv, weights)/(N-1)) + cvsd = numpy.sqrt(wtmean(sqccv, weights)/(N-1)) result = dict() result['cvm'] = cvm diff --git a/glmnet_python/cvfishnet.py b/glmnet_python/cvfishnet.py index 6dbd3bd..f97b329 100644 --- a/glmnet_python/cvfishnet.py +++ b/glmnet_python/cvfishnet.py @@ -3,7 +3,7 @@ Internal cvglmnet function. See also cvglmnet. """ -import scipy +import numpy from glmnetPredict import glmnetPredict from wtmean import wtmean from cvcompute import cvcompute @@ -34,8 +34,8 @@ def cvfishnet(fit, \ else: is_offset = False - predmat = scipy.ones([y.size, lambdau.size])*scipy.NAN - nfolds = scipy.amax(foldid) + 1 + predmat = numpy.ones([y.size, lambdau.size])*numpy.NAN + nfolds = numpy.amax(foldid) + 1 nlams = [] for i in range(nfolds): which = foldid == i @@ -43,23 +43,23 @@ def cvfishnet(fit, \ if is_offset: off_sub = offset[which] else: - off_sub = scipy.empty([0]) + off_sub = numpy.empty([0]) preds = glmnetPredict(fitobj, x[which, ], offset = off_sub) - nlami = scipy.size(fit[i]['lambdau']) + nlami = numpy.size(fit[i]['lambdau']) predmat[which, 0:nlami] = preds nlams.append(nlami) # convert nlams to scipy array - nlams = scipy.array(nlams, dtype = scipy.integer) + nlams = numpy.array(nlams, dtype = numpy.integer) - N = y.shape[0] - scipy.sum(scipy.isnan(predmat), axis = 0) - yy = scipy.tile(y, [1, lambdau.size]) + N = y.shape[0] - numpy.sum(numpy.isnan(predmat), axis = 0) + yy = numpy.tile(y, [1, lambdau.size]) if ptype == 'mse': cvraw = (yy - predmat)**2 elif ptype == 'deviance': cvraw = devi(yy, predmat) elif ptype == 'mae': - cvraw = scipy.absolute(yy - predmat) + cvraw = numpy.absolute(yy - predmat) if y.size/nfolds < 3 and grouped == True: print('Option grouped=false enforced in cvglmnet, since < 3 observations per fold') @@ -73,7 +73,7 @@ def cvfishnet(fit, \ cvm = wtmean(cvraw, weights) sqccv = (cvraw - cvm)**2 - cvsd = scipy.sqrt(wtmean(sqccv, weights)/(N-1)) + cvsd = numpy.sqrt(wtmean(sqccv, weights)/(N-1)) result = dict() result['cvm'] = cvm @@ -88,8 +88,8 @@ def cvfishnet(fit, \ # end of cvfishnet #========================= def devi(yy, eta): - deveta = yy*eta - scipy.exp(eta) - devy = yy*scipy.log(yy) - yy + deveta = yy*eta - numpy.exp(eta) + devy = yy*numpy.log(yy) - yy devy[yy == 0] = 0 result = 2*(devy - deveta) return(result) diff --git a/glmnet_python/cvglmnet.py b/glmnet_python/cvglmnet.py index a539d54..e1a07a6 100644 --- a/glmnet_python/cvglmnet.py +++ b/glmnet_python/cvglmnet.py @@ -126,8 +126,8 @@ class Type of regression - internal usage. EXAMPLES: # Gaussian - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100, 1) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100, 1) cvfit = cvglmnet(x = x, y = y) cvglmnetPlot(cvfit) print( cvglmnetCoef(cvfit) ) @@ -136,27 +136,27 @@ class Type of regression - internal usage. cvglmnetPlot(cvfit1) # Binomial - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100,1) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100,1) y = (y > 0.5)*1.0 fit = cvglmnet(x = x, y = y, family = 'binomial', ptype = 'class') cvglmnetPlot(fit) # poisson - x = scipy.random.rand(100,10) - y = scipy.random.poisson(size = [100, 1])*1.0 + x = numpy.random.rand(100,10) + y = numpy.random.poisson(size = [100, 1])*1.0 cvfit = cvglmnet(x = x, y = y, family = 'poisson') cvglmnetPlot(cvfit) # Multivariate Gaussian: - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100,3) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100,3) cvfit = cvglmnet(x = x, y = y, family = 'mgaussian') cvglmnetPlot(cvfit) # Multinomial - x = scipy.random.rand(100,10) - y = scipy.random.rand(100,1) + x = numpy.random.rand(100,10) + y = numpy.random.rand(100,1) y[y < 0.3] = 1.0 y[y < 0.6] = 2.0 y[y < 1.0] = 3.0 @@ -199,7 +199,7 @@ class Type of regression - internal usage. import multiprocessing from glmnetSet import glmnetSet from glmnetPredict import glmnetPredict -import scipy +import numpy from glmnet import glmnet from cvelnet import cvelnet from cvlognet import cvlognet @@ -212,7 +212,7 @@ def cvglmnet(*, x, family = 'gaussian', ptype = 'default', nfolds = 10, - foldid = scipy.empty([0]), + foldid = numpy.empty([0]), parallel = 1, keep = False, grouped = True, @@ -227,18 +227,18 @@ def cvglmnet(*, x, # we should not really need this. user must supply the right shape # if y.shape[0] != nobs: - # y = scipy.transpose(y) + # y = numpy.transpose(y) # convert 1d python array of size nobs to 2d python array of size nobs x 1 if len(y.shape) == 1: - y = scipy.reshape(y, [y.size, 1]) + y = numpy.reshape(y, [y.size, 1]) # we should not really need this. user must supply the right shape # if (len(options['offset']) > 0) and (options['offset'].shape[0] != nobs): - # options['offset'] = scipy.transpose(options['offset']) + # options['offset'] = numpy.transpose(options['offset']) if len(options['weights']) == 0: - options['weights'] = scipy.ones([nobs, 1], dtype = scipy.float64) + options['weights'] = numpy.ones([nobs, 1], dtype = numpy.float64) # main call to glmnet glmfit = glmnet(x = x, y = y, family = family, **options) @@ -246,34 +246,34 @@ def cvglmnet(*, x, is_offset = glmfit['offset'] options['lambdau'] = glmfit['lambdau'] - nz = glmnetPredict(glmfit, scipy.empty([0]), scipy.empty([0]), 'nonzero') + nz = glmnetPredict(glmfit, numpy.empty([0]), numpy.empty([0]), 'nonzero') if glmfit['class'] == 'multnet': - nnz = scipy.zeros([len(options['lambdau']), len(nz)]) + nnz = numpy.zeros([len(options['lambdau']), len(nz)]) for i in range(len(nz)): - nnz[:, i] = scipy.transpose(scipy.sum(nz[i], axis = 0)) - nz = scipy.ceil(scipy.median(nnz, axis = 1)) + nnz[:, i] = numpy.transpose(numpy.sum(nz[i], axis = 0)) + nz = numpy.ceil(numpy.median(nnz, axis = 1)) elif glmfit['class'] == 'mrelnet': - nz = scipy.transpose(scipy.sum(nz[0], axis = 0)) + nz = numpy.transpose(numpy.sum(nz[0], axis = 0)) else: - nz = scipy.transpose(scipy.sum(nz, axis = 0)) + nz = numpy.transpose(numpy.sum(nz, axis = 0)) if len(foldid) == 0: - ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(nobs/nfolds))]) - mb = scipy.arange(scipy.mod(nobs, nfolds)) - mb = scipy.reshape(mb, [1, mb.size]) - population = scipy.append(ma, mb, axis = 1) - mc = scipy.random.permutation(len(population)) + ma = numpy.tile(numpy.arange(nfolds), [1, int(numpy.floor(nobs/nfolds))]) + mb = numpy.arange(numpy.mod(nobs, nfolds)) + mb = numpy.reshape(mb, [1, mb.size]) + population = numpy.append(ma, mb, axis = 1) + mc = numpy.random.permutation(len(population)) mc = mc[0:nobs] foldid = population[mc] - foldid = scipy.reshape(foldid, [foldid.size,]) + foldid = numpy.reshape(foldid, [foldid.size,]) else: - nfolds = scipy.amax(foldid) + 1 + nfolds = numpy.amax(foldid) + 1 if nfolds < 3: raise ValueError('nfolds must be bigger than 3; nfolds = 10 recommended') cpredmat = list() - foldid = scipy.reshape(foldid, [foldid.size, ]) + foldid = numpy.reshape(foldid, [foldid.size, ]) if parallel != 1: if parallel == -1: num_cores = multiprocessing.cpu_count() @@ -318,10 +318,10 @@ def cvglmnet(*, x, CVerr = dict() CVerr['lambdau'] = options['lambdau'] - CVerr['cvm'] = scipy.transpose(cvm) - CVerr['cvsd'] = scipy.transpose(cvsd) - CVerr['cvup'] = scipy.transpose(cvm + cvsd) - CVerr['cvlo'] = scipy.transpose(cvm - cvsd) + CVerr['cvm'] = numpy.transpose(cvm) + CVerr['cvsd'] = numpy.transpose(cvsd) + CVerr['cvup'] = numpy.transpose(cvm + cvsd) + CVerr['cvlo'] = numpy.transpose(cvm - cvsd) CVerr['nzero'] = nz CVerr['name'] = cvname CVerr['glmnet_fit'] = glmfit @@ -330,10 +330,10 @@ def cvglmnet(*, x, CVerr['foldid'] = foldid if ptype == 'auc': cvm = -cvm - CVerr['lambda_min'] = scipy.amax(options['lambdau'][cvm <= scipy.amin(cvm)]).reshape([1]) + CVerr['lambda_min'] = numpy.amax(options['lambdau'][cvm <= numpy.amin(cvm)]).reshape([1]) idmin = options['lambdau'] == CVerr['lambda_min'] semin = cvm[idmin] + cvsd[idmin] - CVerr['lambda_1se'] = scipy.amax(options['lambdau'][cvm <= semin]).reshape([1]) + CVerr['lambda_1se'] = numpy.amax(options['lambdau'][cvm <= semin]).reshape([1]) CVerr['class'] = 'cvglmnet' return(CVerr) diff --git a/glmnet_python/cvglmnetCoef.py b/glmnet_python/cvglmnetCoef.py index e0f0adf..210c6f7 100644 --- a/glmnet_python/cvglmnetCoef.py +++ b/glmnet_python/cvglmnetCoef.py @@ -69,7 +69,7 @@ """ -import scipy +import numpy from glmnetCoef import glmnetCoef def cvglmnetCoef(obj, s = None): @@ -77,7 +77,7 @@ def cvglmnetCoef(obj, s = None): if s is None or len(s) == 0: s = obj['lambda_1se'] - if isinstance(s, scipy.ndarray): + if isinstance(s, numpy.ndarray): lambdau = s elif isinstance(s, str): sbase = ['lambda_1se', 'lambda_min'] diff --git a/glmnet_python/cvglmnetPlot.py b/glmnet_python/cvglmnetPlot.py index f55a97d..30117aa 100644 --- a/glmnet_python/cvglmnetPlot.py +++ b/glmnet_python/cvglmnetPlot.py @@ -44,11 +44,11 @@ EXAMPLES: - scipy.random.seed(1) - x=scipy.random.normal(size = (100,20)) - y=scipy.random.normal(size = (100,1)) - g2=scipy.random.choice(2,size = (100,1))*1.0 - g4=scipy.random.choice(4,size = (100,1))*1.0 + numpy.random.seed(1) + x=numpy.random.normal(size = (100,20)) + y=numpy.random.normal(size = (100,1)) + g2=numpy.random.choice(2,size = (100,1))*1.0 + g4=numpy.random.choice(4,size = (100,1))*1.0 plt.figure() fit1=cvglmnet(x = x.copy(),y = y.copy()) @@ -64,13 +64,13 @@ """ -import scipy +import numpy def cvglmnetPlot(cvobject, sign_lambda = 1.0, **options): import matplotlib.pyplot as plt - sloglam = sign_lambda*scipy.log(cvobject['lambdau']) + sloglam = sign_lambda*numpy.log(cvobject['lambdau']) fig = plt.gcf() ax1 = plt.gca() @@ -87,12 +87,12 @@ def cvglmnetPlot(cvobject, sign_lambda = 1.0, **options): xlim1 = ax1.get_xlim() ylim1 = ax1.get_ylim() - xval = sign_lambda*scipy.log(scipy.array([cvobject['lambda_min'], cvobject['lambda_min']])) + xval = sign_lambda*numpy.log(numpy.array([cvobject['lambda_min'], cvobject['lambda_min']])) plt.plot(xval, ylim1, color = 'b', linestyle = 'dashed', \ linewidth = 1) if cvobject['lambda_min'] != cvobject['lambda_1se']: - xval = sign_lambda*scipy.log([cvobject['lambda_1se'], cvobject['lambda_1se']]) + xval = sign_lambda*numpy.log([cvobject['lambda_1se'], cvobject['lambda_1se']]) plt.plot(xval, ylim1, color = 'b', linestyle = 'dashed', \ linewidth = 1) @@ -100,7 +100,7 @@ def cvglmnetPlot(cvobject, sign_lambda = 1.0, **options): ax2.xaxis.tick_top() atdf = ax1.get_xticks() - indat = scipy.ones(atdf.shape, dtype = scipy.integer) + indat = numpy.ones(atdf.shape, dtype = numpy.integer) if sloglam[-1] >= sloglam[1]: for j in range(len(sloglam)-1, -1, -1): indat[atdf <= sloglam[j]] = j diff --git a/glmnet_python/cvglmnetPredict.py b/glmnet_python/cvglmnetPredict.py index 7d325c5..6f749e7 100644 --- a/glmnet_python/cvglmnetPredict.py +++ b/glmnet_python/cvglmnetPredict.py @@ -67,23 +67,23 @@ cvglmnet and glmnetPredict. EXAMPLES: - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100, 1) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100, 1) cvfit = cvglmnet(x = x, y = y) cvglmnetPredict(cvfit, x[0:5, :], 'lambda_min') - cvglmnetPredict(cvfit, x[0:5, :], scipy.array([0.0866, 0.2323])) + cvglmnetPredict(cvfit, x[0:5, :], numpy.array([0.0866, 0.2323])) """ from cvglmnetCoef import cvglmnetCoef from glmnetPredict import glmnetPredict -import scipy +import numpy def cvglmnetPredict(obj, newx = None, s = 'lambda_1se', **options): if newx is None: CVpred = cvglmnetCoef(obj) return(CVpred) - if type(s) == scipy.ndarray and s.dtype == 'float64': + if type(s) == numpy.ndarray and s.dtype == 'float64': lambdau = s elif s in ['lambda_1se', 'lambda_min']: lambdau = obj[s] diff --git a/glmnet_python/cvlognet.py b/glmnet_python/cvlognet.py index adae68c..2915fd4 100644 --- a/glmnet_python/cvlognet.py +++ b/glmnet_python/cvlognet.py @@ -3,7 +3,7 @@ Internal function called by cvglmnet. See also cvglmnet """ -import scipy +import numpy from glmnetPredict import glmnetPredict from wtmean import wtmean from cvcompute import cvcompute @@ -33,15 +33,15 @@ def cvlognet(fit, \ prob_max = 1 - prob_min nc = y.shape[1] if nc == 1: - classes, sy = scipy.unique(y, return_inverse = True) + classes, sy = numpy.unique(y, return_inverse = True) nc = len(classes) - indexes = scipy.eye(nc, nc) + indexes = numpy.eye(nc, nc) y = indexes[sy, :] else: - classes = scipy.arange(nc) + 1 # 1:nc + classes = numpy.arange(nc) + 1 # 1:nc N = y.size - nfolds = scipy.amax(foldid) + 1 + nfolds = numpy.amax(foldid) + 1 if (N/nfolds < 10) and (type == 'auc'): print('Warning: Too few (<10) observations per fold for type.measure=auc in cvlognet') print('Warning: changed to type.measure = deviance. Alternately, use smaller value ') @@ -53,8 +53,8 @@ def cvlognet(fit, \ grouped = False is_offset = not(len(offset) == 0) - predmat = scipy.ones([y.shape[0], lambdau.size])*scipy.NAN - nfolds = scipy.amax(foldid) + 1 + predmat = numpy.ones([y.shape[0], lambdau.size])*numpy.NAN + nfolds = numpy.amax(foldid) + 1 nlams = [] for i in range(nfolds): which = foldid == i @@ -62,46 +62,46 @@ def cvlognet(fit, \ if is_offset: off_sub = offset[which, ] else: - off_sub = scipy.empty([0]) - preds = glmnetPredict(fitobj, x[which, ], scipy.empty([0]), 'response', False, off_sub) - nlami = scipy.size(fit[i]['lambdau']) + off_sub = numpy.empty([0]) + preds = glmnetPredict(fitobj, x[which, ], numpy.empty([0]), 'response', False, off_sub) + nlami = numpy.size(fit[i]['lambdau']) predmat[which, 0:nlami] = preds nlams.append(nlami) # convert nlams to scipy array - nlams = scipy.array(nlams, dtype = scipy.integer) + nlams = numpy.array(nlams, dtype = numpy.integer) if ptype == 'auc': - cvraw = scipy.zeros([nfolds, lambdau.size])*scipy.NaN - good = scipy.zeros([nfolds, lambdau.size]) + cvraw = numpy.zeros([nfolds, lambdau.size])*numpy.NaN + good = numpy.zeros([nfolds, lambdau.size]) for i in range(nfolds): good[i, 0:nlams[i]] = 1 which = foldid == i for j in range(nlams[i]): cvraw[i,j] = auc_mat(y[which,], predmat[which,j], weights[which]) - N = scipy.sum(good, axis = 0) - sweights = scipy.zeros([nfolds, 1]) + N = numpy.sum(good, axis = 0) + sweights = numpy.zeros([nfolds, 1]) for i in range(nfolds): - sweights[i]= scipy.sum(weights[foldid == i], axis = 0) + sweights[i]= numpy.sum(weights[foldid == i], axis = 0) weights = sweights else: - ywt = scipy.sum(y, axis = 1, keepdims = True) - y = y/scipy.tile(ywt, [1, y.shape[1]]) + ywt = numpy.sum(y, axis = 1, keepdims = True) + y = y/numpy.tile(ywt, [1, y.shape[1]]) weights = weights*ywt - N = y.shape[0] - scipy.sum(scipy.isnan(predmat), axis = 0, keepdims = True) - yy1 = scipy.tile(y[:,0:1], [1, lambdau.size]) - yy2 = scipy.tile(y[:,1:2], [1, lambdau.size]) + N = y.shape[0] - numpy.sum(numpy.isnan(predmat), axis = 0, keepdims = True) + yy1 = numpy.tile(y[:,0:1], [1, lambdau.size]) + yy2 = numpy.tile(y[:,1:2], [1, lambdau.size]) if ptype == 'mse': cvraw = (yy1 - (1 - predmat))**2 + (yy2 - (1 - predmat))**2 elif ptype == 'deviance': - predmat = scipy.minimum(scipy.maximum(predmat, prob_min), prob_max) - lp = yy1*scipy.log(1-predmat) + yy2*scipy.log(predmat) - ly = scipy.log(y) + predmat = numpy.minimum(numpy.maximum(predmat, prob_min), prob_max) + lp = yy1*numpy.log(1-predmat) + yy2*numpy.log(predmat) + ly = numpy.log(y) ly[y == 0] = 0 - ly = scipy.dot(y*ly, scipy.array([1.0, 1.0]).reshape([2,1])) - cvraw = 2*(scipy.tile(ly, [1, lambdau.size]) - lp) + ly = numpy.dot(y*ly, numpy.array([1.0, 1.0]).reshape([2,1])) + cvraw = 2*(numpy.tile(ly, [1, lambdau.size]) - lp) elif ptype == 'mae': - cvraw = scipy.absolute(yy1 - (1 - predmat)) + scipy.absolute(yy2 - (1 - predmat)) + cvraw = numpy.absolute(yy1 - (1 - predmat)) + numpy.absolute(yy2 - (1 - predmat)) elif ptype == 'class': cvraw = yy1*(predmat > 0.5) + yy2*(predmat <= 0.5) @@ -117,7 +117,7 @@ def cvlognet(fit, \ cvm = wtmean(cvraw, weights) sqccv = (cvraw - cvm)**2 - cvsd = scipy.sqrt(wtmean(sqccv, weights)/(N-1)) + cvsd = numpy.sqrt(wtmean(sqccv, weights)/(N-1)) result = dict() result['cvm'] = cvm @@ -137,37 +137,37 @@ def cvlognet(fit, \ #========================= def auc_mat(y, prob, weights = None): if weights == None or len(weights) == 0: - weights = scipy.ones([y.shape[0], 1]) + weights = numpy.ones([y.shape[0], 1]) wweights = weights*y wweights = wweights.flatten() - wweights = scipy.reshape(wweights, [1, wweights.size]) + wweights = numpy.reshape(wweights, [1, wweights.size]) ny= y.shape[0] - a = scipy.zeros([ny, 1]) - b = scipy.ones([ny, 1]) - yy = scipy.vstack((a, b)) - pprob = scipy.vstack((prob,prob)) + a = numpy.zeros([ny, 1]) + b = numpy.ones([ny, 1]) + yy = numpy.vstack((a, b)) + pprob = numpy.vstack((prob,prob)) result = auc(yy, pprob, wweights) return(result) #========================= def auc(y, prob, w): if len(w) == 0: - mindiff = scipy.amin(scipy.diff(scipy.unique(prob))) - pert = scipy.random.uniform(0, mindiff/3, prob.size) - t, rprob = scipy.unique(prob + pert, return_inverse = True) - n1 = scipy.sum(y, keepdims = True) + mindiff = numpy.amin(numpy.diff(numpy.unique(prob))) + pert = numpy.random.uniform(0, mindiff/3, prob.size) + t, rprob = numpy.unique(prob + pert, return_inverse = True) + n1 = numpy.sum(y, keepdims = True) n0 = y.shape[0] - n1 - u = scipy.sum(rprob[y == 1]) - n1*(n1 + 1)/2 + u = numpy.sum(rprob[y == 1]) - n1*(n1 + 1)/2 result = u/(n1*n0) else: - op = scipy.argsort(prob) + op = numpy.argsort(prob) y = y[op] w = w[op] - cw = scipy.cumsum(w) + cw = numpy.cumsum(w) w1 = w[y == 1] - cw1 = scipy.cumsum(w1) - wauc = scipy.sum(w1*(cw[y == 1] - cw1)) + cw1 = numpy.cumsum(w1) + wauc = numpy.sum(w1*(cw[y == 1] - cw1)) sumw = cw1[-1] - sumw = sumw*(c1[-1] - sumw) + sumw = sumw*(cw1[-1] - sumw) result = wauc/sumw return(result) #========================= diff --git a/glmnet_python/cvmrelnet.py b/glmnet_python/cvmrelnet.py index 40e8540..fe8a088 100644 --- a/glmnet_python/cvmrelnet.py +++ b/glmnet_python/cvmrelnet.py @@ -3,7 +3,7 @@ Internal function called by cvglmnet. See also cvglmnet """ -import scipy +import numpy from glmnetPredict import glmnetPredict from wtmean import wtmean from cvcompute import cvcompute @@ -34,27 +34,27 @@ def cvmrelnet(fit, \ if len(offset) > 0: y = y - offset - predmat = scipy.ones([nobs, nc, lambdau.size])*scipy.NAN - nfolds = scipy.amax(foldid) + 1 + predmat = numpy.ones([nobs, nc, lambdau.size])*numpy.NAN + nfolds = numpy.amax(foldid) + 1 nlams = [] for i in range(nfolds): which = foldid == i fitobj = fit[i].copy() fitobj['offset'] = False preds = glmnetPredict(fitobj, x[which, ]) - nlami = scipy.size(fit[i]['lambdau']) + nlami = numpy.size(fit[i]['lambdau']) predmat[which, 0:nlami] = preds nlams.append(nlami) # convert nlams to scipy array - nlams = scipy.array(nlams, dtype = scipy.integer) + nlams = numpy.array(nlams, dtype = numpy.integer) - N = nobs - scipy.reshape(scipy.sum(scipy.isnan(predmat[:, 1, :]), axis = 0), (1, -1)) - bigY = scipy.tile(y[:, :, None], [1, 1, lambdau.size]) + N = nobs - numpy.reshape(numpy.sum(numpy.isnan(predmat[:, 1, :]), axis = 0), (1, -1)) + bigY = numpy.tile(y[:, :, None], [1, 1, lambdau.size]) if ptype == 'mse': - cvraw = scipy.sum((bigY - predmat)**2, axis = 1).squeeze() + cvraw = numpy.sum((bigY - predmat)**2, axis = 1).squeeze() elif ptype == 'mae': - cvraw = scipy.sum(scipy.absolute(bigY - predmat), axis = 1).squeeze() + cvraw = numpy.sum(numpy.absolute(bigY - predmat), axis = 1).squeeze() if y.size/nfolds < 3 and grouped == True: print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold') @@ -68,7 +68,7 @@ def cvmrelnet(fit, \ cvm = wtmean(cvraw, weights) sqccv = (cvraw - cvm)**2 - cvsd = scipy.sqrt(wtmean(sqccv, weights)/(N-1)) + cvsd = numpy.sqrt(wtmean(sqccv, weights)/(N-1)) result = dict() result['cvm'] = cvm diff --git a/glmnet_python/cvmultnet.py b/glmnet_python/cvmultnet.py index aa3efed..9e5c2f9 100644 --- a/glmnet_python/cvmultnet.py +++ b/glmnet_python/cvmultnet.py @@ -3,7 +3,7 @@ Internal function called by cvglmnet. See also cvglmnet """ -import scipy +import numpy from glmnetPredict import glmnetPredict from wtmean import wtmean from cvcompute import cvcompute @@ -33,16 +33,16 @@ def cvmultnet(fit, \ prob_max = 1 - prob_min nc = y.shape if nc[1] == 1: - classes, sy = scipy.unique(y, return_inverse = True) + classes, sy = numpy.unique(y, return_inverse = True) nc = len(classes) - indexes = scipy.eye(nc, nc) + indexes = numpy.eye(nc, nc) y = indexes[sy, :] else: nc = nc[1] is_offset = not(len(offset) == 0) - predmat = scipy.ones([y.shape[0], nc, lambdau.size])*scipy.NAN - nfolds = scipy.amax(foldid) + 1 + predmat = numpy.ones([y.shape[0], nc, lambdau.size])*numpy.NAN + nfolds = numpy.amax(foldid) + 1 nlams = [] for i in range(nfolds): which = foldid == i @@ -50,39 +50,39 @@ def cvmultnet(fit, \ if is_offset: off_sub = offset[which, ] else: - off_sub = scipy.empty([0]) - preds = glmnetPredict(fitobj, x[which, ], scipy.empty([0]), 'response', False, off_sub) - nlami = scipy.size(fit[i]['lambdau']) + off_sub = numpy.empty([0]) + preds = glmnetPredict(fitobj, x[which, ], numpy.empty([0]), 'response', False, off_sub) + nlami = numpy.size(fit[i]['lambdau']) predmat[which, 0:nlami] = preds nlams.append(nlami) # convert nlams to scipy array - nlams = scipy.array(nlams, dtype = scipy.integer) + nlams = numpy.array(nlams, dtype = numpy.integer) - ywt = scipy.sum(y, axis = 1, keepdims = True) - y = y/scipy.tile(ywt, [1, y.shape[1]]) + ywt = numpy.sum(y, axis = 1, keepdims = True) + y = y/numpy.tile(ywt, [1, y.shape[1]]) weights = weights*ywt - N = y.shape[0] - scipy.sum(scipy.isnan(predmat[:,1,:]), axis = 0, keepdims = True) - bigY = scipy.tile(y[:, :, None], [1, 1, lambdau.size]) + N = y.shape[0] - numpy.sum(numpy.isnan(predmat[:,1,:]), axis = 0, keepdims = True) + bigY = numpy.tile(y[:, :, None], [1, 1, lambdau.size]) if ptype == 'mse': - cvraw = scipy.sum((bigY - predmat)**2, axis = 1).squeeze() + cvraw = numpy.sum((bigY - predmat)**2, axis = 1).squeeze() elif ptype == 'deviance': - predmat = scipy.minimum(scipy.maximum(predmat, prob_min), prob_max) - lp = bigY*scipy.log(predmat) - ly = bigY*scipy.log(bigY) + predmat = numpy.minimum(numpy.maximum(predmat, prob_min), prob_max) + lp = bigY*numpy.log(predmat) + ly = bigY*numpy.log(bigY) ly[y == 0] = 0 - cvraw = scipy.sum(2*(ly - lp), axis = 1).squeeze() + cvraw = numpy.sum(2*(ly - lp), axis = 1).squeeze() elif ptype == 'mae': - cvraw = scipy.sum(scipy.absolute(bigY - predmat), axis = 1).squeeze() + cvraw = numpy.sum(numpy.absolute(bigY - predmat), axis = 1).squeeze() elif ptype == 'class': - classid = scipy.zeros([y.shape[0], lambdau.size])*scipy.NaN + classid = numpy.zeros([y.shape[0], lambdau.size])*numpy.NaN for i in range(lambdau.size): classid[:, i] = glmnet_softmax(predmat[:,:,i]) classid = classid.reshape([classid.size,1]) yperm = bigY.transpose((0,2,1)) yperm = yperm.reshape([yperm.size, 1]) idx = sub2ind(yperm.shape, range(len(classid)), classid.transpose()) - cvraw = scipy.reshape(1 - yperm[idx], [-1, lambdau.size]); + cvraw = numpy.reshape(1 - yperm[idx], [-1, lambdau.size]) if grouped == True: cvob = cvcompute(cvraw, weights, foldid, nlams) @@ -92,7 +92,7 @@ def cvmultnet(fit, \ cvm = wtmean(cvraw, weights) sqccv = (cvraw - cvm)**2 - cvsd = scipy.sqrt(wtmean(sqccv, weights)/(N-1)) + cvsd = numpy.sqrt(wtmean(sqccv, weights)/(N-1)) result = dict() result['cvm'] = cvm @@ -115,16 +115,16 @@ def sub2ind(array_shape, rows, cols): #========================= def glmnet_softmax(x): d = x.shape - nas = scipy.any(scipy.isnan(x), axis = 1) - if scipy.any(nas): - pclass = scipy.zeros([d[0], 1])*scipy.NaN - if scipy.sum(nas) < d[0]: + nas = numpy.any(numpy.isnan(x), axis = 1) + if numpy.any(nas): + pclass = numpy.zeros([d[0], 1])*numpy.NaN + if numpy.sum(nas) < d[0]: pclass2 = glmnet_softmax(x[~nas, :]) pclass[~nas] = pclass2 result = pclass else: maxdist = x[:, 1] - pclass = scipy.ones([d[0], 1]) + pclass = numpy.ones([d[0], 1]) for i in range(1, d[1], 1): t = x[:, i] > maxdist pclass[t] = i diff --git a/glmnet_python/elnet.py b/glmnet_python/elnet.py index 0f462cd..4756e1a 100644 --- a/glmnet_python/elnet.py +++ b/glmnet_python/elnet.py @@ -5,7 +5,7 @@ # import packages/methods -import scipy +import numpy import ctypes from loadGlmLib import loadGlmLib @@ -17,14 +17,14 @@ def elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, glmlib = loadGlmLib() # pre-process data - ybar = scipy.dot(scipy.transpose(y), weights) + ybar = numpy.dot(numpy.transpose(y), weights) ybar = ybar/sum(weights) nulldev = (y - ybar)**2 * weights # ka lst = ['covariance', 'naive'] ka = [i for i in range(len(lst)) if lst[i] == gtype] if len(ka) == 0: - raise ValueError('unrecognized type for ka'); + raise ValueError('unrecognized type for ka') else: ka = ka[0] + 1 # convert from 0-based to 1-based index for fortran # offset @@ -44,15 +44,15 @@ def elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, ###################################### # force inputs into fortran order and into the correct scipy datatype copyFlag = False - x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - y = y.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - ulam = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) + x = x.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + irs = irs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + pcs = pcs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + y = y.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + weights = weights.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + jd = jd.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + vp = vp.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + cl = cl.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + ulam = ulam.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- @@ -61,28 +61,28 @@ def elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, lmu = -1 lmu_r = ctypes.c_int(lmu) # a0 - a0 = scipy.zeros([nlam], dtype = scipy.float64) - a0 = a0.astype(dtype = scipy.float64, order = 'F', copy = False) + a0 = numpy.zeros([nlam], dtype = numpy.float64) + a0 = a0.astype(dtype = numpy.float64, order = 'F', copy = False) a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ca - ca = scipy.zeros([nx, nlam], dtype = scipy.float64) - ca = ca.astype(dtype = scipy.float64, order = 'F', copy = False) + ca = numpy.zeros([nx, nlam], dtype = numpy.float64) + ca = ca.astype(dtype = numpy.float64, order = 'F', copy = False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia - ia = -1*scipy.ones([nx], dtype = scipy.int32) - ia = ia.astype(dtype = scipy.int32, order = 'F', copy = False) + ia = -1*numpy.ones([nx], dtype = numpy.int32) + ia = ia.astype(dtype = numpy.int32, order = 'F', copy = False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin - nin = -1*scipy.ones([nlam], dtype = scipy.int32) - nin = nin.astype(dtype = scipy.int32, order = 'F', copy = False) + nin = -1*numpy.ones([nlam], dtype = numpy.int32) + nin = nin.astype(dtype = numpy.int32, order = 'F', copy = False) nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # rsq - rsq = -1*scipy.ones([nlam], dtype = scipy.float64) - rsq = rsq.astype(dtype = scipy.float64, order = 'F', copy = False) + rsq = -1*numpy.ones([nlam], dtype = numpy.float64) + rsq = rsq.astype(dtype = numpy.float64, order = 'F', copy = False) rsq_r = rsq.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm - alm = -1*scipy.ones([nlam], dtype = scipy.float64) - alm = alm.astype(dtype = scipy.float64, order = 'F', copy = False) + alm = -1*numpy.ones([nlam], dtype = numpy.float64) + alm = alm.astype(dtype = numpy.float64, order = 'F', copy = False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 @@ -186,21 +186,21 @@ def elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, ninmax = max(nin) # fix first value of alm (from inf to correct value) if lempty: - t1 = scipy.log(alm[1]) - t2 = scipy.log(alm[2]) - alm[0] = scipy.exp(2*t1 - t2) + t1 = numpy.log(alm[1]) + t2 = numpy.log(alm[2]) + alm[0] = numpy.exp(2*t1 - t2) # create return fit dictionary if ninmax > 0: ca = ca[0:ninmax, :] - df = scipy.sum(scipy.absolute(ca) > 0, axis=0) + df = numpy.sum(numpy.absolute(ca) > 0, axis=0) ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran - oja = scipy.argsort(ja) + oja = numpy.argsort(ja) ja1 = ja[oja] - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) beta[ja1, :] = ca[oja, :] else: - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) - df = scipy.zeros([1, lmu], dtype = scipy.float64) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) + df = numpy.zeros([1, lmu], dtype = numpy.float64) fit = dict() fit['a0'] = a0 @@ -211,7 +211,7 @@ def elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, fit['lambdau'] = alm fit['npasses'] = nlp_r.value fit['jerr'] = jerr_r.value - fit['dim'] = scipy.array([nvars, lmu], dtype = scipy.integer) + fit['dim'] = numpy.array([nvars, lmu], dtype = numpy.integer) fit['offset'] = is_offset fit['class'] = 'elnet' diff --git a/glmnet_python/fishnet.py b/glmnet_python/fishnet.py index 7b022ab..e0a3724 100644 --- a/glmnet_python/fishnet.py +++ b/glmnet_python/fishnet.py @@ -3,7 +3,7 @@ Internal function called by glmnet. See also glmnet, cvglmnet """ # import packages/methods -import scipy +import numpy import ctypes from loadGlmLib import loadGlmLib @@ -14,7 +14,7 @@ def fishnet(x, is_sparse, irs, pcs, y, weights, offset, parm, # load shared fortran library glmlib = loadGlmLib() - if scipy.any( y < 0): + if numpy.any( y < 0): raise ValueError('negative responses not permitted for Poisson family') if len(offset) == 0: @@ -30,16 +30,16 @@ def fishnet(x, is_sparse, irs, pcs, y, weights, offset, parm, ###################################### # force inputs into fortran order and scipy float64 copyFlag = False - x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - y = y.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - offset = offset.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - ulam = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) + x = x.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + irs = irs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + pcs = pcs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + y = y.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + weights = weights.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + offset = offset.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + jd = jd.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + vp = vp.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + cl = cl.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + ulam = ulam.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- @@ -48,28 +48,28 @@ def fishnet(x, is_sparse, irs, pcs, y, weights, offset, parm, lmu = -1 lmu_r = ctypes.c_int(lmu) # a0 - a0 = scipy.zeros([nlam], dtype = scipy.float64) - a0 = a0.astype(dtype = scipy.float64, order = 'F', copy = False) + a0 = numpy.zeros([nlam], dtype = numpy.float64) + a0 = a0.astype(dtype = numpy.float64, order = 'F', copy = False) a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ca - ca = scipy.zeros([nx, nlam], dtype = scipy.float64) - ca = ca.astype(dtype = scipy.float64, order = 'F', copy = False) + ca = numpy.zeros([nx, nlam], dtype = numpy.float64) + ca = ca.astype(dtype = numpy.float64, order = 'F', copy = False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia - ia = -1*scipy.ones([nx], dtype = scipy.int32) - ia = ia.astype(dtype = scipy.int32, order = 'F', copy = False) + ia = -1*numpy.ones([nx], dtype = numpy.int32) + ia = ia.astype(dtype = numpy.int32, order = 'F', copy = False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin - nin = -1*scipy.ones([nlam], dtype = scipy.int32) - nin = nin.astype(dtype = scipy.int32, order = 'F', copy = False) + nin = -1*numpy.ones([nlam], dtype = numpy.int32) + nin = nin.astype(dtype = numpy.int32, order = 'F', copy = False) nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # dev - dev = -1*scipy.ones([nlam], dtype = scipy.float64) - dev = dev.astype(dtype = scipy.float64, order = 'F', copy = False) + dev = -1*numpy.ones([nlam], dtype = numpy.float64) + dev = dev.astype(dtype = numpy.float64, order = 'F', copy = False) dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm - alm = -1*scipy.ones([nlam], dtype = scipy.float64) - alm = alm.astype(dtype = scipy.float64, order = 'F', copy = False) + alm = -1*numpy.ones([nlam], dtype = numpy.float64) + alm = alm.astype(dtype = numpy.float64, order = 'F', copy = False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 @@ -176,22 +176,22 @@ def fishnet(x, is_sparse, irs, pcs, y, weights, offset, parm, ninmax = max(nin) # fix first value of alm (from inf to correct value) if ulam[0] == 0.0: - t1 = scipy.log(alm[1]) - t2 = scipy.log(alm[2]) - alm[0] = scipy.exp(2*t1 - t2) + t1 = numpy.log(alm[1]) + t2 = numpy.log(alm[2]) + alm[0] = numpy.exp(2*t1 - t2) # create return fit dictionary - dd = scipy.array([nvars, lmu], dtype = scipy.integer) + dd = numpy.array([nvars, lmu], dtype = numpy.integer) if ninmax > 0: ca = ca[0:ninmax, :] - df = scipy.sum(scipy.absolute(ca) > 0, axis = 0) + df = numpy.sum(numpy.absolute(ca) > 0, axis = 0) ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran - oja = scipy.argsort(ja) + oja = numpy.argsort(ja) ja1 = ja[oja] - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) beta[ja1, :] = ca[oja, :] else: - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) - df = scipy.zeros([1, lmu], dtype = scipy.float64) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) + df = numpy.zeros([1, lmu], dtype = numpy.float64) fit = dict() fit['a0'] = a0 diff --git a/glmnet_python/glmnet.py b/glmnet_python/glmnet.py index 5d46e1e..db75f05 100644 --- a/glmnet_python/glmnet.py +++ b/glmnet_python/glmnet.py @@ -87,59 +87,59 @@ class Type of regression - internal usage EXAMPLES: -------- # Gaussian - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100, 1) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100, 1) fit = glmnet(x = x, y = y) fit = glmnet(x = x, y = y, alpha = 0.5) glmnetPrint(fit) - glmnetPredict(fit, scipy.empty([0]), scipy.array([0.01]), 'coef') # extract coefficients at a single value of lambdau - glmnetPredict(fit, x[0:10,:], scipy.array([0.01, 0.005])) # make predictions + glmnetPredict(fit, numpy.empty([0]), numpy.array([0.01]), 'coef') # extract coefficients at a single value of lambdau + glmnetPredict(fit, x[0:10,:], numpy.array([0.01, 0.005])) # make predictions # Multivariate Gaussian: - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100,3) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100,3) fit = glmnet(x, y, 'mgaussian') glmnetPlot(fit, 'norm', False, '2norm') # Binomial - x = scipy.random.rand(100, 10) - y = scipy.random.rand(100,1) + x = numpy.random.rand(100, 10) + y = numpy.random.rand(100,1) y = (y > 0.5)*1.0 fit = glmnet(x = x, y = y, family = 'binomial', alpha = 0.5) # Multinomial - x = scipy.random.rand(100,10) - y = scipy.random.rand(100,1) + x = numpy.random.rand(100,10) + y = numpy.random.rand(100,1) y[y < 0.3] = 1.0 y[y < 0.6] = 2.0 y[y < 1.0] = 3.0 fit = glmnet(x = x, y = y, family = 'multinomial', mtype = 'grouped') # poisson - x = scipy.random.rand(100,10) - y = scipy.random.poisson(size = [100, 1])*1.0 + x = numpy.random.rand(100,10) + y = numpy.random.poisson(size = [100, 1])*1.0 fit = glmnet(x = x, y = y, family = 'poisson') # cox N = 1000; p = 30; nzc = p/3; - x = scipy.random.normal(size = [N, p]) - beta = scipy.random.normal(size = [nzc, 1]) - fx = scipy.dot(x[:, 0:nzc], beta/3) - hx = scipy.exp(fx) - ty = scipy.random.exponential(scale = 1/hx, size = [N, 1]) - tcens = scipy.random.binomial(1, 0.3, size = [N, 1]) + x = numpy.random.normal(size = [N, p]) + beta = numpy.random.normal(size = [nzc, 1]) + fx = numpy.dot(x[:, 0:nzc], beta/3) + hx = numpy.exp(fx) + ty = numpy.random.exponential(scale = 1/hx, size = [N, 1]) + tcens = numpy.random.binomial(1, 0.3, size = [N, 1]) tcens = 1 - tcens - y = scipy.column_stack((ty, tcens)) + y = numpy.column_stack((ty, tcens)) fit = glmnet(x = x.copy(), y = y.copy(), family = 'cox') glmnetPlot(fit) # sparse example N = 1000000; - x = scipy.random.normal(size = [N,10]) + x = numpy.random.normal(size = [N,10]) x[x < 3.0] = 0.0 - xs = scipy.sparse.csc_matrix(x, dtype = scipy.float64) - y = scipy.random.binomial(1, 0.5, size =[N,1]) + xs = numpy.sparse.csc_matrix(x, dtype = numpy.float64) + y = numpy.random.binomial(1, 0.5, size =[N,1]) y = y*1.0 st = time.time() fit = glmnet.glmnet(x = xs, y = y, family = 'binomial') @@ -236,7 +236,8 @@ class Type of regression - internal usage # import packages/methods from glmnetSet import glmnetSet from glmnetControl import glmnetControl -import scipy +import numpy +import scipy from elnet import elnet from lognet import lognet from coxnet import coxnet @@ -248,21 +249,21 @@ def glmnet(*, x, y, family='gaussian', **options): # check inputs: make sure x and y are scipy, float64 arrays # fortran order is not checked as we force a convert later if not( isinstance(x, scipy.sparse.csc.csc_matrix) ): - if not( isinstance(x, scipy.ndarray) and x.dtype == 'float64'): + if not( isinstance(x, numpy.ndarray) and x.dtype == 'float64'): raise ValueError('x input must be a scipy float64 ndarray') else: if not (x.dtype == 'float64'): raise ValueError('x input must be a float64 array') - if not( isinstance(y, scipy.ndarray) and y.dtype == 'float64'): + if not( isinstance(y, numpy.ndarray) and y.dtype == 'float64'): raise ValueError('y input must be a scipy float64 ndarray') # create options if options is None: - options = glmnetSet(); + options = glmnetSet() ## match the family, abbreviation allowed - fambase = ['gaussian','binomial','poisson','multinomial','cox','mgaussian']; + fambase = ['gaussian','binomial','poisson','multinomial','cox','mgaussian'] # find index of family in fambase indxtf = [x.startswith(family.lower()) for x in fambase] # find index of family in fambase famind = [i for i in range(len(indxtf)) if indxtf[i] == True] @@ -279,27 +280,27 @@ def glmnet(*, x, y, family='gaussian', **options): #print(options) ## error check options parameters - alpha = scipy.float64(options['alpha']) + alpha = numpy.float64(options['alpha']) if alpha > 1.0 : print('Warning: alpha > 1.0; setting to 1.0') - options['alpha'] = scipy.float64(1.0) + options['alpha'] = numpy.float64(1.0) if alpha < 0.0 : print('Warning: alpha < 0.0; setting to 0.0') - options['alpha'] = scipy.float64(0.0) + options['alpha'] = numpy.float64(0.0) - parm = scipy.float64(options['alpha']) - nlam = scipy.int32(options['nlambda']) + parm = numpy.float64(options['alpha']) + nlam = numpy.int32(options['nlambda']) nobs, nvars = x.shape # check weights length weights = options['weights'].copy() if len(weights) == 0: - weights = scipy.ones([nobs, 1], dtype = scipy.float64) + weights = numpy.ones([nobs, 1], dtype = numpy.float64) elif len(weights) != nobs: raise ValueError('Error: Number of elements in ''weights'' not equal to number of rows of ''x''') # check if weights are scipy nd array - if not( isinstance(weights, scipy.ndarray) and weights.dtype == 'float64'): + if not( isinstance(weights, numpy.ndarray) and weights.dtype == 'float64'): raise ValueError('weights input must be a scipy float64 ndarray') # check y length @@ -321,18 +322,18 @@ def glmnet(*, x, y, family='gaussian', **options): exclude = options['exclude'] # TBD: test this if not (len(exclude) == 0): - exclude = scipy.unique(exclude) - if scipy.any(exclude < 0) or scipy.any(exclude >= nvars): + exclude = numpy.unique(exclude) + if numpy.any(exclude < 0) or numpy.any(exclude >= nvars): raise ValueError('Error: Some excluded variables are out of range') else: - jd = scipy.append(len(exclude), exclude + 1) # indices are 1-based in fortran + jd = numpy.append(len(exclude), exclude + 1) # indices are 1-based in fortran else: - jd = scipy.zeros([1,1], dtype = scipy.integer) + jd = numpy.zeros([1,1], dtype = numpy.integer) # check vp vp = options['penalty_factor'] if len(vp) == 0: - vp = scipy.ones([1, nvars]) + vp = numpy.ones([1, nvars]) # inparms inparms = glmnetControl() @@ -345,12 +346,12 @@ def glmnet(*, x, y, family='gaussian', **options): if any(cl[1,:] < 0): raise ValueError('Error: The lower bound on cl must be non-negative') - cl[0, cl[0, :] == scipy.float64('-inf')] = -1.0*inparms['big'] - cl[1, cl[1, :] == scipy.float64('inf')] = 1.0*inparms['big'] + cl[0, cl[0, :] == numpy.float64('-inf')] = -1.0*inparms['big'] + cl[1, cl[1, :] == numpy.float64('inf')] = 1.0*inparms['big'] if cl.shape[1] < nvars: if cl.shape[1] == 1: - cl = cl*scipy.ones([1, nvars]) + cl = cl*numpy.ones([1, nvars]) else: raise ValueError('Error: Require length 1 or nvars lower and upper limits') else: @@ -358,7 +359,7 @@ def glmnet(*, x, y, family='gaussian', **options): exit_rec = 0 - if scipy.any(cl == 0.0): + if numpy.any(cl == 0.0): fdev = inparms['fdev'] if fdev != 0: optset = dict() @@ -366,12 +367,12 @@ def glmnet(*, x, y, family='gaussian', **options): glmnetControl(optset) exit_rec = 1 - isd = scipy.int32(options['standardize']) - intr = scipy.int32(options['intr']) + isd = numpy.int32(options['standardize']) + intr = numpy.int32(options['intr']) if (intr == True) and (family == 'cox'): print('Warning: Cox model has no intercept!') - jsd = scipy.int32(options['standardize_resp']) + jsd = numpy.int32(options['standardize_resp']) thresh = options['thresh'] lambdau = options['lambdau'] lambda_min = options['lambda_min'] @@ -387,16 +388,16 @@ def glmnet(*, x, y, family='gaussian', **options): if (lambda_min >= 1): raise ValueError('ERROR: lambda_min should be less than 1') flmin = lambda_min - ulam = scipy.zeros([1,1], dtype = scipy.float64) + ulam = numpy.zeros([1,1], dtype = numpy.float64) else: flmin = 1.0 if any(lambdau < 0): raise ValueError('ERROR: lambdas should be non-negative') - ulam = -scipy.sort(-lambdau) # reverse sort + ulam = -numpy.sort(-lambdau) # reverse sort nlam = lambdau.size - maxit = scipy.int32(options['maxit']) + maxit = numpy.int32(options['maxit']) gtype = options['gtype'] if len(gtype) == 0: if (nvars < 500): @@ -429,15 +430,15 @@ def glmnet(*, x, y, family='gaussian', **options): is_sparse = False if scipy.sparse.issparse(x): is_sparse = True - tx = scipy.sparse.csc_matrix(x, dtype = scipy.float64) + tx = scipy.sparse.csc_matrix(x, dtype = numpy.float64) x = tx.data; x = x.reshape([len(x), 1]) irs = tx.indices + 1 pcs = tx.indptr + 1 - irs = scipy.reshape(irs, [len(irs),]) - pcs = scipy.reshape(pcs, [len(pcs),]) + irs = numpy.reshape(irs, [len(irs),]) + pcs = numpy.reshape(pcs, [len(pcs),]) else: - irs = scipy.empty([0]) - pcs = scipy.empty([0]) + irs = numpy.empty([0]) + pcs = numpy.empty([0]) if scipy.sparse.issparse(y): y = y.todense() @@ -467,7 +468,7 @@ def glmnet(*, x, y, family='gaussian', **options): # call fishnet fit = fishnet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, - thresh, isd, intr, maxit, family); + thresh, isd, intr, maxit, family) else: raise ValueError('calling a family of fits that has not been implemented yet') diff --git a/glmnet_python/glmnetCoef.py b/glmnet_python/glmnetCoef.py index 574552a..e577c2e 100644 --- a/glmnet_python/glmnetCoef.py +++ b/glmnet_python/glmnetCoef.py @@ -14,8 +14,8 @@ Fewer input arguments (more often) are allowed in the call, but must come in the order listed above. To set default values on the way, use - scipy.empty([0]). - For example, ncoef = glmnetCoef(fit,scipy.empty([0]),False). + numpy.empty([0]). + For example, ncoef = glmnetCoef(fit,numpy.empty([0]),False). INPUT ARGUMENTS: obj Fitted "glmnet" model object. @@ -61,14 +61,14 @@ glmnet, glmnetPrint, glmnetPredict, and cvglmnet. EXAMPLES: - x = scipy.random.rand(100,20); - y = scipy.random.rand(100,1); + x = numpy.random.rand(100,20); + y = numpy.random.rand(100,1); fit = glmnet(x = x.copy(),y = y.copy()); - ncoef = glmnetCoef(fit,scipy.array([0.01, 0.001])); + ncoef = glmnetCoef(fit,numpy.array([0.01, 0.001])); """ -import scipy +import numpy from glmnetPredict import glmnetPredict def glmnetCoef(obj, s = None, exact = False): @@ -79,7 +79,7 @@ def glmnetCoef(obj, s = None, exact = False): if exact and len(s) > 0: raise NotImplementedError('exact = True not implemented in glmnetCoef') - result = glmnetPredict(obj, scipy.empty([0]), s, 'coefficients') + result = glmnetPredict(obj, numpy.empty([0]), s, 'coefficients') return(result) diff --git a/glmnet_python/glmnetControl.py b/glmnet_python/glmnetControl.py index fae3b60..2f79dcf 100644 --- a/glmnet_python/glmnetControl.py +++ b/glmnet_python/glmnetControl.py @@ -70,26 +70,26 @@ """ def glmnetControl(pars = None): - import scipy + import numpy # default options - ivals = dict(); - ivals["fdev"] = scipy.float64(1e-5) - ivals["devmax"] = scipy.float64(0.999) - ivals["eps"] = scipy.float64(1e-6) - ivals["big"] = scipy.float64(9.9e35) - ivals["mnlam"] = scipy.float64(5) - ivals["pmin"] = scipy.float64(1e-5) - ivals["exmx"] = scipy.float64(250) - ivals["prec"] = scipy.float64(1e-10) - ivals["mxit"] = scipy.float64(100) + ivals = dict() + ivals["fdev"] = numpy.float64(1e-5) + ivals["devmax"] = numpy.float64(0.999) + ivals["eps"] = numpy.float64(1e-6) + ivals["big"] = numpy.float64(9.9e35) + ivals["mnlam"] = numpy.float64(5) + ivals["pmin"] = numpy.float64(1e-5) + ivals["exmx"] = numpy.float64(250) + ivals["prec"] = numpy.float64(1e-10) + ivals["mxit"] = numpy.float64(100) # quick return if no user opts if pars == None: return ivals # if options are passed in by user, update options with values from opts - parsInIvals = set(pars.keys()) - set(ivals.keys()); + parsInIvals = set(pars.keys()) - set(ivals.keys()) if len(parsInIvals) > 0: # assert 'opts' keys are subsets of 'options' keys raise ValueError('attempting to set glmnet controls that are not known to glmnetControl') else: diff --git a/glmnet_python/glmnetPlot.py b/glmnet_python/glmnetPlot.py index 76b9437..a1dcc9d 100644 --- a/glmnet_python/glmnetPlot.py +++ b/glmnet_python/glmnetPlot.py @@ -59,10 +59,10 @@ EXAMPLES: import matplotlib.pyplot as plt - scipy.random.seed(1) - x=scipy.random.normal(size = (100,20)) - y=scipy.random.normal(size = (100,1)) - g4=scipy.random.choice(4,size = (100,1))*1.0 + numpy.random.seed(1) + x=numpy.random.normal(size = (100,20)) + y=numpy.random.normal(size = (100,1)) + g4=numpy.random.choice(4,size = (100,1))*1.0 fit1=glmnet(x = x.copy(),y = y.copy()) glmnetPlot(fit1) plt.figure() @@ -71,7 +71,7 @@ plt.figure() glmnetPlot(fit3) """ -import scipy +import numpy def glmnetPlot(x, xvar = 'norm', label = False, ptype = 'coef', **options): @@ -93,7 +93,7 @@ def glmnetPlot(x, xvar = 'norm', label = False, ptype = 'coef', **options): for i in range(len(beta)): which = nonzeroCoef(beta[i]) nzbeta[i] = beta[i][which, :] - norm = norm + scipy.sum(scipy.absolute(nzbeta[i]), axis = 0) + norm = norm + numpy.sum(numpy.absolute(nzbeta[i]), axis = 0) else: norm = 0 @@ -114,19 +114,19 @@ def glmnetPlot(x, xvar = 'norm', label = False, ptype = 'coef', **options): if i < ncl - 1: plt.figure() else: - dfseq = scipy.round_(scipy.mean(x['dfmat'], axis = 0)) + dfseq = numpy.round_(numpy.mean(x['dfmat'], axis = 0)) coefnorm = beta[1]*0 for i in range(len(beta)): - coefnorm = coefnorm + scipy.absolute(beta[i])**2 - coefnorm = scipy.sqrt(coefnorm) + coefnorm = coefnorm + numpy.absolute(beta[i])**2 + coefnorm = numpy.sqrt(coefnorm) if x['class'] == 'multnet': mstr = 'Coefficient 2Norms' handle = plotCoef(coefnorm, norm, x['lambdau'], dfseq, x['dev'], - label, xvar, '',mstr, **options); + label, xvar, '',mstr, **options) else: mstr = 'Coefficient 2Norms' handle = plotCoef(coefnorm, norm, x['lambdau'], x['dfmat'][0,:], x['dev'], - label, xvar, '', mstr, **options); + label, xvar, '', mstr, **options) return(handle) # end of glmnetplot @@ -146,11 +146,11 @@ def getFromList(xvar, xvarbase, errMsg): # end of getFromList() # ========================================= def nonzeroCoef(beta, bystep = False): - result = scipy.absolute(beta) > 0 + result = numpy.absolute(beta) > 0 if len(result.shape) == 1: - result = scipy.reshape(result, [result.shape[0], 1]) + result = numpy.reshape(result, [result.shape[0], 1]) if not bystep: - result = scipy.any(result, axis = 1) + result = numpy.any(result, axis = 1) return(result) # end of nonzeroCoef() @@ -169,12 +169,12 @@ def plotCoef(beta, norm, lambdau, df, dev, label, xvar, xlab, ylab, **options): beta = beta[which, :] if xvar == 'norm': if len(norm) == 0: - index = scipy.sum(scipy.absolute(beta), axis = 0) + index = numpy.sum(numpy.absolute(beta), axis = 0) else: index = norm iname = 'L1 Norm' elif xvar == 'lambda': - index = scipy.log(lambdau) + index = numpy.log(lambdau) iname = 'Log Lambda' elif xvar == 'dev': index = dev @@ -189,7 +189,7 @@ def plotCoef(beta, norm, lambdau, df, dev, label, xvar, xlab, ylab, **options): ax1 = plt.gca() # plot x vs y - beta = scipy.transpose(beta) + beta = numpy.transpose(beta) ax1.plot(index, beta, **options) ax2 = ax1.twiny() @@ -199,7 +199,7 @@ def plotCoef(beta, norm, lambdau, df, dev, label, xvar, xlab, ylab, **options): ylim1 = ax1.get_ylim() atdf = ax1.get_xticks() - indat = scipy.ones(atdf.shape, dtype = scipy.integer) + indat = numpy.ones(atdf.shape, dtype = numpy.integer) if index[-1] >= index[1]: for j in range(len(index)-1, -1, -1): indat[atdf <= index[j]] = j diff --git a/glmnet_python/glmnetPredict.py b/glmnet_python/glmnetPredict.py index 9b732a9..7441bb1 100644 --- a/glmnet_python/glmnetPredict.py +++ b/glmnet_python/glmnetPredict.py @@ -56,7 +56,7 @@ DETAILS: The shape of the objects returned are different for "multinomial" objects. glmnetCoef(fit, ...) is equivalent to - glmnetPredict(fit,scipy.empty([]),scipy.empty([]),'coefficients"). + glmnetPredict(fit,numpy.empty([]),numpy.empty([]),'coefficients"). LICENSE: GPL-2 @@ -86,32 +86,33 @@ EXAMPLES: - x = scipy.random.normal(size = [100,20]) - y = scipy.random.normal(size = [100,1]) - g2 = scipy.random.choice(2, size = [100, 1])*1.0 # must be float64 - g4 = scipy.random.choice(4, size = [100, 1])*1.0 # must be float64 + x = numpy.random.normal(size = [100,20]) + y = numpy.random.normal(size = [100,1]) + g2 = numpy.random.choice(2, size = [100, 1])*1.0 # must be float64 + g4 = numpy.random.choice(4, size = [100, 1])*1.0 # must be float64 fit1 = glmnet(x = x.copy(),y = y.copy()); - print( glmnetPredict(fit1,x[0:5,:],scipy.array([0.01,0.005])) ) - print( glmnetPredict(fit1, scipy.empty([0]), scipy.empty([0]), 'coefficients') ) + print( glmnetPredict(fit1,x[0:5,:],numpy.array([0.01,0.005])) ) + print( glmnetPredict(fit1, numpy.empty([0]), numpy.empty([0]), 'coefficients') ) fit2 = glmnet(x = x.copy(), y = g2.copy(), family = 'binomial'); - print(glmnetPredict(fit2, x[2:5,:],scipy.empty([0]), 'response')) - print(glmnetPredict(fit2, scipy.empty([0]), scipy.empty([0]), 'nonzero')) + print(glmnetPredict(fit2, x[2:5,:],numpy.empty([0]), 'response')) + print(glmnetPredict(fit2, numpy.empty([0]), numpy.empty([0]), 'nonzero')) fit3 = glmnet(x = x.copy(), y = g4.copy(), family = 'multinomial'); - print(glmnetPredict(fit3, x[0:3,:], scipy.array([0.01, 0.5]), 'response')) + print(glmnetPredict(fit3, x[0:3,:], numpy.array([0.01, 0.5]), 'response')) """ -import scipy +import numpy +import scipy import scipy.interpolate def glmnetPredict(fit,\ - newx = scipy.empty([0]), \ - s = scipy.empty([0]), \ + newx = numpy.empty([0]), \ + s = numpy.empty([0]), \ ptype = 'link', \ exact = False, \ - offset = scipy.empty([0])): + offset = numpy.empty([0])): typebase = ['link', 'response', 'coefficients', 'nonzero', 'class'] indxtf = [x.startswith(ptype.lower()) for x in typebase] @@ -136,7 +137,7 @@ def glmnetPredict(fit,\ raise NotImplementedError('exact = True option is not implemented in python') # we convert newx to full here since sparse and full operations do not seem to - # be overloaded completely in scipy. + # be overloaded completely in numpy. if scipy.sparse.issparse(newx): newx = newx.todense() @@ -145,15 +146,15 @@ def glmnetPredict(fit,\ if fit['class'] == 'lognet': a0 = fit['a0'] else: - a0 = scipy.transpose(fit['a0']) + a0 = numpy.transpose(fit['a0']) - a0 = scipy.reshape(a0, [1, a0.size]) # convert to 1 x N for appending - nbeta = scipy.row_stack( (a0, fit['beta']) ) - if scipy.size(s) > 0: + a0 = numpy.reshape(a0, [1, a0.size]) # convert to 1 x N for appending + nbeta = numpy.row_stack( (a0, fit['beta']) ) + if numpy.size(s) > 0: lambdau = fit['lambdau'] lamlist = lambda_interp(lambdau, s) - nbeta = nbeta[:, lamlist['left']]*scipy.tile(scipy.transpose(lamlist['frac']), [nbeta.shape[0], 1]) \ - + nbeta[:, lamlist['right']]*( 1 - scipy.tile(scipy.transpose(lamlist['frac']), [nbeta.shape[0], 1])) + nbeta = nbeta[:, lamlist['left']]*numpy.tile(numpy.transpose(lamlist['frac']), [nbeta.shape[0], 1]) \ + + nbeta[:, lamlist['right']]*( 1 - numpy.tile(numpy.transpose(lamlist['frac']), [nbeta.shape[0], 1])) if ptype == 'coefficients': result = nbeta @@ -162,8 +163,8 @@ def glmnetPredict(fit,\ if ptype == 'nonzero': result = nonzeroCoef(nbeta[1:nbeta.shape[0], :], True) return(result) - # use scipy.sparse.hstack instead of column_stack for sparse matrices - result = scipy.dot(scipy.column_stack( (scipy.ones([newx.shape[0], 1]) , newx) ) , nbeta) + # use numpy.sparse.hstack instead of column_stack for sparse matrices + result = numpy.dot(numpy.column_stack( (numpy.ones([newx.shape[0], 1]) , newx) ) , nbeta) if fit['offset']: if len(offset) == 0: @@ -171,16 +172,16 @@ def glmnetPredict(fit,\ if offset.shape[1] == 2: offset = offset[:, 1] - result = result + scipy.tile(offset, [1, result.shape[1]]) + result = result + numpy.tile(offset, [1, result.shape[1]]) # fishnet if fit['class'] == 'fishnet' and ptype == 'response': - result = scipy.exp(result) + result = numpy.exp(result) # lognet if fit['class'] == 'lognet': if ptype == 'response': - pp = scipy.exp(-result) + pp = numpy.exp(-result) result = 1/(1 + pp) elif ptype == 'class': result = (result > 0)*1 + (result <= 0)*0 @@ -202,13 +203,13 @@ def glmnetPredict(fit,\ lambdau = fit['lambdau'] lamlist = lambda_interp(lambdau, s) for i in range(nclass): - kbeta = scipy.row_stack( (a0[i, :], nbeta[i]) ) - kbeta = kbeta[:, lamlist['left']]*scipy.tile(scipy.transpose(lamlist['frac']), [kbeta.shape[0], 1]) \ - + kbeta[:, lamlist['right']]*( 1 - scipy.tile(scipy.transpose(lamlist['frac']), [kbeta.shape[0], 1])) + kbeta = numpy.row_stack( (a0[i, :], nbeta[i]) ) + kbeta = kbeta[:, lamlist['left']]*numpy.tile(numpy.transpose(lamlist['frac']), [kbeta.shape[0], 1]) \ + + kbeta[:, lamlist['right']]*( 1 - numpy.tile(numpy.transpose(lamlist['frac']), [kbeta.shape[0], 1])) nbeta[i] = kbeta else: for i in range(nclass): - nbeta[i] = scipy.row_stack( (a0[i, :], nbeta[i]) ) + nbeta[i] = numpy.row_stack( (a0[i, :], nbeta[i]) ) nlambda = len(fit['lambdau']) if ptype == 'coefficients': @@ -228,33 +229,33 @@ def glmnetPredict(fit,\ return(result) npred = newx.shape[0] - dp = scipy.zeros([nclass, nlambda, npred], dtype = scipy.float64) + dp = numpy.zeros([nclass, nlambda, npred], dtype = numpy.float64) for i in range(nclass): - qq = scipy.column_stack( (scipy.ones([newx.shape[0], 1]), newx) ) - fitk = scipy.dot( qq, nbeta[i] ) - dp[i, :, :] = dp[i, :, :] + scipy.reshape(scipy.transpose(fitk), [1, nlambda, npred]) + qq = numpy.column_stack( (numpy.ones([newx.shape[0], 1]), newx) ) + fitk = numpy.dot( qq, nbeta[i] ) + dp[i, :, :] = dp[i, :, :] + numpy.reshape(numpy.transpose(fitk), [1, nlambda, npred]) if fit['offset']: if len(offset) == 0: raise ValueError('No offset provided for prediction, yet used in fit of glmnet') if offset.shape[1] != nclass: raise ValueError('Offset should be dimension %d x %d' % (npred, nclass)) - toff = scipy.transpose(offset) + toff = numpy.transpose(offset) for i in range(nlambda): dp[:, i, :] = dp[:, i, :] + toff if ptype == 'response': - pp = scipy.exp(dp) - psum = scipy.sum(pp, axis = 0, keepdims = True) - result = scipy.transpose(pp/scipy.tile(psum, [nclass, 1, 1]), [2, 0, 1]) + pp = numpy.exp(dp) + psum = numpy.sum(pp, axis = 0, keepdims = True) + result = numpy.transpose(pp/numpy.tile(psum, [nclass, 1, 1]), [2, 0, 1]) if ptype == 'link': - result = scipy.transpose(dp, [2, 0, 1]) + result = numpy.transpose(dp, [2, 0, 1]) if ptype == 'class': - dp = scipy.transpose(dp, [2, 0, 1]) + dp = numpy.transpose(dp, [2, 0, 1]) result = list() for i in range(dp.shape[2]): t = softmax(dp[:, :, i]) - result = scipy.append(result, fit['label'][t['pclass']]) + result = numpy.append(result, fit['label'][t['pclass']]) # coxnet if fit['class'] == 'coxnet': @@ -262,8 +263,8 @@ def glmnetPredict(fit,\ if len(s) > 0: lambdau = fit['lambdau'] lamlist = lambda_interp(lambdau, s) - nbeta = nbeta[:, lamlist['left']]*scipy.tile(scipy.transpose(lamlist['frac']), [nbeta.shape[0], 1]) \ - + nbeta[:, lamlist['right']]*( 1 - scipy.tile(scipy.transpose(lamlist['frac']), [nbeta.shape[0], 1])) + nbeta = nbeta[:, lamlist['left']]*numpy.tile(numpy.transpose(lamlist['frac']), [nbeta.shape[0], 1]) \ + + nbeta[:, lamlist['right']]*( 1 - numpy.tile(numpy.transpose(lamlist['frac']), [nbeta.shape[0], 1])) if ptype == 'coefficients': result = nbeta @@ -273,16 +274,16 @@ def glmnetPredict(fit,\ result = nonzeroCoef(nbeta, True) return(result) - result = scipy.dot(newx, nbeta) + result = numpy.dot(newx, nbeta) if fit['offset']: if len(offset) == 0: raise ValueError('No offset provided for prediction, yet used in fit of glmnet') - result = result + scipy.tile(offset, [1, result.shape[1]]) + result = result + numpy.tile(offset, [1, result.shape[1]]) if ptype == 'response': - result = scipy.exp(result) + result = numpy.exp(result) return(result) @@ -302,18 +303,18 @@ def lambda_interp(lambdau, s): # sfrac*left+(1-sfrac*right) if len(lambdau) == 1: nums = len(s) - left = scipy.zeros([nums, 1], dtype = scipy.integer) + left = numpy.zeros([nums, 1], dtype = numpy.integer) right = left - sfrac = scipy.zeros([nums, 1], dtype = scipy.float64) + sfrac = numpy.zeros([nums, 1], dtype = numpy.float64) else: - s[s > scipy.amax(lambdau)] = scipy.amax(lambdau) - s[s < scipy.amin(lambdau)] = scipy.amin(lambdau) + s[s > numpy.amax(lambdau)] = numpy.amax(lambdau) + s[s < numpy.amin(lambdau)] = numpy.amin(lambdau) k = len(lambdau) sfrac = (lambdau[0] - s)/(lambdau[0] - lambdau[k - 1]) lambdau = (lambdau[0] - lambdau)/(lambdau[0] - lambdau[k - 1]) coord = scipy.interpolate.interp1d(lambdau, range(k))(sfrac) - left = scipy.floor(coord).astype(scipy.integer, copy = False) - right = scipy.ceil(coord).astype(scipy.integer, copy = False) + left = numpy.floor(coord).astype(numpy.integer, copy = False) + right = numpy.ceil(coord).astype(numpy.integer, copy = False) # tf = left != right sfrac[tf] = (sfrac[tf] - lambdau[right[tf]])/(lambdau[left[tf]] - lambdau[right[tf]]) @@ -334,14 +335,14 @@ def lambda_interp(lambdau, s): def softmax(x, gap = False): d = x.shape maxdist = x[:, 0] - pclass = scipy.zeros([d[0], 1], dtype = scipy.integer) + pclass = numpy.zeros([d[0], 1], dtype = numpy.integer) for i in range(1, d[1], 1): l = x[:, i] > maxdist pclass[l] = i maxdist[l] = x[l, i] if gap == True: - x = scipy.absolute(maxdist - x) - x[0:d[0], pclass] = x*scipy.ones([d[1], d[1]]) + x = numpy.absolute(maxdist - x) + x[0:d[0], pclass] = x*numpy.ones([d[1], d[1]]) #gaps = pmin(x)# not sure what this means; gap is never called with True raise ValueError('gap = True is not implemented yet') @@ -351,15 +352,15 @@ def softmax(x, gap = False): #result['gaps'] = gaps raise ValueError('gap = True is not implemented yet') else: - result['pclass'] = pclass; + result['pclass'] = pclass return(result) # end of softmax # ========================================= def nonzeroCoef(beta, bystep = False): - result = scipy.absolute(beta) > 0 + result = numpy.absolute(beta) > 0 if not bystep: - result = scipy.any(result, axis = 1) + result = numpy.any(result, axis = 1) return(result) # end of nonzeroCoef # ========================================= diff --git a/glmnet_python/glmnetSet.py b/glmnet_python/glmnetSet.py index 9f7b9ca..5456395 100644 --- a/glmnet_python/glmnetSet.py +++ b/glmnet_python/glmnetSet.py @@ -29,8 +29,8 @@ # given values for these parameters. options = glmnetSet( alpha = 0.1, \ intr = False, \ - maxit = scipy.int32(1e6), \ - offset = scipy.empty([0]) ) + maxit = numpy.int32(1e6), \ + offset = numpy.empty([0]) ) # same as previous case, except we pass in a # dict() object instead opts = dict(); opts['alpha'] = 0.5; @@ -40,17 +40,17 @@ Parameter Default value Description .................................................................. -alpha +alpha The elasticnet mixing parameter, with 0 < alpha <= 1. The penalty is defined as (1-alpha)/2(||beta||_2)^2+alpha||beta||_1. Default is alpha = 1, which is the lasso penalty; Currently alpha = 0 the ridge penalty. -nlambda +nlambda The number of lambda values - default is -lambdau +lambdau A user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on nlambda and lambda_min. Supplying a value of @@ -70,7 +70,7 @@ details below for y standardization with family='gaussian'. -weights +weights Observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation. @@ -79,7 +79,7 @@ Should intercept(s) be fitted (default=true) or set to zero (false). -offset +offset A vector of length nobs that is included in the linear predictor (a nobs x nc matrix for the "multinomial" family). Useful for the "poisson" @@ -88,7 +88,7 @@ supplied, then values must also be supplied to the predict function. -lambda_min +lambda_min Smallest value for lambda, as a fraction of lambda_max, the (data derived) entry value (i.e., the smallest value for which all coefficients are zero). @@ -101,28 +101,28 @@ and glmnet will exit gracefully when the percentage deviance explained is almost 1. -thresh +thresh Convergence threshold for coordinate descent. Each inner coordinate-descent loop continues until the maximum change in the objective after any coefficient update is less than thresh times the null deviance. Defaults value is 1E-4. -dfmax +dfmax Limit the maximum number of variables in the model. Useful for very large nvars, if a partial path is desired. Default is nvars + 1. -pmax +pmax Limit the maximum number of variables ever to be nonzero. Default is min(dfmax * 2 + 20, nvars). -exclude +exclude Indices of variables to be excluded from the model. Default is none. Equivalent to an infinite penalty factor (next item). -penalty_factor +penalty_factor Separate penalty factors can be applied to each coefficient. This is a number that multiplies lambda to allow differential shrinkage. Can be 0 for some @@ -133,11 +133,11 @@ factors are internally rescaled to sum to nvars, and the lambda sequence will reflect this change. -maxit +maxit Maximum number of passes over the data for all lambda values; default is 10^5. -cl +cl Two-row matrix with the first row being the lower limits for each coefficient and the second the upper limits. Can be presented as a single column (which @@ -187,25 +187,25 @@ """ def glmnetSet(opts = None): - import scipy + import numpy # default options options = { - "weights" : scipy.empty([0]), - "offset" : scipy.empty([0]), - "alpha" : scipy.float64(1.0), - "nlambda" : scipy.int32(100), - "lambda_min" : scipy.empty([0]), - "lambdau" : scipy.empty([0]), + "weights" : numpy.empty([0]), + "offset" : numpy.empty([0]), + "alpha" : numpy.float64(1.0), + "nlambda" : numpy.int32(100), + "lambda_min" : numpy.empty([0]), + "lambdau" : numpy.empty([0]), "standardize" : True, "intr" : True, - "thresh" : scipy.float64(1e-7), - "dfmax" : scipy.empty([0]), - "pmax" : scipy.empty([0]), - "exclude" : scipy.empty([0], dtype = scipy.integer), - "penalty_factor" : scipy.empty([0]), - "cl" : scipy.array([[scipy.float64(-scipy.inf)], [scipy.float64(scipy.inf)]]), - "maxit" : scipy.int32(1e5), + "thresh" : numpy.float64(1e-7), + "dfmax" : numpy.empty([0]), + "pmax" : numpy.empty([0]), + "exclude" : numpy.empty([0], dtype = numpy.integer), + "penalty_factor" : numpy.empty([0]), + "cl" : numpy.array([[numpy.float64(-numpy.inf)], [numpy.float64(numpy.inf)]]), + "maxit" : numpy.int32(1e5), "gtype" : [], "ltype" : 'Newton', "standardize_resp" : False, @@ -219,7 +219,7 @@ def glmnetSet(opts = None): return options # if options are passed in by user, update options with values from opts - optsInOptions = set(opts.keys()) - set(options.keys()); + optsInOptions = set(opts.keys()) - set(options.keys()) if len(optsInOptions) > 0: # assert 'opts' keys are subsets of 'options' keys print(optsInOptions, ' : unknown option for glmnetSet') raise ValueError('attempting to set glmnet options that are not known to glmnetSet') diff --git a/glmnet_python/lognet.py b/glmnet_python/lognet.py index d231ec7..0645fde 100644 --- a/glmnet_python/lognet.py +++ b/glmnet_python/lognet.py @@ -4,7 +4,7 @@ """ # import packages/methods -import scipy +import numpy import ctypes from loadGlmLib import loadGlmLib @@ -26,12 +26,12 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, raise ValueError('x and y have different number of rows in call to glmnet') if nc == 1: - classes, sy = scipy.unique(y, return_inverse = True) + classes, sy = numpy.unique(y, return_inverse = True) nc = len(classes) - indexes = scipy.eye(nc, nc) + indexes = numpy.eye(nc, nc) y = indexes[sy, :] else: - classes = scipy.arange(nc) + 1 # 1:nc + classes = numpy.arange(nc) + 1 # 1:nc # if family == 'binomial': if nc > 2: @@ -42,14 +42,14 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, # if (len(weights) != 0): t = weights > 0 - if ~scipy.all(t): - t = scipy.reshape(t, (len(y), )) + if ~numpy.all(t): + t = numpy.reshape(t, (len(y), )) y = y[t, :] x = x[t, :] weights = weights[t] - nobs = scipy.sum(t) + nobs = numpy.sum(t) else: - t = scipy.empty([0], dtype = scipy.integer) + t = numpy.empty([0], dtype = numpy.integer) # if len(y.shape) == 1: mv = len(y) @@ -57,7 +57,7 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, else: mv, ny = y.shape - y = y*scipy.tile(weights, (1, ny)) + y = y*numpy.tile(weights, (1, ny)) # if len(offset) == 0: @@ -71,7 +71,7 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, raise ValueError('offset should have the same number of values as observations in binominal/multinomial call to glmnet') if nc == 1: if do[1] == 1: - offset = scipy.column_stack((offset, -offset), 1) + offset = numpy.column_stack((offset, -offset)) if do[1] > 2: raise ValueError('offset should have 1 or 2 columns in binomial call to glmnet') if (family == 'multinomial') and (do[1] != nc): @@ -85,16 +85,16 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, ###################################### # force inputs into fortran order and scipy float64 copyFlag = False - x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - y = y.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - offset = offset.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - ulam = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) + x = x.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + irs = irs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + pcs = pcs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + y = y.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + weights = weights.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + offset = offset.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + jd = jd.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + vp = vp.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + cl = cl.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + ulam = ulam.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- @@ -104,32 +104,32 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, lmu_r = ctypes.c_int(lmu) # a0, ca if nc == 1: - a0 = scipy.zeros([nlam], dtype = scipy.float64) - ca = scipy.zeros([nx, nlam], dtype = scipy.float64) + a0 = numpy.zeros([nlam], dtype = numpy.float64) + ca = numpy.zeros([nx, nlam], dtype = numpy.float64) else: - a0 = scipy.zeros([nc, nlam], dtype = scipy.float64) - ca = scipy.zeros([nx, nc, nlam], dtype = scipy.float64) + a0 = numpy.zeros([nc, nlam], dtype = numpy.float64) + ca = numpy.zeros([nx, nc, nlam], dtype = numpy.float64) # a0 - a0 = a0.astype(dtype = scipy.float64, order = 'F', copy = False) + a0 = a0.astype(dtype = numpy.float64, order = 'F', copy = False) a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ca - ca = ca.astype(dtype = scipy.float64, order = 'F', copy = False) + ca = ca.astype(dtype = numpy.float64, order = 'F', copy = False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia - ia = -1*scipy.ones([nx], dtype = scipy.int32) - ia = ia.astype(dtype = scipy.int32, order = 'F', copy = False) + ia = -1*numpy.ones([nx], dtype = numpy.int32) + ia = ia.astype(dtype = numpy.int32, order = 'F', copy = False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin - nin = -1*scipy.ones([nlam], dtype = scipy.int32) - nin = nin.astype(dtype = scipy.int32, order = 'F', copy = False) + nin = -1*numpy.ones([nlam], dtype = numpy.int32) + nin = nin.astype(dtype = numpy.int32, order = 'F', copy = False) nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # dev - dev = -1*scipy.ones([nlam], dtype = scipy.float64) - dev = dev.astype(dtype = scipy.float64, order = 'F', copy = False) + dev = -1*numpy.ones([nlam], dtype = numpy.float64) + dev = dev.astype(dtype = numpy.float64, order = 'F', copy = False) dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm - alm = -1*scipy.ones([nlam], dtype = scipy.float64) - alm = alm.astype(dtype = scipy.float64, order = 'F', copy = False) + alm = -1*numpy.ones([nlam], dtype = numpy.float64) + alm = alm.astype(dtype = numpy.float64, order = 'F', copy = False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 @@ -243,39 +243,39 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, ninmax = max(nin) # fix first value of alm (from inf to correct value) if ulam[0] == 0.0: - t1 = scipy.log(alm[1]) - t2 = scipy.log(alm[2]) - alm[0] = scipy.exp(2*t1 - t2) + t1 = numpy.log(alm[1]) + t2 = numpy.log(alm[2]) + alm[0] = numpy.exp(2*t1 - t2) # create return fit dictionary if family == 'multinomial': - a0 = a0 - scipy.tile(scipy.mean(a0), (nc, 1)) + a0 = a0 - numpy.tile(numpy.mean(a0), (nc, 1)) dfmat = a0.copy() - dd = scipy.array([nvars, lmu], dtype = scipy.integer) + dd = numpy.array([nvars, lmu], dtype = numpy.integer) beta_list = list() if ninmax > 0: # TODO: is the reshape here done right? - ca = scipy.reshape(ca, (nx, nc, lmu)) + ca = numpy.reshape(ca, (nx, nc, lmu)) ca = ca[0:ninmax, :, :] ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran - oja = scipy.argsort(ja) + oja = numpy.argsort(ja) ja1 = ja[oja] - df = scipy.any(scipy.absolute(ca) > 0, axis=1) - df = scipy.sum(df) - df = scipy.reshape(df, (1, df.size)) + df = numpy.any(numpy.absolute(ca) > 0, axis=1) + df = numpy.sum(df) + df = numpy.reshape(df, (1, df.size)) for k in range(0, nc): - ca1 = scipy.reshape(ca[:,k,:], (ninmax, lmu)) + ca1 = numpy.reshape(ca[:,k,:], (ninmax, lmu)) cak = ca1[oja,:] - dfmat[k, :] = scipy.sum(scipy.absolute(cak) > 0, axis = 0) - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) + dfmat[k, :] = numpy.sum(numpy.absolute(cak) > 0, axis = 0) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) beta[ja1, :] = cak beta_list.append(beta) else: for k in range(0, nc): - dfmat[k, :] = scipy.zeros([1, lmu], dtype = scipy.float64) - beta_list.append(scipy.zeros([nvars, lmu], dtype = scipy.float64)) + dfmat[k, :] = numpy.zeros([1, lmu], dtype = numpy.float64) + beta_list.append(numpy.zeros([nvars, lmu], dtype = numpy.float64)) # - df = scipy.zeros([1, lmu], dtype = scipy.float64) + df = numpy.zeros([1, lmu], dtype = numpy.float64) # if kopt == 2: grouped = True @@ -298,18 +298,18 @@ def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, fit['offset'] = is_offset fit['class'] = 'multnet' else: - dd = scipy.array([nvars, lmu], dtype = scipy.integer) + dd = numpy.array([nvars, lmu], dtype = numpy.integer) if ninmax > 0: - ca = ca[0:ninmax,:]; - df = scipy.sum(scipy.absolute(ca) > 0, axis = 0); - ja = ia[0:ninmax] - 1; # ia is 1-indexes in fortran - oja = scipy.argsort(ja) + ca = ca[0:ninmax,:] + df = numpy.sum(numpy.absolute(ca) > 0, axis = 0) + ja = ia[0:ninmax] - 1 # ia is 1-indexes in fortran + oja = numpy.argsort(ja) ja1 = ja[oja] - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64); - beta[ja1, :] = ca[oja, :]; + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) + beta[ja1, :] = ca[oja, :] else: - beta = scipy.zeros([nvars,lmu], dtype = scipy.float64); - df = scipy.zeros([1,lmu], dtype = scipy.float64); + beta = numpy.zeros([nvars,lmu], dtype = numpy.float64) + df = numpy.zeros([1,lmu], dtype = numpy.float64) # fit = dict() fit['a0'] = a0 diff --git a/glmnet_python/mrelnet.py b/glmnet_python/mrelnet.py index 3613e49..5e4b473 100644 --- a/glmnet_python/mrelnet.py +++ b/glmnet_python/mrelnet.py @@ -4,7 +4,7 @@ """ # import packages/methods -import scipy +import numpy import ctypes from wtmean import wtmean from loadGlmLib import loadGlmLib @@ -19,9 +19,9 @@ def mrelnet(x, is_sparse, irs, pcs, y, weights, offset, parm, # nr = y.shape[1] wym = wtmean(y, weights) - wym = scipy.reshape(wym, (1, wym.size)) - yt2 = (y - scipy.tile(wym, (y.shape[0], 1)))**2 - nulldev = scipy.sum(wtmean(yt2,weights)*scipy.sum(weights)) + wym = numpy.reshape(wym, (1, wym.size)) + yt2 = (y - numpy.tile(wym, (y.shape[0], 1)))**2 + nulldev = numpy.sum(wtmean(yt2,weights)*numpy.sum(weights)) if len(offset) == 0: offset = y*0 @@ -39,15 +39,15 @@ def mrelnet(x, is_sparse, irs, pcs, y, weights, offset, parm, ###################################### # force inputs into fortran order and scipy float64 copyFlag = False - x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - y = y.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) - vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) - ulam = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) + x = x.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + irs = irs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + pcs = pcs.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + y = y.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + weights = weights.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + jd = jd.astype(dtype = numpy.int32, order = 'F', copy = copyFlag) + vp = vp.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + cl = cl.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) + ulam = ulam.astype(dtype = numpy.float64, order = 'F', copy = copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- @@ -56,28 +56,28 @@ def mrelnet(x, is_sparse, irs, pcs, y, weights, offset, parm, lmu = -1 lmu_r = ctypes.c_int(lmu) # a0 - a0 = scipy.zeros([nr, nlam], dtype = scipy.float64) - a0 = a0.astype(dtype = scipy.float64, order = 'F', copy = False) + a0 = numpy.zeros([nr, nlam], dtype = numpy.float64) + a0 = a0.astype(dtype = numpy.float64, order = 'F', copy = False) a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ca - ca = scipy.zeros([nx, nr, nlam], dtype = scipy.float64) - ca = ca.astype(dtype = scipy.float64, order = 'F', copy = False) + ca = numpy.zeros([nx, nr, nlam], dtype = numpy.float64) + ca = ca.astype(dtype = numpy.float64, order = 'F', copy = False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia - ia = -1*scipy.ones([nx], dtype = scipy.int32) - ia = ia.astype(dtype = scipy.int32, order = 'F', copy = False) + ia = -1*numpy.ones([nx], dtype = numpy.int32) + ia = ia.astype(dtype = numpy.int32, order = 'F', copy = False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin - nin = -1*scipy.ones([nlam], dtype = scipy.int32) - nin = nin.astype(dtype = scipy.int32, order = 'F', copy = False) + nin = -1*numpy.ones([nlam], dtype = numpy.int32) + nin = nin.astype(dtype = numpy.int32, order = 'F', copy = False) nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # rsq - rsq = -1*scipy.ones([nlam], dtype = scipy.float64) - rsq = rsq.astype(dtype = scipy.float64, order = 'F', copy = False) + rsq = -1*numpy.ones([nlam], dtype = numpy.float64) + rsq = rsq.astype(dtype = numpy.float64, order = 'F', copy = False) rsq_r = rsq.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm - alm = -1*scipy.ones([nlam], dtype = scipy.float64) - alm = alm.astype(dtype = scipy.float64, order = 'F', copy = False) + alm = -1*numpy.ones([nlam], dtype = numpy.float64) + alm = alm.astype(dtype = numpy.float64, order = 'F', copy = False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 @@ -182,54 +182,54 @@ def mrelnet(x, is_sparse, irs, pcs, y, weights, offset, parm, ninmax = max(nin) # fix first value of alm (from inf to correct value) if ulam[0] == 0.0: - t1 = scipy.log(alm[1]) - t2 = scipy.log(alm[2]) - alm[0] = scipy.exp(2*t1 - t2) + t1 = numpy.log(alm[1]) + t2 = numpy.log(alm[2]) + alm[0] = numpy.exp(2*t1 - t2) # create return fit dictionary if nr > 1: dfmat = a0.copy() - dd = scipy.array([nvars, lmu], dtype = scipy.integer) + dd = numpy.array([nvars, lmu], dtype = numpy.integer) beta_list = list() if ninmax > 0: # TODO: is the reshape here done right? - ca = scipy.reshape(ca, (nx, nr, lmu)) + ca = numpy.reshape(ca, (nx, nr, lmu)) ca = ca[0:ninmax, :, :] ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran - oja = scipy.argsort(ja) + oja = numpy.argsort(ja) ja1 = ja[oja] - df = scipy.any(scipy.absolute(ca) > 0, axis=1) - df = scipy.sum(df, axis = 0) - df = scipy.reshape(df, (1, df.size)) + df = numpy.any(numpy.absolute(ca) > 0, axis=1) + df = numpy.sum(df, axis = 0) + df = numpy.reshape(df, (1, df.size)) for k in range(0, nr): - ca1 = scipy.reshape(ca[:,k,:], (ninmax, lmu)) + ca1 = numpy.reshape(ca[:,k,:], (ninmax, lmu)) cak = ca1[oja,:] - dfmat[k, :] = scipy.sum(scipy.absolute(cak) > 0, axis = 0) - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) + dfmat[k, :] = numpy.sum(numpy.absolute(cak) > 0, axis = 0) + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) beta[ja1, :] = cak beta_list.append(beta) else: for k in range(0, nr): - dfmat[k, :] = scipy.zeros([1, lmu], dtype = scipy.float64) - beta_list.append(scipy.zeros([nvars, lmu], dtype = scipy.float64)) + dfmat[k, :] = numpy.zeros([1, lmu], dtype = numpy.float64) + beta_list.append(numpy.zeros([nvars, lmu], dtype = numpy.float64)) # - df = scipy.zeros([1, lmu], dtype = scipy.float64) + df = numpy.zeros([1, lmu], dtype = numpy.float64) # fit = dict() fit['beta'] = beta_list fit['dfmat']= dfmat else: - dd = scipy.array([nvars, lmu], dtype = scipy.integer) + dd = numpy.array([nvars, lmu], dtype = numpy.integer) if ninmax > 0: - ca = ca[0:ninmax,:]; - df = scipy.sum(scipy.absolute(ca) > 0, axis = 0); - ja = ia[0:ninmax] - 1; # ia is 1-indexes in fortran - oja = scipy.argsort(ja) + ca = ca[0:ninmax,:] + df = numpy.sum(numpy.absolute(ca) > 0, axis = 0) + ja = ia[0:ninmax] - 1 # ia is 1-indexes in fortran + oja = numpy.argsort(ja) ja1 = ja[oja] - beta = scipy.zeros([nvars, lmu], dtype = scipy.float64); - beta[ja1, :] = ca[oja, :]; + beta = numpy.zeros([nvars, lmu], dtype = numpy.float64) + beta[ja1, :] = ca[oja, :] else: - beta = scipy.zeros([nvars,lmu], dtype = scipy.float64); - df = scipy.zeros([1,lmu], dtype = scipy.float64); + beta = numpy.zeros([nvars,lmu], dtype = numpy.float64) + df = numpy.zeros([1,lmu], dtype = numpy.float64) fit['beta'] = beta fit['a0'] = a0 diff --git a/glmnet_python/wtmean.py b/glmnet_python/wtmean.py index c3c84aa..0727fbf 100644 --- a/glmnet_python/wtmean.py +++ b/glmnet_python/wtmean.py @@ -13,25 +13,25 @@ returns nan-removed weighted mean as a 1D array of size K """ -import scipy +import numpy def wtmean(mat,weights): if len(weights.shape) == 1: - weights = scipy.reshape(weights, [scipy.size(weights), 1]) + weights = numpy.reshape(weights, [numpy.size(weights), 1]) wmat = isfinite(mat)*weights mat[isnan(mat)] = 0 swmat = mat*wmat tf = weights != 0 tf = tf[:,0] - y = scipy.sum(swmat[tf, :], axis = 0)/scipy.sum(wmat, axis = 0) + y = numpy.sum(swmat[tf, :], axis = 0)/numpy.sum(wmat, axis = 0) return y # end of wtmean def isnan(x): - return ~scipy.isfinite(x) + return ~numpy.isfinite(x) # end of isnan def isfinite(x): - return scipy.isfinite(x) + return numpy.isfinite(x) # end of isfinite diff --git a/setup.py b/setup.py index 47bf59e..cc8855a 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,53 @@ import os, sys -from setuptools import setup, find_packages -# from numpy.distutils.core import setup, Extension +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext +import subprocess -cmd = 'gfortran ./glmnet_python/GLMnet.f -fPIC -fdefault-real-8 -shared -o ./glmnet_python/GLMnet.so' -os.system(cmd) +class GfortranExtension(Extension): + def __init__(self, name, sourcedir='', input='', output=''): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + self.input = os.path.join(self.sourcedir, input) + self.output = os.path.join(self.sourcedir, output) + +class GfortranBuild(build_ext): + def run(self): + try: + out = subprocess.check_output(['gfortran', '--version']) + except OSError: + raise RuntimeError("gfortran must be installed to build the following extensions: " + + ", ".join(e.name for e in self.extensions)) + + for ext in self.extensions: + self.build_extension(ext) + + def build_extension(self, ext): + gfortran_args = ['-fPIC', + '-fdefault-real-8', + '-shared', + '-o', + ext.output] + + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + + env = os.environ.copy() + subprocess.check_call(['gfortran', ext.input] + gfortran_args, cwd=self.build_temp, env=env) + + extdir = os.path.join(os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))), + os.path.basename(ext.sourcedir)) + subprocess.check_call(['install', ext.output, extdir], cwd=self.build_temp, env=env) setup(name='glmnet_python', - version = '0.2.0', + version = '1.0.2', description = 'Python version of glmnet, from Stanford University', long_description=open('README.md').read(), - url="https://github.com/bbalasub1/glmnet_python", - author = 'Trevor Hastie, Balakumar B.J.', - author_email = 'bbalasub@gmail.com', + url="https://github.com/johnlees/glmnet_python", + author = 'Han Fang (modified by John Lees)', + author_email = 'hanfang.cshl@gmail.com,john@johnlees.me', license = 'GPL-2', packages=['glmnet_python'], install_requires=['joblib>=0.10.3'], - package_data={'glmnet_python': ['*.so', 'glmnet_python/*.so']}, classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Science/Research', @@ -26,4 +58,9 @@ 'Operating System :: Unix', ], keywords='glm glmnet ridge lasso elasticnet', - ) + ext_modules=[GfortranExtension('GLMnet', 'glmnet_python', + 'GLMnet.f', 'GLMnet.so')], + cmdclass={'build_ext': GfortranBuild}, + package_data={'glmnet_python': ['*.so', 'glmnet_python/*.so']}, + zip_safe=False +)