Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
55959a5
cross validation of MLDataset Pipeline
Oct 24, 2017
396f9aa
changes with CV sampling
Oct 26, 2017
33bac56
changes to cv_cache
Oct 26, 2017
b422e68
closer to working cross validation for MLDataset
Oct 26, 2017
d45d4e1
CV / xarray experimentation - work in progress
Oct 31, 2017
92054c9
MLDataset cross validation working for pipeline of 1 step that is uns…
Nov 1, 2017
35450c1
wrapped sklearn classes need to wrap score methods as fit, predict, o…
Nov 1, 2017
f86a079
update tests;fix cross validation with most data structures
Nov 3, 2017
5cf646f
a couple tests for Python 2.7
Nov 3, 2017
744109a
avoid dask-searchcv test in conda.recipe;add test_config.yml to MANIF…
Nov 3, 2017
1e7bec8
remove print statement
Nov 3, 2017
83437f5
ensure test_config.yaml included in pkg
Nov 3, 2017
de9efd0
remove elm.mldataset.cross_validation - modify environment.yml for el…
Nov 3, 2017
6267041
fix usage of is_arr utility to separate X, y tuple
Nov 3, 2017
66013e6
1850 passing tests
Nov 4, 2017
a91caf6
dask-searchcv in meta.yaml
Nov 4, 2017
e9b5d85
use elm/label/dev and elm for CI installs
Nov 4, 2017
f6ef7c8
change earthio version for fixing CI build
Nov 4, 2017
948efe5
ensure EARTHIO_CHANNEL_STR is set correctly in .travis.yml
Nov 6, 2017
edbe1f5
ensure ANACONDA_UPLOAD_USER is defined in .travis for pkg upload
Nov 6, 2017
6304e37
change order of channels to ensure dask-searchcv comes from elm
Nov 6, 2017
8a6d46f
subset the number of tests being run in CI
Nov 6, 2017
21a18d9
better diagnostics on upload failure in CI
Nov 6, 2017
8ad7b4c
remove earthio from CI
Nov 6, 2017
9a1734d
be sure to create env from elm's conda build output
Nov 6, 2017
dc47f65
remove diagnostic print from deploy section
Nov 6, 2017
00ea1be
refactor to simplify changes in dask-searchcv
Nov 8, 2017
7d81830
fix pep8 issues
Nov 8, 2017
cca7b36
move some of dask-searchcv PR 61 changes to Elm
Nov 8, 2017
5018e3e
add cross_validation.py - remove commented code
Nov 8, 2017
acdf244
remove extra whitespace
Nov 8, 2017
431b1aa
changes to avoid needing changes in dask-searchcv
Nov 9, 2017
3f78207
space between functions
Nov 9, 2017
589762c
changes for dask-searchcv PR 65 refit changes
Nov 30, 2017
fbc7db2
Fix NLDAS data exploration notebook
gbrener Dec 1, 2017
1d15f64
Add comments
gbrener Dec 1, 2017
02f8950
Fix typo
gbrener Dec 1, 2017
7be6c74
get rid of unicode literals
Dec 2, 2017
82547dd
Merge branch 'fix_data_expl_notebook' of https://github.com/Continuum…
Dec 2, 2017
8111b40
merge Greg's changes and PR 228
Dec 2, 2017
0b4681a
move Elm-Earthio-NLDAS commit 88047abc80684d0ea0c9d831b7887da082b69c84
Dec 2, 2017
3d2e64e
fixes for reading forcing data and ML ideas in NLDAS notebooks
Dec 2, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 16 additions & 18 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ dist: trusty

env:
global:
- EARTHIO_VERSION=master
- EARTHIO_INSTALL_METHOD="conda"
- EARTHIO_TEST_ENV=earth-test-env
- TEST_ENV=earth-test-env
- ELM_EXAMPLE_DATA_PATH=/tmp/elm-data
- EARTHIO_CHANNEL_STR=" -c ioam -c conda-forge -c scitools/label/dev -c bioconda"

- INSTALL_CHANNELS=" -c elm -c elm/label/dev -c ioam -c conda-forge -c scitools/label/dev -c bioconda "
- ANACONDA_UPLOAD_USER=elm
matrix:
- PYTHON=3.6 NUMPY=1.12
- PYTHON=3.5 NUMPY=1.11 TEST_DOCS=1
Expand All @@ -25,12 +23,12 @@ before_install:

install:
- MAKE_MINICONDA=1 ./build_elm_env.sh
- pushd docs
- ~/miniconda/bin/conda env create -f environment.yml -n ${EARTHIO_TEST_ENV}-docs
- source ~/miniconda/bin/activate ${EARTHIO_TEST_ENV}-docs
#- pushd docs
#- ~/miniconda/bin/conda env create -f environment.yml -n ${TEST_ENV}-docs
#- source ~/miniconda/bin/activate ${TEST_ENV}-docs
# - if [ "$TEST_DOCS" ]; then conda install -c conda-forge -c ioam -c scitools --use-local elm earthio && make html && make doctest; fi
- source deactivate
- popd
#- source deactivate
#- popd

script:
- rm -rf $ELM_EXAMPLE_DATA_PATH/*
Expand All @@ -40,11 +38,11 @@ notifications:
on_failure: always
flowdock: $FD_TOKEN

#deploy:
# - provider: script
# script:
# - ~/miniconda/bin/conda install --name root anaconda-client && ~/miniconda/bin/conda build $EARTHIO_CHANNEL_STR --output --python $PYTHON --numpy $NUMPY conda.recipe | xargs ~/miniconda/bin/conda convert -p all -o _pkgs && find _pkgs -type f -name "*.tar.bz2" -exec ~/miniconda/bin/anaconda --token $ANACONDA_UPLOAD_TOKEN upload --user $ANACONDA_UPLOAD_USER --label dev --force {} \+
# on:
# tags: false
# all_branches: true
# skip_cleanup: true
deploy:
- provider: script
script:
- ~/miniconda/bin/conda install --name root anaconda-client && ~/miniconda/bin/conda build $INSTALL_CHANNELS --output --python $PYTHON --numpy $NUMPY conda.recipe | xargs ~/miniconda/bin/conda convert -p all -o _pkgs && find _pkgs -type f -name "*.tar.bz2" -exec ~/miniconda/bin/anaconda --token $ANACONDA_UPLOAD_TOKEN upload --user $ANACONDA_UPLOAD_USER --label dev --force {} \+
on:
tags: false
all_branches: true
skip_cleanup: true
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include elm/config/defaults/environment_vars_spec.yaml
include elm/config/defaults/config_standard.yaml
include elm/tests/test_config.yaml
58 changes: 18 additions & 40 deletions build_elm_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,29 @@
set -e

export ELM_BUILD_DIR=`pwd -P`
export EARTHIO_VERSION="${EARTHIO_VERSION:-master}"

if [ \( "$EARTHIO_INSTALL_METHOD" = "conda" \) -o \( "$EARTHIO_INSTALL_METHOD" = "git" \) ]; then
rm -rf .earthio_tmp
git clone http://github.com/ContinuumIO/earthio .earthio_tmp
cd .earthio_tmp
git fetch --all
echo git checkout $EARTHIO_VERSION
git checkout $EARTHIO_VERSION

set +e
IGNORE_ELM_DATA_DOWNLOAD=1 . build_earthio_env.sh
set -e
else
if [ ! -d "$HOME/miniconda" ]; then
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
fi

if [ ! -d "$HOME/miniconda" ]; then
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
source deactivate
conda config --set always_yes true
conda config --set anaconda_upload no
conda install -n root conda conda-build

# Create $EARTHIO_TEST_ENV
conda env remove -n $EARTHIO_TEST_ENV || true
conda create -n $EARTHIO_TEST_ENV $EARTHIO_CHANNEL_STR -c elm -y python=$PYTHON numpy=$NUMPY earthio

# Add earthio package to index
mkdir -p ~/miniconda/conda-bld/linux-64/
cp -av ~/miniconda/pkgs/earthio*.tar.bz2 ~/miniconda/conda-bld/linux-64/
cd ~/miniconda/conda-bld
conda index
cd -
else
source deactivate
export PATH="$PATH:$(dirname $(which python))"
fi

conda remove -n root elm &> /dev/null || true
pip uninstall -y elm &> /dev/null || true
conda config --set always_yes true
conda config --set anaconda_upload no
conda install -n root conda conda-build

# Create $TEST_ENV
conda env remove -n $TEST_ENV || true

cd $ELM_BUILD_DIR

conda build $EARTHIO_CHANNEL_STR --python $PYTHON --numpy $NUMPY conda.recipe
conda install -n $EARTHIO_TEST_ENV $EARTHIO_CHANNEL_STR --use-local python=$PYTHON numpy=$NUMPY elm
for repo in "dask-glm" "dask-searchcv";do
# TODO improve with packaging later for ^^ dask packages
git clone "https://github.com/dask/${repo}" && cd $repo && python setup.py install;
done
conda remove -n root elm &> /dev/null || true
pip uninstall -y elm &> /dev/null || true

conda build $INSTALL_CHANNELS --python $PYTHON --numpy $NUMPY conda.recipe
conda create -n $TEST_ENV $INSTALL_CHANNELS --use-local python=$PYTHON numpy=$NUMPY elm
set +e
8 changes: 4 additions & 4 deletions conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@ build:
requirements:
build:
- python
- numpy
- setuptools

run:
- attrs
- deap
- dask
- dask-searchcv
- dill
- distributed
- earthio
- networkx
- numba
- numpy
- pandas
- python
- requests
- scikit-image
- scikit-learn
- scipy
Expand All @@ -46,7 +46,7 @@ test:
imports:
- elm.config
- elm.mldataset
- elm.model_selection
#- elm.model_selection
- elm.pipeline.pipeline
- elm.pipeline.steps
- elm.scripts
Expand Down
2 changes: 1 addition & 1 deletion elm/config/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

'''Module of helpers for building command line interfaces'''
from argparse import ArgumentParser
Expand Down
2 changes: 1 addition & 1 deletion elm/config/config_info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

'''
This module loads elm/config/defaults/config_standard.yaml which
Expand Down
2 changes: 1 addition & 1 deletion elm/config/dask_settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

'''
dask_settings.py is a module of helpers for dask executors
Expand Down
2 changes: 1 addition & 1 deletion elm/config/env.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

'''This module parses environment variables used by elm.

Expand Down
2 changes: 1 addition & 1 deletion elm/config/load_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

'''
This module is used by the command line interface of elm
Expand Down
2 changes: 1 addition & 1 deletion elm/config/logging_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

import logging
import os
Expand Down
2 changes: 1 addition & 1 deletion elm/config/tests/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

import os

Expand Down
5 changes: 3 additions & 2 deletions elm/config/tests/test_config_simple.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function

import copy
import os
Expand Down Expand Up @@ -51,7 +51,7 @@ def tst_bad_config(bad_config):
return ok_config

def test_bad_train_config():

pytest.skip('Deprecated (temporarily) elm.config')
bad_config = copy.deepcopy(DEFAULTS)
name = tuple(bad_config['train'].keys())[0]
for item in NOT_DICT + (None,):
Expand Down Expand Up @@ -82,6 +82,7 @@ def test_bad_train_config():


def test_bad_pipeline():
pytest.skip('Deprecated (temporarily) elm.config')
bad_config = copy.deepcopy(DEFAULTS)
for item in NOT_LIST:
bad_config['run'] = item
Expand Down
2 changes: 1 addition & 1 deletion elm/config/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import absolute_import, division, print_function


from pkg_resources import resource_stream, Requirement, resource_filename
Expand Down
1 change: 1 addition & 0 deletions elm/mldataset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from elm.mldataset.util import is_mldataset
3 changes: 2 additions & 1 deletion elm/mldataset/serialize_mixin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import (absolute_import, division, print_function, unicode_literals,)
from __future__ import (absolute_import, division, print_function,)
import dill

class SerializeMixin:
'''A mixin for serialization of estimators via dill'''
def dumps(self, protocol=None, byref=None, fmode=None, recurse=None):
Expand Down
45 changes: 45 additions & 0 deletions elm/mldataset/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import numpy as np
import dask.array as da

from collections import Sequence


def is_mldataset(arr, raise_err=False):
    '''Return True if ``arr`` is an MLDataset or an xarray Dataset.

    Parameters:
        arr: Object to test.
        raise_err: Controls what happens when ``xarray_filters`` /
            ``xarray`` cannot be imported.  If False (default) the
            function returns False; if True a ValueError is raised.

    Returns:
        bool - True when the imports succeed and ``arr`` is an instance
        of ``MLDataset`` or ``Dataset``, otherwise False.  Note that
        ``raise_err`` only affects the failed-import case: a non-Dataset
        ``arr`` returns False even when ``raise_err`` is True.

    Raises:
        ValueError: if the imports fail and ``raise_err`` is True.
    '''
    try:
        from xarray_filters import MLDataset
        from xarray import Dataset
    except Exception:
        # Deliberately broad (best effort): any failure while importing
        # the optional dependencies is treated as "not available".
        if not raise_err:
            return False
        # Much of the ML logic wrapping xarray would fail if only xarray
        # and not xarray_filters is installed, but when xarray_filters is
        # installed, xarray.Dataset can be used.
        raise ValueError('Cannot use cross validation for xarray Dataset without xarray_filters')
    return isinstance(arr, (MLDataset, Dataset))


def is_arr(arr, raise_err=False):
    '''Return True if ``arr`` is an MLDataset, Dataset, Numpy or Dask array.

    Parameters:
        arr: Object to test.
        raise_err: If True, raise ValueError instead of returning False
            when ``arr`` is not one of the accepted array types.

    Returns:
        bool - True for ``np.ndarray``, ``da.Array`` or anything
        accepted by ``is_mldataset``; False otherwise (only when
        ``raise_err`` is False).

    Raises:
        ValueError: when ``raise_err`` is True and ``arr`` is not an
            accepted type (the message comes from ``is_mldataset`` if
            the xarray imports fail there).
    '''
    # Test plain arrays first: a Numpy/Dask array is valid regardless of
    # whether xarray_filters is installed, so is_mldataset (which raises
    # on a failed import when raise_err is True) must not be reached.
    if isinstance(arr, (np.ndarray, da.Array)):
        return True
    if is_mldataset(arr, raise_err=raise_err):
        return True
    if raise_err:
        raise ValueError('Expected MLDataset, Dataset or Dask/Numpy array')
    return False


def _is_xy_tuple(result, typ=tuple):
if typ and not isinstance(typ, tuple):
typ = (typ,)
typ = typ + (tuple,)
return isinstance(result, typ) and len(result) == 2


def _split_transformer_result(X, y, typ=tuple):
    '''Separate a transformer result into ``X`` and ``y``.

    If ``X`` is a 2-element pair (as decided by ``_is_xy_tuple`` with
    the given ``typ``), it is unpacked into ``(X, y2)``.  ``y2``
    replaces ``y`` only when ``y`` is None and ``y2`` is not None.

    Parameters:
        X: Either the feature data or an ``(X, y)`` pair.
        y: The current target, possibly None.
        typ: Forwarded to ``_is_xy_tuple``.

    Returns:
        tuple - ``(X, y)`` after any unpacking.
    '''
    if not _is_xy_tuple(X, typ=typ):
        # Nothing to unpack; pass both through unchanged
        return X, y
    X, unpacked_y = X
    if y is None and unpacked_y is not None:
        y = unpacked_y
    return X, y
Loading