Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# CI workflow: run the pytest suite on every pull request targeting main,
# across a matrix of Python versions. (Indentation reconstructed — the
# original rendering had lost all YAML structure.)
name: Pytest Suite

on:
  pull_request:
    branches:
      - main

jobs:
  test:
    strategy:
      # Keep running the other matrix entries even if one Python version fails.
      fail-fast: false
      matrix:
        include:
          # Older Pythons are pinned to the last ubuntu image that ships them.
          # - {"version": "3.6", "os": "ubuntu-20.04"}  # EOL: 2021-12-23
          - {"version": "3.7", "os": "ubuntu-22.04"}
          - {"version": "3.8", "os": "ubuntu-latest"}
          - {"version": "3.9", "os": "ubuntu-latest"}
          - {"version": "3.10", "os": "ubuntu-latest"}
          - {"version": "3.11", "os": "ubuntu-latest"}
          - {"version": "3.12", "os": "ubuntu-latest"}
          - {"version": "3.13", "os": "ubuntu-latest"}

    name: Python ${{ matrix.version }} on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.version }}
      - name: Install dependencies
        run: |
          pip install .[keras] pytest pytest-md-report
      - name: Run tests
        # Third-party action that posts a test summary to the job page.
        uses: dariocurr/pytest-summary@main
        with:
          options: -v --durations=10 --tb=short
          paths: test/


# Plain-pytest alternative, kept for reference:
# - name: Run tests
#   run: |
#     pytest \
#       -v \
#       --tb=short \
#       --durations=10 \
#       &> report
#     cat report >> $GITHUB_STEP_SUMMARY
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,14 @@ A few notes:
## Implemented converters

#### Scikit-Learn preprocessing
| Model | Implementation | Test | Notes |
| ---------------------- | --------------- | --------- | ----------------------------- |
| `MinMaxScaler` | Available | Available | |
| `StandardScaler` | Available | Available | |
| `QuantileTransformer` | Available | Available | |
| `FunctionTransformer` | Available | Available | Only functions in math.h |
| `ColumnTransformer` | Available | Available | Only integer column indices |
| `Pipeline` | Available | Partial | Pipelines of pipelines break |
| Model | Implementation | Test | Notes |
| ---------------------- | --------------- | --------- |-----------------------------------|
| `MinMaxScaler` | Available | Available | |
| `StandardScaler` | Available | Available | |
| `QuantileTransformer` | Available | Available | |
| `FunctionTransformer` | Available | Available | Supports user-defined C functions |
| `ColumnTransformer` | Available | Available | Only integer column indices |
| `Pipeline` | Available | Partial | Pipelines of pipelines break |

#### Scikit-Learn models
| Model | Implementation | Test | Notes |
Expand Down
55 changes: 29 additions & 26 deletions scikinC/GBDTTraversalConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from scikinC import BaseConverter
import numpy as np

from scikinC._tools import array2c, retrieve_prior
from scikinC._tools import array2c, retrieve_prior, sklearn_min_version

class GBDTTraversalConverter (BaseConverter):
"""
Expand Down Expand Up @@ -41,9 +41,11 @@ def convert(self, bdt, name=None):

min_, max_=self._get_limits(bdt)

nX = bdt.n_features_in_
nX = bdt.n_features_in_

retvar="FLOAT_T ret[%d]" % n_classes
n_output = max(2, n_classes) if sklearn_min_version("1.0") else n_classes

retvar="FLOAT_T ret[%d]" % n_output
invar="FLOAT_T inp[%d]" % nX
lines += [
"#include <math.h>",
Expand Down Expand Up @@ -100,20 +102,22 @@ def convert(self, bdt, name=None):


for iClass in range (n_classes):
lines.append ( " "
"accumulator[%(iClass)d] += %(learningrate).10f * __%(name)s_traversal ( inp, "
" v%(iTree)03d_%(iClass)02d, t%(iTree)03d_%(iClass)02d, f%(iTree)03d_%(iClass)02d, l%(iTree)03d_%(iClass)02d, r%(iTree)03d_%(iClass)02d ); "
% dict(
learningrate = bdt.learning_rate,
maxlen = len (tree[iClass].tree_.feature),
iClass = iClass,
iTree = iTree,
name = name or "bdt",
value = array2c ([v[0][0] for v in tree[iClass].tree_.value]),
threshold = array2c (threshold, "%.20f"),
feature = array2c (feature, "%.0f"),
left = array2c ([l for l in tree[iClass].tree_.children_left], "%.0f"),
right = array2c ([r for r in tree[iClass].tree_.children_right], "%.0f"),
class_id = 1 if n_classes == 1 else iClass
lines.append ( " "
"accumulator[%(class_id)d] += %(learningrate).10f * __%(name)s_traversal ( inp, "
" v%(iTree)03d_%(iClass)02d, t%(iTree)03d_%(iClass)02d, f%(iTree)03d_%(iClass)02d, l%(iTree)03d_%(iClass)02d, r%(iTree)03d_%(iClass)02d ); "
% dict(
class_id=class_id,
learningrate=bdt.learning_rate,
maxlen=len (tree[iClass].tree_.feature),
iClass=iClass,
iTree=iTree,
name=name or "bdt",
value=array2c ([v[0][0] for v in tree[iClass].tree_.value]),
threshold=array2c (threshold, "%.20f"),
feature=array2c (feature, "%.0f"),
left=array2c ([l for l in tree[iClass].tree_.children_left], "%.0f"),
right=array2c ([r for r in tree[iClass].tree_.children_right], "%.0f"),
))

lines.append (" }")
Expand All @@ -130,20 +134,19 @@ def convert(self, bdt, name=None):
lines.append(" update_%s_tree%03d (acc, inp); " % (name or bdt, iTree))



if n_classes > 1:
if n_output > 1:
lines += [
" short argmax = 0; ",
" for (i = 0; i < %d; ++i) if (acc[i] > acc[argmax]) argmax = i; " % n_classes,
" for (i = 0; i < %d; ++i) if (acc[i] > acc[argmax]) argmax = i; " % n_output,
" if (acc[argmax] > 1e10) { ",
" for (i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_classes,
" for (i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_output,
" return ret; ",
" }",
" for (i=0; i < %d; ++i) acc[i] = exp(acc[i]);" % n_classes,
" for (i=0; i < %d; ++i) acc[i] = (acc[i] > 1e300?1e300:acc[i]);" % n_classes,
" for (i=0; i < %d; ++i) acc[i] = exp(acc[i]);" % n_output,
" for (i=0; i < %d; ++i) acc[i] = (acc[i] > 1e300?1e300:acc[i]);" % n_output,
" long double sum = 0;",
" for (i=0; i < %d; ++i) sum += acc[i];" % n_classes,
" for (i=0; i < %d; ++i) acc[i] /= sum;" % n_classes,
" for (i=0; i < %d; ++i) sum += acc[i];" % n_output,
" for (i=0; i < %d; ++i) acc[i] /= sum;" % n_output,
]
else:
lines += [
Expand All @@ -153,7 +156,7 @@ def convert(self, bdt, name=None):


lines += [
" for (i = 0; i < %d; ++i) ret[i] = acc[i];" % n_classes,
" for (i = 0; i < %d; ++i) ret[i] = acc[i];" % n_output,
" return ret;", "}"
]

Expand Down
199 changes: 99 additions & 100 deletions scikinC/GBDTUnrollingConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,106 +2,105 @@
from scikinC import BaseConverter
import numpy as np

from scikinC._tools import array2c, retrieve_prior


class GBDTUnrollingConverter (BaseConverter):
"""
Converts GradientBoostingClassifiers with explicit conversion of
each tree in C language. Resulting C takes longer to compile, but it is
slightly faster in inference, and does not require pointer algebra.
"""

def _singletree(self, tree, node):
"Single-tree traversal"
if tree.feature[node] >= 0:
return "(inp[%d] <= %.20f ? %s : %s)" % (tree.feature[node],
tree.threshold[node],
self._singletree(tree, tree.children_left[node]),
self._singletree(tree, tree.children_right[node]))
else:
return str(tree.value[node][0][0])


@ staticmethod
def _get_limits(bdt):
mins=[None] * bdt.n_features_in_
maxs=[None] * bdt.n_features_in_

for treeset in bdt.estimators_:
for tree in treeset:
for feature in range(bdt.n_features_in_):
features=tree.tree_.feature
if feature not in features: continue
min_=np.min(tree.tree_.threshold[features == feature])
if mins[feature] is None or min_ < mins[feature]:
mins[feature]=min_

max_=np.max(tree.tree_.threshold[features == feature])
if maxs[feature] is None or max_ > maxs[feature]:
maxs[feature]=max_

return mins, maxs




def convert(self, bdt, name=None):
n_classes=bdt.n_classes_ if bdt.n_classes_ > 2 else 1
lines=self.header()

if n_classes > 1:
for iClass in range(n_classes):
lines.append("/* ret [ %d ] is the probability for category: %-15s */" %
(iClass, str(bdt.classes_[iClass])))

min_, max_=self._get_limits(bdt)

nX = bdt.n_features_in_

retvar="FLOAT_T ret[%d]" % n_classes
invar="FLOAT_T inp[%d]" % nX
lines += [
"#include <math.h>",
"extern \"C\"",
"FLOAT_T *%s (%s, const %s)" % (name or "bdt", retvar, invar),
"{",
" const FLOAT_T init[] = %s;" % array2c(retrieve_prior(bdt)),
" int i; ",
" for (i=0; i < %d; ++i) ret[i] = init[i];" % n_classes,
]

for iTree, tree in enumerate(bdt.estimators_):
lines += [" /** TREE %03d **/" % iTree]
for iClass in range(n_classes):
from scikinC._tools import array2c, retrieve_prior, sklearn_min_version



class GBDTUnrollingConverter(BaseConverter):
"""
Converts GradientBoostingClassifiers with explicit conversion of
each tree in C language. Resulting C takes longer to compile, but it is
slightly faster in inference, and does not require pointer algebra.
"""

def _singletree(self, tree, node):
"Single-tree traversal"
if tree.feature[node] >= 0:
return "(inp[%d] <= %.20f ? %s : %s)" % (tree.feature[node],
tree.threshold[node],
self._singletree(tree, tree.children_left[node]),
self._singletree(tree, tree.children_right[node]))
else:
return str(tree.value[node][0][0])

@staticmethod
def _get_limits(bdt):
mins = [None] * bdt.n_features_in_
maxs = [None] * bdt.n_features_in_

for treeset in bdt.estimators_:
for tree in treeset:
for feature in range(bdt.n_features_in_):
features = tree.tree_.feature
if feature not in features: continue
min_ = np.min(tree.tree_.threshold[features == feature])
if mins[feature] is None or min_ < mins[feature]:
mins[feature] = min_

max_ = np.max(tree.tree_.threshold[features == feature])
if maxs[feature] is None or max_ > maxs[feature]:
maxs[feature] = max_

return mins, maxs

def convert(self, bdt, name=None):
n_classes = bdt.n_classes_ if bdt.n_classes_ > 2 else 1
lines = self.header()

if n_classes > 1:
for iClass in range(n_classes):
lines.append(
"/* ret [ %d ] is the probability for category: %-15s */" %
(iClass, str(bdt.classes_[iClass]))
)

min_, max_ = self._get_limits(bdt)

nX = bdt.n_features_in_
n_output = max(2, n_classes) if sklearn_min_version("1.0") else n_classes

retvar = "FLOAT_T ret[%d]" % n_output
invar = "FLOAT_T inp[%d]" % nX
lines += [
" ret[%d] += %f * (%s); " % (iClass, bdt.learning_rate,
self._singletree(tree[iClass].tree_, 0))
]


if n_classes > 1:
lines += [
" short argmax = 0; ",
" for (int i = 0; i < %d; ++i) if (ret[i] > ret[argmax]) argmax = i; " % n_classes,
" if (ret[argmax] > 1e10) { ",
" for (int i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_classes,
" return ret; ",
" }",
" for (short i=0; i < %d; ++i) ret[i] = exp(ret[i]);" % n_classes,
" for (short i=0; i < %d; ++i) ret[i] = (ret[i] > 1e300?1e300:ret[i]);" % n_classes,
" long double sum = 0;",
" for (short i=0; i < %d; ++i) sum += ret[i];" % n_classes,
" for (short i=0; i < %d; ++i) ret[i] /= sum;" % n_classes,
"#include <math.h>",
"extern \"C\"",
"FLOAT_T *%s (%s, const %s)" % (name or "bdt", retvar, invar),
"{",
" const FLOAT_T init[] = %s;" % array2c(retrieve_prior(bdt)),
" int i; ",
" for (i=0; i < %d; ++i) ret[i] = init[i];" % n_output,
]
else:
lines += [
" if (ret[0] > 1e10) ret[0] = 1.;",
" else ret[0] = 1. / (1 + exp(-ret[0]));"
]


lines += [" return ret;", "}"]

return "\n".join(lines)

for iTree, tree in enumerate(bdt.estimators_):
lines += [" /** TREE %03d **/" % iTree]
for iClass in range(n_classes):
class_id = 1 if n_classes == 1 else iClass
lines += [
" ret[%d] += %f * (%s); " % (class_id, bdt.learning_rate,
self._singletree(tree[iClass].tree_, 0))
]


if n_output > 1:
lines += [
" short argmax = 0; ",
" for (int i = 0; i < %d; ++i) if (ret[i] > ret[argmax]) argmax = i; " % n_output,
" if (ret[argmax] > 1e10) { ",
" for (int i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_output,
" return ret; ",
" }",
" for (short i=0; i < %d; ++i) ret[i] = exp(ret[i]);" % n_output,
" for (short i=0; i < %d; ++i) ret[i] = (ret[i] > 1e300?1e300:ret[i]);" % n_output,
" long double sum = 0;",
" for (short i=0; i < %d; ++i) sum += ret[i];" % n_output,
" for (short i=0; i < %d; ++i) ret[i] /= sum;" % n_output,
]
else:
lines += [
" if (ret[0] > 1e10) ret[0] = 1.;",
" else ret[0] = 1. / (1 + exp(-ret[0]));"
]

lines += [" return ret;", "}"]

return "\n".join(lines)
4 changes: 4 additions & 0 deletions scikinC/ModelLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def load_from_string ( string ):


if os.path.isfile (string):
if string.endswith(".keras"):
from tensorflow.keras.models import load_model
return ({name or _basename(string): load_model (string, compile=False)},)

try:
with open ( string, 'rb' ) as f:
## it is a pickled object
Expand Down
Loading