Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions generate_cpp_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import argparse
import re

def get_single_booster_cpp_code(booster_tree, branch_id, class_index, indentation_level=0):
def get_single_booster_cpp_code(booster_tree, branch_id, class_index, indentation_level=0, model_type='classification'):
level = booster_tree[branch_id].split()

booster_code = ""

if 'leaf' in level[0]:
booster_code += "{0}sum[{1}] += {2};\n".format(" " * indentation_level, class_index, float(level[0].split('=')[1]))
if model_type == 'classification':
booster_code += "{0}sum[{1}] += {2};\n".format(" " * indentation_level, class_index, float(level[0].split('=')[1]))
elif model_type == 'regression':
booster_code += "{0}sum += {1};\n".format(" " * indentation_level, float(level[0].split('=')[1]))
return booster_code

branch_ids = level[1].split(',')
Expand All @@ -21,37 +24,46 @@ def get_single_booster_cpp_code(booster_tree, branch_id, class_index, indentatio
comparison = re.search('[^0-9a-zA-Z:[]+[0-9]*[0-9.]*', level[0]).group(0)

booster_code += "{0}if (sample[{1}] {2}) {{\n".format(" " * indentation_level, feature_index, comparison)
booster_code += get_single_booster_cpp_code(booster_tree, yes_branch_id, class_index, indentation_level + 1)
booster_code += get_single_booster_cpp_code(booster_tree, yes_branch_id, class_index, indentation_level + 1, model_type)
booster_code += "{0}}} else {{\n".format(" " * indentation_level)
booster_code += get_single_booster_cpp_code(booster_tree, no_branch_id, class_index, indentation_level + 1)
booster_code += get_single_booster_cpp_code(booster_tree, no_branch_id, class_index, indentation_level + 1, model_type)
booster_code += "{0}}}\n".format(" " * indentation_level)

return booster_code

def generate_single_booster_cpp_code(booster, class_index, model_type='classification'):
    """Generate the C++ source for a single dumped booster.

    Args:
        booster: iterable of dump lines; each line must begin with an
            integer branch id followed by ':'.
        class_index: forwarded unchanged to get_single_booster_cpp_code.
        model_type: 'classification' or 'regression', forwarded unchanged.
            Defaults to 'classification', the same default that
            get_single_booster_cpp_code uses, so two-argument callers
            keep working.

    Returns:
        The C++ code string built by get_single_booster_cpp_code, starting
        from branch id 0 at indentation level 1.

    Raises:
        ValueError: if the text before the first ':' of a line is not an
            integer.
    """
    # Index every dump line by the branch id that precedes its first ':'.
    booster_tree = {int(line.split(':')[0].strip()): line for line in booster}

    # Code generation starts at branch 0, one indentation level deep.
    return get_single_booster_cpp_code(booster_tree, 0, class_index, 1, model_type)

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--xgb_dump', type=str, default='dump.raw.txt', help='Raw boosters dump. Created without passing feature map file to XGBoost dump() function.')
parser.add_argument('--num_classes', type=int, required=True, help='number of classes this model is classyfing')
parser.add_argument('--model_type', type=str, required=True, help='classification/regression')

args = parser.parse_args()

result = ""

result += "#include \"xgboost_classifier.h\"\n"
if args.model_type == 'classification':
result += "#include \"xgboost_classifier.h\"\n"
else:
result += "#include \"xgboost_regressor.h\"\n"
result += "#include <iostream>\n"
result += "#include <fstream>\n"
result += "#include <vector>\n"
result += "using namespace std;\n\n"
result += "std::vector<float> xgb_classify(std::vector<float> &sample) {\n\n"
result += " std::vector<float> sum ({0}, 0.0);\n\n".format(args.num_classes)
if args.model_type == 'classification':
result += "std::vector<float> xgb_classify(std::vector<float> &sample) {\n\n"
result += " std::vector<float> sum ({0}, 0.0);\n\n".format(args.num_classes)
elif args.model_type == 'regression':
result += "float xgb_regress(std::vector<float> &sample) {\n\n"
result += " float sum = 0;"
else:
raise Exception("Please use model_type as classification/regression")

booster_counter = 0
boosters = []
Expand All @@ -64,12 +76,17 @@ def generate_single_booster_cpp_code(booster, class_index):
boosters[booster_counter - 1].append(line.strip())

for index, booster in enumerate(boosters):
class_index = index % args.num_classes
result += generate_single_booster_cpp_code(booster, class_index)
if args.model_type == 'classification':
class_index = index % args.num_classes
else:
class_index = 1
# result += generate_single_booster_cpp_code(booster, class_index, model_type='classification')
result += generate_single_booster_cpp_code(booster, class_index, model_type=args.model_type)
result += "\n\n"

result += " return sum;\n"
result += "}\n\n"

with open('xgboost_classifier.cpp', 'w') as f:
cpp_file_name = 'xgboost_classifier.cpp' if args.model_type == 'classification' else 'xgboost_regressor.cpp'
with open(cpp_file_name, 'w') as f:
f.write(result)
31 changes: 31 additions & 0 deletions sample/codeblocks_project/main.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
// g++ -c xgboost_classifier.cpp -o xgb_c.o
// g++ -c xgboost_regressor.cpp -o xgb_r.o
// g++ -std=c++11 main.cpp -I./ -o test xgb_c.o xgb_r.o
// ./test

#include <iostream>
#include "xgboost_classifier.h"
#include "xgboost_regressor.h"
#include <chrono>
#include <math.h>

Expand Down Expand Up @@ -60,5 +66,30 @@ int main()
std::cout << "Classified " << NUM_SAMPLES << " samples in " << time_diff << " miliseconds." << std::endl;
std::cout << time_diff / NUM_SAMPLES << "ms per sample." << std::endl;

// Regression test
cout << "Start regression." << endl;

test_sample = {
-0.0016666666666666904, -0.0668518518518519, 0.018124999999999978,
0.10351851851851848, -0.013125000000000024, 0.09703703703703699,
0.021249999999999977, -0.06129629629629634, -0.024583333333333356,
-0.07240740740740745, 1.250996173937358, 1.5632783693080987,
0.3273181103443364, 0.23735363332356418, 1.6726289334343811,
1.2316100868318476, 0.20377132592362593, 1.027838760908221,
1.9099825667579455, 0.2446893370421753, 1.3379751193714342,
1.5589281971761841, 0.24330398118549879, 1.315624215990686,
1.5826644564136092, 1.4901161193847656e-08, 1.4901161193847656e-08,
3.141592638688632, 1.3185890322659237, 0.3346538140629468,
1.4883498072609227, 1.8065823504935967, 0.2877854550824648,
1.0472248480137314, 1.6726289334343811, 1.027838760908221,
0.44112495924719125, 1.3379751193714342, 1.315624215990686,
0.48799331822767306, 0.03125, 0.006481481481481481,
0.04583333333333333, 0.011111111111111112, 0.9662725445360024,
1.4702680482603403, 1, 1, 1, 1
};

float reg_result = xgb_regress(test_sample);
cout << "Regression result:" << reg_result << endl;

return 1;
}
Binary file added sample/codeblocks_project/test
Binary file not shown.
Loading