Skip to content

Commit e6dff78

Browse files
vzeufack (Vannel Zeufack)
authored and committed
Update Preprocessing/OneHotEncoder.ecl

Co-authored-by: Lily <lilix@g.clemson.edu>
1 parent 0bbf67a commit e6dff78

8 files changed

Lines changed: 3 additions & 56 deletions

File tree

Preprocessing/LabelEncoder.ecl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ EXPORT LabelEncoder := MODULE
5656
RETURN Result;
5757
ENDMACRO;
5858

59-
<<<<<<< HEAD
6059
/**
6160
* Builds a lookup table that maps each category of a feature to a unique number.
6261
* Each category is assigned its index in the category set.
@@ -88,8 +87,6 @@ EXPORT LabelEncoder := MODULE
8887
RETURN LabelEncoder.MapCategoriesToValues(key);
8988
ENDMACRO;
9089

91-
=======
92-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
9390
/**
9491
* Replaces each categorical value in the data with its index in the key.
9592
* Every unknown category (not in the key) is replaced by -1.

Preprocessing/OneHotEncoder.ecl

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
/**
66
* For categorical features within a dataset,
7-
* this module creates new binary and numeric features. The new binary features only takes a value * of 0 or 1 indicating the true value of the category.
7+
* this module creates new binary and numeric features. The new binary features only takes a value of 0 or 1 indicating the true value of the category.
88
*
99
* @param baseData: DATASET(NumericField), default = DATASET([], NumericField).
1010
* <p> Data from which categories are extracted for encoding/decoding.
@@ -67,17 +67,8 @@ EXPORT OneHotEncoder(DATASET(NumericField) baseData = DATASET([], NumericField),
6767
END;
6868

6969
featureNumbers := DATASET(SET(baseData(id = 1), number), numberLayout);
70-
<<<<<<< HEAD
7170
Result := PROJECT(featureNumbers, extract(LEFT));
7271
RETURN Result;
73-
=======
74-
//display warning if some of the featureIds provided does not exists in baseData
75-
assertFeatIdsValid := IF(EXISTS(featureIdDS-featureNumbers),
76-
STD.System.Log.addWorkunitWarning('Inexisting featureIds found!', 1));
77-
78-
Result := PROJECT(featureNumbers, extract(LEFT));
79-
RETURN WHEN(Result, assertFeatIdsValid);
80-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
8172
END;
8273

8374
/**
@@ -270,4 +261,4 @@ EXPORT OneHotEncoder(DATASET(NumericField) baseData = DATASET([], NumericField),
270261
Result := ROLLUP(groupedData, GROUP, convert(LEFT, ROWS(LEFT)));
271262
RETURN Result;
272263
END;
273-
END;
264+
END;

Preprocessing/README.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ Machine Learning Preprocessing Bundle for HPCC Systems
55
- OneHotEncoder
66
- MinMaxScaler
77
- StandardScaler
8-
<<<<<<< HEAD
98
- Normaliz
109
- Split
1110
- StratifiedSplit
12-
=======
13-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23

Preprocessing/StandardScaler.ecl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ EXPORT StandardScaler(DATASET(NumericField) baseData = DATASET([], NumericField)
2626
SHARED numberLayout := Preprocessing.Types.numberLayout;
2727

2828
/**
29-
* Computes averages and stdevs for each feature in baseData.
29+
* Computes averages and standard deviation (stdevs) for each feature in baseData.
3030
*
3131
* @return avgandStdevByFeature: DATASET(KeyLayout).
3232
*/

Preprocessing/Test/Functional/RunFunctionalTests.ecl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@ $.TestLabelEncoder.RunLabelEncoderTests;
99
$.TestOneHotEncoder.RunOneHotEncoderTests;
1010
$.TestStandardScaler.RunStandardScalerTests;
1111
$.TestMinMaxScaler.RunMinMaxScalerTests;
12-
<<<<<<< HEAD
1312
$.TestNormalize.RunNormalizTests;
1413
$.TestSplit.RunSplitTests;
15-
=======
16-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
1714
$.TestUtils.RunUtilsTests;

Preprocessing/Test/Functional/TestUtils/TestDatasetComparator/RunDatasetComparatorTests.ecl

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,4 @@ $.TestAreEqualRows.TestEqualRows();
1818
$.TestAreEqualRows.TestDifferentRows();
1919

2020
$.TestCompare.TestEqualData();
21-
<<<<<<< HEAD
22-
$.TestCompare.TestRowDifference();
23-
=======
24-
//$.TestCompare.TestEqualNumericFields();
25-
$.TestCompare.TestRowDifference();
26-
$.TestCompare.TestDifferentRecord();
27-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
2821
$.TestCompare.TestRowDifference();

Preprocessing/Types.ecl

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,10 @@ IMPORT $.^.Types as MLCTypes;
66

77
t_FieldNumber := MLCTypes.t_FieldNumber;
88
t_FieldReal := MLCTypes.t_FieldReal;
9-
<<<<<<< HEAD
109
t_RecordID := MLCTypes.t_RecordID;
1110

1211
/**
1312
* Record structures for Preprocessing modules.
14-
=======
15-
16-
/**
17-
* Record structures for Preprocessing modules
18-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
1913
*/
2014
EXPORT Types := MODULE
2115
//record structure for storing REAL values
@@ -28,18 +22,13 @@ EXPORT Types := MODULE
2822
t_FieldNumber number;
2923
END;
3024

31-
<<<<<<< HEAD
3225
//record structure for storing ids
3326
EXPORT idLayout := RECORD
3427
t_RecordID id;
3528
END;
3629

3730
/**
3831
* record structures for OneHotEncoder.
39-
=======
40-
/**
41-
* record structures for OneHotEncoder
42-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
4332
*/
4433
EXPORT OneHotEncoder := MODULE
4534
//record structure for key.
@@ -57,11 +46,7 @@ EXPORT Types := MODULE
5746
END;
5847

5948
/**
60-
<<<<<<< HEAD
6149
* record structures for StandardScaler.
62-
=======
63-
* record structures for StandardScaler
64-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
6550
*/
6651
EXPORT StandardScaler := MODULE
6752
//record structure for storing features' average and standard deviation
@@ -72,13 +57,9 @@ EXPORT Types := MODULE
7257
END;
7358
END;
7459

75-
<<<<<<< HEAD
7660
/**
7761
* record structures for MinMaxScaler.
7862
*/
79-
=======
80-
//record structures for MinMaxScaler
81-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
8263
EXPORT MinMaxScaler := MODULE
8364
//record structure for storing a feature's min and max value
8465
EXPORT FeatureMinMax := RECORD
@@ -95,7 +76,6 @@ EXPORT Types := MODULE
9576
DATASET(FeatureMinMax) minsMaxs;
9677
END;
9778
END;
98-
<<<<<<< HEAD
9979

10080
/**
10181
* record structures for normalize function.
@@ -107,6 +87,4 @@ EXPORT Types := MODULE
10787
t_FieldReal value;
10888
END;
10989
END;
110-
=======
111-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
11290
END;

Preprocessing/Utils/DatasetComparator/Compare.ecl

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,18 +48,12 @@ EXPORT Compare(dta1, dta2) := FUNCTIONMACRO
4848
PROJECT(ROWS(LEFT), XF(LEFT)));
4949

5050
#UNIQUENAME(comparisonResult)
51-
<<<<<<< HEAD
5251
/*%comparisonResult% := IF(%comp%.AreOfSameType(dta1, dta2),
5352
IF(COUNT(dta1) = COUNT(dta2),
5453
IF(%loopResult%[1].isSameRow, 0, %loopResult%[1].cnt - 1), -2), -1);*/
5554

5655
%comparisonResult% := IF(COUNT(dta1) = COUNT(dta2),
5756
IF(%loopResult%[1].isSameRow, 0, %loopResult%[1].cnt - 1),
5857
-2);
59-
=======
60-
%comparisonResult% := IF(%comp%.AreOfSameType(dta1, dta2),
61-
IF(COUNT(dta1) = COUNT(dta2),
62-
IF(%loopResult%[1].isSameRow, 0, %loopResult%[1].cnt - 1), -2), -1);
63-
>>>>>>> e119eb11b4b077b1c2a25099f826d07112ea1d23
6458
RETURN %comparisonResult%;
6559
ENDMACRO;

0 commit comments

Comments
 (0)