Skip to content
This repository was archived by the owner on May 11, 2023. It is now read-only.

Commit f9c4ea4

Browse files
committed
Critical patch. Bug #26 fixed.
1 parent 039c3bf commit f9c4ea4

File tree

7 files changed

+277
-49
lines changed

7 files changed

+277
-49
lines changed

Demo/DemoConsoleApp/Playground.cs

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
using RCNet.Neural.Activation;
2-
using RCNet.Neural.Data.Transformers;
3-
using RCNet.Neural.Data.Generators;
4-
using RCNet.Neural.Data.Coders.AnalogToSpiking;
5-
using System;
1+
using System;
62
using System.Collections.Generic;
7-
using RCNet.CsvTools;
83
using System.Globalization;
94
using System.IO;
105
using System.Text;
116
using System.Linq;
7+
using RCNet.Extensions;
8+
using RCNet.Neural.Activation;
9+
using RCNet.Neural.Data.Transformers;
10+
using RCNet.Neural.Data.Generators;
11+
using RCNet.Neural.Data.Coders.AnalogToSpiking;
12+
using RCNet.CsvTools;
1213
using RCNet.MathTools;
1314
using RCNet.Neural.Data.Filter;
15+
using RCNet.Neural.Data;
1416

1517
namespace Demo.DemoConsoleApp
1618
{
@@ -294,7 +296,47 @@ private void TestBinFeatureFilter()
294296
Console.WriteLine($" {value.ToString(CultureInfo.InvariantCulture),-20} {filter.ApplyReverse(value)}");
295297
}
296298
Console.ReadLine();
299+
}
297300

301+
/// <summary>
/// Loads the given csv file into a VectorBundle, folderizes it for a fixed set of fold data ratios
/// and prints, for every resulting fold, its size and the number of samples per class whose
/// output value is at or above the 0.5 border. Waits for a console line after each tested ratio.
/// </summary>
/// <param name="dataFile">Name of the csv file to be loaded.</param>
/// <param name="numOfClasses">Number of output variables (classes) passed to the loader and counted in every fold.</param>
private void TestVectorBundleFolderization(string dataFile, int numOfClasses)
{
    const double binBorder = 0.5d;
    //The set intentionally contains values outside of the (0, 1) interval
    double[] testedRatios = { -1d, 0d, 0.1d, 0.5d, 0.75d, 1d, 2d };
    //Csv data -> VectorBundle
    CsvDataHolder csvData = new CsvDataHolder(dataFile);
    VectorBundle vectorData = VectorBundle.Load(csvData, numOfClasses);
    Console.WriteLine($"Folderization test of {dataFile}. NumOfSamples={vectorData.InputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)}, NumOfFoldDataRatios={testedRatios.Length.ToString(CultureInfo.InvariantCulture)}");
    foreach (double ratio in testedRatios)
    {
        Console.WriteLine($" Testing fold data ratio = {ratio.ToString(CultureInfo.InvariantCulture)}");
        List<VectorBundle> folds = vectorData.Folderize(ratio, binBorder);
        Console.WriteLine($" Number of resulting folds = {folds.Count.ToString(CultureInfo.InvariantCulture)}");
        int foldNo = 0;
        foreach (VectorBundle fold in folds)
        {
            int sampleCount = fold.InputVectorCollection.Count;
            Console.WriteLine($" FoldIdx={foldNo.ToString(CultureInfo.InvariantCulture),-4} FoldSize={sampleCount.ToString(CultureInfo.InvariantCulture),-4}");
            //Per-class counters of the samples at or above the border
            int[] positives = new int[numOfClasses];
            positives.Populate(0);
            for (int s = 0; s < sampleCount; s++)
            {
                var outputs = fold.OutputVectorCollection[s];
                for (int c = 0; c < numOfClasses; c++)
                {
                    if (outputs[c] >= binBorder)
                    {
                        positives[c]++;
                    }
                }
            }
            Console.WriteLine($" Number of positive samples per class");
            for (int c = 0; c < numOfClasses; c++)
            {
                Console.WriteLine($" ClassID={c.ToString(CultureInfo.InvariantCulture), -3}, Bin1Samples={positives[c].ToString(CultureInfo.InvariantCulture)}");
            }
            ++foldNo;
        }
        //Pause so the output of this ratio can be read before continuing
        Console.ReadLine();
    }
}
299341

300342
/// <summary>
@@ -304,9 +346,7 @@ public void Run()
304346
{
305347
Console.Clear();
306348
//TODO - place your code here
307-
//TestSpikingAF((AFSpikingBase)ActivationFactory.CreateAF(new AFSpikingExpIFSettings(), _rand), 200, 0.25, 50, 100);
308-
//TestEnumFeatureFilter();
309-
TestBinFeatureFilter();
349+
TestVectorBundleFolderization("./Data/ProximalPhalanxOutlineAgeGroup_train.csv", 3);
310350
return;
311351
}
312352

RCNet/Neural/Data/Filter/BinFeatureFilter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public override void Update(double sample)
5959
/// <inheritdoc/>
6060
public override double ApplyReverse(double value)
{
    //The value reversed by the base filter is returned as is (no rounding and no bounding to the feature range).
    return base.ApplyReverse(value);
}
6464

6565
}//BinFeatureFilter

RCNet/Neural/Data/Filter/EnumFeatureFIlter.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ public override void Update(double sample)
6666
/// <inheritdoc/>
6767
public override double ApplyReverse(double value)
{
    //The value reversed by the base filter is returned as is (no rounding and no bounding to the feature range).
    return base.ApplyReverse(value);
}
7172

7273
}//EnumFeatureFilter

RCNet/Neural/Data/VectorBundle.cs

Lines changed: 191 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -211,19 +211,21 @@ public static VectorBundle Load(CsvDataHolder csvData, int numOfOutputVariables)
211211
return bundle;
212212
}
213213

214-
//Methods
215214
/// <summary>
216-
/// Splits this bundle to a collection of smaller folds (sub-bundles) suitable for the cross-validation.
217-
/// Remember that in case of binary output the length of the output vectors should be equal to 1, because
218-
/// function keeps balanced ratios of 0 and 1 values in output vectors in each fold and takes into account
219-
/// only the first value in the output vector.
215+
/// Adds the data from the given bundle to this bundle.
220216
/// </summary>
221-
/// <param name="foldDataRatio">Requested ratio of the samples constituting one fold (sub-bundle).</param>
222-
/// <param name="binBorder">If specified, method keeps balanced ratios of 0 and 1 values in each fold (sub-bundle).</param>
223-
/// <returns>Collection of created folds (sub-bundles)</returns>
224-
public List<VectorBundle> CreateFolds(double foldDataRatio, double binBorder = double.NaN)
217+
/// <param name="data">Data to be added</param>
218+
public void Add(VectorBundle data)
{
    //Fail fast with a descriptive exception instead of a NullReferenceException on the first member access
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }
    //Inputs and outputs are appended in the same order so the appended samples stay paired by index
    InputVectorCollection.AddRange(data.InputVectorCollection);
    OutputVectorCollection.AddRange(data.OutputVectorCollection);
}
224+
225+
//Methods
226+
public List<VectorBundle> Folderize(double foldDataRatio, double binBorder = double.NaN)
225227
{
226-
if(OutputVectorCollection.Count < 2)
228+
if (OutputVectorCollection.Count < 2)
227229
{
228230
throw new InvalidOperationException($"Insufficient number of samples ({OutputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)}).");
229231
}
@@ -268,11 +270,11 @@ public List<VectorBundle> CreateFolds(double foldDataRatio, double binBorder = d
268270
BinDistribution refBinDistr = new BinDistribution(binBorder);
269271
refBinDistr.Update(OutputVectorCollection, 0);
270272
int min01 = Math.Min(refBinDistr.NumOf[0], refBinDistr.NumOf[1]);
271-
if(min01 < 2)
273+
if (min01 < 2)
272274
{
273275
throw new InvalidOperationException($"Insufficient bin 0 or 1 samples (less than 2).");
274276
}
275-
if(numOfFolds > min01)
277+
if (numOfFolds > min01)
276278
{
277279
numOfFolds = min01;
278280
}
@@ -334,17 +336,186 @@ public List<VectorBundle> CreateFolds(double foldDataRatio, double binBorder = d
334336

335337
return bundleCollection;
336338
}
337-
338-
339339
/// <summary>
340-
/// Adds data from given bundle into this bundle
340+
/// Splits this bundle into a collection of smaller folds (sub-bundles) suitable for cross-validation.
341+
/// When binBorder is specified, all output features are considered as binary
342+
/// within the "one takes all" group, and the function then keeps the ratios of 0 and 1 balanced
343+
/// for every output feature and fold.
341344
/// </summary>
342-
/// <param name="data">Data to be added</param>
343-
public void Add(VectorBundle data)
345+
/// <param name="foldDataRatio">Requested ratio of the samples constituting one fold (sub-bundle).</param>
346+
/// <param name="binBorder">When specified, the method keeps the ratio of 0 and 1 values balanced in each fold (sub-bundle).</param>
347+
/// <returns>Collection of created folds.</returns>
348+
public List<VectorBundle> Folderize_new(double foldDataRatio, double binBorder = double.NaN)
{
    //Contract overview:
    // - binBorder is NaN           -> samples are split sequentially, outputs are not inspected
    // - one output per sample      -> every fold gets a balanced share of bin 0 and bin 1 samples
    // - more outputs per sample    -> "one takes all": bin 1 samples of every output are dealt evenly
    //Throws InvalidOperationException when there are not enough samples to build the folds and
    //ArgumentException when a multi-output sample does not have exactly one output in bin 1.
    if (OutputVectorCollection.Count < 2)
    {
        throw new InvalidOperationException($"Insufficient number of samples ({OutputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)}).");
    }
    //Fold data ratio basic correction
    if (foldDataRatio > MaxRatioOfFoldData)
    {
        foldDataRatio = MaxRatioOfFoldData;
    }
    //Preliminary fold size estimation (at least one sample per fold)
    int foldSize = Math.Max(1, (int)Math.Round(OutputVectorCollection.Count * foldDataRatio, 0));
    //Preliminary number of folds
    int numOfFolds = OutputVectorCollection.Count / foldSize;
    if (numOfFolds < 1)
    {
        //Only reachable when the fold size exceeds the number of samples (depends on MaxRatioOfFoldData).
        //A zero fold count would lead to a division by zero in the distribution code.
        numOfFolds = 1;
    }
    //Folds creation
    if (double.IsNaN(binBorder))
    {
        //No binary output -> simple split
        return CreateSequentialFolds(numOfFolds, foldSize);
    }
    //Binary outputs -> keep balanced ratios of outputs
    int numOfOutputs = OutputVectorCollection[0].Length;
    if (numOfOutputs == 1)
    {
        //Special case: there is only one binary output
        return CreateSingleBinaryOutputFolds(numOfFolds, binBorder);
    }
    //There is more than 1 binary output - "one takes all" approach
    return CreateOneTakesAllFolds(numOfFolds, numOfOutputs, binBorder);
}

/// <summary>
/// Appends the input and output vectors stored at the given sample index of this bundle to the given fold.
/// </summary>
/// <param name="fold">Fold receiving the sample.</param>
/// <param name="sampleIdx">Index of the sample within this bundle.</param>
private void AppendSampleTo(VectorBundle fold, int sampleIdx)
{
    fold.InputVectorCollection.Add(InputVectorCollection[sampleIdx]);
    fold.OutputVectorCollection.Add(OutputVectorCollection[sampleIdx]);
}

/// <summary>
/// Splits the samples in their stored order into the requested number of folds of the given size
/// and then deals the remaining samples to the folds one by one.
/// </summary>
/// <param name="numOfFolds">Number of folds to be created (at least 1).</param>
/// <param name="foldSize">Number of samples initially placed into every fold.</param>
/// <returns>Collection of created folds.</returns>
private List<VectorBundle> CreateSequentialFolds(int numOfFolds, int foldSize)
{
    List<VectorBundle> foldCollection = new List<VectorBundle>(numOfFolds);
    int samplesPos = 0;
    for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
    {
        VectorBundle fold = new VectorBundle();
        for (int i = 0; i < foldSize && samplesPos < OutputVectorCollection.Count; i++)
        {
            AppendSampleTo(fold, samplesPos++);
        }
        foldCollection.Add(fold);
    }
    //Remaining samples
    for (int i = 0; samplesPos < OutputVectorCollection.Count; i++, samplesPos++)
    {
        AppendSampleTo(foldCollection[i % foldCollection.Count], samplesPos);
    }
    return foldCollection;
}

/// <summary>
/// Creates folds for data having exactly one binary output. Every fold first receives the same
/// number of bin 0 samples and the same number of bin 1 samples, the rest is dealt one by one.
/// </summary>
/// <param name="numOfFolds">Requested number of folds. It is lowered to the size of the smaller bin when necessary.</param>
/// <param name="binBorder">Border between bin 0 (below) and bin 1 (equal or above).</param>
/// <returns>Collection of created folds.</returns>
/// <exception cref="InvalidOperationException">Bin 0 or bin 1 contains less than 2 samples.</exception>
private List<VectorBundle> CreateSingleBinaryOutputFolds(int numOfFolds, double binBorder)
{
    //Investigation of the output data metrics
    BinDistribution refBinDistr = new BinDistribution(binBorder);
    refBinDistr.Update(OutputVectorCollection, 0);
    int min01 = Math.Min(refBinDistr.NumOf[0], refBinDistr.NumOf[1]);
    if (min01 < 2)
    {
        throw new InvalidOperationException($"Insufficient bin 0 or 1 samples (less than 2).");
    }
    if (numOfFolds > min01)
    {
        numOfFolds = min01;
    }
    //Scan data
    int[] bin0SampleIdxs = new int[refBinDistr.NumOf[0]];
    int bin0SamplesPos = 0;
    int[] bin1SampleIdxs = new int[refBinDistr.NumOf[1]];
    int bin1SamplesPos = 0;
    for (int i = 0; i < OutputVectorCollection.Count; i++)
    {
        if (OutputVectorCollection[i][0] >= refBinDistr.BinBorder)
        {
            bin1SampleIdxs[bin1SamplesPos++] = i;
        }
        else
        {
            bin0SampleIdxs[bin0SamplesPos++] = i;
        }
    }
    //Determine distributions of 0 and 1 for one fold
    int bundleBin0Count = Math.Max(1, refBinDistr.NumOf[0] / numOfFolds);
    int bundleBin1Count = Math.Max(1, refBinDistr.NumOf[1] / numOfFolds);
    //Folds creation
    List<VectorBundle> foldCollection = new List<VectorBundle>(numOfFolds);
    bin0SamplesPos = 0;
    bin1SamplesPos = 0;
    for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
    {
        VectorBundle fold = new VectorBundle();
        //Bin 0
        for (int i = 0; i < bundleBin0Count; i++)
        {
            AppendSampleTo(fold, bin0SampleIdxs[bin0SamplesPos++]);
        }
        //Bin 1
        for (int i = 0; i < bundleBin1Count; i++)
        {
            AppendSampleTo(fold, bin1SampleIdxs[bin1SamplesPos++]);
        }
        foldCollection.Add(fold);
    }
    //Remaining samples (bin 0 first, then bin 1)
    for (int i = 0; bin0SamplesPos < bin0SampleIdxs.Length; i++, bin0SamplesPos++)
    {
        AppendSampleTo(foldCollection[i % foldCollection.Count], bin0SampleIdxs[bin0SamplesPos]);
    }
    for (int i = 0; bin1SamplesPos < bin1SampleIdxs.Length; i++, bin1SamplesPos++)
    {
        AppendSampleTo(foldCollection[i % foldCollection.Count], bin1SampleIdxs[bin1SamplesPos]);
    }
    return foldCollection;
}

/// <summary>
/// Creates folds for data having more than one binary output, where exactly one output of every
/// sample has to be in bin 1 ("one takes all"). Bin 1 samples of every output feature are dealt
/// to the folds one by one.
/// </summary>
/// <param name="numOfFolds">Requested number of folds. It is lowered to the maximum possible number of folds when necessary.</param>
/// <param name="numOfOutputs">Number of output features.</param>
/// <param name="binBorder">Border between bin 0 (below) and bin 1 (equal or above).</param>
/// <returns>Collection of created folds.</returns>
/// <exception cref="ArgumentException">A sample does not have exactly one output feature in bin 1.</exception>
/// <exception cref="InvalidOperationException">An output feature has no bin 1 sample so no fold can be created.</exception>
private List<VectorBundle> CreateOneTakesAllFolds(int numOfFolds, int numOfOutputs, double binBorder)
{
    //Collect bin 1 sample indexes and check "one takes all" consistency for every output feature
    List<int>[] outBin1SampleIdxs = new List<int>[numOfOutputs];
    for (int i = 0; i < numOfOutputs; i++)
    {
        outBin1SampleIdxs[i] = new List<int>();
    }
    for (int sampleIdx = 0; sampleIdx < OutputVectorCollection.Count; sampleIdx++)
    {
        int numOf1 = 0;
        for (int outFeatureIdx = 0; outFeatureIdx < numOfOutputs; outFeatureIdx++)
        {
            if (OutputVectorCollection[sampleIdx][outFeatureIdx] >= binBorder)
            {
                outBin1SampleIdxs[outFeatureIdx].Add(sampleIdx);
                ++numOf1;
            }
        }
        if (numOf1 != 1)
        {
            throw new ArgumentException($"Data are inconsistent on data index {sampleIdx.ToString(CultureInfo.InvariantCulture)}. Output vector has {numOf1.ToString(CultureInfo.InvariantCulture)} feature(s) having bin value 1.", nameof(binBorder));
        }
    }
    //Determine max possible number of folds
    int maxNumOfFolds = OutputVectorCollection.Count;
    for (int outFeatureIdx = 0; outFeatureIdx < numOfOutputs; outFeatureIdx++)
    {
        int bin1Count = outBin1SampleIdxs[outFeatureIdx].Count;
        int outFeatureMaxFolds = Math.Min(bin1Count, OutputVectorCollection.Count - bin1Count);
        maxNumOfFolds = Math.Min(outFeatureMaxFolds, maxNumOfFolds);
    }
    if (maxNumOfFolds < 1)
    {
        //Without this check the fold collection stays empty and the modulo below divides by zero
        throw new InvalidOperationException("Insufficient bin 1 samples. At least one output feature has no sample having bin value 1.");
    }
    //Correct the number of folds to be created
    if (numOfFolds > maxNumOfFolds)
    {
        numOfFolds = maxNumOfFolds;
    }
    //Create the folds
    List<VectorBundle> foldCollection = new List<VectorBundle>(numOfFolds);
    for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
    {
        foldCollection.Add(new VectorBundle());
    }
    //Samples distribution
    for (int outFeatureIdx = 0; outFeatureIdx < numOfOutputs; outFeatureIdx++)
    {
        List<int> bin1SampleIdxs = outBin1SampleIdxs[outFeatureIdx];
        for (int bin1SampleRefIdx = 0; bin1SampleRefIdx < bin1SampleIdxs.Count; bin1SampleRefIdx++)
        {
            int dataIdx = bin1SampleIdxs[bin1SampleRefIdx];
            foldCollection[bin1SampleRefIdx % foldCollection.Count].AddPair(InputVectorCollection[dataIdx], OutputVectorCollection[dataIdx]);
        }
    }
    return foldCollection;
}
349520

350521
}//VectorBundle

0 commit comments

Comments
 (0)