Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
<MicrosoftDotNetInteractiveVersion>1.0.0-beta.22314.1</MicrosoftDotNetInteractiveVersion>
<MicrosoftMLOnnxRuntimeVersion>1.10.0</MicrosoftMLOnnxRuntimeVersion>
<MlNetMklDepsVersion>0.0.0.12</MlNetMklDepsVersion>
<MlNetMklDepsVersion>0.0.0.12</MlNetMklDepsVersion>
<MlNetMklDepsPackageVersion>0.0.0.12</MlNetMklDepsPackageVersion>
<OneDalDevelReducedPackageVersion>2021.7.0</OneDalDevelReducedPackageVersion>
<OneDalBinaryMajorVersion>1</OneDalBinaryMajorVersion>
<OneDalBinaryMinorVersion>1</OneDalBinaryMinorVersion>
<MSTestTestAdapterVersion>2.1.0</MSTestTestAdapterVersion>
<MSTestTestFrameworkVersion>2.1.0</MSTestTestFrameworkVersion>
<NewtonsoftJsonVersion>13.0.1</NewtonsoftJsonVersion>
Expand Down
156 changes: 155 additions & 1 deletion src/Microsoft.ML.FastTree/RandomForestClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using Microsoft.ML;
using Microsoft.ML.Calibrators;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Data;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Model;
using Microsoft.ML.Runtime;
using Microsoft.ML.Trainers.FastTree;
Expand Down Expand Up @@ -219,7 +222,26 @@ private protected override FastForestBinaryModelParameters TrainModelCore(TrainC
trainData.CheckOptFloatWeight();
FeatureCount = trainData.Schema.Feature.Value.Type.GetValueCount();
ConvertData(trainData);
TrainCore(ch);

if (!trainData.Schema.Weight.HasValue &&
System.Runtime.InteropServices.RuntimeInformation.ProcessArchitecture == System.Runtime.InteropServices.Architecture.X64 &&
Environment.GetEnvironmentVariable("MLNET_BACKEND") == "ONEDAL")
{
if (FastTreeTrainerOptions.FeatureFraction != 1.0)
{
ch.Warning($"oneDAL decision forest doesn't support 'FeatureFraction'[per tree] != 1.0, changing it from {FastTreeTrainerOptions.FeatureFraction} to 1.0");
FastTreeTrainerOptions.FeatureFraction = 1.0;
}
CursOpt cursorOpt = CursOpt.Label | CursOpt.Features;
var cursorFactory = new FloatLabelCursor.Factory(trainData, cursorOpt);
TrainCoreOneDal(ch, cursorFactory, FeatureCount);
if (FeatureMap != null)
TrainedEnsemble.RemapFeatures(FeatureMap);
}
else
{
TrainCore(ch);
}
}
// LogitBoost is naturally calibrated to
// output probabilities when transformed using
Expand All @@ -230,6 +252,138 @@ private protected override FastForestBinaryModelParameters TrainModelCore(TrainC
return new FastForestBinaryModelParameters(Host, TrainedEnsemble, FeatureCount, InnerOptions);
}

// P/Invoke bridge to the native oneDAL (Intel oneAPI Data Analytics Library) helper
// library shipped as "OneDalNative". Used by TrainCoreOneDal to offload decision-forest
// training when the MLNET_BACKEND=ONEDAL environment variable is set.
internal static class OneDal
{
    // Library name resolved by the runtime's native-library loader (no extension/prefix).
    private const string OneDalLibPath = "OneDalNative";

    // Trains a decision-forest classifier natively. Inputs: dense row-major features
    // (nRows x nColumns) and labels. Outputs: the flat per-node arrays (lteChild, gtChild,
    // splitFeature, featureThreshold, leafValues) and the serialized model in modelPtr.
    // Returns the number of bytes written to modelPtr.
    [DllImport(OneDalLibPath, EntryPoint = "decisionForestClassificationCompute")]
    public static extern unsafe int DecisionForestClassificationCompute(
        void* featuresPtr, void* labelsPtr, long nRows, int nColumns, int nClasses, int numberOfThreads,
        float featureFractionPerSplit, int numberOfTrees, int numberOfLeaves, int minimumExampleCountPerLeaf, int maxBins,
        void* lteChildPtr, void* gtChildPtr, void* splitFeaturePtr, void* featureThresholdPtr, void* leafValuesPtr, void* modelPtr);
}

/// <summary>
/// Trains the random forest with the native oneDAL backend, then rebuilds the managed
/// <see cref="TrainedEnsemble"/> from the flat per-node arrays the native call fills in,
/// so downstream scoring and serialization behave exactly as with the managed trainer.
/// </summary>
/// <param name="ch">Channel used for asserts, checks and warnings.</param>
/// <param name="cursorFactory">Factory producing label+feature cursors over the training data.</param>
/// <param name="featureCount">Number of feature columns; every row is densified to this width.</param>
private protected void TrainCoreOneDal(IChannel ch, FloatLabelCursor.Factory cursorFactory, int featureCount)
{
    CheckOptions(ch);
    Initialize(ch);

    List<float> featuresList = new List<float>();
    List<float> labelsList = new List<float>();
    int nClasses = 2; // binary classification only
    int numberOfLeaves = FastTreeTrainerOptions.NumberOfLeaves;
    int numberOfTrees = FastTreeTrainerOptions.NumberOfTrees;
    long n = 0;

    // 0 lets oneDAL choose its own thread count.
    int numberOfThreads = 0;
    if (FastTreeTrainerOptions.NumberOfThreads.HasValue)
        numberOfThreads = FastTreeTrainerOptions.NumberOfThreads.Value;

    using (var cursor = cursorFactory.Create())
    {
        while (cursor.MoveNext())
        {
            // label
            labelsList.Add(cursor.Label);

            // features: oneDAL consumes a dense row-major matrix, so sparse rows are densified.
            var values = cursor.Features.GetValues();
            if (cursor.Features.IsDense)
            {
                ch.Assert(values.Length == featureCount);

                for (int j = 0; j < featureCount; ++j)
                {
                    featuresList.Add(values[j]);
                }
            }
            else
            {
                var indices = cursor.Features.GetIndices();
                int i = 0;
                for (int j = 0; j < indices.Length; ++j)
                {
                    // Zero-fill the gap before the next explicitly stored entry.
                    for (int k = i; k < indices[j]; ++k)
                    {
                        featuresList.Add(0);
                    }
                    // BUGFIX: in a sparse VBuffer, values[j] pairs with column indices[j];
                    // the previous code read values[indices[j]], which picks the wrong slot
                    // (or reads past the end of the values span).
                    featuresList.Add(values[j]);
                    i = indices[j] + 1;
                }
                // BUGFIX: pad the row tail with zeros so every row holds exactly
                // featureCount entries; otherwise all subsequent rows would be
                // misaligned in the flat matrix handed to the native call.
                for (int k = i; k < featureCount; ++k)
                {
                    featuresList.Add(0);
                }
            }
            n++;
        }
        ch.Check(n > 0, "No training examples in dataset.");
        if (cursor.BadFeaturesRowCount > 0)
            ch.Warning("Skipped {0} instances with missing features/labelColumn during training", cursor.SkippedRowCount);
    }

    float[] featuresArray = featuresList.ToArray();
    float[] labelsArray = labelsList.ToArray();

    // Flat output buffers the native call fills in: (numberOfLeaves - 1) internal nodes
    // and numberOfLeaves leaves per tree, laid out tree after tree.
    int[] lteChildArray = new int[(numberOfLeaves - 1) * numberOfTrees];
    int[] gtChildArray = new int[(numberOfLeaves - 1) * numberOfTrees];
    int[] splitFeatureArray = new int[(numberOfLeaves - 1) * numberOfTrees];
    float[] featureThresholdArray = new float[(numberOfLeaves - 1) * numberOfTrees];
    float[] leafValuesArray = new float[numberOfLeaves * numberOfTrees];

    int oneDalModelSize = -1;
    // Conservative upper bound for the serialized native model buffer —
    // NOTE(review): TODO confirm against the native library's actual size requirements.
    int projectedOneDalModelSize = 96 * nClasses * numberOfLeaves * numberOfTrees + 4096 * 16;
    byte[] oneDalModel = new byte[projectedOneDalModelSize];

    unsafe
    {
#pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration
        fixed (void* featuresPtr = &featuresArray[0], labelsPtr = &labelsArray[0],
            lteChildPtr = &lteChildArray[0], gtChildPtr = &gtChildArray[0], splitFeaturePtr = &splitFeatureArray[0],
            featureThresholdPtr = &featureThresholdArray[0], leafValuesPtr = &leafValuesArray[0], oneDalModelPtr = &oneDalModel[0])
#pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration
        {
            oneDalModelSize = OneDal.DecisionForestClassificationCompute(featuresPtr, labelsPtr, n, featureCount, nClasses,
                numberOfThreads, (float)FastTreeTrainerOptions.FeatureFractionPerSplit, numberOfTrees,
                numberOfLeaves, FastTreeTrainerOptions.MinimumExampleCountPerLeaf, FastTreeTrainerOptions.MaximumBinCountPerFeature,
                lteChildPtr, gtChildPtr, splitFeaturePtr, featureThresholdPtr, leafValuesPtr, oneDalModelPtr
            );
        }
    }

    // Rebuild a managed ensemble from the flat arrays rather than wrapping the opaque native
    // model (InternalTreeEnsemble(oneDalModel, oneDalModelSize, ...)), so the existing
    // managed (de)serialization and inspection paths keep working. oneDalModelSize is
    // retained for that alternative native-scoring path.
    TrainedEnsemble = new InternalTreeEnsemble();
    for (int i = 0; i < numberOfTrees; ++i)
    {
        // Slice tree i out of the flat buffers.
        int[] lteChildArrayPerTree = new int[numberOfLeaves - 1];
        int[] gtChildArrayPerTree = new int[numberOfLeaves - 1];
        int[] splitFeatureArrayPerTree = new int[numberOfLeaves - 1];
        float[] featureThresholdArrayPerTree = new float[numberOfLeaves - 1];
        double[] leafValuesArrayPerTree = new double[numberOfLeaves];

        // oneDAL forests have no categorical splits, gains or missing-value defaults;
        // fill those with neutral values.
        int[][] categoricalSplitFeaturesPerTree = new int[numberOfLeaves - 1][];
        bool[] categoricalSplitPerTree = new bool[numberOfLeaves - 1];
        double[] splitGainPerTree = new double[numberOfLeaves - 1];
        float[] defaultValueForMissingPerTree = new float[numberOfLeaves - 1];

        for (int j = 0; j < numberOfLeaves - 1; ++j)
        {
            lteChildArrayPerTree[j] = lteChildArray[(numberOfLeaves - 1) * i + j];
            gtChildArrayPerTree[j] = gtChildArray[(numberOfLeaves - 1) * i + j];
            splitFeatureArrayPerTree[j] = splitFeatureArray[(numberOfLeaves - 1) * i + j];
            featureThresholdArrayPerTree[j] = featureThresholdArray[(numberOfLeaves - 1) * i + j];
            leafValuesArrayPerTree[j] = leafValuesArray[numberOfLeaves * i + j];

            categoricalSplitFeaturesPerTree[j] = null;
            categoricalSplitPerTree[j] = false;
            splitGainPerTree[j] = 0.0;
            defaultValueForMissingPerTree[j] = 0.0f;
        }
        // The leaf-value slice has one more entry than the node slices; copy the last leaf.
        leafValuesArrayPerTree[numberOfLeaves - 1] = leafValuesArray[numberOfLeaves * i + numberOfLeaves - 1];

        InternalQuantileRegressionTree newTree = new InternalQuantileRegressionTree(splitFeatureArrayPerTree, splitGainPerTree, null,
            featureThresholdArrayPerTree, defaultValueForMissingPerTree, lteChildArrayPerTree, gtChildArrayPerTree, leafValuesArrayPerTree,
            categoricalSplitFeaturesPerTree, categoricalSplitPerTree);
        // Raw thresholds came from oneDAL; derive the binned thresholds from the train set.
        newTree.PopulateThresholds(TrainSet);
        TrainedEnsemble.AddTree(newTree);
    }
}

private protected override ObjectiveFunctionBase ConstructObjFunc(IChannel ch)
{
return new ObjectiveFunctionImpl(TrainSet, _trainSetLabels, FastTreeTrainerOptions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ public InternalQuantileRegressionTree(int maxLeaves)
{
}

/// <summary>
/// Builds a quantile regression tree directly from pre-computed flat node arrays
/// (e.g. the output of the native oneDAL trainer) by forwarding to the matching
/// <c>InternalRegressionTree</c> constructor; adds no quantile-specific state.
/// </summary>
public InternalQuantileRegressionTree(int[] splitFeatures, double[] splitGain, double[] gainPValue,
    float[] rawThresholds, float[] defaultValueForMissing, int[] lteChild, int[] gtChild, double[] leafValues,
    int[][] categoricalSplitFeatures, bool[] categoricalSplit)
    : base(splitFeatures, splitGain, gainPValue, rawThresholds, defaultValueForMissing,
        lteChild, gtChild, leafValues, categoricalSplitFeatures, categoricalSplit)
{
}

internal InternalQuantileRegressionTree(ModelLoadContext ctx, bool usingDefaultValue, bool categoricalSplits)
: base(ctx, usingDefaultValue, categoricalSplits)
{
Expand Down
31 changes: 29 additions & 2 deletions src/Microsoft.ML.FastTree/TreeEnsemble/InternalRegressionTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ internal class InternalRegressionTree
/// </summary>
public int[][] CategoricalSplitFeatureRanges;
// These are the thresholds based on the binned values of the raw features.
public uint[] Thresholds { get; }
public uint[] Thresholds { get; private set; }
// These are the thresholds based on the raw feature values. Populated after training.
public float[] RawThresholds { get; private set; }
public double[] SplitGains { get { return _splitGain; } }
Expand Down Expand Up @@ -189,7 +189,7 @@ public static InternalRegressionTree Create(int numLeaves, int[] splitFeatures,
}
}

internal InternalRegressionTree(int[] splitFeatures, double[] splitGain, double[] gainPValue,
protected InternalRegressionTree(int[] splitFeatures, double[] splitGain, double[] gainPValue,
float[] rawThresholds, float[] defaultValueForMissing, int[] lteChild, int[] gtChild, double[] leafValues,
int[][] categoricalSplitFeatures, bool[] categoricalSplit)
: this()
Expand All @@ -201,6 +201,7 @@ internal InternalRegressionTree(int[] splitFeatures, double[] splitGain, double[
_splitGain = splitGain;
_gainPValue = gainPValue;
RawThresholds = rawThresholds;
Thresholds = new uint[NumLeaves - 1];
DefaultValueForMissing = defaultValueForMissing;
LteChild = lteChild;
GtChild = gtChild;
Expand Down Expand Up @@ -1099,6 +1100,32 @@ public void PopulateRawThresholds(Dataset dataset)
}
}

/// <summary>
/// Recomputes the binned thresholds (<see cref="Thresholds"/>) from the raw feature-value
/// thresholds (<see cref="RawThresholds"/>) using the bin upper bounds of
/// <paramref name="dataset"/>. Inverse companion of <see cref="PopulateRawThresholds"/>;
/// needed for trees built from raw thresholds only (e.g. oneDAL-trained trees).
/// </summary>
public void PopulateThresholds(Dataset dataset)
{
    int numNodes = NumLeaves - 1;
    for (int n = 0; n < numNodes; n++)
    {
        // Only numeric splits are mapped; categorical splits keep their default threshold.
        if (CategoricalSplit[n])
            continue;

        int flock;
        int subfeature;
        dataset.MapFeatureToFlockAndSubFeature(SplitFeatures[n], out flock, out subfeature);

        // Hoist the bound-array lookup out of the per-bin scan (was re-fetched each iteration).
        var binUpperBounds = dataset.Flocks[flock].BinUpperBounds(subfeature);
        uint numBins = (uint)binUpperBounds.Length;

        // Pick the first bin (skipping bin 0) whose upper bound exceeds the raw threshold.
        // If no bound exceeds it, Thresholds[n] stays 0, matching the original behavior.
        for (uint i = 1; i < numBins; ++i)
        {
            if (RawThresholds[n] < binUpperBounds[i])
            {
                Thresholds[n] = i;
                break;
            }
        }
    }
}

public void RemapFeatures(int[] oldToNewFeatures)
{
Contracts.AssertValue(oldToNewFeatures);
Expand Down
57 changes: 55 additions & 2 deletions src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Data;
Expand All @@ -25,6 +26,25 @@ internal class InternalTreeEnsemble
private readonly string _firstInputInitializationContent;
private readonly List<InternalRegressionTree> _trees;

// oneDAL model specific properties
// Discriminates how an attached serialized native oneDAL model is interpreted at
// prediction time (see GetOutput); stored as a byte for compact serialization.
public enum OneDalModelType : byte
{
    Regression = 0,
    Classification = 1
}
private readonly OneDalModelType _oneDalModelType;
private readonly byte[] _oneDalModel;
private readonly int _oneDalModelSize;

// P/Invoke bridge to the native oneDAL helper library ("OneDalNative") used for scoring
// when this ensemble carries a serialized native model (_oneDalModel != null).
internal static class OneDal
{
    // Library name resolved by the runtime's native-library loader (no extension/prefix).
    private const string OneDalLibPath = "OneDalNative";

    // Scores one dense feature row (nColumns floats) against the serialized model bytes.
    // NOTE(review): return value is treated by GetOutput as P(class 1) — confirm against
    // the native implementation.
    [DllImport(OneDalLibPath, EntryPoint = "decisionForestClassificationPrediction")]
    public static extern unsafe double DecisionForestClassificationPrediction(
        void* featuresPtr, int nColumns, int nClasses, void* modelPtr, int modelSize);
}

public IEnumerable<InternalRegressionTree> Trees => _trees;

public double Bias { get; set; }
Expand All @@ -34,6 +54,15 @@ internal class InternalTreeEnsemble
/// <summary>
/// Creates an empty ensemble to be populated via <c>AddTree</c>. No native oneDAL model
/// is attached, so <c>GetOutput</c> scores by summing the managed trees.
/// </summary>
public InternalTreeEnsemble()
{
    _trees = new List<InternalRegressionTree>();
    _oneDalModel = null;
}

/// <summary>
/// Creates an ensemble backed by a serialized native oneDAL model; when attached,
/// <c>GetOutput</c> delegates scoring to the native predictor instead of the managed trees.
/// </summary>
/// <param name="oneDalModel">Opaque serialized model bytes produced by the native library.</param>
/// <param name="oneDalModelSize">Number of valid bytes in <paramref name="oneDalModel"/>.</param>
/// <param name="oneDalModelType">Whether the native model is regression or classification.</param>
public InternalTreeEnsemble(byte[] oneDalModel, int oneDalModelSize, OneDalModelType oneDalModelType)
{
    _trees = new List<InternalRegressionTree>();
    _oneDalModel = oneDalModel;
    _oneDalModelSize = oneDalModelSize;
    _oneDalModelType = oneDalModelType;
}

public InternalTreeEnsemble(ModelLoadContext ctx, bool usingDefaultValues, bool categoricalSplits)
Expand All @@ -54,6 +83,7 @@ public InternalTreeEnsemble(ModelLoadContext ctx, bool usingDefaultValues, bool
AddTree(InternalRegressionTree.Load(ctx, usingDefaultValues, categoricalSplits));
Bias = ctx.Reader.ReadDouble();
_firstInputInitializationContent = ctx.LoadStringOrNull();
_oneDalModel = null;
}

internal void Save(ModelSaveContext ctx)
Expand Down Expand Up @@ -258,8 +288,31 @@ public double GetOutput(int[] binnedInstance)
/// <summary>
/// Computes the ensemble score for a single example. When a native oneDAL model is
/// attached (<see cref="_oneDalModel"/> non-null) scoring is delegated to the native
/// predictor; otherwise the score is the sum of the per-tree outputs.
/// </summary>
public double GetOutput(in VBuffer<float> feat)
{
    double output = 0.0;
    if (_oneDalModel != null)
    {
        // Densify the features for the native call. BUGFIX: for a sparse vector,
        // GetValues() holds only the explicitly stored entries, so they must be
        // scattered by GetIndices(); the previous code indexed GetValues() by
        // position 0..Length-1, which misreads (or overruns) sparse inputs.
        int nFeatures = feat.Length;
        float[] featuresArray = new float[nFeatures];
        var values = feat.GetValues();
        if (feat.IsDense)
        {
            for (int i = 0; i < nFeatures; ++i)
                featuresArray[i] = values[i];
        }
        else
        {
            var indices = feat.GetIndices();
            for (int i = 0; i < indices.Length; ++i)
                featuresArray[indices[i]] = values[i];
        }

        unsafe
        {
#pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration
            fixed (void* featuresPtr = &featuresArray[0], modelPtr = &_oneDalModel[0])
#pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration
            {
                if (_oneDalModelType == OneDalModelType.Classification)
                {
                    output = OneDal.DecisionForestClassificationPrediction(featuresPtr, nFeatures, 2, modelPtr, _oneDalModelSize);
                    // Map the native probability-like score to a margin scaled by the tree
                    // count. BUGFIX: this transform now runs only for classification models;
                    // previously it also ran for other model types, turning a zero output
                    // into NumTrees.
                    output = (1.0 - 2.0 * output) * (double)NumTrees;
                }
            }
        }
    }
    else
    {
        for (int h = 0; h < NumTrees; h++)
            output += _trees[h].GetOutput(in feat);
    }
    return output;
}

Expand Down
Loading