From 8140e417ac8f21d4273461aaa571607dca2e6f17 Mon Sep 17 00:00:00 2001 From: "R. G. Esteves" Date: Thu, 6 Oct 2022 09:14:09 -0700 Subject: [PATCH 1/5] Random forests accelerated with OneDAL --- .../RandomForestClassification.cs | 156 ++++- .../InternalQuantileRegressionTree.cs | 8 + .../TreeEnsemble/InternalRegressionTree.cs | 31 +- .../Microsoft.ML.OneDal.csproj | 42 ++ .../Properties/AssemblyInfo.cs | 11 + src/Native/Native.proj | 21 +- src/Native/OneDalNative/CMakeLists.txt | 24 + src/Native/OneDalNative/OneDalAlgorithms.cpp | 623 ++++++++++++++++++ src/Native/build.cmd | 3 + src/Native/build.sh | 8 +- 10 files changed, 921 insertions(+), 6 deletions(-) create mode 100644 src/Microsoft.ML.OneDal/Microsoft.ML.OneDal.csproj create mode 100644 src/Microsoft.ML.OneDal/Properties/AssemblyInfo.cs create mode 100644 src/Native/OneDalNative/CMakeLists.txt create mode 100644 src/Native/OneDalNative/OneDalAlgorithms.cpp diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index ec7846f7dc..d474ee861e 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -4,7 +4,10 @@ using System; using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; using Microsoft.ML; +using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Calibrators; using Microsoft.ML.CommandLine; using Microsoft.ML.Data; @@ -219,7 +222,26 @@ private protected override FastForestBinaryModelParameters TrainModelCore(TrainC trainData.CheckOptFloatWeight(); FeatureCount = trainData.Schema.Feature.Value.Type.GetValueCount(); ConvertData(trainData); - TrainCore(ch); + + if (!trainData.Schema.Weight.HasValue && + System.Runtime.InteropServices.RuntimeInformation.ProcessArchitecture == System.Runtime.InteropServices.Architecture.X64 && + Environment.GetEnvironmentVariable("MLNET_BACKEND") == "ONEDAL") + { + if (FastTreeTrainerOptions.FeatureFraction != 1.0) + { + ch.Warning($"oneDAL decision forest doesn't support 'FeatureFraction'[per tree] != 1.0, changing it from {FastTreeTrainerOptions.FeatureFraction} to 1.0"); + FastTreeTrainerOptions.FeatureFraction = 1.0; + } + CursOpt cursorOpt = CursOpt.Label | CursOpt.Features; + var cursorFactory = new FloatLabelCursor.Factory(trainData, cursorOpt); + TrainCoreOneDal(ch, cursorFactory, FeatureCount); + if (FeatureMap != null) + TrainedEnsemble.RemapFeatures(FeatureMap); + } + else + { + TrainCore(ch); + } } // LogitBoost is naturally calibrated to // output probabilities when transformed using @@ -230,6 +252,138 @@ private protected override FastForestBinaryModelParameters TrainModelCore(TrainC return new FastForestBinaryModelParameters(Host, TrainedEnsemble, FeatureCount, InnerOptions); } + internal static class OneDal + { + private const string OneDalLibPath = "OneDalNative"; + + [DllImport(OneDalLibPath, EntryPoint = "decisionForestClassificationCompute")] + public static extern unsafe int DecisionForestClassificationCompute( + void* featuresPtr, void* labelsPtr, long nRows, int nColumns, int nClasses, int numberOfThreads, + float featureFractionPerSplit, int numberOfTrees, int numberOfLeaves, int minimumExampleCountPerLeaf, int maxBins, + void* lteChildPtr, void* gtChildPtr, void* splitFeaturePtr, void* featureThresholdPtr, void* leafValuesPtr, void* modelPtr); + } + + private protected void TrainCoreOneDal(IChannel ch, FloatLabelCursor.Factory cursorFactory, int featureCount) + { + CheckOptions(ch); + 
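+            // oneDAL fast path, in three steps: drain the training cursor into dense
+            // float arrays, invoke the native decisionForestClassificationCompute entry
+            // point, then rebuild the managed ensemble from the flat per-tree buffers.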
Initialize(ch); + + List featuresList = new List(); + List labelsList = new List(); + int nClasses = 2; + int numberOfLeaves = FastTreeTrainerOptions.NumberOfLeaves; + int numberOfTrees = FastTreeTrainerOptions.NumberOfTrees; + long n = 0; + + int numberOfThreads = 0; + if (FastTreeTrainerOptions.NumberOfThreads.HasValue) + numberOfThreads = FastTreeTrainerOptions.NumberOfThreads.Value; + + using (var cursor = cursorFactory.Create()) + { + while (cursor.MoveNext()) + { + // label + labelsList.Add(cursor.Label); + + // features + var values = cursor.Features.GetValues(); + if (cursor.Features.IsDense) + { + ch.Assert(values.Length == featureCount); + + for (int j = 0; j < featureCount; ++j) + { + featuresList.Add(values[j]); + } + } + else + { + var indices = cursor.Features.GetIndices(); + int i = 0; + for (int j = 0; j < indices.Length; ++j) + { + for (int k = i; k < indices[j]; ++k) + { + featuresList.Add(0); + } + featuresList.Add(values[indices[j]]); + i = indices[j] + 1; + } + } + n++; + } + ch.Check(n > 0, "No training examples in dataset."); + if (cursor.BadFeaturesRowCount > 0) + ch.Warning("Skipped {0} instances with missing features/labelColumn during training", cursor.SkippedRowCount); + } + + float[] featuresArray = featuresList.ToArray(); + float[] labelsArray = labelsList.ToArray(); + + int[] lteChildArray = new int[(numberOfLeaves - 1) * numberOfTrees]; + int[] gtChildArray = new int[(numberOfLeaves - 1) * numberOfTrees]; + int[] splitFeatureArray = new int[(numberOfLeaves - 1) * numberOfTrees]; + float[] featureThresholdArray = new float[(numberOfLeaves - 1) * numberOfTrees]; + float[] leafValuesArray = new float[numberOfLeaves * numberOfTrees]; + + int oneDalModelSize = -1; + int projectedOneDalModelSize = 96 * nClasses * numberOfLeaves * numberOfTrees + 4096 * 16; + byte[] oneDalModel = new byte[projectedOneDalModelSize]; + + unsafe + { +#pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration + fixed (void* featuresPtr = &featuresArray[0], labelsPtr = &labelsArray[0], + lteChildPtr = <eChildArray[0], gtChildPtr = >ChildArray[0], splitFeaturePtr = &splitFeatureArray[0], + featureThresholdPtr = &featureThresholdArray[0], leafValuesPtr = &leafValuesArray[0], oneDalModelPtr = &oneDalModel[0]) +#pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration + { + oneDalModelSize = OneDal.DecisionForestClassificationCompute(featuresPtr, labelsPtr, n, featureCount, nClasses, + numberOfThreads, (float)FastTreeTrainerOptions.FeatureFractionPerSplit, numberOfTrees, + numberOfLeaves, FastTreeTrainerOptions.MinimumExampleCountPerLeaf, FastTreeTrainerOptions.MaximumBinCountPerFeature, + lteChildPtr, gtChildPtr, splitFeaturePtr, featureThresholdPtr, leafValuesPtr, oneDalModelPtr + ); + } + } + // TrainedEnsemble = new InternalTreeEnsemble(oneDalModel, oneDalModelSize, InternalTreeEnsemble.OneDalModelType.Classification); + TrainedEnsemble = new InternalTreeEnsemble(); + for (int i = 0; i < numberOfTrees; ++i) + { + int[] lteChildArrayPerTree = new int[numberOfLeaves - 1]; + int[] gtChildArrayPerTree = new int[numberOfLeaves - 1]; + int[] splitFeatureArrayPerTree = new int[numberOfLeaves - 1]; + float[] featureThresholdArrayPerTree = new float[numberOfLeaves - 1]; + double[] leafValuesArrayPerTree = new double[numberOfLeaves]; + + int[][] categoricalSplitFeaturesPerTree = new int[numberOfLeaves - 1][]; + bool[] categoricalSplitPerTree = new bool[numberOfLeaves - 1]; + double[] 
splitGainPerTree = new double[numberOfLeaves - 1]; + float[] defaultValueForMissingPerTree = new float[numberOfLeaves - 1]; + + for (int j = 0; j < numberOfLeaves - 1; ++j) + { + lteChildArrayPerTree[j] = lteChildArray[(numberOfLeaves - 1) * i + j]; + gtChildArrayPerTree[j] = gtChildArray[(numberOfLeaves - 1) * i + j]; + splitFeatureArrayPerTree[j] = splitFeatureArray[(numberOfLeaves - 1) * i + j]; + featureThresholdArrayPerTree[j] = featureThresholdArray[(numberOfLeaves - 1) * i + j]; + leafValuesArrayPerTree[j] = leafValuesArray[numberOfLeaves * i + j]; + + categoricalSplitFeaturesPerTree[j] = null; + categoricalSplitPerTree[j] = false; + splitGainPerTree[j] = 0.0; + defaultValueForMissingPerTree[j] = 0.0f; + } + leafValuesArrayPerTree[numberOfLeaves - 1] = leafValuesArray[numberOfLeaves * i + numberOfLeaves - 1]; + + InternalQuantileRegressionTree newTree = new InternalQuantileRegressionTree(splitFeatureArrayPerTree, splitGainPerTree, null, + featureThresholdArrayPerTree, defaultValueForMissingPerTree, lteChildArrayPerTree, gtChildArrayPerTree, leafValuesArrayPerTree, + categoricalSplitFeaturesPerTree, categoricalSplitPerTree); + newTree.PopulateThresholds(TrainSet); + TrainedEnsemble.AddTree(newTree); + } + } + private protected override ObjectiveFunctionBase ConstructObjFunc(IChannel ch) { return new ObjectiveFunctionImpl(TrainSet, _trainSetLabels, FastTreeTrainerOptions); diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalQuantileRegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalQuantileRegressionTree.cs index e5ae4776ed..c193029668 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalQuantileRegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalQuantileRegressionTree.cs @@ -29,6 +29,14 @@ public InternalQuantileRegressionTree(int maxLeaves) { } + public InternalQuantileRegressionTree(int[] splitFeatures, double[] splitGain, double[] gainPValue, + float[] rawThresholds, float[] defaultValueForMissing, int[] lteChild, int[] gtChild, double[] leafValues, + int[][] categoricalSplitFeatures, bool[] categoricalSplit) + : base(splitFeatures, splitGain, gainPValue, rawThresholds, defaultValueForMissing, + lteChild, gtChild, leafValues, categoricalSplitFeatures, categoricalSplit) + { + } + internal InternalQuantileRegressionTree(ModelLoadContext ctx, bool usingDefaultValue, bool categoricalSplits) : base(ctx, usingDefaultValue, categoricalSplits) { diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalRegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalRegressionTree.cs index bdfaabdcde..e325861e2f 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalRegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalRegressionTree.cs @@ -51,7 +51,7 @@ internal class InternalRegressionTree /// public int[][] CategoricalSplitFeatureRanges; // These are the thresholds based on the binned values of the raw features. - public uint[] Thresholds { get; } + public uint[] Thresholds { get; private set; } // These are the thresholds based on the raw feature values. Populated after training. 
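        // For trees imported from oneDAL the flow is reversed: RawThresholds arrive from
        // the native model and the binned Thresholds are rebuilt afterwards via
        // PopulateThresholds, which is why Thresholds gained a private setter.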
public float[] RawThresholds { get; private set; } public double[] SplitGains { get { return _splitGain; } } @@ -189,7 +189,7 @@ public static InternalRegressionTree Create(int numLeaves, int[] splitFeatures, } } - internal InternalRegressionTree(int[] splitFeatures, double[] splitGain, double[] gainPValue, + protected InternalRegressionTree(int[] splitFeatures, double[] splitGain, double[] gainPValue, float[] rawThresholds, float[] defaultValueForMissing, int[] lteChild, int[] gtChild, double[] leafValues, int[][] categoricalSplitFeatures, bool[] categoricalSplit) : this() @@ -201,6 +201,7 @@ internal InternalRegressionTree(int[] splitFeatures, double[] splitGain, double[ _splitGain = splitGain; _gainPValue = gainPValue; RawThresholds = rawThresholds; + Thresholds = new uint[NumLeaves - 1]; DefaultValueForMissing = defaultValueForMissing; LteChild = lteChild; GtChild = gtChild; @@ -1099,6 +1100,32 @@ public void PopulateRawThresholds(Dataset dataset) } } + public void PopulateThresholds(Dataset dataset) + { + var features = dataset.Flocks; + + int numNodes = NumLeaves - 1; + for (int n = 0; n < numNodes; n++) + { + int flock; + int subfeature; + dataset.MapFeatureToFlockAndSubFeature(SplitFeatures[n], out flock, out subfeature); + if (CategoricalSplit[n] == false) + { + uint numBins = (uint)dataset.Flocks[flock].BinUpperBounds(subfeature).Length; + for (uint i = 1; i < numBins; ++i) + { + double rawThreshold = dataset.Flocks[flock].BinUpperBounds(subfeature)[i]; + if (RawThresholds[n] < rawThreshold) + { + Thresholds[n] = i; + break; + } + } + } + } + } + public void RemapFeatures(int[] oldToNewFeatures) { Contracts.AssertValue(oldToNewFeatures); diff --git a/src/Microsoft.ML.OneDal/Microsoft.ML.OneDal.csproj b/src/Microsoft.ML.OneDal/Microsoft.ML.OneDal.csproj new file mode 100644 index 0000000000..3b4803766c --- /dev/null +++ b/src/Microsoft.ML.OneDal/Microsoft.ML.OneDal.csproj @@ -0,0 +1,42 @@ + + + + netstandard2.0 + Microsoft.ML.OneDal + true + ML.NET additional learners making use of Intel® oneAPI Data Analytics Library (oneDAL). + $(TargetsForTfmSpecificBuildOutput) + + + + + + + all + + + all + + + all + + + all + + + + win + linux + osx + + + + + + + + \ No newline at end of file diff --git a/src/Microsoft.ML.OneDal/Properties/AssemblyInfo.cs b/src/Microsoft.ML.OneDal/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..22bfa3fd8d --- /dev/null +++ b/src/Microsoft.ML.OneDal/Properties/AssemblyInfo.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
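+// Standard ML.NET assembly-friendship declarations: expose this assembly's
+// internals to the test projects via InternalsVisibleTo.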
+ +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] + +[assembly: WantsToBeBestFriends] diff --git a/src/Native/Native.proj b/src/Native/Native.proj index 92578bb066..498e852896 100644 --- a/src/Native/Native.proj +++ b/src/Native/Native.proj @@ -50,6 +50,11 @@ .dylib.dwarf + + .$(OneDalBinaryMajorVersion)$(NativeLibExtension) + $(NativeLibExtension).$(OneDalBinaryMajorVersion).$(OneDalBinaryMinorVersion) + + @@ -59,7 +64,7 @@ We are shipping 2 native assemblies in different nuget packages, and one has a reference on the other and MacOS needs to have the rpath in the assembly to where it should load the referenced assembly and since .NET Core can run assemblies out of a NuGet cache, we need to add the NuGet cache relative location. --> - --configuration $(Configuration) --arch $(TargetArchitecture) $(StripArgs) --mkllibpath $(NuGetPackageRoot)mlnetmkldeps/$(MlNetMklDepsVersion)/runtimes/$(PackageRid)/native --mkllibrpath ../../../../../microsoft.ml.mkl.redist/$(Version)/runtimes/$(PackageRid)/native + --configuration $(Configuration) --arch $(TargetArchitecture) $(StripArgs) --mkllibpath $(NuGetPackageRoot)mlnetmkldeps/$(MlNetMklDepsPackageVersion)/runtimes/$(PackageRid)/native --mkllibrpath ../../../../../microsoft.ml.mkl.redist/$(Version)/runtimes/$(PackageRid)/native --onedallibpath $(NuGetPackageRoot)inteldal.devel-reduced.$(PackageRid)/$(OneDalDevelReducedPackageVersion)/build/native @@ -72,7 +77,7 @@ DependsOnTargets="GenerateNativeVersionFile"> - $(Configuration) $(TargetArchitecture) --mkllibpath $(NuGetPackageRoot)mlnetmkldeps\$(MlNetMklDepsVersion)\runtimes\$(PackageRid)\native + $(Configuration) $(TargetArchitecture) --mkllibpath $(NuGetPackageRoot)mlnetmkldeps\$(MlNetMklDepsPackageVersion)\runtimes\$(PackageRid)\native --onedallibpath $(NuGetPackageRoot)inteldal.devel-reduced.$(PackageRid).$(OneDalDevelReducedPackageVersion)/build/native @@ -97,6 +102,14 @@ + + + + + + diff --git a/src/Native/OneDalNative/CMakeLists.txt b/src/Native/OneDalNative/CMakeLists.txt new file mode 100644 index 0000000000..85dedbfb44 --- /dev/null +++ b/src/Native/OneDalNative/CMakeLists.txt @@ -0,0 +1,24 @@ +project (OneDalNative) + +if(NOT ${ARCHITECTURE} MATCHES "arm.*") + add_definitions(-DUSESSE) +endif() + +set(USE_DPCPP no) +set(TARGET_LINK dynamic) +set(USE_PARALLEL yes) + +set(DALROOT "${ONEDAL_LIB_PATH}/daal") + +list(APPEND CMAKE_PREFIX_PATH "${ONEDAL_LIB_PATH}/daal") +list(APPEND CMAKE_PREFIX_PATH "${ONEDAL_LIB_PATH}/tbb") + +find_package(oneDAL CONFIG REQUIRED onedal_core onedal_thread) + +include_directories(${oneDAL_INCLUDE_DIRS}) +link_libraries(${oneDAL_IMPORTED_TARGETS}) + +set(SOURCES OneDalAlgorithms.cpp) + +add_library(OneDalNative SHARED ${SOURCES} ${RESOURCES}) +install_library_and_symbols(OneDalNative) diff --git a/src/Native/OneDalNative/OneDalAlgorithms.cpp b/src/Native/OneDalNative/OneDalAlgorithms.cpp new file mode 100644 index 0000000000..2d58aa016f --- /dev/null +++ b/src/Native/OneDalNative/OneDalAlgorithms.cpp @@ -0,0 +1,623 @@ +#include +#include +#include "daal.h" +#include "../Stdafx.h" + +using namespace std; +using namespace daal; +using namespace daal::algorithms; +using namespace daal::services; +using namespace daal::data_management; + + +bool getVerboseVariable() +{ + bool verbose = false; + #ifdef linux + if (const char* env_p = std::getenv("MLNET_BACKEND_VERBOSE")) + #elif 
_WIN32 + // WL Merge Note: std::getenv cause compilation error, use _dupenv_s in win, need to validate correctness. + char * env_p; + size_t size; + errno_t err = _dupenv_s(&env_p, &size, "MLNET_BACKEND_VERBOSE"); + if(!err && env_p) + #endif + verbose = true; + #ifdef _WIN32 + free(env_p); + #endif + + return verbose; +} +/* + Decision Forest regression tree traveler +*/ + +/* + ### Decision Forest regression wrappers ### + +*/ + +/* + Decision Forest classification tree traveler +*/ +template +class ClassifierNodeVisitor : public daal::algorithms::tree_utils::classification::TreeNodeVisitor +{ +public: + ClassifierNodeVisitor(size_t numberOfLeaves, bool verbose) + { + _verbose = verbose; + _numberOfLeaves = numberOfLeaves; + _lteChild = new int[_numberOfLeaves - 1]; + _gtChild = new int[_numberOfLeaves - 1]; + _splitFeature = new int[_numberOfLeaves - 1]; + _featureThreshold = new FPType[_numberOfLeaves - 1]; + _leafValues = new FPType[_numberOfLeaves]; + + _currentNode = 0; + _currentLeaf = -1; + _previousLevel = 0; + _previousNodes = new size_t[1024]; + } + + virtual bool onLeafNode(const tree_utils::classification::LeafNodeDescriptor & desc) + { + // step down + if (desc.level == _previousLevel + 1) + { + _lteChild[_previousNodes[_previousLevel]] = _currentLeaf; + } + // switch to different branch + else + { + _gtChild[_previousNodes[desc.level - 1]] = _currentLeaf; + } + _leafValues[-_currentLeaf - 1] = 1 - 2 * desc.prob[0]; + _previousLevel = desc.level; + _currentLeaf--; + + if (_verbose) + { + for (size_t i = 0; i < desc.level; ++i) std::cout << " "; + std::cout << "Level " << desc.level << ", leaf node. Label value = " << desc.label << ", Impurity = " << desc.impurity + << ", Number of samples = " << desc.nNodeSampleCount << ", Probabilities = { "; + for (size_t indexClass = 0; indexClass < 2; ++indexClass) + { + std::cout << desc.prob[indexClass] << ' '; + } + std::cout << "}" << std::endl; + + for (size_t i = 0; i < desc.level; ++i) std::cout << " "; + std::cout << "DEBUG: current level " << _previousLevel << " currentLeaf " << _currentLeaf + 1 << " previousNodes"; + for (size_t i = 0; i < desc.level; ++i) + std::cout << " " << _previousNodes[i]; + std::cout << std::endl; + } + return true; + } + + virtual bool onSplitNode(const tree_utils::classification::SplitNodeDescriptor & desc) + { + // step down or root node + if (desc.level == _previousLevel + 1 || desc.level == 0) + { + if (desc.level != 0) + _lteChild[_previousNodes[_previousLevel]] = _currentNode; + } + // switch to different branch + else + { + _gtChild[_previousNodes[desc.level - 1]] = _currentNode; + } + _splitFeature[_currentNode] = desc.featureIndex; + _featureThreshold[_currentNode] = desc.featureValue; + _previousNodes[desc.level] = _currentNode; + _previousLevel = desc.level; + _currentNode++; + + if (_verbose) + { + for (size_t i = 0; i < desc.level; ++i) std::cout << " "; + std::cout << "Level " << desc.level << ", split node. 
Feature index = " << desc.featureIndex << ", feature value = " << desc.featureValue + << ", Impurity = " << desc.impurity << ", Number of samples = " << desc.nNodeSampleCount << std::endl; + + for (size_t i = 0; i < desc.level; ++i) std::cout << " "; + std::cout << "DEBUG: current level " << _previousLevel << " currentNode " << _currentNode - 1 << " previousNodes"; + for (size_t i = 0; i < desc.level; ++i) + std::cout << " " << _previousNodes[i]; + std::cout << std::endl; + } + return true; + } + + void copyTreeStructureToBuffers(int * lteChild, int * gtChild, int * splitFeature, FPType * featureThreshold, FPType * leafValues) + { + for (size_t i = 0; i < _numberOfLeaves - 1; ++i) + { + lteChild[i] = _lteChild[i]; + gtChild[i] = _gtChild[i]; + splitFeature[i] = _splitFeature[i]; + featureThreshold[i] = _featureThreshold[i]; + leafValues[i] = _leafValues[i]; + } + leafValues[_numberOfLeaves - 1] = _leafValues[_numberOfLeaves - 1]; + + if (_verbose) + { + printf("Number of leaves: %d\n", -_currentLeaf - 1); + printf("Number of nodes: %lu\n", _currentNode); + } + } + + ~ClassifierNodeVisitor() + { + delete[] _previousNodes; + delete[] _lteChild; + delete[] _gtChild; + delete[] _splitFeature; + delete[] _featureThreshold; + delete[] _leafValues; + } + + size_t _numberOfLeaves; + int * _lteChild; + int * _gtChild; + int * _splitFeature; + FPType * _featureThreshold; + FPType * _leafValues; + + size_t * _previousNodes; + size_t _currentNode; + int _currentLeaf; + size_t _previousLevel; + bool _verbose; +}; + +/* + ### Decision Forest classification wrappers ### + + [DllImport(OneDalLibPath, EntryPoint = "decisionForestClassificationCompute")] + public static extern unsafe int DecisionForestClassificationCompute( + void* featuresPtr, void* labelsPtr, long nRows, int nColumns, int nClasses, int numberOfThreads, + float featureFractionPerSplit, int numberOfTrees, int numberOfLeaves, int minimumExampleCountPerLeaf, int maxBins, + void* lteChildPtr, void* gtChildPtr, void* splitFeaturePtr, void* featureThresholdPtr, void* leafValuesPtr, void* modelPtr) +*/ +template +int decisionForestClassificationComputeTemplate( + FPType * featuresPtr, FPType * labelsPtr, long long nRows, int nColumns, int nClasses, + int numberOfThreads, float featureFractionPerSplit, int numberOfTrees, int numberOfLeaves, int minimumExampleCountPerLeaf, int maxBins, + int * lteChildPtr, int * gtChildPtr, int * splitFeaturePtr, FPType * featureThresholdPtr, FPType * leafValuesPtr, byte* modelPtr) +{ + bool verbose = getVerboseVariable(); + if (verbose) + { + printf("%s\n", "Decision Forest Classification parameters:"); + printf("\t%s - %d\n", "numberOfThreads", numberOfThreads); + printf("\t%s - %d\n", "numberOfTrees", numberOfTrees); + printf("\t%s - %.6f\n", "featureFractionPerSplit", featureFractionPerSplit); + printf("\t%s - %d\n", "featureFractionPerSplit(int)", (int)(nColumns * featureFractionPerSplit)); + printf("\t%s - %d\n", "numberOfLeaves", numberOfLeaves); + printf("\t%s - %d\n", "minimumExampleCountPerLeaf", minimumExampleCountPerLeaf); + printf("\t%s - %d\n", "maxBins", maxBins); + } + + if (numberOfThreads != 0) + Environment::getInstance()->setNumberOfThreads(numberOfThreads); + + NumericTablePtr featuresTable(new HomogenNumericTable(featuresPtr, nColumns, nRows)); + NumericTablePtr labelsTable(new HomogenNumericTable(labelsPtr, 1, nRows)); + + decision_forest::classification::training::Batch algorithm(nClasses); + + algorithm.input.set(classifier::training::data, featuresTable); + 
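+    // Map the ML.NET FastForest options onto oneDAL's decision_forest parameters.
+    // maxTreeDepth = 0 leaves depth unbounded, so tree growth is limited only by
+    // maxLeafNodes -- mirroring FastTree's leaf-count-driven tree construction.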
algorithm.input.set(classifier::training::labels, labelsTable); + + algorithm.parameter().nTrees = numberOfTrees; + algorithm.parameter().observationsPerTreeFraction = 1; + algorithm.parameter().featuresPerNode = (int)(nColumns * featureFractionPerSplit); + algorithm.parameter().maxTreeDepth = 0; // unlimited growth in depth + algorithm.parameter().impurityThreshold = 0; + algorithm.parameter().varImportance = algorithms::decision_forest::training::MDI; + algorithm.parameter().resultsToCompute = algorithms::decision_forest::training::computeOutOfBagError; + algorithm.parameter().bootstrap = true; + algorithm.parameter().minObservationsInLeafNode = minimumExampleCountPerLeaf; + algorithm.parameter().minObservationsInSplitNode = 2; + algorithm.parameter().minWeightFractionInLeafNode = 0; + algorithm.parameter().minImpurityDecreaseInSplitNode = 0; + algorithm.parameter().maxLeafNodes = numberOfLeaves; + algorithm.parameter().maxBins = maxBins; + algorithm.parameter().minBinSize = 5; + + algorithm.compute(); + + decision_forest::classification::training::ResultPtr trainingResult = algorithm.getResult(); + decision_forest::classification::ModelPtr model = trainingResult->get(classifier::training::model); + + InputDataArchive dataArch; + trainingResult->serialize(dataArch); + int modelSize = dataArch.getSizeOfArchive(); + dataArch.copyArchiveToArray(modelPtr, modelSize); + + for (size_t i = 0; i < numberOfTrees; ++i) + { + ClassifierNodeVisitor visitor(numberOfLeaves, verbose); + model->traverseDFS(i, visitor); + + visitor.copyTreeStructureToBuffers( + lteChildPtr + i * (numberOfLeaves - 1), + gtChildPtr + i * (numberOfLeaves - 1), + splitFeaturePtr + i * (numberOfLeaves - 1), + featureThresholdPtr + i * (numberOfLeaves - 1), + leafValuesPtr + i * numberOfLeaves + ); + + if (verbose) + { + printf("lteChild:\n"); + for (size_t j = 0; j < numberOfLeaves - 1; ++j) + printf("%d ", lteChildPtr[i * (numberOfLeaves - 1) + j]); + printf("\n"); + + printf("gtChild:\n"); + for (size_t j = 0; j < numberOfLeaves - 1; ++j) + printf("%d ", gtChildPtr[i * (numberOfLeaves - 1) + j]); + printf("\n"); + + printf("splitFeature:\n"); + for (size_t j = 0; j < numberOfLeaves - 1; ++j) + printf("%d ", splitFeaturePtr[i * (numberOfLeaves - 1) + j]); + printf("\n"); + + printf("featureThreshold:\n"); + for (size_t j = 0; j < numberOfLeaves - 1; ++j) + printf("%f ", featureThresholdPtr[i * (numberOfLeaves - 1) + j]); + printf("\n"); + + printf("leafValues:\n"); + for (size_t j = 0; j < numberOfLeaves; ++j) + printf("%f ", leafValuesPtr[i * numberOfLeaves + j]); + printf("\n"); + } + } + + return modelSize; +} + +EXPORT_API(int) decisionForestClassificationCompute( + void * featuresPtr, void * labelsPtr, long long nRows, int nColumns, int nClasses, + int numberOfThreads, float featureFractionPerSplit, int numberOfTrees, int numberOfLeaves, int minimumExampleCountPerLeaf, int maxBins, + void * lteChildPtr, void * gtChildPtr, void * splitFeaturePtr, void * featureThresholdPtr, void * leafValuesPtr, void* modelPtr) +{ + return decisionForestClassificationComputeTemplate( + (float *)featuresPtr, (float *)labelsPtr, nRows, nColumns, nClasses, + numberOfThreads, featureFractionPerSplit, numberOfTrees, numberOfLeaves, minimumExampleCountPerLeaf, maxBins, + (int *)lteChildPtr, (int *)gtChildPtr, (int *)splitFeaturePtr, (float *)featureThresholdPtr, (float *)leafValuesPtr, (byte *)modelPtr); +} + +/* + [DllImport(OneDalLibPath, EntryPoint = "decisionForestClassificationPrediction")] + public static extern unsafe double 
DecisionForestClassificationPrediction( + void* featuresPtr, int nColumns, int nClasses, void* modelPtr, int modelSize); +*/ +template +double decisionForestClassificationPredictionTemplate( + FPType * featuresPtr, int nColumns, int nClasses, byte* modelPtr, int modelSize) +{ + OutputDataArchive dataArch(modelPtr, modelSize); + + decision_forest::classification::training::ResultPtr trainingResult(new decision_forest::classification::training::Result()); + trainingResult->deserialize(dataArch); + + double output; + NumericTablePtr featuresTable(new HomogenNumericTable(featuresPtr, nColumns, 1)); + + decision_forest::classification::prediction::Batch algorithm(nClasses); + + algorithm.input.set(classifier::prediction::data, featuresTable); + algorithm.input.set(classifier::prediction::model, trainingResult->get(classifier::training::model)); + + algorithm.parameter().votingMethod = decision_forest::classification::prediction::weighted; + algorithm.parameter().resultsToEvaluate |= static_cast(classifier::computeClassProbabilities); + + algorithm.compute(); + + classifier::prediction::ResultPtr predictionResult = algorithm.getResult(); + + NumericTablePtr predictionTable(predictionResult->get(classifier::prediction::probabilities)); + BlockDescriptor predictionBlock; + predictionTable->getBlockOfRows(0, predictionTable->getNumberOfRows(), readWrite, predictionBlock); + FPType * prediction = predictionBlock.getBlockPtr(); + output = prediction[0]; + predictionTable->releaseBlockOfRows(predictionBlock); + + return output; +} + +EXPORT_API(int) decisionForestClassificationPrediction( + void * featuresPtr, int nColumns, int nClasses, void* modelPtr, int modelSize) +{ + return decisionForestClassificationPredictionTemplate( + (float *)featuresPtr, nColumns, nClasses, (byte *)modelPtr, modelSize); +} + +/* + ### Logistic regression wrapper ### + + public unsafe static extern void LogisticRegressionCompute(void* featuresPtr, void* labelsPtr, void* weightsPtr, bool useSampleWeights, void* betaPtr, + long nRows, int nColumns, int nClasses, float l1Reg, float l2Reg, float accuracyThreshold, int nIterations, int m, int nThreads); +*/ +template +void logisticRegressionLBFGSComputeTemplate(FPType * featuresPtr, int * labelsPtr, FPType * weightsPtr, bool useSampleWeights, FPType * betaPtr, + long long nRows, int nColumns, int nClasses, float l1Reg, float l2Reg, float accuracyThreshold, int nIterations, int m, int nThreads) +{ + bool verbose = getVerboseVariable(); + if (verbose) + { + printf("%s\n", "Logistic Regression parameters:"); + printf("%s - %.12f\n", "l1Reg", l1Reg); + printf("%s - %.12f\n", "l2Reg", l2Reg); + printf("%s - %.12f\n", "accuracyThreshold", accuracyThreshold); + printf("%s - %d\n", "nIterations", nIterations); + printf("%s - %d\n", "m", m); + printf("%s - %d\n", "nClasses", nClasses); + printf("%s - %d\n", "nThreads", nThreads); + + const size_t nThreadsOld = Environment::getInstance()->getNumberOfThreads(); + printf("%s - %zd\n", "nThreadsOld", nThreadsOld); //Note: %lu cause compilation error, modify to %d + } + + Environment::getInstance()->setNumberOfThreads(nThreads); + + NumericTablePtr featuresTable(new HomogenNumericTable(featuresPtr, nColumns, nRows)); + NumericTablePtr labelsTable(new HomogenNumericTable(labelsPtr, 1, nRows)); + NumericTablePtr weightsTable(new HomogenNumericTable(weightsPtr, 1, nRows)); + + SharedPtr> lbfgsAlgorithm(new optimization_solver::lbfgs::Batch()); + lbfgsAlgorithm->parameter.batchSize = featuresTable->getNumberOfRows(); + 
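+    // Full-batch L-BFGS: both the gradient batch and the correction-pair batch span
+    // the entire training set, so the solver runs deterministically, comparable to
+    // the managed L-BFGS implementation it replaces.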
lbfgsAlgorithm->parameter.correctionPairBatchSize = featuresTable->getNumberOfRows(); + lbfgsAlgorithm->parameter.L = 1; + lbfgsAlgorithm->parameter.m = m; + lbfgsAlgorithm->parameter.accuracyThreshold = accuracyThreshold; + lbfgsAlgorithm->parameter.nIterations = nIterations; + + if (nClasses == 1) + { + SharedPtr> logLoss(new optimization_solver::logistic_loss::Batch(featuresTable->getNumberOfRows())); + logLoss->parameter().numberOfTerms = featuresTable->getNumberOfRows(); + logLoss->parameter().interceptFlag = true; + logLoss->parameter().penaltyL1 = l1Reg; + logLoss->parameter().penaltyL2 = l2Reg; + + lbfgsAlgorithm->parameter.function = logLoss; + } + else + { + SharedPtr> crossEntropyLoss(new optimization_solver::cross_entropy_loss::Batch(nClasses, featuresTable->getNumberOfRows())); + crossEntropyLoss->parameter().numberOfTerms = featuresTable->getNumberOfRows(); + crossEntropyLoss->parameter().interceptFlag = true; + crossEntropyLoss->parameter().penaltyL1 = l1Reg; + crossEntropyLoss->parameter().penaltyL2 = l2Reg; + crossEntropyLoss->parameter().nClasses = nClasses; + + lbfgsAlgorithm->parameter.function = crossEntropyLoss; + } + + logistic_regression::training::Batch trainingAlgorithm(nClasses == 1 ? 2 : nClasses); + trainingAlgorithm.parameter().optimizationSolver = lbfgsAlgorithm; + trainingAlgorithm.parameter().penaltyL1 = l1Reg; + trainingAlgorithm.parameter().penaltyL2 = l2Reg; + trainingAlgorithm.parameter().interceptFlag = true; + + trainingAlgorithm.input.set(classifier::training::data, featuresTable); + trainingAlgorithm.input.set(classifier::training::labels, labelsTable); + if (useSampleWeights) + { + trainingAlgorithm.input.set(classifier::training::weights, weightsTable); + } + + trainingAlgorithm.compute(); + + logistic_regression::training::ResultPtr trainingResult = trainingAlgorithm.getResult(); + logistic_regression::ModelPtr modelPtr = trainingResult->get(classifier::training::model); + + NumericTablePtr betaTable = modelPtr->getBeta(); + if (betaTable->getNumberOfRows() != nClasses) + { + printf("Wrong number of classes in beta table\n"); + } + if (betaTable->getNumberOfColumns() != nColumns + 1) + { + printf("Wrong number of features in beta table\n"); + } + + BlockDescriptor betaBlock; + betaTable->getBlockOfRows(0, betaTable->getNumberOfRows(), readWrite, betaBlock); + FPType * betaForCopy = betaBlock.getBlockPtr(); + for (size_t i = 0; i < nClasses; ++i) + { + betaPtr[i] = betaForCopy[i * (nColumns + 1)]; + } + for (size_t i = 0; i < nClasses; ++i) + { + for (size_t j = 1; j < nColumns + 1; ++j) + { + betaPtr[nClasses + i * nColumns + j - 1] = betaForCopy[i * (nColumns + 1) + j]; + } + } + + if (verbose) + { + optimization_solver::iterative_solver::ResultPtr solverResult = lbfgsAlgorithm->getResult(); + NumericTablePtr nIterationsTable = solverResult->get(optimization_solver::iterative_solver::nIterations); + BlockDescriptor nIterationsBlock; + nIterationsTable->getBlockOfRows(0, 1, readWrite, nIterationsBlock); + int * nIterationsPtr = nIterationsBlock.getBlockPtr(); + + printf("Solver iterations: %d\n", nIterationsPtr[0]); + + logistic_regression::prediction::Batch predictionAlgorithm(nClasses == 1 ? 
2 : nClasses); + // predictionAlgorithm.parameter().resultsToEvaluate |= + // static_cast(classifier::computeClassProbabilities); + predictionAlgorithm.input.set(classifier::prediction::data, featuresTable); + predictionAlgorithm.input.set(classifier::prediction::model, modelPtr); + predictionAlgorithm.compute(); + NumericTablePtr predictionsTable = predictionAlgorithm.getResult()->get(classifier::prediction::prediction); + BlockDescriptor predictionsBlock; + predictionsTable->getBlockOfRows(0, nRows, readWrite, predictionsBlock); + int * predictions = predictionsBlock.getBlockPtr(); + FPType accuracy = 0; + for (long i = 0; i < nRows; ++i) + { + if (predictions[i] == labelsPtr[i]) + { + accuracy += 1.0; + } + } + accuracy /= nRows; + predictionsTable->releaseBlockOfRows(predictionsBlock); + nIterationsTable->releaseBlockOfRows(nIterationsBlock); + printf("oneDAL LogReg traning accuracy: %f\n", accuracy); + } + + betaTable->releaseBlockOfRows(betaBlock); + +} + +EXPORT_API(void) logisticRegressionLBFGSCompute(void * featuresPtr, void * labelsPtr, void * weightsPtr, bool useSampleWeights, void * betaPtr, + long long nRows, int nColumns, int nClasses, float l1Reg, float l2Reg, float accuracyThreshold, int nIterations, int m, int nThreads) +{ + return logisticRegressionLBFGSComputeTemplate((float *)featuresPtr, (int *)labelsPtr, (float *)weightsPtr, useSampleWeights, (float *)betaPtr, + nRows, nColumns, nClasses, l1Reg, l2Reg, accuracyThreshold, nIterations, m, nThreads); +} + +/* + ### Ridge regression wrapper ### + + [DllImport(OneDalLibPath, EntryPoint = "ridgeRegressionOnlineCompute")] + public unsafe static extern int RidgeRegressionOnlineCompute(void* featuresPtr, void* labelsPtr, int nRows, int nColumns, float l2Reg, void* partialResultPtr, int partialResultSize); + + [DllImport(OneDalLibPath, EntryPoint = "ridgeRegressionOnlineFinalize")] + public unsafe static extern void RidgeRegressionOnlineFinalize(void* featuresPtr, void* labelsPtr, long nAllRows, int nRows, int nColumns, float l2Reg, void* partialResultPtr, int partialResultSize, + void* betaPtr, void* xtyPtr, void* xtxPtr); +*/ +template +int ridgeRegressionOnlineComputeTemplate(FPType * featuresPtr, FPType * labelsPtr, int nRows, int nColumns, float l2Reg, byte * partialResultPtr, int partialResultSize) +{ + // Create input data tables + NumericTablePtr featuresTable(new HomogenNumericTable(featuresPtr, nColumns, nRows)); + NumericTablePtr labelsTable(new HomogenNumericTable(labelsPtr, 1, nRows)); + FPType l2 = l2Reg; + NumericTablePtr l2RegTable(new HomogenNumericTable(&l2, 1, 1)); + + // Set up and execute training + ridge_regression::training::Online trainingAlgorithm; + trainingAlgorithm.parameter.ridgeParameters = l2RegTable; + + ridge_regression::training::PartialResultPtr pRes(new ridge_regression::training::PartialResult); + if (partialResultSize != 0) + { + OutputDataArchive dataArch(partialResultPtr, partialResultSize); + pRes->deserialize(dataArch); + trainingAlgorithm.setPartialResult(pRes); + } + + trainingAlgorithm.input.set(ridge_regression::training::data, featuresTable); + trainingAlgorithm.input.set(ridge_regression::training::dependentVariables, labelsTable); + trainingAlgorithm.compute(); + + // Serialize partial result + pRes = trainingAlgorithm.getPartialResult(); + InputDataArchive dataArch; + pRes->serialize(dataArch); + partialResultSize = (int)dataArch.getSizeOfArchive(); + dataArch.copyArchiveToArray(partialResultPtr, (size_t)partialResultSize); + + return partialResultSize; +} + +template +void 
ridgeRegressionOnlineFinalizeTemplate(FPType * featuresPtr, FPType * labelsPtr, long long int nAllRows, int nRows, int nColumns, float l2Reg, byte * partialResultPtr, int partialResultSize, + FPType * betaPtr, FPType * xtyPtr, FPType * xtxPtr) +{ + NumericTablePtr featuresTable(new HomogenNumericTable(featuresPtr, nColumns, nRows)); + NumericTablePtr labelsTable(new HomogenNumericTable(labelsPtr, 1, nRows)); + FPType l2 = l2Reg; + NumericTablePtr l2RegTable(new HomogenNumericTable(&l2, 1, 1)); + + ridge_regression::training::Online trainingAlgorithm; + + ridge_regression::training::PartialResultPtr pRes(new ridge_regression::training::PartialResult); + if (partialResultSize != 0) + { + OutputDataArchive dataArch(partialResultPtr, partialResultSize); + pRes->deserialize(dataArch); + trainingAlgorithm.setPartialResult(pRes); + } + + trainingAlgorithm.parameter.ridgeParameters = l2RegTable; + + trainingAlgorithm.input.set(ridge_regression::training::data, featuresTable); + trainingAlgorithm.input.set(ridge_regression::training::dependentVariables, labelsTable); + trainingAlgorithm.compute(); + trainingAlgorithm.finalizeCompute(); + + ridge_regression::training::ResultPtr trainingResult = trainingAlgorithm.getResult(); + ridge_regression::ModelNormEq * model = static_cast(trainingResult->get(ridge_regression::training::model).get()); + + NumericTablePtr xtxTable = model->getXTXTable(); + const size_t nBeta = xtxTable->getNumberOfRows(); + BlockDescriptor xtxBlock; + xtxTable->getBlockOfRows(0, nBeta, readWrite, xtxBlock); + FPType * xtx = xtxBlock.getBlockPtr(); + + size_t offset = 0; + for (size_t i = 0; i < nBeta; ++i) + { + for (size_t j = 0; j <= i; ++j) + { + xtxPtr[offset] = xtx[i * nBeta + j]; + offset++; + } + } + offset = 0; + for (size_t i = 0; i < nBeta; ++i) + { + xtxPtr[offset] += l2Reg * l2Reg * nAllRows; + offset += i + 2; + } + + NumericTablePtr xtyTable = model->getXTYTable(); + BlockDescriptor xtyBlock; + xtyTable->getBlockOfRows(0, xtyTable->getNumberOfRows(), readWrite, xtyBlock); + FPType * xty = xtyBlock.getBlockPtr(); + for (size_t i = 0; i < nBeta; ++i) + { + xtyPtr[i] = xty[i]; + } + + NumericTablePtr betaTable = trainingResult->get(ridge_regression::training::model)->getBeta(); + BlockDescriptor betaBlock; + betaTable->getBlockOfRows(0, 1, readWrite, betaBlock); + FPType * betaForCopy = betaBlock.getBlockPtr(); + for (size_t i = 0; i < nBeta; ++i) + { + betaPtr[i] = betaForCopy[i]; + } + + xtxTable->releaseBlockOfRows(xtxBlock); + xtyTable->releaseBlockOfRows(xtyBlock); + betaTable->releaseBlockOfRows(betaBlock); +} + +EXPORT_API(int) ridgeRegressionOnlineCompute(void * featuresPtr, void * labelsPtr, int nRows, int nColumns, float l2Reg, void * partialResultPtr, int partialResultSize) +{ + return ridgeRegressionOnlineComputeTemplate((double *)featuresPtr, (double *)labelsPtr, nRows, nColumns, l2Reg, (byte *)partialResultPtr, partialResultSize); +} + +EXPORT_API(void) ridgeRegressionOnlineFinalize(void * featuresPtr, void * labelsPtr, long long int nAllRows, int nRows, int nColumns, float l2Reg, void * partialResultPtr, int partialResultSize, + void * betaPtr, void * xtyPtr, void * xtxPtr) +{ + ridgeRegressionOnlineFinalizeTemplate((double *)featuresPtr, (double *)labelsPtr, nAllRows, nRows, nColumns, l2Reg, (byte *)partialResultPtr, partialResultSize, + (double *)betaPtr, (double *)xtyPtr, (double *)xtxPtr); +} diff --git a/src/Native/build.cmd b/src/Native/build.cmd index 3a2dcac82a..27ea994706 100644 --- a/src/Native/build.cmd +++ b/src/Native/build.cmd @@ -16,6 
+16,8 @@ set __BuildArch=x64 set __VCBuildArch=x86_amd64 set CMAKE_BUILD_TYPE=Debug set MKL_LIB_PATH="" +set ONEDAL_LIB_PATH="" + :Arg_Loop if [%1] == [] goto :ToolsVersion @@ -29,6 +31,7 @@ if /i [%1] == [arm] ( set __BuildArch=arm&&set __VCBuildArch=x86_arm&&sh if /i [%1] == [arm64] ( set __BuildArch=arm64&&set __VCBuildArch=x86_arm64&&shift&goto Arg_Loop) if /i [%1] == [--mkllibpath] ( set MKL_LIB_PATH=%2&&shift&goto Arg_Loop) +if /i [%1] == [--onedallibpath] ( set ONEDAL_LIB_PATH=%2&&shift&goto Arg_Loop) shift goto :Arg_Loop diff --git a/src/Native/build.sh b/src/Native/build.sh index b804c5e5e8..bd18f5157b 100755 --- a/src/Native/build.sh +++ b/src/Native/build.sh @@ -10,6 +10,7 @@ usage() echo " --configuration Build Configuration (Debug, Release)" echo " --stripSymbols Enable symbol stripping (to external file)" echo " --mkllibpath Path to mkl library." + echo " --onedallibpath Path to OneDal library." exit 1 } @@ -30,6 +31,7 @@ __baseIntermediateOutputPath="$RootRepo/artifacts/obj" __versionSourceFile="$__baseIntermediateOutputPath/version.c" __mkllibpath="" __mkllibrpath="" +__onedallibpath="" while [ "$1" != "" ]; do lowerI="$(echo $1 | awk '{print tolower($0)}')" @@ -54,6 +56,10 @@ while [ "$1" != "" ]; do shift __mkllibrpath=$1 ;; + --onedallibpath) + shift + __onedallibpath=$1 + ;; --stripsymbols) __strip_argument="-DSTRIP_SYMBOLS=true" ;; @@ -63,7 +69,7 @@ while [ "$1" != "" ]; do shift done -__cmake_defines="-DCMAKE_BUILD_TYPE=${__configuration} ${__strip_argument} -DMKL_LIB_PATH=${__mkllibpath} -DMKL_LIB_RPATH=${__mkllibrpath}" +__cmake_defines="-DCMAKE_BUILD_TYPE=${__configuration} ${__strip_argument} -DMKL_LIB_PATH=${__mkllibpath} -DMKL_LIB_RPATH=${__mkllibrpath} -DONEDAL_LIB_PATH=${__onedallibpath}" __IntermediatesDir="$__baseIntermediateOutputPath/Native/$__build_arch.$__configuration" __BinDir="$__rootBinPath/Native/$__build_arch.$__configuration" From bbad65f3010a07216633211edd8daa5fbb47bfa6 Mon Sep 17 00:00:00 2001 From: "R. G. 
Esteves" Date: Thu, 6 Oct 2022 09:21:20 -0700 Subject: [PATCH 2/5] Missed from previous commit --- .../TreeEnsemble/InternalTreeEnsemble.cs | 57 ++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs index ce3b9328c4..d88dabb7f8 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs @@ -8,6 +8,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using System.Runtime.InteropServices; using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model.Pfa; @@ -25,6 +26,25 @@ internal class InternalTreeEnsemble private readonly string _firstInputInitializationContent; private readonly List _trees; + // oneDAL model specific properties + public enum OneDalModelType : byte + { + Regression = 0, + Classification = 1 + } + private readonly OneDalModelType _oneDalModelType; + private readonly byte[] _oneDalModel; + private readonly int _oneDalModelSize; + + internal static class OneDal + { + private const string OneDalLibPath = "OneDalNative"; + + [DllImport(OneDalLibPath, EntryPoint = "decisionForestClassificationPrediction")] + public static extern unsafe double DecisionForestClassificationPrediction( + void* featuresPtr, int nColumns, int nClasses, void* modelPtr, int modelSize); + } + public IEnumerable Trees => _trees; public double Bias { get; set; } @@ -34,6 +54,15 @@ internal class InternalTreeEnsemble public InternalTreeEnsemble() { _trees = new List(); + _oneDalModel = null; + } + + public InternalTreeEnsemble(byte[] oneDalModel, int oneDalModelSize, OneDalModelType oneDalModelType) + { + _trees = new List(); + _oneDalModel = oneDalModel; + _oneDalModelSize = oneDalModelSize; + _oneDalModelType = oneDalModelType; } public InternalTreeEnsemble(ModelLoadContext ctx, bool usingDefaultValues, bool categoricalSplits) @@ -54,6 +83,7 @@ public InternalTreeEnsemble(ModelLoadContext ctx, bool usingDefaultValues, bool AddTree(InternalRegressionTree.Load(ctx, usingDefaultValues, categoricalSplits)); Bias = ctx.Reader.ReadDouble(); _firstInputInitializationContent = ctx.LoadStringOrNull(); + _oneDalModel = null; } internal void Save(ModelSaveContext ctx) @@ -258,8 +288,31 @@ public double GetOutput(int[] binnedInstance) public double GetOutput(in VBuffer feat) { double output = 0.0; - for (int h = 0; h < NumTrees; h++) - output += _trees[h].GetOutput(in feat); + if (_oneDalModel != null) + { + var featuresToCopy = feat.GetValues(); + int nFeatures = feat.Length; + float[] featuresArray = new float[nFeatures]; + for (int i = 0; i < nFeatures; ++i) + featuresArray[i] = featuresToCopy[i]; + + unsafe + { + #pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration + fixed (void* featuresPtr = &featuresArray[0], modelPtr = &_oneDalModel[0]) + #pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration + { + if (_oneDalModelType == OneDalModelType.Classification) + output = OneDal.DecisionForestClassificationPrediction(featuresPtr, nFeatures, 2, modelPtr, _oneDalModelSize); + output = (1.0 - 2.0 * output) * (double)NumTrees; + } + } + } + else + { + for (int h = 0; h < NumTrees; h++) + output += _trees[h].GetOutput(in feat); + } return output; } From 328a679e758e3d85552364acc164e5d8b058ce99 Mon Sep 17 00:00:00 2001 From: "R. 
G. Esteves" Date: Fri, 7 Oct 2022 02:54:16 -0700 Subject: [PATCH 3/5] Missed these build-support changes from last commit --- eng/Versions.props | 5 +++++ src/Native/CMakeLists.txt | 2 ++ 2 files changed, 7 insertions(+) diff --git a/eng/Versions.props b/eng/Versions.props index 76f10699f7..6397438e16 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -40,6 +40,11 @@ 0.4.1 1.10.0 0.0.0.12 + 0.0.0.12 + 0.0.0.12 + 2021.7.0 + 1 + 1 2.1.0 2.1.0 13.0.1 diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt index dddda445f9..2a51672e93 100644 --- a/src/Native/CMakeLists.txt +++ b/src/Native/CMakeLists.txt @@ -243,6 +243,8 @@ if(NOT ${ARCHITECTURE} MATCHES "arm.*") add_subdirectory(CpuMathNative) add_subdirectory(FastTreeNative) add_subdirectory(MklProxyNative) + # TODO: Almost sure this OneDal doesn't work on ARM (or doesn't work as well), so leaving here for the moment (but disabled) + add_subdirectory(OneDalNative) # TODO: once we fix the 4 intel MKL methods, SymSgdNative will need to go back in. add_subdirectory(SymSgdNative) endif() From 7e9b7aed52bc767c83aaadd975d82576f75666d0 Mon Sep 17 00:00:00 2001 From: "R. G. Esteves" Date: Wed, 12 Oct 2022 10:13:18 -0700 Subject: [PATCH 4/5] Fixed formatting --- src/Microsoft.ML.FastTree/RandomForestClassification.cs | 4 ++-- .../TreeEnsemble/InternalTreeEnsemble.cs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index d474ee861e..b403564efe 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -3,15 +3,15 @@ // See the LICENSE file in the project root for more information. using System; -using System.Linq; using System.Collections.Generic; +using System.Linq; using System.Runtime.InteropServices; using Microsoft.ML; -using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Calibrators; using Microsoft.ML.CommandLine; using Microsoft.ML.Data; using Microsoft.ML.EntryPoints; +using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; using Microsoft.ML.Runtime; using Microsoft.ML.Trainers.FastTree; diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs index d88dabb7f8..36479a88de 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/InternalTreeEnsemble.cs @@ -6,9 +6,9 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Runtime.InteropServices; using System.Text; using System.Threading.Tasks; -using System.Runtime.InteropServices; using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model.Pfa; @@ -298,9 +298,9 @@ public double GetOutput(in VBuffer feat) unsafe { - #pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration +#pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration fixed (void* featuresPtr = &featuresArray[0], modelPtr = &_oneDalModel[0]) - #pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration +#pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration { if (_oneDalModelType == OneDalModelType.Classification) output = 
OneDal.DecisionForestClassificationPrediction(featuresPtr, nFeatures, 2, modelPtr, _oneDalModelSize); From d400d676577c23905e70fb4e65296b6871a04ef0 Mon Sep 17 00:00:00 2001 From: "R. G. Esteves" Date: Wed, 12 Oct 2022 22:32:12 -0700 Subject: [PATCH 5/5] LBFGS acceleration via OneDAL --- .../LogisticRegression/LbfgsPredictorBase.cs | 115 +++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs index a5993b9d85..1cdaa9229f 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Threading.Tasks; +using System.Runtime.InteropServices; using Microsoft.ML.CommandLine; using Microsoft.ML.Data; using Microsoft.ML.EntryPoints; @@ -18,6 +19,16 @@ namespace Microsoft.ML.Trainers /// /// Base class for L-BFGS-based trainers. /// + + internal static class OneDal + { + private const string OneDalLibPath = "OneDalNative"; + + [DllImport(OneDalLibPath, EntryPoint = "logisticRegressionLBFGSCompute")] + public static extern unsafe void LogisticRegressionCompute(void* featuresPtr, void* labelsPtr, void* weightsPtr, bool useSampleWeights, void* betaPtr, + long nRows, int nColumns, int nClasses, float l1Reg, float l2Reg, float accuracyThreshold, int nIterations, int m, int nThreads); + } + public abstract class LbfgsTrainerBase : TrainerEstimatorBase where TTransformer : ISingleFeaturePredictionTransformer where TModel : class @@ -429,11 +440,113 @@ private protected override TModel TrainModelCore(TrainContext context) using (var ch = Host.Start("Training")) { - TrainCore(ch, data); + if (Environment.GetEnvironmentVariable("MLNET_BACKEND") == "ONEDAL") + { + TrainCoreOneDal(ch, data); + } + else + { + TrainCore(ch, data); + } return CreatePredictor(); } } + private protected virtual void TrainCoreOneDal(IChannel ch, RoleMappedData data) + { + Host.AssertValue(ch); + ch.AssertValue(data); + + int numThreads = !UseThreads ? 1 : (NumThreads ?? 
Environment.ProcessorCount); + ch.Assert(numThreads > 0); + + NumGoodRows = 0; + WeightSum = 0; + + _features = null; + _labels = null; + _weights = null; + + CursOpt cursorOpt = CursOpt.Label | CursOpt.Features; + bool useSampleWeights = false; + if (data.Schema.Weight.HasValue) + { + useSampleWeights = true; + cursorOpt |= CursOpt.Weight; + } + + var typeFeat = data.Schema.Feature.Value.Type as VectorDataViewType; + int nFeatures = typeFeat.Size; + + var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt); + + var labelsList = new List(); + var featuresList = new List(); + var weightsList = new List(); + + using (var cursor = cursorFactory.Create()) + { + while (cursor.MoveNext()) + { + if (useSampleWeights) + { + WeightSum += cursor.Weight; + weightsList.Add(cursor.Weight); + } + labelsList.Add((int)cursor.Label); + var values = cursor.Features.GetValues(); + if (cursor.Features.IsDense) + { + ch.Assert(values.Length == nFeatures); + + for (int j = 0; j < nFeatures; ++j) + { + featuresList.Add(values[j]); + } + } + else + { + var indices = cursor.Features.GetIndices(); + int i = 0; + for (int j = 0; j < indices.Length; ++j) + { + for (int k = i; k < indices[j]; ++k) + { + featuresList.Add(0); + } + featuresList.Add(values[indices[j]]); + i = indices[j] + 1; + } + } + } + NumGoodRows = cursor.KeptRowCount; + if (cursor.SkippedRowCount > 0) + ch.Warning("Skipped {0} instances with missing features/label/weight during training", cursor.SkippedRowCount); + } + ch.Check(NumGoodRows > 0, NoTrainingInstancesMessage); + + int[] labelsArray = labelsList.ToArray(); + float[] featuresArray = featuresList.ToArray(); + if (!useSampleWeights) + { + weightsList.Add(1); + } + float[] weightsArray = weightsList.ToArray(); + float[] betaArray = new float[WeightCount + BiasCount]; + + unsafe + { +#pragma warning disable MSML_SingleVariableDeclaration // Have only a single variable present per declaration + fixed (void* featuresPtr = &featuresArray[0], labelsPtr = &labelsArray[0], weightsPtr = &weightsArray[0], betaPtr = &betaArray[0]) +#pragma warning restore MSML_SingleVariableDeclaration // Have only a single variable present per declaration + { + OneDal.LogisticRegressionCompute(featuresPtr, labelsPtr, weightsPtr, useSampleWeights, betaPtr, NumGoodRows, nFeatures, ClassCount, L1Weight, L2Weight, OptTol, MaxIterations, MemorySize, numThreads); + } + } + + CurrentWeights = new VBuffer(betaArray.Length, betaArray); + } + private protected virtual void TrainCore(IChannel ch, RoleMappedData data) { Host.AssertValue(ch);
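
Usage note (illustrative, not part of the patch series): the oneDAL fast paths above are gated on the MLNET_BACKEND environment variable, an x64 process, and -- for the forest trainer -- the absence of a weight column. A minimal sketch of opting in from user code, assuming a standard ML.NET binary-classification pipeline (ModelInput is a hypothetical input type):

    // Select the oneDAL backend before training; the FastForest and L-BFGS-based
    // trainers will then route through the native OneDalNative library.
    Environment.SetEnvironmentVariable("MLNET_BACKEND", "ONEDAL");

    var mlContext = new MLContext();
    IDataView trainData = mlContext.Data.LoadFromTextFile<ModelInput>("train.tsv", hasHeader: true);

    // FeatureFraction is forced to 1.0 on the oneDAL path (a warning is logged),
    // so leave it at its default here.
    var trainer = mlContext.BinaryClassification.Trainers.FastForest(
        numberOfLeaves: 20, numberOfTrees: 100, minimumExampleCountPerLeaf: 10);

    var model = trainer.Fit(trainData);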