From a94c6cfc441dca947cd176422f14f1803a88b1a4 Mon Sep 17 00:00:00 2001
From: "R. G. Esteves"
Date: Wed, 19 Oct 2022 15:45:22 -0700
Subject: [PATCH 1/4] Initial structure and started fleshing out some sections

---
 docs/code/DeepLearningOverview.md | 83 +++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 docs/code/DeepLearningOverview.md

diff --git a/docs/code/DeepLearningOverview.md b/docs/code/DeepLearningOverview.md
new file mode 100644
index 0000000000..ef1a89dce5
--- /dev/null
+++ b/docs/code/DeepLearningOverview.md
@@ -0,0 +1,83 @@
+# What is Deep Learning?
+
+Deep Learning is an umbrella term for an approach to Machine Learning
+that makes use of "deep" Neural Networks, a kind of model originally
+inspired by the function of biological brains. These days, Deep
+Learning is probably the most visible area of Machine Learning, and it
+has seen amazing successes in areas like Computer Vision, Natural
+Language Processing and, in combination with Reinforcement Learning,
+more complicated settings such as game playing, decision making and
+simulation.
+
+A crucial element of the success of Deep Learning ("DL" in what
+follows) has been the existence of software frameworks and runtimes
+that facilitate the creation of Neural Network models and their
+execution for inference. Examples of such frameworks include
+TensorFlow, (Py)Torch and ONNX. ML.NET provides access to some of
+these frameworks, while maintaining the familiar pipeline interface.
+In this way, users of ML.NET can take advantage of some
+state-of-the-art models and applications of DL without the steep
+learning curve that other DL frameworks require.
+
+# Deep Learning vs Machine Learning?
+
+As mentioned above, DL relies on "Neural Network" models, in contrast
+with "traditional" Machine Learning techniques, which use a wider
+variety of architectures such as generalized linear models, decision
+trees or Support Vector Machines. The most immediate, practical
+implication of this difference is that DL methods may be better or
+worse suited to some kinds of data. The performance of DL methods on
+images, on text and on other non- or less-structured data has been
+well documented in the literature. Traditional Machine Learning
+methods such as gradient-boosted trees (XGBoost, LightGBM and
+CatBoost) seem to still have an edge when it comes to tabular data.
+The best approach is always to experiment with your particular data
+source and use case and determine for yourself, and ML.NET makes
+this experimentation relatively straightforward and pain-free.
+
+# Neural Network architectures
+
+A crucial characteristic differentiating DL from other classes (or
+schools) of ML is the use of artificial Neural Networks as models. At
+a high level, one can think of a Neural Network as a configuration of
+"processing units" where the output of each unit constitutes the
+input of another. Each of these units can take one or many inputs,
+and essentially carries out a weighted sum of its inputs, applies an
+offset (or "bias") and then a non-linear transformation function
+(called "activation"). Different arrangements of these relatively
+simple components have proven surprisingly expressive in describing
+decision boundaries in classification, regression functions and
+other structures central to ML tasks.
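+
+As a minimal illustration (not part of any ML.NET API; the function
+name and the choice of a ReLU activation are just assumptions for
+the example), the computation a single unit performs might look like
+this:
+
+```csharp
+using System;
+
+// A single artificial "neuron": a weighted sum of the inputs, plus a
+// bias (offset), followed by a non-linear activation (here, ReLU).
+static double Neuron(double[] inputs, double[] weights, double bias)
+{
+    double sum = bias;
+    for (int i = 0; i < inputs.Length; i++)
+        sum += inputs[i] * weights[i];
+    return Math.Max(0.0, sum); // ReLU activation: max(0, x)
+}
+
+// Example: Neuron(new[] { 1.0, 2.0 }, new[] { 0.5, -0.25 }, 0.1)
+// computes ReLU(1.0 * 0.5 + 2.0 * -0.25 + 0.1) = 0.1
+```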
+
+The past decade has seen an explosion of use cases, applications and
+techniques of DL, each more impressive than the last, pushing the
+boundaries of what functionality we thought a computer program could
+feature. This expansion is fueled by an increasing variety of
+operations that can be incorporated into Neural Networks, by a richer
+set of arrangements that these operations can be configured in, and
+by improved computational support for all of the above. In general,
+we can categorize these new neural architectures, and the use cases
+they enable, as follows (a more complete description can be found
+[here](https://learn.microsoft.com/en-us/azure/machine-learning/concept-deep-learning-vs-machine-learning#artificial-neural-networks),
+and a small sketch of the first arrangement follows the list):
+
+* Feed-forward Neural Network
+* Convolutional Neural Network
+* Recurrent Neural Network
+* Generative Adversarial Network
+* Transformers
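+
+As a rough sketch (a toy illustration in C#, not ML.NET API), a
+feed-forward arrangement is simply layers of the units shown
+earlier, with the outputs of one layer feeding the next:
+
+```csharp
+// One feed-forward layer: every unit reads the same input vector.
+// weights[u] and biases[u] parameterize unit u; Neuron is the
+// single-unit function sketched above.
+static double[] Layer(double[] inputs, double[][] weights, double[] biases)
+{
+    var outputs = new double[weights.Length];
+    for (int u = 0; u < weights.Length; u++)
+        outputs[u] = Neuron(inputs, weights[u], biases[u]);
+    return outputs;
+}
+
+// A network is a chain of such layers:
+// var hidden = Layer(inputs, w1, b1);
+// var output = Layer(hidden, w2, b2);
+```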
+
+# What can I use deep learning for?
+
+# Deep learning in ML.NET http://ml.net/
+
+# Train custom models
+
+# Image classification
+
+# Text classification (Needs tutorial)
+# Sentence Similarity (Needs tutorial - P1)
+
+# Consume pretrained models
+
+# TensorFlow https://learn.microsoft.com/en-us/dotnet/machine-learning/tutorials/text-classification-tf
+# ONNX https://github.com/dotnet/csharp-notebooks/blob/main/machine-learning/E2E-Text-Classification-API-with-Yelp-Dataset.ipynb

From 9cda5699da75643a0afe1157bfd1e52bb12be4d3 Mon Sep 17 00:00:00 2001
From: "R. G. Esteves"
Date: Wed, 19 Oct 2022 16:25:03 -0700
Subject: [PATCH 2/4] Some corrections and paragraph on DL usages

---
 docs/code/DeepLearningOverview.md | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/code/DeepLearningOverview.md b/docs/code/DeepLearningOverview.md
index ef1a89dce5..90245cf793 100644
--- a/docs/code/DeepLearningOverview.md
+++ b/docs/code/DeepLearningOverview.md
@@ -68,7 +68,19 @@ they enable, as follows (a more complete description can be found
 # What can I use deep learning for?
 
-# Deep learning in ML.NET http://ml.net/
+As stated above, the scope of application of DL techniques is
+rapidly expanding. DL architectures, in particular, have shown
+amazing (close-to-human in some cases) performance in tasks dealing
+with "unstructured data": images, audio, free-form text and the
+like. As a result, DL is constantly featured in image and audio
+classification and generation applications. When it comes to text
+processing, and more generally Natural Language Processing, DL
+methods have shown impressive results in tasks like translation,
+classification and generation. Some of the more spectacular recent
+applications of ML,
+such as "Stable Diffusion", are powered by sophisticated, large
+Neural Network architectures.
+
+# Deep learning in ML.NET
 
 # Train custom models

From f2f5fabc1fc7a02738a56ffd0f0becd3bc02d2f2 Mon Sep 17 00:00:00 2001
From: "R. G. Esteves"
Date: Thu, 20 Oct 2022 01:54:45 -0700
Subject: [PATCH 3/4] Starting fleshing out DL on ML.NET section

---
 docs/code/DeepLearningOverview.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/code/DeepLearningOverview.md b/docs/code/DeepLearningOverview.md
index 90245cf793..ed383b8358 100644
--- a/docs/code/DeepLearningOverview.md
+++ b/docs/code/DeepLearningOverview.md
@@ -77,11 +77,13 @@ processing, and more generally Natural Language Processing, DL
 methods have shown impressive results in tasks like translation,
 classification and generation. Some of the more spectacular recent
 applications of ML,
-such as "Stable Diffusion", are powered by sophisticated, large
+such as "[Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion)", are powered by sophisticated, large
 Neural Network architectures.
 
 # Deep learning in ML.NET
 
+A central concern of DL is which Neural Network architecture (that
+is, which specific configuration of operations) the model will have,
+and to this end DL frameworks like TensorFlow and PyTorch feature
+expressive Domain-Specific Languages for describing such
+architectures in detail. ML.NET departs from this practice and
+concentrates on the consumption of pre-trained models (i.e.,
+architectures that have been specified *and* trained in other
+frameworks).
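+
+As a (purely illustrative) sketch of what consuming such a
+pre-trained model looks like, the following uses the
+Microsoft.ML.OnnxTransformer package to plug an ONNX model into the
+familiar ML.NET pipeline; the file name and column names are
+placeholders whose real values depend on the model at hand:
+
+```csharp
+using Microsoft.ML;
+
+var mlContext = new MLContext();
+
+// Wrap the pre-trained model (architecture *and* weights) in a
+// pipeline stage; no description of the network itself is needed.
+var pipeline = mlContext.Transforms.ApplyOnnxModel(
+    modelFile: "model.onnx",               // placeholder path
+    outputColumnNames: new[] { "output" }, // as named inside the model
+    inputColumnNames: new[] { "input" });  // as named inside the model
+
+// `data` is an IDataView whose columns match the model's inputs:
+// var model = pipeline.Fit(data);
+// var scored = model.Transform(data);
+```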
+
 # Train custom models
 
 # Image classification

From 3a8f08f846b7c5d50965f051a9553b6a7f5631fb Mon Sep 17 00:00:00 2001
From: "R. G. Esteves"
Date: Mon, 25 Sep 2023 08:05:00 -0700
Subject: [PATCH 4/4] Addresses #6533

---
 .../OlsLinearRegression.cs                            |  6 +++++-
 src/Microsoft.ML.OneDal/OneDalUtils.cs                |  6 +++++-
 .../Standard/LogisticRegression/LbfgsPredictorBase.cs |  6 +++++-
 src/Native/OneDalNative/OneDalAlgorithms.cpp          | 11 ++++++++++-
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/Microsoft.ML.Mkl.Components/OlsLinearRegression.cs b/src/Microsoft.ML.Mkl.Components/OlsLinearRegression.cs
index 1d072c1290..af9a16574e 100644
--- a/src/Microsoft.ML.Mkl.Components/OlsLinearRegression.cs
+++ b/src/Microsoft.ML.Mkl.Components/OlsLinearRegression.cs
@@ -252,9 +252,13 @@ private void ComputeOneDalRegression(IChannel ch, FloatLabelCursor.Factory curso
                         {
                             featuresArray[rowOffset * (m - 1) + k] = 0;
                         }
-                        featuresArray[rowOffset * (m - 1) + j] = values[indices[j]];
+                        featuresArray[rowOffset * (m - 1) + indices[j]] = values[j];
                         i = indices[j] + 1;
                     }
+                    for (int j = i; j < m - 1; ++j)
+                    {
+                        featuresArray[rowOffset * (m - 1) + j] = 0;
+                    }
                 }
                 n++;
             }

diff --git a/src/Microsoft.ML.OneDal/OneDalUtils.cs b/src/Microsoft.ML.OneDal/OneDalUtils.cs
index 061c4a3a88..269cea75e9 100644
--- a/src/Microsoft.ML.OneDal/OneDalUtils.cs
+++ b/src/Microsoft.ML.OneDal/OneDalUtils.cs
@@ -72,9 +72,13 @@ internal static long GetTrainData(IChannel channel, FloatLabelCursor.Factory cur
                         {
                             featuresList.Add(0);
                         }
-                        featuresList.Add(values[indices[j]]);
+                        featuresList.Add(values[j]);
                         i = indices[j] + 1;
                     }
+                    for (int j = i; j < numberOfFeatures; ++j)
+                    {
+                        featuresList.Add(0);
+                    }
                 }
                 n++;
             }

diff --git a/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs
index c5008a76ca..713e457f6f 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/LogisticRegression/LbfgsPredictorBase.cs
@@ -523,9 +523,13 @@ private protected virtual void TrainCoreOneDal(IChannel ch, RoleMappedData data)
                         {
                             featuresList.Add(0);
                         }
-                        featuresList.Add(values[indices[j]]);
+                        featuresList.Add(values[j]);
                         i = indices[j] + 1;
                     }
+                    for (int j = i; j < nFeatures; ++j)
+                    {
+                        featuresList.Add(0);
+                    }
                 }
             }
             NumGoodRows = cursor.KeptRowCount;

diff --git a/src/Native/OneDalNative/OneDalAlgorithms.cpp b/src/Native/OneDalNative/OneDalAlgorithms.cpp
index a02656baea..f30830614f 100644
--- a/src/Native/OneDalNative/OneDalAlgorithms.cpp
+++ b/src/Native/OneDalNative/OneDalAlgorithms.cpp
@@ -570,7 +570,7 @@ void logisticRegressionLBFGSComputeTemplate(FPType * featuresPtr, int * labelsPt
     SharedPtr<optimization_solver::lbfgs::Batch<FPType>> lbfgsAlgorithm(new optimization_solver::lbfgs::Batch<FPType>());
     lbfgsAlgorithm->parameter.batchSize = featuresTable->getNumberOfRows();
     lbfgsAlgorithm->parameter.correctionPairBatchSize = featuresTable->getNumberOfRows();
-    lbfgsAlgorithm->parameter.L = 1;
+    lbfgsAlgorithm->parameter.L = 10;
     lbfgsAlgorithm->parameter.m = m;
     lbfgsAlgorithm->parameter.accuracyThreshold = accuracyThreshold;
     lbfgsAlgorithm->parameter.nIterations = nIterations;
@@ -642,6 +642,15 @@ void logisticRegressionLBFGSComputeTemplate(FPType * featuresPtr, int * labelsPt
 
     if (verbose)
     {
+        printf("Intercepts and coefficients:\n");
+        for (size_t i = 0; i < nClasses; ++i)
+        {
+            for (size_t j = 0; j < nColumns + 1; ++j)
+            {
+                printf("%f ", betaPtr[i * (nColumns + 1) + j]);
+            }
+            printf("\n");
+        }
         optimization_solver::iterative_solver::ResultPtr solverResult = lbfgsAlgorithm->getResult();
         NumericTablePtr nIterationsTable = solverResult->get(optimization_solver::iterative_solver::nIterations);
         BlockDescriptor<int> nIterationsBlock;