From 48b517f4630f636a8523f4b2ba4ef2ad6267ac61 Mon Sep 17 00:00:00 2001 From: darth-vader-lg Date: Fri, 18 Jun 2021 01:01:41 +0200 Subject: [PATCH 1/3] Speed up of the inference of saved_model(s). Signed-off-by: darth-vader-lg --- src/Microsoft.ML.TensorFlow/TensorflowTransform.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 2cdd868522..ee5cf8c2be 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -537,6 +537,7 @@ private sealed class Mapper : MapperBase private readonly bool[] _isInputVector; private readonly TensorShape[] _fullySpecifiedShapes; private readonly ConcurrentBag _runners; + private readonly OutputCache _outputCache; public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : base(Contracts.CheckRef(parent, nameof(parent)).Host.Register(nameof(Mapper)), inputSchema, parent) @@ -546,6 +547,7 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : _inputColIndices = new int[_parent.Inputs.Length]; _isInputVector = new bool[_parent.Inputs.Length]; _fullySpecifiedShapes = new TensorShape[_parent.Inputs.Length]; + _outputCache = new OutputCache(); for (int i = 0; i < _parent.Inputs.Length; i++) { if (!inputSchema.TryGetColumnIndex(_parent.Inputs[i], out _inputColIndices[i])) @@ -655,13 +657,12 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput(i)).ToArray(); var type = Tf2MlNetType(_parent.TFOutputTypes[iinfo]).RawType; Host.Assert(type == _parent.OutputTypes[iinfo].GetItemType().RawType); var srcTensorGetters = GetTensorValueGetters(input, _inputColIndices, _isInputVector, _parent.TFInputTypes, _fullySpecifiedShapes); - return Utils.MarshalInvoke(MakeGetter, type, input, iinfo, srcTensorGetters, activeOutputColNames, outputCache); + return Utils.MarshalInvoke(MakeGetter, type, input, iinfo, srcTensorGetters, activeOutputColNames, _outputCache); } private Delegate MakeGetter(DataViewRow input, int iinfo, ITensorValueGetter[] srcTensorGetters, string[] activeOutputColNames, OutputCache outputCache) where T : unmanaged From 7af106e1093dc35171c5da98e05c8eb3b510253e Mon Sep 17 00:00:00 2001 From: darth-vader-lg Date: Fri, 18 Jun 2021 17:28:17 +0200 Subject: [PATCH 2/3] Fixed TensorFlowTransform fitting problem. - Fixed the exception while fitting data with more than one input tensor. Followed the OnnxTransformer schema for the data view getters creation. Signed-off-by: darth-vader-lg --- .../TensorflowTransform.cs | 46 +++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index ee5cf8c2be..e52bc202d4 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -537,7 +537,6 @@ private sealed class Mapper : MapperBase private readonly bool[] _isInputVector; private readonly TensorShape[] _fullySpecifiedShapes; private readonly ConcurrentBag _runners; - private readonly OutputCache _outputCache; public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : base(Contracts.CheckRef(parent, nameof(parent)).Host.Register(nameof(Mapper)), inputSchema, parent) @@ -547,7 +546,6 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : _inputColIndices = new int[_parent.Inputs.Length]; _isInputVector = new bool[_parent.Inputs.Length]; _fullySpecifiedShapes = new TensorShape[_parent.Inputs.Length]; - _outputCache = new OutputCache(); for (int i = 0; i < _parent.Inputs.Length; i++) { if (!inputSchema.TryGetColumnIndex(_parent.Inputs[i], out _inputColIndices[i])) @@ -639,6 +637,38 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : _runners = new ConcurrentBag(); } + private Delegate CreateGetter(DataViewRow input, int iinfo, Func activeOutput, OutputCache outputCache) + { + Host.AssertValue(input); + + var activeOutputColNames = _parent.Outputs.Where((x, i) => activeOutput(i)).ToArray(); + + var type = Tf2MlNetType(_parent.TFOutputTypes[iinfo]).RawType; + Host.Assert(type == _parent.OutputTypes[iinfo].GetItemType().RawType); + var srcTensorGetters = GetTensorValueGetters(input, _inputColIndices, _isInputVector, _parent.TFInputTypes, _fullySpecifiedShapes); + return Utils.MarshalInvoke(MakeGetter, type, input, iinfo, srcTensorGetters, activeOutputColNames, outputCache); + } + + public override Delegate[] CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) + { + Contracts.Assert(input.Schema == InputSchema); + + OutputCache outputCacher = new OutputCache(); + + int n = OutputColumns.Value.Length; + var result = new Delegate[n]; + for (int i = 0; i < n; i++) { + if (!activeOutput(i)) + continue; + result[i] = CreateGetter(input, i, activeOutput, outputCacher); + } + disposer = () => + { + (outputCacher as IDisposable)?.Dispose(); + }; + return result; + } + private protected override void SaveModel(ModelSaveContext ctx) => _parent.SaveModel(ctx); private class OutputCache @@ -653,17 +683,7 @@ public OutputCache() } protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) - { - disposer = null; - Host.AssertValue(input); - - var activeOutputColNames = _parent.Outputs.Where((x, i) => activeOutput(i)).ToArray(); - - var type = Tf2MlNetType(_parent.TFOutputTypes[iinfo]).RawType; - Host.Assert(type == _parent.OutputTypes[iinfo].GetItemType().RawType); - var srcTensorGetters = GetTensorValueGetters(input, _inputColIndices, _isInputVector, _parent.TFInputTypes, _fullySpecifiedShapes); - return Utils.MarshalInvoke(MakeGetter, type, input, iinfo, srcTensorGetters, activeOutputColNames, _outputCache); - } + => throw new NotImplementedException("This should never be called!"); private Delegate MakeGetter(DataViewRow input, int iinfo, ITensorValueGetter[] srcTensorGetters, string[] activeOutputColNames, OutputCache outputCache) where T : unmanaged { From e2e5ae60d84356069b934377a546dcc468c25eec Mon Sep 17 00:00:00 2001 From: darth-vader-lg Date: Tue, 22 Jun 2021 11:28:07 +0200 Subject: [PATCH 3/3] Dispose of the cached tensors in the TensorFlowTransformer. - The cached tensors are disposed at the end of inference operations. Signed-off-by: darth-vader-lg --- .../TensorflowTransform.cs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index e52bc202d4..7c5aef7328 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -664,14 +664,14 @@ public override Delegate[] CreateGetters(DataViewRow input, Func acti } disposer = () => { - (outputCacher as IDisposable)?.Dispose(); + outputCacher.Dispose(); }; return result; } private protected override void SaveModel(ModelSaveContext ctx) => _parent.SaveModel(ctx); - private class OutputCache + private class OutputCache : IDisposable { public long Position; public Dictionary Outputs; @@ -680,6 +680,17 @@ public OutputCache() Position = -1; Outputs = new Dictionary(); } + + private bool _isDisposed; + + public void Dispose() + { + if (_isDisposed) + return; + foreach (var tensor in Outputs.Values) + tensor.Dispose(); + _isDisposed = true; + } } protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer)