From 280c9985d6baec099af1e10272493198d7d5389b Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Tue, 18 Jun 2024 09:33:17 -0700 Subject: [PATCH 01/12] add GenAI.Core project --- Microsoft.ML.sln | 33 ++- eng/Versions.props | 4 +- .../Extension/ModuleExtension.cs | 274 ++++++++++++++++++ .../Extension/TensorExtension.cs | 33 +++ .../Microsoft.ML.GenAI.Core.csproj | 22 ++ .../Module/DynamicLoadingModule.cs | 50 ++++ .../Module/IDynamicLoadModule.cs | 15 + .../Module/IQuantizeModule.cs | 6 + .../Pipeline/CasualLMModelInput.cs | 44 +++ .../Pipeline/CasualLMModelOutput.cs | 30 ++ .../Pipeline/CausalLMPipeline.cs | 141 +++++++++ .../Utility/AttentionMaskConverter.cs | 175 +++++++++++ src/Microsoft.ML.GenAI.Core/Utility/Cache.cs | 85 ++++++ src/Microsoft.ML.GenAI.Core/Utils.cs | 159 ++++++++++ 14 files changed, 1058 insertions(+), 13 deletions(-) create mode 100644 src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Extension/TensorExtension.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj create mode 100644 src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Utility/Cache.cs create mode 100644 src/Microsoft.ML.GenAI.Core/Utils.cs diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index 5763a903b4..1fa8823763 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -172,9 +172,11 @@ Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Microsoft.ML.FSharp.Tests", EndProject 
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Data.Analysis.PerformanceTests", "test\Microsoft.Data.Analysis.PerformanceTests\Microsoft.Data.Analysis.PerformanceTests.csproj", "{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.TorchSharp.Tests", "test\Microsoft.ML.TorchSharp.Tests\Microsoft.ML.TorchSharp.Tests.csproj", "{AB8D68F1-6C3E-41FD-B0EC-A093E009341D}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TorchSharp.Tests", "test\Microsoft.ML.TorchSharp.Tests\Microsoft.ML.TorchSharp.Tests.csproj", "{AB8D68F1-6C3E-41FD-B0EC-A093E009341D}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.TensorFlow.Tests", "test\Microsoft.ML.TensorFlow.Tests\Microsoft.ML.TensorFlow.Tests.csproj", "{763FF013-8309-4680-A769-B54E7BB99612}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TensorFlow.Tests", "test\Microsoft.ML.TensorFlow.Tests\Microsoft.ML.TensorFlow.Tests.csproj", "{763FF013-8309-4680-A769-B54E7BB99612}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.Core", "src\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj", "{DB2CA055-8ABD-4E3E-8089-5B64C3415E85}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -512,6 +514,14 @@ Global {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}.Release|Any CPU.Build.0 = Release|Any CPU {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}.Release|x64.ActiveCfg = Release|Any CPU {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}.Release|x64.Build.0 = Release|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|x64.ActiveCfg = Debug|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|x64.Build.0 = Debug|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|Any CPU.ActiveCfg = Release|Any CPU 
+ {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|Any CPU.Build.0 = Release|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|x64.ActiveCfg = Release|Any CPU + {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|x64.Build.0 = Release|Any CPU {9222FC9D-599A-49A5-B685-08CC9A5C81D7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {9222FC9D-599A-49A5-B685-08CC9A5C81D7}.Debug|Any CPU.Build.0 = Debug|Any CPU {9222FC9D-599A-49A5-B685-08CC9A5C81D7}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -820,14 +830,14 @@ Global {763FF013-8309-4680-A769-B54E7BB99612}.Release|Any CPU.Build.0 = Release|Any CPU {763FF013-8309-4680-A769-B54E7BB99612}.Release|x64.ActiveCfg = Release|Any CPU {763FF013-8309-4680-A769-B54E7BB99612}.Release|x64.Build.0 = Release|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|Any CPU.Build.0 = Debug|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|x64.ActiveCfg = Debug|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Debug|x64.Build.0 = Debug|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|Any CPU.ActiveCfg = Release|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|Any CPU.Build.0 = Release|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|x64.ActiveCfg = Release|Any CPU - {39E89702-1A46-4D5B-BA50-530D11309B5E}.Release|x64.Build.0 = Release|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Debug|x64.ActiveCfg = Debug|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Debug|x64.Build.0 = Debug|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Release|Any CPU.ActiveCfg = Release|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Release|Any CPU.Build.0 = Release|Any CPU + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Release|x64.ActiveCfg = Release|Any CPU + 
{DB2CA055-8ABD-4E3E-8089-5B64C3415E85}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -874,6 +884,7 @@ Global {11A5210E-2EA7-42F1-80DB-827762E9C781} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31} = {DA452A53-2E94-4433-B08C-041EDEC729E6} {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {39E89702-1A46-4D5B-BA50-530D11309B5E} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {9222FC9D-599A-49A5-B685-08CC9A5C81D7} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {6C29AA9B-054B-4762-BEA5-D305B932AA80} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {4805129D-78C8-46D4-9519-0AD9B0574D6D} = {09EADF06-BE25-4228-AB53-95AE3E15B530} @@ -913,7 +924,7 @@ Global {FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {AB8D68F1-6C3E-41FD-B0EC-A093E009341D} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {763FF013-8309-4680-A769-B54E7BB99612} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} - {39E89702-1A46-4D5B-BA50-530D11309B5E} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {DB2CA055-8ABD-4E3E-8089-5B64C3415E85} = {09EADF06-BE25-4228-AB53-95AE3E15B530} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/eng/Versions.props b/eng/Versions.props index b1d4979662..95200a2599 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -63,8 +63,8 @@ 0.20.1 2 2.3.1 - 0.101.5 - 2.1.0.1 + 0.102.5 + 2.2.1.1 1.12.4 3.1.2 diff --git a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs new file mode 100644 index 0000000000..3a4c012446 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs @@ -0,0 +1,274 @@ +using Microsoft.ML.GenAI.Core; +using Microsoft.ML.GenAI.Core.Extension; +using Phi.Module; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using 
System.Text; +using TorchSharp; +using static TorchSharp.torch; + +namespace Microsoft.ML.GenAI.Core.Extension; + +internal static class ModuleExtension +{ + public static long GetSizeInBytes(this nn.Module model) + { + var stateDict = model.state_dict(); + long size = 0; + foreach (var (_, value) in stateDict) + { + size += value.numel() * value.element_size(); + } + + return size; + } + + public static Dictionary GetSizeForEachDynamicLayerInBytes(this nn.Module model) + { + var stateDict = model.named_children(); + if (stateDict.Count() == 0) + { + return new(); + } + else + { + var dict = new Dictionary(); + + foreach (var (key, value) in stateDict) + { + if (value is IDynamicLoadModule) + { + dict[key] = value.GetSizeInBytes(); + } + else + { + var subDict = value.GetSizeForEachDynamicLayerInBytes(); + foreach (var (subKey, subValue) in subDict) + { + dict[key + "." + subKey] = subValue; + } + } + } + + return dict; + } + } + + public static void ToQuantizedModule( + this T model) + where T : nn.Module + { + foreach (var (_, value) in model.named_children()) + { + if (value is IQuantizeModule quantizeModule) + { + quantizeModule.Quantize(); + } + else + { + value.ToQuantizedModule(); + } + } + } + + public static T ToDynamicLoadingModel( + this T model, + Dictionary deviceMap, + string targetDevice) + where T : nn.Module + { + if (deviceMap.Count == 0) + { + model.to(new Device(targetDevice)); + + return model; + } + + //var dynamicModules = model.named_modules().Where(x => x.module is IDynamicLoadModule).Select(x => x.name).ToList(); + // for each module in the model, update device if it is IDynamicLoadModule + foreach (var (key, value) in model.named_children()) + { + if (value is IDynamicLoadModule dynamicModule) + { + var device = deviceMap[key]; + if (device != targetDevice) + { + dynamicModule.LoadToDeviceFunc = (nn.Module module) => + { + module.to(new Device(targetDevice)); + }; + dynamicModule.UnloadFromDeviceFunc = (nn.Module module) => + { + 
module.to(new Device(device)); + }; + } + + value.to(new Device(device)); + } + else + { + var childrenDeviceMap = deviceMap.Where(x => x.Key.StartsWith($"{key}.")).ToDictionary(x => x.Key.Substring($"{key}.".Length), x => x.Value); + value.ToDynamicLoadingModel(childrenDeviceMap, targetDevice); + } + } + + return model; + } + + /// + /// Infer the device map for each layer in the model. + /// The device map is a dictionary where the key is the device id (e.g. "cuda:0") and the value is the memory size in bytes of the device. + /// When inferring the device map, each layer in the model will be placed on the device in the order of the devices list. + /// + /// + /// a list of device ids (e.g. ["cuda:0", "cpu", "disk"]) + /// a map where the key is the device id (e.g. "cuda:0") and the value is the memory size in bytes of the device + /// + public static Dictionary InferDeviceMapForEachLayer( + this nn.Module model, + string[] devices, + Dictionary deviceSizeMapInByte) + { + var layerSizeMap = model.GetSizeForEachDynamicLayerInBytes(); + var sizeToRemainOnEachDevice = 2 * layerSizeMap.Max(x => x.Value); + var deviceMap = new Dictionary(); + foreach (var device in devices) + { + long size = deviceSizeMapInByte[device]; + var remainingLayerSizeMap = layerSizeMap.Where(x => !deviceMap.ContainsKey(x.Key)).ToDictionary(x => x.Key, x => x.Value); + // larger layer fit first + foreach (var (key, value) in remainingLayerSizeMap.OrderByDescending(x => x.Value)) + { + if (size >= value) + { + deviceMap[key] = device; + size -= value; + } + + if (size < sizeToRemainOnEachDevice) + { + break; + } + } + } + + return deviceMap; + } + + public static string? Generate( + this CausalLMPipeline pipeline, + string prompt, + int maxLen = 128, + float temperature = 0.7f, + float topP = 0.9f, + string[]? 
stopSequences = null, + int eosId = 0, + string device = "cpu", + bool bos = true, + bool eos = false, + bool echo = false) + { + using var newScope = NewDisposeScope(); + var inputIds = pipeline.Tokenizer.EncodeToIds(prompt); + var inputTensor = torch.tensor(inputIds.ToArray(), dtype: ScalarType.Int64, device: device).unsqueeze(0); + var attentionMask = torch.ones_like(inputTensor); + + // set up stop token ids + // stop token ids: [[eosId], [stopSequence1], [stopSequence2], ...] + // when causal language model generates tokens, it will stop when it generates any token in stopSequences + List stopTokenIds = [[eosId]]; + if (stopSequences != null) + { + stopTokenIds.AddRange(stopSequences.Select(x => pipeline.Tokenizer.EncodeToIds(x).ToArray())); + } + + (var token, var _) = pipeline.Generate(inputTensor, attentionMask, temperature: temperature, maxLen: maxLen, topP: topP, stopTokenSequence: stopTokenIds.ToArray(), echo: echo); + + var tokenIds = token[0].to_type(ScalarType.Int32).data().ToArray(); + + return pipeline.Tokenizer.Decode(tokenIds); + } + + + public static string Peek(this nn.Module model) + { + var sb = new StringBuilder(); + var stateDict = model.state_dict(); + // preview state_dict + int i = 0; + foreach (var (key, value) in stateDict.OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase)) + { + var str = value.Peek(key); + sb.AppendLine($"{i}: {str}"); + i++; + } + + var res = sb.ToString(); + + return res; + } + + public static string PeekShape(this nn.Module model) + { + var sb = new StringBuilder(); + var stateDict = model.state_dict(); + // preview state_dict + int i = 0; + foreach (var (key, value) in stateDict.OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase)) + { + // shape str: [x, y, z] + var shapeStr = string.Join(", ", value.shape); + sb.AppendLine($"{i}: {key} shape: [{shapeStr}]"); + i++; + } + + var res = sb.ToString(); + + return res; + } + + public static void LoadStateDict(this Dictionary dict, string location) + { + using 
FileStream stream = File.OpenRead(location); + using BinaryReader reader = new BinaryReader(stream); + var num = reader.Decode(); + for (int i = 0; i < num; i++) + { + var key = reader.ReadString(); + Tensor tensor = dict[key]; + + var originalDevice = tensor.device; + var originalType = tensor.dtype; + if (tensor.dtype == ScalarType.BFloat16) + { + tensor = tensor.to_type(ScalarType.Float32); + } + + TensorExtensionMethods.Load(ref tensor!, reader, skip: false); + + tensor = tensor!.to_type(originalType); + dict[key] = tensor; + } + } + + public static long Decode(this BinaryReader reader) + { + long num = 0L; + int num2 = 0; + while (true) + { + long num3 = reader.ReadByte(); + num += (num3 & 0x7F) << num2 * 7; + if ((num3 & 0x80) == 0L) + { + break; + } + + num2++; + } + + return num; + } +} diff --git a/src/Microsoft.ML.GenAI.Core/Extension/TensorExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/TensorExtension.cs new file mode 100644 index 0000000000..d621e3cb43 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Extension/TensorExtension.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using TorchSharp; +using static TorchSharp.torch; + +namespace Microsoft.ML.GenAI.Core.Extension; + +internal static class TensorExtension +{ + public static string Peek(this Tensor tensor, string id, int n = 10) + { + var device = tensor.device; + var dType = tensor.dtype; + // if type is fp16, convert to fp32 + if (tensor.dtype == ScalarType.Float16) + { + tensor = tensor.to_type(ScalarType.Float32); + } + tensor = tensor.cpu(); + var shapeString = string.Join(',', tensor.shape); + var tensor1D = tensor.reshape(-1); + var tensorIndex = torch.arange(tensor1D.shape[0], dtype: ScalarType.Float32).to(tensor1D.device).sqrt(); + var avg = (tensor1D * tensorIndex).sum(); + avg = avg / tensor1D.sum(); + // keep four decimal places + avg = avg.round(4); + var str = $"{id}: sum: {avg.ToSingle()} dType: {dType} shape: [{shapeString}]"; + + return str; + } +} diff --git a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj new file mode 100644 index 0000000000..3346b40caa --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj @@ -0,0 +1,22 @@ + + + + net8.0 + false + enable + preview + + + + + + + + + + + + + + + diff --git a/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs b/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs new file mode 100644 index 0000000000..42f3442f95 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using TorchSharp; +using static TorchSharp.torch; + +namespace Phi.Module; + +public class DynamicLoadingModule : torch.nn.Module, IDynamicLoadModule + where T : nn.Module + where T1 : Tensor +{ + private readonly T _model; + + public DynamicLoadingModule(T model) + : base(model.GetName()) + { + this._model = model; + this.RegisterComponents(); + } + + public static 
DynamicLoadingModule CreateFromModel(T model) + { + return new DynamicLoadingModule(model); + } + + public Action? LoadToDeviceFunc { get; set; } + public Action? UnloadFromDeviceFunc { get; set; } + +#pragma warning disable MSML_GeneralName // This name should be PascalCased + public override TResult forward(T1 input) +#pragma warning restore MSML_GeneralName // This name should be PascalCased + { + if (LoadToDeviceFunc != null) + { + LoadToDeviceFunc(this); + } + + var output = this._model.forward(input); + + if (UnloadFromDeviceFunc != null) + { + UnloadFromDeviceFunc(this); + } + + return output; + } +} diff --git a/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs b/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs new file mode 100644 index 0000000000..25ba6cbc95 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using static TorchSharp.torch; + +namespace Phi.Module; + +public interface IDynamicLoadModule +{ + public Action? LoadToDeviceFunc { get; set; } + + public Action? 
UnloadFromDeviceFunc { get; set; } +} diff --git a/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs b/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs new file mode 100644 index 0000000000..2cb065c7f7 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs @@ -0,0 +1,6 @@ +namespace Phi.Module; + +public interface IQuantizeModule +{ + public void Quantize(); +} diff --git a/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs new file mode 100644 index 0000000000..ef16b3078b --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs @@ -0,0 +1,44 @@ +using static TorchSharp.torch; + +namespace Microsoft.ML.GenAI.Core; + +public class CasualLMModelInput +{ + public CasualLMModelInput( + Tensor inputIds, + Tensor? attentionMask = null, + Tensor? positionIds = null, + int pastKeyValuesLength = 0, + Tensor? inputsEmbeds = null, + bool useCache = false, + bool outputAttentions = false, + bool outputHiddenStates = false) + { + this.InputIds = inputIds; + this.AttentionMask = attentionMask; + this.PositionIds = positionIds; + this.PastKeyValuesLength = pastKeyValuesLength; + this.InputEmbeddings = inputsEmbeds; + this.UseCache = useCache; + this.OutputAttentions = outputAttentions; + this.OutputHiddenStates = outputHiddenStates; + } + + public Tensor InputIds { get; set; } + + public Tensor? AttentionMask { get; set; } + + public Tensor? PositionIds { get; set; } + + public IKVCache? OverrideCache { get; set; } + + public int PastKeyValuesLength { get; set; } + + public Tensor? 
InputEmbeddings { get; set; } + + public bool UseCache { get; set; } + + public bool OutputAttentions { get; set; } + + public bool OutputHiddenStates { get; set; } +} diff --git a/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs new file mode 100644 index 0000000000..1ac56f9e31 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs @@ -0,0 +1,30 @@ +using static TorchSharp.torch; + +namespace Microsoft.ML.GenAI.Core; + +public class CasualLMModelOutput +{ + public CasualLMModelOutput( + Tensor lastHiddenState, + Tensor logits, + Tensor[]? allHiddenStates = null, + Tensor[]? attentions = null, + IKVCache? cache = null) + { + this.LastHiddenState = lastHiddenState; + this.AllHiddenStates = allHiddenStates; + this.Logits = logits; + this.Attentions = attentions; + this.Cache = cache; + } + + public Tensor Logits { get; set; } + + public Tensor LastHiddenState { get; set; } + + public Tensor[]? AllHiddenStates { get; set; } + + public Tensor[]? Attentions { get; set; } + + public IKVCache? 
Cache { get; set; } +} diff --git a/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs b/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs new file mode 100644 index 0000000000..3d8309ac4b --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs @@ -0,0 +1,141 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.ML.Tokenizers; +using TorchSharp; +using static TorchSharp.torch; + +namespace Microsoft.ML.GenAI.Core; + +public class CausalLMPipeline : CausalLMPipeline + where TTokenizer : Tokenizer + where TModel : nn.Module +{ + public CausalLMPipeline( + TTokenizer tokenizer, + TModel model, + string device = "cpu") + : base(tokenizer, model, device) + { + } +} + +public class CausalLMPipeline +{ + public CausalLMPipeline( + Tokenizer tokenizer, + nn.Module model, + string device = "cpu") + { + this.Tokenizer = tokenizer; + this.Model = model; + this.Device = device; + } + + public Tokenizer Tokenizer { get; } + + public nn.Module Model { get; } + + public Device Device { get; } + + public virtual ( + Tensor, // output token ids [batch_size, sequence_length] + Tensor // output logits [batch_size, sequence_length, vocab_size] + ) Generate( + Tensor inputIds, // input token ids [batch_size, sequence_length] + Tensor attentionMask, // attention mask [batch_size, sequence_length] + int[][] stopTokenSequence, + float temperature = 0.7f, + float topP = 0.9f, + int maxLen = 128, + bool echo = false) + { + using var newScope = NewDisposeScope(); + var batch = inputIds.shape[0]; + var device = inputIds.device; + var promptLength = (int)inputIds.shape[1]; + var totalLen = promptLength + maxLen; + + using (var noGrad = torch.no_grad()) + { + var prevPos = 0; + var eosReached = torch.tensor(new bool[batch], device: device); + torch.Tensor? 
logits = default; + var cache = new DynamicKVCache(); + if (promptLength == totalLen) + { + var input = new CasualLMModelInput(inputIds, attentionMask, pastKeyValuesLength: 0) + { + OverrideCache = cache, + }; + var output = this.Model.forward(input); + logits = output.Logits; + } + for (var curPos = promptLength; curPos != totalLen; curPos++) + { + var input = new CasualLMModelInput(inputIds[.., prevPos..curPos], attentionMask[.., prevPos..curPos], pastKeyValuesLength: prevPos) + { + OverrideCache = cache, + }; + var output = this.Model.forward(input); + logits = output.Logits; + torch.Tensor nextToken; + if (temperature > 0) + { + var probs = torch.softmax(logits[.., -1] / temperature, dim: -1); + nextToken = this.SampleTopP(probs, topP); + } + else + { + nextToken = torch.argmax(logits[.., -1], dim: -1); + } + + nextToken = nextToken.reshape(-1); + inputIds = torch.cat([inputIds, nextToken.unsqueeze(1)], dim: -1); + attentionMask = torch.cat([attentionMask, attentionMask.new_ones(attentionMask.shape[0], 1)], dim: -1); + foreach (var stopSequence in stopTokenSequence) + { + // determine if the last n tokens are the stop sequence + var lastN = inputIds[.., ^stopSequence.Length..]; + var lastNMatch = lastN == torch.tensor(stopSequence, device: device); + eosReached |= lastNMatch.all(dim: -1); + } + if (eosReached.all().item()) + { + break; + } + + // pBar.Tick(curPos, message); + var nextTokenIds = nextToken.to_type(ScalarType.Int32).data().ToArray(); + var nextTokenStr = this.Tokenizer.Decode(nextTokenIds); + + prevPos = curPos; + } + + if (echo) + { + // return entire inputIds and logits + return (inputIds.MoveToOuterDisposeScope(), logits!.MoveToOuterDisposeScope()); + } + else + { + // return [batch_size, promptLength..] 
and [batch_size, promptLength.., vocab_size] + return (inputIds[.., promptLength..].MoveToOuterDisposeScope(), logits![.., promptLength..].MoveToOuterDisposeScope()); + } + } + } + + protected torch.Tensor SampleTopP(torch.Tensor logits, float topP) + { + (var probsSort, var probsIndex) = torch.sort(logits, dim: -1, descending: true); + var cumSum = torch.cumsum(probsSort, dim: -1); + var mask = cumSum - probsSort > topP; + probsSort[mask] = 0f; + probsSort /= probsSort.sum(dim: -1, keepdim: true); + var nextToken = torch.multinomial(probsSort, num_samples: 1); + nextToken = torch.gather(probsIndex, dim: -1, index: nextToken); + return nextToken; + } +} diff --git a/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs b/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs new file mode 100644 index 0000000000..abd15c6c6f --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs @@ -0,0 +1,175 @@ +using static TorchSharp.torch.nn; +using static TorchSharp.torch; +using TorchSharp.Modules; +using TorchSharp; +using System.Threading.Tasks; +using System; + +namespace Microsoft.ML.GenAI.Core; + +public class AttentionMaskConverter +{ + private readonly bool _isCasual; + private readonly int? _slidingWindow; + + public AttentionMaskConverter(bool isCausal, int? slidingWindow) + { + this._isCasual = isCausal; + this._slidingWindow = slidingWindow; + } + + /// + /// Converts 2D attention mask to 4D attention mask by expanding mask to (bsz, head_dim=1, query_length, + /// key_value_length) shape and by adding a large negative bias to not-attended positions.If attention_mask is + /// causal, a causal mask will be added. + /// + /// + /// + /// + /// + /// + public Tensor To4D( + Tensor attentionMask2d, + int queryLength, + ScalarType dType, + int? keyValueLength = null) + { + long[] inputShape = [attentionMask2d.shape[0], queryLength]; + + // create causal mask + // [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + Tensor? 
casual4dMask = null; + if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCasual) + { + if (keyValueLength is null) + { + throw new ArgumentException("key_value_length should be provided when attention_mask is causal"); + } + + var pastKeyValuesLength = keyValueLength.Value - queryLength; + casual4dMask = MakeCasualMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow); + } + else if (this._slidingWindow is not null) + { + throw new NotImplementedException("Sliding window is not supported for non-causal masks"); + } + + var expandedAttnMask = ExpandMask(attentionMask2d, dType, queryLength).to(attentionMask2d.device); + if (casual4dMask is not null) + { + var min = torch.finfo(dType).min; + expandedAttnMask = casual4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min); + } + + return expandedAttnMask; + } + + public Tensor? ToCasual4D( + int batchSize, + int queryLength, + int keyValueLength, + ScalarType dType, + Device device) + { + if (!_isCasual) + { + throw new ArgumentException("This is not a casual mask"); + } + + long[] inputShape = [batchSize, queryLength]; + var pastKeyValueLength = keyValueLength - queryLength; + + // create causal mask + // [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + Tensor? causal4DMask = null; + if (queryLength > 1 || this._slidingWindow is int) + { + causal4DMask = MakeCasualMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow); + } + + return causal4DMask; + } + + public static Tensor MakeCasualMask( + long[] inputIdsShape, + ScalarType dType, + Device device, + int pastKeyValuesLength = 0, + int? slidingWindow = null) + { + // Make causal mask used for bi-directional self-attention. 
+ var bsz = inputIdsShape[0]; + var tgtLen = inputIdsShape[1]; + var min = torch.finfo(dType).min; + var mask = torch.full([tgtLen, tgtLen], min, dtype: dType, device: device); + var maskCondition = torch.arange(tgtLen, device: device); + mask.masked_fill_(maskCondition < (maskCondition + 1).view(tgtLen, 1), 0); + mask = mask.to(dType); + + + if (pastKeyValuesLength > 0) + { + mask = torch.cat([torch.zeros([tgtLen, pastKeyValuesLength], dtype: dType, device: device), mask], dim: -1); + } + + if (slidingWindow is int window) + { + var diagonal = pastKeyValuesLength - window - 1; + var contextMask = torch.tril(torch.ones([tgtLen, tgtLen], dtype: ScalarType.Bool, device: device), diagonal: diagonal); + mask = mask.masked_fill(contextMask, min); + } + + // return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length) + + return mask.unsqueeze(0).unsqueeze(0).expand(bsz, 1, tgtLen, tgtLen + pastKeyValuesLength); + } + + /// + /// Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` + /// + /// The attention mask should be 2D. + /// The device to place the mask tensor. + /// The data type of the mask tensor. + /// The length of past key values in cache. + /// The sliding window size. + /// The input shape should be a tuple that defines `(batch_size, query_length)`. + public static Tensor? Create4DCausalAttentionMask( + Tensor? attentionMask, + long[] inputShape, + ScalarType dType, + Device device, + int pastKeyValuesLength = 0, + int? 
slidingWindow = null) + { + var converter = new AttentionMaskConverter(isCausal: true, slidingWindow: slidingWindow); + var batchSize = (int)inputShape[0]; + var queryLength = (int)inputShape[1]; + var keyValueLength = pastKeyValuesLength + queryLength; + if (attentionMask is not null) + { + if (attentionMask.ndim != 2) + { + throw new ArgumentException("Attention mask should be 2D"); + } + return converter.To4D(attentionMask, (int)inputShape[1], dType, keyValueLength); + } + + return converter.ToCasual4D(batchSize, queryLength, keyValueLength, dType, device); + } + + public static Tensor ExpandMask( + Tensor mask, + ScalarType dType, + int? tgtLen = null) + { + var bsz = (int)mask.shape[0]; + var srcLen = (int)mask.shape[1]; + tgtLen ??= srcLen; + + var expandedMask = mask.unsqueeze(1).unsqueeze(1).expand(bsz, 1, tgtLen.Value, srcLen).to(dType); + var invertedMask = 1.0 - expandedMask; + var min = torch.finfo(dType).min; + + return invertedMask.masked_fill(invertedMask.to(ScalarType.Bool), min); + } +} diff --git a/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs b/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs new file mode 100644 index 0000000000..026c2cdff3 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using TorchSharp; +using static TorchSharp.torch; + +namespace Microsoft.ML.GenAI.Core; + +public interface IKVCache : IDictionary, IDisposable +{ + public (Tensor, Tensor) UpdateKVCache(Tensor key, Tensor value, int layerIndex); + + public int GetSeqLen(int layerIndex = 0); + + public int? 
GetMaxLength();
+
+    public int GetUsableLength(int newSeqLen, int layerIndex = 0);
+}
+
+public class DynamicKVCache : Dictionary<int, (Tensor, Tensor)>, IKVCache
+{
+    private readonly DisposeScope _disposeScope = NewDisposeScope(); // cached tensors are moved into this scope and released by Dispose()
+    public DynamicKVCache()
+    {
+    }
+    // Concatenates key/value onto the layer's cached pair along dim -2 (or stores them when the layer is new) and returns the cached pair.
+    public (Tensor, Tensor) UpdateKVCache(Tensor key, Tensor value, int layerIndex)
+    {
+        if (this.TryGetValue(layerIndex, out var cached)) // single lookup instead of ContainsKey + indexer
+        {
+            var (oldKey, oldValue) = cached;
+            oldKey.DetachFromDisposeScope();
+            oldValue.DetachFromDisposeScope();
+
+            var newKey = torch.cat([oldKey, key], -2).MoveToOtherDisposeScope(this._disposeScope);
+            var newValue = torch.cat([oldValue, value], -2).MoveToOtherDisposeScope(this._disposeScope);
+            // The previous tensors were detached from their scope above, so they must be released explicitly.
+            oldKey.Dispose();
+            oldValue.Dispose();
+
+            this[layerIndex] = (newKey, newValue);
+        }
+        else
+        {
+            this.Add(layerIndex, (key.MoveToOtherDisposeScope(this._disposeScope), value.MoveToOtherDisposeScope(this._disposeScope)));
+        }
+
+        return this[layerIndex];
+    }
+    // Returns the size of dim -2 of the cached key for the layer, or 0 when nothing is cached for it.
+    public int GetSeqLen(int layerIndex = 0)
+    {
+        if (this.TryGetValue(layerIndex, out var kv))
+        {
+            return kv.Item1.IntShape()[^2];
+        }
+
+        return 0;
+    }
+    // Always null: this implementation reports no maximum length.
+    public int? GetMaxLength()
+    {
+        return null;
+    }
+    // Returns how many cached positions remain usable once newSeqLen new positions are appended.
+    public int GetUsableLength(int newSeqLen, int layerIndex = 0)
+    {
+        var maxLength = this.GetMaxLength();
+        var previousSeqLen = this.GetSeqLen(layerIndex);
+        // Over the limit: the new positions take priority, so only (max - new) cached positions stay usable.
+        if (maxLength.HasValue && previousSeqLen + newSeqLen > maxLength.Value)
+        {
+            return maxLength.Value - newSeqLen;
+        }
+
+        return previousSeqLen;
+    }
+    // Disposes the scope that the cached tensors were moved into.
+    public void Dispose()
+    {
+        this._disposeScope.Dispose();
+    }
+}
diff --git a/src/Microsoft.ML.GenAI.Core/Utils.cs b/src/Microsoft.ML.GenAI.Core/Utils.cs
new file mode 100644
index 0000000000..84f0062951
--- /dev/null
+++ b/src/Microsoft.ML.GenAI.Core/Utils.cs
@@ -0,0 +1,159 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using TorchSharp.Modules;
+using TorchSharp;
+using static TorchSharp.torch;
+using static TorchSharp.torch.nn;
+
+public static class Utils
+{
+    public static Tensor ApplyRotaryEmbeddings(Tensor input, Tensor freqsComplex)
+    {
+        // Separate the last dimension pairs of two values, representing the real and imaginary parts of the complex number
+        // Two consecutive values will become a single complex number
+        // (B, Seq_Len, H, Head_Dim) -> (B, Seq_Len, H, Head_Dim/2)
+        var inputComplex = input.to_type(ScalarType.Float32).reshape(input.shape[0], input.shape[1], input.shape[2], -1, 2).view_as_complex();
+        freqsComplex = freqsComplex.to(input.device);
+
+        // Reshape the freqs_complex tensor to match the shape of the x_complex tensor.
So we need to add the batch dimension and the head dimension + // (Seq_Len, Head_Dim/2) --> (1, Seq_Len, 1, Head_Dim/2) + var freqsComplexReshaped = freqsComplex.unsqueeze(0).unsqueeze(2); + + // Multiply each complex number in the x_complex tensor by the corresponding complex number in the freqs_complex tensor + // Which results in the rotation of the complex number as shown in the Figure 1 of the paper + // (B, Seq_Len, H, Head_Dim/2) * (1, Seq_Len, 1, Head_Dim/2) = (B, Seq_Len, H, Head_Dim/2) + var rotatedComplex = inputComplex * freqsComplexReshaped; + // Console.WriteLine(rotated_complex.mean().ToSingle()); + + // Convert the complex number back to the real number + // (B, Seq_Len, H, Head_Dim/2) -> (B, Seq_Len, H, Head_Dim/2, 2) + var rotated = rotatedComplex.view_as_real(); + + // (B, Seq_Len, H, Head_Dim/2, 2) -> (B, Seq_Len, H, Head_Dim) + var rotatedReshaped = rotated.reshape(rotated.shape[0], rotated.shape[1], rotated.shape[2], -1); + + return rotatedReshaped.type_as(input); + } + + public static Tensor PrecomputeThetaPosFrequencies(int headDim, int seqLen, string device, float theta = 10000.0f) + { + // As written in the paragraph 3.2.2 of the paper + // >> In order to generalize our results in 2D to any xi ∈ Rd where **d is even**, [...] + if (headDim % 2 != 0) + { + throw new ArgumentException("Dimension must be divisible by 2", nameof(headDim)); + } + + // Build the theta parameter + // According to the formula theta_i = 10000^(-2(i-1)/dim) for i = [1, 2, ... dim/2] + // Shape: (Head_Dim / 2) + var thetaNumerator = torch.arange(0, headDim, 2).to(torch.float32).to(device); + // Shape: (Head_Dim / 2) + var thetaInput = torch.pow(theta, -1.0f * (thetaNumerator / headDim)).to(device); // (Dim / 2) + // Construct the positions (the "m" parameter) + // Shape: (Seq_Len) + var m = torch.arange(seqLen, device: device); + // Multiply each theta by each position using the outer product. 
+ // Shape: (Seq_Len) outer_product* (Head_Dim / 2) -> (Seq_Len, Head_Dim / 2) + var thetaPositionFrequencies = torch.outer(m, thetaInput).to(torch.float32).to(device); + + // We can compute complex numbers in the polar form c = R * exp(m * theta), where R = 1 as follows: + // (Seq_Len, Head_Dim / 2) -> (Seq_Len, Head_Dim / 2) + var freqsComplex = torch.polar(torch.ones_like(thetaPositionFrequencies), thetaPositionFrequencies); + + return freqsComplex; + } + + // python + // def rotate_half(x): + // """Rotates half the hidden dims of the input.""" + // x1 = x[..., : x.shape[-1] // 2] + // x2 = x[..., x.shape[-1] // 2 :] + // return torch.cat((-x2, x1), dim=-1) + public static Tensor RotateHalf(Tensor x) + { + var x1 = x[.., .., .., ..(int)(x.shape[^1] / 2)]; + var x2 = x[.., .., .., (int)(x.shape[^1] / 2)..]; + // (x1 * x1 * x2).Peek("x1 * x1 * x2"); + return torch.cat([-x2, x1], dim: -1); + } + + public static (Tensor, Tensor) ApplyRotaryPosEmb(Tensor q, Tensor k, Tensor cos, Tensor sin, Tensor? positionIds = null, int unsqueezeDim = 1) + { + // The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + // sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + // that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + // k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + // cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + // the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. 
+
+        if (positionIds is not null)
+        {
+            cos = cos[positionIds!].unsqueeze(unsqueezeDim);
+            sin = sin[positionIds!].unsqueeze(unsqueezeDim);
+        }
+        else
+        {
+            cos = cos.unsqueeze(unsqueezeDim);
+            sin = sin.unsqueeze(unsqueezeDim);
+        }
+
+        var qEmbed = q * cos;
+        qEmbed += RotateHalf(q) * sin;
+
+        var kEmbed = k * cos;
+        kEmbed += RotateHalf(k) * sin;
+        // var kEmbed = (k * cos) + (RotateHalf(k) * sin);
+        return (qEmbed, kEmbed);
+    }
+    // Maps an activation name to a new module instance ("swish" is an alias of "silu"); unknown names throw ArgumentException.
+    public static Module GetActivation(string actFn)
+    {
+        return actFn switch
+        {
+            "silu" => nn.SiLU(),
+            "relu" => nn.ReLU(),
+            "gelu" => nn.GELU(),
+            "tanh" => nn.Tanh(),
+            "swish" => nn.SiLU(),
+            _ => throw new ArgumentException($"Invalid activation function: {actFn}", nameof(actFn)),
+        };
+    }
+
+    // Repeats each KV head nRep times for x laid out as (batch, seqLen, kvHeads, headDim).
+    public static Tensor Phi2RepeatKV(Tensor x, int nRep)
+    {
+        var batchSize = x.shape[0];
+        var seqLen = x.shape[1];
+        var nKVHeads = x.shape[2];
+        var headDim = x.shape[3];
+        if (nRep == 1)
+        {
+            return x;
+        }
+        // reshape, not view: the expanded tensor has a zero-stride axis that cannot be merged without copying.
+        return x.unsqueeze(3)
+            .expand(batchSize, seqLen, nKVHeads, nRep, headDim)
+            .reshape(batchSize, seqLen, nKVHeads * nRep, headDim);
+    }
+
+    // Repeats each KV head nRep times for x laid out as (batch, kvHeads, seqLen, headDim).
+    public static Tensor Phi3RepeatKV(Tensor x, int nRep)
+    {
+        var batchSize = x.shape[0];
+        var nKVHeads = x.shape[1];
+        var seqLen = x.shape[2];
+        var headDim = x.shape[3];
+        if (nRep == 1)
+        {
+            return x;
+        }
+        // The repeat axis must sit right after the head axis (dim 2) to match the expand shape; reshape because expand is non-contiguous.
+        return x.unsqueeze(2)
+            .expand(batchSize, nKVHeads, nRep, seqLen, headDim)
+            .reshape(batchSize, nKVHeads * nRep, seqLen, headDim);
+    }
+}
From be7690112acc5be954820eb0f89aab37a3ef543f Mon Sep 17 00:00:00 2001
From: XiaoYun Zhang
Date: Tue, 18 Jun 2024 09:42:32 -0700
Subject: [PATCH 02/12] fix format

---
 .../Extension/ModuleExtension.cs | 9 ++++++---
 .../Module/DynamicLoadingModule.cs | 8 ++++++--
 .../Module/IDynamicLoadModule.cs | 8 ++++++--
 .../Module/IQuantizeModule.cs | 6 +++++-
 .../Pipeline/CasualLMModelInput.cs | 6 +++++-
 .../Pipeline/CasualLMModelOutput.cs | 6 +++++-
 .../Pipeline/CausalLMPipeline.cs | 6 +++++-
 .../Utility/AttentionMaskConverter.cs | 14 +++++++++-----
src/Microsoft.ML.GenAI.Core/Utility/Cache.cs | 6 +++++- src/Microsoft.ML.GenAI.Core/Utils.cs | 9 +++++++-- 10 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs index 3a4c012446..fa5849a08d 100644 --- a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs +++ b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs @@ -1,11 +1,14 @@ -using Microsoft.ML.GenAI.Core; -using Microsoft.ML.GenAI.Core.Extension; -using Phi.Module; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; +using Microsoft.ML.GenAI.Core; +using Microsoft.ML.GenAI.Core.Extension; using TorchSharp; using static TorchSharp.torch; diff --git a/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs b/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs index 42f3442f95..49b8b46477 100644 --- a/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs +++ b/src/Microsoft.ML.GenAI.Core/Module/DynamicLoadingModule.cs @@ -1,4 +1,8 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text; @@ -6,7 +10,7 @@ using TorchSharp; using static TorchSharp.torch; -namespace Phi.Module; +namespace Microsoft.ML.GenAI.Core; public class DynamicLoadingModule : torch.nn.Module, IDynamicLoadModule where T : nn.Module diff --git a/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs b/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs index 25ba6cbc95..d215d68bb3 100644 --- a/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs +++ b/src/Microsoft.ML.GenAI.Core/Module/IDynamicLoadModule.cs @@ -1,11 +1,15 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using static TorchSharp.torch; -namespace Phi.Module; +namespace Microsoft.ML.GenAI.Core; public interface IDynamicLoadModule { diff --git a/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs b/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs index 2cb065c7f7..164936f3d7 100644 --- a/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs +++ b/src/Microsoft.ML.GenAI.Core/Module/IQuantizeModule.cs @@ -1,4 +1,8 @@ -namespace Phi.Module; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace Microsoft.ML.GenAI.Core; public interface IQuantizeModule { diff --git a/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs index ef16b3078b..31b7530b88 100644 --- a/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs +++ b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelInput.cs @@ -1,4 +1,8 @@ -using static TorchSharp.torch; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using static TorchSharp.torch; namespace Microsoft.ML.GenAI.Core; diff --git a/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs index 1ac56f9e31..10dde68852 100644 --- a/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs +++ b/src/Microsoft.ML.GenAI.Core/Pipeline/CasualLMModelOutput.cs @@ -1,4 +1,8 @@ -using static TorchSharp.torch; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using static TorchSharp.torch; namespace Microsoft.ML.GenAI.Core; diff --git a/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs b/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs index 3d8309ac4b..fa61f7b43a 100644 --- a/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs +++ b/src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs @@ -1,4 +1,8 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text; diff --git a/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs b/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs index abd15c6c6f..b292c3d731 100644 --- a/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs +++ b/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs @@ -1,9 +1,13 @@ -using static TorchSharp.torch.nn; -using static TorchSharp.torch; -using TorchSharp.Modules; -using TorchSharp; -using System.Threading.Tasks; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + using System; +using System.Threading.Tasks; +using TorchSharp; +using TorchSharp.Modules; +using static TorchSharp.torch; +using static TorchSharp.torch.nn; namespace Microsoft.ML.GenAI.Core; diff --git a/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs b/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs index 026c2cdff3..4647cfd122 100644 --- a/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs +++ b/src/Microsoft.ML.GenAI.Core/Utility/Cache.cs @@ -1,4 +1,8 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; using System.Collections.Generic; using System.Linq; using System.Text; diff --git a/src/Microsoft.ML.GenAI.Core/Utils.cs b/src/Microsoft.ML.GenAI.Core/Utils.cs index 84f0062951..2f46e7d43d 100644 --- a/src/Microsoft.ML.GenAI.Core/Utils.cs +++ b/src/Microsoft.ML.GenAI.Core/Utils.cs @@ -1,13 +1,18 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; -using TorchSharp.Modules; using TorchSharp; +using TorchSharp.Modules; using static TorchSharp.torch; using static TorchSharp.torch.nn; +namespace Microsoft.ML.GenAI.Core; public static class Utils { public static Tensor ApplyRotaryEmbeddings(Tensor input, Tensor freqsComplex) From f115479b384a2558b95e39712531dbaaea93afe9 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Tue, 18 Jun 2024 09:47:37 -0700 Subject: [PATCH 03/12] move causalml generate method to causalmlpipelineextension --- .../Extension/CausalMLPipelineExtension.cs | 51 +++++++++++++++++++ .../Extension/ModuleExtension.cs | 35 ------------- 2 files changed, 51 insertions(+), 35 deletions(-) create mode 100644 src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs diff --git a/src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs new file mode 100644 index 0000000000..4bb2f64a66 --- /dev/null +++ b/src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using static TorchSharp.torch; +using TorchSharp; + +namespace Microsoft.ML.GenAI.Core.Extension; + +public static class CausalMLPipelineExtension +{ + + public static string? Generate( + this CausalLMPipeline pipeline, + string prompt, + int maxLen = 128, + float temperature = 0.7f, + float topP = 0.9f, + string[]? 
stopSequences = null, + int eosId = 0, + string device = "cpu", + bool bos = true, + bool eos = false, + bool echo = false) + { + using var newScope = NewDisposeScope(); + var inputIds = pipeline.Tokenizer.EncodeToIds(prompt); + var inputTensor = torch.tensor(inputIds.ToArray(), dtype: ScalarType.Int64, device: device).unsqueeze(0); + var attentionMask = torch.ones_like(inputTensor); + + // set up stop token ids + // stop token ids: [[eosId], [stopSequence1], [stopSequence2], ...] + // when causal language model generates tokens, it will stop when it generates any token in stopSequences + List stopTokenIds = [[eosId]]; + if (stopSequences != null) + { + stopTokenIds.AddRange(stopSequences.Select(x => pipeline.Tokenizer.EncodeToIds(x).ToArray())); + } + + (var token, var _) = pipeline.Generate(inputTensor, attentionMask, temperature: temperature, maxLen: maxLen, topP: topP, stopTokenSequence: stopTokenIds.ToArray(), echo: echo); + + var tokenIds = token[0].to_type(ScalarType.Int32).data().ToArray(); + + return pipeline.Tokenizer.Decode(tokenIds); + } +} diff --git a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs index fa5849a08d..e7b253d325 100644 --- a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs +++ b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs @@ -160,41 +160,6 @@ public static Dictionary InferDeviceMapForEachLayer( return deviceMap; } - public static string? Generate( - this CausalLMPipeline pipeline, - string prompt, - int maxLen = 128, - float temperature = 0.7f, - float topP = 0.9f, - string[]? 
stopSequences = null, - int eosId = 0, - string device = "cpu", - bool bos = true, - bool eos = false, - bool echo = false) - { - using var newScope = NewDisposeScope(); - var inputIds = pipeline.Tokenizer.EncodeToIds(prompt); - var inputTensor = torch.tensor(inputIds.ToArray(), dtype: ScalarType.Int64, device: device).unsqueeze(0); - var attentionMask = torch.ones_like(inputTensor); - - // set up stop token ids - // stop token ids: [[eosId], [stopSequence1], [stopSequence2], ...] - // when causal language model generates tokens, it will stop when it generates any token in stopSequences - List stopTokenIds = [[eosId]]; - if (stopSequences != null) - { - stopTokenIds.AddRange(stopSequences.Select(x => pipeline.Tokenizer.EncodeToIds(x).ToArray())); - } - - (var token, var _) = pipeline.Generate(inputTensor, attentionMask, temperature: temperature, maxLen: maxLen, topP: topP, stopTokenSequence: stopTokenIds.ToArray(), echo: echo); - - var tokenIds = token[0].to_type(ScalarType.Int32).data().ToArray(); - - return pipeline.Tokenizer.Decode(tokenIds); - } - - public static string Peek(this nn.Module model) { var sb = new StringBuilder(); From 487b7d4bb1d75229173d1259f2fda98c09a2ed89 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Wed, 19 Jun 2024 09:01:09 -0700 Subject: [PATCH 04/12] fix build error from api breakchange in torchsharp --- src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs | 2 +- .../NasBert/Optimizers/BaseOptimizer.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs index f8f8167cce..8568239fc5 100644 --- a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs +++ b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs @@ -35,7 +35,7 @@ public class ConvModule : Module public ConvModule(int inChannel, int outChannel, int kernelSize, int stride = 1, int padding = 0, int dilation = 1, bool bias = true, bool useRelu = 
true) : base(nameof(ConvModule)) { - this.conv = nn.Conv2d(inputChannel: inChannel, outputChannel: outChannel, kernelSize: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias); + this.conv = nn.Conv2d(in_channels: inChannel, out_channels: outChannel, kernelSize: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias); this.useRelu = useRelu; if (this.useRelu) { diff --git a/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs b/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs index ff94553b93..b7c0595094 100644 --- a/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs +++ b/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs @@ -63,7 +63,7 @@ public void Step() public double GetGradNorm() { return Math.Sqrt(Parameters - .Select(p => p.grad()) + .Select(p => p.grad) .Where(grad => grad.IsNotNull()) // parameters unused have no gradient .Select(grad => grad.square().sum().ToDouble()) .Sum()); @@ -82,7 +82,7 @@ public void MultiplyGrads(double c) { foreach (var p in Parameters) { - using var grad = p.grad(); + using var grad = p.grad; if (grad.IsNotNull()) { grad.mul_(c); From 4607227ab1802e5df97777dbbff1f51cf9d6c4b5 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Wed, 19 Jun 2024 10:21:52 -0700 Subject: [PATCH 05/12] update package reference --- .../Microsoft.ML.TorchSharp.Tests.csproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj index 4d7de3e37e..0091475ff4 100644 --- a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj +++ b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj @@ -25,10 +25,10 @@ - - + + From 2ab68b8023cbb8410f7bcdb7db466b1b25dd24a0 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Wed, 19 Jun 2024 10:24:12 -0700 Subject: [PATCH 06/12] fix 
build error --- .../Microsoft.ML.TorchSharp.Tests.csproj | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj index 0091475ff4..fe4e783706 100644 --- a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj +++ b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj @@ -25,11 +25,7 @@ - - - + From a390717dc4c94cd9566aa24890a044ac51a93291 Mon Sep 17 00:00:00 2001 From: Xiaoyun Zhang Date: Fri, 21 Jun 2024 12:15:02 -0700 Subject: [PATCH 07/12] Update job-template.yml --- build/ci/job-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/ci/job-template.yml b/build/ci/job-template.yml index 4c033c30cd..d437a2960f 100644 --- a/build/ci/job-template.yml +++ b/build/ci/job-template.yml @@ -121,7 +121,7 @@ jobs: - ${{ if eq(parameters.nightlyBuild, 'false') }}: - ${{ if eq(parameters.innerLoop, 'false') }}: - ${{ if and(eq(parameters.runSpecific, 'false'), eq(parameters.useVSTestTask, 'false')) }}: - - script: set PATH=%PATH%;%USERPROFILE%\.nuget\packages\libtorch-cpu-win-x64\2.1.0.1\runtimes\win-x64\native;%USERPROFILE%\.nuget\packages\torchsharp\0.101.5\runtimes\win-x64\native & ${{ parameters.buildScript }} /p:Build=false -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} -test -integrationTest /p:Coverage=${{ parameters.codeCoverage }} $(testTargetFramework) + - script: set PATH=%PATH%;%USERPROFILE%\.nuget\packages\libtorch-cpu-win-x64\2.2.1.1\runtimes\win-x64\native;%USERPROFILE%\.nuget\packages\torchsharp\0.102.5\runtimes\win-x64\native & ${{ parameters.buildScript }} /p:Build=false -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} -test -integrationTest 
/p:Coverage=${{ parameters.codeCoverage }} $(testTargetFramework) displayName: Run All Tests. - ${{ if and(eq(parameters.runSpecific, 'true'), eq(parameters.useVSTestTask, 'false')) }}: - script: ${{ parameters.buildScript }} /p:Build=false -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} -test -integrationTest /p:TestRunnerAdditionalArguments='-trait$(spaceValue)Category=RunSpecificTest' /p:Coverage=${{ parameters.codeCoverage }} $(testTargetFramework) From 656066965777343d772e0bc22a467c9349babe17 Mon Sep 17 00:00:00 2001 From: Xiaoyun Zhang Date: Fri, 21 Jun 2024 12:42:59 -0700 Subject: [PATCH 08/12] Update Microsoft.ML.TorchSharp.Tests.csproj --- .../Microsoft.ML.TorchSharp.Tests.csproj | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj index fe4e783706..4824d46ea8 100644 --- a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj +++ b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj @@ -25,7 +25,10 @@ - + + + + From 9fcfec7fa306391157000b6d56ab82c6d04834ef Mon Sep 17 00:00:00 2001 From: Xiaoyun Zhang Date: Fri, 21 Jun 2024 12:43:45 -0700 Subject: [PATCH 09/12] Update Microsoft.ML.TorchSharp.Tests.csproj --- .../Microsoft.ML.TorchSharp.Tests.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj index 4824d46ea8..4d7de3e37e 100644 --- a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj +++ b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj @@ -29,6 +29,7 @@ + From a86bb7efe8cbb93751381a7dbb583fa2a6046466 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Mon, 24 Jun 2024 09:18:54 
-0700 Subject: [PATCH 10/12] revert change for update torchsharp runtime --- eng/Versions.props | 4 ++-- src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs | 2 +- .../NasBert/Optimizers/BaseOptimizer.cs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/eng/Versions.props b/eng/Versions.props index 95200a2599..b1d4979662 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -63,8 +63,8 @@ 0.20.1 2 2.3.1 - 0.102.5 - 2.2.1.1 + 0.101.5 + 2.1.0.1 1.12.4 3.1.2 diff --git a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs index 8568239fc5..f8f8167cce 100644 --- a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs +++ b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs @@ -35,7 +35,7 @@ public class ConvModule : Module public ConvModule(int inChannel, int outChannel, int kernelSize, int stride = 1, int padding = 0, int dilation = 1, bool bias = true, bool useRelu = true) : base(nameof(ConvModule)) { - this.conv = nn.Conv2d(in_channels: inChannel, out_channels: outChannel, kernelSize: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias); + this.conv = nn.Conv2d(inputChannel: inChannel, outputChannel: outChannel, kernelSize: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias); this.useRelu = useRelu; if (this.useRelu) { diff --git a/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs b/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs index b7c0595094..ff94553b93 100644 --- a/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs +++ b/src/Microsoft.ML.TorchSharp/NasBert/Optimizers/BaseOptimizer.cs @@ -63,7 +63,7 @@ public void Step() public double GetGradNorm() { return Math.Sqrt(Parameters - .Select(p => p.grad) + .Select(p => p.grad()) .Where(grad => grad.IsNotNull()) // parameters unused have no gradient .Select(grad => grad.square().sum().ToDouble()) .Sum()); @@ -82,7 +82,7 @@ public 
void MultiplyGrads(double c) { foreach (var p in Parameters) { - using var grad = p.grad; + using var grad = p.grad(); if (grad.IsNotNull()) { grad.mul_(c); From d478444ebb8e20cd114d68087c0df54f309b2fd6 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Mon, 24 Jun 2024 09:34:45 -0700 Subject: [PATCH 11/12] use explicit torchsharp version --- .../Microsoft.ML.GenAI.Core.csproj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj index 3346b40caa..6e1ffed0c9 100644 --- a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj +++ b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj @@ -9,10 +9,10 @@ - - - - + + + + From e39c8fc07f45c7b8d59a015bf8531211240a1c9b Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Tue, 25 Jun 2024 16:04:15 -0700 Subject: [PATCH 12/12] fix comments --- build/ci/job-template.yml | 2 +- ...usalMLPipelineExtension.cs => CausalLMPipelineExtension.cs} | 3 +-- src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) rename src/Microsoft.ML.GenAI.Core/Extension/{CausalMLPipelineExtension.cs => CausalLMPipelineExtension.cs} (97%) diff --git a/build/ci/job-template.yml b/build/ci/job-template.yml index d437a2960f..4c033c30cd 100644 --- a/build/ci/job-template.yml +++ b/build/ci/job-template.yml @@ -121,7 +121,7 @@ jobs: - ${{ if eq(parameters.nightlyBuild, 'false') }}: - ${{ if eq(parameters.innerLoop, 'false') }}: - ${{ if and(eq(parameters.runSpecific, 'false'), eq(parameters.useVSTestTask, 'false')) }}: - - script: set PATH=%PATH%;%USERPROFILE%\.nuget\packages\libtorch-cpu-win-x64\2.2.1.1\runtimes\win-x64\native;%USERPROFILE%\.nuget\packages\torchsharp\0.102.5\runtimes\win-x64\native & ${{ parameters.buildScript }} /p:Build=false -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ 
parameters.architecture }} -test -integrationTest /p:Coverage=${{ parameters.codeCoverage }} $(testTargetFramework) + - script: set PATH=%PATH%;%USERPROFILE%\.nuget\packages\libtorch-cpu-win-x64\2.1.0.1\runtimes\win-x64\native;%USERPROFILE%\.nuget\packages\torchsharp\0.101.5\runtimes\win-x64\native & ${{ parameters.buildScript }} /p:Build=false -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} -test -integrationTest /p:Coverage=${{ parameters.codeCoverage }} $(testTargetFramework) displayName: Run All Tests. - ${{ if and(eq(parameters.runSpecific, 'true'), eq(parameters.useVSTestTask, 'false')) }}: - script: ${{ parameters.buildScript }} /p:Build=false -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} -test -integrationTest /p:TestRunnerAdditionalArguments='-trait$(spaceValue)Category=RunSpecificTest' /p:Coverage=${{ parameters.codeCoverage }} $(testTargetFramework) diff --git a/src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/CausalLMPipelineExtension.cs similarity index 97% rename from src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs rename to src/Microsoft.ML.GenAI.Core/Extension/CausalLMPipelineExtension.cs index 4bb2f64a66..3a1041ee8a 100644 --- a/src/Microsoft.ML.GenAI.Core/Extension/CausalMLPipelineExtension.cs +++ b/src/Microsoft.ML.GenAI.Core/Extension/CausalLMPipelineExtension.cs @@ -12,9 +12,8 @@ namespace Microsoft.ML.GenAI.Core.Extension; -public static class CausalMLPipelineExtension +public static class CausalLMPipelineExtension { - public static string? 
Generate( this CausalLMPipeline pipeline, string prompt, diff --git a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs index e7b253d325..6395ffd3fd 100644 --- a/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs +++ b/src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs @@ -89,7 +89,6 @@ public static T ToDynamicLoadingModel( return model; } - //var dynamicModules = model.named_modules().Where(x => x.module is IDynamicLoadModule).Select(x => x.name).ToList(); // for each module in the model, update device if it is IDynamicLoadModule foreach (var (key, value) in model.named_children()) {