Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Phi.Test
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Samples", "docs\samples\Microsoft.ML.GenAI.Samples\Microsoft.ML.GenAI.Samples.csproj", "{1D4AD9A3-19AF-432B-889D-A63FE6D7BD47}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.Core.Tests", "test\Microsoft.ML.GenAI.Core.Tests\Microsoft.ML.GenAI.Core.Tests.csproj", "{14AB0804-D4CE-4634-B544-5A8587620783}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Core.Tests", "test\Microsoft.ML.GenAI.Core.Tests\Microsoft.ML.GenAI.Core.Tests.csproj", "{14AB0804-D4CE-4634-B544-5A8587620783}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.LLaMA", "src\Microsoft.ML.GenAI.LLaMA\Microsoft.ML.GenAI.LLaMA.csproj", "{0AA6D5CB-195F-457A-8792-4221E76E6C44}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.LLaMA.Tests", "test\Microsoft.ML.GenAI.LLaMA.Tests\Microsoft.ML.GenAI.LLaMA.Tests.csproj", "{D202353D-6FAF-4263-9A01-BDCFBC92391F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -878,6 +882,22 @@ Global
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|Any CPU.Build.0 = Release|Any CPU
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|x64.ActiveCfg = Release|Any CPU
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|x64.Build.0 = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|x64.ActiveCfg = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|x64.Build.0 = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|Any CPU.Build.0 = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|x64.ActiveCfg = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|x64.Build.0 = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|x64.ActiveCfg = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|x64.Build.0 = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|Any CPU.Build.0 = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|x64.ActiveCfg = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -969,6 +989,8 @@ Global
{867FFC34-DFA7-400F-B9BB-85158326CE08} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{1D4AD9A3-19AF-432B-889D-A63FE6D7BD47} = {DA452A53-2E94-4433-B08C-041EDEC729E6}
{14AB0804-D4CE-4634-B544-5A8587620783} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{0AA6D5CB-195F-457A-8792-4221E76E6C44} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{D202353D-6FAF-4263-9A01-BDCFBC92391F} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
4 changes: 4 additions & 0 deletions NuGet.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<add key="dotnet5-roslyn" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" />
<add key="mlnet-daily" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json" />
<add key="mlnet-assets" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/machinelearning-assets/nuget/v3/index.json" />
<add key="dotnet-libraries-transport" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-libraries-transport/nuget/v3/index.json" />
<add key="dotnet8" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet8/nuget/v3/index.json" />
</packageSources>
<packageSourceMapping>
Expand Down Expand Up @@ -40,6 +41,9 @@
<packageSource key="mlnet-assets">
<package pattern="*" />
</packageSource>
<packageSource key="dotnet-libraries-transport">
<package pattern="*" />
</packageSource>
<packageSource key="dotnet8">
<package pattern="*" />
</packageSource>
Expand Down
51 changes: 51 additions & 0 deletions docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using AutoGen.Core;
using Microsoft.ML.GenAI.Core;
using Microsoft.ML.GenAI.Core.Extension;
using Microsoft.ML.GenAI.LLaMA;
using Microsoft.ML.Tokenizers;
using TorchSharp;
using static TorchSharp.torch;

namespace Microsoft.ML.GenAI.Samples.Llama;

/// <summary>
/// Sample that loads a Llama 3.1 model from a local Hugging Face weight folder,
/// builds a causal-LM inference pipeline, and sends a single coding task to an
/// AutoGen agent.
/// </summary>
internal class LlamaSample
{
    /// <summary>
    /// Loads the model and tokenizer, then asks the agent to write a small C# program.
    /// </summary>
    /// <returns>A task that completes when the agent has finished responding.</returns>
    /// <remarks>
    /// Returns <see cref="Task"/> rather than using <c>async void</c>: an async-void
    /// method cannot be awaited and any exception it throws is unobservable by the
    /// caller, which crashes the process or is silently lost.
    /// </remarks>
    public static async Task Run()
    {
        var device = "cuda";
        if (device == "cuda")
        {
            torch.InitializeDeviceType(DeviceType.CUDA);
        }

        // Half precision keeps the 8B model small enough to fit on a single GPU.
        var defaultType = ScalarType.Float16;
        torch.manual_seed(1);
        torch.set_default_dtype(defaultType);

        // NOTE(review): hard-coded local path — adjust to your own weight folder.
        var weightFolder = @"C:\Users\xiaoyuz\source\repos\Meta-Llama-3.1-8B-Instruct";
        var configName = "config.json";
        // The tokenizer model ships in the "original" subfolder of the HF checkpoint.
        var originalWeightFolder = Path.Combine(weightFolder, "original");

        Console.WriteLine("Loading Llama from huggingface model weight folder");
        // StartNew() already starts the stopwatch; no extra Start() call is needed.
        var stopWatch = System.Diagnostics.Stopwatch.StartNew();
        var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder);
        // layersOnTargetDevice: -1 places all layers on the target device.
        var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, layersOnTargetDevice: -1);
        stopWatch.Stop();
        Console.WriteLine($"Model loaded in {stopWatch.Elapsed.TotalSeconds:F1}s");

        var pipeline = new CausalLMPipeline<TiktokenTokenizer, LlamaForCausalLM>(tokenizer, model, device);

        var agent = new LlamaCausalLMAgent(pipeline, "assistant")
            .RegisterPrintMessage();

        var task = """
            Write a C# program to print the sum of two numbers. Use top-level statement, put code between ```csharp and ```.
            """;

        await agent.SendAsync(task);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.LLaMA\Microsoft.ML.GenAI.LLaMA.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.Phi\Microsoft.ML.GenAI.Phi.csproj" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public static async Task RunAsync()
torch.manual_seed(1);
torch.set_default_dtype(defaultType);
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var pipeline = Utils.LoadPhi3Mini4KFromFolder(weightFolder, device: device);
var pipeline = Utils.LoadPhi3Mini4KFromFolder(weightFolder, device: device, quantizeToInt8: false);

// agent
var agent = new Phi3Agent(pipeline, "assistant")
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public static ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> LoadPhi3Mini4KFromFo
string weightFolder,
string configName = "config.json",
string device = "cuda",
int modelSizeOnCudaInGB = 16,
int modelSizeOnCudaInGB = 55,
int modelSizeOnMemoryInGB = 64,
int modelSizeOnDiskInGB = 200,
bool quantizeToInt8 = false,
Expand Down
2 changes: 1 addition & 1 deletion eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
<MicrosoftMLTensorFlowTestModelsVersion>0.0.13-test</MicrosoftMLTensorFlowTestModelsVersion>
<MicrosoftMLTestDatabasesVersion>0.0.6-test</MicrosoftMLTestDatabasesVersion>
<MicrosoftMLTestModelsVersion>0.0.7-test</MicrosoftMLTestModelsVersion>
<MicrosoftMLTestTokenizersVersion>2.0.0-beta.24219.1</MicrosoftMLTestTokenizersVersion>
<MicrosoftMLTestTokenizersVersion>2.0.0-beta.24415.1</MicrosoftMLTestTokenizersVersion>
<SystemDataSqlClientVersion>4.8.6</SystemDataSqlClientVersion>
<SystemDataSQLiteCoreVersion>1.0.118</SystemDataSQLiteCoreVersion>
<XunitCombinatorialVersion>1.6.24</XunitCombinatorialVersion>
Expand Down
51 changes: 51 additions & 0 deletions src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,57 @@ public static Dictionary<string, string> InferDeviceMapForEachLayer(
return deviceMap;
}

/// <summary>
/// Infer the device placement for each dynamic layer in the model.
/// Layers are considered from largest to smallest and assigned to the devices
/// in the order they appear in <paramref name="numberOfLayerToBePlaced"/>.
/// </summary>
/// <param name="model">the model whose layers are being placed.</param>
/// <param name="numberOfLayerToBePlaced">a list of key-value pairs where the key is the device id (e.g. "cuda:0") and the value is the number of layers to be placed on the device.
/// If you want to place all remaining layers on the device, set that value to -1.
/// e.g. [{"cuda:0", 2}, {"cpu", -1}], the first 2 layers will be placed on "cuda:0" and the rest will be placed on "cpu".
/// </param>
/// <returns>a dictionary mapping each layer name to the device id it should be placed on.</returns>
/// <exception cref="ArgumentException">thrown when the placement counts do not cover every layer in the model.</exception>
public static Dictionary<string, string> InferDeviceMapForEachLayer(
    this nn.Module model,
    IEnumerable<KeyValuePair<string, int>> numberOfLayerToBePlaced)
{
    // Largest layers first, so the most memory-hungry layers land on the
    // earliest (typically fastest) devices in the list.
    var layerSizeMap = model.GetSizeForEachDynamicLayerInBytes()
        .OrderByDescending(x => x.Value)
        .ToList();

    var deviceMap = new Dictionary<string, string>();
    foreach (var (device, count) in numberOfLayerToBePlaced)
    {
        if (count == -1)
        {
            // -1 means "everything that's left": assign all remaining layers
            // to this device and stop consuming devices.
            foreach (var (layerName, _) in layerSizeMap)
            {
                deviceMap[layerName] = device;
            }

            layerSizeMap.Clear();
            break;
        }

        // Assign the `count` largest remaining layers to this device.
        foreach (var (layerName, _) in layerSizeMap.Take(count))
        {
            deviceMap[layerName] = device;
        }

        layerSizeMap = layerSizeMap.Skip(count).ToList();
    }

    if (layerSizeMap.Count > 0)
    {
        throw new ArgumentException("The layer count is not enough to cover all layers, did you forget to set the last layer count to -1?");
    }

    return deviceMap;
}

internal static string Peek(this nn.Module model)
{
var sb = new StringBuilder();
Expand Down
11 changes: 4 additions & 7 deletions src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,20 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="AutoGen.Core" Version="$(AutoGenVersion)" />
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="$(SemanticKernelVersion)" />
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
</ItemGroup>
<!--

<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<PackageReference Include="libtorch-cpu-win-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('Windows'))" PrivateAssets="all" />
<PackageReference Include="libtorch-cpu-linux-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('Linux'))" PrivateAssets="all" />
<PackageReference Include="libtorch-cpu-osx-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('OSX'))" PrivateAssets="all" />
</ItemGroup> -->

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj" />
</ItemGroup>

<ItemGroup>
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Phi" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.LLaMA" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.LLaMA.Tests" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Phi.Tests" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Core.Tests" />
</ItemGroup>
Expand Down
Loading