diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index e69d535369..d22e7ee013 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -1,4 +1,5 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 + +Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.1.32120.378 MinimumVisualStudioVersion = 10.0.40219.1 @@ -65,8 +66,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CpuMath.Perfor EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CpuMath.UnitTests", "test\Microsoft.ML.CpuMath.UnitTests\Microsoft.ML.CpuMath.UnitTests.csproj", "{E97D8B5A-3035-4D41-9B0D-77FF8FB8D132}" EndProject -Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Microsoft.ML.FSharp.Tests", "test\Microsoft.ML.FSharp.Tests\Microsoft.ML.FSharp.Tests.fsproj", "{802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.ImageAnalytics", "src\Microsoft.ML.ImageAnalytics\Microsoft.ML.ImageAnalytics.csproj", "{00E38F77-1E61-4CDF-8F97-1417D4E85053}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Mkl.Components", "src\Microsoft.ML.Mkl.Components\Microsoft.ML.Mkl.Components.csproj", "{A7222F41-1CF0-47D9-B80C-B4D77B027A61}" @@ -92,8 +91,8 @@ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Samples", "docs\samples\Microsoft.ML.Samples\Microsoft.ML.Samples.csproj", "{ECB71297-9DF1-48CE-B93A-CD969221F9B6}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.SamplesUtils", "src\Microsoft.ML.SamplesUtils\Microsoft.ML.SamplesUtils.csproj", "{11A5210E-2EA7-42F1-80DB-827762E9C781}" -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Samples.OneDal", "docs\samples\Microsoft.ML.Samples.OneDal\Microsoft.ML.Samples.OneDal.csproj", "{38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Microsoft.ML.Samples.OneDal", "docs\samples\Microsoft.ML.Samples.OneDal\Microsoft.ML.Samples.OneDal.csproj", "{38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Recommender", "src\Microsoft.ML.Recommender\Microsoft.ML.Recommender.csproj", "{C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}" EndProject @@ -157,12 +156,18 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.SearchSpace.Te EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.SourceGenerator", "tools-local\Microsoft.ML.AutoML.SourceGenerator\Microsoft.ML.AutoML.SourceGenerator.csproj", "{C804B990-390E-41D7-8FF1-6774495D70E2}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Fairlearn", "src\Microsoft.ML.Fairlearn\Microsoft.ML.Fairlearn.csproj", "{2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Fairlearn.Tests", "test\Microsoft.ML.Fairlearn.Tests\Microsoft.ML.Fairlearn.Tests.csproj", "{416E682A-3958-49B9-8693-14EA96967AD3}" +EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TorchSharp", "src\Microsoft.ML.TorchSharp\Microsoft.ML.TorchSharp.csproj", "{FF0BD187-4451-4A3B-934B-2AE3454896E2}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Tokenizers", "src\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj", "{BBC3A950-BD68-45AC-9DBD-A8F4D8847745}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Tokenizers.Tests", "test\Microsoft.ML.Tokenizers.Tests\Microsoft.ML.Tokenizers.Tests.csproj", "{C3D82402-F207-4F19-8C57-5AF0FBAF9682}" EndProject +Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Microsoft.ML.FSharp.Tests", "test\Microsoft.ML.FSharp.Tests\Microsoft.ML.FSharp.Tests.fsproj", "{041CB5CD-5832-413E-A894-D9DBED210B16}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = 
Debug|Any CPU @@ -387,14 +392,6 @@ Global {E97D8B5A-3035-4D41-9B0D-77FF8FB8D132}.Release|Any CPU.Build.0 = Release|Any CPU {E97D8B5A-3035-4D41-9B0D-77FF8FB8D132}.Release|x64.ActiveCfg = Release|Any CPU {E97D8B5A-3035-4D41-9B0D-77FF8FB8D132}.Release|x64.Build.0 = Release|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug|Any CPU.Build.0 = Debug|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug|x64.ActiveCfg = Debug|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug|x64.Build.0 = Debug|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Release|Any CPU.ActiveCfg = Release|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Release|Any CPU.Build.0 = Release|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Release|x64.ActiveCfg = Release|Any CPU - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Release|x64.Build.0 = Release|Any CPU {00E38F77-1E61-4CDF-8F97-1417D4E85053}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {00E38F77-1E61-4CDF-8F97-1417D4E85053}.Debug|Any CPU.Build.0 = Debug|Any CPU {00E38F77-1E61-4CDF-8F97-1417D4E85053}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -407,6 +404,10 @@ Global {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Debug|Any CPU.Build.0 = Debug|Any CPU {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Debug|x64.ActiveCfg = Debug|Any CPU {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Debug|x64.Build.0 = Debug|Any CPU + {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|Any CPU.Build.0 = Release|Any CPU + {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|x64.ActiveCfg = Release|Any CPU + {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|x64.Build.0 = Release|Any CPU {A7222F94-2AF1-10C9-A21C-C4D22B137A69}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {A7222F94-2AF1-10C9-A21C-C4D22B137A69}.Debug|Any CPU.Build.0 = Debug|Any CPU {A7222F94-2AF1-10C9-A21C-C4D22B137A69}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -415,10 +416,6 @@ Global 
{A7222F94-2AF1-10C9-A21C-C4D22B137A69}.Release|Any CPU.Build.0 = Release|Any CPU {A7222F94-2AF1-10C9-A21C-C4D22B137A69}.Release|x64.ActiveCfg = Release|Any CPU {A7222F94-2AF1-10C9-A21C-C4D22B137A69}.Release|x64.Build.0 = Release|Any CPU - {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|Any CPU.ActiveCfg = Release|Any CPU - {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|Any CPU.Build.0 = Release|Any CPU - {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|x64.ActiveCfg = Release|Any CPU - {A7222F41-1CF0-47D9-B80C-B4D77B027A61}.Release|x64.Build.0 = Release|Any CPU {570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Debug|Any CPU.Build.0 = Debug|Any CPU {570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -491,6 +488,14 @@ Global {11A5210E-2EA7-42F1-80DB-827762E9C781}.Release|Any CPU.Build.0 = Release|Any CPU {11A5210E-2EA7-42F1-80DB-827762E9C781}.Release|x64.ActiveCfg = Release|Any CPU {11A5210E-2EA7-42F1-80DB-827762E9C781}.Release|x64.Build.0 = Release|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|Any CPU.Build.0 = Debug|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|x64.ActiveCfg = Debug|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|x64.Build.0 = Debug|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|Any CPU.ActiveCfg = Release|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|Any CPU.Build.0 = Release|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|x64.ActiveCfg = Release|Any CPU + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|x64.Build.0 = Release|Any CPU {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}.Debug|Any CPU.Build.0 = Debug|Any CPU {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -735,6 +740,22 @@ Global 
{C804B990-390E-41D7-8FF1-6774495D70E2}.Release|Any CPU.Build.0 = Release|Any CPU {C804B990-390E-41D7-8FF1-6774495D70E2}.Release|x64.ActiveCfg = Release|Any CPU {C804B990-390E-41D7-8FF1-6774495D70E2}.Release|x64.Build.0 = Release|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Debug|x64.ActiveCfg = Debug|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Debug|x64.Build.0 = Debug|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Release|Any CPU.Build.0 = Release|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Release|x64.ActiveCfg = Release|Any CPU + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99}.Release|x64.Build.0 = Release|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Debug|x64.ActiveCfg = Debug|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Debug|x64.Build.0 = Debug|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Release|Any CPU.ActiveCfg = Release|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Release|Any CPU.Build.0 = Release|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Release|x64.ActiveCfg = Release|Any CPU + {416E682A-3958-49B9-8693-14EA96967AD3}.Release|x64.Build.0 = Release|Any CPU {FF0BD187-4451-4A3B-934B-2AE3454896E2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {FF0BD187-4451-4A3B-934B-2AE3454896E2}.Debug|Any CPU.Build.0 = Debug|Any CPU {FF0BD187-4451-4A3B-934B-2AE3454896E2}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -759,14 +780,14 @@ Global {C3D82402-F207-4F19-8C57-5AF0FBAF9682}.Release|Any CPU.Build.0 = Release|Any CPU {C3D82402-F207-4F19-8C57-5AF0FBAF9682}.Release|x64.ActiveCfg = Release|Any CPU {C3D82402-F207-4F19-8C57-5AF0FBAF9682}.Release|x64.Build.0 = 
Release|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|Any CPU.Build.0 = Debug|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|x64.ActiveCfg = Debug|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Debug|x64.Build.0 = Debug|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|Any CPU.ActiveCfg = Release|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|Any CPU.Build.0 = Release|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|x64.ActiveCfg = Release|Any CPU - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31}.Release|x64.Build.0 = Release|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Debug|Any CPU.Build.0 = Debug|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Debug|x64.ActiveCfg = Debug|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Debug|x64.Build.0 = Debug|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Release|Any CPU.ActiveCfg = Release|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Release|Any CPU.Build.0 = Release|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Release|x64.ActiveCfg = Release|Any CPU + {041CB5CD-5832-413E-A894-D9DBED210B16}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -799,7 +820,6 @@ Global {3E4ABF07-7970-4BE6-B45B-A13D3C397545} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {7333EDEF-4144-405C-A5EC-6F42201857D8} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {E97D8B5A-3035-4D41-9B0D-77FF8FB8D132} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} - {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {00E38F77-1E61-4CDF-8F97-1417D4E85053} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {A7222F41-1CF0-47D9-B80C-B4D77B027A61} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {A7222F94-2AF1-10C9-A21C-C4D22B137A69} = {09EADF06-BE25-4228-AB53-95AE3E15B530} @@ -812,6 +832,7 @@ 
Global {4B101D58-E7E4-4877-A536-A9B41E2E82A3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {ECB71297-9DF1-48CE-B93A-CD969221F9B6} = {DA452A53-2E94-4433-B08C-041EDEC729E6} {11A5210E-2EA7-42F1-80DB-827762E9C781} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31} = {DA452A53-2E94-4433-B08C-041EDEC729E6} {C8E1772B-DFD9-4A4D-830D-6AAB1C668BB3} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {9222FC9D-599A-49A5-B685-08CC9A5C81D7} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {6C29AA9B-054B-4762-BEA5-D305B932AA80} = {09EADF06-BE25-4228-AB53-95AE3E15B530} @@ -843,10 +864,12 @@ Global {A8F4F08F-1F9D-4AAE-8C8D-502CDBBDE7D3} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {A3E9F25F-2718-4FF9-A35A-54C232A847AB} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {C804B990-390E-41D7-8FF1-6774495D70E2} = {7F13E156-3EBA-4021-84A5-CD56BA72F99E} + {2FB7A5CB-B41A-4A89-9B81-AFF24576DE99} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {416E682A-3958-49B9-8693-14EA96967AD3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {FF0BD187-4451-4A3B-934B-2AE3454896E2} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {BBC3A950-BD68-45AC-9DBD-A8F4D8847745} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {C3D82402-F207-4F19-8C57-5AF0FBAF9682} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} - {38ED61F4-FA22-4DE9-B0C4-91F327F4EE31} = {DA452A53-2E94-4433-B08C-041EDEC729E6} + {041CB5CD-5832-413E-A894-D9DBED210B16} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/src/Microsoft.Data.Analysis/DataFrameRow.cs b/src/Microsoft.Data.Analysis/DataFrameRow.cs index 812fd69721..0d21fb0bae 100644 --- a/src/Microsoft.Data.Analysis/DataFrameRow.cs +++ b/src/Microsoft.Data.Analysis/DataFrameRow.cs @@ -52,6 +52,23 @@ public object this[int index] } } + /// + /// An indexer to return the value at . + /// + /// The name of the column that corresponds to the return value + /// The value at this . 
+ public object this[string columnName] + { + get + { + return _dataFrame[columnName][_rowIndex]; + } + set + { + _dataFrame[columnName][_rowIndex] = value; + } + } + /// /// A simple string representation of the values in this row /// diff --git a/src/Microsoft.ML.AutoML/Assembly.cs b/src/Microsoft.ML.AutoML/Assembly.cs index 451a875b49..054074cee4 100644 --- a/src/Microsoft.ML.AutoML/Assembly.cs +++ b/src/Microsoft.ML.AutoML/Assembly.cs @@ -9,6 +9,7 @@ [assembly: InternalsVisibleTo("mlnet.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")] [assembly: InternalsVisibleTo("Benchmark, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")] [assembly: InternalsVisibleTo("Microsoft.ML.CodeGenerator, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")] +[assembly: InternalsVisibleTo("Microsoft.ML.Fairlearn, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")] 
[assembly: InternalsVisibleTo("Microsoft.ML.CodeGenerator.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")] [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] diff --git a/src/Microsoft.ML.AutoML/AutoMLExperiment/AutoMLExperiment.cs b/src/Microsoft.ML.AutoML/AutoMLExperiment/AutoMLExperiment.cs index 16d2984aa6..deeb2cab2e 100644 --- a/src/Microsoft.ML.AutoML/AutoMLExperiment/AutoMLExperiment.cs +++ b/src/Microsoft.ML.AutoML/AutoMLExperiment/AutoMLExperiment.cs @@ -234,6 +234,7 @@ public async Task RunAsync(CancellationToken ct = default) var monitor = serviceProvider.GetService(); var trialResultManager = serviceProvider.GetService(); var trialNum = trialResultManager?.GetAllTrialResults().Max(t => t.TrialSettings?.TrialId) + 1 ?? 
0; + serviceProvider.GetService(); var tuner = serviceProvider.GetService(); Contracts.Assert(tuner != null, "tuner can't be null"); diff --git a/src/Microsoft.ML.AutoML/AutoMLExperiment/IMonitor.cs b/src/Microsoft.ML.AutoML/AutoMLExperiment/IMonitor.cs index 51335f8d94..91eb4b2de7 100644 --- a/src/Microsoft.ML.AutoML/AutoMLExperiment/IMonitor.cs +++ b/src/Microsoft.ML.AutoML/AutoMLExperiment/IMonitor.cs @@ -45,7 +45,14 @@ public virtual void ReportBestTrial(TrialResult result) public virtual void ReportCompletedTrial(TrialResult result) { - _logger.Info($"Update Completed Trial - Id: {result.TrialSettings.TrialId} - Metric: {result.Metric} - Pipeline: {_pipeline.ToString(result.TrialSettings.Parameter)} - Duration: {result.DurationInMilliseconds} - Peak CPU: {result.PeakCpu?.ToString("p")} - Peak Memory in MB: {result.PeakMemoryInMegaByte?.ToString("F")}"); + if (result is FairnessTrialResult fResult) + { //TODO: now we are assuming the higher the raw metric the better and the lower the fairness metric the better. 
If we have a raw metric that needs to be minimized then this should change + _logger.Info($"Update Completed Trial - Id: {result.TrialSettings.TrialId} - Raw Metric: {result.Metric + fResult.FairnessMetric} - Fairness Metric: {-fResult.FairnessMetric} - Total Metric: {result.Metric} - Pipeline: {this._pipeline} - Duration: {result.DurationInMilliseconds}"); + } + else + { + _logger.Info($"Update Completed Trial - Id: {result.TrialSettings.TrialId} - Metric: {result.Metric} - Pipeline: {this._pipeline} - Duration: {result.DurationInMilliseconds}"); + } _completedTrials.Add(result); } diff --git a/src/Microsoft.ML.AutoML/AutoMLExperiment/TrialResult.cs b/src/Microsoft.ML.AutoML/AutoMLExperiment/TrialResult.cs index 369d271d2a..0c9502d3e7 100644 --- a/src/Microsoft.ML.AutoML/AutoMLExperiment/TrialResult.cs +++ b/src/Microsoft.ML.AutoML/AutoMLExperiment/TrialResult.cs @@ -66,4 +66,9 @@ internal class TrialResult : TrialResult public EstimatorChain Pipeline { get; set; } } + + public class FairnessTrialResult : TrialResult + { + public double FairnessMetric { get; set; } + } } diff --git a/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs index 65cf1b095c..433ad2fa1c 100644 --- a/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs @@ -43,6 +43,7 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Transforms" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.AutoML" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Fairlearn" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TorchSharp" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "TreeVisualizer" + 
InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs index e905b08993..c0b9e1782c 100644 --- a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs @@ -25,6 +25,7 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.FastTree" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Mkl.Components" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.OneDal" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Fairlearn" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.KMeansClustering" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.LightGbm" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.OnnxConverter" + PublicKey.Value)] diff --git a/src/Microsoft.ML.Fairlearn/AutoML/AutoMLExperimentExtension.cs b/src/Microsoft.ML.Fairlearn/AutoML/AutoMLExperimentExtension.cs new file mode 100644 index 0000000000..e3b5351237 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/AutoML/AutoMLExperimentExtension.cs @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.Data.Analysis; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.ML.AutoML; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Fairlearn.AutoML +{ + /// + /// An internal class that holds the gridLimit value to conduct gridsearch. 
+ /// Needed to pass the value into the AutoMLExperiment as a singleton + /// + internal class GridLimit + { + public float Value { get; set; } + } + /// + /// An extension class used to add more options to the Fairlearn grid search experiment + /// + public static class AutoMLExperimentExtension + { + public static AutoMLExperiment SetBinaryClassificationMoment(this AutoMLExperiment experiment, ClassificationMoment moment) + { + experiment.ServiceCollection.AddSingleton(moment); + + return experiment; + } + + public static AutoMLExperiment SetGridLimit(this AutoMLExperiment experiment, float gridLimit) + { + var gridLimitObject = new GridLimit(); + gridLimitObject.Value = gridLimit; + experiment.ServiceCollection.AddSingleton(gridLimitObject); + experiment.SetTuner(); + + return experiment; + } + + public static AutoMLExperiment SetBinaryClassificationMetricWithFairLearn( + this AutoMLExperiment experiment, + string labelColumn, + string predictedColumn, + string sensitiveColumnName, + string exampleWeightColumnName, + float gridLimit = 10f, + bool negativeAllowed = true) + { + experiment.ServiceCollection.AddSingleton((serviceProvider) => + { + var datasetManager = serviceProvider.GetRequiredService(); + var moment = new UtilityParity(); + var sensitiveFeature = DataFrameColumn.Create("group_id", datasetManager.TrainDataset.GetColumn(sensitiveColumnName)); + var label = DataFrameColumn.Create("label", datasetManager.TrainDataset.GetColumn(labelColumn)); + moment.LoadData(datasetManager.TrainDataset, label, sensitiveFeature); + var lambdaSearchSpace = Utilities.GenerateBinaryClassificationLambdaSearchSpace(moment, gridLimit, negativeAllowed); + experiment.AddSearchSpace("_lambda_search_space", lambdaSearchSpace); + + return moment; + }); + + experiment.SetTrialRunner((serviceProvider) => + { + var context = serviceProvider.GetRequiredService(); + var moment = serviceProvider.GetRequiredService(); + var datasetManager = serviceProvider.GetRequiredService(); + var 
pipeline = serviceProvider.GetRequiredService(); + return new GridSearchTrailRunner(context, datasetManager.TrainDataset, datasetManager.ValidateDataset, labelColumn, sensitiveColumnName, pipeline, moment); + }); + experiment.SetRandomSearchTuner(); + + return experiment; + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/AutoML/TunerFactory.cs b/src/Microsoft.ML.Fairlearn/AutoML/TunerFactory.cs new file mode 100644 index 0000000000..03368c3c6b --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/AutoML/TunerFactory.cs @@ -0,0 +1,49 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.ML; +using Microsoft.ML.AutoML; +using Microsoft.ML.SearchSpace; + +namespace Microsoft.ML.Fairlearn.AutoML +{ + internal class CostFrugalWithLambdaTunerFactory : ITuner + { + private readonly IServiceProvider _provider; + private readonly ClassificationMoment _moment; + private readonly MLContext _context; + private readonly float _gridLimit = 10f; + private readonly SweepablePipeline _pipeline; + private readonly SearchSpace.SearchSpace _searchSpace; + private readonly ITuner _tuner; + + public CostFrugalWithLambdaTunerFactory(IServiceProvider provider) + { + _provider = provider; + _moment = provider.GetService(); + _context = provider.GetService(); + _gridLimit = provider.GetService().Value; + _pipeline = provider.GetRequiredService(); + var lambdaSearchSpace = Utilities.GenerateBinaryClassificationLambdaSearchSpace(_moment, gridLimit: _gridLimit); + var settings = provider.GetRequiredService(); + _searchSpace = settings.SearchSpace; + _searchSpace["_lambda_search_space"] = lambdaSearchSpace; + _tuner = new RandomSearchTuner(_searchSpace, settings.Seed); + } + + public Parameter 
Propose(TrialSettings settings) + { + return _tuner.Propose(settings); + } + + public void Update(TrialResult result) + { + _tuner.Update(result); + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/FairlearnCatalog.cs b/src/Microsoft.ML.Fairlearn/FairlearnCatalog.cs new file mode 100644 index 0000000000..bca1dca305 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/FairlearnCatalog.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.ML.Fairlearn +{ + public sealed class FairlearnCatalog + { + public FairlearnMetricCatalog Metric; + + internal FairlearnCatalog(MLContext context) + { + Metric = new FairlearnMetricCatalog(context); + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/MLContextExtension.cs b/src/Microsoft.ML.Fairlearn/MLContextExtension.cs new file mode 100644 index 0000000000..0386406547 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/MLContextExtension.cs @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.ML.Fairlearn +{ + /// <summary> + /// Class containing Fairlearn extension methods to <see cref="MLContext"/> + /// </summary> + public static class MLContextExtension + { + /// <summary> + /// Returns a catalog of all possible Fairlearn operations. + /// </summary> + /// <param name="mlContext"><see cref="MLContext"/> instance.</param> + /// <returns>A catalog of all possible Fairlearn operations.</returns> 
+ public static FairlearnCatalog Fairlearn(this MLContext mlContext) + { + return new FairlearnCatalog(mlContext); + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/Metrics/FairlearnMetricCatalog.cs b/src/Microsoft.ML.Fairlearn/Metrics/FairlearnMetricCatalog.cs new file mode 100644 index 0000000000..ea319ee35a --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Metrics/FairlearnMetricCatalog.cs @@ -0,0 +1,270 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.Data.Analysis; +using Microsoft.ML.Data; +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.Fairlearn +{ + public class FairlearnMetricCatalog + { + private readonly MLContext _context; + + public FairlearnMetricCatalog(MLContext context) + { + _context = context; + } + + #region binary classification + public BinaryGroupMetric BinaryClassification(IDataView eval, string labelColumn, string predictedColumn, string sensitiveFeatureColumn, string scoreColumn = "Score") + { + return new BinaryGroupMetric(_context, eval, labelColumn, predictedColumn, sensitiveFeatureColumn, scoreColumn); + } + #endregion + + #region regression + public RegressionGroupMetric Regression(IDataView eval, string labelColumn, string scoreColumn, string sensitiveFeatureColumn) + { + return new RegressionGroupMetric(eval, labelColumn, scoreColumn, sensitiveFeatureColumn); + } + #endregion + } + + public class BinaryGroupMetric : IGroupMetric + { + private readonly IDataView _eval; + private readonly string _labelColumn; + private readonly string _predictedColumn; + private readonly string _scoreColumn; + private readonly string _sensitiveFeatureColumn; + private readonly MLContext _context; + + public BinaryGroupMetric(MLContext context, IDataView eval, string labelColumn, string predictedColumn, 
string sensitiveFeatureColumn, string scoreColumn) + { + _context = context; + _eval = eval; + _labelColumn = labelColumn; + _predictedColumn = predictedColumn; + _sensitiveFeatureColumn = sensitiveFeatureColumn; + _scoreColumn = scoreColumn; + } + + public IEnumerable GroupIds + { + get + { + var sensitiveCol = _eval.Schema[_sensitiveFeatureColumn]; + if (sensitiveCol.Type == TextDataViewType.Instance) + { + return _eval.GetColumn(sensitiveCol.Name); + } + else + { + var convertToString = _context.Transforms.Conversion.ConvertType(sensitiveCol.Name, sensitiveCol.Name, DataKind.String); + var data = convertToString.Fit(_eval).Transform(_eval); + + return data.GetColumn(sensitiveCol.Name); + } + } + } + + public DataFrame ByGroup() + { + var truths = _eval.GetColumn(_labelColumn).ToArray(); + var predicted = _eval.GetColumn(_predictedColumn).ToArray(); + var scores = _eval.GetColumn(_scoreColumn).ToArray(); + Contracts.Assert(truths.Count() == predicted.Count()); + Contracts.Assert(truths.Count() == scores.Count()); + Contracts.Assert(GroupIds.Count() == truths.Count()); + + var res = GroupIds.Select((id, i) => + { + return (id, new ModelInput + { + Label = truths[i], + PredictedLabel = predicted[i], + Score = scores[i], + }); + }).GroupBy(kv => kv.id) + .ToDictionary(group => group.Key, group => _context.Data.LoadFromEnumerable(group.Select(g => g.Item2))); + + var groupMetric = res.Select(kv => (kv.Key, _context.BinaryClassification.EvaluateNonCalibrated(kv.Value))) + .ToDictionary(kv => kv.Key, kv => kv.Item2); + + DataFrame result = new DataFrame(); + result[_sensitiveFeatureColumn] = DataFrameColumn.Create(_sensitiveFeatureColumn, groupMetric.Keys.Select(x => x.ToString())); + result["AUC"] = DataFrameColumn.Create("AUC", groupMetric.Keys.Select(k => groupMetric[k].AreaUnderRocCurve)); // column name? 
+ result["Accuracy"] = DataFrameColumn.Create("Accuracy", groupMetric.Keys.Select(k => groupMetric[k].Accuracy)); + result["PosPrec"] = DataFrameColumn.Create("PosPrec", groupMetric.Keys.Select(k => groupMetric[k].PositivePrecision)); + result["PosRecall"] = DataFrameColumn.Create("PosRecall", groupMetric.Keys.Select(k => groupMetric[k].PositiveRecall)); + result["NegPrec"] = DataFrameColumn.Create("NegPrec", groupMetric.Keys.Select(k => groupMetric[k].NegativePrecision)); + result["NegRecall"] = DataFrameColumn.Create("NegRecall", groupMetric.Keys.Select(k => groupMetric[k].NegativeRecall)); + result["F1Score"] = DataFrameColumn.Create("F1Score", groupMetric.Keys.Select(k => groupMetric[k].F1Score)); + result["AreaUnderPrecisionRecallCurve"] = DataFrameColumn.Create("AreaUnderPrecisionRecallCurve", groupMetric.Keys.Select(k => groupMetric[k].AreaUnderPrecisionRecallCurve)); + + return result; + } + + public Dictionary Overall() + { + CalibratedBinaryClassificationMetrics metrics = _context.BinaryClassification.Evaluate(_eval, _labelColumn); + + // create the dictionary to hold the results + Dictionary metricsDict = new Dictionary + { + { "AUC", metrics.AreaUnderRocCurve }, + { "Accuracy", metrics.Accuracy }, + { "PosPrec", metrics.PositivePrecision }, + { "PosRecall", metrics.PositiveRecall }, + { "NegPrec", metrics.NegativePrecision }, + { "NegRecall", metrics.NegativeRecall }, + { "F1Score", metrics.F1Score }, + { "AreaUnderPrecisionRecallCurve", metrics.AreaUnderPrecisionRecallCurve }, + // following metrics are from the extensions + { "LogLoss", metrics.LogLoss }, + { "LogLossReduction", metrics.LogLossReduction }, + { "Entropy", metrics.Entropy } + }; + + return metricsDict; + } + + private class ModelInput + { + public bool Label { get; set; } + + public bool PredictedLabel { get; set; } + + public float Score { get; set; } + } + } + + public class RegressionGroupMetric : IGroupMetric + { + private readonly IDataView _eval; + private readonly string 
_labelColumn; + private readonly string _scoreColumn; + private readonly string _sensitiveFeatureColumn; + private readonly MLContext _context = new MLContext(); + + public RegressionGroupMetric(IDataView eval, string labelColumn, string scoreColumn, string sensitiveFeatureColumn) + { + _eval = eval; + _labelColumn = labelColumn; + _scoreColumn = scoreColumn; + _sensitiveFeatureColumn = sensitiveFeatureColumn; + } + + /// <summary> + /// Groups the evaluation rows by sensitive-feature value and evaluates the regression metrics of each group. + /// </summary> + /// <returns>A data frame with one row per group and the columns RSquared, RMS, MSE and MAE.</returns> + public DataFrame ByGroup() + { + // 1. group row according to sensitive feature column + // 2. split dataset to different groups, data_g1, data_g2..... + // 3. calculate regression metrics for different groups + // 4. create a DataFrame from the result of step 3 + // 5. return it. + var sensitiveCol = _eval.Schema[_sensitiveFeatureColumn]; + + // TODO: is converting IDataview to DataFrame the best practice? + // .ToDataFrame pulls the data into memory. + + //Brainstorm: 1. save it to a text file, temp file. figure unique columns. do a filter on those columns + // 2. filtering (maybe not the best approach) dataview + // 3. 
custom mapping + var evalDf = _eval.ToDataFrame(); + var groups = evalDf.Rows.GroupBy(r => r[sensitiveCol.Index]); + var groupMetric = new Dictionary(); + foreach (var kv in groups) + { + // build an empty frame with one column per schema column; the rows of this group are appended below + var data = new DataFrame(_eval.Schema.AsEnumerable().Select(column => + { + if (column.Type is TextDataViewType) + { + var textColumn = new StringDataFrameColumn(column.Name); + return textColumn; + } + else if (column.Type.RawType == typeof(bool)) + { + var primitiveColumn = new BooleanDataFrameColumn(column.Name); + + return primitiveColumn; + } + else if (column.Type.RawType == typeof(int)) + { + var primitiveColumn = new Int32DataFrameColumn(column.Name); + + return primitiveColumn; + } + else if (column.Type.RawType == typeof(float)) + { + var primitiveColumn = new SingleDataFrameColumn(column.Name); + + return primitiveColumn; + } + else if (column.Type.RawType == typeof(DateTime)) + { + // BLOCKED by DataFrame bug https://github.com/dotnet/machinelearning/issues/6213 + // Evaluate as a string for now + var dateColumn = new StringDataFrameColumn(column.Name, 0); + return dateColumn; + } + else + { + throw new NotImplementedException($"Column '{column.Name}' has a type that is not supported for per-group regression metrics: {column.Type}"); + } + }).Where(x => x != null)); + // copy the rows of this group into the empty frame + data.Append(kv, inPlace: true); + RegressionMetrics metrics = _context.Regression.Evaluate(data, _labelColumn, _scoreColumn); + groupMetric[kv.Key] = metrics; + } + + DataFrame result = new DataFrame(); + result[_sensitiveFeatureColumn] = DataFrameColumn.Create(_sensitiveFeatureColumn, groupMetric.Keys.Select(x => x.ToString())); + result["RSquared"] = DataFrameColumn.Create("RSquared", groupMetric.Keys.Select(k => groupMetric[k].RSquared)); + result["RMS"] = DataFrameColumn.Create("RMS", groupMetric.Keys.Select(k => groupMetric[k].RootMeanSquaredError)); + result["MSE"] = DataFrameColumn.Create("MSE", groupMetric.Keys.Select(k => groupMetric[k].MeanSquaredError)); + result["MAE"] = DataFrameColumn.Create("MAE", groupMetric.Keys.Select(k => groupMetric[k].MeanAbsoluteError)); + return result; + } + 
+ /// <summary> + /// Computes, for each regression metric, the absolute gap between its largest and smallest per-group value. + /// </summary> + /// <returns>A dictionary keyed by metric name (RSquared, RMS, MSE, MAE).</returns> + public Dictionary DifferenceBetweenGroups() + { + Dictionary diffDict = new Dictionary(); + DataFrame groupMetrics = ByGroup(); + diffDict.Add("RSquared", Math.Abs((double)groupMetrics["RSquared"].Max() - (double)groupMetrics["RSquared"].Min())); + diffDict.Add("RMS", Math.Abs((double)groupMetrics["RMS"].Max() - (double)groupMetrics["RMS"].Min())); + diffDict.Add("MSE", Math.Abs((double)groupMetrics["MSE"].Max() - (double)groupMetrics["MSE"].Min())); + diffDict.Add("MAE", Math.Abs((double)groupMetrics["MAE"].Max() - (double)groupMetrics["MAE"].Min())); + + return diffDict; + } + + /// <summary> + /// Evaluates the regression metrics over the whole evaluation data, using the same label and score columns as ByGroup. + /// </summary> + /// <returns>A dictionary keyed by metric name (RSquared, RMS, MSE, MAE).</returns> + public Dictionary Overall() + { + // Pass the configured score column; omitting it would silently fall back to the evaluator default. + RegressionMetrics metrics = _context.Regression.Evaluate(_eval, _labelColumn, _scoreColumn); + + // create the dictionary to hold the results + Dictionary metricsDict = new Dictionary + { + { "RSquared", metrics.RSquared }, + { "RMS", metrics.RootMeanSquaredError }, + { "MSE", metrics.MeanSquaredError }, + { "MAE", metrics.MeanAbsoluteError } + }; + + return metricsDict; + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/Metrics/GroupMetric.cs b/src/Microsoft.ML.Fairlearn/Metrics/GroupMetric.cs new file mode 100644 index 0000000000..708d836410 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Metrics/GroupMetric.cs @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.Data.Analysis; + +namespace Microsoft.ML.Fairlearn +{ + internal interface IGroupMetric + { + /// + /// calculate metric all over group. It returns a dictionary which key is metric name + /// and value is metric value + /// + Dictionary Overall(); + + /// + /// calculate metric according to group. It returns a dataframe + /// which index is each value in a group and column is metric name and metric value. 
+ /// + DataFrame ByGroup(); + } +} diff --git a/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj b/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj new file mode 100644 index 0000000000..72b2ad0edb --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj @@ -0,0 +1,28 @@ + + + + + netstandard2.0 + Microsoft.ML.Fairlearn + None + + + + + + + all + + + all + true + + + + + all + + + + + diff --git a/src/Microsoft.ML.Fairlearn/Reductions/GridSearchTrialRunner.cs b/src/Microsoft.ML.Fairlearn/Reductions/GridSearchTrialRunner.cs new file mode 100644 index 0000000000..1adf3b37d4 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Reductions/GridSearchTrialRunner.cs @@ -0,0 +1,106 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Data.Analysis; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.ML.AutoML; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Fairlearn +{ + /// + /// + /// 1. generate cost column from lambda parameter + /// 2. insert cost column into dataset + /// 3. restore trainable pipeline + /// 4. train + /// 5. 
calculate metric = observed loss + fairness loss + /// + public class GridSearchTrailRunner : ITrialRunner + { + private readonly MLContext _context; + private readonly IDataView _trainDataset; + private readonly IDataView _testDataset; + private readonly string _labelColumn; + private readonly string _sensitiveColumn; + private readonly SweepablePipeline _pipeline; + private readonly ClassificationMoment _moment; + + public GridSearchTrailRunner(MLContext context, IDataView trainDataset, IDataView testDataset, string labelColumn, string sensitiveColumn, SweepablePipeline pipeline, ClassificationMoment moment) + { + _context = context; + this._trainDataset = trainDataset; + this._testDataset = testDataset; + this._labelColumn = labelColumn; + this._sensitiveColumn = sensitiveColumn; + _pipeline = pipeline; + _moment = moment; + } + + public void Dispose() + { + } + + /// <summary> + /// Trains the pipeline described by the trial settings on the training set, weighted by the signed weights + /// derived from the trial's lambda values, and scores the model on the test set as accuracy minus the + /// largest fairness violation reported by the moment. + /// </summary> + /// <param name="settings">Trial settings holding the "_pipeline_" and "_lambda_search_space" parameters.</param> + /// <param name="ct">Cancellation token; not observed by this implementation.</param> + /// <returns>A completed task carrying the trial result.</returns> + public Task RunAsync(TrialSettings settings, CancellationToken ct) + { + var stopWatch = Stopwatch.StartNew(); + //DataFrameColumn signedWeights = null; + var pipeline = _pipeline.BuildFromOption(_context, settings.Parameter["_pipeline_"]); + // get lambda + var lambdas = settings.Parameter["_lambda_search_space"]; + var key = lambdas.Keys; + // (sign, group, value) + // Option names have the form {group}_{pos|neg}. Split on the LAST underscore so that + // group values which themselves contain '_' are parsed correctly. + // Materialized once because the result is read three times below. + var lambdasValue = key.Select(x => + { + var separator = x.LastIndexOf('_'); + var sign = x.Substring(separator + 1) == "pos" ? 
"+" : "-"; + var e = x.Substring(0, separator); + var value = lambdas[x].AsType(); + + return (sign, e, value); + }).ToArray(); + + var df = new DataFrame(); + df["sign"] = DataFrameColumn.Create("sign", lambdasValue.Select(x => x.sign)); + df["group_id"] = DataFrameColumn.Create("group_id", lambdasValue.Select(x => x.e)); + df["value"] = DataFrameColumn.Create("value", lambdasValue.Select(x => x.value)); + _moment.LoadData(this._trainDataset, DataFrameColumn.Create("y", this._trainDataset.GetColumn(this._labelColumn)), DataFrameColumn.Create("group_id", this._trainDataset.GetColumn(this._sensitiveColumn))); + var signWeightColumn = _moment.SignedWeights(df); + var trainDataset = ZipDataView.Create(_context, new IDataView[] { _trainDataset, new DataFrame(signWeightColumn) }); + var model = pipeline.Fit(trainDataset); + // returns an IDataview object that contains the predictions + var eval = model.Transform(this._testDataset); + // extract the predicted label and convert it to 1.0f and 0.0 so that we can feed that into the gamma function + var predictedLabel = eval.GetColumn("PredictedLabel").Select(b => b ? 
1f : 0f).ToArray(); + var column = DataFrameColumn.Create("pred", predictedLabel); + //Get the gamma based on the predicted label of the testDataset + _moment.LoadData(this._testDataset, DataFrameColumn.Create("y", eval.GetColumn(this._labelColumn)), DataFrameColumn.Create("group_id", _testDataset.GetColumn(this._sensitiveColumn))); + var gamma = _moment.Gamma(column); + double fairnessLost = Convert.ToSingle(gamma["value"].Max()); + var metrics = _context.BinaryClassification.EvaluateNonCalibrated(eval, this._labelColumn); + // the metric should be the combination of the observed loss from the model and the fairness loss + double metric = metrics.Accuracy - fairnessLost; + + stopWatch.Stop(); + + return Task.FromResult(new FairnessTrialResult() + { + FairnessMetric = fairnessLost, + Metric = metric, + Model = model, + Loss = -metric, + TrialSettings = settings, + DurationInMilliseconds = stopWatch.ElapsedMilliseconds, + }); + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/Reductions/Moment.cs b/src/Microsoft.ML.Fairlearn/Reductions/Moment.cs new file mode 100644 index 0000000000..d481ebd5d8 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Reductions/Moment.cs @@ -0,0 +1,81 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.Data.Analysis; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Fairlearn +{ + /// + /// Generic moment. + /// Modeled after the original Fairlearn repo + /// Our implementations of the reductions approach to fairness + /// agarwal2018reductions + /// make use of Moment objects to describe both the optimization objective + /// and the fairness constraints imposed on the solution. + /// This is an abstract class for all such objects. 
+ /// + public abstract class Moment + { + protected DataFrameColumn Y; //maybe lowercase this? + public DataFrame Tags { get; private set; } + + public IDataView X { get; protected set; } + + public long TotalSamples { get; protected set; } + + public DataFrameColumn SensitiveFeatureColumn { get => Tags["group_id"]; } + + // NOTE(review): the name looks like a typo of "GroupIds", and nothing in this class assigns it - confirm before relying on it. + public string[] GroudIds; + + public Moment() + { + } + /// + /// Load the data into the moment to generate the parity constraint + /// + /// The feature set + /// The label + /// The sensitive feature that contains the sensitive groups + public virtual void LoadData(IDataView features, DataFrameColumn label, StringDataFrameColumn sensitiveFeature) + { + X = features; + TotalSamples = label.Length; + Y = label; + Tags = new DataFrame(); + Tags["label"] = label; + Tags["group_id"] = sensitiveFeature; + } + + // Convenience overload: reads the label and sensitive columns from trainData by name and forwards to the overload above. + public virtual void LoadData(IDataView trainData, string label, string sensitiveColumnName) + { + var sensitiveFeature = DataFrameColumn.Create("group_id", trainData.GetColumn(sensitiveColumnName)); + var labelColumn = DataFrameColumn.Create("label", trainData.GetColumn(label)); + this.LoadData(trainData, labelColumn, sensitiveFeature); + } + + /// + /// Calculate the degree to which constraints are currently violated by the predictor. + /// + /// Contains the predictions of the label + /// + public abstract DataFrame Gamma(PrimitiveDataFrameColumn yPred); + public abstract float Bound(); + public float ProjectLambda() + { + throw new NotImplementedException(); + } + public abstract DataFrameColumn SignedWeights(DataFrame lambdaVec); + } + /// + /// Moment that can be expressed as weighted classification error. 
+ /// + public abstract class ClassificationMoment : Moment + { + + } +} diff --git a/src/Microsoft.ML.Fairlearn/Reductions/Utilities.cs b/src/Microsoft.ML.Fairlearn/Reductions/Utilities.cs new file mode 100644 index 0000000000..74034649c3 --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Reductions/Utilities.cs @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Microsoft.ML.Data; +using Microsoft.ML.SearchSpace; +using Microsoft.ML.SearchSpace.Option; + +namespace Microsoft.ML.Fairlearn +{ + public static class Utilities + { + public static SearchSpace.SearchSpace GenerateBinaryClassificationLambdaSearchSpace(Moment moment, float gridLimit = 10, bool negativeAllowed = true) + { + var searchSpace = new SearchSpace.SearchSpace(); + var sensitiveFeatureColumnValue = moment.SensitiveFeatureColumn.Cast().Distinct(); + + // for difference_bound only + // if the sensitive feature column values are "a", "b", "c", + // the search space will contain 6 options with name format {sensitive column value}_{pos/neg} + // a_pos, a_neg, b_pos, b_neg, c_pos, c_neg. 
+ // NOTE(review): the negativeAllowed parameter is not read anywhere in this method. + // Each option ranges over [-gridLimit, gridLimit]; its default value is drawn at random from that range. + var rand = new Random(); + foreach (var p in from _groupValue in sensitiveFeatureColumnValue + from _indicator in new[] { "pos", "neg" } + select new { _groupValue, _indicator }) + { + var option = new UniformSingleOption(-gridLimit, gridLimit, defaultValue: Convert.ToSingle(rand.NextDouble()) * 2.0f * gridLimit - gridLimit); + var optionName = $"{p._groupValue}_{p._indicator}"; + searchSpace[optionName] = option; + } + + return searchSpace; + } + } +} diff --git a/src/Microsoft.ML.Fairlearn/Reductions/UtilityParity.cs b/src/Microsoft.ML.Fairlearn/Reductions/UtilityParity.cs new file mode 100644 index 0000000000..be304962ce --- /dev/null +++ b/src/Microsoft.ML.Fairlearn/Reductions/UtilityParity.cs @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Microsoft.Data.Analysis; + +namespace Microsoft.ML.Fairlearn +{ + /// + /// Modeled after the original repo + /// A generic moment for parity in utilities (or costs) under classification. + /// This serves as the base class for Demographic Parity and + /// can be used as difference-based constraints or ratio-based constraints. 
+ /// + /// Constraints compare the group-level mean utility for each group with the + /// overall mean utility + /// + /// + public class UtilityParity : ClassificationMoment + { + private const float _defaultDifferenceBound = 0.01F; + private readonly float _epsilon; + private readonly float _ratio; + + public float ProbEvent { get; protected set; } + + public DataFrameColumn ProbGroupEvent { get; protected set; } + + /// <summary> + /// Creates the moment from either a difference bound or a ratio bound (never both). + /// When neither is given, the default difference bound and a ratio of 1 are used. + /// </summary> + /// <param name="differenceBound">Difference bound; NaN means "not specified".</param> + /// <param name="ratioBond">Ratio bound in (0, 1]; NaN means "not specified".</param> + /// <param name="ratioBoundSlack">Slack used as the bound when a ratio bound is given.</param> + /// <exception cref="ArgumentException">Both bounds were specified.</exception> + /// <exception cref="ArgumentOutOfRangeException">The ratio bound is outside (0, 1].</exception> + public UtilityParity(float differenceBound = Single.NaN, float ratioBond = Single.NaN, float ratioBoundSlack = 0.0f) + { + var hasDifferenceBound = !float.IsNaN(differenceBound); + var hasRatioBound = !float.IsNaN(ratioBond); + + if (hasDifferenceBound && hasRatioBound) + { + throw new ArgumentException("Only one of difference_bound and ratio_bound can be used"); + } + + if (hasRatioBound) + { + if (ratioBond <= 0.0f || ratioBond > 1.0f) + { + throw new ArgumentOutOfRangeException(nameof(ratioBond), "ratio must lie in (0, 1]"); + } + _epsilon = ratioBoundSlack; + _ratio = ratioBond; + } + else + { + _epsilon = hasDifferenceBound ? differenceBound : _defaultDifferenceBound; + _ratio = 1.0F; + } + } + + //TODO: what should be the object type of X be? How can I make x capitalized to fit the whole data structure + /// <summary> + /// Loads the data and computes the share of rows that falls into each sensitive group. + /// </summary> + /// The features + /// The label + /// The sensitive groups + public override void LoadData(IDataView x, DataFrameColumn y, StringDataFrameColumn sensitiveFeature)//, StringDataFrameColumn events = null, StringDataFrameColumn utilities = null) + { + base.LoadData(x, y, sensitiveFeature); + //Tags["event"] = events; + //Tags["utilities"] = utilities; + + //if (utilities == null) + //{ + // // TODO: set up the default utitlity + //} + + //probEvent will contain the probabilities for each of the event, since we are now focusing on + //TODO: implementing the demography parity which has only one event, we will set it like this for now. 
+ ProbEvent = 1.0F; + //ProbEvent = Tags.GroupBy("event").Count / TotalSamples; We should use this if we have an event + + //Here the "label" column is just a dummy column for the end goal of getting the number of data rows + // Order by group_id so the entries line up element-wise with the group-ordered lambda columns in SignedWeights. + ProbGroupEvent = Tags.GroupBy("group_id").Count().OrderBy("group_id")["label"] / (TotalSamples * 1.0); + } + + /// + /// Calculate the degree to which constraints are currently violated by the predictor. + /// + /// + public override DataFrame Gamma(PrimitiveDataFrameColumn yPred/* Maybe change this to a predictor (func)*/) + { + Tags["pred"] = yPred; + //TODO: add the utility into the calculation of the violation, will be needed for other parity methods + //TODO: also we need to add the events column to the returned gamma singed + //calculate upper bound difference and lower bound difference + var expectEvent = Tags["pred"].Mean(); + // per-group mean prediction ordered by group id; computed once and used for both the values and the group names + var groupMeans = Tags.GroupBy("group_id").Mean("pred").OrderBy("group_id"); + var expectGroupEvent = groupMeans["pred"]; + var upperBoundDiff = _ratio * expectGroupEvent - expectEvent; + var lowerBoundDiff = -1.0 /*to add a negative sign*/ * expectGroupEvent + _ratio * expectEvent; + + //the two diffs are going to be in the same column later on + upperBoundDiff.SetName("value"); + lowerBoundDiff.SetName("value"); + + //create the columns that hold the signs + StringDataFrameColumn posSign = new StringDataFrameColumn("sign", upperBoundDiff.Length); + + // a string column that has all the group names + + // var groupID = DataFrameColumn.Create("group_id", Tags["group_id"].Cast()); + var groupID = groupMeans["group_id"]; + // gSigned (gamma signed) is the dataframe that we return in the end that presents the utility parity + DataFrame gSigned = new DataFrame(posSign, groupID, upperBoundDiff); + + // plus sign for the upper bound + gSigned["sign"].FillNulls("+", inPlace: true); + + // a temp dataframe that hold the utility rows for the lowerbound values + StringDataFrameColumn negSign = new 
StringDataFrameColumn("sign", lowerBoundDiff.Length); + DataFrame dfNeg = new DataFrame(negSign, groupID, lowerBoundDiff); + dfNeg["sign"].FillNulls("-", inPlace: true); + + // stack the temp dataframe dfNeg to the bottom dataframe that we want to return + dfNeg.Rows.ToList().ForEach(row => { gSigned.Append(row, inPlace: true); }); + + return gSigned; + } + + public override float Bound() + { + return _epsilon; + } + + /// <summary> + /// Turns a lambda vector (columns "sign", "group_id", "value") into one signed weight per loaded row, + /// looked up by the row's group id. + /// </summary> + /// <returns>A column named "signedWeight" with one entry per row of the loaded data.</returns> + public override DataFrameColumn SignedWeights(DataFrame lambdaVec) + { + //TODO: calculate the proper Lambda Event and ProbEvent. + // In the case of Demographic Parity, LambdaEvent contains one value, and ProbEvent is just 1, so we will skip it for now + // lambdaEvent = (lambdaVec["+"] - _ratio * lambdaVec["-"]) + + var gPos = lambdaVec.Filter(lambdaVec["sign"].ElementwiseEquals("+")).OrderBy("group_id"); + var gNeg = lambdaVec.Filter(lambdaVec["sign"].ElementwiseEquals("-")).OrderBy("group_id"); + var lambdaEvent = (float)(gPos["value"] - _ratio * gNeg["value"]).Sum() / ProbEvent; + var lambdaGroupEvent = (_ratio * gPos["value"] - gNeg["value"]) / ProbGroupEvent; + + DataFrameColumn adjust = lambdaEvent - lambdaGroupEvent; + // Build the group -> weight table once instead of filtering a data frame for every row. + // The first entry wins for a repeated group id, matching the previous "first match" lookup. + var lookUpGroups = gPos["group_id"]; + var weightByGroup = new Dictionary<string, float>(); + for (long g = 0; g < adjust.Length; g++) + { + var groupKey = lookUpGroups[g]?.ToString(); + if (groupKey != null && !weightByGroup.ContainsKey(groupKey)) + { + weightByGroup[groupKey] = Convert.ToSingle(adjust[g]); + } + } + //TODO: check for null values i.e., if any entry in adjust is 0, make the corresponding signed weight 0 + //TODO: add utility calculation, for now it is just 1 for everything + long dataSetLength = Tags.Rows.Count; + float[] signedWeightsFloat = new float[dataSetLength]; + // iterate through the rows of the original dataset of features + long i = 0; + foreach (DataFrameRow row in Tags.Rows) + { + // store, for each row, the weight that the lookup table holds for the row's group + // TODO: right now this only supports one event, we have to filter through the event column so that this supports multiple events + signedWeightsFloat[i] = 
weightByGroup[row["group_id"].ToString()]; + i++; + } + + DataFrameColumn signedWeights = new PrimitiveDataFrameColumn("signedWeight", signedWeightsFloat); + + return signedWeights; + } + } + + public class DemographicParity : UtilityParity + { + } +} diff --git a/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs index 1820a9447f..40f3f9fb68 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs @@ -14,6 +14,7 @@ using Microsoft.Data.Analysis; using Microsoft.Extensions.DependencyInjection; using Microsoft.ML.AutoML.CodeGen; +using Microsoft.ML.Fairlearn.AutoML; using Microsoft.ML.Runtime; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; @@ -229,6 +230,31 @@ public async Task AutoMLExperiment_UCI_Adult_Train_Test_Split_Test() result.Metric.Should().BeGreaterThan(0.8); } + [Fact(Skip = "skip in CI build")] + public async Task AutoMLExperiment_UCI_Adult_Fairlearn_Test() + { + var context = new MLContext(1); + context.Log += (o, e) => + { + if (e.Source.StartsWith("AutoMLExperiment")) + { + this.Output.WriteLine(e.RawMessage); + } + }; + var data = DatasetUtil.GetUciAdultDataView(); + var experiment = context.Auto().CreateExperiment(); + var pipeline = context.Auto().Featurizer(data, "_Features_", excludeColumns: new[] { DatasetUtil.UciAdultLabel }) + .Append(context.Auto().BinaryClassification(DatasetUtil.UciAdultLabel, "_Features_", exampleWeightColumnName: "signedWeight", useLgbm: false, useSdcaLogisticRegression: false, useLbfgsLogisticRegression: false)); + + experiment.SetDataset(context.Data.TrainTestSplit(data)) + .SetPipeline(pipeline) + .SetBinaryClassificationMetricWithFairLearn(DatasetUtil.UciAdultLabel, "PredictedLabel", "Workclass", "signedWeight") + .SetMaxModelToExplore(100); + + var result = await experiment.RunAsync(); + 
result.Metric.Should().BeGreaterThan(0.8); + } + [Fact] public async Task AutoMLExperiment_UCI_Adult_CV_5_Test() { diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj index 75cdd289f3..f6b9a021d9 100644 --- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj +++ b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj @@ -10,6 +10,7 @@ + diff --git a/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs b/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs new file mode 100644 index 0000000000..c6664cd2a0 --- /dev/null +++ b/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs @@ -0,0 +1,108 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using FluentAssertions; +using Microsoft.Data.Analysis; +using Microsoft.ML.AutoML; +using Microsoft.ML.Fairlearn.AutoML; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Fairlearn.Tests +{ + public class GridSearchTest + { + private readonly ITestOutputHelper _output; + public GridSearchTest(ITestOutputHelper output) + { + _output = output; + } + + [Fact] + public void Generate_binary_classification_lambda_search_space_test() + { + var context = new MLContext(); + var moment = new UtilityParity(); + var X = CreateDummyDataset(); + moment.LoadData(X, X["y_true"], X["sensitiveFeature"] as StringDataFrameColumn); + + var searchSpace = Utilities.GenerateBinaryClassificationLambdaSearchSpace(moment, 5); + searchSpace.Keys.Should().BeEquivalentTo("a_pos", "a_neg", "b_pos", "b_neg"); + + } + + // Ten rows, five in sensitive group "a" and five in group "b". + private DataFrame CreateDummyDataset() + { + var df = new DataFrame(); + df["X"] = DataFrameColumn.Create("X", new[] { 0f, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + df["y_true"] = DataFrameColumn.Create("y_true", new[] { true, true, true, true, true, true, true, false, 
false, false }); + df["y_pred"] = DataFrameColumn.Create("y_pred", new[] { true, true, true, true, false, false, false, true, false, false }); + df["sensitiveFeature"] = DataFrameColumn.Create("sensitiveFeature", new[] { "a", "b", "a", "a", "b", "a", "b", "b", "a", "b" }); + + return df; + } + + // Data generated to be identical to the dataset used in Binary_Classification.ipynb from the Fairlearn repository on GitHub + private DataFrame CreateGridScearhDataset() + { + float[] score_feature = new float[52]; + int index = 0; + for (int i = 0; i < 31; i++) + { + score_feature[index] = (i * 1.0f) / 30; + index++; + } + for (int j = 0; j < 21; j++) + { + score_feature[index] = (j * 1.0f) / 20; + index++; + } + var df = new DataFrame(); + df["score_feature"] = DataFrameColumn.Create("score_feature", score_feature); + df["y"] = DataFrameColumn.Create("y", new[] { + false, false, false, false, false, false, false, true, true, + true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, + true, true, true, true, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, + true, true, true, true, true, true, true }); + df["sensitiveFeature"] = DataFrameColumn.Create("sensitiveFeature", new[] { "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3" }); + + return df; + } + + /// + /// Runs the grid search trial runner on the data from the "Grid search for Binary Classification.ipynb" notebook + /// + [Fact] + public void TestGridSearchTrialRunner2() + { + var context = new MLContext(); + context.Log += (o, e) => + { + + if (e.Source == "AutoMLExperiment") + { + _output.WriteLine(e.Message); + } + }; + var experiment = context.Auto().CreateExperiment(); + var df = CreateGridScearhDataset(); + var shuffledDataset = 
context.Data.ShuffleRows(df); + var trainTestSplit = context.Data.TrainTestSplit(shuffledDataset, 0.2); + var pipeline = context.Transforms.Categorical.OneHotHashEncoding("sensitiveFeature_encode", "sensitiveFeature") + .Append(context.Transforms.Concatenate("Features", "sensitiveFeature_encode", "score_feature")) + .Append(context.Auto().BinaryClassification(labelColumnName: "y", exampleWeightColumnName: "signedWeight")); + + experiment.SetPipeline(pipeline) + .SetDataset(trainTestSplit) + .SetBinaryClassificationMetricWithFairLearn("y", "PredictedLabel", "sensitiveFeature", "signedWeight") + .SetTrainingTimeInSeconds(10);//100 + + var bestResult = experiment.Run(); + var model = bestResult.Model; + bestResult.Metric.Should().BeGreaterOrEqualTo(0.4); + } + } +} diff --git a/test/Microsoft.ML.Fairlearn.Tests/MetricTest.cs b/test/Microsoft.ML.Fairlearn.Tests/MetricTest.cs new file mode 100644 index 0000000000..a51c8ae06f --- /dev/null +++ b/test/Microsoft.ML.Fairlearn.Tests/MetricTest.cs @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using Microsoft.Data.Analysis; +using Xunit; + +namespace Microsoft.ML.Fairlearn.Tests +{ + public class MetricTest + { + MLContext mlContext; + IDataView data; + public MetricTest() + { + mlContext = new MLContext(); + data = mlContext.Data.LoadFromEnumerable(houseData); + } + + public class HouseData + { + public float Size { get; set; } + public float Price { get; set; } + public float Score { get; set; } + public string Gender { get; set; } + } + + // Two "Male" rows followed by two "Female" rows; the [0]/[1] indices asserted below rely on that order being preserved. + HouseData[] houseData = { + new HouseData() { Size = 1.1F, Price = 0.2F, Gender = "Male", Score = 1.2F}, + new HouseData() { Size = 1.9F, Price = 1.3F, Gender = "Male", Score = 2.3F }, + new HouseData() { Size = 2.8F, Price = 3.0F, Gender = "Female", Score = 25.0F }, + new HouseData() { Size = 3.4F, Price = 3.7F, Gender = "Female", Score = 7.7F } }; + + [Fact] + public void RegressionMetricTest() + { + RegressionGroupMetric regressionMetric = mlContext.Fairlearn().Metric.Regression(eval: data, labelColumn: "Price", scoreColumn: "Score", sensitiveFeatureColumn: "Gender"); + var metricByGroup = regressionMetric.ByGroup(); + Assert.Equal(-2.30578, Convert.ToSingle(metricByGroup["RSquared"][0]), 3); + Assert.Equal(-2039.81453, Convert.ToSingle(metricByGroup["RSquared"][1]), 3); + Assert.Equal(1.00000, Convert.ToSingle(metricByGroup["RMS"][0]), 3); + Assert.Equal(15.811388, Convert.ToSingle(metricByGroup["RMS"][1]), 3); + metricByGroup.Description(); + Dictionary metricOverall = regressionMetric.Overall(); + Assert.Equal(125.5, metricOverall["MSE"], 1); + Assert.Equal(11.202678, metricOverall["RMS"], 4); + Dictionary diff = regressionMetric.DifferenceBetweenGroups(); + Assert.Equal(14.81138, diff["RMS"], 4); + Assert.Equal(2037.5, diff["RSquared"], 1); + + } + + [Fact] + public void BinaryClassificationMetricTest() + { + // create a dummy dataset: ten rows, five in group "a" and five in group "b" + bool[] vs = { true, true, true, true, true, true, true, false, false, false }; + PrimitiveDataFrameColumn label = new 
PrimitiveDataFrameColumn("label", vs); + string[] str = { "a", "b", "a", "a", "b", "a", "b", "b", "a", "b" }; + StringDataFrameColumn groupId = new StringDataFrameColumn("group_id", str); + bool[] fl = { true, true, true, true, false, false, false, false, false, false }; + PrimitiveDataFrameColumn pred = new PrimitiveDataFrameColumn("PredictedLabel", fl); + float[] fl2 = { 1.0F, 1.0F, 1.0F, 1.0F, 0.0F, 0.0F, 0.0F, 0.0F, 0.0F, 0.0F }; + PrimitiveDataFrameColumn score = new PrimitiveDataFrameColumn("Score", fl2); + float[] fl3 = { 1.0F, 1.0F, 1.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F }; + PrimitiveDataFrameColumn prob = new PrimitiveDataFrameColumn("Probability", fl3); + DataFrame df = new DataFrame(label, groupId, pred, score, prob); + + BinaryGroupMetric metrics = mlContext.Fairlearn().Metric.BinaryClassification(eval: df, labelColumn: "label", predictedColumn: "PredictedLabel", sensitiveFeatureColumn: "group_id"); + var metricByGroup = metrics.ByGroup(); + // group "a" has 4 of its 5 predictions equal to the label, group "b" has 3 of 5, and all rows together 7 of 10 + Assert.Equal(0.8, Convert.ToSingle(metricByGroup["Accuracy"][0]), 1); + Assert.Equal(0.6, Convert.ToSingle(metricByGroup["Accuracy"][1]), 1); + var metricOverall = metrics.Overall(); + Assert.Equal(0.7, Convert.ToSingle(metricOverall["Accuracy"]), 1); + } + } +} diff --git a/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj b/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj new file mode 100644 index 0000000000..b950086278 --- /dev/null +++ b/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj @@ -0,0 +1,38 @@ + + + None + $(NoWarn);MSML_ParameterLocalVarName;MSML_PrivateFieldName;MSML_ExtendBaseTestClass;MSML_GeneralName + + + + + + + + + + + + + + + + + Always + + + + + + + + + + + + + + + + + diff --git a/test/Microsoft.ML.Fairlearn.Tests/UtilityTest.cs b/test/Microsoft.ML.Fairlearn.Tests/UtilityTest.cs new file mode 100644 index 0000000000..3a0354755d --- /dev/null +++ b/test/Microsoft.ML.Fairlearn.Tests/UtilityTest.cs @@ -0,0 +1,40 @@ +// 
Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; +using Xunit; +using Microsoft.Data.Analysis; + +namespace Microsoft.ML.Fairlearn.Tests +{ + public class UtilityTest + { + [Fact] + public void DemographyParityTest() + { + var dp = new UtilityParity(differenceBound: 0.01F); + + string[] str = { "a", "b", "a", "a", "b", "a", "b", "b", "a", "b" }; + StringDataFrameColumn sensitiveFeature = new StringDataFrameColumn("group_id", str); + + int[] vs = { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }; + PrimitiveDataFrameColumn y = new PrimitiveDataFrameColumn("label", vs); + + + DataFrame x = new DataFrame(); + dp.LoadData(x, y, sensitiveFeature: sensitiveFeature); + + float[] fl = { 1.0F, 1.0F, 1.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F }; + PrimitiveDataFrameColumn ypred = new PrimitiveDataFrameColumn("pred", fl); + var gSinged = dp.Gamma(ypred); + + // mean prediction is 0.5 overall, 0.6 for group "a" and 0.4 for group "b"; + // rows 0-1 are the "+" (upper bound) entries for a and b, rows 2-3 the "-" (lower bound) entries + Assert.Equal(0.1, Convert.ToSingle(gSinged["value"][0]), 1); + Assert.Equal(-0.1, Convert.ToSingle(gSinged["value"][1]), 1); + Assert.Equal(-0.1, Convert.ToSingle(gSinged["value"][2]), 1); + Assert.Equal(0.1, Convert.ToSingle(gSinged["value"][3]), 1); + } + } +}