Skip to content

Commit fc9cc3e

Browse files
Tom FinleyTomFinley
authored andcommitted
Initial code analyzer for Microsoft.ML
Adds code analysis initially for correct usage of common Contracts.Except/Check patterns, naming conventions, variable usage and initializations, access modifiers, and other idioms used throughout the Microsoft.ML codebase. Enables analysis on Microsoft.ML projects.
1 parent 0e37508 commit fc9cc3e

40 files changed

+3466
-14
lines changed

Microsoft.ML.sln

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.LightGBM", "sr
8282
EndProject
8383
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Ensemble", "src\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj", "{DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}"
8484
EndProject
85+
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "analyzer", "analyzer", "{7F13E156-3EBA-4021-84A5-CD56BA72F99E}"
86+
EndProject
87+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer", "analyzer\Microsoft.ML.CodeAnalyzer\Microsoft.ML.CodeAnalyzer.csproj", "{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}"
88+
EndProject
89+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer.Tests", "test\Microsoft.ML.CodeAnalyzer.Tests\Microsoft.ML.CodeAnalyzer.Tests.csproj", "{3E4ABF07-7970-4BE6-B45B-A13D3C397545}"
90+
EndProject
8591
Global
8692
GlobalSection(SolutionConfigurationPlatforms) = preSolution
8793
Debug|Any CPU = Debug|Any CPU
@@ -192,6 +198,14 @@ Global
192198
{DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}.Debug|Any CPU.Build.0 = Debug|Any CPU
193199
{DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}.Release|Any CPU.ActiveCfg = Release|Any CPU
194200
{DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}.Release|Any CPU.Build.0 = Release|Any CPU
201+
{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
202+
{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Debug|Any CPU.Build.0 = Debug|Any CPU
203+
{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Release|Any CPU.ActiveCfg = Release|Any CPU
204+
{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Release|Any CPU.Build.0 = Release|Any CPU
205+
{3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
206+
{3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Debug|Any CPU.Build.0 = Debug|Any CPU
207+
{3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release|Any CPU.ActiveCfg = Release|Any CPU
208+
{3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release|Any CPU.Build.0 = Release|Any CPU
195209
EndGlobalSection
196210
GlobalSection(SolutionProperties) = preSolution
197211
HideSolutionNode = FALSE
@@ -227,6 +241,8 @@ Global
227241
{3DEB504D-7A07-48CE-91A2-8047461CB3D4} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
228242
{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
229243
{DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
244+
{B4E55B2D-2A92-46E7-B72F-E76D6FD83440} = {7F13E156-3EBA-4021-84A5-CD56BA72F99E}
245+
{3E4ABF07-7970-4BE6-B45B-A13D3C397545} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
230246
EndGlobalSection
231247
GlobalSection(ExtensibilityGlobals) = postSolution
232248
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Collections.Generic;
6+
using System.Collections.Immutable;
7+
using Microsoft.CodeAnalysis;
8+
using Microsoft.CodeAnalysis.CSharp;
9+
using Microsoft.CodeAnalysis.CSharp.Syntax;
10+
using Microsoft.CodeAnalysis.Diagnostics;
11+
12+
namespace Microsoft.ML.CodeAnalyzer
13+
{
14+
[DiagnosticAnalyzer(LanguageNames.CSharp)]
15+
public sealed class ContractsCheckAnalyzer : DiagnosticAnalyzer
16+
{
17+
// Detecting that a syntax call is actually on a particular method is computationally
18+
// intensive, so once we detect that we're on Contracts methods, we put all the methods
19+
// here.
20+
21+
private const string Category = "Contracts";
22+
23+
public static class NameofDiagnostic
24+
{
25+
public const string Id = "MSML_ContractsNameUsesNameof";
26+
private const string Title = "Contracts argument for names is not a nameof";
27+
private const string Format = "Call to '{0}' should use nameof(...) for {1} argument, but instead used '{2}'";
28+
private const string Description =
29+
"For Contracts.Checks or Excepts with some form of parameter name, unless that " +
30+
"argument is a nameof(...) expression there's almost certainly something wrong.";
31+
32+
internal static DiagnosticDescriptor Rule =
33+
new DiagnosticDescriptor(Id, Title, Format, Category,
34+
DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description);
35+
}
36+
37+
public static class ExceptionDiagnostic
38+
{
39+
public const string Id = "MSML_ContractsExceptAsExpression";
40+
private const string Title = "Contracts.Except used as expression";
41+
private const string Format = "Something should be done with the exception created by '{0}'";
42+
private const string Description =
43+
"Contracts.Except and similar methods do not themselves throw, but provide an " +
44+
"exception that can be thrown. This call did nothing with the exception.";
45+
46+
internal static DiagnosticDescriptor Rule =
47+
new DiagnosticDescriptor(Id, Title, Format, Category,
48+
DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description);
49+
}
50+
51+
public static class SimpleMessageDiagnostic
52+
{
53+
public const string Id = "MSML_ContractsCheckMessageNotLiteralOrIdentifier";
54+
private const string Title = "Contracts.Check argument for message may involve formatting";
55+
private const string Format = "On call to '{0}' message '{1}' could not be identified as being either a string literal or variable";
56+
57+
internal static DiagnosticDescriptor Rule =
58+
new DiagnosticDescriptor(Id, Title, Format, Category,
59+
DiagnosticSeverity.Warning, isEnabledByDefault: true,
60+
description: Descriptions.ContractsCheckMessageNotLiteralOrIdentifier);
61+
}
62+
63+
public static class DecodeMessageWithLoadContextDiagnostic
64+
{
65+
public const string Id = "MSML_NoMessagesForLoadContext";
66+
private const string Title = "Contracts.Check argument for message may involve formatting";
67+
private const string Format = "On call to '{0}' message '{1}' was provided, but this method had a ModelLoadContext";
68+
69+
internal static DiagnosticDescriptor Rule =
70+
new DiagnosticDescriptor(Id, Title, Format, Category,
71+
DiagnosticSeverity.Warning, isEnabledByDefault: true,
72+
description: Descriptions.NoMessagesForLoadContext);
73+
}
74+
75+
public override ImmutableArray<DiagnosticDescriptor> SupportedDiagnostics =>
76+
ImmutableArray.Create(
77+
NameofDiagnostic.Rule, ExceptionDiagnostic.Rule, SimpleMessageDiagnostic.Rule,
78+
DecodeMessageWithLoadContextDiagnostic.Rule);
79+
80+
private static HashSet<string> _targetSet = new HashSet<string>(new[]
81+
{
82+
"Check", "CheckUserArg", "CheckParam", "CheckParamValue", "CheckRef", "CheckValue",
83+
"CheckNonEmpty", "CheckNonWhiteSpace", "CheckDecode", "CheckIO", "CheckAlive", "CheckValueOrNull",
84+
"Except", "ExceptUserArg", "ExceptParam", "ExceptParamValue", "ExceptValue", "ExceptEmpty",
85+
"ExceptWhiteSpace", "ExceptDecode", "ExceptIO", "ExceptNotImpl", "ExceptNotSupp",
86+
});
87+
88+
public override void Initialize(AnalysisContext context)
89+
{
90+
context.RegisterSyntaxNodeAction(Analyze, SyntaxKind.InvocationExpression);
91+
}
92+
93+
/// <summary>
94+
/// Returns an array parallel to <paramref name="parameters"/> that contains
95+
/// the arguments in <paramref name="invocation"/>. If named parameters are used
96+
/// then this is not necessarily the same. Note that in the event that there are
97+
/// more arguments than parameters (e.g., via a <c>params</c> variable length
98+
/// parameter) only the first match for the parameter is recorded.
99+
/// </summary>
100+
private static ArgumentSyntax[] ParallelArgs(
101+
ImmutableArray<IParameterSymbol> parameters,
102+
InvocationExpressionSyntax invocation)
103+
{
104+
ArgumentSyntax[] args = new ArgumentSyntax[parameters.Length];
105+
var syntaxArgs = invocation.ArgumentList.Arguments;
106+
for (int i = 0; i < syntaxArgs.Count; ++i)
107+
{
108+
var arg = syntaxArgs[i];
109+
int index = -1;
110+
if (arg.NameColon == null)
111+
index = i;
112+
else
113+
{
114+
string nameColonText = arg.NameColon.Name.ToString();
115+
for (int p = 0; p < parameters.Length; ++p)
116+
{
117+
if (parameters[p].Name == nameColonText)
118+
{
119+
index = p;
120+
break;
121+
}
122+
}
123+
}
124+
if (0 <= index && index < args.Length && args[index] == null)
125+
args[index] = arg;
126+
}
127+
return args;
128+
}
129+
130+
private static bool NameIsNameof(ExpressionSyntax exp)
131+
{
132+
var invokeExp = exp as InvocationExpressionSyntax;
133+
return invokeExp != null && invokeExp.Expression.ToString() == "nameof";
134+
}
135+
136+
private static bool IsGoodMessage(SyntaxNodeAnalysisContext context, ExpressionSyntax exp)
137+
{
138+
if (exp.IsKind(SyntaxKind.AddExpression))
139+
{
140+
// These sorts of string concatenation things always wind up being compile
141+
// time constants, from what I can tell from ildasm.
142+
var binExp = (BinaryExpressionSyntax)exp;
143+
return IsGoodMessage(context, binExp.Left) && IsGoodMessage(context, binExp.Right);
144+
}
145+
146+
if (exp.IsKind(SyntaxKind.SimpleMemberAccessExpression))
147+
{
148+
var access = (MemberAccessExpressionSyntax)exp;
149+
var field = context.SemanticModel.GetSymbolInfo(access).Symbol as IFieldSymbol;
150+
return field?.IsConst ?? false;
151+
}
152+
153+
if (exp.IsKind(SyntaxKind.InvocationExpression))
154+
return ((InvocationExpressionSyntax)exp).Expression.ToString() == "nameof";
155+
156+
return exp.IsKind(SyntaxKind.StringLiteralExpression) || exp.IsKind(SyntaxKind.IdentifierName);
157+
}
158+
159+
private static bool HasModelLoadContext(SyntaxNode node)
160+
{
161+
while (node != null && !node.IsKind(SyntaxKind.MethodDeclaration) && !node.IsKind(SyntaxKind.ConstructorDeclaration))
162+
node = node.Parent;
163+
if (node == null)
164+
return false;
165+
var enclosingParams = ((node as MethodDeclarationSyntax)?.ParameterList
166+
?? ((ConstructorDeclarationSyntax)node).ParameterList).Parameters;
167+
foreach (var param in enclosingParams)
168+
{
169+
// It is possible that this may mislead us slightly, since there could be another
170+
// unrelated type called ModelLoadContext, or someone could have type aliasing, or
171+
// some other complicating factor that will defeat this simple check. With some
172+
// additional computational load, we could access the semantic model for this.
173+
if (param.Type.ToString() == "ModelLoadContext")
174+
return true;
175+
}
176+
return false;
177+
}
178+
179+
private static void Analyze(SyntaxNodeAnalysisContext context)
180+
{
181+
var invocation = (InvocationExpressionSyntax)context.Node;
182+
if (!(invocation.Expression is MemberAccessExpressionSyntax access))
183+
return;
184+
var name = access.Name.ToString();
185+
// Do the quick checks first on the name.
186+
bool isCheck = false;
187+
bool isExcept = false;
188+
if ((!(isCheck = name.StartsWith("Check")) && !(isExcept = name.StartsWith("Except"))) || !_targetSet.Contains(name))
189+
return;
190+
// Now that we've verified we're approximately in the right neighborhood, do a more
191+
// in depth semantic analysis to verify we're targetting the right sort of object.
192+
var symbolInfo = context.SemanticModel.GetSymbolInfo(invocation);
193+
if (!(symbolInfo.Symbol is IMethodSymbol methodSymbol))
194+
return;
195+
var containingSymbolName = methodSymbol.ContainingSymbol.ToString();
196+
// The "internal" version is one used by some projects that want to benefit from Contracts,
197+
// but for some reason cannot reference MLCore.
198+
if (containingSymbolName != "Microsoft.ML.Runtime.Contracts" &&
199+
containingSymbolName != "Microsoft.ML.Runtime.Internal.Contracts")
200+
{
201+
return;
202+
}
203+
if (isExcept && invocation.Parent.IsKind(SyntaxKind.ExpressionStatement))
204+
{
205+
context.ReportDiagnostic(Diagnostic.Create(
206+
ExceptionDiagnostic.Rule, invocation.GetLocation(), name));
207+
}
208+
209+
var parameters = methodSymbol.Parameters;
210+
var args = ParallelArgs(parameters, invocation);
211+
212+
for (int i = 0; i < parameters.Length; ++i)
213+
{
214+
if (args[i] == null)
215+
continue;
216+
var arg = args[i];
217+
var parameter = parameters[i];
218+
219+
switch (parameter.Name)
220+
{
221+
case "paramName":
222+
case "name":
223+
if (!NameIsNameof(arg.Expression))
224+
{
225+
context.ReportDiagnostic(Diagnostic.Create(
226+
NameofDiagnostic.Rule, arg.GetLocation(), name, parameter.Name, arg.Expression));
227+
}
228+
break;
229+
case "msg":
230+
if (isCheck && !IsGoodMessage(context, arg.Expression))
231+
{
232+
context.ReportDiagnostic(Diagnostic.Create(
233+
SimpleMessageDiagnostic.Rule, arg.GetLocation(), name, arg.Expression));
234+
}
235+
if ((name == "CheckDecode" || name == "ExceptDecode") && HasModelLoadContext(invocation))
236+
{
237+
context.ReportDiagnostic(Diagnostic.Create(
238+
DecodeMessageWithLoadContextDiagnostic.Rule, arg.GetLocation(), name, arg.Expression));
239+
}
240+
break;
241+
default:
242+
break;
243+
}
244+
}
245+
}
246+
}
247+
}

0 commit comments

Comments
 (0)