Skip to content

Commit d67ec24

Browse files
author
Eric Liu
committed
[clangd] Filter out private proto symbols in SymbolCollector.
Summary: This uses heuristics to identify private proto symbols. For example, top-level symbols whose names contain "_" are considered private. These symbols are not expected to be used by users.
Reviewers: ilya-biryukov, malaperle
Reviewed By: ilya-biryukov
Subscribers: sammccall, klimek, MaskRay, jkorous, cfe-commits
Differential Revision: https://reviews.llvm.org/D46751
llvm-svn: 332456
1 parent c922e07 commit d67ec24

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

clang-tools-extra/clangd/index/SymbolCollector.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,46 @@ llvm::Optional<std::string> toURI(const SourceManager &SM, StringRef Path,
8989
return llvm::None;
9090
}
9191

92+
// All proto generated headers should start with this line.
// Both the characters and the pointer are const, so the marker cannot be
// reseated or modified after initialization.
static const char *const PROTO_HEADER_COMMENT =
    "// Generated by the protocol buffer compiler. DO NOT EDIT!";
95+
96+
// Checks whether the decl is a private symbol in a header generated by
97+
// protobuf compiler.
98+
// To identify whether a proto header is actually generated by proto compiler,
99+
// we check whether it starts with PROTO_HEADER_COMMENT.
100+
// FIXME: make filtering extensible when there are more use cases for symbol
101+
// filters.
102+
bool isPrivateProtoDecl(const NamedDecl &ND) {
103+
const auto &SM = ND.getASTContext().getSourceManager();
104+
auto Loc = findNameLoc(&ND);
105+
auto FileName = SM.getFilename(Loc);
106+
if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
107+
return false;
108+
auto FID = SM.getFileID(Loc);
109+
// Double check that this is an actual protobuf header.
110+
if (!SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT))
111+
return false;
112+
113+
// ND without identifier can be operators.
114+
if (ND.getIdentifier() == nullptr)
115+
return false;
116+
auto Name = ND.getIdentifier()->getName();
117+
if (!Name.contains('_'))
118+
return false;
119+
// Nested proto entities (e.g. Message::Nested) have top-level decls
120+
// that shouldn't be used (Message_Nested). Ignore them completely.
121+
// The nested entities are dangling type aliases, we may want to reconsider
122+
// including them in the future.
123+
// For enum constants, SOME_ENUM_CONSTANT is not private and should be
124+
// indexed. Outer_INNER is private. This heuristic relies on naming style, it
125+
// will include OUTER_INNER and exclude some_enum_constant.
126+
// FIXME: the heuristic relies on naming style (i.e. no underscore in
127+
// user-defined names) and can be improved.
128+
return (ND.getKind() != Decl::EnumConstant) ||
129+
std::any_of(Name.begin(), Name.end(), islower);
130+
}
131+
92132
bool shouldFilterDecl(const NamedDecl *ND, ASTContext *ASTCtx,
93133
const SymbolCollector::Options &Opts) {
94134
using namespace clang::ast_matchers;
@@ -129,6 +169,9 @@ bool shouldFilterDecl(const NamedDecl *ND, ASTContext *ASTCtx,
129169
.empty())
130170
return true;
131171

172+
// Avoid indexing internal symbols in protobuf generated headers.
173+
if (isPrivateProtoDecl(*ND))
174+
return true;
132175
return false;
133176
}
134177

clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,41 @@ TEST_F(SymbolCollectorTest, UTF16Character) {
697697
AllOf(QName("pörk"), DeclRange(Header.range()))));
698698
}
699699

700+
// A header named like a generated proto header and starting with the proto
// marker comment: Top_Level and Kind_Not_Ok are expected to be dropped, while
// TopLevel, Kind, KIND_OK and the operator are expected to be collected.
TEST_F(SymbolCollectorTest, FilterPrivateProtoSymbols) {
  TestHeaderName = testPath("x.proto.h");
  // The marker comment must be the very first bytes of the header.
  const std::string ProtoHeader =
      R"(// Generated by the protocol buffer compiler. DO NOT EDIT!
namespace nx {
class Top_Level {};
class TopLevel {};
enum Kind {
KIND_OK,
Kind_Not_Ok,
};
bool operator<(const TopLevel &, const TopLevel &);
})";
  runSymbolCollector(ProtoHeader, /*Main=*/"");
  EXPECT_THAT(Symbols,
              UnorderedElementsAre(QName("nx"), QName("nx::TopLevel"),
                                   QName("nx::Kind"), QName("nx::KIND_OK"),
                                   QName("nx::operator<")));
}
719+
720+
// Same proto-like file name, but the header does not start with the proto
// marker comment: every symbol, including Top_Level and Kind_Fine, is
// expected to be collected.
TEST_F(SymbolCollectorTest, DoubleCheckProtoHeaderComment) {
  TestHeaderName = testPath("x.proto.h");
  const std::string PlainHeader = R"(
namespace nx {
class Top_Level {};
enum Kind {
Kind_Fine
};
}
)";
  runSymbolCollector(PlainHeader, /*Main=*/"");
  EXPECT_THAT(Symbols,
              UnorderedElementsAre(QName("nx"), QName("nx::Top_Level"),
                                   QName("nx::Kind"), QName("nx::Kind_Fine")));
}
700735

701736
} // namespace
702737
} // namespace clangd

0 commit comments

Comments
 (0)