Skip to content

Commit 4c6aa8f

Browse files
[clang][ssaf] Introduce entity abstraction for SSAF (#169131)
Add core abstractions for identifying program entities across compilation and link unit boundaries in the Scalable Static Analysis Framework (SSAF).

Introduces three key components:
- BuildNamespace: Represents build artifacts (compilation units, link units)
- EntityName: Globally unique entity identifiers across compilation boundaries
- AST mapping: Functions to map Clang AST declarations to EntityNames

Entity identification uses Unified Symbol Resolution (USR) as the underlying mechanism, with extensions for sub-entities (parameters, return values) via suffixes. The abstraction allows whole-program analysis by providing stable identifiers that persist across separately compiled translation units.
1 parent 1267488 commit 4c6aa8f

File tree

13 files changed

+966
-0
lines changed

13 files changed

+966
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//===- ASTEntityMapping.h - AST to SSAF Entity mapping ----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_ANALYSIS_SCALABLE_ASTENTITYMAPPING_H
10+
#define LLVM_CLANG_ANALYSIS_SCALABLE_ASTENTITYMAPPING_H
11+
12+
#include "clang/AST/Decl.h"
13+
#include "clang/Analysis/Scalable/Model/EntityName.h"
14+
#include "llvm/ADT/StringRef.h"
15+
#include <optional>
16+
17+
namespace clang::ssaf {
18+
19+
/// Maps a declaration to an EntityName.
20+
///
21+
/// Supported declaration types for entity mapping:
22+
/// - Functions and methods
23+
/// - Global variables
24+
/// - Function parameters (only when the parent is a FunctionDecl)
25+
/// - Struct/class/union type definitions
26+
/// - Struct/class/union fields
27+
///
28+
/// Implicit declarations and compiler builtins are not mapped.
29+
///
30+
/// \param D The declaration to map. Must not be null.
31+
///
32+
/// \return An EntityName if the declaration can be mapped, std::nullopt
33+
/// otherwise (unsupported kind, implicit, builtin, or no name available).
34+
std::optional<EntityName> getEntityName(const Decl *D);
35+
36+
/// Maps the return entity of a function to an EntityName.
37+
/// The returned name uniquely identifies the return value of function
38+
/// \p FD.
39+
///
40+
/// \param FD The function declaration. Must not be null.
41+
///
42+
/// \return An EntityName for the function's return entity, or std::nullopt
/// if \p FD is implicit, a compiler builtin, or cannot be named.
43+
std::optional<EntityName> getEntityNameForReturn(const FunctionDecl *FD);
44+
45+
} // namespace clang::ssaf
46+
47+
#endif // LLVM_CLANG_ANALYSIS_SCALABLE_ASTENTITYMAPPING_H
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
//===- BuildNamespace.h -----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines BuildNamespace and NestedBuildNamespace classes that
10+
// represent build namespaces in the Scalable Static Analysis Framework.
11+
//
12+
// Build namespaces provide an abstraction for grouping program entities (such
13+
// as those in a shared library or compilation unit) to enable analysis of
14+
// software projects constructed from individual components.
15+
//
16+
//===----------------------------------------------------------------------===//
17+
18+
#ifndef LLVM_CLANG_ANALYSIS_SCALABLE_MODEL_BUILDNAMESPACE_H
19+
#define LLVM_CLANG_ANALYSIS_SCALABLE_MODEL_BUILDNAMESPACE_H
20+
21+
#include "llvm/ADT/STLExtras.h"
22+
#include "llvm/ADT/StringRef.h"
23+
#include <optional>
24+
#include <string>
#include <tuple>
25+
#include <vector>
26+
27+
namespace clang::ssaf {
28+
29+
/// The kind of build artifact a BuildNamespace stands for: a single
/// compilation unit, or a link unit (e.g., a shared library).
enum class BuildNamespaceKind : unsigned short { CompilationUnit, LinkUnit };
30+
31+
/// Returns a string for \p BNK.
/// NOTE(review): the exact spelling is chosen by the out-of-line definition,
/// which is not part of this header.
llvm::StringRef toString(BuildNamespaceKind BNK);
32+
33+
/// Parses \p Str into a BuildNamespaceKind.
/// NOTE(review): presumably the inverse of toString, yielding std::nullopt for
/// unrecognized input -- confirm against the definition.
std::optional<BuildNamespaceKind> parseBuildNamespaceKind(llvm::StringRef Str);
34+
35+
/// Represents a single namespace in the build process.
36+
///
37+
/// A BuildNamespace groups program entities, such as those belonging to a
38+
/// compilation unit or link unit (e.g., a shared library). Each namespace has a
39+
/// kind (CompilationUnit or LinkUnit) and a unique identifier name within that
40+
/// kind.
41+
///
42+
/// BuildNamespaces can be composed into NestedBuildNamespace to represent
43+
/// hierarchical namespace structures that model how software is constructed
44+
/// from its components.
45+
class BuildNamespace {
46+
/// Whether this namespace is a compilation unit or a link unit.
BuildNamespaceKind Kind;
47+
/// Identifier of this namespace, expected to be unique within Kind.
std::string Name;
48+
49+
/// Views the members as a tuple of references, ordered Kind then Name.
/// NOTE(review): presumably the basis of the comparison operators below;
/// their definitions are not in this header.
auto asTuple() const { return std::tie(Kind, Name); }
50+
51+
public:
52+
/// Constructs a namespace of kind \p Kind identified by \p Name. The contents
/// of \p Name are copied into storage owned by this object.
BuildNamespace(BuildNamespaceKind Kind, llvm::StringRef Name)
53+
: Kind(Kind), Name(Name.str()) {}
54+
55+
/// Creates a BuildNamespace representing a compilation unit.
56+
///
57+
/// \param CompilationId The unique identifier for the compilation unit.
58+
/// \returns A BuildNamespace with CompilationUnit kind.
59+
static BuildNamespace makeCompilationUnit(llvm::StringRef CompilationId);
60+
61+
/// Equality, inequality and ordering; all three are defined out of line.
bool operator==(const BuildNamespace &Other) const;
62+
bool operator!=(const BuildNamespace &Other) const;
63+
bool operator<(const BuildNamespace &Other) const;
64+
65+
/// Grants SerializationFormat access to the private members.
friend class SerializationFormat;
66+
};
67+
68+
/// Represents a hierarchical sequence of build namespaces.
69+
///
70+
/// A NestedBuildNamespace captures namespace qualification for program entities
71+
/// by maintaining an ordered sequence of BuildNamespace steps. This models how
72+
/// entities are organized through multiple steps of the build process, such as
73+
/// first being part of a compilation unit, then incorporated into a link unit.
74+
///
75+
/// For example, an entity might be qualified by a compilation unit namespace
76+
/// followed by a shared library namespace.
77+
class NestedBuildNamespace {
78+
/// Grants SerializationFormat access to the private members.
friend class SerializationFormat;
79+
80+
/// The qualification steps, in the order in which they were appended.
std::vector<BuildNamespace> Namespaces;
81+
82+
public:
83+
/// Creates a namespace with no qualification steps.
NestedBuildNamespace() = default;
84+
85+
/// Creates a namespace holding a copy of the steps in \p Namespaces.
explicit NestedBuildNamespace(const std::vector<BuildNamespace> &Namespaces)
86+
: Namespaces(Namespaces) {}
87+
88+
/// Creates a namespace consisting of the single step \p N.
explicit NestedBuildNamespace(const BuildNamespace &N) {
89+
Namespaces.push_back(N);
90+
}
91+
92+
/// Creates a NestedBuildNamespace representing a compilation unit.
93+
///
94+
/// \param CompilationId The unique identifier for the compilation unit.
95+
/// \returns A NestedBuildNamespace containing a single CompilationUnit
96+
/// BuildNamespace.
97+
static NestedBuildNamespace
98+
makeCompilationUnit(llvm::StringRef CompilationId);
99+
100+
/// Creates a new NestedBuildNamespace by appending the steps of another one.
101+
///
102+
/// \param Namespace The namespace whose steps are appended after this one's.
/// \returns A copy of this namespace followed by the steps of \p Namespace;
/// this object is left unchanged.
103+
NestedBuildNamespace makeQualified(NestedBuildNamespace Namespace) const {
104+
auto Copy = *this;
105+
// Reserve the final size up front so the append below does not need to
// grow the vector more than once.
Copy.Namespaces.reserve(Copy.Namespaces.size() +
106+
Namespace.Namespaces.size());
107+
llvm::append_range(Copy.Namespaces, Namespace.Namespaces);
108+
return Copy;
109+
}
110+
111+
/// NOTE(review): presumably true when there are no qualification steps; the
/// definition is not in this header.
bool empty() const;
112+
113+
/// Equality, inequality and ordering; all three are defined out of line.
bool operator==(const NestedBuildNamespace &Other) const;
114+
bool operator!=(const NestedBuildNamespace &Other) const;
115+
bool operator<(const NestedBuildNamespace &Other) const;
116+
117+
/// These classes are granted access to the private members.
friend class JSONWriter;
118+
friend class LinkUnitResolution;
119+
};
120+
121+
} // namespace clang::ssaf
122+
123+
#endif // LLVM_CLANG_ANALYSIS_SCALABLE_MODEL_BUILDNAMESPACE_H
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
//===- EntityName.h ---------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_ANALYSIS_SCALABLE_MODEL_ENTITYNAME_H
10+
#define LLVM_CLANG_ANALYSIS_SCALABLE_MODEL_ENTITYNAME_H
11+
12+
#include "clang/Analysis/Scalable/Model/BuildNamespace.h"
13+
#include "llvm/ADT/SmallString.h"
14+
#include "llvm/ADT/StringRef.h"
15+
#include <string>
16+
17+
namespace clang::ssaf {
18+
/// Uniquely identifies an entity in a program.
19+
///
20+
/// EntityName provides a globally unique identifier for program entities that
21+
/// remains stable across compilation boundaries. This enables whole-program
22+
/// analysis to track and relate entities across separately compiled translation
23+
/// units.
24+
///
25+
/// Client code should not make assumptions about the implementation details,
26+
/// such as USRs.
27+
class EntityName {
28+
/// Unified Symbol Resolution string of the declaration this name is based on.
std::string USR;
29+
/// Distinguishes sub-entities that share a USR. ASTEntityMapping.cpp uses "0"
/// for a function's return value, the 1-based index for a parameter, and an
/// empty suffix otherwise.
llvm::SmallString<16> Suffix;
30+
/// Build namespace qualification. The AST mapping functions create names with
/// an empty namespace; makeQualified adds steps.
NestedBuildNamespace Namespace;
31+
32+
/// Views the members as a tuple of references: USR, Suffix, Namespace.
/// NOTE(review): presumably the basis of the comparison operators below;
/// their definitions are not in this header.
auto asTuple() const { return std::tie(USR, Suffix, Namespace); }
33+
34+
public:
35+
/// Client code should not use this constructor directly.
36+
/// Use getEntityName and other functions in ASTEntityMapping.h to get
37+
/// entity names.
38+
EntityName(llvm::StringRef USR, llvm::StringRef Suffix,
39+
NestedBuildNamespace Namespace);
40+
41+
/// Equality, inequality and ordering; all three are defined out of line.
bool operator==(const EntityName &Other) const;
42+
bool operator!=(const EntityName &Other) const;
43+
bool operator<(const EntityName &Other) const;
44+
45+
/// Creates a new EntityName with additional build namespace qualification.
46+
///
47+
/// \param Namespace The namespace steps to append to this entity's namespace.
/// \returns The newly qualified EntityName.
48+
EntityName makeQualified(NestedBuildNamespace Namespace) const;
49+
50+
/// These classes are granted access to the private members.
friend class LinkUnitResolution;
51+
friend class SerializationFormat;
52+
};
53+
54+
} // namespace clang::ssaf
55+
56+
#endif // LLVM_CLANG_ANALYSIS_SCALABLE_MODEL_ENTITYNAME_H

clang/lib/Analysis/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,4 @@ add_clang_library(clangAnalysis
5050
add_subdirectory(plugins)
5151
add_subdirectory(FlowSensitive)
5252
add_subdirectory(LifetimeSafety)
53+
add_subdirectory(Scalable)
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
//===- ASTEntityMapping.cpp - AST to SSAF Entity mapping --------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements utilities for mapping AST declarations to SSAF entities.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "clang/Analysis/Scalable/ASTEntityMapping.h"
14+
#include "clang/AST/Decl.h"
15+
#include "clang/Analysis/Scalable/Model/BuildNamespace.h"
16+
#include "clang/Index/USRGeneration.h"
17+
#include "llvm/ADT/SmallString.h"
18+
19+
namespace clang::ssaf {
20+
21+
std::optional<EntityName> getEntityName(const Decl *D) {
22+
if (!D)
23+
return std::nullopt;
24+
25+
if (D->isImplicit())
26+
return std::nullopt;
27+
28+
if (isa<FunctionDecl>(D) && cast<FunctionDecl>(D)->getBuiltinID())
29+
return std::nullopt;
30+
31+
if (!isa<FunctionDecl, ParmVarDecl, VarDecl, FieldDecl, RecordDecl>(D))
32+
return std::nullopt;
33+
34+
llvm::SmallString<16> Suffix;
35+
const Decl *USRDecl = D;
36+
37+
// For parameters, use the parent function's USR with parameter index as
38+
// suffix
39+
if (const auto *PVD = dyn_cast<ParmVarDecl>(D)) {
40+
const auto *FD =
41+
dyn_cast_or_null<FunctionDecl>(PVD->getParentFunctionOrMethod());
42+
if (!FD)
43+
return std::nullopt;
44+
USRDecl = FD;
45+
46+
const auto ParamIdx = PVD->getFunctionScopeIndex();
47+
llvm::raw_svector_ostream OS(Suffix);
48+
// Parameter uses function's USR with 1-based index as suffix
49+
OS << (ParamIdx + 1);
50+
}
51+
52+
llvm::SmallString<128> USRBuf;
53+
if (clang::index::generateUSRForDecl(USRDecl, USRBuf))
54+
return std::nullopt;
55+
56+
if (USRBuf.empty())
57+
return std::nullopt;
58+
59+
return EntityName(USRBuf.str(), Suffix, {});
60+
}
61+
62+
std::optional<EntityName> getEntityNameForReturn(const FunctionDecl *FD) {
63+
if (!FD)
64+
return std::nullopt;
65+
66+
if (FD->isImplicit())
67+
return std::nullopt;
68+
69+
if (FD->getBuiltinID())
70+
return std::nullopt;
71+
72+
llvm::SmallString<128> USRBuf;
73+
if (clang::index::generateUSRForDecl(FD, USRBuf)) {
74+
return std::nullopt;
75+
}
76+
77+
if (USRBuf.empty())
78+
return std::nullopt;
79+
80+
return EntityName(USRBuf.str(), /*Suffix=*/"0", /*Namespace=*/{});
81+
}
82+
83+
} // namespace clang::ssaf
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# LLVM components that clangAnalysisScalable links against.
set(LLVM_LINK_COMPONENTS
2+
Support
3+
)
4+
5+
# Library holding the SSAF entity model (BuildNamespace, EntityName) and the
# AST-to-entity mapping.
add_clang_library(clangAnalysisScalable
6+
ASTEntityMapping.cpp
7+
Model/BuildNamespace.cpp
8+
Model/EntityName.cpp
9+
10+
# Clang libraries linked into this library.
LINK_LIBS
11+
clangAST
12+
clangASTMatchers
13+
clangBasic
14+
clangIndex
15+
clangLex
16+
clangFrontend
17+
18+
# No additional build-order dependencies are listed.
DEPENDS
19+
)

0 commit comments

Comments
 (0)