Skip to content

Commit a80c5c5

Browse files
committed
feat: Add COREF for C family extractor source code
1 parent efcb9e0 commit a80c5c5

File tree

126 files changed

+54638
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

126 files changed

+54638
-0
lines changed
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Use the defaults of the LLVM style, but with a 4-column indentation width.
2+
BasedOnStyle: LLVM
3+
IndentWidth: 4
4+
## The column limit.
5+
## A column limit of 0 means that there is no column limit. In this case, clang-format will respect the input’s line breaking decisions within statements.
6+
ColumnLimit: 100
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Specify a comma-separated list of positive and negative globs: positive globs add subsets of checks, while negative globs (prefixed with "-") remove them.
2+
# Current header guard does not follow preferred style [llvm-header-guard] so disable it
3+
Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,readability-identifier-naming'
4+
CheckOptions:
5+
- key: readability-identifier-naming.ClassCase
6+
value: CamelCase
7+
- key: readability-identifier-naming.EnumCase
8+
value: CamelCase
9+
- key: readability-identifier-naming.FunctionCase
10+
value: camelBack
11+
- key: readability-identifier-naming.MemberCase
12+
value: camelBack
13+
- key: readability-identifier-naming.PrivateMemberPrefix
14+
value: '_'
15+
- key: readability-identifier-naming.ProtectedMemberPrefix
16+
value: '_'
17+
- key: readability-identifier-naming.ParameterCase
18+
value: camelBack
19+
- key: readability-identifier-naming.UnionCase
20+
value: CamelCase
21+
- key: readability-identifier-naming.VariableCase
22+
value: camelBack
23+
- key: readability-identifier-naming.IgnoreMainLikeFunctions
24+
value: 1
25+
- key: readability-redundant-member-init.IgnoreBaseInCopyConstructors
26+
value: 1
27+
- key: modernize-use-default-member-init.UseAssignment
28+
value: 1

language/cfamily/extractor/.gitignore

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# OS X specific files.
2+
.DS_store
3+
4+
# Nested build directory
5+
/cmake-build-*
6+
/Tests/cmake-build-*
7+
8+
# VS2017 and VSCode config files.
9+
.vscode
10+
.vs
11+
12+
# CLion project configuration
13+
/.idea
14+
15+
# Ignore all bazel-* symlinks. There is no full list since this can change
16+
# based on the name of the directory bazel is cloned into.
17+
/bazel-*
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
3+
#include "ASTUtil.hpp"
4+
5+
using namespace llvm;
6+
7+
/// Get Root-Relative path of the given file
8+
/// \param absoluteFile
9+
/// \param relativeFile
10+
/// \return true if root-relative path solved, or false
11+
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath) {
12+
SmallString<256> cwd;
13+
sys::fs::current_path(cwd);
14+
auto npos = cwd.rfind(sys::path::get_separator().data());
15+
if (absolutePath.contains(cwd.substr(0, npos))) {
16+
relativePath = absolutePath.substr(npos);
17+
return true;
18+
}
19+
relativePath = absolutePath;
20+
return false;
21+
}
+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
3+
#ifndef COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
4+
#define COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
5+
6+
#include <clang/Tooling/Tooling.h>
7+
8+
using namespace llvm;
9+
10+
/// Rewrite an absolute path relative to the parent directory of the current
/// working directory.
/// \param absolutePath path to convert.
/// \param relativePath output; receives the converted path, or a copy of
///        \p absolutePath when no conversion takes place.
/// \return true if a conversion took place, false otherwise.
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath);
11+
12+
#endif // COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
2+
#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
3+
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
4+
5+
#include "ASTUtil.hpp"
#include "CorefASTVisitor.hpp"
#include <clang/Tooling/Tooling.h>
#include <llvm/Support/Regex.h>
#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
9+
10+
using namespace llvm;
11+
12+
namespace coref {
13+
14+
class CorefASTConsumer : public clang::ASTConsumer {
15+
private:
16+
const CorefUri _corefUri;
17+
std::set<clang::FileID> _visitedFileIds;
18+
std::unique_ptr<llvm::Regex> _blacklistDirFilter;
19+
20+
inline bool isInBlackListDir(StringRef absolutePath) {
21+
return _blacklistDirFilter && _blacklistDirFilter->match(absolutePath);
22+
}
23+
24+
protected:
25+
/// An override HandleTranslationUnit
26+
/// This method is called when the ASTs for entire translation unit have
27+
/// been parsed. \param astContext
28+
void HandleTranslationUnit(clang::ASTContext &astContext) final {
29+
coref::StorageFacade::transaction([&]() mutable {
30+
// insert entry for Program table, entry could be existed already.
31+
auto programOid = CorefUri::generateCorpusOId(_corefUri.getCorpus());
32+
coref::StorageFacade::insertClassObj(Program{programOid, _corefUri.getCorpus()});
33+
34+
std::unordered_map<CorefOid, File> newVisitFileMap{};
35+
coref::CorefASTVisitor visitor(astContext, _corefUri, programOid, newVisitFileMap);
36+
37+
auto decls = astContext.getTranslationUnitDecl()->decls();
38+
auto &sourceMngr = astContext.getSourceManager();
39+
for (auto &decl : decls) {
40+
auto curFileId = sourceMngr.getFileID(decl->getLocation());
41+
42+
if (_visitedFileIds.find(curFileId) != _visitedFileIds.end()) {
43+
// skip visited files
44+
continue;
45+
}
46+
47+
// skip AST nodes having invalid source location
48+
if (!decl->getLocation().isValid()) {
49+
continue;
50+
}
51+
52+
// todo: need to verify the accuracy of function
53+
// "isInSystemHeader" & "isInSystemMacro"
54+
if (sourceMngr.isInSystemHeader(decl->getLocation()) ||
55+
sourceMngr.isInSystemMacro(decl->getLocation())) {
56+
// skip AST nodes in system headers
57+
_visitedFileIds.insert(curFileId);
58+
continue;
59+
}
60+
61+
StringRef absolutePath = sourceMngr.getFilename(decl->getLocation());
62+
if (absolutePath.empty())
63+
continue;
64+
if (isInBlackListDir(absolutePath)) {
65+
_visitedFileIds.insert(curFileId);
66+
continue;
67+
}
68+
69+
StringRef relativePath;
70+
getRootRelativePath(absolutePath, relativePath);
71+
// note: relativePath would be an absolute path when handling a
72+
// framework file.
73+
74+
auto fileOid =
75+
CorefUri::generateFileOId(_corefUri.getCorpus(), std::string(relativePath));
76+
if (coref::StorageFacade::checkFileObjExist(programOid, fileOid)) {
77+
// skip file that have been added in Sqlite DB
78+
_visitedFileIds.insert(curFileId);
79+
continue;
80+
}
81+
82+
if (newVisitFileMap.find(fileOid) == newVisitFileMap.end()) {
83+
File file{fileOid, std::string(relativePath),
84+
std::string(sys::path::extension(relativePath)),
85+
std::string(sys::path::filename(relativePath)), programOid};
86+
newVisitFileMap.insert({fileOid, std::move(file)});
87+
}
88+
visitor.setExtractFileOid(fileOid);
89+
visitor.TraverseDecl(decl);
90+
}
91+
92+
// traverse the newFileStruct and update the File table
93+
for (auto &[fileOid, f] : newVisitFileMap) {
94+
coref::StorageFacade::insertClassObj(std::move(f));
95+
}
96+
97+
return true;
98+
});
99+
}
100+
101+
public:
102+
CorefASTConsumer(const CorefUri &corefUri, std::vector<std::string> &blacklistDir)
103+
: _corefUri(corefUri) {
104+
std::stringstream regexStr;
105+
bool first = true;
106+
for (auto dir : blacklistDir) {
107+
if (first) {
108+
regexStr << "(" << dir << ")";
109+
first = false;
110+
} else {
111+
regexStr << "|(" << dir << ")";
112+
}
113+
}
114+
_blacklistDirFilter = std::make_unique<llvm::Regex>(regexStr.str());
115+
_blacklistDirFilter->isValid();
116+
};
117+
};
118+
} // namespace coref
119+
120+
#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
2+
#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP
3+
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP
4+
5+
#include "../Coref/CorefUri.hpp"
6+
#include "CorefASTConsumer.hpp"
7+
#include <clang/Tooling/Tooling.h>
8+
#include <filesystem>
9+
10+
namespace coref {
11+
12+
class CorefASTFrontendAction : public clang::ASTFrontendAction {
13+
private:
14+
std::string _corpus;
15+
std::vector<std::string>& _blacklistDir;
16+
17+
public:
18+
/// Constructor of CorefASTFrontendAction Class
19+
/// \param corpusName
20+
explicit CorefASTFrontendAction(const std::string &corpusName, std::vector<std::string>& blacklistDir)
21+
: clang::ASTFrontendAction(), _corpus(corpusName), _blacklistDir(blacklistDir) {}
22+
23+
protected:
24+
/// An override of CreateASTConsumer
25+
/// \param ci
26+
/// \param input_file
27+
/// \return
28+
std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &ci,
29+
llvm::StringRef inputFile) override {
30+
// TODO fill in corpus info
31+
auto absoluteInputFile = std::filesystem::absolute(inputFile.str());
32+
auto corefUri = CorefUri(_corpus, absoluteInputFile.string());
33+
llvm::outs() << "Consuming file: " << absoluteInputFile.string()
34+
<< " of corpus: " << _corpus << "\n";
35+
return std::unique_ptr<clang::ASTConsumer>(new coref::CorefASTConsumer(corefUri, _blacklistDir));
36+
}
37+
};
38+
39+
struct CorefFrontendActionFactory : public clang::tooling::FrontendActionFactory {
40+
/// Constructor of CorefFrontendActionFactory Class
41+
/// \param corpusName
42+
explicit CorefFrontendActionFactory(const std::string &corpusName, std::vector<std::string>& blacklistDir)
43+
: clang::tooling::FrontendActionFactory(), _corpusName(corpusName), _blacklistDir(blacklistDir) {}
44+
45+
/// a Factory method to create clang::FrontendAction
46+
/// \return
47+
std::unique_ptr<clang::FrontendAction> create() override {
48+
auto *action =
49+
dynamic_cast<clang::FrontendAction *>(new CorefASTFrontendAction(_corpusName, _blacklistDir));
50+
return std::unique_ptr<clang::FrontendAction>(action);
51+
}
52+
53+
private:
54+
std::string _corpusName;
55+
std::vector<std::string>& _blacklistDir;
56+
};
57+
58+
} // namespace coref
59+
60+
#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP

0 commit comments

Comments
 (0)