ACAV f0ba4b7c9529
Abstract Syntax Tree (AST) visualization tool for C, C++, and Objective-C
Loading...
Searching...
No Matches
ClangUtils.cpp
Go to the documentation of this file.
1/*$!{
2* Aurora Clang AST Viewer (ACAV)
3*
4* Copyright (c) 2026 Min Liu
5* Copyright (c) 2026 Michael David Adams
6*
7* SPDX-License-Identifier: GPL-2.0-or-later
8*
9* This program is free software; you can redistribute it and/or modify
10* it under the terms of the GNU General Public License as published by
11* the Free Software Foundation; either version 2 of the License, or
12* (at your option) any later version.
13*
14* This program is distributed in the hope that it will be useful,
15* but WITHOUT ANY WARRANTY; without even the implied warranty of
16* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17* GNU General Public License for more details.
18*
19* You should have received a copy of the GNU General Public License along
20* with this program; if not, see <https://www.gnu.org/licenses/>.
21}$!*/
22
25
26#include "common/ClangUtils.h"
28#include <algorithm>
29#include <array>
30#include <cctype>
31#include <clang/Basic/DiagnosticIDs.h>
32#include <clang/Basic/DiagnosticOptions.h>
33#include <clang/Basic/SourceLocation.h>
34#include <clang/Basic/SourceManager.h>
35#include <cstdlib>
36#if LLVM_VERSION_MAJOR >= 22
37#include <clang/Driver/CreateASTUnitFromArgs.h>
38#endif
39#include <clang/Frontend/ASTUnit.h>
40#include <clang/Frontend/PCHContainerOperations.h>
41#include <clang/Lex/HeaderSearchOptions.h>
42#include <clang/Tooling/CompilationDatabase.h>
43#include <clang/Tooling/JSONCompilationDatabase.h>
44#include <clang/Tooling/Tooling.h>
45#include <llvm/ADT/ScopeExit.h>
46#include <llvm/ADT/StringExtras.h>
47#include <llvm/Config/llvm-config.h>
48#include <llvm/Support/FileSystem.h>
49#include <llvm/Support/MemoryBuffer.h>
50#include <llvm/Support/Path.h>
51#include <llvm/Support/Program.h>
52#include <llvm/Support/raw_ostream.h>
53#include <map>
54#include <optional>
55#include <utility>
56
57namespace ct = clang::tooling;
58
59namespace acav {
60
61namespace {
62
63void emitStructuredMessage(const std::string &level,
64 const std::string &message) {
65 llvm::errs() << acav::logfmt::formatDiagnosticLine(level, "", 0, 0, message)
66 << "\n";
67}
68
71std::string getExecutableDir() {
72 // Use LLVM's getMainExecutable with a dummy function address
73 // This works on all platforms (Linux, macOS, Windows)
74 std::string execPath = llvm::sys::fs::getMainExecutable(
75 "acav", reinterpret_cast<void *>(&getExecutableDir));
76
77 if (execPath.empty()) {
78 return "";
79 }
80
81 llvm::SmallString<256> execDir(execPath);
82 llvm::sys::path::remove_filename(execDir);
83 return std::string(execDir);
84}
85
89std::optional<std::string> findBundledResourceDir() {
90 std::string execDir = getExecutableDir();
91 if (execDir.empty()) {
92 return std::nullopt;
93 }
94
95 // Build path: <exec_dir>/../lib/clang/<version>/
96 llvm::SmallString<256> resourcePath(execDir);
97 llvm::sys::path::append(resourcePath, "..", "lib", "clang",
98 std::to_string(LLVM_VERSION_MAJOR));
99
100 // Normalize the path (resolve .. components)
101 llvm::SmallString<256> normalizedPath;
102 if (std::error_code ec =
103 llvm::sys::fs::real_path(resourcePath, normalizedPath)) {
104 // Path doesn't exist or can't be resolved
105 return std::nullopt;
106 }
107
108 return std::string(normalizedPath);
109}
110
111bool isValidClangResourceDir(llvm::StringRef resourceDir) {
112 if (resourceDir.empty() || !llvm::sys::fs::exists(resourceDir)) {
113 return false;
114 }
115
116 llvm::SmallString<256> includeDir(resourceDir);
117 llvm::sys::path::append(includeDir, "include");
118 if (!llvm::sys::fs::exists(includeDir)) {
119 return false;
120 }
121
122 llvm::SmallString<256> stddefPath(includeDir);
123 llvm::sys::path::append(stddefPath, "stddef.h");
124 return llvm::sys::fs::exists(stddefPath);
125}
126
127std::optional<std::string> readFileToString(const std::string &path) {
128 auto bufferOrErr = llvm::MemoryBuffer::getFile(path);
129 if (!bufferOrErr) {
130 return std::nullopt;
131 }
132 return (*bufferOrErr)->getBuffer().str();
133}
134
135std::optional<std::string>
136runProgramCaptureStdout(llvm::StringRef program,
137 llvm::ArrayRef<llvm::StringRef> args) {
138 llvm::SmallString<256> stdoutPath;
139 if (llvm::sys::fs::createTemporaryFile("acav", "stdout", stdoutPath)) {
140 return std::nullopt;
141 }
142
143 std::string errMsg;
144 bool executionFailed = false;
145 std::array<std::optional<llvm::StringRef>, 3> redirects = {
146 std::nullopt, llvm::StringRef(stdoutPath),
147 llvm::StringRef("") // portable /dev/null for stderr
148 };
149
150 int rc = llvm::sys::ExecuteAndWait(program, args, std::nullopt, redirects, 0,
151 0, &errMsg, &executionFailed);
152#if LLVM_VERSION_MAJOR >= 22
153 llvm::scope_exit cleanup([&]() { (void)llvm::sys::fs::remove(stdoutPath); });
154#else
155 auto cleanup =
156 llvm::make_scope_exit([&]() { (void)llvm::sys::fs::remove(stdoutPath); });
157#endif
158
159 if (executionFailed || rc != 0) {
160 return std::nullopt;
161 }
162
163 auto output = readFileToString(stdoutPath.str().str());
164 if (!output) {
165 return std::nullopt;
166 }
167 return *output;
168}
169
170std::optional<std::string> runClangWithArg(llvm::StringRef programPath,
171 llvm::StringRef arg) {
172 llvm::StringRef progName = llvm::sys::path::filename(programPath);
173 std::array<llvm::StringRef, 2> args = {progName, arg};
174 return runProgramCaptureStdout(programPath, args);
175}
176
/// Extract the major version number from `clang --version` output.
///
/// Typical inputs are "clang version 21.1.8 ..." and
/// "Apple clang version 15.0.0 ...". The result is the first run of decimal
/// digits that follows the first occurrence of the token "version",
/// separated from it only by spaces or tabs.
///
/// @param versionOutput Text printed by the compiler.
/// @return The major version, or -1 when the token is missing, no digits
///         follow it, or the number has more than nine digits.
int parseClangMajorVersion(const std::string &versionOutput) {
  static constexpr char kVersionToken[] = "version";
  constexpr std::size_t kTokenLength = sizeof(kVersionToken) - 1;
  // Nine decimal digits always fit in a 32-bit int, so capping the length
  // rules out signed overflow while accumulating.
  constexpr std::size_t kMaxDigits = 9;

  std::size_t pos = versionOutput.find(kVersionToken);
  if (pos == std::string::npos) {
    return -1;
  }
  pos += kTokenLength;

  while (pos < versionOutput.size() &&
         (versionOutput[pos] == ' ' || versionOutput[pos] == '\t')) {
    ++pos;
  }

  int major = 0;
  std::size_t digitCount = 0;
  // The cast matters: passing a negative char to std::isdigit is undefined.
  while (pos < versionOutput.size() &&
         std::isdigit(static_cast<unsigned char>(versionOutput[pos]))) {
    if (++digitCount > kMaxDigits) {
      return -1;
    }
    major = major * 10 + (versionOutput[pos] - '0');
    ++pos;
  }

  return digitCount == 0 ? -1 : major;
}
201
202std::optional<std::string>
203getClangResourceDirFromProgram(llvm::StringRef programPath) {
204 auto output = runClangWithArg(programPath, "-print-resource-dir");
205 if (!output) {
206 return std::nullopt;
207 }
208 std::string resourceDir =
209 llvm::StringRef(*output).split('\n').first.trim().str();
210 if (!isValidClangResourceDir(resourceDir)) {
211 return std::nullopt;
212 }
213 return resourceDir;
214}
215
216std::optional<int> getClangProgramMajorVersion(llvm::StringRef programPath) {
217 auto output = runClangWithArg(programPath, "--version");
218 if (!output) {
219 return std::nullopt;
220 }
221 int major = parseClangMajorVersion(*output);
222 if (major <= 0) {
223 return std::nullopt;
224 }
225 return major;
226}
227
/// Copy a command line, leaving out every resource-dir option.
///
/// Two spellings are removed: the separate form "-resource-dir <value>"
/// (the flag and, when present, the argument after it) and the joined form
/// "-resource-dir=<value>". All other arguments keep their order.
///
/// @param commandLine Arguments to filter.
/// @return The filtered copy.
std::vector<std::string>
stripResourceDirArgs(const std::vector<std::string> &commandLine) {
  std::vector<std::string> kept;
  kept.reserve(commandLine.size());

  auto cursor = commandLine.begin();
  const auto last = commandLine.end();
  while (cursor != last) {
    if (*cursor == "-resource-dir") {
      // Drop the flag and, unless it is the final argument, its value too.
      ++cursor;
      if (cursor != last) {
        ++cursor;
      }
    } else if (cursor->rfind("-resource-dir=", 0) == 0) {
      ++cursor;
    } else {
      kept.push_back(*cursor);
      ++cursor;
    }
  }

  return kept;
}
249
250class CallbackDiagnosticConsumer : public clang::DiagnosticConsumer {
251public:
252 explicit CallbackDiagnosticConsumer(DiagnosticCallback callback)
253 : callback_(std::move(callback)) {}
254
255 void HandleDiagnostic(clang::DiagnosticsEngine::Level level,
256 const clang::Diagnostic &info) override {
257 if (!callback_) {
258 return;
259 }
260
261 llvm::SmallString<256> message;
262 info.FormatDiagnostic(message);
263
264 DiagnosticMessage diag;
265 diag.level = level;
266 diag.message = message.str().str();
267
268 clang::SourceLocation loc = info.getLocation();
269 if (loc.isValid()) {
270 const clang::SourceManager &sm = info.getSourceManager();
271 clang::FullSourceLoc fullLoc(loc, sm);
272 if (fullLoc.isValid()) {
273 const llvm::StringRef filename =
274 sm.getFilename(fullLoc.getSpellingLoc());
275 if (!filename.empty()) {
276 diag.file = filename.str();
277 }
278 diag.line = fullLoc.getSpellingLineNumber();
279 diag.column = fullLoc.getSpellingColumnNumber();
280 }
281 }
282
283 callback_(diag);
284 }
285
286private:
287 DiagnosticCallback callback_;
288};
289
290// Create diagnostic consumer with callback or fallback to structured logging
291clang::DiagnosticConsumer *
292createDiagnosticConsumer(const DiagnosticCallback &callback) {
293 if (callback) {
294 return new CallbackDiagnosticConsumer(callback);
295 }
296 auto structuredFallback = [](const DiagnosticMessage &diag) {
297 llvm::errs() << acav::logfmt::formatDiagnosticLine(diag.level, diag.file,
298 diag.line, diag.column,
299 diag.message)
300 << "\n";
301 };
302 return new CallbackDiagnosticConsumer(structuredFallback);
303}
304
305} // namespace
306
307// Forward declaration: the helper is defined near the end of this file but is called earlier, from loadAstFromFile.
308static std::map<std::string, std::string>
309extractModuleFileMappings(const std::string &compilationDb,
310 const std::string &sourcePath);
311
312std::string getClangResourceDir(const std::string &overrideResourceDir) {
313 // 1. Check explicit override first
314 if (!overrideResourceDir.empty()) {
315 if (!isValidClangResourceDir(overrideResourceDir)) {
316 emitStructuredMessage("error", "[clang] Invalid override resource dir: " +
317 overrideResourceDir);
318 return "";
319 }
320 emitStructuredMessage("info", "[clang] Using override resource dir: " +
321 overrideResourceDir);
322 return overrideResourceDir;
323 }
324
325 // 2. Check for bundled resource directory (../lib/clang/<version>/)
326 // This is the preferred method for release builds where dependencies
327 // are bundled with the executable
328 if (auto bundledDir = findBundledResourceDir()) {
329 if (isValidClangResourceDir(*bundledDir)) {
330 emitStructuredMessage("info", "[clang] Using bundled resource dir: " +
331 *bundledDir);
332 return *bundledDir;
333 }
334 emitStructuredMessage("debug",
335 "[clang] Bundled resource dir found but invalid: " +
336 *bundledDir);
337 }
338
339 // 3. Fallback: query clang++ -print-resource-dir
340 // This method requires a compatible clang++ to be installed on the system
341 constexpr int requiredMajor = LLVM_VERSION_MAJOR;
342
343 auto tryProgram =
344 [&](llvm::StringRef programPath) -> std::optional<std::string> {
345 auto major = getClangProgramMajorVersion(programPath);
346 if (!major || *major != requiredMajor) {
347 emitStructuredMessage(
348 "warning",
349 "[clang] Skipping clang binary (major mismatch): " +
350 programPath.str() + " (found " +
351 (major ? std::to_string(*major) : std::string("unknown")) +
352 ", need " + std::to_string(requiredMajor) + ")");
353 return std::nullopt;
354 }
355 return getClangResourceDirFromProgram(programPath);
356 };
357
358 const std::vector<std::string> candidateNames = {
359 "clang++-" + std::to_string(requiredMajor),
360 "clang-" + std::to_string(requiredMajor) + "++",
361 "clang++",
362 };
363
364 for (const std::string &name : candidateNames) {
365 auto programOrErr = llvm::sys::findProgramByName(name);
366 if (!programOrErr) {
367 continue;
368 }
369 if (auto dir = tryProgram(*programOrErr)) {
370 emitStructuredMessage("info",
371 "[clang] Resource dir (via clang++): " + *dir);
372 return *dir;
373 }
374 }
375
376 emitStructuredMessage(
377 "error", "[clang] Failed to locate clang resource dir. Checked:\n"
378 " 1. Bundled: ../lib/clang/" +
379 std::to_string(requiredMajor) +
380 "/ (not found or invalid)\n"
381 " 2. System clang++ -print-resource-dir (not found or "
382 "version mismatch)");
383 return "";
384}
385
386std::vector<std::string>
387buildToolchainAdjustedCommandLine(const std::vector<std::string> &commandLine,
388 const std::string &clangResourceDir,
389 std::string &diagnostic) {
390 diagnostic.clear();
391 std::vector<std::string> adjusted = stripResourceDirArgs(commandLine);
392
393#ifdef __APPLE__
394 bool hasSysroot = false;
395 bool useMacOSSDK = true;
396 for (std::size_t i = 0; i < adjusted.size(); ++i) {
397 const std::string &arg = adjusted[i];
398 const llvm::StringRef argRef(arg);
399
400 hasSysroot |= argRef.starts_with("-isysroot") || arg == "--sysroot" ||
401 argRef.starts_with("--sysroot=");
402
403 llvm::StringRef target;
404 if ((arg == "-target" || arg == "--target") && i + 1 < adjusted.size()) {
405 target = adjusted[i + 1];
406 } else if (argRef.starts_with("--target=")) {
407 target = argRef.drop_front(llvm::StringRef("--target=").size());
408 } else if (argRef.starts_with("-target=")) {
409 target = argRef.drop_front(llvm::StringRef("-target=").size());
410 }
411
412 if (!target.empty()) {
413 const std::string lowerTarget = target.lower();
414 const llvm::StringRef lowerTargetRef(lowerTarget);
415 useMacOSSDK = lowerTargetRef.contains("-apple-macos") ||
416 lowerTargetRef.contains("-apple-darwin");
417 }
418 }
419
420 if (!hasSysroot && useMacOSSDK) {
421 auto xcrunOrErr = llvm::sys::findProgramByName("xcrun");
422 const std::string xcrun = xcrunOrErr ? *xcrunOrErr : "/usr/bin/xcrun";
423 if (!llvm::sys::fs::exists(xcrun)) {
424 diagnostic = "unable to find xcrun to discover the active macOS SDK";
425 } else {
426 std::array<llvm::StringRef, 4> args = {"xcrun", "--sdk", "macosx",
427 "--show-sdk-path"};
428 auto output = runProgramCaptureStdout(xcrun, args);
429 if (!output) {
430 diagnostic = "xcrun failed while discovering the active macOS SDK";
431 } else {
432 std::string sdkPath =
433 llvm::StringRef(*output).split('\n').first.trim().str();
434 if (sdkPath.empty() || !llvm::sys::fs::is_directory(sdkPath)) {
435 diagnostic = "xcrun returned an invalid macOS SDK path: " + sdkPath;
436 } else {
437 adjusted.push_back("-isysroot");
438 adjusted.push_back(sdkPath);
439 }
440 }
441 }
442 }
443#endif
444
445 if (!clangResourceDir.empty()) {
446 adjusted.push_back("-resource-dir");
447 adjusted.push_back(clangResourceDir);
448 }
449
450 return adjusted;
451}
452
453std::unique_ptr<clang::ASTUnit>
454createAstFromCDB(const std::string &compilationDatabase,
455 const std::string &sourcePath, std::string &errorMessage,
456 const DiagnosticCallback &diagnosticCallback,
457 const std::string &clangResourceDirOverride) {
458 // Load compilation database
459 std::string loadError;
460 std::unique_ptr<ct::CompilationDatabase> compdb =
461 ct::JSONCompilationDatabase::loadFromFile(
462 compilationDatabase, loadError,
463 clang::tooling::JSONCommandLineSyntax::AutoDetect);
464
465 if (!compdb) {
466 errorMessage = "Failed to load compilation database: " + loadError;
467 return nullptr;
468 }
469
470 // Let Clang tooling recover driver path/mode information before expanding
471 // response files. The driver simulation then owns implicit include discovery.
472 compdb = ct::inferToolLocation(std::move(compdb));
473 compdb = ct::inferTargetAndDriverMode(std::move(compdb));
474
475 // Expand response file
476 compdb = ct::expandResponseFiles(std::move(compdb),
477 llvm::vfs::getRealFileSystem());
478
479 // Get compile commands for the source file
480 std::vector<clang::tooling::CompileCommand> commands =
481 compdb->getCompileCommands(sourcePath);
482
483 if (commands.empty()) {
484 errorMessage = "No compile command found for source file: " + sourcePath;
485 return nullptr;
486 }
487
488 // Get the resource dir
489 std::string resourceDir = getClangResourceDir(clangResourceDirOverride);
490 if (resourceDir.empty()) {
491 errorMessage = "Get clang resource dir failed";
492 return nullptr;
493 }
494
495 // Use the first command
496 const clang::tooling::CompileCommand &cmd = commands[0];
497
498 // Setup VFS with working directory from compilation database
499 // This is critical for C++20 modules: -fmodule-file=name=path uses paths
500 // relative to the working directory, which must match the compilation
501 // database's "directory" field
502 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> vfs =
503 llvm::vfs::getRealFileSystem();
504 if (std::error_code ec = vfs->setCurrentWorkingDirectory(cmd.Directory)) {
505 errorMessage = "Failed to set working directory to '" + cmd.Directory +
506 "': " + ec.message();
507 return nullptr;
508 }
509
510 // Setup diagnostics engine to capture compilation errors
511#if LLVM_VERSION_MAJOR >= 21
512 auto diagOpts = std::make_shared<clang::DiagnosticOptions>();
513 clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
514 new clang::DiagnosticsEngine(
515 clang::IntrusiveRefCntPtr<clang::DiagnosticIDs>(
516 new clang::DiagnosticIDs()),
517 *diagOpts, createDiagnosticConsumer(diagnosticCallback)));
518#else
519 clang::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts(
520 new clang::DiagnosticOptions());
521 clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
522 new clang::DiagnosticsEngine(
523 clang::IntrusiveRefCntPtr<clang::DiagnosticIDs>(
524 new clang::DiagnosticIDs()),
525 diagOpts, createDiagnosticConsumer(diagnosticCallback)));
526#endif
527
528 std::string toolchainDiagnostic;
529 std::vector<std::string> adjustedCommandLine =
530 buildToolchainAdjustedCommandLine(cmd.CommandLine, resourceDir,
531 toolchainDiagnostic);
532 if (!toolchainDiagnostic.empty()) {
533 emitStructuredMessage("warning", "[toolchain] " + toolchainDiagnostic);
534 }
535
536 // Prepare command line arguments for Clang
537 std::vector<const char *> args;
538 for (const auto &arg : adjustedCommandLine) {
539 args.push_back(arg.c_str());
540 }
541 if (std::find(adjustedCommandLine.begin(), adjustedCommandLine.end(),
542 "-fparse-all-comments") == adjustedCommandLine.end()) {
543 args.push_back("-fparse-all-comments");
544 }
545
546 // Log the fully expanded command for debugging (includes response expansion).
547 std::string expandedCommand = "[make-ast] Expanded command:";
548 for (const auto *arg : args) {
549 expandedCommand += " ";
550 expandedCommand += arg;
551 }
552 emitStructuredMessage("debug", expandedCommand);
553
554 // Create PCH container operations
555 auto pchOps = std::make_shared<clang::PCHContainerOperations>();
556
557 // Build AST from command line arguments
558 // Pass VFS with working directory set to ensure module file paths resolve
559 // correctly
560#if LLVM_VERSION_MAJOR >= 22
561 std::unique_ptr<clang::ASTUnit> astUnit = clang::CreateASTUnitFromCommandLine(
562 args.data(), args.data() + args.size(), pchOps, diagOpts, diags,
563 resourceDir,
564 /*StorePreamblesInMemory=*/false,
565 /*PreambleStoragePath=*/llvm::StringRef(),
566 /*OnlyLocalDecls=*/false, clang::CaptureDiagsKind::None,
567 /*RemappedFiles=*/{},
568 /*RemappedFilesKeepOriginalName=*/true,
569 /*PrecompilePreambleAfterNParses=*/0, clang::TU_Complete,
570 /*CacheCodeCompletionResults=*/false,
571 /*IncludeBriefCommentsInCodeCompletion=*/false,
572 /*AllowPCHWithCompilerErrors=*/false,
573 clang::SkipFunctionBodiesScope::None,
574 /*SingleFileParse=*/false,
575 /*UserFilesAreVolatile=*/false,
576 /*ForSerialization=*/true, // Important: we may call Save()
577 /*RetainExcludedConditionalBlocks=*/false,
578 /*ModuleFormat=*/std::nullopt,
579 /*ErrAST=*/nullptr,
580 /*VFS=*/vfs);
581#elif LLVM_VERSION_MAJOR >= 21
582 // LLVM 21: ASTUnit::LoadFromCommandLine with diagOpts as param 4
583 std::unique_ptr<clang::ASTUnit> astUnit = clang::ASTUnit::LoadFromCommandLine(
584 args.data(), args.data() + args.size(), pchOps, diagOpts, diags,
585 resourceDir,
586 /*StorePreamblesInMemory=*/false,
587 /*PreambleStoragePath=*/llvm::StringRef(),
588 /*OnlyLocalDecls=*/false, clang::CaptureDiagsKind::None,
589 /*RemappedFiles=*/{},
590 /*RemappedFilesKeepOriginalName=*/true,
591 /*PrecompilePreambleAfterNParses=*/0, clang::TU_Complete,
592 /*CacheCodeCompletionResults=*/false,
593 /*IncludeBriefCommentsInCodeCompletion=*/false,
594 /*AllowPCHWithCompilerErrors=*/false,
595 clang::SkipFunctionBodiesScope::None,
596 /*SingleFileParse=*/false,
597 /*UserFilesAreVolatile=*/false,
598 /*ForSerialization=*/true, // Important: we may call Save()
599 /*RetainExcludedConditionalBlocks=*/false,
600 /*ModuleFormat=*/std::nullopt,
601 /*ErrAST=*/nullptr,
602 /*VFS=*/vfs);
603#else
604 // LLVM 20: ASTUnit::LoadFromCommandLine without separate diagOpts
605 std::unique_ptr<clang::ASTUnit> astUnit = clang::ASTUnit::LoadFromCommandLine(
606 args.data(), args.data() + args.size(), pchOps, diags, resourceDir,
607 /*StorePreamblesInMemory=*/false,
608 /*PreambleStoragePath=*/llvm::StringRef(),
609 /*OnlyLocalDecls=*/false, clang::CaptureDiagsKind::None,
610 /*RemappedFiles=*/{},
611 /*RemappedFilesKeepOriginalName=*/true,
612 /*PrecompilePreambleAfterNParses=*/0, clang::TU_Complete,
613 /*CacheCodeCompletionResults=*/false,
614 /*IncludeBriefCommentsInCodeCompletion=*/false,
615 /*AllowPCHWithCompilerErrors=*/false,
616 clang::SkipFunctionBodiesScope::None,
617 /*SingleFileParse=*/false,
618 /*UserFilesAreVolatile=*/false,
619 /*ForSerialization=*/true, // Important: we may call Save()
620 /*RetainExcludedConditionalBlocks=*/false,
621 /*ModuleFormat=*/std::nullopt,
622 /*ErrAST=*/nullptr,
623 /*VFS=*/vfs);
624#endif
625
626 if (!astUnit) {
627 errorMessage = "Failed to create AST for source file: " + sourcePath;
628 return nullptr;
629 }
630
631 return std::move(astUnit);
632}
633
634bool saveAst(clang::ASTUnit &astUnit, const std::string &outputPath,
635 std::string &errorMessage) {
636 // Note: Save() returns true on error, false on success
637 if (astUnit.Save(outputPath)) {
638 errorMessage = "Failed to save AST to: " + outputPath;
639 return false;
640 }
641 return true;
642}
643
644std::unique_ptr<clang::ASTUnit>
645loadAstFromFile(const std::string &astFilePath, std::string &errorMessage,
646 const std::string &compilationDbPath,
647 const std::string &sourcePath,
648 const DiagnosticCallback &diagnosticCallback) {
649 // Debug output
650 emitStructuredMessage("debug", "[loadAstFromFile] AST file: " + astFilePath);
651 emitStructuredMessage(
652 "debug", "[loadAstFromFile] Compilation DB: " +
653 (compilationDbPath.empty() ? "(none)" : compilationDbPath));
654
655 // Extract module file mappings from compilation database (for C++20 modules)
656 std::map<std::string, std::string> moduleFileMappings;
657 std::string workingDir;
658
659 if (!compilationDbPath.empty() && !sourcePath.empty()) {
660 moduleFileMappings =
661 extractModuleFileMappings(compilationDbPath, sourcePath);
662
663 // Get working directory from compilation database path
664 llvm::SmallString<256> compDbDir(compilationDbPath);
665 llvm::sys::path::remove_filename(compDbDir);
666 workingDir = std::string(compDbDir);
667 }
668
669 emitStructuredMessage("debug",
670 "[loadAstFromFile] Working dir: " +
671 (workingDir.empty() ? "(empty)" : workingDir));
672 emitStructuredMessage("debug", "[loadAstFromFile] Module mappings: " +
673 std::to_string(moduleFileMappings.size()) +
674 " entries");
675
676 // Setup diagnostics - these don't need to match Save() configuration
677#if LLVM_VERSION_MAJOR >= 21
678 auto diagOpts = std::make_shared<clang::DiagnosticOptions>();
679 clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
680 new clang::DiagnosticsEngine(
681 clang::IntrusiveRefCntPtr<clang::DiagnosticIDs>(
682 new clang::DiagnosticIDs()),
683 *diagOpts, createDiagnosticConsumer(diagnosticCallback)));
684#else
685 clang::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts(
686 new clang::DiagnosticOptions());
687 clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
688 new clang::DiagnosticsEngine(
689 clang::IntrusiveRefCntPtr<clang::DiagnosticIDs>(
690 new clang::DiagnosticIDs()),
691 diagOpts, createDiagnosticConsumer(diagnosticCallback)));
692#endif
693
694 // Create PCH container reader
695 auto pchContainerOps = std::make_shared<clang::PCHContainerOperations>();
696
697 // Setup VFS with working directory if provided
698 // This is needed for C++20 modules where the AST may reference .pcm files
699 // with relative paths
700 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> vfs =
701 llvm::vfs::getRealFileSystem();
702 if (!workingDir.empty()) {
703 if (std::error_code ec = vfs->setCurrentWorkingDirectory(workingDir)) {
704 // Log but don't fail - module mappings are already absolute
705 emitStructuredMessage("warning", "[loadAstFromFile] Warning: Failed to "
706 "set VFS working directory to '" +
707 workingDir + "': " + ec.message());
708 }
709 }
710
711 // Setup FileSystemOptions with working directory
712 clang::FileSystemOptions fsOpts;
713 if (!workingDir.empty()) {
714 fsOpts.WorkingDir = workingDir;
715 }
716
717 // Setup HeaderSearchOptions for C++20 module resolution
718 // Use explicit module file mappings extracted from compilation database
719 // Note: extractModuleFileMappings already resolves relative paths to absolute
720 clang::HeaderSearchOptions hsOpts;
721 for (const auto &[moduleName, pcmPath] : moduleFileMappings) {
722 hsOpts.PrebuiltModuleFiles[moduleName] = pcmPath;
723 emitStructuredMessage("debug", "[loadAstFromFile] Added module mapping: " +
724 moduleName + " -> " + pcmPath);
725 }
726
727 // Also add working directory as a search path for backward compatibility
728 if (!workingDir.empty()) {
729 hsOpts.PrebuiltModulePaths.push_back(workingDir);
730 }
731
732 // Load AST from file
733 // Note: Most configuration is stored in the AST file itself
734 // AllowASTWithCompilerErrors=true to handle cases where module files
735 // can't be found (the AST was already successfully built, we just need
736 // to load the serialized data)
737#if LLVM_VERSION_MAJOR >= 22
738 // LLVM 22: VFS is 4th param (before DiagOpts)
739 std::unique_ptr<clang::ASTUnit> astUnit = clang::ASTUnit::LoadFromASTFile(
740 astFilePath, pchContainerOps->getRawReader(),
741 clang::ASTUnit::LoadEverything, /*VFS=*/vfs, diagOpts, diags, fsOpts,
742 hsOpts,
743 /*LangOpts=*/nullptr,
744 /*OnlyLocalDecls=*/false, clang::CaptureDiagsKind::None,
745 /*AllowASTWithCompilerErrors=*/true,
746 /*UserFilesAreVolatile=*/false);
747#elif LLVM_VERSION_MAJOR >= 21
748 // LLVM 21: VFS is last param, diagOpts passed separately
749 std::unique_ptr<clang::ASTUnit> astUnit = clang::ASTUnit::LoadFromASTFile(
750 astFilePath, pchContainerOps->getRawReader(),
751 clang::ASTUnit::LoadEverything, diagOpts, diags, fsOpts, hsOpts,
752 /*LangOpts=*/nullptr,
753 /*OnlyLocalDecls=*/false, clang::CaptureDiagsKind::None,
754 /*AllowASTWithCompilerErrors=*/true,
755 /*UserFilesAreVolatile=*/false,
756 /*VFS=*/vfs);
757#else
758 // LLVM 20: No separate diagOpts, shared_ptr<HeaderSearchOptions>
759 auto hsOptsPtr = std::make_shared<clang::HeaderSearchOptions>(hsOpts);
760 std::unique_ptr<clang::ASTUnit> astUnit = clang::ASTUnit::LoadFromASTFile(
761 astFilePath, pchContainerOps->getRawReader(),
762 clang::ASTUnit::LoadEverything, diags, fsOpts, hsOptsPtr,
763 /*LangOpts=*/nullptr,
764 /*OnlyLocalDecls=*/false, clang::CaptureDiagsKind::None,
765 /*AllowASTWithCompilerErrors=*/true,
766 /*UserFilesAreVolatile=*/false,
767 /*VFS=*/vfs);
768#endif
769
770 if (!astUnit) {
771 errorMessage = "Failed to load AST from file: " + astFilePath;
772 return nullptr;
773 }
774
775 return std::move(astUnit);
776}
777
778std::vector<std::string>
779getSourceFilesFromCompilationDatabase(const std::string &compDbPath,
780 std::string &errorMessage) {
781
782 // Load compilation database
783 std::string loadError;
784 std::unique_ptr<ct::CompilationDatabase> compilationDatabase =
785 ct::JSONCompilationDatabase::loadFromFile(
786 compDbPath, loadError, ct::JSONCommandLineSyntax::AutoDetect);
787
788 if (!compilationDatabase) {
789 errorMessage = "Failed to load compilation database: " + loadError;
790 return {};
791 }
792
793 // Extract all source files
794 std::vector<std::string> sourceFiles = compilationDatabase->getAllFiles();
795
796 if (sourceFiles.empty()) {
797 errorMessage = "No source files found in compilation database";
798 return {};
799 }
800
801 return sourceFiles;
802}
803
804// Internal helper: Extract -fmodule-file=name=path mappings from compile
805// command
806static std::map<std::string, std::string>
807extractModuleFileMappings(const std::string &compilationDb,
808 const std::string &sourcePath) {
809 std::map<std::string, std::string> mappings;
810
811 // Load compilation database
812 std::string loadError;
813 std::unique_ptr<ct::CompilationDatabase> compdb =
814 ct::JSONCompilationDatabase::loadFromFile(
815 compilationDb, loadError, ct::JSONCommandLineSyntax::AutoDetect);
816
817 if (!compdb) {
818 emitStructuredMessage(
819 "error",
820 "[extractModuleFileMappings] Failed to load compilation database: " +
821 loadError);
822 return mappings;
823 }
824
825 // Expand response files (this expands @modmap files)
826 compdb = ct::expandResponseFiles(std::move(compdb),
827 llvm::vfs::getRealFileSystem());
828
829 // Get compile commands for the source file
830 std::vector<ct::CompileCommand> commands =
831 compdb->getCompileCommands(sourcePath);
832
833 if (commands.empty()) {
834 emitStructuredMessage(
835 "warning",
836 "[extractModuleFileMappings] No compile command found for: " +
837 sourcePath);
838 return mappings;
839 }
840
841 const ct::CompileCommand &cmd = commands.front();
842 std::string workingDir = cmd.Directory;
843
844 // Parse command line for -fmodule-file=name=path flags
845 for (const std::string &arg : cmd.CommandLine) {
846 // Check for -fmodule-file=name=path format
847 if (arg.rfind("-fmodule-file=", 0) == 0) {
848 // Extract the name=path part
849 std::string nameAndPath = arg.substr(14); // Skip "-fmodule-file="
850
851 // Find the first '=' which separates module name from path
852 size_t eqPos = nameAndPath.find('=');
853 if (eqPos != std::string::npos) {
854 std::string moduleName = nameAndPath.substr(0, eqPos);
855 std::string pcmPath = nameAndPath.substr(eqPos + 1);
856
857 // Resolve relative paths against working directory
858 if (!pcmPath.empty() && pcmPath[0] != '/') {
859 pcmPath = workingDir + "/" + pcmPath;
860 }
861
862 mappings[moduleName] = pcmPath;
863 emitStructuredMessage("debug",
864 "[extractModuleFileMappings] Found mapping: " +
865 moduleName + " -> " + pcmPath);
866 }
867 }
868 }
869
870 return mappings;
871}
872
873} // namespace acav
Utilities for interacting with Clang at runtime. This includes runtime detection of Clang paths and AS...
std::vector< std::string > getSourceFilesFromCompilationDatabase(const std::string &compDbPath, std::string &errorMessage)
Extract source file paths from a compilation database.
std::unique_ptr< clang::ASTUnit > createAstFromCDB(const std::string &compilationDatabase, const std::string &sourcePath, std::string &errorMessage, const DiagnosticCallback &diagnosticCallback=nullptr, const std::string &clangResourceDirOverride="")
Create AST from a given compilation database. This function provides an easy way to generate clang AST ...
bool saveAst(clang::ASTUnit &astUnit, const std::string &outputPath, std::string &errorMessage)
Save AST to a local file.
std::string getClangResourceDir(const std::string &overrideResourceDir="")
Get clang resource directory.
std::unique_ptr< clang::ASTUnit > loadAstFromFile(const std::string &astFilePath, std::string &errorMessage, const std::string &compilationDbPath="", const std::string &sourcePath="", const DiagnosticCallback &diagnosticCallback=nullptr)
Load AST from local file.
std::vector< std::string > buildToolchainAdjustedCommandLine(const std::vector< std::string > &commandLine, const std::string &clangResourceDir, std::string &diagnostic)
Normalize a Clang command line for ACAV's embedded Clang.
Helpers to format diagnostics for log ingestion.