ACAV f0ba4b7c9529
Abstract Syntax Tree (AST) visualization tool for C, C++, and Objective-C
Loading...
Searching...
No Matches
SourceLocation.cpp
1/*$!{
2* Aurora Clang AST Viewer (ACAV)
3*
4* Copyright (c) 2026 Min Liu
5* Copyright (c) 2026 Michael David Adams
6*
7* SPDX-License-Identifier: GPL-2.0-or-later
8*
9* This program is free software; you can redistribute it and/or modify
10* it under the terms of the GNU General Public License as published by
11* the Free Software Foundation; either version 2 of the License, or
12* (at your option) any later version.
13*
14* This program is distributed in the hope that it will be useful,
15* but WITHOUT ANY WARRANTY; without even the implied warranty of
16* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17* GNU General Public License for more details.
18*
19* You should have received a copy of the GNU General Public License along
20* with this program; if not, see <https://www.gnu.org/licenses/>.
21}$!*/
22
23#include "core/SourceLocation.h"
24#include <clang/Basic/SourceLocation.h>
25#include <clang/Basic/SourceManager.h>
26#include <clang/Lex/Lexer.h>
27#include <llvm/ADT/DenseMap.h>
28#include <optional>
29#include <utility>
30
31namespace acav {
32
33namespace {
34// Cache for spelling locations to SourceLocation components.
35struct CachedLoc {
36 FileID fileId = 0;
37 unsigned offset = 0; // offset from start of file (spelling)
38 unsigned line = 0;
39 unsigned column = 0;
40};
41
42using LocCache = llvm::DenseMap<uintptr_t, CachedLoc>;
43using FileEntryCache = llvm::DenseMap<const clang::FileEntry *, FileID>;
44using EndTokenCache = llvm::DenseMap<uintptr_t, clang::SourceLocation>;
45
46// Thread-local caches avoid locking overhead during single-threaded AST walks.
47thread_local LocCache locCache;
48thread_local FileEntryCache fileEntryCache;
49thread_local EndTokenCache endTokenCache;
50
51uintptr_t toKey(const clang::SourceLocation &loc) {
52 return static_cast<uintptr_t>(loc.getRawEncoding());
53}
54
55clang::SourceLocation resolveFileLoc(const clang::SourceLocation &loc,
56 const clang::SourceManager &sm) {
57 return loc.isInvalid() ? loc : sm.getFileLoc(loc);
58}
59} // namespace
60
61SourceLocation::SourceLocation(FileID fileId, unsigned line, unsigned column)
62 : fileId_(fileId), line_(line), column_(column) {}
63
64SourceLocation SourceLocation::fromClang(const clang::SourceLocation &loc,
65 const clang::SourceManager &sm,
66 FileManager &fileMgr) {
67 if (loc.isInvalid()) {
68 return SourceLocation(FileManager::InvalidFileID, 0, 0);
69 }
70
71 // Resolve macro locations to the file location of the expansion.
72 clang::SourceLocation fileLoc = resolveFileLoc(loc, sm);
73 if (fileLoc.isInvalid()) {
74 return SourceLocation(FileManager::InvalidFileID, 0, 0);
75 }
76
77 const uintptr_t key = toKey(fileLoc);
78 if (auto it = locCache.find(key); it != locCache.end()) {
79 return SourceLocation(it->second.fileId, it->second.line, it->second.column);
80 }
81
82 // Decompose spelling location once to avoid redundant SourceManager queries.
83 auto decomposed = sm.getDecomposedSpellingLoc(fileLoc);
84 clang::FileID fid = decomposed.first;
85 unsigned offset = decomposed.second;
86 if (!fid.isValid()) {
87 return SourceLocation(FileManager::InvalidFileID, 0, 0);
88 }
89
90 // Get the FileEntry for the spelling location
91 auto fileEntry = sm.getFileEntryRefForID(fid);
92 if (!fileEntry) {
93 return SourceLocation(FileManager::InvalidFileID, 0, 0);
94 }
95
96 // FileEntry to FileID cache
97 const clang::FileEntry *fe = &fileEntry->getFileEntry();
98 auto feIt = fileEntryCache.find(fe);
99 FileID fileId;
100 if (feIt != fileEntryCache.end()) {
101 fileId = feIt->second;
102 } else {
103 // Register file and get FileID
104 const char *filename = fileEntry->getName().data();
105 fileId =
106 fileMgr.tryGetFileId(filename).value_or(FileManager::InvalidFileID);
107 if (fileId == FileManager::InvalidFileID) {
108 fileId = fileMgr.registerFile(filename);
109 }
110 fileEntryCache.try_emplace(fe, fileId);
111 }
112
113 // Get line and column (1-based) using decomposed offset to avoid extra work.
114 unsigned line = sm.getLineNumber(fid, offset);
115 unsigned column = sm.getColumnNumber(fid, offset);
116
117 locCache.try_emplace(key, CachedLoc{fileId, offset, line, column});
118
119 return SourceLocation(fileId, line, column);
120}
121
122SourceRange::SourceRange(SourceLocation begin, SourceLocation end)
123 : begin_(begin), end_(end) {}
124
125SourceRange SourceRange::fromClang(const clang::SourceRange &range,
126 const clang::SourceManager &sm,
127 FileManager &fileMgr) {
128 if (range.isInvalid()) {
130 return SourceRange(invalid, invalid);
131 }
132
133 // Resolve macro locations to file locations at expansion sites.
134 clang::SourceLocation startLoc = resolveFileLoc(range.getBegin(), sm);
135 clang::SourceLocation lastTokenLoc = resolveFileLoc(range.getEnd(), sm);
136
137 // Get the actual end location (after the last token)
138 clang::SourceLocation endLoc;
139
140 // Validate that lastTokenLoc is safe to use with getLocForEndOfToken
141 // The lexer can crash if the location is at or past the end of a file buffer
142 bool canUseGetLocForEndOfToken = false;
143 if (lastTokenLoc.isValid() && lastTokenLoc.isFileID()) {
144 clang::FileID fid = sm.getFileID(lastTokenLoc);
145 if (fid.isValid()) {
146 auto buffer = sm.getBufferOrNone(fid);
147 if (buffer) {
148 unsigned offset = sm.getFileOffset(lastTokenLoc);
149 // Ensure we're not at or past the end of the buffer
150 // Leave some margin for the lexer to safely read ahead
151 if (offset < buffer->getBufferSize()) {
152 canUseGetLocForEndOfToken = true;
153 }
154 }
155 }
156 }
157
158 if (canUseGetLocForEndOfToken) {
159 if (auto it = endTokenCache.find(toKey(lastTokenLoc));
160 it != endTokenCache.end()) {
161 endLoc = it->second;
162 } else {
163 clang::LangOptions langOpts;
164 endLoc = clang::Lexer::getLocForEndOfToken(lastTokenLoc, 0, sm, langOpts);
165 endTokenCache.try_emplace(toKey(lastTokenLoc), endLoc);
166 }
167 }
168
169 // If getLocForEndOfToken fails or was skipped, fall back to lastTokenLoc
170 if (endLoc.isInvalid()) {
171 endLoc = lastTokenLoc;
172 }
173
174 SourceLocation begin = SourceLocation::fromClang(startLoc, sm, fileMgr);
175 SourceLocation end = SourceLocation::fromClang(endLoc, sm, fileMgr);
176
177 return SourceRange(begin, end);
178}
179
180static void resetAllCaches() {
181 locCache.clear();
182 fileEntryCache.clear();
183 endTokenCache.clear();
184}
185
186void SourceLocation::resetCache() { resetAllCaches(); }
187
188} // namespace acav
std::size_t FileID
Type-safe identifier for registered files. 0 is reserved for invalid.
Definition FileManager.h:38
Source code location representation.
Centralized file registry providing path-to-FileID mapping.
Definition FileManager.h:45
static constexpr FileID InvalidFileID
Reserved invalid FileID.
Definition FileManager.h:47
FileID registerFile(std::string_view filePath)
Register a file and return its FileID.
std::optional< FileID > tryGetFileId(std::string_view filePath) const
Look up FileID for a path without registering.
Represents a specific position in source code.
static void resetCache()
Reset internal caches (per extraction run).
static SourceLocation fromClang(const clang::SourceLocation &loc, const clang::SourceManager &sm, FileManager &fileMgr)
Create SourceLocation from Clang's SourceLocation.
static SourceRange fromClang(const clang::SourceRange &range, const clang::SourceManager &sm, FileManager &fileMgr)
Create SourceRange from Clang's SourceRange.