ACAV f0ba4b7c9529
Abstract Syntax Tree (AST) visualization tool for C, C++, and Objective-C
Loading...
Searching...
No Matches
InternedString.cpp
1/*$!{
2* Aurora Clang AST Viewer (ACAV)
3*
4* Copyright (c) 2026 Min Liu
5* Copyright (c) 2026 Michael David Adams
6*
7* SPDX-License-Identifier: GPL-2.0-or-later
8*
9* This program is free software; you can redistribute it and/or modify
10* it under the terms of the GNU General Public License as published by
11* the Free Software Foundation; either version 2 of the License, or
12* (at your option) any later version.
13*
14* This program is distributed in the hope that it will be useful,
15* but WITHOUT ANY WARRANTY; without even the implied warranty of
16* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17* GNU General Public License for more details.
18*
19* You should have received a copy of the GNU General Public License along
20* with this program; if not, see <https://www.gnu.org/licenses/>.
21}$!*/
22
24
#include <iomanip>
#include <iostream>
#include <memory>
27
28namespace acav {
29
30// Initialize static members
// Pool of unique StringData entries. Lookup hashes and compares entries by
// their string content through the Hash and Equal functors defined below.
std::unordered_set<InternedString::StringData *, InternedString::Hash,
                   InternedString::Equal>
    InternedString::pool_;
// Mutex that the functions in this file lock before reading or modifying
// pool_ (and before adjusting reference counts).
std::mutex InternedString::poolMutex_;
35
36// StringData constructors
37InternedString::StringData::StringData(const std::string &v) : value_(v) {}
38
39InternedString::StringData::StringData(std::string &&v)
40 : value_(std::move(v)) {}
41
42// Hash functors
43std::size_t
44InternedString::Hash::operator()(const StringData *s) const noexcept {
45 return std::hash<std::string>()(s->value_);
46}
47
48std::size_t
49InternedString::Hash::operator()(const std::string &s) const noexcept {
50 return std::hash<std::string>()(s);
51}
52
53std::size_t
54InternedString::Hash::operator()(std::string_view s) const noexcept {
55 return std::hash<std::string_view>()(s);
56}
57
58// Equal functors
59bool InternedString::Equal::operator()(const StringData *a,
60 const StringData *b) const noexcept {
61 return a->value_ == b->value_;
62}
63
64bool InternedString::Equal::operator()(const StringData *a,
65 const std::string &b) const noexcept {
66 return a->value_ == b;
67}
68
69bool InternedString::Equal::operator()(const std::string &a,
70 const StringData *b) const noexcept {
71 return a == b->value_;
72}
73
74bool InternedString::Equal::operator()(const StringData *a,
75 std::string_view b) const noexcept {
76 return a->value_ == b;
77}
78
79bool InternedString::Equal::operator()(std::string_view a,
80 const StringData *b) const noexcept {
81 return a == b->value_;
82}
83
84// Constructors
85InternedString::InternedString(const std::string &v) {
86 data_ = internString(v);
87}
88
89InternedString::InternedString(std::string &&v) {
90 data_ = internString(std::move(v));
91}
92
93InternedString::InternedString(const char *v)
94 : InternedString(std::string_view(v ? v : "")) {}
95
96InternedString::InternedString(std::string_view v) { data_ = internString(v); }
97
98InternedString::InternedString(std::size_t n, char c)
99 : InternedString(std::string(n, c)) {}
100
101InternedString::InternedString(const InternedString &other)
102 : data_(other.data_) {
103 if (data_) {
104 std::lock_guard<std::mutex> lock(poolMutex_);
105 ++data_->refCount_;
106 }
107}
108
109InternedString &InternedString::operator=(const InternedString &other) {
110 if (this != &other) {
111 release();
112 data_ = other.data_;
113 if (data_) {
114 std::lock_guard<std::mutex> lock(poolMutex_);
115 ++data_->refCount_;
116 }
117 }
118 return *this;
119}
120
121InternedString::InternedString(InternedString &&other) noexcept
122 : data_(other.data_) {
123 other.data_ = nullptr;
124}
125
126InternedString &InternedString::operator=(InternedString &&other) noexcept {
127 if (this != &other) {
128 release();
129 data_ = other.data_;
130 other.data_ = nullptr;
131 }
132 return *this;
133}
134
135InternedString::~InternedString() { release(); }
136
137// Accessors
138const std::string &InternedString::str() const {
139 static const std::string empty;
140 return data_ ? data_->value_ : empty;
141}
142
143bool InternedString::empty() const noexcept { return data_ == nullptr; }
144
145std::size_t InternedString::size() const noexcept {
146 return data_ ? data_->value_.size() : 0;
147}
148
149InternedString::const_iterator InternedString::begin() const noexcept {
150 return str().begin();
151}
152
153InternedString::const_iterator InternedString::end() const noexcept {
154 return str().end();
155}
156
157const char &InternedString::operator[](std::size_t pos) const {
158 return str()[pos];
159}
160
161char &InternedString::operator[](std::size_t pos) {
162 // Non-const access modifies the underlying string directly
163 // This is required for JSON library compatibility
164 // The string will be re-interned when necessary
165 if (data_ && pos < data_->value_.size()) {
166 return data_->value_[pos];
167 }
168 // Return reference to static dummy for out-of-range access
169 static char dummy = '\0';
170 return dummy;
171}
172
173const char &InternedString::back() const { return str().back(); }
174
175const char *InternedString::c_str() const { return str().c_str(); }
176
177const char *InternedString::data() const { return c_str(); }
178
179void InternedString::clear() { release(); }
180
181// Mutating operations for JSON compatibility
182void InternedString::push_back(char c) {
183 std::string temp = str();
184 temp.push_back(c);
185 *this = InternedString(std::move(temp));
186}
187
188InternedString &InternedString::append(const char *s, std::size_t n) {
189 std::string temp = str();
190 temp.append(s, n);
191 *this = InternedString(std::move(temp));
192 return *this;
193}
194
195InternedString &InternedString::operator+=(const InternedString &other) {
196 if (other.empty()) {
197 return *this;
198 }
199 if (empty()) {
200 *this = other;
201 return *this;
202 }
203 std::string combined = str() + other.str();
204 *this = InternedString(std::move(combined));
205 return *this;
206}
207
208void InternedString::resize(std::size_t n, char c) {
209 std::string temp = str();
210 temp.resize(n, c);
211 *this = InternedString(std::move(temp));
212}
213
214// Comparison operators
215bool InternedString::operator==(const InternedString &other) const noexcept {
216 // Fast pointer comparison (strings are interned)
217 return data_ == other.data_;
218}
219
220bool InternedString::operator!=(const InternedString &other) const noexcept {
221 return !(*this == other);
222}
223
224bool operator<(const InternedString &lhs, const InternedString &rhs) noexcept {
225 if (lhs.data_ == rhs.data_)
226 return false;
227 if (!lhs.data_)
228 return rhs.data_ != nullptr;
229 if (!rhs.data_)
230 return false;
231 return lhs.data_->value_ < rhs.data_->value_;
232}
233
234// Static methods
236 std::lock_guard<std::mutex> lock(poolMutex_);
237 return pool_.size();
238}
239
240std::size_t InternedString::refCount() const noexcept {
241 return data_ ? data_->refCount_ : 0;
242}
243
245 std::lock_guard<std::mutex> lock(poolMutex_);
246 for (const auto &v : pool_) {
247 std::cout << v->value_ << " (refCount: " << v->refCount_ << ")\n";
248 }
249}
250
251// Private methods
252void InternedString::release() {
253 if (data_) {
254 std::lock_guard<std::mutex> lock(poolMutex_);
255 if (--data_->refCount_ == 0) {
256 pool_.erase(data_);
257 delete data_;
258 }
259 data_ = nullptr;
260 }
261}
262
263InternedString::StringData *InternedString::internString(const std::string &v) {
264 std::lock_guard<std::mutex> lock(poolMutex_);
265 auto it = pool_.find(v);
266 if (it != pool_.end()) {
267 ++(*it)->refCount_;
268 return *it;
269 }
270 // Not found, create new
271 auto *newData = new StringData(v);
272 newData->refCount_ = 1;
273 pool_.insert(newData);
274 return newData;
275}
276
277InternedString::StringData *InternedString::internString(std::string &&v) {
278 std::lock_guard<std::mutex> lock(poolMutex_);
279 auto it = pool_.find(v);
280 if (it != pool_.end()) {
281 ++(*it)->refCount_;
282 return *it;
283 }
284 // Not found, create new with move
285 auto *newData = new StringData(std::move(v));
286 newData->refCount_ = 1;
287 pool_.insert(newData);
288 return newData;
289}
290
291InternedString::StringData *InternedString::internString(std::string_view v) {
292 std::lock_guard<std::mutex> lock(poolMutex_);
293 auto it = pool_.find(v);
294 if (it != pool_.end()) {
295 ++(*it)->refCount_;
296 return *it;
297 }
298 auto *newData = new StringData(std::string(v));
299 newData->refCount_ = 1;
300 pool_.insert(newData);
301 return newData;
302}
303
304#ifdef ACAV_ENABLE_STRING_STATS
305
306StringInterningStats InternedString::getStats() {
307 std::lock_guard<std::mutex> lock(poolMutex_);
308
309 StringInterningStats stats{};
310 stats.uniqueStrings = pool_.size();
311
312 for (const auto *data : pool_) {
313 std::size_t strBytes = data->value_.size();
314 std::size_t strCapacity = data->value_.capacity();
315
316 // Memory used by the string data (actual allocation)
317 stats.poolMemoryBytes += strCapacity;
318
319 // Overhead per StringData: refCount_ + string object overhead
320 stats.poolOverheadBytes += sizeof(StringData);
321
322 // Total references
323 stats.totalReferences += data->refCount_;
324
325 // Without interning: each reference would have its own copy
326 stats.withoutInterningBytes += data->refCount_ * (strCapacity + sizeof(std::string));
327 }
328
329 // Add hash table overhead (approximate)
330 stats.poolOverheadBytes += pool_.bucket_count() * sizeof(void *);
331
332 // Calculate savings
333 std::size_t withInterning = stats.poolMemoryBytes + stats.poolOverheadBytes;
334 if (stats.withoutInterningBytes > withInterning) {
335 stats.savedBytes = stats.withoutInterningBytes - withInterning;
336 stats.savingsPercent =
337 100.0 * static_cast<double>(stats.savedBytes) /
338 static_cast<double>(stats.withoutInterningBytes);
339 } else {
340 stats.savedBytes = 0;
341 stats.savingsPercent = 0.0;
342 }
343
344 return stats;
345}
346
347void InternedString::printStats(const char *label) {
348 auto stats = getStats();
349
350 std::cerr << "\n";
351 std::cerr << "========== String Interning Statistics";
352 if (label) {
353 std::cerr << " [" << label << "]";
354 }
355 std::cerr << " ==========\n";
356 std::cerr << std::fixed << std::setprecision(2);
357 std::cerr << " Unique strings in pool: " << stats.uniqueStrings << "\n";
358 std::cerr << " Total string references: " << stats.totalReferences << "\n";
359 std::cerr << " Pool string data: " << stats.poolMemoryBytes / 1024.0 << " KB\n";
360 std::cerr << " Pool overhead: " << stats.poolOverheadBytes / 1024.0 << " KB\n";
361 std::cerr << " Total with interning: "
362 << (stats.poolMemoryBytes + stats.poolOverheadBytes) / 1024.0 << " KB\n";
363 std::cerr << " Without interning: " << stats.withoutInterningBytes / 1024.0 << " KB\n";
364 std::cerr << " Memory saved: " << stats.savedBytes / 1024.0 << " KB ("
365 << stats.savingsPercent << "%)\n";
366 std::cerr << " Deduplication ratio: ";
367 if (stats.uniqueStrings > 0) {
368 std::cerr << static_cast<double>(stats.totalReferences) / stats.uniqueStrings << "x\n";
369 } else {
370 std::cerr << "N/A\n";
371 }
372 std::cerr << "=======================================================\n\n";
373}
374
// Resets statistics counters. Currently a no-op: this function keeps no
// counters of its own, so there is nothing to reset.
void InternedString::resetStats() {
  // Currently no additional counters to reset beyond what's in the pool.
  // This is a placeholder for future expansion (e.g., hit/miss counters).
}
379
380#endif // ACAV_ENABLE_STRING_STATS
381
382} // namespace acav
Memory-efficient immutable string with automatic deduplication.
Immutable string with automatic deduplication via global pool.
static void displayPool()
Display all strings in pool (for debugging).
std::size_t refCount() const noexcept
Get reference count for this string.
std::size_t size() const noexcept
Get string length.
bool empty() const noexcept
Check if the string is empty.
const std::string & str() const
Get the underlying string value.
static std::size_t poolSize()
Get current pool size (for debugging).
void clear()
Clear the string (releases this handle's reference to its pool entry, leaving the handle with no string data).