ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
defer/tool.cpp
Go to the documentation of this file.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <set>
#include <string>
#include <system_error>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Driver/Driver.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30using namespace clang;
31
32namespace fs = std::filesystem;
33
/// Returns the single mutex that guards the output-path registry.
/// The mutex is a function-local static, so every call yields the same object.
std::mutex &outputRegistryMutex() {
  static std::mutex registryLock;
  return registryLock;
}
38
/// Returns the process-wide set of output paths that have been registered.
/// The set is a function-local static; callers in this file lock
/// outputRegistryMutex() before touching it.
std::unordered_set<std::string> &outputRegistry() {
  static std::unordered_set<std::string> registeredPaths;
  return registeredPaths;
}
43
44bool registerOutputPath(const std::string &path) {
45 std::lock_guard<std::mutex> guard(outputRegistryMutex());
46 auto &registry = outputRegistry();
47 bool inserted = registry.insert(path).second;
48 return inserted;
49}
50
51void unregisterOutputPath(const std::string &path) {
52 std::lock_guard<std::mutex> guard(outputRegistryMutex());
53 outputRegistry().erase(path);
54}
55
56// Command line options
57static cl::OptionCategory ToolCategory("ascii-defer transformation options");
58static cl::extrahelp CommonHelp(tooling::CommonOptionsParser::HelpMessage);
59static cl::extrahelp MoreHelp("\nDefer transformation tool for ascii-chat\n");
60
61static cl::opt<std::string> OutputDirectoryOption("output-dir",
62 cl::desc("Directory where transformed sources will be written"),
63 cl::value_desc("path"), cl::Required, cl::cat(ToolCategory));
64
65static cl::opt<std::string>
66 InputRootOption("input-root", cl::desc("Root directory of original sources (used to compute relative paths)"),
67 cl::value_desc("path"), cl::init(""), cl::cat(ToolCategory));
68
69static cl::opt<std::string> BuildPath("p", cl::desc("Build path (directory containing compile_commands.json)"),
70 cl::Optional, cl::cat(ToolCategory));
71
72static cl::list<std::string> SourcePaths(cl::Positional, cl::desc("<source0> [... <sourceN>]"), cl::cat(ToolCategory));
73
74namespace {
75
76// Structure to track a block (compound statement) that contains defers
77struct BlockScope {
78 CompoundStmt *stmt = nullptr;
79 unsigned scopeId = 0;
80 unsigned depth = 0; // Nesting depth (0 = function body)
81 bool hasDefers = false;
82 bool endsWithReturn = false; // True if block's last statement is a return
83 SourceLocation startLoc; // After opening brace
84 SourceLocation endLoc; // Before closing brace
85};
86
87// Structure to track defer calls within a function
88struct DeferCall {
89 SourceLocation location;
90 SourceLocation endLocation; // End of the defer statement (after semicolon)
91 unsigned fileOffset; // File offset of the defer statement
92 std::string expression; // The code to execute (e.g., "fclose(f)" or "{ cleanup(); }")
93 unsigned scopeId; // Which scope this defer belongs to
94};
95
96// Structure to track return statements and their active scopes
97struct ReturnInfo {
98 SourceLocation location;
99 unsigned fileOffset; // File offset of the return statement
100 std::vector<unsigned> activeScopeIds; // Scopes that are active at this return
101};
102
103// Structure to track function transformation state
104struct FunctionTransformState {
105 FunctionDecl *funcDecl = nullptr;
106 std::vector<DeferCall> deferCalls;
107 std::vector<ReturnInfo> returnInfos;
108 std::map<unsigned, BlockScope> blockScopes; // scopeId -> BlockScope
109 std::vector<unsigned> currentScopeStack; // Stack of active scope IDs during traversal
110 bool needsTransformation = false;
111 unsigned nextScopeId = 0;
112};
113
114class DeferVisitor : public RecursiveASTVisitor<DeferVisitor> {
115public:
116 DeferVisitor(ASTContext &context, Rewriter &rewriter, const fs::path &outputDir, const fs::path &inputRoot)
117 : context_(context), rewriter_(rewriter), outputDir_(outputDir), inputRoot_(inputRoot) {}
118
119 bool TraverseFunctionDecl(FunctionDecl *funcDecl) {
120 if (!funcDecl || funcDecl->isImplicit()) {
121 return RecursiveASTVisitor<DeferVisitor>::TraverseFunctionDecl(funcDecl);
122 }
123
124 SourceManager &sourceManager = context_.getSourceManager();
125 SourceLocation location = funcDecl->getLocation();
126 location = sourceManager.getExpansionLoc(location);
127 if (!location.isValid() || !sourceManager.isWrittenInMainFile(location)) {
128 return RecursiveASTVisitor<DeferVisitor>::TraverseFunctionDecl(funcDecl);
129 }
130
131 // Start tracking this function
132 currentFunction_ = FunctionTransformState();
133 currentFunction_.funcDecl = funcDecl;
134
135 bool result = RecursiveASTVisitor<DeferVisitor>::TraverseFunctionDecl(funcDecl);
136
137 // Transform function if it contains defer calls
138 if (currentFunction_.needsTransformation && !currentFunction_.deferCalls.empty()) {
139 transformFunction(currentFunction_);
140 }
141
142 currentFunction_ = FunctionTransformState();
143 return result;
144 }
145
146 bool TraverseCompoundStmt(CompoundStmt *compoundStmt) {
147 if (!compoundStmt || !currentFunction_.funcDecl) {
148 return RecursiveASTVisitor<DeferVisitor>::TraverseCompoundStmt(compoundStmt);
149 }
150
151 SourceManager &sourceManager = context_.getSourceManager();
152 SourceLocation lbracLoc = compoundStmt->getLBracLoc();
153 if (!lbracLoc.isValid() || !sourceManager.isWrittenInMainFile(lbracLoc)) {
154 return RecursiveASTVisitor<DeferVisitor>::TraverseCompoundStmt(compoundStmt);
155 }
156
157 // Create a new scope for this block
158 unsigned scopeId = currentFunction_.nextScopeId++;
159 unsigned depth = currentFunction_.currentScopeStack.size();
160
161 BlockScope blockScope;
162 blockScope.stmt = compoundStmt;
163 blockScope.scopeId = scopeId;
164 blockScope.depth = depth;
165 blockScope.hasDefers = false;
166 blockScope.endsWithReturn = false;
167 blockScope.startLoc = compoundStmt->getLBracLoc().getLocWithOffset(1);
168 blockScope.endLoc = compoundStmt->getRBracLoc();
169
170 // Check if block ends with a return statement
171 if (!compoundStmt->body_empty()) {
172 Stmt *lastStmt = compoundStmt->body_back();
173 if (isa<ReturnStmt>(lastStmt)) {
174 blockScope.endsWithReturn = true;
175 }
176 }
177
178 currentFunction_.blockScopes[scopeId] = blockScope;
179 currentFunction_.currentScopeStack.push_back(scopeId);
180
181 // Traverse children
182 bool result = RecursiveASTVisitor<DeferVisitor>::TraverseCompoundStmt(compoundStmt);
183
184 // Pop the scope
185 currentFunction_.currentScopeStack.pop_back();
186
187 return result;
188 }
189
190 bool TraverseStmt(Stmt *stmt) {
191 if (!stmt || !currentFunction_.funcDecl) {
192 return RecursiveASTVisitor<DeferVisitor>::TraverseStmt(stmt);
193 }
194
195 // Skip container statements that may contain defers in nested blocks.
196 // We DON'T skip DoStmt because defer() macro expands to do { ... } while(0).
197 // We DO skip IfStmt/ForStmt/WhileStmt/SwitchStmt because their source text
198 // includes child statements with defers that should be tracked at inner scopes.
199
200 if (isa<CompoundStmt>(stmt) || isa<IfStmt>(stmt) || isa<ForStmt>(stmt) || isa<WhileStmt>(stmt) ||
201 isa<SwitchStmt>(stmt)) {
202 return RecursiveASTVisitor<DeferVisitor>::TraverseStmt(stmt);
203 }
204
205 SourceManager &sourceManager = context_.getSourceManager();
206 SourceLocation stmtLoc = stmt->getBeginLoc();
207
208 // For macro-expanded statements, check the expansion location
209 SourceLocation checkLoc = stmtLoc;
210 if (stmtLoc.isMacroID()) {
211 checkLoc = sourceManager.getExpansionLoc(stmtLoc);
212 }
213
214 if (checkLoc.isValid() && sourceManager.isWrittenInMainFile(checkLoc)) {
215 CharSourceRange range;
216 bool isMacro = stmtLoc.isMacroID();
217
218 if (isMacro) {
219 // For macro-expanded statements, get the full macro call range
220 // This includes the macro name AND arguments
221 CharSourceRange macroRange = sourceManager.getImmediateExpansionRange(stmtLoc);
222 range = macroRange;
223 } else {
224 SourceLocation begin = stmt->getBeginLoc();
225 SourceLocation end = stmt->getEndLoc();
226 if (!begin.isValid() || !end.isValid()) {
227 return RecursiveASTVisitor<DeferVisitor>::TraverseStmt(stmt);
228 }
229 range = CharSourceRange::getTokenRange(begin, end);
230 }
231
232 bool invalid = false;
233 StringRef stmtText = Lexer::getSourceText(range, sourceManager, context_.getLangOpts(), &invalid);
234
235 // Get begin location for defer location calculation
236 SourceLocation begin = isMacro ? range.getBegin() : stmt->getBeginLoc();
237
238 // Only process defer() for DoStmt (or non-macro statements)
239 // This avoids processing the same defer multiple times for child nodes
240 bool shouldProcess = !isMacro || isa<DoStmt>(stmt);
241
242 if (shouldProcess && !invalid && stmtText.contains("defer(")) {
243 // Found a defer() call - extract it
244 size_t deferPos = stmtText.find("defer(");
245 if (deferPos != StringRef::npos) {
246 // Find matching closing parenthesis
247 size_t openParen = deferPos + 5; // after "defer"
248 size_t closeParen = findMatchingParen(stmtText, openParen);
249
250 if (closeParen != StringRef::npos) {
251 // Extract the expression inside defer(...)
252 StringRef expression = stmtText.substr(openParen + 1, closeParen - openParen - 1);
253
254 // Calculate the actual source location of "defer" in the file
255 SourceLocation deferLoc = begin.getLocWithOffset(deferPos);
256
257 // Calculate the end location (after the closing paren, we'll find semicolon later)
258 SourceLocation deferEndLoc = begin.getLocWithOffset(closeParen + 1);
259
260 // Get the current scope ID (innermost scope)
261 unsigned currentScopeId = 0;
262 if (!currentFunction_.currentScopeStack.empty()) {
263 currentScopeId = currentFunction_.currentScopeStack.back();
264 }
265
266 // Store the expression as-is - we'll inline it directly at exit points
267 std::string exprStr = expression.str();
268
269 // Trim leading/trailing whitespace
270 size_t firstNonSpace = exprStr.find_first_not_of(" \t\n\r");
271 size_t lastNonSpace = exprStr.find_last_not_of(" \t\n\r");
272 if (firstNonSpace != std::string::npos && lastNonSpace != std::string::npos) {
273 exprStr = exprStr.substr(firstNonSpace, lastNonSpace - firstNonSpace + 1);
274 }
275
276 DeferCall deferCall;
277 deferCall.location = deferLoc;
278 deferCall.endLocation = deferEndLoc;
279 deferCall.fileOffset = sourceManager.getFileOffset(deferLoc);
280 deferCall.expression = exprStr;
281 deferCall.scopeId = currentScopeId;
282
283 // Mark the scope as having defers
284 if (currentFunction_.blockScopes.count(currentScopeId)) {
285 currentFunction_.blockScopes[currentScopeId].hasDefers = true;
286 }
287
288 currentFunction_.deferCalls.push_back(deferCall);
289 currentFunction_.needsTransformation = true;
290 }
291 }
292 }
293 }
294
295 return RecursiveASTVisitor<DeferVisitor>::TraverseStmt(stmt);
296 }
297
298 bool TraverseReturnStmt(ReturnStmt *returnStmt) {
299 if (returnStmt && currentFunction_.funcDecl) {
300 SourceManager &sourceManager = context_.getSourceManager();
301 SourceLocation location = returnStmt->getReturnLoc();
302 if (location.isValid()) {
303 SourceLocation expansionLocation = sourceManager.getExpansionLoc(location);
304 if (expansionLocation.isValid() && sourceManager.isWrittenInMainFile(expansionLocation)) {
305 // Record return with its active scopes (copy the current scope stack)
306 ReturnInfo returnInfo;
307 returnInfo.location = expansionLocation;
308 returnInfo.fileOffset = sourceManager.getFileOffset(expansionLocation);
309 returnInfo.activeScopeIds = currentFunction_.currentScopeStack;
310 currentFunction_.returnInfos.push_back(returnInfo);
311 }
312 }
313 }
314 return RecursiveASTVisitor<DeferVisitor>::TraverseReturnStmt(returnStmt);
315 }
316
317 std::string makeRelativePath(const fs::path &absolutePath) const {
318 if (inputRoot_.empty()) {
319 return absolutePath.generic_string();
320 }
321
322 std::error_code errorCode;
323 fs::path relative = fs::relative(absolutePath, inputRoot_, errorCode);
324 if (errorCode) {
325 return absolutePath.generic_string();
326 }
327 return relative.generic_string();
328 }
329
330private:
331 size_t findMatchingParen(StringRef text, size_t openPos) const {
332 if (openPos >= text.size() || text[openPos] != '(') {
333 return StringRef::npos;
334 }
335
336 int depth = 1;
337 for (size_t i = openPos + 1; i < text.size(); i++) {
338 if (text[i] == '(') {
339 depth++;
340 } else if (text[i] == ')') {
341 depth--;
342 if (depth == 0) {
343 return i;
344 }
345 }
346 }
347
348 return StringRef::npos;
349 }
350
351 // Get defers for a specific scope in LIFO order (last registered first)
352 std::vector<const DeferCall *> getDefersForScope(unsigned scopeId, const std::vector<DeferCall> &deferCalls) const {
353 std::vector<const DeferCall *> result;
354 for (const auto &dc : deferCalls) {
355 if (dc.scopeId == scopeId) {
356 result.push_back(&dc);
357 }
358 }
359 // Reverse for LIFO order
360 std::reverse(result.begin(), result.end());
361 return result;
362 }
363
364 // Format a defer expression for inline insertion
365 std::string formatDeferExpression(const std::string &expr) const {
366 // Check if it's a block-style defer (starts with '{')
367 if (!expr.empty() && expr[0] == '{') {
368 // Block defer - execute the block directly
369 return "do " + expr + " while(0); ";
370 } else {
371 // Function call defer - just add semicolon if needed
372 std::string result = expr;
373 // Trim trailing semicolons/whitespace
374 while (!result.empty() && (result.back() == ';' || result.back() == ' ' || result.back() == '\t' ||
375 result.back() == '\n' || result.back() == '\r')) {
376 result.pop_back();
377 }
378 return result + "; ";
379 }
380 }
381
382 // Generate inline cleanup code for all active scopes at a return statement (LIFO order)
383 // Only includes defers that were declared BEFORE the return statement
384 std::string generateInlineCleanupForReturn(const ReturnInfo &returnInfo, const FunctionTransformState &state) const {
385 std::string code;
386 // Process scopes from innermost to outermost
387 for (auto scopeIt = returnInfo.activeScopeIds.rbegin(); scopeIt != returnInfo.activeScopeIds.rend(); ++scopeIt) {
388 unsigned scopeId = *scopeIt;
389 auto blockIt = state.blockScopes.find(scopeId);
390 if (blockIt == state.blockScopes.end() || !blockIt->second.hasDefers) {
391 continue;
392 }
393 // Get defers for this scope in LIFO order, but only those declared before this return
394 auto defers = getDefersForScopeBeforeOffset(scopeId, state.deferCalls, returnInfo.fileOffset);
395 for (const auto *dc : defers) {
396 code += formatDeferExpression(dc->expression);
397 }
398 }
399 return code;
400 }
401
402 // Get defers for a specific scope that were declared before a given file offset (in LIFO order)
403 std::vector<const DeferCall *>
404 getDefersForScopeBeforeOffset(unsigned scopeId, const std::vector<DeferCall> &deferCalls, unsigned maxOffset) const {
405 std::vector<const DeferCall *> result;
406 for (const auto &dc : deferCalls) {
407 if (dc.scopeId == scopeId && dc.fileOffset < maxOffset) {
408 result.push_back(&dc);
409 }
410 }
411 // Reverse for LIFO order
412 std::reverse(result.begin(), result.end());
413 return result;
414 }
415
416 // Generate inline cleanup code for end of a block (LIFO order)
417 std::string generateInlineCleanupAtBlockEnd(unsigned scopeId, const FunctionTransformState &state) const {
418 std::string code;
419 auto defers = getDefersForScope(scopeId, state.deferCalls);
420 for (const auto *dc : defers) {
421 code += " " + formatDeferExpression(dc->expression) + "\n";
422 }
423 return code;
424 }
425
426 void transformFunction(FunctionTransformState &state) {
427 if (!state.funcDecl || state.deferCalls.empty()) {
428 return;
429 }
430
431 Stmt *body = state.funcDecl->getBody();
432 if (!body) {
433 return;
434 }
435
436 CompoundStmt *compoundBody = dyn_cast<CompoundStmt>(body);
437 if (!compoundBody) {
438 return;
439 }
440
441 // Step 1: Remove all defer() statements
442 for (const DeferCall &deferCall : state.deferCalls) {
443 removeDeferStatement(deferCall);
444 }
445
446 // Step 2: Insert cleanup before each return statement (inline the deferred code)
447 for (const ReturnInfo &returnInfo : state.returnInfos) {
448 std::string cleanup = generateInlineCleanupForReturn(returnInfo, state);
449 if (!cleanup.empty()) {
450 rewriter_.InsertText(returnInfo.location, cleanup, true, true);
451 }
452 }
453
454 // Step 3: Insert cleanup at the end of each block that has defers
455 // Skip blocks that end with a return statement (cleanup already inserted before the return)
456 for (const auto &pair : state.blockScopes) {
457 const BlockScope &blockScope = pair.second;
458 if (blockScope.hasDefers && blockScope.endLoc.isValid() && !blockScope.endsWithReturn) {
459 std::string cleanup = generateInlineCleanupAtBlockEnd(blockScope.scopeId, state);
460 if (!cleanup.empty()) {
461 rewriter_.InsertText(blockScope.endLoc, cleanup, true, true);
462 }
463 }
464 }
465 }
466
467 void removeDeferStatement(const DeferCall &deferCall) {
468 SourceManager &sourceManager = context_.getSourceManager();
469
470 SourceLocation macroLoc = deferCall.location;
471 if (!macroLoc.isValid()) {
472 return;
473 }
474
475 // Get the range covering "defer(expression);"
476 FileID fileId = sourceManager.getFileID(macroLoc);
477 bool invalid = false;
478 StringRef fileData = sourceManager.getBufferData(fileId, &invalid);
479 if (invalid) {
480 return;
481 }
482
483 unsigned offset = sourceManager.getFileOffset(macroLoc);
484
485 // Find "defer(" starting at offset
486 size_t deferStart = fileData.find("defer(", offset);
487 if (deferStart == StringRef::npos || deferStart != offset) {
488 return; // Not at the expected position
489 }
490
491 // Find matching closing paren for defer(...)
492 size_t openParen = deferStart + 5; // Position of '(' in defer(
493 size_t closeParen = findMatchingParenInFile(fileData, openParen);
494 if (closeParen == StringRef::npos) {
495 return;
496 }
497
498 // Find the semicolon AFTER the closing paren
499 size_t semicolonPos = closeParen + 1;
500 while (semicolonPos < fileData.size() && (fileData[semicolonPos] == ' ' || fileData[semicolonPos] == '\t' ||
501 fileData[semicolonPos] == '\n' || fileData[semicolonPos] == '\r')) {
502 semicolonPos++;
503 }
504 if (semicolonPos >= fileData.size() || fileData[semicolonPos] != ';') {
505 return; // No semicolon found after closing paren
506 }
507
508 SourceLocation semicolonLoc = macroLoc.getLocWithOffset(semicolonPos - offset);
509 CharSourceRange deferRange = CharSourceRange::getCharRange(macroLoc, semicolonLoc.getLocWithOffset(1));
510
511 // Replace with a comment noting the defer was moved
512 // For block defers, just note it's a block defer to avoid multiline comment issues
513 std::string exprSummary = deferCall.expression;
514 bool isBlockDefer = !exprSummary.empty() && exprSummary[0] == '{';
515 if (isBlockDefer) {
516 exprSummary = "{...}"; // Summarize block defers
517 }
518 std::string comment = "/* defer: " + exprSummary + " (moved to scope exit) */";
519 rewriter_.ReplaceText(deferRange, comment);
520 }
521
522 size_t findMatchingParenInFile(StringRef fileData, size_t openPos) const {
523 if (openPos >= fileData.size() || fileData[openPos] != '(') {
524 return StringRef::npos;
525 }
526
527 int depth = 1;
528 size_t i = openPos + 1;
529 while (i < fileData.size()) {
530 char c = fileData[i];
531 if (c == '(') {
532 depth++;
533 i++;
534 } else if (c == ')') {
535 depth--;
536 if (depth == 0) {
537 return i;
538 }
539 i++;
540 } else if (c == '"') {
541 // Skip string literals to avoid counting parens inside strings
542 i++;
543 while (i < fileData.size() && fileData[i] != '"') {
544 if (fileData[i] == '\\' && i + 1 < fileData.size()) {
545 i++; // Skip escaped character
546 }
547 i++;
548 }
549 if (i < fileData.size()) {
550 i++; // Skip closing quote
551 }
552 } else if (c == '\'') {
553 // Skip character literals
554 i++;
555 while (i < fileData.size() && fileData[i] != '\'') {
556 if (fileData[i] == '\\' && i + 1 < fileData.size()) {
557 i++; // Skip escaped character
558 }
559 i++;
560 }
561 if (i < fileData.size()) {
562 i++; // Skip closing quote
563 }
564 } else {
565 i++;
566 }
567 }
568
569 return StringRef::npos;
570 }
571
572 ASTContext &context_;
573 Rewriter &rewriter_;
574 fs::path outputDir_;
575 fs::path inputRoot_;
576 FunctionTransformState currentFunction_;
577};
578
579class DeferASTConsumer : public ASTConsumer {
580public:
581 explicit DeferASTConsumer(DeferVisitor &visitor) : visitor_(visitor) {}
582
583 void HandleTranslationUnit(ASTContext &context) override {
584 visitor_.TraverseDecl(context.getTranslationUnitDecl());
585 }
586
587private:
588 DeferVisitor &visitor_;
589};
590
591class DeferFrontendAction : public ASTFrontendAction {
592public:
593 explicit DeferFrontendAction(const fs::path &outputDir, const fs::path &inputRoot)
594 : outputDir_(outputDir), inputRoot_(inputRoot) {
595 initializeProtectedDirectories();
596 }
597
598 void EndSourceFileAction() override {
599 SourceManager &sourceManager = rewriter_.getSourceMgr();
600 const FileEntry *fileEntry = sourceManager.getFileEntryForID(sourceManager.getMainFileID());
601 if (!fileEntry) {
602 return;
603 }
604
605 if (!visitor_) {
606 llvm::errs() << "Defer visitor not initialized; skipping file output\n";
607 hadWriteError_ = true;
608 return;
609 }
610
611 const StringRef filePathRef = fileEntry->tryGetRealPathName();
612 if (filePathRef.empty()) {
613 llvm::errs() << "Unable to resolve file path for transformed output\n";
614 return;
615 }
616
617 const fs::path originalPath = fs::path(filePathRef.str());
618 const std::string relativePath = visitor_->makeRelativePath(originalPath);
619 fs::path destinationPath = outputDir_ / relativePath;
620
621 // SAFETY CHECK: Never overwrite source files
622 std::error_code ec;
623 fs::path canonicalOriginal = fs::canonical(originalPath, ec);
624 if (!ec) {
625 fs::path canonicalDest = fs::weakly_canonical(destinationPath, ec);
626 if (!ec && canonicalOriginal == canonicalDest) {
627 llvm::errs() << "ERROR: Output path is the same as source file! Refusing to overwrite source.\n";
628 llvm::errs() << " Source: " << canonicalOriginal.string() << "\n";
629 llvm::errs() << " Output: " << canonicalDest.string() << "\n";
630 hadWriteError_ = true;
631 return;
632 }
633 }
634
635 // outputDir_ is already absolute (made absolute in main()), so destinationPath should be too
636
637 // Use generic_string() for forward slashes on all platforms
638 const std::string destinationString = destinationPath.generic_string();
639
640 if (!registerOutputPath(destinationString)) {
641 return;
642 }
643
644 // Check file existence using LLVM's exists() inline function
645 bool fileExists = llvm::sys::fs::exists(llvm::Twine(destinationString));
646
647 if (fileExists && isInProtectedSourceTree(destinationPath)) {
648 llvm::errs() << "Refusing to overwrite existing file in protected source tree: " << destinationString << "\n";
649 unregisterOutputPath(destinationString);
650 hadWriteError_ = true;
651 return;
652 }
653
654 const fs::path parent = destinationPath.parent_path();
655 std::error_code directoryError;
656 fs::create_directories(parent, directoryError);
657 if (directoryError) {
658 llvm::errs() << "Failed to create output directory: " << parent.string() << " - " << directoryError.message()
659 << "\n";
660 unregisterOutputPath(destinationString);
661 hadWriteError_ = true;
662 return;
663 }
664
665 std::string rewrittenContents;
666 if (const RewriteBuffer *buffer = rewriter_.getRewriteBufferFor(sourceManager.getMainFileID())) {
667 rewrittenContents.assign(buffer->begin(), buffer->end());
668
669 } else {
670 rewrittenContents = sourceManager.getBufferData(sourceManager.getMainFileID()).str();
671 }
672
673 std::error_code fileError;
674 llvm::raw_fd_ostream outputStream(destinationPath.string(), fileError, llvm::sys::fs::OF_Text);
675 if (fileError) {
676 llvm::errs() << "Failed to open output file: " << destinationString << " - " << fileError.message() << "\n";
677 unregisterOutputPath(destinationString);
678 hadWriteError_ = true;
679 return;
680 }
681
682 outputStream << rewrittenContents;
683 outputStream.close();
684 if (outputStream.has_error()) {
685 llvm::errs() << "Error while writing transformed file: " << destinationString << "\n";
686 unregisterOutputPath(destinationString);
687 hadWriteError_ = true;
688 }
689 }
690
691 bool hadWriteError() const {
692 return hadWriteError_;
693 }
694
695protected:
696 std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &compiler, StringRef) override {
697 rewriter_.setSourceMgr(compiler.getSourceManager(), compiler.getLangOpts());
698 visitor_ = std::make_unique<DeferVisitor>(compiler.getASTContext(), rewriter_, outputDir_, inputRoot_);
699 return std::make_unique<DeferASTConsumer>(*visitor_);
700 }
701
702private:
703 Rewriter rewriter_;
704 fs::path outputDir_;
705 fs::path inputRoot_;
706 fs::path inputRootCanonical_;
707 fs::path protectedSrcDir_;
708 fs::path protectedLibDir_;
709 std::unique_ptr<DeferVisitor> visitor_;
710 bool hadWriteError_ = false;
711
712 void initializeProtectedDirectories() {
713 std::error_code ec;
714 fs::path normalizedRoot = inputRoot_;
715
716 if (normalizedRoot.empty()) {
717 normalizedRoot = fs::current_path(ec);
718 ec.clear();
719 }
720
721 if (!normalizedRoot.is_absolute()) {
722 normalizedRoot = fs::absolute(normalizedRoot, ec);
723 ec.clear();
724 }
725
726 inputRootCanonical_ = fs::weakly_canonical(normalizedRoot, ec);
727 if (ec) {
728 inputRootCanonical_.clear();
729 return;
730 }
731
732 protectedSrcDir_ = inputRootCanonical_ / "src";
733 protectedLibDir_ = inputRootCanonical_ / "lib";
734 }
735
736 static bool pathStartsWith(const fs::path &path, const fs::path &prefix) {
737 if (prefix.empty()) {
738 return false;
739 }
740
741 auto pathIter = path.begin();
742 for (auto prefixIter = prefix.begin(); prefixIter != prefix.end(); ++prefixIter) {
743 if (pathIter == path.end() || *pathIter != *prefixIter) {
744 return false;
745 }
746 ++pathIter;
747 }
748
749 return true;
750 }
751
752 bool isInProtectedSourceTree(const fs::path &path) const {
753 if (inputRootCanonical_.empty()) {
754 return false;
755 }
756
757 std::error_code ec;
758 fs::path canonicalPath = fs::weakly_canonical(path, ec);
759 if (ec) {
760 return false;
761 }
762
763 return pathStartsWith(canonicalPath, protectedSrcDir_) || pathStartsWith(canonicalPath, protectedLibDir_);
764 }
765};
766
767class DeferActionFactory : public tooling::FrontendActionFactory {
768public:
769 DeferActionFactory(const fs::path &outputDir, const fs::path &inputRoot)
770 : outputDir_(outputDir), inputRoot_(inputRoot) {}
771
772 std::unique_ptr<FrontendAction> create() {
773 return std::make_unique<DeferFrontendAction>(outputDir_, inputRoot_);
774 }
775
776private:
777 fs::path outputDir_;
778 fs::path inputRoot_;
779};
780
781} // namespace
782
783// Store the original CWD before any tool changes it
784static fs::path g_originalCwd;
785
786int main(int argc, const char **argv) {
787 InitLLVM InitLLVM(argc, argv);
788
789 // Capture CWD before anything else can change it
790 g_originalCwd = fs::current_path();
791
792 // Hide all the LLVM internal options that aren't relevant to our tool
793 cl::HideUnrelatedOptions(ToolCategory);
794
795 cl::ParseCommandLineOptions(argc, argv, "ascii-defer transformation tool\n");
796
797 fs::path outputDir = fs::path(OutputDirectoryOption.getValue());
798 // Make output directory absolute relative to original CWD
799 if (!outputDir.is_absolute()) {
800 outputDir = g_originalCwd / outputDir;
801 }
802 fs::path inputRoot;
803 if (!InputRootOption.getValue().empty()) {
804 inputRoot = fs::path(InputRootOption.getValue());
805 } else {
806 inputRoot = fs::current_path();
807 }
808
809 // Make input root absolute for reliable path computation
810 if (!inputRoot.is_absolute()) {
811 std::error_code ec;
812 inputRoot = fs::absolute(inputRoot, ec);
813 if (ec) {
814 llvm::errs() << "Failed to resolve input root path: " << ec.message() << "\n";
815 return 1;
816 }
817 }
818
819 std::vector<std::string> sourcePaths;
820 for (const auto &path : SourcePaths) {
821 if (!path.empty()) {
822 sourcePaths.push_back(path);
823 }
824 }
825
826 if (sourcePaths.empty()) {
827 llvm::errs() << "No translation units specified for transformation. Provide positional source paths.\n";
828 return 1;
829 }
830
831 if (fs::exists(outputDir)) {
832 if (!fs::is_directory(outputDir)) {
833 llvm::errs() << "Output path exists and is not a directory: " << outputDir.c_str() << "\n";
834 return 1;
835 }
836 } else {
837 std::error_code errorCode;
838 fs::create_directories(outputDir, errorCode);
839 if (errorCode) {
840 llvm::errs() << "Failed to create output directory: " << outputDir.c_str() << " - " << errorCode.message()
841 << "\n";
842 return 1;
843 }
844 }
845
846 // Load compilation database
847 std::string buildPath = BuildPath.getValue();
848 if (buildPath.empty()) {
849 buildPath = ".";
850 }
851 std::string errorMessage;
852 std::unique_ptr<tooling::CompilationDatabase> compilations =
853 tooling::CompilationDatabase::loadFromDirectory(buildPath, errorMessage);
854 if (!compilations) {
855 llvm::errs() << "Error loading compilation database from '" << buildPath << "': " << errorMessage << "\n";
856 return 1;
857 }
858
859 tooling::ClangTool tool(*compilations, sourcePaths);
860
861 // Build the list of arguments to prepend for system header resolution
862 // LibTooling uses CC1 mode internally which has different include path handling than
863 // the clang driver. We use -Xclang to pass CC1-level flags that properly configure
864 // system include paths for LibTooling's CompilerInvocation.
865 std::vector<std::string> prependArgs;
866
867 // Try to find clang resource directory at runtime
868 // Priority: 1) CLANG_RESOURCE_DIR compile-time path, 2) Runtime detection via common paths
869 std::string resourceDir;
870
871#ifdef CLANG_RESOURCE_DIR
872 if (llvm::sys::fs::exists(CLANG_RESOURCE_DIR)) {
873 resourceDir = CLANG_RESOURCE_DIR;
874 llvm::errs() << "Using embedded clang resource directory: " << resourceDir << "\n";
875 } else {
876 llvm::errs() << "Embedded clang resource directory not found: " << CLANG_RESOURCE_DIR << "\n";
877 }
878#endif
879
880 // Runtime detection if embedded path doesn't work
881 if (resourceDir.empty()) {
882 // Common locations for clang resource directories
883 std::vector<std::string> searchPaths;
884
885#ifdef __APPLE__
886 // Homebrew LLVM on Apple Silicon
887 searchPaths.push_back("/opt/homebrew/opt/llvm/lib/clang");
888 // Homebrew LLVM on Intel Mac
889 searchPaths.push_back("/usr/local/opt/llvm/lib/clang");
890 // Xcode's clang
891 searchPaths.push_back("/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang");
892 // CommandLineTools clang
893 searchPaths.push_back("/Library/Developer/CommandLineTools/usr/lib/clang");
894#endif
895#ifdef __linux__
896 // System LLVM installations
897 searchPaths.push_back("/usr/lib/llvm-22/lib/clang");
898 searchPaths.push_back("/usr/lib/llvm-21/lib/clang");
899 searchPaths.push_back("/usr/lib/llvm-20/lib/clang");
900 searchPaths.push_back("/usr/lib/clang");
901 searchPaths.push_back("/usr/local/lib/clang");
902#endif
903 // Universal fallback
904 searchPaths.push_back("/usr/local/lib/clang");
905
906 for (const auto& basePath : searchPaths) {
907 if (!llvm::sys::fs::exists(basePath)) {
908 continue;
909 }
910
911 // Find the highest version subdirectory
912 std::error_code ec;
913 std::string bestVersion;
914 int bestMajor = 0;
915
916 for (llvm::sys::fs::directory_iterator dir(basePath, ec), dirEnd;
917 !ec && dir != dirEnd; dir.increment(ec)) {
918 std::string name = llvm::sys::path::filename(dir->path()).str();
919 // Parse version number (e.g., "22", "21.1.0", "21")
920 int major = 0;
921 if (sscanf(name.c_str(), "%d", &major) == 1 && major > bestMajor) {
922 bestMajor = major;
923 bestVersion = dir->path();
924 }
925 }
926
927 if (!bestVersion.empty()) {
928 resourceDir = bestVersion;
929 llvm::errs() << "Found clang resource directory at runtime: " << resourceDir << "\n";
930 break;
931 }
932 }
933
934 if (resourceDir.empty()) {
935 llvm::errs() << "Warning: Could not find clang resource directory\n";
936 }
937 }
938
939 if (!resourceDir.empty()) {
940 prependArgs.push_back(std::string("-resource-dir=") + resourceDir);
941 }
942
943 // Add target triple - LibTooling needs this to validate architecture-specific flags
944 // Without a target, flags like -mavx2 cause "unsupported option for target ''" errors
945#ifdef __APPLE__
946 #ifdef __arm64__
947 prependArgs.push_back("-target");
948 prependArgs.push_back("arm64-apple-darwin");
949 llvm::errs() << "Using target: arm64-apple-darwin\n";
950 #else
951 prependArgs.push_back("-target");
952 prependArgs.push_back("x86_64-apple-darwin");
953 llvm::errs() << "Using target: x86_64-apple-darwin\n";
954 #endif
955#elif defined(__linux__)
956 #ifdef __aarch64__
957 prependArgs.push_back("-target");
958 prependArgs.push_back("aarch64-linux-gnu");
959 llvm::errs() << "Using target: aarch64-linux-gnu\n";
960 #else
961 prependArgs.push_back("-target");
962 prependArgs.push_back("x86_64-linux-gnu");
963 llvm::errs() << "Using target: x86_64-linux-gnu\n";
964 #endif
965#endif
966
967 // Override the sysroot for macOS. Homebrew's LLVM config file sets -isysroot
968 // to CommandLineTools SDK, but we strip that from compile_commands.json and
969 // set our own explicitly to ensure consistent behavior.
970 std::string selectedSDK;
971#ifdef __APPLE__
972 {
973 const char* sdkPaths[] = {
974 // Xcode SDK (preferred - most complete headers and frameworks)
975 "/Applications/Xcode.app/Contents/Developer/Platforms/"
976 "MacOSX.platform/Developer/SDKs/MacOSX.sdk",
977 // CommandLineTools SDK (fallback for users without Xcode)
978 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk",
979 };
980
981 for (const char* sdk : sdkPaths) {
982 if (llvm::sys::fs::exists(sdk)) {
983 selectedSDK = sdk;
984 break;
985 }
986 }
987
988 if (!selectedSDK.empty()) {
989 prependArgs.push_back("-isysroot");
990 prependArgs.push_back(selectedSDK);
991 llvm::errs() << "Using macOS SDK: " << selectedSDK << "\n";
992 } else {
993 llvm::errs() << "Warning: No macOS SDK found, system headers may not be available\n";
994 }
995 }
996#endif
997
998 // Build list of system include paths to add as -isystem paths.
999 // LibTooling (cc1 mode) doesn't automatically add system include paths like
1000 // the clang driver does, so we add Clang's builtin header directory
1001 // (stdbool.h, stddef.h, etc.) explicitly - FIRST so it shadows SDK builtins.
1002 // The SDK's usr/include is deliberately NOT added here; see the NOTE below.
1003 std::vector<std::string> appendArgs;
1004 if (!resourceDir.empty()) {
1005 std::string builtinInclude = resourceDir + "/include";
1006 if (llvm::sys::fs::exists(builtinInclude)) {
1007 appendArgs.push_back("-isystem");
1008 appendArgs.push_back(builtinInclude);
1009 llvm::errs() << "Added clang builtin -isystem: " << builtinInclude << "\n";
1010 }
1011 }
1012 // NOTE: We intentionally do NOT add SDK's usr/include to the -isystem path.
1013 // LibTooling on LLVM 21 has a bug where __has_include_next() evaluates even
1014 // in non-taken preprocessor branches and creates VFS entries that can't be opened.
1015 // The clang builtin stdbool.h has:
1016 // #if defined(__MVS__) && __has_include_next(<stdbool.h>)
1017 // Even though __MVS__ is not defined on macOS, LLVM 21's LibTooling still tries
1018 // to resolve __has_include_next(<stdbool.h>), looks in SDK/usr/include, and fails
1019 // because stdbool.h doesn't exist there (it's a builtin header).
1020 //
1021 // By not adding SDK/usr/include, the __has_include_next won't find any entry
1022 // and will return false without trying to open a non-existent file.
1023 // The -isysroot flag still provides access to SDK headers through framework paths.
1024
1025 // Single consolidated argument adjuster that:
1026 // 1. Preserves compiler path (first arg)
1027 // 2. Inserts prepend args immediately after compiler (-resource-dir, -target, -isysroot)
1028 // 3. Strips unnecessary flags from remaining args
1029 // 4. Adds system include paths as -isystem at the end (after project -I paths)
1030 // 5. Adds defer tool parsing define BEFORE the "--" separator
1031 std::string inputRootStr = inputRoot.string();
1032 auto consolidatedAdjuster = [prependArgs, appendArgs, inputRootStr](const tooling::CommandLineArguments &args, StringRef) {
1033 // Helper to check if a path is under the project root
1034 auto isProjectPath = [&inputRootStr](const std::string &path) -> bool {
1035 if (inputRootStr.empty()) return false;
1036 // Normalize both paths for comparison
1037 std::error_code ec;
1038 auto normalizedPath = fs::canonical(path, ec);
1039 if (ec) return false; // Path doesn't exist or can't be resolved
1040 auto normalizedRoot = fs::canonical(inputRootStr, ec);
1041 if (ec) return false;
1042 // Check if the path starts with the root
1043 std::string pathStr = normalizedPath.string();
1044 std::string rootStr = normalizedRoot.string();
1045 if (pathStr.find(rootStr) != 0) return false;
1046 // Exclude .deps-cache directory - these are cached dependencies that use
1047 // angled includes (<header.h>) and need -isystem, not -iquote
1048 if (pathStr.find("/.deps-cache/") != std::string::npos) return false;
1049 return true;
1050 };
1051 tooling::CommandLineArguments result;
1052
1053 if (args.empty()) {
1054 return result;
1055 }
1056
1057 // First: preserve the compiler path (first argument)
1058 result.push_back(args[0]);
1059
1060 // Second: add the prepend args right after the compiler
1061 for (const auto &arg : prependArgs) {
1062 result.push_back(arg);
1063 }
1064
1065 // Third: process remaining arguments, stripping unnecessary flags
1066 // IMPORTANT: Convert -I to -iquote for project include paths.
1067 // Clang include search order for <header.h>: -I paths -> -isystem paths -> system paths
1068 // Clang include search order for "header.h": -iquote paths -> -I paths -> -isystem paths -> system paths
1069 //
1070 // The problem: Project -I paths are searched BEFORE -isystem paths for <stdio.h>.
1071 // This causes LibTooling to look for <stdio.h> in the project's lib/ directory first.
1072 //
1073 // The fix: Convert project -I to -iquote. Then <stdio.h> skips project paths entirely
1074 // and finds system headers in -isystem paths instead.
1075 //
1076 // ALSO: Collect all -isystem paths and reorder them so clang builtins come FIRST.
1077 // LibTooling on LLVM 21 has a VFS bug where it creates phantom entries for headers
1078 // in -isystem directories even when the header doesn't exist there.
1079 std::vector<std::string> collectedIsystemPaths;
1080 bool foundSeparator = false;
1081 size_t separatorIndex = 0;
1082 for (size_t i = 1; i < args.size(); ++i) {
1083 const std::string &arg = args[i];
1084
1085 // When we hit "--", note its position and break
1086 if (arg == "--") {
1087 foundSeparator = true;
1088 separatorIndex = i;
1089 break;
1090 }
1091
1092 // Skip sanitizer flags
1093 if (arg.find("-fsanitize") != std::string::npos)
1094 continue;
1095 if (arg.find("-fno-sanitize") != std::string::npos)
1096 continue;
1097 // Skip debug info flags (not needed for AST parsing)
1098 if (arg == "-g" || arg == "-g2" || arg == "-g3")
1099 continue;
1100 if (arg == "-fno-eliminate-unused-debug-types")
1101 continue;
1102 if (arg == "-fno-inline")
1103 continue;
1104 // Strip -resource-dir flags and their arguments - we added our embedded path
1105 if (arg == "-resource-dir") {
1106 ++i;
1107 continue;
1108 }
1109 if (arg.find("-resource-dir=") == 0)
1110 continue;
1111 // Strip -isysroot flags and their arguments - we added our embedded SDK path
1112 if (arg == "-isysroot") {
1113 ++i;
1114 continue;
1115 }
1116 if (arg.find("-isysroot=") == 0 || (arg.find("-isysroot") == 0 && arg.length() > 9))
1117 continue;
1118
1119 // Collect -isystem paths instead of passing them through
1120 // We'll add them at the end in the correct order (clang builtins first)
1121 if (arg == "-isystem" && i + 1 < args.size()) {
1122 collectedIsystemPaths.push_back(args[++i]);
1123 continue;
1124 }
1125 if (arg.find("-isystem") == 0 && arg.length() > 8) {
1126 collectedIsystemPaths.push_back(arg.substr(8));
1127 continue;
1128 }
1129
1130 // Convert -I to -iquote for project include paths
1131 // Convert -I to -isystem for dependency paths (so they come after our clang builtins)
1132 // This prevents <stdbool.h> from being searched in dependency directories
1133 // before our clang builtin path
1134 if (arg == "-I" && i + 1 < args.size()) {
1135 // -I /path/to/dir (separate argument)
1136 const std::string &includePath = args[++i];
1137 if (isProjectPath(includePath)) {
1138 result.push_back("-iquote");
1139 result.push_back(includePath);
1140 } else {
1141 // Collect dependency -I paths to add as -isystem after our builtins
1142 collectedIsystemPaths.push_back(includePath);
1143 }
1144 continue;
1145 }
1146 if (arg.find("-I") == 0 && arg.length() > 2) {
1147 // -I/path/to/dir (combined form)
1148 std::string includePath = arg.substr(2);
1149 if (isProjectPath(includePath)) {
1150 result.push_back("-iquote");
1151 result.push_back(includePath);
1152 } else {
1153 // Collect dependency -I paths to add as -isystem after our builtins
1154 collectedIsystemPaths.push_back(includePath);
1155 }
1156 continue;
1157 }
1158
1159 result.push_back(arg);
1160 }
1161
1162 // Fourth: add system include paths in the correct order
1163 // Order matters for LibTooling on LLVM 21 - clang builtins MUST come first
1164 // to shadow any phantom VFS entries that might be created for other paths
1165 for (const auto &arg : appendArgs) {
1166 result.push_back(arg);
1167 }
1168 // Then add the collected -isystem paths from the compilation database
1169 for (const auto &path : collectedIsystemPaths) {
1170 result.push_back("-isystem");
1171 result.push_back(path);
1172 }
1173
1174 // Fifth: add the defer tool define and separator
1175 result.push_back("-DASCIICHAT_DEFER_TOOL_PARSING");
1176 if (foundSeparator) {
1177 result.push_back("--");
1178 // Copy any remaining args after "--"
1179 for (size_t i = separatorIndex + 1; i < args.size(); ++i) {
1180 result.push_back(args[i]);
1181 }
1182 }
1183
1184 return result;
1185 };
1186 tool.appendArgumentsAdjuster(consolidatedAdjuster);
1187
1188 // Debug: Print the final command line arguments
1189 tool.appendArgumentsAdjuster([](const tooling::CommandLineArguments &args, StringRef filename) {
1190 llvm::errs() << "Final command for " << filename << ":\n";
1191 for (const auto &arg : args) {
1192 llvm::errs() << " " << arg << "\n";
1193 }
1194 return args;
1195 });
1196
1197 DeferActionFactory actionFactory(outputDir, inputRoot);
1198 const int executionResult = tool.run(&actionFactory);
1199 if (executionResult != 0) {
1200 llvm::errs() << "Defer transformation failed with code " << executionResult << "\n";
1201 }
1202 return executionResult;
1203}
void unregisterOutputPath(const std::string &path)
std::mutex & outputRegistryMutex()
int main(int argc, const char **argv)
std::unordered_set< std::string > & outputRegistry()
bool registerOutputPath(const std::string &path)