Skip to content

Instantly share code, notes, and snippets.

@CodeBoy2006
Created April 4, 2025 01:09
Show Gist options
  • Select an option

  • Save CodeBoy2006/722c1d2b950ed35603d69e2ad503b798 to your computer and use it in GitHub Desktop.

Select an option

Save CodeBoy2006/722c1d2b950ed35603d69e2ad503b798 to your computer and use it in GitHub Desktop.
C++ Windows tool: Scans dir, outputs filtered tree & file contents (Markdown), copies to clipboard (for LLMs/docs).
#include <algorithm>
#include <cctype>       // std::tolower
#include <climits>      // INT_MAX
#include <cstring>      // memcpy
#include <filesystem> // Requires C++17
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error> // std::error_code
#include <vector>
#include <windows.h> // For Windows Clipboard API
// Bring std names into scope: the code below uses unqualified string, vector, cout, etc.
using namespace std;
namespace fs = std::filesystem; // Keep the alias for filesystem for clarity
// --- Configuration ---
// Directory names skipped during the scan. isExcluded() compares each name
// against the final path component of a directory entry using exact equality.
const vector<string> EXCLUDED_DIRS{
    "node_modules",
    ".git",
    ".svn",
    ".hg",
    "dist",
    "build",
    "out",
    "target",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".vscode",
    ".idea",
};
// Lower-case file extensions (leading dot included) whose files are treated as
// text and have their contents emitted. isTextFile() lower-cases a file's
// extension and compares it against these entries with exact equality.
const vector<string> TEXT_EXTENSIONS{
    ".txt",          ".log",    ".ini",        ".conf",
    ".config",       ".md",     ".json",       ".xml",
    ".yaml",         ".yml",    ".toml",       ".sh",
    ".bash",         ".zsh",    ".bat",        ".cmd",
    ".ps1",          ".py",     ".java",       ".c",
    ".cpp",          ".h",      ".hpp",        ".cs",
    ".js",           ".mjs",    ".ts",         ".tsx",
    ".jsx",          ".html",   ".htm",        ".css",
    ".scss",         ".sass",   ".less",       ".php",
    ".sql",          ".vue",    ".go",         ".rb",
    ".swift",        ".kt",     ".kts",        ".rs",
    ".lua",          ".pl",     ".pm",         ".dart",
    ".dockerfile",   ".tf",     ".gitignore",  ".gitattributes",
    ".editorconfig", ".env",    ".properties", ".gradle",
    ".pom",          ".csproj", ".vbproj",     ".fsproj",
    ".sln",          ".mk",     ".makefile",   ".cmake",
    ".scala",        ".sbt",    ".tex",        ".r",
    ".vb",           ".fs",     ".aspx",       ".asp",
    ".jsp",          ".rst",    ".csv",        ".tsv",
};
// --- End Configuration ---
// --- Helper Functions ---
// Converts a UTF-8 encoded string to a UTF-16 wstring for the Windows API.
//
// Returns an empty wstring when `str` is empty, when `str` is longer than the
// int-sized length parameter of MultiByteToWideChar can express, or when the
// conversion itself reports failure.
wstring utf8_to_wstring(const string& str) {
    if (str.empty()) return wstring();

    // MultiByteToWideChar takes the byte count as an int; casting a larger
    // size would silently wrap, so such input is rejected instead.
    if (str.size() > static_cast<size_t>(INT_MAX)) return wstring();
    const int byte_count = static_cast<int>(str.size());

    // First call: query the number of UTF-16 code units required.
    const int size_needed =
        MultiByteToWideChar(CP_UTF8, 0, str.data(), byte_count, nullptr, 0);
    if (size_needed <= 0) return wstring();

    // Second call: perform the conversion into the pre-sized buffer.
    wstring wstrTo(static_cast<size_t>(size_needed), L'\0');
    const int written =
        MultiByteToWideChar(CP_UTF8, 0, str.data(), byte_count, &wstrTo[0], size_needed);
    if (written <= 0) return wstring();

    wstrTo.resize(static_cast<size_t>(written));
    return wstrTo;
}
// Copies UTF-8 `text` to the Windows clipboard as CF_UNICODETEXT.
//
// Returns true on success. On any failure an error message is written to cerr,
// the clipboard is closed again if it was opened, and false is returned.
bool copyToClipboard(const string& text) {
    const wstring wtext = utf8_to_wstring(text);

    // utf8_to_wstring yields an empty string when conversion fails. Without
    // this check a failed conversion would replace the clipboard with an empty
    // string and still be reported as success.
    if (!text.empty() && wtext.empty()) {
        cerr << "Error: Cannot convert text to UTF-16 for clipboard." << endl;
        return false;
    }

    if (!OpenClipboard(nullptr)) {
        cerr << "Error: Cannot open clipboard." << endl;
        return false;
    }
    if (!EmptyClipboard()) {
        cerr << "Error: Cannot empty clipboard." << endl;
        CloseClipboard();
        return false;
    }

    // Room for the text plus its terminating NUL.
    const size_t byte_count = (wtext.size() + 1) * sizeof(wchar_t);
    HGLOBAL hg = GlobalAlloc(GMEM_MOVEABLE, byte_count);
    if (!hg) {
        CloseClipboard();
        cerr << "Error: Cannot allocate memory for clipboard." << endl;
        return false;
    }

    wchar_t* buffer = static_cast<wchar_t*>(GlobalLock(hg));
    if (!buffer) {
        GlobalFree(hg);
        CloseClipboard();
        cerr << "Error: Cannot lock memory for clipboard." << endl;
        return false;
    }
    wtext.copy(buffer, wtext.size());
    buffer[wtext.size()] = L'\0';
    GlobalUnlock(hg);

    if (!SetClipboardData(CF_UNICODETEXT, hg)) {
        cerr << "Error: Cannot set clipboard data." << endl;
        // The handle was not accepted, so it is still ours to release.
        GlobalFree(hg);
        CloseClipboard();
        return false;
    }

    // After a successful SetClipboardData the handle must not be freed here.
    CloseClipboard();
    return true;
}
// Reports whether `entry` is a directory whose name is listed in EXCLUDED_DIRS.
// Entries that are not directories are never excluded. `base_path` is accepted
// for the callers' benefit but is not consulted.
bool isExcluded(const fs::directory_entry& entry, const fs::path& base_path) {
    if (!entry.is_directory()) {
        return false;
    }

    const string name = entry.path().filename().string();
    const auto match = find(EXCLUDED_DIRS.begin(), EXCLUDED_DIRS.end(), name);
    return match != EXCLUDED_DIRS.end();
}
// Reports whether `file_path` names a text/code file, judged by comparing its
// lower-cased extension against TEXT_EXTENSIONS.
//
// A dotfile such as ".gitignore" has an empty path::extension() because the
// leading dot is not treated as an extension separator. For such names the
// whole filename is used as the candidate, so the dotfile entries in
// TEXT_EXTENSIONS (".gitignore", ".env", ...) can match.
bool isTextFile(const fs::path& file_path) {
    string ext = file_path.extension().string();
    if (ext.empty()) {
        const string name = file_path.filename().string();
        if (name.size() > 1 && name.front() == '.') {
            ext = name;
        }
    }
    if (ext.empty()) {
        return false;
    }

    // Lower-case through unsigned char: std::tolower is undefined for negative
    // values, which plain char can hold for non-ASCII bytes.
    transform(ext.begin(), ext.end(), ext.begin(),
              [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    return find(TEXT_EXTENSIONS.begin(), TEXT_EXTENSIONS.end(), ext) != TEXT_EXTENSIONS.end();
}
// Guesses the Markdown code-fence language hint from the extension of
// `file_path`. The comparison is case-insensitive.
//
// Returns an empty string when the path has no extension or the extension is
// not in the table below.
string guessLanguage(const fs::path& file_path) {
    if (!file_path.has_extension()) return "";

    string ext = file_path.extension().string();
    // Lower-case through unsigned char: tolower is undefined for negative
    // values, which plain char can hold for non-ASCII bytes.
    transform(ext.begin(), ext.end(), ext.begin(),
              [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    struct ExtensionHint {
        const char* ext;
        const char* lang;
    };
    static const ExtensionHint kHints[] = {
        {".py", "python"},
        {".js", "javascript"},
        {".mjs", "javascript"},
        {".jsx", "javascript"},
        {".ts", "typescript"},
        {".tsx", "typescript"},
        {".java", "java"},
        {".cs", "csharp"},
        {".html", "html"},
        {".htm", "html"},
        {".css", "css"},
        {".json", "json"},
        {".xml", "xml"},
        {".sql", "sql"},
        {".md", "markdown"},
        {".sh", "bash"},
        {".bash", "bash"},
        {".zsh", "bash"},
        {".bat", "batch"},
        {".cmd", "batch"},
        {".ps1", "powershell"},
        {".c", "c"},
        {".h", "c"},
        {".cpp", "cpp"},
        {".hpp", "cpp"},
        {".go", "go"},
        {".php", "php"},
        {".rb", "ruby"},
        {".yaml", "yaml"},
        {".yml", "yaml"},
        {".toml", "toml"},
        {".dockerfile", "dockerfile"},
        {".vue", "vue"},
        {".swift", "swift"},
        {".kt", "kotlin"},
        {".kts", "kotlin"},
        {".rs", "rust"},
        {".lua", "lua"},
        {".pl", "perl"},
        {".pm", "perl"},
        {".dart", "dart"},
        {".scala", "scala"},
        {".tex", "latex"},
        {".r", "r"},
        {".vb", "vbnet"},
        {".fs", "fsharp"},
        // Add more language hints as needed
    };

    for (const auto& hint : kHints) {
        if (ext == hint.ext) return hint.lang;
    }
    return ""; // No language hint for unknown extensions
}
// Reads the whole file at `file_path` into `content` as raw bytes (the file is
// opened in binary mode, so no newline translation or decoding takes place).
//
// Returns true and overwrites `content` on success, including for an empty
// file. Returns false and leaves `content` untouched when the file cannot be
// opened or the stream reports an unrecoverable error while reading.
bool readFileContent(const fs::path& file_path, string& content) {
    ifstream file_stream(file_path, ios::binary);
    if (!file_stream) {
        return false;
    }

    // Read through the istream interface rather than streaming rdbuf() so
    // that a failure while reading is recorded in the stream state.
    string buffer;
    char chunk[16 * 1024];
    while (file_stream.read(chunk, static_cast<streamsize>(sizeof(chunk))) ||
           file_stream.gcount() > 0) {
        buffer.append(chunk, static_cast<size_t>(file_stream.gcount()));
    }

    // Reaching end-of-file sets eofbit/failbit, which is expected; badbit
    // means the read itself failed and `buffer` may be truncated.
    if (file_stream.bad()) {
        return false;
    }

    content = std::move(buffer);
    return true;
}
// --- Core Recursive Function ---
// Builds a Markdown code fence that `content` cannot close early: at least
// three backticks, otherwise one more than the longest backtick run in `content`.
static string makeCodeFence(const string& content) {
    size_t longest_run = 0;
    size_t current_run = 0;
    for (const char ch : content) {
        current_run = (ch == '`') ? current_run + 1 : 0;
        if (current_run > longest_run) {
            longest_run = current_run;
        }
    }
    return string(longest_run < 3 ? 3 : longest_run + 1, '`');
}

// Appends one tree line for `current_path` to `tree_output`, then either
// recurses into it (directory) or appends its contents to
// `file_content_output` (regular file accepted by isTextFile).
//
//   current_path        entry being processed
//   base_path           scan root; file headers show paths relative to it
//   indent              prefix inherited from the ancestors' tree lines
//   is_last             whether this is the last entry of its parent
//   tree_output         receives the tree drawing
//   file_content_output receives the fenced file contents
//
// Directory entries rejected by isExcluded are dropped before recursion.
// A directory that cannot be listed is marked "[Access Error]" in the tree.
void processDirectory(
    const fs::path& current_path,
    const fs::path& base_path,
    const string& indent,
    bool is_last,
    stringstream& tree_output,
    stringstream& file_content_output)
{
    // --- 1. Add current item to tree ---
    // The entry's own name is used directly; resolving it through
    // fs::relative could throw and would follow symlinks to the target name.
    tree_output << indent << (is_last ? "\\---" : "+---")
                << current_path.filename().string() << '\n';

    // Children continue the drawing with a prefix as wide as the 4-character
    // connector, so every nesting level stays aligned.
    const string child_indent = indent + (is_last ? "    " : "|   ");

    // Status queries use the error_code overloads: an unreadable entry is
    // skipped instead of throwing out of the whole scan.
    error_code ec;

    // --- 2. If it's a directory, recurse ---
    // NOTE(review): is_directory follows symlinks, so a symlink cycle would
    // recurse without bound - confirm whether links should be skipped.
    if (fs::is_directory(current_path, ec)) {
        vector<fs::directory_entry> entries;
        try {
            for (const auto& entry : fs::directory_iterator(current_path)) {
                // Filter before collecting so excluded trees are never visited.
                if (!isExcluded(entry, base_path)) {
                    entries.push_back(entry);
                }
            }
        } catch (const exception& e) {
            cerr << "Warning: Cannot access directory '" << current_path.string() << "': " << e.what() << endl;
            tree_output << child_indent << "[Access Error]" << '\n';
            return; // Stop recursion for this branch
        }

        // Alphabetical order by name keeps the output stable between runs.
        sort(entries.begin(), entries.end(),
             [](const fs::directory_entry& a, const fs::directory_entry& b) {
                 return a.path().filename().string() < b.path().filename().string();
             });

        for (size_t i = 0; i < entries.size(); ++i) {
            processDirectory(entries[i].path(), base_path, child_indent,
                             (i == entries.size() - 1), tree_output, file_content_output);
        }
        return;
    }

    // --- 3. If it's a file, check if it's a text file and process content ---
    if (!fs::is_regular_file(current_path, ec) || !isTextFile(current_path)) {
        return;
    }

    // Paths come from iterating below base_path, so a purely lexical
    // relative path is enough and needs no filesystem access.
    fs::path display_path = current_path.lexically_relative(base_path);
    if (display_path.empty()) {
        display_path = current_path;
    }

    string content;
    if (!readFileContent(current_path, content)) {
        content = "[无法读取文件内容 - 可能被占用、权限不足或编码问题]";
        cerr << "Warning: Could not read file: " << current_path.string() << endl;
    }

    const string fence = makeCodeFence(content);
    file_content_output << "====================================================" << '\n';
    file_content_output << "文件: " << display_path.string() << '\n';
    file_content_output << "====================================================" << '\n';
    file_content_output << fence << guessLanguage(current_path) << '\n';
    file_content_output << content;
    // The closing fence must start on its own line.
    if (!content.empty() && content.back() != '\n') {
        file_content_output << '\n';
    }
    file_content_output << fence << '\n' << '\n';
}
// --- Main Function ---
// Entry point. Scans the directory given as the first command-line argument
// (or the current working directory when none is given), builds a text report
// made of the folder tree followed by the contents of every text file, and
// copies that report to the clipboard.
//
// Returns 0 on success and 1 when the target is invalid, the scan fails with
// an exception, or the clipboard cannot be written.
int main(int argc, char* argv[]) {
    // Set console output to UTF-8 (important for status messages if they contain Unicode)
#ifdef _WIN32
    SetConsoleOutputCP(CP_UTF8);
    SetConsoleCP(CP_UTF8);
#endif
    // Some systems/compilers may additionally need a UTF-8 locale imbued into
    // cout/cerr; that is intentionally not done here.

    fs::path target_path;
    try {
        // current_path() can throw too, so it is resolved inside the try block.
        target_path = (argc > 1) ? fs::path(argv[1]) : fs::current_path();
        cout << "Scanning directory: " << target_path.string() << endl;

        if (!fs::exists(target_path)) {
            throw runtime_error("Target path does not exist.");
        }
        if (!fs::is_directory(target_path)) {
            throw runtime_error("Target path is not a directory.");
        }
        // Use canonical path for consistency
        target_path = fs::canonical(target_path);
        cout << "Canonical path: " << target_path.string() << endl;
    } catch (const exception& e) {
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    string final_text;
    try {
        stringstream tree_output;
        stringstream file_content_output;
        cout << "Processing..." << endl;

        // --- Generate Tree and File Content ---
        tree_output << target_path.filename().string() << '\n'; // Start tree with root dir name

        vector<fs::directory_entry> root_entries;
        try {
            for (const auto& entry : fs::directory_iterator(target_path)) {
                if (!isExcluded(entry, target_path)) {
                    root_entries.push_back(entry);
                }
            }
        } catch (const exception& e) {
            cerr << "Warning: Cannot access directory '" << target_path.string() << "': " << e.what() << endl;
            tree_output << " [Access Error]" << '\n';
            root_entries.clear(); // Cannot process further
        }

        // Sort root entries by name for stable output.
        sort(root_entries.begin(), root_entries.end(),
             [](const fs::directory_entry& a, const fs::directory_entry& b) {
                 return a.path().filename().string() < b.path().filename().string();
             });

        for (size_t i = 0; i < root_entries.size(); ++i) {
            processDirectory(root_entries[i].path(), target_path, "",
                             (i == root_entries.size() - 1), tree_output, file_content_output);
        }

        // --- Combine Outputs ---
        final_text = "=== Folder Structure ===\n\n";
        final_text += tree_output.str();
        final_text += "\n=== File Contents ===\n\n";
        final_text += file_content_output.str();
    } catch (const exception& e) {
        // Without this handler any exception escaping the scan would
        // terminate the program with no explanation.
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    // --- Copy to Clipboard ---
    cout << "Copying output to clipboard..." << endl;
    if (!copyToClipboard(final_text)) {
        cerr << "Failed to copy output to clipboard." << endl;
        return 1; // Indicate failure
    }
    cout << "Success! Output copied to clipboard." << endl;
    return 0;
}
@CodeBoy2006
Copy link
Author

Build command: g++ scan_project.cpp -o scan_project.exe -std=c++17 -lstdc++fs -static

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment