Created
April 4, 2025 01:09
-
-
Save CodeBoy2006/722c1d2b950ed35603d69e2ad503b798 to your computer and use it in GitHub Desktop.
C++ Windows tool: Scans dir, outputs filtered tree & file contents (Markdown), copies to clipboard (for LLMs/docs).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>        // std::tolower
#include <filesystem>    // Requires C++17
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>  // std::error_code
#include <vector>

#include <windows.h>     // For Windows Clipboard API
| // Pull std names into the global namespace so the code below can use unqualified names (string, vector, cout, ...) | |
| using namespace std; | |
| namespace fs = std::filesystem; // Keep the alias for filesystem for clarity | |
| // --- Configuration --- | |
// Directory names that are skipped entirely during the scan. isExcluded
// compares these against the final path component with an exact,
// case-sensitive match.
const vector<string> EXCLUDED_DIRS{
    // Dependency folders and version-control metadata
    "node_modules", ".git", ".svn", ".hg",
    // Build output
    "dist", "build", "out", "target",
    // Python caches and virtual environments
    "__pycache__", ".venv", "venv", "env",
    // Editor / IDE settings
    ".vscode", ".idea"
};
// File extensions whose contents are included in the output. isTextFile
// lower-cases a file's extension before comparing, so every entry here must
// be lower case and carry its leading dot.
const vector<string> TEXT_EXTENSIONS{
    // Plain text, configuration and structured data
    ".txt", ".log", ".ini", ".conf", ".config", ".md",
    ".json", ".xml", ".yaml", ".yml", ".toml",
    // Shell, batch and PowerShell scripts
    ".sh", ".bash", ".zsh", ".bat", ".cmd", ".ps1",
    // General-purpose languages
    ".py", ".java", ".c", ".cpp", ".h", ".hpp", ".cs",
    // Web front end / back end
    ".js", ".mjs", ".ts", ".tsx", ".jsx", ".html", ".htm",
    ".css", ".scss", ".sass", ".less", ".php", ".sql", ".vue",
    // More languages
    ".go", ".rb", ".swift", ".kt", ".kts", ".rs", ".lua",
    ".pl", ".pm", ".dart",
    // Tooling, project and build files
    ".dockerfile", ".tf", ".gitignore", ".gitattributes", ".editorconfig", ".env",
    ".properties", ".gradle", ".pom", ".csproj", ".vbproj", ".fsproj",
    ".sln", ".mk", ".makefile", ".cmake",
    // Remaining languages, server pages and tabular text
    ".scala", ".sbt", ".tex", ".r", ".vb", ".fs",
    ".aspx", ".asp", ".jsp", ".rst", ".csv", ".tsv"
};
| // --- End Configuration --- | |
| // --- Helper Functions --- | |
// Converts a UTF-8 encoded string into a UTF-16 wstring using the Win32
// MultiByteToWideChar API. An empty input produces an empty result.
wstring utf8_to_wstring(const string& str) {
    wstring wide;
    if (!str.empty()) {
        const char* const source = &str[0];
        const int source_len = static_cast<int>(str.size());

        // First call only measures: with a zero-sized output buffer the API
        // returns the number of wide characters the conversion needs.
        const int wide_len =
            MultiByteToWideChar(CP_UTF8, 0, source, source_len, nullptr, 0);

        // Second call performs the conversion into the sized buffer.
        wide.resize(static_cast<size_t>(wide_len), L'\0');
        MultiByteToWideChar(CP_UTF8, 0, source, source_len, &wide[0], wide_len);
    }
    return wide;
}
// Puts `text` (UTF-8) on the Windows clipboard as CF_UNICODETEXT.
//
// Returns true on success. On any failure an error line is written to cerr,
// the clipboard is closed again, and false is returned.
bool copyToClipboard(const string& text) {
    const wstring wide = utf8_to_wstring(text);
    // Payload size in bytes, including the terminating NUL character.
    const auto byte_count = (wide.size() + 1) * sizeof(wchar_t);

    if (OpenClipboard(nullptr) == 0) {
        cerr << "Error: Cannot open clipboard." << endl;
        return false;
    }

    if (EmptyClipboard() == 0) {
        cerr << "Error: Cannot empty clipboard." << endl;
        CloseClipboard();
        return false;
    }

    // Clipboard data is handed over as a movable global memory block.
    const HGLOBAL block = GlobalAlloc(GMEM_MOVEABLE, byte_count);
    if (block == nullptr) {
        CloseClipboard();
        cerr << "Error: Cannot allocate memory for clipboard." << endl;
        return false;
    }

    void* const locked = GlobalLock(block);
    if (locked == nullptr) {
        GlobalFree(block);
        CloseClipboard();
        cerr << "Error: Cannot lock memory for clipboard." << endl;
        return false;
    }
    memcpy(locked, wide.c_str(), byte_count);
    GlobalUnlock(block);

    // The block is freed here only when the hand-over fails; after a
    // successful SetClipboardData it is not touched again.
    const bool stored = SetClipboardData(CF_UNICODETEXT, block) != nullptr;
    if (!stored) {
        cerr << "Error: Cannot set clipboard data." << endl;
        GlobalFree(block);
    }
    CloseClipboard();
    return stored;
}
| // Checks if a directory entry should be excluded | |
| bool isExcluded(const fs::directory_entry& entry, const fs::path& base_path) { | |
| if (!entry.is_directory()) { | |
| return false; // Only exclude directories directly | |
| } | |
| string dir_name = entry.path().filename().string(); | |
| for (const auto& excluded : EXCLUDED_DIRS) { | |
| if (dir_name == excluded) { | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
| // Checks if a file extension is in our list of text/code files | |
| bool isTextFile(const fs::path& file_path) { | |
| if (!file_path.has_extension()) { | |
| return false; | |
| } | |
| string ext = file_path.extension().string(); | |
| // Convert to lower case for case-insensitive comparison | |
| transform(ext.begin(), ext.end(), ext.begin(), | |
| [](unsigned char c){ return std::tolower(c); }); // Use std::tolower here explicitly if needed | |
| for (const auto& text_ext : TEXT_EXTENSIONS) { | |
| if (ext == text_ext) { | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
// Guesses the Markdown code-fence language hint from a file's extension.
//
// The extension is matched case-insensitively. Returns an empty string when
// the path has no extension or the extension has no known hint.
string guessLanguage(const fs::path& file_path) {
    if (!file_path.has_extension()) return "";

    string ext = file_path.extension().string();
    // tolower has undefined behaviour for negative char values (any non-ASCII
    // byte where char is signed), so each byte goes through unsigned char.
    std::transform(ext.begin(), ext.end(), ext.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    struct Hint {
        const char* ext;
        const char* lang;
    };
    // Add more language hints as needed.
    static const Hint kHints[] = {
        {".py", "python"},
        {".js", "javascript"},
        {".ts", "typescript"},   {".tsx", "typescript"},
        {".java", "java"},
        {".cs", "csharp"},
        {".html", "html"},       {".htm", "html"},
        {".css", "css"},
        {".json", "json"},
        {".xml", "xml"},
        {".sql", "sql"},
        {".md", "markdown"},
        {".sh", "bash"},         {".bash", "bash"},    {".zsh", "bash"},
        {".bat", "batch"},       {".cmd", "batch"},
        {".ps1", "powershell"},
        {".c", "c"},             {".h", "c"},
        {".cpp", "cpp"},         {".hpp", "cpp"},
        {".go", "go"},
        {".php", "php"},
        {".rb", "ruby"},
        {".yaml", "yaml"},       {".yml", "yaml"},
        {".toml", "toml"},
        {".dockerfile", "dockerfile"},
        {".vue", "vue"},
        {".swift", "swift"},
        {".kt", "kotlin"},       {".kts", "kotlin"},
        {".rs", "rust"},
        {".lua", "lua"},
        {".pl", "perl"},         {".pm", "perl"},
        {".dart", "dart"},
        {".scala", "scala"},
        {".tex", "latex"},
        {".r", "r"},
        {".vb", "vbnet"},
        {".fs", "fsharp"},
    };

    for (const Hint& hint : kHints) {
        if (ext == hint.ext) return hint.lang;
    }
    return "";  // Default to no language hint
}
// Loads an entire file into `content` as raw bytes (no newline translation).
//
// Returns false, leaving `content` untouched, when the file cannot be
// opened; otherwise replaces `content` with the bytes read and returns true.
bool readFileContent(const fs::path& file_path, string& content) {
    ifstream input(file_path, ios::binary);  // binary: keep bytes exactly as stored
    if (input.fail()) {
        return false;
    }

    string gathered;
    char chunk[4096];
    for (;;) {
        input.read(chunk, sizeof(chunk));
        const streamsize got = input.gcount();
        if (got <= 0) {
            break;  // end of file (or nothing more could be read)
        }
        gathered.append(chunk, static_cast<size_t>(got));
    }

    content.swap(gathered);
    return true;
}
| // --- Core Recursive Function --- | |
| void processDirectory( | |
| const fs::path& current_path, | |
| const fs::path& base_path, | |
| const string& indent, | |
| bool is_last, | |
| stringstream& tree_output, | |
| stringstream& file_content_output) | |
| { | |
| // Get relative path for display | |
| fs::path relative_path = fs::relative(current_path, base_path.parent_path()); // Relative to parent for clean output | |
| // --- 1. Add current item to tree --- | |
| tree_output << indent; | |
| if (is_last) { | |
| tree_output << "\\---"; | |
| } else { | |
| tree_output << "+---"; | |
| } | |
| tree_output << relative_path.filename().string() << endl; | |
| // Prepare indentation for children | |
| string child_indent = indent + (is_last ? " " : "| "); | |
| // --- 2. If it's a directory, recurse --- | |
| if (fs::is_directory(current_path)) { | |
| vector<fs::directory_entry> entries; | |
| try { | |
| for (const auto& entry : fs::directory_iterator(current_path)) { | |
| // Crucially, filter *before* adding to the list for recursion | |
| if (!isExcluded(entry, base_path)) { | |
| entries.push_back(entry); | |
| } | |
| } | |
| } catch (const exception& e) { | |
| cerr << "Warning: Cannot access directory '" << current_path.string() << "': " << e.what() << endl; | |
| tree_output << child_indent << "[Access Error]" << endl; // Indicate error in tree | |
| return; // Stop recursion for this branch | |
| } | |
| // Sort entries for consistent output (optional but nice) | |
| sort(entries.begin(), entries.end(), [](const fs::directory_entry& a, const fs::directory_entry& b) { | |
| // Simple alphabetical sort | |
| return a.path().filename().string() < b.path().filename().string(); | |
| }); | |
| for (size_t i = 0; i < entries.size(); ++i) { | |
| processDirectory(entries[i].path(), base_path, child_indent, (i == entries.size() - 1), tree_output, file_content_output); | |
| } | |
| } | |
| // --- 3. If it's a file, check if it's a text file and process content --- | |
| else if (fs::is_regular_file(current_path) && isTextFile(current_path)) { | |
| string content; | |
| string lang = guessLanguage(current_path); | |
| string rel_path_str = fs::relative(current_path, base_path).string(); // Relative to base for content header | |
| file_content_output << "====================================================" << endl; | |
| file_content_output << "文件: " << rel_path_str << endl; | |
| file_content_output << "====================================================" << endl; | |
| file_content_output << "```" << lang << endl; | |
| if (readFileContent(current_path, content)) { | |
| file_content_output << content; | |
| } else { | |
| file_content_output << "[无法读取文件内容 - 可能被占用、权限不足或编码问题]"; | |
| cerr << "Warning: Could not read file: " << current_path.string() << endl; | |
| } | |
| // Ensure newline before closing backticks if file didn't end with one | |
| if (!content.empty() && content.back() != '\n') { | |
| file_content_output << endl; | |
| } | |
| file_content_output << "```" << endl << endl; | |
| } | |
| } | |
| // --- Main Function --- | |
// Entry point: scans the directory given as the first argument (or the
// current directory), builds the folder tree plus the contents of all text
// files, and copies the combined Markdown-style report to the clipboard.
//
// Returns 0 on success, 1 when the target is invalid or the clipboard copy
// fails.
int main(int argc, char* argv[]) {
    // Set console output to UTF-8 (important for status messages if they contain Unicode)
#ifdef _WIN32
    SetConsoleOutputCP(CP_UTF8);
    SetConsoleCP(CP_UTF8);
#endif

    // Paths are emitted as UTF-8 to match the console code page selected
    // above and the UTF-8 decoding applied before the clipboard copy;
    // path::string() is not guaranteed to produce UTF-8.
    const auto to_utf8 = [](const fs::path& p) {
        const auto encoded = p.u8string();
        return string(encoded.begin(), encoded.end());
    };
    // Alphabetical ordering on the native name (no encoding conversion).
    const auto by_name = [](const fs::directory_entry& a, const fs::directory_entry& b) {
        return a.path().filename().native() < b.path().filename().native();
    };

    fs::path target_path = (argc > 1) ? fs::path(argv[1]) : fs::current_path();
    cout << "Scanning directory: " << to_utf8(target_path) << endl;

    try {
        if (!fs::exists(target_path)) {
            throw runtime_error("Target path does not exist.");
        }
        if (!fs::is_directory(target_path)) {
            throw runtime_error("Target path is not a directory.");
        }
        // Use canonical path for consistency
        target_path = fs::canonical(target_path);
        cout << "Canonical path: " << to_utf8(target_path) << endl;
    } catch (const exception& e) {
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    stringstream tree_output;
    stringstream file_content_output;
    stringstream final_output;
    cout << "Processing..." << endl;

    // --- Generate Tree and File Content ---
    // The tree starts with the root directory's name. A filesystem root
    // (e.g. "C:\\" or "/") has an empty filename(), so fall back to the full
    // path rather than emitting a blank first line.
    fs::path root_label = target_path.filename();
    if (root_label.empty()) {
        root_label = target_path;
    }
    tree_output << to_utf8(root_label) << '\n';

    vector<fs::directory_entry> root_entries;
    try {
        for (const auto& entry : fs::directory_iterator(target_path)) {
            if (!isExcluded(entry, target_path)) {
                root_entries.push_back(entry);
            }
        }
    } catch (const exception& e) {
        cerr << "Warning: Cannot access directory '" << to_utf8(target_path)
             << "': " << e.what() << endl;
        tree_output << " [Access Error]" << '\n';
        root_entries.clear();  // Cannot process further
    }

    sort(root_entries.begin(), root_entries.end(), by_name);

    for (size_t i = 0; i < root_entries.size(); ++i) {
        processDirectory(root_entries[i].path(), target_path, "",
                         i + 1 == root_entries.size(), tree_output, file_content_output);
    }

    // --- Combine Outputs ---
    final_output << "=== Folder Structure ===" << '\n' << '\n';
    final_output << tree_output.str() << '\n';
    final_output << "=== File Contents ===" << '\n' << '\n';
    final_output << file_content_output.str();

    // --- Copy to Clipboard ---
    cout << "Copying output to clipboard..." << endl;
    if (!copyToClipboard(final_output.str())) {
        cerr << "Failed to copy output to clipboard." << endl;
        // Optionally print to console as a fallback
        // cout << "\n--- Output ---\n" << final_output.str() << "\n------------\n";
        return 1;  // Indicate failure
    }
    cout << "Success! Output copied to clipboard." << endl;
    return 0;
}
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Build command (MinGW-w64 g++; `-lstdc++fs` is required only with GCC 8 and can be omitted with GCC 9 or later):
g++ scan_project.cpp -o scan_project.exe -std=c++17 -lstdc++fs -static