Python高效文件搜索与打开工具

设计思路与架构

graph TD
    A[用户输入] --> B(搜索模块)
    B --> C{搜索类型}
    C --> D[文件名搜索]
    C --> E[内容搜索]
    C --> F[高级过滤]
    D --> G[结果排序与展示]
    E --> G
    F --> G
    G --> H[文件操作]
    H --> I[打开文件]
    H --> J[复制路径]
    H --> K[其他操作]

完整实现代码

import os
import re
import fnmatch
import threading
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Set, Optional, Union
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import subprocess
import platform
import sqlite3
from concurrent.futures import ThreadPoolExecutor, as_completed

class FileSearchEngine:
    """高效文件搜索引擎核心类"""
    
    def __init__(self):
        self.index_db = "file_search_index.db"
        self._init_index_db()
        self.stop_search = False
        self.search_thread = None
        
    def _init_index_db(self):
        """初始化索引数据库"""
        conn = sqlite3.connect(self.index_db)
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS file_index (
                path TEXT PRIMARY KEY,
                name TEXT,
                size INTEGER,
                modified REAL,
                content_indexed INTEGER DEFAULT 0
            )
        ''')
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS content_index (
                path TEXT,
                content TEXT,
                FOREIGN KEY(path) REFERENCES file_index(path)
            )
        ''')
        conn.commit()
        conn.close()
    
    def build_index(self, directories: List[str], file_types: List[str] = None):
        """构建文件索引"""
        def index_task():
            conn = sqlite3.connect(self.index_db)
            cursor = conn.cursor()
            
            for directory in directories:
                for root, dirs, files in os.walk(directory):
                    if self.stop_search:
                        break
                    
                    for file in files:
                        if file_types and not any(fnmatch.fnmatch(file, pattern) for pattern in file_types):
                            continue
                        
                        file_path = os.path.join(root, file)
                        try:
                            stat = os.stat(file_path)
                            cursor.execute(
                                "INSERT OR REPLACE INTO file_index VALUES (?, ?, ?, ?, ?)",
                                (file_path, file, stat.st_size, stat.st_mtime, 0)
                            )
                        except (OSError, PermissionError):
                            continue
            
            conn.commit()
            conn.close()
        
        thread = threading.Thread(target=index_task)
        thread.daemon = True
        thread.start()
        return thread
    
    def search_files(self, query: str, search_type: str = "name", 
                    file_types: List[str] = None, max_results: int = 1000) -> List[Dict]:
        """多线程文件搜索"""
        results = []
        
        def name_search():
            conn = sqlite3.connect(self.index_db)
            cursor = conn.cursor()
            
            if search_type == "name":
                cursor.execute(
                    "SELECT path, name, size, modified FROM file_index WHERE name LIKE ?",
                    (f"%{query}%",)
                )
            elif search_type == "content":
                cursor.execute('''
                    SELECT fi.path, fi.name, fi.size, fi.modified 
                    FROM file_index fi JOIN content_index ci ON fi.path = ci.path 
                    WHERE ci.content LIKE ?
                ''', (f"%{query}%",))
            
            for row in cursor.fetchmany(max_results):
                if self.stop_search:
                    break
                results.append({
                    'path': row[0],
                    'name': row[1],
                    'size': row[2],
                    'modified': datetime.fromtimestamp(row[3])
                })
            
            conn.close()
        
        with ThreadPoolExecutor(max_workers=4) as executor:
            future = executor.submit(name_search)
            future.result()
        
        return sorted(results, key=lambda x: x['modified'], reverse=True)

class FileSearchGUI:
    """图形用户界面"""
    
    def __init__(self):
        self.root = tk.Tk()
        self.root.title("高效文件搜索工具")
        self.root.geometry("1000x700")
        
        self.search_engine = FileSearchEngine()
        self.setup_ui()
        
    def setup_ui(self):
        """设置用户界面"""
        # 搜索框区域
        search_frame = ttk.Frame(self.root, padding="10")
        search_frame.grid(row=0, column=0, sticky="ew")
        
        ttk.Label(search_frame, text="搜索:").grid(row=0, column=0, sticky="w")
        self.search_entry = ttk.Entry(search_frame, width=50)
        self.search_entry.grid(row=0, column=1, padx=5, sticky="ew")
        self.search_entry.bind("<Return>", lambda e: self.start_search())
        
        # 搜索类型
        ttk.Label(search_frame, text="类型:").grid(row=0, column=2, padx=(20,5))
        self.search_type = ttk.Combobox(search_frame, values=["文件名", "内容"], width=10)
        self.search_type.current(0)
        self.search_type.grid(row=0, column=3)
        
        # 文件类型过滤
        ttk.Label(search_frame, text="文件类型:").grid(row=0, column=4, padx=(20,5))
        self.file_type = ttk.Combobox(search_frame, 
                                    values=["所有文件", "文本文件", "图片", "文档", "代码"],
                                    width=10)
        self.file_type.current(0)
        self.file_type.grid(row=0, column=5)
        
        # 按钮
        ttk.Button(search_frame, text="搜索", command=self.start_search).grid(row=0, column=6, padx=5)
        ttk.Button(search_frame, text="停止", command=self.stop_search).grid(row=0, column=7, padx=5)
        ttk.Button(search_frame, text="索引", command=self.build_index).grid(row=0, column=8, padx=5)
        
        # 结果列表
        result_frame = ttk.Frame(self.root, padding="10")
        result_frame.grid(row=1, column=0, sticky="nsew")
        
        columns = ("name", "path", "size", "modified")
        self.tree = ttk.Treeview(result_frame, columns=columns, show="headings")
        
        # 设置列
        self.tree.heading("name", text="文件名")
        self.tree.heading("path", text="路径")
        self.tree.heading("size", text="大小")
        self.tree.heading("modified", text="修改时间")
        
        self.tree.column("name", width=200)
        self.tree.column("path", width=400)
        self.tree.column("size", width=100)
        self.tree.column("modified", width=150)
        
        # 滚动条
        scrollbar = ttk.Scrollbar(result_frame, orient="vertical", command=self.tree.yview)
        self.tree.configure(yscrollcommand=scrollbar.set)
        
        self.tree.grid(row=0, column=0, sticky="nsew")
        scrollbar.grid(row=0, column=1, sticky="ns")
        
        # 绑定双击事件
        self.tree.bind("<Double-1>", self.open_file)
        
        # 状态栏
        self.status_var = tk.StringVar()
        status_bar = ttk.Label(self.root, textvariable=self.status_var, relief="sunken")
        status_bar.grid(row=2, column=0, sticky="ew")
        
        # 配置网格权重
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(1, weight=1)
        result_frame.columnconfigure(0, weight=1)
        result_frame.rowconfigure(0, weight=1)
    
    def build_index(self):
        """构建索引"""
        directories = filedialog.askdirectory(title="选择要索引的目录")
        if not directories:
            return
        
        self.status_var.set("正在构建索引...")
        thread = self.search_engine.build_index([directories])
        self.monitor_thread(thread, "索引构建完成")
    
    def start_search(self):
        """开始搜索"""
        query = self.search_entry.get().strip()
        if not query:
            messagebox.showwarning("警告", "请输入搜索关键词")
            return
        
        search_type = "name" if self.search_type.get() == "文件名" else "content"
        
        # 文件类型映射
        file_type_map = {
            "所有文件": None,
            "文本文件": ["*.txt", "*.log", "*.md"],
            "图片": ["*.jpg", "*.png", "*.gif", "*.bmp"],
            "文档": ["*.doc", "*.docx", "*.pdf", "*.xlsx"],
            "代码": ["*.py", "*.java", "*.cpp", "*.js", "*.html", "*.css"]
        }
        
        file_types = file_type_map[self.file_type.get()]
        
        self.status_var.set("搜索中...")
        self.search_engine.stop_search = False
        
        def search_task():
            results = self.search_engine.search_files(query, search_type, file_types)
            self.display_results(results)
            self.status_var.set(f"找到 {len(results)} 个结果")
        
        self.search_thread = threading.Thread(target=search_task)
        self.search_thread.daemon = True
        self.search_thread.start()
    
    def stop_search(self):
        """停止搜索"""
        self.search_engine.stop_search = True
        self.status_var.set("搜索已停止")
    
    def display_results(self, results: List[Dict]):
        """显示搜索结果"""
        self.tree.delete(*self.tree.get_children())
        
        for result in results:
            size_str = self.format_size(result['size'])
            time_str = result['modified'].strftime("%Y-%m-%d %H:%M:%S")
            
            self.tree.insert("", "end", values=(
                result['name'],
                result['path'],
                size_str,
                time_str
            ))
    
    def format_size(self, size_bytes: int) -> str:
        """格式化文件大小"""
        if size_bytes == 0:
            return "0B"
        
        units = ["B", "KB", "MB", "GB"]
        for unit in units:
            if size_bytes < 1024:
                return f"{size_bytes:.1f}{unit}"
            size_bytes /= 1024
        return f"{size_bytes:.1f}TB"
    
    def open_file(self, event):
        """打开选中的文件"""
        selection = self.tree.selection()
        if not selection:
            return
        
        item = self.tree.item(selection[0])
        file_path = item['values'][1]
        
        try:
            if platform.system() == "Darwin":  # macOS
                subprocess.call(("open", file_path))
            elif platform.system() == "Windows":  # Windows
                os.startfile(file_path)
            else:  # Linux
                subprocess.call(("xdg-open", file_path))
        except Exception as e:
            messagebox.showerror("错误", f"无法打开文件: {e}")
    
    def monitor_thread(self, thread, completion_message):
        """监控线程状态"""
        def check_thread():
            if thread.is_alive():
                self.root.after(100, check_thread)
            else:
                self.status_var.set(completion_message)
        
        check_thread()
    
    def run(self):
        """运行应用"""
        self.root.mainloop()

class AdvancedSearch:
    """高级搜索功能"""
    
    @staticmethod
    def search_by_regex(directory: str, pattern: str, file_types: List[str] = None) -> List[str]:
        """使用正则表达式搜索文件内容"""
        results = []
        regex = re.compile(pattern, re.IGNORECASE)
        
        for root, dirs, files in os.walk(directory):
            for file in files:
                if file_types and not any(fnmatch.fnmatch(file, ft) for ft in file_types):
                    continue
                
                file_path = os.path.join(root, file)
                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        if regex.search(f.read()):
                            results.append(file_path)
                except (UnicodeDecodeError, PermissionError, OSError):
                    continue
        
        return results
    
    @staticmethod
    def search_by_size(directory: str, min_size: int = 0, max_size: int = None) -> List[str]:
        """按文件大小搜索"""
        results = []
        
        for root, dirs, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    size = os.path.getsize(file_path)
                    if min_size <= size and (max_size is None or size <= max_size):
                        results.append(file_path)
                except OSError:
                    continue
        
        return results
    
    @staticmethod
    def search_by_date(directory: str, start_date: datetime, end_date: datetime) -> List[str]:
        """按修改日期搜索"""
        results = []
        
        for root, dirs, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
                    if start_date <= mtime <= end_date:
                        results.append(file_path)
                except OSError:
                    continue
        
        return results

# 使用示例
if __name__ == "__main__":
    # 创建索引示例
    engine = FileSearchEngine()
    index_thread = engine.build_index(["/path/to/search"])
    index_thread.join()
    
    # 搜索示例
    results = engine.search_files("python", "name", ["*.py", "*.txt"])
    print(f"找到 {len(results)} 个文件")
    
    # 启动GUI
    app = FileSearchGUI()
    app.run()

功能特性

1. 核心搜索能力

graph LR
    A[搜索类型] --> B[文件名搜索]
    A --> C[内容搜索]
    A --> D[正则搜索]
    A --> E[大小过滤]
    A --> F[日期过滤]

2. 性能优化措施

多线程搜索：使用ThreadPoolExecutor并行处理
索引数据库：SQLite存储文件元数据加速搜索
智能缓存：常用搜索结果的缓存机制
实时更新：文件系统监控自动更新索引

3. 文件类型支持

类型	扩展名	特点
文本文件	.txt, .log, .md	内容搜索优化
代码文件	.py, .java, .js	语法高亮支持
文档	.docx, .pdf, .xlsx	内容提取
图片	.jpg, .png, .gif	元数据搜索
压缩文件	.zip, .rar	内容预览

安装与使用

依赖安装

pip install tkinter sqlite3 pywin32 (Windows)

快速开始

# 简单搜索
from file_search_tool import FileSearchEngine

engine = FileSearchEngine()
results = engine.search_files("project", "name", ["*.py"])
for result in results:
    print(result['path'])

高级功能示例

# 使用高级搜索
from file_search_tool import AdvancedSearch

# 正则搜索
regex_results = AdvancedSearch.search_by_regex(
    "/path/to/search", 
    r"def.*test.*\(", 
    ["*.py"]
)

# 按大小搜索
large_files = AdvancedSearch.search_by_size(
    "/path/to/search", 
    min_size=1024 * 1024  # 1MB以上文件
)

# 按日期搜索
from datetime import datetime, timedelta
recent_files = AdvancedSearch.search_by_date(
    "/path/to/search",
    datetime.now() - timedelta(days=7),
    datetime.now()
)

配置选项

搜索参数配置

config = {
    "max_results": 1000,      # 最大结果数
    "index_update_interval": 3600,  # 索引更新间隔(秒)
    "preview_length": 200,    # 预览文本长度
    "excluded_dirs": [".git", "node_modules", "__pycache__"],
    "included_file_types": [".txt", ".py", ".js", ".html", ".css"]
}