Python操作Excel删除行列与单元格详解

一、准备工作

1.1 安装必要库

pip install openpyxl  # 适用于.xlsx文件
pip install xlrd xlwt  # 适用于.xls文件（旧版）
pip install pandas  # 高级数据处理

1.2 测试文件准备

创建一个名为example.xlsx的测试文件，包含以下数据：

序号	姓名	年龄	部门	薪资
1	张三	28	技术部	15000
2	李四	32	市场部	18000
3	王五	25	人事部	12000
4	赵六	40	财务部	20000

二、使用openpyxl删除行列

2.1 删除单行

from openpyxl import load_workbook

def delete_row(file_path, sheet_name, row_index):
    """删除指定行"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]
    ws.delete_rows(row_index)
    wb.save(file_path)
    print(f"已删除第{row_index}行")

# 示例：删除第2行（李四）
delete_row('example.xlsx', 'Sheet1', 2)

2.2 删除多行

def delete_multiple_rows(file_path, sheet_name, start_row, num_rows):
    """删除从start_row开始的num_rows行"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]
    ws.delete_rows(start_row, num_rows)
    wb.save(file_path)
    print(f"已删除从{start_row}行开始的{num_rows}行")

# 示例：删除第2-3行（李四和王五）
delete_multiple_rows('example.xlsx', 'Sheet1', 2, 2)

2.3 删除列

def delete_column(file_path, sheet_name, col_letter):
    """删除指定列"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]
    ws.delete_cols(col_letter)
    wb.save(file_path)
    print(f"已删除{col_letter}列")

# 示例：删除D列（部门）
delete_column('example.xlsx', 'Sheet1', 'D')

三、使用pandas删除行列

3.1 删除指定行

import pandas as pd

def pandas_delete_rows(file_path, sheet_name, rows_to_drop):
    """使用pandas删除行"""
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df = df.drop(rows_to_drop)
    df.to_excel(file_path, index=False)
    print(f"已删除行：{rows_to_drop}")

# 示例：删除索引为1和2的行（第2-3行）
pandas_delete_rows('example.xlsx', 'Sheet1', [1, 2])

3.2 删除指定列

def pandas_delete_columns(file_path, sheet_name, cols_to_drop):
    """使用pandas删除列"""
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df = df.drop(cols_to_drop, axis=1)
    df.to_excel(file_path, index=False)
    print(f"已删除列：{cols_to_drop}")

# 示例：删除部门和薪资列
pandas_delete_columns('example.xlsx', 'Sheet1', ['部门', '薪资'])

四、高级操作：条件删除

4.1 删除满足条件的行

def delete_rows_by_condition(file_path, sheet_name, condition):
    """根据条件删除行"""
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df = df[~condition(df)]
    df.to_excel(file_path, index=False)
    print("已根据条件删除行")

# 示例：删除年龄大于30的行
delete_rows_by_condition(
    'example.xlsx', 
    'Sheet1',
    lambda df: df['年龄'] > 30
)

4.2 删除空值行

def delete_empty_rows(file_path, sheet_name):
    """删除所有空行"""
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df = df.dropna(how='all')
    df.to_excel(file_path, index=False)
    print("已删除所有空行")

# 使用示例
delete_empty_rows('example.xlsx', 'Sheet1')

五、单元格操作

5.1 清空单元格内容

from openpyxl import load_workbook

def clear_cell(file_path, sheet_name, cell_address):
    """清空指定单元格内容"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]
    ws[cell_address] = None
    wb.save(file_path)
    print(f"已清空单元格{cell_address}")

# 示例：清空B2单元格
clear_cell('example.xlsx', 'Sheet1', 'B2')

5.2 批量清空单元格区域

def clear_cell_range(file_path, sheet_name, start_cell, end_cell):
    """清空单元格区域"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]

    for row in ws[start_cell:end_cell]:
        for cell in row:
            cell.value = None

    wb.save(file_path)
    print(f"已清空区域{start_cell}:{end_cell}")

# 示例：清空B2:D4区域
clear_cell_range('example.xlsx', 'Sheet1', 'B2', 'D4')

六、样式处理（删除后保持格式）

6.1 删除行但保留格式

from openpyxl import load_workbook
from openpyxl.utils import get_column_letter

def delete_rows_preserve_format(file_path, sheet_name, row_index):
    """删除行并尝试保留下方行的格式"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]

    # 获取要删除行的格式
    formats = {}
    for col in range(1, ws.max_column + 1):
        col_letter = get_column_letter(col)
        formats[col_letter] = ws[f"{col_letter}{row_index}"].style

    # 删除行
    ws.delete_rows(row_index)

    # 将格式应用到下方行（可选）
    # for col_letter, style in formats.items():
    #     ws[f"{col_letter}{row_index}"].style = style

    wb.save(file_path)
    print(f"已删除第{row_index}行并尝试保留格式")

七、性能优化建议

7.1 批量操作最佳实践

def batch_delete_rows(file_path, sheet_name, rows_to_delete):
    """批量删除行（性能优化版）"""
    wb = load_workbook(file_path)
    ws = wb[sheet_name]

    # 按降序删除避免索引错位
    for row_index in sorted(rows_to_delete, reverse=True):
        ws.delete_rows(row_index)

    wb.save(file_path)
    print(f"已批量删除行：{rows_to_delete}")

# 示例：删除第2,4,6行
batch_delete_rows('example.xlsx', 'Sheet1', [2, 4, 6])

7.2 使用只读模式提高读取效率

def read_only_operations(file_path):
    """只读模式下的操作示例"""
    from openpyxl import load_workbook

    # 只读模式
    wb = load_workbook(file_path, read_only=True)
    ws = wb['Sheet1']

    # 获取需要删除的行索引（不实际删除）
    rows_to_delete = []
    for idx, row in enumerate(ws.iter_rows(values_only=True), 1):
        if row[2] > 30:  # 假设第3列是年龄
            rows_to_delete.append(idx)

    # 关闭只读工作簿
    wb.close()

    # 在可写模式下执行删除
    wb = load_workbook(file_path)
    ws = wb['Sheet1']
    for row_idx in sorted(rows_to_delete, reverse=True):
        ws.delete_rows(row_idx)
    wb.save(file_path)

    print(f"已删除年龄大于30的行：{rows_to_delete}")

八、错误处理与日志记录

8.1 带错误处理的删除操作

import logging
from openpyxl.utils.exceptions import IllegalCharacterError

logging.basicConfig(filename='excel_operations.log', level=logging.INFO)

def safe_delete_rows(file_path, sheet_name, row_indices):
    """带错误处理的行删除"""
    try:
        wb = load_workbook(file_path)
        ws = wb[sheet_name]

        for row_idx in sorted(row_indices, reverse=True):
            if row_idx <= ws.max_row:
                ws.delete_rows(row_idx)
                logging.info(f"成功删除行：{row_idx}")
            else:
                logging.warning(f"行{row_idx}不存在，最大行数为{ws.max_row}")

        wb.save(file_path)
        return True
    except IllegalCharacterError as e:
        logging.error(f"非法字符错误：{e}")
        return False
    except Exception as e:
        logging.error(f"未知错误：{e}")
        return False

# 使用示例
success = safe_delete_rows('example.xlsx', 'Sheet1', [2, 10, 5])
if not success:
    print("操作失败，请查看日志文件")

九、实际应用案例

9.1 清理Excel报表模板

def clean_report_template(file_path):
    """清理报表模板中的示例数据"""
    df = pd.read_excel(file_path, sheet_name='Report')

    # 删除前两行示例数据
    df = df.iloc[2:]

    # 删除备注列
    if '备注' in df.columns:
        df = df.drop('备注', axis=1)

    # 清空合计行
    df.loc[df['项目'] == '合计', :] = None

    # 保存结果
    df.to_excel(file_path, index=False)
    print("报表模板清理完成")

# 使用示例
clean_report_template('monthly_report.xlsx')

9.2 处理大型Excel文件

def process_large_excel(input_path, output_path):
    """分块处理大型Excel文件"""
    chunk_size = 1000
    reader = pd.read_excel(input_path, chunksize=chunk_size)

    with pd.ExcelWriter(output_path) as writer:
        for i, chunk in enumerate(reader):
            # 删除不需要的列
            chunk = chunk.drop(['临时列', '测试数据'], axis=1, errors='ignore')

            # 删除空行
            chunk = chunk.dropna(how='all')

            # 写入到新文件
            chunk.to_excel(writer, sheet_name='Data', startrow=i*chunk_size, index=False)

    print(f"大型文件处理完成，已保存到{output_path}")

十、总结对比

10.1 不同方法的比较

方法	优点	缺点	适用场景
openpyxl	精细控制格式，支持.xlsx	处理大数据较慢	需要保留格式的修改
pandas	简洁高效，适合数据处理	可能丢失部分格式	数据清洗和批量处理
xlrd/xlwt	支持旧版.xls	功能有限，不推荐新项目	必须处理.xls的旧系统