Python怎样实现PDF文件的分割

1、安装库

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple PyPDF2

2、代码编写

# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple PyPDF2
import os
from PyPDF2 import PdfWriter, PdfReader

def split(pdf_file, delta, output_dir):
    """Split a PDF into consecutive files of at most ``delta`` pages each.

    Every output file is named ``"<first>-<last>.pdf"``, where the numbers
    are the 1-based page numbers of the source document covered by that
    file, and is written into ``output_dir``. The last file holds the
    remaining pages when the page count is not a multiple of ``delta``.
    No empty trailing file is produced when the page count divides evenly,
    and a PDF without pages produces no output files at all.

    Args:
        pdf_file: Path of the PDF file to split.
        delta: Maximum number of pages per output file; must be positive.
        output_dir: Directory that receives the generated files. It is
            created if it does not exist yet.

    Raises:
        ValueError: If ``delta`` is zero or negative.
    """
    # Validate first so a bad chunk size fails with a clear message instead
    # of a ZeroDivisionError (or silently wrong output) deep in the loop.
    if delta <= 0:
        raise ValueError(f"delta must be a positive page count, got {delta!r}")

    # The reader pulls pages from the stream while they are being copied, so
    # the input file has to stay open until every chunk has been written.
    with open(pdf_file, 'rb') as input_stream:
        pdf_input = PdfReader(input_stream)
        page_count = len(pdf_input.pages)

        # An empty output_dir means "current directory"; makedirs('') would
        # raise, so only create the directory when a name was given.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Stepping by delta yields exactly one iteration per non-empty chunk,
        # which avoids the empty trailing file on an exact multiple.
        for start in range(0, page_count, delta):
            end = min(start + delta, page_count)

            pdf_out = PdfWriter()
            for page_index in range(start, end):
                pdf_out.add_page(pdf_input.pages[page_index])

            full_name = f"{start + 1}-{end}.pdf"
            file_path = os.path.join(output_dir, full_name)
            with open(file_path, 'wb') as out_stream:
                pdf_out.write(out_stream)

            # Report success only after the chunk is actually on disk.
            print(full_name + '切分完成')

# Example run: cut the book into consecutive files of up to 60 pages each.
pdf_path = "Python编程:从入门到实践.pdf"  # source PDF to split
page_count = 60  # handed to split() as delta, i.e. pages per output file
out_dir = "输出目录"  # directory that receives the generated files
split(pdf_file=pdf_path, delta=page_count, output_dir=out_dir)

3、效果展示

file

Leave a Comment