I'm facing a rather intricate challenge converting text files to PDF. Using the fpdf library, I'm trying to optimize the process for converting large volumes of textual data. I've read an interesting article on the subject (https://astuce2geek.fr/comment-convertir-un-fichier-en-pdf-avec-python/), but I wonder whether anyone has experimented with specific optimization techniques?
My question is twofold: First, how can we efficiently manage memory when converting very large text files? Second, are there methods to speed up the conversion process while maintaining the formatting and quality of the final PDF?
Feedback, tips, or code suggestions would be extremely helpful. Thanks in advance for your help!
Code: Select all
import os
from fpdf import FPDF
class MyPDF(FPDF):
    """FPDF document with a custom header, a page-number footer and a title helper."""

    # Customize the header
    def header(self):
        """Draw the centered header text in 8pt italic Arial."""
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, 'Ton En-tête Personnalisé', 0, 0, 'C')
        # Line break after the header: the cell above is drawn with ln=0, so
        # without this the body text of a page created by an automatic page
        # break would start on the header's own line and overlap it.
        self.ln(10)

    # Customize the footer
    def footer(self):
        """Draw the centered page number in 8pt italic Arial."""
        # A negative y is measured from the bottom of the page.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C')

    # Add a title
    def add_title(self, title):
        """Write *title* centered in bold 16pt Arial, then move to the next line."""
        self.set_xy(10, 20)  # larger y position so the title does not overlap the header
        self.set_font('Arial', 'B', 16)
        self.cell(190, 10, title, 0, 1, 'C')
# Fonction pour convertir un seul fichier texte en PDF
def txt_to_pdf(file_path, output_path, title="Mon Titre Stylé",
               image_path='image/image.jpg', encoding='utf-8'):
    """Convert a single text file to a PDF.

    The input is streamed line by line, so the text file itself is never
    loaded into memory as a whole.

    Args:
        file_path: Path of the text file to read.
        output_path: Path of the PDF file to write.
        title: Title written at the top of the first page.
        image_path: Image placed on the first page; pass None (or an empty
            string) to skip the image.
        encoding: Text encoding of the input file. Undecodable bytes are
            replaced instead of aborting the conversion.
    """
    line_height = 10

    pdf = MyPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # Add the title
    pdf.add_title(title)
    # Add an image
    # NOTE(review): the image sits at a fixed position (y=100) on the first
    # page while the text flows from the title downwards, so a file with
    # enough lines will be printed over it -- confirm this is intended.
    if image_path:
        pdf.image(image_path, x=10, y=100, w=100)
    # Open the text file for reading
    with open(file_path, 'r', encoding=encoding, errors='replace') as file:
        for line in file:
            # Drop the line terminator: it is not printable text.
            text = line.rstrip('\r\n')
            # The built-in Arial font only covers Latin-1; substitute anything
            # else so a single exotic character cannot abort the whole file.
            text = text.encode('latin-1', 'replace').decode('latin-1')
            if text:
                # Start each paragraph at the left margin and let multi_cell
                # wrap long lines instead of running off the page (width 0
                # means "up to the right margin").
                pdf.set_x(pdf.l_margin)
                pdf.multi_cell(0, line_height, text, align='L')
            else:
                # Keep blank lines of the source as vertical space.
                pdf.ln(line_height)
    # Save the PDF
    pdf.output(output_path)
# Nouvelle fonction pour traiter plusieurs fichiers
def convert_multiple_txt_to_pdf(folder_path, output_folder):
    """Convert every ``.txt`` file directly inside *folder_path* to a PDF.

    Each ``name.txt`` is written to ``output_folder/name.pdf``.

    Args:
        folder_path: Folder scanned (non-recursively) for ``.txt`` files.
        output_folder: Folder receiving the PDFs; created if it is missing.
    """
    # Make sure the destination exists before the first PDF is written.
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order is the same on every run and platform.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, filename)
        # A directory may also be named "something.txt"; skip anything that
        # is not a regular file.
        if not os.path.isfile(file_path):
            continue
        # Swap only the final extension; str.replace would also rewrite a
        # ".txt" occurring in the middle of the name.
        stem, _ = os.path.splitext(filename)
        output_path = os.path.join(output_folder, stem + ".pdf")
        txt_to_pdf(file_path, output_path)
# Run the conversion for a specific folder, but only when this file is executed
# as a script, so that importing it does not trigger a conversion.
if __name__ == "__main__":
    convert_multiple_txt_to_pdf("dossier_textes", "dossier_pdfs")