%pip install pdfplumber Unidecode
“Extract text from pdf”
“How to extract text from a PDF”
- toc: false
- branch: master
- badges: true
- comments: true
- categories: [snippet]
- hide: true
- search_exclude: false
- metadata_key1: metadata_value1
- metadata_key2: metadata_value2
- food: food
Notes
Packages
from typing import List
import pdfplumber
from unidecode import unidecode
def pdf_parser(
    filepath: str,
    x_tolerance: float = 1,
    y_tolerance: float = 1,
) -> List[str]:
    """Extract the text of every page of a PDF as ASCII-transliterated strings.

    The file is opened with ``pdfplumber``, each page's text is pulled out
    with ``page.extract_text`` and then passed through ``unidecode``.

    Args:
        filepath: Path of the PDF file to read.
        x_tolerance: Forwarded unchanged to ``page.extract_text``.
        y_tolerance: Forwarded unchanged to ``page.extract_text``.

    Returns:
        One string per page, in page order.  A page for which no text is
        extracted contributes an empty string, so list indices keep
        matching page positions.
    """
    with pdfplumber.open(filepath) as pdf:
        return [
            # ``extract_text`` may yield None for a page without text;
            # fall back to "" so ``unidecode`` always receives a str.
            unidecode(
                page.extract_text(
                    x_tolerance=x_tolerance, y_tolerance=y_tolerance
                )
                or ""
            )
            for page in pdf.pages
        ]
# Example usage: extract the per-page text of a sample PDF and display it.
pdf_texts = pdf_parser(filepath='my_sample.pdf')
print(pdf_texts)