/// <summary>
/// Extracts the text of every page of the PDF at <paramref name="pdfPath"/>,
/// producing one list entry per page.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to read.</param>
/// <returns>The extracted text of each page, in page order.</returns>
/// <remarks>
/// Exceptions raised while opening or parsing the document are not handled here;
/// they propagate to the caller unchanged.
/// NOTE(review): the extraction strategy appears to emit a single "\n" per detected
/// line change, so blank lines ("\n\n") and "\r\n" sequences are not reproduced in
/// the result. Presumably this is because PDF content stores positioned text rather
/// than line-feed characters; confirm against the iText documentation. Recovering
/// empty lines would likely require a custom extraction strategy.
/// </remarks>
public List<string> ExtractTextFromPdfA(string pdfPath)
{
    using (PdfReader pdfReader = new PdfReader(pdfPath))
    using (PdfDocument pdfDocument = new PdfDocument(pdfReader))
    {
        // The page count does not change while reading, so query it once
        // and use it to presize the result list.
        int pageCount = pdfDocument.GetNumberOfPages();
        var extractedText = new List<string>(pageCount);

        // GetPage is called with page numbers starting at 1.
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
        {
            // A new strategy instance is created for each page, as in the original;
            // presumably a strategy accumulates the text it has seen (TODO confirm).
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string pageText = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(pageNumber), strategy);
            extractedText.Add(pageText);
        }

        return extractedText;
    }
}
The code above extracts text from a PDF file, but the extracted text is missing line feeds: a blank line ("\n\n") in the document comes back as a single "\n", and "\r\n" comes back as "\n".