Make PDF Searchable
The code below shows how to add a searchable text layer to a PDF using OCR.
This sample requires a Professional+OCR license. If you use a Trial License, the generated file will contain some mangled words.
C#
using System;
using System.IO;
using SolidFramework.Converters;
using SolidFramework.Converters.Plumbing;

namespace OCRforSearch
{
    /// <summary>
    /// Sample: converts a PDF into a PDF/A file with an OCR-generated searchable text layer.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Imports the license, configures a <see cref="PdfToPdfAConverter"/> and writes
        /// the searchable PDF/A next to the source file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used by this sample).</param>
        [STAThread]
        static void Main(string[] args)
        {
            // Import your Solid Documents license.
            SolidFramework.License.Import(@"C:\MyFolder\license.xml");

            // Set the location of the file you want to convert.
            string pdfPath = @"C:\YourFolder\yourpdf.pdf";

            // Derive the output path by swapping the extension, so the source file
            // is never the conversion target (yourpdf.pdf -> yourpdf.pdfa.pdf).
            string strPDFAPath = Path.ChangeExtension(pdfPath, "pdfa.pdf");

            // Create a searchable PDF/A.
            using (PdfToPdfAConverter converter = new PdfToPdfAConverter())
            {
                // Make the converted file searchable.
                converter.OcrType = SolidFramework.Converters.Plumbing.OcrType.CreateSearchableTextLayer;

                // Set the PDF/A validation mode; the other PDF/A formats are available too.
                converter.ValidationMode = SolidFramework.Plumbing.ValidationMode.PdfA2B;

                // Add the source file.
                converter.AddSourceFile(pdfPath);

                // Convert exactly once, to the derived output path (overwrite = true).
                // The earlier revision also called ConvertTo(pdfPath, true), which
                // overwrote the source PDF and ran the conversion twice.
                ConversionStatus status = converter.ConvertTo(strPDFAPath, true);

                // Report a failed conversion in the console window and wait for a key
                // press so the status can be read before the window closes.
                if (status != ConversionStatus.Success)
                {
                    Console.WriteLine(status);
                    Console.ReadKey();
                }
            }
        }
    }
}
C++
#include "stdafx.h"
#include "SolidFramework.h"
#include <iostream>

using namespace std;

// Progress callback used by the converter subclass below; intentionally does nothing.
void DoProgress(SolidFramework::ProgressEventArgsPtr args)
{
    // Not implemented
}

// Warning callback used by the converter subclass below; intentionally does nothing.
void DoWarning(SolidFramework::WarningEventArgsPtr args)
{
    // Not implemented
}

// Concrete PDF to PDF/A converter that forwards progress and warning events
// to the free functions above.
class PdfToPdfAConverter : public SolidFramework::Converters::PdfToPdfAConverterBase
{
public:
    void FireProgress(SolidFramework::ProgressEventArgsPtr args) override
    {
        DoProgress(args);
    }

    void FireWarning(SolidFramework::WarningEventArgsPtr args) override
    {
        DoWarning(args);
    }
};

// Sample entry point: converts a PDF into a searchable PDF/A file and prints the outcome.
int _tmain(int argc, _TCHAR* argv[])
{
    namespace Converters = SolidFramework::Converters;
    typedef Converters::Plumbing::ConversionStatus Status;

    // Import your license file.
    SolidFramework::License::Import(L"C:\\MyFolder\\license.xml");

    // Create the PDF to PDF/A converter.
    PdfToPdfAConverter *pdfaConverter = new PdfToPdfAConverter();

    // Attach the custom data record that points back at the converter.
    Converters::CustomData *customData = new Converters::CustomData();
    customData->Converter = pdfaConverter;
    customData->Data = nullptr;
    pdfaConverter->setCustomData(customData);

    // Add the PDF file to convert and choose where the output is written.
    pdfaConverter->AddSourceFile(L"C:\\YourFolder\\yourpdf.pdf");
    pdfaConverter->setOutputDirectory(L"C:\\MyFolder");

    // Make the converted file searchable.
    pdfaConverter->setOcrType(Converters::Plumbing::OcrType::CreateSearchableTextLayer);

    // Set the PDF/A validation mode; all PDF/A options are supported.
    pdfaConverter->setValidationMode(SolidFramework::Plumbing::ValidationMode::PdfA2B);

    // Start the conversion.
    cout << "Starting conversion." << endl;
    pdfaConverter->Convert();

    // Only one source file was added, so its result is the first item.
    const Status outcome = pdfaConverter->getResults()->getItem(0)->getStatus();
    if (outcome == Status::Success)
    {
        cout << "Conversion succeeded." << endl;
    }
    else
    {
        cout << "Conversion failed." << endl;
    }

    pdfaConverter->Dispose();

    cout << "Press <Enter> to exit." << endl;
    cin.get();
    return 0;
}
VB.Net
Imports System
Imports System.IO
Imports SolidFramework.Converters.Plumbing
Imports SolidFramework.Plumbing

''' <summary>
''' Sample: converts a PDF into a PDF/A file with an OCR-generated searchable text layer.
''' </summary>
Module OCRforSearch

    ''' <summary>
    ''' Imports the license, configures the converter and writes the searchable
    ''' PDF/A next to the source file.
    ''' </summary>
    Sub Main()
        ' Import your Solid Documents license.
        SolidFramework.License.Import("C:\MyFolder\license.xml")

        ' Set the path of the source file.
        Dim sPdfPath As String = "C:\YourFolder\yourpdf.pdf"

        ' Derive the output file name and location from the source path.
        Dim pdfaPath As String = Path.ChangeExtension(sPdfPath, ".pdfa.pdf")

        ' The Using block disposes the converter even if the conversion throws.
        Using myConverter As New SolidFramework.Converters.PdfToPdfAConverter()
            ' Add the file to convert.
            myConverter.AddSourceFile(sPdfPath)

            ' Set the PDF/A validation mode; the other PDF/A formats are available too.
            myConverter.ValidationMode = ValidationMode.PdfA2B

            ' Make the converted file searchable.
            myConverter.OcrType = OcrType.CreateSearchableTextLayer

            ' Convert the file, overwriting any existing output.
            Dim status As ConversionStatus = myConverter.ConvertTo(pdfaPath, True)

            ' Report a failed conversion instead of silently discarding the status.
            If status <> ConversionStatus.Success Then
                Console.WriteLine(status)
            End If
        End Using
    End Sub

End Module