Make Searchable PDF
The code below shows you how to add a searchable text layer to a PDF using OCR.
It is recommended that you first review the Getting Started sample, since it includes the licensing and framework initialization code required to run this sample.
This sample requires a Professional+OCR license. If you use a Trial License then the generated file will contain some mangled words.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
using System;
using SolidFramework.Converters;
using SolidFramework.Converters.Plumbing;
namespace CSharp_Tutorials
{
    public static partial class Tutorials
    {
        /// <summary>
        /// Converts a PDF to PDF/A-2b with a <see cref="PdfToPdfAConverter"/> while asking the
        /// converter to add a searchable OCR text layer, reporting progress on the console.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF file handed to the converter as its source.</param>
        /// <param name="outputPath">Path passed to the converter as the conversion target.</param>
        /// <returns>
        /// <c>true</c> when the conversion status is <c>Success</c> or <c>PdfAError</c>;
        /// <c>false</c> when the license does not allow OCR or any other status is returned.
        /// </returns>
        public static bool MakePdfSearchable(string pdfPath, string outputPath)
        {
            // OCR is a licensed feature, so bail out early when it is not available.
            bool ocrPermitted = SolidFramework.License.Allows(SolidFramework.Plumbing.LicensePermissions.Ocr);
            if (!ocrPermitted)
            {
                Console.WriteLine("License doesn't allow OCR - skipping MakePdfSearchable");
                Console.WriteLine();
                return false;
            }

            // Shared tail of every progress message below.
            string fromTo = pdfPath + " to " + outputPath;

            using (PdfToPdfAConverter pdfAConverter = new PdfToPdfAConverter())
            {
                // Source document and the PDF/A flavour to produce.
                pdfAConverter.AddSourceFile(pdfPath);
                pdfAConverter.ValidationMode = SolidFramework.Plumbing.ValidationMode.PdfA2B;

                // OCR settings: searchable text layer (requires a Professional+OCR license).
                pdfAConverter.OcrType = OcrType.CreateSearchableTextLayer;
                pdfAConverter.OcrEngine = TextRecoveryEngine.SolidOCR;
                pdfAConverter.OcrLanguage = "en";
                pdfAConverter.OcrImageCompression = SolidFramework.Imaging.Plumbing.ImageCompression.PreserveOriginal;

                Console.WriteLine("Converting " + fromTo);

                // NOTE(review): the second argument presumably permits overwriting an existing
                // output file - confirm against the SDK documentation.
                ConversionStatus status = pdfAConverter.ConvertTo(outputPath, true);

                // PdfAError is also accepted: it means the input wasn't PDF/A but was fixed.
                bool converted = status == ConversionStatus.Success
                                 || status == ConversionStatus.PdfAError;
                if (!converted)
                {
                    Console.WriteLine("Converting " + fromTo + " failed with status: " + status);
                    Console.WriteLine();
                    return false;
                }
            }

            Console.WriteLine("Successfully converted " + fromTo);
            Console.WriteLine();
            return true;
        }
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
Imports System
Imports SolidFramework.Converters
Imports SolidFramework.Converters.Plumbing
Namespace VBNet_Tutorials
    Partial Module Tutorials
        ''' <summary>
        ''' Converts a PDF to PDF/A-2b with a PdfToPdfAConverter while asking the converter
        ''' to add a searchable OCR text layer, reporting progress on the console.
        ''' </summary>
        ''' <param name="pdfPath">Path of the PDF file handed to the converter as its source.</param>
        ''' <param name="outputPath">Path passed to the converter as the conversion target.</param>
        ''' <returns>
        ''' True when the conversion status is Success or PdfAError; False when the license
        ''' does not allow OCR or any other status is returned.
        ''' </returns>
        Function MakePdfSearchable(ByVal pdfPath As String, ByVal outputPath As String) As Boolean
            ' Ensure license allows OCR
            If Not SolidFramework.License.Allows(SolidFramework.Plumbing.LicensePermissions.Ocr) Then
                Console.WriteLine("License doesn't allow OCR - skipping MakePdfSearchable")
                Console.WriteLine()
                Return False
            End If

            ' Create a PdfToPdfAConverter
            Using converter As PdfToPdfAConverter = New PdfToPdfAConverter()
                ' Add the PDF file to convert
                converter.AddSourceFile(pdfPath)

                ' Set the PDF/A mode to convert to
                converter.ValidationMode = SolidFramework.Plumbing.ValidationMode.PdfA2B

                ' Add a searchable text layer (requires a Professional+OCR license)
                converter.OcrType = OcrType.CreateSearchableTextLayer
                converter.OcrEngine = TextRecoveryEngine.SolidOCR
                converter.OcrLanguage = "en"
                converter.OcrImageCompression = SolidFramework.Imaging.Plumbing.ImageCompression.PreserveOriginal

                Console.WriteLine("Converting " & pdfPath & " to " & outputPath)

                ' Convert the file
                Dim result As ConversionStatus = converter.ConvertTo(outputPath, True)

                ' Check if it was successful (PdfAError means it wasn't PDF/A but was fixed)
                If result <> ConversionStatus.Success AndAlso result <> ConversionStatus.PdfAError Then
                    ' Use ToString() so the status is reported by name; concatenating the enum
                    ' directly with & would emit its underlying numeric value instead.
                    Console.WriteLine("Converting " & pdfPath & " to " & outputPath & " failed with status: " & result.ToString())
                    Console.WriteLine()
                    Return False
                End If
            End Using

            Console.WriteLine("Successfully converted " & pdfPath & " to " & outputPath)
            Console.WriteLine()
            Return True
        End Function
    End Module
End Namespace
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include "Tutorials.h"
using namespace SolidFramework::Converters::Plumbing;
using namespace SolidFramework::Converters;
bool MakePdfSearchable(const wstring & pdfPath, const wstring & outputPath)
{
// Ensure license allows OCR
if (!SolidFramework::License::Allows(SolidFramework::Plumbing::LicensePermissions::Ocr))
{
wcout << L"License doesn't allow OCR - skipping MakePdfSearchable" << endl << endl;
return false;
}
// Create a PdfToPdfAConverter
auto converter = make_shared<PdfToPdfAConverter>();
// Add the PDF file to convert
converter->AddSourceFile(pdfPath);
// Set the PDF/A mode to convert to
converter->SetValidationMode(SolidFramework::Plumbing::ValidationMode::PdfA2B);
// Add a searchable text layer (requires a Professional+OCR license)
converter->SetOcrType(OcrType::CreateSearchableTextLayer);
converter->SetOcrEngine(TextRecoveryEngine::SolidOCR);
converter->SetOcrLanguage(L"en");
converter->SetOcrImageCompression(SolidFramework::Imaging::Plumbing::ImageCompression::PreserveOriginal);
wcout << L"Converting " << pdfPath << L" to " << outputPath << endl;
// Convert the file
auto result = converter->ConvertTo(outputPath, true);
// Check if it was successful (PdfAError means it wasn't PDF/A but was fixed)
if (result != ConversionStatus::Success && result != ConversionStatus::PdfAError)
{
wcout << L"Converting " << pdfPath << L" to " << outputPath << L" failed with status: " << (int)result << endl << endl;
return false;
}
wcout << L"Successfully converted " << pdfPath << L" to " << outputPath << endl << endl;
return true;
}