Extract Text from a PDF File

Reconstruct text flow order and then extract text from an existing PDF.

This sample requires a Tools license. If you use a trial license, some of the output will be mangled.

C#
using System;
using System.IO;
using SolidFramework.Converters.Plumbing;
using SolidFramework.Converters;

namespace ExtractText
{
    /// <summary>
    /// Console sample: feeds one PDF to a <c>PdfToTextConverter</c> and prints
    /// the conversion status when it is anything other than success.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Imports the license, configures the converter, runs the conversion,
        /// and reports a non-success status on the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        [STAThread]
        static void Main(string[] args)
        {
            // Load the Solid Documents license before touching any converter.
            SolidFramework.License.Import(@"C:\MyFolder\license.xml");

            // Location of the PDF to convert.
            string sourcePdf = @"C:\YourFolder\yourpdf.pdf";

            // Target file: the source path with its extension swapped for .txt.
            string targetTxt = Path.ChangeExtension(sourcePdf, ".txt");

            using (var textConverter = new PdfToTextConverter())
            {
                // Queue the PDF for conversion.
                textConverter.AddSourceFile(sourcePdf);

                // Replace any existing output instead of failing.
                textConverter.OverwriteMode = SolidFramework.Plumbing.OverwriteMode.ForceOverwrite;

                // Folder used for the converter's output.
                textConverter.OutputDirectory = @"C:\MyFolder\";

                // Run the conversion for the queued file.
                textConverter.Convert();

                // NOTE(review): Convert() above and ConvertTo() below each appear to
                // run a conversion - confirm that both calls are intended.
                ConversionStatus outcome = textConverter.ConvertTo(targetTxt, true);
                if (outcome == ConversionStatus.Success)
                {
                    return;
                }

                // Show the failing status and wait for a key press so it can be read.
                Console.WriteLine(outcome);
                Console.ReadKey();
            }
        }
    }
}

C++
#include "stdafx.h"
#include "SolidFramework.h"

#include <iostream>
using  namespace std;

// Progress callback used by the converter subclass in this sample.
// Not implemented: the event arguments are ignored.
void DoProgress(SolidFramework::ProgressEventArgsPtr /*args*/)
{
}

// Warning callback used by the converter subclass in this sample.
// Not implemented: the event arguments are ignored.
void DoWarning(SolidFramework::WarningEventArgsPtr /*args*/)
{
}

// Converter subclass that forwards the base class's progress and warning
// hooks to the free functions DoProgress and DoWarning.
class PdfToTextConverter : public SolidFramework::Converters::PdfToTextConverterBase
{
public:
    // Hands the progress event to DoProgress.
    void FireProgress(SolidFramework::ProgressEventArgsPtr progress) override
    {
        DoProgress(progress);
    }

    // Hands the warning event to DoWarning.
    void FireWarning(SolidFramework::WarningEventArgsPtr warning) override
    {
        DoWarning(warning);
    }
};

int _tmain(int argc, _TCHAR* argv[])
{
    // Enter your License code goes here
    SolidFramework::License::Import(L"C:\\MyFolder\\license.xml");

    // Create a PDF to Word Converter called converter
    PdfToTextConverter *converter = new PdfToTextConverter();

    SolidFramework::Converters::CustomData *pData = NULL;
    pData = new SolidFramework::Converters::CustomData();
    pData->Converter = converter;
    pData->Data = nullptr;
    converter->setCustomData(pData);

    // Add the PDF file to convert.
    converter->AddSourceFile(L"C:\\YourFolder\\yourpdf.pdf");
    converter->setOutputDirectory(L"C:\\MyFolder");

    // Force the file to be overwritten 
    converter->setOverwriteMode(SolidFramework::Plumbing::OverwriteMode::ForceOverwrite);

    //Start the Conversion

    cout << "Starting conversion." << endl; converter->Convert();

    SolidFramework::Converters::Plumbing::ConversionStatus status = converter->getResults()->getItem(0)->getStatus();
    if (status != SolidFramework::Converters::Plumbing::ConversionStatus::Success)
    {
        cout << "Conversion failed." << endl;
    }
    else
    {
        cout << "Conversion succeeded." << endl; } converter->Dispose();

    cout << "Press <Enter> to exit." << endl;
    cin.get();

    return 0;
}
VB.Net
Imports System.IO

' Console sample: converts one PDF to a text file with PdfToTextConverter.
Module ExtractText

    ' Entry point. Imports the license, runs the conversion, and prints the
    ' conversion status to the console when it is not Success.
    Sub Main()

        ' Call your Solid Documents License
        SolidFramework.License.Import("C:\MyFolder\license.xml")

        ' Path of the source PDF
        Dim sPdfPath As String = "C:\YourFolder\filepdf.pdf"

        ' Output file: the source path with its extension changed to .txt
        Dim txtPath As String = Path.ChangeExtension(sPdfPath, ".txt")

        ' Using guarantees the converter is disposed even if a call below throws
        Using myConverter As New SolidFramework.Converters.PdfToTextConverter()

            ' Add files to convert.
            myConverter.AddSourceFile(sPdfPath)

            ' Select where to save the file
            myConverter.OutputDirectory = "C:\MyFolder\"

            ' Convert the file and keep the returned status instead of discarding it
            Dim status As SolidFramework.Converters.Plumbing.ConversionStatus = myConverter.ConvertTo(txtPath, True)

            ' Report a failed conversion rather than exiting silently
            If status <> SolidFramework.Converters.Plumbing.ConversionStatus.Success Then
                System.Console.WriteLine(status)
            End If

        End Using

    End Sub

End Module