Hi all.
In my Workflow, I'm using a C# SDK script to OCR some entries.
This is the code:
namespace WorkflowActivity.Scripting.ScriptSDK
{
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Data.SqlClient;
using System.Text;
using Laserfiche.RepositoryAccess;
using Laserfiche.DocumentServices;
/// <summary>
/// Provides one or more methods that can be run when the workflow scripting activity executes.
/// </summary>
public class Script1 : RAScriptClass104
{
    /// <summary>
    /// Runs when the activity executes: if the bound entry is a document, locks it
    /// exclusively, generates OCR text for all of its pages, and unlocks it again.
    /// The "Script_Error" token is set to "None" on success (or when the entry is not a
    /// document), otherwise to the message of the exception that was caught.
    /// </summary>
    protected override void Execute()
    {
        // BoundEntryInfo gives access to the starting entry; RASession gives the Repository Access session.
        string sError = "None";
        try
        {
            // Only documents can be processed with OCR; any other entry type is skipped.
            if (BoundEntryInfo.EntryType == EntryType.Document)
            {
                using (DocumentInfo document = (DocumentInfo)BoundEntryInfo)
                {
                    // Take the lock before entering the try block so that the finally
                    // clause only unlocks a lock that was actually acquired.
                    document.Lock(LockType.Exclusive);
                    try
                    {
                        // Instantiate a new OCR engine.
                        using (OcrEngine ocr = OcrEngine.LoadEngine())
                        {
                            // Configure OCR options.
                            // NOTE(review): no OCR language is selected here, so the engine
                            // presumably runs with its default language. If the SDK exposes a
                            // language setting on OcrEngine, set it to French for French
                            // documents - TODO confirm against the Laserfiche SDK documentation.
                            ocr.AutoOrient = true;
                            ocr.Decolumnize = true;
                            ocr.OptimizationMode = OcrOptimizationMode.Accuracy;

                            // Generate text for all pages of the document.
                            ocr.Run(document, document.AllPages);
                        }
                    }
                    finally
                    {
                        // Release the exclusive lock even when the OCR run throws, so the
                        // explicit unlock is never skipped on the failure path.
                        document.Unlock();
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Report the failure to the workflow and expose it through the token below.
            sError = ex.Message;
            WorkflowApi.TrackError(ex.Message);
        }

        SetTokenValue("Script_Error", sError);
    }
}
}
But the results are not very good.
I tried to use regular expressions to extract some information, but because of the OCR quality, I get a lot of errors.
Can I improve the OCR?
The language of the documents is French.
Thanks in advance.
Regards