Hello,
I am looking for a way to determine the page number of a Pattern Match. successful match
Example: I have documents that come in 1-5 pages and the important information is typically on page 1 OR page 2, but not really any method to tell which based on layout, header, etc. I have a pattern match activity that is pulling a date from the 'Retrieve Text' activity which is set to OCR ALL pages. Then, I'm running an SDK Script to highlight his information automatically.
Imports System
Imports System.Collections.Generic
Imports System.ComponentModel
Imports System.Data
Imports System.Data.SqlClient
Imports System.Text
Imports Laserfiche.RepositoryAccess
Namespace WorkflowActivity.Scripting.AssignTextAnnotation
'''<summary>
'''Provides one or more methods that can be run when the workflow scripting activity is performed.
'''</summary>
Public Class Script2
Inherits RAScriptClass92
'''<summary>
'''This method is run when the activity is performed.
'''</summary>
Protected Overrides Sub Execute()
'Document?
If Me.BoundEntryInfo.EntryType = EntryType.Document Then
' Create a String object to hold page text
Dim sPageText As String = Nothing
Dim sPageText2 As String = Nothing
' Get PageInfo Object for page1
Dim pInfo As PageInfo = Document.GetDocumentInfo(Me.BoundEntryId, Me.RASession).GetPageInfo(1)
Dim pInfo2 As PageInfo = Document.GetDocumentInfo(Me.BoundEntryId, Me.RASession).GetPageInfo(2)
' Load page text into stream
Using sr As System.IO.StreamReader = pInfo.ReadTextPagePart()
' Copy page text stream to page text string
sPageText = sr.ReadToEnd
End Using
Using sr2 As System.IO.StreamReader = pInfo2.ReadTextPagePart()
sPageText2 = sr2.ReadToEnd
End Using
' Create Regular Expression to find text on page
Dim regex As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex(me.TokenReplace("%(PatternMatching_Reference)"), System.Text.RegularExpressions.RegexOptions.IgnoreCase)
Dim regex1 As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex(me.TokenReplace("%(PatternMatching_Effective Date)*"), System.Text.RegularExpressions.RegexOptions.IgnoreCase)
Dim regex3 As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex(me.TokenReplace("%(PatternMatching_Effective Date)"), System.Text.RegularExpressions.RegexOptions.IgnoreCase)
Dim regex2 As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex(me.TokenReplace("CLO %(PatternMatching_Denali Fund) LTD"), System.Text.RegularExpressions.RegexOptions.IgnoreCase)
Dim regex4 As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex(me.TokenReplace("%(PatternMatching_Rate)"), System.Text.RegularExpressions.RegexOptions.IgnoreCase)
Dim regex5 As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex(me.TokenReplace("%(PatternMatching_Rate Expiration)"), System.Text.RegularExpressions.RegexOptions.IgnoreCase)
' Apply regular Expression to page text to find match
Dim match As System.Text.RegularExpressions.Match = regex.Match(sPageText)
Dim match1 As System.Text.RegularExpressions.Match = regex1.Match(sPageText2)
Dim match3 As System.Text.RegularExpressions.Match = regex3.Match(sPageText)
Dim match2 As System.Text.RegularExpressions.Match = regex2.Match(sPageText)
Dim match4 As System.Text.RegularExpressions.Match = regex4.Match(sPageText)
Dim match5 As System.Text.RegularExpressions.Match = regex5.Match(sPageText)
' Create TextRange object to hold start and end points of found text
Dim FoundRange As TextRange = Nothing
Dim FoundRange1 As TextRange = Nothing
Dim FoundRange2 As TextRange = Nothing
Dim FoundRange3 As TextRange = Nothing
Dim FoundRange4 As TextRange = Nothing
Dim FoundRange5 As TextRange = Nothing
If match.Success Then
' Add the Start and End index of found text to textrange object
FoundRange = New TextRange(match.Index, match.Length + match.Index)
FoundRange1 = New TextRange(match1.Index, match1.Length + match1.Index)
FoundRange2 = New TextRange(match2.Index, match2.Length + match2.Index)
FoundRange3 = New TextRange(match3.Index, match3.Length + match3.Index)
FoundRange4 = New TextRange(match4.Index, match4.Length + match4.Index)
FoundRange5 = New TextRange(match5.Index, match5.Length + match5.Index)
End If
' Only process of textrange has an end index greater than 0
If FoundRange.EndPosition > 0 Then
' Create Highlight Annotation object
Dim hA As HighlightAnnotation = New HighlightAnnotation
Dim hA1 As HighlightAnnotation = New HighlightAnnotation
Dim hA2 As HighlightAnnotation = New HighlightAnnotation
Dim hA3 As HighlightAnnotation = New HighlightAnnotation
Dim hA4 As HighlightAnnotation = New HighlightAnnotation
Dim hA5 As HighlightAnnotation = New HighlightAnnotation
' link it to Start Index of found text
hA.TextStart = FoundRange.StartPosition
hA1.TextStart = FoundRange1.StartPosition
hA2.TextStart = FoundRange2.StartPosition
hA3.TextStart = FoundRange3.StartPosition
hA4.TextStart = FoundRange4.StartPosition
hA5.TextStart = FoundRange5.StartPosition
' link it to End Index of found text
hA.TextEnd = FoundRange.EndPosition
hA1.TextEnd = FoundRange1.EndPosition
hA2.TextEnd = FoundRange2.EndPosition
hA3.TextEnd = FoundRange3.EndPosition
hA4.TextEnd = FoundRange4.EndPosition
hA5.TextEnd = FoundRange5.EndPosition
' Link the text highlight to the image location
hA.LinkTextToImage(New TextLinker(pInfo.ReadTextPagePartAsWords(), pInfo.ReadLocationsPagePart()))
hA1.LinkTextToImage(New TextLinker(pInfo.ReadTextPagePartAsWords(), pInfo.ReadLocationsPagePart()))
hA2.LinkTextToImage(New TextLinker(pInfo.ReadTextPagePartAsWords(), pInfo.ReadLocationsPagePart()))
hA3.LinkTextToImage(New TextLinker(pInfo.ReadTextPagePartAsWords(), pInfo.ReadLocationsPagePart()))
hA4.LinkTextToImage(New TextLinker(pInfo.ReadTextPagePartAsWords(), pInfo.ReadLocationsPagePart()))
hA5.LinkTextToImage(New TextLinker(pInfo.ReadTextPagePartAsWords(), pInfo.ReadLocationsPagePart()))
' Set highlighter color
hA.Color = Laserfiche.RepositoryAccess.Common.LfColor.FromAbgr(65535)
hA1.Color = Laserfiche.RepositoryAccess.Common.LfColor.FromAbgr(65535)
hA2.Color = Laserfiche.RepositoryAccess.Common.LfColor.FromAbgr(65535)
hA3.Color = Laserfiche.RepositoryAccess.Common.LfColor.FromAbgr(65535)
hA4.Color = Laserfiche.RepositoryAccess.Common.LfColor.FromAbgr(65535)
hA5.Color = Laserfiche.RepositoryAccess.Common.LfColor.FromAbgr(65535)
' Add the annotation to the page
pInfo.AddAnnotation(hA)
pInfo2.AddAnnotation(hA1)
pInfo.AddAnnotation(hA2)
pInfo.AddAnnotation(hA3)
pInfo.AddAnnotation(hA4)
pInfo.AddAnnotation(hA5)
' Save Annotation change
pInfo.Save()
End If
End If
End Sub
End Class
End Namespace
Namespace WorkflowActivity.Scripting.HighlightTextScript
End Namespace
The issue I'm running into, is that I cannot determine where (page#) this date is, and when it's on page 2, it is not being highlighted. I know this has something to do with the .GetPageInfo() but I'm not sure how to automatically get this information from the token. I've included the script, to maybe help explain my situation.
Thanks,
Nate