OCR and text read with Azure Computer Vision

Step 1. Clone the repository
https://github.com/MicrosoftLearning/AI-102-AIEngineer (this lab is in the 20-ocr folder)

Step 2. Edit read-text.py as shown below:

import os

import time

from PIL import Image, ImageDraw

from matplotlib import pyplot as plt

# import namespaces

from azure.cognitiveservices.vision.computervision import ComputerVisionClient

from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes

from msrest.authentication import CognitiveServicesCredentials

def main():

global cv_client

try:

# Get Configuration Settings

KEY='c36f33fd15784c3984c5a88dcde4c31c'

ENDPOINT='https://ai102cg.cognitiveservices.azure.com/'

# Authenticate Computer Vision client

credential = CognitiveServicesCredentials(KEY)

cv_client = ComputerVisionClient(ENDPOINT, credential)

# Menu for text reading functions

print('1: Use OCR API\n2: Use Read API\n3: Read handwriting\nAny other key to quit')

command = input('Enter a number:')

if command == '1':

image_file = os.path.join('images','Lincoln.jpg')

GetTextOcr(image_file)

elif command =='2':

image_file = os.path.join('images','Rome.pdf')

GetTextRead(image_file)

elif command =='3':

image_file = os.path.join('images','Note.jpg')

GetTextRead(image_file)

except Exception as ex:

print(ex)

def GetTextOcr(image_file):

print('Reading text in {}\n'.format(image_file))

# Use OCR API to read text in image

with open(image_file, mode="rb") as image_data:

ocr_results = cv_client.recognize_printed_text_in_stream(image_data)

# Prepare image for drawing

fig = plt.figure(figsize=(7, 7))

img = Image.open(image_file)

draw = ImageDraw.Draw(img)

# Process the text line by line

for region in ocr_results.regions:

for line in region.lines:

# Show the position of the line of text

l,t,w,h = list(map(int, line.bounding_box.split(',')))

draw.rectangle(((l,t), (l+w, t+h)), outline='magenta', width=5)

# Read the words in the line of text

line_text = ''

for word in line.words:

line_text += word.text + ' '

print(line_text.rstrip())

# Save the image with the text locations highlighted

plt.axis('off')

plt.imshow(img)

outputfile = 'ocr_results.jpg'

fig.savefig(outputfile)

print('Results saved in', outputfile)

def GetTextRead(image_file):

print('Reading text in {}\n'.format(image_file))

# Use Read API to read text in image

with open(image_file, mode="rb") as image_data:

read_op = cv_client.read_in_stream(image_data, raw=True)

# Get the async operation ID so we can check for the results

operation_location = read_op.headers["Operation-Location"]

operation_id = operation_location.split("/")[-1]

# Wait for the asynchronous operation to complete

while True:

read_results = cv_client.get_read_result(operation_id)

if read_results.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]:

break

time.sleep(1)

# If the operation was successfuly, process the text line by line

if read_results.status == OperationStatusCodes.succeeded:

for page in read_results.analyze_result.read_results:

for line in page.lines:

print(line.text)

if __name__ == "__main__":

main()

import os import time from PIL import Image, ImageDraw from matplotlib import pyplot as plt # import namespaces from azure.cognitiveservices.vision.computervision import ComputerVisionClient from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes from msrest.authentication import CognitiveServicesCredentials def main(): global cv_client try: # Get Configuration Settings KEY='c36f33fd15784c3984c5a88dcde4c31c' ENDPOINT='https://ai102cg.cognitiveservices.azure.com/' # Authenticate Computer Vision client credential = CognitiveServicesCredentials(KEY) cv_client = ComputerVisionClient(ENDPOINT, credential) # Menu for text reading functions print('1: Use OCR API\n2: Use Read API\n3: Read handwriting\nAny other key to quit') command = input('Enter a number:') if command == '1': image_file = os.path.join('images','Lincoln.jpg') GetTextOcr(image_file) elif command =='2': image_file = os.path.join('images','Rome.pdf') GetTextRead(image_file) elif command =='3': image_file = os.path.join('images','Note.jpg') GetTextRead(image_file) except Exception as ex: print(ex) def GetTextOcr(image_file): print('Reading text in {}\n'.format(image_file)) # Use OCR API to read text in image with open(image_file, mode="rb") as image_data: ocr_results = cv_client.recognize_printed_text_in_stream(image_data) # Prepare image for drawing fig = plt.figure(figsize=(7, 7)) img = Image.open(image_file) draw = ImageDraw.Draw(img) # Process the text line by line for region in ocr_results.regions: for line in region.lines: # Show the position of the line of text l,t,w,h = list(map(int, line.bounding_box.split(','))) draw.rectangle(((l,t), (l+w, t+h)), outline='magenta', width=5) # Read the words in the line of text line_text = '' for word in line.words: line_text += word.text + ' ' print(line_text.rstrip()) # Save the image with the text locations highlighted plt.axis('off') plt.imshow(img) outputfile = 'ocr_results.jpg' fig.savefig(outputfile) print('Results saved in', 
outputfile) def GetTextRead(image_file): print('Reading text in {}\n'.format(image_file)) # Use Read API to read text in image with open(image_file, mode="rb") as image_data: read_op = cv_client.read_in_stream(image_data, raw=True) # Get the async operation ID so we can check for the results operation_location = read_op.headers["Operation-Location"] operation_id = operation_location.split("/")[-1] # Wait for the asynchronous operation to complete while True: read_results = cv_client.get_read_result(operation_id) if read_results.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]: break time.sleep(1) # If the operation was successfuly, process the text line by line if read_results.status == OperationStatusCodes.succeeded: for page in read_results.analyze_result.read_results: for line in page.lines: print(line.text) if __name__ == "__main__": main()

import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials

def main():
    """Authenticate the Computer Vision client and run the text-reading menu.

    Configuration is read from the environment instead of being written into
    the source, so the subscription key never ends up in a published file:

    * ``COG_SERVICE_KEY`` -- subscription key (required).
    * ``COG_SERVICE_ENDPOINT`` -- service endpoint (optional; defaults to the
      endpoint this script was originally written against).

    The authenticated client is stored in the module-level ``cv_client``
    because ``GetTextOcr`` and ``GetTextRead`` read it from there.
    Any exception raised while authenticating or reading text is printed
    rather than propagated.
    """
    global cv_client

    # Get Configuration Settings
    key = os.environ.get('COG_SERVICE_KEY')
    endpoint = os.environ.get(
        'COG_SERVICE_ENDPOINT',
        'https://ai102cg.cognitiveservices.azure.com/')
    if not key:
        # Fail early with an actionable message instead of an opaque
        # authentication error coming back from the service.
        print('Set the COG_SERVICE_KEY environment variable to your Computer Vision key')
        return

    try:
        # Authenticate Computer Vision client
        credential = CognitiveServicesCredentials(key)
        cv_client = ComputerVisionClient(endpoint, credential)

        # Menu for text reading functions
        print('1: Use OCR API\n2: Use Read API\n3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')
        if command == '1':
            image_file = os.path.join('images', 'Lincoln.jpg')
            GetTextOcr(image_file)
        elif command == '2':
            image_file = os.path.join('images', 'Rome.pdf')
            GetTextRead(image_file)
        elif command == '3':
            # Handwriting goes through the same Read API as printed documents
            image_file = os.path.join('images', 'Note.jpg')
            GetTextRead(image_file)
        # Any other input falls through and the script simply ends

    except Exception as ex:
        # Top-level boundary of a console script: report the problem and exit
        print(ex)

def GetTextOcr(image_file):
    """Read printed text from an image with the OCR API and outline each line.

    Sends the file to the module-level ``cv_client``, prints every recognized
    line of text, and saves a copy of the image with the line bounding boxes
    drawn on it to ``ocr_results.jpg`` in the current working directory.

    Args:
        image_file: Path of the image file to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use OCR API to read text in image
    with open(image_file, mode="rb") as image_data:
        ocr_results = cv_client.recognize_printed_text_in_stream(image_data)

    # Prepare image for drawing
    fig = plt.figure(figsize=(7, 7))
    try:
        # The context manager releases the underlying image file when done
        with Image.open(image_file) as img:
            draw = ImageDraw.Draw(img)

            # Process the text line by line
            for region in ocr_results.regions:
                for line in region.lines:
                    # Show the position of the line of text; the bounding box
                    # string is unpacked as "left,top,width,height"
                    left, top, width, height = map(int, line.bounding_box.split(','))
                    draw.rectangle(
                        ((left, top), (left + width, top + height)),
                        outline='magenta', width=5)

                    # Read the words in the line of text
                    print(' '.join(word.text for word in line.words).rstrip())

            # Save the image with the text locations highlighted
            plt.axis('off')
            plt.imshow(img)
            outputfile = 'ocr_results.jpg'
            fig.savefig(outputfile)
    finally:
        # Figures stay registered with pyplot until closed explicitly
        plt.close(fig)

    print('Results saved in', outputfile)

def GetTextRead(image_file):
    """Read text from an image or document with the asynchronous Read API.

    Submits the file through the module-level ``cv_client``, polls once per
    second until the operation leaves the running / not-started states, and
    prints each recognized line. If the operation ends in any state other
    than succeeded, that status is printed instead.

    Args:
        image_file: Path of the image or PDF file to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use Read API to read text in image; the file is only needed for the
    # upload, so it is closed before polling starts
    with open(image_file, mode="rb") as image_data:
        read_op = cv_client.read_in_stream(image_data, raw=True)

    # Get the async operation ID so we can check for the results
    operation_location = read_op.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    while True:
        read_results = cv_client.get_read_result(operation_id)
        if read_results.status not in pending:
            break
        time.sleep(1)

    # Report an unsuccessful operation instead of silently printing nothing
    if read_results.status != OperationStatusCodes.succeeded:
        print('Read operation finished with status: {}'.format(read_results.status))
        return

    # The operation was successful: process the text line by line
    for page in read_results.analyze_result.read_results:
        for line in page.lines:
            print(line.text)

# Run the menu only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()

Step 3. Verify the app

C:\Hans\AI-102-AIEngineer\20-ocr\Python\read-text> python .\read-text.py

1: Use OCR API

2: Use Read API

3: Read handwriting

Any other key to quit

Enter a number:3

Reading text in images\Note.jpg

Shopping List

Non-Fat milk

Bread

Eggs

C:\Hans\AI-102-AIEngineer\20-ocr\Python\read-text> python .\read-text.py 1: Use OCR API 2: Use Read API 3: Read handwriting Any other key to quit Enter a number:3 Reading text in images\Note.jpg Shopping List Non-Fat milk Bread Eggs

C:\Hans\AI-102-AIEngineer\20-ocr\Python\read-text> python .\read-text.py
1: Use OCR API
2: Use Read API
3: Read handwriting
Any other key to quit
Enter a number:3
Reading text in images\Note.jpg

Shopping List
Non-Fat milk
Bread
Eggs

9 Replies to “OCR and text read with Azure Computer Vision”

Alyce says:

July 27, 2021 at 4:00 pm

This design is spectacular! You most certainly know
how to keep a reader amused. Between your wit and your videos, I was almost moved to start my own blog (well, almost…HaHa!)
Excellent job. I really enjoyed what you had to say, and more than that,
how you presented it. Too cool!

Modesto says:

July 29, 2021 at 6:38 pm

What’s Taking place i am new to this, I stumbled upon this I
have found It positively useful and it has helped me out
loads. I hope to give a contribution & aid other customers like its aided me.
Great job.

ViagRx Reviews says:

July 29, 2021 at 8:58 pm

Hi there, its good piece of writing on the topic of media print, we all be aware of media is a fantastic source of data.

Take a look at my web-site :: ViagRx Reviews

http://www.cruzenews.com says:

July 29, 2021 at 9:34 pm

You are my intake, I own few blogs and often run out
from post :).

my blog post :: http://www.cruzenews.com

https://www.dailystrength.org says:

July 29, 2021 at 11:58 pm

I pay a visit day-to-day some web pages and sites to read content, however this
blog offers feature based articles.

Here is my web page :: https://www.dailystrength.org

www.dailystrength.org says:

July 30, 2021 at 1:37 am

I am sure this post has touched all the internet viewers, its really really good piece of writing on building up new website.

Also visit my webpage: http://www.dailystrength.org

Alpha Extracts Reviews says:

July 31, 2021 at 5:06 pm

Thank you for some other informative website. The place else could
I get that kind of information written in such a perfect means?
I have a venture that I’m just now running on, and I’ve been at the look out for such info.

Feel free to visit my webpage … Alpha Extracts Reviews

www.44706648-90-20190827182230.webstarterz.com says:

July 31, 2021 at 10:03 pm

Wow! This can be one particular of the most helpful
blogs We’ve ever arrive across on this subject.

Actually Wonderful. I’m also a specialist in this topic
so I can understand your effort.

Also visit my homepage; http://www.44706648-90-20190827182230.webstarterz.com

Zenzi CBD Gummies Reviews says:

August 22, 2021 at 7:38 am

This is the right site for everyone who wants to understand this topic.
You understand a whole lot its almost hard to argue with you
(not that I personally will need to?HaHa). You certainly put a new spin on a subject which has been written about for many years.
Excellent stuff, just wonderful!

Look at my web blog – Zenzi CBD Gummies Reviews

9 Replies to “OCR and text read with Azure Computer Vision”

Leave a Reply Cancel reply