OCR and text read with Azure Computer Vision

Step 1. Clone the repository
https://github.com/MicrosoftLearning/AI-102-AIEngineer and open the 20-ocr folder

Step 2. Edit read-text.py as shown below:

import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
# import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
def main():
    """Authenticate with Azure Computer Vision and run the selected demo.

    The service key and endpoint are read from the ``COG_SERVICE_KEY`` and
    ``COG_SERVICE_ENDPOINT`` environment variables so that no secret is
    stored in the source file. The authenticated client is published as the
    module-level ``cv_client`` that ``GetTextOcr`` and ``GetTextRead`` use.

    The user is then prompted for a menu choice: ``1`` runs the OCR API on
    ``images/Lincoln.jpg``, ``2`` runs the Read API on ``images/Rome.pdf``,
    ``3`` runs the Read API on ``images/Note.jpg``; any other input quits.

    Any exception raised along the way is caught and printed rather than
    propagated.
    """
    global cv_client
    try:
        # Get Configuration Settings (never hard-code the key in the script)
        key = os.environ.get('COG_SERVICE_KEY')
        endpoint = os.environ.get('COG_SERVICE_ENDPOINT')
        if not key or not endpoint:
            raise RuntimeError(
                'Set the COG_SERVICE_KEY and COG_SERVICE_ENDPOINT '
                'environment variables before running.'
            )

        # Authenticate Computer Vision client
        credential = CognitiveServicesCredentials(key)
        cv_client = ComputerVisionClient(endpoint, credential)

        # Menu for text reading functions
        print('1: Use OCR API\n2: Use Read API\n3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')
        if command == '1':
            GetTextOcr(os.path.join('images', 'Lincoln.jpg'))
        elif command == '2':
            GetTextRead(os.path.join('images', 'Rome.pdf'))
        elif command == '3':
            GetTextRead(os.path.join('images', 'Note.jpg'))

    except Exception as ex:
        # Top-level boundary of the script: report the problem and exit quietly.
        print(ex)
def GetTextOcr(image_file):
    """Read printed text from an image with the OCR API and save an annotated copy.

    The file is sent to ``cv_client.recognize_printed_text_in_stream``. Each
    recognized line is printed, its bounding box is outlined in magenta on
    the image, and the annotated picture is saved as ``ocr_results.jpg`` in
    the current working directory.

    Args:
        image_file: Path of the image to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use OCR API to read text in image
    with open(image_file, mode="rb") as image_data:
        ocr_results = cv_client.recognize_printed_text_in_stream(image_data)

    # Prepare image for drawing
    outputfile = 'ocr_results.jpg'
    fig = plt.figure(figsize=(7, 7))
    try:
        # The context manager releases the image file handle when we are done.
        with Image.open(image_file) as img:
            draw = ImageDraw.Draw(img)

            # Process the text line by line
            for region in ocr_results.regions:
                for line in region.lines:
                    # Show the position of the line of text; the bounding box
                    # is parsed as four comma-separated integers.
                    left, top, width, height = map(int, line.bounding_box.split(','))
                    draw.rectangle(
                        ((left, top), (left + width, top + height)),
                        outline='magenta',
                        width=5,
                    )

                    # Read the words in the line of text
                    print(' '.join(word.text for word in line.words).rstrip())

            # Save the image with the text locations highlighted
            plt.axis('off')
            plt.imshow(img)
            fig.savefig(outputfile)
    finally:
        # Free the figure even if drawing or saving failed.
        plt.close(fig)
    print('Results saved in', outputfile)
def GetTextRead(image_file):
    """Read text from a file with the asynchronous Read API and print it.

    The file is submitted with ``cv_client.read_in_stream``; the operation id
    is taken from the last path segment of the ``Operation-Location``
    response header and polled once per second until the status is neither
    ``running`` nor ``not_started``. On success every recognized line is
    printed; otherwise the final status is reported.

    Args:
        image_file: Path of the image or PDF to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use Read API to read text in image; the file is only needed for the upload.
    with open(image_file, mode="rb") as image_data:
        read_op = cv_client.read_in_stream(image_data, raw=True)

    # Get the async operation ID so we can check for the results
    operation_location = read_op.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    read_results = cv_client.get_read_result(operation_id)
    while read_results.status in pending:
        time.sleep(1)
        read_results = cv_client.get_read_result(operation_id)

    # Report a failed operation instead of finishing silently
    if read_results.status != OperationStatusCodes.succeeded:
        print('Read operation ended with status: {}'.format(read_results.status))
        return

    # The operation was successful: process the text line by line
    for page in read_results.analyze_result.read_results:
        for line in page.lines:
            print(line.text)
# Run the interactive menu only when this file is executed as a script.
if __name__ == "__main__":
    main()
import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials


def main():
    """Authenticate with Azure Computer Vision and run the selected demo.

    The service key and endpoint are read from the ``COG_SERVICE_KEY`` and
    ``COG_SERVICE_ENDPOINT`` environment variables so that no secret is
    stored in the source file. The authenticated client is published as the
    module-level ``cv_client`` that ``GetTextOcr`` and ``GetTextRead`` use.

    The user is then prompted for a menu choice: ``1`` runs the OCR API on
    ``images/Lincoln.jpg``, ``2`` runs the Read API on ``images/Rome.pdf``,
    ``3`` runs the Read API on ``images/Note.jpg``; any other input quits.

    Any exception raised along the way is caught and printed rather than
    propagated.
    """
    global cv_client
    try:
        # Get Configuration Settings (never hard-code the key in the script)
        key = os.environ.get('COG_SERVICE_KEY')
        endpoint = os.environ.get('COG_SERVICE_ENDPOINT')
        if not key or not endpoint:
            raise RuntimeError(
                'Set the COG_SERVICE_KEY and COG_SERVICE_ENDPOINT '
                'environment variables before running.'
            )

        # Authenticate Computer Vision client
        credential = CognitiveServicesCredentials(key)
        cv_client = ComputerVisionClient(endpoint, credential)

        # Menu for text reading functions
        print('1: Use OCR API\n2: Use Read API\n3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')
        if command == '1':
            GetTextOcr(os.path.join('images', 'Lincoln.jpg'))
        elif command == '2':
            GetTextRead(os.path.join('images', 'Rome.pdf'))
        elif command == '3':
            GetTextRead(os.path.join('images', 'Note.jpg'))

    except Exception as ex:
        # Top-level boundary of the script: report the problem and exit quietly.
        print(ex)


def GetTextOcr(image_file):
    """Read printed text from an image with the OCR API and save an annotated copy.

    The file is sent to ``cv_client.recognize_printed_text_in_stream``. Each
    recognized line is printed, its bounding box is outlined in magenta on
    the image, and the annotated picture is saved as ``ocr_results.jpg`` in
    the current working directory.

    Args:
        image_file: Path of the image to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use OCR API to read text in image
    with open(image_file, mode="rb") as image_data:
        ocr_results = cv_client.recognize_printed_text_in_stream(image_data)

    # Prepare image for drawing
    outputfile = 'ocr_results.jpg'
    fig = plt.figure(figsize=(7, 7))
    try:
        # The context manager releases the image file handle when we are done.
        with Image.open(image_file) as img:
            draw = ImageDraw.Draw(img)

            # Process the text line by line
            for region in ocr_results.regions:
                for line in region.lines:
                    # Show the position of the line of text; the bounding box
                    # is parsed as four comma-separated integers.
                    left, top, width, height = map(int, line.bounding_box.split(','))
                    draw.rectangle(
                        ((left, top), (left + width, top + height)),
                        outline='magenta',
                        width=5,
                    )

                    # Read the words in the line of text
                    print(' '.join(word.text for word in line.words).rstrip())

            # Save the image with the text locations highlighted
            plt.axis('off')
            plt.imshow(img)
            fig.savefig(outputfile)
    finally:
        # Free the figure even if drawing or saving failed.
        plt.close(fig)
    print('Results saved in', outputfile)


def GetTextRead(image_file):
    """Read text from a file with the asynchronous Read API and print it.

    The file is submitted with ``cv_client.read_in_stream``; the operation id
    is taken from the last path segment of the ``Operation-Location``
    response header and polled once per second until the status is neither
    ``running`` nor ``not_started``. On success every recognized line is
    printed; otherwise the final status is reported.

    Args:
        image_file: Path of the image or PDF to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use Read API to read text in image; the file is only needed for the upload.
    with open(image_file, mode="rb") as image_data:
        read_op = cv_client.read_in_stream(image_data, raw=True)

    # Get the async operation ID so we can check for the results
    operation_location = read_op.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    read_results = cv_client.get_read_result(operation_id)
    while read_results.status in pending:
        time.sleep(1)
        read_results = cv_client.get_read_result(operation_id)

    # Report a failed operation instead of finishing silently
    if read_results.status != OperationStatusCodes.succeeded:
        print('Read operation ended with status: {}'.format(read_results.status))
        return

    # The operation was successful: process the text line by line
    for page in read_results.analyze_result.read_results:
        for line in page.lines:
            print(line.text)


# Run the interactive menu only when this file is executed as a script.
if __name__ == "__main__":
    main()
import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials

def main():
    """Authenticate with Azure Computer Vision and run the selected demo.

    The service key and endpoint are read from the ``COG_SERVICE_KEY`` and
    ``COG_SERVICE_ENDPOINT`` environment variables so that no secret is
    stored in the source file. The authenticated client is published as the
    module-level ``cv_client`` that ``GetTextOcr`` and ``GetTextRead`` use.

    The user is then prompted for a menu choice: ``1`` runs the OCR API on
    ``images/Lincoln.jpg``, ``2`` runs the Read API on ``images/Rome.pdf``,
    ``3`` runs the Read API on ``images/Note.jpg``; any other input quits.

    Any exception raised along the way is caught and printed rather than
    propagated.
    """
    global cv_client
    try:
        # Get Configuration Settings (never hard-code the key in the script)
        key = os.environ.get('COG_SERVICE_KEY')
        endpoint = os.environ.get('COG_SERVICE_ENDPOINT')
        if not key or not endpoint:
            raise RuntimeError(
                'Set the COG_SERVICE_KEY and COG_SERVICE_ENDPOINT '
                'environment variables before running.'
            )

        # Authenticate Computer Vision client
        credential = CognitiveServicesCredentials(key)
        cv_client = ComputerVisionClient(endpoint, credential)

        # Menu for text reading functions
        print('1: Use OCR API\n2: Use Read API\n3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')
        if command == '1':
            GetTextOcr(os.path.join('images', 'Lincoln.jpg'))
        elif command == '2':
            GetTextRead(os.path.join('images', 'Rome.pdf'))
        elif command == '3':
            GetTextRead(os.path.join('images', 'Note.jpg'))

    except Exception as ex:
        # Top-level boundary of the script: report the problem and exit quietly.
        print(ex)

def GetTextOcr(image_file):
    """Read printed text from an image with the OCR API and save an annotated copy.

    The file is sent to ``cv_client.recognize_printed_text_in_stream``. Each
    recognized line is printed, its bounding box is outlined in magenta on
    the image, and the annotated picture is saved as ``ocr_results.jpg`` in
    the current working directory.

    Args:
        image_file: Path of the image to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use OCR API to read text in image
    with open(image_file, mode="rb") as image_data:
        ocr_results = cv_client.recognize_printed_text_in_stream(image_data)

    # Prepare image for drawing
    outputfile = 'ocr_results.jpg'
    fig = plt.figure(figsize=(7, 7))
    try:
        # The context manager releases the image file handle when we are done.
        with Image.open(image_file) as img:
            draw = ImageDraw.Draw(img)

            # Process the text line by line
            for region in ocr_results.regions:
                for line in region.lines:
                    # Show the position of the line of text; the bounding box
                    # is parsed as four comma-separated integers.
                    left, top, width, height = map(int, line.bounding_box.split(','))
                    draw.rectangle(
                        ((left, top), (left + width, top + height)),
                        outline='magenta',
                        width=5,
                    )

                    # Read the words in the line of text
                    print(' '.join(word.text for word in line.words).rstrip())

            # Save the image with the text locations highlighted
            plt.axis('off')
            plt.imshow(img)
            fig.savefig(outputfile)
    finally:
        # Free the figure even if drawing or saving failed.
        plt.close(fig)
    print('Results saved in', outputfile)

def GetTextRead(image_file):
    """Read text from a file with the asynchronous Read API and print it.

    The file is submitted with ``cv_client.read_in_stream``; the operation id
    is taken from the last path segment of the ``Operation-Location``
    response header and polled once per second until the status is neither
    ``running`` nor ``not_started``. On success every recognized line is
    printed; otherwise the final status is reported.

    Args:
        image_file: Path of the image or PDF to analyze.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use Read API to read text in image; the file is only needed for the upload.
    with open(image_file, mode="rb") as image_data:
        read_op = cv_client.read_in_stream(image_data, raw=True)

    # Get the async operation ID so we can check for the results
    operation_location = read_op.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    read_results = cv_client.get_read_result(operation_id)
    while read_results.status in pending:
        time.sleep(1)
        read_results = cv_client.get_read_result(operation_id)

    # Report a failed operation instead of finishing silently
    if read_results.status != OperationStatusCodes.succeeded:
        print('Read operation ended with status: {}'.format(read_results.status))
        return

    # The operation was successful: process the text line by line
    for page in read_results.analyze_result.read_results:
        for line in page.lines:
            print(line.text)

# Run the interactive menu only when this file is executed as a script.
if __name__ == "__main__":
    main()

Step 3. Verify the app by running it:

C:\Hans\AI-102-AIEngineer\20-ocr\Python\read-text> python .\read-text.py
1: Use OCR API
2: Use Read API
3: Read handwriting
Any other key to quit
Enter a number:3
Reading text in images\Note.jpg
Shopping List
Non-Fat milk
Bread
Eggs
C:\Hans\AI-102-AIEngineer\20-ocr\Python\read-text> python .\read-text.py 1: Use OCR API 2: Use Read API 3: Read handwriting Any other key to quit Enter a number:3 Reading text in images\Note.jpg Shopping List Non-Fat milk Bread Eggs
C:\Hans\AI-102-AIEngineer\20-ocr\Python\read-text> python .\read-text.py
1: Use OCR API
2: Use Read API
3: Read handwriting
Any other key to quit
Enter a number:3
Reading text in images\Note.jpg

Shopping List
Non-Fat milk
Bread
Eggs
Note.jpg

9 Replies to “OCR and text read with Azure Computer Vision”

  1. This design is spectacular! You most certainly know
    how to keep a reader amused. Between your wit and your videos, I was almost moved to start my own blog (well, almost…HaHa!)
    Excellent job. I really enjoyed what you had to say, and more than that,
    how you presented it. Too cool!

  2. What’s Taking place i am new to this, I stumbled upon this I
    have found It positively useful and it has helped me out
    loads. I hope to give a contribution & aid other customers like its aided me.
    Great job.

  3. This is the right site for everyone who wants to understand this topic.
    You understand a whole lot its almost hard to argue with you
    (not that I personally will need to?HaHa). You certainly put a new spin on a subject which has been written about for many years.
    Excellent stuff, just wonderful!

    Look at my web blog – Zenzi CBD Gummies Reviews

Leave a Reply

Your email address will not be published. Required fields are marked *