AWS 上传文件、删除文件、图像识别(二)

# Section: plain text recognition only.
# Analyzes text in a document stored in an S3 bucket. Display polygon box
# around text and angled text.
import io
from io import BytesIO
import sys
import math

import boto3
from PIL import Image, ImageDraw, ImageFont


def _box_to_rectangle(box, width, height):
    """Convert a ratio-based bounding box into pixel corner coordinates.

    Args:
        box: Mapping with 'Left', 'Top', 'Width' and 'Height' entries; each
            value is multiplied by the image dimensions, so they are treated
            as fractions of the image size.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        ``[left, top, right, bottom]`` in pixels, the form accepted by
        ``ImageDraw.rectangle``.
    """
    left = width * box['Left']
    top = height * box['Top']
    return [left, top,
            left + (width * box['Width']),
            top + (height * box['Height'])]


def ShowBoundingBox(draw, box, width, height, boxColor):
    """Draw the outline of ``box`` on ``draw`` using ``boxColor``."""
    draw.rectangle(_box_to_rectangle(box, width, height), outline=boxColor)


def ShowSelectedElement(draw, box, width, height, boxColor):
    """Draw ``box`` on ``draw`` as a rectangle filled with ``boxColor``."""
    draw.rectangle(_box_to_rectangle(box, width, height), fill=boxColor)


def DisplayBlockInformation(block):
    """Print information about a block returned by text detection/analysis.

    Args:
        block: One entry of the 'Blocks' list in the analysis response. It
            must contain 'Id', 'BlockType' and 'Geometry'; the other fields
            are printed only when present or when relevant to the block type.
    """
    print('Id: {}'.format(block['Id']))
    if 'Text' in block:
        print('Detected: ' + block['Text'])
    print('Type: ' + block['BlockType'])

    if 'Confidence' in block:
        print('Confidence: ' + "{:.2f}".format(block['Confidence']) + "%")

    if block['BlockType'] == 'CELL':
        print("Cell information")
        print("Column:" + str(block['ColumnIndex']))
        print("Row:" + str(block['RowIndex']))
        print("Column Span:" + str(block['ColumnSpan']))
        # Fixed: the original printed ColumnSpan under the RowSpan label.
        print("RowSpan:" + str(block['RowSpan']))

    if 'Relationships' in block:
        print('Relationships: {}'.format(block['Relationships']))

    print('Geometry: ')
    print('Bounding Box: {}'.format(block['Geometry']['BoundingBox']))
    print('Polygon: {}'.format(block['Geometry']['Polygon']))

    if block['BlockType'] == "KEY_VALUE_SET":
        print('Entity Type: ' + block['EntityTypes'][0])

    if block['BlockType'] == 'SELECTION_ELEMENT':
        print('Selection element detected: ', end='')
        if block['SelectionStatus'] == 'SELECTED':
            print('Selected')
        else:
            print('Not selected')

    if 'Page' in block:
        # Fixed: 'Page' is a number, so it must be converted before being
        # concatenated with a string (the original raised TypeError here).
        print('Page: ' + str(block['Page']))
    print()


def process_text_analysis(bucket, document):
    """Analyze an image stored in S3 with Textract and display the result.

    Downloads the object, runs ``analyze_document`` with the TABLES and FORMS
    features, prints every returned block, draws colored boxes on the image
    (red = key, green = value, blue = table, yellow = cell, filled blue =
    selected selection element) and opens the annotated image.

    Args:
        bucket: Name of the S3 bucket that holds the document.
        document: Key of the document object inside ``bucket``.

    Returns:
        The number of blocks returned by Textract.
    """
    # Get the document from S3
    s3_connection = boto3.resource('s3')
    s3_object = s3_connection.Object(bucket, document)
    s3_response = s3_object.get()

    stream = io.BytesIO(s3_response['Body'].read())
    image = Image.open(stream)

    # Analyze the document
    client = boto3.client('textract')
    image_binary = stream.getvalue()
    response = client.analyze_document(
        Document={'Bytes': image_binary},
        FeatureTypes=["TABLES", "FORMS"])

    ### Alternatively, process using S3 object ###
    # response = client.analyze_document(
    #     Document={'S3Object': {'Bucket': bucket, 'Name': document}},
    #     FeatureTypes=["TABLES", "FORMS"])

    ### To use a local file ###
    # with open("pathToFile", 'rb') as img_file:
    #     ### To display image using PIL ###
    #     image = Image.open()
    #     ### Read bytes ###
    #     img_bytes = img_file.read()
    #     response = client.analyze_document(
    #         Document={'Bytes': img_bytes}, FeatureTypes=["TABLES", "FORMS"])

    # Get the text blocks
    blocks = response['Blocks']
    width, height = image.size
    # One drawing context is enough; the original rebuilt it per block.
    draw = ImageDraw.Draw(image)
    print('Detected Document Text')

    # Create image showing bounding box/polygon the detected lines/text
    for block in blocks:
        DisplayBlockInformation(block)

        block_type = block['BlockType']
        bounding_box = block['Geometry']['BoundingBox']

        if block_type == "KEY_VALUE_SET":
            color = 'red' if block['EntityTypes'][0] == "KEY" else 'green'
            ShowBoundingBox(draw, bounding_box, width, height, color)
        elif block_type == 'TABLE':
            ShowBoundingBox(draw, bounding_box, width, height, 'blue')
        elif block_type == 'CELL':
            ShowBoundingBox(draw, bounding_box, width, height, 'yellow')
        elif block_type == 'SELECTION_ELEMENT':
            # Only selected elements are highlighted.
            if block['SelectionStatus'] == 'SELECTED':
                ShowSelectedElement(draw, bounding_box, width, height, 'blue')

        # uncomment to draw polygon for all Blocks
        # points = []
        # for polygon in block['Geometry']['Polygon']:
        #     points.append((width * polygon['X'], height * polygon['Y']))
        # draw.polygon((points), outline='blue')

    # Display the image
    image.show()
    return len(blocks)


def main():
    """Run the analysis for the configured bucket/document and report it."""
    # Fill in the S3 bucket name and object key before running.
    bucket = ''
    document = ''
    block_count = process_text_analysis(bucket, document)
    print("Blocks detected: " + str(block_count))


if __name__ == "__main__":
    main()

推荐阅读