# Plain text recognition
# Analyzes text in a document stored in an S3 bucket. Display polygon box around text and angled text
import boto3
import io
from io import BytesIO
import sys
import math
from PIL import Image, ImageDraw, ImageFont


def _pixel_rectangle(box, width, height):
    """Scale a bounding box to pixel corners ``[left, top, right, bottom]``.

    ``box`` is a mapping with ``Left``, ``Top``, ``Width`` and ``Height``
    entries that are multiplied by the image ``width``/``height``.
    """
    left = width * box['Left']
    top = height * box['Top']
    return [left, top,
            left + (width * box['Width']),
            top + (height * box['Height'])]


def ShowBoundingBox(draw, box, width, height, boxColor):
    """Draw the outline of ``box`` on ``draw`` in ``boxColor``.

    Args:
        draw: drawing object exposing ``rectangle`` (a PIL ``ImageDraw``).
        box: bounding box mapping with ``Left``/``Top``/``Width``/``Height``.
        width: image width in pixels.
        height: image height in pixels.
        boxColor: colour used for the outline.
    """
    draw.rectangle(_pixel_rectangle(box, width, height), outline=boxColor)


def ShowSelectedElement(draw, box, width, height, boxColor):
    """Draw ``box`` on ``draw`` as a rectangle filled with ``boxColor``.

    Takes the same arguments as ``ShowBoundingBox``; the only difference is
    that the rectangle is filled instead of outlined.
    """
    draw.rectangle(_pixel_rectangle(box, width, height), fill=boxColor)


# Displays information about a block returned by text detection and text analysis
def DisplayBlockInformation(block):
    """Print the fields of one block from the analysis response.

    ``Id``, ``BlockType`` and ``Geometry`` are read unconditionally; the
    remaining fields are printed only when present or when they apply to the
    block's type.
    """
    block_type = block['BlockType']

    print('Id: {}'.format(block['Id']))
    if 'Text' in block:
        print('Detected: ' + block['Text'])
    print('Type: ' + block_type)

    if 'Confidence' in block:
        print('Confidence: ' + "{:.2f}".format(block['Confidence']) + "%")

    if block_type == 'CELL':
        print("Cell information")
        print("Column:" + str(block['ColumnIndex']))
        print("Row:" + str(block['RowIndex']))
        print("Column Span:" + str(block['ColumnSpan']))
        # Fixed: this line used to print ColumnSpan under the RowSpan label.
        print("RowSpan:" + str(block['RowSpan']))

    if 'Relationships' in block:
        print('Relationships: {}'.format(block['Relationships']))

    geometry = block['Geometry']
    print('Geometry: ')
    print('Bounding Box: {}'.format(geometry['BoundingBox']))
    print('Polygon: {}'.format(geometry['Polygon']))

    if block_type == "KEY_VALUE_SET":
        print('Entity Type: ' + block['EntityTypes'][0])

    if block_type == 'SELECTION_ELEMENT':
        print('Selection element detected: ', end='')
        if block['SelectionStatus'] == 'SELECTED':
            print('Selected')
        else:
            print('Not selected')

    if 'Page' in block:
        # Fixed: str() is required because concatenating a non-string page
        # number to 'Page: ' raises TypeError.
        print('Page: ' + str(block['Page']))
    print()


def process_text_analysis(bucket, document):
    """Analyze an image stored in S3 and display it with annotated blocks.

    Downloads ``document`` from ``bucket``, sends its bytes to
    ``analyze_document`` with the TABLES and FORMS features, prints every
    returned block, draws coloured boxes for key/value sets, tables, cells
    and selected selection elements, and finally shows the image.

    Args:
        bucket: name of the S3 bucket holding the document.
        document: key of the document inside the bucket.

    Returns:
        The number of blocks in the response.
    """
    # Get the document from S3
    s3_connection = boto3.resource('s3')
    s3_object = s3_connection.Object(bucket, document)
    s3_response = s3_object.get()

    stream = io.BytesIO(s3_response['Body'].read())
    image = Image.open(stream)

    # Analyze the document
    client = boto3.client('textract')
    image_binary = stream.getvalue()
    response = client.analyze_document(
        Document={'Bytes': image_binary},
        FeatureTypes=["TABLES", "FORMS"])

    ### Alternatively, process using S3 object ###
    # response = client.analyze_document(
    #     Document={'S3Object': {'Bucket': bucket, 'Name': document}},
    #     FeatureTypes=["TABLES", "FORMS"])

    ### To use a local file ###
    # with open("pathToFile", 'rb') as img_file:
    #     ### To display image using PIL ###
    #     image = Image.open()
    #     ### Read bytes ###
    #     img_bytes = img_file.read()
    #     response = client.analyze_document(Document={'Bytes': img_bytes}, FeatureTypes=["TABLES", "FORMS"])

    # Get the text blocks
    blocks = response['Blocks']
    width, height = image.size
    # One drawing context is enough; it used to be re-created for every block.
    draw = ImageDraw.Draw(image)
    print('Detected Document Text')

    # Create image showing bounding box/polygon the detected lines/text
    for block in blocks:
        DisplayBlockInformation(block)

        block_type = block['BlockType']
        bounding_box = block['Geometry']['BoundingBox']

        if block_type == "KEY_VALUE_SET":
            # Keys are outlined in red, every other entity type in green.
            if block['EntityTypes'][0] == "KEY":
                ShowBoundingBox(draw, bounding_box, width, height, 'red')
            else:
                ShowBoundingBox(draw, bounding_box, width, height, 'green')
        elif block_type == 'TABLE':
            ShowBoundingBox(draw, bounding_box, width, height, 'blue')
        elif block_type == 'CELL':
            ShowBoundingBox(draw, bounding_box, width, height, 'yellow')
        elif block_type == 'SELECTION_ELEMENT':
            if block['SelectionStatus'] == 'SELECTED':
                ShowSelectedElement(draw, bounding_box, width, height, 'blue')

        # uncomment to draw polygon for all Blocks
        # points = []
        # for polygon in block['Geometry']['Polygon']:
        #     points.append((width * polygon['X'], height * polygon['Y']))
        # draw.polygon((points), outline='blue')

    # Display the image
    image.show()
    return len(blocks)


def main():
    """Run the analysis for the configured bucket/document and print the block count."""
    # Placeholders: set these to the S3 bucket name and document key to analyze.
    bucket = ''
    document = ''
    block_count = process_text_analysis(bucket, document)
    print("Blocks detected: " + str(block_count))


if __name__ == "__main__":
    main()
推荐阅读
- 如何在QQ群文件中创建QQ群在线文档(怎么创建手机qq群文件)
- uni-app 如何优雅的使用权限认证并对本地文件上下起手
- git中 gitignore 忽略文件操作
- Spring Boot 配置 jar 包外面的 Properties 配置文件
- 研一入坑Go文件操作
- 使用开源计算引擎提升Excel格式文件处理效率
- 电脑中怎么隐藏文件夹,怎么显示隐藏的文件
- 手机隐藏文件夹怎么显示(手机隐藏文件夹怎么显示oppo)
- 电脑里隐藏的文件夹怎么找回来(电脑怎么调出隐藏文件)
- 怎样显示隐藏的文件夹(电脑怎么调出隐藏文件)