Skip to content

Commit 377b2b4

Browse files
committed
lambda-read-pdf
1 parent 478860e commit 377b2b4

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

lambda/lambda_read_pdf_s3_trigger.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#-*- coding: utf-8 -*-
2+
__author__ = "Chirag Rathod (Srce Cde)"
3+
__license__ = "MIT"
4+
__email__ = "chiragr83@gmail.com"
5+
__maintainer__ = "Chirag Rathod (Srce Cde)"
6+
7+
import json
8+
import boto3
9+
import fitz
10+
11+
def _page_text(page):
    """Return the plain text of a single PyMuPDF page.

    Newer PyMuPDF releases expose the extractor as ``get_text``; older ones
    only have the camelCase ``getText``. Prefer the new name and fall back
    to the legacy one so the handler works with either installed version.
    """
    extract = getattr(page, "get_text", None) or page.getText
    return extract()


def lambda_handler(event, context):
    """Read PDF file(s) from S3 on trigger and print their text.

    For every record in ``event["Records"]`` the referenced S3 object is
    downloaded fully into memory, opened as a PDF with PyMuPDF (``fitz``),
    and the text of all of its pages is concatenated and printed.

    Args:
        event: S3 event notification payload. A falsy event (``None`` or an
            empty dict) is treated as "nothing to do".
        context: Lambda context object; not used.

    Returns:
        dict: ``statusCode`` 200 and a JSON-encoded thank-you ``body``.

    Raises:
        KeyError: If a truthy event lacks the ``Records`` / ``s3`` /
            ``bucket`` / ``object`` structure this handler reads.
    """
    # Function-scope import keeps the new dependency local to this handler.
    from urllib.parse import unquote_plus

    if event:
        # Create the client only when there is something to fetch.
        s3 = boto3.client("s3")
        for record in event["Records"]:
            s3_info = record["s3"]
            # fetching bucket name from event
            bucket_name = str(s3_info["bucket"]["name"])
            # Object keys arrive URL-encoded in S3 event notifications
            # (e.g. a space becomes "+"), so decode before calling get_object.
            object_key = unquote_plus(str(s3_info["object"]["key"]))
            # retrieving object from S3 and reading the botocore stream
            s3_object = s3.get_object(Bucket=bucket_name, Key=object_key)
            pdf_bytes = s3_object["Body"].read()

            # loading pdf from memory/stream and joining the text of all pages
            with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
                text = "".join(_page_text(page) for page in doc)

            print(text)

    return {
        'statusCode': 200,
        'body': json.dumps('Thanks from Srce Cde!')
    }

0 commit comments

Comments
 (0)