Skip to content

Commit 56f6afb

Browse files
author
John Demos
committed
JD: Adds new Search script for demonstrating Tweet Type function.
1 parent 9a1ceda commit 56f6afb

File tree

1 file changed

+155
-0
lines changed

1 file changed

+155
-0
lines changed

Search-API/search_tweet_type.py

+155
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Enterprise Search Tweets - Data request with Tweet type function to demonstrate how to classify Tweets
2+
# Supports data request only (not counts) and returns parsed Tweet payload (only select fields)
3+
import argparse
4+
import json
5+
import os
6+
import sys
7+
8+
import requests
9+
from dotenv import load_dotenv
10+
load_dotenv(verbose=True)  # verbose=True emits a warning (it does not raise) when no .env file is found
11+
12+
# Command-line options. Run `python search_tweet_type.py -h` to see the list of arguments.
# NOTE: parsing happens at import time; the functions below read the module-level `args`.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-r", "--request_file",
    help="Use json file for request body",
    action="store_true",
)
parser.add_argument("-q", "--query", help="A valid query up to 2,048 characters")
parser.add_argument(
    "-f", "--from_date",
    help="Oldest date from which results will be provided",
)
parser.add_argument(
    "-t", "--to_date",
    help="Most recent date to which results will be provided",
)
# Adjacent string literals instead of a backslash continuation inside the
# literal, so source indentation never leaks into the help text.
parser.add_argument(
    "-m", "--max_results",
    help="Maximum number of results returned by a single "
         "request/response cycle (range: 10-500, default: 100)",
)
parser.add_argument(
    "-b", "--bucket",
    choices=['day', 'hour', 'minute'],
    help="The unit of time for which count data will be provided.",
)
parser.add_argument(
    "-n", "--next",
    help="Auto paginate through next tokens",
    action="store_true",
)
args = parser.parse_args()
25+
26+
# Credentials and endpoint settings, looked up in the process environment
# (which load_dotenv populates from the '.env' file). Any name that is not
# set resolves to None.
# NOTE(review): python-dotenv does not override variables that already exist
# in the environment by default -- confirm USERNAME is not shadowed by an
# OS-provided login variable on the target platform.
USERNAME = os.environ.get("USERNAME")
PASSWORD = os.environ.get("PASSWORD")
ACCOUNT_NAME = os.environ.get("ACCOUNT_NAME")
ENDPOINT_LABEL = os.environ.get("SEARCH_LABEL")
ARCHIVE = os.environ.get("SEARCH_ARCHIVE")
32+
33+
34+
def main():
    """Request Tweets from the search endpoint and print a parsed summary.

    Builds the request body, posts it to the search endpoint, and prints the
    selected fields of every Tweet in the response as JSON. When the ``-n``
    flag was passed and the response carries a ``next`` token, keeps
    requesting and printing subsequent pages until no token is returned.
    """
    search_endpoint = f"https://gnip-api.twitter.com/search/{ARCHIVE}/accounts/{ACCOUNT_NAME}/{ENDPOINT_LABEL}.json"
    # Build request body from file if requested, else use cli args
    request_body = build_request_body()

    # First request: deserialize the JSON response and print its Tweets
    first_response = make_request(search_endpoint, request_body)
    json_response = json.loads(first_response.text)
    _print_parsed_results(json_response["results"])

    # Pagination logic (if -n flag is passed, paginate through the results)
    next_token = json_response.get("next")
    if next_token is None or not args.next:
        print("Request complete.")
        return

    request_count = 1  # Number of requests made so far (the first one above)
    while next_token is not None:
        # Ask for the following page by adding the token to the same body
        request_body.update(next=next_token)
        response = make_request(search_endpoint, request_body)
        # Parse *this* page: its own results and its own 'next' token.
        # (Iterating the first page's results here would re-print page one.)
        page = json.loads(response.text)
        _print_parsed_results(page["results"])
        next_token = page.get("next")
        request_count += 1
    print(f"Done paginating.\nTotal requests made: {request_count}")


def _print_parsed_results(tweet_results):
    """Print id, text, type and hyperlink of each Tweet in *tweet_results* as JSON."""
    parsed_results = {"parsed_results": []}
    for tweet in tweet_results:
        # Prefer the full text of an extended Tweet over the top-level text
        if check_for_extended_tweet(tweet):
            text = tweet["extended_tweet"]["full_text"]
        else:
            text = tweet["text"]
        parsed_results["parsed_results"].append({
            "tweet_id": tweet["id_str"],
            "text": text,
            "tweet_type": determine_tweet_type(tweet),
            "hyperlink": "https://twitter.com/twitter/status/" + tweet["id_str"],
        })
    print(json.dumps(parsed_results, indent=2, sort_keys=True))
97+
98+
99+
def build_request_body():
    """Build the request body from ``request.json`` or from the CLI options.

    When the ``-r/--request_file`` flag was passed, the body is whatever
    ``request.json`` (relative to the current working directory) contains and
    the other CLI options are ignored. Otherwise the body holds one entry per
    CLI option that was given a non-empty value.

    Returns:
        The deserialized contents of ``request.json``, or a dict with any of
        the keys ``query``, ``fromDate``, ``toDate``, ``maxResults``,
        ``bucket``.

    Raises:
        OSError: if ``request.json`` cannot be opened.
        json.JSONDecodeError: if ``request.json`` is not valid JSON.
    """
    # Request file will override CLI options
    if args.request_file:
        # Read as UTF-8 explicitly instead of relying on the locale's
        # default encoding, which varies by platform.
        with open("request.json", "r", encoding="utf-8") as read_file:
            return json.load(read_file)

    cli_fields = {
        "query": args.query,
        "fromDate": args.from_date,
        "toDate": args.to_date,
        "maxResults": args.max_results,
        "bucket": args.bucket,
    }
    # Only send the options the user actually supplied
    return {key: value for key, value in cli_fields.items() if value}
118+
119+
120+
def make_request(endpoint, request_body, timeout=30):
    """POST *request_body* as JSON to *endpoint* using basic authentication.

    Args:
        endpoint: URL to post to.
        request_body: JSON-serializable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the call could block indefinitely.

    Returns:
        The ``requests`` response object of a successful (non-error) request.

    Exits:
        Prints the error and exits the process with status 120 when the
        request fails or the server answers with an HTTP error status.
    """
    try:
        response = requests.post(
            url=endpoint,
            auth=(USERNAME, PASSWORD),
            json=request_body,
            timeout=timeout,
        )
        # Fail here on 4xx/5xx rather than handing an error payload to the
        # caller, which expects a body containing "results".
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print(e)
        # The response body usually carries the API's explanation
        if e.response is not None:
            print(e.response.text)
        sys.exit(120)
    except requests.exceptions.RequestException as e:
        print(e)
        sys.exit(120)

    return response
128+
129+
130+
def determine_tweet_type(tweet):
    """Classify a Tweet payload as a reply, Retweet, Quote Tweet or original.

    Args:
        tweet: Tweet payload as a dict. Missing fields are treated as absent
            indicators rather than raising ``KeyError``.

    Returns:
        One of ``"Reply Tweet"``, ``"Retweet"``, ``"Quote Tweet"`` or
        ``"Original Tweet"``.
    """
    # Check for reply indicator first
    if tweet.get("in_reply_to_status_id") is not None:
        return "Reply Tweet"
    # The presence of a retweeted_status object marks a Retweet. This is
    # checked before the quote flag so that a Retweet of a Quote Tweet is
    # reported as a Retweet, and so that a text that merely starts with "RT"
    # is not mistaken for one.
    if tweet.get("retweeted_status") is not None:
        return "Retweet"
    # Boolean quote status field (Retweets were already ruled out above)
    if tweet.get("is_quote_status") is True:
        return "Quote Tweet"
    return "Original Tweet"
144+
145+
146+
def check_for_extended_tweet(tweet):
    """Return True when *tweet* carries a usable ``extended_tweet`` object.

    Args:
        tweet: Tweet payload as a dict.

    Returns:
        True if the ``extended_tweet`` key is present and not null, else
        False. A null value counts as absent so callers can safely index
        into ``tweet["extended_tweet"]`` whenever this returns True.
    """
    return tweet.get("extended_tweet") is not None
152+
153+
154+
# Run the search only when executed as a script, not when imported
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)