Skip to content

Commit b6eb448

Browse files
JDeepD and cclauss authored
Added reddit.py to get data from reddit (#5698)
* Rewritten reddit.py
* Removed logging module import
* Fixed minor bug which was causing extreme rate limiting
* Update reddit.py
* Update reddit.py
* Update reddit.py

Co-authored-by: Christian Clauss <cclauss@me.com>
1 parent 3815a97 commit b6eb448

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

web_programming/reddit.py

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from __future__ import annotations
2+
3+
import requests
4+
5+
# Names of the post fields that callers may request through ``wanted_data``.
# get_subreddit_data() rejects any requested name that is not in this set.
# The names are kept in one whitespace-separated string and split at import
# time so the list stays compact and easy to extend.
valid_terms = set(
    """approved_at_utc approved_by author_flair_background_color
    author_flair_css_class author_flair_richtext author_flair_template_id
    author_fullname author_premium can_mod_post category clicked
    content_categories created_utc downs edited gilded gildings hidden hide_score
    is_created_from_ads_ui is_meta is_original_content is_reddit_media_domain
    is_video link_flair_css_class link_flair_richtext link_flair_text
    link_flair_text_color media_embed mod_reason_title name permalink pwls
    quarantine saved score secure_media secure_media_embed selftext subreddit
    subreddit_name_prefixed subreddit_type thumbnail title top_awarded_type
    total_awards_received ups upvote_ratio url user_reports""".split()
)
16+
17+
18+
def get_subreddit_data(
    subreddit: str, limit: int = 1, age: str = "new", wanted_data: list | None = None
) -> dict:
    """
    Fetch posts from a subreddit's JSON listing.

    subreddit : Subreddit to query
    limit : Number of posts to fetch
    age : ["new", "top", "hot"]
    wanted_data : Get only the required data in the list

    Returns a dict keyed by the 0-based position of each post in the listing.
    When ``wanted_data`` is empty or None the value is the whole child entry
    from the response; otherwise it is a dict holding only the requested
    fields.  If the listing contains fewer than ``limit`` posts, only the
    posts that were actually returned are included.

    Raises ValueError if ``wanted_data`` contains a name that is not in
    ``valid_terms`` (checked before any request is made), and
    requests.HTTPError if the server answers 429 (rate limited) or any other
    error status.

    >>> get_subreddit_data("learnpython", wanted_data=["not_a_field"])
    Traceback (most recent call last):
        ...
    ValueError: Invalid search term: not_a_field
    """
    wanted_data = wanted_data or []
    if invalid_search_terms := ", ".join(sorted(set(wanted_data) - valid_terms)):
        raise ValueError(f"Invalid search term: {invalid_search_terms}")

    response = requests.get(
        f"https://reddit.com/r/{subreddit}/{age}.json?limit={limit}",
        headers={"User-agent": "A random string"},
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    if response.status_code == 429:
        raise requests.HTTPError(
            "429 Too Many Requests: rate limited, try again after some time",
            response=response,
        )
    # Surface every other 4xx/5xx as HTTPError instead of failing later with
    # an unrelated JSON or key error.
    response.raise_for_status()

    # Slice rather than index so a short listing cannot raise IndexError;
    # max() keeps a negative limit equivalent to "no posts".
    posts = response.json()["data"]["children"][: max(limit, 0)]
    if not wanted_data:
        return dict(enumerate(posts))

    return {
        index: {item: post["data"][item] for item in wanted_data}
        for index, post in enumerate(posts)
    }
49+
50+
51+
if __name__ == "__main__":
    # An HTTP 429 error means you are being rate limited; wait a while before
    # running the script again.
    requested_fields = ["title", "url", "selftext"]
    print(get_subreddit_data("learnpython", wanted_data=requested_fields))

0 commit comments

Comments
 (0)