Skip to content

add crawl_google_scholar_citation.py #3879

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 13, 2020
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ repos:
hooks:
- id: codespell
args:
- --ignore-words-list=ans,fo,followings,hist,iff,secant,som,tim
- --ignore-words-list=ans,fo,followings,hist,iff,mater,secant,som,tim
- --skip="./.*,./other/dictionary.txt,./other/words,./project_euler/problem_022/p022_names.txt"
- --quiet-level=2
exclude: |
Expand Down
32 changes: 32 additions & 0 deletions web_programming/crawl_google_scholar_citation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Get the citation number of a publication from Google Scholar,
looked up by its title and year of publication and by the journal's volume and pages.
"""

import requests
from bs4 import BeautifulSoup


def get_citation(base_url: str, params: dict) -> str:
    """
    Return the citation number.

    Sends a GET request to ``base_url`` with ``params`` as the query string,
    parses the response body as HTML and returns the text of the third link
    found in the footer (``div.gs_fl``) of the first result (``div.gs_ri``).

    Args:
        base_url: URL of the lookup endpoint to query.
        params: Query parameters forwarded unchanged to ``requests.get``.

    Returns:
        The text of the third anchor in the first result's footer row
        (presumably the "Cited by N" link -- confirm against the live markup).

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        AttributeError: If the page contains no ``div.gs_ri`` / ``div.gs_fl``.
        IndexError: If the footer row holds fewer than three links.
    """
    # Without an explicit timeout, requests waits forever on a stalled server.
    response = requests.get(base_url, params=params, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")
    div = soup.find("div", attrs={"class": "gs_ri"})
    anchors = div.find("div", attrs={"class": "gs_fl"}).find_all("a")
    return anchors[2].get_text()


if __name__ == "__main__":
    # Example run: look up one journal article and print the text returned
    # by get_citation for it.
    lookup_url = "http://scholar.google.com/scholar_lookup"
    article_title = (
        "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
        "capacitance, volumetric capacitance, and energy density"
    )
    params = {
        "title": article_title,
        "journal": "Chem. Mater.",
        "volume": 30,
        "pages": "3979-3990",
        "year": 2018,
        "hl": "en",
    }
    citation_text = get_citation(lookup_url, params=params)
    print(citation_text)