From 3e5e9a38fcafa08cbef929bc8f12544b7800100a Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Wed, 11 Nov 2020 23:50:29 +0900
Subject: [PATCH 01/12] add crawl_google_scholar_citation.py

---
 .../crawl_google_scholar_citation.py          | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 web_programming/crawl_google_scholar_citation.py

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
new file mode 100644
index 000000000000..e639741d532c
--- /dev/null
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -0,0 +1,64 @@
+
+"""
+Get the citation from google scholar
+using title and year of publication, and volume and pages of journal.
+"""
+
+from bs4 import BeautifulSoup
+import requests
+import re
+
+
+def create_url(title: str, journal: str, volume: str, page: str, year: str) -> str:
+    """
+    Return the url.
+    """
+    url = f"http://scholar.google.com/scholar_lookup?hl=en&title={title}&journal={journal}&volume={volume}&pages={page}&publication_year={year}"
+    url = remove_tag(url)
+    return url.replace(" ", "%")
+
+
+def remove_tag(url: str) -> str:
+    """
+    Return the url removed the html tags.
+    """
+    tag = re.compile('<.*?>')
+    clean_url = re.sub(tag, '', url)
+    return clean_url
+
+
+def get_citation(url: str) -> str:
+    """
+    Return the citation number.
+    """
+    url = requests.get(url).text
+    soup = BeautifulSoup(url, "html.parser")
+    get_div = soup.find(u"div", attrs={u"class": u"gs_ri"})
+    get_a_tag = get_div.find(u"div", attrs={u"class": u"gs_fl"}).findAll('a')
+    citation = get_a_tag[2].get_text()
+    if 'Cited' not in citation:
+        citation = 'Cited by 0'
+
+    return citation.replace("Cited by ", "")
+
+
+if __name__ == '__main__':
+    """
+    You have to fill following values: title, journal_name, volume, page, year. 
+    
+    For example,
+    title = "Precisely geometry controlled microsupercapacitors for ultrahigh areal capacitance, volumetric capacitance, and energy density"
+    journal_name = "Chem. Mater"
+    volume = "30"
+    page = "3979-3990"
+    year = "2018"
+    """
+    title = ""
+    journal_name = ""
+    volume = ""
+    page = ""
+    year = ""
+
+    citation = get_citation(create_url(title, journal_name, volume, page, year))
+    print(citation)
+

From 47efd43125e7efbd790b645ef1aaf9a938209421 Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Thu, 12 Nov 2020 00:07:10 +0900
Subject: [PATCH 02/12] pass flake8

---
 .../crawl_google_scholar_citation.py           | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index e639741d532c..1fb52efa5dd7 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -13,13 +13,19 @@ def create_url(title: str, journal: str, volume: str, page: str, year: str) -> s
     """
     Return the url.
     """
-    url = f"http://scholar.google.com/scholar_lookup?hl=en&title={title}&journal={journal}&volume={volume}&pages={page}&publication_year={year}"
+    url = f"http://scholar.google.com/scholar_lookup?hl=en&" \
+          f"title={title}" \
+          f"&journal={journal}" \
+          f"&volume={volume}" \
+          f"&pages={page}" \
+          f"&publication_year={year}"
     url = remove_tag(url)
     return url.replace(" ", "%")
 
 
 def remove_tag(url: str) -> str:
     """
+    Remove the html tags in 'url'.
     Return the url removed the html tags.
     """
     tag = re.compile('<.*?>')
@@ -44,14 +50,13 @@ def get_citation(url: str) -> str:
 
 if __name__ == '__main__':
     """
-    You have to fill following values: title, journal_name, volume, page, year. 
-    
+    You have to fill following values: title, journal_name, volume, page, year.
     For example,
-    title = "Precisely geometry controlled microsupercapacitors for ultrahigh areal capacitance, volumetric capacitance, and energy density"
-    journal_name = "Chem. Mater"
+    title = "abcde"
+    journal_name = "fgh"
     volume = "30"
     page = "3979-3990"
-    year = "2018"
+    year = "2020"
     """
     title = ""
     journal_name = ""
@@ -61,4 +66,3 @@ def get_citation(url: str) -> str:
 
     citation = get_citation(create_url(title, journal_name, volume, page, year))
     print(citation)
-

From e08b235bbe089027f9ee16901ae0e44b83036c05 Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Thu, 12 Nov 2020 00:28:42 +0900
Subject: [PATCH 03/12] pass isort

---
 web_programming/crawl_google_results.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
index a33a3f3bbe5c..caacea52c3e5 100644
--- a/web_programming/crawl_google_results.py
+++ b/web_programming/crawl_google_results.py
@@ -3,6 +3,7 @@
 
 import requests
 from bs4 import BeautifulSoup
+
 from fake_useragent import UserAgent
 
 if __name__ == "__main__":

From a614c9d4b823c6caa73df8d17fdd17df68d6da6a Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Thu, 12 Nov 2020 00:33:33 +0900
Subject: [PATCH 04/12] pass isort

---
 .../crawl_google_scholar_citation.py          | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index 1fb52efa5dd7..a45a20610a43 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -1,24 +1,26 @@
-
 """
 Get the citation from google scholar
 using title and year of publication, and volume and pages of journal.
 """
 
-from bs4 import BeautifulSoup
-import requests
 import re
 
+import requests
+from bs4 import BeautifulSoup
+
 
 def create_url(title: str, journal: str, volume: str, page: str, year: str) -> str:
     """
     Return the url.
     """
-    url = f"http://scholar.google.com/scholar_lookup?hl=en&" \
-          f"title={title}" \
-          f"&journal={journal}" \
-          f"&volume={volume}" \
-          f"&pages={page}" \
-          f"&publication_year={year}"
+    url = (
+        f"http://scholar.google.com/scholar_lookup?hl=en&"
+        f"title={title}"
+        f"&journal={journal}"
+        f"&volume={volume}"
+        f"&pages={page}"
+        f"&publication_year={year}"
+    )
     url = remove_tag(url)
     return url.replace(" ", "%")
 
@@ -28,8 +30,8 @@ def remove_tag(url: str) -> str:
     Remove the html tags in 'url'.
     Return the url removed the html tags.
     """
-    tag = re.compile('<.*?>')
-    clean_url = re.sub(tag, '', url)
+    tag = re.compile("<.*?>")
+    clean_url = re.sub(tag, "", url)
     return clean_url
 
 
@@ -39,20 +41,20 @@ def get_citation(url: str) -> str:
     """
     url = requests.get(url).text
     soup = BeautifulSoup(url, "html.parser")
-    get_div = soup.find(u"div", attrs={u"class": u"gs_ri"})
-    get_a_tag = get_div.find(u"div", attrs={u"class": u"gs_fl"}).findAll('a')
+    get_div = soup.find("div", attrs={"class": "gs_ri"})
+    get_a_tag = get_div.find("div", attrs={"class": "gs_fl"}).findAll("a")
     citation = get_a_tag[2].get_text()
-    if 'Cited' not in citation:
-        citation = 'Cited by 0'
+    if "Cited" not in citation:
+        citation = "Cited by 0"
 
     return citation.replace("Cited by ", "")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     """
     You have to fill following values: title, journal_name, volume, page, year.
     For example,
-    title = "abcde"
+    title = "abc de"
     journal_name = "fgh"
     volume = "30"
     page = "3979-3990"

From 0ee618ff24bb47831ae8caa70236e2a66f06bd27 Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Thu, 12 Nov 2020 09:58:28 +0900
Subject: [PATCH 05/12] change comment in main

---
 web_programming/crawl_google_scholar_citation.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index a45a20610a43..ba11875a6f77 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -53,12 +53,6 @@ def get_citation(url: str) -> str:
 if __name__ == "__main__":
     """
     You have to fill following values: title, journal_name, volume, page, year.
-    For example,
-    title = "abc de"
-    journal_name = "fgh"
-    volume = "30"
-    page = "3979-3990"
-    year = "2020"
     """
     title = ""
     journal_name = ""

From 14c55a795d8a3b75fc1063514e7c757932098457 Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Fri, 13 Nov 2020 19:39:27 +0900
Subject: [PATCH 06/12] modify main code

---
 web_programming/crawl_google_scholar_citation.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index ba11875a6f77..1e582a284c31 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -54,11 +54,15 @@ def get_citation(url: str) -> str:
     """
     You have to fill following values: title, journal_name, volume, page, year.
     """
-    title = ""
-    journal_name = ""
-    volume = ""
-    page = ""
-    year = ""
+    title = (
+        "Precisely geometry controlled microsupercapacitors"
+        " for ultrahigh areal capacitance,"
+        " volumetric capacitance, and energy density"
+    )
+    journal_name = "Chem. Mater."
+    volume = "30"
+    page = "3979-3990"
+    year = "2018"
 
     citation = get_citation(create_url(title, journal_name, volume, page, year))
     print(citation)

From a97c312b086976c9e6b1a60308b58f4b08aa2859 Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Fri, 13 Nov 2020 19:55:29 +0900
Subject: [PATCH 07/12] delete file

---
 web_programming/crawl_google_results.py | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 web_programming/crawl_google_results.py

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
deleted file mode 100644
index caacea52c3e5..000000000000
--- a/web_programming/crawl_google_results.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-import webbrowser
-
-import requests
-from bs4 import BeautifulSoup
-
-from fake_useragent import UserAgent
-
-if __name__ == "__main__":
-    print("Googling.....")
-    url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])
-    res = requests.get(url, headers={"UserAgent": UserAgent().random})
-    # res.raise_for_status()
-    with open("project1a.html", "wb") as out_file:  # only for knowing the class
-        for data in res.iter_content(10000):
-            out_file.write(data)
-    soup = BeautifulSoup(res.text, "html.parser")
-    links = list(soup.select(".eZt8xd"))[:5]
-
-    print(len(links))
-    for link in links:
-        if link.text == "Maps":
-            webbrowser.open(link.get("href"))
-        else:
-            webbrowser.open(f"http://google.com{link.get('href')}")

From 6c3d24d3880497471211b184c74c8b18a5f3ec8c Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Fri, 13 Nov 2020 20:35:42 +0900
Subject: [PATCH 08/12] change how to build url

---
 .../crawl_google_scholar_citation.py          | 61 +++++--------------
 1 file changed, 15 insertions(+), 46 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index 1e582a284c31..22c73f20cc99 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -3,44 +3,15 @@
 using title and year of publication, and volume and pages of journal.
 """
 
-import re
-
 import requests
 from bs4 import BeautifulSoup
 
 
-def create_url(title: str, journal: str, volume: str, page: str, year: str) -> str:
-    """
-    Return the url.
-    """
-    url = (
-        f"http://scholar.google.com/scholar_lookup?hl=en&"
-        f"title={title}"
-        f"&journal={journal}"
-        f"&volume={volume}"
-        f"&pages={page}"
-        f"&publication_year={year}"
-    )
-    url = remove_tag(url)
-    return url.replace(" ", "%")
-
-
-def remove_tag(url: str) -> str:
-    """
-    Remove the html tags in 'url'.
-    Return the url removed the html tags.
-    """
-    tag = re.compile("<.*?>")
-    clean_url = re.sub(tag, "", url)
-    return clean_url
-
-
-def get_citation(url: str) -> str:
+def get_citation(base_url: str, params: dict) -> str:
     """
     Return the citation number.
     """
-    url = requests.get(url).text
-    soup = BeautifulSoup(url, "html.parser")
+    soup = BeautifulSoup(requests.get(base_url, params=params).content, "html.parser")
     get_div = soup.find("div", attrs={"class": "gs_ri"})
     get_a_tag = get_div.find("div", attrs={"class": "gs_fl"}).findAll("a")
     citation = get_a_tag[2].get_text()
@@ -51,18 +22,16 @@ def get_citation(url: str) -> str:
 
 
 if __name__ == "__main__":
-    """
-    You have to fill following values: title, journal_name, volume, page, year.
-    """
-    title = (
-        "Precisely geometry controlled microsupercapacitors"
-        " for ultrahigh areal capacitance,"
-        " volumetric capacitance, and energy density"
-    )
-    journal_name = "Chem. Mater."
-    volume = "30"
-    page = "3979-3990"
-    year = "2018"
-
-    citation = get_citation(create_url(title, journal_name, volume, page, year))
-    print(citation)
+    params = {
+        "title": (
+            "Precisely geometry controlled microsupercapacitors"
+            " for ultrahigh areal capacitance,"
+            " volumetric capacitance, and energy density"
+        ),
+        "journal_name": "Chem. Mater.",
+        "volume": "30",
+        "page": "3979-3990",
+        "year": "2018",
+    }
+
+    print(get_citation("http://scholar.google.com/scholar_lookup?hl=en&", params=params))

From 3ee5392092162d3733f59eea8dbc976c80349042 Mon Sep 17 00:00:00 2001
From: boyun <boyoon54@gmail.com>
Date: Fri, 13 Nov 2020 21:49:45 +0900
Subject: [PATCH 09/12] add a key 'hl' in params dict

---
 web_programming/crawl_google_scholar_citation.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index 22c73f20cc99..4a96c9c7154e 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -15,10 +15,8 @@ def get_citation(base_url: str, params: dict) -> str:
     get_div = soup.find("div", attrs={"class": "gs_ri"})
     get_a_tag = get_div.find("div", attrs={"class": "gs_fl"}).findAll("a")
     citation = get_a_tag[2].get_text()
-    if "Cited" not in citation:
-        citation = "Cited by 0"
 
-    return citation.replace("Cited by ", "")
+    return citation
 
 
 if __name__ == "__main__":
@@ -28,10 +26,12 @@ def get_citation(base_url: str, params: dict) -> str:
             " for ultrahigh areal capacitance,"
             " volumetric capacitance, and energy density"
         ),
-        "journal_name": "Chem. Mater.",
+        "journal": "Chem. Mater.",
         "volume": "30",
-        "page": "3979-3990",
+        "pages": "3979-3990",
         "year": "2018",
+        "hl": "en"
+        ,
     }
 
-    print(get_citation("http://scholar.google.com/scholar_lookup?hl=en&", params=params))
+    print(get_citation("http://scholar.google.com/scholar_lookup", params=params))

From 7cf27f6c5d3d263188fd52dc6c418528b173b91c Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Fri, 13 Nov 2020 14:43:57 +0100
Subject: [PATCH 10/12] Update crawl_google_scholar_citation.py

---
 .../crawl_google_scholar_citation.py          | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index 4a96c9c7154e..d023380c0818 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -12,26 +12,21 @@ def get_citation(base_url: str, params: dict) -> str:
     Return the citation number.
     """
     soup = BeautifulSoup(requests.get(base_url, params=params).content, "html.parser")
-    get_div = soup.find("div", attrs={"class": "gs_ri"})
-    get_a_tag = get_div.find("div", attrs={"class": "gs_fl"}).findAll("a")
-    citation = get_a_tag[2].get_text()
-
-    return citation
+    div = soup.find("div", attrs={"class": "gs_ri"})
+    anchors = div.find("div", attrs={"class": "gs_fl"}).find_all("a")
+    return anchors[2].get_text()
 
 
 if __name__ == "__main__":
     params = {
         "title": (
-            "Precisely geometry controlled microsupercapacitors"
-            " for ultrahigh areal capacitance,"
-            " volumetric capacitance, and energy density"
+            "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
+            "capacitance, volumetric capacitance, and energy density"
         ),
         "journal": "Chem. Mater.",
-        "volume": "30",
+        "volume": 30,
         "pages": "3979-3990",
-        "year": "2018",
-        "hl": "en"
-        ,
+        "year": 2018,
+        "hl": "en",
     }
-
     print(get_citation("http://scholar.google.com/scholar_lookup", params=params))

From d0b60d276d144b78fce183506acb0c481d010352 Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Fri, 13 Nov 2020 14:46:50 +0100
Subject: [PATCH 11/12] Create crawl_google_results.py

---
 web_programming/crawl_google_results.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 web_programming/crawl_google_results.py

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
new file mode 100644
index 000000000000..a33a3f3bbe5c
--- /dev/null
+++ b/web_programming/crawl_google_results.py
@@ -0,0 +1,24 @@
+import sys
+import webbrowser
+
+import requests
+from bs4 import BeautifulSoup
+from fake_useragent import UserAgent
+
+if __name__ == "__main__":
+    print("Googling.....")
+    url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])
+    res = requests.get(url, headers={"UserAgent": UserAgent().random})
+    # res.raise_for_status()
+    with open("project1a.html", "wb") as out_file:  # only for knowing the class
+        for data in res.iter_content(10000):
+            out_file.write(data)
+    soup = BeautifulSoup(res.text, "html.parser")
+    links = list(soup.select(".eZt8xd"))[:5]
+
+    print(len(links))
+    for link in links:
+        if link.text == "Maps":
+            webbrowser.open(link.get("href"))
+        else:
+            webbrowser.open(f"http://google.com{link.get('href')}")

From 0a206eca1bb6d0a7e58e10839bdc4e468929d816 Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Fri, 13 Nov 2020 14:52:17 +0100
Subject: [PATCH 12/12] codespell: Mater

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 01da6cad0335..a3288e1c5eef 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: codespell
         args:
-          - --ignore-words-list=ans,fo,followings,hist,iff,secant,som,tim
+          - --ignore-words-list=ans,fo,followings,hist,iff,mater,secant,som,tim
           - --skip="./.*,./other/dictionary.txt,./other/words,./project_euler/problem_022/p022_names.txt"
           - --quiet-level=2
         exclude: |