Skip to content

Commit

Permalink
Add comments in wikipedia/wikipedia_scratcher.py
Browse files Browse the repository at this point in the history
Signed-off-by: Priyanshi Gaur <[email protected]>
  • Loading branch information
nox1134 committed Mar 12, 2024
1 parent d5fa841 commit 43973b1
Showing 1 changed file with 20 additions and 23 deletions.
43 changes: 20 additions & 23 deletions wikipedia/wikipedia_scratcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,32 @@


def get_wiki_langs():
"""Provides the list of language to find Creative Commons usage data on.
"""
Provides the list of languages to find Creative Commons usage data on.
The codes represent the language codes defined by ISO 639-1 and ISO 639-3,
and the decision of which language code to use is usually determined by the
IETF language tag policy.
(https://en.wikipedia.org/wiki/List_of_Wikipedias#Wikipedia_edition_codes)
Returns:
pd.DataFrame: A DataFrame containing information about each Wikipedia
language and its respective encoding in the web address.
- pd.DataFrame: A DataFrame containing information about each Wikipedia
language and its respective encoding in the web address.
"""
return pd.read_csv(f"{CWD}/language-codes_csv.csv")


def get_request_url(lang="en"):
"""Provides the API Endpoint URL for specified parameter combinations.
"""
Provides the API Endpoint URL for specified parameter combinations.
Args:
lang:
A string representing the language that the search results are
presented in. The default value, following Wikipedia convention,
is "en".
- lang: A string representing the language that the search results are
presented in. The default value, following Wikipedia convention, is "en".
Returns:
string: A string representing the API Endpoint URL for the query
specified by this function's parameters.
- string: A string representing the API Endpoint URL for the query
specified by this function's parameters.
"""
base_url = (
r"wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=statistics"
Expand All @@ -59,17 +59,16 @@ def get_request_url(lang="en"):


def get_response_elems(language="en"):
"""Provides the metadata for query of specified parameters
"""
Provides the metadata for the query with the specified parameters.
Args:
language:
A string representing the language that the search results are
presented in. The default value, following Wikipedia convention,
is "en".
- language: A string representing the language that the search results are
presented in. The default value, following Wikipedia convention, is "en".
Returns:
dict: A dictionary mapping metadata to its value provided from the API
query of specified parameters.
- dict: A dictionary mapping metadata to its value provided from the API
query of specified parameters.
"""
search_data = None
try:
Expand Down Expand Up @@ -114,10 +113,8 @@ def record_lang_data(lang="en"):
"""Writes the row for LICENSE_TYPE to file to contain Google Query data.
Args:
lang:
A string representing the language that the search results are
presented in. The default value, following Wikipedia convention,
is "en".
- lang: A string representing the language that the search results are
presented in. The default value, following Wikipedia convention, is "en".
"""
response = get_response_elems(lang)
if response != {}:
Expand All @@ -141,8 +138,8 @@ def get_current_data():
Wikipedia texts are licensed under CC-BY-SA 3.0
Returns:
pd.DataFrame: A DataFrame recording the number of CC-licensed documents
per search query of assumption.
- pd.DataFrame: A DataFrame recording the number of CC-licensed documents
per search query of assumption.
"""
return pd.read_csv(DATA_WRITE_FILE).set_index("language")

Expand Down

0 comments on commit 43973b1

Please sign in to comment.