diff --git a/wikipedia/wikipedia_scratcher.py b/wikipedia/wikipedia_scratcher.py index af4a0b2..519e996 100755 --- a/wikipedia/wikipedia_scratcher.py +++ b/wikipedia/wikipedia_scratcher.py @@ -23,7 +23,8 @@ def get_wiki_langs(): - """Provides the list of language to find Creative Commons usage data on. + """ + Provides the list of language to find Creative Commons usage data on. The codes represent the language codes defined by ISO 639-1 and ISO 639-3, and the decision of which language code to use is usually determined by the @@ -31,24 +32,23 @@ def get_wiki_langs(): (https://en.wikipedia.org/wiki/List_of_Wikipedias#Wikipedia_edition_codes) Returns: - pd.DataFrame: A Dataframe containing information of each Wikipedia - language and its respective encoding on web address. + - pd.DataFrame: A Dataframe containing information of each Wikipedia + language and its respective encoding on web address. """ return pd.read_csv(f"{CWD}/language-codes_csv.csv") def get_request_url(lang="en"): - """Provides the API Endpoint URL for specified parameter combinations. + """ + Provides the API Endpoint URL for specified parameter combinations. Args: - lang: - A string representing the language that the search results are - presented in. Alternatively, the default value is by Wikipedia - customs "en". + - lang: A string representing the language that the search results are + presented in. Alternatively, the default value is by Wikipedia customs "en" Returns: - string: A string representing the API Endpoint URL for the query - specified by this function's parameters. + - string: A string representing the API Endpoint URL for the query + specified by this function's parameters. """ base_url = ( r"wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=statistics" @@ -59,17 +59,16 @@ def get_request_url(lang="en"): def get_response_elems(language="en"): - """Provides the metadata for query of specified parameters + """ + Provides the metadata for query of specified parameters Args: - language: - A string representing the language that the search results are - presented in. Alternatively, the default value is by Wikipedia - customs "en". + - language: A string representing the language that the search results are + presented in. Alternatively, the default value is by Wikipedia customs "en" Returns: - dict: A dictionary mapping metadata to its value provided from the API - query of specified parameters. + - dict: A dictionary mapping metadata to its value provided from the API + query of specified parameters. """ search_data = None try: @@ -114,10 +113,8 @@ def record_lang_data(lang="en"): """Writes the row for LICENSE_TYPE to file to contain Google Query data. Args: - lang: - A string representing the language that the search results are - presented in. Alternatively, the default value is by Wikipedia - customs "en". + - lang: A string representing the language that the search results are + presented in. Alternatively, the default value is by Wikipedia customs "en" """ response = get_response_elems(lang) if response != {}: @@ -141,8 +138,8 @@ def get_current_data(): Wikipedia texts are licensed under CC-BY-SA 3.0 Returns: - pd.DataFrame: A DataFrame recording the number of CC-licensed documents - per search query of assumption. + - pd.DataFrame: A DataFrame recording the number of CC-licensed documents + per search query of assumption. """ return pd.read_csv(DATA_WRITE_FILE).set_index("language")