Skip to content

Commit

Permalink
Merge pull request #76 from fhdsl/cansavvy/scholar
Browse files Browse the repository at this point in the history
Retrieving Citations!
  • Loading branch information
cansavvy authored Mar 6, 2024
2 parents 4f0ffa6 + 64dcaa3 commit 243ad96
Show file tree
Hide file tree
Showing 18 changed files with 376 additions and 21 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ Imports:
janitor,
stringr,
methods,
magrittr
magrittr,
rvest,
rprojroot,
yaml,
Suggests:
knitr,
rmarkdown,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export(delete_creds)
export(example_data_folder)
export(extract_answers)
export(get_calendly_user)
export(get_citation_count)
export(get_config_file)
export(get_example_data)
export(get_ga_metadata)
export(get_ga_properties)
Expand Down Expand Up @@ -38,6 +40,7 @@ export(list_calendly_events)
export(list_example_data)
export(request_ga)
export(request_google_forms)
export(setup_folders)
export(write_playlist_details)
export(write_to_gsheet)
import(dplyr)
Expand Down
39 changes: 39 additions & 0 deletions R/citations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Get a list of papers that cite your paper
#' @description Retrieve the titles and links of the papers listed on a Google
#' Scholar "Cited by" results page. Only the results present on the page that
#' is fetched are returned.
#' @param paper_cite_link The URL of a Google Scholar "Cited by" page. This is not a google citation page. 1. Go to: https://scholar.google.com/scholar 2. Search for the paper we are looking for the citation count. 3. Then click the Cited by ___ button below the title of the paper 4. Copy and paste this url and put it in this get_citation_count() function
#' @export
#' @return A data.frame with one row per result heading found on the page and
#' two columns: `titles` (the heading text) and `links` (the URL of the first
#' link inside that heading, or `NA` when the heading has no link).
#' @examples \dontrun{
#'
#' paper_cite_link <- "https://scholar.google.com/scholar?cites=6140457238337460780"
#'
#' papers_cited_df <- get_citation_count(paper_cite_link)
#' }
get_citation_count <- function(paper_cite_link) {
  # `if ()` below needs a single TRUE/FALSE, so insist on one URL string
  stopifnot(
    is.character(paper_cite_link),
    length(paper_cite_link) == 1,
    !is.na(paper_cite_link)
  )

  base_url <- "https://scholar.google.com/scholar?cites="

  is_cite_page <- grepl(base_url, paper_cite_link, fixed = TRUE)

  if (!is_cite_page) {
    stop(
      "This is not a google citation page. \n",
      "1. Go to: https://scholar.google.com/scholar \n",
      "2. Search for the paper we are looking for the citation count. \n",
      "3. Then click the Cited by ___ button below the title of the paper \n",
      "4. Copy and paste this url and put it in this get_citation_count() function"
    )
  }

  # Download the page once and reuse the parsed document for titles and links
  response <- httr::GET(paper_cite_link)
  # Fail loudly on an HTTP error instead of parsing an error page
  httr::stop_for_status(response)

  page <- rvest::read_html(httr::content(response, "text", encoding = "UTF-8"))

  # Each search result title is rendered as an <h3> heading
  headings <- rvest::html_nodes(page, "h3")
  titles <- rvest::html_text(headings)

  # html_node() returns exactly one (possibly missing) node per heading, so
  # `links` always has the same length as `titles`; headings without a link
  # yield NA rather than shifting the remaining links up a row.
  links <- rvest::html_attr(rvest::html_node(headings, "a"), "href")

  data.frame(titles, links, stringsAsFactors = FALSE)
}
2 changes: 0 additions & 2 deletions R/google-analytics.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ get_ga_properties <- function(account_id, token = NULL) {
#' property_info <- get_ga_property_info(property_id = property_id)
#' }
get_ga_property_info <- function(property_id, token = NULL) {

results <- request_ga(
token = token,
url = paste0("https://analyticsadmin.googleapis.com/v1alpha/properties/", property_id),
Expand Down Expand Up @@ -365,7 +364,6 @@ get_multiple_ga_metrics <- function(account_id = NULL, property_ids = NULL, toke


all_metrics <- lapply(stats_type, function(a_stats_type) {

# Now loop through all the properties
per_type <- lapply(property_ids, function(property_id) {
# Be vocal about it
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
utils::globalVariables(c(
"result", "num", "test_name", "scopes", "set_token", "browseURL", "remove_token", "get_token", "get_github", "get_calendly", "%>%",
"token", "query_params", "file_name", "accounts", "get_repo_list", "timestamp", "uniques", "req", "cache_secrets_folder"
"token", "query_params", "file_name", "accounts", "get_repo_list", "timestamp", "uniques", "req", "cache_secrets_folder", "google_folder_locations"
))

#' Get list of example datasets
Expand Down
171 changes: 171 additions & 0 deletions R/write-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,174 @@ write_to_gsheet <- function(input, token = NULL, gsheet = NULL, overwrite = FALS

return(gsheet_output)
}

#' Set up the folder structure for metricminer
#' @description This is a function to set up a folder structure for metricminer data to be saved to. It depends on and reads
#' the `_config_automation.yml` file: `data_dest` selects local folders ("github") or Google Drive ("google"),
#' `folder_path` names the parent folder (defaults to "metricminer_data" when blank), and the
#' `refresh-<name>` / `<name>_googlesheet` entries describe each data source.
#' When `data_dest` is "google", the Drive folder id and the ids of any newly created Google Sheets
#' are written back into blank entries of the config file.
#' Scope used for this function is the `See, edit, create, and delete only the specific Google Drive files you use with this app.`
#' @param config_file The file path to the _config_automation.yml file
#' @param token OAuth token from Google login. Currently not referenced in the function body; the
#' Google calls use whatever authorization is already active in the session.
#' @return Invisibly, the contents of the config file (a list) re-read after setup has finished.
#' @importFrom googlesheets4 read_sheet sheet_add write_sheet
#' @export
#' @examples \dontrun{
#'
#' authorize("google")
#'
#' setup_folders(
#'   config_file = "_config_automation.yml"
#' )
#' }
#'
setup_folders <- function(
    config_file = file.path(rprojroot::find_root(rprojroot::has_dir(".git")), "_config_automation.yml"),
    token = NULL) {
  if (!file.exists(config_file)) {
    stop(
      "This function requires a _config_automation.yml file to run.\n",
      "Run get_config_file() to get one in your current directory.",
      call. = FALSE
    )
  }

  # TRUE when a config entry is absent, empty (NULL) or NA
  is_blank <- function(value) {
    is.null(value) || length(value) != 1 || is.na(value)
  }

  # Return the Drive id of the parent folder, creating the folder if needed
  find_or_create_drive_folder <- function(folder) {
    # A previous run may have replaced a blank folder_path with the folder's
    # Drive id, so try the value as an id before treating it as a name.
    by_id <- tryCatch(
      googledrive::drive_get(id = googledrive::as_id(folder)),
      error = function(e) NULL
    )
    if (!is.null(by_id) && nrow(by_id) > 0) {
      return(as.character(by_id$id[1]))
    }

    # drive_find() treats `pattern` as a regular expression; keep exact matches
    matches <- googledrive::drive_find(pattern = folder, type = "folder")
    matches <- matches[matches$name == folder, ]
    if (nrow(matches) > 0) {
      return(as.character(matches$id[1]))
    }

    as.character(googledrive::drive_mkdir(folder, overwrite = FALSE)$id)
  }

  # Parsed config (for reading values) and raw lines (for writing ids back)
  yaml <- yaml::read_yaml(config_file)
  config_lines <- readLines(config_file)

  data_names <- c(
    "calendly",
    "cran",
    "github",
    "ga",
    "googleforms",
    "slido",
    "youtube"
  )

  # Look every entry up by name so a missing key cannot shorten the vectors
  refresh_toggle <- vapply(
    data_names,
    function(data_name) isTRUE(yaml[[paste0("refresh-", data_name)]]),
    logical(1),
    USE.NAMES = FALSE
  )

  googlesheets_ids <- vapply(
    data_names,
    function(data_name) {
      sheet_id <- yaml[[paste0(data_name, "_googlesheet")]]
      if (is_blank(sheet_id)) NA_character_ else as.character(sheet_id)
    },
    character(1),
    USE.NAMES = FALSE
  )

  data_df <- data.frame(
    data_names,
    refresh_toggle,
    googlesheets_ids,
    stringsAsFactors = FALSE
  )
  # Only data sources that are switched on get Drive folders and sheets
  data_df <- data_df[data_df$refresh_toggle, , drop = FALSE]

  data_dest <- yaml[["data_dest"]]
  if (is_blank(data_dest) || !data_dest %in% c("github", "google")) {
    stop(
      "'data_dest:' in your _config_automation.yml file must be \"github\" or \"google\"",
      call. = FALSE
    )
  }

  # if no parent folder is given, fall back to the default name
  folder_path <- yaml[["folder_path"]]
  folder_path <- if (is_blank(folder_path)) "metricminer_data" else as.character(folder_path)

  # Save locally if we said that
  if (data_dest == "github") {
    # Make the data specific folders (for every data source, as before)
    for (data_folder in file.path(folder_path, data_names)) {
      dir.create(data_folder, showWarnings = FALSE, recursive = TRUE)
    }
  }

  if (data_dest == "google") {
    has_sheet <- !is.na(data_df$googlesheets_ids)

    # Check that the sheets given exist before creating anything on Drive
    for (i in which(has_sheet)) {
      gsheet_test <- try(
        suppressMessages(
          googlesheets4::read_sheet(
            data_df$googlesheets_ids[i],
            range = "A1:F20",
            sheet = 1
          )
        ),
        silent = TRUE
      )
      if (inherits(gsheet_test, "try-error")) {
        stop(paste0(
          "Can't find the provided gsheet. Check the '",
          data_df$data_names[i],
          "_googlesheet:' entry in your _config_automation.yml file"
        ))
      }
    }

    folder_id <- find_or_create_drive_folder(folder_path)

    # Save the folder id in the config (only fills in a blank entry)
    config_lines <- stringr::str_replace(
      config_lines,
      "^folder_path:\\s*$",
      paste0("folder_path: ", folder_id)
    )
    writeLines(config_lines, config_file)

    # One Drive folder per data source inside the parent folder
    data_df$google_folder_locations <- vapply(
      data_df$data_names,
      function(data_name) {
        data_folder <- googledrive::drive_mkdir(
          data_name,
          path = googledrive::as_id(folder_id)
        )
        as.character(data_folder$id)
      },
      character(1),
      USE.NAMES = FALSE
    )

    # Make a new sheet for every data source that doesn't have one yet
    for (i in which(!has_sheet)) {
      data_name <- data_df$data_names[i]
      sheet_id <- googledrive::as_id(googlesheets4::gs4_create(data_name))

      googledrive::drive_mv(
        file = sheet_id,
        path = googledrive::as_id(data_df$google_folder_locations[i])
      )

      # Store this sheet id in the matching blank entry of the config
      yaml_key <- paste0(data_name, "_googlesheet")
      config_lines <- stringr::str_replace(
        config_lines,
        paste0("^", yaml_key, ":\\s*$"),
        paste0(yaml_key, ": ", as.character(sheet_id))
      )
    }

    writeLines(config_lines, config_file)
  }

  # Reread it back in so the caller sees any ids that were just stored
  invisible(yaml::read_yaml(config_file))
}


#' Get config file
#' @description Get the _config_automation.yml file to set up a metricminer repo.
#' The template shipped in the package's `extdata` folder is copied into the
#' current working directory.
#' @param overwrite Should a _config_automation.yml file in the current directory be overwritten? Default is false.
#' @export
#' @return `TRUE` if the _config_automation.yml file was copied to your current
#' working directory, `FALSE` otherwise (for example when the file already
#' exists and `overwrite = FALSE`).
get_config_file <- function(overwrite = FALSE) {
  # Anchor and escape the pattern so only the exact file name matches
  config_file <- list.files(
    path = system.file("extdata", package = "metricminer"),
    pattern = "^_config_automation\\.yml$",
    recursive = TRUE,
    full.names = TRUE
  )

  # system.file() returns "" when the package (or folder) cannot be found,
  # which would otherwise turn the copy below into a silent no-op
  if (length(config_file) < 1) {
    stop(
      "Could not find the _config_automation.yml template in the metricminer package.",
      call. = FALSE
    )
  }

  copied <- file.copy(
    from = config_file[1],
    to = "_config_automation.yml",
    overwrite = overwrite
  )

  if (!copied) {
    message(
      "_config_automation.yml was not copied. ",
      "If it already exists, use overwrite = TRUE to replace it."
    )
  }

  copied
}
55 changes: 55 additions & 0 deletions inst/extdata/_config_automation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
##### Checks run at pull request #####
# Check that urls in the content are not broken
url-checker: yes
# Render preview of content with changes (Rmd's and md's are checked)
render-preview: yes
# Spell check Rmds and quizzes
spell-check: yes
# Style any R code
style-code: yes

############ Metric Collection Destination ##############
### Options are "github" or "google"
# setup_folders() creates local folders for "github" and Google Drive
# folders/sheets for "google".
data_dest: github
# Parent folder for the collected data. When left blank and data_dest is
# "github", setup_folders() uses "metricminer_data".
folder_path:

# For each data source below:
#   refresh-<name>     - setup_folders() only keeps sources whose toggle is yes
#   <name>_googlesheet - read by setup_folders() as the Google Sheet for that
#                        source; leave blank if there is none yet

###### Calendly ######
refresh-calendly: yes
calendly_googlesheet:

###### CRAN ######
refresh-cran: yes
# List of CRAN package names
cran_packages: [ metricminer, ottrpal ]
cran_googlesheet:

###### GitHub ######
refresh-github: yes
# List of repositories written as owner/repo
github_repos: [ fhdsl/metricminer, fhdsl/metricminer.org ]
github_googlesheet:

###### Google Analytics ######
refresh-ga: yes
# List of Google Analytics property ids
ga_property_ids: [ 422671031, 422558989 ]
ga_googlesheet:

###### Google Forms ######
refresh-googleforms: yes
# List of Google Form ids
google_forms: [
1pbFfgUPYH2w9zEoCDjCa4HFOxzEhGOseufw28Xxmd-o,
1JjmsiuVoGSxvl-1M_oWittcftO955tijzeNc-cgJlo8 ]
googleforms_googlesheet:

###### Slido ######
refresh-slido: yes
# NOTE(review): presumably the Google Drive folder id holding the Slido
# exports - confirm against the code that reads drive_id
drive_id: 1XWXHHyj32Uw_UyaUJrqp6S--hHnM0-7l
slido_googlesheet:

###### YouTube ######
refresh-youtube: yes
# List of YouTube video ids
video_ids: [ XN_QPRrJZAw, YkYnni-WuaQ ]
youtube_googlesheet:

################## DOCKER #######################
# What docker image should be used for rendering?
# The default is cansav09/metricminer:main
rendering-docker-image: 'cansav09/metricminer:main'
25 changes: 25 additions & 0 deletions man/get_citation_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/get_config_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/get_ga_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions man/get_ga_properties.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 243ad96

Please sign in to comment.