Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retrieving Citations! #76

Merged
merged 8 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ Imports:
janitor,
stringr,
methods,
magrittr
magrittr,
rvest,
rprojroot,
yaml,
Suggests:
knitr,
rmarkdown,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export(delete_creds)
export(example_data_folder)
export(extract_answers)
export(get_calendly_user)
export(get_citation_count)
export(get_config_file)
export(get_example_data)
export(get_ga_metadata)
export(get_ga_properties)
Expand Down Expand Up @@ -38,6 +40,7 @@ export(list_calendly_events)
export(list_example_data)
export(request_ga)
export(request_google_forms)
export(setup_folders)
export(write_playlist_details)
export(write_to_gsheet)
import(dplyr)
Expand Down
39 changes: 39 additions & 0 deletions R/citations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Get a list of papers that cite your paper
#' @description Retrieve the titles and links of the papers listed on a Google
#'   Scholar "Cited by" results page.
#' @param paper_cite_link The URL of a Google Scholar "Cited by" results page.
#'   This is not a google citation page. 1. Go to:
#'   https://scholar.google.com/scholar 2. Search for the paper we are looking
#'   for the citation count. 3. Then click the Cited by ___ button below the
#'   title of the paper 4. Copy and paste this url and put it in this
#'   get_citation_count() function
#' @export
#' @return A data frame with one row per `h3` heading found on the page and two
#'   columns: `titles` (the heading text) and `links` (the `href` of the first
#'   link inside that heading, or `NA` when the heading contains no link).
#' @examples \dontrun{
#'
#' paper_cite_link <- "https://scholar.google.com/scholar?cites=6140457238337460780"
#'
#' papers_cited_df <- get_citation_count(paper_cite_link)
#' }
get_citation_count <- function(paper_cite_link) {
  base_url <- "https://scholar.google.com/scholar?cites="

  # Check type and length first so NULL, NA or vector input gets the usage
  # instructions below rather than an opaque error from `if`
  is_cite_page <- is.character(paper_cite_link) &&
    length(paper_cite_link) == 1 &&
    !is.na(paper_cite_link) &&
    grepl(base_url, paper_cite_link, fixed = TRUE)

  if (!is_cite_page) {
    stop(
      "This is not a google citation page. \n",
      "1. Go to: https://scholar.google.com/scholar \n",
      "2. Search for the paper we are looking for the citation count. \n",
      "3. Then click the Cited by ___ button below the title of the paper \n",
      "4. Copy and paste this url and put it in this get_citation_count() function"
    )
  }

  # Download the page once and parse that single response for both columns
  response <- httr::GET(paper_cite_link)
  httr::stop_for_status(response)

  page <- rvest::read_html(
    httr::content(response, as = "text", encoding = "UTF-8")
  )
  headings <- rvest::html_nodes(page, "h3")

  titles <- rvest::html_text(headings)

  # html_node() (singular) yields exactly one result per heading, missing when
  # there is no link, so `links` always lines up with `titles`
  links <- rvest::html_attr(rvest::html_node(headings, "a"), "href")

  data.frame(titles, links)
}
2 changes: 0 additions & 2 deletions R/google-analytics.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ get_ga_properties <- function(account_id, token = NULL) {
#' property_info <- get_ga_property_info(property_id = property_id)
#' }
get_ga_property_info <- function(property_id, token = NULL) {

results <- request_ga(
token = token,
url = paste0("https://analyticsadmin.googleapis.com/v1alpha/properties/", property_id),
Expand Down Expand Up @@ -365,7 +364,6 @@ get_multiple_ga_metrics <- function(account_id = NULL, property_ids = NULL, toke


all_metrics <- lapply(stats_type, function(a_stats_type) {

# Now loop through all the properties
per_type <- lapply(property_ids, function(property_id) {
# Be vocal about it
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Names used through non-standard evaluation (or defined at run time) that
# R CMD check would otherwise report as undefined global variables.
utils::globalVariables(c(
  "result", "num", "test_name", "scopes", "set_token", "browseURL",
  "remove_token", "get_token", "get_github", "get_calendly", "%>%",
  "token", "query_params", "file_name", "accounts", "get_repo_list",
  "timestamp", "uniques", "req", "cache_secrets_folder",
  "google_folder_locations"
))

#' Get list of example datasets
Expand Down
171 changes: 171 additions & 0 deletions R/write-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,174 @@ write_to_gsheet <- function(input, token = NULL, gsheet = NULL, overwrite = FALS

return(gsheet_output)
}

#' Set up the folder structure for metricminer
#' @description This is a function to setup a folder structure for metricminer data to be saved to. It depends on and reads
#' the `_config_automation.yml` file: `data_dest` selects local folders
#' ("github") or Google Drive ("google"), `folder_path` names the parent folder
#' (default "metricminer_data"), and the `refresh-*` and `*_googlesheet` entries
#' describe each data source. For Google Drive, the parent folder id and the
#' ids of any newly created Google Sheets are written back into the config file.
#' Scope used for this function is the `See, edit, create, and delete only the specific Google Drive files you use with this app.`
#' @param config_file The file path to the _config_automation.yml file
#' @param token OAuth token from Google login. NOTE(review): the function body
#' does not reference `token`; confirm whether it should be passed on to the
#' Google Drive / Google Sheets calls.
#' @return Invisibly, the contents of the config file (re-read after any
#' updates) as a list.
#' @importFrom googlesheets4 read_sheet sheet_add write_sheet
#' @export
#' @examples \dontrun{
#'
#' authorize("google")
#'
#' setup_folders(
#'   config_file = "_config_automation.yml"
#' )
#' }
#'
setup_folders <- function(
    config_file = file.path(
      rprojroot::find_root(rprojroot::has_dir(".git")),
      "_config_automation.yml"
    ),
    token = NULL) {
  if (!file.exists(config_file)) {
    stop(
      "This function requires a _config_automation.yml file to run.\n",
      "Run get_config_file() to get one in your current directory."
    )
  }

  # Parsed config for reading values; raw lines for writing ids back in place
  config <- yaml::read_yaml(config_file)
  config_lines <- readLines(config_file)

  # TRUE when a config entry is absent, empty, or not a single value
  is_blank <- function(x) {
    is.null(x) || length(x) != 1 || is.na(x) || identical(x, "")
  }

  data_names <- c(
    "calendly",
    "cran",
    "github",
    "ga",
    "googleforms",
    "slido",
    "youtube"
  )

  # Look every key up by name so a missing or empty entry still yields exactly
  # one value per data source (a missing `refresh-*` entry counts as off)
  refresh_toggle <- vapply(
    data_names,
    function(data_name) isTRUE(config[[paste0("refresh-", data_name)]]),
    logical(1),
    USE.NAMES = FALSE
  )

  googlesheets_ids <- vapply(
    data_names,
    function(data_name) {
      sheet_id <- config[[paste0(data_name, "_googlesheet")]]
      if (is_blank(sheet_id)) NA_character_ else as.character(sheet_id)
    },
    character(1),
    USE.NAMES = FALSE
  )

  data_df <- data.frame(
    data_names,
    refresh_toggle,
    googlesheets_ids
  )
  # Only the data sources that are switched on
  data_df <- data_df[data_df$refresh_toggle, , drop = FALSE]

  data_dest <- config$data_dest
  if (is_blank(data_dest) || !data_dest %in% c("github", "google")) {
    stop(
      "`data_dest` in ", config_file,
      " must be either \"github\" or \"google\"."
    )
  }

  # The same default parent folder name applies to both destinations
  folder_path <- config$folder_path
  if (is_blank(folder_path)) folder_path <- "metricminer_data"

  # Save locally if we said that
  if (data_dest == "github") {
    # One folder per data source; recursive = TRUE also makes the parent
    for (data_folder in file.path(folder_path, data_names)) {
      dir.create(data_folder, showWarnings = FALSE, recursive = TRUE)
    }
  }

  if (data_dest == "google") {
    has_sheet <- !is.na(data_df$googlesheets_ids)

    # Check that the sheets given exist before creating anything on Drive
    for (i in which(has_sheet)) {
      gsheet_test <- try(
        suppressMessages(
          googlesheets4::read_sheet(
            data_df$googlesheets_ids[i],
            range = "A1:F20",
            sheet = 1
          )
        ),
        silent = TRUE
      )
      if (inherits(gsheet_test, "try-error")) {
        stop(
          "Can't find the provided gsheet. Check the '",
          data_df$data_names[i],
          "_googlesheet:' entry in your _config_automation.yml file"
        )
      }
    }

    # `folder_path` may hold a Drive folder id written by a previous run, so
    # try it as an id first, then as a folder name, and only then create it
    parent_folder <- tryCatch(
      googledrive::drive_get(id = googledrive::as_id(folder_path)),
      error = function(e) NULL
    )
    if (is.null(parent_folder) || nrow(parent_folder) < 1) {
      parent_folder <- googledrive::drive_find(folder_path, type = "folder")
    }
    if (nrow(parent_folder) < 1) {
      parent_folder <- googledrive::drive_mkdir(folder_path, overwrite = FALSE)
    }
    folder_id <- as.character(parent_folder$id[1])

    # Save the folder id in the config file (only fills an empty entry)
    config_lines <- stringr::str_replace(
      config_lines,
      "^folder_path:\\s*$",
      paste0("folder_path: ", folder_id)
    )
    writeLines(config_lines, config_file)

    # NOTE(review): this creates a folder on every run; re-running may leave
    # duplicate data folders on Drive - confirm whether that is acceptable
    data_df$google_folder_locations <- vapply(
      data_df$data_names,
      function(data_name) {
        data_folder <- googledrive::drive_mkdir(
          data_name,
          path = googledrive::as_id(folder_id)
        )
        as.character(data_folder$id[1])
      },
      character(1),
      USE.NAMES = FALSE
    )

    # Make a new sheet for every data source that doesn't have one yet
    for (i in which(!has_sheet)) {
      data_name <- data_df$data_names[i]
      sheet_id <- googlesheets4::gs4_create(data_name)

      googledrive::drive_mv(
        file = googledrive::as_id(sheet_id),
        path = googledrive::as_id(data_df$google_folder_locations[i])
      )

      # Store this sheet id in the config file
      config_lines <- stringr::str_replace(
        config_lines,
        paste0("^", data_name, "_googlesheet:\\s*$"),
        paste0(data_name, "_googlesheet: ", as.character(sheet_id))
      )
    }

    writeLines(config_lines, config_file)

    # Reread it back in so the returned config reflects what was written
    config <- yaml::read_yaml(config_file)
  }

  invisible(config)
}


#' Get config file
#' @description Get the _config_automation.yml file to set up a metricminer repo
#' @param overwrite Should a _config_automation.yml file in the current directory be overwritten? Default is false.
#' @export
#' @return Copies the _config_automation.yml template to your current working
#' directory and returns `TRUE` if the file was copied, `FALSE` otherwise.
get_config_file <- function(overwrite = FALSE) {
  # Ask for the template by its exact location so the result is a single path
  # ("" when not found) rather than a regex match over the whole folder
  config_file <- system.file(
    "extdata", "_config_automation.yml",
    package = "metricminer"
  )
  if (!nzchar(config_file)) {
    stop(
      "Could not find the _config_automation.yml template in the ",
      "installed metricminer package."
    )
  }

  destination <- file.path("_config_automation.yml")

  # file.copy() only returns FALSE in this case, so say why nothing happened
  if (file.exists(destination) && !isTRUE(overwrite)) {
    message(
      "A _config_automation.yml file already exists in the current ",
      "directory. Use overwrite = TRUE to replace it."
    )
  }

  file.copy(
    from = config_file,
    to = destination,
    overwrite = overwrite
  )
}
55 changes: 55 additions & 0 deletions inst/extdata/_config_automation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
##### Checks run at pull request #####
# Check that urls in the content are not broken
url-checker: yes
# Render preview of content with changes (Rmd's and md's are checked)
render-preview: yes
# Spell check Rmds and quizzes
spell-check: yes
# Style any R code
style-code: yes

############ Metric Collection Destination ##############
### Options are "github" or "google"
data_dest: github
folder_path:

###### Calendly ######
refresh-calendly: yes
calendly_googlesheet:

###### CRAN ######
refresh-cran: yes
cran_packages: [ metricminer, ottrpal ]
cran_googlesheet:

###### GitHub ######
refresh-github: yes
github_repos: [ fhdsl/metricminer, fhdsl/metricminer.org ]
github_googlesheet:

###### Google Analytics ######
refresh-ga: yes
ga_property_ids: [ 422671031, 422558989 ]
ga_googlesheet:

###### Google Forms ######
refresh-googleforms: yes
google_forms: [
1pbFfgUPYH2w9zEoCDjCa4HFOxzEhGOseufw28Xxmd-o,
1JjmsiuVoGSxvl-1M_oWittcftO955tijzeNc-cgJlo8 ]
googleforms_googlesheet:

###### Slido ######
refresh-slido: yes
drive_id: 1XWXHHyj32Uw_UyaUJrqp6S--hHnM0-7l
slido_googlesheet:

###### YouTube ######
refresh-youtube: yes
video_ids: [ XN_QPRrJZAw, YkYnni-WuaQ ]
youtube_googlesheet:

################## DOCKER #######################
# What docker image should be used for rendering?
# The default is cansav09/metricminer:main
rendering-docker-image: 'cansav09/metricminer:main'
25 changes: 25 additions & 0 deletions man/get_citation_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/get_config_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/get_ga_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions man/get_ga_properties.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading