rstudio · aronatkins · Jan 28, 2021
diff --git a/NEWS.md b/NEWS.md
@@ -39,6 +39,9 @@
 - Infer package dependencies from `requireNamespace()` and `loadNamespace()`
   only when the package name is character input. (#602)
 
+- Allow repository names with commas. Repository URLs containing commas are
+  still not supported. (#587)
+
 # Packrat 0.5.0
 
 - Packrat now supports both of BiocManager and BiocInstaller (as used for

diff --git a/R/lockfile.R b/R/lockfile.R
@@ -86,28 +86,7 @@ readLockFile <- function(file) {
   names(df) <- translate(names(df), aliases)
 
   # Split the repos
-  repos <- gsub("[\r\n]", " ", df[1, 'Repos'])
-  repos <- strsplit(unlist(strsplit(repos, "\\s*,\\s*", perl = TRUE)), "=", fixed = TRUE)
-
-  # Support older-style lockfiles containing unnamed repositories
-  repoLens <- vapply(repos, length, numeric(1))
-  if (all(repoLens == 1)) {
-    # Support for old (unnamed) repositories
-
-    if (length(repoLens) > 1) {
-      # We warn if there were multiple repositories (if there was only one, we
-      # can safely assume it was CRAN)
-      warning("Old-style repository format detected; bumped to new version\n",
-              "Please re-set the repositories with options(repos = ...)\n",
-              "and call packrat::snapshot() to update the lock file.")
-    }
-    repos <- c(CRAN = repos[[1]])
-  } else if (all(repoLens == 2)) {
-    repos <- setNames(
-      sapply(repos, "[[", 2),
-      sapply(repos, "[[", 1)
-    )
-  }
+  repos <- parseRepositories(df[1, 'Repos'])
 
   packages <- if (nrow(df) > 1)
     deserializePackages(utils::tail(df, -1))
@@ -123,6 +102,63 @@ readLockFile <- function(file) {
   )
 }
 
+# Parse the 'Repos' field of a lockfile into a named character vector
+# of repository URLs. Entries have the form NAME1=URL1, NAME2=URL2.
+# Names may contain commas and URLs may contain "=" (e.g. in a query
+# string); commas in URLs are not supported. A field without any "="
+# is an old-style list of unnamed URLs, of which only the first is
+# kept (as CRAN). See https://github.com/rstudio/packrat/issues/587
+parseRepositories <- function(repos) {
+  repos <- gsub("[\r\n]", " ", repos)
+
+  if (!grepl("=", repos, fixed = TRUE)) {
+    # Older-style lockfile with unnamed repositories.
+    repos <- unlist(strsplit(repos, "\\s*,\\s*", perl = TRUE))
+    if (length(repos) > 1) {
+      # We warn if there were multiple repositories (if there was only one, we
+      # can safely assume it was CRAN)
+      warning("Old-style repository format detected; bumped to new version\n",
+              "Please re-set the repositories with options(repos = ...)\n",
+              "and call packrat::snapshot() to update the lock file.")
+    }
+    return(c(CRAN = repos[[1]]))
+  }
+
+  # Strip leading and trailing whitespace.
+  trim <- function(x) gsub("^\\s+|\\s+$", "", x)
+  repoNames <- character()
+  repoURLs <- character()
+  remaining <- repos
+
+  # Consume one NAME=URL entry per iteration. 'remaining' always
+  # contains at least one "=" here, so 'eq' is a valid position.
+  repeat {
+    eq <- regexpr("=", remaining, fixed = TRUE)[[1]]
+    name <- substr(remaining, 1L, eq - 1L)
+    url <- substring(remaining, eq + 1L)
+
+    # URLs cannot contain commas, so the first comma after the "=" ends
+    # the URL -- but only when another NAME=URL entry follows it. Any
+    # further commas before the next "=" belong to that entry's name:
+    #     A=B,C,D=E  ->  c("A" = "B", "C,D" = "E")
+    comma <- regexpr(",", url, fixed = TRUE)[[1]]
+    hasNext <- comma > 0L &&
+      grepl("=", substring(url, comma + 1L), fixed = TRUE)
+    if (hasNext) {
+      remaining <- substring(url, comma + 1L)
+      url <- substr(url, 1L, comma - 1L)
+    }
+
+    repoNames <- c(repoNames, trim(name))
+    repoURLs <- c(repoURLs, trim(url))
+    if (!hasNext) {
+      break
+    }
+  }
+
+  setNames(repoURLs, repoNames)
+}
+
 # Remove leading and trailing whitespace from character vectors
 # in the dataframe, and return the modified dataframe
 cleanupWhitespace <- function(df) {

diff --git a/tests/testthat/test-lockfile.R b/tests/testthat/test-lockfile.R
@@ -40,3 +40,75 @@ test_that("Repository is properly split by readLockFile", {
   )
 
 })
+
+# Exercises parseRepositories() against each lockfile "Repos" layout:
+# old-style unnamed URLs, NAME=URL pairs with assorted whitespace, and
+# names that themselves contain commas.
+test_that("multiple styles of repository parsing are supported", {
+  # A lone unnamed URL is the old format; it is taken to be CRAN.
+  expect_equal(parseRepositories("A"), c(CRAN = "A"))
+
+  # Several unnamed URLs are also the old format and raise a warning.
+  expect_warning(parseRepositories("A,B"))
+
+  # Every remaining scenario pairs a raw field with its expected parse.
+  scenarios <- list(
+    # One repository, no whitespace separating name and URL
+    list(
+      input = "A=B",
+      expected = c(A = "B")
+    ),
+    # One repository, whitespace separating name and URL
+    list(
+      input = "A = B",
+      expected = c(A = "B")
+    ),
+    # Two repositories, no whitespace.
+    list(
+      input = "A=B,C=D",
+      expected = c(A = "B", C = "D")
+    ),
+    # Two repositories, with whitespace.
+    list(
+      input = "A = B , C = D",
+      expected = c(A = "B", C = "D")
+    ),
+    # Two repositories, with newlines.
+    list(
+      input = "A = B,\nC = D\n",
+      expected = c(A = "B", C = "D")
+    ),
+    # Three repositories
+    list(
+      input = "A = B,\nC = D,\nE = F\n",
+      expected = c(A = "B", C = "D", E = "F")
+    ),
+    # Four repositories
+    list(
+      input = "A = B,\nC = D,\nE = F,\nG = H\n",
+      expected = c(A = "B", C = "D", E = "F", G = "H")
+    ),
+    # One repository name with a comma
+    list(
+      input = "A,B=C",
+      expected = c("A,B" = "C")
+    ),
+    # Two repository names with commas
+    list(
+      input = "A,B=C,D,E=F",
+      expected = c("A,B" = "C", "D,E" = "F")
+    ),
+    # Three repository names with commas
+    list(
+      input = "A,B=C,D,E=F,G,H=I",
+      expected = c("A,B" = "C", "D,E" = "F", "G,H" = "I")
+    )
+  )
+
+  for (scenario in scenarios) {
+    expect_equal(
+      parseRepositories(scenario[["input"]]),
+      scenario[["expected"]]
+    )
+  }
+})