From 2032f217adf83291cf1dbfaf3ea2eefff8a87d55 Mon Sep 17 00:00:00 2001
From: Duncan Cameron
Date: Mon, 23 Sep 2019 09:33:19 +0100
Subject: [PATCH] Use fgetcsv() to parse an import file.
---
public_html/lists/admin/CsvReader.php | 55 ++++++++
public_html/lists/admin/actions/import2.php | 36 ++---
public_html/lists/admin/import2.php | 65 +++------
tests/phpunit/CsvReaderTest.php | 145 ++++++++++++++++++++
4 files changed, 230 insertions(+), 71 deletions(-)
create mode 100644 public_html/lists/admin/CsvReader.php
create mode 100644 tests/phpunit/CsvReaderTest.php
diff --git a/public_html/lists/admin/CsvReader.php b/public_html/lists/admin/CsvReader.php
new file mode 100644
index 000000000..d5d3cb68b
--- /dev/null
+++ b/public_html/lists/admin/CsvReader.php
@@ -0,0 +1,55 @@
+fh = fopen($filename, 'r');
+ $this->delimiter = $delimiter;
+ $this->totalRows = 0;
+
+ while ($row = fgetcsv($this->fh, 0, $this->delimiter, self::ENCLOSURE, SELF::ESCAPE)) {
+ if ($row[0] !== null) {
+ ++$this->totalRows;
+ }
+ }
+ rewind($this->fh);
+ }
+
+ /**
+ * Return the number of non-blank rows in the file, header row included.
+ *
+ * @return int the row count stored in $this->totalRows
+ */
+ public function totalRows()
+ {
+ return $this->totalRows;
+ }
+
+ /**
+ * Return the next row read by fgetcsv(), skipping blank lines (rows whose first field is null).
+ *
+ * @return array|false|null the parsed fields, or fgetcsv()'s falsy result when no row is left
+ */
+ public function getRow()
+ {
+ do {
+ $row = fgetcsv($this->fh, 0, $this->delimiter, self::ENCLOSURE, self::ESCAPE);
+ } while ($row && $row[0] === null);
+
+ return $row;
+ }
+}
diff --git a/public_html/lists/admin/actions/import2.php b/public_html/lists/admin/actions/import2.php
index 7dc817253..1b9b83068 100644
--- a/public_html/lists/admin/actions/import2.php
+++ b/public_html/lists/admin/actions/import2.php
@@ -4,6 +4,7 @@
require dirname(__FILE__).'/../structure.php';
require dirname(__FILE__).'/../inc/importlib.php';
+require dirname(__FILE__).'/../CsvReader.php';
@ob_end_flush();
$status = 'FAIL';
@@ -12,36 +13,24 @@
if (filesize($_SESSION['import_file']) > 50000) {
@ini_set('memory_limit', memory_get_usage() + 50 * filesize($_SESSION['import_file']));
}
-$email_list = file_get_contents($_SESSION['import_file']);
flush();
-// Clean up email file
-$email_list = trim($email_list);
-$email_list = str_replace("\r", "\n", $email_list);
-$email_list = str_replace("\n\r", "\n", $email_list);
-$email_list = str_replace("\n\n", "\n", $email_list);
-if ($_SESSION['import_record_delimiter'] != "\n") {
- $email_list = str_replace($_SESSION['import_record_delimiter'], "\n", $email_list);
-}
-
-// Split file/emails into array
-$email_list = explode("\n", $email_list); //WARNING the file contents get replace by an array
-output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'
', count($email_list)));
-$header = array_shift($email_list);
-$total = count($email_list);
-$headers = str_getcsv($header, $_SESSION['import_field_delimiter']);
+$csvReader = new CsvReader($_SESSION['import_file'], $_SESSION['import_field_delimiter']);
+$total = $csvReader->totalRows();
+output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'', $total));
+--$total; // now the number of subscribers to be imported
+$headers = $csvReader->getRow();
$headers = array_unique($headers);
$_SESSION['columnnames'] = $headers;
//## show progress and adjust working space
-if (count($email_list)) {
+if ($total > 0) {
$import_field_delimiter = $_SESSION['import_field_delimiter'];
- if (count($email_list) > 300 && !$_SESSION['test_import']) {
+ if ($total > 300 && !$_SESSION['test_import']) {
// this is a possibly a time consuming process, so show a progress bar
echo '';
flush();
// increase the memory to make sure we are not running out
- // $mem = sizeof($email_list);
ini_set('memory_limit', '32M');
}
@@ -65,18 +54,17 @@
$c = 1;
$count['invalid_email'] = 0;
$num_lists = count($_SESSION['lists']);
- $total = count($email_list);
$cnt = 0;
$count['emailmatch'] = 0;
$count['fkeymatch'] = 0;
$count['dataupdate'] = 0;
$count['duplicate'] = 0;
$additional_emails = 0;
- foreach ($email_list as $line) {
+
+ while ($values = $csvReader->getRow()) {
set_time_limit(60);
// will contain attributes to store / change
$user = array();
- $values = str_getcsv($line, $_SESSION['import_field_delimiter']);
$system_values = array();
foreach ($system_attribute_mapping as $column => $index) {
// print ' '.$column . ' = '. $values[$index];
@@ -114,8 +102,8 @@
$replace = array();
foreach ($_SESSION['import_attribute'] as $key => $val) {
if (!empty($values[$val['index']])) {
- $user[$val['index']] = addslashes($values[$val['index']]);
- $replace[$key] = addslashes($values[$val['index']]);
+ $user[$val['index']] = htmlspecialchars($values[$val['index']]);
+ $replace[$key] = htmlspecialchars($values[$val['index']]);
}
}
} else {
diff --git a/public_html/lists/admin/import2.php b/public_html/lists/admin/import2.php
index edada8265..52ee4f6d9 100644
--- a/public_html/lists/admin/import2.php
+++ b/public_html/lists/admin/import2.php
@@ -20,7 +20,6 @@
'#',
"\t",
);
-$email_list = array();
$attributes = array();
if (!isset($everyone_groupid)) {
@@ -39,8 +38,9 @@
'http://php.net/post_max_size', 'http://php.net/post_max_size')));
}
-require dirname(__FILE__).'//structure.php';
+require dirname(__FILE__).'/structure.php';
require dirname(__FILE__).'/inc/importlib.php';
+require dirname(__FILE__).'/CsvReader.php';
register_shutdown_function('my_shutdown');
if (!defined('WEBBLER')) {
@@ -184,43 +184,16 @@
if (filesize($_SESSION['import_file']) > 50000) {
@ini_set('memory_limit', memory_get_usage() + 50 * filesize($_SESSION['import_file']));
}
- $email_list = file_get_contents($_SESSION['import_file']);
flush();
if (!isset($_SESSION['import_attribute'])) {
$_SESSION['import_attribute'] = array();
}
- // Clean up email file
- $email_list = trim($email_list);
- $email_list = str_replace("\r", "\n", $email_list);
- $email_list = str_replace("\n\r", "\n", $email_list);
- $email_list = str_replace("\n\n", "\n", $email_list);
-
- if ($_SESSION['import_record_delimiter'] != "\n") {
- $email_list = str_replace($_SESSION['import_record_delimiter'], "\n", $email_list);
- }
-
- // not sure if we need to check on errors
- /*
- for($i=0; $i 20 ) ? $errpos - 20 : 0;
- print '
';
- printf($GLOBALS['I18N']->get('Error was around here "%s"'),substr( $email_list, $startpos, 40 ));
- print '
';
- printf('
',$GLOBALS['I18N']->get('Illegal character was %s').'
',$illegal_cha[$i]);
- Fatal_Error($GLOBALS['I18N']->get('A character has been found in the import which is not the delimiter indicated, but is likely to be confused for one. Please clean up your import file and try again')." $import_field_delimiter, $import_record_delimiter");
- return;
- }
- };
- */
- // Split file/emails into array
- $email_list = explode("\n", $email_list); //WARNING the file contents get replace by an array
- output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'', count($email_list)));
- $header = array_shift($email_list);
- $total = count($email_list);
- $headers = str_getcsv($header, $_SESSION['import_field_delimiter']);
+ $csvReader = new CsvReader($_SESSION['import_file'], $_SESSION['import_field_delimiter']);
+ $total = $csvReader->totalRows();
+ output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'', $total));
+ --$total; // now the number of subscribers to be imported
+ $headers = $csvReader->getRow();
$headers = array_unique($headers);
$_SESSION['columnnames'] = $headers;
@@ -436,8 +409,8 @@
echo '