diff --git a/public_html/lists/admin/CsvReader.php b/public_html/lists/admin/CsvReader.php new file mode 100644 index 000000000..d5d3cb68b --- /dev/null +++ b/public_html/lists/admin/CsvReader.php @@ -0,0 +1,55 @@ +fh = fopen($filename, 'r'); + $this->delimiter = $delimiter; + $this->totalRows = 0; + + while ($row = fgetcsv($this->fh, 0, $this->delimiter, self::ENCLOSURE, SELF::ESCAPE)) { + if ($row[0] !== null) { + ++$this->totalRows; + } + } + rewind($this->fh); + } + + /** + * Return the number of rows in the file. + * + * @return int + */ + public function totalRows() + { + return $this->totalRows; + } + + /** + * Return the result of calling fgetcsv() ignoring empty lines. + * + * @return array|false|null + */ + public function getRow() + { + do { + $row = fgetcsv($this->fh, 0, $this->delimiter, self::ENCLOSURE, SELF::ESCAPE); + } while ($row && $row[0] === null); + + return $row; + } +} diff --git a/public_html/lists/admin/actions/import2.php b/public_html/lists/admin/actions/import2.php index 7dc817253..1b9b83068 100644 --- a/public_html/lists/admin/actions/import2.php +++ b/public_html/lists/admin/actions/import2.php @@ -4,6 +4,7 @@ require dirname(__FILE__).'/../structure.php'; require dirname(__FILE__).'/../inc/importlib.php'; +require dirname(__FILE__).'/../CsvReader.php'; @ob_end_flush(); $status = 'FAIL'; @@ -12,36 +13,24 @@ if (filesize($_SESSION['import_file']) > 50000) { @ini_set('memory_limit', memory_get_usage() + 50 * filesize($_SESSION['import_file'])); } -$email_list = file_get_contents($_SESSION['import_file']); flush(); -// Clean up email file -$email_list = trim($email_list); -$email_list = str_replace("\r", "\n", $email_list); -$email_list = str_replace("\n\r", "\n", $email_list); -$email_list = str_replace("\n\n", "\n", $email_list); -if ($_SESSION['import_record_delimiter'] != "\n") { - $email_list = str_replace($_SESSION['import_record_delimiter'], "\n", $email_list); -} - -// Split file/emails into array -$email_list = explode("\n", $email_list); //WARNING the file contents get replace by an array -output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'

', count($email_list))); -$header = array_shift($email_list); -$total = count($email_list); -$headers = str_getcsv($header, $_SESSION['import_field_delimiter']); +$csvReader = new CsvReader($_SESSION['import_file'], $_SESSION['import_field_delimiter']); +$total = $csvReader->totalRows(); +output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'

', $total)); +--$total; // now the number of subscribers to be imported +$headers = $csvReader->getRow(); $headers = array_unique($headers); $_SESSION['columnnames'] = $headers; //## show progress and adjust working space -if (count($email_list)) { +if ($total > 0) { $import_field_delimiter = $_SESSION['import_field_delimiter']; - if (count($email_list) > 300 && !$_SESSION['test_import']) { + if ($total > 300 && !$_SESSION['test_import']) { // this is a possibly a time consuming process, so show a progress bar echo ''; flush(); // increase the memory to make sure we are not running out - // $mem = sizeof($email_list); ini_set('memory_limit', '32M'); } @@ -65,18 +54,17 @@ $c = 1; $count['invalid_email'] = 0; $num_lists = count($_SESSION['lists']); - $total = count($email_list); $cnt = 0; $count['emailmatch'] = 0; $count['fkeymatch'] = 0; $count['dataupdate'] = 0; $count['duplicate'] = 0; $additional_emails = 0; - foreach ($email_list as $line) { + + while ($values = $csvReader->getRow()) { set_time_limit(60); // will contain attributes to store / change $user = array(); - $values = str_getcsv($line, $_SESSION['import_field_delimiter']); $system_values = array(); foreach ($system_attribute_mapping as $column => $index) { // print '
'.$column . ' = '. $values[$index]; @@ -114,8 +102,8 @@ $replace = array(); foreach ($_SESSION['import_attribute'] as $key => $val) { if (!empty($values[$val['index']])) { - $user[$val['index']] = addslashes($values[$val['index']]); - $replace[$key] = addslashes($values[$val['index']]); + $user[$val['index']] = htmlspecialchars($values[$val['index']]); + $replace[$key] = htmlspecialchars($values[$val['index']]); } } } else { diff --git a/public_html/lists/admin/import2.php b/public_html/lists/admin/import2.php index edada8265..52ee4f6d9 100644 --- a/public_html/lists/admin/import2.php +++ b/public_html/lists/admin/import2.php @@ -20,7 +20,6 @@ '#', "\t", ); -$email_list = array(); $attributes = array(); if (!isset($everyone_groupid)) { @@ -39,8 +38,9 @@ 'http://php.net/post_max_size', 'http://php.net/post_max_size'))); } -require dirname(__FILE__).'//structure.php'; +require dirname(__FILE__).'/structure.php'; require dirname(__FILE__).'/inc/importlib.php'; +require dirname(__FILE__).'/CsvReader.php'; register_shutdown_function('my_shutdown'); if (!defined('WEBBLER')) { @@ -184,43 +184,16 @@ if (filesize($_SESSION['import_file']) > 50000) { @ini_set('memory_limit', memory_get_usage() + 50 * filesize($_SESSION['import_file'])); } - $email_list = file_get_contents($_SESSION['import_file']); flush(); if (!isset($_SESSION['import_attribute'])) { $_SESSION['import_attribute'] = array(); } - // Clean up email file - $email_list = trim($email_list); - $email_list = str_replace("\r", "\n", $email_list); - $email_list = str_replace("\n\r", "\n", $email_list); - $email_list = str_replace("\n\n", "\n", $email_list); - - if ($_SESSION['import_record_delimiter'] != "\n") { - $email_list = str_replace($_SESSION['import_record_delimiter'], "\n", $email_list); - } - - // not sure if we need to check on errors - /* - for($i=0; $i 20 ) ? $errpos - 20 : 0; - print '

'; - printf($GLOBALS['I18N']->get('Error was around here "%s"'),substr( $email_list, $startpos, 40 )); - print '

'; - printf('

',$GLOBALS['I18N']->get('Illegal character was %s').'

',$illegal_cha[$i]); - Fatal_Error($GLOBALS['I18N']->get('A character has been found in the import which is not the delimiter indicated, but is likely to be confused for one. Please clean up your import file and try again')." $import_field_delimiter, $import_record_delimiter"); - return; - } - }; - */ - // Split file/emails into array - $email_list = explode("\n", $email_list); //WARNING the file contents get replace by an array - output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'

', count($email_list))); - $header = array_shift($email_list); - $total = count($email_list); - $headers = str_getcsv($header, $_SESSION['import_field_delimiter']); + $csvReader = new CsvReader($_SESSION['import_file'], $_SESSION['import_field_delimiter']); + $total = $csvReader->totalRows(); + output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'

', $total)); + --$total; // now the number of subscribers to be imported + $headers = $csvReader->getRow(); $headers = array_unique($headers); $_SESSION['columnnames'] = $headers; @@ -436,8 +409,8 @@ echo '

'.PageLinkButton($_GET['page'].'&confirm=yes', $GLOBALS['I18N']->get('Confirm Import')).'

'; echo '

'.$GLOBALS['I18N']->get('Test Output').'

'; // dbg($_SESSION["import_attribute"]); -} elseif (count($email_list)) { - echo '

'.s('Importing %d subscribers to %d lists, please wait', count($email_list), +} elseif (isset($_GET['confirm']) || isset($_POST['import'])) { + echo '

'.s('Importing %d subscribers to %d lists, please wait', $total, count($_SESSION['lists'])).'

'; echo $GLOBALS['img_busy']; echo '
Progress
'; @@ -448,14 +421,13 @@ //var_dump($system_attributes); //## show progress and adjust working space -if (count($email_list)) { +if (!empty($_SESSION['test_import'])) { $import_field_delimiter = $_SESSION['import_field_delimiter']; - if (count($email_list) > 300 && !$_SESSION['test_import']) { + if ($total > 300 && !$_SESSION['test_import']) { // this is a possibly a time consuming process, so show a progress bar echo ''; flush(); // increase the memory to make sure we are not running out - // $mem = sizeof($email_list); ini_set('memory_limit', '32M'); } @@ -494,18 +466,17 @@ $c = 1; $count['invalid_email'] = 0; $num_lists = count($_SESSION['lists']); - $total = count($email_list); $cnt = 0; $count['emailmatch'] = 0; $count['fkeymatch'] = 0; $count['dataupdate'] = 0; $count['duplicate'] = 0; $additional_emails = 0; - foreach ($email_list as $line) { + + while ($values = $csvReader->getRow()) { set_time_limit(60); // will contain attributes to store / change $user = array(); - $values = str_getcsv($line, $_SESSION['import_field_delimiter']); $system_values = array(); foreach ($system_attribute_mapping as $column => $index) { // print '
'.$column . ' = '. $values[$index]; @@ -540,8 +511,8 @@ $replace = array(); foreach ($_SESSION['import_attribute'] as $key => $val) { if (!empty($values[$val['index']])) { - $user[$val['index']] = addslashes($values[$val['index']]); - $replace[$key] = addslashes($values[$val['index']]); + $user[$val['index']] = htmlspecialchars($values[$val['index']]); + $replace[$key] = htmlspecialchars($values[$val['index']]); } } } else { @@ -714,7 +685,7 @@ get('Test output') ?>: - + get('Show Warnings') ?>: @@ -728,12 +699,12 @@ - + get('Overwrite Existing') ?>: - + get('Retain Old User Email') ?>: diff --git a/tests/phpunit/CsvReaderTest.php b/tests/phpunit/CsvReaderTest.php new file mode 100644 index 000000000..778998a6b --- /dev/null +++ b/tests/phpunit/CsvReaderTest.php @@ -0,0 +1,145 @@ +assertEquals($expected, $csv->totalRows()); + } + + public function lineEndingDataProvider() + { + $dataForTests = [ + 'line-ending CRNL' => ["email,name,country\r\nfoo@foo.com,Jim Smith,United Kingdom\r\n", 2], + 'line-ending NL' => ["email,name,country\nfoo@foo.com,Jim Smith,United Kingdom\n", 2], + 'line-ending CR' => ["email,name,country\rfoo@foo.com,Jim Smith,United Kingdom\r", 2], + ]; + + return array_map( + function ($item) { + $data = $item[0]; + $expected = $item[1]; + $filename = $this->createTestFile($data); + + return [$filename, $expected]; + }, + $dataForTests + ); + } + + /** + * @test + */ + public function embeddedEnclosure() + { + $le = "\n"; + $data = '"email","name","description"' . $le + . '"foo@foo.com","Jim Smith","before "" after"' . $le + . '"foo2@foo.com","John Brown","a description"' . $le; + $filename = $this->createTestFile($data); + + $csv = new csvReader($filename, ','); + $this->assertEquals(3, $csv->totalRows()); + + $headers = $csv->getRow(); + $fields = $csv->getRow(); + $this->assertEquals('before " after', $fields[2]); + } + + /** + * @test + */ + public function embeddedDelimiter() + { + $le = "\n"; + $data = '"email","name","description"' . $le + . '"foo@foo.com","Jim Smith","before , after"' . $le + . '"foo2@foo.com","John Brown","a description"' . $le; + $filename = $this->createTestFile($data); + + $csv = new csvReader($filename, ','); + $this->assertEquals(3, $csv->totalRows()); + + $headers = $csv->getRow(); + $fields = $csv->getRow(); + $this->assertEquals('before , after', $fields[2]); + } + + /** + * @test + * @dataProvider embeddedLineEndingDataProvider + */ + public function embeddedLineEnding($le) + { + $data = '"email","name","description"' . $le + . '"foo2@foo.com","John Brown","a description"' . $le + . sprintf('"foo@foo.com","Jim Smith","%s"', "before $le after") . $le; + $filename = $this->createTestFile($data); + + $csv = new csvReader($filename, ','); + $this->assertEquals(3, $csv->totalRows()); + + $headers = $csv->getRow(); + $csv->getRow(); + $fields = $csv->getRow(); + $this->assertEquals("before $le after", $fields[2]); + } + + public function embeddedLineEndingDataProvider() + { + return [ + 'embedded NL' => ["\n"], + 'embedded CRNL' => ["\r\n"], + 'embedded CR' => ["\r"], + ]; + } + + /** + * @test + */ + public function embeddedBackslash() + { + $le = "\n"; + $data = '"email","name","description"' . $le + . '"foo@foo.com","Jim Smith","before \"" after"' . $le + . '"foo2@foo.com","John Brown","at end\"' . $le; + $filename = $this->createTestFile($data); + + $csv = new csvReader($filename, ','); + $this->assertEquals(3, $csv->totalRows()); + + $headers = $csv->getRow(); + $fields = $csv->getRow(); + $this->assertEquals('before \" after', $fields[2]); + $fields = $csv->getRow(); + $this->assertEquals('at end\\', $fields[2]); + } + + public static function tearDownAfterClass() : void + { + array_walk( + self::$temporaryFiles, + function ($item) { + unlink($item); + } + ); + } +}