Last active
August 29, 2015 14:19
-
-
Save steve-todorov/929c074788011aff3230 to your computer and use it in GitHub Desktop.
Get csv data as an array
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
// The benchmark loads CSV files of up to a million rows entirely into
// memory, so lift both the memory ceiling and the execution time limit.
ini_set('memory_limit', -1);
set_time_limit(0);
/**
 * Parse one CSV line into an array keyed by the header's column names.
 *
 * When $f is empty, $str is treated as the header line: its parsed fields
 * are stored in $f (hence the by-reference parameter) and the header is
 * returned mapped onto itself. On later calls $f supplies the keys for the
 * fields parsed from $str.
 *
 * @param string $str One raw CSV line.
 * @param array  $f   Column names; populated from $str when empty.
 *
 * @return array|false Fields keyed by column name, or false when the number
 *                     of fields differs from the number of column names.
 */
function parse_csv_assoc($str, &$f) {
    $fields = str_getcsv($str);

    if (empty($f)) {
        $f = $fields;
    }

    // array_combine() throws a ValueError on PHP 8 when the counts differ and
    // "@" cannot silence an exception, so keep the historical "false on
    // mismatch" result with an explicit check instead.
    if (count($f) !== count($fields)) {
        return false;
    }

    return array_combine($f, $fields);
}
/**
 * Load a CSV file as a list of associative rows via parse_csv_assoc().
 *
 * The first line of the file is used as the header and is not included in
 * the result.
 *
 * @param string $csv_path Path of the CSV file to read.
 *
 * @return array List of rows, each as returned by parse_csv_assoc().
 */
function assoc_getcsv1($csv_path) {
    $f = array();

    // Passing $f to array_map() as a second array only hands the callback
    // values, never a reference to $f, so the header was not retained from
    // one line to the next. Binding $f by reference in a closure gives
    // parse_csv_assoc() the same header variable for every line.
    $rows = array_map(
        function ($line) use (&$f) {
            return parse_csv_assoc($line, $f);
        },
        file($csv_path)
    );

    // Drop the header row; array_slice() renumbers the remaining rows from 0.
    return array_slice($rows, 1);
}
/**
 * Load a CSV file as a list of associative rows using str_getcsv().
 *
 * The first line supplies the column names and is not included in the
 * result.
 *
 * @param string $csv_path Path of the CSV file to read.
 *
 * @return array List of rows keyed by column name; a row whose field count
 *               differs from the header's is represented by false.
 */
function assoc_getcsv2($csv_path) {
    $parsed = array_map('str_getcsv', file($csv_path));
    if (!$parsed) {
        return array();
    }

    // Keep the header in its own variable. Overwriting row 0 with its
    // combined form collapses duplicate column names, which then makes every
    // following row look like it has the wrong number of fields.
    $header = $parsed[0];
    $width = count($header);

    $rows = array();
    foreach ($parsed as $index => $fields) {
        if ($index === 0) {
            continue;
        }
        // Explicit count check: array_combine() throws on PHP 8 for
        // mismatched counts and "@" does not suppress exceptions.
        $rows[] = count($fields) === $width ? array_combine($header, $fields) : false;
    }

    return $rows;
}
/**
 * Load a CSV file as a list of associative rows using SplFileObject.
 *
 * The first line supplies the column names (each one trimmed) and is not
 * included in the result.
 *
 * @param string $csv_path Path of the CSV file to read.
 *
 * @return array List of rows keyed by column name; a row whose field count
 *               differs from the header's is represented by false.
 */
function assoc_getcsv3($csv_path) {
    $data = array();
    $file = new SplFileObject($csv_path);

    // Get columns: read the header before any flags are set.
    $file->seek(0);
    $columns = array_map('trim', $file->fgetcsv());
    $width = count($columns);

    // Set flags for the remaining lines (CSV mode, skip empty lines, drop
    // line endings).
    $file->setFlags(
        SplFileObject::READ_CSV
        | SplFileObject::SKIP_EMPTY
        | SplFileObject::READ_AHEAD
        | SplFileObject::DROP_NEW_LINE
    );

    // Process everything else. fgetcsv() stops yielding an array once there
    // is nothing left to read, which ends the loop.
    while (!$file->eof()) {
        $csv = $file->fgetcsv();
        if (!is_array($csv)) {
            break;
        }
        // Explicit count check: array_combine() throws on PHP 8 for
        // mismatched counts and "@" does not suppress exceptions.
        $data[] = count($csv) === $width ? array_combine($columns, $csv) : false;
    }

    return $data;
}
/**
 * Time one CSV loader against each file and print the results.
 *
 * For every file this prints the loader and file name, the wall-clock
 * seconds spent inside the loader, and the number of rows it returned.
 *
 * @param string $method Name of the loader function; called with the file path.
 * @param array  $files  CSV file paths to run the loader on, in order.
 */
function test($method, array $files) {
    foreach ($files as $file) {
        echo "Testing method ".$method." with file ".$file."\n";

        $startedAt = microtime(true);
        $rows = $method($file);
        $elapsed = microtime(true) - $startedAt;

        echo "script execution time: ".$elapsed."\n";
        echo "Count: ".count($rows)."\n\n";
    }
}
// CSV fixtures to benchmark, from smallest to largest.
$filePaths = array(
    'test-1k.csv',
    'test-5k.csv',
    'test-10k.csv',
    'test-20k.csv',
    'test-50k.csv',
    'test-100k.csv',
    'test-200k.csv',
    'test-300k.csv',
    'test-500k.csv',
    'test-1m.csv',
);

// Run each loader over the full fixture list, one loader at a time.
foreach (array('assoc_getcsv1', 'assoc_getcsv2', 'assoc_getcsv3') as $loader) {
    test($loader, $filePaths);
}
| ?> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
// Generating the larger fixtures takes a while, so lift both the memory
// ceiling and the execution time limit.
ini_set('memory_limit', -1);
set_time_limit(0);
/**
 * Build a random alphanumeric string.
 *
 * The 62-character alphabet is repeated $length times, shuffled, and the
 * first $length characters are kept, so any character may appear more than
 * once. Uses str_shuffle(), so the result is not suitable for secrets.
 *
 * @param int $length Number of characters to return (default 16).
 *
 * @return string Random string made of 0-9, a-z and A-Z.
 */
function str_random($length = 16) {
    $alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    $deck = str_repeat($alphabet, $length);
    $shuffled = str_shuffle($deck);

    return substr($shuffled, 0, $length);
}
/**
 * Write a CSV fixture named ./test-<name>.csv in the current directory.
 *
 * The file starts with the header "group_id, name, description" followed by
 * $length data rows: a 1-based counter, a random 16-character name, and a
 * description ending in a random 5-character suffix.
 *
 * @param string $name   Label inserted into the file name.
 * @param int    $length Number of data rows to write.
 */
function dump($name, $length) {
    $handle = fopen('./test-'.$name.'.csv', 'w');

    fwrite($handle, "group_id, name, description\n");

    $i = 1;
    while ($i <= $length) {
        $row = $i.",".str_random().",Some interesting and unique description ".str_random(5)."\n";
        fwrite($handle, $row);
        $i++;
    }

    fclose($handle);
}
// Fixture label => number of data rows, generated from smallest to largest.
$fixtures = array(
    '1k'   => 1000,
    '5k'   => 5000,
    '10k'  => 10000,
    '20k'  => 20000,
    '50k'  => 50000,
    '100k' => 100000,
    '200k' => 200000,
    '300k' => 300000,
    '500k' => 500000,
    '1m'   => 1000000,
);

foreach ($fixtures as $label => $rowCount) {
    dump($label, $rowCount);
}
| ?> |
Author
Thank you - the comparison is not fair, since the SplFileObject version doesn't return an associative array.
BTW this way is cleaner for SplFileObject and csv:
$file = new SplFileObject($csv);
$array = array();
while (!$file->eof()) { $array[] = $file->fgetcsv(); }
Author
You're absolutely right. It was late here and I overlooked that in the test. I've updated my files with a new test. Both SplFileObject and str_getcsv have pros and cons. Your two functions work quite well with around 30-40k records in a CSV file. After that, the performance starts to decrease, and that's where SplFileObject is better. The test was run on a server with 8 cores and a regular HDD with a 16 MB cache, and the results are here. It would be great if somebody else published their results as well, so we could further compare how this works on different architectures 😃
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
NOTE: My test here was invalid, so I've removed the results to prevent confusing anyone.
Your new code has pretty much the same speed as
SplFileObject. By the way, you have a typo in the anonymous function name - it should be parse_csv_assoc.