<?php
// Benchmark: three ways of reading a CSV file into an associative array,
// two based on str_getcsv() and one based on SplFileObject.
set_time_limit(0);
ini_set('memory_limit', -1);

function parse_csv_assoc($str, &$f) {
    if (empty($f)) { $f = str_getcsv($str); }
    return @array_combine($f, str_getcsv($str));
}

function assoc_getcsv1($csv_path) {
    $f = array();
    return array_values(array_slice(array_map('parse_csv_assoc', file($csv_path), $f), 1));
}

function assoc_getcsv2($csv_path) {
    $r = array_map('str_getcsv', file($csv_path));
    foreach ($r as $k => $d) { $r[$k] = @array_combine($r[0], $r[$k]); }
    return array_values(array_slice($r, 1));
}

function assoc_getcsv3($csv_path) {
    $data = array();
    $file = new SplFileObject($csv_path);
    // Get columns
    $file->seek(0);
    $columns = array_map('trim', $file->fgetcsv());
    // Set flags
    $file->setFlags(SplFileObject::READ_CSV | SplFileObject::SKIP_EMPTY | SplFileObject::READ_AHEAD | SplFileObject::DROP_NEW_LINE);
    // Process everything else
    while (!$file->eof() && ($csv = $file->fgetcsv()) != null) {
        $data[] = @array_combine($columns, $csv);
    }
    return $data;
}

// Runs one parsing method against every test file and prints timing and row count.
function test($method, array $files) {
    foreach ($files as $file) {
        print_r("Testing method ".$method." with file ".$file."\n");
        $time1 = microtime(true);
        $csv = $method($file);
        $time2 = microtime(true);
        print_r("script execution time: ".($time2 - $time1)."\n");
        print_r("Count: ".count($csv)."\n\n");
    }
}

$filePaths = array(
    'test-1k.csv', 'test-5k.csv', 'test-10k.csv', 'test-20k.csv', 'test-50k.csv',
    'test-100k.csv', 'test-200k.csv', 'test-300k.csv', 'test-500k.csv', 'test-1m.csv'
);

test('assoc_getcsv1', $filePaths);
test('assoc_getcsv2', $filePaths);
test('assoc_getcsv3', $filePaths);
?>
<?php
// Generates the CSV test files (1k to 1m rows) used by the benchmark above.
set_time_limit(0);
ini_set('memory_limit', -1);

function str_random($length = 16) {
    $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    return substr(str_shuffle(str_repeat($pool, $length)), 0, $length);
}

function dump($name, $length) {
    $file = fopen('./test-'.$name.'.csv', 'w');
    fwrite($file, "group_id, name, description\n");
    for ($i = 1; $i <= $length; $i++) {
        fwrite($file, $i.",".str_random().",Some interesting and unique description ".str_random(5)."\n");
    }
    fclose($file);
}

dump('1k', 1000);
dump('5k', 5000);
dump('10k', 10000);
dump('20k', 20000);
dump('50k', 50000);
dump('100k', 100000);
dump('200k', 200000);
dump('300k', 300000);
dump('500k', 500000);
dump('1m', 1000000);
?>
Yeah, I actually tried it with 300k+ results and it's still faster.
script execution time: 2.1298151016235
Count: 325003
script execution time: 3.4907600879669
Count: 325003
Thanks for the idea! :)
Steve, please test the two other methods I added; those are my associative approaches.
NOTE: My test here was invalid, so I've removed the results to prevent confusing anyone.
Your new code has pretty much the same speed as SplFileObject. By the way, you have a typo in the callback function name: it should be parse_csv_assoc.
Thank you. The comparison is not fair, though, since SplFileObject doesn't return an associative array.
BTW, this way is cleaner for reading a CSV with SplFileObject:
$file = new SplFileObject($csv);
$array = array();
while (!$file->eof()) { $array[] = $file->fgetcsv(); }
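For reference, here is a minimal sketch (not from the original thread) of the same loop made associative by combining each row with the header line, which is essentially what assoc_getcsv3 above does minus the extra flags; $csv is assumed to hold the file path:
$file = new SplFileObject($csv);                   // $csv: path to the CSV file
$columns = array_map('trim', $file->fgetcsv());    // first line holds the column names
$array = array();
while (!$file->eof() && ($row = $file->fgetcsv()) !== false) {
    // Only combine rows whose column count matches the header (skips blank lines)
    if (is_array($row) && count($row) === count($columns)) {
        $array[] = array_combine($columns, $row);
    }
}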
You're absolutely right. It was late here and I overlooked the test. I've updated my files with a new test. Both SplFileObject and str_getcsv have pros and cons: your two functions work quite well up to around 30-40k records per CSV file, but after that the performance starts to decrease, and that's where SplFileObject is better. The test was run on a server with 8 cores and a regular HDD with a 16 MB cache, and the results are here. It would be great if somebody else published their results as well, so we could compare how this behaves on different architectures 😃
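On the memory side, one possible variant (not benchmarked here, purely a sketch with a made-up name, assoc_getcsv_gen; requires PHP 5.5+ for generators) is to stream the rows with a generator instead of building the whole array, which keeps memory usage roughly flat even for test-500k.csv and test-1m.csv:
function assoc_getcsv_gen($csv_path) {
    $file = new SplFileObject($csv_path);
    $columns = array_map('trim', $file->fgetcsv());   // header row
    $file->setFlags(SplFileObject::READ_CSV | SplFileObject::SKIP_EMPTY | SplFileObject::READ_AHEAD | SplFileObject::DROP_NEW_LINE);
    while (!$file->eof() && ($row = $file->fgetcsv()) != null) {
        yield @array_combine($columns, $row);         // one associative row at a time
    }
}

// Rows are produced lazily, so no large array is ever held in memory.
foreach (assoc_getcsv_gen('test-1m.csv') as $row) {
    // process $row here
}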
Amazingly fast ;)