#!/usr/bin/env python
"""
The examples below show how to load multiple files via numpy loadtxt.
Sorting the files may or may not be necessary depending on your problem.
Given that loading multiple files for a single calculation seems to be a common
task, numpy should have a better approach somewhere in its library.
This is the best I could do with a little research.
"""
import fileinput
import glob
import numpy as np
def oneliner(filename_glob_pattern):
    """Load every file matching a glob pattern into a single numpy array.

    The matching filenames are sorted, chained into one stream of lines with
    fileinput, and parsed by a single np.loadtxt call.

    Note that sorted() orders the names as plain strings, so "x.dat10" comes
    before "x.dat2".

    Args:
        filename_glob_pattern: Unix-style pattern understood by glob.glob.

    Returns:
        The array np.loadtxt builds from the concatenated lines of all files.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    filenames = sorted(glob.glob(filename_glob_pattern))
    if not filenames:
        # fileinput silently falls back to reading sys.stdin when it is given
        # an empty list of files, so fail loudly instead.
        raise FileNotFoundError(
            "no files match {!r}".format(filename_glob_pattern))
    # FileInput (unlike fileinput.input) keeps no module-level state, and the
    # with-block closes the open file even if loadtxt raises part-way through.
    with fileinput.FileInput(filenames) as lines:
        return np.loadtxt(lines)
def broken_out_for_pedagogical_purposes(filename_glob_pattern):
    """Load all files matching a glob pattern, one named step per line.

    Performs the same steps as oneliner, but each intermediate result gets
    its own descriptive name so the pipeline is easy to follow.

    Args:
        filename_glob_pattern: Unix-style pattern understood by glob.glob.

    Returns:
        The array np.loadtxt builds from the concatenated lines of all files.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    # Step 1: collect every filename that matches the pattern.
    list_all_matching_files = glob.glob(filename_glob_pattern)
    if not list_all_matching_files:
        # With an empty list fileinput would read from sys.stdin instead.
        raise FileNotFoundError(
            "no files match {!r}".format(filename_glob_pattern))

    # Step 2: put the names in a predictable (plain string) order.
    sorted_list_of_filenames = sorted(list_all_matching_files)

    # Step 3: present all files as one continuous iterator of lines.
    # FileInput keeps no module-level state, and the with-block closes the
    # open file even if loadtxt raises.
    concatenate_files_into_single_input = fileinput.FileInput(
        sorted_list_of_filenames)
    with concatenate_files_into_single_input:
        # Step 4: let numpy parse that single stream into one array.
        single_array_of_all_files = np.loadtxt(
            concatenate_files_into_single_input)
    return single_array_of_all_files
if __name__ == "__main__":
    # Demo: load every file in the current directory whose name matches the
    # pattern and report how many rows the combined array has.
    array = oneliner("flinakT940n100UF4-Charges.dat*")
    print("The whole array has {} rows".format(len(array)))

# References and further reading:
- glob selects files by unix patterns. An asterisk matches zero or more characters; question marks and square brackets also have special meanings.
- sorted is a builtin function. You do not need to import it: it's always available.
- fileinput concatenates multiple files together into a single iterator.
- numpy.loadtxt: it's always worth checking the documentation to see whether something is already built in.
"""
You have to know the number of files for this approach.
This may be more comfortable for those more familiar with C.
"""
import numpy as np
def iterative_approach(filename_pattern, number_of_files):
    """Load numbered files one at a time and join them into one array.

    This approach may be more comfortable to those coming from a language
    like C: build the list of names, load each file, then concatenate.

    Args:
        filename_pattern: %-style format string with one placeholder for the
            file number, e.g. "flinakT940n100UF4-Charges.dat%s".
        number_of_files: How many files to load. Files are numbered from 1
            through number_of_files inclusive.

    Returns:
        The per-file arrays joined by np.concatenate, in numeric file order.
    """
    list_of_filenames = []
    for number in range(1, number_of_files + 1):
        list_of_filenames.append(filename_pattern % number)

    list_of_arrays = []
    for filename in list_of_filenames:
        list_of_arrays.append(np.loadtxt(filename))

    # Concatenate the list built above (the name passed here must match it).
    final_array = np.concatenate(list_of_arrays)
    return final_array
def using_string_formatting(filename_pattern, number_of_files):
    """Load numbered files using str.format and list comprehensions.

    String formatting & list comprehensions both can help condense the code
    in iterative_approach.
    String formatting replaces a {} with the format arguments.
    List comprehensions create a list by [something for varfoo in iterator];
    all the resulting somethings are elements of the list.

    Args:
        filename_pattern: Filename prefix; the file number is appended
            directly to it (no placeholder is needed in the prefix).
        number_of_files: How many files to load. Files are numbered from 1
            through number_of_files inclusive.

    Returns:
        A list with one np.loadtxt array per file, in numeric file order.
        The arrays are NOT concatenated; pass the list to np.concatenate
        if a single array is wanted.
    """
    filename_list = [
        "{}{}".format(filename_pattern, number)
        for number in range(1, number_of_files + 1)
    ]
    return [np.loadtxt(filename) for filename in filename_list]
def test_iterative_approach():
    """Demo: load 23 numbered files via iterative_approach, print row count."""
    row_count = len(iterative_approach("flinakT940n100UF4-Charges.dat%s", 23))
    print("The whole array has %d rows" % row_count)
def test_using_string_formating():
    """Demo: load 23 numbered files via using_string_formatting, print rows."""
    arrays = using_string_formatting("flinakT940n100UF4-Charges.dat", 23)
    # using_string_formatting returns a list with one array per file, so
    # join them first; otherwise len() would count files, not rows.
    array = np.concatenate(arrays)
    print("The whole array has {} rows".format(len(array)))
References and further reading:
- PEP 3101 is a gentle introduction to the string formatting "mini language."
- The library format specification is a quick reference.
- The range function is a built-in (no need to import). There are about forty such functions.
- Numpy examples illustrating some real world uses.
- sorted is another built in function that is worth learning how to use.
I am by every definition a Python beginner, but shouldn't the first
oneliner function end with return array?