isayev/csv_split.py

## csv_split.py
import csv
import os
import pprint
import datetime
# splits() splits a CSV file and returns the list of files created and the number of pieces (current_piece)

import csv
def splits(filehandler, delimiter=',', row_limit=100000,
    output_name_template='output_%s.csv', output_path='.', keep_headers=True):
    """
    Split a CSV file into multiple numbered pieces.

    Rows are read from `filehandler` and written to successive output files.
    A new output file is started each time the running row count passes the
    next multiple of `row_limit`. The path of every output file is printed
    as the file is created.

    Arguments:

        `filehandler`: An open file-like object (any iterable of lines) holding the CSV input.
        `delimiter`: Field separator used both for reading and for writing.
        `row_limit`: The number of rows you want in each output file. 100,000 by default.
            Expected to be a positive integer.
        `output_name_template`: A %s-style template for the numbered output files.
        `output_path`: Where to stick the output files.
        `keep_headers`: Whether or not to treat the first input row as a header
            and repeat it at the top of each output file.

    Returns:

        A tuple `(created_files, current_piece)`: the list of output paths in
        creation order, and the number of pieces written.

    """
    created_files = []
    reader = csv.reader(filehandler, delimiter=delimiter)
    current_piece = 1
    current_out_path = os.path.join(
        output_path,
        output_name_template % current_piece
    )
    # NOTE: mode 'w' is kept from the original; on Python 3 the csv docs
    # recommend newline='' as well, which Python 2's open() does not accept.
    current_out_file = open(current_out_path, 'w')
    try:
        current_out_writer = csv.writer(current_out_file, delimiter=delimiter)
        # The first piece always exists, so report it regardless of
        # keep_headers (it used to be recorded only when headers were kept).
        print(current_out_path)
        created_files.append(current_out_path)
        headers = None
        if keep_headers:
            # Builtin next() works on Python 2.6+ and 3; the default avoids
            # a StopIteration when the input is completely empty.
            headers = next(reader, None)
            if headers is not None:
                current_out_writer.writerow(headers)
        current_limit = row_limit
        for i, row in enumerate(reader):
            if i + 1 > current_limit:
                # Finish the current piece before opening the next one so
                # file handles do not pile up.
                current_out_file.close()
                current_piece += 1
                current_limit = row_limit * current_piece
                current_out_path = os.path.join(
                    output_path,
                    output_name_template % current_piece
                )
                current_out_file = open(current_out_path, 'w')
                current_out_writer = csv.writer(current_out_file, delimiter=delimiter)
                print(current_out_path)
                created_files.append(current_out_path)
                if headers is not None:
                    current_out_writer.writerow(headers)
            current_out_writer.writerow(row)
    finally:
        # Guarantees the last piece is flushed to disk before returning.
        current_out_file.close()
    return created_files, current_piece

'''
This module is a simple utility for working with a single csv file,
or multiple csv files in a directory.
You can instantiate a csv file by simply doing
>>> import csvgoo2
>>> c = CsvFile('sample.csv')
and then grab the data by doing
>>> c.fill()

The CsvFile object only grabs the data when you explicitly call the fill() method
in order to save memory.  You can also use the empty() method to clear the data once
it has been filled.
'''


#CsvFile USAGE
#c = CsvFile('sample.csv')
#print c.columns
#c.fill()


class CsvFile(object):
    """Lazy handle on a single CSV file.

    Creating an instance only stores the file name; nothing is read from
    disk until fill() is called.

    Attributes set by the code below:
        name:    the path passed to the constructor.
        data:    list of row dicts produced by csv.DictReader (empty until fill()).
        columns: list of header names in file order (empty until fill()).
    """

    def __init__(self, name):
        self.name = name
        self.data = []
        self.columns = []

    def __str__(self):
        return self.name

    def empty(self):
        """Drop the loaded rows; `columns` is left untouched."""
        self.data = []

    def fill(self):
        """Read the whole file into `data` and record its header in `columns`."""
        import sys
        # 'rU' asks Python 2 for universal newlines; Python 3 does that by
        # default and rejects the 'U' flag from 3.11 onwards.
        mode = 'rU' if sys.version_info[0] < 3 else 'r'
        with open(self.name, mode) as handle:
            reader = csv.DictReader(handle)
            self.data = list(reader)
            # fieldnames is None for a completely empty file; using it rather
            # than data[0].keys() also covers header-only files without an
            # IndexError and keeps the columns in header order.
            self.columns = list(reader.fieldnames or [])


'''
The Repo class is similar to the CsvFile object,
but lets you gather all csv files in a directory at once.
The get() method just instantiates a CsvFile object.
'''

class Repo(object):
    """Collection of CsvFile objects for every .csv file in one directory.

    Arguments:
        pathdir: directory to scan (non-recursive). Defaults to the current
            directory.
        fillall: when true, every file is loaded immediately via fill_all();
            otherwise the CsvFile objects are left unfilled.

    Attributes:
        items: list of CsvFile objects, ordered by file name.
    """

    def __init__(self, pathdir = '.', fillall = False):
        self.items = []
        # sorted() makes the order deterministic; os.listdir gives no ordering.
        for entry in sorted(os.listdir(pathdir)):
            # Join with pathdir so the isfile check and the later open() look
            # in the scanned directory rather than the current working one.
            path = os.path.join(pathdir, entry)
            if os.path.isfile(path) and entry.lower().endswith('.csv'):
                # Store the object itself; the old `CsvFile(f).empty()` stored
                # the None that empty() returns.
                self.items.append(CsvFile(path))
        if fillall:
            self.fill_all()

    def fill_all(self):
        """Load the data of every collected file, keeping `items` intact."""
        for item in self.items:
            item.fill()

    def get(self, filea):
        """Return a new, unfilled CsvFile for the given path."""
        return CsvFile(str(filea))
	import csv
	import os
	import pprint
	import datetime
	# splits() splits a CSV file and returns the list of files created and the number of pieces (current_piece)

	import csv
	def splits(filehandler, delimiter=',', row_limit=100000,
	    output_name_template='output_%s.csv', output_path='.', keep_headers=True):
	    """
	    Split a CSV file into multiple numbered pieces.

	    Rows are read from `filehandler` and written to successive output files.
	    A new output file is started each time the running row count passes the
	    next multiple of `row_limit`. The path of every output file is printed
	    as the file is created.

	    Arguments:

	        `filehandler`: An open file-like object (any iterable of lines) holding the CSV input.
	        `delimiter`: Field separator used both for reading and for writing.
	        `row_limit`: The number of rows you want in each output file. 100,000 by default.
	            Expected to be a positive integer.
	        `output_name_template`: A %s-style template for the numbered output files.
	        `output_path`: Where to stick the output files.
	        `keep_headers`: Whether or not to treat the first input row as a header
	            and repeat it at the top of each output file.

	    Returns:

	        A tuple `(created_files, current_piece)`: the list of output paths in
	        creation order, and the number of pieces written.

	    """
	    created_files = []
	    reader = csv.reader(filehandler, delimiter=delimiter)
	    current_piece = 1
	    current_out_path = os.path.join(
	        output_path,
	        output_name_template % current_piece
	    )
	    # NOTE: mode 'w' is kept from the original; on Python 3 the csv docs
	    # recommend newline='' as well, which Python 2's open() does not accept.
	    current_out_file = open(current_out_path, 'w')
	    try:
	        current_out_writer = csv.writer(current_out_file, delimiter=delimiter)
	        # The first piece always exists, so report it regardless of
	        # keep_headers (it used to be recorded only when headers were kept).
	        print(current_out_path)
	        created_files.append(current_out_path)
	        headers = None
	        if keep_headers:
	            # Builtin next() works on Python 2.6+ and 3; the default avoids
	            # a StopIteration when the input is completely empty.
	            headers = next(reader, None)
	            if headers is not None:
	                current_out_writer.writerow(headers)
	        current_limit = row_limit
	        for i, row in enumerate(reader):
	            if i + 1 > current_limit:
	                # Finish the current piece before opening the next one so
	                # file handles do not pile up.
	                current_out_file.close()
	                current_piece += 1
	                current_limit = row_limit * current_piece
	                current_out_path = os.path.join(
	                    output_path,
	                    output_name_template % current_piece
	                )
	                current_out_file = open(current_out_path, 'w')
	                current_out_writer = csv.writer(current_out_file, delimiter=delimiter)
	                print(current_out_path)
	                created_files.append(current_out_path)
	                if headers is not None:
	                    current_out_writer.writerow(headers)
	            current_out_writer.writerow(row)
	    finally:
	        # Guarantees the last piece is flushed to disk before returning.
	        current_out_file.close()
	    return created_files, current_piece

	'''
	This module is a simple utility for working with a single csv file,
	or multiple csv files in a directory.
	You can instantiate a csv file by simply doing
	>>> import csvgoo2
	>>> c = CsvFile('sample.csv')
	and then grab the data by doing
	>>> c.fill()

	The CsvFile object only grabs the data when you explicitly call the fill() method
	in order to save memory. You can also use the empty() method to clear the data once
	it has been filled.
	'''


	#CsvFile USAGE
	#c = CsvFile('sample.csv')
	#print c.columns
	#c.fill()


	class CsvFile(object):
	    """Lazy handle on a single CSV file.

	    Creating an instance only stores the file name; nothing is read from
	    disk until fill() is called.

	    Attributes set by the code below:
	        name:    the path passed to the constructor.
	        data:    list of row dicts produced by csv.DictReader (empty until fill()).
	        columns: list of header names in file order (empty until fill()).
	    """

	    def __init__(self, name):
	        self.name = name
	        self.data = []
	        self.columns = []

	    def __str__(self):
	        return self.name

	    def empty(self):
	        """Drop the loaded rows; `columns` is left untouched."""
	        self.data = []

	    def fill(self):
	        """Read the whole file into `data` and record its header in `columns`."""
	        import sys
	        # 'rU' asks Python 2 for universal newlines; Python 3 does that by
	        # default and rejects the 'U' flag from 3.11 onwards.
	        mode = 'rU' if sys.version_info[0] < 3 else 'r'
	        with open(self.name, mode) as handle:
	            reader = csv.DictReader(handle)
	            self.data = list(reader)
	            # fieldnames is None for a completely empty file; using it rather
	            # than data[0].keys() also covers header-only files without an
	            # IndexError and keeps the columns in header order.
	            self.columns = list(reader.fieldnames or [])


	'''
	The Repo class is similar to the CsvFile object,
	but lets you gather all csv files in a directory at once.
	The get() method just instantiates a CsvFile object.
	'''

	class Repo(object):
	    """Collection of CsvFile objects for every .csv file in one directory.

	    Arguments:
	        pathdir: directory to scan (non-recursive). Defaults to the current
	            directory.
	        fillall: when true, every file is loaded immediately via fill_all();
	            otherwise the CsvFile objects are left unfilled.

	    Attributes:
	        items: list of CsvFile objects, ordered by file name.
	    """

	    def __init__(self, pathdir = '.', fillall = False):
	        self.items = []
	        # sorted() makes the order deterministic; os.listdir gives no ordering.
	        for entry in sorted(os.listdir(pathdir)):
	            # Join with pathdir so the isfile check and the later open() look
	            # in the scanned directory rather than the current working one.
	            path = os.path.join(pathdir, entry)
	            if os.path.isfile(path) and entry.lower().endswith('.csv'):
	                # Store the object itself; the old `CsvFile(f).empty()` stored
	                # the None that empty() returns.
	                self.items.append(CsvFile(path))
	        if fillall:
	            self.fill_all()

	    def fill_all(self):
	        """Load the data of every collected file, keeping `items` intact."""
	        for item in self.items:
	            item.fill()

	    def get(self, filea):
	        """Return a new, unfilled CsvFile for the given path."""
	        return CsvFile(str(filea))
No results found