Source code for pymlp.datahandling.DataHandler

import os
import re
import numpy as np

class DataHandler:
    """
    Loads, filters and saves the data files.

    Data files live in a single folder. ASCII files end in ".dat" or
    ".dat.gz"; binary numpy files end in ".npy" or ".npz". Arrays with more
    than two dimensions are stored flattened, with their original shape
    embedded in the file name, e.g. "foo_0.(2, 3, 4).dat.gz".
    """

    # File name endings accepted by isDataFile (ASCII as well as binary).
    _DATA_SUFFIXES = (".dat", ".dat.gz", ".dat.npy", ".dat.gz.npy",
                      ".dat.npz", ".bin.npz")

    # File name endings of binary numpy files.
    _BINARY_SUFFIXES = (".npy", ".npz")

    # Same language as the pattern in isReshapedDataFile, but group 1
    # captures the complete text between the parentheses ("2, 3, 4").
    _SHAPE_PATTERN = re.compile(r".*\(((?:\d+(?:, )?)+)\)\.dat(\.gz)?")

    def __init__(self, folder):
        """
        Constructor:

        folder -- folder to look for data files
        """
        self.folder = folder
        # sorted() returns a list on Python 2 and 3 alike; calling .sort()
        # on the result of filter()/map() fails on Python 3, where those
        # return iterators.
        self.files = sorted("%s/%s" % (folder, name)
                            for name in os.listdir(folder)
                            if self.isDataFile(name))

    def isDataFile(self, filename):
        """
        Returns True if the given filename is a data file as written by this
        library. It can be a binary, as well as an ASCII file.
        """
        return filename.endswith(self._DATA_SUFFIXES)

    def isBinaryDataFile(self, filename):
        """
        Returns True if the given filename is a binary numpy file.
        """
        return filename.endswith(self._BINARY_SUFFIXES)

    def isReshapedDataFile(self, filename):
        """
        Tells whether the filename contains information about its data's
        original shape. This is the case when the filename starts normally
        with its name and has its shape in parentheses before the filename
        ending, e.g. foo.(1, 2, 3).dat.gz.

        returns -- a (truthy) regex match object if the filename carries a
                   shape, None otherwise
        """
        return re.match(r".*\((\d+(, )?)+\)\.dat(\.gz)?", filename)

    def _shapeFromFilename(self, filename):
        """
        Extracts the shape embedded in a reshaped data file name.

        Parameters:
        filename -- a name accepted by isReshapedDataFile

        returns -- the shape as a tuple of ints, e.g. (2, 3, 4)
        raises -- ValueError if the name carries no shape
        """
        match = self._SHAPE_PATTERN.match(filename)
        if match is None:
            raise ValueError("no shape found in file name: %r" % (filename,))
        return tuple(int(digits)
                     for digits in re.findall(r"\d+", match.group(1)))

    def loadData(self, filename):
        """
        Loads and returns the data.

        The filename is printed to standard output before loading.

        Parameters:
        filename -- the name of the file to load the data from

        returns -- the data in the file with the name filename; for .npz
                   archives the array stored under the key "arr_0"; for
                   reshaped ASCII files the data in its original shape
        """
        print(filename)
        if self.isBinaryDataFile(filename):
            content = np.load(filename)
            # np.load returns an NpzFile (not a dict) for .npz archives.
            # Pull the array out and close the underlying file handle.
            if isinstance(content, np.lib.npyio.NpzFile):
                with content as archive:
                    return archive["arr_0"]
            if isinstance(content, dict):
                return content["arr_0"]
            return content

        content = np.loadtxt(filename)
        if self.isReshapedDataFile(filename):
            # The file holds the flattened array; restore the shape that
            # saveData wrote into the name. A regex is used instead of
            # splitting on "." so that names containing dots work as well.
            content = content.reshape(self._shapeFromFilename(filename))
        return content

    def saveData(self, filename, data, i):
        """
        Writes the data into a text file named "<filename>_<i>" inside the
        folder of this handler. The name ends in ".dat.gz".

        Arrays with more than two dimensions are written flattened and get
        their shape embedded in the name ("<filename>_<i>.(a, b, c).dat.gz")
        so that loadData can restore it.

        Parameters:
        filename -- name of the file to put the data into
        data -- data to write (numpy array)
        i -- the unique number for this run so it can be distinguished from
             other runs, e.g. repeated runs for stability
        """
        fileToWrite = "{0}/{1}_{2}".format(self.folder, filename, i)
        if data.ndim > 2:
            # Build the shape token explicitly so it always has the
            # "(a, b, c)" form the loader expects, independent of how the
            # shape tuple's elements print.
            shape = "({0})".format(", ".join(str(int(n)) for n in data.shape))
            np.savetxt("{0}.{1}.dat.gz".format(fileToWrite, shape),
                       data.flatten())
        else:
            np.savetxt(fileToWrite + ".dat.gz", data)

    def filterFiles(self, f):
        """
        Returns the known data files accepted by a predicate.

        Parameters:
        f -- function taking a file path and returning a truthy value for
             files to keep (None keeps every truthy path, as with filter)

        returns -- sorted list of the matching paths from self.files
        """
        return sorted(filter(f, self.files))