u4 update
This commit is contained in:
parent
f80de8e68d
commit
80e36cf86e
@ -7,26 +7,44 @@ import random
|
||||
|
||||
|
||||
# config variables
# Every location is derived from the directory that holds this script.
actualDir = os.path.dirname(os.path.realpath(__file__))
dataDir = os.path.join(actualDir, '../data')
# The training and test corpora are sibling folders inside the data directory.
trainDir, testDir = (
    os.path.join(dataDir, folder) for folder in ('u4_train', 'u4_test')
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> CLASS Trainingsset <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
|
||||
class trainingsset:
|
||||
actualDir = os.path.dirname(os.path.realpath(__file__))
|
||||
dataDir = os.path.join(actualDir, '../data')
|
||||
trainDir = os.path.join(dataDir, 'u4_train')
|
||||
|
||||
#def __init__(self):
|
||||
|
||||
classes = ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'learned', 'lore','mystery','news','romantic']
|
||||
|
||||
def createTrainingsset(self):
    """Create the training set; currently this only splits the data."""
    # Call the bound method without an explicit ``self``: handing it over a
    # second time supplies two positional arguments to a one-parameter
    # method and raises TypeError.
    self.splitTrainingsdataRandomly()  # first split our data into trainings- and testdata
|
||||
|
||||
|
||||
def getClassesToTrain(self):
    """Walk ``trainDir``; the per-directory work is not implemented yet."""
    for _walkEntry in os.walk(trainDir):
        # TODO: implement
        continue
|
||||
|
||||
|
||||
|
||||
# Copies the files randomly into two new directories; each directory receives about fileCount / 2 files.
|
||||
# If fileCount is odd, /trainingsdata will contain one more file than /testdata.
|
||||
def splitTrainingsdataRandomly(self):
|
||||
for dirpath, dirnames, filenames in os.walk(self.trainDir, topdown=False):
|
||||
for dirpath, dirnames, filenames in os.walk(trainDir, topdown=False):
|
||||
newTrainDir = dirpath+'/trainingsdata'
|
||||
newTestDir = dirpath+'/testdata'
|
||||
fileCount = len(filenames)
|
||||
|
||||
if(fileCount > 0):
|
||||
if(fileCount > 0):
|
||||
#remove old dirs if they already exist
|
||||
if os.path.isdir(newTrainDir):
|
||||
shutil.rmtree(newTrainDir)
|
||||
@ -39,8 +57,6 @@ class trainingsset:
|
||||
numberOfFilesInTraining = 0
|
||||
numberOfFilesInTest = 0
|
||||
|
||||
# Copies the files randomly into two new directories; each directory receives about fileCount / 2 files.
|
||||
# If fileCount is odd, /trainingsdata will contain one more file than /testdata.
|
||||
for actualFile in filenames:
|
||||
fileCopied = False
|
||||
|
||||
@ -57,7 +73,57 @@ class trainingsset:
|
||||
shutil.copy(dirpath+'/'+actualFile, dirpath+'/testdata/'+actualFile)
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> CLASS MulticlassClassifier <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
class multiclassClassifier:
    """Read the test documents and write the prediction file.

    Uses the module-level ``actualDir`` and ``testDir`` settings.
    """

    # NOTE: both dicts are class attributes, so every instance shares them
    # unless an instance rebinds the name.
    # Key -> prediction, as written out by writePredictionFile
    # (presumably file name -> predicted class; confirm once bayes exists).
    filesToPrediction = {}
    # Test file name -> first line of that file (filled by getTestData).
    testFiles = {}

    def writePredictionFile(self):
        """Write the predictions to ``G22_predictions.txt``.

        The file is created in the parent directory of ``actualDir`` and
        holds one "key<TAB>prediction" line per entry, sorted by key.
        """
        predictionPath = os.path.join(actualDir, '..', 'G22_predictions.txt')
        with open(predictionPath, 'w') as f:
            # sorted(dict) iterates the keys on Python 2 and 3 alike, whereas
            # dict.iterkeys() only exists on Python 2.
            for k in sorted(self.filesToPrediction):
                f.write(str(k) + '\t' + str(self.filesToPrediction[k]) + '\n')

    def getTestData(self):
        """Store the first line of every entry of ``testDir`` in ``self.testFiles``.

        Only ``readline()`` is called, so anything after the first line of a
        file is ignored.
        """
        for infile in os.listdir(testDir):
            # The context manager closes the file even if reading fails.
            with open(os.path.join(testDir, infile), 'r') as f:
                self.testFiles[infile] = f.readline()

    def bayes(self, listOfFiles):
        # TODO: implement
        pass
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> Main method <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
|
||||
# main method
|
||||
if __name__ == '__main__':
|
||||
bla = trainingsset()
|
||||
bla.splitTrainingsdataRandomly();
|
||||
ts = trainingsset()
|
||||
#ts.splitTrainingsdataRandomly();
|
||||
ts.getClassesToTrain();
|
||||
|
||||
mc = multiclassClassifier()
|
||||
#mc.getTestData()
|
||||
#mc.writePredictionFile()
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user