Commit a8747aed authored by Antoine PIGEAU's avatar Antoine PIGEAU
Browse files

Work on the neural-network (NN) code for ScriptClassifier.

parent cc030e2a
......@@ -20,6 +20,8 @@ This file is part of Hubble-UserProfile.
@author: Antoine Pigeau
'''
from collections import namedtuple
class Constant:
AdaBoost = "AdaBoost"
......@@ -42,4 +44,21 @@ class Constant:
FORWARD = 'forward'
BACKWARD = 'backward'
\ No newline at end of file
BACKWARD = 'backward'
# Result records shared by the classifier scripts.  Every field defaults to
# None so callers may build partially-filled results.  Defaults are injected
# via __new__.__defaults__ rather than namedtuple's `defaults=` keyword,
# which only exists from Python 3.7 onward.

# Outcome of one train/evaluate iteration (epoch is filled by the
# neural-network script; other classifiers leave it as None).
ResultSingleIteration = namedtuple(
    'ResultSingleIteration',
    ['accuracy', 'auc', 'confusionMatrix', 'dictWeight', 'epoch'])
ResultSingleIteration.__new__.__defaults__ = (None,) * len(ResultSingleIteration._fields)

# Aggregated outcome of the n iterations run on a single course.
ResultCourse = namedtuple(
    'ResultCourse',
    ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight', 'epochs'])
ResultCourse.__new__.__defaults__ = (None,) * len(ResultCourse._fields)

# Outcome summarised over all courses.  (Field spellings 'stdAccurracy'
# and 'avgAccuraccyPerClass' are kept as-is: callers access them by name.)
ResultAllCourses = namedtuple(
    'ResultAllCourses',
    ['avgAccuracy', 'stdAccurracy', 'avgAuc',
     'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'])
ResultAllCourses.__new__.__defaults__ = (None,) * len(ResultAllCourses._fields)
\ No newline at end of file
......@@ -39,8 +39,10 @@ from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from featureManager.normalize import Normalize
from exportManager import exportLatex
from featureManager.featureMultiGroup import FeatureMultiGroup
from exportManager import exportLatex
from classifierManager.constant import Constant
......@@ -185,7 +187,7 @@ class ScriptClassifier:
'''
ResultSingleIteration = namedtuple('ResultSingleIteration', ['accuracy', 'auc', 'confusionMatrix', 'dictWeight'])
#ResultSingleIteration = namedtuple('ResultSingleIteration', ['accuracy', 'auc', 'confusionMatrix', 'dictWeight'])
featureMultiGroup = self.classifier.getFeatureMultiGroup()
......@@ -221,7 +223,7 @@ class ScriptClassifier:
assert confusionMatrix.shape == (2,2)
return ResultSingleIteration(accuracy, auc, confusionMatrix, dictWeight) #(accuracy, auc, confusionMatrix, resultWeight)
return Constant.ResultSingleIteration(accuracy, auc, confusionMatrix, dictWeight) #(accuracy, auc, confusionMatrix, resultWeight)
def predictionTaskNTimes(self,
......@@ -243,7 +245,7 @@ class ScriptClassifier:
print("---------------------------------- Call predictionTaskNTimes of ScriptClassifier")
ResultCourse = namedtuple('ResultCourse', ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'])
#ResultCourse = namedtuple('ResultCourse', ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'])
resultCourse = None
accuracies = []
......@@ -310,7 +312,7 @@ class ScriptClassifier:
s = np.sum(avgConfusionMatrix, axis=0)
avgConfusionMatrix = avgConfusionMatrix / s
resultCourse = ResultCourse(accuracies, aucScores, avgConfusionMatrix, dictWeight)
resultCourse = Constant.ResultCourse(accuracies, aucScores, avgConfusionMatrix, dictWeight)
with open(fileNameSavedResult, "wb") as fileResult:
#pickle.dump((accuracies, aucScores, confusionMatrice, dictResultWeight), fileResult)
......@@ -340,8 +342,8 @@ class ScriptClassifier:
'''
ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc',
'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'])
#ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc',
# 'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'])
fileNameResult = os.path.join(self.directoryExperiment,
self.fileName+
......@@ -352,7 +354,7 @@ class ScriptClassifier:
size = len(self.classifier.nameGroups)
avgConfusionMatrix = np.zeros((size, size))
accuracies = []
aucScores = []
aucs = []
#accuracysPerClass = []
dictWeightAllCourses = {}
dictAllCourses = {}
......@@ -373,10 +375,10 @@ class ScriptClassifier:
dictAllCourses[course.getCourseId()] = resultCourse
accuracy = np.nanmean(resultCourse.accuracies)
aucScore = np.nanmean(resultCourse.aucs)
auc = np.nanmean(resultCourse.aucs)
accuracies.append(accuracy)
aucScores.append(aucScore)
aucs.append(auc)
#accuracysPerClass.append(resultCourse.avgConfusionMatrix.diagonal())
avgConfusionMatrix = avgConfusionMatrix + resultCourse.avgConfusionMatrix
......@@ -389,7 +391,7 @@ class ScriptClassifier:
fileResult.write("Course "+courseName+": accuracy("+str(accuracy)+") \n")
fileResult.write("Accuracy: "+str(accuracy)+" \n")
fileResult.write("RocScore: "+str(aucScore)+" \n")
fileResult.write("RocScore: "+str(auc)+" \n")
fileResult.write("confusion matrix\n")
fileResult.write(str(resultCourse.avgConfusionMatrix))
......@@ -403,17 +405,17 @@ class ScriptClassifier:
scoreFinal = np.average(accuracies)
accuracyFinal = np.average(accuracies)
stdAccuracy = np.std(accuracies)
aucScoreFinal = np.nanmean(aucScores)
stdAuc = np.std(aucScores)
aucFinal = np.nanmean(aucs)
stdAuc = np.std(aucs)
avgConfusionMatrix = avgConfusionMatrix / float(ntime)
s = np.sum(avgConfusionMatrix, axis=0)
avgConfusionMatrix = avgConfusionMatrix / s
fileResult.write("\n final Accuracy("+str(scoreFinal)+")")
fileResult.write("\n final AUC ("+str(aucScoreFinal)+") \n")
fileResult.write("\n final Accuracy("+str(accuracyFinal)+")")
fileResult.write("\n final AUC ("+str(aucFinal)+") \n")
if dictWeightAllCourses:
......@@ -437,9 +439,12 @@ class ScriptClassifier:
fileResult.write(str(resultWeigth))
# 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgAccuraccyPerClass', 'dictAllCourses'
resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy,
aucScoreFinal, stdAuc,
avgConfusionMatrix.diagonal(), dictAllCourses)
resultAllCourses = Constant.ResultAllCourses(accuracyFinal,
stdAccuracy,
aucFinal,
stdAuc,
avgConfusionMatrix.diagonal(),
dictAllCourses)
return resultAllCourses
......
......@@ -23,6 +23,7 @@ This file is part of Hubble-UserProfile.
import numpy as np
import os
import pickle
import cloudpickle
import hashlib
from sklearn.metrics import confusion_matrix
......@@ -38,6 +39,8 @@ from classifierManager.script.scriptClassifier import ScriptClassifier
from featureManager.ocr.featureGroup import FeatureGroup
from featureManager.normalize import Normalize
from classifierManager.constant import Constant
class ScriptNeuralNetwork(ScriptClassifier):
......@@ -187,7 +190,7 @@ class ScriptNeuralNetwork(ScriptClassifier):
assert confusionMatrix.shape == (2,2)
return accuracy, confusionMatrix, model.getEarlyStoppingEpoch(), auc
return Constant.ResultSingleIteration(accuracy, auc, confusionMatrix, None, model.getEarlyStoppingEpoch())
def predictionTaskNTimes(self,
course,
......@@ -203,12 +206,12 @@ class ScriptNeuralNetwork(ScriptClassifier):
print("--------------------- Call predictionTaskNTimes of ScriptNeuralNetwork")
accuracys = []
nbEpochs = []
accuracies = []
epochs = []
aucScores = []
size = len(self.classifier.nameGroups)
confusionMatrice = np.zeros((size, size))
avgConfusionMatrix = np.zeros((size, size))
name = str(self.classifier.features).encode('utf-8')
hashFeature = "["+hashlib.sha1(name).hexdigest()+"]"
......@@ -243,40 +246,49 @@ class ScriptNeuralNetwork(ScriptClassifier):
if os.path.exists(fileNameSavedResult) and cache:
with open(fileNameSavedResult, "rb") as fileResult:
(accuracy, confusionMatrice, nbEpoch, auc) = pickle.load(fileResult)
resultCourse = cloudpickle.load(fileResult)
print(str(fileNameSavedResult)+" is already done")
return (accuracy, confusionMatrice, nbEpoch, auc)
return resultCourse
print(str(fileNameSavedResult)+" is not already done")
for _ in range(0, ntime):
(accuracy, confusionMatrix, nbEpoch, auc) = self.predictionTask(course, whereToCut)
#(accuracy, confusionMatrix, nbEpoch, auc)
result = self.predictionTask(course, whereToCut, cache=cache)
if auc is None:
raise ValueError("classifierManager.script.ScriptNeuralNetwork - predictionTaskNTimes : auc is None")
accuracys.append(accuracy)
nbEpochs.append(nbEpoch)
aucScores.append(auc)
accuracies.append(result.accuracy)
epochs.append(result.epoch)
aucScores.append(result.auc)
confusionMatrice = confusionMatrice + confusionMatrix
avgConfusionMatrix = avgConfusionMatrix + result.confusionMatrix
confusionMatrice = confusionMatrice / float(ntime)
s = np.sum(confusionMatrice, axis=0)
confusionMatrice = confusionMatrice / s
# confusionMatrice = confusionMatrice / float(ntime)
# s = np.sum(confusionMatrice, axis=0)
# confusionMatrice = confusionMatrice / s
avgConfusionMatrix = avgConfusionMatrix / float(ntime)
s = np.sum(avgConfusionMatrix, axis=0)
avgConfusionMatrix = avgConfusionMatrix / s
resultCourse = Constant.ResultCourse(accuracies, aucScores, avgConfusionMatrix, None, epochs)
if cache:
with open(fileNameSavedResult, "wb") as fileResult:
pickle.dump((np.average(accuracys), confusionMatrice, np.average(nbEpochs), np.nanmean(aucScores)), fileResult)
with open(fileNameSavedResult, "wb") as fileResult:
cloudpickle.dump(resultCourse, fileResult)
return (np.average(accuracys), confusionMatrice, np.average(nbEpochs), np.nanmean(aucScores))
#return (np.average(accuracys), confusionMatrice, np.average(nbEpochs), np.nanmean(aucScores))
return resultCourse
# TODO: Run completely this methods and fully and ensure it is fully debugged.
def predictionTaskForAllCourses(self,
whereToCut,
ntime = 10):
ntime = 10,
cache=True):
'''
Launch prediction task n-times function for all the courses
......@@ -296,29 +308,42 @@ class ScriptNeuralNetwork(ScriptClassifier):
"NTimes="+str(ntime)+
'.txt')
accuracies = []
nbEpochs = []
aucScores = []
aucs = []
epochs = []
avgConfusionMatrix = np.zeros((size, size))
accuraciesPerClass = []
dictAllCourses = {}
with open(fileNameResult, 'w') as fileResult:
for course in self.classifier.getCourses():
(accuracy, confusionMatrix, nbEpoch, auc) = self.predictionTaskNTimes(course,
whereToCut = whereToCut,
ntime=ntime)
#(accuracy, confusionMatrix, nbEpoch, auc)
resultCourse = self.predictionTaskNTimes(course,
whereToCut = whereToCut,
ntime=ntime,
cache=cache)
dictAllCourses[course.getCourseId()] = resultCourse
accuracy = np.nanmean(resultCourse.accuracies)
auc = np.nanmean(resultCourse.aucs)
epoch = np.nanmean(resultCourse.epochs)
accuracies.append(accuracy)
nbEpochs.append(nbEpoch)
aucScores.append(auc)
accuraciesPerClass.append(confusionMatrix.diagonal())
aucs.append(auc)
epochs.append(epoch)
avgConfusionMatrix = avgConfusionMatrix + resultCourse.avgConfusionMatrix
#accuraciesPerClass.append(confusionMatrix.diagonal())
courseName = course.getName()
fileResult.write("--------------------------\n")
fileResult.write("CourseOcr "+courseName+": accuracy("+str(accuracy)+") \n")
fileResult.write("confusion matrix\n")
fileResult.write(str(confusionMatrix))
fileResult.write(str(avgConfusionMatrix))
fileResult.write("\n auc\n")
fileResult.write(str(auc))
......@@ -335,13 +360,25 @@ class ScriptNeuralNetwork(ScriptClassifier):
#accuraciesPerCourseOcr.append(np.array([1.0, 0.59, 0.78]))
#accuracies.append(1.0)
scoreFinal = np.average(accuracies)
rocScoreFinal = np.nanmean(aucScores)
accuracyFinal = np.average(accuracies)
stdAccuracy = np.std(accuracies)
aucFinal = np.nanmean(aucs)
stdAuc = np.std(aucs)
fileResult.write("\n final accuracy("+str(scoreFinal)+")")
fileResult.write("\n final area under curve roc("+str(rocScoreFinal)+") \n")
avgConfusionMatrix = avgConfusionMatrix / float(ntime)
s = np.sum(avgConfusionMatrix, axis=0)
avgConfusionMatrix = avgConfusionMatrix / s
fileResult.write("\n final accuracy("+str(accuracyFinal)+")")
fileResult.write("\n final area under curve roc("+str(aucFinal)+") \n")
resultAllCourses = Constant.ResultAllCourses(accuracyFinal, stdAccuracy,
aucFinal, stdAuc,
avgConfusionMatrix.diagonal(), dictAllCourses)
return (accuracies, accuraciesPerClass, aucScores)
#return (accuracies, accuraciesPerClass, aucScores)
return resultAllCourses
def predictionForMergedCourse(self, mergedCourse, whereToCut, ntime=10):
......
......@@ -58,7 +58,7 @@ if __name__ == "__main__":
classifier.setScriptClassifier(ConstantClassifier.LogisticRegression)
classifier = Classifier.getClassifierAdaBoost()
classifier = Classifier.getClassifierPerceptron()
'''
classification task process
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment