Commit cdfc188d authored by Antoine PIGEAU

Continue NamedTuple update: predictionTaskForAllCourses is done; predictionTaskForAllPeriods still to do.
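For context, the commit replaces positional result tuples with collections.namedtuple results so call sites read fields by name. A minimal standalone sketch of the pattern, reusing the field names from the diff below (predictionTaskNTimes here is a toy stand-in, not the project's real method):

    from collections import namedtuple

    # Result container with the fields used in this commit; named access
    # replaces positional unpacking like result[0].
    ResultCourse = namedtuple('ResultCourse',
                              ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'])

    def predictionTaskNTimes():
        # ... run the classification ntime times (elided) ...
        return ResultCourse(accuracies=[0.80, 0.85], aucs=[0.70, 0.75],
                            avgConfusionMatrix=None, dictWeight={})

    result = predictionTaskNTimes()
    print(result.accuracies)   # instead of result[0]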

parent b2e42931
@@ -169,14 +169,14 @@ class Classifier:
                                              ntime,
                                              cache)
 
-    def predictionTaskForAllCourses(self, whereToCut=100, ntime = 10):
+    def predictionTaskForAllCourses(self, whereToCut=100, ntime = 10, cache=True):
         courses = []
         for idCourse in self.idCourses:
             courses.append(self.getCourse(idCourse))
-        return self.scriptClassifier.predictionTaskForAllCourses(whereToCut, ntime)
+        return self.scriptClassifier.predictionTaskForAllCourses(whereToCut=whereToCut, ntime=ntime, cache=cache)
 
     def predictionTaskForAllPeriods(self, ntime = 10):
         self.scriptClassifier.predictionTaskForAllPeriods(ntime)
@@ -314,7 +314,7 @@ class ScriptClassifier:
             #pickle.dump((accuracies, aucScores, confusionMatrice, dictResultWeight), fileResult)
             pickle.dump(tuple(resultCourse), fileResult)
 
-        return resultCourse #(accuracies, aucScores, confusionMatrice, dictResultWeight)
+        return resultCourse #'accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'
 
     def setFeatures(self, course=None, whereToCut=None):
@@ -336,18 +336,23 @@ class ScriptClassifier:
             roc score
         '''
-        ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgConfusionMatrix', 'dictAllCourses'])
+        ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc',
+                                                           'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'])
 
         fileNameResult = os.path.join(self.directoryExperiment,
                                       self.fileName+
                                       "WhereToCut"+str(whereToCut)+
                                       "NTimes="+str(ntime)+
                                       '.txt')
+        size = len(self.classifier.nameGroups)
+        avgConfusionMatrix = np.zeros((size, size))
         accuracies = []
         aucScores = []
-        accuracysPerClass = []
+        #accuracysPerClass = []
         dictWeightAllCourses = {}
-        dictResults = {}
+        dictResultAllCourses = {}
 
         with open(fileNameResult, 'w') as fileResult:
@@ -362,7 +367,7 @@ class ScriptClassifier:
                                                          ntime=ntime,
                                                          cache=cache)
-                dictResults[course.getCourseId()] = resultCourse
+                dictResultAllCourses[course.getCourseId()] = resultCourse
 
                 accuracy = np.nanmean(resultCourse.accuracies)
                 aucScore = np.nanmean(resultCourse.aucs)
@@ -370,7 +375,8 @@ class ScriptClassifier:
                 accuracies.append(accuracy)
                 aucScores.append(aucScore)
-                accuracysPerClass.append(resultCourse.avgConfusionMatrix.diagonal())
+                #accuracysPerClass.append(resultCourse.avgConfusionMatrix.diagonal())
+                avgConfusionMatrix = avgConfusionMatrix + resultCourse.avgConfusionMatrix
 
                 if resultCourse.dictWeight:
                     dictWeightAllCourses[course.getCourseId()] = resultCourse.dictWeight
@@ -398,6 +404,10 @@ class ScriptClassifier:
             stdAccuracy = np.std(accuracies)
             aucScoreFinal = np.nanmean(aucScores)
             stdAuc = np.std(aucScores)
+            avgConfusionMatrix = avgConfusionMatrix / float(ntime)
+            s = np.sum(avgConfusionMatrix, axis=0)
+            avgConfusionMatrix = avgConfusionMatrix / s
 
             fileResult.write("\n final Accuracy("+str(scoreFinal)+")")
             fileResult.write("\n final AUC ("+str(aucScoreFinal)+") \n")
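A small worked example of the normalization the added lines perform, with made-up numbers: each column of the accumulated matrix is divided by its column sum, so the diagonal reads as a per-class rate. Note that any uniform scaling, including the division by ntime, cancels under the column normalization.

    import numpy as np

    avgConfusionMatrix = np.array([[30.,  5.],
                                   [10., 55.]])          # accumulated over runs (made-up)
    ntime = 10
    avgConfusionMatrix = avgConfusionMatrix / float(ntime)   # [[3.0, 0.5], [1.0, 5.5]]
    s = np.sum(avgConfusionMatrix, axis=0)                   # column sums: [4.0, 6.0]
    avgConfusionMatrix = avgConfusionMatrix / s              # column-normalized
    print(avgConfusionMatrix.diagonal())                     # [0.75  0.9166...]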
@@ -423,9 +433,12 @@ class ScriptClassifier:
                 resultWeigth = sorted(resultWeight.items(), key=operator.itemgetter(1), reverse=True)
                 fileResult.write(str(resultWeigth))
 
-            # 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgConfusionMatrix', 'dictAllCourses'
-            resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy, aucScoreFinal, stdAuc, )
-            return (accuracies, accuracysPerClass, aucScores, stdAccuracy, stdAuc, dictResults)
+            # 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'
+            resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy,
+                                                aucScoreFinal, stdAuc,
+                                                avgConfusionMatrix.diagonal(), dictResultAllCourses)
+            return resultAllCourses
 
     def predictionTaskForAllPeriods(self, ntime = 10):
@@ -80,28 +80,31 @@ if __name__ == "__main__":
     ''' classification N times '''
-    classifier.features = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES[course.getCourseId()][25]
-    result = classifier.predictionTaskNTimes(course,
-                                             whereToCut=25,
-                                             ntime=10,
-                                             cache = False)
-    print("accuracy: ", np.nanmean(result.accuracies))
-    print("std accuracy: ", np.std(result.accuracies))
-    print("confusion Matrix:\n", result.avgConfusionMatrix)
-    print("AUC score: ", np.nanmean(result.aucs))
-    print("std AUC:", np.std(result.aucs))
-    print("dictWeight:", result.dictWeight)
+    # classifier.features = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES[course.getCourseId()][25]
+    #
+    # result = classifier.predictionTaskNTimes(course,
+    #                                          whereToCut=25,
+    #                                          ntime=10,
+    #                                          cache = False)
+    #
+    # print("accuracy: ", np.nanmean(result.accuracies))
+    # print("std accuracy: ", np.std(result.accuracies))
+    # print("confusion Matrix:\n", result.avgConfusionMatrix)
+    # print("AUC score: ", np.nanmean(result.aucs))
+    # print("std AUC:", np.std(result.aucs))
+    # print("dictWeight:", result.dictWeight)
 
     ''' for all courses '''
-    # (accuracies, accuracysPerClass, aucScores, stdAccuracy, stdAuc) = classifier.predictionTaskForAllCourses(whereToCut=100, ntime=10)
-    # print("accuracy: ", accuracies)
-    # print("std accuracy: ", stdAccuracy)
-    # print("accuracy per class:\n", accuracysPerClass)
-    # print("AUC scores: ", aucScores)
-    # print("std AUC: ", stdAuc)
+    resultAllCourses = classifier.predictionTaskForAllCourses(whereToCut=100, ntime=10, cache=True)
+    # ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'])
+    print("accuracy: ", resultAllCourses.avgAccuracy)
+    print("std accuracy: ", resultAllCourses.stdAccurracy)
+    print("accuracy per class:\n", resultAllCourses.avgAccuraccyPerClass)
+    print("AUC scores: ", resultAllCourses.avgAuc)
+    print("std AUC: ", resultAllCourses.stdAuc)
 
     ''' for all periods '''
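One usage note (a sketch, not part of the commit): because the results are namedtuples, they remain compatible with code that pickles plain tuples, as in pickle.dump(tuple(resultCourse), fileResult) above, and _asdict() gives a field-keyed view when a dict is more convenient.

    from collections import namedtuple

    ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc',
                                                       'stdAuc', 'avgAccuraccyPerClass', 'dictAllCourses'])
    r = ResultAllCourses(0.8, 0.05, 0.7, 0.04, None, {})
    print(tuple(r))      # positional view, safe to pickle
    print(r._asdict())   # dict keyed by field name (OrderedDict on older Pythons)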