Commit 46358aaa authored by Antoine PIGEAU
Browse files

Merge comparison : done for LR

parent af7ebae6
......@@ -179,10 +179,10 @@ class Classifier:
return self.scriptClassifier.predictionTaskForAllCourses(whereToCut=whereToCut, ntime=ntime, cache=cache)
def predictionTaskForAllPeriods(self, ntime = 10, cache=True):
self.scriptClassifier.predictionTaskForAllPeriods(ntime, cache=cache)
return self.scriptClassifier.predictionTaskForAllPeriods(ntime, cache=cache)
def predictionForMergedCourse(self, mergedCourse, whereToCut, ntime=10):
return self.scriptClassifier.predictionForMergedCourse(mergedCourse, whereToCut=whereToCut, ntime=ntime)
def predictionForMergedCourse(self, mergedCourse, whereToCut, ntime=10, cache=True):
return self.scriptClassifier.predictionForMergedCourse(mergedCourse, whereToCut=whereToCut, ntime=ntime, cache=cache)
def predictionTaskForMergedForAllPeriods(self, mergedCourse, ntime = 10):
return self.scriptClassifier.predictionTaskForMergedCourseForAllPeriods(mergedCourse, ntime)
def predictionTaskForMergedForAllPeriods(self, mergedCourse, ntime = 10, cache=True):
return self.scriptClassifier.predictionTaskForMergedCourseForAllPeriods(mergedCourse, ntime, cache=cache)
......@@ -355,7 +355,7 @@ class ScriptClassifier:
aucScores = []
#accuracysPerClass = []
dictWeightAllCourses = {}
dictResultAllCourses = {}
dictAllCourses = {}
with open(fileNameResult, 'w') as fileResult:
......@@ -370,7 +370,7 @@ class ScriptClassifier:
ntime=ntime,
cache=cache)
dictResultAllCourses[course.getCourseId()] = resultCourse
dictAllCourses[course.getCourseId()] = resultCourse
accuracy = np.nanmean(resultCourse.accuracies)
aucScore = np.nanmean(resultCourse.aucs)
......@@ -439,7 +439,7 @@ class ScriptClassifier:
# 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgAccuraccyPerClass', 'dictAllCourses'
resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy,
aucScoreFinal, stdAuc,
avgConfusionMatrix.diagonal(), dictResultAllCourses)
avgConfusionMatrix.diagonal(), dictAllCourses)
return resultAllCourses
......@@ -511,7 +511,9 @@ class ScriptClassifier:
exportLatex.exportResultAccuracy(fileNameResultAccuracy, dictResult, self.classifier.getCourses())
exportLatex.exportResultAUC(fileNameResultAUC, dictResult, self.classifier.getCourses())
return dictResult
# def exportLastResult(self, experimentDirectory):
#
# fileNameSavedResult = os.path.join(experimentDirectory, "launchTestForAllCoursesAlgoPeriod.p")
......@@ -553,7 +555,7 @@ class ScriptClassifier:
size = len(self.classifier.nameGroups)
avgConfusionMatrix = np.zeros((size, size))
dictResultAllCourses = {}
dictAllCourses = {}
fileNameResult = os.path.join(self.directoryExperiment,
self.fileName+"MergedCourse"+
......@@ -582,9 +584,9 @@ class ScriptClassifier:
resultCourse = self.predictionTaskNTimes(mergedCourse,
whereToCut=whereToCut,
ntime=ntime,
catche=cache)
cache=cache)
dictResultAllCourses[course.getCourseId()] = resultCourse
dictAllCourses[course.getCourseId()] = resultCourse
accuracy = np.nanmean(resultCourse.accuracies)
auc = np.nanmean(resultCourse.aucs)
......@@ -655,11 +657,11 @@ class ScriptClassifier:
resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy,
aucScoreFinal, stdAuc,
avgConfusionMatrix.diagonal(), dictResultAllCourses)
avgConfusionMatrix.diagonal(), dictAllCourses)
return resultAllCourses #(accuracies, accuracysPerClass, aucScores)
def predictionTaskForMergedCourseForAllPeriods(self, mergedCourse, ntime = 10):
def predictionTaskForMergedCourseForAllPeriods(self, mergedCourse, ntime = 10, cache=True):
'''
Do the prediction task on all the periods on all the courses (call the previous function).
......@@ -675,10 +677,10 @@ class ScriptClassifier:
for t in whereToCuts:
#accuracies, accuracysPerClass, rocScores = self.predictionForMergedCourse(mergedCourse, t, ntime)
resultAllCourses = self.predictionForMergedCourse(mergedCourse, t, ntime)
resultAllCourses = self.predictionForMergedCourse(mergedCourse, t, ntime, cache=cache)
dictResult[t] = resultAllCourses
courses = mergedCourse.getCourses()
# courses = mergedCourse.getCourses()
# for i, course in enumerate(courses): #
#
......
......@@ -72,32 +72,31 @@ if __name__ == "__main__":
''' classification one time '''
groups = [ConstantModel.GROUP_SUCCEED_ALL_USERS, ConstantModel.GROUP_FAILED_ALL_USERS]
whereToCut=25
classifier.testCourseId = ConstantModel.COURSE_ID_GESTION_PROJET
classifier.scriptClassifier.setFeatures(dictCourses[classifier.testCourseId], whereToCut)
#(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTaskNTimes(mergedCourse, whereToCut=25, cache = False)
resultCourse = classifier.predictionTaskNTimes(mergedCourse, whereToCut=whereToCut, cache = False)
#(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTask(mergedCourse, whereToCut=25)
print("accuracy="+str(np.nanmean(resultCourse.accuracies)))
#print(confusionMatrix)
#print("weightOrEpoch="+str(weightOrEpoch))
print("auc="+str(np.nanmean(resultCourse.aucs)))
# groups = [ConstantModel.GROUP_SUCCEED_ALL_USERS, ConstantModel.GROUP_FAILED_ALL_USERS]
# whereToCut=25
#
# classifier.testCourseId = ConstantModel.COURSE_ID_GESTION_PROJET
# classifier.scriptClassifier.setFeatures(dictCourses[classifier.testCourseId], whereToCut)
#
# #(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTaskNTimes(mergedCourse, whereToCut=25, cache = False)
# resultCourse = classifier.predictionTaskNTimes(mergedCourse, whereToCut=whereToCut, cache = False)
# #(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTask(mergedCourse, whereToCut=25)
# print("accuracy="+str(np.nanmean(resultCourse.accuracies)))
# #print(confusionMatrix)
# #print("weightOrEpoch="+str(weightOrEpoch))
# print("auc="+str(np.nanmean(resultCourse.aucs)))
''' classification n-times for a specific period '''
# resultAllCourses = classifier.predictionForMergedCourse(mergedCourse, whereToCut=25, ntime=10)
# resultAllCourses = classifier.predictionForMergedCourse(mergedCourse, whereToCut=25, ntime=10, cache=True)
# print("Accuracies:", resultAllCourses.avgAccuracy)
#print("accuracysPerClass:,", accuracysPerClass)
#print("auc Scores:", aucScores)
# print("auc Scores:", resultAllCourses.avgAuc)
''' classification for all periods '''
# dictResult = classifier.predictionTaskForMergedForAllPeriods(mergedCourse, ntime=10)
dictResult = classifier.predictionTaskForMergedForAllPeriods(mergedCourse, ntime=10, cache=False)
#print("result=", dictResult)
print("result=", dictResult)
'''
Best features for LR
......
# -*- coding: UTF-8 -*-
'''
Created on 29-Apr-2014
This file is part of Hubble-UserProfile.
Hubble-UserProfile is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Hubble-UserProfile is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Hubble-UserProfile. If not, see <http://www.gnu.org/licenses/>.
@author: Antoine Pigeau
'''
import numpy as np
import scipy.stats
from datetime import datetime
from classifierManager.ocr.classifier import Classifier
from classifierManager.constant import Constant as ConstantClassifier
from featureManager.ocr.constant import Constant as ConstantFeature
from model.ocr.constant import Constant as ConstantModel
from model.ocr.course import Course
from model.mergedCourse import MergedCourse
def compareAucs(aucsMerged, aucs):
    '''
    Compare two samples of AUC scores with Welch's t-test.

    NaN entries are removed from both samples before testing. The averages
    are computed with NaN ignored, so the significance test has to ignore
    them as well: otherwise a single NaN run yields a NaN p-value and the
    comparison silently becomes "not significant".

    @param aucsMerged: AUC scores obtained with the merged-course classifier
                       (assumed to be a sequence of floats - TODO confirm)
    @param aucs: AUC scores obtained with the per-course classifier
                 (assumed to be a sequence of floats - TODO confirm)

    @return: tuple (pvalue, isBetter). pvalue is the two-sided p-value of the
             unequal-variance t-test; isBetter is True when the mean of
             aucsMerged is strictly greater than the mean of aucs. When one
             of the samples has fewer than two valid scores no test can be
             computed and (nan, False) is returned.
    '''
    merged = np.asarray(aucsMerged, dtype=float)
    single = np.asarray(aucs, dtype=float)

    merged = merged[~np.isnan(merged)]
    single = single[~np.isnan(single)]

    # a variance needs at least two observations in each sample
    if merged.size < 2 or single.size < 2:
        return (float("nan"), False)

    result = scipy.stats.ttest_ind(merged, single, equal_var=False)

    return (float(result.pvalue), bool(merged.mean() > single.mean()))


if __name__ == "__main__":

    print("start of the process boosting")
    startTime = datetime.now()

    # periods (cut points of the traces) compared below; used as keys of both result dictionaries
    periods = [25, 50, 75, 100]

    classifierMerged = Classifier(idCourses = ConstantModel.ID_COURSES,
                                  directory = Classifier.DIRECTORY_EXPERIMENT_MERGED,
                                  nameGroups = [ConstantModel.GROUP_SUCCEED_ALL_USERS, ConstantModel.GROUP_FAILED_ALL_USERS],
                                  features = None,
                                  typeFeatureGroup = ConstantFeature.TYPE_FEATURE_GROUP,
                                  hiddenLayers = None,
                                  featuresByPeriod = ConstantFeature.IMPLEMENTED_FEATURES_FORWARD_BEST_MERGED_LR,
                                  featuresByCourse= None )#ConstantFeature.IMPLEMENTED_FEATURES_FORWARD_BEST_MERGED_LR)

    classifierMerged.setScriptClassifier(ConstantClassifier.LogisticRegression)

    # Creation of the merged course with all the courses
    courses = [Course(idCourse, granularity=ConstantModel.CHAPTER)
               for idCourse in ConstantModel.ID_COURSES]

    mergedCourse = MergedCourse(courses)

    # Prediction task for all courses on each period, with the merged classifier
    dictResultMerged = classifierMerged.predictionTaskForMergedForAllPeriods(mergedCourse, ntime=10, cache=True)

    classifier = Classifier(idCourses = ConstantModel.ID_COURSES,
                            nameGroups = [ConstantModel.GROUP_SUCCEED_ALL_USERS, ConstantModel.GROUP_FAILED_ALL_USERS],
                            hiddenLayers = None,
                            typeFeatureGroup = ConstantFeature.TYPE_FEATURE_GROUP,
                            features = None,
                            featuresByCourse = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES,
                            featuresByPeriod = None)

    classifier.setScriptClassifier(ConstantClassifier.LogisticRegression)

    # Same prediction task with the per-course classifier
    dictResult = classifier.predictionTaskForAllPeriods(ntime=10, cache=True)

    # pvalues[t][idCourse] = (pvalue, isBetter) for each period t and each course
    pvalues = {}

    for t in periods:

        pvalues[t] = {}

        resultAllCoursesMerged = dictResultMerged[t].dictAllCourses
        resultsAllCourses = dictResult[t].dictAllCourses

        for idCourse in ConstantModel.ID_COURSES:

            print('dictResultMerged[t]: ', dictResultMerged[t])

            pvalues[t][idCourse] = compareAucs(resultAllCoursesMerged[idCourse].aucs,
                                               resultsAllCourses[idCourse].aucs)

    for t in periods:

        print("---- traces cut at ", t)

        coursesImproved = []

        for idCourse in ConstantModel.ID_COURSES:

            (pvalue, isBetter) = pvalues[t][idCourse]
            print(idCourse, " : ", pvalue)

            # improved = significantly different at the 5% level AND a higher mean AUC when merged
            if pvalue < 0.05 and isBetter:
                coursesImproved.append(idCourse)

        print("Course Improved with transfert learning: " , coursesImproved)

    stopTime = datetime.now()
    print(("end of the classification task ("+str(stopTime-startTime)+")"))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment