Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Antoine PIGEAU
2015-Hubble-UserProfiles
Commits
46358aaa
Commit
46358aaa
authored
Nov 29, 2021
by
Antoine PIGEAU
Browse files
Merge comparison : done for LR
parent
af7ebae6
Changes
4
Hide whitespace changes
Inline
Side-by-side
source/classifierManager/classifier.py
View file @
46358aaa
...
...
@@ -179,10 +179,10 @@ class Classifier:
return
self
.
scriptClassifier
.
predictionTaskForAllCourses
(
whereToCut
=
whereToCut
,
ntime
=
ntime
,
cache
=
cache
)
def predictionTaskForAllPeriods(self, ntime=10, cache=True):
    '''
    Delegate the prediction task on all periods to the script classifier.

    @param ntime: forwarded positionally to scriptClassifier.predictionTaskForAllPeriods
    @param cache: forwarded as the `cache` keyword argument

    @return: whatever scriptClassifier.predictionTaskForAllPeriods returns
    '''
    # Call the script classifier exactly once and hand its result back:
    # a second, non-returning call would only repeat the same work.
    return self.scriptClassifier.predictionTaskForAllPeriods(ntime, cache=cache)
def predictionForMergedCourse(self, mergedCourse, whereToCut, ntime=10, cache=True):
    '''
    Delegate the prediction on a merged course to the script classifier.

    @param mergedCourse: forwarded positionally to scriptClassifier.predictionForMergedCourse
    @param whereToCut: forwarded as the `whereToCut` keyword argument
    @param ntime: forwarded as the `ntime` keyword argument
    @param cache: forwarded as the `cache` keyword argument

    @return: whatever scriptClassifier.predictionForMergedCourse returns
    '''
    # Single definition: an earlier variant without `cache` would be shadowed
    # by this one anyway, so only the cache-aware signature is kept.
    return self.scriptClassifier.predictionForMergedCourse(mergedCourse,
                                                           whereToCut=whereToCut,
                                                           ntime=ntime,
                                                           cache=cache)
def predictionTaskForMergedForAllPeriods(self, mergedCourse, ntime=10, cache=True):
    '''
    Delegate the prediction task on all periods of a merged course to the
    script classifier.

    @param mergedCourse: forwarded positionally to
                         scriptClassifier.predictionTaskForMergedCourseForAllPeriods
    @param ntime: forwarded positionally
    @param cache: forwarded as the `cache` keyword argument

    @return: whatever scriptClassifier.predictionTaskForMergedCourseForAllPeriods returns
    '''
    # Single definition: an earlier variant without `cache` would be shadowed
    # by this one anyway, so only the cache-aware signature is kept.
    return self.scriptClassifier.predictionTaskForMergedCourseForAllPeriods(mergedCourse,
                                                                            ntime,
                                                                            cache=cache)
source/classifierManager/script/scriptClassifier.py
View file @
46358aaa
...
...
@@ -355,7 +355,7 @@ class ScriptClassifier:
aucScores
=
[]
#accuracysPerClass = []
dictWeightAllCourses
=
{}
dict
Result
AllCourses
=
{}
dictAllCourses
=
{}
with
open
(
fileNameResult
,
'w'
)
as
fileResult
:
...
...
@@ -370,7 +370,7 @@ class ScriptClassifier:
ntime
=
ntime
,
cache
=
cache
)
dict
Result
AllCourses
[
course
.
getCourseId
()]
=
resultCourse
dictAllCourses
[
course
.
getCourseId
()]
=
resultCourse
accuracy
=
np
.
nanmean
(
resultCourse
.
accuracies
)
aucScore
=
np
.
nanmean
(
resultCourse
.
aucs
)
...
...
@@ -439,7 +439,7 @@ class ScriptClassifier:
# 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgAccuraccyPerClass', 'dictAllCourses'
resultAllCourses
=
ResultAllCourses
(
scoreFinal
,
stdAccuracy
,
aucScoreFinal
,
stdAuc
,
avgConfusionMatrix
.
diagonal
(),
dict
Result
AllCourses
)
avgConfusionMatrix
.
diagonal
(),
dictAllCourses
)
return
resultAllCourses
...
...
@@ -511,7 +511,9 @@ class ScriptClassifier:
exportLatex
.
exportResultAccuracy
(
fileNameResultAccuracy
,
dictResult
,
self
.
classifier
.
getCourses
())
exportLatex
.
exportResultAUC
(
fileNameResultAUC
,
dictResult
,
self
.
classifier
.
getCourses
())
return
dictResult
# def exportLastResult(self, experimentDirectory):
#
# fileNameSavedResult = os.path.join(experimentDirectory, "launchTestForAllCoursesAlgoPeriod.p")
...
...
@@ -553,7 +555,7 @@ class ScriptClassifier:
size
=
len
(
self
.
classifier
.
nameGroups
)
avgConfusionMatrix
=
np
.
zeros
((
size
,
size
))
dict
Result
AllCourses
=
{}
dictAllCourses
=
{}
fileNameResult
=
os
.
path
.
join
(
self
.
directoryExperiment
,
self
.
fileName
+
"MergedCourse"
+
...
...
@@ -582,9 +584,9 @@ class ScriptClassifier:
resultCourse
=
self
.
predictionTaskNTimes
(
mergedCourse
,
whereToCut
=
whereToCut
,
ntime
=
ntime
,
ca
t
che
=
cache
)
cache
=
cache
)
dict
Result
AllCourses
[
course
.
getCourseId
()]
=
resultCourse
dictAllCourses
[
course
.
getCourseId
()]
=
resultCourse
accuracy
=
np
.
nanmean
(
resultCourse
.
accuracies
)
auc
=
np
.
nanmean
(
resultCourse
.
aucs
)
...
...
@@ -655,11 +657,11 @@ class ScriptClassifier:
resultAllCourses
=
ResultAllCourses
(
scoreFinal
,
stdAccuracy
,
aucScoreFinal
,
stdAuc
,
avgConfusionMatrix
.
diagonal
(),
dict
Result
AllCourses
)
avgConfusionMatrix
.
diagonal
(),
dictAllCourses
)
return
resultAllCourses
#(accuracies, accuracysPerClass, aucScores)
def
predictionTaskForMergedCourseForAllPeriods
(
self
,
mergedCourse
,
ntime
=
10
):
def
predictionTaskForMergedCourseForAllPeriods
(
self
,
mergedCourse
,
ntime
=
10
,
cache
=
True
):
'''
Do the prediction task on all the periods on all the courses (call the previous function).
...
...
@@ -675,10 +677,10 @@ class ScriptClassifier:
for
t
in
whereToCuts
:
#accuracies, accuracysPerClass, rocScores = self.predictionForMergedCourse(mergedCourse, t, ntime)
resultAllCourses
=
self
.
predictionForMergedCourse
(
mergedCourse
,
t
,
ntime
)
resultAllCourses
=
self
.
predictionForMergedCourse
(
mergedCourse
,
t
,
ntime
,
cache
=
cache
)
dictResult
[
t
]
=
resultAllCourses
courses
=
mergedCourse
.
getCourses
()
#
courses = mergedCourse.getCourses()
# for i, course in enumerate(courses): #
#
...
...
source/main/ocr/classificationExperimentMerged.py
View file @
46358aaa
...
...
@@ -72,32 +72,31 @@ if __name__ == "__main__":
''' classification one time '''
groups
=
[
ConstantModel
.
GROUP_SUCCEED_ALL_USERS
,
ConstantModel
.
GROUP_FAILED_ALL_USERS
]
whereToCut
=
25
classifier
.
testCourseId
=
ConstantModel
.
COURSE_ID_GESTION_PROJET
classifier
.
scriptClassifier
.
setFeatures
(
dictCourses
[
classifier
.
testCourseId
],
whereToCut
)
#(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTaskNTimes(mergedCourse, whereToCut=25, cache = False)
resultCourse
=
classifier
.
predictionTaskNTimes
(
mergedCourse
,
whereToCut
=
whereToCut
,
cache
=
False
)
#(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTask(mergedCourse, whereToCut=25)
print
(
"accuracy="
+
str
(
np
.
nanmean
(
resultCourse
.
accuracies
)))
#print(confusionMatrix)
#print("weightOrEpoch="+str(weightOrEpoch))
print
(
"auc="
+
str
(
np
.
nanmean
(
resultCourse
.
aucs
)))
#
groups = [ConstantModel.GROUP_SUCCEED_ALL_USERS, ConstantModel.GROUP_FAILED_ALL_USERS]
#
whereToCut=25
#
#
classifier.testCourseId = ConstantModel.COURSE_ID_GESTION_PROJET
#
classifier.scriptClassifier.setFeatures(dictCourses[classifier.testCourseId], whereToCut)
#
#
#(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTaskNTimes(mergedCourse, whereToCut=25, cache = False)
#
resultCourse = classifier.predictionTaskNTimes(mergedCourse, whereToCut=whereToCut, cache = False)
#
#(accuracy, confusionMatrix, weightOrEpoch, rocScore) = classifier.predictionTask(mergedCourse, whereToCut=25)
#
print("accuracy="+str(np.nanmean(resultCourse.accuracies)))
#
#print(confusionMatrix)
#
#print("weightOrEpoch="+str(weightOrEpoch))
#
print("auc="+str(np.nanmean(resultCourse.aucs)))
''' classification n-times for a specific period '''
# resultAllCourses = classifier.predictionForMergedCourse(mergedCourse, whereToCut=25, ntime=10)
# resultAllCourses = classifier.predictionForMergedCourse(mergedCourse, whereToCut=25, ntime=10
, cache=True
)
# print("Accuracies:", resultAllCourses.avgAccuracy)
#print("accuracysPerClass:,", accuracysPerClass)
#print("auc Scores:", aucScores)
# print("auc Scores:", resultAllCourses.avgAuc)
''' classification for all periods '''
#
dictResult = classifier.predictionTaskForMergedForAllPeriods(mergedCourse, ntime=10)
dictResult
=
classifier
.
predictionTaskForMergedForAllPeriods
(
mergedCourse
,
ntime
=
10
,
cache
=
False
)
#
print("result=", dictResult)
print
(
"result="
,
dictResult
)
'''
Best features for LR
...
...
source/main/ocr/classificationExperimentMergedComparison.py
0 → 100644
View file @
46358aaa
# -*- coding: UTF-8 -*-
'''
Created on 29-Apr-2014
This file is part of Hubble-UserProfile.
Hubble-UserProfile is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Hubble-UserProfile is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Hubble-UserProfile. If not, see <http://www.gnu.org/licenses/>.
@author: Antoine Pigeau
'''
import
numpy
as
np
import
scipy.stats
from
datetime
import
datetime
from
classifierManager.ocr.classifier
import
Classifier
from
classifierManager.constant
import
Constant
as
ConstantClassifier
from
featureManager.ocr.constant
import
Constant
as
ConstantFeature
from
model.ocr.constant
import
Constant
as
ConstantModel
from
model.ocr.course
import
Course
from
model.mergedCourse
import
MergedCourse
if __name__ == "__main__":

    # Compare, for every course and every cut of the traces, the AUCs of a
    # logistic regression trained on the merged course with the AUCs of a
    # logistic regression trained per course, using Welch's t-test.

    print("start of the process boosting")

    startTime = datetime.now()

    # Cuts of the traces at which both result dictionaries are compared
    # (defined once so that the two loops below cannot drift apart).
    whereToCuts = [25, 50, 75, 100]

    # Threshold under which a difference between the AUCs is reported.
    significanceLevel = 0.05

    classifierMerged = Classifier(idCourses=ConstantModel.ID_COURSES,
                                  directory=Classifier.DIRECTORY_EXPERIMENT_MERGED,
                                  nameGroups=[ConstantModel.GROUP_SUCCEED_ALL_USERS,
                                              ConstantModel.GROUP_FAILED_ALL_USERS],
                                  features=None,
                                  typeFeatureGroup=ConstantFeature.TYPE_FEATURE_GROUP,
                                  hiddenLayers=None,
                                  featuresByPeriod=ConstantFeature.IMPLEMENTED_FEATURES_FORWARD_BEST_MERGED_LR,
                                  featuresByCourse=None)

    classifierMerged.setScriptClassifier(ConstantClassifier.LogisticRegression)

    '''
    Creation of the merged course with all the courses
    '''
    courses = []
    dictCourses = {}
    for idCourse in ConstantModel.ID_COURSES:
        course = Course(idCourse, granularity=ConstantModel.CHAPTER)
        courses.append(course)
        dictCourses[idCourse] = course

    mergedCourse = MergedCourse(courses)

    '''
    Prediction task for all courses on each period
    '''

    ''' classification for all periods '''
    dictResultMerged = classifierMerged.predictionTaskForMergedForAllPeriods(mergedCourse,
                                                                             ntime=10,
                                                                             cache=True)

    classifier = Classifier(idCourses=ConstantModel.ID_COURSES,
                            nameGroups=[ConstantModel.GROUP_SUCCEED_ALL_USERS,
                                        ConstantModel.GROUP_FAILED_ALL_USERS],
                            hiddenLayers=None,
                            typeFeatureGroup=ConstantFeature.TYPE_FEATURE_GROUP,
                            features=None,
                            featuresByCourse=ConstantFeature.BEST_LR_FEATURE_ALL_COURSES,
                            featuresByPeriod=None)

    classifier.setScriptClassifier(ConstantClassifier.LogisticRegression)

    dictResult = classifier.predictionTaskForAllPeriods(ntime=10, cache=True)

    # pvalues[t][idCourse] = (p-value of the test, True when the merged
    # classifier has the higher average AUC)
    pvalues = {}
    for t in whereToCuts:

        pvalues[t] = {}

        # These lookups depend only on the period, not on the course.
        print('dictResultMerged[t]: ', dictResultMerged[t])
        resultAllCoursesMerged = dictResultMerged[t].dictAllCourses
        resultsAllCourses = dictResult[t].dictAllCourses

        for idCourse in ConstantModel.ID_COURSES:

            aucsMerged = resultAllCoursesMerged[idCourse].aucs
            aucs = resultsAllCourses[idCourse].aucs

            # The averages below ignore NaN values, so the test must ignore
            # them too: with the default policy a single NaN AUC turns the
            # p-value into NaN and the course can never be reported.
            testResult = scipy.stats.ttest_ind(aucsMerged,
                                               aucs,
                                               equal_var=False,
                                               nan_policy='omit')

            avgAucs = np.nanmean(aucs)
            avgAucsMerged = np.nanmean(aucsMerged)

            pvalues[t][idCourse] = (testResult.pvalue, avgAucsMerged > avgAucs)

    for t in whereToCuts:

        print("---- traces cut at ", t)

        coursesImproved = []
        for idCourse in ConstantModel.ID_COURSES:

            (pvalue, isBetter) = pvalues[t][idCourse]
            print(idCourse, " : ", pvalue)

            # The test is two-sided: the direction of the difference is
            # given by the comparison of the average AUCs.
            if pvalue < significanceLevel and isBetter:
                coursesImproved.append(idCourse)

        print("Course Improved with transfert learning: ", coursesImproved)

    stopTime = datetime.now()

    print(("end of the classification task (" + str(stopTime - startTime) + ")"))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment