Antoine PIGEAU / 2015-Hubble-UserProfiles / Commits / b2e42931

Commit b2e42931 authored Oct 27, 2021 by Antoine PIGEAU
new return for result - not finished
parent cc7733e9
Changes 2
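This commit replaces the positional tuples returned by predictionTask and predictionTaskNTimes with namedtuples (ResultSingleIteration, ResultCourse, and an unfinished ResultAllCourses), so callers read fields by name instead of relying on element order. A minimal sketch of that calling convention, using placeholder values rather than anything computed by the classifier:

    from collections import namedtuple

    # Field layout copied from the diff below; the values are placeholders.
    ResultSingleIteration = namedtuple(
        'ResultSingleIteration', ['accuracy', 'auc', 'confusionMatrix', 'dictWeight'])

    # Old style: positional unpacking, sensitive to element order.
    # (accuracy, confusionMatrix, resultWeight, auc) = self.predictionTask(...)

    # New style: a single result object whose fields are read by name.
    result = ResultSingleIteration(accuracy=0.82, auc=0.79,
                                   confusionMatrix=[[40, 5], [8, 47]], dictWeight={})
    print(result.accuracy, result.auc)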
source/classifierManager/script/scriptClassifier.py
@@ -24,6 +24,8 @@ import os
import hashlib
+from collections import namedtuple
import numpy as np
import operator
@@ -40,10 +42,14 @@ from exportManager import exportLatex
from featureManager.featureMultiGroup import FeatureMultiGroup
from classifierManager.constant import Constant

class ScriptClassifier:
    '''
    '''
    def __init__(self, classifier, fileName):
@@ -178,8 +184,8 @@ class ScriptClassifier:
        '''
+        ResultSingleIteration = namedtuple('ResultSingleIteration', ['accuracy', 'auc', 'confusionMatrix', 'dictWeight'])
        featureMultiGroup = self.classifier.getFeatureMultiGroup()
        (train_x, train_y,
@@ -202,7 +208,7 @@ class ScriptClassifier:
        accuracy = accuracy_score(test_y, pred_y)
-        resultWeight = self.getBestFeatures(model, self.classifier.features, featureMultiGroup)
+        dictWeight = self.getBestFeatures(model, self.classifier.features, featureMultiGroup)
        print("--------------------------------------------------")
        print("input dimension:" + str(train_x.shape))
@@ -214,7 +220,8 @@ class ScriptClassifier:
        assert confusionMatrix.shape == (2, 2)
-        return (accuracy, confusionMatrix, resultWeight, auc)
+        return ResultSingleIteration(accuracy, auc, confusionMatrix, dictWeight)  #(accuracy, auc, confusionMatrix, resultWeight)

    def predictionTaskNTimes(self, course,
@@ -235,13 +242,16 @@ class ScriptClassifier:
        print("---------------------------------- Call predictionTaskNTimes of ScriptClassifier")
-        accuracys = []
+        ResultCourse = namedtuple('ResultCourse', ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'])
+        resultCourse = None
+        accuracies = []
        aucScores = []
        size = len(self.classifier.nameGroups)
-        confusionMatrice = np.zeros((size, size))
+        avgConfusionMatrix = np.zeros((size, size))
-        dictResultWeight = {}
+        dictWeight = {}
        name = str(self.classifier.features).encode('utf-8')
        hashFeature = "CourseMergedHash[" + hashlib.sha1(name).hexdigest() + "]"
@@ -264,48 +274,53 @@ class ScriptClassifier:
        if os.path.exists(fileNameSavedResult) and cache:
            with open(fileNameSavedResult, "rb") as fileResult:
-                (accuracys, aucScores, confusionMatrice, dictResultWeight) = pickle.load(fileResult)
+                #(accuracies, aucScores, avgConfusionMatrix, dictResultWeight) = pickle.load(fileResult)
+                resultCourse = ResultCourse(*pickle.load(fileResult))
            print(str(fileNameSavedResult) + " is already done")
-            return (accuracys, aucScores, confusionMatrice, dictResultWeight)
+            #return (accuracies, aucScores, avgConfusionMatrix, dictResultWeight)
+            return resultCourse

        #self.classifier.testIdCourse = testIdCourse
        for _ in range(0, ntime):
-            (accuracy, confusionMatrix, resultWeight, auc) = self.predictionTask(course, whereToCut=whereToCut)
+            # (accuracy, auc, confusionMatrix, resultWeight)
+            result = self.predictionTask(course, whereToCut=whereToCut)
-            accuracys.append(accuracy)
-            aucScores.append(auc)
+            accuracies.append(result.accuracy)
+            aucScores.append(result.auc)
-            confusionMatrice = confusionMatrice + confusionMatrix
+            avgConfusionMatrix = avgConfusionMatrix + result.confusionMatrix
-            for (feature, weight) in resultWeight:
-                if feature not in dictResultWeight:
-                    dictResultWeight[feature] = [weight]
+            for (feature, weight) in result.dictWeight:
+                if feature not in dictWeight:
+                    dictWeight[feature] = [weight]
                else:
-                    dictResultWeight[feature].append(weight)
+                    dictWeight[feature].append(weight)

-        for feature in dictResultWeight:
-            dictResultWeight[feature] = np.average(dictResultWeight[feature])
+        for feature in dictWeight:
+            dictWeight[feature] = np.average(dictWeight[feature])

-        confusionMatrice = confusionMatrice / float(ntime)
-        s = np.sum(confusionMatrice, axis=0)
-        confusionMatrice = confusionMatrice / s
+        avgConfusionMatrix = avgConfusionMatrix / float(ntime)
+        s = np.sum(avgConfusionMatrix, axis=0)
+        avgConfusionMatrix = avgConfusionMatrix / s

-        if cache == False:
-            with open(fileNameSavedResult, "wb") as fileResult:
-                pickle.dump((accuracys, aucScores, confusionMatrice, dictResultWeight), fileResult)
-        return (accuracys, aucScores, confusionMatrice, dictResultWeight)
+        resultCourse = ResultCourse(accuracies, aucScores, avgConfusionMatrix, dictWeight)
+        with open(fileNameSavedResult, "wb") as fileResult:
+            #pickle.dump((accuracies, aucScores, confusionMatrice, dictResultWeight), fileResult)
+            pickle.dump(tuple(resultCourse), fileResult)
+        return resultCourse  #(accuracies, aucScores, confusionMatrice, dictResultWeight)

    def setFeatures(self, course=None, whereToCut=None):
        pass

-    def predictionTaskForAllCourses(self, whereToCut, ntime=10):
+    def predictionTaskForAllCourses(self, whereToCut, ntime=10, cache=True):
        '''
        Do the prediction task for all the courses (call the previous function predictionNTimes on all the courses).
@@ -313,12 +328,15 @@ class ScriptClassifier:
        @param whereToCut: the whereToCut use to cut the sequence when loaded. Unity is the number of days
        @param features: list of feature to use for the input data. Each element is feature defined in the FeatureSequence class
        @param algorithms: list of SVM algorithm to test. Possible values are in the class Svm
+        @param cache : use the cache obtained previously for all the courses
        @return: ([accuracyCourse1, accuracyCourse2, ...],
                  [ [accuracyClass1Course1, class2....], [Course2, ...],... ],
                  [rocCourse1, rocCourse2, ...] ), a triplet list of accuracies, list of accuracy per class, list of roc score
        '''
+        ResultAllCourses = namedtuple('ResultAllCourses', ['avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgConfusionMatrix', 'dictAllCourses'])
        fileNameResult = os.path.join(self.directoryExperiment, self.fileName +
@@ -337,24 +355,25 @@ class ScriptClassifier:
            self.setFeatures(course, whereToCut)
-            (accuracysCourse, aucScoresCourse, confusionMatrix, dictWeight) = self.predictionTaskNTimes(course, whereToCut=whereToCut, ntime=ntime, cache=True)
+            #(accuracysCourse, aucScoresCourse, confusionMatrix, dictWeight)
+            # (accuracies, aucScores, avgConfusionMatrix, dictResultWeight)
+            resultCourse = self.predictionTaskNTimes(course, whereToCut=whereToCut, ntime=ntime, cache=cache)
-            dictResults[course.getCourseId()] = (accuracysCourse, aucScoresCourse, confusionMatrix, dictWeight)
+            dictResults[course.getCourseId()] = resultCourse
-            accuracy = np.nanmean(accuracysCourse)
-            aucScore = np.nanmean(aucScoresCourse)
+            accuracy = np.nanmean(resultCourse.accuracies)
+            aucScore = np.nanmean(resultCourse.aucs)
            accuracies.append(accuracy)
            aucScores.append(aucScore)
-            accuracysPerClass.append(confusionMatrix.diagonal())
+            accuracysPerClass.append(resultCourse.avgConfusionMatrix.diagonal())
-            if dictWeight:
-                dictWeightAllCourses[course.getCourseId()] = dictWeight
+            if resultCourse.dictWeight:
+                dictWeightAllCourses[course.getCourseId()] = resultCourse.dictWeight
            courseName = course.getName()
            fileResult.write("--------------------------\n")
@@ -364,11 +383,11 @@ class ScriptClassifier:
            fileResult.write("RocScore: " + str(aucScore) + "\n")
            fileResult.write("confusion matrix\n")
-            fileResult.write(str(confusionMatrix))
+            fileResult.write(str(resultCourse.avgConfusionMatrix))
-            if dictWeight:
+            if resultCourse.dictWeight:
                fileResult.write("\nbest feature:\n")
-                sortedDictWeigth = sorted(dictWeight.items(), key=operator.itemgetter(1), reverse=True)
+                sortedDictWeigth = sorted(resultCourse.dictWeight.items(), key=operator.itemgetter(1), reverse=True)
                fileResult.write(str(sortedDictWeigth))
            fileResult.write("\n--------------------------\n")
@@ -403,8 +422,9 @@ class ScriptClassifier:
        fileResult.write(" best feature for all courses:\n")
        resultWeigth = sorted(resultWeight.items(), key=operator.itemgetter(1), reverse=True)
        fileResult.write(str(resultWeigth))
+        # 'avgAccuracy', 'stdAccurracy', 'avgAuc', 'stdAuc', 'accuracyPerClass', 'avgConfusionMatrix', 'dictAllCourses'
+        resultAllCourses = ResultAllCourses(scoreFinal, stdAccuracy, aucScoreFinal, stdAuc, )
        return (accuracies, accuracysPerClass, aucScores, stdAccuracy, stdAuc, dictResults)

    def predictionTaskForAllPeriods(self, ntime=10):
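In predictionTaskNTimes above, the cached result is now written as pickle.dump(tuple(resultCourse), fileResult) and read back as ResultCourse(*pickle.load(fileResult)). One plausible reason for the tuple round-trip (an assumption, not stated in the commit) is that ResultCourse is defined inside the method, and pickle cannot serialize instances of a class it cannot look up at module level; storing a plain tuple also keeps older cache files, which were written as 4-tuples in the same field order, readable. A self-contained sketch of the same round-trip, with a hypothetical cache file name:

    import pickle
    from collections import namedtuple

    # Same field order as ResultCourse in the diff; defined at module level here
    # only so the sketch stays self-contained.
    ResultCourse = namedtuple(
        'ResultCourse', ['accuracies', 'aucs', 'avgConfusionMatrix', 'dictWeight'])

    resultCourse = ResultCourse([0.80, 0.85], [0.75, 0.80],
                                [[0.9, 0.1], [0.2, 0.8]], {})

    cachePath = "resultCourse.pickle"  # hypothetical file name
    with open(cachePath, "wb") as fileResult:
        pickle.dump(tuple(resultCourse), fileResult)     # store as a plain tuple

    with open(cachePath, "rb") as fileResult:
        cached = ResultCourse(*pickle.load(fileResult))  # rebuild the namedtuple

    assert cached.accuracies == [0.80, 0.85]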
source/main/ocr/classificationExperiment.py
@@ -80,17 +80,19 @@ if __name__ == "__main__":
    ''' classification N times '''
    # classifier.features = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES[course.getCourseId()][25]
    #
    # (accuracys, aucScores, confusionMatrix, dictResultWeight) = classifier.predictionTaskNTimes(course,
    #                                                                                             whereToCut=25,
    #                                                                                             ntime=10,
    #                                                                                             cache = False)
    # print("accuracy: ", np.nanmean(accuracys))
    # print("std accuracy: ", np.std(accuracys))
    # print("confusion Matrix:\n", confusionMatrix)
    # print("AUC score: ", np.nanmean(aucScores))
    # print("std AUC:", np.std(aucScores))
+    classifier.features = ConstantFeature.BEST_LR_FEATURE_ALL_COURSES[course.getCourseId()][25]
+    result = classifier.predictionTaskNTimes(course, whereToCut=25, ntime=10, cache=False)
+    print("accuracy: ", np.nanmean(result.accuracies))
+    print("std accuracy: ", np.std(result.accuracies))
+    print("confusion Matrix:\n", result.avgConfusionMatrix)
+    print("AUC score: ", np.nanmean(result.aucs))
+    print("std AUC:", np.std(result.aucs))
+    print("dictWeight:", result.dictWeight)
    ''' for all courses '''
@@ -103,7 +105,7 @@ if __name__ == "__main__":
    ''' for all periods '''
-    classifier.predictionTaskForAllPeriods(ntime=10)
+    # classifier.predictionTaskForAllPeriods(ntime=10)
    ''' NN : all courses with hidden layers'''