Isolation forest - understanding the plot
up vote
-1
down vote
favorite
I'm a beginner with isolation forests and I started with the tutorial at this link:
https://www.kaggle.com/rgaddati/unsupervised-fraud-detection-isolation-forest
I don't understand the first plot of path length. What does the brown color mean?
Thank you for your help
##All General Import Statements
import pandas as pd
import numpy as np
import math
import random
%matplotlib inline
import random
from matplotlib import pyplot
import os
print(os.listdir("../input"))
class ExNode:
    """External (leaf) node of an isolation tree.

    Attributes:
        size: value recorded for this leaf by the tree builder.
            NOTE(review): presumably the number of samples that reached
            the leaf; the builder is not shown here, so confirm there.
    """

    def __init__(self, size):
        self.size = size
class InNode:
    """Internal (split) node of an isolation tree.

    Attributes:
        left: subtree followed when a sample's ``splitAtt`` value is
            strictly below ``splitVal``.
        right: subtree followed otherwise.
        splitAtt: key of the attribute this node splits on.
        splitVal: threshold the sample's attribute value is compared with.
    """

    def __init__(self, left, right, splitAtt, splitVal):
        self.left = left
        self.right = right
        self.splitAtt = splitAtt
        self.splitVal = splitVal
def iForest(X, noOfTrees, sampleSize):
    """Build an isolation forest from random subsamples of ``X``.

    Args:
        X: pandas DataFrame of training rows (anything with ``.sample(n)``).
        noOfTrees: number of trees to grow.
        sampleSize: number of rows drawn from ``X`` for each tree.

    Returns:
        A list of ``noOfTrees`` tree roots, each as returned by ``iTree``.
    """
    # Height limit handed to the tree builder: ceil(log2(sampleSize)).
    hlim = math.ceil(math.log(sampleSize, 2))
    forest = []
    for _ in range(noOfTrees):
        # Sample from the X argument. The original read the global df_data
        # here and silently ignored the data passed in by the caller.
        X_train = X.sample(sampleSize)
        # NOTE(review): iTree is not defined in this snippet; it must be
        # supplied (from the linked tutorial) before this function can run.
        forest.append(iTree(X_train, 0, hlim))
    return forest
def pathLength(x, Tree, currHeight):
    """Return the depth at which sample ``x`` reaches an external node.

    Args:
        x: sample indexable by attribute key (e.g. a pandas Series row).
        Tree: root of the (sub)tree to descend, an ``InNode`` or ``ExNode``.
        currHeight: depth already accumulated above ``Tree``; 0 at the root.

    Returns:
        ``currHeight`` plus the number of internal nodes traversed.
    """
    node = Tree
    depth = currHeight
    # Walk down until a leaf is hit; each internal node adds one level.
    while not isinstance(node, ExNode):
        if x[node.splitAtt] < node.splitVal:
            node = node.left
        else:
            node = node.right
        depth += 1
    return depth
# Load the credit-card data and separate the label column from the features.
df = pd.read_csv("../input/creditcard.csv")
y_true = df['Class']
# `columns=` replaces the positional axis argument, which pandas 2.0 removed.
df_data = df.drop(columns='Class')

sampleSize = 10000
ifor = iForest(df_data.sample(100000), 10, sampleSize)  ##Forest of 10 trees

# Row labels of each class, computed once instead of re-filtering the whole
# frame on every random draw.
posIndex = df_data[y_true == 1].index
negIndex = df_data[y_true == 0].index

posLenLst = []
negLenLst = []
for sim in range(1000):
    # One random Class == 1 row, measured against every tree in the forest.
    ind = random.choice(posIndex)
    for tree in ifor:
        # `ind` is an index label, so look it up with .loc rather than .iloc.
        posLenLst.append(pathLength(df_data.loc[ind], tree, 0))
    # One random Class == 0 row, measured the same way.
    ind = random.choice(negIndex)
    for tree in ifor:
        negLenLst.append(pathLength(df_data.loc[ind], tree, 0))

hlim = math.ceil(math.log(sampleSize, 2))
bins = np.linspace(0, hlim, hlim)
pyplot.figure(figsize=(12, 8))
# Both histograms share the same axes and are half-transparent (alpha=0.5),
# so wherever both classes have counts in a bin the two colours blend.
pyplot.hist(posLenLst, bins, alpha=0.5, label='Anomaly')
pyplot.hist(negLenLst, bins, alpha=0.5, label='Normal')
pyplot.xlabel('Path Length')
pyplot.ylabel('Frequency')
pyplot.legend(loc='upper left')
machine-learning unsupervised-learning
add a comment |
up vote
-1
down vote
favorite
I'm a beginner with isolation forests and I started with the tutorial at this link:
https://www.kaggle.com/rgaddati/unsupervised-fraud-detection-isolation-forest
I don't understand the first plot of path length. What does the brown color mean?
Thank you for your help
##All General Import Statements
import pandas as pd
import numpy as np
import math
import random
%matplotlib inline
import random
from matplotlib import pyplot
import os
print(os.listdir("../input"))
class ExNode:
    """External (leaf) node of an isolation tree.

    Attributes:
        size: value recorded for this leaf by the tree builder.
            NOTE(review): presumably the number of samples that reached
            the leaf; the builder is not shown here, so confirm there.
    """

    def __init__(self, size):
        self.size = size
class InNode:
    """Internal (split) node of an isolation tree.

    Attributes:
        left: subtree followed when a sample's ``splitAtt`` value is
            strictly below ``splitVal``.
        right: subtree followed otherwise.
        splitAtt: key of the attribute this node splits on.
        splitVal: threshold the sample's attribute value is compared with.
    """

    def __init__(self, left, right, splitAtt, splitVal):
        self.left = left
        self.right = right
        self.splitAtt = splitAtt
        self.splitVal = splitVal
def iForest(X, noOfTrees, sampleSize):
    """Build an isolation forest from random subsamples of ``X``.

    Args:
        X: pandas DataFrame of training rows (anything with ``.sample(n)``).
        noOfTrees: number of trees to grow.
        sampleSize: number of rows drawn from ``X`` for each tree.

    Returns:
        A list of ``noOfTrees`` tree roots, each as returned by ``iTree``.
    """
    # Height limit handed to the tree builder: ceil(log2(sampleSize)).
    hlim = math.ceil(math.log(sampleSize, 2))
    forest = []
    for _ in range(noOfTrees):
        # Sample from the X argument. The original read the global df_data
        # here and silently ignored the data passed in by the caller.
        X_train = X.sample(sampleSize)
        # NOTE(review): iTree is not defined in this snippet; it must be
        # supplied (from the linked tutorial) before this function can run.
        forest.append(iTree(X_train, 0, hlim))
    return forest
def pathLength(x, Tree, currHeight):
    """Return the depth at which sample ``x`` reaches an external node.

    Args:
        x: sample indexable by attribute key (e.g. a pandas Series row).
        Tree: root of the (sub)tree to descend, an ``InNode`` or ``ExNode``.
        currHeight: depth already accumulated above ``Tree``; 0 at the root.

    Returns:
        ``currHeight`` plus the number of internal nodes traversed.
    """
    node = Tree
    depth = currHeight
    # Walk down until a leaf is hit; each internal node adds one level.
    while not isinstance(node, ExNode):
        if x[node.splitAtt] < node.splitVal:
            node = node.left
        else:
            node = node.right
        depth += 1
    return depth
# Load the credit-card data and separate the label column from the features.
df = pd.read_csv("../input/creditcard.csv")
y_true = df['Class']
# `columns=` replaces the positional axis argument, which pandas 2.0 removed.
df_data = df.drop(columns='Class')

sampleSize = 10000
ifor = iForest(df_data.sample(100000), 10, sampleSize)  ##Forest of 10 trees

# Row labels of each class, computed once instead of re-filtering the whole
# frame on every random draw.
posIndex = df_data[y_true == 1].index
negIndex = df_data[y_true == 0].index

posLenLst = []
negLenLst = []
for sim in range(1000):
    # One random Class == 1 row, measured against every tree in the forest.
    ind = random.choice(posIndex)
    for tree in ifor:
        # `ind` is an index label, so look it up with .loc rather than .iloc.
        posLenLst.append(pathLength(df_data.loc[ind], tree, 0))
    # One random Class == 0 row, measured the same way.
    ind = random.choice(negIndex)
    for tree in ifor:
        negLenLst.append(pathLength(df_data.loc[ind], tree, 0))

hlim = math.ceil(math.log(sampleSize, 2))
bins = np.linspace(0, hlim, hlim)
pyplot.figure(figsize=(12, 8))
# Both histograms share the same axes and are half-transparent (alpha=0.5),
# so wherever both classes have counts in a bin the two colours blend.
pyplot.hist(posLenLst, bins, alpha=0.5, label='Anomaly')
pyplot.hist(negLenLst, bins, alpha=0.5, label='Normal')
pyplot.xlabel('Path Length')
pyplot.ylabel('Frequency')
pyplot.legend(loc='upper left')
machine-learning unsupervised-learning
1
Please include the relevant code, as links can go stale.
– Dragonthoughts
Nov 9 at 9:03
Have you tried to reach the tutorial authors?
– Bebs
Nov 9 at 9:05
I added the code and to be honest i didn't try to reach the tutorial authors..
– Zoya
Nov 9 at 9:22
I can't reach the tutorial authors, the page doesn't work..
– Zoya
Nov 9 at 11:02
add a comment |
up vote
-1
down vote
favorite
up vote
-1
down vote
favorite
I'm a beginner with isolation forests and I started with the tutorial at this link:
https://www.kaggle.com/rgaddati/unsupervised-fraud-detection-isolation-forest
I don't understand the first plot of path length. What does the brown color mean?
Thank you for your help
##All General Import Statements
import pandas as pd
import numpy as np
import math
import random
%matplotlib inline
import random
from matplotlib import pyplot
import os
print(os.listdir("../input"))
class ExNode:
    """External (leaf) node of an isolation tree.

    Attributes:
        size: value recorded for this leaf by the tree builder.
            NOTE(review): presumably the number of samples that reached
            the leaf; the builder is not shown here, so confirm there.
    """

    def __init__(self, size):
        self.size = size
class InNode:
    """Internal (split) node of an isolation tree.

    Attributes:
        left: subtree followed when a sample's ``splitAtt`` value is
            strictly below ``splitVal``.
        right: subtree followed otherwise.
        splitAtt: key of the attribute this node splits on.
        splitVal: threshold the sample's attribute value is compared with.
    """

    def __init__(self, left, right, splitAtt, splitVal):
        self.left = left
        self.right = right
        self.splitAtt = splitAtt
        self.splitVal = splitVal
def iForest(X, noOfTrees, sampleSize):
    """Build an isolation forest from random subsamples of ``X``.

    Args:
        X: pandas DataFrame of training rows (anything with ``.sample(n)``).
        noOfTrees: number of trees to grow.
        sampleSize: number of rows drawn from ``X`` for each tree.

    Returns:
        A list of ``noOfTrees`` tree roots, each as returned by ``iTree``.
    """
    # Height limit handed to the tree builder: ceil(log2(sampleSize)).
    hlim = math.ceil(math.log(sampleSize, 2))
    forest = []
    for _ in range(noOfTrees):
        # Sample from the X argument. The original read the global df_data
        # here and silently ignored the data passed in by the caller.
        X_train = X.sample(sampleSize)
        # NOTE(review): iTree is not defined in this snippet; it must be
        # supplied (from the linked tutorial) before this function can run.
        forest.append(iTree(X_train, 0, hlim))
    return forest
def pathLength(x, Tree, currHeight):
    """Return the depth at which sample ``x`` reaches an external node.

    Args:
        x: sample indexable by attribute key (e.g. a pandas Series row).
        Tree: root of the (sub)tree to descend, an ``InNode`` or ``ExNode``.
        currHeight: depth already accumulated above ``Tree``; 0 at the root.

    Returns:
        ``currHeight`` plus the number of internal nodes traversed.
    """
    node = Tree
    depth = currHeight
    # Walk down until a leaf is hit; each internal node adds one level.
    while not isinstance(node, ExNode):
        if x[node.splitAtt] < node.splitVal:
            node = node.left
        else:
            node = node.right
        depth += 1
    return depth
# Load the credit-card data and separate the label column from the features.
df = pd.read_csv("../input/creditcard.csv")
y_true = df['Class']
# `columns=` replaces the positional axis argument, which pandas 2.0 removed.
df_data = df.drop(columns='Class')

sampleSize = 10000
ifor = iForest(df_data.sample(100000), 10, sampleSize)  ##Forest of 10 trees

# Row labels of each class, computed once instead of re-filtering the whole
# frame on every random draw.
posIndex = df_data[y_true == 1].index
negIndex = df_data[y_true == 0].index

posLenLst = []
negLenLst = []
for sim in range(1000):
    # One random Class == 1 row, measured against every tree in the forest.
    ind = random.choice(posIndex)
    for tree in ifor:
        # `ind` is an index label, so look it up with .loc rather than .iloc.
        posLenLst.append(pathLength(df_data.loc[ind], tree, 0))
    # One random Class == 0 row, measured the same way.
    ind = random.choice(negIndex)
    for tree in ifor:
        negLenLst.append(pathLength(df_data.loc[ind], tree, 0))

hlim = math.ceil(math.log(sampleSize, 2))
bins = np.linspace(0, hlim, hlim)
pyplot.figure(figsize=(12, 8))
# Both histograms share the same axes and are half-transparent (alpha=0.5),
# so wherever both classes have counts in a bin the two colours blend.
pyplot.hist(posLenLst, bins, alpha=0.5, label='Anomaly')
pyplot.hist(negLenLst, bins, alpha=0.5, label='Normal')
pyplot.xlabel('Path Length')
pyplot.ylabel('Frequency')
pyplot.legend(loc='upper left')
machine-learning unsupervised-learning
I'm a beginner with isolation forests and I started with the tutorial at this link:
https://www.kaggle.com/rgaddati/unsupervised-fraud-detection-isolation-forest
I don't understand the first plot of path length. What does the brown color mean?
Thank you for your help
##All General Import Statements
import pandas as pd
import numpy as np
import math
import random
%matplotlib inline
import random
from matplotlib import pyplot
import os
print(os.listdir("../input"))
class ExNode:
    """External (leaf) node of an isolation tree.

    Attributes:
        size: value recorded for this leaf by the tree builder.
            NOTE(review): presumably the number of samples that reached
            the leaf; the builder is not shown here, so confirm there.
    """

    def __init__(self, size):
        self.size = size
class InNode:
    """Internal (split) node of an isolation tree.

    Attributes:
        left: subtree followed when a sample's ``splitAtt`` value is
            strictly below ``splitVal``.
        right: subtree followed otherwise.
        splitAtt: key of the attribute this node splits on.
        splitVal: threshold the sample's attribute value is compared with.
    """

    def __init__(self, left, right, splitAtt, splitVal):
        self.left = left
        self.right = right
        self.splitAtt = splitAtt
        self.splitVal = splitVal
def iForest(X, noOfTrees, sampleSize):
    """Build an isolation forest from random subsamples of ``X``.

    Args:
        X: pandas DataFrame of training rows (anything with ``.sample(n)``).
        noOfTrees: number of trees to grow.
        sampleSize: number of rows drawn from ``X`` for each tree.

    Returns:
        A list of ``noOfTrees`` tree roots, each as returned by ``iTree``.
    """
    # Height limit handed to the tree builder: ceil(log2(sampleSize)).
    hlim = math.ceil(math.log(sampleSize, 2))
    forest = []
    for _ in range(noOfTrees):
        # Sample from the X argument. The original read the global df_data
        # here and silently ignored the data passed in by the caller.
        X_train = X.sample(sampleSize)
        # NOTE(review): iTree is not defined in this snippet; it must be
        # supplied (from the linked tutorial) before this function can run.
        forest.append(iTree(X_train, 0, hlim))
    return forest
def pathLength(x, Tree, currHeight):
    """Return the depth at which sample ``x`` reaches an external node.

    Args:
        x: sample indexable by attribute key (e.g. a pandas Series row).
        Tree: root of the (sub)tree to descend, an ``InNode`` or ``ExNode``.
        currHeight: depth already accumulated above ``Tree``; 0 at the root.

    Returns:
        ``currHeight`` plus the number of internal nodes traversed.
    """
    node = Tree
    depth = currHeight
    # Walk down until a leaf is hit; each internal node adds one level.
    while not isinstance(node, ExNode):
        if x[node.splitAtt] < node.splitVal:
            node = node.left
        else:
            node = node.right
        depth += 1
    return depth
# Load the credit-card data and separate the label column from the features.
df = pd.read_csv("../input/creditcard.csv")
y_true = df['Class']
# `columns=` replaces the positional axis argument, which pandas 2.0 removed.
df_data = df.drop(columns='Class')

sampleSize = 10000
ifor = iForest(df_data.sample(100000), 10, sampleSize)  ##Forest of 10 trees

# Row labels of each class, computed once instead of re-filtering the whole
# frame on every random draw.
posIndex = df_data[y_true == 1].index
negIndex = df_data[y_true == 0].index

posLenLst = []
negLenLst = []
for sim in range(1000):
    # One random Class == 1 row, measured against every tree in the forest.
    ind = random.choice(posIndex)
    for tree in ifor:
        # `ind` is an index label, so look it up with .loc rather than .iloc.
        posLenLst.append(pathLength(df_data.loc[ind], tree, 0))
    # One random Class == 0 row, measured the same way.
    ind = random.choice(negIndex)
    for tree in ifor:
        negLenLst.append(pathLength(df_data.loc[ind], tree, 0))

hlim = math.ceil(math.log(sampleSize, 2))
bins = np.linspace(0, hlim, hlim)
pyplot.figure(figsize=(12, 8))
# Both histograms share the same axes and are half-transparent (alpha=0.5),
# so wherever both classes have counts in a bin the two colours blend.
pyplot.hist(posLenLst, bins, alpha=0.5, label='Anomaly')
pyplot.hist(negLenLst, bins, alpha=0.5, label='Normal')
pyplot.xlabel('Path Length')
pyplot.ylabel('Frequency')
pyplot.legend(loc='upper left')
machine-learning unsupervised-learning
machine-learning unsupervised-learning
edited Nov 9 at 11:06
asked Nov 9 at 8:40
Zoya
1952213
1952213
1
Please include the relevant code, as links can go stale.
– Dragonthoughts
Nov 9 at 9:03
Have you tried to reach the tutorial authors?
– Bebs
Nov 9 at 9:05
I added the code and to be honest i didn't try to reach the tutorial authors..
– Zoya
Nov 9 at 9:22
I can't reach the tutorial authors, the page doesn't work..
– Zoya
Nov 9 at 11:02
add a comment |
1
Please include the relevant code, as links can go stale.
– Dragonthoughts
Nov 9 at 9:03
Have you tried to reach the tutorial authors?
– Bebs
Nov 9 at 9:05
I added the code and to be honest i didn't try to reach the tutorial authors..
– Zoya
Nov 9 at 9:22
I can't reach the tutorial authors, the page doesn't work..
– Zoya
Nov 9 at 11:02
1
1
Please include the relevant code, as links can go stale.
– Dragonthoughts
Nov 9 at 9:03
Please include the relevant code, as links can go stale.
– Dragonthoughts
Nov 9 at 9:03
Have you tried to reach the tutorial authors?
– Bebs
Nov 9 at 9:05
Have you tried to reach the tutorial authors?
– Bebs
Nov 9 at 9:05
I added the code and to be honest i didn't try to reach the tutorial authors..
– Zoya
Nov 9 at 9:22
I added the code and to be honest i didn't try to reach the tutorial authors..
– Zoya
Nov 9 at 9:22
I can't reach the tutorial authors, the page doesn't work..
– Zoya
Nov 9 at 11:02
I can't reach the tutorial authors, the page doesn't work..
– Zoya
Nov 9 at 11:02
add a comment |
1 Answer
1
active
oldest
votes
up vote
0
down vote
The brown looks like a mix of the tan and the blue, so I think that is where the two histograms overlap. This follows from each histogram being drawn with an alpha of 0.5.
Further explained: the colors mix where they overlap. The blue histogram is plotted first; wherever the tan one also has a bar, it is drawn on top. Because both use an alpha of 0.5 (alpha controls transparency), the colors blend, resulting in brown where they overlap. Try plotting each histogram separately and you will see that they cover the same bins.
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
add a comment |
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
0
down vote
The brown looks like a mix of the tan and the blue, so I think that is where the two histograms overlap. This follows from each histogram being drawn with an alpha of 0.5.
Further explained: the colors mix where they overlap. The blue histogram is plotted first; wherever the tan one also has a bar, it is drawn on top. Because both use an alpha of 0.5 (alpha controls transparency), the colors blend, resulting in brown where they overlap. Try plotting each histogram separately and you will see that they cover the same bins.
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
add a comment |
up vote
0
down vote
The brown looks like a mix of the tan and the blue, so I think that is where the two histograms overlap. This follows from each histogram being drawn with an alpha of 0.5.
Further explained: the colors mix where they overlap. The blue histogram is plotted first; wherever the tan one also has a bar, it is drawn on top. Because both use an alpha of 0.5 (alpha controls transparency), the colors blend, resulting in brown where they overlap. Try plotting each histogram separately and you will see that they cover the same bins.
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
add a comment |
up vote
0
down vote
up vote
0
down vote
The brown looks like a mix of the tan and the blue, so I think that is where the two histograms overlap. This follows from each histogram being drawn with an alpha of 0.5.
Further explained: the colors mix where they overlap. The blue histogram is plotted first; wherever the tan one also has a bar, it is drawn on top. Because both use an alpha of 0.5 (alpha controls transparency), the colors blend, resulting in brown where they overlap. Try plotting each histogram separately and you will see that they cover the same bins.
The brown looks like a mix of the tan and the blue, so I think that is where the two histograms overlap. This follows from each histogram being drawn with an alpha of 0.5.
Further explained: the colors mix where they overlap. The blue histogram is plotted first; wherever the tan one also has a bar, it is drawn on top. Because both use an alpha of 0.5 (alpha controls transparency), the colors blend, resulting in brown where they overlap. Try plotting each histogram separately and you will see that they cover the same bins.
edited Nov 12 at 14:23
answered Nov 9 at 14:21
Neo
946
946
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
add a comment |
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
Could you please explain more ? I still do not understand
– Zoya
Nov 12 at 13:03
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
@Zoya edited the post for you
– Neo
Nov 12 at 14:24
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53222323%2fisolation-forest-understanding-the-plot%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
1
Please include the relevant code, as links can go stale.
– Dragonthoughts
Nov 9 at 9:03
Have you tried to reach the tutorial authors?
– Bebs
Nov 9 at 9:05
I added the code and to be honest i didn't try to reach the tutorial authors..
– Zoya
Nov 9 at 9:22
I can't reach the tutorial authors, the page doesn't work..
– Zoya
Nov 9 at 11:02