Compare commits
No commits in common. "e6e9e593a451d6e01094c45fb22a15d465ed415b" and "e68267182d686971490247dd617de49e2696f373" have entirely different histories.
e6e9e593a4
...
e68267182d
60
CREClass.m
60
CREClass.m
@ -1,7 +1,4 @@
|
|||||||
classdef CREClass < handle
|
classdef CREClass < handle
|
||||||
% 23 and 24 Nov 2022 I added a new calculation for the RB based on
|
|
||||||
% Shanon entropy given a threshold \lambda. I also think there is a
|
|
||||||
% "mistake" in calculating the RB the classical way.
|
|
||||||
properties
|
properties
|
||||||
Data; % the data set to get the CRE off
|
Data; % the data set to get the CRE off
|
||||||
nBin; %number of bins
|
nBin; %number of bins
|
||||||
@ -114,62 +111,7 @@ classdef CREClass < handle
|
|||||||
obj.nBin=numel(obj.Edges)-1;
|
obj.nBin=numel(obj.Edges)-1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
function CalcRBShannon(obj)
|
function Calc(obj)
|
||||||
% 23Nov22 Here I will try to implement a (hopefully better)
|
|
||||||
% definition of the relevance boundry. The basic idea is
|
|
||||||
% assume a dataset X= {x_1 ... x_i} X
|
|
||||||
% set a threshold \lambda NB \lambda \in of the data X
|
|
||||||
%Shanon entropy for this \lambda will be:
|
|
||||||
% H(\lamda)=p(x<\lambda)log(p(x<lambda))+p(x>=\lambda)log(p(x>=\lambda))
|
|
||||||
% Now the expectation value for H will be :
|
|
||||||
%E(H)=\int p(\lambda)*H(\lambda) d\lambda
|
|
||||||
if isempty(obj.Data)
|
|
||||||
warning('nothing to do');
|
|
||||||
return;
|
|
||||||
end
|
|
||||||
if isempty(obj.Edges);obj.CalcEdges;end
|
|
||||||
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
|
|
||||||
dl=transpose(diff(bin)./sum(diff(bin))); % will capture the case with non-equidistant bins as well as equidistant bins
|
|
||||||
pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf');
|
|
||||||
CP=transpose(CP);
|
|
||||||
LogCP=log(CP);
|
|
||||||
LogCP(~isfinite(LogCP))=0; % see comment below
|
|
||||||
FC=1-CP;
|
|
||||||
FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible.
|
|
||||||
LogFC=log(FC);
|
|
||||||
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
|
|
||||||
H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge
|
|
||||||
w=(dl.*pdf_lambda);
|
|
||||||
w=w./sum(w); % for one reason or the other, the sum is not always equal to 1.
|
|
||||||
EH=w*H; %calculate expectation value for H
|
|
||||||
RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')];
|
|
||||||
obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2;
|
|
||||||
obj.P_RB=FC(RB_ind);
|
|
||||||
if obj.Monitor
|
|
||||||
lambda=obj.Edges(1:end-1);
|
|
||||||
scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow
|
|
||||||
plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy');
|
|
||||||
line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow
|
|
||||||
histogram(obj.Data(:));snapnow
|
|
||||||
disp('sum of weights')
|
|
||||||
disp(sum(dl.*pdf_lambda))
|
|
||||||
disp('EH')
|
|
||||||
disp(EH)
|
|
||||||
disp('boundry index');
|
|
||||||
disp(RB_ind);
|
|
||||||
disp('boundry lambda value');
|
|
||||||
disp(obj.RB);
|
|
||||||
disp('boundry p-values');
|
|
||||||
disp(FC(RB_ind));
|
|
||||||
disp('fraction inside interval');
|
|
||||||
disp(abs(diff(FC(RB_ind))));
|
|
||||||
end
|
|
||||||
end
|
|
||||||
function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB.
|
|
||||||
% RR note: 23Nov22: I am not sure if I still like this RB
|
|
||||||
% definition. I keep having a hard time defending it
|
|
||||||
% theoretically. The code is retained, for now, however for backwards
|
|
||||||
% compatibility.
|
|
||||||
if isempty(obj.Data)
|
if isempty(obj.Data)
|
||||||
warning('nothing to do');
|
warning('nothing to do');
|
||||||
return;
|
return;
|
||||||
|
@ -2,36 +2,31 @@
|
|||||||
% show some default behaviour
|
% show some default behaviour
|
||||||
% and test the error handling.
|
% and test the error handling.
|
||||||
%% set some constants
|
%% set some constants
|
||||||
% A=randn(1000,1);
|
A=randn(100);
|
||||||
% A=rand(1000,1);
|
|
||||||
A=cat(1,randn(1000,1),randn(200,1)+10);
|
|
||||||
%% create instance of CRE class
|
%% create instance of CRE class
|
||||||
% % % S1=CREClass;
|
S1=CREClass;
|
||||||
% % % %% Calculate by setting labda equal to each of the (unique) values in the data.
|
%% Calculate by setting labda equal to each of the (unique) values in the data.
|
||||||
% % % S1.UseHistProxyFlag=false;
|
S1.UseHistProxyFlag=false;
|
||||||
% % % S1.EqualSizeBinFlag=false;
|
S1.EqualSizeBinFlag=false;
|
||||||
% % % S1.Data=A;% set gaussian data
|
S1.Data=A;% set gaussian data
|
||||||
% % % S1.Calc; %get RB based on the CRE
|
S1.Calc;
|
||||||
% % % S1.CalcRBShannon; %% get RB based on shannon entropy
|
disp(S1)
|
||||||
% % % disp(S1)
|
%% use histogram aproximation with unequal bin sizes in the histogram
|
||||||
% % % %% use histogram aproximation with unequal bin sizes in the histogram
|
S2=CREClass;
|
||||||
% % % S2=CREClass;
|
S2.UseHistProxyFlag=true;
|
||||||
% % % S2.UseHistProxyFlag=true;
|
S2.EqualSizeBinFlag=false;
|
||||||
% % % S2.EqualSizeBinFlag=false;
|
S2.Data=A;% set gaussian data
|
||||||
% % % S2.Data=A;% set gaussian data
|
S2.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
||||||
% % % S2.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
S2.Calc
|
||||||
% % % S2.Calc
|
disp(S2)
|
||||||
% % % S2.CalcRBShannon;
|
%% use histogram aproximation with equal bin sizes in the histogram
|
||||||
% % % disp(S2)
|
S3=CREClass;
|
||||||
% % % %% use histogram aproximation with equal bin sizes in the histogram
|
S3.UseHistProxyFlag=true;
|
||||||
% % % S3=CREClass;
|
S3.EqualSizeBinFlag=false;
|
||||||
% % % S3.UseHistProxyFlag=true;
|
S3.Data=A;% set gaussian data
|
||||||
% % % S3.EqualSizeBinFlag=false;
|
S3.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
||||||
% % % S3.Data=A;% set gaussian data
|
S3.Calc
|
||||||
% % % S3.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
disp(S3)
|
||||||
% % % S3.Calc
|
|
||||||
% % % S3.CalcRBShannon;
|
|
||||||
% % % disp(S3)
|
|
||||||
%% Check recalculation of edges
|
%% Check recalculation of edges
|
||||||
S4=CREClass;
|
S4=CREClass;
|
||||||
S4.UseHistProxyFlag=true;
|
S4.UseHistProxyFlag=true;
|
||||||
@ -39,15 +34,13 @@ S4.EqualSizeBinFlag=true;
|
|||||||
S4.Data=A;% set gaussian data
|
S4.Data=A;% set gaussian data
|
||||||
S4.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
S4.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
||||||
S4.Calc
|
S4.Calc
|
||||||
S4.CalcRBShannon;
|
|
||||||
disp(S4)
|
disp(S4)
|
||||||
S4.nBin=round(numel(A)/10);
|
S4.nBin=numel(A)/10;
|
||||||
S4.CalcEdges ; % force a recalculation of the edges
|
S4.CalcEdges ; % force a recalculation of the edges
|
||||||
S4.Calc;
|
S4.Calc;
|
||||||
S4.CalcRBShannon;
|
|
||||||
disp(S4)
|
disp(S4)
|
||||||
%%
|
|
||||||
return;
|
|
||||||
%% Show effect of scaling or ofsett the data
|
%% Show effect of scaling or ofsett the data
|
||||||
% Scale
|
% Scale
|
||||||
S.Data=100*A;
|
S.Data=100*A;
|
||||||
|
Loading…
Reference in New Issue
Block a user