classdef CREClass < handle % 23 and 24 Nov 2022 I added a new calculation for the RB based on % Shanon entropy given a threshold \lambda. I also think there is a % "mistake" in calculating the RB the classical way. properties Data; % the data set to get the CRE off nBin; %number of bins Monitor=true; %set to true to get intermediat results/figures UseHistProxyFlag=true; % set this to get a proxy of the survival function using the hist function EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins, % if set to false: each bin will contain (aproximately) an equal % number of pixel values. end properties (SetAccess = private) %read only CRE RB P_RB end properties(Dependent) Edges end properties (Hidden) WaitBarHandle; HiddenEdges; end properties (Constant,Hidden) alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope) beta=1; %they are currently obsolete. % FeedBackOptions={'CRE','RB','All'} end %% constructor methods %constructor function obj = CREClass(varargin) obj.Data=[]; obj.nBin=[]; %obj.FeedBack={'CRE'}; if nargin==0;return;end % if no input arguments are given, just create an empty instance if ischar(varargin{1}) %assume number value pairs. if nargin<=2; error('please use number value pairs' ); end for k=2:2:nargin switch lower(varargin{k-1}) case {'ref','data'} obj.Data=varargin{k}; case {'nbin','bin'} obj.nBin=varargin{k}; case {'equalsizebinflag'} obj.EqualSizeBinFlag=varargin{k}; case {'usehistproxyflag'} obj.UseHistProxyFlag=varargin{k}; otherwise error('invalid variable name'); end end else if nargin>=1 obj.Data=varargin{1}; end if nargin==2 obj.nBin=varargin{2}; end end end end %% set and get functions methods function Edges=get.Edges(obj) Edges=obj.HiddenEdges; end function set.Edges(obj,val) if ~isnumeric(val);error('provide a array with edge values');end if numel(val)<3;error('too few edges provided. Need at least 3 values, i.e. 2 bins defined');end obj.HiddenEdges=transpose(val(:)); obj.nBin=numel(val)-1; end function set.Data(obj,val) if isnumeric(val) obj.Data=val; else error('please enter numeric array/matrix as reference'); end end function set.nBin(obj,val) if isnumeric(val) obj.nBin=val; else error('please enter numeric array or value as bin definition'); end end end %% core function(s) methods function CalcEdges(obj) if isempty(obj.Data) warning('nothing to do'); return; end if obj.UseHistProxyFlag if isempty(obj.nBin) obj.nBin=floor(numel(obj.Data)./10); end if obj.EqualSizeBinFlag [~,obj.Edges]=histcounts(obj.Data(:),obj.nBin); else prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins end else if obj.UseHistProxyFlag ; error('You cannot use EqualSizeBinFlag=true and UseHistProxyFlag=false as a combination');end q=transpose(unique(sort(obj.Data(:)))); dq=diff(q); obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable obj.nBin=numel(obj.Edges)-1; end end function CalcRBShannon(obj) % 23Nov22 Here I will try to implement a (hopefully better) % definition of the relevance boundry. The basic idea is % assume a dataset X= {x_1 ... x_i} X % set a threshold \lambda NB \lambda \in of the data X %Shanon entropy for this \lambda will be: % H(\lamda)=p(x<\lambda)log(p(x=\lambda)log(p(x>=\lambda)) % Now the expectation value for H will be : %E(H)=\int p(\lambda)*H(\lambda) d\lambda if isempty(obj.Data) warning('nothing to do'); return; end if isempty(obj.Edges);obj.CalcEdges;end [CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf'); dl=transpose(diff(bin)./sum(diff(bin))); % will capture the case with non-equidistant bins as well as equidistant bins pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf'); CP=transpose(CP); LogCP=log(CP); LogCP(~isfinite(LogCP))=0; % see comment below FC=1-CP; FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible. LogFC=log(FC); LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge w=(dl.*pdf_lambda); w=w./sum(w); % for one reason or the other, the sum is not always equal to 1. EH=w*H; %calculate expectation value for H RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')]; obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2; obj.P_RB=FC(RB_ind); if obj.Monitor lambda=obj.Edges(1:end-1); scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy'); line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow histogram(obj.Data(:));snapnow disp('sum of weights') disp(sum(dl.*pdf_lambda)) disp('EH') disp(EH) disp('boundry index'); disp(RB_ind); disp('boundry lambda value'); disp(obj.RB); disp('boundry p-values'); disp(FC(RB_ind)); disp('fraction inside interval'); disp(abs(diff(FC(RB_ind)))); end end function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB. % RR note: 23Nov22: I am not sure if I still like this RB % definition. I keep having a hard time defending it % theoretically. The code is retained, for now, however for backwards % compatibility. if isempty(obj.Data) warning('nothing to do'); return; end if isempty(obj.Edges);obj.CalcEdges;end [CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf'); FC=1-transpose(CP); FC(FC<0)=0; dl=diff(bin)./sum(diff(bin)); % will capture the case with non-equidistant bins as well as equidistant bins LogFC=log(FC); LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf if any(isnan(FC));error('something went wrong');end %catch a posible error. if transpose(dl)*FC==0 %if only one value in the most left bin in the distribution I may get a 0 divided by 0 %as the CRE of a delta function is 0, enforce this outcome obj.CRE=0; else obj.CRE=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos end %% get the RB dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. =FC,1,'first'); if obj.UseHistProxyFlag obj.P_RB=FC(ind); if ind==numel(bin)%if RB is (beyond) the last bin, warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN> obj.RB=bin(ind); end obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin else SortData=unique(sort(obj.Data(:))); obj.RB=SortData(ind); obj.P_RB=FC(ind); end end end end