From ce95974c7bff564aa6f718eb41248f59c9cb0891 Mon Sep 17 00:00:00 2001 From: Remco Renken Date: Sat, 26 Mar 2022 16:10:39 +0100 Subject: [PATCH] Big modification in the way the histogram is calculated. Now first the edges are calculated based on the two paramters: EqualSizeBinFlag and UseHistProxy. Once edges for bins are known (calculated) creating the cdf is the same for all cases. --- CREClass.m | 79 ++++++++++++++++++++++++---------------------- TestCREClass.m | 86 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 66 deletions(-) diff --git a/CREClass.m b/CREClass.m index f9d4eec..7ac99ea 100644 --- a/CREClass.m +++ b/CREClass.m @@ -4,12 +4,17 @@ classdef CREClass < handle nBin; %number of bins Monitor=true; %set to true to get intermediat results/figures UseHistProxy=true; % set this to get a proxy of the survival function using the hist function + EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins, + % if set to false: each bin will contain (aproximately) an equal + % number of pixel values. end - properties %to be made read only + properties (SetAccess = private) %read only CRE RB P_RB + Edges end + properties (Hidden) WaitBarHandle; end @@ -66,41 +71,39 @@ classdef CREClass < handle end %% core function(s) methods - + function CalcEdges(obj) + if isempty(obj.Data) + warning('nothing to do'); + return; + end + if obj.UseHistProxy + if isempty(obj.nBin) + obj.nBin=floor(numel(obj.Data)./10); + end + if obj.EqualSizeBinFlag + [~,obj.Edges]=histcounts(obj.Data(:),obj.nBin); + else + prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile + obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins + end + else + if obj.UseHistProxy ; error('You cannot use EqualSizeBinFlag=true and UseHistProxy=false as a combination');end + q=transpose(unique(sort(obj.Data(:)))); + dq=diff(q); + obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable + obj.nBin=numel(obj.Edges)-1; + end + end function Calc(obj) if isempty(obj.Data) warning('nothing to do'); return; end - if obj.UseHistProxy - if isempty(obj.nBin) - obj.nBin=floor(numel(obj.Data)./10); - end -%update part below to prefered matlab commands -% [N,bin]=hist(obj.Data(:),obj.nBin); -% N=transpose(N); -% P=N./sum(N); -% CP=cumsum(P); - [CP,bin]=histcounts(obj.Data(:),obj.nBin,'Normalization','cdf'); - FC=1-transpose(CP); - FC(FC<0)=0; - dl=ones(size(FC))./numel(FC); - else - q=sort(unique(obj.Data(:))); - FC=zeros(size(q)); - dl=zeros(size(q)); - for k=1:numel(q) - FC(k)=sum(obj.Data(:)>=q(k)); - if k==1 - dl(k)=(q(k+1)-q(k))/2; - elseif k==numel(q) - dl(k)=(q(k)-q(k-1))/2; - else - dl(k)=(q(k+1)-q(k))/2+(q(k)-q(k-1))/2; - end - end - FC=FC./numel(obj.Data); - end + if isempty(obj.Edges);obj.CalcEdges;end + [CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf'); + FC=1-transpose(CP); + FC(FC<0)=0; + dl=diff(bin)./sum(diff(bin)); % will capture the case with non-equidistant bins as well as equidistant bins LogFC=log(FC); LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf if any(isnan(FC));error('something went wrong');end %catch a posible error. @@ -117,14 +120,14 @@ classdef CREClass < handle ind=find(exp(-CRE_Med_Inf)>=FC,1,'first'); if obj.UseHistProxy obj.P_RB=FC(ind); - obj.RB=bin(ind); + if ind==numel(bin)%if RB is (beyond) the last bin, + warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN> + obj.RB=bin(ind); + end + obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin else -% SortData=sort(obj.Data); -% obj.RB=SortData(ind); - % I think the two lines above are a bug. I should not use - % "SortData but rather q itsself. Some numbers may be in - % the data multiple times - obj.RB=q(ind); + SortData=unique(sort(obj.Data(:))); + obj.RB=SortData(ind); obj.P_RB=FC(ind); end end diff --git a/TestCREClass.m b/TestCREClass.m index 12509ae..3abd73c 100644 --- a/TestCREClass.m +++ b/TestCREClass.m @@ -4,33 +4,54 @@ %% set some constants A=randn(100); %% create instance of CRE class -S=CREClass; -%% check histogram vs "new" technique -% NB I come to the conclusion that the new method is not always working. -% Why? -S.Data=A;% set gaussian data -S.nBin=(numel(S.Data)); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos -% use histogram -S.UseHistProxy=true; -S.Calc; -S -% Use my aproximation for the histogram -% only do this if the number of data points is not too big. -S.UseHistProxy=false; -S.Calc; -S +S1=CREClass; +%% Calculate by setting labda equal to each of the (unique) values in the data. +S1.UseHistProxy=false; +S1.EqualSizeBinFlag=false; +S1.Data=A;% set gaussian data +S1.Calc; +% use histogram aproximation with unequal bin sizes in the histogram +S2=CREClass; +S2.UseHistProxy=true; +S2.EqualSizeBinFlag=false; +S2.Data=A;% set gaussian data +S2.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos +S2.Calc + +% use histogram aproximation with equal bin sizes in the histogram +S3=CREClass; +S3.UseHistProxy=true; +S3.EqualSizeBinFlag=false; +S3.Data=A;% set gaussian data +S3.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos +S3.Calc + +%Check recalculation of edges +S4=CREClass; +S4.UseHistProxy=true; +S4.EqualSizeBinFlag=true; +S4.Data=A;% set gaussian data +S4.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos +S4.Calc +disp(S4) +S4.nBin=numel(A)/10; +S4.CalcEdges ; % force a recalculation of the edges +S4.Calc; +disp(S4) + +return; %% Show effect of scaling or ofsett the data % Scale S.Data=100*A; S.UseHistProxy=true; S.Calc; -S +disp(S); % Offset S.Data=A+10; S.UseHistProxy=true; S.Calc; -S +disp(S); %% %% Test CCRE class @@ -39,21 +60,30 @@ figure(2);clf; figure(3);clf;hold on; A=randn(100); B=A; -B(:)=A(randperm(numel(A))) -for k=-1:0.1:1 +B(:)=A(randperm(numel(A))); +w=-1:0.1:1; +out(numel(w),1)=struct('w',[],'CRE',[],'CCRE',[],'R',[]); +for k=1:numel(w) + out(k).w=w(k); CS=CCREClass; - CS.Data=1*(1-abs(k))*A+k*B; + CS.EquidistantBinFlag=false; + CS.Data=1*(1-abs(w(k)))*A+w(k)*B; CS.nBin=50; CS.DataRef=B; CS.nBinRef=500; CS.Calc; - figure(1); - plot(k,CS.CCRE,'o') - plot(k,CS.CRE,'x'); + out(k).CRE=CS.CRE; + out(k).CCRE=CS.CCRE; + out(k).R=CS.CCRE./CS.CRE; figure(2); - scatter(CS.DataRef(:),CS.Data(:)); - figure(3); - plot(k,CS.CCRE./CS.CRE,'d'); + title(sprintf('w: %d',w(k))); + scatter(CS.DataRef(:),CS.Data(:)); + snapnow end -CS -snapnow \ No newline at end of file +disp(CS); + figure(1); + clf;hold on; + plot([out(:).w],[out(:).CCRE],'o'); + plot([out(:).w],[out(:).CRE],'x'); + figure(3);clf + plot([out(:).w],[out(:).R],'d') \ No newline at end of file