% deglitch1vector - Identify and remove suspicious points from ADV data % for use by other functions % % [gdata,Qa]=deglitch1vector(bdata,settings); % % Inputs % bdata = the dirty ADV data set, a structure defined as % settings = the fine controls % settings.samplerate = 1; % Hz, required! there must be a sample rate! % settings.nsd = 2.8; % num std. deviations that defines outliers % settings.ndt = 6; % cutoff frequency % settings.verbose = 1; %turn on statistical output % settings.threshold = 7; % a threshold for recomputing std on a different size chunk of the data. % % deglitch actually replaces bad points with estimated good values % gdata = the fixed data set % Qa = the feedback on what happened % Qa.nbad = the number of bad points % Qa.nfixed = the number of endpoints fixed % Qa.delmean = the ratio of the means before and after points were replaced % Qa.delvar = the ratio of the variance before and after points were replaced % Qa.stdr = standard deviation of the residuals % % written by Chris Sherwood and Ellyn Montgomery % rewritten for a single vector by Marinna Martini % first version to Marinna by Ellyn % % mod to not handle NaN's cleanly 4/06 etm function [gdata,Qa] = deglitch1vector(bdata,settings); % check inputs if exist('settings', 'var'), if isfield(settings,'nsd'), nsd = settings.nsd; end if isfield(settings,'ndt'), ndt = settings.ndt; end if isfield(settings,'verbose'), verbose = settings.verbose; end if isfield(settings, 'samplerate'), samplerate = settings.samplerate; end if isfield(settings,'threshold'), thold = settings.threshold; end end %set reasonable values for most stuff. If there's a big storm in the %dataset, you may want up increase threshold if exist('nsd')~=1, nsd = 2.8; end if exist('ndt')~=1, ndt = 6; end if exist('thold')~=1, thold = 7; end if exist('verbose')~=1, verbose = 0; end if exist('samplerate','var')~=1, disp(sprintf('deglitch1vector: no sample rate provided, assuming 1 Hz')); samplerate = 1; end warning off MATLAB:polyfit:RepeatedPointsOrRescale; Qa = []; % number of points nfixed.dg = 0; nfixed.filt=0; dt = 1 ./ (samplerate); % need a specific shape for this to work [r, c] = size(bdata); if r == 1, bdata = bdata'; end % if there's nan in the data pass the burst back as-is with some quality words % etm 4/26/06 if(find(isnan(bdata)==1)) disp 'has NaNs, cannot process further' gdata=bdata; xx=find(isnan(bdata)==1); nbad=length([xx(1):1:xx(end)]); Qa.delmean=gmean(gdata(1:xx(1)-1)); Qa.delvar=gstd(gdata(1:xx(1)-1)); Qa.nbad = nbad; Qa.nfixed_dg = 0; Qa.nfixed_filt = 0; Qa.stdr = 99; % some big obvious number else % for cases where there's lots of garbage- work progressively % through the data until stdr < 10 stval=1; all_bad=0; bdataa=lpfilt(bdata(stval:end),dt,1/(ndt*dt)); resid = bdata(stval:end)-bdataa; stdr=gstd(resid); bn=length(resid); % plot(bdata); keyboard % this part tries to get rid of real garbage at the beginning % of a burst % 7 was chosen because it made "reasonable" decisions- 10 might be better if stdr > thold % recompute stdr on tenths of the burst to see if it's all noisy or not lchunk=floor(length(bdata)*.1); % is this way better for identifying noisy sections nn=floor(length(bdata)/10); nmat=reshape(bdata(1:nn*10),nn,10); sect_std=gstd(nmat); if (isempty(find(sect_std <= thold))) disp 'high std in all sections- declaring whole burst bad' % this declares the data is bad, but doesn't replace anything with % NaN- should it? Qa says burst is bad, but the data returned is % what cane in. stval=length(bdata); stdbeg=sect_std(1); resida=resid; else while (stdr > thold && stval < length(bdata)-lchunk) stval=stval+lchunk; bdataf=lpfilt(bdata(stval:end),dt,1/(ndt*dt)); resid = bdata(stval:end)-bdataf; stdr=gstd(resid); end % double check that it's not an isolated huge spike at the beginning if (stval < length(bdata)-lchunk) bdatab=lpfilt(bdata(1:stval),dt,1/(ndt*dt)); resida=bdata(1:stval)-bdatab; % see if it's a small isolated chunk bn=find(abs(resida-gmean(resida)) < 100); if (length(bn) > ceil(length(resida)*.05)) stdbeg=gstd(resida(bn)); else stdbeg=gstd(resida); end else stdbeg=stdr; resida=resid; end % end if (stval + lchunk... end % end if isempty( % this condition may need to be changed! if stdbeg < settings.nsd*4 | (length(bn) > ceil(length(resida)*.05)) resid = bdata(1:end)-bdataa; n = length(bdata); X = (1:n)'; gdata = bdata; elseif stval < length(bdata)-lchunk nanfill=ones(1,stval-1)*NaN; gdata = bdata(stval:end); bdata = bdata(stval:end); X = (1:length(bdata))'; n = length(bdata); else nanfill=ones(1,length(bdata))*NaN; all_bad=1; end else % stdr <= thold n = length(bdata); X = (1:n)'; gdata = bdata; end if( all_bad ~=1) if verbose, fprintf(1,'std devs of residuals %g \n',stdr); end thold=stdr*nsd; bad = abs(resid) > thold; rep = find(bad); nrep = find(~bad); jj=1; domore_flg=0; if(any(rep)), if(rep(1)==1); rep=rep(2:end); end for kk=1:length(rep), % if kk == length(rep); keyboard; end if(rep(kk)>(7) & rep(kk) length(notfx)*4); % nreps=length(lx)+1; if isempty(lx) lenspk=length(notfx); if lenspk<5 loind=notfx(1)-10; hiind=notfx(end)+10; rel_bdind=[10:(hiind-loind)-9]; if (loind < 1); loind = 1; end if (hiind > length(bdata)); hiind = length(bdata); rel_bdind=[10:hiind-loind]; end else loind=notfx(1)-lenspk*2; hiind=notfx(end)+lenspk*2; if (loind < 1); loind = 1; end if (hiind > length(bdata)); hiind = length(bdata); rel_bdind=[10:hiind]; end rel_bdind=[lenspk*2+1:lenspk*2+1+lenspk*2]; % remove any indices that are beyond the ends of the data llo=find (rel_bdind < 1); lhi=find(rel_bdind > length(bdata)); if (~isempty(llo)) rel_bdind(llo)=[]; end if (~isempty(lhi)) rel_bdind(lhi)=[]; end end % call ellyn's filter to fix the points for each veolcity component if (~isempty(loind) & ~isempty(hiind)) if (loind >10) gdata(loind:hiind)=fix_filt(gdata(loind:hiind),rel_bdind); nfixed.filt=length([loind:hiind]); else if(loind < 1); loind=1; end gdata(loind:notfx(end))=ones((notfx(end)-loind)+1,1)*NaN; end end % use 717_burst 1548 to work out close multiple spikes else nreps=length(lx)+1; n=1; for kk=1:nreps if( kk==nreps) stp=length(notfx); else stp=lx(kk); end lenspk=notfx(stp)-notfx(n); if lenspk<5 loind=notfx(n)-10; hiind=notfx(stp)+10; rel_bdind=[10:(hiind-loind)-9]; if (loind < 1); loind = 1; end if (hiind > length(bdata)); hiind = length(bdata); rel_bdind=[10:hiind-loind]; end else loind=notfx(n)-lenspk*2; hiind=notfx(stp)+lenspk*2; rel_bdind=[lenspk*2+1:lenspk*2+1+lenspk*2]; if (loind < 1); loind = 1; end if (hiind > length(bdata)); hiind = length(bdata); rel_bdind=[10:hiind-loind]; end llo=find (rel_bdind < 1); lhi=find(rel_bdind > length(bdata)); if (~isempty(llo)) rel_bdind(llo)=[]; end if (~isempty(lhi)) rel_bdind(lhi)=[]; end end % call ellyn's filter to fix the points for each veolcity component if (~isempty(loind) & ~isempty(hiind)) disp(['filtering ' num2str(loind) ' : ' num2str(hiind)]) gdata(loind:hiind)=fix_filt(gdata(loind:hiind),rel_bdind); nfixed.filt=nfixed.filt+length([loind:hiind]); n=stp+1; end end end end if(exist('nanfill')) gdata=[nanfill'; gdata]; bdata=[nanfill'; bdata]; end % end ETM mods nbad = sum(bad) + stval; if verbose, fprintf(1,'Number of points flagged: %d\n',nbad ); end if verbose, fprintf(1,'Number. of endpoints fixed (by dglitch, filter): %d %d\n',nfixed.dg, nfixed.filt ); end if verbose, fprintf(1,'Statistics: before (after replacement) [after removal].\n'); end m = [ gmean([ bdata, gdata]), gmean( gdata(nrep) ) ]; if verbose, fprintf(1,'gdata Mean: %9f (%9f) [%9f]; Pct. orig: (%f) [%f]\n',... m, 100*[m(2)/m(1), m(3)/m(1)] ); end vv = [ gstd([ bdata, gdata]), gstd( gdata(nrep) ) ].^2; if verbose, fprintf(1,'gdata Variance: %9f (%9f) [%9f]; Pct. orig: (%f) [%f]\n',... vv, 100*[vv(2)/vv(1), vv(3)/vv(1)] ); end mx = [max([ bdata, gdata ]), max(bdata(nrep))]; mn = [min([ bdata, gdata ]), min(bdata(nrep))]; if verbose, fprintf(1,'gdata Max: %9f (%9f) [%9f]; Pct. orig: (%f) [%f]\n',... mx, 100*[mx(2)/mx(1), mx(3)/mx(1)] ); end if verbose, fprintf(1,'gdata Min: %9f (%9f) [%9f]; Pct. orig: (%f) [%f]\n',... mn, 100*[mn(2)/mn(1), mn(3)/mn(1)] ); end if verbose, fprintf('\n'); end else %all data bad - skipped the processing gdata=[nanfill']; bdata=[nanfill']; nbad=length(gdata); m(1)=0; vv(1)=0; stdr=NaN; if verbose, fprintf(1,'whole burst is bad! %d points removed\n',nbad ); end end % Qa.delmean = zeros(3,1); % Qa.delvar = zeros(3,1); % Qa.delmean(1)=m(3)/m(1); % Qa.delvar(1) = vv(3)/vv(1); % Qa.nbad = nbad; % Qa.nfixed = nfixed; % Qa.stdr = stdr; if m(1)==0, Qa.delmean=NaN; else Qa.delmean=m(3)/m(1); end if vv(1)==0, Qa.delvar = NaN; else Qa.delvar = vv(3)/vv(1); end Qa.nbad = nbad; Qa.nfixed_dg = nfixed.dg; Qa.nfixed_filt = nfixed.filt; Qa.stdr = stdr; end return % end of main function deglitch1vector function x=fix_filt(x, spks) % % catches a range of data detected in deglitch and prettifies by employing % lowpass and high pass filters % make sure the indices don't overflow! if (~isempty(find(spks > length(x)))) spks(find(spks > length(x)))=[]; end % now set up to filter n=ones(length(x),1); n(spks)=ones(length(spks),1)*NaN; xmin=floor(min(x.*n)); xmax=ceil(max(x.*n)); ibad=find(xxmax); igood=find(x>=xmin & x<=xmax); npts=length(spks); if(mod(npts,2) == 0) npts=npts-1; end if(length(ibad>0)); if(ibad(1)==1); x(1)=x(min(igood)); ibad=find(xxmax); igood=find(x>=xmin & x<=xmax); end if(ibad(end)==length(x)); x(end)=x(max(igood)); ibad=find(xxmax); igood=find(x>=xmin & x<=xmax); end for i=1:length(ibad); ipast=find(igoodibad(i)); past=x(igood(ipast)); future=x(igood(ifut)); npast=igood(ipast); nfuture=igood(ifut); beg=length(past)-npts+1; if(beg<1) beg=1; end past=past(beg:end); npast=npast(beg:end); thend=npts; if(length(future) 3*smad); % Guess the outliers, 3 times std here ny(ii)=ym(ii); % replace outliers with interpolated values. %% plot(1:N,y,1:N,ny,ii,ny(ii),'.g'); ny=reshape(ny,Ny,My); return