function [cleanvar,Qa]=both_cleanup(fname, varname, settings) % both_cleanup attempts to clean up adv and pcadp stats data with spikes % % this is a two-step process, where step 1 is a simple min-max % excluder, and step 2 uses std to find spikes. It replaces only the % spikes with data from medfilt over 15 points in first part, over 7 points % in the second. % % usage: [cl_dat,Qa]=both_cleanup('8523advAs-cal.nc','vrange',settings); % inputs are the filename, the variable name and a structure of % settings - the fields below are needed: % settings.min=550;settings.max=700; % settings.npts=50, settings.std_threshold=20; settings.nstds=2.4; % outputs are the clean data, and the points where something was replaced % check for arguments if nargin < 3; help mfilename; return; end % set up defaults for settings, if not supplied if exist('settings', 'var'), if ~isfield(settings, 'min'), settings.min=400; end if ~isfield(settings, 'max'), settings.max=500; end if ~isfield(settings,'nstds'), settings.nsds=2.0; end if ~isfield(settings, 'npts'), settings.npts=20; end if ~isfield(settings, 'std_threshold'), settings.std_threshold=10; end end % load the data ncload(fname,'time2') ncload(fname,'time') ncload(fname,varname) eval(['odat=' varname ';']) %find out how big the data is (whether looping is needed) [p,q]=size(odat); % convert to julian days tt=time+(time2/86400000); for jj=1:q % find spiky data, using min and max values to defing the spikes loc=find(odat(:,jj)< settings.min | odat(:,jj) > settings.max); % now do a median filter over a longish number of points od_mf=medfilt(odat(:,jj),15); %finally replace only the spiky locations with the filtered points clvar=odat(:,jj); clvar(loc)=od_mf(loc); cleanvar(:,jj)=clvar; Qa(jj).loc=loc; end figure plot(tt,odat) hold on plot(tt, cleanvar,'c') clear od_mf % the above is all that's needed if the vrange or brange was relatively flat % but if the data slopes or has biggish jumps, it won't do it all % use std as a threshold for doing more checking for jj=1:q if gstd(cleanvar(:,jj)) > settings.std_threshold % subset the data and find spikes by chunk npcs=length(cleanvar)/settings.npts; indx=1; spikes=[]; for ik=1:npcs dat=cleanvar(indx:indx+settings.npts,jj); chunkstd=gstd(dat); chunkmed=gmedian(dat); nbad=find(abs(dat-chunkmed) > settings.nstds*chunkstd); spikes=[spikes; nbad+(indx-1)]; indx=indx+settings.npts; clear nbad chunkstd chunkmed dat end dat=cleanvar(indx:end,jj); chunkstd=gstd(dat); chunkmed=gmedian(dat); nbad=find(abs(dat-chunkmed) > settings.nstds*chunkstd); spikes=[spikes; nbad+(indx-1)]; clear nbad chunkstd chunkmed dat % then do a median filter of the whole thing with 7 points vr_mf=medfilt(cleanvar(:,jj),7); fvr=cleanvar(:,jj); % substitute the median filtered data in place of the spikes fvr(spikes)=vr_mf(spikes); plot(tt,fvr,'r') cleanvar(:,jj)=fvr; Qa(jj).loc=sort([loc; spikes]); end end