SAS Functions Coding Project (solution) - Data Cleansing and Error Report

[Note: you are recommended to copy and paste the code below onto your SAS Studio for better viewing]

Model Answer

Data Error;
Set Customer;
Length Variable Value $30 Comment $500; 

** Range Check: Income **;
If Income<0 or Income>500000 then do;
Variable = "Income";
Value = put(Income, best.);
Comment = "The Income (" || compress(put(Income, best.)) || ") is out of the logical range.";
Output;
End;

** Range Check: Spend **;
If Spend<0 or Spend>3*Income then do;
Variable = "Spend";
Value = put(Spend, best.);
Comment = "The total spending (" || compress(put(Spend, best.)) || ") is out of the logical range.";
Output;
End;

** Range Check: Age **;
Age = floor((DOS-DOB)/365.25);
If Age<0 or Age>140 then do;
Variable = "Age";
Value = put(Age, best.);
Comment = "The age (" || compress(put(age, best.)) || ") is out of the logical range.";
Output;
End;

** Invalid Character Check: CustID **;
If length(custid)^=8 then do;
Variable = "CustID";
Value = CustID;
Comment = "The CustID (" || compress(CustID) || ") is not 8-character long.";
Output;
End;

If indexc(lowcase(custid), "!@#$%^&*()<>?{}:abcdefghijklmnopqrstuvwxyz")^=0 then do;
Variable = "CustID";
Value = CustID;
Comment = "The CustID (" || compress(CustID) || ") contains invalid character(s).";
Output;
End;

** Invalid Character Check: FIRST/LAST **;
If indexc(first, "!@#$%^&*()<>?{}:1234567890")^=0 then do;
Variable = "First";
Value = First;
Comment = "The First Name (" || compress(First) || ") contains invalid character(s).";
Output;
End;

** Invalid Character Check: FIRST/LAST **;
If indexc(last, "!@#$%^&*()<>?{}:1234567890")^=0 then do;
Variable = "Last";
Value = Last;
Comment = "The Last Name (" || compress(Last) || ") contains invalid character(s).";
Output;
End;

** Invalid Character Check: Occup **;
If indexc(Occup, "!@#$%^&*()<>?{}:1234567890")^=0 then do;
Variable = "Occup";
Value = Occup;
Comment = "The Occup (" || compress(Occup) || ") contains invalid character(s).";
Output;
End;

** Category Value Check: Gender **;
If Gender not in ("Male" "Female") then do;
Variable = "Gender";
Value = Gender;
Comment = "The Gender (" || compress(Gender) || ") can only be Male or Female.";
Output;
End;

** Category Value Check: EDU **;
If Edu not in (1, 2, 3, 4) then do;
Variable = "Edu";
Value = Edu;
Comment = "The Edu (" || compress(put(edu, edu.)) || ") contains invalid results.";
Output;
End;

** Category Value Check: Status **;
If Status not in ("Married" "Single" "Divorced") then do;
Variable = "Status";
Value = Status;
Comment = "The Status (" || compress(status) || ") can only be 'Married', 'Single' or 'Divorced'.";
Output;
End;

Keep CustID Variable Value Comment;
Run;

Proc Export Data=Error
OutFile='/folders/myfolders/Error Report.xls'
Replace
Dbms=xls;
Run;