Hi, my DataFrame has records matching the conditions below, but the DataFrame never gets filtered. I always get the total count of the original records, even after applying the filter function below. Am I doing anything wrong here?
Note: I tied OR and || too def filterDatapointRawCountsDataFrame(datapoint_df: DataFrame): DataFrame = { //println("................."+datapoint_df(Constants.Datapoint.Vin).cast(String)) val x = datapoint_df.filter( datapoint_df(Constants.Datapoint.Vin).contains("VIN") or datapoint_df(Constants.Datapoint.Vin).contains("XXX") or datapoint_df(Constants.Datapoint.Vin).contains("Ÿ") or datapoint_df(Constants.Datapoint.Vin).contains("0123456789ABCDEFG") or datapoint_df(Constants.Datapoint.Vin).contains("") or datapoint_df(Constants.Datapoint.Vin).contains("XXX") or datapoint_df(Constants.Datapoint.Vin).contains("00000000") or datapoint_df(Constants.Datapoint.Vin).contains("99999999") or datapoint_df(Constants.Datapoint.Vin).contains("") or datapoint_df(Constants.Datapoint.Vin).contains("@") or datapoint_df(Constants.Datapoint.Vin).contains("?") or datapoint_df(Constants.Datapoint.Vin).contains("*") or datapoint_df(Constants.Datapoint.Vin).contains(" ") or datapoint_df(Constants.Datapoint.Vin).contains("FFF") or datapoint_df(Constants.Datapoint.Vin).contains("INTAKE") or datapoint_df(Constants.Datapoint.Vin).contains("SERVICES") or datapoint_df(Constants.Datapoint.Vin).contains("1111111111") or datapoint_df(Constants.Datapoint.Vin).contains("JJJJJJJJJJ") or datapoint_df(Constants.Datapoint.Vin).contains("AAAAAAAAAA") or datapoint_df(Constants.Datapoint.Vin).contains("BBBBBBBBBB") or datapoint_df(Constants.Datapoint.Vin).contains("TTTTTTTTTT") or datapoint_df(Constants.Datapoint.Vin).contains("NUMBER") or datapoint_df(Constants.Datapoint.Vin).contains("NOTSUREATTHISTIME") or datapoint_df(Constants.Datapoint.Vin).contains("1800CALLJOESNAPPY") or datapoint_df(Constants.Datapoint.Vin).contains("34567890") or datapoint_df(Constants.Datapoint.Vin).contains("012345") or datapoint_df(Constants.Datapoint.Vin).contains("JALES") or datapoint_df(Constants.Datapoint.Vin).contains("SATAN") or datapoint_df(Constants.Datapoint.Vin).contains("SIMULAT") ) x; } Thanks, Asmath On Tue, Feb 
28, 2017 at 10:49 AM, KhajaAsmath Mohammed < mdkhajaasm...@gmail.com> wrote: > Hi, > > Could anyone please provide me your suggestions on how to resolve the > issue that I am facing with not contains code on dataframe column. > > Here is the code. My dataframe is not getting filtered with below > conditions. I even tried not and ! on Column. any suggestions? > > def filterDatapointRawCountsDF(vin: Column): Column = > > { > > import org.apache.spark.sql.functions.not > val filterColumn: Column = { > > not(vin.contains("VIN")) || > not(vin.contains("Ÿ")) || > not(vin.contains("0123456789ABCDEFG")) > > } > > filterColumn; > > } > > > } > >