/*
 * Copyright (c) 2016-2019 Chris K Wensel. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.nested.core;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import cascading.flow.FlowProcess;
import cascading.operation.Filter;
import cascading.operation.FilterCall;
import cascading.operation.OperationCall;
import cascading.operation.OperationException;
import heretical.pointer.path.NestedPointer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class NestedRegexFilter is the base class for {@link Filter} implementations that want to filter a tuple stream
 * based on the values in a nested object tree.
 * <p>
 * {@link cascading.tuple.Tuple} instances are retained if any of the {@link Pattern} instances match.
 * <p>
 * If the pointer resolves to no nodes and {@code failOnMissingNode} is {@code true}, an {@link OperationException}
 * is thrown. If {@code failOnMissingNode} is {@code false}, the patterns are applied to an empty string instead.
 * <p>
 * A resolved node that coerces to a {@code null} string is converted to an empty string before being passed to a
 * pattern for matching.
 * <p>
 * Note that a wildcard or descent pointer can be used which may return multiple elements, each of which will be tested,
 * and any match will trigger the filter.
 * <p>
 * Use the {@link cascading.operation.filter.Not} filter to negate this filter.
 *
 * @param <Node>    the type of a single node in the nested object tree
 * @param <Results> the type returned by the pointer when resolving all matching nodes
 */
public class NestedRegexFilter<Node, Results> extends NestedBaseOperation<Node, Results, Matcher[]> implements Filter<Matcher[]>
  {
  private static final Logger LOG = LoggerFactory.getLogger( NestedRegexFilter.class );

  private static final String EMPTY = "";

  final NestedPointer<Node, Results> pointer;
  final List<Pattern> patterns;
  final boolean failOnMissingNode;

  /**
   * Creates a new NestedRegexFilter.
   *
   * @param nestedCoercibleType the coercible type handed to the parent operation
   * @param pointer             the pointer expression, compiled once here into a {@link NestedPointer}
   * @param patterns            the patterns to test each resolved value against; the list is copied
   * @param failOnMissingNode   if {@code true}, {@link #isRemove(FlowProcess, FilterCall)} throws an
   *                            {@link OperationException} when the pointer resolves to no nodes
   */
  public NestedRegexFilter( NestedCoercibleType<Node, Results> nestedCoercibleType, String pointer, List<Pattern> patterns, boolean failOnMissingNode )
    {
    super( nestedCoercibleType );
    this.pointer = getNestedPointerCompiler().nested( pointer );
    // copy so later changes to the caller's list do not affect this operation
    this.patterns = new ArrayList<>( patterns );
    this.failOnMissingNode = failOnMissingNode;
    }

  /**
   * Creates one reusable {@link Matcher} per configured {@link Pattern} and stores the array as the operation
   * context, so {@link #isRemove(FlowProcess, FilterCall)} only has to reset the matchers for each value.
   *
   * @param flowProcess   the current flow process, unused here
   * @param operationCall the call that receives the matcher array as its context
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Matcher[]> operationCall )
    {
    Matcher[] matchers = new Matcher[ patterns.size() ];

    for( int i = 0; i < matchers.length; i++ )
      matchers[ i ] = patterns.get( i ).matcher( EMPTY );

    operationCall.setContext( matchers );
    }

  /**
   * Decides whether the current tuple should be removed from the stream.
   *
   * @param flowProcess the current flow process, unused here
   * @param filterCall  supplies the argument at position 0 and the matcher array created in
   *                    {@link #prepare(FlowProcess, OperationCall)}
   * @return {@code false} (retain) as soon as any pattern is found in any resolved value, {@code true} (remove)
   * if no pattern is found
   * @throws OperationException if the pointer resolves to no nodes and {@code failOnMissingNode} is {@code true}
   */
  @Override
  public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher[]> filterCall )
    {
    // the argument is requested with this operation's coercible type, so it is expected to be a Node
    @SuppressWarnings("unchecked")
    Node node = (Node) filterCall.getArguments().getObject( 0, getCoercibleType() );
    Results results = pointer.allAt( node );
    Matcher[] matchers = filterCall.getContext();

    if( size( results ) == 0 )
      {
      if( failOnMissingNode )
        throw new OperationException( "node missing from json node tree: " + pointer );

      // nothing resolved: give the patterns a chance to match the empty string
      return !matchesAny( matchers, EMPTY, true );
      }

    for( Node result : iterable( results ) )
      {
      String value = getCoercibleType().coerce( result, String.class );

      if( matchesAny( matchers, value == null ? EMPTY : value, false ) )
        return false;
      }

    return true;
    }

  /**
   * Resets each matcher against the given value, in order, and stops at the first one that finds a match.
   *
   * @param matchers    the matchers to test
   * @param value       the non-null value to search
   * @param nodeMissing {@code true} if the value stands in for a missing node; only selects the debug message
   * @return {@code true} if any matcher finds a match in the value
   */
  private boolean matchesAny( Matcher[] matchers, String value, boolean nodeMissing )
    {
    for( Matcher matcher : matchers )
      {
      matcher.reset( value );

      boolean found = matcher.find();

      if( LOG.isDebugEnabled() )
        {
        if( nodeMissing )
          LOG.debug( "pointer: {}, pattern: {}, matches: {}, on empty string, no json node found", pointer, matcher.pattern().pattern(), found );
        else
          LOG.debug( "pointer: {}, pattern: {}, matches: {}, element: {}", pointer, matcher.pattern().pattern(), found, value );
        }

      if( found )
        return true;
      }

    return false;
    }
  }