001/*
002 * Copyright (c) 2016-2019 Chris K Wensel. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.nested.core;
022
023import java.util.ArrayList;
024import java.util.List;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import cascading.flow.FlowProcess;
029import cascading.operation.Filter;
030import cascading.operation.FilterCall;
031import cascading.operation.OperationCall;
032import cascading.operation.OperationException;
033import heretical.pointer.path.NestedPointer;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037/**
038 * Class NestedRegexFilter is the base class for {@link Filter} implementations that want to filter a tuple stream
039 * based on the values in a nested object tree.
040 * <p>
041 * {@link cascading.tuple.Tuple} instances are retained if any of the {@link Pattern} instances match.
042 * <p>
043 * By default a {@code null} value will throw an {@link OperationException} unless {@code failOnMissingNode} is true,
044 * the {@code null} will be converted to an empty string before being passed to a pattern for matching.
045 * <p>
046 * Note that a wildcard or descent pointer can be used which may return multiple elements, each of which will be tested,
047 * and any match will trigger the filter.
048 * <p>
049 * Use the {@link cascading.operation.filter.Not} filter to negate this filter.
050 */
051public class NestedRegexFilter<Node, Results> extends NestedBaseOperation<Node, Results, Matcher[]> implements Filter<Matcher[]>
052  {
053  private static final Logger LOG = LoggerFactory.getLogger( NestedRegexFilter.class );
054
055  private static final String EMPTY = "";
056
057  final NestedPointer<Node, Results> pointer;
058  final List<Pattern> patterns;
059  final boolean failOnMissingNode;
060
061  public NestedRegexFilter( NestedCoercibleType<Node, Results> nestedCoercibleType, String pointer, List<Pattern> patterns, boolean failOnMissingNode )
062    {
063    super( nestedCoercibleType );
064    this.pointer = getNestedPointerCompiler().nested( pointer );
065    this.patterns = new ArrayList<>( patterns );
066    this.failOnMissingNode = failOnMissingNode;
067    }
068
069  @Override
070  public void prepare( FlowProcess flowProcess, OperationCall<Matcher[]> operationCall )
071    {
072    Matcher[] matchers = new Matcher[ patterns.size() ];
073
074    for( int i = 0; i < patterns.size(); i++ )
075      matchers[ i ] = patterns.get( i ).matcher( "" );
076
077    operationCall.setContext( matchers );
078    }
079
080  @Override
081  public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher[]> filterCall )
082    {
083    Node node = (Node) filterCall.getArguments().getObject( 0, getCoercibleType() );
084    Results results = pointer.allAt( node );
085
086    if( size( results ) == 0 )
087      {
088      if( failOnMissingNode )
089        throw new OperationException( "node missing from json node tree: " + pointer );
090
091      for( Matcher matcher : filterCall.getContext() )
092        {
093        matcher.reset( EMPTY );
094
095        boolean found = matcher.find();
096
097        if( LOG.isDebugEnabled() )
098          LOG.debug( "pointer: {}, pattern: {}, matches: {}, on empty string, no json node found with ", pointer, matcher.pattern().pattern(), found );
099
100        if( found )
101          return false;
102        }
103
104      return true;
105      }
106
107    Iterable<Node> iterable = iterable( results );
108
109    for( Node result : iterable )
110      {
111      String value = getCoercibleType().coerce( result, String.class );
112
113      if( value == null )
114        value = EMPTY;
115
116      for( Matcher matcher : filterCall.getContext() )
117        {
118        matcher.reset( value );
119
120        boolean found = matcher.find();
121
122        if( LOG.isDebugEnabled() )
123          LOG.debug( "pointer: {}, pattern: {}, matches: {}, element: {}", pointer, matcher.pattern().pattern(), found, value );
124
125        if( found )
126          return false;
127        }
128      }
129
130    return true;
131    }
132  }