001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.nested.core;
022
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Map;
026import java.util.stream.Stream;
027import java.util.stream.StreamSupport;
028
029import cascading.flow.FlowProcess;
030import cascading.operation.Function;
031import cascading.operation.FunctionCall;
032import cascading.operation.OperationCall;
033import cascading.operation.OperationException;
034import cascading.operation.SerFunction;
035import cascading.tuple.Fields;
036import cascading.tuple.Tuple;
037import cascading.util.LazyIterable;
038import cascading.util.Pair;
039import heretical.pointer.path.NestedPointer;
040import heretical.pointer.path.NestedPointerCompiler;
041
042/**
043 * Class NestedGetAllAggregateFunction is the base class for {@link Function} implementations that when given
044 * the root of a collection of container nodes, need to aggregate the values of a common child element.
045 * <p>
046 * For example, given an array of objects that represent a person, the function can calculate the average
047 * age of the people listed in the array if every object has the same property name for {@code age}.
048 * <p>
049 * The {@code stringRootPointer} value must point to a container node with one or more child elements or objects.
050 * <p>
051 * The {@code pointerMap} maps a child node or property found in the child object to a {@link NestedAggregate}
052 * implementation that implements the required aggregation algorithm.
053 * <p>
054 * If {@code failOnMissingNode} is {@code true} and the root pointer is empty or the field pointer returns a
055 * {@code null} value, the operation will fail.
056 * <p>
057 * See {@link cascading.nested.core.aggregate.SimpleNestedAggregate} for a convenient base implementation.
058 */
059public class NestedGetAllAggregateFunction<Node, Result> extends NestedGetFunction<Node, Result>
060  {
061  protected final NestedPointer<Node, Result> rootPointer;
062  protected final SerFunction<Stream<Node>, Stream<Node>> streamWrapper;
063  protected final NestedAggregate<Node, ?>[] nestedAggregates;
064
065  /**
066   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
067   *
068   * @param nestedCoercibleType of type NestedCoercibleType
069   * @param stringRootPointer   of type String
070   * @param failOnMissingNode   of type boolean
071   * @param pointerMap          of type Map
072   */
073  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, boolean failOnMissingNode, Map<String, NestedAggregate<Node, ?>> pointerMap )
074    {
075    this( nestedCoercibleType, stringRootPointer, null, failOnMissingNode, pointerMap );
076    }
077
078  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, SerFunction<Stream<Node>, Stream<Node>> streamWrapper, boolean failOnMissingNode, Map<String, NestedAggregate<Node, ?>> pointerMap )
079    {
080    this( nestedCoercibleType, stringRootPointer, streamWrapper, declared( pointerMap.values() ), failOnMissingNode, asArray( pointerMap.keySet() ), asArrayOf( pointerMap.values() ) );
081    }
082
083  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
084    {
085    this( nestedCoercibleType, stringRootPointer, null, declared( Arrays.asList( nestedAggregates ) ), failOnMissingNode, stringPointers, nestedAggregates );
086    }
087
088  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, Fields fieldDeclaration, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
089    {
090    this( nestedCoercibleType, stringRootPointer, null, fieldDeclaration, failOnMissingNode, stringPointers, nestedAggregates );
091    }
092
093  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, SerFunction<Stream<Node>, Stream<Node>> streamWrapper, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
094    {
095    this( nestedCoercibleType, stringRootPointer, streamWrapper, declared( Arrays.asList( nestedAggregates ) ), failOnMissingNode, stringPointers, nestedAggregates );
096    }
097
098  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, SerFunction<Stream<Node>, Stream<Node>> streamWrapper, Fields fieldDeclaration, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
099    {
100    super( nestedCoercibleType, fieldDeclaration, failOnMissingNode, stringPointers );
101    this.streamWrapper = streamWrapper == null ? s -> s : streamWrapper;
102
103    NestedPointerCompiler<Node, Result> compiler = getNestedPointerCompiler();
104
105    this.rootPointer = compiler.nested( stringRootPointer );
106
107    this.nestedAggregates = nestedAggregates;
108    }
109
110  protected static <Node> Fields declared( Collection<NestedAggregate<Node, ?>> nestedAggregates )
111    {
112    return nestedAggregates.stream().map( NestedAggregate::getFieldDeclaration ).reduce( Fields.NONE, Fields::append );
113    }
114
115  protected static <Node> NestedAggregate[] asArrayOf( Collection<NestedAggregate<Node, ?>> nestedAggregates )
116    {
117    return nestedAggregates.toArray( new NestedAggregate[ 0 ] );
118    }
119
120  @Override
121  protected void verify( String[] stringPointers )
122    {
123    // do nothing as each NestedAggregateFunction may return more than one field
124    }
125
126  @Override
127  public void prepare( FlowProcess flowProcess, OperationCall<Tuple> operationCall )
128    {
129    Pair<NestedAggregate<Node, Object>, Object>[] pairs = new Pair[ nestedAggregates.length ];
130
131    for( int i = 0; i < nestedAggregates.length; i++ )
132      pairs[ i ] = new Pair( nestedAggregates[ i ], nestedAggregates[ i ].createContext( nestedCoercibleType ) );
133
134    LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple> tupleIterator = new LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple>( false, pairs )
135      {
136      @Override
137      protected Tuple convert( Pair<NestedAggregate<Node, Object>, Object> next )
138        {
139        return next.getLhs().complete( next.getRhs() );
140        }
141      };
142
143    operationCall.setContext( new Tuple( Tuple.size( getFieldDeclaration().size() ), pairs, tupleIterator ) );
144    }
145
146  @Override
147  public void operate( FlowProcess flowProcess, FunctionCall<Tuple> functionCall )
148    {
149    Node argument = (Node) functionCall.getArguments().getObject( 0, getCoercibleType() );
150
151    Result result = rootPointer.allAt( argument );
152
153    if( failOnMissingNode && getNestedPointerCompiler().size( result ) == 0 )
154      throw new OperationException( "nodes missing from json node tree at: " + rootPointer );
155
156    Tuple resultTuple = (Tuple) functionCall.getContext().getObject( 0 );
157    Pair<NestedAggregate<Node, Object>, Object>[] pairs = (Pair<NestedAggregate<Node, Object>, Object>[]) functionCall.getContext().getObject( 1 );
158    LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple> tupleIterator = (LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple>) functionCall.getContext().getObject( 2 );
159
160    for( Pair<NestedAggregate<Node, Object>, Object> pair : pairs )
161      pair.setRhs( pair.getLhs().resetContext( pair.getRhs() ) );
162
163    stream( result ).forEach( node -> aggregateNode( pairs, node ) );
164
165    resultTuple.setAll( tupleIterator );
166
167    functionCall.getOutputCollector().add( resultTuple );
168    }
169
170  /**
171   * Applies the given stream wrapper function
172   *
173   * @param allAt
174   * @return
175   */
176  protected Stream<Node> stream( Result allAt )
177    {
178    return streamWrapper.apply( StreamSupport.stream( iterable( allAt ).spliterator(), false ) );
179    }
180
181  protected void aggregateNode( Pair<NestedAggregate<Node, Object>, Object>[] pairs, Node node )
182    {
183    extractResult( ( i, value ) -> setInto( pairs, i, value ), node );
184    }
185
186  protected <Context> void setInto( Pair<NestedAggregate<Node, Context>, Context>[] pairs, int i, Node result )
187    {
188    NestedAggregate<Node, Context> nestedAggregate = pairs[ i ].getLhs();
189    Context context = pairs[ i ].getRhs();
190
191    nestedAggregate.aggregate( context, result );
192    }
193  }