/*
 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
 *
 * Project and contact information: https://cascading.wensel.net/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.nested.core;

import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import cascading.flow.FlowProcess;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.operation.OperationCall;
import cascading.operation.OperationException;
import cascading.operation.SerFunction;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.util.LazyIterable;
import cascading.util.Pair;
import heretical.pointer.path.NestedPointer;
import heretical.pointer.path.NestedPointerCompiler;

/**
 * Class NestedGetAllAggregateFunction is the base class for {@link Function} implementations that when given
 * the root of a collection of container nodes, need to aggregate the values of a common child element.
 * <p>
 * For example, given an array of objects that represent a person, the function can calculate the average
 * age of the people listed in the array if every object has the same property name for {@code age}.
 * <p>
 * The {@code stringRootPointer} value must point to a container node with one or more child elements or objects.
 * <p>
 * The {@code pointerMap} maps a child node or property found in the child object to a {@link NestedAggregate}
 * implementation that implements the required aggregation algorithm.
 * <p>
 * If {@code failOnMissingNode} is {@code true} and the root pointer is empty or the field pointer returns a
 * {@code null} value, the operation will fail.
 * <p>
 * See {@link cascading.nested.core.aggregate.SimpleNestedAggregate} for a convenient base implementation.
 */
public class NestedGetAllAggregateFunction<Node, Result> extends NestedGetFunction<Node, Result>
  {
  /** Pointer compiled from {@code stringRootPointer}; resolves all the nodes to aggregate over. */
  protected final NestedPointer<Node, Result> rootPointer;
  /** Wrapper applied to the stream of resolved nodes before aggregation; never {@code null}. */
  protected final SerFunction<Stream<Node>, Stream<Node>> streamWrapper;
  /** The aggregates to apply; selected by index when a value is handed to {@link #setInto(Pair[], int, Object)}. */
  protected final NestedAggregate<Node, ?>[] nestedAggregates;

  /**
   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
   * <p>
   * No stream wrapper is applied, and the declared fields are the concatenation of the field declarations
   * of the given aggregates.
   *
   * @param nestedCoercibleType of type NestedCoercibleType
   * @param stringRootPointer   of type String, the pointer to the nodes to aggregate over
   * @param failOnMissingNode   of type boolean, if {@code true} fail when the root pointer resolves no nodes
   * @param pointerMap          of type Map, child pointers mapped to the aggregate to apply to them
   */
  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, boolean failOnMissingNode, Map<String, NestedAggregate<Node, ?>> pointerMap )
    {
    this( nestedCoercibleType, stringRootPointer, null, failOnMissingNode, pointerMap );
    }

  /**
   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
   * <p>
   * The declared fields are the concatenation of the field declarations of the given aggregates.
   *
   * @param nestedCoercibleType of type NestedCoercibleType
   * @param stringRootPointer   of type String, the pointer to the nodes to aggregate over
   * @param streamWrapper       of type SerFunction, applied to the stream of resolved nodes before aggregation,
   *                            may be {@code null} to leave the stream unchanged
   * @param failOnMissingNode   of type boolean, if {@code true} fail when the root pointer resolves no nodes
   * @param pointerMap          of type Map, child pointers mapped to the aggregate to apply to them
   */
  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, SerFunction<Stream<Node>, Stream<Node>> streamWrapper, boolean failOnMissingNode, Map<String, NestedAggregate<Node, ?>> pointerMap )
    {
    // keySet() and values() of the same unmodified map are read back to back to build the two parallel arrays
    this( nestedCoercibleType, stringRootPointer, streamWrapper, declared( pointerMap.values() ), failOnMissingNode, asArray( pointerMap.keySet() ), asArrayOf( pointerMap.values() ) );
    }

  /**
   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
   * <p>
   * No stream wrapper is applied, and the declared fields are the concatenation of the field declarations
   * of the given aggregates.
   *
   * @param nestedCoercibleType of type NestedCoercibleType
   * @param stringRootPointer   of type String, the pointer to the nodes to aggregate over
   * @param failOnMissingNode   of type boolean, if {@code true} fail when the root pointer resolves no nodes
   * @param stringPointers      of type String[], the child pointers handed to the parent class
   * @param nestedAggregates    of type NestedAggregate[], the aggregates to apply, must not be {@code null}
   */
  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
    {
    this( nestedCoercibleType, stringRootPointer, null, declared( Arrays.asList( nestedAggregates ) ), failOnMissingNode, stringPointers, nestedAggregates );
    }

  /**
   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
   * <p>
   * No stream wrapper is applied.
   *
   * @param nestedCoercibleType of type NestedCoercibleType
   * @param stringRootPointer   of type String, the pointer to the nodes to aggregate over
   * @param fieldDeclaration    of type Fields, the fields this function declares
   * @param failOnMissingNode   of type boolean, if {@code true} fail when the root pointer resolves no nodes
   * @param stringPointers      of type String[], the child pointers handed to the parent class
   * @param nestedAggregates    of type NestedAggregate[], the aggregates to apply, must not be {@code null}
   */
  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, Fields fieldDeclaration, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
    {
    this( nestedCoercibleType, stringRootPointer, null, fieldDeclaration, failOnMissingNode, stringPointers, nestedAggregates );
    }

  /**
   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
   * <p>
   * The declared fields are the concatenation of the field declarations of the given aggregates.
   *
   * @param nestedCoercibleType of type NestedCoercibleType
   * @param stringRootPointer   of type String, the pointer to the nodes to aggregate over
   * @param streamWrapper       of type SerFunction, applied to the stream of resolved nodes before aggregation,
   *                            may be {@code null} to leave the stream unchanged
   * @param failOnMissingNode   of type boolean, if {@code true} fail when the root pointer resolves no nodes
   * @param stringPointers      of type String[], the child pointers handed to the parent class
   * @param nestedAggregates    of type NestedAggregate[], the aggregates to apply, must not be {@code null}
   */
  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, SerFunction<Stream<Node>, Stream<Node>> streamWrapper, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
    {
    this( nestedCoercibleType, stringRootPointer, streamWrapper, declared( Arrays.asList( nestedAggregates ) ), failOnMissingNode, stringPointers, nestedAggregates );
    }

  /**
   * Constructor NestedGetAllAggregateFunction creates a new NestedGetAllAggregateFunction instance.
   * <p>
   * This is the canonical constructor all other constructors delegate to.
   * <p>
   * NOTE(review): values are routed to {@code nestedAggregates} by the index supplied through
   * {@code extractResult}; this presumably is the position of the matching entry in {@code stringPointers},
   * so both arrays are expected to be the same length and in the same order -- confirm against the parent class.
   *
   * @param nestedCoercibleType of type NestedCoercibleType
   * @param stringRootPointer   of type String, the pointer to the nodes to aggregate over
   * @param streamWrapper       of type SerFunction, applied to the stream of resolved nodes before aggregation,
   *                            may be {@code null} to leave the stream unchanged
   * @param fieldDeclaration    of type Fields, the fields this function declares
   * @param failOnMissingNode   of type boolean, if {@code true} fail when the root pointer resolves no nodes
   * @param stringPointers      of type String[], the child pointers handed to the parent class
   * @param nestedAggregates    of type NestedAggregate[], the aggregates to apply, must not be {@code null}
   * @throws NullPointerException if {@code nestedAggregates} is {@code null}
   */
  public NestedGetAllAggregateFunction( NestedCoercibleType<Node, Result> nestedCoercibleType, String stringRootPointer, SerFunction<Stream<Node>, Stream<Node>> streamWrapper, Fields fieldDeclaration, boolean failOnMissingNode, String[] stringPointers, NestedAggregate<Node, ?>[] nestedAggregates )
    {
    super( nestedCoercibleType, fieldDeclaration, failOnMissingNode, stringPointers );

    // a missing wrapper is replaced by the identity so stream() never needs a null check
    this.streamWrapper = streamWrapper == null ? s -> s : streamWrapper;

    NestedPointerCompiler<Node, Result> compiler = getNestedPointerCompiler();

    this.rootPointer = compiler.nested( stringRootPointer );

    // fail here rather than with an anonymous NullPointerException later in prepare()
    this.nestedAggregates = Objects.requireNonNull( nestedAggregates, "nestedAggregates may not be null" );
    }

  /**
   * Concatenates the field declarations of the given aggregates, in iteration order, starting from
   * {@link Fields#NONE}.
   *
   * @param nestedAggregates the aggregates providing the field declarations
   * @param <Node>           the node type
   * @return the appended Fields of all the given aggregates
   */
  protected static <Node> Fields declared( Collection<NestedAggregate<Node, ?>> nestedAggregates )
    {
    return nestedAggregates.stream().map( NestedAggregate::getFieldDeclaration ).reduce( Fields.NONE, Fields::append );
    }

  /**
   * Copies the given collection of aggregates into a new array, in iteration order.
   * <p>
   * The raw array return type is retained so the method signature stays unchanged for existing callers.
   *
   * @param nestedAggregates the aggregates to copy
   * @param <Node>           the node type
   * @return a new array holding the given aggregates
   */
  @SuppressWarnings("rawtypes")
  protected static <Node> NestedAggregate[] asArrayOf( Collection<NestedAggregate<Node, ?>> nestedAggregates )
    {
    return nestedAggregates.toArray( new NestedAggregate[ 0 ] );
    }

  @Override
  protected void verify( String[] stringPointers )
    {
    // do nothing as each NestedAggregate may return more than one field
    }

  /**
   * Creates the per-operation state and stores it in the operation context as a three element Tuple:
   * <ol>
   * <li>the reusable result Tuple, created via {@code Tuple.size()} with the declared field count</li>
   * <li>the array pairing every aggregate with the context it created</li>
   * <li>a {@link LazyIterable} over those pairs that converts each pair into the Tuple returned by
   * {@code complete()} on the aggregate</li>
   * </ol>
   *
   * @param flowProcess   of type FlowProcess
   * @param operationCall of type OperationCall, receives the new context
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Tuple> operationCall )
    {
    // generic arrays cannot be created directly, so the raw array is created and narrowed on assignment
    @SuppressWarnings({"unchecked", "rawtypes"})
    Pair<NestedAggregate<Node, Object>, Object>[] pairs = new Pair[ nestedAggregates.length ];

    for( int i = 0; i < nestedAggregates.length; i++ )
      {
      // the context type of each aggregate is unknown here; it is only ever handed back to the aggregate
      // that created it, so viewing it as Object is safe
      @SuppressWarnings("unchecked")
      NestedAggregate<Node, Object> nestedAggregate = (NestedAggregate<Node, Object>) nestedAggregates[ i ];

      pairs[ i ] = new Pair<>( nestedAggregate, nestedAggregate.createContext( nestedCoercibleType ) );
      }

    LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple> tupleIterator = new LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple>( false, pairs )
      {
      @Override
      protected Tuple convert( Pair<NestedAggregate<Node, Object>, Object> next )
        {
        return next.getLhs().complete( next.getRhs() );
        }
      };

    operationCall.setContext( new Tuple( Tuple.size( getFieldDeclaration().size() ), pairs, tupleIterator ) );
    }

  /**
   * Resolves all nodes at the root pointer within the first argument, resets every aggregate context,
   * feeds each resolved node to the aggregates, and then emits a single result Tuple populated from the
   * completed aggregates.
   *
   * @param flowProcess  of type FlowProcess
   * @param functionCall of type FunctionCall, provides the arguments, the context created in
   *                     {@link #prepare(FlowProcess, OperationCall)}, and the output collector
   * @throws OperationException if {@code failOnMissingNode} is {@code true} and the root pointer resolves no nodes
   */
  @Override
  public void operate( FlowProcess flowProcess, FunctionCall<Tuple> functionCall )
    {
    @SuppressWarnings("unchecked")
    Node argument = (Node) functionCall.getArguments().getObject( 0, getCoercibleType() );

    Result result = rootPointer.allAt( argument );

    if( failOnMissingNode && getNestedPointerCompiler().size( result ) == 0 )
      throw new OperationException( "nodes missing from json node tree at: " + rootPointer );

    // positions match the Tuple built in prepare(): 0 = result tuple, 1 = pairs, 2 = lazy completion iterable
    Tuple context = functionCall.getContext();
    Tuple resultTuple = (Tuple) context.getObject( 0 );
    @SuppressWarnings("unchecked")
    Pair<NestedAggregate<Node, Object>, Object>[] pairs = (Pair<NestedAggregate<Node, Object>, Object>[]) context.getObject( 1 );
    @SuppressWarnings("unchecked")
    LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple> tupleIterator = (LazyIterable<Pair<NestedAggregate<Node, Object>, Object>, Tuple>) context.getObject( 2 );

    // the pairs are reused across calls, so every context is reset before aggregating the current argument
    for( Pair<NestedAggregate<Node, Object>, Object> pair : pairs )
      pair.setRhs( pair.getLhs().resetContext( pair.getRhs() ) );

    stream( result ).forEach( node -> aggregateNode( pairs, node ) );

    resultTuple.setAll( tupleIterator );

    functionCall.getOutputCollector().add( resultTuple );
    }

  /**
   * Creates a sequential stream over the nodes in the given result and applies the configured stream
   * wrapper function to it.
   *
   * @param allAt the result of resolving the root pointer
   * @return the stream returned by the stream wrapper
   */
  protected Stream<Node> stream( Result allAt )
    {
    return streamWrapper.apply( StreamSupport.stream( iterable( allAt ).spliterator(), false ) );
    }

  /**
   * Extracts the values from the given node via {@code extractResult} and hands each one, along with the
   * index it was reported under, to {@link #setInto(Pair[], int, Object)}.
   *
   * @param pairs the aggregates paired with their current contexts
   * @param node  the node to extract values from
   */
  protected void aggregateNode( Pair<NestedAggregate<Node, Object>, Object>[] pairs, Node node )
    {
    extractResult( ( i, value ) -> setInto( pairs, i, value ), node );
    }

  /**
   * Applies the given value to the aggregate at the given index, using the context paired with it.
   *
   * @param pairs     the aggregates paired with their current contexts
   * @param i         the index of the pair to aggregate into
   * @param result    the value to aggregate
   * @param <Context> the context type of the aggregate
   */
  protected <Context> void setInto( Pair<NestedAggregate<Node, Context>, Context>[] pairs, int i, Node result )
    {
    NestedAggregate<Node, Context> nestedAggregate = pairs[ i ].getLhs();
    Context context = pairs[ i ].getRhs();

    nestedAggregate.aggregate( context, result );
    }
  }