001/* 002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved. 003 * 004 * Project and contact information: https://cascading.wensel.net/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.nested.json.local; 022 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.LineNumberReader; 026import java.io.OutputStream; 027import java.io.PrintWriter; 028import java.util.Properties; 029 030import cascading.flow.FlowProcess; 031import cascading.nested.json.JSONCoercibleType; 032import cascading.scheme.SinkCall; 033import cascading.scheme.SourceCall; 034import cascading.scheme.local.Compressors; 035import cascading.scheme.local.TextLine; 036import cascading.tuple.Fields; 037import cascading.tuple.Tuple; 038import cascading.tuple.TupleEntry; 039import com.fasterxml.jackson.databind.DeserializationFeature; 040import com.fasterxml.jackson.databind.JsonNode; 041import com.fasterxml.jackson.databind.ObjectMapper; 042 043/** 044 * A JSONTextLine is a type of {@link cascading.scheme.Scheme} for JSON text files. Files are broken into 045 * lines, where each line is a JSON object. Either line-feed or carriage-return are used to signal end of line. 046 * <p> 047 * By default, this scheme returns a {@link Tuple} with one field, "json" with the type {@link JSONCoercibleType}. 048 * <p> 049 * Any {@link Fields} object passed to the constructor will have the JSONCoercibleType.TYPE type applied. 050 * <p> 051 * In order to read or write a compressed files, pass a {@link cascading.scheme.local.CompressorScheme.Compressor} 052 * instance to the appropriate constructors. See {@link Compressors} for provided compression algorithms. 053 * <p> 054 * Note, when supplying a custom {@link ObjectMapper}, the default {@link JSONCoercibleType#TYPE} and ObjectMapper 055 * sets the {@link DeserializationFeature#FAIL_ON_READING_DUP_TREE_KEY} Jackson property. 056 * 057 * @see Compressors 058 */ 059public class JSONTextLine extends TextLine 060 { 061 public static final Fields DEFAULT_FIELDS = new Fields( "json" ).applyTypes( JSONCoercibleType.TYPE ); 062 063 private ObjectMapper mapper = new ObjectMapper(); 064 065 { 066 // prevents json object from being created with duplicate names at the same level 067 mapper.setConfig( mapper.getDeserializationConfig() 068 .with( DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY ) ); 069 } 070 071 /** 072 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 073 * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json". 074 */ 075 public JSONTextLine() 076 { 077 this( DEFAULT_FIELDS ); 078 } 079 080 /** 081 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 082 * {@link cascading.flow.local.LocalFlowConnector}. 083 * 084 * @param fields of Fields 085 */ 086 public JSONTextLine( Fields fields ) 087 { 088 this( fields, DEFAULT_CHARSET ); 089 } 090 091 /** 092 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 093 * {@link cascading.flow.local.LocalFlowConnector}. 094 * 095 * @param fields of Fields 096 * @param charsetName of String 097 */ 098 public JSONTextLine( Fields fields, String charsetName ) 099 { 100 this( fields, null, charsetName ); 101 } 102 103 /** 104 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 105 * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json". 106 * 107 * @param compressor of type Compressor, see {@link Compressors} 108 */ 109 public JSONTextLine( Compressor compressor ) 110 { 111 this( DEFAULT_FIELDS, compressor ); 112 } 113 114 /** 115 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 116 * {@link cascading.flow.local.LocalFlowConnector}. 117 * 118 * @param fields of Fields 119 * @param compressor of type Compressor, see {@link Compressors} 120 */ 121 public JSONTextLine( Fields fields, Compressor compressor ) 122 { 123 this( fields, compressor, DEFAULT_CHARSET ); 124 } 125 126 /** 127 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 128 * {@link cascading.flow.local.LocalFlowConnector}. 129 * 130 * @param fields of Fields 131 * @param compressor of type Compressor, see {@link Compressors} 132 * @param charsetName of String 133 */ 134 public JSONTextLine( Fields fields, Compressor compressor, String charsetName ) 135 { 136 this( null, fields, compressor, charsetName ); 137 } 138 139 /** 140 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 141 * {@link cascading.flow.local.LocalFlowConnector}. 142 * 143 * @param mapper of ObjectMapper 144 * @param fields of Fields 145 */ 146 public JSONTextLine( ObjectMapper mapper, Fields fields ) 147 { 148 this( mapper, fields, DEFAULT_CHARSET ); 149 } 150 151 /** 152 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 153 * {@link cascading.flow.local.LocalFlowConnector}. 154 * 155 * @param mapper of ObjectMapper 156 * @param fields of Fields 157 * @param charsetName of String 158 */ 159 public JSONTextLine( ObjectMapper mapper, Fields fields, String charsetName ) 160 { 161 this( mapper, fields, null, charsetName ); 162 } 163 164 /** 165 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 166 * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json". 167 * 168 * @param mapper of ObjectMapper 169 * @param compressor of type Compressor, see {@link Compressors} 170 */ 171 public JSONTextLine( ObjectMapper mapper, Compressor compressor ) 172 { 173 this( mapper, DEFAULT_FIELDS, compressor ); 174 } 175 176 /** 177 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 178 * {@link cascading.flow.local.LocalFlowConnector}. 179 * 180 * @param mapper of ObjectMapper 181 * @param fields of Fields 182 * @param compressor of type Compressor, see {@link Compressors} 183 */ 184 public JSONTextLine( ObjectMapper mapper, Fields fields, Compressor compressor ) 185 { 186 this( mapper, fields, compressor, DEFAULT_CHARSET ); 187 } 188 189 /** 190 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 191 * {@link cascading.flow.local.LocalFlowConnector}. 192 * 193 * @param mapper of ObjectMapper 194 * @param fields of Fields 195 * @param compressor of type Compressor, see {@link Compressors} 196 * @param charsetName of String 197 */ 198 public JSONTextLine( ObjectMapper mapper, Fields fields, Compressor compressor, String charsetName ) 199 { 200 if( mapper != null ) 201 this.mapper = mapper; 202 203 if( fields == null ) 204 throw new IllegalArgumentException( "fields may not be null" ); 205 206 if( !fields.isDefined() ) 207 throw new IllegalArgumentException( "fields argument must declare a single field" ); 208 209 if( fields.size() != 1 ) 210 throw new IllegalArgumentException( "may only declare a single source/sink field in the fields argument" ); 211 212 fields = fields.hasTypes() ? fields : fields.applyTypes( new JSONCoercibleType( this.mapper ) ); 213 214 setSinkFields( fields ); 215 setSourceFields( fields ); 216 217 setCompressor( compressor ); 218 219 // throws an exception if not found 220 setCharsetName( charsetName ); 221 } 222 223 @Override 224 public boolean source( FlowProcess<? extends Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException 225 { 226 String line = sourceCall.getContext().readLine(); 227 228 if( line == null ) 229 return false; 230 231 TupleEntry incomingEntry = sourceCall.getIncomingEntry(); 232 233 JsonNode jsonNode = null; 234 235 if( !line.isEmpty() ) 236 jsonNode = mapper.readTree( line ); 237 238 incomingEntry.setObject( 0, jsonNode ); 239 240 return true; 241 } 242 243 @Override 244 public void sink( FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) throws IOException 245 { 246 JsonNode jsonNode = (JsonNode) sinkCall.getOutgoingEntry().getTuple().getObject( 0 ); 247 248 if( jsonNode == null ) 249 { 250 sinkCall.getContext().println(); 251 } 252 else 253 { 254 String string = mapper.writeValueAsString( jsonNode ); 255 256 sinkCall.getContext().println( string ); 257 } 258 } 259 260 @Override 261 protected String getBaseFileExtension() 262 { 263 return "json"; 264 } 265 }