001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.nested.json.local;
022
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.LineNumberReader;
026import java.io.OutputStream;
027import java.io.PrintWriter;
028import java.util.Properties;
029
030import cascading.flow.FlowProcess;
031import cascading.nested.json.JSONCoercibleType;
032import cascading.scheme.SinkCall;
033import cascading.scheme.SourceCall;
034import cascading.scheme.local.Compressors;
035import cascading.scheme.local.TextLine;
036import cascading.tuple.Fields;
037import cascading.tuple.Tuple;
038import cascading.tuple.TupleEntry;
039import com.fasterxml.jackson.databind.DeserializationFeature;
040import com.fasterxml.jackson.databind.JsonNode;
041import com.fasterxml.jackson.databind.ObjectMapper;
042
043/**
044 * A JSONTextLine is a type of {@link cascading.scheme.Scheme} for JSON text files. Files are broken into
045 * lines, where each line is a JSON object. Either line-feed or carriage-return are used to signal end of line.
046 * <p>
047 * By default, this scheme returns a {@link Tuple} with one field, "json" with the type {@link JSONCoercibleType}.
048 * <p>
049 * Any {@link Fields} object passed to the constructor will have the JSONCoercibleType.TYPE type applied.
050 * <p>
051 * In order to read or write a compressed files, pass a {@link cascading.scheme.local.CompressorScheme.Compressor}
052 * instance to the appropriate constructors. See {@link Compressors} for provided compression algorithms.
053 * <p>
054 * Note, when supplying a custom {@link ObjectMapper}, the default {@link JSONCoercibleType#TYPE} and ObjectMapper
055 * sets the {@link DeserializationFeature#FAIL_ON_READING_DUP_TREE_KEY} Jackson property.
056 *
057 * @see Compressors
058 */
059public class JSONTextLine extends TextLine
060  {
061  public static final Fields DEFAULT_FIELDS = new Fields( "json" ).applyTypes( JSONCoercibleType.TYPE );
062
063  private ObjectMapper mapper = new ObjectMapper();
064
065  {
066  // prevents json object from being created with duplicate names at the same level
067  mapper.setConfig( mapper.getDeserializationConfig()
068    .with( DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY ) );
069  }
070
071  /**
072   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
073   * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json".
074   */
075  public JSONTextLine()
076    {
077    this( DEFAULT_FIELDS );
078    }
079
080  /**
081   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
082   * {@link cascading.flow.local.LocalFlowConnector}.
083   *
084   * @param fields of Fields
085   */
086  public JSONTextLine( Fields fields )
087    {
088    this( fields, DEFAULT_CHARSET );
089    }
090
091  /**
092   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
093   * {@link cascading.flow.local.LocalFlowConnector}.
094   *
095   * @param fields      of Fields
096   * @param charsetName of String
097   */
098  public JSONTextLine( Fields fields, String charsetName )
099    {
100    this( fields, null, charsetName );
101    }
102
103  /**
104   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
105   * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json".
106   *
107   * @param compressor of type Compressor, see {@link Compressors}
108   */
109  public JSONTextLine( Compressor compressor )
110    {
111    this( DEFAULT_FIELDS, compressor );
112    }
113
114  /**
115   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
116   * {@link cascading.flow.local.LocalFlowConnector}.
117   *
118   * @param fields     of Fields
119   * @param compressor of type Compressor, see {@link Compressors}
120   */
121  public JSONTextLine( Fields fields, Compressor compressor )
122    {
123    this( fields, compressor, DEFAULT_CHARSET );
124    }
125
126  /**
127   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
128   * {@link cascading.flow.local.LocalFlowConnector}.
129   *
130   * @param fields      of Fields
131   * @param compressor  of type Compressor, see {@link Compressors}
132   * @param charsetName of String
133   */
134  public JSONTextLine( Fields fields, Compressor compressor, String charsetName )
135    {
136    this( null, fields, compressor, charsetName );
137    }
138
139  /**
140   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
141   * {@link cascading.flow.local.LocalFlowConnector}.
142   *
143   * @param mapper of ObjectMapper
144   * @param fields of Fields
145   */
146  public JSONTextLine( ObjectMapper mapper, Fields fields )
147    {
148    this( mapper, fields, DEFAULT_CHARSET );
149    }
150
151  /**
152   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
153   * {@link cascading.flow.local.LocalFlowConnector}.
154   *
155   * @param mapper      of ObjectMapper
156   * @param fields      of Fields
157   * @param charsetName of String
158   */
159  public JSONTextLine( ObjectMapper mapper, Fields fields, String charsetName )
160    {
161    this( mapper, fields, null, charsetName );
162    }
163
164  /**
165   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
166   * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json".
167   *
168   * @param mapper     of ObjectMapper
169   * @param compressor of type Compressor, see {@link Compressors}
170   */
171  public JSONTextLine( ObjectMapper mapper, Compressor compressor )
172    {
173    this( mapper, DEFAULT_FIELDS, compressor );
174    }
175
176  /**
177   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
178   * {@link cascading.flow.local.LocalFlowConnector}.
179   *
180   * @param mapper     of ObjectMapper
181   * @param fields     of Fields
182   * @param compressor of type Compressor, see {@link Compressors}
183   */
184  public JSONTextLine( ObjectMapper mapper, Fields fields, Compressor compressor )
185    {
186    this( mapper, fields, compressor, DEFAULT_CHARSET );
187    }
188
189  /**
190   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
191   * {@link cascading.flow.local.LocalFlowConnector}.
192   *
193   * @param mapper      of ObjectMapper
194   * @param fields      of Fields
195   * @param compressor  of type Compressor, see {@link Compressors}
196   * @param charsetName of String
197   */
198  public JSONTextLine( ObjectMapper mapper, Fields fields, Compressor compressor, String charsetName )
199    {
200    if( mapper != null )
201      this.mapper = mapper;
202
203    if( fields == null )
204      throw new IllegalArgumentException( "fields may not be null" );
205
206    if( !fields.isDefined() )
207      throw new IllegalArgumentException( "fields argument must declare a single field" );
208
209    if( fields.size() != 1 )
210      throw new IllegalArgumentException( "may only declare a single source/sink field in the fields argument" );
211
212    fields = fields.hasTypes() ? fields : fields.applyTypes( new JSONCoercibleType( this.mapper ) );
213
214    setSinkFields( fields );
215    setSourceFields( fields );
216
217    setCompressor( compressor );
218
219    // throws an exception if not found
220    setCharsetName( charsetName );
221    }
222
223  @Override
224  public boolean source( FlowProcess<? extends Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException
225    {
226    String line = sourceCall.getContext().readLine();
227
228    if( line == null )
229      return false;
230
231    TupleEntry incomingEntry = sourceCall.getIncomingEntry();
232
233    JsonNode jsonNode = null;
234
235    if( !line.isEmpty() )
236      jsonNode = mapper.readTree( line );
237
238    incomingEntry.setObject( 0, jsonNode );
239
240    return true;
241    }
242
243  @Override
244  public void sink( FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) throws IOException
245    {
246    JsonNode jsonNode = (JsonNode) sinkCall.getOutgoingEntry().getTuple().getObject( 0 );
247
248    if( jsonNode == null )
249      {
250      sinkCall.getContext().println();
251      }
252    else
253      {
254      String string = mapper.writeValueAsString( jsonNode );
255
256      sinkCall.getContext().println( string );
257      }
258    }
259
260  @Override
261  protected String getBaseFileExtension()
262    {
263    return "json";
264    }
265  }