You are viewing an old version of this page. View the current version.
Compare with Current
View Page History
Version 1
Next »
This example illustrates how a CSV file on HDFS can be converted to Avro in Standalone mode. The final output is written back to HDFS in this example.
job.name=CSVToAvroQuickStart
fs.uri=hdfs://localhost:9000
converter.classes=org.apache.gobblin.converter.csv.CsvToJsonConverter,org.apache.gobblin.converter.avro.JsonIntermediateToAvroConverter
writer.builder.class=org.apache.gobblin.writer.AvroDataWriterBuilder
source.class=org.apache.gobblin.source.extractor.filebased.TextFileBasedSource
source.filebased.data.directory=${fs.uri}/source
source.schema=[{"columnName":"ID","comment":"","isNullable":"true","dataType":{"type":"String"}},{"columnName":"NAME","comment":"","isNullable":"true","dataType":{"type":"String"}}]
source.skip.first.record=false
extract.table.name=CsvToAvro
extract.namespace=org.apache.gobblin.example
extract.table.type=APPEND_ONLY
converter.csv.to.json.delimiter=","
writer.output.format=AVRO
writer.destination.type=HDFS
writer.fs.uri=${fs.uri}
writer.staging.dir=/writer-staging
writer.output.dir=/output
state.store.dir=/state
state.store.fs.uri=${fs.uri}
data.publisher.final.dir=/final
|
---|