Avro

Avro 编码的数据可以转换为 Arrow 格式。

Avro 到 Arrow

此示例假设 Avro 模式与 Avro 数据本身分开存储。

import org.apache.arrow.AvroToArrow;
import org.apache.arrow.AvroToArrowConfig;
import org.apache.arrow.AvroToArrowConfigBuilder;
import org.apache.arrow.AvroToArrowVectorIterator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

// Reads Avro-encoded records from users.avro, using the schema stored separately in
// user.avsc, converts them to Arrow through AvroToArrow.avroToArrowIterator, and prints
// each resulting VectorSchemaRoot as TSV on standard output.
//
// The input stream is declared in try-with-resources so the file handle is released on
// every path, including when schema parsing or the conversion throws.
try (FileInputStream avroInput = new FileInputStream("./thirdpartydeps/avro/users.avro")) {
    BinaryDecoder decoder = new DecoderFactory().binaryDecoder(avroInput, null);
    Schema schema = new Schema.Parser().parse(new File("./thirdpartydeps/avro/user.avsc"));
    try (BufferAllocator allocator = new RootAllocator()) {
        AvroToArrowConfig config = new AvroToArrowConfigBuilder(allocator).build();
        try (AvroToArrowVectorIterator avroToArrowVectorIterator = AvroToArrow.avroToArrowIterator(schema, decoder, config)) {
            while (avroToArrowVectorIterator.hasNext()) {
                // Each root returned by next() is closed as soon as it has been printed.
                try (VectorSchemaRoot root = avroToArrowVectorIterator.next()) {
                    System.out.print(root.contentToTSVString());
                }
            }
        }
    }
} catch (Exception e) {
    // Sample code: report any I/O, parsing, or conversion failure on stderr.
    e.printStackTrace();
}
name    favorite_number    favorite_color
Alyssa    256    null
Ben    7    red