Skip to content

Deserializing failure: hasNext always true (continue looping) #28

@keenangebze

Description

@keenangebze

Hi,

I have this problem when deserializing from avro file, which returned duplicate values and also always continue running (not stopping).

regards

main.go

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"gopkg.in/avro.v0"
	"log"
	"os"
)

type ViewJSON struct {
	Timestamp string `json:"timestamp"`
	UserID    int    `json:"user_id"`
	ProductID int    `json:"product_id"`
}

type ViewAVRO struct {
	Timestamp string
	UserID    int32
	ProductID int32
}

func main() {
	view2avro()
	avro2view()
}

func avro2view() {
	specificReader, err := avro.NewDataFileReader("view.avro", avro.NewSpecificDatumReader())

	if err != nil {
		log.Fatalln("Cannot create data file reader.", err)
	}

	for {
		obj := new(ViewAVRO)
		hasNext, err := specificReader.Next(obj)
		if err != nil {
			log.Println("err", err)
		}
		fmt.Println(obj)
		if !hasNext {
			break
		}
	}
}

func view2avro() {
	in, err := os.Open("view.txt")
	if err != nil {
		log.Fatalln("Cannot open file:", err)
	}
	defer in.Close()

	out, err := os.Create("view.avro")
	if err != nil {
		log.Fatalln("Cannot open file:", err)
	}
	defer out.Close()

	schema, err := avro.ParseSchemaFile("view.avsc")
	if err != nil {
		log.Fatalln("Cannot read schema file:", err)
	}

	dWriter := avro.NewSpecificDatumWriter()
	dWriter.SetSchema(schema)
	dfw, err := avro.NewDataFileWriter(out, schema, dWriter)
	if err != nil {
		log.Fatalln("Cannot read schema file:", err)
	}
	// defer dfw.Close()

	scanner := bufio.NewScanner(in)
	for scanner.Scan() {
		viewRecord := ViewJSON{}
		json.Unmarshal(scanner.Bytes(), &viewRecord)
		// log.Println(viewRecord)
		dfw.Write(&ViewAVRO{
			Timestamp: viewRecord.Timestamp,
			UserID:    int32(viewRecord.UserID),
			ProductID: int32(viewRecord.ProductID),
		}) // must be in int32 (avro "typed"), and then pass it as reference; if not, its skipped.
		// dfw.Flush()
	}
	dfw.Flush()
	if err = dfw.Close(); err != nil {
		log.Fatal(err)
	}
}

view.txt

{"user_id":1,"product_id":100,"timestamp":"2019-03-10T23:59:58.953775421+07:00"}
{"user_id":2,"product_id":101,"timestamp":"2019-03-10T23:59:59.026548024+07:00"}
{"user_id":3,"product_id":102,"timestamp":"2019-03-10T23:59:58.953455719+07:00"}
{"user_id":4,"product_id":501,"timestamp":"2019-03-10T23:59:59.034088025+07:00"}
{"user_id":5,"product_id":30,"timestamp":"2019-03-10T23:59:58.941772276+07:00"}

view.avsc

{
  "namespace": "shop",
  "type": "record",
  "name": "View",
  "fields": [
    {"name": "timestamp", "type": "string"},
    {"name": "userID", "type": "int"},
    {"name": "productID", "type": "int"}
  ]
}

Output:

go build main.go
./main
&{2019-03-10T23:59:58.953775421+07:00 1 100}
&{2019-03-10T23:59:59.026548024+07:00 2 101}
&{2019-03-10T23:59:58.953455719+07:00 3 102}
&{2019-03-10T23:59:59.034088025+07:00 4 501}
&{2019-03-10T23:59:58.941772276+07:00 5 30}
&{2019-03-10T23:59:58.953775421+07:00 1 100}
&{2019-03-10T23:59:59.026548024+07:00 2 101}
&{2019-03-10T23:59:58.953455719+07:00 3 102}
&{2019-03-10T23:59:59.034088025+07:00 4 501}
&{2019-03-10T23:59:58.941772276+07:00 5 30}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
&{ 0 0}
...
...
..
..

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions