1 year ago

#60247

test-img

databius

scio cannot write to BYTES field in BigQuery after upgrading to version 0.10.0

I am using scio to write data to BigQuery from Scala, and I am facing a strange error after upgrading to version 0.10.0.

Here is my simple example:

package com.databius.demo

import com.google.api.services.bigquery.model.{TableFieldSchema, TableSchema}
import com.spotify.scio.bigquery._
import com.spotify.scio.{ContextAndArgs, bigquery}
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO

import scala.jdk.CollectionConverters._

/** Minimal scio pipeline that writes a single row with one BYTES column to BigQuery.
  *
  * The row is written through Beam's `BigQueryIO.writeTableRows()`, attached to the
  * pipeline with `saveAsCustomOutput`, and targets the table `demo:demo_ds.demo_tb`.
  */
object ScioDemo {
  /** Builds the one-row pipeline and runs it.
    *
    * @param args command-line arguments, handed to `ContextAndArgs` to create the context
    */
  def main(args: Array[String]): Unit = {
    // Only the context is kept; the parsed arguments are discarded.
    val (sc, _) = ContextAndArgs(args)

    // Table schema: a single NULLABLE column named "blob" of type BYTES.
    val schema = new TableSchema().setFields(
      List(
        new TableFieldSchema()
          .setName("blob")
          .setType("BYTES")
          .setMode("NULLABLE")
      ).asJava
    )

    // `getBytes` without an argument encodes with the platform default charset.
    val blob = "test".getBytes
    // NOTE(review): the raw Array[Byte] is stored in the TableRow as-is. The TableRow is
    // presumably serialized to JSON, where a byte array may be emitted as a JSON array
    // rather than a scalar; BYTES values likely need to be a base64 string instead
    // (e.g. java.util.Base64.getEncoder.encodeToString(blob)) — confirm against the
    // Beam BigQueryIO documentation.
    val tr   = bigquery.TableRow("blob" -> blob)

    // Single-element collection written via a custom Beam transform named "custom bigquery IO".
    sc.parallelize(Seq(tr))
      .saveAsCustomOutput(
        "custom bigquery IO",
        BigQueryIO
          .writeTableRows()
          .to("demo:demo_ds.demo_tb")
          .withSchema(schema)
          // Dispositions as named: CREATE_IF_NEEDED for the table, WRITE_TRUNCATE for the data.
          .withCreateDisposition(CREATE_IF_NEEDED)
          .withWriteDisposition(WRITE_TRUNCATE)
      )

    // Execute the pipeline.
    sc.run()
  }
}

The example works well with scio version 0.9.2 (build.gradle):

// Gradle build for the demo against scio 0.9.2 (the configuration reported as working).
plugins {
    id "java"
    id "scala"
}

// Single place to change the scio version; interpolated into the coordinates below.
def scioVersion = "0.9.2"

repositories {
    mavenCentral()
}

dependencies {
    implementation("org.scala-lang:scala-library:2.13.8")

    // scio artifacts built for Scala 2.13 (the `_2.13` suffix); BigQuery support comes
    // from the `scio-bigquery` module here.
    implementation("com.spotify:scio-core_2.13:$scioVersion")
    implementation("com.spotify:scio-bigquery_2.13:$scioVersion")

    implementation('com.google.cloud:google-cloud-bigquery:2.6.2')
}

I then upgraded to version 0.10.0, following the Migration Guide from the scio team:

// Gradle build for the demo against scio 0.10.0 (the configuration reported as failing).
plugins {
    id "java"
    id "scala"
}

// Single place to change the scio version; interpolated into the coordinates below.
def scioVersion = "0.10.0"

repositories {
    mavenCentral()
}

dependencies {
    implementation("org.scala-lang:scala-library:2.13.8")

    // scio artifacts built for Scala 2.13 (the `_2.13` suffix); this build depends on
    // `scio-google-cloud-platform` rather than a separate `scio-bigquery` module.
    implementation("com.spotify:scio-core_2.13:$scioVersion")
    implementation("com.spotify:scio-google-cloud-platform_2.13:$scioVersion")

    implementation('com.google.cloud:google-cloud-bigquery:2.6.2')
}

I got the error:

"message" : "Error while reading data, error message: JSON parsing error in row starting at position 0: Array specified for non-repeated field: blob.",

I also tried the latest version (0.11.3) but still get the same error. Do you know how to solve this issue?

scala

google-cloud-platform

google-bigquery

apache-beam

spotify-scio

0 Answers

Your Answer

Accepted video resources