blob: cd60afd2624c6ec3837d815157f95c5240bd69fe [file] [log] [blame]
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Code for interacting with BigQuery."""
from typing import Generator
# pylint: disable=import-error
from google.cloud import bigquery
from google.cloud import bigquery_storage
import pandas
# pylint: enable=import-error
class Querier:
  """Runs BigQuery queries and streams the results back row by row."""

  def __init__(self, billing_project: str):
    """Remembers the project that query clients are created under.

    Args:
      billing_project: Project name handed to bigquery.Client as its
          |project| argument whenever a query is run.
    """
    self._billing_project = billing_project

  def GetSeriesForQuery(self,
                        query: str) -> Generator[pandas.Series, None, None]:
    """Runs |query| and lazily yields its rows.

    Nothing is sent to BigQuery until the returned generator is first
    advanced.

    Args:
      query: The BigQuery query to run.

    Yields:
      One pandas.Series per row returned by the query. Column values can be
      read directly as attributes of the Series.
    """
    bq_client = bigquery.Client(project=self._billing_project)
    rows = bq_client.query(query).result()
    # Reading through a DataFrame iterable backed by a BigQueryReadClient,
    # rather than consuming |rows| directly, lets the BigQuery Storage API
    # serve the results. That makes retrieval ~10x faster, at the price of a
    # few extra dependencies.
    storage_client = bigquery_storage.BigQueryReadClient()
    frames = rows.to_dataframe_iterable(bqstorage_client=storage_client)
    for frame in frames:
      # iterrows() produces (index, Series) pairs; only the Series is wanted.
      yield from (series for _index, series in frame.iterrows())