from google.cloud import bigquery
from google.oauth2 import service_account


project_id = 'hayleyhell'
key = './key.json'

credentials = service_account.Credentials.from_service_account_file(key)
client = bigquery.Client(project=project_id, credentials=credentials)

org = client.query('select 1').to_dataframe()

print(org)

   f0_
0    1


query = '''
  select * from temp.add_to_cart
'''

org = client.query(query).to_dataframe()

print(org)

          date  user_count
0   2021-01-18         109
1   2021-01-17          88
2   2021-01-16         116
3   2021-01-19         167
4   2021-01-11         152
5   2021-01-13         168
6   2021-01-12         163
7   2021-01-15         158
8   2021-01-14         193
9   2021-01-23         117
10  2021-01-22         197
11  2021-01-21         202
12  2021-01-20         216


# Google Colab에서는 Credential을 사용하지 않아도 된다. 
# 서비스 계정 없이 내 계정으로 사용자 인증 할 수 있다. 
from google.colab import auth 
auth.authenticate_user()


sql = """
  select 
    store_number,
    any_value(store_location) store_location,
    item_number,
    any_value(item_description) item_description,
    date_trunc(date, MONTH) period, 
    round(sum(sale_dollars), 2) sale_dollars,
    round(sum(volume_sold_liters), 2) volume_soold_liters
  from 
    `bigquery-public-data.iowa_liquor_sales.sales`
  where date between DATE(2019, 1, 1) and DATE(2019, 12, 31)
  group by store_number, item_number, period
"""

df = client.query(sql).to_dataframe()
df.head()


df[df['item_description']=='CROWN ROYAL'].groupby('period')['volume_soold_liters']

period
2019-01-01    23480.89
2019-02-01    23868.52
2019-03-01    29978.41
2019-04-01    26213.71
2019-05-01    35768.19
2019-06-01    27146.86
2019-07-01    32014.75
2019-08-01    24656.22
2019-09-01    41482.36
2019-10-01    38161.44
2019-11-01    35143.01
2019-12-01    36001.87
Name: volume_soold_liters, dtype: float64


df[df['item_description']=='CROWN ROYAL'].groupby('period')['volume_soold_liters'].sum().plot.bar()

<matplotlib.axes._subplots.AxesSubplot at 0x7fd43df79070>


# reset_index는 UNNEST와 유사하다 
dagg = df.groupby('item_description').sum().nlargest(5, 'volume_soold_liters').reset_index()
dagg


# 판매량 및 리터별 상위 5개 
dfm = df[df['item_description'].isin(dagg['item_description'])].groupby(['item_description', 'period']).sum()
di = dfm.reset_index()
di.pivot(index='period', columns='item_description', values='volume_soold_liters').plot.bar(figsize=(12,5))

<matplotlib.axes._subplots.AxesSubplot at 0x7fd43d9993a0>


%%bigquery --project hayleyhell 

select * from temp.add_to_cart


import pandas as pd 

project_id = 'hayleyhell'
org = pd.read_gbq(query, project_id=project_id) 
org

	store_number	store_location	item_number	item_description	period	sale_dollars	volume_soold_liters
0	3707	POINT(-91.649625 42.035157)	37996	SMIRNOFF 80PRF	2019-02-01	297.12	18.0
1	3707	POINT(-91.649625 42.035157)	38176	TITOS HANDMADE VODKA	2019-02-01	694.08	36.0
2	2500	None	40126	BURNETTS MANGO PINEAPPLE	2019-02-01	241.92	27.0
3	2501	POINT(-93.619455 42.022848)	43051	BACARDI DRAGON BERRY	2019-02-01	297.36	18.0
4	2561	POINT(-93.644943 41.542748)	64866	FIREBALL CINNAMON WHISKEY	2019-02-01	1134.00	63.0

	item_description	sale_dollars	volume_soold_liters
0	BLACK VELVET	13014788.20	1379947.87
1	TITOS HANDMADE VODKA	17674224.67	976326.54
2	HAWKEYE VODKA	5659257.73	888218.60
3	FIREBALL CINNAMON WHISKEY	7710667.15	484763.90
4	FIVE O'CLOCK VODKA	2728524.74	420271.47

티스토리

BigQuery 데이터 사이언스

BigQuery 데이터 사이언스

데이터 추출 방법¶

Google API Client¶

Magic Commands¶

pandas-GBQ Library¶

	date	user_count
0	2021-01-18	109
1	2021-01-17	88
2	2021-01-16	116
3	2021-01-19	167
4	2021-01-11	152
5	2021-01-13	168
6	2021-01-12	163
7	2021-01-15	158
8	2021-01-14	193
9	2021-01-23	117
10	2021-01-22	197
11	2021-01-21	202
12	2021-01-20	216