from google.cloud import bigquery
from google.oauth2 import service_account


# Project ID handed to the BigQuery client below.
project_id = 'hayleyhell'
# Path to the service-account key file used to build the credentials.
key = './key.json'

# Build credentials from the key file and create a client bound to the project.
credentials = service_account.Credentials.from_service_account_file(key)
client = bigquery.Client(project=project_id, credentials=credentials)

# Smoke test: run a trivial query and load its result into a DataFrame.
org = client.query('select 1').to_dataframe()

print(org)

   f0_
0    1


# Select every row and column of the add_to_cart table in the temp dataset.
query = '''
  select * from temp.add_to_cart
'''

# Run the query through the client and load the result into a DataFrame
# (this rebinds `org`, replacing the earlier `select 1` result).
org = client.query(query).to_dataframe()

print(org)

          date  user_count
0   2021-01-18         109
1   2021-01-17          88
2   2021-01-16         116
3   2021-01-19         167
4   2021-01-11         152
5   2021-01-13         168
6   2021-01-12         163
7   2021-01-15         158
8   2021-01-14         193
9   2021-01-23         117
10  2021-01-22         197
11  2021-01-21         202
12  2021-01-20         216


# In Google Colab a service-account credential is not required:
# you can authenticate with your own user account instead of a service account.
from google.colab import auth 
auth.authenticate_user()


# Monthly 2019 totals per (store_number, item_number) from the public Iowa
# liquor sales table: sale dollars and liters sold, each rounded to 2 decimals.
# NOTE: the liters alias is spelled `volume_soold_liters` (sic); the later
# pandas cells select the column by that exact name.
sql = """
  select 
    store_number,
    any_value(store_location) store_location,
    item_number,
    any_value(item_description) item_description,
    date_trunc(date, MONTH) period, 
    round(sum(sale_dollars), 2) sale_dollars,
    round(sum(volume_sold_liters), 2) volume_soold_liters
  from 
    `bigquery-public-data.iowa_liquor_sales.sales`
  where date between DATE(2019, 1, 1) and DATE(2019, 12, 31)
  group by store_number, item_number, period
"""

# Execute the query, load the result into a DataFrame, and preview it.
df = client.query(sql).to_dataframe()
df.head()


# Total liters of CROWN ROYAL sold per month. The trailing .sum() is required:
# without it the expression is an un-aggregated SeriesGroupBy object rather
# than the per-period Series of totals.
df[df['item_description'] == 'CROWN ROYAL'].groupby('period')['volume_soold_liters'].sum()

period
2019-01-01    23480.89
2019-02-01    23868.52
2019-03-01    29978.41
2019-04-01    26213.71
2019-05-01    35768.19
2019-06-01    27146.86
2019-07-01    32014.75
2019-08-01    24656.22
2019-09-01    41482.36
2019-10-01    38161.44
2019-11-01    35143.01
2019-12-01    36001.87
Name: volume_soold_liters, dtype: float64


# Bar chart of the total liters of CROWN ROYAL sold in each period.
df[df['item_description']=='CROWN ROYAL'].groupby('period')['volume_soold_liters'].sum().plot.bar()

<matplotlib.axes._subplots.AxesSubplot at 0x7fd43df79070>


# Sum the numeric measures per product and keep the five products with the
# largest total volume. The two measure columns are selected explicitly so the
# aggregation never tries to "sum" the string/date columns (recent pandas
# versions no longer drop such columns silently).
# reset_index is similar to UNNEST: it turns the group key back into a column.
dagg = (
    df.groupby('item_description')[['sale_dollars', 'volume_soold_liters']]
    .sum()
    .nlargest(5, 'volume_soold_liters')
    .reset_index()
)
dagg


# Top 5 products by sales volume, broken down by liters sold per period.
# Only rows for the products listed in `dagg` are kept, and only the numeric
# measures are summed so non-numeric columns cannot break the aggregation.
dfm = (
    df[df['item_description'].isin(dagg['item_description'])]
    .groupby(['item_description', 'period'])[['sale_dollars', 'volume_soold_liters']]
    .sum()
)
# Flatten the (item_description, period) index so pivot() can use both as columns.
di = dfm.reset_index()
# One bar group per period, one bar per product.
di.pivot(index='period', columns='item_description', values='volume_soold_liters').plot.bar(figsize=(12, 5))

<matplotlib.axes._subplots.AxesSubplot at 0x7fd43d9993a0>


%%bigquery --project hayleyhell 

select * from temp.add_to_cart


import pandas as pd 

# Project ID passed to read_gbq below.
project_id = 'hayleyhell'
# `query` is not defined in this cell; it reuses the SQL string assigned earlier
# in the file (select * from temp.add_to_cart).
# NOTE(review): newer pandas releases appear to deprecate pd.read_gbq in favor
# of pandas_gbq.read_gbq — confirm against the installed pandas version.
org = pd.read_gbq(query, project_id=project_id) 
org

Cloud SQL (0)	2022.12.13
Google Compute Engine (0)	2022.12.13
BigQuery 데이터 사이언스 (0)	2022.12.08
BigQuery 추가 기능 (0)	2022.12.07
BigQuery 스크립팅 (0)	2022.12.07

마리아DB ODBC 등록하기 (0)	2023.03.02
Google Compute Engine (0)	2022.12.13
BigQuery 데이터 사이언스 (0)	2022.12.08
BigQuery 추가 기능 (0)	2022.12.07
BigQuery 스크립팅 (0)	2022.12.07

마리아DB ODBC 등록하기 (0)	2023.03.02
Cloud SQL (0)	2022.12.13
BigQuery 데이터 사이언스 (0)	2022.12.08
BigQuery 추가 기능 (0)	2022.12.07
BigQuery 스크립팅 (0)	2022.12.07

	store_number	store_location	item_number	item_description	period	sale_dollars	volume_soold_liters
0	3707	POINT(-91.649625 42.035157)	37996	SMIRNOFF 80PRF	2019-02-01	297.12	18.0
1	3707	POINT(-91.649625 42.035157)	38176	TITOS HANDMADE VODKA	2019-02-01	694.08	36.0
2	2500	None	40126	BURNETTS MANGO PINEAPPLE	2019-02-01	241.92	27.0
3	2501	POINT(-93.619455 42.022848)	43051	BACARDI DRAGON BERRY	2019-02-01	297.36	18.0
4	2561	POINT(-93.644943 41.542748)	64866	FIREBALL CINNAMON WHISKEY	2019-02-01	1134.00	63.0

	item_description	sale_dollars	volume_soold_liters
0	BLACK VELVET	13014788.20	1379947.87
1	TITOS HANDMADE VODKA	17674224.67	976326.54
2	HAWKEYE VODKA	5659257.73	888218.60
3	FIREBALL CINNAMON WHISKEY	7710667.15	484763.90
4	FIVE O'CLOCK VODKA	2728524.74	420271.47

	date	user_count
0	2021-01-18	109
1	2021-01-17	88
2	2021-01-16	116
3	2021-01-19	167
4	2021-01-11	152
5	2021-01-13	168
6	2021-01-12	163
7	2021-01-15	158
8	2021-01-14	193
9	2021-01-23	117
10	2021-01-22	197
11	2021-01-21	202
12	2021-01-20	216

행	struct_col.col_1	struct_col.col_2
1	1	2

행	array_col
1	1
1	2

행	col_1	col_2
1	1	2

행	string_col	array_item
1	a	1
2	a	2

gcp

'gcp' 카테고리의 다른 글

'gcp' 카테고리의 다른 글

GCP 글로벌 인프라

리전 (Region)

영역(Zone)

글로벌(Global)

Google Compute Engine

Compute Engine 개요

Networking

GCE Instance 생성 실습

'gcp' 카테고리의 다른 글

데이터 추출 방법¶

Google API Client¶

Magic Commands¶

pandas-GBQ Library¶

'gcp' 카테고리의 다른 글

1. 쿼리 저장 및 공유

2. Data Transfer

3. 쿼리 예약

3.1 예약된 쿼리 사용 방법 - 예약 생성

3.2 예약된 쿼리 사용 방법 - 백필 실행

3.3 주의사항

4. INFORMATION_SCHEMA

5. Audit Logs

'gcp' 카테고리의 다른 글

1. 절차적 언어

1.1 DECLARE / SET

1.2 EXECUTE IMMEDIATE

1.3 IF / THEN / ELSEIF / ELSE / END IF

1.4 LOOP / END LOOP

1.5 WHILE / DO / END WHILE

1.6 예외 처리

2. 저장 프로시져

3. UDF (사용자 정의 함수)

4. TVF (사용자 정의 테이블 함수)

'gcp' 카테고리의 다른 글

WildCard Table

샤딩 테이블

파티션 테이블

Cluster

'gcp' 카테고리의 다른 글

ANY_VALUE

ARRAY_AGG

ARRAY_CONCAT_AGG

STRING_AGG

COUNTIF

SUM

집계 분석 함수

'gcp' 카테고리의 다른 글

데이터 유형 - Struct

# Struct 만드는 방법

# Struct 데이터 조회 방법

데이터 유형 - Array

# Array 만드는 방법

# Array 데이터 조회 방법 (컬럼으로 선택)

# Array 데이터 조회 방법 (행으로 나눠 선택)

실습 - Array 안에 Struct 가 들어간 경우

'gcp' 카테고리의 다른 글

티스토리툴바