In [3]:
%load_ext sql
In [1]:
!pip install ipython-sql==0.4.1
!pip install SQLAlchemy==1.4.49
WARNING: Ignoring invalid distribution -qlalchemy (/usr/local/lib/python3.10/dist-packages)
Requirement already satisfied: ipython-sql==0.4.1 in /usr/local/lib/python3.10/dist-packages (0.4.1)
Requirement already satisfied: prettytable<1 in /usr/local/lib/python3.10/dist-packages (from ipython-sql==0.4.1) (0.7.2)
Requirement already satisfied: ipython>=1.0 in /usr/local/lib/python3.10/dist-packages (from ipython-sql==0.4.1) (7.34.0)
Requirement already satisfied: sqlalchemy>=0.6.7 in /usr/local/lib/python3.10/dist-packages (from ipython-sql==0.4.1) (1.4.49)
Requirement already satisfied: sqlparse in /usr/local/lib/python3.10/dist-packages (from ipython-sql==0.4.1) (0.4.4)
Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from ipython-sql==0.4.1) (1.16.0)
Requirement already satisfied: ipython-genutils>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from ipython-sql==0.4.1) (0.2.0)
Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (67.7.2)
Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (0.19.1)
Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (4.4.2)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (0.7.5)
Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (5.7.1)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (3.0.43)
Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (2.16.1)
Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (0.2.0)
Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (0.1.6)
Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=1.0->ipython-sql==0.4.1) (4.9.0)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=0.6.7->ipython-sql==0.4.1) (3.0.2)
Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=1.0->ipython-sql==0.4.1) (0.8.3)
Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=1.0->ipython-sql==0.4.1) (0.7.0)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=1.0->ipython-sql==0.4.1) (0.2.12)
WARNING: Ignoring invalid distribution -qlalchemy (/usr/local/lib/python3.10/dist-packages)
WARNING: Ignoring invalid distribution -qlalchemy (/usr/local/lib/python3.10/dist-packages)
Requirement already satisfied: SQLAlchemy==1.4.49 in /usr/local/lib/python3.10/dist-packages (1.4.49)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy==1.4.49) (3.0.2)
WARNING: Ignoring invalid distribution -qlalchemy (/usr/local/lib/python3.10/dist-packages)
ID와 PW와 (본인스키마)를 자신의 것으로 변경
In [4]:
%sql postgresql://wearealego:Wearealego!1@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
In [5]:
%%sql SELECT schema_name
FROM information_schema.schemata;
* postgresql://wearealego:***@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
1 rows affected.
Out[5]:
schema_name |
---|
wearealego |
In [ ]:
%%sql SELECT tablename
FROM pg_tables
WHERE schemaname = 'wearealego';
* postgresql://wearealego:***@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
1 rows affected.
Out[ ]:
tablename |
---|
name_gender |
In [6]:
%%sql
drop table if exists wearealego.name_gender;
create table wearealego.name_gender(
name varchar(32) primary key,
gender varchar(8)
);
* postgresql://wearealego:***@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
Done.
Done.
Out[6]:
[]
In [7]:
import psycopg2
# Redshift connection 함수
# 본인 ID/PW 사용!
def get_Redshift_connection():
host = "learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com"
redshift_user = "wearealego"
redshift_pass = "Wearealego!1"
port = 5439
dbname = "dev"
conn = psycopg2.connect("dbname={dbname} user={user} host={host} password={password} port={port}".format(
dbname=dbname,
user=redshift_user,
password=redshift_pass,
host=host,
port=port
))
conn.set_session(autocommit=True)
return conn.cursor()
ETL 함수를 하나씩 정의¶
In [8]:
import requests
def extract(url):
f = requests.get(url)
return (f.text)
In [9]:
def transform(text):
#[1:]로 헤더 제외
lines = text.strip().split("\n")[1:]
records = []
for l in lines:
# l = "gwanghyeon,M" -> [ 'gwanghyeon', 'M' ]
(name, gender) = l.split(",")
records.append([name, gender])
return records
In [26]:
def load(records):
"""
records = [
[ "gwanghyeon", "M" ],
[ "Claire", "F" ],
...
]
"""
schema='wearealego'
# 문제는 실행시마다 값이 달라짐 그래서 수정해야함
# BEGIN과 END를 사용해서 SQL 결과를 트랜잭션으로 만들어주는 것이 좋음
cur = get_Redshift_connection()
cur.execute("BEGIN;")
# DELETE 문을 사용하여 테이블의 기존 데이터 삭제
cur.execute(f"DELETE FROM {schema}.name_gender;")
try:
# DELETE FROM을 먼저 수행 -> FULL REFRESH을 하는 형태
for r in records:
name = r[0]
gender = r[1]
print(name, "-", gender)
sql = f"INSERT INTO {schema}.name_gender VALUES ('{name}', '{gender}')"
cur.execute(sql)
# COMMIT 트랜잭션 완료, cur.execute("END;")도 같은기능
cur.execute("COMMIT;")
except (Exception, psycopg2.DatabaseError) as error:
print(error)
cur.execute("rollback;")
이제 Extract부터 함수를 하나씩 실행¶
In [27]:
link = "https://s3-geospatial.s3-us-west-2.amazonaws.com/name_gender.csv"
data = extract(link)
In [28]:
data
Out[28]:
'name,gender\nAdaleigh,F\nAmryn,Unisex\nApurva,Unisex\nAryion,M\nAlixia,F\nAlyssarose,F\nArvell,M\nAibel,M\nAtiyyah,F\nAdlie,F\nAnyely,F\nAamoni,F\nAhman,M\nArlane,F\nArmoney,F\nAtzhiry,F\nAntonette,F\nAkeelah,F\nAbdikadir,M\nArinze,M\nArshaun,M\nAlexandro,M\nAyriauna,F\nAqib,M\nAlleya,F\nAavah,F\nAnesti,Unisex\nAdalaide,F\nAnalena,F\nAlaeyah,F\nAlbena,F\nAimi,F\nAdwaith,M\nArkady,M\nAstyn,Unisex\nAdelee,F\nAgata,F\nAlegna,F\nAltan,M\nAhnaleigh,F\nAlgie,Unisex\nAshanti,F\nAislyn,F\nAdaleine,F\nAnthnoy,M\nAlgernon,M\nAeryona,F\nAdrinne,F\nAddell,F\nAvril,F\nAhni,F\nAimon,M\nAdolpho,M\nAhuva,F\nAurielle,F\nAveana,F\nAliyia,F\nAlesander,M\nAdnrea,F\nAnjae,F\nAlvine,F\nAdorah,F\nAdlemi,F\nAlesi,F\nAlontae,M\nAntonny,M\nAdarah,F\nAyreanna,F\nAntyon,M\nAndia,F\nAshla,F\nAspyn,F\nAntwanett,F\nAundreia,F\nAudella,F\nAmari,Unisex\nArsha,Unisex\nAricella,F\nAdan,M\nApasra,F\nAlaysha,F\nAnderson,M\nAurelius,M\nAerial,F\nAverleigh,F\nAslean,F\nArniesha,F\nAsyana,F\nAnnjane,F\nAmabella,F\nAustinjohn,M\nArloween,F\nAlula,M\nAnemone,F\nAmorina,F\nAnureet,F\nArric,M\nAntonne,M\nAlyre,M\nAnnaise,F\n'
In [29]:
lines = transform(data)
In [30]:
lines[0:10]
Out[30]:
[['Adaleigh', 'F'],
['Amryn', 'Unisex'],
['Apurva', 'Unisex'],
['Aryion', 'M'],
['Alixia', 'F'],
['Alyssarose', 'F'],
['Arvell', 'M'],
['Aibel', 'M'],
['Atiyyah', 'F'],
['Adlie', 'F']]
In [31]:
load(lines)
Adaleigh - F
Amryn - Unisex
Apurva - Unisex
Aryion - M
Alixia - F
Alyssarose - F
Arvell - M
Aibel - M
Atiyyah - F
Adlie - F
Anyely - F
Aamoni - F
Ahman - M
Arlane - F
Armoney - F
Atzhiry - F
Antonette - F
Akeelah - F
Abdikadir - M
Arinze - M
Arshaun - M
Alexandro - M
Ayriauna - F
Aqib - M
Alleya - F
Aavah - F
Anesti - Unisex
Adalaide - F
Analena - F
Alaeyah - F
Albena - F
Aimi - F
Adwaith - M
Arkady - M
Astyn - Unisex
Adelee - F
Agata - F
Alegna - F
Altan - M
Ahnaleigh - F
Algie - Unisex
Ashanti - F
Aislyn - F
Adaleine - F
Anthnoy - M
Algernon - M
Aeryona - F
Adrinne - F
Addell - F
Avril - F
Ahni - F
Aimon - M
Adolpho - M
Ahuva - F
Aurielle - F
Aveana - F
Aliyia - F
Alesander - M
Adnrea - F
Anjae - F
Alvine - F
Adorah - F
Adlemi - F
Alesi - F
Alontae - M
Antonny - M
Adarah - F
Ayreanna - F
Antyon - M
Andia - F
Ashla - F
Aspyn - F
Antwanett - F
Aundreia - F
Audella - F
Amari - Unisex
Arsha - Unisex
Aricella - F
Adan - M
Apasra - F
Alaysha - F
Anderson - M
Aurelius - M
Aerial - F
Averleigh - F
Aslean - F
Arniesha - F
Asyana - F
Annjane - F
Amabella - F
Austinjohn - M
Arloween - F
Alula - M
Anemone - F
Amorina - F
Anureet - F
Arric - M
Antonne - M
Alyre - M
Annaise - F
In [32]:
%%sql
SELECT COUNT(1)
FROM wearealego.name_gender;
* postgresql://wearealego:***@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
1 rows affected.
Out[32]:
count |
---|
100 |
In [33]:
%%sql
SELECT *
FROM wearealego.name_gender;
* postgresql://wearealego:***@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
100 rows affected.
Out[33]:
name | gender |
---|---|
Adaleigh | F |
Amryn | Unisex |
Apurva | Unisex |
Aryion | M |
Alixia | F |
Alyssarose | F |
Arvell | M |
Aibel | M |
Atiyyah | F |
Adlie | F |
Anyely | F |
Aamoni | F |
Ahman | M |
Arlane | F |
Armoney | F |
Atzhiry | F |
Antonette | F |
Akeelah | F |
Abdikadir | M |
Arinze | M |
Arshaun | M |
Alexandro | M |
Ayriauna | F |
Aqib | M |
Alleya | F |
Aavah | F |
Anesti | Unisex |
Adalaide | F |
Analena | F |
Alaeyah | F |
Albena | F |
Aimi | F |
Adwaith | M |
Arkady | M |
Astyn | Unisex |
Adelee | F |
Agata | F |
Alegna | F |
Altan | M |
Ahnaleigh | F |
Algie | Unisex |
Ashanti | F |
Aislyn | F |
Adaleine | F |
Anthnoy | M |
Algernon | M |
Aeryona | F |
Adrinne | F |
Addell | F |
Avril | F |
Ahni | F |
Aimon | M |
Adolpho | M |
Ahuva | F |
Aurielle | F |
Aveana | F |
Aliyia | F |
Alesander | M |
Adnrea | F |
Anjae | F |
Alvine | F |
Adorah | F |
Adlemi | F |
Alesi | F |
Alontae | M |
Antonny | M |
Adarah | F |
Ayreanna | F |
Antyon | M |
Andia | F |
Ashla | F |
Aspyn | F |
Antwanett | F |
Aundreia | F |
Audella | F |
Amari | Unisex |
Arsha | Unisex |
Aricella | F |
Adan | M |
Apasra | F |
Alaysha | F |
Anderson | M |
Aurelius | M |
Aerial | F |
Averleigh | F |
Aslean | F |
Arniesha | F |
Asyana | F |
Annjane | F |
Amabella | F |
Austinjohn | M |
Arloween | F |
Alula | M |
Anemone | F |
Amorina | F |
Anureet | F |
Arric | M |
Antonne | M |
Alyre | M |
Annaise | F |
In [34]:
%%sql
SELECT gender, COUNT(1) count
FROM wearealego.name_gender
GROUP BY gender;
* postgresql://wearealego:***@learnde.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com:5439/dev
3 rows affected.
Out[34]:
gender | count |
---|---|
F | 65 |
Unisex | 7 |
M | 28 |
'데이터 기초' 카테고리의 다른 글
AB테스트 (0) | 2024.01.04 |
---|---|
테스트 코드 구조 소개 (1) | 2023.12.25 |
워드클라우드 이용 데이터 시각화 (0) | 2023.10.27 |
셀레니엄 이용한 자동화 웹 스크래핑 (0) | 2023.10.26 |