Patrick's 데이터 세상

Python 데이터 분석 - Google API 본문

Programming/Python

Python 데이터 분석 - Google API

patrick610 2020. 6. 24. 22:03
반응형
SMALL

실습 파일 : 05.seoul-crime-analysis.ipynb


>>> source activate pyshpark
>>> jupyter lab --notebook-dir=/Users/sunghwanpark/Desktop/shpark/Development/Python/GottAcademy/AcademyPractice/workspace/nb-workspace

 

* 외부 data 가져오는 방법

OpenAPI
Crawling / scraping

 

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the Seoul crime CSV into a DataFrame, then show its summary and first rows.
# NOTE(review): this cell reads from 'data-files/' while later cells use
# 'data_files/' — confirm which directory name is correct.
crime_anal_police = pd.read_csv('data-files/crime-in-seoul.csv', 
                                thousands=',', # treat ',' in the file as a thousands separator in numbers
                                encoding='euc-kr')
# crime_anal_police = pd.read_csv('data-files/crime-in-seoul2.csv', thousands=',', encoding='utf-8')
crime_anal_police.info()
crime_anal_police.head()

결과

 

 

# Sort the rows by the "폭력 발생" column in descending order (returns a new DataFrame).
crime_anal_police.sort_values(by="폭력 발생", ascending=False)

결과

...이하

 

 

!pip install googlemaps
import googlemaps
# Google Maps API key. Replace the placeholder with your own key, and never
# publish a real key in notebook source or shared files.
gmaps_key = "*******************************"
# Build the client from gmaps_key so the credential is defined in exactly one place.
gmaps = googlemaps.Client(key=gmaps_key)

 

# Geocode a single station name, passing language='ko' to the API.
gmaps.geocode('서울중부경찰서', language='ko')

 

# Build the full station names: '서울' + the '관서명' value without its last
# character + '경찰서' (e.g. 서울 + 중부 + 경찰서).
station_name = [
    '서울' + str(raw_name[:-1]) + '경찰서'
    for raw_name in crime_anal_police['관서명']
]

station_name

 

# Geocode every station name and collect its address, latitude and longitude
# into three parallel lists (one entry per station, in station_name order).
station_addreess = []
station_lat = []
station_lng = []

for name in station_name:
    results = gmaps.geocode(name, language='ko')
    if not results:
        # Fail with the offending name instead of an opaque IndexError on results[0].
        raise ValueError("Geocoding returned no results for: " + name)

    # Only the first geocoding result is used.
    best_match = results[0]
    address = best_match.get("formatted_address")
    geometry = best_match.get("geometry")

    station_addreess.append(address)
    station_lat.append(geometry['location']['lat'])
    station_lng.append(geometry['location']['lng'])

    print(name + '-->' + address)

 

# Walk the three parallel lists in lockstep; each zip item is an
# (address, latitude, longitude) tuple that feeds the format string directly.
for station_row in zip(station_addreess, station_lat, station_lng):
    print("[%s][%s][%s]" % station_row)

* zip()으로 3개 리스트에서 값을 한 번에 꺼내기

# Split the first address on whitespace to inspect its individual words.
station_addreess[0].split()

 

# Derive the district name (the word ending in '구') from each station address
# and store it in a new '구별' column.
gu_name = []

for address in station_addreess:
    # split() with no arguments breaks on whitespace and returns a list of words
    words = address.split()

    # Take the first word ending in '구'; report the offending address
    # instead of raising an opaque IndexError when there is none.
    district = next((word for word in words if word.endswith('구')), None)
    if district is None:
        raise ValueError("No word ending in '구' found in address: " + address)

    gu_name.append(district)

crime_anal_police['구별'] = gu_name
crime_anal_police.head()

 

# Write the DataFrame to a comma-separated, UTF-8 encoded CSV file.
crime_anal_police.to_csv('data_files/crime-in-seoul-include-gu-name.csv', 
                         sep=',', 
                         encoding='utf-8',
                         index=False) # do not write the index to the file
# crime_anal_police.to_csv('data-files/crime-in-seoul-include-gu-name.csv', sep=',', encoding='utf-8')


* 인덱스도 저장 대상이다.
index=False 없이 저장한 파일을 열어 보면 인덱스가 이름 없는 컬럼으로 들어가 있다.
따라서 인덱스를 저장하지 않으려면 index=False로 지정해야 한다.

# Read the CSV file back into a DataFrame and display it.
temp_df = pd.read_csv('data_files/crime-in-seoul-include-gu-name.csv')
temp_df

 

 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the final crime dataset and index it by the '구별' column.
crime_anal_norm = pd.read_csv('data_files/crime-in-seoul-final.csv',
                              encoding='utf-8')
crime_anal_norm = crime_anal_norm.set_index(["구별"]) # move the '구별' column into the index
# crime_anal_norm.set_index(["구별"], inplace=True) # in-place alternative
crime_anal_norm.head()

 

# !command -> terminal command
# !pip install folium
import folium
import json
geo_path = 'data_files/skorea-municipalities-geo-simple.json'
# Use a context manager so the file handle is closed once the JSON is parsed.
# Despite its name, geo_str holds the parsed JSON object, not a string.
with open(geo_path, encoding='utf-8') as geo_file:
    geo_str = json.load(geo_file)
geo_str
# NOTE(review): the name `map` shadows Python's built-in map() from here on.
map = folium.Map(
    location=[37.5502, 126.982],  # center of the map
    zoom_start=11,  # 0 ~ 18
    tiles='Stamen Toner',  # Stamen Toner, Stamen Terrain, ...
)

# Draw the regions on the map, colored by the '살인' column.
murder_layer = folium.Choropleth(
    geo_data=geo_str,
    data=crime_anal_norm['살인'],
    columns=[crime_anal_norm.index, crime_anal_norm['살인']],
    fill_color='PuRd',  # PuRd, YlGnBu
    key_on='feature.id',
)
murder_layer.add_to(map)
map

 

# NOTE(review): the name `map` shadows Python's built-in map() from here on.
map = folium.Map(
    location=[37.5502, 126.982],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Draw the regions on the map, colored by the '강간' column.
rape_layer = folium.Choropleth(
    geo_data=geo_str,
    data=crime_anal_norm['강간'],
    columns=[crime_anal_norm.index, crime_anal_norm['강간']],
    fill_color='PuRd',  # PuRd, YlGnBu
    key_on='feature.id',
)
rape_layer.add_to(map)
map

 

# NOTE(review): the name `map` shadows Python's built-in map() from here on.
map = folium.Map(
    location=[37.5502, 126.982],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Draw the regions on the map, colored by the '범죄' column.
crime_layer = folium.Choropleth(
    geo_data=geo_str,
    data=crime_anal_norm['범죄'],
    columns=[crime_anal_norm.index, crime_anal_norm['범죄']],
    fill_color='PuRd',  # PuRd, YlGnBu
    key_on='feature.id',
)
crime_layer.add_to(map)
map

 

# Load the raw crime CSV (UTF-8) into a DataFrame and show its first rows.
crime_anal_raw = pd.read_csv('data_files/crime-anal-raw.csv', 
                             encoding='utf-8')
crime_anal_raw.head()

 

 

* data preprocessing - scaling

import pandas as pd
# Load the CSV that includes the district-name column and show its first rows.
crime_data = pd.read_csv("data_files/crime-in-seoul-include-gu-name.csv",
                         encoding="utf-8")
crime_data.head()

 

# Select the five "발생" columns to be scaled.
col = ['살인 발생', '강도 발생', '강간 발생', '절도 발생', '폭력 발생']
# x = crime_data[col] # returns a DataFrame
# x.head()
x = crime_data[col].values # returns an ndarray
x[:5]

 

from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on x and transform it in one step, then show the first five rows.
mms = MinMaxScaler() # maps the data to values between 0 and 1
x_scaled = mms.fit_transform(x)
x_scaled[:5]

 

반응형
LIST

'Programming > Python' 카테고리의 다른 글

Glances 성능 모니터링  (0) 2021.04.09
Python 데이터 분석 - Google Maps Setting  (0) 2020.06.24
Python 데이터 분석 - WordCloud  (2) 2020.06.24
Python 데이터 분석 - Pandas  (0) 2020.06.24
Python 데이터 분석 - NumPy  (0) 2020.06.24
Comments