Python - Matplotlib 기초

July 1, 2021 4 분 소요

matplotlib - 데이터 시각화 라이브러리

#pip install matplotlib
import matplotlib.pyplot as plt
import numpy as np

선 그래프

plt.plot([x,] y [,fmt])

기본 파라미터는 y값
x축과 포맷은 생략가능

# Draw a basic line graph from explicit x and y values.
x = [1,2,3,4]
y = [3,8,5,6]

plt.plot(x, y)
plt.show()  # render the graph

png

plt.plot(y)  # x may be omitted; only the y values are passed
plt.show()

png

출력 형식 지정

fmt = ‘[색][선스타일][마커지정]’

color: b(blue), g(green), r(red), c(cyan), m(magenta), k(black), w(white) 등…
linestyle: -(실선), --(파선), :(점선), -.(파선/점선 혼합)
marker: o(원모양), ^,v,<,>(방향에 따른 삼각형), s(사각형) 등…

# Same data, now with a format string plus grid, title and axis labels.
plt.plot(x, y, 'r--')  # r: red (b would be blue, ...), --: dashed line
plt.grid(True)  # add a background grid
plt.title('graph test')  # add a graph title
plt.xlabel('x_label')  # add an x-axis label
plt.ylabel('y_label')  # add a y-axis label
plt.show()

png

# Plot a parabola sampled every 0.5 from -4.5 up to (but not including) 5.
x = np.arange(-4.5, 5, 0.5)
y = 2*x**2  # 2x²
plt.plot(x, y)
plt.show()

png

# Draw three curves that share the same x samples on a single set of axes.
x = np.arange(-4.5, 5, 0.5)
y1 = 2*x**2
y2 = 5*x+30
y3 = 4*x**2+10
# One call with repeated (x, y) pairs draws the same three lines, in the same
# order, as calling plt.plot(x, y1), plt.plot(x, y2), plt.plot(x, y3) in turn.
plt.plot(x, y1, x, y2, x, y3)
plt.show()

png

그래프 화면 나누기

plt.figure() - 새로운 그래프 창을 생성해서 그래프를 출력
plt.subplot(m, n, p) - 그래프 창을 m x n 행렬로 나눠서 p번 위치에 그래프가 그려지도록 지정

# Draw two curves in two separate figures.
x = np.arange(-4.5, 5, 0.5)
y1 = 2*x**2
y2 = 5*x+30
plt.plot(x, y1)
plt.figure()  # open a new figure so the next plot is drawn separately
plt.plot(x, y2)
plt.show()

png

# Switch between numbered figures: y1 and y2 go to figure 1, y3 to figure 2.
x = np.arange(-4.5, 5, 0.5)
y1 = 2*x**2
y2 = 5*x+30
y3 = 4*x**2+10
plt.figure(1)  # figure 1
plt.plot(x, y1)
plt.figure(2)  # figure 2
plt.plot(x, y3)
plt.figure(1)  # back to figure 1; the next plot is added to it
plt.plot(x, y2)
plt.show()

png

# Place four curves in a 2x2 grid of subplots within one figure.
x = np.arange(-4.5, 5, 0.5)
y1 = 2*x**2
y2 = 5*x+30
y3 = 4*x**2+10
y4 = np.sin(x)**2

# Subplot positions are 1-based, so enumerate starts at 1.
for position, curve in enumerate((y1, y2, y3, y4), start=1):
    plt.subplot(2, 2, position)  # subplot(rows, columns, position)
    plt.plot(x, curve)

plt.show()

png

막대 그래프

plt.bar(x, height [,width=float, color=colors, tick_label=tick_labels, align='center' 혹은 'edge'])

x: x축에 표시될 위치
height: 시각화할 막대그래프의 데이터
width: 0과 1사이의 실수를 지정하여 막대 폭을 조절
color: fmt 컬러 약어로 색 지정
tick_label: 막대 이름 지정
align: 막대 위치를 가운데로 할지 한쪽으로 치우치게 할지 지정. 기본값은 center

# Bar chart of three yearly values, with the year strings as bar labels.
idx = np.arange(3)  # bar positions along the x-axis
x = ['2018', '2019', '2020']
y = [100, 700, 400]

# tick_label names each bar at its position; this has the same effect as
# calling plt.xticks(idx, x) after drawing the bars.
p = plt.bar(idx, y, tick_label=x)
plt.show()

png

산점도 그래프 (scatter plot)

plt.scatter(x, y [,s=size_n, c=colors, marker='marker_string', alpha=alpha_f])

x,y: x축 y축 값
s: 마커 크기
c: 마커 색
marker: 마커 모양/종류
alpha: 0과 1 사이의 실수로 마커 투명도 지정

# Scatter plot of cities by longitude/latitude; the lists below are parallel,
# i.e. element i of each list belongs to the same city.
city=['seoul', 'inchun', 'daejun', 'daegu', 'woolsan', 'busan', 'gwangju']

lat=[37.56, 37.45, 36.35, 35.87, 35.53, 35.18, 35.16]
lon=[126.97, 126.70, 127.38, 128.60, 129.31, 129.07, 126.85]

pop_den=[16154, 2751, 2839, 2790, 1099, 4454, 2995]  # presumably population density per city
size = np.array(pop_den)*0.2  # marker sizes scaled down from pop_den
colors = ['r', 'g', 'b', 'c', 'm', 'k', 'y']  # one colour code per city

plt.scatter(lon, lat, s=size, c=colors, alpha=0.5)
plt.xlabel('lon')
plt.ylabel('lat')
plt.title('pop den')
# Write each city's name at its marker position.
# Note: this loop rebinds the names x and y.
for x, y, name in zip(lon, lat, city):
    plt.text(x, y, name)
    
plt.show()

png

히스토그램

데이터의 분포를 막대형식으로 표현

plt.hist(x [,bins=bins_n 혹은 'auto'])

x: 변량 데이터
bins: 계급의 개수. 입력하지 않을 경우 기본값은 10. bins='auto'가 입력되면 x에 맞게 계급의 개수가 자동으로 결정됨

x = [43,67,87,76,54,34,56,76,89,98,100,87,65,43,23]  # scores

plt.hist(x, bins=7)  # split the scores into 7 bins
plt.show()

png

파이 그래프

plt.pie(x [,labels=label_seq, autopct=’비율 표시 형식’, shadow = False(기본) 혹은 True, explode=explode_seq, counterclock=True(기본) 혹은 False, startangle=각도])

x: 데이터. 입력시 x의 각 요소가 전체에서 차지하는 비율을 자동으로 계산하여 그래프를 그려줌
labels: 데이터 항목 수와 같은 문자열 시퀀스를 통해 각 항목 레이블 표시
autopct: 각 항목에 비율이 표시되는 숫자의 형식 지정. ‘%0.1f’가 입력되면 소수점 첫째자리까지, ‘%0.0f’가 입력되면 정수만 표시
shadow: 그림자 효과
explode: 항목이 원에서 돌출되는 효과
counterclock: 항목이 순서대로 표시되는 방향 정의
startangle: 첫 항목이 그려지는 각도, x축을 중심으로 반시계방향으로 증가, 기본값은 0

# Pie chart of fruit counts; labels, values and explode offsets are parallel lists.
fruit = ['apple', 'banana', 'strawberry', 'orange', 'grape']
result = [7,6,3,2,2]
exp = [0,0,0,.1,0]  # only the fourth entry ('orange') gets a non-zero offset
# autopct='%.1f%%' prints each share with one decimal place followed by '%'.
plt.pie(result, labels=fruit, autopct='%.1f%%', counterclock=False, explode=exp, startangle=90)
plt.show()

png

파일을 사용한 실습

import pandas as pd

# Load the practice spreadsheet using the openpyxl engine.
df = pd.read_excel('a.xlsx', engine='openpyxl')
df  # bare expression: shows the table when run in a notebook cell

	이름	국어	영어	수학
0	aaa	43	65	78
1	bbb	76	98	54
2	ccc	98	67	54
3	ddd	54	98	67
4	eee	98	65	34

# Draw one line per student across the three subjects and tag each line
# with the student's name at its first point.
x = ['kor', 'eng', 'math']
y = df.loc[:, ['국어', '영어', '수학']]
names = df.loc[:, '이름']
# Walk the rows positionally instead of looking them up with .loc[i] / names[i]:
# label lookups by 0..n-1 only work while df keeps its default integer index.
for name, (_, scores) in zip(names, y.iterrows()):
    plt.plot(x, scores)
    plt.text(x[0], scores['국어'], name)  # label sits at the first ('kor') point
plt.show()

png

# Load the traffic-accident CSV; the file is read with the EUC-KR encoding.
data = pd.read_csv('../Day22 - Pandas/도로교통공단_최근5년_교통사고_통계.csv', encoding='euc-kr')
data  # bare expression: shows the table when run in a notebook cell

	발생년	사고건수	사망자수	중상자수	경상자수	부상신고자수
0	2015	232035	4621	92522	233646	24232
1	2016	220917	4292	82463	226283	22974
2	2017	216335	4185	78212	223200	21417
3	2018	217148	3781	74258	227511	21268
4	2019	229600	3349	72306	245524	23882

# Line chart of the '사고건수' column against the '발생년' column.
x = data['발생년']
y = data['사고건수']
plt.plot(x, y)
plt.title('accident')
plt.xlabel('year')
plt.ylabel('number')
plt.show()

png

# Same x and y as a bar chart, zoomed in on part of the y-axis.
plt.bar(x,y)
plt.title('accident')
plt.xlabel('year')
plt.ylabel('number')
plt.ylim(200000,240000)  # restrict the displayed y-axis range
plt.show()

png

pandas로 그래프 그리기

# Build a Series of the integers 1..10.
s1=pd.Series([1,2,3,4,5,6,7,8,9,10])
s1  # bare expression: shows the Series when run in a notebook cell

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
dtype: int64

s1.plot()  # x-axis is the index, y-axis is the values
plt.show()

png

# Build a 2x3 DataFrame from a NumPy array with named columns.
arr = np.array([[1,2,3],[4,5,6]])
idx = ['row1', 'row2']  # NOTE(review): defined but never passed to DataFrame, so rows keep the default 0, 1 labels
cols = ['col1', 'col2', 'col3']
d1 = pd.DataFrame(arr, columns=cols)
d1  # bare expression: shows the table when run in a notebook cell

	col1	col2	col3
0	1	2	3
1	4	5	6

d1.plot()  # plot the DataFrame with pandas' built-in plotting
plt.show()

png

# Shows a copy of the table indexed by '발생년'. The result is not assigned,
# so `data` itself keeps '발생년' as an ordinary column.
data.set_index('발생년')

	사고건수	사망자수	중상자수	경상자수	부상신고자수
발생년
2015	232035	4621	92522	233646	24232
2016	220917	4292	82463	226283	22974
2017	216335	4185	78212	223200	21417
2018	217148	3781	74258	227511	21268
2019	229600	3349	72306	245524	23882

# Keep Korean text from rendering as broken glyphs in Matplotlib.
# NOTE(review): 'Malgun Gothic' is a Windows font; other platforms presumably
# need a different Korean-capable font name here.
plt.rcParams['font.family']='Malgun Gothic'
# Draw minus signs with the ASCII hyphen rather than the Unicode minus glyph.
plt.rcParams['axes.unicode_minus']=False

# Plot against the year: set_index returns a new DataFrame, so '발생년' becomes
# the x-axis instead of being drawn as one more data line, while `data` itself
# keeps the '발생년' column for later use.
data.set_index('발생년').plot()
plt.show()

png

# Scatter plot of '사고건수' against '발생년' via pandas, with marker sizes
# scaled from the '사고건수' values.
size = data['사고건수']*0.02
data.plot.scatter(x='발생년', y='사고건수',s=size, grid=True, title='최근5년교통사고현황')
plt.show()

png

Changmin Lucas Lee

Python - Matplotlib 기초

matplotlib - 데이터 시각화 라이브러리

선 그래프

출력 형식 지정

그래프 화면 나누기

막대 그래프

산점도 그래프 (scatter plot)

히스토그램

파이 그래프

파일을 사용한 실습

pandas로 그래프 그리기

댓글남기기

참고

블로그 이전

첫번째 이직

Python - 타입 힌트 (Type Hint)

TDD - Test Driven Development