9.5. ENDI#
Info sobre la encuesta: https://www.ecuadorencifras.gob.ec/encuesta_nacional_desnutricion_infantil/
Datos: https://www.ecuadorencifras.gob.ec/documentos/web-inec/ENDI/BDD_ENDI_R1_rds.zip
Importamos librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
sns.set()
url = 'https://www.ecuadorencifras.gob.ec/documentos/web-inec/ENDI/BDD_ENDI_R1_rds.zip'
# Download the file from `url` and save it locally under `file_name`:
import urllib.request
file_name = 'ENDI.zip'
urllib.request.urlretrieve(url, file_name)
('ENDI.zip', <http.client.HTTPMessage at 0x7fe4626a1ae0>)
# extract file
import zipfile
with zipfile.ZipFile(file_name, 'r') as zip_ref:
zip_ref.extractall('./data/ENDI/')
# read .rds file
import pyreadr
df = pyreadr.read_r('./data/ENDI/BDD_ENDI_R1_rds/rds/BDD_ENDI_R1_f1_personas.rds')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[4], line 2
1 # read .rds file
----> 2 import pyreadr
3 df = pyreadr.read_r('./data/ENDI/BDD_ENDI_R1_rds/rds/BDD_ENDI_R1_f1_personas.rds')
ModuleNotFoundError: No module named 'pyreadr'
df
OrderedDict([(None,
id_upm id_viv id_hogar id_per \
0 0101500060 010150006003 01015000600301 0101500060030101
1 0101500060 010150006003 01015000600301 0101500060030102
2 0101500060 010150006003 01015000600301 0101500060030103
3 0101500060 010150006003 01015000600301 0101500060030104
4 0101500060 010150006003 01015000600301 0101500060030105
... ... ... ... ...
90027 2301569009 230156900907 23015690090701 2301569009070102
90028 2301569009 230156900907 23015690090701 2301569009070103
90029 2301569009 230156900908 23015690090801 2301569009080101
90030 2301569009 230156900908 23015690090801 2301569009080102
90031 2301569009 230156900908 23015690090801 2301569009080103
id_mef fecha_anio fecha_mes fecha_dia fexp estrato ... \
0 0101500060030101 2022 07 28 0.508820 2712 ...
1 NaN 2022 07 28 0.508820 2712 ...
2 0101500060030103 2022 07 28 0.508820 2712 ...
3 NaN 2022 07 28 0.508820 2712 ...
4 NaN 2022 07 28 0.508820 2712 ...
... ... ... ... ... ... ... ...
90027 2301569009070102 2023 06 15 0.595402 2321 ...
90028 NaN 2023 06 15 0.595402 2321 ...
90029 NaN 2023 06 15 0.595402 2321 ...
90030 2301569009080102 2023 06 15 0.595402 2321 ...
90031 NaN 2023 06 15 0.595402 2321 ...
f1_s6_3 f1_s6_4_1 f1_s6_4_2 f1_s6_5_1 f1_s6_5_2 f1_s6_5_3 f1_s6_6 \
0 13.2 16 30 28 7 2022 NaN
1 NaN NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN 4
... ... ... ... ... ... ... ...
90027 13.3 14 42 20 6 2023 NaN
90028 11.1 12 38 20 6 2023 NaN
90029 NaN NaN NaN NaN NaN NaN NaN
90030 13.2 16 33 15 6 2023 NaN
90031 10.5 16 30 15 6 2023 NaN
quintil pobreza nbi_1
0 5.0 0.0 0.0
1 5.0 0.0 0.0
2 5.0 0.0 0.0
3 5.0 0.0 0.0
4 5.0 0.0 0.0
... ... ... ...
90027 4.0 0.0 1.0
90028 4.0 0.0 1.0
90029 2.0 1.0 1.0
90030 2.0 1.0 1.0
90031 2.0 1.0 1.0
[90032 rows x 111 columns])])
df = pd.DataFrame(df[None])
df['altitud']
sns.scatterplot(data=df, x="edaddias", y="altitud")
<Axes: xlabel='edaddias', ylabel='altitud'>