# Attach reticulate, the R interface to Python used by this document.
library(reticulate)

# Export the nycflights13 flights table to a CSV file in the working
# directory; the Python code in this document reads the same file name.
readr::write_csv(x = nycflights13::flights, file = "flights.csv")

# Print the Python configuration that reticulate has resolved.
print(py_config())
python: /opt/rstudio-connect/mnt/python-environments/ghcr.io_rstudio_content-pro__r4.1.0-py3.8.8-bionic/pip/3.8.8/x8MsyAVKiG31Nk12SDjbTw/bin/python
libpython: /opt/python/3.8.8/lib/libpython3.8.so
pythonhome: /opt/rstudio-connect/mnt/python-environments/ghcr.io_rstudio_content-pro__r4.1.0-py3.8.8-bionic/pip/3.8.8/x8MsyAVKiG31Nk12SDjbTw:/opt/rstudio-connect/mnt/python-environments/ghcr.io_rstudio_content-pro__r4.1.0-py3.8.8-bionic/pip/3.8.8/x8MsyAVKiG31Nk12SDjbTw
version: 3.8.8 (default, Apr 13 2021, 19:58:26) [GCC 7.3.0]
numpy: /opt/rstudio-connect/mnt/python-environments/ghcr.io_rstudio_content-pro__r4.1.0-py3.8.8-bionic/pip/3.8.8/x8MsyAVKiG31Nk12SDjbTw/lib/python3.8/site-packages/numpy
numpy_version: 1.22.0
NOTE: Python version was forced by RETICULATE_PYTHON
import pandas

# Load the exported CSV, keep only rows whose destination is ORD, narrow the
# frame to the carrier and the two delay columns, then drop rows that have a
# missing value in any of those three columns.
flights = (
    pandas.read_csv("flights.csv")
    .loc[lambda df: df["dest"] == "ORD", ["carrier", "dep_delay", "arr_delay"]]
    .dropna()
)

# Bare expression so the resulting frame is displayed as the chunk output.
flights
carrier dep_delay arr_delay
5 UA -4.0 12.0
9 AA -2.0 8.0
25 MQ 8.0 32.0
38 AA -1.0 14.0
57 AA -4.0 4.0
... ... ... ...
336645 AA -12.0 -37.0
336669 UA -7.0 -13.0
336675 MQ -7.0 -11.0
336696 B6 -5.0 -23.0
336709 AA -13.0 -38.0
[16566 rows x 3 columns]
library(ggplot2)

# Plot arrival delay by carrier using the `flights` object reached through
# `py$flights`.
# geom_jitter() is itself a point layer (points with random displacement to
# reduce overplotting), so stacking it on top of geom_point() rendered every
# observation twice: once at its exact position and once jittered. Only the
# jittered layer is kept.
ggplot(py$flights, aes(carrier, arr_delay)) +
  geom_jitter()
library(readr)
library(dplyr)

# Read the exported CSV, keep only flights whose destination is ORD, retain
# the carrier and delay columns, and drop rows containing any missing value.
flights <- read_csv("flights.csv") |>
  filter(dest == "ORD") |>
  select(carrier, dep_delay, arr_delay) |>
  na.omit()
# Show the first ten rows of the `flights` object reached through `r`
# (presumably reticulate's handle to the R session; `r` is not defined in
# this code).
print(r.flights.head(n=10))
carrier dep_delay arr_delay
0 UA -4.0 12.0
1 AA -2.0 8.0
2 MQ 8.0 32.0
3 AA -1.0 14.0
4 AA -4.0 4.0
5 UA 9.0 20.0
6 UA 2.0 21.0
7 AA -6.0 -12.0
8 MQ 39.0 49.0
9 B6 -2.0 15.0
See the reticulate website for details.