Analyzing MTA Bus Data

You are logged in as . Logout
Time Space Diagram using Bus Data #2

Load Required Libraries

library("ggplot2")
library("parsedate")
## Warning: package 'parsedate' was built under R version 3.3.3
library("reshape2")
## Warning: package 'reshape2' was built under R version 3.3.3

Reading Bus Data

This is an R Markdown document. If you are interested in how to create similar HTML pages, please visit: http://rmarkdown.rstudio.com

# Read the bus observations into a data frame; the first row of the CSV
# holds the column names (argument spelled out in full rather than relying
# on partial matching of `head` to `header`)
idf <- read.csv("../Data/busDataHW.csv", header = TRUE)

# Preview the first few rows
head(idf)
##   X                          date        lineref  originref directionref
## 1 1 2017-05-15T21:35:18.000-04:00   MTA NYCT_B35 MTA_308194            0
## 2 2 2017-05-15T21:35:40.000-04:00   MTA NYCT_S59 MTA_202744            1
## 3 3 2017-05-15T21:35:13.000-04:00   MTA NYCT_Q27 MTA_505137            0
## 4 4 2017-05-15T21:35:24.873-04:00   MTA NYCT_S53 MTA_905059            1
## 5 5 2017-05-15T21:35:41.000-04:00 MTA NYCT_BX41+ MTA_104339            0
## 6 6 2017-05-15T21:35:11.970-04:00   MTA NYCT_Q88 MTA_500035            1
##      vehicleref                           destinatinname   bearing
## 1  MTA NYCT_406   LTD BROWNSVILLE M GASTON BL via CHURCH 328.89374
## 2 MTA NYCT_8263                                 HYLAN BL 221.93826
## 3 MTA NYCT_4663                    FLUSHING  MAIN ST STA 165.41959
## 4 MTA NYCT_7080                              PT RICHMOND 247.89835
## 5 MTA NYCT_5781 SELECT BUS SERVICE WLIMSBRDG GUN HILL RD  29.68718
## 6 MTA NYCT_8024  WOODHVN BL STA QNS CTR MALL via H.HRDNG 197.34670
##       locx      locy publishlinename journeypatternref
## 1 40.65150 -74.00429             B35       MTA_B350146
## 2 40.61861 -74.15504             S59       MTA_S590420
## 3 40.75318 -73.79281             Q27       MTA_Q270866
## 4 40.62220 -74.02859             S53       MTA_S530120
## 5 40.86711 -73.88388        Bx41-SBS     MTA_SBS410037
## 6 40.73813 -73.85056             Q88       MTA_Q880189
##                         datedvehiclejourneyref PresentableDistance
## 1    MTA NYCT_JG_B7-Weekday-SDon-129100_B35_62             at stop
## 2    MTA NYCT_YU_B7-Weekday-SDon-127500_S59_13         approaching
## 3    MTA NYCT_QV_B7-Weekday-SDon-125800_Q27_29             at stop
## 4  MTA NYCT_CA_B7-Weekday-SDon-129800_MISC_499       < 1 stop away
## 5 MTA NYCT_GH_B7-Weekday-SDon-125700_SBS41_520             at stop
## 6  MTA NYCT_QV_B7-Weekday-SDon-126000_MISC_286       < 1 stop away
##   DistanceFromCall CallDistanceAlongRoute StopsFromCall
## 1            27.27                 900.54             0
## 2            57.67                3598.30             0
## 3             7.75               11318.11             0
## 4           312.93                 322.60             0
## 5            10.03                6423.19             0
## 6           168.91               12882.63             0
##               StopPointName
## 1                39 ST/5 AV
## 2      RICHMOND AV/JULES DR
## 3           46 AV/UTOPIA PY
## 4                4 AV/90 ST
## 5  WEBSTER AV/BEDFORD PK BL
## 6 HORACE HARDING EXP/108 ST

Filtering Bus Data

The code below narrows the data to one bus line (M15), one direction, and one hour of the day:

# Build a working data frame restricted to a single bus line
# Bus line = M15
df <- subset(idf, publishlinename == "M15")

# Convert the ISO 8601 timestamp strings to POSIXlt so that individual
# components (such as the hour) can be read directly
df$date <- as.POSIXlt(parse_iso_8601(df[["date"]]))

# Peek at the hour component of the parsed timestamps
head(df[["date"]]$hour)
## [1] 1 1 1 1 1 1
# Direction = 1
# The column is named directionref; it is referenced by its full name
# instead of relying on `$` partial matching of `direction`
df <- subset(df, directionref == 1)

# Hour = 18 (compared as an integer, not as the string "18")
# NOTE(review): the parsed timestamps appear to be in UTC (a source time of
# 21:35 -04:00 shows up as hour 1), so this selects 18:00 UTC, i.e. 14:00 at
# the -04:00 offset of the raw data -- confirm this is the intended hour
df <- subset(df, date$hour == 18L)

# Let's take a look at the data
head(df)
##         X                date      lineref  originref directionref
## 3519 3519 2017-05-15 18:43:39 MTA NYCT_M15 MTA_401732            1
## 3624 3624 2017-05-15 18:43:27 MTA NYCT_M15 MTA_401732            1
## 3651 3651 2017-05-15 18:43:15 MTA NYCT_M15 MTA_401732            1
## 4037 4037 2017-05-15 18:43:37 MTA NYCT_M15 MTA_401732            1
## 4207 4207 2017-05-15 18:43:36 MTA NYCT_M15 MTA_401732            1
## 4304 4304 2017-05-15 18:43:14 MTA NYCT_M15 MTA_401732            1
##         vehicleref                 destinatinname  bearing     locx
## 3519 MTA NYCT_5585 PIKE ST - DIVISION ST via 2 AV 233.8372 40.79812
## 3624 MTA NYCT_5538 PIKE ST - DIVISION ST via 2 AV 234.1669 40.77945
## 3651 MTA NYCT_5260 PIKE ST - DIVISION ST via 2 AV 233.9291 40.75823
## 4037 MTA NYCT_5888           SOUTH FERRY via 2 AV 234.2239 40.73979
## 4207 MTA NYCT_5885           SOUTH FERRY via 2 AV 234.0964 40.78709
## 4304 MTA NYCT_5613           SOUTH FERRY via 2 AV 192.4937 40.70162
##           locy publishlinename journeypatternref
## 3519 -73.93696             M15       MTA_M150393
## 3624 -73.95059             M15       MTA_M150393
## 3651 -73.96608             M15       MTA_M150393
## 4037 -73.97952             M15       MTA_M150392
## 4207 -73.94503             M15       MTA_M150392
## 4304 -74.01191             M15       MTA_M150392
##                         datedvehiclejourneyref PresentableDistance
## 3519 MTA NYCT_OH_B7-Weekday-SDon-087800_M15_42       < 1 stop away
## 3624 MTA NYCT_OH_B7-Weekday-SDon-086100_M15_21       < 1 stop away
## 3651 MTA NYCT_OH_B7-Weekday-SDon-082600_M15_16         approaching
## 4037 MTA NYCT_OH_B7-Weekday-SDon-081600_M15_14         approaching
## 4207 MTA NYCT_OH_B7-Weekday-SDon-087000_M15_18         approaching
## 4304 MTA NYCT_OH_B7-Weekday-SDon-077600_M15_11             at stop
##      DistanceFromCall CallDistanceAlongRoute StopsFromCall
## 3519           180.44                 910.98             0
## 3624           193.66                3296.42             0
## 3651           139.99                5940.04             0
## 4037           112.25                8255.74             0
## 4207           106.42                2239.34             0
## 4304            26.00               13899.75             0
##              StopPointName
## 3519         2 AV/E 116 ST
## 3624          2 AV/E 86 ST
## 3651          2 AV/E 54 ST
## 4037          2 AV/E 25 ST
## 4207          2 AV/E 99 ST
## 4304 SOUTH ST/WHITEHALL ST

Creating multiple plots using ggplot

# Time-space diagram with ggplot2: distance along the route against time,
# coloured by datedvehiclejourneyref, with the legend hidden
ggplot(
  data = df,
  mapping = aes(
    x = date,
    y = CallDistanceAlongRoute,
    colour = datedvehiclejourneyref
  )
) +
  geom_line() +
  theme(legend.position = "none")