#!/usr/bin/env python
'''Convert timed observations of states into waiting times between changes.

Run as a script, this reads the tab-separated observation file
"hw3.birds.1.txt" and writes the waiting-time records to "hw3.birds.2.txt".
The two helper functions can also be imported without triggering any file
access.
'''

MINUTES_PER_HOUR = 60
MINUTES_PER_DAY = 24 * MINUTES_PER_HOUR

# Header expected on the first line of the input file (without line ending).
EXPECTED_HEADER = 'Time\tHeight\tUnique ID'

DEFAULT_INPUT_PATH = "hw3.birds.1.txt"
DEFAULT_OUTPUT_PATH = "hw3.birds.2.txt"


def text_time_to_num_minutes(time_str):
    '''Convert an "hour:minute" string to the number of minutes since midnight.

    time_str must contain exactly one ':' separating two integer fields,
    e.g. "07:05" -> 425.  A ValueError is raised if the string cannot be
    split into exactly two parts or if either part is not an integer.
    '''
    hour_str, minutes_str = time_str.split(':')
    return MINUTES_PER_HOUR * int(hour_str) + int(minutes_str)


def convert_timed_obs_inp_to_waiting_time_outp(inp, outp):
    '''Write the waiting times between state changes found in inp to outp.

    inp is any iterable of text rows.  Each row is expected to have the
    syntax:
        hour:min\\tstate\\tidentifier\\n
    Rows must already be sorted by identifier and then by time.  Rows that
    are empty after stripping whitespace (e.g. a trailing blank line) are
    skipped.

    Whenever the state changes between observations that share the same
    identifier, one record is written to outp (any object with a write()
    method) with the syntax:
        identifier\\tstate\\twaiting time\\n
    where state is the state that has just ended and waiting time is the
    number of minutes that were spent in it.  Nothing is written when the
    identifier changes, nor for the final state of an identifier, because
    the end of that state was not observed.

    A ValueError is raised for a non-blank row that does not consist of
    exactly three tab-separated fields or whose time field is malformed.
    '''
    current_bird = None
    current_state = None
    prev_num_minutes = None
    for row in inp:
        stripped = row.strip()
        if not stripped:
            # Tolerate blank lines instead of failing on the unpacking below.
            continue
        # Separate the row on tab characters and unpack its 3 parts.
        time_str, state, bird = stripped.split('\t')
        if current_bird is None:
            # The first observation of the whole data set is a special case:
            # there is nothing to compare it with yet.
            current_bird = bird
            current_state = state
            prev_num_minutes = text_time_to_num_minutes(time_str)
            continue
        if bird == current_bird and state == current_state:
            # Same bird, same state: keep waiting.  The start time of the
            # state is deliberately left untouched.
            continue

        # We have a new bird or a new state.
        curr_num_minutes = text_time_to_num_minutes(time_str)
        if bird == current_bird:
            # A new state for the same bird: emit a waiting time record.
            wait_time = curr_num_minutes - prev_num_minutes
            if wait_time < 0:
                # If we appear to have moved back in time, the clock has
                # really rolled over past midnight (e.g. from 23:45 to
                # 01:32).  Adding whole days until the difference is
                # non-negative is exactly what the modulo does here.
                wait_time %= MINUTES_PER_DAY
                # Keep the running clock on the same multi-day scale so
                # that later differences stay consistent.
                curr_num_minutes = prev_num_minutes + wait_time
            outp.write('{b}\t{s}\t{w}\n'.format(b=current_bird,
                                                s=current_state,
                                                w=wait_time))
        current_bird = bird
        current_state = state
        prev_num_minutes = curr_num_minutes


def main(in_path=DEFAULT_INPUT_PATH, out_path=DEFAULT_OUTPUT_PATH):
    '''Read observations from in_path and write waiting times to out_path.

    The first line of in_path must be the expected header; otherwise a
    ValueError is raised before the output file is created.
    '''
    # Plain 'r' is used because the 'U' flag of the old 'rU' mode is rejected
    # by current Python 3 releases.
    with open(in_path, 'r') as data_file:
        # First check that the header is what we expect it to be.  Reading
        # it here also consumes it, so the parsing function only sees data.
        header_line = data_file.readline()
        # Compare without the line ending so that files written with
        # different newline conventions are accepted.
        if header_line.rstrip('\r\n') != EXPECTED_HEADER:
            raise ValueError(
                'Unexpected header line: {0!r}'.format(header_line))
        with open(out_path, 'w') as outfile:
            convert_timed_obs_inp_to_waiting_time_outp(data_file, outfile)


if __name__ == '__main__':
    main()