"""Profiling Module based on hotshot using line-event data.
Examples
--------
First we start with the standard profiling analysis:
>>> import numpy as np
>>> import mmf.utils.mmf_profile
>>> def f(A):
... np.roll(A, 1, axis=1)
>>> prof = mmf.utils.mmf_profile.Profile('data_file.prof')
>>> A = np.empty((100,100,100))
>>> prof.runcall(f, A)
>>> prof.close()
>>> stats, data = mmf.utils.mmf_profile.load('data_file.prof')
>>> stats = stats.strip_dirs()
>>> stats = stats.sort_stats('time', 'calls')
>>> stats = stats.print_stats(20) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
3 function calls in ... CPU seconds
<BLANKLINE>
Ordered by: internal time, call count
<BLANKLINE>
ncalls tottime percall cumtime percall filename:lineno(function)
1 ... ... ... ... numeric.py:1033(roll)
1 ... ... ... ... <doctest mmf.utils.mmf_profile[2]>:1(f)
1 ... ... ... ... numeric.py:...(asanyarray)
0 ... ... profile:0(profiler)
<BLANKLINE>
<BLANKLINE>
This is not so useful because the time is taken in the function roll
rather than in external calls. Now let's look at the line data by
annotating the files:
>>> files = data.annotate_files() # doctest: +ELLIPSIS
Warning: Ignoring source file ...
>>> for f in files:
... print(f)
... print(files[f]) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
/...
1033: ------ ------ def roll(a, shift, axis=None):
...
1082: ...% 1 a = asanyarray(a)
1083: ...% 1 if axis is None:
1084: ------ ------ n = a.size
1085: ------ ------ reshape = True
1086: ------ ------ else:
1087: ...% 1 n = a.shape[axis]
1088: ...% 1 reshape = False
1089: ...% 1 shift %= n
1090: ...% 1 indexes = concatenate((arange(n-shift,n),arange(n-shift)))
1091: 9...% 1 res = a.take(indexes, axis)
1092: ...% 1 if reshape:
1093: ------ ------ return res.reshape(a.shape)
1094: ------ ------ else:
1095: ...% 1 return res
1096: ------ ------
...
"""
import hotshot
import hotshot.log
import hotshot.stats
[docs]class Profile(hotshot.Profile):
def __init__(self,filename,lineevents=1,linetimings=1):
hotshot.Profile.__init__(self,filename,lineevents,linetimings)
[docs]class LineData(object):
[docs] def __init__(self,data):
self.data = data
self._linenum_width = 4
self._time_prec = 4
self._call_width = 6
[docs] def print_data(self):
print self.format_data()
[docs] def annotate_files(self):
annotated_files = {}
for filename in self.data:
lines = []
try:
file = open(filename,'r')
except IOError,err:
print "Warning: Ignoring source file %s..."%\
filename
print " %s"%`err`
file = None
if file is not None:
file_data = self.data[filename]
linenum = 0
for line in file:
linenum += 1
if linenum in file_data:
(cum_time,rel_time,n) = file_data[linenum]
prefix = " %*u: %*.*f%% %*u"%\
(self._linenum_width,linenum,
self._time_prec+1,self._time_prec-2,rel_time*100,
self._call_width,n)
else:
prefix = " %*u: ---%s- %s"%\
(self._linenum_width,linenum,
"-"*(self._time_prec-2),
"-"*self._call_width)
new_line = " ".join([prefix,line])
lines.append(new_line)
file.close()
annotated_files[filename] = "".join(lines)
return annotated_files
[docs]def load(filename):
"""Return standard pstats class and lineevent data.
pstats,data = load(filename)
raw_data[0] : cumulative time
raw_data[1] : number of times executed
data[filename][linenumber] : (cum_time,rel_time,n)
"""
try:
pstats = hotshot.stats.load(filename)
except Exception, err:
pstats = None
print "Warning: error forming pstats."
print " " + `err`
log = hotshot.log.LogReader(filename)
data = {}
raw_data = {}
total_time = 0.0
files = {}
prev_event = None
for event in log:
what, (filename,lineno,funcname), tdelta = event
key = (filename,lineno)
data[filename] = {}
# Function and frame data in pstats class.
# I think that tdelta is the time elapsed since the
# previous event, not the execution time of the current
# line, thus we need to add the data to the previous step.
# We use the prev_event to keep track of the previous line
total_time += tdelta
if prev_event is not None:
prev_event[0] += tdelta
if what == hotshot.log.LINE:
if key in raw_data:
raw_data[key][1] += 1
else:
raw_data[key] = [0.0,1]
prev_event = raw_data[key]
else:
prev_event = None
total_time *= 0.000001
for key in raw_data:
(filename,lineno) = key
cum_time = raw_data[key][0]*0.000001
rel_time = cum_time/total_time
n = raw_data[key][1]
data[filename][lineno] = (cum_time,rel_time,n)
return pstats,LineData(data)