Initial flowy commit
This commit is contained in:
parent
b9362ec8f5
commit
d6fe38272e
370 changed files with 12160 additions and 0 deletions
72
3http-download.flw
Normal file
@@ -0,0 +1,72 @@
splitter S {}

filter www_req {
    dstport = 80
}

filter www_res {
    srcport = 80
}

filter www_res1 {
    srcport = 80
}

grouper g_www_req {
    module g1 {
        srcip = srcip
        dstip = dstip
        etime < stime delta 1s
    }
    aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
              bitOR(tcp_flags) as flags, union(srcport) as srcports
}

grouper g_www_res {
    module g1 {
        srcip = srcip
        dstip = dstip
        etime < stime delta 1s
    }
    aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
              bitOR(tcp_flags) as flags, union(dstport) as dstports
}

grouper g_www_res1 {
    module g1 {
        srcip = srcip
        dstip = dstip
        etime < stime delta 5s
    }
    aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
              bitOR(tcp_flags) as flags, union(dstport) as dstports
}

groupfilter ggf {
    bitAND(flags, 0x13) = 0x13
}

merger M {
    module m1 {
        branches C, B, A
        A.srcip = B.dstip
        A.srcip = C.dstip
        A.srcports = B.dstports
        A.srcports = C.dstports
        A.bytes < B.bytes
        A.bytes < C.bytes
        B oi A OR B d A
        C o B
        C m A

    }
    export m1
}

ungrouper U {}

"./netflow-trace.h5" -> S
S branch A -> www_req -> g_www_req -> ggf -> M
S branch B -> www_res -> g_www_res -> ggf -> M
S branch C -> www_res1 -> g_www_res1 -> ggf -> M
M -> U -> "./ungroped.h5"
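The three merger rules above (B oi A OR B d A, C o B, C m A) are Allen-style interval constraints between the groups produced by the branches. As a minimal sketch, not part of the commit, the predicates they rely on can be written out directly, mirroring the module-level functions defined later in allen_ops.py; the interval values below are made up for illustration:

    from collections import namedtuple

    Interval = namedtuple('Interval', ['stime', 'etime'])

    def oi(x, y):          # X oi Y: x starts inside y (inverse overlap)
        return y.stime < x.stime < y.etime

    def d(x, y):           # X d Y: x lies strictly during y
        return y.stime < x.stime and x.etime < y.etime

    def o(x, y):           # X o Y: x ends inside y (overlap)
        return y.stime < x.etime < y.etime

    def m(x, y, delta=1):  # X m Y: the end of x meets the start of y (within delta)
        return abs(x.etime - y.stime) < delta

    A = Interval(stime=100, etime=110)   # hypothetical request group
    B = Interval(stime=105, etime=120)   # hypothetical response group
    print(oi(B, A) or d(B, A))           # True: B starts while A is active, so "B oi A OR B d A" holds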
17
README
Normal file
@@ -0,0 +1,17 @@
Flowy - Network Flow Analysis Application

Requirements:

Python 2.5 or higher (tested with Python 2.6)
PyTables 2.1 or higher
PLY (Python Lex-Yacc) 2.5 or higher
pyflowtools 3.1 or higher


Usage:

ft2hdf.py - convert flow-tools traces to an HDF file
printhdf.py - print a flowy HDF file
print_hdf_in_step.py - print two or more HDF files, printing one record
                       from each file at each step
flowy.py - the main flowy program
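A typical session, with directory and file names given purely as examples, first converts raw flow-tools captures into an HDF file and then runs a query against it:

    python ft2hdf.py /path/to/flow-tools-captures netflow-trace.h5
    python flowy.py 3http-download.flw
    python printhdf.py ungroped.h5    # inspect the output file named in the query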
0
__init__.py
Normal file
152
aggr_operators.py
Normal file
|
@@ -0,0 +1,152 @@
|
|||
import options
|
||||
from tables import UInt32Col, UInt64Col
|
||||
|
||||
if options.import_grouper_ops:
|
||||
external_import = __import__(options.import_grouper_ops)
|
||||
|
||||
class last(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type', 'last']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.last = None
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.last
|
||||
else:
|
||||
self.last = getattr(record, self.field)
|
||||
return self.last
|
||||
|
||||
|
||||
class sum(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','sum']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.sum = 0
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.sum
|
||||
else:
|
||||
self.sum += getattr(record, self.field)
|
||||
return self.sum
|
||||
|
||||
class avg(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','sum','n','avg']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.sum = 0
|
||||
self.n = 0
|
||||
self.avg = None
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
if str(self.field_type).find('Int') != -1:
|
||||
return int(round(self.avg))
|
||||
else:
|
||||
return self.avg
|
||||
else:
|
||||
self.sum += getattr(record, self.field)
|
||||
self.n += 1
|
||||
self.avg = self.sum / self.n
|
||||
return self.avg
|
||||
|
||||
class max(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','max']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.max = float("-inf")
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.max
|
||||
else:
|
||||
new_val = getattr(record, self.field)
|
||||
if self.max < new_val:
|
||||
self.max = new_val
|
||||
return self.max
|
||||
|
||||
class min(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','min']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.min = float("inf")
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.min
|
||||
else:
|
||||
new_val = getattr(record, self.field)
|
||||
if self.min > new_val:
|
||||
self.min = new_val
|
||||
return self.min
|
||||
|
||||
class count(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','count']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.count = 0
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.count
|
||||
else:
|
||||
self.count += 1
|
||||
return self.count
|
||||
|
||||
class union(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','union']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.union = []
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return sorted(set(self.union))
|
||||
else:
|
||||
self.union.append(getattr(record, self.field))
|
||||
return self.union
|
||||
|
||||
class bitAND(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','bitAND']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.bitAND = pow(2,field_type.size) - 1 # all 1s for the given size
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.bitAND
|
||||
else:
|
||||
self.bitAND &= getattr(record, self.field)
|
||||
return self.bitAND
|
||||
|
||||
class bitOR(object):
|
||||
__slots__ = ['field', 'gr_field', 'field_type','bitOR']
|
||||
def __init__(self, field, gr_field, field_type):
|
||||
self.field = field
|
||||
self.gr_field = gr_field
|
||||
self.field_type = field_type
|
||||
self.bitOR = 0
|
||||
|
||||
def __call__(self, record = None):
|
||||
if record == None:
|
||||
return self.bitOR
|
||||
else:
|
||||
self.bitOR |= getattr(record, self.field)
|
||||
return self.bitOR
|
BIN
aggr_operators.pyc
Normal file
Binary file not shown.
172
allen_index.py
Normal file
|
@@ -0,0 +1,172 @@
|
|||
class LT(object):
|
||||
"""
|
||||
X < Y
|
||||
x before y
|
||||
"""
|
||||
def __init__(self, src, target, delta):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.etime, x.etime + self.delta
|
||||
|
||||
class GT(object):
|
||||
"""
|
||||
X > Y
|
||||
x after y
|
||||
"""
|
||||
def __init__(self, src, target, delta):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime - self.delta, x.stime
|
||||
|
||||
class m(object):
|
||||
"""
|
||||
X m Y
|
||||
x meets y (x starts before y)
|
||||
y should occur at end of x
|
||||
"""
|
||||
def __init__(self, src, target, delta=1):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.etime, x.etime + self.delta
|
||||
|
||||
class mi(object):
|
||||
"""
|
||||
X mi Y
|
||||
inverse x meets y (x starts after y)
|
||||
y should occur at the beginning of x
|
||||
"""
|
||||
def __init__(self, src, target, delta=1):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime - self.delta, x.stime
|
||||
|
||||
class o(object):
|
||||
"""
|
||||
X o Y
|
||||
x overlaps y (x starts before y)
|
||||
y should occur at the end of x
|
||||
"""
|
||||
def __init__(self, src, target, delta=0):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.etime-self.delta, x.etime+self.delta
|
||||
|
||||
class oi(object):
|
||||
"""
|
||||
X oi Y
|
||||
inverse x overlaps y (x starts after y)
|
||||
"""
|
||||
def __init__(self, src, target, delta=0):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime, x.stime
|
||||
|
||||
class d(object):
|
||||
"""
|
||||
X d Y
|
||||
x during y
|
||||
"""
|
||||
def __init__(self, src, target, delta=0):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime, x.stime
|
||||
|
||||
class di(object):
|
||||
"""
|
||||
X di Y
|
||||
inverse x during y (y during x)
|
||||
"""
|
||||
def __init__(self, src, target, delta=0):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime, x.etime
|
||||
|
||||
|
||||
class f(object):
|
||||
"""
|
||||
X f Y
|
||||
x finishes y (x starts after y, x and y end together)
|
||||
"""
|
||||
def __init__(self, src, target, delta=1):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.etime - self.delta, x.etime + self.delta
|
||||
|
||||
class fi(object):
|
||||
"""
|
||||
X fi Y
|
||||
inverse x finishes y (x is finished by y)
|
||||
"""
|
||||
def __init__(self, src, target, delta=1):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.etime - self.delta, x.etime + self.delta
|
||||
|
||||
class s(object):
|
||||
"""
|
||||
X s Y
|
||||
x starts y (x ends before y, x and y start together)
|
||||
"""
|
||||
def __init__(self, src, target, delta=0):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime - self.delta, x.stime + self.delta
|
||||
|
||||
class si(object):
|
||||
"""
|
||||
X si Y
|
||||
inverse x starts y (x is started by y)
|
||||
"""
|
||||
def __init__(self, src, target, delta=1):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime - self.delta, x.stime + self.delta
|
||||
|
||||
class EQ(object):
|
||||
"""
|
||||
X = Y
|
||||
X lasts the same time as Y and both start together.
|
||||
"""
|
||||
def __init__(self, src, target, delta=1):
|
||||
self.delta = delta
|
||||
self.src = src
|
||||
self.target = target
|
||||
|
||||
def __call__(self, x):
|
||||
return x.stime - self.delta, x.stime + self.delta
|
BIN
allen_index.pyc
Normal file
Binary file not shown.
232
allen_ops.py
Normal file
|
@@ -0,0 +1,232 @@
|
|||
from math import floor, ceil
|
||||
|
||||
def inv_op_str(op_name_string):
|
||||
inverse = {
|
||||
'LT' : 'GT',
|
||||
'GT' : 'LT',
|
||||
'm' : 'mi',
|
||||
'mi' : 'm',
|
||||
'o' : 'oi',
|
||||
'oi' : 'o',
|
||||
's' : 'si',
|
||||
'si' : 's',
|
||||
'd' : 'di',
|
||||
'di' : 'd',
|
||||
'f' : 'fi',
|
||||
'fi' : 'f',
|
||||
'=' : '='
|
||||
}
|
||||
return inverse[op_name_string]
|
||||
|
||||
class AllenOpIndex(object):
|
||||
def __init__(self, index):
|
||||
self.index = index
|
||||
|
||||
def LT(self, x, delta):
|
||||
"""
|
||||
X < Y
|
||||
x before y
|
||||
"""
|
||||
return x.etime, x.etime + delta
|
||||
|
||||
def GT(self, x, delta):
|
||||
"""
|
||||
X > Y
|
||||
x after y
|
||||
"""
|
||||
return x.stime - delta, x.stime
|
||||
|
||||
def m(self, x, delta=1):
|
||||
"""
|
||||
X m Y
|
||||
x meets y (x starts before y)
|
||||
y should occur at end of x
|
||||
"""
|
||||
return x.etime, x.etime + delta
|
||||
|
||||
def mi(self, x, delta=1):
|
||||
"""
|
||||
X mi Y
|
||||
inverse x meets y (x starts after y)
|
||||
y should occur at the beginning of x
|
||||
"""
|
||||
return x.stime - delta, x.stime
|
||||
|
||||
def o(self, x, delta=1):
|
||||
"""
|
||||
X o Y
|
||||
x overlaps y (x starts before y)
|
||||
y should occur at the end of x
|
||||
"""
|
||||
return x.etime-delta, x.etime+delta
|
||||
|
||||
def oi(self, x, delta=1):
|
||||
"""
|
||||
X oi Y
|
||||
inverse x overlaps y (x starts after y)
|
||||
"""
|
||||
return x.stime, x.stime
|
||||
|
||||
def d(self, x, delta=0):
|
||||
"""
|
||||
X d Y
|
||||
x during y
|
||||
"""
|
||||
return x.stime, x.stime
|
||||
|
||||
def di(self, x, delta=0):
|
||||
"""
|
||||
X di Y
|
||||
inverse x during y (y during x)
|
||||
"""
|
||||
return x.stime, x.etime
|
||||
|
||||
|
||||
def f(self, x, delta=1):
|
||||
"""
|
||||
X f Y
|
||||
x finishes y (x starts after y, x and y end together)
|
||||
"""
|
||||
# delta disregarded here
|
||||
return x.etime - delta, x.etime + delta
|
||||
|
||||
def fi(self, x, delta=1):
|
||||
"""
|
||||
X fi Y
|
||||
inverse x finishes y (x is finished by y)
|
||||
"""
|
||||
return x.etime - delta, x.etime + delta
|
||||
|
||||
def s(self, x, delta=1):
|
||||
"""
|
||||
X s Y
|
||||
x starts y (x ends before y, x and y start together)
|
||||
"""
|
||||
return x.stime - delta, x.stime + delta
|
||||
|
||||
def si(self, x, delta=1):
|
||||
"""
|
||||
X si Y
|
||||
inverse x starts y (x is started by y)
|
||||
"""
|
||||
# delta disregarded here
|
||||
return x.stime - delta, x.stime + delta
|
||||
|
||||
def EQ(self, x, delta=1):
|
||||
"""
|
||||
X = Y
|
||||
X lasts the same time as Y
|
||||
"""
|
||||
# delta disregarded here
|
||||
return int((x.stime + x.etime)/2) - delta, int((x.stime +
|
||||
x.etime)/2) + delta
|
||||
|
||||
def composite_intervals(self, op_x_delta_tuples):
|
||||
intervals = set()
|
||||
for op_x_delta in op_x_delta_tuples:
|
||||
op = op_x_delta[0]
|
||||
args = op_x_delta[1:]
|
||||
intervals.update(getattr(self, op)(*args))
|
||||
|
||||
res = list(intervals)
|
||||
res.sort()
|
||||
return res
|
||||
|
||||
|
||||
def LT(x, y, delta=0):
|
||||
"""
|
||||
X < Y
|
||||
x before y
|
||||
"""
|
||||
return x.etime < y.stime
|
||||
|
||||
def GT(x, y, delta=1):
|
||||
"""
|
||||
X > Y
|
||||
x after y
|
||||
"""
|
||||
return x.stime > y.etime
|
||||
|
||||
def m(x, y, delta=1):
|
||||
"""
|
||||
X m Y
|
||||
x meets y (x starts before y)
|
||||
y should occur at end of x
|
||||
"""
|
||||
return abs(x.etime - y.stime) < delta
|
||||
|
||||
def mi(x, y, delta=1):
|
||||
"""
|
||||
X mi Y
|
||||
inverse x meets y (x starts after y)
|
||||
y should occur at the beginning of x
|
||||
"""
|
||||
return abs(x.stime - y.etime) < delta
|
||||
|
||||
def o(x, y, delta=1):
|
||||
"""
|
||||
X o Y
|
||||
x overlaps y (x starts before y)
|
||||
y should occur at the end of x
|
||||
"""
|
||||
return y.stime < x.etime < y.etime
|
||||
|
||||
def oi(x, y, delta=1):
|
||||
"""
|
||||
X oi Y
|
||||
inverse x overlaps y (x starts after y)
|
||||
"""
|
||||
return y.stime < x.stime < y.etime
|
||||
|
||||
def d(x, y, delta=0):
|
||||
"""
|
||||
X d Y
|
||||
x during y
|
||||
"""
|
||||
return y.stime < x.stime and x.etime < y.etime
|
||||
|
||||
def di(x, y, delta=0):
|
||||
"""
|
||||
X di Y
|
||||
inverse x during y (y during x)
|
||||
"""
|
||||
return y.stime > x.stime and x.etime > y.etime
|
||||
|
||||
|
||||
def f(x, y, delta=1):
|
||||
"""
|
||||
X f Y
|
||||
x finishes y (x starts after y, x and y end together)
|
||||
"""
|
||||
# delta disregarded here
|
||||
return x.stime > y.etime and abs(x.etime - y.etime) < delta
|
||||
|
||||
def fi(x, y, delta=1):
|
||||
"""
|
||||
X fi Y
|
||||
inverse x finishes y (x is finished by y)
|
||||
"""
|
||||
return x.stime < y.etime and abs(x.etime - y.etime) < delta
|
||||
|
||||
def s(x, y, delta=1):
|
||||
"""
|
||||
X s Y
|
||||
x starts y (x ends before y, x and y start together)
|
||||
"""
|
||||
return x.etime < y.etime and abs(x.stime - y.stime) < delta
|
||||
|
||||
def si(x, y, delta=1):
|
||||
"""
|
||||
X si Y
|
||||
inverse x starts y (x is started by y)
|
||||
"""
|
||||
# delta disregarded here
|
||||
return x.etime > y.etime and abs(x.stime - y.stime) < delta
|
||||
|
||||
def EQ(x, y, delta=1):
|
||||
"""
|
||||
X = Y
|
||||
x and y start and end at the same times (within delta)
|
||||
"""
|
||||
# delta disregarded here
|
||||
return abs(x.stime - y.stime) < delta and abs(x.etime - y.etime) < delta
|
BIN
allen_ops.pyc
Normal file
Binary file not shown.
5
custops.py
Normal file
@@ -0,0 +1,5 @@
def minus(*args):
    res = args[0]
    for arg in args[1:]:
        res -= arg
    return res
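minus simply folds subtraction over its arguments from left to right; a quick hypothetical check:

    >>> from custops import minus
    >>> minus(10, 3, 2)    # 10 - 3 - 2
    5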
BIN
custops.pyc
Normal file
Binary file not shown.
197
filter.py
Normal file
|
@@ -0,0 +1,197 @@
|
|||
from copy import deepcopy
|
||||
from copy import copy
|
||||
from statement import Field
|
||||
from record import RecordReader
|
||||
import time
|
||||
import profiler
|
||||
|
||||
class NoMatch(Exception):
|
||||
pass
|
||||
|
||||
class Filter(object):
|
||||
def __init__(self,rules, records, br_mask, nbranches):
|
||||
self.rules = rules
|
||||
self.records = records
|
||||
self.br_mask = br_mask
|
||||
|
||||
# print "The filter has just been initiated"
|
||||
|
||||
# Iteration of the filter happens at the splitter function go()
|
||||
# In this iteration function, each of the records is matched
|
||||
# against all of the conditions in each of the filters, and based
|
||||
# on what condition it matches, it is assigned an appropriate
|
||||
# branch mask. I.e., if Branch A has a srcport=443, then the record
|
||||
# that matches this requirement gets a mask of [True, False], else
|
||||
# if Branch B's filter is matched, then a mask of [False, True] is
|
||||
# assigned.
|
||||
def __iter__(self):
|
||||
print "Started filtering"
|
||||
# start = time.clock()
|
||||
# print "Fitlering time started at:", start
|
||||
for record in self.records:
|
||||
self.br_mask.reset()
|
||||
try:
|
||||
for rule in self.rules:
|
||||
rule_result = rule.match(record)
|
||||
self.br_mask.mask(rule.branch_mask, rule_result)
|
||||
except NoMatch:
|
||||
continue
|
||||
|
||||
branches = self.br_mask.final_result()
|
||||
if True in branches:
|
||||
yield record, branches
|
||||
|
||||
|
||||
# print "Finished filtering"
|
||||
# time_elapsed = (time.clock() - start)
|
||||
# print "Filtering required:", time_elapsed
|
||||
|
||||
#class Field(object):
|
||||
# def __init__(self, name):
|
||||
# self.name = name
|
||||
# def __repr__(self):
|
||||
# return "Field('%s')"%self.name
|
||||
|
||||
# Implementation of a self-defined deepcopy function that operates
|
||||
# for the simple data types.
|
||||
|
||||
def deep_copy(org):
|
||||
out = dict().fromkeys(org)
|
||||
for k,v in org.iteritems():
|
||||
try:
|
||||
out[k] = v.copy() # dicts, sets
|
||||
except AttributeError:
|
||||
try:
|
||||
out[k] = v[:] # lists, tuples, strings, unicode
|
||||
except TypeError:
|
||||
out[k] = v # ints
|
||||
|
||||
return out
|
||||
|
||||
|
||||
|
||||
class BranchMask(object):
|
||||
def __init__(self, branch_masks, pseudo_branches, n_real_branches):
|
||||
self.masks = branch_masks
|
||||
# self.orig_mask = deepcopy(branch_masks)
|
||||
self.orig_mask = deep_copy(branch_masks)
|
||||
# self.pseudo_branches = deepcopy(pseudo_branches)
|
||||
self.pseudo_branches = deep_copy(pseudo_branches)
|
||||
self.n_real_branches = n_real_branches
|
||||
|
||||
|
||||
def reset(self):
|
||||
# self.masks = deepcopy(self.orig_mask)
|
||||
self.masks = deep_copy(self.orig_mask)
|
||||
#self.masks = copy(self.orig_mask)
|
||||
# self.masks = self.orig_mask
|
||||
|
||||
|
||||
def mask(self, sub_branches, result):
|
||||
for br, sub_br, NOT in sub_branches:
|
||||
res = not result if NOT else result
|
||||
if sub_br == 0:
|
||||
self.masks[br][sub_br] = self.masks[br][sub_br] and res
|
||||
else:
|
||||
self.masks[br][sub_br] = self.masks[br][sub_br] or res
|
||||
|
||||
|
||||
def final_result(self):
|
||||
final_mask = {}
|
||||
|
||||
for br, mask in self.masks.iteritems():
|
||||
final_mask[br] = True if False not in mask else False
|
||||
result = []
|
||||
for id in xrange(self.n_real_branches):
|
||||
try:
|
||||
result.append(final_mask[id])
|
||||
|
||||
except KeyError:
|
||||
gr_res = True
|
||||
for or_group in self.pseudo_branches[id]:
|
||||
res = False
|
||||
for ref in or_group:
|
||||
if ref[1]:
|
||||
res = res or not final_mask[ref[0]]
|
||||
else:
|
||||
res = res or final_mask[ref[0]]
|
||||
|
||||
gr_res = gr_res and res
|
||||
|
||||
result.append(gr_res)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class Rule(object):
|
||||
def __init__(self, branch_mask, operation, args):
|
||||
self.operation = operation
|
||||
self.args = args
|
||||
self.branch_mask = branch_mask
|
||||
|
||||
# This match operation is used at both the filtering and group-filtering
|
||||
# stages, since group-filter also relies on this Rule class.
|
||||
def match(self, record):
|
||||
args = []
|
||||
for arg in self.args:
|
||||
if type(arg) is Field: # Used at both the filtering and group-filtering stages
|
||||
args.append(getattr(record, arg.name))
|
||||
elif type(arg) is Rule: # Used only at the group-filtering stage
|
||||
args.append(arg.match(record))
|
||||
else: # Used at both stages. The actual argument numbers, i.e., port 80
|
||||
args.append(arg)
|
||||
|
||||
return self.operation(*args)
|
||||
|
||||
class PreSplitRule(Rule):
|
||||
def match(self,record):
|
||||
result = Rule.match(self,record)
|
||||
if not result:
|
||||
raise NoMatch()
|
||||
|
||||
class GroupFilter(object):
|
||||
def __init__(self, rules, records, branch_name, groups_table, index):
|
||||
self.rules = rules
|
||||
self.records = records
|
||||
self.branch_name = branch_name
|
||||
self.index = index
|
||||
self.groups_table = groups_table
|
||||
self.record_reader = RecordReader(self.groups_table)
|
||||
|
||||
def go(self):
|
||||
|
||||
count = 0
|
||||
for record in self.records:
|
||||
for or_rules in self.rules:
|
||||
matched = False
|
||||
for rule in or_rules:
|
||||
if rule.match(record):
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
break
|
||||
if matched:
|
||||
record.rec_id = count
|
||||
count += 1
|
||||
self.index.add(record)
|
||||
self.groups_table.append(record)
|
||||
print "Finished filtering groups for branch " + self.branch_name
|
||||
self.groups_table.flush()
|
||||
|
||||
def __iter__(self):
|
||||
for rec in self.record_reader:
|
||||
yield rec
|
||||
|
||||
class AcceptGroupFilter(GroupFilter):
|
||||
def __init__(self, records, branch_name, groups_table, index):
|
||||
GroupFilter.__init__(self, None, records, branch_name, groups_table,
|
||||
index)
|
||||
def go(self):
|
||||
count = 0
|
||||
for record in self.records:
|
||||
record.rec_id = count
|
||||
count += 1
|
||||
self.index.add(record)
|
||||
self.groups_table.append(record)
|
||||
print "Finished filtering groups for branch " + self.branch_name
|
||||
self.groups_table.flush()
|
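The mask bookkeeping above (AND for a filter's plain rules in sub-slot 0, OR across the sub-slots created for ORed rules, and a branch passing only if no False remains) can be illustrated with hypothetical values, independently of the classes in this file:

    # sketch only: branch 0 has one plain rule plus one ORed pair of rules
    masks = {0: [True, False]}            # slot 0: ANDed rules, slot 1: ORed group
    masks[0][0] = masks[0][0] and True    # the plain rule matched
    masks[0][1] = masks[0][1] or False    # first ORed rule failed
    masks[0][1] = masks[0][1] or True     # second ORed rule matched
    passes = False not in masks[0]        # True, so the record reaches branch 0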
BIN
filter.pyc
Normal file
Binary file not shown.
178
filter_validator.py
Normal file
|
@@ -0,0 +1,178 @@
|
|||
from validator_common import *
|
||||
from copy import deepcopy
|
||||
from record import RecordReader
|
||||
from statement import FilterRef
|
||||
from filter import Rule as RuleImpl
|
||||
from filter import Filter as FilterImpl
|
||||
from filter import BranchMask
|
||||
|
||||
class FilterValidator(object):
|
||||
def __init__(self, parser):
|
||||
self.parser = parser
|
||||
self.n_real_branches = len(self.parser.branch_names)
|
||||
self.filters = deepcopy(parser.filters)
|
||||
self.filter_names = dict((filter.name, filter) for filter in self.filters)
|
||||
self.branch_names = self.parser.branch_names # note! not a copy
|
||||
# get_input_fields_types() comes from validator_common.py
|
||||
# get_input_reader()comes from validator_common.py, takes parsed query
|
||||
# as an input and returns a reader for the parser's input - a reader
|
||||
# object for an HDF table of flow records
|
||||
self.fields = get_input_fields_types(get_input_reader(self.parser)).keys()
|
||||
self.pseudo_branches = {}
|
||||
# Argument is a reader object that has an access to the description of the
|
||||
# stored records, and can create a list of available fields
|
||||
self.input_reader = RecordReader(get_input_reader(parser))
|
||||
self.impl = self.create_impl()
|
||||
|
||||
def check_for_unused_filters(self):
|
||||
for filter in self.filters:
|
||||
if len(filter.branches) == 0:
|
||||
msg = "Warning filter %s "%filter.name
|
||||
msg += "defined on line %s"%filter.line
|
||||
msg += " is not used in any branch."
|
||||
print msg
|
||||
continue # skips unused filters
|
||||
|
||||
def check_duplicate_filter_names(self):
|
||||
duplicates = {}
|
||||
for filter in self.filters:
|
||||
old_val = duplicates.setdefault(filter.name, 0)
|
||||
duplicates[filter.name] = old_val + 1
|
||||
|
||||
duplicate_names = [k for k,v in duplicates.iteritems() if v > 1]
|
||||
if len(duplicate_names) > 0:
|
||||
msg = "Filter(s) %s"%duplicate_names
|
||||
msg += " is/are all defined more than once."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
def check_field_refs(self):
|
||||
"Check record field references, for unknown fields"
|
||||
for filter in self.filters:
|
||||
for rule in iterate_rules(filter):
|
||||
check_rule_fields(rule, self.fields)
|
||||
|
||||
def change_branch_names_to_id(self):
|
||||
"""
|
||||
Turn branch names into numerical ids. This helps with mask creation.
|
||||
"""
|
||||
# create numerical branch id's:
|
||||
self.branches_ids = dict((branch, id)
|
||||
for id, branch in enumerate(self.parser.branch_names))
|
||||
self.ids_branches = dict((id, branch)
|
||||
for id, branch in enumerate(self.parser.branch_names))
|
||||
for filter in self.filters:
|
||||
filter.branches = [self.branches_ids[br] for br in filter.branches]
|
||||
|
||||
def create_pseudobranches(self):
|
||||
"""
|
||||
Finds all Filter ref's and adds their branches to the referenced
|
||||
filters. If a filter is ORed with another a new branch is created for
|
||||
each OR-ed rule.
|
||||
"""
|
||||
|
||||
max_id = len(self.branches_ids)
|
||||
for filter in self.filters:
|
||||
for or_rule in filter.rules:
|
||||
if type(or_rule[0]) is not FilterRef:
|
||||
# Not a composite rule, so there can't be need for
|
||||
# pseudo branches
|
||||
break
|
||||
if len(or_rule) == 1:
|
||||
# Not an ORed FilterRef. Just add FilterRef's branches
|
||||
# to the referenced filter
|
||||
ref_filt = self.parser.names[or_rule[0].name]
|
||||
ref_filt.branches.update(filter.branches)
|
||||
else:
|
||||
# ORed FilterRef: create pseudo branches
|
||||
pseudo_branch_group = []
|
||||
for br in filter.branches:
|
||||
for filter_ref in or_rule:
|
||||
try:
|
||||
ref_filt = self.filter_names[filter_ref.name]
|
||||
except KeyError, ex:
|
||||
msg = "Filter %s referenced in "%ex.message
|
||||
msg += "%s is not defined"%filter.name
|
||||
raise SyntaxError(msg)
|
||||
id = max_id
|
||||
max_id += 1
|
||||
self.branch_names.add(id)
|
||||
ref_filt.branches.append(id)
|
||||
pseudo_branch_group.append((id, filter_ref.NOT))
|
||||
ps_br_set = self.pseudo_branches.setdefault(br, [])
|
||||
ps_br_set.append(pseudo_branch_group)
|
||||
|
||||
def create_masks(self):
|
||||
branches_masks = {}
|
||||
rule_masks = {}
|
||||
for filter in self.filters:
|
||||
if type(filter.rules[0][0]) is FilterRef:
|
||||
continue
|
||||
for branch in filter.branches:
|
||||
|
||||
for or_rule in filter.rules:
|
||||
if len(or_rule) == 1:
|
||||
#not an OR rule:
|
||||
branches_masks.setdefault(branch,[True])[0] = True
|
||||
sub_br_id = 0
|
||||
else:
|
||||
branches_masks.setdefault(branch,
|
||||
[True]).append(False)
|
||||
sub_br_id = len(branches_masks[branch]) - 1
|
||||
|
||||
for rule in or_rule:
|
||||
rule_masks.setdefault(rule,[]).append((branch,
|
||||
sub_br_id,
|
||||
rule.NOT))
|
||||
|
||||
self.branches_masks = branches_masks
|
||||
self.rule_masks = rule_masks
|
||||
|
||||
def create_rule_implementations(self):
|
||||
rules = []
|
||||
for rule, br_mask in self.rule_masks.iteritems():
|
||||
# print rule, br_mask
|
||||
self.replace_nested_rules(rule)
|
||||
# print rule, br_mask
|
||||
op = find_op(rule)
|
||||
args = rule.args
|
||||
rules.append(RuleImpl(br_mask, op, args))
|
||||
|
||||
return rules
|
||||
|
||||
def replace_nested_rules(self, rule):
|
||||
if Rule not in map(type, rule.args):
|
||||
op = find_op(rule)
|
||||
args = rule.args
|
||||
return RuleImpl(None, op, args)
|
||||
|
||||
for i, arg in enumerate(rule.args):
|
||||
if type(arg) is Rule:
|
||||
rule.args[i] = self.replace_nested_rules(arg)
|
||||
|
||||
def validate(self):
|
||||
self.check_duplicate_filter_names()
|
||||
self.check_field_refs()
|
||||
self.change_branch_names_to_id()
|
||||
for filter in self.filters:
|
||||
replace_bound_rules(filter)
|
||||
replace_with_vals(filter)
|
||||
|
||||
self.create_pseudobranches()
|
||||
self.check_for_unused_filters()
|
||||
self.create_masks()
|
||||
|
||||
def create_impl(self):
|
||||
self.validate()
|
||||
rules = self.create_rule_implementations()
|
||||
pseudo_branches = self.pseudo_branches
|
||||
branch_masks = self.branches_masks
|
||||
br_mask = BranchMask(branch_masks, pseudo_branches,
|
||||
self.n_real_branches)
|
||||
|
||||
filter_impl = FilterImpl(rules, self.input_reader, br_mask,
|
||||
self.n_real_branches)
|
||||
|
||||
|
||||
return filter_impl
|
||||
|
||||
|
BIN
filter_validator.pyc
Normal file
Binary file not shown.
BIN
flowy-py-files.tar.gz
Normal file
Binary file not shown.
BIN
flowy-run/GroupsA-merged.h5
Normal file
Binary file not shown.
BIN
flowy-run/GroupsA.h5
Normal file
Binary file not shown.
BIN
flowy-run/GroupsB-merged.h5
Normal file
Binary file not shown.
BIN
flowy-run/GroupsB.h5
Normal file
Binary file not shown.
BIN
flowy-run/GroupsC.h5
Normal file
Binary file not shown.
BIN
flowy-run/GroupsD.h5
Normal file
Binary file not shown.
BIN
flowy-run/MergedM.h5
Normal file
Binary file not shown.
32
flowy.py
Executable file
@@ -0,0 +1,32 @@
#!/usr/bin/python
import options
from optparse import OptionParser
import flowy_exec
import sys
import ply

if __name__ == '__main__':
    usage = 'usage: %prog [options] input_file.flw'
    p = OptionParser(usage)
    option_names = ['--time_index_interval_ms', '--max_unsatisfiable_deltas',
                    '--unsat_delta_threshold_mul', '--do_not_expand_groups']
    for opt_name in option_names:
        p.add_option(opt_name)
    opts, arguments = p.parse_args()

    for opt_name in map(lambda x: x[2:], option_names):
        opt = getattr(opts, opt_name)
        if opt:
            setattr(options, opt_name, opt)

    if len(arguments) != 1:
        sys.stderr.write('Exactly one argument expected\n')
        exit(1)

    file = arguments[0]

    try:
        flowy_exec.run(file)
    except (ply.yacc.YaccError, SyntaxError) as e:
        import sys
        sys.stderr.write(str(e)+'\n')
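Given the option plumbing above, a run such as the following (the numeric values are placeholders) writes the supplied overrides into the options module before flowy_exec.run is called:

    python flowy.py --max_unsatisfiable_deltas 20 --unsat_delta_threshold_mul 10 3http-download.flw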
142
flowy_exec.py
Normal file
|
@@ -0,0 +1,142 @@
|
|||
from parser import Parser
|
||||
from filter_validator import FilterValidator
|
||||
from splitter_validator import SplitterValidator
|
||||
from grouper_validator import GrouperValidator
|
||||
from groupfilter_validator import GroupFilterValidator
|
||||
from merger_validator import MergerValidator
|
||||
from ungrouper_validator import UngrouperValidator
|
||||
from threading import Thread
|
||||
import options
|
||||
import profiler
|
||||
import time
|
||||
#profiler.profile_on()
|
||||
start = time.clock()
|
||||
print start
|
||||
|
||||
def run(filename):
|
||||
|
||||
#valstart_elapsed = (time.clock() - start)
|
||||
#print "Parsing and validation started:", valstart_elapsed
|
||||
|
||||
p = Parser()
|
||||
|
||||
file = open(filename)
|
||||
doc = file.read()
|
||||
|
||||
p.parse(doc)
|
||||
|
||||
#inps = get_inputs_list(p)
|
||||
#print get_input_fields_types(inps[0])
|
||||
# hdf_file = "../testFT2.h5"
|
||||
# r = pytables.FlowRecordsTable(hdf_file)
|
||||
# recordReader = record.RecordReader(r)
|
||||
f = FilterValidator(p)
|
||||
# fl = f.impl
|
||||
s = SplitterValidator(p, f)
|
||||
spl = s.impl
|
||||
|
||||
|
||||
gr = GrouperValidator(p, s)
|
||||
# grs = gr.impl
|
||||
|
||||
gr_filt = GroupFilterValidator(p, gr)
|
||||
# Returns a number of group-filter instances
|
||||
# with accordance to the number of branches.
|
||||
gr_filters = gr_filt.impl
|
||||
|
||||
|
||||
mr = MergerValidator(p, gr_filt)
|
||||
mergers = mr.impl
|
||||
|
||||
#valend_elapsed = (time.clock() - start)
|
||||
#print "Parsing and validation finished:", valend_elapsed
|
||||
|
||||
splitter_thread = Thread(target=spl.go)
|
||||
|
||||
gf_threads = [Thread(target=gf.go)for gf in gr_filters]
|
||||
|
||||
splitter_elapsed = (time.clock() - start)
|
||||
print "Splitter time estarted:", splitter_elapsed
|
||||
splitter_thread.start()
|
||||
|
||||
|
||||
|
||||
groupfil_start= (time.clock() - start)
|
||||
print "Group filter time started:", groupfil_start
|
||||
for gf_thread in gf_threads:
|
||||
gf_thread.start()
|
||||
|
||||
#Originally it was after gf_thread.start()
|
||||
splitter_thread.join()
|
||||
print "Splitter finished"
|
||||
|
||||
splitter_elapsed = (time.clock() - start)
|
||||
print "Splitter time elapsed:", splitter_elapsed
|
||||
|
||||
for gf_thread in gf_threads:
|
||||
gf_thread.join()
|
||||
|
||||
groupfil_elapsed = (time.clock() - start)
|
||||
print "Group filter threads joined:", groupfil_elapsed
|
||||
|
||||
merger_threads = [Thread(target=m.go) for m in mergers]
|
||||
for merger_thread in merger_threads:
|
||||
merger_thread.start()
|
||||
|
||||
|
||||
for merger_thread in merger_threads:
|
||||
merger_thread.join()
|
||||
|
||||
|
||||
merger_elapsed = (time.clock() - start)
|
||||
print "Merger time elapsed:", merger_elapsed
|
||||
|
||||
|
||||
ung = UngrouperValidator(p, mr)
|
||||
ungroupers = ung.impl
|
||||
|
||||
ungrouper_threads = [Thread(target=u.go) for u in ungroupers]
|
||||
for ungrouper_thread in ungrouper_threads:
|
||||
ungrouper_thread.start()
|
||||
|
||||
for ungrouper_thread in ungrouper_threads:
|
||||
ungrouper_thread.join()
|
||||
|
||||
|
||||
# profiler.profile_off()
|
||||
# import pickle
|
||||
# stats = profiler.get_profile_stats()
|
||||
# sorted_stats = sorted(stats.iteritems(), key=lambda a: a[1][1]/a[1][0])
|
||||
# for st in sorted_stats:
|
||||
# print st
|
||||
# print ' '
|
||||
|
||||
print "FINISHED!"
|
||||
overall_elapsed = (time.clock() - start)
|
||||
print "Overall time elapsed:", overall_elapsed
|
||||
# fname = mergers[0].merger_table.tuples_table.file_path
|
||||
# print fname
|
||||
|
||||
|
||||
|
||||
import ft2hdf
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
options.delete_temp_files = True
|
||||
import ply
|
||||
# import profiler
|
||||
# profiler.profile_on()
|
||||
run('www_one_dir.flw')
|
||||
#
|
||||
#
|
||||
# profiler.profile_off()
|
||||
# import pickle
|
||||
# stats = profiler.get_profile_stats()
|
||||
# sorted_stats = sorted(stats.iteritems(), key=lambda a: a[1][0])
|
||||
# for st in sorted_stats:
|
||||
# print st
|
||||
#
|
||||
# f = open('./profile_stats1', 'w')
|
||||
# pickle.dump(sorted_stats,f)
|
BIN
flowy_exec.pyc
Normal file
Binary file not shown.
183
ft2hdf.py
Executable file
|
@@ -0,0 +1,183 @@
|
|||
#!/usr/bin/python
|
||||
from pytables import FlowRecordsTable
|
||||
import pytables
|
||||
import ftreader
|
||||
import record
|
||||
import os
|
||||
from os.path import split, join, islink
|
||||
import re
|
||||
import sys
|
||||
from bisect import bisect, bisect_left
|
||||
from operator import itemgetter
|
||||
from optparse import OptionParser
|
||||
|
||||
#def ft2hdf(ft_file, hdf_file):
|
||||
# ft_fields = ftreader.find_fields(ft_file)
|
||||
# fields = ftreader.translate_field_names(ft_fields,
|
||||
# ftreader.default_names_dict)
|
||||
# field_types = dict((field,pytables.default_ft_types[field])
|
||||
# for field in fields)
|
||||
## print field_types
|
||||
# pytables.create_table_file(hdf_file, field_types)
|
||||
# rec_table = pytables.FlowRecordsTable(hdf_file)
|
||||
# # since pytables is initiated with dictionary there is no way to
|
||||
# # sort the fields order, so we have to translate back in order
|
||||
# # to keep the fields names order
|
||||
# ordered_ft_fields = ftreader.translate_field_names(rec_table.fields,
|
||||
# ftreader.reverse_names_dict)
|
||||
# flow_set = ftreader.FlowToolsReader(ft_file, ordered_ft_fields)
|
||||
# for flow in flow_set:
|
||||
# rec_table.append(flow)
|
||||
# rec_table.close()
|
||||
|
||||
|
||||
def ft2hdf_single(ft_file, hdf_file):
|
||||
ft_fields = ftreader.find_fields(ft_file)
|
||||
fields = ftreader.translate_field_names(ft_fields,
|
||||
ftreader.default_names_dict)
|
||||
field_types = dict((field,pytables.default_ft_types[field])
|
||||
for field in fields)
|
||||
# print field_types
|
||||
pytables.create_table_file(hdf_file, field_types)
|
||||
rec_table = pytables.FlowRecordsTable(hdf_file)
|
||||
# since pytables is initiated with dictionary there is no way to
|
||||
# sort the fields order, so we have to translate back in order
|
||||
# to keep the fields names order
|
||||
ordered_ft_fields = ftreader.translate_field_names(rec_table.fields,
|
||||
ftreader.reverse_names_dict)
|
||||
flow_set = ftreader.FlowToolsReader(ft_file,
|
||||
ordered_ft_fields, rec_table.fields[1:])
|
||||
rec_set = record.RecordReader(flow_set)
|
||||
for flow in rec_set:
|
||||
rec_table.append(flow)
|
||||
rec_table.close()
|
||||
|
||||
def ft2hdf(many_files, hdf_file):
|
||||
ft_file = many_files[0]
|
||||
ft_fields = ftreader.find_fields(ft_file) # returns fields present in the flow record
|
||||
fields = ftreader.translate_field_names(ft_fields, ftreader.default_names_dict)
|
||||
field_types = dict((field,pytables.default_ft_types[field]) for field in fields)
|
||||
# print ft_fields
|
||||
# print fields
|
||||
pytables.create_table_file(hdf_file, field_types)
|
||||
rec_table = pytables.FlowRecordsTable(hdf_file)
|
||||
# since pytables is initiated with dictionary there is no way to
|
||||
# sort the fields order, so we have to translate back in order
|
||||
# to keep the fields names order
|
||||
ordered_ft_fields = ftreader.translate_field_names(rec_table.fields, ftreader.reverse_names_dict)
|
||||
|
||||
for ft_file in many_files:
|
||||
flow_set = ftreader.FlowToolsReader(ft_file, ordered_ft_fields, rec_table.fields[1:]) # all fields except 'id_rec'
|
||||
rec_set = record.RecordReader(flow_set)
|
||||
for flow in rec_set:
|
||||
rec_table.append(flow)
|
||||
rec_table.close()
|
||||
|
||||
def printHDF(hdf_file):
|
||||
r = pytables.FlowRecordsTable(hdf_file)
|
||||
recordReader = record.RecordReader(r)
|
||||
for rec in recordReader:
|
||||
print rec
|
||||
|
||||
class FSLoop(Exception):
|
||||
pass
|
||||
|
||||
def findFiles(path, start_time, end_time, filter_files = False):
|
||||
timeExp = re.compile(r"ft-v05\.(\d{4})-(\d{2})-(\d{2}).(\d{6}).(\d{4})")
|
||||
|
||||
time_file_list = []
|
||||
dir_links = [path]
|
||||
def walkDirs(dir_links):
|
||||
file_list = []
|
||||
more_dir_links = []
|
||||
for link in dir_links:
|
||||
for root, dirs, files in os.walk(link):
|
||||
for file in files:
|
||||
match = timeExp.search(file)
|
||||
if match:
|
||||
element = (int(''.join(match.groups()[:-1])), join(root,file))
|
||||
if element in time_file_list:
|
||||
raise FSLoop
|
||||
file_list.append(element)
|
||||
for dir in dirs:
|
||||
if islink(join(root,dir)):
|
||||
print file
|
||||
more_dir_links.append(join(root,dir))
|
||||
return file_list, more_dir_links
|
||||
|
||||
while len(dir_links) > 0:
|
||||
tf, dir_links = walkDirs(dir_links)
|
||||
time_file_list.extend(tf)
|
||||
|
||||
def cmp((a,x),(b,y)):
|
||||
if a-b < 0:
|
||||
return -1
|
||||
elif a-b>0:
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
time_file_list.sort(cmp)
|
||||
|
||||
if (filter_files):
|
||||
keys = [r[0] for r in time_file_list]
|
||||
begin = 0
|
||||
end = len(time_file_list)
|
||||
if start_time is not None:
|
||||
begin = bisect_left(keys, long(start_time))
|
||||
if end_time is not None:
|
||||
end = bisect(keys, long(end_time))
|
||||
# the start and end time must be converted to long
|
||||
time_file_list = time_file_list[begin:end]
|
||||
|
||||
time_file_list = map(lambda (x,y):y,time_file_list)
|
||||
return time_file_list
|
||||
|
||||
def dateToInt(date):
|
||||
number_of_digits = [4, 2, 2, 2, 2, 2]
|
||||
separators = '[- :/]*'
|
||||
expr = "\d{%s}"%number_of_digits[0]
|
||||
for digit in number_of_digits[1:]:
|
||||
expr += separators + "(\d{%s})"%digit
|
||||
timeExp = re.compile(expr)
|
||||
result = timeExp.match(date)
|
||||
if result is None:
|
||||
raise ValueError("invalid date format")
|
||||
return date.translate(None, '- :/')
|
||||
|
||||
def lotsOfFolders(paths, start_time=None, end_time=None):
|
||||
full_file_paths=[]
|
||||
start_time, end_time = [dateToInt(d) if d != None else d for d in (start_time, end_time)]
|
||||
for path in paths:
|
||||
full_file_paths.extend(findFiles(path, start_time, end_time, True))
|
||||
# sort the results
|
||||
split_paths = map(split, full_file_paths)
|
||||
split_paths = set(split_paths)
|
||||
split_paths = sorted(split_paths, key=itemgetter(1))
|
||||
full_file_paths = [join(x, y) for x, y in split_paths]
|
||||
|
||||
return full_file_paths
|
||||
|
||||
def main():
|
||||
usage = 'usage: %prog [options] input_path1 [input_path2 [...]] output_file.h5'
|
||||
p = OptionParser(usage)
|
||||
p.add_option('--start-time', '-s')
|
||||
p.add_option('--end-time', '-e')
|
||||
options, arguments = p.parse_args()
|
||||
start_time = options.start_time
|
||||
end_time = options.end_time
|
||||
folders = arguments[:-1]
|
||||
output = arguments[-1]
|
||||
if not (output[output.find('.h5'):] == '.h5'):
|
||||
sys.stderr.write('Output file should have an .h5 extension\n')
|
||||
exit(1)
|
||||
file_paths = lotsOfFolders(folders, start_time,end_time)
|
||||
if len(file_paths) < 1:
|
||||
sys.stderr.write('No flow-tools files found\n')
|
||||
exit(1)
|
||||
ft2hdf(file_paths, output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
BIN
ft2hdf.pyc
Normal file
Binary file not shown.
109
ftreader.py
Normal file
|
@@ -0,0 +1,109 @@
|
|||
import flowtools
|
||||
from os.path import exists
|
||||
|
||||
default_names_dict = {
|
||||
'dFlows': 'dFlows', 'dOctets': 'bytes', 'dPkts': 'dPkts',
|
||||
'dst_as': 'dst_as', 'dst_mask': 'dst_mask', 'dst_tag': 'dst_tag',
|
||||
'dstaddr_raw': 'dstip', 'dstport': 'dstport',
|
||||
'engine_id': 'engine_id', 'engine_type': 'engine_type',
|
||||
'exaddr_raw': 'exaddr', 'extra_pkts': 'extra_pkts',
|
||||
'first_raw': 'stime', 'in_encaps': 'in_encaps',
|
||||
'input': 'input', 'last_raw': 'etime', 'marked_tos': 'marked_tos',
|
||||
'nexthop_raw': 'nexthop', 'out_encaps': 'out_encaps',
|
||||
'output': 'output', 'peer_nexthop_raw': 'peer_nexthop',
|
||||
'prot': 'prot', 'router_sc': 'router_sc', 'src_as': 'src_as',
|
||||
'src_mask': 'src_mask', 'src_tag': 'src_tag',
|
||||
'srcaddr_raw': 'srcip', 'srcport': 'srcport',
|
||||
'sysUpTime': 'sysUpTime', 'tcp_flags': 'tcp_flags',
|
||||
'tos': 'tos', 'unix_nsecs': 'unix_nsecs',
|
||||
'unix_secs': 'unix_secs'}
|
||||
|
||||
reverse_names_dict = dict(zip(default_names_dict.values(),
|
||||
default_names_dict.keys()))
|
||||
|
||||
# list of the possible fields in the flow tools file
|
||||
flow_tools_fields = ['dFlows', 'dOctets', 'dPkts', 'dst_as', 'dst_mask',
|
||||
'dst_tag', 'dstaddr_raw', 'dstport', 'engine_id',
|
||||
'engine_type', 'exaddr_raw', 'extra_pkts', 'first_raw',
|
||||
'in_encaps', 'input', 'last_raw', 'marked_tos',
|
||||
'nexthop_raw', 'out_encaps', 'output', 'peer_nexthop_raw',
|
||||
'prot', 'router_sc', 'src_as', 'src_mask', 'src_tag',
|
||||
'srcaddr_raw', 'srcport', 'sysUpTime', 'tcp_flags', 'tos',
|
||||
'unix_nsecs', 'unix_secs']
|
||||
|
||||
def find_fields(flowtools_file, fields_of_interest=flow_tools_fields):
|
||||
'''
|
||||
Returns list of fields_of_interest which are present in
|
||||
flowtools_file.
|
||||
Arguments:
|
||||
flowtools_file - path to flowtools records file
|
||||
fields_of_interest - names of the fields for which to check
|
||||
if none is given all possible fields are searched for.
|
||||
'''
|
||||
# read first record to see which fields are present:
|
||||
flowset = flowtools.FlowSet(flowtools_file)
|
||||
flow = iter(flowset).next()
|
||||
|
||||
# Find which fields are present in the file
|
||||
# (The flow record should have these attributes):
|
||||
present_fields = [k for k in fields_of_interest if hasattr(flow,k)]
|
||||
return present_fields
|
||||
|
||||
def translate_field_names(fields_list, dictionary):
|
||||
'''
|
||||
Translates names of fields which have keys in the dictionary.
|
||||
For names not present in dictionary the name remains unchanged.
|
||||
'''
|
||||
return [dictionary[k] for k in fields_list if dictionary.has_key(k)]
|
||||
|
||||
def create_flowtools_value_reader(fields):
|
||||
def get_fields(record):
|
||||
x = tuple(getattr(record,attr) for attr in fields)
|
||||
return x
|
||||
|
||||
return get_fields
|
||||
|
||||
|
||||
class FlowToolsReader(object):
|
||||
|
||||
def __init__(self, path, ft_fields=None, fields=None):
|
||||
self.ft_fields = ft_fields if ft_fields else flow_tools_fields
|
||||
self.fields = fields if fields else ft_fields
|
||||
self.fields = ('rec_id',) + self.fields
|
||||
self.get_vals = create_flowtools_value_reader(self.ft_fields)
|
||||
if exists(path):
|
||||
self.path = path
|
||||
else:
|
||||
raise IOError("File %s cannot be accessed."%path)
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
flowset = flowtools.FlowSet(self.path)
|
||||
for id, flow in enumerate(flowset):
|
||||
yield (id,) + self.get_vals(flow)
|
||||
raise StopIteration
|
||||
|
||||
|
||||
|
||||
#ft_file = "../ft-v05.2008-10-02.120001+0200"
|
||||
#ft_fields = find_fields(ft_file)
|
||||
#print ft_fields
|
||||
#fields = translate_field_names(ft_fields, default_names_dict)
|
||||
#import pytables
|
||||
#field_types = dict((field,pytables.default_ft_types[field]) for field in fields)
|
||||
#ordered_ft_fields = translate_field_names(field_types.keys(), reverse_names_dict)
|
||||
#print ordered_ft_fields
|
||||
#flow_set = FlowToolsReader(ft_file, ft_fields, ft_fields)
|
||||
#import record
|
||||
#rec_set = record.RecordReader(flow_set)
|
||||
#print len(flow_set.fields)
|
||||
#unix_secs = 0
|
||||
#sysuptime = 0
|
||||
#uptime_set = set()
|
||||
#for i, flow in enumerate(rec_set):
|
||||
# if sysuptime != flow.sysUpTime:
|
||||
# sysuptime = flow.sysUpTime
|
||||
# uptime_set.add(sysuptime)
|
||||
# print i, 'ut', flow.sysUpTime - flow.last_raw, 'usecs', flow.unix_secs, 'first - last', flow.last_raw - flow.first_raw
|
||||
#
|
||||
#print uptime_set
|
BIN
ftreader.pyc
Normal file
Binary file not shown.
7
gnuplot-http.dat
Normal file
@@ -0,0 +1,7 @@
# Records # Splitter(s) Grouper(s) Merger(s) Branch A Branch B Records Match
3811 0.6 0.74 2.81 146 143 68
26521 24.8 34.95 144.75 1800 1816 1683
56992 53.06 57.68 443.36 1985 2004 2438
99925 100.03 136.09 960.34 3644 3684 4038
298063 475.83 1415.34 11485 16412 16666 15131
916633 1706.32 50141 50338
6
gnuplot-https.dat
Normal file
@@ -0,0 +1,6 @@
# Records Splitter(s) Grouper(s) Merger(s) Branch A Branch B Records Match
26521 6.1 6.17 6.23 243 243 486
56992 13.2 13.2 13.23 158 61 219
99925
298063
916633
5
gnuplot.dat
Normal file
@@ -0,0 +1,5 @@
# Records # Splitter Grouper Merger
3811
26521
56992
298063
238
grouper.py
Normal file
|
@@ -0,0 +1,238 @@
|
|||
import record
|
||||
import options
|
||||
from aggr_operators import count
|
||||
import time
|
||||
import profiler
|
||||
|
||||
class UnsatisfiableDelta(Exception):
|
||||
pass
|
||||
|
||||
class Grouper(object):
|
||||
def __init__(self, id, modules, aggr_ops, records, branch_name):
|
||||
self.modules = modules
|
||||
self.records = records
|
||||
self.aggr_ops = aggr_ops
|
||||
self.group_record_fields = self.create_gr_record_fields_list()
|
||||
self.group_record_fields = ('rec_id',) + self.group_record_fields
|
||||
self.group_record_types = self.create_gr_record_fields_types()
|
||||
self.group_records = []
|
||||
self.branch_name = branch_name
|
||||
self.Record = record.get_record_class(self.group_record_fields)
|
||||
|
||||
#profiler.profile_on()
|
||||
|
||||
#profiler.profile_off()
|
||||
#import pickle
|
||||
#stats = profiler.get_profile_stats()
|
||||
#sorted_stats = sorted(stats.iteritems(), key=lambda a: a[1][1]/a[1][0])
|
||||
#for st in sorted_stats:
|
||||
# print st
|
||||
# print
|
||||
|
||||
def new_group(self, record):
|
||||
group = Group(record, self.modules, self.aggr_ops)
|
||||
return group
|
||||
|
||||
def __iter__(self):
|
||||
count = 0
|
||||
start2 = time.clock()
|
||||
#print "Grouping started at:", start2
|
||||
|
||||
# For each of the records that have passed either
|
||||
# of the branched conditions we try to find a
|
||||
for record in self.records:
|
||||
# print record
|
||||
matched = False
|
||||
count = count + 1
|
||||
# print len(self.group_records)
|
||||
del_list = []
|
||||
try:
|
||||
for i, group_record in enumerate(self.group_records):
|
||||
# print i
|
||||
if group_record.satisfiable:
|
||||
if group_record.match(record): # match from Group class
|
||||
matched = True
|
||||
break
|
||||
else:
|
||||
yield self.Record(*((count,)+group_record.export()))
|
||||
count += 1
|
||||
del_list.append(i)
|
||||
except ValueError:
|
||||
# Group Records list is empty
|
||||
# WARNING may catch ValueError from another place
|
||||
# group list is still empty
|
||||
matched = False # this will signal new group creation
|
||||
if not matched:
|
||||
self.group_records.append(self.new_group(record))
|
||||
|
||||
# remove exported groups:
|
||||
for n_removed, i in enumerate(del_list):
|
||||
# count removed elements with n_removed and compensate
|
||||
# because positions change when removing elements
|
||||
# Fortunately del_list is sorted so '-' works as
|
||||
# a compensation, as all removed elements are before the
|
||||
# current one
|
||||
del self.group_records[i - n_removed]
|
||||
|
||||
print "Number of records in branch "+self.branch_name, count
|
||||
|
||||
for group_record in self.group_records:
|
||||
yield self.Record(*((count,)+group_record.export()))
|
||||
count += 1
|
||||
print "Finished grouping branch "+self.branch_name
|
||||
#time_elapsed2 = (time.clock() - start2)
|
||||
#print "Grouping time finished for branch "+self.branch_name, time_elapsed2
|
||||
#print "Current time is: ", time.clock()
|
||||
|
||||
def create_gr_record_fields_list(self):
|
||||
field_list = []
|
||||
for module in self.modules:
|
||||
for op in module.aggr_ops:
|
||||
field_list.append(op.gr_field)
|
||||
|
||||
for op in self.aggr_ops:
|
||||
field_list.append(op.gr_field)
|
||||
return tuple(field_list)
|
||||
|
||||
def create_gr_record_fields_types(self):
|
||||
type_list = [None]
|
||||
for module in self.modules:
|
||||
for op in module.aggr_ops:
|
||||
type_list.append(op.field_type)
|
||||
|
||||
for op in self.aggr_ops:
|
||||
if type(op) == count:
|
||||
type_list[0] = op.field_type # set the type for rec_id
|
||||
type_list.append(op.field_type)
|
||||
return tuple(type_list)
|
||||
|
||||
class AggrOp(object):
|
||||
def __init__(self, op, field, gr_field, field_type):
|
||||
self.op = op
|
||||
self.field = field
|
||||
self.gr_field = gr_field # field name used for the grouping of a set of common entries
|
||||
self.field_type = field_type
|
||||
|
||||
def new_op(self):
|
||||
return self.op(self.field, self.gr_field, self.field_type)
|
||||
|
||||
class GrouperModule(object):
|
||||
def __init__(self, name, rules, aggr_ops):
|
||||
self.name = name
|
||||
self.rules = rules
|
||||
self.aggr_ops = aggr_ops # set of the defined aggregation operations, plus 3 implicit operations
|
||||
|
||||
def match(self, record, group):
|
||||
for rule in self.rules:
|
||||
if not rule.match(record, group):
|
||||
return False
|
||||
return True
|
||||
|
||||
class GrouperRule(object):
|
||||
def __init__(self, op, old_rec_field, new_record_field,
|
||||
delta=None, relative=False):
|
||||
self.op = op
|
||||
self.old_rec_field = old_rec_field
|
||||
self.new_rec_field = new_record_field
|
||||
self.delta = delta
|
||||
self.relative = relative
|
||||
self.is_shortcut = self.check_is_shortcut()
|
||||
# print self.op, self.old_rec_field, self.new_rec_field
|
||||
|
||||
def check_is_shortcut(self):
|
||||
if self.delta:
|
||||
if (self.old_rec_field in ('stime', 'etime') and
|
||||
self.new_rec_field in ('stime', 'etime')):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def match(self, record, group):
|
||||
new = getattr(record, self.new_rec_field)
|
||||
if self.relative:
|
||||
old = getattr(group.last_record, self.old_rec_field)
|
||||
else:
|
||||
old = getattr(group.first_record, self.old_rec_field)
|
||||
|
||||
if self.delta:
|
||||
if self.op(abs(new - old), self.delta):
|
||||
return True
|
||||
elif (self.is_shortcut and
|
||||
not self.op(abs(new - old),
|
||||
self.delta * options.unsat_delta_threshold_mul )):
|
||||
# print abs(new - old)/1000.0, (self.delta * options.unsat_delta_threshold_mul)/1000.0
|
||||
raise UnsatisfiableDelta
|
||||
else:
|
||||
return True
|
||||
else:
|
||||
return self.op(old, new)
|
||||
|
||||
class Group(object):
|
||||
__slots__ = ['modules', 'modules_aggr_ops', 'aggr_ops', 'records',
|
||||
'first_record', 'last_record', 'satisfiable',
|
||||
'n_unsatisfiable_deltas', 'max_unsat_deltas']
|
||||
|
||||
def __init__(self, first_record, modules, aggr_ops,
|
||||
max_unsat_deltas=options.max_unsatisfiable_deltas):
|
||||
self.first_record = first_record
|
||||
self.last_record = first_record # changes with each new matched record
|
||||
self.modules = modules
|
||||
# list of lists of aggr_ops each corresponding to a module
|
||||
self.modules_aggr_ops = self.create_modules_aggr()
|
||||
self.aggr_ops = self.create_aggr_ops(aggr_ops)
|
||||
self.satisfiable = True
|
||||
self.n_unsatisfiable_deltas = 0
|
||||
self.max_unsat_deltas = max_unsat_deltas
|
||||
|
||||
def create_modules_aggr(self):
|
||||
modules_aggr_ops = []
|
||||
for module in self.modules:
|
||||
aggr = [op.new_op() for op in module.aggr_ops]
|
||||
for op in aggr:
|
||||
op(self.first_record)
|
||||
modules_aggr_ops.append(aggr)
|
||||
return modules_aggr_ops
|
||||
|
||||
def create_aggr_ops(self, aggr_ops):
|
||||
aggr = [op.new_op() for op in aggr_ops]
|
||||
for op in aggr:
|
||||
op(self.first_record)
|
||||
return aggr
|
||||
|
||||
def match(self, record):
|
||||
matched = False
|
||||
for module, aggr_ops in zip(self.modules, self.modules_aggr_ops):
|
||||
try:
|
||||
if module.match(record, self):
|
||||
for op in aggr_ops:
|
||||
op(record)
|
||||
matched = True
|
||||
except UnsatisfiableDelta:
|
||||
if matched:
|
||||
continue
|
||||
self.n_unsatisfiable_deltas += 1
|
||||
if self.n_unsatisfiable_deltas > self.max_unsat_deltas:
|
||||
self.satisfiable = False
|
||||
|
||||
if matched:
|
||||
# self.aggr_ops contains the fields from the aggregation statement of the grouper module
|
||||
# as well as 3 other implicitly stated aggregation operations (etime, stime, records...)
|
||||
for aggr_op in self.aggr_ops:
|
||||
aggr_op(record)
|
||||
# print aggr_op.gr_field, aggr_op()
|
||||
# print self.records
|
||||
self.n_unsatisfiable_deltas = 0
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def export(self):
|
||||
fields = []
|
||||
for aggr_ops in self.modules_aggr_ops:
|
||||
for op in aggr_ops:
|
||||
fields.append(op())
|
||||
|
||||
for op in self.aggr_ops:
|
||||
fields.append(op())
|
||||
|
||||
return tuple(fields)
|
BIN
grouper.pyc
Normal file
BIN
grouper.pyc
Normal file
Binary file not shown.
179
grouper_validator.py
Normal file
179
grouper_validator.py
Normal file
|
@ -0,0 +1,179 @@
|
|||
from validator_common import *
|
||||
from copy import deepcopy
|
||||
from tables import UIntAtom, UIntCol
|
||||
from grouper import GrouperModule as GrouperModuleImpl
|
||||
from grouper import Grouper as GrouperImpl
|
||||
from grouper import GrouperRule as GrouperRuleImpl
|
||||
from grouper import AggrOp as AggrOpImpl
|
||||
import profiler
|
||||
|
||||
class GrouperValidator(object):
|
||||
def __init__(self, parser, splitter_validator):
|
||||
self.parser = parser
|
||||
self.fields_types = get_input_fields_types(
|
||||
get_input_reader(self.parser))
|
||||
self.groupers = deepcopy(parser.groupers)
|
||||
# print splitter_validator.br_name_to_br
|
||||
self.br_name_to_br = splitter_validator.br_name_to_br
|
||||
self.br_name_to_grouper = {}
|
||||
self.impl = self.create_impl()
|
||||
|
||||
|
||||
def validate(self):
|
||||
self.check_field_refs()
|
||||
self.check_duplicate_grouper_names()
|
||||
for grouper in self.groupers:
|
||||
self.check_duplicate_module_names(grouper)
|
||||
|
||||
for module in grouper.modules:
|
||||
# Both of these come from the validator_common.py
|
||||
# 'module' here iterates over the modules defined inside
|
||||
# each instance of the grouper
|
||||
#print module
|
||||
replace_bound_rules(module)
|
||||
replace_with_vals(module)
|
||||
|
||||
def check_duplicate_grouper_names(self):
|
||||
duplicates = {}
|
||||
for grouper in self.groupers:
|
||||
old_val = duplicates.setdefault(grouper.name, 0)
|
||||
duplicates[grouper.name] = old_val + 1
|
||||
|
||||
duplicate_names = [k for k,v in duplicates.iteritems() if v > 1]
|
||||
if len(duplicate_names) > 0:
|
||||
msg = "Grouper(s) %s"%duplicate_names
|
||||
msg += " is/are all defined more than once."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
def check_duplicate_module_names(self, grouper):
|
||||
duplicates = {}
|
||||
for module in grouper.modules:
|
||||
old_val = duplicates.setdefault(module.name, 0) # get current count, inserting 0 if the key is absent
|
||||
duplicates[module.name] = old_val + 1
|
||||
|
||||
duplicate_names = [k for k,v in duplicates.iteritems() if v > 1]
|
||||
if len(duplicate_names) > 0:
|
||||
msg = "Module(s) %s"%duplicate_names
|
||||
msg += " is/are all defined more than once in grouper"
|
||||
msg += " %s."%grouper.name
|
||||
raise SyntaxError(msg)
|
||||
|
||||
# Check for presence of the reference fields
|
||||
def check_field_refs(self):
|
||||
for grouper in self.groupers:
|
||||
for module in grouper.modules:
|
||||
for rule in module.rules:
|
||||
# Checks that the fields referenced in each rule exist among the input
|
||||
# record fields (passed as the second argument here). Defined in
|
||||
# validator_common
|
||||
check_rule_fields(rule[0], self.fields_types.keys())
|
||||
|
||||
# This section checks the correctness of the field names passed to the aggregator
|
||||
# section of the grouper stage. field_types are defined in init and are also
|
||||
# obtained in the validaton_common module.
|
||||
for aggr in grouper.aggr:
|
||||
for arg in aggr.args:
|
||||
if type(arg) == Field:
|
||||
mod, _, field = arg.name.partition('.')
|
||||
if field != '':
|
||||
if field not in self.fields_types.keys():
|
||||
msg = 'There is no such field %s, '%arg.name
|
||||
msg += 'referenced at line %s'%aggr.line
|
||||
raise SyntaxError(msg)
|
||||
else:
|
||||
if mod not in self.fields_types.keys():
|
||||
msg = 'There is no such field %s, '%arg.name
|
||||
msg += 'referenced at line %s'%aggr.line
|
||||
raise SyntaxError(msg)
|
||||
|
||||
# Build the grouper implementation: one GrouperImpl per branch the grouper is attached to.
|
||||
def create_grouper_rules_impl(self, grouper):
|
||||
modules_list = []
|
||||
for module in grouper.modules:
|
||||
rule_impl_list = self.convert_module_rules(module)
|
||||
aggr_ops_list = self.convert_module_aggr_ops(grouper, module)
|
||||
module_impl = GrouperModuleImpl(module.name, rule_impl_list,
|
||||
aggr_ops_list)
|
||||
modules_list.append(module_impl)
|
||||
|
||||
grouper_aggr_ops = []
|
||||
for aggr in grouper.aggr:
|
||||
init_args = self.create_aggr_impl_init_args(aggr)
|
||||
# print init_args
|
||||
spl = str.split(init_args[1], '.')
|
||||
if len(spl) > 1:
|
||||
msg = 'There is no such grouper module %s, '%spl
|
||||
msg += 'referenced on line %s'%aggr.line
|
||||
raise SyntaxError(msg)
|
||||
impl = AggrOpImpl(*init_args)
|
||||
grouper_aggr_ops.append(impl)
|
||||
|
||||
groupers = [GrouperImpl(grouper.name, modules_list, grouper_aggr_ops,
|
||||
self.br_name_to_br[br_name], br_name)
|
||||
for br_name in grouper.branches]
|
||||
|
||||
for grouper in groupers:
|
||||
self.br_name_to_grouper[grouper.branch_name] = grouper
|
||||
# print self.br_name_to_grouper
|
||||
return groupers
|
||||
|
||||
|
||||
def convert_module_aggr_ops(self, grouper, module):
|
||||
aggr_ops_list = []
|
||||
del_list = []
|
||||
for aggr in grouper.aggr:
|
||||
op, field, gr_field, field_type = self.create_aggr_impl_init_args(
|
||||
aggr)
|
||||
mod_name, _, f = str.partition(field, '.')
|
||||
if f != '':
|
||||
if module.name == mod_name:
|
||||
impl = AggrOpImpl(op, f, gr_field, field_type)
|
||||
aggr_ops_list.append(impl)
|
||||
del_list.append(aggr)
|
||||
|
||||
for a in del_list:
|
||||
grouper.aggr.remove(a)
|
||||
|
||||
return aggr_ops_list
|
||||
|
||||
def create_aggr_impl_init_args(self, aggr):
|
||||
field = aggr.args[0].name
|
||||
if '.' in field:
|
||||
_, _, non_qid_field = field.partition('.')
|
||||
else:
|
||||
non_qid_field = field
|
||||
gr_field = aggr.args[1]
|
||||
if aggr.op == 'count':
|
||||
field_type = UIntCol(self.fields_types['rec_id'].itemsize)
|
||||
elif aggr.op == 'union':
|
||||
field_type = UIntAtom(self.fields_types[non_qid_field].itemsize)
|
||||
else:
|
||||
field_type = UIntCol(self.fields_types[non_qid_field].itemsize)
|
||||
|
||||
op = find_op(aggr, 'aggr_operators')
|
||||
|
||||
return op, field, gr_field, field_type
|
||||
|
||||
def convert_module_rules(self, module):
|
||||
rule_impl_list = []
|
||||
for rules in module.rules:
|
||||
for rule in rules:
|
||||
op = find_op(rule)
|
||||
args = [arg.name if type(arg) is Field else arg
|
||||
for arg in rule.args]
|
||||
rule_impl_list.append(GrouperRuleImpl(op, *args))
|
||||
return rule_impl_list
|
||||
|
||||
def create_impl(self):
|
||||
self.validate()
|
||||
groupers_impls = []
|
||||
for grouper in self.groupers:
|
||||
groupers_impls.extend(self.create_grouper_rules_impl(grouper))
|
||||
|
||||
# print self.br_name_to_grouper
|
||||
for br_name in self.br_name_to_br.keys():
|
||||
if br_name not in self.br_name_to_grouper.keys():
|
||||
msg = 'There is no grouper for branch %s.'%br_name
|
||||
raise SyntaxError(msg)
|
||||
|
||||
return groupers_impls
|
BIN
grouper_validator.pyc
Normal file
BIN
grouper_validator.pyc
Normal file
Binary file not shown.
62
groupfilter.py
Normal file
62
groupfilter.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
from record import RecordReader
|
||||
from filter import Rule
|
||||
import profiler
|
||||
|
||||
class GroupFilter(object):
|
||||
def __init__(self, rules, records, branch_name, groups_table, index):
|
||||
self.rules = rules
|
||||
self.records = records
|
||||
self.branch_name = branch_name
|
||||
self.index = index
|
||||
self.groups_table = groups_table
|
||||
self.record_reader = RecordReader(self.groups_table)
|
||||
|
||||
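# Run the group filter: every group record that passes the filter rules gets a
# sequential rec_id, is registered in the time index and appended to the
# branch's groups table.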
def go(self):
|
||||
count = 0
|
||||
for record in self.records: # These are the grouped records according to the groupers/modules
|
||||
# print record
|
||||
matched = False
|
||||
for or_rules in self.rules:
|
||||
# matched = False
|
||||
for rule in or_rules: # This for-loop, just extracts the rule from the list
|
||||
# print rule
|
||||
if rule.match(record):
|
||||
# print rule.operation
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
break
|
||||
if matched:
|
||||
record.rec_id = count
|
||||
count += 1
|
||||
# Adds a record to the TimeIndex class' time interval
|
||||
# as an index value, over those times that the record
|
||||
# covers with its start-/end-time intervals.
|
||||
self.index.add(record)
|
||||
self.groups_table.append(record)
|
||||
print "Finished group-filtering for branch " + self.branch_name
|
||||
|
||||
self.groups_table.flush()
|
||||
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
for rec in self.record_reader:
|
||||
yield rec
|
||||
|
||||
class AcceptGroupFilter(GroupFilter):
|
||||
def __init__(self, records, branch_name, groups_table, index):
|
||||
GroupFilter.__init__(self, None, records, branch_name, groups_table,
|
||||
index)
|
||||
# NIK commented out on Feb 08
|
||||
# This function is not used anywhere
|
||||
# in the code
|
||||
# def go(self):
|
||||
# count = 0
|
||||
# for record in self.records:
|
||||
# record.rec_id = count
|
||||
# count += 1
|
||||
# self.index.add(record)
|
||||
# self.groups_table.append(record)
|
||||
# print "Finished filtering groups for branch " + self.branch_name
|
||||
# self.groups_table.flush()
|
BIN
groupfilter.pyc
Normal file
BIN
groupfilter.pyc
Normal file
Binary file not shown.
141
groupfilter_validator.py
Normal file
141
groupfilter_validator.py
Normal file
|
@ -0,0 +1,141 @@
|
|||
import options
|
||||
from copy import deepcopy
|
||||
from validator_common import *
|
||||
from groupfilter import Rule as RuleImpl
|
||||
from groupfilter import GroupFilter as GroupFilterImpl
|
||||
from groupfilter import AcceptGroupFilter as AcceptGroupFilterImpl
|
||||
from operators import NOT
|
||||
import pytables
|
||||
from timeindex import TimeIndex
|
||||
import time
|
||||
|
||||
|
||||
|
||||
class GroupFilterValidator(object):
|
||||
# The GroupFilterValidator is instantiated only once.
|
||||
def __init__(self, parser, grouper_validator):
|
||||
self.parser = parser
|
||||
self.grouper_validator = grouper_validator
|
||||
self.filters = deepcopy(parser.group_filters)
|
||||
self.branches_fields = self.get_branches_fields()
|
||||
self.br_name_to_grouper = grouper_validator.br_name_to_grouper
|
||||
self.br_name_to_gr_filter = {}
|
||||
self.impl = self.create_impl()
|
||||
|
||||
def check_duplicate_filter_names(self):
|
||||
duplicates = {}
|
||||
for filter in self.filters:
|
||||
old_val = duplicates.setdefault(filter.name, 0)
|
||||
duplicates[filter.name] = old_val + 1
|
||||
|
||||
duplicate_names = [k for k,v in duplicates.iteritems() if v > 1]
|
||||
if len(duplicate_names) > 0:
|
||||
msg = "Group filter(s) %s"%duplicate_names
|
||||
msg += " is/are all defined more than once."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
def check_field_refs(self):
|
||||
"Check record field references, for unknown fields"
|
||||
for filter in self.filters:
|
||||
for rule in iterate_rules(filter):
|
||||
for branch in filter.branches:
|
||||
check_rule_fields(rule, self.branches_fields[branch])
|
||||
|
||||
|
||||
def get_branches_fields(self):
|
||||
branches_fields = {}
|
||||
for grouper in self.grouper_validator.impl:
|
||||
branches_fields[grouper.branch_name] = grouper.group_record_fields
|
||||
return branches_fields
|
||||
|
||||
def validate(self):
|
||||
self.check_for_unused_filters()
|
||||
self.check_field_refs()
|
||||
self.check_duplicate_filter_names()
|
||||
|
||||
def check_for_unused_filters(self):
|
||||
for filter in self.filters:
|
||||
if len(filter.branches) == 0:
|
||||
msg = "Warning groupfilter %s "%filter.name
|
||||
msg += "defined on line %s"%filter.line
|
||||
msg += " is not used in any branch."
|
||||
print msg
|
||||
continue # skips unused filters
|
||||
|
||||
def get_rule_impl(self, rule):
|
||||
op = find_op(rule)
|
||||
args = [self.get_rule_impl(arg) if type(arg) == Rule else arg
|
||||
for arg in rule.args]
|
||||
impl = RuleImpl(None, NOT(op) if rule.NOT else op, args)
|
||||
|
||||
return impl
|
||||
|
||||
def get_rules_impl(self, filter):
|
||||
replace_bound_rules(filter)
|
||||
replace_with_vals(filter)
|
||||
rules_impl = []
|
||||
for or_rule in filter.rules:
|
||||
or_rule_list = []
|
||||
for rule in or_rule:
|
||||
impl = self.get_rule_impl(rule)
|
||||
or_rule_list.append(impl)
|
||||
rules_impl.append(or_rule_list)
|
||||
|
||||
return rules_impl
|
||||
|
||||
def create_impl(self):
|
||||
|
||||
#start = time.clock()
|
||||
#print "GF validation started at:", start
|
||||
self.validate()
|
||||
group_filters_impl = []
|
||||
|
||||
for filter in self.filters:
|
||||
rules_impl = self.get_rules_impl(filter)
|
||||
for br_name in filter.branches:
|
||||
records = self.br_name_to_grouper[br_name]
|
||||
index = TimeIndex(5000)
|
||||
grouper = records
|
||||
field_types = dict(zip(grouper.group_record_fields,
|
||||
grouper.group_record_types))
|
||||
# print records
|
||||
fname = options.temp_path + options.groups_file_prefix
|
||||
fname += br_name+".h5"
|
||||
if options.delete_temp_files: if_exists_delete(fname)
|
||||
file = pytables.create_table_file(fname, field_types)
|
||||
groups_table = pytables.FlowRecordsTable(fname) # Create separate table files for each of the branches
|
||||
filt_impl = GroupFilterImpl(rules_impl, records, br_name,
|
||||
groups_table, index)
|
||||
group_filters_impl.append(filt_impl)
|
||||
|
||||
self.br_name_to_gr_filter = dict((filt.branch_name, filt)
|
||||
for filt in group_filters_impl)
|
||||
|
||||
|
||||
# Check for branches that don't have group filters and put accept
|
||||
# filters on them
|
||||
for br_name in self.br_name_to_grouper.keys():
|
||||
if br_name not in self.br_name_to_gr_filter.keys():
|
||||
# print "We get here if the group-filter is removed"
|
||||
records = self.br_name_to_grouper[br_name]
|
||||
index = TimeIndex(5000)
|
||||
grouper = records
|
||||
field_types = dict(zip(grouper.group_record_fields,
|
||||
grouper.group_record_types))
|
||||
fname = options.temp_path + options.groups_file_prefix
|
||||
fname += br_name+".h5"
|
||||
if options.delete_temp_files: if_exists_delete(fname)
|
||||
file = pytables.create_table_file(fname, field_types)
|
||||
groups_table = pytables.FlowRecordsTable(fname)
|
||||
filt_impl = AcceptGroupFilterImpl(records, br_name,
|
||||
groups_table, index) # This class is called in case some branch is missing
|
||||
# the definition of a group-filter. Essentially a plain
|
||||
# GroupFilter, but with no rules as an argument.
|
||||
self.br_name_to_gr_filter[br_name] = filt_impl
|
||||
group_filters_impl.append(filt_impl)
|
||||
|
||||
#time_elapsed = (time.clock() - start)
|
||||
#print "GF Validation required:", time_elapsed
|
||||
return group_filters_impl
|
||||
|
||||
|
BIN
groupfilter_validator.pyc
Normal file
BIN
groupfilter_validator.pyc
Normal file
Binary file not shown.
BIN
h5ports.h5
Normal file
BIN
h5ports.h5
Normal file
Binary file not shown.
66
http-download.flw
Normal file
66
http-download.flw
Normal file
|
@ -0,0 +1,66 @@
|
|||
splitter S {}
|
||||
|
||||
filter www_req {
|
||||
dstport = 80
|
||||
}
|
||||
|
||||
filter www_res {
|
||||
srcport = 80
|
||||
}
|
||||
|
||||
filter www_res1 {
|
||||
srcport = 80
|
||||
}
|
||||
|
||||
grouper g_www_req {
|
||||
module g1 {
|
||||
srcip = srcip
|
||||
dstip = dstip
|
||||
etime < stime delta 1s
|
||||
}
|
||||
aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
bitOR(tcp_flags) as flags, union(srcport) as srcports
|
||||
}
|
||||
|
||||
grouper g_www_res {
|
||||
module g1 {
|
||||
srcip = srcip
|
||||
dstip = dstip
|
||||
etime < stime delta 1s
|
||||
}
|
||||
aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
bitOR(tcp_flags) as flags, union(dstport) as dstports
|
||||
}
|
||||
|
||||
grouper g_www_res1 {
|
||||
module g1 {
|
||||
srcip = srcip
|
||||
dstip = dstip
|
||||
etime < stime delta 5s
|
||||
}
|
||||
aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
bitOR(tcp_flags) as flags, union(dstport) as dstports
|
||||
}
|
||||
|
||||
groupfilter ggf {
|
||||
bitAND(flags, 0x13) = 0x13
|
||||
}
|
||||
|
||||
merger M {
|
||||
module m1 {
|
||||
branches C, B, A
|
||||
A.srcip = B.dstip
|
||||
A.srcports = B.dstports
|
||||
A.bytes < B.bytes
|
||||
B oi A OR B d A
|
||||
}
|
||||
export m1
|
||||
}
|
||||
|
||||
ungrouper U {}
|
||||
|
||||
"./netflow-trace.h5" -> S
|
||||
S branch A -> www_req -> g_www_req -> ggf -> M
|
||||
S branch B -> www_res -> g_www_res -> ggf -> M
|
||||
S branch C -> www_res1 -> g_www_res1 -> ggf -> M
|
||||
M->U->"./ungroped.h5"
|
44
https-flows.flw
Normal file
44
https-flows.flw
Normal file
|
@ -0,0 +1,44 @@
|
|||
splitter S {}
|
||||
|
||||
filter fil_dstport {
|
||||
dstport = 443
|
||||
}
|
||||
|
||||
filter fil_srcport {
|
||||
srcport = 443
|
||||
}
|
||||
|
||||
grouper g_fil_dstport {
|
||||
module g1 {
|
||||
}
|
||||
aggregate sum(bytes) as bytes, bitOR(tcp_flags) as flags
|
||||
# aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
# bitOR(tcp_flags) as flags, union(srcport) as srcports
|
||||
}
|
||||
|
||||
grouper g_fil_srcport {
|
||||
module g1 {
|
||||
}
|
||||
aggregate sum(bytes) as bytes, bitOR(tcp_flags) as flags
|
||||
# aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
# bitOR(tcp_flags) as flags, union(dstport) as dstports
|
||||
}
|
||||
|
||||
groupfilter ggf {
|
||||
bitAND(flags, 0x13) = 0x13
|
||||
}
|
||||
|
||||
merger M {
|
||||
module m1 {
|
||||
branches B, A
|
||||
A m B delta 1440min
|
||||
}
|
||||
export m1
|
||||
}
|
||||
|
||||
ungrouper U {}
|
||||
|
||||
"./netflow-trace.h5" -> S
|
||||
S branch A -> fil_dstport -> g_fil_dstport -> ggf -> M
|
||||
S branch B -> fil_srcport -> g_fil_srcport -> ggf -> M
|
||||
M->U->"./ungroped.h5"
|
190
merger.py
Normal file
190
merger.py
Normal file
|
@ -0,0 +1,190 @@
|
|||
|
||||
|
||||
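# Storage for merger output: each put() appends one row of group-record ids
# (one id per exported branch) to the underlying tuples table.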
class MergerStorage(object):
|
||||
def __init__(self, id, tuples_table, record_class):
|
||||
self.id = id
|
||||
self.tuples_table = tuples_table
|
||||
self.RecordClass = record_class
|
||||
|
||||
def put(self, gr_rec_tuple):
|
||||
self.tuples_table.append(self.RecordClass(*gr_rec_tuple))
|
||||
|
||||
def flush(self):
|
||||
self.tuples_table.flush()
|
||||
|
||||
|
||||
class MergerRule(object):
|
||||
def __init__(self, op, args, br_to_record):
|
||||
# The records are changed externally from branches:
|
||||
self.br_to_record = br_to_record
|
||||
self.args = args
|
||||
self.op = op
|
||||
|
||||
def match(self):
|
||||
# The records are changed externally by another object
|
||||
args = []
|
||||
for arg in self.args:
|
||||
if type(arg) is MergerRule:
|
||||
args.append(arg.match())
|
||||
elif type(arg) is tuple:
|
||||
br, field = arg
|
||||
record = self.br_to_record[br]
|
||||
if field:
|
||||
# normal rule get field of the record
|
||||
args.append(getattr(record, field))
|
||||
else:
|
||||
# allen rule, argument is the record
|
||||
args.append(record)
|
||||
else:
|
||||
args.append(arg)
|
||||
return self.op(*args)
|
||||
|
||||
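# Control-flow exceptions used to unwind the nested branch iteration: Reject
# propagates up to the last normal branch when a reject module matches, while
# Accept signals that no reject module matched and the tuple can be exported.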
class Reject(Exception):
|
||||
pass
|
||||
|
||||
class Accept(Exception):
|
||||
pass
|
||||
|
||||
# This class represents only the first branch loop and no nested loops, unlike MergerBranch class
|
||||
class Merger(object):
|
||||
def __init__ (self, name, br_name, records, name_to_branch,
|
||||
next_branches_names, export_branches, br_to_record,
|
||||
index, index_rules, rules, merger_table):
|
||||
self.name = name
|
||||
self.merger_table = merger_table
|
||||
self.records = records
|
||||
self.export_branches = export_branches
|
||||
self.br_name = br_name
|
||||
self.name_to_branch = name_to_branch
|
||||
self.rules = rules
|
||||
self.index = index
|
||||
self.br_to_record = br_to_record
|
||||
self.next_branches_names = next_branches_names
|
||||
self.remaining_rec = dict((name, None) for name
|
||||
in next_branches_names)
|
||||
self.index_rules = index_rules
|
||||
|
||||
@property
|
||||
def next_branch(self):
|
||||
if not self.next_branches_names:
|
||||
return False
|
||||
return self.name_to_branch[self.next_branches_names[0]]
|
||||
|
||||
def match(self):
|
||||
for rule in self.rules:
|
||||
if not rule.match():
|
||||
return False
|
||||
return True
|
||||
|
||||
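# Use this branch's Allen-interval index rules to compute, for every affected
# downstream branch, the set of candidate records for 'record'; the set is
# intersected with any candidate set inherited from the previous branch before
# being passed on to the next branch.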
def pass_allen_indices_down(self, record):
|
||||
new_br_remaining_rec = {}
|
||||
for rules in self.index_rules:
|
||||
br_name = rules[0].target
|
||||
rec_set = set()
|
||||
branch = self.name_to_branch[br_name]
|
||||
index = branch.index
|
||||
for rule in rules:
|
||||
interval = rule(record)
|
||||
rec_set.update(index.get_interval_records(*interval))
|
||||
|
||||
# note: {}.get(k) returns None if {} has no key k
|
||||
set_from_parent = self.remaining_rec[br_name]
|
||||
if set_from_parent:
|
||||
# there is a set of records defined by parent
|
||||
# do an intersection
|
||||
new_br_remaining_rec[br_name] = rec_set & set_from_parent
|
||||
else:
|
||||
# no set from parent, just add this rec_set
|
||||
new_br_remaining_rec[br_name] = rec_set
|
||||
|
||||
# pass to next branch
|
||||
if len(new_br_remaining_rec) == 0:
|
||||
self.next_branch.remaining_rec = self.remaining_rec
|
||||
else:
|
||||
self.next_branch.remaining_rec = new_br_remaining_rec
|
||||
# print "passing",self.next_branch.remaining_rec
|
||||
|
||||
|
||||
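# Drive the merge: iterate over every group record of the first branch, narrow
# down the candidate records of the downstream branches via the Allen indexes
# and recurse into the next branch.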
def go(self):
|
||||
for rec in self.records.record_reader:
|
||||
self.br_to_record[self.br_name] = rec
|
||||
self.pass_allen_indices_down(rec)
|
||||
self.next_branch.next()
|
||||
print "Finished merging branches: ",
|
||||
print [self.br_name] + self.next_branches_names
|
||||
self.merger_table.flush()
|
||||
self.merger_table.tuples_table.close()
|
||||
|
||||
class MergerBranch(Merger):
|
||||
def __init__ (self, br_name, records, name_to_branch, next_branches_names,
|
||||
export_branches, br_to_record ,index, index_rules, rules,
|
||||
merger_table):
|
||||
Merger.__init__(self, None, br_name, records, name_to_branch,
|
||||
next_branches_names, export_branches, br_to_record,
|
||||
index, index_rules, rules, merger_table)
|
||||
|
||||
def next(self):
|
||||
remaining = self.remaining_rec[self.br_name]
|
||||
for rec in self.records.record_reader.read_rows_list(remaining):
|
||||
self.br_to_record[self.br_name] = rec
|
||||
if not self.match():
|
||||
continue
|
||||
|
||||
self.pass_allen_indices_down(rec)
|
||||
try:
|
||||
self.next_branch.next()
|
||||
except Accept:
|
||||
# the reject modules did not reject this tuple
|
||||
res = tuple(self.br_to_record[br].rec_id for br
|
||||
in self.export_branches)
|
||||
self.merger_table.put(res)
|
||||
except Reject:
|
||||
# this tuple matched reject module so we go on
|
||||
pass
|
||||
|
||||
class MergerLastBranch(Merger):
|
||||
def __init__ (self, br_name, records, name_to_branch, next_branches_names,
|
||||
export_branches, br_to_record ,index, index_rules, rules,
|
||||
merger_table):
|
||||
Merger.__init__(self, None, br_name, records, name_to_branch,
|
||||
next_branches_names, export_branches, br_to_record,
|
||||
index, index_rules, rules, merger_table)
|
||||
def next(self):
|
||||
remaining = self.remaining_rec[self.br_name]
|
||||
for rec in self.records.record_reader.read_rows_list(remaining):
|
||||
self.br_to_record[self.br_name] = rec
|
||||
if not self.match():
|
||||
continue
|
||||
|
||||
# last branch and no reject branches
|
||||
# append the record
|
||||
res = tuple(self.br_to_record[br].rec_id for br
|
||||
in self.export_branches)
|
||||
self.merger_table.put(res)
|
||||
|
||||
class MergerRejectBranch(Merger):
|
||||
def __init__ (self, br_name, records, name_to_branch, next_branches_names,
|
||||
export_branches, br_to_record ,index, index_rules, rules,
|
||||
merger_table):
|
||||
Merger.__init__(self, None, br_name, records, name_to_branch,
|
||||
next_branches_names, export_branches, br_to_record,
|
||||
index, index_rules, rules, merger_table)
|
||||
def next(self):
|
||||
remaining = self.remaining_rec[self.br_name]
|
||||
for rec in self.records.record_reader.read_rows_list(remaining):
|
||||
self.br_to_record[self.br_name] = rec
|
||||
if self.match():
|
||||
raise Reject # goes all the way up to last normal branch
|
||||
else:
|
||||
try:
|
||||
if self.next_branch:
|
||||
self.pass_allen_indices_down(rec)
|
||||
self.next_branch.next()
|
||||
else:
|
||||
# this is the last branch, so go on
|
||||
pass
|
||||
except Accept:
|
||||
# this Accept is from lower reject-branch so just
|
||||
# go on and raise Accept when this branch finishes
|
||||
pass
|
||||
raise Accept
|
BIN
merger.pyc
Normal file
BIN
merger.pyc
Normal file
Binary file not shown.
505
merger_validator.py
Normal file
505
merger_validator.py
Normal file
|
@ -0,0 +1,505 @@
|
|||
from validator_common import *
|
||||
from copy import deepcopy
|
||||
from tables import UIntCol
|
||||
from merger import MergerStorage
|
||||
from merger import Merger as MergerImpl
|
||||
from merger import MergerBranch as MergerBranchImpl
|
||||
from merger import MergerLastBranch as MergerLastBranchImpl
|
||||
from merger import MergerRejectBranch as MergerRejectBranchImpl
|
||||
from merger import MergerRule as MergerRuleImpl
|
||||
import itertools
|
||||
import allen_ops
|
||||
import pytables
|
||||
import record
|
||||
import options
|
||||
|
||||
class MergerValidator(object):
|
||||
def __init__(self, parser, gr_filter_validator):
|
||||
self.parser = parser
|
||||
self.gr_filter_validator = gr_filter_validator
|
||||
self.mergers = deepcopy(parser.mergers)
|
||||
# branches_fields maps each branch to the list of fields present in its group records, e.g.:
|
||||
# ('rec_id', 'etime', 'stime', 'records', 'srcip', 'dstip', 'bytes', 'n', 'flags', 'srcports')
|
||||
# ('rec_id', 'etime', 'stime', 'records', 'srcip', 'dstip', 'bytes', 'n', 'flags', 'dstports')
|
||||
self.branches_fields = gr_filter_validator.branches_fields
|
||||
# A simple dictionary mapping of branch name to a GroupFilter
|
||||
# {'A': <groupfilter.GroupFilter object at 0x9c3d66c>, 'B': <groupfilter.GroupFilter object at 0x9c43ccc>}
|
||||
self.br_name_to_gr_filter = gr_filter_validator.br_name_to_gr_filter
|
||||
# Checks that all the defined merger modules are actually exported
|
||||
# Returns a dictionary mapping each merger name to its exported module
|
||||
self.megers_export_modules = self.find_mergers_export_modules()
|
||||
# Returns the size of the field type of the 'records' field, 4 bytes
|
||||
self.id_size = self.get_id_size()
|
||||
self.impl = self.get_mergers_impl()
|
||||
|
||||
# Returns the size of the field type of the 'records' field, 4 bytes
|
||||
def get_id_size(self):
|
||||
rec_reader = self.gr_filter_validator.impl[0].records
|
||||
field_types = dict(zip(rec_reader.group_record_fields,
|
||||
rec_reader.group_record_types))
|
||||
id_size = field_types['records'].itemsize
|
||||
return id_size
|
||||
|
||||
# Check for duplicate merger names
|
||||
def check_duplicate_merger_names(self):
|
||||
duplicates = {}
|
||||
for merger in self.mergers:
|
||||
old_val = duplicates.setdefault(merger.name, 0)
|
||||
duplicates[merger.name] = old_val + 1
|
||||
|
||||
duplicate_names = [k for k,v in duplicates.iteritems() if v > 1]
|
||||
if len(duplicate_names) > 0:
|
||||
msg = "Merger(s) %s"%duplicate_names
|
||||
msg += " is/are all defined more than once."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
# Check for duplicate module names
|
||||
def check_duplicate_module_names(self, merger):
|
||||
duplicates = {}
|
||||
for module in merger.modules:
|
||||
old_val = duplicates.setdefault(module.name, 0)
|
||||
duplicates[module.name] = old_val + 1
|
||||
|
||||
duplicate_names = [k for k,v in duplicates.iteritems() if v > 1]
|
||||
if len(duplicate_names) > 0:
|
||||
msg = "Module(s) %s"%duplicate_names
|
||||
msg += " is/are all defined more than once in merger"
|
||||
msg += " %s."%merger.name
|
||||
raise SyntaxError(msg)
|
||||
|
||||
|
||||
# Checks that all the defined merger modules are actually exported
|
||||
# Returns a dictionary mapping each merger name to its exported module
|
||||
def find_mergers_export_modules(self):
|
||||
merger_to_export_module = {}
|
||||
for merger in self.mergers:
|
||||
exp = None
|
||||
for module in merger.modules:
|
||||
if merger.export == module.name:
|
||||
exp = module
|
||||
break
|
||||
|
||||
if exp:
|
||||
merger_to_export_module[merger.name] = exp
|
||||
# print merger_to_export_module
|
||||
else:
|
||||
msg = "Merger %s"%merger.name
|
||||
msg += " export module %s is not defined."%merger.export
|
||||
|
||||
return merger_to_export_module
|
||||
|
||||
#--------------------------------------ALLEN CHECKS-------------------------------------#
|
||||
#All the operations on rules are around a sample set like: {'M': Module('m1', 38, [[Rule('EQ', 40, [Field('A.srcip'), Field('B.dstip')], False)], [Rule('EQ', 41, [Field('A.srcports'), Field('B.dstports')], False)], [Rule('LT', 42, [Field('A.bytes'), Field('B.bytes')], False)], [AllenRule('oi', 43, [Field('B'), Field('A')], False), AllenRule('d', 43, [Field('B'), Field('A')], False)]], ['B', 'A'])}
|
||||
|
||||
#Returns only the Allen rules
|
||||
def iterate_module_allen_op_groups(self, merger):
|
||||
for module in merger.modules:
|
||||
for rules in module.rules:
|
||||
if type(rules[0]) is not AllenRule:
|
||||
continue
|
||||
else:
|
||||
for op in rules:
|
||||
yield op
|
||||
|
||||
# Reorders the arguments of allen operations (and inverts the op) so that they follow the canonical branch order
|
||||
def order_allen_ops_args(self, merger):
|
||||
order = self.get_merger_branches_order(merger)#Orders merger branches, exported module's branches being first
|
||||
arg_combinations = tuple(itertools.combinations(order, 2)) # combinations('ABCD', 2) --> AB AC AD BC BD CD
|
||||
for allen_op in self.iterate_module_allen_op_groups(merger):#Returns only the Allen rules
|
||||
first, second = allen_op.args[:2] # Returns Field('B') Field('A')
|
||||
op = allen_op.op # operations like oi, d
|
||||
if (first.name, second.name) not in arg_combinations:
|
||||
allen_op.args = [second, first] + allen_op.args[2:]# reverse names
|
||||
allen_op.op = allen_ops.inv_op_str(op)# and operations
|
||||
|
||||
# A number of different checks of the AllenRule
|
||||
def check_allen_ops(self, merger):
|
||||
allen_arg_pairs = []
|
||||
arg_pairs_to_line = {}
|
||||
for module in merger.modules:
|
||||
for rules in module.rules:
|
||||
if type(rules[0]) is not AllenRule:
|
||||
continue
|
||||
|
||||
first_arg = rules[0].args[0].name # Get the branch names influenced by the AllenRule
|
||||
second_arg = rules[0].args[1].name
|
||||
line = rules[0].line
|
||||
order = (first_arg, second_arg)
|
||||
allen_arg_pairs.append(order)# [('B', 'A')]
|
||||
|
||||
self.check_allen_satisfiability(arg_pairs_to_line, order, line)
|
||||
self.check_allen_consistency(first_arg, second_arg, rules)
|
||||
self.check_allen_deltas(rules)
|
||||
|
||||
self.check_allen_reachability(allen_arg_pairs, merger)
|
||||
|
||||
# The following 3 methods run different tests on the allen arguments and rules
|
||||
def check_allen_satisfiability(self, arg_pairs_to_line, order, line):
|
||||
if arg_pairs_to_line.has_key(order):
|
||||
msg = "Unsatisfiable Allen op group. "
|
||||
msg += "All allen ops concerning a pair of branches should"
|
||||
msg += " be connected with and OR into a single group "
|
||||
msg += "within a single module.\n"
|
||||
msg += "Argument pair %s on line %s"%(order, line)
|
||||
msg += " is also used on line %s."%arg_pairs_to_line[order]
|
||||
raise SyntaxError(msg)
|
||||
else:
|
||||
arg_pairs_to_line[order] = line
|
||||
def check_allen_consistency(self, first_arg, second_arg, rules):
|
||||
for al_op in rules:
|
||||
first = al_op.args[0].name
|
||||
second = al_op.args[1].name
|
||||
|
||||
if (first != first_arg or second != second_arg):
|
||||
msg = "Inconsistent group of Allen statements "
|
||||
msg += "on line %s"%rules[0].line
|
||||
msg += ": %s, %s.\n"%(first, second)
|
||||
msg += "All branches in this group should have "
|
||||
msg += "%s and %s"%(first_arg, second_arg)
|
||||
msg += " as left and righthand side arguments "
|
||||
msg += "respectively."
|
||||
raise SyntaxError(msg)
|
||||
def check_allen_deltas(self, rules):
|
||||
for al_op in rules:
|
||||
if al_op.op == 'LT' or al_op.op == 'GT':
|
||||
if len(al_op.args) < 3:
|
||||
msg = "Allen op < or > on line %s "%al_op.line
|
||||
msg += " should have delta explicitly stated."
|
||||
raise SyntaxError(msg)
|
||||
# A check for reachability of subsequent branches from the first one
|
||||
def check_allen_reachability(self, allen_arg_pairs, merger):
|
||||
br_order = self.get_merger_branches_order(merger)
|
||||
# check reachability through allen index from initial branch
|
||||
# of export module:
|
||||
reachable = br_order[0:1] # list of first branch of exp module
|
||||
unreachable = br_order[1:]
|
||||
change = True
|
||||
while(change):
|
||||
change = False
|
||||
for arg1, arg2 in allen_arg_pairs:
|
||||
if arg1 in reachable and arg2 in unreachable:
|
||||
unreachable.remove(arg2)
|
||||
reachable.append(arg2)
|
||||
change = True
|
||||
if len(unreachable) > 0:
|
||||
msg = "Branch(es): %s"%unreachable
|
||||
msg += " in merger %s"%merger.name
|
||||
msg += " is/are unreachable through an allen op or chain of"
|
||||
msg += " allen ops from the first branch of the exported module"
|
||||
raise SyntaxError(msg)
|
||||
#--------------------------------------END ALLEN CHECKS---------------------------------#
|
||||
|
||||
# Orders the merger modules s.t. the exported module comes first
|
||||
def order_modules(self):
|
||||
for merger in self.mergers:
|
||||
exp_module = self.megers_export_modules[merger.name]
|
||||
new_modules_order = [exp_module]
|
||||
new_modules_order += [m for m in merger.modules if m != exp_module]
|
||||
merger.modules = new_modules_order
|
||||
|
||||
# Checks that every merger module shares at least one branch with the export module
|
||||
def check_for_disjoint_modules(self):
|
||||
for merger in self.mergers:
|
||||
exp_module = self.megers_export_modules[merger.name]
|
||||
exp_branches = set(exp_module.branches)
|
||||
for module in merger.modules:
|
||||
branches = set(module.branches)
|
||||
# NOTE & is set intersection
|
||||
if len(exp_branches & branches) < 1:
|
||||
msg = "Merger module %s.%s"%(merger.name,module.name)
|
||||
msg += " in has no overlaping branches with the"
|
||||
msg += " export module."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
|
||||
# Check the validity of the AllenRule, by seeing if the branch names are all defined
|
||||
def check_branch_id_ref(self, rule, module_branches):
|
||||
for arg in rule.args:
|
||||
if type(arg) is Field:
|
||||
id_ref = arg.name
|
||||
if id_ref not in self.br_name_to_gr_filter.keys():
|
||||
msg = 'Branch %s referenced on line'%id_ref
|
||||
msg += ' %s is not defined.'%rule.line
|
||||
raise SyntaxError(msg)
|
||||
if id_ref not in module_branches:
|
||||
msg = 'Branch %s referenced on line'%id_ref
|
||||
msg += " %s "%rule.line
|
||||
msg += "is not in module's branches statement."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
# Check the validity of the Rule, GrouperRule and statements like A.bytes
|
||||
def check_qid_field_ref(self, rule, module_branches):
|
||||
for arg in rule.args:
|
||||
if type(arg) is Field:
|
||||
qid_field = arg.name
|
||||
branch, _, field = qid_field.partition('.') #Separates statements like A.bytes
|
||||
try:
|
||||
if field not in self.branches_fields[branch]:
|
||||
msg = 'Wrong field %s on line %s. '%(qid_field,
|
||||
rule.line)
|
||||
msg += 'Branch %s does not have field %s.'%(branch,
|
||||
field)
|
||||
raise SyntaxError(msg)
|
||||
except KeyError:
|
||||
msg = 'Branch %s referenced on line'%branch
|
||||
msg += ' %s is not defined'%rule.line
|
||||
raise SyntaxError(msg)
|
||||
if branch not in module_branches:
|
||||
msg = 'Branch %s referenced on line'%branch
|
||||
msg += " %s "%rule.line
|
||||
msg += "is not in module's branches statement."
|
||||
raise SyntaxError(msg)
|
||||
|
||||
# Orders merger branches with the exported module's branches being first
|
||||
def get_merger_branches_order(self, merger):
|
||||
br_order = []
|
||||
# first add export module
|
||||
for module in merger.modules:
|
||||
if module.name == merger.export:
|
||||
for br in module.branches:
|
||||
if br not in br_order:
|
||||
br_order.append(br)
|
||||
|
||||
# add all the others:
|
||||
for module in merger.modules:
|
||||
for br in module.branches:
|
||||
if br not in br_order:
|
||||
br_order.append(br)
|
||||
|
||||
return br_order
|
||||
|
||||
# Order the merger rules by the point at which all their branches become available.
|
||||
def order_merger_rules(self, merger):
|
||||
"""
|
||||
Produces mapping between incrementally larger available branches tuples
|
||||
(A,B,C,etc) ordered as they will appear in the implementation.
|
||||
"""
|
||||
br_order = self.get_merger_branches_order(merger)
|
||||
needed_brs_to_rule = {}
|
||||
for module in merger.modules:
|
||||
replace_with_vals(module)
|
||||
replace_bound_rules(module)
|
||||
for rules in module.rules:
|
||||
rule_branches = self.get_rule_needed_branches(rules[0])
|
||||
|
||||
ordered_branches = tuple(br for br in br_order
|
||||
if br in rule_branches)
|
||||
|
||||
if len(rules) > 1:
|
||||
rule = Rule('or_op', 0, rules)
|
||||
else:
|
||||
rule = rules[0]
|
||||
needed_brs_to_rule.setdefault(ordered_branches,
|
||||
[]).append(rule)
|
||||
|
||||
avail_to_rules = {}
|
||||
tup = ()
|
||||
# create sets - needed for the set intersection operation
|
||||
needed_sets = map(set, needed_brs_to_rule.keys())
|
||||
# incrementally add branches to the tuple of available branches
|
||||
# and check which rules have their branch needs satisfied
|
||||
for br in br_order:
|
||||
tup += (br,)
|
||||
# find how many of the needed branches are in this tuple
|
||||
# of branches. It makes an elementwise intersection of the sets
|
||||
# of the needed branches and the tuple of available branches
|
||||
intersect = map(set(tup).intersection , needed_sets )
|
||||
for el, intersection, key in zip(needed_sets , intersect,
|
||||
needed_brs_to_rule.keys()):
|
||||
if len(intersection) == len(el):
|
||||
# Length is the same, which means all needed branches
|
||||
# are present. Remove these elements, take the rules from
|
||||
# the needed_brs_to_rule and delete the key there to
|
||||
# keep the zip() in sync
|
||||
needed_sets.remove(el)
|
||||
avail_to_rules[tup] = needed_brs_to_rule[key]
|
||||
del needed_brs_to_rule[key]
|
||||
return avail_to_rules
|
||||
|
||||
# Collect the branch names referenced by a rule (and its sub-rules).
|
||||
def get_rule_needed_branches(self, rule):
|
||||
args_list = set()
|
||||
for sub_rule in iterate_subrules(rule):
|
||||
for arg in sub_rule.args:
|
||||
if type(arg) is Field:
|
||||
args_list.add(arg.name)
|
||||
|
||||
for arg in rule.args:
|
||||
if type(arg) is Field:
|
||||
args_list.add(arg.name)
|
||||
|
||||
if type(rule) is AllenRule:
|
||||
return list(args_list)
|
||||
|
||||
else:
|
||||
return [qid.partition('.')[0] for qid in args_list]
|
||||
|
||||
|
||||
# Validates the correctness of the merger stage
|
||||
def validate(self):
|
||||
self.check_duplicate_merger_names()
|
||||
for merger in self.mergers:
|
||||
self.check_duplicate_module_names(merger)
|
||||
for module in merger.modules:
|
||||
# Checks the whole rule list to see that all
|
||||
# the rules fall into [Rule, GrouperRule, AllenRule]
|
||||
# Returns the actual rules
|
||||
for rule in iterate_rules(module):
|
||||
# Checks that all the rule entries are correctly specified
|
||||
if type(rule) is AllenRule:
|
||||
self.check_branch_id_ref(rule, module.branches)
|
||||
else:
|
||||
self.check_qid_field_ref(rule, module.branches)
|
||||
# Reorders the arguments of allen operations so they follow the canonical branch order
|
||||
self.order_allen_ops_args(merger)
|
||||
# Performs several checks on the branches and the operations (consistency, reachability, etc.)
|
||||
self.check_allen_ops(merger)
|
||||
|
||||
# Orders the merger modules s.t. the exported module comes first
|
||||
self.order_modules()
|
||||
# Checks that each module shares at least one branch with the export module
|
||||
self.check_for_disjoint_modules()
|
||||
|
||||
|
||||
# Get the allen indexing operations for each branch.
|
||||
def get_branches_allen_index_ops(self, merger):
|
||||
"""
|
||||
Get the allen indexing operations for each branch.
|
||||
"""
|
||||
br_to_allen_ind_ops = {}
|
||||
for module in merger.modules:
|
||||
for rules in module.rules:
|
||||
if type(rules[0]) != AllenRule:
|
||||
continue
|
||||
br = rules[0].args[0].name
|
||||
br_to_allen_ind_ops.setdefault(br, []).append(rules)
|
||||
return br_to_allen_ind_ops
|
||||
|
||||
# Recursively convert a parsed rule into a MergerRule implementation.
|
||||
def get_rule_impl(self, rule, br_to_record):
|
||||
if type(rule) == AllenRule:
|
||||
op = find_op(rule, module='allen_ops')
|
||||
args = [ (arg.name, None)
|
||||
if type(arg) == Field else arg
|
||||
for arg in rule.args]
|
||||
else:
|
||||
args = []
|
||||
op = find_op(rule)
|
||||
for arg in rule.args:
|
||||
if type(arg) == Rule:
|
||||
arg_impl = self.get_rule_impl(arg, br_to_record)
|
||||
elif type(arg) == Field:
|
||||
branch, _, field = arg.name.partition('.')
|
||||
arg_impl = (branch, field)
|
||||
else:
|
||||
arg_impl = arg
|
||||
|
||||
args.append(arg_impl)
|
||||
return MergerRuleImpl(op, args, br_to_record)
|
||||
|
||||
# Create indexing rules implementation for AllenRules
|
||||
def get_index_rule_impl(self, rules):
|
||||
res = []
|
||||
for or_rules in rules:
|
||||
or_rules_impl = []
|
||||
for rule in or_rules:
|
||||
op = find_op(rule, 'allen_index')
|
||||
args = [arg.name if type(arg) == Field else arg
|
||||
for arg in rule.args]
|
||||
# replace with values
|
||||
args = [arg.value if type(arg) == Arg else arg
|
||||
for arg in args]
|
||||
#[<allen_index.oi object at 0x9f5adcc>, <allen_index.d object at 0x9f5ae0c>]
|
||||
or_rules_impl.append(op(*args))
|
||||
res.append(or_rules_impl)
|
||||
return res
|
||||
|
||||
# Creates a file MergedM.h5 for further storage of the merged files
|
||||
def get_merger_table_impl(self, merger):
|
||||
fields = self.megers_export_modules[merger.name].branches
|
||||
types = [UIntCol(self.id_size) for _ in fields]
|
||||
field_types = dict(zip(fields,types))
|
||||
recordClass = record.get_record_class(fields, types)
|
||||
# TODO fix file names
|
||||
fname = options.temp_path + options.merger_file_prefix
|
||||
fname += merger.name + ".h5"
|
||||
if options.delete_temp_files: if_exists_delete(fname)
|
||||
pytables.create_table_file(fname, field_types)
|
||||
mergerTable = pytables.FlowRecordsTable(fname)
|
||||
|
||||
return MergerStorage(merger.name, mergerTable, recordClass)
|
||||
|
||||
# Actual implementation of the merger stage
|
||||
def get_merger_impl(self, merger):
|
||||
# Create merger storage
|
||||
merger_table = self.get_merger_table_impl(merger)
|
||||
|
||||
# Create indexing rules implementation
|
||||
br_to_index_rule_impl = {}
|
||||
|
||||
# {'B': [[AllenRule('oi', 43, [Field('B'), Field('A')], False), AllenRule('d', 43, [Field('B'), Field('A')], False)]]}
|
||||
for br, rules in self.get_branches_allen_index_ops(merger).iteritems():
|
||||
br_to_index_rule_impl[br] = self.get_index_rule_impl(rules)# List of allen index rules implemented
|
||||
|
||||
for br in self.get_merger_branches_order(merger):#orders branches with the exported branch being first
|
||||
if br not in br_to_index_rule_impl.keys():
|
||||
br_to_index_rule_impl[br] = []
|
||||
|
||||
# some "globals" shared among branches or needed for their creation
|
||||
needed_brs = self.order_merger_rules(merger) # Re-orders the rules as they will appear in the implementation
|
||||
tup = () # tuple of available branches
|
||||
name = merger.name
|
||||
br_order = self.get_merger_branches_order(merger) # Branch names of the merger, export module's branches first
|
||||
export_branches = self.megers_export_modules[merger.name].branches # Returns branch names contained in the export module
|
||||
br_to_record = {}
|
||||
name_to_branch = {}
|
||||
merger_impl = None
|
||||
for br_name in br_order: # For each branch in the ordered branch set
|
||||
tup += (br_name,)
|
||||
next_branches_names = [br for br in br_order if br not in tup]
|
||||
records = self.br_name_to_gr_filter[br_name] # Group-filters associated with each branch
|
||||
index_rules = br_to_index_rule_impl[br_name] # Allen index rule associated with each branch
|
||||
index = records.index # Time index object
|
||||
if len(tup)<2: # If tuple contains only one branch, then execute the initial Merger class
|
||||
# first branch
|
||||
rules = []
|
||||
impl = MergerImpl(name, br_name, records, name_to_branch,
|
||||
next_branches_names, export_branches,
|
||||
br_to_record, index, index_rules, rules,
|
||||
merger_table)
|
||||
merger_impl = impl
|
||||
else:
|
||||
unimpl_rules = needed_brs[tup]
|
||||
rules = [self.get_rule_impl(rule, br_to_record)
|
||||
for rule in unimpl_rules]
|
||||
if br_name not in export_branches:
|
||||
# Reject branch
|
||||
impl = MergerRejectBranchImpl(br_name, records,
|
||||
name_to_branch, next_branches_names,
|
||||
export_branches, br_to_record, index,
|
||||
index_rules, rules, merger_table)
|
||||
|
||||
elif not next_branches_names:
|
||||
# Last non-rejecting branch
|
||||
impl = MergerLastBranchImpl(br_name, records,
|
||||
name_to_branch, next_branches_names,
|
||||
export_branches, br_to_record, index,
|
||||
index_rules, rules, merger_table)
|
||||
|
||||
else:
|
||||
# For normal middle branches execute the MergerBranch class
|
||||
impl = MergerBranchImpl(br_name, records, name_to_branch,
|
||||
next_branches_names, export_branches,
|
||||
br_to_record, index, index_rules,
|
||||
rules, merger_table)
|
||||
|
||||
name_to_branch[br_name] = impl
|
||||
|
||||
return merger_impl
|
||||
|
||||
def get_mergers_impl(self):
|
||||
self.validate()
|
||||
mergers_impl = [self.get_merger_impl(merger)
|
||||
for merger in self.mergers]
|
||||
|
||||
return mergers_impl
|
BIN
merger_validator.pyc
Normal file
BIN
merger_validator.pyc
Normal file
Binary file not shown.
BIN
netflow-trace.h5
Normal file
BIN
netflow-trace.h5
Normal file
Binary file not shown.
111
operators.py
Normal file
111
operators.py
Normal file
|
@ -0,0 +1,111 @@
|
|||
import options
|
||||
from socket import getprotobyname
|
||||
|
||||
|
||||
if options.import_ops:
|
||||
external_import = __import__(options.import_ops)
|
||||
|
||||
def NOT(op):
|
||||
def not_op(*args):
|
||||
op_result = op(*args)
|
||||
return not op_result
|
||||
|
||||
return not_op
|
||||
|
||||
def and_op(*args, **kwargs):
|
||||
res = True
|
||||
|
||||
for arg in args:
|
||||
res = res and arg
|
||||
|
||||
for arg in kwargs.values():
|
||||
res = res and arg
|
||||
|
||||
return res
|
||||
|
||||
def bitAND(*args):
|
||||
res = args[0]
|
||||
|
||||
for arg in args[1:]:
|
||||
res &= arg
|
||||
|
||||
return res
|
||||
|
||||
def bitOR(*args):
|
||||
res = args[0]
|
||||
|
||||
for arg in args[1:]:
|
||||
res |= arg
|
||||
|
||||
return res
|
||||
|
||||
def or_op(*args, **kwargs):
|
||||
res = False
|
||||
|
||||
for arg in args:
|
||||
res = res or arg
|
||||
|
||||
for arg in kwargs.values():
|
||||
res = res or arg
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def protocol(name):
|
||||
return getprotobyname(name)
|
||||
|
||||
def SUM(*args):
|
||||
sum = 0
|
||||
for arg in args:
|
||||
sum += arg
|
||||
return sum
|
||||
|
||||
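# The comparison operators below are variadic and chain across their arguments,
# e.g. EQ(a, b, c) is equivalent to a == b == c.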
def EQ(*args):
|
||||
prev_arg = args[0]
|
||||
result = True
|
||||
for arg in args[1:]:
|
||||
result = result and prev_arg == arg
|
||||
prev_arg = arg
|
||||
return result
|
||||
|
||||
def LT(*args):
|
||||
prev_arg = args[0]
|
||||
result = True
|
||||
for arg in args[1:]:
|
||||
result = result and prev_arg < arg
|
||||
prev_arg = arg
|
||||
return result
|
||||
|
||||
def GT(*args):
|
||||
prev_arg = args[0]
|
||||
result = True
|
||||
for arg in args[1:]:
|
||||
result = result and prev_arg > arg
|
||||
prev_arg = arg
|
||||
return result
|
||||
|
||||
def GTEQ(*args):
|
||||
prev_arg = args[0]
|
||||
result = True
|
||||
for arg in args[1:]:
|
||||
result = result and prev_arg >= arg
|
||||
prev_arg = arg
|
||||
return result
|
||||
|
||||
def LTEQ(*args):
|
||||
prev_arg = args[0]
|
||||
result = True
|
||||
for arg in args[1:]:
|
||||
result = result and prev_arg <= arg
|
||||
prev_arg = arg
|
||||
return result
|
||||
|
||||
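# Membership test implemented as a bitwise AND of each argument with the last
# one (presumably a subnet mask, as noted below).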
def IN(*args):
|
||||
last_arg = args[-1] # probably subnet mask
|
||||
result = True
|
||||
for arg in args[:-1]:
|
||||
result = result and arg & last_arg
|
||||
return result
|
||||
|
||||
def true(*args):
|
||||
return True
|
BIN
operators.pyc
Normal file
BIN
operators.pyc
Normal file
Binary file not shown.
19
options.py
Normal file
19
options.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import_ops = "custops"
|
||||
import_grouper_ops = None
|
||||
|
||||
|
||||
delete_temp_files = True
|
||||
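# Tuning parameters:
#  time_index_interval_ms    - interval size used for time indexing (assumed, in ms)
#  unsat_delta_threshold_mul - multiplier on a delta rule beyond which GrouperRule
#                              raises UnsatisfiableDelta
#  max_unsatisfiable_deltas  - number of unsatisfiable-delta misses after which a
#                              group is marked unsatisfiable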
time_index_interval_ms = 5000
|
||||
unsat_delta_threshold_mul = 10
|
||||
max_unsatisfiable_deltas = 20
|
||||
|
||||
do_not_expand_groups = False
|
||||
|
||||
temp_path = "./flowy-run/"
|
||||
import os
|
||||
try:
|
||||
os.mkdir(temp_path)
|
||||
except OSError:
|
||||
pass
|
||||
groups_file_prefix = "Groups"
|
||||
merger_file_prefix = "Merged"
|
BIN
options.pyc
Normal file
BIN
options.pyc
Normal file
Binary file not shown.
4298
parser.out
Normal file
4298
parser.out
Normal file
File diff suppressed because it is too large
931
parser.py
Normal file
931
parser.py
Normal file
|
@ -0,0 +1,931 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import ply.lex as lex
|
||||
import ply.yacc as yacc
|
||||
from statement import *
|
||||
from ply.yacc import YaccError
|
||||
import netaddr
|
||||
|
||||
|
||||
class Lexer(object):
|
||||
def __init__(self,**kwargs):
|
||||
self.lexer = lex.lex(module=self, **kwargs)
|
||||
|
||||
reserved = {
|
||||
'splitter' : 'splitterKeyword',
|
||||
'groupfilter' : 'groupFilterKeyword',
|
||||
'filter' : 'filterKeyword',
|
||||
'grouper' : 'grouperKeyword',
|
||||
'module' : 'moduleKeyword',
|
||||
'merger' : 'mergerKeyword',
|
||||
'export' : 'exportKeyword',
|
||||
'ungrouper' : 'ungrouperKeyword',
|
||||
'branches' : 'branchesKeyword',
|
||||
'branch' : 'branchKeyword',
|
||||
'aggregate' : 'aggregateKeyword',
|
||||
'as' : 'asKeyword',
|
||||
'min' : 'minKeyword',
|
||||
'max' : 'maxKeyword',
|
||||
'avg' : 'avgKeyword',
|
||||
'sum' : 'sumKeyword',
|
||||
'count' : 'countKeyword',
|
||||
'union' : 'unionKeyword',
|
||||
'in' : 'inKeyword',
|
||||
'notin' : 'notinKeyword',
|
||||
'OR' : 'ORKeyword',
|
||||
'NOT' : 'NOTKeyword',
|
||||
'bitOR': 'bitORKeyword',
|
||||
'bitAND' : 'bitANDKeyword',
|
||||
'm' : 'mKeyword',
|
||||
'mi' : 'miKeyword',
|
||||
'o' : 'oKeyword',
|
||||
'oi' : 'oiKeyword',
|
||||
's' : 'sKeyword',
|
||||
'si' : 'siKeyword',
|
||||
'd' : 'dKeyword',
|
||||
'di' : 'diKeyword',
|
||||
'f' : 'fKeyword',
|
||||
'fi' : 'fiKeyword',
|
||||
'eq' : 'eqKeyword', # prevent clash with = for match rules
|
||||
'delta': 'deltaKeyword',
|
||||
'rdelta' : 'rdeltaKeyword',
|
||||
'ms' : 'msKeyword'
|
||||
}
|
||||
|
||||
|
||||
def t_LTEQ(self, t):
|
||||
r'<='
|
||||
t.value = 'LTEQ'
|
||||
return t
|
||||
|
||||
def t_GTEQ(self, t):
|
||||
r'>='
|
||||
t.value = 'GTEQ'
|
||||
return t
|
||||
|
||||
def t_ML(self, t):
|
||||
r'<<'
|
||||
t.value = 'ML'
|
||||
return t
|
||||
|
||||
def t_MG(self, t):
|
||||
r'>>'
|
||||
t.value = 'MG'
|
||||
return t
|
||||
|
||||
def t_LT(self, t):
|
||||
r'<'
|
||||
t.value = 'LT'
|
||||
return t
|
||||
|
||||
def t_EQ(self, t):
|
||||
r'='
|
||||
t.value = 'EQ'
|
||||
return t
|
||||
|
||||
def t_GT(self, t):
|
||||
r'>'
|
||||
t.value = 'GT'
|
||||
return t
|
||||
|
||||
|
||||
tokens = ['id', 'LT', 'EQ', 'GT',
|
||||
'LTEQ', 'GTEQ', 'ML', 'MG',
|
||||
'MAC', 'IPv4', 'IPv6',
|
||||
'int', 'float', 'hex',
|
||||
'string'] + list(reserved.values())
|
||||
|
||||
t_ignore = ' \t'
|
||||
t_ignore_comment = r'\#.*'
|
||||
|
||||
literals = "+-*/(){},."
|
||||
|
||||
def t_string(self, t):
|
||||
r'"[^"\\\r\n]*(?:\\.[^"\\\r\n]*)*"'
|
||||
t.value = Arg("string", t.value[1:-1].replace("\\",''), t.value)
|
||||
return t
|
||||
|
||||
def t_IPv4(self, t):
|
||||
r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
|
||||
#the regex does include invalid IPs but they are
|
||||
#checked later during conversion
|
||||
try:
|
||||
t.value =Arg("addr_IPv4", int(netaddr.IP(t.value)), t.value)
|
||||
return t
|
||||
except netaddr.AddrFormatError:
|
||||
message = 'Bad IPv4 format %s at line %s' %(t.value,
|
||||
t.lexer.lineno)
|
||||
raise SyntaxError(message)
|
||||
|
||||
def t_MAC(self, t):
|
||||
r'([a-fA-F0-9]{2}[:\-]){5}[a-fA-F0-9]{2}'
|
||||
try:
|
||||
t.value = Arg("addr_MAC", int(netaddr.EUI(t.value)), t.value)
|
||||
return t
|
||||
except netaddr.AddrFormatError:
|
||||
message = 'Bad MAC format %s at line %s' %(t.value,
|
||||
t.lexer.lineno)
|
||||
raise SyntaxError(message)
|
||||
|
||||
def t_IPv6(self, t):
|
||||
r'(::[0-9a-f]{1,4}[0-9a-f:]*)|([0-9a-f]:[0-9a-f:]*)'
|
||||
# the regular expression is very general, so this rule should be
|
||||
# after the other address rules.
|
||||
try:
|
||||
t.value = Arg("addr_IPv6", int(netaddr.IP(t.value)), t.value)
|
||||
return t
|
||||
except netaddr.AddrFormatError:
|
||||
message = 'Bad IPv6 format %s at line %s' %(t.value,
|
||||
t.lexer.lineno)
|
||||
raise SyntaxError(message)
|
||||
|
||||
def t_float(self, t):
|
||||
'[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?'
|
||||
t.value = Arg("float", float(t.value), t.value)
|
||||
return t
|
||||
|
||||
def t_hex(self, t):
|
||||
r'0[xX][0-9a-fA-F]+'
|
||||
t.value = Arg("int", int(t.value, 0), t.value)
|
||||
return t
|
||||
|
||||
def t_int(self, t):
|
||||
r'\d+'
|
||||
t.value = Arg("int", int(t.value), t.value)
|
||||
return t
|
||||
#All the reserved words are matched in this rule
|
||||
def t_id(self, t):
|
||||
r'[a-zA-Z_][a-zA-Z_0-9]*'
|
||||
# matches also keywords, so be careful
|
||||
t.type = self.reserved.get(t.value,'id') # Check for reserved words
|
||||
return t
|
||||
|
||||
def t_newline(self, t):
|
||||
r'\n+'
|
||||
t.lexer.lineno += len(t.value)
|
||||
|
||||
# Error handling rule
|
||||
def t_error(self,t):
|
||||
msg = "Illegal character '%s'" % t.value[0]
|
||||
raise SyntaxError(msg)
|
||||
|
||||
# Test it output
|
||||
def test(self,data):
|
||||
self.lexer.input(data)
|
||||
while True:
|
||||
tok = self.lexer.token()
|
||||
if not tok: break
|
||||
print tok
|
||||
|
||||
class Parser(object):
|
||||
# the tokens from the lexer class:
|
||||
tokens = Lexer.tokens
|
||||
|
||||
def __init__(self):
|
||||
self.filters = []
|
||||
self.groupers = []
|
||||
self.splitter = None
|
||||
self.group_filters = []
|
||||
self.mergers = []
|
||||
self.branch_names = set()
|
||||
self.ungroupers = []
|
||||
self.branches = []
|
||||
self.input = None
|
||||
self.outputs = []
|
||||
self.names = {}
|
||||
self.lexer = Lexer().lexer
|
||||
self.parser = yacc.yacc(module=self)
|
||||
|
||||
def p_file(self,p):
|
||||
'''file : pipeline_stage_1n'''
|
||||
# for k, v in self.names.iteritems():
|
||||
# print k, v
|
||||
|
||||
def p_pipeline_stage_1n(self,p):
|
||||
'pipeline_stage_1n : pipeline_stage pipeline_stage_1n'
|
||||
# add a name mapping:
|
||||
try:
|
||||
# branch statements dont have names
|
||||
# so we skip them with try/except
|
||||
self.names[p[1].name] = p[1]
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
def p_pipeline_stage_end(self,p):
|
||||
'pipeline_stage_1n :'
|
||||
|
||||
def p_pipeline_stage(self,p):
|
||||
'''
|
||||
pipeline_stage : splitter
|
||||
| filter
|
||||
| composite_filter
|
||||
| branch
|
||||
| ungrouper
|
||||
| grouper
|
||||
| group_filter
|
||||
| merger
|
||||
'''
|
||||
|
||||
p[0] = p[1]
|
||||
|
||||
def p_splitter(self,p):
|
||||
'''
|
||||
splitter : splitterKeyword id '{' '}'
|
||||
'''
|
||||
p[0] = Splitter(p[2], p.lineno(2))
|
||||
if self.splitter != None:
|
||||
raise SyntaxError(
|
||||
"More than one splitter defined in file at line %s",p.lineno(2))
|
||||
|
||||
self.splitter = p[0]
|
||||
|
||||
def p_filter(self,p):
|
||||
'''
|
||||
filter : filterKeyword id '{' filter_rule_1n '}'
|
||||
'''
|
||||
# Note that p[4] is a list of lists of rules.
|
||||
# If the list has one element the rule is simple.
|
||||
# If the rule has more than one element, the
|
||||
# rule is OR-ed of all the rules in the list
|
||||
p[0] = Filter(p[2], p.lineno(2), p[4])
|
||||
self.filters.append(p[0])
|
||||
|
||||
|
||||
def p_composite_filter(self, p):
|
||||
'''
|
||||
composite_filter : filterKeyword id '{' filter_ref_rule_1n '}'
|
||||
'''
|
||||
# Note that p[4] is a list of lists of rules.
|
||||
# If the list has one element the rule is simple.
|
||||
# If the rule has more than one element, the
|
||||
# rule is OR-ed of all the rules in the list
|
||||
p[0] = Filter(p[2], p.lineno(2), p[4])
|
||||
self.filters.append(p[0])
|
||||
|
||||
def p_group_filter(self, p):
|
||||
'''
|
||||
group_filter : groupFilterKeyword id '{' filter_rule_1n '}'
|
||||
'''
|
||||
# Note that p[4] is a list of lists of rules.
|
||||
# If the list has one element the rule is simple.
|
||||
# If the rule has more than one element, the
|
||||
# rule is OR-ed of all the rules in the list
|
||||
p[0] = Filter(p[2], p.lineno(2), p[4])
|
||||
self.group_filters.append(p[0])
|
||||
|
||||
def p_filter_rule_1n(self,p):
|
||||
'filter_rule_1n : filter_rule filter_rule_1n'
|
||||
p[2].extend([p[1]])
|
||||
p[0] = p[2]
|
||||
|
||||
def p_filter_rule_0(self,p):
|
||||
'filter_rule_1n :'
|
||||
p[0] = []
|
||||
|
||||
def p_filter_rule(self,p):
|
||||
'''
|
||||
filter_rule : or_rule
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_filter_ref_rule_1n(self,p):
|
||||
'filter_ref_rule_1n : filter_ref_rule filter_ref_rule_1n'
|
||||
p[2].extend([p[1]])
|
||||
p[0] = p[2]
|
||||
|
||||
def p_filter_ref_rule_0(self,p):
|
||||
'filter_ref_rule_1n : filter_ref_rule'
|
||||
p[0] = [p[1]]
|
||||
|
||||
def p_filter_ref_rule(self,p):
|
||||
'''
|
||||
filter_ref_rule : or_id
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_or_id(self, p):
|
||||
'or_id : not_id opt_or_id'
|
||||
p[1].extend(p[2])
|
||||
p[0] = p[1]
|
||||
|
||||
def p_opt_or_id(self, p):
|
||||
'''
|
||||
opt_or_id : ORKeyword not_id opt_or_id
|
||||
'''
|
||||
p[2].extend(p[3])
|
||||
p[0] = p[2]
|
||||
|
||||
def p_opt_or_id_end(self, p):
|
||||
'opt_or_id :'
|
||||
p[0] = []
|
||||
|
||||
def p_not_id(self, p):
|
||||
'''
|
||||
not_id : NOTKeyword id
|
||||
| id
|
||||
'''
|
||||
try:
|
||||
p[0] = [FilterRef(p[2], p.lineno(2), True)]
|
||||
except IndexError:
|
||||
p[0] = [FilterRef(p[1], p.lineno(1))]
|
||||
|
||||
def p_or_optrule(self,p):
|
||||
'or_rule : rule_or_not opt_rule'
|
||||
if len(p[2]) > 0:
|
||||
ors = [p[1]]
|
||||
ors.extend(p[2])
|
||||
p[0] = ors
|
||||
else:
|
||||
p[0] = [p[1]]
|
||||
|
||||
def p_or_rule(self, p):
|
||||
'opt_rule : ORKeyword rule_or_not opt_rule'
|
||||
res = [p[2]]
|
||||
res.extend(p[3])
|
||||
p[0] = res
|
||||
|
||||
def p_term_opt_rule(self,p):
|
||||
'opt_rule :'
|
||||
p[0] = []
|
||||
|
||||
def p_rule_or_not(self, p):
|
||||
'''
|
||||
rule_or_not : rule
|
||||
| NOTKeyword rule
|
||||
'''
|
||||
try:
|
||||
p[2].NOT = True
|
||||
p[0] = p[2]
|
||||
except IndexError:
|
||||
p[0] = p[1]
|
||||
|
||||
def p_rule(self,p):
|
||||
'''
|
||||
rule : infix_rule
|
||||
| prefix_rule
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_infix_rule(self,p):
|
||||
'infix_rule : arg op arg'
|
||||
p[1].extend(p[3]) # concatenate args to get [arg, arg]
|
||||
# for some unknown reason p.lineno(2) does not work in this production
|
||||
# so p[2] is (op, lineno)
|
||||
p[0] = Rule(p[2][0], p[2][1], p[1]) # (op, line, args) From filter.py
|
||||
|
||||
def p_op(self, p):
|
||||
'''
|
||||
op : EQ
|
||||
| LT
|
||||
| GT
|
||||
| LTEQ
|
||||
| GTEQ
|
||||
| ML
|
||||
| MG
|
||||
| inKeyword
|
||||
| notinKeyword
|
||||
'''
|
||||
p[0] = (p[1], p.lineno(1))
|
||||
|
||||
def p_rule_prefix(self,p):
|
||||
'''
|
||||
prefix_rule : id '(' args ')'
|
||||
| bitANDKeyword '(' args ')'
|
||||
| bitORKeyword '(' args ')'
|
||||
'''
|
||||
p[0] = Rule(p[1], p.lineno(1), p[3])
|
||||
|
||||
def p_args(self,p):
|
||||
'''
|
||||
args : arg ',' args
|
||||
'''
|
||||
p[0] = p[1]
|
||||
p[0].extend(p[3]) # concatenate the rest of the args to arg
|
||||
|
||||
def p_args_more(self,p):
|
||||
'args : arg'
|
||||
p[0] = p[1]
|
||||
|
||||
def p_no_args(self, p):
|
||||
'args :'
|
||||
p[0] = []
|
||||
|
||||
def p_arg(self, p):
|
||||
'''
|
||||
arg : id
|
||||
| IPv4
|
||||
| IPv6
|
||||
| CIDR
|
||||
| MAC
|
||||
| int
|
||||
| float
|
||||
| hex
|
||||
| prefix_rule
|
||||
| string
|
||||
'''
|
||||
if type(p[1]) is type("string"):
|
||||
p[1] = Field(p[1]) # Was defined in filter.py, but the definition was commented out.
|
||||
p[0] = [p[1]] # list of one element for easy [].extend later
|
||||
|
||||
def p_cidr(self, p):
|
||||
'''
|
||||
CIDR : IPv4 '/' int
|
||||
| IPv6 '/' int
|
||||
'''
|
||||
p[0] = Rule('cidr_mask', p[1], p[3])
|
||||
|
||||
def p_start_branch(self, p):
|
||||
'''
|
||||
branch : id arrow mid_branch
|
||||
'''
|
||||
br = [BranchNode(p[1], p.lineno(1))] # In statement.py
|
||||
br.extend(p[3])
|
||||
p[0] = br
|
||||
self.branches.append(p[0])
|
||||
|
||||
def p_input_branch(self, p):
|
||||
'''
|
||||
branch : string arrow mid_branch
|
||||
'''
|
||||
if self.input != None:
|
||||
raise SyntaxError("More than one input defined in file at line %s" %
                  p.lineno(1))
|
||||
self.input = Input(p[1].value, p.lineno(1))
|
||||
br = [self.input]
|
||||
br.extend(p[3])
|
||||
p[0] = br
|
||||
self.branches.append(p[0])
|
||||
|
||||
def p_split_branch(self, p):
|
||||
'''
|
||||
branch : id branchKeyword mid_branch
|
||||
'''
|
||||
br = [BranchNode(p[1], p.lineno(1))]
|
||||
p[3][0] = Branch(p[3][0].name, p[3][0].line)
|
||||
br.extend(p[3])
|
||||
p[0] = br
|
||||
self.branches.append(p[0])
|
||||
|
||||
def p_mid_branch(self, p):
|
||||
'''
|
||||
mid_branch : id arrow mid_branch
|
||||
'''
|
||||
br = [BranchNode(p[1], p.lineno(1))]
|
||||
br.extend(p[3])
|
||||
p[0] = br
|
||||
|
||||
|
||||
def p_mid_branch_terminate(self, p):
|
||||
'''
|
||||
mid_branch : end_branch
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_end_branch(self, p):
|
||||
'end_branch : id'
|
||||
p[0] = [BranchNode(p[1], p.lineno(1))]
|
||||
|
||||
def p_output_branch(self, p):
|
||||
'end_branch : string'
|
||||
out = Output(p[1].value, p.lineno(1))
|
||||
self.outputs.append(out)
|
||||
p[0] = [out]
|
||||
|
||||
|
||||
def p_arrow(self, p):
|
||||
"""arrow : "-" GT"""
|
||||
pass
|
||||
|
||||
def p_ungrouper(self, p):
|
||||
'''
|
||||
ungrouper : ungrouperKeyword id '{' '}'
|
||||
'''
|
||||
p[0] = Ungrouper(p[2], p.lineno(2))
|
||||
self.ungroupers.append(p[0])
|
||||
|
||||
def p_grouper(self, p):
|
||||
"grouper : grouperKeyword id '{' module1_n aggregate '}'"
|
||||
p[0] = Grouper(p[2], p.lineno(2), p[4], p[5])
|
||||
# insert aggregation of record ids (needed for ungrouping later)
|
||||
p[0].aggr.insert(0,(Rule('union', p.lineno(2), [Field('rec_id'),
|
||||
'records'])))
|
||||
p[0].aggr.insert(0,(Rule('min', p.lineno(2), [Field('stime'),
|
||||
'stime'])))
|
||||
p[0].aggr.insert(0,(Rule('max', p.lineno(2), [Field('etime'),
|
||||
'etime'])))
|
||||
self.groupers.append(p[0])
|
||||
|
||||
def p_module1_n(self, p):
|
||||
'module1_n : module module1_n'
|
||||
p[1].extend(p[2])
|
||||
p[0] = p[1]
|
||||
|
||||
def p_module0(self, p):
|
||||
'module1_n :'
|
||||
p[0] = []
|
||||
|
||||
def p_module(self, p):
|
||||
"module : moduleKeyword id '{' grouper_rule1_n '}'"
|
||||
p[0] = [Module(p[2], p.lineno(2), p[4])]
|
||||
|
||||
def p_grouper_rule1_n(self, p):
|
||||
'grouper_rule1_n : grouper_rule grouper_rule1_n'
|
||||
p[1].extend(p[2])
|
||||
p[0] = p[1]
|
||||
|
||||
def p_grouper_rule0(self, p):
|
||||
'grouper_rule1_n :'
|
||||
p[0] = []
|
||||
|
||||
def p_grouper_rule(self, p):
|
||||
'grouper_rule : id grouper_op id'
|
||||
p[0] = [[GrouperRule(p[2], p.lineno(2), [Field(p[1]), Field(p[3]),
|
||||
None, False])]]
|
||||
|
||||
def p_grouper_rule_delta(self, p):
|
||||
'''
|
||||
grouper_rule : id grouper_op id deltaKeyword delta_arg
|
||||
'''
|
||||
p[0] = [[GrouperRule(p[2], p.lineno(2), [Field(p[1]), Field(p[3]),
|
||||
p[5], False])]]
|
||||
|
||||
def p_grouper_rule_rel_delta(self, p):
|
||||
'''
|
||||
grouper_rule : id grouper_op id rdeltaKeyword delta_arg
|
||||
'''
|
||||
p[0] = [[GrouperRule(p[2], p.lineno(2), [Field(p[1]), Field(p[3]),
|
||||
p[5], True])]]
|
||||
|
||||
def p_grouper_op(self, p):
|
||||
'''
|
||||
grouper_op : EQ
|
||||
| LT
|
||||
| GT
|
||||
| GTEQ
|
||||
| LTEQ
|
||||
'''
|
||||
p[0] = p[1]
|
||||
def p_delta_arg(self, p):
|
||||
'''
|
||||
delta_arg : time
|
||||
| int
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_time(self, p):
|
||||
'''
|
||||
time : int sKeyword
|
||||
| int msKeyword
|
||||
| int minKeyword
|
||||
'''
|
||||
# the number should be in ms:
|
||||
if p[2] == 's':
|
||||
p[1].value = p[1].value * 1000
|
||||
if p[2] == 'min':
|
||||
p[1].value = p[1].value * 60 * 1000
|
||||
p[0] = p[1]
|
||||
|
||||
def p_aggregate(self, p):
|
||||
'aggregate : aggregateKeyword aggr1_n'
|
||||
for aggr in p[2]:
|
||||
if aggr.line == 0:
|
||||
aggr.line = p.lineno(1)
|
||||
p[0] = p[2]
|
||||
|
||||
def p_aggr1_n(self, p):
|
||||
'aggr1_n : aggr opt_aggr'
|
||||
p[1].extend(p[2])
|
||||
p[0] = p[1]
|
||||
|
||||
def p_opt_aggr(self, p):
|
||||
"opt_aggr : ',' aggr opt_aggr"
|
||||
p[2].extend(p[3])
|
||||
p[0] = p[2]
|
||||
|
||||
def p_opt_aggr_end(self, p):
|
||||
'opt_aggr :'
|
||||
p[0] = []
|
||||
|
||||
def p_aggr(self, p):
|
||||
"aggr : aggr_op '(' id_or_qid ')' asKeyword id"
|
||||
args = [Field(p[3]), p[6]] # [id_or_qid, id, aggr_op]
|
||||
p[0] = [Rule(p[1], p.lineno(4), args)]
|
||||
|
||||
def p_simple_agg(self, p):
|
||||
'aggr : id_or_qid asKeyword id'
|
||||
args = [Field(p[1]), p[3]] # [qid, id]
|
||||
p[0] = [Rule('last', p.lineno(2), args)]
|
||||
|
||||
def p_simple_agg_same_name(self, p):
|
||||
'aggr : id_or_qid'
|
||||
args = [Field(p[1]), p[1]] # [qid, id]
|
||||
p[0] = [Rule('last', p.lineno(1), args)]
|
||||
|
||||
def p_qid(self, p):
|
||||
'''
|
||||
qid : id '.' id
|
||||
'''
|
||||
p[0] = p[1] + p[2] + p[3]
|
||||
|
||||
def p_id_or_qid(self, p):
|
||||
'''
|
||||
id_or_qid : id
|
||||
| qid
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_aggr_op(self, p):
|
||||
'''
|
||||
aggr_op : minKeyword
|
||||
| maxKeyword
|
||||
| sumKeyword
|
||||
| avgKeyword
|
||||
| unionKeyword
|
||||
| countKeyword
|
||||
| bitANDKeyword
|
||||
| bitORKeyword
|
||||
'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_merger(self, p):
|
||||
"merger : mergerKeyword id '{' merger_module1_n export '}'"
|
||||
p[0] = Merger(p[2], p.lineno(2), p[4], p[5])
|
||||
self.mergers.append(p[0])
|
||||
|
||||
|
||||
def p_merger_module1_n(self, p):
|
||||
'merger_module1_n : merger_module merger_module1_n'
|
||||
p[1].extend(p[2])
|
||||
p[0] = p[1]
|
||||
|
||||
def p_merger_module0(self, p):
|
||||
'merger_module1_n : '
|
||||
p[0] = []
|
||||
|
||||
def p_merger_module(self, p):
|
||||
"""
|
||||
merger_module : moduleKeyword id '{' merger_branches merger_rule1_n '}'
|
||||
"""
|
||||
p[0] = [Module(p[2], p.lineno(2), p[5], p[4])]
|
||||
|
||||
def p_merger_branches(self, p):
|
||||
'merger_branches : branchesKeyword branches1_n'
|
||||
p[0] = p[2]
|
||||
|
||||
def p_branches1_n(self, p):
|
||||
"""
|
||||
branches1_n : id ',' branches1_n
|
||||
"""
|
||||
p[0] = [p[1]]
|
||||
p[0].extend(p[3])
|
||||
|
||||
def p_branches1(self, p):
|
||||
' branches1_n : id'
|
||||
p[0] = [p[1]]
|
||||
|
||||
def p_export(self, p):
|
||||
'export : exportKeyword id'
|
||||
p[0] = p[2]
|
||||
|
||||
def p_merger_rule1_n(self, p):
|
||||
'merger_rule1_n : merger_rule merger_rule1_n'
|
||||
p[1].extend(p[2])
|
||||
p[0] = p[1]
|
||||
|
||||
def p_merger_rule0(self,p):
|
||||
'merger_rule1_n :'
|
||||
p[0] = []
|
||||
|
||||
def p_merger_rule(self, p):
|
||||
'''
|
||||
merger_rule : merger_prefix_rule
|
||||
| merger_infix_rule
|
||||
'''
|
||||
p[0] = [[p[1]]]
|
||||
|
||||
def p_not_merger_rule(self, p):
|
||||
'''
|
||||
merger_rule : NOTKeyword merger_prefix_rule
|
||||
| NOTKeyword merger_infix_rule
|
||||
'''
|
||||
p[2].NOT = True
|
||||
p[0] = [[p[2]]]
|
||||
|
||||
def p_merger_infix_rule(self, p):
|
||||
'merger_infix_rule : qid_arg op qid_arg'
|
||||
p[1].extend(p[3])
|
||||
p[0] = Rule(p[2][0], p[2][1], p[1])
|
||||
|
||||
def p_merger_prefix_rule(self,p):
|
||||
'''
|
||||
merger_prefix_rule : id '(' qid_args ')'
|
||||
'''
|
||||
p[0] = Rule(p[1], p.lineno(1), p[3])
|
||||
|
||||
def p_qid_args(self,p):
|
||||
'''
|
||||
qid_args : qid_arg ',' qid_args
|
||||
'''
|
||||
p[0] = p[1]
|
||||
p[0].extend(p[3]) # concatenate the rest of the args to arg
|
||||
|
||||
def p__qid_args_more(self,p):
|
||||
'qid_args : qid_arg'
|
||||
p[0] = p[1]
|
||||
|
||||
def p_no_qid_args(self, p):
|
||||
'qid_args :'
|
||||
p[0] = []
|
||||
|
||||
def p_qid_arg(self, p):
|
||||
'''
|
||||
qid_arg : qid
|
||||
| IPv4
|
||||
| IPv6
|
||||
| CIDR
|
||||
| MAC
|
||||
| int
|
||||
| float
|
||||
| hex
|
||||
| merger_prefix_rule
|
||||
| string
|
||||
'''
|
||||
if type(p[1]) is type("string"):
|
||||
p[1] = Field(p[1])
|
||||
p[0] = [p[1]] # list of one element for easy [].extend later
|
||||
|
||||
def p_merger_rule_al_op(self, p):
|
||||
'merger_rule : allen_rule opt_or_allen_rule'
|
||||
p[1].extend(p[2])
|
||||
p[0] = [p[1]]
|
||||
|
||||
def p_opt_or_allen_rule(self, p):
|
||||
'opt_or_allen_rule : ORKeyword allen_rule opt_or_allen_rule'
|
||||
p[2].extend(p[3])
|
||||
p[0] = p[2]
|
||||
|
||||
def p_opt_op_rule_end(self, p):
|
||||
'opt_or_allen_rule : '
|
||||
p[0] = []
|
||||
|
||||
def p_allen_rule(self, p):
|
||||
'allen_rule : id allen_op id opt_allen_delta'
|
||||
args = [Field(p[1]), Field(p[3])]
|
||||
args.extend(p[4]) # add the delta time to [arg, arg]
|
||||
p[0] = [AllenRule(p[2], p.lineno(1), args)] # (op, line, args)
|
||||
|
||||
def p_opt_allen_delta(self, p):
|
||||
'''
|
||||
opt_allen_delta : deltaKeyword time
|
||||
'''
|
||||
p[0] = [p[2]]
|
||||
|
||||
def p_no_allen_delta(self, p):
|
||||
'opt_allen_delta :'
|
||||
p[0] = []
|
||||
|
||||
def p_allen_op(self, p):
|
||||
'''
|
||||
allen_op : LT
|
||||
| GT
|
||||
| EQ
|
||||
| mKeyword
|
||||
| miKeyword
|
||||
| oKeyword
|
||||
| oiKeyword
|
||||
| sKeyword
|
||||
| siKeyword
|
||||
| dKeyword
|
||||
| diKeyword
|
||||
| fKeyword
|
||||
| fiKeyword
|
||||
| eqKeyword
|
||||
'''
|
||||
# for some strange reason upper level refuses to recognize lineno:
|
||||
p[0] = p[1]
|
||||
|
||||
def p_error(self, p):
|
||||
msg ="Syntax error. Unexpected token "
|
||||
msg +="%s (%s)"%(p.value, p.type)
|
||||
msg += " at line %s"% self.lexer.lineno
|
||||
raise SyntaxError(msg)
|
||||
|
||||
def parse(self, text):
|
||||
self.parser.parse(text, lexer=self.lexer) # parse method is called from ply.yacc
|
||||
self.resolve_branches()
|
||||
|
||||
def find_io_nodes(self):
|
||||
'''
|
||||
Finds which branch nodes are inputs and which are outputs.
|
||||
The rest of the branches are processing stages.
|
||||
'''
|
||||
|
||||
pass
|
||||
|
||||
def check_branching(self):
|
||||
pass
|
||||
|
||||
    def check_branch_nodes(self):
        for b in self.branch_nodes.values():
            if not b.is_branch:
                try:
                    node = self.names[b.name]
                    if len(b.inputs) == 0:
                        msg = "Node %s at line" % b.name
                        msg += " %s does not have input." % b.line
                        raise SyntaxError(msg)
                    if len(b.outputs) == 0:
                        msg = "Node %s at line" % b.name
                        msg += " %s does not have output." % b.line
                        raise SyntaxError(msg)
                    if len(b.inputs) > 1 and type(node) is not Merger:
                        msg = "Non-Merger node %s at line" % b.name
                        msg += " %s has more than one input." % b.line
                        raise SyntaxError(msg)
                    if len(b.outputs) > 1 and type(node) is not Splitter:
                        msg = "Non-Splitter node %s at line" % b.name
                        msg += " %s has more than one output." % b.line
                        raise SyntaxError(msg)

                except KeyError:
                    # check whether this is some middle node
                    if len(b.inputs) != 0 and len(b.outputs) != 0:
                        msg = "Node %s referenced at line" % b.name
                        msg += " %s not defined" % b.line
                        raise SyntaxError(msg)

                    # check whether the node name is actually a parsed string (Arg)
                    if type(b.name) is not Arg:
                        msg = "Node %s referenced at line" % b.name
                        msg += " %s not defined" % b.line
                        raise SyntaxError(msg)
            else:
                if len(b.inputs) != 1 or len(b.outputs) != 1:
                    msg = "Branch Node %s at line" % b.name
                    msg += " %s must have 1 input and 1 output." % b.line
                    raise SyntaxError(msg)

    def resolve_branches(self):
        noname_branchings = []
        for branch in self.branches:
#            print branch
#            print ""
            br_name = False
            br_index = 0
            for i, node in enumerate(branch):
                if type(node) is BranchNode:
                    try:
                        branch[i] = self.names[node.name]
                    except KeyError:
                        msg = "Node %s referenced at line" % node.name
                        msg += " %s not defined" % node.line
                        raise SyntaxError(msg)
                if type(node) is Branch:
                    br_name = node.name
                    br_index = i
                    self.branch_names.add(br_name)

                if type(node) is Input and i != 0:
                    msg = "Input node %s at line" % node.name
                    msg += " %s should be at the first position" % node.line
                    msg += " of a branching statement"
                    raise SyntaxError(msg)

                if type(node) is Output and i != (len(branch) - 1):
                    msg = "Output node %s at line" % node.name
                    msg += " %s should be at the last position" % node.line
                    msg += " of a branching statement"
                    raise SyntaxError(msg)

            if br_name:
                del(branch[br_index])
                for node in branch:
                    node.branches.add(br_name)
            else:
                noname_branchings.append(branch)

        # second iteration to fix the remaining nodes, which don't have branches
        for branch in noname_branchings:
            s = set()
            for node in branch:
                s.update(node.branches)
            for node in branch:
                node.branches.update(s)

class ParsedFile(object):
    def __init__(self, filters, groupers, splitters, group_filters,
                 mergers, branches, ungroupers, input, output, names):
        self.filters = filters
        self.groupers = groupers
        self.splitters = splitters
        self.group_filters = group_filters
        self.mergers = mergers
        self.branches = branches
        self.ungroupers = ungroupers
        self.input = input
        self.output = output
        self.names = names

BIN
parser.pyc
Normal file
BIN
parser.pyc
Normal file
Binary file not shown.
185
parsetab.py
Normal file
185
parsetab.py
Normal file
File diff suppressed because one or more lines are too long
BIN
parsetab.pyc
Normal file
BIN
parsetab.pyc
Normal file
Binary file not shown.
48
port-filter.flw
Normal file
48
port-filter.flw
Normal file
|
@ -0,0 +1,48 @@
|
|||
splitter S {}

filter www_req {
    dstport = 80
}

filter www_res {
    dstport = 80
}

grouper g_www_req {
    module g1 {
        dstport = dstport
        etime < stime rdelta 1s
    }
    aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
              bitOR(tcp_flags) as flags, union(srcport) as srcports
}

grouper g_www_res {
    module g1 {
        srcport = srcport
        etime < stime rdelta 1s
    }
    aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
              bitOR(tcp_flags) as flags, union(srcport) as srcports
}

groupfilter ggf {
    bitAND(flags, 0x13) = 0x13
}

merger M {
    module m1 {
        branches A, B
    }
    export m1
}

ungrouper U {}

"./netflow-trace.h5" -> S
S branch A -> www_req -> g_www_req -> ggf -> M
S branch B -> www_res -> g_www_res -> ggf -> M
M->U->"./ungroped.h5"
46
ports.flw
Normal file
46
ports.flw
Normal file
|
@ -0,0 +1,46 @@
|
|||
splitter S {}

filter www_req {
    dstport = 443 OR dstport = 80 OR dstport = 8080
    unix_secs > 1259413200
    unix_secs < 1259445600
}

filter www_res {
    unix_secs < 1259445600
    unix_secs > 1259413200
    srcport = 443 OR srcport = 80 OR srcport = 8080
}

grouper g_www_req {
    module g1 {
    }
    aggregate bitOR(tcp_flags) as flags
}

grouper g_www_res {
    module g1 {
    }
    aggregate bitOR(tcp_flags) as flags
}

groupfilter ggf {
    bitAND(flags, 0x13) = 0x13
}

merger M {
    module m1 {
        branches B, A
#        B.stime = 1259413200 AND B.etime = 1259445600
        A d B OR B d A
#        B o A delta 32400s
    }
    export m1
}

ungrouper U {}

"./h5ports.h5" -> S
S branch A -> www_req -> g_www_req -> ggf -> M
S branch B -> www_res -> g_www_res -> ggf -> M
M->U->"./portsungroped.h5"
BIN
portsungroped.h5
Normal file
BIN
portsungroped.h5
Normal file
Binary file not shown.
18
print_hdf_in_step.py
Executable file
18
print_hdf_in_step.py
Executable file
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/python
from record import RecordReader
from pytables import FlowRecordsTable
from itertools import izip
from optparse import OptionParser

if __name__ == '__main__':
    usage = 'usage: %prog [options] input files'
    p = OptionParser(usage)
    opts, arguments = p.parse_args()

    mg_readers = [RecordReader(FlowRecordsTable(f)) for f in arguments]

    for rec_tuple in izip(*mg_readers):
        print ""
        for r in rec_tuple:
            print r
21
printhdf.py
Executable file
21
printhdf.py
Executable file
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/python
from optparse import OptionParser
import pytables
import record
import sys

def printHDF(hdf_file):
    r = pytables.FlowRecordsTable(hdf_file)
    recordReader = record.RecordReader(r)
    for rec in recordReader:
        print rec

if __name__ == "__main__":
    usage = 'usage: %prog file_name.h5'
    p = OptionParser(usage)
    options, arguments = p.parse_args()
    if len(arguments) != 1:
        sys.stderr.write('Exactly one argument expected\n')
        exit(1)

    printHDF(arguments[0])
98
profiler.py
Normal file
98
profiler.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
from time import time
|
||||
import threading
|
||||
import sys
|
||||
from collections import deque
|
||||
try:
|
||||
from resource import getrusage, RUSAGE_SELF
|
||||
except ImportError:
|
||||
RUSAGE_SELF = 0
|
||||
def getrusage(who=0):
|
||||
return [0.0, 0.0] # on non-UNIX platforms cpu_time always 0.0
|
||||
|
||||
p_stats = None
|
||||
p_start_time = None
|
||||
|
||||
def profiler(frame, event, arg):
|
||||
if event not in ('call','return'): return profiler
|
||||
#### gather stats ####
|
||||
rusage = getrusage(RUSAGE_SELF)
|
||||
t_cpu = rusage[0] + rusage[1] # user time + system time
|
||||
code = frame.f_code
|
||||
fun = (code.co_name, code.co_filename, code.co_firstlineno)
|
||||
#### get stack with functions entry stats ####
|
||||
ct = threading.currentThread()
|
||||
try:
|
||||
p_stack = ct.p_stack
|
||||
except AttributeError:
|
||||
ct.p_stack = deque()
|
||||
p_stack = ct.p_stack
|
||||
#### handle call and return ####
|
||||
if event == 'call':
|
||||
p_stack.append((time(), t_cpu, fun))
|
||||
elif event == 'return':
|
||||
try:
|
||||
t,t_cpu_prev,f = p_stack.pop()
|
||||
assert f == fun
|
||||
except IndexError: # TODO investigate
|
||||
t,t_cpu_prev,f = p_start_time, 0.0, None
|
||||
call_cnt, t_sum, t_cpu_sum = p_stats.get(fun, (0, 0.0, 0.0))
|
||||
p_stats[fun] = (call_cnt+1, t_sum+time()-t, t_cpu_sum+t_cpu-t_cpu_prev)
|
||||
return profiler
|
||||
|
||||
|
||||
def profile_on():
|
||||
global p_stats, p_start_time
|
||||
p_stats = {}
|
||||
p_start_time = time()
|
||||
threading.setprofile(profiler)
|
||||
sys.setprofile(profiler)
|
||||
|
||||
|
||||
def profile_off():
|
||||
threading.setprofile(None)
|
||||
sys.setprofile(None)
|
||||
|
||||
def get_profile_stats():
|
||||
"""
|
||||
returns dict[function_tuple] -> stats_tuple
|
||||
where
|
||||
function_tuple = (function_name, filename, lineno)
|
||||
stats_tuple = (call_cnt, real_time, cpu_time)
|
||||
"""
|
||||
return p_stats
|
||||
|
||||
|
||||
#### EXAMPLE ##################################################################
|
||||
|
||||
if __name__ == '__main__':
|
||||
from time import sleep
|
||||
from threading import Thread
|
||||
import random
|
||||
|
||||
def test_function():
|
||||
pass
|
||||
|
||||
class T(Thread):
|
||||
def __init__(self):
|
||||
Thread.__init__(self)
|
||||
def run(self): # takes about 5 seconds
|
||||
for i in xrange(100):
|
||||
self.test_method()
|
||||
test_function()
|
||||
def test_method(self):
|
||||
sleep(random.random() / 10)
|
||||
|
||||
profile_on()
|
||||
#######################
|
||||
threads = [T() for i in xrange(3)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for i in xrange(100):
|
||||
test_function()
|
||||
for t in threads:
|
||||
t.join()
|
||||
#######################
|
||||
profile_off()
|
||||
|
||||
from pprint import pprint
|
||||
pprint(get_profile_stats())
|
BIN
profiler.pyc
Normal file
BIN
profiler.pyc
Normal file
Binary file not shown.
885
profiling-heavy-functions.txt
Normal file
885
profiling-heavy-functions.txt
Normal file
|
@ -0,0 +1,885 @@
|
|||
/var/netflow/ft-data-fall09/sne-ft-data/2009/sneze/2009-11-29/
|
||||
26521 records
|
||||
1683 records matched the http request
|
||||
|
||||
|
||||
|
||||
deepcopy
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.31
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 27.19
|
||||
Finished grouping branch A
|
||||
Finished grouping branch B
|
||||
Finished filtering groups for branch A
|
||||
Finished filtering groups for branch B
|
||||
Group filter time elapsed: 45.0
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 162.3
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 168.99
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done
|
||||
|
||||
real 2m49.129s
|
||||
user 2m44.070s
|
||||
sys 0m5.824s
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.33
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 30.16
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch B
|
||||
Finished filtering groups for branch A
|
||||
Group filter time elapsed: 34.2
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 138.3
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 143.71
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done
|
||||
|
||||
real 2m24.193s
|
||||
user 2m19.957s
|
||||
sys 0m4.608s
|
||||
|
||||
|
||||
|
||||
deep_copy
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.36
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 24.02
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch B
|
||||
Finished filtering groups for branch A
|
||||
Group filter time elapsed: 32.74
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 155.7
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 162.56
|
||||
Closing remaining open files: ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 2m43.294s
|
||||
user 2m38.782s
|
||||
sys 0m4.628s
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.26
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 24.8
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch A
|
||||
Finished filtering groups for branch B
|
||||
Group filter time elapsed: 34.95
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 144.75
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 149.73
|
||||
Closing remaining open files: ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 2m36.640s
|
||||
user 2m27.385s
|
||||
sys 0m3.508s
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.3
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 24.2
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch A
|
||||
Finished filtering groups for branch B
|
||||
Group filter time elapsed: 31.15
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 145.9
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 151.19
|
||||
|
||||
real 2m31.325s
|
||||
user 2m26.629s
|
||||
sys 0m5.412s
|
||||
|
||||
modified reset/deepcopy
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 64), (26521, 11.015153884887695, 11.560714000001838))
|
||||
|
||||
|
||||
|
||||
|
||||
(('new_group', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 21), (1466, 6.5672850608825684, 5.3123339999998507))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 25), (1468, 775.12532043457031, 766.78390699999591))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 15), (3228, 155.0828640460968, 160.51002500000152))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/splitter.py', 37), (3229, 87.616034030914307, 89.193573000000356))
|
||||
(('append', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 118), (3490, 35.743690967559814, 30.529941999999664))
|
||||
(('notify', '/usr/lib/python2.6/threading.py', 270), (6570, 10.859287977218628, 10.72066600000062))
|
||||
(('_is_owned', '/usr/lib/python2.6/threading.py', 219), (6695, 9.4564809799194336, 9.1245670000004111))
|
||||
(('final_result', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 57), (26521, 5.4859673976898193, 5.0482840000003648))
|
||||
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 45), (26521, 85.135001659393311, 88.205508000023968))
|
||||
|
||||
(('_deepcopy_dict', '/usr/lib/python2.6/copy.py', 251), (26712, 73.298033714294434, 75.524687000011454))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 129), (27270, 27.118208885192871, 27.781735000003209))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/merger.py', 72), (97059, 33.632721662521362, 30.013754000007793))
|
||||
(('read_row', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 99), (99360, 518.74268817901611, 468.40537100055235))
|
||||
(('iterrows', '/usr/local/lib/python2.6/dist-packages/tables/table.py', 1441), (99377, 118.15105223655701, 106.11463399998161))
|
||||
(('read_rows_list', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 105), (99797, 522.83437442779541, 472.12965100054475))
|
||||
(('read_rows_list', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 133), (99797, 550.52120852470398, 497.50723100058826))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/merger.py', 23), (147484, 24.74915337562561, 21.317261000004237))
|
||||
(('deepcopy', '/usr/lib/python2.6/copy.py', 144), (187567, 161.90160441398621, 165.33823200019515))
|
||||
(('read', '/usr/local/lib/python2.6/dist-packages/tables/vlarray.py', 700), (195364, 274.85678458213806, 246.25141199899576))
|
||||
(('<genexpr>', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 102), (294714, 294.22120332717896, 264.55258099813909))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 178), (856942, 596.70967555046082, 576.32406800022113))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 101), (861729, 430.92800951004028, 418.1861820004529))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 126), (989422, 290.51547265052795, 272.90903400041935))
|
||||
(('idx2long', '/usr/local/lib/python2.6/dist-packages/tables/utils.py', 66), (784846, 75.146798133850098, 69.772329999996373))
|
||||
(('is_idx', '/usr/local/lib/python2.6/dist-packages/tables/utils.py', 44), (784846, 26.284930467605591, 21.873351000002572))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/var/netflow/ft-data-fall09/kur-ft-data/2009-11-17/
|
||||
56992 records
|
||||
2438 records matched the http request
|
||||
|
||||
With profiler off:
|
||||
real 8m8.700s
|
||||
user 7m47.945s
|
||||
sys 0m12.909s
|
||||
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 1.29
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 58.21
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch A
|
||||
Finished filtering groups for branch B
|
||||
Group filter time elapsed: 59.8
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 471.27
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 480.68
|
||||
|
||||
56992
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py http-download.flw
|
||||
0.72
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
Group filter time started: 0.29
|
||||
3955
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Splitter time elapsed: 53.06
|
||||
Number of records in branch A 1985
|
||||
Number of records in branch B 2004
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined: 57.68
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 443.36
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 452.1
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done
|
||||
|
||||
real 7m46.456s
|
||||
user 7m21.036s
|
||||
sys 0m11.921s
|
||||
|
||||
|
||||
(('new_group', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 21), (1824, 9.5541517734527588, 9.8766150000006974))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 25), (1828, 1249.1410629749298, 1300.497268999989))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/splitter.py', 37), (3955, 59.615991353988647, 62.479928999999061))
|
||||
(('split', '/home/melnikovkolya/classes/semester-3-project/flowy/splitter.py', 17), (3955, 30.423548460006714, 32.126016000000902))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 15), (3956, 456.31715869903564, 475.12168400000229))
|
||||
(('get', '/usr/lib/python2.6/Queue.py', 150), (3957, 35.274902582168579, 37.742364999999495))
|
||||
(('append', '/usr/local/lib/python2.6/dist-packages/tables/vlarray.py', 452), (5486, 76.012235879898071, 76.39678599999479))
|
||||
(('append', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 118), (5785, 81.44921350479126, 81.341101000000435))
|
||||
(('notify', '/usr/lib/python2.6/threading.py', 270), (8002, 17.408251523971558, 17.825101000000359))
|
||||
(('_is_owned', '/usr/lib/python2.6/threading.py', 219), (8101, 14.244855642318726, 15.092936000000464))
|
||||
(('final_result', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 57), (56992, 15.892577886581421, 15.040958000006583))
|
||||
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 45), (56992, 255.76119065284729, 262.48040000008808))
|
||||
|
||||
(('_deepcopy_dict', '/usr/lib/python2.6/copy.py', 251), (57183, 218.50618243217468, 224.26205200008098))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 89), (58365, 30.709211587905884, 31.189945000012358))
|
||||
(('iterate_fixed_fields', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 93), (58365, 19.963983297348022, 19.749231000007512))
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 129), (58365, 86.714945554733276, 88.23755700004449))
|
||||
(('_deepcopy_list', '/usr/lib/python2.6/copy.py', 224), (114144, 72.901082038879395, 73.184596000045076))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 90), (117636, 47.137009859085083, 43.210651000023745))
|
||||
(('_deepcopy_atomic', '/usr/lib/python2.6/copy.py', 197), (171331, 14.566928386688232, 13.152824000005694))
|
||||
(('_keep_alive', '/usr/lib/python2.6/copy.py', 261), (343098, 47.557926893234253, 39.274455000023863))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/merger.py', 72), (343759, 89.168351411819458, 86.809352999718158))
|
||||
(('read_row', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 99), (347405, 1355.7759656906128, 1345.6080879980259))
|
||||
(('iterrows', '/usr/local/lib/python2.6/dist-packages/tables/table.py', 1441), (347422, 306.37827634811401, 304.82301899932509))
|
||||
(('read_rows_list', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 105), (348171, 1369.7901601791382, 1360.4090329980108))
|
||||
|
||||
(('deepcopy', '/usr/lib/python2.6/copy.py', 144), (400864, 485.14781737327576, 489.78665900019996))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 82), (408197, 79.613070487976074, 80.693067999662162))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/merger.py', 23), (527995, 64.410658597946167, 62.123842999773387))
|
||||
(('read', '/usr/local/lib/python2.6/dist-packages/tables/vlarray.py', 700), (689950, 714.14480590820312, 706.58424299669286))
|
||||
(('<genexpr>', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 102), (1037339, 765.8496515750885, 758.55947299578656))
|
||||
(('_processRangeRead', '/usr/local/lib/python2.6/dist-packages/tables/leaf.py', 449), (1037372, 470.43238306045532, 463.84111299771757))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 101), (1380363, 747.47748589515686, 753.67501099601259))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 178), (1380426, 1028.9652721881866, 1053.8537989941979))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 126), (1740570, 498.78313732147217, 495.35881499854258))
|
||||
(('EQ', '/home/melnikovkolya/classes/semester-3-project/flowy/operators.py', 63), (2370745, 182.36606240272522, 156.70575899921459))
|
||||
(('idx2long', '/usr/local/lib/python2.6/dist-packages/tables/utils.py', 66), (2764694, 214.65504741668701, 203.63286399914659))
|
||||
(('is_idx', '/usr/local/lib/python2.6/dist-packages/tables/utils.py', 44), (2764694, 75.347645044326782, 63.899976999761293))
|
||||
|
||||
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 64), (56992, 31.726502895355225, 31.213908000036554))
|
||||
(('deep_copy', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 41), (56994, 15.406083345413208, 16.889049000018872))
|
||||
|
||||
|
||||
7 days of data
|
||||
python ft2hdf.py /var/netflow/ft-data-fall09/sne-ft-data/2009/sneze/2009-12-0* netflow-trace.h5
|
||||
|
||||
246350 records in total
|
||||
12394 records match the query
|
||||
|
||||
profiling:
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 2.22
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 1130.1
|
||||
Finished grouping branch B
|
||||
Finished filtering groups for branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch A
|
||||
Group filter time elapsed: 2123.665408
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: -185.553776
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: -114.543776
|
||||
|
||||
no profiling:
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.26
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 320.43
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch B
|
||||
Finished filtering groups for branch A
|
||||
Group filter time elapsed: 922.42
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 1039.122704
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 1074.252704
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done
|
||||
|
||||
|
||||
start()real 90m16.511s
|
||||
user 86m23.020s
|
||||
sys 3m7.356s
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.31
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 346.66
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch B
|
||||
Finished filtering groups for branch A
|
||||
Group filter time elapsed: 916.19
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 1037.532704
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 1073.552704
|
||||
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 64), (246349, 940.52704691886902, 994.15005099796895))
|
||||
|
||||
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 64), (246349, 111.18868279457092, 105.20649999988791))
|
||||
(('deep_copy', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 41), (246351, 61.105264902114868, 52.447237999959725))
|
||||
(('read_rows_list', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 105), (3155228, 13582.554839611053, 13318.368595361764))
|
||||
(('read_rows_list', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 133), (3155228, 14223.106386899948, 13948.747855334786))
|
||||
(('read', '/usr/local/lib/python2.6/dist-packages/tables/vlarray.py', 700), (6280932, 6694.1691343784332, 6541.9808274548959))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 178), (30651429, 17337.516788959503, 17566.637794171394))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 101), (30677828, 12477.594463348389, 12583.8665639143))
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 126), (35383022, 8230.0888061523438, 8037.6824171527333))
|
||||
(('EQ', '/home/melnikovkolya/classes/semester-3-project/flowy/operators.py', 63), (40143460, 2728.9575715065002, 2304.1001345953482))
|
||||
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time flow-cat /var/netflow/ft-data-fall09/kur-ft-data/2009-11-16/ | flow-print | wc -l
|
||||
99925
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py http-download.flw0.77
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
Group filter time started: 0.33
|
||||
7222
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Splitter time elapsed: 100.03
|
||||
Number of records in branch B 3684
|
||||
Number of records in branch A 3644
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined: 136.09
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 960.34
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 974.11
|
||||
Closing remaining open files: ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 16m39.728s
|
||||
user 15m49.067s
|
||||
sys 0m26.002s
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python ft2hdf.py /var/netflow/ft-data-fall09/sne-ft-data/2009/sneze/2009-11-* netflow-trace.h5
|
||||
|
||||
298063
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py http-download.flw
|
||||
0.84
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
29448
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Splitter time elapsed: 475.83
|
||||
Number of records in branch B 16666
|
||||
Number of records in branch A 16412
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Group filter threads joined: 1415.34
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: -1347.101888 = 11485
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: -1301.531888 = 11531
|
||||
Closing remaining open files: ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 192m11.507s = 11531
|
||||
user 185m39.648s
|
||||
sys 7m25.104s
|
||||
|
||||
|
||||
|
||||
The following example shows how heavily the cost of the match operations (and of the work triggered by each successful match) depends on the load placed on the two branches.

We can see that processing of branch A, which matches records against port number 443, takes around 17 seconds, while the branch that checks whether a record uses the TCP protocol takes around 90 seconds. The reason for the larger running time is that many more entries match the prot=TCP requirement have been found, and each match requires additional processing to index the record and append it to a group.

Though each execution of the reset function takes on average 9-10 times longer than a single match call, the match function is called at least 30 times more often than reset, so in total the match calls still dominate the running time.

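A back-of-the-envelope check of that claim (the 10x and 30x factors below are the rough ratios quoted above, used only for illustration, not measured constants):

    # illustrative only: relative per-call cost of reset() vs. match()
    reset_cost_per_call = 10.0      # one reset() ~ 10x one match()
    match_cost_per_call = 1.0
    match_calls_per_reset = 30      # match() is invoked >= 30x more often

    total_reset = reset_cost_per_call * 1
    total_match = match_cost_per_call * match_calls_per_reset
    print total_match / total_reset  # ~3, so match() still dominates overall
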
After spot-profiling (running the multi-threaded profiler on selected sections of the code only), I could verify that the time spent in the match calls of the grouper module caused the most significant slow-down in all of the code up to the merger module. Depending on the complexity of each match() call, the execution time varied for the same number of function calls. The three match() calls from different classes form a nested chain, where one match() function relies on another match(). The heaviest (in terms of time per execution) of the three match functions is the top-most match(), which comes from the Group class of the grouper module. Besides relying on a double-nested match call from two other classes, it also performs a ... and calculates the first and the last records of the newly-formed group, which is necessary for relative comparisons.

The average time spent per cycle (including the profiler overhead) was:

(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 193), (280246, 151.20158743858337, 154.237679000132))

A very simple GrouperModule class match, with a single loop over the module's rules, looks like this:

    def match(self, record, group):
        for rule in self.rules:
            if not rule.match(record, group):
                return False
        return True

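To make the nesting described above concrete, the following is a schematic sketch of the three match() levels (hypothetical class shapes for illustration; the real implementations live in grouper.py and carry more state):

    class GrouperRule(object):
        def match(self, record, group):
            # innermost level: compare a single field of the record
            # against the corresponding field of the group
            return True

    class GrouperModule(object):
        def __init__(self, rules):
            self.rules = rules
        def match(self, record, group):
            # middle level: all rules of the module must hold
            return all(rule.match(record, group) for rule in self.rules)

    class Group(object):
        def __init__(self, modules):
            self.modules = modules
        def match(self, record):
            # top level (the heaviest): delegates to the nested match() calls
            # and, on a successful match, also updates the group's first and
            # last records, which are needed for relative (delta) comparisons
            return all(module.match(record, self) for module in self.modules)
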
This was followed by the filtering operation:
|
||||
|
||||
|
||||
Splitter initiated
|
||||
GF validation started at: 0.89
|
||||
GF Validation required: 0.09
|
||||
Parsing and validation finished: 0.32
|
||||
Started filtering
|
||||
Grouping started at:Fitlering time started at: Grouping started at: 1.0
|
||||
1.0
|
||||
1.0
|
||||
Finished filtering
|
||||
Filtering required: 16.87
|
||||
Filters ready
|
||||
Splitter time elapsed: 17.11
|
||||
Finished grouping branch A
|
||||
Grouping time required branch A 17.34
|
||||
Current time is: 18.34
|
||||
Finished filtering groups for branch A
|
||||
Finished grouping branch B
|
||||
Grouping time required branch B 90.08
|
||||
Current time is: 91.08
|
||||
Finished filtering groups for branch B
|
||||
Group filter time elapsed: 90.41
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 111.58
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 111.75
|
||||
Closing remaining open files: ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 1m52.516s
|
||||
user 1m50.411s
|
||||
sys 0m2.136s
|
||||
|
||||
|
||||
The performance of the group-filters differs significantly depending on how many matching records were found in the filtering stage.

That is, a filter for port 443 yields relatively few records, while a filter for port numbers > 30000 yields many matching records. The matching records need to be processed and stored for the subsequent group-filters, which try to form groups from them. An example of running a query that identifies flows with a destination port of 443 and a source port > 30000 is shown next. It can be seen that group-filtering of branch B, which handles the srcport > 30000 part of the request, has a much larger running time than branch A, which only has to look at the few records with a destination port of 443.
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.28
|
||||
Started filtering
|
||||
Fitlering time started at: 0.98
|
||||
Finished filtering
|
||||
Filtering required: 33.49
|
||||
Filters ready
|
||||
Splitter time elapsed: 33.61
|
||||
Finished grouping branch A
|
||||
Grouping time finished for branch A 40.49
|
||||
Finished filtering groups for branch A
|
||||
Finished grouping branch B
|
||||
Grouping time finished for branch B 228.46
|
||||
Finished filtering groups for branch B
|
||||
Group filter time elapsed: 227.86
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 252.77
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 253.31
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done
|
||||
|
||||
real 4m11.671s
|
||||
user 4m9.296s
|
||||
sys 0m4.880s
|
||||
|
||||
|
||||
|
||||
==================================HTTPS PROFILE===========================================
|
||||
|
||||
|
||||
|
||||
|
||||
A query that selects source and destination ports is defined as follows. We used that query to compare simple running times of different tools:
|
||||
|
||||
splitter S {}
|
||||
|
||||
filter www_tcp {
|
||||
dstport = 443
|
||||
}
|
||||
|
||||
filter www_port {
|
||||
srcport = 443
|
||||
|
||||
}
|
||||
|
||||
grouper g_www_tcp {
|
||||
module g1 {
|
||||
}
|
||||
aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
bitOR(tcp_flags) as flags, union(srcport) as srcports
|
||||
}
|
||||
|
||||
grouper g_www_port {
|
||||
module g1 {
|
||||
}
|
||||
aggregate srcip, dstip, sum(bytes) as bytes, count(rec_id) as n,
|
||||
bitOR(tcp_flags) as flags, union(dstport) as dstports
|
||||
}
|
||||
|
||||
groupfilter ggf {
|
||||
bitAND(flags, 0x13) = 0x13
|
||||
}
|
||||
|
||||
merger M {
|
||||
module m1 {
|
||||
branches B, A
|
||||
A m B delta 1440min
|
||||
}
|
||||
export m1
|
||||
}
|
||||
|
||||
ungrouper U {}
|
||||
|
||||
"./netflow-trace.h5" -> S
|
||||
S branch A -> www_tcp -> g_www_tcp -> ggf -> M
|
||||
S branch B -> www_port -> g_www_port -> ggf -> M
|
||||
M->U->"./ungroped.h5"
|
||||
|
||||
The same number of
|
||||
|
||||
/var/netflow/ft-data-fall09/sne-ft-data/2009/sneze/2009-11-29/
|
||||
|
||||
26521 records in total
|
||||
486 records match
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py https-flows.flw
|
||||
0.73
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
486
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Number of records in branch A 243 Number of records in branch B 243
|
||||
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined
|
||||
Finished merging branches: ['B', 'A']
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 6.61
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done ./netflow-trace.h5... done ./netflow-trace.h5... done ./flowy-run/GroupsB-merged.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA-merged.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/MergedM.h5... done ./ungroped.h5... done ./flowy-run/GroupsA.h5... done
|
||||
|
||||
real 0m14.245s
|
||||
user 0m7.168s
|
||||
sys 0m0.280s
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py https-flows.flw
|
||||
0.81
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
486
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Number of records in branch B 243Number of records in branch A 243
|
||||
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined
|
||||
Finished merging branches: ['B', 'A']
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 6.31
|
||||
Closing remaining open files: ./netflow-trace.h5... done ./flowy-run/GroupsA.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/GroupsB-merged.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done ./ungroped.h5... done ./netflow-trace.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/GroupsA-merged.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 0m9.051s
|
||||
user 0m7.072s
|
||||
sys 0m0.160s
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py https-flows.flw
|
||||
0.83
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
Group filter time started: 0.23
|
||||
486
|
||||
Filters ready
|
||||
Number of records in branch A 243
|
||||
Splitter finished
|
||||
Splitter time elapsed: 6.1
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Number of records in branch B 243
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined: 6.17
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 6.23
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 7.36
|
||||
Closing remaining open files: ./flowy-run/GroupsB.h5... done ./netflow-trace.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done ./netflow-trace.h5... done ./flowy-run/GroupsB-merged.h5... done ./flowy-run/GroupsA.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/GroupsA-merged.h5... done ./ungroped.h5... done ./flowy-run/MergedM.h5... done
|
||||
|
||||
real 0m15.893s
|
||||
user 0m7.440s
|
||||
sys 0m0.868s
|
||||
|
||||
|
||||
|
||||
Most frequent:
|
||||
|
||||
(('final_result', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 99), (26521, 1.8366894721984863, 1.7001189999999156))
|
||||
|
||||
(('reset', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 83), (26521, 3.138737678527832, 3.0042079999998066))
|
||||
|
||||
(('deep_copy', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 58), (26523, 1.7581963539123535, 1.6681159999999338))
|
||||
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 92), (26526, 3.3419792652130127, 3.0921969999998495))
|
||||
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 130), (26526, 9.8621282577514648, 9.6565820000015421))
|
||||
|
||||
(('iterate_fixed_fields', '/home/melnikovkolya/classes/semester-3-project/flowy/pytables.py', 96), (26526, 1.9721605777740479, 1.7561189999999769))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/record.py', 82), (27015, 4.6438140869140625, 4.6482780000005732))
|
||||
|
||||
(('mask', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 90), (53042, 1.6173598766326904, 1.5800989999999153))
|
||||
|
||||
(('EQ', '/home/melnikovkolya/classes/semester-3-project/flowy/operators.py', 63), (53044, 1.4263303279876709, 1.1120729999999632))
|
||||
|
||||
(('match', '/home/melnikovkolya/classes/semester-3-project/flowy/filter.py', 134), (53046, 5.1699655055999756, 4.6562810000002663))
|
||||
|
||||
|
||||
Heaviest:
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/groupfilter_validator.py', 16), (1, 0.18725490570068359, 0.18801199999999962))
|
||||
|
||||
(('get_interval_records', '/home/melnikovkolya/classes/semester-3-project/flowy/timeindex.py', 57), (1, 0.2019498348236084, 0.20001300000000199))
|
||||
|
||||
(('pass_allen_indices_down', '/home/melnikovkolya/classes/semester-3-project/flowy/merger.py', 79), (1, 0.20258498191833496, 0.20001300000000199))
|
||||
|
||||
(('go', '/home/melnikovkolya/classes/semester-3-project/flowy/merger.py', 108), (1, 0.2162168025970459, 0.21201300000000245))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper_validator.py', 11), (1, 0.22698211669921875, 0.22401300000000002))
|
||||
|
||||
(('__iter__', '/home/melnikovkolya/classes/semester-3-project/flowy/grouper.py', 36), (4, 1.1266498565673828, 1.1920739999999945))
|
||||
|
||||
(('_form_master_re', '/usr/lib/pymodules/python2.6/ply/lex.py', 482), (1, 0.30334997177124023, 0.22801499999999986))
|
||||
|
||||
(('validate_rules', '/usr/lib/pymodules/python2.6/ply/lex.py', 723), (1, 0.33556008338928223, 0.31602000000000008))
|
||||
|
||||
(('validate_all', '/usr/lib/pymodules/python2.6/ply/lex.py', 567), (1, 0.33656787872314453, 0.31602000000000008))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/filter_validator.py', 10), (1, 0.37907099723815918, 0.3560230000000002))
|
||||
|
||||
(('go', '/home/melnikovkolya/classes/semester-3-project/flowy/groupfilter.py', 14), (2, 1.1871206760406494, 1.248076999999995))
|
||||
|
||||
(('create_impl', '/home/melnikovkolya/classes/semester-3-project/flowy/ungrouper_validator.py', 76), (1, 0.60985612869262695, 0.60803800000000052))
|
||||
|
||||
(('lex', '/usr/lib/pymodules/python2.6/ply/lex.py', 865), (1, 0.65552186965942383, 0.56003499999999995))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/parser.py', 9), (1, 0.6572871208190918, 0.56403499999999995))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/ungrouper_validator.py', 10), (1, 0.67348289489746094, 0.67204200000000114))
|
||||
|
||||
(('__init__', '/home/melnikovkolya/classes/semester-3-project/flowy/parser.py', 182), (1, 0.71254801750183105, 0.6200389999999999))
|
||||
|
||||
(('go', '/home/melnikovkolya/classes/semester-3-project/flowy/ungrouper.py', 29), (1, 1.85223388671875, 1.8081130000000023))
|
||||
|
||||
|
||||
|
||||
/var/netflow/ft-data-fall09/kur-ft-data/2009-11-17/
|
||||
56992 records
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python ft2hdf.py /var/netflow/ft-data-fall09/kur-ft-data/2009-11-17/ netflow-trace.h5
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py https-flows.flw
|
||||
0.7
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
Group filter time started: 0.27
|
||||
219
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Splitter time elapsed: 13.2
|
||||
Number of records in branch A 158
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Number of records in branch B 61
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined: 13.18
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 13.23
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 13.83
|
||||
Closing remaining open files: ./netflow-trace.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done ./netflow-trace.h5... done ./flowy-run/GroupsB-merged.h5... done ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/GroupsA-merged.h5... done ./ungroped.h5... done ./flowy-run/MergedM.h5... done
|
||||
|
||||
real 0m15.696s
|
||||
user 0m13.653s
|
||||
sys 0m1.004s
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python printhdf.py ungroped.h5 | wc -l
|
||||
Closing remaining open files: ungroped.h5... done
|
||||
219
|
||||
|
||||
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python ft2hdf.py /var/netflow/ft-data-fall09/kur-ft-data/2009-11-16/ netflow-trace.h5
|
||||
|
||||
99924
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# time python flowy.py https-flows.flw
|
||||
0.71
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
Group filter time started: 0.27
|
||||
1434
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Splitter time elapsed: 23.19
|
||||
Number of records in branch A 748
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Number of records in branch B 686
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined: 23.23
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 23.31
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 26.48
|
||||
Closing remaining open files: ./netflow-trace.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsA.h5... done ./netflow-trace.h5... done ./flowy-run/GroupsB-merged.h5... done ./flowy-run/GroupsA.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/GroupsA-merged.h5... done ./ungroped.h5... done ./flowy-run/MergedM.h5... done
|
||||
|
||||
real 0m28.767s
|
||||
user 0m24.486s
|
||||
sys 0m2.840s
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python printhdf.py ungroped.h5 | wc -l
|
||||
Closing remaining open files: ungroped.h5... done
|
||||
1434
|
||||
|
||||
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python ft2hdf.py /var/netflow/ft-data-fall09/sne-ft-data/2009/sneze/2009-11-* netflow-trace.h5
|
||||
|
||||
298063
|
||||
|
||||
root@melnikovkolya-laptop:/home/melnikovkolya/classes/semester-3-project/flowy# python printhdf.py ungroped.h5 | wc -l
|
||||
Closing remaining open files: ungroped.h5... done
|
||||
4087
|
||||
|
||||
|
||||
0.76
|
||||
Splitter initiated
|
||||
Started filtering
|
||||
4087
|
||||
Filters ready
|
||||
Splitter finished
|
||||
Group filter time started: 53.73
|
||||
Splitter time elapsed: 53.73
|
||||
Number of records in branch A 2041
|
||||
Finished grouping branch A
|
||||
Finished group-filtering for branch A
|
||||
Number of records in branch B 2046
|
||||
Finished grouping branch B
|
||||
Finished group-filtering for branch B
|
||||
Group filter threads joined: 54.37
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 54.47
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 63.47
|
||||
Closing remaining open files: ./flowy-run/GroupsB-merged.h5... done ./netflow-trace.h5... done ./ungroped.h5... done ./flowy-run/MergedM.h5... done ./flowy-run/MergedM.h5... done ./netflow-trace.h5... done ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsA.h5... done ./flowy-run/GroupsA-merged.h5... done ./flowy-run/GroupsB.h5... done ./flowy-run/GroupsB.h5... done
|
||||
|
||||
real 1m8.146s
|
||||
user 1m3.576s
|
||||
sys 0m0.776s
\begin{mytinylisting}
\begin{verbatim}
splitter S {}

filter www_req {
dstport = 443 OR dstport = 80 OR dstport = 8080
unix_secs > 1259413200
unix_secs < 1259445600
}

filter www_res {
srcport = 443 OR srcport = 80 OR srcport = 8080
unix_secs < 1259445600
unix_secs > 1259413200
}

grouper g_www_req {
module g1 {
}
aggregate bitOR(tcp_flags) as flags
}

grouper g_www_res {
module g1 {
}
aggregate bitOR(tcp_flags) as flags
}

groupfilter ggf {
bitAND(flags, 0x13) = 0x13
}

merger M {
module m1 {
branches B, A
A d B OR B d A
}
export m1
}

ungrouper U {}

"./h5ports.h5" -> S
S branch A -> www_req -> g_www_req -> ggf -> M
S branch B -> www_res -> g_www_res -> ggf -> M
M->U->"./portsungroped.h5"
\end{verbatim}
\end{mytinylisting}
%
Execution of that query (the merger rule A d B OR B d A keeps only pairs of groups whose time intervals lie within one another, i.e. Allen's "during" relation):
\begin{verbatim}
flowy# time python flowy.py ports.flw
0.83
Splitter initiated
Started filtering
Group filter time started: 0.3
1463
Filters ready
Splitter finished
Splitter time elapsed: 7.12
Number of records in branch B 1463
Finished grouping branch B
Finished group-filtering for branch B
Number of records in branch A 1463
Finished grouping branch A
Finished group-filtering for branch A
Group filter threads joined: 7.26
Finished merging branches: ['B', 'A']
Merger time elapsed: 7.26
Ungrouper U finished exectution
FINISHED!
Overall time elapsed: 13.92

real 0m14.788s
user 0m13.969s
sys 0m0.900s
\end{verbatim}
383
pytables.py
Normal file
383
pytables.py
Normal file
|
@ -0,0 +1,383 @@
|
|||
import tables
|
||||
import os.path
|
||||
from itertools import izip
|
||||
from math import ceil, floor
|
||||
|
||||
|
||||
default_fields = []
|
||||
default_types = []
|
||||
|
||||
def create_flowtools_value_reader(fields):
|
||||
def get_fields(record):
|
||||
x = tuple(getattr(record,attr) for attr in fields)
|
||||
return x
|
||||
|
||||
return get_fields
|
||||
|
||||
#class RecordsTable(object):
|
||||
# def __init__(self, file_path, tree_path, id_size):
|
||||
# if os.path.exists(file_path):
|
||||
# self.file_path = file_path
|
||||
# else:
|
||||
# raise IOError("File %s cannot be accessed."%file_path)
|
||||
# self.tree_path = tree_path
|
||||
# # open for reading
|
||||
# self.file = tables.openFile(self.file_path, mode="r+")
|
||||
# try:
|
||||
# #try to open the table as specified by path if node does not
|
||||
# #exist create it
|
||||
# self.table = self.file.getNode(self.tree_path)
|
||||
# except tables.exceptions.NoSuchNodeError:
|
||||
# raise IOError("file %s does not contain table %s"%
|
||||
# (self.file_path,self.tree_path))
|
||||
# self.fields = tuple(self.table.description._v_names)
|
||||
# self.types = tuple([self.table.description._v_dtypes[name]
|
||||
# for name in self.fields])
|
||||
# # add the id field base on row number:
|
||||
# self.fields += ('rec_id',)
|
||||
# self.types += (id_size,)
|
||||
#
|
||||
# def __del__(self):
|
||||
# self.file.close()
|
||||
#
|
||||
# def close(self):
|
||||
# self.file.close()
|
||||
|
||||
|
||||
|
||||
class Table(object):
|
||||
def __init__(self, file_path, id_size):
|
||||
if os.path.exists(file_path):
|
||||
self.file_path = file_path
|
||||
else:
|
||||
raise IOError("File %s cannot be accessed."%file_path)
|
||||
# open for reading
|
||||
self.file = tables.openFile(self.file_path, mode="r+")
|
||||
|
||||
# Returns the names of the fields that matter at the filter stage.
|
||||
# i.e. srcport/dstport/prot/srcip/...
|
||||
var_nodes = ['/' + field
|
||||
for field in self.file.root._v_attrs.variable_fields]
|
||||
self.table = self.file.getNode("/fixed_length_fields")
|
||||
# print var_nodes
|
||||
self.tables =[self.table.iterrows()] + map(self.file.getNode, var_nodes)
|
||||
# print self.tables
|
||||
self.fixed_fields = self.file.root._v_attrs.fixed_fields
|
||||
# print self.fixed_fields
|
||||
self.variable_fields = self.file.root._v_attrs.variable_fields
|
||||
# add the id field base on row number:
|
||||
self.fields = tuple(['rec_id'] +
|
||||
self.fixed_fields +
|
||||
self.variable_fields)
|
||||
self.types = tuple([id_size] +
|
||||
self.file.root._v_attrs.fixed_types +
|
||||
self.file.root._v_attrs.variable_types)
|
||||
|
||||
# print self.file.root._v_attrs.variable_fields
|
||||
|
||||
def __del__(self):
|
||||
self.file.close()
|
||||
|
||||
def close(self):
|
||||
self.file.close()
|
||||
|
||||
def flush(self):
|
||||
self.file.flush()
|
||||
|
||||
class FlowRecordsTable(Table):
|
||||
"""A reader object for an HDF table of flow records"""
|
||||
def __init__(self, file_path, expected_id_size = tables.UInt32Col()):
|
||||
Table.__init__(self, file_path, id_size = expected_id_size)
|
||||
|
||||
def __iter__(self):
|
||||
for row in izip(self.iterate_fixed_fields(), *self.tables[1:]):
|
||||
yield row[0] + tuple(row[1:])
|
||||
|
||||
def iterate_fixed_fields(self):
|
||||
for row in self.table:
|
||||
yield tuple([row.nrow] +
|
||||
[row[field] for field in self.fixed_fields])
|
||||
raise StopIteration
|
||||
|
||||
def read_row(self, row_n):
|
||||
row = [r for r in self.table.iterrows(row_n, row_n + 1)][0] #Is iterrows actually heavy itself?
|
||||
fixed = tuple([row[field] for field in self.fixed_fields])
|
||||
variable = tuple(table.read(row_n)[0] for table in self.tables[1:])
|
||||
# print (row_n,) + fixed + variable
|
||||
return (row_n,) + fixed + variable
|
||||
|
||||
def read_rows_list(self, rows_list):
|
||||
for row_n in rows_list:
|
||||
yield self.read_row(row_n)
|
||||
|
||||
def iter_ids(self, id_list):
|
||||
return self.table.readCoordinates(id_list)
|
||||
|
||||
def get_record_by_id(self,id):
|
||||
return self.table[id]
|
||||
|
||||
def __del__(self):
|
||||
self.file.close()
|
||||
|
||||
def append(self, record):
|
||||
self.row = self.table.row
|
||||
for field in self.fixed_fields:
|
||||
self.row[field] = getattr(record, field)
|
||||
self.row.append()
|
||||
for field in self.variable_fields:
|
||||
getattr(self.file.root, field).append(getattr(record, field))
|
||||
|
||||
def get_current_row(self):
|
||||
return self.row.nrow
|
||||
|
||||
@property
|
||||
def len(self):
|
||||
return self.table.nrows
|
||||
|
||||
class TimeIndex(FlowRecordsTable):
|
||||
def __init__(self, fname, id_size_bytes=4):
|
||||
FlowRecordsTable.__init__(self, fname, id_size_bytes)
|
||||
self.start_time = self.file.root._v_attrs.start_time
|
||||
self.delta = self.file.root._v_attrs.delta
|
||||
self.id_size = id_size_bytes
|
||||
self.index = self.tables[0]
|
||||
|
||||
|
||||
def get_intervals_list(self, stime, etime):
|
||||
start_interval = int(floor((stime - self.start_time) / self.delta))
|
||||
end_interval = int(ceil((etime - self.start_time) / self.delta))
|
||||
if start_interval < 1 or end_interval < 1:
|
||||
raise ValueError("Something's wrong with index intervals")
|
||||
|
||||
return xrange(start_interval, end_interval)
|
||||
|
||||
def get_intervals_before(self, record, time_before):
|
||||
res = self.get_intervals_list(record.stime - time_before, record.stime)
|
||||
return res
|
||||
|
||||
def get_intervals_after(self, record, time_after):
|
||||
res = self.get_intervals_list(record.etime, record.etime + time_after)
|
||||
return res
|
||||
|
||||
def during(self, record):
|
||||
return self.index.get_intervals_list
|
||||
|
||||
def time_to_index_row(self, time):
|
||||
return int(floor((time - self.start_time) / self.delta))
|
||||
|
||||
def index(self, record):
|
||||
for i in self.get_intervals_list(record.stime, record.etime):
|
||||
self.index[i] = self.index[i].append(record.rec_id)
|
||||
|
||||
#class FlowRecordsTable(RecordsTable):
|
||||
# """A reader object for an HDF table of flow records"""
|
||||
# def __init__(self, file_path, expected_id_size = tables.UInt32Col()):
|
||||
# RecordsTable.__init__(self, file_path, "/flow_records",
|
||||
# id_size = expected_id_size)
|
||||
#
|
||||
# def __iter__(self):
|
||||
# for row in self.table:
|
||||
# yield row[:] + (row.nrow,) # tuple concatenation
|
||||
#
|
||||
# raise StopIteration
|
||||
#
|
||||
# def iter_ids(self, id_list):
|
||||
# return self.table.readCoordinates(id_list)
|
||||
#
|
||||
# def get_record_by_id(self,id):
|
||||
# return self.table[id]
|
||||
#
|
||||
# def __del__(self):
|
||||
# self.file.close()
|
||||
#
|
||||
# def append(self,args):
|
||||
# self.row = self.table.row
|
||||
## print zip(self.fields, args)
|
||||
# for field, val in zip(self.fields, args):
|
||||
# self.row[field]= val
|
||||
# self.row.append()
|
||||
|
||||
def create_Table(file, fields, table_name, field_types, filters):
|
||||
file.createTable(file.root, table_name, field_types,
|
||||
"Records Table", filters=filters)
|
||||
|
||||
def create_VLArray(file, name, atom, description, filters):
|
||||
array = file.createVLArray(file.root, name,
|
||||
atom,
|
||||
"variable length field "+name,
|
||||
filters=filters)
|
||||
array.flavor = 'python'
return array
|
||||
|
||||
#def create_table_file(file_path, field_types, table_name="flow_records",
|
||||
# complib='lzo', complevel=9):
|
||||
# if os.path.exists(file_path):
|
||||
# raise IOError("File %s already exists"%file_path)
|
||||
#
|
||||
# file = tables.openFile(file_path, mode="w")
|
||||
# filters = tables.Filters(complevel=complevel, complib=complib)
|
||||
# file.createTable(file.root, table_name, field_types,
|
||||
# "Records Table", filters=filters)
|
||||
# file.close()
|
||||
|
||||
def create_index_file(file_path, start_time, delta, id_size_bytes,
|
||||
complib='lzo', complevel=9, itemsize_in_bytes = 4):
|
||||
if os.path.exists(file_path):
|
||||
raise IOError("File %s already exists"%file_path)
|
||||
|
||||
file = tables.openFile(file_path, mode="w")
|
||||
filters = tables.Filters(complevel=complevel, complib=complib)
|
||||
array = create_VLArray(file, 'time_index',
|
||||
tables.UIntAtom(itemsize=itemsize_in_bytes),
|
||||
"time_index", filters=filters)
|
||||
array.flavor = 'python'
|
||||
file.root._v_attrs.variable_fields = ['time_index']
|
||||
file.root._v_attrs.variable_types = [
|
||||
tables.UIntAtom(itemsize=itemsize_in_bytes)]
|
||||
file.root._v_attrs.start_time = start_time
|
||||
file.root._v_attrs.delta = delta
|
||||
file.close()
|
||||
|
||||
|
||||
def create_table_file(file_path, field_types,
|
||||
complib='lzo', complevel=9):
|
||||
if os.path.exists(file_path):
|
||||
raise IOError("File %s already exists"%file_path)
|
||||
|
||||
file = tables.openFile(file_path, mode="w")
|
||||
filters = tables.Filters(complevel=complevel, complib=complib)
|
||||
# filters = tables.Filters()
|
||||
if 'rec_id' in field_types:
|
||||
del field_types['rec_id']
|
||||
fixed_length_fields = {}
|
||||
variable_length_fields = {}
|
||||
for k, v in field_types.iteritems():
|
||||
# print str(type(v)), str(type(v)).find('atom')
|
||||
if str(type(v)).find('atom') == -1:
|
||||
fixed_length_fields[k] = v
|
||||
else:
|
||||
variable_length_fields[k] = v
|
||||
|
||||
file.createTable(file.root, "fixed_length_fields", fixed_length_fields,
|
||||
"Records Table", filters=filters)
|
||||
|
||||
for field_name, atom in variable_length_fields.iteritems():
|
||||
array = file.createVLArray(file.root, field_name, atom, "field "
|
||||
+ field_name, filters)
|
||||
array.flavor = 'python'
|
||||
file.root._v_attrs.fixed_fields = fixed_length_fields.keys()
|
||||
file.root._v_attrs.fixed_types = fixed_length_fields.values()
|
||||
|
||||
file.root._v_attrs.variable_fields = variable_length_fields.keys()
|
||||
file.root._v_attrs.variable_types = variable_length_fields.values()
|
||||
|
||||
file.close()
|
||||
|
||||
class GroupsMembersTable(object):
|
||||
def __init__(self, file_path, tree_path):
|
||||
self.file_path = file_path
|
||||
self.tree_path = tree_path
|
||||
# open for reading
|
||||
self.file = tables.openFile(self.file_path, mode="r+")
|
||||
try:
|
||||
#try to open the table as specified by path if node does not
|
||||
#exist create it
|
||||
self.table = self.file.getNode(self.tree_path)
|
||||
except tables.exceptions.NoSuchNodeError:
|
||||
raise IOError("file %s does not contain table %s"%
|
||||
(self.file_path,self.tree_path))
|
||||
|
||||
def __iter__(self):
|
||||
for row in self.table:
|
||||
yield row
|
||||
|
||||
raise StopIteration
|
||||
|
||||
def iter_ids(self, id_list):
|
||||
for id in id_list:
|
||||
yield self.table[id]
|
||||
|
||||
def get_group_by_id(self, id):
|
||||
return self.table[id]
|
||||
|
||||
def __del__(self):
|
||||
# self.table.flush()
|
||||
self.file.close()
|
||||
|
||||
def append(self, val_list):
|
||||
self.table.append(val_list)
|
||||
|
||||
# Performs ungrouping, based on the iterator of group records and an
|
||||
# iterator over flow records
|
||||
class GroupsExpander(object):
|
||||
def __init__(self, groups_file_path, records_file_path):
|
||||
self.groups = GroupsMembersTable(groups_file_path, "gr1")
|
||||
self.records = FlowRecordsTable(records_file_path)
|
||||
|
||||
|
||||
def group_members(self,group_id):
|
||||
grp_member_ids = self.groups.get_group_by_id(group_id)
|
||||
return self.records.iter_ids(grp_member_ids)
|
||||
|
||||
|
||||
default_ft_types = {
|
||||
'dFlows' : tables.UInt32Col(), 'bytes' : tables.UInt32Col(),
|
||||
'dPkts' : tables.UInt32Col(), 'dst_as' : tables.UInt16Col(),
|
||||
'dst_mask' : tables.UInt8Col(), 'dst_tag' : tables.UInt32Col(),
|
||||
'dstip' : tables.UInt32Col(), 'dstport' : tables.UInt16Col(),
|
||||
'engine_id' : tables.UInt8Col(), 'engine_type' : tables.UInt8Col(),
|
||||
'exaddr' : tables.UInt32Col(), 'extra_pkts' : tables.UInt32Col(),
|
||||
'stime' : tables.UInt32Col(), 'in_encaps' : tables.UInt8Col(),
|
||||
'input' : tables.UInt16Col(), 'etime' : tables.UInt32Col(),
|
||||
'marked_tos' : tables.UInt8Col(), 'nexthop' : tables.UInt32Col(),
|
||||
'out_encaps' : tables.UInt8Col(), 'output' : tables.UInt16Col(),
|
||||
'peer_nexthop' : tables.UInt32Col(), 'prot' : tables.UInt8Col(),
|
||||
'router_sc' : tables.UInt32Col(), 'src_as' : tables.UInt16Col(),
|
||||
'src_mask' : tables.UInt8Col(), 'src_tag' : tables.UInt32Col(),
|
||||
'srcip' : tables.UInt32Col(), 'srcport' : tables.UInt16Col(),
|
||||
'sysUpTime' : tables.UInt32Col(), 'tcp_flags' : tables.UInt8Col(),
|
||||
'tos' : tables.UInt8Col(), 'unix_nsecs' : tables.UInt32Col(),
|
||||
'unix_secs' : tables.UInt32Col()
|
||||
}
|
||||
#tab = FlowRecordsTable("../dynZip9.h5")
|
||||
|
||||
#for x in tab:
|
||||
# print x
|
||||
|
||||
#print tab.fields
|
||||
|
||||
#wr = TableWriter("../test.h5","/dumps/table1")
|
||||
|
||||
#create_group_file("../grptest.h5", "gr1")
|
||||
#grp = GroupsMembersTable("../grptest.h5", "/gr1")
|
||||
#grp.append([1,3,5])
|
||||
#grp.append([2,4])
|
||||
#grp.append([4324904231490123,98])
|
||||
#
|
||||
#for ls in grp.iter_ids([1,2]):
|
||||
# print ls
|
||||
|
||||
#grp.__del__()
|
||||
#print [1,4,543,32]
|
||||
|
||||
#from os import remove
|
||||
#fname = "../comp.h5"
|
||||
#remove(fname)
|
||||
#field_types = {'info': tables.UInt8Col(),
|
||||
# 'records': tables.UInt8Atom(), 'info_list': tables.UInt8Atom()}
|
||||
#create_complex_file(fname, field_types)
|
||||
##h5f = tables.openFile(fname, 'r')
|
||||
##print h5f.root._v_attrs.fixed_fields
|
||||
##print h5f.root._v_attrs.fixed_types
|
||||
##print h5f.root._v_attrs.variable_fields
|
||||
##print h5f.root._v_attrs.variable_types
|
||||
#
|
||||
#cread = FlRecordsTable(fname)
|
||||
#
|
||||
#cread.append((999,[1,3],[1]))
|
||||
#cread.append((2,[1,4],[2,4,999]))
|
||||
#cread.close()
|
||||
#
|
||||
#read = FlRecordsTable(fname)
|
||||
#for r in read:
|
||||
# print r
|
||||
|
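The helpers above are only exercised from other flowy modules in this commit, so here is a small, self-contained usage sketch (not part of the file): it writes a toy flow table with one variable-length column and reads it back. The file name, the field choices and the 'zlib' compressor are illustrative assumptions (the module defaults to 'lzo').

import tables
from pytables import create_table_file, FlowRecordsTable

# hypothetical output path; create_table_file refuses to overwrite an existing file
path = './example-flows.h5'

field_types = {
    'srcip': tables.UInt32Col(), 'dstip': tables.UInt32Col(),
    'bytes': tables.UInt32Col(),
    'srcports': tables.UInt16Atom(),   # Atom -> stored as a variable-length array
}
create_table_file(path, field_types, complib='zlib')   # 'zlib' assumed available

class Rec(object):
    """Anything with attributes named like the fields can be appended."""
    def __init__(self, srcip, dstip, bytes, srcports):
        self.srcip, self.dstip = srcip, dstip
        self.bytes, self.srcports = bytes, srcports

table = FlowRecordsTable(path)
table.append(Rec(0x0A000001, 0x0A000002, 1500, [80, 8080]))
table.flush()

for row in table:       # (rec_id, fixed fields..., variable-length fields...)
    print row
table.close()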
BIN
pytables.pyc
Normal file
BIN
pytables.pyc
Normal file
Binary file not shown.
165
record.py
Normal file
165
record.py
Normal file
|
@ -0,0 +1,165 @@
|
|||
"""
|
||||
This module provides methods for dynamically creating flow and
|
||||
group record classes.
|
||||
"""
|
||||
|
||||
def get_record_class(attributes, types=None, default_vals=None):
|
||||
'''
|
||||
Creates a record class for given attribute names.
|
||||
|
||||
Arguments:
|
||||
attributes - a sequence of attribute names
|
||||
types - optional sequence of attribute types, which
|
||||
correspond to the attribute names in attributes.
|
||||
Types may be of any type, and are not used by the
|
||||
Record class, but are useful for external storage,
|
||||
where data type has to be predetermined.
|
||||
default_val - a sequence of default values which
|
||||
correspond to the attribute names in attributes
|
||||
|
||||
Lists are used instead of dictionaries because the order
|
||||
may be important.
|
||||
|
||||
Return:
|
||||
Record class which has attributes with the names given
|
||||
by attributes list. The class uses __slots__ to lower
|
||||
memory usage as potentially millions of instances will
|
||||
be present during runtime. The class has a constructor,
|
||||
which takes as argument values for the attributes ordered
|
||||
the same way as in the attributes list. If default values
|
||||
are specified there is a default(no argument) constructor
|
||||
as well.
|
||||
NOTE that this method returns a class not an instance.
|
||||
|
||||
Raises:
|
||||
ValueError if number of types or default values doesn't
|
||||
match number of attributes.
|
||||
'''
|
||||
if default_vals and len(attributes) != len(default_vals):
|
||||
raise ValueError(
|
||||
"Number of attributes(%d) and number of default values(%d)"%
|
||||
(len(attributes),len(default_vals))+" don't match")
|
||||
if types and len(attributes) != len(types):
|
||||
raise ValueError(
|
||||
"Number of attributes(%d) and number of default types(%d)"%
|
||||
(len(attributes),len(types))+" don't match")
|
||||
elif types:
|
||||
types_dict = dict(zip(attributes, types))
|
||||
else:
|
||||
types_dict = {}
|
||||
class Record(object):
|
||||
'''
|
||||
Record class contains flow or group record information.
|
||||
|
||||
It uses __slots__ to save memory because potentially millions of
|
||||
FlowRecords will be used during run time.
|
||||
Attributes:
|
||||
attribute names are specified in cls.__slots__
|
||||
defaults - contains the default values for attributes used
|
||||
with default constructor.
|
||||
attr_types - contains a dictionary of the types of
|
||||
the attributes.
|
||||
|
||||
Methods:
|
||||
__init__ - when defaults is specified __init__()
|
||||
creates an object with default values. If no
|
||||
defaults are specified during class creation
|
||||
__init__() raises TypeError.
|
||||
__init__(*args) takes exactly the same number
|
||||
of arguments as the classes' number of attributes,
|
||||
and creates new instance with the given values.
|
||||
Argument order corresponds to the order of
|
||||
attributes in cls.__slots__
|
||||
|
||||
'''
|
||||
# set slots to conserve memory
|
||||
# copy ([:]) don't reference to protect from unexpected changes
|
||||
__slots__ = attributes[:]
|
||||
attr_types = types_dict
|
||||
num_of_fields = len(__slots__)
|
||||
defaults = default_vals[:] if default_vals else None
|
||||
|
||||
def __init__(self, *args):
|
||||
num_args = len(args)
|
||||
if num_args == self.num_of_fields:
|
||||
for name, value in zip(self.__slots__,args):
|
||||
setattr(self, name, value)
|
||||
elif num_args == 0 and self.defaults != None:
|
||||
for name, value in zip(self.__slots__,self.defaults):
|
||||
setattr(self, name, value)
|
||||
elif self.defaults == None:
|
||||
raise TypeError(
|
||||
"__init__() takes %d arguments (%d given)"%
|
||||
( self.num_of_fields + 1, num_args+1))
|
||||
else:
|
||||
raise TypeError(
|
||||
"__init__() takes either 1 or %d arguments (%d given)"%
|
||||
( self.num_of_fields + 1, num_args+1))
|
||||
|
||||
def tuple(self):
|
||||
return tuple(getattr(self, field) for field in self.__slots__)
|
||||
|
||||
def __repr__(self):
|
||||
res = "Recod("
|
||||
for field in self.__slots__:
|
||||
val = getattr(self, field)
|
||||
if type(val) is str:
|
||||
val = "'" + str(val) + "'"
|
||||
else:
|
||||
val = str(val)
|
||||
res += val + ", "
|
||||
res =res[:-2] + ")"
|
||||
return res
|
||||
|
||||
def __str__(self):
|
||||
res = "Recod: "
|
||||
for field in self.__slots__:
|
||||
val = getattr(self, field)
|
||||
res += field + "->" + str(val) + ", "
|
||||
res =res[:-2]
|
||||
return res
|
||||
return Record
|
||||
|
||||
|
||||
class RecordReader(object):
|
||||
def __init__(self, reader_object):
|
||||
self.reader = reader_object
|
||||
#print self.reader.fields
|
||||
self.Record = get_record_class(self.reader.fields)
|
||||
|
||||
def __iter__(self):
|
||||
for tuple in self.reader:
|
||||
yield self.Record(*tuple)
|
||||
|
||||
def read_rows_list(self, rows_list):
|
||||
for tuple in self.reader.read_rows_list(rows_list):
|
||||
yield self.Record(*tuple)
|
||||
|
||||
def read_row(self, row_n):
|
||||
tup = self.reader.read_row(row_n)
|
||||
return self.Record(*tup)
|
||||
|
||||
#from flowy import pytables
|
||||
#ptread = pytables.FlowRecordsTable("../testFT.h5" )
|
||||
#rr = RecordReader(ptread)
|
||||
#for i in rr:
|
||||
# print i.dOctets
|
||||
|
||||
#
|
||||
#
|
||||
#FlowRecord = get_record_class(["a","b"],["str","uint"],[1,6])
|
||||
#
|
||||
#def printSth(self):
|
||||
# print "sth"
|
||||
#
|
||||
#FlowRecord.p = printSth
|
||||
#
|
||||
#x = FlowRecord(1,6)
|
||||
#
|
||||
#
|
||||
#print x.a, x.b
|
||||
#print x.__slots__
|
||||
#
|
||||
#t = FlowRecord()
|
||||
#print t.a
|
||||
#t.p()
|
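For illustration only (mirroring the commented-out examples above), a short sketch of how get_record_class and RecordReader might be used; the field names and the TupleSource stand-in are made up and not part of flowy:

from record import get_record_class, RecordReader

# build a Record class with three slots, optional types and default values
FlowRecord = get_record_class(['srcip', 'dstport', 'bytes'],
                              types=['uint32', 'uint16', 'uint32'],
                              default_vals=[0, 0, 0])

r = FlowRecord(167772161, 443, 1500)
print r.srcip, r.dstport        # plain attribute access via __slots__
print FlowRecord()              # no-argument form falls back to default_vals

class TupleSource(object):
    """Stand-in for pytables.FlowRecordsTable: exposes .fields and yields tuples."""
    fields = ['srcip', 'dstport', 'bytes']
    def __iter__(self):
        return iter([(167772161, 443, 1500), (167772162, 80, 40)])

for rec in RecordReader(TupleSource()):
    print rec                   # printed via Record.__str__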
BIN
record.pyc
Normal file
BIN
record.pyc
Normal file
Binary file not shown.
1387
run-output.txt
Normal file
1387
run-output.txt
Normal file
File diff suppressed because it is too large
25
run-output2.txt
Normal file
25
run-output2.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
0.82
|
||||
[Input('./netflow-trace.h5', 50, set([]), set([]), set([])), BranchNode('S', 50, set([]), set([]))]
|
||||
|
||||
[BranchNode('S', 51, set([]), set([])), Branch('A', 51, None, set([]), set([])), BranchNode('www_req', 51, set([]), set([])), BranchNode('g_www_req', 51, set([]), set([])), BranchNode('ggf', 51, set([]), set([])), BranchNode('M', 51, set([]), set([]))]
|
||||
|
||||
[BranchNode('S', 52, set([]), set([])), Branch('B', 52, None, set([]), set([])), BranchNode('www_res', 52, set([]), set([])), BranchNode('g_www_res', 52, set([]), set([])), BranchNode('ggf', 52, set([]), set([])), BranchNode('M', 52, set([]), set([]))]
|
||||
|
||||
[BranchNode('M', 53, set([]), set([])), BranchNode('U', 53, set([]), set([])), Output('./ungroped.h5', 53, set([]), set([]), set([]))]
|
||||
|
||||
Splitter initiated
|
||||
Parsing and validation finished: 0.31
|
||||
Started filtering
|
||||
Finished filtering
|
||||
Filters ready
|
||||
Splitter time elapsed: 346.66
|
||||
Finished grouping branch B
|
||||
Finished grouping branch A
|
||||
Finished filtering groups for branch B
|
||||
Finished filtering groups for branch A
|
||||
Group filter time elapsed: 916.19
|
||||
Finished merging branches: ['B', 'A']
|
||||
Merger time elapsed: 1037.532704
|
||||
Ungrouper U finished exectution
|
||||
FINISHED!
|
||||
Overall time elapsed: 1073.552704
|
64
splitter.py
Normal file
64
splitter.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
from Queue import Queue
|
||||
from Queue import Empty
|
||||
import profiler
|
||||
|
||||
class Splitter(object):
|
||||
def __init__(self, name_to_br, filter):
|
||||
self.branches = name_to_br.values() # the actual Branch implementations, e.g. branches A and B
|
||||
self.name_to_branch = name_to_br
|
||||
self.filter = filter
|
||||
print "Splitter initiated"
|
||||
|
||||
def go(self):
|
||||
count = 0
|
||||
|
||||
|
||||
# Exactly rec and branch are returned, since that is specified
|
||||
# by the 'generator' function, denoted by 'yield' inside the
|
||||
# __iter__ function. Every time an __iter__ is called, one tuple
|
||||
# of (rec, branch) is returned
|
||||
for rec, branch in self.filter:
|
||||
self.split(branch, rec)
|
||||
count = count + 1
|
||||
|
||||
|
||||
|
||||
print count
|
||||
self.ready()
|
||||
|
||||
|
||||
|
||||
def split(self, branch_mask, record):
|
||||
# print zip(self.branches, branch_mask)
|
||||
for branch, active in zip(self.branches, branch_mask):
|
||||
# print active, branch
|
||||
if active:
|
||||
branch.put(record)
|
||||
# if branch.name == 'A': print record
|
||||
# if branch.name == 'B': print record
|
||||
# print branch
|
||||
|
||||
def ready(self):
|
||||
print "Filters ready"
|
||||
for br in self.branches:
|
||||
br.ready = True
|
||||
|
||||
|
||||
|
||||
class Branch(Queue):
|
||||
def __init__(self, name):
|
||||
Queue.__init__(self, 0)
|
||||
self.name = name
|
||||
self.ready = False
|
||||
|
||||
def __iter__(self):
|
||||
while(True):
|
||||
if self.empty() and self.ready:
|
||||
raise StopIteration
|
||||
try:
|
||||
record = self.get(timeout=3)
|
||||
yield record
|
||||
self.task_done()
|
||||
except Empty:
|
||||
if self.ready:
|
||||
raise StopIteration
|
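A single-threaded usage sketch (in the real pipeline splitter_validator wires these objects up from the parsed query and the filter yields real flow records): a fake filter produces (record, branch_mask) pairs and the Splitter fans them out to the Branch queues.

from splitter import Splitter, Branch

branch_a, branch_b = Branch('A'), Branch('B')
name_to_br = {'A': branch_a, 'B': branch_b}

class FakeFilter(object):
    """Yields (record, mask) pairs; mask order must match name_to_br.values()."""
    def __init__(self, branch_order):
        self.branch_order = branch_order
    def __iter__(self):
        # 'rec-1' goes only to branch A, 'rec-2' goes to every branch
        yield 'rec-1', tuple(br.name == 'A' for br in self.branch_order)
        yield 'rec-2', tuple(True for _ in self.branch_order)

splitter = Splitter(name_to_br, FakeFilter(name_to_br.values()))
splitter.go()                      # prints the record count and marks the branches ready

print 'A got:', list(branch_a)     # a Branch is iterable until it is empty and ready
print 'B got:', list(branch_b)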
BIN
splitter.pyc
Normal file
BIN
splitter.pyc
Normal file
Binary file not shown.
25
splitter_validator.py
Normal file
25
splitter_validator.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
from copy import copy, deepcopy
|
||||
from splitter import Branch as BranchImpl
|
||||
from splitter import Splitter as SplitterImpl
|
||||
|
||||
class SplitterValidator(object):
|
||||
def __init__(self, parser, filter_validator):
|
||||
self.splitter = copy(parser.splitter)
|
||||
self.branches = deepcopy(parser.branches)
|
||||
self.branch_ids = filter_validator.branches_ids
|
||||
self.filter_impl = filter_validator.impl
|
||||
self.br_name_to_br = {}
|
||||
self.impl = self.create_impl()
|
||||
|
||||
def sort_branches(self):
|
||||
id_to_branch = dict(zip(self.branch_ids.values(),
|
||||
self.branch_ids.keys()))
|
||||
sorted_br = [id_to_branch[k] for k in sorted(id_to_branch.keys())]
|
||||
return sorted_br
|
||||
|
||||
def create_impl(self):
|
||||
br_names = self.sort_branches()
|
||||
branches = [BranchImpl(name) for name in br_names]# Actual branch instances are being initiated
|
||||
name_to_br = dict(zip(br_names, branches))# These instances are being mapped to the corresponding names, i.e. A, B
|
||||
self.br_name_to_br = name_to_br
|
||||
return SplitterImpl(name_to_br, self.filter_impl)
|
BIN
splitter_validator.pyc
Normal file
BIN
splitter_validator.pyc
Normal file
Binary file not shown.
181
statement.py
Normal file
181
statement.py
Normal file
|
@ -0,0 +1,181 @@
|
|||
class Splitter(object):
|
||||
def __init__(self, name, line, branches=None):
|
||||
self.name = name
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Splitter('%s', %s, %s)"%(self.name, self.line, self.branches)
|
||||
return str
|
||||
|
||||
class Ungrouper(object):
|
||||
def __init__(self, name, line, branches=None):
|
||||
self.name = name
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Ungrouper('%s', %s, %s)"%(self.name, self.line, self.branches)
|
||||
return str
|
||||
|
||||
class Input(object):
|
||||
def __init__(self, name, line, inputs=None, outputs=None, branches=None):
|
||||
self.name = name
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
self.inputs = inputs if inputs != None else set()
|
||||
self.outputs = outputs if outputs != None else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Input('%s', %s, %s, %s, %s)"%(self.name, self.line,
|
||||
self.branches, self.inputs,
|
||||
self.outputs)
|
||||
return str
|
||||
|
||||
class Output(object):
|
||||
def __init__(self, name, line, inputs=None, outputs=None, branches=None):
|
||||
self.name = name
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
self.inputs = inputs if inputs != None else set()
|
||||
self.outputs = outputs if outputs != None else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Output('%s', %s, %s, %s, %s)"%(self.name, self.line,
|
||||
self.branches, self.inputs,
|
||||
self.outputs)
|
||||
return str
|
||||
|
||||
class Branch(object):
|
||||
def __init__(self, name, line, members=None, inputs=None, outputs=None):
|
||||
self.name = name
|
||||
self.members = members
|
||||
self.line = line
|
||||
self.members = members if members != None else set()
|
||||
self.inputs = inputs if inputs != None else set()
|
||||
self.outputs = outputs if outputs != None else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Branch('%s', %s, %s, %s, %s)"%(self.name, self.line,
|
||||
self.members, self.inputs,
|
||||
self.outputs)
|
||||
return str
|
||||
|
||||
class BranchNode(object):
|
||||
def __init__(self, name, line, inputs=None, outputs=None):
|
||||
self.name = name
|
||||
self.line = line
|
||||
self.inputs = inputs if inputs != None else set()
|
||||
self.outputs = outputs if outputs != None else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "BranchNode('%s', %s, %s, %s)"%(self.name, self.line ,
|
||||
self.inputs, self.outputs)
|
||||
return str
|
||||
|
||||
class Rule(object):
|
||||
def __init__(self, op, line, args, NOT=False):
|
||||
self.op = op
|
||||
self.args = args
|
||||
self.NOT = NOT
|
||||
self.line = line
|
||||
|
||||
def __repr__(self):
|
||||
str = "Rule('%s', %s, %s, %s)"%(self.op, self.line,
|
||||
self.args, self.NOT)
|
||||
return str
|
||||
|
||||
def __str__(self):
|
||||
return "%s%s" % (self.op, self.args)
|
||||
|
||||
def __eq__(self, other):
|
||||
return str(self)== str(other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self))
|
||||
|
||||
class AllenRule(Rule):
|
||||
def __repr__(self):
|
||||
str = "AllenRule('%s', %s, %s, %s)"%(self.op, self.line,
|
||||
self.args, self.NOT)
|
||||
return str
|
||||
|
||||
class Field(object):
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
def __repr__(self):
|
||||
return "Field('%s')"%self.name
|
||||
|
||||
class GrouperRule(object):
|
||||
def __init__(self, op, line, args):
|
||||
self.line = line
|
||||
self.args = args
|
||||
self.op = op
|
||||
|
||||
def __repr__(self):
|
||||
str = "GrouperRule('%s', %s, %s)"%(self.op, self.line, self.args)
|
||||
return str
|
||||
|
||||
class Filter(object):
|
||||
def __init__(self, name, line, rules, branches=None):
|
||||
self.name = name
|
||||
self.rules = rules
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Filter('%s', %s, %s, %s)"%(self.name, self.line, self.rules,
|
||||
self.branches)
|
||||
return str
|
||||
|
||||
class Module(Filter):
|
||||
def __repr__(self):
|
||||
str = "Module('%s', %s, %s, %s)"%(self.name, self.line,
|
||||
self.rules, self.branches)
|
||||
return str
|
||||
|
||||
class Grouper(object):
|
||||
def __init__(self, name, line, modules, aggr, branches=None):
|
||||
self.name = name
|
||||
self.aggr = aggr
|
||||
self.modules = modules
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Grouper('%s', %s, %s, %s, %s)"%(self.name, self.line,
|
||||
self.modules, self.aggr, self.branches)
|
||||
return str
|
||||
|
||||
class Merger(object):
|
||||
def __init__(self, name, line, modules, export, branches=None):
|
||||
self.name = name
|
||||
self.export = export
|
||||
self.modules = modules
|
||||
self.line = line
|
||||
self.branches = branches if branches else set()
|
||||
|
||||
def __repr__(self):
|
||||
str = "Merger('%s', %s, %s, %s, %s)"%(self.name, self.line,
|
||||
self.modules, self.export, self.branches)
|
||||
return str
|
||||
|
||||
class FilterRef(object):
|
||||
def __init__(self, name, line, NOT=False):
|
||||
self.name = name
|
||||
self.NOT = NOT
|
||||
self.line = line
|
||||
|
||||
def __repr__(self):
|
||||
str = "FilterRef('%s', %s, %s)"%(self.name, self.line, self.NOT)
|
||||
return str
|
||||
|
||||
class Arg(object):
|
||||
def __init__(self, type, value, str=''):
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.str = str
|
||||
|
||||
def __repr__(self):
|
||||
str = "Arg('%s', %s, '%s')"%(self.type, repr(self.value), self.str)
|
||||
return str
|
BIN
statement.pyc
Normal file
BIN
statement.pyc
Normal file
Binary file not shown.
13
stats_print.py
Normal file
13
stats_print.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
import pickle
|
||||
f = open('./profile_stats1')
|
||||
|
||||
stats = pickle.load(f)
|
||||
|
||||
#for st in sorted(filter(lambda a: a[1][0]>1 and a[1][1]>10, stats), key=lambda a: a[1][1]/a[1][0], reverse=True):
|
||||
# print st[0], st[1][1]/st[1][0], st[1][1], st[1][0]
|
||||
|
||||
for st in sorted(stats, key=lambda a: a[1][1], reverse=True):
|
||||
print st
|
||||
#for st in sorted(stats, key=lambda a: a[1][0], reverse=True):
|
||||
# if st[0][1].find('flowy/src/flowy') != -1:
|
||||
# print (st[0][1].partition('flowy/src/flowy/'))[2], st[0][0], st[0][2], st[1][0], st[1][1]
|
62
timeindex.py
Normal file
62
timeindex.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
#from pytables import FlowRecordsTable
|
||||
#from pytables import create_table_file
|
||||
#from tables import UIntAtom
|
||||
from math import floor
|
||||
|
||||
#class TimeIndex(object):
|
||||
# def __init__(self, start_time, delta, id_size_bytes=4):
|
||||
# self.start_time = start_time
|
||||
# self.delta = delta
|
||||
# self.id_size = id_size_bytes
|
||||
# self.index = self.get_index_file()
|
||||
#
|
||||
# def get_index_file(self):
|
||||
# if self.index: return self.index
|
||||
# create_table_file(self.file, {'t': UIntAtom(self.id_size)})
|
||||
#
|
||||
# self.index = FlowRecordsTable(self.file)
|
||||
# self.index.
|
||||
# return self.index
|
||||
|
||||
class TimeIndex(object):
|
||||
def __init__(self, interval=1000, maxsize=10**5):
|
||||
self.interval = float(interval)
|
||||
self.index = {}
|
||||
self.maxsize = maxsize
|
||||
self.mintime = float('inf') # later replaced with int
|
||||
self.maxtime = float('-inf') # later replaced with int
|
||||
|
||||
@property
|
||||
def len(self):
|
||||
return len(self.index)
|
||||
|
||||
def get_interval(self, stime, etime):
|
||||
start = int(floor(stime/self.interval))
|
||||
end = int(floor(etime/self.interval) + 1)
|
||||
return xrange(start, end)
|
||||
|
||||
def update_min_max_time(self, record):
|
||||
if self.mintime > record.stime:
|
||||
self.mintime = record.stime
|
||||
if self.maxtime < record.etime:
|
||||
self.maxtime = record.etime
|
||||
|
||||
def get_total_interval(self):
|
||||
return self.get_interval(self.mintime, self.maxtime)
|
||||
|
||||
|
||||
def add(self, record):
|
||||
interval = self.get_interval(record.stime, record.etime)
|
||||
for i in interval:
|
||||
self.index.setdefault(i, set()).add(record.rec_id)
|
||||
|
||||
self.update_min_max_time(record)
|
||||
if self.len > self.maxsize:
|
||||
print "Warning large index"
|
||||
|
||||
def get_interval_records(self, stime, etime):
|
||||
res = set()
|
||||
for i in self.get_interval(stime, etime):
|
||||
res |= self.index.setdefault(i, set()) # set union
|
||||
|
||||
return sorted(res)
|
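A small sketch of the in-memory TimeIndex (the record type and the millisecond time unit are assumptions for the example; any object with rec_id, stime and etime attributes works):

from collections import namedtuple
from timeindex import TimeIndex

Rec = namedtuple('Rec', 'rec_id stime etime')

idx = TimeIndex(interval=1000)                   # bucket width, same units as stime/etime
idx.add(Rec(rec_id=0, stime=0, etime=1500))      # lands in buckets 0 and 1
idx.add(Rec(rec_id=1, stime=4000, etime=4200))   # lands in bucket 4

print idx.get_interval_records(900, 1100)    # -> [0]
print idx.get_interval_records(3500, 5000)   # -> [1]
print list(idx.get_total_interval())         # bucket range covered so far: [0, 1, 2, 3, 4]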
BIN
timeindex.pyc
Normal file
BIN
timeindex.pyc
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.000001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.000001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.000501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.000501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.001001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.001001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.001501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.001501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.002001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.002001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.002501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.002501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.003001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.003001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.003501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.003501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.004001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.004001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.004501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.004501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.005001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.005001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.005501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.005501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.010001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.010001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.010501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.010501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.011001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.011001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.011501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.011501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.012001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.012001+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.012501+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.012501+0200
Normal file
Binary file not shown.
BIN
traces/2009-10-24/ft-v05.2009-10-24.013001+0200
Normal file
BIN
traces/2009-10-24/ft-v05.2009-10-24.013001+0200
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff.