179 lines
7.2 KiB
Python
179 lines
7.2 KiB
Python
from validator_common import *
|
|
from copy import deepcopy
|
|
from tables import UIntAtom, UIntCol
|
|
from grouper import GrouperModule as GrouperModuleImpl
|
|
from grouper import Grouper as GrouperImpl
|
|
from grouper import GrouperRule as GrouperRuleImpl
|
|
from grouper import AggrOp as AggrOpImpl
|
|
import profiler
|
|
|
|
class GrouperValidator(object):
    """Validate parsed grouper definitions and build their implementations.

    Construction runs the whole pipeline: the parsed groupers are copied,
    validated, and converted into ``GrouperImpl`` objects (one per branch
    the grouper is attached to).  The result is stored in ``self.impl``
    and a branch-name -> implementation mapping is kept in
    ``self.br_name_to_grouper``.
    """

    def __init__(self, parser, splitter_validator):
        """Validate ``parser.groupers`` and build the implementations.

        :param parser: parser result; must expose ``groupers`` and be
            accepted by ``get_input_reader``.
        :param splitter_validator: object exposing ``br_name_to_br``,
            the mapping of branch names to branches.
        :raises SyntaxError: if validation fails (see ``create_impl``).
        """
        self.parser = parser
        self.fields_types = get_input_fields_types(
            get_input_reader(self.parser))
        # Work on a private copy: building the implementations removes
        # module-qualified aggregations from each grouper's ``aggr`` list.
        self.groupers = deepcopy(parser.groupers)
        self.br_name_to_br = splitter_validator.br_name_to_br
        self.br_name_to_grouper = {}
        self.impl = self.create_impl()

    def validate(self):
        """Run all checks, then resolve bound rules and values per module.

        :raises SyntaxError: on unknown fields or duplicated names.
        """
        self.check_field_refs()
        self.check_duplicate_grouper_names()
        for grouper in self.groupers:
            self.check_duplicate_module_names(grouper)

            # ``module`` is each module declared inside this grouper; both
            # helpers come from validator_common.
            for module in grouper.modules:
                replace_bound_rules(module)
                replace_with_vals(module)

    def check_duplicate_grouper_names(self):
        """Raise ``SyntaxError`` if two groupers share the same name."""
        counts = {}
        for grouper in self.groupers:
            counts[grouper.name] = counts.get(grouper.name, 0) + 1

        # ``items()`` works on both Python 2 and 3 (``iteritems`` does not);
        # sorting keeps the error message deterministic.
        duplicate_names = sorted(
            name for name, count in counts.items() if count > 1)
        if duplicate_names:
            msg = "Grouper(s) %s" % duplicate_names
            msg += " is/are all defined more than once."
            raise SyntaxError(msg)

    def check_duplicate_module_names(self, grouper):
        """Raise ``SyntaxError`` if ``grouper`` has modules sharing a name.

        :param grouper: grouper whose ``modules`` are inspected.
        """
        counts = {}
        for module in grouper.modules:
            counts[module.name] = counts.get(module.name, 0) + 1

        duplicate_names = sorted(
            name for name, count in counts.items() if count > 1)
        if duplicate_names:
            msg = "Module(s) %s" % duplicate_names
            msg += " is/are all defined more than once in grouper"
            msg += " %s." % grouper.name
            raise SyntaxError(msg)

    def check_field_refs(self):
        """Check that every referenced field exists in the input fields.

        Module rules are checked through ``check_rule_fields`` (from
        validator_common); aggregation arguments are checked here against
        the keys of ``self.fields_types``.

        :raises SyntaxError: if an aggregation references an unknown field.
        """
        known_fields = self.fields_types
        for grouper in self.groupers:
            for module in grouper.modules:
                for rule in module.rules:
                    check_rule_fields(rule[0], self.fields_types.keys())

            for aggr in grouper.aggr:
                for arg in aggr.args:
                    if not isinstance(arg, Field):
                        continue
                    # A name is either "field" or "module.field"; in the
                    # qualified form only the part after the first dot is
                    # a field name.
                    mod, _, field = arg.name.partition('.')
                    field_name = field if field != '' else mod
                    if field_name not in known_fields:
                        msg = 'There is no such field %s, ' % arg.name
                        msg += 'referenced at line %s' % aggr.line
                        raise SyntaxError(msg)

    def create_grouper_rules_impl(self, grouper):
        """Build the ``GrouperImpl`` objects for one parsed grouper.

        One implementation is created per branch name in
        ``grouper.branches`` and registered in ``self.br_name_to_grouper``.

        :param grouper: parsed grouper; its ``aggr`` list is consumed by
            ``convert_module_aggr_ops`` while modules are converted.
        :returns: list of ``GrouperImpl`` objects.
        :raises SyntaxError: if an aggregation is qualified with a module
            name that matches none of the grouper's modules.
        """
        modules_list = []
        for module in grouper.modules:
            rule_impl_list = self.convert_module_rules(module)
            aggr_ops_list = self.convert_module_aggr_ops(grouper, module)
            modules_list.append(
                GrouperModuleImpl(module.name, rule_impl_list, aggr_ops_list))

        grouper_aggr_ops = []
        for aggr in grouper.aggr:
            init_args = self.create_aggr_impl_init_args(aggr)
            # Aggregations that belonged to a module were already removed
            # from grouper.aggr, so a dotted name left here refers to a
            # module that does not exist.
            mod_name, sep, _ = init_args[1].partition('.')
            if sep:
                msg = 'There is no such grouper module %s, ' % mod_name
                msg += 'referenced on line %s' % aggr.line
                raise SyntaxError(msg)
            grouper_aggr_ops.append(AggrOpImpl(*init_args))

        groupers = [GrouperImpl(grouper.name, modules_list, grouper_aggr_ops,
                                self.br_name_to_br[br_name], br_name)
                    for br_name in grouper.branches]

        for grouper_impl in groupers:
            self.br_name_to_grouper[grouper_impl.branch_name] = grouper_impl
        return groupers

    def convert_module_aggr_ops(self, grouper, module):
        """Extract the aggregations of ``grouper`` that target ``module``.

        An aggregation targets the module when its field is written as
        ``<module.name>.<field>``.  Matching aggregations are removed from
        ``grouper.aggr`` (side effect) so that only grouper-level ones
        remain afterwards.

        :returns: list of ``AggrOpImpl`` built with the unqualified field.
        """
        aggr_ops_list = []
        consumed = []
        for aggr in grouper.aggr:
            op, field, gr_field, field_type = \
                self.create_aggr_impl_init_args(aggr)
            mod_name, _, unqualified = field.partition('.')
            if unqualified != '' and module.name == mod_name:
                aggr_ops_list.append(
                    AggrOpImpl(op, unqualified, gr_field, field_type))
                consumed.append(aggr)

        # Removal is deferred so the list is not mutated while iterating.
        for aggr in consumed:
            grouper.aggr.remove(aggr)

        return aggr_ops_list

    def create_aggr_impl_init_args(self, aggr):
        """Compute the constructor arguments of ``AggrOpImpl`` for ``aggr``.

        :param aggr: parsed aggregation; ``args[0]`` is the source field
            (its ``name`` may be module-qualified), ``args[1]`` is passed
            through as the grouped field.
        :returns: tuple ``(op, field, gr_field, field_type)`` where
            ``field`` is the name exactly as written.
        """
        field = aggr.args[0].name
        if '.' in field:
            non_qid_field = field.partition('.')[2]
        else:
            non_qid_field = field
        gr_field = aggr.args[1]

        if aggr.op == 'count':
            # The column for 'count' is sized from 'rec_id', not from the
            # aggregated field.
            field_type = UIntCol(self.fields_types['rec_id'].itemsize)
        elif aggr.op == 'union':
            field_type = UIntAtom(self.fields_types[non_qid_field].itemsize)
        else:
            field_type = UIntCol(self.fields_types[non_qid_field].itemsize)

        op = find_op(aggr, 'aggr_operators')

        return op, field, gr_field, field_type

    def convert_module_rules(self, module):
        """Convert every rule of ``module`` into a ``GrouperRuleImpl``.

        ``Field`` arguments are replaced by their names; all other
        arguments are passed through unchanged.
        """
        rule_impl_list = []
        for rules in module.rules:
            for rule in rules:
                op = find_op(rule)
                args = [arg.name if isinstance(arg, Field) else arg
                        for arg in rule.args]
                rule_impl_list.append(GrouperRuleImpl(op, *args))
        return rule_impl_list

    def create_impl(self):
        """Validate the groupers and build all grouper implementations.

        :returns: flat list of ``GrouperImpl`` objects for all groupers.
        :raises SyntaxError: if validation fails or if some branch in
            ``self.br_name_to_br`` has no grouper attached.
        """
        self.validate()
        groupers_impls = []
        for grouper in self.groupers:
            groupers_impls.extend(self.create_grouper_rules_impl(grouper))

        for br_name in self.br_name_to_br:
            if br_name not in self.br_name_to_grouper:
                msg = 'There is no grouper for branch %s.' % br_name
                raise SyntaxError(msg)

        return groupers_impls
|