From 651e2be4480b19ac486cb8a4dd2fb08b448ebc67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Mo=C5=BEina?=
Date: Tue, 17 Jan 2017 20:12:29 +0100
Subject: Added scripts for learning rules.

---
 abml/rules_prolog.py | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 abml/rules_prolog.py

diff --git a/abml/rules_prolog.py b/abml/rules_prolog.py
new file mode 100644
index 0000000..c5c4134
--- /dev/null
+++ b/abml/rules_prolog.py
@@ -0,0 +1,173 @@
+import numpy as np
+import pickle
+import itertools
+from Orange.classification.rules import _RuleClassifier, GuardianValidator
+import orangecontrib.abml.abrules as rules
+from Orange.classification.rules import Rule
+
+class TrueCondValidator:
+    """
+    Checks that all conditions in a rule require the positive value "T";
+    conditions of the form `att != T` or `att == F` are rejected.
+    """
+    def __init__(self, max_rule_length, min_covered_examples):
+        self.max_rule_length = max_rule_length
+        self.min_covered_examples = min_covered_examples
+        self.guardian = GuardianValidator(self.max_rule_length, self.min_covered_examples)
+
+    def validate_rule(self, rule):
+        for att, op, val in rule.selectors:
+            if (op == "!=" and rule.domain[att].values[int(val)] == "T") or \
+               (op == "==" and rule.domain[att].values[int(val)] == "F"):
+                return False
+        return self.guardian.validate_rule(rule)
+
+class PureAccuracyValidator:
+    """
+    Rejects a rule for the negative class unless its class distribution is
+    pure or its quality reaches the given threshold.
+    """
+    def __init__(self, negative, threshold):
+        self.negative = negative
+        self.threshold = threshold
+
+    def validate_rule(self, rule):
+        if (rule.target_class == self.negative and
+                (rule.curr_class_dist[rule.target_class] != rule.curr_class_dist.sum() and
+                 rule.quality < self.threshold)):
+            return False
+        return True
+
+class RelativePureValidator:
+    """
+    Accepts a rule for the target class only if the relative frequency of the
+    target class among the examples it covers outside `covered` reaches the
+    given threshold.
+    """
+    def __init__(self, target, threshold, covered, Y):
+        self.target = target
+        self.threshold = threshold
+        self.covered = covered
+        self.Y = Y
+
+    def validate_rule(self, rule):
+        if rule.target_class == self.target:
+            rel_covered = rule.covered_examples & ~self.covered
+            rel_Y = self.Y[rel_covered]
+            rf = (rel_Y == rule.target_class).sum()
+            rf /= rel_covered.sum()
+            if rf < self.threshold:
+                return False
+        return True
+
+class NegativeFirstClassifier(_RuleClassifier):
+    """
+    Rule classifier that first checks whether any negative rule covers an
+    example. If one does, the example is classified as negative. Otherwise
+    the positive rules are checked and the example is assigned the class
+    accuracy (quality) of the covering positive rule; examples covered by
+    no rule get a uniform class distribution.
+    """
+    def __init__(self, domain, rule_list):
+        self.domain = domain
+        self.rule_list = rule_list
+        self.num_classes = len(self.domain.class_var.values)
+        self.negative = self.domain.class_var.values.index("F")
+
+    def coverage(self, data):
+        self.predict(data.X)
+        coverages = np.zeros((self.X.shape[0], len(self.rule_list)), dtype=bool)
+        for ri, r in enumerate(self.rule_list):
+            coverages[:, ri] = r.evaluate_data(self.X)
+        return coverages
+
+    def predict(self, X):
+        self.X = X
+        probabilities = np.zeros((X.shape[0], self.num_classes), dtype=float)
+        # negative rules first
+        neg_rules = [r for r in self.rule_list if r.target_class == self.negative]
+        solved = np.zeros(X.shape[0], dtype=bool)
+        for rule in neg_rules:
+            covered = rule.evaluate_data(X)
+            solved |= covered
+        probabilities[solved, self.negative] = 1.0
+        # now the positive class
+        pos_rules = [r for r in self.rule_list if r.target_class != self.negative]
+        for rule in pos_rules:
+            covered = rule.evaluate_data(X)
+            to_change = covered & ~solved
+            probabilities[to_change, rule.target_class] = rule.quality
+            probabilities[to_change, np.arange(self.num_classes) != rule.target_class] = (1-rule.quality)/(self.num_classes-1)
+            solved |= covered
+
+        # examples covered by no rule get a uniform distribution
+        probabilities[~solved] = np.ones(self.num_classes) / self.num_classes
+        return probabilities
+
+class Rules4Prolog:
+    """
+    Learns negative ("F") rules first and then positive ("T") rules on the
+    examples the negative rules do not cover; returns a NegativeFirstClassifier
+    built from both rule sets.
+    """
+    def __init__(self, name, threshold):
+        self.threshold = threshold
+        self.learner = rules.ABRuleLearner(width=50, parent_alpha=0.05)
+        self.learner.rule_finder.general_validator = TrueCondValidator(self.learner.rule_finder.general_validator.max_rule_length,
+                                                                       self.learner.rule_finder.general_validator.min_covered_examples)
+        self.learner.rule_validator = PureAccuracyValidator(0, self.threshold)
+        self.learner.classifier = NegativeFirstClassifier
+        self.learner.evds = pickle.load(open("data/{}/evds.pickle".format(name), "rb"))
+
+    def __call__(self, data):
+        # first learn rules for the negative class (their quality should exceed
+        # the threshold or their class distribution should be pure)
+        self.learner.target_class = "F"
+        neg_rules = self.learner(data).rule_list
+
+        # then mark all examples that the negative rules cover
+        coverage = np.zeros(len(data), dtype=bool)
+        for r in neg_rules:
+            coverage |= r.covered_examples
+
+        # learn positive rules, but accept a rule only if the relative frequency
+        # of the positive class on the examples not covered by the negative
+        # rules reaches the threshold
+        X, Y, W = data.X, data.Y, data.W if data.W.size else None
+        Y = Y.astype(dtype=int)
+        self.learner.target_class = "T"
+        old_validator = self.learner.rule_validator
+        self.learner.rule_validator = RelativePureValidator(1, self.threshold,
+                                                            coverage, Y)
+        cls = self.learner(data)
+        pos_rules = cls.rule_list
+
+        # create sub rules that satisfy the rule_validator's conditions
+        """all_rules = []
+        all_dists = set()
+        for r in pos_rules:
+            covered = r.covered_examples.tostring()
+            tc = r.target_class
+            if (covered, tc) not in all_dists:
+                all_dists.add((covered, tc))
+                all_rules.append(r)
+                # add sub rules to all_rules
+                s = r.selectors
+                ps = itertools.chain.from_iterable(itertools.combinations(s, i) for i in range(len(s)))
+                for p in ps:
+                    if not p:
+                        continue
+                    newr = Rule(selectors=p, domain=r.domain,
+                                initial_class_dist=r.initial_class_dist,
+                                prior_class_dist=r.prior_class_dist,
+                                quality_evaluator=r.quality_evaluator,
+                                complexity_evaluator=r.complexity_evaluator)
+                    newr.filter_and_store(X, Y, W, tc)
+                    newr.do_evaluate()
+                    covered = newr.covered_examples.tostring()
+                    if (covered, tc) not in all_dists and \
+                            self.learner.rule_validator.validate_rule(newr):  # such rule is not in the set yet
+                        all_dists.add((covered, tc))
+                        all_rules.append(newr)
+                        newr.create_model()"""
+
+        # restore the old validator on self.learner
+        self.learner.rule_validator = old_validator
+        return self.learner.classifier(domain=cls.domain, rule_list=neg_rules+pos_rules)  # all_rules)
+
+
+def create_learner(name, threshold, evds=True):
+    """
+    Builds an ABRuleLearner configured with the validators and classifier
+    defined above; loads the pickled extreme value distributions when evds
+    is True.
+    """
+    rule_learner = rules.ABRuleLearner(width=50, parent_alpha=0.05)
+    rule_learner.rule_finder.general_validator = TrueCondValidator(rule_learner.rule_finder.general_validator.max_rule_length,
+                                                                   rule_learner.rule_finder.general_validator.min_covered_examples)
+    rule_learner.rule_validator = PureAccuracyValidator(0, threshold)
+    rule_learner.classifier = NegativeFirstClassifier
+    if evds:
+        rule_learner.evds = pickle.load(open("data/{}/evds.pickle".format(name), "rb"))
+    return rule_learner
--
cgit v1.2.1
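
For orientation, a minimal usage sketch of the new module. It is a sketch under assumptions, not part of the commit: it assumes the repository root is importable as a package, that the orangecontrib.abml add-on is installed, that the data table's class variable takes the values "F" and "T", and that a pickled extreme value distribution file exists at data/<name>/evds.pickle, as rules_prolog.py expects. The dataset path, the task name "task1" and the threshold 0.9 are placeholders.

from Orange.data import Table
from abml.rules_prolog import Rules4Prolog

data = Table("data/task1/learn.tab")          # hypothetical dataset path

learn = Rules4Prolog("task1", threshold=0.9)  # placeholder task name and purity threshold
classifier = learn(data)                      # a NegativeFirstClassifier

for rule in classifier.rule_list:             # inspect the learned rules
    print(rule, rule.curr_class_dist, rule.quality)

coverage = classifier.coverage(data)          # boolean matrix: examples x rules
probabilities = classifier.predict(data.X)    # negative rules fire first, then positive ones

Calling create_learner("task1", threshold=0.9) instead returns the underlying ABRuleLearner with the same validators and classifier attached, without the two-stage negative-then-positive procedure that Rules4Prolog adds.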