author    Martin Možina <martin.mozina@fri.uni-lj.si>    2017-01-17 20:12:29 +0100
committer Martin Možina <martin.mozina@fri.uni-lj.si>    2017-01-17 20:12:29 +0100
commit    651e2be4480b19ac486cb8a4dd2fb08b448ebc67 (patch)
tree      67574dbca70ee5f56d8c21da52845254daa753d2
parent    2c1661183cb95f4558c62a9a5e89bd74763eae8d (diff)
Added scripts for learning rules.
-rw-r--r--    abml/evaluate.py        75
-rw-r--r--    abml/learn_dist.py      15
-rw-r--r--    abml/rules_prolog.py    173
3 files changed, 263 insertions, 0 deletions
diff --git a/abml/evaluate.py b/abml/evaluate.py
new file mode 100644
index 0000000..2e318fb
--- /dev/null
+++ b/abml/evaluate.py
@@ -0,0 +1,75 @@
+import argparse
+import Orange
+from Orange.evaluation import TestOnTestData, CA, AUC, LogLoss
+import abml.rules_prolog as rp
+
+parser = argparse.ArgumentParser(description='Learn and test rules for Prolog programs.')
+parser.add_argument('Name', type=str, help='Predicate name.')
+args = parser.parse_args()
+name = args.Name
+
+# load data
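+# (Orange locates the file with a known extension, e.g. programs-train.tab)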
+data = Orange.data.Table('data/{}/programs-train'.format(name))
+
+# create learner
+rule_learner = rp.Rules4Prolog(name, 0.9)
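+# 0.9 is the quality/purity threshold used by the learner's rule validators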
+
+# learn a classifier
+classifier = rule_learner(data)
+
+# save model
+with open("data/{}/model.txt".format(name), "wt") as fmodel:
+    for r in classifier.rule_list:
+        print(r, r.curr_class_dist, r.quality)
+        fmodel.write("{} dist={} quality={}\n".format(str(r), str(r.curr_class_dist), r.quality))
+
+# accuracy of model on test and train data
+testdata = Orange.data.Table('data/{}/programs-test'.format(name))
+print("Accuracy on test data: ", (classifier(testdata) == testdata.Y).mean())
+print("Accuracy on train data: ", (classifier(data) == data.Y).mean())
+
+# test model against other methods
+bayes = Orange.classification.NaiveBayesLearner()
+logistic = Orange.classification.LogisticRegressionLearner()
+tree = Orange.classification.TreeLearner()
+random_forest = Orange.classification.RandomForestLearner()
+svm = Orange.classification.SVMLearner()
+cn2 = Orange.classification.rules.CN2UnorderedLearner()
+learners = [rule_learner, logistic, bayes, cn2, tree, random_forest, svm]
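+# compare all learners on the same train/test split using classification
+# accuracy (CA), AUC and log-loss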
+res = TestOnTestData(data, testdata, learners)
+ca = CA(res)
+auc = AUC(res)
+ll = LogLoss(res)
+
+names = ['logrules', 'logistic', 'naive-bayes', 'cn2', 'tree', 'random-forest', 'svm']
+scores = ""
+scores += "CA\tAUC\tLogLoss\tMethod\n"
+for ni, n in enumerate(names):
+ scores += "{}\t{}\t{}\t{}\n".format(ca[ni], auc[ni], ll[ni], n)
+print(scores)
+with open("data/{}/scores.txt".format(name), "wt") as fscores:
+    fscores.write(scores)
+
+# write out the learned rules sorted by quality
+all_rules = classifier.rule_list
+all_rules.sort(key=lambda r: r.quality, reverse=True)
+with open("data/{}/rules.txt".format(name), "wt") as rfile:
+    for r in all_rules:
+        print(r, r.curr_class_dist, r.quality)
+        rfile.write("{} {} {}\n".format(r, r.curr_class_dist, r.quality))
diff --git a/abml/learn_dist.py b/abml/learn_dist.py
new file mode 100644
index 0000000..58e4968
--- /dev/null
+++ b/abml/learn_dist.py
@@ -0,0 +1,15 @@
+import pickle
+import argparse
+from Orange.data import Table
+import abml.rules_prolog as rp
+
+parser = argparse.ArgumentParser(description='Estimate extreme value distributions (EVDs) for rule learning on Prolog programs.')
+parser.add_argument('Name', type=str, help='Predicate name.')
+args = parser.parse_args()
+name = args.Name
+
+data = Table('data/{}/programs-train'.format(name))
+
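+# estimate the extreme value distributions (EVDs) that the ABML rule learner
+# uses to correct rule quality estimates, and cache them for later runs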
+rule_learner = rp.create_learner(name, evds=False)
+rule_learner.calculate_evds(data)
+with open("data/{}/evds.pickle".format(name), "wb") as f:
+    pickle.dump(rule_learner.evds, f)
diff --git a/abml/rules_prolog.py b/abml/rules_prolog.py
new file mode 100644
index 0000000..c5c4134
--- /dev/null
+++ b/abml/rules_prolog.py
@@ -0,0 +1,173 @@
+import pickle
+import itertools
+import numpy as np
+from Orange.classification.rules import _RuleClassifier, GuardianValidator, Rule
+import orangecontrib.abml.abrules as rules
+# itertools and Rule are only used by the disabled sub-rule generation below
+
+class TrueCondValidator:
+    """
+    Accepts only rules whose conditions all test for the positive value:
+    selectors of the form 'att != T' or 'att == F' are rejected; all other
+    checks are delegated to Orange's GuardianValidator.
+    """
+ def __init__(self, max_rule_length, min_covered_examples):
+ self.max_rule_length = max_rule_length
+ self.min_covered_examples = min_covered_examples
+ self.guardian = GuardianValidator(self.max_rule_length, self.min_covered_examples)
+
+ def validate_rule(self, rule):
+        for att, op, val in rule.selectors:
+            value = rule.domain[att].values[int(val)]
+            if (op == "!=" and value == "T") or (op == "==" and value == "F"):
+                return False
+ return self.guardian.validate_rule(rule)
+
+class PureAccuracyValidator:
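+    """
+    Accepts a rule for the negative class only if it is pure (covers only
+    target-class examples) or its quality reaches the given threshold;
+    rules for other classes are always accepted.
+    """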
+ def __init__(self, negative, threshold):
+ self.negative = negative
+ self.threshold = threshold
+
+ def validate_rule(self, rule):
+ if (rule.target_class == self.negative and
+ (rule.curr_class_dist[rule.target_class] != rule.curr_class_dist.sum() and
+ rule.quality < self.threshold)):
+ return False
+ return True
+
+class RelativePureValidator:
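+    """
+    Accepts a rule for the target class only if the relative frequency of
+    target-class examples among its newly covered examples (those outside
+    the 'covered' mask) reaches the given threshold.
+    """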
+ def __init__(self, target, threshold, covered, Y):
+ self.target = target
+ self.threshold = threshold
+ self.covered = covered
+ self.Y = Y
+
+    def validate_rule(self, rule):
+        if rule.target_class == self.target:
+            rel_covered = rule.covered_examples & ~self.covered
+            # reject rules that cover no new examples (the ratio would be 0/0)
+            if not rel_covered.any():
+                return False
+            rel_Y = self.Y[rel_covered]
+            # count (rather than sum) the target-class examples, so the
+            # computation is also correct when the target class is not 1
+            rf = (rel_Y == rule.target_class).sum() / rel_covered.sum()
+            if rf < self.threshold:
+                return False
+        return True
+
+class NegativeFirstClassifier(_RuleClassifier):
+    """
+    Rule classifier that first checks whether a negative rule covers an
+    example; if one does, the example is classified as negative. Otherwise
+    the positive rules are checked and the example is assigned the class of
+    the first covering rule, with that rule's quality as its probability.
+    """
+ def __init__(self, domain, rule_list):
+ self.domain = domain
+ self.rule_list = rule_list
+ self.num_classes = len(self.domain.class_var.values)
+ self.negative = self.domain.class_var.values.index("F")
+
+ def coverage(self, data):
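+        # predict() is called only for its side effect of caching data.X in
+        # self.X; coverages[i, j] is True iff rule j covers example i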
+ self.predict(data.X)
+ coverages = np.zeros((self.X.shape[0], len(self.rule_list)), dtype=bool)
+ for ri, r in enumerate(self.rule_list):
+ coverages[:, ri] = r.evaluate_data(self.X)
+ return coverages
+
+ def predict(self, X):
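+        """
+        Return class probabilities for X. Examples covered by a negative rule
+        get probability 1 for the negative class; the remaining examples get
+        the quality of the first positive rule that covers them; examples
+        covered by no rule get a uniform distribution.
+        """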
+ self.X = X
+ probabilities = np.zeros((X.shape[0], self.num_classes), dtype=float)
+        # negative rules first
+        neg_rules = [r for r in self.rule_list if r.target_class == self.negative]
+        solved = np.zeros(X.shape[0], dtype=bool)
+        for rule in neg_rules:
+            solved |= rule.evaluate_data(X)
+        probabilities[solved, self.negative] = 1.0
+ # now positive class
+ pos_rules = [r for r in self.rule_list if r.target_class != self.negative]
+ for rule in pos_rules:
+ covered = rule.evaluate_data(X)
+ to_change = covered & ~solved
+ probabilities[to_change, rule.target_class] = rule.quality
+ probabilities[to_change, np.arange(self.num_classes) != rule.target_class] = (1-rule.quality)/(self.num_classes-1)
+ solved |= covered
+
+ probabilities[~solved] = np.ones(self.num_classes) / self.num_classes
+ return probabilities
+
+class Rules4Prolog:
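+    """
+    Two-stage rule learner: first learns rules for the negative ("F") class,
+    then learns rules for the positive ("T") class, validating the latter on
+    the examples that the negative rules leave uncovered.
+    """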
+ def __init__(self, name, threshold):
+ self.threshold = threshold
+ self.learner = rules.ABRuleLearner(width=50, parent_alpha=0.05)
+        gval = self.learner.rule_finder.general_validator
+        self.learner.rule_finder.general_validator = TrueCondValidator(
+            gval.max_rule_length, gval.min_covered_examples)
+        self.learner.rule_validator = PureAccuracyValidator(0, self.threshold)
+        self.learner.classifier = NegativeFirstClassifier
+        with open("data/{}/evds.pickle".format(name), "rb") as f:
+            self.learner.evds = pickle.load(f)
+
+ def __call__(self, data):
+ # first learn rules for negative class (quality should be higher than
+ # threshold or distribution should be pure)
+ self.learner.target_class = "F"
+ neg_rules = self.learner(data).rule_list
+
+        # compute a mask of the examples that the negative rules cover;
+        # positive rules will be validated on the examples outside this mask
+ coverage = np.zeros(len(data), dtype=bool)
+ for r in neg_rules:
+ coverage |= r.covered_examples
+
+ # learn positive rules, however accept them only if relative frequency
+ # of rules on the temporary data set is higher than threshold OR there
+ # are no negative examples
+        # data.W is an ndarray, so test its size (its truth value is ambiguous)
+        X, Y, W = data.X, data.Y, data.W if data.W.size else None
+ Y = Y.astype(dtype=int)
+ self.learner.target_class = "T"
+ old_validator = self.learner.rule_validator
+ self.learner.rule_validator = RelativePureValidator(1, self.threshold,
+ coverage, Y)
+ cls = self.learner(data)
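+        # cls is a NegativeFirstClassifier; only its rule list is needed here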
+ pos_rules = cls.rule_list
+
+        # (disabled) create sub-rules that satisfy rule_validator's conditions
+ """all_rules = []
+ all_dists = set()
+ for r in pos_rules:
+ covered = r.covered_examples.tostring()
+ tc = r.target_class
+ if (covered, tc) not in all_dists:
+ all_dists.add((covered, tc))
+ all_rules.append(r)
+ # add sub rules to all_rules
+ s = r.selectors
+ ps = itertools.chain.from_iterable(itertools.combinations(s, i) for i in range(len(s)))
+ for p in ps:
+ if not p:
+ continue
+ newr = Rule(selectors = p, domain=r.domain,
+ initial_class_dist=r.initial_class_dist,
+ prior_class_dist=r.prior_class_dist,
+ quality_evaluator=r.quality_evaluator,
+ complexity_evaluator=r.complexity_evaluator)
+ newr.filter_and_store(X, Y, W, tc)
+ newr.do_evaluate()
+ covered = newr.covered_examples.tostring()
+ if (covered, tc) not in all_dists and \
+ self.learner.rule_validator.validate_rule(newr): # such rule is not in the set yet
+ all_dists.add((covered, tc))
+ all_rules.append(newr)
+ newr.create_model()"""
+
+ # restore old validator to self.learner
+ self.learner.rule_validator = old_validator
+        return self.learner.classifier(domain=cls.domain,
+                                       rule_list=neg_rules + pos_rules)  # all_rules when sub-rule generation is enabled
+
+
+def create_learner(name, evds=True, threshold=0.9):
+    rule_learner = rules.ABRuleLearner(width=50, parent_alpha=0.05)
+    gval = rule_learner.rule_finder.general_validator
+    rule_learner.rule_finder.general_validator = TrueCondValidator(
+        gval.max_rule_length, gval.min_covered_examples)
+    # PureAccuracyValidator requires a threshold; 0.9 mirrors the value
+    # passed to Rules4Prolog in evaluate.py
+    rule_learner.rule_validator = PureAccuracyValidator(0, threshold)
+    rule_learner.classifier = NegativeFirstClassifier
+    if evds:
+        with open("data/{}/evds.pickle".format(name), "rb") as f:
+            rule_learner.evds = pickle.load(f)
+    return rule_learner
+