def compute_entropy(counter, all):
    """Return the Shannon entropy (in bits) of a class-count distribution.

    Args:
        counter: Mapping from class label to the number of samples of that
            class (e.g. a ``collections.Counter``). Only its values are used.
        all: Total number of samples the counts are normalised by. The name
            shadows the builtin ``all`` but is kept so that existing keyword
            callers continue to work.

    Returns:
        The sum of ``-p * log2(p)`` over every class with a non-zero
        proportion ``p = count / all``. Returns ``0.0`` when ``all`` is zero,
        since there are no samples to be uncertain about.
    """
    total = float(all)
    if total == 0:
        # No samples at all: avoid dividing by zero below.
        return 0.0

    entropy = 0.
    for count in counter.values():
        p0 = float(count) / total
        if p0 != 0:
            # Classes with zero probability contribute nothing (and
            # log(0) is undefined), so they are skipped.
            entropy += - p0 * math.log(p0, 2.)
    return entropy


def x2_statistics(counter_l, counter_r, all_l, all_r):
    """Return Pearson's chi-square statistic for a two-way split.

    For every class the observed counts on the left and right side of the
    split are compared with the counts expected if the class were spread
    over both sides in proportion to the side sizes.

    Args:
        counter_l: Mapping from class label to observed count left of the
            split.
        counter_r: Mapping from class label to observed count right of the
            split. A class missing from either mapping counts as zero.
        all_l: Total number of samples left of the split.
        all_r: Total number of samples right of the split.

    Returns:
        The chi-square statistic as a number; ``0.0`` when there are no
        samples at all. Terms whose expected frequency is zero (for example
        when one side of the split is empty) are skipped instead of raising
        ``ZeroDivisionError``.
    """
    # Convert once so the proportions below use true division even when the
    # totals are integers on interpreters with truncating integer division.
    all_l = float(all_l)
    all_r = float(all_r)
    ges = all_l + all_r  # total number of samples on both sides
    if ges == 0:
        return 0.0

    # Every class seen on either side must contribute. Left-side classes come
    # first, then the ones that only occur on the right, so the summation
    # order is deterministic.
    classes = list(counter_l)
    classes.extend(c for c in counter_r if c not in counter_l)

    x2 = 0.0
    for c in classes:
        o_l = counter_l.get(c, 0)  # observed frequency left of split
        o_r = counter_r.get(c, 0)  # observed frequency right of split
        ges_c = o_l + o_r  # number of samples with class c
        e_l = all_l / ges * ges_c  # expected frequency left of split
        e_r = all_r / ges * ges_c  # expected frequency right of split
        if e_l != 0:
            x2 += (o_l - e_l) ** 2 / e_l
        if e_r != 0:
            x2 += (o_r - e_r) ** 2 / e_r
    return x2