View of /PEAK-Rules/peak/rules/indexing.py
Parent Directory
| Revision Log
Revision:
2426 -
(
download)
(
as text)
Wed Nov 21 19:59:35 2007 UTC (16 years, 5 months ago) by
pje
File size: 13392 byte(s)
Switch to using peak.util.extremes
from __future__ import division
import sys
from peak.util.addons import AddOn
from peak.rules.core import abstract, Dispatching, Engine, when
from peak.rules.criteria import *
from peak.rules.criteria import sorted, set, frozenset
from peak.util.extremes import Min, Max, Extreme
class Ordering(AddOn):
"""Track inter-expression ordering constraints"""
def __init__(self, owner, expr):
self.constraints = set()
def requires(self, guards):
c = frozenset(guards)
cs = self.constraints
if c in cs:
return
for oldc in list(cs):
if c >= oldc:
return # already a less-restrictive condition
elif oldc >= c:
cs.remove(oldc)
cs.add(c)
def can_precede(self, exprs):
if not self.constraints:
return True
for c in self.constraints:
for e in c:
if e in exprs:
break
else:
return True
else:
return False
def always_testable(expr):
"""Is `expr` safe to evaluate in any order?"""
return False
def define_ordering(ob, seq):
items = []
for key in seq:
Ordering(ob, key).requires(items)
items.append(key)
def to_bits(ints):
"""Return a bitset encoding the numbers contained in sequence `ints`"""
b = 0
for i in ints:
b |= 1<<i
return b
if sys.version<"2.4":
def to_bits(ints):
"""Return a bitset encoding the numbers contained in sequence `ints`"""
b = 0
for i in ints:
if i>31:
i = long(i)
b |= 1<<i # under 2.3, this breaks when i>31 unless it's a long
return b
def from_bits(n):
"""Yield the (ascending) numbers contained in bitset n"""
b = 0
while n:
while not n & 15:
n >>= 4
b += 4
if n & 1:
yield b
n >>= 1
b += 1
class TreeBuilder(object):
"""Template methods for the Chambers&Chen dispatch tree algorithm"""
def build_root(self, cases, exprs):
self.memo = {}
return self.build(cases, exprs)
def build(self, cases, exprs):
key = (cases, exprs)
if key in self.memo:
return self.memo[key]
if not exprs:
node = self.build_leaf(cases)
else:
best, rest = self.best_expr(cases, exprs)
assert len(rest) < len(exprs)
if best is None:
# No best expression found, recurse
node = self.build(cases, rest)
else:
node = self.build_node(best, cases, rest)
self.memo[key] = node
return node
def build_node(self, expr, cases, remaining_exprs):
raise NotImplementedError
def build_leaf(self, cases):
raise NotImplementedError
def selectivity(self, expression, cases):
"""Return (seedcount,totalcases) selectivity statistics"""
raise NotImplementedError
def cost(self, expr, remaining_exprs):
return 1
def best_expr(self, cases, exprs):
best_expr = None
best_spread = None
to_do = list(exprs)
remaining = dict.fromkeys(exprs)
active_cases = len(cases)
skipped = []
while to_do:
expr = to_do.pop()
if not Ordering(self, expr).can_precede(remaining):
# Skip criteria that have unchecked prerequisites
skipped.append(expr)
continue
branches, total_cases = self.selectivity(expr, cases)
if total_cases == active_cases * branches:
# None of the branches for this expression eliminate any
# cases, so this expression isn't needed for dispatching
del remaining[expr]
# recheck skipped exprs that might be legal now
to_do.extend(skipped)
skipped = []
continue
spread = total_cases / branches
if best_expr is None or spread < best_spread:
best_expr, best_spread = expr, spread
best_cost = self.cost(expr, remaining)
elif spread==best_spread:
cost = self.cost(expr, remaining)
if cost < best_cost:
best_expr, best_cost = expr, cost
if best_expr is not None:
del remaining[best_expr]
return best_expr, frozenset(remaining)
class BitmapIndex(AddOn):
"""Index that computes selectivity and handles basic caching w/bitmaps"""
known_cases = 0
def __init__(self, engine, expr):
self.extra = {}
self.all_seeds = {} # seed -> inc_cri, exc_cri
self.criteria_bits = {} # cri -> case bits
self.case_seeds = [] # case -> seed set
self.criteria_seeds = {} # cri -> seeds, inc_seeds, exc_seeds
self.expr = expr
self.engine = engine
def add_case(self, case_id, criterion):
self._extend_cases(case_id)
if criterion in self.criteria_seeds:
seeds, inc, exc = self.criteria_seeds[criterion]
else:
self.criteria_bits[criterion] = 0
seeds, inc, exc = self.criteria_seeds[criterion] \
= seeds_for(self, criterion)
self.case_seeds[case_id] = seeds
bit = to_bits([case_id])
if seeds is not self.all_seeds: self.known_cases |= bit
self.criteria_bits[criterion] |= bit
all_seeds = self.all_seeds
for i, seeds in ((0,inc), (1,exc)):
for seed in seeds:
if seed not in all_seeds:
all_seeds[seed] = set(), set()
all_seeds[seed][i].add(criterion)
def _extend_cases(self, case_id):
if case_id >= len(self.case_seeds):
self.case_seeds.extend(
[self.all_seeds]*(case_id+1-len(self.case_seeds))
)
def selectivity(self, cases):
if cases and cases[-1] >= len(self.case_seeds):
self._extend_cases(cases[-1])
cases = map(self.case_seeds.__getitem__, cases)
return (len(self.all_seeds), sum(map(len, cases)))
def seed_bits(self, cases):
bits = self.criteria_bits
return dict([
(seed,
(sum([bits[i] for i in inc]) & cases,
sum([bits[e] for e in exc]) & cases))
for seed, (inc, exc) in self.all_seeds.items()
])
def expanded_sets(self):
return [
(seed, [list(from_bits(inc)), list(from_bits(exc))])
for seed, (inc, exc) in self.seed_bits(self.known_cases).items()
]
abstract()
def seeds_for(index, criterion):
"""Determine the seeds needed to index `criterion`
Methods must return a 3-tuple (seeds, inclusions, exclusions) of seed sets
or sequences. See Indexing.txt for details.
"""
when(seeds_for, (BitmapIndex, bool))(
# True->all seeds, False->no seeds
lambda index, criterion: ([(), index.all_seeds][criterion], [], [])
)
class _FixedSubset(object):
__slots__ = 'parent'
def __init__(self, parent):
self.parent = parent
def __len__(self):
return len(self.parent)-1
when(seeds_for, (BitmapIndex, IsObject))
def seeds_for_pointer(index, criterion):
idref = id(criterion.ref)
if criterion.match:
return [idref], [idref], [None]
return _FixedSubset(index.all_seeds), [None], [idref]
class _RangeSubset(object):
__slots__ = 'offsets', 'seeds', 'lo', 'hi'
def __init__(self, index, lo, hi):
self.offsets = index.extra
self.seeds = index.all_seeds
self.lo = lo
self.hi = hi
def __len__(self):
off = self.offsets
if not off:
for n,k in enumerate(sorted(self.seeds)):
off[k] = n
return off[self.hi] - off[self.lo]
when(seeds_for, (BitmapIndex, Range))
def seeds_for_range(index, criterion):
lo, hi = criterion.lo, criterion.hi
if lo not in index.all_seeds or hi not in index.all_seeds:
index.extra.clear() # ensure offsets are rebuilt on next selectivity()
return _RangeSubset(index, lo, hi), [lo], [hi]
when(seeds_for, (BitmapIndex, Value))
def seeds_for_value(index, criterion):
v = (criterion.value, 0)
if v not in index.all_seeds:
index.extra.clear() # ensure offsets are rebuilt on next selectivity()
if criterion.match:
return [v], [v], []
else:
return _FixedSubset(index.all_seeds), [(Min, -1)], [v]
def split_ranges(ind, cases):
"""Return (exact, ranges) where `exact` is a dict[value]->bits and `ranges`
is a sorted list of ``((lo,hi),bits)`` tuples expressing non-inclusive
ranges. `ind` must be a bitmap index filled with ``Range`` and ``Value``
criteria, and `cases` must be a bitmap of the cases to be included in the
result.
"""
ranges = []
exact = {}
current = cases ^ (ind.known_cases & cases)
bitmap = ind.seed_bits(cases)
for (val,d), (inc, exc) in bitmap.iteritems():
if d:
break # something other than == was used; use full algorithm
exact[val] = current | inc
else:
return exact, ranges # all == criteria, no ranges or !=
low = Min
for (val,d), (inc, exc) in sorted(bitmap.iteritems()):
if val != low:
if ranges and ranges[-1][-1]==current:
low = ranges.pop()[0][0]
ranges.append(((low, val), current))
low = val
new = current | inc
new ^= (new & exc)
if d==0 or d<0 and not isinstance(val, Extreme):
exact[val] = new
if d:
current = new
if low != Max:
if ranges and ranges[-1][-1]==current:
low = ranges.pop()[0][0]
ranges.append(((low, Max), current))
return exact, ranges
when(seeds_for, (BitmapIndex, Class))
def seeds_for_class(index, criterion):
cls = criterion.cls
if isinstance(cls, type):
mro = cls.__mro__
else:
class tmp(cls, object): pass
mro = tmp.__mro__[1:]
parents = []
csmap = index.criteria_seeds
all_seeds = index.all_seeds
unions = index.extra
for base in mro:
parents.append(base)
c = Class(base)
if c not in csmap:
csmap[c] = set(
# ancestors aren't always parents of things past them in mro
[p for p in parents if issubclass(p, base)]
), set([base]), []
else:
csmap[c][0].add(cls)
if base not in all_seeds:
all_seeds[base] = set(), set()
if base in unions:
for s in unions[base]: s.add(cls)
if not criterion.match:
return _DiffSet(
index.all_seeds, csmap[Class(cls)][0]
), [object], [cls]
return csmap[criterion]
when(seeds_for, (BitmapIndex, Classes))
def seeds_for_classes(index, criterion):
csmap = index.criteria_seeds
excluded, required = sets = [], []
for c in criterion:
if c not in csmap:
csmap.setdefault(c, seeds_for(index, c))
sets[c.match].append(c)
ex_classes = [c.cls for c in excluded]
cex = Classes(excluded)
if cex not in csmap:
ex_union = reduce(
set.union, [csmap[c][0].subtract for c in excluded], set()
)
for c in ex_classes:
index.extra.setdefault(c, []).append(ex_union)
csmap[cex] = _DiffSet(index.all_seeds, ex_union), [object], ex_classes
if required:
required = [csmap[c][0] for c in required] or index.all
required = _MultiSet(index, criterion, required, csmap[cex][0].subtract)
return required, [], ex_classes
return csmap[cex]
class _MultiSet(object):
def __init__(self, index, classes, required, excluded):
self.all_seeds = index.all_seeds
self.classes = classes
self.lastlen = self.cachelen = 0
self.seen = set()
self.required = required
self.excluded = excluded
def __len__(self):
if len(self.all_seeds)==self.lastlen:
return self.cachelen
s = reduce(set.intersection, self.required) - self.excluded
l = self.cachelen = len(s)
self.lastlen = len(self.all_seeds)
if l > len(self.seen):
for cls in s - self.seen:
for c in cls.__bases__:
if c in s:
break # not a root if any of its bases are in the set
else:
# Flag the new root as an inclusion point for our criterion
self.all_seeds[cls][0].add(self.classes)
self.seen = s
return l
class _DiffSet(object):
def __init__(self, base, subtract):
self.base = base
self.subtract = subtract
self.baselen = -1
self.cache = None
def __len__(self):
if len(self.base)>self.baselen:
self.cache = set(self.base) - self.subtract
self.baselen = len(self.base)
return len(self.cache)