package Data::Hierarchy; $VERSION = '0.34'; use strict; use Storable qw(dclone); # XXX consider using Moose =head1 NAME Data::Hierarchy - Handle data in a hierarchical structure =head1 SYNOPSIS my $tree = Data::Hierarchy->new(); $tree->store ('/', {access => 'all'}); $tree->store ('/private', {access => 'auth', '.note' => 'this is private}); $info = $tree->get ('/private/somewhere/deep'); # return actual data points in list context ($info, @fromwhere) = $tree->get ('/private/somewhere/deep'); my @items = $tree->find ('/', {access => qr/.*/}); # override all children $tree->store ('/', {'.note' => undef}, {override_sticky_descendents => 1}); =head1 DESCRIPTION L provides a simple interface for manipulating inheritable data attached to a hierarchical environment (like a filesystem). One use of L is to allow an application to annotate paths in a real filesystem in a single compact data structure. However, the hierarchy does not actually need to correspond to an actual filesystem. Paths in a hierarchy are referred to in a Unix-like syntax; C<"/"> is the root "directory". (You can specify a different separator character than the slash when you construct a Data::Hierarchy object.) With the exception of the root path, paths should never contain trailing slashes. You can associate properties, which are arbitrary name/value pairs, with any path. (Properties cannot contain the undefined value.) By default, properties are inherited by child paths: thus, if you store some data at C: $tree->store('/some/path', {color => 'red'}); you can fetch it again at a C: print $tree->get('/some/path/below/that')->{'color'}; # prints red On the other hand, properties whose names begin with dots are uninherited, or "sticky": $tree->store('/some/path', {'.color' => 'blue'}); print $tree->get('/some/path')->{'.color'}; # prints blue print $tree->get('/some/path/below/that')->{'.color'}; # undefined Note that you do not need to (and in fact, cannot) explicitly add "files" or "directories" to the hierarchy; you simply add and delete properties to paths. =cut =head1 CONSTRUCTOR Creates a new hierarchy object. Takes the following options: =over =item sep The string used as a separator between path levels. Defaults to '/'. =back =cut sub new { my $class = shift; my %args = ( sep => '/', @_); my $self = bless {}, $class; $self->{sep} = $args{sep}; $self->{hash} = {}; $self->{sticky} = {}; return $self; } =head1 METHODS =head2 Instance Methods =over =cut =item C Given a path and a hash reference of properties, stores the properties at the path. Unless the C option is given with a false value, it eliminates any non-sticky property in a descendent of C<$path> with the same name. If the C option is given with a true value, it eliminates any sticky property in a descendent of C<$path> with the same name. override it. A value of undef removes that value; note, though, that if an ancestor of C<$path> defines that property, the ancestor's value will be inherited there; that is, with: $t->store('/a', {k => 'top'}); $t->store('/a/b', {k => 'bottom'}); $t->store('/a/b', {k => undef}); print $t->get('/a/b')->{'k'}; it will print 'top'. =cut sub store { my $self = shift; $self->_store_no_cleanup(@_); $self->_remove_redundant_properties_and_undefs($_[0]); } # Internal method. # # Does everything that store does, except for the cleanup at the # end (appropriate for use in e.g. merge, which calls this a bunch of # times and then does cleanup at the end). sub _store_no_cleanup { my $self = shift; my $path = shift; my $props = shift; my $opts = shift || {}; $self->_path_safe ($path); my %args = ( override_descendents => 1, override_sticky_descendents => 0, %$opts); $self->_remove_matching_properties_recursively($path, $props, $self->{hash}) if $args{override_descendents}; $self->_remove_matching_properties_recursively($path, $props, $self->{sticky}) if $args{override_sticky_descendents}; $self->_store ($path, $props); } =item C Given a path, looks up all of the properteies (sticky and not) and returns them in a hash reference. The values are clones, unless you pass a true value for C<$dont_clone>. If called in list context, returns that hash reference followed by all of the ancestral paths of C<$path> which contain non-sticky properties (possibly including itself). =cut sub get { my ($self, $path, $dont_clone) = @_; $self->_path_safe ($path); my $value = {}; my @datapoints = $self->_ancestors($self->{hash}, $path); for (@datapoints) { my $newv = $self->{hash}{$_}; $newv = dclone $newv unless $dont_clone; $value = {%$value, %$newv}; } if (exists $self->{sticky}{$path}) { my $newv = $self->{sticky}{$path}; $newv = dclone $newv unless $dont_clone; $value = {%$value, %$newv} } return wantarray ? ($value, @datapoints) : $value; } =item C Given a path and a hash reference of name/regular expression pairs, returns a list of all paths which are descendents of C<$path> (including itself) and define B (not inherited) all of the properties in the hash with values matching the given regular expressions. (You may want to use C to merely see if it has any value defined there.) Properties can be sticky or not. =cut sub find { my ($self, $path, $prop_regexps) = @_; $self->_path_safe ($path); my @items; my @datapoints = $self->_all_descendents($path); for my $subpath (@datapoints) { my $matched = 1; for (keys %$prop_regexps) { my $lookat = (index($_, '.') == 0) ? $self->{sticky}{$subpath} : $self->{hash}{$subpath}; $matched = 0 unless exists $lookat->{$_} && $lookat->{$_} =~ m/$prop_regexps->{$_}/; last unless $matched; } push @items, $subpath if $matched; } return @items; } =item C Given a second L object and a path, copies all the properties from the other object at C<$path> or below into the corresponding paths in the object this method is invoked on. All properties from the object this is invoked on at C<$path> or below are erased first. =cut sub merge { my ($self, $other, $path) = @_; $self->_path_safe ($path); my %datapoints = map {$_ => 1} ($self->_all_descendents ($path), $other->_all_descendents ($path)); for my $datapoint (sort keys %datapoints) { my $my_props = $self->get ($datapoint, 1); my $other_props = $other->get ($datapoint); for (keys %$my_props) { $other_props->{$_} = undef unless defined $other_props->{$_}; } $self->_store_no_cleanup ($datapoint, $other_props); } $self->_remove_redundant_properties_and_undefs; } =item C Given a path which B element of the hierarchy must be contained in, returns a special Data::Hierarchy::Relative object which represents the hierarchy relative that path. The B thing you can do with a Data::Hierarchy::Relative object is call C on it, which returns a new L object at that base path. For example, if everything in the hierarchy is rooted at C and it needs to be moved to C, you can do $hierarchy = $hierarchy->to_relative('/home/super_project')->to_absolute('/home/awesome_project'); (Data::Hierarchy::Relative objects may be a more convenient serialization format than Data::Hierarchy objects, if they are tracking the state of some relocatable resource.) =cut sub to_relative { my $self = shift; my $base_path = shift; return Data::Hierarchy::Relative->new($base_path, %$self); } # Internal method. # # Dies if the given path has a trailing slash and is not the root. If it is root, # destructively changes the path given as argument to the empty string. sub _path_safe { # Have to do this explicitly on the elements of @_ in order to be destructive if ($_[1] eq $_[0]->{sep}) { $_[1] = ''; return; } my $self = shift; my $path = shift; my $location_of_last_separator = rindex($path, $self->{sep}); return if $location_of_last_separator == -1; my $potential_location_of_trailing_separator = (length $path) - (length $self->{sep}); return unless $location_of_last_separator == $potential_location_of_trailing_separator; require Carp; Carp::confess('non-root path has a trailing slash!'); } # Internal method. # # Actually does property updates (to hash or sticky, depending on name). sub _store { my ($self, $path, $new_props) = @_; my $old_props = exists $self->{hash}{$path} ? $self->{hash}{$path} : undef; my $merged_props = {%{$old_props||{}}, %$new_props}; for (keys %$merged_props) { if (index($_, '.') == 0) { defined $merged_props->{$_} ? $self->{sticky}{$path}{$_} = $merged_props->{$_} : delete $self->{sticky}{$path}{$_}; delete $merged_props->{$_}; } else { delete $merged_props->{$_} unless defined $merged_props->{$_}; } } $self->{hash}{$path} = $merged_props; } # Internal method. # # Given a hash (probably $self->{hash}, $self->{sticky}, or their union), # returns a sorted list of the paths with data that are ancestors of the given # path (including it itself). sub _ancestors { my ($self, $hash, $path) = @_; my @ancestors; push @ancestors, '' if exists $hash->{''}; # Special case the root. return @ancestors if $path eq ''; my @parts = split m{\Q$self->{sep}}, $path; # Remove empty string at the front. my $current = ''; unless (length $parts[0]) { shift @parts; $current .= $self->{sep}; } for my $part (@parts) { $current .= $part; push @ancestors, $current if exists $hash->{$current}; $current .= $self->{sep}; } # XXX: could build cached pointer for fast traversal return @ancestors; } # Internal method. # # Given a hash (probably $self->{hash}, $self->{sticky}, or their union), # returns a sorted list of the paths with data that are descendents of the given # path (including it itself). sub _descendents { my ($self, $hash, $path) = @_; # If finding for everything, don't bother grepping return sort keys %$hash unless length($path); return sort grep {index($_.$self->{sep}, $path.$self->{sep}) == 0} keys %$hash; } # Internal method. # # Returns a sorted list of all of the paths which currently have any # properties (sticky or not) that are descendents of the given path # (including it itself). # # (Note that an arg of "/f" can return entries "/f" and "/f/g" but not # "/foo".) sub _all_descendents { my ($self, $path) = @_; $self->_path_safe ($path); my $both = {%{$self->{hash}}, %{$self->{sticky} || {}}}; return $self->_descendents($both, $path); } # Internal method. # # Given a path, a hash reference of properties, and a hash reference # (presumably {hash} or {sticky}), removes all properties from the # hash at the path or its descendents with the same name as a name in # the given property hash. (The values in the property hash are # ignored.) sub _remove_matching_properties_recursively { my ($self, $path, $remove_props, $hash) = @_; my @datapoints = $self->_descendents ($hash, $path); for my $datapoint (@datapoints) { delete $hash->{$datapoint}{$_} for keys %$remove_props; delete $hash->{$datapoint} unless %{$hash->{$datapoint}}; } } # Internal method. # # Returns the parent of a path; this is a purely textual operation, and is not necessarily a datapoint. # Do not pass in the root. sub _parent { my $self = shift; my $path = shift; return if $path eq q{} or $path eq $self->{sep}; # For example, say $path is "/foo/bar/baz"; # then $last_separator is 8. my $last_separator = rindex($path, $self->{sep}); # This happens if a path is passed in without a leading # slash. This is really a bug, but old version of # SVK::Editor::Status did this, and we might as well make it not # throw unintialized value errors, since it works otherwise. At # some point in the future this should be changed to a plain # "return" or an exception. return '' if $last_separator == -1; return substr($path, 0, $last_separator); } # Internal method. # # Cleans up the hash and sticky by removing redundant properties, # undef properties, and empty property hashes. sub _remove_redundant_properties_and_undefs { my $self = shift; my $prefix = shift; # This is not necessarily the most efficient way to implement this # cleanup, but that can be fixed later. # By sorting the keys, we guarantee that we never get to a path # before we've dealt with all of its ancestors. for my $path (sort keys %{$self->{hash}}) { next if $prefix && index($prefix.$self->{sep}, $path.$self->{sep}) != 0; my $props = $self->{hash}{$path}; # First check for undefs. for my $name (keys %$props) { if (not defined $props->{$name}) { delete $props->{$name}; } } # Now check for redundancy. # The root can't be redundant. if (length $path) { my $parent = $self->_parent($path); my $parent_props = $self->get($parent, 1); for my $name (keys %$props) { # We've already dealt with undefs in $props, so we # don't need to check that for defined. if (defined $parent_props->{$name} and $props->{$name} eq $parent_props->{$name}) { delete $props->{$name}; } } } # Clean up empty property hashes. delete $self->{hash}{$path} unless %{ $self->{hash}{$path} }; } for my $path (sort keys %{$self->{sticky}}) { # We only have to remove undefs from sticky, since there is no # inheritance. my $props = $self->{sticky}{$path}; for my $name (keys %$props) { if (not defined $props->{$name}) { delete $props->{$name}; } } # Clean up empty property hashes. delete $self->{sticky}{$path} unless %{ $self->{sticky}{$path} }; } } # These are for backwards compatibility only. sub store_recursively { my $self = shift; $self->store(@_, {override_sticky_descendents => 1}); } sub store_fast { my $self = shift; $self->store(@_, {override_descendents => 0}); } sub store_override { my $self = shift; $self->store(@_, {override_descendents => 0}); } package Data::Hierarchy::Relative; sub new { my $class = shift; my $base_path = shift; my %args = @_; my $self = bless { sep => $args{sep} }, $class; my $base_length = length $base_path; for my $item (qw/hash sticky/) { my $original = $args{$item}; my $result = {}; for my $path (sort keys %$original) { unless ($path eq $base_path or index($path, $base_path . $self->{sep}) == 0) { require Carp; Carp::confess("$path is not a child of $base_path"); } my $relative_path = substr($path, $base_length); $result->{$relative_path} = $original->{$path}; } $self->{$item} = $result; } return $self; } sub to_absolute { my $self = shift; my $base_path = shift; my $tree = { sep => $self->{sep} }; for my $item (qw/hash sticky/) { my $original = $self->{$item}; my $result = {}; for my $path (keys %$original) { $result->{$base_path . $path} = $original->{$path}; } $tree->{$item} = $result; } bless $tree, 'Data::Hierarchy'; return $tree; } 1; =back =head1 AUTHORS Chia-liang Kao Eclkao@clkao.orgE David Glasser Eglasser@mit.eduE =head1 COPYRIGHT Copyright 2003-2006 by Chia-liang Kao Eclkao@clkao.orgE. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L =cut