From 67978a2d5a19e3d2af10a476fb9e0306dda118fc Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Wed, 21 May 2014 14:00:44 +0200 Subject: [PATCH] =?utf8?q?Modul=20List::UtilsBy=20in=20Abh=C3=A4ngigkeiten?= =?utf8?q?=20und=20als=20Fallback=20aufgenommen?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- SL/InstallationCheck.pm | 1 + doc/modules/LICENSE.List-UtilsBy | 378 ++++++++++++++++++++++ doc/modules/README.List-UtilsBy | 238 ++++++++++++++ modules/fallback/List/UtilsBy.pm | 529 +++++++++++++++++++++++++++++++ 4 files changed, 1146 insertions(+) create mode 100644 doc/modules/LICENSE.List-UtilsBy create mode 100644 doc/modules/README.List-UtilsBy create mode 100644 modules/fallback/List/UtilsBy.pm diff --git a/SL/InstallationCheck.pm b/SL/InstallationCheck.pm index 0872fb8c5..8b7c33b09 100644 --- a/SL/InstallationCheck.pm +++ b/SL/InstallationCheck.pm @@ -34,6 +34,7 @@ BEGIN { { name => "Image::Info", url => "http://search.cpan.org/~srezic/", debian => 'libimage-info-perl' }, { name => "JSON", url => "http://search.cpan.org/~makamaka", debian => 'libjson-perl' }, { name => "List::MoreUtils", version => '0.21', url => "http://search.cpan.org/~vparseval/", debian => 'liblist-moreutils-perl' }, + { name => "List::UtilsBy", url => "http://search.cpan.org/~pevans/", debian => 'liblist-utilsby-perl' }, { name => "Params::Validate", url => "http://search.cpan.org/~drolsky/", debian => 'libparams-validate-perl' }, { name => "PDF::API2", version => '2.000', url => "http://search.cpan.org/~areibens/", debian => 'libpdf-api2-perl' }, { name => "Rose::Object", url => "http://search.cpan.org/~jsiracusa/", debian => 'librose-object-perl' }, diff --git a/doc/modules/LICENSE.List-UtilsBy b/doc/modules/LICENSE.List-UtilsBy new file mode 100644 index 000000000..67ba0bfde --- /dev/null +++ b/doc/modules/LICENSE.List-UtilsBy @@ -0,0 +1,378 @@ +This software is copyright (c) 2012 by Paul Evans . 
+ +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 2012 by Paul Evans . + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 51 Franklin St, Suite 500, Boston, MA 02110-1335 USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. 
You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. 
+ +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. 
However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 2012 by Paul Evans . 
+ +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. 
You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. 
You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +The End diff --git a/doc/modules/README.List-UtilsBy b/doc/modules/README.List-UtilsBy new file mode 100644 index 000000000..efdceb310 --- /dev/null +++ b/doc/modules/README.List-UtilsBy @@ -0,0 +1,238 @@ +NAME + `List::UtilsBy' - higher-order list utility functions + +SYNOPSIS + use List::UtilsBy qw( nsort_by min_by ); + + use File::stat qw( stat ); + my @files_by_age = nsort_by { stat($_)->mtime } @files; + + my $shortest_name = min_by { length } @names; + +DESCRIPTION + This module provides a number of list utility functions, all of which + take an initial code block to control their behaviour. They are + variations on similar core perl or `List::Util' functions of similar + names, but which use the block to control their behaviour. For example, + the core Perl function `sort' takes a list of values and returns them, + sorted into order by their string value. 
The `sort_by' function sorts + them according to the string value returned by the extra function, when + given each value. + + my @names_sorted = sort @names; + + my @people_sorted = sort_by { $_->name } @people; + +FUNCTIONS + @vals = sort_by { KEYFUNC } @vals + Returns the list of values sorted according to the string values + returned by the `KEYFUNC' block or function. A typical use of this may + be to sort objects according to the string value of some accessor, such + as + + sort_by { $_->name } @people + + The key function is called in scalar context, being passed each value in + turn as both `$_' and the only argument in the parameters, `@_'. The + values are then sorted according to string comparisons on the values + returned. + + This is equivalent to + + sort { $a->name cmp $b->name } @people + + except that it guarantees the `name' accessor will be executed only once + per value. + + One interesting use-case is to sort strings which may have numbers + embedded in them "naturally", rather than lexically. + + sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings + + This sorts strings by generating sort keys which zero-pad the embedded + numbers to some level (9 digits in this case), helping to ensure the + lexical sort puts them in the correct order. + + @vals = nsort_by { KEYFUNC } @vals + Similar to `sort_by' but compares its key values numerically. + + @vals = rev_sort_by { KEYFUNC } @vals + @vals = rev_nsort_by { KEYFUNC } @vals + Similar to `sort_by' and `nsort_by' but returns the list in the reverse + order. Equivalent to + + @vals = reverse sort_by { KEYFUNC } @vals + + except that these functions are slightly more efficient because they + avoid the final `reverse' operation. + + $optimal = max_by { KEYFUNC } @vals + @optimal = max_by { KEYFUNC } @vals + Returns the (first) value from `@vals' that gives the numerically + largest result from the key function. 
+ + my $tallest = max_by { $_->height } @people + + use File::stat qw( stat ); + my $newest = max_by { stat($_)->mtime } @files; + + In scalar context, the first maximal value is returned. In list context, + a list of all the maximal values is returned. This may be used to obtain + positions other than the first, if order is significant. + + If called on an empty list, an empty list is returned. + + For symmetry with the `nsort_by' function, this is also provided under + the name `nmax_by' since it behaves numerically. + + $optimal = min_by { KEYFUNC } @vals + @optimal = min_by { KEYFUNC } @vals + Similar to `max_by' but returns values which give the numerically + smallest result from the key function. Also provided as `nmin_by' + + @vals = uniq_by { KEYFUNC } @vals + Returns a list of the subset of values for which the key function block + returns unique values. The first value yielding a particular key is + chosen, subsequent values are rejected. + + my @some_fruit = uniq_by { $_->colour } @fruit; + + To select instead the last value per key, reverse the input list. If the + order of the results is significant, don't forget to reverse the result + as well: + + my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit; + + %parts = partition_by { KEYFUNC } @vals + Returns a key/value list of ARRAY refs containing all the original + values distributed according to the result of the key function block. + Each value will be an ARRAY ref containing all the values which returned + the string from the key function, in their original order. + + my %balls_by_colour = partition_by { $_->colour } @balls; + + Because the values returned by the key function are used as hash keys, + they ought to either be strings, or at least well-behaved as strings + (such as numbers, or object references which overload stringification in + a suitable manner). 
+ + %counts = count_by { KEYFUNC } @vals + Returns a key/value list of integers, giving the number of times the key + function block returned the key, for each value in the list. + + my %count_of_balls = count_by { $_->colour } @balls; + + Because the values returned by the key function are used as hash keys, + they ought to either be strings, or at least well-behaved as strings + (such as numbers, or object references which overload stringification in + a suitable manner). + + @vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,... + Returns a list of each of the values returned by the function block, + when invoked with values from across each of the given ARRAY + references. Each value in the returned list will be the result of the + function having been invoked with arguments at that position, from + across each of the arrays given. + + my @transposition = zip_by { [ @_ ] } @matrix; + + my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames; + + print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ]; + + If some of the arrays are shorter than others, the function will behave + as if they had `undef' in the trailing positions. The following two + lines are equivalent: + + zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ] + f( 1, "a" ), f( 2, "b" ), f( 3, undef ) + + The item function is called by `map', so if it returns a list, the + entire list is included in the result. This can be useful for example, + for generating a hash from two separate lists of keys and values + + my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ]; + # %nums = ( one => 1, two => 2, three => 3 ) + + (A function having this behaviour is sometimes called `zipWith', e.g. in + Haskell, but that name would not fit the naming scheme used by this + module). + + $arr0, $arr1, $arr2, ...
= unzip_by { ITEMFUNC } @vals + Returns a list of ARRAY references containing the values returned by the + function block, when invoked for each of the values given in the input + list. Each of the returned ARRAY references will contain the values + returned at that corresponding position by the function block. That is, + the first returned ARRAY reference will contain all the values returned + in the first position by the function block, the second will contain all + the values from the second position, and so on. + + my ( $firstnames, $lastnames ) = unzip_by { m/^(.*?) (.*)$/ } @names; + + If the function returns lists of differing lengths, the result will be + padded with `undef' in the missing elements. + + This function is an inverse of `zip_by', if given a corresponding + inverse function. + + @vals = extract_by { SELECTFUNC } @arr + Removes elements from the referenced array on which the selection + function returns true, and returns a list containing those elements. + This function is similar to `grep', except that it modifies the + referenced array to remove the selected values from it, leaving only the + unselected ones. + + my @red_balls = extract_by { $_->color eq "red" } @balls; + + # Now there are no red balls in the @balls array + + This function modifies a real array, unlike most of the other functions + in this module. Because of this, it requires a real array, not just a + list. + + This function is implemented by invoking `splice()' on the array, not by + constructing a new list and assigning it. One result of this is that + weak references will not be disturbed. + + extract_by { !defined $_ } @refs; + + will leave weak references weakened in the `@refs' array, whereas + + @refs = grep { defined $_ } @refs; + + will strengthen them all again. + + @vals = weighted_shuffle_by { WEIGHTFUNC } @vals + Returns the list of values shuffled into a random order. The + randomisation is not uniform, but weighted by the value returned by the + `WEIGHTFUNC'. 
The probability of each item being returned first will be + distributed with the distribution of the weights, and so on recursively + for the remaining items. + + @vals = bundle_by { BLOCKFUNC } $number, @vals + Similar to a regular `map' functional, returns a list of the values + returned by `BLOCKFUNC'. Values from the input list are given to the + block function in bundles of `$number'. + + If given a list of values whose length does not evenly divide by + `$number', the final call will be passed fewer elements than the others. + +TODO + * XS implementations + These functions are currently all written in pure perl. Some at + least, may benefit from having XS implementations to speed up their + logic. + + * Merge into List::Util or List::MoreUtils + This module shouldn't really exist. The functions should instead be + part of one of the existing modules that already contain many list + utility functions. Having Yet Another List Utility Module just + worsens the problem. + + I have attempted to contact the authors of both of the above + modules, to no avail; therefore I decided it best to write and + release this code here anyway so that it is at least on CPAN. Once + there, we can then see how best to merge it into an existing module.
+ +AUTHOR + Paul Evans diff --git a/modules/fallback/List/UtilsBy.pm b/modules/fallback/List/UtilsBy.pm new file mode 100644 index 000000000..d4244f9ee --- /dev/null +++ b/modules/fallback/List/UtilsBy.pm @@ -0,0 +1,529 @@ +# You may distribute under the terms of either the GNU General Public License +# or the Artistic License (the same terms as Perl itself) +# +# (C) Paul Evans, 2009-2012 -- leonerd@leonerd.org.uk + +package List::UtilsBy; + +use strict; +use warnings; + +our $VERSION = '0.09'; + +use Exporter 'import'; + +our @EXPORT_OK = qw( + sort_by + nsort_by + rev_sort_by + rev_nsort_by + + max_by nmax_by + min_by nmin_by + + uniq_by + + partition_by + count_by + + zip_by + unzip_by + + extract_by + + weighted_shuffle_by + + bundle_by +); + +=head1 NAME + +C<List::UtilsBy> - higher-order list utility functions + +=head1 SYNOPSIS + + use List::UtilsBy qw( nsort_by min_by ); + + use File::stat qw( stat ); + my @files_by_age = nsort_by { stat($_)->mtime } @files; + + my $shortest_name = min_by { length } @names; + +=head1 DESCRIPTION + +This module provides a number of list utility functions, all of which take an +initial code block to control their behaviour. They are variations on similar +core perl or C<List::Util> functions of similar names, but which use the block +to control their behaviour. For example, the core Perl function C<sort> takes +a list of values and returns them, sorted into order by their string value. +The C<sort_by> function sorts them according to the string value returned by +the extra function, when given each value. + + my @names_sorted = sort @names; + + my @people_sorted = sort_by { $_->name } @people; + +=cut + +=head1 FUNCTIONS + +=cut + +=head2 @vals = sort_by { KEYFUNC } @vals + +Returns the list of values sorted according to the string values returned by +the C<KEYFUNC> block or function.
A typical use of this may be to sort objects +according to the string value of some accessor, such as + + sort_by { $_->name } @people + +The key function is called in scalar context, being passed each value in turn +as both C<$_> and the only argument in the parameters, C<@_>. The values are +then sorted according to string comparisons on the values returned. + +This is equivalent to + + sort { $a->name cmp $b->name } @people + +except that it guarantees the C<name> accessor will be executed only once per +value. + +One interesting use-case is to sort strings which may have numbers embedded in +them "naturally", rather than lexically. + + sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings + +This sorts strings by generating sort keys which zero-pad the embedded numbers +to some level (9 digits in this case), helping to ensure the lexical sort puts +them in the correct order. + +=cut + +sub sort_by(&@) +{ + my $keygen = shift; + + my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; + return @_[ sort { $keys[$a] cmp $keys[$b] } 0 .. $#_ ]; +} + +=head2 @vals = nsort_by { KEYFUNC } @vals + +Similar to C<sort_by> but compares its key values numerically. + +=cut + +sub nsort_by(&@) +{ + my $keygen = shift; + + my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; + return @_[ sort { $keys[$a] <=> $keys[$b] } 0 .. $#_ ]; +} + +=head2 @vals = rev_sort_by { KEYFUNC } @vals + +=head2 @vals = rev_nsort_by { KEYFUNC } @vals + +Similar to C<sort_by> and C<nsort_by> but returns the list in the reverse +order. Equivalent to + + @vals = reverse sort_by { KEYFUNC } @vals + +except that these functions are slightly more efficient because they avoid +the final C<reverse> operation. + +=cut + +sub rev_sort_by(&@) +{ + my $keygen = shift; + + my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; + return @_[ sort { $keys[$b] cmp $keys[$a] } 0 ..
$#_ ]; +} + +sub rev_nsort_by(&@) +{ + my $keygen = shift; + + my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; + return @_[ sort { $keys[$b] <=> $keys[$a] } 0 .. $#_ ]; +} + +=head2 $optimal = max_by { KEYFUNC } @vals + +=head2 @optimal = max_by { KEYFUNC } @vals + +Returns the (first) value from C<@vals> that gives the numerically largest +result from the key function. + + my $tallest = max_by { $_->height } @people + + use File::stat qw( stat ); + my $newest = max_by { stat($_)->mtime } @files; + +In scalar context, the first maximal value is returned. In list context, a +list of all the maximal values is returned. This may be used to obtain +positions other than the first, if order is significant. + +If called on an empty list, an empty list is returned. + +For symmetry with the C<nsort_by> function, this is also provided under the +name C<nmax_by> since it behaves numerically. + +=cut + +sub max_by(&@) +{ + my $code = shift; + + return unless @_; + + local $_; + + my @maximal = $_ = shift @_; + my $max = $code->( $_ ); + + foreach ( @_ ) { + my $this = $code->( $_ ); + if( $this > $max ) { + @maximal = $_; + $max = $this; + } + elsif( wantarray and $this == $max ) { + push @maximal, $_; + } + } + + return wantarray ? @maximal : $maximal[0]; +} + +*nmax_by = \&max_by; + +=head2 $optimal = min_by { KEYFUNC } @vals + +=head2 @optimal = min_by { KEYFUNC } @vals + +Similar to C<max_by> but returns values which give the numerically smallest +result from the key function. Also provided as C<nmin_by> + +=cut + +sub min_by(&@) +{ + my $code = shift; + + return unless @_; + + local $_; + + my @minimal = $_ = shift @_; + my $min = $code->( $_ ); + + foreach ( @_ ) { + my $this = $code->( $_ ); + if( $this < $min ) { + @minimal = $_; + $min = $this; + } + elsif( wantarray and $this == $min ) { + push @minimal, $_; + } + } + + return wantarray ?
@minimal : $minimal[0]; +} + +*nmin_by = \&min_by; + +=head2 @vals = uniq_by { KEYFUNC } @vals + +Returns a list of the subset of values for which the key function block +returns unique values. The first value yielding a particular key is chosen, +subsequent values are rejected. + + my @some_fruit = uniq_by { $_->colour } @fruit; + +To select instead the last value per key, reverse the input list. If the order +of the results is significant, don't forget to reverse the result as well: + + my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit; + +=cut + +sub uniq_by(&@) +{ + my $code = shift; + + my %present; + return grep { + my $key = $code->( local $_ = $_ ); + !$present{$key}++ + } @_; +} + +=head2 %parts = partition_by { KEYFUNC } @vals + +Returns a key/value list of ARRAY refs containing all the original values +distributed according to the result of the key function block. Each value will +be an ARRAY ref containing all the values which returned the string from the +key function, in their original order. + + my %balls_by_colour = partition_by { $_->colour } @balls; + +Because the values returned by the key function are used as hash keys, they +ought to either be strings, or at least well-behaved as strings (such as +numbers, or object references which overload stringification in a suitable +manner). + +=cut + +sub partition_by(&@) +{ + my $code = shift; + + my %parts; + push @{ $parts{ $code->( local $_ = $_ ) } }, $_ for @_; + + return %parts; +} + +=head2 %counts = count_by { KEYFUNC } @vals + +Returns a key/value list of integers, giving the number of times the key +function block returned the key, for each value in the list. + + my %count_of_balls = count_by { $_->colour } @balls; + +Because the values returned by the key function are used as hash keys, they +ought to either be strings, or at least well-behaved as strings (such as +numbers, or object references which overload stringification in a suitable +manner).
=pod

=cut

sub count_by(&@)
{
   my $keyfunc = shift;

   my %tally;
   foreach ( @_ ) {
      # The key function works on a private copy of the value.
      local $_ = $_;
      my $key = $keyfunc->( $_ );
      $tally{$key}++;
   }

   return %tally;
}

=head2 @vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,...

Returns a list of each of the values returned by the function block, when
invoked with values from across each of the given ARRAY references. Each
value in the returned list will be the result of the function having been
invoked with arguments at that position, from across each of the arrays given.

    my @transposition = zip_by { [ @_ ] } @matrix;

    my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames;

    print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ];

If some of the arrays are shorter than others, the function will behave as if
they had C<undef> in the trailing positions. The following two lines are
equivalent:

    zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ]
    f( 1, "a" ), f( 2, "b" ), f( 3, undef )

The item function is called by C<map>, so if it returns a list, the entire
list is included in the result. This can be useful for example, for generating
a hash from two separate lists of keys and values

    my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ];
    # %nums = ( one => 1, two => 2, three => 3 )

(A function having this behaviour is sometimes called C<zipWith>, e.g. in
Haskell, but that name would not fit the naming scheme used by this module).

=cut

sub zip_by(&@)
{
   my $itemfunc = shift;

   return unless @_;

   # The longest input array decides how many times the function is invoked.
   my $longest = 0;
   foreach my $arr ( @_ ) {
      $longest = scalar @$arr if scalar @$arr > $longest;
   }

   return map {
      my $pos = $_;
      # Gather the element at this position from every array; arrays that
      # are too short contribute undef.
      $itemfunc->( map { $_[$_][$pos] } 0 .. $#_ )
   } 0 .. $longest - 1;
}

=head2 $arr0, $arr1, $arr2, ... = unzip_by { ITEMFUNC } @vals

Returns a list of ARRAY references containing the values returned by the
function block, when invoked for each of the values given in the input list.
Each of the returned ARRAY references will contain the values returned at that
corresponding position by the function block.
=pod

That is, the first returned
ARRAY reference will contain all the values returned in the first position by
the function block, the second will contain all the values from the second
position, and so on.

    my ( $firstnames, $lastnames ) = unzip_by { m/^(.*?) (.*)$/ } @names;

If the function returns lists of differing lengths, the result will be padded
with C<undef> in the missing elements.

This function is an inverse of C<zip_by>, if given a corresponding inverse
function.

=cut

sub unzip_by(&@)
{
   my $itemfunc = shift;

   # One output array per position in the lists returned by the function.
   my @columns;

   foreach my $row ( 0 .. $#_ ) {
      local $_ = $_[$row];
      my @fields = $itemfunc->( $_ );

      # A result shorter than the number of columns seen so far is padded
      # with undef, so every existing column gets an entry for this row.
      $#fields = $#columns if @fields < @columns;

      foreach my $col ( 0 .. $#fields ) {
         $columns[$col][$row] = $fields[$col];
      }
   }

   return @columns;
}

=head2 @vals = extract_by { SELECTFUNC } @arr

Removes elements from the referenced array on which the selection function
returns true, and returns a list containing those elements. This function is
similar to C<grep>, except that it modifies the referenced array to remove the
selected values from it, leaving only the unselected ones.

    my @red_balls = extract_by { $_->color eq "red" } @balls;

    # Now there are no red balls in the @balls array

This function modifies a real array, unlike most of the other functions in this
module. Because of this, it requires a real array, not just a list.

This function is implemented by invoking C<splice> on the array, not by
constructing a new list and assigning it. One result of this is that weak
references will not be disturbed.

    extract_by { !defined $_ } @refs;

will leave weak references weakened in the C<@refs> array, whereas

    @refs = grep { defined $_ } @refs;

will strengthen them all again.
=pod

=cut

sub extract_by(&\@)
{
   my $code = shift;
   my ( $arrref ) = @_;

   my @ret;
   # The index only advances past elements that are kept; after a splice the
   # next candidate has already moved into the current position.
   for( my $idx = 0; $idx < scalar @$arrref; ) {
      if( $code->( local $_ = $arrref->[$idx] ) ) {
         push @ret, splice @$arrref, $idx, 1, ();
      }
      else {
         $idx++;
      }
   }

   return @ret;
}

=head2 @vals = weighted_shuffle_by { WEIGHTFUNC } @vals

Returns the list of values shuffled into a random order. The randomisation is
not uniform, but weighted by the value returned by the C<WEIGHTFUNC>. The
probability of each item being returned first will be distributed with the
distribution of the weights, and so on recursively for the remaining items.

Weights are expected to be non-negative numbers, and need not be integers. An
item whose weight is zero is not selected while any remaining item has a
positive weight. If the weights of all the remaining items sum to zero, the
remaining items are selected uniformly at random.

=cut

sub weighted_shuffle_by(&@)
{
   my $code = shift;
   my @vals = @_;

   my @weights = map { $code->( local $_ = $_ ) } @vals;

   my @ret;
   while( @vals > 1 ) {
      my $total = 0;
      $total += $_ for @weights;

      my $idx = 0;
      if( $total > 0 ) {
         # Pick a point in [0, $total) and find the item whose span of the
         # cumulative weight contains it. The point is deliberately not
         # truncated to an integer, so fractional weights are honoured; for
         # integer weights the selection is unchanged.
         my $select = rand $total;

         # The bound on $idx guarantees termination even if floating-point
         # rounding leaves a sliver of $select after the last subtraction.
         while( $idx < $#vals and $select >= $weights[$idx] ) {
            $select -= $weights[$idx++];
         }
      }
      else {
         # No weight left to distribute; fall back to a uniform choice
         # rather than scanning past the end of the weights forever.
         $idx = int rand scalar @vals;
      }

      push @ret, splice @vals, $idx, 1, ();
      splice @weights, $idx, 1, ();
   }

   push @ret, @vals if @vals;

   return @ret;
}

=head2 @vals = bundle_by { BLOCKFUNC } $number, @vals

Similar to a regular C<map> functional, returns a list of the values returned
by C<BLOCKFUNC>. Values from the input list are given to the block function in
bundles of C<$number>.

If given a list of values whose length does not evenly divide by C<$number>,
the final call will be passed fewer elements than the others.

C<$number> must be a positive number. If it is not, and there are values to
bundle, an exception is thrown.

=cut

sub bundle_by(&@)
{
   my $code = shift;
   my $n = shift;

   # A bundle size of zero or less would never advance through the list, so
   # reject it up front instead of looping forever. An empty list is left
   # alone because it never entered the loop in the first place.
   if( @_ and not( defined $n and $n > 0 ) ) {
      require Carp;
      Carp::croak( "bundle_by: bundle size must be a positive number" );
   }

   my @ret;
   for( my ( $pos, $next ) = ( 0, $n ); $pos < @_; $pos = $next, $next += $n ) {
      # The final bundle may be shorter than the others.
      $next = @_ if $next > @_;
      push @ret, $code->( @_[$pos .. $next-1] );
   }
   return @ret;
}

=head1 TODO

=over 4

=item * XS implementations

These functions are currently all written in pure perl. Some at least, may
benefit from having XS implementations to speed up their logic.

=item * Merge into L<List::Util> or L<List::MoreUtils>

This module shouldn't really exist.
The functions should instead be part of +one of the existing modules that already contain many list utility functions. +Having Yet Another List Utility Module just worsens the problem. + +I have attempted to contact the authors of both of the above modules, to no +avail; therefore I decided it best to write and release this code here anyway +so that it is at least on CPAN. Once there, we can then see how best to merge +it into an existing module. + +=back + +=head1 AUTHOR + +Paul Evans + +=cut + +0x55AA; -- 2.20.1