mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
snapraid
This commit is contained in:
parent
cc6479303f
commit
c416528eaa
2
Makefile
2
Makefile
@ -4,7 +4,7 @@ INSTALL=install
|
||||
CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall \
|
||||
-Wno-pointer-sign \
|
||||
-fno-strict-aliasing \
|
||||
-I. -Iinclude \
|
||||
-I. -Iinclude -Iraid \
|
||||
-D_FILE_OFFSET_BITS=64 \
|
||||
-D_GNU_SOURCE \
|
||||
-D_LGPL_SOURCE \
|
||||
|
@ -21,6 +21,8 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <raid/raid.h>
|
||||
|
||||
#include "cmds.h"
|
||||
|
||||
static void usage(void)
|
||||
@ -141,6 +143,8 @@ static int data_cmds(int argc, char *argv[])
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
raid_init();
|
||||
|
||||
full_cmd = argv[0];
|
||||
|
||||
setvbuf(stdout, NULL, _IOLBF, 0);
|
||||
|
339
raid/COPYING
Normal file
339
raid/COPYING
Normal file
@ -0,0 +1,339 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
675 Mass Ave, Cambridge, MA 02139, USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Library General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Appendix: How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) 19yy <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) 19yy name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General
|
||||
Public License instead of this License.
|
185
raid/check.c
Normal file
185
raid/check.c
Normal file
@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "combo.h"
|
||||
#include "gf.h"
|
||||
|
||||
/**
|
||||
* Validate the provided failed blocks.
|
||||
*
|
||||
* This function checks if the specified failed blocks satisfy the redundancy
|
||||
* information using the data from the known valid parity blocks.
|
||||
*
|
||||
* It's similar at raid_check(), just with a different format for arguments.
|
||||
*
|
||||
* The number of failed blocks @nr must be strictly less than the number of
|
||||
* parities @nv, because you need one more parity to validate the recovering.
|
||||
*
|
||||
* No data or parity blocks are modified.
|
||||
*
|
||||
* @nr Number of failed data blocks.
|
||||
* @id[] Vector of @nr indexes of the failed data blocks.
|
||||
* The indexes start from 0. They must be in order.
|
||||
* @nv Number of valid parity blocks.
|
||||
* @ip[] Vector of @nv indexes of the valid parity blocks.
|
||||
* The indexes start from 0. They must be in order.
|
||||
* @nd Number of data blocks.
|
||||
* @size Size of the blocks pointed by @v. It must be a multipler of 64.
|
||||
* @v Vector of pointers to the blocks of data and parity.
|
||||
* It has (@nd + @ip[@nv - 1] + 1) elements. The starting elements are the
|
||||
* blocks for data, following with the parity blocks.
|
||||
* Each block has @size bytes.
|
||||
* @return 0 if the check is satisfied. -1 otherwise.
|
||||
*/
|
||||
static int raid_validate(int nr, int *id, int nv, int *ip, int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
const uint8_t *T[RAID_PARITY_MAX][RAID_PARITY_MAX];
|
||||
uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
|
||||
uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
|
||||
size_t i;
|
||||
int j, k, l;
|
||||
|
||||
BUG_ON(nr >= nv);
|
||||
|
||||
/* setup the coefficients matrix */
|
||||
for (j = 0; j < nr; ++j)
|
||||
for (k = 0; k < nr; ++k)
|
||||
G[j * nr + k] = A(ip[j], id[k]);
|
||||
|
||||
/* invert it to solve the system of linear equations */
|
||||
raid_invert(G, V, nr);
|
||||
|
||||
/* get multiplication tables */
|
||||
for (j = 0; j < nr; ++j)
|
||||
for (k = 0; k < nr; ++k)
|
||||
T[j][k] = table(V[j * nr + k]);
|
||||
|
||||
/* check all positions */
|
||||
for (i = 0; i < size; ++i) {
|
||||
uint8_t p[RAID_PARITY_MAX];
|
||||
|
||||
/* get parity */
|
||||
for (j = 0; j < nv; ++j)
|
||||
p[j] = v[nd + ip[j]][i];
|
||||
|
||||
/* compute delta parity, skipping broken disks */
|
||||
for (j = 0, k = 0; j < nd; ++j) {
|
||||
uint8_t b;
|
||||
|
||||
/* skip broken disks */
|
||||
if (k < nr && id[k] == j) {
|
||||
++k;
|
||||
continue;
|
||||
}
|
||||
|
||||
b = v[j][i];
|
||||
for (l = 0; l < nv; ++l)
|
||||
p[l] ^= gfmul[b][gfgen[ip[l]][j]];
|
||||
}
|
||||
|
||||
/* reconstruct data */
|
||||
for (j = 0; j < nr; ++j) {
|
||||
uint8_t b = 0;
|
||||
int idj = id[j];
|
||||
|
||||
/* recompute the data */
|
||||
for (k = 0; k < nr; ++k)
|
||||
b ^= T[j][k][p[k]];
|
||||
|
||||
/* add the parity contribution of the reconstructed data */
|
||||
for (l = nr; l < nv; ++l)
|
||||
p[l] ^= gfmul[b][gfgen[ip[l]][idj]];
|
||||
}
|
||||
|
||||
/* check that the final parity is 0 */
|
||||
for (l = nr; l < nv; ++l)
|
||||
if (p[l] != 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int raid_check(int nr, int *ir, int nd, int np, size_t size, void **v)
|
||||
{
|
||||
/* valid parity index */
|
||||
int ip[RAID_PARITY_MAX];
|
||||
int vp;
|
||||
int rd;
|
||||
int i, j;
|
||||
|
||||
/* enforce limit on size */
|
||||
BUG_ON(size % 64 != 0);
|
||||
|
||||
/* enforce limit on number of failures */
|
||||
BUG_ON(nr >= np); /* >= because we check with extra parity */
|
||||
BUG_ON(np > RAID_PARITY_MAX);
|
||||
|
||||
/* enforce order in index vector */
|
||||
BUG_ON(nr >= 2 && ir[0] >= ir[1]);
|
||||
BUG_ON(nr >= 3 && ir[1] >= ir[2]);
|
||||
BUG_ON(nr >= 4 && ir[2] >= ir[3]);
|
||||
BUG_ON(nr >= 5 && ir[3] >= ir[4]);
|
||||
BUG_ON(nr >= 6 && ir[4] >= ir[5]);
|
||||
|
||||
/* enforce limit on index vector */
|
||||
BUG_ON(nr > 0 && ir[nr-1] >= nd + np);
|
||||
|
||||
/* count failed data disk */
|
||||
rd = 0;
|
||||
while (rd < nr && ir[rd] < nd)
|
||||
++rd;
|
||||
|
||||
/* put valid parities into ip[] */
|
||||
vp = 0;
|
||||
for (i = rd, j = 0; j < np; ++j) {
|
||||
/* if parity is failed */
|
||||
if (i < nr && ir[i] == nd + j) {
|
||||
/* skip broken parity */
|
||||
++i;
|
||||
} else {
|
||||
/* store valid parity */
|
||||
ip[vp] = j;
|
||||
++vp;
|
||||
}
|
||||
}
|
||||
|
||||
return raid_validate(rd, ir, vp, ip, nd, size, v);
|
||||
}
|
||||
|
||||
/*
 * Search for the smallest set of failed blocks that passes raid_check().
 *
 * @ir[] Output vector. On a positive return it holds the indexes of the
 *   combination of blocks that satisfied the check.
 * @nd Number of data blocks.
 * @np Number of parity blocks.
 * @size Size of each block.
 * @v Vector of pointers to the data and parity blocks.
 * @return 0 if the check passes with no failure, the number of failed
 *   blocks of the first combination that passes, or -1 if none does.
 */
int raid_scan(int *ir, int nd, int np, size_t size, void **v)
{
	int nfail;

	/* without any parity nothing can be verified */
	if (np == 0)
		return -1;

	/* the special case of no failure at all */
	if (raid_check(0, 0, nd, np, size, v) == 0)
		return 0;

	/* try an increasing number of failures, always keeping one spare parity */
	for (nfail = 1; nfail < np; ++nfail) {
		int more;

		/* enumerate all combinations of nfail blocks out of nd + np */
		combination_first(nfail, nd + np, ir);
		do {
			/* stop at the first combination that explains the data */
			if (raid_check(nfail, ir, nd, np, size, v) == 0)
				return nfail;

			more = combination_next(nfail, nd + np, ir);
		} while (more);
	}

	/* no solution found */
	return -1;
}
|
||||
|
155
raid/combo.h
Normal file
155
raid/combo.h
Normal file
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_COMBO_H
|
||||
#define __RAID_COMBO_H
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/**
 * Set up the first permutation with repetition of r out of n elements.
 *
 * All the r positions of c[] are set to 0.
 *
 * It is meant to be paired with permutation_next():
 *
 * int i[R];
 * permutation_first(R, N, i);
 * do {
 *    code using i[0], i[1], ..., i[R-1]
 * } while (permutation_next(R, N, i));
 *
 * which visits the same values, in the same order, as the nested loops:
 *
 * for(i[0]=0;i[0]<N;++i[0])
 *  for(i[1]=0;i[1]<N;++i[1])
 *   ...
 *    for(i[R-2]=0;i[R-2]<N;++i[R-2])
 *     for(i[R-1]=0;i[R-1]<N;++i[R-1])
 *      code using i[0], i[1], ..., i[R-1]
 */
static __always_inline void permutation_first(int r, int n, int *c)
{
	int pos;

	(void)n; /* only read by the assert, which may be compiled out */
	assert(0 < r && r <= n);

	/* clear every position, walking from the last one down to the first */
	for (pos = r; pos-- > 0;)
		c[pos] = 0;
}
|
||||
|
||||
/**
 * Advance c[] to the next permutation with repetition of r out of n elements.
 *
 * The vector is treated like an odometer with r digits in base n, the last
 * position being the fastest one.
 *
 * Return ==0 when finished, !=0 if c[] holds a new permutation.
 */
static __always_inline int permutation_next(int r, int n, int *c)
{
	int pos = r - 1; /* position being incremented */

	/* increment, carrying into the previous position on overflow */
	while (++c[pos] >= n) {
		/* an overflow of the first position means we have finished */
		if (pos == 0)
			return 0;

		--pos;
	}

	/* restart from 0 every position after the one that advanced */
	for (++pos; pos < r; ++pos)
		c[pos] = 0;

	return 1;
}
|
||||
|
||||
/**
 * Set up the first combination without repetition of r out of n elements.
 *
 * The r positions of c[] are set to 0, 1, ..., r-1.
 *
 * It is meant to be paired with combination_next():
 *
 * int i[R];
 * combination_first(R, N, i);
 * do {
 *    code using i[0], i[1], ..., i[R-1]
 * } while (combination_next(R, N, i));
 *
 * which visits the same values, in the same order, as the nested loops:
 *
 * for(i[0]=0;i[0]<N-(R-1);++i[0])
 *  for(i[1]=i[0]+1;i[1]<N-(R-2);++i[1])
 *   ...
 *    for(i[R-2]=i[R-3]+1;i[R-2]<N-1;++i[R-2])
 *     for(i[R-1]=i[R-2]+1;i[R-1]<N;++i[R-1])
 *      code using i[0], i[1], ..., i[R-1]
 */
static __always_inline void combination_first(int r, int n, int *c)
{
	int pos;

	(void)n; /* only read by the assert, which may be compiled out */
	assert(0 < r && r <= n);

	/* each position starts at its own index, giving the lowest ordered set */
	for (pos = r; pos-- > 0;)
		c[pos] = pos;
}
|
||||
|
||||
/**
 * Advance c[] to the next combination without repetition of r out of n
 * elements.
 *
 * The values in c[] are kept in strictly increasing order, so the highest
 * value allowed at a position shrinks by one for each step toward the front.
 *
 * Return ==0 when finished, !=0 if c[] holds a new combination.
 */
static __always_inline int combination_next(int r, int n, int *c)
{
	int pos = r - 1; /* position being incremented */
	int limit = n; /* exclusive upper bound for the value at pos */

	/* increment, carrying into the previous position on overflow */
	while (++c[pos] >= limit) {
		/* an overflow of the first position means we have finished */
		if (pos == 0)
			return 0;

		--pos;
		--limit;
	}

	/* every following position restarts just above its predecessor */
	for (++pos; pos < r; ++pos)
		c[pos] = c[pos - 1] + 1;

	return 1;
}
|
||||
#endif
|
||||
|
331
raid/cpu.h
Normal file
331
raid/cpu.h
Normal file
@ -0,0 +1,331 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_CPU_H
|
||||
#define __RAID_CPU_H
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
|
||||
static inline void raid_cpuid(uint32_t func_eax, uint32_t sub_ecx, uint32_t *reg)
|
||||
{
|
||||
asm volatile (
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
/* allow compilation in PIC mode saving ebx */
|
||||
"xchgl %%ebx, %1\n"
|
||||
"cpuid\n"
|
||||
"xchgl %%ebx, %1\n"
|
||||
: "=a" (reg[0]), "=r" (reg[1]), "=c" (reg[2]), "=d" (reg[3])
|
||||
: "0" (func_eax), "2" (sub_ecx)
|
||||
#else
|
||||
"cpuid\n"
|
||||
: "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3])
|
||||
: "0" (func_eax), "2" (sub_ecx)
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static inline void raid_xgetbv(uint32_t* reg)
|
||||
{
|
||||
/* get the value of the Extended Control Register ecx=0 */
|
||||
asm volatile (
|
||||
/* uses a direct encoding of the XGETBV instruction as only recent */
|
||||
/* assemblers support it. */
|
||||
/* the next line is equivalent at: "xgetbv\n" */
|
||||
".byte 0x0f, 0x01, 0xd0\n"
|
||||
: "=a" (reg[0]), "=d" (reg[3])
|
||||
: "c" (0)
|
||||
);
|
||||
}
|
||||
|
||||
#define CPU_VENDOR_MAX 13
|
||||
|
||||
/*
 * Queries CPUID for the vendor string and the family/model numbers.
 *
 * @vendor Destination buffer of at least CPU_VENDOR_MAX (13) bytes; it
 *   receives the 12 character vendor id followed by a terminating 0.
 * @family Receives the family number.
 * @model Receives the model number.
 *
 * For "AuthenticAMD" processors with a base family below 15 the base
 * family/model are returned unchanged; in every other case the extended
 * family is added and the extended model is used as the high nibble.
 */
static inline void raid_cpu_info(char *vendor, unsigned *family, unsigned *model)
{
	uint32_t reg[4];
	unsigned f, ef, m, em;

	raid_cpuid(0, 0, reg);

	/*
	 * The vendor id comes back in EBX, EDX, ECX order. Copy it with
	 * memcpy() instead of storing through a casted uint32_t pointer:
	 * the char buffer has no alignment guarantee and must not be
	 * accessed through an incompatible lvalue type.
	 */
	memcpy(vendor, &reg[1], sizeof(reg[1]));
	memcpy(vendor + 4, &reg[3], sizeof(reg[3]));
	memcpy(vendor + 8, &reg[2], sizeof(reg[2]));
	vendor[12] = 0;

	raid_cpuid(1, 0, reg);

	/* split the version information returned in EAX */
	f = (reg[0] >> 8) & 0xF; /* base family */
	ef = (reg[0] >> 20) & 0xFF; /* extended family */
	m = (reg[0] >> 4) & 0xF; /* base model */
	em = (reg[0] >> 16) & 0xF; /* extended model */

	if (strcmp(vendor, "AuthenticAMD") == 0) {
		if (f < 15) {
			*family = f;
			*model = m;
		} else {
			*family = f + ef;
			*model = m + (em << 4);
		}
	} else {
		*family = f + ef;
		*model = m + (em << 4);
	}
}
|
||||
|
||||
/*
 * Returns 1 if every bit of cpuid_1_ecx is set in CPUID.1:ECX and every
 * bit of cpuid_1_edx is set in CPUID.1:EDX, 0 otherwise.
 */
static inline int raid_cpu_match_sse(uint32_t cpuid_1_ecx, uint32_t cpuid_1_edx)
{
	uint32_t leaf1[4];

	raid_cpuid(1, 0, leaf1);

	return (leaf1[2] & cpuid_1_ecx) == cpuid_1_ecx
		&& (leaf1[3] & cpuid_1_edx) == cpuid_1_edx;
}
|
||||
|
||||
/*
 * Returns 1 if all of the following hold, 0 otherwise:
 * - every bit of cpuid_1_ecx is set in CPUID.1:ECX,
 * - every bit of xcr0 is set in the low 32 bits returned by XGETBV,
 * - every bit of cpuid_7_ebx is set in CPUID.(7,0):EBX.
 *
 * The checks run in that order and stop at the first failure, so XGETBV
 * is executed only after the CPUID.1:ECX bits have been confirmed.
 */
static inline int raid_cpu_match_avx(uint32_t cpuid_1_ecx, uint32_t cpuid_7_ebx, uint32_t xcr0)
{
	uint32_t reg[4];

	raid_cpuid(1, 0, reg);
	if ((reg[2] & cpuid_1_ecx) == cpuid_1_ecx) {
		raid_xgetbv(reg);
		if ((reg[0] & xcr0) == xcr0) {
			raid_cpuid(7, 0, reg);
			return (reg[1] & cpuid_7_ebx) == cpuid_7_ebx;
		}
	}

	return 0;
}
|
||||
|
||||
/*
 * Returns 1 if the processor reports SSE2, 0 otherwise.
 *
 * Reference: Intel 64 and IA-32 Architectures Software Developer's Manual,
 * 325462-048US September 2013, 11.6.2 "Checking for SSE/SSE2 Support":
 * SSE2 is present if CPUID.01H:EDX.SSE2[bit 26] = 1.
 */
static inline int raid_cpu_has_sse2(void)
{
	const uint32_t edx_sse2 = 1 << 26; /* SSE2 */

	return raid_cpu_match_sse(0, edx_sse2);
}
|
||||
|
||||
/*
 * Returns 1 if the processor reports both SSE2 and SSSE3, 0 otherwise.
 *
 * Reference: Intel 64 and IA-32 Architectures Software Developer's Manual,
 * 325462-048US September 2013, 12.7.2 "Checking for SSSE3 Support":
 * after the SSE/SSE2 check of section 11.6.2, SSSE3 is present if
 * CPUID.01H:ECX.SSSE3[bit 9] = 1.
 */
static inline int raid_cpu_has_ssse3(void)
{
	const uint32_t ecx_ssse3 = 1 << 9; /* SSSE3 */
	const uint32_t edx_sse2 = 1 << 26; /* SSE2 */

	return raid_cpu_match_sse(ecx_ssse3, edx_sse2);
}
|
||||
|
||||
/*
 * Returns 1 if the processor reports SSE4.2 (and so the CRC32 instruction),
 * 0 otherwise.
 *
 * Reference: Intel 64 and IA-32 Architectures Software Developer's Manual,
 * 325462-048US September 2013, 12.12.3 "Checking for SSE4.2 Support":
 * CRC32 may be used if CPUID.01H:ECX.SSE4_2[bit 20] = 1.
 */
static inline int raid_cpu_has_crc32(void)
{
	const uint32_t ecx_sse42 = 1 << 20; /* CRC32 */

	return raid_cpu_match_sse(ecx_sse42, 0);
}
|
||||
|
||||
/*
 * Returns 1 if AVX2 is usable, 0 otherwise.
 *
 * Reference: Intel Architecture Instruction Set Extensions Programming
 * Reference, 319433-022 October 2014.
 *
 * 14.3 Detection of AVX instructions
 * 1) CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use)
 * 2) XGETBV reports XCR0[2:1] = `11b' (XMM and YMM state enabled by OS)
 * 3) CPUID.1:ECX.AVX[bit 28] = 1 (AVX instructions supported)
 *
 * 14.7.1 Detection of AVX2
 * After AVX has been identified, AVX2 is indicated by
 * CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5] = 1.
 */
static inline int raid_cpu_has_avx2(void)
{
	const uint32_t ecx_osxsave_avx = (1 << 27) | (1 << 28); /* OSXSAVE and AVX */
	const uint32_t ebx_avx2 = 1 << 5; /* AVX2 */
	const uint32_t xcr0_xmm_ymm = 3 << 1; /* OS saves XMM and YMM registers */

	return raid_cpu_match_avx(ecx_osxsave_avx, ebx_avx2, xcr0_xmm_ymm);
}
|
||||
|
||||
/*
 * Returns 1 if AVX512F and AVX512BW are usable, 0 otherwise.
 *
 * Reference: Intel Architecture Instruction Set Extensions Programming
 * Reference, 319433-022 October 2014, 2.2 "Detection of 512-bit Instruction
 * Groups of Intel AVX-512 Family":
 * 1) CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use)
 * 2) XGETBV reports XCR0[7:5] = `111b' (OPMASK state, upper 256-bit of
 *    ZMM0-ZMM15 and ZMM16-ZMM31 state enabled by OS) and XCR0[2:1] = `11b'
 *    (XMM and YMM state enabled by OS)
 * 3) CPUID.0x7.0:EBX.AVX512F[bit 16] = 1 and
 *    CPUID.0x7.0:EBX.AVX512BW[bit 30] = 1
 *
 * AVX and AVX2 are intentionally not checked, because the documentation
 * doesn't require that.
 */
static inline int raid_cpu_has_avx512bw(void)
{
	const uint32_t ecx_osxsave = 1 << 27; /* XSAVE/XGETBV */
	const uint32_t ebx_avx512 = (1 << 16) | (1 << 30); /* AVX512F and AVX512BW */
	const uint32_t xcr0_state = (3 << 1) | (7 << 5); /* OS saves XMM, YMM and ZMM registers */

	return raid_cpu_match_avx(ecx_osxsave, ebx_avx512, xcr0_state);
}
|
||||
|
||||
/**
 * Check if a family/model pair identifies an Intel Atom CPU.
 *
 * Returns 1 for family 6 with one of the models listed below, 0 otherwise.
 * The vendor is not checked here.
 *
 * Model list from "x86 Architecture CPUID",
 * http://www.sandpile.org/x86/cpuid.htm
 */
static inline int raid_cpu_is_atom(unsigned family, unsigned model)
{
	if (family != 6)
		return 0;

	switch (model) {
	case 28: /* 1C Atom (45 nm) with 512 KB on-die L2 */
	case 38: /* 26 Atom (45 nm) with 512 KB on-die L2 */
	case 54: /* 36 Atom (32 nm) with 512 KB on-die L2 */
	case 39: /* 27 Atom (32 nm) with 512 KB on-die L2 */
	case 53: /* 35 Atom (?? nm) with ??? KB on-die L2 */
	case 74: /* 4A Atom 2C (22 nm) 1 MB L2 + PowerVR (TGR) */
	case 90: /* 5A Atom 4C (22 nm) 2 MB L2 + PowerVR (ANN) */
	case 55: /* 37 Atom 4C (22 nm) 2 MB L2 + Intel Gen7 (BYT) */
	case 76: /* 4C Atom 4C (14 nm) 2 MB L2 + Intel Gen8 (BSW) */
	case 93: /* 5D Atom 4C (28 nm TSMC) 1 MB L2 + Mali (SoFIA) */
	case 77: /* 4D Atom 8C (22 nm) 4 MB L2 (AVN) */
		/* ?? Atom ?C (14 nm) ? MB L2 (DVN) has no known model yet */
		return 1;
	default:
		return 0;
	}
}
|
||||
|
||||
/**
|
||||
* Check if the processor has a slow MULT implementation.
|
||||
* If yes, it's better to use a hash not based on multiplication.
|
||||
*/
|
||||
static inline int raid_cpu_has_slowmult(void)
|
||||
{
|
||||
char vendor[CPU_VENDOR_MAX];
|
||||
unsigned family;
|
||||
unsigned model;
|
||||
|
||||
/*
|
||||
* In some cases Murmur3 based on MUL instruction,
|
||||
* is a LOT slower than Spooky2 based on SHIFTs.
|
||||
*/
|
||||
raid_cpu_info(vendor, &family, &model);
|
||||
|
||||
if (strcmp(vendor, "GenuineIntel") == 0) {
|
||||
/*
|
||||
* Intel Atom (Model 28)
|
||||
* murmur3:378 MB/s, spooky2:3413 MB/s (x86)
|
||||
*
|
||||
* Intel Atom (Model 77)
|
||||
* murmur3:1311 MB/s, spooky2:4056 MB/s (x64)
|
||||
*/
|
||||
if (raid_cpu_is_atom(family, model))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the processor has a slow extended set of SSE registers.
|
||||
* If yes, it's better to limit the unroll to the firsrt 8 registers.
|
||||
*/
|
||||
static inline int raid_cpu_has_slowextendedreg(void)
|
||||
{
|
||||
char vendor[CPU_VENDOR_MAX];
|
||||
unsigned family;
|
||||
unsigned model;
|
||||
|
||||
/*
|
||||
* In some cases the PAR2 implementation using 16 SSE registers
|
||||
* is a LITTLE slower than the one using only the first 8 registers.
|
||||
* This doesn't happen for PARZ.
|
||||
*/
|
||||
raid_cpu_info(vendor, &family, &model);
|
||||
|
||||
if (strcmp(vendor, "AuthenticAMD") == 0) {
|
||||
/*
|
||||
* AMD Bulldozer
|
||||
* par2_sse2:4922 MB/s, par2_sse2e:4465 MB/s
|
||||
*/
|
||||
if (family == 21)
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (strcmp(vendor, "GenuineIntel") == 0) {
|
||||
/*
|
||||
* Intel Atom (Model 77)
|
||||
* par2_sse2:5686 MB/s, par2_sse2e:5250 MB/s
|
||||
* parz_sse2:3100 MB/s, parz_sse2e:3400 MB/s
|
||||
* par3_sse3:1921 MB/s, par3_sse3e:1813 MB/s
|
||||
* par4_sse3:1175 MB/s, par4_sse3e:1113 MB/s
|
||||
* par5_sse3:876 MB/s, par5_sse3e:675 MB/s
|
||||
* par6_sse3:705 MB/s, par6_sse3e:529 MB/s
|
||||
*
|
||||
* Intel Atom (Model 77) "Avoton C2750"
|
||||
* par2_sse2:5661 MB/s, par2_sse2e:5382 MB/s
|
||||
* parz_sse2:3110 MB/s, parz_sse2e:3450 MB/s
|
||||
* par3_sse3:1769 MB/s, par3_sse3e:1856 MB/s
|
||||
* par4_sse3:1221 MB/s, par4_sse3e:1141 MB/s
|
||||
* par5_sse3:910 MB/s, par5_sse3e:675 MB/s
|
||||
* par6_sse3:720 MB/s, par6_sse3e:534 MB/s
|
||||
*/
|
||||
if (raid_cpu_is_atom(family, model))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
137
raid/gf.h
Normal file
137
raid/gf.h
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_GF_H
|
||||
#define __RAID_GF_H
|
||||
|
||||
/*
|
||||
* Galois field operations.
|
||||
*
|
||||
* Basic range checks are implemented using BUG_ON().
|
||||
*/
|
||||
|
||||
/*
|
||||
* GF a*b.
|
||||
*/
|
||||
static __always_inline uint8_t mul(uint8_t a, uint8_t b)
|
||||
{
|
||||
return gfmul[a][b];
|
||||
}
|
||||
|
||||
/*
|
||||
* GF 1/a.
|
||||
* Not defined for a == 0.
|
||||
*/
|
||||
static __always_inline uint8_t inv(uint8_t v)
|
||||
{
|
||||
BUG_ON(v == 0); /* division by zero */
|
||||
|
||||
return gfinv[v];
|
||||
}
|
||||
|
||||
/*
|
||||
* GF 2^a.
|
||||
*/
|
||||
static __always_inline uint8_t pow2(int v)
|
||||
{
|
||||
BUG_ON(v < 0 || v > 254); /* invalid exponent */
|
||||
|
||||
return gfexp[v];
|
||||
}
|
||||
|
||||
/*
|
||||
* Gets the multiplication table for a specified value.
|
||||
*/
|
||||
static __always_inline const uint8_t *table(uint8_t v)
|
||||
{
|
||||
return gfmul[v];
|
||||
}
|
||||
|
||||
/*
|
||||
* Gets the generator matrix coefficient for parity 'p' and disk 'd'.
|
||||
*/
|
||||
static __always_inline uint8_t A(int p, int d)
|
||||
{
|
||||
return gfgen[p][d];
|
||||
}
|
||||
|
||||
/*
 * Access the storage starting at the lvalue 'p' as a uint8_t.
 * The result is itself an lvalue, usable for both loads and stores.
 */
#define v_8(p) (*((uint8_t *)&(p)))

/*
 * Access the storage starting at the lvalue 'p' as a uint32_t.
 * The result is itself an lvalue, usable for both loads and stores.
 */
#define v_32(p) (*((uint32_t *)&(p)))

/*
 * Access the storage starting at the lvalue 'p' as a uint64_t.
 * The result is itself an lvalue, usable for both loads and stores.
 */
#define v_64(p) (*((uint64_t *)&(p)))
|
||||
|
||||
/*
 * Multiply each byte of a uint32 by 2 in the GF(2^8).
 *
 * Every byte is shifted left by one inside its own lane; lanes that had
 * the top bit set are then reduced by xoring 0x1d.
 */
static __always_inline uint32_t x2_32(uint32_t v)
{
	/* 0x01 in every byte lane whose top bit is set */
	const uint32_t carry = (v >> 7) & 0x01010101U;
	/* per-lane shift, dropping the bit that crossed into the next lane */
	const uint32_t doubled = (v << 1) & 0xfefefefeU;

	/* carry * 0x1d puts 0x1d in exactly the lanes that overflowed */
	return doubled ^ (carry * 0x1dU);
}
|
||||
|
||||
/*
 * Multiply each byte of a uint64 by 2 in the GF(2^8).
 *
 * Every byte is shifted left by one inside its own lane; lanes that had
 * the top bit set are then reduced by xoring 0x1d.
 */
static __always_inline uint64_t x2_64(uint64_t v)
{
	/* 0x01 in every byte lane whose top bit is set */
	const uint64_t carry = (v >> 7) & 0x0101010101010101ULL;
	/* per-lane shift, dropping the bit that crossed into the next lane */
	const uint64_t doubled = (v << 1) & 0xfefefefefefefefeULL;

	/* carry * 0x1d puts 0x1d in exactly the lanes that overflowed */
	return doubled ^ (carry * 0x1dULL);
}
|
||||
|
||||
/*
 * Divide each byte of a uint32 by 2 in the GF(2^8).
 *
 * Every byte is shifted right by one inside its own lane; lanes that had
 * the low bit set are then corrected by xoring 0x8e.
 */
static __always_inline uint32_t d2_32(uint32_t v)
{
	/* 0x01 in every byte lane whose low bit is set */
	const uint32_t odd = v & 0x01010101U;
	/* per-lane shift, dropping the bit that crossed in from the next lane */
	const uint32_t halved = (v >> 1) & 0x7f7f7f7fU;

	/* odd * 0x8e puts 0x8e in exactly the lanes that were odd */
	return halved ^ (odd * 0x8eU);
}
|
||||
|
||||
/*
 * Divide each byte of a uint64 by 2 in the GF(2^8).
 *
 * Every byte is shifted right by one inside its own lane; lanes that had
 * the low bit set are then corrected by xoring 0x8e.
 */
static __always_inline uint64_t d2_64(uint64_t v)
{
	/* 0x01 in every byte lane whose low bit is set */
	const uint64_t odd = v & 0x0101010101010101ULL;
	/* per-lane shift, dropping the bit that crossed in from the next lane */
	const uint64_t halved = (v >> 1) & 0x7f7f7f7f7f7f7f7fULL;

	/* odd * 0x8e puts 0x8e in exactly the lanes that were odd */
	return halved ^ (odd * 0x8eULL);
}
|
||||
|
||||
#endif
|
||||
|
94
raid/helper.c
Normal file
94
raid/helper.c
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
/*
 * Compare-exchange step of the sorting networks below: puts the smaller
 * of v[a], v[b] at index a. Expects an 'int *v' in the enclosing scope.
 */
#define RAID_SWAP(a, b) \
	do { \
		if (v[a] > v[b]) { \
			int t = v[a]; \
			v[a] = v[b]; \
			v[b] = t; \
		} \
	} while (0)

/*
 * Sorts in place, in ascending order, the first 'n' integers of 'v'.
 *
 * Sizes from 2 to 6 use fixed sorting networks generated with Batcher's
 * Merge-Exchange. Any other size is handled by a plain insertion sort, so
 * that a longer vector is never silently returned unsorted; for n <= 1
 * that fallback does nothing.
 */
void raid_sort(int n, int *v)
{
	int i, j;

	switch (n) {
	case 2:
		RAID_SWAP(0, 1);
		break;
	case 3:
		RAID_SWAP(0, 2);
		RAID_SWAP(0, 1);
		RAID_SWAP(1, 2);
		break;
	case 4:
		RAID_SWAP(0, 2);
		RAID_SWAP(1, 3);
		RAID_SWAP(0, 1);
		RAID_SWAP(2, 3);
		RAID_SWAP(1, 2);
		break;
	case 5:
		RAID_SWAP(0, 4);
		RAID_SWAP(0, 2);
		RAID_SWAP(1, 3);
		RAID_SWAP(2, 4);
		RAID_SWAP(0, 1);
		RAID_SWAP(2, 3);
		RAID_SWAP(1, 4);
		RAID_SWAP(1, 2);
		RAID_SWAP(3, 4);
		break;
	case 6:
		RAID_SWAP(0, 4);
		RAID_SWAP(1, 5);
		RAID_SWAP(0, 2);
		RAID_SWAP(1, 3);
		RAID_SWAP(2, 4);
		RAID_SWAP(3, 5);
		RAID_SWAP(0, 1);
		RAID_SWAP(2, 3);
		RAID_SWAP(4, 5);
		RAID_SWAP(1, 4);
		RAID_SWAP(1, 2);
		RAID_SWAP(3, 4);
		break;
	default:
		/* no network for this size: generic insertion sort */
		for (i = 1; i < n; ++i) {
			int key = v[i];

			for (j = i; j > 0 && v[j - 1] > key; --j)
				v[j] = v[j - 1];
			v[j] = key;
		}
		break;
	}
}
|
||||
|
||||
/*
 * Inserts 'i' in the vector 'v', which holds 'n' integers already sorted
 * in ascending order, keeping it sorted. 'v' must have room for n + 1
 * elements.
 *
 * A linear scan from the end is used instead of a binary search because
 * this is intended for very small vectors, and because it makes the case
 * of elements inserted already in order the cheapest one.
 */
void raid_insert(int n, int *v, int i)
{
	int pos = n;

	/* shift up by one every element greater than the new value */
	while (pos > 0 && v[pos - 1] > i) {
		v[pos] = v[pos - 1];
		--pos;
	}

	/* drop the new value in the hole left behind */
	v[pos] = i;
}
|
||||
|
43
raid/helper.h
Normal file
43
raid/helper.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_HELPER_H
|
||||
#define __RAID_HELPER_H
|
||||
|
||||
/**
|
||||
* Inserts an integer in a sorted vector.
|
||||
*
|
||||
* This function can be used to insert indexes in order, ready to be used for
|
||||
* calling raid_rec().
|
||||
*
|
||||
* @n Number of integers currently in the vector.
|
||||
* @v Vector of integers already sorted.
|
||||
* It must have extra space for the new element at the end.
|
||||
* @i Value to insert.
|
||||
*/
|
||||
void raid_insert(int n, int *v, int i);
|
||||
|
||||
/**
|
||||
* Sorts a small vector of integers.
|
||||
*
|
||||
* If you have indexes not in order, you can use this function to sort them
|
||||
* before calling raid_rec().
|
||||
*
|
||||
* @n Number of integers. No more than RAID_PARITY_MAX.
|
||||
* @v Vector of integers.
|
||||
*/
|
||||
void raid_sort(int n, int *v);
|
||||
|
||||
#endif
|
||||
|
556
raid/int.c
Normal file
556
raid/int.c
Normal file
@ -0,0 +1,556 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "gf.h"
|
||||
|
||||
/*
|
||||
* GEN1 (RAID5 with xor) 32bit C implementation
|
||||
*/
|
||||
void raid_gen1_int32(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint32_t p0;
|
||||
uint32_t p1;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
|
||||
for (i = 0; i < size; i += 8) {
|
||||
p0 = v_32(v[l][i]);
|
||||
p1 = v_32(v[l][i + 4]);
|
||||
for (d = l - 1; d >= 0; --d) {
|
||||
p0 ^= v_32(v[d][i]);
|
||||
p1 ^= v_32(v[d][i + 4]);
|
||||
}
|
||||
v_32(p[i]) = p0;
|
||||
v_32(p[i + 4]) = p1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN1 (RAID5 with xor) 64bit C implementation
|
||||
*/
|
||||
void raid_gen1_int64(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint64_t p0;
|
||||
uint64_t p1;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
|
||||
for (i = 0; i < size; i += 16) {
|
||||
p0 = v_64(v[l][i]);
|
||||
p1 = v_64(v[l][i + 8]);
|
||||
for (d = l - 1; d >= 0; --d) {
|
||||
p0 ^= v_64(v[d][i]);
|
||||
p1 ^= v_64(v[d][i + 8]);
|
||||
}
|
||||
v_64(p[i]) = p0;
|
||||
v_64(p[i + 8]) = p1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN2 (RAID6 with powers of 2) 32bit C implementation
|
||||
*/
|
||||
void raid_gen2_int32(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint32_t d0, q0, p0;
|
||||
uint32_t d1, q1, p1;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
|
||||
for (i = 0; i < size; i += 8) {
|
||||
q0 = p0 = v_32(v[l][i]);
|
||||
q1 = p1 = v_32(v[l][i + 4]);
|
||||
for (d = l - 1; d >= 0; --d) {
|
||||
d0 = v_32(v[d][i]);
|
||||
d1 = v_32(v[d][i + 4]);
|
||||
|
||||
p0 ^= d0;
|
||||
p1 ^= d1;
|
||||
|
||||
q0 = x2_32(q0);
|
||||
q1 = x2_32(q1);
|
||||
|
||||
q0 ^= d0;
|
||||
q1 ^= d1;
|
||||
}
|
||||
v_32(p[i]) = p0;
|
||||
v_32(p[i + 4]) = p1;
|
||||
v_32(q[i]) = q0;
|
||||
v_32(q[i + 4]) = q1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN2 (RAID6 with powers of 2) 64bit C implementation
|
||||
*/
|
||||
void raid_gen2_int64(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint64_t d0, q0, p0;
|
||||
uint64_t d1, q1, p1;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
|
||||
for (i = 0; i < size; i += 16) {
|
||||
q0 = p0 = v_64(v[l][i]);
|
||||
q1 = p1 = v_64(v[l][i + 8]);
|
||||
for (d = l - 1; d >= 0; --d) {
|
||||
d0 = v_64(v[d][i]);
|
||||
d1 = v_64(v[d][i + 8]);
|
||||
|
||||
p0 ^= d0;
|
||||
p1 ^= d1;
|
||||
|
||||
q0 = x2_64(q0);
|
||||
q1 = x2_64(q1);
|
||||
|
||||
q0 ^= d0;
|
||||
q1 ^= d1;
|
||||
}
|
||||
v_64(p[i]) = p0;
|
||||
v_64(p[i + 8]) = p1;
|
||||
v_64(q[i]) = q0;
|
||||
v_64(q[i + 8]) = q1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN3 (triple parity with Cauchy matrix) 8bit C implementation
|
||||
*
|
||||
* Note that instead of a generic multiplication table, likely resulting
|
||||
* in multiple cache misses, a precomputed table could be used.
|
||||
* But this is only a kind of reference function, and we are not really
|
||||
* interested in speed.
|
||||
*/
|
||||
void raid_gen3_int8(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
uint8_t *r;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint8_t d0, r0, q0, p0;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
r = v[nd + 2];
|
||||
|
||||
for (i = 0; i < size; i += 1) {
|
||||
p0 = q0 = r0 = 0;
|
||||
for (d = l; d > 0; --d) {
|
||||
d0 = v_8(v[d][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= gfmul[d0][gfgen[1][d]];
|
||||
r0 ^= gfmul[d0][gfgen[2][d]];
|
||||
}
|
||||
|
||||
/* first disk with all coefficients at 1 */
|
||||
d0 = v_8(v[0][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= d0;
|
||||
r0 ^= d0;
|
||||
|
||||
v_8(p[i]) = p0;
|
||||
v_8(q[i]) = q0;
|
||||
v_8(r[i]) = r0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN4 (quad parity with Cauchy matrix) 8bit C implementation
|
||||
*
|
||||
* Note that instead of a generic multiplication table, likely resulting
|
||||
* in multiple cache misses, a precomputed table could be used.
|
||||
* But this is only a kind of reference function, and we are not really
|
||||
* interested in speed.
|
||||
*/
|
||||
void raid_gen4_int8(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
uint8_t *r;
|
||||
uint8_t *s;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint8_t d0, s0, r0, q0, p0;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
r = v[nd + 2];
|
||||
s = v[nd + 3];
|
||||
|
||||
for (i = 0; i < size; i += 1) {
|
||||
p0 = q0 = r0 = s0 = 0;
|
||||
for (d = l; d > 0; --d) {
|
||||
d0 = v_8(v[d][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= gfmul[d0][gfgen[1][d]];
|
||||
r0 ^= gfmul[d0][gfgen[2][d]];
|
||||
s0 ^= gfmul[d0][gfgen[3][d]];
|
||||
}
|
||||
|
||||
/* first disk with all coefficients at 1 */
|
||||
d0 = v_8(v[0][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= d0;
|
||||
r0 ^= d0;
|
||||
s0 ^= d0;
|
||||
|
||||
v_8(p[i]) = p0;
|
||||
v_8(q[i]) = q0;
|
||||
v_8(r[i]) = r0;
|
||||
v_8(s[i]) = s0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN5 (penta parity with Cauchy matrix) 8bit C implementation
|
||||
*
|
||||
* Note that instead of a generic multiplication table, likely resulting
|
||||
* in multiple cache misses, a precomputed table could be used.
|
||||
* But this is only a kind of reference function, and we are not really
|
||||
* interested in speed.
|
||||
*/
|
||||
void raid_gen5_int8(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
uint8_t *r;
|
||||
uint8_t *s;
|
||||
uint8_t *t;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint8_t d0, t0, s0, r0, q0, p0;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
r = v[nd + 2];
|
||||
s = v[nd + 3];
|
||||
t = v[nd + 4];
|
||||
|
||||
for (i = 0; i < size; i += 1) {
|
||||
p0 = q0 = r0 = s0 = t0 = 0;
|
||||
for (d = l; d > 0; --d) {
|
||||
d0 = v_8(v[d][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= gfmul[d0][gfgen[1][d]];
|
||||
r0 ^= gfmul[d0][gfgen[2][d]];
|
||||
s0 ^= gfmul[d0][gfgen[3][d]];
|
||||
t0 ^= gfmul[d0][gfgen[4][d]];
|
||||
}
|
||||
|
||||
/* first disk with all coefficients at 1 */
|
||||
d0 = v_8(v[0][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= d0;
|
||||
r0 ^= d0;
|
||||
s0 ^= d0;
|
||||
t0 ^= d0;
|
||||
|
||||
v_8(p[i]) = p0;
|
||||
v_8(q[i]) = q0;
|
||||
v_8(r[i]) = r0;
|
||||
v_8(s[i]) = s0;
|
||||
v_8(t[i]) = t0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GEN6 (hexa parity with Cauchy matrix) 8bit C implementation
|
||||
*
|
||||
* Note that instead of a generic multiplication table, likely resulting
|
||||
* in multiple cache misses, a precomputed table could be used.
|
||||
* But this is only a kind of reference function, and we are not really
|
||||
* interested in speed.
|
||||
*/
|
||||
void raid_gen6_int8(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
uint8_t *r;
|
||||
uint8_t *s;
|
||||
uint8_t *t;
|
||||
uint8_t *u;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint8_t d0, u0, t0, s0, r0, q0, p0;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
r = v[nd + 2];
|
||||
s = v[nd + 3];
|
||||
t = v[nd + 4];
|
||||
u = v[nd + 5];
|
||||
|
||||
for (i = 0; i < size; i += 1) {
|
||||
p0 = q0 = r0 = s0 = t0 = u0 = 0;
|
||||
for (d = l; d > 0; --d) {
|
||||
d0 = v_8(v[d][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= gfmul[d0][gfgen[1][d]];
|
||||
r0 ^= gfmul[d0][gfgen[2][d]];
|
||||
s0 ^= gfmul[d0][gfgen[3][d]];
|
||||
t0 ^= gfmul[d0][gfgen[4][d]];
|
||||
u0 ^= gfmul[d0][gfgen[5][d]];
|
||||
}
|
||||
|
||||
/* first disk with all coefficients at 1 */
|
||||
d0 = v_8(v[0][i]);
|
||||
|
||||
p0 ^= d0;
|
||||
q0 ^= d0;
|
||||
r0 ^= d0;
|
||||
s0 ^= d0;
|
||||
t0 ^= d0;
|
||||
u0 ^= d0;
|
||||
|
||||
v_8(p[i]) = p0;
|
||||
v_8(q[i]) = q0;
|
||||
v_8(r[i]) = r0;
|
||||
v_8(s[i]) = s0;
|
||||
v_8(t[i]) = t0;
|
||||
v_8(u[i]) = u0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Recover failure of one data block at index id[0] using parity at index
 * ip[0] for any RAID level.
 *
 * Starting from the equation:
 *
 * Pd = A[ip[0],id[0]] * Dx
 *
 * and solving we get:
 *
 * Dx = A[ip[0],id[0]]^-1 * Pd
 *
 * The recovered data is written over the buffer vv[id[0]].
 */
void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
{
	uint8_t **v = (uint8_t **)vv;
	const uint8_t *lut;
	uint8_t *parity;
	uint8_t *data;
	size_t off;

	(void)nr; /* unused, it's always 1 */

	/* if it's RAID5 uses the faster function */
	if (ip[0] == 0) {
		raid_rec1of1(id, nd, size, vv);
		return;
	}

	/* multiplication table of the inverted (1x1) coefficient matrix */
	lut = table(inv(A(ip[0], id[0])));

	/* compute delta parity */
	raid_delta_gen(1, id, ip, nd, size, vv);

	parity = v[nd + ip[0]];
	data = v[id[0]];

	/* reconstruct each byte from its parity delta */
	for (off = 0; off < size; ++off)
		data[off] = lut[(uint8_t)(parity[off] ^ data[off])];
}
|
||||
|
||||
/*
 * Recover failure of two data blocks at indexes id[0],id[1] using parity at
 * indexes ip[0],ip[1] for any RAID level.
 *
 * Starting from the equations:
 *
 * Pd = A[ip[0],id[0]] * Dx + A[ip[0],id[1]] * Dy
 * Qd = A[ip[1],id[0]] * Dx + A[ip[1],id[1]] * Dy
 *
 * we solve inverting the coefficients matrix.
 *
 * The recovered data is written over the buffers vv[id[0]] and vv[id[1]].
 */
void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
{
	enum { N = 2 };
	uint8_t **v = (uint8_t **)vv;
	const uint8_t *T[N][N];
	uint8_t G[N * N];
	uint8_t V[N * N];
	uint8_t *p, *q; /* parity buffers */
	uint8_t *x, *y; /* data buffers being recovered */
	size_t off;
	int e;

	(void)nr; /* unused, it's always 2 */

	/* if it's RAID6 recovering with P and Q uses the faster function */
	if (ip[0] == 0 && ip[1] == 1) {
		raid_rec2of2_int8(id, ip, nd, size, vv);
		return;
	}

	/* setup the coefficients matrix, in row-major order */
	for (e = 0; e < N * N; ++e)
		G[e] = A(ip[e / N], id[e % N]);

	/* invert it to solve the system of linear equations */
	raid_invert(G, V, N);

	/* get one multiplication table for each inverted coefficient */
	for (e = 0; e < N * N; ++e)
		T[e / N][e % N] = table(V[e]);

	/* compute delta parity */
	raid_delta_gen(2, id, ip, nd, size, vv);

	p = v[nd + ip[0]];
	q = v[nd + ip[1]];
	x = v[id[0]];
	y = v[id[1]];

	for (off = 0; off < size; ++off) {
		/* both deltas are taken before either data byte is overwritten */
		const uint8_t Pd = p[off] ^ x[off];
		const uint8_t Qd = q[off] ^ y[off];

		/* reconstruct */
		x[off] = T[0][0][Pd] ^ T[0][1][Qd];
		y[off] = T[1][0][Pd] ^ T[1][1][Qd];
	}
}
|
||||
|
||||
/*
|
||||
* Recover failure of N data blocks at indexes id[N] using parity at indexes
|
||||
* ip[N] for any RAID level.
|
||||
*
|
||||
* Starting from the N equations, with 0<=i<N :
|
||||
*
|
||||
* PD[i] = sum(A[ip[i],id[j]] * D[i]) 0<=j<N
|
||||
*
|
||||
* we solve inverting the coefficients matrix.
|
||||
*
|
||||
* Note that referring at previous equations you have:
|
||||
* PD[0] = Pd, PD[1] = Qd, PD[2] = Rd, ...
|
||||
* D[0] = Dx, D[1] = Dy, D[2] = Dz, ...
|
||||
*/
|
||||
void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
uint8_t *p[RAID_PARITY_MAX];
|
||||
uint8_t *pa[RAID_PARITY_MAX];
|
||||
const uint8_t *T[RAID_PARITY_MAX][RAID_PARITY_MAX];
|
||||
uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
|
||||
uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
|
||||
size_t i;
|
||||
int j, k;
|
||||
|
||||
/* setup the coefficients matrix */
|
||||
for (j = 0; j < nr; ++j)
|
||||
for (k = 0; k < nr; ++k)
|
||||
G[j * nr + k] = A(ip[j], id[k]);
|
||||
|
||||
/* invert it to solve the system of linear equations */
|
||||
raid_invert(G, V, nr);
|
||||
|
||||
/* get multiplication tables */
|
||||
for (j = 0; j < nr; ++j)
|
||||
for (k = 0; k < nr; ++k)
|
||||
T[j][k] = table(V[j * nr + k]);
|
||||
|
||||
/* compute delta parity */
|
||||
raid_delta_gen(nr, id, ip, nd, size, vv);
|
||||
|
||||
for (j = 0; j < nr; ++j) {
|
||||
p[j] = v[nd + ip[j]];
|
||||
pa[j] = v[id[j]];
|
||||
}
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
uint8_t PD[RAID_PARITY_MAX];
|
||||
|
||||
/* delta */
|
||||
for (j = 0; j < nr; ++j)
|
||||
PD[j] = p[j][i] ^ pa[j][i];
|
||||
|
||||
/* reconstruct */
|
||||
for (j = 0; j < nr; ++j) {
|
||||
uint8_t b = 0;
|
||||
|
||||
for (k = 0; k < nr; ++k)
|
||||
b ^= T[j][k][PD[k]];
|
||||
pa[j][i] = b;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
274
raid/internal.h
Normal file
274
raid/internal.h
Normal file
@ -0,0 +1,274 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_INTERNAL_H
|
||||
#define __RAID_INTERNAL_H
|
||||
|
||||
/*
|
||||
* Supported instruction sets.
|
||||
*
|
||||
* It may happen that the assembler is too old to support
|
||||
* all instructions, even if the architecture supports them.
|
||||
* These defines allow excluding the unsupported ones from the build.
|
||||
*
|
||||
* If in your project you use a predefined assembler, you can define them
|
||||
* using fixed values, instead of using the HAVE_* defines.
|
||||
*/
|
||||
#if HAVE_CONFIG_H
|
||||
|
||||
/* Includes the project configuration for HAVE_* defines */
|
||||
#include "config.h"
|
||||
|
||||
/* If the compiler supports assembly */
|
||||
#if HAVE_ASSEMBLY
|
||||
/* Autodetect from the compiler */
|
||||
#if defined(__i386__)
|
||||
#define CONFIG_X86 1
|
||||
#define CONFIG_X86_32 1
|
||||
#endif
|
||||
#if defined(__x86_64__)
|
||||
#define CONFIG_X86 1
|
||||
#define CONFIG_X86_64 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Enables SSE2, SSSE3, AVX2 only if the assembler supports it */
|
||||
#if HAVE_SSE2
|
||||
#define CONFIG_SSE2 1
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
#define CONFIG_SSSE3 1
|
||||
#endif
|
||||
#if HAVE_AVX2
|
||||
#define CONFIG_AVX2 1
|
||||
#endif
|
||||
|
||||
#else /* if HAVE_CONFIG_H is not defined */
|
||||
|
||||
/* Assume that assembly is always supported */
|
||||
#if defined(__i386__)
|
||||
#define CONFIG_X86 1
|
||||
#define CONFIG_X86_32 1
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#define CONFIG_X86 1
|
||||
#define CONFIG_X86_64 1
|
||||
#endif
|
||||
|
||||
/* Assumes that the assembler supports everything */
|
||||
#ifdef CONFIG_X86
|
||||
#define CONFIG_SSE2 1
|
||||
#define CONFIG_SSSE3 1
|
||||
#define CONFIG_AVX2 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Includes anything required for compatibility.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Inverse assert.
|
||||
*/
|
||||
#define BUG_ON(a) assert(!(a))
|
||||
|
||||
/*
|
||||
* Forced inline.
|
||||
*/
|
||||
#ifndef __always_inline
|
||||
#define __always_inline inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Forced alignment.
|
||||
*/
|
||||
#ifndef __aligned
|
||||
#define __aligned(a) __attribute__((aligned(a)))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Align a pointer at the specified size.
|
||||
*/
|
||||
/*
 * Rounds @ptr up to the next address that is a multiple of @size.
 *
 * The mask arithmetic only yields a multiple of @size when @size is a
 * power of two. An already aligned pointer is returned unchanged.
 */
static __always_inline void *__align_ptr(void *ptr, uintptr_t size)
{
	const uintptr_t mask = size - 1U;
	const uintptr_t bumped = (uintptr_t)ptr + mask;

	return (void *)(bumped & ~mask);
}
|
||||
|
||||
/*
|
||||
* Includes the main interface headers.
|
||||
*/
|
||||
#include "raid.h"
|
||||
#include "helper.h"
|
||||
|
||||
/*
|
||||
* Internal functions.
|
||||
*
|
||||
* These are intended to provide access for testing.
|
||||
*/
|
||||
int raid_selftest(void);
|
||||
void raid_gen_ref(int nd, int np, size_t size, void **vv);
|
||||
void raid_invert(uint8_t *M, uint8_t *V, int n);
|
||||
void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v);
|
||||
void raid_rec1of1(int *id, int nd, size_t size, void **v);
|
||||
void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_gen1_int32(int nd, size_t size, void **vv);
|
||||
void raid_gen1_int64(int nd, size_t size, void **vv);
|
||||
void raid_gen1_sse2(int nd, size_t size, void **vv);
|
||||
void raid_gen1_avx2(int nd, size_t size, void **vv);
|
||||
void raid_gen2_int32(int nd, size_t size, void **vv);
|
||||
void raid_gen2_int64(int nd, size_t size, void **vv);
|
||||
void raid_gen2_sse2(int nd, size_t size, void **vv);
|
||||
void raid_gen2_avx2(int nd, size_t size, void **vv);
|
||||
void raid_gen2_sse2ext(int nd, size_t size, void **vv);
|
||||
void raid_genz_int32(int nd, size_t size, void **vv);
|
||||
void raid_genz_int64(int nd, size_t size, void **vv);
|
||||
void raid_genz_sse2(int nd, size_t size, void **vv);
|
||||
void raid_genz_sse2ext(int nd, size_t size, void **vv);
|
||||
void raid_genz_avx2ext(int nd, size_t size, void **vv);
|
||||
void raid_gen3_int8(int nd, size_t size, void **vv);
|
||||
void raid_gen3_ssse3(int nd, size_t size, void **vv);
|
||||
void raid_gen3_ssse3ext(int nd, size_t size, void **vv);
|
||||
void raid_gen3_avx2ext(int nd, size_t size, void **vv);
|
||||
void raid_gen4_int8(int nd, size_t size, void **vv);
|
||||
void raid_gen4_ssse3(int nd, size_t size, void **vv);
|
||||
void raid_gen4_ssse3ext(int nd, size_t size, void **vv);
|
||||
void raid_gen4_avx2ext(int nd, size_t size, void **vv);
|
||||
void raid_gen5_int8(int nd, size_t size, void **vv);
|
||||
void raid_gen5_ssse3(int nd, size_t size, void **vv);
|
||||
void raid_gen5_ssse3ext(int nd, size_t size, void **vv);
|
||||
void raid_gen5_avx2ext(int nd, size_t size, void **vv);
|
||||
void raid_gen6_int8(int nd, size_t size, void **vv);
|
||||
void raid_gen6_ssse3(int nd, size_t size, void **vv);
|
||||
void raid_gen6_ssse3ext(int nd, size_t size, void **vv);
|
||||
void raid_gen6_avx2ext(int nd, size_t size, void **vv);
|
||||
void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
|
||||
/*
|
||||
* Internal naming.
|
||||
*
|
||||
* These are intended to provide access for testing.
|
||||
*/
|
||||
const char *raid_gen1_tag(void);
|
||||
const char *raid_gen2_tag(void);
|
||||
const char *raid_genz_tag(void);
|
||||
const char *raid_gen3_tag(void);
|
||||
const char *raid_gen4_tag(void);
|
||||
const char *raid_gen5_tag(void);
|
||||
const char *raid_gen6_tag(void);
|
||||
const char *raid_rec1_tag(void);
|
||||
const char *raid_rec2_tag(void);
|
||||
const char *raid_recX_tag(void);
|
||||
|
||||
/*
|
||||
* Internal forwarders.
|
||||
*/
|
||||
extern void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
|
||||
extern void (*raid_genz_ptr)(int nd, size_t size, void **vv);
|
||||
extern void (*raid_gen_ptr[RAID_PARITY_MAX])(
|
||||
int nd, size_t size, void **vv);
|
||||
extern void (*raid_rec_ptr[RAID_PARITY_MAX])(
|
||||
int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
|
||||
/*
|
||||
* Tables.
|
||||
*/
|
||||
extern const uint8_t raid_gfmul[256][256] __aligned(256);
|
||||
extern const uint8_t raid_gfexp[256] __aligned(256);
|
||||
extern const uint8_t raid_gfinv[256] __aligned(256);
|
||||
extern const uint8_t raid_gfvandermonde[3][256] __aligned(256);
|
||||
extern const uint8_t raid_gfcauchy[6][256] __aligned(256);
|
||||
extern const uint8_t raid_gfcauchypshufb[251][4][2][16] __aligned(256);
|
||||
extern const uint8_t raid_gfmulpshufb[256][2][16] __aligned(256);
|
||||
extern const uint8_t (*raid_gfgen)[256];
|
||||
#define gfmul raid_gfmul
|
||||
#define gfexp raid_gfexp
|
||||
#define gfinv raid_gfinv
|
||||
#define gfvandermonde raid_gfvandermonde
|
||||
#define gfcauchy raid_gfcauchy
|
||||
#define gfgenpshufb raid_gfcauchypshufb
|
||||
#define gfmulpshufb raid_gfmulpshufb
|
||||
#define gfgen raid_gfgen
|
||||
|
||||
/*
|
||||
* Assembler blocks.
|
||||
*/
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSE2
|
||||
static __always_inline void raid_sse_begin(void)
|
||||
{
|
||||
}
|
||||
|
||||
static __always_inline void raid_sse_end(void)
|
||||
{
|
||||
/* SSE and AVX code uses non-temporal writes, like MOVNTDQ, */
|
||||
/* that use a weak memory model. To ensure that other processors */
|
||||
/* see correctly the data written, we use a store-store memory */
|
||||
/* barrier at the end of the asm code */
|
||||
asm volatile ("sfence" : : : "memory");
|
||||
|
||||
/* clobbers registers used in the asm code */
|
||||
/* this is required because in the Windows ABI, */
|
||||
/* registers xmm6-xmm15 should be kept by the callee. */
|
||||
/* this clobber list force the compiler to save any */
|
||||
/* register that needs to be saved */
|
||||
/* we check for __SSE2_ because we require that the */
|
||||
/* compiler supports SSE2 registers in the clobber list */
|
||||
#ifdef __SSE2__
|
||||
asm volatile ("" : : : "%xmm0", "%xmm1", "%xmm2", "%xmm3");
|
||||
asm volatile ("" : : : "%xmm4", "%xmm5", "%xmm6", "%xmm7");
|
||||
#ifdef CONFIG_X86_64
|
||||
asm volatile ("" : : : "%xmm8", "%xmm9", "%xmm10", "%xmm11");
|
||||
asm volatile ("" : : : "%xmm12", "%xmm13", "%xmm14", "%xmm15");
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AVX2
|
||||
static __always_inline void raid_avx_begin(void)
|
||||
{
|
||||
raid_sse_begin();
|
||||
}
|
||||
|
||||
static __always_inline void raid_avx_end(void)
|
||||
{
|
||||
raid_sse_end();
|
||||
|
||||
/* reset the upper part of the ymm registers */
|
||||
/* to avoid the 70 clocks penality on the next */
|
||||
/* xmm register use */
|
||||
asm volatile ("vzeroupper" : : : "memory");
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_X86 */
|
||||
|
||||
#endif
|
||||
|
119
raid/intz.c
Normal file
119
raid/intz.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "gf.h"
|
||||
|
||||
/*
|
||||
* GENz (triple parity with powers of 2^-1) 32bit C implementation
|
||||
*/
|
||||
void raid_genz_int32(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t**)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
uint8_t *r;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint32_t d0, r0, q0, p0;
|
||||
uint32_t d1, r1, q1, p1;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
r = v[nd + 2];
|
||||
|
||||
for (i = 0; i < size; i += 8) {
|
||||
r0 = q0 = p0 = v_32(v[l][i]);
|
||||
r1 = q1 = p1 = v_32(v[l][i + 4]);
|
||||
for (d = l - 1; d >= 0; --d) {
|
||||
d0 = v_32(v[d][i]);
|
||||
d1 = v_32(v[d][i + 4]);
|
||||
|
||||
p0 ^= d0;
|
||||
p1 ^= d1;
|
||||
|
||||
q0 = x2_32(q0);
|
||||
q1 = x2_32(q1);
|
||||
|
||||
q0 ^= d0;
|
||||
q1 ^= d1;
|
||||
|
||||
r0 = d2_32(r0);
|
||||
r1 = d2_32(r1);
|
||||
|
||||
r0 ^= d0;
|
||||
r1 ^= d1;
|
||||
}
|
||||
v_32(p[i]) = p0;
|
||||
v_32(p[i + 4]) = p1;
|
||||
v_32(q[i]) = q0;
|
||||
v_32(q[i + 4]) = q1;
|
||||
v_32(r[i]) = r0;
|
||||
v_32(r[i + 4]) = r1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GENz (triple parity with powers of 2^-1) 64bit C implementation
|
||||
*/
|
||||
void raid_genz_int64(int nd, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t**)vv;
|
||||
uint8_t *p;
|
||||
uint8_t *q;
|
||||
uint8_t *r;
|
||||
int d, l;
|
||||
size_t i;
|
||||
|
||||
uint64_t d0, r0, q0, p0;
|
||||
uint64_t d1, r1, q1, p1;
|
||||
|
||||
l = nd - 1;
|
||||
p = v[nd];
|
||||
q = v[nd + 1];
|
||||
r = v[nd + 2];
|
||||
|
||||
for (i = 0; i < size; i += 16) {
|
||||
r0 = q0 = p0 = v_64(v[l][i]);
|
||||
r1 = q1 = p1 = v_64(v[l][i + 8]);
|
||||
for (d = l - 1; d >= 0; --d) {
|
||||
d0 = v_64(v[d][i]);
|
||||
d1 = v_64(v[d][i + 8]);
|
||||
|
||||
p0 ^= d0;
|
||||
p1 ^= d1;
|
||||
|
||||
q0 = x2_64(q0);
|
||||
q1 = x2_64(q1);
|
||||
|
||||
q0 ^= d0;
|
||||
q1 ^= d1;
|
||||
|
||||
r0 = d2_64(r0);
|
||||
r1 = d2_64(r1);
|
||||
|
||||
r0 ^= d0;
|
||||
r1 ^= d1;
|
||||
}
|
||||
v_64(p[i]) = p0;
|
||||
v_64(p[i + 8]) = p1;
|
||||
v_64(q[i]) = q0;
|
||||
v_64(q[i + 8]) = q1;
|
||||
v_64(r[i]) = r0;
|
||||
v_64(r[i + 8]) = r1;
|
||||
}
|
||||
}
|
||||
|
154
raid/memory.c
Normal file
154
raid/memory.c
Normal file
@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "memory.h"
|
||||
|
||||
/*
 * Allocates @size bytes aligned at a multiple of @align_size.
 *
 * The returned pointer lies inside a larger malloc() block; the start
 * of that block is stored in *@freeptr and is what must be passed to
 * free().
 *
 * Returns 0 and sets *@freeptr to NULL if @align_size is 0, if
 * @size + @align_size does not fit in a size_t, or if malloc() fails.
 */
void *raid_malloc_align(size_t size, size_t align_size, void **freeptr)
{
	unsigned char *ptr;
	uintptr_t offset;

	/* leave the caller something safe to free() on every failure path */
	*freeptr = NULL;

	/* a zero alignment would make the modulo below divide by zero */
	if (align_size == 0)
		return 0;

	/* without this check the sum wraps and a tiny block is returned */
	if (size > SIZE_MAX - align_size)
		return 0;

	ptr = malloc(size + align_size);
	if (!ptr) {
		/* LCOV_EXCL_START */
		return 0;
		/* LCOV_EXCL_STOP */
	}

	*freeptr = ptr;

	/* advance to the next aligned address, if not already there */
	offset = ((uintptr_t)ptr) % align_size;

	if (offset != 0)
		ptr += align_size - offset;

	return ptr;
}
|
||||
|
||||
void *raid_malloc(size_t size, void **freeptr)
|
||||
{
|
||||
return raid_malloc_align(size, RAID_MALLOC_ALIGN, freeptr);
|
||||
}
|
||||
|
||||
/*
 * Allocates a vector of @n pointers to blocks of @size bytes.
 *
 * All the blocks are carved out of a single allocation aligned at
 * @align_size, each one starting @size + @displacement_size bytes
 * after the previous one. The first @nd pointers are then stored in
 * reverse order.
 *
 * On success returns the pointer vector, to be released with free(),
 * and stores in *@freeptr the pointer to free() for the block storage.
 * Returns 0 if an allocation fails or if the requested sizes overflow
 * a size_t.
 *
 * It is a bug to call it with @n <= 0, @nd < 0 or @nd > @n.
 */
void **raid_malloc_vector_align(int nd, int n, size_t size, size_t align_size, size_t displacement_size, void **freeptr)
{
	void **v;
	unsigned char *va;
	size_t count;
	size_t stride;
	int i;

	/* nd > n would make the reversal below index past the vector */
	BUG_ON(n <= 0 || nd < 0 || nd > n);

	count = (size_t)n;

	/* reject sizes whose products would wrap around */
	if (size > SIZE_MAX - displacement_size)
		return 0;
	stride = size + displacement_size;
	if (count > SIZE_MAX / sizeof(void *))
		return 0;
	if (stride != 0 && count > SIZE_MAX / stride)
		return 0;

	v = malloc(count * sizeof(void *));
	if (!v) {
		/* LCOV_EXCL_START */
		return 0;
		/* LCOV_EXCL_STOP */
	}

	va = raid_malloc_align(count * stride, align_size, freeptr);
	if (!va) {
		/* LCOV_EXCL_START */
		free(v);
		return 0;
		/* LCOV_EXCL_STOP */
	}

	for (i = 0; i < n; ++i) {
		v[i] = va;
		va += stride;
	}

	/* reverse order of the data blocks */
	/* because they are usually accessed from the last one */
	for (i = 0; i < nd / 2; ++i) {
		void *ptr = v[i];

		v[i] = v[nd - 1 - i];
		v[nd - 1 - i] = ptr;
	}

	return v;
}
|
||||
|
||||
void **raid_malloc_vector(int nd, int n, size_t size, void **freeptr)
|
||||
{
|
||||
return raid_malloc_vector_align(nd, n, size, RAID_MALLOC_ALIGN, RAID_MALLOC_DISPLACEMENT, freeptr);
|
||||
}
|
||||
|
||||
/*
 * Fills @n blocks of @size bytes with pseudo-random data.
 *
 * A single generator, started from @seed, runs across all the blocks
 * in order, so the content depends only on @seed, @n and @size.
 */
void raid_mrand_vector(unsigned seed, int n, size_t size, void **vv)
{
	unsigned char **blocks = (unsigned char **)vv;
	unsigned state = seed;
	size_t pos;
	int b;

	for (b = 0; b < n; ++b) {
		for (pos = 0; pos < size; ++pos) {
			/* basic C99/C11 linear congruential generator */
			state = state * 1103515245U + 12345U;

			/* only the low 8 bits of the shifted state are stored */
			blocks[b][pos] = (unsigned char)(state >> 16);
		}
	}
}
|
||||
|
||||
/*
 * Exercises @n blocks of @size bytes with a sequence of byte patterns.
 *
 * The blocks are first filled with 0. Then, for every pattern from 1
 * to 255, an ascending pass checks the previous content and writes the
 * pattern, and a descending pass checks the pattern and writes its
 * bitwise complement.
 *
 * Returns 0 if every check matches, -1 at the first mismatch.
 */
int raid_mtest_vector(int n, size_t size, void **vv)
{
	unsigned char **blocks = (unsigned char **)vv;
	unsigned char expect;
	unsigned char fill;
	unsigned pattern;
	size_t pos;
	int b;

	/* fill with 0 */
	fill = 0;
	for (b = 0; b < n; ++b) {
		for (pos = 0; pos < size; ++pos)
			blocks[b][pos] = fill;
	}

	/* test with all the byte patterns */
	for (pattern = 1; pattern < 256; ++pattern) {
		expect = fill;
		fill = (unsigned char)pattern;

		/* forward fill */
		for (b = 0; b < n; ++b) {
			for (pos = 0; pos < size; ++pos) {
				if (blocks[b][pos] != expect) {
					/* LCOV_EXCL_START */
					return -1;
					/* LCOV_EXCL_STOP */
				}
				blocks[b][pos] = fill;
			}
		}

		expect = fill;
		fill = (unsigned char)~expect;

		/* backward fill with complement */
		for (b = 0; b < n; ++b) {
			pos = size;
			while (pos-- > 0) {
				if (blocks[b][pos] != expect) {
					/* LCOV_EXCL_START */
					return -1;
					/* LCOV_EXCL_STOP */
				}
				blocks[b][pos] = fill;
			}
		}
	}

	return 0;
}
|
||||
|
96
raid/memory.h
Normal file
96
raid/memory.h
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_MEMORY_H
|
||||
#define __RAID_MEMORY_H
|
||||
|
||||
/**
|
||||
* Memory alignment provided by raid_malloc().
|
||||
*
|
||||
* It should guarantee good cache performance everywhere.
|
||||
*/
|
||||
#define RAID_MALLOC_ALIGN 256
|
||||
|
||||
/**
|
||||
* Memory displacement to avoid cache address sharing on contiguous blocks,
|
||||
* used by raid_malloc_vector().
|
||||
*
|
||||
* When allocating a sequence of blocks with a size of power of 2,
|
||||
* there is the risk that the addresses of each block are mapped into the
|
||||
* same cache line and prefetching predictor, resulting in a lot of cache
|
||||
* sharing if you access all the blocks in parallel, from the start to the
|
||||
* end.
|
||||
*
|
||||
* To avoid this effect, it's better if all the blocks are allocated
|
||||
* with a fixed displacement trying to reduce the cache addresses sharing.
|
||||
*
|
||||
* The selected displacement was chosen empirically with some speed tests
|
||||
* with 8/12/16/20/24 data buffers of 256 KB.
|
||||
*
|
||||
* These are the results in MB/s with no displacement:
|
||||
*
|
||||
* sse2
|
||||
* gen1 15368 [MB/s]
|
||||
* gen2 6814 [MB/s]
|
||||
* genz 3033 [MB/s]
|
||||
*
|
||||
* These are the results with displacement resulting in improvements
|
||||
* in the order of 20% or more:
|
||||
*
|
||||
* sse2
|
||||
* gen1 21936 [MB/s]
|
||||
* gen2 11902 [MB/s]
|
||||
* genz 5838 [MB/s]
|
||||
*
|
||||
*/
|
||||
#define RAID_MALLOC_DISPLACEMENT (7*256)
|
||||
|
||||
/**
|
||||
* Aligned malloc.
|
||||
* Use an alignment suitable for the raid functions.
|
||||
*/
|
||||
void *raid_malloc(size_t size, void **freeptr);
|
||||
|
||||
/**
|
||||
* Arbitrary aligned malloc.
|
||||
*/
|
||||
void *raid_malloc_align(size_t size, size_t align_size, void **freeptr);
|
||||
|
||||
/**
|
||||
* Aligned vector allocation.
|
||||
* Use an alignment suitable for the raid functions.
|
||||
* Returns a vector of @n pointers, each one pointing to a block of
|
||||
* the specified @size.
|
||||
* The first @nd elements are reversed in order.
|
||||
*/
|
||||
void **raid_malloc_vector(int nd, int n, size_t size, void **freeptr);
|
||||
|
||||
/**
|
||||
* Arbitrary aligned vector allocation.
|
||||
*/
|
||||
void **raid_malloc_vector_align(int nd, int n, size_t size, size_t align_size, size_t displacement_size, void **freeptr);
|
||||
|
||||
/**
|
||||
* Fills the memory vector with pseudo-random data based on the specified seed.
|
||||
*/
|
||||
void raid_mrand_vector(unsigned seed, int n, size_t size, void **vv);
|
||||
|
||||
/**
|
||||
* Tests the memory vector for RAM problems.
|
||||
* Returns 0 if no problem is found, -1 at the first mismatch.
|
||||
*/
|
||||
int raid_mtest_vector(int n, size_t size, void **vv);
|
||||
|
||||
#endif
|
||||
|
473
raid/module.c
Normal file
473
raid/module.c
Normal file
@ -0,0 +1,473 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "memory.h"
|
||||
#include "cpu.h"
|
||||
|
||||
/*
|
||||
* Initializes and selects the best algorithm.
|
||||
*/
|
||||
void raid_init(void)
|
||||
{
|
||||
raid_gen3_ptr = raid_gen3_int8;
|
||||
raid_gen_ptr[3] = raid_gen4_int8;
|
||||
raid_gen_ptr[4] = raid_gen5_int8;
|
||||
raid_gen_ptr[5] = raid_gen6_int8;
|
||||
|
||||
if (sizeof(void *) == 4) {
|
||||
raid_gen_ptr[0] = raid_gen1_int32;
|
||||
raid_gen_ptr[1] = raid_gen2_int32;
|
||||
raid_genz_ptr = raid_genz_int32;
|
||||
} else {
|
||||
raid_gen_ptr[0] = raid_gen1_int64;
|
||||
raid_gen_ptr[1] = raid_gen2_int64;
|
||||
raid_genz_ptr = raid_genz_int64;
|
||||
}
|
||||
|
||||
raid_rec_ptr[0] = raid_rec1_int8;
|
||||
raid_rec_ptr[1] = raid_rec2_int8;
|
||||
raid_rec_ptr[2] = raid_recX_int8;
|
||||
raid_rec_ptr[3] = raid_recX_int8;
|
||||
raid_rec_ptr[4] = raid_recX_int8;
|
||||
raid_rec_ptr[5] = raid_recX_int8;
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSE2
|
||||
if (raid_cpu_has_sse2()) {
|
||||
raid_gen_ptr[0] = raid_gen1_sse2;
|
||||
#ifdef CONFIG_X86_64
|
||||
if (raid_cpu_has_slowextendedreg()) {
|
||||
raid_gen_ptr[1] = raid_gen2_sse2;
|
||||
} else {
|
||||
raid_gen_ptr[1] = raid_gen2_sse2ext;
|
||||
}
|
||||
/* note that raid_cpu_has_slowextendedreg() doesn't affect parz */
|
||||
raid_genz_ptr = raid_genz_sse2ext;
|
||||
#else
|
||||
raid_gen_ptr[1] = raid_gen2_sse2;
|
||||
raid_genz_ptr = raid_genz_sse2;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SSSE3
|
||||
if (raid_cpu_has_ssse3()) {
|
||||
#ifdef CONFIG_X86_64
|
||||
if (raid_cpu_has_slowextendedreg()) {
|
||||
raid_gen3_ptr = raid_gen3_ssse3;
|
||||
raid_gen_ptr[3] = raid_gen4_ssse3;
|
||||
raid_gen_ptr[4] = raid_gen5_ssse3;
|
||||
raid_gen_ptr[5] = raid_gen6_ssse3;
|
||||
} else {
|
||||
raid_gen3_ptr = raid_gen3_ssse3ext;
|
||||
raid_gen_ptr[3] = raid_gen4_ssse3ext;
|
||||
raid_gen_ptr[4] = raid_gen5_ssse3ext;
|
||||
raid_gen_ptr[5] = raid_gen6_ssse3ext;
|
||||
}
|
||||
#else
|
||||
raid_gen3_ptr = raid_gen3_ssse3;
|
||||
raid_gen_ptr[3] = raid_gen4_ssse3;
|
||||
raid_gen_ptr[4] = raid_gen5_ssse3;
|
||||
raid_gen_ptr[5] = raid_gen6_ssse3;
|
||||
#endif
|
||||
raid_rec_ptr[0] = raid_rec1_ssse3;
|
||||
raid_rec_ptr[1] = raid_rec2_ssse3;
|
||||
raid_rec_ptr[2] = raid_recX_ssse3;
|
||||
raid_rec_ptr[3] = raid_recX_ssse3;
|
||||
raid_rec_ptr[4] = raid_recX_ssse3;
|
||||
raid_rec_ptr[5] = raid_recX_ssse3;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AVX2
|
||||
if (raid_cpu_has_avx2()) {
|
||||
raid_gen_ptr[0] = raid_gen1_avx2;
|
||||
raid_gen_ptr[1] = raid_gen2_avx2;
|
||||
#ifdef CONFIG_X86_64
|
||||
raid_gen3_ptr = raid_gen3_avx2ext;
|
||||
raid_genz_ptr = raid_genz_avx2ext;
|
||||
raid_gen_ptr[3] = raid_gen4_avx2ext;
|
||||
raid_gen_ptr[4] = raid_gen5_avx2ext;
|
||||
raid_gen_ptr[5] = raid_gen6_avx2ext;
|
||||
#endif
|
||||
raid_rec_ptr[0] = raid_rec1_avx2;
|
||||
raid_rec_ptr[1] = raid_rec2_avx2;
|
||||
raid_rec_ptr[2] = raid_recX_avx2;
|
||||
raid_rec_ptr[3] = raid_recX_avx2;
|
||||
raid_rec_ptr[4] = raid_recX_avx2;
|
||||
raid_rec_ptr[5] = raid_recX_avx2;
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_X86 */
|
||||
|
||||
/* set the default mode */
|
||||
raid_mode(RAID_MODE_CAUCHY);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reference parity computation.
|
||||
*/
|
||||
void raid_gen_ref(int nd, int np, size_t size, void **vv)
|
||||
{
|
||||
uint8_t **v = (uint8_t **)vv;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
uint8_t p[RAID_PARITY_MAX];
|
||||
int j, d;
|
||||
|
||||
for (j = 0; j < np; ++j)
|
||||
p[j] = 0;
|
||||
|
||||
for (d = 0; d < nd; ++d) {
|
||||
uint8_t b = v[d][i];
|
||||
|
||||
for (j = 0; j < np; ++j)
|
||||
p[j] ^= gfmul[b][gfgen[j][d]];
|
||||
}
|
||||
|
||||
for (j = 0; j < np; ++j)
|
||||
v[nd + j][i] = p[j];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Size of the blocks to test.
|
||||
*/
|
||||
#define TEST_SIZE 4096
|
||||
|
||||
/*
|
||||
* Number of data blocks to test.
|
||||
*/
|
||||
#define TEST_COUNT (65536 / TEST_SIZE)
|
||||
|
||||
/*
|
||||
* Parity generation test.
|
||||
*/
|
||||
static int raid_test_par(int nd, int np, size_t size, void **v, void **ref)
|
||||
{
|
||||
int i;
|
||||
void *t[TEST_COUNT + RAID_PARITY_MAX];
|
||||
|
||||
/* setup data */
|
||||
for (i = 0; i < nd; ++i)
|
||||
t[i] = ref[i];
|
||||
|
||||
/* setup parity */
|
||||
for (i = 0; i < np; ++i)
|
||||
t[nd + i] = v[nd + i];
|
||||
|
||||
raid_gen(nd, np, size, t);
|
||||
|
||||
/* compare parity */
|
||||
for (i = 0; i < np; ++i) {
|
||||
if (memcmp(t[nd + i], ref[nd + i], size) != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Recovering test.
|
||||
*/
|
||||
static int raid_test_rec(int nr, int *ir, int nd, int np, size_t size, void **v, void **ref)
|
||||
{
|
||||
int i, j;
|
||||
void *t[TEST_COUNT + RAID_PARITY_MAX];
|
||||
|
||||
/* setup data and parity vector */
|
||||
for (i = 0, j = 0; i < nd + np; ++i) {
|
||||
if (j < nr && ir[j] == i) {
|
||||
/* this block has to be recovered */
|
||||
t[i] = v[i];
|
||||
++j;
|
||||
} else {
|
||||
/* this block is used for recovering */
|
||||
t[i] = ref[i];
|
||||
}
|
||||
}
|
||||
|
||||
raid_rec(nr, ir, nd, np, size, t);
|
||||
|
||||
/* compare all data and parity */
|
||||
for (i = 0; i < nd + np; ++i) {
|
||||
if (t[i] != ref[i]
|
||||
&& memcmp(t[i], ref[i], size) != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Recovering test for data.
|
||||
*/
|
||||
static int raid_test_data(int nr, int *id, int *ip, int nd, int np, size_t size, void **v, void **ref)
|
||||
{
|
||||
int i, j;
|
||||
void *t[TEST_COUNT + RAID_PARITY_MAX];
|
||||
|
||||
/* setup data vector */
|
||||
for (i = 0, j = 0; i < nd; ++i) {
|
||||
if (j < nr && id[j] == i) {
|
||||
/* this block has to be recovered */
|
||||
t[i] = v[i];
|
||||
++j;
|
||||
} else {
|
||||
/* this block is left unchanged */
|
||||
t[i] = ref[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* setup parity vector */
|
||||
for (i = 0, j = 0; i < np; ++i) {
|
||||
if (j < nr && ip[j] == i) {
|
||||
/* this block is used for recovering */
|
||||
t[nd + i] = ref[nd + i];
|
||||
++j;
|
||||
} else {
|
||||
/* this block should not be read or written */
|
||||
t[nd + i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
raid_data(nr, id, ip, nd, size, t);
|
||||
|
||||
/* compare all data and parity */
|
||||
for (i = 0; i < nd; ++i) {
|
||||
if (t[i] != ref[i]
|
||||
&& t[i] != 0
|
||||
&& memcmp(t[i], ref[i], size) != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan test.
|
||||
*/
|
||||
static int raid_test_scan(int nr, int *ir, int nd, int np, size_t size, void **v, void **ref)
|
||||
{
|
||||
int i, j, ret;
|
||||
void *t[TEST_COUNT + RAID_PARITY_MAX];
|
||||
int is[RAID_PARITY_MAX];
|
||||
|
||||
/* setup data and parity vector */
|
||||
for (i = 0, j = 0; i < nd + np; ++i) {
|
||||
if (j < nr && ir[j] == i) {
|
||||
/* this block is bad */
|
||||
t[i] = v[i];
|
||||
++j;
|
||||
} else {
|
||||
/* this block is used for recovering */
|
||||
t[i] = ref[i];
|
||||
}
|
||||
}
|
||||
|
||||
ret = raid_scan(is, nd, np, size, t);
|
||||
|
||||
/* compare identified bad blocks */
|
||||
if (ret != nr)
|
||||
return -1;
|
||||
for (i = 0; i < nr; ++i) {
|
||||
if (ir[i] != is[i]) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Basic functionality self test.
|
||||
*/
|
||||
int raid_selftest(void)
|
||||
{
|
||||
const int nd = TEST_COUNT;
|
||||
const size_t size = TEST_SIZE;
|
||||
const int nv = nd + RAID_PARITY_MAX * 2 + 1;
|
||||
void *v_alloc;
|
||||
void **v;
|
||||
void *ref[nd + RAID_PARITY_MAX];
|
||||
int ir[RAID_PARITY_MAX];
|
||||
int ip[RAID_PARITY_MAX];
|
||||
int i, np;
|
||||
int ret = 0;
|
||||
|
||||
/* ensure to have enough space for data */
|
||||
BUG_ON(nd * size > 65536);
|
||||
|
||||
v = raid_malloc_vector(nd, nv, size, &v_alloc);
|
||||
if (!v) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
memset(v[nv - 1], 0, size);
|
||||
raid_zero(v[nv - 1]);
|
||||
|
||||
/* use the multiplication table as data */
|
||||
for (i = 0; i < nd; ++i)
|
||||
ref[i] = ((uint8_t *)gfmul) + size * i;
|
||||
|
||||
/* setup reference parity */
|
||||
for (i = 0; i < RAID_PARITY_MAX; ++i)
|
||||
ref[nd + i] = v[nd + RAID_PARITY_MAX + i];
|
||||
|
||||
/* compute reference parity */
|
||||
raid_gen_ref(nd, RAID_PARITY_MAX, size, ref);
|
||||
|
||||
/* test for each parity level */
|
||||
for (np = 1; np <= RAID_PARITY_MAX; ++np) {
|
||||
/* test parity generation */
|
||||
ret = raid_test_par(nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* test recovering with broken ending data disks */
|
||||
for (i = 0; i < np; ++i) {
|
||||
/* bad data */
|
||||
ir[i] = nd - np + i;
|
||||
|
||||
/* good parity */
|
||||
ip[i] = i;
|
||||
}
|
||||
|
||||
ret = raid_test_rec(np, ir, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
ret = raid_test_data(np, ir, ip, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* test recovering with broken leading data and broken leading parity */
|
||||
for (i = 0; i < np / 2; ++i) {
|
||||
/* bad data */
|
||||
ir[i] = i;
|
||||
|
||||
/* good parity */
|
||||
ip[i] = (np + 1) / 2 + i;
|
||||
}
|
||||
|
||||
/* bad parity */
|
||||
for (i = 0; i < (np + 1) / 2; ++i)
|
||||
ir[np / 2 + i] = nd + i;
|
||||
|
||||
ret = raid_test_rec(np, ir, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
ret = raid_test_data(np / 2, ir, ip, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* test recovering with broken leading data and broken ending parity */
|
||||
for (i = 0; i < np / 2; ++i) {
|
||||
/* bad data */
|
||||
ir[i] = i;
|
||||
|
||||
/* good parity */
|
||||
ip[i] = i;
|
||||
}
|
||||
|
||||
/* bad parity */
|
||||
for (i = 0; i < (np + 1) / 2; ++i)
|
||||
ir[np / 2 + i] = nd + np - (np + 1) / 2 + i;
|
||||
|
||||
ret = raid_test_rec(np, ir, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
ret = raid_test_data(np / 2, ir, ip, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* scan test with broken data and parity */
|
||||
for (i = 0; i < np / 2; ++i) {
|
||||
/* bad data */
|
||||
ir[i] = i;
|
||||
}
|
||||
for (i = 0; i < (np - 1) / 2; ++i) {
|
||||
/* bad parity */
|
||||
ir[np / 2 + i] = nd + i;
|
||||
}
|
||||
for (i = 0; i < np - 1; ++i) {
|
||||
/* make blocks bad */
|
||||
/* we cannot fill them with 0, because the original */
|
||||
/* data may be already filled with 0 */
|
||||
memset(v[ir[i]], 0x55, size);
|
||||
}
|
||||
|
||||
ret = raid_test_scan(np - 1, ir, nd, np, size, v, ref);
|
||||
if (ret != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
/* scan test with no parity */
|
||||
ret = raid_test_scan(0, 0, nd, 0, size, v, ref);
|
||||
if (ret != -1) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
bail:
|
||||
free(v);
|
||||
free(v_alloc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
586
raid/raid.c
Normal file
586
raid/raid.c
Normal file
@ -0,0 +1,586 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "gf.h"
|
||||
|
||||
/*
|
||||
* This is a RAID implementation working in the Galois Field GF(2^8) with
|
||||
* the primitive polynomial x^8 + x^4 + x^3 + x^2 + 1 (285 decimal), and
|
||||
* supporting up to six parity levels.
|
||||
*
|
||||
* For RAID5 and RAID6 it works as described in H. Peter Anvin's
|
||||
* paper "The mathematics of RAID-6" [1]. Please refer to this paper for a
|
||||
* complete explanation.
|
||||
*
|
||||
* To support triple parity, it was first evaluated and then dropped, an
|
||||
* extension of the same approach, with additional parity coefficients set
|
||||
* as powers of 2^-1, with equations:
|
||||
*
|
||||
* P = sum(Di)
|
||||
* Q = sum(2^i * Di)
|
||||
* R = sum(2^-i * Di) with 0<=i<N
|
||||
*
|
||||
* This approach works well for triple parity and it's very efficient,
|
||||
* because we can implement very fast parallel multiplications and
|
||||
* divisions by 2 in GF(2^8).
|
||||
*
|
||||
* It's also similar to the approach used by ZFS RAIDZ3, with the
|
||||
* difference that ZFS uses powers of 4 instead of 2^-1.
|
||||
*
|
||||
* Unfortunately it doesn't work beyond triple parity, because whatever
|
||||
* value we choose to generate the power coefficients to compute other
|
||||
* parities, the resulting equations are not solvable for some
|
||||
* combinations of missing disks.
|
||||
*
|
||||
* This is expected, because the Vandermonde matrix used to compute the
|
||||
* parity has no guarantee to have all submatrices not singular
|
||||
* [2, Chap 11, Problem 7] and this is a requirement to have
|
||||
* a MDS (Maximum Distance Separable) code [2, Chap 11, Theorem 8].
|
||||
*
|
||||
* To overcome this limitation, we use a Cauchy matrix [3][4] to compute
|
||||
* the parity. A Cauchy matrix has the property to have all the square
|
||||
* submatrices not singular, resulting in always solvable equations,
|
||||
* for any combination of missing disks.
|
||||
*
|
||||
* The problem of this approach is that it requires the use of
|
||||
* generic multiplications, and not only by 2 or 2^-1, potentially
|
||||
* affecting badly the performance.
|
||||
*
|
||||
* Hopefully there is a method to implement parallel multiplications
|
||||
* using SSSE3 or AVX2 instructions [1][5]. Method competitive with the
|
||||
* computation of triple parity using power coefficients.
|
||||
*
|
||||
* Another important property of the Cauchy matrix is that we can setup
|
||||
* the first two rows with coefficients equal to the RAID5 and RAID6 approach
|
||||
* described, resulting in a compatible extension, and requiring SSSE3
|
||||
* or AVX2 instructions only if triple parity or beyond is used.
|
||||
*
|
||||
* The matrix is also adjusted, multiplying each row by a constant factor
|
||||
* to make the first column of all 1, to optimize the computation for
|
||||
* the first disk.
|
||||
*
|
||||
* This results in the matrix A[row,col] defined as:
|
||||
*
|
||||
* 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01...
|
||||
* 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75...
|
||||
* 01 f5 d2 c4 9a 71 f1 7f fc 87 c1 c6 19 2f 40 55 3d ba 53 04 9c 61...
|
||||
* 01 bb a6 d7 c7 07 ce 82 4a 2f a5 9b b6 60 f1 ad e7 f4 06 d2 df 2e...
|
||||
* 01 97 7f 9c 7c 18 bd a2 58 1a da 74 70 a3 e5 47 29 07 f5 80 23 e9...
|
||||
* 01 2b 3f cf 73 2c d6 ed cb 74 15 78 8a c1 17 c9 89 68 21 ab 76 3b...
|
||||
*
|
||||
* This matrix supports 6 level of parity, one for each row, for up to 251
|
||||
* data disks, one for each column, with all the 377,342,351,231 square
|
||||
* submatrices not singular, verified also with brute-force.
|
||||
*
|
||||
* This matrix can be extended to support any number of parities, just
|
||||
* adding additional rows, and removing one column for each new row.
|
||||
* (see mktables.c for more details in how the matrix is generated)
|
||||
*
|
||||
* In details, parity is computed as:
|
||||
*
|
||||
* P = sum(Di)
|
||||
* Q = sum(2^i * Di)
|
||||
* R = sum(A[2,i] * Di)
|
||||
* S = sum(A[3,i] * Di)
|
||||
* T = sum(A[4,i] * Di)
|
||||
* U = sum(A[5,i] * Di) with 0<=i<N
|
||||
*
|
||||
* To recover from a failure of six disks at indexes x,y,z,h,v,w,
|
||||
* with 0<=x<y<z<h<v<w<N, we compute the parity of the available N-6
|
||||
* disks as:
|
||||
*
|
||||
* Pa = sum(Di)
|
||||
* Qa = sum(2^i * Di)
|
||||
* Ra = sum(A[2,i] * Di)
|
||||
* Sa = sum(A[3,i] * Di)
|
||||
* Ta = sum(A[4,i] * Di)
|
||||
* Ua = sum(A[5,i] * Di) with 0<=i<N,i!=x,i!=y,i!=z,i!=h,i!=v,i!=w.
|
||||
*
|
||||
* And if we define:
|
||||
*
|
||||
* Pd = Pa + P
|
||||
* Qd = Qa + Q
|
||||
* Rd = Ra + R
|
||||
* Sd = Sa + S
|
||||
* Td = Ta + T
|
||||
* Ud = Ua + U
|
||||
*
|
||||
* we can sum these two sets of equations, obtaining:
|
||||
*
|
||||
* Pd = Dx + Dy + Dz + Dh + Dv + Dw
|
||||
* Qd = 2^x * Dx + 2^y * Dy + 2^z * Dz + 2^h * Dh + 2^v * Dv + 2^w * Dw
|
||||
* Rd = A[2,x] * Dx + A[2,y] * Dy + A[2,z] * Dz + A[2,h] * Dh + A[2,v] * Dv + A[2,w] * Dw
|
||||
* Sd = A[3,x] * Dx + A[3,y] * Dy + A[3,z] * Dz + A[3,h] * Dh + A[3,v] * Dv + A[3,w] * Dw
|
||||
* Td = A[4,x] * Dx + A[4,y] * Dy + A[4,z] * Dz + A[4,h] * Dh + A[4,v] * Dv + A[4,w] * Dw
|
||||
* Ud = A[5,x] * Dx + A[5,y] * Dy + A[5,z] * Dz + A[5,h] * Dh + A[5,v] * Dv + A[5,w] * Dw
|
||||
*
|
||||
* A linear system always solvable because the coefficients matrix is
|
||||
* always not singular due the properties of the matrix A[].
|
||||
*
|
||||
* Resulting speed in x64, with 8 data disks, using a stripe of 256 KiB,
|
||||
* for a Core i5-4670K Haswell Quad-Core 3.4GHz is:
|
||||
*
|
||||
*             int8   int32   int64    sse2   ssse3    avx2
*   gen1             13339   25438   45438           50588
*   gen2              4115    6514   21840           32201
*   gen3       814                           10154   18613
*   gen4       620                            7569   14229
*   gen5       496                            5149   10051
*   gen6       413                            4239    8190
|
||||
*
|
||||
* Values are in MiB/s of data processed by a single thread, not counting
|
||||
* generated parity.
|
||||
*
|
||||
* You can replicate these results in your machine using the
|
||||
* "raid/test/speedtest.c" program.
|
||||
*
|
||||
* For comparison, the triple parity computation using the power
|
||||
* coefficients "1,2,2^-1" is only a little faster than the one based on
|
||||
* the Cauchy matrix if SSSE3 or AVX2 is present.
|
||||
*
|
||||
*             int8   int32   int64    sse2   ssse3    avx2
*   genz              2337    2874   10920           18944
|
||||
*
|
||||
* In conclusion, the use of power coefficients, and specifically powers
|
||||
* of 1,2,2^-1, is the best option to implement triple parity in CPUs
|
||||
* without SSSE3 and AVX2.
|
||||
* But if a modern CPU with SSSE3 or AVX2 is available, the Cauchy
|
||||
* matrix is the best option because it provides a fast and general
|
||||
* approach working for any number of parities.
|
||||
*
|
||||
* References:
|
||||
* [1] Anvin, "The mathematics of RAID-6", 2004
|
||||
* [2] MacWilliams, Sloane, "The Theory of Error-Correcting Codes", 1977
|
||||
* [3] Blomer, "An XOR-Based Erasure-Resilient Coding Scheme", 1995
|
||||
* [4] Roth, "Introduction to Coding Theory", 2006
|
||||
* [5] Plank, "Screaming Fast Galois Field Arithmetic Using Intel SIMD Instructions", 2013
|
||||
*/
|
||||
|
||||
/**
|
||||
* Generator matrix currently used.
|
||||
*/
|
||||
const uint8_t (*raid_gfgen)[256];
|
||||
|
||||
void raid_mode(int mode)
|
||||
{
|
||||
if (mode == RAID_MODE_VANDERMONDE) {
|
||||
raid_gen_ptr[2] = raid_genz_ptr;
|
||||
raid_gfgen = gfvandermonde;
|
||||
} else {
|
||||
raid_gen_ptr[2] = raid_gen3_ptr;
|
||||
raid_gfgen = gfcauchy;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pointer to the zero filled buffer provided with raid_zero().
 *
 * During recovering it takes the place of the missing data blocks
 * in the vector of blocks.
 */
static void *raid_zero_block;

/*
 * Registers @zero as the zero filled buffer to use in recovering.
 *
 * Only the pointer is stored, the buffer is not copied.
 */
void raid_zero(void *zero)
{
	raid_zero_block = zero;
}
|
||||
|
||||
/*
|
||||
* Forwarders for parity computation.
|
||||
*
|
||||
* These functions compute the parity blocks from the provided data.
|
||||
*
|
||||
* The number of parities to compute is implicit in the position in the
|
||||
* forwarder vector. Position at index #i, computes (#i+1) parities.
|
||||
*
|
||||
* All these functions give the guarantee that parities are written
|
||||
* in order. First parity P, then parity Q, and so on.
|
||||
* This allows to specify the same memory buffer for multiple parities
|
||||
* knowing that you'll get the latest written one.
|
||||
* This characteristic is used by the raid_delta_gen() function to
|
||||
* avoid to damage unused parities in recovering.
|
||||
*
|
||||
* @nd Number of data blocks
|
||||
* @size Size of the blocks pointed by @v. It must be a multiple of 64.
|
||||
* @v Vector of pointers to the blocks of data and parity.
|
||||
* It has (@nd + #parities) elements. The starting elements are the blocks
|
||||
* for data, following with the parity blocks.
|
||||
* Each block has @size bytes.
|
||||
*/
|
||||
void (*raid_gen_ptr[RAID_PARITY_MAX])(int nd, size_t size, void **vv);
|
||||
void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
|
||||
void (*raid_genz_ptr)(int nd, size_t size, void **vv);
|
||||
|
||||
void raid_gen(int nd, int np, size_t size, void **v)
|
||||
{
|
||||
/* enforce limit on size */
|
||||
BUG_ON(size % 64 != 0);
|
||||
|
||||
/* enforce limit on number of failures */
|
||||
BUG_ON(np < 1);
|
||||
BUG_ON(np > RAID_PARITY_MAX);
|
||||
|
||||
raid_gen_ptr[np - 1](nd, size, v);
|
||||
}
|
||||
|
||||
/**
 * Computes in V the inverse of the square matrix M.
 *
 * The inversion uses Gauss-Jordan elimination without any pivoting.
 * This is enough because the matrices we invert have all the square
 * submatrices not singular, and then the diagonal element is never 0
 * when we reach it.
 *
 * Both matrices are stored by rows, with the element at row r and
 * column c at index (r * @n + c).
 *
 * Note that M is used as working area and it's overwritten.
 *
 * @M Matrix to invert with @n rows and @n columns.
 * @V Destination matrix where the result is put.
 * @n Number of rows and columns of the matrix.
 */
void raid_invert(uint8_t *M, uint8_t *V, int n)
{
	int row, col, k;

	/* start with V set as the identity matrix */
	for (row = 0; row < n; ++row)
		for (col = 0; col < n; ++col)
			V[row * n + col] = row == col;

	/* walk the diagonal */
	for (k = 0; k < n; ++k) {
		uint8_t *m_diag = M + k * n;
		uint8_t *v_diag = V + k * n;
		uint8_t f;

		/* guaranteed by the properties of the matrices we invert */
		BUG_ON(M[k * n + k] == 0);

		/* scale the row to get 1 in the diagonal element */
		/* the factor is read before the row is modified */
		f = inv(m_diag[k]);
		for (col = 0; col < n; ++col) {
			m_diag[col] = mul(f, m_diag[col]);
			v_diag[col] = mul(f, v_diag[col]);
		}

		/* clear the column k in all the other rows */
		for (row = 0; row < n; ++row) {
			uint8_t *m_row;
			uint8_t *v_row;

			if (row == k)
				continue;

			m_row = M + row * n;
			v_row = V + row * n;

			/* the factor is read before the row is modified */
			f = m_row[k];
			for (col = 0; col < n; ++col) {
				m_row[col] ^= mul(f, m_diag[col]);
				v_row[col] ^= mul(f, v_diag[col]);
			}
		}
	}
}
|
||||
|
||||
/**
|
||||
* Computes the parity without the missing data blocks
|
||||
* and store it in the buffers of such data blocks.
|
||||
*
|
||||
* This is the parity expressed as Pa,Qa,Ra,Sa,Ta,Ua in the equations.
|
||||
*/
|
||||
void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v)
|
||||
{
|
||||
void *p[RAID_PARITY_MAX];
|
||||
void *pa[RAID_PARITY_MAX];
|
||||
int i, j;
|
||||
int np;
|
||||
void *latest;
|
||||
|
||||
/* total number of parities we are going to process */
|
||||
/* they are both the used and the unused ones */
|
||||
np = ip[nr - 1] + 1;
|
||||
|
||||
/* latest missing data block */
|
||||
latest = v[id[nr - 1]];
|
||||
|
||||
/* setup pointers for delta computation */
|
||||
for (i = 0, j = 0; i < np; ++i) {
|
||||
/* keep a copy of the original parity vector */
|
||||
p[i] = v[nd + i];
|
||||
|
||||
if (ip[j] == i) {
|
||||
/*
|
||||
* Set used parities to point to the missing
|
||||
* data blocks.
|
||||
*
|
||||
* The related data blocks are instead set
|
||||
* to point to the "zero" buffer.
|
||||
*/
|
||||
|
||||
/* the latest parity to use ends the for loop and */
|
||||
/* then it cannot happen to process more of them */
|
||||
BUG_ON(j >= nr);
|
||||
|
||||
/* buffer for missing data blocks */
|
||||
pa[j] = v[id[j]];
|
||||
|
||||
/* set at zero the missing data blocks */
|
||||
v[id[j]] = raid_zero_block;
|
||||
|
||||
/* compute the parity over the missing data blocks */
|
||||
v[nd + i] = pa[j];
|
||||
|
||||
/* check for the next used entry */
|
||||
++j;
|
||||
} else {
|
||||
/*
|
||||
* Unused parities are going to be rewritten with
|
||||
* not significative data, becase we don't have
|
||||
* functions able to compute only a subset of
|
||||
* parities.
|
||||
*
|
||||
* To avoid this, we reuse parity buffers,
|
||||
* assuming that all the parity functions write
|
||||
* parities in order.
|
||||
*
|
||||
* We assign the unused parity block to the same
|
||||
* block of the latest used parity that we know it
|
||||
* will be written.
|
||||
*
|
||||
* This means that this block will be written
|
||||
* multiple times and only the latest write will
|
||||
* contain the correct data.
|
||||
*/
|
||||
v[nd + i] = latest;
|
||||
}
|
||||
}
|
||||
|
||||
/* all the parities have to be processed */
|
||||
BUG_ON(j != nr);
|
||||
|
||||
/* recompute the parity, note that np may be smaller than the */
|
||||
/* total number of parities available */
|
||||
raid_gen(nd, np, size, v);
|
||||
|
||||
/* restore data buffers as before */
|
||||
for (j = 0; j < nr; ++j)
|
||||
v[id[j]] = pa[j];
|
||||
|
||||
/* restore parity buffers as before */
|
||||
for (i = 0; i < np; ++i)
|
||||
v[nd + i] = p[i];
|
||||
}
|
||||
|
||||
/**
 * Recovers one missing data block using the first parity.
 *
 * From the equation:
 *
 * Pd = Dx
 *
 * the missing block is just the parity of all the other data blocks
 * and of the parity block P. Then we swap the pointers of the missing
 * data block and of the parity block in @v, and we let raid_gen()
 * to write the result directly in the buffer of the missing block.
 *
 * The zero buffer is not needed in this case.
 */
void raid_rec1of1(int *id, int nd, size_t size, void **v)
{
	int x = id[0];
	void *parity = v[nd];
	void *missing = v[x];

	/* the parity block is read as it was the data block */
	v[x] = parity;

	/* and the result is written in the buffer of the missing block */
	v[nd] = missing;

	/* compute */
	raid_gen(nd, 1, size, v);

	/* put the pointers back as they were */
	v[x] = missing;
	v[nd] = parity;
}
|
||||
|
||||
/**
 * Recovers two missing data blocks using the parities P and Q.
 *
 * Calling x = id[0] and y = id[1], the equations to solve are:
 *
 * Pd = Dx + Dy
 * Qd = 2^x * Dx + 2^y * Dy
 *
 * with solution:
 *
 *               1                    2^(-x)
 * Dy = --------------- * Pd + --------------- * Qd
 *       2^(y-x) + 1            2^(y-x) + 1
 *
 * Dx = Dy + Pd
 *
 * The second coefficient is computed in the equivalent form
 * 1 / (2^x + 2^y).
 *
 * The conditions:
 *
 * 2^x != 0
 * 2^(y-x) + 1 != 0
 *
 * are always satisfied for any 0<=x<y<255.
 */
void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv)
{
	uint8_t **v = (uint8_t **)vv;
	int x = id[0];
	int y = id[1];
	const uint8_t *mul_p; /* multiplication table applied at Pd */
	const uint8_t *mul_q; /* multiplication table applied at Qd */
	uint8_t *p;
	uint8_t *q;
	uint8_t *dx;
	uint8_t *dy;
	size_t off;

	/* get multiplication tables */
	mul_p = table(inv(pow2(y - x) ^ 1));
	mul_q = table(inv(pow2(x) ^ pow2(y)));

	/* compute delta parity, stored in the missing data buffers */
	raid_delta_gen(2, id, ip, nd, size, vv);

	p = v[nd];
	q = v[nd + 1];
	dx = v[x];
	dy = v[y];

	for (off = 0; off < size; ++off) {
		/* delta */
		uint8_t Pd = p[off] ^ dx[off];
		uint8_t Qd = q[off] ^ dy[off];

		/* reconstruct */
		uint8_t Dy = mul_p[Pd] ^ mul_q[Qd];

		/* set */
		dx[off] = Pd ^ Dy;
		dy[off] = Dy;
	}
}
|
||||
|
||||
/*
|
||||
* Forwarders for data recovery.
|
||||
*
|
||||
* These functions recover data blocks using the specified parity
|
||||
* to recompute the missing data.
|
||||
*
|
||||
* Note that the format of vectors @id/@ip is different than raid_rec().
|
||||
* For example, in the vector @ip the first parity is represented with the
|
||||
* value 0 and not @nd.
|
||||
*
|
||||
* @nr Number of failed data blocks to recover.
|
||||
* @id[] Vector of @nr indexes of the data blocks to recover.
|
||||
* The indexes start from 0. They must be in order.
|
||||
* @ip[] Vector of @nr indexes of the parity blocks to use in the recovering.
|
||||
* The indexes start from 0. They must be in order.
|
||||
* @nd Number of data blocks.
|
||||
* @np Number of parity blocks.
|
||||
* @size Size of the blocks pointed by @v. It must be a multiple of 64.
|
||||
* @v Vector of pointers to the blocks of data and parity.
|
||||
* It has (@nd + @np) elements. The starting elements are the blocks
|
||||
* for data, following with the parity blocks.
|
||||
* Each block has @size bytes.
|
||||
*/
|
||||
void (*raid_rec_ptr[RAID_PARITY_MAX])(
|
||||
int nr, int *id, int *ip, int nd, size_t size, void **vv);
|
||||
|
||||
void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
|
||||
{
|
||||
int nrd; /* number of data blocks to recover */
|
||||
int nrp; /* number of parity blocks to recover */
|
||||
|
||||
/* enforce limit on size */
|
||||
BUG_ON(size % 64 != 0);
|
||||
|
||||
/* enforce limit on number of failures */
|
||||
BUG_ON(nr > np);
|
||||
BUG_ON(np > RAID_PARITY_MAX);
|
||||
|
||||
/* enforce order in index vector */
|
||||
BUG_ON(nr >= 2 && ir[0] >= ir[1]);
|
||||
BUG_ON(nr >= 3 && ir[1] >= ir[2]);
|
||||
BUG_ON(nr >= 4 && ir[2] >= ir[3]);
|
||||
BUG_ON(nr >= 5 && ir[3] >= ir[4]);
|
||||
BUG_ON(nr >= 6 && ir[4] >= ir[5]);
|
||||
|
||||
/* enforce limit on index vector */
|
||||
BUG_ON(nr > 0 && ir[nr-1] >= nd + np);
|
||||
|
||||
/* count the number of data blocks to recover */
|
||||
nrd = 0;
|
||||
while (nrd < nr && ir[nrd] < nd)
|
||||
++nrd;
|
||||
|
||||
/* all the remaining are parity */
|
||||
nrp = nr - nrd;
|
||||
|
||||
/* enforce limit on number of failures */
|
||||
BUG_ON(nrd > nd);
|
||||
BUG_ON(nrp > np);
|
||||
|
||||
/* if failed data is present */
|
||||
if (nrd != 0) {
|
||||
int ip[RAID_PARITY_MAX];
|
||||
int i, j, k;
|
||||
|
||||
/* setup the vector of parities to use */
|
||||
for (i = 0, j = 0, k = 0; i < np; ++i) {
|
||||
if (j < nrp && ir[nrd + j] == nd + i) {
|
||||
/* this parity has to be recovered */
|
||||
++j;
|
||||
} else {
|
||||
/* this parity is used for recovering */
|
||||
ip[k] = i;
|
||||
++k;
|
||||
}
|
||||
}
|
||||
|
||||
/* recover the nrd data blocks specified in ir[], */
|
||||
/* using the first nrd parity in ip[] for recovering */
|
||||
raid_rec_ptr[nrd - 1](nrd, ir, ip, nd, size, v);
|
||||
}
|
||||
|
||||
/* recompute all the parities up to the last bad one */
|
||||
if (nrp != 0)
|
||||
raid_gen(nd, ir[nr - 1] - nd + 1, size, v);
|
||||
}
|
||||
|
||||
void raid_data(int nr, int *id, int *ip, int nd, size_t size, void **v)
|
||||
{
|
||||
/* enforce limit on size */
|
||||
BUG_ON(size % 64 != 0);
|
||||
|
||||
/* enforce limit on number of failures */
|
||||
BUG_ON(nr > nd);
|
||||
BUG_ON(nr > RAID_PARITY_MAX);
|
||||
|
||||
/* enforce order in index vector for data */
|
||||
BUG_ON(nr >= 2 && id[0] >= id[1]);
|
||||
BUG_ON(nr >= 3 && id[1] >= id[2]);
|
||||
BUG_ON(nr >= 4 && id[2] >= id[3]);
|
||||
BUG_ON(nr >= 5 && id[3] >= id[4]);
|
||||
BUG_ON(nr >= 6 && id[4] >= id[5]);
|
||||
|
||||
/* enforce limit on index vector for data */
|
||||
BUG_ON(nr > 0 && id[nr-1] >= nd);
|
||||
|
||||
/* enforce order in index vector for parity */
|
||||
BUG_ON(nr >= 2 && ip[0] >= ip[1]);
|
||||
BUG_ON(nr >= 3 && ip[1] >= ip[2]);
|
||||
BUG_ON(nr >= 4 && ip[2] >= ip[3]);
|
||||
BUG_ON(nr >= 5 && ip[3] >= ip[4]);
|
||||
BUG_ON(nr >= 6 && ip[4] >= ip[5]);
|
||||
|
||||
/* if failed data is present */
|
||||
if (nr != 0)
|
||||
raid_rec_ptr[nr - 1](nr, id, ip, nd, size, v);
|
||||
}
|
||||
|
229
raid/raid.h
Normal file
229
raid/raid.h
Normal file
@ -0,0 +1,229 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_H
#define __RAID_H

/* size_t is used in the prototypes below */
#include <stddef.h>

/**
 * RAID mode supporting up to 6 parities.
 *
 * It requires SSSE3 to get good performance with triple or more parities.
 *
 * This is the default mode set after calling raid_init().
 */
#define RAID_MODE_CAUCHY 0

/**
 * RAID mode supporting up to 3 parities.
 *
 * It has a fast triple parity implementation without SSSE3, but it cannot
 * go beyond triple parity.
 *
 * This is mostly intended for low end CPUs like ARM and AMD Athlon.
 */
#define RAID_MODE_VANDERMONDE 1

/**
 * Maximum number of parity disks supported.
 */
#define RAID_PARITY_MAX 6

/**
 * Maximum number of data disks supported.
 */
#define RAID_DATA_MAX 251

/**
 * Initializes the RAID system.
 *
 * You must call this function before any other.
 *
 * The RAID system is initialized in the RAID_MODE_CAUCHY mode.
 */
void raid_init(void);

/**
 * Runs a basic functionality self test.
 *
 * The test is immediate, and it's intended to be run at application
 * startup to check the integrity of the RAID system.
 *
 * It returns 0 on success.
 */
int raid_selftest(void);

/**
 * Sets the mode to use. One of RAID_MODE_*.
 *
 * You can change mode at any time, and it will affect next calls to raid_gen(),
 * raid_rec() and raid_data().
 *
 * The two modes are compatible for the first two levels of parity.
 * The third one is different.
 */
void raid_mode(int mode);

/**
 * Sets the zero buffer to use in recovering.
 *
 * Before calling raid_rec() and raid_data() you must provide a memory
 * buffer filled with zero with the same size of the blocks to recover.
 *
 * This buffer is only read and never written.
 */
void raid_zero(void *zero);

/**
 * Computes parity blocks.
 *
 * This function computes the specified number of parity blocks of the
 * provided set of data blocks.
 *
 * Each parity block allows to recover one data block.
 *
 * @nd Number of data blocks.
 * @np Number of parities blocks to compute.
 * @size Size of the blocks pointed by @v. It must be a multiple of 64.
 * @v Vector of pointers to the blocks of data and parity.
 *   It has (@nd + @np) elements. The starting elements are the blocks for
 *   data, following with the parity blocks.
 *   Data blocks are only read and not modified. Parity blocks are written.
 *   Each block has @size bytes.
 */
void raid_gen(int nd, int np, size_t size, void **v);

/**
 * Recovers failures in data and parity blocks.
 *
 * This function recovers all the data and parity blocks marked as bad
 * in the @ir vector.
 *
 * Ensure to have @nr <= @np, otherwise recovering is not possible.
 *
 * The parities blocks used for recovering are automatically selected from
 * the ones NOT present in the @ir vector.
 *
 * In case there are more parity blocks than needed, the parities at lower
 * indexes are used in the recovering, and the others are ignored.
 *
 * Note that no internal integrity check is done when recovering. If the
 * provided parities are correct, the resulting data will be correct.
 * If parities are wrong, the resulting recovered data will be wrong.
 * This happens even in the case you have more parities blocks than needed,
 * and some form of integrity verification would be possible.
 *
 * @nr Number of failed data and parity blocks to recover.
 * @ir[] Vector of @nr indexes of the failed data and parity blocks.
 *   The indexes start from 0. They must be in order.
 *   The first parity is represented with value @nd, the second with value
 *   @nd + 1, just like positions in the @v vector.
 * @nd Number of data blocks.
 * @np Number of parity blocks.
 * @size Size of the blocks pointed by @v. It must be a multiple of 64.
 * @v Vector of pointers to the blocks of data and parity.
 *   It has (@nd + @np) elements. The starting elements are the blocks
 *   for data, following with the parity blocks.
 *   Each block has @size bytes.
 */
void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v);

/**
 * Recovers failures in data blocks only.
 *
 * This function recovers all the data blocks marked as bad in the @id vector.
 * The parity blocks are not modified.
 *
 * @nr Number of failed data blocks to recover.
 * @id[] Vector of @nr indexes of the data blocks to recover.
 *   The indexes start from 0. They must be in order.
 * @ip[] Vector of @nr indexes of the parity blocks to use for recovering.
 *   The indexes start from 0. They must be in order.
 * @nd Number of data blocks.
 * @size Size of the blocks pointed by @v. It must be a multiple of 64.
 * @v Vector of pointers to the blocks of data and parity.
 *   It has (@nd + @ip[@nr - 1] + 1) elements. The starting elements are the
 *   blocks for data, following with the parity blocks.
 *   Each block has @size bytes.
 */
void raid_data(int nr, int *id, int *ip, int nd, size_t size, void **v);

/**
 * Check the provided failed blocks combination.
 *
 * This function checks if the specified failed blocks combination satisfies
 * the redundancy information. A combination is assumed matching, if the
 * remaining valid parity is matching the expected value after recovering.
 *
 * The number of failed blocks @nr must be strictly less than the number of
 * parities @np, because you need one more parity to validate the recovering.
 *
 * No data or parity blocks are modified.
 *
 * @nr Number of failed data and parity blocks.
 * @ir[] Vector of @nr indexes of the failed data and parity blocks.
 *   The indexes start from 0. They must be in order.
 *   The first parity is represented with value @nd, the second with value
 *   @nd + 1, just like positions in the @v vector.
 * @nd Number of data blocks.
 * @np Number of parity blocks.
 * @size Size of the blocks pointed by @v. It must be a multiple of 64.
 * @v Vector of pointers to the blocks of data and parity.
 *   It has (@nd + @np) elements. The starting elements are the blocks
 *   for data, following with the parity blocks.
 *   Each block has @size bytes.
 * @return 0 if the check is satisfied. -1 otherwise.
 */
int raid_check(int nr, int *ir, int nd, int np, size_t size, void **v);

/**
 * Scan for failed blocks.
 *
 * This function identifies the failed data and parity blocks using the
 * available redundancy.
 *
 * It uses a brute force method, so the call can be expensive.
 * The expected execution time is proportional to the binomial coefficient
 * @np + @nd choose @np - 1, usually written as:
 *
 * ( @np + @nd )
 * (           )
 * (  @np - 1  )
 *
 * No data or parity blocks are modified.
 *
 * The failed block indexes are returned in the @ir vector.
 * It must have space for at least @np - 1 values.
 *
 * The returned @ir vector can then be used in a raid_rec() call to recover
 * the failed data and parity blocks.
 *
 * @ir[] Vector filled with the indexes of the failed data and parity blocks.
 *   The indexes start from 0 and they are in order.
 *   The first parity is represented with value @nd, the second with value
 *   @nd + 1, just like positions in the @v vector.
 * @nd Number of data blocks.
 * @np Number of parity blocks.
 * @size Size of the blocks pointed by @v. It must be a multiple of 64.
 * @v Vector of pointers to the blocks of data and parity.
 *   It has (@nd + @np) elements. The starting elements are the blocks
 *   for data, following with the parity blocks.
 *   Each block has @size bytes.
 * @return Number of block indexes returned in the @ir vector.
 *   0 if no error is detected.
 *   -1 if it's not possible to identify the failed disks.
 */
int raid_scan(int *ir, int nd, int np, size_t size, void **v);

#endif
|
||||
|
14696
raid/tables.c
Normal file
14696
raid/tables.c
Normal file
File diff suppressed because it is too large
Load Diff
145
raid/tag.c
Normal file
145
raid/tag.c
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
static struct raid_func {
|
||||
const char *name;
|
||||
void (*p)();
|
||||
} RAID_FUNC[] = {
|
||||
{ "int8", raid_gen3_int8 },
|
||||
{ "int8", raid_gen4_int8 },
|
||||
{ "int8", raid_gen5_int8 },
|
||||
{ "int8", raid_gen6_int8 },
|
||||
{ "int32", raid_gen1_int32 },
|
||||
{ "int64", raid_gen1_int64 },
|
||||
{ "int32", raid_gen2_int32 },
|
||||
{ "int64", raid_gen2_int64 },
|
||||
{ "int32", raid_genz_int32 },
|
||||
{ "int64", raid_genz_int64 },
|
||||
{ "int8", raid_rec1_int8 },
|
||||
{ "int8", raid_rec2_int8 },
|
||||
{ "int8", raid_recX_int8 },
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSE2
|
||||
{ "sse2", raid_gen1_sse2 },
|
||||
{ "sse2", raid_gen2_sse2 },
|
||||
{ "sse2", raid_genz_sse2 },
|
||||
#endif
|
||||
#ifdef CONFIG_SSSE3
|
||||
{ "ssse3", raid_gen3_ssse3 },
|
||||
{ "ssse3", raid_gen4_ssse3 },
|
||||
{ "ssse3", raid_gen5_ssse3 },
|
||||
{ "ssse3", raid_gen6_ssse3 },
|
||||
{ "ssse3", raid_rec1_ssse3 },
|
||||
{ "ssse3", raid_rec2_ssse3 },
|
||||
{ "ssse3", raid_recX_ssse3 },
|
||||
#endif
|
||||
#ifdef CONFIG_AVX2
|
||||
{ "avx2", raid_gen1_avx2 },
|
||||
{ "avx2", raid_gen2_avx2 },
|
||||
{ "avx2", raid_rec1_avx2 },
|
||||
{ "avx2", raid_rec2_avx2 },
|
||||
{ "avx2", raid_recX_avx2 },
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_SSE2
|
||||
{ "sse2e", raid_gen2_sse2ext },
|
||||
{ "sse2e", raid_genz_sse2ext },
|
||||
#endif
|
||||
#ifdef CONFIG_SSSE3
|
||||
{ "ssse3e", raid_gen3_ssse3ext },
|
||||
{ "ssse3e", raid_gen4_ssse3ext },
|
||||
{ "ssse3e", raid_gen5_ssse3ext },
|
||||
{ "ssse3e", raid_gen6_ssse3ext },
|
||||
#endif
|
||||
#ifdef CONFIG_AVX2
|
||||
{ "avx2e", raid_gen3_avx2ext },
|
||||
{ "avx2e", raid_genz_avx2ext },
|
||||
{ "avx2e", raid_gen4_avx2ext },
|
||||
{ "avx2e", raid_gen5_avx2ext },
|
||||
{ "avx2e", raid_gen6_avx2ext },
|
||||
#endif
|
||||
#endif
|
||||
{ 0, 0 }
|
||||
};
|
||||
|
||||
static const char *raid_tag(void (*func)())
|
||||
{
|
||||
struct raid_func *i = RAID_FUNC;
|
||||
|
||||
while (i->name != 0) {
|
||||
if (i->p == func)
|
||||
return i->name;
|
||||
++i;
|
||||
}
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
return "unknown";
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
const char *raid_gen1_tag(void)
|
||||
{
|
||||
return raid_tag(raid_gen_ptr[0]);
|
||||
}
|
||||
|
||||
const char *raid_gen2_tag(void)
|
||||
{
|
||||
return raid_tag(raid_gen_ptr[1]);
|
||||
}
|
||||
|
||||
const char *raid_genz_tag(void)
|
||||
{
|
||||
return raid_tag(raid_genz_ptr);
|
||||
}
|
||||
|
||||
const char *raid_gen3_tag(void)
|
||||
{
|
||||
return raid_tag(raid_gen_ptr[2]);
|
||||
}
|
||||
|
||||
const char *raid_gen4_tag(void)
|
||||
{
|
||||
return raid_tag(raid_gen_ptr[3]);
|
||||
}
|
||||
|
||||
const char *raid_gen5_tag(void)
|
||||
{
|
||||
return raid_tag(raid_gen_ptr[4]);
|
||||
}
|
||||
|
||||
const char *raid_gen6_tag(void)
|
||||
{
|
||||
return raid_tag(raid_gen_ptr[5]);
|
||||
}
|
||||
|
||||
const char *raid_rec1_tag(void)
|
||||
{
|
||||
return raid_tag(raid_rec_ptr[0]);
|
||||
}
|
||||
|
||||
const char *raid_rec2_tag(void)
|
||||
{
|
||||
return raid_tag(raid_rec_ptr[1]);
|
||||
}
|
||||
|
||||
const char *raid_recX_tag(void)
|
||||
{
|
||||
return raid_tag(raid_rec_ptr[2]);
|
||||
}
|
||||
|
452
raid/test.c
Normal file
452
raid/test.c
Normal file
@ -0,0 +1,452 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "cpu.h"
|
||||
#include "combo.h"
|
||||
#include "memory.h"
|
||||
|
||||
/**
 * Binomial coefficient of n over r.
 *
 * Evaluated iteratively with the multiplicative formula, so the cost is
 * linear in r rather than exponential as with Pascal's recurrence.
 *
 * Returns 1 when r == 0 or n == r. Any other pair outside 0 < r < n has
 * no subsets to count and yields 0 (a plain recursive evaluation would
 * never terminate on such arguments).
 */
static int ibc(int n, int r)
{
	int c;
	int k;

	if (r == 0 || n == r)
		return 1;

	if (r < 0 || r > n)
		return 0;

	/* C(n, r) == C(n, n - r): iterate over the smaller of the two */
	if (r > n - r)
		r = n - r;

	c = 1;
	for (k = 1; k <= r; ++k) {
		/*
		 * Here c is C(n - r + k - 1, k - 1); multiplying before
		 * dividing gives C(n - r + k, k) exactly, with no remainder.
		 */
		c = c * (n - r + k) / k;
	}

	return c;
}
|
||||
|
||||
/**
 * Integer power n ^ r: the product of r factors n, which is 1 for r == 0.
 */
static int ipow(int n, int r)
{
	int result;

	for (result = 1; r != 0; --r)
		result *= n;

	return result;
}
|
||||
|
||||
int raid_test_combo(void)
|
||||
{
|
||||
int r;
|
||||
int count;
|
||||
int p[RAID_PARITY_MAX];
|
||||
|
||||
for (r = 1; r <= RAID_PARITY_MAX; ++r) {
|
||||
/* count combination (r of RAID_PARITY_MAX) elements */
|
||||
count = 0;
|
||||
combination_first(r, RAID_PARITY_MAX, p);
|
||||
|
||||
do {
|
||||
++count;
|
||||
} while (combination_next(r, RAID_PARITY_MAX, p));
|
||||
|
||||
if (count != ibc(RAID_PARITY_MAX, r)) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 1; r <= RAID_PARITY_MAX; ++r) {
|
||||
/* count permutation (r of RAID_PARITY_MAX) elements */
|
||||
count = 0;
|
||||
permutation_first(r, RAID_PARITY_MAX, p);
|
||||
|
||||
do {
|
||||
++count;
|
||||
} while (permutation_next(r, RAID_PARITY_MAX, p));
|
||||
|
||||
if (count != ipow(RAID_PARITY_MAX, r)) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int raid_test_insert(void)
|
||||
{
|
||||
int p[RAID_PARITY_MAX];
|
||||
int r;
|
||||
|
||||
for (r = 1; r <= RAID_PARITY_MAX; ++r) {
|
||||
permutation_first(r, RAID_PARITY_MAX, p);
|
||||
do {
|
||||
int i[RAID_PARITY_MAX];
|
||||
int j;
|
||||
|
||||
/* insert in order */
|
||||
for (j = 0; j < r; ++j)
|
||||
raid_insert(j, i, p[j]);
|
||||
|
||||
/* check order */
|
||||
for (j = 1; j < r; ++j) {
|
||||
if (i[j - 1] > i[j]) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
} while (permutation_next(r, RAID_PARITY_MAX, p));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int raid_test_sort(void)
|
||||
{
|
||||
int p[RAID_PARITY_MAX];
|
||||
int r;
|
||||
|
||||
for (r = 1; r <= RAID_PARITY_MAX; ++r) {
|
||||
permutation_first(r, RAID_PARITY_MAX, p);
|
||||
do {
|
||||
int i[RAID_PARITY_MAX];
|
||||
int j;
|
||||
|
||||
/* make a copy */
|
||||
for (j = 0; j < r; ++j)
|
||||
i[j] = p[j];
|
||||
|
||||
raid_sort(r, i);
|
||||
|
||||
/* check order */
|
||||
for (j = 1; j < r; ++j) {
|
||||
if (i[j - 1] > i[j]) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
} while (permutation_next(r, RAID_PARITY_MAX, p));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int raid_test_rec(int mode, int nd, size_t size)
|
||||
{
|
||||
void (*f[RAID_PARITY_MAX][4])(
|
||||
int nr, int *id, int *ip, int nd, size_t size, void **vbuf);
|
||||
void *v_alloc;
|
||||
void **v;
|
||||
void **data;
|
||||
void **parity;
|
||||
void **test;
|
||||
void *data_save[RAID_PARITY_MAX];
|
||||
void *parity_save[RAID_PARITY_MAX];
|
||||
void *waste;
|
||||
int nv;
|
||||
int id[RAID_PARITY_MAX];
|
||||
int ip[RAID_PARITY_MAX];
|
||||
int i;
|
||||
int j;
|
||||
int nr;
|
||||
int nf[RAID_PARITY_MAX];
|
||||
int np;
|
||||
|
||||
raid_mode(mode);
|
||||
if (mode == RAID_MODE_CAUCHY)
|
||||
np = RAID_PARITY_MAX;
|
||||
else
|
||||
np = 3;
|
||||
|
||||
nv = nd + np * 2 + 2;
|
||||
|
||||
v = raid_malloc_vector(nd, nv, size, &v_alloc);
|
||||
if (!v) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
data = v;
|
||||
parity = v + nd;
|
||||
test = v + nd + np;
|
||||
|
||||
for (i = 0; i < np; ++i)
|
||||
parity_save[i] = parity[i];
|
||||
|
||||
memset(v[nv - 2], 0, size);
|
||||
raid_zero(v[nv - 2]);
|
||||
|
||||
waste = v[nv - 1];
|
||||
|
||||
/* fill with pseudo-random data with the arbitrary seed "1" */
|
||||
raid_mrand_vector(1, nd, size, v);
|
||||
|
||||
/* setup recov functions */
|
||||
for (i = 0; i < np; ++i) {
|
||||
nf[i] = 0;
|
||||
if (i == 0) {
|
||||
f[i][nf[i]++] = raid_rec1_int8;
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSSE3
|
||||
if (raid_cpu_has_ssse3())
|
||||
f[i][nf[i]++] = raid_rec1_ssse3;
|
||||
#endif
|
||||
#ifdef CONFIG_AVX2
|
||||
if (raid_cpu_has_avx2())
|
||||
f[i][nf[i]++] = raid_rec1_avx2;
|
||||
#endif
|
||||
#endif
|
||||
} else if (i == 1) {
|
||||
f[i][nf[i]++] = raid_rec2_int8;
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSSE3
|
||||
if (raid_cpu_has_ssse3())
|
||||
f[i][nf[i]++] = raid_rec2_ssse3;
|
||||
#endif
|
||||
#ifdef CONFIG_AVX2
|
||||
if (raid_cpu_has_avx2())
|
||||
f[i][nf[i]++] = raid_rec2_avx2;
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
f[i][nf[i]++] = raid_recX_int8;
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSSE3
|
||||
if (raid_cpu_has_ssse3())
|
||||
f[i][nf[i]++] = raid_recX_ssse3;
|
||||
#endif
|
||||
#ifdef CONFIG_AVX2
|
||||
if (raid_cpu_has_avx2())
|
||||
f[i][nf[i]++] = raid_recX_avx2;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* compute the parity */
|
||||
raid_gen_ref(nd, np, size, v);
|
||||
|
||||
/* set all the parity to the waste v */
|
||||
for (i = 0; i < np; ++i)
|
||||
parity[i] = waste;
|
||||
|
||||
/* all parity levels */
|
||||
for (nr = 1; nr <= np; ++nr) {
|
||||
/* all combinations (nr of nd) disks */
|
||||
combination_first(nr, nd, id);
|
||||
do {
|
||||
/* all combinations (nr of np) parities */
|
||||
combination_first(nr, np, ip);
|
||||
do {
|
||||
/* for each recover function */
|
||||
for (j = 0; j < nf[nr - 1]; ++j) {
|
||||
/* set */
|
||||
for (i = 0; i < nr; ++i) {
|
||||
/* remove the missing data */
|
||||
data_save[i] = data[id[i]];
|
||||
data[id[i]] = test[i];
|
||||
/* set the parity to use */
|
||||
parity[ip[i]] = parity_save[ip[i]];
|
||||
}
|
||||
|
||||
/* recover */
|
||||
f[nr - 1][j](nr, id, ip, nd, size, v);
|
||||
|
||||
/* check */
|
||||
for (i = 0; i < nr; ++i) {
|
||||
if (memcmp(test[i], data_save[i], size) != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
/* restore */
|
||||
for (i = 0; i < nr; ++i) {
|
||||
/* restore the data */
|
||||
data[id[i]] = data_save[i];
|
||||
/* restore the parity */
|
||||
parity[ip[i]] = waste;
|
||||
}
|
||||
}
|
||||
} while (combination_next(nr, np, ip));
|
||||
} while (combination_next(nr, nd, id));
|
||||
}
|
||||
|
||||
free(v_alloc);
|
||||
free(v);
|
||||
return 0;
|
||||
|
||||
bail:
|
||||
/* LCOV_EXCL_START */
|
||||
free(v_alloc);
|
||||
free(v);
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
int raid_test_par(int mode, int nd, size_t size)
|
||||
{
|
||||
void (*f[64])(int nd, size_t size, void **vbuf);
|
||||
void *v_alloc;
|
||||
void **v;
|
||||
int nv;
|
||||
int i, j;
|
||||
int nf;
|
||||
int np;
|
||||
|
||||
raid_mode(mode);
|
||||
if (mode == RAID_MODE_CAUCHY)
|
||||
np = RAID_PARITY_MAX;
|
||||
else
|
||||
np = 3;
|
||||
|
||||
nv = nd + np * 2;
|
||||
|
||||
v = raid_malloc_vector(nd, nv, size, &v_alloc);
|
||||
if (!v) {
|
||||
/* LCOV_EXCL_START */
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* check memory */
|
||||
if (raid_mtest_vector(nv, size, v) != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* fill with pseudo-random data with the arbitrary seed "2" */
|
||||
raid_mrand_vector(2, nv, size, v);
|
||||
|
||||
/* compute the parity */
|
||||
raid_gen_ref(nd, np, size, v);
|
||||
|
||||
/* copy in back buffers */
|
||||
for (i = 0; i < np; ++i)
|
||||
memcpy(v[nd + np + i], v[nd + i], size);
|
||||
|
||||
/* load all the available functions */
|
||||
nf = 0;
|
||||
|
||||
f[nf++] = raid_gen1_int32;
|
||||
f[nf++] = raid_gen1_int64;
|
||||
f[nf++] = raid_gen2_int32;
|
||||
f[nf++] = raid_gen2_int64;
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSE2
|
||||
if (raid_cpu_has_sse2()) {
|
||||
f[nf++] = raid_gen1_sse2;
|
||||
f[nf++] = raid_gen2_sse2;
|
||||
#ifdef CONFIG_X86_64
|
||||
f[nf++] = raid_gen2_sse2ext;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AVX2
|
||||
if (raid_cpu_has_avx2()) {
|
||||
f[nf++] = raid_gen1_avx2;
|
||||
f[nf++] = raid_gen2_avx2;
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_X86 */
|
||||
|
||||
if (mode == RAID_MODE_CAUCHY) {
|
||||
f[nf++] = raid_gen3_int8;
|
||||
f[nf++] = raid_gen4_int8;
|
||||
f[nf++] = raid_gen5_int8;
|
||||
f[nf++] = raid_gen6_int8;
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSSE3
|
||||
if (raid_cpu_has_ssse3()) {
|
||||
f[nf++] = raid_gen3_ssse3;
|
||||
f[nf++] = raid_gen4_ssse3;
|
||||
f[nf++] = raid_gen5_ssse3;
|
||||
f[nf++] = raid_gen6_ssse3;
|
||||
#ifdef CONFIG_X86_64
|
||||
f[nf++] = raid_gen3_ssse3ext;
|
||||
f[nf++] = raid_gen4_ssse3ext;
|
||||
f[nf++] = raid_gen5_ssse3ext;
|
||||
f[nf++] = raid_gen6_ssse3ext;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AVX2
|
||||
#ifdef CONFIG_X86_64
|
||||
if (raid_cpu_has_avx2()) {
|
||||
f[nf++] = raid_gen3_avx2ext;
|
||||
f[nf++] = raid_gen4_avx2ext;
|
||||
f[nf++] = raid_gen5_avx2ext;
|
||||
f[nf++] = raid_gen6_avx2ext;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif /* CONFIG_X86 */
|
||||
} else {
|
||||
f[nf++] = raid_genz_int32;
|
||||
f[nf++] = raid_genz_int64;
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#ifdef CONFIG_SSE2
|
||||
if (raid_cpu_has_sse2()) {
|
||||
f[nf++] = raid_genz_sse2;
|
||||
#ifdef CONFIG_X86_64
|
||||
f[nf++] = raid_genz_sse2ext;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AVX2
|
||||
#ifdef CONFIG_X86_64
|
||||
if (raid_cpu_has_avx2())
|
||||
f[nf++] = raid_genz_avx2ext;
|
||||
#endif
|
||||
#endif
|
||||
#endif /* CONFIG_X86 */
|
||||
}
|
||||
|
||||
/* check all the functions */
|
||||
for (j = 0; j < nf; ++j) {
|
||||
/* compute parity */
|
||||
f[j](nd, size, v);
|
||||
|
||||
/* check it */
|
||||
for (i = 0; i < np; ++i) {
|
||||
if (memcmp(v[nd + np + i], v[nd + i], size) != 0) {
|
||||
/* LCOV_EXCL_START */
|
||||
goto bail;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(v_alloc);
|
||||
free(v);
|
||||
return 0;
|
||||
|
||||
bail:
|
||||
/* LCOV_EXCL_START */
|
||||
free(v_alloc);
|
||||
free(v);
|
||||
return -1;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
68
raid/test.h
Normal file
68
raid/test.h
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __RAID_TEST_H
#define __RAID_TEST_H

#include <stddef.h> /* size_t, so the header can be included on its own */

/**
 * Tests insertion function.
 *
 * Test raid_insert() with all the possible combinations of elements to insert.
 *
 * Returns 0 on success.
 */
int raid_test_insert(void);

/**
 * Tests sorting function.
 *
 * Test raid_sort() with all the possible combinations of elements to sort.
 *
 * Returns 0 on success.
 */
int raid_test_sort(void);

/**
 * Tests combination functions.
 *
 * Tests combination_first() and combination_next() for all the parity levels.
 *
 * Returns 0 on success.
 */
int raid_test_combo(void);

/**
 * Tests recovering functions.
 *
 * All the recovering functions are tested with all the combinations
 * of failing disks and recovering parities.
 *
 * Take care that the test time grows exponentially with the number of disks.
 *
 * The mode is declared as int to match the definition in test.c; declaring
 * it unsigned here would give the declaration and the definition
 * incompatible function types.
 *
 * @mode RAID mode, as passed to raid_mode().
 * @nd Number of data disks.
 * @size Size in bytes of each buffer.
 *
 * Returns 0 on success.
 */
int raid_test_rec(int mode, int nd, size_t size);

/**
 * Tests parity generation functions.
 *
 * All the parity generation functions are tested with the specified
 * number of disks.
 *
 * The mode is declared as int to match the definition in test.c.
 *
 * @mode RAID mode, as passed to raid_mode().
 * @nd Number of data disks.
 * @size Size in bytes of each buffer.
 *
 * Returns 0 on success.
 */
int raid_test_par(int mode, int nd, size_t size);

#endif
|
||||
|
2452
raid/x86.c
Normal file
2452
raid/x86.c
Normal file
File diff suppressed because it is too large
Load Diff
255
raid/x86z.c
Normal file
255
raid/x86z.c
Normal file
@ -0,0 +1,255 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Andrea Mazzoleni
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
/*
 * Byte constants used by the GENz kernels, each repeated over 16 bytes so
 * that a single 128 bit load fills a whole register with it.
 *
 * poly: 0x1d, xor-ed into the bytes whose top bit was set before doubling.
 * half: 0x8e, xor-ed into the bytes whose low bit was set before halving.
 * low7: 0x7f, mask clearing the top bit of every byte after the 16 bit
 *       right shift.
 */
static const struct gfzconst16 {
	uint8_t poly[16];
	uint8_t half[16];
	uint8_t low7[16];
} gfzconst16 __aligned(64) = {
	{ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
	{ 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e },
	{ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }
};
#endif
|
||||
|
||||
#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
/*
 * GENz (triple parity with powers of 2^-1) SSE2 implementation
 *
 * Reads the nd data buffers vv[0..nd-1] and writes the three parities to
 * vv[nd], vv[nd + 1] and vv[nd + 2], 16 bytes per iteration.
 *
 * The loads and stores are movdqa/movntdq, which require 16 byte aligned
 * addresses; size is consumed in steps of 16.
 *
 * Register use:
 *   xmm0  first parity, plain xor of the data
 *   xmm1  second parity, doubled before each new disk is added
 *   xmm2  third parity, halved before each new disk is added
 *   xmm3  gfzconst16.half   xmm6  gfzconst16.low7   xmm7  gfzconst16.poly
 *   xmm4, xmm5  temporaries
 */
void raid_genz_sse2(int nd, size_t size, void **vv)
{
	uint8_t **buf = (uint8_t**)vv;
	int last = nd - 1;
	uint8_t *par_p = buf[nd];
	uint8_t *par_q = buf[nd + 1];
	uint8_t *par_r = buf[nd + 2];
	int disk;
	size_t off;

	raid_sse_begin();

	asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
	asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
	asm volatile ("movdqa %0,%%xmm6" : : "m" (gfzconst16.low7[0]));

	for (off = 0; off < size; off += 16) {
		/* start the three accumulators from the last data disk */
		asm volatile ("movdqa %0,%%xmm0" : : "m" (buf[last][off]));
		asm volatile ("movdqa %xmm0,%xmm1");
		asm volatile ("movdqa %xmm0,%xmm2");

		/* walk the remaining disks from the highest index down */
		for (disk = last - 1; disk >= 0; --disk) {
			/* xmm1: double each byte, xor poly where the top bit was set */
			asm volatile ("pxor %xmm4,%xmm4");
			asm volatile ("pcmpgtb %xmm1,%xmm4");
			asm volatile ("paddb %xmm1,%xmm1");
			asm volatile ("pand %xmm7,%xmm4");
			asm volatile ("pxor %xmm4,%xmm1");

			/* xmm2: halve each byte, xor half where the low bit was set */
			asm volatile ("movdqa %xmm2,%xmm4");
			asm volatile ("pxor %xmm5,%xmm5");
			asm volatile ("psllw $7,%xmm4");
			asm volatile ("psrlw $1,%xmm2");
			asm volatile ("pcmpgtb %xmm4,%xmm5");
			asm volatile ("pand %xmm6,%xmm2");
			asm volatile ("pand %xmm3,%xmm5");
			asm volatile ("pxor %xmm5,%xmm2");

			/* add the data of this disk to the three accumulators */
			asm volatile ("movdqa %0,%%xmm4" : : "m" (buf[disk][off]));
			asm volatile ("pxor %xmm4,%xmm0");
			asm volatile ("pxor %xmm4,%xmm1");
			asm volatile ("pxor %xmm4,%xmm2");
		}

		/* non-temporal stores of the three parities */
		asm volatile ("movntdq %%xmm0,%0" : "=m" (par_p[off]));
		asm volatile ("movntdq %%xmm1,%0" : "=m" (par_q[off]));
		asm volatile ("movntdq %%xmm2,%0" : "=m" (par_r[off]));
	}

	raid_sse_end();
}
#endif
|
||||
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_SSE2)
/*
 * GENz (triple parity with powers of 2^-1) SSE2 implementation
 *
 * Note that it uses 16 registers, meaning that x64 is required.
 *
 * Reads the nd data buffers vv[0..nd-1] and writes the three parities to
 * vv[nd], vv[nd + 1] and vv[nd + 2], 32 bytes per iteration, handled as
 * two independent 16 byte halves.
 *
 * The loads and stores are movdqa/movntdq, which require 16 byte aligned
 * addresses; size is consumed in steps of 32.
 *
 * Register use (first half / second half):
 *   xmm0 / xmm8    first parity, plain xor of the data
 *   xmm1 / xmm9    second parity, doubled before each new disk is added
 *   xmm2 / xmm10   third parity, halved before each new disk is added
 *   xmm3  gfzconst16.half   xmm7  gfzconst16.poly   xmm11  gfzconst16.low7
 *   xmm4..xmm6, xmm12..xmm14  temporaries
 */
void raid_genz_sse2ext(int nd, size_t size, void **vv)
{
	uint8_t **buf = (uint8_t**)vv;
	int last = nd - 1;
	uint8_t *par_p = buf[nd];
	uint8_t *par_q = buf[nd + 1];
	uint8_t *par_r = buf[nd + 2];
	int disk;
	size_t off;

	raid_sse_begin();

	asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
	asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
	asm volatile ("movdqa %0,%%xmm11" : : "m" (gfzconst16.low7[0]));

	for (off = 0; off < size; off += 32) {
		/* start the accumulators from the last data disk */
		asm volatile ("movdqa %0,%%xmm0" : : "m" (buf[last][off]));
		asm volatile ("movdqa %0,%%xmm8" : : "m" (buf[last][off + 16]));
		asm volatile ("movdqa %xmm0,%xmm1");
		asm volatile ("movdqa %xmm8,%xmm9");
		asm volatile ("movdqa %xmm0,%xmm2");
		asm volatile ("movdqa %xmm8,%xmm10");

		/* walk the remaining disks from the highest index down */
		for (disk = last - 1; disk >= 0; --disk) {
			/*
			 * Doubling of xmm1/xmm9 and halving of xmm2/xmm10,
			 * with the instructions of the two halves and of
			 * the two operations interleaved.
			 */
			asm volatile ("movdqa %xmm2,%xmm6");
			asm volatile ("movdqa %xmm10,%xmm14");
			asm volatile ("pxor %xmm4,%xmm4");
			asm volatile ("pxor %xmm12,%xmm12");
			asm volatile ("pxor %xmm5,%xmm5");
			asm volatile ("pxor %xmm13,%xmm13");
			asm volatile ("psllw $7,%xmm6");
			asm volatile ("psllw $7,%xmm14");
			asm volatile ("psrlw $1,%xmm2");
			asm volatile ("psrlw $1,%xmm10");
			asm volatile ("pcmpgtb %xmm1,%xmm4");
			asm volatile ("pcmpgtb %xmm9,%xmm12");
			asm volatile ("pcmpgtb %xmm6,%xmm5");
			asm volatile ("pcmpgtb %xmm14,%xmm13");
			asm volatile ("paddb %xmm1,%xmm1");
			asm volatile ("paddb %xmm9,%xmm9");
			asm volatile ("pand %xmm11,%xmm2");
			asm volatile ("pand %xmm11,%xmm10");
			asm volatile ("pand %xmm7,%xmm4");
			asm volatile ("pand %xmm7,%xmm12");
			asm volatile ("pand %xmm3,%xmm5");
			asm volatile ("pand %xmm3,%xmm13");
			asm volatile ("pxor %xmm4,%xmm1");
			asm volatile ("pxor %xmm12,%xmm9");
			asm volatile ("pxor %xmm5,%xmm2");
			asm volatile ("pxor %xmm13,%xmm10");

			/* add the data of this disk to the accumulators */
			asm volatile ("movdqa %0,%%xmm4" : : "m" (buf[disk][off]));
			asm volatile ("movdqa %0,%%xmm12" : : "m" (buf[disk][off + 16]));
			asm volatile ("pxor %xmm4,%xmm0");
			asm volatile ("pxor %xmm4,%xmm1");
			asm volatile ("pxor %xmm4,%xmm2");
			asm volatile ("pxor %xmm12,%xmm8");
			asm volatile ("pxor %xmm12,%xmm9");
			asm volatile ("pxor %xmm12,%xmm10");
		}

		/* non-temporal stores of the three parities */
		asm volatile ("movntdq %%xmm0,%0" : "=m" (par_p[off]));
		asm volatile ("movntdq %%xmm8,%0" : "=m" (par_p[off + 16]));
		asm volatile ("movntdq %%xmm1,%0" : "=m" (par_q[off]));
		asm volatile ("movntdq %%xmm9,%0" : "=m" (par_q[off + 16]));
		asm volatile ("movntdq %%xmm2,%0" : "=m" (par_r[off]));
		asm volatile ("movntdq %%xmm10,%0" : "=m" (par_r[off + 16]));
	}

	raid_sse_end();
}
#endif
|
||||
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
/*
 * GENz (triple parity with powers of 2^-1) AVX2 implementation
 *
 * Note that it uses 16 registers, meaning that x64 is required.
 *
 * Reads the nd data buffers vv[0..nd-1] and writes the three parities to
 * vv[nd], vv[nd + 1] and vv[nd + 2], 64 bytes per iteration, handled as
 * two independent 32 byte halves.
 *
 * The loads and stores are vmovdqa/vmovntdq on ymm registers, which
 * require 32 byte aligned addresses; size is consumed in steps of 64.
 *
 * Register use (first half / second half):
 *   ymm0 / ymm8    first parity, plain xor of the data
 *   ymm1 / ymm9    second parity, doubled before each new disk is added
 *   ymm2 / ymm10   third parity, halved before each new disk is added
 *   ymm3  gfzconst16.half   ymm7  gfzconst16.poly   ymm11  gfzconst16.low7
 *         (each 16 byte constant broadcast to both 128 bit lanes)
 *   ymm15 all zero, the left operand of the signed byte compares
 *   ymm4..ymm6, ymm12..ymm14  temporaries
 */
void raid_genz_avx2ext(int nd, size_t size, void **vv)
{
	uint8_t **buf = (uint8_t**)vv;
	int last = nd - 1;
	uint8_t *par_p = buf[nd];
	uint8_t *par_q = buf[nd + 1];
	uint8_t *par_r = buf[nd + 2];
	int disk;
	size_t off;

	raid_avx_begin();

	asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfzconst16.poly[0]));
	asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfzconst16.half[0]));
	asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfzconst16.low7[0]));
	asm volatile ("vpxor %ymm15,%ymm15,%ymm15");

	for (off = 0; off < size; off += 64) {
		/* start the accumulators from the last data disk */
		asm volatile ("vmovdqa %0,%%ymm0" : : "m" (buf[last][off]));
		asm volatile ("vmovdqa %0,%%ymm8" : : "m" (buf[last][off + 32]));
		asm volatile ("vmovdqa %ymm0,%ymm1");
		asm volatile ("vmovdqa %ymm8,%ymm9");
		asm volatile ("vmovdqa %ymm0,%ymm2");
		asm volatile ("vmovdqa %ymm8,%ymm10");

		/* walk the remaining disks from the highest index down */
		for (disk = last - 1; disk >= 0; --disk) {
			/*
			 * Doubling of ymm1/ymm9 and halving of ymm2/ymm10,
			 * with the instructions of the two halves and of
			 * the two operations interleaved.
			 */
			asm volatile ("vpsllw $7,%ymm2,%ymm6");
			asm volatile ("vpsllw $7,%ymm10,%ymm14");
			asm volatile ("vpsrlw $1,%ymm2,%ymm2");
			asm volatile ("vpsrlw $1,%ymm10,%ymm10");
			asm volatile ("vpcmpgtb %ymm1,%ymm15,%ymm4");
			asm volatile ("vpcmpgtb %ymm9,%ymm15,%ymm12");
			asm volatile ("vpcmpgtb %ymm6,%ymm15,%ymm5");
			asm volatile ("vpcmpgtb %ymm14,%ymm15,%ymm13");
			asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
			asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
			asm volatile ("vpand %ymm11,%ymm2,%ymm2");
			asm volatile ("vpand %ymm11,%ymm10,%ymm10");
			asm volatile ("vpand %ymm7,%ymm4,%ymm4");
			asm volatile ("vpand %ymm7,%ymm12,%ymm12");
			asm volatile ("vpand %ymm3,%ymm5,%ymm5");
			asm volatile ("vpand %ymm3,%ymm13,%ymm13");
			asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
			asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
			asm volatile ("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile ("vpxor %ymm13,%ymm10,%ymm10");

			/* add the data of this disk to the accumulators */
			asm volatile ("vmovdqa %0,%%ymm4" : : "m" (buf[disk][off]));
			asm volatile ("vmovdqa %0,%%ymm12" : : "m" (buf[disk][off + 32]));
			asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
			asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
			asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
			asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
			asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
			asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
		}

		/* non-temporal stores of the three parities */
		asm volatile ("vmovntdq %%ymm0,%0" : "=m" (par_p[off]));
		asm volatile ("vmovntdq %%ymm8,%0" : "=m" (par_p[off + 32]));
		asm volatile ("vmovntdq %%ymm1,%0" : "=m" (par_q[off]));
		asm volatile ("vmovntdq %%ymm9,%0" : "=m" (par_q[off + 32]));
		asm volatile ("vmovntdq %%ymm2,%0" : "=m" (par_r[off]));
		asm volatile ("vmovntdq %%ymm10,%0" : "=m" (par_r[off + 32]));
	}

	raid_avx_end();
}
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user