matrixmultiply/
lib.rs

1// Copyright 2016 - 2023 Ulrik Sverdrup "bluss"
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8//!
9//! General matrix multiplication for f32, f64, and complex matrices. Operates on
10//! matrices with general layout (they can use arbitrary row and column stride).
11//!
12//! This crate uses the same macro/microkernel approach to matrix multiplication as
13//! the [BLIS][bl] project.
14//!
15//! We presently provide a few good microkernels, portable and for x86-64 and
16//! AArch64 NEON, and only one operation: the general matrix-matrix
17//! multiplication (“gemm”).
18//!
19//! [bl]: https://github.com/flame/blis
20//!
21//! ## Matrix Representation
22//!
23//! **matrixmultiply** supports matrices with general stride, so a matrix
24//! is passed using a pointer and four integers:
25//!
26//! - `a: *const f32`, pointer to the first element in the matrix
27//! - `m: usize`, number of rows
28//! - `k: usize`, number of columns
29//! - `rsa: isize`, row stride
30//! - `csa: isize`, column stride
31//!
32//! In this example, A is an m by k matrix. `a` is a pointer to the element at
33//! index *0, 0*.
34//!
35//! The *row stride* is the pointer offset (in number of elements) to the
36//! element on the next row. It’s the distance from element *i, j* to *i + 1,
37//! j*.
38//!
39//! The *column stride* is the pointer offset (in number of elements) to the
40//! element in the next column. It’s the distance from element *i, j* to *i,
41//! j + 1*.
42//!
43//! For example, for a contiguous matrix, row-major strides are *rsa=k,
44//! csa=1* and column-major strides are *rsa=1, csa=m*.
45//!
46//! Strides can be negative or even zero, but for a mutable matrix, elements
47//! may not alias each other.
48//!
49//! ## Portability and Performance
50//!
51//! - The default kernels are written in portable Rust and available
52//!   on all targets. These may depend on autovectorization to perform well.
53//!
54//! - *x86* and *x86-64* features can be detected at runtime (the default) or
55//!   at compile time (if enabled), and the following kernel variants are
56//!   implemented:
57//!
58//!   - `fma`
59//!   - `avx`
60//!   - `sse2`
61//!
62//! - *aarch64* features can be detected at runtime (the default) or at compile
63//!   time (if enabled), and the following kernel variants are implemented:
64//!
65//!   - `neon`
66//!
67//! ## Features
68//!
69//! ### `std`
70//!
71//! `std` is enabled by default.
72//!
73//! This crate can be used without the standard library (`#![no_std]`) by
74//! disabling the default `std` feature. To do so, use this in your
75//! `Cargo.toml`:
76//!
77//! ```toml
78//! matrixmultiply = { version = "0.3", default-features = false }
79//! ```
80//!
81//! Runtime CPU feature detection is available **only** when `std` is enabled.
82//! Without the `std` feature, the crate uses special CPU features only if they
83//! are enabled at compile time. (To enable CPU features at compile time, pass
84//! the relevant
85//! [`target-cpu`](https://doc.rust-lang.org/rustc/codegen-options/index.html#target-cpu)
86//! or
87//! [`target-feature`](https://doc.rust-lang.org/rustc/codegen-options/index.html#target-feature)
88//! option to `rustc`.)
89//!
90//! ### `threading`
91//!
92//! `threading` is an optional crate feature.
93//!
94//! Threading enables multithreading for the operations. The environment variable
95//! `MATMUL_NUM_THREADS` sets the maximum number of threads used. At the moment, 1-4 threads
96//! are supported, and the default is the number of physical CPUs (as detected by `num_cpus`).
97//!
98//! ### `cgemm`
99//!
100//! `cgemm` is an optional crate feature.
101//!
102//! It enables the `cgemm` and `zgemm` methods for complex matrix multiplication.
103//! This is an **experimental feature** and not yet as performant as the float kernels on x86.
104//!
105//! The complex representation we use is `[f64; 2]`.
106//!
107//! ### `constconf`
108//!
109//! `constconf` is an optional feature. When enabled, cache-sensitive parameters of
110//! the gemm implementations can be tweaked *at compile time* by defining the following variables:
111//!
112//! - `MATMUL_SGEMM_MC`
113//!   (and so on: the same pattern applies to SGEMM, DGEMM, CGEMM and ZGEMM, each with MC, NC or KC).
114//!
115//! ## Other Notes
116//!
117//! The functions in this crate are thread safe, as long as the destination
118//! matrix is distinct.
119//!
120//! ## Rust Version
121//!
122//! This version requires Rust 1.41.1 or later; the crate follows a carefully
123//! considered upgrade policy, where updating the minimum Rust version is not a breaking
124//! change.
125//!
126//! Some features require a later Rust version: AArch64 NEON support is enabled from Rust 1.61.
127
128#![doc(html_root_url = "https://docs.rs/matrixmultiply/0.3/")]
129#![cfg_attr(not(feature = "std"), no_std)]
130
131#[cfg(not(feature = "std"))]
132extern crate alloc;
133#[cfg(feature = "std")]
134extern crate core;
135
136#[macro_use]
137mod debugmacros;
138#[macro_use]
139mod loopmacros;
140
141mod archparam_defaults;
142
143#[cfg(feature = "constconf")]
144mod archparam;
145#[cfg(feature = "constconf")]
146mod constparse;
147
148#[cfg(not(feature = "constconf"))]
149pub(crate) use archparam_defaults as archparam;
150
151mod gemm;
152mod kernel;
153mod packing;
154mod ptr;
155mod threading;
156
157mod aligned_alloc;
158mod util;
159
160#[macro_use]
161mod archmacros;
162#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
163#[macro_use]
164mod x86;
165#[cfg(any(target_arch = "aarch64"))]
166#[macro_use]
167mod aarch64;
168
169mod dgemm_kernel;
170mod sgemm_kernel;
171
172pub use crate::gemm::dgemm;
173pub use crate::gemm::sgemm;
174
175#[cfg(feature = "cgemm")]
176#[macro_use]
177mod cgemm_common;
178#[cfg(feature = "cgemm")]
179mod cgemm_kernel;
180#[cfg(feature = "cgemm")]
181mod zgemm_kernel;
182
183#[cfg(feature = "cgemm")]
184pub use crate::gemm::cgemm;
185#[cfg(feature = "cgemm")]
186pub use crate::gemm::zgemm;
187#[cfg(feature = "cgemm")]
188pub use crate::gemm::CGemmOption;