matrixmultiply/
archparam_defaults.rs

1// Copyright 2016 - 2018 Ulrik Sverdrup "bluss"
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
//! Architecture-specific parameters
9
/// Number of columns of C and B processed at a time (5th loop).
///
/// B is partitioned column-wise into the panels B0, B1, .. Bj, .. B_NC.
pub const S_NC: usize = 1 << 10;
14//pub const S_NC: usize = option_env!("MATMUL_SGEMM_NC").map(parse_unwrap).unwrap_or(S_NC);
15
/// Number of rows of Bj, and equally of columns of A, processed at a
/// time (4th loop).
///
/// A is cut into the panels Ap, and Bj is cut into the panels Bp;
/// each Bp is packed into the buffer B~.
///
/// B~ has size NC x KC.
pub const S_KC: usize = 1 << 8;
26
/// Number of rows of Ap processed at a time (3rd loop).
///
/// Ap is cut into the blocks A0, A1, .., Ai, .. A_MC, and each Ai is
/// packed into the buffer A~.
///
/// A~ has size KC x MC.
pub const S_MC: usize = 1 << 6;
35
/// Number of columns of C and B processed at a time (5th loop).
///
/// B is partitioned column-wise into the panels B0, B1, .. Bj, .. B_NC.
pub const D_NC: usize = 1 << 10;
40
/// Number of rows of Bj, and equally of columns of A, processed at a
/// time (4th loop).
///
/// A is cut into the panels Ap, and Bj is cut into the panels Bp;
/// each Bp is packed into the buffer B~.
///
/// B~ has size NC x KC.
pub const D_KC: usize = 1 << 8;
51
/// Number of rows of Ap processed at a time (3rd loop).
///
/// Ap is cut into the blocks A0, A1, .., Ai, .. A_MC, and each Ai is
/// packed into the buffer A~.
///
/// A~ has size KC x MC.
pub const D_MC: usize = 1 << 6;
60
/// Number of columns of C and B processed at a time (5th loop).
///
/// B is partitioned column-wise into the panels B0, B1, .. Bj, .. B_NC.
///
/// Defined as half of [`S_NC`]; only available with the `cgemm` feature.
#[cfg(feature = "cgemm")]
pub const C_NC: usize = S_NC >> 1;
66
/// Number of rows of Bj, and equally of columns of A, processed at a
/// time (4th loop).
///
/// A is cut into the panels Ap, and Bj is cut into the panels Bp;
/// each Bp is packed into the buffer B~.
///
/// B~ has size NC x KC.
///
/// Defined as equal to [`S_KC`]; only available with the `cgemm` feature.
#[cfg(feature = "cgemm")]
pub const C_KC: usize = S_KC;
78
/// Number of rows of Ap processed at a time (3rd loop).
///
/// Ap is cut into the blocks A0, A1, .., Ai, .. A_MC, and each Ai is
/// packed into the buffer A~.
///
/// A~ has size KC x MC.
///
/// Defined as half of [`S_MC`]; only available with the `cgemm` feature.
#[cfg(feature = "cgemm")]
pub const C_MC: usize = S_MC >> 1;
88
/// Number of columns of C and B processed at a time (5th loop).
///
/// B is partitioned column-wise into the panels B0, B1, .. Bj, .. B_NC.
///
/// Defined as half of [`D_NC`]; only available with the `cgemm` feature.
#[cfg(feature = "cgemm")]
pub const Z_NC: usize = D_NC >> 1;
94
/// Number of rows of Bj, and equally of columns of A, processed at a
/// time (4th loop).
///
/// A is cut into the panels Ap, and Bj is cut into the panels Bp;
/// each Bp is packed into the buffer B~.
///
/// B~ has size NC x KC.
///
/// Defined as equal to [`D_KC`]; only available with the `cgemm` feature.
#[cfg(feature = "cgemm")]
pub const Z_KC: usize = D_KC;
106
/// Number of rows of Ap processed at a time (3rd loop).
///
/// Ap is cut into the blocks A0, A1, .., Ai, .. A_MC, and each Ai is
/// packed into the buffer A~.
///
/// A~ has size KC x MC.
///
/// Defined as half of [`D_MC`]; only available with the `cgemm` feature.
#[cfg(feature = "cgemm")]
pub const Z_MC: usize = D_MC >> 1;