core enc util 全部转为javacard写法

This commit is contained in:
zcy
2025-09-05 20:14:55 +08:00
parent 57b385aaa2
commit a74ab6f212
3 changed files with 806 additions and 272 deletions

View File

@@ -1,14 +1,21 @@
package com.cscn; package com.cscn;
import javacard.framework.JCSystem;
import static com.cscn.Zuc256Util.L1; import static com.cscn.Zuc256Util.L1;
import static com.cscn.Zuc256Util.L2; import static com.cscn.Zuc256Util.L2;
import static com.cscn.Zuc256Util.add31; import static com.cscn.Zuc256Util.add31;
import static com.cscn.Zuc256Util.add31_pair; import static com.cscn.Zuc256Util.add32;
import static com.cscn.Zuc256Util.add64;
import static com.cscn.Zuc256Util.and64_7FFFFFFF_to32;
import static com.cscn.Zuc256Util.create_64b_from_32b;
import static com.cscn.Zuc256Util.makeU31; import static com.cscn.Zuc256Util.makeU31;
import static com.cscn.Zuc256Util.makeU32; import static com.cscn.Zuc256Util.makeU32;
import static com.cscn.Zuc256Util.rot31; import static com.cscn.Zuc256Util.rot31;
import static com.cscn.Zuc256Util.rot31_pair; import static com.cscn.Zuc256Util.shr32u1;
import static com.cscn.Zuc256Util.shr64u_31;
import static com.cscn.Zuc256Util.xor32;
/** /**
* ZUC-256 核心:状态初始化、密钥字生成、密钥流生成。 * ZUC-256 核心:状态初始化、密钥字生成、密钥流生成。
@@ -19,278 +26,555 @@ public final class Zuc256Core {
/** 初始化状态Key + IV */ /** 初始化状态Key + IV */
public static void initState(Zuc256State state, byte[] key32, byte[] iv) { public static void initState(Zuc256State state, byte[] key32, byte[] iv) {
zuc256SetMacKey(state, key32, iv, 0); zuc256SetMacKey(state, key32, iv, (short)0);
} }
/** 生成单个密钥字 */ /** 生成单个密钥字 */
public static int generateKeyword(Zuc256State state) { public static void zuc256GenerateKeyword(Zuc256State state, short[] out) {
int[] LFSR = state.LFSR; // int[] LFSR = state.LFSR;
int R1 = state.R1; // int R1 = state.R1;
int R2 = state.R2; // int R2 = state.R2;
int X0, X1, X2, X3; // int X0, X1, X2, X3;
int W1, W2, U, V; // int W1, W2, U, V;
int Z; // int Z;
short[] LFSR_hi = state.LFSR_hi;
short[] LFSR_lo = state.LFSR_lo;
// 工作寄存器32位值的临时 out32 缓冲全用short[2][lo, hi]
short[] X0 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] X1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] X2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] X3 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] R1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] R2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] W1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] W2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] U = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] V = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] Z = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] TMP0 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] TMP1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] TMP2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
// 载入 R1,R2
R1[0] = state.R1_lo;
R1[1] = state.R1_hi;
R2[0] = state.R2_lo;
R2[1] = state.R2_hi;
// BitReconstruction4 // BitReconstruction4
X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); // X0 = ((L15 & 0x7FFF8000) << 1) | (L14 & 0xFFFF)
X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); short c15 = (short)((LFSR_lo[15] & 0x8000) >>> 15); // 左移产生的进位
X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); X0[1] = (short)(((LFSR_hi[15] & 0x7FFF) << 1) | c15); // hi
X3 = ((LFSR[2] & 0xFFFF) << 16) | (LFSR[0] >>> 15); X0[0] = LFSR_lo[14]; // lo
Z = X3 ^ ((X0 ^ R1) + R2); // X1 = ((L11 & 0xFFFF) << 16) | (L9 >>> 15)
X1[1] = LFSR_lo[11];
X1[0] = (short)(((LFSR_lo[9] & 0x8000) >>> 15) | (LFSR_hi[9] << 1));
// X2 = ((L7 & 0xFFFF) << 16) | (L5 >>> 15)
X2[1] = LFSR_lo[7];
X2[0] = (short)(((LFSR_lo[5] & 0x8000) >>> 15) | (LFSR_hi[5] << 1));
// X3 = ((L2 & 0xFFFF) << 16) | (L0 >>> 15)
X3[1] = LFSR_lo[2];
X3[0] = (short)(((LFSR_lo[0] & 0x8000) >>> 15) | (LFSR_hi[0] << 1));
// ---- 输入X0,X1,X2,X3,R1,R2 均为 short[2]; 输出Z,W1,W2,U,V ----
// Z = X3 ^ ((X0 ^ R1) + R2)
xor32(X0[0], X0[1], R1[0], R1[1], TMP0); // TMP0 = X0 ^ R1
add32(TMP0[0], TMP0[1], R2[0], R2[1], TMP1); // TMP1 = TMP0 + R2
xor32(X3[0], X3[1], TMP1[0], TMP1[1], Z); // Z = X3 ^ TMP1
// F_(X1, X2) // F_(X1, X2)
W1 = R1 + X1; // W1 = R1 + X1
W2 = R2 ^ X2; add32(R1[0], R1[1], X1[0], X1[1], W1);
U = L1((W1 << 16) | (W2 >>> 16));
V = L2((W2 << 16) | (W1 >>> 16));
R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], // W2 = R2 ^ X2
Zuc256Tables.S1[(U >>> 16) & 0xFF], xor32(R2[0], R2[1], X2[0], X2[1], W2);
Zuc256Tables.S0[(U >>> 8) & 0xFF],
Zuc256Tables.S1[U & 0xFF]);
R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], // U = L1((W1 << 16) | (W2 >>> 16))
Zuc256Tables.S1[(V >>> 16) & 0xFF], // (W1<<16): lo=0, hi=W1_lo
Zuc256Tables.S0[(V >>> 8) & 0xFF], // (W2>>>16): lo=W2_hi, hi=0
Zuc256Tables.S1[V & 0xFF]); // OR 结果: lo=W2_hi, hi=W1_lo
L1(W2[1], W1[0], U);
// LFSRWithWorkMode // V = L2((W2 << 16) | (W1 >>> 16))
long a = LFSR[0]; // (W2<<16): lo=0, hi=W2_lo
a += (long)LFSR[0] << 8; // (W1>>>16): lo=W1_hi, hi=0
a += (long)LFSR[4] << 20; // OR 结果: lo=W1_hi, hi=W2_lo
a += (long)LFSR[10] << 21; L2(W1[1], W2[0], V);
a += (long)LFSR[13] << 17;
a += (long)LFSR[15] << 15;
a = (a & 0x7FFFFFFF) + (a >>> 31);
int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31));
System.arraycopy(LFSR, 1, LFSR, 0, 15);
LFSR[15] = v;
state.R1 = R1; // R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF],
state.R2 = R2; // Zuc256Tables.S1[(U >>> 16) & 0xFF],
// Zuc256Tables.S0[(U >>> 8) & 0xFF],
// Zuc256Tables.S1[U & 0xFF]);
makeU32(
(short)(Zuc256Tables.S0[((U[1] >>> 8) & 0xFF)] & 0xFF), // (U >>> 24) & 0xFF
(short)(Zuc256Tables.S1[(U[1] & 0xFF)] & 0xFF), // (U >>> 16) & 0xFF
(short)(Zuc256Tables.S0[((U[0] >>> 8) & 0xFF)] & 0xFF), // (U >>> 8) & 0xFF
(short)(Zuc256Tables.S1[(U[0] & 0xFF)] & 0xFF), // (U >>> 0) & 0xFF
R1);
// R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF],
// Zuc256Tables.S1[(V >>> 16) & 0xFF],
// Zuc256Tables.S0[(V >>> 8) & 0xFF],
// Zuc256Tables.S1[V & 0xFF]);
makeU32(
(short)(Zuc256Tables.S0[((V[1] >>> 8) & 0xFF)] & 0xFF), // (V >>> 24) & 0xFF
(short)(Zuc256Tables.S1[(V[1] & 0xFF)] & 0xFF), // (V >>> 16) & 0xFF
(short)(Zuc256Tables.S0[((V[0] >>> 8) & 0xFF)] & 0xFF), // (V >>> 8) & 0xFF
(short)(Zuc256Tables.S1[(V[0] & 0xFF)] & 0xFF), // (V >>> 0) & 0xFF
R2);
// // LFSRWithWorkMode
// long a = LFSR[0];
// a += (long)LFSR[0] << 8;
// a += (long)LFSR[4] << 20;
// a += (long)LFSR[10] << 21;
// a += (long)LFSR[13] << 17;
// a += (long)LFSR[15] << 15;
// ---- 先准备累加器 A (64位) ----
short[] A = new short[4]; // 64位累加器初始全0
A[0] = 0; A[1] = 0; A[2] = 0; A[3] = 0;
// 临时缓冲
short[] tmp32 = new short[2]; // 保存一个32位数 (lo,hi)
short[] tmp64 = new short[4]; // 保存移位后的64位数
// a = LFSR[0]
tmp32[0] = state.LFSR_lo[0];
tmp32[1] = state.LFSR_hi[0];
create_64b_from_32b(tmp64, tmp32, (short)0);
add64(A, tmp64);
// a += (LFSR[0] << 8)
create_64b_from_32b(tmp64, tmp32, (short)8);
add64(A, tmp64);
// a += (LFSR[4] << 20)
tmp32[0] = state.LFSR_lo[4];
tmp32[1] = state.LFSR_hi[4];
create_64b_from_32b(tmp64, tmp32, (short)20);
add64(A, tmp64);
// a += (LFSR[10] << 21)
tmp32[0] = state.LFSR_lo[10];
tmp32[1] = state.LFSR_hi[10];
create_64b_from_32b(tmp64, tmp32, (short)21);
add64(A, tmp64);
// a += (LFSR[13] << 17)
tmp32[0] = state.LFSR_lo[13];
tmp32[1] = state.LFSR_hi[13];
create_64b_from_32b(tmp64, tmp32, (short)17);
add64(A, tmp64);
// a += (LFSR[15] << 15)
tmp32[0] = state.LFSR_lo[15];
tmp32[1] = state.LFSR_hi[15];
create_64b_from_32b(tmp64, tmp32, (short)15);
add64(A, tmp64);
// a = (a & 0x7FFFFFFF) + (a >>> 31);
// ---- 第一次折叠a = (a & 0x7FFFFFFF) + (a >>> 31) ----
short[] low31 = new short[4];
short[] r31 = new short[4];
and64_7FFFFFFF_to32(A, low31); // low31 = A & 0x7FFFFFFF
shr64u_31(A, r31); // r31 = A >>> 31
A[0]=0; A[1]=0; A[2]=0; A[3]=0;
add64(A, low31);
add64(A, r31);
// int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31));
// ---- 第二次折叠,得到 v32位----
short[] low31b = new short[4];
short[] r31b = new short[4];
short[] v64 = new short[4];
and64_7FFFFFFF_to32(A, low31b);
shr64u_31(A, r31b);
v64[0]=0; v64[1]=0; v64[2]=0; v64[3]=0;
add64(v64, low31b);
add64(v64, r31b);
// v = 32位取 v64 的低两段
short v_lo = v64[0];
short v_hi = (short)(v64[1] & 0x7FFF); // 只保留31位
// System.arraycopy(LFSR, 1, LFSR, 0, 15);
// LFSR_lo 向左移
System.arraycopy(state.LFSR_lo, 1, state.LFSR_lo, 0, 15);
// LFSR_hi 向左移
System.arraycopy(state.LFSR_hi, 1, state.LFSR_hi, 0, 15);
// LFSR[15] = v;
// ---- 写回 LFSR[15] ----
state.LFSR_lo[15] = v_lo;
state.LFSR_hi[15] = v_hi;
// state.R1 = R1;
// state.R2 = R2;
state.R1_lo = R1[0];
state.R1_hi = R1[1];
state.R2_lo = R2[0];
state.R2_hi = R2[1];
// return Z;
out[0] = Z[0];
out[1] = Z[1];
return Z;
} }
// 生成指定长度的密钥流 // 生成指定长度的密钥流
public static void zuc256GenerateKeystream(Zuc256State state, int nwords, int[] keystream) { public static void zuc256GenerateKeystream(Zuc256State state,
int[] LFSR = state.LFSR; short nwords,
int R1 = state.R1; short[] keystream_hi,
int R2 = state.R2; short[] keystream_lo) {
int X0, X1, X2, X3; // 临时存放一个 32 位关键字
int W1, W2, U, V; short[] tmp = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
for (short i = 0; i < nwords; i++) {
for (int i = 0; i < nwords; i++) { // 生成一个关键字 -> tmp[0]=lo, tmp[1]=hi
// BitReconstruction4 zuc256GenerateKeyword(state, tmp);
X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); // 存入输出数组
X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); keystream_lo[i] = tmp[0];
X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); keystream_hi[i] = tmp[1];
X3 = ((LFSR[2] & 0xFFFF) << 16) | (LFSR[0] >>> 15);
keystream[i] = X3 ^ ((X0 ^ R1) + R2);
// F_(X1, X2)
W1 = R1 + X1;
W2 = R2 ^ X2;
U = L1((W1 << 16) | (W2 >>> 16));
V = L2((W2 << 16) | (W1 >>> 16));
// S盒查找
int T0 = Zuc256Tables.S0[(U >>> 24) & 0xFF] & 0xFF;
int T2 = Zuc256Tables.S0[(U >>> 8) & 0xFF] & 0xFF;
int T4 = Zuc256Tables.S0[(V >>> 24) & 0xFF] & 0xFF;
int T6 = Zuc256Tables.S0[(V >>> 8) & 0xFF] & 0xFF;
int T1 = Zuc256Tables.S1[(U >>> 16) & 0xFF] & 0xFF;
int T3 = Zuc256Tables.S1[U & 0xFF] & 0xFF;
int T5 = Zuc256Tables.S1[(V >>> 16) & 0xFF] & 0xFF;
int T7 = Zuc256Tables.S1[V & 0xFF] & 0xFF;
R1 = makeU32(T0, T1, T2, T3);
R2 = makeU32(T4, T5, T6, T7);
// LFSRWithWorkMode
long a = LFSR[0];
a += (long)LFSR[0] << 8;
a += (long)LFSR[4] << 20;
a += (long)LFSR[10] << 21;
a += (long)LFSR[13] << 17;
a += (long)LFSR[15] << 15;
a = (a & 0x7FFFFFFF) + (a >>> 31);
int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31));
System.arraycopy(LFSR, 1, LFSR, 0, 15);
LFSR[15] = v;
} }
state.R1 = R1;
state.R2 = R2;
} }
// Generate a single keystream word.
//
// int-based variant: reads the LFSR, R1 and R2 from the state, computes the
// output word Z, updates R1/R2 through F_(X1, X2), advances the LFSR by one
// step, stores R1/R2 back and returns Z.
public static int zuc256GenerateKeyword(Zuc256State state) {
int[] LFSR = state.LFSR;
int R1 = state.R1;
int R2 = state.R2;
int X0, X1, X2, X3;
int W1, W2, U, V;
int Z;
// BitReconstruction4
X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF);
X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15);
X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15);
X3 = ((LFSR[2] & 0xFFFF) << 16) | (LFSR[0] >>> 15);
// Output word; uses R1/R2 as they were before this step's update.
Z = X3 ^ ((X0 ^ R1) + R2);
// F_(X1, X2)
W1 = R1 + X1;
W2 = R2 ^ X2;
U = L1((W1 << 16) | (W2 >>> 16));
V = L2((W2 << 16) | (W1 >>> 16));
// S-box layer: bytes of U/V are substituted alternately through S0 and S1.
R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF],
Zuc256Tables.S1[(U >>> 16) & 0xFF],
Zuc256Tables.S0[(U >>> 8) & 0xFF],
Zuc256Tables.S1[U & 0xFF]);
R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF],
Zuc256Tables.S1[(V >>> 16) & 0xFF],
Zuc256Tables.S0[(V >>> 8) & 0xFF],
Zuc256Tables.S1[V & 0xFF]);
// LFSRWithWorkMode
long a = LFSR[0];
a += (long)LFSR[0] << 8;
a += (long)LFSR[4] << 20;
a += (long)LFSR[10] << 21;
a += (long)LFSR[13] << 17;
a += (long)LFSR[15] << 15;
// Fold the 64-bit sum twice: low 31 bits plus the bits above them.
a = (a & 0x7FFFFFFF) + (a >>> 31);
int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31));
// Shift the register down by one cell and append the new value.
System.arraycopy(LFSR, 1, LFSR, 0, 15);
LFSR[15] = v;
state.R1 = R1;
state.R2 = R2;
return Z;
}
// 初始化MAC密钥 // 初始化MAC密钥
private static void zuc256SetMacKey(Zuc256State key, byte[] K, byte[] IV, int macbits) { private static void zuc256SetMacKey(Zuc256State state, byte[] K, byte[] IV, short macbits) {
int[] LFSR = key.LFSR; short[] D;
int R1 = 0; short[] TMP = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
int R2 = 0; short[] X0 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
int X0, X1, X2; short[] X1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
int W, W1, W2, U, V; short[] X2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
int[] D; short[] R1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] R2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] W = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] W1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] W2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] U = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] V = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] T = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
short[] T2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
int IV17 = (IV[17] & 0xFF) >> 2;
int IV18 = ((IV[17] & 0x03) << 4) | ((IV[18] & 0xFF) >> 4);
int IV19 = ((IV[18] & 0x0F) << 2) | ((IV[19] & 0xFF) >> 6);
int IV20 = IV[19] & 0x3F;
int IV21 = (IV[20] & 0xFF) >> 2;
int IV22 = ((IV[20] & 0x03) << 4) | ((IV[21] & 0xFF) >> 4);
int IV23 = ((IV[21] & 0x0F) << 2) | ((IV[22] & 0xFF) >> 6);
int IV24 = IV[22] & 0x3F;
D = (macbits / 32 < 3) ? Zuc256Tables.ZUC256_D[macbits / 32] : Zuc256Tables.ZUC256_D[3]; // int IV17 = (IV[17] & 0xFF) >> 2;
// int IV18 = ((IV[17] & 0x03) << 4) | ((IV[18] & 0xFF) >> 4);
// int IV19 = ((IV[18] & 0x0F) << 2) | ((IV[19] & 0xFF) >> 6);
// int IV20 = IV[19] & 0x3F;
// int IV21 = (IV[20] & 0xFF) >> 2;
// int IV22 = ((IV[20] & 0x03) << 4) | ((IV[21] & 0xFF) >> 4);
// int IV23 = ((IV[21] & 0x0F) << 2) | ((IV[22] & 0xFF) >> 6);
// int IV24 = IV[22] & 0x3F;
// IV 拆分
short IV17 = (short)((IV[17] & 0xFF) >>> 2);
short IV18 = (short)(((IV[17] & 0x03) << 4) | ((IV[18] & 0xFF) >>> 4));
short IV19 = (short)(((IV[18] & 0x0F) << 2) | ((IV[19] & 0xFF) >>> 6));
short IV20 = (short)(IV[19] & 0x3F);
short IV21 = (short)((IV[20] & 0xFF) >>> 2);
short IV22 = (short)(((IV[20] & 0x03) << 4) | ((IV[21] & 0xFF) >>> 4));
short IV23 = (short)(((IV[21] & 0x0F) << 2) | ((IV[22] & 0xFF) >>> 6));
short IV24 = (short)(IV[22] & 0x3F);
LFSR[0] = makeU31(K[0] & 0xFF, D[0], K[21] & 0xFF, K[16] & 0xFF); // D = (macbits / 32 < 3) ? Zuc256Tables.ZUC256_D[macbits / 32] : Zuc256Tables.ZUC256_D[3];
LFSR[1] = makeU31(K[1] & 0xFF, D[1], K[22] & 0xFF, K[17] & 0xFF); if ((macbits / 32) < 3) {
LFSR[2] = makeU31(K[2] & 0xFF, D[2], K[23] & 0xFF, K[18] & 0xFF); D = Zuc256Tables.ZUC256_D[macbits / 32];
LFSR[3] = makeU31(K[3] & 0xFF, D[3], K[24] & 0xFF, K[19] & 0xFF); } else {
LFSR[4] = makeU31(K[4] & 0xFF, D[4], K[25] & 0xFF, K[20] & 0xFF); D = Zuc256Tables.ZUC256_D[3];
LFSR[5] = makeU31(IV[0] & 0xFF, (D[5] | IV17), K[5] & 0xFF, K[26] & 0xFF); }
LFSR[6] = makeU31(IV[1] & 0xFF, (D[6] | IV18), K[6] & 0xFF, K[27] & 0xFF);
LFSR[7] = makeU31(IV[10] & 0xFF, (D[7] | IV19), K[7] & 0xFF, IV[2] & 0xFF);
LFSR[8] = makeU31(K[8] & 0xFF, (D[8] | IV20), IV[3] & 0xFF, IV[11] & 0xFF);
LFSR[9] = makeU31(K[9] & 0xFF, (D[9] | IV21), IV[12] & 0xFF, IV[4] & 0xFF);
LFSR[10] = makeU31(IV[5] & 0xFF, (D[10] | IV22), K[10] & 0xFF, K[28] & 0xFF);
LFSR[11] = makeU31(K[11] & 0xFF, (D[11] | IV23), IV[6] & 0xFF, IV[13] & 0xFF);
LFSR[12] = makeU31(K[12] & 0xFF, (D[12] | IV24), IV[7] & 0xFF, IV[14] & 0xFF);
LFSR[13] = makeU31(K[13] & 0xFF, D[13], IV[15] & 0xFF, IV[8] & 0xFF);
LFSR[14] = makeU31(K[14] & 0xFF, (D[14] | (K[31] >>> 4)), IV[16] & 0xFF, IV[9] & 0xFF);
LFSR[15] = makeU31(K[15] & 0xFF, (D[15] | (K[31] & 0x0F)), K[30] & 0xFF, K[29] & 0xFF);
for (int i = 0; i < 32; i++) {
short[] tmp = new short[2]; // 临时存储 makeU31 输出 (lo,hi)
// 逐项装载 LFSR
// LFSR[0] = makeU31(K[0] & 0xFF, D[0], K[21] & 0xFF, K[16] & 0xFF);
makeU31((short)(K[0] & 0xFF), (short)D[0], (short)(K[21] & 0xFF), (short)(K[16] & 0xFF), tmp);
state.LFSR_lo[0] = tmp[0]; state.LFSR_hi[0] = tmp[1];
// LFSR[1] = makeU31(K[1] & 0xFF, D[1], K[22] & 0xFF, K[17] & 0xFF);
makeU31((short)(K[1] & 0xFF), (short)D[1], (short)(K[22] & 0xFF), (short)(K[17] & 0xFF), tmp);
state.LFSR_lo[1] = tmp[0]; state.LFSR_hi[1] = tmp[1];
// LFSR[2] = makeU31(K[2] & 0xFF, D[2], K[23] & 0xFF, K[18] & 0xFF);
makeU31((short)(K[2] & 0xFF), (short)D[2], (short)(K[23] & 0xFF), (short)(K[18] & 0xFF), tmp);
state.LFSR_lo[2] = tmp[0]; state.LFSR_hi[2] = tmp[1];
// LFSR[3] = makeU31(K[3] & 0xFF, D[3], K[24] & 0xFF, K[19] & 0xFF);
makeU31((short)(K[3] & 0xFF), (short)D[3], (short)(K[24] & 0xFF), (short)(K[19] & 0xFF), tmp);
state.LFSR_lo[3] = tmp[0]; state.LFSR_hi[3] = tmp[1];
// LFSR[4] = makeU31(K[4] & 0xFF, D[4], K[25] & 0xFF, K[20] & 0xFF);
makeU31((short)(K[4] & 0xFF), (short)D[4], (short)(K[25] & 0xFF), (short)(K[20] & 0xFF), tmp);
state.LFSR_lo[4] = tmp[0]; state.LFSR_hi[4] = tmp[1];
// LFSR[5] = makeU31(IV[0] & 0xFF, (D[5] | IV17), K[5] & 0xFF, K[26] & 0xFF);
makeU31((short)(IV[0] & 0xFF), (short)(D[5] | IV17), (short)(K[5] & 0xFF), (short)(K[26] & 0xFF), tmp);
state.LFSR_lo[5] = tmp[0]; state.LFSR_hi[5] = tmp[1];
// LFSR[6] = makeU31(IV[1] & 0xFF, (D[6] | IV18), K[6] & 0xFF, K[27] & 0xFF);
makeU31((short)(IV[1] & 0xFF), (short)(D[6] | IV18), (short)(K[6] & 0xFF), (short)(K[27] & 0xFF), tmp);
state.LFSR_lo[6] = tmp[0]; state.LFSR_hi[6] = tmp[1];
// LFSR[7] = makeU31(IV[10] & 0xFF, (D[7] | IV19), K[7] & 0xFF, IV[2] & 0xFF);
makeU31((short)(IV[10] & 0xFF), (short)(D[7] | IV19), (short)(K[7] & 0xFF), (short)(IV[2] & 0xFF), tmp);
state.LFSR_lo[7] = tmp[0]; state.LFSR_hi[7] = tmp[1];
// LFSR[8] = makeU31(K[8] & 0xFF, (D[8] | IV20), IV[3] & 0xFF, IV[11] & 0xFF);
makeU31((short)(K[8] & 0xFF), (short)(D[8] | IV20), (short)(IV[3] & 0xFF), (short)(IV[11] & 0xFF), tmp);
state.LFSR_lo[8] = tmp[0]; state.LFSR_hi[8] = tmp[1];
// LFSR[9] = makeU31(K[9] & 0xFF, (D[9] | IV21), IV[12] & 0xFF, IV[4] & 0xFF);
makeU31((short)(K[9] & 0xFF), (short)(D[9] | IV21), (short)(IV[12] & 0xFF), (short)(IV[4] & 0xFF), tmp);
state.LFSR_lo[9] = tmp[0]; state.LFSR_hi[9] = tmp[1];
// LFSR[10] = makeU31(IV[5] & 0xFF, (D[10] | IV22), K[10] & 0xFF, K[28] & 0xFF);
makeU31((short)(IV[5] & 0xFF), (short)(D[10] | IV22), (short)(K[10] & 0xFF), (short)(K[28] & 0xFF), tmp);
state.LFSR_lo[10] = tmp[0]; state.LFSR_hi[10] = tmp[1];
// LFSR[11] = makeU31(K[11] & 0xFF, (D[11] | IV23), IV[6] & 0xFF, IV[13] & 0xFF);
makeU31((short)(K[11] & 0xFF), (short)(D[11] | IV23), (short)(IV[6] & 0xFF), (short)(IV[13] & 0xFF), tmp);
state.LFSR_lo[11] = tmp[0]; state.LFSR_hi[11] = tmp[1];
// LFSR[12] = makeU31(K[12] & 0xFF, (D[12] | IV24), IV[7] & 0xFF, IV[14] & 0xFF);
makeU31((short)(K[12] & 0xFF), (short)(D[12] | IV24), (short)(IV[7] & 0xFF), (short)(IV[14] & 0xFF), tmp);
state.LFSR_lo[12] = tmp[0]; state.LFSR_hi[12] = tmp[1];
// LFSR[13] = makeU31(K[13] & 0xFF, D[13], IV[15] & 0xFF, IV[8] & 0xFF);
makeU31((short)(K[13] & 0xFF), (short)D[13], (short)(IV[15] & 0xFF), (short)(IV[8] & 0xFF), tmp);
state.LFSR_lo[13] = tmp[0]; state.LFSR_hi[13] = tmp[1];
// LFSR[14] = makeU31(K[14] & 0xFF, (D[14] | (K[31] >>> 4)), IV[16] & 0xFF, IV[9] & 0xFF);
makeU31((short)(K[14] & 0xFF), (short)(D[14] | ((K[31] & 0xFF) >>> 4)), (short)(IV[16] & 0xFF), (short)(IV[9] & 0xFF), tmp);
state.LFSR_lo[14] = tmp[0]; state.LFSR_hi[14] = tmp[1];
// LFSR[15] = makeU31(K[15] & 0xFF, (D[15] | (K[31] & 0x0F)), K[30] & 0xFF, K[29] & 0xFF);
makeU31((short)(K[15] & 0xFF), (short)(D[15] | (K[31] & 0x0F)), (short)(K[30] & 0xFF), (short)(K[29] & 0xFF), tmp);
state.LFSR_lo[15] = tmp[0]; state.LFSR_hi[15] = tmp[1];
for (short i = 0; i < 32; i++) {
// BitReconstruction3 // BitReconstruction3
X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); // X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF);
X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); // X0 = ((L15 & 0x7FFF8000)<<1) | (L14 & 0xFFFF)
X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); short c15 = (short)((state.LFSR_lo[15] & 0x8000) >>> 15);
X0[1] = (short)(((state.LFSR_hi[15] & 0x7FFF) << 1) | c15);
X0[0] = state.LFSR_lo[14];
// X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15);
// X1 = ((L11 & 0xFFFF)<<16) | (L9>>>15)
X1[1] = state.LFSR_lo[11];
X1[0] = (short)(((state.LFSR_lo[9] & 0x8000) >>> 15) | (state.LFSR_hi[9] << 1));
// X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15);
// X2 = ((L7 & 0xFFFF)<<16) | (L5>>>15)
X2[1] = state.LFSR_lo[7];
X2[0] = (short)(((state.LFSR_lo[5] & 0x8000) >>> 15) | (state.LFSR_hi[5] << 1));
// F(X0, X1, X2) // F(X0, X1, X2)
W = (X0 ^ R1) + R2; // W = (X0 ^ R1) + R2
W1 = R1 + X1; xor32(X0[0], X0[1], R1[0], R1[1], TMP);
W2 = R2 ^ X2; add32(TMP[0], TMP[1], R2[0], R2[1], W);
U = L1((W1 << 16) | (W2 >>> 16));
V = L2((W2 << 16) | (W1 >>> 16));
R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], // W1 = R1 + X1
Zuc256Tables.S1[(U >>> 16) & 0xFF], add32(R1[0], R1[1], X1[0], X1[1], W1);
Zuc256Tables.S0[(U >>> 8) & 0xFF],
Zuc256Tables.S1[U & 0xFF]);
R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], // W2 = R2 ^ X2
Zuc256Tables.S1[(V >>> 16) & 0xFF], xor32(R2[0], R2[1], X2[0], X2[1], W2);
Zuc256Tables.S0[(V >>> 8) & 0xFF],
Zuc256Tables.S1[V & 0xFF]); // U = L1((W1<<16) | (W2>>>16))
L1(W2[1], W1[0], U);
// V = L2((W2<<16) | (W1>>>16))
L2(W1[1], W2[0], V);
// R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF],
// Zuc256Tables.S1[(U >>> 16) & 0xFF],
// Zuc256Tables.S0[(U >>> 8) & 0xFF],
// Zuc256Tables.S1[U & 0xFF]);
//
// R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF],
// Zuc256Tables.S1[(V >>> 16) & 0xFF],
// Zuc256Tables.S0[(V >>> 8) & 0xFF],
// Zuc256Tables.S1[V & 0xFF]);
// 更新 R1,R2
makeU32(
(short)(Zuc256Tables.S0[((U[1] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(U[1] & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S0[((U[0] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(U[0] & 0xFF)] & 0xFF),
R1);
makeU32(
(short)(Zuc256Tables.S0[((V[1] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(V[1] & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S0[((V[0] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(V[0] & 0xFF)] & 0xFF),
R2);
// LFSRWithInitialisationMode(W >> 1) // LFSRWithInitialisationMode(W >> 1)
int v = LFSR[0]; // int v = LFSR[0];
v = add31(v, rot31(LFSR[0], 8)); V[0] = state.LFSR_lo[0];
v = add31(v, rot31(LFSR[4], 20)); V[1] = state.LFSR_hi[0];
v = add31(v, rot31(LFSR[10], 21));
v = add31(v, rot31(LFSR[13], 17));
v = add31(v, rot31(LFSR[15], 15));
v = add31(v, W >>> 1);
System.arraycopy(LFSR, 1, LFSR, 0, 15); // v = add31(v, rot31(state.LFSR[0], 8))
LFSR[15] = v; rot31(state.LFSR_lo[0], state.LFSR_hi[0], (short)8, T);
add31(V[0], V[1], T[0], T[1], V);
// v = add31(v, rot31(state.LFSR[4], 20));
rot31(state.LFSR_lo[4], state.LFSR_hi[4], (short)20, T);
add31(V[0], V[1], T[0], T[1], V);
// v = add31(v, rot31(state.LFSR[10], 21));
rot31(state.LFSR_lo[10], state.LFSR_hi[10], (short)21, T);
add31(V[0], V[1], T[0], T[1], V);
// v = add31(v, rot31(state.LFSR[13], 17));
rot31(state.LFSR_lo[13], state.LFSR_hi[13], (short)17, T);
add31(V[0], V[1], T[0], T[1], V);
// v = add31(v, rot31(state.LFSR[15], 15));
rot31(state.LFSR_lo[15], state.LFSR_hi[15], (short)15, T);
add31(V[0], V[1], T[0], T[1], V);
// v = add31(v, W >>> 1);
shr32u1(W[0], W[1], T2); // T2[0]=lo, T2[1]=hi无符号>>>1
T2[1] = (short)(T2[1] & 0x7FFF); // 只保留31位
add31(V[0], V[1], T2[0], T2[1], V);
// System.arraycopy(state.LFSR, 1, state.LFSR, 0, 15)
System.arraycopy(state.LFSR_lo, 1, state.LFSR_lo, 0, 15);
System.arraycopy(state.LFSR_hi, 1, state.LFSR_hi, 0, 15);
// state.LFSR[15] = v;
state.LFSR_lo[15] = V[0];
state.LFSR_hi[15] = (short)(V[1] & 0x7FFF);
} }
// BitReconstruction2 // BitReconstruction2
X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); // X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15);
X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); X1[1] = state.LFSR_lo[11];
X1[0] = (short)(((state.LFSR_lo[9] & 0x8000) >>> 15) | (state.LFSR_hi[9] << 1));
// X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15);
X2[1] = state.LFSR_lo[7];
X2[0] = (short)(((state.LFSR_lo[5] & 0x8000) >>> 15) | (state.LFSR_hi[5] << 1));
// F_(X1, X2) // F_(X1, X2)
W1 = R1 + X1; // W1 = R1 + X1;
W2 = R2 ^ X2; add32(state.R1_lo, state.R1_hi, X1[0], X1[1], W1); // W1 = R1 + X1
U = L1((W1 << 16) | (W2 >>> 16)); // W2 = R2 ^ X2;
V = L2((W2 << 16) | (W1 >>> 16)); xor32(state.R2_lo, state.R2_hi, X2[0], X2[1], W2); // W2 = R2 ^ X2
R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], // U = L1((W1 << 16) | (W2 >>> 16));
Zuc256Tables.S1[(U >>> 16) & 0xFF], // U = L1((W1<<16)|(W2>>>16)) → lo=W2_hi, hi=W1_lo
Zuc256Tables.S0[(U >>> 8) & 0xFF], L1(W2[1], W1[0], U);
Zuc256Tables.S1[U & 0xFF]);
R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], // V = L2((W2 << 16) | (W1 >>> 16));
Zuc256Tables.S1[(V >>> 16) & 0xFF], // V = L2((W2<<16)|(W1>>>16)) → lo=W1_hi, hi=W2_lo
Zuc256Tables.S0[(V >>> 8) & 0xFF], L2(W1[1], W2[0], V);
Zuc256Tables.S1[V & 0xFF]);
// R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF],
// Zuc256Tables.S1[(U >>> 16) & 0xFF],
// Zuc256Tables.S0[(U >>> 8) & 0xFF],
// Zuc256Tables.S1[U & 0xFF]);
makeU32(
(short)(Zuc256Tables.S0[((U[1] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(U[1] & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S0[((U[0] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(U[0] & 0xFF)] & 0xFF),
R1);
// R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF],
// Zuc256Tables.S1[(V >>> 16) & 0xFF],
// Zuc256Tables.S0[(V >>> 8) & 0xFF],
// Zuc256Tables.S1[V & 0xFF]);
makeU32(
(short)(Zuc256Tables.S0[((V[1] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(V[1] & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S0[((V[0] >>> 8) & 0xFF)] & 0xFF),
(short)(Zuc256Tables.S1[(V[0] & 0xFF)] & 0xFF),
R2);
// ---- LFSRWithWorkMode ----
short[] A = new short[4]; // 64位累加器
short[] tmp32 = new short[2];
short[] tmp64 = new short[4];
// LFSRWithWorkMode // LFSRWithWorkMode
long a = LFSR[0]; // long a = LFSR[0];
a += (long)LFSR[0] << 8; tmp32[0] = state.LFSR_lo[0];
a += (long)LFSR[4] << 20; tmp32[1] = state.LFSR_hi[0];
a += (long)LFSR[10] << 21; create_64b_from_32b(tmp64, tmp32, (short)0); add64(A, tmp64);
a += (long)LFSR[13] << 17;
a += (long)LFSR[15] << 15;
a = (a & 0x7FFFFFFF) + (a >>> 31);
int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31));
System.arraycopy(LFSR, 1, LFSR, 0, 15); // a += (long)LFSR[0] << 8;
LFSR[15] = v; create_64b_from_32b(tmp64, tmp32, (short)8); add64(A, tmp64);
key.R1 = R1; // a += (long)LFSR[4] << 20;
key.R2 = R2; tmp32[0] = state.LFSR_lo[4]; tmp32[1] = state.LFSR_hi[4];
create_64b_from_32b(tmp64, tmp32, (short)20); add64(A, tmp64);
// a += (long)LFSR[10] << 21;
tmp32[0] = state.LFSR_lo[10]; tmp32[1] = state.LFSR_hi[10];
create_64b_from_32b(tmp64, tmp32, (short)21); add64(A, tmp64);
// a += (long)LFSR[13] << 17;
tmp32[0] = state.LFSR_lo[13]; tmp32[1] = state.LFSR_hi[13];
create_64b_from_32b(tmp64, tmp32, (short)17); add64(A, tmp64);
// a += (long)LFSR[15] << 15;
tmp32[0] = state.LFSR_lo[15]; tmp32[1] = state.LFSR_hi[15];
create_64b_from_32b(tmp64, tmp32, (short)15); add64(A, tmp64);
// a = (a & 0x7FFFFFFF) + (a >>> 31);
short[] low31 = new short[4];
short[] r31 = new short[4];
and64_7FFFFFFF_to32(A, low31);
shr64u_31(A, r31);
short[] v64 = new short[4];
add64(v64, low31);
add64(v64, r31);
// int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31));
and64_7FFFFFFF_to32(v64, low31);
shr64u_31(v64, r31);
short[] vv = new short[4];
add64(vv, low31);
add64(vv, r31);
short v_lo = vv[0];
short v_hi = (short)(vv[1] & 0x7FFF);
// LFSR左移
// System.arraycopy(LFSR, 1, LFSR, 0, 15);
// LFSR[15] = v;
System.arraycopy(state.LFSR_lo, 1, state.LFSR_lo, 0, 15);
System.arraycopy(state.LFSR_hi, 1, state.LFSR_hi, 0, 15);
state.LFSR_lo[15] = v_lo;
state.LFSR_hi[15] = v_hi;
state.R1_lo = R1[0]; state.R1_hi = R1[1];
state.R2_lo = R2[0]; state.R2_hi = R2[1];
} }
} }

View File

@@ -1,9 +1,12 @@
package com.cscn.zuc256; package com.cscn;
import com.cscn.Zuc256Core; import javacard.framework.JCSystem;
import com.cscn.Zuc256State;
import java.util.Arrays; import static com.cscn.Zuc256Core.zuc256GenerateKeystream;
import static com.cscn.Zuc256Core.zuc256GenerateKeyword;
import static com.cscn.Zuc256Util.getU32;
import static com.cscn.Zuc256Util.putU32;
import static com.cscn.Zuc256Util.xor32;
/** /**
@@ -12,7 +15,7 @@ import java.util.Arrays;
public final class Zuc256EncryptCtx { public final class Zuc256EncryptCtx {
Zuc256State state; Zuc256State state;
byte[] buf; byte[] buf;
int buflen; short buflen;
public Zuc256EncryptCtx(Zuc256State state, byte[] buf){ public Zuc256EncryptCtx(Zuc256State state, byte[] buf){
this.state = state; this.state = state;
@@ -31,69 +34,120 @@ public final class Zuc256EncryptCtx {
// 初始化加密上下文 // 初始化加密上下文
public void init(byte[] key32, byte[] iv) { public void init(byte[] key32, byte[] iv) {
Arrays.fill(this.buf, (byte) 0); // Arrays.fill(this.buf, (byte) 0);
for (short i = 0; i < (short)this.buf.length; i++) {
this.buf[i] = (byte)0;
}
this.buflen = 0; this.buflen = 0;
Zuc256Core.initState(this.state, key32, iv); Zuc256Core.initState(this.state, key32, iv);
} }
// 分阶段处理加密数据 // 分阶段处理加密数据
public void update(byte[] in, int inlen, byte[] out) { public void update(byte[] in, short inlen, byte[] out) {
if (in == null || out == null || inlen == 0) return; if (in == null || out == null || inlen == 0) return;
short inPos = 0; // 输入偏移
short outPos = 0; // 输出偏移
// 处理缓冲区中剩余的非4字节数据 // 处理缓冲区中剩余的非4字节数据
if (this.buflen > 0) { if (this.buflen > 0) {
int need = 4 - this.buflen; // int need = 4 - this.buflen;
int copy = Math.min(inlen, need); short need = (short)(4 - this.buflen);
// int copy = Math.min(inlen, need);
short copy = (short)((inlen < need) ? inlen : need);
System.arraycopy(in, 0, this.buf, this.buflen, copy); System.arraycopy(in, 0, this.buf, this.buflen, copy);
this.buflen += copy; this.buflen += copy;
// 调整输入指针和长度 // 调整输入指针和长度
byte[] newIn = new byte[inlen - copy]; // byte[] newIn = new byte[inlen - copy];
if (inlen - copy > 0) { // if (inlen - copy > 0) {
System.arraycopy(in, copy, newIn, 0, inlen - copy); // System.arraycopy(in, copy, newIn, 0, inlen - copy);
} // }
in = newIn; // in = newIn;
// inlen -= copy;
// 推进输入指针与剩余长度
inPos += copy;
inlen -= copy; inlen -= copy;
// 缓冲区已满处理一个完整的4字节块 // 缓冲区已满处理一个完整的4字节块
if (this.buflen == 4) { if (this.buflen == 4) {
int keystream = zuc256GenerateKeyword(this.state); // int keystream = zuc256GenerateKeyword(this.state);
int plain = getU32(this.buf, 0); short[] ks = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
putU32(out, 0, plain ^ keystream); zuc256GenerateKeyword(this.state, ks); // ks[0]=lo, ks[1]=hi
// int plain = getU32(this.buf, 0);
// 取出 4 字节明文 → plain[0]=lo, plain[1]=hi
short[] plain = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
getU32(this.buf, (short)0, plain);
// putU32(out, 0, plain ^ keystream);
// plain ^ ks → res
short[] res = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
xor32(plain[0], plain[1], ks[0], ks[1], res);
// 写回 out 的前4字节
putU32(out, outPos, res[0], res[1]);
this.buflen = 0; this.buflen = 0;
Arrays.fill(this.buf, (byte) 0); // Arrays.fill(this.buf, (byte) 0);
for (short i = 0; i < (short)this.buf.length; i++) {
this.buf[i] = (byte)0;
}
// 调整输出指针 // 调整输出指针
byte[] newOut = new byte[out.length - 4]; // byte[] newOut = new byte[out.length - 4];
if (out.length - 4 > 0) { // if (out.length - 4 > 0) {
System.arraycopy(out, 4, newOut, 0, out.length - 4); // System.arraycopy(out, 4, newOut, 0, out.length - 4);
} // }
out = newOut; // out = newOut;
// 这里C实现就是直接指针+4的。JavaSE实现搞这个new干嘛。。
outPos += 4;
} }
} }
// 处理完整的4字节块 // 处理完整的4字节块
int fullBlocks = inlen / 4; // int fullBlocks = inlen / 4;
short fullBlocks = (short) (inlen / 4);
if (fullBlocks > 0) { if (fullBlocks > 0) {
int[] keystream = new int[fullBlocks]; // int[] keystream = new int[fullBlocks];
zuc256GenerateKeystream(this.state, fullBlocks, keystream); short[] ks_hi = JCSystem.makeTransientShortArray(fullBlocks, JCSystem.CLEAR_ON_DESELECT);
short[] ks_lo = JCSystem.makeTransientShortArray(fullBlocks, JCSystem.CLEAR_ON_DESELECT);
// zuc256GenerateKeystream(this.state, fullBlocks, keystream);
zuc256GenerateKeystream(this.state, fullBlocks, ks_hi, ks_lo);
// 临时装一个32位字
short[] word = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
// 逐块异或加密 // 逐块异或加密
for (int i = 0; i < fullBlocks; i++) { for (short i = 0; i < fullBlocks; i++) {
int plain = getU32(in, i * 4); // int plain = getU32(in, i * 4);
putU32(out, i * 4, plain ^ keystream[i]); short off = (short) (i << 2); // i*4
// 读明文
getU32(in, (short)(inPos+off), word); // word[0]=lo, word[1]=hi
// putU32(out, i * 4, plain ^ keystream[i]);
// XOR keystream
word[0] = (short)(word[0] ^ ks_lo[i]);
word[1] = (short)(word[1] ^ ks_hi[i]);
// 写密文
putU32(out, (short) (outPos+off), word[0], word[1]);
} }
// 调整输入指针和长度 // 调整输入指针和长度
int processed = fullBlocks * 4; // int processed = fullBlocks * 4;
byte[] newIn = new byte[inlen - processed]; short processed = (short)(fullBlocks * 4);
if (inlen - processed > 0) {
System.arraycopy(in, processed, newIn, 0, inlen - processed); // byte[] newIn = new byte[inlen - processed];
} // if (inlen - processed > 0) {
in = newIn; // System.arraycopy(in, processed, newIn, 0, inlen - processed);
// }
// in = newIn;
// inlen -= processed;
// 推进输入/输出指针与剩余长度
inPos += processed;
inlen -= processed; inlen -= processed;
outPos += processed;
} }
// 缓存剩余不足4字节的数据 // 缓存剩余不足4字节的数据
@@ -105,25 +159,46 @@ public final class Zuc256EncryptCtx {
// 完成加密处理 // 完成加密处理
public void finish(byte[] out) { public void finish(byte[] out) {
if (this == null || out == null) return; if (out == null) return;
// 处理缓冲区中剩余的不足4字节数据 // 处理缓冲区中剩余的不足4字节数据
if (this.buflen > 0) { if (this.buflen > 0) {
int keystream = zuc256GenerateKeyword(this.state); // int keystream = zuc256GenerateKeyword(this.state);
// 生成一个 32-bit 密钥字ks[0]=lo16, ks[1]=hi16
short[] ks = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT);
zuc256GenerateKeyword(this.state, ks);
// byte[] keystreamBytes = new byte[4];
// putU32(keystreamBytes, 0, keystream);
byte[] keystreamBytes = new byte[4]; byte[] keystreamBytes = new byte[4];
putU32(keystreamBytes, 0, keystream); putU32(keystreamBytes, (short)0, ks[0], ks[1]);
// 逐字节异或 // 逐字节异或
for (int i = 0; i < this.buflen; i++) { for (short i = 0; i < this.buflen; i++) {
out[i] = (byte) (this.buf[i] ^ keystreamBytes[i]); out[i] = (byte) (this.buf[i] ^ keystreamBytes[i]);
} }
} }
// 清理上下文 // 清理上下文
Arrays.fill(this.buf, (byte) 0); // Arrays.fill(this.buf, (byte) 0);
for (short i = 0; i < (short)this.buf.length; i++) {
this.buf[i] = (byte)0;
}
this.buflen = 0; this.buflen = 0;
Arrays.fill(this.state.LFSR, 0); // Arrays.fill(this.state.LFSR, 0);
this.state.R1 = 0; // LFSR 全部清零(高低位数组各 16 个元素)
this.state.R2 = 0; for (short i = 0; i < 16; i++) {
this.state.LFSR_lo[i] = 0;
this.state.LFSR_hi[i] = 0;
}
// this.state.R1 = 0;
// this.state.R2 = 0;
// R1、R2 清零
this.state.R1_lo = 0;
this.state.R1_hi = 0;
this.state.R2_lo = 0;
this.state.R2_hi = 0;
} }
} }

View File

@@ -321,6 +321,181 @@ public final class Zuc256Util {
output23Byte[22] = (byte) (((src[6] & 0x03) << 6) | src[7]); output23Byte[22] = (byte) (((src[6] & 0x03) << 6) | src[7]);
} }
/**
 * 32-bit addition modulo 2^32 on operands split into 16-bit halves.
 * Computes (a_hi:a_lo) + (b_hi:b_lo); a carry out of bit 31 is discarded.
 *
 * @param a_lo low 16 bits of the first operand
 * @param a_hi high 16 bits of the first operand
 * @param b_lo low 16 bits of the second operand
 * @param b_hi high 16 bits of the second operand
 * @param out  receives the sum: out[0] = low 16 bits, out[1] = high 16 bits
 */
static void add32(short a_lo, short a_hi,
short b_lo, short b_hi,
short[] out /*len=2*/) {
// Low half: the cast keeps only the low 16 bits of the sum.
short sumLo = (short) (a_lo + b_lo);
// Carry out of bit 15: either both operands have bit 15 set, or one of
// them has it set and the sum lost it. The sign of the combined value
// is exactly that bit.
short carry = 0;
if (((a_lo & b_lo) | ((a_lo | b_lo) & ~sumLo)) < 0) {
carry = 1;
}
// High half absorbs the carry; anything beyond bit 31 is dropped.
short sumHi = (short) (a_hi + b_hi + carry);
out[0] = sumLo;
out[1] = sumHi;
}
/**
 * 32-bit addition that also reports the carry out of bit 31.
 * Computes (a_hi:a_lo) + (b_hi:b_lo) using 16-bit arithmetic only.
 *
 * The carry is derived exactly from the operand and sum bits. Comparing
 * only the high halves of the sum and the operands misses the case where
 * both high halves are 0xFFFF and the low half carries
 * (e.g. 0xFFFFFFFF + 0xFFFFFFFF).
 *
 * @param a_lo low 16 bits of the first operand
 * @param a_hi high 16 bits of the first operand
 * @param b_lo low 16 bits of the second operand
 * @param b_hi high 16 bits of the second operand
 * @param out  receives the sum modulo 2^32: out[0] = low 16 bits, out[1] = high 16 bits
 * @return 1 if the true sum does not fit in 32 bits, otherwise 0
 */
static short add32_with_carry(short a_lo, short a_hi,
short b_lo, short b_hi,
short[] out /*len=2*/) {
// Low half and its carry into the high half.
short sumLo = (short) (a_lo + b_lo);
short carryLo = 0;
// Carry out of the top bit of a limb = (a & b) | ((a | b) & ~sum) at that
// bit; for sign-extended shorts this is the sign of the expression.
if (((a_lo & b_lo) | ((a_lo | b_lo) & ~sumLo)) < 0) {
carryLo = 1;
}
// High half including the incoming carry; the same formula stays exact
// because the sum bit already reflects the carry-in.
short sumHi = (short) (a_hi + b_hi + carryLo);
short carryOut = 0;
if (((a_hi & b_hi) | ((a_hi | b_hi) & ~sumHi)) < 0) {
carryOut = 1;
}
out[0] = sumLo;
out[1] = sumHi;
return carryOut;
}
/**
 * 64-bit addition modulo 2^64, in place: a = a + b.
 * Both operands are short[4] in little-endian limb order
 * (index 0 = bits 0..15, index 3 = bits 48..63). b is not modified.
 *
 * The sum is computed as a ripple-carry over the four 16-bit limbs, so a
 * carry is propagated through every limb (including when a limb is 0xFFFF)
 * and no temporary array is allocated per call.
 *
 * @param a first operand; overwritten with the sum
 * @param b second operand
 */
static void add64(short[] a, short[] b) {
short carry = 0;
for (short i = 0; i < 4; i++) {
short x = a[i];
short y = b[i];
short sum = (short) (x + y + carry);
// Carry out of bit 15 of this limb, exact even with a carry-in:
// both inputs have the top bit set, or one has it and the sum lost it.
if (((x & y) | ((x | y) & ~sum)) < 0) {
carry = 1;
} else {
carry = 0;
}
a[i] = sum;
}
}
/**
 * 32-bit XOR on operands split into 16-bit halves.
 *
 * @param a_lo low 16 bits of the first operand
 * @param a_hi high 16 bits of the first operand
 * @param b_lo low 16 bits of the second operand
 * @param b_hi high 16 bits of the second operand
 * @param out  receives the result: out[0] = low 16 bits, out[1] = high 16 bits
 */
public static void xor32(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /*len==2*/) {
final short lowHalf = (short) (b_lo ^ a_lo);
final short highHalf = (short) (b_hi ^ a_hi);
out[0] = lowHalf;
out[1] = highHalf;
}
/**
 * Widens a 32-bit value to 64 bits and shifts it left by k bits.
 * Intended range for k is 0 <= k < 32.
 *
 * @param a receives the 64-bit result as four 16-bit limbs,
 *          a[0] = lowest 16 bits ... a[3] = highest 16 bits
 * @param b the 32-bit input: b[0] = low 16 bits, b[1] = high 16 bits
 * @param k left-shift amount in bits
 */
static void create_64b_from_32b(short[] a/*len=4*/, short[] b/*len=2*/, short k) {
// Start from an all-zero 64-bit value.
for (short i = 0; i < 4; i++) {
a[i] = 0;
}
// Split the shift into a whole-limb move (0 or 1 limbs) and a bit shift.
short limbShift = (short) ((k >= 16) ? 1 : 0);
short bitShift = (short) (k - (limbShift << 4));
short lo = b[0];
short hi = b[1];
if (bitShift == 0) {
// Pure limb move (k == 0 or k == 16): copy the two halves as they are.
a[limbShift] = lo;
a[(short) (limbShift + 1)] = hi;
return;
}
// Bits shifted out of the top of one limb enter the bottom of the next.
short spill = (short) (16 - bitShift);
a[limbShift] = (short) (lo << bitShift);
a[(short) (limbShift + 1)] = (short) ((hi << bitShift) | ((lo & 0xFFFF) >>> spill));
a[(short) (limbShift + 2)] = (short) ((hi & 0xFFFF) >>> spill);
}
/**
 * Computes (A & 0x7FFFFFFF): keeps the low 31 bits of A and clears the rest.
 *
 * @param A   64-bit input as four 16-bit limbs (A[0] = lowest 16 bits);
 *            only A[0] and A[1] are read
 * @param out receives the result as four 16-bit limbs; out[2] and out[3]
 *            are always set to zero
 */
static void and64_7FFFFFFF_to32(short[] A, short[] out) {
final short low16 = A[0];
// Dropping bit 15 of the second limb clears bit 31 of the value.
final short high15 = (short) (A[1] & 0x7FFF);
out[0] = low16;
out[1] = high15;
// Upper 32 bits are masked away entirely.
for (short i = 2; i < 4; i++) {
out[i] = 0;
}
}
/**
 * Unsigned (logical) right shift of a 64-bit value by 31 bits.
 *
 * @param A   64-bit input as four 16-bit limbs, A[0] = lowest 16 bits
 * @param out receives A >>> 31 in the same limb layout
 */
static void shr64u_31(short[] A, short[] out) {
// A >>> 31 == (A >>> 16) >>> 15. The 16-bit step just drops A[0], so
// only the upper three limbs contribute to the result.
short w1 = A[1];
short w2 = A[2];
short w3 = A[3];
// 15-bit step: each result limb takes the top bit of one limb as its
// bit 0 and the low 15 bits of the next limb as its bits 1..15.
out[0] = (short) (((w1 & 0xFFFF) >>> 15) | (w2 << 1));
out[1] = (short) (((w2 & 0xFFFF) >>> 15) | (w3 << 1));
// Nothing above w3, so only its top bit survives here.
out[2] = (short) ((w3 & 0xFFFF) >>> 15);
out[3] = 0;
}
/**
 * Unsigned (logical) right shift of a 32-bit value by one bit.
 *
 * @param lo  low 16 bits of the input
 * @param hi  high 16 bits of the input
 * @param out receives the result: out[0] = low 16 bits, out[1] = high 16 bits
 */
static void shr32u1(short lo, short hi, short[] out) {
// Bit 0 of the high half drops into bit 15 of the low half.
short movedBit = 0;
if ((hi & 0x0001) != 0) {
movedBit = (short) 0x8000;
}
// Mask to 16 bits before shifting so no sign bits are dragged in.
short shiftedLo = (short) ((lo & 0xFFFF) >>> 1);
short shiftedHi = (short) ((hi & 0xFFFF) >>> 1);
out[0] = (short) (shiftedLo | movedBit);
out[1] = shiftedHi;
}
/** 打印/*十六进制调试用TODO 生产/JC 环境可移除) *//* /** 打印/*十六进制调试用TODO 生产/JC 环境可移除) *//*
public static void printHex(String label, byte[] data, int len) { public static void printHex(String label, byte[] data, int len) {
System.out.print(label + ": "); System.out.print(label + ": ");