diff --git a/Project/Src/com/cscn/Zuc256Core.java b/Project/Src/com/cscn/Zuc256Core.java index eb5d84d..1a7b27b 100644 --- a/Project/Src/com/cscn/Zuc256Core.java +++ b/Project/Src/com/cscn/Zuc256Core.java @@ -1,14 +1,21 @@ package com.cscn; +import javacard.framework.JCSystem; + import static com.cscn.Zuc256Util.L1; import static com.cscn.Zuc256Util.L2; import static com.cscn.Zuc256Util.add31; -import static com.cscn.Zuc256Util.add31_pair; +import static com.cscn.Zuc256Util.add32; +import static com.cscn.Zuc256Util.add64; +import static com.cscn.Zuc256Util.and64_7FFFFFFF_to32; +import static com.cscn.Zuc256Util.create_64b_from_32b; import static com.cscn.Zuc256Util.makeU31; import static com.cscn.Zuc256Util.makeU32; import static com.cscn.Zuc256Util.rot31; -import static com.cscn.Zuc256Util.rot31_pair; +import static com.cscn.Zuc256Util.shr32u1; +import static com.cscn.Zuc256Util.shr64u_31; +import static com.cscn.Zuc256Util.xor32; /** * ZUC-256 核心:状态初始化、密钥字生成、密钥流生成。 @@ -19,278 +26,555 @@ public final class Zuc256Core { /** 初始化状态(Key + IV) */ public static void initState(Zuc256State state, byte[] key32, byte[] iv) { - zuc256SetMacKey(state, key32, iv, 0); + zuc256SetMacKey(state, key32, iv, (short)0); } /** 生成单个密钥字 */ - public static int generateKeyword(Zuc256State state) { - int[] LFSR = state.LFSR; - int R1 = state.R1; - int R2 = state.R2; - int X0, X1, X2, X3; - int W1, W2, U, V; - int Z; + public static void zuc256GenerateKeyword(Zuc256State state, short[] out) { +// int[] LFSR = state.LFSR; +// int R1 = state.R1; +// int R2 = state.R2; +// int X0, X1, X2, X3; +// int W1, W2, U, V; +// int Z; + + short[] LFSR_hi = state.LFSR_hi; + short[] LFSR_lo = state.LFSR_lo; + + // 工作寄存器(32位值的临时 out32 缓冲,全用short[2])[lo, hi] + short[] X0 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] X1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] X2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] X3 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + + short[] R1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] R2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] W1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] W2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] U = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] V = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] Z = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] TMP0 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] TMP1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] TMP2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + + // 载入 R1,R2 + R1[0] = state.R1_lo; + R1[1] = state.R1_hi; + R2[0] = state.R2_lo; + R2[1] = state.R2_hi; + // BitReconstruction4 - X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); - X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); - X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); - X3 = ((LFSR[2] & 0xFFFF) << 16) | (LFSR[0] >>> 15); + // X0 = ((L15 & 0x7FFF8000) << 1) | (L14 & 0xFFFF) + short c15 = (short)((LFSR_lo[15] & 0x8000) >>> 15); // 左移产生的进位 + X0[1] = (short)(((LFSR_hi[15] & 0x7FFF) << 1) | c15); // hi + X0[0] = LFSR_lo[14]; // lo - Z = X3 ^ ((X0 ^ R1) + R2); + // X1 = ((L11 & 0xFFFF) << 16) | (L9 >>> 15) + X1[1] = LFSR_lo[11]; + X1[0] = (short)(((LFSR_lo[9] & 0x8000) >>> 15) | (LFSR_hi[9] << 1)); + + // X2 = ((L7 & 0xFFFF) << 16) | (L5 >>> 15) + X2[1] = LFSR_lo[7]; + X2[0] = (short)(((LFSR_lo[5] & 0x8000) >>> 15) | (LFSR_hi[5] << 1)); + + // X3 = ((L2 & 0xFFFF) << 16) | (L0 >>> 15) + X3[1] = LFSR_lo[2]; + X3[0] = (short)(((LFSR_lo[0] & 0x8000) >>> 15) | (LFSR_hi[0] << 1)); + + + + // ---- 输入:X0,X1,X2,X3,R1,R2 均为 short[2]; 输出:Z,W1,W2,U,V ---- + + // Z = X3 ^ ((X0 ^ R1) + R2) + xor32(X0[0], X0[1], R1[0], R1[1], TMP0); // TMP0 = X0 ^ R1 + add32(TMP0[0], TMP0[1], R2[0], R2[1], TMP1); // TMP1 = TMP0 + R2 + xor32(X3[0], X3[1], TMP1[0], TMP1[1], Z); // Z = X3 ^ TMP1 // F_(X1, X2) - W1 = R1 + X1; - W2 = R2 ^ X2; - U = L1((W1 << 16) | (W2 >>> 16)); - V = L2((W2 << 16) | (W1 >>> 16)); + // W1 = R1 + X1 + add32(R1[0], R1[1], X1[0], X1[1], W1); - R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], - Zuc256Tables.S1[(U >>> 16) & 0xFF], - Zuc256Tables.S0[(U >>> 8) & 0xFF], - Zuc256Tables.S1[U & 0xFF]); + // W2 = R2 ^ X2 + xor32(R2[0], R2[1], X2[0], X2[1], W2); - R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], - Zuc256Tables.S1[(V >>> 16) & 0xFF], - Zuc256Tables.S0[(V >>> 8) & 0xFF], - Zuc256Tables.S1[V & 0xFF]); + // U = L1((W1 << 16) | (W2 >>> 16)) + // (W1<<16): lo=0, hi=W1_lo + // (W2>>>16): lo=W2_hi, hi=0 + // OR 结果: lo=W2_hi, hi=W1_lo + L1(W2[1], W1[0], U); - // LFSRWithWorkMode - long a = LFSR[0]; - a += (long)LFSR[0] << 8; - a += (long)LFSR[4] << 20; - a += (long)LFSR[10] << 21; - a += (long)LFSR[13] << 17; - a += (long)LFSR[15] << 15; - a = (a & 0x7FFFFFFF) + (a >>> 31); - int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31)); + // V = L2((W2 << 16) | (W1 >>> 16)) + // (W2<<16): lo=0, hi=W2_lo + // (W1>>>16): lo=W1_hi, hi=0 + // OR 结果: lo=W1_hi, hi=W2_lo + L2(W1[1], W2[0], V); - System.arraycopy(LFSR, 1, LFSR, 0, 15); - LFSR[15] = v; - state.R1 = R1; - state.R2 = R2; +// R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], +// Zuc256Tables.S1[(U >>> 16) & 0xFF], +// Zuc256Tables.S0[(U >>> 8) & 0xFF], +// Zuc256Tables.S1[U & 0xFF]); + makeU32( + (short)(Zuc256Tables.S0[((U[1] >>> 8) & 0xFF)] & 0xFF), // (U >>> 24) & 0xFF + (short)(Zuc256Tables.S1[(U[1] & 0xFF)] & 0xFF), // (U >>> 16) & 0xFF + (short)(Zuc256Tables.S0[((U[0] >>> 8) & 0xFF)] & 0xFF), // (U >>> 8) & 0xFF + (short)(Zuc256Tables.S1[(U[0] & 0xFF)] & 0xFF), // (U >>> 0) & 0xFF + R1); + +// R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], +// Zuc256Tables.S1[(V >>> 16) & 0xFF], +// Zuc256Tables.S0[(V >>> 8) & 0xFF], +// Zuc256Tables.S1[V & 0xFF]); + makeU32( + (short)(Zuc256Tables.S0[((V[1] >>> 8) & 0xFF)] & 0xFF), // (V >>> 24) & 0xFF + (short)(Zuc256Tables.S1[(V[1] & 0xFF)] & 0xFF), // (V >>> 16) & 0xFF + (short)(Zuc256Tables.S0[((V[0] >>> 8) & 0xFF)] & 0xFF), // (V >>> 8) & 0xFF + (short)(Zuc256Tables.S1[(V[0] & 0xFF)] & 0xFF), // (V >>> 0) & 0xFF + R2); + + + +// // LFSRWithWorkMode +// long a = LFSR[0]; +// a += (long)LFSR[0] << 8; +// a += (long)LFSR[4] << 20; +// a += (long)LFSR[10] << 21; +// a += (long)LFSR[13] << 17; +// a += (long)LFSR[15] << 15; + // ---- 先准备累加器 A (64位) ---- + short[] A = new short[4]; // 64位累加器,初始全0 + A[0] = 0; A[1] = 0; A[2] = 0; A[3] = 0; + + // 临时缓冲 + short[] tmp32 = new short[2]; // 保存一个32位数 (lo,hi) + short[] tmp64 = new short[4]; // 保存移位后的64位数 + + // a = LFSR[0] + tmp32[0] = state.LFSR_lo[0]; + tmp32[1] = state.LFSR_hi[0]; + create_64b_from_32b(tmp64, tmp32, (short)0); + add64(A, tmp64); + + // a += (LFSR[0] << 8) + create_64b_from_32b(tmp64, tmp32, (short)8); + add64(A, tmp64); + + // a += (LFSR[4] << 20) + tmp32[0] = state.LFSR_lo[4]; + tmp32[1] = state.LFSR_hi[4]; + create_64b_from_32b(tmp64, tmp32, (short)20); + add64(A, tmp64); + + // a += (LFSR[10] << 21) + tmp32[0] = state.LFSR_lo[10]; + tmp32[1] = state.LFSR_hi[10]; + create_64b_from_32b(tmp64, tmp32, (short)21); + add64(A, tmp64); + + // a += (LFSR[13] << 17) + tmp32[0] = state.LFSR_lo[13]; + tmp32[1] = state.LFSR_hi[13]; + create_64b_from_32b(tmp64, tmp32, (short)17); + add64(A, tmp64); + + // a += (LFSR[15] << 15) + tmp32[0] = state.LFSR_lo[15]; + tmp32[1] = state.LFSR_hi[15]; + create_64b_from_32b(tmp64, tmp32, (short)15); + add64(A, tmp64); + +// a = (a & 0x7FFFFFFF) + (a >>> 31); + // ---- 第一次折叠:a = (a & 0x7FFFFFFF) + (a >>> 31) ---- + short[] low31 = new short[4]; + short[] r31 = new short[4]; + + and64_7FFFFFFF_to32(A, low31); // low31 = A & 0x7FFFFFFF + shr64u_31(A, r31); // r31 = A >>> 31 + + A[0]=0; A[1]=0; A[2]=0; A[3]=0; + add64(A, low31); + add64(A, r31); +// int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31)); + // ---- 第二次折叠,得到 v(32位)---- + short[] low31b = new short[4]; + short[] r31b = new short[4]; + short[] v64 = new short[4]; + + and64_7FFFFFFF_to32(A, low31b); + shr64u_31(A, r31b); + + v64[0]=0; v64[1]=0; v64[2]=0; v64[3]=0; + add64(v64, low31b); + add64(v64, r31b); + + // v = 32位,取 v64 的低两段 + short v_lo = v64[0]; + short v_hi = (short)(v64[1] & 0x7FFF); // 只保留31位 + +// System.arraycopy(LFSR, 1, LFSR, 0, 15); + // LFSR_lo 向左移 + System.arraycopy(state.LFSR_lo, 1, state.LFSR_lo, 0, 15); + // LFSR_hi 向左移 + System.arraycopy(state.LFSR_hi, 1, state.LFSR_hi, 0, 15); + +// LFSR[15] = v; + // ---- 写回 LFSR[15] ---- + state.LFSR_lo[15] = v_lo; + state.LFSR_hi[15] = v_hi; + +// state.R1 = R1; +// state.R2 = R2; + state.R1_lo = R1[0]; + state.R1_hi = R1[1]; + + state.R2_lo = R2[0]; + state.R2_hi = R2[1]; + + +// return Z; + out[0] = Z[0]; + out[1] = Z[1]; - return Z; } // 生成指定长度的密钥流 - public static void zuc256GenerateKeystream(Zuc256State state, int nwords, int[] keystream) { - int[] LFSR = state.LFSR; - int R1 = state.R1; - int R2 = state.R2; - int X0, X1, X2, X3; - int W1, W2, U, V; - - for (int i = 0; i < nwords; i++) { - // BitReconstruction4 - X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); - X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); - X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); - X3 = ((LFSR[2] & 0xFFFF) << 16) | (LFSR[0] >>> 15); - - keystream[i] = X3 ^ ((X0 ^ R1) + R2); - - // F_(X1, X2) - W1 = R1 + X1; - W2 = R2 ^ X2; - U = L1((W1 << 16) | (W2 >>> 16)); - V = L2((W2 << 16) | (W1 >>> 16)); - - // S盒查找 - int T0 = Zuc256Tables.S0[(U >>> 24) & 0xFF] & 0xFF; - int T2 = Zuc256Tables.S0[(U >>> 8) & 0xFF] & 0xFF; - int T4 = Zuc256Tables.S0[(V >>> 24) & 0xFF] & 0xFF; - int T6 = Zuc256Tables.S0[(V >>> 8) & 0xFF] & 0xFF; - - int T1 = Zuc256Tables.S1[(U >>> 16) & 0xFF] & 0xFF; - int T3 = Zuc256Tables.S1[U & 0xFF] & 0xFF; - int T5 = Zuc256Tables.S1[(V >>> 16) & 0xFF] & 0xFF; - int T7 = Zuc256Tables.S1[V & 0xFF] & 0xFF; - - R1 = makeU32(T0, T1, T2, T3); - R2 = makeU32(T4, T5, T6, T7); - - // LFSRWithWorkMode - long a = LFSR[0]; - a += (long)LFSR[0] << 8; - a += (long)LFSR[4] << 20; - a += (long)LFSR[10] << 21; - a += (long)LFSR[13] << 17; - a += (long)LFSR[15] << 15; - a = (a & 0x7FFFFFFF) + (a >>> 31); - int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31)); - - System.arraycopy(LFSR, 1, LFSR, 0, 15); - LFSR[15] = v; + public static void zuc256GenerateKeystream(Zuc256State state, + short nwords, + short[] keystream_hi, + short[] keystream_lo) { + // 临时存放一个 32 位关键字 + short[] tmp = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + for (short i = 0; i < nwords; i++) { + // 生成一个关键字 -> tmp[0]=lo, tmp[1]=hi + zuc256GenerateKeyword(state, tmp); + // 存入输出数组 + keystream_lo[i] = tmp[0]; + keystream_hi[i] = tmp[1]; } - - state.R1 = R1; - state.R2 = R2; } - // 生成单个密钥字 - public static int zuc256GenerateKeyword(Zuc256State state) { - int[] LFSR = state.LFSR; - int R1 = state.R1; - int R2 = state.R2; - int X0, X1, X2, X3; - int W1, W2, U, V; - int Z; - - // BitReconstruction4 - X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); - X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); - X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); - X3 = ((LFSR[2] & 0xFFFF) << 16) | (LFSR[0] >>> 15); - - Z = X3 ^ ((X0 ^ R1) + R2); - - // F_(X1, X2) - W1 = R1 + X1; - W2 = R2 ^ X2; - U = L1((W1 << 16) | (W2 >>> 16)); - V = L2((W2 << 16) | (W1 >>> 16)); - - R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], - Zuc256Tables.S1[(U >>> 16) & 0xFF], - Zuc256Tables.S0[(U >>> 8) & 0xFF], - Zuc256Tables.S1[U & 0xFF]); - - R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], - Zuc256Tables.S1[(V >>> 16) & 0xFF], - Zuc256Tables.S0[(V >>> 8) & 0xFF], - Zuc256Tables.S1[V & 0xFF]); - - // LFSRWithWorkMode - long a = LFSR[0]; - a += (long)LFSR[0] << 8; - a += (long)LFSR[4] << 20; - a += (long)LFSR[10] << 21; - a += (long)LFSR[13] << 17; - a += (long)LFSR[15] << 15; - a = (a & 0x7FFFFFFF) + (a >>> 31); - int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31)); - - System.arraycopy(LFSR, 1, LFSR, 0, 15); - LFSR[15] = v; - - state.R1 = R1; - state.R2 = R2; - - return Z; - } // 初始化MAC密钥 - private static void zuc256SetMacKey(Zuc256State key, byte[] K, byte[] IV, int macbits) { - int[] LFSR = key.LFSR; - int R1 = 0; - int R2 = 0; - int X0, X1, X2; - int W, W1, W2, U, V; - int[] D; + private static void zuc256SetMacKey(Zuc256State state, byte[] K, byte[] IV, short macbits) { + short[] D; + short[] TMP = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] X0 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] X1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] X2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] R1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] R2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] W = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] W1 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] W2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] U = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] V = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] T = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + short[] T2 = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); - int IV17 = (IV[17] & 0xFF) >> 2; - int IV18 = ((IV[17] & 0x03) << 4) | ((IV[18] & 0xFF) >> 4); - int IV19 = ((IV[18] & 0x0F) << 2) | ((IV[19] & 0xFF) >> 6); - int IV20 = IV[19] & 0x3F; - int IV21 = (IV[20] & 0xFF) >> 2; - int IV22 = ((IV[20] & 0x03) << 4) | ((IV[21] & 0xFF) >> 4); - int IV23 = ((IV[21] & 0x0F) << 2) | ((IV[22] & 0xFF) >> 6); - int IV24 = IV[22] & 0x3F; - D = (macbits / 32 < 3) ? Zuc256Tables.ZUC256_D[macbits / 32] : Zuc256Tables.ZUC256_D[3]; +// int IV17 = (IV[17] & 0xFF) >> 2; +// int IV18 = ((IV[17] & 0x03) << 4) | ((IV[18] & 0xFF) >> 4); +// int IV19 = ((IV[18] & 0x0F) << 2) | ((IV[19] & 0xFF) >> 6); +// int IV20 = IV[19] & 0x3F; +// int IV21 = (IV[20] & 0xFF) >> 2; +// int IV22 = ((IV[20] & 0x03) << 4) | ((IV[21] & 0xFF) >> 4); +// int IV23 = ((IV[21] & 0x0F) << 2) | ((IV[22] & 0xFF) >> 6); +// int IV24 = IV[22] & 0x3F; + // IV 拆分 + short IV17 = (short)((IV[17] & 0xFF) >>> 2); + short IV18 = (short)(((IV[17] & 0x03) << 4) | ((IV[18] & 0xFF) >>> 4)); + short IV19 = (short)(((IV[18] & 0x0F) << 2) | ((IV[19] & 0xFF) >>> 6)); + short IV20 = (short)(IV[19] & 0x3F); + short IV21 = (short)((IV[20] & 0xFF) >>> 2); + short IV22 = (short)(((IV[20] & 0x03) << 4) | ((IV[21] & 0xFF) >>> 4)); + short IV23 = (short)(((IV[21] & 0x0F) << 2) | ((IV[22] & 0xFF) >>> 6)); + short IV24 = (short)(IV[22] & 0x3F); - LFSR[0] = makeU31(K[0] & 0xFF, D[0], K[21] & 0xFF, K[16] & 0xFF); - LFSR[1] = makeU31(K[1] & 0xFF, D[1], K[22] & 0xFF, K[17] & 0xFF); - LFSR[2] = makeU31(K[2] & 0xFF, D[2], K[23] & 0xFF, K[18] & 0xFF); - LFSR[3] = makeU31(K[3] & 0xFF, D[3], K[24] & 0xFF, K[19] & 0xFF); - LFSR[4] = makeU31(K[4] & 0xFF, D[4], K[25] & 0xFF, K[20] & 0xFF); - LFSR[5] = makeU31(IV[0] & 0xFF, (D[5] | IV17), K[5] & 0xFF, K[26] & 0xFF); - LFSR[6] = makeU31(IV[1] & 0xFF, (D[6] | IV18), K[6] & 0xFF, K[27] & 0xFF); - LFSR[7] = makeU31(IV[10] & 0xFF, (D[7] | IV19), K[7] & 0xFF, IV[2] & 0xFF); - LFSR[8] = makeU31(K[8] & 0xFF, (D[8] | IV20), IV[3] & 0xFF, IV[11] & 0xFF); - LFSR[9] = makeU31(K[9] & 0xFF, (D[9] | IV21), IV[12] & 0xFF, IV[4] & 0xFF); - LFSR[10] = makeU31(IV[5] & 0xFF, (D[10] | IV22), K[10] & 0xFF, K[28] & 0xFF); - LFSR[11] = makeU31(K[11] & 0xFF, (D[11] | IV23), IV[6] & 0xFF, IV[13] & 0xFF); - LFSR[12] = makeU31(K[12] & 0xFF, (D[12] | IV24), IV[7] & 0xFF, IV[14] & 0xFF); - LFSR[13] = makeU31(K[13] & 0xFF, D[13], IV[15] & 0xFF, IV[8] & 0xFF); - LFSR[14] = makeU31(K[14] & 0xFF, (D[14] | (K[31] >>> 4)), IV[16] & 0xFF, IV[9] & 0xFF); - LFSR[15] = makeU31(K[15] & 0xFF, (D[15] | (K[31] & 0x0F)), K[30] & 0xFF, K[29] & 0xFF); +// D = (macbits / 32 < 3) ? Zuc256Tables.ZUC256_D[macbits / 32] : Zuc256Tables.ZUC256_D[3]; + if ((macbits / 32) < 3) { + D = Zuc256Tables.ZUC256_D[macbits / 32]; + } else { + D = Zuc256Tables.ZUC256_D[3]; + } - for (int i = 0; i < 32; i++) { + + short[] tmp = new short[2]; // 临时存储 makeU31 输出 (lo,hi) + + // 逐项装载 LFSR +// LFSR[0] = makeU31(K[0] & 0xFF, D[0], K[21] & 0xFF, K[16] & 0xFF); + makeU31((short)(K[0] & 0xFF), (short)D[0], (short)(K[21] & 0xFF), (short)(K[16] & 0xFF), tmp); + state.LFSR_lo[0] = tmp[0]; state.LFSR_hi[0] = tmp[1]; + +// LFSR[1] = makeU31(K[1] & 0xFF, D[1], K[22] & 0xFF, K[17] & 0xFF); + makeU31((short)(K[1] & 0xFF), (short)D[1], (short)(K[22] & 0xFF), (short)(K[17] & 0xFF), tmp); + state.LFSR_lo[1] = tmp[0]; state.LFSR_hi[1] = tmp[1]; + +// LFSR[2] = makeU31(K[2] & 0xFF, D[2], K[23] & 0xFF, K[18] & 0xFF); + makeU31((short)(K[2] & 0xFF), (short)D[2], (short)(K[23] & 0xFF), (short)(K[18] & 0xFF), tmp); + state.LFSR_lo[2] = tmp[0]; state.LFSR_hi[2] = tmp[1]; + +// LFSR[3] = makeU31(K[3] & 0xFF, D[3], K[24] & 0xFF, K[19] & 0xFF); + makeU31((short)(K[3] & 0xFF), (short)D[3], (short)(K[24] & 0xFF), (short)(K[19] & 0xFF), tmp); + state.LFSR_lo[3] = tmp[0]; state.LFSR_hi[3] = tmp[1]; + +// LFSR[4] = makeU31(K[4] & 0xFF, D[4], K[25] & 0xFF, K[20] & 0xFF); + makeU31((short)(K[4] & 0xFF), (short)D[4], (short)(K[25] & 0xFF), (short)(K[20] & 0xFF), tmp); + state.LFSR_lo[4] = tmp[0]; state.LFSR_hi[4] = tmp[1]; + +// LFSR[5] = makeU31(IV[0] & 0xFF, (D[5] | IV17), K[5] & 0xFF, K[26] & 0xFF); + makeU31((short)(IV[0] & 0xFF), (short)(D[5] | IV17), (short)(K[5] & 0xFF), (short)(K[26] & 0xFF), tmp); + state.LFSR_lo[5] = tmp[0]; state.LFSR_hi[5] = tmp[1]; + +// LFSR[6] = makeU31(IV[1] & 0xFF, (D[6] | IV18), K[6] & 0xFF, K[27] & 0xFF); + makeU31((short)(IV[1] & 0xFF), (short)(D[6] | IV18), (short)(K[6] & 0xFF), (short)(K[27] & 0xFF), tmp); + state.LFSR_lo[6] = tmp[0]; state.LFSR_hi[6] = tmp[1]; + +// LFSR[7] = makeU31(IV[10] & 0xFF, (D[7] | IV19), K[7] & 0xFF, IV[2] & 0xFF); + makeU31((short)(IV[10] & 0xFF), (short)(D[7] | IV19), (short)(K[7] & 0xFF), (short)(IV[2] & 0xFF), tmp); + state.LFSR_lo[7] = tmp[0]; state.LFSR_hi[7] = tmp[1]; + +// LFSR[8] = makeU31(K[8] & 0xFF, (D[8] | IV20), IV[3] & 0xFF, IV[11] & 0xFF); + makeU31((short)(K[8] & 0xFF), (short)(D[8] | IV20), (short)(IV[3] & 0xFF), (short)(IV[11] & 0xFF), tmp); + state.LFSR_lo[8] = tmp[0]; state.LFSR_hi[8] = tmp[1]; + +// LFSR[9] = makeU31(K[9] & 0xFF, (D[9] | IV21), IV[12] & 0xFF, IV[4] & 0xFF); + makeU31((short)(K[9] & 0xFF), (short)(D[9] | IV21), (short)(IV[12] & 0xFF), (short)(IV[4] & 0xFF), tmp); + state.LFSR_lo[9] = tmp[0]; state.LFSR_hi[9] = tmp[1]; + +// LFSR[10] = makeU31(IV[5] & 0xFF, (D[10] | IV22), K[10] & 0xFF, K[28] & 0xFF); + makeU31((short)(IV[5] & 0xFF), (short)(D[10] | IV22), (short)(K[10] & 0xFF), (short)(K[28] & 0xFF), tmp); + state.LFSR_lo[10] = tmp[0]; state.LFSR_hi[10] = tmp[1]; + +// LFSR[11] = makeU31(K[11] & 0xFF, (D[11] | IV23), IV[6] & 0xFF, IV[13] & 0xFF); + makeU31((short)(K[11] & 0xFF), (short)(D[11] | IV23), (short)(IV[6] & 0xFF), (short)(IV[13] & 0xFF), tmp); + state.LFSR_lo[11] = tmp[0]; state.LFSR_hi[11] = tmp[1]; + +// LFSR[12] = makeU31(K[12] & 0xFF, (D[12] | IV24), IV[7] & 0xFF, IV[14] & 0xFF); + makeU31((short)(K[12] & 0xFF), (short)(D[12] | IV24), (short)(IV[7] & 0xFF), (short)(IV[14] & 0xFF), tmp); + state.LFSR_lo[12] = tmp[0]; state.LFSR_hi[12] = tmp[1]; + +// LFSR[13] = makeU31(K[13] & 0xFF, D[13], IV[15] & 0xFF, IV[8] & 0xFF); + makeU31((short)(K[13] & 0xFF), (short)D[13], (short)(IV[15] & 0xFF), (short)(IV[8] & 0xFF), tmp); + state.LFSR_lo[13] = tmp[0]; state.LFSR_hi[13] = tmp[1]; + +// LFSR[14] = makeU31(K[14] & 0xFF, (D[14] | (K[31] >>> 4)), IV[16] & 0xFF, IV[9] & 0xFF); + makeU31((short)(K[14] & 0xFF), (short)(D[14] | ((K[31] & 0xFF) >>> 4)), (short)(IV[16] & 0xFF), (short)(IV[9] & 0xFF), tmp); + state.LFSR_lo[14] = tmp[0]; state.LFSR_hi[14] = tmp[1]; + +// LFSR[15] = makeU31(K[15] & 0xFF, (D[15] | (K[31] & 0x0F)), K[30] & 0xFF, K[29] & 0xFF); + makeU31((short)(K[15] & 0xFF), (short)(D[15] | (K[31] & 0x0F)), (short)(K[30] & 0xFF), (short)(K[29] & 0xFF), tmp); + state.LFSR_lo[15] = tmp[0]; state.LFSR_hi[15] = tmp[1]; + + + for (short i = 0; i < 32; i++) { // BitReconstruction3 - X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); - X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); - X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); +// X0 = ((LFSR[15] & 0x7FFF8000) << 1) | (LFSR[14] & 0xFFFF); + // X0 = ((L15 & 0x7FFF8000)<<1) | (L14 & 0xFFFF) + short c15 = (short)((state.LFSR_lo[15] & 0x8000) >>> 15); + X0[1] = (short)(((state.LFSR_hi[15] & 0x7FFF) << 1) | c15); + X0[0] = state.LFSR_lo[14]; + +// X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); + // X1 = ((L11 & 0xFFFF)<<16) | (L9>>>15) + X1[1] = state.LFSR_lo[11]; + X1[0] = (short)(((state.LFSR_lo[9] & 0x8000) >>> 15) | (state.LFSR_hi[9] << 1)); + +// X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); + // X2 = ((L7 & 0xFFFF)<<16) | (L5>>>15) + X2[1] = state.LFSR_lo[7]; + X2[0] = (short)(((state.LFSR_lo[5] & 0x8000) >>> 15) | (state.LFSR_hi[5] << 1)); + + // F(X0, X1, X2) - W = (X0 ^ R1) + R2; - W1 = R1 + X1; - W2 = R2 ^ X2; - U = L1((W1 << 16) | (W2 >>> 16)); - V = L2((W2 << 16) | (W1 >>> 16)); + // W = (X0 ^ R1) + R2 + xor32(X0[0], X0[1], R1[0], R1[1], TMP); + add32(TMP[0], TMP[1], R2[0], R2[1], W); - R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], - Zuc256Tables.S1[(U >>> 16) & 0xFF], - Zuc256Tables.S0[(U >>> 8) & 0xFF], - Zuc256Tables.S1[U & 0xFF]); + // W1 = R1 + X1 + add32(R1[0], R1[1], X1[0], X1[1], W1); - R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], - Zuc256Tables.S1[(V >>> 16) & 0xFF], - Zuc256Tables.S0[(V >>> 8) & 0xFF], - Zuc256Tables.S1[V & 0xFF]); + // W2 = R2 ^ X2 + xor32(R2[0], R2[1], X2[0], X2[1], W2); + + // U = L1((W1<<16) | (W2>>>16)) + L1(W2[1], W1[0], U); + + // V = L2((W2<<16) | (W1>>>16)) + L2(W1[1], W2[0], V); + +// R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], +// Zuc256Tables.S1[(U >>> 16) & 0xFF], +// Zuc256Tables.S0[(U >>> 8) & 0xFF], +// Zuc256Tables.S1[U & 0xFF]); +// +// R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], +// Zuc256Tables.S1[(V >>> 16) & 0xFF], +// Zuc256Tables.S0[(V >>> 8) & 0xFF], +// Zuc256Tables.S1[V & 0xFF]); + // 更新 R1,R2 + makeU32( + (short)(Zuc256Tables.S0[((U[1] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(U[1] & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S0[((U[0] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(U[0] & 0xFF)] & 0xFF), + R1); + + makeU32( + (short)(Zuc256Tables.S0[((V[1] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(V[1] & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S0[((V[0] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(V[0] & 0xFF)] & 0xFF), + R2); // LFSRWithInitialisationMode(W >> 1) - int v = LFSR[0]; - v = add31(v, rot31(LFSR[0], 8)); - v = add31(v, rot31(LFSR[4], 20)); - v = add31(v, rot31(LFSR[10], 21)); - v = add31(v, rot31(LFSR[13], 17)); - v = add31(v, rot31(LFSR[15], 15)); - v = add31(v, W >>> 1); +// int v = LFSR[0]; + V[0] = state.LFSR_lo[0]; + V[1] = state.LFSR_hi[0]; - System.arraycopy(LFSR, 1, LFSR, 0, 15); - LFSR[15] = v; + // v = add31(v, rot31(state.LFSR[0], 8)) + rot31(state.LFSR_lo[0], state.LFSR_hi[0], (short)8, T); + add31(V[0], V[1], T[0], T[1], V); + +// v = add31(v, rot31(state.LFSR[4], 20)); + rot31(state.LFSR_lo[4], state.LFSR_hi[4], (short)20, T); + add31(V[0], V[1], T[0], T[1], V); + +// v = add31(v, rot31(state.LFSR[10], 21)); + rot31(state.LFSR_lo[10], state.LFSR_hi[10], (short)21, T); + add31(V[0], V[1], T[0], T[1], V); + +// v = add31(v, rot31(state.LFSR[13], 17)); + rot31(state.LFSR_lo[13], state.LFSR_hi[13], (short)17, T); + add31(V[0], V[1], T[0], T[1], V); + +// v = add31(v, rot31(state.LFSR[15], 15)); + rot31(state.LFSR_lo[15], state.LFSR_hi[15], (short)15, T); + add31(V[0], V[1], T[0], T[1], V); + +// v = add31(v, W >>> 1); + shr32u1(W[0], W[1], T2); // T2[0]=lo, T2[1]=hi(无符号>>>1) + T2[1] = (short)(T2[1] & 0x7FFF); // 只保留31位 + add31(V[0], V[1], T2[0], T2[1], V); + + // System.arraycopy(state.LFSR, 1, state.LFSR, 0, 15) + System.arraycopy(state.LFSR_lo, 1, state.LFSR_lo, 0, 15); + System.arraycopy(state.LFSR_hi, 1, state.LFSR_hi, 0, 15); + +// state.LFSR[15] = v; + state.LFSR_lo[15] = V[0]; + state.LFSR_hi[15] = (short)(V[1] & 0x7FFF); } // BitReconstruction2 - X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); - X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); +// X1 = ((LFSR[11] & 0xFFFF) << 16) | (LFSR[9] >>> 15); + X1[1] = state.LFSR_lo[11]; + X1[0] = (short)(((state.LFSR_lo[9] & 0x8000) >>> 15) | (state.LFSR_hi[9] << 1)); + +// X2 = ((LFSR[7] & 0xFFFF) << 16) | (LFSR[5] >>> 15); + X2[1] = state.LFSR_lo[7]; + X2[0] = (short)(((state.LFSR_lo[5] & 0x8000) >>> 15) | (state.LFSR_hi[5] << 1)); // F_(X1, X2) - W1 = R1 + X1; - W2 = R2 ^ X2; - U = L1((W1 << 16) | (W2 >>> 16)); - V = L2((W2 << 16) | (W1 >>> 16)); +// W1 = R1 + X1; + add32(state.R1_lo, state.R1_hi, X1[0], X1[1], W1); // W1 = R1 + X1 +// W2 = R2 ^ X2; + xor32(state.R2_lo, state.R2_hi, X2[0], X2[1], W2); // W2 = R2 ^ X2 - R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], - Zuc256Tables.S1[(U >>> 16) & 0xFF], - Zuc256Tables.S0[(U >>> 8) & 0xFF], - Zuc256Tables.S1[U & 0xFF]); +// U = L1((W1 << 16) | (W2 >>> 16)); + // U = L1((W1<<16)|(W2>>>16)) → lo=W2_hi, hi=W1_lo + L1(W2[1], W1[0], U); - R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], - Zuc256Tables.S1[(V >>> 16) & 0xFF], - Zuc256Tables.S0[(V >>> 8) & 0xFF], - Zuc256Tables.S1[V & 0xFF]); +// V = L2((W2 << 16) | (W1 >>> 16)); + // V = L2((W2<<16)|(W1>>>16)) → lo=W1_hi, hi=W2_lo + L2(W1[1], W2[0], V); + +// R1 = makeU32(Zuc256Tables.S0[(U >>> 24) & 0xFF], +// Zuc256Tables.S1[(U >>> 16) & 0xFF], +// Zuc256Tables.S0[(U >>> 8) & 0xFF], +// Zuc256Tables.S1[U & 0xFF]); + makeU32( + (short)(Zuc256Tables.S0[((U[1] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(U[1] & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S0[((U[0] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(U[0] & 0xFF)] & 0xFF), + R1); + +// R2 = makeU32(Zuc256Tables.S0[(V >>> 24) & 0xFF], +// Zuc256Tables.S1[(V >>> 16) & 0xFF], +// Zuc256Tables.S0[(V >>> 8) & 0xFF], +// Zuc256Tables.S1[V & 0xFF]); + makeU32( + (short)(Zuc256Tables.S0[((V[1] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(V[1] & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S0[((V[0] >>> 8) & 0xFF)] & 0xFF), + (short)(Zuc256Tables.S1[(V[0] & 0xFF)] & 0xFF), + R2); + + // ---- LFSRWithWorkMode ---- + short[] A = new short[4]; // 64位累加器 + short[] tmp32 = new short[2]; + short[] tmp64 = new short[4]; // LFSRWithWorkMode - long a = LFSR[0]; - a += (long)LFSR[0] << 8; - a += (long)LFSR[4] << 20; - a += (long)LFSR[10] << 21; - a += (long)LFSR[13] << 17; - a += (long)LFSR[15] << 15; - a = (a & 0x7FFFFFFF) + (a >>> 31); - int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31)); +// long a = LFSR[0]; + tmp32[0] = state.LFSR_lo[0]; + tmp32[1] = state.LFSR_hi[0]; + create_64b_from_32b(tmp64, tmp32, (short)0); add64(A, tmp64); - System.arraycopy(LFSR, 1, LFSR, 0, 15); - LFSR[15] = v; +// a += (long)LFSR[0] << 8; + create_64b_from_32b(tmp64, tmp32, (short)8); add64(A, tmp64); - key.R1 = R1; - key.R2 = R2; +// a += (long)LFSR[4] << 20; + tmp32[0] = state.LFSR_lo[4]; tmp32[1] = state.LFSR_hi[4]; + create_64b_from_32b(tmp64, tmp32, (short)20); add64(A, tmp64); + +// a += (long)LFSR[10] << 21; + tmp32[0] = state.LFSR_lo[10]; tmp32[1] = state.LFSR_hi[10]; + create_64b_from_32b(tmp64, tmp32, (short)21); add64(A, tmp64); + +// a += (long)LFSR[13] << 17; + tmp32[0] = state.LFSR_lo[13]; tmp32[1] = state.LFSR_hi[13]; + create_64b_from_32b(tmp64, tmp32, (short)17); add64(A, tmp64); + +// a += (long)LFSR[15] << 15; + tmp32[0] = state.LFSR_lo[15]; tmp32[1] = state.LFSR_hi[15]; + create_64b_from_32b(tmp64, tmp32, (short)15); add64(A, tmp64); + +// a = (a & 0x7FFFFFFF) + (a >>> 31); + short[] low31 = new short[4]; + short[] r31 = new short[4]; + and64_7FFFFFFF_to32(A, low31); + shr64u_31(A, r31); + + short[] v64 = new short[4]; + add64(v64, low31); + add64(v64, r31); + +// int v = (int) ((a & 0x7FFFFFFF) + (a >>> 31)); + and64_7FFFFFFF_to32(v64, low31); + shr64u_31(v64, r31); + short[] vv = new short[4]; + add64(vv, low31); + add64(vv, r31); + + short v_lo = vv[0]; + short v_hi = (short)(vv[1] & 0x7FFF); + +// LFSR左移 +// System.arraycopy(LFSR, 1, LFSR, 0, 15); +// LFSR[15] = v; + System.arraycopy(state.LFSR_lo, 1, state.LFSR_lo, 0, 15); + System.arraycopy(state.LFSR_hi, 1, state.LFSR_hi, 0, 15); + state.LFSR_lo[15] = v_lo; + state.LFSR_hi[15] = v_hi; + + state.R1_lo = R1[0]; state.R1_hi = R1[1]; + state.R2_lo = R2[0]; state.R2_hi = R2[1]; } } diff --git a/Project/Src/com/cscn/Zuc256EncryptCtx.java b/Project/Src/com/cscn/Zuc256EncryptCtx.java index 65d22bd..979529b 100644 --- a/Project/Src/com/cscn/Zuc256EncryptCtx.java +++ b/Project/Src/com/cscn/Zuc256EncryptCtx.java @@ -1,9 +1,12 @@ -package com.cscn.zuc256; +package com.cscn; -import com.cscn.Zuc256Core; -import com.cscn.Zuc256State; +import javacard.framework.JCSystem; -import java.util.Arrays; +import static com.cscn.Zuc256Core.zuc256GenerateKeystream; +import static com.cscn.Zuc256Core.zuc256GenerateKeyword; +import static com.cscn.Zuc256Util.getU32; +import static com.cscn.Zuc256Util.putU32; +import static com.cscn.Zuc256Util.xor32; /** @@ -12,7 +15,7 @@ import java.util.Arrays; public final class Zuc256EncryptCtx { Zuc256State state; byte[] buf; - int buflen; + short buflen; public Zuc256EncryptCtx(Zuc256State state, byte[] buf){ this.state = state; @@ -31,69 +34,120 @@ public final class Zuc256EncryptCtx { // 初始化加密上下文 public void init(byte[] key32, byte[] iv) { - Arrays.fill(this.buf, (byte) 0); +// Arrays.fill(this.buf, (byte) 0); + for (short i = 0; i < (short)this.buf.length; i++) { + this.buf[i] = (byte)0; + } this.buflen = 0; Zuc256Core.initState(this.state, key32, iv); } // 分阶段处理加密数据 - public void update(byte[] in, int inlen, byte[] out) { + public void update(byte[] in, short inlen, byte[] out) { if (in == null || out == null || inlen == 0) return; + short inPos = 0; // 输入偏移 + short outPos = 0; // 输出偏移 + // 处理缓冲区中剩余的非4字节数据 if (this.buflen > 0) { - int need = 4 - this.buflen; - int copy = Math.min(inlen, need); +// int need = 4 - this.buflen; + short need = (short)(4 - this.buflen); +// int copy = Math.min(inlen, need); + short copy = (short)((inlen < need) ? inlen : need); System.arraycopy(in, 0, this.buf, this.buflen, copy); this.buflen += copy; // 调整输入指针和长度 - byte[] newIn = new byte[inlen - copy]; - if (inlen - copy > 0) { - System.arraycopy(in, copy, newIn, 0, inlen - copy); - } - in = newIn; - inlen -= copy; +// byte[] newIn = new byte[inlen - copy]; +// if (inlen - copy > 0) { +// System.arraycopy(in, copy, newIn, 0, inlen - copy); +// } +// in = newIn; +// inlen -= copy; + // 推进输入指针与剩余长度 + inPos += copy; + inlen -= copy; // 缓冲区已满,处理一个完整的4字节块 if (this.buflen == 4) { - int keystream = zuc256GenerateKeyword(this.state); - int plain = getU32(this.buf, 0); - putU32(out, 0, plain ^ keystream); +// int keystream = zuc256GenerateKeyword(this.state); + short[] ks = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + zuc256GenerateKeyword(this.state, ks); // ks[0]=lo, ks[1]=hi + +// int plain = getU32(this.buf, 0); + // 取出 4 字节明文 → plain[0]=lo, plain[1]=hi + short[] plain = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + getU32(this.buf, (short)0, plain); + +// putU32(out, 0, plain ^ keystream); + // plain ^ ks → res + short[] res = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + xor32(plain[0], plain[1], ks[0], ks[1], res); + // 写回 out 的前4字节 + putU32(out, outPos, res[0], res[1]); this.buflen = 0; - Arrays.fill(this.buf, (byte) 0); +// Arrays.fill(this.buf, (byte) 0); + for (short i = 0; i < (short)this.buf.length; i++) { + this.buf[i] = (byte)0; + } // 调整输出指针 - byte[] newOut = new byte[out.length - 4]; - if (out.length - 4 > 0) { - System.arraycopy(out, 4, newOut, 0, out.length - 4); - } - out = newOut; +// byte[] newOut = new byte[out.length - 4]; +// if (out.length - 4 > 0) { +// System.arraycopy(out, 4, newOut, 0, out.length - 4); +// } +// out = newOut; + // 这里C实现就是直接指针+4的。JavaSE实现搞这个new干嘛。。 + outPos += 4; } } // 处理完整的4字节块 - int fullBlocks = inlen / 4; +// int fullBlocks = inlen / 4; + short fullBlocks = (short) (inlen / 4); if (fullBlocks > 0) { - int[] keystream = new int[fullBlocks]; - zuc256GenerateKeystream(this.state, fullBlocks, keystream); +// int[] keystream = new int[fullBlocks]; + short[] ks_hi = JCSystem.makeTransientShortArray(fullBlocks, JCSystem.CLEAR_ON_DESELECT); + short[] ks_lo = JCSystem.makeTransientShortArray(fullBlocks, JCSystem.CLEAR_ON_DESELECT); + +// zuc256GenerateKeystream(this.state, fullBlocks, keystream); + zuc256GenerateKeystream(this.state, fullBlocks, ks_hi, ks_lo); + + // 临时:装一个32位字 + short[] word = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); // 逐块异或加密 - for (int i = 0; i < fullBlocks; i++) { - int plain = getU32(in, i * 4); - putU32(out, i * 4, plain ^ keystream[i]); + for (short i = 0; i < fullBlocks; i++) { +// int plain = getU32(in, i * 4); + short off = (short) (i << 2); // i*4 + // 读明文 + getU32(in, (short)(inPos+off), word); // word[0]=lo, word[1]=hi + +// putU32(out, i * 4, plain ^ keystream[i]); + // XOR keystream + word[0] = (short)(word[0] ^ ks_lo[i]); + word[1] = (short)(word[1] ^ ks_hi[i]); + // 写密文 + putU32(out, (short) (outPos+off), word[0], word[1]); } // 调整输入指针和长度 - int processed = fullBlocks * 4; - byte[] newIn = new byte[inlen - processed]; - if (inlen - processed > 0) { - System.arraycopy(in, processed, newIn, 0, inlen - processed); - } - in = newIn; - inlen -= processed; +// int processed = fullBlocks * 4; + short processed = (short)(fullBlocks * 4); + +// byte[] newIn = new byte[inlen - processed]; +// if (inlen - processed > 0) { +// System.arraycopy(in, processed, newIn, 0, inlen - processed); +// } +// in = newIn; +// inlen -= processed; + // 推进输入/输出指针与剩余长度 + inPos += processed; + inlen -= processed; + outPos += processed; } // 缓存剩余不足4字节的数据 @@ -105,25 +159,46 @@ public final class Zuc256EncryptCtx { // 完成加密处理 public void finish(byte[] out) { - if (this == null || out == null) return; + if (out == null) return; // 处理缓冲区中剩余的不足4字节数据 if (this.buflen > 0) { - int keystream = zuc256GenerateKeyword(this.state); +// int keystream = zuc256GenerateKeyword(this.state); + // 生成一个 32-bit 密钥字:ks[0]=lo16, ks[1]=hi16 + short[] ks = JCSystem.makeTransientShortArray((short)2, JCSystem.CLEAR_ON_DESELECT); + zuc256GenerateKeyword(this.state, ks); + +// byte[] keystreamBytes = new byte[4]; +// putU32(keystreamBytes, 0, keystream); byte[] keystreamBytes = new byte[4]; - putU32(keystreamBytes, 0, keystream); + putU32(keystreamBytes, (short)0, ks[0], ks[1]); // 逐字节异或 - for (int i = 0; i < this.buflen; i++) { + for (short i = 0; i < this.buflen; i++) { out[i] = (byte) (this.buf[i] ^ keystreamBytes[i]); } } // 清理上下文 - Arrays.fill(this.buf, (byte) 0); +// Arrays.fill(this.buf, (byte) 0); + for (short i = 0; i < (short)this.buf.length; i++) { + this.buf[i] = (byte)0; + } + this.buflen = 0; - Arrays.fill(this.state.LFSR, 0); - this.state.R1 = 0; - this.state.R2 = 0; +// Arrays.fill(this.state.LFSR, 0); + // LFSR 全部清零(高低位数组各 16 个元素) + for (short i = 0; i < 16; i++) { + this.state.LFSR_lo[i] = 0; + this.state.LFSR_hi[i] = 0; + } + +// this.state.R1 = 0; +// this.state.R2 = 0; +// R1、R2 清零 + this.state.R1_lo = 0; + this.state.R1_hi = 0; + this.state.R2_lo = 0; + this.state.R2_hi = 0; } } diff --git a/Project/Src/com/cscn/Zuc256Util.java b/Project/Src/com/cscn/Zuc256Util.java index d57b598..a6cd657 100644 --- a/Project/Src/com/cscn/Zuc256Util.java +++ b/Project/Src/com/cscn/Zuc256Util.java @@ -321,6 +321,181 @@ public final class Zuc256Util { output23Byte[22] = (byte) (((src[6] & 0x03) << 6) | src[7]); } + /** + * 32位加法: (a_hi:a_lo) + (b_hi:b_lo) + * out[0] = lo, out[1] = hi + */ + static void add32(short a_lo, short a_hi, + short b_lo, short b_hi, + short[] out /*len=2*/) { + + // ---- 低16位 ---- + short lo_low = (short)((a_lo & 0x00FF) + (b_lo & 0x00FF)); + short carry0 = (short)(((a_lo & 0x00FF) + (b_lo & 0x00FF)) >>> 8); + + short a_lo_hi = (short)((a_lo >>> 8) & 0x00FF); + short b_lo_hi = (short)((b_lo >>> 8) & 0x00FF); + short lo_high = (short)(a_lo_hi + b_lo_hi + carry0); + short carry1 = (short)(lo_high >>> 8); + + short lo_res = (short)((lo_high << 8) | (lo_low & 0x00FF)); + + // ---- 高16位 ---- + short hi_low = (short)((a_hi & 0x00FF) + (b_hi & 0x00FF) + carry1); + short carry2 = (short)(hi_low >>> 8); + + short a_hi_hi = (short)((a_hi >>> 8) & 0x00FF); + short b_hi_hi = (short)((b_hi >>> 8) & 0x00FF); + short hi_high = (short)(a_hi_hi + b_hi_hi + carry2); + + short hi_res = (short)((hi_high << 8) | (hi_low & 0x00FF)); + + // ---- 输出 ---- + out[0] = lo_res; + out[1] = hi_res; + } + + /** + * 32位加法 + 返回进位 + * 输入: (a_hi:a_lo) + (b_hi:b_lo) + * 输出: out[0]=lo, out[1]=hi + * 返回: 进位 (0或1) + */ + static short add32_with_carry(short a_lo, short a_hi, + short b_lo, short b_hi, + short[] out /*len=2*/) { + // 用你现成的 add32 得到结果 + add32(a_lo, a_hi, b_lo, b_hi, out); + + // 进位判断:如果结果 < 其中一个加数,则说明溢出 + // (因为 add32 是 mod 2^32 的) + // 我们只看 hi 部分即可 + int sum_hi = (out[1] & 0xFFFF); + int a_hi_u = (a_hi & 0xFFFF); + int b_hi_u = (b_hi & 0xFFFF); + + if (sum_hi < a_hi_u || sum_hi < b_hi_u) { + return 1; + } + return 0; + } + + /** + * 64位加法: a4 + b4 -> a4 + * 输入输出: short[4],低到高 (a[0]=lo16, a[1]=hi16, a[2]=lo16 of high dword, a[3]=hi16 of high dword) + */ + static void add64(short[] a, short[] b) { + short[] tmp = new short[2]; + + // 低 32 位 + short carry = add32_with_carry(a[0], a[1], b[0], b[1], tmp); + a[0] = tmp[0]; + a[1] = tmp[1]; + + // 高 32 位 + carry + add32((short)(a[2] + (carry & 0xFFFF)), a[3], b[2], b[3], tmp); + a[2] = tmp[0]; + a[3] = tmp[1]; + } + + + + // 32位异或 + public static void xor32(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /*len==2*/) { + out[0] = (short)(a_lo ^ b_lo); + out[1] = (short)(a_hi ^ b_hi); + } + + /** + * 把32位数 b (b[0]=lo, b[1]=hi) 左移 k 位 (0 <= k < 32), + * 结果放到64位数 a (a[0]=最低16位 ... a[3]=最高16位)。 + */ + static void create_64b_from_32b(short[] a/*len=4*/, short[] b/*len=2*/, short k) { + // 先清零 + a[0] = 0; a[1] = 0; a[2] = 0; a[3] = 0; + + if (k == 0) { + a[0] = b[0]; + a[1] = b[1]; + return; + } + + if (k < 16) { + // lo << k + a[0] = (short)(b[0] << k); + // hi << k, 以及 lo >>> (16-k) 进位 + a[1] = (short)((b[1] << k) | ((b[0] & 0xFFFF) >>> (16 - k))); + // hi >>> (16-k) 残留进到 a[2] + a[2] = (short)((b[1] & 0xFFFF) >>> (16 - k)); + return; + } + + if (k == 16) { + a[1] = b[0]; + a[2] = b[1]; + return; + } + + // 16 < k < 32 + short kk = (short)(k - 16); + a[1] = (short)(b[0] << kk); + a[2] = (short)((b[1] << kk) | ((b[0] & 0xFFFF) >>> (16 - kk))); + a[3] = (short)((b[1] & 0xFFFF) >>> (16 - kk)); + } + + /** + * (A & 0x7FFFFFFF),结果放在 out[4],只保留低32位并清掉最高bit。 + */ + static void and64_7FFFFFFF_to32(short[] A, short[] out) { + out[0] = A[0]; // lo16 + out[1] = (short)(A[1] & 0x7FFF); // hi16 (清除最高bit) + out[2] = 0; + out[3] = 0; + } + + /** + * 64位无符号右移 31 位 + * 输入: A[0..3] (short[4], A[0]最低16位) + * 输出: out[0..3] + */ + static void shr64u_31(short[] A, short[] out) { + // 先拼出 64bit 的逻辑,逐段右移 + // A >>> 31 = (A >>> 16) >>> 15 + + // 先右移 16,相当于丢掉 A[0],整体右移一半字 + out[0] = A[1]; // 原 A[1] -> 新低16位 + out[1] = A[2]; // 原 A[2] + out[2] = A[3]; // 原 A[3] + out[3] = 0; // 高位补0 + + // 再右移 15 位 + short c0 = (short)((out[0] & 0xFFFF) >>> 15); // out[0] 最后一位变进位 + short c1 = (short)((out[1] & 0xFFFF) >>> 15); + short c2 = (short)((out[2] & 0xFFFF) >>> 15); + + out[0] = (short)(((out[0] & 0xFFFF) >>> 15) | (out[1] << 1)); + out[1] = (short)(((out[1] & 0xFFFF) >>> 15) | (out[2] << 1)); + out[2] = (short)(((out[2] & 0xFFFF) >>> 15) | (out[3] << 1)); + out[3] = (short)((out[3] & 0xFFFF) >>> 15); + } + + /** + * 32位无符号右移 1 位 + * 输入: lo,hi (short) 表示 32 位数 (hi:高16位, lo:低16位) + * 输出: out[0]=lo, out[1]=hi + */ + static void shr32u1(short lo, short hi, short[] out) { + // >>>1:先处理低16位 + short newLo = (short)(((lo & 0xFFFF) >>> 1) | ((hi & 0x0001) << 15)); + short newHi = (short)((hi & 0xFFFF) >>> 1); + + out[0] = newLo; + out[1] = newHi; + } + + + + /** 打印/*十六进制(调试用,TODO 生产/JC 环境可移除) *//* public static void printHex(String label, byte[] data, int len) { System.out.print(label + ": ");