package com.cscn; import javacard.framework.Util; import javacard.framework.JCSystem; /** * 辅助工具:装载/存储、位运算、线性变换、打印等。 */ public final class Zuc256Util { private Zuc256Util() {} // /** 辅助方法:将字节数组转换为32位整数 */ // public static int getU32(byte[] p, int offset) { // return ((p[offset] & 0xFF) << 24) | // ((p[offset + 1] & 0xFF) << 16) | // ((p[offset + 2] & 0xFF) << 8) | // (p[offset + 3] & 0xFF); // } /** 辅助方法:从字节数组取出 32 位整数,存放到 short[2] (lo, hi) */ public static void getU32(byte[] p, short offset, short[] out32 /* len=2 */) { out32[0] = (short) (((p[(short)(offset + 2)] & 0xFF) << 8) | (p[(short)(offset + 3)] & 0xFF)); //低16位 out32[1] = (short) (((p[offset] & 0xFF) << 8) | (p[(short)(offset + 1)] & 0xFF)); //高16位 } // /** 辅助方法:将32位整数转换为字节数组 */ // public static void putU32(byte[] p, int offset, int v) { // p[offset] = (byte) (v >> 24); // p[offset + 1] = (byte) (v >> 16); // p[offset + 2] = (byte) (v >> 8); // p[offset + 3] = (byte) v; // } /** 辅助方法:将32位整数(vlo=低16位, vhi=高16位)写入字节数组 */ public static void putU32(byte[] p, short offset, short vlo, short vhi) { // 写高16位 p[offset] = (byte) ((vhi >> 8) & 0xFF); p[(short)(offset + 1)] = (byte) (vhi & 0xFF); // 写低16位 p[(short)(offset + 2)] = (byte) ((vlo >> 8) & 0xFF); p[(short)(offset + 3)] = (byte) (vlo & 0xFF); } // === 31/32 位运算 === // /** 31位加法 */ // public static int add31(int a, int b) { // long sum = (long)a + b; // return (int) ((sum & 0x7FFFFFFF) + (sum >> 31)); // } /** 31位加法: (a+b) mod (2^31 - 1) * 输入: a_lo=低16位, a_hi=高15位 * b_lo=低16位, b_hi=高15位 * 输出: out[0]=lo, out[1]=hi */ public static void add31(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /* len==2 */) { // ---- 低16位相加 ---- short lo = (short)(a_lo + b_lo); short carry = (short)( ( ( (short)( (a_lo & b_lo) | ((a_lo | b_lo) & (short)~lo) ) ) & (short)0x8000 ) != 0 ? 1 : 0 ); // ---- 高15位相加 + 进位 ---- short hi_raw = (short)((short)((a_hi & 0x7FFF) + (b_hi & 0x7FFF)) + carry); // 提取第31位(hi_raw bit15) short topbit = (short)((hi_raw >>> 15) & 1); short hi = (short)(hi_raw & 0x7FFF); // 保留15位 // ---- 若第31位=1,再+1 ---- if (topbit == 1) { short lo2 = (short)(lo + 1); short c2 = (short)((lo2 == 0) ? 1 : 0); // lo溢出时进位 lo = lo2; hi = (short)((hi + c2) & 0x7FFF); } out[0] = lo; out[1] = hi; } // /** 31位旋转 */ // public static int rot31(int a, int k) { // return ((a << k) | (a >>> (31 - k))) & 0x7FFFFFFF; // } /** 31位循环左移: (a <<< k) mod (2^31 -1) * 输入: a_lo=低16位, a_hi=高15位 * 输出: out[0]=lo, out[1]=hi */ public static void rot31(short a_lo, short a_hi, short k, short[] out /* len==2 */) { k = (short)(k % 31); // 限制在 0..30 if (k == 0) { out[0] = a_lo; out[1] = (short)(a_hi & 0x7FFF); return; } // 拆成 31 位数组 [bit0..bit30] short[] bits = JCSystem.makeTransientShortArray((short)31, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); for (short i = 0; i < 16; i++) { bits[i] = (short)((a_lo >>> i) & 1); } for (short i = 0; i < 15; i++) { bits[(short)(16 + i)] = (short)((a_hi >>> i) & 1); } // 旋转 short[] resBits = JCSystem.makeTransientShortArray((short)31, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); for (short i = 0; i < 31; i++) { short j = (short)((i + k) % 31); resBits[j] = bits[i]; } // 拼回 lo, hi short lo = 0; for (short i = 0; i < 16; i++) { lo = (short)(lo | (resBits[i] << i)); } short hi = 0; for (short i = 0; i < 15; i++) { hi = (short)(hi | (resBits[(short)(16 + i)] << i)); } out[0] = lo; out[1] = hi; } // /** 32位旋转 */ // public static int rot32(int a, int k) { // return (a << k) | (a >>> (32 - k)); // } /** 32位循环左移: (a<< 0) { // 先做 1 位循环左移 // 注意:short 在 >>> 时会先提升为 int,所以下面都再用 &1 取最低位,避免符号扩展影响 nw_hi = (short)((hi << 1) | ((lo >>> 15) & 1)); nw_lo = (short)((lo << 1) | ((hi >>> 15) & 1)); hi = nw_hi; lo = nw_lo; k--; } out[0] = lo; // 低16位 out[1] = hi; // 高16位 } // /** // * L1函数 // */ // public static int L1(int x) { // return x ^ rot32(x, 2) ^ rot32(x, 10) ^ rot32(x, 18) ^ rot32(x, 24); // } /** * L1函数: x ^ (x<<<2) ^ (x<<<10) ^ (x<<<18) ^ (x<<<24) * 输入: x_lo, x_hi * 输出: out[0]=lo, out[1]=hi */ public static void L1(short x_lo, short x_hi, short[] out /*len==2*/) { short[] t = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); short[] acc = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); // acc = x acc[0] = x_lo; acc[1] = x_hi; // acc ^= rot32(x, 2) rot32(x_lo, x_hi, (short)2, t); acc[0] ^= t[0]; acc[1] ^= t[1]; // acc ^= rot32(x, 10) rot32(x_lo, x_hi, (short)10, t); acc[0] ^= t[0]; acc[1] ^= t[1]; // acc ^= rot32(x, 18) rot32(x_lo, x_hi, (short)18, t); acc[0] ^= t[0]; acc[1] ^= t[1]; // acc ^= rot32(x, 24) rot32(x_lo, x_hi, (short)24, t); acc[0] ^= t[0]; acc[1] ^= t[1]; out[0] = acc[0]; out[1] = acc[1]; } // /** // * L2函数 // */ // public static int L2(int x) { // return x ^ rot32(x, 8) ^ rot32(x, 14) ^ rot32(x, 22) ^ rot32(x, 30); // } /** * L2函数: x ^ (x<<<8) ^ (x<<<14) ^ (x<<<22) ^ (x<<<30) * 输入: x_lo, x_hi * 输出: out[0]=lo, out[1]=hi */ public static void L2(short x_lo, short x_hi, short[] out /*len==2*/) { short[] t = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); short[] acc = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); // acc = x acc[0] = x_lo; acc[1] = x_hi; // acc ^= rot32(x, 8) rot32(x_lo, x_hi, (short)8, t); acc[0] ^= t[0]; acc[1] ^= t[1]; // acc ^= rot32(x, 14) rot32(x_lo, x_hi, (short)14, t); acc[0] ^= t[0]; acc[1] ^= t[1]; // acc ^= rot32(x, 22) rot32(x_lo, x_hi, (short)22, t); acc[0] ^= t[0]; acc[1] ^= t[1]; // acc ^= rot32(x, 30) rot32(x_lo, x_hi, (short)30, t); acc[0] ^= t[0]; acc[1] ^= t[1]; out[0] = acc[0]; out[1] = acc[1]; } // /** 创建31位无符号整数 */ // public static int makeU31(int a, int b, int c, int d) { // return (((a & 0xFF) << 23) | // ((b & 0xFF) << 16) | // ((c & 0xFF) << 8) | // (d & 0xFF)) & 0x7FFFFFFF; // } /** 创建31位无符号整数,结果放到 out[0]=lo, out[1]=hi(15位) */ public static void makeU31(short a, short b, short c, short d, short[] out /*len==2*/) { // 四个字节 short b0 = (short)(a & 0xFF); // 最高字节 short b1 = (short)(b & 0xFF); short b2 = (short)(c & 0xFF); short b3 = (short)(d & 0xFF); // 最低字节 // 拼成 32 位: b0<<24 | b1<<16 | b2<<8 | b3 // lo = 低16位 out[0] = (short)((b2 << 8) | b3); // hi = 高15位(丢弃 bit31) out[1] = (short)((b0 << 7) | b1); } // /** 创建32位无符号整数 */ // public static int makeU32(int a, int b, int c, int d) { // return ((a & 0xFF) << 24) | // ((b & 0xFF) << 16) | // ((c & 0xFF) << 8) | // (d & 0xFF); // } /** 创建32位无符号整数,结果放到 out[0]=lo, out[1]=hi */ public static void makeU32(short a, short b, short c, short d, short[] out /*len==2*/) { // 四个字节 short b0 = (short)(a & 0xFF); // 最高字节 short b1 = (short)(b & 0xFF); short b2 = (short)(c & 0xFF); short b3 = (short)(d & 0xFF); // 最低字节 // lo = 低16位 out[0] = (short)((b2 << 8) | b3); // hi = 高16位 out[1] = (short)((b0 << 8) | b1); } /** 提取IV */ public static void extractIv(byte[] input25Byte, byte[] output23Byte) { if (input25Byte == null || output23Byte == null) return; // 复制前17字节 Util.arrayCopyNonAtomic(input25Byte, (short)0, output23Byte, (short)0, (short)17); // 处理剩余8字节 byte[] src = JCSystem.makeTransientByteArray((short)8, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); for (short i = 0; i < 8; i++) { src[i] = (byte) (input25Byte[(short)(17 + i)] & 0x3F); } output23Byte[17] = (byte) ((src[0] << 2) | (src[1] >>> 4)); output23Byte[18] = (byte) (((src[1] & 0x0F) << 4) | (src[2] >>> 2)); output23Byte[19] = (byte) (((src[2] & 0x03) << 6) | src[3]); output23Byte[20] = (byte) ((src[4] << 2) | (src[5] >>> 4)); output23Byte[21] = (byte) (((src[5] & 0x0F) << 4) | (src[6] >>> 2)); output23Byte[22] = (byte) (((src[6] & 0x03) << 6) | src[7]); } /** * 32位加法: (a_hi:a_lo) + (b_hi:b_lo) * out[0] = lo, out[1] = hi */ static void add32(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /*len=2*/) { // ---- 低16位 ---- short lo_low = (short)((a_lo & 0x00FF) + (b_lo & 0x00FF)); short carry0 = (short)(((a_lo & 0x00FF) + (b_lo & 0x00FF)) >>> 8); short a_lo_hi = (short)((a_lo >>> 8) & 0x00FF); short b_lo_hi = (short)((b_lo >>> 8) & 0x00FF); short lo_high = (short)(a_lo_hi + b_lo_hi + carry0); short carry1 = (short)(lo_high >>> 8); short lo_res = (short)((lo_high << 8) | (lo_low & 0x00FF)); // ---- 高16位 ---- short hi_low = (short)((a_hi & 0x00FF) + (b_hi & 0x00FF) + carry1); short carry2 = (short)(hi_low >>> 8); short a_hi_hi = (short)((a_hi >>> 8) & 0x00FF); short b_hi_hi = (short)((b_hi >>> 8) & 0x00FF); short hi_high = (short)(a_hi_hi + b_hi_hi + carry2); short hi_res = (short)((hi_high << 8) | (hi_low & 0x00FF)); // ---- 输出 ---- out[0] = lo_res; out[1] = hi_res; } /** * 32位加法 + 返回进位(只用 short) * 输入: (a_hi:a_lo) + (b_hi:b_lo) * 输出: out[0]=lo, out[1]=hi * 返回: 最终进位(0/1) */ static short add32_with_carry(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /* len=2 */) { // ---- 低16位:分两段8位相加 ---- short s0 = (short)((a_lo & (short)0x00FF) + (b_lo & (short)0x00FF)); // 0..510 short c0 = (short)(s0 >>> 8); // 0/1 short s1 = (short)(((a_lo >>> 8) & (short)0x00FF) + ((b_lo >>> 8) & (short)0x00FF) + c0); // 0..511 short c1 = (short)(s1 >>> 8); // 0/1 short lo = (short)((s1 << 8) | (s0 & (short)0x00FF)); // ---- 高16位:再分两段8位相加,并加上 c1 ---- short s2 = (short)((a_hi & (short)0x00FF) + (b_hi & (short)0x00FF) + c1); short c2 = (short)(s2 >>> 8); // 0/1 short s3 = (short)(((a_hi >>> 8) & (short)0x00FF) + ((b_hi >>> 8) & (short)0x00FF) + c2); // 0..511 short c3 = (short)(s3 >>> 8); // 最终进位 0/1 short hi = (short)((s3 << 8) | (s2 & (short)0x00FF)); out[0] = lo; out[1] = hi; return (short)(c3 & 1); } /** * 64位加法: a4 + b4 -> a4 * 输入输出: short[4],低到高 (a[0]=lo16, a[1]=hi16, a[2]=lo16 of high dword, a[3]=hi16 of high dword) */ static void add64(short[] a, short[] b) { short[] tmp = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET); // 低 32 位 short carry = add32_with_carry(a[0], a[1], b[0], b[1], tmp); a[0] = tmp[0]; a[1] = tmp[1]; // 高 32 位 + carry add32((short)(a[2] + (short)(carry & (short)0x0001)), a[3], b[2], b[3], tmp); a[2] = tmp[0]; a[3] = tmp[1]; } // 32位异或 public static void xor32(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /*len==2*/) { out[0] = (short)(a_lo ^ b_lo); out[1] = (short)(a_hi ^ b_hi); } /** * 把32位数 b (b[0]=lo, b[1]=hi) 左移 k 位 (0 <= k < 32), * 结果放到64位数 a (a[0]=最低16位 ... a[3]=最高16位)。 */ static void create_64b_from_32b(short[] a/*len=4*/, short[] b/*len=2*/, short k) { short a0 = b[0], a1 = b[1], a2 = 0, a3 = 0; if (k >= 16) { a3 = a2; // 0 a2 = a1; // 原 hi16 a1 = a0; // 原 lo16 a0 = 0; k = (short)(k - 16); } while (k > 0) { short c0 = (short)((a0 >>> 15) & 1); short c1 = (short)((a1 >>> 15) & 1); short c2 = (short)((a2 >>> 15) & 1); a3 = (short)((a3 << 1) | c2); a2 = (short)((a2 << 1) | c1); a1 = (short)((a1 << 1) | c0); a0 = (short)(a0 << 1); k--; } a[0] = a0; a[1] = a1; a[2] = a2; a[3] = a3; } /** * (A & 0x7FFFFFFF),结果放在 out[4],只保留低32位并清掉最高bit。 */ static void and64_7FFFFFFF_to32(short[] A, short[] out) { out[0] = A[0]; // lo16 out[1] = (short)(A[1] & 0x7FFF); // hi16 (清除最高bit) out[2] = 0; out[3] = 0; } /** * 64位无符号右移 31 位 * 输入: A[0..3] (short[4], A[0]最低16位) * 输出: out[0..3] */ static void shr64u_31(short[] A, short[] out) { // 先拼出 64bit 的逻辑,逐段右移 // A >>> 31 = (A >>> 16) >>> 15 // 先右移 16,相当于丢掉 A[0],整体右移一半字 out[0] = A[1]; // 原 A[1] -> 新低16位 out[1] = A[2]; // 原 A[2] out[2] = A[3]; // 原 A[3] out[3] = 0; // 高位补0 // 再右移 15 位 short c0 = (short)((out[0] & (short)0xFFFF) >>> 15); // out[0] 最后一位变进位 short c1 = (short)((out[1] & (short)0xFFFF) >>> 15); short c2 = (short)((out[2] & (short)0xFFFF) >>> 15); out[0] = (short)((c0 & 0x0001) | (out[1] << 1)); out[1] = (short)((c1 & 0x0001) | (out[2] << 1)); out[2] = (short)(c2 & 0x0001); } /** * 32位无符号右移 1 位 * 输入: lo,hi (short) 表示 32 位数 (hi:高16位, lo:低16位) * 输出: out[0]=lo, out[1]=hi */ static void shr32u1(short lo, short hi, short[] out) { // >>>1:先处理低16位 short nwLo = (short)(((((lo & (short)0xFFFF) >>> 1) & (short)0x7FFF)) | ((hi & 0x0001) << 15)); short nwHi = (short)(((hi & (short)0xFFFF) >>> 1) & (short)0x7FFF); out[0] = nwLo; out[1] = nwHi; } /** 打印/*十六进制(调试用,TODO 生产/JC 环境可移除) *//* public static void printHex(String label, byte[] data, int len) { System.out.print(label + ": "); for (int i = 0; i < len; i++) { System.out.printf("%02x ", data[i] & 0xFF); } System.out.println(); }*/ }