// Zuc256Util.java (Java source, 497 lines, 16 KiB)
package com.cscn;
|
||
|
||
import javacard.framework.Util;
|
||
|
||
/**
 * Helper utilities: load/store, bit operations, linear transforms, printing, etc.
 */
|
||
public final class Zuc256Util {
|
||
|
||
// Private constructor: this class only exposes static helpers and is not meant to be instantiated.
private Zuc256Util() {}
|
||
|
||
// /** Helper: converts four bytes of a byte array into a 32-bit integer. */
// public static int getU32(byte[] p, int offset) {
//     return ((p[offset] & 0xFF) << 24) |
//            ((p[offset + 1] & 0xFF) << 16) |
//            ((p[offset + 2] & 0xFF) << 8) |
//            (p[offset + 3] & 0xFF);
// }
|
||
/** 辅助方法:从字节数组取出 32 位整数,存放到 short[2] (lo, hi) */
|
||
public static void getU32(byte[] p, short offset, short[] out32 /* len=2 */) {
|
||
out32[0] = (short) (((p[(short)(offset + 2)] & 0xFF) << 8) | (p[(short)(offset + 3)] & 0xFF)); //低16位
|
||
out32[1] = (short) (((p[offset] & 0xFF) << 8) | (p[(short)(offset + 1)] & 0xFF)); //高16位
|
||
}
|
||
|
||
|
||
// /** Helper: writes a 32-bit integer into a byte array. */
// public static void putU32(byte[] p, int offset, int v) {
//     p[offset] = (byte) (v >> 24);
//     p[offset + 1] = (byte) (v >> 16);
//     p[offset + 2] = (byte) (v >> 8);
//     p[offset + 3] = (byte) v;
// }
|
||
/** 辅助方法:将32位整数(vlo=低16位, vhi=高16位)写入字节数组 */
|
||
public static void putU32(byte[] p, short offset, short vlo, short vhi) {
|
||
// 写高16位
|
||
p[offset] = (byte) ((vhi >> 8) & 0xFF);
|
||
p[(short)(offset + 1)] = (byte) (vhi & 0xFF);
|
||
|
||
// 写低16位
|
||
p[(short)(offset + 2)] = (byte) ((vlo >> 8) & 0xFF);
|
||
p[(short)(offset + 3)] = (byte) (vlo & 0xFF);
|
||
}
|
||
|
||
|
||
// === 31/32-bit arithmetic ===

// /** 31-bit addition */
// public static int add31(int a, int b) {
//     long sum = (long)a + b;
//     return (int) ((sum & 0x7FFFFFFF) + (sum >> 31));
// }
|
||
/** 31位加法: (a+b) mod (2^31 - 1)
|
||
* 输入: a_lo=低16位, a_hi=高15位
|
||
* b_lo=低16位, b_hi=高15位
|
||
* 输出: out[0]=lo, out[1]=hi
|
||
*/
|
||
public static void add31(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /* len==2 */) {
|
||
// ---- 低16位相加 ----
|
||
short lo = (short)(a_lo + b_lo);
|
||
short carry = (short)(
|
||
( ( (short)( (a_lo & b_lo) | ((a_lo | b_lo) & (short)~lo) ) ) & (short)0x8000 ) != 0
|
||
? 1 : 0
|
||
);
|
||
// ---- 高15位相加 + 进位 ----
|
||
short hi_raw = (short)((short)((a_hi & 0x7FFF) + (b_hi & 0x7FFF)) + carry);
|
||
|
||
// 提取第31位(hi_raw bit15)
|
||
short topbit = (short)((hi_raw >>> 15) & 1);
|
||
short hi = (short)(hi_raw & 0x7FFF); // 保留15位
|
||
|
||
// ---- 若第31位=1,再+1 ----
|
||
if (topbit == 1) {
|
||
short lo2 = (short)(lo + 1);
|
||
short c2 = (short)((lo2 == 0) ? 1 : 0); // lo溢出时进位
|
||
lo = lo2;
|
||
hi = (short)((hi + c2) & 0x7FFF);
|
||
}
|
||
|
||
out[0] = lo;
|
||
out[1] = hi;
|
||
}
|
||
|
||
|
||
// /** 31-bit rotation */
// public static int rot31(int a, int k) {
//     return ((a << k) | (a >>> (31 - k))) & 0x7FFFFFFF;
// }
|
||
/** 31位循环左移: (a <<< k) mod (2^31 -1)
|
||
* 输入: a_lo=低16位, a_hi=高15位
|
||
* 输出: out[0]=lo, out[1]=hi
|
||
*/
|
||
public static void rot31(short a_lo, short a_hi, short k, short[] out /* len==2 */) {
|
||
k = (short)(k % 31); // 限制在 0..30
|
||
if (k == 0) {
|
||
out[0] = a_lo;
|
||
out[1] = (short)(a_hi & 0x7FFF);
|
||
return;
|
||
}
|
||
|
||
// 拆成 31 位数组 [bit0..bit30]
|
||
short[] bits = JCSystem.makeTransientShortArray((short)31, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
for (short i = 0; i < 16; i++) {
|
||
bits[i] = (short)((a_lo >>> i) & 1);
|
||
}
|
||
for (short i = 0; i < 15; i++) {
|
||
bits[(short)(16 + i)] = (short)((a_hi >>> i) & 1);
|
||
}
|
||
|
||
// 旋转
|
||
short[] resBits = JCSystem.makeTransientShortArray((short)31, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
for (short i = 0; i < 31; i++) {
|
||
short j = (short)((i + k) % 31);
|
||
resBits[j] = bits[i];
|
||
}
|
||
|
||
// 拼回 lo, hi
|
||
short lo = 0;
|
||
for (short i = 0; i < 16; i++) {
|
||
lo = (short)(lo | (resBits[i] << i));
|
||
}
|
||
short hi = 0;
|
||
for (short i = 0; i < 15; i++) {
|
||
hi = (short)(hi | (resBits[(short)(16 + i)] << i));
|
||
}
|
||
|
||
out[0] = lo;
|
||
out[1] = hi;
|
||
}
|
||
|
||
|
||
// /** 32-bit rotation */
// public static int rot32(int a, int k) {
//     return (a << k) | (a >>> (32 - k));
// }
|
||
/** 32位循环左移: (a<<<k) */
|
||
public static void rot32(short a_lo, short a_hi, short k, short[] out /*len==2*/) {
|
||
k = (short)(k & 31); // 0..31
|
||
short lo = a_lo, hi = a_hi, nw_hi, nw_lo;
|
||
while (k > 0) {
|
||
// 先做 1 位循环左移
|
||
// 注意:short 在 >>> 时会先提升为 int,所以下面都再用 &1 取最低位,避免符号扩展影响
|
||
nw_hi = (short)((hi << 1) | ((lo >>> 15) & 1));
|
||
nw_lo = (short)((lo << 1) | ((hi >>> 15) & 1));
|
||
hi = nw_hi;
|
||
lo = nw_lo;
|
||
k--;
|
||
}
|
||
out[0] = lo; // 低16位
|
||
out[1] = hi; // 高16位
|
||
}
|
||
|
||
|
||
// /**
//  * L1 function
//  */
// public static int L1(int x) {
//     return x ^ rot32(x, 2) ^ rot32(x, 10) ^ rot32(x, 18) ^ rot32(x, 24);
// }
|
||
/**
|
||
* L1函数: x ^ (x<<<2) ^ (x<<<10) ^ (x<<<18) ^ (x<<<24)
|
||
* 输入: x_lo, x_hi
|
||
* 输出: out[0]=lo, out[1]=hi
|
||
*/
|
||
public static void L1(short x_lo, short x_hi, short[] out /*len==2*/) {
|
||
short[] t = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
short[] acc = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
|
||
// acc = x
|
||
acc[0] = x_lo;
|
||
acc[1] = x_hi;
|
||
|
||
// acc ^= rot32(x, 2)
|
||
rot32(x_lo, x_hi, (short)2, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
// acc ^= rot32(x, 10)
|
||
rot32(x_lo, x_hi, (short)10, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
// acc ^= rot32(x, 18)
|
||
rot32(x_lo, x_hi, (short)18, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
// acc ^= rot32(x, 24)
|
||
rot32(x_lo, x_hi, (short)24, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
out[0] = acc[0];
|
||
out[1] = acc[1];
|
||
}
|
||
|
||
|
||
// /**
//  * L2 function
//  */
// public static int L2(int x) {
//     return x ^ rot32(x, 8) ^ rot32(x, 14) ^ rot32(x, 22) ^ rot32(x, 30);
// }
|
||
/**
|
||
* L2函数: x ^ (x<<<8) ^ (x<<<14) ^ (x<<<22) ^ (x<<<30)
|
||
* 输入: x_lo, x_hi
|
||
* 输出: out[0]=lo, out[1]=hi
|
||
*/
|
||
public static void L2(short x_lo, short x_hi, short[] out /*len==2*/) {
|
||
short[] t = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
short[] acc = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
|
||
// acc = x
|
||
acc[0] = x_lo;
|
||
acc[1] = x_hi;
|
||
|
||
// acc ^= rot32(x, 8)
|
||
rot32(x_lo, x_hi, (short)8, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
// acc ^= rot32(x, 14)
|
||
rot32(x_lo, x_hi, (short)14, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
// acc ^= rot32(x, 22)
|
||
rot32(x_lo, x_hi, (short)22, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
// acc ^= rot32(x, 30)
|
||
rot32(x_lo, x_hi, (short)30, t);
|
||
acc[0] ^= t[0];
|
||
acc[1] ^= t[1];
|
||
|
||
out[0] = acc[0];
|
||
out[1] = acc[1];
|
||
}
|
||
|
||
|
||
// /** Builds a 31-bit unsigned integer */
// public static int makeU31(int a, int b, int c, int d) {
//     return (((a & 0xFF) << 23) |
//             ((b & 0xFF) << 16) |
//             ((c & 0xFF) << 8) |
//             (d & 0xFF)) & 0x7FFFFFFF;
// }
|
||
/** 创建31位无符号整数,结果放到 out[0]=lo, out[1]=hi(15位) */
|
||
public static void makeU31(short a, short b, short c, short d, short[] out /*len==2*/) {
|
||
// 四个字节
|
||
short b0 = (short)(a & 0xFF); // 最高字节
|
||
short b1 = (short)(b & 0xFF);
|
||
short b2 = (short)(c & 0xFF);
|
||
short b3 = (short)(d & 0xFF); // 最低字节
|
||
|
||
// 拼成 32 位: b0<<24 | b1<<16 | b2<<8 | b3
|
||
// lo = 低16位
|
||
out[0] = (short)((b2 << 8) | b3);
|
||
|
||
// hi = 高15位(丢弃 bit31)
|
||
out[1] = (short)((b0 << 7) | b1);
|
||
}
|
||
|
||
|
||
// /** Builds a 32-bit unsigned integer */
// public static int makeU32(int a, int b, int c, int d) {
//     return ((a & 0xFF) << 24) |
//            ((b & 0xFF) << 16) |
//            ((c & 0xFF) << 8) |
//            (d & 0xFF);
// }
|
||
/** 创建32位无符号整数,结果放到 out[0]=lo, out[1]=hi */
|
||
public static void makeU32(short a, short b, short c, short d, short[] out /*len==2*/) {
|
||
// 四个字节
|
||
short b0 = (short)(a & 0xFF); // 最高字节
|
||
short b1 = (short)(b & 0xFF);
|
||
short b2 = (short)(c & 0xFF);
|
||
short b3 = (short)(d & 0xFF); // 最低字节
|
||
|
||
// lo = 低16位
|
||
out[0] = (short)((b2 << 8) | b3);
|
||
|
||
// hi = 高16位
|
||
out[1] = (short)((b0 << 8) | b1);
|
||
}
|
||
|
||
|
||
|
||
/** 提取IV */
|
||
public static void extractIv(byte[] input25Byte, byte[] output23Byte) {
|
||
if (input25Byte == null || output23Byte == null) return;
|
||
|
||
// 复制前17字节
|
||
Util.arrayCopyNonAtomic(input25Byte, (short)0, output23Byte, (short)0, (short)17);
|
||
|
||
|
||
// 处理剩余8字节
|
||
byte[] src = JCSystem.makeTransientShortArray((short)8, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
for (short i = 0; i < 8; i++) {
|
||
src[i] = (byte) (input25Byte[(short)(17 + i)] & 0x3F);
|
||
}
|
||
|
||
output23Byte[17] = (byte) ((src[0] << 2) | (src[1] >>> 4));
|
||
output23Byte[18] = (byte) (((src[1] & 0x0F) << 4) | (src[2] >>> 2));
|
||
output23Byte[19] = (byte) (((src[2] & 0x03) << 6) | src[3]);
|
||
output23Byte[20] = (byte) ((src[4] << 2) | (src[5] >>> 4));
|
||
output23Byte[21] = (byte) (((src[5] & 0x0F) << 4) | (src[6] >>> 2));
|
||
output23Byte[22] = (byte) (((src[6] & 0x03) << 6) | src[7]);
|
||
}
|
||
|
||
/**
|
||
* 32位加法: (a_hi:a_lo) + (b_hi:b_lo)
|
||
* out[0] = lo, out[1] = hi
|
||
*/
|
||
static void add32(short a_lo, short a_hi,
|
||
short b_lo, short b_hi,
|
||
short[] out /*len=2*/) {
|
||
|
||
// ---- 低16位 ----
|
||
short lo_low = (short)((a_lo & 0x00FF) + (b_lo & 0x00FF));
|
||
short carry0 = (short)(((a_lo & 0x00FF) + (b_lo & 0x00FF)) >>> 8);
|
||
|
||
short a_lo_hi = (short)((a_lo >>> 8) & 0x00FF);
|
||
short b_lo_hi = (short)((b_lo >>> 8) & 0x00FF);
|
||
short lo_high = (short)(a_lo_hi + b_lo_hi + carry0);
|
||
short carry1 = (short)(lo_high >>> 8);
|
||
|
||
short lo_res = (short)((lo_high << 8) | (lo_low & 0x00FF));
|
||
|
||
// ---- 高16位 ----
|
||
short hi_low = (short)((a_hi & 0x00FF) + (b_hi & 0x00FF) + carry1);
|
||
short carry2 = (short)(hi_low >>> 8);
|
||
|
||
short a_hi_hi = (short)((a_hi >>> 8) & 0x00FF);
|
||
short b_hi_hi = (short)((b_hi >>> 8) & 0x00FF);
|
||
short hi_high = (short)(a_hi_hi + b_hi_hi + carry2);
|
||
|
||
short hi_res = (short)((hi_high << 8) | (hi_low & 0x00FF));
|
||
|
||
// ---- 输出 ----
|
||
out[0] = lo_res;
|
||
out[1] = hi_res;
|
||
}
|
||
|
||
/**
|
||
* 32位加法 + 返回进位(只用 short)
|
||
* 输入: (a_hi:a_lo) + (b_hi:b_lo)
|
||
* 输出: out[0]=lo, out[1]=hi
|
||
* 返回: 最终进位(0/1)
|
||
*/
|
||
static short add32_with_carry(short a_lo, short a_hi,
|
||
short b_lo, short b_hi,
|
||
short[] out /* len=2 */) {
|
||
// ---- 低16位:分两段8位相加 ----
|
||
short s0 = (short)((a_lo & (short)0x00FF) + (b_lo & (short)0x00FF)); // 0..510
|
||
short c0 = (short)(s0 >>> 8); // 0/1
|
||
short s1 = (short)(((a_lo >>> 8) & (short)0x00FF)
|
||
+ ((b_lo >>> 8) & (short)0x00FF)
|
||
+ c0); // 0..511
|
||
short c1 = (short)(s1 >>> 8); // 0/1
|
||
short lo = (short)((s1 << 8) | (s0 & (short)0x00FF));
|
||
|
||
// ---- 高16位:再分两段8位相加,并加上 c1 ----
|
||
short s2 = (short)((a_hi & (short)0x00FF) + (b_hi & (short)0x00FF) + c1);
|
||
short c2 = (short)(s2 >>> 8); // 0/1
|
||
short s3 = (short)(((a_hi >>> 8) & (short)0x00FF)
|
||
+ ((b_hi >>> 8) & (short)0x00FF)
|
||
+ c2); // 0..511
|
||
short c3 = (short)(s3 >>> 8); // 最终进位 0/1
|
||
short hi = (short)((s3 << 8) | (s2 & (short)0x00FF));
|
||
|
||
out[0] = lo;
|
||
out[1] = hi;
|
||
return (short)(c3 & 1);
|
||
}
|
||
|
||
|
||
|
||
/**
|
||
* 64位加法: a4 + b4 -> a4
|
||
* 输入输出: short[4],低到高 (a[0]=lo16, a[1]=hi16, a[2]=lo16 of high dword, a[3]=hi16 of high dword)
|
||
*/
|
||
static void add64(short[] a, short[] b) {
|
||
short[] tmp = JCSystem.makeTransientShortArray((short)2, JCSystem.MEMORY_TYPE_TRANSIENT_RESET);
|
||
|
||
// 低 32 位
|
||
short carry = add32_with_carry(a[0], a[1], b[0], b[1], tmp);
|
||
a[0] = tmp[0];
|
||
a[1] = tmp[1];
|
||
|
||
// 高 32 位 + carry
|
||
add32((short)(a[2] + (short)(carry & (short)0x0001)), a[3], b[2], b[3], tmp);
|
||
a[2] = tmp[0];
|
||
a[3] = tmp[1];
|
||
}
|
||
|
||
|
||
|
||
// 32位异或
|
||
public static void xor32(short a_lo, short a_hi, short b_lo, short b_hi, short[] out /*len==2*/) {
|
||
out[0] = (short)(a_lo ^ b_lo);
|
||
out[1] = (short)(a_hi ^ b_hi);
|
||
}
|
||
|
||
/**
|
||
* 把32位数 b (b[0]=lo, b[1]=hi) 左移 k 位 (0 <= k < 32),
|
||
* 结果放到64位数 a (a[0]=最低16位 ... a[3]=最高16位)。
|
||
*/
|
||
static void create_64b_from_32b(short[] a/*len=4*/, short[] b/*len=2*/, short k) {
|
||
short a0 = b[0], a1 = b[1], a2 = 0, a3 = 0;
|
||
|
||
if (k >= 16) {
|
||
a3 = a2; // 0
|
||
a2 = a1; // 原 hi16
|
||
a1 = a0; // 原 lo16
|
||
a0 = 0;
|
||
k = (short)(k - 16);
|
||
}
|
||
|
||
while (k > 0) {
|
||
short c0 = (short)((a0 >>> 15) & 1);
|
||
short c1 = (short)((a1 >>> 15) & 1);
|
||
short c2 = (short)((a2 >>> 15) & 1);
|
||
|
||
a3 = (short)((a3 << 1) | c2);
|
||
a2 = (short)((a2 << 1) | c1);
|
||
a1 = (short)((a1 << 1) | c0);
|
||
a0 = (short)(a0 << 1);
|
||
k--;
|
||
}
|
||
|
||
a[0] = a0; a[1] = a1; a[2] = a2; a[3] = a3;
|
||
}
|
||
|
||
|
||
/**
|
||
* (A & 0x7FFFFFFF),结果放在 out[4],只保留低32位并清掉最高bit。
|
||
*/
|
||
static void and64_7FFFFFFF_to32(short[] A, short[] out) {
|
||
out[0] = A[0]; // lo16
|
||
out[1] = (short)(A[1] & 0x7FFF); // hi16 (清除最高bit)
|
||
out[2] = 0;
|
||
out[3] = 0;
|
||
}
|
||
|
||
/**
|
||
* 64位无符号右移 31 位
|
||
* 输入: A[0..3] (short[4], A[0]最低16位)
|
||
* 输出: out[0..3]
|
||
*/
|
||
static void shr64u_31(short[] A, short[] out) {
|
||
// 先拼出 64bit 的逻辑,逐段右移
|
||
// A >>> 31 = (A >>> 16) >>> 15
|
||
|
||
// 先右移 16,相当于丢掉 A[0],整体右移一半字
|
||
out[0] = A[1]; // 原 A[1] -> 新低16位
|
||
out[1] = A[2]; // 原 A[2]
|
||
out[2] = A[3]; // 原 A[3]
|
||
out[3] = 0; // 高位补0
|
||
|
||
// 再右移 15 位
|
||
short c0 = (short)((out[0] & (short)0xFFFF) >>> 15); // out[0] 最后一位变进位
|
||
short c1 = (short)((out[1] & (short)0xFFFF) >>> 15);
|
||
short c2 = (short)((out[2] & (short)0xFFFF) >>> 15);
|
||
|
||
out[0] = (short)((c0 & 0x0001) | (out[1] << 1));
|
||
out[1] = (short)((c1 & 0x0001) | (out[2] << 1));
|
||
out[2] = (short)(c2 & 0x0001);
|
||
}
|
||
|
||
/**
|
||
* 32位无符号右移 1 位
|
||
* 输入: lo,hi (short) 表示 32 位数 (hi:高16位, lo:低16位)
|
||
* 输出: out[0]=lo, out[1]=hi
|
||
*/
|
||
static void shr32u1(short lo, short hi, short[] out) {
|
||
// >>>1:先处理低16位
|
||
short nwLo = (short)(((((lo & (short)0xFFFF) >>> 1) & (short)0x7FFF)) | ((hi & 0x0001) << 15));
|
||
short nwHi = (short)(((hi & (short)0xFFFF) >>> 1) & (short)0x7FFF);
|
||
|
||
out[0] = nwLo;
|
||
out[1] = nwHi;
|
||
}
|
||
|
||
|
||
|
||
|
||
/* Prints a buffer as hexadecimal (debug only; TODO: remove for production / Java Card builds).
public static void printHex(String label, byte[] data, int len) {
    System.out.print(label + ": ");
    for (int i = 0; i < len; i++) {
        System.out.printf("%02x ", data[i] & 0xFF);
    }
    System.out.println();
}*/
|
||
}
|