/* Racing SQUFOF, based in part on Ben Buhrow's code. */ #include #include "ptypes.h" #include "small_factor.h" /* Return 0 if n is not a perfect square. Set sqrtn to int(sqrt(n)) if so. * See Math::Prime::Util factor.c for more info on this. */ static int is_perfect_square(UV n, UV* sqrtn) { UV m; /* lm */ m = n & 127; if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0; /* Faster with just the first test on x86 m = n % 63; if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0; */ m = sqrt(n); if (n != (m*m)) return 0; if (sqrtn != 0) *sqrtn = m; return 1; } static UV gcd_ui(UV x, UV y) { UV t; if (y < x) { t = x; x = y; y = t; } while (y > 0) { t = y; y = x % y; x = t; /* y1 <- x0 % y0 ; x1 <- y0 */ } return x; } typedef struct { UV mult; UV valid; UV P; UV bn; UV Qn; UV Q0; UV b0; UV it; UV imax; } mult_t; /* N < 2^63 (or 2^31). *f == 1 if no factor found */ static void squfof_unit(UV n, mult_t* mult_save, UV* f) { UV imax,i,Q0,b0,Qn,bn,P,bbn,Ro,S,So,t1,t2; int j; *f = 0; P = mult_save->P; bn = mult_save->bn; Qn = mult_save->Qn; Q0 = mult_save->Q0; b0 = mult_save->b0; i = mult_save->it; imax = i + mult_save->imax; #define SQUARE_SEARCH_ITERATION \ t1 = P; \ P = bn*Qn - P; \ t2 = Qn; \ Qn = Q0 + bn*(t1-P); \ Q0 = t2; \ bn = (b0 + P) / Qn; \ i++; while (1) { j = 0; if (i & 0x1) { SQUARE_SEARCH_ITERATION; } /* i is now even */ while (1) { /* We need to know P, bn, Qn, Q0, iteration count, i from prev */ if (i >= imax) { /* save state and try another multiplier */ mult_save->P = P; mult_save->bn = bn; mult_save->Qn = Qn; mult_save->Q0 = Q0; mult_save->it = i; *f = 0; return; } SQUARE_SEARCH_ITERATION; /* Even iteration. Check for square: Qn = S*S */ if (is_perfect_square( Qn, &S )) break; /* Odd iteration */ SQUARE_SEARCH_ITERATION; } /* printf("found square %lu after %lu iterations with mult %d\n", Qn, i, mult_save->mult); */ /* Reduce to G0 */ Ro = P + S*((b0 - P)/S); t1 = Ro; So = (n - t1*t1)/S; bbn = (b0+Ro)/So; /* Search for symmetry point */ #define SYMMETRY_POINT_ITERATION \ t1 = Ro; \ Ro = bbn*So - Ro; \ t2 = So; \ So = S + bbn*(t1-Ro); \ S = t2; \ bbn = (b0+Ro)/So; \ if (Ro == t1) break; j = 0; while (1) { SYMMETRY_POINT_ITERATION; SYMMETRY_POINT_ITERATION; SYMMETRY_POINT_ITERATION; SYMMETRY_POINT_ITERATION; if (j++ > 2000000) { mult_save->valid = 0; *f = 0; return; } } *f = gcd_ui(Ro, n); if (*f > 1) return; } } #define NSQUFOF_MULT (sizeof(multipliers)/sizeof(multipliers[0])) int racing_squfof_factor(UV n, UV *factors, UV rounds) { const UV multipliers[] = { 3*5*7*11, 3*5*7, 3*5*11, 3*5, 3*7*11, 3*7, 5*7*11, 5*7, 3*11, 3, 5*11, 5, 7*11, 7, 11, 1 }; const UV big2 = UV_MAX >> 2; mult_t mult_save[NSQUFOF_MULT]; int i, still_racing; UV nn64, mult, f64; UV rounds_done = 0; /* Caller should have handled these trivial cases */ MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in racing_squfof_factor"); /* Too big */ if (n > big2) { factors[0] = n; return 1; } for (i = 0; i < (int)NSQUFOF_MULT; i++) { mult = multipliers[i]; nn64 = n * mult; mult_save[i].mult = mult; if ((big2 / mult) < n) { mult_save[i].valid = 0; /* This multiplier would overflow 64-bit */ continue; } mult_save[i].valid = 1; mult_save[i].b0 = sqrt( (double)nn64 ); mult_save[i].imax = sqrt( (double)mult_save[i].b0 ) / 3; if (mult_save[i].imax < 20) mult_save[i].imax = 20; if (mult_save[i].imax > rounds) mult_save[i].imax = rounds; mult_save[i].Q0 = 1; mult_save[i].P = mult_save[i].b0; mult_save[i].Qn = nn64 - (mult_save[i].b0 * mult_save[i].b0); if (mult_save[i].Qn == 0) { factors[0] = mult_save[i].b0; factors[1] = n / mult_save[i].b0; MPUassert( factors[0] * factors[1] == n , "incorrect factoring"); return 2; } mult_save[i].bn = (mult_save[i].b0 + mult_save[i].P) / mult_save[i].Qn; mult_save[i].it = 0; } /* Process the multipliers a little at a time: 0.33*(n*mult)^1/4: 20-20k */ do { still_racing = 0; for (i = 0; i < (int)NSQUFOF_MULT; i++) { if (!mult_save[i].valid) continue; nn64 = n * (UV)multipliers[i]; squfof_unit(nn64, &mult_save[i], &f64); if (f64 > 1) { if (f64 != multipliers[i]) { f64 /= gcd_ui(f64, multipliers[i]); if (f64 != 1) { factors[0] = f64; factors[1] = n / f64; MPUassert( factors[0] * factors[1] == n , "incorrect factoring"); return 2; } } /* Found trivial factor. Quit working with this multiplier. */ mult_save[i].valid = 0; } if (mult_save[i].valid == 1) still_racing = 1; rounds_done += mult_save[i].imax; if (rounds_done >= rounds) break; } } while (still_racing && rounds_done < rounds); /* No factors found */ factors[0] = n; return 1; }