Fractals are just so fascinating. (Click the above image to zoom.)
Below, is the source code of the program used to generate that image*. It uses OpenMP for multithreading and SSE2 SIMD intrinsics for doing parallel mathematics on single core.
On a 4-core CPU, it calculates 8 pixels simultaneously. Language: C++.
#include <SDL.h>
#include <math.h>
#include <signal.h>
#include <iostream>
#include <emmintrin.h>
static const unsigned MaxIter = 2048;
static const double Xmin = -.743643135-.000014628/2;//-2.02;
static const double Xmax = -.743643135+.000014628/2;//+0.5;
static const double Ymin = .131825963-.000014628/2; //-1.15;
static const double Ymax = .131825963+.000014628/2;//+1.15;
static inline double Smooth(unsigned c, double r2, double i2)
{
//return c + 1 - log2(log2(sqrt(r2+i2)));
return log(log(r2+i2))*-1.442695+c+1.4712336;
}
static inline double Mand(double r,double i, double X,double Y, unsigned firstiter=0)
{
double p = sqrt((r-0.25)*(r-0.25) + i*i);
if(r < p-2*p*p+0.25
|| (r+1)*(r+1)+i*i < 1/16.0) return 0.0;
for(unsigned c=firstiter; ; )
{
double r2 = r*r;
double i2 = i*i;
if(r2+i2 >= 4) return Smooth(c, r2,i2);
if(++c >= MaxIter) break;
double ri = r*i;
i = ri+ri + Y;
r = r2-i2 + X;
}
return 0.0;
}
template<unsigned which>
static inline double GetD(__m128d v)
{
return _mm_cvtsd_f64(_mm_shuffle_pd(v,v, _MM_SHUFFLE2(0,which)));
}
static __m128d TwoMand(__m128d X, __m128d Y)
{
{ double r = GetD<0>(X), i = GetD<0>(Y);
double p = sqrt((r-0.25)*(r-0.25) + i*i);
if(r < p-2*p*p+0.25
|| (r+1)*(r+1)+i*i < 1/16.0) return _mm_set_pd(0.0, Mand(GetD<1>(X),GetD<1>(Y),GetD<1>(X),GetD<1>(Y)));
}
{ double r = GetD<1>(X), i = GetD<1>(Y);
double p = sqrt((r-0.25)*(r-0.25) + i*i);
if(r < p-2*p*p+0.25
|| (r+1)*(r+1)+i*i < 1/16.0) return _mm_set_pd(Mand(GetD<0>(X),GetD<0>(Y),GetD<0>(X),GetD<0>(Y)), 0.0);
}
__m128d r = X, i = Y;
const __m128d threshold = _mm_set_pd(4.0, 4.0);
for(unsigned c=0; ; )
{
__m128d r2 = _mm_mul_pd(r, r);
__m128d i2 = _mm_mul_pd(i, i);
__m128i comparison =
(__m128i) _mm_cmpge_pd(_mm_add_pd(r2,i2), threshold);
unsigned a = _mm_extract_epi16(comparison, 0);
if(__builtin_expect(a,0))
{
double lo_value = Smooth(c,GetD<0>(r2),GetD<0>(i2));
if(_mm_extract_epi16(comparison, 4))
{
double hi_value = Smooth(c,GetD<1>(r2),GetD<1>(i2));
return _mm_set_pd(lo_value,hi_value);
}
double rhi = GetD<1>(r), ihi = GetD<1>(i);
return _mm_set_pd(lo_value, Mand(rhi,ihi, GetD<1>(X),GetD<1>(Y), c));
}
else if(__builtin_expect(_mm_extract_epi16(comparison, 4),0))
{
double hi_value = Smooth(c,GetD<1>(r2),GetD<1>(i2));
double rlo = GetD<0>(r), ilo = GetD<0>(i);
return _mm_set_pd(Mand(rlo,ilo, GetD<0>(X),GetD<0>(Y), c), hi_value);
}
if(++c >= MaxIter) break;
__m128d ri = _mm_mul_pd(r, i);
i = _mm_add_pd(_mm_add_pd(ri,ri), Y);
r = _mm_add_pd(_mm_sub_pd(r2,i2), X);
}
return _mm_setzero_pd();
}
static int CscaleUp(double value, double mi,double ma)
{ return (int)((value-mi)*255.0/(ma-mi)); }
static int CscaleDown(double value, double mi,double ma)
{ return 255-CscaleUp(value,mi,ma); }
static void PutPix(unsigned char* target, double value)
{
value = value*4096/MaxIter;
if(value > 512+128) value = 512+128+(value-(512+128))/8;
value = fmod(value, 256.0);
if(value < 64) target[0] = CscaleUp(value,0,64);
else if(value < 96) target[0] = CscaleDown(value,64,96), target[2] = CscaleUp(value,64,96);
else if(value < 128) target[1] = CscaleUp(value,96,128), target[2]=255;
else if(value <192) target[0] = CscaleUp(value,128,192), target[2]=target[1]=255;
else target[2]=target[1]=target[0]=CscaleDown(value,192,256);
}
int main()
{
const unsigned Yres = 4096*3/4;
const unsigned Xres = 4096;
SDL_Init(SDL_INIT_VIDEO);
SDL_InitSubSystem(SDL_INIT_VIDEO);
SDL_Surface* surface = SDL_SetVideoMode(Xres,Yres, 32,0);
signal(SIGINT, SIG_DFL);
unsigned char* target = (unsigned char*)surface->pixels;
#pragma omp parallel for schedule(guided,1)
for(unsigned y=0; y<Yres; ++y)
{
double i = Ymin+y*(Ymax-Ymin)/(double)Yres;
__m128d twoi = _mm_set_pd(i,i);
double r = Xmin;
double rstep = (Xmax-Xmin) /(double)Xres;
double rstep2 = (Xmax-Xmin)*2.0/(double)Xres;
unsigned char* scanline = target + y*Xres*4;
for(unsigned x=0; x<Xres; x += 2, r += rstep2)
{
__m128d res = TwoMand( _mm_set_pd(r, r+rstep), twoi );
PutPix(scanline + 0 + x*4, GetD<0>(res));
PutPix(scanline + 4 + x*4, GetD<1>(res));
if(!(x&(511)))
{
#pragma omp critical(sdlflip)
SDL_Flip(surface);
}
}
}
SDL_Flip(surface);
std::cout << "P3\n" << Xres << " " << Yres << "\n255\n";
for(unsigned n=Xres*Yres, a=0; a<n; ++a)
{
std::cout << (int)target[a*4+2] << ' ';
std::cout << (int)target[a*4+1] << ' ';
std::cout << (int)target[a*4+0] << ' ';
}
std::cout << "\n";
return getchar();
}
*) The iteration count, the resolution and palette selection may differ.