/* 
 * Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * fetch.c - url fetch loop
 *
 */

#include "puf.h"


/*  byte counters; fetched_bytes and total_bytes feed the "Bytes" columns
 *  of the status line printed by fetch_all()  */
off_t max_bytes, fetched_bytes, total_bytes;
/*  fetch_all() starts no new lookup while this many dns helpers are busy  */
int max_dnss_active = DEFAULT_MAX_DNS_FORKS;
/*  fetch_all() starts no new connect while this many urls are active  */
int max_urls_active = DEFAULT_MAX_ACTIVE;
int timeout_dns = DEFAULT_TIMEOUT_DNS;
/*  handed to alarm() by fetch_all(); expiry ends up in sigalrm()  */
int max_time;
int max_urls;
/*  "total" urls column of the status line; decremented on RT_SKIP  */
int num_urls;
/*  "cur" connections column; compared against max_urls_active  */
int num_urls_active;
/*  "done" urls column; incremented by fetch_all() on RT_DONE  */
int num_urls_done;
/*  "fail" urls column of the status line  */
int num_urls_fail;
/*  "errs" column of the status line  */
int num_errors;
/*  non-zero: fetch_all() prints the periodic status line  */
int show_stat = 1;
int waiting_proxies;
int all_proxy_wait = 1;		/* unused (immutable) */
/*  cur_tv is refreshed with gettimeofday() at the top of every pass of
 *  the fetch_all() loop  */
struct timeval cur_tv, throttle;
struct sockaddr_in bind_addr;
wurl_t *queue_urls_connect;	/*  ready to connect  */
aurl_t *queue_urls_reconnect;	/*  ready to reconnect  */
aurl_t *list_urls_request;	/*  started connect, waiting for write  */
aurl_t *list_urls_reply;	/*  request sent, waiting for reply  */


/*
 *  Book-keeping for a failed connection attempt of au.
 *
 *  dr   - non-zero: always account the error to the url's own host;
 *         zero: account it to the proxy if au->proxy is set
 *  errt - error type (callers in this file pass 1 for timeouts and
 *         2 for failed connects/sends)
 *  errw - retry delay in seconds to start with for a new error type
 *  etww - warning format (receives the host name) used when the url is
 *         simply put back into the queue
 *  etnw - message handed to werrm() when opt->fail_no_wait is set
 */
static void
conn_err(aurl_t *au, int dr, int errt, int errw,
	 const char *etww, const char *etnw)
{
    int via_proxy = !dr && au->proxy;
    hinfo_t *host;
    haddr_t *addr;

    /*  pick the host/address pair the failure belongs to  */
    host = via_proxy ? au->proxy->host->info : au->url->host->info;
    addr = host->ips + (via_proxy ? au->pipidx : au->ipidx);

    /*  report the failure and decide whether the url gets another try  */
    if (!au->url->parm->opt->fail_no_wait) {
	prx(WRN, etww, host->name);
	queue_url(au->url);
    } else if (werrm(au, errt == 1 ? 504 : 503, etnw) == RT_RETRY)
	queue_url(au->url);

    /*  address already given up, or its retry delay has not passed yet  */
    if (addr->last_errt >= 3 || cur_tv.tv_sec < addr->retry_time)
	return;

    if (addr->last_errt != errt) {
	dbg(CON, (" setting new error type %d\n", errt));
	addr->last_errt = errt;
	addr->err_wait = errw;
    }

    addr->attempt++;
    if (addr->attempt < (unsigned)au->url->parm->opt->max_attempts) {
	/*  schedule the next try and double the delay for the one after  */
	dbg(CON, (" retrying in %d seconds\n", addr->err_wait));
	addr->retry_time = cur_tv.tv_sec + addr->err_wait;
	addr->err_wait *= 2;
	return;
    }

    /*  too many attempts: push last_errt past the "< 3" check above  */
    addr->last_errt += 2;
    prx(WRN, "giving up address '%s' for host '%s'.\n",
		inet_ntoa(addr->addr), host->name);
}


/*
 *  Format a duration of ti seconds into d for the status line.
 *
 *  The result is at most five characters long, so d must provide room
 *  for at least six bytes:
 *    negative       "??:??"  (unknown)
 *    < 100 minutes  "MM:SS"
 *    < 100 hours    "HHhMM"
 *    < 100 days     "DdHH" / "DDdHH"
 *    otherwise      "> 99d"
 *
 *  Every negative value counts as unknown, not just -1: an estimate
 *  derived from byte counters may be negative, and feeding it to the
 *  "%02d" formats would print nonsense and could exceed five characters.
 */
static void 
fmt_time(char *d, int ti)
{
    if (ti < 0)
	strcpy(d, "??:??");
    else if (ti < 6000)
	sprintf(d, "%02d:%02d", ti / 60, ti % 60);
    else if (ti < 360000)
	sprintf(d, "%02dh%02d", ti / 3600, ti / 60 % 60);
    else if (ti < 8640000)
	sprintf(d, "%dd%02d", ti / 86400, ti / 3600 % 24);
    else
	strcpy(d, "> 99d");
}

/*
 *  Set access and modification time of au's output file (au->disposition)
 *  to au->file_time.
 *  Does nothing and returns 0 when no file time is recorded or the
 *  no_touch option is set; otherwise returns the result of utime().
 */
int
touch(aurl_t *au)
{
    struct utimbuf stamp;

    if (au->file_time && !au->url->parm->opt->no_touch) {
	stamp.modtime = au->file_time;
	stamp.actime = stamp.modtime;
	return utime(au->disposition, &stamp);
    }
    return 0;
}

/*
 *  Remove every dns helper from list_dns_idle and pass it to
 *  reap_dnsproc().
 *  Called from byebye() and from fetch_all() right before it returns.
 */
static void 
cleanup(void)
{
    /*  kill off still running dns helpers  */
    /*  NOTE(review): only list_dns_idle is walked; helpers that are on
     *  list_dns_busy are not reaped here - confirm this is intended  */
    ls_iterate_rm(list_dns_idle, dnsproc_t, pr, {
	dbg(QUE, ("iterate_rm dns helper\n"));
	ls_remove(pr);
	reap_dnsproc(pr);
	continue;
    });
}

/*
 *  Abnormal termination path: touch() every url that is still on
 *  list_urls_reply, reap the idle dns helpers via cleanup() and hand
 *  msg to die() with code 1.
 *  NOTE(review): this is also what the SIGINT/SIGTERM/SIGALRM handlers
 *  call, so everything in here runs in signal context; presumably die()
 *  does not return - confirm.
 */
void 
byebye(const char *msg)
{
    /*  give partially fetched files their recorded time stamp  */
    ls_iterate(list_urls_reply, aurl_t, au, touch(au););
    cleanup();
    
    die(1, msg);
}

/*  SIGINT handler (installed by fetch_all()): leave through byebye()  */
static void
sigint(int n)
{
    (void)n;	/*  signal number is not needed  */
    byebye("interrupted!");
}
/*  SIGTERM handler (installed by fetch_all()): leave through byebye()  */
static void
sigterm(int n)
{
    (void)n;	/*  signal number is not needed  */
    byebye("terminated!");
}
/*  SIGALRM handler: the alarm(max_time) armed by fetch_all() expired  */
static void
sigalrm(int n)
{
    (void)n;	/*  signal number is not needed  */
    byebye("time quota exceeded!");
}

/*
 *  Fetch all urls in parallel.
 *
 *  Installs the SIGINT/SIGTERM/SIGALRM handlers, ignores SIGPIPE, arms
 *  alarm(max_time) and then runs the main loop.  Every pass
 *    - starts queued reconnects/connects while num_urls_active is below
 *      max_urls_active (unless a connect delay is still pending),
 *    - hands queued dns lookups to idle or freshly forked helpers while
 *      num_dns_busy is below max_dnss_active,
 *    - reaps idle dns helpers whose timeout has passed,
 *    - collects the descriptors of all urls and lookups in flight,
 *    - prints the status line about once per second if show_stat is set,
 *    - waits in select() for at most one second,
 *    - processes replies, finished connects and finished lookups.
 *  nfds counts everything still pending; the function prints the final
 *  statistics, calls cleanup() and returns on the first pass where it
 *  stays zero.
 */
void 
fetch_all()
{
    fd_set rfds, wfds;

    struct timeval last_tv, start_tv, next_tv, next_conn_tv, next_dpy_tv, to;
    time_t next_fork_time;
    long timediff, tottime;
    
    off_t last_fetched_bytes;
    int top_speed, avg_speed;

    int num_dns_busy, num_dns_idle;
    
    url_t *u;
    int i, mxfd, nfds;
    int *spds, spdi, spdn;

    /*  Initialize some data:  */
    gettimeofday(&last_tv, NULL);
    start_tv = last_tv;
    timerclear(&next_conn_tv);
    timerclear(&next_dpy_tv);
    next_fork_time = 0;

    last_fetched_bytes = 0;
    top_speed = avg_speed = 0;
    
    num_dns_busy = num_dns_idle = 0;

    /*  Status info:  */
    /*  spds is a ring buffer of the last AVERAGING_TIMEFRAME speed samples;
     *  spdi is the next slot to write, spdn the number of valid samples  */
    spdi = spdn = 0;
    if (show_stat) {
	if (!(spds = mmalloc(sizeof(int) * AVERAGING_TIMEFRAME)))
	    show_stat = 0;
	else {
	    memset(spds, 0, sizeof(int) * AVERAGING_TIMEFRAME);
	    printf("\n      URLs             Connections         Bytes            Time       Kbyte/s\n"
		     "  done+ fail/ total   errs cur/max       done/total       pass left    cur/avg\n");
	}
    } else
	spds = 0;

    signal(SIGINT, sigint);
    signal(SIGTERM, sigterm);
    signal(SIGALRM, sigalrm);
    signal(SIGPIPE, SIG_IGN);

    alarm(max_time);

    /*  Megaloop:  */
    for (;;) {
	gettimeofday(&cur_tv, NULL);
	
	FD_ZERO(&rfds);
	FD_ZERO(&wfds);
	mxfd = -1;
	nfds = 0;

	dbg(QUE, ("---\n"));

	/*  urls waiting for initiation  */
	if (timercmp(&cur_tv, &next_conn_tv, <)) {
	    dbg(QUE, ("skipping url queue\n"));
	    nfds++;	/* so we don't cancel out */
	} else {
	    timerclear(&next_conn_tv);
	/*  reconnects first ...  */
	cq_consume(queue_urls_reconnect, aurl_t, au, {
	    int rt;

	    dbgu(QUE, (au->url, "consume $u\n"));
	    nfds++;
	    if (num_urls_active >= max_urls_active)
		break;
	    cq_rm1st(queue_urls_reconnect);
	    rt = connect_url(au, &next_conn_tv);
	    if (rt == RT_AGAIN) {
		cq_prepend(queue_urls_reconnect, au);
		goto dcon;
	    }
	    if (rt == RT_DONE)
		goto dcon;
	});
	/*  ... then urls that have not been activated yet  */
	cq_consume(queue_urls_connect, wurl_t, wu, {
	    aurl_t *au;
	    int rt;

	    dbgu(QUE, (wu->url, "consume $u\n"));
	    nfds++;
	    if (num_urls_active >= max_urls_active)
		break;
	    cq_rm1st(queue_urls_connect);
	    u = wu->url;
	    rt = activate_url(u, &au);
	    if (rt == RT_AGAIN) {	/*  transient server problem  */
		cq_append(queue_urls_connect, wu);
		continue;
	    } else if (rt == RT_RETRY) {	/*  transient error  */
		cq_prepend(queue_urls_connect, wu);
		break;
	    } else if (rt == RT_SKIP)	/*  already exists  */
		num_urls--;
	    else if (rt == RT_OK) {
		rt = connect_url(au, &next_conn_tv);
		if (rt == RT_AGAIN) {
		    cq_prepend(queue_urls_connect, wu);
		    break;
		} else if (rt == RT_DONE) {
		    free(wu);
		    break;
		}
	    } else if (rt != RT_GIVEUP)	/*  permanent error  */
		dbg(CON, ("unknown return code %d from activate_url\n", rt));
	    free(wu);
	});
	} /* timercmp next_conn_tv */
      dcon:

	/*  dns lookups waiting for initiation  */
	cq_consume(queue_dns_lookup, whost_t, wh, {
	    dnsproc_t *pr;
	  if (wh->host) {
	    dbg(QUE, ("consume host %s\n", wh->host->name));
	    nfds++;
	    if (num_dns_busy >= max_dnss_active)
		break;
	  whrt:
	    /*  prefer an idle helper; fork a new one at most once a second
	     *  after a failed fork  */
	    if ((pr = list_dns_idle)) {
		list_dns_idle = pr->next;
		num_dns_idle--;
	    } else {
		if (cur_tv.tv_sec < next_fork_time)
		    break;
		if (!(pr = fork_dnsproc())) {
		    next_fork_time = cur_tv.tv_sec + 1;
		    break;
		}
	    }
	    pr->whost = wh;
	    if (!start_lookup(pr)) {
		/*  helper unusable: drop it and try the next one  */
		reap_dnsproc(pr);
		goto whrt;
	    }
	    ls_add(list_dns_busy, pr);
	    num_dns_busy++;
	  }
	    cq_rm1st(queue_dns_lookup);
	});

	/*  idle dns helpers  */
	ls_iterate_rm(list_dns_idle, dnsproc_t, pr, {
	    dbg(QUE, ("iterate_rm idle dns helper\n"));
	    if (pr->timeout < cur_tv.tv_sec) {
		dbg(QUE, ("  timeout\n"));
		ls_remove(pr);
		reap_dnsproc(pr);
		num_dns_idle--;
		continue;
	    }
	});

	/*  urls waiting for reply */
	ls_iterate(list_urls_reply, aurl_t, au, {
	    nfds++;
	    FD_SET(au->socket, &rfds);
	    if (au->socket > mxfd)
		mxfd = au->socket;
	});

	/*  urls waiting for connection establishment */
	ls_iterate(list_urls_request, aurl_t, au, {
	    nfds++;
	    FD_SET(au->socket, &wfds);
	    if (au->socket > mxfd)
		mxfd = au->socket;
	});

	/*  dns lookups waiting for completion  */
	ls_iterate(list_dns_busy, dnsproc_t, pr, {
	    nfds++;
	    FD_SET(pr->fd, &rfds);
	    if (pr->fd > mxfd)
		mxfd = pr->fd;
	});

	if (show_stat) {
	    int esttimeleft, cur_speed, mid_speed;
	    char estts[10], totts[10];

	    /*  time since the last status update, in 1/100 seconds  */
	    timediff = (cur_tv.tv_sec - last_tv.tv_sec) * 100
		       + (cur_tv.tv_usec - last_tv.tv_usec) / 10000;

	    /*  update once per second, and one last time when done  */
	    if (timediff >= 100 || !nfds) {
		cur_speed = timediff ?
			    (int)((fetched_bytes - last_fetched_bytes) * 100 /
			    timediff) : 0;

		spds[spdi] = cur_speed;
		if (++spdi >= AVERAGING_TIMEFRAME)
		    spdi = 0;
		if (spdn < AVERAGING_TIMEFRAME)
		    spdn++;

		/*  the estimate is based on the mean of the recent samples  */
		for (mid_speed = 0, i = 0; i < spdn; i++)
		    mid_speed += spds[i];
		mid_speed /= spdn;
		esttimeleft = mid_speed ?
		    (total_bytes - fetched_bytes) / mid_speed : -1;

		if (cur_speed > top_speed)
		    top_speed = cur_speed;

		/*  whole seconds since start  */
		tottime = ((cur_tv.tv_sec - start_tv.tv_sec) * 100
		    + (cur_tv.tv_usec - start_tv.tv_usec) / 10000) / 100;
		avg_speed = tottime ? (int)(fetched_bytes / tottime) : 0;

		fmt_time(totts, tottime);
		fmt_time(estts, esttimeleft);

		printf("\r%6d+%5d/%6d %6d %3d/%-3d %10"SSOFFT"/%-10"SSOFFT
		       " %5s %5s %5d/%-4d",
		     num_urls_done, num_urls_fail, num_urls, num_errors,
		     num_urls_active, max_urls_active, fetched_bytes,
		     total_bytes, totts, estts,
		     cur_speed / 1024, avg_speed / 1024);
		fflush(stdout);

		last_fetched_bytes = fetched_bytes;
		last_tv = cur_tv;
		next_dpy_tv = cur_tv;
		next_dpy_tv.tv_sec++;
	    }
	}

	/*  nothing queued and nothing in flight: we are done  */
	if (!nfds) {
	    if (show_stat) {
		printf("\n\nTop speed:     %9i bytes/second\n"
		           "Average speed: %9i bytes/second\n", 
		       top_speed, avg_speed);
		free(spds);
	    }
	    cleanup();
	    return;
	}

	/*  sleep until the earliest of: one second from now, the next
	 *  permitted connect, the next status update  */
	next_tv = cur_tv;
	next_tv.tv_sec++; /* needed for other timeouts */
	if (timerisset(&next_conn_tv) && timercmp(&next_conn_tv, &next_tv, <))
	    next_tv = next_conn_tv;
	if (timerisset(&next_dpy_tv) && timercmp(&next_dpy_tv, &next_tv, <))
	    next_tv = next_dpy_tv;
	timersub( &next_tv, &cur_tv, &to);
	if (select(mxfd + 1, &rfds, &wfds, 0, &to) < 0)
	    die(1, "select() failed!");

	/*  urls waiting for reply */
	ls_iterate_rm(list_urls_reply, aurl_t, au, {
	    dbgu(QUE, (au->url, "iterate_rm reply $u\n"));
	    if (FD_ISSET(au->socket, &rfds)) {
		dbg(QUE, ("  has data\n"));
		switch (handle_reply(au)) {
		    case RT_OK:
			/*  more data expected: push the timeout forward  */
			au->timeout = cur_tv.tv_sec + 
				      au->url->parm->opt->timeout_data;
			goto c_ur;
		    case RT_SKIP:
			num_urls--;
			goto gofrnu;
		    case RT_DONE:
			num_urls_done++;
			break;
		    case RT_AGAIN:
			queue_url(au->url);
			break;
		    case RT_RESTART:
			/*  keep au, but reconnect it  */
			ls_remove(au);
			cq_append(queue_urls_reconnect, au);
			disconnect_url(au);
			dbg(QUE, ("  removed\n"));
			continue;
		    case RT_RETRY:
			queue_url(au->url);
			goto gofrnu;
		    case RT_GIVEUP:
			break;
		    case RT_TIMEOUT:
			conn_err (au, 1, 1, au->url->parm->opt->timeout_connect, 
				  "connect to '%s' timed out\n",
				  "connect for $u timed out");
			break;
		    case RT_REFUSED:
			conn_err (au, 1, 2, 3, 
				  "connect to '%s' failed\n", 
				  "connect for $u failed");
			break;
		}
	    } else {
		if (cur_tv.tv_sec < au->timeout)
		    goto c_ur;
		if (werrm(au, 554, "data fetch for $u timed out") == RT_RETRY)
		    queue_url(au->url);
	      gofrnu:
		/*  optionally get rid of the incomplete output file  */
		if (au->url->parm->opt->delete_broken &&
		    au->disposition[au->displen])
		    unlink(au->disposition);
	    }

	    ls_remove(au);
	    deactivate_url(au);
	    dbg(QUE, ("  removed\n"));
	    continue;
	  c_ur: ;
	});

	/*  urls waiting for connection establishment */
	ls_iterate_rm(list_urls_request, aurl_t, au, {
	    dbgu(QUE, (au->url, "iterate_rm request $u\n"));
	    if (FD_ISSET(au->socket, &wfds)) {
		/*  no comma-separated declarators here: this block is a
		 *  macro argument  */
		int err = 0; socklen_t errl = sizeof(err);

		dbg(QUE, ("  connect event\n"));
		/*  a failing getsockopt() is treated like a failed connect
		 *  instead of evaluating an unset err  */
		if (getsockopt(au->socket, SOL_SOCKET, SO_ERROR, 
			       (void *)&err, &errl) < 0 || err)
		    conn_err (au, 0, 2, 3, "connect to '%s' failed\n", 
			      "connect for $u failed");
		else if (send_http_get(au) <= 0)
		    conn_err (au, 0, 2, 3, "HTTP request send to '%s' failed\n",
			      "HTTP request send for $u failed");
		else {
		    haddr_t *ip;
		    /*  request is out: move au over to the reply list and
		     *  clear the error state of the address used  */
		    ls_remove(au);
		    ls_add(list_urls_reply, au);
		    au->timeout = cur_tv.tv_sec + 
				  au->url->parm->opt->timeout_data;
		    if (au->proxy)
			ip = au->proxy->host->info->ips + au->pipidx;
		    else
			ip = au->url->host->info->ips + au->ipidx;
		    ip->last_errt = 0;
		    ip->attempt = 0;
		    continue;
	    	}
	    } else {
		if (cur_tv.tv_sec < au->timeout)
	            goto c_uq;
		conn_err (au, 0, 1, au->url->parm->opt->timeout_connect, 
			  "connect to '%s' timed out\n",
			  "connect for $u timed out");
	    }

	    /*  au is handed to deactivate_url() exactly as in the reply
	     *  loop above; there is deliberately no additional free() here
	     *  (presumably deactivate_url() releases au - a second free
	     *  would then be a double free)  */
	    ls_remove(au);
	    deactivate_url(au);
	    continue;
	  c_uq: ;
	});

	/*  dns lookups waiting for completion  */
	ls_iterate_rm(list_dns_busy, dnsproc_t, pr, {
	    dbg(QUE, ("iterate_rm host %s\n",
		      pr->whost->host ? pr->whost->host->name : "(dummy)"));
	  if (FD_ISSET(pr->fd, &rfds)) {
	    dbg(QUE, ("  has data\n"));
	    ls_remove(pr);
	    num_dns_busy--;
	    if (!finish_lookup(pr)) {
		/*  lookup failed in the helper: queue it again and drop
		 *  the helper  */
		cq_prepend(queue_dns_lookup, pr->whost);
		reap_dnsproc(pr);
	    } else {
		/*  keep the helper around for further lookups for a while  */
		free(pr->whost);
		ls_add(list_dns_idle, pr);
		num_dns_idle++;
		pr->timeout = cur_tv.tv_sec + 60;
	    }
	    continue;
	  }
	});
    }
}
