#include <9pm/u.h>
#include <9pm/libc.h>
#include "dat.h"
#include "fns.h"

int	nrdy;
Ref	noteidalloc;

static Ref	pidalloc;

Procalloc procalloc;

typedef struct
{
	Lock	lk;
	Proc*	head;
	Proc*	tail;
	int	n;
} Schedq;
static Schedq	runq[Nrq];

char *statename[] =
{			/* BUG: generate automatically */
	"Dead",
	"Nascent",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	if(M == nil)
		panic("schedinit m");
	setlabel(&M->sched);
	if(up) {
		M->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			/*
			 * Holding locks from pexit:
			 * 	procalloc
			 */
			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&procalloc.lk);
			break;
		}
		up->mach = 0;
		up = 0;
	}
	sched();
}

/*
 *  If changing this routine, look also at sleep().  It
 *  contains a copy of the guts of sched().
 */
void
sched(void)
{
	if(M->machno == -1){
		print("sched called on wrong cpu\n");
		return;
	}
	if(up) {
		splhi();
		/* statistics */
		M->cs++;
		if(setlabel(&up->sched)){
			spllo();
			return;
		}
		gotolabel(&M->sched);
	}
	up = runproc();
	up->state = Running;
	up->mach = M;
	M->proc = up;
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return nrdy;
}

int
anyhigher(void)
{
	Schedq *rq;

	if(nrdy == 0)
		return 0;

	for(rq = &runq[Nrq-1]; rq > &runq[up->priority]; rq--)
		if(rq->head != nil)
			return 1;
	
	return 0;
}

void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;

	s = splhi();
	if(p->fixedpri){
		pri = p->basepri;
	} else {
		/* history counts */
/*
		if(p->state == Running){
			p->rt++;
			pri = ((p->art + (p->rt<<1))>>2)/Squantum;
		} else {
			p->art = (p->art + (p->rt<<1))>>2;
			p->rt = 0;
			pri = p->art/Squantum;
		}
		pri = p->basepri - pri;
		if(pri < 0)
			pri = 0;
	
		if(pri < PriNormal && p->basepri > PriNormal)
			pri = PriNormal;
*/
		pri = p->basepri;
		/* stick at low priority any process waiting for a lock */
		if(p->lockwait)
			pri = PriLock;
	}

	p->priority = pri;
	rq = &runq[p->priority];

	lock(&runq->lk);
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	p->readytime = M->ticks;
	p->state = Ready;
	unlock(&runq->lk);
	splx(s);
	noidlehands();
}

Proc*
runproc(void)
{
	Schedq *rq, *xrq;
	Proc *p, *l;
	ulong rt;

loop:

	/*
	 *  find a process.
	 */
	for(;;){
		rt = 0xffffffff;
		xrq = nil;
		for(rq = runq; rq < &runq[Nrq]; rq++){
			p = rq->head;
			if(p == 0)
				continue;
			if(p->readytime < rt){
				xrq = rq;
				rt = p->readytime;
			}
		}
		if(xrq != nil){
			rq = xrq;
			p = rq->head;
			if(p != nil)
				p->movetime = 0;
			goto found;
		}
		idlehands();
	}

found:
	if(!canlock(&runq->lk))
		goto loop;

	l = 0;
	p = rq->head;

	/*
	 *  p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(&runq->lk);
		goto loop;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("runproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
	unlock(&runq->lk);

	p->state = Scheding;
	return p;
}

Proc*
newproc(void)
{
	Proc *p;

Again:
	lock(&procalloc.lk);
	if(procalloc.free == nil){
		if(procalloc.nproc >= MAXPROC){
			unlock(&procalloc.lk);
			resrcwait("no procs");
			goto Again;
		}
		p = smalloc(sizeof(Proc));
		p->index = procalloc.nproc;
		procalloc.arena[procalloc.nproc++] = p;
	}else{
		p = procalloc.free;
		procalloc.free = p->qnext;
	}
	unlock(&procalloc.lk);

	p->state = Scheding;
	p->psstate = "New";
	memset(&p->nascent, 0, sizeof p->nascent);
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->kp = 0;
	p->procctl = 0;
	p->notepending = 0;
	p->movetime = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
/*	memset(p->seg, 0, sizeof p->seg);	*/
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	return p;
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	/* just pointers, can have lots */
	procalloc.arena = smalloc(MAXPROC*sizeof(Proc*));
	procalloc.nproc = 0;
}

/*
 *  sleep if a condition is not true.  Another process will
 *  awaken us after it sets the condition.  When we awaken
 *  the condition may no longer be true.
 *
 *  we lock both the process and the rendezvous to keep r->p
 *  and p->r synchronized.
 */
void
rendsleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;

	s = splhi();
	lock(&r->lk);
	lock(&up->rlock);
	if(r->p){
		print("double sleep %lud %lud\n", r->p->pid, up->pid);
		abort();
	}

	/*
	 *  Wakeup only knows there may be something to do by testing
	 *  r->p in order to get something to lock on.
	 *  Flush that information out to memory in case the sleep is
	 *  committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 *  if condition happened or a note is pending
		 *  never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(&r->lk);
	} else {
		/*
		 *  now we are committed to
		 *  change state and call scheduler
		 */
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		M->cs++;
	
		if(!setlabel(&up->sched)){
			/*
			 *  here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(&r->lk);
			gotolabel(&M->sched);
		}
		/*
		 *  here when the process is awakened
		 */
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		error(Eintr);
	}
	splx(s);
}

int
tfn(void *arg)
{
	return machp0->ticks >= up->twhen || up->tfn(arg);
}

void
rendtsleep(Rendez *r, int (*fn)(void*), void *arg, int ms)
{
	ulong when;
	Proc *f, **l;

	/* avoid overflows at the cost of precision */
	if(ms >= 1000000)
		when = ms/(1000/HZ);
	else
		when = MS2TK(ms);
	when += machp0->ticks;

	lock(&talarm.lk);
	/* take out of list if checkalarm didn't */
	if(up->trend) {
		l = &talarm.list;
		for(f = *l; f; f = f->tlink) {
			if(f == up) {
				*l = up->tlink;
				break;
			}
			l = &f->tlink;
		}
	}
	/* insert in increasing time order */
	l = &talarm.list;
	for(f = *l; f; f = f->tlink) {
		if(f->twhen >= when)
			break;
		l = &f->tlink;
	}
	up->trend = r;
	up->twhen = when;
	up->tfn = fn;
	up->tlink = *l;
	*l = up;
	unlock(&talarm.lk);

	if(waserror()){
		up->twhen = 0;
		nexterror();
	}
	rendsleep(r, tfn, arg);
	up->twhen = 0;
	poperror();
}

/*
 *  Expects that only one process can call wakeup for any given Rendez.
 *  We hold both locks to ensure that r->p and p->r remain consistent.
 *  Richard Miller has a better solution that doesn't require both to
 *  be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
rendwakeup(Rendez *r)
{
	int s;
	Proc *p;

	s = splhi();
	lock(&r->lk);
	p = r->p;
	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(&r->lk);
	splx(s);

	return p;
}

/*
 *  if waking a sleeping process, this routine must hold both
 *  p->rlock and r->lock.  However, it can't know them in
 *  the same order as wakeup causing a possible lock ordering
 *  deadlock.  We break the deadlock by giving up the p->rlock
 *  lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(&r->lk)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(&r->lk);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		sched();
	}
	unlock(&p->rlock);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(&p->rgrp->ref.lk);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(&p->rgrp->ref.lk);
	return ret;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;

	if(up->die)
		(*up->die)(up, exitstr);

	up->alarm = 0;

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		wq->w.time[TUser] = utime = up->time[TUser] + up->time[TCUser];
		wq->w.time[TSys] = stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TReal] = TK2MS(machp0->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 *  If my parent is no longer alive, or if there would be more
		 *  than 128 zombie child processes for my parent, then don't
		 *  leave a wait record behind.  This helps prevent badly
		 *  written daemon processes from accumulating lots of wait
		 *  records.
		 */
		if(p->pid == up->parentpid && p->state != Broken && p->nwait < 128) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;

			wq->next = p->waitq;
			p->waitq = wq;
			p->nwait++;

			rendwakeup(&p->waitr);
			unlock(&p->exl);
		}
		else {
			unlock(&p->exl);
			free(wq);
		}
	}

//	if(!freemem)
//		addbroken(up);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	rendwakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		rendwakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc.lk);

	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	rendsleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

Proc*
proctab(int i)
{
	return procalloc.arena[i];
}

void
dumpaproc(Proc *p)
{
	char *s;

	if(p == 0)
		return;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux  %8s (%s) ut %ld st %ld bss %lux qpc %lux\n",
		p->pid, p->text, p->pc, s, statename[p->state],
		p->time[0], p->time[1], p->bss, p->qpc);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");
	for(i=0; i<procalloc.nproc; i++) {
		p = procalloc.arena[i];
		if(p->state == Dead)
			continue;
		dumpaproc(p);
	}
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud, %lud)", p->pid, M->ticks - p->readytime,
				machp0->ticks - p->movetime);
		print("\n");
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;

	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	incref(&p->dot->ref);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	p->basepri = PriKproc;
	p->priority = p->basepri;

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(&kpgrp->ref);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = machp0->ticks;
	ready(p);
}

/*
 *  called splhi() by notify().  See comment in notify for the
 *  reasoning.
 */
void
procctl(Proc *p)
{
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		qlock(&p->debug);
		if(p->pdbg) {
			rendwakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		p->state = Stopped;
		sched();
		p->psstate = state;
		return;
	}
}

void
error(char *err)
{
	kstrcpy(up->errstr, err, ERRMAX);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	print("%s\n", buf);
	error(buf);
}

/*
 *  change ownership to 'new' of all processes owned by 'old'.  Used when
 *  eve changes.
 */
void
renameuser(char *old, char *new)
{
	Proc **lp, **ep, *p;

	ep = procalloc.arena+procalloc.nproc;
	for(lp = procalloc.arena; lp < ep; lp++){
		p = *lp;
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
	}
}

/*
 *  time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	int n;
	static int nrun;

	p = M->proc;
	if(p) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* only one processor gets to compute system load averages */
	if(M->machno != 0)
		return;

	/* calculate decaying load average */
	n = nrun;
	nrun = 0;

	n = (nrdy+n)*1000;
	M->load = (M->load*19+n)/20;
}

static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc.lk);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc.lk);
}

static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc.lk);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc.lk);
}

int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc.lk);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p->index;
			break;
		}
	unlock(&procalloc.lk);
	return s;
}


syntax highlighted by Code2HTML, v. 0.9.1