===================================================================
RCS file: RCS/Configfile,v
retrieving revision 1.1
diff -r1.1 Configfile
101a102,110
>     choice SCHED_FCFS "FCFS Scheduler" {
> 	display(OSsem)},
>     choice SCHED_SIMPLE  "Round-robin Scheduler" {
> 	display(OSsem)},
>     choice SCHED_PRIORITY  "Priority-flow Scheduler" {
> 	display(OSsem)}
> }
> 
> menu OSsem {
===================================================================
RCS file: RCS/OS.param,v
retrieving revision 1.1
diff -r1.1 OS.param
13a14,16
> #define SCHED_FCFS
> #undef SCHED_SIMPLE
> #undef SCHED_PRIORITY
===================================================================
RCS file: RCS/ParamHelp,v
retrieving revision 1.1
diff -r1.1 ParamHelp
125a126,134
> %SCHED_FCFS
> A first-come, first-served scheduler for runtime threads. 
> Threads run until they block or exit, and are kept in order.
> %SCHED_PRIORITY
> A simple priority-flow scheduler for runtime threads, which exploits
> randomization to "fairly" allocate time slices to threads.
> %SCHED_SIMPLE
> A simple round-robin scheduler for runtime threads. After the quantum
> expires, the next thread is chosen. However, they may not be kept in order. 
===================================================================
RCS file: RCS/SimMake,v
retrieving revision 1.1
diff -r1.1 SimMake
20c20
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/SimMake,v 1.1 92/12/08 13:38:17 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/SimMake,v 1.2 92/12/09 13:43:30 dfk Exp Locker: dfk $
21a22,25
> # Revision 1.2  92/12/09  13:43:30  dfk
> # after my own personal changes
> # eg, adding scheduler stuff
> # 
92,93c96
< /* default scheduler and user libraries */
< SCHEDULER = rt_simple_sched
---
> /* default user libraries */
103c106
< LIBDIRS = -L/usr/local/lib/sim2
---
> LIBDIRS = -L/u/dfk/research/parallel/proteus/lib
111c114
< /* source list for saber C */
---
> /* source list for saber C, not including rt_*_sched.c */
117c120
< 	intr_queue.c snapshot.c net.c ihandler.c timer.c $(SCHEDULER).ca \
---
> 	intr_queue.c snapshot.c net.c ihandler.c timer.c rt_sched.ca \
125c128
< 	intr_queue.o snapshot.o net.o ihandler.o timer.o $(SCHEDULER).o \
---
> 	intr_queue.o snapshot.o net.o ihandler.o timer.o rt_sched.o \
195c198
< 	/bin/rm -f *.o $(SIMULATOR) useripi.c *.is
---
> 	/bin/rm -f *.o $(SIMULATOR) useripi.c *.is initshared initshared.new
209a213
> /* we're careful not to touch initshared unless there are changes */
211c215,216
< 	catis $(userobj) | sort -u | awk -f mk_initshared > initshared
---
> 	catis $(userobj) | sort -u | awk -f mk_initshared > initshared.new
> 	-cmp -s initshared initshared.new || mv initshared.new initshared
356,360c361,362
< rt_simple_sched.o: user.h sim.h /usr/include/stdio.h
< rt_simple_sched.o: /usr/include/ansi_compat.h /usr/include/assert.h
< rt_simple_sched.o: prototypes.h conf.param rt_thread_def.h OS.param
< rt_simple_sched.o: monitorswitch.h cache.h cache.param shmem.h mem.param
< rt_simple_sched.o: sema.h thread.h
---
> rt_sched.o: rt_sched.ca rt_FCFS_sched.ca rt_simple_sched.ca
> rt_sched.o: rt_priority_sched.ca $(USERH) simreq.h OS.param
===================================================================
RCS file: RCS/SimQuantum.S,v
retrieving revision 1.1
diff -r1.1 SimQuantum.S
6c6
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/SimQuantum.S,v 1.1 92/12/14 10:03:46 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/SimQuantum.S,v 1.2 92/12/14 10:03:56 dfk Exp $
8,9c8,9
< Revision 1.1  92/12/14  10:03:46  dfk
< Initial revision
---
> Revision 1.2  92/12/14  10:03:56  dfk
> new version from Eric
10a11,14
>  * Revision 1.8  92/12/13  18:41:14  brewer
>  * Added room for argument build area in stack allocation for SimQuantum
>  * and for SimStack.
>  * 
43,46c47,50
< 	subu	$sp, 16		# add 16 instead of 12 to account for
< 	sw	$4, 0($sp)	#   $31 saved before call to SimQuantum
< 	sw	$5, 4($sp)	# save registers $4, $5, and $31
<  	sw	$31, 8($sp)
---
> 	subu	$sp, 24		# add 24 instead of 20 to account for
> 	sw	$4, 8($sp)	#   $31 saved before call to SimQuantum
> 	sw	$5, 12($sp)	# save registers $4, $5, and $31
>  	sw	$31, 16($sp)
48c52
< 	.frame	$sp, 16, $31
---
> 	.frame	$sp, 24, $31
56,58c60,62
< 	lw	$4, 0($sp)	# restore registers
< 	lw	$5, 4($sp)
<  	lw	$31, 8($sp)
---
> 	lw	$4, 8($sp)	# restore registers
> 	lw	$5, 12($sp)
>  	lw	$31, 16($sp)
60c64
< 	addu	$sp, 16		# restore stack pointer
---
> 	addu	$sp, 24		# restore stack pointer
72,73c76,77
<   	subu	$sp, 4
< 	sw	$31, 0($sp)
---
>   	subu	$sp, 24
> 	sw	$31, 20($sp)
75c79
< 	.frame	$sp, 4, $31
---
> 	.frame	$sp, 24, $31
78,79c82,83
< 	lw	$31, 0($sp)
< 	addu	$sp, 4	
---
> 	lw	$31, 20($sp)
> 	addu	$sp, 24	
82d85
< 
===================================================================
RCS file: RCS/UserMake,v
rcsdiff error: Can't open UserMake
===================================================================
RCS file: RCS/cache.param,v
retrieving revision 1.1
diff -r1.1 cache.param
===================================================================
RCS file: RCS/conf.param,v
retrieving revision 1.1
diff -r1.1 conf.param
===================================================================
RCS file: RCS/event.h,v
retrieving revision 1.1
diff -r1.1 event.h
40c40
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/event.h,v 1.1 92/12/09 17:35:44 dfk Exp Locker: dfk $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/event.h,v 1.1 92/12/09 17:35:44 dfk Exp Locker: dfk $
112a113,116
> #ifndef _CA_			/* if not a .ca file, ignore cycle counting stuff */
> #define CYCLE_COUNTING_ON
> #define CYCLE_COUNTING_OFF
> #endif _CA_
156,157c160,164
< #define ATEVENT(kind, index_and_value, time) \
<   AtEvent(kind, index_and_value, time)
---
> #define ATEVENT(kind, index_and_value, time) { \
>   CYCLE_COUNTING_OFF; \
>   AtEvent(kind, index_and_value, time); \
>   CYCLE_COUNTING_ON; \
> }
159,163c166,173
< #define SUM_STAT(stat, val, time) \
<     { stats_[stat].value += val; \
<       if (++(stats_[stat].count) >= stats_[stat].update) {\
< 	write_stat(stat, time); \
<         stats_[stat].value = 0; } \
---
> #define SUM_STAT(stat, val, time) { \
>   CYCLE_COUNTING_OFF; \
>   stats_[stat].value += val; \
>   if (++(stats_[stat].count) >= stats_[stat].update) {\
>      write_stat(stat, time); \
>      stats_[stat].value = 0; \
>   } \
>   CYCLE_COUNTING_ON; \
166,170c176,183
< #define SUM_ARRAY_STAT(stat, index, val, time) \
<     { astats_[stat].value[index] += val; \
<       if (++(astats_[stat].count[index]) >= astats_[stat].update) {\
< 	  write_array_stat(stat, index, time); \
< 	  astats_[stat].value[index] = 0; } \
---
> #define SUM_ARRAY_STAT(stat, index, val, time) { \
>   CYCLE_COUNTING_OFF; \
>   astats_[stat].value[index] += val; \
>   if (++(astats_[stat].count[index]) >= astats_[stat].update) {\
>      write_array_stat(stat, index, time); \
>      astats_[stat].value[index] = 0; \
>   } \
>   CYCLE_COUNTING_ON; \
180,181c193,197
< #define SET_METRIC(metric, val) (metrics_[metric].value = (float)(val))
< #define SET_ARRAY_METRIC(m, i, val) (ametrics_[m].value[i] = (float)(val))
---
> #define SET_METRIC(metric, val) { \
>   CYCLE_COUNTING_OFF; \
>   metrics_[metric].value = (float)(val); \
>   CYCLE_COUNTING_ON; \
> }
183c199,203
< #define SUM_METRIC(metric, val) sum_metric_(metric, (double)(val))
---
> #define SET_ARRAY_METRIC(m, i, val) { \
>   CYCLE_COUNTING_OFF; \
>   ametrics_[m].value[i] = (float)(val); \
>   CYCLE_COUNTING_ON; \
> }
185,186c205,209
< #define SUM_ARRAY_METRIC(metric, index, val) \
<     sum_array_metric_(metric, index, (double)(val))
---
> #define SUM_METRIC(metric, val) { \
>   CYCLE_COUNTING_OFF; \
>   sum_metric_(metric, (double)(val)); \
>   CYCLE_COUNTING_ON; \
> }
187a211,216
> #define SUM_ARRAY_METRIC(metric, index, val) { \
>   CYCLE_COUNTING_OFF; \
>   sum_array_metric_(metric, index, (double)(val)); \
>   CYCLE_COUNTING_ON; \
> }
> 
190,191c219,223
< #define AVG_METRIC(m, val) \
<     avg_metric_(m, (double)(val))
---
> #define AVG_METRIC(m, val) { \
>   CYCLE_COUNTING_OFF; \
>   avg_metric_(m, (double)(val)); \
>   CYCLE_COUNTING_ON; \
> }
193,194c225,229
< #define AVG_ARRAY_METRIC(m, i, val) \
<     avg_array_metric_(m, i, (double)(val))
---
> #define AVG_ARRAY_METRIC(m, i, val) { \
>   CYCLE_COUNTING_OFF; \
>   avg_array_metric_(m, i, (double)(val)); \
>   CYCLE_COUNTING_ON; \
> }
201,202c236,240
< #define STDDEV_METRIC(metric, avg_metric, val) \
<     stddev_metric_(metric, avg_metric, (double)(val))
---
> #define STDDEV_METRIC(metric, avg_metric, val) { \
>   CYCLE_COUNTING_OFF; \
>   stddev_metric_(metric, avg_metric, (double)(val)); \
>   CYCLE_COUNTING_ON; \
> }
204,205c242,246
< #define STDDEV_ARRAY_METRIC(metric, avg_met, i, val) \
<     stddev_array_metric_(metric, avg_met, i, (double)(val))
---
> #define STDDEV_ARRAY_METRIC(metric, avg_met, i, val) { \
>   CYCLE_COUNTING_OFF; \
>   stddev_array_metric_(metric, avg_met, i, (double)(val)); \
>   CYCLE_COUNTING_ON; \
> }
===================================================================
RCS file: RCS/interrupt.param,v
retrieving revision 1.1
diff -r1.1 interrupt.param
===================================================================
RCS file: RCS/ipi.param,v
retrieving revision 1.1
diff -r1.1 ipi.param
===================================================================
RCS file: RCS/main.c,v
retrieving revision 1.1
diff -r1.1 main.c
13c13
<  * Last Modified:  $Date: 92/12/11 19:17:54 $ ($Author)
---
>  * Last Modified:  $Date: 92/12/12 12:40:38 $ ($Author)
41c41
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/main.c,v 1.1 92/12/11 19:17:54 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/main.c,v 1.2 92/12/12 12:40:38 dfk Exp $
42a43,46
>  * Revision 1.2  92/12/12  12:40:38  dfk
>  * fixed the long_to_string function to make it deal with 
>  * unsigned numbers, which are of course what it was trying to print.
>  * 
120c124
< static char *long_to_string(long number, char *buffer)
---
> static char *ulong_to_string(ulong number, char *buffer)
126c130
<     sprintf(tmp, "%ld", number);
---
>     sprintf(tmp, "%lu", number);
412c416
< 	long cyc_per_sec;
---
> 	ulong cyc_per_sec;
418c422
< 	       long_to_string((long)mt, tmp), NO_OF_PROCESSORS);
---
> 	       ulong_to_string((ulong)mt, tmp), NO_OF_PROCESSORS);
421c425
< 	       long_to_string((long)rt, tmp));
---
> 	       ulong_to_string((ulong)rt, tmp));
425c429
< 	       long_to_string(cyc_per_sec, tmp));
---
> 	       ulong_to_string(cyc_per_sec, tmp));
===================================================================
RCS file: RCS/mem.param,v
retrieving revision 1.1
diff -r1.1 mem.param
===================================================================
RCS file: RCS/net.param,v
retrieving revision 1.1
diff -r1.1 net.param
5,6c5,6
< #define N_UNIDIR
< #undef N_BIDIR
---
> #undef N_UNIDIR
> #define N_BIDIR
===================================================================
RCS file: RCS/prototypes.h,v
retrieving revision 1.1
diff -r1.1 prototypes.h
12c12
<  * Last Modified: $Date: 92/12/11 16:08:18 $
---
>  * Last Modified: $Date: 92/12/11 16:36:44 $
39c39
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/prototypes.h,v 1.1 92/12/11 16:08:18 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/prototypes.h,v 1.2 92/12/11 16:36:44 dfk Exp $
40a41,51
>  * Revision 1.2  92/12/11  16:36:44  dfk
>  * changed the sleeping-thread system to be much more efficient.
>  * If a processor is idle, it will skip ahead to the wakeup time
>  * for the first sleeping thread.
>  * It runs MUCH faster than spinning on the TIMER_PERIOD method.
>  * Questions:
>  *   how does it interact with preemptive scheduling? It should work.
>  *   how does it interact with semaphores, which also use timer requests?
>  *   does it work with multiple sleeping threads? it should
>  *   does it work when a new thread is created on a sleeping processor? should.
>  * 
364c375
< void _timer_handler(void);
---
> void _timer_handler(int processor, int currtid, Time currenttime);
===================================================================
RCS file: RCS/queens.ca,v
rcsdiff error: Can't open queens.ca
===================================================================
RCS file: RCS/rt_FCFS_sched.ca,v
retrieving revision 1.1
diff -r1.1 rt_FCFS_sched.ca
1c1
< /* $Id: rt_FCFS_sched.ca,v 1.1 92/12/09 14:13:58 dfk Exp $
---
> /* $Id: rt_FCFS_sched.ca,v 1.2 92/12/09 15:23:37 dfk Exp $
8a9,12
>  *   9-Dec-92 DFK:
>  *        streamlined by removing priority
>  *        streamlined by removing Choose_Node
>  *        optimized choose_node by defining Current_Thread
22,23c26
<  *   information in making scheduling decisions, but does keep 
<  *   track of the total amount of priority for each processor.
---
>  *   information in making scheduling decisions.
28,30c31,35
<  *   stored in the array. Second, sched_choose just returns the Current_Node,
<  *   since the same job stays running until it has been removed. Thus, we
<  *   must be careful in sched_remove when removing the current node.
---
>  *   stored in the array. Second, sched_choose just returns the current
>  *   thread, since the same job stays running until it has been removed. 
>  *   Note that the current node is always the first one on the list, so
>  *   we don't need a separate variable Choose_Node; it is just the one 
>  *   after the dummy-head-node, if any. 
31a37,43
>  *   However, since sched_choose is the common case, I add Current_Thread, 
>  *   maintained by insert and remove to point to the current thread, if any.
>  *   This optimizes sched_choose by 2 cycles, down to 5 cycles, but costs
>  *   5 cycles in each call to sched_insert or sched_remove. So sched_choose
>  *   must be called 5 times for every insert/remove pair, for this to
>  *   be worthwhile. It is probably true for longish threads.
>  *
85d96
<   long priority;
99,102c110,111
<  * Head_Node at a dummy node, and insert new nodes before that. 
<  * If Choose_Node is the dummy node, the list is empty. When the 
<  * Choose_Node is removed, we advance it along the list to the next 
<  * non-dummy node. We also keep unused nodes in a free list.
---
>  * Head_Node at a dummy node, and insert new nodes before that. The currently
>  * active thread is Head_Node->next.  We keep unused nodes in a free list.
105d113
<   long priority;
116,117c124
<  * Node_Sum		is the sum of the priorities for all nodes in Node.
<  * Choose_Node		is a pointer to the "current" node in sched_choose().
---
>  * Current_Thread	is a pointer to the currently executing thread
132,133c139
<   long node_sum;
<   Sched_Node *choose_node;
---
>   Thread *current_thread;
142,143c148
< #define Node_Sum		(Global->node_sum)
< #define Choose_Node		(Global->choose_node)
---
> #define Current_Thread	(Global->current_thread)
186,187c191
< 	   Choose_Node = node;	/* point chooser at dummy */
< 	   Node_Sum  = 0;		/* 0 sum priorities */
---
> 	   Current_Thread = NULL; /* initially, no thread running */
198c202
<     /* Now both Head_Node and Choose_Node are pointing at the
---
>     /* Now Head_Node is pointing at the
200c204
< 	* itself. Free_Node points at the first free node.
---
> 	* itself. Free_Node points at the first free node. Current_Thread=NULL.
233,234c237
< 			    "sched_insert: t =  %x, pri = %ld\n",
< 			    t, t->priority);
---
> 			    "sched_insert: t =  %x\n", t);
247d249
<     node->priority = t->priority;
255,257d256
<     /* update cumulative node priority */
<     Node_Sum += t->priority;
< 
264,266c263,266
<     /* If Choose_Node was at the dummy, move it up */
<     if (Choose_Node->thread == NULL)
< 	 Choose_Node = Choose_Node->next;
---
>     /* The current thread won't change if the list wasn't empty,
> 	* but it is easier to make the assignment than to make the test.
> 	*/
>     Current_Thread = Head_Node->next->thread;
294,297d293
<     /* Is it the current node? Then change current node */
<     if (Choose_Node == node) 
< 	 Choose_Node = Choose_Node->next;
< 
313,314c309,313
<     /* update cumulative node priority */
<     Node_Sum -= t->priority;
---
> 
>     /* The current thread only changes if node was current,
> 	* but it is easier to make the assignment than to make the test.
> 	*/
>     Current_Thread = Head_Node->next->thread;
319,330c318
<     /*
< 	* requires: (t->priority + delta) > 0
< 	* modifies: t, Node_Sum (global), Sched_Node associated with t (global)
< 	* effects:  Changes the priority of thread t by delta.
< 	*
< 	*/
< 
<     /* update cumulative node priority */
<     Node_Sum += delta;
< 
<     /* update priority for sched node associated with t */
<     ((Sched_Node *) (t->sched_node))->priority += delta;
---
>     /* we don't use priority at all, so this is empty */
340,342c328,331
< 	*  	  	  the same thing. If the list is empty, Choose_Node
< 	*  	  	  points to the dummy, which has thread==NULL, so we
< 	*  	  	  return NULL.
---
> 	*  	  	  the same thing. Note that the current thread is just the 
> 	*           one on the front of the list, right after Head_Node.
> 	*           If the list is empty, Head_Node->next == Head_Node, 
> 	*  	  	  and Head_Node->thread==NULL, so we return NULL.
348c337,338
< 			    Choose_Node, Choose_Node-Node, Choose_Node->thread);
---
> 			    Head_Node->next, Head_Node->next - Node, 
> 			    Head_Node->next->thread);
351c341
<     return(Choose_Node->thread);
---
>     return(Current_Thread);
371,373c361,362
<     (void) fprintf(stderr, "Choose_Node = %lx => index %d\n",
< 			    Choose_Node, Choose_Node - Node);
<     (void) fprintf(stderr, "Node_Sum    = %ld\n", Node_Sum);
---
>     (void) fprintf(stderr, "Current_Thread   = %lx, first thread = %lx\n",
> 			    Current_Thread, Head_Node->next->thread);
378c367
< 				 "Node %x => index %d (prev %d, next %d):  thread %x: pri = %ld, cnt = %ld\n",
---
> 				 "Node %x => index %d (prev %d, next %d):  thread %x: cnt = %ld\n",
380c369
< 				 node->thread, node->priority, node->thread->count);
---
> 				 node->thread, node->thread->count);
423d411
<       threads[i].priority = random() % 100;
509,510c497,498
< 		     "I = %d:\tpriority = %ld\texpect = %.3lf\tactual = %ld\n",
< 		     i, threads[i].priority, expected, threads[i].count);
---
> 		     "I = %d:\texpect = %.3lf\tactual = %ld\n",
> 		     i, expected, threads[i].count);
===================================================================
RCS file: RCS/rt_sched.ca,v
retrieving revision 1.1
diff -r1.1 rt_sched.ca
1,3c1,19
< /*
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/rt_sched.ca,v 1.1 92/12/08 14:38:18 dfk Exp $
<  *
---
> 
> /****************************************************************************
>  *                                                                          *
>  *                     Parallel Architecture Simulator                      *
>  *                Eric A. Brewer  and  Chris N. Dellarocas                  *
>  *                     Laboratory for Computer Science                      *
>  *                  Massachusetts Institute of Technology                   *
>  *                                                                          *
>  * Module: rt_sched.ca
>  *                                                                          *
>  * Description: chooses one of the schedulers, depending on #defs           *
>  *                                                                          *
>  * Last Modified: 11/26/92                                                  *
>  * Global Functions: see the scheduler files                                *
>  *                                                                          *
>  * Global Variables: see the scheduler files                                *
>  *                                                                          *
>  ****************************************************************************
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/rt_sched.ca,v 1.2 92/12/08 14:39:35 dfk Exp $
5,6c21,22
< Revision 1.1  92/12/08  14:38:18  dfk
< Initial revision
---
> Revision 1.2  92/12/08  14:39:35  dfk
> now just a switch for all schedulers, depending on parameters
8,16d23
< Revision 1.2  92/04/02  13:19:37  brewer
< removed dependence on rt_rts.h
< cleaned up gcc warnings
< 
< Revision 1.1  92/02/11  16:07:30  brewer
< Initial revision
< 
<  * Revision 1.2  91/04/04  16:42:50  carl
<  * modified to conform to heinous naming conventions (don't blame me)
18,107c25
<  * Revision 1.1  91/04/04  16:31:49  carl
<  * Initial revision
<  * 
<  *
<  * Project:	PSG Runtime System Prototype
<  * Module:	Lottery Scheduler, Ready List
<  * Author:	Carl Waldspurger
<  * History:
<  *
<  *	04-Apr-91	Modified to user simulator per-processor "globals".
<  *			Added RCS Header.
<  *	01-Apr-91	Tested (it works), and documented.
<  *	28-Mar-91	Development.
<  *	27-Mar-91	Started.
<  *
<  * Overview:
<  *
<  *   This module is an ANSI-C implementation of a local "priority flow"
<  *   scheduler.  The implementation exploits randomization to "fairly"
<  *   allocate time slices to threads; i.e. the fraction of time allocated
<  *   to thread T should approximate (T.priority) / TotalPriority, where
<  *   TotalPriority is the sum of T.priority over all threads T.
<  *
<  *   The underlying ideas are based on my internal PSG note entitled
<  *   "Priority Flow: A Framework For Abstract, Adaptive Reource Management",
<  *   May 23, 1990.
<  *
<  *   The scheduler exports the following operations:
<  *
<  *	void sched_initialize()
<  *	void sched_insert(Thread *t)
<  *	void sched_remove(Thread *t)
<  *	void sched_change_priority(Thread *t, long delta)
<  *	Thread *sched_choose()
<  *
<  *   The following description of the "choose next thread" operation 
<  *   sched_choose() should help in deciphering the code:
<  *
<  *	First, a random number between 0 and (TotalPriority - 1) is generated.
<  *	This random number serves as a "priority index" into the "ready list"
<  *      of threads waiting to execute.  Conceptually, the ready list is
<  *	traversed sequentially, and a running sum of the priorities
<  *      of visited threads is maintained.  When this running sum reaches
<  *      the randomly chosen priority "index", the thread at that index
<  *      is selected as the next to be executed.
<  *
<  *	To avoid O(n) traversals of a linear "ready list", a more suitable
<  *      data structure is used to obtain O(lg N) times for all of the 
<  *      scheduler operations.  An array is used to represent a tree of
<  *      partial sums; the implementation technique similar to that used
<  *      for heaps represented by arrays.
<  *
<  *      For node N, N.sum represents the sum of the priorities for all of
<  *      the threads that N covers in the tree.  Scheduler operations that
<  *      alter, insert, or remove a thread T must update the nodes that are
<  *      ancestors of the (leaf) node for T.  Note that the leaves of the
<  *	tree are unordered, so complicated tree-balancing schemes are
<  *      unnecessary.
<  *
<  *   The following compile-time flags are provided to facilitate debugging:
<  *
<  *	DEBUG		Debugging information output to Debug_Stream.
<  *		        A test "main" program is generated.
<  *
<  *	DEBUG_INSERT	Additional debugging info for sched_insert().
<  *	DEBUG_REMOVE	Additional debugging info for sched_remove().
<  *	DEBUG_CHOOSE	Additional debugging info for sched_choose().
<  *
<  * Caveats:
<  *
<  *   (1) This implementation is not as straightforward as one might like.
<  *   This is due to the heavy use of macro-expansion in order to
<  *   improve efficiency via (manual) inlining and loop unrolling.
<  *   I guess a good optimizing compiler that supports "inlines" is just
<  *   too much to ask for.  Sigh.
<  *
<  *   (2) This implementation supports up to 256 active threads.
<  *   The limitation was imposed to simplify the code; it can be trivially
<  *   modified to support a larger static limit.  Alternatively, the
<  *   hard limit could be softened by dynamically expanding the data
<  *   structure (currently a statically allocated array) when the limit
<  *   is exceeded.  For the prototype runtime system, a hard limit seems
<  *   reasonable.
<  *
<  *   (3) Recommendations on improvements would be much appreciated.
<  *
<  *   (4) Don't blame the naming conventions on me, but rather on the
<  *   tyranny of the majority.
<  *
<  */
---
>  ****************************************************************************/
108a27
> #include "OS.param"
110,113c29,31
< /*
<  * Inclusions
<  *
<  */
---
> #ifdef SCHED_FCFS
> # include "rt_FCFS_sched.ca"
> #endif
115c33,35
< #include <malloc.h>
---
> #ifdef SCHED_SIMPLE
> # include "rt_simple_sched.ca"
> #endif
117,121c37,39
< #ifdef DEBUG
< #include <stdio.h>
< #else
< #include "user.h"
< #endif DEBUG
---
> #ifdef SCHED_PRIORITY
> # include "rt_priority_sched.ca"
> #endif
123,779d40
< /*
<  * Macros and Constants
<  *
<  *   The tree of partial sums has branching factor BRANCH and 
<  *   height (MAX_LEVEL + 1); levels are zero-based (leaves at level 0).
<  *   Macros are defined for computing the indices of CHILD and PARENT
<  *   nodes.
<  *
<  *   Since C macros are somewhat limited, the actual value of the BRANCH
<  *   and MAX_LEVEL constants often (unfortunately) enters into the code,
<  *   e.g. for loop unrolling.
<  *
<  */
< 
< /*
<  * Tree shape constants and macros.
<  *
<  */
< #define BRANCH		(4)
< #define LG_BRANCH	(2)
< 
< #define MAX_LEVEL	(4)
< #define MAX_SLOTS	(256)
< 
< #define CHILD(index, n)	((index << LG_BRANCH) + n)
< #define PARENT(index)	((index - 1) >> LG_BRANCH)
< 
< /*
<  * ROOTI is the index of the root node for level I.
<  * LEAF0 is the index of the first leaf node.
<  *
<  */
< #define ROOT4		(0)
< #define ROOT3		CHILD(ROOT4, 1)
< #define ROOT2		CHILD(ROOT3, 1)
< #define ROOT1		CHILD(ROOT2, 1)
< #define LEAF0		CHILD(ROOT1, 1)
< 
< /*
<  * NCOVERI is the number of leaf nodes (threads) covered by ROOTI
<  *
<  */
< #define NCOVER4		MAX_SLOTS
< #define NCOVER3		(NCOVER4 >> LG_BRANCH)
< #define NCOVER2		(NCOVER3 >> LG_BRANCH)
< #define NCOVER1		(NCOVER2 >> LG_BRANCH)
< #define NCOVER0		(NCOVER1 >> LG_BRANCH)
< 
< /*
<  * AFTERI is the index of the leaf that follows the last leaf node
<  * covered by ROOTI.
<  *
<  */
< #define AFTER4		(LEAF0 + NCOVER4)
< #define AFTER3		(LEAF0 + NCOVER3)
< #define AFTER2		(LEAF0 + NCOVER2)
< #define AFTER1		(LEAF0 + NCOVER1)
< #define AFTER0		(LEAF0 + NCOVER0)
< 
< #define MAX_NODE	AFTER4
< #define INVALID_NODE	(-1)
< 
< /*
<  * Other useful constants.
<  *
<  */
< 
< #ifdef DEBUG
< #define FALSE		(0)
< #define TRUE		(1)
< #endif DEBUG
< 
< 
< /*
<  * Types
<  *
<  */
< 
< #ifdef DEBUG
< typedef unsigned long ulong;
< 
< typedef struct {
<   ulong priority;
<   int sched_node;
<   int count;
< } Thread;
< #endif DEBUG
< 
< typedef struct {
<   ulong sum;
<   Thread *thread;
< } Sum_Node;
< 
< 
< /*
<  * Global Variables
<  *
<  *   Sched_Node   is the array that represents the tree of partial sums.
<  *   Current_Root is the index of the current root node.
<  *   Next_Node    is the index of the next available (free) node.
<  *   Debug_Stream is a file stream on which debugging output is placed.
<  *
<  *   All globals are private to this module (static).
<  *
<  */
< 
< typedef struct {
<   Sum_Node sched_node[MAX_NODE];
<   int current_root;
<   int next_node;
< } Sched_Global;
< 
< static Sched_Global Globals[NO_OF_PROCESSORS];
< static Sched_Global *Global;
< 
< #define Sched_Node	(Global->sched_node)
< #define Current_Root	(Global->current_root)
< #define Next_Node	(Global->next_node)
< 
< #ifdef DEBUG
< static FILE *Debug_Stream = stderr;
< #endif DEBUG
< 
< /*
<  * Scheduler Operations
<  *
<  */
< 
< void sched_initialize()
< {
<   /*
<    * modifies:  All Globals
<    * effects:   Initializes global scheduler data structures.
<    *
<    */
< 
<   int i, p;
< 
<   /*
<    * Invoke (once) simulator routine to hack per-processor globals.
<    * 
<    * Note that the cycle-counted time will still not accurately reflect
<    * the true costs, since the many references to Sched_Node[C], where
<    * C is a constant, would only really cost a single instruction.
<    * Since this is a pervasive operation, measured running times may
<    * be significantly inflated.
<    *
<    */
<   static int initialized_globals = FALSE;
<   if (!initialized_globals)
<     define_local((char **)&Global, (char *)Globals, sizeof(Sched_Global));
< 
<   for (p = 0; p < NO_OF_PROCESSORS; p++)
<     {
<       Globals[p].current_root = ROOT1;
<       Globals[p].next_node    = LEAF0;
<       
<       for (i = 0; i < MAX_NODE; i++)
< 	{
< 	  Globals[p].sched_node[i].sum = 0;
< 	  Globals[p].sched_node[i].thread = (Thread *) NULL;
< 	}
<     }
< }
< 
< /*
<  * Macros for traversing ancestors.
<  * (Losing C compiler doesn't support inline procedures.)
<  *
<  */
< 
< #define UPDATE_ROOT1(leaf, amount)			\
< {							\
<   Sched_Node[ROOT1].sum += amount;			\
< }
< 
< #define UPDATE_ROOT2(leaf, amount)			\
< {							\
<   Sched_Node[PARENT(leaf)].sum += amount;		\
<   Sched_Node[ROOT2].sum += amount;			\
< }
< 
< #define UPDATE_ROOT3(leaf, amount)			\
< {							\
<   int parent;						\
<   Sched_Node[parent = PARENT(leaf)].sum += amount;	\
<   Sched_Node[PARENT(parent)].sum += amount;		\
<   Sched_Node[ROOT3].sum += amount;			\
< }
< 
< #define UPDATE_ROOT4(leaf, amount)			\
< {							\
<   int parent;						\
<   Sched_Node[parent = PARENT(leaf)].sum += amount;	\
<   Sched_Node[parent = PARENT(parent)].sum += amount;	\
<   Sched_Node[PARENT(parent)].sum += amount;		\
<   Sched_Node[ROOT4].sum += amount;			\
< }
< 
< #define UPDATE_LEAF(leaf, t)				\
< {							\
<   Sched_Node[leaf].sum = t->priority;			\
<   Sched_Node[leaf].thread = t;		 		\
<   /* printf("leaf %d Updating %0x to %d\n", leaf, &((Sched_Node)[(leaf)].thread), t);    \ 
<   ; */ \
<   t->sched_node = leaf;					\
< }
< 
< void sched_insert(Thread *t)
< {
<   /*
<    * modifies:  t, All Globals
<    * effects:   Inserts thread t into the "ready list" of threads waiting
<    *            to execute.  Fails with fatal error iff the hard limit on the
<    *		number of threads (MAX_SLOTS) is exceeded.
<    *
<    */
< /*
<   printf("P%d Inserting thread %d Next_Node %d\n", 
< 	 CURR_PROCESSOR, t->t_stid, Next_Node);
< */
<   /* update leaf node for t */
<   UPDATE_LEAF(Next_Node, t);
< /*
<   printf("Update %0x %d\n",  &(Sched_Node[LEAF0].thread), Sched_Node[LEAF0].thread);
< */  
<   /* update ancestors of leaf node for t */
<   switch (Current_Root) {
<   case ROOT1:
<     if (Next_Node == AFTER1)
<       {
< 	/* expand to use ROOT2 */
< 	Sched_Node[ROOT2].sum = Sched_Node[ROOT1].sum;
< 	UPDATE_ROOT2(AFTER1, t->priority);
< 	Current_Root = ROOT2;
<       }
<     else
<       UPDATE_ROOT1(Next_Node, t->priority);
<     break;
< 
<   case ROOT2:
<     if (Next_Node == AFTER2)
<       {
< 	/* expand to use ROOT3 */
< 	Sched_Node[ROOT3].sum = Sched_Node[ROOT2].sum;
< 	UPDATE_ROOT3(AFTER2, t->priority);
< 	Current_Root = ROOT3;
<       }
<     else
<       UPDATE_ROOT2(Next_Node, t->priority);
<     break;
<     
<   case ROOT3:
<     if (Next_Node == AFTER3)
<       {
< 	/* expand to use ROOT4 */
< 	Sched_Node[ROOT4].sum = Sched_Node[ROOT3].sum;
< 	UPDATE_ROOT4(AFTER3, t->priority);
< 	Current_Root = ROOT4;
<       }
<     else
<       UPDATE_ROOT3(Next_Node, t->priority);
<     break;
< 
<   case ROOT4:
<     /* can't expand: fail if out of slots */
<     if (Next_Node == AFTER4)
<       fatal("sched_insert: exceeded limit of %d threads\n", MAX_SLOTS);
<     else
<       UPDATE_ROOT4(Next_Node, t->priority);
<     break;
<   };
<   
<   /* update Next_Node to reflect insert at Sched_Node[Next_Node] */
<   Next_Node++;
< /*
<   printf("P%d Exiting sched_insert Next_Node %d\n",
< 	 CURR_PROCESSOR, Next_Node);
< */
< }
< 
< void sched_change_priority(Thread *t, long delta)
< {
<   /*
<    * requires:  (t->priority + delta) is nonnegative
<    * modifies:  t, Sched_Node (global)
<    * effects:   Changes the priority of thread t by delta.
<    *
<    */
< 
<   t->priority += delta;
<   Sched_Node[t->sched_node].sum = t->priority;
< 
<   switch (Current_Root) {
<   case ROOT1:
<     UPDATE_ROOT1(t->sched_node, delta);
<     break;
<   case ROOT2:
<     UPDATE_ROOT2(t->sched_node, delta);
<     break;
<   case ROOT3:
<     UPDATE_ROOT3(t->sched_node, delta);
<     break;
<   case ROOT4:
<     UPDATE_ROOT4(t->sched_node, delta);
<     break;
<   }
< }
< 
< void sched_remove(Thread *t)
< {
<   /*
<    * requires:  t is currently managed by the scheduler
<    * modifies:  t, All Globals
<    * effects:   Removes thread t from the "ready list" of threads waiting
<    *		to execute.
<    */
< 
<   /*
<    * The basic idea is to move last used leaf to take place of
<    * the removed node.  This is allowable since the leaves are
<    * unordered.  Thus, we need to update the ancestors of both
<    * the leaf node that was removed, and the leaf node (if any)
<    * that was moved to take its place.
<    *
<    */
< 
<   int lastNode = --Next_Node;
< 
<   switch (Current_Root) {
<   case ROOT1:
<     UPDATE_ROOT1(lastNode, -Sched_Node[lastNode].sum);
<     if (t->sched_node != lastNode)
<       {
< 	UPDATE_ROOT1(t->sched_node, -t->priority + Sched_Node[lastNode].sum);
<         Sched_Node[t->sched_node] = Sched_Node[lastNode];
<         if (Sched_Node[t->sched_node].thread != NULL)
< 	  (Sched_Node[t->sched_node].thread)->sched_node = t->sched_node;
<         else
< 	{
< 	  (Sched_Node[t->sched_node].thread)=(Thread *)malloc(sizeof(Thread));
< 	  (Sched_Node[t->sched_node].thread)->sched_node = t->sched_node;
< 	}
<       }
<     break;
< 
<   case ROOT2:
<     UPDATE_ROOT2(lastNode, -Sched_Node[lastNode].sum);
<     if (t->sched_node != lastNode)
<       {
< 	UPDATE_ROOT2(t->sched_node, -t->priority + Sched_Node[lastNode].sum);
< 	Sched_Node[t->sched_node] = Sched_Node[lastNode];
< 	(Sched_Node[t->sched_node].thread)->sched_node = t->sched_node;
<       }
< 
<     /* contract to use ROOT1 */
<     if (lastNode == AFTER1)
<       Current_Root = ROOT1;
<     break;
< 
<   case ROOT3:
<     UPDATE_ROOT3(lastNode, -Sched_Node[lastNode].sum);
<     if (t->sched_node != lastNode)
<       {
< 	UPDATE_ROOT3(t->sched_node, -t->priority + Sched_Node[lastNode].sum);
< 	Sched_Node[t->sched_node] = Sched_Node[lastNode];
< 	(Sched_Node[t->sched_node].thread)->sched_node = t->sched_node;
<       }
< 
<     /* contract to use ROOT2 */
<     if (lastNode == AFTER2)
<       Current_Root = ROOT2;
<     break;
< 
<   case ROOT4:
<     UPDATE_ROOT4(lastNode, -Sched_Node[lastNode].sum);
<     if (t->sched_node != lastNode)
<       {
< 	UPDATE_ROOT4(t->sched_node, -t->priority + Sched_Node[lastNode].sum);
< 	Sched_Node[t->sched_node] = Sched_Node[lastNode];
< 	(Sched_Node[t->sched_node].thread)->sched_node = t->sched_node;
<       }
< 
<     /* contract to use ROOT3 */
<     if (lastNode == AFTER3)
<       Current_Root = ROOT3;
<     break;
<   };
< 
<   /* removed thread is no longer associated with any node */
<   t->sched_node = INVALID_NODE;
< }
< 
< /*
<  * Macros for performing "priority index" search.
<  * (Losing C compiler doesn't support inline procedures).
<  *
<  */
< 
< #define CHECK_WIN_RETURN(node, running_sum, winning_sum)		\
< {									\
<   /* repeat BRANCH times */						\
<   running_sum += Sched_Node[node].sum;					\
<   if (running_sum >= winning_sum) return(Sched_Node[node].thread);	\
<   running_sum += Sched_Node[node + 1].sum;				\
<   if (running_sum >= winning_sum) return(Sched_Node[node + 1].thread);	\
<   running_sum += Sched_Node[node + 2].sum;				\
<   if (running_sum >= winning_sum) return(Sched_Node[node + 2].thread);	\
<   /* must be last (BRANCHth) node by process of elimination */		\
<   return(Sched_Node[node + (BRANCH - 1)].thread);			\
< }        
< 
< #define CHECK_WIN_INDEX(node, running_sum, winning_sum, i)		\
< {									\
<   do {									\
<     ulong lastSum;							\
<     i = node;								\
< 									\
<     /* repeat BRANCH times */						\
<     lastSum = running_sum;						\
<     running_sum += Sched_Node[i].sum;					\
<     if (running_sum >= winning_sum) { running_sum = lastSum; break; }	\
<     lastSum = running_sum;						\
<     running_sum += Sched_Node[++i].sum;					\
<     if (running_sum >= winning_sum) { running_sum = lastSum; break; }	\
<     lastSum = running_sum;						\
<     running_sum += Sched_Node[++i].sum;					\
<     if (running_sum >= winning_sum) { running_sum = lastSum; break; }	\
<     /* must be last (BRANCHth) node by process of elimination */	\
<     ++i;								\
<   } while (FALSE);							\
< }
< 
< 
< #define mymod(a,b)  ((a) - (int)((a)/(b))*(b))
< 
< Thread *sched_choose()
< {
<   /*
<    * 
<    * modifies:	nothing
<    * effects:	Returns the next thread to run, based on the
<    *		probabilistically fair "priority flow" algorithm
<    *		described in the overview.
<    *
<    */
< 
<   extern ulong fast_random();
< 
<   ulong winning_sum, running_sum;
<   int i;
< /*
<   printf("P%d Entering sched_choose Next_Node = %d.\n", 
< 	 CURR_PROCESSOR, Next_Node);
< */
<   /* special case: no ready threads */
<   if (Next_Node == LEAF0)
<     return((Thread *) NULL);
< /*
<   printf("Returns %d %d\n", Sched_Node[LEAF0].thread,
< 	 Sched_Node[LEAF0].thread == NULL ? 0 : (Sched_Node[LEAF0].thread)->t_stid  );
< */
<   /* special case: a single ready thread */
<   if (Next_Node == (LEAF0 + 1))
<     return(Sched_Node[LEAF0].thread);
< 
<   /* choose random index */
<   winning_sum = fast_random() % Sched_Node[Current_Root].sum;
<   running_sum = 0;
< 
< #ifdef DEBUG_CHOOSE
<   (void) fprintf(Debug_Stream, 
< 		 "in sched_choose: winning_sum = %d\n", winning_sum);
< 
<   /* perform simple sequential scan */
<   for (i = LEAF0; i < Next_Node; i++)
<     {
<       running_sum += Sched_Node[i].sum;
<       if (running_sum >= winning_sum)
< 	break;
<     }
<   
<   (void) fprintf(Debug_Stream, 
< 		 "in sched_choose: chosen node should be %d\n", i);
<   
<   /* reset running_sum for non-debugging operation */
<   running_sum = 0;
< #endif DEBUG_CHOOSE    
< 
<   switch (Current_Root) {
<   case ROOT1:
<     CHECK_WIN_RETURN(LEAF0, running_sum, winning_sum);
<     break;
< 
<   case ROOT2:
<     CHECK_WIN_INDEX(ROOT1, running_sum, winning_sum, i);
<     CHECK_WIN_RETURN(CHILD(i, 1), running_sum, winning_sum);
<     break;
< 
<   case ROOT3:
<     CHECK_WIN_INDEX(ROOT2, running_sum, winning_sum, i);
<     CHECK_WIN_INDEX(CHILD(i, 1), running_sum, winning_sum, i);
<     CHECK_WIN_RETURN(CHILD(i, 1), running_sum, winning_sum);
<     break;
< 
<   case ROOT4:
<     CHECK_WIN_INDEX(ROOT3, running_sum, winning_sum, i);
<     CHECK_WIN_INDEX(CHILD(i, 1), running_sum, winning_sum, i);
<     CHECK_WIN_INDEX(CHILD(i, 1), running_sum, winning_sum, i);
<     CHECK_WIN_RETURN(CHILD(i, 1), running_sum, winning_sum);
<     break;
<   }
<   UNREACHABLE();
<   return((Thread *)NULL); /* unreachable, removes compiler warning */
< }
< 
< /*
<  * Note: The entire remainder of this file contains only
<  *       test code for debugging.
<  *
<  */
< 
< #ifdef DEBUG
< void node_print(FILE *stream, int index)
< {
<   /*
<    * modifies: stream
<    * effects:  Outputs a printed representation of Sched_Node[index] to stream.
<    *
<    */
< 
<   (void) fprintf(stream, "[%d: %d] ", index, Sched_Node[index].sum);
< }
< 
< void nodes_print(FILE *stream, int first, int last)
< {
<   /*
<    * modifies:  stream
<    * effects:	Outputs a printed representation of all nodes
<    *            from Sched_Node[first] to Sched_Node[last].
<    *
<    */
< 
<   int i;
<   for (i = first; i < last; i++)
<     node_print(stream, i);
<   (void) fputc('\n', stream);
< }
< 
< void sched_print(FILE *stream)
< {
<   /*
<    * modifies:	stream
<    * effects:	Outputs a printed representation of the tree of partial
<    *		sums to stream.
<    *
<    */
< 
<   (void) fprintf(stream, "Current_Root = %d\n", Current_Root);
<   (void) fprintf(stream, "Next_Node    = %d\n", Next_Node);
<   (void) fprintf(stream, "TotalSum    = %d\n", Sched_Node[Current_Root].sum);
< 
<   switch(Current_Root) {
<   case ROOT1:
<     nodes_print(stream, LEAF0, AFTER1);
<     break;
< 
<   case ROOT2:
<     nodes_print(stream, ROOT1, ROOT1 + BRANCH);
<     nodes_print(stream, LEAF0, AFTER2);
<     break;
< 
<   case ROOT3:
<     nodes_print(stream, ROOT2, ROOT2 + BRANCH);
<     nodes_print(stream, ROOT1, ROOT1 + (BRANCH * BRANCH));
<     nodes_print(stream, LEAF0, AFTER3);
<     break;
< 
<   case ROOT4:
<     nodes_print(stream, ROOT3, ROOT3 + BRANCH);
<     nodes_print(stream, ROOT2, ROOT2 + (BRANCH * BRANCH));
<     nodes_print(stream, ROOT1, ROOT1 + (BRANCH * BRANCH * BRANCH));
<     nodes_print(stream, LEAF0, AFTER4);
<     break;
< 
<   default:
<     (void) fprintf(stream, "ERROR: Current_Root (%d) Invalid\n", Current_Root);
<   }
< }
<   
< main(int argc, char *argv[])
< {
<   Thread threads[MAX_SLOTS];
<   Thread *t;
<   int i, j;
<   
<   /* initialize thread structures */
<   for (i = 0; i < MAX_SLOTS; i++)
<     threads[i].count = 0;
< 
<   /* initialize scheduler structures */
<   sched_initialize();
<   
<   for (i = 0; i < MAX_SLOTS; i++)
<     {
<       (void) fprintf(Debug_Stream, "I = %d\n", i);
< 
<       threads[i].priority = random() % 100;
< 
<       sched_insert(&threads[i]);
< #ifdef DEBUG_INSERT
<       (void) fprintf(Debug_Stream, 
< 		     "\nI = %d: INSERT, pri = %d\n", i, threads[i].priority);
<       sched_print(Debug_Stream);
< #endif DEBUG_INSERT
< 
<       sched_remove(&threads[i]);
< #ifdef DEBUG_REMOVE
<       (void) fprintf(Debug_Stream,
< 		     "\nI = %d: REMOVE, pri = %d\n", i, threads[i].priority);
<       sched_print(Debug_Stream);
< #endif DEBUG_REMOVE
< 
<       sched_insert(&threads[i]);
< #ifdef DEBUG_REMOVE
<       (void) fprintf(Debug_Stream,
< 		     "\nI = %d: RE-INS, pri = %d\n", i, threads[i].priority);
<       sched_print(Debug_Stream);
< #endif DEBUG_REMOVE
< 
< #ifdef DEBUG_CHOOSE
<       t = sched_choose();
<       (void) fprintf(Debug_Stream,
< 		     "\nI = %d: CHOOSE, node = %d\n", i, t->sched_node);
< #endif DEBUG_CHOOSE
<     }
< 
< #ifdef DEBUG_CHOOSE
< #define NTRIALS		(MAX_SLOTS * 100)
<   for (i = 0; i < NTRIALS; i++)
<     {
<       t = sched_choose();
<       t->count++;
<     }
< 
<   for (i = 0; i < MAX_SLOTS; i++)
<     {
<       double expectedRatio = ((double) threads[i].priority) 
< 	                       / ((double) Sched_Node[Current_Root].sum);
< 
<       (void) fprintf(Debug_Stream,
< 		   "I = %d: expected = %g, actual = %d\n",
< 		   i, expectedRatio * (double) NTRIALS, threads[i].count);
<     }
< #endif DEBUG_CHOOSE
< }
< #endif DEBUG
===================================================================
RCS file: RCS/rt_thread.aux.c,v
retrieving revision 1.1
diff -r1.1 rt_thread.aux.c
13c13
<  * Last Modified:  $Date: 92/12/11 15:09:40 $ (eab)
---
>  * Last Modified:  $Date: 92/12/11 16:36:47 $ (eab)
49c49
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/rt_thread.aux.c,v 1.1 92/12/11 15:09:40 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/rt_thread.aux.c,v 1.2 92/12/11 16:36:47 dfk Exp $
50a51,61
>  * Revision 1.2  92/12/11  16:36:47  dfk
>  * changed the sleeping-thread system to be much more efficient.
>  * If a processor is idle, it will skip ahead to the wakeup time
>  * for the first sleeping thread.
>  * It runs MUCH faster than spinning on the TIMER_PERIOD method.
>  * Questions:
>  *   how does it interact with preemptive scheduling? It should work.
>  *   how does it interact with semaphores, which also use timer requests
>  *   does it work with multiple sleeping threads? it should
>  *   does it work when a new thread is created on a sleeping processor? should.
>  * 
197a209,210
> 	      make_timer_request_(CURR_PROCESSOR, CURR_TID, 
> 						 CURR_TIME + ReadTimer(CURR_PROCESSOR));
199c212,215
< 	    if (PP->sleep_nonempty)
---
> 	    if (PP->sleep_nonempty) {
> 		   Time wakeup_time = *(PP->sleep_head); /* first thing to awake */
> 		   make_timer_request_(CURR_PROCESSOR, CURR_TID, wakeup_time);
> 	    }
201,202d216
< 	      make_timer_request_(CURR_PROCESSOR, CURR_TID,
< 				  CURR_TIME + ReadTimer(CURR_PROCESSOR));
===================================================================
RCS file: RCS/rt_thread.ca,v
retrieving revision 1.1
diff -r1.1 rt_thread.ca
13c13
<  * Last Modified:  $Date: 92/12/11 15:09:38 $ (eab)
---
>  * Last Modified:  $Date: 92/12/11 16:36:49 $ (eab)
53c53
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/rt_thread.ca,v 1.1 92/12/11 15:09:38 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/rt_thread.ca,v 1.2 92/12/11 16:36:49 dfk Exp $
54a55,65
> Revision 1.2  92/12/11  16:36:49  dfk
> changed the sleeping-thread system to be much more efficient.
> If a processor is idle, it will skip ahead to the wakeup time
> for the first sleeping thread.
> It runs MUCH faster than spinning on the TIMER_PERIOD method.
> Questions:
>   how does it interact with preemptive scheduling? It should work.
>   how does it interact with semaphores, which also use timer requests
>   does it work with multiple sleeping threads? it should
>   does it work when a new thread is created on a sleeping processor? should.
> 
176,180c187
<  * head node index of the sleep queue and key is the duration of sleep.
<  * (number of clock ticks between current time and time stid will be 
<  * woken up). The value written in the qkey field of the sleep queue 
<  * is the difference (in clock ticks) between the wakeup time of stid
<  * and the immediately preceding thread in the sleep queue
---
>  * head node index of the sleep queue and key is the wakeup time.
190c197
<       key -= q[next].qkey;
---
>       ;
196,197d202
<     if (next < MAX_THREADS )
<       q[next].qkey -= key;
216c221
< static void wakeup()
---
> static void wakeup(Time currenttime)
221c226
<     while( nonempty(clockq) && firstkey( clockq ) <= 0 ) {
---
>     while( nonempty(clockq) && firstkey( clockq ) <= currenttime ) {
226c231
<       PP->sleep_head = & q[ q[clockq].qnext ].qkey;
---
>       PP->sleep_head = (Time *) (& q[ q[clockq].qnext ].qkey);
333a339,340
> 	  make_timer_request_(CURR_PROCESSOR, CURR_TID, 
> 					  t + ReadTimer(CURR_PROCESSOR));
335c342,345
< 	if (PP->sleep_nonempty)
---
> 	if (PP->sleep_nonempty) {
> 	    Time wakeup_time = *(PP->sleep_head); /* first thing to awake */
> 	    make_timer_request_(CURR_PROCESSOR, CURR_TID, wakeup_time);
> 	}
337,338d346
< 	  make_timer_request_(CURR_PROCESSOR, CURR_TID,
< 			      t + ReadTimer(CURR_PROCESSOR));
666c674,676
< 	    if (currpptr->p_numthreads > 0)
---
> 	if (currpptr->p_numthreads > 0)
> 	  make_timer_request_(CURR_PROCESSOR, CURR_TID, 
> 					  t + ReadTimer(CURR_PROCESSOR));
668c678,681
< 	    if (PP->sleep_nonempty)
---
> 	if (PP->sleep_nonempty) {
> 	    Time wakeup_time = *(PP->sleep_head); /* first thing to awake */
> 	    make_timer_request_(CURR_PROCESSOR, CURR_TID, wakeup_time);
> 	}
670,671d682
< 	      make_timer_request_(CURR_PROCESSOR, CURR_TID,
< 				  t + ReadTimer(CURR_PROCESSOR));
788c799,801
<     
---
>     Time wakeup_time;
>     ProcBlk *pptr = &proc_table_[CURR_PROCESSOR];
> 
801,802c814,817
<     insertd(osptr->t_stid, PP->clockq, duration);
<     PP->sleep_head     = & q[ q[PP->clockq].qnext ].qkey;
---
>     /* use the wakeup time as the priority queue key */
>     wakeup_time = CURR_TIME + duration * TIMER_PERIOD;
>     insertd(osptr->t_stid, PP->clockq, wakeup_time);
>     PP->sleep_head = (Time *)(& q[ q[PP->clockq].qnext ].qkey);
813,815c828,829
< 	if (!PP->sleep_nonempty)
< 	  make_timer_request_(CURR_PROCESSOR, CURR_TID,
< 			      CURR_TIME + ReadTimer(CURR_PROCESSOR));
---
> 	CancelPendingTimerInterrupt(pptr);
> 	make_timer_request_(CURR_PROCESSOR, CURR_TID, wakeup_time);
824,826c838,839
< 	if (!PP->sleep_nonempty)
< 	  make_timer_request_(CURR_PROCESSOR, CURR_TID,
< 			      CURR_TIME + ReadTimer(CURR_PROCESSOR));
---
> 	CancelPendingTimerInterrupt(pptr);
> 	make_timer_request_(CURR_PROCESSOR, CURR_TID, wakeup_time);
844c857
< GLOBAL void _timer_handler()
---
> GLOBAL void _timer_handler(int processor, int currtid, Time currenttime)
848,849c861,862
<     /* Decrement remaining ticks in head of sleep queue. If they become zero,
<      * wakeup the threads
---
>     /* Check the head of sleep queue. If the current time has 
>      * advanced past its wakeup time, wakeup some threads.
852,856c865,866
< 	h = *PP->sleep_head;
< 	h--;
< 	*PP->sleep_head = h;
< 	if (h == 0 )
< 	  wakeup();
---
> 	if (*(PP->sleep_head) <= currenttime)
> 	  wakeup(currenttime);
===================================================================
RCS file: RCS/rt_thread_def.h,v
retrieving revision 1.1
diff -r1.1 rt_thread_def.h
12c12
<  * Last Modified:  $Date: 92/12/08 14:33:18 $ ($Author)
---
>  * Last Modified:  $Date: 92/12/11 16:34:16 $ ($Author)
47c47
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/rt_thread_def.h,v 1.1 92/12/08 14:33:18 dfk Exp $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/rt_thread_def.h,v 1.3 92/12/11 16:34:16 dfk Exp $
48a49,62
>  * Revision 1.3  92/12/11  16:34:16  dfk
>  * changed the sleeping-thread system to be much more efficient.
>  * If a processor is idle, it will skip ahead to the wakeup time
>  * for the first sleeping thread.
>  * It runs MUCH faster than spinning on the TIMER_PERIOD method.
>  * Questions:
>  *   how does it interact with preemptive scheduling? It should work.
>  *   how does it interact with semaphores, which also use timer requests
>  *   does it work with multiple sleeping threads? it should
>  *   does it work when a new thread is created on a sleeping processor? should.
>  * 
>  * Revision 1.2  92/12/11  15:34:00  dfk
>  * added T_STATE and NO_TID
>  * 
121a136,137
> #define T_STATE(t)	    		(OSBLOCK(t)->t_state) /* DFK */
> #define NO_TID	 	    		(-1)	/* DFK; an invalid tid number */
138c154
<   int *sleep_head;                         /* ptr to head of sleep queue  */
---
>   Time *sleep_head;                        /* to first time on sleep queue  */
===================================================================
RCS file: RCS/sim.h,v
retrieving revision 1.1
diff -r1.1 sim.h
46c46
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/sim.h,v 1.1 92/12/08 14:30:55 dfk Exp Locker: dfk $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/sim.h,v 1.1 92/12/08 14:30:55 dfk Exp Locker: dfk $
85,86c85,90
< #define FALSE              0
< #define TRUE               1
---
> #ifndef FALSE
> # define FALSE              0
> #endif
> #ifndef TRUE
> # define TRUE               1
> #endif
===================================================================
RCS file: RCS/sim.param,v
retrieving revision 1.1
diff -r1.1 sim.param
===================================================================
RCS file: RCS/user.h,v
retrieving revision 1.1
diff -r1.1 user.h
50c50
<  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/proteus-V3beta/engine/RCS/user.h,v 1.1 92/12/09 17:35:45 dfk Exp Locker: dfk $
---
>  * $Header: /a/quimby.dartmouth.edu/usr/wildcat/dfk/research/parallel/proteus/engine/RCS/user.h,v 1.1 92/12/09 17:35:45 dfk Exp Locker: dfk $
117,124c117,136
< #define USER_EVENT(kind, value) user_event_(kind, value)
< #define USER_TIME_EVENT(kind, value, time) \
<     user_time_event_(kind, value, time)
< #define INDEX_EVENT(kind, index, value) \
<     user_index_event_(kind, index, value)
< #define INDEX_TIME_EVENT(kind, index, value, time) \
<     user_index_time_event_(kind, index, value, time)
< 
---
> #define USER_EVENT(kind, value) { \
>   CYCLE_COUNTING_OFF; \
>   user_event_(kind, value); \
>   CYCLE_COUNTING_ON; \
> }
> #define USER_TIME_EVENT(kind, value, time) { \
>   CYCLE_COUNTING_OFF; \
>   user_time_event_(kind, value, time); \
>   CYCLE_COUNTING_ON; \
> }
> #define INDEX_EVENT(kind, index, value) { \
>   CYCLE_COUNTING_OFF; \
>   user_index_event_(kind, index, value); \
>   CYCLE_COUNTING_ON; \
> }
> #define INDEX_TIME_EVENT(kind, index, value, time) { \
>   CYCLE_COUNTING_OFF; \
>   user_index_time_event_(kind, index, value, time); \
>   CYCLE_COUNTING_ON; \
> }
