# # WARNING WARNING WARNING WARNING # # This diff file includes modifications to FreeBSD 2.1R # to implement DUMMYNET. It has been hand-edited from a # larger diff file, so line numbers might not match and # diff might complain. # The code should be very portable to other BSD-derived systems # as well. # # In order to compile it, you should use # # option DUMMYNET # # in your kernel config file. Also, to get better resolution, # you should use # # option "HZ=1000" # # # If you can, always try to get the latest version from # # http://www.iet.unipi.it/~luigi/research.html # diff -cbwr netinet.orig/in_proto.c netinet.new/in_proto.c *** netinet.orig/in_proto.c Tue Sep 5 21:31:34 1995 --- netinet.new/in_proto.c Tue Sep 3 00:40:57 1996 *************** *** 97,102 **** --- 97,105 ---- udp_usrreq, udp_init, 0, 0, 0, udp_sysctl }, + #ifdef DUMMYNET + #define tcp_input tcp_ip_input + #endif { SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, tcp_input, 0, tcp_ctlinput, tcp_ctloutput, diff -cbwr netinet.orig/tcp_input.c netinet.new/tcp_input.c *** netinet.orig/tcp_input.c Wed Aug 23 16:52:06 1995 --- netinet.new/tcp_input.c Tue Sep 10 19:02:28 1996 *************** *** 2050,2056 **** --- 2396,2410 ---- /* * Don't force slow-start on local network. */ + #if DUMMYNET + /* except when using dummynet... I am wondering how does + * it work with PPP. Also, is there a chance that we get + * a delack ?
+ 		 */
+ 		if (tcp_dummynet || !in_localaddr(inp->inp_faddr))
+ #else
  		if (!in_localaddr(inp->inp_faddr))
+ #endif
  			tp->snd_cwnd = mss;
  		if (rt->rt_rmx.rmx_ssthresh) {
diff -cbwr netinet.orig/tcp_output.c netinet.new/tcp_output.c
*** netinet.orig/tcp_output.c Thu Sep 14 19:58:07 1995
--- netinet.new/tcp_output.c Wed Sep 4 00:46:36 1996
***************
*** 66,71 ****
--- 66,118 ----
  extern struct mbuf *m_copypack();
  #endif
+ #if DUMMYNET
+ #define ip_output(a,b,c,d,e) tcp_ip_output(a,b,c,d,e)
+ #endif
  /*
   * Tcp output routine: figure out what should be sent and send it.
diff -cbwr netinet.orig/tcp_subr.c netinet.new/tcp_subr.c
*** netinet.orig/tcp_subr.c Sat Jul 22 16:04:31 1995
--- netinet.new/tcp_subr.c Wed Sep 4 16:39:52 1996
***************
*** 65,74 ****
--- 65,89 ----
  #include
  #endif
+ #ifdef DUMMYNET
+ #define ip_output(a,b,c,d,e) tcp_ip_output(a,b,c,d,e)
+ #endif
+
  /* patchable/settable parameters for tcp */
  int tcp_mssdflt = TCP_MSS;
  int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
diff -cbwr netinet.orig/tcp_timer.c netinet.new/tcp_timer.c
*** netinet.orig/tcp_timer.c Sat Jul 29 10:16:52 1995
--- netinet.new/tcp_timer.c Tue Sep 10 20:03:47 1996
***************
*** 336,338 ****
--- 373,717 ----
  	return (tp);
  }
  #endif /* TUBA_INCLUDE */
+
+ #if DUMMYNET
+ /*
+  * Code for a network simulator used for testing purposes. Calls to
+  * ip_output are replaced with tcp_ip_output, which queues datagrams
+  * in a queue of length k. Every timer tick (T sec.), tcp_testbed removes
+  * at most B*T bytes of data from the queue and links them into a second,
+  * unbounded queue, where they remain for t_p/T ticks. From
+  * there, segments are finally delivered by calling ip_output.
+  * The same thing occurs in the reverse direction. Data structures for
+  * both queues are defined in struct my_pipe.
+  *
+  * This is only done on tcp traffic, other datagrams
+  * remain unchanged. TCP communication thus occurs as follows:
+  *
+  *        -->--[k buffers]-->--[bw = B, delay = t_p]-->--
+  *  local                                                 extern
+  *        --<--[bw = B, delay = t_p]--<--[k buffers]--<--
+  */
+
+ int tcp_dummynet=0; /* sysctl variable */
+
+ /*
+  * One queued packet: the saved arguments of the intercepted
+  * ip_output() (or tcp_input()) call plus the queueing state.
+  */
+ struct ip_out_par {
+ 	struct ip_out_par * next;	/* next descriptor in the same queue */
+ #if DUMMYNET_DEBUG
+ 	struct timeval beg, mid; /* testing only */
+ 	int act_delay; /* testing only */
+ 	int in_delay; /* testing only */
+ #endif
+
+ 	int delay; /* stays queued until delay=0 */
+ 	struct mbuf *m0; /* parameters */
+ 	struct mbuf *opt;
+ 	struct route *ro;
+ 	int flags; /* or iphlen for tcp_input */
+ 	struct ip_moptions *imo;
+ };
+
+ /*
+  * One direction of the simulated link: the bounded "r" queue filled by
+  * tcp_ip_input/tcp_ip_output, and the "p" queue where packets wait
+  * until their delay expires.
+  */
+ struct my_pipe {
+ 	struct ip_out_par *r_head;
+ 	struct ip_out_par *r_tail;
+ 	int rq_len; /* elements in r_queue */
+ 	struct ip_out_par *p_head;
+ 	struct ip_out_par *p_tail;
+ 	int ticks_from_last_insert;
+ 	long numbytes; /* which can send or receive */
+ };
+
+ static struct my_pipe pipe_in = { NULL, NULL, 0, NULL, NULL, 5, 0 };
+ static struct my_pipe pipe_out = { NULL, NULL, 0, NULL, NULL, 5, 0 };
+
+ /* the following are set with sysctl net.inet.tcp.dummynet.
+  * In decimal, bbbbssddd (d=delay, s=buffers, b=BW KB/s).
+  * The delay is counted in clock ticks (10ms each when HZ=100).
+  */
+ int tcp_rq_lim = 0; /* queue size, 0 means unlimited */
+ int tcp_rq_bw = 0; /* bandwidth, 0 means unlimited */
+ int tcp_pq_delay = 0; /* delay in ticks (10ms units at HZ=100) */
+
+ extern int hz; /* hz and tick are in param.c */
+ extern int tick;
+
+ static int testbed_idle = 1;	/* 1 when no tcp_testbed timeout is pending */
+
+ void tcp_testbed();
+ void
+ tcp_testbed_dequeue(struct ip_out_par **head, int type, int immediate);
+
+ /*
+  * Schedule tcp_testbed() for the next tick, unless a timeout is already
+  * pending or there is nothing left to do (all queues empty and no
+  * bandwidth debt in either direction).
+  */
+ static void
+ testbed_restart()
+ {
+ 	/* schedules a timeout if necessary */
+ 	if (!testbed_idle)
+ 		return;
+ 	if (pipe_in.p_head || pipe_in.r_head ||
+ 	    pipe_out.p_head || pipe_out.r_head
+ 	    || pipe_in.numbytes < 0 || pipe_out.numbytes < 0) {
+ 		testbed_idle = 0;
+ 		timeout(tcp_testbed, (caddr_t)NULL, 1);
+ 	}
+ }
+
+ /*
+  * Move everything still waiting in the r queue to the tail of the
+  * p queue, zero all delays so that the next dequeue delivers every
+  * packet, and reset the pipe counters.
+  */
+ void flush_pipe(struct my_pipe *p)
+ {
+ 	struct ip_out_par *q;
+
+ 	if (p->r_head) {
+ 		/*
+ 		 * Link the r queue after the last p element. Overwriting
+ 		 * p_tail here instead would orphan the whole r queue.
+ 		 */
+ 		if (p->p_head)
+ 			p->p_tail->next = p->r_head;
+ 		else
+ 			p->p_head = p->r_head;
+ 		p->p_tail = p->r_tail;
+ 		p->r_head = NULL;
+ 		p->r_tail = NULL;
+ 	}
+ 	for (q = p->p_head; q ; q = q->next)
+ 		q->delay = 0;
+ 	p->rq_len = 0;
+ 	p->numbytes = 0;
+ 	p->ticks_from_last_insert = tcp_pq_delay;
+ }
+
+ /*
+  * this is called whenever tcp_dummynet is modified via sysctl.
+  * It is important to make sure that the queues are flushed.
+  * The simplest way is to move everything from rq to pq,
+  * clear all times, and then start the testbed.
+  */
+ void tcp_testbed_update()
+ {
+ 	int x;
+ 	/*
+ 	 * tcp_testbed() works on the same queues under splnet();
+ 	 * take the same level while they are being rebuilt.
+ 	 */
+ 	int s = splnet();
+
+ 	tcp_pq_delay = tcp_dummynet % 1000; /* at most 999 ticks */
+ 	x = tcp_dummynet / 1000;
+ 	tcp_rq_lim = x % 100; /* at most 99 buffers */
+ 	x = x / 100;
+ 	tcp_rq_bw = x; /* bandwidth KB/s */
+ 	printf("%4d ms, %d KB/s, %d buffers\n",
+ 	    tcp_pq_delay*1000/hz, tcp_rq_bw, tcp_rq_lim);
+ 	x = tcp_pq_delay * tcp_rq_bw/hz ;
+ 	if (x > 500)
+ 		printf("Warning: tcp_testbed: each pipe uses %d KB\n",x);
+ 	flush_pipe(&pipe_out);
+ 	flush_pipe(&pipe_in);
+ 	/* every delay is now 0, so these deliver all queued packets */
+ 	tcp_testbed_dequeue(&pipe_out.p_head, 0, 1);
+ 	tcp_testbed_dequeue(&pipe_in.p_head, 1, 1);
+ 	splx(s);
+ }
+
+ #if DUMMYNET_DEBUG
+ /*
+  * Debug only: report a packet whose measured time in the p queue
+  * differs from the configured delay by more than 95% of a tick.
+  * NOTE(review): PR() is not defined anywhere in this patch; it is
+  * presumably a printf-like macro supplied elsewhere -- confirm.
+  */
+ static void
+ pr_info(struct ip_out_par *p, char c)
+ {
+ 	static u_long cycle=0, err=0;
+ 	long dr, dp;
+ 	int tol = (tick * 95)/100;
+ 	struct timeval ora;
+
+ 	cycle++;
+ 	microtime( &ora );
+ 	dp = (ora.tv_sec - p->mid.tv_sec)*1000000 +
+ 	    ora.tv_usec - p->mid.tv_usec;
+ 	dr = (p->mid.tv_sec - p->beg.tv_sec)*1000000 +
+ 	    p->mid.tv_usec - p->beg.tv_usec;
+
+ 	if (p->act_delay != tcp_pq_delay ||
+ 	    dp < tcp_pq_delay*tick - tol ||
+ 	    dp > tcp_pq_delay*tick + tol)
+ 		PR("-%c R%8d P%8d %d (%d) in %d (cycle %d err %d)\n",
+ 		    c,
+ 		    dr, dp, p->act_delay, tcp_pq_delay, p->in_delay,
+ 		    cycle, ++err);
+ }
+ #endif
+
+ /*
+  * Age the head of a p queue by one tick (unless "immediate") and
+  * deliver every leading packet whose delay has expired: through
+  * ip_output() when type is 0, through tcp_input() otherwise.
+  */
+ void
+ tcp_testbed_dequeue(struct ip_out_par **head, int type, int immediate)
+ {
+ 	struct ip_out_par *p;
+
+ 	if (*head==NULL)
+ 		return;
+ 	if (!immediate)
+ 		(*head)->delay--;
+ 	while (*head && (*head)->delay < 1) {
+ 		/* first unlink, then call procedures since tcp_input
+ 		 * can in turn call tcp_output causing nested calls
+ 		 */
+ 		p = *head;
+ 		*head = (*head)->next;
+ #if DUMMYNET_DEBUG
+ 		pr_info(p, type ? 'i':'o');
+ #endif
+ 		if (type ==0)
+ 			(void)ip_output(p->m0, p->opt, p->ro, p->flags, p->imo);
+ 		else
+ 			tcp_input(p->m0, p->flags);
+ 		free((void *)p, M_IPMADDR);
+ 	}
+ }
+
+ /*
+  * Transfer packets from the r queue to the p queue of a pipe, as far
+  * as the accumulated bandwidth credit (numbytes) allows.
+  */
+ void
+ tcp_testbed_move(struct my_pipe *pipe, int immediate)
+ {
+ 	struct ip_out_par *p;
+
+ 	if ( pipe->p_head == NULL &&
+ 	    pipe->ticks_from_last_insert != tcp_pq_delay) {
+ 		printf("Warning, empty pipe and delay %d %d\n",
+ 		    pipe->ticks_from_last_insert, tcp_pq_delay);
+ 		pipe->ticks_from_last_insert = tcp_pq_delay;
+ 	}
+ 	/* this ought to go in tcp_testbed_dequeue */
+ 	if (!immediate && pipe->ticks_from_last_insert < tcp_pq_delay)
+ 		pipe->ticks_from_last_insert++;
+ 	if ( (p = pipe->r_head) != NULL ) {
+ 		/*
+ 		 * Remove at most numbytes bytes from src and move to dst.
+ 		 * delay is set to ticks_from_last_insert, which
+ 		 * is reset after the first insertion;
+ 		 */
+ 		while ( p ) {
+ 			struct ip *ip=mtod(p->m0, struct ip *); /* cast */
+ 			if (tcp_rq_bw) {
+ 				/* credit exhausted: wait for the next tick */
+ 				if (pipe->numbytes < 0)
+ 					break;
+ 				pipe->numbytes -= ip->ip_len;
+ 			}
+ 			pipe->rq_len--; /* elements in queue */
+ #if DUMMYNET_DEBUG
+ 			microtime( &p->mid );
+ 			p->in_delay = pipe->ticks_from_last_insert;
+ #endif
+ 			p->delay = pipe->ticks_from_last_insert;
+ 			pipe->ticks_from_last_insert = 0;
+ 			/* compensate the -- done next in tcp_testbed_dequeue */
+ 			if (!immediate && p->delay >0 && pipe->p_head==NULL)
+ 				p->delay++;
+ 			if (pipe->p_head == NULL)
+ 				pipe->p_head = p;
+ 			else
+ 				pipe->p_tail->next = p;
+ 			pipe->p_tail = p;
+ 			p = p->next;
+ 			pipe->p_tail->next = NULL;
+ 		}
+ 		pipe->r_head = p;
+
+ 		/*** XXX just a sanity check */
+ 		if ( (p==NULL && pipe->rq_len != 0) ||
+ 		    (p!=NULL && pipe->rq_len == 0) )
+ 			printf("--Warning, pipe head %x len %d\n",
+ 			    p, pipe->rq_len);
+ 	}
+ }
+
+ /* Run one move + dequeue step on the output pipe. */
+ void
+ tcp_testbed_out(int im)
+ {
+ 	tcp_testbed_move(&pipe_out, im); /*** output queue ***/
+ 	tcp_testbed_dequeue(&pipe_out.p_head, 0, im);
+ }
+
+ /* Run one move + dequeue step on the input pipe. */
+ void
+ tcp_testbed_in(int im)
+ {
+ 	tcp_testbed_move(&pipe_in, im); /*** input queue ***/
+ 	tcp_testbed_dequeue(&pipe_in.p_head, 1, im);
+ }
+
+ /*
+  * Timeout handler, scheduled one tick ahead by testbed_restart():
+  * grants each pipe one tick worth of bandwidth credit and advances
+  * both pipes.
+  */
+ void
+ tcp_testbed()
+ {
+ 	int s = splnet();
+ #if DUMMYNET_DEBUG
+ 	struct ip_out_par *p;
+ 	for (p=pipe_in.p_head; p ; p=p->next) p->act_delay++;
+ 	for (p=pipe_out.p_head; p ; p=p->next) p->act_delay++;
+ #endif
+ 	pipe_in.numbytes += (tcp_rq_bw * 1024 / hz);
+ 	pipe_out.numbytes += (tcp_rq_bw * 1024 / hz);
+ 	tcp_testbed_out(0);
+ 	tcp_testbed_in(0);
+
+ 	/*
+ 	 * finally, if some queue has data, restart the timer.
+ 	 */
+ 	testbed_idle = 1;
+ 	testbed_restart();
+ 	splx(s);
+ }
+
+ /*
+  * Replacement for tcp_input in the protocol switch. With dummynet
+  * off the segment goes straight to tcp_input(); otherwise it is
+  * appended to the input r queue, or dropped when the queue is full
+  * or no descriptor can be allocated.
+  */
+ void
+ tcp_ip_input(struct mbuf *m0, int iphlen)
+ {
+ 	struct ip_out_par *p;
+
+ 	if ( tcp_dummynet == 0) {
+ 		tcp_input(m0, iphlen);
+ 		return;
+ 	}
+ 	if ( (tcp_rq_lim && pipe_in.rq_len >= tcp_rq_lim) ||
+ 	    (p= (struct ip_out_par *)malloc(sizeof (*p),
+ 	    M_IPMADDR, M_NOWAIT) ) == NULL ) {
+ 		printf("-- tcp_ip_in drop, have %d packets\n",
+ 		    pipe_in.rq_len);
+ 		m_freem(m0);
+ 		return; /* XXX error */
+ 	}
+ 	/* build and enqueue packet */
+ 	p->next = NULL;
+ 	p->delay = 0;
+ #if DUMMYNET_DEBUG
+ 	p->act_delay = 0;
+ 	microtime( &p->beg );
+ #endif
+ 	p->m0 = m0;
+ 	p->flags = iphlen;
+ 	/* the output-only parameters are not used on this path */
+ 	p->opt = NULL;
+ 	p->ro = NULL;
+ 	p->imo = NULL;
+ 	if (pipe_in.r_head == NULL)
+ 		pipe_in.r_head = p;
+ 	else
+ 		pipe_in.r_tail->next = p;
+ 	pipe_in.r_tail = p;
+ 	pipe_in.rq_len++;
+
+ 	if (pipe_in.r_head == p) /* process immediately */
+ 		tcp_testbed_in(1);
+
+ 	testbed_restart();
+ }
+
+ /*
+  * Replacement for ip_output in the tcp code. With dummynet off the
+  * call is passed through unchanged; otherwise the arguments are saved
+  * in a descriptor appended to the output r queue. Returns 0 even
+  * when the packet is dropped.
+  * NOTE(review): only the ro/opt/imo pointers are saved, not what they
+  * point to -- confirm they are still valid when the packet is sent.
+  */
+ int
+ tcp_ip_output(m0, opt, ro, flags, imo)
+ 	struct mbuf *m0;
+ 	struct mbuf *opt;
+ 	struct route *ro;
+ 	int flags;
+ 	struct ip_moptions *imo;
+ {
+ 	struct ip_out_par *p;
+
+ 	if ( tcp_dummynet == 0)
+ 		return ip_output(m0, opt, ro, flags, imo);
+
+ 	if ( (tcp_rq_lim && pipe_out.rq_len >= tcp_rq_lim) ||
+ 	    (p= (struct ip_out_par *)malloc(sizeof (*p),
+ 	    M_IPMADDR, M_NOWAIT) ) == NULL ) {
+ 		printf("-- tcp_ip_out drop, have %d packets\n", pipe_out.rq_len);
+ 		m_freem(m0);
+ 		return 0; /* not ENOBUFS */
+ 	}
+ 	/* build and enqueue packet */
+ 	p->next = NULL;
+ 	p->delay = 0;
+ #if DUMMYNET_DEBUG
+ 	p->act_delay = 0;
+ 	microtime( &p->beg );
+ #endif
+ 	p->m0 = m0;
+ 	p->opt = opt;
+ 	p->ro = ro;
+ 	p->flags = flags;
+ 	p->imo = imo;
+ 	if (pipe_out.r_head == NULL)
+ 		pipe_out.r_head = p;
+ 	else
+ 		pipe_out.r_tail->next = p;
+ 	pipe_out.r_tail = p;
+ 	pipe_out.rq_len++;
+
+ 	if (pipe_out.r_head == p) /* process immediately */
+ 		tcp_testbed_out(1);
+ 	testbed_restart();
+ 	return 0;
+ }
+ #endif
diff -cbwr netinet.orig/tcp_usrreq.c netinet.new/tcp_usrreq.c
*** netinet.orig/tcp_usrreq.c Thu Nov 2 16:53:59 1995
--- netinet.new/tcp_usrreq.c Wed Sep 4 01:35:52 1996
***************
*** 726,731 ****
--- 726,745 ----
  	case TCPCTL_RECVSPACE:
  		return (sysctl_int(oldp, oldlenp, newp, newlen,
  			(int *)&tcp_recvspace)); /* XXX */
+ #if DUMMYNET
+ 	case TCPCTL_DUMMYNET:
+ 	    {
+ 		int retval;
+
+ 		retval = sysctl_int(oldp, oldlenp, newp, newlen, &tcp_dummynet);
+ 		/*
+ 		 * Reconfigure (and flush the pipes) only after a
+ 		 * successful write, not on plain reads or errors.
+ 		 */
+ 		if (retval == 0 && newp != NULL)
+ 			tcp_testbed_update();
+ 		return retval;
+ 	    }
+ #endif
  	default:
  		return (ENOPROTOOPT);
  	}
diff -cbwr netinet.orig/tcp_var.h netinet.new/tcp_var.h
*** netinet.orig/tcp_var.h Sat Jul 29 10:16:53 1995
--- netinet.new/tcp_var.h Tue Sep 3 13:13:42 1996
***************
*** 300,306 ****
  #define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
  #define TCPCTL_SENDSPACE 8 /* send buffer space */
  #define TCPCTL_RECVSPACE 9 /* receive buffer space */
! #define TCPCTL_MAXID 10
  #define TCPCTL_NAMES { \
  	{ 0, 0 }, \
--- 414,422 ----
  #define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
  #define TCPCTL_SENDSPACE 8 /* send buffer space */
  #define TCPCTL_RECVSPACE 9 /* receive buffer space */
! #define TCPCTL_TCPSACK 10 /* selective acknowledgements */
! #define TCPCTL_DUMMYNET 11 /* dummynet */
! #define TCPCTL_MAXID 12
  #define TCPCTL_NAMES { \
  	{ 0, 0 }, \
***************
*** 313,318 ****
--- 429,436 ----
  	{ "keepintvl", CTLTYPE_INT }, \
  	{ "sendspace", CTLTYPE_INT }, \
  	{ "recvspace", CTLTYPE_INT }, \
+ 	{ "sack", CTLTYPE_INT }, \
+ 	{ "dummynet", CTLTYPE_INT }, \
  }
  #ifdef KERNEL
***************
*** 372,377 ****
--- 491,508 ----
  int tcp_usrreq __P((struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *));
  void tcp_xmit_timer __P((struct tcpcb *, int));
+ #if DUMMYNET
+ void tcp_ip_input __P((struct mbuf *, int));
+ void tcp_testbed_update __P((void));	/* called from the sysctl handler */
+ extern int tcp_dummynet;
+ #endif
  extern u_long tcp_sendspace;
  extern u_long tcp_recvspace;