ipfw_mod/000755 000423 000000 00000000000 11311416635 013121 5ustar00luigiwheel000000 000000 ipfw_mod/dummynet/000755 000423 000000 00000000000 11311406517 014761 5ustar00luigiwheel000000 000000 ipfw_mod/ipfw/000755 000423 000000 00000000000 11311261630 014057 5ustar00luigiwheel000000 000000 ipfw_mod/Makefile000644 000423 000000 00000001434 11311375467 014572 0ustar00luigiwheel000000 000000 # $Id: Makefile 4486 2009-12-14 09:10:01Z luigi $ # # Top level makefile for building ipfw kernel and userspace. # You can run it manually or also under the Planetlab build. # Planetlab wants also the 'install' target. # # To build on system with non standard Kernel sources or userland files, # you should run this with # # make KERNELPATH=/path/to/linux-2.x.y.z USRDIR=/path/to/usr # # We assume that $(USRDIR) contains include/ and lib/ used to build userland. DATE ?= $(shell date +%Y%m%d) SNAPSHOT_NAME=ipfw_mod-$(DATE) _all: all all clean distclean: echo target is $(@) (cd ipfw && $(MAKE) $(@) ) (cd dummynet && $(MAKE) $(@) ) snapshot: (cd ..; tar cvzhf /tmp/$(SNAPSHOT_NAME).tgz --exclude .svn \ --exclude README.openwrt --exclude tags --exclude NOTES \ ipfw_mod ) install: ipfw_mod/glue.h000644 000423 000000 00000022714 11310234355 014230 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Id: glue.h 4436 2009-12-10 18:31:49Z luigi $ * * glue code to adapt the FreeBSD version to linux and windows, * userland and kernel. * This is included before any other headers, so we do not have * a chance to override any #define that should appear in other * headers. 
*/ #ifndef _GLUE_H #define _GLUE_H /* * common definitions to allow portability */ #ifndef __FBSDID #define __FBSDID(x) #endif /* FBSDID */ /* * emulation of FreeBSD's sockopt and thread * This was in sockopt.h */ enum sopt_dir { SOPT_GET, SOPT_SET }; #ifndef KERNEL_MODULE /* Userland part */ #include /* linux needs this in addition to sys/types.h */ #include /* for size_t */ #include #include #include #include #else /* KERNEL_MODULE, kernel part */ #ifndef _WIN32 #include #define ifnet net_device /* remap */ #define _KERNEL # make kernel structure visible #define KLD_MODULE # add the module glue #define INET # want inet support #include /* linux kernel */ #include /* linux kernel */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) // or 2.4.x #include /* linux/msg.h require this */ #include /* just MAX_ADDR_LEN 8 on 2.4 32 on 2.6, also brings in byteorder */ #endif #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) && \ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // under 2.6.22 compilation is required by msg.h #include #endif #include /* XXX m_type define conflict with include/sys/mbuf.h, * so early include this file (to be solved) */ #include #include /* struct in_addr */ #include /* struct in6_addr */ #include /* * LIST_HEAD in queue.h conflict with linux/list.h * some previous linux include need list.h definition */ #undef LIST_HEAD #define IF_NAMESIZE 16 typedef uint32_t in_addr_t; #define printf(fmt, arg...) printk(KERN_ERR fmt, ##arg) #endif /* !_WIN32 */ #endif /* KERNEL_MODULE */ /* * In windows, we need to emulate the sockopt interface * so also the userland needs to have the struct sockopt defined. * No need to declare struct thread on linux, but we need on windows. */ struct thread { void *sopt_td; void *td_ucred; }; struct sockopt { enum sopt_dir sopt_dir; /* is this a get or a set? 
*/ int sopt_level; /* second arg of [gs]etsockopt */ int sopt_name; /* third arg of [gs]etsockopt */ void *sopt_val; /* fourth arg of [gs]etsockopt */ size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ struct thread *sopt_td; /* calling thread or null if kernel */ }; /* This must be included here after list.h */ #include /* both the kernel side and nat.c needs this */ #ifndef KERNEL_MODULE /* define internals for struct in6_addr netinet/in6.h on FreeBSD */ #define __u6_addr in6_u #define __u6_addr32 u6_addr32 /* define missing type for ipv6 (linux 2.6.28) */ #define in6_u __in6_u /* missing in linux netinet/ip.h */ #define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */ #define IPTOS_ECN_CE 0x03 /* congestion experienced */ /* defined in freebsd netinet/icmp6.h */ #define ICMP6_MAXTYPE 201 /* on freebsd sys/socket.h pf specific */ #define NET_RT_IFLIST 3 /* survey interface list */ /* on freebsd net/if.h XXX used */ struct if_data { /* ... */ u_long ifi_mtu; /* maximum transmission unit */ }; /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket. 
* This is used in nat.c */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ int ifam_metric; /* value of ifa_metric */ }; #ifndef NO_RTM /* conflicting with netlink */ /* missing in net/route.h */ #define RTM_VERSION 5 /* Up the ante and ignore older versions */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #endif /* NO_RTM */ /* SA_SIZE is used in the userland nat.c modified */ #define SA_SIZE(sa) \ ( (!(sa) ) ? \ sizeof(long) : \ 1 + ( (sizeof(struct sockaddr) - 1) | (sizeof(long) - 1) ) ) /* sys/time.h */ /* * Getkerninfo clock information structure */ struct clockinfo { int hz; /* clock frequency */ int tick; /* micro-seconds per hz tick */ int spare; int stathz; /* statistics clock frequency */ int profhz; /* profiling clock frequency */ }; /* * linux does not have a reentrant version of qsort, * so we the FreeBSD stdlib version. 
*/ void qsort_r(void *a, size_t n, size_t es, void *thunk, int cmp_t(void *, const void *, const void *)); /* prototypes from libutil */ /* humanize_number(3) */ #define HN_DECIMAL 0x01 #define HN_NOSPACE 0x02 #define HN_B 0x04 #define HN_DIVISOR_1000 0x08 #define HN_GETSCALE 0x10 #define HN_AUTOSCALE 0x20 int humanize_number(char *_buf, size_t _len, int64_t _number, const char *_suffix, int _scale, int _flags); int expand_number(const char *_buf, int64_t *_num); #define setprogname(x) /* not present in linux */ extern int optreset; /* not present in linux */ size_t strlcpy(char * dst, const char * src, size_t siz); long long int strtonum(const char *nptr, long long minval, long long maxval, const char **errstr); int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); #else /* KERNEL_MODULE */ /* linux and windows kernel do not have bcopy ? */ #define bcopy(_s, _d, _l) memcpy(_d, _s, _l) #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) // or 2.4.x #include #endif /* definitions useful for the kernel side */ struct route_in6 { }; #endif /* KERNEL_MODULE */ /* missing in netinet/in.h */ #define INET_ADDRSTRLEN 16 #ifdef linux /* linux does not have sin_len in sockaddr */ #define sin_len sin_zero[0] #endif /* linux */ /* * List of values used for set/getsockopt options. * The base value on FreeBSD is defined as a macro, * if not available we will use our own enum. * The TABLE_BASE value is used in the kernel. 
*/ #ifndef IP_FW_TABLE_ADD #define _IPFW_SOCKOPT_BASE 100 /* 40 on freebsd */ enum ipfw_msg_type { IP_FW_TABLE_ADD = _IPFW_SOCKOPT_BASE, IP_FW_TABLE_DEL, IP_FW_TABLE_FLUSH, IP_FW_TABLE_GETSIZE, IP_FW_TABLE_LIST, IP_FW_DYN_GET, /* new addition */ /* IP_FW3 and IP_DUMMYNET3 are the new API */ IP_FW3 = _IPFW_SOCKOPT_BASE + 8, IP_DUMMYNET3, IP_FW_ADD = _IPFW_SOCKOPT_BASE + 10, IP_FW_DEL, IP_FW_FLUSH, IP_FW_ZERO, IP_FW_GET, IP_FW_RESETLOG, IP_FW_NAT_CFG, IP_FW_NAT_DEL, IP_FW_NAT_GET_CONFIG, IP_FW_NAT_GET_LOG, IP_DUMMYNET_CONFIGURE, IP_DUMMYNET_DEL , IP_DUMMYNET_FLUSH, /* 63 is missing */ IP_DUMMYNET_GET = _IPFW_SOCKOPT_BASE + 24, _IPFW_SOCKOPT_END }; #endif /* IP_FW_TABLE_ADD */ #endif /* !_GLUE_H */ ipfw_mod/README000644 000423 000000 00000017724 11311416630 014007 0ustar00luigiwheel000000 000000 # # $Id: README 4495 2009-12-14 11:36:41Z luigi $ # This directory contains a port of ipfw and dummynet to Linux and OpenWrt (including PlanetLab). A Windows version is in the works but not ready yet. Building the code produces: a kernel module, ipfw_mod.ko a userland program, /sbin/ipfw The source code here comes straight from FreeBSD (roughly the version in RELENG_7 and HEAD as of December 2009), plus some glue code and headers written from scratch. Unless specified otherwise, all the code here is under a BSD license. =================== BUILD INSTRUCTIONS ========================== ***** Linux 2.6.x ****** make KERNELPATH=/path/to/linux USRDIR=/path/to/usr where the two variables are optional an point to the linux kernel sources and the /usr directory. Defaults are USRDIR=/usr and KERNELPATH=/lib/modules/`uname -r`/build --- XXX check ? NOTE: make sure CONFIG_NETFILTER is enabled in the kernel configuration file. 
You can enable it by doing "(cd ${KERNELPATH}; make menuconfig)" and enabling the option listed below: Networking ---> Networking options ---> [*] Network packet filtering framework (Netfilter) ***** Linux 2.4.x ***** Almost as above, with an additional VER=2.4 make VER=2.4 KERNELPATH=... For 2.4, if KERNELPATH is not specified then we use KERNELPATH ?= /usr/src/`uname -r`/build You need to follow the same instruction for the 2.6 kernel, enabling netfilter in the kernel options: Networking options ---> [*] Network packet filtering (replaces ipchains) ***** Openwrt package ***** (Tested with kamikaze_8.09.1 and Linux 2.4) + Download and extract the OpenWrt package, e.g. wget http://downloads.openwrt.org/kamikaze/8.09.1/kamikaze_8.09.1_source.tar.bz2 tar xvjf kamikaze_8.09.1_source.tar.bz2 + "cd" to the directory with the OpenWrt sources (the one that contains Config.in, rules.mk ...) cd kamikaze_8.09.1 + Optional: to be sure that the tools are working, make a first compilation as follows: - run "make menuconfig" and set the correct target device, drivers, and so on; - run "make" to do the build + Add ipfw2 to the openwrt package, as follows: - copy the code from this directory to the place used for the build: cp -Rp /path_to_ipfw_mod ../ipfw_mod; If you want, you can fetch a newer version from the web (cd ..; rm -rf ipfw_mod; wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_mod-latest.tgz;\ tar xvzf ipfw_mod-latest.tgz) - run the following commands: (mkdir package/ipfw2; cp ../ipfw_mod/Makefile.openwrt package/ipfw2/Makefile) to create the package/ipfw2 directory in the OpenWrt source directory, and copy Makefile.openwrt to package/ipfw2/Makefile: - if necessary, edit package/ipfw2/Makefile and set IPFW_DIR to point to the directory ipfw_mod, which contains the ipfw sources - run "make menuconfig" and select ipfw2 as a module in Kernel Modules -> Other modules -> kmod-ipfw2 - run "make" to build the package, "make V=99" for verbose build. 
- to modify the code, assuming you are in directory "kamikaze_8.09.1" (cd ../ipfw_mod && vi ...the files you are interested in ) rm -rf build_dir/linux-brcm-2.4/kmod-ipfw2 make package/ipfw2/compile V=99 The resulting package is located in bin/packages/mipsel/kmod-ipfw2*, upload the file and install on the target system, as follows: opkg install kmod-ipfw2_2.4.35.4-brcm-2.4-1_mipsel.ipk #install ls -l ls -l /lib/modules/2.4.35.4/ipfw* # check insmod /lib/modules/2.4.35.4/ipfw_mod.o # load the module /lib/modules/2.4.35.4/ipfw show # launch the userspace tool rmmod ipfw_mod.o # remove the module ***** PLANETLAB BUILD (within a slice) ***** Follow the instructions below. You can just cut&paste # install the various tools if not available sudo yum -y install subversion rpm-build rpm-devel m4 redhat-rpm-config make gcc # create and move to a work directory mkdir -p test # extract a planetlab distribution to directory XYZ (cd test; svn co http://svn.planet-lab.org/svn/build/trunk XYZ) # copy the planetlab/*mk files here, overriding existing ones cp planetlab/*mk test/XYZ # download the specfiles and do some patching. # Results are into SPEC/ (takes 5 minutes) (cd test/XYZ; make stage1=true PLDISTRO=planetlab ) # Building the slice code is fast, the root code takes longer # as it needs to rebuild the whole kernel (cd test/XYZ; sudo make ipfwslice ipfwroot) The kernel dependency phase is a bit time consuming, but does not need to be redone if we are changing the ipfw sources only. To clean up the code do (cd test/XYZ; sudo make ipfwroot-clean ipfwslice-clean) then after you have updated the repository again (cd test/XYZ; sudo make ipfwslice ipfwroot) --- other, instructions (to be verified) --- To build a kernel module for the PlanetLab distribution you need a build system. For an up-to-date and detailed information on how to build a local myplc installation, a local mirror, a PlanetLab test system see[1] To create a build system you need to do the following steps: 1. 
install CentOS 5, detailed information[2] 1.A download the image from the main site[3] for example: wget http://mi.mirror.garr.it/mirrors/CentOS/5.4/isos/i386/CentOS-5.4-i386-netinstall.iso 1.B Add the repository cat >> /etc/yum.repos.d/dhozac-vserver.repo < mybuild.log& 3. create the build 3.A Enter on the vserver, and create the build vserver mybuild enter cd \ svn co http://svn.planet-lab.org/svn/build/trunk build 4. build 4.A build[4] cd /build # full cleanup make distclean # the compilation is composed by several steps, # make help for more information # the first for the onelab compilation will download # the SPEC file from the repository specified in # onelab-tags.mk make stage1=true PLDISTRO=onelab # to download and build a module, for example ipfw: make ipfw # to do local changes cd /build/CODEBASE rm -rf ipfw # download the ipfw sources and extract it into ./ipfw # by svn svn+ssh://onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw_mod ./ipfw # from web wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_mod-latest.tgz tar xvzf ipfw_mod-latest.tgz # start the compilation rm -rf SOURCES/ipfw* rm -rf BUILD/ipfw-0.1/ rm -rf SRPMS/ipfw* rm -rf RPMS/i386/ipfw* make ipfw 5. download and install sources into a node 5.A Copy RPMS into the node and install it: # exit from the root context exit scp /vserver/mybuild/build/RPMS/i386/ipfw-* root@node.iet.unipi.it: ssh root@node.iet.unipi.it rpm -e ipfw rpm -ivh ./ipfw-0-9...TAB modprobe ipfw_mod # the ipfw package should be installed ipfw show --- References [1] https://svn.planet-lab.org/wiki/VserverCentos [2] http://wiki.linux-vserver.org/Installation_on_CentOS [3] http://mirror.centos.org/centos/5/isos/ [4] More information are in /build/README* files ipfw_mod/Makefile.openwrt000644 000423 000000 00000004255 11217706202 016261 0ustar00luigiwheel000000 000000 # Makefile to build the package in openwrt. 
# goes into package/ipfw2/Makefile # # Edit IPFW_DIR to point to the directory with the sources for ipfw IPFW_DIR := $(TOPDIR)/../ipfw_mod include $(TOPDIR)/rules.mk include $(INCLUDE_DIR)/kernel.mk PKG_NAME:=kmod-ipfw2 PKG_RELEASE:=1 # MV is undefined MV ?= mv include $(INCLUDE_DIR)/package.mk # Description for the package. # The names KernelPackage/ipfw2 must match the arguments to the # call $(eval $(call KernelPackage,ipfw2)) used to build it define KernelPackage/ipfw2 SUBMENU:=Other modules TITLE:= IPFW and dummynet # FILES is what makes up the module, both kernel and userland # It must be in the KernelPackage section FILES := $(PKG_BUILD_DIR)/dummynet/ipfw_mod.o $(PKG_BUILD_DIR)/ipfw/ipfw # AUTOLOAD:=$(call AutoLoad,80,ipfw_mod) endef define KernelPackage/ipfw2/description This package contains the ipfw and dummynet module endef # Standard entries for the openwrt builds: Build/Prepare and Build/Compile # Remember that commands must start with a tab # 'prepare' instructions for both kernel and userland # We copy the entire subtree, then build include_e/ which # contains empty headers used by the kernel sources. 
define Build/Prepare # $(warning Preparing ipfw sources) mkdir -p $(PKG_BUILD_DIR) $(CP) -Rp $(IPFW_DIR)/* $(PKG_BUILD_DIR)/ (cd $(PKG_BUILD_DIR)/dummynet && $(MAKE) include_e ) endef define Build/Compile # compile the kernel part for openwrt $(MAKE) -C "$(LINUX_DIR)" \ CROSS_COMPILE="$(TARGET_CROSS)" \ ARCH="$(LINUX_KARCH)" \ SUBDIRS="$(PKG_BUILD_DIR)/dummynet" \ VER=openwrt modules # compile the userland part for openwrt $(MAKE) -C $(PKG_BUILD_DIR)/ipfw \ $(TARGET_CONFIGURE_OPTS) \ CFLAGS="$(TARGET_CFLAGS) -I./include -include ../glue.h" \ VER=openwrt all endef define Package/ipfw2-userland SECTION:=utils CATEGORY:=Utilities TITLE := /sbin/ipfw DESCRIPTION := This is the control program for ipfw and dummynet endef define Package/ipfw2-userland/install $(INSTALL_DIR) $(1) /sbin endef # XXX not entirely clear why the install entry for userland works, # given that /sbin/ipfw is in KernelPackage/ipfw2 $(eval $(call Package,ipfw2-userland)) $(eval $(call KernelPackage,ipfw2)) ipfw_mod/planetlab/000755 000423 000000 00000000000 11311411502 015047 5ustar00luigiwheel000000 000000 ipfw_mod/planetlab/ipfw-cleanup000755 000423 000000 00000002075 11311371734 017407 0ustar00luigiwheel000000 000000 #!/bin/sh # # Marta Carbone # Copyright (C) 2009 Universita` di Pisa # $Id: ipfw-cleanup 4482 2009-12-14 08:38:38Z luigi $ # # This script parse the ipfw rules # and remove the old ones. # # The ipfw output is parsed and each time # value stored as comment is compared against # the current time. # If the time value is older than current, # the rules and related pipes will be deleted. # # $Id: ipfw-cleanup 4482 2009-12-14 08:38:38Z luigi $ RULE_LIST="ipfw show" # Get $NOW referred to UTC NOW=`date -u +%s` # check for module existence /sbin/lsmod | grep ipfw if [ x"$?" 
== x"1" ]; then echo "ipfw module does not exist"; exit 0; fi ${RULE_LIST} | awk ' BEGIN { print now a "Start to clean rules "; cleaned=0; } # delete rules and pipes function delete_rule(rule_id) { command="/sbin/ipfw delete " rule_id "; ipfw pipe delete " rule_id; system(command); } # awk main body /\/\/\ [0-9]*/ { # select timeout string timeout=$13; if (now > timeout) { delete_rule($1); cleaned++; } } END { print " " cleaned " rules cleaned"; } ' now=${NOW} ipfw_mod/planetlab/ipfw.8.gz000644 000423 000000 00000063434 11310130444 016540 0ustar00luigiwheel000000 000000 ͽk{ؕ&9'd[qȒ)[r$9U>z@ TY Hr:3HOOSWiS6|Z,wm7>^˧{ߤ{߼zyeh/Olմ^}զ|LM2Iߝ$En =N:4+'i[gi1Nl*ۺ˺قK4D-/7t)-l2IuZys6첽o&oSz,?yACm5m lvJ~l>L&|tc$-[^WO#jf(W^l j nJyz5 9G{ףA?uy1ϗټ]}t cU껈'SY=SF+ MòY{]'x׃Am v_j5y3/n*-ekf?!eV}Zp :hbʟjUلDE>lp{5/+Fe/8y #wsbcOٌ8/Y{SfFq]E7yjhE4rj^3< ~ɥϓob.'Wο WuҊVDи$OiɊWSH"+Oi]-=/$ǼmҬSmzK~4'tJmӬ(/i[G7-5Le^Nk>niDgꪅ1ͦD35OW5}#SN7SD.[cmm2UNS鎣=5U.*"~niLz &̃B4nl-JT"DFNV{]tuR4q`M}2Wbgt <֎氨j!?, cH[bQ04Y\jߘ(n :J^ZV -ԸyقXö7ϴ~$MZM+6]Ҳ/mv08ʐMh:}t_i9 m̼8=f0֨j<^Q)½AY,bn:A^c5846>Uٟ fU2gz\͇Ӌ߾y/'?{glֈ1%;eWP$1ʶiamhot]xă8 E˓4CK 'ˤs0ZɑveNj;rY~e׫kndv<3}/:*a ŨnɊw j0y^ͫKUQ䳽ݸ l^t1 q %t4'@$c_7s:6nځ~qZBuHپɅpBDK')9B}W,WsK5*{"w3ݙ5@t9z&6n>9PCg0ofyNE., Eݺqœ"t ;$gt -F(\zt]U,!8=vOdeΒ2['z?j/=w|{~݇kFnyU}\-fʏzH$)ļ>' uqS7R $Bm.6;RBlO[rH0uBC>5-i,Tx[%_Z'YWaߏ1ogDQⓟV·U"8t7V]&1`AdWib=!z\6O>z;%Ό􂍁Ih`zD dXAa%u5l: wTfbehhibDM6#:#oB8ɟݴS4V-Uw4L͌4Ib=#%YoXptH]sk*i"ăڝћ2mb:swu'h2.vC C"e}SUbd{';mEr K֔ڔ'v%p/;vήo+E{DžȲ*͛l{/ LLcY5mpt ;#>TxiǤUF1^(CݲhGd"b|_M6<5b ht4g<6- G<9t`*&ƴ[MVVCkȉNq6HAf&5`]4zPA8UH3 94&%`ކfQoA_KW,W@LĈ=n$fPvbBj3hU¶3f.˂{DH*YkqwaNe֧FVKs`?"L=S?G]luuQ%_O(À@8>4Z$_>2)%c uc=Mf7z_ç]4A0u@k '19('W%xY_mp[عFtq ֟C$PORtmjeDqYi.xOEo*O[X5C# %ɗM <Ŕxnk:=,]]N+ĤaKEJ59GRª$R`q?g Wr9[pX,8E{C5Mckrݶoimְ(϶&=Iz{*wڶƟah.Rb茪tQAlDz|EdWBn n+ :!9-I-5I@RjJ2m1a7SNPE?֗Č$|%Rh;Mķq&#t_$vcOQEt&mV]E1]JU*/ NVc"~rϽI09j^ z-KtqSB/G)_] Z&ō80=Yfb2E`AŒHvӗz>xWDQ(qwWlc8˕i<[mfD^LLna絅EO(FG2l:ԛ(4ت&OUIj^' Oѿn+kpJ}t_333W^/WOY[/V%F]DZ2[JҾP o>\ r!<}[?K&WG{c;oގqE.{8ԂU͢ 
z)qр5yTE_ֻ`:g%gEeWQ/dt-g-I06R^B̌3[wʪ쳫e& WUryRB)y|nA$|FVmjK.*q}+lkno&FqG>{Z_kjHzkg t^C;K/.di Ʀg%?<Ȓ Q֣!4|:'>^>H&]ptZ[ bڄ+>|@n9GԼp?=8I_=(<ZѲ3yQ1ϕꔭi^;,3]yEbrxM e[/|UoUV!`񔸀kOn-U5ٶn$wl'V$ՙװMz]ٱqG9q*(w}w ˨Y @9k# ousA4*K|HLz(;3y}¬%ۡZ8hHlFO Tۍd6n&86jhHԉ Bk!GS`e Y~9x99VIՃHבa؃D/HU߳ZpHx}HieI0Dq ӂc4t#A.PG״:[~I~QsnhH=w檝4h}}.+Uzc>M ׅmqZ2 !ΫunC| P1x Sð¬CIƷْ5_ZB,*x: RQH)~cmm".8JB%i2VO;0a}1*X`~grJH>^A-4 ۙ+]V:׽] +L$`ױpN&DΝdʿJfIDm:&BP xsiUKxpbJ_ip OR"M2 [ܷ|4$DTqr(Z̪n$L`h$7%uoӷ<9pqzD~u_ILD D*b68pk؎%;E:JSX,R3_ ы>=IKg(e-GMwCTp]eW?U" kB(YDPZ\ zwjFFμ=-3HW9:(dCոar# 9^ٌSEK⚅_U&mo"nAlzxBA:ض+L+nv]Z.U)u_q ֬FbNv4 g;w% /MYv^Ck|⊢"&::< M-KoVgjD @Ƣu+!w 04rpnzƴH4&nk83\v_DK+.h/.S\m8XhrsHA yeFbuXn!7A2Hq">#ZM 59|HZ!w̃0 qLIJc`R32cE 1_馦:f$! m[/*"b?PD"񰈽h _;d_N!,d47PcñáaȽ՜B\UxR4cTK" 5 [zX@E,R(͟bZ8t.?:C%kSr7)l7DwUh%Ij1]ۣ]եV5YQ

M-IsŪB3t4CWXJȢ{aYҒ$MOo8$HD`lzxK-Jq0߳Fic27; y,[>piX? Ӓ⅒f >M80+Js!S]H]۫-A/-Pnz灻'QDϭHyќH&cC|yjљld1W߯3>,'e=^ݳr($`UtX1cv#j>u߱iakVDFNZ(&QJ"o d3dvH:+IP8<G9$V`|6~N˔yFD a,DVC1&X* ^aΡpٕ?Pwʢ}?4đ``ҞڑɤĀo '.0\^EF0WSě:3U^s0-#KcKLUs*Nl5g!2xx^ (EM-f0"n]N1Š4m$K$aV"6wyABҾˉxesYWc#ᚐX,A:&ZxF>|.[JфDLo/?8~:MgA^uKhP j̳ 3؊kLj})g'Wߟ_|_\^"-gK;gW.g*DS076w;fO3WE$㎡w6o hOz0Lw=kbb~)[$Ap`ZAY!VK" ZbE8kL ,\o,à*q;m;l5eYI$-&'u(䦘ݰW9.YưLJ||V=&qB2rq& fH Ņ #/rN9ϯm9_A'^|w);k_L^c1a<#j-9j1XzU04=>XDtO/-xB70E+IfOZ|VǀDgTs֜Yj٣S[5<ݪс9n%*'$5]'&/m]݃_fn~{68y槫8Q_V},XO͛;97_\xp+Ң\ xN.ϓwB;<ǵ2)P43eܜ":ZFR~f&^9k3[dA{y;vˤy[ 8x fؖ3lÁ;imv3DS 0Ze/oKmT̀֬[! x>4=KNhI`hBZRB>‰AeҾ2=s@ciO(AqXԦ'9CuW.}^4[W/^ ,ӎVjo;w!)N6U0[`t6 nRII_ ;g uLc*9;|S0 TqBdya0x؆_r8B^G/" w,Õo0 T53r=8&{/{^_IȒRC|` xơ__{3nx䯗 H^O!` j d9K+HX]/[% AՂA8G]'R@;SΣZ7AHWyr[1_ nX'0Io0(g ЦDy<`>5 wz݆djWskd6 Z mʻ4㰢2MX؍2Pȴ˨Ŏ``Ftצ"@KIuoȅg7ͳ_']@ 3UǽbOܞ![Gb8bk\'N{o6t 7l2![#mqxCJ ; mUoϧ$͞yT~ ^nqPg.ӇtoQwpGNwĭc$ܰv5^zl9/)f  mAڬ7Q$tc9WLw^} J AAM>}o>xOqũL2[0ЬyD!^ijHxOVlsxZ҉h1,x0 &l8/h@MW" iW=tH}y]ΚGhRc# ,>7$ö}C%Dff=ƒ]7GKV=jQ}Shp1L=*?^{QqqXv950hdVxek g7p^Ь$U mu}7o?^Ųe!{PbX7 Jk}4ÐFAιP^ A][ q;6hЏ *:\KP|E$MclQ>ih&9? 3T]GXn7),2HA8`I#RMrNQGrh-d%GyLsA 5r+HY4Uyzsc8AދbH] C!ʀhLȍK3BGz%]NN(x!jv:R=74/k;\#`K7s[B7x HEfN{Sq{'L1`# >(RN3g_ CZo.DL2\H˘V ֮qpVm@6*=F>:k_Q59ڢtѬhL(|Հd7%ZYcZt &§KaŤ+6%UWcw\ )d1DeS,|F:JҍX(fi gɼΦCHR ѡ];k4 sa~vv+"^޸`t., g;7l8 ,@nV75-W y߮PɷRԎi;jŨ@)s.DBg+DAU8%G堣钟mե9plf2m>2f8s9D>e"޿x NΞ򵍈  %AIs71'ϞmuB:*iȄz6||)䜞<`i$-Q&UぺU7䟳+󾩅e4$l@L|\3r7GF&N.q&k19*C ʍ^ظ? NL!G`{3-IL?w7p`T$2Ǟ,[p4}RxÁ%UX:8.p {*0H0$m U1%p<#J d7*YMb{ƀL*eJs;ϐԡl/iMY#;G<ϻ= ]˺(1axUg6nu-Per&E7]켨 KƷt|E0(弄b}hl/\ ݮ͚y`+<+˼ cA#1Y;6{34%BYI* 62H.WwHEzgYayH{l\te>\7ge-V,ha,7mkg*9F^ #p)}[ &+HQ~R!z9ECt |Et/#c+VChm3Ao:|K=7C(cx'[>Iȓa1 @6Ÿ MfJEzߖSwQw)&Wi_m1l_G+)}49- 8vZ67g_ %K0 WblHT>q$FkM6"e@VMIJ}Vz,p2^~ 21.&ڱ*M.YQmR;G XZ&n8Wo }_gRsκX~>xqn-eߢ[QdѬJVl+*rE-'t4&c5vTm]zl/9Oג-]a Y-לKP+UYF>@jlv0=2K !==LW\HzetU&RЙ2 HUKn Q}.lpȍ;i{rNDnfSU-_H#G?Qb!*hO( AO%~-g`hzM5ϟ(6A(L(Z?NE`h}0jpۄpFl? 
~ġ6_k3s@I>0$\;TZUJtGVᰯgsf Aj|fMqZ\bGAE6J^lbF&< GAv\p_OҴLXY63dUL:6dcMT ǝ-ꦃN6}dm9ݽ*߬ʉiKjO@YB]S_4 Jc4 rരJSUTF ˍ_ #>v٭\˰9LcqphAp*FS)W#ʣ̉*Oo g/"\(ib<C3JYF,;3< ML89yBCBji\(EQe6CeQSN) ˍҋlR|>nUhF0I.8$],>cǻp|'7E.4A;iq MX񧐱Z z3jQMQcTo(UiMdWJW}eupmCtUwUMdOq7` T=wa{c,iGb*t.BO4=^R8=)z]:mR e/,{r`JOm|h]ŎMw WZ+ *],q96Jjێ`.h 1T KOgys{A _>QB]ǰZHϛ.b-Z0H2܆(eU5?+{{>ܧ\{Ov_^P2)8.u׳$kF+.`󙡓p].( f-"X9q{vx29B I~֬r0'EX*ߝxiC$; jc`ZCW0hw0H9 ++_띢!Iznqlh6@ɑNGP, l pCb5׀|lҙN qcV-!(lpϊ2*`2ҕ>M'^A#ψI4 h4A R֛G^X@ס"oMz'f}D 15k\kF].>n!S2kWB*Th1qFx8G۵ ˖|'Ĺu2Қ8*Kuj,:\`{ɛoSwҬ-a4cGyqcGɠ !B¾)̠h̉9R2`f QИ8/C!]G'^oλ0^ltii~hIg ^_, &Q2 S]h ,E0ՓV+@S0 , ,^;TI%vR_Ic5$odx[H[8[ֽEKݻ V9cMkHYpxlΙK 8[7g#8Al)60bdg^*1K j ah$,Պ\D h+(QXx zy, e֪Zn]2SG }[4T@O:?IXzN8& hA(E 9.[ҶfܪŚa Ż%,[hO99EwIY5J5ZM[/{:ABa-px Eo+/c|vliHy 9gvƧ"ߕTj W{jeCZN RY'BKbv釓q꯼~7 xt-0* tH׫= b(\hw߉V"=ີą5xr&AM% [2c I&Ix8nY=ljxʪH8Fp}q @>=kq@DI 1ۂ;Y<5hLKQ@U I7% HW^yZ Y.0Y*9$~.>BY6@ ta1㡃µ[z)!MɹG;HnV Xz@Fj/$үww8 |W7Ҫq>Z 6Hոg]`dfӠ]a;/w] 0%% {VVp^)0\0?J ?A 9AI|"GiF?ؙz"5k Sҕ{.R6rbdһ-Ol#J2{ݭܪkPJn86͌7,3 k u_BYGtjnxl}] @>/f7ӟ$ dWXc'0Ta 447 Y7}qaK0g\d 4r/n^,i.d B6iWY?/d}TpnV|ToJG.%[ife`Ui :J$ 1up8 խF--Kjiwx5G!4}AxqOR;/ŠM\ C<dI|Lv|p -חΪjI+Ȅ?OVϽ6%+fTvq4[I*b{.Ta;|~$zCcQ$l;Q:8ꥵ h%-iWg<ƊcMDӡ.gr:ܶPd3 PB=o hnOm7LvCT /h%!P}܇YT["8rUߠiaebxS 0kU/13,9uw5>xAtQE}W'|*--jJ5r۔B9Z *;Nģ..9=hG#@F &8dPm{ZlZj|#G՜o2lBA0HGS/vƅ&1׈;թVUN;-ȅ-bOf{=/sX5)Qm*QsD'| kK,AEn;J^ap#UpCՖ]ѩhOZ tA;|"GW˃η> t!@u-_.^Nn JDdU|[v3"VUV\J}tBBIv}g(1 ҽdZ%bMO?&A인1ʁ@YՒRZ*-Lzoy:6mW^8>r'%ãEFlȦ\<#.7Xb6H1JY[Dcy/u90x7Eh987Io>#Qk>ӼCf\{d1 d>w{$}+ҩr$2 E= 'ͼjAm*ߡXYz.#  :b(OF&mԅMu8%BN]7e)sG1S۔\rzq /39&|v(&8,'٠,,)jF!\kDLDC(q(>(J'4*ď<,4TgAw ` VywA0~p##{7Śe16C6aEEIXn"HZ2HOQIFի>o0sNc '΅~:Bn쁠WEVf3cfLm7 ʡQ8 .i֕ $!hl *f2m|%nh zIXJD-Ut*$_JX@ˮ&d ë́=>&Ą߃a#TZ2-'z"R#ż53[1ʐpj$Sg, N/2]/?uFY Jo^f ^A$?cL_`=.Ip2JVC!>pc fA{vS@斥|ʅ$l͑lr_Kk_I]f 4`b*__O*i]P #*]7BT8A:ztoO{{zy|k^HZ2[kr$I, xn)vHѱ 5P %VSՏ>OKsdB grrfsGߝ\ǧ>DڡRjyb0-^l t{swQ'VŤ*KVv$8KFEK&5%~'?@wY,oI~S>Dاg'Wߟ_|_\^"-;gWЦ[H7$W4Ô5(Z1? 
(v/ե4kC;f Bd)ֲeT!Mt{g׸^1ild Ebu> s2+ 9'u"U\pr8̍vQ, rώ(< 8 PAI3t|^˼G$pf oM'zS%$iKZi:r"˭4[^n@5\&Ƽ !d7XGIX#|PzlaGqjqָei).NO/Nó%Wio %Ky+ۖSYHهhB ( G"!jP3!#0;p`_VaKh+F5iOkҦ&{Wr'F?csM˶Ho7cx1)q٪GْC+. TY# xmgFƧf}pW-[M<A-Ga[:1Ϥej S "šN;yϝSK[Q}푷i\6WSuN?4 K9 !VeHL41V8S"AHJHj^84KNw8 LfB]l!v[)(XIƗ~y9>L?lf"| nxH`s+b^my(]"P{[a% .#wۙi'Sل~Tso_?oi۩=5\m$ ܡ"zBN10x!js0|AKa|XYgV;)X-;x M[y׶ѰLȩ8iڦ Eoa kDh6&é^B/ľ7] ÓIgM4tKiHIgh-KChݜd׉yΖ$vz`dO Bp3:_g\n0%>U 8-`S>8&i!N5wBp4YXrcP4aEtf!հ }v큹# s#{X/ؒ1fd$rn6R&LU^9h&2`3bX4It꠿" U}\hs}rIM{6 xujNJs.)ߏ\Q) ni`1 =܍<3nT 3yQk+ׅ(b_6 b`Wie><=27a>=;XM&ueK81>@fԃ3YQSIxA-! {eV>vr#ݾNtVI%X_Ç@;mI soçP #EpU '8XeW8H,ڵ0 л,ec藠J ,TCK _v}/Q_n{6~62 FoF+_;  T0 \M0ĆOnQ_w+X@LERjPǡee/X&4rf|Flw癥nr$ɢ& =Syx'`Ls v(jT۠9b |:+0"D[pr8j_d  *1[ڰQ1nN'x}լ 9ϠzQNLT6@HB|G͖K75;EBnM09T3':,XaSGEbO;2 dFrOvoy "\NwHC[IHR6OЩt~c Tz7Lc3˔dLuNsk ]oa;4{|e9s}>=,6+sG= "}}I*1h="($i_e0cG|lK8Ef],}RlEPDY3,xe_ ͂ߝY!^a.$؅>f>T`*2taPX9 p$i <\J dE[0$BsbG?Kd=_  t!G}@yg!\ 'tKQ^V\P giBe:R %nv40 L)(Du. 5ָe-bjf.\؅$o\YU*DBbR}&1+<.vjDb(vb?IGes1jzyyHBVˠ PƢu*_-oxRlŵ:\(c ?M41>[85-hW~nΰ; 7xw8nKQ/qӲcpG5$Ip%7{YDXحiiDpSƀy0؆rhW0zݹft ΕZ10Q^c+j0LwwKqZ[e,@( Um8]*޸ʻJx<|㹎RhC{_z`Swm bU_FVP Y⺂ G (2CAxF.V &?yf4+e軪İ_fS.Q[K{ٙETtVHi!gPC_6㛊 )Tz6D^Νo*1\KIvyV w$d6ڙK~O[-9U3 94U{ GDi:AZgYJ5 vE֭(^zr n[Z_nmd# ¶T/_34N:;rdߠ6M8;mILHpr v3!%HRFH6 OxOK07zw9H6A_[&{= 4bkBKE~ކ#{Җjq78hMn~`sdO~Ez5M?ydu˂Eђp0K_n/G tLW1허C(oVU,曭MO5!LOe -ϒˑ`dkؔu֮s(Kl5_?.(z9g ۞<|w07~>߄|Öib UTMh%ezj7^ڻ7;x~ۗCrߩn_/ˇ7.O_hIza%-`sg6o?Ѥ{}t3^k.f5+I ׻7.ϾX`@@GU6WHAhߞ^^_aZvY-)z)iȍw\Ңwe!\ܢ! 
eقE͆K5p_@?\}{~q Ìnw^òm9Pho0k=t|G|s 7bzTF~vŮ*+sAˊOwr4q 5}qV<}Mc[v<>iWYW{ "=DjS?o5 ᜥĴcޘ% VgcF8|Z.'ϱPc2F9EiDSitc.EXops"#bt[AQoF2`xmkJIߨPb;і2raj i^uUX5exe4lft@X넑+Fe-U>\ .ByWRPĉǮe0|ِY}SEqLv.ϵ8'N1S>K:#LJ Ϟ9dOETH> ${PIPE_IN}" sudo sh -c "cat ${PIPE_OUT}" ipfw_mod/planetlab/check_planetlab_sync000755 000423 000000 00000000773 11310130457 021145 0ustar00luigiwheel000000 000000 #!/bin/sh # # This script is used to check the sync of the local repo # with the remote planetlab repository tmpfile=/tmp/chech_planetlab_sync.tmp # check for local copy sync svn diff > /tmp/chech_planetlab_sync.tmp if [ -s $tmpfile ] ; then echo "Local repo unsynced, can not continue" exit -1 rm $tmpfile fi # export remote copy svn --force export http://svn.planet-lab.org/svn/ipfw/trunk ./ >> /dev/null # check diffs again, output to the user svn diff svn status | grep -v check_planetlab_sync ipfw_mod/planetlab/ipfw.cron000644 000423 000000 00000000242 11311371734 016711 0ustar00luigiwheel000000 000000 # Runs every 5 minutes and clean ipfw expired rules # $Id: ipfw.cron 4482 2009-12-14 08:38:38Z luigi $ */5 * * * * root /usr/bin/ipfw-cleanup > /dev/null 2>&1 ipfw_mod/planetlab/sample_hook000755 000423 000000 00000002016 11310130457 017303 0ustar00luigiwheel000000 000000 #!/bin/sh # # Marta Carbone # 2009 - Universita` di Pisa # # This is a sample hook file in charge to collect # statistical information on netconfig usage. It dumps # on a log file slicename, port and the configuration string # used to configure a dummynet experiment. # # Each time a user configure a dummynet port, this file # will be executed. # The following variables will be passed as argument: # # ${SLICE} ${PORT} ${CONFIG_STRING} # ${SLICE} The slicename executing the netconfig command # ${PORT} The port to be configured # ${CONFIG_STRING} The configuration string # # Note that this script can get additional information # by executing the ipfw command, e.g. 
# ipfw list # list of installed rules # ipfw show # list of rules and statistical information # ipfw pipe show # list of pipes # # a complete list of ipfw commands is available at: # http://www.freebsd.org/cgi/man.cgi?query=ipfw&sektion=8 # logfile LOG_FILE=/tmp/ipfw_hook.log echo -e `date` >> ${LOG_FILE} echo "$*" >> ${LOG_FILE} ipfw_mod/planetlab/planetlab.mk000644 000423 000000 00000001444 11311411502 017345 0ustar00luigiwheel000000 000000 # $Id: planetlab.mk 4494 2009-12-14 10:52:36Z luigi $ # .mk file to build a module kernel-MODULES := linux-2.6 kernel-SPEC := kernel-2.6.spec kernel-BUILD-FROM-SRPM := yes ifeq "$(HOSTARCH)" "i386" kernel-RPMFLAGS:= --target i686 else kernel-RPMFLAGS:= --target $(HOSTARCH) endif ALL += kernel ipfwroot-MODULES := ipfwxy ipfwroot-SPEC := planetlab/ipfwroot.spec ipfwroot-DEPEND-DEVEL-RPMS := kernel-devel ipfwroot-SPECVARS = kernel_version=$(kernel.rpm-version) \ kernel_release=$(kernel.rpm-release) \ kernel_arch=$(kernel.rpm-arch) ALL += ipfwroot ipfwslice-MODULES := ipfwxy ipfwslice-SPEC := planetlab/ipfwslice.spec ipfwslice-SPECVARS = kernel_version=$(kernel.rpm-version) \ kernel_release=$(kernel.rpm-release) \ kernel_arch=$(kernel.rpm-arch) ALL += ipfwslice ipfw_mod/planetlab/planetlab-tags.mk000644 000423 000000 00000000445 11311404424 020306 0ustar00luigiwheel000000 000000 # $Id: planetlab-tags.mk 4491 2009-12-14 10:09:09Z luigi $ linux-2.6-SVNBRANCH := 22 linux-2.6-SVNPATH := http://svn.planet-lab.org/svn/linux-2.6/tags/linux-2.6-22-39-1 ipfwxy-SVNPATH := svn+ssh://luigi%40onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw_mod ipfw_mod/planetlab/ipfwroot.spec000644 000423 000000 00000006430 11311411455 017606 0ustar00luigiwheel000000 000000 # # $Id: ipfw.spec 15891 2009-11-29 13:22:16Z thierry $ # # TODO: # restart crond # modprobe ipfw_mod.ko (depmod ?) # %define url $URL: http://onelab1.iet.unipi.it/svn/trunk/ipfw.spec $ # Marta Carbone # 2009 - Universita` di Pisa # License is BSD. 
# kernel_release, kernel_version and kernel_arch are expected to be set by the build to e.g. # kernel_release : vs2.3.0.29.1.planetlab # kernel_version : 2.6.22.14 %define name ipfw %define version 0.9 %define taglevel 7 %define release %{kernel_version}.%{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} %define kernel_id_arch %{kernel_version}-%{kernel_release}-%{kernel_arch} %define kernel_id %{kernel_version}-%{kernel_release} Summary: ipfw and dummynet for Linux Name: %{name} Version: %{version} Release: %{release} License: BSD Group: System Environment/Kernel Source0: %{name}-%{version}.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot Requires: vixie-cron Vendor: unipi Packager: PlanetLab # XXX ask Distribution: PlanetLab %{plrelease} URL: %(echo %{url} | cut -d ' ' -f 2) %description ipfw is the Linux port of the FreeBSD ipfw and dummynet packages %prep %setup %build # clean the rpm build directory rm -rf $RPM_BUILD_ROOT # with the new build, we use the kernel-devel rpm for building %define kernelpath /usr/src/kernels/%{kernel_id_arch} %__make KERNELPATH=%kernelpath clean %__make KERNELPATH=%kernelpath IPFW_PLANETLAB=1 %install install -D -m 755 dummynet/ipfw_mod.ko $RPM_BUILD_ROOT/lib/modules/%{kernel_id}/net/netfilter/ipfw_mod.ko install -D -m 755 ipfw/ipfw $RPM_BUILD_ROOT/sbin/ipfw install -D -m 755 planetlab/ipfw-cleanup $RPM_BUILD_ROOT/usr/bin/ipfw-cleanup install -D -m 755 planetlab/ipfw.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/ipfw.cron %clean rm -rf $RPM_BUILD_ROOT # here there is a list of the final installation directories %files %defattr(-,root,root) %dir /lib/modules/%{kernel_id} /lib/modules/%{kernel_id}/net/netfilter/ipfw_mod.ko /sbin/ipfw /usr/bin/ipfw-cleanup %{_sysconfdir}/cron.d/ipfw.cron %postun # unload the module if present LOADED=`cat /proc/modules | grep ^ipfw_mod`; if [ -n "$LOADED" ] ; then rmmod ipfw_mod; fi %changelog * Sun Nov 29 2009 Thierry Parmentelat - ipfw-0.9-7 - added missing qsort.c - tag 0.9-6 
was broken * Thu Nov 26 2009 Thierry Parmentelat - ipfw-0.9-6 - root: removed goto into the main ipfw switch, enabled slice_id matching - slice: completely move netconfig checks into the backend * Mon Nov 09 2009 Thierry Parmentelat - ipfw-0.9-5 - additional features on matching packets, including uid match * Mon Sep 07 2009 Thierry Parmentelat - ipfw-0.9-4 - on behalf of Marta Carbone, more options and features * Thu Jul 23 2009 Thierry Parmentelat - ipfw-0.9-3 - fixed memory usage issue * Wed Jul 15 2009 Thierry Parmentelat - ipfw-0.9-2 - patch for building on x86_64 * Thu Jun 25 2009 Marta Carbone - post installation removed for deployment, moved manpages to the slice package * Fri Apr 17 2009 Marta Carbone - Initial release ipfw_mod/planetlab/ipfwslice.spec000644 000423 000000 00000004474 11311411464 017730 0ustar00luigiwheel000000 000000 # # $Id: ipfw-slice.spec 15891 2009-11-29 13:22:16Z thierry $ # # TODO: # restart crond # modprobe ipfw_mod.ko (depmod ?) # %define url $URL: http://onelab1.iet.unipi.it/svn/trunk/ipfw-slice.spec $ # Marta Carbone # 2009 - Universita` di Pisa # License is BSD. 
%define name ipfw-slice %define version 0.9 %define taglevel 7 %define release %{kernel_version}.%{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} %define kernel_id_arch %{kernel_version}-%{kernel_release}-%{kernel_arch} %define kernel_id %{kernel_version}-%{kernel_release} Summary: ipfw and dummynet for Linux Name: %{name} Version: %{version} Release: %{release} License: BSD Group: System Environment/Kernel Source0: %{name}-%{version}.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot Vendor: unipi Packager: PlanetLab Distribution: PlanetLab %{plrelease} URL: %(echo %{url} | cut -d ' ' -f 2) %description the frontend part of the ipfw planetlab package %prep %setup %build rm -rf $RPM_BUILD_ROOT %install install -D -m 755 planetlab/netconfig $RPM_BUILD_ROOT/sbin/netconfig install -D -m 755 planetlab/ipfw.8.gz $RPM_BUILD_ROOT/%{_mandir}/man8/ipfw.8.gz %clean rm -rf $RPM_BUILD_ROOT # here there is a list of the final installation directories %files %defattr(-,root,root) /sbin/netconfig %{_mandir}/man8/ipfw.8* %changelog * Sun Nov 29 2009 Thierry Parmentelat - ipfw-0.9-7 - added missing qsort.c - tag 0.9-6 was broken * Thu Nov 26 2009 Thierry Parmentelat - ipfw-0.9-6 - root: removed goto into the main ipfw switch, enabled slice_id matching - slice: completely move netconfig checks into the backend * Mon Nov 09 2009 Thierry Parmentelat - ipfw-0.9-5 - additional features on matching packets, including uid match * Mon Sep 07 2009 Thierry Parmentelat - ipfw-0.9-4 - on behalf of Marta Carbone, more options and features * Thu Jul 23 2009 Thierry Parmentelat - ipfw-0.9-3 - fixed memory usage issue * Wed Jul 15 2009 Thierry Parmentelat - ipfw-0.9-2 - patch for building on x86_64 * Thu Jun 25 2009 Marta Carbone - Initial release ipfw_mod/ipfw/include/000755 000423 000000 00000000000 11307666303 015515 5ustar00luigiwheel000000 000000 ipfw_mod/ipfw/expand_number.c000644 000423 000000 00000006042 11305761047 017066 0ustar00luigiwheel000000 000000 /*- * 
Copyright (c) 2007 Eric Anderson * Copyright (c) 2007 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ // #include __FBSDID("$FreeBSD: src/lib/libutil/expand_number.c,v 1.2.4.2 2009/06/10 14:52:34 des Exp $"); #include #include #include #include //#include #include /* * Convert an expression of the following forms to a int64_t. * 1) A positive decimal number. * 2) A positive decimal number followed by a 'b' or 'B' (mult by 1). * 3) A positive decimal number followed by a 'k' or 'K' (mult by 1 << 10). * 4) A positive decimal number followed by a 'm' or 'M' (mult by 1 << 20). * 5) A positive decimal number followed by a 'g' or 'G' (mult by 1 << 30). * 6) A positive decimal number followed by a 't' or 'T' (mult by 1 << 40). 
* 7) A positive decimal number followed by a 'p' or 'P' (mult by 1 << 50). * 8) A positive decimal number followed by a 'e' or 'E' (mult by 1 << 60). */ int expand_number(const char *buf, int64_t *num) { static const char unit[] = "bkmgtpe"; char *endptr, s; int64_t number; int i; number = strtoimax(buf, &endptr, 0); if (endptr == buf) { /* No valid digits. */ errno = EINVAL; return (-1); } if (*endptr == '\0') { /* No unit. */ *num = number; return (0); } s = tolower(*endptr); switch (s) { case 'b': case 'k': case 'm': case 'g': case 't': case 'p': case 'e': break; default: /* Unrecognized unit. */ errno = EINVAL; return (-1); } for (i = 0; unit[i] != '\0'; i++) { if (s == unit[i]) break; if ((number < 0 && (number << 10) > number) || (number >= 0 && (number << 10) < number)) { errno = ERANGE; return (-1); } number <<= 10; } *num = number; return (0); } ipfw_mod/ipfw/add_rules000755 000423 000000 00000001056 11217660421 015757 0ustar00luigiwheel000000 000000 #!/bin/bash # # A test script to add rules PRG=./ipfw myfun() { $PRG add 10 count icmp from any to 131.114.9.128 $PRG add 20 count icmp from 131.114.9.128 to any $PRG add 20 count icmp from any to 131.114.9.130 $PRG add 30 count icmp from 131.114.9.130 to any $PRG add 40 count icmp from any to 131.114.9.129 $PRG add 50 count icmp from 131.114.9.129 to any $PRG add 60 count icmp from 131.114.9.236 to any sleep 1 $PRG del 10 $PRG del 20 $PRG del 20 $PRG del 30 $PRG del 40 $PRG del 50 $PRG del 60 } for ((i=0;i<100;i++)) ; do myfun done ipfw_mod/ipfw/altq.c000644 000423 000000 00000006536 11153706043 015204 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. 
* * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. * * NEW command line interface for IP firewall facility * * $FreeBSD: head/sbin/ipfw/altq.c 187983 2009-02-01 16:00:49Z luigi $ * * altq interface */ #include #include #include #include "ipfw2.h" #include #include #include #include #include #include #include #include #include /* IFNAMSIZ */ #include #include #include /* * Map between current altq queue id numbers and names. */ static TAILQ_HEAD(, pf_altq) altq_entries = TAILQ_HEAD_INITIALIZER(altq_entries); void altq_set_enabled(int enabled) { int pffd; pffd = open("/dev/pf", O_RDWR); if (pffd == -1) err(EX_UNAVAILABLE, "altq support opening pf(4) control device"); if (enabled) { if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST) err(EX_UNAVAILABLE, "enabling altq"); } else { if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT) err(EX_UNAVAILABLE, "disabling altq"); } close(pffd); } static void altq_fetch(void) { struct pfioc_altq pfioc; struct pf_altq *altq; int pffd; unsigned int mnr; static int altq_fetched = 0; if (altq_fetched) return; altq_fetched = 1; pffd = open("/dev/pf", O_RDONLY); if (pffd == -1) { warn("altq support opening pf(4) control device"); return; } bzero(&pfioc, sizeof(pfioc)); if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) { warn("altq support getting queue list"); close(pffd); return; } mnr = pfioc.nr; for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) { if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) { if (errno == EBUSY) break; warn("altq support getting queue list"); close(pffd); return; } if (pfioc.altq.qid == 0) continue; altq = safe_calloc(1, sizeof(*altq)); *altq = pfioc.altq; TAILQ_INSERT_TAIL(&altq_entries, altq, entries); } close(pffd); } u_int32_t altq_name_to_qid(const char *name) { struct pf_altq *altq; altq_fetch(); 
TAILQ_FOREACH(altq, &altq_entries, entries) if (strcmp(name, altq->qname) == 0) break; if (altq == NULL) errx(EX_DATAERR, "altq has no queue named `%s'", name); return altq->qid; } static const char * altq_qid_to_name(u_int32_t qid) { struct pf_altq *altq; altq_fetch(); TAILQ_FOREACH(altq, &altq_entries, entries) if (qid == altq->qid) break; if (altq == NULL) return NULL; return altq->qname; } void print_altq_cmd(ipfw_insn_altq *altqptr) { if (altqptr) { const char *qname; qname = altq_qid_to_name(altqptr->qid); if (qname == NULL) printf(" altq ?<%u>", altqptr->qid); else printf(" altq %s", qname); } } ipfw_mod/ipfw/dummynet.c000644 000423 000000 00000066602 11310017562 016101 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. 
* * NEW command line interface for IP firewall facility * * $FreeBSD: head/sbin/ipfw/dummynet.c 187769 2009-01-27 11:06:59Z luigi $ * * dummynet support */ #include #include #include /* XXX there are several sysctl leftover here */ #include #include "ipfw2.h" #include #include #include #include #include #include #include #include #include #include #include #include #include /* inet_ntoa */ static struct _s_x dummynet_params[] = { { "plr", TOK_PLR }, { "noerror", TOK_NOERROR }, { "buckets", TOK_BUCKETS }, { "dst-ip", TOK_DSTIP }, { "src-ip", TOK_SRCIP }, { "dst-port", TOK_DSTPORT }, { "src-port", TOK_SRCPORT }, { "proto", TOK_PROTO }, { "weight", TOK_WEIGHT }, { "all", TOK_ALL }, { "mask", TOK_MASK }, { "droptail", TOK_DROPTAIL }, { "red", TOK_RED }, { "gred", TOK_GRED }, { "bw", TOK_BW }, { "bandwidth", TOK_BW }, { "delay", TOK_DELAY }, { "pipe", TOK_PIPE }, { "queue", TOK_QUEUE }, { "flow-id", TOK_FLOWID}, { "dst-ipv6", TOK_DSTIP6}, { "dst-ip6", TOK_DSTIP6}, { "src-ipv6", TOK_SRCIP6}, { "src-ip6", TOK_SRCIP6}, { "profile", TOK_PIPE_PROFILE}, { "burst", TOK_BURST}, { "dummynet-params", TOK_NULL }, { NULL, 0 } /* terminator */ }; static int sort_q(void *arg, const void *pa, const void *pb) { int rev = (co.do_sort < 0); int field = rev ? -co.do_sort : co.do_sort; long long res = 0; const struct dn_flow_queue *a = pa; const struct dn_flow_queue *b = pb; switch (field) { case 1: /* pkts */ res = a->len - b->len; break; case 2: /* bytes */ res = a->len_bytes - b->len_bytes; break; case 3: /* tot pkts */ res = a->tot_pkts - b->tot_pkts; break; case 4: /* tot bytes */ res = a->tot_bytes - b->tot_bytes; break; } if (res < 0) res = -1; if (res > 0) res = 1; return (int)(rev ? 
res : -res); } static void list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) { int l; int index_printed, indexes = 0; char buff[255]; struct protoent *pe; if (fs->rq_elements == 0) return; if (co.do_sort != 0) qsort_r(q, fs->rq_elements, sizeof *q, NULL, sort_q); /* Print IPv4 flows */ index_printed = 0; for (l = 0; l < fs->rq_elements; l++) { struct in_addr ina; /* XXX: Should check for IPv4 flows */ if (IS_IP6_FLOW_ID(&(q[l].id))) continue; if (!index_printed) { index_printed = 1; if (indexes > 0) /* currently a no-op */ printf("\n"); indexes++; printf(" " "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", fs->flow_mask.proto, fs->flow_mask.src_ip, fs->flow_mask.src_port, fs->flow_mask.dst_ip, fs->flow_mask.dst_port); printf("BKT Prot ___Source IP/port____ " "____Dest. IP/port____ " "Tot_pkt/bytes Pkt/Byte Drp\n"); } printf("%3d ", q[l].hash_slot); pe = getprotobynumber(q[l].id.proto); if (pe) printf("%-4s ", pe->p_name); else printf("%4u ", q[l].id.proto); ina.s_addr = htonl(q[l].id.src_ip); printf("%15s/%-5d ", inet_ntoa(ina), q[l].id.src_port); ina.s_addr = htonl(q[l].id.dst_ip); printf("%15s/%-5d ", inet_ntoa(ina), q[l].id.dst_port); printf("%4llu %8llu %2u %4u %3u\n", align_uint64(&q[l].tot_pkts), align_uint64(&q[l].tot_bytes), q[l].len, q[l].len_bytes, q[l].drops); if (co.verbose) printf(" S %20llu F %20llu\n", align_uint64(&q[l].S), align_uint64(&q[l].F)); } /* Print IPv6 flows */ index_printed = 0; for (l = 0; l < fs->rq_elements; l++) { if (!IS_IP6_FLOW_ID(&(q[l].id))) continue; if (!index_printed) { index_printed = 1; if (indexes > 0) printf("\n"); indexes++; printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", fs->flow_mask.proto, fs->flow_mask.flow_id6); inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), buff, sizeof(buff)); printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6), buff, sizeof(buff) ); printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); printf("BKT ___Prot___ _flow-id_ " 
"______________Source IPv6/port_______________ " "_______________Dest. IPv6/port_______________ " "Tot_pkt/bytes Pkt/Byte Drp\n"); } printf("%3d ", q[l].hash_slot); pe = getprotobynumber(q[l].id.proto); if (pe != NULL) printf("%9s ", pe->p_name); else printf("%9u ", q[l].id.proto); printf("%7d %39s/%-5d ", q[l].id.flow_id6, inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), q[l].id.src_port); printf(" %39s/%-5d ", inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), q[l].id.dst_port); printf(" %4llu %8llu %2u %4u %3u\n", align_uint64(&q[l].tot_pkts), align_uint64(&q[l].tot_bytes), q[l].len, q[l].len_bytes, q[l].drops); if (co.verbose) printf(" S %20llu F %20llu\n", align_uint64(&q[l].S), align_uint64(&q[l].F)); } } static void print_flowset_parms(struct dn_flow_set *fs, char *prefix) { int l; char qs[30]; char plr[30]; char red[90]; /* Display RED parameters */ l = fs->qsize; if (fs->flags_fs & DN_QSIZE_IS_BYTES) { if (l >= 8192) sprintf(qs, "%d KB", l / 1024); else sprintf(qs, "%d B", l); } else sprintf(qs, "%3d sl.", l); if (fs->plr) sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); else plr[0] = '\0'; if (fs->flags_fs & DN_IS_RED) /* RED parameters */ sprintf(red, "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", (fs->flags_fs & DN_IS_GENTLE_RED) ? 
'G' : ' ', 1.0 * fs->w_q / (double)(1 << SCALE_RED), SCALE_VAL(fs->min_th), SCALE_VAL(fs->max_th), 1.0 * fs->max_p / (double)(1 << SCALE_RED)); else sprintf(red, "droptail"); printf("%s %s%s %d queues (%d buckets) %s\n", prefix, qs, plr, fs->rq_elements, fs->rq_size, red); } static void print_extra_delay_parms(struct dn_pipe *p) { double loss; if (p->samples_no <= 0) return; loss = p->loss_level; loss /= p->samples_no; printf("\t profile: name \"%s\" loss %f samples %d\n", p->name, loss, p->samples_no); } void ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) { int rulenum; void *next = data; struct dn_pipe *p = (struct dn_pipe *) data; struct dn_flow_set *fs; struct dn_flow_queue *q; int l; if (ac > 0) rulenum = strtoul(*av++, NULL, 10); else rulenum = 0; for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { double b = p->bandwidth; char buf[30]; char prefix[80]; char burst[5 + 7]; if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) break; /* done with pipes, now queues */ /* * compute length, as pipe have variable size */ l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); next = (char *)p + l; nbytes -= l; if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) continue; /* * Print rate (or clocking interface) */ if (p->if_name[0] != '\0') sprintf(buf, "%s", p->if_name); else if (b == 0) sprintf(buf, "unlimited"); else if (b >= 1000000) sprintf(buf, "%7.3f Mbit/s", b/1000000); else if (b >= 1000) sprintf(buf, "%7.3f Kbit/s", b/1000); else sprintf(buf, "%7.3f bit/s ", b); sprintf(prefix, "%05d: %s %4d ms ", p->pipe_nr, buf, p->delay); print_flowset_parms(&(p->fs), prefix); if (humanize_number(burst, sizeof(burst), p->burst, "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose) printf("\t burst: %ju Byte\n", p->burst); else printf("\t burst: %s\n", burst); print_extra_delay_parms(p); q = (struct dn_flow_queue *)(p+1); list_queues(&(p->fs), q); } for (fs = next; nbytes >= sizeof *fs; fs = next) { char prefix[80]; if (SLIST_NEXT(fs, next) != (struct 
dn_flow_set *)DN_IS_QUEUE) break; l = sizeof(*fs) + fs->rq_elements * sizeof(*q); next = (char *)fs + l; nbytes -= l; if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || (rulenum != fs->parent_nr && co.do_pipe == 1))) { continue; } q = (struct dn_flow_queue *)(fs+1); sprintf(prefix, "q%05d: weight %d pipe %d ", fs->fs_nr, fs->weight, fs->parent_nr); print_flowset_parms(fs, prefix); list_queues(fs, q); } } /* * Delete pipe or queue i */ int ipfw_delete_pipe(int pipe_or_queue, int i) { struct dn_pipe p; memset(&p, 0, sizeof p); if (pipe_or_queue == 1) p.pipe_nr = i; /* pipe */ else p.fs.fs_nr = i; /* queue */ i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); if (i) { i = 1; warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); } return i; } /* * Code to parse delay profiles. * * Some link types introduce extra delays in the transmission * of a packet, e.g. because of MAC level framing, contention on * the use of the channel, MAC level retransmissions and so on. * From our point of view, the channel is effectively unavailable * for this extra time, which is constant or variable depending * on the link type. Additionally, packets may be dropped after this * time (e.g. on a wireless link after too many retransmissions). * We can model the additional delay with an empirical curve * that represents its distribution. * * cumulative probability * 1.0 ^ * | * L +-- loss-level x * | ****** * | * * | ***** * | * * | ** * | * * +-------*-------------------> * delay * * The empirical curve may have both vertical and horizontal lines. * Vertical lines represent constant delay for a range of * probabilities; horizontal lines correspond to a discontinuty * in the delay distribution: the pipe will use the largest delay * for a given probability. * * To pass the curve to dummynet, we must store the parameters * in a file as described below, and issue the command * * ipfw pipe config ... bw XXX profile ... 
* * The file format is the following, with whitespace acting as * a separator and '#' indicating the beginning a comment: * * samples N * the number of samples used in the internal * representation (2..1024; default 100); * * loss-level L * The probability above which packets are lost. * (0.0 <= L <= 1.0, default 1.0 i.e. no loss); * * name identifier * Optional a name (listed by "ipfw pipe show") * to identify the distribution; * * "delay prob" | "prob delay" * One of these two lines is mandatory and defines * the format of the following lines with data points. * * XXX YYY * 2 or more lines representing points in the curve, * with either delay or probability first, according * to the chosen format. * The unit for delay is milliseconds. * * Data points does not need to be ordered or equal to the number * specified in the "samples" line. ipfw will sort and interpolate * the curve as needed. * * Example of a profile file: name bla_bla_bla samples 100 loss-level 0.86 prob delay 0 200 # minimum overhead is 200ms 0.5 200 0.5 300 0.8 1000 0.9 1300 1 1300 * Internally, we will convert the curve to a fixed number of * samples, and when it is time to transmit a packet we will * model the extra delay as extra bits in the packet. * */ #define ED_MAX_LINE_LEN 256+ED_MAX_NAME_LEN #define ED_TOK_SAMPLES "samples" #define ED_TOK_LOSS "loss-level" #define ED_TOK_NAME "name" #define ED_TOK_DELAY "delay" #define ED_TOK_PROB "prob" #define ED_TOK_BW "bw" #define ED_SEPARATORS " \t\n" #define ED_MIN_SAMPLES_NO 2 /* * returns 1 if s is a non-negative number, with at least one '.' */ static int is_valid_number(const char *s) { int i, dots_found = 0; int len = strlen(s); for (i = 0; i 1)) return 0; return 1; } /* * Take as input a string describing a bandwidth value * and return the numeric bandwidth value. 
* set clocking interface or bandwidth value */ static void read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) { if (*bandwidth != -1) warn("duplicate token, override bandwidth value!"); if (arg[0] >= 'a' && arg[0] <= 'z') { if (namelen >= IFNAMSIZ) warn("interface name truncated"); namelen--; /* interface name */ strncpy(if_name, arg, namelen); if_name[namelen] = '\0'; *bandwidth = 0; } else { /* read bandwidth value */ int bw; char *end = NULL; bw = strtoul(arg, &end, 0); if (*end == 'K' || *end == 'k') { end++; bw *= 1000; } else if (*end == 'M') { end++; bw *= 1000000; } if ((*end == 'B' && _substrcmp2(end, "Bi", "Bit/s") != 0) || _substrcmp2(end, "by", "bytes") == 0) bw *= 8; if (bw < 0) errx(EX_DATAERR, "bandwidth too large"); *bandwidth = bw; if_name[0] = '\0'; } } struct point { double prob; double delay; }; static int compare_points(const void *vp1, const void *vp2) { const struct point *p1 = vp1; const struct point *p2 = vp2; double res = 0; res = p1->prob - p2->prob; if (res == 0) res = p1->delay - p2->delay; if (res < 0) return -1; else if (res > 0) return 1; else return 0; } #define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno static void load_extra_delays(const char *filename, struct dn_pipe *p) { char line[ED_MAX_LINE_LEN]; FILE *f; int lineno = 0; int i; int samples = -1; double loss = -1.0; char profile_name[ED_MAX_NAME_LEN]; int delay_first = -1; int do_points = 0; struct point points[ED_MAX_SAMPLES_NO]; int points_no = 0; profile_name[0] = '\0'; f = fopen(filename, "r"); if (f == NULL) err(EX_UNAVAILABLE, "fopen: %s", filename); while (fgets(line, ED_MAX_LINE_LEN, f)) { /* read commands */ char *s, *cur = line, *name = NULL, *arg = NULL; ++lineno; /* parse the line */ while (cur) { s = strsep(&cur, ED_SEPARATORS); if (s == NULL || *s == '#') break; if (*s == '\0') continue; if (arg) errx(ED_EFMT("too many arguments")); if (name == NULL) name = s; else arg = s; } if (name == NULL) /* empty line */ continue; 
if (arg == NULL) errx(ED_EFMT("missing arg for %s"), name); if (!strcasecmp(name, ED_TOK_SAMPLES)) { if (samples > 0) errx(ED_EFMT("duplicate ``samples'' line")); if (atoi(arg) <=0) errx(ED_EFMT("invalid number of samples")); samples = atoi(arg); if (samples>ED_MAX_SAMPLES_NO) errx(ED_EFMT("too many samples, maximum is %d"), ED_MAX_SAMPLES_NO); do_points = 0; } else if (!strcasecmp(name, ED_TOK_BW)) { read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name)); } else if (!strcasecmp(name, ED_TOK_LOSS)) { if (loss != -1.0) errx(ED_EFMT("duplicated token: %s"), name); if (!is_valid_number(arg)) errx(ED_EFMT("invalid %s"), arg); loss = atof(arg); if (loss > 1) errx(ED_EFMT("%s greater than 1.0"), name); do_points = 0; } else if (!strcasecmp(name, ED_TOK_NAME)) { if (profile_name[0] != '\0') errx(ED_EFMT("duplicated token: %s"), name); strncpy(profile_name, arg, sizeof(profile_name) - 1); profile_name[sizeof(profile_name)-1] = '\0'; do_points = 0; } else if (!strcasecmp(name, ED_TOK_DELAY)) { if (do_points) errx(ED_EFMT("duplicated token: %s"), name); delay_first = 1; do_points = 1; } else if (!strcasecmp(name, ED_TOK_PROB)) { if (do_points) errx(ED_EFMT("duplicated token: %s"), name); delay_first = 0; do_points = 1; } else if (do_points) { if (!is_valid_number(name) || !is_valid_number(arg)) errx(ED_EFMT("invalid point found")); if (delay_first) { points[points_no].delay = atof(name); points[points_no].prob = atof(arg); } else { points[points_no].delay = atof(arg); points[points_no].prob = atof(name); } if (points[points_no].prob > 1.0) errx(ED_EFMT("probability greater than 1.0")); ++points_no; } else { errx(ED_EFMT("unrecognised command '%s'"), name); } } fclose (f); if (samples == -1) { warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES); samples = 100; } if (loss == -1.0) { warnx("'%s' not found, assuming no loss", ED_TOK_LOSS); loss = 1; } /* make sure that there are enough points. 
*/ if (points_no < ED_MIN_SAMPLES_NO) errx(ED_EFMT("too few samples, need at least %d"), ED_MIN_SAMPLES_NO); qsort(points, points_no, sizeof(struct point), compare_points); /* interpolation */ for (i = 0; isamples[index] = x1; } else { double m = (y2-y1)/(x2-x1); double c = y1 - m*x1; for (; indexsamples[index] = (index - c)/m; } } p->samples_no = samples; p->loss_level = loss * samples; strncpy(p->name, profile_name, sizeof(p->name)); } void ipfw_config_pipe(int ac, char **av) { int samples[ED_MAX_SAMPLES_NO]; struct dn_pipe p; int i; char *end; void *par = NULL; memset(&p, 0, sizeof p); p.bandwidth = -1; av++; ac--; /* Pipe number */ if (ac && isdigit(**av)) { i = atoi(*av); av++; ac--; if (co.do_pipe == 1) p.pipe_nr = i; else p.fs.fs_nr = i; } while (ac > 0) { double d; int tok = match_token(dummynet_params, *av); ac--; av++; switch(tok) { case TOK_NOERROR: p.fs.flags_fs |= DN_NOERROR; break; case TOK_PLR: NEED1("plr needs argument 0..1\n"); d = strtod(av[0], NULL); if (d > 1) d = 1; else if (d < 0) d = 0; p.fs.plr = (int)(d*0x7fffffff); ac--; av++; break; case TOK_QUEUE: NEED1("queue needs queue size\n"); end = NULL; p.fs.qsize = strtoul(av[0], &end, 0); if (*end == 'K' || *end == 'k') { p.fs.flags_fs |= DN_QSIZE_IS_BYTES; p.fs.qsize *= 1024; } else if (*end == 'B' || _substrcmp2(end, "by", "bytes") == 0) { p.fs.flags_fs |= DN_QSIZE_IS_BYTES; } ac--; av++; break; case TOK_BUCKETS: NEED1("buckets needs argument\n"); p.fs.rq_size = strtoul(av[0], NULL, 0); ac--; av++; break; case TOK_MASK: NEED1("mask needs mask specifier\n"); /* * per-flow queue, mask is dst_ip, dst_port, * src_ip, src_port, proto measured in bits */ par = NULL; bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); end = NULL; while (ac >= 1) { uint32_t *p32 = NULL; uint16_t *p16 = NULL; uint32_t *p20 = NULL; struct in6_addr *pa6 = NULL; uint32_t a; tok = match_token(dummynet_params, *av); ac--; av++; switch(tok) { case TOK_ALL: /* * special case, all bits significant */ p.fs.flow_mask.dst_ip = ~0; 
p.fs.flow_mask.src_ip = ~0; p.fs.flow_mask.dst_port = ~0; p.fs.flow_mask.src_port = ~0; p.fs.flow_mask.proto = ~0; n2mask(&(p.fs.flow_mask.dst_ip6), 128); n2mask(&(p.fs.flow_mask.src_ip6), 128); p.fs.flow_mask.flow_id6 = ~0; p.fs.flags_fs |= DN_HAVE_FLOW_MASK; goto end_mask; case TOK_DSTIP: p32 = &p.fs.flow_mask.dst_ip; break; case TOK_SRCIP: p32 = &p.fs.flow_mask.src_ip; break; case TOK_DSTIP6: pa6 = &(p.fs.flow_mask.dst_ip6); break; case TOK_SRCIP6: pa6 = &(p.fs.flow_mask.src_ip6); break; case TOK_FLOWID: p20 = &p.fs.flow_mask.flow_id6; break; case TOK_DSTPORT: p16 = &p.fs.flow_mask.dst_port; break; case TOK_SRCPORT: p16 = &p.fs.flow_mask.src_port; break; case TOK_PROTO: break; default: ac++; av--; /* backtrack */ goto end_mask; } if (ac < 1) errx(EX_USAGE, "mask: value missing"); if (*av[0] == '/') { a = strtoul(av[0]+1, &end, 0); if (pa6 == NULL) a = (a == 32) ? ~0 : (1 << a) - 1; } else a = strtoul(av[0], &end, 0); if (p32 != NULL) *p32 = a; else if (p16 != NULL) { if (a > 0xFFFF) errx(EX_DATAERR, "port mask must be 16 bit"); *p16 = (uint16_t)a; } else if (p20 != NULL) { if (a > 0xfffff) errx(EX_DATAERR, "flow_id mask must be 20 bit"); *p20 = (uint32_t)a; } else if (pa6 != NULL) { if (a > 128) errx(EX_DATAERR, "in6addr invalid mask len"); else n2mask(pa6, a); } else { if (a > 0xFF) errx(EX_DATAERR, "proto mask must be 8 bit"); p.fs.flow_mask.proto = (uint8_t)a; } if (a != 0) p.fs.flags_fs |= DN_HAVE_FLOW_MASK; ac--; av++; } /* end while, config masks */ end_mask: break; case TOK_RED: case TOK_GRED: NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); p.fs.flags_fs |= DN_IS_RED; if (tok == TOK_GRED) p.fs.flags_fs |= DN_IS_GENTLE_RED; /* * the format for parameters is w_q/min_th/max_th/max_p */ if ((end = strsep(&av[0], "/"))) { double w_q = strtod(end, NULL); if (w_q > 1 || w_q <= 0) errx(EX_DATAERR, "0 < w_q <= 1"); p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); } if ((end = strsep(&av[0], "/"))) { p.fs.min_th = strtoul(end, &end, 0); if (*end == 'K' || *end == 'k') 
p.fs.min_th *= 1024; } if ((end = strsep(&av[0], "/"))) { p.fs.max_th = strtoul(end, &end, 0); if (*end == 'K' || *end == 'k') p.fs.max_th *= 1024; } if ((end = strsep(&av[0], "/"))) { double max_p = strtod(end, NULL); if (max_p > 1 || max_p <= 0) errx(EX_DATAERR, "0 < max_p <= 1"); p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); } ac--; av++; break; case TOK_DROPTAIL: p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); break; case TOK_BW: NEED1("bw needs bandwidth or interface\n"); if (co.do_pipe != 1) errx(EX_DATAERR, "bandwidth only valid for pipes"); read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name)); ac--; av++; break; case TOK_DELAY: if (co.do_pipe != 1) errx(EX_DATAERR, "delay only valid for pipes"); NEED1("delay needs argument 0..10000ms\n"); p.delay = strtoul(av[0], NULL, 0); ac--; av++; break; case TOK_WEIGHT: if (co.do_pipe == 1) errx(EX_DATAERR,"weight only valid for queues"); NEED1("weight needs argument 0..100\n"); p.fs.weight = strtoul(av[0], &end, 0); ac--; av++; break; case TOK_PIPE: if (co.do_pipe == 1) errx(EX_DATAERR,"pipe only valid for queues"); NEED1("pipe needs pipe_number\n"); p.fs.parent_nr = strtoul(av[0], &end, 0); ac--; av++; break; case TOK_PIPE_PROFILE: if (co.do_pipe != 1) errx(EX_DATAERR, "extra delay only valid for pipes"); NEED1("extra delay needs the file name\n"); p.samples = &samples[0]; load_extra_delays(av[0], &p); --ac; ++av; break; case TOK_BURST: if (co.do_pipe != 1) errx(EX_DATAERR, "burst only valid for pipes"); NEED1("burst needs argument\n"); errno = 0; if (expand_number(av[0], (int64_t *)&p.burst) < 0) if (errno != ERANGE) errx(EX_DATAERR, "burst: invalid argument"); if (errno || p.burst > (1ULL << 48) - 1) errx(EX_DATAERR, "burst: out of range (0..2^48-1)"); ac--; av++; break; default: errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); } } if (co.do_pipe == 1) { if (p.pipe_nr == 0) errx(EX_DATAERR, "pipe_nr must be > 0"); if (p.delay > 10000) errx(EX_DATAERR, "delay must be < 10000"); } else { /* 
co.do_pipe == 2, queue */ if (p.fs.parent_nr == 0) errx(EX_DATAERR, "pipe must be > 0"); if (p.fs.weight >100) errx(EX_DATAERR, "weight must be <= 100"); } /* check for bandwidth value */ if (p.bandwidth == -1) { p.bandwidth = 0; if (p.samples_no > 0) errx(EX_DATAERR, "profile requires a bandwidth limit"); } if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { size_t len; long limit; len = sizeof(limit); if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", &limit, &len, NULL, 0) == -1) limit = 1024*1024; if (p.fs.qsize > limit) errx(EX_DATAERR, "queue size must be < %ldB", limit); } else { size_t len; long limit; len = sizeof(limit); if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", &limit, &len, NULL, 0) == -1) limit = 100; if (p.fs.qsize > limit) errx(EX_DATAERR, "2 <= queue size <= %ld", limit); } if (p.fs.flags_fs & DN_IS_RED) { size_t len; int lookup_depth, avg_pkt_size; double s, idle, weight, w_q; struct clockinfo ck; int t; if (p.fs.min_th >= p.fs.max_th) errx(EX_DATAERR, "min_th %d must be < than max_th %d", p.fs.min_th, p.fs.max_th); if (p.fs.max_th == 0) errx(EX_DATAERR, "max_th must be > 0"); len = sizeof(int); if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", &lookup_depth, &len, NULL, 0) == -1) errx(1, "sysctlbyname(\"%s\")", "net.inet.ip.dummynet.red_lookup_depth"); if (lookup_depth == 0) errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" " must be greater than zero"); len = sizeof(int); if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", &avg_pkt_size, &len, NULL, 0) == -1) errx(1, "sysctlbyname(\"%s\")", "net.inet.ip.dummynet.red_avg_pkt_size"); if (avg_pkt_size == 0) errx(EX_DATAERR, "net.inet.ip.dummynet.red_avg_pkt_size must" " be greater than zero"); len = sizeof(struct clockinfo); if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); /* * Ticks needed for sending a medium-sized packet. 
* Unfortunately, when we are configuring a WF2Q+ queue, we * do not have bandwidth information, because that is stored * in the parent pipe, and also we have multiple queues * competing for it. So we set s=0, which is not very * correct. But on the other hand, why do we want RED with * WF2Q+ ? */ if (p.bandwidth==0) /* this is a WF2Q+ queue */ s = 0; else s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; /* * max idle time (in ticks) before avg queue size becomes 0. * NOTA: (3/w_q) is approx the value x so that * (1-w_q)^x < 10^-3. */ w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); idle = s * 3. / w_q; p.fs.lookup_step = (int)idle / lookup_depth; if (!p.fs.lookup_step) p.fs.lookup_step = 1; weight = 1 - w_q; for (t = p.fs.lookup_step; t > 1; --t) weight *= 1 - w_q; p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); } if (p.samples_no <= 0) { i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); } else { struct dn_pipe_max pm; int len = sizeof(pm); memcpy(&pm.pipe, &p, sizeof(pm.pipe)); memcpy(&pm.samples, samples, sizeof(pm.samples)); i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); } if (i) err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); } ipfw_mod/ipfw/nat.c000644 000423 000000 00000056243 11153706043 015025 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. 
* * NEW command line interface for IP firewall facility * * $FreeBSD: head/sbin/ipfw/nat.c 187770 2009-01-27 12:01:30Z luigi $ * * In-kernel nat support */ #include #include #include #include "ipfw2.h" #include #include #include #include #include #include #include #define IPFW_INTERNAL /* Access to protected structures in ip_fw.h. */ #include #include #include /* def. of struct route */ #include #include #include #include static struct _s_x nat_params[] = { { "ip", TOK_IP }, { "if", TOK_IF }, { "log", TOK_ALOG }, { "deny_in", TOK_DENY_INC }, { "same_ports", TOK_SAME_PORTS }, { "unreg_only", TOK_UNREG_ONLY }, { "reset", TOK_RESET_ADDR }, { "reverse", TOK_ALIAS_REV }, { "proxy_only", TOK_PROXY_ONLY }, { "redirect_addr", TOK_REDIR_ADDR }, { "redirect_port", TOK_REDIR_PORT }, { "redirect_proto", TOK_REDIR_PROTO }, { NULL, 0 } /* terminator */ }; /* * Search for interface with name "ifn", and fill n accordingly: * * n->ip ip address of interface "ifn" * n->if_name copy of interface name "ifn" */ static void set_addr_dynamic(const char *ifn, struct cfg_nat *n) { size_t needed; int mib[6]; char *buf, *lim, *next; struct if_msghdr *ifm; struct ifa_msghdr *ifam; struct sockaddr_dl *sdl; struct sockaddr_in *sin; int ifIndex, ifMTU; mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; mib[3] = AF_INET; mib[4] = NET_RT_IFLIST; mib[5] = 0; /* * Get interface data. */ if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1) err(1, "iflist-sysctl-estimate"); buf = safe_calloc(1, needed); if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1) err(1, "iflist-sysctl-get"); lim = buf + needed; /* * Loop through interfaces until one with * given name is found. This is done to * find correct interface index for routing * message processing. 
*/ ifIndex = 0; next = buf; while (next < lim) { ifm = (struct if_msghdr *)next; next += ifm->ifm_msglen; if (ifm->ifm_version != RTM_VERSION) { if (co.verbose) warnx("routing message version %d " "not understood", ifm->ifm_version); continue; } if (ifm->ifm_type == RTM_IFINFO) { sdl = (struct sockaddr_dl *)(ifm + 1); if (strlen(ifn) == sdl->sdl_nlen && strncmp(ifn, sdl->sdl_data, sdl->sdl_nlen) == 0) { ifIndex = ifm->ifm_index; ifMTU = ifm->ifm_data.ifi_mtu; break; } } } if (!ifIndex) errx(1, "unknown interface name %s", ifn); /* * Get interface address. */ sin = NULL; while (next < lim) { ifam = (struct ifa_msghdr *)next; next += ifam->ifam_msglen; if (ifam->ifam_version != RTM_VERSION) { if (co.verbose) warnx("routing message version %d " "not understood", ifam->ifam_version); continue; } if (ifam->ifam_type != RTM_NEWADDR) break; if (ifam->ifam_addrs & RTA_IFA) { int i; char *cp = (char *)(ifam + 1); for (i = 1; i < RTA_IFA; i <<= 1) { if (ifam->ifam_addrs & i) cp += SA_SIZE((struct sockaddr *)cp); } if (((struct sockaddr *)cp)->sa_family == AF_INET) { sin = (struct sockaddr_in *)cp; break; } } } if (sin == NULL) errx(1, "%s: cannot get interface address", ifn); n->ip = sin->sin_addr; strncpy(n->if_name, ifn, IF_NAMESIZE); free(buf); } /* * XXX - The following functions, macros and definitions come from natd.c: * it would be better to move them outside natd.c, in a file * (redirect_support.[ch]?) shared by ipfw and natd, but for now i can live * with it. */ /* * Definition of a port range, and macros to deal with values. * FORMAT: HI 16-bits == first port in range, 0 == all ports. * LO 16-bits == number of ports in range * NOTES: - Port values are not stored in network byte order. */ #define port_range u_long #define GETLOPORT(x) ((x) >> 0x10) #define GETNUMPORTS(x) ((x) & 0x0000ffff) #define GETHIPORT(x) (GETLOPORT((x)) + GETNUMPORTS((x))) /* Set y to be the low-port value in port_range variable x. 
*/ #define SETLOPORT(x,y) ((x) = ((x) & 0x0000ffff) | ((y) << 0x10)) /* Set y to be the number of ports in port_range variable x. */ #define SETNUMPORTS(x,y) ((x) = ((x) & 0xffff0000) | (y)) static void StrToAddr (const char* str, struct in_addr* addr) { struct hostent* hp; if (inet_aton (str, addr)) return; hp = gethostbyname (str); if (!hp) errx (1, "unknown host %s", str); memcpy (addr, hp->h_addr, sizeof (struct in_addr)); } static int StrToPortRange (const char* str, const char* proto, port_range *portRange) { char* sep; struct servent* sp; char* end; u_short loPort; u_short hiPort; /* First see if this is a service, return corresponding port if so. */ sp = getservbyname (str,proto); if (sp) { SETLOPORT(*portRange, ntohs(sp->s_port)); SETNUMPORTS(*portRange, 1); return 0; } /* Not a service, see if it's a single port or port range. */ sep = strchr (str, '-'); if (sep == NULL) { SETLOPORT(*portRange, strtol(str, &end, 10)); if (end != str) { /* Single port. */ SETNUMPORTS(*portRange, 1); return 0; } /* Error in port range field. */ errx (EX_DATAERR, "%s/%s: unknown service", str, proto); } /* Port range, get the values and sanity check. */ sscanf (str, "%hu-%hu", &loPort, &hiPort); SETLOPORT(*portRange, loPort); SETNUMPORTS(*portRange, 0); /* Error by default */ if (loPort <= hiPort) SETNUMPORTS(*portRange, hiPort - loPort + 1); if (GETNUMPORTS(*portRange) == 0) errx (EX_DATAERR, "invalid port range %s", str); return 0; } static int StrToProto (const char* str) { if (!strcmp (str, "tcp")) return IPPROTO_TCP; if (!strcmp (str, "udp")) return IPPROTO_UDP; if (!strcmp (str, "sctp")) return IPPROTO_SCTP; errx (EX_DATAERR, "unknown protocol %s. 
Expected sctp, tcp or udp", str); } static int StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto, port_range *portRange) { char* ptr; ptr = strchr (str, ':'); if (!ptr) errx (EX_DATAERR, "%s is missing port number", str); *ptr = '\0'; ++ptr; StrToAddr (str, addr); return StrToPortRange (ptr, proto, portRange); } /* End of stuff taken from natd.c. */ #define INC_ARGCV() do { \ (*_av)++; \ (*_ac)--; \ av = *_av; \ ac = *_ac; \ } while(0) /* * The next 3 functions add support for the addr, port and proto redirect and * their logic is loosely based on SetupAddressRedirect(), SetupPortRedirect() * and SetupProtoRedirect() from natd.c. * * Every setup_* function fills at least one redirect entry * (struct cfg_redir) and zero or more server pool entry (struct cfg_spool) * in buf. * * The format of data in buf is: * * * cfg_nat cfg_redir cfg_spool ...... cfg_spool * * ------------------------------------- ------------ * | | .....X ... | | | | ..... * ------------------------------------- ...... ------------ * ^ * spool_cnt n=0 ...... n=(X-1) * * len points to the amount of available space in buf * space counts the memory consumed by every function * * XXX - Every function get all the argv params so it * has to check, in optional parameters, that the next * args is a valid option for the redir entry and not * another token. Only redir_port and redir_proto are * affected by this. */ static int setup_redir_addr(char *spool_buf, unsigned int len, int *_ac, char ***_av) { char **av, *sep; /* Token separator. */ /* Temporary buffer used to hold server pool ip's. */ char tmp_spool_buf[NAT_BUF_LEN]; int ac, space, lsnat; struct cfg_redir *r; struct cfg_spool *tmp; av = *_av; ac = *_ac; space = 0; lsnat = 0; if (len >= SOF_REDIR) { r = (struct cfg_redir *)spool_buf; /* Skip cfg_redir at beginning of buf. */ spool_buf = &spool_buf[SOF_REDIR]; space = SOF_REDIR; len -= SOF_REDIR; } else goto nospace; r->mode = REDIR_ADDR; /* Extract local address. 
*/ if (ac == 0) errx(EX_DATAERR, "redirect_addr: missing local address"); sep = strchr(*av, ','); if (sep) { /* LSNAT redirection syntax. */ r->laddr.s_addr = INADDR_NONE; /* Preserve av, copy spool servers to tmp_spool_buf. */ strncpy(tmp_spool_buf, *av, strlen(*av)+1); lsnat = 1; } else StrToAddr(*av, &r->laddr); INC_ARGCV(); /* Extract public address. */ if (ac == 0) errx(EX_DATAERR, "redirect_addr: missing public address"); StrToAddr(*av, &r->paddr); INC_ARGCV(); /* Setup LSNAT server pool. */ if (sep) { sep = strtok(tmp_spool_buf, ","); while (sep != NULL) { tmp = (struct cfg_spool *)spool_buf; if (len < SOF_SPOOL) goto nospace; len -= SOF_SPOOL; space += SOF_SPOOL; StrToAddr(sep, &tmp->addr); tmp->port = ~0; r->spool_cnt++; /* Point to the next possible cfg_spool. */ spool_buf = &spool_buf[SOF_SPOOL]; sep = strtok(NULL, ","); } } return(space); nospace: errx(EX_DATAERR, "redirect_addr: buf is too small\n"); } static int setup_redir_port(char *spool_buf, unsigned int len, int *_ac, char ***_av) { char **av, *sep, *protoName; char tmp_spool_buf[NAT_BUF_LEN]; int ac, space, lsnat; struct cfg_redir *r; struct cfg_spool *tmp; u_short numLocalPorts; port_range portRange; av = *_av; ac = *_ac; space = 0; lsnat = 0; numLocalPorts = 0; if (len >= SOF_REDIR) { r = (struct cfg_redir *)spool_buf; /* Skip cfg_redir at beginning of buf. */ spool_buf = &spool_buf[SOF_REDIR]; space = SOF_REDIR; len -= SOF_REDIR; } else goto nospace; r->mode = REDIR_PORT; /* * Extract protocol. */ if (ac == 0) errx (EX_DATAERR, "redirect_port: missing protocol"); r->proto = StrToProto(*av); protoName = *av; INC_ARGCV(); /* * Extract local address. */ if (ac == 0) errx (EX_DATAERR, "redirect_port: missing local address"); sep = strchr(*av, ','); /* LSNAT redirection syntax. */ if (sep) { r->laddr.s_addr = INADDR_NONE; r->lport = ~0; numLocalPorts = 1; /* Preserve av, copy spool servers to tmp_spool_buf. 
*/ strncpy(tmp_spool_buf, *av, strlen(*av)+1); lsnat = 1; } else { /* * The sctp nat does not allow the port numbers to be mapped to * new port numbers. Therefore, no ports are to be specified * in the target port field. */ if (r->proto == IPPROTO_SCTP) { if (strchr (*av, ':')) errx(EX_DATAERR, "redirect_port:" "port numbers do not change in sctp, so do not " "specify them as part of the target"); else StrToAddr(*av, &r->laddr); } else { if (StrToAddrAndPortRange (*av, &r->laddr, protoName, &portRange) != 0) errx(EX_DATAERR, "redirect_port:" "invalid local port range"); r->lport = GETLOPORT(portRange); numLocalPorts = GETNUMPORTS(portRange); } } INC_ARGCV(); /* * Extract public port and optionally address. */ if (ac == 0) errx (EX_DATAERR, "redirect_port: missing public port"); sep = strchr (*av, ':'); if (sep) { if (StrToAddrAndPortRange (*av, &r->paddr, protoName, &portRange) != 0) errx(EX_DATAERR, "redirect_port:" "invalid public port range"); } else { r->paddr.s_addr = INADDR_ANY; if (StrToPortRange (*av, protoName, &portRange) != 0) errx(EX_DATAERR, "redirect_port:" "invalid public port range"); } r->pport = GETLOPORT(portRange); if (r->proto == IPPROTO_SCTP) { /* so the logic below still works */ numLocalPorts = GETNUMPORTS(portRange); r->lport = r->pport; } r->pport_cnt = GETNUMPORTS(portRange); INC_ARGCV(); /* * Extract remote address and optionally port. */ /* * NB: isalpha(**av) => we've to check that next parameter is really an * option for this redirect entry, else stop here processing arg[cv]. 
*/ if (ac != 0 && !isalpha(**av)) { sep = strchr (*av, ':'); if (sep) { if (StrToAddrAndPortRange (*av, &r->raddr, protoName, &portRange) != 0) errx(EX_DATAERR, "redirect_port:" "invalid remote port range"); } else { SETLOPORT(portRange, 0); SETNUMPORTS(portRange, 1); StrToAddr (*av, &r->raddr); } INC_ARGCV(); } else { SETLOPORT(portRange, 0); SETNUMPORTS(portRange, 1); r->raddr.s_addr = INADDR_ANY; } r->rport = GETLOPORT(portRange); r->rport_cnt = GETNUMPORTS(portRange); /* * Make sure port ranges match up, then add the redirect ports. */ if (numLocalPorts != r->pport_cnt) errx(EX_DATAERR, "redirect_port:" "port ranges must be equal in size"); /* Remote port range is allowed to be '0' which means all ports. */ if (r->rport_cnt != numLocalPorts && (r->rport_cnt != 1 || r->rport != 0)) errx(EX_DATAERR, "redirect_port: remote port must" "be 0 or equal to local port range in size"); /* * Setup LSNAT server pool. */ if (lsnat) { sep = strtok(tmp_spool_buf, ","); while (sep != NULL) { tmp = (struct cfg_spool *)spool_buf; if (len < SOF_SPOOL) goto nospace; len -= SOF_SPOOL; space += SOF_SPOOL; /* * The sctp nat does not allow the port numbers to be mapped to new port numbers * Therefore, no ports are to be specified in the targetport field */ if (r->proto == IPPROTO_SCTP) { if (strchr (sep, ':')) { errx(EX_DATAERR, "redirect_port:" "port numbers do not change in " "sctp, so do not specify them as " "part of the target"); } else { StrToAddr(sep, &tmp->addr); tmp->port = r->pport; } } else { if (StrToAddrAndPortRange(sep, &tmp->addr, protoName, &portRange) != 0) errx(EX_DATAERR, "redirect_port:" "invalid local port range"); if (GETNUMPORTS(portRange) != 1) errx(EX_DATAERR, "redirect_port:" " local port must be single in " "this context"); tmp->port = GETLOPORT(portRange); } r->spool_cnt++; /* Point to the next possible cfg_spool. 
*/ spool_buf = &spool_buf[SOF_SPOOL]; sep = strtok(NULL, ","); } } return (space); nospace: errx(EX_DATAERR, "redirect_port: buf is too small\n"); } static int setup_redir_proto(char *spool_buf, unsigned int len, int *_ac, char ***_av) { char **av; int ac, space; struct protoent *protoent; struct cfg_redir *r; av = *_av; ac = *_ac; if (len >= SOF_REDIR) { r = (struct cfg_redir *)spool_buf; /* Skip cfg_redir at beginning of buf. */ spool_buf = &spool_buf[SOF_REDIR]; space = SOF_REDIR; len -= SOF_REDIR; } else goto nospace; r->mode = REDIR_PROTO; /* * Extract protocol. */ if (ac == 0) errx(EX_DATAERR, "redirect_proto: missing protocol"); protoent = getprotobyname(*av); if (protoent == NULL) errx(EX_DATAERR, "redirect_proto: unknown protocol %s", *av); else r->proto = protoent->p_proto; INC_ARGCV(); /* * Extract local address. */ if (ac == 0) errx(EX_DATAERR, "redirect_proto: missing local address"); else StrToAddr(*av, &r->laddr); INC_ARGCV(); /* * Extract optional public address. */ if (ac == 0) { r->paddr.s_addr = INADDR_ANY; r->raddr.s_addr = INADDR_ANY; } else { /* see above in setup_redir_port() */ if (!isalpha(**av)) { StrToAddr(*av, &r->paddr); INC_ARGCV(); /* * Extract optional remote address. 
*/ /* see above in setup_redir_port() */ if (ac!=0 && !isalpha(**av)) { StrToAddr(*av, &r->raddr); INC_ARGCV(); } } } return (space); nospace: errx(EX_DATAERR, "redirect_proto: buf is too small\n"); } static void print_nat_config(unsigned char *buf) { struct cfg_nat *n; int i, cnt, flag, off; struct cfg_redir *t; struct cfg_spool *s; struct protoent *p; n = (struct cfg_nat *)buf; flag = 1; off = sizeof(*n); printf("ipfw nat %u config", n->id); if (strlen(n->if_name) != 0) printf(" if %s", n->if_name); else if (n->ip.s_addr != 0) printf(" ip %s", inet_ntoa(n->ip)); while (n->mode != 0) { if (n->mode & PKT_ALIAS_LOG) { printf(" log"); n->mode &= ~PKT_ALIAS_LOG; } else if (n->mode & PKT_ALIAS_DENY_INCOMING) { printf(" deny_in"); n->mode &= ~PKT_ALIAS_DENY_INCOMING; } else if (n->mode & PKT_ALIAS_SAME_PORTS) { printf(" same_ports"); n->mode &= ~PKT_ALIAS_SAME_PORTS; } else if (n->mode & PKT_ALIAS_UNREGISTERED_ONLY) { printf(" unreg_only"); n->mode &= ~PKT_ALIAS_UNREGISTERED_ONLY; } else if (n->mode & PKT_ALIAS_RESET_ON_ADDR_CHANGE) { printf(" reset"); n->mode &= ~PKT_ALIAS_RESET_ON_ADDR_CHANGE; } else if (n->mode & PKT_ALIAS_REVERSE) { printf(" reverse"); n->mode &= ~PKT_ALIAS_REVERSE; } else if (n->mode & PKT_ALIAS_PROXY_ONLY) { printf(" proxy_only"); n->mode &= ~PKT_ALIAS_PROXY_ONLY; } } /* Print all the redirect's data configuration. 
*/ for (cnt = 0; cnt < n->redir_cnt; cnt++) { t = (struct cfg_redir *)&buf[off]; off += SOF_REDIR; switch (t->mode) { case REDIR_ADDR: printf(" redirect_addr"); if (t->spool_cnt == 0) printf(" %s", inet_ntoa(t->laddr)); else for (i = 0; i < t->spool_cnt; i++) { s = (struct cfg_spool *)&buf[off]; if (i) printf(","); else printf(" "); printf("%s", inet_ntoa(s->addr)); off += SOF_SPOOL; } printf(" %s", inet_ntoa(t->paddr)); break; case REDIR_PORT: p = getprotobynumber(t->proto); printf(" redirect_port %s ", p->p_name); if (!t->spool_cnt) { printf("%s:%u", inet_ntoa(t->laddr), t->lport); if (t->pport_cnt > 1) printf("-%u", t->lport + t->pport_cnt - 1); } else for (i=0; i < t->spool_cnt; i++) { s = (struct cfg_spool *)&buf[off]; if (i) printf(","); printf("%s:%u", inet_ntoa(s->addr), s->port); off += SOF_SPOOL; } printf(" "); if (t->paddr.s_addr) printf("%s:", inet_ntoa(t->paddr)); printf("%u", t->pport); if (!t->spool_cnt && t->pport_cnt > 1) printf("-%u", t->pport + t->pport_cnt - 1); if (t->raddr.s_addr) { printf(" %s", inet_ntoa(t->raddr)); if (t->rport) { printf(":%u", t->rport); if (!t->spool_cnt && t->rport_cnt > 1) printf("-%u", t->rport + t->rport_cnt - 1); } } break; case REDIR_PROTO: p = getprotobynumber(t->proto); printf(" redirect_proto %s %s", p->p_name, inet_ntoa(t->laddr)); if (t->paddr.s_addr != 0) { printf(" %s", inet_ntoa(t->paddr)); if (t->raddr.s_addr) printf(" %s", inet_ntoa(t->raddr)); } break; default: errx(EX_DATAERR, "unknown redir mode"); break; } } printf("\n"); } void ipfw_config_nat(int ac, char **av) { struct cfg_nat *n; /* Nat instance configuration. */ int i, len, off, tok; char *id, buf[NAT_BUF_LEN]; /* Buffer for serialized data. */ len = NAT_BUF_LEN; /* Offset in buf: save space for n at the beginning. */ off = sizeof(*n); memset(buf, 0, sizeof(buf)); n = (struct cfg_nat *)buf; av++; ac--; /* Nat id. 
*/ if (ac && isdigit(**av)) { id = *av; i = atoi(*av); ac--; av++; n->id = i; } else errx(EX_DATAERR, "missing nat id"); if (ac == 0) errx(EX_DATAERR, "missing option"); while (ac > 0) { tok = match_token(nat_params, *av); ac--; av++; switch (tok) { case TOK_IP: if (ac == 0) errx(EX_DATAERR, "missing option"); if (!inet_aton(av[0], &(n->ip))) errx(EX_DATAERR, "bad ip address ``%s''", av[0]); ac--; av++; break; case TOK_IF: if (ac == 0) errx(EX_DATAERR, "missing option"); set_addr_dynamic(av[0], n); ac--; av++; break; case TOK_ALOG: n->mode |= PKT_ALIAS_LOG; break; case TOK_DENY_INC: n->mode |= PKT_ALIAS_DENY_INCOMING; break; case TOK_SAME_PORTS: n->mode |= PKT_ALIAS_SAME_PORTS; break; case TOK_UNREG_ONLY: n->mode |= PKT_ALIAS_UNREGISTERED_ONLY; break; case TOK_RESET_ADDR: n->mode |= PKT_ALIAS_RESET_ON_ADDR_CHANGE; break; case TOK_ALIAS_REV: n->mode |= PKT_ALIAS_REVERSE; break; case TOK_PROXY_ONLY: n->mode |= PKT_ALIAS_PROXY_ONLY; break; /* * All the setup_redir_* functions work directly in the final * buffer, see above for details. */ case TOK_REDIR_ADDR: case TOK_REDIR_PORT: case TOK_REDIR_PROTO: switch (tok) { case TOK_REDIR_ADDR: i = setup_redir_addr(&buf[off], len, &ac, &av); break; case TOK_REDIR_PORT: i = setup_redir_port(&buf[off], len, &ac, &av); break; case TOK_REDIR_PROTO: i = setup_redir_proto(&buf[off], len, &ac, &av); break; } n->redir_cnt++; off += i; len -= i; break; default: errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); } } i = do_cmd(IP_FW_NAT_CFG, buf, off); if (i) err(1, "setsockopt(%s)", "IP_FW_NAT_CFG"); if (!co.do_quiet) { /* After every modification, we show the resultant rule. 
*/ int _ac = 3; const char *_av[] = {"show", "config", id}; ipfw_show_nat(_ac, (char **)(void *)_av); } } void ipfw_show_nat(int ac, char **av) { struct cfg_nat *n; struct cfg_redir *e; int cmd, i, nbytes, do_cfg, do_rule, frule, lrule, nalloc, size; int nat_cnt, redir_cnt, r; uint8_t *data, *p; char *endptr; do_rule = 0; nalloc = 1024; size = 0; data = NULL; frule = 0; lrule = IPFW_DEFAULT_RULE; /* max ipfw rule number */ ac--; av++; if (co.test_only) return; /* Parse parameters. */ for (cmd = IP_FW_NAT_GET_LOG, do_cfg = 0; ac != 0; ac--, av++) { if (!strncmp(av[0], "config", strlen(av[0]))) { cmd = IP_FW_NAT_GET_CONFIG, do_cfg = 1; continue; } /* Convert command line rule #. */ frule = lrule = strtoul(av[0], &endptr, 10); if (*endptr == '-') lrule = strtoul(endptr+1, &endptr, 10); if (lrule == 0) err(EX_USAGE, "invalid rule number: %s", av[0]); do_rule = 1; } nbytes = nalloc; while (nbytes >= nalloc) { nalloc = nalloc * 2; nbytes = nalloc; data = safe_realloc(data, nbytes); if (do_cmd(cmd, data, (uintptr_t)&nbytes) < 0) err(EX_OSERR, "getsockopt(IP_FW_GET_%s)", (cmd == IP_FW_NAT_GET_LOG) ? 
"LOG" : "CONFIG"); } if (nbytes == 0) exit(0); if (do_cfg) { nat_cnt = *((int *)data); for (i = sizeof(nat_cnt); nat_cnt; nat_cnt--) { n = (struct cfg_nat *)&data[i]; if (frule <= n->id && lrule >= n->id) print_nat_config(&data[i]); i += sizeof(struct cfg_nat); for (redir_cnt = 0; redir_cnt < n->redir_cnt; redir_cnt++) { e = (struct cfg_redir *)&data[i]; i += sizeof(struct cfg_redir) + e->spool_cnt * sizeof(struct cfg_spool); } } } else { for (i = 0; 1; i += LIBALIAS_BUF_SIZE + sizeof(int)) { p = &data[i]; if (p == data + nbytes) break; bcopy(p, &r, sizeof(int)); if (do_rule) { if (!(frule <= r && lrule >= r)) continue; } printf("nat %u: %s\n", r, p+sizeof(int)); } } } ipfw_mod/ipfw/ipv6.c000644 000423 000000 00000031511 11151122421 015104 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. 
* * NEW command line interface for IP firewall facility * * $FreeBSD: head/sbin/ipfw/ipv6.c 187770 2009-01-27 12:01:30Z luigi $ * * ipv6 support */ #include #include #include "ipfw2.h" #include #include #include #include #include #include #include #include #include #include #include #include #include static struct _s_x icmp6codes[] = { { "no-route", ICMP6_DST_UNREACH_NOROUTE }, { "admin-prohib", ICMP6_DST_UNREACH_ADMIN }, { "address", ICMP6_DST_UNREACH_ADDR }, { "port", ICMP6_DST_UNREACH_NOPORT }, { NULL, 0 } }; void fill_unreach6_code(u_short *codep, char *str) { int val; char *s; val = strtoul(str, &s, 0); if (s == str || *s != '\0' || val >= 0x100) val = match_token(icmp6codes, str); if (val < 0) errx(EX_DATAERR, "unknown ICMPv6 unreachable code ``%s''", str); *codep = val; return; } void print_unreach6_code(uint16_t code) { char const *s = match_value(icmp6codes, code); if (s != NULL) printf("unreach6 %s", s); else printf("unreach6 %u", code); } /* * Print the ip address contained in a command. */ void print_ip6(ipfw_insn_ip6 *cmd, char const *s) { struct hostent *he = NULL; int len = F_LEN((ipfw_insn *) cmd) - 1; struct in6_addr *a = &(cmd->addr6); char trad[255]; printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s); if (cmd->o.opcode == O_IP6_SRC_ME || cmd->o.opcode == O_IP6_DST_ME) { printf("me6"); return; } if (cmd->o.opcode == O_IP6) { printf(" ip6"); return; } /* * len == 4 indicates a single IP, whereas lists of 1 or more * addr/mask pairs have len = (2n+1). We convert len to n so we * use that to count the number of entries. */ for (len = len / 4; len > 0; len -= 2, a += 2) { int mb = /* mask length */ (cmd->o.opcode == O_IP6_SRC || cmd->o.opcode == O_IP6_DST) ? 
128 : contigmask((uint8_t *)&(a[1]), 128); if (mb == 128 && co.do_resolv) he = gethostbyaddr((char *)a, sizeof(*a), AF_INET6); if (he != NULL) /* resolved to name */ printf("%s", he->h_name); else if (mb == 0) /* any */ printf("any"); else { /* numeric IP followed by some kind of mask */ if (inet_ntop(AF_INET6, a, trad, sizeof( trad ) ) == NULL) printf("Error ntop in print_ip6\n"); printf("%s", trad ); if (mb < 0) /* XXX not really legal... */ printf(":%s", inet_ntop(AF_INET6, &a[1], trad, sizeof(trad))); else if (mb < 128) printf("/%d", mb); } if (len > 2) printf(","); } } void fill_icmp6types(ipfw_insn_icmp6 *cmd, char *av) { uint8_t type; bzero(cmd, sizeof(*cmd)); while (*av) { if (*av == ',') av++; type = strtoul(av, &av, 0); if (*av != ',' && *av != '\0') errx(EX_DATAERR, "invalid ICMP6 type"); /* * XXX: shouldn't this be 0xFF? I can't see any reason why * we shouldn't be able to filter all possiable values * regardless of the ability of the rest of the kernel to do * anything useful with them. 
*/ if (type > ICMP6_MAXTYPE) errx(EX_DATAERR, "ICMP6 type out of range"); cmd->d[type / 32] |= ( 1 << (type % 32)); } cmd->o.opcode = O_ICMP6TYPE; cmd->o.len |= F_INSN_SIZE(ipfw_insn_icmp6); } void print_icmp6types(ipfw_insn_u32 *cmd) { int i, j; char sep= ' '; printf(" ip6 icmp6types"); for (i = 0; i < 7; i++) for (j=0; j < 32; ++j) { if ( (cmd->d[i] & (1 << (j))) == 0) continue; printf("%c%d", sep, (i*32 + j)); sep = ','; } } void print_flow6id( ipfw_insn_u32 *cmd) { uint16_t i, limit = cmd->o.arg1; char sep = ','; printf(" flow-id "); for( i=0; i < limit; ++i) { if (i == limit - 1) sep = ' '; printf("%d%c", cmd->d[i], sep); } } /* structure and define for the extension header in ipv6 */ static struct _s_x ext6hdrcodes[] = { { "frag", EXT_FRAGMENT }, { "hopopt", EXT_HOPOPTS }, { "route", EXT_ROUTING }, { "dstopt", EXT_DSTOPTS }, { "ah", EXT_AH }, { "esp", EXT_ESP }, { "rthdr0", EXT_RTHDR0 }, { "rthdr2", EXT_RTHDR2 }, { NULL, 0 } }; /* fills command for the extension header filtering */ int fill_ext6hdr( ipfw_insn *cmd, char *av) { int tok; char *s = av; cmd->arg1 = 0; while(s) { av = strsep( &s, ",") ; tok = match_token(ext6hdrcodes, av); switch (tok) { case EXT_FRAGMENT: cmd->arg1 |= EXT_FRAGMENT; break; case EXT_HOPOPTS: cmd->arg1 |= EXT_HOPOPTS; break; case EXT_ROUTING: cmd->arg1 |= EXT_ROUTING; break; case EXT_DSTOPTS: cmd->arg1 |= EXT_DSTOPTS; break; case EXT_AH: cmd->arg1 |= EXT_AH; break; case EXT_ESP: cmd->arg1 |= EXT_ESP; break; case EXT_RTHDR0: cmd->arg1 |= EXT_RTHDR0; break; case EXT_RTHDR2: cmd->arg1 |= EXT_RTHDR2; break; default: errx( EX_DATAERR, "invalid option for ipv6 exten header" ); break; } } if (cmd->arg1 == 0 ) return 0; cmd->opcode = O_EXT_HDR; cmd->len |= F_INSN_SIZE( ipfw_insn ); return 1; } void print_ext6hdr( ipfw_insn *cmd ) { char sep = ' '; printf(" extension header:"); if (cmd->arg1 & EXT_FRAGMENT ) { printf("%cfragmentation", sep); sep = ','; } if (cmd->arg1 & EXT_HOPOPTS ) { printf("%chop options", sep); sep = ','; } if (cmd->arg1 
& EXT_ROUTING ) { printf("%crouting options", sep); sep = ','; } if (cmd->arg1 & EXT_RTHDR0 ) { printf("%crthdr0", sep); sep = ','; } if (cmd->arg1 & EXT_RTHDR2 ) { printf("%crthdr2", sep); sep = ','; } if (cmd->arg1 & EXT_DSTOPTS ) { printf("%cdestination options", sep); sep = ','; } if (cmd->arg1 & EXT_AH ) { printf("%cauthentication header", sep); sep = ','; } if (cmd->arg1 & EXT_ESP ) { printf("%cencapsulated security payload", sep); } } /* Try to find ipv6 address by hostname */ static int lookup_host6 (char *host, struct in6_addr *ip6addr) { struct hostent *he; if (!inet_pton(AF_INET6, host, ip6addr)) { if ((he = gethostbyname2(host, AF_INET6)) == NULL) return(-1); memcpy(ip6addr, he->h_addr_list[0], sizeof( struct in6_addr)); } return(0); } /* * fill the addr and mask fields in the instruction as appropriate from av. * Update length as appropriate. * The following formats are allowed: * any matches any IP6. Actually returns an empty instruction. * me returns O_IP6_*_ME * * 03f1::234:123:0342 single IP6 addres * 03f1::234:123:0342/24 address/mask * 03f1::234:123:0342/24,03f1::234:123:0343/ List of address * * Set of address (as in ipv6) not supported because ipv6 address * are typically random past the initial prefix. * Return 1 on success, 0 on failure. */ static int fill_ip6(ipfw_insn_ip6 *cmd, char *av) { int len = 0; struct in6_addr *d = &(cmd->addr6); /* * Needed for multiple address. * Note d[1] points to struct in6_add r mask6 of cmd */ cmd->o.len &= ~F_LEN_MASK; /* zero len */ if (strcmp(av, "any") == 0) return (1); if (strcmp(av, "me") == 0) { /* Set the data for "me" opt*/ cmd->o.len |= F_INSN_SIZE(ipfw_insn); return (1); } if (strcmp(av, "me6") == 0) { /* Set the data for "me" opt*/ cmd->o.len |= F_INSN_SIZE(ipfw_insn); return (1); } av = strdup(av); while (av) { /* * After the address we can have '/' indicating a mask, * or ',' indicating another address follows. 
*/ char *p; int masklen; char md = '\0'; if ((p = strpbrk(av, "/,")) ) { md = *p; /* save the separator */ *p = '\0'; /* terminate address string */ p++; /* and skip past it */ } /* now p points to NULL, mask or next entry */ /* lookup stores address in *d as a side effect */ if (lookup_host6(av, d) != 0) { /* XXX: failed. Free memory and go */ errx(EX_DATAERR, "bad address \"%s\"", av); } /* next, look at the mask, if any */ masklen = (md == '/') ? atoi(p) : 128; if (masklen > 128 || masklen < 0) errx(EX_DATAERR, "bad width \"%s\''", p); else n2mask(&d[1], masklen); APPLY_MASK(d, &d[1]) /* mask base address with mask */ /* find next separator */ if (md == '/') { /* find separator past the mask */ p = strpbrk(p, ","); if (p != NULL) p++; } av = p; /* Check this entry */ if (masklen == 0) { /* * 'any' turns the entire list into a NOP. * 'not any' never matches, so it is removed from the * list unless it is the only item, in which case we * report an error. */ if (cmd->o.len & F_NOT && av == NULL && len == 0) errx(EX_DATAERR, "not any never matches"); continue; } /* * A single IP can be stored alone */ if (masklen == 128 && av == NULL && len == 0) { len = F_INSN_SIZE(struct in6_addr); break; } /* Update length and pointer to arguments */ len += F_INSN_SIZE(struct in6_addr)*2; d += 2; } /* end while */ /* * Total length of the command, remember that 1 is the size of * the base command. 
*/ if (len + 1 > F_LEN_MASK) errx(EX_DATAERR, "address list too long"); cmd->o.len |= len+1; free(av); return (1); } /* * fills command for ipv6 flow-id filtering * note that the 20 bit flow number is stored in a array of u_int32_t * it's supported lists of flow-id, so in the o.arg1 we store how many * additional flow-id we want to filter, the basic is 1 */ void fill_flow6( ipfw_insn_u32 *cmd, char *av ) { u_int32_t type; /* Current flow number */ u_int16_t nflow = 0; /* Current flow index */ char *s = av; cmd->d[0] = 0; /* Initializing the base number*/ while (s) { av = strsep( &s, ",") ; type = strtoul(av, &av, 0); if (*av != ',' && *av != '\0') errx(EX_DATAERR, "invalid ipv6 flow number %s", av); if (type > 0xfffff) errx(EX_DATAERR, "flow number out of range %s", av); cmd->d[nflow] |= type; nflow++; } if( nflow > 0 ) { cmd->o.opcode = O_FLOW6ID; cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + nflow; cmd->o.arg1 = nflow; } else { errx(EX_DATAERR, "invalid ipv6 flow number %s", av); } } ipfw_insn * add_srcip6(ipfw_insn *cmd, char *av) { fill_ip6((ipfw_insn_ip6 *)cmd, av); if (F_LEN(cmd) == 0) { /* any */ } else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) { /* "me" */ cmd->opcode = O_IP6_SRC_ME; } else if (F_LEN(cmd) == (F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn))) { /* single IP, no mask*/ cmd->opcode = O_IP6_SRC; } else { /* addr/mask opt */ cmd->opcode = O_IP6_SRC_MASK; } return cmd; } ipfw_insn * add_dstip6(ipfw_insn *cmd, char *av) { fill_ip6((ipfw_insn_ip6 *)cmd, av); if (F_LEN(cmd) == 0) { /* any */ } else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) { /* "me" */ cmd->opcode = O_IP6_DST_ME; } else if (F_LEN(cmd) == (F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn))) { /* single IP, no mask*/ cmd->opcode = O_IP6_DST; } else { /* addr/mask opt */ cmd->opcode = O_IP6_DST_MASK; } return cmd; } ipfw_mod/ipfw/main.c000644 000423 000000 00000032012 11170405731 015152 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 
Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. * * Command line interface for IP firewall facility * * $FreeBSD: head/sbin/ipfw/main.c 187767 2009-01-27 10:18:55Z luigi $ */ #include #include #include #include #include #include #include #include #include #include #include "ipfw2.h" static void help(void) { fprintf(stderr, "ipfw syntax summary (but please do read the ipfw(8) manpage):\n\n" "\tipfw [-abcdefhnNqStTv] \n\n" "where is one of the following:\n\n" "add [num] [set N] [prob x] RULE-BODY\n" "{pipe|queue} N config PIPE-BODY\n" "[pipe|queue] {zero|delete|show} [N{,N}]\n" "nat N config {ip IPADDR|if IFNAME|log|deny_in|same_ports|unreg_only|reset|\n" " reverse|proxy_only|redirect_addr linkspec|\n" " redirect_port linkspec|redirect_proto linkspec}\n" "set [disable N... enable N...] 
/*
 * Release a locally allocated copy of the command line: first the
 * 'ac' strings referenced by av[], then the vector itself.
 */
static void
free_args(int ac, char **av)
{
	char **slot = av;
	int remaining = ac;

	while (remaining-- > 0)
		free(*slot++);
	free(av);
}
*/ static int ipfw_main(int oldac, char **oldav) { int ch, ac, save_ac; const char *errstr; char **av, **save_av; int do_acct = 0; /* Show packet/byte count */ int try_next = 0; /* set if pipe cmd not found */ #define WHITESP " \t\f\v\n\r" if (oldac < 2) return 1; /* need at least one argument */ if (oldac == 2) { /* * If we are called with a single string, try to split it into * arguments for subsequent parsing. * But first, remove spaces after a ',', by copying the string * in-place. */ char *arg = oldav[1]; /* The string is the first arg. */ int l = strlen(arg); int copy = 0; /* 1 if we need to copy, 0 otherwise */ int i, j; for (i = j = 0; i < l; i++) { if (arg[i] == '#') /* comment marker */ break; if (copy) { arg[j++] = arg[i]; copy = !index("," WHITESP, arg[i]); } else { copy = !index(WHITESP, arg[i]); if (copy) arg[j++] = arg[i]; } } if (!copy && j > 0) /* last char was a 'blank', remove it */ j--; l = j; /* the new argument length */ arg[j++] = '\0'; if (l == 0) /* empty string! */ return 1; /* * First, count number of arguments. Because of the previous * processing, this is just the number of blanks plus 1. */ for (i = 0, ac = 1; i < l; i++) if (index(WHITESP, arg[i]) != NULL) ac++; /* * Allocate the argument list, including one entry for * the program name because getopt expects it. */ av = safe_calloc(ac + 1, sizeof(char *)); /* * Second, copy arguments from arg[] to av[]. For each one, * j is the initial character, i is the one past the end. */ for (ac = 1, i = j = 0; i < l; i++) if (index(WHITESP, arg[i]) != NULL || i == l-1) { if (i == l-1) i++; av[ac] = safe_calloc(i-j+1, 1); bcopy(arg+j, av[ac], i-j); ac++; j = i + 1; } } else { /* * If an argument ends with ',' join with the next one. */ int first, i, l; av = safe_calloc(oldac, sizeof(char *)); for (first = i = ac = 1, l = 0; i < oldac; i++) { char *arg = oldav[i]; int k = strlen(arg); l += k; if (arg[k-1] != ',' || i == oldac-1) { /* Time to copy. 
*/ av[ac] = safe_calloc(l+1, 1); for (l=0; first <= i; first++) { strcat(av[ac]+l, oldav[first]); l += strlen(oldav[first]); } ac++; l = 0; first = i+1; } } } av[0] = strdup(oldav[0]); /* copy progname from the caller */ /* Set the force flag for non-interactive processes */ if (!co.do_force) co.do_force = !isatty(STDIN_FILENO); /* Save arguments for final freeing of memory. */ save_ac = ac; save_av = av; optind = optreset = 1; /* restart getopt() */ while ((ch = getopt(ac, av, "abcdefhinNqs:STtv")) != -1) switch (ch) { case 'a': do_acct = 1; break; case 'b': co.comment_only = 1; co.do_compact = 1; break; case 'c': co.do_compact = 1; break; case 'd': co.do_dynamic = 1; break; case 'e': co.do_expired = 1; break; case 'f': co.do_force = 1; break; case 'h': /* help */ free_args(save_ac, save_av); help(); break; /* NOTREACHED */ case 'i': co.do_value_as_ip = 1; break; case 'n': co.test_only = 1; break; case 'N': co.do_resolv = 1; break; case 'q': co.do_quiet = 1; break; case 's': /* sort */ co.do_sort = atoi(optarg); break; case 'S': co.show_sets = 1; break; case 't': co.do_time = 1; break; case 'T': co.do_time = 2; /* numeric timestamp */ break; case 'v': /* verbose */ co.verbose = 1; break; default: free_args(save_ac, save_av); return 1; } ac -= optind; av += optind; NEED1("bad arguments, for usage summary ``ipfw''"); /* * An undocumented behaviour of ipfw1 was to allow rule numbers first, * e.g. "100 add allow ..." instead of "add 100 allow ...". * In case, swap first and second argument to get the normal form. */ if (ac > 1 && isdigit(*av[0])) { char *p = av[0]; av[0] = av[1]; av[1] = p; } /* * Optional: pipe, queue or nat. 
*/ co.do_nat = 0; co.do_pipe = 0; if (!strncmp(*av, "nat", strlen(*av))) co.do_nat = 1; else if (!strncmp(*av, "pipe", strlen(*av))) co.do_pipe = 1; else if (_substrcmp(*av, "queue") == 0) co.do_pipe = 2; else if (!strncmp(*av, "set", strlen(*av))) { if (ac > 1 && isdigit(av[1][0])) { co.use_set = strtonum(av[1], 0, resvd_set_number, &errstr); if (errstr) errx(EX_DATAERR, "invalid set number %s\n", av[1]); ac -= 2; av += 2; co.use_set++; } } if (co.do_pipe || co.do_nat) { ac--; av++; } NEED1("missing command"); /* * For pipes, queues and nats we normally say 'nat|pipe NN config' * but the code is easier to parse as 'nat|pipe config NN' * so we swap the two arguments. */ if ((co.do_pipe || co.do_nat) && ac > 1 && isdigit(*av[0])) { char *p = av[0]; av[0] = av[1]; av[1] = p; } if (co.use_set == 0) { if (_substrcmp(*av, "add") == 0) ipfw_add(ac, av); else if (co.do_nat && _substrcmp(*av, "show") == 0) ipfw_show_nat(ac, av); else if (co.do_pipe && _substrcmp(*av, "config") == 0) ipfw_config_pipe(ac, av); else if (co.do_nat && _substrcmp(*av, "config") == 0) ipfw_config_nat(ac, av); else if (_substrcmp(*av, "set") == 0) ipfw_sets_handler(ac, av); else if (_substrcmp(*av, "table") == 0) ipfw_table_handler(ac, av); else if (_substrcmp(*av, "enable") == 0) ipfw_sysctl_handler(ac, av, 1); else if (_substrcmp(*av, "disable") == 0) ipfw_sysctl_handler(ac, av, 0); else try_next = 1; } if (co.use_set || try_next) { if (_substrcmp(*av, "delete") == 0) ipfw_delete(ac, av); else if (_substrcmp(*av, "flush") == 0) ipfw_flush(co.do_force); else if (_substrcmp(*av, "zero") == 0) ipfw_zero(ac, av, 0 /* IP_FW_ZERO */); else if (_substrcmp(*av, "resetlog") == 0) ipfw_zero(ac, av, 1 /* IP_FW_RESETLOG */); else if (_substrcmp(*av, "print") == 0 || _substrcmp(*av, "list") == 0) ipfw_list(ac, av, do_acct); else if (_substrcmp(*av, "show") == 0) ipfw_list(ac, av, 1 /* show counters */); else errx(EX_USAGE, "bad command `%s'", *av); } /* Free memory allocated in the argument parsing. 
*/ free_args(save_ac, save_av); return 0; } static void ipfw_readfile(int ac, char *av[]) { #define MAX_ARGS 32 char buf[BUFSIZ]; char *progname = av[0]; /* original program name */ const char *cmd = NULL; /* preprocessor name, if any */ const char *filename = av[ac-1]; /* file to read */ int c, lineno=0; FILE *f = NULL; pid_t preproc = 0; while ((c = getopt(ac, av, "cfNnp:qS")) != -1) { switch(c) { case 'c': co.do_compact = 1; break; case 'f': co.do_force = 1; break; case 'N': co.do_resolv = 1; break; case 'n': co.test_only = 1; break; case 'p': /* * ipfw -p cmd [args] filename * * We are done with getopt(). All arguments * except the filename go to the preprocessor, * so we need to do the following: * - check that a filename is actually present; * - advance av by optind-1 to skip arguments * already processed; * - decrease ac by optind, to remove the args * already processed and the final filename; * - set the last entry in av[] to NULL so * popen() can detect the end of the array; * - set optind=ac to let getopt() terminate. */ if (optind == ac) errx(EX_USAGE, "no filename argument"); cmd = optarg; av[ac-1] = NULL; av += optind - 1; ac -= optind; optind = ac; break; case 'q': co.do_quiet = 1; break; case 'S': co.show_sets = 1; break; default: errx(EX_USAGE, "bad arguments, for usage" " summary ``ipfw''"); } } if (cmd == NULL && ac != optind + 1) errx(EX_USAGE, "extraneous filename arguments %s", av[ac-1]); if ((f = fopen(filename, "r")) == NULL) err(EX_UNAVAILABLE, "fopen: %s", filename); if (cmd != NULL) { /* pipe through preprocessor */ int pipedes[2]; if (pipe(pipedes) == -1) err(EX_OSERR, "cannot create pipe"); preproc = fork(); if (preproc == -1) err(EX_OSERR, "cannot fork"); if (preproc == 0) { /* * Child, will run the preprocessor with the * file on stdin and the pipe on stdout. 
*/ if (dup2(fileno(f), 0) == -1 || dup2(pipedes[1], 1) == -1) err(EX_OSERR, "dup2()"); fclose(f); close(pipedes[1]); close(pipedes[0]); execvp(cmd, av); err(EX_OSERR, "execvp(%s) failed", cmd); } else { /* parent, will reopen f as the pipe */ fclose(f); close(pipedes[1]); if ((f = fdopen(pipedes[0], "r")) == NULL) { int savederrno = errno; (void)kill(preproc, SIGTERM); errno = savederrno; err(EX_OSERR, "fdopen()"); } } } while (fgets(buf, BUFSIZ, f)) { /* read commands */ char linename[10]; char *args[2]; lineno++; sprintf(linename, "Line %d", lineno); setprogname(linename); /* XXX */ args[0] = progname; args[1] = buf; ipfw_main(2, args); } fclose(f); if (cmd != NULL) { int status; if (waitpid(preproc, &status, 0) == -1) errx(EX_OSERR, "waitpid()"); if (WIFEXITED(status) && WEXITSTATUS(status) != EX_OK) errx(EX_UNAVAILABLE, "preprocessor exited with status %d", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) errx(EX_UNAVAILABLE, "preprocessor exited with signal %d", WTERMSIG(status)); } } int main(int ac, char *av[]) { /* * If the last argument is an absolute pathname, interpret it * as a file to be preprocessed. */ if (ac > 1 && av[ac - 1][0] == '/' && access(av[ac - 1], R_OK) == 0) ipfw_readfile(ac, av); else { if (ipfw_main(ac, av)) { errx(EX_USAGE, "usage: ipfw [options]\n" "do \"ipfw -h\" or \"man ipfw\" for details"); } } return EX_OK; } ipfw_mod/ipfw/ipfw2.c000644 000423 000000 00000263563 11310503103 015262 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. 
/*
 * GET_UINT_ARG(arg, min, max, tok, s_x)
 *
 * Parse the current command-line word as a decimal number and store it
 * in 'arg'.  The macro relies on two names from the caller's scope:
 * 'ac' (words left) and 'av' (pointer to the current word).
 *
 * - no word left:		errx(EX_USAGE, "... missing argument")
 * - word matches "tablearg":	arg = IP_FW_TABLEARG, nothing else checked
 * - not a plain number:	errx(EX_DATAERR, "... invalid argument")
 * - outside [min, max]:	errx(EX_DATAERR, "... out of range")
 * - equal to IP_FW_TABLEARG:	errx(EX_DATAERR, "... illegal argument value")
 *
 * 'tok' and 's_x' are only used to look up, with match_value(), the
 * option name shown in the error messages.
 *
 * errno is cleared before strtol(): the function sets it only on
 * failure, so a stale ERANGE/EINVAL left by an earlier call would
 * otherwise make a valid number look invalid.
 */
#define GET_UINT_ARG(arg, min, max, tok, s_x) do {			\
	if (!ac)							\
		errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \
	if (_substrcmp(*av, "tablearg") == 0) {				\
		arg = IP_FW_TABLEARG;					\
		break;							\
	}								\
									\
	{								\
	long val;							\
	char *end;							\
									\
	errno = 0;							\
	val = strtol(*av, &end, 10);					\
									\
	if (!isdigit((unsigned char)**av) || *end != '\0' ||		\
	    (val == 0 && errno == EINVAL))				\
		errx(EX_DATAERR, "%s: invalid argument: %s",		\
		    match_value(s_x, tok), *av);			\
									\
	if (errno == ERANGE || val < (min) || val > (max))		\
		errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \
		    match_value(s_x, tok), (min), (max), *av);		\
									\
	if (val == IP_FW_TABLEARG)					\
		errx(EX_DATAERR, "%s: illegal argument value: %s",	\
		    match_value(s_x, tok), *av);			\
	arg = val;							\
	}								\
} while (0)
/*
 * Name -> value table for IP options: each keyword is paired with one
 * of the IP_FW_IPOPT_* constants.
 *
 * IP options span the range 0 to 255 so we need to remap them
 * (though in fact only the low 5 bits are significant).
 *
 * Layout: the real keywords come first, then an entry whose value is 0,
 * then the { NULL, 0 } terminator.  Loops that walk a table until they
 * meet a value of 0 stop at the "ip option" entry.
 * NOTE(review): the string of that entry looks like a human-readable
 * name for the table, presumably used in error messages -- confirm
 * against the code that parses these flags.
 */
static struct _s_x f_ipopts[] = {
	{ "ssrr",	IP_FW_IPOPT_SSRR},
	{ "lsrr",	IP_FW_IPOPT_LSRR},
	{ "rr",		IP_FW_IPOPT_RR},
	{ "ts",		IP_FW_IPOPT_TS},
	{ "ip option",	0 },
	{ NULL,	0 }
};
/*
 * Keyword -> token table for the parameters that may accompany a rule
 * action (the "PARAMS" part of "ACTION [PARAMS]" in the usage summary).
 * Each entry pairs the word typed by the user with its TOK_* value; the
 * list ends at the entry whose string is NULL.
 * NOTE(review): "tag"/"untag" are not listed in the usage summary;
 * presumably they are parsed in the same place as "log" and "altq" --
 * confirm against the rule parser.
 */
static struct _s_x rule_action_params[] = {
	{ "altq",		TOK_ALTQ },
	{ "log",		TOK_LOG },
	{ "tag",		TOK_TAG },
	{ "untag",		TOK_UNTAG },
	{ NULL, 0 }	/* terminator */
};
/*
 * index of 'lookup ... ' keys in the kernel
 *
 * The position of a token in this array is the numeric key index; the
 * array maps that index back to the TOK_* value of the matching rule
 * option.  print_ip() uses it this way: it reads the index from an
 * O_IP_DST_LOOKUP instruction, checks it against the array size and
 * then prints the option name found for lookup_key[index].
 * NOTE(review): the trailing 0 is presumably an end marker for code
 * that scans the array; it is counted by sizeof-based bound checks, so
 * an index equal to the last slot yields token 0 -- confirm this is
 * intended.
 */
static int lookup_key[] = {
	TOK_DSTIP, TOK_SRCIP, TOK_DSTPORT, TOK_SRCPORT,
	TOK_UID, TOK_GID, TOK_JAIL,
	TOK_PROTO, TOK_MACTYPE, 0, };
/*
 * Return the 64-bit value stored at 'pll' as an unsigned long long.
 *
 * The value is copied byte-wise rather than dereferenced.  The result
 * type is unsigned long long so that callers can print it with %llu
 * without needing PRIu64.
 * memcpy() replaces the legacy bcopy(), which is no longer part of
 * POSIX.
 */
unsigned long long
align_uint64(const uint64_t *pll)
{
	uint64_t ret;

	memcpy(&ret, pll, sizeof(ret));
	return ret;
}

/*
 * calloc() wrapper that never returns NULL: on allocation failure the
 * program terminates through err(EX_OSERR, "calloc").
 *
 * A request for zero bytes is turned into a 1-byte allocation, because
 * calloc() is allowed to return NULL for a zero-size request and that
 * must not be reported as an out-of-memory error.
 * The returned memory is zero-filled and must be released with free().
 */
void *
safe_calloc(size_t number, size_t size)
{
	void *ret;

	if (number == 0 || size == 0)
		number = size = 1;
	ret = calloc(number, size);
	if (ret == NULL)
		err(EX_OSERR, "calloc");
	return ret;
}

/*
 * realloc() wrapper that never returns NULL: on allocation failure the
 * program terminates through err(EX_OSERR, "realloc").
 *
 * A new size of zero is turned into 1 byte: realloc(ptr, 0) may free
 * the block and return NULL, which is not an allocation failure.
 * As with realloc(), 'ptr' must not be used after the call; use the
 * returned pointer instead.
 */
void *
safe_realloc(void *ptr, size_t size)
{
	void *ret = realloc(ptr, size != 0 ? size : 1);

	if (ret == NULL)
		err(EX_OSERR, "realloc");
	return ret;
}
/*
 * _substrcmp: prefix-tolerant keyword comparison.
 *
 * Returns 0 when str1 equals str2 or is a leading part of it, and 1
 * otherwise.  A match that is only a prefix (the lengths differ) is
 * still accepted, but a deprecation warning is written to stderr.
 *
 * This function will be removed in the future through the usual
 * deprecation process.
 */
int
_substrcmp(const char *str1, const char* str2)
{
	const size_t typed_len = strlen(str1);

	if (strncmp(str1, str2, typed_len) == 0) {
		if (strlen(str2) != typed_len)
			warnx("DEPRECATED: '%s' matched '%s' as a sub-string",
			    str1, str2);
		return 0;
	}
	return 1;
}

/*
 * _substrcmp2: accept any word that starts with a given abbreviation.
 *
 * Returns 0 when str1 begins with str2 and 1 otherwise.  When it does
 * match but str1 is not exactly the canonical spelling str3, a
 * deprecation warning is written to stderr.
 *
 * It exists to flag the old strncmp(str, "by", 2) construction, which
 * let users abbreviate "bytes" but, besides "by", "byt", "byte" and
 * "bytes", also accepted "by_rabid_dogs" or anything else starting
 * with "by".
 *
 * This function will be removed in the future through the usual
 * deprecation process.
 */
int
_substrcmp2(const char *str1, const char* str2, const char* str3)
{
	const int has_prefix = (strncmp(str1, str2, strlen(str2)) == 0);

	if (!has_prefix)
		return 1;

	if (strcmp(str1, str3) != 0)
		warnx("DEPRECATED: '%s' matched '%s'", str1, str3);
	return 0;
}
*/ static int strtoport(char *s, char **end, int base, int proto) { char *p, *buf; char *s1; int i; *end = s; /* default - not found */ if (*s == '\0') return 0; /* not found */ if (isdigit(*s)) return strtol(s, end, base); /* * find separator. '\\' escapes the next char. */ for (s1 = s; *s1 && (isalnum(*s1) || *s1 == '\\') ; s1++) if (*s1 == '\\' && s1[1] != '\0') s1++; buf = safe_calloc(s1 - s + 1, 1); /* * copy into a buffer skipping backslashes */ for (p = s, i = 0; p != s1 ; p++) if (*p != '\\') buf[i++] = *p; buf[i++] = '\0'; if (proto == IPPROTO_ETHERTYPE) { i = match_token(ether_types, buf); free(buf); if (i != -1) { /* found */ *end = s1; return i; } } else { struct protoent *pe = NULL; struct servent *se; if (proto != 0) pe = getprotobynumber(proto); setservent(1); se = getservbyname(buf, pe ? pe->p_name : NULL); free(buf); if (se != NULL) { *end = s1; return ntohs(se->s_port); } } return 0; /* not found */ } /* * Fill the body of the command with the list of port ranges. */ static int fill_newports(ipfw_insn_u16 *cmd, char *av, int proto) { uint16_t a, b, *p = cmd->ports; int i = 0; char *s = av; while (*s) { a = strtoport(av, &s, 0, proto); if (s == av) /* empty or invalid argument */ return (0); switch (*s) { case '-': /* a range */ av = s + 1; b = strtoport(av, &s, 0, proto); /* Reject expressions like '1-abc' or '1-2-3'. 
*/ if (s == av || (*s != ',' && *s != '\0')) return (0); p[0] = a; p[1] = b; break; case ',': /* comma separated list */ case '\0': p[0] = p[1] = a; break; default: warnx("port list: invalid separator <%c> in <%s>", *s, av); return (0); } i++; p += 2; av = s + 1; } if (i > 0) { if (i + 1 > F_LEN_MASK) errx(EX_DATAERR, "too many ports/ranges\n"); cmd->o.len |= i + 1; /* leave F_NOT and F_OR untouched */ } return (i); } static struct _s_x icmpcodes[] = { { "net", ICMP_UNREACH_NET }, { "host", ICMP_UNREACH_HOST }, { "protocol", ICMP_UNREACH_PROTOCOL }, { "port", ICMP_UNREACH_PORT }, { "needfrag", ICMP_UNREACH_NEEDFRAG }, { "srcfail", ICMP_UNREACH_SRCFAIL }, { "net-unknown", ICMP_UNREACH_NET_UNKNOWN }, { "host-unknown", ICMP_UNREACH_HOST_UNKNOWN }, { "isolated", ICMP_UNREACH_ISOLATED }, { "net-prohib", ICMP_UNREACH_NET_PROHIB }, { "host-prohib", ICMP_UNREACH_HOST_PROHIB }, { "tosnet", ICMP_UNREACH_TOSNET }, { "toshost", ICMP_UNREACH_TOSHOST }, { "filter-prohib", ICMP_UNREACH_FILTER_PROHIB }, { "host-precedence", ICMP_UNREACH_HOST_PRECEDENCE }, { "precedence-cutoff", ICMP_UNREACH_PRECEDENCE_CUTOFF }, { NULL, 0 } }; static void fill_reject_code(u_short *codep, char *str) { int val; char *s; val = strtoul(str, &s, 0); if (s == str || *s != '\0' || val >= 0x100) val = match_token(icmpcodes, str); if (val < 0) errx(EX_DATAERR, "unknown ICMP unreachable code ``%s''", str); *codep = val; return; } static void print_reject_code(uint16_t code) { char const *s = match_value(icmpcodes, code); if (s != NULL) printf("unreach %s", s); else printf("unreach %u", code); } /* * Returns the number of bits set (from left) in a contiguous bitmask, * or -1 if the mask is not contiguous. * XXX this needs a proper fix. * This effectively works on masks in big-endian (network) format. * when compiled on little endian architectures. * * First bit is bit 7 of the first byte -- note, for MAC addresses, * the first bit on the wire is bit 0 of the first byte. * len is the max length in bits. 
*/ int contigmask(uint8_t *p, int len) { int i, n; for (i=0; iarg1 & 0xff; uint8_t clear = (cmd->arg1 >> 8) & 0xff; if (list == f_tcpflags && set == TH_SYN && clear == TH_ACK) { printf(" setup"); return; } printf(" %s ", name); for (i=0; list[i].x != 0; i++) { if (set & list[i].x) { set &= ~list[i].x; printf("%s%s", comma, list[i].s); comma = ","; } if (clear & list[i].x) { clear &= ~list[i].x; printf("%s!%s", comma, list[i].s); comma = ","; } } } /* * Print the ip address contained in a command. */ static void print_ip(ipfw_insn_ip *cmd, char const *s) { struct hostent *he = NULL; int len = F_LEN((ipfw_insn *)cmd); uint32_t *a = ((ipfw_insn_u32 *)cmd)->d; if (cmd->o.opcode == O_IP_DST_LOOKUP && len > F_INSN_SIZE(ipfw_insn_u32)) { uint32_t d = a[1]; const char *arg = ""; if (d < sizeof(lookup_key)/sizeof(lookup_key[0])) arg = match_value(rule_options, lookup_key[d]); printf("%s lookup %s %d,%d", cmd->o.len & F_NOT ? " not": "", arg, cmd->o.arg1, a[0]); return; } printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s); if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) { printf("me"); return; } if (cmd->o.opcode == O_IP_SRC_LOOKUP || cmd->o.opcode == O_IP_DST_LOOKUP) { printf("table(%u", ((ipfw_insn *)cmd)->arg1); if (len == F_INSN_SIZE(ipfw_insn_u32)) printf(",%u", *a); printf(")"); return; } if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) { uint32_t x, *map = (uint32_t *)&(cmd->mask); int i, j; char comma = '{'; x = cmd->o.arg1 - 1; x = htonl( ~x ); cmd->addr.s_addr = htonl(cmd->addr.s_addr); printf("%s/%d", inet_ntoa(cmd->addr), contigmask((uint8_t *)&x, 32)); x = cmd->addr.s_addr = htonl(cmd->addr.s_addr); x &= 0xff; /* base */ /* * Print bits and ranges. * Locate first bit set (i), then locate first bit unset (j). * If we have 3+ consecutive bits set, then print them as a * range, otherwise only print the initial bit and rescan. 
*/ for (i=0; i < cmd->o.arg1; i++) if (map[i/32] & (1<<(i & 31))) { for (j=i+1; j < cmd->o.arg1; j++) if (!(map[ j/32] & (1<<(j & 31)))) break; printf("%c%d", comma, i+x); if (j>i+2) { /* range has at least 3 elements */ printf("-%d", j-1+x); i = j-1; } comma = ','; } printf("}"); return; } /* * len == 2 indicates a single IP, whereas lists of 1 or more * addr/mask pairs have len = (2n+1). We convert len to n so we * use that to count the number of entries. */ for (len = len / 2; len > 0; len--, a += 2) { int mb = /* mask length */ (cmd->o.opcode == O_IP_SRC || cmd->o.opcode == O_IP_DST) ? 32 : contigmask((uint8_t *)&(a[1]), 32); if (mb == 32 && co.do_resolv) he = gethostbyaddr((char *)&(a[0]), sizeof(u_long), AF_INET); if (he != NULL) /* resolved to name */ printf("%s", he->h_name); else if (mb == 0) /* any */ printf("any"); else { /* numeric IP followed by some kind of mask */ printf("%s", inet_ntoa( *((struct in_addr *)&a[0]) ) ); if (mb < 0) printf(":%s", inet_ntoa( *((struct in_addr *)&a[1]) ) ); else if (mb < 32) printf("/%d", mb); } if (len > 1) printf(","); } } /* * prints a MAC address/mask pair */ static void print_mac(uint8_t *addr, uint8_t *mask) { int l = contigmask(mask, 48); if (l == 0) printf(" any"); else { printf(" %02x:%02x:%02x:%02x:%02x:%02x", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); if (l == -1) printf("&%02x:%02x:%02x:%02x:%02x:%02x", mask[0], mask[1], mask[2], mask[3], mask[4], mask[5]); else if (l < 48) printf("/%d", l); } } static void fill_icmptypes(ipfw_insn_u32 *cmd, char *av) { uint8_t type; cmd->d[0] = 0; while (*av) { if (*av == ',') av++; type = strtoul(av, &av, 0); if (*av != ',' && *av != '\0') errx(EX_DATAERR, "invalid ICMP type"); if (type > 31) errx(EX_DATAERR, "ICMP type out of range"); cmd->d[0] |= 1 << type; } cmd->o.opcode = O_ICMPTYPE; cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); } static void print_icmptypes(ipfw_insn_u32 *cmd) { int i; char sep= ' '; printf(" icmptypes"); for (i = 0; i < 32; i++) { if ( 
(cmd->d[0] & (1 << (i))) == 0) continue; printf("%c%d", sep, i); sep = ','; } } /* * show_ipfw() prints the body of an ipfw rule. * Because the standard rule has at least proto src_ip dst_ip, we use * a helper function to produce these entries if not provided explicitly. * The first argument is the list of fields we have, the second is * the list of fields we want to be printed. * * Special cases if we have provided a MAC header: * + if the rule does not contain IP addresses/ports, do not print them; * + if the rule does not contain an IP proto, print "all" instead of "ip"; * * Once we have 'have_options', IP header fields are printed as options. */ #define HAVE_PROTO 0x0001 #define HAVE_SRCIP 0x0002 #define HAVE_DSTIP 0x0004 #define HAVE_PROTO4 0x0008 #define HAVE_PROTO6 0x0010 #define HAVE_OPTIONS 0x8000 #define HAVE_IP (HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP) static void show_prerequisites(int *flags, int want, int cmd __unused) { if (co.comment_only) return; if ( (*flags & HAVE_IP) == HAVE_IP) *flags |= HAVE_OPTIONS; if ( !(*flags & HAVE_OPTIONS)) { if ( !(*flags & HAVE_PROTO) && (want & HAVE_PROTO)) { if ( (*flags & HAVE_PROTO4)) printf(" ip4"); else if ( (*flags & HAVE_PROTO6)) printf(" ip6"); else printf(" ip"); } if ( !(*flags & HAVE_SRCIP) && (want & HAVE_SRCIP)) printf(" from any"); if ( !(*flags & HAVE_DSTIP) && (want & HAVE_DSTIP)) printf(" to any"); } *flags |= want; } static void show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) { static int twidth = 0; int l; ipfw_insn *cmd, *tagptr = NULL; const char *comment = NULL; /* ptr to comment if we have one */ int proto = 0; /* default */ int flags = 0; /* prerequisites */ ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */ ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */ int or_block = 0; /* we are in an or block */ uint32_t set_disable; bcopy(&rule->next_rule, &set_disable, sizeof(set_disable)); if (set_disable & (1 << rule->set)) { /* disabled */ if (!co.show_sets) return; else 
printf("# DISABLED "); } printf("%05u ", rule->rulenum); if (pcwidth>0 || bcwidth>0) printf("%*llu %*llu ", pcwidth, align_uint64(&rule->pcnt), bcwidth, align_uint64(&rule->bcnt)); if (co.do_time == 2) printf("%10u ", rule->timestamp); else if (co.do_time == 1) { char timestr[30]; time_t t = (time_t)0; if (twidth == 0) { strcpy(timestr, ctime(&t)); *strchr(timestr, '\n') = '\0'; twidth = strlen(timestr); } if (rule->timestamp) { t = _long_to_time(rule->timestamp); strcpy(timestr, ctime(&t)); *strchr(timestr, '\n') = '\0'; printf("%s ", timestr); } else { printf("%*s", twidth, " "); } } if (co.show_sets) printf("set %d ", rule->set); /* * print the optional "match probability" */ if (rule->cmd_len > 0) { cmd = rule->cmd ; if (cmd->opcode == O_PROB) { ipfw_insn_u32 *p = (ipfw_insn_u32 *)cmd; double d = 1.0 * p->d[0]; d = (d / 0x7fffffff); printf("prob %f ", d); } } /* * first print actions */ for (l = rule->cmd_len - rule->act_ofs, cmd = ACTION_PTR(rule); l > 0 ; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { switch(cmd->opcode) { case O_CHECK_STATE: printf("check-state"); flags = HAVE_IP; /* avoid printing anything else */ break; case O_ACCEPT: printf("allow"); break; case O_COUNT: printf("count"); break; case O_DENY: printf("deny"); break; case O_REJECT: if (cmd->arg1 == ICMP_REJECT_RST) printf("reset"); else if (cmd->arg1 == ICMP_UNREACH_HOST) printf("reject"); else print_reject_code(cmd->arg1); break; case O_UNREACH6: if (cmd->arg1 == ICMP6_UNREACH_RST) printf("reset6"); else print_unreach6_code(cmd->arg1); break; case O_SKIPTO: PRINT_UINT_ARG("skipto ", cmd->arg1); break; case O_PIPE: PRINT_UINT_ARG("pipe ", cmd->arg1); break; case O_QUEUE: PRINT_UINT_ARG("queue ", cmd->arg1); break; case O_DIVERT: PRINT_UINT_ARG("divert ", cmd->arg1); break; case O_TEE: PRINT_UINT_ARG("tee ", cmd->arg1); break; case O_NETGRAPH: PRINT_UINT_ARG("netgraph ", cmd->arg1); break; case O_NGTEE: PRINT_UINT_ARG("ngtee ", cmd->arg1); break; case O_FORWARD_IP: { ipfw_insn_sa *s = (ipfw_insn_sa 
*)cmd; if (s->sa.sin_addr.s_addr == INADDR_ANY) { printf("fwd tablearg"); } else { printf("fwd %s", inet_ntoa(s->sa.sin_addr)); } if (s->sa.sin_port) printf(",%d", s->sa.sin_port); } break; case O_LOG: /* O_LOG is printed last */ logptr = (ipfw_insn_log *)cmd; break; case O_ALTQ: /* O_ALTQ is printed after O_LOG */ altqptr = (ipfw_insn_altq *)cmd; break; case O_TAG: tagptr = cmd; break; case O_NAT: PRINT_UINT_ARG("nat ", cmd->arg1); break; case O_SETFIB: PRINT_UINT_ARG("setfib ", cmd->arg1); break; case O_REASS: printf("reass"); break; default: printf("** unrecognized action %d len %d ", cmd->opcode, cmd->len); } } if (logptr) { if (logptr->max_log > 0) printf(" log logamount %d", logptr->max_log); else printf(" log"); } if (altqptr) { print_altq_cmd(altqptr); } if (tagptr) { if (tagptr->len & F_NOT) PRINT_UINT_ARG(" untag ", tagptr->arg1); else PRINT_UINT_ARG(" tag ", tagptr->arg1); } /* * then print the body. */ for (l = rule->act_ofs, cmd = rule->cmd ; l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { if ((cmd->len & F_OR) || (cmd->len & F_NOT)) continue; if (cmd->opcode == O_IP4) { flags |= HAVE_PROTO4; break; } else if (cmd->opcode == O_IP6) { flags |= HAVE_PROTO6; break; } } if (rule->_pad & 1) { /* empty rules before options */ if (!co.do_compact) { show_prerequisites(&flags, HAVE_PROTO, 0); printf(" from any to any"); } flags |= HAVE_IP | HAVE_OPTIONS; } if (co.comment_only) comment = "..."; for (l = rule->act_ofs, cmd = rule->cmd ; l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { /* useful alias */ ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; if (co.comment_only) { if (cmd->opcode != O_NOP) continue; printf(" // %s\n", (char *)(cmd + 1)); return; } show_prerequisites(&flags, 0, cmd->opcode); switch(cmd->opcode) { case O_PROB: break; /* done already */ case O_PROBE_STATE: break; /* no need to print anything here */ case O_IP_SRC: case O_IP_SRC_LOOKUP: case O_IP_SRC_MASK: case O_IP_SRC_ME: case O_IP_SRC_SET: show_prerequisites(&flags, HAVE_PROTO, 0); if (!(flags 
& HAVE_SRCIP)) printf(" from"); if ((cmd->len & F_OR) && !or_block) printf(" {"); print_ip((ipfw_insn_ip *)cmd, (flags & HAVE_OPTIONS) ? " src-ip" : ""); flags |= HAVE_SRCIP; break; case O_IP_DST: case O_IP_DST_LOOKUP: case O_IP_DST_MASK: case O_IP_DST_ME: case O_IP_DST_SET: show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); if (!(flags & HAVE_DSTIP)) printf(" to"); if ((cmd->len & F_OR) && !or_block) printf(" {"); print_ip((ipfw_insn_ip *)cmd, (flags & HAVE_OPTIONS) ? " dst-ip" : ""); flags |= HAVE_DSTIP; break; case O_IP6_SRC: case O_IP6_SRC_MASK: case O_IP6_SRC_ME: show_prerequisites(&flags, HAVE_PROTO, 0); if (!(flags & HAVE_SRCIP)) printf(" from"); if ((cmd->len & F_OR) && !or_block) printf(" {"); print_ip6((ipfw_insn_ip6 *)cmd, (flags & HAVE_OPTIONS) ? " src-ip6" : ""); flags |= HAVE_SRCIP | HAVE_PROTO; break; case O_IP6_DST: case O_IP6_DST_MASK: case O_IP6_DST_ME: show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); if (!(flags & HAVE_DSTIP)) printf(" to"); if ((cmd->len & F_OR) && !or_block) printf(" {"); print_ip6((ipfw_insn_ip6 *)cmd, (flags & HAVE_OPTIONS) ? " dst-ip6" : ""); flags |= HAVE_DSTIP; break; case O_FLOW6ID: print_flow6id( (ipfw_insn_u32 *) cmd ); flags |= HAVE_OPTIONS; break; case O_IP_DSTPORT: show_prerequisites(&flags, HAVE_IP, 0); case O_IP_SRCPORT: show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); if ((cmd->len & F_OR) && !or_block) printf(" {"); if (cmd->len & F_NOT) printf(" not"); print_newports((ipfw_insn_u16 *)cmd, proto, (flags & HAVE_OPTIONS) ? 
cmd->opcode : 0); break; case O_PROTO: { struct protoent *pe = NULL; if ((cmd->len & F_OR) && !or_block) printf(" {"); if (cmd->len & F_NOT) printf(" not"); proto = cmd->arg1; pe = getprotobynumber(cmd->arg1); if ((flags & (HAVE_PROTO4 | HAVE_PROTO6)) && !(flags & HAVE_PROTO)) show_prerequisites(&flags, HAVE_IP | HAVE_OPTIONS, 0); if (flags & HAVE_OPTIONS) printf(" proto"); if (pe) printf(" %s", pe->p_name); else printf(" %u", cmd->arg1); } flags |= HAVE_PROTO; break; default: /*options ... */ if (!(cmd->len & (F_OR|F_NOT))) if (((cmd->opcode == O_IP6) && (flags & HAVE_PROTO6)) || ((cmd->opcode == O_IP4) && (flags & HAVE_PROTO4))) break; show_prerequisites(&flags, HAVE_IP | HAVE_OPTIONS, 0); if ((cmd->len & F_OR) && !or_block) printf(" {"); if (cmd->len & F_NOT && cmd->opcode != O_IN) printf(" not"); switch(cmd->opcode) { case O_MACADDR2: { ipfw_insn_mac *m = (ipfw_insn_mac *)cmd; printf(" MAC"); print_mac(m->addr, m->mask); print_mac(m->addr + 6, m->mask + 6); } break; case O_MAC_TYPE: print_newports((ipfw_insn_u16 *)cmd, IPPROTO_ETHERTYPE, cmd->opcode); break; case O_FRAG: printf(" frag"); break; case O_FIB: printf(" fib %u", cmd->arg1 ); break; case O_IN: printf(cmd->len & F_NOT ? 
" out" : " in"); break; case O_DIVERTED: switch (cmd->arg1) { case 3: printf(" diverted"); break; case 1: printf(" diverted-loopback"); break; case 2: printf(" diverted-output"); break; default: printf(" diverted-?<%u>", cmd->arg1); break; } break; case O_LAYER2: printf(" layer2"); break; case O_XMIT: case O_RECV: case O_VIA: { char const *s; ipfw_insn_if *cmdif = (ipfw_insn_if *)cmd; if (cmd->opcode == O_XMIT) s = "xmit"; else if (cmd->opcode == O_RECV) s = "recv"; else /* if (cmd->opcode == O_VIA) */ s = "via"; if (cmdif->name[0] == '\0') printf(" %s %s", s, inet_ntoa(cmdif->p.ip)); else printf(" %s %s", s, cmdif->name); break; } case O_IPID: if (F_LEN(cmd) == 1) printf(" ipid %u", cmd->arg1 ); else print_newports((ipfw_insn_u16 *)cmd, 0, O_IPID); break; case O_IPTTL: if (F_LEN(cmd) == 1) printf(" ipttl %u", cmd->arg1 ); else print_newports((ipfw_insn_u16 *)cmd, 0, O_IPTTL); break; case O_IPVER: printf(" ipver %u", cmd->arg1 ); break; case O_IPPRECEDENCE: printf(" ipprecedence %u", (cmd->arg1) >> 5 ); break; case O_IPLEN: if (F_LEN(cmd) == 1) printf(" iplen %u", cmd->arg1 ); else print_newports((ipfw_insn_u16 *)cmd, 0, O_IPLEN); break; case O_IPOPT: print_flags("ipoptions", cmd, f_ipopts); break; case O_IPTOS: print_flags("iptos", cmd, f_iptos); break; case O_ICMPTYPE: print_icmptypes((ipfw_insn_u32 *)cmd); break; case O_ESTAB: printf(" established"); break; case O_TCPDATALEN: if (F_LEN(cmd) == 1) printf(" tcpdatalen %u", cmd->arg1 ); else print_newports((ipfw_insn_u16 *)cmd, 0, O_TCPDATALEN); break; case O_TCPFLAGS: print_flags("tcpflags", cmd, f_tcpflags); break; case O_TCPOPTS: print_flags("tcpoptions", cmd, f_tcpopts); break; case O_TCPWIN: printf(" tcpwin %d", ntohs(cmd->arg1)); break; case O_TCPACK: printf(" tcpack %d", ntohl(cmd32->d[0])); break; case O_TCPSEQ: printf(" tcpseq %d", ntohl(cmd32->d[0])); break; case O_UID: { struct passwd *pwd = getpwuid(cmd32->d[0]); if (pwd) printf(" uid %s", pwd->pw_name); else printf(" uid %u", cmd32->d[0]); } break; 
case O_GID: { struct group *grp = getgrgid(cmd32->d[0]); if (grp) printf(" gid %s", grp->gr_name); else printf(" gid %u", cmd32->d[0]); } break; case O_JAIL: printf(" jail %d", cmd32->d[0]); break; case O_VERREVPATH: printf(" verrevpath"); break; case O_VERSRCREACH: printf(" versrcreach"); break; case O_ANTISPOOF: printf(" antispoof"); break; case O_IPSEC: printf(" ipsec"); break; case O_NOP: comment = (char *)(cmd + 1); break; case O_KEEP_STATE: printf(" keep-state"); break; case O_LIMIT: { struct _s_x *p = limit_masks; ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; uint8_t x = c->limit_mask; char const *comma = " "; printf(" limit"); for (; p->x != 0 ; p++) if ((x & p->x) == p->x) { x &= ~p->x; printf("%s%s", comma, p->s); comma = ","; } PRINT_UINT_ARG(" ", c->conn_limit); break; } case O_IP6: printf(" ip6"); break; case O_IP4: printf(" ip4"); break; case O_ICMP6TYPE: print_icmp6types((ipfw_insn_u32 *)cmd); break; case O_EXT_HDR: print_ext6hdr( (ipfw_insn *) cmd ); break; case O_TAGGED: if (F_LEN(cmd) == 1) PRINT_UINT_ARG(" tagged ", cmd->arg1); else print_newports((ipfw_insn_u16 *)cmd, 0, O_TAGGED); break; default: printf(" [opcode %d len %d]", cmd->opcode, cmd->len); } } if (cmd->len & F_OR) { printf(" or"); or_block = 1; } else if (or_block) { printf(" }"); or_block = 0; } } show_prerequisites(&flags, HAVE_IP, 0); if (comment) printf(" // %s", comment); printf("\n"); } static void show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth) { struct protoent *pe; struct in_addr a; uint16_t rulenum; char buf[INET6_ADDRSTRLEN]; if (!co.do_expired) { if (!d->expire && !(d->dyn_type == O_LIMIT_PARENT)) return; } bcopy(&d->rule, &rulenum, sizeof(rulenum)); printf("%05d", rulenum); if (pcwidth>0 || bcwidth>0) printf(" %*llu %*llu (%ds)", pcwidth, align_uint64(&d->pcnt), bcwidth, align_uint64(&d->bcnt), d->expire); switch (d->dyn_type) { case O_LIMIT_PARENT: printf(" PARENT %d", d->count); break; case O_LIMIT: printf(" LIMIT"); break; case O_KEEP_STATE: /* bidir, no mask 
*/ printf(" STATE"); break; } if ((pe = getprotobynumber(d->id.proto)) != NULL) printf(" %s", pe->p_name); else printf(" proto %u", d->id.proto); if (d->id.addr_type == 4) { a.s_addr = htonl(d->id.src_ip); printf(" %s %d", inet_ntoa(a), d->id.src_port); a.s_addr = htonl(d->id.dst_ip); printf(" <-> %s %d", inet_ntoa(a), d->id.dst_port); } else if (d->id.addr_type == 6) { printf(" %s %d", inet_ntop(AF_INET6, &d->id.src_ip6, buf, sizeof(buf)), d->id.src_port); printf(" <-> %s %d", inet_ntop(AF_INET6, &d->id.dst_ip6, buf, sizeof(buf)), d->id.dst_port); } else printf(" UNKNOWN <-> UNKNOWN\n"); printf("\n"); } /* * This one handles all set-related commands * ipfw set { show | enable | disable } * ipfw set swap X Y * ipfw set move X to Y * ipfw set move rule X to Y */ void ipfw_sets_handler(int ac, char *av[]) { uint32_t set_disable, masks[2]; int i, nbytes; uint16_t rulenum; uint8_t cmd, new_set; ac--; av++; if (!ac) errx(EX_USAGE, "set needs command"); if (_substrcmp(*av, "show") == 0) { void *data; char const *msg; nbytes = sizeof(struct ip_fw); data = safe_calloc(1, nbytes); if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0) err(EX_OSERR, "getsockopt(IP_FW_GET)"); bcopy(&((struct ip_fw *)data)->next_rule, &set_disable, sizeof(set_disable)); for (i = 0, msg = "disable" ; i < RESVD_SET; i++) if ((set_disable & (1< RESVD_SET) errx(EX_DATAERR, "invalid set number %s\n", av[0]); if (!isdigit(*(av[1])) || new_set > RESVD_SET) errx(EX_DATAERR, "invalid set number %s\n", av[1]); masks[0] = (4 << 24) | (new_set << 16) | (rulenum); i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); } else if (_substrcmp(*av, "move") == 0) { ac--; av++; if (ac && _substrcmp(*av, "rule") == 0) { cmd = 2; ac--; av++; } else cmd = 3; if (ac != 3 || _substrcmp(av[1], "to") != 0) errx(EX_USAGE, "syntax: set move [rule] X to Y\n"); rulenum = atoi(av[0]); new_set = atoi(av[2]); if (!isdigit(*(av[0])) || (cmd == 3 && rulenum > RESVD_SET) || (cmd == 2 && rulenum == IPFW_DEFAULT_RULE) ) errx(EX_DATAERR, 
"invalid source number %s\n", av[0]); if (!isdigit(*(av[2])) || new_set > RESVD_SET) errx(EX_DATAERR, "invalid dest. set %s\n", av[1]); masks[0] = (cmd << 24) | (new_set << 16) | (rulenum); i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); } else if (_substrcmp(*av, "disable") == 0 || _substrcmp(*av, "enable") == 0 ) { int which = _substrcmp(*av, "enable") == 0 ? 1 : 0; ac--; av++; masks[0] = masks[1] = 0; while (ac) { if (isdigit(**av)) { i = atoi(*av); if (i < 0 || i > RESVD_SET) errx(EX_DATAERR, "invalid set number %d\n", i); masks[which] |= (1<= nalloc) { nalloc = nalloc * 2 + 200; nbytes = nalloc; data = safe_realloc(data, nbytes); if (do_cmd(ocmd, data, (uintptr_t)&nbytes) < 0) err(EX_OSERR, "getsockopt(IP_%s_GET)", co.do_pipe ? "DUMMYNET" : "FW"); } if (co.do_pipe) { ipfw_list_pipes(data, nbytes, ac, av); goto done; } /* * Count static rules. They have variable size so we * need to scan the list to count them. */ nstat = 0; r = data; if (!co.do_dynamic) { for (nstat = 1, r = data, lim = (char *)data + nbytes; r->rulenum < IPFW_DEFAULT_RULE && (char *)r < lim; ++nstat, r = NEXT(r) ) ; /* nothing */ } /* * Count dynamic rules. This is easier as they have * fixed size. 
*/ if (co.do_dynamic) { dynrules = (ipfw_dyn_rule *)r ; n = (char *)r - (char *)data; ndyn = (nbytes - n) / sizeof *dynrules; } /* if showing stats, figure out column widths ahead of time */ bcwidth = pcwidth = 0; if (show_counters) { for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) { /* skip rules from another set */ if (co.use_set && r->set != co.use_set - 1) continue; /* packet counter */ width = snprintf(NULL, 0, "%llu", align_uint64(&r->pcnt)); if (width > pcwidth) pcwidth = width; /* byte counter */ width = snprintf(NULL, 0, "%llu", align_uint64(&r->bcnt)); if (width > bcwidth) bcwidth = width; } } if (co.do_dynamic && ndyn) { for (n = 0, d = dynrules; n < ndyn; n++, d++) { if (co.use_set) { /* skip rules from another set */ bcopy((char *)&d->rule + sizeof(uint16_t), &set, sizeof(uint8_t)); if (set != co.use_set - 1) continue; } width = snprintf(NULL, 0, "%llu", align_uint64(&d->pcnt)); if (width > pcwidth) pcwidth = width; width = snprintf(NULL, 0, "%llu", align_uint64(&d->bcnt)); if (width > bcwidth) bcwidth = width; } } /* if no rule numbers were specified, list all rules */ if (ac == 0) { for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) { if (co.use_set && r->set != co.use_set - 1) continue; show_ipfw(r, pcwidth, bcwidth); } if (co.do_dynamic && ndyn) { printf("## Dynamic rules (%d):\n", ndyn); for (n = 0, d = dynrules; n < ndyn; n++, d++) { if (co.use_set) { bcopy((char *)&d->rule + sizeof(uint16_t), &set, sizeof(uint8_t)); if (set != co.use_set - 1) continue; } show_dyn_ipfw(d, pcwidth, bcwidth); } } goto done; } /* display specific rules requested on command line */ if (!co.do_dynamic) { for (lac = ac, lav = av; lac != 0; lac--) { /* convert command line rule # */ last = rnum = strtoul(*lav++, &endptr, 10); if (*endptr == '-') last = strtoul(endptr+1, &endptr, 10); if (*endptr) { exitval = EX_USAGE; warnx("invalid rule number: %s", *(lav - 1)); continue; } for (n = seen = 0, r = data; n < nstat; n++, r = NEXT(r) ) { if (r->rulenum > last) break; if 
(co.use_set && r->set != co.use_set - 1) continue; if (r->rulenum >= rnum && r->rulenum <= last) { show_ipfw(r, pcwidth, bcwidth); seen = 1; } } if (!seen) { /* give precedence to other error(s) */ if (exitval == EX_OK) exitval = EX_UNAVAILABLE; warnx("rule %lu does not exist", rnum); } } } if (co.do_dynamic && ndyn) { printf("## Dynamic rules:\n"); for (lac = ac, lav = av; lac != 0; lac--) { last = rnum = strtoul(*lav++, &endptr, 10); if (*endptr == '-') last = strtoul(endptr+1, &endptr, 10); if (*endptr) /* already warned */ continue; for (n = 0, d = dynrules; n < ndyn; n++, d++) { uint16_t rulenum; bcopy(&d->rule, &rulenum, sizeof(rulenum)); if (rulenum > rnum) break; if (co.use_set) { bcopy((char *)&d->rule + sizeof(uint16_t), &set, sizeof(uint8_t)); if (set != co.use_set - 1) continue; } if (r->rulenum >= rnum && r->rulenum <= last) show_dyn_ipfw(d, pcwidth, bcwidth); } } } ac = 0; done: free(data); if (exitval != EX_OK) exit(exitval); #undef NEXT } static int lookup_host (char *host, struct in_addr *ipaddr) { struct hostent *he; if (!inet_aton(host, ipaddr)) { if ((he = gethostbyname(host)) == NULL) return(-1); *ipaddr = *(struct in_addr *)he->h_addr_list[0]; } return(0); } /* * fills the addr and mask fields in the instruction as appropriate from av. * Update length as appropriate. * The following formats are allowed: * me returns O_IP_*_ME * 1.2.3.4 single IP address * 1.2.3.4:5.6.7.8 address:mask * 1.2.3.4/24 address/mask * 1.2.3.4/26{1,6,5,4,23} set of addresses in a subnet * We can have multiple comma-separated address/mask entries. 
*/ static void fill_ip(ipfw_insn_ip *cmd, char *av) { int len = 0; uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; cmd->o.len &= ~F_LEN_MASK; /* zero len */ if (_substrcmp(av, "any") == 0) return; if (_substrcmp(av, "me") == 0) { cmd->o.len |= F_INSN_SIZE(ipfw_insn); return; } if (strncmp(av, "table(", 6) == 0) { char *p = strchr(av + 6, ','); if (p) *p++ = '\0'; cmd->o.opcode = O_IP_DST_LOOKUP; cmd->o.arg1 = strtoul(av + 6, NULL, 0); if (p) { cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); d[0] = strtoul(p, NULL, 0); } else cmd->o.len |= F_INSN_SIZE(ipfw_insn); return; } while (av) { /* * After the address we can have '/' or ':' indicating a mask, * ',' indicating another address follows, '{' indicating a * set of addresses of unspecified size. */ char *t = NULL, *p = strpbrk(av, "/:,{"); int masklen; char md, nd = '\0'; if (p) { md = *p; *p++ = '\0'; if ((t = strpbrk(p, ",{")) != NULL) { nd = *t; *t = '\0'; } } else md = '\0'; if (lookup_host(av, (struct in_addr *)&d[0]) != 0) errx(EX_NOHOST, "hostname ``%s'' unknown", av); switch (md) { case ':': if (!inet_aton(p, (struct in_addr *)&d[1])) errx(EX_DATAERR, "bad netmask ``%s''", p); break; case '/': masklen = atoi(p); if (masklen == 0) d[1] = htonl(0); /* mask */ else if (masklen > 32) errx(EX_DATAERR, "bad width ``%s''", p); else d[1] = htonl(~0 << (32 - masklen)); break; case '{': /* no mask, assume /24 and put back the '{' */ d[1] = htonl(~0 << (32 - 24)); *(--p) = md; break; case ',': /* single address plus continuation */ *(--p) = md; /* FALLTHROUGH */ case 0: /* initialization value */ default: d[1] = htonl(~0); /* force /32 */ break; } d[0] &= d[1]; /* mask base address with mask */ if (t) *t = nd; /* find next separator */ if (p) p = strpbrk(p, ",{"); if (p && *p == '{') { /* * We have a set of addresses. They are stored as follows: * arg1 is the set size (powers of 2, 2..256) * addr is the base address IN HOST FORMAT * mask.. 
is an array of arg1 bits (rounded up to * the next multiple of 32) with bits set * for each host in the map. */ uint32_t *map = (uint32_t *)&cmd->mask; int low, high; int i = contigmask((uint8_t *)&(d[1]), 32); if (len > 0) errx(EX_DATAERR, "address set cannot be in a list"); if (i < 24 || i > 31) errx(EX_DATAERR, "invalid set with mask %d\n", i); cmd->o.arg1 = 1<<(32-i); /* map length */ d[0] = ntohl(d[0]); /* base addr in host format */ cmd->o.opcode = O_IP_DST_SET; /* default */ cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + (cmd->o.arg1+31)/32; for (i = 0; i < (cmd->o.arg1+31)/32 ; i++) map[i] = 0; /* clear map */ av = p + 1; low = d[0] & 0xff; high = low + cmd->o.arg1 - 1; /* * Here, i stores the previous value when we specify a range * of addresses within a mask, e.g. 45-63. i = -1 means we * have no previous value. */ i = -1; /* previous value in a range */ while (isdigit(*av)) { char *s; int a = strtol(av, &s, 0); if (s == av) { /* no parameter */ if (*av != '}') errx(EX_DATAERR, "set not closed\n"); if (i != -1) errx(EX_DATAERR, "incomplete range %d-", i); break; } if (a < low || a > high) errx(EX_DATAERR, "addr %d out of range [%d-%d]\n", a, low, high); a -= low; if (i == -1) /* no previous in range */ i = a; else { /* check that range is valid */ if (i > a) errx(EX_DATAERR, "invalid range %d-%d", i+low, a+low); if (*s == '-') errx(EX_DATAERR, "double '-' in range"); } for (; i <= a; i++) map[i/32] |= 1<<(i & 31); i = -1; if (*s == '-') i = a; else if (*s == '}') break; av = s+1; } return; } av = p; if (av) /* then *av must be a ',' */ av++; /* Check this entry */ if (d[1] == 0) { /* "any", specified as x.x.x.x/0 */ /* * 'any' turns the entire list into a NOP. * 'not any' never matches, so it is removed from the * list unless it is the only item, in which case we * report an error. 
*/ if (cmd->o.len & F_NOT) { /* "not any" never matches */ if (av == NULL && len == 0) /* only this entry */ errx(EX_DATAERR, "not any never matches"); } /* else do nothing and skip this entry */ return; } /* A single IP can be stored in an optimized format */ if (d[1] == ~0 && av == NULL && len == 0) { cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); return; } len += 2; /* two words... */ d += 2; } /* end while */ if (len + 1 > F_LEN_MASK) errx(EX_DATAERR, "address list too long"); cmd->o.len |= len+1; } /* n2mask sets n bits of the mask */ void n2mask(struct in6_addr *mask, int n) { static int minimask[9] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff }; u_char *p; memset(mask, 0, sizeof(struct in6_addr)); p = (u_char *) mask; for (; n > 0; p++, n -= 8) { if (n >= 8) *p = 0xff; else *p = minimask[n]; } return; } /* * helper function to process a set of flags and set bits in the * appropriate masks. */ static void fill_flags(ipfw_insn *cmd, enum ipfw_opcodes opcode, struct _s_x *flags, char *p) { uint8_t set=0, clear=0; while (p && *p) { char *q; /* points to the separator */ int val; uint8_t *which; /* mask we are working on */ if (*p == '!') { p++; which = &clear; } else which = &set; q = strchr(p, ','); if (q) *q++ = '\0'; val = match_token(flags, p); if (val <= 0) errx(EX_DATAERR, "invalid flag %s", p); *which |= (uint8_t)val; p = q; } cmd->opcode = opcode; cmd->len = (cmd->len & (F_NOT | F_OR)) | 1; cmd->arg1 = (set & 0xff) | ( (clear & 0xff) << 8); } void ipfw_delete(int ac, char *av[]) { uint32_t rulenum; int i; int exitval = EX_OK; int do_set = 0; av++; ac--; NEED1("missing rule specification"); if (ac > 0 && _substrcmp(*av, "set") == 0) { /* Do not allow using the following syntax: * ipfw set N delete set M */ if (co.use_set) errx(EX_DATAERR, "invalid syntax"); do_set = 1; /* delete set */ ac--; av++; } /* Rule number */ while (ac && isdigit(**av)) { i = atoi(*av); av++; ac--; if (co.do_nat) { exitval = do_cmd(IP_FW_NAT_DEL, &i, sizeof i); if (exitval) 
{ exitval = EX_UNAVAILABLE; warn("rule %u not available", i); } } else if (co.do_pipe) { exitval = ipfw_delete_pipe(co.do_pipe, i); } else { if (co.use_set) rulenum = (i & 0xffff) | (5 << 24) | ((co.use_set - 1) << 16); else rulenum = (i & 0xffff) | (do_set << 24); i = do_cmd(IP_FW_DEL, &rulenum, sizeof rulenum); if (i) { exitval = EX_UNAVAILABLE; warn("rule %u: setsockopt(IP_FW_DEL)", rulenum); } } } if (exitval != EX_OK) exit(exitval); } /* * fill the interface structure. We do not check the name as we can * create interfaces dynamically, so checking them at insert time * makes relatively little sense. * Interface names containing '*', '?', or '[' are assumed to be shell * patterns which match interfaces. */ static void fill_iface(ipfw_insn_if *cmd, char *arg) { cmd->name[0] = '\0'; cmd->o.len |= F_INSN_SIZE(ipfw_insn_if); /* Parse the interface or address */ if (strcmp(arg, "any") == 0) cmd->o.len = 0; /* effectively ignore this command */ else if (!isdigit(*arg)) { strlcpy(cmd->name, arg, sizeof(cmd->name)); cmd->p.glob = strpbrk(arg, "*?[") != NULL ? 1 : 0; } else if (!inet_aton(arg, &cmd->p.ip)) errx(EX_DATAERR, "bad ip address ``%s''", arg); } static void get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask) { int i, l; char *ap, *ptr, *optr; struct ether_addr *mac; const char *macset = "0123456789abcdefABCDEF:"; if (strcmp(p, "any") == 0) { for (i = 0; i < ETHER_ADDR_LEN; i++) addr[i] = mask[i] = 0; return; } optr = ptr = strdup(p); if ((ap = strsep(&ptr, "&/")) != NULL && *ap != 0) { l = strlen(ap); if (strspn(ap, macset) != l || (mac = ether_aton(ap)) == NULL) errx(EX_DATAERR, "Incorrect MAC address"); bcopy(mac, addr, ETHER_ADDR_LEN); } else errx(EX_DATAERR, "Incorrect MAC address"); if (ptr != NULL) { /* we have mask? 
*/ if (p[ptr - optr - 1] == '/') { /* mask len */ l = strtol(ptr, &ap, 10); if (*ap != 0 || l > ETHER_ADDR_LEN * 8 || l < 0) errx(EX_DATAERR, "Incorrect mask length"); for (i = 0; l > 0 && i < ETHER_ADDR_LEN; l -= 8, i++) mask[i] = (l >= 8) ? 0xff: (~0) << (8 - l); } else { /* mask */ l = strlen(ptr); if (strspn(ptr, macset) != l || (mac = ether_aton(ptr)) == NULL) errx(EX_DATAERR, "Incorrect mask"); bcopy(mac, mask, ETHER_ADDR_LEN); } } else { /* default mask: ff:ff:ff:ff:ff:ff */ for (i = 0; i < ETHER_ADDR_LEN; i++) mask[i] = 0xff; } for (i = 0; i < ETHER_ADDR_LEN; i++) addr[i] &= mask[i]; free(optr); } /* * helper function, updates the pointer to cmd with the length * of the current command, and also cleans up the first word of * the new command in case it has been clobbered before. */ static ipfw_insn * next_cmd(ipfw_insn *cmd) { cmd += F_LEN(cmd); bzero(cmd, sizeof(*cmd)); return cmd; } /* * Takes arguments and copies them into a comment */ static void fill_comment(ipfw_insn *cmd, int ac, char **av) { int i, l; char *p = (char *)(cmd + 1); cmd->opcode = O_NOP; cmd->len = (cmd->len & (F_NOT | F_OR)); /* Compute length of comment string. */ for (i = 0, l = 0; i < ac; i++) l += strlen(av[i]) + 1; if (l == 0) return; if (l > 84) errx(EX_DATAERR, "comment too long (max 80 chars)"); l = 1 + (l+3)/4; cmd->len = (cmd->len & (F_NOT | F_OR)) | l; for (i = 0; i < ac; i++) { strcpy(p, av[i]); p += strlen(av[i]); *p++ = ' '; } *(--p) = '\0'; } /* * A function to fill simple commands of size 1. * Existing flags are preserved. */ static void fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, uint16_t arg) { cmd->opcode = opcode; cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | 1; cmd->arg1 = arg; } /* * Fetch and add the MAC address and type, with masks. This generates one or * two microinstructions, and returns the pointer to the last one. 
*/ static ipfw_insn * add_mac(ipfw_insn *cmd, int ac, char *av[]) { ipfw_insn_mac *mac; if (ac < 2) errx(EX_DATAERR, "MAC dst src"); cmd->opcode = O_MACADDR2; cmd->len = (cmd->len & (F_NOT | F_OR)) | F_INSN_SIZE(ipfw_insn_mac); mac = (ipfw_insn_mac *)cmd; get_mac_addr_mask(av[0], mac->addr, mac->mask); /* dst */ get_mac_addr_mask(av[1], &(mac->addr[ETHER_ADDR_LEN]), &(mac->mask[ETHER_ADDR_LEN])); /* src */ return cmd; } static ipfw_insn * add_mactype(ipfw_insn *cmd, int ac, char *av) { if (ac < 1) errx(EX_DATAERR, "missing MAC type"); if (strcmp(av, "any") != 0) { /* we have a non-null type */ fill_newports((ipfw_insn_u16 *)cmd, av, IPPROTO_ETHERTYPE); cmd->opcode = O_MAC_TYPE; return cmd; } else return NULL; } static ipfw_insn * add_proto0(ipfw_insn *cmd, char *av, u_char *protop) { struct protoent *pe; char *ep; int proto; proto = strtol(av, &ep, 10); if (*ep != '\0' || proto <= 0) { if ((pe = getprotobyname(av)) == NULL) return NULL; proto = pe->p_proto; } fill_cmd(cmd, O_PROTO, 0, proto); *protop = proto; return cmd; } static ipfw_insn * add_proto(ipfw_insn *cmd, char *av, u_char *protop) { u_char proto = IPPROTO_IP; if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) ; /* do not set O_IP4 nor O_IP6 */ else if (strcmp(av, "ip4") == 0) /* explicit "just IPv4" rule */ fill_cmd(cmd, O_IP4, 0, 0); else if (strcmp(av, "ip6") == 0) { /* explicit "just IPv6" rule */ proto = IPPROTO_IPV6; fill_cmd(cmd, O_IP6, 0, 0); } else return add_proto0(cmd, av, protop); *protop = proto; return cmd; } static ipfw_insn * add_proto_compat(ipfw_insn *cmd, char *av, u_char *protop) { u_char proto = IPPROTO_IP; if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) ; /* do not set O_IP4 nor O_IP6 */ else if (strcmp(av, "ipv4") == 0 || strcmp(av, "ip4") == 0) /* explicit "just IPv4" rule */ fill_cmd(cmd, O_IP4, 0, 0); else if (strcmp(av, "ipv6") == 0 || strcmp(av, "ip6") == 0) { /* explicit "just IPv6" rule */ proto = IPPROTO_IPV6; fill_cmd(cmd, O_IP6, 0, 0); } else return 
add_proto0(cmd, av, protop); *protop = proto; return cmd; } static ipfw_insn * add_srcip(ipfw_insn *cmd, char *av) { fill_ip((ipfw_insn_ip *)cmd, av); if (cmd->opcode == O_IP_DST_SET) /* set */ cmd->opcode = O_IP_SRC_SET; else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ cmd->opcode = O_IP_SRC_LOOKUP; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ cmd->opcode = O_IP_SRC_ME; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ cmd->opcode = O_IP_SRC; else /* addr/mask */ cmd->opcode = O_IP_SRC_MASK; return cmd; } static ipfw_insn * add_dstip(ipfw_insn *cmd, char *av) { fill_ip((ipfw_insn_ip *)cmd, av); if (cmd->opcode == O_IP_DST_SET) /* set */ ; else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ ; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ cmd->opcode = O_IP_DST_ME; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ cmd->opcode = O_IP_DST; else /* addr/mask */ cmd->opcode = O_IP_DST_MASK; return cmd; } static ipfw_insn * add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode) { if (_substrcmp(av, "any") == 0) { return NULL; } else if (fill_newports((ipfw_insn_u16 *)cmd, av, proto)) { /* XXX todo: check that we have a protocol with ports */ cmd->opcode = opcode; return cmd; } return NULL; } static ipfw_insn * add_src(ipfw_insn *cmd, char *av, u_char proto) { struct in6_addr a; char *host, *ch; ipfw_insn *ret = NULL; if ((host = strdup(av)) == NULL) return NULL; if ((ch = strrchr(host, '/')) != NULL) *ch = '\0'; if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || inet_pton(AF_INET6, host, &a)) ret = add_srcip6(cmd, av); /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || !inet_pton(AF_INET6, host, &a))) ret = add_srcip(cmd, av); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; free(host); return ret; } static ipfw_insn * add_dst(ipfw_insn *cmd, char *av, u_char proto) { struct in6_addr a; char *host, *ch; ipfw_insn *ret = NULL; if ((host = 
strdup(av)) == NULL) return NULL; if ((ch = strrchr(host, '/')) != NULL) *ch = '\0'; if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || inet_pton(AF_INET6, host, &a)) ret = add_dstip6(cmd, av); /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || !inet_pton(AF_INET6, host, &a))) ret = add_dstip(cmd, av); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; free(host); return ret; } /* * Parse arguments and assemble the microinstructions which make up a rule. * Rules are added into the 'rulebuf' and then copied in the correct order * into the actual rule. * * The syntax for a rule starts with the action, followed by * optional action parameters, and the various match patterns. * In the assembled microcode, the first opcode must be an O_PROBE_STATE * (generated if the rule includes a keep-state option), then the * various match patterns, log/altq actions, and the actual action. * */ void ipfw_add(int ac, char *av[]) { /* * rules are added into the 'rulebuf' and then copied in * the correct order into the actual rule. * Some things that need to go out of order (prob, action etc.) * go into actbuf[]. */ static uint32_t rulebuf[255], actbuf[255], cmdbuf[255]; ipfw_insn *src, *dst, *cmd, *action, *prev=NULL; ipfw_insn *first_cmd; /* first match pattern */ struct ip_fw *rule; /* * various flags used to record that we entered some fields. 
* Each have_* pointer below stays NULL until its keyword has been parsed and then points at the instruction generated for it. */ ipfw_insn *have_state = NULL; /* check-state or keep-state */ ipfw_insn *have_log = NULL, *have_altq = NULL, *have_tag = NULL; size_t len; int i; int open_par = 0; /* open parenthesis ( */ /* proto is here because it is used to fetch ports */ u_char proto = IPPROTO_IP; /* default protocol */ double match_prob = 1; /* match probability, default is always match */ bzero(actbuf, sizeof(actbuf)); /* actions go here */ bzero(cmdbuf, sizeof(cmdbuf)); bzero(rulebuf, sizeof(rulebuf)); rule = (struct ip_fw *)rulebuf; cmd = (ipfw_insn *)cmdbuf; action = (ipfw_insn *)actbuf; /* cmd and action are the write cursors into cmdbuf and actbuf */ av++; ac--; /* [rule N] -- Rule number optional */ if (ac && isdigit(**av)) { rule->rulenum = atoi(*av); av++; ac--; } /* [set N] -- set number (0..RESVD_SET), optional */ if (ac > 1 && _substrcmp(*av, "set") == 0) { int set = strtoul(av[1], NULL, 10); if (set < 0 || set > RESVD_SET) errx(EX_DATAERR, "illegal set %s", av[1]); rule->set = set; av += 2; ac -= 2; } /* [prob D] -- match probability, optional */ if (ac > 1 && _substrcmp(*av, "prob") == 0) { match_prob = strtod(av[1], NULL); if (match_prob <= 0 || match_prob > 1) errx(EX_DATAERR, "illegal match prob. 
%s", av[1]); av += 2; ac -= 2; } /* action -- mandatory */ NEED1("missing action"); i = match_token(rule_actions, *av); ac--; av++; action->len = 1; /* default */ switch(i) { case TOK_CHECKSTATE: have_state = action; action->opcode = O_CHECK_STATE; break; case TOK_ACCEPT: action->opcode = O_ACCEPT; break; case TOK_DENY: action->opcode = O_DENY; action->arg1 = 0; break; case TOK_REJECT: action->opcode = O_REJECT; action->arg1 = ICMP_UNREACH_HOST; break; case TOK_RESET: action->opcode = O_REJECT; action->arg1 = ICMP_REJECT_RST; break; case TOK_RESET6: action->opcode = O_UNREACH6; action->arg1 = ICMP6_UNREACH_RST; break; case TOK_UNREACH: action->opcode = O_REJECT; NEED1("missing reject code"); fill_reject_code(&action->arg1, *av); ac--; av++; break; case TOK_UNREACH6: action->opcode = O_UNREACH6; NEED1("missing unreach code"); fill_unreach6_code(&action->arg1, *av); ac--; av++; break; case TOK_COUNT: action->opcode = O_COUNT; break; case TOK_NAT: action->opcode = O_NAT; action->len = F_INSN_SIZE(ipfw_insn_nat); goto chkarg; case TOK_QUEUE: action->opcode = O_QUEUE; goto chkarg; case TOK_PIPE: action->opcode = O_PIPE; goto chkarg; case TOK_SKIPTO: action->opcode = O_SKIPTO; goto chkarg; case TOK_NETGRAPH: action->opcode = O_NETGRAPH; goto chkarg; case TOK_NGTEE: action->opcode = O_NGTEE; goto chkarg; case TOK_DIVERT: action->opcode = O_DIVERT; goto chkarg; case TOK_TEE: action->opcode = O_TEE; chkarg: /* shared argument parsing for every action above that jumps or falls through to here: a number, "tablearg", or (divert/tee only) a service name */ if (!ac) errx(EX_USAGE, "missing argument for %s", *(av - 1)); if (isdigit(**av)) { action->arg1 = strtoul(*av, NULL, 10); if (action->arg1 <= 0 || action->arg1 >= IP_FW_TABLEARG) errx(EX_DATAERR, "illegal argument for %s", *(av - 1)); } else if (_substrcmp(*av, "tablearg") == 0) { action->arg1 = IP_FW_TABLEARG; } else if (i == TOK_DIVERT || i == TOK_TEE) { struct servent *s; setservent(1); s = getservbyname(av[0], "divert"); if (s != NULL) action->arg1 = ntohs(s->s_port); else errx(EX_DATAERR, "illegal divert/tee port"); } else errx(EX_DATAERR, "illegal argument for 
%s", *(av - 1)); ac--; av++; break; case TOK_FORWARD: { ipfw_insn_sa *p = (ipfw_insn_sa *)action; char *s, *end; NEED1("missing forward address[:port]"); action->opcode = O_FORWARD_IP; action->len = F_INSN_SIZE(ipfw_insn_sa); /* * In the kernel we assume AF_INET and use only * sin_port and sin_addr. Remember to set sin_len as * the routing code seems to use it too. */ p->sa.sin_family = AF_INET; p->sa.sin_len = sizeof(struct sockaddr_in); p->sa.sin_port = 0; /* * locate the address-port separator (':' or ',') */ s = strchr(*av, ':'); if (s == NULL) s = strchr(*av, ','); if (s != NULL) { *(s++) = '\0'; i = strtoport(s, &end, 0 /* base */, 0 /* proto */); if (s == end) errx(EX_DATAERR, "illegal forwarding port ``%s''", s); p->sa.sin_port = (u_short)i; } if (_substrcmp(*av, "tablearg") == 0) p->sa.sin_addr.s_addr = INADDR_ANY; else lookup_host(*av, &(p->sa.sin_addr)); ac--; av++; break; } case TOK_COMMENT: /* pretend it is a 'count' rule followed by the comment */ action->opcode = O_COUNT; ac++; av--; /* go back... */ break; case TOK_SETFIB: { int numfibs; size_t intsize = sizeof(int); action->opcode = O_SETFIB; NEED1("missing fib number"); action->arg1 = strtoul(*av, NULL, 10); if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) errx(EX_DATAERR, "fibs not suported.\n"); if (action->arg1 >= numfibs) /* Temporary */ errx(EX_DATAERR, "fib too large.\n"); ac--; av++; break; } case TOK_REASS: action->opcode = O_REASS; break; default: errx(EX_DATAERR, "invalid action %s\n", av[-1]); } action = next_cmd(action); /* action now points just past the action instruction; the copy loop after "done" stops there */ /* * [altq queuename] -- altq tag, optional * [log [logamount N]] -- log, optional * * If they exist, it go first in the cmdbuf, but then it is * skipped in the copy section to the end of the buffer. 
*/ while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) { ac--; av++; switch (i) { case TOK_LOG: { ipfw_insn_log *c = (ipfw_insn_log *)cmd; int l; if (have_log) errx(EX_DATAERR, "log cannot be specified more than once"); have_log = (ipfw_insn *)c; cmd->len = F_INSN_SIZE(ipfw_insn_log); cmd->opcode = O_LOG; if (ac && _substrcmp(*av, "logamount") == 0) { ac--; av++; NEED1("logamount requires argument"); l = atoi(*av); if (l < 0) errx(EX_DATAERR, "logamount must be positive"); c->max_log = l; ac--; av++; } else { len = sizeof(c->max_log); if (sysctlbyname("net.inet.ip.fw.verbose_limit", &c->max_log, &len, NULL, 0) == -1) errx(1, "sysctlbyname(\"%s\")", "net.inet.ip.fw.verbose_limit"); } } break; case TOK_ALTQ: { ipfw_insn_altq *a = (ipfw_insn_altq *)cmd; NEED1("missing altq queue name"); if (have_altq) errx(EX_DATAERR, "altq cannot be specified more than once"); have_altq = (ipfw_insn *)a; cmd->len = F_INSN_SIZE(ipfw_insn_altq); cmd->opcode = O_ALTQ; a->qid = altq_name_to_qid(*av); ac--; av++; } break; case TOK_TAG: case TOK_UNTAG: { uint16_t tag; if (have_tag) errx(EX_USAGE, "tag and untag cannot be " "specified more than once"); /* both keywords emit O_TAG; "untag" is told apart by F_NOT in the fill_cmd() call below */ GET_UINT_ARG(tag, IPFW_ARG_MIN, IPFW_ARG_MAX, i, rule_action_params); have_tag = cmd; fill_cmd(cmd, O_TAG, (i == TOK_TAG) ? 
0: F_NOT, tag); ac--; av++; break; } default: abort(); } cmd = next_cmd(cmd); } if (have_state) /* must be a check-state, we are done */ goto done; #define OR_START(target) \ if (ac && (*av[0] == '(' || *av[0] == '{')) { \ if (open_par) \ errx(EX_USAGE, "nested \"(\" not allowed\n"); \ prev = NULL; \ open_par = 1; \ if ( (av[0])[1] == '\0') { \ ac--; av++; \ } else \ (*av)++; \ } \ target: \ #define CLOSE_PAR \ if (open_par) { \ if (ac && ( \ strcmp(*av, ")") == 0 || \ strcmp(*av, "}") == 0)) { \ prev = NULL; \ open_par = 0; \ ac--; av++; \ } else \ errx(EX_USAGE, "missing \")\"\n"); \ } #define NOT_BLOCK \ if (ac && _substrcmp(*av, "not") == 0) { \ if (cmd->len & F_NOT) \ errx(EX_USAGE, "double \"not\" not allowed\n"); \ cmd->len |= F_NOT; \ ac--; av++; \ } #define OR_BLOCK(target) \ if (ac && _substrcmp(*av, "or") == 0) { \ if (prev == NULL || open_par == 0) \ errx(EX_DATAERR, "invalid OR block"); \ prev->len |= F_OR; \ ac--; av++; \ goto target; \ } \ CLOSE_PAR; /* first_cmd marks where the match patterns begin; it is compared with cmd later to detect that none has been emitted yet */ first_cmd = cmd; #if 0 /* * MAC addresses, optional. * If we have this, we skip the part "proto from src to dst" * and jump straight to the option parsing. 
*/ NOT_BLOCK; NEED1("missing protocol"); if (_substrcmp(*av, "MAC") == 0 || _substrcmp(*av, "mac") == 0) { ac--; av++; /* the "MAC" keyword */ add_mac(cmd, ac, av); /* exits in case of errors */ cmd = next_cmd(cmd); ac -= 2; av += 2; /* dst-mac and src-mac */ NOT_BLOCK; NEED1("missing mac type"); if (add_mactype(cmd, ac, av[0])) cmd = next_cmd(cmd); ac--; av++; /* any or mac-type */ goto read_options; } #endif /* * protocol, mandatory */ OR_START(get_proto); NOT_BLOCK; NEED1("missing protocol"); if (add_proto_compat(cmd, *av, &proto)) { av++; ac--; if (F_LEN(cmd) != 0) { prev = cmd; cmd = next_cmd(cmd); } } else if (first_cmd != cmd) { errx(EX_DATAERR, "invalid protocol ``%s''", *av); } else /* not a protocol and nothing emitted so far: parse the remaining words as options */ goto read_options; OR_BLOCK(get_proto); /* * "from", mandatory */ if (!ac || _substrcmp(*av, "from") != 0) errx(EX_USAGE, "missing ``from''"); ac--; av++; /* * source IP, mandatory */ OR_START(source_ip); NOT_BLOCK; /* optional "not" */ NEED1("missing source address"); if (add_src(cmd, *av, proto)) { ac--; av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; cmd = next_cmd(cmd); } } else errx(EX_USAGE, "bad source address %s", *av); OR_BLOCK(source_ip); /* * source ports, optional */ NOT_BLOCK; /* optional "not" */ if (ac) { if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_SRCPORT)) { ac--; av++; if (F_LEN(cmd) != 0) cmd = next_cmd(cmd); } } /* * "to", mandatory */ if (!ac || _substrcmp(*av, "to") != 0) errx(EX_USAGE, "missing ``to''"); av++; ac--; /* * destination, mandatory */ OR_START(dest_ip); NOT_BLOCK; /* optional "not" */ NEED1("missing dst address"); if (add_dst(cmd, *av, proto)) { ac--; av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; cmd = next_cmd(cmd); } } else errx( EX_USAGE, "bad destination address %s", *av); OR_BLOCK(dest_ip); /* * dest. 
ports, optional */ NOT_BLOCK; /* optional "not" */ if (ac) { if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_DSTPORT)) { ac--; av++; if (F_LEN(cmd) != 0) cmd = next_cmd(cmd); } } read_options: if (ac && first_cmd == cmd) { /* * nothing specified so far, store in the rule to ease * printout later. */ rule->_pad = 1; } prev = NULL; while (ac) { char *s; ipfw_insn_u32 *cmd32; /* alias for cmd */ s = *av; cmd32 = (ipfw_insn_u32 *)cmd; if (*s == '!') { /* alternate syntax for NOT */ if (cmd->len & F_NOT) errx(EX_USAGE, "double \"not\" not allowed\n"); cmd->len = F_NOT; /* NOTE(review): plain assignment here and in TOK_NOT below, whereas NOT_BLOCK uses "|=" */ s++; } i = match_token(rule_options, s); ac--; av++; switch(i) { case TOK_NOT: if (cmd->len & F_NOT) errx(EX_USAGE, "double \"not\" not allowed\n"); cmd->len = F_NOT; break; case TOK_OR: if (open_par == 0 || prev == NULL) errx(EX_USAGE, "invalid \"or\" block\n"); prev->len |= F_OR; break; case TOK_STARTBRACE: if (open_par) errx(EX_USAGE, "+nested \"(\" not allowed\n"); open_par = 1; break; case TOK_ENDBRACE: if (!open_par) errx(EX_USAGE, "+missing \")\"\n"); open_par = 0; prev = NULL; break; case TOK_IN: fill_cmd(cmd, O_IN, 0, 0); break; case TOK_OUT: cmd->len ^= F_NOT; /* toggle F_NOT */ fill_cmd(cmd, O_IN, 0, 0); break; case TOK_DIVERTED: fill_cmd(cmd, O_DIVERTED, 0, 3); break; case TOK_DIVERTEDLOOPBACK: fill_cmd(cmd, O_DIVERTED, 0, 1); break; case TOK_DIVERTEDOUTPUT: fill_cmd(cmd, O_DIVERTED, 0, 2); break; case TOK_FRAG: fill_cmd(cmd, O_FRAG, 0, 0); break; case TOK_LAYER2: fill_cmd(cmd, O_LAYER2, 0, 0); break; case TOK_XMIT: case TOK_RECV: case TOK_VIA: NEED1("recv, xmit, via require interface name" " or address"); fill_iface((ipfw_insn_if *)cmd, av[0]); ac--; av++; if (F_LEN(cmd) == 0) /* not a valid address */ break; if (i == TOK_XMIT) cmd->opcode = O_XMIT; else if (i == TOK_RECV) cmd->opcode = O_RECV; else if (i == TOK_VIA) cmd->opcode = O_VIA; break; case TOK_ICMPTYPES: NEED1("icmptypes requires list of types"); fill_icmptypes((ipfw_insn_u32 *)cmd, *av); av++; ac--; break; case 
TOK_ICMP6TYPES: NEED1("icmptypes requires list of types"); fill_icmp6types((ipfw_insn_icmp6 *)cmd, *av); av++; ac--; break; case TOK_IPTTL: NEED1("ipttl requires TTL"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_IPTTL)) errx(EX_DATAERR, "invalid ipttl %s", *av); } else fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_IPID: NEED1("ipid requires id"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_IPID)) errx(EX_DATAERR, "invalid ipid %s", *av); } else fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_IPLEN: NEED1("iplen requires length"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_IPLEN)) errx(EX_DATAERR, "invalid ip len %s", *av); } else fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_IPVER: NEED1("ipver requires version"); fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_IPPRECEDENCE: NEED1("ipprecedence requires value"); fill_cmd(cmd, O_IPPRECEDENCE, 0, (strtoul(*av, NULL, 0) & 7) << 5); ac--; av++; break; case TOK_IPOPTS: NEED1("missing argument for ipoptions"); fill_flags(cmd, O_IPOPT, f_ipopts, *av); ac--; av++; break; case TOK_IPTOS: NEED1("missing argument for iptos"); fill_flags(cmd, O_IPTOS, f_iptos, *av); ac--; av++; break; case TOK_UID: NEED1("uid requires argument"); { char *end; uid_t uid; struct passwd *pwd; cmd->opcode = O_UID; uid = strtoul(*av, &end, 0); pwd = (*end == '\0') ? getpwuid(uid) : getpwnam(*av); if (pwd == NULL) errx(EX_DATAERR, "uid \"%s\" nonexistent", *av); cmd32->d[0] = pwd->pw_uid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); ac--; av++; } break; case TOK_GID: NEED1("gid requires argument"); { char *end; gid_t gid; struct group *grp; cmd->opcode = O_GID; gid = strtoul(*av, &end, 0); grp = (*end == '\0') ? 
getgrgid(gid) : getgrnam(*av); if (grp == NULL) errx(EX_DATAERR, "gid \"%s\" nonexistent", *av); cmd32->d[0] = grp->gr_gid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); ac--; av++; } break; case TOK_JAIL: NEED1("jail requires argument"); { char *end; int jid; cmd->opcode = O_JAIL; jid = (int)strtol(*av, &end, 0); if (jid < 0 || *end != '\0') errx(EX_DATAERR, "jail requires prison ID"); cmd32->d[0] = (uint32_t)jid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); ac--; av++; } break; case TOK_ESTAB: fill_cmd(cmd, O_ESTAB, 0, 0); break; case TOK_SETUP: /* presumably the low byte of arg1 lists flags that must be set (SYN) and the high byte flags that must be clear (ACK) -- TODO confirm against fill_flags() */ fill_cmd(cmd, O_TCPFLAGS, 0, (TH_SYN) | ( (TH_ACK) & 0xff) <<8 ); break; case TOK_TCPDATALEN: NEED1("tcpdatalen requires length"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_TCPDATALEN)) errx(EX_DATAERR, "invalid tcpdata len %s", *av); } else fill_cmd(cmd, O_TCPDATALEN, 0, strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_TCPOPTS: NEED1("missing argument for tcpoptions"); fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); ac--; av++; break; case TOK_TCPSEQ: case TOK_TCPACK: NEED1("tcpseq/tcpack requires argument"); cmd->len = F_INSN_SIZE(ipfw_insn_u32); cmd->opcode = (i == TOK_TCPSEQ) ? 
O_TCPSEQ : O_TCPACK; cmd32->d[0] = htonl(strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_TCPWIN: NEED1("tcpwin requires length"); fill_cmd(cmd, O_TCPWIN, 0, htons(strtoul(*av, NULL, 0))); ac--; av++; break; case TOK_TCPFLAGS: NEED1("missing argument for tcpflags"); cmd->opcode = O_TCPFLAGS; fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av); ac--; av++; break; case TOK_KEEPSTATE: if (open_par) errx(EX_USAGE, "keep-state cannot be part " "of an or block"); if (have_state) errx(EX_USAGE, "only one of keep-state " "and limit is allowed"); have_state = cmd; fill_cmd(cmd, O_KEEP_STATE, 0, 0); break; case TOK_LIMIT: { ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; int val; if (open_par) errx(EX_USAGE, "limit cannot be part of an or block"); if (have_state) errx(EX_USAGE, "only one of keep-state and " "limit is allowed"); have_state = cmd; cmd->len = F_INSN_SIZE(ipfw_insn_limit); cmd->opcode = O_LIMIT; c->limit_mask = c->conn_limit = 0; while (ac > 0) { if ((val = match_token(limit_masks, *av)) <= 0) break; c->limit_mask |= val; ac--; av++; } if (c->limit_mask == 0) errx(EX_USAGE, "limit: missing limit mask"); GET_UINT_ARG(c->conn_limit, IPFW_ARG_MIN, IPFW_ARG_MAX, TOK_LIMIT, rule_options); ac--; av++; break; } case TOK_PROTO: NEED1("missing protocol"); if (add_proto(cmd, *av, &proto)) { ac--; av++; } else errx(EX_DATAERR, "invalid protocol ``%s''", *av); break; case TOK_SRCIP: NEED1("missing source IP"); if (add_srcip(cmd, *av)) { ac--; av++; } break; case TOK_DSTIP: NEED1("missing destination IP"); if (add_dstip(cmd, *av)) { ac--; av++; } break; case TOK_SRCIP6: NEED1("missing source IP6"); if (add_srcip6(cmd, *av)) { ac--; av++; } break; case TOK_DSTIP6: NEED1("missing destination IP6"); if (add_dstip6(cmd, *av)) { ac--; av++; } break; case TOK_SRCPORT: NEED1("missing source port"); if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_SRCPORT)) { ac--; av++; } else errx(EX_DATAERR, "invalid source port %s", *av); break; case TOK_DSTPORT: NEED1("missing 
destination port"); if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_DSTPORT)) { ac--; av++; } else errx(EX_DATAERR, "invalid destination port %s", *av); break; case TOK_MAC: if (add_mac(cmd, ac, av)) { ac -= 2; av += 2; } break; case TOK_MACTYPE: NEED1("missing mac type"); if (!add_mactype(cmd, ac, *av)) errx(EX_DATAERR, "invalid mac type %s", *av); ac--; av++; break; case TOK_VERREVPATH: fill_cmd(cmd, O_VERREVPATH, 0, 0); break; case TOK_VERSRCREACH: fill_cmd(cmd, O_VERSRCREACH, 0, 0); break; case TOK_ANTISPOOF: fill_cmd(cmd, O_ANTISPOOF, 0, 0); break; case TOK_IPSEC: fill_cmd(cmd, O_IPSEC, 0, 0); break; case TOK_IPV6: fill_cmd(cmd, O_IP6, 0, 0); break; case TOK_IPV4: fill_cmd(cmd, O_IP4, 0, 0); break; case TOK_EXT6HDR: /* NOTE(review): *av is used here and in TOK_FLOWID without a preceding NEED1() check */ fill_ext6hdr( cmd, *av ); ac--; av++; break; case TOK_FLOWID: if (proto != IPPROTO_IPV6 ) errx( EX_USAGE, "flow-id filter is active " "only for ipv6 protocol\n"); fill_flow6( (ipfw_insn_u32 *) cmd, *av ); ac--; av++; break; case TOK_COMMENT: fill_comment(cmd, ac, av); av += ac; ac = 0; break; case TOK_TAGGED: if (ac > 0 && strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_TAGGED)) errx(EX_DATAERR, "tagged: invalid tag" " list: %s", *av); } else { uint16_t tag; GET_UINT_ARG(tag, IPFW_ARG_MIN, IPFW_ARG_MAX, TOK_TAGGED, rule_options); fill_cmd(cmd, O_TAGGED, 0, tag); } ac--; av++; break; case TOK_FIB: NEED1("fib requires fib number"); fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0)); ac--; av++; break; case TOK_LOOKUP: { ipfw_insn_u32 *c = (ipfw_insn_u32 *)cmd; char *p; int j; if (ac < 2) errx(EX_USAGE, "format: lookup argument tablenum[,arg]"); cmd->opcode = O_IP_DST_LOOKUP; cmd->len |= F_INSN_SIZE(ipfw_insn) + 2; /* i is reused: from here on it holds the token of the lookup key, no longer TOK_LOOKUP */ i = match_token(rule_options, *av); for (j = 0; lookup_key[j] ; j++) { if (i == lookup_key[j]) break; } if (lookup_key[j] == 0) errx(EX_USAGE, "format: cannot lookup on %s", *av); c->d[1] = j; // i converted to option ac--; av++; p = strchr(*av, ','); if (p) { *p++ = '\0'; c->d[0] = strtoul(p, NULL, 0); } else { c->d[0] = 
~0; } cmd->arg1 = strtoul(*av, NULL, 0); ac--; av++; } break; default: errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); } if (F_LEN(cmd) > 0) { /* prepare to advance */ prev = cmd; cmd = next_cmd(cmd); } } done: /* * Now copy stuff into the rule. * If we have a keep-state option, the first instruction * must be a PROBE_STATE (which is generated here). * If we have a LOG option, it was stored as the first command, * and now must be moved to the top of the action part. */ dst = (ipfw_insn *)rule->cmd; /* * First thing to write into the command stream is the match probability. */ if (match_prob != 1) { /* 1 means always match */ /* the probability is scaled by 0x7fffffff and stored in the 32-bit word that follows the instruction header */ dst->opcode = O_PROB; dst->len = 2; *((int32_t *)(dst+1)) = (int32_t)(match_prob * 0x7fffffff); dst += dst->len; } /* * generate O_PROBE_STATE if necessary */ if (have_state && have_state->opcode != O_CHECK_STATE) { fill_cmd(dst, O_PROBE_STATE, 0, 0); dst = next_cmd(dst); } /* copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ, O_TAG */ for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) { i = F_LEN(src); switch (src->opcode) { case O_LOG: case O_KEEP_STATE: case O_LIMIT: case O_ALTQ: case O_TAG: break; default: bcopy(src, dst, i * sizeof(uint32_t)); dst += i; } } /* * put back the have_state command as last opcode */ if (have_state && have_state->opcode != O_CHECK_STATE) { i = F_LEN(have_state); bcopy(have_state, dst, i * sizeof(uint32_t)); dst += i; } /* * start action section */ rule->act_ofs = dst - rule->cmd; /* put back O_LOG, O_ALTQ, O_TAG if necessary */ if (have_log) { i = F_LEN(have_log); bcopy(have_log, dst, i * sizeof(uint32_t)); dst += i; } if (have_altq) { i = F_LEN(have_altq); bcopy(have_altq, dst, i * sizeof(uint32_t)); dst += i; } if (have_tag) { i = F_LEN(have_tag); bcopy(have_tag, dst, i * sizeof(uint32_t)); dst += i; } /* * copy all other actions */ for (src = (ipfw_insn *)actbuf; src != action; src += i) { i = F_LEN(src); bcopy(src, dst, i * sizeof(uint32_t)); dst += i; } rule->cmd_len = (uint32_t *)dst - 
(uint32_t *)(rule->cmd); i = (char *)dst - (char *)rule; if (do_cmd(IP_FW_ADD, rule, (uintptr_t)&i) == -1) err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_ADD"); if (!co.do_quiet) show_ipfw(rule, 0, 0); } /* * clear the counters or the log counters. */ void ipfw_zero(int ac, char *av[], int optname /* 0 = IP_FW_ZERO, 1 = IP_FW_RESETLOG */) { uint32_t arg, saved_arg; int failed = EX_OK; char const *errstr; char const *name = optname ? "RESETLOG" : "ZERO"; optname = optname ? IP_FW_RESETLOG : IP_FW_ZERO; av++; ac--; if (!ac) { /* clear all entries */ if (do_cmd(optname, NULL, 0) < 0) err(EX_UNAVAILABLE, "setsockopt(IP_FW_%s)", name); if (!co.do_quiet) printf("%s.\n", optname == IP_FW_ZERO ? "Accounting cleared":"Logging counts reset"); return; } while (ac) { /* Rule number */ if (isdigit(**av)) { arg = strtonum(*av, 0, 0xffff, &errstr); if (errstr) errx(EX_DATAERR, "invalid rule number %s\n", *av); saved_arg = arg; if (co.use_set) arg |= (1 << 24) | ((co.use_set - 1) << 16); av++; ac--; if (do_cmd(optname, &arg, sizeof(arg))) { warn("rule %u: setsockopt(IP_FW_%s)", saved_arg, name); failed = EX_UNAVAILABLE; } else if (!co.do_quiet) printf("Entry %d %s.\n", saved_arg, optname == IP_FW_ZERO ? "cleared" : "logging count reset"); } else { errx(EX_USAGE, "invalid rule number ``%s''", *av); } } if (failed != EX_OK) exit(failed); } void ipfw_flush(int force) { int cmd = co.do_pipe ? IP_DUMMYNET_FLUSH : IP_FW_FLUSH; if (!force && !co.do_quiet) { /* need to ask user */ int c; printf("Are you sure? 
[yn] "); fflush(stdout); do { c = toupper(getc(stdin)); while (c != '\n' && getc(stdin) != '\n') if (feof(stdin)) return; /* and do not flush */ } while (c != 'Y' && c != 'N'); printf("\n"); if (c == 'N') /* user said no */ return; } /* `ipfw set N flush` - is the same that `ipfw delete set N` */ if (co.use_set) { uint32_t arg = ((co.use_set - 1) & 0xffff) | (1 << 24); if (do_cmd(IP_FW_DEL, &arg, sizeof(arg)) < 0) err(EX_UNAVAILABLE, "setsockopt(IP_FW_DEL)"); } else if (do_cmd(cmd, NULL, 0) < 0) err(EX_UNAVAILABLE, "setsockopt(IP_%s_FLUSH)", co.do_pipe ? "DUMMYNET" : "FW"); if (!co.do_quiet) printf("Flushed all %s.\n", co.do_pipe ? "pipes" : "rules"); } static void table_list(ipfw_table_entry ent, int need_header); /* * This one handles all table-related commands * ipfw table N add addr[/masklen] [value] * ipfw table N delete addr[/masklen] * ipfw table {N | all} flush * ipfw table {N | all} list */ void ipfw_table_handler(int ac, char *av[]) { ipfw_table_entry ent; int do_add; int is_all; size_t len; char *p; uint32_t a; uint32_t tables_max; len = sizeof(tables_max); if (sysctlbyname("net.inet.ip.fw.tables_max", &tables_max, &len, NULL, 0) == -1) { #ifdef IPFW_TABLES_MAX warn("Warn: Failed to get the max tables number via sysctl. " "Using the compiled in defaults. 
\nThe reason was"); tables_max = IPFW_TABLES_MAX; #else errx(1, "Failed sysctlbyname(\"net.inet.ip.fw.tables_max\")"); #endif } ac--; av++; if (ac && isdigit(**av)) { ent.tbl = atoi(*av); is_all = 0; ac--; av++; } else if (ac && _substrcmp(*av, "all") == 0) { ent.tbl = 0; is_all = 1; ac--; av++; } else errx(EX_USAGE, "table number or 'all' keyword required"); if (ent.tbl >= tables_max) errx(EX_USAGE, "The table number exceeds the maximum allowed " "value (%d)", tables_max - 1); NEED1("table needs command"); if (is_all && _substrcmp(*av, "list") != 0 && _substrcmp(*av, "flush") != 0) errx(EX_USAGE, "table number required"); if (_substrcmp(*av, "add") == 0 || _substrcmp(*av, "delete") == 0) { do_add = **av == 'a'; ac--; av++; if (!ac) errx(EX_USAGE, "IP address required"); p = strchr(*av, '/'); if (p) { *p++ = '\0'; ent.masklen = atoi(p); if (ent.masklen > 32) errx(EX_DATAERR, "bad width ``%s''", p); } else ent.masklen = 32; if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0) errx(EX_NOHOST, "hostname ``%s'' unknown", *av); ac--; av++; if (do_add && ac) { unsigned int tval; /* isdigit is a bit of a hack here.. */ if (strchr(*av, (int)'.') == NULL && isdigit(**av)) { ent.value = strtoul(*av, NULL, 0); } else { if (lookup_host(*av, (struct in_addr *)&tval) == 0) { /* The value must be stored in host order * * so that the values < 65k can be distinguished */ ent.value = ntohl(tval); } else { errx(EX_NOHOST, "hostname ``%s'' unknown", *av); } } } else ent.value = 0; if (do_cmd(do_add ? IP_FW_TABLE_ADD : IP_FW_TABLE_DEL, &ent, sizeof(ent)) < 0) { /* If running silent, don't bomb out on these errors. */ if (!(co.do_quiet && (errno == (do_add ? EEXIST : ESRCH)))) err(EX_OSERR, "setsockopt(IP_FW_TABLE_%s)", do_add ? 
"ADD" : "DEL"); /* In silent mode, react to a failed add by deleting */ if (do_add) { do_cmd(IP_FW_TABLE_DEL, &ent, sizeof(ent)); if (do_cmd(IP_FW_TABLE_ADD, &ent, sizeof(ent)) < 0) err(EX_OSERR, "setsockopt(IP_FW_TABLE_ADD)"); } } } else if (_substrcmp(*av, "flush") == 0) { a = is_all ? tables_max : (ent.tbl + 1); do { if (do_cmd(IP_FW_TABLE_FLUSH, &ent.tbl, sizeof(ent.tbl)) < 0) err(EX_OSERR, "setsockopt(IP_FW_TABLE_FLUSH)"); } while (++ent.tbl < a); } else if (_substrcmp(*av, "list") == 0) { a = is_all ? tables_max : (ent.tbl + 1); do { table_list(ent, is_all); } while (++ent.tbl < a); } else errx(EX_USAGE, "invalid table command %s", *av); } static void table_list(ipfw_table_entry ent, int need_header) { ipfw_table *tbl; socklen_t l; uint32_t a; a = ent.tbl; l = sizeof(a); if (do_cmd(IP_FW_TABLE_GETSIZE, &a, (uintptr_t)&l) < 0) err(EX_OSERR, "getsockopt(IP_FW_TABLE_GETSIZE)"); /* If a is zero we have nothing to do, the table is empty. */ if (a == 0) return; l = sizeof(*tbl) + a * sizeof(ipfw_table_entry); tbl = safe_calloc(1, l); tbl->tbl = ent.tbl; if (do_cmd(IP_FW_TABLE_LIST, tbl, (uintptr_t)&l) < 0) err(EX_OSERR, "getsockopt(IP_FW_TABLE_LIST)"); if (tbl->cnt && need_header) printf("---table(%d)---\n", tbl->tbl); for (a = 0; a < tbl->cnt; a++) { unsigned int tval; tval = tbl->ent[a].value; if (co.do_value_as_ip) { char tbuf[128]; strncpy(tbuf, inet_ntoa(*(struct in_addr *) &tbl->ent[a].addr), 127); /* inet_ntoa expects network order */ tval = htonl(tval); printf("%s/%u %s\n", tbuf, tbl->ent[a].masklen, inet_ntoa(*(struct in_addr *)&tval)); } else { printf("%s/%u %u\n", inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr), tbl->ent[a].masklen, tval); } } free(tbl); } ipfw_mod/ipfw/Makefile000644 000423 000000 00000002455 11310145556 015534 0ustar00luigiwheel000000 000000 # # $Id: Makefile 4420 2009-12-10 10:07:37Z marta $ # # GNUMakefile to build the userland part of ipfw on Linux # # enable extra debugging information # Do not set with = or := so we can inherit 
from the caller $(warning Building userland ipfw for $(VER)) EXTRA_CFLAGS += -O1 EXTRA_CFLAGS += -Wall -Werror EXTRA_CFLAGS += -include ../glue.h EXTRA_CFLAGS += -I ./include ifneq ($(VER),openwrt) OSARCH := $(shell uname) ifeq ($(OSARCH),Linux) EXTRA_CFLAGS += -D__BSD_VISIBLE else HAVE_NAT := $(shell grep O_NAT /usr/include/netinet/ip_fw.h) # EXTRA_CFLAGS += ... endif endif # !openwrt CFLAGS += $(EXTRA_CFLAGS) # Location of OS headers and libraries. After our stuff. USRDIR?= /usr CFLAGS += -I$(USRDIR)/include LDFLAGS += -L$(USRDIR)/lib OBJS = ipfw2.o dummynet.o main.o ipv6.o altq.o qsort_r.o OBJS += expand_number.o humanize_number.o ifneq ($(HAVE_NAT),) OBJS += nat.o EXTRA_CFLAGS += -DHAVE_NAT endif OBJS += glue.o all: ipfw echo "VER is $(VER)" ipfw: $(OBJS) $(CC) $(LDFLAGS) -o $@ $^ $(OBJS) : ipfw2.h ../glue.h include/netinet include/netinet: -@rm -rf include/netinet -@mkdir -p include/netinet -(cd include/netinet; \ for i in ip_fw.h ip_dummynet.h tcp.h; do \ ln -s ../../../dummynet/include/netinet/$$i; done; ) clean distclean: -rm -f $(OBJS) ipfw -rm -rf include/netinet/ ipfw_mod/ipfw/glue.c000644 000423 000000 00000011651 11311261630 015163 0ustar00luigiwheel000000 000000 /* * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * $Id: glue.c 4469 2009-12-11 20:23:11Z marta $
 *
 * Userland functions missing in linux
 */

/*
 * NOTE(review): the three #include directives below carry no header name
 * in this copy of the file -- restore them from the upstream glue.c.
 */
#include
#include
#include

#ifndef HAVE_NAT
/* dummy nat functions */

/* Stub used when HAVE_NAT is not defined: only reports on stderr. */
void
ipfw_show_nat(int ac, char **av)
{
	fprintf(stderr, "%s unsupported\n", __FUNCTION__);
}

/* Stub used when HAVE_NAT is not defined: only reports on stderr. */
void
ipfw_config_nat(int ac, char **av)
{
	fprintf(stderr, "%s unsupported\n", __FUNCTION__);
}
#endif

#ifdef __linux__
int optreset;	/* missing in linux */
#endif

#if defined( __linux__ ) || defined(_WIN32)
/*
 * not implemented in linux.
* taken from /usr/src/lib/libc/string/strlcpy.c */ size_t strlcpy(char *dst, const char *src, size_t siz) { char *d = dst; const char *s = src; size_t n = siz; /* Copy as many bytes as will fit */ if (n != 0 && --n != 0) { do { if ((*d++ = *s++) == 0) break; } while (--n != 0); } /* Not enough room in dst, add NUL and traverse rest of src */ if (n == 0) { if (siz != 0) *d = '\0'; /* NUL-terminate dst */ while (*s++) ; } return(s - src - 1); /* count does not include NUL */ } /* missing in linux and windows */ long long int strtonum(const char *nptr, long long minval, long long maxval, const char **errstr) { return strtoll(nptr, (char **)errstr, 0); } /* * set or get system information * XXX lock acquisition/serialize calls * * we export this as sys/module/ipfw_mod/parameters/___ * This function get or/and set the value of the sysctl passed by * the name parameter. If the old value is not desired, * oldp and oldlenp should be set to NULL. * * XXX * I do not know how this works in FreeBSD in the case * where there are no write permission on the sysctl var. * We read the value and set return variables in any way * but returns -1 on write failures, regardless the * read success. * * Since there is no information on types, in the following * code we assume a lenght of 4 is a int. * * Returns 0 on success, -1 on errors. */ int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { FILE *fp; char *basename = "/sys/module/ipfw_mod/parameters/"; char filename[256]; /* full filename */ char *varp; int ret = 0; /* return value */ int d; /* debug message */ if (0) fprintf(stderr, "%s name %s oldp %p oldlenp %p %d newp %p newlen %d\n", __FUNCTION__, name, \ oldp, oldlenp, oldlenp ? 
*oldlenp : -1 , newp, (int) newlen); if (name == NULL) /* XXX set errno */ return -1; /* locate the filename */ varp = strrchr(name, '.'); if (varp == NULL) /* XXX set errno */ return -1; snprintf(filename, sizeof(filename), "%s%s", basename, varp+1); /* * XXX we could open the file here, in rw mode * but need to check if a file have write * permissions. */ /* check parameters */ if (oldp && oldlenp) { /* read mode */ fp = fopen(filename, "r"); if (fp == NULL) { fprintf(stderr, "%s fopen error reading filename %s\n", __FUNCTION__, filename); return -1; } if (*oldlenp == 4) { if (fscanf(fp, "%d", &d) == 1) memcpy(oldp, &d, *oldlenp); else ret = -1; } fclose(fp); } if (newp && newlen) { /* write */ fp = fopen(filename, "w"); if (fp == NULL) { fprintf(stderr, "%s fopen error writing filename %s\n", __FUNCTION__, filename); return -1; } if (newlen == 4) { if (fprintf(fp, "%d", *(int*)newp) < 1) ret = -1; } fclose(fp); } return ret; } #endif /* __linux__ || _WIN32 */ ipfw_mod/ipfw/ipfw2.h000644 000423 000000 00000015154 11310017562 015267 0ustar00luigiwheel000000 000000 /* * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. * * NEW command line interface for IP firewall facility * * $FreeBSD: head/sbin/ipfw/ipfw2.h 187983 2009-02-01 16:00:49Z luigi $ */ /* * Options that can be set on the command line. 
* When reading commands from a file, a subset of the options can also * be applied globally by specifying them before the file name. * After that, each line can contain its own option that changes * the global value. * XXX The context is not restored after each line. */ struct cmdline_opts { /* boolean options: */ int do_value_as_ip; /* show table value as IP */ int do_resolv; /* try to resolve all ip to names */ int do_time; /* Show time stamps */ int do_quiet; /* Be quiet in add and flush */ int do_pipe; /* this cmd refers to a pipe */ int do_nat; /* this cmd refers to a nat config */ int do_dynamic; /* display dynamic rules */ int do_expired; /* display expired dynamic rules */ int do_compact; /* show rules in compact mode */ int do_force; /* do not ask for confirmation */ int show_sets; /* display the set each rule belongs to */ int test_only; /* only check syntax */ int comment_only; /* only print action and comment */ int verbose; /* be verbose on some commands */ /* The options below can have multiple values. */ int do_sort; /* field to sort results (0 = no) */ /* valid fields are 1 and above */ int use_set; /* work with specified set number */ /* 0 means all sets, otherwise apply to set use_set - 1 */ }; extern struct cmdline_opts co; /* * _s_x is a structure that stores a string <-> token pairs, used in * various places in the parser. Entries are stored in arrays, * with an entry with s=NULL as terminator. * The search routines are match_token() and match_value(). * Often, an element with x=0 contains an error string. 
* */ struct _s_x { char const *s; int x; }; enum tokens { TOK_NULL=0, TOK_OR, TOK_NOT, TOK_STARTBRACE, TOK_ENDBRACE, TOK_ACCEPT, TOK_COUNT, TOK_PIPE, TOK_QUEUE, TOK_DIVERT, TOK_TEE, TOK_NETGRAPH, TOK_NGTEE, TOK_FORWARD, TOK_SKIPTO, TOK_DENY, TOK_REJECT, TOK_RESET, TOK_UNREACH, TOK_CHECKSTATE, TOK_NAT, TOK_REASS, TOK_ALTQ, TOK_LOG, TOK_TAG, TOK_UNTAG, TOK_TAGGED, TOK_UID, TOK_GID, TOK_JAIL, TOK_IN, TOK_LIMIT, TOK_KEEPSTATE, TOK_LAYER2, TOK_OUT, TOK_DIVERTED, TOK_DIVERTEDLOOPBACK, TOK_DIVERTEDOUTPUT, TOK_XMIT, TOK_RECV, TOK_VIA, TOK_FRAG, TOK_IPOPTS, TOK_IPLEN, TOK_IPID, TOK_IPPRECEDENCE, TOK_IPTOS, TOK_IPTTL, TOK_IPVER, TOK_ESTAB, TOK_SETUP, TOK_TCPDATALEN, TOK_TCPFLAGS, TOK_TCPOPTS, TOK_TCPSEQ, TOK_TCPACK, TOK_TCPWIN, TOK_ICMPTYPES, TOK_MAC, TOK_MACTYPE, TOK_VERREVPATH, TOK_VERSRCREACH, TOK_ANTISPOOF, TOK_IPSEC, TOK_COMMENT, TOK_PLR, TOK_NOERROR, TOK_BUCKETS, TOK_DSTIP, TOK_SRCIP, TOK_DSTPORT, TOK_SRCPORT, TOK_ALL, TOK_MASK, TOK_BW, TOK_DELAY, TOK_PIPE_PROFILE, TOK_BURST, TOK_RED, TOK_GRED, TOK_DROPTAIL, TOK_PROTO, TOK_WEIGHT, TOK_IP, TOK_IF, TOK_ALOG, TOK_DENY_INC, TOK_SAME_PORTS, TOK_UNREG_ONLY, TOK_RESET_ADDR, TOK_ALIAS_REV, TOK_PROXY_ONLY, TOK_REDIR_ADDR, TOK_REDIR_PORT, TOK_REDIR_PROTO, TOK_IPV6, TOK_FLOWID, TOK_ICMP6TYPES, TOK_EXT6HDR, TOK_DSTIP6, TOK_SRCIP6, TOK_IPV4, TOK_UNREACH6, TOK_RESET6, TOK_FIB, TOK_SETFIB, TOK_LOOKUP, }; /* * the following macro returns an error message if we run out of * arguments. 
 */
/* NEED1 reads a variable named 'ac' from the scope where it is expanded. */
#define NEED1(msg) {if (!ac) errx(EX_USAGE, msg);}

unsigned long long align_uint64(const uint64_t *pll);

/* memory allocation support */
void *safe_calloc(size_t number, size_t size);
void *safe_realloc(void *ptr, size_t size);

/* string comparison functions used for historical compatibility */
int _substrcmp(const char *str1, const char* str2);
int _substrcmp2(const char *str1, const char* str2, const char* str3);

/* utility functions */
int match_token(struct _s_x *table, char *string);
char const *match_value(struct _s_x *p, int value);

int do_cmd(int optname, void *optval, uintptr_t optlen);

struct in6_addr;
void n2mask(struct in6_addr *mask, int n);
int contigmask(uint8_t *p, int len);

/*
 * Forward declarations to avoid including way too many headers.
 * C does not allow duplicated typedefs, so we use the base struct
 * that the typedef points to.
 * Should the typedefs use a different type, the compiler will
 * still detect the change when compiling the body of the
 * functions involved, so we do not lose error checking.
 */
struct _ipfw_insn;
struct _ipfw_insn_altq;
struct _ipfw_insn_u32;
struct _ipfw_insn_ip6;
struct _ipfw_insn_icmp6;

/*
 * The reserved set number. This is a constant in ip_fw.h
 * but we store it in a variable so other files do not depend
 * on that header just for one constant.
*/ extern int resvd_set_number; /* first-level command handlers */ void ipfw_add(int ac, char *av[]); void ipfw_show_nat(int ac, char **av); void ipfw_config_pipe(int ac, char **av); void ipfw_config_nat(int ac, char **av); void ipfw_sets_handler(int ac, char *av[]); void ipfw_table_handler(int ac, char *av[]); void ipfw_sysctl_handler(int ac, char *av[], int which); void ipfw_delete(int ac, char *av[]); void ipfw_flush(int force); void ipfw_zero(int ac, char *av[], int optname); void ipfw_list(int ac, char *av[], int show_counters); /* altq.c */ void altq_set_enabled(int enabled); u_int32_t altq_name_to_qid(const char *name); void print_altq_cmd(struct _ipfw_insn_altq *altqptr); /* dummynet.c */ void ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]); int ipfw_delete_pipe(int pipe_or_queue, int n); /* ipv6.c */ void print_unreach6_code(uint16_t code); void print_ip6(struct _ipfw_insn_ip6 *cmd, char const *s); void print_flow6id(struct _ipfw_insn_u32 *cmd); void print_icmp6types(struct _ipfw_insn_u32 *cmd); void print_ext6hdr(struct _ipfw_insn *cmd ); struct _ipfw_insn *add_srcip6(struct _ipfw_insn *cmd, char *av); struct _ipfw_insn *add_dstip6(struct _ipfw_insn *cmd, char *av); void fill_flow6(struct _ipfw_insn_u32 *cmd, char *av ); void fill_unreach6_code(u_short *codep, char *str); void fill_icmp6types(struct _ipfw_insn_icmp6 *cmd, char *av); int fill_ext6hdr(struct _ipfw_insn *cmd, char *av); ipfw_mod/ipfw/qsort.c000644 000423 000000 00000012532 11236763321 015410 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $"); #include #ifdef I_AM_QSORT_R typedef int cmp_t(void *, const void *, const void *); #else typedef int cmp_t(const void *, const void *); #endif static inline char *med3(char *, char *, char *, cmp_t *, void *); static inline void swapfunc(char *, char *, int, int); #define min(a, b) (a) < (b) ? a : b /* * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". 
*/ #define swapcode(TYPE, parmi, parmj, n) { \ long i = (n) / sizeof (TYPE); \ TYPE *pi = (TYPE *) (parmi); \ TYPE *pj = (TYPE *) (parmj); \ do { \ TYPE t = *pi; \ *pi++ = *pj; \ *pj++ = t; \ } while (--i > 0); \ } #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \ es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1; static inline void swapfunc(a, b, n, swaptype) char *a, *b; int n, swaptype; { if(swaptype <= 1) swapcode(long, a, b, n) else swapcode(char, a, b, n) } #define swap(a, b) \ if (swaptype == 0) { \ long t = *(long *)(a); \ *(long *)(a) = *(long *)(b); \ *(long *)(b) = t; \ } else \ swapfunc(a, b, es, swaptype) #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) #ifdef I_AM_QSORT_R #define CMP(t, x, y) (cmp((t), (x), (y))) #else #define CMP(t, x, y) (cmp((x), (y))) #endif static inline char * med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk #ifndef I_AM_QSORT_R __unused #endif ) { return CMP(thunk, a, b) < 0 ? (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a )) :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? 
a : c )); } #ifdef I_AM_QSORT_R void qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp) #else #define thunk NULL void qsort(void *a, size_t n, size_t es, cmp_t *cmp) #endif { char *pa, *pb, *pc, *pd, *pl, *pm, *pn; size_t d, r; int cmp_result; int swaptype, swap_cnt; loop: SWAPINIT(a, es); swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swap(pl, pl - es); return; } pm = (char *)a + (n / 2) * es; if (n > 7) { pl = a; pn = (char *)a + (n - 1) * es; if (n > 40) { d = (n / 8) * es; pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk); pm = med3(pm - d, pm, pm + d, cmp, thunk); pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk); } pm = med3(pl, pm, pn, cmp, thunk); } swap(a, pm); pa = pb = (char *)a + es; pc = pd = (char *)a + (n - 1) * es; for (;;) { while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) { if (cmp_result == 0) { swap_cnt = 1; swap(pa, pb); pa += es; } pb += es; } while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) { if (cmp_result == 0) { swap_cnt = 1; swap(pc, pd); pd -= es; } pc -= es; } if (pb > pc) break; swap(pb, pc); swap_cnt = 1; pb += es; pc -= es; } if (swap_cnt == 0) { /* Switch to insertion sort */ for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swap(pl, pl - es); return; } pn = (char *)a + n * es; r = min(pa - (char *)a, pb - pa); vecswap(a, pb - r, r); r = min(pd - pc, pn - pd - es); vecswap(pb, pn - r, r); if ((r = pb - pa) > es) #ifdef I_AM_QSORT_R qsort_r(a, r / es, es, thunk, cmp); #else qsort(a, r / es, es, cmp); #endif if ((r = pd - pc) > es) { /* Iterate rather than recurse to save stack space */ a = pn - r; n = r / es; goto loop; } /* qsort(pn - r, r / es, es, cmp);*/ } ipfw_mod/ipfw/qsort_r.c000644 000423 000000 00000000331 11236763321 015723 0ustar00luigiwheel000000 000000 /* * This file is in the public domain. 
Originally written by Garrett * A. Wollman. * * $FreeBSD: src/lib/libc/stdlib/qsort_r.c,v 1.1 2002/09/10 02:04:49 wollman Exp $ */ #define I_AM_QSORT_R #include "qsort.c" ipfw_mod/ipfw/humanize_number.c000644 000423 000000 00000011152 11305761377 017433 0ustar00luigiwheel000000 000000 /* $NetBSD: humanize_number.c,v 1.13 2007/12/14 17:26:19 christos Exp $ */ /* * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center, by Luke Mewburn and by Tomas Svensson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ // #include __FBSDID("$FreeBSD: src/lib/libutil/humanize_number.c,v 1.2.10.1 2008/04/20 16:29:01 antoine Exp $"); #include #include #include #include #include #include // #include //#include int humanize_number(char *buf, size_t len, int64_t bytes, const char *suffix, int scale, int flags) { const char *prefixes, *sep; int b, i, r, maxscale, s1, s2, sign; int64_t divisor, max; size_t baselen; assert(buf != NULL); assert(suffix != NULL); assert(scale >= 0); if (flags & HN_DIVISOR_1000) { /* SI for decimal multiplies */ divisor = 1000; if (flags & HN_B) prefixes = "B\0k\0M\0G\0T\0P\0E"; else prefixes = "\0\0k\0M\0G\0T\0P\0E"; } else { /* * binary multiplies * XXX IEC 60027-2 recommends Ki, Mi, Gi... */ divisor = 1024; if (flags & HN_B) prefixes = "B\0K\0M\0G\0T\0P\0E"; else prefixes = "\0\0K\0M\0G\0T\0P\0E"; } #define SCALE2PREFIX(scale) (&prefixes[(scale) << 1]) maxscale = 7; if (scale >= maxscale && (scale & (HN_AUTOSCALE | HN_GETSCALE)) == 0) return (-1); if (buf == NULL || suffix == NULL) return (-1); if (len > 0) buf[0] = '\0'; if (bytes < 0) { sign = -1; bytes *= -100; baselen = 3; /* sign, digit, prefix */ } else { sign = 1; bytes *= 100; baselen = 2; /* digit, prefix */ } if (flags & HN_NOSPACE) sep = ""; else { sep = " "; baselen++; } baselen += strlen(suffix); /* Check if enough room for `x y' + suffix + `\0' */ if (len < baselen + 1) return (-1); if (scale & (HN_AUTOSCALE | HN_GETSCALE)) { /* See if there is additional columns can be used. 
*/ for (max = 100, i = len - baselen; i-- > 0;) max *= 10; /* * Divide the number until it fits the given column. * If there will be an overflow by the rounding below, * divide once more. */ for (i = 0; bytes >= max - 50 && i < maxscale; i++) bytes /= divisor; if (scale & HN_GETSCALE) return (i); } else for (i = 0; i < scale && i < maxscale; i++) bytes /= divisor; /* If a value <= 9.9 after rounding and ... */ if (bytes < 995 && i > 0 && flags & HN_DECIMAL) { /* baselen + \0 + .N */ if (len < baselen + 1 + 2) return (-1); b = ((int)bytes + 5) / 10; s1 = b / 10; s2 = b % 10; r = snprintf(buf, len, "%d%s%d%s%s%s", sign * s1, ".", s2, sep, SCALE2PREFIX(i), suffix); } else r = snprintf(buf, len, "%" PRId64 "%s%s%s", sign * ((bytes + 50) / 100), sep, SCALE2PREFIX(i), suffix); return (r); } ipfw_mod/ipfw/include/net/000755 000423 000000 00000000000 11151253341 016272 5ustar00luigiwheel000000 000000 ipfw_mod/ipfw/include/sys/000755 000423 000000 00000000000 11152004450 016316 5ustar00luigiwheel000000 000000 ipfw_mod/ipfw/include/timeconv.h000644 000423 000000 00000001402 11157432360 017504 0ustar00luigiwheel000000 000000 /* * simple override for _long_to_time() */ #ifndef _TIMECONV_H_ #define _TIMECONV_H_ static __inline time_t _long_to_time(long tlong) { if (sizeof(long) == sizeof(__int32_t)) return((time_t)(__int32_t)(tlong)); return((time_t)tlong); } #ifdef __linux__ /* * some linux headers have variables called __unused, whereas the name * is an alias for the gcc attribute on FreeBSD. * We have to define __unused appropriately, but this cannot be * global because it would clash with the linux headers. * * __unused is defined here because there is not a better place * and this file is included by ipfw2.c where the offending linux * headers are not included. 
*/ #define __unused __attribute__ ((__unused__)) #endif #endif /* _TIMECONV_H_ */ ipfw_mod/ipfw/include/alias.h000644 000423 000000 00000004525 11151122421 016746 0ustar00luigiwheel000000 000000 #ifndef _ALIAS_H_ #define _ALIAS_H_ #define LIBALIAS_BUF_SIZE 128 /* * If PKT_ALIAS_LOG is set, a message will be printed to /var/log/alias.log * every time a link is created or deleted. This is useful for debugging. */ #define PKT_ALIAS_LOG 0x01 /* * If PKT_ALIAS_DENY_INCOMING is set, then incoming connections (e.g. to ftp, * telnet or web servers will be prevented by the aliasing mechanism. */ #define PKT_ALIAS_DENY_INCOMING 0x02 /* * If PKT_ALIAS_SAME_PORTS is set, packets will be attempted sent from the * same port as they originated on. This allows e.g. rsh to work *99% of the * time*, but _not_ 100% (it will be slightly flakey instead of not working * at all). This mode bit is set by PacketAliasInit(), so it is a default * mode of operation. */ #define PKT_ALIAS_SAME_PORTS 0x04 /* * If PKT_ALIAS_USE_SOCKETS is set, then when partially specified links (e.g. * destination port and/or address is zero), the packet aliasing engine will * attempt to allocate a socket for the aliasing port it chooses. This will * avoid interference with the host machine. Fully specified links do not * require this. This bit is set after a call to PacketAliasInit(), so it is * a default mode of operation. */ #ifndef NO_USE_SOCKETS #define PKT_ALIAS_USE_SOCKETS 0x08 #endif /*- * If PKT_ALIAS_UNREGISTERED_ONLY is set, then only packets with * unregistered source addresses will be aliased. Private * addresses are those in the following ranges: * * 10.0.0.0 -> 10.255.255.255 * 172.16.0.0 -> 172.31.255.255 * 192.168.0.0 -> 192.168.255.255 */ #define PKT_ALIAS_UNREGISTERED_ONLY 0x10 /* * If PKT_ALIAS_RESET_ON_ADDR_CHANGE is set, then the table of dynamic * aliasing links will be reset whenever PacketAliasSetAddress() changes the * default aliasing address. 
If the default aliasing address is left * unchanged by this function call, then the table of dynamic aliasing links * will be left intact. This bit is set after a call to PacketAliasInit(). */ #define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20 /* * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only * transparent proxying is performed. */ #define PKT_ALIAS_PROXY_ONLY 0x40 /* * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and * PacketAliasOut() are reversed. */ #define PKT_ALIAS_REVERSE 0x80 #endif /* !_ALIAS_H_ */ ipfw_mod/ipfw/include/sys/sockio.h000644 000423 000000 00000000000 11152004450 017744 0ustar00luigiwheel000000 000000 ipfw_mod/ipfw/include/net/if_dl.h000644 000423 000000 00000006462 11151122421 017522 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_dl.h 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/net/if_dl.h,v 1.14 2005/01/07 01:45:34 imp Exp $ */ #ifndef _NET_IF_DL_H_ #define _NET_IF_DL_H_ /* * A Link-Level Sockaddr may specify the interface in one of two * ways: either by means of a system-provided index number (computed * anew and possibly differently on every reboot), or by a human-readable * string such as "il0" (for managerial convenience). * * Census taking actions, such as something akin to SIOCGCONF would return * both the index and the human name. * * High volume transactions (such as giving a link-level ``from'' address * in a recvfrom or recvmsg call) may be likely only to provide the indexed * form, (which requires fewer copy operations and less space). * * The form and interpretation of the link-level address is purely a matter * of convention between the device driver and its consumers; however, it is * expected that all drivers for an interface of a given if_type will agree. */ /* * Structure of a Link-Level sockaddr: */ struct sockaddr_dl { u_char sdl_len; /* Total length of sockaddr */ u_char sdl_family; /* AF_LINK */ u_short sdl_index; /* if != 0, system given index for interface */ u_char sdl_type; /* interface type */ u_char sdl_nlen; /* interface name length, no trailing 0 reqd. 
*/ u_char sdl_alen; /* link level address length */ u_char sdl_slen; /* link layer selector length */ char sdl_data[46]; /* minimum work area, can be larger; contains both if name and ll address */ }; #define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen)) #ifndef _KERNEL #include __BEGIN_DECLS void link_addr(const char *, struct sockaddr_dl *); char *link_ntoa(const struct sockaddr_dl *); __END_DECLS #endif /* !_KERNEL */ #endif ipfw_mod/ipfw/include/net/pfvar.h000644 000423 000000 00000001323 11151253341 017560 0ustar00luigiwheel000000 000000 #ifndef _PF_VAR_H_ #define _PF_VAR_H_ /* * replacement for FreeBSD's pfqueue.h */ #include #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) struct pf_altq { TAILQ_ENTRY(pf_altq) entries; /* ... */ u_int32_t qid; /* return value */ #define PF_QNAME_SIZE 64 char qname[PF_QNAME_SIZE]; /* queue name */ }; struct pfioc_altq { u_int32_t action; u_int32_t ticket; u_int32_t nr; struct pf_altq altq; }; #define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) #define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) #endif /* !_PF_VAR_H */ ipfw_mod/dummynet/include/000755 000423 000000 00000000000 11170407602 016403 5ustar00luigiwheel000000 000000 ipfw_mod/dummynet/radix.c000644 000423 000000 00000076111 11311404347 016241 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1988, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)radix.c 8.5 (Berkeley) 5/19/95 * $FreeBSD: head/sys/net/radix.c 200354 2009-12-10 10:34:30Z luigi $ */ /* * Routines to build and maintain radix trees for routing lookups. */ #include #ifdef _KERNEL #include #include "missing.h" #include #include #include #include #include #include #include #include "opt_mpath.h" #ifdef RADIX_MPATH #include #endif #else /* !_KERNEL */ #include #include #include #define log(x, arg...) fprintf(stderr, ## arg) #define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) #define min(a, b) ((a) < (b) ? 
(a) : (b) ) #include "include/net/radix.h" #endif /* !_KERNEL */ static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, void *w); static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); static struct radix_node *rn_insert(void *, struct radix_node_head *, int *, struct radix_node [2]), *rn_newpair(void *, int, struct radix_node[2]), *rn_search(void *, struct radix_node *), *rn_search_m(void *, struct radix_node *, void *); static int max_keylen; static struct radix_mask *rn_mkfreelist; static struct radix_node_head *mask_rnhead; /* * Work area -- the following point to 3 buffers of size max_keylen, * allocated in this order in a block of memory malloc'ed by rn_init. * rn_zeros, rn_ones are set in rn_init and used in readonly afterwards. * addmask_key is used in rn_addmask in rw mode and not thread-safe. */ static char *rn_zeros, *rn_ones, *addmask_key; #define MKGet(m) { \ if (rn_mkfreelist) { \ m = rn_mkfreelist; \ rn_mkfreelist = (m)->rm_mklist; \ } else \ R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); } #define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);} #define rn_masktop (mask_rnhead->rnh_treetop) static int rn_lexobetter(void *m_arg, void *n_arg); static struct radix_mask * rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next); static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip); /* * The data structure for the keys is a radix tree with one way * branching removed. The index rn_bit at an internal node n represents a bit * position to be tested. The tree is arranged so that all descendants * of a node n have keys whose bits all agree up to position rn_bit - 1. * (We say the index of n is rn_bit.) * * There is at least one descendant which has a one bit at position rn_bit, * and at least one with a zero there. * * A route is determined by a pair of key and mask. 
We require that the
 * bit-wise logical AND of the key and mask be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0, past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If the index(m) < rn_bit, this implies the trailing last few bits of k
 * before bit b are all 0, (and hence consequently true of every descendant
 * of n), so the route applies to all descendants of the node as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */

/*
 * Most of the functions in this code assume that the key/mask arguments
 * are sockaddr-like structures, where the first byte is a u_char
 * indicating the size of the entire structure.
 *
 * To make the assumption more explicit, we use the LEN() macro to access
 * this field. It is safe to pass an expression with side effects
 * to LEN() as the argument is evaluated only once.
 * We cast the result to int as this is the dominant usage.
 */
#define LEN(x) ( (int) (*(const u_char *)(x)) )

/*
 * XXX THIS NEEDS TO BE FIXED
 * In the code, pointers to keys and masks are passed as either
 * 'void *' (because callers used to pass pointers of various kinds), or
 * 'caddr_t' (which is fine for pointer arithmetic, but not very
 * clean when you dereference it to access data).
Furthermore, caddr_t * is really 'char *', while the natural type to operate on keys and * masks would be 'u_char'. This mismatch require a lot of casts and * intermediate variables to adapt types that clutter the code. */ /* * Search a node in the tree matching the key. */ static struct radix_node * rn_search(v_arg, head) void *v_arg; struct radix_node *head; { register struct radix_node *x; register caddr_t v; for (x = head, v = v_arg; x->rn_bit >= 0;) { if (x->rn_bmask & v[x->rn_offset]) x = x->rn_right; else x = x->rn_left; } return (x); } /* * Same as above, but with an additional mask. * XXX note this function is used only once. */ static struct radix_node * rn_search_m(v_arg, head, m_arg) struct radix_node *head; void *v_arg, *m_arg; { register struct radix_node *x; register caddr_t v = v_arg, m = m_arg; for (x = head; x->rn_bit >= 0;) { if ((x->rn_bmask & m[x->rn_offset]) && (x->rn_bmask & v[x->rn_offset])) x = x->rn_right; else x = x->rn_left; } return x; } int rn_refines(m_arg, n_arg) void *m_arg, *n_arg; { register caddr_t m = m_arg, n = n_arg; register caddr_t lim, lim2 = lim = n + LEN(n); int longer = LEN(n++) - LEN(m++); int masks_are_equal = 1; if (longer > 0) lim -= longer; while (n < lim) { if (*n & ~(*m)) return 0; if (*n++ != *m++) masks_are_equal = 0; } while (n < lim2) if (*n++) return 0; if (masks_are_equal && (longer < 0)) for (lim2 = m - longer; m < lim2; ) if (*m++) return 1; return (!masks_are_equal); } struct radix_node * rn_lookup(v_arg, m_arg, head) void *v_arg, *m_arg; struct radix_node_head *head; { register struct radix_node *x; caddr_t netmask = 0; if (m_arg) { x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset); if (x == 0) return (0); netmask = x->rn_key; } x = rn_match(v_arg, head); if (x && netmask) { while (x && x->rn_mask != netmask) x = x->rn_dupedkey; } return x; } static int rn_satisfies_leaf(trial, leaf, skip) char *trial; register struct radix_node *leaf; int skip; { register char *cp = trial, *cp2 = leaf->rn_key, *cp3 
= leaf->rn_mask; char *cplim; int length = min(LEN(cp), LEN(cp2)); if (cp3 == NULL) cp3 = rn_ones; else length = min(length, LEN(cp3)); cplim = cp + length; cp3 += skip; cp2 += skip; for (cp += skip; cp < cplim; cp++, cp2++, cp3++) if ((*cp ^ *cp2) & *cp3) return 0; return 1; } struct radix_node * rn_match(v_arg, head) void *v_arg; struct radix_node_head *head; { caddr_t v = v_arg; register struct radix_node *t = head->rnh_treetop, *x; register caddr_t cp = v, cp2; caddr_t cplim; struct radix_node *saved_t, *top = t; int off = t->rn_offset, vlen = LEN(cp), matched_off; register int test, b, rn_bit; /* * Open code rn_search(v, top) to avoid overhead of extra * subroutine call. */ for (; t->rn_bit >= 0; ) { if (t->rn_bmask & cp[t->rn_offset]) t = t->rn_right; else t = t->rn_left; } /* * See if we match exactly as a host destination * or at least learn how many bits match, for normal mask finesse. * * It doesn't hurt us to limit how many bytes to check * to the length of the mask, since if it matches we had a genuine * match and the leaf we have is the most specific one anyway; * if it didn't match with a shorter length it would fail * with a long one. This wins big for class B&C netmasks which * are probably the most common case... */ if (t->rn_mask) vlen = *(u_char *)t->rn_mask; cp += off; cp2 = t->rn_key + off; cplim = v + vlen; for (; cp < cplim; cp++, cp2++) if (*cp != *cp2) goto on1; /* * This extra grot is in case we are explicitly asked * to look up the default. Ugh! * * Never return the root node itself, it seems to cause a * lot of confusion. */ if (t->rn_flags & RNF_ROOT) t = t->rn_dupedkey; return t; on1: test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ for (b = 7; (test >>= 1) > 0;) b--; matched_off = cp - v; b += matched_off << 3; rn_bit = -1 - b; /* * If there is a host route in a duped-key chain, it will be first. 
*/ if ((saved_t = t)->rn_mask == 0) t = t->rn_dupedkey; for (; t; t = t->rn_dupedkey) /* * Even if we don't match exactly as a host, * we may match if the leaf we wound up at is * a route to a net. */ if (t->rn_flags & RNF_NORMAL) { if (rn_bit <= t->rn_bit) return t; } else if (rn_satisfies_leaf(v, t, matched_off)) return t; t = saved_t; /* start searching up the tree */ do { register struct radix_mask *m; t = t->rn_parent; m = t->rn_mklist; /* * If non-contiguous masks ever become important * we can restore the masking and open coding of * the search and satisfaction test and put the * calculation of "off" back before the "do". */ while (m) { if (m->rm_flags & RNF_NORMAL) { if (rn_bit <= m->rm_bit) return (m->rm_leaf); } else { off = min(t->rn_offset, matched_off); x = rn_search_m(v, t, m->rm_mask); while (x && x->rn_mask != m->rm_mask) x = x->rn_dupedkey; if (x && rn_satisfies_leaf(v, x, off)) return x; } m = m->rm_mklist; } } while (t != top); return 0; } #ifdef RN_DEBUG int rn_nodenum; struct radix_node *rn_clist; int rn_saveinfo; int rn_debug = 1; #endif /* * Whenever we add a new leaf to the tree, we also add a parent node, * so we allocate them as an array of two elements: the first one must be * the leaf (see RNTORT() in route.c), the second one is the parent. * This routine initializes the relevant fields of the nodes, so that * the leaf is the left child of the parent node, and both nodes have * (almost) all all fields filled as appropriate. * (XXX some fields are left unset, see the '#if 0' section). * The function returns a pointer to the parent node. */ static struct radix_node * rn_newpair(v, b, nodes) void *v; int b; struct radix_node nodes[2]; { register struct radix_node *tt = nodes, *t = tt + 1; t->rn_bit = b; t->rn_bmask = 0x80 >> (b & 7); t->rn_left = tt; t->rn_offset = b >> 3; #if 0 /* XXX perhaps we should fill these fields as well. 
*/ t->rn_parent = t->rn_right = NULL; tt->rn_mask = NULL; tt->rn_dupedkey = NULL; tt->rn_bmask = 0; #endif tt->rn_bit = -1; tt->rn_key = (caddr_t)v; tt->rn_parent = t; tt->rn_flags = t->rn_flags = RNF_ACTIVE; tt->rn_mklist = t->rn_mklist = 0; #ifdef RN_DEBUG tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; #endif return t; } static struct radix_node * rn_insert(v_arg, head, dupentry, nodes) void *v_arg; struct radix_node_head *head; int *dupentry; struct radix_node nodes[2]; { caddr_t v = v_arg; struct radix_node *top = head->rnh_treetop; int head_off = top->rn_offset, vlen = LEN(v); register struct radix_node *t = rn_search(v_arg, top); register caddr_t cp = v + head_off; register int b; struct radix_node *tt; /* * Find first bit at which v and t->rn_key differ */ { register caddr_t cp2 = t->rn_key + head_off; register int cmp_res; caddr_t cplim = v + vlen; while (cp < cplim) if (*cp2++ != *cp++) goto on1; *dupentry = 1; return t; on1: *dupentry = 0; cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; for (b = (cp - v) << 3; cmp_res; b--) cmp_res >>= 1; } { register struct radix_node *p, *x = top; cp = v; do { p = x; if (cp[x->rn_offset] & x->rn_bmask) x = x->rn_right; else x = x->rn_left; } while (b > (unsigned) x->rn_bit); /* x->rn_bit < b && x->rn_bit >= 0 */ #ifdef RN_DEBUG if (rn_debug) log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p); #endif t = rn_newpair(v_arg, b, nodes); tt = t->rn_left; if ((cp[p->rn_offset] & p->rn_bmask) == 0) p->rn_left = t; else p->rn_right = t; x->rn_parent = t; t->rn_parent = p; /* frees x, p as temp vars below */ if ((cp[t->rn_offset] & t->rn_bmask) == 0) { t->rn_right = x; } else { t->rn_right = tt; t->rn_left = x; } #ifdef RN_DEBUG if (rn_debug) log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p); #endif } return (tt); } struct radix_node * rn_addmask(n_arg, search, skip) int search, skip; void *n_arg; { caddr_t netmask = (caddr_t)n_arg; register struct radix_node *x; 
register caddr_t cp, cplim; register int b = 0, mlen, j; int maskduplicated, m0, isnormal; struct radix_node *saved_x; static int last_zeroed = 0; if ((mlen = LEN(netmask)) > max_keylen) mlen = max_keylen; if (skip == 0) skip = 1; if (mlen <= skip) return (mask_rnhead->rnh_nodes); if (skip > 1) bcopy(rn_ones + 1, addmask_key + 1, skip - 1); if ((m0 = mlen) > skip) bcopy(netmask + skip, addmask_key + skip, mlen - skip); /* * Trim trailing zeroes. */ for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) cp--; mlen = cp - addmask_key; if (mlen <= skip) { if (m0 >= last_zeroed) last_zeroed = mlen; return (mask_rnhead->rnh_nodes); } if (m0 < last_zeroed) bzero(addmask_key + m0, last_zeroed - m0); *addmask_key = last_zeroed = mlen; x = rn_search(addmask_key, rn_masktop); if (bcmp(addmask_key, x->rn_key, mlen) != 0) x = 0; if (x || search) return (x); R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); if ((saved_x = x) == 0) return (0); netmask = cp = (caddr_t)(x + 2); bcopy(addmask_key, cp, mlen); x = rn_insert(cp, mask_rnhead, &maskduplicated, x); if (maskduplicated) { log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); Free(saved_x); return (x); } /* * Calculate index of mask, and check for normalcy. * First find the first byte with a 0 bit, then if there are * more bits left (remember we already trimmed the trailing 0's), * the pattern must be one of those in normal_chars[], or we have * a non-contiguous mask. 
*/ cplim = netmask + mlen; isnormal = 1; for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) cp++; if (cp != cplim) { static char normal_chars[] = { 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; for (j = 0x80; (j & *cp) != 0; j >>= 1) b++; if (*cp != normal_chars[b] || cp != (cplim - 1)) isnormal = 0; } b += (cp - netmask) << 3; x->rn_bit = -1 - b; if (isnormal) x->rn_flags |= RNF_NORMAL; return (x); } static int /* XXX: arbitrary ordering for non-contiguous masks */ rn_lexobetter(m_arg, n_arg) void *m_arg, *n_arg; { register u_char *mp = m_arg, *np = n_arg, *lim; if (LEN(mp) > LEN(np)) return 1; /* not really, but need to check longer one first */ if (LEN(mp) == LEN(np)) for (lim = mp + LEN(mp); mp < lim;) if (*mp++ > *np++) return 1; return 0; } static struct radix_mask * rn_new_radix_mask(tt, next) register struct radix_node *tt; register struct radix_mask *next; { register struct radix_mask *m; MKGet(m); if (m == 0) { log(LOG_ERR, "Mask for route not entered\n"); return (0); } bzero(m, sizeof *m); m->rm_bit = tt->rn_bit; m->rm_flags = tt->rn_flags; if (tt->rn_flags & RNF_NORMAL) m->rm_leaf = tt; else m->rm_mask = tt->rn_mask; m->rm_mklist = next; tt->rn_mklist = m; return m; } struct radix_node * rn_addroute(v_arg, n_arg, head, treenodes) void *v_arg, *n_arg; struct radix_node_head *head; struct radix_node treenodes[2]; { caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; register struct radix_node *t, *x = 0, *tt; struct radix_node *saved_tt, *top = head->rnh_treetop; short b = 0, b_leaf = 0; int keyduplicated; caddr_t mmask; struct radix_mask *m, **mp; /* * In dealing with non-contiguous masks, there may be * many different routes which have the same mask. * We will find it useful to have a unique pointer to * the mask to speed avoiding duplicate references at * nodes and possibly save time in calculating indices. 
*/ if (netmask) { if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) return (0); b_leaf = x->rn_bit; b = -1 - x->rn_bit; netmask = x->rn_key; } /* * Deal with duplicated keys: attach node to previous instance */ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { #ifdef RADIX_MPATH /* permit multipath, if enabled for the family */ if (rn_mpath_capable(head) && netmask == tt->rn_mask) { /* * go down to the end of multipaths, so that * new entry goes into the end of rn_dupedkey * chain. */ do { t = tt; tt = tt->rn_dupedkey; } while (tt && t->rn_mask == tt->rn_mask); break; } #endif if (tt->rn_mask == netmask) return (0); if (netmask == 0 || (tt->rn_mask && ((b_leaf < tt->rn_bit) /* index(netmask) > node */ || rn_refines(netmask, tt->rn_mask) || rn_lexobetter(netmask, tt->rn_mask)))) break; } /* * If the mask is not duplicated, we wouldn't * find it among possible duplicate key entries * anyway, so the above test doesn't hurt. * * We sort the masks for a duplicated key the same way as * in a masklist -- most specific to least specific. * This may require the unfortunate nuisance of relocating * the head of the list. * * We also reverse, or doubly link the list through the * parent pointer. 
*/ if (tt == saved_tt) { struct radix_node *xx = x; /* link in at head of list */ (tt = treenodes)->rn_dupedkey = t; tt->rn_flags = t->rn_flags; tt->rn_parent = x = t->rn_parent; t->rn_parent = tt; /* parent */ if (x->rn_left == t) x->rn_left = tt; else x->rn_right = tt; saved_tt = tt; x = xx; } else { (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; t->rn_dupedkey = tt; tt->rn_parent = t; /* parent */ if (tt->rn_dupedkey) /* parent */ tt->rn_dupedkey->rn_parent = tt; /* parent */ } #ifdef RN_DEBUG t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; #endif tt->rn_key = (caddr_t) v; tt->rn_bit = -1; tt->rn_flags = RNF_ACTIVE; } /* * Put mask in tree. */ if (netmask) { tt->rn_mask = netmask; tt->rn_bit = x->rn_bit; tt->rn_flags |= x->rn_flags & RNF_NORMAL; } t = saved_tt->rn_parent; if (keyduplicated) goto on2; b_leaf = -1 - t->rn_bit; if (t->rn_right == saved_tt) x = t->rn_left; else x = t->rn_right; /* Promote general routes from below */ if (x->rn_bit < 0) { for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) { *mp = m = rn_new_radix_mask(x, 0); if (m) mp = &m->rm_mklist; } } else if (x->rn_mklist) { /* * Skip over masks whose index is > that of new node */ for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) if (m->rm_bit >= b_leaf) break; t->rn_mklist = m; *mp = 0; } on2: /* Add new route to highest possible ancestor's list */ if ((netmask == 0) || (b > t->rn_bit )) return tt; /* can't lift at all */ b_leaf = tt->rn_bit; do { x = t; t = t->rn_parent; } while (b <= t->rn_bit && x != top); /* * Search through routes associated with node to * insert new route according to index. * Need same criteria as when sorting dupedkeys to avoid * double loop on deletion. 
*/ for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) { if (m->rm_bit < b_leaf) continue; if (m->rm_bit > b_leaf) break; if (m->rm_flags & RNF_NORMAL) { mmask = m->rm_leaf->rn_mask; if (tt->rn_flags & RNF_NORMAL) { log(LOG_ERR, "Non-unique normal route, mask not entered\n"); return tt; } } else mmask = m->rm_mask; if (mmask == netmask) { m->rm_refs++; tt->rn_mklist = m; return tt; } if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask)) break; } *mp = rn_new_radix_mask(tt, *mp); return tt; } struct radix_node * rn_delete(v_arg, netmask_arg, head) void *v_arg, *netmask_arg; struct radix_node_head *head; { register struct radix_node *t, *p, *x, *tt; struct radix_mask *m, *saved_m, **mp; struct radix_node *dupedkey, *saved_tt, *top; caddr_t v, netmask; int b, head_off, vlen; v = v_arg; netmask = netmask_arg; x = head->rnh_treetop; tt = rn_search(v, x); head_off = x->rn_offset; vlen = LEN(v); saved_tt = tt; top = x; if (tt == 0 || bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) return (0); /* * Delete our route from mask lists. 
*/ if (netmask) { if ((x = rn_addmask(netmask, 1, head_off)) == 0) return (0); netmask = x->rn_key; while (tt->rn_mask != netmask) if ((tt = tt->rn_dupedkey) == 0) return (0); } if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) goto on1; if (tt->rn_flags & RNF_NORMAL) { if (m->rm_leaf != tt || m->rm_refs > 0) { log(LOG_ERR, "rn_delete: inconsistent annotation\n"); return 0; /* dangling ref could cause disaster */ } } else { if (m->rm_mask != tt->rn_mask) { log(LOG_ERR, "rn_delete: inconsistent annotation\n"); goto on1; } if (--m->rm_refs >= 0) goto on1; } b = -1 - tt->rn_bit; t = saved_tt->rn_parent; if (b > t->rn_bit) goto on1; /* Wasn't lifted at all */ do { x = t; t = t->rn_parent; } while (b <= t->rn_bit && x != top); for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) if (m == saved_m) { *mp = m->rm_mklist; MKFree(m); break; } if (m == 0) { log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); if (tt->rn_flags & RNF_NORMAL) return (0); /* Dangling ref to us */ } on1: /* * Eliminate us from tree */ if (tt->rn_flags & RNF_ROOT) return (0); #ifdef RN_DEBUG /* Get us out of the creation list */ for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {} if (t) t->rn_ybro = tt->rn_ybro; #endif t = tt->rn_parent; dupedkey = saved_tt->rn_dupedkey; if (dupedkey) { /* * Here, tt is the deletion target and * saved_tt is the head of the dupekey chain. 
*/ if (tt == saved_tt) { /* remove from head of chain */ x = dupedkey; x->rn_parent = t; if (t->rn_left == tt) t->rn_left = x; else t->rn_right = x; } else { /* find node in front of tt on the chain */ for (x = p = saved_tt; p && p->rn_dupedkey != tt;) p = p->rn_dupedkey; if (p) { p->rn_dupedkey = tt->rn_dupedkey; if (tt->rn_dupedkey) /* parent */ tt->rn_dupedkey->rn_parent = p; /* parent */ } else log(LOG_ERR, "rn_delete: couldn't find us\n"); } t = tt + 1; if (t->rn_flags & RNF_ACTIVE) { #ifndef RN_DEBUG *++x = *t; p = t->rn_parent; #else b = t->rn_info; *++x = *t; t->rn_info = b; p = t->rn_parent; #endif if (p->rn_left == t) p->rn_left = x; else p->rn_right = x; x->rn_left->rn_parent = x; x->rn_right->rn_parent = x; } goto out; } if (t->rn_left == tt) x = t->rn_right; else x = t->rn_left; p = t->rn_parent; if (p->rn_right == t) p->rn_right = x; else p->rn_left = x; x->rn_parent = p; /* * Demote routes attached to us. */ if (t->rn_mklist) { if (x->rn_bit >= 0) { for (mp = &x->rn_mklist; (m = *mp);) mp = &m->rm_mklist; *mp = t->rn_mklist; } else { /* If there are any key,mask pairs in a sibling duped-key chain, some subset will appear sorted in the same order attached to our mklist */ for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) if (m == x->rn_mklist) { struct radix_mask *mm = m->rm_mklist; x->rn_mklist = 0; if (--(m->rm_refs) < 0) MKFree(m); m = mm; } if (m) log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n", (void *)m, (void *)x); } } /* * We may be holding an active internal node in the tree. */ x = tt + 1; if (t != x) { #ifndef RN_DEBUG *t = *x; #else b = t->rn_info; *t = *x; t->rn_info = b; #endif t->rn_left->rn_parent = t; t->rn_right->rn_parent = t; p = x->rn_parent; if (p->rn_left == x) p->rn_left = t; else p->rn_right = t; } out: tt->rn_flags &= ~RNF_ACTIVE; tt[1].rn_flags &= ~RNF_ACTIVE; return (tt); } /* * This is the same as rn_walktree() except for the parameters and the * exit. 
*/
/*
 * Arguments, as used by the code below:
 *   h  tree head; the descent starts at h->rnh_treetop.
 *   a  key bytes, used to pick the left/right branch at each internal node.
 *   m  mask bytes; the descent stops at the first internal node whose
 *      tested bit is not set in the mask.
 *   f  callback, invoked as (*f)(leaf, w) on every leaf reached by the
 *      walk that is not flagged RNF_ROOT, including the leaves chained
 *      through rn_dupedkey.
 *   w  opaque argument handed to f unchanged.
 * Returns the first non-zero value returned by f, otherwise 0.
 */
static int
rn_walktree_from(h, a, m, f, w)
	struct radix_node_head *h;
	void *a, *m;
	walktree_f_t *f;
	void *w;
{
	int error;
	struct radix_node *base, *next;
	u_char *xa = (u_char *)a;
	u_char *xm = (u_char *)m;
	register struct radix_node *rn, *last = 0 /* shut up gcc */;
	int stopping = 0;
	int lastb;

	/*
	 * rn_search_m is sort-of-open-coded here. We cannot use the
	 * function because we need to keep track of the last node seen.
	 */
	/* printf("about to search\n"); */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
		last = rn;
		/* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
		       rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
		/* The mask does not cover this node's bit: stop descending. */
		if (!(rn->rn_bmask & xm[rn->rn_offset])) {
			break;
		}
		if (rn->rn_bmask & xa[rn->rn_offset]) {
			rn = rn->rn_right;
		} else {
			rn = rn->rn_left;
		}
	}
	/* printf("done searching\n"); */

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
	 * Either way, last is the node we want to start from.
	 */
	rn = last;
	lastb = rn->rn_bit;	/* bit index of the subtree root; bounds the climb below */

	/* printf("rn %p, lastb %d\n", rn, lastb);*/

	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	/* Start from the leftmost leaf below the chosen node. */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;

	while (!stopping) {
		/* printf("node %p (%d)\n", rn, rn->rn_bit); */
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		       && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/* printf("up too far\n"); */
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used. Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * At the top of the tree, no need to traverse the right
		 * half, prevent the traversal of the entire tree in the
		 * case of default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT)
			stopping = 1;

		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != 0) {
			/* Fetch the chain successor before f runs: f may delete rn. */
			base = rn->rn_dupedkey;
			/* printf("leaf %p\n", rn); */
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		rn = next;

		if (rn->rn_flags & RNF_ROOT) {
			/* printf("root, stopping"); */
			stopping = 1;
		}

	}
	return 0;
}

/*
 * Walk the whole tree, starting from the leftmost leaf, and call
 * (*f)(leaf, w) on every leaf that is not flagged RNF_ROOT, following
 * each leaf's rn_dupedkey chain as well.
 * The walk ends with the first non-zero value returned by f (which is
 * returned to the caller), or with 0 once the successor leaf is one
 * flagged RNF_ROOT.
 */
static int
rn_walktree(h, f, w)
	struct radix_node_head *h;
	walktree_f_t *f;
	void *w;
{
	int error;
	struct radix_node *base, *next;
	register struct radix_node *rn = h->rnh_treetop;
	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */

	/* First time through node, go left */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		       && (rn->rn_flags & RNF_ROOT) == 0)
			rn = rn->rn_parent;
		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base)) {
			/* Fetch the chain successor before f runs: f may delete rn. */
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			return (0);
	}
	/* NOTREACHED */
}

/*
 * Allocate and initialize an empty tree. This has 3 nodes, which are
 * part of the radix_node_head (in the order <left,root,right>) and are
 * marked RNF_ROOT so they cannot be freed.
 * The leaves have all-zero and all-one keys, with significant
 * bits starting at 'off'.
 * Return 1 on success, 0 on error.
*/
int
rn_inithead(head, off)
	void **head;
	int off;
{
	register struct radix_node_head *rnh;
	register struct radix_node *t, *tt, *ttt;

	/* A head that is already set is left untouched and reported as success. */
	if (*head)
		return (1);
	R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
	if (rnh == 0)
		return (0);
#ifdef _KERNEL
	RADIX_NODE_HEAD_LOCK_INIT(rnh);
#endif
	*head = rnh;
	/* t is the top (internal) node, testing bit 'off'; its left leaf has the all-zero key. */
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;	/* third embedded node: the right leaf */
	t->rn_right = ttt;
	t->rn_parent = t;		/* the top node is its own parent */
	tt = t->rn_left;	/* ... which in turn is rnh->rnh_nodes */
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_bit = -1 - off;
	/* The right leaf starts as a copy of the left one, then gets the all-ones key. */
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	/* Publish the tree operations through the head's method pointers. */
	rnh->rnh_addaddr = rn_addroute;
	rnh->rnh_deladdr = rn_delete;
	rnh->rnh_matchaddr = rn_match;
	rnh->rnh_lookup = rn_lookup;
	rnh->rnh_walktree = rn_walktree;
	rnh->rnh_walktree_from = rn_walktree_from;
	rnh->rnh_treetop = t;
	return (1);
}

/*
 * One-time setup of the radix code for keys of at most 'maxk' bytes.
 * Records maxk in max_keylen and allocates a single zeroed buffer of
 * 3 * max_keylen bytes that is carved into three consecutive regions:
 * rn_zeros (left all zero), rn_ones (filled with 0xff bytes) and
 * addmask_key (left all zero).  Finally the tree of masks, mask_rnhead,
 * is created with rn_inithead().
 * With maxk == 0 an error is logged and nothing is allocated; an
 * allocation failure or a failing rn_inithead() ends in panic().
 */
void
rn_init(int maxk)
{
	char *cp, *cplim;

	max_keylen = maxk;
	if (max_keylen == 0) {
		log(LOG_ERR,
		    "rn_init: radix functions require max_keylen be set\n");
		return;
	}
	R_Malloc(rn_zeros, char *, 3 * max_keylen);
	if (rn_zeros == NULL)
		panic("rn_init");
	bzero(rn_zeros, 3 * max_keylen);
	rn_ones = cp = rn_zeros + max_keylen;
	addmask_key = cplim = rn_ones + max_keylen;
	/* Fill the middle region (rn_ones up to addmask_key) with all-one bits. */
	while (cp < cplim)
		*cp++ = -1;
	if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
		panic("rn_init 2");
}
ipfw_mod/dummynet/ip_fw_pfil.c000644 000423 000000 00000032033 11310272047 017242 0ustar00luigiwheel000000 000000 
/*-
 * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.25.2.2 2008/04/25 10:26:30 oleg Exp $"); #if !defined(KLD_MODULE) #include "opt_ipfw.h" #include "opt_ipdn.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #endif /* KLD_MODULE */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "missing.h" #include #include #include #include #include #include #include #include VNET_DEFINE(int, fw_enable) = 1; #ifdef INET6 VNET_DEFINE(int, fw6_enable) = 1; #endif int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Divert hooks. */ ip_divert_packet_t *ip_divert_ptr = NULL; /* ng_ipfw hooks. */ ng_ipfw_input_t *ng_ipfw_input_p = NULL; /* Forward declarations. 
*/ static int ipfw_divert(struct mbuf **, int, int); #define DIV_DIR_IN 1 #define DIV_DIR_OUT 0 int ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) { struct ip_fw_args args; struct ng_ipfw_tag *ng_tag; struct m_tag *dn_tag; int ipfw = 0; int divert; int tee; #ifdef IPFIREWALL_FORWARD struct m_tag *fwd_tag; #endif KASSERT(dir == PFIL_IN, ("ipfw_check_in wrong direction!")); bzero(&args, sizeof(args)); ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, NULL); if (ng_tag != NULL) { KASSERT(ng_tag->dir == NG_IPFW_IN, ("ng_ipfw tag with wrong direction")); args.rule = ng_tag->rule; args.rule_id = ng_tag->rule_id; args.chain_id = ng_tag->chain_id; m_tag_delete(*m0, (struct m_tag *)ng_tag); } again: dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); if (dn_tag != NULL){ struct dn_pkt_tag *dt; dt = (struct dn_pkt_tag *)(dn_tag+1); args.rule = dt->rule; args.rule_id = dt->rule_id; args.chain_id = dt->chain_id; m_tag_delete(*m0, dn_tag); } args.m = *m0; args.inp = inp; tee = 0; if (V_fw_one_pass == 0 || args.rule == NULL) { ipfw = ipfw_chk(&args); *m0 = args.m; } else ipfw = IP_FW_PASS; KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); switch (ipfw) { case IP_FW_PASS: if (args.next_hop == NULL) goto pass; #ifdef IPFIREWALL_FORWARD fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in), M_NOWAIT); if (fwd_tag == NULL) goto drop; bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in)); m_tag_prepend(*m0, fwd_tag); if (in_localip(args.next_hop->sin_addr)) (*m0)->m_flags |= M_FASTFWD_OURS; goto pass; #endif break; /* not reached */ case IP_FW_DENY: goto drop; break; /* not reached */ case IP_FW_DUMMYNET: if (ip_dn_io_ptr == NULL) goto drop; if (mtod(*m0, struct ip *)->ip_v == 4) ip_dn_io_ptr(m0, DN_TO_IP_IN, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args); if (*m0 != NULL) goto again; return 0; /* packet consumed */ case IP_FW_TEE: 
tee = 1; /* fall through */ case IP_FW_DIVERT: divert = ipfw_divert(m0, DIV_DIR_IN, tee); if (divert) { *m0 = NULL; return 0; /* packet consumed */ } else { args.rule = NULL; goto again; /* continue with packet */ } case IP_FW_NGTEE: if (!NG_IPFW_LOADED) goto drop; (void)ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 1); goto again; /* continue with packet */ case IP_FW_NETGRAPH: if (!NG_IPFW_LOADED) goto drop; return ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 0); case IP_FW_NAT: goto again; /* continue with packet */ case IP_FW_REASS: goto again; default: KASSERT(0, ("%s: unknown retval", __func__)); } drop: if (*m0) m_freem(*m0); *m0 = NULL; return (EACCES); pass: return 0; /* not filtered */ } int ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) { struct ip_fw_args args; struct ng_ipfw_tag *ng_tag; struct m_tag *dn_tag; int ipfw = 0; int divert; int tee; #ifdef IPFIREWALL_FORWARD struct m_tag *fwd_tag; #endif KASSERT(dir == PFIL_OUT, ("ipfw_check_out wrong direction!")); bzero(&args, sizeof(args)); ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, NULL); if (ng_tag != NULL) { KASSERT(ng_tag->dir == NG_IPFW_OUT, ("ng_ipfw tag with wrong direction")); args.rule = ng_tag->rule; args.rule_id = ng_tag->rule_id; args.chain_id = ng_tag->chain_id; m_tag_delete(*m0, (struct m_tag *)ng_tag); } again: dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); if (dn_tag != NULL) { struct dn_pkt_tag *dt; dt = (struct dn_pkt_tag *)(dn_tag+1); args.rule = dt->rule; args.rule_id = dt->rule_id; args.chain_id = dt->chain_id; m_tag_delete(*m0, dn_tag); } args.m = *m0; args.oif = ifp; args.inp = inp; tee = 0; if (V_fw_one_pass == 0 || args.rule == NULL) { ipfw = ipfw_chk(&args); *m0 = args.m; } else ipfw = IP_FW_PASS; KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); switch (ipfw) { case IP_FW_PASS: if (args.next_hop == NULL) goto pass; #ifdef IPFIREWALL_FORWARD /* Overwrite existing tag. 
*/ fwd_tag = m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); if (fwd_tag == NULL) { fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in), M_NOWAIT); if (fwd_tag == NULL) goto drop; } else m_tag_unlink(*m0, fwd_tag); bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in)); m_tag_prepend(*m0, fwd_tag); if (in_localip(args.next_hop->sin_addr)) (*m0)->m_flags |= M_FASTFWD_OURS; goto pass; #endif break; /* not reached */ case IP_FW_DENY: goto drop; break; /* not reached */ case IP_FW_DUMMYNET: if (ip_dn_io_ptr == NULL) break; if (mtod(*m0, struct ip *)->ip_v == 4) ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args); if (*m0 != NULL) goto again; return 0; /* packet consumed */ break; case IP_FW_TEE: tee = 1; /* fall through */ case IP_FW_DIVERT: divert = ipfw_divert(m0, DIV_DIR_OUT, tee); if (divert) { *m0 = NULL; return 0; /* packet consumed */ } else { args.rule = NULL; goto again; /* continue with packet */ } case IP_FW_NGTEE: if (!NG_IPFW_LOADED) goto drop; (void)ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 1); goto again; /* continue with packet */ case IP_FW_NETGRAPH: if (!NG_IPFW_LOADED) goto drop; return ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 0); case IP_FW_NAT: goto again; /* continue with packet */ case IP_FW_REASS: goto again; default: KASSERT(0, ("%s: unknown retval", __func__)); } drop: if (*m0) m_freem(*m0); *m0 = NULL; return (EACCES); pass: return 0; /* not filtered */ } static int ipfw_divert(struct mbuf **m, int incoming, int tee) { /* * ipfw_chk() has already tagged the packet with the divert tag. * If tee is set, copy packet and return original. * If not tee, consume packet and send it to divert socket. */ struct mbuf *clone, *reass; struct ip *ip; int hlen; reass = NULL; /* Is divert module loaded? */ if (ip_divert_ptr == NULL) goto nodivert; /* Cloning needed for tee? 
*/ if (tee) clone = m_dup(*m, M_DONTWAIT); else clone = *m; /* In case m_dup was unable to allocate mbufs. */ if (clone == NULL) goto teeout; /* * Divert listeners can only handle non-fragmented packets. * However when tee is set we will *not* de-fragment the packets; * Doing do would put the reassembly into double-jeopardy. On top * of that someone doing a tee will probably want to get the packet * in its original form. */ ip = mtod(clone, struct ip *); if (!tee && ip->ip_off & (IP_MF | IP_OFFMASK)) { /* Reassemble packet. */ reass = ip_reass(clone); /* * IP header checksum fixup after reassembly and leave header * in network byte order. */ if (reass != NULL) { ip = mtod(reass, struct ip *); hlen = ip->ip_hl << 2; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(reass, hlen); clone = reass; } else clone = NULL; } else { /* Convert header to network byte order. */ ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); } /* Do the dirty job... */ if (clone && ip_divert_ptr != NULL) ip_divert_ptr(clone, incoming); teeout: /* * For tee we leave the divert tag attached to original packet. * It will then continue rule evaluation after the tee rule. 
*/ if (tee) return 0; /* Packet diverted and consumed */ return 1; nodivert: m_freem(*m); return 1; } int ipfw_hook(void) { struct pfil_head *pfh_inet; pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return ENOENT; (void)pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); (void)pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); return 0; } int ipfw_unhook(void) { struct pfil_head *pfh_inet; pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return ENOENT; (void)pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); (void)pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); return 0; } #ifdef INET6 int ipfw6_hook(void) { struct pfil_head *pfh_inet6; pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (pfh_inet6 == NULL) return ENOENT; (void)pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); (void)pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); return 0; } int ipfw6_unhook(void) { struct pfil_head *pfh_inet6; pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (pfh_inet6 == NULL) return ENOENT; (void)pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); (void)pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); return 0; } #endif /* INET6 */ int ipfw_chg_hook(SYSCTL_HANDLER_ARGS) { int enable; int oldenable; int error; if (arg1 == &VNET_NAME(fw_enable)) { enable = V_fw_enable; } #ifdef INET6 else if (arg1 == &VNET_NAME(fw6_enable)) { enable = V_fw6_enable; } #endif else return (EINVAL); oldenable = enable; error = sysctl_handle_int(oidp, &enable, 0, req); if (error) return (error); enable = (enable) ? 
1 : 0; if (enable == oldenable) return (0); if (arg1 == &VNET_NAME(fw_enable)) { if (enable) error = ipfw_hook(); else error = ipfw_unhook(); if (error) return (error); V_fw_enable = enable; } #ifdef INET6 else if (arg1 == &VNET_NAME(fw6_enable)) { if (enable) error = ipfw6_hook(); else error = ipfw6_unhook(); if (error) return (error); V_fw6_enable = enable; } #endif return (0); } static int ipfw_modevent(module_t mod, int type, void *unused) { int err = 0; switch (type) { case MOD_LOAD: if ((err = ipfw_init()) != 0) { printf("ipfw_init() error\n"); break; } if ((err = ipfw_hook()) != 0) { printf("ipfw_hook() error\n"); break; } #ifdef INET6 if ((err = ipfw6_hook()) != 0) { printf("ipfw_hook() error\n"); break; } #endif break; case MOD_UNLOAD: if ((err = ipfw_unhook()) > 0) break; #ifdef INET6 if ((err = ipfw6_unhook()) > 0) break; #endif ipfw_destroy(); break; default: return EOPNOTSUPP; break; } return err; } static moduledata_t ipfwmod = { "ipfw", ipfw_modevent, 0 }; DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY - 256); MODULE_VERSION(ipfw, 2); ipfw_mod/dummynet/missing.h000644 000423 000000 00000034500 11311406516 016604 0ustar00luigiwheel000000 000000 /* * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Id: missing.h 4492 2009-12-14 10:27:12Z luigi $ * * Header for kernel variables and functions that are not available in * userland. */ #ifndef _MISSING_H_ #define _MISSING_H_ #ifdef _WIN32 #ifndef DEFINE_SPINLOCK #define DEFINE_SPINLOCK(x) FAST_MUTEX x #endif /* spinlock --> Guarded Mutex KGUARDED_MUTEX */ /* http://www.reactos.org/wiki/index.php/Guarded_Mutex */ #define spin_lock_init(_l) #define spin_lock_bh(_l) #define spin_unlock_bh(_l) #include /* bsd-compat.c */ #include /* bsd-compat.c */ #include /* local version */ #else /* __linux__ */ #include /* do_gettimeofday */ #include /* local version */ struct inpcb; /* * Kernel locking support. * FreeBSD uses mtx in dummynet.c and struct rwlock ip_fw2.c * * In linux we use spinlock_bh to implement both. 
* For 'struct rwlock' we need an #ifdef to change it to spinlock_t */ #ifndef DEFINE_SPINLOCK /* this is for linux 2.4 */ #define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED #endif #endif /* __linux__ */ #define rw_assert(a, b) #define rw_destroy(_l) #define rw_init(_l, msg) spin_lock_init(_l) #define rw_rlock(_l) spin_lock_bh(_l) #define rw_runlock(_l) spin_unlock_bh(_l) #define rw_wlock(_l) spin_lock_bh(_l) #define rw_wunlock(_l) spin_unlock_bh(_l) #define rw_init_flags(_l, s, v) #define mtx_assert(a, b) #define mtx_destroy(m) #define mtx_init(m, a,b,c) spin_lock_init(m) #define mtx_lock(_l) spin_lock_bh(_l) #define mtx_unlock(_l) spin_unlock_bh(_l) /* end of locking support */ /* in netinet/in.h */ #define in_nullhost(x) ((x).s_addr == INADDR_ANY) /* bzero not present on linux, but this should go in glue.h */ #define bzero(s, n) memset(s, 0, n) #define bcmp(p1, p2, n) memcmp(p1, p2, n) /* ethernet stuff */ #define ETHERTYPE_IP 0x0800 /* IP protocol */ #define ETHER_ADDR_LEN 6 /* length of an Ethernet address */ struct ether_header { u_char ether_dhost[ETHER_ADDR_LEN]; u_char ether_shost[ETHER_ADDR_LEN]; u_short ether_type; }; #define ETHER_ADDR_LEN 6 /* length of an Ethernet address */ #define ETHER_TYPE_LEN 2 /* length of the Ethernet type field */ #define ETHER_HDR_LEN (ETHER_ADDR_LEN*2+ETHER_TYPE_LEN) /* ip_dummynet.c */ #define __FreeBSD_version 500035 #ifdef __linux__ struct moduledata; int my_mod_register(struct moduledata *mod, const char *name, int order); /* define some macro for ip_dummynet */ struct malloc_type { }; #define MALLOC_DEFINE(type, shortdesc, longdesc) \ struct malloc_type type[1]; void *md_dummy_ ## type = type #define CTASSERT(x) #define log(_level, fmt, arg...) 
printk(KERN_ERR fmt, ##arg) /* * gettimeofday would be in sys/time.h but it is not * visible if _KERNEL is defined */ int gettimeofday(struct timeval *, struct timezone *); #else /* _WIN32 */ #define MALLOC_DEFINE(a,b,c) #endif /* _WIN32 */ extern int hz; extern long tick; /* exists in 2.4 but not in 2.6 */ extern int bootverbose; extern time_t time_uptime; extern struct timeval boottime; extern int max_linkhdr; extern int ip_defttl; extern u_long in_ifaddrhmask; /* mask for hash table */ extern struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ /*-------------------------------------------------*/ /* define, includes and functions missing in linux */ /* include and define */ #include /* inet_ntoa */ struct mbuf; /* used by ip_dummynet.c */ void reinject_drop(struct mbuf* m); #include /* error define */ #include /* IFNAMESIZ */ /* * some network structure can be defined in the bsd way * by using the _FAVOR_BSD definition. This is not true * for icmp structure. * XXX struct icmp contains bsd names in * /usr/include/netinet/ip_icmp.h */ #ifdef __linux__ #define icmp_code code #define icmp_type type /* linux in6_addr has no member __u6_addr * replace the whole structure ? */ #define __u6_addr in6_u #define __u6_addr32 u6_addr32 #endif /* __linux__ */ /* defined in linux/sctp.h with no bsd definition */ struct sctphdr { uint16_t src_port; /* source port */ uint16_t dest_port; /* destination port */ uint32_t v_tag; /* verification tag of packet */ uint32_t checksum; /* Adler32 C-Sum */ /* chunks follow... */ }; /* missing definition */ #define TH_FIN 0x01 #define TH_SYN 0x02 #define TH_RST 0x04 #define TH_ACK 0x10 #define RTF_CLONING 0x100 /* generate new routes on use */ #define IPPROTO_OSPFIGP 89 /* OSPFIGP */ #define IPPROTO_CARP 112 /* CARP */ #ifndef _WIN32 #define IPPROTO_IPV4 IPPROTO_IPIP /* for compatibility */ #endif #define CARP_VERSION 2 #define CARP_ADVERTISEMENT 0x01 #define PRIV_NETINET_IPFW 491 /* Administer IPFW firewall. 
*/ #define IP_FORWARDING 0x1 /* most of ip header exists */ #define NETISR_IP 2 /* same as AF_INET */ #define PRIV_NETINET_DUMMYNET 494 /* Administer DUMMYNET. */ extern int securelevel; struct carp_header { #if BYTE_ORDER == LITTLE_ENDIAN u_int8_t carp_type:4, carp_version:4; #endif #if BYTE_ORDER == BIG_ENDIAN u_int8_t carp_version:4, carp_type:4; #endif }; struct pim { int dummy; /* windows compiler does not like empty definition */ }; struct route { struct rtentry *ro_rt; struct sockaddr ro_dst; }; struct ifaltq { void *ifq_head; }; /* * ifnet->if_snd is used in ip_dummynet.c to take the transmission * clock. */ #if defined( __linux__) #define if_xname name #define if_snd XXX #elif defined( _WIN32 ) /* used in ip_dummynet.c */ struct ifnet { char if_xname[IFNAMSIZ]; /* external name (name + unit) */ // struct ifaltq if_snd; /* output queue (includes altq) */ }; struct net_device { char if_xname[IFNAMSIZ]; /* external name (name + unit) */ }; #endif /* involves mbufs */ int in_cksum(struct mbuf *m, int len); #define divert_cookie(mtag) 0 #define divert_info(mtag) 0 #define INADDR_TO_IFP(a, b) b = NULL #define pf_find_mtag(a) NULL #define pf_get_mtag(a) NULL #ifndef _WIN32 #define AF_LINK AF_ASH /* ? 
our sys/socket.h */ #endif struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ sa_family_t af; /* for ECN */ u_int32_t qid; /* queue id */ }; #if 0 // ndef radix /* radix stuff in radix.h and radix.c */ struct radix_node { caddr_t rn_key; /* object of search */ caddr_t rn_mask; /* netmask, if present */ }; #endif /* !radix */ /* missing kernel functions */ char *inet_ntoa(struct in_addr ina); int random(void); /* * Return the risult of a/b * * this is used in linux kernel space, * since the 64bit division needs to * be done using a macro */ int64_t div64(int64_t a, int64_t b); char * inet_ntoa_r(struct in_addr ina, char *buf); /* from bsd sys/queue.h */ #define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = TAILQ_FIRST((head)); \ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ (var) = (tvar)) #define SLIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = SLIST_FIRST((head)); \ (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ (var) = (tvar)) /* depending of linux version */ #ifndef ETHERTYPE_IPV6 #define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */ #endif /*-------------------------------------------------*/ #define RT_NUMFIBS 1 extern u_int rt_numfibs; /* involves kernel locking function */ #ifdef RTFREE #undef RTFREE #define RTFREE(a) fprintf(stderr, "RTFREE: commented out locks\n"); #endif void getmicrouptime(struct timeval *tv); /* from sys/netinet/ip_output.c */ struct ip_moptions; struct route; struct ip; struct mbuf *ip_reass(struct mbuf *); u_short in_cksum_hdr(struct ip *); int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp); /* from net/netisr.c */ void netisr_dispatch(int num, struct mbuf *m); /* definition moved in missing.c */ int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len); int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); /* defined in session.c */ int priv_check(struct thread *td, int priv); /* 
struct ucred is in linux/socket.h and has pid, uid, gid. * We need a 'bsd_ucred' to store also the extra info */ struct bsd_ucred { uid_t uid; gid_t gid; uint32_t xid; uint32_t nid; }; int securelevel_ge(struct ucred *cr, int level); struct sysctl_oid; struct sysctl_req; /* * sysctl are mapped into /sys/module/ipfw_mod parameters */ #define CTLFLAG_RD 1 #define CTLFLAG_RDTUN 1 #define CTLFLAG_RW 2 #define CTLFLAG_SECURE3 0 // unsupported #define CTLFLAG_VNET 0 /* unsupported */ #ifdef _WIN32 #define module_param_named(_name, _var, _ty, _perm) #else /* Linux 2.4 is mostly for openwrt */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) #include /* generic_ffs() used in ip_fw2.c */ typedef uint32_t __be32; typedef uint16_t __be16; struct sock; struct net; struct inet_hashinfo; struct sock *inet_lookup( struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif); static int inet_iif(const struct sk_buff *skb); struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); #endif /* Linux < 2.6 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) #define module_param_named(_name, _var, _ty, _perm) \ //module_param(_name, _ty, 0644) #endif #endif /* __linux__ */ #define SYSCTL_DECL(_1) #define SYSCTL_OID(_1, _2, _3, _4, _5, _6, _7, _8) #define SYSCTL_NODE(_1, _2, _3, _4, _5, _6) #define _SYSCTL_BASE(_name, _var, _ty, _perm) \ module_param_named(_name, *(_var), _ty, \ ( (_perm) == CTLFLAG_RD) ? 
0444: 0644 ) #define SYSCTL_PROC(_base, _oid, _name, _mode, _var, _val, _desc, _a, _b) #define SYSCTL_INT(_base, _oid, _name, _mode, _var, _val, _desc) \ _SYSCTL_BASE(_name, _var, int, _mode) #define SYSCTL_LONG(_base, _oid, _name, _mode, _var, _val, _desc) \ _SYSCTL_BASE(_name, _var, long, _mode) #define SYSCTL_ULONG(_base, _oid, _name, _mode, _var, _val, _desc) \ _SYSCTL_BASE(_name, _var, ulong, _mode) #define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \ _SYSCTL_BASE(_name, _var, uint, _mode) #define SYSCTL_HANDLER_ARGS \ struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req int sysctl_handle_int(SYSCTL_HANDLER_ARGS); int sysctl_handle_long(SYSCTL_HANDLER_ARGS); #define TUNABLE_INT(_name, _ptr) void ether_demux(struct ifnet *ifp, struct mbuf *m); int ether_output_frame(struct ifnet *ifp, struct mbuf *m); void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu); void rtfree(struct rtentry *rt); u_short in_cksum_skip(struct mbuf *m, int len, int skip); #ifdef INP_LOCK_ASSERT #undef INP_LOCK_ASSERT #define INP_LOCK_ASSERT(a) #endif int jailed(struct ucred *cred); /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). If subnetsarelocal * is true, this includes other subnets of the local net. * Otherwise, it includes only the directly-connected (sub)nets. 
*/ int in_localaddr(struct in_addr in); /* the prototype is already in the headers */ //int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); int fnmatch(const char *pattern, const char *string, int flags); int linux_lookup(const int proto, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, struct sk_buff *skb, int dir, struct bsd_ucred *u); /* vnet wrappers, in vnet.h and ip_var.h */ int ipfw_init(void); void ipfw_destroy(void); struct ip_fw_args; extern int (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa); #define curvnet NULL #define CURVNET_SET(_v) #define CURVNET_RESTORE() #define VNET_ASSERT(condition) #define VNET_NAME(n) n #define VNET_DECLARE(t, n) extern t n #define VNET_DEFINE(t, n) t n #define _VNET_PTR(b, n) &VNET_NAME(n) /* * Virtualized global variable accessor macros. */ #define VNET_VNET_PTR(vnet, n) (&(n)) #define VNET_VNET(vnet, n) (n) #define VNET_PTR(n) (&(n)) #define VNET(n) (n) extern int (*ip_dn_ctl_ptr)(struct sockopt *); typedef int ip_fw_ctl_t(struct sockopt *); extern ip_fw_ctl_t *ip_fw_ctl_ptr; /* For kernel ipfw_ether and ipfw_bridge. */ struct ip_fw_args; typedef int ip_fw_chk_t(struct ip_fw_args *args); extern ip_fw_chk_t *ip_fw_chk_ptr; #define V_ip_fw_chk_ptr VNET(ip_fw_chk_ptr) #define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr) #define V_tcbinfo VNET(tcbinfo) #define V_udbinfo VNET(udbinfo) #define SYSCTL_VNET_PROC SYSCTL_PROC #define SYSCTL_VNET_INT SYSCTL_INT #endif /* !_MISSING_H_ */ ipfw_mod/dummynet/ip_fw2.c000644 000423 000000 00000405574 11311261630 016325 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz Exp $"); #define DEB(x) #define DDB(x) x /* * Implement IP packet firewall (new version) */ #if !defined(KLD_MODULE) #include "opt_ipfw.h" #include "opt_ipdivert.h" #include "opt_ipdn.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #endif #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for ETHERTYPE_IP */ #include #include #include #include #include #ifdef linux #define INP_LOCK_ASSERT /* define before missing.h otherwise ? */ #include "missing.h" #endif #define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #include /* XXX for in_cksum */ #ifdef IPFW_HASHTABLES #include "hashtable.h" #endif #ifdef MAC #include #endif static VNET_DEFINE(int, ipfw_vnet_ready) = 0; #define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) /* * set_disable contains one bit per set value (0..31). * If the bit is set, all rules with the corresponding set * are disabled. Set RESVD_SET(31) is reserved for the default rule * and rules that are not deleted by the flush command, * and CANNOT be disabled. * Rules in set RESVD_SET can only be deleted explicitly. */ static VNET_DEFINE(u_int32_t, set_disable); static VNET_DEFINE(int, fw_verbose); static VNET_DEFINE(struct callout, ipfw_timeout); static VNET_DEFINE(int, verbose_limit); #define V_set_disable VNET(set_disable) #define V_fw_verbose VNET(fw_verbose) #define V_ipfw_timeout VNET(ipfw_timeout) #define V_verbose_limit VNET(verbose_limit) #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT static int default_to_accept = 1; #else static int default_to_accept; #endif static uma_zone_t ipfw_dyn_rule_zone; /* * list of rules for layer 3 */ VNET_DEFINE(struct ip_fw_chain, layer3_chain); MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); #define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL) ipfw_nat_t *ipfw_nat_ptr = NULL; ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; ipfw_nat_cfg_t *ipfw_nat_del_ptr; ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; struct table_entry { struct radix_node rn[2]; struct sockaddr_in addr, mask; u_int32_t value; }; static VNET_DEFINE(int, autoinc_step); #define V_autoinc_step VNET(autoinc_step) static VNET_DEFINE(int, fw_deny_unknown_exthdrs); #define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_NODE 
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, "Rule number auto-increment step"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, "Only do a single pass through ipfw when using dummynet(4)"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, "Log matches to ipfw rules"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, "Set upper limit of matches of ipfw rules logged"); static unsigned int dummy_default_rule = IPFW_DEFAULT_RULE; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, &dummy_default_rule, IPFW_DEFAULT_RULE, "The default/max possible rule number."); static unsigned int dummy_tables_max = IPFW_TABLES_MAX; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, &dummy_tables_max, IPFW_TABLES_MAX, "The maximum number of tables."); static unsigned int skipto_entries = 256; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, skipto_entries, CTLFLAG_RW, &skipto_entries, 0, "Number of entries in the skipto cache"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6); SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0, ipfw_chg_hook, "I", "Enable ipfw+6"); SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, CTLFLAG_RW | CTLFLAG_SECURE, 
&VNET_NAME(fw_deny_unknown_exthdrs), 0, "Deny packets with unknown IPv6 Extension Headers"); #endif /* INET6 */ #endif /* SYSCTL_NODE */ /* * Description of dynamic rules. * * Dynamic rules are stored in lists accessed through a hash table * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can * be modified through the sysctl variable dyn_buckets which is * updated when the table becomes empty. * * XXX currently there is only one list, ipfw_dyn. * * When a packet is received, its address fields are first masked * with the mask defined for the rule, then hashed, then matched * against the entries in the corresponding list. * Dynamic rules can be used for different purposes: * + stateful rules; * + enforcing limits on the number of sessions; * + in-kernel NAT (not implemented yet) * * The lifetime of dynamic rules is regulated by dyn_*_lifetime, * measured in seconds and depending on the flags. * * The total number of dynamic rules is stored in dyn_count. * The max number of dynamic rules is dyn_max. When we reach * the maximum number of rules we do not create anymore. This is * done to avoid consuming too much memory, but also too much * time when searching on each packet (ideally, we should try instead * to put a limit on the length of the list on each bucket...). * * Each dynamic rule holds a pointer to the parent ipfw rule so * we know what action to perform. Dynamic rules are removed when * the parent rule is deleted. XXX we should make them survive. * * There are some limitations with dynamic rules -- we do not * obey the 'randomized match', and we do not do multiple * passes through the firewall. XXX check the latter!!! 
*/ static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); static VNET_DEFINE(u_int32_t, dyn_buckets); static VNET_DEFINE(u_int32_t, curr_dyn_buckets); #define V_ipfw_dyn_v VNET(ipfw_dyn_v) #define V_dyn_buckets VNET(dyn_buckets) #define V_curr_dyn_buckets VNET(curr_dyn_buckets) #if defined( __linux__ ) || defined( _WIN32 ) DEFINE_SPINLOCK(ipfw_dyn_mtx); #else static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ #endif /* !__linux__ */ #define IPFW_DYN_LOCK_INIT() \ mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) #define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) #define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) #define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) #define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) static struct mbuf *send_pkt(struct mbuf *, struct ipfw_flow_id *, u_int32_t, u_int32_t, int); /* * Timeouts for various events in handing dynamic rules. */ static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); static VNET_DEFINE(u_int32_t, dyn_short_lifetime); #define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) #define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) #define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) #define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) #define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) #define V_dyn_short_lifetime VNET(dyn_short_lifetime) /* * Keepalives are sent if dyn_keepalive is set. They are sent every * dyn_keepalive_period seconds, in the last dyn_keepalive_interval * seconds of lifetime of a rule. * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower * than dyn_keepalive_period. 
*/ static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); static VNET_DEFINE(u_int32_t, dyn_keepalive_period); static VNET_DEFINE(u_int32_t, dyn_keepalive); #define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) #define V_dyn_keepalive_period VNET(dyn_keepalive_period) #define V_dyn_keepalive VNET(dyn_keepalive) static VNET_DEFINE(u_int32_t, static_count); /* # of static rules */ static VNET_DEFINE(u_int32_t, static_len); /* bytes of static rules */ static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ #define V_static_count VNET(static_count) #define V_static_len VNET(static_len) #define V_dyn_count VNET(dyn_count) #define V_dyn_max VNET(dyn_max) #ifdef SYSCTL_NODE SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, "Number of dyn. buckets"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, "Current Number of dyn. buckets"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, &VNET_NAME(dyn_count), 0, "Number of dyn. rules"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, &VNET_NAME(dyn_max), 0, "Max number of dyn. rules"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, &VNET_NAME(static_count), 0, "Number of static rules"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, "Lifetime of dyn. rules for acks"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, "Lifetime of dyn. rules for syn"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, "Lifetime of dyn. rules for fin"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, "Lifetime of dyn. 
rules for rst"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, "Lifetime of dyn. rules for UDP"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, "Lifetime of dyn. rules for other situations"); SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, "Enable keepalives for dyn. rules"); #endif /* SYSCTL_NODE */ /* * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T * Other macros just cast void * into the appropriate type */ #define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) #define TCP(p) ((struct tcphdr *)(p)) #define SCTP(p) ((struct sctphdr *)(p)) #define UDP(p) ((struct udphdr *)(p)) #define ICMP(p) ((struct icmphdr *)(p)) #define ICMP6(p) ((struct icmp6_hdr *)(p)) static __inline int icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) { int type = icmp->icmp_type; return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<icmp_type; return (type <= ICMP_MAXTYPE && (TT & (1<arg1 or cmd->d[0]. * * We scan options and store the bits we find set. We succeed if * * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear * * The code is sometimes optimized not to store additional variables. 
*/ static int flags_match(ipfw_insn *cmd, u_int8_t bits) { u_char want_clear; bits = ~bits; if ( ((cmd->arg1 & 0xff) & bits) != 0) return 0; /* some bits we want set were clear */ want_clear = (cmd->arg1 >> 8) & 0xff; if ( (want_clear & bits) != want_clear) return 0; /* some bits we want clear were set */ return 1; } static int ipopts_match(struct ip *ip, ipfw_insn *cmd) { int optlen, bits = 0; u_char *cp = (u_char *)(ip + 1); int x = (ip->ip_hl << 2) - sizeof (struct ip); for (; x > 0; x -= optlen, cp += optlen) { int opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { optlen = cp[IPOPT_OLEN]; if (optlen <= 0 || optlen > x) return 0; /* invalid or truncated */ } switch (opt) { default: break; case IPOPT_LSRR: bits |= IP_FW_IPOPT_LSRR; break; case IPOPT_SSRR: bits |= IP_FW_IPOPT_SSRR; break; case IPOPT_RR: bits |= IP_FW_IPOPT_RR; break; case IPOPT_TS: bits |= IP_FW_IPOPT_TS; break; } } return (flags_match(cmd, bits)); } static int tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) { int optlen, bits = 0; u_char *cp = (u_char *)(tcp + 1); int x = (tcp->th_off << 2) - sizeof(struct tcphdr); for (; x > 0; x -= optlen, cp += optlen) { int opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { optlen = cp[1]; if (optlen <= 0) break; } switch (opt) { default: break; case TCPOPT_MAXSEG: bits |= IP_FW_TCPOPT_MSS; break; case TCPOPT_WINDOW: bits |= IP_FW_TCPOPT_WINDOW; break; case TCPOPT_SACK_PERMITTED: case TCPOPT_SACK: bits |= IP_FW_TCPOPT_SACK; break; case TCPOPT_TIMESTAMP: bits |= IP_FW_TCPOPT_TS; break; } } return (flags_match(cmd, bits)); } static int iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) { if (ifp == NULL) /* no iface with this packet, match fails */ return 0; /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) return(1); } else { if (strncmp(ifp->if_xname, cmd->name, 
IFNAMSIZ) == 0) return(1); } } else { #if !defined( __linux__ ) && !defined( _WIN32 ) struct ifaddr *ia; if_addr_rlock(ifp); TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { if (ia->ifa_addr->sa_family != AF_INET) continue; if (cmd->p.ip.s_addr == ((struct sockaddr_in *) (ia->ifa_addr))->sin_addr.s_addr) { if_addr_runlock(ifp); return(1); /* match */ } } if_addr_runlock(ifp); #endif } return(0); /* no match, fail ... */ } #if !defined( __linux__ ) && !defined( _WIN32 ) /* * The verify_path function checks if a route to the src exists and * if it is reachable via ifp (when provided). * * The 'verrevpath' option checks that the interface that an IP packet * arrives on is the same interface that traffic destined for the * packet's source address would be routed out of. The 'versrcreach' * option just checks that the source address is reachable via any route * (except default) in the routing table. These two are a measure to block * forged packets. This is also commonly known as "anti-spoofing" or Unicast * Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs * is purposely reminiscent of the Cisco IOS command, * * ip verify unicast reverse-path * ip verify unicast source reachable-via any * * which implements the same functionality. But note that syntax is * misleading. The check may be performed on all IP packets whether unicast, * multicast, or broadcast. */ static int verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) { struct route ro; struct sockaddr_in *dst; bzero(&ro, sizeof(ro)); dst = (struct sockaddr_in *)&(ro.ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = src; in_rtalloc_ign(&ro, 0, fib); if (ro.ro_rt == NULL) return 0; /* * If ifp is provided, check for equality with rtentry. 
* We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, * in order to pass packets injected back by if_simloop(): * if useloopback == 1 routing entry (via lo0) for our own address * may exist, so we need to handle routing assymetry. */ if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { RTFREE(ro.ro_rt); return 0; } /* if no ifp provided, check if rtentry is not default route */ if (ifp == NULL && satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { RTFREE(ro.ro_rt); return 0; } /* or if this is a blackhole/reject route */ if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { RTFREE(ro.ro_rt); return 0; } /* found valid route */ RTFREE(ro.ro_rt); return 1; } #endif #ifdef INET6 /* * ipv6 specific rules here... */ static __inline int icmp6type_match (int type, ipfw_insn_u32 *cmd) { return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); } static int flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) { int i; for (i=0; i <= cmd->o.arg1; ++i ) if (curr_flow == cmd->d[i] ) return 1; return 0; } /* support for IP6_*_ME opcodes */ static int search_ip6_addr_net (struct in6_addr * ip6_addr) { struct ifnet *mdc; struct ifaddr *mdc2; struct in6_ifaddr *fdm; struct in6_addr copia; TAILQ_FOREACH(mdc, &V_ifnet, if_link) { if_addr_rlock(mdc); TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) { if (mdc2->ifa_addr->sa_family == AF_INET6) { fdm = (struct in6_ifaddr *)mdc2; copia = fdm->ia_addr.sin6_addr; /* need for leaving scope_id in the sock_addr */ in6_clearscope(&copia); if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) { if_addr_runlock(mdc); return 1; } } } if_addr_runlock(mdc); } return 0; } static int verify_path6(struct in6_addr *src, struct ifnet *ifp) { struct route_in6 ro; struct sockaddr_in6 *dst; bzero(&ro, sizeof(ro)); dst = (struct sockaddr_in6 * )&(ro.ro_dst); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; /* XXX MRT 0 for ipv6 at this time */ rtalloc_ign((struct route *)&ro, 0); if (ro.ro_rt 
== NULL) return 0; /* * if ifp is provided, check for equality with rtentry * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, * to support the case of sending packets to an address of our own. * (where the former interface is the first argument of if_simloop() * (=ifp), the latter is lo0) */ if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { RTFREE(ro.ro_rt); return 0; } /* if no ifp provided, check if rtentry is not default route */ if (ifp == NULL && IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { RTFREE(ro.ro_rt); return 0; } /* or if this is a blackhole/reject route */ if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { RTFREE(ro.ro_rt); return 0; } /* found valid route */ RTFREE(ro.ro_rt); return 1; } static __inline int hash_packet6(struct ipfw_flow_id *id) { u_int32_t i; i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ (id->dst_port) ^ (id->src_port); return i; } static int is_icmp6_query(int icmp6_type) { if ((icmp6_type <= ICMP6_MAXTYPE) && (icmp6_type == ICMP6_ECHO_REQUEST || icmp6_type == ICMP6_MEMBERSHIP_QUERY || icmp6_type == ICMP6_WRUREQUEST || icmp6_type == ICMP6_FQDN_QUERY || icmp6_type == ICMP6_NI_QUERY)) return (1); return (0); } static void send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) { struct mbuf *m; m = args->m; if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { struct tcphdr *tcp; tcp = (struct tcphdr *)((char *)ip6 + hlen); if ((tcp->th_flags & TH_RST) == 0) { struct mbuf *m0; m0 = send_pkt(args->m, &(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m0 != NULL) ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); } m_freem(m); } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ #if 0 /* * Unlike above, the mbufs need to line up with the ip6 hdr, * as the contents are read. 
We need to m_adj() the * needed amount. * The mbuf will however be thrown away so we can adjust it. * Remember we did an m_pullup on it already so we * can make some assumptions about contiguousness. */ if (args->L3offset) m_adj(m, args->L3offset); #endif icmp6_error(m, ICMP6_DST_UNREACH, code, 0); } else m_freem(m); args->m = NULL; } #endif /* INET6 */ /* counter for ipfw_log(NULL...) */ static VNET_DEFINE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 #define SNP(buf) buf, sizeof(buf) /* * We enter here when we have a rule with O_LOG. * XXX this function alone takes about 2Kbytes of code! */ static void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, struct ip *ip) { struct ether_header *eh = args->eh; char *action; int limit_reached = 0; char action2[40], proto[128], fragment[32]; fragment[0] = '\0'; proto[0] = '\0'; if (f == NULL) { /* bogus pkt */ if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit) return; V_norule_counter++; if (V_norule_counter == V_verbose_limit) limit_reached = V_verbose_limit; action = "Refuse"; } else { /* O_LOG is the first action, find the real one */ ipfw_insn *cmd = ACTION_PTR(f); ipfw_insn_log *l = (ipfw_insn_log *)cmd; if (l->max_log != 0 && l->log_left == 0) return; l->log_left--; if (l->log_left == 0) limit_reached = l->max_log; cmd += F_LEN(cmd); /* point to first action */ if (cmd->opcode == O_ALTQ) { ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; snprintf(SNPARGS(action2, 0), "Altq %d", altq->qid); cmd += F_LEN(cmd); } if (cmd->opcode == O_PROB) cmd += F_LEN(cmd); if (cmd->opcode == O_TAG) cmd += F_LEN(cmd); action = action2; switch (cmd->opcode) { case O_DENY: action = "Deny"; break; case O_REJECT: if (cmd->arg1==ICMP_REJECT_RST) action = "Reset"; else if (cmd->arg1==ICMP_UNREACH_HOST) action = "Reject"; else 
snprintf(SNPARGS(action2, 0), "Unreach %d", cmd->arg1); break; case O_UNREACH6: if (cmd->arg1==ICMP6_UNREACH_RST) action = "Reset"; else snprintf(SNPARGS(action2, 0), "Unreach %d", cmd->arg1); break; case O_ACCEPT: action = "Accept"; break; case O_COUNT: action = "Count"; break; case O_DIVERT: snprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1); break; case O_TEE: snprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); break; case O_SETFIB: snprintf(SNPARGS(action2, 0), "SetFib %d", cmd->arg1); break; case O_SKIPTO: snprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); break; case O_PIPE: snprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1); break; case O_QUEUE: snprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1); break; case O_FORWARD_IP: { ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; int len; struct in_addr dummyaddr; if (sa->sa.sin_addr.s_addr == INADDR_ANY) dummyaddr.s_addr = htonl(tablearg); else dummyaddr.s_addr = sa->sa.sin_addr.s_addr; len = snprintf(SNPARGS(action2, 0), "Forward to %s", inet_ntoa(dummyaddr)); if (sa->sa.sin_port) snprintf(SNPARGS(action2, len), ":%d", sa->sa.sin_port); } break; case O_NETGRAPH: snprintf(SNPARGS(action2, 0), "Netgraph %d", cmd->arg1); break; case O_NGTEE: snprintf(SNPARGS(action2, 0), "Ngtee %d", cmd->arg1); break; case O_NAT: action = "Nat"; break; case O_REASS: action = "Reass"; break; default: action = "UNKNOWN"; break; } } if (hlen == 0) { /* non-ip */ snprintf(SNPARGS(proto, 0), "MAC"); } else { int len; #ifdef INET6 char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; #else char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; #endif struct icmphdr *icmp; struct tcphdr *tcp; struct udphdr *udp; #ifdef INET6 struct ip6_hdr *ip6 = NULL; struct icmp6_hdr *icmp6; #endif src[0] = '\0'; dst[0] = '\0'; #ifdef INET6 if (IS_IP6_FLOW_ID(&(args->f_id))) { char ip6buf[INET6_ADDRSTRLEN]; snprintf(src, sizeof(src), "[%s]", ip6_sprintf(ip6buf, &args->f_id.src_ip6)); snprintf(dst, sizeof(dst), "[%s]", ip6_sprintf(ip6buf, 
&args->f_id.dst_ip6)); ip6 = (struct ip6_hdr *)ip; tcp = (struct tcphdr *)(((char *)ip) + hlen); udp = (struct udphdr *)(((char *)ip) + hlen); } else #endif { tcp = L3HDR(struct tcphdr, ip); udp = L3HDR(struct udphdr, ip); inet_ntoa_r(ip->ip_src, src); inet_ntoa_r(ip->ip_dst, dst); } switch (args->f_id.proto) { case IPPROTO_TCP: len = snprintf(SNPARGS(proto, 0), "TCP %s", src); if (offset == 0) snprintf(SNPARGS(proto, len), ":%d %s:%d", ntohs(tcp->th_sport), dst, ntohs(tcp->th_dport)); else snprintf(SNPARGS(proto, len), " %s", dst); break; case IPPROTO_UDP: len = snprintf(SNPARGS(proto, 0), "UDP %s", src); if (offset == 0) snprintf(SNPARGS(proto, len), ":%d %s:%d", ntohs(udp->uh_sport), dst, ntohs(udp->uh_dport)); else snprintf(SNPARGS(proto, len), " %s", dst); break; case IPPROTO_ICMP: icmp = L3HDR(struct icmphdr, ip); if (offset == 0) len = snprintf(SNPARGS(proto, 0), "ICMP:%u.%u ", icmp->icmp_type, icmp->icmp_code); else len = snprintf(SNPARGS(proto, 0), "ICMP "); len += snprintf(SNPARGS(proto, len), "%s", src); snprintf(SNPARGS(proto, len), " %s", dst); break; #ifdef INET6 case IPPROTO_ICMPV6: icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen); if (offset == 0) len = snprintf(SNPARGS(proto, 0), "ICMPv6:%u.%u ", icmp6->icmp6_type, icmp6->icmp6_code); else len = snprintf(SNPARGS(proto, 0), "ICMPv6 "); len += snprintf(SNPARGS(proto, len), "%s", src); snprintf(SNPARGS(proto, len), " %s", dst); break; #endif default: len = snprintf(SNPARGS(proto, 0), "P:%d %s", args->f_id.proto, src); snprintf(SNPARGS(proto, len), " %s", dst); break; } #ifdef INET6 if (IS_IP6_FLOW_ID(&(args->f_id))) { if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) snprintf(SNPARGS(fragment, 0), " (frag %08x:%d@%d%s)", args->f_id.frag_id6, ntohs(ip6->ip6_plen) - hlen, ntohs(offset & IP6F_OFF_MASK) << 3, (offset & IP6F_MORE_FRAG) ? 
"+" : ""); } else #endif { int ip_off, ip_len; if (1 || eh != NULL) { /* layer 2 packets are as on the wire */ ip_off = ntohs(ip->ip_off); ip_len = ntohs(ip->ip_len); } else { ip_off = ip->ip_off; ip_len = ip->ip_len; } if (ip_off & (IP_MF | IP_OFFMASK)) snprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), offset << 3, (ip_off & IP_MF) ? "+" : ""); } } if (oif || m->m_pkthdr.rcvif) log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s %s via %s%s\n", f ? f->rulenum : -1, action, proto, oif ? "out" : "in", oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, fragment); else log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s [no if info]%s\n", f ? f->rulenum : -1, action, proto, fragment); if (limit_reached) log(LOG_SECURITY | LOG_NOTICE, "ipfw: limit %d reached on entry %d\n", limit_reached, f ? f->rulenum : -1); } /* * IMPORTANT: the hash function for dynamic rules must be commutative * in source and destination (ip,port), because rules are bidirectional * and we want to find both in the same bucket. */ static __inline int hash_packet(struct ipfw_flow_id *id) { u_int32_t i; #ifdef INET6 if (IS_IP6_FLOW_ID(id)) i = hash_packet6(id); else #endif /* INET6 */ i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); i &= (V_curr_dyn_buckets - 1); return i; } static __inline void unlink_dyn_rule_print(struct ipfw_flow_id *id) { struct in_addr da; #ifdef INET6 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; #else char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; #endif #ifdef INET6 if (IS_IP6_FLOW_ID(id)) { ip6_sprintf(src, &id->src_ip6); ip6_sprintf(dst, &id->dst_ip6); } else #endif { da.s_addr = htonl(id->src_ip); inet_ntoa_r(da, src); da.s_addr = htonl(id->dst_ip); inet_ntoa_r(da, dst); } printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", src, id->src_port, dst, id->dst_port, V_dyn_count - 1); } /** * unlink a dynamic rule from a chain. 
prev is a pointer to * the previous one, q is a pointer to the rule to delete, * head is a pointer to the head of the queue. * Modifies q and potentially also head. */ #define UNLINK_DYN_RULE(prev, head, q) { \ ipfw_dyn_rule *old_q = q; \ \ /* remove a refcount to the parent */ \ if (q->dyn_type == O_LIMIT) \ q->parent->count--; \ DEB(unlink_dyn_rule_print(&q->id);) \ if (prev != NULL) \ prev->next = q = q->next; \ else \ head = q = q->next; \ V_dyn_count--; \ uma_zfree(ipfw_dyn_rule_zone, old_q); } #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) /** * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. * * If keep_me == NULL, rules are deleted even if not expired, * otherwise only expired rules are removed. * * The value of the second parameter is also used to point to identify * a rule we absolutely do not want to remove (e.g. because we are * holding a reference to it -- this is the case with O_LIMIT_PARENT * rules). The pointer is only used for comparison, so any non-null * value will do. */ static void remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) { static u_int32_t last_remove = 0; #define FORCE (keep_me == NULL) ipfw_dyn_rule *prev, *q; int i, pass = 0, max_pass = 0; IPFW_DYN_LOCK_ASSERT(); if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) return; /* do not expire more than once per second, it is useless */ if (!FORCE && last_remove == time_uptime) return; last_remove = time_uptime; /* * because O_LIMIT refer to parent rules, during the first pass only * remove child and mark any pending LIMIT_PARENT, and remove * them in a second pass. */ next_pass: for (i = 0 ; i < V_curr_dyn_buckets ; i++) { for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { /* * Logic can become complex here, so we split tests. */ if (q == keep_me) goto next; if (rule != NULL && rule != q->rule) goto next; /* not the one we are looking for */ if (q->dyn_type == O_LIMIT_PARENT) { /* * handle parent in the second pass, * record we need one. 
*/ max_pass = 1; if (pass == 0) goto next; if (FORCE && q->count != 0 ) { /* XXX should not happen! */ printf("ipfw: OUCH! cannot remove rule," " count %d\n", q->count); } } else { if (!FORCE && !TIME_LEQ( q->expire, time_uptime )) goto next; } if (q->dyn_type != O_LIMIT_PARENT || !q->count) { UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); continue; } next: prev=q; q=q->next; } } if (pass++ < max_pass) goto next_pass; } /** * lookup a dynamic rule. */ static ipfw_dyn_rule * lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp) { /* * stateful ipfw extensions. * Lookup into dynamic session queue */ #define MATCH_REVERSE 0 #define MATCH_FORWARD 1 #define MATCH_NONE 2 #define MATCH_UNKNOWN 3 int i, dir = MATCH_NONE; ipfw_dyn_rule *prev, *q=NULL; IPFW_DYN_LOCK_ASSERT(); if (V_ipfw_dyn_v == NULL) goto done; /* not found */ i = hash_packet( pkt ); for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { if (q->dyn_type == O_LIMIT_PARENT && q->count) goto next; if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); continue; } if (pkt->proto == q->id.proto && q->dyn_type != O_LIMIT_PARENT) { if (IS_IP6_FLOW_ID(pkt)) { if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), &(q->id.src_ip6)) && IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), &(q->id.dst_ip6)) && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port ) { dir = MATCH_FORWARD; break; } if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), &(q->id.dst_ip6)) && IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), &(q->id.src_ip6)) && pkt->src_port == q->id.dst_port && pkt->dst_port == q->id.src_port ) { dir = MATCH_REVERSE; break; } } else { if (pkt->src_ip == q->id.src_ip && pkt->dst_ip == q->id.dst_ip && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port ) { dir = MATCH_FORWARD; break; } if (pkt->src_ip == q->id.dst_ip && pkt->dst_ip == q->id.src_ip && pkt->src_port == q->id.dst_port && pkt->dst_port == q->id.src_port ) { dir = MATCH_REVERSE; break; } } } next: 
prev = q; q = q->next; } if (q == NULL) goto done; /* q = NULL, not found */ if ( prev != NULL) { /* found and not in front */ prev->next = q->next; q->next = V_ipfw_dyn_v[i]; V_ipfw_dyn_v[i] = q; } if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); switch (q->state) { case TH_SYN: /* opening */ q->expire = time_uptime + V_dyn_syn_lifetime; break; case BOTH_SYN: /* move to established */ case BOTH_SYN | TH_FIN : /* one side tries to close */ case BOTH_SYN | (TH_FIN << 8) : if (tcp) { #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) u_int32_t ack = ntohl(tcp->th_ack); if (dir == MATCH_FORWARD) { if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) q->ack_fwd = ack; else { /* ignore out-of-sequence */ break; } } else { if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) q->ack_rev = ack; else { /* ignore out-of-sequence */ break; } } } q->expire = time_uptime + V_dyn_ack_lifetime; break; case BOTH_SYN | BOTH_FIN: /* both sides closed */ if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; q->expire = time_uptime + V_dyn_fin_lifetime; break; default: #if 0 /* * reset or some invalid combination, but can also * occur if we use keep-state the wrong way. 
*/ if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) printf("invalid state: 0x%x\n", q->state); #endif if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; q->expire = time_uptime + V_dyn_rst_lifetime; break; } } else if (pkt->proto == IPPROTO_UDP) { q->expire = time_uptime + V_dyn_udp_lifetime; } else { /* other protocols */ q->expire = time_uptime + V_dyn_short_lifetime; } done: if (match_direction) *match_direction = dir; return q; } static ipfw_dyn_rule * lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp) { ipfw_dyn_rule *q; IPFW_DYN_LOCK(); q = lookup_dyn_rule_locked(pkt, match_direction, tcp); if (q == NULL) IPFW_DYN_UNLOCK(); /* NB: return table locked when q is not NULL */ return q; } static void realloc_dynamic_table(void) { IPFW_DYN_LOCK_ASSERT(); /* * Try reallocation, make sure we have a power of 2 and do * not allow more than 64k entries. In case of overflow, * default to 1024. */ if (V_dyn_buckets > 65536) V_dyn_buckets = 1024; if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ V_dyn_buckets = V_curr_dyn_buckets; /* reset */ return; } V_curr_dyn_buckets = V_dyn_buckets; if (V_ipfw_dyn_v != NULL) free(V_ipfw_dyn_v, M_IPFW); for (;;) { V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), M_IPFW, M_NOWAIT | M_ZERO); if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) break; V_curr_dyn_buckets /= 2; } } /** * Install state of type 'type' for a dynamic session. * The hash table contains two type of rules: * - regular rules (O_KEEP_STATE) * - rules for sessions with limited number of sess per user * (O_LIMIT). When they are created, the parent is * increased by 1, and decreased on delete. In this case, * the third parameter is the parent rule and not the chain. * - "parent" rules for the above (O_LIMIT_PARENT). 
*/ static ipfw_dyn_rule * add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) { ipfw_dyn_rule *r; int i; IPFW_DYN_LOCK_ASSERT(); if (V_ipfw_dyn_v == NULL || (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { realloc_dynamic_table(); if (V_ipfw_dyn_v == NULL) return NULL; /* failed ! */ } i = hash_packet(id); r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); if (r == NULL) { printf ("ipfw: sorry cannot allocate state\n"); return NULL; } /* increase refcount on parent, and set pointer */ if (dyn_type == O_LIMIT) { ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; if ( parent->dyn_type != O_LIMIT_PARENT) panic("invalid parent"); parent->count++; r->parent = parent; rule = parent->rule; } r->id = *id; r->expire = time_uptime + V_dyn_syn_lifetime; r->rule = rule; r->dyn_type = dyn_type; r->pcnt = r->bcnt = 0; r->count = 0; r->bucket = i; r->next = V_ipfw_dyn_v[i]; V_ipfw_dyn_v[i] = r; V_dyn_count++; DEB({ struct in_addr da; #ifdef INET6 char src[INET6_ADDRSTRLEN]; char dst[INET6_ADDRSTRLEN]; #else char src[INET_ADDRSTRLEN]; char dst[INET_ADDRSTRLEN]; #endif #ifdef INET6 if (IS_IP6_FLOW_ID(&(r->id))) { ip6_sprintf(src, &r->id.src_ip6); ip6_sprintf(dst, &r->id.dst_ip6); } else #endif { da.s_addr = htonl(r->id.src_ip); inet_ntoa_r(da, src); da.s_addr = htonl(r->id.dst_ip); inet_ntoa_r(da, dst); } printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", dyn_type, src, r->id.src_port, dst, r->id.dst_port, V_dyn_count); }) return r; } /** * lookup dynamic parent rule using pkt and rule as search keys. * If the lookup fails, then install one. 
*/ static ipfw_dyn_rule * lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) { ipfw_dyn_rule *q; int i; IPFW_DYN_LOCK_ASSERT(); if (V_ipfw_dyn_v) { int is_v6 = IS_IP6_FLOW_ID(pkt); i = hash_packet( pkt ); for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) if (q->dyn_type == O_LIMIT_PARENT && rule== q->rule && pkt->proto == q->id.proto && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port && ( (is_v6 && IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), &(q->id.src_ip6)) && IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), &(q->id.dst_ip6))) || (!is_v6 && pkt->src_ip == q->id.src_ip && pkt->dst_ip == q->id.dst_ip) ) ) { q->expire = time_uptime + V_dyn_short_lifetime; DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) return q; } } return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); } /** * Install dynamic state for rule type cmd->o.opcode * * Returns 1 (failure) if state is not installed because of errors or because * session limitations are enforced. */ static int install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg) { static int last_log; ipfw_dyn_rule *q; struct in_addr da; #ifdef INET6 char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; #else char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; #endif src[0] = '\0'; dst[0] = '\0'; IPFW_DYN_LOCK(); DEB( #ifdef INET6 if (IS_IP6_FLOW_ID(&(args->f_id))) { ip6_sprintf(src, &args->f_id.src_ip6); ip6_sprintf(dst, &args->f_id.dst_ip6); } else #endif { da.s_addr = htonl(args->f_id.src_ip); inet_ntoa_r(da, src); da.s_addr = htonl(args->f_id.dst_ip); inet_ntoa_r(da, dst); } printf("ipfw: %s: type %d %s %u -> %s %u\n", __func__, cmd->o.opcode, src, args->f_id.src_port, dst, args->f_id.dst_port); src[0] = '\0'; dst[0] = '\0'; ) q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); if (q != NULL) { /* should never occur */ if (last_log != time_uptime) { last_log = time_uptime; printf("ipfw: %s: entry already present, done\n", __func__); } IPFW_DYN_UNLOCK(); return (0); } if 
(V_dyn_count >= V_dyn_max) /* Run out of slots, try to remove any expired rule. */ remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); if (V_dyn_count >= V_dyn_max) { if (last_log != time_uptime) { last_log = time_uptime; printf("ipfw: %s: Too many dynamic rules\n", __func__); } IPFW_DYN_UNLOCK(); return (1); /* cannot install, notify caller */ } switch (cmd->o.opcode) { case O_KEEP_STATE: /* bidir rule */ add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); break; case O_LIMIT: { /* limit number of sessions */ struct ipfw_flow_id id; ipfw_dyn_rule *parent; uint32_t conn_limit; uint16_t limit_mask = cmd->limit_mask; conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? tablearg : cmd->conn_limit; DEB( if (cmd->conn_limit == IP_FW_TABLEARG) printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " "(tablearg)\n", __func__, conn_limit); else printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", __func__, conn_limit); ) id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; id.proto = args->f_id.proto; id.addr_type = args->f_id.addr_type; id.fib = M_GETFIB(args->m); if (IS_IP6_FLOW_ID (&(args->f_id))) { if (limit_mask & DYN_SRC_ADDR) id.src_ip6 = args->f_id.src_ip6; if (limit_mask & DYN_DST_ADDR) id.dst_ip6 = args->f_id.dst_ip6; } else { if (limit_mask & DYN_SRC_ADDR) id.src_ip = args->f_id.src_ip; if (limit_mask & DYN_DST_ADDR) id.dst_ip = args->f_id.dst_ip; } if (limit_mask & DYN_SRC_PORT) id.src_port = args->f_id.src_port; if (limit_mask & DYN_DST_PORT) id.dst_port = args->f_id.dst_port; if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { printf("ipfw: %s: add parent failed\n", __func__); IPFW_DYN_UNLOCK(); return (1); } if (parent->count >= conn_limit) { /* See if we can remove some expired rule. */ remove_dyn_rule(rule, parent); if (parent->count >= conn_limit) { if (V_fw_verbose && last_log != time_uptime) { last_log = time_uptime; #ifdef INET6 /* * XXX IPv6 flows are not * supported yet. 
*/ if (IS_IP6_FLOW_ID(&(args->f_id))) { char ip6buf[INET6_ADDRSTRLEN]; snprintf(src, sizeof(src), "[%s]", ip6_sprintf(ip6buf, &args->f_id.src_ip6)); snprintf(dst, sizeof(dst), "[%s]", ip6_sprintf(ip6buf, &args->f_id.dst_ip6)); } else #endif { da.s_addr = htonl(args->f_id.src_ip); inet_ntoa_r(da, src); da.s_addr = htonl(args->f_id.dst_ip); inet_ntoa_r(da, dst); } log(LOG_SECURITY | LOG_DEBUG, "ipfw: %d %s %s:%u -> %s:%u, %s\n", parent->rule->rulenum, "drop session", src, (args->f_id.src_port), dst, (args->f_id.dst_port), "too many entries"); } IPFW_DYN_UNLOCK(); return (1); } } add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); break; } default: printf("ipfw: %s: unknown dynamic rule type %u\n", __func__, cmd->o.opcode); IPFW_DYN_UNLOCK(); return (1); } /* XXX just set lifetime */ lookup_dyn_rule_locked(&args->f_id, NULL, NULL); IPFW_DYN_UNLOCK(); return (0); } /* * Generate a TCP packet, containing either a RST or a keepalive. * When flags & TH_RST, we are sending a RST packet, because of a * "reset" action matched the packet. * Otherwise we are sending a keepalive, and flags & TH_ * The 'replyto' mbuf is the mbuf being replied to, if any, and is required * so that MAC can label the reply appropriately. 
*/ static struct mbuf * send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) { #if defined( __linux__ ) || defined( _WIN32 ) return NULL; #else struct mbuf *m; int len, dir; struct ip *h = NULL; /* stupid compiler */ #ifdef INET6 struct ip6_hdr *h6 = NULL; #endif struct tcphdr *th = NULL; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (NULL); M_SETFIB(m, id->fib); #ifdef MAC if (replyto != NULL) mac_netinet_firewall_reply(replyto, m); else mac_netinet_firewall_send(m); #else (void)replyto; /* don't warn about unused arg */ #endif switch (id->addr_type) { case 4: len = sizeof(struct ip) + sizeof(struct tcphdr); break; #ifdef INET6 case 6: len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); break; #endif default: /* XXX: log me?!? */ m_freem(m); return (NULL); } dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); m->m_data += max_linkhdr; m->m_flags |= M_SKIP_FIREWALL; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, len); switch (id->addr_type) { case 4: h = mtod(m, struct ip *); /* prepare for checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(sizeof(struct tcphdr)); if (dir) { h->ip_src.s_addr = htonl(id->src_ip); h->ip_dst.s_addr = htonl(id->dst_ip); } else { h->ip_src.s_addr = htonl(id->dst_ip); h->ip_dst.s_addr = htonl(id->src_ip); } th = (struct tcphdr *)(h + 1); break; #ifdef INET6 case 6: h6 = mtod(m, struct ip6_hdr *); /* prepare for checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(sizeof(struct tcphdr)); if (dir) { h6->ip6_src = id->src_ip6; h6->ip6_dst = id->dst_ip6; } else { h6->ip6_src = id->dst_ip6; h6->ip6_dst = id->src_ip6; } th = (struct tcphdr *)(h6 + 1); break; #endif } if (dir) { th->th_sport = htons(id->src_port); th->th_dport = htons(id->dst_port); } else { th->th_sport = htons(id->dst_port); th->th_dport = htons(id->src_port); } th->th_off = sizeof(struct tcphdr) >> 2; if (flags & TH_RST) { if (flags & TH_ACK) { th->th_seq = htonl(ack); // XXX th->th_ack 
= htonl(0); th->th_flags = TH_RST; } else { if (flags & TH_SYN) seq++; // XXX th->th_seq = htonl(0); th->th_ack = htonl(seq); th->th_flags = TH_RST | TH_ACK; } } else { /* * Keepalive - use caller provided sequence numbers */ th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_flags = TH_ACK; } switch (id->addr_type) { case 4: th->th_sum = in_cksum(m, len); /* finish the ip header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_off = 0; h->ip_len = len; h->ip_ttl = V_ip_defttl; h->ip_sum = 0; break; #ifdef INET6 case 6: th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), sizeof(struct tcphdr)); /* finish the ip6 header */ h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; break; #endif } return (m); #endif /* !__linux__ */ } /* * sends a reject message, consuming the mbuf passed as an argument. */ static void send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip) { #if 0 /* XXX When ip is not guaranteed to be at mtod() we will * need to account for this */ * The mbuf will however be thrown away so we can adjust it. * Remember we did an m_pullup on it already so we * can make some assumptions about contiguousness. */ if (args->L3offset) m_adj(m, args->L3offset); #endif if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ /* We need the IP header in host order for icmp_error(). 
*/ #if !defined( __linux__ ) && !defined( _WIN32 ) if (args->eh != NULL) { ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } #endif icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); } else if (args->f_id.proto == IPPROTO_TCP) { struct tcphdr *const tcp = L3HDR(struct tcphdr, mtod(args->m, struct ip *)); if ( (tcp->th_flags & TH_RST) == 0) { struct mbuf *m; m = send_pkt(args->m, &(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m != NULL) ip_output(m, NULL, NULL, 0, NULL, NULL); } m_freem(args->m); } else m_freem(args->m); args->m = NULL; } static void set_skipto_table(struct ip_fw_chain *ch) { int i, n, sh; struct ip_fw *f, **t, **oldt; for (sh = 15; sh > 0; sh--) if (skipto_entries > 1<rules; f; f = f->next) { n = f->rulenum >> sh ; while (i <= n) t[i++] = f; } V_layer3_chain.skipto_shift = sh; V_layer3_chain.skipto_size = skipto_entries; oldt = V_layer3_chain.skipto_ptrs; V_layer3_chain.skipto_ptrs = t; IPFW_RUNLOCK(ch); if (oldt) { IPFW_WLOCK(ch); IPFW_WUNLOCK(ch); /* now can free oldt */ free(oldt, M_IPFW_TBL); } } /* * Map a rule number to a rule pointer, using the skipto table. * First lookup the slot, then follow the chain until we find a * non-null entry with rulenum >= num. Return default_rule on error. */ static struct ip_fw * rule2ptr(struct ip_fw_chain *ch, int num) { struct ip_fw *r = NULL; int ix = (num & 0xffff) >> ch->skipto_shift; while (ix < ch->skipto_size && (r = ch->skipto_ptrs[ix]) == NULL) ix++; while (r && num < r->rulenum) r = r->next; return (r ? r : ch->default_rule); } /** * * Given an ip_fw *, lookup_next_rule will return a pointer * to the next rule, which can be either the jump * target (for skipto instructions) or the next one in the list (in * all other cases including a missing jump target). * The result is also written in the "next_rule" field of the rule. * Backward jumps are not allowed, so start looking from the next * rule... 
* * This never returns NULL -- in case we do not have an exact match, * the next rule is returned. When the ruleset is changed, * pointers are flushed so we are always correct. */ static struct ip_fw * lookup_next_rule(struct ip_fw_chain *ch, struct ip_fw *me, uint32_t tablearg) { struct ip_fw *rule = NULL; ipfw_insn *cmd; /* look for action, in case it is a skipto */ cmd = ACTION_PTR(me); if (cmd->opcode == O_LOG) cmd += F_LEN(cmd); if (cmd->opcode == O_ALTQ) cmd += F_LEN(cmd); if (cmd->opcode == O_TAG) cmd += F_LEN(cmd); if (cmd->opcode != O_SKIPTO ) { rule = me->next; } else { rule = rule2ptr(ch, tablearg ? tablearg : cmd->arg1); } me->next_rule = rule; /* XXX perhaps unnecessary ? */ return rule; } static int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen, uint32_t value) { struct radix_node_head *rnh; struct table_entry *ent; struct radix_node *rn; #ifdef IPFW_HASHTABLES if (tbl >= 2*IPFW_TABLES_MAX) return EINVAL; return EINVAL; // XXX to be completed #endif if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); if (ent == NULL) return (ENOMEM); ent->value = value; #ifdef linux /* there is no sin_len on linux, and the code assumes the first * byte in the sockaddr to contain the length in bits. * So we just dump the number right there */ *((uint8_t *)&(ent->addr)) = 8; *((uint8_t *)&(ent->mask)) = 8; #else ent->addr.sin_len = ent->mask.sin_len = 8; #endif ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; IPFW_WLOCK(ch); rn = rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent); if (rn == NULL) { IPFW_WUNLOCK(ch); free(ent, M_IPFW_TBL); return (EEXIST); } IPFW_WUNLOCK(ch); return (0); } static int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen) { struct radix_node_head *rnh; struct table_entry *ent; struct sockaddr_in sa, mask; #ifdef IPFW_HASHTABLES if (tbl >= 2*IPFW_TABLES_MAX) return EINVAL; return EINVAL; // XXX to be completed #endif if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; #ifdef linux /* there is no sin_len on linux, see above */ *((uint8_t *)&sa) = 8; *((uint8_t *)&mask) = 8; #else sa.sin_len = mask.sin_len = 8; #endif mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; IPFW_WLOCK(ch); ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); if (ent == NULL) { IPFW_WUNLOCK(ch); return (ESRCH); } IPFW_WUNLOCK(ch); free(ent, M_IPFW_TBL); return (0); } static int flush_table_entry(struct radix_node *rn, void *arg) { struct radix_node_head * const rnh = arg; struct table_entry *ent; ent = (struct table_entry *) rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); if (ent != NULL) free(ent, M_IPFW_TBL); return (0); } static int flush_table(struct ip_fw_chain *ch, uint16_t tbl) { struct radix_node_head *rnh; IPFW_WLOCK_ASSERT(ch); #ifdef IPFW_HASHTABLES if (tbl >= 2*IPFW_TABLES_MAX) return EINVAL; return EINVAL; // XXX to be completed #endif if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; KASSERT(rnh != NULL, ("NULL IPFW table")); rnh->rnh_walktree(rnh, flush_table_entry, rnh); return (0); } static void flush_tables(struct ip_fw_chain *ch) { uint16_t tbl; IPFW_WLOCK_ASSERT(ch); for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) flush_table(ch, tbl); #ifdef IPFW_HASHTABLES for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) 
ch->hashtab[tbl] = ipfw_ht_destroy(ch->hashtab[tbl]); #endif } static int init_tables(struct ip_fw_chain *ch) { int i; uint16_t j; for (i = 0; i < IPFW_TABLES_MAX; i++) { if (!rn_inithead((void **)&ch->tables[i], 32)) { for (j = 0; j < i; j++) { (void) flush_table(ch, j); } return (ENOMEM); } } #ifdef IPFW_HASHTABLES for (i = 0; i < IPFW_TABLES_MAX; i++) ch->hashtab[i] = ipfw_ht_destroy(ch->hashtab[i]); #endif return (0); } static int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { struct radix_node_head *rnh; struct table_entry *ent; struct sockaddr_in sa; if (tbl >= IPFW_TABLES_MAX) return (0); rnh = ch->tables[tbl]; #ifdef linux /* there is no sin_len on linux, see above */ *((uint8_t *)&sa) = 8; #else sa.sin_len = 8; #endif sa.sin_addr.s_addr = addr; ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); if (ent != NULL) { *val = ent->value; return (1); } return (0); } static int count_table_entry(struct radix_node *rn, void *arg) { u_int32_t * const cnt = arg; (*cnt)++; return (0); } static int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { struct radix_node_head *rnh; if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; *cnt = 0; rnh->rnh_walktree(rnh, count_table_entry, cnt); return (0); } static int dump_table_entry(struct radix_node *rn, void *arg) { struct table_entry * const n = (struct table_entry *)rn; ipfw_table * const tbl = arg; ipfw_table_entry *ent; if (tbl->cnt == tbl->size) return (1); ent = &tbl->ent[tbl->cnt]; ent->tbl = tbl->tbl; if (in_nullhost(n->mask.sin_addr)) ent->masklen = 0; else ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); ent->addr = n->addr.sin_addr.s_addr; ent->value = n->value; tbl->cnt++; return (0); } static int dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) { struct radix_node_head *rnh; if (tbl->tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl->tbl]; tbl->cnt = 0; rnh->rnh_walktree(rnh, dump_table_entry, tbl); return 
(0); } static int check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, u_int16_t src_port, struct ucred **uc, int *ugid_lookup, struct inpcb *inp) { #ifdef linux int match = 0; struct sk_buff *skb = ((struct mbuf *)inp)->m_skb; struct bsd_ucred *u = (struct bsd_ucred *)uc; if (*ugid_lookup == 0) { /* actively lookup and copy in cache */ /* returns null if any element of the chain up to file is null. * if sk != NULL then we also have a reference */ *ugid_lookup = linux_lookup(proto, src_ip.s_addr, htons(src_port), dst_ip.s_addr, htons(dst_port), skb, oif ? 1 : 0, u); } if (*ugid_lookup < 0) return 0; if (insn->o.opcode == O_UID) match = (u->uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_JAIL) match = (u->xid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) match = (u->gid == (uid_t)insn->d[0]); return match; #else /* FreeBSD */ struct inpcbinfo *pi; int wildcard; struct inpcb *pcb; int match; /* * Check to see if the UDP or TCP stack supplied us with * the PCB. If so, rather then holding a lock and looking * up the PCB, we can use the one that was supplied. */ if (inp && *ugid_lookupp == 0) { INP_LOCK_ASSERT(inp); if (inp->inp_socket != NULL) { *uc = crhold(inp->inp_cred); *ugid_lookupp = 1; } else *ugid_lookupp = -1; } /* * If we have already been here and the packet has no * PCB entry associated with it, then we can safely * assume that this is a no match. */ if (*ugid_lookupp == -1) return (0); if (proto == IPPROTO_TCP) { wildcard = 0; pi = &V_tcbinfo; } else if (proto == IPPROTO_UDP) { wildcard = INPLOOKUP_WILDCARD; pi = &V_udbinfo; } else return 0; match = 0; if (*ugid_lookupp == 0) { INP_INFO_RLOCK(pi); pcb = (oif) ? 
in_pcblookup_hash(pi, dst_ip, htons(dst_port), src_ip, htons(src_port), wildcard, oif) : in_pcblookup_hash(pi, src_ip, htons(src_port), dst_ip, htons(dst_port), wildcard, NULL); if (pcb != NULL) { *uc = crhold(pcb->inp_cred); *ugid_lookupp = 1; } INP_INFO_RUNLOCK(pi); if (*ugid_lookupp == 0) { /* * If the lookup did not yield any results, there * is no sense in coming back and trying again. So * we can set lookup to -1 and ensure that we wont * bother the pcb system again. */ *ugid_lookupp = -1; return (0); } } if (insn->o.opcode == O_UID) match = ((*uc)->cr_uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) match = groupmember((gid_t)insn->d[0], *uc); else if (insn->o.opcode == O_JAIL) match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); return match; #endif } /* * The main check routine for the firewall. * * All arguments are in args so we can modify them and return them * back to the caller. * * Parameters: * * args->m (in/out) The packet; we set to NULL when/if we nuke it. * Starts with the IP header. * args->eh (in) Mac header if present, or NULL for layer3 packet. * args->L3offset Number of bytes bypassed if we came from L2. * e.g. often sizeof(eh) ** NOTYET ** * args->oif Outgoing interface, or NULL if packet is incoming. * The incoming interface is in the mbuf. (in) * args->divert_rule (in/out) * Skip up to the first rule past this rule number; * upon return, non-zero port number for divert or tee. * * args->rule Pointer to the last matching rule (in/out) * args->next_hop Socket we are forwarding to (out). 
* args->f_id Addresses grabbed from the packet (out) * args->cookie a cookie depending on rule action * * Return value: * * IP_FW_PASS the packet must be accepted * IP_FW_DENY the packet must be dropped * IP_FW_DIVERT divert packet, port in m_tag * IP_FW_TEE tee packet, port in m_tag * IP_FW_DUMMYNET to dummynet, pipe in args->cookie * IP_FW_NETGRAPH into netgraph, cookie args->cookie * */ int ipfw_chk(struct ip_fw_args *args) { /* * Local variables holding state during the processing of a packet: * * IMPORTANT NOTE: to speed up the processing of rules, there * are some assumption on the values of the variables, which * are documented here. Should you change them, please check * the implementation of the various instructions to make sure * that they still work. * * args->eh The MAC header. It is non-null for a layer2 * packet, it is NULL for a layer-3 packet. * **notyet** * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. * * m | args->m Pointer to the mbuf, as received from the caller. * It may change if ipfw_chk() does an m_pullup, or if it * consumes the packet because it calls send_reject(). * XXX This has to change, so that ipfw_chk() never modifies * or consumes the buffer. * ip is the beginning of the ip(4 or 6) header. * Calculated by adding the L3offset to the start of data. * (Until we start using L3offset, the packet is * supposed to start with the ip header). */ struct mbuf *m = args->m; struct ip *ip = mtod(m, struct ip *); /* * For rules which contain uid/gid or jail constraints, cache * a copy of the users credentials after the pcb lookup has been * executed. This will speed up the processing of rules with * these types of constraints, as well as decrease contention * on pcb related locks. */ struct bsd_ucred ucred_cache; int ucred_lookup = 0; /* * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG * associated with a packet input on a divert socket. 
This * will allow to distinguish traffic and its direction when * it originates from a divert socket. */ u_int divinput_flags = 0; /* * oif | args->oif If NULL, ipfw_chk has been called on the * inbound path (ether_input, ip_input). * If non-NULL, ipfw_chk has been called on the outbound path * (ether_output, ip_output). */ struct ifnet *oif = args->oif; struct ip_fw *f = NULL; /* matching rule */ int retval = 0; /* * hlen The length of the IP header. */ u_int hlen = 0; /* hlen >0 means we have an IP pkt */ /* * offset The offset of a fragment. offset != 0 means that * we have a fragment at this offset of an IPv4 packet. * offset == 0 means that (if this is an IPv4 packet) * this is the first or only fragment. * For IPv6 offset == 0 means there is no Fragment Header. * If offset != 0 for IPv6 always use correct mask to * get the correct offset because we add IP6F_MORE_FRAG * to be able to dectect the first fragment which would * otherwise have offset = 0. */ u_short offset = 0; /* * Local copies of addresses. They are only valid if we have * an IP packet. * * proto The protocol. Set to 0 for non-ip packets, * or to the protocol read from the packet otherwise. * proto != 0 means that we have an IPv4 packet. * * src_port, dst_port port numbers, in HOST format. Only * valid for TCP and UDP packets. * * src_ip, dst_ip ip addresses, in NETWORK format. * Only valid for IPv4 packets. */ u_int8_t proto; u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ struct in_addr src_ip, dst_ip; /* NOTE: network format */ u_int16_t ip_len=0; int pktlen; u_int16_t etype = 0; /* Host order stored ether type */ /* * dyn_dir = MATCH_UNKNOWN when rules unchecked, * MATCH_NONE when checked and not matched (q = NULL), * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) */ int dyn_dir = MATCH_UNKNOWN; ipfw_dyn_rule *q = NULL; struct ip_fw_chain *chain = &V_layer3_chain; struct m_tag *mtag; /* * We store in ulp a pointer to the upper layer protocol header. 
* In the ipv4 case this is easy to determine from the header, * but for ipv6 we might have some additional headers in the middle. * ulp is NULL if not found. */ void *ulp = NULL; /* upper layer protocol pointer. */ /* XXX ipv6 variables */ int is_ipv6 = 0; u_int16_t ext_hd = 0; /* bits vector for extension header filtering */ /* end of ipv6 variables */ int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ dst_ip.s_addr = 0; /* make sure it is initialized */ src_ip.s_addr = 0; /* make sure it is initialized */ pktlen = m->m_pkthdr.len; args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ proto = args->f_id.proto = 0; /* mark f_id invalid */ /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ /* * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, * then it sets p to point at the offset "len" in the mbuf. WARNING: the * pointer might become stale after other pullups (but we never use it * this way). */ #define PULLUP_TO(_len, p, T) \ do { \ int x = (_len) + sizeof(T); \ if ((m)->m_len < x) { \ goto pullup_failed; \ } \ p = (mtod(m, char *) + (_len)); \ } while (0) /* * if we have an ether header, */ if (args->eh) etype = ntohs(args->eh->ether_type); /* Identify IP packets and fill up variables. 
*/ if (pktlen >= sizeof(struct ip6_hdr) && (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; is_ipv6 = 1; args->f_id.addr_type = 6; hlen = sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; /* Search extension headers to find upper layer protocols */ while (ulp == NULL) { switch (proto) { case IPPROTO_ICMPV6: PULLUP_TO(hlen, ulp, struct icmp6_hdr); args->f_id.flags = ICMP6(ulp)->icmp6_type; break; case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; args->f_id.flags = TCP(ulp)->th_flags; break; case IPPROTO_SCTP: PULLUP_TO(hlen, ulp, struct sctphdr); src_port = SCTP(ulp)->src_port; dst_port = SCTP(ulp)->dest_port; break; case IPPROTO_UDP: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; src_port = UDP(ulp)->uh_sport; break; case IPPROTO_HOPOPTS: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_hbh); ext_hd |= EXT_HOPOPTS; hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; ulp = NULL; break; case IPPROTO_ROUTING: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_rthdr); switch (((struct ip6_rthdr *)ulp)->ip6r_type) { case 0: ext_hd |= EXT_RTHDR0; break; case 2: ext_hd |= EXT_RTHDR2; break; default: printf("IPFW2: IPV6 - Unknown Routing " "Header type(%d)\n", ((struct ip6_rthdr *)ulp)->ip6r_type); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } ext_hd |= EXT_ROUTING; hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; ulp = NULL; break; case IPPROTO_FRAGMENT: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_frag); ext_hd |= EXT_FRAGMENT; hlen += sizeof (struct ip6_frag); proto = ((struct ip6_frag *)ulp)->ip6f_nxt; offset = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_OFF_MASK; /* Add IP6F_MORE_FRAG for offset of first * fragment to be != 0. 
*/ offset |= ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_MORE_FRAG; if (offset == 0) { printf("IPFW2: IPV6 - Invalid Fragment " "Header\n"); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } args->f_id.frag_id6 = ntohl(((struct ip6_frag *)ulp)->ip6f_ident); ulp = NULL; break; case IPPROTO_DSTOPTS: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_hbh); ext_hd |= EXT_DSTOPTS; hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; ulp = NULL; break; case IPPROTO_AH: /* RFC 2402 */ PULLUP_TO(hlen, ulp, struct ip6_ext); ext_hd |= EXT_AH; hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; proto = ((struct ip6_ext *)ulp)->ip6e_nxt; ulp = NULL; break; case IPPROTO_ESP: /* RFC 2406 */ PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ /* Anything past Seq# is variable length and * data past this ext. header is encrypted. */ ext_hd |= EXT_ESP; break; case IPPROTO_NONE: /* RFC 2460 */ /* * Packet ends here, and IPv6 header has * already been pulled up. If ip6e_len!=0 * then octets must be ignored. */ ulp = ip; /* non-NULL to get out of loop. */ break; case IPPROTO_OSPFIGP: /* XXX OSPF header check? */ PULLUP_TO(hlen, ulp, struct ip6_ext); break; case IPPROTO_PIM: /* XXX PIM header check? 
*/ PULLUP_TO(hlen, ulp, struct pim); break; case IPPROTO_CARP: PULLUP_TO(hlen, ulp, struct carp_header); if (((struct carp_header *)ulp)->carp_version != CARP_VERSION) return (IP_FW_DENY); if (((struct carp_header *)ulp)->carp_type != CARP_ADVERTISEMENT) return (IP_FW_DENY); break; case IPPROTO_IPV6: /* RFC 2893 */ PULLUP_TO(hlen, ulp, struct ip6_hdr); break; case IPPROTO_IPV4: /* RFC 2893 */ PULLUP_TO(hlen, ulp, struct ip); break; default: printf("IPFW2: IPV6 - Unknown Extension " "Header(%d), ext_hd=%x\n", proto, ext_hd); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); PULLUP_TO(hlen, ulp, struct ip6_ext); break; } /*switch */ } ip = mtod(m, struct ip *); ip6 = (struct ip6_hdr *)ip; args->f_id.src_ip6 = ip6->ip6_src; args->f_id.dst_ip6 = ip6->ip6_dst; args->f_id.src_ip = 0; args->f_id.dst_ip = 0; args->f_id.flow_id6 = ntohl(ip6->ip6_flow); } else if (pktlen >= sizeof(struct ip) && (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { is_ipv4 = 1; hlen = ip->ip_hl << 2; args->f_id.addr_type = 4; /* * Collect parameters into local variables for faster matching. */ proto = ip->ip_p; src_ip = ip->ip_src; dst_ip = ip->ip_dst; if (1 || args->eh != NULL) { /* layer 2 packets are as on the wire */ offset = ntohs(ip->ip_off) & IP_OFFMASK; ip_len = ntohs(ip->ip_len); } else { offset = ip->ip_off & IP_OFFMASK; ip_len = ip->ip_len; } pktlen = ip_len < pktlen ? 
ip_len : pktlen; if (offset == 0) { switch (proto) { case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; args->f_id.flags = TCP(ulp)->th_flags; break; case IPPROTO_UDP: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; src_port = UDP(ulp)->uh_sport; break; case IPPROTO_ICMP: PULLUP_TO(hlen, ulp, struct icmphdr); args->f_id.flags = ICMP(ulp)->icmp_type; break; default: break; } } ip = mtod(m, struct ip *); args->f_id.src_ip = ntohl(src_ip.s_addr); args->f_id.dst_ip = ntohl(dst_ip.s_addr); } #undef PULLUP_TO if (proto) { /* we may have port numbers, store them */ args->f_id.proto = proto; args->f_id.src_port = src_port = ntohs(src_port); args->f_id.dst_port = dst_port = ntohs(dst_port); } IPFW_RLOCK(chain); if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ IPFW_RUNLOCK(chain); return (IP_FW_PASS); /* accept */ } mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); if (args->rule) { /* * Packet has already been tagged. Look for the next rule * to restart processing. Make sure that args->rule still * exists and not changed. * If fw_one_pass != 0 then just accept it. * XXX should not happen here, but optimized out in * the caller. */ if (V_fw_one_pass) { IPFW_RUNLOCK(chain); return (IP_FW_PASS); } if (chain->id != args->chain_id) { for (f = chain->rules; f != NULL; f = f->next) if (f == args->rule && f->id == args->rule_id) break; if (f != NULL) f = f->next_rule; else f = chain->default_rule; } else f = args->rule->next_rule; if (f == NULL) f = lookup_next_rule(chain, args->rule, 0); } else { /* * Find the starting rule. It can be either the first * one, or the one after divert_rule if asked so. */ int skipto = mtag ? 
divert_cookie(mtag) : 0; f = chain->rules; if (args->eh == NULL && skipto != 0) { if (skipto >= IPFW_DEFAULT_RULE) { IPFW_RUNLOCK(chain); return (IP_FW_DENY); /* invalid */ } f = rule2ptr(chain, skipto+1); } } /* reset divert rule to avoid confusion later */ if (mtag) { divinput_flags = divert_info(mtag) & (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG); m_tag_delete(m, mtag); } /* * Now scan the rules, and parse microinstructions for each rule. * We have two nested loops and an inner switch. Sometimes we * need to break out of one or both loops, or re-enter one of * the loops with updated variables. Loop variables are: * * f (outer loop) points to the current rule. * On output it points to the matching rule. * done (outer loop) is used as a flag to break the loop. * l (inner loop) residual length of current rule. * cmd points to the current microinstruction. * * We break the inner loop by setting l=0 and possibly * cmdlen=0 if we don't want to advance cmd. * We break the outer loop by setting done=1 * We can restart the inner loop by setting l>0 and f, cmd * as needed. */ for (; f; f = f->next) { ipfw_insn *cmd; uint32_t tablearg = 0; int l, cmdlen, skip_or; /* skip rest of OR block */ /* again: */ if (V_set_disable & (1 << f->set) ) continue; skip_or = 0; for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { int match; /* * check_body is a jump target used when we find a * CHECK_STATE, and need to jump to the body of * the target rule. */ /* check_body: */ cmdlen = F_LEN(cmd); /* * An OR block (insn_1 || .. || insn_n) has the * F_OR bit set in all but the last instruction. * The first match will set "skip_or", and cause * the following instructions to be skipped until * past the one with the F_OR bit clear. 
*/ if (skip_or) { /* skip this instruction */ if ((cmd->len & F_OR) == 0) skip_or = 0; /* next one is good */ continue; } match = 0; /* set to 1 if we succeed */ switch (cmd->opcode) { /* * The first set of opcodes compares the packet's * fields with some pattern, setting 'match' if a * match is found. At the end of the loop there is * logic to deal with F_NOT and F_OR flags associated * with the opcode. */ case O_NOP: match = 1; break; case O_FORWARD_MAC: printf("ipfw: opcode %d unimplemented\n", cmd->opcode); break; case O_GID: case O_UID: case O_JAIL: /* * We only check offset == 0 && proto != 0, * as this ensures that we have a * packet with the ports info. */ if (offset!=0) break; if (is_ipv6) /* XXX to be fixed later */ break; if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) match = check_uidgid( (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, src_ip, src_port, (struct ucred **)&ucred_cache, &ucred_lookup, (struct inpcb *)args->m); break; case O_RECV: match = iface_match(m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); break; case O_XMIT: match = iface_match(oif, (ipfw_insn_if *)cmd); break; case O_VIA: match = iface_match(oif ? 
oif : m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); break; case O_MACADDR2: if (args->eh != NULL) { /* have MAC header */ u_int32_t *want = (u_int32_t *) ((ipfw_insn_mac *)cmd)->addr; u_int32_t *mask = (u_int32_t *) ((ipfw_insn_mac *)cmd)->mask; u_int32_t *hdr = (u_int32_t *)args->eh; match = ( want[0] == (hdr[0] & mask[0]) && want[1] == (hdr[1] & mask[1]) && want[2] == (hdr[2] & mask[2]) ); } break; case O_MAC_TYPE: if (args->eh != NULL) { u_int16_t *p = ((ipfw_insn_u16 *)cmd)->ports; int i; for (i = cmdlen - 1; !match && i>0; i--, p += 2) match = (etype >= p[0] && etype <= p[1]); } break; case O_FRAG: match = (offset != 0); break; case O_IN: /* "out" is "not in" */ match = (oif == NULL); break; case O_LAYER2: match = (args->eh != NULL); break; case O_DIVERTED: match = (cmd->arg1 & 1 && divinput_flags & IP_FW_DIVERT_LOOPBACK_FLAG) || (cmd->arg1 & 2 && divinput_flags & IP_FW_DIVERT_OUTPUT_FLAG); break; case O_PROTO: /* * We do not allow an arg of 0 so the * check of "proto" only suffices. */ match = (proto == cmd->arg1); break; case O_IP_SRC: match = is_ipv4 && (((ipfw_insn_ip *)cmd)->addr.s_addr == src_ip.s_addr); break; case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: if (is_ipv4) { uint32_t a = (cmd->opcode == O_IP_DST_LOOKUP) ? 
dst_ip.s_addr : src_ip.s_addr; uint32_t v = 0; if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { v = ((ipfw_insn_u32 *)cmd)->d[1]; if (v == 0) a = dst_ip.s_addr; else if (v == 1) a = src_ip.s_addr; else if (offset != 0) break; else if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) break; else if (v == 2) a = dst_port; else if (v == 3) a = src_port; else if (v >= 4 && v <= 6) { check_uidgid( (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, src_ip, src_port, (struct ucred **)&ucred_cache, &ucred_lookup, (struct inpcb *)args->m); #ifdef linux if (v ==4 /* O_UID */) a = ucred_cache.uid; else if (v == 5 /* O_GID */) a = ucred_cache.gid; else if (v == 6 /* O_JAIL */) a = ucred_cache.xid; #else if (v ==4 /* O_UID */) a = (*uc)->cr_uid; else if (v == 5 /* O_GID */) ; // a = groupmember((gid_t)insn->d[0], *uc); else if (v == 6 /* O_JAIL */) a = (*uc)->cr_prison->pr_id; #endif } else break; } match = lookup_table(chain, cmd->arg1, a, &v); if (!match) break; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; else tablearg = v; } break; case O_IP_SRC_MASK: case O_IP_DST_MASK: if (is_ipv4) { uint32_t a = (cmd->opcode == O_IP_DST_MASK) ? dst_ip.s_addr : src_ip.s_addr; uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; int i = cmdlen-1; for (; !match && i>0; i-= 2, p+= 2) match = (p[0] == (a & p[1])); } break; case O_IP_SRC_ME: if (is_ipv4) { struct ifnet *tif; INADDR_TO_IFP(src_ip, tif); match = (tif != NULL); } break; case O_IP_DST_SET: case O_IP_SRC_SET: if (is_ipv4) { u_int32_t *d = (u_int32_t *)(cmd+1); u_int32_t addr = cmd->opcode == O_IP_DST_SET ? 
args->f_id.dst_ip : args->f_id.src_ip; if (addr < d[0]) break; addr -= d[0]; /* subtract base */ match = (addr < cmd->arg1) && ( d[ 1 + (addr>>5)] & (1<<(addr & 0x1f)) ); } break; case O_IP_DST: match = is_ipv4 && (((ipfw_insn_ip *)cmd)->addr.s_addr == dst_ip.s_addr); break; case O_IP_DST_ME: if (is_ipv4) { struct ifnet *tif; INADDR_TO_IFP(dst_ip, tif); match = (tif != NULL); } break; case O_IP_SRCPORT: case O_IP_DSTPORT: /* * offset == 0 && proto != 0 is enough * to guarantee that we have a * packet with port info. */ if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) && offset == 0) { u_int16_t x = (cmd->opcode == O_IP_SRCPORT) ? src_port : dst_port ; u_int16_t *p = ((ipfw_insn_u16 *)cmd)->ports; int i; for (i = cmdlen - 1; !match && i>0; i--, p += 2) match = (x>=p[0] && x<=p[1]); } break; case O_ICMPTYPE: match = (offset == 0 && proto==IPPROTO_ICMP && icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); break; #ifdef INET6 case O_ICMP6TYPE: match = is_ipv6 && offset == 0 && proto==IPPROTO_ICMPV6 && icmp6type_match( ICMP6(ulp)->icmp6_type, (ipfw_insn_u32 *)cmd); break; #endif /* INET6 */ case O_IPOPT: match = (is_ipv4 && ipopts_match(ip, cmd) ); break; case O_IPVER: match = (is_ipv4 && cmd->arg1 == ip->ip_v); break; case O_IPID: case O_IPLEN: case O_IPTTL: if (is_ipv4) { /* only for IP packets */ uint16_t x; uint16_t *p; int i; if (cmd->opcode == O_IPLEN) x = ip_len; else if (cmd->opcode == O_IPTTL) x = ip->ip_ttl; else /* must be IPID */ x = ntohs(ip->ip_id); if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* otherwise we have ranges */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i>0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_IPPRECEDENCE: match = (is_ipv4 && (cmd->arg1 == (ip->ip_tos & 0xe0)) ); break; case O_IPTOS: match = (is_ipv4 && flags_match(cmd, ip->ip_tos)); break; case O_TCPDATALEN: if (proto == IPPROTO_TCP && offset == 0) { struct tcphdr *tcp; uint16_t x; uint16_t *p; int i; tcp = TCP(ulp); x = ip_len - 
((ip->ip_hl + tcp->th_off) << 2); if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* otherwise we have ranges */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i>0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_TCPFLAGS: match = (proto == IPPROTO_TCP && offset == 0 && flags_match(cmd, TCP(ulp)->th_flags)); break; case O_TCPOPTS: match = (proto == IPPROTO_TCP && offset == 0 && tcpopts_match(TCP(ulp), cmd)); break; case O_TCPSEQ: match = (proto == IPPROTO_TCP && offset == 0 && ((ipfw_insn_u32 *)cmd)->d[0] == TCP(ulp)->th_seq); break; case O_TCPACK: match = (proto == IPPROTO_TCP && offset == 0 && ((ipfw_insn_u32 *)cmd)->d[0] == TCP(ulp)->th_ack); break; case O_TCPWIN: match = (proto == IPPROTO_TCP && offset == 0 && cmd->arg1 == TCP(ulp)->th_win); break; case O_ESTAB: /* reject packets which have SYN only */ /* XXX should i also check for TH_ACK ? */ match = (proto == IPPROTO_TCP && offset == 0 && (TCP(ulp)->th_flags & (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); break; case O_ALTQ: { struct pf_mtag *at; ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; match = 1; at = pf_find_mtag(m); if (at != NULL && at->qid != 0) break; at = pf_get_mtag(m); if (at == NULL) { /* * Let the packet fall back to the * default ALTQ. */ break; } at->qid = altq->qid; if (is_ipv4) at->af = AF_INET; else at->af = AF_LINK; at->hdr = ip; break; } case O_LOG: if (V_fw_verbose) ipfw_log(f, hlen, args, m, oif, offset, tablearg, ip); match = 1; break; case O_PROB: match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); break; #if 0 case O_VERREVPATH: /* Outgoing packets automatically pass/match */ match = ((oif != NULL) || (m->m_pkthdr.rcvif == NULL) || ( #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), m->m_pkthdr.rcvif) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib))); break; case O_VERSRCREACH: /* Outgoing packets automatically pass/match */ match = (hlen > 0 && ((oif != NULL) || #ifdef INET6 is_ipv6 ? 
verify_path6(&(args->f_id.src_ip6), NULL) : #endif verify_path(src_ip, NULL, args->f_id.fib))); break; case O_ANTISPOOF: /* Outgoing packets automatically pass/match */ if (oif == NULL && hlen > 0 && ( (is_ipv4 && in_localaddr(src_ip)) #ifdef INET6 || (is_ipv6 && in6_localaddr(&(args->f_id.src_ip6))) #endif )) match = #ifdef INET6 is_ipv6 ? verify_path6( &(args->f_id.src_ip6), m->m_pkthdr.rcvif) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib); else match = 1; break; #endif case O_IPSEC: #ifdef IPSEC match = (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); #endif /* otherwise no match */ break; #ifdef INET6 case O_IP6_SRC: match = is_ipv6 && IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, &((ipfw_insn_ip6 *)cmd)->addr6); break; case O_IP6_DST: match = is_ipv6 && IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, &((ipfw_insn_ip6 *)cmd)->addr6); break; case O_IP6_SRC_MASK: case O_IP6_DST_MASK: if (is_ipv6) { int i = cmdlen - 1; struct in6_addr p; struct in6_addr *d = &((ipfw_insn_ip6 *)cmd)->addr6; for (; !match && i > 0; d += 2, i -= F_INSN_SIZE(struct in6_addr) * 2) { p = (cmd->opcode == O_IP6_SRC_MASK) ? args->f_id.src_ip6: args->f_id.dst_ip6; APPLY_MASK(&p, &d[1]); match = IN6_ARE_ADDR_EQUAL(&d[0], &p); } } break; case O_IP6_SRC_ME: match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6); break; case O_IP6_DST_ME: match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6); break; case O_FLOW6ID: match = is_ipv6 && flow6id_match(args->f_id.flow_id6, (ipfw_insn_u32 *) cmd); break; case O_EXT_HDR: match = is_ipv6 && (ext_hd & ((ipfw_insn *) cmd)->arg1); break; case O_IP6: match = is_ipv6; break; #endif case O_IP4: match = is_ipv4; break; #if 0 case O_TAG: { uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; /* Packet is already tagged with this tag? */ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); /* We have `untag' action when F_NOT flag is * present. 
And we must remove this mtag from * mbuf and reset `match' to zero (`match' will * be inversed later). * Otherwise we should allocate new mtag and * push it into mbuf. */ if (cmd->len & F_NOT) { /* `untag' action */ if (mtag != NULL) m_tag_delete(m, mtag); } else if (mtag == NULL) { if ((mtag = m_tag_alloc(MTAG_IPFW, tag, 0, M_NOWAIT)) != NULL) m_tag_prepend(m, mtag); } match = (cmd->len & F_NOT) ? 0: 1; break; } case O_FIB: /* try match the specified fib */ if (args->f_id.fib == cmd->arg1) match = 1; break; case O_TAGGED: { uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; if (cmdlen == 1) { match = m_tag_locate(m, MTAG_IPFW, tag, NULL) != NULL; break; } /* we have ranges */ for (mtag = m_tag_first(m); mtag != NULL && !match; mtag = m_tag_next(m, mtag)) { uint16_t *p; int i; if (mtag->m_tag_cookie != MTAG_IPFW) continue; p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for(; !match && i > 0; i--, p += 2) match = mtag->m_tag_id >= p[0] && mtag->m_tag_id <= p[1]; } break; } #endif /* * The second set of opcodes represents 'actions', * i.e. the terminal part of a rule once the packet * matches all previous patterns. * Typically there is only one action for each rule, * and the opcode is stored at the end of the rule * (but there are exceptions -- see below). * * In general, here we set retval and terminate the * outer loop (would be a 'break 3' in some language, * but we need to set l=0, done=1) * * Exceptions: * O_COUNT and O_SKIPTO actions: * instead of terminating, we jump to the next rule * (setting l=0), or to the SKIPTO target (by * setting f, cmd and l as needed), respectively. * * O_TAG, O_LOG and O_ALTQ action parameters: * perform some action and set match = 1; * * O_LIMIT and O_KEEP_STATE: these opcodes are * not real 'actions', and are stored right * before the 'action' part of the rule. 
* These opcodes try to install an entry in the * state tables; if successful, we continue with * the next opcode (match=1; break;), otherwise * the packet must be dropped (set retval, * break loops with l=0, done=1) * * O_PROBE_STATE and O_CHECK_STATE: these opcodes * cause a lookup of the state table, and a jump * to the 'action' part of the parent rule * if an entry is found, or * (CHECK_STATE only) a jump to the next rule if * the entry is not found. * The result of the lookup is cached so that * further instances of these opcodes become NOPs. * The jump to the next rule is done by setting * l=0, cmdlen=0. */ case O_LIMIT: case O_KEEP_STATE: if (install_state(f, (ipfw_insn_limit *)cmd, args, tablearg)) { /* error or limit violation */ retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ } match = 1; break; case O_PROBE_STATE: case O_CHECK_STATE: /* * dynamic rules are checked at the first * keep-state or check-state occurrence, * with the result being stored in dyn_dir. * The compiler introduces a PROBE_STATE * instruction for us when we have a * KEEP_STATE (because PROBE_STATE needs * to be run first). */ if (dyn_dir == MATCH_UNKNOWN && (q = lookup_dyn_rule(&args->f_id, &dyn_dir, proto == IPPROTO_TCP ? TCP(ulp) : NULL)) != NULL) { /* * Found dynamic entry, update stats * and jump to the 'action' part of * the parent rule by setting * f, cmd, l and clearing cmdlen. */ q->pcnt++; q->bcnt += pktlen; f = q->rule; cmd = ACTION_PTR(f); l = f->cmd_len - f->act_ofs; IPFW_DYN_UNLOCK(); cmdlen = 0; match = 1; break; } /* * Dynamic entry not found. If CHECK_STATE, * skip to next rule, if PROBE_STATE just * ignore and continue with next opcode. 
*/ if (cmd->opcode == O_CHECK_STATE) l = 0; /* exit inner loop */ match = 1; break; case O_ACCEPT: retval = 0; /* accept */ l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_PIPE: case O_QUEUE: args->rule = f; /* report matching rule */ args->rule_id = f->id; args->chain_id = chain->id; if (cmd->arg1 == IP_FW_TABLEARG) args->cookie = tablearg; else args->cookie = cmd->arg1; retval = IP_FW_DUMMYNET; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; #if 0 case O_DIVERT: case O_TEE: if (args->eh) /* not on layer 2 */ break; /* otherwise this is terminal */ l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ mtag = m_tag_get(PACKET_TAG_DIVERT, sizeof(struct divert_tag), M_NOWAIT); if (mtag == NULL) { retval = IP_FW_DENY; } else { struct divert_tag *dt; dt = (struct divert_tag *)(mtag+1); dt->cookie = f->rulenum; if (cmd->arg1 == IP_FW_TABLEARG) dt->info = tablearg; else dt->info = cmd->arg1; m_tag_prepend(m, mtag); retval = (cmd->opcode == O_DIVERT) ? IP_FW_DIVERT : IP_FW_TEE; } break; #endif case O_COUNT: case O_SKIPTO: f->pcnt++; /* update stats */ f->bcnt += pktlen; f->timestamp = time_uptime; if (cmd->opcode == O_COUNT) { l = 0; /* exit inner loop */ break; } /* handle skipto */ if (cmd->arg1 == IP_FW_TABLEARG) { f = lookup_next_rule(chain, f, tablearg); } else { if (f->next_rule == NULL) lookup_next_rule(chain, f, 0); f = f->next_rule; } /* * Skip disabled rules, and * re-enter the inner loop * with the correct f, l and cmd. * Also clear cmdlen and skip_or */ while (f && (V_set_disable & (1 << f->set))) f = f->next; if (f) { /* found a valid rule */ l = f->cmd_len; cmd = f->cmd; } else { l = 0; /* exit inner loop */ } match = 1; cmdlen = 0; skip_or = 0; break; case O_REJECT: /* * Drop the packet and send a reject notice * if the packet is not ICMP (or is an ICMP * query), and it is not multicast/broadcast. 
*/ if (hlen > 0 && is_ipv4 && offset == 0 && (proto != IPPROTO_ICMP || is_icmp_query(ICMP(ulp))) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN_MULTICAST(ntohl(dst_ip.s_addr))) { send_reject(args, cmd->arg1, ip_len, ip); m = args->m; } /* FALLTHROUGH */ #ifdef INET6 case O_UNREACH6: if (hlen > 0 && is_ipv6 && ((offset & IP6F_OFF_MASK) == 0) && (proto != IPPROTO_ICMPV6 || (is_icmp6_query(args->f_id.flags) == 1)) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { send_reject6( args, cmd->arg1, hlen, (struct ip6_hdr *)ip); m = args->m; } /* FALLTHROUGH */ #endif case O_DENY: retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_FORWARD_IP: if (args->eh) /* not valid on layer2 pkts */ break; if (!q || dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; sa = &(((ipfw_insn_sa *)cmd)->sa); if (sa->sin_addr.s_addr == INADDR_ANY) { bcopy(sa, &args->hopstore, sizeof(*sa)); args->hopstore.sin_addr.s_addr = htonl(tablearg); args->next_hop = &args->hopstore; } else { args->next_hop = sa; } } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_NETGRAPH: case O_NGTEE: args->rule = f; /* report matching rule */ args->rule_id = f->id; args->chain_id = chain->id; if (cmd->arg1 == IP_FW_TABLEARG) args->cookie = tablearg; else args->cookie = cmd->arg1; retval = (cmd->opcode == O_NETGRAPH) ? IP_FW_NETGRAPH : IP_FW_NGTEE; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; #if 0 case O_SETFIB: f->pcnt++; /* update stats */ f->bcnt += pktlen; f->timestamp = time_uptime; M_SETFIB(m, cmd->arg1); args->f_id.fib = cmd->arg1; l = 0; /* exit inner loop */ break; case O_NAT: if (!IPFW_NAT_LOADED) { retval = IP_FW_DENY; } else { struct cfg_nat *t; int nat_id; args->rule = f; /* Report matching rule. */ args->rule_id = f->id; args->chain_id = chain->id; t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? 
tablearg : cmd->arg1; LOOKUP_NAT(V_layer3_chain, nat_id, t); if (t == NULL) { retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; } if (cmd->arg1 != IP_FW_TABLEARG) ((ipfw_insn_nat *)cmd)->nat = t; } retval = ipfw_nat_ptr(args, t, m); } l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_REASS: { int ip_off; f->pcnt++; f->bcnt += pktlen; l = 0; /* in any case exit inner loop */ ip_off = (args->eh != NULL) ? ntohs(ip->ip_off) : ip->ip_off; /* if not fragmented, go to next rule */ if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) break; /* * ip_reass() expects len & off in host * byte order: fix them in case we come * from layer2. */ if (args->eh != NULL) { ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } args->m = m = ip_reass(m); /* * IP header checksum fixup after * reassembly and leave header * in network byte order. */ if (m == NULL) { /* fragment got swallowed */ retval = IP_FW_DENY; } else { /* good, packet complete */ int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; /* revert len & off for layer2 pkts */ if (args->eh != NULL) ip->ip_len = htons(ip->ip_len); ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, hlen); retval = IP_FW_REASS; args->rule = f; args->rule_id = f->id; args->chain_id = chain->id; } done = 1; /* exit outer loop */ break; } #endif default: break; // XXX we disabled some panic("-- unknown opcode %d\n", cmd->opcode); } /* end of switch() on opcodes */ /* * if we get here with l=0, then match is irrelevant. 
*/
			/* F_NOT inverts the result of the opcode just evaluated. */
			if (cmd->len & F_NOT)
				match = !match;

			if (match) {
				/* one member of an OR block matched: skip the rest */
				if (cmd->len & F_OR)
					skip_or = 1;
			} else {
				if (!(cmd->len & F_OR)) /* not an OR block, */
					break;		/* try next rule    */
			}

		}	/* end of inner loop, scan opcodes */

		if (done)
			break;

/* next_rule:;*/	/* try next rule		*/

	}		/* end of outer for, scan rules */

	if (done) {
		/* Update statistics */
		f->pcnt++;
		f->bcnt += pktlen;
		f->timestamp = time_uptime;
	} else {
		/* the rule scan ended without any rule terminating it */
		retval = IP_FW_DENY;
		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
	}
	IPFW_RUNLOCK(chain);
#ifdef __FreeBSD__
	if (ucred_cache != NULL)
		crfree(ucred_cache);
#endif
	return (retval);

pullup_failed:
	/* NOTE(review): this path returns without IPFW_RUNLOCK; presumably
	 * it is only reached before the lock is taken - confirm. */
	if (V_fw_verbose)
		printf("ipfw: pullup failed\n");
	return (IP_FW_DENY);
}

/*
 * When a rule is added/deleted, clear the next_rule pointers in all rules.
 * These will be reconstructed on the fly as packets are matched.
 *
 * Also increments chain->id, so every call produces a new chain id.
 * The caller must hold the chain write lock (asserted below).
 */
static void
flush_rule_ptrs(struct ip_fw_chain *chain)
{
	struct ip_fw *rule;

	IPFW_WLOCK_ASSERT(chain);

	chain->id++;

	/* drop every cached skipto target */
	for (rule = chain->rules; rule; rule = rule->next)
		rule->next_rule = NULL;
}

/*
 * Add a new rule to the list. Copy the rule into a malloc'ed area, then
 * possibly create a rule number and add the rule to the list.
 * Update the rule_number in the input struct so the caller knows it as well.
*/
/*
 * chain	rule chain to insert into; locked for writing internally.
 * input_rule	caller-owned template, RULESIZE(input_rule) bytes are copied.
 *		Its rulenum is overwritten when it was 0 (auto-numbering).
 *
 * Returns 0 on success, ENOSPC if the copy cannot be allocated, and
 * EINVAL if the chain is empty and the rule is not the default rule, or
 * if the rule number leaves no insertion point in the sorted list
 * (i.e. a second rule numbered IPFW_DEFAULT_RULE).
 */
static int
add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
{
	struct ip_fw *rule, *f, *prev;
	int l = RULESIZE(input_rule);

	/* the very first rule installed must be the default rule */
	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
		return (EINVAL);

	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
	if (rule == NULL)
		return (ENOSPC);

	bcopy(input_rule, rule, l);

	/* never trust list linkage or counters coming from the caller */
	rule->next = NULL;
	rule->next_rule = NULL;

	rule->pcnt = 0;
	rule->bcnt = 0;
	rule->timestamp = 0;

	IPFW_WLOCK(chain);

	if (chain->rules == NULL) {	/* default rule */
		chain->rules = rule;
		rule->id = ++chain->id;
		goto done;
	}

	/* clamp the auto-increment step to 1..1000 */
	if (V_autoinc_step < 1)
		V_autoinc_step = 1;
	else if (V_autoinc_step > 1000)
		V_autoinc_step = 1000;

	if (rule->rulenum == 0) {
		/*
		 * If rulenum is 0, use highest numbered rule before
		 * the default, adding autoinc_step if room.
		 * Also set the number in the caller.
		 */
		for (f = chain->rules; f; f = f->next) {
			if (f->rulenum == IPFW_DEFAULT_RULE)
				break;
			rule->rulenum = f->rulenum;
		}
		if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
			rule->rulenum += V_autoinc_step;
		input_rule->rulenum = rule->rulenum;
	}

	/*
	 * Now insert the new rule in the right place in the sorted list.
	 * XXX TODO also put in the skipto table.
	 */
	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
		if (f->rulenum > rule->rulenum) { /* found the location */
			if (prev) {
				rule->next = f;
				prev->next = rule;
			} else {	/* head insert */
				rule->next = chain->rules;
				chain->rules = rule;
			}
			break;
		}
	}

	/*
	 * No rule with a larger number exists, so the copy was not linked
	 * anywhere (this happens when the caller asks for number
	 * IPFW_DEFAULT_RULE on a non-empty chain). Reject the request
	 * instead of leaking the copy and inflating the static counters.
	 */
	if (f == NULL) {
		IPFW_WUNLOCK(chain);
		free(rule, M_IPFW);
		return (EINVAL);
	}

	flush_rule_ptrs(chain);
	/* chain->id incremented inside flush_rule_ptrs() */
	rule->id = chain->id;

done:
	V_static_count++;
	V_static_len += l;
	IPFW_WUNLOCK(chain);
	DEB(printf("ipfw: installed rule %d, static count now %d\n",
		rule->rulenum, V_static_count);)
	return (0);
}

/**
 * Remove a static rule (including derived dynamic rules)
 * and place it on the ``reap list'' for later reclamation.
 * The caller is in charge of clearing rule pointers to avoid
 * dangling pointers.
 * @return a pointer to the next entry.
* Arguments are not checked, so they better be correct. */ static struct ip_fw * remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev) { struct ip_fw *n; int l = RULESIZE(rule); IPFW_WLOCK_ASSERT(chain); n = rule->next; IPFW_DYN_LOCK(); remove_dyn_rule(rule, NULL /* force removal */); IPFW_DYN_UNLOCK(); if (prev == NULL) chain->rules = n; else prev->next = n; V_static_count--; V_static_len -= l; // XXX remove from the skipto table rule->next = chain->reap; chain->reap = rule; return n; } /* * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. */ static void reap_rules(struct ip_fw *head) { struct ip_fw *rule; while ((rule = head) != NULL) { head = head->next; free(rule, M_IPFW); } } /* * Remove all rules from a chain (except rules in set RESVD_SET * unless kill_default = 1). The caller is responsible for * reclaiming storage for the rules left in chain->reap. */ static void free_chain(struct ip_fw_chain *chain, int kill_default) { struct ip_fw *prev, *rule; IPFW_WLOCK_ASSERT(chain); chain->reap = NULL; flush_rule_ptrs(chain); /* more efficient to do outside the loop */ for (prev = NULL, rule = chain->rules; rule ; ) if (kill_default || rule->set != RESVD_SET) rule = remove_rule(chain, rule, prev); else { prev = rule; rule = rule->next; } } /** * Remove all rules with given number, and also do set manipulation. * Assumes chain != NULL && *chain != NULL. * * The argument is an u_int32_t. 
The low 16 bit are the rule or set number,
 * the next 8 bits are the new set, the top 8 bits are the command:
 *
 *	0	delete rules with given number
 *	1	delete rules with given set number
 *	2	move rules with given number to new set
 *	3	move rules with given set number to new set
 *	4	swap sets with given numbers
 *	5	delete rules with given number and with given set number
 *
 * Returns 0 on success, EINVAL if the command or the numbers are out of
 * range, or (commands 0 and 5) if no rule carries the given number.
 *
 * None of the list walks below test for a NULL pointer: they all stop on
 * a rule number >= the one searched for or >= IPFW_DEFAULT_RULE, so they
 * depend on the chain ending with a rule numbered IPFW_DEFAULT_RULE.
 */
static int
del_entry(struct ip_fw_chain *chain, u_int32_t arg)
{
	struct ip_fw *prev = NULL, *rule;
	u_int16_t rulenum;	/* rule or old_set */
	u_int8_t cmd, new_set;

	/* unpack the three fields of the argument */
	rulenum = arg & 0xffff;
	cmd = (arg >> 24) & 0xff;
	new_set = (arg >> 16) & 0xff;

	if (cmd > 5 || new_set > RESVD_SET)
		return EINVAL;
	if (cmd == 0 || cmd == 2 || cmd == 5) {
		/* rulenum is a rule number; the default rule is off limits */
		if (rulenum >= IPFW_DEFAULT_RULE)
			return EINVAL;
	} else {
		if (rulenum > RESVD_SET)	/* old_set */
			return EINVAL;
	}

	IPFW_WLOCK(chain);
	rule = chain->rules;	/* common starting point */
	chain->reap = NULL;	/* prepare for deletions */
	switch (cmd) {
	case 0:	/* delete rules with given number */
		/*
		 * locate first rule to delete
		 */
		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
			;
		if (rule->rulenum != rulenum) {
			IPFW_WUNLOCK(chain);
			return EINVAL;
		}

		/*
		 * flush pointers outside the loop, then delete all matching
		 * rules. prev remains the same throughout the cycle.
		 */
		flush_rule_ptrs(chain);
		while (rule->rulenum == rulenum)
			rule = remove_rule(chain, rule, prev);
		break;

	case 1:	/* delete all rules with given set number */
		flush_rule_ptrs(chain);
		while (rule->rulenum < IPFW_DEFAULT_RULE) {
			if (rule->set == rulenum)
				rule = remove_rule(chain, rule, prev);
			else {
				/* kept rule becomes the new predecessor */
				prev = rule;
				rule = rule->next;
			}
		}
		break;

	case 2:	/* move rules with given number to new set */
		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
			if (rule->rulenum == rulenum)
				rule->set = new_set;
		break;

	case 3: /* move rules with given set number to new set */
		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
			if (rule->set == rulenum)
				rule->set = new_set;
		break;

	case 4: /* swap two sets */
		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
			if (rule->set == rulenum)
				rule->set = new_set;
			else if (rule->set == new_set)
				rule->set = rulenum;
		break;

	case 5: /* delete rules with given number and with given set number.
		 * rulenum - given rule number;
		 * new_set - given set number.
		 */
		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
			;
		if (rule->rulenum != rulenum) {
			IPFW_WUNLOCK(chain);
			return (EINVAL);
		}
		flush_rule_ptrs(chain);
		while (rule->rulenum == rulenum) {
			if (rule->set == new_set)
				rule = remove_rule(chain, rule, prev);
			else {
				/* same number, other set: keep it */
				prev = rule;
				rule = rule->next;
			}
		}
	}
	/*
	 * Look for rules to reclaim.  We grab the list before
	 * releasing the lock then reclaim them w/o the lock to
	 * avoid a LOR with dummynet.
	 */
	rule = chain->reap;
	IPFW_WUNLOCK(chain);
	reap_rules(rule);
	return 0;
}

/*
 * Clear counters for a specific rule.
 * The enclosing "table" is assumed locked.
 *
 * With log_only == 0 the byte/packet counters and the timestamp are
 * zeroed. In both modes, if the instruction at ACTION_PTR(rule) is an
 * O_LOG, its log_left budget is reset to max_log.
 */
static void
clear_counters(struct ip_fw *rule, int log_only)
{
	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);

	if (log_only == 0) {
		rule->bcnt = rule->pcnt = 0;
		rule->timestamp = 0;
	}
	/* the cast above is only meaningful when the opcode really is O_LOG */
	if (l->o.opcode == O_LOG)
		l->log_left = l->max_log;
}

/**
 * Reset some or all counters on firewall rules.
 * The argument `arg' is an u_int32_t.
The low 16 bit are the rule number, * the next 8 bits are the set number, the top 8 bits are the command: * 0 work with rules from all set's; * 1 work with rules only from specified set. * Specified rule number is zero if we want to clear all entries. * log_only is 1 if we only want to reset logs, zero otherwise. */ static int zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) { struct ip_fw *rule; char *msg; uint16_t rulenum = arg & 0xffff; uint8_t set = (arg >> 16) & 0xff; uint8_t cmd = (arg >> 24) & 0xff; if (cmd > 1) return (EINVAL); if (cmd == 1 && set > RESVD_SET) return (EINVAL); IPFW_WLOCK(chain); if (rulenum == 0) { V_norule_counter = 0; for (rule = chain->rules; rule; rule = rule->next) { /* Skip rules from another set. */ if (cmd == 1 && rule->set != set) continue; clear_counters(rule, log_only); } msg = log_only ? "All logging counts reset" : "Accounting cleared"; } else { int cleared = 0; /* * We can have multiple rules with the same number, so we * need to clear them all. */ for (rule = chain->rules; rule; rule = rule->next) if (rule->rulenum == rulenum) { while (rule && rule->rulenum == rulenum) { if (cmd == 0 || rule->set == set) clear_counters(rule, log_only); rule = rule->next; } cleared = 1; break; } if (!cleared) { /* we did not find any matching rules */ IPFW_WUNLOCK(chain); return (EINVAL); } msg = log_only ? "logging count reset" : "cleared"; } IPFW_WUNLOCK(chain); if (V_fw_verbose) { #define lev LOG_SECURITY | LOG_NOTICE if (rulenum) log(lev, "ipfw: Entry %d %s.\n", rulenum, msg); else log(lev, "ipfw: %s.\n", msg); } return (0); } /* * Check validity of the structure before insert. * Fortunately rules are simple, so this mostly need to check rule sizes. 
*/ static int check_ipfw_struct(struct ip_fw *rule, int size) { int l, cmdlen = 0; int have_action=0; ipfw_insn *cmd; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } /* first, check for valid size */ l = RULESIZE(rule); if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); } if (rule->act_ofs >= rule->cmd_len) { printf("ipfw: bogus action offset (%u > %u)\n", rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } /* * Now go for the individual checks. Very simple ones, basically only * instruction sizes. */ for (l = rule->cmd_len, cmd = rule->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (cmdlen > l) { printf("ipfw: opcode %d size truncated\n", cmd->opcode); return EINVAL; } DEB(printf("ipfw: opcode %d\n", cmd->opcode);) switch (cmd->opcode) { case O_PROBE_STATE: case O_KEEP_STATE: case O_PROTO: case O_IP_SRC_ME: case O_IP_DST_ME: case O_LAYER2: case O_IN: case O_FRAG: case O_DIVERTED: case O_IPOPT: case O_IPTOS: case O_IPPRECEDENCE: case O_IPVER: case O_TCPWIN: case O_TCPFLAGS: case O_TCPOPTS: case O_ESTAB: case O_VERREVPATH: case O_VERSRCREACH: case O_ANTISPOOF: case O_IPSEC: #ifdef INET6 case O_IP6_SRC_ME: case O_IP6_DST_ME: case O_EXT_HDR: case O_IP6: #endif case O_IP4: case O_TAG: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; break; case O_FIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; if (cmd->arg1 >= rt_numfibs) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; } break; case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; if (cmd->arg1 >= rt_numfibs) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; } goto check_action; case O_UID: case O_GID: case O_JAIL: case O_IP_SRC: case O_IP_DST: case O_TCPSEQ: case O_TCPACK: case O_PROB: case O_ICMPTYPE: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; break; case O_LIMIT: if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) goto bad_size; break; case 
O_LOG: if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) goto bad_size; ((ipfw_insn_log *)cmd)->log_left = ((ipfw_insn_log *)cmd)->max_log; break; case O_IP_SRC_MASK: case O_IP_DST_MASK: /* only odd command lengths */ if ( !(cmdlen & 1) || cmdlen > 31) goto bad_size; break; case O_IP_SRC_SET: case O_IP_DST_SET: if (cmd->arg1 == 0 || cmd->arg1 > 256) { printf("ipfw: invalid set size %d\n", cmd->arg1); return EINVAL; } if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + (cmd->arg1+31)/32 ) goto bad_size; break; case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: if (cmd->arg1 >= IPFW_TABLES_MAX) { printf("ipfw: invalid table number %d\n", cmd->arg1); return (EINVAL); } if (cmdlen != F_INSN_SIZE(ipfw_insn) && cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; break; case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) goto bad_size; break; case O_NOP: case O_IPID: case O_IPTTL: case O_IPLEN: case O_TCPDATALEN: case O_TAGGED: if (cmdlen < 1 || cmdlen > 31) goto bad_size; break; case O_MAC_TYPE: case O_IP_SRCPORT: case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ if (cmdlen < 2 || cmdlen > 31) goto bad_size; break; case O_RECV: case O_XMIT: case O_VIA: if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) goto bad_size; break; case O_ALTQ: if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) goto bad_size; break; case O_PIPE: case O_QUEUE: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; goto check_action; case O_FORWARD_IP: #ifdef IPFIREWALL_FORWARD if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) goto bad_size; goto check_action; #else return EINVAL; #endif case O_DIVERT: case O_TEE: if (ip_divert_ptr == NULL) return EINVAL; else goto check_size; case O_NETGRAPH: case O_NGTEE: if (!NG_IPFW_LOADED) return EINVAL; else goto check_size; case O_NAT: if (!IPFW_NAT_LOADED) return EINVAL; if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) goto bad_size; goto check_action; case O_FORWARD_MAC: /* XXX not implemented yet */ case O_CHECK_STATE: case O_COUNT: case O_ACCEPT: 
case O_DENY: case O_REJECT: #ifdef INET6 case O_UNREACH6: #endif case O_SKIPTO: case O_REASS: check_size: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; check_action: if (have_action) { printf("ipfw: opcode %d, multiple actions" " not allowed\n", cmd->opcode); return EINVAL; } have_action = 1; if (l != cmdlen) { printf("ipfw: opcode %d, action must be" " last opcode\n", cmd->opcode); return EINVAL; } break; #ifdef INET6 case O_IP6_SRC: case O_IP6_DST: if (cmdlen != F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn)) goto bad_size; break; case O_FLOW6ID: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + ((ipfw_insn_u32 *)cmd)->o.arg1) goto bad_size; break; case O_IP6_SRC_MASK: case O_IP6_DST_MASK: if ( !(cmdlen & 1) || cmdlen > 127) goto bad_size; break; case O_ICMP6TYPE: if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) ) goto bad_size; break; #endif default: switch (cmd->opcode) { #ifndef INET6 case O_IP6_SRC_ME: case O_IP6_DST_ME: case O_EXT_HDR: case O_IP6: case O_UNREACH6: case O_IP6_SRC: case O_IP6_DST: case O_FLOW6ID: case O_IP6_SRC_MASK: case O_IP6_DST_MASK: case O_ICMP6TYPE: printf("ipfw: no IPv6 support in kernel\n"); return EPROTONOSUPPORT; #endif default: printf("ipfw: opcode %d, unknown opcode\n", cmd->opcode); return EINVAL; } } } if (have_action == 0) { printf("ipfw: missing action\n"); return EINVAL; } return 0; bad_size: printf("ipfw: opcode %d size %d wrong\n", cmd->opcode, cmdlen); return EINVAL; } /* * Copy the static rules to the supplied buffer * and return the amount of space actually used. */ static size_t ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; struct ip_fw *rule; int i; time_t boot_seconds; boot_seconds = boottime.tv_sec; /* XXX this can take a long time and locking will block packet flow */ IPFW_RLOCK(chain); for (rule = chain->rules; rule ; rule = rule->next) { /* * Verify the entry fits in the buffer in case the * rules changed between calculating buffer space and * now. 
This would be better done using a generation * number but should suffice for now. */ i = RULESIZE(rule); if (bp + i <= ep) { bcopy(rule, bp, i); /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? */ bcopy(&V_set_disable, &(((struct ip_fw *)bp)->next_rule), sizeof(V_set_disable)); if (((struct ip_fw *)bp)->timestamp) ((struct ip_fw *)bp)->timestamp += boot_seconds; bp += i; } } IPFW_RUNLOCK(chain); return (bp - (char *)buf); } /* * Copy the dynamic rules to the supplied buffer * and return the amount of space actually used. * XXX marta if we allocate X and rules grows * we check for size limit while copying rules into the buffer */ static size_t ipfw_getdynrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; int i; time_t boot_seconds; printf("dynrules requested\n"); boot_seconds = boottime.tv_sec; if (V_ipfw_dyn_v) { ipfw_dyn_rule *p, *last = NULL; IPFW_DYN_LOCK(); for (i = 0 ; i < V_curr_dyn_buckets; i++) for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { if (bp + sizeof *p <= ep) { ipfw_dyn_rule *dst = (ipfw_dyn_rule *)bp; bcopy(p, dst, sizeof *p); bcopy(&(p->rule->rulenum), &(dst->rule), sizeof(p->rule->rulenum)); /* * store set number into high word of * dst->rule pointer. */ bcopy(&(p->rule->set), (char *)&dst->rule + sizeof(p->rule->rulenum), sizeof(p->rule->set)); /* * store a non-null value in "next". * The userland code will interpret a * NULL here as a marker * for the last dynamic rule. */ bcopy(&dst, &dst->next, sizeof(dst)); last = dst; dst->expire = TIME_LEQ(dst->expire, time_uptime) ? 0 : dst->expire - time_uptime ; bp += sizeof(ipfw_dyn_rule); } else { p = NULL; /* break the loop */ i = V_curr_dyn_buckets; } } IPFW_DYN_UNLOCK(); if (last != NULL) /* mark last dynamic rule */ bzero(&last->next, sizeof(last)); } return (bp - (char *)buf); } /** * {set|get}sockopt parser. 
*/ static int ipfw_ctl(struct sockopt *sopt) { #define RULE_MAXSIZE (256*sizeof(u_int32_t)) int error; size_t size; struct ip_fw *buf, *rule; u_int32_t rulenum[2]; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error) return (error); /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ if (sopt->sopt_name == IP_FW_ADD || (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error) return (error); } error = 0; switch (sopt->sopt_name) { case IP_FW_GET: /* * pass up a copy of the current static rules. * The last static rule has number IPFW_DEFAULT_RULE. * * Note that the calculated size is used to bound the * amount of data returned to the user. The rule set may * change between calculating the size and returning the * data in which case we'll just return what fits. */ size = V_static_len; /* size of static rules */ /* * XXX todo: if the user passes a short length just to know * how much room is needed, do not bother filling up the * buffer, just jump to the sooptcopyout. */ buf = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyout(sopt, buf, ipfw_getrules(&V_layer3_chain, buf, size)); free(buf, M_TEMP); break; case IP_FW_DYN_GET: /* * pass up a copy of the current dynamic rules. * The last dynamic rule has NULL in the "next" field. */ /* if (!V_ipfw_dyn_v) XXX check for empty set ? */ size = (V_dyn_count * sizeof(ipfw_dyn_rule)); /* size of dyn. rules */ buf = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyout(sopt, buf, ipfw_getdynrules(&V_layer3_chain, buf, size)); free(buf, M_TEMP); break; case IP_FW_FLUSH: /* * Normally we cannot release the lock on each iteration. * We could do it here only because we start from the head all * the times so there is no risk of missing some entries. * On the other hand, the risk is that we end up with * a very inconsistent ruleset, so better keep the lock * around the whole cycle. 
* * XXX this code can be improved by resetting the head of * the list to point to the default rule, and then freeing * the old list without the need for a lock. */ IPFW_WLOCK(&V_layer3_chain); free_chain(&V_layer3_chain, 0 /* keep default rule */); rule = V_layer3_chain.reap; IPFW_WUNLOCK(&V_layer3_chain); reap_rules(rule); break; case IP_FW_ADD: rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, rule, RULE_MAXSIZE, sizeof(struct ip_fw) ); if (error == 0) error = check_ipfw_struct(rule, sopt->sopt_valsize); if (error == 0) { error = add_rule(&V_layer3_chain, rule); size = RULESIZE(rule); if (!error && sopt->sopt_dir == SOPT_GET) error = sooptcopyout(sopt, rule, size); } free(rule, M_TEMP); break; case IP_FW_DEL: /* * IP_FW_DEL is used for deleting single rules or sets, * and (ab)used to atomically manipulate sets. Argument size * is used to distinguish between the two: * sizeof(u_int32_t) * delete single rule or set of rules, * or reassign rules (or sets) to a different set. * 2*sizeof(u_int32_t) * atomic disable/enable sets. * first u_int32_t contains sets to be disabled, * second u_int32_t contains sets to be enabled. 
*/ error = sooptcopyin(sopt, rulenum, 2*sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; size = sopt->sopt_valsize; if (size == sizeof(u_int32_t)) /* delete or reassign */ error = del_entry(&V_layer3_chain, rulenum[0]); else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */ V_set_disable = (V_set_disable | rulenum[0]) & ~rulenum[1] & ~(1<sopt_val != 0) { error = sooptcopyin(sopt, rulenum, sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; } error = zero_entry(&V_layer3_chain, rulenum[0], sopt->sopt_name == IP_FW_RESETLOG); break; case IP_FW_TABLE_ADD: { ipfw_table_entry ent; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; error = add_table_entry(&V_layer3_chain, ent.tbl, ent.addr, ent.masklen, ent.value); } break; case IP_FW_TABLE_DEL: { ipfw_table_entry ent; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; error = del_table_entry(&V_layer3_chain, ent.tbl, ent.addr, ent.masklen); } break; case IP_FW_TABLE_FLUSH: { u_int16_t tbl; error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)); if (error) break; IPFW_WLOCK(&V_layer3_chain); error = flush_table(&V_layer3_chain, tbl); IPFW_WUNLOCK(&V_layer3_chain); } break; case IP_FW_TABLE_GETSIZE: { u_int32_t tbl, cnt; if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; IPFW_RLOCK(&V_layer3_chain); error = count_table(&V_layer3_chain, tbl, &cnt); IPFW_RUNLOCK(&V_layer3_chain); if (error) break; error = sooptcopyout(sopt, &cnt, sizeof(cnt)); } break; case IP_FW_TABLE_LIST: { ipfw_table *tbl; if (sopt->sopt_valsize < sizeof(*tbl)) { error = EINVAL; break; } size = sopt->sopt_valsize; tbl = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); if (error) { free(tbl, M_TEMP); break; } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); IPFW_RLOCK(&V_layer3_chain); error = dump_table(&V_layer3_chain, tbl); IPFW_RUNLOCK(&V_layer3_chain); if (error) { free(tbl, M_TEMP); break; } error = 
sooptcopyout(sopt, tbl, size); free(tbl, M_TEMP); } break; case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) error = ipfw_nat_cfg_ptr(sopt); else { printf("IP_FW_NAT_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_DEL: if (IPFW_NAT_LOADED) error = ipfw_nat_del_ptr(sopt); else { printf("IP_FW_NAT_DEL: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_CONFIG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_cfg_ptr(sopt); else { printf("IP_FW_NAT_GET_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_LOG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_log_ptr(sopt); else { printf("IP_FW_NAT_GET_LOG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; default: printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); error = EINVAL; } return (error); #undef RULE_MAXSIZE } /* * This procedure is only used to handle keepalives. It is invoked * every dyn_keepalive_period */ static void ipfw_tick(void * vnetx) { struct mbuf *m0, *m, *mnext, **mtailp; #ifdef INET6 struct mbuf *m6, **m6_tailp; #endif int i; ipfw_dyn_rule *q; #ifdef VIMAGE struct vnet *vp = vnetx; #endif CURVNET_SET(vp); if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) goto done; /* * We make a chain of packets to go out here -- not deferring * until after we drop the IPFW dynamic rule lock would result * in a lock order reversal with the normal packet input -> ipfw * call stack. 
*/ m0 = NULL; mtailp = &m0; #ifdef INET6 m6 = NULL; m6_tailp = &m6; #endif IPFW_DYN_LOCK(); for (i = 0 ; i < V_curr_dyn_buckets ; i++) { for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { if (q->dyn_type == O_LIMIT_PARENT) continue; if (q->id.proto != IPPROTO_TCP) continue; if ( (q->state & BOTH_SYN) != BOTH_SYN) continue; if (TIME_LEQ( time_uptime+V_dyn_keepalive_interval, q->expire)) continue; /* too early */ if (TIME_LEQ(q->expire, time_uptime)) continue; /* too late, rule expired */ m = send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); mnext = send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0); switch (q->id.addr_type) { case 4: if (m != NULL) { *mtailp = m; mtailp = &(*mtailp)->m_nextpkt; } if (mnext != NULL) { *mtailp = mnext; mtailp = &(*mtailp)->m_nextpkt; } break; #ifdef INET6 case 6: if (m != NULL) { *m6_tailp = m; m6_tailp = &(*m6_tailp)->m_nextpkt; } if (mnext != NULL) { *m6_tailp = mnext; m6_tailp = &(*m6_tailp)->m_nextpkt; } break; #endif } m = mnext = NULL; } } IPFW_DYN_UNLOCK(); for (m = mnext = m0; m != NULL; m = mnext) { mnext = m->m_nextpkt; m->m_nextpkt = NULL; ip_output(m, NULL, NULL, 0, NULL, NULL); } #ifdef INET6 for (m = mnext = m6; m != NULL; m = mnext) { mnext = m->m_nextpkt; m->m_nextpkt = NULL; ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif done: callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period*hz, ipfw_tick, vnetx); CURVNET_RESTORE(); } static int vnet_ipfw_init(const void *); int ipfw_init(void) { int error = 0; ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); IPFW_DYN_LOCK_INIT(); error = vnet_ipfw_init(NULL); if (error) { IPFW_DYN_LOCK_DESTROY(); IPFW_LOCK_DESTROY(&V_layer3_chain); uma_zdestroy(ipfw_dyn_rule_zone); return (error); } /* * Only print out this stuff the first time around, * when called from the sysinit code. 
*/ printf("ipfw2 " #ifdef INET6 "(+ipv6) " #endif "initialized, divert %s, nat %s, " "rule-based forwarding " #ifdef IPFIREWALL_FORWARD "enabled, " #else "disabled, " #endif "default to %s, logging ", #ifdef IPDIVERT "enabled", #else "loadable", #endif #ifdef IPFIREWALL_NAT "enabled", #else "loadable", #endif default_to_accept ? "accept" : "deny"); /* * Note: V_xxx variables can be accessed here but the vnet specific * initializer may not have been called yet for the VIMAGE case. * Tuneables will have been processed. We will print out values for * the default vnet. * XXX This should all be rationalized AFTER 8.0 */ if (V_fw_verbose == 0) printf("disabled\n"); else if (V_verbose_limit == 0) printf("unlimited\n"); else printf("limited to %d packets/entry by default\n", V_verbose_limit); return (error); } void ipfw_destroy(void) { struct ip_fw *reap; ip_fw_chk_ptr = NULL; ip_fw_ctl_ptr = NULL; callout_drain(&ipfw_timeout); IPFW_WLOCK(&V_layer3_chain); flush_tables(&V_layer3_chain); V_layer3_chain.reap = NULL; free_chain(&V_layer3_chain, 1 /* kill default rule */); reap = V_layer3_chain.reap, V_layer3_chain.reap = NULL; IPFW_WUNLOCK(&V_layer3_chain); if (reap != NULL) reap_rules(reap); IPFW_DYN_LOCK_DESTROY(); uma_zdestroy(ipfw_dyn_rule_zone); if (V_ipfw_dyn_v != NULL) free(V_ipfw_dyn_v, M_IPFW); IPFW_LOCK_DESTROY(&V_layer3_chain); #ifdef INET6 /* Free IPv6 fw sysctl tree. */ sysctl_ctx_free(&ip6_fw_sysctl_ctx); #endif printf("IP firewall unloaded\n"); } /**************** * Stuff that must be initialized for every instance * (including the first of course). */ static int vnet_ipfw_init(const void *unused) { int error; struct ip_fw default_rule; /* First set up some values that are compile time options */ #ifdef IPFIREWALL_VERBOSE V_fw_verbose = 1; #endif #ifdef IPFIREWALL_VERBOSE_LIMIT V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif error = init_tables(&V_layer3_chain); if (error) { panic("init_tables"); /* XXX Marko fix this ! 
*/ } #ifdef IPFIREWALL_NAT LIST_INIT(&V_layer3_chain.nat); #endif V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ V_ipfw_dyn_v = NULL; V_dyn_buckets = 256; /* must be power of 2 */ V_curr_dyn_buckets = 256; /* must be power of 2 */ V_dyn_ack_lifetime = 300; V_dyn_syn_lifetime = 20; V_dyn_fin_lifetime = 1; V_dyn_rst_lifetime = 1; V_dyn_udp_lifetime = 10; V_dyn_short_lifetime = 5; V_dyn_keepalive_interval = 20; V_dyn_keepalive_period = 5; V_dyn_keepalive = 1; /* do send keepalives */ V_dyn_max = 4096; /* max # of dynamic rules */ V_fw_deny_unknown_exthdrs = 1; V_layer3_chain.rules = NULL; IPFW_LOCK_INIT(&V_layer3_chain); callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); set_skipto_table(&V_layer3_chain); bzero(&default_rule, sizeof default_rule); default_rule.act_ofs = 0; default_rule.rulenum = IPFW_DEFAULT_RULE; default_rule.cmd_len = 1; default_rule.set = RESVD_SET; default_rule.cmd[0].len = 1; default_rule.cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; error = add_rule(&V_layer3_chain, &default_rule); if (error != 0) { printf("ipfw2: error %u initializing default rule " "(support disabled)\n", error); IPFW_LOCK_DESTROY(&V_layer3_chain); printf("leaving ipfw_iattach (1) with error %d\n", error); return (error); } V_layer3_chain.default_rule = V_layer3_chain.rules; /* curvnet is NULL in the !VIMAGE case */ callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); /* First set up some values that are compile time options */ V_ipfw_vnet_ready = 1; /* Open for business */ /* * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr) * and pfil hooks for ipv4 and ipv6. Even if the latter two fail * we still keep the module alive because the sockopt and * layer2 paths are still useful. * ipfw[6]_hook return 0 on success, ENOENT on failure, * so we can ignore the exact return value and just set a flag. 
* * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so * changes in the underlying (per-vnet) variables trigger * immediate hook()/unhook() calls. * In layer2 we have the same behaviour, except that V_ether_ipfw * is checked on each packet because there are no pfil hooks. */ V_ip_fw_ctl_ptr = ipfw_ctl; V_ip_fw_chk_ptr = ipfw_chk; #ifndef linux if (V_fw_enable && ipfw_hook() != 0) { error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */ printf("ipfw_hook() error\n"); } #ifdef INET6 if (V_fw6_enable && ipfw6_hook() != 0) { error = ENOENT; printf("ipfw6_hook() error\n"); } #endif #endif /* !linux */ return (error); } ipfw_mod/dummynet/ipfw2_mod.c000644 000423 000000 00000051514 11311404347 017020 0ustar00luigiwheel000000 000000 /* * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Id: ipfw2_mod.c 4490 2009-12-14 09:55:26Z marta $ * * The main interface to build ipfw+dummynet as a linux module. * (and possibly as a windows module as well, though that part * is not complete yet). * * The control interface uses the sockopt mechanism * on a socket(AF_INET, SOCK_RAW, IPPROTO_RAW). * * The data interface uses the netfilter interface, at the moment * hooked to the PRE_ROUTING and POST_ROUTING hooks. * Unfortunately the netfilter interface is a moving target, * so we need a set of macros to adapt to the various cases. * * In the netfilter hook we just mark packet as 'QUEUE' and then * let the queue handler to do the whole work (filtering and * possibly emulation). * As we receive packets, we wrap them with an mbuf descriptor * so the existing ipfw+dummynet code runs unmodified. 
*/ #include #include /* sizeof struct mbuf */ #include /* NGROUPS */ #include "missing.h" #ifdef __linux__ #include #include #include #include /* NF_IP_PRI_FILTER */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) #include /* nf_queue */ #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) #define __read_mostly #endif #endif /* !__linux__ */ #include /* in_addr */ #include /* ip_fw_ctl_t, ip_fw_chk_t */ #include /* ip_dn_ctl_t, ip_dn_io_t */ #include /* PFIL_IN, PFIL_OUT */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) #warning --- inet_hashtables not present on 2.4 #include #include #include static inline int inet_iif(const struct sk_buff *skb) { return ((struct rtable *)skb->dst)->rt_iif; } #else #include /* inet_lookup */ #endif #include /* inet_iif */ /* * Here we allocate some global variables used in the firewall. */ //ip_dn_ctl_t *ip_dn_ctl_ptr; int (*ip_dn_ctl_ptr)(struct sockopt *); ip_fw_ctl_t *ip_fw_ctl_ptr; int (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa); ip_fw_chk_t *ip_fw_chk_ptr; void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /*--- * Glue code to implement the registration of children with the parent. * Each child should call my_mod_register() when linking, so that * module_init() and module_exit() can call init_children() and * fini_children() to provide the necessary initialization. */ #include struct mod_args { struct moduledata *mod; const char *name; int order; }; static unsigned int mod_idx; static struct mod_args mods[10]; /* hard limit to 10 modules */ /* * my_mod_register should be called automatically as the init * functions in the submodules. Unfortunately this compiler/linker * trick is not supported yet so we call it manually. 
*/ int my_mod_register(struct moduledata *mod, const char *name, int order) { struct mod_args m = { mod, name, order }; printf("%s %s called\n", __FUNCTION__, name); if (mod_idx < sizeof(mods) / sizeof(mods[0])) mods[mod_idx++] = m; return 0; } static void init_children(void) { unsigned int i; /* Call the functions registered at init time. */ printf("%s mod_idx value %d\n", __FUNCTION__, mod_idx); for (i = 0; i < mod_idx; i++) { printf("+++ start module %d %s %s at %p order 0x%x\n", i, mods[i].name, mods[i].mod->name, mods[i].mod, mods[i].order); mods[i].mod->evhand(NULL, MOD_LOAD, mods[i].mod->priv); } } static void fini_children(void) { int i; /* Call the functions registered at init time. */ for (i = mod_idx - 1; i >= 0; i--) { printf("+++ end module %d %s %s at %p order 0x%x\n", i, mods[i].name, mods[i].mod->name, mods[i].mod, mods[i].order); mods[i].mod->evhand(NULL, MOD_UNLOAD, mods[i].mod->priv); } } /*--- end of module binding helper functions ---*/ /*--- * Control hooks: * ipfw_ctl_h() is a wrapper for linux to FreeBSD sockopt call convention. * then call the ipfw handler in order to manage requests. * In turn this is called by the linux set/get handlers. 
*/ static int ipfw_ctl_h(struct sockopt *s, int cmd, int dir, int len, void __user *user) { struct thread t; int ret = EINVAL; memset(s, 0, sizeof(s)); s->sopt_name = cmd; s->sopt_dir = dir; s->sopt_valsize = len; s->sopt_val = user; /* sopt_td is not used but it is referenced */ memset(&t, 0, sizeof(t)); s->sopt_td = &t; // printf("%s called with cmd %d len %d\n", __FUNCTION__, cmd, len); if (cmd < IP_DUMMYNET_CONFIGURE && ip_fw_ctl_ptr) ret = ip_fw_ctl_ptr(s); else if (cmd >= IP_DUMMYNET_CONFIGURE && ip_dn_ctl_ptr) ret = ip_dn_ctl_ptr(s); return -ret; /* errors are < 0 on linux */ } #ifdef _WIN32 void netisr_dispatch(int __unused num, struct mbuf *m) { } int ip_output(struct mbuf *m, struct mbuf __unused *opt, struct route __unused *ro, int __unused flags, struct ip_moptions __unused *imo, struct inpcb __unused *inp) { netisr_dispatch(0, m); return 0; } #else /* this is the linux glue */ /* * setsockopt hook has no return value other than the error code. */ static int do_ipfw_set_ctl(struct sock __unused *sk, int cmd, void __user *user, unsigned int len) { struct sockopt s; /* pass arguments */ return ipfw_ctl_h(&s, cmd, SOPT_SET, len, user); } /* * getsockopt can can return a block of data in response. */ static int do_ipfw_get_ctl(struct sock __unused *sk, int cmd, void __user *user, int *len) { struct sockopt s; /* pass arguments */ int ret = ipfw_ctl_h(&s, cmd, SOPT_GET, *len, user); *len = s.sopt_valsize; /* return lenght back to the caller */ return ret; } /* * declare our [get|set]sockopt hooks */ static struct nf_sockopt_ops ipfw_sockopts = { .pf = PF_INET, .set_optmin = _IPFW_SOCKOPT_BASE, .set_optmax = _IPFW_SOCKOPT_END, .set = do_ipfw_set_ctl, .get_optmin = _IPFW_SOCKOPT_BASE, .get_optmax = _IPFW_SOCKOPT_END, .get = do_ipfw_get_ctl, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) .owner = THIS_MODULE, #endif }; /*---- * We need a number of macros to adapt to the various APIs in * different linux versions. 
Among them: * * - the hook names change between macros (NF_IP*) and enum NF_INET_* * * - the second argument to the netfilter hook is * struct sk_buff ** in kernels <= 2.6.22 * struct sk_buff * in kernels > 2.6.22 * * - NF_STOP is not defined before 2.6 so we remap it to NF_ACCEPT * * - the packet descriptor passed to the queue handler is * struct nf_info in kernels <= 2.6.24 * struct nf_queue_entry in kernels >= 2.6.25 * * - the arguments to the queue handler also change; */ /* * declare hook to grab packets from the netfilter interface. * The NF_* names change in different versions of linux, in some * cases they are #defines, in others they are enum, so we * need to adapt. */ #ifndef NF_IP_PRE_ROUTING #define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING #endif #ifndef NF_IP_POST_ROUTING #define NF_IP_POST_ROUTING NF_INET_POST_ROUTING #endif /* * ipfw hooks into the POST_ROUTING and the PRE_ROUTING chains. * PlanetLab sets skb_tag to the slice id in the LOCAL_INPUT and * POST_ROUTING chains, so if we want to use that information we * need to hook the LOCAL_INPUT chain instead of the PRE_ROUTING. * However at the moment the skb_tag info is not reliable so * we stay with the standard hooks. */ #if 0 // defined(IPFW_PLANETLAB) #define IPFW_HOOK_IN NF_IP_LOCAL_IN #else #define IPFW_HOOK_IN NF_IP_PRE_ROUTING #endif /* * The main netfilter hook. * To make life simple, we queue everything and then do all the * decision in the queue handler. * * XXX note that in 2.4 and up to 2.6.22 the skbuf is passed as sk_buff** * so we have an #ifdef to set the proper argument type.
*/ static unsigned int call_ipfw(unsigned int __unused hooknum, #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // in 2.6.22 we have ** struct sk_buff __unused **skb, #else struct sk_buff __unused *skb, #endif const struct net_device __unused *in, const struct net_device __unused *out, int __unused (*okfn)(struct sk_buff *)) { return NF_QUEUE; } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) #define NF_STOP NF_ACCEPT #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* * nf_queue_entry is a recent addition, in previous versions * of the code the struct is called nf_info. */ #define nf_queue_entry nf_info /* for simplicity */ /* also, 2.4 and perhaps something else have different arguments */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* unsure on the exact boundary */ /* on 2.4 we use nf_info */ #define QH_ARGS struct sk_buff *skb, struct nf_info *info, void *data #else /* 2.6.1.. 2.6.24 */ #define QH_ARGS struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data #endif #define DEFINE_SKB /* nothing, already an argument */ #define REINJECT(_inf, _verd) nf_reinject(skb, _inf, _verd) #else /* 2.6.25 and above */ #define QH_ARGS struct nf_queue_entry *info, unsigned int queuenum #define DEFINE_SKB struct sk_buff *skb = info->skb; #define REINJECT(_inf, _verd) nf_reinject(_inf, _verd) #endif /* * used by dummynet when dropping packets * XXX use dummynet_send() */ void reinject_drop(struct mbuf* m) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* unsure on the exact boundary */ struct sk_buff *skb = (struct sk_buff *)m; #endif REINJECT(m->queue_entry, NF_DROP); } /* * The real call to the firewall. nf_queue_entry points to the skbuf, * and eventually we need to return both through nf_reinject(). 
*/ /* Summary: wraps the queued skb in a newly allocated mbuf, calls ipfw_check_in() or ipfw_check_out() depending on info->hook, and reinjects according to the outcome. Every path returns 0. */ static int ipfw2_queue_handler(QH_ARGS) { DEFINE_SKB /* no semicolon here, goes in the macro */ int ret = 0; /* return value */ struct mbuf *m; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) if (skb->nh.iph == NULL) { printf("null dp, len %d reinject now\n", skb->len); REINJECT(info, NF_ACCEPT); return 0; } #endif m = malloc(sizeof(*m), 0, 0); /* NOTE(review): only the fields assigned below are set explicitly; whether this malloc() returns zeroed memory is not visible in this file - confirm before relying on any other mbuf field */ if (m == NULL) { /* no wrapper available: the packet is reinjected with NF_ACCEPT without being checked */ printf("malloc fail, len %d reinject now\n", skb->len); REINJECT(info, NF_ACCEPT); return 0; } m->m_skb = skb; m->m_len = skb->len; /* len in this skbuf */ m->m_pkthdr.len = skb->len; /* total packet len */ m->m_pkthdr.rcvif = info->indev; m->queue_entry = info; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) m->m_data = skb->nh.iph; #else m->m_data = skb_network_header(skb); #endif /* XXX add the interface */ if (info->hook == IPFW_HOOK_IN) { ret = ipfw_check_in(NULL, &m, info->indev, PFIL_IN, NULL); } else { ret = ipfw_check_out(NULL, &m, info->outdev, PFIL_OUT, NULL); } /* m is passed by address above, so it is tested again here: non-NULL means the wrapper is still ours */ if (m != NULL) { /* Accept. reinject and free the mbuf */ REINJECT(info, NF_STOP); m_freem(m); } else if (ret == 0) { /* dummynet has kept the packet, will reinject later. */ } else { /* * Packet dropped by ipfw or dummynet, reinject as NF_DROP * mbuf already released by ipfw itself */ REINJECT(info, NF_DROP); } return 0; } struct route; struct ip_moptions; struct inpcb; /* XXX should include prototypes for netisr_dispatch and ip_output */ /* * The reinjection routine after a packet comes out from dummynet. * We must update the skb timestamp so ping reports the right time. * The mbuf wrapper is freed first, so the queue entry and the skb * are copied out of it before the m_freem() call. */ void netisr_dispatch(int num, struct mbuf *m) { struct nf_queue_entry *info = m->queue_entry; struct sk_buff *skb = m->m_skb; /* always used */ m_freem(m); KASSERT((info != NULL), ("%s info null!\n", __FUNCTION__)); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) // XXX above 2.6.x ?
__net_timestamp(skb); /* update timestamp */ #endif /* XXX to obey one-pass, possibly call the queue handler here */ REINJECT(info, ((num == -1)?NF_DROP:NF_STOP)); /* accept but no more firewall */ } int ip_output(struct mbuf *m, struct mbuf __unused *opt, struct route __unused *ro, int __unused flags, struct ip_moptions __unused *imo, struct inpcb __unused *inp) { netisr_dispatch(0, m); return 0; } /* * socket lookup function for linux. * This code is used to associate uid, gid, jail/xid to packets, * and store the info in a cache *ugp where they can be accessed quickly. * The function returns 1 if the info is found, -1 otherwise. * * We do this only on selected protocols: TCP, ... * * The chain is the following * sk_buff* sock* socket* file* * skb -> sk ->sk_socket->file ->f_owner ->pid * skb -> sk ->sk_socket->file ->f_uid (direct) * skb -> sk ->sk_socket->file ->f_cred->fsuid (2.6.29+) * * Related headers: * linux/skbuff.h struct skbuff * net/sock.h struct sock * linux/net.h struct socket * linux/fs.h struct file * * With vserver we may have sk->sk_xid and sk->sk_nid that * which we store in fw_groups[1] (matches O_JAIL) and fw_groups[2] * (no matches yet) * * Note- for locally generated, outgoing packets we should not need * need a lookup because the sk_buff already points to the socket where * the info is. */ extern struct inet_hashinfo tcp_hashinfo; int linux_lookup(const int proto, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, struct sk_buff *skb, int dir, struct bsd_ucred *u) { #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) return -1; #else struct sock *sk; int ret = -1; /* default return value */ int st = -1; /* state */ if (proto != IPPROTO_TCP) /* XXX extend for UDP */ return -1; if ((dir ? (void *)skb->dst : (void *)skb->dev) == NULL) { panic(" -- this should not happen\n"); return -1; } if (skb->sk) { sk = skb->sk; } else { /* * Try a lookup. 
On a match, sk has a refcount that we must * release on exit (we know it because skb->sk = NULL). * * inet_lookup above 2.6.24 has an additional 'net' parameter * so we use a macro to conditionally supply it. * swap dst and src depending on the direction. */ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) #define _OPT_NET_ARG #else #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) /* there is no dev_net() on 2.6.25 */ #define _OPT_NET_ARG (skb->dev->nd_net), #else /* 2.6.26 and above */ #define _OPT_NET_ARG dev_net(skb->dev), #endif #endif sk = (dir) ? /* dir != 0 on output */ inet_lookup(_OPT_NET_ARG &tcp_hashinfo, daddr, dport, saddr, sport, // match outgoing inet_iif(skb)) : inet_lookup(_OPT_NET_ARG &tcp_hashinfo, saddr, sport, daddr, dport, // match incoming skb->dev->ifindex); #undef _OPT_NET_ARG if (sk == NULL) /* no match, nothing to be done */ return -1; } ret = 1; /* retrying won't make things better */ st = sk->sk_state; #ifdef CONFIG_VSERVER u->xid = sk->sk_xid; u->nid = sk->sk_nid; #else u->xid = u->nid = 0; #endif /* * Exclude tcp states where sk points to a inet_timewait_sock which * has no sk_socket field (surely TCP_TIME_WAIT, perhaps more). * To be safe, use a whitelist and not a blacklist. * Before dereferencing sk_socket grab a lock on sk_callback_lock. * * Once again we need conditional code because the UID and GID * location changes between kernels. 
*/ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,28) /* use the current's real uid/gid */ #define _CURR_UID f_uid #define _CURR_GID f_gid #else /* 2.6.29 and above */ /* use the current's file access real uid/gid */ #define _CURR_UID f_cred->fsuid #define _CURR_GID f_cred->fsgid #endif #define GOOD_STATES ( \ (1<sk_callback_lock); if (sk->sk_socket && sk->sk_socket->file) { u->uid = sk->sk_socket->file->_CURR_UID; u->gid = sk->sk_socket->file->_CURR_GID; } read_unlock_bh(&sk->sk_callback_lock); } else { u->uid = u->gid = 0; } if (!skb->sk) /* return the reference that came from the lookup */ sock_put(sk); #undef GOOD_STATES #undef _CURR_UID #undef _CURR_GID return ret; #endif /* LINUX > 2.4 */ } /* * Now prepare to hook the various functions. * Linux 2.4 has a different API so we need some adaptation * for register and unregister hooks * * the unregister function changed arguments between 2.6.22 and 2.6.24 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) static int nf_register_hooks(struct nf_hook_ops *ops, int n) { int i, ret = 0; for (i = 0; i < n; i++) { ret = nf_register_hook(ops + i); if (ret < 0) break; } return ret; } static void nf_unregister_hooks(struct nf_hook_ops *ops, int n) { int i; for (i = 0; i < n; i++) { nf_unregister_hook(ops + i); } } #define REG_QH_ARG(fn) fn, NULL /* argument for nf_[un]register_queue_handler */ #define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ #define SET_MOD_OWNER #else /* linux >= 2.6.0 */ struct nf_queue_handler ipfw2_queue_handler_desc = { .outfn = ipfw2_queue_handler, .name = "ipfw2 dummynet queue", }; #define REG_QH_ARG(fn) &(fn ## _desc) #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) #define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ #else #define UNREG_QH_ARG(fn) , &(fn ## _desc) #endif /* 2.6.0 < LINUX > 2.6.24 */ #define SET_MOD_OWNER .owner = THIS_MODULE, #endif /* !LINUX < 2.6.0 */ static struct nf_hook_ops ipfw_ops[] __read_mostly = { { .hook = call_ipfw, 
.pf = PF_INET, .hooknum = IPFW_HOOK_IN, .priority = NF_IP_PRI_FILTER, SET_MOD_OWNER }, { .hook = call_ipfw, .pf = PF_INET, .hooknum = NF_IP_POST_ROUTING, .priority = NF_IP_PRI_FILTER, SET_MOD_OWNER }, }; #endif /* !__linux__ */ /* descriptors for the children */ extern moduledata_t *moddesc_ipfw; extern moduledata_t *moddesc_dummynet; extern void rn_init(void); /* * Module glue - init and exit function. */ static int __init ipfw_module_init(void) { int ret = 0; printf("%s in-hook %d svn id %s\n", __FUNCTION__, IPFW_HOOK_IN, "$Id: ipfw2_mod.c 4490 2009-12-14 09:55:26Z marta $"); rn_init(); my_mod_register(moddesc_ipfw, "ipfw", 1); my_mod_register(moddesc_dummynet, "dummynet", 2); init_children(); #ifdef _WIN32 return ret; #else /* linux hook */ /* sockopt register, in order to talk with user space */ ret = nf_register_sockopt(&ipfw_sockopts); if (ret < 0) { printf("error %d in nf_register_sockopt\n", ret); goto clean_modules; } /* queue handler registration, in order to get network * packet under a private queue */ ret = nf_register_queue_handler(PF_INET, REG_QH_ARG(ipfw2_queue_handler) ); if (ret < 0) /* queue busy */ goto unregister_sockopt; ret = nf_register_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); if (ret < 0) goto unregister_sockopt; printf("%s loaded\n", __FUNCTION__); return 0; /* handle errors on load */ unregister_sockopt: nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); nf_unregister_sockopt(&ipfw_sockopts); clean_modules: fini_children(); printf("%s error\n", __FUNCTION__); return ret; #endif /* linux */ } /* module shutdown */ static void __exit ipfw_module_exit(void) { #ifdef _WIN32 #else /* linux hook */ nf_unregister_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); /* maybe drain the queue before unregistering ? 
*/ nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); nf_unregister_sockopt(&ipfw_sockopts); #endif /* linux */ fini_children(); printf("%s unloaded\n", __FUNCTION__); } #ifdef __linux__ module_init(ipfw_module_init) module_exit(ipfw_module_exit) MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */ #endif ipfw_mod/dummynet/in_cksum.c000644 000423 000000 00000010322 11310145556 016735 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1988, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD: src/sys/netinet/in_cksum.c,v 1.10 2007/10/07 20:44:22 silby Exp $"); #include #include /* * Checksum routine for Internet Protocol family headers (Portable Version). * * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} int in_cksum(struct mbuf *m, int len) { register u_short *w; register int sum = 0; register int mlen = 0; int byte_swapped = 0; union { char c[2]; u_short s; } s_util; union { u_short s[2]; long l; } l_util; for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_short *); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. * * s_util.c[0] is already saved when scanning previous * mbuf. */ s_util.c[1] = *(char *)w; sum += s_util.s; w = (u_short *)((char *)w + 1); mlen = m->m_len - 1; len--; } else mlen = m->m_len; if (len < mlen) mlen = len; len -= mlen; /* * Force to even boundary. */ #if defined(CONFIG_X86_64) if ((1 & (long) w) && (mlen > 0)) { #else if ((1 & (int) w) && (mlen > 0)) { #endif REDUCE; sum <<= 8; s_util.c[0] = *(u_char *)w; w = (u_short *)((char *)w + 1); mlen--; byte_swapped = 1; } /* * Unroll the loop to make overhead from * branches &c small. 
*/ while ((mlen -= 32) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; w += 16; } mlen += 32; while ((mlen -= 8) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; w += 4; } mlen += 8; if (mlen == 0 && byte_swapped == 0) continue; REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { REDUCE; sum <<= 8; byte_swapped = 0; if (mlen == -1) { s_util.c[1] = *(char *)w; sum += s_util.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) s_util.c[0] = *(char *)w; } if (len) printf("cksum: out of data\n"); if (mlen == -1) { /* The last mbuf has odd # of bytes. Follow the standard (the odd byte may be shifted left by 8 bits or not as determined by endian-ness of the machine) */ s_util.c[1] = 0; sum += s_util.s; } REDUCE; return (~sum & 0xffff); } ipfw_mod/dummynet/ip_dummynet.c000644 000423 000000 00000202213 11310267635 017464 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa * Portions Copyright (c) 2000 Akamba Corp. * All rights reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:12 oleg Exp $"); #define DUMMYNET_DEBUG #include "opt_inet6.h" /* * This module implements IP dummynet, a bandwidth limiter/delay emulator * used in conjunction with the ipfw package. * Description of the data structures used is in ip_dummynet.h * Here you mainly find the following blocks of code: * + variable declarations; * + heap management functions; * + scheduler and dummynet functions; * + configuration and initialization. * * NOTA BENE: critical sections are protected by the "dummynet lock". * * Most important Changes: * * 011004: KLDable * 010124: Fixed WF2Q behaviour * 010122: Fixed spl protection. * 000601: WF2Q support * 000106: large rewrite, use heaps to handle very many pipes. 
* 980513: initial release * * include files marked with XXX are probably not needed */ #include "missing.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ #include #include #include /* ip_len, ip_off */ #include #include #include /* ip_output(), IP_FORWARDING */ #include /* various ether_* routines */ #include /* for ip6_input, ip6_output prototypes */ #include /* * We keep a private variable for the simulation time, but we could * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) */ static dn_key curr_time = 0 ; /* current simulation time */ static int dn_hash_size = 64 ; /* default hash size */ /* statistics on number of queue searches and search steps */ static long searches, search_steps ; static int pipe_expire = 1 ; /* expire queue if empty */ static int dn_max_ratio = 16 ; /* max queues/buckets ratio */ static long pipe_slot_limit = 100; /* Foot shooting limit for pipe queues. */ static long pipe_byte_limit = 1024 * 1024; static int red_lookup_depth = 256; /* RED - default lookup table depth */ static int red_avg_pkt_size = 512; /* RED - default medium packet size */ static int red_max_pkt_size = 1500; /* RED - default max packet size */ static struct timeval prev_t, t; static long tick_last; /* Last tick duration (usec). */ static long tick_delta; /* Last vs standard tick diff (usec). */ static long tick_delta_sum; /* Accumulated tick difference (usec).*/ static long tick_adjustment; /* Tick adjustments done. */ static long tick_lost; /* Lost(coalesced) ticks number. */ /* Adjusted vs non-adjusted curr_time difference (ticks). 
*/ static long tick_diff; static int io_fast; static unsigned long io_pkt; static unsigned long io_pkt_fast; static unsigned long io_pkt_drop; /* * Three heaps contain queues and pipes that the scheduler handles: * * ready_heap contains all dn_flow_queue related to fixed-rate pipes. * * wfq_ready_heap contains the pipes associated with WF2Q flows * * extract_heap contains pipes associated with delay lines. * */ MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; static int heap_init(struct dn_heap *h, int size); static int heap_insert (struct dn_heap *h, dn_key key1, void *p); static void heap_extract(struct dn_heap *h, void *obj); static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail); static void ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail); static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail); #define HASHSIZE 16 #define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f) static struct dn_pipe_head pipehash[HASHSIZE]; /* all pipes */ static struct dn_flow_set_head flowsethash[HASHSIZE]; /* all flowsets */ static struct callout dn_timeout; extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); #ifdef SYSCTL_NODE SYSCTL_DECL(_net_inet); SYSCTL_DECL(_net_inet_ip); SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); #if 0 /* curr_time is 64 bit */ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time, CTLFLAG_RD, &curr_time, 0, "Current tick"); #endif SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches, CTLFLAG_RD, &searches, 0, 
"Number of queue searches"); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps, CTLFLAG_RD, &search_steps, 0, "Number of queue search steps"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, CTLFLAG_RW, &dn_max_ratio, 0, "Max ratio between dynamic queues and buckets"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size"); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta, CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec)."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum, CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec)."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment, CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff, CTLFLAG_RD, &tick_diff, 0, "Adjusted vs non-adjusted curr_time difference (ticks)."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, CTLFLAG_RD, &tick_lost, 0, "Number of ticks coalesced by dummynet taskqueue."); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io."); SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, CTLFLAG_RD, &io_pkt, 0, "Number of packets passed to dummynet."); SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, CTLFLAG_RD, &io_pkt_fast, 0, "Number of packets bypassed dummynet scheduler."); SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, CTLFLAG_RD, &io_pkt_drop, 0, "Number of packets dropped by dummynet."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, CTLFLAG_RW, 
&pipe_slot_limit, 0, "Upper limit in slots for pipe queue."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, CTLFLAG_RW, &pipe_byte_limit, 0, "Upper limit in bytes for pipe queue."); #endif #ifdef DUMMYNET_DEBUG int dummynet_debug = 0; #ifdef SYSCTL_NODE SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug, 0, "control debugging printfs"); #endif #define DPRINTF(X) if (dummynet_debug) printf X #else #define DPRINTF(X) #endif static struct task dn_task; static struct taskqueue *dn_tq = NULL; static void dummynet_task(void *, int); #if defined( __linux__ ) || defined( _WIN32 ) static DEFINE_SPINLOCK(dummynet_mtx); #else static struct mtx dummynet_mtx; #endif #define DUMMYNET_LOCK_INIT() \ mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF) #define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx) #define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx) #define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx) #define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED) static int config_pipe(struct dn_pipe *p); static int ip_dn_ctl(struct sockopt *sopt); static void dummynet(void *); static void dummynet_flush(void); static void dummynet_send(struct mbuf *); void dummynet_drain(void); static int dummynet_io(struct mbuf **, int , struct ip_fw_args *); /* * Flow queue is idle if: * 1) it's empty for at least 1 tick * 2) it has invalid timestamp (WF2Q case) * 3) parent pipe has no 'exhausted' burst. */ #define QUEUE_IS_IDLE(q) ((q)->head == NULL && (q)->S == (q)->F + 1 && \ curr_time > (q)->idle_time + 1 && \ ((q)->numbytes + (curr_time - (q)->idle_time - 1) * \ (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst)) /* * Heap management functions. * * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. * Some macros help finding parent/children so we can optimize them. * * heap_init() is called to expand the heap when needed. * Increment size in blocks of 16 entries. 
* XXX failure to allocate a new element is a pretty bad failure * as we basically stall a whole queue forever!! * Returns 1 on error, 0 on success */ #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) #define HEAP_LEFT(x) ( 2*(x) + 1 ) #define HEAP_IS_LEFT(x) ( (x) & 1 ) #define HEAP_RIGHT(x) ( 2*(x) + 2 ) #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } #define HEAP_INCREMENT 15 static int heap_init(struct dn_heap *h, int new_size) { struct dn_heap_entry *p; if (h->size >= new_size ) { printf("dummynet: %s, Bogus call, have %d want %d\n", __func__, h->size, new_size); return 0 ; } new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); if (p == NULL) { printf("dummynet: %s, resize %d failed\n", __func__, new_size ); return 1 ; /* error */ } if (h->size > 0) { bcopy(h->p, p, h->size * sizeof(*p) ); free(h->p, M_DUMMYNET); } h->p = p ; h->size = new_size ; return 0 ; } /* * Insert element in heap. Normally, p != NULL, we insert p in * a new position and bubble up. If p == NULL, then the element is * already in place, and key is the position where to start the * bubble-up. * Returns 1 on failure (cannot allocate new heap entry) * * If offset > 0 the position (index, int) of the element in the heap is * also stored in the element itself at the given offset in bytes. */ #define SET_OFFSET(heap, node) \ if (heap->offset > 0) \ *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; /* * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. */ #define RESET_OFFSET(heap, node) \ if (heap->offset > 0) \ *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; static int heap_insert(struct dn_heap *h, dn_key key1, void *p) { int son = h->elements ; if (p == NULL) /* data already there, set starting point */ son = key1 ; else { /* insert new element at the end, possibly resize */ son = h->elements ; if (son == h->size) /* need resize... 
*/ if (heap_init(h, h->elements+1) ) return 1 ; /* failure... */ h->p[son].object = p ; h->p[son].key = key1 ; h->elements++ ; } while (son > 0) { /* bubble up */ int father = HEAP_FATHER(son) ; struct dn_heap_entry tmp ; if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) break ; /* found right position */ /* son smaller than father, swap and repeat */ HEAP_SWAP(h->p[son], h->p[father], tmp) ; SET_OFFSET(h, son); son = father ; } SET_OFFSET(h, son); return 0 ; } /* * remove top element from heap, or obj if obj != NULL */ static void heap_extract(struct dn_heap *h, void *obj) { int child, father, max = h->elements - 1 ; if (max < 0) { printf("dummynet: warning, extract from empty heap 0x%p\n", h); return ; } father = 0 ; /* default: move up smallest child */ if (obj != NULL) { /* extract specific element, index is at offset */ if (h->offset <= 0) panic("dummynet: heap_extract from middle not supported on this heap!!!\n"); father = *((int *)((char *)obj + h->offset)) ; if (father < 0 || father >= h->elements) { printf("dummynet: heap_extract, father %d out of bound 0..%d\n", father, h->elements); panic("dummynet: heap_extract"); } } RESET_OFFSET(h, father); child = HEAP_LEFT(father) ; /* left child */ while (child <= max) { /* valid entry */ if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) child = child+1 ; /* take right child, otherwise left */ h->p[father] = h->p[child] ; SET_OFFSET(h, father); father = child ; child = HEAP_LEFT(child) ; /* left child for next loop */ } h->elements-- ; if (father != max) { /* * Fill hole with last entry and bubble up, reusing the insert code */ h->p[father] = h->p[max] ; heap_insert(h, father, NULL); /* this one cannot fail */ } } #if 0 /* * change object position and update references * XXX this one is never used! 
*/ static void heap_move(struct dn_heap *h, dn_key new_key, void *object) { int temp; int i ; int max = h->elements-1 ; struct dn_heap_entry buf ; if (h->offset <= 0) panic("cannot move items on this heap"); i = *((int *)((char *)object + h->offset)); if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ h->p[i].key = new_key ; for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ; i = temp ) { /* bubble up */ HEAP_SWAP(h->p[i], h->p[temp], buf) ; SET_OFFSET(h, i); } } else { /* must move down */ h->p[i].key = new_key ; while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */ if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) temp++ ; /* select child with min key */ if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */ HEAP_SWAP(h->p[i], h->p[temp], buf) ; SET_OFFSET(h, i); } else break ; i = temp ; } } SET_OFFSET(h, i); } #endif /* heap_move, unused */ /* * heapify() will reorganize data inside an array to maintain the * heap property. It is needed when we delete a bunch of entries. */ static void heapify(struct dn_heap *h) { int i ; for (i = 0 ; i < h->elements ; i++ ) heap_insert(h, i , NULL) ; } /* * cleanup the heap and free data structure */ static void heap_free(struct dn_heap *h) { if (h->size >0 ) free(h->p, M_DUMMYNET); bzero(h, sizeof(*h) ); } /* * --- end of heap management functions --- */ /* * Dispose a packet in dummynet. Use an inline functions so if we * need to free extra state associated to a packet, this is a * central point to do it. */ static __inline void *dn_free_pkt(struct mbuf *m) { #ifdef __linux__ netisr_dispatch(-1, m); /* -1 drop the packet */ #else m_freem(m); #endif return NULL; } static __inline void dn_free_pkts(struct mbuf *mnext) { struct mbuf *m; while ((m = mnext) != NULL) { mnext = m->m_nextpkt; dn_free_pkt(m); } } /* * Return the mbuf tag holding the dummynet state. As an optimization * this is assumed to be the first tag on the list. 
If this turns out * wrong we'll need to search the list. */ static struct dn_pkt_tag * dn_tag_get(struct mbuf *m) { struct m_tag *mtag = m_tag_first(m); KASSERT(mtag != NULL && mtag->m_tag_cookie == MTAG_ABI_COMPAT && mtag->m_tag_id == PACKET_TAG_DUMMYNET, ("packet on dummynet queue w/o dummynet tag!")); return (struct dn_pkt_tag *)(mtag+1); } /* * Scheduler functions: * * transmit_event() is called when the delay-line needs to enter * the scheduler, either because of existing pkts getting ready, * or new packets entering the queue. The event handled is the delivery * time of the packet. * * ready_event() does something similar with fixed-rate queues, and the * event handled is the finish time of the head pkt. * * wfq_ready_event() does something similar with WF2Q queues, and the * event handled is the start time of the head pkt. * * In all cases, we make sure that the data structures are consistent * before passing pkts out, because this might trigger recursive * invocations of the procedures. */ static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail) { struct mbuf *m; struct dn_pkt_tag *pkt; DUMMYNET_LOCK_ASSERT(); while ((m = pipe->head) != NULL) { pkt = dn_tag_get(m); if (!DN_KEY_LEQ(pkt->output_time, curr_time)) break; pipe->head = m->m_nextpkt; if (*tail != NULL) (*tail)->m_nextpkt = m; else *head = m; *tail = m; } if (*tail != NULL) (*tail)->m_nextpkt = NULL; /* If there are leftover packets, put into the heap for next event. */ if ((m = pipe->head) != NULL) { pkt = dn_tag_get(m); /* * XXX Should check errors on heap_insert, by draining the * whole pipe p and hoping in the future we are more successful. */ heap_insert(&extract_heap, pkt->output_time, pipe); } } #ifndef __linux__ #define div64(a, b) ((int64_t)(a) / (int64_t)(b)) #endif #define DN_TO_DROP 0xffff /* * Compute how many ticks we have to wait before being able to send * a packet. 
This is computed as the "wire time" for the packet * (length + extra bits), minus the credit available, scaled to ticks. * Check that the result is not be negative (it could be if we have * too much leftover credit in q->numbytes). */ static inline dn_key set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p) { int64_t ret; ret = div64( (m->m_pkthdr.len * 8 + q->extra_bits) * hz - q->numbytes + p->bandwidth - 1 , p->bandwidth); #if 0 printf("%s %d extra_bits %d numb %d ret %d\n", __FUNCTION__, __LINE__, (int)(q->extra_bits & 0xffffffff), (int)(q->numbytes & 0xffffffff), (int)(ret & 0xffffffff)); #endif if (ret < 0) ret = 0; return ret; } /* * Convert the additional MAC overheads/delays into an equivalent * number of bits for the given data rate. The samples are in milliseconds * so we need to divide by 1000. */ static dn_key compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p) { int index; dn_key extra_bits; if (!p->samples || p->samples_no == 0) return 0; index = random() % p->samples_no; extra_bits = div64((dn_key)p->samples[index] * p->bandwidth, 1000); if (index >= p->loss_level) { struct dn_pkt_tag *dt = dn_tag_get(pkt); if (dt) dt->dn_dir = DN_TO_DROP; } return extra_bits; } static void free_pipe(struct dn_pipe *p) { if (p->samples) free(p->samples, M_DUMMYNET); free(p, M_DUMMYNET); } /* * extract pkt from queue, compute output time (could be now) * and put into delay line (p_queue) */ static void move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p, int len) { struct dn_pkt_tag *dt = dn_tag_get(pkt); q->head = pkt->m_nextpkt ; q->len-- ; q->len_bytes -= len ; dt->output_time = curr_time + p->delay ; if (p->head == NULL) p->head = pkt; else p->tail->m_nextpkt = pkt; p->tail = pkt; p->tail->m_nextpkt = NULL; } /* * ready_event() is invoked every time the queue must enter the * scheduler, either because the first packet arrives, or because * a previously scheduled event fired. 
* On invokation, drain as many pkts as possible (could be 0) and then * if there are leftover packets reinsert the pkt in the scheduler. */ static void ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) { struct mbuf *pkt; struct dn_pipe *p = q->fs->pipe; int p_was_empty; DUMMYNET_LOCK_ASSERT(); if (p == NULL) { printf("dummynet: ready_event- pipe is gone\n"); return; } p_was_empty = (p->head == NULL); /* * Schedule fixed-rate queues linked to this pipe: * account for the bw accumulated since last scheduling, then * drain as many pkts as allowed by q->numbytes and move to * the delay line (in p) computing output time. * bandwidth==0 (no limit) means we can drain the whole queue, * setting len_scaled = 0 does the job. */ q->numbytes += (curr_time - q->sched_time) * p->bandwidth; while ((pkt = q->head) != NULL) { int len = pkt->m_pkthdr.len; dn_key len_scaled = p->bandwidth ? len*8*hz + q->extra_bits*hz : 0; if (DN_KEY_GT(len_scaled, q->numbytes)) break; q->numbytes -= len_scaled; move_pkt(pkt, q, p, len); if (q->head) q->extra_bits = compute_extra_bits(q->head, p); } /* * If we have more packets queued, schedule next ready event * (can only occur when bandwidth != 0, otherwise we would have * flushed the whole queue in the previous loop). * To this purpose we record the current time and compute how many * ticks to go for the finish time of the packet. */ if ((pkt = q->head) != NULL) { /* this implies bandwidth != 0 */ dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */ q->sched_time = curr_time; heap_insert(&ready_heap, curr_time + t, (void *)q); /* * XXX Should check errors on heap_insert, and drain the whole * queue on error hoping next time we are luckier. */ } else /* RED needs to know when the queue becomes empty. */ q->idle_time = curr_time; /* * If the delay line was empty call transmit_event() now. * Otherwise, the scheduler will take care of it. 
*/ if (p_was_empty) transmit_event(p, head, tail); } /* * Called when we can transmit packets on WF2Q queues. Take pkts out of * the queues at their start time, and enqueue into the delay line. * Packets are drained until p->numbytes < 0. As long as * len_scaled >= p->numbytes, the packet goes into the delay line * with a deadline p->delay. For the last packet, if p->numbytes < 0, * there is an additional delay. */ static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) { int p_was_empty = (p->head == NULL); struct dn_heap *sch = &(p->scheduler_heap); struct dn_heap *neh = &(p->not_eligible_heap); int64_t p_numbytes = p->numbytes; /* * p->numbytes is only 32bits in FBSD7, but we might need 64 bits. * Use a local variable for the computations, and write back the * results when done, saturating if needed. * The local variable has no impact on performance and helps * reducing diffs between the various branches. */ DUMMYNET_LOCK_ASSERT(); if (p->if_name[0] == 0) /* tx clock is simulated */ p_numbytes += (curr_time - p->sched_time) * p->bandwidth; else { /* * tx clock is for real, * the ifq must be empty or this is a NOP. * XXX not supported in Linux */ if (1) // p->ifp && p->ifp->if_snd.ifq_head != NULL) return; else { DPRINTF(("dummynet: pipe %d ready from %s --\n", p->pipe_nr, p->if_name)); } } /* * While we have backlogged traffic AND credit, we need to do * something on the queue. */ while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) { if (sch->elements > 0) { /* Have some eligible pkts to send out. */ struct dn_flow_queue *q = sch->p[0].object; struct mbuf *pkt = q->head; struct dn_flow_set *fs = q->fs; uint64_t len = pkt->m_pkthdr.len; int len_scaled = p->bandwidth ? len * 8 * hz : 0; heap_extract(sch, NULL); /* Remove queue from heap. */ p_numbytes -= len_scaled; move_pkt(pkt, q, p, len); p->V += div64((len << MY_M), p->sum); /* Update V. */ q->S = q->F; /* Update start time. 
*/ if (q->len == 0) { /* Flow not backlogged any more. */ fs->backlogged--; heap_insert(&(p->idle_heap), q->F, q); } else { /* Still backlogged. */ /* * Update F and position in backlogged queue, * then put flow in not_eligible_heap * (we will fix this later). */ len = (q->head)->m_pkthdr.len; q->F += div64((len << MY_M), fs->weight); if (DN_KEY_LEQ(q->S, p->V)) heap_insert(neh, q->S, q); else heap_insert(sch, q->F, q); } } /* * Now compute V = max(V, min(S_i)). Remember that all elements * in sch have by definition S_i <= V so if sch is not empty, * V is surely the max and we must not update it. Conversely, * if sch is empty we only need to look at neh. */ if (sch->elements == 0 && neh->elements > 0) p->V = MAX64(p->V, neh->p[0].key); /* Move from neh to sch any packets that have become eligible */ while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) { struct dn_flow_queue *q = neh->p[0].object; heap_extract(neh, NULL); heap_insert(sch, q->F, q); } if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */ p_numbytes = -1; /* Mark not ready for I/O. */ break; } } if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0) { p->idle_time = curr_time; /* * No traffic and no events scheduled. * We can get rid of idle-heap. */ if (p->idle_heap.elements > 0) { int i; for (i = 0; i < p->idle_heap.elements; i++) { struct dn_flow_queue *q; q = p->idle_heap.p[i].object; q->F = 0; q->S = q->F + 1; } p->sum = 0; p->V = 0; p->idle_heap.elements = 0; } } /* * If we are getting clocks from dummynet (not a real interface) and * If we are under credit, schedule the next ready event. * Also fix the delivery time of the last packet. */ if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */ dn_key t = 0; /* Number of ticks i have to wait. 
*/ if (p->bandwidth > 0) t = div64(p->bandwidth - 1 - p_numbytes, p->bandwidth); dn_tag_get(p->tail)->output_time += t; p->sched_time = curr_time; heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); /* * XXX Should check errors on heap_insert, and drain the whole * queue on error hoping next time we are luckier. */ } /* Write back p_numbytes (adjust 64->32bit if necessary). */ p->numbytes = p_numbytes; /* * If the delay line was empty call transmit_event() now. * Otherwise, the scheduler will take care of it. */ if (p_was_empty) transmit_event(p, head, tail); } /* * This is called one tick, after previous run. It is used to * schedule next run. */ static void dummynet(void * __unused unused) { taskqueue_enqueue(dn_tq, &dn_task); } /* * The main dummynet processing function. */ static void dummynet_task(void *context, int pending) { struct mbuf *head = NULL, *tail = NULL; struct dn_pipe *pipe; struct dn_heap *heaps[3]; struct dn_heap *h; void *p; /* generic parameter to handler */ int i; DUMMYNET_LOCK(); heaps[0] = &ready_heap; /* fixed-rate queues */ heaps[1] = &wfq_ready_heap; /* wfq queues */ heaps[2] = &extract_heap; /* delay line */ /* Update number of lost(coalesced) ticks. */ tick_lost += pending - 1; getmicrouptime(&t); /* Last tick duration (usec). */ tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 + (t.tv_usec - prev_t.tv_usec); /* Last tick vs standard tick difference (usec). */ tick_delta = (tick_last * hz - 1000000) / hz; /* Accumulated tick difference (usec). */ tick_delta_sum += tick_delta; prev_t = t; /* * Adjust curr_time if accumulated tick difference greater than * 'standard' tick. Since curr_time should be monotonically increasing, * we do positive adjustment as required and throttle curr_time in * case of negative adjustment. 
*/ curr_time++; if (tick_delta_sum - tick >= 0) { int diff = tick_delta_sum / tick; curr_time += diff; tick_diff += diff; tick_delta_sum %= tick; tick_adjustment++; } else if (tick_delta_sum + tick <= 0) { curr_time--; tick_diff--; tick_delta_sum += tick; tick_adjustment++; } for (i = 0; i < 3; i++) { h = heaps[i]; while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) { if (h->p[0].key > curr_time) printf("dummynet: warning, " "heap %d is %d ticks late\n", i, (int)(curr_time - h->p[0].key)); /* store a copy before heap_extract */ p = h->p[0].object; /* need to extract before processing */ heap_extract(h, NULL); if (i == 0) ready_event(p, &head, &tail); else if (i == 1) { struct dn_pipe *pipe = p; if (pipe->if_name[0] != '\0') printf("dummynet: bad ready_event_wfq " "for pipe %s\n", pipe->if_name); else ready_event_wfq(p, &head, &tail); } else transmit_event(p, &head, &tail); } } /* Sweep pipes trying to expire idle flow_queues. */ for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH(pipe, &pipehash[i], next) if (pipe->idle_heap.elements > 0 && DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) { struct dn_flow_queue *q = pipe->idle_heap.p[0].object; heap_extract(&(pipe->idle_heap), NULL); /* Mark timestamp as invalid. 
*/ q->S = q->F + 1; pipe->sum -= q->fs->weight; } DUMMYNET_UNLOCK(); if (head != NULL) dummynet_send(head); callout_reset(&dn_timeout, 1, dummynet, NULL); } static void dummynet_send(struct mbuf *m) { struct dn_pkt_tag *pkt; struct mbuf *n; struct ip *ip; int dst; for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; if (m_tag_first(m) == NULL) { pkt = NULL; /* probably unnecessary */ dst = DN_TO_DROP; } else { pkt = dn_tag_get(m); dst = pkt->dn_dir; } switch (dst) { case DN_TO_IP_OUT: ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); break ; case DN_TO_IP_IN : ip = mtod(m, struct ip *); #ifndef __linux__ /* restore net format for FreeBSD */ ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); #endif netisr_dispatch(NETISR_IP, m); break; #ifdef INET6 case DN_TO_IP6_IN: netisr_dispatch(NETISR_IPV6, m); break; case DN_TO_IP6_OUT: ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); break; #endif case DN_TO_IFB_FWD: if (bridge_dn_p != NULL) ((*bridge_dn_p)(m, pkt->ifp)); else printf("dummynet: if_bridge not loaded\n"); break; case DN_TO_ETH_DEMUX: /* * The Ethernet code assumes the Ethernet header is * contiguous in the first mbuf header. * Insure this is true. */ if (m->m_len < ETHER_HDR_LEN && (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { printf("dummynet/ether: pullup failed, " "dropping packet\n"); break; } ether_demux(m->m_pkthdr.rcvif, m); break; case DN_TO_ETH_OUT: ether_output_frame(pkt->ifp, m); break; case DN_TO_DROP: /* drop the packet after some time */ dn_free_pkt(m); break; default: printf("dummynet: bad switch %d!\n", pkt->dn_dir); dn_free_pkt(m); break; } } } /* * Unconditionally expire empty queues in case of shortage. * Returns the number of queues freed. 
*/ static int expire_queues(struct dn_flow_set *fs) { struct dn_flow_queue *q, *prev ; int i, initial_elements = fs->rq_elements ; if (fs->last_expired == time_uptime) return 0 ; fs->last_expired = time_uptime ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) if (!QUEUE_IS_IDLE(q)) { prev = q ; q = q->next ; } else { /* entry is idle, expire it */ struct dn_flow_queue *old_q = q ; if (prev != NULL) prev->next = q = q->next ; else fs->rq[i] = q = q->next ; fs->rq_elements-- ; free(old_q, M_DUMMYNET); } return initial_elements - fs->rq_elements ; } /* * If room, create a new queue and put at head of slot i; * otherwise, create or use the default queue. */ static struct dn_flow_queue * create_queue(struct dn_flow_set *fs, int i) { struct dn_flow_queue *q; if (fs->rq_elements > fs->rq_size * dn_max_ratio && expire_queues(fs) == 0) { /* No way to get room, use or create overflow queue. */ i = fs->rq_size; if (fs->rq[i] != NULL) return fs->rq[i]; } q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); if (q == NULL) { printf("dummynet: sorry, cannot allocate queue for new flow\n"); return (NULL); } q->fs = fs; q->hash_slot = i; q->next = fs->rq[i]; q->S = q->F + 1; /* hack - mark timestamp as invalid. */ q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0); fs->rq[i] = q; fs->rq_elements++; return (q); } /* * Given a flow_set and a pkt in last_pkt, find a matching queue * after appropriate masking. The queue is moved to front * so that further searches take less time. 
*/ static struct dn_flow_queue * find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) { int i = 0 ; /* we need i and q for new allocations */ struct dn_flow_queue *q, *prev; int is_v6 = IS_IP6_FLOW_ID(id); if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) ) q = fs->rq[0] ; else { /* first, do the masking, then hash */ id->dst_port &= fs->flow_mask.dst_port ; id->src_port &= fs->flow_mask.src_port ; id->proto &= fs->flow_mask.proto ; id->flags = 0 ; /* we don't care about this one */ if (is_v6) { APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6); APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6); id->flow_id6 &= fs->flow_mask.flow_id6; i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^ ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto ) ^ (id->flow_id6); } else { id->dst_ip &= fs->flow_mask.dst_ip ; id->src_ip &= fs->flow_mask.src_ip ; i = ( (id->dst_ip) & 0xffff ) ^ ( (id->dst_ip >> 15) & 0xffff ) ^ ( (id->src_ip << 1) & 0xffff ) ^ ( (id->src_ip >> 16 ) & 0xffff ) ^ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto ); } i = i % fs->rq_size ; /* finally, scan the current list for a match */ searches++ ; for (prev=NULL, q = fs->rq[i] ; q ; ) { 
search_steps++; if (is_v6 && IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) && IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) && id->dst_port == q->id.dst_port && id->src_port == q->id.src_port && id->proto == q->id.proto && id->flags == q->id.flags && id->flow_id6 == q->id.flow_id6) break ; /* found */ if (!is_v6 && id->dst_ip == q->id.dst_ip && id->src_ip == q->id.src_ip && id->dst_port == q->id.dst_port && id->src_port == q->id.src_port && id->proto == q->id.proto && id->flags == q->id.flags) break ; /* found */ /* No match. Check if we can expire the entry */ if (pipe_expire && QUEUE_IS_IDLE(q)) { /* entry is idle and not in any heap, expire it */ struct dn_flow_queue *old_q = q ; if (prev != NULL) prev->next = q = q->next ; else fs->rq[i] = q = q->next ; fs->rq_elements-- ; free(old_q, M_DUMMYNET); continue ; } prev = q ; q = q->next ; } if (q && prev != NULL) { /* found and not in front */ prev->next = q->next ; q->next = fs->rq[i] ; fs->rq[i] = q ; } } if (q == NULL) { /* no match, need to allocate a new entry */ q = create_queue(fs, i); if (q != NULL) q->id = *id ; } return q ; } static int red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) { /* * RED algorithm * * RED calculates the average queue size (avg) using a low-pass filter * with an exponential weighted (w_q) moving average: * avg <- (1-w_q) * avg + w_q * q_size * where q_size is the queue length (measured in bytes or * packets). * * If q_size == 0, we compute the idle time for the link, and set * avg = (1 - w_q)^(idle/s) * where s is the time needed for transmitting a medium-sized packet. * * Now, if avg < min_th the packet is enqueued. * If avg > max_th the packet is dropped. Otherwise, the packet is * dropped with probability P function of avg. */ int64_t p_b = 0; /* Queue in bytes or packets? */ u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len; DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size)); /* Average queue size estimation. 
*/ if (q_size != 0) { /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */ int diff = SCALE(q_size) - q->avg; int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q); q->avg += (int)v; } else { /* * Queue is empty, find for how long the queue has been * empty and use a lookup table for computing * (1 - * w_q)^(idle_time/s) where s is the time to send a * (small) packet. * XXX check wraps... */ if (q->avg) { u_int t = div64(curr_time - q->idle_time, fs->lookup_step); q->avg = (t < fs->lookup_depth) ? SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; } } DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg))); /* Should i drop? */ if (q->avg < fs->min_th) { q->count = -1; return (0); /* accept packet */ } if (q->avg >= fs->max_th) { /* average queue >= max threshold */ if (fs->flags_fs & DN_IS_GENTLE_RED) { /* * According to Gentle-RED, if avg is greater than * max_th the packet is dropped with a probability * p_b = c_3 * avg - c_4 * where c_3 = (1 - max_p) / max_th * c_4 = 1 - 2 * max_p */ p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) - fs->c_4; } else { q->count = -1; DPRINTF(("dummynet: - drop")); return (1); } } else if (q->avg > fs->min_th) { /* * We compute p_b using the linear dropping function * p_b = c_1 * avg - c_2 * where c_1 = max_p / (max_th - min_th) * c_2 = max_p * min_th / (max_th - min_th) */ p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2; } if (fs->flags_fs & DN_QSIZE_IS_BYTES) p_b = div64(p_b * len, fs->max_pkt_size); if (++q->count == 0) q->random = random() & 0xffff; else { /* * q->count counts packets arrived since last drop, so a greater * value of q->count means a greater packet drop probability. */ if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) { q->count = 0; DPRINTF(("dummynet: - red drop")); /* After a drop we calculate a new random value. */ q->random = random() & 0xffff; return (1); /* drop */ } } /* End of RED algorithm. 
*/ return (0); /* accept */ } static __inline struct dn_flow_set * locate_flowset(int fs_nr) { struct dn_flow_set *fs; SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next) if (fs->fs_nr == fs_nr) return (fs); return (NULL); } static __inline struct dn_pipe * locate_pipe(int pipe_nr) { struct dn_pipe *pipe; SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next) if (pipe->pipe_nr == pipe_nr) return (pipe); return (NULL); } /* * dummynet hook for packets. Below 'pipe' is a pipe or a queue * depending on whether WF2Q or fixed bw is used. * * pipe_nr pipe or queue the packet is destined for. * dir where shall we send the packet after dummynet. * m the mbuf with the packet * ifp the 'ifp' parameter from the caller. * NULL in ip_input, destination interface in ip_output, * rule matching rule, in case of multiple passes */ static int dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) { struct mbuf *m = *m0, *head = NULL, *tail = NULL; struct dn_pkt_tag *pkt; struct m_tag *mtag; struct dn_flow_set *fs = NULL; struct dn_pipe *pipe; uint64_t len = m->m_pkthdr.len; struct dn_flow_queue *q = NULL; int is_pipe; ipfw_insn *cmd = ACTION_PTR(fwa->rule); KASSERT(m->m_nextpkt == NULL, ("dummynet_io: mbuf queue passed to dummynet")); if (cmd->opcode == O_LOG) cmd += F_LEN(cmd); if (cmd->opcode == O_ALTQ) cmd += F_LEN(cmd); if (cmd->opcode == O_TAG) cmd += F_LEN(cmd); is_pipe = (cmd->opcode == O_PIPE); DUMMYNET_LOCK(); io_pkt++; /* * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. * * XXXGL: probably the pipe->fs and fs->pipe logic here * below can be simplified. */ if (is_pipe) { pipe = locate_pipe(fwa->cookie); if (pipe != NULL) fs = &(pipe->fs); } else fs = locate_flowset(fwa->cookie); if (fs == NULL) goto dropit; /* This queue/pipe does not exist! */ pipe = fs->pipe; if (pipe == NULL) { /* Must be a queue, try find a matching pipe. 
*/ pipe = locate_pipe(fs->parent_nr); if (pipe != NULL) fs->pipe = pipe; else { printf("dummynet: no pipe %d for queue %d, drop pkt\n", fs->parent_nr, fs->fs_nr); goto dropit; } } q = find_queue(fs, &(fwa->f_id)); if (q == NULL) goto dropit; /* Cannot allocate queue. */ /* Update statistics, then check reasons to drop pkt. */ q->tot_bytes += len; q->tot_pkts++; if (fs->plr && random() < fs->plr) goto dropit; /* Random pkt drop. */ if (fs->flags_fs & DN_QSIZE_IS_BYTES) { if (q->len_bytes > fs->qsize) goto dropit; /* Queue size overflow. */ } else { if (q->len >= fs->qsize) goto dropit; /* Queue count overflow. */ } if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len)) goto dropit; /* XXX expensive to zero, see if we can remove it. */ mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO); if (mtag == NULL) goto dropit; /* Cannot allocate packet header. */ m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ pkt = (struct dn_pkt_tag *)(mtag + 1); /* * Ok, i can handle the pkt now... * Build and enqueue packet + parameters. */ pkt->rule = fwa->rule; pkt->rule_id = fwa->rule_id; pkt->chain_id = fwa->chain_id; pkt->dn_dir = dir; pkt->ifp = fwa->oif; if (q->head == NULL) q->head = m; else q->tail->m_nextpkt = m; q->tail = m; q->len++; q->len_bytes += len; if (q->head != m) /* Flow was not idle, we are done. */ goto done; if (is_pipe) { /* Fixed rate queues. */ if (q->idle_time < curr_time) { /* Calculate available burst size. */ q->numbytes += (curr_time - q->idle_time - 1) * pipe->bandwidth; if (q->numbytes > pipe->burst) q->numbytes = pipe->burst; if (io_fast) q->numbytes += pipe->bandwidth; } } else { /* WF2Q. */ if (pipe->idle_time < curr_time && pipe->scheduler_heap.elements == 0 && pipe->not_eligible_heap.elements == 0) { /* Calculate available burst size. 
*/ pipe->numbytes += (curr_time - pipe->idle_time - 1) * pipe->bandwidth; if (pipe->numbytes > 0 && pipe->numbytes > pipe->burst) pipe->numbytes = pipe->burst; if (io_fast) pipe->numbytes += pipe->bandwidth; } pipe->idle_time = curr_time; } /* Necessary for both: fixed rate & WF2Q queues. */ q->idle_time = curr_time; /* * If we reach this point the flow was previously idle, so we need * to schedule it. This involves different actions for fixed-rate or * WF2Q queues. */ if (is_pipe) { /* Fixed-rate queue: just insert into the ready_heap. */ dn_key t = 0; if (pipe->bandwidth) { q->extra_bits = compute_extra_bits(m, pipe); t = set_ticks(m, q, pipe); } q->sched_time = curr_time; if (t == 0) /* Must process it now. */ ready_event(q, &head, &tail); else heap_insert(&ready_heap, curr_time + t , q); } else { /* * WF2Q. First, compute start time S: if the flow was * idle (S = F + 1) set S to the virtual time V for the * controlling pipe, and update the sum of weights for the pipe; * otherwise, remove flow from idle_heap and set S to max(F,V). * Second, compute finish time F = S + len / weight. * Third, if pipe was idle, update V = max(S, V). * Fourth, count one more backlogged flow. */ if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */ q->S = pipe->V; pipe->sum += fs->weight; /* Add weight of new queue. */ } else { heap_extract(&(pipe->idle_heap), q); q->S = MAX64(q->F, pipe->V); } q->F = q->S + div64(len << MY_M, fs->weight); if (pipe->not_eligible_heap.elements == 0 && pipe->scheduler_heap.elements == 0) pipe->V = MAX64(q->S, pipe->V); fs->backlogged++; /* * Look at eligibility. A flow is not eligibile if S>V (when * this happens, it means that there is some other flow already * scheduled for the same pipe, so the scheduler_heap cannot be * empty). If the flow is not eligible we just store it in the * not_eligible_heap. Otherwise, we store in the scheduler_heap * and possibly invoke ready_event_wfq() right now if there is * leftover credit. 
* Note that for all flows in scheduler_heap (SCH), S_i <= V, * and for all flows in not_eligible_heap (NEH), S_i > V. * So when we need to compute max(V, min(S_i)) forall i in * SCH+NEH, we only need to look into NEH. */ if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */ if (pipe->scheduler_heap.elements == 0) printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); heap_insert(&(pipe->not_eligible_heap), q->S, q); } else { heap_insert(&(pipe->scheduler_heap), q->F, q); if (pipe->numbytes >= 0) { /* Pipe is idle. */ if (pipe->scheduler_heap.elements != 1) printf("dummynet: OUCH! pipe should have been idle!\n"); DPRINTF(("dummynet: waking up pipe %d at %d\n", pipe->pipe_nr, (int)(q->F >> MY_M))); pipe->sched_time = curr_time; ready_event_wfq(pipe, &head, &tail); } } } done: if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX && dir != DN_TO_ETH_OUT) { /* Fast io. */ io_pkt_fast++; if (m->m_nextpkt != NULL) printf("dummynet: fast io: pkt chain detected!\n"); head = m->m_nextpkt = NULL; } else *m0 = NULL; /* Normal io. */ DUMMYNET_UNLOCK(); if (head != NULL) dummynet_send(head); return (0); dropit: io_pkt_drop++; if (q) q->drops++; DUMMYNET_UNLOCK(); *m0 = dn_free_pkt(m); return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); } /* * Dispose all packets and flow_queues on a flow_set. * If all=1, also remove red lookup table and other storage, * including the descriptor itself. * For the one in dn_pipe MUST also cleanup ready_heap... */ static void purge_flow_set(struct dn_flow_set *fs, int all) { struct dn_flow_queue *q, *qn; int i; DUMMYNET_LOCK_ASSERT(); for (i = 0; i <= fs->rq_size; i++) { for (q = fs->rq[i]; q != NULL; q = qn) { dn_free_pkts(q->head); qn = q->next; free(q, M_DUMMYNET); } fs->rq[i] = NULL; } fs->rq_elements = 0; if (all) { /* RED - free lookup table. */ if (fs->w_q_lookup != NULL) free(fs->w_q_lookup, M_DUMMYNET); if (fs->rq != NULL) free(fs->rq, M_DUMMYNET); /* If this fs is not part of a pipe, free it. 
*/ if (fs->pipe == NULL || fs != &(fs->pipe->fs)) free(fs, M_DUMMYNET); } } /* * Dispose all packets queued on a pipe (not a flow_set). * Also free all resources associated to a pipe, which is about * to be deleted. */ static void purge_pipe(struct dn_pipe *pipe) { purge_flow_set( &(pipe->fs), 1 ); dn_free_pkts(pipe->head); heap_free( &(pipe->scheduler_heap) ); heap_free( &(pipe->not_eligible_heap) ); heap_free( &(pipe->idle_heap) ); } /* * Delete all pipes and heaps returning memory. Must also * remove references from all ipfw rules to all pipes. */ static void dummynet_flush(void) { struct dn_pipe *pipe, *pipe1; struct dn_flow_set *fs, *fs1; int i; DUMMYNET_LOCK(); /* Free heaps so we don't have unwanted events. */ heap_free(&ready_heap); heap_free(&wfq_ready_heap); heap_free(&extract_heap); /* * Now purge all queued pkts and delete all pipes. * * XXXGL: can we merge the for(;;) cycles into one or not? */ for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) { SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next); purge_flow_set(fs, 1); } for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) { SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next); purge_pipe(pipe); free_pipe(pipe); } DUMMYNET_UNLOCK(); } /* * setup RED parameters */ static int config_red(struct dn_flow_set *p, struct dn_flow_set *x) { int i; x->w_q = p->w_q; x->min_th = SCALE(p->min_th); x->max_th = SCALE(p->max_th); x->max_p = p->max_p; x->c_1 = p->max_p / (p->max_th - p->min_th); x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th)); if (x->flags_fs & DN_IS_GENTLE_RED) { x->c_3 = (SCALE(1) - p->max_p) / p->max_th; x->c_4 = SCALE(1) - 2 * p->max_p; } /* If the lookup table already exist, free and create it again. 
*/ if (x->w_q_lookup) { free(x->w_q_lookup, M_DUMMYNET); x->w_q_lookup = NULL; } if (red_lookup_depth == 0) { printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" "must be > 0\n"); free(x, M_DUMMYNET); return (EINVAL); } x->lookup_depth = red_lookup_depth; x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int), M_DUMMYNET, M_NOWAIT); if (x->w_q_lookup == NULL) { printf("dummynet: sorry, cannot allocate red lookup table\n"); free(x, M_DUMMYNET); return(ENOSPC); } /* Fill the lookup table with (1 - w_q)^x */ x->lookup_step = p->lookup_step; x->lookup_weight = p->lookup_weight; x->w_q_lookup[0] = SCALE(1) - x->w_q; for (i = 1; i < x->lookup_depth; i++) x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight); if (red_avg_pkt_size < 1) red_avg_pkt_size = 512; x->avg_pkt_size = red_avg_pkt_size; if (red_max_pkt_size < 1) red_max_pkt_size = 1500; x->max_pkt_size = red_max_pkt_size; return (0); } static int alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) { if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ int l = pfs->rq_size; if (l == 0) l = dn_hash_size; if (l < 4) l = 4; else if (l > DN_MAX_HASH_SIZE) l = DN_MAX_HASH_SIZE; x->rq_size = l; } else /* one is enough for null mask */ x->rq_size = 1; x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *), M_DUMMYNET, M_NOWAIT | M_ZERO); if (x->rq == NULL) { printf("dummynet: sorry, cannot allocate queue\n"); return (ENOMEM); } x->rq_elements = 0; return 0 ; } static void set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) { x->flags_fs = src->flags_fs; x->qsize = src->qsize; x->plr = src->plr; x->flow_mask = src->flow_mask; if (x->flags_fs & DN_QSIZE_IS_BYTES) { if (x->qsize > pipe_byte_limit) x->qsize = 1024 * 1024; } else { if (x->qsize == 0) x->qsize = 50; if (x->qsize > pipe_slot_limit) x->qsize = 50; } /* Configuring RED. */ if (x->flags_fs & DN_IS_RED) config_red(src, x); /* XXX should check errors */ } /* * Setup pipe or queue parameters. 
*/ static int config_pipe(struct dn_pipe *p) { struct dn_flow_set *pfs = &(p->fs); struct dn_flow_queue *q; int i, error; /* * The config program passes parameters as follows: * bw = bits/second (0 means no limits), * delay = ms, must be translated into ticks. * qsize = slots/bytes */ p->delay = (p->delay * hz) / 1000; /* Scale burst size: bytes -> bits * hz */ p->burst *= 8 * hz; /* We need either a pipe number or a flow_set number. */ if (p->pipe_nr == 0 && pfs->fs_nr == 0) return (EINVAL); if (p->pipe_nr != 0 && pfs->fs_nr != 0) return (EINVAL); if (p->pipe_nr != 0) { /* this is a pipe */ struct dn_pipe *pipe; DUMMYNET_LOCK(); pipe = locate_pipe(p->pipe_nr); /* locate pipe */ if (pipe == NULL) { /* new pipe */ pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET, M_NOWAIT | M_ZERO); if (pipe == NULL) { DUMMYNET_UNLOCK(); printf("dummynet: no memory for new pipe\n"); return (ENOMEM); } pipe->pipe_nr = p->pipe_nr; pipe->fs.pipe = pipe; /* * idle_heap is the only one from which * we extract from the middle. */ pipe->idle_heap.size = pipe->idle_heap.elements = 0; pipe->idle_heap.offset = offsetof(struct dn_flow_queue, heap_pos); } else /* Flush accumulated credit for all queues. */ for (i = 0; i <= pipe->fs.rq_size; i++) for (q = pipe->fs.rq[i]; q; q = q->next) { q->numbytes = p->burst + (io_fast ? p->bandwidth : 0); } pipe->bandwidth = p->bandwidth; pipe->burst = p->burst; pipe->numbytes = pipe->burst + (io_fast ? pipe->bandwidth : 0); bcopy(p->if_name, pipe->if_name, sizeof(p->if_name)); pipe->ifp = NULL; /* reset interface ptr */ pipe->delay = p->delay; set_fs_parms(&(pipe->fs), pfs); /* Handle changes in the delay profile. 
*/ if (p->samples_no > 0) { if (pipe->samples_no != p->samples_no) { if (pipe->samples != NULL) free(pipe->samples, M_DUMMYNET); pipe->samples = malloc(p->samples_no*sizeof(dn_key), M_DUMMYNET, M_NOWAIT | M_ZERO); if (pipe->samples == NULL) { DUMMYNET_UNLOCK(); printf("dummynet: no memory " "for new samples\n"); return (ENOMEM); } pipe->samples_no = p->samples_no; } strncpy(pipe->name,p->name,sizeof(pipe->name)); pipe->loss_level = p->loss_level; for (i = 0; isamples_no; ++i) pipe->samples[i] = p->samples[i]; } else if (pipe->samples != NULL) { free(pipe->samples, M_DUMMYNET); pipe->samples = NULL; pipe->samples_no = 0; } if (pipe->fs.rq == NULL) { /* a new pipe */ error = alloc_hash(&(pipe->fs), pfs); if (error) { DUMMYNET_UNLOCK(); free_pipe(pipe); return (error); } SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)], pipe, next); } DUMMYNET_UNLOCK(); } else { /* config queue */ struct dn_flow_set *fs; DUMMYNET_LOCK(); fs = locate_flowset(pfs->fs_nr); /* locate flow_set */ if (fs == NULL) { /* new */ if (pfs->parent_nr == 0) { /* need link to a pipe */ DUMMYNET_UNLOCK(); return (EINVAL); } fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET, M_NOWAIT | M_ZERO); if (fs == NULL) { DUMMYNET_UNLOCK(); printf( "dummynet: no memory for new flow_set\n"); return (ENOMEM); } fs->fs_nr = pfs->fs_nr; fs->parent_nr = pfs->parent_nr; fs->weight = pfs->weight; if (fs->weight == 0) fs->weight = 1; else if (fs->weight > 100) fs->weight = 100; } else { /* * Change parent pipe not allowed; * must delete and recreate. */ if (pfs->parent_nr != 0 && fs->parent_nr != pfs->parent_nr) { DUMMYNET_UNLOCK(); return (EINVAL); } } set_fs_parms(fs, pfs); if (fs->rq == NULL) { /* a new flow_set */ error = alloc_hash(fs, pfs); if (error) { DUMMYNET_UNLOCK(); free(fs, M_DUMMYNET); return (error); } SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)], fs, next); } DUMMYNET_UNLOCK(); } return (0); } /* * Helper function to remove from a heap queues which are linked to * a flow_set about to be deleted. 
*/ static void fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs) { int i = 0, found = 0 ; for (; i < h->elements ;) if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) { h->elements-- ; h->p[i] = h->p[h->elements] ; found++ ; } else i++ ; if (found) heapify(h); } /* * helper function to remove a pipe from a heap (can be there at most once) */ static void pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p) { if (h->elements > 0) { int i = 0 ; for (i=0; i < h->elements ; i++ ) { if (h->p[i].object == p) { /* found it */ h->elements-- ; h->p[i] = h->p[h->elements] ; heapify(h); break ; } } } } /* * drain all queues. Called in case of severe mbuf shortage. */ void dummynet_drain(void) { struct dn_flow_set *fs; struct dn_pipe *pipe; int i; DUMMYNET_LOCK_ASSERT(); heap_free(&ready_heap); heap_free(&wfq_ready_heap); heap_free(&extract_heap); /* remove all references to this pipe from flow_sets */ for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH(fs, &flowsethash[i], next) purge_flow_set(fs, 0); for (i = 0; i < HASHSIZE; i++) { SLIST_FOREACH(pipe, &pipehash[i], next) { purge_flow_set(&(pipe->fs), 0); dn_free_pkts(pipe->head); pipe->head = pipe->tail = NULL; } } } /* * Fully delete a pipe or a queue, cleaning up associated info. */ static int delete_pipe(struct dn_pipe *p) { if (p->pipe_nr == 0 && p->fs.fs_nr == 0) return EINVAL ; if (p->pipe_nr != 0 && p->fs.fs_nr != 0) return EINVAL ; if (p->pipe_nr != 0) { /* this is an old-style pipe */ struct dn_pipe *pipe; struct dn_flow_set *fs; int i; DUMMYNET_LOCK(); pipe = locate_pipe(p->pipe_nr); /* locate pipe */ if (pipe == NULL) { DUMMYNET_UNLOCK(); return (ENOENT); /* not found */ } /* Unlink from list of pipes. */ SLIST_REMOVE(&pipehash[HASH(pipe->pipe_nr)], pipe, dn_pipe, next); /* Remove all references to this pipe from flow_sets. 
*/ for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH(fs, &flowsethash[i], next) if (fs->pipe == pipe) { printf("dummynet: ++ ref to pipe %d from fs %d\n", p->pipe_nr, fs->fs_nr); fs->pipe = NULL ; purge_flow_set(fs, 0); } fs_remove_from_heap(&ready_heap, &(pipe->fs)); purge_pipe(pipe); /* remove all data associated to this pipe */ /* remove reference to here from extract_heap and wfq_ready_heap */ pipe_remove_from_heap(&extract_heap, pipe); pipe_remove_from_heap(&wfq_ready_heap, pipe); DUMMYNET_UNLOCK(); free_pipe(pipe); } else { /* this is a WF2Q queue (dn_flow_set) */ struct dn_flow_set *fs; DUMMYNET_LOCK(); fs = locate_flowset(p->fs.fs_nr); /* locate set */ if (fs == NULL) { DUMMYNET_UNLOCK(); return (ENOENT); /* not found */ } /* Unlink from list of flowsets. */ SLIST_REMOVE( &flowsethash[HASH(fs->fs_nr)], fs, dn_flow_set, next); if (fs->pipe != NULL) { /* Update total weight on parent pipe and cleanup parent heaps. */ fs->pipe->sum -= fs->weight * fs->backlogged ; fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs); fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs); #if 1 /* XXX should i remove from idle_heap as well ? 
*/ fs_remove_from_heap(&(fs->pipe->idle_heap), fs); #endif } purge_flow_set(fs, 1); DUMMYNET_UNLOCK(); } return 0 ; } /* * helper function used to copy data from kernel in DUMMYNET_GET */ static char * dn_copy_set(struct dn_flow_set *set, char *bp) { int i, copied = 0 ; struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp; DUMMYNET_LOCK_ASSERT(); for (i = 0 ; i <= set->rq_size ; i++) for (q = set->rq[i] ; q ; q = q->next, qp++ ) { if (q->hash_slot != i) printf("dummynet: ++ at %d: wrong slot (have %d, " "should be %d)\n", copied, q->hash_slot, i); if (q->fs != set) printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", i, q->fs, set); copied++ ; bcopy(q, qp, sizeof( *q ) ); /* cleanup pointers */ qp->next = NULL ; qp->head = qp->tail = NULL ; qp->fs = NULL ; } if (copied != set->rq_elements) printf("dummynet: ++ wrong count, have %d should be %d\n", copied, set->rq_elements); return (char *)qp ; } static size_t dn_calc_size(void) { struct dn_flow_set *fs; struct dn_pipe *pipe; size_t size = 0; int i; DUMMYNET_LOCK_ASSERT(); /* * Compute size of data structures: list of pipes and flow_sets. */ for (i = 0; i < HASHSIZE; i++) { SLIST_FOREACH(pipe, &pipehash[i], next) size += sizeof(*pipe) + pipe->fs.rq_elements * sizeof(struct dn_flow_queue); SLIST_FOREACH(fs, &flowsethash[i], next) size += sizeof (*fs) + fs->rq_elements * sizeof(struct dn_flow_queue); } return size; } static int dummynet_get(struct sockopt *sopt) { char *buf, *bp ; /* bp is the "copy-pointer" */ size_t size ; struct dn_flow_set *fs; struct dn_pipe *pipe; int error=0, i ; /* XXX lock held too long */ DUMMYNET_LOCK(); /* * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we * cannot use this flag while holding a mutex. 
*/ for (i = 0; i < 10; i++) { size = dn_calc_size(); DUMMYNET_UNLOCK(); buf = malloc(size, M_TEMP, M_WAITOK); DUMMYNET_LOCK(); if (size == dn_calc_size()) break; free(buf, M_TEMP); buf = NULL; } if (buf == NULL) { DUMMYNET_UNLOCK(); return ENOBUFS ; } bp = buf; for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH(pipe, &pipehash[i], next) { struct dn_pipe *pipe_bp = (struct dn_pipe *)bp; /* * Copy pipe descriptor into *bp, convert delay back to ms, * then copy the flow_set descriptor(s) one at a time. * After each flow_set, copy the queue descriptor it owns. */ bcopy(pipe, bp, sizeof(*pipe)); pipe_bp->delay = (pipe_bp->delay * 1000) / hz; pipe_bp->burst = div64(pipe_bp->burst, 8 * hz); /* * XXX the following is a hack based on ->next being the * first field in dn_pipe and dn_flow_set. The correct * solution would be to move the dn_flow_set to the beginning * of struct dn_pipe. */ pipe_bp->next.sle_next = (struct dn_pipe *)DN_IS_PIPE; /* Clean pointers. */ pipe_bp->head = pipe_bp->tail = NULL; pipe_bp->fs.next.sle_next = NULL; pipe_bp->fs.pipe = NULL; pipe_bp->fs.rq = NULL; pipe_bp->samples = NULL; bp += sizeof(*pipe) ; bp = dn_copy_set(&(pipe->fs), bp); } for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH(fs, &flowsethash[i], next) { struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp; bcopy(fs, bp, sizeof(*fs)); /* XXX same hack as above */ fs_bp->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE; fs_bp->pipe = NULL; fs_bp->rq = NULL; bp += sizeof(*fs); bp = dn_copy_set(fs, bp); } DUMMYNET_UNLOCK(); error = sooptcopyout(sopt, buf, size); free(buf, M_TEMP); return error ; } /* * Handler for the various dummynet socket options (get, flush, config, del) */ static int ip_dn_ctl(struct sockopt *sopt) { int error; struct dn_pipe *p = NULL; error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); if (error) return (error); /* Disallow sets in really-really secure mode. 
*/ if (sopt->sopt_dir == SOPT_SET) { #if __FreeBSD_version >= 500034 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error) return (error); #else if (securelevel >= 3) return (EPERM); #endif } switch (sopt->sopt_name) { default : printf("dummynet: -- unknown option %d", sopt->sopt_name); error = EINVAL ; break ; case IP_DUMMYNET_GET : error = dummynet_get(sopt); break ; case IP_DUMMYNET_FLUSH : dummynet_flush() ; break ; case IP_DUMMYNET_CONFIGURE : p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK); error = sooptcopyin(sopt, p, sizeof(struct dn_pipe_max), sizeof *p); if (error) break ; if (p->samples_no > 0) p->samples = &( ((struct dn_pipe_max*) p)->samples[0] ); error = config_pipe(p); break ; case IP_DUMMYNET_DEL : /* remove a pipe or queue */ p = malloc(sizeof(struct dn_pipe), M_TEMP, M_WAITOK); error = sooptcopyin(sopt, p, sizeof (struct dn_pipe), sizeof *p); if (error) break ; error = delete_pipe(p); break ; } if (p != NULL) free(p, M_TEMP); return error ; } static void ip_dn_init(void) { int i; if (bootverbose) printf("DUMMYNET with IPv6 initialized (040826)\n"); DUMMYNET_LOCK_INIT(); for (i = 0; i < HASHSIZE; i++) { SLIST_INIT(&pipehash[i]); SLIST_INIT(&flowsethash[i]); } ready_heap.size = ready_heap.elements = 0; ready_heap.offset = 0; wfq_ready_heap.size = wfq_ready_heap.elements = 0; wfq_ready_heap.offset = 0; extract_heap.size = extract_heap.elements = 0; extract_heap.offset = 0; ip_dn_ctl_ptr = ip_dn_ctl; ip_dn_io_ptr = dummynet_io; TASK_INIT(&dn_task, 0, dummynet_task, NULL); dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, taskqueue_thread_enqueue, &dn_tq); taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); callout_init(&dn_timeout, CALLOUT_MPSAFE); callout_reset(&dn_timeout, 1, dummynet, NULL); /* Initialize curr_time adjustment mechanics. 
*/ getmicrouptime(&prev_t); } #ifdef KLD_MODULE static void ip_dn_destroy(void) { ip_dn_ctl_ptr = NULL; ip_dn_io_ptr = NULL; DUMMYNET_LOCK(); callout_stop(&dn_timeout); DUMMYNET_UNLOCK(); taskqueue_drain(dn_tq, &dn_task); taskqueue_free(dn_tq); dummynet_flush(); DUMMYNET_LOCK_DESTROY(); } #endif /* KLD_MODULE */ static int dummynet_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: if (ip_dn_io_ptr) { printf("DUMMYNET already loaded\n"); return EEXIST ; } ip_dn_init(); break; case MOD_UNLOAD: #if !defined(KLD_MODULE) printf("dummynet statically compiled, cannot unload\n"); return EINVAL ; #else ip_dn_destroy(); #endif break ; default: return EOPNOTSUPP; break ; } return 0 ; } static moduledata_t dummynet_mod = { "dummynet", dummynet_modevent, NULL }; DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); MODULE_VERSION(dummynet, 1); ipfw_mod/dummynet/bsd_compat.c000644 000423 000000 00000015153 11310145556 017247 0ustar00luigiwheel000000 000000 /* * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Id: bsd_compat.c 4416 2009-12-10 09:49:21Z luigi $ * * kernel variables and functions that are not available in linux. */ #include #include /* do_div on 2.4 */ #include /* get_random_bytes on 2.4 */ #include "missing.h" /* * gettimeofday would be in sys/time.h but it is not * visible if _KERNEL is defined */ int gettimeofday(struct timeval *, struct timezone *); int ticks; /* kernel ticks counter */ int hz = 1000; /* default clock time */ long tick = 1000; /* XXX is this 100000/hz ? */ int bootverbose = 0; time_t time_uptime = 0; struct timeval boottime; int ip_defttl; int fw_one_pass = 1; u_long in_ifaddrhmask; /* mask for hash table */ struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ u_int rt_numfibs = RT_NUMFIBS; /* * pfil hook support. * We make pfil_head_get return a non-null pointer, which is then ignored * in our 'add-hook' routines. 
*/ struct pfil_head; typedef int (pfil_hook_t) (void *, struct mbuf **, struct ifnet *, int, struct inpcb *); struct pfil_head * pfil_head_get(int proto, u_long flags) { static int dummy; return (struct pfil_head *)&dummy; } int pfil_add_hook(pfil_hook_t *func, void *arg, int dir, struct pfil_head *h) { return 0; } int pfil_remove_hook(pfil_hook_t *func, void *arg, int dir, struct pfil_head *h) { return 0; } /* define empty body for kernel function */ int priv_check(struct thread *td, int priv) { return 0; } int securelevel_ge(struct ucred *cr, int level) { return 0; } int sysctl_handle_int(SYSCTL_HANDLER_ARGS) { return 0; } int sysctl_handle_long(SYSCTL_HANDLER_ARGS) { return 0; } void ether_demux(struct ifnet *ifp, struct mbuf *m) { return; } int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { return 0; } void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum) { return; } void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) { return; } u_short in_cksum_skip(struct mbuf *m, int len, int skip) { return 0; } u_short in_cksum_hdr(struct ip *ip) { return 0; } struct mbuf * ip_reass(struct mbuf *clone) { return clone; } #ifdef INP_LOCK_ASSERT #undef INP_LOCK_ASSERT #define INP_LOCK_ASSERT(a) #endif int jailed(struct ucred *cred) { return 0; } /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). If subnetsarelocal * is true, this includes other subnets of the local net. * Otherwise, it includes only the directly-connected (sub)nets. 
*/ int in_localaddr(struct in_addr in) { return 1; } int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) { size_t valsize = sopt->sopt_valsize; if (len < valsize) sopt->sopt_valsize = valsize = len; bcopy(buf, sopt->sopt_val, valsize); return 0; } /* * copy data from userland to kernel */ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) { size_t valsize = sopt->sopt_valsize; if (valsize < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; bcopy(sopt->sopt_val, buf, valsize); return 0; } void getmicrouptime(struct timeval *tv) { #ifdef _WIN32 #else do_gettimeofday(tv); #endif } #include char * inet_ntoa_r(struct in_addr ina, char *buf) { #ifdef _WIN32 #else unsigned char *ucp = (unsigned char *)&ina; sprintf(buf, "%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff); #endif return buf; } char * inet_ntoa(struct in_addr ina) { static char buf[16]; return inet_ntoa_r(ina, buf); } int random(void) { #ifdef _WIN32 return 0x123456; #else int r; get_random_bytes(&r, sizeof(r)); return r & 0x7fffffff; #endif } /* * do_div really does a u64 / u32 bit division. * we save the sign and convert to uint befor calling. * We are safe just because we always call it with small operands. */ int64_t div64(int64_t a, int64_t b) { #ifdef _WIN32 int a1 = a, b1 = b; return a1/b1; #else uint64_t ua, ub; int sign = ((a>0)?1:-1) * ((b>0)?1:-1); ua = ((a>0)?a:-a); ub = ((b>0)?b:-b); do_div(ua, ub); return sign*ua; #endif } /* * compact version of fnmatch. */ int fnmatch(const char *pattern, const char *string, int flags) { char s; if (!string || !pattern) return 1; /* no match */ while ( (s = *string++) ) { char p = *pattern++; if (p == '\0') /* pattern is over, no match */ return 1; if (p == '*') /* wildcard, match */ return 0; if (p == '.' 
|| p == s) /* char match, continue */ continue; return 1; /* no match */ } /* end of string, make sure the pattern is over too */ if (*pattern == '\0' || *pattern == '*') return 0; return 1; /* no match */ } #ifdef _WIN32 /* * as good as anywhere, place here the missing calls */ void * my_alloc(int size) { void *_ret = ExAllocatePoolWithTag(0, size, 'wfpi'); if (_ret) memset(_ret, 0, size); return _ret; } void panic(const char *fmt, ...) { printf("%s", fmt); for (;;); } #include extern int _vsnprintf(char *buf, int buf_size, char * fmt, va_list ap); /* * Windows' _snprintf doesn't terminate buffer with zero if size > buf_size */ int snprintf(char *buf, int buf_size, char *fmt, ...) { va_list ap; va_start(ap, fmt); if (_vsnprintf(buf, buf_size, fmt, ap) < 0) buf[buf_size - 1] = '\0'; va_end(ap); return 0; } #endif ipfw_mod/dummynet/Makefile000644 000423 000000 00000014545 11311404347 016431 0ustar00luigiwheel000000 000000 # # $Id: Makefile 4490 2009-12-14 09:55:26Z marta $ # # gnu Makefile to build linux module for ipfw+dummynet. # # The defaults are set to build without modifications on PlanetLab # and possibly 2.6 versions. # # Some variables need to have specific names, because they are used # by the build infrastructure on Linux and OpenWrt. They are: # # ccflags-y additional $(CC) flags # M used by Kbuild, we must set it to `pwd` # obj-m list of .o modules to build # $(MOD)-y for each $MOD in obj-m, the list of objects # obj-y same as above, for openwrt # O_TARGET the link target, for openwrt # EXTRA_CFLAGS as the name says... in openwrt # EXTRA_CFLAGS is used in 2.6.22 module kernel compilation too # KERNELPATH the path to the kernel sources or headers # # Not sure about this (the name might be reserved) # ipfw-cflags our flags for building the module # # Other variables are only private and can be renamed. 
They include: # # VER linux version we are building for (2.4 2.6 or openwrt) #--- $(warning including dummynet/Makefile) # lets default for 2.6 for planetlab builds VER ?= 2.6 # General values obj-m := ipfw_mod.o # generic cflags used on all systems #ipfw-cflags += -DIPFW_HASHTABLES ipfw-cflags += -DIPFIREWALL_DEFAULT_TO_ACCEPT -DTRACE # _BSD_SOURCE enables __FAVOR_BSD (udp/tcp bsd structs instead of posix) ipfw-cflags += -D_BSD_SOURCE ipfw-cflags += -DKERNEL_MODULE # build linux kernel module # the two header trees for empty and override files ipfw-cflags += -I $(M)/include_e -I $(M)/include ipfw-cflags += -include $(M)/../glue.h # headers $(warning "---- Building dummynet kernel module for Version $(VER)") # We have three sections for OpenWrt, Linux 2.4 and Linux 2.6 # ifeq ($(VER),openwrt) M=. obj-y := ipfw2_mod.o bsd_compat.o \ in_cksum.o ip_dummynet.o ip_fw2.o ip_fw_pfil.o radix.o O_TARGET := ipfw_mod.o # xcflags-y is a temporary variable where we store build options xcflags-y += -O1 -DLINUX_24 xcflags-y += -g EXTRA_CFLAGS := $(xcflags-y) $(ipfw-cflags) # we should not export anything #export-objs := ipfw2_mod.o -include $(TOPDIR)/Rules.make else # !openwrt, below we do linux builds for 2.4 and 2.6 # KERNELPATH is where the kernel headers reside. On PlanetLab # it is set already by the build system. # We can override it from the command line, or let the system guess. ifneq ($(shell echo $(VER)|grep '2.4'),) # The linux 2.4 version # guess the kernel path -- or is it under /lib/modules ? 
KERNELPATH ?= /usr/src/`uname -r`/build # Guess the gcc include directory # The gcc version is in the last line returned by gcc -v # gcc version 4.3.2 (Debian 4.3.2-1.1) MYGCC_VER ?= $(shell gcc -v 2>&1 |tail -n 1 | cut -d " " -f 3) # We don't know the exact directory unde /usr/lib/gcc so we guess MYGCC_INCLUDE ?= $(shell echo /usr/lib/gcc/*/$(MYGCC_VER) | cut -d " " -f 1)/include $(warning "---- gcc includes guessed to $(MYGCC_INCLUDE)") # additional warning #WARN = -Wp,-MD,/home/luigi/ports-luigi/dummynet-branches/ipfw_mod/dummynet/.ipfw2_mod.o.d #WARN += -Iinclude -include include/linux/autoconf.h WARN += -Wall -Wundef WARN += -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing WARN += -fno-common -Werror-implicit-function-declaration # WARN += -O2 -fno-stack-protector -m32 -msoft-float -mregparm=3 # -mregparm=3 gives a printk error WARN += -m32 -msoft-float # -mregparm=3 #WARN += -freg-struct-return -mpreferred-stack-boundary=2 WARN += -Wno-sign-compare WARN += -Wdeclaration-after-statement -Wno-pointer-sign ccflags-y += -O1 -DLINUX_24 CFLAGS = -DMODULE -D__KERNEL__ -nostdinc \ -isystem ${KERNELPATH}/include -isystem $(MYGCC_INCLUDE) ${ccflags-y} # The Main target all: mod24 else ifeq ($(IPFW_PLANETLAB),1) $(warning "---- Building for PlanetLab") ipfw-cflags += -DIPFW_PLANETLAB # PlanetLab compilation endif # if not set, use the version from the installed system KERNELPATH ?= /lib/modules/`uname -r`/build # the latest kernel #KERNELPATH = /usr/src/linux-2.6.22 $(warning "---- Building Version 2.6 $(VER) in $(KERNELPATH)") WARN := -O1 -Wall -Werror -DDEBUG_SPINLOCK -DDEBUG_MUTEXES # The main target # Required by kernel <= 2.6.22, ccflags-y is used on newer version LINUX_VERSION_CODE := $(shell grep LINUX_VERSION_CODE $(KERNELPATH)/include/linux/version.h|cut -d " " -f3) ifeq ($(LINUX_VERSION_CODE),132630) EXTRA_CFLAGS += $(ccflags-y) endif all: include_e $(MAKE) -C $(KERNELPATH) V=1 M=`pwd` modules endif #-- back to the common section of code # the list of 
objects used to build the module ipfw_mod-y = $(IPFW_SRCS:%.c=%.o) # Original ipfw and dummynet sources + FreeBSD stuff, IPFW_SRCS = ip_fw2.c ip_dummynet.c ip_fw_pfil.c in_cksum.c IPFW_SRCS += radix.c # Module glue and functions missing in linux IPFW_SRCS += ipfw2_mod.c bsd_compat.c hashtable.c # additional $(CC) flags ccflags-y += $(WARN) ccflags-y += $(ipfw-cflags) ccflags-y += -g mod24: include_e $(obj-m) $(obj-m): $(ipfw_mod-y) $(LD) $(LDFLAGS) -m elf_i386 -r -o $@ $^ clean: -rm -f *.o *.ko Module.symvers *.mod.c -rm -rf include_e distclean: clean -rm -f .*cmd modules.order opt_* -rm -rf .tmp_versions include_e -rm -rf .*.o.d # support to create empty dirs and files in include_e/ # EDIRS is the list of directories, EFILES is the list of files. EDIRS= altq arpa machine net netinet netinet6 sys EFILES += opt_inet6.h opt_ipfw.h opt_ipsec.h opt_mpath.h EFILES += opt_mbuf_stress_test.h opt_param.h EFILES += altq/if_altq.h EFILES += arpa/inet.h EFILES += machine/in_cksum.h EFILES += net/ethernet.h net/netisr.h net/pf_mtag.h EFILES += net/vnet.h EFILES += netinet/ether.h netinet/icmp6.h netinet/if_ether.h EFILES += netinet/in.h netinet/in_pcb.h netinet/in_var.h EFILES += netinet/ip_carp.h netinet/ip_var.h netinet/pim.h EFILES += netinet/sctp.h netinet/tcp_timer.h netinet/tcpip.h EFILES += netinet/udp_var.h EFILES += netinet6/ip6_var.h EFILES += sys/_lock.h sys/_rwlock.h sys/_mutex.h sys/jail.h EFILES += sys/condvar.h sys/eventhandler.h sys/domain.h EFILES += sys/limits.h sys/lock.h sys/mutex.h sys/priv.h EFILES += sys/proc.h sys/rwlock.h sys/socket.h sys/socketvar.h EFILES += sys/sysctl.h sys/time.h sys/ucred.h M ?= $(shell pwd) include_e: echo "running in $M" -@rm -rf $(M)/include_e opt_* -@mkdir -p $(M)/include_e -@(cd $(M)/include_e; mkdir -p $(EDIRS); touch $(EFILES) ) endif # !openwrt test_radix: test_radix.o radix.o test_radix: CFLAGS=-Wall -Werror -O1 ipfw_mod/dummynet/test_radix.o000644 000423 000000 00000014530 11311262406 017307 0ustar00luigiwheel000000 
000000 ELF @4(US$E EEXEPEH ED$T$ $ӉEED$D$$}t E$$[]Ðt&U(EEE@@D$D$$Í'L$qUVSQ,E$@ED$ $ED$D$E}UEB@E@8EP8EP0E@ : ; I$ >   I&I : ;  : ; I8 ' I I < I!I/  : ;  : ;  : ; I' .: ; ' I@: ; I 4: ; I .? : ; ' I@: ; I 4: ; I? < E GNU C 4.2.1 20070719 [FreeBSD]test_radix.c/usr/ports-luigi/dummynet-branches/ipfw_mod/dummynetsigned char__uint8_t4unsigned charshort intshort unsigned intint__uint32_t8unsigned int__int64_t@ long long intlong long unsigned intlong unsigned intdoublecharlong int__off_t5u_char2uint8_tTuint32_t^caddr_tvSSfpos_t/g__sbufF _baseG# _sizeH#__sFILEXfZ _pg# _rh# _wi# _flagsj# _filek# _bfl# _lbfsizem# _cookiepy# _closeqj# _readr#$ _seeks#( _writet#, _ubw#0 _extrax#8 _ury#< _ubuf|#@ _nbuf}#C _lb#D _blksize#L _offset#P j yZ  y  p  y    y   __sFILEX v vFILE  ;O rn_Key<# rn_Mask=# rn_Dupedkey>#radix_node1 rn_mklist2# rn_parent3# rn_bit4# rn_bmask5S# rn_flags6{# rn_uE# O @ rn_OffA# rn_LB# rn_RC# :6rn_leaf? rn_nodeDradix_mask2 rm_bitY# rm_unusedZS# rm_flags[{# rm_mklist\# rm_rmu`# rm_refsa# 6]rmu_mask^rmu_leaf_walktree_f_tg   yradix_node_head|ib rnh_treetopj# rnh_addrsizek# rnh_pktsizel# rnh_addaddro# rnh_addpktr# rnh_deladdrt# rnh_delpktv# rnh_matchaddrx# rnh_lookupz# rnh_matchpkt|#$ rnh_walktree~#( rnh_walktree_from#, rnh_close)#0 rnh_nodes/#4  y y  b  y y   y     y   y y  y)   O?vd g len g# data# wvtable_entryD rn# x?#0 mask?#8 value#@ Ovdelk( rnargyrnh( lent- pwlist'p,z rn&arg&yent(- tmain0X argc/argv/ h1\rn2`i3dp4- h__bswap32/ _x__XP__stderrpB  /usr/include/machine/usr/include/sys/usr/include/usr/include/nettest_radix.cendian.h_types.h_types.htypes.hstdio.hradix.hrf%dVdeV,v,d:,,w$ %%%Vc;Vdel returns %p walking on node %d inserting %d gives %p --- walking... 
--- deleting all --- walking again | kAB Dp3AB (D  F AB C AB ttkupqtqstsutQttuttuI z mainGCC: (GNU) 4.2.1 20070719 [FreeBSD].symtab.strtab.shstrtab.rel.text.data.bss.debug_abbrev.rel.debug_info.rel.debug_line.rodata.rel.debug_frame.debug_loc.rel.debug_pubnames.rel.debug_aranges.comment@ %+0<BI > hRaN ^hpjf @ wd+ @F  Hf& @` kp3   !+38=FRYtest_radix.cdellist__bswap32__stderrpfprintffreemainrn_init2rn_initheadcallocfwrite4CK\~ )3NSkosw @ D H           4 8L Px |ipfw_mod/dummynet/test_radix.c000644 000423 000000 00000003631 11311370776 017305 0ustar00luigiwheel000000 000000 /* * Test the radix tree net */ #include #include #include #include #include /* htonl */ #include "include/net/radix.h" struct d { uint8_t len[4]; uint32_t data; }; struct table_entry { struct radix_node rn[2]; struct d x, mask; int value; }; static int del(struct radix_node *rn, void *arg) { struct radix_node_head * const rnh = arg; struct table_entry *ent; ent = (struct table_entry *) rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); fprintf(stderr, "del returns %p\n", ent); if (0 && ent != NULL) free(ent); return (0); } int list(struct radix_node *rn, void *arg) { struct table_entry *ent = (struct table_entry *)rn; fprintf(stderr, "walking on node %d\n", ent->value); return (0); } static void print_dt(struct timeval *start, struct timeval *end, int n, const char *msg) { int ds = 0, du, l; du = end->tv_usec - start->tv_usec; if (du < 0) { ds = -1; du += 1000000; } ds += end->tv_sec - start->tv_sec; if (n <= 1) n = 1; l = (ds * 1000000+ du)/n; fprintf(stderr, "%d tries in %d.%06ds, %dus each\n", n, ds, du, l); } static void test1(struct radix_node_head *h, int n) { struct table_entry *p; struct timeval start, end; int i; p = calloc(n, sizeof(*p)); if (!p) return; for (i=0; i < n; i++) { p->value = i; p->x.len[0] = p->mask.len[0] = 8; p->mask.data = 0xffffffff; p->x.data = htonl(i); } gettimeofday(&start, NULL); for (i=0; i < n; i++) { h->rnh_addaddr(&(p->x), &(p->mask), h, (void *)p); } 
gettimeofday(&end, NULL); print_dt(&start, &end, n, NULL); h->rnh_walktree(h, del, h); } int main(int argc, char *argv[]) { struct radix_node_head *h = NULL; rn_init(64); // XXX bits or bytes ? rn_inithead((void **)&h, 32); /* data offset in bits */ test1(h, 1000000); return 0; } ipfw_mod/dummynet/hashtable.h000644 000423 000000 00000002666 11310200740 017064 0ustar00luigiwheel000000 000000 #ifndef __HASHTABLE_H_ #define __HASHTABLE_H_ /* * new_table_init creates a table with the specified * number of buckets (size). * obj_size is the size of individual objects (key+value), * the first function is the hash function (called with the * size and the payload pointer) * the second function is the compare function, to tell if two * objects are the same (XXX we could spare this if we also * pass a key_size and use a bcmp for comparisons) * Not extensible at the moment. */ struct malloc_type; struct ipfw_ht; struct ipfw_ht* ipfw_ht_new(int size, int obj_size, uint32_t (hash_fn)(const void *, uint32_t size), int (cmp_fn)(const void*, const void*, int sz), struct malloc_type *mtype); void *ipfw_ht_destroy(struct ipfw_ht *h); /* add a new object to the table, return success/failure */ int ipfw_ht_insert(struct ipfw_ht *h, const void *obj); /* * returns a pointer to the matching object or NULL if not found. * No refcounts. */ const void *ipfw_ht_extract(struct ipfw_ht *h, const void *key); /* remove an object from the table */ int ipfw_ht_remove(struct ipfw_ht *h, const void *key); /* return the number of elements in the table */ int ipfw_ht_count(const struct ipfw_ht *h); /* returns the first or next element. Works by hashing the * current object and then finds the next one. 
* If obj == NULL returns the first object in the table */ const void *ipfw_ht_next(struct ipfw_ht *h, const void *obj); #endif ipfw_mod/dummynet/new_glue.c000644 000423 000000 00000011445 11310417261 016734 0ustar00luigiwheel000000 000000 #include "missing.h" #define IPFW_INTERNAL #include #include "hashtable.h" #define IPFW_NEWTABLES_MAX 256 struct t_o { /* Object stored in the hash table */ uint32_t addr; uint32_t value; uint8_t mask; }; MALLOC_DEFINE(M_IPFW_HTBL, "ipfw_tbl", "IpFw tables"); int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen, uint32_t value); int new_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr); int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen); int new_flush_table(struct ip_fw_chain *ch, uint16_t tbl); int flush_table(struct ip_fw_chain *ch, uint16_t tbl); int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val); int new_count_table_entry(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); int new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl); int dump_table(struct ip_fw_chain *ch, ipfw_table *tbl); int init_tables(struct ip_fw_chain *ch); /* hash and compare functions for 32-bit entries */ static uint32_t simple_hash32(const void *key, uint32_t size) { uint32_t ret = *(const uint32_t *)key % size; return ret; } static int cmp_func32(const void *key1, const void *key2, int sz) { int k1 = *(const int *)key1; int k2 = *(const int *)key2; int ret; if (k1 < k2) ret = -1; else if (k1 > k2) ret = 1; else ret = 0; return ret; } int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen, uint32_t value) { /* TODO: * - Search the correct hash table (tbl - IPFW_TABLES_MAX) * - Search if the entry already exists * - Insert the new entry in the table * - Possibly reallocate the table if it is too small */ struct t_o obj; 
int ret; int i = tbl - IPFW_TABLES_MAX; int size = 128; int obj_size = sizeof(struct t_o); if (i < 0 || i > size-1) /* wrong table number */ return 1; if (ch->global_tables[i] == NULL) { ch->global_tables[i] = new_table_init(size, obj_size, simple_hash32, cmp_func32, M_IPFW_HTBL); } obj.addr = addr; obj.value = value; obj.mask = mlen; /* Insert the object in the table */ ret = new_table_insert_obj(ch->global_tables[i], &obj); return ret; } int new_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr) { int ret; int nr = tbl - IPFW_TABLES_MAX; ret = new_table_delete_obj(ch->global_tables[nr], &addr); return ret; } int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen) { if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { new_del_table_entry(ch, tbl, addr); return 0; } return (EINVAL); } int new_flush_table(struct ip_fw_chain *ch, uint16_t tbl) { new_table_destroy(ch->global_tables[tbl - IPFW_TABLES_MAX]); return 0; } int flush_table(struct ip_fw_chain *ch, uint16_t tbl) { if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) return new_flush_table(ch, tbl); return (EINVAL); } int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { struct new_hash_table *h; const struct t_o *obj; h = ch->global_tables[tbl - IPFW_TABLES_MAX]; obj = new_table_extract_obj(h, (void *)&addr); if (obj == NULL) return 0; /* no match */ *val = obj->value; return 1; /* match */ } return 0; } int new_count_table_entry(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { *cnt = new_table_get_element(ch->global_tables[tbl - IPFW_TABLES_MAX]); return 0; } int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { new_count_table_entry(ch, tbl, cnt); return (0); } return (EINVAL); } int new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl) { /* fill the tbl with all entryes 
*/ ipfw_table_entry *ent; const struct t_o *obj; int i; int n_el; int nr = tbl->tbl - IPFW_TABLES_MAX; struct new_hash_table *t = ch->global_tables[nr]; i = 0; tbl->cnt = 0; /* XXX determine tbl->size */ n_el = new_table_get_element(t); obj = NULL; for (; n_el > 0; n_el--) { obj = table_next(t, obj); if (obj == NULL) break; ent = &tbl->ent[tbl->cnt]; ent->addr = obj->addr; ent->value = obj->value; ent->masklen = obj->mask; tbl->cnt++; } return 0; } int dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) { if (tbl->tbl >= IPFW_TABLES_MAX && tbl->tbl < IPFW_NEWTABLES_MAX) { new_dump_table_entry(ch, tbl); return (0); } return (EINVAL); } int init_tables(struct ip_fw_chain *ch) { int i; /* Initialize new tables XXXMPD */ for (i = 0; i < IPFW_NEWTABLES_MAX - IPFW_TABLES_MAX; i++) { memset(&ch->global_tables[i], sizeof(struct new_hash_table*), 0); } return (0); } ipfw_mod/dummynet/radix.o000644 000423 000000 00000050634 11311262406 016255 0ustar00luigiwheel000000 000000 ELF @4(UE EUU-MQ MA E!Єt E@E URUMAfyNjEÍ&'U EEUUM MFEP MA E!Єt$EP MA E!Єt E@E URUMAfyE U$EEU UEMMEEEЋE)ȉEEEE}~RE)EJUЋM!Ѕt EUM8EEtEE9ErUEt ERM9Mr܃}t7}y1EU)‰UMEt EE9Er܃}E܋EÐ&U(E} t:E@ D$D$E $E}u ELUB EED$E$E}t"}t E@E}t UB;EuEEEÍt&'U,E UUH M@EEEEވUE8EvUވUE߉E}u M&EUUԉE؋M9M~EԉE؋U؉UEEEEEEEEE/MM1‹M!Єt EEEEE;ErEEÐt&UXEEE EEEȋUUMA EEE-EP MA E!Єt E@E URUMAfyNjUBtMAEEEȋEP E‰ŰEEEMM8u5EEE;ErUB t MIMEEMM1%EEm}Ѓ}UȋE)ȉEEEԋEЉE؋UUE@ueMIMZUB tMA;E|3EE6ED$UT$E$t MME@E}uUUMIMEEUBtM;EE܋@EUB MMEE9E~UUMMU܋BD$ML$E$E Eċ@Eă}tMċQM܋A9u}t%ED$EĉD$E$tUĉU&M܋IM܃}0E;EEEÐt&USEEUE fB] B EBE B MfAEMA EPB B MA MЃ[]Ív'ULEEE EE@ EEEED$E$EċEU‰UȋMċQ E‰UЋEEEUM8EEuE;ErۋEUĉU9EEȃEЃ1%EԋUȋE)E}ԃm}uEE܋UUȋM܉M؋U܋B EMA !Єt E܋@E U܋RU܋E̋MQ9wED$ẺD$E$3EċUċBEM؋A EMA !Єu UċE؉P EċM؉AMċU܉JU؋EĉPMċA EMA !Єu U܋EĉPEMĉAU܋EĉPMMEÍ&UXEEEEEء9E~E؃}uEU;U 4E}~(E¡HT$L$$M؉ME;E~)E+EE‹EEL$T$$UЉEm9Ev ẼtE̋)щM؋E;E#9E| U؉ 4E#9E}"+EEL$$M؉  D$$EȋE؋MȋQ D$T$ $tE}u} t EȉE0D$$EȋUȉU}u EQEȃ0E̋M̉ME؋D$ẺD$$ MȉL$ ED$T$Ẻ$EȋEt;D$ D$+D$$E$UȉUE؋MMEEU‰UEM9Ms E : ; I$ >   I&I : ;  : ; I8 ' I I < I!I/  : 
;  : ;  : ; I' .: ; ' I@: ; I 4: ; I .? : ; ' I@4: ; I : ; I 4: ;I .? : ;' I@: ;I 4: ;I  : ; .: ;' I@ : ;I ! " #.? : ;' @$4: ; I? < bGNU C 4.2.1 20070719 [FreeBSD]radix.c/usr/ports-luigi/dummynet-branches/ipfw_mod/dummynetusigned charunsigned charshort intshort unsigned intintunsigned int__int64_t@long long intlong long unsigned intlong unsigned intdoublecharlong int__off_t5u_char2caddr_tvp+|+fpos_t/?__sbufF _baseG# _sizeH#__sFILEXf _pg# _rh# _wi# _flagsj# _filek# _bfl# _lbfsizem# _cookiepQ# _closeq## _readrC#$ _seeksc#( _writet#, _ubw#0 _extrax#8 _ury#< _ubuf|#@ _nbuf}#C _lb#D _blksize#L _offset#P # Q C Q p ) c Q  I  Q v i __sFILEX N NFILE ; rn_Key#radix_node1 rn_mklist2r# rn_parent3# rn_bit4# rn_bmask5+# rn_flags6S# rn_uE#  @ rn_OffA# rn_LB# rn_RC# :rn_leaf?rn_nodeDradix_mask2r rm_bitY# rm_unusedZ+# rm_flags[S# rm_mklist\r# rm_rmu`x# rm_refsa# ]rmu_mask^armu_leaf_walktree_f_tg   Qradix_node_head|i rnh_treetopj# rnh_addrsizek# rnh_pktsizel# rnh_addaddro@# rnh_addpktr@# rnh_deladdrt`# rnh_delpktv`# rnh_matchaddrx{# rnh_lookupz`# rnh_matchpkt|{#$ rnh_walktree~#( rnh_walktree_from#, rnh_close#0 rnh_nodes#4 : Q Q :  ` Q Q :F { Q :f  :  Q  : Q Q  Q  : Nrn_searchROQ)xuxvau|rn_search_m`,Q) Qxutvauxmau|rn_refinesXc  Q#Qmau`naudlimauhlim2aullonger͹pmasks_are_equalιtrn_lookup Q Q):xulatrn_satisfies_leafl trialpleafPskipcppudcp2puhcp3pulpplengthtrn_matchK Q):va\tu@xuDcpauHcp2auLa`saved_tdtophofflvlenpmatched_offttestuPbuTrn_bituXon1?}M3mYru\rn_newpairP vQ b nodesttuxtRrn_insert4\ Q): dupentry\  nodes va`topdhvlenltuDcpauHbuLttpon1k!7 cp2auPcmp_resuTatpuXxu\rn_addmask `#Q search skipahxuHcpauLauPbuTmlenuXju\maskduplicateddm0lisnormalpsaved_xt"r last_zeroednormal_chars.rn_lexobetter@  ?Q#?QmpAutnpAuxlimAu|Srn_new_radix_maskPr h w ttNu nextOru mQru|rn_addrouteip fQ#fQ)g: treenodesh vjaTjaXtkuDxkuHttkuL.l\topl`bmdb_leafmfkeyduplicatednPmmaskoahmprlmpppon2xxtrrn_delete \7Q netmask_argQ):tu@puDxuHttuLmrHsaved_mrLmpPdupedkeyT.Xtop\va`adbhlvlenpon1Tout+ Qmmrtrn_walktree_from`"<3 h: 
aQ mQ f wQerror\base`nextdxahxmlrnuXlastu\stoppingplastbtrn_walktree>0 h h;: f< w=Qerror?lbase@pnext@trnAulrn_initheado /:)m: offnrnhp:uxtqSttqRtttqQQ#rn_init0\|cppppt#rn_init2`u maxk +Nmax_keylenGrn_mkfreelistHrmask_rnheadI: rn_zerosPprn_onesPpaddmask_keyPp$__stderrp_ /usr/include/machine/usr/include/sys/usr/include/usr/include/netradix.c_types.h_types.htypes.hstdio.hradix.hg: e2/:dd#sdd,m,,,esddH,̎)d&H LR5N \P^8Tj:f dP  w;? Q?  Q?7?&@dD0  G R`q19PvDN ` q|   `0  (H")3:?Fp R <\d p0,x}`radix.crn_searchrn_search_mrn_satisfies_leafrn_onesrn_newpairrn_insertnormal_chars.3155last_zeroed.3148max_keylenmask_rnheadaddmask_keyrn_lexobetterrn_new_radix_maskrn_mkfreelistrn_walktree_fromrn_walktreern_zerosrn_refinesrn_lookuprn_addmaskrn_matchbcopybzerobcmpcalloc__stderrpfwritefreemallocrn_addroutern_deletefprintfrn_initheadrn_initexitrn_init2>"k#  0 $L g $l          %     9 I &p  '  $  (  )& *     + (  ) % "w H(ch) &"h()()We()()9Gn(.L'u,-#! 7@([`)ky+~(.1%)/2(:BJ.V1jo0jnr )cgktv z ~        L             ( , 0 9 H     ( 8 < w {     ko;?C$#QUYHLPNRVm/H04HL`dx| $(<@TXlpipfw_mod/dummynet/hashtable.c000644 000423 000000 00000010116 11310417261 017054 0ustar00luigiwheel000000 000000 /* * XXX Copyright */ #include #include #include #include "hashtable.h" // XXX fix path later struct new_obj { struct new_obj *next; /* Next object in the list */ char obj[0]; /* actually bigger */ }; /* Hash table */ struct ipfw_ht { int table_size; /* Size of the table (buckets) */ int table_obj; /* number of object in the table */ int obj_size; /* size of object (key + value) */ /* Hash function for this table */ uint32_t (*hash)(const void *key, uint32_t size); int (*cmp)(const void *obj1, const void *obj2, int sz); int hash_arg; /* hash function parameter */ struct malloc_type *mtype; struct new_obj **table_ptr; /* Pointer to the table */ }; /* * initialize an hash table * - size: size of table (number of buckets) * - obj_size: size of the object to store in the table (key + value) * - hf: pointer to 
the hash function for this table * - compare: function to compare two objects * * Return value: pointer to the hash table, NULL if error occurs */ struct ipfw_ht * ipfw_ht_new(int size, int obj_size, uint32_t (hf)(const void *, uint32_t size), int (compare)(const void *, const void *, int), struct malloc_type *mtype) { struct ipfw_ht *h; h = malloc(sizeof(*h), mtype, M_NOWAIT | M_ZERO); if (h == NULL) return NULL; h->table_ptr = malloc(size * sizeof(struct new_obj*), mtype, M_NOWAIT | M_ZERO); if (h->table_ptr == NULL) { /* no memory */ free (h, mtype); return 0; } h->table_size = size; h->hash = hf; h->cmp = compare; h->mtype = mtype; h->obj_size = obj_size; return h; } int ipfw_ht_insert(struct ipfw_ht *h, const void *obj) { int i; /* array index */ struct new_obj *o, *ot; i = h->hash(obj, h->table_size); /* same key not allowed */ for (ot = h->table_ptr[i]; ot; ot = ot->next) { if (h->cmp(obj, ot->obj, h->obj_size) == 0) return 1; /* error */ } /* allocate a single chunk of memory */ o = malloc(sizeof(*o) + h->obj_size, h->mtype, M_NOWAIT); if (o == NULL) return 1; bcopy(obj, o->obj, h->obj_size); /* put at the head */ o->next = h->table_ptr[i]; h->table_ptr[i] = o; h->table_obj++; return 0; } int ipfw_ht_remove(struct ipfw_ht *h, const void *obj) { int i; struct new_obj *obj1, *prev; i = h->hash(obj, h->table_size); for (prev = NULL, obj1 = h->table_ptr[i]; obj1; obj1 = obj1->next) { if (h->cmp(obj, obj1->obj, h->obj_size) != 0) continue; /* Object found, delete */ if (prev != NULL) prev->next = obj1->next; else h->table_ptr[i] = obj1->next; free(obj1, h->mtype); h->table_obj--; return 0; } return 1; /* Not found */ } const void * ipfw_ht_extract(struct ipfw_ht *h, const void *obj) { struct new_obj *o; int i; if (h == NULL || h->table_obj == 0) return NULL; i = h->hash(obj, h->table_size); for (o = h->table_ptr[i]; o; o = o->next) { if (h->cmp(o->obj, obj, h->obj_size) == 0) return o->obj; } return NULL; } void * ipfw_ht_destroy(struct ipfw_ht *h) { int i; 
struct new_obj *cur, *next; if (!h || !h->table_ptr) return NULL; for (i = 0; i < h->table_size; i++) { for (cur = h->table_ptr[i]; cur; cur = next) { next = cur->next; free(cur, h->mtype); } } free (h->table_ptr, h->mtype); free (h, h->mtype); return NULL; } /* returns the number of elements in the table */ int ipfw_ht_count(const struct ipfw_ht *h) { return h ? h->table_obj : 0; } const void * table_next(struct ipfw_ht *h, const void *o) { int i; struct new_obj *obj; if (h == NULL || h->table_obj == 0) return NULL; if (o == NULL) { for (i = 0; i < h->table_size; i++) if (h->table_ptr[i]) return h->table_ptr[i]->obj; return NULL; /* XXX should not happen */ } /* here we can optimize if we can map o to the bucket, * otherwise locate o and find the next one. */ i = h->hash(o, h->table_size); for (obj = h->table_ptr[i]; obj; obj = obj->next) { if (h->cmp(obj->obj, o, h->obj_size) == 0) break; } if (obj && obj->next != NULL) return obj->next->obj; /* take the first of the next bucket */ for (i++; i < h->table_size; i++) { if (h->table_ptr[i]) return h->table_ptr[i]->obj; } return NULL; } ipfw_mod/dummynet/test_radix000755 000423 000000 00000065652 11311262406 017070 0ustar00luigiwheel000000 000000 ELF Є40[4 (# 444(((0X(   /libexec/ld-elf.so.1FreeBSD     (52J4cBعR;!toC.]Gܹ${m+ _Jv_RegisterClasseslibc.so.7bcmp__stderrpmallocbzerocallocenvironfprintf__prognamebcopy_init_tlsatexitfwritefree_endFBSD_1.0(z     ! 
5%%h%h%h%h%h %h(%h0%h8p%h@`%hHP%hP@UVS]эt ۉ5ع~6Et/t# t/u񣄸u츠t4 $$wEt$D$$'$͐U=t ҡuÐUxtt $xÐUS$E EEXEPEH ED$T$ $ӉEED$D$s$}t E$x$[]Ðt&U(EEE@@D$D$$Í'L$qUVSQ,E$@*ED$ $ED$D$E}UEB@E@8EP8EP0E@N^n~$FreeBSD: src/lib/csu/common/crtbrand.c,v 1.4.20.1 2007/12/06 13:43:43 kib Exp $$FreeBSD: src/lib/csu/i386-elf/crt1.c,v 1.15 2005/10/07 22:13:17 bde Exp $GCC: (GNU) 4.2.1 20070719 [FreeBSD]GCC: (GNU) 4.2.1 20070719 [FreeBSD]GCC: (GNU) 4.2.1 20070719 [FreeBSD]GCC: (GNU) 4.2.1 20070719 [FreeBSD]GCC: (GNU) 4.2.1 20070719 [FreeBSD]ЅI puI z mainI frn_refinesc rn_lookupl rn_matchb rn_addmaskwrn_addroutern_deletern_inithead@rn_init|rn_init2E GNU C 4.2.1 20070719 [FreeBSD]test_radix.c/usr/ports-luigi/dummynet-branches/ipfw_mod/dummynetЅjsigned char__uint8_t4unsigned charshort intshort unsigned intint__uint32_t8unsigned int__int64_t@ long long intlong long unsigned intlong unsigned intdoublecharlong int__off_t5u_char2uint8_tTuint32_t^caddr_tvSSfpos_t/g__sbufF _baseG# _sizeH#__sFILEXfZ _pg# _rh# _wi# _flagsj# _filek# _bfl# _lbfsizem# _cookiepy# _closeqj# _readr#$ _seeks#( _writet#, _ubw#0 _extrax#8 _ury#< _ubuf|#@ _nbuf}#C _lb#D _blksize#L _offset#P j yZ  y  p  y    y   __sFILEX v vFILE  ;O rn_Key<# rn_Mask=# rn_Dupedkey>#radix_node1 rn_mklist2# rn_parent3# rn_bit4# rn_bmask5S# rn_flags6{# rn_uE# O @ rn_OffA# rn_LB# rn_RC# :6rn_leaf? 
rn_nodeDradix_mask2 rm_bitY# rm_unusedZS# rm_flags[{# rm_mklist\# rm_rmu`# rm_refsa# 6]rmu_mask^rmu_leaf_walktree_f_tg   yradix_node_head|ib rnh_treetopj# rnh_addrsizek# rnh_pktsizel# rnh_addaddro# rnh_addpktr# rnh_deladdrt# rnh_delpktv# rnh_matchaddrx# rnh_lookupz# rnh_matchpkt|#$ rnh_walktree~#( rnh_walktree_from#, rnh_close)#0 rnh_nodes/#4  y y  b  y y   y     y   y y  y)   O?vd g len g# data# wvtable_entryD rn# x?#0 mask?#8 value#@ OvdelЅ;( rnargyrnh( lent- pwlist'@s,z rn&arg&yent(- tmain0TX argc/argv/ h1\rn2`i3dp4- h__bswap32`j/ _xch__XP__stderrpB b<GNU C 4.2.1 20070719 [FreeBSD]radix.c/usr/ports-luigi/dummynet-branches/ipfw_mod/dummynetpsigned charunsigned charshort intshort unsigned intintunsigned int__int64_t@long long intlong long unsigned intlong unsigned intdoublecharlong int__off_t5u_char2caddr_tvp+|+fpos_t/?__sbufF _baseG# _sizeH#__sFILEXf _pg# _rh# _wi# _flagsj# _filek# _bfl# _lbfsizem# _cookiepQ# _closeq## _readrC#$ _seeksc#( _writet#, _ubw#0 _extrax#8 _ury#< _ubuf|#@ _nbuf}#C _lb#D _blksize#L _offset#P # Q C Q p ) c Q  I  Q v i __sFILEX N NFILE ; rn_Key#radix_node1 rn_mklist2r# rn_parent3# rn_bit4# rn_bmask5+# rn_flags6S# rn_uE#  @ rn_OffA# rn_LB# rn_RC# :rn_leaf?rn_nodeDradix_mask2r rm_bitY# rm_unusedZ+# rm_flags[S# rm_mklist\r# rm_rmu`x# rm_refsa# ]rmu_mask^armu_leaf_walktree_f_tg   Qradix_node_head|i rnh_treetopj# rnh_addrsizek# rnh_pktsizel# rnh_addaddro@# rnh_addpktr@# rnh_deladdrt`# rnh_delpktv`# rnh_matchaddrx{# rnh_lookupz`# rnh_matchpkt|{#$ rnh_walktree~#( rnh_walktree_from#, rnh_close#0 rnh_nodes#4 : Q Q :  ` Q Q :F { Q :f  :  Q  : Q Q  Q  : Nrn_searchpˆOQ)xuxvau|rn_search_mЈAQ) Qxutvauxmau|rn_refinesPxc  Q#Qmau`naudlimauhlim2aullonger͹pmasks_are_equalιtrn_lookupK Q Q):xulatrn_satisfies_leaf wl trialpleafPskipcppudcp2puhcp3pulpplengthtrn_match Q):va\tu@xuDcpauHcp2auLa`saved_tdtophofflvlenpmatched_offttestuPbuTrn_bituXon1?mYru\rn_newpair6 vQ b nodesttuxtRrn_insert@\ Q): dupentry\  nodes va`topdhvlenltuDcpauHbuLttpon1ۏ!$7 
cp2auPcmp_resuTat$puXxu\rn_addmask h'#Q search skipahxuHcpauLauPbuTmlenuXju\maskduplicateddm0lisnormalpsaved_xt"(last_zeroednormal_chars.rn_lexobetter@pS ?Q#?QmpAutnpAuxlimAu|Srn_new_radix_maskPrؕw ttNu nextOru mQru|rn_addrouteifQ#fQ)g: treenodesh vjaTjaXtkuDxkuHttkuL.l\topl`bmdb_leafmfkeyduplicatednPmmaskoahmprlmpppon2%xxtrrn_delete̠7Q netmask_argQ):tu@puDxuHttuLmrHsaved_mrLmpPdupedkeyT.Xtop\va`adbhlvlenpon1T:out}mmrtrn_walktree_fromР3 h: aQ mQ f wQerror\base`nextdxahxmlrnuXlastu\stoppingplastbtrn_walktree>/ h;: f< w=Qerror?lbase@pnext@trnAulrn_initheado[:)m: offnrnhp:uxtqSttqRtttqQQ#rn_init̥|cppppt#rn_init2Х maxk +Nmax_keylenGrn_mkfreelistHrĹmask_rnheadI:ȹrn_zerosPp̹rn_onesPpйaddmask_keyPpԹ$__stderrp_% $ > : ; I$ >   I&I : ;  : ; I8 ' I I < I!I/  : ;  : ;  : ; I' .: ; ' I@: ; I 4: ; I .? : ; ' I@: ; I 4: ; I? < % $ > : ; I$ >   I&I : ;  : ; I8 ' I I < I!I/  : ;  : ;  : ; I' .: ; ' I@: ; I 4: ; I .? : ; ' I@4: ; I : ; I 4: ;I .? : ;' I@: ;I 4: ;I  : ; .: ;' I@ : ;I ! " #.? : ;' @$4: ; I? 
<  /usr/include/machine/usr/include/sys/usr/include/usr/include/nettest_radix.cendian.h_types.h_types.htypes.hstdio.hradix.hЅrf%dVdeV,v,d:,,w$ %%%Vc;V /usr/include/machine/usr/include/sys/usr/include/usr/include/netradix.c_types.h_types.htypes.hstdio.hradix.hpg: e2/:dd#sdd,m,,,esddH,̎)d&Hcrtstuff.c__CTOR_LIST____DTOR_LIST____JCR_LIST____do_global_dtors_auxcompleted.4698p.4696frame_dummy__CTOR_END____DTOR_END____FRAME_END____JCR_END____do_global_ctors_aux/usr/src/lib/csu/i386-elf/crtn.Stest_radix.cdellist__bswap32radix.crn_searchrn_search_mrn_satisfies_leafrn_onesrn_newpairrn_insertnormal_chars.3155last_zeroed.3148max_keylenmask_rnheadaddmask_keyrn_lexobetterrn_new_radix_maskrn_mkfreelistrn_walktree_fromrn_walktreern_zerosbcmp@@FBSD_1.0bzero@@FBSD_1.0fprintf@@FBSD_1.0rn_delete_DYNAMICrn_match__dso_handle_init_tls@@FBSD_1.0_initenviron__progname_startrn_addmask__bss_startmaincalloc@@FBSD_1.0_finifwrite@@FBSD_1.0rn_refinesexit@@FBSD_1.0malloc@@FBSD_1.0bcopy@@FBSD_1.0_edata_GLOBAL_OFFSET_TABLE__end__stderrp@@FBSD_1.0free@@FBSD_1.0rn_initrn_lookupatexit@@FBSD_1.0rn_init2rn_addroute_Jv_RegisterClassesrn_initheadipfw_mod/dummynet/include/net/000755 000423 000000 00000000000 11311370776 017201 5ustar00luigiwheel000000 000000 ipfw_mod/dummynet/include/netinet/000755 000423 000000 00000000000 11310231725 020046 5ustar00luigiwheel000000 000000 ipfw_mod/dummynet/include/netgraph/000755 000423 000000 00000000000 11307662711 020221 5ustar00luigiwheel000000 000000 ipfw_mod/dummynet/include/sys/000755 000423 000000 00000000000 11310131063 017210 5ustar00luigiwheel000000 000000 ipfw_mod/dummynet/include/sys/kernel.h000644 000423 000000 00000001044 11152004450 020643 0ustar00luigiwheel000000 000000 /* * from freebsd's kernel.h */ #ifndef _SYS_KERNEL_H_ #define _SYS_KERNEL_H_ #define SYSINIT(a, b, c, d, e) \ void *dummy_ ## d = d /* * Some enumerated orders; "ANY" sorts last. 
*/ enum sysinit_elem_order { SI_ORDER_FIRST = 0x0000000, /* first*/ SI_ORDER_SECOND = 0x0000001, /* second*/ SI_ORDER_THIRD = 0x0000002, /* third*/ SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ SI_ORDER_ANY = 0xfffffff /* last*/ }; #endif ipfw_mod/dummynet/include/sys/mbuf.h000644 000423 000000 00000013343 11310131063 020316 0ustar00luigiwheel000000 000000 /* * Copyright (C) 2009 Luigi Rizzo, Universita` di Pisa * * BSD copyright. * * A simple compatibility interface to map mbufs onto sk_buff */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ #include /* we use free() */ /* hopefully queue.h is already included by someone else */ #include #ifdef _KERNEL /* bzero not present on linux, but this should go in glue.h */ // #define bzero(s, n) memset(s, 0, n) /* * We implement a very simplified UMA allocator where the backend * is simply malloc, and uma_zone only stores the length of the components. */ typedef int uma_zone_t; /* the zone size */ #define uma_zcreate(name, len, _3, _4, _5, _6, _7, _8) (len) #define uma_zfree(zone, item) free(item, M_IPFW) #define uma_zalloc(zone, flags) malloc(zone, M_IPFW, flags) #define uma_zdestroy(zone) do {} while (0) /*- * Macros for type conversion: * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. */ #define mtod(m, t) ((t)((m)->m_data)) #endif /* _KERNEL */ /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ void (*m_tag_free)(struct m_tag *); }; #if defined(__linux__) || defined( _WIN32 ) /* * Auxiliary structure to store values from the sk_buf. * Note that we should not alter the sk_buff, and if we do * so make sure to keep the values in sync between the mbuf * and the sk_buff (especially m_len and m_pkthdr.len). 
*/ struct mbuf { struct mbuf *m_next; struct mbuf *m_nextpkt; void *m_data; int m_len; /* length in this mbuf */ int m_flags; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) struct nf_info *queue_entry; #else struct nf_queue_entry *queue_entry; #endif struct sk_buff *m_skb; struct { struct net_device *rcvif; int len; /* total packet len */ SLIST_HEAD (packet_tags, m_tag) tags; } m_pkthdr; }; #define M_SKIP_FIREWALL 0x01 /* skip firewall processing */ #define M_BCAST 0x02 /* send/received as link-level broadcast */ #define M_MCAST 0x04 /* send/received as link-level multicast */ #define M_DONTWAIT M_NOWAIT /* should not be here... */ /* * m_dup() is used in the TEE case, currently unsupported so we * just return. */ static __inline struct mbuf *m_dup(struct mbuf __unused *m, int __unused n) { return NULL; }; #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_find(struct mbuf __unused *m, int __unused type, struct m_tag __unused *start) { return NULL; }; static __inline void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } /* * Create an mtag of the given type */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { int l = length + sizeof(struct m_tag); struct m_tag *m = malloc(l, 0, M_NOWAIT); if (m) { memset(m, 0, l); m->m_tag_id = type; m->m_tag_len = length; } return m; }; static __inline struct m_tag * m_tag_first(struct mbuf *m) { return SLIST_FIRST(&m->m_pkthdr.tags); }; static __inline void m_tag_delete(struct mbuf *m, struct m_tag *t) { }; static __inline struct m_tag * m_tag_locate(struct mbuf *m, u_int32_t n, int x, struct m_tag *t) { return NULL; }; static __inline void m_freem(struct mbuf *m) { struct m_tag *t; /* free the m_tag chain */ while ( (t = SLIST_FIRST(&m->m_pkthdr.tags) ) ) { SLIST_REMOVE_HEAD(&m->m_pkthdr.tags, m_tag_link); free(t, 0); } /* free the mbuf */ free(m, M_IPFW); }; /* we cannot pullup */ #define m_pullup(__m, __i) (m) 
/* FIB selection is not supported in this port: every mbuf reports FIB 0. */
#define M_GETFIB(_m)	0

#endif /* __linux__ || _WIN32 */

/*
 * Persistent tags stay with an mbuf until the mbuf is reclaimed.  Otherwise
 * tags are expected to ``vanish'' when they pass through a network
 * interface.  For most interfaces this happens normally as the tags are
 * reclaimed when the mbuf is free'd.  However in some special cases
 * reclaiming must be done manually.  An example is packets that pass through
 * the loopback interface.  Also, one must be careful to do this when
 * ``turning around'' packets (e.g., icmp_reflect).
 *
 * To mark a tag persistent bit-or this flag in when defining the tag id.
 * The tag will then be treated as described above.
 */
#define	MTAG_PERSISTENT				0x800

#define	PACKET_TAG_NONE				0  /* Nadda */

/* Packet tags for use with PACKET_ABI_COMPAT. */
#define	PACKET_TAG_IPSEC_IN_DONE		1  /* IPsec applied, in */
#define	PACKET_TAG_IPSEC_OUT_DONE		2  /* IPsec applied, out */
#define	PACKET_TAG_IPSEC_IN_CRYPTO_DONE		3  /* NIC IPsec crypto done */
#define	PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED	4  /* NIC IPsec crypto req'ed */
#define	PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO	5  /* NIC notifies IPsec */
#define	PACKET_TAG_IPSEC_PENDING_TDB		6  /* Reminder to do IPsec */
#define	PACKET_TAG_BRIDGE			7  /* Bridge processing done */
#define	PACKET_TAG_GIF				8  /* GIF processing done */
#define	PACKET_TAG_GRE				9  /* GRE processing done */
#define	PACKET_TAG_IN_PACKET_CHECKSUM		10 /* NIC checksumming done */
#define	PACKET_TAG_ENCAP			11 /* Encap.  processing */
#define	PACKET_TAG_IPSEC_SOCKET			12 /* IPSEC socket ref */
#define	PACKET_TAG_IPSEC_HISTORY		13 /* IPSEC history */
#define	PACKET_TAG_IPV6_INPUT			14 /* IPV6 input processing */
#define	PACKET_TAG_DUMMYNET			15 /* dummynet info */
#define	PACKET_TAG_DIVERT			17 /* divert info */
#define	PACKET_TAG_IPFORWARD			18 /* ipforward info */
/* The only tag id in this list that carries the MTAG_PERSISTENT bit. */
#define	PACKET_TAG_MACLABEL	(19 | MTAG_PERSISTENT) /* MAC label */
#define	PACKET_TAG_PF				21 /* PF + ALTQ information */
#define	PACKET_TAG_RTSOCKFAM			25 /* rtsock sa family */
#define	PACKET_TAG_IPOPTIONS			27 /* Saved IP options */
#define	PACKET_TAG_CARP				28 /* CARP info */

#endif /* !_SYS_MBUF_H_ */
ipfw_mod/dummynet/include/sys/syslog.h000644 000423 000000 00000000252 11151122421 020701 0ustar00luigiwheel000000 000000
#ifndef _SYS_SYSLOG_H_
#define _SYS_SYSLOG_H_

/* XXX find linux equivalent */
/* Placeholders: all three log levels are defined as the same value, 0. */
#define LOG_SECURITY	0
#define LOG_NOTICE	0
#define LOG_DEBUG	0

#endif /* _SYS_SYSLOG_H_ */
ipfw_mod/dummynet/include/sys/param.h000644 000423 000000 00000000233 11151253341 020466 0ustar00luigiwheel000000 000000
#ifndef _SYS_PARAM_H_
#define _SYS_PARAM_H_

/*
 * number of additional groups
 */
/* Only defined here when not building for LINUX_24. */
#ifndef LINUX_24
#define NGROUPS		16
#endif

#endif /* _SYS_PARAM_H_ */
ipfw_mod/dummynet/include/sys/module.h000644 000423 000000 00000002062 11157432360 020663 0ustar00luigiwheel000000 000000
/*
 * trivial module support
 */
#ifndef _SYS_MODULE_H_
#define _SYS_MODULE_H_

/* Opaque module handle; struct module is not defined in this header. */
typedef struct module *module_t;
/* Module event handler: (module, event type, opaque argument) -> int. */
typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *);

/* Event codes passed as the handler's second argument. */
typedef enum modeventtype {
	MOD_LOAD,
	MOD_UNLOAD,
	MOD_SHUTDOWN,
	MOD_QUIESCE
} modeventtype_t;

/* Per-module descriptor: a name, the event handler and private data. */
typedef struct moduledata {
	const char	*name;		/* module name */
	modeventhand_t	evhand;		/* event handler */
	void		*priv;		/* extra data */
} moduledata_t;

/* Declared here, implemented elsewhere. */
int my_mod_register(struct moduledata *mod, const char *name, int order);
/*
 * Hook the module descriptor, md, into our list of things to do.
 * We should in principle respect the order of loading.
* * XXX use the gcc .init functions */ #define DECLARE_MODULE(a, md, c,d) \ moduledata_t *moddesc_##a = &md; /* * XXX MODULE_VERSION is define in linux too */ #define MODULE_DEPEND(a,b,c,d,e) #if defined( __linux__ ) || defined( _WIN32 ) #undef MODULE_VERSION #define MODULE_VERSION(a,b) #endif #endif /* _SYS_MODULE_H_ */ ipfw_mod/dummynet/include/sys/queue.h000644 000423 000000 00000047251 11151122421 020517 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)queue.h 8.5 (Berkeley) 8/20/94 * $FreeBSD: src/sys/sys/queue.h,v 1.68 2006/10/24 11:20:29 ru Exp $ */ #ifndef _SYS_QUEUE_H_ #define _SYS_QUEUE_H_ //#include /* * This file defines four types of data structures: singly-linked lists, * singly-linked tail queues, lists and tail queues. * * A singly-linked list is headed by a single forward pointer. The elements * are singly linked for minimum space and pointer manipulation overhead at * the expense of O(n) removal for arbitrary elements. New elements can be * added to the list after an existing element or at the head of the list. * Elements being removed from the head of the list should use the explicit * macro for this purpose for optimum efficiency. A singly-linked list may * only be traversed in the forward direction. Singly-linked lists are ideal * for applications with large datasets and few or no removals or for * implementing a LIFO queue. * * A singly-linked tail queue is headed by a pair of pointers, one to the * head of the list and the other to the tail of the list. The elements are * singly linked for minimum space and pointer manipulation overhead at the * expense of O(n) removal for arbitrary elements. New elements can be added * to the list after an existing element, at the head of the list, or at the * end of the list. Elements being removed from the head of the tail queue * should use the explicit macro for this purpose for optimum efficiency. * A singly-linked tail queue may only be traversed in the forward direction. * Singly-linked tail queues are ideal for applications with large datasets * and few or no removals or for implementing a FIFO queue. * * A list is headed by a single forward pointer (or an array of forward * pointers for a hash table header). The elements are doubly linked * so that an arbitrary element can be removed without a need to * traverse the list. New elements can be added to the list before * or after an existing element or at the head of the list. 
A list * may only be traversed in the forward direction. * * A tail queue is headed by a pair of pointers, one to the head of the * list and the other to the tail of the list. The elements are doubly * linked so that an arbitrary element can be removed without a need to * traverse the list. New elements can be added to the list before or * after an existing element, at the head of the list, or at the end of * the list. A tail queue may be traversed in either direction. * * For details on the use of these macros, see the queue(3) manual page. * * * SLIST LIST STAILQ TAILQ * _HEAD + + + + * _HEAD_INITIALIZER + + + + * _ENTRY + + + + * _INIT + + + + * _EMPTY + + + + * _FIRST + + + + * _NEXT + + + + * _PREV - - - + * _LAST - - + + * _FOREACH + + + + * _FOREACH_SAFE + + + + * _FOREACH_REVERSE - - - + * _FOREACH_REVERSE_SAFE - - - + * _INSERT_HEAD + + + + * _INSERT_BEFORE - + - + * _INSERT_AFTER + + + + * _INSERT_TAIL - - + + * _CONCAT - - + + * _REMOVE_HEAD + - + - * _REMOVE + + + + * */ #ifdef QUEUE_MACRO_DEBUG /* Store the last 2 places the queue element or head was altered */ struct qm_trace { char * lastfile; int lastline; char * prevfile; int prevline; }; #define TRACEBUF struct qm_trace trace; #define TRASHIT(x) do {(x) = (void *)-1;} while (0) #define QMD_TRACE_HEAD(head) do { \ (head)->trace.prevline = (head)->trace.lastline; \ (head)->trace.prevfile = (head)->trace.lastfile; \ (head)->trace.lastline = __LINE__; \ (head)->trace.lastfile = __FILE__; \ } while (0) #define QMD_TRACE_ELEM(elem) do { \ (elem)->trace.prevline = (elem)->trace.lastline; \ (elem)->trace.prevfile = (elem)->trace.lastfile; \ (elem)->trace.lastline = __LINE__; \ (elem)->trace.lastfile = __FILE__; \ } while (0) #else #define QMD_TRACE_ELEM(elem) #define QMD_TRACE_HEAD(head) #define TRACEBUF #define TRASHIT(x) #endif /* QUEUE_MACRO_DEBUG */ /* * Singly-linked List declarations. 
*/ #define SLIST_HEAD(name, type) \ struct name { \ struct type *slh_first; /* first element */ \ } #define SLIST_HEAD_INITIALIZER(head) \ { NULL } #define SLIST_ENTRY(type) \ struct { \ struct type *sle_next; /* next element */ \ } /* * Singly-linked List functions. */ #define SLIST_EMPTY(head) ((head)->slh_first == NULL) #define SLIST_FIRST(head) ((head)->slh_first) #define SLIST_FOREACH(var, head, field) \ for ((var) = SLIST_FIRST((head)); \ (var); \ (var) = SLIST_NEXT((var), field)) #define SLIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = SLIST_FIRST((head)); \ (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ (var) = (tvar)) #define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ for ((varp) = &SLIST_FIRST((head)); \ ((var) = *(varp)) != NULL; \ (varp) = &SLIST_NEXT((var), field)) #define SLIST_INIT(head) do { \ SLIST_FIRST((head)) = NULL; \ } while (0) #define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ SLIST_NEXT((slistelm), field) = (elm); \ } while (0) #define SLIST_INSERT_HEAD(head, elm, field) do { \ SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ SLIST_FIRST((head)) = (elm); \ } while (0) #define SLIST_NEXT(elm, field) ((elm)->field.sle_next) #define SLIST_REMOVE(head, elm, type, field) do { \ if (SLIST_FIRST((head)) == (elm)) { \ SLIST_REMOVE_HEAD((head), field); \ } \ else { \ struct type *curelm = SLIST_FIRST((head)); \ while (SLIST_NEXT(curelm, field) != (elm)) \ curelm = SLIST_NEXT(curelm, field); \ SLIST_NEXT(curelm, field) = \ SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ } \ TRASHIT((elm)->field.sle_next); \ } while (0) #define SLIST_REMOVE_HEAD(head, field) do { \ SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ } while (0) /* * Singly-linked Tail queue declarations. 
*/ #define STAILQ_HEAD(name, type) \ struct name { \ struct type *stqh_first;/* first element */ \ struct type **stqh_last;/* addr of last next element */ \ } #define STAILQ_HEAD_INITIALIZER(head) \ { NULL, &(head).stqh_first } #define STAILQ_ENTRY(type) \ struct { \ struct type *stqe_next; /* next element */ \ } /* * Singly-linked Tail queue functions. */ #define STAILQ_CONCAT(head1, head2) do { \ if (!STAILQ_EMPTY((head2))) { \ *(head1)->stqh_last = (head2)->stqh_first; \ (head1)->stqh_last = (head2)->stqh_last; \ STAILQ_INIT((head2)); \ } \ } while (0) #define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) #define STAILQ_FIRST(head) ((head)->stqh_first) #define STAILQ_FOREACH(var, head, field) \ for((var) = STAILQ_FIRST((head)); \ (var); \ (var) = STAILQ_NEXT((var), field)) #define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = STAILQ_FIRST((head)); \ (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ (var) = (tvar)) #define STAILQ_INIT(head) do { \ STAILQ_FIRST((head)) = NULL; \ (head)->stqh_last = &STAILQ_FIRST((head)); \ } while (0) #define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ (head)->stqh_last = &STAILQ_NEXT((elm), field); \ STAILQ_NEXT((tqelm), field) = (elm); \ } while (0) #define STAILQ_INSERT_HEAD(head, elm, field) do { \ if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ (head)->stqh_last = &STAILQ_NEXT((elm), field); \ STAILQ_FIRST((head)) = (elm); \ } while (0) #define STAILQ_INSERT_TAIL(head, elm, field) do { \ STAILQ_NEXT((elm), field) = NULL; \ *(head)->stqh_last = (elm); \ (head)->stqh_last = &STAILQ_NEXT((elm), field); \ } while (0) #define STAILQ_LAST(head, type, field) \ (STAILQ_EMPTY((head)) ? 
\ NULL : \ ((struct type *)(void *) \ ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) #define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) #define STAILQ_REMOVE(head, elm, type, field) do { \ if (STAILQ_FIRST((head)) == (elm)) { \ STAILQ_REMOVE_HEAD((head), field); \ } \ else { \ struct type *curelm = STAILQ_FIRST((head)); \ while (STAILQ_NEXT(curelm, field) != (elm)) \ curelm = STAILQ_NEXT(curelm, field); \ if ((STAILQ_NEXT(curelm, field) = \ STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ } \ TRASHIT((elm)->field.stqe_next); \ } while (0) #define STAILQ_REMOVE_HEAD(head, field) do { \ if ((STAILQ_FIRST((head)) = \ STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ (head)->stqh_last = &STAILQ_FIRST((head)); \ } while (0) #ifndef LIST_HEAD /* * List declarations. */ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ } #define LIST_HEAD_INITIALIZER(head) \ { NULL } #define LIST_ENTRY(type) \ struct { \ struct type *le_next; /* next element */ \ struct type **le_prev; /* address of previous next element */ \ } /* * List functions. 
*/ #if (defined(_KERNEL) && defined(INVARIANTS)) #define QMD_LIST_CHECK_HEAD(head, field) do { \ if (LIST_FIRST((head)) != NULL && \ LIST_FIRST((head))->field.le_prev != \ &LIST_FIRST((head))) \ panic("Bad list head %p first->prev != head", (head)); \ } while (0) #define QMD_LIST_CHECK_NEXT(elm, field) do { \ if (LIST_NEXT((elm), field) != NULL && \ LIST_NEXT((elm), field)->field.le_prev != \ &((elm)->field.le_next)) \ panic("Bad link elm %p next->prev != elm", (elm)); \ } while (0) #define QMD_LIST_CHECK_PREV(elm, field) do { \ if (*(elm)->field.le_prev != (elm)) \ panic("Bad link elm %p prev->next != elm", (elm)); \ } while (0) #else #define QMD_LIST_CHECK_HEAD(head, field) #define QMD_LIST_CHECK_NEXT(elm, field) #define QMD_LIST_CHECK_PREV(elm, field) #endif /* (_KERNEL && INVARIANTS) */ #define LIST_EMPTY(head) ((head)->lh_first == NULL) #define LIST_FIRST(head) ((head)->lh_first) #define LIST_FOREACH(var, head, field) \ for ((var) = LIST_FIRST((head)); \ (var); \ (var) = LIST_NEXT((var), field)) #define LIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = LIST_FIRST((head)); \ (var) && ((tvar) = LIST_NEXT((var), field), 1); \ (var) = (tvar)) #define LIST_INIT(head) do { \ LIST_FIRST((head)) = NULL; \ } while (0) #define LIST_INSERT_AFTER(listelm, elm, field) do { \ QMD_LIST_CHECK_NEXT(listelm, field); \ if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ LIST_NEXT((listelm), field)->field.le_prev = \ &LIST_NEXT((elm), field); \ LIST_NEXT((listelm), field) = (elm); \ (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ } while (0) #define LIST_INSERT_BEFORE(listelm, elm, field) do { \ QMD_LIST_CHECK_PREV(listelm, field); \ (elm)->field.le_prev = (listelm)->field.le_prev; \ LIST_NEXT((elm), field) = (listelm); \ *(listelm)->field.le_prev = (elm); \ (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ } while (0) #define LIST_INSERT_HEAD(head, elm, field) do { \ QMD_LIST_CHECK_HEAD((head), field); \ if ((LIST_NEXT((elm), field) = 
LIST_FIRST((head))) != NULL) \ LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ LIST_FIRST((head)) = (elm); \ (elm)->field.le_prev = &LIST_FIRST((head)); \ } while (0) #define LIST_NEXT(elm, field) ((elm)->field.le_next) #define LIST_REMOVE(elm, field) do { \ QMD_LIST_CHECK_NEXT(elm, field); \ QMD_LIST_CHECK_PREV(elm, field); \ if (LIST_NEXT((elm), field) != NULL) \ LIST_NEXT((elm), field)->field.le_prev = \ (elm)->field.le_prev; \ *(elm)->field.le_prev = LIST_NEXT((elm), field); \ TRASHIT((elm)->field.le_next); \ TRASHIT((elm)->field.le_prev); \ } while (0) #endif /* LIST_HEAD */ /* * Tail queue declarations. */ #define TAILQ_HEAD(name, type) \ struct name { \ struct type *tqh_first; /* first element */ \ struct type **tqh_last; /* addr of last next element */ \ TRACEBUF \ } #define TAILQ_HEAD_INITIALIZER(head) \ { NULL, &(head).tqh_first } #define TAILQ_ENTRY(type) \ struct { \ struct type *tqe_next; /* next element */ \ struct type **tqe_prev; /* address of previous next element */ \ TRACEBUF \ } /* * Tail queue functions. 
*/ #if (defined(_KERNEL) && defined(INVARIANTS)) #define QMD_TAILQ_CHECK_HEAD(head, field) do { \ if (!TAILQ_EMPTY(head) && \ TAILQ_FIRST((head))->field.tqe_prev != \ &TAILQ_FIRST((head))) \ panic("Bad tailq head %p first->prev != head", (head)); \ } while (0) #define QMD_TAILQ_CHECK_TAIL(head, field) do { \ if (*(head)->tqh_last != NULL) \ panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ } while (0) #define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ if (TAILQ_NEXT((elm), field) != NULL && \ TAILQ_NEXT((elm), field)->field.tqe_prev != \ &((elm)->field.tqe_next)) \ panic("Bad link elm %p next->prev != elm", (elm)); \ } while (0) #define QMD_TAILQ_CHECK_PREV(elm, field) do { \ if (*(elm)->field.tqe_prev != (elm)) \ panic("Bad link elm %p prev->next != elm", (elm)); \ } while (0) #else #define QMD_TAILQ_CHECK_HEAD(head, field) #define QMD_TAILQ_CHECK_TAIL(head, headname) #define QMD_TAILQ_CHECK_NEXT(elm, field) #define QMD_TAILQ_CHECK_PREV(elm, field) #endif /* (_KERNEL && INVARIANTS) */ #define TAILQ_CONCAT(head1, head2, field) do { \ if (!TAILQ_EMPTY(head2)) { \ *(head1)->tqh_last = (head2)->tqh_first; \ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ (head1)->tqh_last = (head2)->tqh_last; \ TAILQ_INIT((head2)); \ QMD_TRACE_HEAD(head1); \ QMD_TRACE_HEAD(head2); \ } \ } while (0) #define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) #define TAILQ_FIRST(head) ((head)->tqh_first) #define TAILQ_FOREACH(var, head, field) \ for ((var) = TAILQ_FIRST((head)); \ (var); \ (var) = TAILQ_NEXT((var), field)) #define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = TAILQ_FIRST((head)); \ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ (var) = (tvar)) #define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ for ((var) = TAILQ_LAST((head), headname); \ (var); \ (var) = TAILQ_PREV((var), headname, field)) #define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ for ((var) = TAILQ_LAST((head), headname); \ (var) && ((tvar) = 
TAILQ_PREV((var), headname, field), 1); \ (var) = (tvar)) #define TAILQ_INIT(head) do { \ TAILQ_FIRST((head)) = NULL; \ (head)->tqh_last = &TAILQ_FIRST((head)); \ QMD_TRACE_HEAD(head); \ } while (0) #define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ QMD_TAILQ_CHECK_NEXT(listelm, field); \ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ TAILQ_NEXT((elm), field)->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ else { \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ QMD_TRACE_HEAD(head); \ } \ TAILQ_NEXT((listelm), field) = (elm); \ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ QMD_TRACE_ELEM(&(elm)->field); \ QMD_TRACE_ELEM(&listelm->field); \ } while (0) #define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ QMD_TAILQ_CHECK_PREV(listelm, field); \ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ TAILQ_NEXT((elm), field) = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ QMD_TRACE_ELEM(&(elm)->field); \ QMD_TRACE_ELEM(&listelm->field); \ } while (0) #define TAILQ_INSERT_HEAD(head, elm, field) do { \ QMD_TAILQ_CHECK_HEAD(head, field); \ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ TAILQ_FIRST((head))->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ else \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ TAILQ_FIRST((head)) = (elm); \ (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ QMD_TRACE_HEAD(head); \ QMD_TRACE_ELEM(&(elm)->field); \ } while (0) #define TAILQ_INSERT_TAIL(head, elm, field) do { \ QMD_TAILQ_CHECK_TAIL(head, field); \ TAILQ_NEXT((elm), field) = NULL; \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ QMD_TRACE_HEAD(head); \ QMD_TRACE_ELEM(&(elm)->field); \ } while (0) #define TAILQ_LAST(head, headname) \ (*(((struct headname *)((head)->tqh_last))->tqh_last)) #define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) #define TAILQ_PREV(elm, headname, field) \ 
	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))

/*
 * Unlink elm from the tail queue.  When elm is the last element
 * (its next pointer is NULL) the head's tqh_last is moved back to
 * elm's tqe_prev; otherwise the successor's tqe_prev is patched.
 */
#define	TAILQ_REMOVE(head, elm, field) do {				\
	QMD_TAILQ_CHECK_NEXT(elm, field);				\
	QMD_TAILQ_CHECK_PREV(elm, field);				\
	if ((TAILQ_NEXT((elm), field)) != NULL)				\
		TAILQ_NEXT((elm), field)->field.tqe_prev = 		\
		    (elm)->field.tqe_prev;				\
	else {								\
		(head)->tqh_last = (elm)->field.tqe_prev;		\
		QMD_TRACE_HEAD(head);					\
	}								\
	*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field);		\
	TRASHIT((elm)->field.tqe_next);					\
	TRASHIT((elm)->field.tqe_prev);					\
	QMD_TRACE_ELEM(&(elm)->field);					\
} while (0)


#ifdef _KERNEL

/*
 * XXX insque() and remque() are an old way of handling certain queues.
 * They bogusly assumes that all queue heads look alike.
 */

/* Generic doubly-linked node: forward link first, reverse link second. */
struct quehead {
	struct quehead *qh_link;
	struct quehead *qh_rlink;
};

#ifdef __CC_SUPPORTS___INLINE

/*
 * Insert element a immediately after element b.
 * Both pointers are reinterpreted as struct quehead, so the two link
 * pointers must be the first members of the caller's structure.
 */
static __inline void
insque(void *a, void *b)
{
	struct quehead *element = (struct quehead *)a,
		 *head = (struct quehead *)b;

	element->qh_link = head->qh_link;
	element->qh_rlink = head;
	head->qh_link = element;
	/* the old successor of b now points back at the new element */
	element->qh_link->qh_rlink = element;
}

/*
 * Unlink element a from its neighbours.  Only the reverse link of the
 * removed element is cleared; its forward link is left as it was.
 */
static __inline void
remque(void *a)
{
	struct quehead *element = (struct quehead *)a;

	element->qh_link->qh_rlink = element->qh_rlink;
	element->qh_rlink->qh_link = element->qh_link;
	element->qh_rlink = 0;
}

#else /* !__CC_SUPPORTS___INLINE */

/* Without inline support only the prototypes are provided here. */
void	insque(void *a, void *b);
void	remque(void *a);

#endif /* __CC_SUPPORTS___INLINE */

#endif /* _KERNEL */

#endif /* !_SYS_QUEUE_H_ */
ipfw_mod/dummynet/include/sys/malloc.h000644 000423 000000 00000003651 11300232743 020643 0ustar00luigiwheel000000 000000
#ifndef _SYS_MALLOC_H_
#define _SYS_MALLOC_H_

/*
 * No matter what, try to get clear memory and be non-blocking.
* XXX check if 2.4 has a native way to zero memory, * XXX obey to the flags (M_NOWAIT <-> GPF_ATOMIC, M_WAIT <-> GPF_KERNEL) */ #ifndef _WIN32 /* this is the linux version */ /* * XXX On zeroshell (2.6.25.17) we get a load error * __you_cannot_kmalloc_that_much * which is triggered when kmalloc() is called with a large * compile-time constant argument (include/linux/slab_def.h) * * I think it may be a compiler (or source) bug because there is no * evidence that such a large request is made. * Making the _size argument to kmalloc volatile prevents the compiler * from making the mistake, though it is clearly not ideal. */ #if !defined (LINUX_24) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) #define malloc(_size, type, flags) \ ({ volatile int _v = _size; kmalloc(_v, GFP_ATOMIC | __GFP_ZERO); }) #else /* LINUX <= 2.6.22 and LINUX_24 */ /* linux 2.6.22 does not zero allocated memory */ #define malloc(_size, type, flags) \ ({ int _s = _size; \ void *_ret = kmalloc(_s, GFP_ATOMIC); \ if (_ret) memset(_ret, 0, _s); \ (_ret); \ }) #endif /* LINUX <= 2.6.22 */ #define calloc(_n, _s) malloc((_n * _s), NULL, GFP_ATOMIC | __GFP_ZERO) #define free(_var, type) kfree(_var) #else /* _WIN32, the windows version */ /* * ntddk.h uses win_malloc() and MmFreeContiguousMemory(). 
* wipfw uses * ExAllocatePoolWithTag(, pool, len, tag) * ExFreePoolWithTag(ptr, tag) */ #define malloc(_size, _type, _flags) my_alloc(_size) void *my_alloc(int _size); /* the 'tag' version does not work without -Gz in the linker */ #define free(_var, type) ExFreePool(_var) //#define free(_var, type) ExFreePoolWithTag(_var, 'wfpi') #endif /* _WIN32 */ #define M_NOWAIT 0x0001 /* do not block */ #define M_ZERO 0x0100 /* bzero the allocation */ #endif /* _SYS_MALLOC_H_ */ ipfw_mod/dummynet/include/sys/taskqueue.h000644 000423 000000 00000001242 11157432360 021404 0ustar00luigiwheel000000 000000 #ifndef _SYS_TASKQUEUE_H_ #define _SYS_TASKQUEUE_H_ /* * Remap taskqueue to direct calls */ struct task { void (*func)(void); }; #define taskqueue_enqueue(tq, ta) (ta)->func() #define TASK_INIT(a,b,c,d) do { \ (a)->func = (void (*)(void))c; } while (0) #define taskqueue_create_fast(_a, _b, _c, _d) NULL #define taskqueue_start_threads(_a, _b, _c, _d) #define taskqueue_drain(_a, _b) /* XXX to be completed */ #define taskqueue_free(_a) /* XXX to be completed */ #define PRI_MIN (0) /* Highest priority. 
*/ #define PRI_MIN_ITHD (PRI_MIN) #define PI_NET (PRI_MIN_ITHD + 16) #endif /* !_SYS_TASKQUEUE_H_ */ ipfw_mod/dummynet/include/sys/cdefs.h000644 000423 000000 00000001405 11157432360 020462 0ustar00luigiwheel000000 000000 #ifndef _CDEFS_H_ #define _CDEFS_H_ /* * various compiler macros and common functions */ #ifndef __unused #define __unused __attribute__ ((__unused__)) #endif #ifndef __packed #define __packed __attribute__ ((__packed__)) #endif #ifndef __aligned #define __aligned(x) __attribute__((__aligned__(x))) #endif /* defined as assert */ void panic(const char *fmt, ...); #define KASSERT(exp,msg) do { \ if (__predict_false(!(exp))) \ panic msg; \ } while (0) /* don't bother to optimize */ #ifndef __predict_false #define __predict_false(x) (x) /* __builtin_expect((exp), 0) */ #endif #endif /* !_CDEFS_H_ */ ipfw_mod/dummynet/include/sys/systm.h000644 000423 000000 00000003520 11247450325 020556 0ustar00luigiwheel000000 000000 #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #ifndef _WIN32 /* this is the linux version */ /* callout support, in on FreeBSD */ /* * callout support on linux module is done using timers */ #include #ifdef LINUX_24 #include /* jiffies definition is here in 2.4 */ #endif #define callout timer_list static __inline int callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg) { co->expires = jiffies + ticks; co->function = (void (*)(unsigned long))fn; co->data = (unsigned long)arg; add_timer(co); return 0; } #define callout_init(co, safe) init_timer(co) #define callout_drain(co) del_timer(co) #define callout_stop(co) del_timer(co) #define CALLOUT_ACTIVE 0x0002 /* callout is currently active */ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #else /* _WIN32 */ /* This is the windows part for callout support */ struct callout { int dummy; }; static __inline int callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg) { return 0; } #define callout_init(co, safe) #define callout_drain(co) 
#define callout_stop(co)
#endif /* !_WIN32 */


/*
 * Reference prototypes kept for the unfinished Windows callout support;
 * the whole section is compiled out.
 */
#if 0
/* add our timer to the kernel global timer list */
NTSTATUS
  IoInitializeTimer(
    IN PDEVICE_OBJECT  DeviceObject,
    IN PIO_TIMER_ROUTINE  TimerRoutine,
    IN PVOID  Context
    );

/* see differences :
IoInitializeDpcRequest
	http://dsrg.mff.cuni.cz/~ceres/sch/osy/text/ch04s01s01.php
	example http://www.beyondlogic.org/interrupts/winnt_isr_dpc.htm
KeInitializeDpc  IRQL: Any level
IoInitializeTimer IRQL: Passive level
KeInitializeTimer */
VOID
  KeInitializeDpc(
    IN PRKDPC  Dpc,
    IN PKDEFERRED_ROUTINE  DeferredRoutine,
    IN PVOID  DeferredContext
    );
#endif /* commented out */

#endif /* _SYS_SYSTM_H_ */
ipfw_mod/dummynet/include/netgraph/ng_ipfw.h000644 000423 000000 00000004220 11307662711 022021 0ustar00luigiwheel000000 000000
/*-
 * Copyright 2005, Gleb Smirnoff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netgraph/ng_ipfw.h,v 1.2 2006/02/17 09:42:49 glebius Exp $
 */

#ifndef __NG_IPFW_H
#define __NG_IPFW_H

#define NG_IPFW_NODE_TYPE	"ipfw"
#define NGM_IPFW_COOKIE		1105988990

#ifdef _KERNEL

struct mbuf;
struct ip_fw_args;
/* Signature of the input hook reached through ng_ipfw_input_p. */
typedef int ng_ipfw_input_t(struct mbuf **, int, struct ip_fw_args *, int);
extern ng_ipfw_input_t	*ng_ipfw_input_p;
/* True when a hook has been installed in ng_ipfw_input_p. */
#define NG_IPFW_LOADED	(ng_ipfw_input_p != NULL)

struct ng_ipfw_tag {
	struct m_tag	mt;		/* tag header */
	struct ip_fw	*rule;		/* matching rule */
	uint32_t	rule_id;	/* matching rule id */
	uint32_t	chain_id;	/* ruleset id */
	struct ifnet	*ifp;		/* interface, for ip_output */
	int		dir;		/* one of NG_IPFW_OUT / NG_IPFW_IN */
#define NG_IPFW_OUT	0
#define NG_IPFW_IN	1
};

/* Size of the tag payload, i.e. the tag without its struct m_tag header. */
#define TAGSIZ	(sizeof(struct ng_ipfw_tag) - sizeof(struct m_tag))

#endif /* _KERNEL */
#endif /* __NG_IPFW_H */
ipfw_mod/dummynet/include/netinet/ip_dummynet.h000644 000423 000000 00000035574 11310173447 022575 0ustar00luigiwheel000000 000000
/*-
 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40.2.1 2008/04/25 10:26:30 oleg Exp $
 */

#ifndef _IP_DUMMYNET_H
#define _IP_DUMMYNET_H

/*
 * Definition of dummynet data structures. In the structures, I decided
 * not to use the macros in <sys/queue.h> in the hope of making the code
 * easier to port to other architectures. The type of lists and queue we
 * use here is pretty simple anyways.
 */

/*
 * We start with a heap, which is used in the scheduler to decide when
 * to transmit packets etc.
 *
 * The key for the heap is used for two different values:
 *
 * 1. timer ticks- max 10K/second, so 32 bits are enough;
 *
 * 2. virtual times. These increase in steps of len/x, where len is the
 *    packet length, and x is either the weight of the flow, or the
 *    sum of all weights.
 *    If we limit to max 1000 flows and a max weight of 100, then
 *    x needs 17 bits. The packet size is 16 bits, so we can easily
 *    overflow if we do not allow errors.
 * So we use a key "dn_key" which is 64 bits.
Some macros are used to * compare key values and handle wraparounds. * MAX64 returns the largest of two key values. * MY_M is used as a shift count when doing fixed point arithmetic * (a better name would be useful...). */ typedef u_int64_t dn_key ; /* sorting key */ #define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) #define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) #define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) #define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) #define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) #define MY_M 16 /* number of left shift to obtain a larger precision */ /* * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the * virtual time wraps every 15 days. */ /* * The maximum hash table size for queues. This value must be a power * of 2. */ #define DN_MAX_HASH_SIZE 65536 /* * A heap entry is made of a key and a pointer to the actual * object stored in the heap. * The heap is an array of dn_heap_entry entries, dynamically allocated. * Current size is "size", with "elements" actually in use. * The heap normally supports only ordered insert and extract from the top. * If we want to extract an object from the middle of the heap, we * have to know where the object itself is located in the heap (or we * need to scan the whole array). To this purpose, an object has a * field (int) which contains the index of the object itself into the * heap. When the object is moved, the field must also be updated. * The offset of the index in the object is stored in the 'offset' * field in the heap descriptor. The assumption is that this offset * is non-zero if we want to support extract from the middle. */ struct dn_heap_entry { dn_key key ; /* sorting key. 
Topmost element is smallest one */ void *object ; /* object pointer */ } ; struct dn_heap { int size ; int elements ; int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ struct dn_heap_entry *p ; /* really an array of "size" entries */ } ; #ifdef _KERNEL /* * Packets processed by dummynet have an mbuf tag associated with * them that carries their dummynet state. This is used within * the dummynet code as well as outside when checking for special * processing requirements. */ struct dn_pkt_tag { struct ip_fw *rule; /* matching rule */ uint32_t rule_id; /* matching rule id */ uint32_t chain_id; /* ruleset id */ int dn_dir; /* action when packet comes out. */ #define DN_TO_IP_OUT 1 #define DN_TO_IP_IN 2 /* Obsolete: #define DN_TO_BDG_FWD 3 */ #define DN_TO_ETH_DEMUX 4 #define DN_TO_ETH_OUT 5 #define DN_TO_IP6_IN 6 #define DN_TO_IP6_OUT 7 #define DN_TO_IFB_FWD 8 dn_key output_time; /* when the pkt is due for delivery */ struct ifnet *ifp; /* interface, for ip_output */ struct _ip6dn_args ip6opt; /* XXX ipv6 options */ }; #endif /* _KERNEL */ /* * Overall structure of dummynet (with WF2Q+): In dummynet, packets are selected with the firewall rules, and passed to two different objects: PIPE or QUEUE. A QUEUE is just a queue with configurable size and queue management policy. It is also associated with a mask (to discriminate among different flows), a weight (used to give different shares of the bandwidth to different flows) and a "pipe", which essentially supplies the transmit clock for all queues associated with that pipe. A PIPE emulates a fixed-bandwidth link, whose bandwidth is configurable. The "clock" for a pipe can come from either an internal timer, or from the transmit interrupt of an interface. A pipe is also associated with one (or more, if masks are used) queue, where all packets for that pipe are stored. 
The bandwidth available on the pipe is shared by the queues
associated with that pipe (only one in case the packet is sent
to a PIPE) according to the WF2Q+ scheduling algorithm and the
configured weights.

In general, incoming packets are stored in the appropriate queue,
which is then placed into one of a few heaps managed by a scheduler
to decide when the packet should be extracted.
The scheduler (a function called dummynet()) is run at every timer
tick, and grabs queues from the head of the heaps when they are
ready for processing.

There are three data structures defining a pipe and associated queues:

 + dn_pipe, which contains the main configuration parameters related
   to delay and bandwidth;
 + dn_flow_set, which contains WF2Q+ configuration, flow
   masks, plr and RED configuration;
 + dn_flow_queue, which is the per-flow queue (containing the packets)

Multiple dn_flow_set can be linked to the same pipe, and multiple
dn_flow_queue can be linked to the same dn_flow_set.
All data structures are linked in a linear list which is used for
housekeeping purposes.

During configuration, we create and initialize the dn_flow_set
and dn_pipe structures (a dn_pipe also contains a dn_flow_set).

At runtime: packets are sent to the appropriate dn_flow_set (either
WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows),
which in turn dispatches them to the appropriate dn_flow_queue
(created dynamically according to the masks).

The transmit clock for fixed rate flows (ready_event()) selects the
dn_flow_queue to be used to transmit the next packet. For WF2Q,
wfq_ready_event() extracts a pipe which in turn selects the right
flow using a number of heaps defined into the pipe itself.

 *
 */

/*
 * per flow queue. This contains the flow identifier, the queue
 * of packets, counters, and parameters used to support both RED and
 * WF2Q+.
 *
 * A dn_flow_queue is created and initialized whenever a packet for
 * a new flow arrives.
*/
struct dn_flow_queue {
	struct dn_flow_queue *next ;	/* next queue in the list */
	struct ipfw_flow_id id ;	/* flow identifier */

	struct mbuf *head, *tail ;	/* queue of packets */
	u_int len ;
	u_int len_bytes ;

	/*
	 * When we emulate MAC overheads, or channel unavailability due
	 * to other traffic on a shared medium, we augment the packet at
	 * the head of the queue with an 'extra_bits' field representing
	 * the additional delay the packet will be subject to:
	 *	extra_bits = bw*unavailable_time.
	 * With large bandwidth and large delays, extra_bits (and also numbytes)
	 * can become very large, so better play safe and use 64 bit
	 */
	uint64_t numbytes ;	/* credit for transmission (dynamic queues) */
	int64_t extra_bits;	/* extra bits simulating unavailable channel */

	u_int64_t tot_pkts ;	/* statistics counters */
	u_int64_t tot_bytes ;
	u_int32_t drops ;

	int hash_slot ;		/* debugging/diagnostic */

	/* RED parameters */
	int avg ;		/* average queue length est. (scaled) */
	int count ;		/* arrivals since last RED drop */
	int random ;		/* random value (scaled) */
	dn_key idle_time;	/* start of queue idle time */

	/* WF2Q+ support */
	struct dn_flow_set *fs ;	/* parent flow set */
	int heap_pos ;		/* position (index) of struct in heap */
	dn_key sched_time ;	/* current time when queue enters ready_heap */

	dn_key S,F ;		/* start time, finish time */
	/*
	 * Setting F < S means the timestamp is invalid. We only need
	 * to test this when the queue is empty.
	 */
} ;

/*
 * flow_set descriptor. Contains the "template" parameters for the
 * queue configuration, and pointers to the hash table of dn_flow_queue's.
 *
 * The hash table is an array of lists -- we identify the slot by
 * hashing the flow-id, then scan the list looking for a match.
 * The size of the hash table (buckets) is configurable on a per-queue
 * basis.
 *
 * A dn_flow_set is created whenever a new queue or pipe is created (in the
 * latter case, the structure is located inside the struct dn_pipe).
*/ struct dn_flow_set { SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ u_short fs_nr ; /* flow_set number */ u_short flags_fs; #define DN_HAVE_FLOW_MASK 0x0001 #define DN_IS_RED 0x0002 #define DN_IS_GENTLE_RED 0x0004 #define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ #define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ #define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ #define DN_IS_PIPE 0x4000 #define DN_IS_QUEUE 0x8000 struct dn_pipe *pipe ; /* pointer to parent pipe */ u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ int weight ; /* WFQ queue weight */ int qsize ; /* queue size in slots or bytes */ int plr ; /* pkt loss rate (2^31-1 means 100%) */ struct ipfw_flow_id flow_mask ; /* hash table of queues onto this flow_set */ int rq_size ; /* number of slots */ int rq_elements ; /* active elements */ struct dn_flow_queue **rq; /* array of rq_size entries */ u_int32_t last_expired ; /* do not expire too frequently */ int backlogged ; /* #active queues for this flowset */ /* RED parameters */ #define SCALE_RED 16 #define SCALE(x) ( (x) << SCALE_RED ) #define SCALE_VAL(x) ( (x) >> SCALE_RED ) #define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) int w_q ; /* queue weight (scaled) */ int max_th ; /* maximum threshold for queue (scaled) */ int min_th ; /* minimum threshold for queue (scaled) */ int max_p ; /* maximum value for p_b (scaled) */ u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ u_int lookup_depth ; /* depth of lookup table */ int lookup_step ; /* granularity inside the lookup table */ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ int avg_pkt_size ; /* medium packet size */ int max_pkt_size ; /* max packet size */ }; SLIST_HEAD(dn_flow_set_head, 
dn_flow_set); /* * Pipe descriptor. Contains global parameters, delay-line queue, * and the flow_set used for fixed-rate queues. * * For WF2Q+ support it also has 3 heaps holding dn_flow_queue: * not_eligible_heap, for queues whose start time is higher * than the virtual time. Sorted by start time. * scheduler_heap, for queues eligible for scheduling. Sorted by * finish time. * idle_heap, all flows that are idle and can be removed. We * do that on each tick so we do not slow down too much * operations during forwarding. * */ struct dn_pipe { /* a pipe */ SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */ int pipe_nr ; /* number */ int bandwidth; /* really, bytes/tick. */ int delay ; /* really, ticks */ struct mbuf *head, *tail ; /* packets in delay line */ /* WF2Q+ */ struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ struct dn_heap not_eligible_heap; /* top extract- key Start time */ struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ dn_key V ; /* virtual time */ int sum; /* sum of weights of all active sessions */ /* Same as in dn_flow_queue, numbytes can become large */ int64_t numbytes; /* bits I can transmit (more or less). */ uint64_t burst; /* burst size, scaled: bits * hz */ dn_key sched_time ; /* time pipe was scheduled in ready_heap */ dn_key idle_time; /* start of pipe idle time */ /* * When the tx clock come from an interface (if_name[0] != '\0'), its name * is stored below, whereas the ifp is filled when the rule is configured. 
*/ char if_name[IFNAMSIZ]; struct ifnet *ifp ; int ready ; /* set if ifp != NULL and we got a signal from it */ struct dn_flow_set fs ; /* used with fixed-rate flows */ /* fields to simulate a delay profile */ #define ED_MAX_NAME_LEN 32 char name[ED_MAX_NAME_LEN]; int loss_level; int samples_no; int *samples; }; /* dn_pipe_max is used to pass pipe configuration from userland onto * kernel space and back */ #define ED_MAX_SAMPLES_NO 1024 struct dn_pipe_max { struct dn_pipe pipe; int samples[ED_MAX_SAMPLES_NO]; }; SLIST_HEAD(dn_pipe_head, dn_pipe); #ifdef _KERNEL typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; /* * Return the IPFW rule associated with the dummynet tag; if any. * Make sure that the dummynet tag is not reused by lower layers. */ static __inline struct ip_fw * ip_dn_claim_rule(struct mbuf *m) { struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (mtag != NULL) { mtag->m_tag_id = PACKET_TAG_NONE; return (((struct dn_pkt_tag *)(mtag+1))->rule); } else return (NULL); } #endif #endif /* _IP_DUMMYNET_H */ ipfw_mod/dummynet/include/netinet/ip6.h000644 000423 000000 00000004454 11151122421 020717 0ustar00luigiwheel000000 000000 #ifndef _NETINET_IP6_H_ #define _NETINET_IP6_H_ #define IN6_ARE_ADDR_EQUAL(a, b) \ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) struct ip6_hdr { union { struct ip6_hdrctl { u_int32_t ip6_un1_flow; /* 20 bits of flow-ID */ u_int16_t ip6_un1_plen; /* payload length */ u_int8_t ip6_un1_nxt; /* next header */ u_int8_t ip6_un1_hlim; /* hop limit */ } ip6_un1; u_int8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ } ip6_ctlun; struct in6_addr ip6_src; /* source address */ struct in6_addr ip6_dst; /* destination address */ }; #define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt #define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow struct icmp6_hdr { u_int8_t icmp6_type; /* type field */ u_int8_t icmp6_code; /* code field */ u_int16_t icmp6_cksum; /* checksum 
field */ union { u_int32_t icmp6_un_data32[1]; /* type-specific field */ u_int16_t icmp6_un_data16[2]; /* type-specific field */ u_int8_t icmp6_un_data8[4]; /* type-specific field */ } icmp6_dataun; }; struct ip6_hbh { u_int8_t ip6h_nxt; /* next header */ u_int8_t ip6h_len; /* length in units of 8 octets */ /* followed by options */ }; struct ip6_rthdr { u_int8_t ip6r_nxt; /* next header */ u_int8_t ip6r_len; /* length in units of 8 octets */ u_int8_t ip6r_type; /* routing type */ u_int8_t ip6r_segleft; /* segments left */ /* followed by routing type specific data */ }; struct ip6_frag { u_int8_t ip6f_nxt; /* next header */ u_int8_t ip6f_reserved; /* reserved field */ u_int16_t ip6f_offlg; /* offset, reserved, and flag */ u_int32_t ip6f_ident; /* identification */ }; #define IP6F_OFF_MASK 0xfff8 /* mask out offset from _offlg */ #define IP6F_MORE_FRAG 0x0001 /* more-fragments flag */ struct ip6_ext { u_int8_t ip6e_nxt; u_int8_t ip6e_len; }; #endif /* _NETINET_IP6_H_ */ ipfw_mod/dummynet/include/netinet/tcp_var.h000644 000423 000000 00000000163 11151122421 021650 0ustar00luigiwheel000000 000000 #ifndef _NETINET_TCP_VAR_H_ #define _NETINET_TCP_VAR_H_ #include #endif /* !_NETINET_TCP_VAR_H_ */ ipfw_mod/dummynet/include/netinet/ip_icmp.h000644 000423 000000 00000001251 11151122421 021631 0ustar00luigiwheel000000 000000 /* * additional define not present in linux * should go in glue.h */ #ifndef _NETINET_IP_ICMP_H_ #define _NETINET_IP_ICMP_H_ #define ICMP_MAXTYPE 40 /* defined as 18 in compat.h */ #define ICMP_ROUTERSOLICIT 10 /* router solicitation */ #define ICMP_TSTAMP 13 /* timestamp request */ #define ICMP_IREQ 15 /* information request */ #define ICMP_MASKREQ 17 /* address mask request */ #define ICMP_UNREACH_HOST 1 /* bad host */ #define ICMP_UNREACH 3 /* dest unreachable, codes: */ #endif /* _NETINET_IP_ICMP_H_ */ ipfw_mod/dummynet/include/netinet/ip_fw.h000644 000423 000000 00000053522 11310231725 021332 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 
2002-2009 Luigi Rizzo, Universita` di Pisa
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.110.2.6 2008/10/14 08:03:58 rwatson Exp $
 */

#ifndef _IPFW2_H
#define _IPFW2_H

/*
 * The default rule number.  By the design of ip_fw, the default rule
 * is the last one, so its number can also serve as the highest number
 * allowed for a rule.  The ip_fw code relies on both meanings of this
 * constant.
 */
#define IPFW_DEFAULT_RULE	65535

/*
 * The number of ipfw tables.  The maximum allowed table number is the
 * (IPFW_TABLES_MAX - 1).
 */
#define IPFW_TABLES_MAX		128

/*
 * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit
 * argument between 1 and 65534.
The value 0 is unused, the value
 * 65535 (IP_FW_TABLEARG) is used to represent 'tablearg', i.e. the
 * result of the most recent table() lookup.
 * Note that 16bit is only a historical limit, resulting from
 * the use of a 16-bit fields for that value. In reality, we can have
 * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg.
 */
#define IPFW_ARG_MIN		1
#define IPFW_ARG_MAX		65534
#define IP_FW_TABLEARG		65535	/* XXX should use 0 */

/*
 * The kernel representation of ipfw rules is made of a list of
 * 'instructions' (for all practical purposes equivalent to BPF
 * instructions), which specify which fields of the packet
 * (or its metadata) should be analysed.
 *
 * Each instruction is stored in a structure which begins with
 * "ipfw_insn", and can contain extra fields depending on the
 * instruction type (listed below).
 * Note that the code is written so that individual instructions
 * have a size which is a multiple of 32 bits. This means that, if
 * such structures contain pointers or other 64-bit entities,
 * (there is just one instance now) they may end up unaligned on
 * 64-bit architectures, so they must be handled with care.
 *
 * "enum ipfw_opcodes" are the opcodes supported. We can have up
 * to 256 different opcodes. When adding new opcodes, they should
 * be appended to the end of the opcode list before O_LAST_OPCODE,
 * this will prevent the ABI from being broken, otherwise users
 * will have to recompile ipfw(8) when they update the kernel.
*/

/*
 * The enumerators carry no explicit values, so each opcode's number is
 * its position in this list: inserting or reordering entries renumbers
 * everything that follows.
 */
enum ipfw_opcodes {		/* arguments (4 byte each)	*/
	O_NOP,

	O_IP_SRC,		/* u32 = IP			*/
	O_IP_SRC_MASK,		/* ip = IP/mask			*/
	O_IP_SRC_ME,		/* none				*/
	O_IP_SRC_SET,		/* u32=base, arg1=len, bitmap	*/

	O_IP_DST,		/* u32 = IP			*/
	O_IP_DST_MASK,		/* ip = IP/mask			*/
	O_IP_DST_ME,		/* none				*/
	O_IP_DST_SET,		/* u32=base, arg1=len, bitmap	*/

	O_IP_SRCPORT,		/* (n)port list:mask 4 byte ea	*/
	O_IP_DSTPORT,		/* (n)port list:mask 4 byte ea	*/
	O_PROTO,		/* arg1=protocol		*/

	O_MACADDR2,		/* 2 mac addr:mask		*/
	O_MAC_TYPE,		/* same as srcport		*/

	O_LAYER2,		/* none				*/
	O_IN,			/* none				*/
	O_FRAG,			/* none				*/

	O_RECV,			/* none				*/
	O_XMIT,			/* none				*/
	O_VIA,			/* none				*/

	O_IPOPT,		/* arg1 = 2*u8 bitmap		*/
	O_IPLEN,		/* arg1 = len			*/
	O_IPID,			/* arg1 = id			*/

	O_IPTOS,		/* arg1 = id			*/
	O_IPPRECEDENCE,		/* arg1 = precedence << 5	*/
	O_IPTTL,		/* arg1 = TTL			*/

	O_IPVER,		/* arg1 = version		*/
	O_UID,			/* u32 = id			*/
	O_GID,			/* u32 = id			*/
	O_ESTAB,		/* none (tcp established)	*/
	O_TCPFLAGS,		/* arg1 = 2*u8 bitmap		*/
	O_TCPWIN,		/* arg1 = desired win		*/
	O_TCPSEQ,		/* u32 = desired seq.		*/
	O_TCPACK,		/* u32 = desired seq.		*/
	O_ICMPTYPE,		/* u32 = icmp bitmap		*/
	O_TCPOPTS,		/* arg1 = 2*u8 bitmap		*/

	O_VERREVPATH,		/* none				*/
	O_VERSRCREACH,		/* none				*/

	O_PROBE_STATE,		/* none				*/
	O_KEEP_STATE,		/* none				*/
	O_LIMIT,		/* ipfw_insn_limit		*/
	O_LIMIT_PARENT,		/* dyn_type, not an opcode.	*/

	/*
	 * These are really 'actions'.
	 */

	O_LOG,			/* ipfw_insn_log		*/
	O_PROB,			/* u32 = match probability	*/

	O_CHECK_STATE,		/* none				*/
	O_ACCEPT,		/* none				*/
	O_DENY,			/* none				*/
	O_REJECT,		/* arg1=icmp arg (same as deny)	*/
	O_COUNT,		/* none				*/
	O_SKIPTO,		/* arg1=next rule number	*/
	O_PIPE,			/* arg1=pipe number		*/
	O_QUEUE,		/* arg1=queue number		*/
	O_DIVERT,		/* arg1=port number		*/
	O_TEE,			/* arg1=port number		*/
	O_FORWARD_IP,		/* fwd sockaddr			*/
	O_FORWARD_MAC,		/* fwd mac			*/
	O_NAT,			/* nope				*/
	O_REASS,		/* none				*/

	/*
	 * More opcodes.
	 */
	O_IPSEC,		/* has ipsec history		*/
	O_IP_SRC_LOOKUP,	/* arg1=table number, u32=value	*/
	O_IP_DST_LOOKUP,	/* arg1=table number, u32=value	*/
	O_ANTISPOOF,		/* none				*/
	O_JAIL,			/* u32 = id			*/
	O_ALTQ,			/* u32 = altq classif. qid	*/
	O_DIVERTED,		/* arg1=bitmap (1:loop, 2:out)	*/
	O_TCPDATALEN,		/* arg1 = tcp data len		*/
	O_IP6_SRC,		/* address without mask		*/
	O_IP6_SRC_ME,		/* my addresses			*/
	O_IP6_SRC_MASK,		/* address with the mask	*/
	O_IP6_DST,
	O_IP6_DST_ME,
	O_IP6_DST_MASK,
	O_FLOW6ID,		/* for flow id tag in the ipv6 pkt */
	O_ICMP6TYPE,		/* icmp6 packet type filtering	*/
	O_EXT_HDR,		/* filtering for ipv6 extension header */
	O_IP6,

	/*
	 * actions for ng_ipfw
	 */
	O_NETGRAPH,		/* send to ng_ipfw		*/
	O_NGTEE,		/* copy to ng_ipfw		*/

	O_IP4,

	O_UNREACH6,		/* arg1=icmpv6 code arg (deny)  */

	O_TAG,			/* arg1=tag number		*/
	O_TAGGED,		/* arg1=tag number		*/

	O_SETFIB,		/* arg1=FIB number		*/
	O_FIB,			/* arg1=FIB desired fib number	*/

	O_LAST_OPCODE		/* not an opcode!		*/
};

/*
 * The extension header are filtered only for presence using a bit
 * vector with a flag for each header.
 */
#define EXT_FRAGMENT	0x1
#define EXT_HOPOPTS	0x2
#define EXT_ROUTING	0x4
#define EXT_AH		0x8
#define EXT_ESP		0x10
#define EXT_DSTOPTS	0x20
#define EXT_RTHDR0	0x40
#define EXT_RTHDR2	0x80

/*
 * Template for instructions.
 *
 * ipfw_insn is used for all instructions which require no operands,
 * a single 16-bit value (arg1), or a couple of 8-bit values.
 *
 * For other instructions which require different/larger arguments
 * we have derived structures, ipfw_insn_*.
 *
 * The size of the instruction (in 32-bit words) is in the low
 * 6 bits of "len". The 2 remaining bits are used to implement
 * NOT and OR on individual instructions. Given a type, you can
 * compute the length to be put in "len" using F_INSN_SIZE(t)
 *
 * F_NOT	negates the match result of the instruction.
 *
 * F_OR		is used to build or blocks. By default, instructions
 *		are evaluated as part of a logical AND.
An "or" block
 *		{ X or Y or Z } contains F_OR set in all but the last
 *		instruction of the block. A match will cause the code
 *		to skip past the last instruction of the block.
 *
 * NOTA BENE: in a couple of places we assume that
 *	sizeof(ipfw_insn) == sizeof(u_int32_t)
 * this needs to be fixed.
 *
 */
typedef struct	_ipfw_insn {	/* template for instructions */
	u_int8_t	opcode;
	u_int8_t	len;	/* number of 32-bit words */
#define	F_NOT		0x80
#define	F_OR		0x40
#define	F_LEN_MASK	0x3f
/* Instruction length in 32-bit words, with the F_NOT/F_OR bits masked off. */
#define	F_LEN(cmd)	((cmd)->len & F_LEN_MASK)

	u_int16_t	arg1;
} ipfw_insn;

/*
 * The F_INSN_SIZE(type) computes the size, in 4-byte words, of
 * a given type.
 */
#define	F_INSN_SIZE(t)	((sizeof (t))/sizeof(u_int32_t))

/*
 * This is used to store an array of 16-bit entries (ports etc.)
 */
typedef struct	_ipfw_insn_u16 {
	ipfw_insn o;
	u_int16_t ports[2];	/* there may be more */
} ipfw_insn_u16;

/*
 * This is used to store an array of 32-bit entries
 * (uid, single IPv4 addresses etc.)
 */
typedef struct	_ipfw_insn_u32 {
	ipfw_insn o;
	u_int32_t d[1];	/* one or more */
} ipfw_insn_u32;

/*
 * This is used to store IP addr-mask pairs.
 */
typedef struct	_ipfw_insn_ip {
	ipfw_insn o;
	struct in_addr	addr;
	struct in_addr	mask;
} ipfw_insn_ip;

/*
 * This is used to forward to a given address (ip).
 */
typedef struct  _ipfw_insn_sa {
	ipfw_insn o;
	struct sockaddr_in sa;
} ipfw_insn_sa;

/*
 * This is used for MAC addr-mask pairs.
 */
typedef struct	_ipfw_insn_mac {
	ipfw_insn o;
	u_char addr[12];	/* dst[6] + src[6] */
	u_char mask[12];	/* dst[6] + src[6] */
} ipfw_insn_mac;

/*
 * This is used for interface match rules (recv xx, xmit xx).
 */
typedef struct	_ipfw_insn_if {
	ipfw_insn o;
	union {
		struct in_addr ip;
		int glob;
	} p;
	char name[IFNAMSIZ];
} ipfw_insn_if;

/*
 * This is used for storing an altq queue id number.
 */
typedef struct _ipfw_insn_altq {
	ipfw_insn	o;
	u_int32_t	qid;
} ipfw_insn_altq;

/*
 * This is used for limit rules.
*/
typedef struct	_ipfw_insn_limit {
	ipfw_insn o;
	u_int8_t _pad;
	u_int8_t limit_mask;	/* combination of DYN_* below	*/
#define	DYN_SRC_ADDR	0x1
#define	DYN_SRC_PORT	0x2
#define	DYN_DST_ADDR	0x4
#define	DYN_DST_PORT	0x8

	u_int16_t conn_limit;
} ipfw_insn_limit;

/*
 * This is used for log instructions.
 */
typedef struct  _ipfw_insn_log {
	ipfw_insn o;
	u_int32_t max_log;	/* how many do we log -- 0 = all */
	u_int32_t log_left;	/* how many left to log 	*/
} ipfw_insn_log;

/*
 * Data structures required by both ipfw(8) and ipfw(4) but not part of the
 * management API are protected by IPFW_INTERNAL.
 */
#ifdef IPFW_INTERNAL
/* Server pool support (LSNAT). */
struct cfg_spool {
	LIST_ENTRY(cfg_spool)	_next;		/* chain of spool instances */
	struct in_addr		addr;
	u_short			port;
};
#endif

/* Redirect modes id. */
#define REDIR_ADDR	0x01
#define REDIR_PORT	0x02
#define REDIR_PROTO	0x04

#ifdef IPFW_INTERNAL
/* Nat redirect configuration. */
struct cfg_redir {
	LIST_ENTRY(cfg_redir)	_next;		/* chain of redir instances */
	u_int16_t		mode;		/* type of redirect mode */
	struct in_addr		laddr;		/* local ip address */
	struct in_addr		paddr;		/* public ip address */
	struct in_addr		raddr;		/* remote ip address */
	u_short			lport;		/* local port */
	u_short			pport;		/* public port */
	u_short			rport;		/* remote port  */
	u_short			pport_cnt;	/* number of public ports */
	u_short			rport_cnt;	/* number of remote ports */
	int			proto;		/* protocol: tcp/udp */
	struct alias_link	**alink;
	/* num of entry in spool chain */
	u_int16_t		spool_cnt;
	/* chain of spool instances */
	LIST_HEAD(spool_chain, cfg_spool) spool_chain;
};
#endif

#define NAT_BUF_LEN	1024

#ifdef IPFW_INTERNAL
/* Nat configuration data struct.
*/ struct cfg_nat { /* chain of nat instances */ LIST_ENTRY(cfg_nat) _next; int id; /* nat id */ struct in_addr ip; /* nat ip address */ char if_name[IF_NAMESIZE]; /* interface name */ int mode; /* aliasing mode */ struct libalias *lib; /* libalias instance */ /* number of entry in spool chain */ int redir_cnt; /* chain of redir instances */ LIST_HEAD(redir_chain, cfg_redir) redir_chain; }; #endif #define SOF_NAT sizeof(struct cfg_nat) #define SOF_REDIR sizeof(struct cfg_redir) #define SOF_SPOOL sizeof(struct cfg_spool) /* Nat command. */ typedef struct _ipfw_insn_nat { ipfw_insn o; struct cfg_nat *nat; } ipfw_insn_nat; /* Apply ipv6 mask on ipv6 addr */ #define APPLY_MASK(addr,mask) \ (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \ (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \ (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \ (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; /* Structure for ipv6 */ typedef struct _ipfw_insn_ip6 { ipfw_insn o; struct in6_addr addr6; struct in6_addr mask6; } ipfw_insn_ip6; /* Used to support icmp6 types */ typedef struct _ipfw_insn_icmp6 { ipfw_insn o; uint32_t d[7]; /* XXX This number si related to the netinet/icmp6.h * define ICMP6_MAXTYPE * as follows: n = ICMP6_MAXTYPE/32 + 1 * Actually is 203 */ } ipfw_insn_icmp6; /* * Here we have the structure representing an ipfw rule. * * It starts with a general area (with link fields and counters) * followed by an array of one or more instructions, which the code * accesses as an array of 32-bit values. * * Given a rule pointer r: * * r->cmd is the start of the first instruction. * ACTION_PTR(r) is the start of the first action (things to do * once a rule matched). 
* * When assembling instruction, remember the following: * * + if a rule has a "keep-state" (or "limit") option, then the * first instruction (at r->cmd) MUST BE an O_PROBE_STATE * + if a rule has a "log" option, then the first action * (at ACTION_PTR(r)) MUST be O_LOG * + if a rule has an "altq" option, it comes after "log" * + if a rule has an O_TAG option, it comes after "log" and "altq" * * NOTE: we use a simple linked list of rules because we never need * to delete a rule without scanning the list. We do not use * queue(3) macros for portability and readability. */ struct ip_fw { struct ip_fw *next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ #define RESVD_SET 31 /* set for default and persistent rules */ uint8_t _pad; /* padding */ uint32_t id; /* rule id */ /* These fields are present in all rules. */ uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; #define ACTION_PTR(rule) \ (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ) #define RULESIZE(rule) (sizeof(struct ip_fw) + \ ((struct ip_fw *)(rule))->cmd_len * 4 - 4) /* * This structure is used as a flow mask and a flow id for various * parts of the code. */ struct ipfw_flow_id { u_int32_t dst_ip; u_int32_t src_ip; u_int16_t dst_port; u_int16_t src_port; u_int8_t fib; u_int8_t proto; u_int8_t flags; /* protocol-specific flags */ uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */ struct in6_addr dst_ip6; /* could also store MAC addr! */ struct in6_addr src_ip6; u_int32_t flow_id6; u_int32_t frag_id6; }; #define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6) /* * Dynamic ipfw rule. 
*/
typedef struct _ipfw_dyn_rule ipfw_dyn_rule;

struct _ipfw_dyn_rule {
	ipfw_dyn_rule *next;		/* linked list of rules. */
	struct ip_fw *rule;		/* pointer to rule */
	/* 'rule' is used to pass up the rule number (from the parent) */

	ipfw_dyn_rule *parent;		/* pointer to parent rule */
	u_int64_t pcnt;			/* packet match counter */
	u_int64_t bcnt;			/* byte match counter */
	struct ipfw_flow_id id;		/* (masked) flow id */
	u_int32_t expire;		/* expire time */
	u_int32_t bucket;		/* which bucket in hash table */
	u_int32_t state;		/* state of this rule (typically a
					 * combination of TCP flags)
					 */
	u_int32_t ack_fwd;		/* most recent ACKs in forward */
	u_int32_t ack_rev;		/* and reverse directions (used */
					/* to generate keepalives) */
	u_int16_t dyn_type;		/* rule type */
	u_int16_t count;		/* refcount */
};

/*
 * Definitions for IP option names.
 * Each value is a distinct single bit, so several can be OR-ed together.
 */
#define	IP_FW_IPOPT_LSRR	0x01
#define	IP_FW_IPOPT_SSRR	0x02
#define	IP_FW_IPOPT_RR		0x04
#define	IP_FW_IPOPT_TS		0x08

/*
 * Definitions for TCP option names.
 * Each value is a distinct single bit, so several can be OR-ed together.
 */
#define	IP_FW_TCPOPT_MSS	0x01
#define	IP_FW_TCPOPT_WINDOW	0x02
#define	IP_FW_TCPOPT_SACK	0x04
#define	IP_FW_TCPOPT_TS		0x08
#define	IP_FW_TCPOPT_CC		0x10

#define	ICMP_REJECT_RST		0x100	/* fake ICMP code (send a TCP RST) */
#define	ICMP6_UNREACH_RST	0x100	/* fake ICMPv6 code (send a TCP RST) */

/*
 * These are used for lookup tables.
 */
typedef struct _ipfw_table_entry {
	in_addr_t addr;		/* network address */
	u_int32_t value;	/* value */
	u_int16_t tbl;		/* table number */
	u_int8_t masklen;	/* mask length */
} ipfw_table_entry;

typedef struct _ipfw_table {
	u_int32_t size;		/* size of entries in bytes */
	u_int32_t cnt;		/* # of entries */
	u_int16_t tbl;		/* table number */
	/*
	 * Zero-length trailing array (compiler extension): it adds no
	 * storage to the structure itself.
	 */
	ipfw_table_entry ent[0];	/* entries */
} ipfw_table;

/*
 * Main firewall chains definitions and global var's definitions.
*/
#ifdef _KERNEL

#define MTAG_IPFW	1148380143	/* IPFW-tagged cookie */

/*
 * Return values from ipfw_chk().
 * Values are assigned sequentially starting from IP_FW_PASS = 0.
 */
enum {
	IP_FW_PASS = 0,
	IP_FW_DENY,
	IP_FW_DIVERT,
	IP_FW_TEE,
	IP_FW_DUMMYNET,
	IP_FW_NETGRAPH,
	IP_FW_NGTEE,
	IP_FW_NAT,
	IP_FW_REASS,
};

/* flags for divert mtag */
#define	IP_FW_DIVERT_LOOPBACK_FLAG	0x00080000
#define	IP_FW_DIVERT_OUTPUT_FLAG	0x00100000

/*
 * Structure for collecting parameters to dummynet for ip6_output forwarding
 */
struct _ip6dn_args {
	struct ip6_pktopts *opt_or;
	struct route_in6 ro_or;
	int flags_or;
	struct ip6_moptions *im6o_or;
	struct ifnet *origifp_or;
	struct ifnet *ifp_or;
	struct sockaddr_in6 dst_or;
	u_long mtu_or;
	struct route_in6 ro_pmtu_or;
};

/*
 * Arguments for calling ipfw_chk() and dummynet_io(). We put them
 * all into a structure because this way it is easier and more
 * efficient to pass variables around and extend the interface.
 */
struct ip_fw_args {
	struct mbuf *m;			/* the mbuf chain */
	struct ifnet *oif;		/* output interface */
	struct sockaddr_in *next_hop;	/* forward address */
	struct ip_fw *rule;		/* matching rule */
	uint32_t rule_id;		/* matching rule id */
	uint32_t chain_id;		/* ruleset id */
	struct ether_header *eh;	/* for bridged packets */

	struct ipfw_flow_id f_id;	/* grabbed from IP header */
	uint32_t cookie;		/* a cookie depending on rule action */
	struct inpcb *inp;

	struct _ip6dn_args dummypar;	/* dummynet->ip6_output */
	struct sockaddr_in hopstore;	/* store here if cannot use a pointer */
};

/*
 * Function definitions.
*/ /* Firewall hooks */ struct sockopt; struct dn_flow_set; int ipfw_check_in(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); int ipfw_check_out(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); int ipfw_chk(struct ip_fw_args *); int ipfw_hook(void); int ipfw6_hook(void); int ipfw_unhook(void); int ipfw6_unhook(void); #ifdef NOTYET void ipfw_nat_destroy(void); #endif VNET_DECLARE(int, fw_one_pass); VNET_DECLARE(int, fw_enable); #define V_fw_one_pass VNET(fw_one_pass) #define V_fw_enable VNET(fw_enable) #ifdef INET6 VNET_DECLARE(int, fw6_enable); #define V_fw6_enable VNET(fw6_enable) #endif struct ip_fw_chain { struct ip_fw *rules; /* list of rules */ struct ip_fw *default_rule; struct ip_fw *reap; /* list of rules to reap */ LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ struct radix_node_head *tables[IPFW_TABLES_MAX]; #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; #else struct rwlock rwmtx; #endif /* !__linux__ */ uint32_t id; /* ruleset id */ /* * To optimize jumps, we use a table with skipto_entries pointers * (a power of 2, set with a sysctl depending on available memory). * Entry i points to the first rule i*64k/n <= n < (i+1)*64k/n. * On insert/delete we simply update the relevant entry * with O(1) additional cost. Updates to the sysctl variable * that controls the table are managed at the next add/delete. 
*/ int skipto_shift; /* shifts to compute the index in skipto-ptrs */ int skipto_size; /* number of entries in the table */ struct ip_fw **skipto_ptrs; #ifdef IPFW_HASHTABLES struct ipfw_ht *hashtab[IPFW_TABLES_MAX]; #endif }; #ifdef IPFW_INTERNAL #define IPFW_LOCK_INIT(_chain) \ rw_init(&(_chain)->rwmtx, "IPFW static rules") #define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx) #define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) #define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) #define LOOKUP_NAT(l, i, p) do { \ LIST_FOREACH((p), &(l.nat), _next) { \ if ((p)->id == (i)) { \ break; \ } \ } \ } while (0) typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); typedef int ipfw_nat_cfg_t(struct sockopt *); #endif VNET_DECLARE(struct ip_fw_chain, layer3_chain); #define V_layer3_chain VNET(layer3_chain) #endif /* _KERNEL */ #endif /* _IPFW2_H */ ipfw_mod/dummynet/include/netinet/ip_divert.h000644 000423 000000 00000000507 11151122421 022201 0ustar00luigiwheel000000 000000 #ifndef _IP_DIVERT_H #define _IP_DIVERT_H struct mbuf; typedef void ip_divert_packet_t(struct mbuf *, int); extern ip_divert_packet_t *ip_divert_ptr; struct divert_tag { u_int32_t info; /* port & flags */ u_int16_t cookie; /* ipfw rule number */ }; #endif /* !_IP_DIVERT_H */ ipfw_mod/dummynet/include/netinet/tcp.h000644 000423 000000 00000017462 11151122421 021012 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp.h 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/netinet/tcp.h,v 1.40.2.2 2008/07/31 06:10:25 kmacy Exp $ */ #ifndef _NETINET_TCP_H_ #define _NETINET_TCP_H_ #include #define __BSD_VISIBLE 1 #if __BSD_VISIBLE typedef u_int32_t tcp_seq; #define tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */ #define tcp6hdr tcphdr /* for KAME src sync over BSD*'s */ /* * TCP header. * Per RFC 793, September, 1981. 
*/
struct tcphdr {
	u_short th_sport;		/* source port */
	u_short th_dport;		/* destination port */
	tcp_seq th_seq;			/* sequence number */
	tcp_seq th_ack;			/* acknowledgement number */
	/*
	 * th_x2 and th_off are two 4-bit fields; their declaration order
	 * is swapped according to BYTE_ORDER.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	u_int th_x2:4,			/* (unused) */
	      th_off:4;			/* data offset */
#endif
#if BYTE_ORDER == BIG_ENDIAN
	u_int th_off:4,			/* data offset */
	      th_x2:4;			/* (unused) */
#endif
	u_char th_flags;
	/* Bit values for th_flags; TH_FLAGS is the OR of all of them. */
#define	TH_FIN	0x01
#define	TH_SYN	0x02
#define	TH_RST	0x04
#define	TH_PUSH	0x08
#define	TH_ACK	0x10
#define	TH_URG	0x20
#define	TH_ECE	0x40
#define	TH_CWR	0x80
#define	TH_FLAGS	(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
#define	PRINT_TH_FLAGS	"\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR"

	u_short th_win;			/* window */
	u_short th_sum;			/* checksum */
	u_short th_urp;			/* urgent pointer */
};

/*
 * TCP option kinds (TCPOPT_*) and the matching option lengths in bytes
 * (TCPOLEN_*).
 */
#define	TCPOPT_EOL		0
#define	TCPOLEN_EOL		1
#define	TCPOPT_PAD		0	/* padding after EOL */
#define	TCPOLEN_PAD		1
#define	TCPOPT_NOP		1
#define	TCPOLEN_NOP		1
#define	TCPOPT_MAXSEG		2
#define	TCPOLEN_MAXSEG		4
#define	TCPOPT_WINDOW		3
#define	TCPOLEN_WINDOW		3
#define	TCPOPT_SACK_PERMITTED	4
#define	TCPOLEN_SACK_PERMITTED	2
#define	TCPOPT_SACK		5
#define	TCPOLEN_SACKHDR		2
#define	TCPOLEN_SACK		8	/* 2*sizeof(tcp_seq) */
#define	TCPOPT_TIMESTAMP	8
#define	TCPOLEN_TIMESTAMP	10
#define	TCPOLEN_TSTAMP_APPA	(TCPOLEN_TIMESTAMP+2) /* appendix A */
#define	TCPOPT_SIGNATURE	19	/* Keyed MD5: RFC 2385 */
#define	TCPOLEN_SIGNATURE	18

/* Miscellaneous constants */
#define	MAX_SACK_BLKS	6	/* Max # SACK blocks stored at receiver side */
#define	TCP_MAX_SACK	4	/* MAX # SACKs sent in any segment */

/*
 * Default maximum segment size for TCP.
 * With an IP MTU of 576, this is 536,
 * but 512 is probably more convenient.
 * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
 */
#define	TCP_MSS	512
/*
 * TCP_MINMSS is defined to be 216 which is fine for the smallest
 * link MTU (256 bytes, AX.25 packet radio) in the Internet.
* However it is very unlikely to come across such low MTU interfaces
 * these days (anno dato 2003).
 * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments.
 * Setting this to "0" disables the minmss check.
 */
#define	TCP_MINMSS 216

/*
 * Default maximum segment size for TCP6.
 * With an IP6 MSS of 1280, this is 1220,
 * but 1024 is probably more convenient. (xxx kazu in doubt)
 * This should be defined as MIN(1024, IP6_MSS - sizeof (struct tcpip6hdr))
 */
#define	TCP6_MSS	1024

#define	TCP_MAXWIN	65535	/* largest value for (unscaled) window */
#define	TTCP_CLIENT_SND_WND	4096	/* dflt send window for T/TCP client */

#define	TCP_MAX_WINSHIFT	14	/* maximum window shift */

#define	TCP_MAXBURST		4	/* maximum segments in a burst */

/*
 * 0xf is the largest value that fits the 4-bit th_off field of
 * struct tcphdr; << 2 multiplies it by four, giving 60.
 */
#define	TCP_MAXHLEN	(0xf<<2)	/* max length of header in bytes */
/* The result has type size_t because of the sizeof operand. */
#define	TCP_MAXOLEN	(TCP_MAXHLEN - sizeof(struct tcphdr))
					/* max space left for options */
#endif /* __BSD_VISIBLE */

/*
 * User-settable options (used with setsockopt).
 */
#define	TCP_NODELAY	0x01	/* don't delay send to coalesce packets */
#if __BSD_VISIBLE
#define	TCP_MAXSEG	0x02	/* set maximum segment size */
#define	TCP_NOPUSH	0x04	/* don't push last block of write */
#define	TCP_NOOPT	0x08	/* don't use TCP options */
#define	TCP_MD5SIG	0x10	/* use MD5 digests (RFC2385) */
#define	TCP_INFO	0x20	/* retrieve tcp_info structure */
#define	TCP_CONGESTION	0x40	/* get/set congestion control algorithm */

#define	TCP_CA_NAME_MAX	16	/* max congestion control name length */

/*
 * Single-bit option flags.
 * NOTE(review): presumably reported in tcp_info.tcpi_options -- confirm.
 */
#define	TCPI_OPT_TIMESTAMPS	0x01
#define	TCPI_OPT_SACK		0x02
#define	TCPI_OPT_WSCALE		0x04
#define	TCPI_OPT_ECN		0x08
#define	TCPI_OPT_TOE		0x10

/*
 * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
 * the caller to query certain information about the state of a TCP
 * connection. We provide an overlapping set of fields with the Linux
 * implementation, but since this is a fixed size structure, room has been
 * left for growth.
In order to maximize potential future compatibility with * the Linux API, the same variable names and order have been adopted, and * padding left to make room for omitted fields in case they are added later. * * XXX: This is currently an unstable ABI/API, in that it is expected to * change. */ struct tcp_info { u_int8_t tcpi_state; /* TCP FSM state. */ u_int8_t __tcpi_ca_state; u_int8_t __tcpi_retransmits; u_int8_t __tcpi_probes; u_int8_t __tcpi_backoff; u_int8_t tcpi_options; /* Options enabled on conn. */ u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */ tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */ u_int32_t __tcpi_rto; u_int32_t __tcpi_ato; u_int32_t __tcpi_snd_mss; u_int32_t __tcpi_rcv_mss; u_int32_t __tcpi_unacked; u_int32_t __tcpi_sacked; u_int32_t __tcpi_lost; u_int32_t __tcpi_retrans; u_int32_t __tcpi_fackets; /* Times; measurements in usecs. */ u_int32_t __tcpi_last_data_sent; u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */ u_int32_t __tcpi_last_data_recv; u_int32_t __tcpi_last_ack_recv; /* Metrics; variable units. */ u_int32_t __tcpi_pmtu; u_int32_t __tcpi_rcv_ssthresh; u_int32_t tcpi_rtt; /* Smoothed RTT in usecs. */ u_int32_t tcpi_rttvar; /* RTT variance in usecs. */ u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */ u_int32_t tcpi_snd_cwnd; /* Send congestion window. */ u_int32_t __tcpi_advmss; u_int32_t __tcpi_reordering; u_int32_t __tcpi_rcv_rtt; u_int32_t tcpi_rcv_space; /* Advertised recv window. */ /* FreeBSD extensions to tcp_info. */ u_int32_t tcpi_snd_wnd; /* Advertised send window. */ u_int32_t tcpi_snd_bwnd; /* Bandwidth send window. */ u_int32_t tcpi_snd_nxt; /* Next egress seqno */ u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */ u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */ /* Padding to grow without breaking ABI. */ u_int32_t __tcpi_pad[29]; /* Padding. 
*/ }; #endif #endif /* !_NETINET_TCP_H_ */ ipfw_mod/dummynet/include/netinet/ip.h000644 000423 000000 00000003137 11152004450 020630 0ustar00luigiwheel000000 000000 #ifndef _NETINET_IP_H_ #define _NETINET_IP_H_ #define LITTLE_ENDIAN 1234 #define BIG_ENDIAN 4321 #if defined(__BIG_ENDIAN) #error we are in bigendian #elif defined(__LITTLE_ENDIAN) //#warning we are in littleendian #define BYTE_ORDER LITTLE_ENDIAN #else #error no platform #endif /* XXX endiannes doesn't belong here */ // #define LITTLE_ENDIAN 1234 // #define BIG_ENDIAN 4321 // #define BYTE_ORDER LITTLE_ENDIAN /* * Structure of an internet header, naked of options. */ struct ip { #if BYTE_ORDER == LITTLE_ENDIAN u_int ip_hl:4, /* header length */ ip_v:4; /* version */ #endif #if BYTE_ORDER == BIG_ENDIAN u_int ip_v:4, /* version */ ip_hl:4; /* header length */ #endif u_char ip_tos; /* type of service */ u_short ip_len; /* total length */ u_short ip_id; /* identification */ u_short ip_off; /* fragment offset field */ #define IP_RF 0x8000 /* reserved fragment flag */ #define IP_DF 0x4000 /* dont fragment flag */ #define IP_MF 0x2000 /* more fragments flag */ #define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ u_char ip_ttl; /* time to live */ u_char ip_p; /* protocol */ u_short ip_sum; /* checksum */ struct in_addr ip_src,ip_dst; /* source and dest address */ } __packed __aligned(4); #endif /* _NETINET_IP_H_ */ ipfw_mod/dummynet/include/netinet/udp.h000644 000423 000000 00000003736 11151122421 021013 0ustar00luigiwheel000000 000000 /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)udp.h 8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/netinet/udp.h,v 1.10 2007/02/20 10:13:11 rwatson Exp $
 */

#ifndef _NETINET_UDP_H_
#define _NETINET_UDP_H_

/*
 * UDP protocol header.
 * Per RFC 768, September, 1981.
 *
 * Four consecutive u_short fields, in the order declared below.
 */
struct udphdr {
	u_short uh_sport;		/* source port */
	u_short uh_dport;		/* destination port */
	u_short uh_ulen;		/* udp length */
	u_short uh_sum;			/* udp checksum */
};

#endif /* !_NETINET_UDP_H_ */
ipfw_mod/dummynet/include/net/radix.h000644 000423 000000 00000014437 11311370776 020472 0ustar00luigiwheel000000 000000
/*-
 * Copyright (c) 1988, 1989, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)radix.h 8.2 (Berkeley) 10/31/94 * $FreeBSD: head/sys/net/radix.h 185747 2008-12-07 21:15:43Z kmacy $ */ #ifndef _RADIX_H_ #define _RADIX_H_ #ifdef _KERNEL #include #include #include #endif #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_RTABLE); #endif /* * Radix search tree node layout. 
*/

struct radix_node {
	struct radix_mask *rn_mklist;	/* list of masks contained in subtree */
	struct radix_node *rn_parent;	/* parent */
	short rn_bit;			/* bit offset; -1-index(netmask) */
	char rn_bmask;			/* node: mask for bit test*/
	u_char rn_flags;		/* enumerated next */
#define RNF_NORMAL	1		/* leaf contains normal route */
#define RNF_ROOT	2		/* leaf is root leaf for tree */
#define RNF_ACTIVE	4		/* This node is alive (for rtfree) */
	/*
	 * Leaf and internal-node data share storage; use the rn_* accessor
	 * macros defined after the structure to reach the members.
	 */
	union {
		struct {			/* leaf only data: */
			caddr_t rn_Key;		/* object of search */
			caddr_t rn_Mask;	/* netmask, if present */
			struct radix_node *rn_Dupedkey;
		} rn_leaf;
		struct {			/* node only data: */
			int rn_Off;		/* where to start compare */
			struct radix_node *rn_L;/* progeny */
			struct radix_node *rn_R;/* progeny */
		} rn_node;
	} rn_u;
#ifdef RN_DEBUG
	int rn_info;
	struct radix_node *rn_twin;
	struct radix_node *rn_ybro;
#endif
};

/* Shorthand accessors for the members of the rn_u union. */
#define	rn_dupedkey	rn_u.rn_leaf.rn_Dupedkey
#define	rn_key		rn_u.rn_leaf.rn_Key
#define	rn_mask		rn_u.rn_leaf.rn_Mask
#define	rn_offset	rn_u.rn_node.rn_Off
#define	rn_left		rn_u.rn_node.rn_L
#define	rn_right	rn_u.rn_node.rn_R

/*
 * Annotations to tree concerning potential routes applying to subtrees.
 */

struct radix_mask {
	short rm_bit;			/* bit offset; -1-index(netmask) */
	char rm_unused;			/* cf. rn_bmask */
	u_char rm_flags;		/* cf. rn_flags */
	struct radix_mask *rm_mklist;	/* more masks to try */
	union {
		caddr_t rmu_mask;		/* the mask */
		struct radix_node *rmu_leaf;	/* for normal routes */
	} rm_rmu;
	int rm_refs;			/* # of references to this struct */
};

/* Shorthand accessors for the members of the rm_rmu union. */
#define	rm_mask rm_rmu.rmu_mask
#define	rm_leaf rm_rmu.rmu_leaf		/* extra field would make 32 bytes */

/* Callback type taken by the rnh_walktree* methods below. */
typedef int walktree_f_t(struct radix_node *, void *);

/*
 * Head of a radix tree: the tree top plus a table of function pointers
 * used to operate on the tree.
 */
struct radix_node_head {
	struct radix_node *rnh_treetop;
	struct radix_node *(*rnh_addaddr)	/* add based on sockaddr */
		(void *v, void *mask,
		 struct radix_node_head *head, struct radix_node nodes[]);
	struct radix_node *(*rnh_deladdr)	/* remove based on sockaddr */
		(void *v, void *mask, struct radix_node_head *head);
	struct radix_node *(*rnh_matchaddr)	/* locate based on sockaddr */
		(void *v, struct radix_node_head *head);
	struct radix_node *(*rnh_lookup)	/* locate based on sockaddr */
		(void *v, void *mask, struct radix_node_head *head);
	int (*rnh_walktree)			/* traverse tree */
		(struct radix_node_head *head, walktree_f_t *f, void *w);
	int (*rnh_walktree_from)		/* traverse tree below a */
		(struct radix_node_head *head, void *a, void *m,
		 walktree_f_t *f, void *w);
	void (*rnh_close)	/* do something when the last ref drops */
		(struct radix_node *rn, struct radix_node_head *head);
	struct radix_node rnh_nodes[3];	/* empty tree for common case */
	int rnh_multipath;		/* multipath capable ? */
#ifdef _KERNEL
	/* The lock type differs per platform; the field name is the same. */
#if defined( __linux__ ) || defined( _WIN32 )
	spinlock_t rnh_lock;
#else
	struct rwlock rnh_lock;		/* locks entire radix tree */
#endif /* !__linux__ */
#endif
};

/*
 * Allocation helpers: R_Malloc/R_Zalloc assign the allocation, cast to
 * type t, to p (R_Zalloc returns zeroed memory); Free releases p.
 * NOTE: both Free() definitions end with a ';' and do not parenthesize
 * p, so "Free(x);" expands to two statements -- take care inside an
 * unbraced if/else.
 */
#ifndef _KERNEL
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
#define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n)))
#define Free(p) free((char *)p);
#else
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT))
#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO))
#define Free(p) free((caddr_t)p, M_RTABLE);

/* Wrappers around the rw_* primitives applied to rnh->rnh_lock. */
#define	RADIX_NODE_HEAD_LOCK_INIT(rnh)	\
    rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0)
#define	RADIX_NODE_HEAD_LOCK(rnh)	rw_wlock(&(rnh)->rnh_lock)
#define	RADIX_NODE_HEAD_UNLOCK(rnh)	rw_wunlock(&(rnh)->rnh_lock)
#define	RADIX_NODE_HEAD_RLOCK(rnh)	rw_rlock(&(rnh)->rnh_lock)
#define	RADIX_NODE_HEAD_RUNLOCK(rnh)	rw_runlock(&(rnh)->rnh_lock)
#define	RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh)	rw_try_upgrade(&(rnh)->rnh_lock)

#define	RADIX_NODE_HEAD_DESTROY(rnh)	rw_destroy(&(rnh)->rnh_lock)
#define	RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED)
#define	RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED)
#endif /* _KERNEL */

void rn_init(int);
int rn_inithead(void **, int);
int rn_refines(void *, void *);
/* One declaration listing several functions that return struct radix_node *. */
struct radix_node
	*rn_addmask(void *, int, int),
	*rn_addroute (void *, void *, struct radix_node_head *,
			struct radix_node [2]),
	*rn_delete(void *, void *, struct radix_node_head *),
	*rn_lookup (void *v_arg, void *m_arg,
			struct radix_node_head *head),
	*rn_match(void *, struct radix_node_head *);

#endif /* _RADIX_H_ */
ipfw_mod/dummynet/include/net/pfil.h000644 000423 000000 00000007470 11157432360 020310 0ustar00luigiwheel000000 000000
/* $FreeBSD: src/sys/net/pfil.h,v 1.16 2007/06/08 12:43:25 gallatin Exp $ */
/* $NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $ */

/*-
 * Copyright (c) 1996 Matthew R. Green
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NET_PFIL_H_ #define _NET_PFIL_H_ #include #include #include #include #include #include struct mbuf; struct ifnet; struct inpcb; /* * The packet filter hooks are designed for anything to call them to * possibly intercept the packet. 
*/
struct packet_filter_hook {
	TAILQ_ENTRY(packet_filter_hook) pfil_link;
	/* Hook callback; pfil_add_hook() below takes the same signature. */
	int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
		struct inpcb *);
	void *pfil_arg;
	int pfil_flags;
};

/* Direction and behaviour flags; PFIL_ALL is PFIL_IN | PFIL_OUT. */
#define PFIL_IN		0x00000001
#define PFIL_OUT	0x00000002
#define PFIL_WAITOK	0x00000004
#define PFIL_ALL	(PFIL_IN|PFIL_OUT)

typedef	TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;

#define	PFIL_TYPE_AF		1	/* key is AF_* type */
#define	PFIL_TYPE_IFNET		2	/* key is ifnet pointer */

/*
 * Head of a set of hooks: one list for inbound and one for outbound
 * packets, with the key (ph_un) interpreted according to ph_type.
 */
struct pfil_head {
	pfil_list_t ph_in;
	pfil_list_t ph_out;
	int ph_type;
	int ph_nhooks;
	/* The lock type differs per platform; the field name is the same. */
#if defined( __linux__ ) || defined( _WIN32 )
	rwlock_t ph_mtx;
#else
	struct rwlock ph_mtx;
#endif
	union {
		u_long phu_val;
		void *phu_ptr;
	} ph_un;
#define	ph_af		ph_un.phu_val
#define	ph_ifnet	ph_un.phu_ptr
	LIST_ENTRY(pfil_head) ph_list;
};

int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
	int, struct inpcb *inp);

int pfil_add_hook(int (*func)(void *, struct mbuf **,
	struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *);
int pfil_remove_hook(int (*func)(void *, struct mbuf **,
	struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *);

int pfil_head_register(struct pfil_head *);
int pfil_head_unregister(struct pfil_head *);

struct pfil_head *pfil_head_get(int, u_long);

/* True when at least one hook is registered on the head. */
#define	PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
/* Wrappers around the rw_* primitives applied to the head's ph_mtx. */
#define	PFIL_RLOCK(p) rw_rlock(&(p)->ph_mtx)
#define	PFIL_WLOCK(p) rw_wlock(&(p)->ph_mtx)
#define	PFIL_RUNLOCK(p) rw_runlock(&(p)->ph_mtx)
#define	PFIL_WUNLOCK(p) rw_wunlock(&(p)->ph_mtx)
/* pfil_global_lock is not declared in the visible part of this header. */
#define	PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
#define	PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)

/*
 * Return the first hook registered on 'ph' for direction 'dir':
 * the head of ph_in for PFIL_IN, the head of ph_out for PFIL_OUT,
 * and NULL for any other value of 'dir' (including PFIL_ALL).
 */
static __inline struct packet_filter_hook *
pfil_hook_get(int dir, struct pfil_head *ph)
{
	if (dir == PFIL_IN)
		return (TAILQ_FIRST(&ph->ph_in));
	else if (dir == PFIL_OUT)
		return (TAILQ_FIRST(&ph->ph_out));
	else
		return (NULL);
}

#endif /* _NET_PFIL_H_ */
ipfw_mod/dummynet/include/net/if.h000644 000423 000000 00000000026 11151122421 017726 0ustar00luigiwheel000000 000000
#include