diff options
Diffstat (limited to 'ecos/packages/net/tcpip/current/src')
60 files changed, 37117 insertions, 0 deletions
diff --git a/ecos/packages/net/tcpip/current/src/ecos/init.cxx b/ecos/packages/net/tcpip/current/src/ecos/init.cxx new file mode 100644 index 0000000..0c2c5f0 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/ecos/init.cxx @@ -0,0 +1,59 @@
//==========================================================================
//
//      ecos/init.cxx
//
//      Networking package initializer class
//
//==========================================================================
// ####ECOSPDCOPYRIGHTBEGIN####
// -------------------------------------------
// This file is part of eCos, the Embedded Configurable Operating System.
// Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
//
// Permission is granted to use, copy, modify and redistribute this
// file.
//
// -------------------------------------------
// ####ECOSPDCOPYRIGHTEND####
//==========================================================================
//#####DESCRIPTIONBEGIN####
//
// Author(s):    gthomas
// Contributors: gthomas
// Date:         2000-01-10
// Purpose:
// Description:
//
//
//####DESCRIPTIONEND####
//
//==========================================================================


// Network initialization

#include <pkgconf/system.h>
#include <pkgconf/hal.h>
#include <cyg/infra/cyg_type.h>

// Attribute applied to the static instance below; it requests construction
// after the CYG_INIT_LIBC initialization step.
#define NET_INIT CYGBLD_ATTRIB_INIT_AFTER(CYG_INIT_LIBC)

// This is a dummy class just so we can execute the network package
// initialization at its proper priority

externC void cyg_net_init(void);

class net_init_class {
public:
    // The constructor's only job is to call the C-level network
    // initialization entry point.
    net_init_class(void) {
        cyg_net_init();
    }
};

// And here's an instance of the class just to make the code run
static net_init_class _net_init NET_INIT;

// Deliberately empty.  cyg_net_init() calls this function so that the
// linker has a reason to pull this object file (and therefore the static
// initializer above) into the final image.
externC void
cyg_do_net_init(void)
{
}
diff --git a/ecos/packages/net/tcpip/current/src/ecos/support.c b/ecos/packages/net/tcpip/current/src/ecos/support.c new file mode 100644 index 0000000..4bce185 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/ecos/support.c @@ 
-0,0 +1,728 @@ +//========================================================================== +// +// ecos/support.c +// +// eCos wrapper and support functions +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas, hmt +// Contributors: gthomas, hmt +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +// Support routines, etc., used by network code + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/sockio.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <net/if.h> +#include <net/route.h> +#include <net/netisr.h> +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <arpa/inet.h> + +#include <machine/cpu.h> + +#include <pkgconf/net.h> + +#include <cyg/infra/diag.h> +#include <cyg/hal/hal_intr.h> +#include <cyg/kernel/kapi.h> + +#include <cyg/infra/cyg_ass.h> + +#if !CYGPKG_NET_DRIVER_FRAMEWORK // Interface +#error At least one network driver framework must be defined! 
+#else +#include <cyg/io/eth/netdev.h> + +// Define table boundaries +CYG_HAL_TABLE_BEGIN( __NETDEVTAB__, netdev ); +CYG_HAL_TABLE_END( __NETDEVTAB_END__, netdev ); + +// Used for system-wide "ticks per second" +int hz = 100; +int tick = 10000; // usec per "tick" + +volatile struct timeval mono_time; +volatile struct timeval ktime; + +// Low-level network debugging +int net_debug = 0; + +#define STACK_SIZE CYGNUM_HAL_STACK_SIZE_TYPICAL +static char netint_stack[STACK_SIZE]; +static cyg_thread netint_thread_data; +static cyg_handle_t netint_thread_handle; + +cyg_flag_t netint_flags; +#define NETISR_ANY 0xFFFFFFFF // Any possible bit... + +extern void cyg_test_exit(void); // TEMP +void +cyg_panic(const char *msg, ...) +{ + cyg_uint32 old_ints; + CYG_FAIL( msg ); + HAL_DISABLE_INTERRUPTS(old_ints); + diag_printf("PANIC: %s\n", msg); + cyg_test_exit(); // FIXME +} + + +// Round a number 'n' up to a multiple of 'm' +#define round(n,m) ((((n)+((m)-1))/(m))*(m)) + +#define NET_MEMPOOL_SIZE round(CYGPKG_NET_MEM_USAGE/4,MSIZE) +#define NET_MBUFS_SIZE round(CYGPKG_NET_MEM_USAGE/4,MSIZE) +#define NET_CLUSTERS_SIZE round(CYGPKG_NET_MEM_USAGE/2,MCLBYTES) + +static unsigned char net_mempool_area[NET_MEMPOOL_SIZE]; +static cyg_mempool_var net_mem_pool; +static cyg_handle_t net_mem; +static unsigned char net_mbufs_area[NET_MBUFS_SIZE]; +static cyg_mempool_fix net_mbufs_pool; +static cyg_handle_t net_mbufs; +static unsigned char net_clusters_area[NET_CLUSTERS_SIZE]; +static cyg_mempool_fix net_clusters_pool; +static cyg_handle_t net_clusters; +static char net_clusters_refcnt[(NET_CLUSTERS_SIZE/MCLBYTES)+1]; + +#ifdef CYGDBG_NET_TIMING_STATS +static struct net_stats stats_malloc, stats_free, + stats_memcpy, stats_memset, + stats_mbuf_alloc, stats_mbuf_free, stats_cluster_alloc; +extern struct net_stats stats_in_cksum; + +// Display a number of ticks as microseconds +// Note: for improved calculation significance, values are kept in ticks*1000 +static long rtc_resolution[] = 
CYGNUM_KERNEL_COUNTERS_RTC_RESOLUTION; +static long ns_per_system_clock; + +static void +show_ticks_in_us(cyg_uint32 ticks) +{ + long long ns; + ns_per_system_clock = 1000000/rtc_resolution[1]; + ns = (ns_per_system_clock * ((long long)ticks * 1000)) / + CYGNUM_KERNEL_COUNTERS_RTC_PERIOD; + ns += 5; // for rounding to .01us + diag_printf("%7d.%02d", (int)(ns/1000), (int)((ns%1000)/10)); +} + +void +show_net_stats(struct net_stats *stats, const char *title) +{ + int ave; + ave = stats->total_time / stats->count; + diag_printf("%s:\n", title); + diag_printf(" count: %6d", stats->count); + diag_printf(", min: "); + show_ticks_in_us(stats->min_time); + diag_printf(", max: "); + show_ticks_in_us(stats->max_time); + diag_printf(", total: "); + show_ticks_in_us(stats->total_time); + diag_printf(", ave: "); + show_ticks_in_us(ave); + diag_printf("\n"); + // Reset stats + memset(stats, 0, sizeof(*stats)); +} + +void +show_net_times(void) +{ + show_net_stats(&stats_malloc, "Net malloc"); + show_net_stats(&stats_free, "Net free"); + show_net_stats(&stats_mbuf_alloc, "Mbuf alloc"); + show_net_stats(&stats_mbuf_free, "Mbuf free"); + show_net_stats(&stats_cluster_alloc, "Cluster alloc"); + show_net_stats(&stats_in_cksum, "Checksum"); + show_net_stats(&stats_memcpy, "Net memcpy"); + show_net_stats(&stats_memset, "Net memset"); +} +#endif /* CYGDBG_NET_TIMING_STATS */ + +void * +cyg_net_malloc(u_long size, int type, int flags) +{ + void *res; + START_STATS(); + if (flags & M_NOWAIT) { + res = cyg_mempool_var_try_alloc(net_mem, size); + } else { + res = cyg_mempool_var_alloc(net_mem, size); + } + FINISH_STATS(stats_malloc); + return (res); +} + +void +cyg_net_free(caddr_t addr, int type) +{ + START_STATS(); + cyg_mempool_var_free(net_mem, addr); + FINISH_STATS(stats_free); +} + +void * +cyg_net_mbuf_alloc(int type, int flags) +{ + void *res; + START_STATS(); + mbstat.m_mbufs++; + if (flags & M_NOWAIT) { + res = cyg_mempool_fix_try_alloc(net_mbufs); + } else { + res = 
cyg_mempool_fix_alloc(net_mbufs); + } + FINISH_STATS(stats_mbuf_alloc); + // Check that this nastiness works OK + CYG_ASSERT( dtom(res) == res, "dtom failed, base of mbuf" ); + CYG_ASSERT( dtom((char *)res + MSIZE/2) == res, "dtom failed, mid mbuf" ); + return (res); +} + +void +cyg_net_mbuf_free(caddr_t addr, int type) +{ + START_STATS(); + mbstat.m_mbufs--; + cyg_mempool_fix_free(net_mbufs, addr); + FINISH_STATS(stats_mbuf_free); +} + +void * +cyg_net_cluster_alloc(void) +{ + void *res; + START_STATS(); + res = cyg_mempool_fix_try_alloc(net_clusters); + FINISH_STATS(stats_cluster_alloc); + return res; +} + +static void +cyg_kmem_init(void) +{ + unsigned char *p; +#ifdef CYGPKG_NET_DEBUG + diag_printf("Network stack using %d bytes for misc space\n", NET_MEMPOOL_SIZE); + diag_printf(" %d bytes for mbufs\n", NET_MBUFS_SIZE); + diag_printf(" %d bytes for mbuf clusters\n", NET_CLUSTERS_SIZE); +#endif + cyg_mempool_var_create(&net_mempool_area, + NET_MEMPOOL_SIZE, + &net_mem, + &net_mem_pool); + // Align the mbufs on MSIZE boudaries so that dtom() can work. 
+ p = (unsigned char *)(((long)(&net_mbufs_area) + MSIZE - 1) & ~(MSIZE-1)); + cyg_mempool_fix_create(p, + ((&(net_mbufs_area[NET_MBUFS_SIZE])) - p) & ~(MSIZE-1), + MSIZE, + &net_mbufs, + &net_mbufs_pool); + cyg_mempool_fix_create(&net_clusters_area, + NET_CLUSTERS_SIZE, + MCLBYTES, + &net_clusters, + &net_clusters_pool); + mbutl = (struct mbuf *)&net_clusters_area; + mclrefcnt = net_clusters_refcnt; +} + +void cyg_kmem_print_stats( void ) +{ + cyg_mempool_info info; + + diag_printf( "Network stack mbuf stats:\n" ); + diag_printf( " mbufs %d, clusters %d, free clusters %d\n", + mbstat.m_mbufs, /* mbufs obtained from page pool */ + mbstat.m_clusters, /* clusters obtained from page pool */ + /* mbstat.m_spare, */ /* spare field */ + mbstat.m_clfree /* free clusters */ + ); + diag_printf( " Failed to get %d times\n" + " Waited to get %d times\n" + " Drained queues to get %d times\n", + mbstat.m_drops, /* times failed to find space */ + mbstat.m_wait, /* times waited for space */ + mbstat.m_drain /* times drained protocols for space */ + /* mbstat.m_mtypes[256]; type specific mbuf allocations */ + ); + + cyg_mempool_var_get_info( net_mem, &info ); + diag_printf( "Misc mpool: total %7d, free %7d, max free block %d\n", + info.totalmem, + info.freemem, + info.maxfree + ); + + cyg_mempool_fix_get_info( net_mbufs, &info ); + diag_printf( "Mbufs pool: total %7d, free %7d, blocksize %4d\n", + info.totalmem, + info.freemem, + info.blocksize + ); + + + cyg_mempool_fix_get_info( net_clusters, &info ); + diag_printf( "Clust pool: total %7d, free %7d, blocksize %4d\n", + info.totalmem, + info.freemem, + info.blocksize + ); +} + +// This API is for our own automated network tests. It's not in any header +// files because it's not at all supported. 
+int cyg_net_get_mem_stats( int which, cyg_mempool_info *p ) +{ + CYG_CHECK_DATA_PTR( p, "Bad pointer to mempool_info" ); + CYG_ASSERT( 0 <= which, "Mempool selector underflow" ); + CYG_ASSERT( 2 >=which, "Mempool selector overflow" ); + + if ( p ) + switch ( which ) { + case 0: + cyg_mempool_var_get_info( net_mem, p ); + break; + case 1: + cyg_mempool_fix_get_info( net_mbufs, p ); + break; + case 2: + cyg_mempool_fix_get_info( net_clusters, p ); + break; + default: + return 0; + } + return (int)p; +} + +int +cyg_mtocl(u_long x) +{ + int res; + res = (((u_long)(x) - (u_long)mbutl) >> MCLSHIFT); + return res; +} + +struct mbuf * +cyg_cltom(u_long x) +{ + struct mbuf *res; + res = (struct mbuf *)((caddr_t)((u_long)mbutl + ((u_long)(x) << MCLSHIFT))); + return res; +} + +externC void +net_memcpy(void *d, void *s, int n) +{ + START_STATS(); + memcpy(d, s, n); + FINISH_STATS(stats_memcpy); +} + +externC void +net_memset(void *s, int v, int n) +{ + START_STATS(); + memset(s, v, n); + FINISH_STATS(stats_memset); +} + +// Rather than bring in the whole BSD 'random' code... +int +arc4random(void) +{ + cyg_uint32 res; + static unsigned long seed = 0xDEADB00B; + HAL_CLOCK_READ(&res); // Not so bad... (but often 0..N where N is small) + seed = ((seed & 0x007F00FF) << 7) ^ + ((seed & 0x0F80FF00) >> 8) ^ // be sure to stir those low bits + (res << 13) ^ (res >> 9); // using the clock too! 
+ return (int)seed; +} + +void +get_random_bytes(void *buf, size_t len) +{ + unsigned long ranbuf, *lp; + lp = (unsigned long *)buf; + while (len > 0) { + ranbuf = arc4random(); + *lp++ = ranbuf; + len -= sizeof(ranbuf); + } +} + +void +microtime(struct timeval *tp) +{ + panic("microtime"); +} + +void +get_mono_time(void) +{ + panic("get_mono_time"); +} + +void +csignal(pid_t pgid, int signum, uid_t uid, uid_t euid) +{ + panic("csignal"); +} + +int +bcmp(const void *_p1, const void *_p2, size_t len) +{ + int res = 0; + unsigned char *p1 = (unsigned char *)_p1; + unsigned char *p2 = (unsigned char *)_p2; + while (len-- > 0) { + res = *p1++ - *p2++; + if (res) break; + } + return res; +} + +int +copyout(const void *s, void *d, size_t len) +{ + memcpy(d, s, len); + return 0; +} + +int +copyin(const void *s, void *d, size_t len) +{ + memcpy(d, s, len); + return 0; +} + +void +ovbcopy(const void *s, void *d, size_t len) +{ + memcpy(d, s, len); +} + +// ------------------------------------------------------------------------ +// THE NETWORK THREAD ITSELF +// +// Network software interrupt handler +// This function is run as a separate thread to allow +// processing of network events (mostly incoming packets) +// at "user level" instead of at interrupt time. 
+// +static void +cyg_netint(cyg_addrword_t param) +{ + cyg_flag_value_t curisr; + int spl; + while (true) { + curisr = cyg_flag_wait(&netint_flags, NETISR_ANY, + CYG_FLAG_WAITMODE_OR|CYG_FLAG_WAITMODE_CLR); + spl = splsoftnet(); // Prevent any overlapping "stack" processing +#ifdef INET + if (curisr & (1 << NETISR_ARP)) { + // Pending ARP requests + arpintr(); + } + if (curisr & (1 << NETISR_IP)) { + // Pending IPv4 input + ipintr(); + } +#endif +#ifdef INET6 + if (curisr & (1 << NETISR_IPV6)) { + // Pending IPv6 input + ip6intr(); + } +#endif +#if NBRIDGE > 0 + if (curisr & (1 << NETISR_BRIDGE)) { + // Pending bridge input + bridgeintr(); + } +#endif + splx(spl); + } +} + + +// This just sets one of the pseudo-ISR bits used above. +void +setsoftnet(void) +{ + // This is called if we are out of MBUFs - it doesn't do anything, and + // that situation is handled OK, so don't bother with the diagnostic: + + // diag_printf("setsoftnet\n"); + + // No need to do this because it is ignored anyway: + // schednetisr(NETISR_SOFTNET); +} + + +/* Update the kernel globel ktime. */ +static void +cyg_ktime_func(cyg_handle_t alarm,cyg_addrword_t data) +{ + cyg_tick_count_t now = cyg_current_time(); + + ktime.tv_usec = (now % hz) * tick; + ktime.tv_sec = 1 + now / hz; +} + +static void +cyg_ktime_init(void) +{ + cyg_handle_t ktime_alarm_handle; + static cyg_alarm ktime_alarm; + cyg_handle_t counter; + + // Do not start at 0 - net stack thinks 0 an invalid time; + // Have a valid time available from right now: + ktime.tv_usec = 0; + ktime.tv_sec = 1; + + cyg_clock_to_counter(cyg_real_time_clock(),&counter); + cyg_alarm_create(counter, + cyg_ktime_func, + 0, + &ktime_alarm_handle, + &ktime_alarm); + + /* We want one alarm every 10ms. */ + cyg_alarm_initialize(ktime_alarm_handle,cyg_current_time()+1,1); + cyg_alarm_enable(ktime_alarm_handle); +} + +// +// Network initialization +// This function is called during system initialization to setup the whole +// networking environment. 
+ +// Linker magic to execute this function as 'init' +extern void cyg_do_net_init(void); + +extern void ifinit(void); +extern void loopattach(int); +extern void bridgeattach(int); + +// Internal init functions: +extern void cyg_alarm_timeout_init(void); +extern void cyg_tsleep_init(void); + +void +cyg_net_init(void) +{ + static int _init = false; + cyg_netdevtab_entry_t *t; + + if (_init) return; + + cyg_do_net_init(); // Just forces linking in the initializer/constructor + // Initialize interrupt "flags" + cyg_flag_init(&netint_flags); + // Initialize timeouts and net service thread (pseudo-DSRs) + cyg_alarm_timeout_init(); + // Initialize tsleep/wakeup support + cyg_tsleep_init(); + // Initialize network memory system + cyg_kmem_init(); + mbinit(); + cyg_ktime_init(); + + // Create network background thread + cyg_thread_create(CYGPKG_NET_THREAD_PRIORITY, // Priority + cyg_netint, // entry + 0, // entry parameter + "Network support", // Name + &netint_stack[0], // Stack + STACK_SIZE, // Size + &netint_thread_handle, // Handle + &netint_thread_data // Thread data structure + ); + cyg_thread_resume(netint_thread_handle); // Start it + + // Initialize all network devices + for (t = &__NETDEVTAB__[0]; t != &__NETDEVTAB_END__; t++) { +// diag_printf("Init device '%s'\n", t->name); + if (t->init(t)) { + t->status = CYG_NETDEVTAB_STATUS_AVAIL; + } else { + // What to do if device init fails? 
+ t->status = 0; // Device not [currently] available + } + } + // And attach the loopback interface +#ifdef CYGPKG_NET_NLOOP +#if 0 < CYGPKG_NET_NLOOP + loopattach(0); +#endif +#endif +#if NBRIDGE > 0 + bridgeattach(0); +#endif + // Start up the network processing + ifinit(); + domaininit(); + + // Done + _init = true; +} + +// Copyright (C) 2002 Gary Thomas + +#include <net/if.h> +#include <net/route.h> +#include <net/netdb.h> +externC void if_indextoname(int indx, char *buf, int len); + +typedef void pr_fun(char *fmt, ...); + +static void +_mask(struct sockaddr *sa, char *buf, int _len) +{ + char *cp = ((char *)sa) + 4; + int len = sa->sa_len - 4; + int tot = 0; + + while (len-- > 0) { + if (tot) *buf++ = '.'; + buf += diag_sprintf(buf, "%d", *cp++); + tot++; + } + + while (tot < 4) { + if (tot) *buf++ = '.'; + buf += diag_sprintf(buf, "%d", 0); + tot++; + } +} + +static void +_show_ifp(struct ifnet *ifp, pr_fun *pr) +{ + struct ifaddr *ifa; + char addr[64], netmask[64], broadcast[64]; + + (*pr)("%-8s", ifp->if_xname); + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family != AF_LINK) { + getnameinfo (ifa->ifa_addr, ifa->ifa_addr->sa_len, addr, sizeof(addr), 0, 0, 0); + getnameinfo (ifa->ifa_dstaddr, ifa->ifa_dstaddr->sa_len, broadcast, sizeof(broadcast), 0, 0, 0); + _mask(ifa->ifa_netmask, netmask, 64); + (*pr)("IP: %s, Broadcast: %s, Netmask: %s\n", addr, broadcast, netmask); + (*pr)(" "); + if ((ifp->if_flags & IFF_UP)) (*pr)("UP "); + if ((ifp->if_flags & IFF_BROADCAST)) (*pr)("BROADCAST "); + if ((ifp->if_flags & IFF_LOOPBACK)) (*pr)("LOOPBACK "); + if ((ifp->if_flags & IFF_RUNNING)) (*pr)("RUNNING "); + if ((ifp->if_flags & IFF_PROMISC)) (*pr)("PROMISC "); + if ((ifp->if_flags & IFF_MULTICAST)) (*pr)("MULTICAST "); + if ((ifp->if_flags & IFF_ALLMULTI)) (*pr)("ALLMULTI "); + (*pr)("MTU: %d, Metric: %d\n", ifp->if_mtu, ifp->if_metric); + (*pr)(" Rx - Packets: %d, Bytes: %d", ifp->if_data.ifi_ipackets, ifp->if_data.ifi_ibytes); + 
(*pr)(", Tx - Packets: %d, Bytes: %d\n", ifp->if_data.ifi_opackets, ifp->if_data.ifi_obytes); + } + } +} + +static int +_dumpentry(struct radix_node *rn, void *vw) +{ + struct rtentry *rt = (struct rtentry *)rn; + struct sockaddr *dst, *gate, *netmask, *genmask; + char addr[32], *cp; + pr_fun *pr = (pr_fun *)vw; + + dst = rt_key(rt); + gate = rt->rt_gateway; + netmask = rt_mask(rt); + genmask = rt->rt_genmask; + if ((rt->rt_flags & (RTF_UP | RTF_LLINFO)) == RTF_UP) { + if (netmask == NULL) { + return 0; + } + _inet_ntop(dst, addr, sizeof(addr)); + (*pr)("%-15s ", addr); + if (gate != NULL) { + _inet_ntop(gate, addr, sizeof(addr)); + (*pr)("%-15s ", addr); + } else { + (*pr)("%-15s ", " "); + } + if (netmask != NULL) { + _mask(netmask, addr, sizeof(addr)); + (*pr)("%-15s ", addr); + } else { + (*pr)("%-15s ", " "); + } + cp = addr; + if ((rt->rt_flags & RTF_UP)) *cp++ = 'U'; + if ((rt->rt_flags & RTF_GATEWAY)) *cp++ = 'G'; + if ((rt->rt_flags & RTF_STATIC)) *cp++ = 'S'; + if ((rt->rt_flags & RTF_DYNAMIC)) *cp++ = 'D'; + *cp = '\0'; + (*pr)("%-8s ", addr); // Flags + if_indextoname(rt->rt_ifp->if_index, addr, 64); + (*pr)("%-8s ", addr); + (*pr)("\n"); + } + return 0; +} + +void +show_network_tables(pr_fun *pr) +{ + int i, error; + struct radix_node_head *rnh; + struct ifnet *ifp; + + cyg_scheduler_lock(); + (*pr)("Routing tables\n"); + (*pr)("Destination Gateway Mask Flags Interface\n"); + for (i = 1; i <= AF_MAX; i++) { + if ((rnh = rt_tables[i]) != NULL) { + error = rnh->rnh_walktree(rnh, _dumpentry, pr); + } + } + + (*pr)("Interface statistics\n"); + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) { + _show_ifp(ifp, pr); + } + cyg_scheduler_unlock(); +} + +#endif // CYGPKG_NET_DRIVER_FRAMEWORK + +// EOF support.c diff --git a/ecos/packages/net/tcpip/current/src/ecos/synch.c b/ecos/packages/net/tcpip/current/src/ecos/synch.c new file mode 100644 index 0000000..972c832 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/ecos/synch.c @@ -0,0 
+1,416 @@ +//========================================================================== +// +// ecos/synch.c +// +// eCos wrapper and synch functions +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas, hmt +// Contributors: gthomas, hmt +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +// Synch routines, etc., used by network code + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/sockio.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <net/if.h> +#include <net/route.h> +#include <net/netisr.h> +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <arpa/inet.h> + +#include <machine/cpu.h> + +#include <pkgconf/net.h> + +#include <cyg/infra/diag.h> +#include <cyg/hal/hal_intr.h> +#include <cyg/kernel/kapi.h> + +#include <cyg/infra/cyg_ass.h> + +#include <cyg/io/eth/netdev.h> + +//---------------------------- splx() emulation ------------------------------ +// This contains both the SPLX stuff and tsleep/wakeup - because those must +// be SPLX aware. They release the SPLX lock when sleeping, and reclaim it +// (if needs be) at wakeup. +// +// The variable spl_state (and the associated bit patterns) is used to keep +// track of the "splx()" level. 
This is an artifact of the original stack, +// based on the BSD interrupt world (interrupts and processing could be +// masked based on a level value, supported by hardware). This is not very +// real-time, so the emulation uses proper eCos tools and techniques to +// accomplish the same result. The key here is in the analysis of the +// various "levels", why they are used, etc. +// +// SPL_IMP is called in order to protect internal data structures +// short-term, primarily so that interrupt processing does not interfere +// with them. +// +// SPL_CLOCK is called in order to ensure that a timestamp is valid i.e. no +// time passes while the stamp is being taken (since it is a potentially +// non-idempotent data structure). +// +// SPL_SOFTNET is used to prevent all other stack processing, including +// interrupts (DSRs), etc. +// +// SPL_INTERNAL is used when running the pseudo-DSR in timeout.c - this +// runs what should really be the network interface device's DSR, and any +// timeout routines that are scheduled. (They are broken out into a thread +// to isolate the network locking from the rest of the system) +// +// NB a thread in this state can tsleep(); see below. Tsleep releases and +// reclaims the locks and so on. This is necessary because of the possible +// conflict where +// I splsoft +// I tsleep +// He runs, he is lower priority +// He splsofts +// He or something else awakens me +// I want to run, but he has splsoft, so I wait +// He runs and releases splsoft +// I awaken and go. 

// Bit mask of the "levels" currently held by the owning thread.
static volatile cyg_uint32 spl_state = 0;
#define SPL_IMP      0x01
#define SPL_NET      0x02
#define SPL_CLOCK    0x04
#define SPL_SOFTNET  0x08
#define SPL_INTERNAL 0x10

// One mutex backs every level; splx_thread records which thread has
// taken it through spl_any() (0 when nobody has).
static cyg_mutex_t splx_mutex;
static volatile cyg_handle_t splx_thread;


// With CYGIMPL_TRACE_SPLX every spl function takes the caller's file
// and line and records an event; otherwise the extra arguments vanish.
#ifdef CYGIMPL_TRACE_SPLX
#define SPLXARGS const char *file, const int line
#define SPLXMOREARGS , const char *file, const int line
#define SPLXTRACE do_sched_event(__FUNCTION__, file, line, spl_state)
#else
#define SPLXARGS void
#define SPLXMOREARGS
#define SPLXTRACE
#endif


// Common implementation of all the spl*() entry points.
//   which - the SPL_* bit(s) to add to spl_state
// If the caller is not the recorded owner it first locks splx_mutex and
// becomes the owner.  Returns the state to hand back to cyg_splx():
// the previous spl_state for a nested call, or 0 for the call that
// took the mutex.
static inline cyg_uint32
spl_any( cyg_uint32 which )
{
    cyg_uint32 old_spl = spl_state;
    if ( cyg_thread_self() != splx_thread ) {
        cyg_mutex_lock( &splx_mutex );
        old_spl = 0; // Free when we unlock this context
        CYG_ASSERT( 0 == splx_thread, "Thread still owned" );
        CYG_ASSERT( 0 == spl_state, "spl still set" );
        splx_thread = cyg_thread_self();
    }
    CYG_ASSERT( splx_mutex.locked, "spl_any: mutex not locked" );
    CYG_ASSERT( (cyg_handle_t)splx_mutex.owner == cyg_thread_self(),
                "spl_any: mutex not mine" );
    spl_state |= which;
    return old_spl;
}


// Enter the SPL_IMP level; returns the value to pass to cyg_splx().
cyg_uint32
cyg_splimp(SPLXARGS)
{
    SPLXTRACE;
    return spl_any( SPL_IMP );
}

// Enter the SPL_CLOCK level; returns the value to pass to cyg_splx().
cyg_uint32
cyg_splclock(SPLXARGS)
{
    SPLXTRACE;
    return spl_any( SPL_CLOCK );
}

// Enter the SPL_NET level; returns the value to pass to cyg_splx().
cyg_uint32
cyg_splnet(SPLXARGS)
{
    SPLXTRACE;
    return spl_any( SPL_NET );
}

// Enter the "high" level, which is mapped onto SPL_SOFTNET.
cyg_uint32
cyg_splhigh(SPLXARGS)
{
    SPLXTRACE;
    // splhigh did SPLSOFTNET in the contrib, so this is the same
    return spl_any( SPL_SOFTNET );
}

// Enter the SPL_SOFTNET level; returns the value to pass to cyg_splx().
cyg_uint32
cyg_splsoftnet(SPLXARGS)
{
    SPLXTRACE;
    return spl_any( SPL_SOFTNET );
}

// Enter the SPL_INTERNAL level; returns the value to pass to cyg_splx().
cyg_uint32
cyg_splinternal(SPLXARGS)
{
    SPLXTRACE;
    return spl_any( SPL_INTERNAL );
}


//
// Return to a previous interrupt state/level.
+// +void +cyg_splx(cyg_uint32 old_state SPLXMOREARGS) +{ + SPLXTRACE; + + CYG_ASSERT( 0 != spl_state, "No state set" ); + CYG_ASSERT( splx_mutex.locked, "splx: mutex not locked" ); + CYG_ASSERT( (cyg_handle_t)splx_mutex.owner == cyg_thread_self(), + "splx: mutex not mine" ); + + spl_state &= old_state; + + if ( 0 == spl_state ) { + splx_thread = 0; + cyg_mutex_unlock( &splx_mutex ); + } +} + +//------------------ tsleep() and wakeup() emulation --------------------------- +// +// Structure used to keep track of 'tsleep' style events +// +struct wakeup_event { + void *chan; + cyg_sem_t sem; +}; +static struct wakeup_event wakeup_list[CYGPKG_NET_NUM_WAKEUP_EVENTS]; + + +// Called to initialize structures used by timeout functions +void +cyg_tsleep_init(void) +{ + int i; + struct wakeup_event *ev; + // Create list of "wakeup event" semaphores + for (i = 0, ev = wakeup_list; i < CYGPKG_NET_NUM_WAKEUP_EVENTS; i++, ev++) { + ev->chan = 0; + cyg_semaphore_init(&ev->sem, 0); + } + // Initialize the mutex and thread id: + cyg_mutex_init( &splx_mutex ); + splx_thread = 0; +} + + +// +// Signal an event +void +cyg_wakeup(void *chan) +{ + int i; + struct wakeup_event *ev; + cyg_scheduler_lock(); // Ensure scan is safe + // NB this is broadcast semantics because a sleeper/wakee holds the + // slot until they exit. This avoids a race condition whereby the + // semaphore can get an extra post - and then the slot is freed, so the + // sem wait returns immediately, AOK, so the slot wasn't freed. 
+ for (i = 0, ev = wakeup_list; i < CYGPKG_NET_NUM_WAKEUP_EVENTS; i++, ev++) + if (ev->chan == chan) + cyg_semaphore_post(&ev->sem); + + cyg_scheduler_unlock(); +} + +// ------------------------------------------------------------------------ +// Wait for an event with timeout +// tsleep(event, priority, state, timeout) +// event - the thing to wait for +// priority - unused +// state - a descriptive message +// timeout - max time (in ticks) to wait +// returns: +// 0 - event was "signalled" +// ETIMEDOUT - timeout occurred +// EINTR - thread broken out of sleep +// +int +cyg_tsleep(void *chan, int pri, char *wmesg, int timo) +{ + int i, res = 0; + struct wakeup_event *ev; + cyg_tick_count_t sleep_time; + cyg_handle_t self = cyg_thread_self(); + int old_splflags = 0; // no flags held + + cyg_scheduler_lock(); + + // Safely find a free slot: + for (i = 0, ev = wakeup_list; i < CYGPKG_NET_NUM_WAKEUP_EVENTS; i++, ev++) { + if (ev->chan == 0) { + ev->chan = chan; + break; + } + } + CYG_ASSERT( i < CYGPKG_NET_NUM_WAKEUP_EVENTS, "no sleep slots" ); + CYG_ASSERT( 1 == cyg_scheduler_read_lock(), + "Tsleep - called with scheduler locked" ); + // Defensive: + if ( i >= CYGPKG_NET_NUM_WAKEUP_EVENTS ) { + cyg_scheduler_unlock(); + return ETIMEDOUT; + } + + // If we are the owner, then we must release the mutex when + // we wait. + if ( self == splx_thread ) { + old_splflags = spl_state; // Keep them for restoration + CYG_ASSERT( spl_state, "spl_state not set" ); + // Also want to assert that the mutex is locked... + CYG_ASSERT( splx_mutex.locked, "Splx mutex not locked" ); + CYG_ASSERT( (cyg_handle_t)splx_mutex.owner == self, "Splx mutex not mine" ); + splx_thread = 0; + spl_state = 0; + cyg_mutex_unlock( &splx_mutex ); + } + + // Re-initialize the semaphore - it might have counted up arbitrarily + // in the time between a prior sleeper being signalled and them + // actually running. 
+ cyg_semaphore_init(&ev->sem, 0); + + // This part actually does the wait: + // As of the new kernel, we can do this without unlocking the scheduler + if (timo) { + sleep_time = cyg_current_time() + timo; + if (!cyg_semaphore_timed_wait(&ev->sem, sleep_time)) { + if( cyg_current_time() >= sleep_time ) + res = ETIMEDOUT; + else + res = EINTR; + } + } else { + if (!cyg_semaphore_wait(&ev->sem) ) { + res = EINTR; + } + } + + ev->chan = 0; // Free the slot - the wakeup call cannot do this. + + if ( old_splflags ) { // restore to previous state + // As of the new kernel, we can do this with the scheduler locked + cyg_mutex_lock( &splx_mutex ); // this might wait + CYG_ASSERT( 0 == splx_thread, "Splx thread set in tsleep" ); + CYG_ASSERT( 0 == spl_state, "spl_state set in tsleep" ); + splx_thread = self; // got it now... + spl_state = old_splflags; + } + + cyg_scheduler_unlock(); + return res; +} + + + +// ------------------------------------------------------------------------ +// DEBUGGING ROUTINES +#ifdef CYGIMPL_TRACE_SPLX +#undef cyg_scheduler_lock +#undef cyg_scheduler_safe_lock +#undef cyg_scheduler_unlock + +#define MAX_SCHED_EVENTS 256 +static struct _sched_event { + char *fun, *file; + int line, lock; +} sched_event[MAX_SCHED_EVENTS]; +static int next_sched_event = 0; +static int total_sched_events = 0; + +static void +do_sched_event(char *fun, char *file, int line, int lock) +{ + struct _sched_event *se = &sched_event[next_sched_event]; + if (++next_sched_event == MAX_SCHED_EVENTS) { + next_sched_event = 0; + } + se->fun = fun; + se->file = file; + se->line = line; + se->lock = lock; + total_sched_events++; +} + +static void +show_sched_events(void) +{ + int i; + struct _sched_event *se; + if (total_sched_events < MAX_SCHED_EVENTS) { + i = 0; + } else { + i = next_sched_event + 1; + if (i == MAX_SCHED_EVENTS) i = 0; + } + diag_printf("%d total scheduler events\n", total_sched_events); + while (i != next_sched_event) { + se = &sched_event[i]; + diag_printf("%s 
- lock: %d, called from %s.%d\n", se->fun, se->lock, se->file, se->line); + if (++i == MAX_SCHED_EVENTS) i = 0; + } +} + +#define SPLX_TRACE_DATA() cyg_scheduler_read_lock() + +void +_cyg_scheduler_lock(char *file, int line) +{ + cyg_scheduler_lock(); + do_sched_event(__FUNCTION__, file, line, SPLX_TRACE_DATA()); +} + +void +_cyg_scheduler_safe_lock(char *file, int line) +{ + cyg_scheduler_safe_lock(); + do_sched_event(__FUNCTION__, file, line, SPLX_TRACE_DATA()); +} + +void +_cyg_scheduler_unlock(char *file, int line) +{ + cyg_scheduler_unlock(); + do_sched_event(__FUNCTION__, file, line, SPLX_TRACE_DATA()); +} +#endif // CYGIMPL_TRACE_SPLX + +// EOF synch.c diff --git a/ecos/packages/net/tcpip/current/src/ecos/timeout.c b/ecos/packages/net/tcpip/current/src/ecos/timeout.c new file mode 100644 index 0000000..7091b3e --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/ecos/timeout.c @@ -0,0 +1,314 @@ +//========================================================================== +// +// lib/timeout.c +// +// timeout support +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas, hmt +// Contributors: gthomas, hmt +// Date: 1999-02-05 +// Description: Simple timeout functions +//####DESCRIPTIONEND#### + +#include <sys/param.h> +#include <pkgconf/net.h> +#include <cyg/kernel/kapi.h> +#include <cyg/infra/cyg_ass.h> + +// Timeout support + +void alarm_timeout_init(void); + +#ifndef NTIMEOUTS +#define NTIMEOUTS 8 +#endif +typedef struct { + cyg_int32 delta; // Number of "ticks" in the future for this timeout + timeout_fun *fun; // Function to execute when it expires + void *arg; // Argument to pass when it does +} timeout_entry; +static timeout_entry timeouts[NTIMEOUTS]; +static cyg_handle_t timeout_alarm_handle; +static cyg_alarm timeout_alarm; +static cyg_int32 last_delta; +static cyg_tick_count_t last_set_time; + +#define STACK_SIZE CYGNUM_HAL_STACK_SIZE_TYPICAL +static char alarm_stack[STACK_SIZE]; +static cyg_thread alarm_thread_data; +static cyg_handle_t alarm_thread_handle; + +static cyg_flag_t alarm_flag; + +// ------------------------------------------------------------------------ +// This routine exists so that this module can synchronize: +extern cyg_uint32 cyg_splinternal(void); + +// ------------------------------------------------------------------------ +// CALLBACK FUNCTION +// Called from the thread, this runs the alarm callbacks. +// Locking is already in place when this is called. 
+static void +do_timeout(void) +{ + int i; + cyg_int32 min_delta; + timeout_entry *e; + + CYG_ASSERT( 0 < last_delta, "last_delta underflow" ); + + min_delta = last_delta; // local copy + last_delta = -1; // flag recursive call underway + + for (e = timeouts, i = 0; i < NTIMEOUTS; i++, e++) { + if (e->delta) { + CYG_ASSERT( e->delta >= min_delta, "e->delta underflow" ); + e->delta -= min_delta; + if (e->delta <= 0) { // Defensive + // Time for this item to 'fire' + timeout_fun *fun = e->fun; + void *arg = e->arg; + // Call it *after* cleansing the record + e->fun = 0; + e->delta = 0; + (*fun)(arg); + } + } + } + + // Now scan for a new timeout *after* running all the callbacks + // (because they can add timeouts themselves) + min_delta = 0x7FFFFFFF; // Maxint + for (e = timeouts, i = 0; i < NTIMEOUTS; i++, e++) + if (e->delta) + if (e->delta < min_delta) + min_delta = e->delta; + + CYG_ASSERT( 0 < min_delta, "min_delta underflow" ); + + if (min_delta != 0x7FFFFFFF) { + // Still something to do, schedule it + last_set_time = cyg_current_time(); + cyg_alarm_initialize(timeout_alarm_handle, last_set_time+min_delta, 0); + last_delta = min_delta; + } else { + last_delta = 0; // flag no activity + } +} + +// ------------------------------------------------------------------------ +// ALARM EVENT FUNCTION +// This is the DSR for the alarm firing: +static void +do_alarm(cyg_handle_t alarm, cyg_addrword_t data) +{ + cyg_flag_setbits( &alarm_flag, 1 ); +} + +void ecos_synch_eth_drv_dsr(void) +{ + cyg_flag_setbits( &alarm_flag, 2 ); +} + +// ------------------------------------------------------------------------ +// HANDLER THREAD ENTRY ROUTINE +// This waits on the DSR to tell it to run: +static void +alarm_thread(cyg_addrword_t param) +{ + // This is from the logical ethernet dev; it calls those delivery + // functions who need attention. 
+ extern void eth_drv_run_deliveries( void ); + + // This is from the logical ethernet dev; it tickles somehow + // all ethernet devices in case one is wedged. + extern void eth_drv_tickle_devices( void ); + + while ( 1 ) { + int spl; + int x; +#ifdef CYGPKG_NET_FAST_THREAD_TICKLE_DEVS + cyg_tick_count_t later = cyg_current_time(); + later += CYGNUM_NET_FAST_THREAD_TICKLE_DEVS_DELAY; + x = cyg_flag_timed_wait( + &alarm_flag, + -1, + CYG_FLAG_WAITMODE_OR | CYG_FLAG_WAITMODE_CLR, + later ); +#else + x = cyg_flag_wait( + &alarm_flag, + -1, + CYG_FLAG_WAITMODE_OR | CYG_FLAG_WAITMODE_CLR ); + + CYG_ASSERT( 3 & x, "Lost my bits" ); +#endif // CYGPKG_NET_FAST_THREAD_TICKLE_DEVS + CYG_ASSERT( !((~3) & x), "Extra bits" ); + + spl = cyg_splinternal(); + + CYG_ASSERT( 0 == spl, "spl nonzero" ); + + if ( 2 & x ) + eth_drv_run_deliveries(); +#ifdef CYGPKG_NET_FAST_THREAD_TICKLE_DEVS + // This is in the else clause for "do we deliver" because the + // network stack might have continuous timing events anyway - so + // the timeout would not occur, x would be 1 every time. + else // Tickle the devices... 
+ eth_drv_tickle_devices(); +#endif // CYGPKG_NET_FAST_THREAD_TICKLE_DEVS + + if ( 1 & x ) + do_timeout(); + + cyg_splx(spl); + } +} + +// ------------------------------------------------------------------------ +// INITIALIZATION FUNCTION +void +cyg_alarm_timeout_init( void ) +{ + // Init the alarm object, attached to the real time clock + cyg_handle_t h; + cyg_clock_to_counter(cyg_real_time_clock(), &h); + cyg_alarm_create(h, do_alarm, 0, &timeout_alarm_handle, &timeout_alarm); + // Init the flag of waking up + cyg_flag_init( &alarm_flag ); + // Create alarm background thread to run the callbacks + cyg_thread_create( + CYGPKG_NET_FAST_THREAD_PRIORITY, // Priority + alarm_thread, // entry + 0, // entry parameter + "Network alarm support", // Name + &alarm_stack[0], // Stack + STACK_SIZE, // Size + &alarm_thread_handle, // Handle + &alarm_thread_data // Thread data structure + ); + cyg_thread_resume(alarm_thread_handle); // Start it +} + +// ------------------------------------------------------------------------ +// EXPORTED API: SET A TIMEOUT +// This can be called from anywhere, including recursively from the timeout +// functions themselves. +cyg_uint32 +timeout(timeout_fun *fun, void *arg, cyg_int32 delta) +{ + int i; + timeout_entry *e; + cyg_uint32 stamp; + + // this needs to be atomic - recursive calls from the alarm + // handler thread itself are allowed: + int spl = cyg_splinternal(); + + CYG_ASSERT( 0 < delta, "delta is right now, or even sooner!" 
); + + // Renormalize delta wrt the existing set alarm, if there is one + if ( last_delta > 0 ) + delta += (cyg_int32)(cyg_current_time() - last_set_time); + // So recorded_delta is set to either: + // alarm is active: delta + NOW - THEN + // alarm is inactive: delta + + stamp = 0; // Assume no slots available + for (e = timeouts, i = 0; i < NTIMEOUTS; i++, e++) { + if ((e->delta == 0) && (e->fun == 0)) { + // Free entry + e->delta = delta; + e->fun = fun; + e->arg = arg; + stamp = (cyg_uint32)e; + break; + } + } + + if ( stamp && // we did add a record AND + (0 == last_delta || // alarm was inactive OR + delta < last_delta) ) { // alarm was active but later than we need + + // (if last_delta is -1, this call is recursive from the handler so + // also do nothing in that case) + + // Here, we know the new item added is sooner than that which was + // most recently set, if any, so we can just go and set it up. + if ( 0 == last_delta ) + last_set_time = cyg_current_time(); + + // So we use, to set the alarm either: + // alarm is active: (delta + NOW - THEN) + THEN + // alarm is inactive: delta + NOW + // and in either case it is true that + // (recorded_delta + last_set_time) == (delta + NOW) + cyg_alarm_initialize(timeout_alarm_handle, last_set_time+delta, 0); + last_delta = delta; + } + // Otherwise, the alarm is active, AND it is set to fire sooner than we + // require, so when it does, that will sort out calling the item we + // just added. Or we didn't actually add a record, so nothing has + // changed. 
+ +#ifdef CYGPKG_INFRA_DEBUG + // Do some more checking akin to that in the alarm handler: + if ( last_delta != -1 ) { // not a recursive call + cyg_tick_count_t now = cyg_current_time(); + CYG_ASSERT( last_delta >= 0, "Bad last delta" ); + delta = 0x7fffffff; + for (e = timeouts, i = 0; i < NTIMEOUTS; i++, e++) { + if (e->delta) { + CYG_ASSERT( e->delta >= last_delta, "e->delta underflow" ); + CYG_ASSERT( last_set_time + e->delta + 1000 > now, + "Recorded alarm not in the future!" ); + if ( e->delta < delta ) + delta = e->delta; + } else { + CYG_ASSERT( 0 == e->fun, "Function recorded for 0 delta" ); + } + } + CYG_ASSERT( delta == last_delta, "We didn't pick the smallest delta!" ); + } +#endif + + cyg_splx(spl); + return stamp; +} + +// ------------------------------------------------------------------------ +// EXPORTED API: CANCEL A TIMEOUT +// This can be called from anywhere, including recursively from the timeout +// functions themselves. +void +untimeout(timeout_fun *fun, void * arg) +{ + int i; + timeout_entry *e; + int spl = cyg_splinternal(); + + for (e = timeouts, i = 0; i < NTIMEOUTS; i++, e++) { + if (e->delta && (e->fun == fun) && (e->arg == arg)) { + e->delta = 0; + e->fun = 0; + break; + } + } + cyg_splx(spl); +} + +// ------------------------------------------------------------------------ + +// EOF timeout.c diff --git a/ecos/packages/net/tcpip/current/src/lib/accept.c b/ecos/packages/net/tcpip/current/src/lib/accept.c new file mode 100644 index 0000000..c2844e1 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/accept.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/accept.c +// +// accept() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the 
appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +accept(int s, const struct sockaddr *name, socklen_t *anamelen) +{ + struct sys_accept_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,name) = (struct sockaddr *)name; + SYSCALLARG(args,anamelen) = anamelen; + error = sys_accept(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return res; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/bind.c b/ecos/packages/net/tcpip/current/src/lib/bind.c new file mode 100644 index 0000000..994bc6c --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/bind.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/bind.c +// +// bind() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +bind(int s, const struct sockaddr *name, socklen_t namelen) +{ + struct sys_bind_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,name) = name; + SYSCALLARG(args,namelen) = namelen; + error = sys_bind(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/close.c b/ecos/packages/net/tcpip/current/src/lib/close.c new file mode 100644 index 0000000..7cf8f4e --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/close.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/close.c +// +// close() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +close(int fd) +{ + int error; + struct file *fp; + if (getfp(fd, &fp)) + return (EBADF); + error = (*fp->f_ops->fo_close)(fp); + if (error) { + errno = error; + return -1; + } else { + ffree(fp); + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/connect.c b/ecos/packages/net/tcpip/current/src/lib/connect.c new file mode 100644 index 0000000..10ec5c7 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/connect.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/connect.c +// +// connect() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +connect(int s, const struct sockaddr *name, socklen_t namelen) +{ + struct sys_connect_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,name) = name; + SYSCALLARG(args,namelen) = namelen; + error = sys_connect(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/getpeername.c b/ecos/packages/net/tcpip/current/src/lib/getpeername.c new file mode 100644 index 0000000..45eefc2 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/getpeername.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/getpeername.c +// +// getpeername() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +getpeername(int s, const struct sockaddr *name, socklen_t *namelen) +{ + struct sys_getpeername_args args; + int res, error; + SYSCALLARG(args,fdes) = s; + SYSCALLARG(args,asa) = (struct sockaddr *)name; + SYSCALLARG(args,alen) = namelen; + error = sys_getpeername(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/getsockname.c b/ecos/packages/net/tcpip/current/src/lib/getsockname.c new file mode 100644 index 0000000..32044f9 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/getsockname.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/getsockname.c +// +// getsockname() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +getsockname(int s, const struct sockaddr *name, socklen_t *namelen) +{ + struct sys_getsockname_args args; + int res, error; + SYSCALLARG(args,fdes) = s; + SYSCALLARG(args,asa) = (struct sockaddr *)name; + SYSCALLARG(args,alen) = namelen; + error = sys_getsockname(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/getsockopt.c b/ecos/packages/net/tcpip/current/src/lib/getsockopt.c new file mode 100644 index 0000000..22e2b70 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/getsockopt.c @@ -0,0 +1,54 @@ +//========================================================================== +// +// lib/getsockopt.c +// +// getsockopt() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +getsockopt(int s, int level, int name, void *val, socklen_t *avalsize) +{ + struct sys_getsockopt_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,level) = level; + SYSCALLARG(args,name) = name; + SYSCALLARG(args,val) = val; + SYSCALLARG(args,avalsize) = avalsize; + error = sys_getsockopt(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/ioctl.c b/ecos/packages/net/tcpip/current/src/lib/ioctl.c new file mode 100644 index 0000000..7af846c --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/ioctl.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/ioctl.c +// +// ioctl() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +ioctl(int fd, u_long cmd, void *data) +{ + struct sys_ioctl_args args; + int res, error; + SYSCALLARG(args,fd) = fd; + SYSCALLARG(args,com) = cmd; + SYSCALLARG(args,data) = data; + error = sys_ioctl(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/listen.c b/ecos/packages/net/tcpip/current/src/lib/listen.c new file mode 100644 index 0000000..fc489da --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/listen.c @@ -0,0 +1,51 @@ +//========================================================================== +// +// lib/listen.c +// +// listen() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +listen(int s, int backlog) +{ + struct sys_listen_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,backlog) = backlog; + error = sys_listen(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/read.c b/ecos/packages/net/tcpip/current/src/lib/read.c new file mode 100644 index 0000000..a9d59ab --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/read.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/read.c +// +// read() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +read(int fd, void *buf, size_t len) +{ + struct sys_read_args args; + int res, error; + SYSCALLARG(args,fd) = fd; + SYSCALLARG(args,buf) = buf; + SYSCALLARG(args,nbyte) = len; + error = sys_read(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return res; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/recv.c b/ecos/packages/net/tcpip/current/src/lib/recv.c new file mode 100644 index 0000000..39866c1 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/recv.c @@ -0,0 +1,42 @@ +//========================================================================== +// +// lib/recv.c +// +// recv() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas,andrew.lunn@ascom.ch +// Contributors: gthomas +// Date: 2001-11-01 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +extern ssize_t recvfrom (int, void *, size_t, int, struct sockaddr *, socklen_t *); + +ssize_t +recv(int s, void *buf, size_t buflen, int flags) +{ + + return(recvfrom(s,buf,buflen,flags,NULL,0)); +} diff --git a/ecos/packages/net/tcpip/current/src/lib/recvfrom.c b/ecos/packages/net/tcpip/current/src/lib/recvfrom.c new file mode 100644 index 0000000..63c1b27 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/recvfrom.c @@ -0,0 +1,56 @@ +//========================================================================== +// +// lib/recvfrom.c +// +// recvfrom() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +ssize_t +recvfrom(int s, const void *buf, size_t buflen, + int flags, const struct sockaddr *from, socklen_t *fromlen) +{ + struct sys_recvfrom_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,buf) = (void *)buf; + SYSCALLARG(args,len) = buflen; + SYSCALLARG(args,flags) = flags; + SYSCALLARG(args,from) = (struct sockaddr *)from; + SYSCALLARG(args,fromlenaddr) = fromlen; + error = sys_recvfrom(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return res; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/select.c b/ecos/packages/net/tcpip/current/src/lib/select.c new file mode 100644 index 0000000..9316913 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/select.c @@ -0,0 +1,209 @@ +//========================================================================== +// +// lib/select.c +// +// 'select()' system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: eCos implementation of 'select()' system call +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <cyg/kernel/kapi.h> +#include <sys/select.h> +#include <sys/bsdselect.h> + +static cyg_flag_t select_flag; +static cyg_bool select_flag_init = false; +#define SELECT_WAKE 0x01 +#define SELECT_ABORT 0x02 + +// +// Private function which does all the work for 'select()' +// +static int +_cyg_select(int nfd, fd_set *in, fd_set *out, fd_set *ex, + struct timeval *tv, cyg_bool_t abortable) +{ + int fd, mode, num, ticks; + struct file *fp; + fd_set in_res, out_res, ex_res; // Result sets + fd_set *selection[3], *result[3]; + cyg_tick_count_t now, then; + int mode_type[] = {FREAD, FWRITE, 0}; + cyg_flag_value_t flag, wait_flag; + + // Note: since this is called by application programs, it needs no protection + if (!select_flag_init) { + select_flag_init = true; + cyg_flag_init(&select_flag); + } + wait_flag = SELECT_WAKE; + if (abortable) wait_flag |= SELECT_ABORT; + FD_ZERO(&in_res); + FD_ZERO(&out_res); + FD_ZERO(&ex_res); + // Set up sets + selection[0] = in; result[0] = &in_res; + selection[1] = out; result[1] = &out_res; + selection[2] = ex; result[2] = &ex_res; + // Compute end time + if (tv) { + now = cyg_current_time(); + ticks = (tv->tv_sec * 100) + (tv->tv_usec / 10000); + then = now + ticks; + } else { + then = 0; // Compiler warnings :-( + ticks = 0; + } + // Scan sets for possible I/O until something found, timeout or error. 
+ while (true) { + num = 0; // Total file descriptors "ready" + + cyg_scheduler_lock(); // Scan the list atomically wrt electing to sleep + + for (mode = 0; mode < 3; mode++) { + if (selection[mode]) { + for (fd = 0; fd < nfd; fd++) { + if (FD_ISSET(fd, selection[mode])) { + if (getfp(fd, &fp)) { + cyg_scheduler_unlock(); // return. + errno = EBADF; + return -1; + } + if ((*fp->f_ops->fo_select)(fp, mode_type[mode])) { + FD_SET(fd, result[mode]); + num++; + } + } + } + } + } + if (num) { + + cyg_scheduler_unlock(); // Happy, about to return. + + // Found something, update user's sets + if (in) { + memcpy(in, &in_res, sizeof(in_res)); + } + if (out) { + memcpy(out, &out_res, sizeof(out_res)); + } + if (ex) { + memcpy(ex, &ex_res, sizeof(ex_res)); + } + return num; + } + // Nothing found, see if we want to wait + if (tv) { + if (ticks == 0) { + // Special case of "poll" + cyg_scheduler_unlock(); // About to return. + return 0; + } + flag = cyg_flag_timed_wait(&select_flag, wait_flag, + CYG_FLAG_WAITMODE_OR, + then); + } else { + // Wait forever (until something happens) + flag = cyg_flag_wait(&select_flag, wait_flag, + CYG_FLAG_WAITMODE_OR); + } + + cyg_scheduler_unlock(); // waited atomically + + if (flag & SELECT_ABORT) { + errno = EINTR; + return -1; + } + if (!flag) { + return 0; // meaning no activity, ergo timeout occurred + } + } + errno = ENOSYS; + return -1; +} + +// +// This function is called by the lower layers to record the +// fact that a particular 'select' event is being requested. +// +void +selrecord(void *selector, struct selinfo *info) +{ + // Unused by this implementation +} + +// +// This function is called to indicate that a 'select' event +// may have occurred. +// +void +selwakeup(struct selinfo *info) +{ + // Need these ops to be atomic to make sure the clear occurs - + // otherwise a higher prio thread could hog the CPU when its fds are + // not ready, but the flag is (already) set, or set for someone else. 
+ cyg_scheduler_lock(); + cyg_flag_setbits(&select_flag, SELECT_WAKE); + cyg_flag_maskbits(&select_flag, 0 ); // clear all + cyg_scheduler_unlock(); +} + +// +// The public function used by 'normal' programs. This interface does not allow +// the 'select()' to be externally interrupted. +// +int +select(int nfd, fd_set *in, fd_set *out, fd_set *ex, + struct timeval *tv) +{ + return _cyg_select(nfd, in, out, ex, tv, false); +} + +// +// The public function used by programs which wish to allow interruption, +// using the 'cyg_select_abort()' function below. +// +int +cyg_select_with_abort(int nfd, fd_set *in, fd_set *out, fd_set *ex, + struct timeval *tv) +{ + return _cyg_select(nfd, in, out, ex, tv, true); +} + +// +// This function can be called by the user to forceably abort any +// current selects. +// +void +cyg_select_abort(void) +{ + // See comments in selwakeup()... + cyg_scheduler_lock(); + cyg_flag_setbits(&select_flag, SELECT_ABORT); + cyg_flag_maskbits(&select_flag, 0 ); + cyg_scheduler_unlock(); +} + + diff --git a/ecos/packages/net/tcpip/current/src/lib/sendto.c b/ecos/packages/net/tcpip/current/src/lib/sendto.c new file mode 100644 index 0000000..d319150 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/sendto.c @@ -0,0 +1,56 @@ +//========================================================================== +// +// lib/sendto.c +// +// sendto() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +ssize_t +sendto(int s, const void *buf, size_t buflen, + int flags, const struct sockaddr *to, socklen_t tolen) +{ + struct sys_sendto_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,buf) = buf; + SYSCALLARG(args,len) = buflen; + SYSCALLARG(args,flags) = flags; + SYSCALLARG(args,to) = to; + SYSCALLARG(args,tolen) = tolen; + error = sys_sendto(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return res; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/setsockopt.c b/ecos/packages/net/tcpip/current/src/lib/setsockopt.c new file mode 100644 index 0000000..88902f0 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/setsockopt.c @@ -0,0 +1,54 @@ +//========================================================================== +// +// lib/setsockopt.c +// +// setsockopt() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +setsockopt(int s, int level, int name, const void *val, socklen_t valsize) +{ + struct sys_setsockopt_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,level) = level; + SYSCALLARG(args,name) = name; + SYSCALLARG(args,val) = val; + SYSCALLARG(args,valsize) = valsize; + error = sys_setsockopt(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/shutdown.c b/ecos/packages/net/tcpip/current/src/lib/shutdown.c new file mode 100644 index 0000000..a8956eb --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/shutdown.c @@ -0,0 +1,51 @@ +//========================================================================== +// +// lib/shutdown.c +// +// shutdown() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +shutdown(int s, int how) +{ + struct sys_shutdown_args args; + int res, error; + SYSCALLARG(args,s) = s; + SYSCALLARG(args,how) = how; + error = sys_shutdown(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return 0; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/socket.c b/ecos/packages/net/tcpip/current/src/lib/socket.c new file mode 100644 index 0000000..f92c225 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/socket.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/socket.c +// +// socket() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +socket(int domain, int type, int protocol) +{ + struct sys_socket_args args; + int res, error; + SYSCALLARG(args,domain) = domain; + SYSCALLARG(args,type) = type; + SYSCALLARG(args,protocol) = protocol; + error = sys_socket(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return res; + } +} diff --git a/ecos/packages/net/tcpip/current/src/lib/write.c b/ecos/packages/net/tcpip/current/src/lib/write.c new file mode 100644 index 0000000..0667631 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/lib/write.c @@ -0,0 +1,52 @@ +//========================================================================== +// +// lib/write.c +// +// write() system call +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +#include <sys/param.h> +#include <cyg/io/file.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <sys/syscallargs.h> + +int +write(int fd, const void *buf, size_t len) +{ + struct sys_write_args args; + int res, error; + SYSCALLARG(args,fd) = fd; + SYSCALLARG(args,buf) = buf; + SYSCALLARG(args,nbyte) = len; + error = sys_write(&args, &res); + if (error) { + errno = error; + return -1; + } else { + return res; + } +} diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/kern_subr.c b/ecos/packages/net/tcpip/current/src/sys/kern/kern_subr.c new file mode 100644 index 0000000..63cf342 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/kern_subr.c @@ -0,0 +1,357 @@ +//========================================================================== +// +// sys/kern/kern_subr.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: kern_subr.c,v 1.10 1999/11/07 17:39:14 provos Exp $ */ +/* $NetBSD: kern_subr.c,v 1.15 1996/04/09 17:21:56 ragge Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/proc.h> +#endif // __ECOS +#include <sys/malloc.h> +#include <sys/queue.h> + +int +uiomove(cp, n, uio) + register caddr_t cp; + register int n; + register struct uio *uio; +{ + register struct iovec *iov; + u_int cnt; + int error = 0; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) + panic("uiomove: mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("uiomove proc"); +#endif + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + return (error); + break; + + case UIO_SYSSPACE: +#if defined(UVM) + if (uio->uio_rw == UIO_READ) + error = kcopy(cp, iov->iov_base, cnt); + else + error = kcopy(iov->iov_base, cp, cnt); + if (error) + return(error); +#else + if (uio->uio_rw == UIO_READ) + 
bcopy((caddr_t)cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, (caddr_t)cp, cnt); + break; +#endif + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp += cnt; + n -= cnt; + } + return (error); +} + +#ifndef __ECOS +/* + * Give next character to user as result of read. + */ +int +ureadc(c, uio) + register int c; + register struct uio *uio; +{ + register struct iovec *iov; + + if (uio->uio_resid == 0) +#ifdef DIAGNOSTIC + panic("ureadc: zero resid"); +#else + return (EINVAL); +#endif +again: + if (uio->uio_iovcnt <= 0) +#ifdef DIAGNOSTIC + panic("ureadc: non-positive iovcnt"); +#else + return (EINVAL); +#endif + iov = uio->uio_iov; + if (iov->iov_len <= 0) { + uio->uio_iovcnt--; + uio->uio_iov++; + goto again; + } + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (subyte(iov->iov_base, c) < 0) + return (EFAULT); + break; + + case UIO_SYSSPACE: + *(char *)iov->iov_base = c; + break; + } + iov->iov_base++; + iov->iov_len--; + uio->uio_resid--; + uio->uio_offset++; + return (0); +} +#endif // __ECOS + +/* + * General routine to allocate a hash table. + */ +#ifdef __ECOS +void * +hashinit(int elements, int type, int flags, u_long *hashmask) +#else +void * +hashinit(elements, type, flags, hashmask) + int elements, type, flags; + u_long *hashmask; +#endif +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + if (elements <= 0) + panic("hashinit: bad cnt"); + for (hashsize = 1; hashsize <= elements; hashsize <<= 1) + continue; + hashsize >>= 1; + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, flags); + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *hashmask = hashsize - 1; + return (hashtbl); +} + +#ifndef __ECOS +/* + * "Shutdown hook" types, functions, and variables. 
+ */ + +struct shutdownhook_desc { + LIST_ENTRY(shutdownhook_desc) sfd_list; + void (*sfd_fn) __P((void *)); + void *sfd_arg; +}; + +LIST_HEAD(, shutdownhook_desc) shutdownhook_list; + +int shutdownhooks_done; + +void * +shutdownhook_establish(fn, arg) + void (*fn) __P((void *)); + void *arg; +{ + struct shutdownhook_desc *ndp; + + ndp = (struct shutdownhook_desc *) + malloc(sizeof (*ndp), M_DEVBUF, M_NOWAIT); + if (ndp == NULL) + return NULL; + + ndp->sfd_fn = fn; + ndp->sfd_arg = arg; + LIST_INSERT_HEAD(&shutdownhook_list, ndp, sfd_list); + + return (ndp); +} + +void +shutdownhook_disestablish(vhook) + void *vhook; +{ +#ifdef DIAGNOSTIC + struct shutdownhook_desc *dp; + + for (dp = shutdownhook_list.lh_first; dp != NULL; + dp = dp->sfd_list.le_next) + if (dp == vhook) + break; + if (dp == NULL) + panic("shutdownhook_disestablish: hook not established"); +#endif + + LIST_REMOVE((struct shutdownhook_desc *)vhook, sfd_list); +} + +/* + * Run shutdown hooks. Should be invoked immediately before the + * system is halted or rebooted, i.e. after file systems unmounted, + * after crash dump done, etc. + */ +void +doshutdownhooks() +{ + struct shutdownhook_desc *dp; + + if (shutdownhooks_done) + return; + + for (dp = shutdownhook_list.lh_first; dp != NULL; dp = + dp->sfd_list.le_next) + (*dp->sfd_fn)(dp->sfd_arg); +} + +/* + * "Power hook" types, functions, and variables. 
+ */ + +struct powerhook_desc { + LIST_ENTRY(powerhook_desc) sfd_list; + void (*sfd_fn) __P((int, void *)); + void *sfd_arg; +}; + +LIST_HEAD(, powerhook_desc) powerhook_list; + +void * +powerhook_establish(fn, arg) + void (*fn) __P((int, void *)); + void *arg; +{ + struct powerhook_desc *ndp; + + ndp = (struct powerhook_desc *) + malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT); + if (ndp == NULL) + return NULL; + + ndp->sfd_fn = fn; + ndp->sfd_arg = arg; + LIST_INSERT_HEAD(&powerhook_list, ndp, sfd_list); + + return (ndp); +} + +void +powerhook_disestablish(vhook) + void *vhook; +{ +#ifdef DIAGNOSTIC + struct powerhook_desc *dp; + + for (dp = powerhook_list.lh_first; dp != NULL; + dp = dp->sfd_list.le_next) + if (dp == vhook) + break; + if (dp == NULL) + panic("powerhook_disestablish: hook not established"); +#endif + + LIST_REMOVE((struct powerhook_desc *)vhook, sfd_list); + free(vhook, M_DEVBUF); +} + +/* + * Run power hooks. + */ +void +dopowerhooks(why) + int why; +{ + struct powerhook_desc *dp; + + for (dp = LIST_FIRST(&powerhook_list); + dp != NULL; + dp = LIST_NEXT(dp, sfd_list)) { + (*dp->sfd_fn)(why, dp->sfd_arg); + } +} +#endif // __ECOS diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/sockio.c b/ecos/packages/net/tcpip/current/src/sys/kern/sockio.c new file mode 100644 index 0000000..5cead64 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/sockio.c @@ -0,0 +1,950 @@ +//========================================================================== +// +// sys/kern/sockio.c +// +// Socket interface to Fileio subsystem +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): nickg +// Contributors: nickg +// Date: 2000-06-06 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + +/* + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ +//========================================================================== + +#include <pkgconf/net.h> +#include <pkgconf/io_fileio.h> + +#include <sys/types.h> + +#include <cyg/io/file.h> + +#include <cyg/fileio/fileio.h> +#include <cyg/fileio/sockio.h> + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/ioctl.h> + +#include <net/if.h> +#include <net/route.h> + +//========================================================================== +// Forward definitions + +static int bsd_init( cyg_nstab_entry *nste ); +static int bsd_socket( cyg_nstab_entry *nste, int domain, int type, + int protocol, cyg_file *file ); + +static int bsd_bind ( cyg_file *fp, const sockaddr *sa, socklen_t len ); +static int bsd_connect ( cyg_file *fp, const sockaddr *sa, socklen_t len ); +static int bsd_accept ( cyg_file *fp, cyg_file *new_fp, + struct sockaddr *name, socklen_t *anamelen ); +static int bsd_listen ( cyg_file *fp, int len ); +static int bsd_getname ( cyg_file *fp, sockaddr *sa, socklen_t *len, int peer ); +static int bsd_shutdown ( cyg_file *fp, int flags ); +static int bsd_getsockopt( cyg_file *fp, int 
level, int optname, + void *optval, socklen_t *optlen); +static int bsd_setsockopt( cyg_file *fp, int level, int optname, + const void *optval, socklen_t optlen); +static int bsd_sendmsg ( cyg_file *fp, const struct msghdr *m, + int flags, ssize_t *retsize ); +static int bsd_recvmsg ( cyg_file *fp, struct msghdr *m, + socklen_t *namelen, ssize_t *retsize ); + + +// File operations +static int bsd_read (struct CYG_FILE_TAG *fp, struct CYG_UIO_TAG *uio); +static int bsd_write (struct CYG_FILE_TAG *fp, struct CYG_UIO_TAG *uio); +static int bsd_lseek (struct CYG_FILE_TAG *fp, off_t *pos, int whence ); +static int bsd_ioctl (struct CYG_FILE_TAG *fp, CYG_ADDRWORD com, + CYG_ADDRWORD data); +static int bsd_select (struct CYG_FILE_TAG *fp, int which, CYG_ADDRWORD info); +static int bsd_fsync (struct CYG_FILE_TAG *fp, int mode ); +static int bsd_close (struct CYG_FILE_TAG *fp); +static int bsd_fstat (struct CYG_FILE_TAG *fp, struct stat *buf ); +static int bsd_getinfo (struct CYG_FILE_TAG *fp, int key, void *buf, int len ); +static int bsd_setinfo (struct CYG_FILE_TAG *fp, int key, void *buf, int len ); + +static int +bsd_recvit(cyg_file *fp, struct msghdr *mp, socklen_t *namelenp, ssize_t *retsize); +static int +bsd_sendit(cyg_file *fp, const struct msghdr *mp, int flags, ssize_t *retsize); + + +//========================================================================== +// Table entrys + +NSTAB_ENTRY( bsd_nste, 0, + "bsd_tcpip", + "", + 0, + bsd_init, + bsd_socket); + +struct cyg_sock_ops bsd_sockops = +{ + bsd_bind, + bsd_connect, + bsd_accept, + bsd_listen, + bsd_getname, + bsd_shutdown, + bsd_getsockopt, + bsd_setsockopt, + bsd_sendmsg, + bsd_recvmsg +}; + +cyg_fileops bsd_sock_fileops = +{ + bsd_read, + bsd_write, + bsd_lseek, + bsd_ioctl, + bsd_select, + bsd_fsync, + bsd_close, + bsd_fstat, + bsd_getinfo, + bsd_setinfo +}; + +//========================================================================== +// NStab functions + + + +// 
------------------------------------------------------------------------- + +static int bsd_init( cyg_nstab_entry *nste ) +{ + // Initialization already handled via constructor + + return ENOERR; +} + +// ------------------------------------------------------------------------- + +static int bsd_socket( cyg_nstab_entry *nste, int domain, int type, + int protocol, cyg_file *file ) +{ + int error = 0; + struct socket *so; + + error = socreate( domain, &so, type, protocol ); + + if( error == ENOERR ) + { + + cyg_selinit(&so->so_rcv.sb_sel); + cyg_selinit(&so->so_snd.sb_sel); + + file->f_flag |= CYG_FREAD|CYG_FWRITE; + file->f_type = CYG_FILE_TYPE_SOCKET; + file->f_ops = &bsd_sock_fileops; + file->f_offset = 0; + file->f_data = (CYG_ADDRWORD)so; + file->f_xops = (CYG_ADDRWORD)&bsd_sockops; + } + + return error; +} + + +//========================================================================== +// Sockops functions + +// ------------------------------------------------------------------------- + +static int bsd_bind ( cyg_file *fp, const sockaddr *sa, socklen_t len ) +{ + struct mbuf *nam; + int error; + + error = sockargs(&nam, (caddr_t)sa, len, MT_SONAME); + + if (error) + return (error); + + error = sobind((struct socket *)fp->f_data, nam); + + m_freem(nam); + + return error; +} + +// ------------------------------------------------------------------------- + +static int bsd_connect ( cyg_file *fp, const sockaddr *sa, socklen_t len ) +{ + register struct socket *so; + struct mbuf *nam; + int error, s; + + so = (struct socket *)fp->f_data; + + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) + return (EALREADY); + + error = sockargs(&nam, (caddr_t)sa, len, MT_SONAME); + + if (error) + return (error); + + error = soconnect(so, nam); + if (error) + goto bad; + + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + m_freem(nam); + return (EINPROGRESS); + } + + s = splsoftnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error 
== 0) { + error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0); + if (error) + break; + } + + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } + + splx(s); + +bad: + so->so_state &= ~SS_ISCONNECTING; + m_freem(nam); + + return error; +} + +// ------------------------------------------------------------------------- + +static int bsd_accept ( cyg_file *fp, cyg_file *new_fp, + struct sockaddr *name, socklen_t *anamelen ) +{ + struct mbuf *nam; + socklen_t namelen = 0; + int error = 0, s; + register struct socket *so; + + if( anamelen != NULL ) + namelen = *anamelen; + + s = splsoftnet(); + so = (struct socket *)fp->f_data; + + if ((so->so_options & SO_ACCEPTCONN) == 0) { + splx(s); + return (EINVAL); + } + + if ((so->so_state & SS_NBIO) && so->so_qlen == 0) { + splx(s); + return (EWOULDBLOCK); + } + + while (so->so_qlen == 0 && so->so_error == 0) { + if (so->so_state & SS_CANTRCVMORE) { + so->so_error = ECONNABORTED; + break; + } + error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0); + if (error) { + splx(s); + return (error); + } + } + + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + return (error); + } + + { + struct socket *aso = so->so_q; + if (soqremque(aso, 1) == 0) + panic("accept"); + so = aso; + } + + cyg_selinit(&so->so_rcv.sb_sel); + cyg_selinit(&so->so_snd.sb_sel); + + new_fp->f_type = DTYPE_SOCKET; + new_fp->f_flag |= FREAD|FWRITE; + new_fp->f_offset = 0; + new_fp->f_ops = &bsd_sock_fileops; + new_fp->f_data = (CYG_ADDRWORD)so; + new_fp->f_xops = (CYG_ADDRWORD)&bsd_sockops; + + nam = m_get(M_WAIT, MT_SONAME); + (void) soaccept(so, nam); + if (name) { + if (namelen > nam->m_len) + namelen = nam->m_len; + /* SHOULD COPY OUT A CHAIN HERE */ + if ((error = copyout(mtod(nam, caddr_t), + (caddr_t)name, namelen)) == 0) + *anamelen = namelen; + } + m_freem(nam); + splx(s); + + return (error); +} + +// ------------------------------------------------------------------------- + +static 
int bsd_listen ( cyg_file *fp, int backlog ) +{ + return (solisten((struct socket *)fp->f_data, backlog)); +} + +// ------------------------------------------------------------------------- + +static int bsd_getname ( cyg_file *fp, sockaddr *asa, socklen_t *alen, int peer ) +{ + register struct socket *so; + struct mbuf *m; + socklen_t len = 0; + int error; + int type = peer ? PRU_PEERADDR : PRU_SOCKADDR; + + if( alen != NULL ) + len = *alen; + + so = (struct socket *)fp->f_data; + + if ( peer && (so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) + return (ENOTCONN); + + m = m_getclr(M_WAIT, MT_SONAME); + if (m == NULL) + return (ENOBUFS); + + error = (*so->so_proto->pr_usrreq)(so, type, 0, m, 0); + if (error) + goto bad; + + if (len > m->m_len) + len = m->m_len; + + error = copyout(mtod(m, caddr_t), (caddr_t)asa, len); + + if (error == 0) + *alen = len; + +bad: + m_freem(m); + + return (error); +} + +// ------------------------------------------------------------------------- + +static int bsd_shutdown ( cyg_file *fp, int how ) +{ + return (soshutdown((struct socket *)fp->f_data, how)); +} + +// ------------------------------------------------------------------------- + +static int bsd_getsockopt( cyg_file *fp, int level, int optname, + void *optval, socklen_t *optlen) +{ + struct mbuf *m = NULL; + socklen_t valsize = 0; + int error; + + if( optval != NULL && optlen != NULL ) + valsize = *optlen; + + error = sogetopt((struct socket *)fp->f_data, level, optname, &m); + + if( error == ENOERR && valsize != 0 && m != NULL) + { + if (valsize > m->m_len) + valsize = m->m_len; + + error = copyout(mtod(m, caddr_t), optval, valsize); + + if( error == ENOERR ) + *optlen = valsize; + + } + + if (m != NULL) + (void) m_free(m); + return (error); +} + +// ------------------------------------------------------------------------- + +static int bsd_setsockopt( cyg_file *fp, int level, int optname, + const void *optval, socklen_t optlen) +{ + int error; + struct mbuf *m = 
NULL; + + if( optlen > MCLBYTES ) + return EINVAL; + + if (optval != NULL) { + m = m_get(M_WAIT, MT_SOOPTS); + if (optlen > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_freem(m); + return (ENOBUFS); + } + } + if (m == NULL) + return (ENOBUFS); + error = copyin(optval, mtod(m, caddr_t), optlen); + if (error) { + (void) m_free(m); + return (error); + } + m->m_len = optlen; + } + + return (sosetopt((struct socket *)fp->f_data, level, optname, m)); +} + +// ------------------------------------------------------------------------- + +static int bsd_sendmsg ( cyg_file *fp, const struct msghdr *m, int flags, ssize_t *retsize ) +{ + return bsd_sendit( fp, m, flags, retsize); +} + +// ------------------------------------------------------------------------- + +static int bsd_recvmsg ( cyg_file *fp, struct msghdr *m, socklen_t *namelen, ssize_t *retsize ) +{ + return bsd_recvit( fp, m, namelen, retsize); +} + +//========================================================================== +// File system call functions + +// ------------------------------------------------------------------------- + +static int bsd_read (struct CYG_FILE_TAG *fp, struct CYG_UIO_TAG *uio) +{ + return (soreceive((struct socket *)fp->f_data, (struct mbuf **)0, + uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0)); +} + +// ------------------------------------------------------------------------- + +static int bsd_write (struct CYG_FILE_TAG *fp, struct CYG_UIO_TAG *uio) +{ + return (sosend((struct socket *)fp->f_data, (struct mbuf *)0, + uio, (struct mbuf *)0, (struct mbuf *)0, 0)); +} + +// ------------------------------------------------------------------------- + +static int bsd_lseek (struct CYG_FILE_TAG *fp, off_t *pos, int whence ) +{ + return ESPIPE; +} + +// ------------------------------------------------------------------------- + +static int bsd_ioctl (struct CYG_FILE_TAG *fp, CYG_ADDRWORD cmd, + CYG_ADDRWORD data) +{ + register struct socket *so = (struct 
socket *)fp->f_data; + void *p = 0; + + switch (cmd) { + + case FIONBIO: + if (*(int *)data) + so->so_state |= SS_NBIO; + else + so->so_state &= ~SS_NBIO; + return (0); + + case FIOASYNC: + if (*(int *)data) { + so->so_state |= SS_ASYNC; + so->so_rcv.sb_flags |= SB_ASYNC; + so->so_snd.sb_flags |= SB_ASYNC; + } else { + so->so_state &= ~SS_ASYNC; + so->so_rcv.sb_flags &= ~SB_ASYNC; + so->so_snd.sb_flags &= ~SB_ASYNC; + } + return (0); + + case FIONREAD: + *(int *)data = so->so_rcv.sb_cc; + return (0); + + case SIOCATMARK: + *(int *)data = (so->so_state&SS_RCVATMARK) != 0; + return (0); + } + /* + * Interface/routing/protocol specific ioctls: + * interface and routing ioctls should have a + * different entry since a socket's unnecessary + */ + if (IOCGROUP(cmd) == 'i') + return (ifioctl(so, (u_long)cmd, (caddr_t)data, p)); + if (IOCGROUP(cmd) == 'r') + return (rtioctl((u_long)cmd, (caddr_t)data, p)); + return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, + (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0)); + +} + +// ------------------------------------------------------------------------- + +static int bsd_select (struct CYG_FILE_TAG *fp, int which, CYG_ADDRWORD info) +{ + register struct socket *so = (struct socket *)fp->f_data; + register int s = splsoftnet(); + + switch (which) { + + case FREAD: + if (soreadable(so)) { + splx(s); + return (1); + } + cyg_selrecord(info, &so->so_rcv.sb_sel); + so->so_rcv.sb_flags |= SB_SEL; + break; + + case FWRITE: + if (sowriteable(so)) { + splx(s); + return (1); + } + cyg_selrecord(info, &so->so_snd.sb_sel); + so->so_snd.sb_flags |= SB_SEL; + break; + + case 0: + if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { + splx(s); + return (1); + } + cyg_selrecord(info, &so->so_rcv.sb_sel); + so->so_rcv.sb_flags |= SB_SEL; + break; + } + splx(s); + + return ENOERR; +} + +// ------------------------------------------------------------------------- + +static int bsd_fsync (struct CYG_FILE_TAG *fp, int mode ) +{ + // FIXME: 
call some sort of flush IOCTL? + return 0; +} + +// ------------------------------------------------------------------------- + +static int bsd_close (struct CYG_FILE_TAG *fp) +{ + int error = 0; + + if (fp->f_data) + error = soclose((struct socket *)fp->f_data); + fp->f_data = 0; + return (error); +} + +// ------------------------------------------------------------------------- + +static int bsd_fstat (struct CYG_FILE_TAG *fp, struct stat *buf ) +{ + register struct socket *so = (struct socket *)fp->f_data; + + bzero((caddr_t)buf, sizeof (*buf)); + + // Mark socket as a fifo for now. We need to add socket types to + // sys/stat.h. + buf->st_mode = __stat_mode_FIFO; + + return ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, + (struct mbuf *)buf, + (struct mbuf *)0, + (struct mbuf *)0)); +} + +// ------------------------------------------------------------------------- + +static int bsd_getinfo (struct CYG_FILE_TAG *fp, int key, void *buf, int len ) +{ + return ENOSYS; +} + +// ------------------------------------------------------------------------- + +static int bsd_setinfo (struct CYG_FILE_TAG *fp, int key, void *buf, int len ) +{ + return ENOSYS; +} + + + +//========================================================================== +// Select support + +// ------------------------------------------------------------------------- +// This function is called by the lower layers to record the +// fact that a particular 'select' event is being requested. +// + +void +selrecord(void *selector, struct selinfo *info) +{ + // Unused by this implementation +} + +// ------------------------------------------------------------------------- +// This function is called to indicate that a 'select' event +// may have occurred. 
+// + +void +selwakeup(struct selinfo *info) +{ + cyg_selwakeup( info ); +} + +//========================================================================== +// Misc support functions + +int +sockargs(mp, buf, buflen, type) + struct mbuf **mp; + caddr_t buf; + socklen_t buflen; + int type; +{ + register struct sockaddr *sa; + register struct mbuf *m; + int error; + + if (buflen > MLEN) { +#ifdef COMPAT_OLDSOCK + if (type == MT_SONAME && buflen <= 112) + buflen = MLEN; /* unix domain compat. hack */ + else +#endif + return (EINVAL); + } + m = m_get(M_WAIT, type); + if (m == NULL) + return (ENOBUFS); + m->m_len = buflen; + error = copyin(buf, mtod(m, caddr_t), buflen); + if (error) { + (void) m_free(m); + return (error); + } + *mp = m; + if (type == MT_SONAME) { + sa = mtod(m, struct sockaddr *); + +#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN + if (sa->sa_family == 0 && sa->sa_len < AF_MAX) + sa->sa_family = sa->sa_len; +#endif + sa->sa_len = buflen; + } + return (0); +} + + +// ------------------------------------------------------------------------- +// bsd_recvit() +// Support for message reception. This is a lightly edited version of the +// recvit() function is uipc_syscalls.c. + +static int +bsd_recvit(cyg_file *fp, struct msghdr *mp, socklen_t *namelenp, ssize_t *retsize) +{ + struct uio auio; + register struct iovec *iov; + register int i; + size_t len; + int error; + struct mbuf *from = 0, *control = 0; + + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + /* Don't allow sum > SSIZE_MAX */ + if (iov->iov_len > SSIZE_MAX || + (auio.uio_resid += iov->iov_len) > SSIZE_MAX) + return (EINVAL); + } + + len = auio.uio_resid; + error = soreceive((struct socket *)fp->f_data, &from, &auio, + NULL, mp->msg_control ? 
&control : NULL, + &mp->msg_flags); + if (error) { + if (auio.uio_resid != len && + (error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + if (error) + goto out; + *retsize = len - auio.uio_resid; + if (mp->msg_name) { + len = mp->msg_namelen; + if (len <= 0 || from == 0) + len = 0; + else { + /* save sa_len before it is destroyed by MSG_COMPAT */ + if (len > from->m_len) + len = from->m_len; + /* else if len < from->m_len ??? */ +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags & MSG_COMPAT) + mtod(from, struct osockaddr *)->sa_family = + mtod(from, struct sockaddr *)->sa_family; +#endif + error = copyout(mtod(from, caddr_t), + (caddr_t)mp->msg_name, (unsigned)len); + if (error) + goto out; + } + mp->msg_namelen = len; + if (namelenp ) { + *namelenp = len; +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags & MSG_COMPAT) + error = 0; /* old recvfrom didn't check */ + else +#endif + goto out; + } + } + if (mp->msg_control) { +#ifdef COMPAT_OLDSOCK + /* + * We assume that old recvmsg calls won't receive access + * rights and other control info, esp. as control info + * is always optional and those options didn't exist in 4.3. + * If we receive rights, trim the cmsghdr; anything else + * is tossed. 
+ */ + if (control && mp->msg_flags & MSG_COMPAT) { + if (mtod(control, struct cmsghdr *)->cmsg_level != + SOL_SOCKET || + mtod(control, struct cmsghdr *)->cmsg_type != + SCM_RIGHTS) { + mp->msg_controllen = 0; + goto out; + } + control->m_len -= sizeof (struct cmsghdr); + control->m_data += sizeof (struct cmsghdr); + } +#endif + len = mp->msg_controllen; + if (len <= 0 || control == 0) + len = 0; + else { + struct mbuf *m = control; + caddr_t p = (caddr_t)mp->msg_control; + + do { + i = m->m_len; + if (len < i) { + mp->msg_flags |= MSG_CTRUNC; + i = len; + } + error = copyout(mtod(m, caddr_t), p, + (unsigned)i); + if (m->m_next) + i = ALIGN(i); + p += i; + len -= i; + if (error != 0 || len <= 0) + break; + } while ((m = m->m_next) != NULL); + len = p - (caddr_t)mp->msg_control; + } + mp->msg_controllen = len; + } +out: + if (from) + m_freem(from); + if (control) + m_freem(control); + return (error); +} + +// ------------------------------------------------------------------------- +// sendit() +// Support for message transmission. This is a lightly edited version of the +// synonymous function is uipc_syscalls.c. 
+ +static int +bsd_sendit(cyg_file *fp, const struct msghdr *mp, int flags, ssize_t *retsize) +{ + struct uio auio; + register struct iovec *iov; + register int i; + struct mbuf *to, *control; + int len, error; + + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + /* Don't allow sum > SSIZE_MAX */ + if (iov->iov_len > SSIZE_MAX || + (auio.uio_resid += iov->iov_len) > SSIZE_MAX) + return (EINVAL); + } + if (mp->msg_name) { + error = sockargs(&to, mp->msg_name, mp->msg_namelen, + MT_SONAME); + if (error) + return (error); + } else + to = 0; + if (mp->msg_control) { + if (mp->msg_controllen < sizeof(struct cmsghdr) +#ifdef COMPAT_OLDSOCK + && mp->msg_flags != MSG_COMPAT +#endif + ) { + error = EINVAL; + goto bad; + } + error = sockargs(&control, mp->msg_control, + mp->msg_controllen, MT_CONTROL); + if (error) + goto bad; +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags == MSG_COMPAT) { + register struct cmsghdr *cm; + + M_PREPEND(control, sizeof(*cm), M_WAIT); + if (control == 0) { + error = ENOBUFS; + goto bad; + } else { + cm = mtod(control, struct cmsghdr *); + cm->cmsg_len = control->m_len; + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_RIGHTS; + } + } +#endif + } else + control = 0; + + len = auio.uio_resid; + error = sosend((struct socket *)fp->f_data, to, &auio, + NULL, control, flags); + if (error) { + if (auio.uio_resid != len && + (error == EINTR || error == EWOULDBLOCK)) + error = 0; +#ifndef __ECOS + if (error == EPIPE) + psignal(p, SIGPIPE); +#endif + } + if (error == 0) + *retsize = len - auio.uio_resid; +bad: + if (to) + m_freem(to); + return (error); +} + + +//========================================================================== +// End of sockio.c diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/sys_generic.c 
b/ecos/packages/net/tcpip/current/src/sys/kern/sys_generic.c new file mode 100644 index 0000000..bf0e9ed --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/sys_generic.c @@ -0,0 +1,1022 @@ +//========================================================================== +// +// sys/kern/sys_generic.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: sys_generic.c,v 1.22 1999/11/29 22:02:14 deraadt Exp $ */ +/* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ + +/* + * Copyright (c) 1996 Theo de Raadt + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#ifndef __ECOS
#include <sys/systm.h>
#include <sys/filedesc.h>
#endif // __ECOS
#include <sys/ioctl.h>
#ifdef __ECOS
#include <cyg/io/file.h>
#else // __ECOS
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#endif // __ECOS
#include <sys/socketvar.h>
#ifndef __ECOS
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/uio.h>
#include <sys/stat.h>
#endif // __ECOS
#include <sys/malloc.h>
#ifndef __ECOS
#include <sys/poll.h>
#endif // __ECOS
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#ifndef __ECOS
#include <sys/mount.h>
#endif // __ECOS
#include <sys/syscallargs.h>

#ifndef __ECOS
int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *));
int seltrue __P((dev_t, int, struct proc *));
void pollscan __P((struct proc *, struct pollfd *, int, register_t *));
#endif // __ECOS

/*
 * Read system call.
 *
 * Reads up to nbyte bytes from descriptor fd into buf through the file's
 * fo_read operation and stores the number of bytes transferred in *retval.
 * Returns EBADF if the descriptor cannot be resolved or is not open for
 * reading, EINVAL if nbyte exceeds SSIZE_MAX, otherwise the fo_read result.
 * The eCos build takes the argument structure directly and resolves the
 * descriptor with getfp(); the BSD build uses the process file table.
 */
#ifdef __ECOS
int
sys_read(struct sys_read_args *uap, register_t *retval)
#else
/* ARGSUSED */
int
sys_read(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
#endif
{
#ifndef __ECOS
	register struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	register struct filedesc *fdp = p->p_fd;
#endif
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

#ifdef __ECOS
	if (getfp((u_int)SCARG(uap, fd), &fp) ||
#else
	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);
	/* Don't allow nbyte to be larger than max return val */
	if (SCARG(uap, nbyte) > SSIZE_MAX)
		return(EINVAL);
	/* Describe the caller's buffer as a single-segment uio. */
	aiov.iov_base = (caddr_t)SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, nbyte);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = SCARG(uap, nbyte);
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
#ifndef __ECOS
	auio.uio_procp = p;
#endif
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = SCARG(uap, nbyte);
#ifdef __ECOS
	error = (*fp->f_ops->fo_read)(fp, &auio);
#else
	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
#endif
	/*
	 * An interrupted or would-block read that already moved some data
	 * is reported as a short, successful read.
	 */
	if (error)
#ifdef __ECOS
		if (auio.uio_resid != cnt && (
#else
		if (auio.uio_resid != cnt && (error == ERESTART ||
#endif
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, &ktriov,
		    cnt, error);
#endif
	*retval = cnt;
	return (error);
}


#ifndef __ECOS
/*
 * Scatter read system call.
 *
 * Not compiled for eCos.  The iovec array is copied in (onto the stack
 * when iovcnt <= UIO_SMALLIOV, otherwise into a MALLOC'd buffer that is
 * released at "done") and the total length is bounded by SSIZE_MAX.
 */
int
sys_readv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	register struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	struct uio auio;
	register struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);
	if (SCARG(uap, iovcnt) <= 0)
		return (EINVAL);
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
		if (SCARG(uap, iovcnt) > IOV_MAX)
			return (EINVAL);
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else {
		iov = aiov;
		needfree = NULL;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = SCARG(uap, iovcnt);
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < SCARG(uap, iovcnt); i++, iov++) {
		/* Don't allow sum > SSIZE_MAX */
		if (iov->iov_len > SSIZE_MAX ||
		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))  {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	/* A partial transfer that was merely interrupted is not an error. */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, ktriov,
			    cnt, error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		FREE(needfree, M_IOV);
	return (error);
}
#endif

/*
 * Write system call
 *
 * Writes up to nbyte bytes from buf to descriptor fd through the file's
 * fo_write operation and stores the number of bytes transferred in
 * *retval.  Returns EBADF if the descriptor cannot be resolved or is not
 * open for writing, EINVAL if nbyte exceeds SSIZE_MAX, otherwise the
 * fo_write result.  Only the BSD build raises SIGPIPE on EPIPE.
 */
#ifdef __ECOS
int
sys_write(struct sys_write_args *uap, register_t *retval)
#else
int
sys_write(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
#endif
{
#ifndef __ECOS
	register struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	register struct filedesc *fdp = p->p_fd;
#endif
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

#ifdef __ECOS
	if (getfp((u_int)SCARG(uap, fd), &fp) ||
#else
	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);
	/* Don't allow nbyte to be larger than max return val */
	if (SCARG(uap, nbyte) > SSIZE_MAX)
		return(EINVAL);
	/* Describe the caller's buffer as a single-segment uio. */
	aiov.iov_base = (caddr_t)SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, nbyte);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = SCARG(uap, nbyte);
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
#ifndef __ECOS
	auio.uio_procp = p;
#endif
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = SCARG(uap, nbyte);
#ifdef __ECOS
	error = (*fp->f_ops->fo_write)(fp, &auio);
#else
	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
#endif
	if (error) {
#ifdef __ECOS
		if (auio.uio_resid != cnt &&
		    (error == EINTR || error == EWOULDBLOCK))
			error = 0;
#else
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
#endif
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
		    &ktriov, cnt, error);
#endif
	*retval = cnt;
	return (error);
}

#ifndef __ECOS
/*
 * Gather write system call
 *
 * Not compiled for eCos.  Mirrors sys_readv(): the iovec array is copied
 * in, its total length is bounded by SSIZE_MAX, and any MALLOC'd copy is
 * released at "done".
 */
int
sys_writev(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	register struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	struct uio auio;
	register struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);
	if (SCARG(uap, iovcnt) <= 0)
		return (EINVAL);
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
		if (SCARG(uap, iovcnt) > IOV_MAX)
			return (EINVAL);
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else {
		iov = aiov;
		needfree = NULL;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = SCARG(uap, iovcnt);
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < SCARG(uap, iovcnt); i++, iov++) {
		/* Don't allow sum > SSIZE_MAX */
		if (iov->iov_len > SSIZE_MAX ||
		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))  {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
			    ktriov, cnt, error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		FREE(needfree, M_IOV);
	return (error);
}
#endif

/*
 * Ioctl system call
 *
 * Stages the request argument in a kernel-side buffer (stkbuf, or a
 * malloc'd buffer when the encoded parameter size exceeds STK_PARAMS),
 * dispatches to the file's fo_ioctl operation, and for the default case
 * copies IOC_OUT data back to the caller.  FIONBIO and FIOASYNC also
 * mirror their state into fp->f_flag.  Returns EBADF if the descriptor
 * cannot be resolved or is open for neither reading nor writing.
 *
 * NOTE(review): the IOCPARM_MAX bound on size is compiled out for eCos,
 * and the malloc() result is used unchecked - confirm both are intended.
 */
#ifdef __ECOS
int
sys_ioctl(struct sys_ioctl_args *uap, register_t *retval)
#else
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
#endif
{
#ifndef __ECOS
	register struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	register struct filedesc *fdp;
#endif
	int tmp;
	struct file *fp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

#ifdef __ECOS
	if (getfp(SCARG(uap, fd), &fp))
#else
	fdp = p->p_fd;
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
#endif
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

#ifdef __ECOS
	com = SCARG(uap, com);
#else
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}
#endif

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
#ifndef __ECOS
	if (size > IOCPARM_MAX)
		return (ENOTTY);
#endif
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				return (error);
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
#ifdef __ECOS
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (CYG_ADDRWORD)&tmp);
#else
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
#endif
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
#ifdef __ECOS
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (CYG_ADDRWORD)&tmp);
#else
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
#endif
		break;

#ifndef __ECOS
	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;
#endif
	default:
#ifdef __ECOS
		error = (*fp->f_ops->fo_ioctl)(fp, com, (CYG_ADDRWORD)data);
#else
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
#endif
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

#ifndef __ECOS
int	selwait, nselcoll;

/*
 * Select system call.
 */
/*
 * (Not compiled for eCos.)  Copies in the three descriptor sets, scans
 * them with selscan(), and sleeps on selwait until a descriptor becomes
 * ready, the timeout expires or the sleep is interrupted; the result sets
 * are copied back out when no error occurred.
 *
 * NOTE(review): if the malloc() for large descriptor sets fails, control
 * jumps to "cleanup" before pibits[] has been assigned, and cleanup then
 * compares and frees pibits[0] - looks like an uninitialised read; confirm.
 */
int
sys_select(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_set bits[6], *pibits[3], *pobits[3];
	struct timeval atv;
	int s, ncoll, error = 0, timo;
	u_int ni;

	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* ni: size in bytes of one descriptor set covering nd descriptors */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (SCARG(uap, nd) > FD_SETSIZE) {
		caddr_t mbits;

		if ((mbits = malloc(ni * 6, M_TEMP, M_WAITOK)) == NULL) {
			error = EINVAL;
			goto cleanup;
		}
		bzero(mbits, ni * 6);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero((caddr_t)bits, sizeof(bits));
		pibits[0] = &bits[0];
		pibits[1] = &bits[1];
		pibits[2] = &bits[2];
		pobits[0] = &bits[3];
		pobits[1] = &bits[4];
		pobits[2] = &bits[5];
	}

#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \
	    (caddr_t)pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
			sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		timo = hzto(&atv);
		/*
		 * Avoid inadvertently sleeping forever.
		 */
		if (timo == 0)
			timo = 1;
		splx(s);
	} else
		timo = 0;
retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	s = splhigh();
	/* this should be timercmp(&time, &atv, >=) */
	if (SCARG(uap, tv) && (time.tv_sec > atv.tv_sec ||
	    (time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec))) {
		splx(s);
		goto done;
	}
	/* Rescan if P_SELECT was cleared or the collision count moved. */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \
	    (caddr_t)SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

cleanup:
	/* Only the large-set path allocated memory. */
	if (pibits[0] != &bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

/*
 * Scan the three input descriptor sets (read, write, exceptional - see
 * flag[]) for the first nfd descriptors, calling each file's fo_select
 * operation.  Ready descriptors are set in the matching output set and
 * counted in *retval.  Returns EBADF if a selected descriptor is not open.
 */
int
selscan(p, ibits, obits, nfd, retval)
	struct proc *p;
	fd_set *ibits, *obits;
	int nfd;
	register_t *retval;
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register fd_mask bits;
	struct file *fp;
	int ni, n = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };

	/*
	 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
	 * up to the next byte) otherwise the fd_set's are normal sized.
	 */
	ni = sizeof(fd_set);
	if (nfd > FD_SETSIZE)
		ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			/* Visit each set bit of this word, lowest first. */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL)
					return (EBADF);
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
			}
		}
	}
	*retval = n;
	return (0);
}

/*
 * Select routine that always reports the descriptor as ready.
 */
/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{

	return (1);
}

/*
 * Record a select request.
 *
 * Remembers the selecting process in *sip.  If another process is already
 * recorded and is sleeping on selwait, the entry is flagged SI_COLL
 * instead of being overwritten.
 */
void
selrecord(selector, sip)
	struct proc *selector;
	struct selinfo *sip;
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_selpid == mypid)
		return;
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
+ */ +void +selwakeup(sip) + register struct selinfo *sip; +{ + register struct proc *p; + int s; + + if (sip->si_selpid == 0) + return; + if (sip->si_flags & SI_COLL) { + nselcoll++; + sip->si_flags &= ~SI_COLL; + wakeup((caddr_t)&selwait); + } + p = pfind(sip->si_selpid); + sip->si_selpid = 0; + if (p != NULL) { + s = splhigh(); + if (p->p_wchan == (caddr_t)&selwait) { + if (p->p_stat == SSLEEP) + setrunnable(p); + else + unsleep(p); + } else if (p->p_flag & P_SELECT) + p->p_flag &= ~P_SELECT; + splx(s); + } +} + +void +pollscan(p, pl, nfd, retval) + struct proc *p; + struct pollfd *pl; + int nfd; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + register int msk, i; + struct file *fp; + int x, n = 0; + static int flag[3] = { FREAD, FWRITE, 0 }; + static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR }; + + /* + * XXX: We need to implement the rest of the flags. + */ + for (i = 0; i < nfd; i++) { + /* Check the file descriptor. */ + if (pl[i].fd < 0) + continue; + if (pl[i].fd >= fdp->fd_nfiles) { + pl[i].revents = POLLNVAL; + n++; + continue; + } + + fp = fdp->fd_ofiles[pl[i].fd]; + if (fp == NULL) { + pl[i].revents = POLLNVAL; + n++; + continue; + } + for (x = msk = 0; msk < 3; msk++) { + if (pl[i].events & pflag[msk]) { + if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { + pl[i].revents |= pflag[msk] & + pl[i].events; + x++; + } + } + } + if (x) + n++; + } + *retval = n; +} + +/* + * We are using the same mechanism as select only we encode/decode args + * differently. + */ +int +sys_poll(p, v, retval) + register struct proc *p; + void *v; + register_t *retval; +{ + struct sys_poll_args *uap = v; + size_t sz; + struct pollfd pfds[4], *pl = pfds; + int msec = SCARG(uap, timeout); + struct timeval atv; + int timo, ncoll, i, s, error, error2; + extern int nselcoll, selwait; + + /* Standards say no more than MAX_OPEN; this is possibly better. 
*/ + if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, + maxfiles)) + return (EINVAL); + + sz = sizeof(struct pollfd) * SCARG(uap, nfds); + + /* optimize for the default case, of a small nfds value */ + if (sz > sizeof(pfds)) + pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); + + if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0) + goto bad; + + for (i = 0; i < SCARG(uap, nfds); i++) + pl[i].revents = 0; + + if (msec != -1) { + atv.tv_sec = msec / 1000; + atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000; + + if (itimerfix(&atv)) { + error = EINVAL; + goto done; + } + s = splclock(); + timeradd(&atv, &time, &atv); + timo = hzto(&atv); + /* + * Avoid inadvertently sleeping forever. + */ + if (timo == 0) + timo = 1; + splx(s); + } else + timo = 0; + +retry: + ncoll = nselcoll; + p->p_flag |= P_SELECT; + pollscan(p, pl, SCARG(uap, nfds), retval); + if (*retval) + goto done; + s = splhigh(); + if (timo && timercmp(&time, &atv, >=)) { + splx(s); + goto done; + } + if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { + splx(s); + goto retry; + } + p->p_flag &= ~P_SELECT; + error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); + splx(s); + if (error == 0) + goto retry; + +done: + p->p_flag &= ~P_SELECT; + /* poll is not restarted after signals... 
*/ + if (error == ERESTART) + error = EINTR; + if (error == EWOULDBLOCK) + error = 0; + if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0) + error = error2; +bad: + if (pl != pfds) + free((char *) pl, M_TEMP); + return (error); +} +#endif diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/sys_socket.c b/ecos/packages/net/tcpip/current/src/sys/kern/sys_socket.c new file mode 100644 index 0000000..6c36169 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/sys_socket.c @@ -0,0 +1,279 @@ +//========================================================================== +// +// sys/kern/sys_socket.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: sys_socket.c,v 1.3 1997/08/31 20:42:23 deraadt Exp $ */ +/* $NetBSD: sys_socket.c,v 1.13 1995/08/12 23:59:09 mycroft Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#ifdef __ECOS +#include <cyg/io/file.h> +#else +#include <sys/systm.h> +#include <sys/file.h> +#include <sys/proc.h> +#endif +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/ioctl.h> +#ifndef __ECOS +#include <sys/stat.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +struct fileops socketops = + { soo_read, soo_write, soo_ioctl, soo_select, soo_close }; + +#ifdef __ECOS +/* ARGSUSED */ +int +soo_read(struct file *fp, struct uio *uio) +#else +/* ARGSUSED */ +int +soo_read(fp, uio, cred) + struct file *fp; + struct uio *uio; + struct ucred *cred; +#endif +{ + + return (soreceive((struct socket *)fp->f_data, (struct mbuf **)0, + uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0)); +} + +#ifdef __ECOS +int +soo_write(struct file *fp, struct uio *uio) +#else +/* ARGSUSED */ +int +soo_write(fp, uio, cred) + struct file *fp; + struct uio *uio; + struct ucred *cred; +#endif +{ + + return (sosend((struct socket *)fp->f_data, (struct mbuf *)0, + uio, (struct mbuf *)0, (struct mbuf *)0, 0)); +} + +#ifdef __ECOS +int +soo_ioctl(struct file *fp, CYG_ADDRWORD cmd, CYG_ADDRWORD data) +#else +int +soo_ioctl(fp, cmd, data, p) + struct file *fp; + u_long cmd; + register caddr_t data; + struct proc *p; +#endif +{ + register struct socket *so = (struct socket *)fp->f_data; +#ifdef __ECOS + void *p = 0; +#endif + + switch (cmd) { + + case FIONBIO: + if (*(int *)data) + so->so_state |= SS_NBIO; + else + so->so_state &= ~SS_NBIO; + return (0); + + case FIOASYNC: + if (*(int *)data) { + so->so_state |= SS_ASYNC; + so->so_rcv.sb_flags |= SB_ASYNC; + so->so_snd.sb_flags |= SB_ASYNC; + } else { + so->so_state &= ~SS_ASYNC; + so->so_rcv.sb_flags &= ~SB_ASYNC; + so->so_snd.sb_flags &= ~SB_ASYNC; + } + return (0); + + case FIONREAD: + *(int *)data = so->so_rcv.sb_cc; + return (0); + +#ifndef __ECOS + case SIOCSPGRP: + so->so_pgid = 
*(int *)data; + so->so_siguid = p->p_cred->p_ruid; + so->so_sigeuid = p->p_ucred->cr_uid; + return (0); + + case SIOCGPGRP: + *(int *)data = so->so_pgid; + return (0); +#endif + + case SIOCATMARK: + *(int *)data = (so->so_state&SS_RCVATMARK) != 0; + return (0); + } + /* + * Interface/routing/protocol specific ioctls: + * interface and routing ioctls should have a + * different entry since a socket's unnecessary + */ + if (IOCGROUP(cmd) == 'i') + return (ifioctl(so, (u_long)cmd, (caddr_t)data, p)); + if (IOCGROUP(cmd) == 'r') + return (rtioctl((u_long)cmd, (caddr_t)data, p)); + return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, + (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0)); +} + +#ifdef __ECOS +int +soo_select(struct file *fp, int which) +#else +int +soo_select(fp, which, p) + struct file *fp; + int which; + struct proc *p; +#endif +{ + register struct socket *so = (struct socket *)fp->f_data; + register int s = splsoftnet(); +#ifdef __ECOS + void *p = 0; +#endif + + switch (which) { + + case FREAD: + if (soreadable(so)) { + splx(s); + return (1); + } + selrecord(p, &so->so_rcv.sb_sel); + so->so_rcv.sb_flags |= SB_SEL; + break; + + case FWRITE: + if (sowriteable(so)) { + splx(s); + return (1); + } + selrecord(p, &so->so_snd.sb_sel); + so->so_snd.sb_flags |= SB_SEL; + break; + + case 0: + if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { + splx(s); + return (1); + } + selrecord(p, &so->so_rcv.sb_sel); + so->so_rcv.sb_flags |= SB_SEL; + break; + } + splx(s); + return (0); +} + +#ifndef __ECOS +int +soo_stat(so, ub) + register struct socket *so; + register struct stat *ub; +{ + + bzero((caddr_t)ub, sizeof (*ub)); + ub->st_mode = S_IFSOCK; + return ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, + (struct mbuf *)ub, (struct mbuf *)0, + (struct mbuf *)0)); +} +#endif + +#ifdef __ECOS +int +soo_close(struct file *fp) +#else +/* ARGSUSED */ +int +soo_close(fp, p) + struct file *fp; + struct proc *p; +#endif +{ + int error = 0; + + if (fp->f_data) + error = 
soclose((struct socket *)fp->f_data); + fp->f_data = 0; + return (error); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/uipc_domain.c b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_domain.c new file mode 100644 index 0000000..be7c073 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_domain.c @@ -0,0 +1,308 @@ +//========================================================================== +// +// sys/kern/uipc_domain.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: uipc_domain.c,v 1.9 1999/12/08 06:50:17 itojun Exp $ */ +/* $NetBSD: uipc_domain.c,v 1.14 1996/02/09 19:00:44 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93 + */ + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/domain.h> +#include <sys/mbuf.h> +#include <sys/time.h> +#include <sys/kernel.h> +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> +#endif + +void pffasttimo __P((void *)); +void pfslowtimo __P((void *)); +#if defined (KEY) || defined (IPSEC) +int pfkey_init __P((void)); +#endif /* KEY || IPSEC */ + +#define ADDDOMAIN(x) { \ + extern struct domain __CONCAT(x,domain); \ + __CONCAT(x,domain.dom_next) = domains; \ + domains = &__CONCAT(x,domain); \ +} + +void +domaininit() +{ + register struct domain *dp; + register struct protosw *pr; + +#undef unix + /* + * KAME NOTE: ADDDOMAIN(route) is moved to the last part so that + * it will be initialized as the *first* element. confusing! + */ +#ifndef lint +#ifndef __ECOS + ADDDOMAIN(unix); +#endif +#ifdef INET + ADDDOMAIN(inet); +#endif +#ifdef INET6 + ADDDOMAIN(inet6); +#endif /* INET6 */ +#if defined (KEY) || defined (IPSEC) + pfkey_init(); +#endif /* KEY || IPSEC */ +#ifdef IPX + ADDDOMAIN(ipx); +#endif +#ifdef NETATALK + ADDDOMAIN(atalk); +#endif +#ifdef NS + ADDDOMAIN(ns); +#endif +#ifdef ISO + ADDDOMAIN(iso); +#endif +#ifdef CCITT + ADDDOMAIN(ccitt); +#endif +#ifdef notdef /* XXXX */ +#include "imp.h" +#if NIMP > 0 + ADDDOMAIN(imp); +#endif +#endif +#ifdef IPSEC +#ifdef __KAME__ + ADDDOMAIN(key); +#endif +#endif + ADDDOMAIN(route); +#endif + + for (dp = domains; dp; dp = dp->dom_next) { + if (dp->dom_init) + (*dp->dom_init)(); + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_init) + (*pr->pr_init)(); + } + +if (max_linkhdr < 16) /* XXX */ +max_linkhdr = 16; + max_hdr = max_linkhdr + max_protohdr; + max_datalen = MHLEN - max_hdr; + timeout(pffasttimo, NULL, 1); + timeout(pfslowtimo, NULL, 1); +} + +struct protosw * +pffindtype(family, type) + int family, type; +{ + register struct domain 
*dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (0); +found: + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_type && pr->pr_type == type) + return (pr); + return (0); +} + +struct protosw * +pffindproto(family, protocol, type) + int family, protocol, type; +{ + register struct domain *dp; + register struct protosw *pr; + struct protosw *maybe = 0; + + if (family == 0) + return (0); + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (0); +found: + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { + if ((pr->pr_protocol == protocol) && (pr->pr_type == type)) + return (pr); + + if (type == SOCK_RAW && pr->pr_type == SOCK_RAW && + pr->pr_protocol == 0 && maybe == (struct protosw *)0) + maybe = pr; + } + return (maybe); +} + +#ifndef __ECOS +int +net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + register struct domain *dp; + register struct protosw *pr; + int family, protocol; + + /* + * All sysctl names at this level are nonterminal. + * PF_KEY: next component is protocol family, and then at least one + * additional component. + * usually: next two components are protocol family and protocol + * number, then at least one addition component. 
+ */ + if (namelen < 2) + return (EISDIR); /* overloaded */ + family = name[0]; + + if (family == 0) + return (0); + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (ENOPROTOOPT); +found: + switch (family) { +#ifdef IPSEC +#ifdef __KAME__ + case PF_KEY: + pr = dp->dom_protosw; + if (pr->pr_sysctl) + return ((*pr->pr_sysctl)(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen)); + return (ENOPROTOOPT); +#endif +#endif + default: + break; + } + if (namelen < 3) + return (EISDIR); /* overloaded */ + protocol = name[1]; + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_protocol == protocol && pr->pr_sysctl) + return ((*pr->pr_sysctl)(name + 2, namelen - 2, + oldp, oldlenp, newp, newlen)); + return (ENOPROTOOPT); +} +#endif + +void +pfctlinput(cmd, sa) + int cmd; + struct sockaddr *sa; +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_ctlinput) + (*pr->pr_ctlinput)(cmd, sa, NULL); +} + +void +pfslowtimo(arg) + void *arg; +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_slowtimo) { + (*pr->pr_slowtimo)(); + } + timeout(pfslowtimo, NULL, hz/2); +} + +void +pffasttimo(arg) + void *arg; +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_fasttimo) { + (*pr->pr_fasttimo)(); + } + timeout(pffasttimo, NULL, hz/5); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/uipc_mbuf.c b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_mbuf.c new file mode 100644 index 0000000..ddbc5bc --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_mbuf.c @@ -0,0 +1,1093 @@ 
+//========================================================================== +// +// sys/kern/uipc_mbuf.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: uipc_mbuf.c,v 1.18 1999/12/05 07:30:31 angelos Exp $ */ +/* $NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/proc.h> +#endif +#include <sys/malloc.h> +#ifndef __ECOS +#include <sys/map.h> +#endif +#define MBTYPES +#include <sys/mbuf.h> +#include <sys/kernel.h> +#ifndef __ECOS +#include <sys/syslog.h> +#endif +#include <sys/domain.h> +#include <sys/protosw.h> + +#include <machine/cpu.h> + +#ifndef __ECOS +#include <vm/vm.h> +#endif + +#if defined(UVM) +#include <uvm/uvm_extern.h> +#endif + +#ifndef __ECOS +extern vm_map_t mb_map; +#endif +struct mbuf *mbutl; +char *mclrefcnt; +int needqueuedrain; + +#ifdef __ECOS +extern void setsoftnet(void); +extern void *cyg_net_cluster_alloc(void); +#endif + +/* Declarations of variables move from mbuf.h to keep g++ happy */ +#ifdef __ECOS +struct mbstat mbstat; +union mcluster *mclfree; +int max_linkhdr; /* largest link-level header */ +int max_protohdr; /* largest protocol header */ +int max_hdr; /* largest link+protocol header */ +int max_datalen; /* MHLEN - max_hdr */ +#ifdef MBTYPES +int mbtypes[] = { /* XXX */ + M_FREE, /* MT_FREE 0 should be on free list */ + M_MBUF, /* MT_DATA 1 dynamic (data) allocation */ + M_MBUF, /* MT_HEADER 2 packet header */ + M_SOCKET, /* MT_SOCKET 3 socket structure */ + M_PCB, /* MT_PCB 4 protocol control block */ + M_RTABLE, /* MT_RTABLE 5 routing tables */ + M_HTABLE, /* MT_HTABLE 6 IMP host tables */ + 0, /* MT_ATABLE 7 address resolution tables */ + M_MBUF, /* MT_SONAME 8 socket name */ + 0, /* 9 */ + M_SOOPTS, /* MT_SOOPTS 10 socket options */ + M_FTABLE, /* MT_FTABLE 11 fragment reassembly header */ + M_MBUF, /* MT_RIGHTS 12 access rights */ + M_IFADDR, /* MT_IFADDR 13 interface address */ + M_MBUF, /* MT_CONTROL 14 extra-data protocol message */ + M_MBUF, /* MT_OOBDATA 15 expedited data */ +#ifdef DATAKIT + 25, 26, 27, 28, 29, 30, 31, 32 /* datakit ugliness */ +#endif +}; +#endif +#endif // __ECOS + +void +mbinit() +{ + int s; + + s = splimp(); +#ifdef __ECOS + if (m_clalloc(1, M_DONTWAIT) == 0) +#else + if 
(m_clalloc(max(4096 / CLBYTES, 1), M_DONTWAIT) == 0) +#endif + goto bad; + splx(s); + return; +bad: + panic("mbinit"); +} + +/* + * Allocate some number of mbuf clusters + * and place on cluster free list. + * Must be called at splimp. + */ +/* ARGSUSED */ +int +m_clalloc(ncl, nowait) + register int ncl; + int nowait; +{ +#ifdef __ECOS + caddr_t p; + int i; + + if (ncl != 1) { + panic("Allocate multiple clusters!"); + } + p = (caddr_t)cyg_net_cluster_alloc(); + if (p == NULL) { + m_reclaim(); + return (mclfree != NULL); + } + for (i = 0; i < ncl; i++) { + ((union mcluster *)p)->mcl_next = mclfree; + mclfree = (union mcluster *)p; + p += MCLBYTES; + mbstat.m_clfree++; + } + mbstat.m_clusters += ncl; + return (1); +#else // __ECOS + volatile static struct timeval lastlogged; + struct timeval curtime, logdiff; + register caddr_t p; + register int i; + int npg, s; + + npg = ncl * CLSIZE; +#if defined(UVM) + p = (caddr_t)uvm_km_kmemalloc(mb_map, uvmexp.mb_object, ctob(npg), + nowait ? 0 : UVM_KMF_NOWAIT); +#else + p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !nowait); +#endif + if (p == NULL) { + s = splclock(); + curtime = time; + splx(s); + timersub(&curtime, &lastlogged, &logdiff); + if (logdiff.tv_sec >= 60) { + lastlogged = curtime; + log(LOG_ERR, "mb_map full\n"); + } + m_reclaim(); + return (mclfree != NULL); + } + ncl = ncl * CLBYTES / MCLBYTES; + for (i = 0; i < ncl; i++) { + ((union mcluster *)p)->mcl_next = mclfree; + mclfree = (union mcluster *)p; + p += MCLBYTES; + mbstat.m_clfree++; + } + mbstat.m_clusters += ncl; + return (1); +#endif // __ECOS +} + +/* + * When MGET failes, ask protocols to free space when short of memory, + * then re-attempt to allocate an mbuf. + */ +struct mbuf * +m_retry(i, t) + int i, t; +{ + register struct mbuf *m; + + if (i & M_DONTWAIT) { + needqueuedrain = 1; + setsoftnet(); + return (NULL); + } + m_reclaim(); +#define m_retry(i, t) NULL + MGET(m, i, t); +#undef m_retry + return (m); +} + +/* + * As above; retry an MGETHDR. 
+ */ +struct mbuf * +m_retryhdr(i, t) + int i, t; +{ + register struct mbuf *m; + + if (i & M_DONTWAIT) { + needqueuedrain = 1; + setsoftnet(); + return (NULL); + } + m_reclaim(); +#define m_retryhdr(i, t) NULL + MGETHDR(m, i, t); +#undef m_retryhdr + return (m); +} + +void +m_reclaim() +{ + register struct domain *dp; + register struct protosw *pr; + int s = splimp(); + + needqueuedrain = 0; + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + splx(s); + mbstat.m_drain++; +} + +/* + * Space allocation routines. + * These are also available as macros + * for critical paths. + */ +struct mbuf * +m_get(nowait, type) + int nowait, type; +{ + register struct mbuf *m; + + MGET(m, nowait, type); + return (m); +} + +struct mbuf * +m_gethdr(nowait, type) + int nowait, type; +{ + register struct mbuf *m; + + MGETHDR(m, nowait, type); + return (m); +} + +struct mbuf * +m_getclr(nowait, type) + int nowait, type; +{ + register struct mbuf *m; + + MGET(m, nowait, type); + if (m == NULL) + return (NULL); + bzero(mtod(m, caddr_t), MLEN); + return (m); +} + +struct mbuf * +m_free(m) + struct mbuf *m; +{ + register struct mbuf *n; + + MFREE(m, n); + return (n); +} + +void +m_freem(m) + register struct mbuf *m; +{ + register struct mbuf *n; + + if (m == NULL) + return; + do { + MFREE(m, n); + } while ((m = n) != NULL); +} + +/* + * Mbuffer utility routines. + */ + +/* + * Lesser-used path for M_PREPEND: + * allocate new mbuf to prepend to chain, + * copy junk along. 
+ */ +struct mbuf * +m_prepend(m, len, how) + register struct mbuf *m; + int len, how; +{ + struct mbuf *mn; + + MGET(mn, how, m->m_type); + if (mn == NULL) { + m_freem(m); + return (NULL); + } + if (m->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(mn, m); + m->m_flags &= ~M_PKTHDR; + } + mn->m_next = m; + m = mn; + if (len < MHLEN) + MH_ALIGN(m, len); + m->m_len = len; + return (m); +} + +/* + * Make a copy of an mbuf chain starting "off0" bytes from the beginning, + * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. + * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. + */ +int MCFail; + +struct mbuf * +m_copym(m, off0, len, wait) + register struct mbuf *m; + int off0, wait; + register int len; +{ + register struct mbuf *n, **np; + register int off = off0; + struct mbuf *top; + int copyhdr = 0; + + if (off < 0) + panic("m_copym: off %d < 0", off); + if (len < 0) + panic("m_copym: len %d < 0", len); + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + if (m == NULL) + panic("m_copym: null mbuf"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = ⊤ + top = NULL; + while (len > 0) { + if (m == NULL) { + if (len != M_COPYALL) + panic("m_copym: %d not M_COPYALL", len); + break; + } + MGET(n, wait, m->m_type); + *np = n; + if (n == NULL) + goto nospace; + if (copyhdr) { + M_COPY_PKTHDR(n, m); + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, m->m_len - off); + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + off; + if (!m->m_ext.ext_ref) + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + else + (*(m->m_ext.ext_ref))(m); + n->m_ext = m->m_ext; + n->m_flags |= M_EXT; + } else + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (unsigned)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == NULL) + MCFail++; + return (top); +nospace: + m_freem(top); + MCFail++; + 
return (NULL); +} + +/* + * m_copym2() is like m_copym(), except it COPIES cluster mbufs, instead + * of merely bumping the reference count. + */ +struct mbuf * +m_copym2(m, off0, len, wait) + register struct mbuf *m; + int off0, wait; + register int len; +{ + register struct mbuf *n, **np; + register int off = off0; + struct mbuf *top; + int copyhdr = 0; + + if (len < 0) + panic("m_copym2: len %d < 0", len); + if (off < 0) + panic("m_copym2: off %d < 0", off); + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + if (m == NULL) + panic("m_copym2: null mbuf"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = ⊤ + top = NULL; + while (len > 0) { + if (m == NULL) { + if (len != M_COPYALL) + panic("m_copym2: %d != M_COPYALL", len); + break; + } + MGET(n, wait, m->m_type); + *np = n; + if (n == NULL) + goto nospace; + if (copyhdr) { + M_COPY_PKTHDR(n, m); + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, m->m_len - off); + if ((m->m_flags & M_EXT) && (n->m_len > MHLEN)) { + /* This is a cheesy hack. */ + MCLGET(n, wait); + if (n->m_flags & M_EXT) + bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t), + (unsigned)n->m_len); + else + goto nospace; + } else + bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t), + (unsigned)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == NULL) + MCFail++; + return (top); +nospace: + m_freem(top); + MCFail++; + return (NULL); +} + +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. 
+ */ +void +m_copydata(m, off, len, cp) + register struct mbuf *m; + register int off; + register int len; + caddr_t cp; +{ + register unsigned count; + + if (off < 0) + panic("m_copydata: off %d < 0", off); + if (len < 0) + panic("m_copydata: len %d < 0", len); + while (off > 0) { + if (m == NULL) + panic("m_copydata: null mbuf in skip"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == NULL) + panic("m_copydata: null mbuf"); + count = min(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } +} + +/* + * Concatenate mbuf chain n to m. + * Both chains must be of the same type (e.g. MT_DATA). + * Any m_pkthdr is not updated. + */ +void +m_cat(m, n) + register struct mbuf *m, *n; +{ + while (m->m_next) + m = m->m_next; + while (n) { + if (m->m_flags & M_EXT || + m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + /* just join the two chains */ + m->m_next = n; + return; + } + /* splat the data from one into the other */ + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)n->m_len); + m->m_len += n->m_len; + n = m_free(n); + } +} + +void +m_adj(mp, req_len) + struct mbuf *mp; + int req_len; +{ + register int len = req_len; + register struct mbuf *m; + register int count; + + if ((m = mp) == NULL) + return; + if (len >= 0) { + /* + * Trim from head. + */ + while (m != NULL && len > 0) { + if (m->m_len <= len) { + len -= m->m_len; + m->m_len = 0; + m = m->m_next; + } else { + m->m_len -= len; + m->m_data += len; + len = 0; + } + } + m = mp; + if (mp->m_flags & M_PKTHDR) + m->m_pkthdr.len -= (req_len - len); + } else { + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. 
+ */ + len = -len; + count = 0; + for (;;) { + count += m->m_len; + if (m->m_next == NULL) + break; + m = m->m_next; + } + if (m->m_len >= len) { + m->m_len -= len; + if (mp->m_flags & M_PKTHDR) + mp->m_pkthdr.len -= len; + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. + */ + m = mp; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len = count; + for (; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + break; + } + count -= m->m_len; + } + while ((m = m->m_next) != NULL) + m->m_len = 0; + } +} + +/* + * Rearange an mbuf chain so that len bytes are contiguous + * and in the data area of an mbuf (so that mtod and dtom + * will work for a structure of size len). Returns the resulting + * mbuf chain on success, frees it and returns null on failure. + * If there is room, it will add up to max_protohdr-len extra bytes to the + * contiguous region in an attempt to avoid being called next time. + */ +int MPFail; + +struct mbuf * +m_pullup(n, len) + register struct mbuf *n; + int len; +{ + register struct mbuf *m; + register int count; + int space; + + /* + * If first mbuf has no cluster, and has room for len bytes + * without shifting current data, pullup into it, + * otherwise allocate a new mbuf to prepend to the chain. 
+ */ + if ((n->m_flags & M_EXT) == 0 && + n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if (n->m_len >= len) + return (n); + m = n; + n = n->m_next; + len -= m->m_len; + } else { + if (len > MHLEN) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == NULL) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(m, n); + n->m_flags &= ~M_PKTHDR; + } + } + space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + do { + count = min(min(max(len, max_protohdr), space), n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (unsigned)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else + n = m_free(n); + } while (len > 0 && n); + if (len > 0) { + (void)m_free(m); + goto bad; + } + m->m_next = n; + return (m); +bad: + m_freem(n); + MPFail++; + return (NULL); +} + +/* + * m_pullup2() works like m_pullup, save that len can be <= MCLBYTES. + * m_pullup2() only works on values of len such that MHLEN < len <= MCLBYTES, + * it calls m_pullup() for values <= MHLEN. It also only coagulates the + * reqested number of bytes. (For those of us who expect unwieldly option + * headers. + * + * KEBE SAYS: Remember that dtom() calls with data in clusters does not work! + */ +struct mbuf * +m_pullup2(n, len) + register struct mbuf *n; + int len; +{ + register struct mbuf *m; + register int count; + int space; + if (len <= MHLEN) + return m_pullup(n, len); + + if ((n->m_flags & M_EXT) != 0 && + n->m_data + len < &n->m_data[MCLBYTES] && n->m_next) { + if (n->m_len >= len) + return (n); + m = n; + n = n->m_next; + len -= m->m_len; + } else { + if (len > MCLBYTES) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == NULL) + goto bad; + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) { + /* M_COPY_PKTHDR(m, n);*//* Too many adverse side effects. 
*/ + m->m_pkthdr = n->m_pkthdr; + m->m_flags = (n->m_flags & M_COPYFLAGS) | M_EXT; + n->m_flags &= ~M_PKTHDR; + /* n->m_data is cool. */ + } + } + + do { + count = min(len, n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (unsigned)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else + n = m_free(n); + } while (len > 0 && n); + if (len > 0) { + (void)m_free(m); + goto bad; + } + m->m_next = n; + + return (m); +bad: + m_freem(n); + MPFail++; + return (NULL); +} + +/* + * Inject a new mbuf chain of length siz in mbuf chain m0 at + * position len0. Returns a pointer to the first injected mbuf, or + * NULL on failure (m0 is left undisturbed). Note that if there is + * enough space for an object of size siz in the appropriate position, + * no memory will be allocated. Also, there will be no data movement in + * the first len0 bytes (pointers to that will remain valid). + * + * XXX It is assumed that siz is less than the size of an mbuf at the moment. 
+ */ +struct mbuf * +m_inject(m0, len0, siz, wait) + register struct mbuf *m0; + int len0, siz, wait; +{ + register struct mbuf *m, *n, *n2 = NULL, *n3; + unsigned len = len0, remain; + + if ((siz >= MHLEN) || (len0 <= 0)) + return (NULL); + for (m = m0; m && len > m->m_len; m = m->m_next) + len -= m->m_len; + if (m == NULL) + return (NULL); + remain = m->m_len - len; + if (remain == 0) { + if ((m->m_next) && (M_LEADINGSPACE(m->m_next) >= siz)) { + m->m_next->m_len += siz; + m0->m_pkthdr.len += siz; + m->m_next->m_data -= siz; + return m->m_next; + } + } else { + n2 = m_copym2(m, len, remain, wait); + if (n2 == NULL) + return (NULL); + } + + MGET(n, wait, MT_DATA); + if (n == NULL) { + if (n2) + m_freem(n2); + return (NULL); + } + + n->m_len = siz; + m0->m_pkthdr.len += siz; + m->m_len -= remain; /* Trim */ + if (n2) { + for (n3 = n; n3->m_next != NULL; n3 = n3->m_next) + ; + n3->m_next = n2; + } else + n3 = n; + for (; n3->m_next != NULL; n3 = n3->m_next) + ; + n3->m_next = m->m_next; + m->m_next = n; + return n; +} + +/* + * Partition an mbuf chain in two pieces, returning the tail -- + * all but the first len0 bytes. In case of failure, it returns NULL and + * attempts to restore the chain to its original state. 
+ */ +struct mbuf * +m_split(m0, len0, wait) + register struct mbuf *m0; + int len0, wait; +{ + register struct mbuf *m, *n; + unsigned len = len0, remain, olen; + + for (m = m0; m && len > m->m_len; m = m->m_next) + len -= m->m_len; + if (m == NULL) + return (NULL); + remain = m->m_len - len; + if (m0->m_flags & M_PKTHDR) { + MGETHDR(n, wait, m0->m_type); + if (n == NULL) + return (NULL); + n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; + n->m_pkthdr.len = m0->m_pkthdr.len - len0; + olen = m0->m_pkthdr.len; + m0->m_pkthdr.len = len0; + if (m->m_flags & M_EXT) + goto extpacket; + if (remain > MHLEN) { + /* m can't be the lead packet */ + MH_ALIGN(n, 0); + n->m_next = m_split(m, len, wait); + if (n->m_next == NULL) { + (void) m_free(n); + m0->m_pkthdr.len = olen; + return (NULL); + } else + return (n); + } else + MH_ALIGN(n, remain); + } else if (remain == 0) { + n = m->m_next; + m->m_next = NULL; + return (n); + } else { + MGET(n, wait, m->m_type); + if (n == NULL) + return (NULL); + M_ALIGN(n, remain); + } +extpacket: + if (m->m_flags & M_EXT) { + n->m_flags |= M_EXT; + n->m_ext = m->m_ext; + if(!m->m_ext.ext_ref) + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + else + (*(m->m_ext.ext_ref))(m); + m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ + n->m_data = m->m_data + len; + } else { + bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); + } + n->m_len = remain; + m->m_len = len; + n->m_next = m->m_next; + m->m_next = NULL; + return (n); +} +/* + * Routine to copy from device local memory into mbufs. + */ +struct mbuf * +m_devget(buf, totlen, off0, ifp, copy) + char *buf; + int totlen, off0; + struct ifnet *ifp; + void (*copy) __P((const void *, void *, size_t)); +{ + register struct mbuf *m; + struct mbuf *top = NULL, **mp = ⊤ + register int off = off0, len; + register char *cp; + char *epkt; + + cp = buf; + epkt = cp + totlen; + if (off) { + /* + * If 'off' is non-zero, packet is trailer-encapsulated, + * so we have to skip the type and length fields. 
+ */ + cp += off + 2 * sizeof(u_int16_t); + totlen -= 2 * sizeof(u_int16_t); + } + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == NULL) + return (NULL); + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = totlen; + m->m_len = MHLEN; + + while (totlen > 0) { + if (top != NULL) { + MGET(m, M_DONTWAIT, MT_DATA); + if (m == NULL) { + m_freem(top); + return (NULL); + } + m->m_len = MLEN; + } + len = min(totlen, epkt - cp); + if (len >= MINCLSIZE) { + MCLGET(m, M_DONTWAIT); + if (m->m_flags & M_EXT) + m->m_len = len = min(len, MCLBYTES); + else + len = m->m_len; + } else { + /* + * Place initial small packet/header at end of mbuf. + */ + if (len < m->m_len) { + if (top == NULL && + len + max_linkhdr <= m->m_len) + m->m_data += max_linkhdr; + m->m_len = len; + } else + len = m->m_len; + } + if (copy) + copy(cp, mtod(m, caddr_t), (size_t)len); + else + bcopy(cp, mtod(m, caddr_t), (size_t)len); + cp += len; + *mp = m; + mp = &m->m_next; + totlen -= len; + if (cp == epkt) + cp = buf; + } + return (top); +} + +void +m_zero(m) + struct mbuf *m; +{ + while (m) { + if (m->m_flags & M_PKTHDR) + bzero((unsigned char *)m + sizeof(struct m_hdr) + + sizeof(struct pkthdr), MHLEN); + else + bzero((unsigned char *)m + sizeof(struct m_hdr), MLEN); + if ((m->m_flags & M_EXT) && + (m->m_ext.ext_free == NULL) && + !mclrefcnt[mtocl((m)->m_ext.ext_buf)]) + bzero(m->m_ext.ext_buf, m->m_ext.ext_size); + m = m->m_next; + } +} + +/* + * Apply function f to the data in an mbuf chain starting "off" bytes from the + * beginning, continuing for "len" bytes. 
+ */ +int +m_apply(m, off, len, f, fstate) + struct mbuf *m; + int off; + int len; + /* fstate, data, len */ + int (*f)(caddr_t, caddr_t, unsigned int); + caddr_t fstate; +{ + int rval; + unsigned int count; + + if (len < 0) + panic("m_apply: len %d < 0", len); + if (off < 0) + panic("m_apply: off %d < 0", off); + while (off > 0) { + if (m == NULL) + panic("m_apply: null mbuf in skip"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == NULL) + panic("m_apply: null mbuf"); + count = min(m->m_len - off, len); + + rval = f(fstate, mtod(m, caddr_t) + off, count); + if (rval) + return (rval); + + len -= count; + off = 0; + m = m->m_next; + } + + return (0); +} + diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/uipc_proto.c b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_proto.c new file mode 100644 index 0000000..7439f49 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_proto.c @@ -0,0 +1,105 @@ +//========================================================================== +// +// sys/kern/uipc_proto.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: uipc_proto.c,v 1.3 1998/04/26 22:40:42 millert Exp $ */ +/* $NetBSD: uipc_proto.c,v 1.8 1996/02/13 21:10:47 christos Exp $ */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_proto.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/domain.h> +#include <sys/mbuf.h> +#include <sys/un.h> +#include <sys/socketvar.h> + +#include <net/if.h> +#include <net/raw_cb.h> + +/* + * Definitions of protocols supported in the UNIX domain. + */ + +extern struct domain unixdomain; /* or at least forward */ + +struct protosw unixsw[] = { +{ SOCK_STREAM, &unixdomain, PF_LOCAL, PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, + 0, 0, 0, 0, + uipc_usrreq, + 0, 0, 0, 0, +}, +{ SOCK_DGRAM, &unixdomain, PF_LOCAL, PR_ATOMIC|PR_ADDR|PR_RIGHTS, + 0, 0, 0, 0, + uipc_usrreq, + 0, 0, 0, 0, +}, +{ 0, 0, 0, 0, + raw_input, 0, raw_ctlinput, 0, + raw_usrreq, + raw_init, 0, 0, 0, +} +}; + +struct domain unixdomain = + { AF_LOCAL, "unix", 0, unp_externalize, unp_dispose, + unixsw, &unixsw[sizeof(unixsw)/sizeof(unixsw[0])] }; diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/uipc_socket.c b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_socket.c new file mode 100644 index 0000000..c840164 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_socket.c @@ -0,0 +1,1144 @@ +//========================================================================== +// +// sys/kern/uipc_socket.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// 
Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: uipc_socket.c,v 1.27 1999/10/14 08:18:49 cmetz Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 + */ + +#include <sys/param.h> +#ifdef __ECOS +#include <cyg/io/file.h> +#else +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/domain.h> +#include <sys/kernel.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#ifndef __ECOS +#include <sys/signalvar.h> +#include <sys/resourcevar.h> +#endif + +#ifndef SOMINCONN +#define SOMINCONN 80 +#endif /* SOMINCONN */ + +int somaxconn = SOMAXCONN; +int sominconn = SOMINCONN; + +/* + * Socket operation routines. + * These routines are called by the routines in + * sys_socket.c or from a system process, and + * implement the semantics of socket operations by + * switching out to the protocol specific routines. 
+ */ +/*ARGSUSED*/ +int +socreate(dom, aso, type, proto) + int dom; + struct socket **aso; + register int type; + int proto; +{ +#ifndef __ECOS + struct proc *p = curproc; /* XXX */ +#endif + register struct protosw *prp; + register struct socket *so; + register int error; + + if (proto) + prp = pffindproto(dom, proto, type); + else + prp = pffindtype(dom, type); + if (prp == 0 || prp->pr_usrreq == 0) + return (EPROTONOSUPPORT); + if (prp->pr_type != type) + return (EPROTOTYPE); + MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); + bzero((caddr_t)so, sizeof(*so)); + so->so_type = type; +#ifdef __ECOS + so->so_state = SS_PRIV; + so->so_ruid = 0; // FIXME + so->so_euid = 0; // FIXME +#else + if (p->p_ucred->cr_uid == 0) + so->so_state = SS_PRIV; + so->so_ruid = p->p_cred->p_ruid; + so->so_euid = p->p_ucred->cr_uid; +#endif + so->so_proto = prp; + error = + (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, (struct mbuf *)(long)proto, + NULL); + if (error) { + so->so_state |= SS_NOFDREF; + sofree(so); + return (error); + } +#ifdef COMPAT_SUNOS + { + extern struct emul emul_sunos; + if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) + so->so_options |= SO_BROADCAST; + } +#endif + *aso = so; + return (0); +} + +int +sobind(so, nam) + struct socket *so; + struct mbuf *nam; +{ + int s = splsoftnet(); + int error; + + error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL); + splx(s); + return (error); +} + +int +solisten(so, backlog) + register struct socket *so; + int backlog; +{ + int s = splsoftnet(), error; + + error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL); + if (error) { + splx(s); + return (error); + } + if (so->so_q == 0) + so->so_options |= SO_ACCEPTCONN; + if (backlog < 0 || backlog > somaxconn) + backlog = somaxconn; + if (backlog < sominconn) + backlog = sominconn; + so->so_qlimit = backlog; + splx(s); + return (0); +} + +void +sofree(so) + register struct socket *so; +{ + + if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) + 
return; + if (so->so_head) { + /* + * We must not decommission a socket that's on the accept(2) + * queue. If we do, then accept(2) may hang after select(2) + * indicated that the listening socket was ready. + */ + if (!soqremque(so, 0)) + return; + } + sbrelease(&so->so_snd); + sorflush(so); + FREE(so, M_SOCKET); +} + +/* + * Close a socket on last file table reference removal. + * Initiate disconnect if connected. + * Free socket when disconnect complete. + */ +int +soclose(so) + register struct socket *so; +{ + struct socket *so2; + int s = splsoftnet(); /* conservative */ + int error = 0; + + if (so->so_options & SO_ACCEPTCONN) { + while ((so2 = so->so_q0) != NULL) { + (void) soqremque(so2, 0); + (void) soabort(so2); + } + while ((so2 = so->so_q) != NULL) { + (void) soqremque(so2, 1); + (void) soabort(so2); + } + } + if (so->so_pcb == 0) + goto discard; + if (so->so_state & SS_ISCONNECTED) { + if ((so->so_state & SS_ISDISCONNECTING) == 0) { + error = sodisconnect(so); + if (error) + goto drop; + } + if (so->so_options & SO_LINGER) { + if ((so->so_state & SS_ISDISCONNECTING) && + (so->so_state & SS_NBIO)) + goto drop; + while (so->so_state & SS_ISCONNECTED) { + error = tsleep((caddr_t)&so->so_timeo, + PSOCK | PCATCH, netcls, + so->so_linger * hz); + if (error) + break; + } + } + } +drop: + if (so->so_pcb) { + int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL, + NULL, NULL); + if (error == 0) + error = error2; + } +discard: + if (so->so_state & SS_NOFDREF) + panic("soclose: NOFDREF"); + so->so_state |= SS_NOFDREF; + sofree(so); + splx(s); + return (error); +} + +/* + * Must be called at splsoftnet... 
+ */ +int +soabort(so) + struct socket *so; +{ + + return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL); +} + +int +soaccept(so, nam) + register struct socket *so; + struct mbuf *nam; +{ + int s = splsoftnet(); + int error = 0; + + if ((so->so_state & SS_NOFDREF) == 0) + panic("soaccept: !NOFDREF"); + so->so_state &= ~SS_NOFDREF; + if ((so->so_state & SS_ISDISCONNECTED) == 0) + error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL, + nam, NULL); + splx(s); + return (error); +} + +int +soconnect(so, nam) + register struct socket *so; + struct mbuf *nam; +{ + int s; + int error; + + if (so->so_options & SO_ACCEPTCONN) + return (EOPNOTSUPP); + s = splsoftnet(); + /* + * If protocol is connection-based, can only connect once. + * Otherwise, if connected, try to disconnect first. + * This allows user to disconnect by connecting to, e.g., + * a null address. + */ + if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && + ((so->so_proto->pr_flags & PR_CONNREQUIRED) || + (error = sodisconnect(so)))) + error = EISCONN; + else + error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, + NULL, nam, NULL); + splx(s); + return (error); +} + +int +soconnect2(so1, so2) + register struct socket *so1; + struct socket *so2; +{ + int s = splsoftnet(); + int error; + + error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, + (struct mbuf *)so2, NULL); + splx(s); + return (error); +} + +int +sodisconnect(so) + register struct socket *so; +{ + int s = splsoftnet(); + int error; + + if ((so->so_state & SS_ISCONNECTED) == 0) { + error = ENOTCONN; + goto bad; + } + if (so->so_state & SS_ISDISCONNECTING) { + error = EALREADY; + goto bad; + } + error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, + NULL); +bad: + splx(s); + return (error); +} + +#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) +/* + * Send on a socket. + * If send must go all at once and message is larger than + * send buffering, then hard error. 
+ * Lock against other senders. + * If must go all at once and not enough room now, then + * inform user that this would block and do nothing. + * Otherwise, if nonblocking, send as much as possible. + * The data to be sent is described by "uio" if nonzero, + * otherwise by the mbuf chain "top" (which must be null + * if uio is not). Data provided in mbuf chain must be small + * enough to send all at once. + * + * Returns nonzero on error, timeout or signal; callers + * must check for short counts if EINTR/ERESTART are returned. + * Data and control buffers are freed on return. + */ +int +sosend(so, addr, uio, top, control, flags) + register struct socket *so; + struct mbuf *addr; + struct uio *uio; + struct mbuf *top; + struct mbuf *control; + int flags; +{ +#ifndef __ECOS + struct proc *p = curproc; /* XXX */ +#endif + struct mbuf **mp; + register struct mbuf *m; + register long space, len; + register quad_t resid; + int clen = 0, error, s, dontroute, mlen; + int atomic = sosendallatonce(so) || top; + + if (uio) + resid = uio->uio_resid; + else + resid = top->m_pkthdr.len; + /* + * In theory resid should be unsigned (since uio->uio_resid is). + * However, space must be signed, as it might be less than 0 + * if we over-committed, and we must use a signed comparison + * of space and resid. On the other hand, a negative resid + * causes us to loop sending 0-length segments to the protocol. + * MSG_EOR on a SOCK_STREAM socket is also invalid. 
+ */ + if (resid < 0 || + (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { + error = EINVAL; + goto out; + } + dontroute = + (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && + (so->so_proto->pr_flags & PR_ATOMIC); +#ifndef __ECOS + p->p_stats->p_ru.ru_msgsnd++; +#endif + if (control) + clen = control->m_len; +#define snderr(errno) { error = errno; splx(s); goto release; } + +restart: + if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) + goto out; + do { + s = splsoftnet(); + if (so->so_state & SS_CANTSENDMORE) + snderr(EPIPE); + if (so->so_error) + snderr(so->so_error); + if ((so->so_state & SS_ISCONNECTED) == 0) { + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if ((so->so_state & SS_ISCONFIRMING) == 0 && + !(resid == 0 && clen != 0)) + snderr(ENOTCONN); + } else if (addr == 0) + snderr(EDESTADDRREQ); + } + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; + if ((atomic && resid > so->so_snd.sb_hiwat) || + clen > so->so_snd.sb_hiwat) + snderr(EMSGSIZE); + if (space < resid + clen && uio && + (atomic || space < so->so_snd.sb_lowat || space < clen)) { + if (so->so_state & SS_NBIO) + snderr(EWOULDBLOCK); + sbunlock(&so->so_snd); + error = sbwait(&so->so_snd); + splx(s); + if (error) + goto out; + goto restart; + } + splx(s); + mp = ⊤ + space -= clen; + do { + if (uio == NULL) { + /* + * Data is prepackaged in "top". 
+ */ + resid = 0; + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + } else do { + if (top == 0) { + MGETHDR(m, M_WAIT, MT_DATA); + mlen = MHLEN; + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_WAIT, MT_DATA); + mlen = MLEN; + } + if (resid >= MINCLSIZE && space >= MCLBYTES) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) + goto nopages; + mlen = MCLBYTES; +#ifdef MAPPED_MBUFS + len = min(MCLBYTES, resid); +#else + if (atomic && top == 0) { + len = min(MCLBYTES - max_hdr, resid); + m->m_data += max_hdr; + } else + len = min(MCLBYTES, resid); +#endif + space -= len; + } else { +nopages: + len = min(min(mlen, resid), space); + space -= len; + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && top == 0 && len < mlen) + MH_ALIGN(m, len); + } + error = uiomove(mtod(m, caddr_t), (int)len, uio); + resid = uio->uio_resid; + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + goto release; + mp = &m->m_next; + if (resid <= 0) { + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + break; + } + } while (space > 0 && atomic); + if (dontroute) + so->so_options |= SO_DONTROUTE; + s = splsoftnet(); /* XXX */ + error = (*so->so_proto->pr_usrreq)(so, (flags & MSG_OOB) ? + PRU_SENDOOB : PRU_SEND, + top, addr, control); + splx(s); + if (dontroute) + so->so_options &= ~SO_DONTROUTE; + clen = 0; + control = 0; + top = 0; + mp = ⊤ + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + +release: + sbunlock(&so->so_snd); +out: + if (top) + m_freem(top); + if (control) + m_freem(control); + return (error); +} + +/* + * Implement receive operations on a socket. + * We depend on the way that records are added to the sockbuf + * by sbappend*. 
In particular, each record (mbufs linked through m_next) + * must begin with an address if the protocol so specifies, + * followed by an optional mbuf or mbufs containing ancillary data, + * and then zero or more mbufs of data. + * In order to avoid blocking network interrupts for the entire time here, + * we splx() while doing the actual copy to user space. + * Although the sockbuf is locked, new data may still be appended, + * and thus we must maintain consistency of the sockbuf during that time. + * + * The caller may receive the data as a single mbuf chain by supplying + * an mbuf **mp0 for use in returning the chain. The uio is then used + * only for the count in uio_resid. + */ +int +soreceive(so, paddr, uio, mp0, controlp, flagsp) + register struct socket *so; + struct mbuf **paddr; + struct uio *uio; + struct mbuf **mp0; + struct mbuf **controlp; + int *flagsp; +{ + register struct mbuf *m, **mp; + register int flags, len, error, s, offset; + struct protosw *pr = so->so_proto; + struct mbuf *nextrecord; + int moff, type = 0; + size_t orig_resid = uio->uio_resid; + int uio_error = 0; + int resid; + + mp = mp0; + if (paddr) + *paddr = 0; + if (controlp) + *controlp = 0; + if (flagsp) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + if (so->so_state & SS_NBIO) + flags |= MSG_DONTWAIT; + if (flags & MSG_OOB) { + m = m_get(M_WAIT, MT_DATA); + error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, + (struct mbuf *)(long)(flags & MSG_PEEK), NULL); + if (error) + goto bad; + do { + error = uiomove(mtod(m, caddr_t), + (int) min(uio->uio_resid, m->m_len), uio); + m = m_free(m); + } while (uio->uio_resid && error == 0 && m); +bad: + if (m) + m_freem(m); + return (error); + } + if (mp) + *mp = (struct mbuf *)0; + if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) + (*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL); + +restart: + if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) + return (error); + s = splsoftnet(); + + m = so->so_rcv.sb_mb; + /* + * If we have less 
data than requested, block awaiting more + * (subject to any timeout) if: + * 1. the current count is less than the low water mark, + * 2. MSG_WAITALL is set, and it is possible to do the entire + * receive operation at once if we block (resid <= hiwat), or + * 3. MSG_DONTWAIT is not set. + * If MSG_WAITALL is set but resid is larger than the receive buffer, + * we have to do the receive in sections, and thus risk returning + * a short count if a timeout or signal occurs after we start. + */ + if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && + so->so_rcv.sb_cc < uio->uio_resid) && + (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { +#ifdef DIAGNOSTIC + if (m == 0 && so->so_rcv.sb_cc) + panic("receive 1"); +#endif + if (so->so_error) { + if (m) + goto dontblock; + error = so->so_error; + if ((flags & MSG_PEEK) == 0) + so->so_error = 0; + goto release; + } + if (so->so_state & SS_CANTRCVMORE) { + if (m) + goto dontblock; + else + goto release; + } + for (; m; m = m->m_next) + if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { + m = so->so_rcv.sb_mb; + goto dontblock; + } + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + error = ENOTCONN; + goto release; + } + if (uio->uio_resid == 0 && controlp == NULL) + goto release; + if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { + error = EWOULDBLOCK; + goto release; + } + sbunlock(&so->so_rcv); + error = sbwait(&so->so_rcv); + splx(s); + if (error) + return (error); + goto restart; + } +dontblock: +#ifdef notyet /* XXXX */ + if (uio->uio_procp) + uio->uio_procp->p_stats->p_ru.ru_msgrcv++; +#endif + nextrecord = m->m_nextpkt; + if (pr->pr_flags & PR_ADDR) { +#ifdef DIAGNOSTIC + if (m->m_type != MT_SONAME) + panic("receive 1a"); +#endif + orig_resid = 0; + if (flags & MSG_PEEK) { + if (paddr) + *paddr = m_copy(m, 0, m->m_len); + m = 
m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (paddr) { + *paddr = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + } + } + while (m && m->m_type == MT_CONTROL && error == 0) { + if (flags & MSG_PEEK) { + if (controlp) + *controlp = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (controlp) { + if (pr->pr_domain->dom_externalize && + mtod(m, struct cmsghdr *)->cmsg_type == + SCM_RIGHTS) + error = (*pr->pr_domain->dom_externalize)(m); + *controlp = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + } + if (controlp) { + orig_resid = 0; + controlp = &(*controlp)->m_next; + } + } + if (m) { + if ((flags & MSG_PEEK) == 0) + m->m_nextpkt = nextrecord; + type = m->m_type; + if (type == MT_OOBDATA) + flags |= MSG_OOB; + if (m->m_flags & M_BCAST) + flags |= MSG_BCAST; + if (m->m_flags & M_MCAST) + flags |= MSG_MCAST; + } + moff = 0; + offset = 0; + while (m && uio->uio_resid > 0 && error == 0) { + if (m->m_type == MT_OOBDATA) { + if (type != MT_OOBDATA) + break; + } else if (type == MT_OOBDATA) + break; +#ifdef DIAGNOSTIC + else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) + panic("receive 3"); +#endif + so->so_state &= ~SS_RCVATMARK; + len = uio->uio_resid; + if (so->so_oobmark && len > so->so_oobmark - offset) + len = so->so_oobmark - offset; + if (len > m->m_len - moff) + len = m->m_len - moff; + /* + * If mp is set, just pass back the mbufs. + * Otherwise copy them out via the uio, then free. + * Sockbuf must be consistent here (points to current mbuf, + * it points to next record) when we drop priority; + * we must note any additions to the sockbuf when we + * block interrupts again. 
+ */ + if (mp == 0 && uio_error == 0) { + resid = uio->uio_resid; + splx(s); + uio_error = + uiomove(mtod(m, caddr_t) + moff, (int)len, + uio); + s = splsoftnet(); + if (uio_error) + uio->uio_resid = resid - len; + } else + uio->uio_resid -= len; + if (len == m->m_len - moff) { + if (m->m_flags & M_EOR) + flags |= MSG_EOR; + if (flags & MSG_PEEK) { + m = m->m_next; + moff = 0; + } else { + nextrecord = m->m_nextpkt; + sbfree(&so->so_rcv, m); + if (mp) { + *mp = m; + mp = &m->m_next; + so->so_rcv.sb_mb = m = m->m_next; + *mp = (struct mbuf *)0; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + if (m) + m->m_nextpkt = nextrecord; + } + } else { + if (flags & MSG_PEEK) + moff += len; + else { + if (mp) + *mp = m_copym(m, 0, len, M_WAIT); + m->m_data += len; + m->m_len -= len; + so->so_rcv.sb_cc -= len; + } + } + if (so->so_oobmark) { + if ((flags & MSG_PEEK) == 0) { + so->so_oobmark -= len; + if (so->so_oobmark == 0) { + so->so_state |= SS_RCVATMARK; + break; + } + } else { + offset += len; + if (offset == so->so_oobmark) + break; + } + } + if (flags & MSG_EOR) + break; + /* + * If the MSG_WAITALL flag is set (for non-atomic socket), + * we must not quit until "uio->uio_resid == 0" or an error + * termination. If a signal/timeout occurs, return + * with a short count but without error. + * Keep sockbuf locked against other readers. 
+ */ + while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && + !sosendallatonce(so) && !nextrecord) { + if (so->so_error || so->so_state & SS_CANTRCVMORE) + break; + error = sbwait(&so->so_rcv); + if (error) { + sbunlock(&so->so_rcv); + splx(s); + return (0); + } + if ((m = so->so_rcv.sb_mb) != NULL) + nextrecord = m->m_nextpkt; + } + } + + if (m && pr->pr_flags & PR_ATOMIC) { + flags |= MSG_TRUNC; + if ((flags & MSG_PEEK) == 0) + (void) sbdroprecord(&so->so_rcv); + } + if ((flags & MSG_PEEK) == 0) { + if (m == 0) + so->so_rcv.sb_mb = nextrecord; + if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) + (*pr->pr_usrreq)(so, PRU_RCVD, NULL, + (struct mbuf *)(long)flags, NULL); + } + if (orig_resid == uio->uio_resid && orig_resid && + (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { + sbunlock(&so->so_rcv); + splx(s); + goto restart; + } + + if (uio_error) + error = uio_error; + + if (flagsp) + *flagsp |= flags; +release: + sbunlock(&so->so_rcv); + splx(s); + return (error); +} + +int +soshutdown(so, how) + register struct socket *so; + register int how; +{ + register struct protosw *pr = so->so_proto; + + how++; + if (how & ~(FREAD|FWRITE)) + return (EINVAL); + if (how & FREAD) + sorflush(so); + if (how & FWRITE) + return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL); + return (0); +} + +void +sorflush(so) + register struct socket *so; +{ + register struct sockbuf *sb = &so->so_rcv; + register struct protosw *pr = so->so_proto; + register int s; + struct sockbuf asb; + + sb->sb_flags |= SB_NOINTR; + (void) sblock(sb, M_WAITOK); + s = splimp(); + socantrcvmore(so); + sbunlock(sb); + asb = *sb; + bzero((caddr_t)sb, sizeof (*sb)); + splx(s); + if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) + (*pr->pr_domain->dom_dispose)(asb.sb_mb); + sbrelease(&asb); +} + +int +sosetopt(so, level, optname, m0) + register struct socket *so; + int level, optname; + struct mbuf *m0; +{ + int error = 0; + register struct mbuf *m = m0; + + if (level != 
SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) + return ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + error = ENOPROTOOPT; + } else { + switch (optname) { + + case SO_LINGER: + if (m == NULL || m->m_len != sizeof (struct linger)) { + error = EINVAL; + goto bad; + } + so->so_linger = mtod(m, struct linger *)->l_linger; + /* fall thru... */ + + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_DONTROUTE: + case SO_USELOOPBACK: + case SO_BROADCAST: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_OOBINLINE: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + if (*mtod(m, int *)) + so->so_options |= optname; + else + so->so_options &= ~optname; + break; + + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDLOWAT: + case SO_RCVLOWAT: + { + u_long cnt; + + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + cnt = *mtod(m, int *); + if ((long)cnt <= 0) + cnt = 1; + switch (optname) { + + case SO_SNDBUF: + case SO_RCVBUF: + if (sbreserve(optname == SO_SNDBUF ? + &so->so_snd : &so->so_rcv, + cnt) == 0) { + error = ENOBUFS; + goto bad; + } + break; + + case SO_SNDLOWAT: + so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ? + so->so_snd.sb_hiwat : cnt; + break; + case SO_RCVLOWAT: + so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ? 
+ so->so_rcv.sb_hiwat : cnt; + break; + } + break; + } + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + struct timeval *tv; + short val; + + if (m == NULL || m->m_len < sizeof (*tv)) { + error = EINVAL; + goto bad; + } + tv = mtod(m, struct timeval *); + if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { + error = EDOM; + goto bad; + } + val = tv->tv_sec * hz + tv->tv_usec / tick; + + switch (optname) { + + case SO_SNDTIMEO: + so->so_snd.sb_timeo = val; + break; + case SO_RCVTIMEO: + so->so_rcv.sb_timeo = val; + break; + } + break; + } + + default: + error = ENOPROTOOPT; + break; + } + if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { + (void) ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + m = NULL; /* freed by protocol */ + } + } +bad: + if (m) + (void) m_free(m); + return (error); +} + +int +sogetopt(so, level, optname, mp) + register struct socket *so; + int level, optname; + struct mbuf **mp; +{ + register struct mbuf *m; + + if (level != SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) { + return ((*so->so_proto->pr_ctloutput) + (PRCO_GETOPT, so, level, optname, mp)); + } else + return (ENOPROTOOPT); + } else { + m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof (int); + + switch (optname) { + + case SO_LINGER: + m->m_len = sizeof (struct linger); + mtod(m, struct linger *)->l_onoff = + so->so_options & SO_LINGER; + mtod(m, struct linger *)->l_linger = so->so_linger; + break; + + case SO_USELOOPBACK: + case SO_DONTROUTE: + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_BROADCAST: + case SO_OOBINLINE: + *mtod(m, int *) = so->so_options & optname; + break; + + case SO_TYPE: + *mtod(m, int *) = so->so_type; + break; + + case SO_ERROR: + *mtod(m, int *) = so->so_error; + so->so_error = 0; + break; + + case SO_SNDBUF: + *mtod(m, int *) = so->so_snd.sb_hiwat; + break; + + case SO_RCVBUF: + *mtod(m, int *) = so->so_rcv.sb_hiwat; + break; + + case SO_SNDLOWAT: + 
*mtod(m, int *) = so->so_snd.sb_lowat; + break; + + case SO_RCVLOWAT: + *mtod(m, int *) = so->so_rcv.sb_lowat; + break; + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + int val = (optname == SO_SNDTIMEO ? + so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + m->m_len = sizeof(struct timeval); + mtod(m, struct timeval *)->tv_sec = val / hz; + mtod(m, struct timeval *)->tv_usec = + (val % hz) * tick; + break; + } + + default: + (void)m_free(m); + return (ENOPROTOOPT); + } + *mp = m; + return (0); + } +} + +void +sohasoutofband(so) + register struct socket *so; +{ +#ifndef __ECOS + csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid); +#endif + selwakeup(&so->so_rcv.sb_sel); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/uipc_socket2.c b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_socket2.c new file mode 100644 index 0000000..04a3674 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_socket2.c @@ -0,0 +1,906 @@ +//========================================================================== +// +// sys/kern/uipc_socket2.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: uipc_socket2.c,v 1.11 1999/12/08 06:50:17 itojun Exp $ */ +/* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#ifndef __ECOS +#include <sys/signalvar.h> +#endif + +#ifdef __ECOS +#include <cyg/infra/diag.h> +#endif + +/* + * Primitive routines for operating on sockets and socket buffers + */ + +/* strings for sleep message: */ +char netio[] = "netio"; +char netcon[] = "netcon"; +char netcls[] = "netcls"; + +u_long sb_max = SB_MAX; /* patchable */ + +/* + * Procedures to manipulate state flags of socket + * and do appropriate wakeups. Normal sequence from the + * active (originating) side is that soisconnecting() is + * called during processing of connect() call, + * resulting in an eventual call to soisconnected() if/when the + * connection is established. When the connection is torn down + * soisdisconnecting() is called during processing of disconnect() call, + * and soisdisconnected() is called when the connection to the peer + * is totally severed. The semantics of these routines are such that + * connectionless protocols can call soisconnected() and soisdisconnected() + * only, bypassing the in-progress calls when setting up a ``connection'' + * takes no time. 
+ * + * From the passive side, a socket is created with + * two queues of sockets: so_q0 for connections in progress + * and so_q for connections already made and awaiting user acceptance. + * As a protocol is preparing incoming connections, it creates a socket + * structure queued on so_q0 by calling sonewconn(). When the connection + * is established, soisconnected() is called, and transfers the + * socket structure to so_q, making it available to accept(). + * + * If a socket is closed with sockets on either + * so_q0 or so_q, these sockets are dropped. + * + * If higher level protocols are implemented in + * the kernel, the wakeups done here will sometimes + * cause software-interrupt process scheduling. + */ + +void +soisconnecting(so) + register struct socket *so; +{ + + so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= SS_ISCONNECTING; +} + +void +soisconnected(so) + register struct socket *so; +{ + register struct socket *head = so->so_head; + + so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); + so->so_state |= SS_ISCONNECTED; + if (head && soqremque(so, 0)) { + soqinsque(head, so, 1); + sorwakeup(head); + wakeup((caddr_t)&head->so_timeo); + } else { + wakeup((caddr_t)&so->so_timeo); + sorwakeup(so); + sowwakeup(so); + } +} + +void +soisdisconnecting(so) + register struct socket *so; +{ + + so->so_state &= ~SS_ISCONNECTING; + so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); + wakeup((caddr_t)&so->so_timeo); + sowwakeup(so); + sorwakeup(so); +} + +void +soisdisconnected(so) + register struct socket *so; +{ + + so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); + wakeup((caddr_t)&so->so_timeo); + sowwakeup(so); + sorwakeup(so); +} + +/* + * When an attempt at a new connection is noted on a socket + * which accepts connections, sonewconn is called. 
If the + * connection is possible (subject to space constraints, etc.) + * then we allocate a new structure, propoerly linked into the + * data structure of the original socket, and return this. + * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. + * + * Currently, sonewconn() is defined as sonewconn1() in socketvar.h + * to catch calls that are missing the (new) second parameter. + */ +struct socket * +sonewconn1(head, connstatus) + register struct socket *head; + int connstatus; +{ + register struct socket *so; + int soqueue = connstatus ? 1 : 0; + + if (head->so_qlen + head->so_q0len > head->so_qlimit * 3) + return ((struct socket *)0); + MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT); + if (so == NULL) + return ((struct socket *)0); + bzero((caddr_t)so, sizeof(*so)); + so->so_type = head->so_type; + so->so_options = head->so_options &~ SO_ACCEPTCONN; + so->so_linger = head->so_linger; + so->so_state = head->so_state | SS_NOFDREF; + so->so_proto = head->so_proto; + so->so_timeo = head->so_timeo; + so->so_pgid = head->so_pgid; + so->so_euid = head->so_euid; + so->so_ruid = head->so_ruid; + (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); + soqinsque(head, so, soqueue); + if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { + (void) soqremque(so, soqueue); + (void) free((caddr_t)so, M_SOCKET); + return ((struct socket *)0); + } + if (connstatus) { + sorwakeup(head); + wakeup((caddr_t)&head->so_timeo); + so->so_state |= connstatus; + } + return (so); +} + +void +soqinsque(head, so, q) + register struct socket *head, *so; + int q; +{ + + register struct socket **prev; + so->so_head = head; + if (q == 0) { + head->so_q0len++; + so->so_q0 = 0; + for (prev = &(head->so_q0); *prev; ) + prev = &((*prev)->so_q0); + } else { + head->so_qlen++; + so->so_q = 0; + for (prev = &(head->so_q); *prev; ) + prev = &((*prev)->so_q); + } + *prev = so; +} + +int +soqremque(so, q) + 
register struct socket *so; + int q; +{ + register struct socket *head, *prev, *next; + + head = so->so_head; + prev = head; + for (;;) { + next = q ? prev->so_q : prev->so_q0; + if (next == so) + break; + if (next == 0) + return (0); + prev = next; + } + if (q == 0) { + prev->so_q0 = next->so_q0; + head->so_q0len--; + } else { + prev->so_q = next->so_q; + head->so_qlen--; + } + next->so_q0 = next->so_q = 0; + next->so_head = 0; + return (1); +} + +/* + * Socantsendmore indicates that no more data will be sent on the + * socket; it would normally be applied to a socket when the user + * informs the system that no more data is to be sent, by the protocol + * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data + * will be received, and will normally be applied to the socket by a + * protocol when it detects that the peer will send no more data. + * Data queued for reading in the socket may yet be read. + */ + +void +socantsendmore(so) + struct socket *so; +{ + + so->so_state |= SS_CANTSENDMORE; + sowwakeup(so); +} + +void +socantrcvmore(so) + struct socket *so; +{ + + so->so_state |= SS_CANTRCVMORE; + sorwakeup(so); +} + +/* + * Wait for data to arrive at/drain from a socket buffer. + */ +int +sbwait(sb) + struct sockbuf *sb; +{ + + sb->sb_flags |= SB_WAIT; + return (tsleep((caddr_t)&sb->sb_cc, + (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio, + sb->sb_timeo)); +} + +/* + * Lock a sockbuf already known to be locked; + * return any error returned from sleep (EINTR). + */ +int +sb_lock(sb) + register struct sockbuf *sb; +{ + int error; + + while (sb->sb_flags & SB_LOCK) { + sb->sb_flags |= SB_WANT; + error = tsleep((caddr_t)&sb->sb_flags, + (sb->sb_flags & SB_NOINTR) ? + PSOCK : PSOCK|PCATCH, netio, 0); + if (error) + return (error); + } + sb->sb_flags |= SB_LOCK; + return (0); +} + +#ifdef __ECOS +/* + * Set lock on sockbuf sb; sleep if lock is already held. + * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. 
+ * Returns error without lock if sleep is interrupted. + */ +int +sblock(struct sockbuf *sb, int wf) +{ + int res; + cyg_scheduler_safe_lock(); + if (sb->sb_flags & SB_LOCK) { + // Already locked by another thread + if (wf == M_WAITOK) { + res = sb_lock(sb); + // Note: scheduler unlocked by 'sb_lock()' + } else { + res = EWOULDBLOCK; + cyg_scheduler_unlock(); + } + } else { + sb->sb_flags |= SB_LOCK; + res = 0; + cyg_scheduler_unlock(); + } + return res; +} + +/* release lock on sockbuf sb */ +void +sbunlock(struct sockbuf *sb) +{ + cyg_scheduler_lock(); + sb->sb_flags &= ~SB_LOCK; + if (sb->sb_flags & SB_WANT) { + sb->sb_flags &= ~SB_WANT; + wakeup((caddr_t)&sb->sb_flags); + } + cyg_scheduler_unlock(); +} +#endif + +/* + * Wakeup processes waiting on a socket buffer. + * Do asynchronous notification via SIGIO + * if the socket has the SS_ASYNC flag set. + */ +void +sowakeup(so, sb) + register struct socket *so; + register struct sockbuf *sb; +{ + selwakeup(&sb->sb_sel); + sb->sb_flags &= ~SB_SEL; + if (sb->sb_flags & SB_WAIT) { + sb->sb_flags &= ~SB_WAIT; + wakeup((caddr_t)&sb->sb_cc); + } +#ifndef __ECOS + if (so->so_state & SS_ASYNC) + csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid); +#endif +} + +/* + * Socket buffer (struct sockbuf) utility routines. + * + * Each socket contains two socket buffers: one for sending data and + * one for receiving data. Each buffer contains a queue of mbufs, + * information about the number of mbufs and amount of data in the + * queue, and other fields allowing select() statements and notification + * on data availability to be implemented. + * + * Data stored in a socket buffer is maintained as a list of records. + * Each record is a list of mbufs chained together with the m_next + * field. Records are chained together with the m_nextpkt field. The upper + * level routine soreceive() expects the following conventions to be + * observed when placing information in the receive buffer: + * + * 1. 
If the protocol requires each message be preceded by the sender's + * name, then a record containing that name must be present before + * any associated data (mbuf's must be of type MT_SONAME). + * 2. If the protocol supports the exchange of ``access rights'' (really + * just additional data associated with the message), and there are + * ``rights'' to be received, then a record containing this data + * should be present (mbuf's must be of type MT_CONTROL). + * 3. If a name or rights record exists, then it must be followed by + * a data record, perhaps of zero length. + * + * Before using a new socket structure it is first necessary to reserve + * buffer space to the socket, by calling sbreserve(). This should commit + * some of the available buffer space in the system buffer pool for the + * socket (currently, it does nothing but enforce limits). The space + * should be released by calling sbrelease() when the socket is destroyed. + */ + +int +soreserve(so, sndcc, rcvcc) + register struct socket *so; + u_long sndcc, rcvcc; +{ + + if (sbreserve(&so->so_snd, sndcc) == 0) + goto bad; + if (sbreserve(&so->so_rcv, rcvcc) == 0) + goto bad2; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + if (so->so_snd.sb_lowat == 0) + so->so_snd.sb_lowat = MCLBYTES; + if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + return (0); +bad2: + sbrelease(&so->so_snd); +bad: + return (ENOBUFS); +} + +/* + * Allot mbufs to a sockbuf. + * Attempt to scale mbmax so that mbcnt doesn't become limiting + * if buffering efficiency is near the normal case. + */ +int +sbreserve(sb, cc) + struct sockbuf *sb; + u_long cc; +{ + + if (cc == 0 || cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES)) + return (0); + sb->sb_hiwat = cc; + sb->sb_mbmax = min(cc * 2, sb_max); + if (sb->sb_lowat > sb->sb_hiwat) + sb->sb_lowat = sb->sb_hiwat; + return (1); +} + +/* + * Free mbufs held by a socket, and reserved mbuf space. 
+ */ +void +sbrelease(sb) + struct sockbuf *sb; +{ + + sbflush(sb); + sb->sb_hiwat = sb->sb_mbmax = 0; +} + +/* + * Routines to add and remove + * data from an mbuf queue. + * + * The routines sbappend() or sbappendrecord() are normally called to + * append new mbufs to a socket buffer, after checking that adequate + * space is available, comparing the function sbspace() with the amount + * of data to be added. sbappendrecord() differs from sbappend() in + * that data supplied is treated as the beginning of a new record. + * To place a sender's address, optional access rights, and data in a + * socket receive buffer, sbappendaddr() should be used. To place + * access rights and data in a socket receive buffer, sbappendrights() + * should be used. In either case, the new data begins a new record. + * Note that unlike sbappend() and sbappendrecord(), these routines check + * for the caller that there will be enough space to store the data. + * Each fails if there is not enough space, or if it cannot find mbufs + * to store additional information in. + * + * Reliable protocols may use the socket send buffer to hold data + * awaiting acknowledgement. Data is normally copied from a socket + * send buffer in a protocol with m_copy for output to a peer, + * and then removing the data from the socket buffer with sbdrop() + * or sbdroprecord() when the data is acknowledged by the peer. + */ + +/* + * Append mbuf chain m to the last record in the + * socket buffer sb. The additional space associated + * the mbuf chain is recorded in sb. Empty mbufs are + * discarded and mbufs are compacted where possible. + */ +void +sbappend(sb, m) + struct sockbuf *sb; + struct mbuf *m; +{ + register struct mbuf *n; + + if (m == 0) + return; + if ((n = sb->sb_mb) != NULL) { + while (n->m_nextpkt) + n = n->m_nextpkt; + do { + if (n->m_flags & M_EOR) { + sbappendrecord(sb, m); /* XXXXXX!!!! 
*/ + return; + } + } while (n->m_next && (n = n->m_next)); + } + sbcompress(sb, m, n); +} + +#ifdef SOCKBUF_DEBUG +void +sbcheck(sb) + register struct sockbuf *sb; +{ + register struct mbuf *m; + register int len = 0, mbcnt = 0; + + for (m = sb->sb_mb; m; m = m->m_next) { + len += m->m_len; + mbcnt += MSIZE; + if (m->m_flags & M_EXT) + mbcnt += m->m_ext.ext_size; + if (m->m_nextpkt) + panic("sbcheck nextpkt"); + } + if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { + printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc, + mbcnt, sb->sb_mbcnt); + panic("sbcheck"); + } +} +#endif + +/* + * As above, except the mbuf chain + * begins a new record. + */ +void +sbappendrecord(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + register struct mbuf *m; + + if (m0 == 0) + return; + if ((m = sb->sb_mb) != NULL) + while (m->m_nextpkt) + m = m->m_nextpkt; + /* + * Put the first mbuf on the queue. + * Note this permits zero length records. + */ + sballoc(sb, m0); + if (m) + m->m_nextpkt = m0; + else + sb->sb_mb = m0; + m = m0->m_next; + m0->m_next = 0; + if (m && (m0->m_flags & M_EOR)) { + m0->m_flags &= ~M_EOR; + m->m_flags |= M_EOR; + } + sbcompress(sb, m, m0); +} + +/* + * As above except that OOB data + * is inserted at the beginning of the sockbuf, + * but after any other OOB data. + */ +void +sbinsertoob(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + register struct mbuf *m; + register struct mbuf **mp; + + if (m0 == 0) + return; + for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { + again: + switch (m->m_type) { + + case MT_OOBDATA: + continue; /* WANT next train */ + + case MT_CONTROL: + if ((m = m->m_next) != NULL) + goto again; /* inspect THIS train further */ + } + break; + } + /* + * Put the first mbuf on the queue. + * Note this permits zero length records. 
+ */ + sballoc(sb, m0); + m0->m_nextpkt = *mp; + *mp = m0; + m = m0->m_next; + m0->m_next = 0; + if (m && (m0->m_flags & M_EOR)) { + m0->m_flags &= ~M_EOR; + m->m_flags |= M_EOR; + } + sbcompress(sb, m, m0); +} + +/* + * Append address and data, and optionally, control (ancillary) data + * to the receive queue of a socket. If present, + * m0 must include a packet header with total length. + * Returns 0 if no space in sockbuf or insufficient mbufs. + */ +int +sbappendaddr(sb, asa, m0, control) + register struct sockbuf *sb; + struct sockaddr *asa; + struct mbuf *m0, *control; +{ + register struct mbuf *m, *n; + int space = asa->sa_len; + +if (m0 && (m0->m_flags & M_PKTHDR) == 0) +panic("sbappendaddr"); + if (m0) + space += m0->m_pkthdr.len; + for (n = control; n; n = n->m_next) { + space += n->m_len; + if (n->m_next == 0) /* keep pointer to last control buf */ + break; + } + if (space > sbspace(sb)) + return (0); + if (asa->sa_len > MLEN) + return (0); + MGET(m, M_DONTWAIT, MT_SONAME); + if (m == 0) + return (0); + m->m_len = asa->sa_len; + bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); + if (n) + n->m_next = m0; /* concatenate data to control */ + else + control = m0; + m->m_next = control; + for (n = m; n; n = n->m_next) + sballoc(sb, n); + if ((n = sb->sb_mb) != NULL) { + while (n->m_nextpkt) + n = n->m_nextpkt; + n->m_nextpkt = m; + } else + sb->sb_mb = m; + return (1); +} + +int +sbappendcontrol(sb, m0, control) + struct sockbuf *sb; + struct mbuf *m0, *control; +{ + register struct mbuf *m, *n; + int space = 0; + + if (control == 0) + panic("sbappendcontrol"); + for (m = control; ; m = m->m_next) { + space += m->m_len; + if (m->m_next == 0) + break; + } + n = m; /* save pointer to last control buffer */ + for (m = m0; m; m = m->m_next) + space += m->m_len; + if (space > sbspace(sb)) + return (0); + n->m_next = m0; /* concatenate data to control */ + for (m = control; m; m = m->m_next) + sballoc(sb, m); + if ((n = sb->sb_mb) != NULL) { + while 
(n->m_nextpkt) + n = n->m_nextpkt; + n->m_nextpkt = control; + } else + sb->sb_mb = control; + return (1); +} + +/* + * Compress mbuf chain m into the socket + * buffer sb following mbuf n. If n + * is null, the buffer is presumed empty. + */ +void +sbcompress(sb, m, n) + register struct sockbuf *sb; + register struct mbuf *m, *n; +{ + register int eor = 0; + register struct mbuf *o; + + while (m) { + eor |= m->m_flags & M_EOR; + if (m->m_len == 0 && + (eor == 0 || + (((o = m->m_next) || (o = n)) && + o->m_type == m->m_type))) { + m = m_free(m); + continue; + } + if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && + (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && + n->m_type == m->m_type) { + bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, + (unsigned)m->m_len); + n->m_len += m->m_len; + sb->sb_cc += m->m_len; + m = m_free(m); + continue; + } + if (n) + n->m_next = m; + else + sb->sb_mb = m; + sballoc(sb, m); + n = m; + m->m_flags &= ~M_EOR; + m = m->m_next; + n->m_next = 0; + } + if (eor) { + if (n) + n->m_flags |= eor; + else +#ifdef __ECOS + diag_printf("semi-panic: sbcompress\n"); +#else + printf("semi-panic: sbcompress\n"); +#endif + } +} + +/* + * Free all mbufs in a sockbuf. + * Check that all resources are reclaimed. + */ +void +sbflush(sb) + register struct sockbuf *sb; +{ + + if (sb->sb_flags & SB_LOCK) + panic("sbflush"); + while (sb->sb_mbcnt) + sbdrop(sb, (int)sb->sb_cc); + if (sb->sb_cc || sb->sb_mb) + panic("sbflush 2"); +} + +/* + * Drop data from (the front of) a sockbuf. + */ +void +sbdrop(sb, len) + register struct sockbuf *sb; + register int len; +{ + register struct mbuf *m, *mn; + struct mbuf *next; + + next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; + while (len > 0) { + if (m == 0) { + if (next == 0) + panic("sbdrop"); + m = next; + next = m->m_nextpkt; + continue; + } + if (m->m_len > len) { + m->m_len -= len; + m->m_data += len; + sb->sb_cc -= len; + break; + } + len -= m->m_len; + sbfree(sb, m); + MFREE(m, mn); + m = mn; + } + while (m && m->m_len == 0) { + sbfree(sb, m); + MFREE(m, mn); + m = mn; + } + if (m) { + sb->sb_mb = m; + m->m_nextpkt = next; + } else + sb->sb_mb = next; +} + +/* + * Drop a record off the front of a sockbuf + * and move the next record to the front. + */ +void +sbdroprecord(sb) + register struct sockbuf *sb; +{ + register struct mbuf *m, *mn; + + m = sb->sb_mb; + if (m) { + sb->sb_mb = m->m_nextpkt; + do { + sbfree(sb, m); + MFREE(m, mn); + } while ((m = mn) != NULL); + } +} + +/* + * Create a "control" mbuf containing the specified data + * with the specified type for presentation on a socket buffer. + */ +struct mbuf * +sbcreatecontrol(p, size, type, level) + caddr_t p; + register int size; + int type, level; +{ + register struct cmsghdr *cp; + struct mbuf *m; + + if (size + sizeof(*cp) > MCLBYTES) { +#ifdef __ECOS + diag_printf("sbcreatecontrol: message too large %d\n", size); +#else + printf("sbcreatecontrol: message too large %d\n", size); +#endif + return NULL; + } + + if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) + return ((struct mbuf *) NULL); + if (size + sizeof(*cp) > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return NULL; + } + } + cp = mtod(m, struct cmsghdr *); + bcopy(p, CMSG_DATA(cp), size); + size += sizeof(*cp); + m->m_len = size; + cp->cmsg_len = size; + cp->cmsg_level = level; + cp->cmsg_type = type; + return (m); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/kern/uipc_syscalls.c b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_syscalls.c new file mode 100644 index 0000000..d09b422 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/kern/uipc_syscalls.c @@ -0,0 +1,1269 @@ 
+//========================================================================== +// +// sys/kern/uipc_syscalls.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: uipc_syscalls.c,v 1.29 1999/12/08 06:50:17 itojun Exp $ */ +/* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1989, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 + */ + +#include <sys/param.h> +#ifdef __ECOS +#include <cyg/io/file.h> +static int ecos_getsock(int fdes, struct file **fpp); +#define getsock(fdp, fdes, fpp) ecos_getsock(fdes, fpp) +#else +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/filedesc.h> +#include <sys/file.h> +#include <sys/buf.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#ifndef __ECOS +#include <sys/signalvar.h> +#include <sys/un.h> +#endif +#ifdef KTRACE +#include <sys/ktrace.h> +#endif + +#ifndef __ECOS +#include <sys/mount.h> +#endif +#include <sys/syscallargs.h> + +/* + * System call interface to the socket abstraction. 
+ */ +extern struct fileops socketops; + +#ifdef __ECOS +int +sys_socket(struct sys_socket_args *uap, register_t *retval) +{ +#else +int +sys_socket(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_socket_args /* { + syscallarg(int) domain; + syscallarg(int) type; + syscallarg(int) protocol; + } */ *uap = v; + struct filedesc *fdp = p->p_fd; +#endif // __ECOS + struct socket *so; + struct file *fp; + int fd, error; + +#ifdef __ECOS + if ((error = falloc(&fp, &fd)) != 0) +#else + if ((error = falloc(p, &fp, &fd)) != 0) +#endif + return (error); + fp->f_flag = FREAD|FWRITE; + fp->f_type = DTYPE_SOCKET; + fp->f_ops = &socketops; + error = socreate(SCARG(uap, domain), &so, SCARG(uap, type), + SCARG(uap, protocol)); + if (error) { +#ifndef __ECOS + fdremove(fdp, fd); +#endif + ffree(fp); + } else { +#ifdef __ECOS + fp->f_data = (CYG_ADDRWORD)so; +#else + fp->f_data = (caddr_t)so; +#endif + *retval = fd; + } + return (error); +} + +#ifdef __ECOS +int +sys_bind(struct sys_bind_args *uap, register_t *retval) + { +#else +/* ARGSUSED */ +int +sys_bind(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_bind_args /* { + syscallarg(int) s; + syscallarg(struct sockaddr *) name; + syscallarg(socklen_t) namelen; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + struct mbuf *nam; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + error = sockargs(&nam, (caddr_t)SCARG(uap, name), SCARG(uap, namelen), + MT_SONAME); + if (error) + return (error); + error = sobind((struct socket *)fp->f_data, nam); + m_freem(nam); + return (error); +} + +/* ARGSUSED */ +#ifdef __ECOS +int +sys_listen(struct sys_listen_args *uap, register_t *retval) +{ +#else +int +sys_listen(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_listen_args /* { + syscallarg(int) s; + syscallarg(int) backlog; + } */ *uap = v; +#endif // __ECOS + struct file 
*fp; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + return (solisten((struct socket *)fp->f_data, SCARG(uap, backlog))); +} + +#ifdef __ECOS +int + sys_accept(struct sys_accept_args *uap, register_t *retval) +{ +#else +int +sys_accept(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_accept_args /* { + syscallarg(int) s; + syscallarg(struct sockaddr *) name; + syscallarg(socklen_t *) anamelen; + } */ *uap = v; +#endif + struct file *fp; + struct mbuf *nam; + socklen_t namelen; + int error, s, tmpfd; + register struct socket *so; + + if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, anamelen), + (caddr_t)&namelen, sizeof (namelen)))) + return (error); + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + s = splsoftnet(); + so = (struct socket *)fp->f_data; + if ((so->so_options & SO_ACCEPTCONN) == 0) { + splx(s); + return (EINVAL); + } + if ((so->so_state & SS_NBIO) && so->so_qlen == 0) { + splx(s); + return (EWOULDBLOCK); + } + while (so->so_qlen == 0 && so->so_error == 0) { + if (so->so_state & SS_CANTRCVMORE) { + so->so_error = ECONNABORTED; + break; + } + error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0); + if (error) { + splx(s); + return (error); + } + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + return (error); + } +#ifdef __ECOS + if ((error = falloc(&fp, &tmpfd)) != 0) { +#else + if ((error = falloc(p, &fp, &tmpfd)) != 0) { +#endif + splx(s); + return (error); + } + *retval = tmpfd; + { struct socket *aso = so->so_q; + if (soqremque(aso, 1) == 0) + panic("accept"); + so = aso; + } + fp->f_type = DTYPE_SOCKET; + fp->f_flag = FREAD|FWRITE; + fp->f_ops = &socketops; +#ifdef __ECOS + fp->f_data = (CYG_ADDRWORD)so; +#else + fp->f_data = (caddr_t)so; +#endif + nam = m_get(M_WAIT, MT_SONAME); + (void) soaccept(so, nam); + if (SCARG(uap, name)) { + if (namelen > nam->m_len) + namelen = 
nam->m_len; + /* SHOULD COPY OUT A CHAIN HERE */ + if ((error = copyout(mtod(nam, caddr_t), + (caddr_t)SCARG(uap, name), namelen)) == 0) + error = copyout((caddr_t)&namelen, + (caddr_t)SCARG(uap, anamelen), + sizeof (*SCARG(uap, anamelen))); + } + m_freem(nam); + splx(s); + return (error); +} + +#ifdef __ECOS +int +sys_connect(struct sys_connect_args *uap, register_t *retval) +{ +#else +/* ARGSUSED */ +int +sys_connect(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_connect_args /* { + syscallarg(int) s; + syscallarg(struct sockaddr *) name; + syscallarg(socklen_t) namelen; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + register struct socket *so; + struct mbuf *nam; + int error, s; + + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + so = (struct socket *)fp->f_data; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) + return (EALREADY); + error = sockargs(&nam, (caddr_t)SCARG(uap, name), SCARG(uap, namelen), + MT_SONAME); + if (error) + return (error); + error = soconnect(so, nam); + if (error) + goto bad; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + m_freem(nam); + return (EINPROGRESS); + } + s = splsoftnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0); + if (error) + break; + } + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } + splx(s); +bad: + so->so_state &= ~SS_ISCONNECTING; + m_freem(nam); +#ifndef __ECOS + if (error == ERESTART) + error = EINTR; +#endif + return (error); +} + +#ifndef __ECOS +int +sys_socketpair(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_socketpair_args /* { + syscallarg(int) domain; + syscallarg(int) type; + syscallarg(int) protocol; + syscallarg(int *) rsv; + } */ *uap = v; + register struct filedesc *fdp = p->p_fd; + struct file *fp1, *fp2; + struct socket 
*so1, *so2; + int fd, error, sv[2]; + + error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type), + SCARG(uap, protocol)); + if (error) + return (error); + error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type), + SCARG(uap, protocol)); + if (error) + goto free1; + if ((error = falloc(p, &fp1, &fd)) != 0) + goto free2; + sv[0] = fd; + fp1->f_flag = FREAD|FWRITE; + fp1->f_type = DTYPE_SOCKET; + fp1->f_ops = &socketops; + fp1->f_data = (caddr_t)so1; + if ((error = falloc(p, &fp2, &fd)) != 0) + goto free3; + fp2->f_flag = FREAD|FWRITE; + fp2->f_type = DTYPE_SOCKET; + fp2->f_ops = &socketops; + fp2->f_data = (caddr_t)so2; + sv[1] = fd; + if ((error = soconnect2(so1, so2)) != 0) + goto free4; + if (SCARG(uap, type) == SOCK_DGRAM) { + /* + * Datagram socket connection is asymmetric. + */ + if ((error = soconnect2(so2, so1)) != 0) + goto free4; + } + error = copyout((caddr_t)sv, (caddr_t)SCARG(uap, rsv), + 2 * sizeof (int)); + if (error == 0) + return (error); +free4: + ffree(fp2); + fdremove(fdp, sv[1]); +free3: + ffree(fp1); + fdremove(fdp, sv[0]); +free2: + (void)soclose(so2); +free1: + (void)soclose(so1); + return (error); +} +#endif + +#ifdef __ECOS +int +sys_sendto(struct sys_sendto_args *uap, register_t *retval) +{ +#else +int +sys_sendto(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_sendto_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; + syscallarg(size_t) len; + syscallarg(int) flags; + syscallarg(struct sockaddr *) to; + syscallarg(socklen_t) tolen; + } */ *uap = v; +#endif // __ECOS + struct msghdr msg; + struct iovec aiov; + + msg.msg_name = (caddr_t)SCARG(uap, to); + msg.msg_namelen = SCARG(uap, tolen); + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + msg.msg_control = 0; +#ifdef COMPAT_OLDSOCK + msg.msg_flags = 0; +#endif + aiov.iov_base = (char *)SCARG(uap, buf); + aiov.iov_len = SCARG(uap, len); +#ifdef __ECOS + return (sendit(SCARG(uap, s), &msg, SCARG(uap, flags), retval)); +#else + return 
(sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval)); +#endif +} + +#ifndef __ECOS +int +sys_sendmsg(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_sendmsg_args /* { + syscallarg(int) s; + syscallarg(caddr_t) msg; + syscallarg(int) flags; + } */ *uap = v; + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + error = copyin(SCARG(uap, msg), (caddr_t)&msg, sizeof (msg)); + if (error) + return (error); + if (msg.msg_iovlen <= 0 || msg.msg_iovlen > IOV_MAX) + return (EMSGSIZE); + if (msg.msg_iovlen > UIO_SMALLIOV) + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * msg.msg_iovlen, M_IOV, M_WAITOK); + else + iov = aiov; + if (msg.msg_iovlen && + (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) + goto done; + msg.msg_iov = iov; +#ifdef COMPAT_OLDSOCK + msg.msg_flags = 0; +#endif + error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} +#endif + +#ifdef __ECOS +int +sendit(int s, struct msghdr *mp, int flags, register_t *retsize) +{ +#else +int +sendit(p, s, mp, flags, retsize) + register struct proc *p; + int s; + register struct msghdr *mp; + int flags; + register_t *retsize; +{ +#endif // __ECOS + struct file *fp; + struct uio auio; + register struct iovec *iov; + register int i; + struct mbuf *to, *control; + int len, error; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if ((error = getsock(p->p_fd, s, &fp)) != 0) + return (error); + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_WRITE; +#ifndef __ECOS + auio.uio_procp = p; +#endif + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + /* Don't allow sum > SSIZE_MAX */ + if (iov->iov_len > SSIZE_MAX || + (auio.uio_resid += iov->iov_len) > SSIZE_MAX) + 
return (EINVAL); + } + if (mp->msg_name) { + error = sockargs(&to, mp->msg_name, mp->msg_namelen, + MT_SONAME); + if (error) + return (error); + } else + to = 0; + if (mp->msg_control) { + if (mp->msg_controllen < sizeof(struct cmsghdr) +#ifdef COMPAT_OLDSOCK + && mp->msg_flags != MSG_COMPAT +#endif + ) { + error = EINVAL; + goto bad; + } + error = sockargs(&control, mp->msg_control, + mp->msg_controllen, MT_CONTROL); + if (error) + goto bad; +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags == MSG_COMPAT) { + register struct cmsghdr *cm; + + M_PREPEND(control, sizeof(*cm), M_WAIT); + if (control == 0) { + error = ENOBUFS; + goto bad; + } else { + cm = mtod(control, struct cmsghdr *); + cm->cmsg_len = control->m_len; + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_RIGHTS; + } + } +#endif + } else + control = 0; +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + int iovlen = auio.uio_iovcnt * sizeof (struct iovec); + + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + len = auio.uio_resid; + error = sosend((struct socket *)fp->f_data, to, &auio, + NULL, control, flags); + if (error) { +#ifdef __ECOS + if (auio.uio_resid != len && + (error == EINTR || error == EWOULDBLOCK)) +#else + if (auio.uio_resid != len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) +#endif + error = 0; +#ifndef __ECOS + if (error == EPIPE) + psignal(p, SIGPIPE); +#endif + } + if (error == 0) + *retsize = len - auio.uio_resid; +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, s, UIO_WRITE, + ktriov, *retsize, error); + FREE(ktriov, M_TEMP); + } +#endif +bad: + if (to) + m_freem(to); + return (error); +} + +#ifdef __ECOS +int +sys_recvfrom(struct sys_recvfrom_args *uap, register_t *retval) +{ +#else +int +sys_recvfrom(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_recvfrom_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; 
+ syscallarg(size_t) len; + syscallarg(int) flags; + syscallarg(struct sockaddr *) from; + syscallarg(socklen_t *) fromlenaddr; + } */ *uap = v; +#endif // __ECOS + struct msghdr msg; + struct iovec aiov; + int error; + + if (SCARG(uap, fromlenaddr)) { + error = copyin((caddr_t)SCARG(uap, fromlenaddr), + (caddr_t)&msg.msg_namelen, + sizeof (msg.msg_namelen)); + if (error) + return (error); + } else + msg.msg_namelen = 0; + msg.msg_name = (caddr_t)SCARG(uap, from); + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, len); + msg.msg_control = 0; + msg.msg_flags = SCARG(uap, flags); +#ifdef __ECOS + return (recvit(SCARG(uap, s), &msg, + (caddr_t)SCARG(uap, fromlenaddr), retval)); +#else + return (recvit(p, SCARG(uap, s), &msg, + (caddr_t)SCARG(uap, fromlenaddr), retval)); +#endif +} + +#ifndef __ECOS +int +sys_recvmsg(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_recvmsg_args /* { + syscallarg(int) s; + syscallarg(struct msghdr *) msg; + syscallarg(int) flags; + } */ *uap = v; + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; + register int error; + + error = copyin((caddr_t)SCARG(uap, msg), (caddr_t)&msg, + sizeof (msg)); + if (error) + return (error); + if (msg.msg_iovlen <= 0 || msg.msg_iovlen > IOV_MAX) + return (EMSGSIZE); + if (msg.msg_iovlen > UIO_SMALLIOV) + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * msg.msg_iovlen, M_IOV, M_WAITOK); + else + iov = aiov; +#ifdef COMPAT_OLDSOCK + msg.msg_flags = SCARG(uap, flags) &~ MSG_COMPAT; +#else + msg.msg_flags = SCARG(uap, flags); +#endif + uiov = msg.msg_iov; + msg.msg_iov = iov; + error = copyin((caddr_t)uiov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); + if (error) + goto done; + if ((error = recvit(p, SCARG(uap, s), &msg, (caddr_t)0, retval)) == 0) { + msg.msg_iov = uiov; + error = copyout((caddr_t)&msg, (caddr_t)SCARG(uap, msg), + sizeof(msg)); + } +done: + if (iov 
!= aiov) + FREE(iov, M_IOV); + return (error); +} +#endif + +#ifdef __ECOS +int +recvit(int s, struct msghdr *mp, caddr_t namelenp, register_t *retsize) +{ +#else +int +recvit(p, s, mp, namelenp, retsize) + register struct proc *p; + int s; + register struct msghdr *mp; + caddr_t namelenp; + register_t *retsize; +{ +#endif // __ECOS + struct file *fp; + struct uio auio; + register struct iovec *iov; + register int i; + size_t len; + int error; + struct mbuf *from = 0, *control = 0; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if ((error = getsock(p->p_fd, s, &fp)) != 0) + return (error); + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_READ; +#ifndef __ECOS + auio.uio_procp = p; +#endif + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + /* Don't allow sum > SSIZE_MAX */ + if (iov->iov_len > SSIZE_MAX || + (auio.uio_resid += iov->iov_len) > SSIZE_MAX) + return (EINVAL); + } +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + int iovlen = auio.uio_iovcnt * sizeof (struct iovec); + + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + len = auio.uio_resid; + error = soreceive((struct socket *)fp->f_data, &from, &auio, + NULL, mp->msg_control ? 
&control : NULL, + &mp->msg_flags); + if (error) { +#ifdef __ECOS + if (auio.uio_resid != len && + (error == EINTR || error == EWOULDBLOCK)) +#else + if (auio.uio_resid != len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) +#endif + error = 0; + } +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, s, UIO_READ, + ktriov, len - auio.uio_resid, error); + FREE(ktriov, M_TEMP); + } +#endif + if (error) + goto out; + *retsize = len - auio.uio_resid; + if (mp->msg_name) { + len = mp->msg_namelen; + if (len <= 0 || from == 0) + len = 0; + else { + /* save sa_len before it is destroyed by MSG_COMPAT */ + if (len > from->m_len) + len = from->m_len; + /* else if len < from->m_len ??? */ +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags & MSG_COMPAT) + mtod(from, struct osockaddr *)->sa_family = + mtod(from, struct sockaddr *)->sa_family; +#endif + error = copyout(mtod(from, caddr_t), + (caddr_t)mp->msg_name, (unsigned)len); + if (error) + goto out; + } + mp->msg_namelen = len; + if (namelenp && + (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags & MSG_COMPAT) + error = 0; /* old recvfrom didn't check */ + else +#endif + goto out; + } + } + if (mp->msg_control) { +#ifdef COMPAT_OLDSOCK + /* + * We assume that old recvmsg calls won't receive access + * rights and other control info, esp. as control info + * is always optional and those options didn't exist in 4.3. + * If we receive rights, trim the cmsghdr; anything else + * is tossed. 
+ */ + if (control && mp->msg_flags & MSG_COMPAT) { + if (mtod(control, struct cmsghdr *)->cmsg_level != + SOL_SOCKET || + mtod(control, struct cmsghdr *)->cmsg_type != + SCM_RIGHTS) { + mp->msg_controllen = 0; + goto out; + } + control->m_len -= sizeof (struct cmsghdr); + control->m_data += sizeof (struct cmsghdr); + } +#endif + len = mp->msg_controllen; + if (len <= 0 || control == 0) + len = 0; + else { + struct mbuf *m = control; + caddr_t p = (caddr_t)mp->msg_control; + + do { + i = m->m_len; + if (len < i) { + mp->msg_flags |= MSG_CTRUNC; + i = len; + } + error = copyout(mtod(m, caddr_t), p, + (unsigned)i); + if (m->m_next) + i = ALIGN(i); + p += i; + len -= i; + if (error != 0 || len <= 0) + break; + } while ((m = m->m_next) != NULL); + len = p - (caddr_t)mp->msg_control; + } + mp->msg_controllen = len; + } +out: + if (from) + m_freem(from); + if (control) + m_freem(control); + return (error); +} + +/* ARGSUSED */ +#ifdef __ECOS +int +sys_shutdown(struct sys_shutdown_args *uap, register_t *retval) +{ +#else +int +sys_shutdown(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_shutdown_args /* { + syscallarg(int) s; + syscallarg(int) how; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + return (soshutdown((struct socket *)fp->f_data, SCARG(uap, how))); +} + +#ifdef __ECOS +int +sys_setsockopt(struct sys_setsockopt_args *uap, register_t *retval) +{ +#else +/* ARGSUSED */ +int +sys_setsockopt(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_setsockopt_args /* { + syscallarg(int) s; + syscallarg(int) level; + syscallarg(int) name; + syscallarg(caddr_t) val; + syscallarg(socklen_t) valsize; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + struct mbuf *m = NULL; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + if (SCARG(uap, valsize) > 
MCLBYTES) + return (EINVAL); + if (SCARG(uap, val)) { + m = m_get(M_WAIT, MT_SOOPTS); + if (SCARG(uap, valsize) > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_freem(m); + return (ENOBUFS); + } + } + if (m == NULL) + return (ENOBUFS); + error = copyin(SCARG(uap, val), mtod(m, caddr_t), + SCARG(uap, valsize)); + if (error) { + (void) m_free(m); + return (error); + } + m->m_len = SCARG(uap, valsize); + } + return (sosetopt((struct socket *)fp->f_data, SCARG(uap, level), + SCARG(uap, name), m)); +} + +#ifdef __ECOS +int +sys_getsockopt(struct sys_getsockopt_args *uap, register_t *retval) +{ +#else +/* ARGSUSED */ +int +sys_getsockopt(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_getsockopt_args /* { + syscallarg(int) s; + syscallarg(int) level; + syscallarg(int) name; + syscallarg(caddr_t) val; + syscallarg(socklen_t *) avalsize; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + struct mbuf *m = NULL; + socklen_t valsize; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) + return (error); + if (SCARG(uap, val)) { + error = copyin((caddr_t)SCARG(uap, avalsize), + (caddr_t)&valsize, sizeof (valsize)); + if (error) + return (error); + } else + valsize = 0; + if ((error = sogetopt((struct socket *)fp->f_data, SCARG(uap, level), + SCARG(uap, name), &m)) == 0 && SCARG(uap, val) && valsize && + m != NULL) { + if (valsize > m->m_len) + valsize = m->m_len; + error = copyout(mtod(m, caddr_t), SCARG(uap, val), valsize); + if (error == 0) + error = copyout((caddr_t)&valsize, + (caddr_t)SCARG(uap, avalsize), sizeof (valsize)); + } + if (m != NULL) + (void) m_free(m); + return (error); +} + +#ifndef __ECOS +int +sys_pipe(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_pipe_args /* { + syscallarg(int *) fdp; + } */ *uap = v; + int error, fds[2]; + register_t rval[2]; + + if ((error = sys_opipe(p, v, rval)) == -1) + return (error); + + fds[0] = 
#ifdef OLD_PIPE

/*
 * Socket-based pipe: create two AF_UNIX stream sockets, give each a
 * file entry (read end FREAD, write end FWRITE) and connect the write
 * socket to the read socket.  The descriptors are returned in
 * retval[0] (read) and retval[1] (write).  On any failure everything
 * set up so far is undone and the error is returned.
 */
/* ARGSUSED */
int
sys_opipe(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	struct file *rd_fp, *wr_fp;
	struct socket *rd_so, *wr_so;
	int fd, error;

	error = socreate(AF_UNIX, &rd_so, SOCK_STREAM, 0);
	if (error != 0)
		return (error);
	error = socreate(AF_UNIX, &wr_so, SOCK_STREAM, 0);
	if (error != 0)
		goto close_rd_so;

	/* Read end. */
	error = falloc(p, &rd_fp, &fd);
	if (error != 0)
		goto close_wr_so;
	retval[0] = fd;
	rd_fp->f_flag = FREAD;
	rd_fp->f_type = DTYPE_SOCKET;
	rd_fp->f_ops = &socketops;
	rd_fp->f_data = (caddr_t)rd_so;

	/* Write end. */
	error = falloc(p, &wr_fp, &fd);
	if (error != 0)
		goto drop_rd_fp;
	wr_fp->f_flag = FWRITE;
	wr_fp->f_type = DTYPE_SOCKET;
	wr_fp->f_ops = &socketops;
	wr_fp->f_data = (caddr_t)wr_so;
	retval[1] = fd;

	error = unp_connect2(wr_so, rd_so);
	if (error == 0)
		return (0);

	/* Unwind in reverse order of construction. */
	ffree(wr_fp);
	fdremove(fdp, retval[1]);
drop_rd_fp:
	ffree(rd_fp);
	fdremove(fdp, retval[0]);
close_wr_so:
	(void)soclose(wr_so);
close_rd_so:
	(void)soclose(rd_so);
	return (error);
}
#endif
+ */ +#ifdef __ECOS +int +sys_getsockname(struct sys_getsockname_args *uap, register_t *retval) +{ +#else +/* ARGSUSED */ +int +sys_getsockname(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_getsockname_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(socklen_t *) alen; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + register struct socket *so; + struct mbuf *m; + socklen_t len; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, fdes), &fp)) != 0) + return (error); + error = copyin((caddr_t)SCARG(uap, alen), (caddr_t)&len, sizeof (len)); + if (error) + return (error); + so = (struct socket *)fp->f_data; + m = m_getclr(M_WAIT, MT_SONAME); + if (m == NULL) + return (ENOBUFS); + error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0); + if (error) + goto bad; + if (len > m->m_len) + len = m->m_len; + error = copyout(mtod(m, caddr_t), (caddr_t)SCARG(uap, asa), len); + if (error == 0) + error = copyout((caddr_t)&len, (caddr_t)SCARG(uap, alen), + sizeof (len)); +bad: + m_freem(m); + return (error); +} + +/* + * Get name of peer for connected socket. 
+ */ +/* ARGSUSED */ +#ifdef __ECOS +int +sys_getpeername(struct sys_getpeername_args *uap, register_t *retval) +{ +#else +int +sys_getpeername(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_getpeername_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(socklen_t *) alen; + } */ *uap = v; +#endif // __ECOS + struct file *fp; + register struct socket *so; + struct mbuf *m; + socklen_t len; + int error; + + if ((error = getsock(p->p_fd, SCARG(uap, fdes), &fp)) != 0) + return (error); + so = (struct socket *)fp->f_data; + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) + return (ENOTCONN); + error = copyin((caddr_t)SCARG(uap, alen), (caddr_t)&len, sizeof (len)); + if (error) + return (error); + m = m_getclr(M_WAIT, MT_SONAME); + if (m == NULL) + return (ENOBUFS); + error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0); + if (error) + goto bad; + if (len > m->m_len) + len = m->m_len; + error = copyout(mtod(m, caddr_t), (caddr_t)SCARG(uap, asa), len); + if (error == 0) + error = copyout((caddr_t)&len, (caddr_t)SCARG(uap, alen), + sizeof (len)); +bad: + m_freem(m); + return (error); +} + +int +sockargs(mp, buf, buflen, type) + struct mbuf **mp; + caddr_t buf; + socklen_t buflen; + int type; +{ + register struct sockaddr *sa; + register struct mbuf *m; + int error; + + if (buflen > MLEN) { +#ifdef COMPAT_OLDSOCK + if (type == MT_SONAME && buflen <= 112) + buflen = MLEN; /* unix domain compat. 
hack */ + else +#endif + return (EINVAL); + } + m = m_get(M_WAIT, type); + if (m == NULL) + return (ENOBUFS); + m->m_len = buflen; + error = copyin(buf, mtod(m, caddr_t), buflen); + if (error) { + (void) m_free(m); + return (error); + } + *mp = m; + if (type == MT_SONAME) { + sa = mtod(m, struct sockaddr *); + +#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN + if (sa->sa_family == 0 && sa->sa_len < AF_MAX) + sa->sa_family = sa->sa_len; +#endif + sa->sa_len = buflen; + } + return (0); +} + +#ifdef __ECOS +static int +ecos_getsock(int fdes, struct file **fpp) +{ + struct file *fp; + if (getfp(fdes, &fp)) + return (EBADF); + if (fp->f_type != DTYPE_SOCKET) + return (ENOTSOCK); + *fpp = fp; + return (0); +} +#else +int +getsock(fdp, fdes, fpp) + struct filedesc *fdp; + int fdes; + struct file **fpp; +{ + register struct file *fp; + + if ((unsigned)fdes >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fdes]) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_SOCKET) + return (ENOTSOCK); + *fpp = fp; + return (0); +} +#endif diff --git a/ecos/packages/net/tcpip/current/src/sys/net/bridgestp.c b/ecos/packages/net/tcpip/current/src/sys/net/bridgestp.c new file mode 100644 index 0000000..e2c4afb --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/bridgestp.c @@ -0,0 +1,1511 @@ +//========================================================================== +// +// sys/net/bridgestp.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): Jason L. 
Wright (jason@thought.net) +// Contributors: Manu Sharma (manu.sharma@ascom.com) +// Date: 2000-07-18 +// Purpose: Implementation of the spanning tree protocol as defined in +// ISO/IEC Final DIS 15802-3 (IEEE P802.1D/D17), May 25, 1998. +// (In English: IEEE 802.1D, Draft 17, 1998) +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== +// +/* $OpenBSD: bridgestp.c,v 1.15 2002/12/10 13:22:55 markus Exp $ */ + +/* + * Copyright (c) 2000 Jason L. Wright (jason@thought.net) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Implementation of the spanning tree protocol as defined in + * ISO/IEC Final DIS 15802-3 (IEEE P802.1D/D17), May 25, 1998. 
+ * (In English: IEEE 802.1D, Draft 17, 1998) + */ + +#ifdef __ECOS +#include <pkgconf/net.h> +#include <stdio.h> +#else +#include "bridge.h" +#endif + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE +#if NBRIDGE + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#ifndef __ECOS +#include <sys/device.h> +#endif +#include <sys/kernel.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/if_llc.h> +#include <net/netisr.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/if_ether.h> +#endif + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#include <net/if_bridge.h> + +#define STP_DEBUG +#define STP_DETAILED_DEBUG +#ifdef STP_DEBUG +#define STPLOG(x) diag_printf x +#else +#define STPLOG(x) +#endif + +#ifdef STP_DETAILED_DEBUG +#define STP_OPERATION_LOG(x) diag_printf x +#else +#define STP_OPERATION_LOG(x) +#endif + +/* BPDU message types */ +#define BSTP_MSGTYPE_CFG 0x00 /* Configuration */ +#define BSTP_MSGTYPE_TCN 0x80 /* Topology chg notification */ + +/* BPDU flags */ +#define BSTP_FLAG_TC 0x01 /* Topology change */ +#define BSTP_FLAG_TCA 0x80 /* Topology change ack */ + +#define BSTP_MESSAGE_AGE_INCR (1 * 256) /* in 256ths of a second */ +#define BSTP_TICK_VAL (1 * 256) /* in 256ths of a second */ + +/* + * Because BPDU's do not make nicely aligned structures, two different + * declarations are used: bstp_?bpdu (wire representation, packed) and + * bstp_*_unit (internal, nicely aligned version). 
/*
 * configuration bridge protocol data unit
 *
 * Packed wire image of a configuration BPDU, starting with the three LLC
 * bytes that follow the Ethernet header.  The 16- and 32-bit fields are
 * converted with htons()/htonl() when the frame is built and with
 * ntohs()/ntohl() when it is parsed, i.e. they are kept in network byte
 * order while in this structure.
 */
struct bstp_cbpdu {
	u_int8_t	cbu_dsap;		/* LLC: destination sap */
	u_int8_t	cbu_ssap;		/* LLC: source sap */
	u_int8_t	cbu_ctl;		/* LLC: control */
	u_int16_t	cbu_protoid;		/* protocol id */
	u_int8_t	cbu_protover;		/* protocol version */
	u_int8_t	cbu_bpdutype;		/* message type */
	u_int8_t	cbu_flags;		/* flags (below) */

	/* root id */
	u_int16_t	cbu_rootpri;		/* root priority */
	u_int8_t	cbu_rootaddr[6];	/* root address */

	u_int32_t	cbu_rootpathcost;	/* root path cost */

	/* bridge id */
	u_int16_t	cbu_bridgepri;		/* bridge priority */
	u_int8_t	cbu_bridgeaddr[6];	/* bridge address */

	u_int16_t	cbu_portid;		/* port id */
	u_int16_t	cbu_messageage;		/* current message age */
	u_int16_t	cbu_maxage;		/* maximum age */
	u_int16_t	cbu_hellotime;		/* hello time */
	u_int16_t	cbu_forwarddelay;	/* forwarding delay */
} __attribute__((__packed__));

/*
 * topology change notification bridge protocol data unit
 *
 * Packed wire image of a TCN BPDU.  Its fields match the leading fields of
 * struct bstp_cbpdu up to and including the message type, which is why the
 * receive path can read this structure first to learn the BPDU type.
 */
struct bstp_tbpdu {
	u_int8_t	tbu_dsap;		/* LLC: destination sap */
	u_int8_t	tbu_ssap;		/* LLC: source sap */
	u_int8_t	tbu_ctl;		/* LLC: control */
	u_int16_t	tbu_protoid;		/* protocol id */
	u_int8_t	tbu_protover;		/* protocol version */
	u_int8_t	tbu_bpdutype;		/* message type */
} __attribute__((__packed__));
bridge_softc *, struct bridge_iflist *, + struct bstp_config_unit *); +int bstp_designated_port(struct bridge_softc *, struct bridge_iflist *); +int bstp_designated_for_some_port(struct bridge_softc *); +void bstp_transmit_config(struct bridge_softc *, struct bridge_iflist *); +void bstp_transmit_tcn(struct bridge_softc *); +struct mbuf *bstp_input(struct bridge_softc *, struct ifnet *, + struct ether_header *, struct mbuf *); +void bstp_received_config_bpdu(struct bridge_softc *, struct bridge_iflist *, + struct bstp_config_unit *); +void bstp_received_tcn_bpdu(struct bridge_softc *, struct bridge_iflist *, + struct bstp_tcn_unit *); +void bstp_record_config_information(struct bridge_softc *, + struct bridge_iflist *, struct bstp_config_unit *); +void bstp_record_config_timeout_values(struct bridge_softc *, + struct bstp_config_unit *); +void bstp_config_bpdu_generation(struct bridge_softc *); +void bstp_send_config_bpdu(struct bridge_iflist *, struct bstp_config_unit *); +void bstp_configuration_update(struct bridge_softc *); +void bstp_root_selection(struct bridge_softc *); +void bstp_designated_port_selection(struct bridge_softc *); +void bstp_become_designated_port(struct bridge_softc *, struct bridge_iflist *); +void bstp_port_state_selection(struct bridge_softc *); +void bstp_make_forwarding(struct bridge_softc *, struct bridge_iflist *); +void bstp_make_blocking(struct bridge_softc *, struct bridge_iflist *); +void bstp_set_port_state(struct bridge_iflist *, u_int8_t); +void bstp_set_bridge_priority(struct bridge_softc *, u_int64_t); +void bstp_set_port_priority(struct bridge_softc *, struct bridge_iflist *, + u_int16_t); +void bstp_set_path_cost(struct bridge_softc *, struct bridge_iflist *, + u_int32_t); +void bstp_topology_change_detection(struct bridge_softc *); +void bstp_topology_change_acknowledged(struct bridge_softc *); +void bstp_acknowledge_topology_change(struct bridge_softc *, + struct bridge_iflist *); + +void bstp_tick(void *); +void 
bstp_timer_start(struct bridge_timer *, u_int16_t); +void bstp_timer_stop(struct bridge_timer *); +int bstp_timer_expired(struct bridge_timer *, u_int16_t); + +void bstp_hold_timer_expiry(struct bridge_softc *, struct bridge_iflist *); +void bstp_message_age_timer_expiry(struct bridge_softc *, + struct bridge_iflist *); +void bstp_forward_delay_timer_expiry(struct bridge_softc *, + struct bridge_iflist *); +void bstp_topology_change_timer_expiry(struct bridge_softc *); +void bstp_tcn_timer_expiry(struct bridge_softc *); +void bstp_hello_timer_expiry(struct bridge_softc *); + +char stp_buf [32]; +char * +stp_port_state (unsigned int state) { + memset (stp_buf, 0x0, 32); + switch (state) { + case BSTP_IFSTATE_DISABLED : sprintf (stp_buf, " Disabled "); break; + case BSTP_IFSTATE_LISTENING : sprintf (stp_buf, " Listn'ng "); break; + case BSTP_IFSTATE_LEARNING : sprintf (stp_buf, " Lrn'ng "); break; + case BSTP_IFSTATE_FORWARDING : sprintf (stp_buf, " Fwd'ng "); break; + case BSTP_IFSTATE_BLOCKING : sprintf (stp_buf, " Blk'ng "); break; + default: sprintf (stp_buf, " UNKNOWN "); break; + } + return stp_buf; +} + +void +bstp_transmit_config(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + if (bif->bif_hold_timer.active) { + bif->bif_config_pending = 1; + return; + } + + bif->bif_config_bpdu.cu_message_type = BSTP_MSGTYPE_CFG; + bif->bif_config_bpdu.cu_rootid = sc->sc_designated_root; + bif->bif_config_bpdu.cu_root_path_cost = sc->sc_root_path_cost; + bif->bif_config_bpdu.cu_bridge_id = sc->sc_bridge_id; + bif->bif_config_bpdu.cu_port_id = bif->bif_port_id; + + if (bstp_root_bridge(sc)) + bif->bif_config_bpdu.cu_message_age = 0; + else + bif->bif_config_bpdu.cu_message_age = + sc->sc_root_port->bif_message_age_timer.value + + BSTP_MESSAGE_AGE_INCR; + + bif->bif_config_bpdu.cu_max_age = sc->sc_max_age; + bif->bif_config_bpdu.cu_hello_time = sc->sc_hello_time; + bif->bif_config_bpdu.cu_forward_delay = sc->sc_forward_delay; + 
bif->bif_config_bpdu.cu_topology_change_acknowledgment + = bif->bif_topology_change_acknowledge; + bif->bif_config_bpdu.cu_topology_change = sc->sc_topology_change; + + if (bif->bif_config_bpdu.cu_message_age < sc->sc_max_age) { + bif->bif_topology_change_acknowledge = 0; + bif->bif_config_pending = 0; + bstp_send_config_bpdu(bif, &bif->bif_config_bpdu); + bstp_timer_start(&bif->bif_hold_timer, 0); + } +} + +void +bstp_send_config_bpdu(bif, cu) + struct bridge_iflist *bif; + struct bstp_config_unit *cu; +{ + struct arpcom *arp; + struct ifnet *ifp; + struct mbuf *m; + struct ether_header eh; + struct bstp_cbpdu bpdu; + int s, error; + + s = splimp(); + ifp = bif->ifp; + arp = (struct arpcom *)ifp; + + if ((ifp->if_flags & IFF_RUNNING) == 0) { + splx(s); + return; + } +#ifdef ALTQ + if (!ALTQ_IS_ENABLED(&ifp->if_snd)) +#endif + if (IF_QFULL(&ifp->if_snd)) { + splx(s); + return; + } + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == NULL) { + splx(s); + return; + } + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = sizeof(eh) + sizeof(bpdu); + m->m_len = m->m_pkthdr.len; + + bpdu.cbu_ssap = bpdu.cbu_dsap = LLC_8021D_LSAP; + bpdu.cbu_ctl = LLC_UI; + bpdu.cbu_protoid = htons(0); + bpdu.cbu_protover = 0; + bpdu.cbu_bpdutype = cu->cu_message_type; + bpdu.cbu_flags = (cu->cu_topology_change ? BSTP_FLAG_TC : 0) | + (cu->cu_topology_change_acknowledgment ? 
BSTP_FLAG_TCA : 0); + + bpdu.cbu_rootpri = htons(cu->cu_rootid >> 48); + bpdu.cbu_rootaddr[0] = cu->cu_rootid >> 40; + bpdu.cbu_rootaddr[1] = cu->cu_rootid >> 32; + bpdu.cbu_rootaddr[2] = cu->cu_rootid >> 24; + bpdu.cbu_rootaddr[3] = cu->cu_rootid >> 16; + bpdu.cbu_rootaddr[4] = cu->cu_rootid >> 8; + bpdu.cbu_rootaddr[5] = cu->cu_rootid >> 0; + + bpdu.cbu_rootpathcost = htonl(cu->cu_root_path_cost); + + bpdu.cbu_bridgepri = htons(cu->cu_rootid >> 48); + bpdu.cbu_bridgeaddr[0] = cu->cu_rootid >> 40; + bpdu.cbu_bridgeaddr[1] = cu->cu_rootid >> 32; + bpdu.cbu_bridgeaddr[2] = cu->cu_rootid >> 24; + bpdu.cbu_bridgeaddr[3] = cu->cu_rootid >> 16; + bpdu.cbu_bridgeaddr[4] = cu->cu_rootid >> 8; + bpdu.cbu_bridgeaddr[5] = cu->cu_rootid >> 0; + + bpdu.cbu_portid = htons(cu->cu_port_id); + bpdu.cbu_messageage = htons(cu->cu_message_age); + bpdu.cbu_maxage = htons(cu->cu_max_age); + bpdu.cbu_hellotime = htons(cu->cu_hello_time); + bpdu.cbu_forwarddelay = htons(cu->cu_forward_delay); + + bcopy(arp->ac_enaddr, eh.ether_shost, ETHER_ADDR_LEN); + bcopy(bstp_etheraddr, eh.ether_dhost, ETHER_ADDR_LEN); + eh.ether_type = htons(sizeof(bpdu)); + + bcopy(&eh, m->m_data, sizeof(eh)); + bcopy(&bpdu, m->m_data + sizeof(eh), sizeof(bpdu)); + + STPLOG ((" - <%s>: Tx with Port 0x%x; Age %d; Flags %d\n", bif->ifp->if_xname, + cu->cu_port_id, cu->cu_message_age, bpdu.cbu_flags)); + + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error); + if (error == 0 && (ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + splx(s); +} + +int +bstp_root_bridge(sc) + struct bridge_softc *sc; +{ + return (sc->sc_designated_root == sc->sc_bridge_id); +} + +int +bstp_supersedes_port_info(sc, bif, cu) + struct bridge_softc *sc; + struct bridge_iflist *bif; + struct bstp_config_unit *cu; +{ + if (cu->cu_rootid < bif->bif_designated_root) { + return (1); + } + if (cu->cu_rootid > bif->bif_designated_root) { + return (0); + } + + if (cu->cu_root_path_cost < bif->bif_designated_cost) { + return (1); + } + if 
(cu->cu_root_path_cost > bif->bif_designated_cost) { + return (0); + } + + if (cu->cu_bridge_id < bif->bif_designated_bridge) { + return (1); + } + if (cu->cu_bridge_id > bif->bif_designated_bridge) { + return (0); + } + + if (sc->sc_bridge_id != cu->cu_bridge_id) { + return (1); + } + if (cu->cu_port_id <= bif->bif_designated_port) { + return (1); + } + return (0); +} + +void +bstp_record_config_information(sc, bif, cu) + struct bridge_softc *sc; + struct bridge_iflist *bif; + struct bstp_config_unit *cu; +{ + bif->bif_designated_root = cu->cu_rootid; + bif->bif_designated_cost = cu->cu_root_path_cost; + bif->bif_designated_bridge = cu->cu_bridge_id; + bif->bif_designated_port = cu->cu_port_id; + bstp_timer_start(&bif->bif_message_age_timer, cu->cu_message_age); +} + +void +bstp_record_config_timeout_values(sc, config) + struct bridge_softc *sc; + struct bstp_config_unit *config; +{ + sc->sc_max_age = config->cu_max_age; + sc->sc_hello_time = config->cu_hello_time; + sc->sc_forward_delay = config->cu_forward_delay; + sc->sc_topology_change = config->cu_topology_change; +} + +void +bstp_config_bpdu_generation(sc) + struct bridge_softc *sc; +{ + struct bridge_iflist *bif; + + STPLOG (("STP : Tx Hello BPDU ...\n")); + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bstp_designated_port(sc, bif) && + (bif->bif_state != BSTP_IFSTATE_DISABLED)) { + bstp_transmit_config(sc, bif); + } else { + STPLOG ((" - <%s>: No Tx\n", bif->ifp->if_xname)); + } + } +} + +int +bstp_designated_port(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + return ((bif->bif_designated_bridge == sc->sc_bridge_id) + && (bif->bif_designated_port == bif->bif_port_id)); +} + +void +bstp_transmit_tcn(sc) + struct bridge_softc *sc; +{ + struct bstp_tbpdu bpdu; + struct bridge_iflist *bif = sc->sc_root_port; + struct ifnet *ifp = bif->ifp; + struct arpcom *arp = (struct arpcom *)ifp; + struct ether_header *eh; + struct mbuf *m; + int s, 
error; + + if ((ifp->if_flags & IFF_RUNNING) == 0) + return; + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == NULL) + return; + + STPLOG (("STP : Tx TCN BPDU on <%s> ...\n", ifp->if_xname)); + + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu); + m->m_len = m->m_pkthdr.len; + + eh = mtod(m, struct ether_header *); + bcopy(arp->ac_enaddr, eh->ether_shost, ETHER_ADDR_LEN); + bcopy(bstp_etheraddr, eh->ether_dhost, ETHER_ADDR_LEN); + eh->ether_type = htons(sizeof(bpdu)); + + bpdu.tbu_ssap = bpdu.tbu_dsap = LLC_8021D_LSAP; + bpdu.tbu_ctl = LLC_UI; + bpdu.tbu_protoid = 0; + bpdu.tbu_protover = 0; + bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN; + bcopy(&bpdu, m->m_data + sizeof(*eh), sizeof(bpdu)); + + s = splimp(); + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error); + if (error == 0 && (ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + m = NULL; + + splx(s); + if (m != NULL) + m_freem(m); +} + +void +bstp_configuration_update(sc) + struct bridge_softc *sc; +{ + STP_OPERATION_LOG ((" %s \n", __FUNCTION__)); + bstp_root_selection(sc); + bstp_designated_port_selection(sc); +} + +void +bstp_root_selection(sc) + struct bridge_softc *sc; +{ + struct bridge_iflist *root_port = NULL, *bif; + + STP_OPERATION_LOG ((" => Root Port selection ...\n", __FUNCTION__)); + LIST_FOREACH(bif, &sc->sc_iflist, next) { + STP_OPERATION_LOG ((" - <%s>", bif->ifp->if_xname)); + if (!(bif->bif_flags & IFBIF_STP)) { + STP_OPERATION_LOG ((" STP not configured\n")); + continue; + } + if (bstp_designated_port(sc, bif)) { + STP_OPERATION_LOG ((" dsgntd port\n")); + continue; + } + if (bif->bif_state == BSTP_IFSTATE_DISABLED) { + STP_OPERATION_LOG ((" .. 
in state DISABLED \n")); + continue; + } + if (bif->bif_designated_root >= sc->sc_bridge_id) { + STP_OPERATION_LOG ((" root: ifp dsgntd >= sc\n")); + continue; + } + if (root_port == NULL) { + STP_OPERATION_LOG ((" Set as Root Port\n")); + goto set_port; + } + + if (bif->bif_designated_root < root_port->bif_designated_root) { + STP_OPERATION_LOG ((" ifp dsgntd < root dsgntd; Set as Root Port \n")); + goto set_port; + } + if (bif->bif_designated_root > root_port->bif_designated_root) { + STP_OPERATION_LOG ((" root: ifp dsgntd > root dsgntd \n")); + continue; + } + + if ((bif->bif_designated_cost + bif->bif_path_cost) < + (root_port->bif_designated_cost + root_port->bif_path_cost)) { + STP_OPERATION_LOG ((" (dsgntd_cost+path_cost): ifp < root; Set as Root Port \n")); + goto set_port; + } + if ((bif->bif_designated_cost + bif->bif_path_cost) > + (root_port->bif_designated_cost + root_port->bif_path_cost)) { + STP_OPERATION_LOG ((" (dsgntd_cost+path_cost): ifp > root \n")); + continue; + } + + if (bif->bif_designated_bridge < root_port->bif_designated_bridge) { + STP_OPERATION_LOG ((" bridge: ifp dsgntd < root dsgntd; Set as Root Port \n")); + goto set_port; + } + if (bif->bif_designated_bridge > root_port->bif_designated_bridge) { + STP_OPERATION_LOG ((" bridge: ifp dsgntd > root dsgntd \n")); + continue; + } + + if (bif->bif_designated_port < root_port->bif_designated_port) { + STP_OPERATION_LOG ((" port: ifp dsgntd (%d) < root dsgntd (%d); Set as Root Port\n", + bif->bif_designated_port, root_port->bif_designated_port)); + goto set_port; + } + if (bif->bif_designated_port > root_port->bif_designated_port) { + STP_OPERATION_LOG ((" port: ifp dsgntd (%d) > root dsgntd (%d)\n", + bif->bif_designated_port, root_port->bif_designated_port)); + continue; + } + + if (bif->bif_port_id >= root_port->bif_port_id) { + STP_OPERATION_LOG ((" ifp port_id (%d) >= root port_id (%d) 8 \n", + bif->bif_port_id, root_port->bif_port_id)); + continue; + } +set_port: + root_port = bif; + } 
+ + sc->sc_root_port = root_port; + if (root_port == NULL) { + sc->sc_designated_root = sc->sc_bridge_id; + sc->sc_root_path_cost = 0; + STPLOG ((" Root Port : <SELF ROOT>, Path Cost : 0\n")); + } else { + sc->sc_designated_root = root_port->bif_designated_root; + sc->sc_root_path_cost = root_port->bif_designated_cost + + root_port->bif_path_cost; + STPLOG ((" Root Port : <%s>, Path Cost : 0\n", + root_port->ifp->if_xname, sc->sc_root_path_cost)); + } +} + +void +bstp_designated_port_selection(sc) + struct bridge_softc *sc; +{ + struct bridge_iflist *bif; + + STP_OPERATION_LOG ((" => Designated Port selection ...\n")); + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bstp_designated_port(sc, bif)) { + STPLOG ((" - <%s>: already designated \n", bif->ifp->if_xname)); + goto designated; + } + if (bif->bif_designated_root != sc->sc_designated_root) { + STPLOG ((" - <%s>: becomes designated\n", bif->ifp->if_xname)); + goto designated; + } + + /* + * PLC network is special, we cannot go as per standards because of + * our network topology and behavior + * + if (sc->sc_root_path_cost < bif->bif_designated_cost) { + STPLOG ((" - <%s>: becomes designated path cost (1)\n", bif->ifp->if_xname)); + goto designated; + } + if (sc->sc_root_path_cost > bif->bif_designated_cost) { + STPLOG ((" - <%s>: Cannot be designated; Root cost (%d) > Desig cost (%d) \n", + bif->ifp->if_xname, sc->sc_root_path_cost, bif->bif_designated_cost)); + continue; + } + */ + + if (sc->sc_bridge_id < bif->bif_designated_bridge) { + STPLOG ((" - <%s>: becomes dsgntd (self-brdgid < dsgntd-brdgid); \n", bif->ifp->if_xname)); + goto designated; + } + if (sc->sc_bridge_id > bif->bif_designated_bridge) { + STPLOG ((" - <%s>: Cannot be dsgntd (self-brdgid > dsgntd-brdgid); \n", bif->ifp->if_xname)); + continue; + } + + if (bif->bif_port_id > bif->bif_designated_port) { + STPLOG ((" - <%s>: Cannot be dsgntd (self-portid > bsgntd-portid); \n", 
bif->ifp->if_xname)); + continue; + } + + /* + * PLC network is special, we cannot go as per standards because of + * our network topology and behavior + */ + if (sc->sc_root_path_cost < bif->bif_designated_cost) { + STPLOG ((" - <%s>: becomes dsgntd path cost (1)\n", bif->ifp->if_xname)); + goto designated; + } + if (sc->sc_root_path_cost > bif->bif_designated_cost) { + STPLOG ((" - <%s>: Cannot be dsgntd; Root cost (%d) > Desig cost (%d) \n", + bif->ifp->if_xname, sc->sc_root_path_cost, bif->bif_designated_cost)); + continue; + } +designated: + bstp_become_designated_port(sc, bif); + } +} + +void +bstp_become_designated_port(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + bif->bif_designated_root = sc->sc_designated_root; + bif->bif_designated_cost = sc->sc_root_path_cost; + bif->bif_designated_bridge = sc->sc_bridge_id; + bif->bif_designated_port = bif->bif_port_id; + + STP_OPERATION_LOG ((" => %s values (cost:%d, port:0x%x)\n", + __FUNCTION__, bif->ifp->if_xname, bif->bif_designated_cost, bif->bif_designated_port)); + //STP_OPERATION_LOG (("(cost:%d, port:0x%x)\n", bif->bif_designated_cost, bif->bif_designated_port)); +} + +void +bstp_port_state_selection(sc) + struct bridge_softc *sc; +{ + struct bridge_iflist *bif; + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bif == sc->sc_root_port) { + bif->bif_config_pending = 0; + bif->bif_topology_change_acknowledge = 0; + bstp_make_forwarding(sc, bif); + } else if (bstp_designated_port(sc, bif)) { + bstp_timer_stop(&bif->bif_message_age_timer); + bstp_make_forwarding(sc, bif); + } else { + bif->bif_config_pending = 0; + bif->bif_topology_change_acknowledge = 0; + bstp_make_blocking(sc, bif); + } + } +} + +void +bstp_make_forwarding(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + if (bif->bif_state == BSTP_IFSTATE_BLOCKING) { + bstp_set_port_state(bif, BSTP_IFSTATE_LISTENING); + 
bstp_timer_start(&bif->bif_forward_delay_timer, 0); + } +} + +void +bstp_make_blocking(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + if ((bif->bif_state != BSTP_IFSTATE_DISABLED) && + (bif->bif_state != BSTP_IFSTATE_BLOCKING)) { + if ((bif->bif_state == BSTP_IFSTATE_FORWARDING) || + (bif->bif_state == BSTP_IFSTATE_LEARNING)) { + if (bif->bif_change_detection_enabled) { + bstp_topology_change_detection(sc); + } + bridge_rtdelete(sc, bif->ifp, 1); + } + bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING); + bstp_timer_stop(&bif->bif_forward_delay_timer); + } +} + +void +bstp_set_port_state(bif, state) + struct bridge_iflist *bif; + u_int8_t state; +{ + bif->bif_state = state; +} + +void +bstp_topology_change_detection(sc) + struct bridge_softc *sc; +{ + if (bstp_root_bridge(sc)) { + STPLOG ((" -> %s : Root Bridge\n", __FUNCTION__)); + sc->sc_topology_change = 1; + bstp_timer_start(&sc->sc_topology_change_timer, 0); + } else if (!sc->sc_topology_change_detected) { + STPLOG ((" -> %s : NOT Root Bridge, Tx TCN\n", __FUNCTION__)); + bstp_transmit_tcn(sc); + bstp_timer_start(&sc->sc_tcn_timer, 0); + } + sc->sc_topology_change_detected = 1; +} + +void +bstp_topology_change_acknowledged(sc) + struct bridge_softc *sc; +{ + sc->sc_topology_change_detected = 0; + bstp_timer_stop(&sc->sc_tcn_timer); +} + +void +bstp_acknowledge_topology_change(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + bif->bif_topology_change_acknowledge = 1; + bstp_transmit_config(sc, bif); +} + +struct mbuf * +bstp_input(sc, ifp, eh, m) + struct bridge_softc *sc; + struct ifnet *ifp; + struct ether_header *eh; + struct mbuf *m; +{ + struct bridge_iflist *bif = NULL; + struct bstp_tbpdu tpdu; + struct bstp_cbpdu cpdu; + struct bstp_config_unit cu; + struct bstp_tcn_unit tu; + u_int16_t len; + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bif->ifp == ifp) + break; + } + + if (bif == NULL) + goto out; + + len = 
ntohs(eh->ether_type); + if (len < sizeof(tpdu)) { + goto out; + } + if (m->m_pkthdr.len > len) + m_adj(m, len - m->m_pkthdr.len); + if ((m = m_pullup(m, sizeof(tpdu))) == NULL) { + goto out; + } + bcopy(mtod(m, struct tpdu *), &tpdu, sizeof(tpdu)); + + if (tpdu.tbu_dsap != LLC_8021D_LSAP || + tpdu.tbu_ssap != LLC_8021D_LSAP || + tpdu.tbu_ctl != LLC_UI) { + goto out; + } + if (tpdu.tbu_protoid != 0 || tpdu.tbu_protover != 0) { + goto out; + } + + switch (tpdu.tbu_bpdutype) { + case BSTP_MSGTYPE_TCN: + + STPLOG (("STP : Rx TCN on <%s> from %02X:%02X:%02X:%02X:%02X:%02X \n", bif->ifp->if_xname, + eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2], + eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5])); + + tu.tu_message_type = tpdu.tbu_bpdutype; + bstp_received_tcn_bpdu(sc, bif, &tu); + break; + + case BSTP_MSGTYPE_CFG: + + STPLOG (("STP : Rx CFG on <%s> from %02X:%02X:%02X:%02X:%02X:%02X \n", bif->ifp->if_xname, + eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2], + eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5])); + + if ((m = m_pullup(m, sizeof(cpdu))) == NULL) + goto out; + bcopy(mtod(m, struct bstp_cpdu *), &cpdu, sizeof(cpdu)); + + cu.cu_rootid = + (((u_int64_t)ntohs(cpdu.cbu_rootpri)) << 48) | + (((u_int64_t)cpdu.cbu_rootaddr[0]) << 40) | + (((u_int64_t)cpdu.cbu_rootaddr[1]) << 32) | + (((u_int64_t)cpdu.cbu_rootaddr[2]) << 24) | + (((u_int64_t)cpdu.cbu_rootaddr[3]) << 16) | + (((u_int64_t)cpdu.cbu_rootaddr[4]) << 8) | + (((u_int64_t)cpdu.cbu_rootaddr[5]) << 0); + + cu.cu_bridge_id = + (((u_int64_t)ntohs(cpdu.cbu_bridgepri)) << 48) | + (((u_int64_t)cpdu.cbu_bridgeaddr[0]) << 40) | + (((u_int64_t)cpdu.cbu_bridgeaddr[1]) << 32) | + (((u_int64_t)cpdu.cbu_bridgeaddr[2]) << 24) | + (((u_int64_t)cpdu.cbu_bridgeaddr[3]) << 16) | + (((u_int64_t)cpdu.cbu_bridgeaddr[4]) << 8) | + (((u_int64_t)cpdu.cbu_bridgeaddr[5]) << 0); + + cu.cu_root_path_cost = ntohl(cpdu.cbu_rootpathcost); + cu.cu_message_age = ntohs(cpdu.cbu_messageage); + 
cu.cu_max_age = ntohs(cpdu.cbu_maxage); + cu.cu_hello_time = ntohs(cpdu.cbu_hellotime); + cu.cu_forward_delay = ntohs(cpdu.cbu_forwarddelay); + cu.cu_port_id = ntohs(cpdu.cbu_portid); + cu.cu_message_type = cpdu.cbu_bpdutype; + cu.cu_topology_change_acknowledgment = + (cpdu.cbu_flags & BSTP_FLAG_TCA) ? 1 : 0; + cu.cu_topology_change = + (cpdu.cbu_flags & BSTP_FLAG_TC) ? 1 : 0; + bstp_received_config_bpdu(sc, bif, &cu); + break; + default: + goto out; + } + +out: + if (m) + m_freem(m); + return (NULL); +} + +void +bstp_received_config_bpdu(sc, bif, cu) + struct bridge_softc *sc; + struct bridge_iflist *bif; + struct bstp_config_unit *cu; +{ + int root; + + root = bstp_root_bridge(sc); + + if (bif->bif_state != BSTP_IFSTATE_DISABLED) { + if (bstp_supersedes_port_info(sc, bif, cu)) { + STPLOG ((" Rx CFG on <%s> superseds port info; RE-Evaluating...\n", bif->ifp->if_xname)); + bstp_record_config_information(sc, bif, cu); + bstp_configuration_update(sc); + bstp_port_state_selection(sc); + + if ((!bstp_root_bridge(sc)) && root) { + bstp_timer_stop(&sc->sc_hello_timer); + + if (sc->sc_topology_change_detected) { + bstp_timer_stop(&sc->sc_topology_change_timer); + bstp_transmit_tcn(sc); + bstp_timer_start(&sc->sc_tcn_timer, 0); + } + } + + if (bif == sc->sc_root_port) { + bstp_record_config_timeout_values(sc, cu); + bstp_config_bpdu_generation(sc); + + if (cu->cu_topology_change_acknowledgment) { + bstp_topology_change_acknowledged(sc); + } + } + } else if (bstp_designated_port(sc, bif)) { + STPLOG ((" Port <%s> supersedes Rx CFG info\n", bif->ifp->if_xname)); + bstp_transmit_config(sc, bif); + } + } +} + +void +bstp_received_tcn_bpdu(sc, bif, tcn) + struct bridge_softc *sc; + struct bridge_iflist *bif; + struct bstp_tcn_unit *tcn; +{ + if (bif->bif_state != BSTP_IFSTATE_DISABLED && + bstp_designated_port(sc, bif)) { + bstp_topology_change_detection(sc); + bstp_acknowledge_topology_change(sc, bif); + } +} + +void +bstp_hello_timer_expiry(sc) + struct bridge_softc *sc; +{ + 
bstp_config_bpdu_generation(sc); + bstp_timer_start(&sc->sc_hello_timer, 0); +} + +void +bstp_message_age_timer_expiry(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + int root; + + root = bstp_root_bridge(sc); + bstp_become_designated_port(sc, bif); + bstp_configuration_update(sc); + bstp_port_state_selection(sc); + + if ((bstp_root_bridge(sc)) && (!root)) { + sc->sc_max_age = sc->sc_bridge_max_age; + sc->sc_hello_time = sc->sc_bridge_hello_time; + sc->sc_forward_delay = sc->sc_bridge_forward_delay; + bstp_topology_change_detection(sc); + bstp_timer_stop(&sc->sc_tcn_timer); + bstp_config_bpdu_generation(sc); + bstp_timer_start(&sc->sc_hello_timer, 0); + } +} + +void +bstp_forward_delay_timer_expiry(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + if (bif->bif_state == BSTP_IFSTATE_LISTENING) { + bstp_set_port_state(bif, BSTP_IFSTATE_LEARNING); + bstp_timer_start(&bif->bif_forward_delay_timer, 0); + } else if (bif->bif_state == BSTP_IFSTATE_LEARNING) { + bstp_set_port_state(bif, BSTP_IFSTATE_FORWARDING); + if (bstp_designated_for_some_port(sc) && + bif->bif_change_detection_enabled) + bstp_topology_change_detection(sc); + } +} + +int +bstp_designated_for_some_port(sc) + struct bridge_softc *sc; +{ + + struct bridge_iflist *bif; + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bif->bif_designated_bridge == sc->sc_bridge_id) + return (1); + } + return (0); +} + +void +bstp_tcn_timer_expiry(sc) + struct bridge_softc *sc; +{ + STPLOG ((" STP: tcn timer expired \n")); + bstp_transmit_tcn(sc); + bstp_timer_start(&sc->sc_tcn_timer, 0); +} + +void +bstp_topology_change_timer_expiry(sc) + struct bridge_softc *sc; +{ + STPLOG ((" STP: topology change timer expired \n")); + sc->sc_topology_change_detected = 0; + sc->sc_topology_change = 0; +} + +void +bstp_hold_timer_expiry(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + if (bif->bif_config_pending) + 
bstp_transmit_config(sc, bif); +} + +void +bstp_initialization(sc) + struct bridge_softc *sc; +{ + struct bridge_iflist *bif, *mif; + struct arpcom *ac, *mac; + + mif = NULL; mac = NULL; + /* Browse through the ethernet address of each interface and pick the + * one which has lowest value + */ + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) { + continue; + } + if (bif->ifp->if_type != IFT_ETHER) { + continue; + } + bif->bif_port_id = (bif->bif_priority << 8) | + (bif->ifp->if_index & 0xff); + + if (mif == NULL) { + mif = bif; + mac = (struct arpcom *)bif->ifp; + continue; + } + ac = (struct arpcom *)bif->ifp; + if (memcmp(ac->ac_enaddr, mac->ac_enaddr, ETHER_ADDR_LEN) < 0) { + mif = bif; + mac = (struct arpcom *)bif->ifp; + continue; + } + } + + if (mif == NULL) { + bstp_stop(sc); + return; + } + + /* Configure bridge-id as specified in standards + */ + sc->sc_bridge_id = + (((u_int64_t)sc->sc_bridge_priority) << 48) | + (((u_int64_t)mac->ac_enaddr[0]) << 40) | + (((u_int64_t)mac->ac_enaddr[1]) << 32) | + (mac->ac_enaddr[2] << 24) | (mac->ac_enaddr[3] << 16) | + (mac->ac_enaddr[4] << 8) | (mac->ac_enaddr[5]); + + /* Configure self as root bridge + */ + sc->sc_designated_root = sc->sc_bridge_id; + sc->sc_root_path_cost = 0; + sc->sc_root_port = NULL; + + sc->sc_max_age = sc->sc_bridge_max_age; + sc->sc_hello_time = sc->sc_bridge_hello_time; + sc->sc_forward_delay = sc->sc_bridge_forward_delay; + sc->sc_topology_change_detected = 0; + sc->sc_topology_change = 0; + bstp_timer_stop(&sc->sc_tcn_timer); + bstp_timer_stop(&sc->sc_topology_change_timer); + + /* If there is a timeout already set on this, cancel it and restart. + * The intent is to avoid setting duplicate timeouts. If the timeout + * is already set, untimeout would cancel it. 
+ */ +#ifdef __ECOS + if (!bstp_init_done) { + untimeout(bstp_tick, sc); + timeout(bstp_tick, sc, hz); + bstp_init_done = 1; + } +#else + if (!timeout_initialized(&sc->sc_bstptimeout)) + timeout_set(&sc->sc_bstptimeout, bstp_tick, sc); + if (!timeout_pending(&sc->sc_bstptimeout)) + timeout_add(&sc->sc_bstptimeout, hz); +#endif // __ECOS + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (bif->bif_flags & IFBIF_STP) { + STPLOG (("%s:%s ..(en). \n", __FUNCTION__, bif->ifp->if_xname)); + bstp_enable_port(sc, bif); + } + else { + STPLOG (("%s:%s ..(dis). \n", __FUNCTION__, bif->ifp->if_xname)); + bstp_disable_port(sc, bif); + } + } + + bstp_port_state_selection(sc); + bstp_config_bpdu_generation(sc); + bstp_timer_start(&sc->sc_hello_timer, 0); +} + +void +bstp_stop(sc) + struct bridge_softc *sc; +{ + + struct bridge_iflist *bif; + + STPLOG ((" %s\n", __FUNCTION__)); + LIST_FOREACH(bif, &sc->sc_iflist, next) { + bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED); + bstp_timer_stop(&bif->bif_hold_timer); + bstp_timer_stop(&bif->bif_message_age_timer); + bstp_timer_stop(&bif->bif_forward_delay_timer); + } + +#ifdef __ECOS + if (bstp_init_done) { + STPLOG (("##### untimeout ##### loc 1\n")); + untimeout(bstp_tick, sc); + bstp_init_done = 0; + } +#else + if (timeout_initialized(&sc->sc_bstptimeout) && + timeout_pending(&sc->sc_bstptimeout)) + timeout_del(&sc->sc_bstptimeout); +#endif //__ECOS + + bstp_timer_stop(&sc->sc_topology_change_timer); + bstp_timer_stop(&sc->sc_tcn_timer); + bstp_timer_stop(&sc->sc_hello_timer); + +} + +void +bstp_initialize_port(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + bstp_become_designated_port(sc, bif); + bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING); + bif->bif_topology_change_acknowledge = 0; + bif->bif_config_pending = 0; + bstp_enable_change_detection(bif); + bstp_timer_stop(&bif->bif_message_age_timer); + bstp_timer_stop(&bif->bif_forward_delay_timer); + bstp_timer_stop(&bif->bif_hold_timer); +} + +void 
+bstp_enable_port(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + bstp_initialize_port(sc, bif); + bstp_port_state_selection(sc); +} + +void +bstp_disable_port(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + int root; + + root = bstp_root_bridge(sc); + bstp_become_designated_port(sc, bif); + bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED); + bif->bif_topology_change_acknowledge = 0; + bif->bif_config_pending = 0; + bstp_timer_stop(&bif->bif_message_age_timer); + bstp_timer_stop(&bif->bif_forward_delay_timer); + bstp_configuration_update(sc); + bridge_rtdelete(sc, bif->ifp, 1); + + if (bstp_root_bridge(sc) && (!root)) { + sc->sc_max_age = sc->sc_bridge_max_age; + sc->sc_hello_time = sc->sc_bridge_hello_time; + sc->sc_forward_delay = sc->sc_bridge_forward_delay; + bstp_topology_change_detection(sc); + bstp_timer_stop(&sc->sc_tcn_timer); + bstp_config_bpdu_generation(sc); + bstp_timer_start(&sc->sc_hello_timer, 0); + } +} + +void +bstp_set_bridge_priority(sc, new_bridge_id) + struct bridge_softc *sc; + u_int64_t new_bridge_id; +{ + int root; + struct bridge_iflist *bif; + + root = bstp_root_bridge(sc); + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bstp_designated_port(sc, bif)) + bif->bif_designated_bridge = new_bridge_id; + } + + sc->sc_bridge_id = new_bridge_id; + + bstp_configuration_update(sc); + bstp_port_state_selection(sc); + + if (bstp_root_bridge(sc) && (!root)) { + sc->sc_max_age = sc->sc_bridge_max_age; + sc->sc_hello_time = sc->sc_bridge_hello_time; + sc->sc_forward_delay = sc->sc_bridge_forward_delay; + bstp_topology_change_detection(sc); + bstp_timer_stop(&sc->sc_tcn_timer); + bstp_config_bpdu_generation(sc); + bstp_timer_start(&sc->sc_hello_timer, 0); + } +} + +void +bstp_set_port_priority(sc, bif, new_port_id) + struct bridge_softc *sc; + struct bridge_iflist *bif; + u_int16_t new_port_id; +{ + if (bstp_designated_port(sc, bif)) + bif->bif_designated_port 
= new_port_id; + + bif->bif_port_id = new_port_id; + + if ((sc->sc_bridge_id == bif->bif_designated_bridge) && + (bif->bif_port_id < bif->bif_designated_port)) { + bstp_become_designated_port(sc, bif); + bstp_port_state_selection(sc); + } +} + +void +bstp_set_path_cost(sc, bif, path_cost) + struct bridge_softc *sc; + struct bridge_iflist *bif; + u_int32_t path_cost; +{ + bif->bif_path_cost = path_cost; + bstp_configuration_update(sc); + bstp_port_state_selection(sc); +} + +void +bstp_enable_change_detection(bif) + struct bridge_iflist *bif; +{ + bif->bif_change_detection_enabled = 1; +} + +void +bstp_disable_change_detection(bif) + struct bridge_iflist *bif; +{ + bif->bif_change_detection_enabled = 0; +} + +void +bstp_ifupdstatus(sc, bif) + struct bridge_softc *sc; + struct bridge_iflist *bif; +{ + struct ifnet *ifp = bif->ifp; +#ifndef __ECOS + struct ifmediareq ifmr; +#endif + int err = EOPNOTSUPP; + + if (ifp->if_flags & IFF_UP) { +#ifndef __ECOS + ifmr.ifm_count = 0; + err = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); +#endif + if (err) { + if (bif->bif_state == BSTP_IFSTATE_DISABLED) + bstp_enable_port(sc, bif); + STPLOG (("<%s:%s> ", bif->ifp->if_xname, stp_port_state(bif->bif_state))); + return; + } +#ifndef __ECOS + if (!(ifmr.ifm_status & IFM_AVALID)) { + if (bif->bif_state == BSTP_IFSTATE_DISABLED) + bstp_enable_port(sc, bif); + return; + } + + if (ifmr.ifm_status & IFM_ACTIVE) { + if (bif->bif_state == BSTP_IFSTATE_DISABLED) + bstp_enable_port(sc, bif); + return; + } +#endif + + if (bif->bif_state != BSTP_IFSTATE_DISABLED) { + bstp_disable_port(sc, bif); + } + + return; + } + + if (bif->bif_state != BSTP_IFSTATE_DISABLED) + bstp_disable_port(sc, bif); +} + +void +bstp_tick(vsc) + void *vsc; +{ + struct bridge_softc *sc = vsc; + struct bridge_iflist *bif; + int s; + + s = splnet(); + + STPLOG (("Port Status: ")); + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + bstp_ifupdstatus(sc, bif); + } + STPLOG 
(("\n")); + + if (bstp_timer_expired(&sc->sc_hello_timer, sc->sc_hello_time)) + bstp_hello_timer_expiry(sc); + + if (bstp_timer_expired(&sc->sc_tcn_timer, sc->sc_bridge_hello_time)) + bstp_tcn_timer_expiry(sc); + + if (bstp_timer_expired(&sc->sc_topology_change_timer, + sc->sc_topology_change_time)) + bstp_topology_change_timer_expiry(sc); + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bstp_timer_expired(&bif->bif_message_age_timer, + sc->sc_max_age)) + bstp_message_age_timer_expiry(sc, bif); + } + + LIST_FOREACH(bif, &sc->sc_iflist, next) { + if (!(bif->bif_flags & IFBIF_STP)) + continue; + if (bstp_timer_expired(&bif->bif_forward_delay_timer, + sc->sc_forward_delay)) + bstp_forward_delay_timer_expiry(sc, bif); + + if (bstp_timer_expired(&bif->bif_hold_timer, + sc->sc_hold_time)) + bstp_hold_timer_expiry(sc, bif); + } + +#ifdef __ECOS + if (sc->sc_if.if_flags & IFF_RUNNING) { + timeout (bstp_tick, sc, hz); + } +#else + if (sc->sc_if.if_flags & IFF_RUNNING) + timeout_add(&sc->sc_bstptimeout, hz); +#endif //__ECOS + + splx(s); +} + +void +bstp_timer_start(t, v) + struct bridge_timer *t; + u_int16_t v; +{ + t->value = v; + t->active = 1; +} + +void +bstp_timer_stop(t) + struct bridge_timer *t; +{ + t->value = 0; + t->active = 0; +} + +int +bstp_timer_expired(t, v) + struct bridge_timer *t; + u_int16_t v; +{ + if (!t->active) + return (0); + t->value += BSTP_TICK_VAL; + if (t->value >= v) { + bstp_timer_stop(t); + return (1); + } + return (0); + +} + +int +bstp_ioctl(ifp, cmd, data) + struct ifnet *ifp; + u_long cmd; + caddr_t data; +{ + struct ifbrparam *bp = (struct ifbrparam *)data; + struct bridge_softc *sc = (struct bridge_softc *)ifp; + int r = 0, err = 0; + + switch (cmd) { + case SIOCBRDGGPRI: + bp->ifbrp_prio = sc->sc_bridge_priority; + break; + case SIOCBRDGGMA: + bp->ifbrp_maxage = sc->sc_bridge_max_age >> 8; + break; + case SIOCBRDGGHT: + bp->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8; + break; + 
case SIOCBRDGGFD: + bp->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8; + break; + case SIOCBRDGSPRI: + sc->sc_bridge_priority = bp->ifbrp_prio; + r = 1; + break; + case SIOCBRDGSMA: + if (bp->ifbrp_maxage == 0) { + err = EINVAL; + break; + } + sc->sc_bridge_max_age = bp->ifbrp_maxage << 8; + r = 1; + break; + case SIOCBRDGSHT: + if (bp->ifbrp_hellotime == 0) { + err = EINVAL; + break; + } + sc->sc_bridge_hello_time = bp->ifbrp_hellotime << 8; + r = 1; + break; + case SIOCBRDGSFD: + if (bp->ifbrp_fwddelay == 0) { + err = EINVAL; + break; + } + sc->sc_bridge_forward_delay = bp->ifbrp_fwddelay << 8; + r = 1; + break; + case SIOCBRDGSIFCOST: + case SIOCBRDGSIFPRIO: + case SIOCBRDGSIFFLGS: + case SIOCBRDGADD: + case SIOCBRDGDEL: + r = 1; + break; + default: + break; + } + + if (r) + bstp_initialization(sc); + + return (err); +} + +#endif /* NBRIDGE */ +#endif /* CYGPKG_NET_BRIDGE_STP_CODE */ diff --git a/ecos/packages/net/tcpip/current/src/sys/net/if.c b/ecos/packages/net/tcpip/current/src/sys/net/if.c new file mode 100644 index 0000000..3a07cb7 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/if.c @@ -0,0 +1,959 @@ +//========================================================================== +// +// sys/net/if.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: if.c,v 1.25 1999/12/08 06:50:17 itojun Exp $ */ +/* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1980, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if.c 8.3 (Berkeley) 1/4/94 + */ + +#ifdef __ECOS +#include <pkgconf/net.h> +#else +#include "bpfilter.h" +#include "bridge.h" +#endif + +#include <sys/param.h> +#include <sys/mbuf.h> +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/proc.h> +#endif +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/kernel.h> +#include <sys/ioctl.h> + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_types.h> +#include <net/radix.h> + +#include <net/route.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/if_ether.h> +#include <netinet/igmp.h> +#ifdef MROUTING +#include <netinet/ip_mroute.h> +#endif +#endif + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#endif + +#ifdef IPFILTER +#include <netinet/ip_fil_compat.h> +#include <netinet/ip_fil.h> +#include <netinet/ip_nat.h> +#endif + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#if NBRIDGE > 0 +#include <net/if_bridge.h> +#endif + +void if_attachsetup __P((struct ifnet *)); +int if_detach_rtdelete __P((struct radix_node *, void *)); + +int ifqmaxlen = IFQ_MAXLEN; +void if_slowtimo __P((void *arg)); + +#ifdef INET6 +/* + * XXX: declare here to avoid to include many inet6 related files.. + * should be more generalized? + */ +extern void nd6_setmtu __P((struct ifnet *)); +#endif + +/* + * Network interface utility routines. 
+ * + * Routines with ifa_ifwith* names take sockaddr *'s as + * parameters. + */ +void +ifinit() +{ + register struct ifnet *ifp; + + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + if (ifp->if_snd.ifq_maxlen == 0) + ifp->if_snd.ifq_maxlen = ifqmaxlen; + if_slowtimo(NULL); +} + +int if_index = 0; +struct ifaddr **ifnet_addrs = NULL; +struct ifnet **ifindex2ifnet = NULL; + +/* + * Attach an interface to the + * list of "active" interfaces. + */ +void +if_attachsetup(ifp) + struct ifnet *ifp; +{ + unsigned int socksize, ifasize; + int namelen, masklen; + register struct sockaddr_dl *sdl; + register struct ifaddr *ifa; + static int if_indexlim = 8; + + ifp->if_index = ++if_index; + + /* + * We have some arrays that should be indexed by if_index. + * since if_index will grow dynamically, they should grow too. + * struct ifadd **ifnet_addrs + * struct ifnet **ifindex2ifnet + */ + if (ifnet_addrs == 0 || ifindex2ifnet == 0 || if_index >= if_indexlim) { + size_t n; + caddr_t q; + + while (if_index >= if_indexlim) + if_indexlim <<= 1; + + /* grow ifnet_addrs */ + n = if_indexlim * sizeof(ifa); + q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK); + bzero(q, n); + if (ifnet_addrs) { + bcopy((caddr_t)ifnet_addrs, q, n/2); + free((caddr_t)ifnet_addrs, M_IFADDR); + } + ifnet_addrs = (struct ifaddr **)q; + + /* grow ifindex2ifnet */ + n = if_indexlim * sizeof(struct ifnet *); + q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK); + bzero(q, n); + if (ifindex2ifnet) { + bcopy((caddr_t)ifindex2ifnet, q, n/2); + free((caddr_t)ifindex2ifnet, M_IFADDR); + } + ifindex2ifnet = (struct ifnet **)q; + } + + ifindex2ifnet[if_index] = ifp; + + /* + * create a Link Level name for this device + */ + namelen = strlen(ifp->if_xname); +#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m)) + masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; + socksize = masklen + ifp->if_addrlen; +#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1))) + if (socksize < sizeof(*sdl)) + 
socksize = sizeof(*sdl); + socksize = ROUNDUP(socksize); + ifasize = sizeof(*ifa) + 2 * socksize; + ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK); + bzero((caddr_t)ifa, ifasize); + sdl = (struct sockaddr_dl *)(ifa + 1); + sdl->sdl_len = socksize; + sdl->sdl_family = AF_LINK; + bcopy(ifp->if_xname, sdl->sdl_data, namelen); + sdl->sdl_nlen = namelen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + ifnet_addrs[if_index] = ifa; + ifa->ifa_ifp = ifp; + ifa->ifa_rtrequest = link_rtrequest; + TAILQ_INSERT_HEAD(&ifp->if_addrlist, ifa, ifa_list); + ifa->ifa_addr = (struct sockaddr *)sdl; + sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); + ifa->ifa_netmask = (struct sockaddr *)sdl; + sdl->sdl_len = masklen; + while (namelen != 0) + sdl->sdl_data[--namelen] = 0xff; +} + +void +if_attachhead(ifp) + struct ifnet *ifp; +{ + if (if_index == 0) + TAILQ_INIT(&ifnet); + TAILQ_INIT(&ifp->if_addrlist); + TAILQ_INSERT_HEAD(&ifnet, ifp, if_list); + if_attachsetup(ifp); +} + +void +if_attach(ifp) + struct ifnet *ifp; +{ +// Initialize queue - moved here to support "late" attaches + if (ifp->if_snd.ifq_maxlen == 0) + ifp->if_snd.ifq_maxlen = ifqmaxlen; +// + if (if_index == 0) + TAILQ_INIT(&ifnet); + TAILQ_INIT(&ifp->if_addrlist); + TAILQ_INSERT_TAIL(&ifnet, ifp, if_list); + if_attachsetup(ifp); +} + +/* + * Delete a route if it has a specific interface for output. + * This function complies to the rn_walktree callback API. + */ +int +if_detach_rtdelete(rn, vifp) + struct radix_node *rn; + void *vifp; +{ + struct ifnet *ifp = vifp; + struct rtentry *rt = (struct rtentry *)rn; + + if (rt->rt_ifp == ifp) + rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), + 0, NULL); + + /* + * XXX There should be no need to check for rt_ifa belonging to this + * interface, because then rt_ifp is set, right? + */ + + return (0); +} + +/* + * Detach an interface from everything in the kernel. Also deallocate + * private resources. 
+ * XXX So far only the INET protocol family has been looked over + * wrt resource usage that needs to be decoupled. + */ +void +if_detach(ifp) + struct ifnet *ifp; +{ + struct ifaddr *ifa; + int i, s = splimp(); + struct radix_node_head *rnh; + +#if NBRIDGE > 0 + /* Remove the interface from any bridge it is part of. */ + if (ifp->if_bridge) + bridge_ifdetach(ifp); +#endif + +#if NBPFILTER > 0 + /* If there is a bpf device attached, detach from it. */ + if (ifp->if_bpf) + bpfdetach(ifp); +#endif + + /* + * Find and remove all routes which is using this interface. + * XXX Factor out into a route.c function? + */ + for (i = 1; i <= AF_MAX; i++) { + rnh = rt_tables[i]; + if (rnh) + (*rnh->rnh_walktree)(rnh, if_detach_rtdelete, ifp); + } + +#ifdef INET + rti_delete(ifp); + myip_ifp = NULL; +#ifdef MROUTING + vif_delete(ifp); +#endif +#endif + /* + * XXX transient ifp refs? inpcb.ip_moptions.imo_multicast_ifp? + * Other network stacks than INET? + */ + + /* Remove the interface from the list of all interfaces. */ + TAILQ_REMOVE(&ifnet, ifp, if_list); + +#ifdef IPFILTER + /* XXX More ipf & ipnat cleanup needed. */ + nat_ifdetach(ifp); +#endif + + /* Deallocate private resources. */ + for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa; + ifa = TAILQ_FIRST(&ifp->if_addrlist)) { + TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list); +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + TAILQ_REMOVE(&in_ifaddr, (struct in_ifaddr *)ifa, + ia_list); +#endif + free(ifa, M_IFADDR); + } + splx(s); +} + +/* + * Locate an interface based on a complete address. 
+ */ +/*ARGSUSED*/ +struct ifaddr * +ifa_ifwithaddr(addr) + register struct sockaddr *addr; +{ + register struct ifnet *ifp; + register struct ifaddr *ifa; + +#define equal(a1, a2) \ + (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0) + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) { + if (ifa->ifa_addr->sa_family != addr->sa_family) + continue; + if (ifa->ifa_dstaddr == NULL) + continue; + if (equal(addr, ifa->ifa_addr)) + return (ifa); + if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && + /* IP6 doesn't have broadcast */ + ifa->ifa_broadaddr->sa_len != 0 && + equal(ifa->ifa_broadaddr, addr)) + return (ifa); + } + return (NULL); +} +/* + * Locate the point to point interface with a given destination address. + */ +/*ARGSUSED*/ +struct ifaddr * +ifa_ifwithdstaddr(addr) + register struct sockaddr *addr; +{ + register struct ifnet *ifp; + register struct ifaddr *ifa; + + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + if (ifp->if_flags & IFF_POINTOPOINT) + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) { + if (ifa->ifa_addr->sa_family != addr->sa_family || + ifa->ifa_dstaddr == NULL) + continue; + if (equal(addr, ifa->ifa_dstaddr)) + return (ifa); + } + return (NULL); +} + +/* + * Find an interface on a specific network. If many, choice + * is most specific found. 
+ */ +struct ifaddr * +ifa_ifwithnet(addr) + struct sockaddr *addr; +{ + register struct ifnet *ifp; + register struct ifaddr *ifa; + struct ifaddr *ifa_maybe = 0; + u_int af = addr->sa_family; + char *addr_data = addr->sa_data, *cplim; + + if (af == AF_LINK) { + register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; + if (sdl->sdl_index && sdl->sdl_index <= if_index) + return (ifnet_addrs[sdl->sdl_index]); + } + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) { + register char *cp, *cp2, *cp3; + + if (ifa->ifa_addr->sa_family != af || + ifa->ifa_netmask == 0) + next: continue; + cp = addr_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = (char *)ifa->ifa_netmask + + ifa->ifa_netmask->sa_len; + while (cp3 < cplim) + if ((*cp++ ^ *cp2++) & *cp3++) + /* want to continue for() loop */ + goto next; + if (ifa_maybe == 0 || + rn_refines((caddr_t)ifa->ifa_netmask, + (caddr_t)ifa_maybe->ifa_netmask)) + ifa_maybe = ifa; + } + return (ifa_maybe); +} + +/* + * Find an interface using a specific address family + */ +struct ifaddr * +ifa_ifwithaf(af) + register int af; +{ + register struct ifnet *ifp; + register struct ifaddr *ifa; + + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) + if (ifa->ifa_addr->sa_family == af) + return (ifa); + return (NULL); +} + +/* + * Find an interface address specific to an interface best matching + * a given address. 
+ */ +struct ifaddr * +ifaof_ifpforaddr(addr, ifp) + struct sockaddr *addr; + register struct ifnet *ifp; +{ + register struct ifaddr *ifa; + register char *cp, *cp2, *cp3; + register char *cplim; + struct ifaddr *ifa_maybe = 0; + u_int af = addr->sa_family; + + if (af >= AF_MAX) + return (NULL); + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) { + if (ifa->ifa_addr->sa_family != af) + continue; + ifa_maybe = ifa; + if (ifa->ifa_netmask == 0) { + if (equal(addr, ifa->ifa_addr) || + (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))) + return (ifa); + continue; + } + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) + return (ifa); + } + return (ifa_maybe); +} + +/* + * Default action when installing a route with a Link Level gateway. + * Lookup an appropriate real ifa to point to. + * This should be moved to /sys/net/link.c eventually. + */ +void +link_rtrequest(cmd, rt, sa) + int cmd; + register struct rtentry *rt; + struct sockaddr *sa; +{ + register struct ifaddr *ifa; + struct sockaddr *dst; + struct ifnet *ifp; + + if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) || + ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0)) + return; + if ((ifa = ifaof_ifpforaddr(dst, ifp)) != NULL) { + IFAFREE(rt->rt_ifa); + rt->rt_ifa = ifa; + ifa->ifa_refcnt++; + if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) + ifa->ifa_rtrequest(cmd, rt, sa); + } +} + +/* + * Mark an interface down and notify protocols of + * the transition. + * NOTE: must be called at splsoftnet or equivalent. 
+ */ +void +if_down(ifp) + register struct ifnet *ifp; +{ + register struct ifaddr *ifa; + + ifp->if_flags &= ~IFF_UP; + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) + pfctlinput(PRC_IFDOWN, ifa->ifa_addr); + if_qflush(&ifp->if_snd); + rt_ifmsg(ifp); +} + +/* + * Mark an interface up and notify protocols of + * the transition. + * NOTE: must be called at splsoftnet or equivalent. + */ +void +if_up(ifp) + register struct ifnet *ifp; +{ +#ifdef notyet + register struct ifaddr *ifa; +#endif + + ifp->if_flags |= IFF_UP; +#ifdef notyet + /* this has no effect on IP, and will kill all ISO connections XXX */ + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; + ifa = ifa->ifa_list.tqe_next) + pfctlinput(PRC_IFUP, ifa->ifa_addr); +#endif + rt_ifmsg(ifp); +#ifdef INET6 + in6_if_up(ifp); +#endif +} + +/* + * Flush an interface queue. + */ +void +if_qflush(ifq) + register struct ifqueue *ifq; +{ + register struct mbuf *m, *n; + + n = ifq->ifq_head; + while ((m = n) != NULL) { + n = m->m_act; + m_freem(m); + } + ifq->ifq_head = 0; + ifq->ifq_tail = 0; + ifq->ifq_len = 0; +} + +/* + * Handle interface watchdog timer routines. Called + * from softclock, we decrement timers (if set) and + * call the appropriate interface routine on expiration. + */ +void +if_slowtimo(arg) + void *arg; +{ + register struct ifnet *ifp; + int s = splimp(); + + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) { + if (ifp->if_timer == 0 || --ifp->if_timer) + continue; + if (ifp->if_watchdog) + (*ifp->if_watchdog)(ifp); + } + splx(s); + timeout(if_slowtimo, NULL, hz / IFNET_SLOWHZ); +} + +/* + * Map interface name to + * interface structure pointer. 
+ */ +struct ifnet * +ifunit(name) + register char *name; +{ + register struct ifnet *ifp; + + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + if (strcmp(ifp->if_xname, name) == 0) + return (ifp); + + return (NULL); +} + + +/* + * Map interface name in a sockaddr_dl to + * interface structure pointer. + */ +struct ifnet * +if_withname(sa) + struct sockaddr *sa; +{ + char ifname[IFNAMSIZ+1]; + struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa; + + if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) || + (sdl->sdl_nlen > IFNAMSIZ) ) + return NULL; + + /* + * ifunit wants a null-terminated name. It may not be null-terminated + * in the sockaddr. We don't want to change the caller's sockaddr, + * and there might not be room to put the trailing null anyway, so we + * make a local copy that we know we can null terminate safely. + */ + + bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen); + ifname[sdl->sdl_nlen] = '\0'; + return ifunit(ifname); +} + + +/* + * Interface ioctls. 
+ */ +int +ifioctl(so, cmd, data, p) + struct socket *so; + u_long cmd; + caddr_t data; + struct proc *p; +{ + register struct ifnet *ifp; + register struct ifreq *ifr; + int error = 0; + short oif_flags; + + switch (cmd) { + + case SIOCGIFCONF: + case OSIOCGIFCONF: + return (ifconf(cmd, data)); + } + ifr = (struct ifreq *)data; + ifp = ifunit(ifr->ifr_name); + if (ifp == 0) + return (ENXIO); + oif_flags = ifp->if_flags; + switch (cmd) { + + case SIOCGIFFLAGS: + ifr->ifr_flags = ifp->if_flags; + break; + + case SIOCGIFMETRIC: + ifr->ifr_metric = ifp->if_metric; + break; + + case SIOCGIFDATA: + error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, + sizeof(ifp->if_data)); + break; + + case SIOCSIFFLAGS: +#ifndef __ECOS + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) { + int s = splimp(); + if_down(ifp); + splx(s); + } + if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { + int s = splimp(); + if_up(ifp); + splx(s); + } + ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | + (ifr->ifr_flags &~ IFF_CANTCHANGE); + if (ifp->if_ioctl) + (void) (*ifp->if_ioctl)(ifp, cmd, data); + break; + + case SIOCSIFMETRIC: +#ifndef __ECOS + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + ifp->if_metric = ifr->ifr_metric; + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFMEDIA: +#ifndef __ECOS + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + /* FALLTHROUGH */ +#ifdef SIOCGIFSTATS + case SIOCGIFSTATS: +#ifdef SIOCGIFSTATSUD + case SIOCGIFSTATSUD: +#endif +#endif // SIOCGIFSTATS + case SIOCGIFMEDIA: + if (ifp->if_ioctl == 0) + return (EOPNOTSUPP); + error = (*ifp->if_ioctl)(ifp, cmd, data); + break; + + default: + if (so->so_proto == 0) + return (EOPNOTSUPP); +#if !defined(COMPAT_43) && !defined(COMPAT_LINUX) && !defined(COMPAT_SVR4) + error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, + (struct 
mbuf *) cmd, (struct mbuf *) data, + (struct mbuf *) ifp)); +#else + { + u_long ocmd = cmd; + + switch (cmd) { + + case SIOCSIFADDR: + case SIOCSIFDSTADDR: + case SIOCSIFBRDADDR: + case SIOCSIFNETMASK: +#if BYTE_ORDER != BIG_ENDIAN + if (ifr->ifr_addr.sa_family == 0 && + ifr->ifr_addr.sa_len < 16) { + ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len; + ifr->ifr_addr.sa_len = 16; + } +#else + if (ifr->ifr_addr.sa_len == 0) + ifr->ifr_addr.sa_len = 16; +#endif + break; + + case OSIOCGIFADDR: + cmd = SIOCGIFADDR; + break; + + case OSIOCGIFDSTADDR: + cmd = SIOCGIFDSTADDR; + break; + + case OSIOCGIFBRDADDR: + cmd = SIOCGIFBRDADDR; + break; + + case OSIOCGIFNETMASK: + cmd = SIOCGIFNETMASK; + } + error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, + (struct mbuf *) cmd, + (struct mbuf *) data, + (struct mbuf *) ifp)); + switch (ocmd) { + + case OSIOCGIFADDR: + case OSIOCGIFDSTADDR: + case OSIOCGIFBRDADDR: + case OSIOCGIFNETMASK: + *(u_int16_t *)&ifr->ifr_addr = ifr->ifr_addr.sa_family; + } + + } +#endif + break; + } + + if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) { +#ifdef INET6 + if ((ifp->if_flags & IFF_UP) != 0) { + int s = splimp(); + in6_if_up(ifp); + splx(s); + } +#endif + } + return (error); +} + +/* + * Return interface configuration + * of system. List may be used + * in later ioctl's (above) to get + * other information. + */ +/*ARGSUSED*/ +int +ifconf(cmd, data) + u_long cmd; + caddr_t data; +{ + register struct ifconf *ifc = (struct ifconf *)data; + register struct ifnet *ifp; + register struct ifaddr *ifa; + struct ifreq ifr, *ifrp; + int space = ifc->ifc_len, error = 0; + + /* If ifc->ifc_len is 0, fill it in with the needed size and return. 
*/ + if (space == 0) { + for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) { + register struct sockaddr *sa; + + if ((ifa = ifp->if_addrlist.tqh_first) == 0) + space += sizeof (ifr); + else + for (; ifa != 0; ifa = ifa->ifa_list.tqe_next) { + sa = ifa->ifa_addr; +#if defined(COMPAT_43) || defined(COMPAT_LINUX) || defined(COMPAT_SVR4) + if (cmd != OSIOCGIFCONF) +#endif + if (sa->sa_len > sizeof(*sa)) + space += sa->sa_len - + sizeof (*sa); + space += sizeof (ifr); + } + } + ifc->ifc_len = space; + return(0); + } + + ifrp = ifc->ifc_req; + for (ifp = ifnet.tqh_first; space >= sizeof (ifr) && ifp != 0; + ifp = ifp->if_list.tqe_next) { + bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ); + if ((ifa = ifp->if_addrlist.tqh_first) == 0) { + bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); + error = copyout((caddr_t)&ifr, (caddr_t)ifrp, + sizeof(ifr)); + if (error) + break; + space -= sizeof (ifr), ifrp++; + } else + for (; space >= sizeof (ifr) && ifa != 0; + ifa = ifa->ifa_list.tqe_next) { + register struct sockaddr *sa = ifa->ifa_addr; +#if defined(COMPAT_43) || defined(COMPAT_LINUX) || defined(COMPAT_SVR4) + if (cmd == OSIOCGIFCONF) { + struct osockaddr *osa = + (struct osockaddr *)&ifr.ifr_addr; + ifr.ifr_addr = *sa; + osa->sa_family = sa->sa_family; + error = copyout((caddr_t)&ifr, (caddr_t)ifrp, + sizeof (ifr)); + ifrp++; + } else +#endif + if (sa->sa_len <= sizeof(*sa)) { + ifr.ifr_addr = *sa; + error = copyout((caddr_t)&ifr, (caddr_t)ifrp, + sizeof (ifr)); + ifrp++; + } else { + space -= sa->sa_len - sizeof(*sa); + if (space < sizeof (ifr)) + break; + error = copyout((caddr_t)&ifr, (caddr_t)ifrp, + sizeof (ifr.ifr_name)); + if (error == 0) + error = copyout((caddr_t)sa, + (caddr_t)&ifrp->ifr_addr, + sa->sa_len); + ifrp = (struct ifreq *)(sa->sa_len + + (caddr_t)&ifrp->ifr_addr); + } + if (error) + break; + space -= sizeof (ifr); + } + } + ifc->ifc_len -= space; + return (error); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/if_bridge.c 
b/ecos/packages/net/tcpip/current/src/sys/net/if_bridge.c new file mode 100644 index 0000000..04093c6 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/if_bridge.c @@ -0,0 +1,2349 @@ +//========================================================================== +// +// sys/net/if_bridge.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): Jason L. Wright (jason@thought.net) +// Contributors: andrew.lunn@ascom.ch (Andrew Lunn), hmt, manu.sharma@ascom.com +// Date: 2000-07-18 +// Purpose: Ethernet bridge +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== +/* $OpenBSD: if_bridge.c,v 1.33 2000/06/20 05:50:16 jason Exp $ */ + +/* + * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Jason L. Wright + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef __ECOS +#include <pkgconf/net.h> +#else +#include "bridge.h" +#include "bpfilter.h" +#include "enc.h" +#endif + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/proc.h> +#include <sys/systm.h> +#endif +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#ifndef __ECOS +#include <sys/device.h> +#endif +#include <sys/kernel.h> +#include <machine/cpu.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/if_llc.h> +#include <net/route.h> +#include <net/netisr.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/if_ether.h> +#include <netinet/ip_ipsp.h> + +#ifndef __ECOS +#include <net/if_enc.h> +#endif +#ifdef IPFILTER +#include <netinet/ip_fil_compat.h> +#include <netinet/ip_fil.h> +#endif +#endif + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#include <net/if_bridge.h> + +#ifdef __ECOS +#include <stdio.h> /* for sprintf */ +#endif + +#ifndef BRIDGE_RTABLE_SIZE +#define BRIDGE_RTABLE_SIZE 1024 +#endif +#define BRIDGE_RTABLE_MASK (BRIDGE_RTABLE_SIZE - 1) + +/* + * Maximum number of addresses to cache + */ +#ifndef BRIDGE_RTABLE_MAX +#define BRIDGE_RTABLE_MAX 100 +#endif + +/* + * Timeout (in seconds) for entries learned dynamically + */ +#ifndef BRIDGE_RTABLE_TIMEOUT +#define BRIDGE_RTABLE_TIMEOUT 300 +#endif + +/* Spanning tree defaults */ +#define BSTP_DEFAULT_MAX_AGE (20 * 256) +#define BSTP_DEFAULT_HELLO_TIME (2 * 256) +#define BSTP_DEFAULT_FORWARD_DELAY (15 * 256) +#define BSTP_DEFAULT_HOLD_TIME (1 * 256) +#define BSTP_DEFAULT_BRIDGE_PRIORITY 0x8000 +#define BSTP_DEFAULT_PORT_PRIORITY 0x80 +#define BSTP_DEFAULT_PATH_COST 55 + +extern int ifqmaxlen; + +/* SNAP LLC header */ +struct snap { + u_int8_t dsap; + u_int8_t ssap; + u_int8_t control; + u_int8_t org[3]; + u_int16_t type; +}; + +struct bridge_softc bridgectl[CYGNUM_NET_BRIDGES]; + +void bridgeattach __P((int)); +int bridge_ioctl 
__P((struct ifnet *, u_long, caddr_t)); +void bridge_start __P((struct ifnet *)); +void bridgeintr_frame __P((struct bridge_softc *, struct mbuf *)); +void bridge_broadcast __P((struct bridge_softc *, struct ifnet *, + struct ether_header *, struct mbuf *)) __attribute ((weak)); +void bridge_stop __P((struct bridge_softc *)); +void bridge_init __P((struct bridge_softc *)); +int bridge_bifconf __P((struct bridge_softc *, struct ifbifconf *)); + +int bridge_rtfind __P((struct bridge_softc *, struct ifbaconf *)); +void bridge_rtage __P((void *)); +void bridge_rttrim __P((struct bridge_softc *)); +int bridge_rtdaddr __P((struct bridge_softc *, struct ether_addr *)); +int bridge_rtflush __P((struct bridge_softc *, int)); +struct ifnet * bridge_rtupdate __P((struct bridge_softc *, + struct ether_addr *, struct ifnet *ifp, int, u_int8_t)); +struct ifnet * bridge_rtlookup __P((struct bridge_softc *, + struct ether_addr *)); +u_int32_t bridge_hash __P((struct ether_addr *)); +int bridge_blocknonip __P((struct ether_header *, struct mbuf *)); +int bridge_addrule __P((struct bridge_iflist *, + struct ifbrlreq *, int out)); +int bridge_flushrule __P((struct bridge_iflist *)); +int bridge_brlconf __P((struct bridge_softc *, struct ifbrlconf *)); +u_int8_t bridge_filterrule __P((struct brl_node *, struct ether_header *)); +int bridge_ifenqueue __P((struct bridge_softc *, struct ifnet *, struct mbuf *)); + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE +void bridge_span (struct bridge_softc *, struct ether_header *, struct mbuf *); +#endif + +#define ETHERADDR_IS_IP_MCAST(a) \ + /* struct etheraddr *a; */ \ + ((a)->ether_addr_octet[0] == 0x01 && \ + (a)->ether_addr_octet[1] == 0x00 && \ + (a)->ether_addr_octet[2] == 0x5e) + + +#if defined(INET) && (defined(IPFILTER) || defined(IPFILTER_LKM)) +/* + * Filter hooks + */ +struct mbuf *bridge_filter __P((struct bridge_softc *, struct ifnet *, + struct ether_header *, struct mbuf *m)); +#endif + +void +bridgeattach(unused) + int unused; +{ + int 
i; + struct ifnet *ifp; + + for (i = 0; i < CYGNUM_NET_BRIDGES; i++) { + bridgectl[i].sc_brtmax = BRIDGE_RTABLE_MAX; + bridgectl[i].sc_brttimeout = (BRIDGE_RTABLE_TIMEOUT * hz) / 2; + bridgectl[i].sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE; + bridgectl[i].sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME; + bridgectl[i].sc_bridge_forward_delay= BSTP_DEFAULT_FORWARD_DELAY; + bridgectl[i].sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY; + bridgectl[i].sc_hold_time = BSTP_DEFAULT_HOLD_TIME; + LIST_INIT(&bridgectl[i].sc_iflist); + LIST_INIT(&bridgectl[i].sc_spanlist); + ifp = &bridgectl[i].sc_if; + sprintf(ifp->if_xname, "bridge%d", i); + ifp->if_softc = &bridgectl[i]; + ifp->if_mtu = ETHERMTU; + ifp->if_ioctl = bridge_ioctl; + ifp->if_output = bridge_output; + ifp->if_start = bridge_start; + ifp->if_type = IFT_PROPVIRTUAL; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = sizeof(struct ether_header); + if_attach(ifp); +#if NBPFILTER > 0 + bpfattach(&bridgectl[i].sc_if.if_bpf, ifp, + DLT_EN10MB, sizeof(struct ether_header)); +#endif + } +} + +int +bridge_ioctl(ifp, cmd, data) + struct ifnet *ifp; + u_long cmd; + caddr_t data; +{ +#ifndef __ECOS + struct proc *prc = curproc; /* XXX */ +#endif + struct ifnet *ifs; + struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc; + struct ifbreq *req = (struct ifbreq *)data; + struct ifbaconf *baconf = (struct ifbaconf *)data; + struct ifbareq *bareq = (struct ifbareq *)data; + struct ifbcachereq *bcachereq = (struct ifbcachereq *)data; + struct ifbifconf *bifconf = (struct ifbifconf *)data; + struct ifbcachetoreq *bcacheto = (struct ifbcachetoreq *)data; + struct ifbrlreq *brlreq = (struct ifbrlreq *)data; + struct ifbrlconf *brlconf = (struct ifbrlconf *)data; + struct ifreq ifreq; + int error = 0, s; + struct bridge_iflist *p; + + s = splimp(); + switch (cmd) { + case SIOCBRDGADD: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + ifs = ifunit(req->ifbr_ifsname); + if (ifs 
== NULL) { /* no such interface */ + error = ENOENT; + break; + } + if (ifs->if_bridge == (caddr_t)sc) { + error = EEXIST; + break; + } + if (ifs->if_bridge != NULL) { + error = EBUSY; + break; + } + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + /* If it's in the span list, it can't be a member. */ + LIST_FOREACH(p, &sc->sc_spanlist, next) { + if (p->ifp == ifs) + break; + } + if (p != LIST_END(&sc->sc_spanlist)) { + error = EBUSY; + break; + } +#endif + + + if (ifs->if_type == IFT_ETHER) { + if ((ifs->if_flags & IFF_UP) == 0) { + /* + * Bring interface up long enough to set + * promiscuous flag, then shut it down again. + */ + strncpy(ifreq.ifr_name, req->ifbr_ifsname, + sizeof(ifreq.ifr_name) - 1); + ifreq.ifr_name[sizeof(ifreq.ifr_name) - 1] = '\0'; + ifs->if_flags |= IFF_UP; + ifreq.ifr_flags = ifs->if_flags; + error = (*ifs->if_ioctl)(ifs, SIOCSIFFLAGS, + (caddr_t)&ifreq); + if (error != 0) + break; + + error = ifpromisc(ifs, 1); + if (error != 0) + break; + + strncpy(ifreq.ifr_name, req->ifbr_ifsname, + sizeof(ifreq.ifr_name) - 1); + ifreq.ifr_name[sizeof(ifreq.ifr_name) - 1] = '\0'; + ifs->if_flags &= ~IFF_UP; + ifreq.ifr_flags = ifs->if_flags; + error = (*ifs->if_ioctl)(ifs, SIOCSIFFLAGS, + (caddr_t)&ifreq); + if (error != 0) { + ifpromisc(ifs, 0); + break; + } + } else { + error = ifpromisc(ifs, 1); + if (error != 0) + break; + } + } +#ifndef __ECOS +#if NENC > 0 + else if (ifs->if_type == IFT_ENC) { + /* Can't bind enc0 to a bridge */ + if (ifs->if_softc == &encif[0]) { + error = EINVAL; + break; + } + } +#endif /* NENC */ +#endif + else { + error = EINVAL; + break; + } + + p = (struct bridge_iflist *) malloc( + sizeof(struct bridge_iflist), M_DEVBUF, M_NOWAIT); + if (p == NULL && ifs->if_type == IFT_ETHER) { + error = ENOMEM; + ifpromisc(ifs, 0); + break; + } + + p->ifp = ifs; + p->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; + p->bif_priority = BSTP_DEFAULT_PORT_PRIORITY; + p->bif_path_cost = BSTP_DEFAULT_PATH_COST; + SIMPLEQ_INIT(&p->bif_brlin); + 
SIMPLEQ_INIT(&p->bif_brlout); + LIST_INSERT_HEAD(&sc->sc_iflist, p, next); + ifs->if_bridge = (caddr_t)sc; + break; + case SIOCBRDGDEL: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + p = LIST_FIRST(&sc->sc_iflist); + while (p != NULL) { + if (strncmp(p->ifp->if_xname, req->ifbr_ifsname, + sizeof(p->ifp->if_xname)) == 0) { + p->ifp->if_bridge = NULL; + + error = ifpromisc(p->ifp, 0); + + LIST_REMOVE(p, next); + bridge_rtdelete(sc, p->ifp, 0); + bridge_flushrule(p); + free(p, M_DEVBUF); + break; + } + p = LIST_NEXT(p, next); + } + if (p == NULL) { + error = ENOENT; + break; + } + break; + case SIOCBRDGIFS: + error = bridge_bifconf(sc, bifconf); + break; +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + case SIOCBRDGADDS: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + + ifs = ifunit(req->ifbr_ifsname); + if (ifs == NULL) { /* no such interface */ + error = ENOENT; + break; + } + if (ifs->if_bridge == (caddr_t)sc) { + error = EEXIST; + break; + } + if (ifs->if_bridge != NULL) { + error = EBUSY; + break; + } + LIST_FOREACH(p, &sc->sc_spanlist, next) { + if (p->ifp == ifs) + break; + } + if (p != LIST_END(&sc->sc_spanlist)) { + error = EBUSY; + break; + } + p = (struct bridge_iflist *)malloc( + sizeof(struct bridge_iflist), M_DEVBUF, M_NOWAIT); + if (p == NULL) { + error = ENOMEM; + break; + } + bzero(p, sizeof(struct bridge_iflist)); + p->ifp = ifs; + SIMPLEQ_INIT(&p->bif_brlin); + SIMPLEQ_INIT(&p->bif_brlout); + LIST_INSERT_HEAD(&sc->sc_spanlist, p, next); + break; + case SIOCBRDGDELS: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + + LIST_FOREACH(p, &sc->sc_spanlist, next) { + if (strncmp(p->ifp->if_xname, req->ifbr_ifsname, + sizeof(p->ifp->if_xname)) == 0) { + LIST_REMOVE(p, next); + free(p, M_DEVBUF); + break; + } + } + if (p == LIST_END(&sc->sc_spanlist)) { + error = ENOENT; + break; + } + break; +#endif + + case SIOCBRDGGIFFLGS: + ifs = 
ifunit(req->ifbr_ifsname); + if (ifs == NULL) { + error = ENOENT; + break; + } + if ((caddr_t)sc != ifs->if_bridge) { + error = ESRCH; + break; + } + p = LIST_FIRST(&sc->sc_iflist); + while (p != NULL && p->ifp != ifs) { + p = LIST_NEXT(p, next); + } + if (p == NULL) { + error = ESRCH; + break; + } + req->ifbr_ifsflags = p->bif_flags; + req->ifbr_state = p->bif_state; + req->ifbr_priority = p->bif_priority; + req->ifbr_path_cost = p->bif_path_cost; + req->ifbr_portno = p->ifp->if_index & 0xff; + break; + case SIOCBRDGSIFFLGS: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + ifs = ifunit(req->ifbr_ifsname); + if (ifs == NULL) { + error = ENOENT; + break; + } + if ((caddr_t)sc != ifs->if_bridge) { + error = ESRCH; + break; + } + p = LIST_FIRST(&sc->sc_iflist); + while (p != NULL && p->ifp != ifs) { + p = LIST_NEXT(p, next); + } + if (p == NULL) { + error = ESRCH; + break; + } + p->bif_flags = req->ifbr_ifsflags; + break; + case SIOCBRDGSIFPRIO: + case SIOCBRDGSIFCOST: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + + ifs = ifunit(req->ifbr_ifsname); + if (ifs == NULL) { + error = ENOENT; + break; + } + if ((caddr_t)sc != ifs->if_bridge) { + error = ESRCH; + break; + } + LIST_FOREACH(p, &sc->sc_iflist, next) { + if (p->ifp == ifs) + break; + } + if (p == LIST_END(&sc->sc_iflist)) { + error = ESRCH; + break; + } + if (cmd == SIOCBRDGSIFPRIO) + p->bif_priority = req->ifbr_priority; + else { + if (req->ifbr_path_cost < 1) + error = EINVAL; + else + p->bif_path_cost = req->ifbr_path_cost; + } + break; + case SIOCBRDGRTS: + error = bridge_rtfind(sc, baconf); + break; + case SIOCBRDGFLUSH: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + error = bridge_rtflush(sc, req->ifbr_ifsflags); + break; + case SIOCBRDGSADDR: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + ifs = ifunit(bareq->ifba_ifsname); + 
if (ifs == NULL) { /* no such interface */ + error = ENOENT; + break; + } + + if (ifs->if_bridge == NULL || + ifs->if_bridge != (caddr_t)sc) { + error = ESRCH; + break; + } + + ifs = bridge_rtupdate(sc, &bareq->ifba_dst, ifs, 1, + bareq->ifba_flags); + if (ifs == NULL) + error = ENOMEM; + break; + case SIOCBRDGDADDR: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + error = bridge_rtdaddr(sc, &bareq->ifba_dst); + break; + case SIOCBRDGGCACHE: + bcachereq->ifbc_size = sc->sc_brtmax; + break; + case SIOCBRDGSCACHE: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + sc->sc_brtmax = bcachereq->ifbc_size; + bridge_rttrim(sc); + break; + case SIOCBRDGSTO: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + sc->sc_brttimeout = (bcacheto->ifbct_time * hz) / 2; + untimeout(bridge_rtage, sc); + if (bcacheto->ifbct_time != 0) + timeout(bridge_rtage, sc, sc->sc_brttimeout); + break; + case SIOCBRDGGTO: + bcacheto->ifbct_time = (2 * sc->sc_brttimeout) / hz; + break; + case SIOCSIFFLAGS: + if ((ifp->if_flags & IFF_UP) == IFF_UP) + bridge_init(sc); + + if ((ifp->if_flags & IFF_UP) == 0) + bridge_stop(sc); + + break; + case SIOCBRDGARL: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + ifs = ifunit(brlreq->ifbr_ifsname); + if (ifs == NULL) { + error = ENOENT; + break; + } + if (ifs->if_bridge == NULL || + ifs->if_bridge != (caddr_t)sc) { + error = ESRCH; + break; + } + p = LIST_FIRST(&sc->sc_iflist); + while (p != NULL && p->ifp != ifs) { + p = LIST_NEXT(p, next); + } + if (p == NULL) { + error = ESRCH; + break; + } + if ((brlreq->ifbr_action != BRL_ACTION_BLOCK && + brlreq->ifbr_action != BRL_ACTION_PASS) || + (brlreq->ifbr_flags & (BRL_FLAG_IN|BRL_FLAG_OUT)) == 0) { + error = EINVAL; + break; + } + if (brlreq->ifbr_flags & BRL_FLAG_IN) { + error = bridge_addrule(p, brlreq, 0); + if (error) + break; + } + if 
(brlreq->ifbr_flags & BRL_FLAG_OUT) { + error = bridge_addrule(p, brlreq, 1); + if (error) + break; + } + break; + case SIOCBRDGFRL: +#ifndef __ECOS + if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0) + break; +#endif + ifs = ifunit(brlreq->ifbr_ifsname); + if (ifs == NULL) { + error = ENOENT; + break; + } + if (ifs->if_bridge == NULL || + ifs->if_bridge != (caddr_t)sc) { + error = ESRCH; + break; + } + p = LIST_FIRST(&sc->sc_iflist); + while (p != NULL && p->ifp != ifs) { + p = LIST_NEXT(p, next); + } + if (p == NULL) { + error = ESRCH; + break; + } + error = bridge_flushrule(p); + break; + case SIOCBRDGGRL: + error = bridge_brlconf(sc, brlconf); + break; + case SIOCBRDGGPRI: + case SIOCBRDGGMA: + case SIOCBRDGGHT: + case SIOCBRDGGFD: + break; + case SIOCBRDGSPRI: + case SIOCBRDGSFD: + case SIOCBRDGSMA: + case SIOCBRDGSHT: +#ifndef __ECOS + error = suser(prc->p_ucred, &prc->p_acflag); +#endif + break; + default: + error = EINVAL; + } + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + if (!error) + error = bstp_ioctl(ifp, cmd, data); +#endif + splx(s); + return (error); +} + +/* Detach an interface from a bridge. 
*/ +void +bridge_ifdetach(ifp) + struct ifnet *ifp; +{ + struct bridge_softc *bsc = (struct bridge_softc *)ifp->if_bridge; + struct bridge_iflist *bif; + + for (bif = LIST_FIRST(&bsc->sc_iflist); bif; + bif = LIST_NEXT(bif, next)) + if (bif->ifp == ifp) { + LIST_REMOVE(bif, next); + bridge_rtdelete(bsc, ifp, 0); + bridge_flushrule(bif); + free(bif, M_DEVBUF); + ifp->if_bridge = NULL; + break; + } +} + +int +bridge_bifconf(sc, bifc) + struct bridge_softc *sc; + struct ifbifconf *bifc; +{ + struct bridge_iflist *p; + u_int32_t total = 0, i; +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + u_int32_t j; +#endif + int error = 0; + struct ifbreq breq; + + p = LIST_FIRST(&sc->sc_iflist); + while (p != NULL) { + total++; + p = LIST_NEXT(p, next); + } + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + p = LIST_FIRST(&sc->sc_spanlist); + while (p != NULL) { + total++; + p = LIST_NEXT(p, next); + } +#endif + + if (bifc->ifbic_len == 0) { + i = total; + goto done; + } + + p = LIST_FIRST(&sc->sc_iflist); + i = 0; + while (p != NULL && bifc->ifbic_len > i * sizeof(breq)) { + strncpy(breq.ifbr_name, sc->sc_if.if_xname, + sizeof(breq.ifbr_name)-1); + breq.ifbr_name[sizeof(breq.ifbr_name) - 1] = '\0'; + strncpy(breq.ifbr_ifsname, p->ifp->if_xname, + sizeof(breq.ifbr_ifsname)-1); + breq.ifbr_ifsname[sizeof(breq.ifbr_ifsname) - 1] = '\0'; + breq.ifbr_ifsflags = p->bif_flags; + breq.ifbr_state = p->bif_state; + breq.ifbr_priority = p->bif_priority; + breq.ifbr_path_cost = p->bif_path_cost; + breq.ifbr_portno = p->ifp->if_index & 0xff; + error = copyout((caddr_t)&breq, + (caddr_t)(bifc->ifbic_req + i), sizeof(breq)); + if (error) + goto done; + p = LIST_NEXT(p, next); + i++; + bifc->ifbic_len -= sizeof(breq); + } + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + p = LIST_FIRST(&sc->sc_spanlist); + j = 0; + while (p != NULL && bifc->ifbic_len > j * sizeof(breq)) { + strncpy(breq.ifbr_name, sc->sc_if.if_xname, + sizeof(breq.ifbr_name)-1); + breq.ifbr_name[sizeof(breq.ifbr_name) - 1] = '\0'; + strncpy(breq.ifbr_ifsname, 
p->ifp->if_xname, + sizeof(breq.ifbr_ifsname)-1); + breq.ifbr_ifsname[sizeof(breq.ifbr_ifsname) - 1] = '\0'; + breq.ifbr_ifsflags = p->bif_flags | IFBIF_SPAN; + breq.ifbr_state = p->bif_state; + breq.ifbr_priority = p->bif_priority; + breq.ifbr_path_cost = p->bif_path_cost; + breq.ifbr_portno = p->ifp->if_index & 0xff; + error = copyout((caddr_t)&breq, + (caddr_t)(bifc->ifbic_req + j), sizeof(breq)); + if (error) + goto done; + p = LIST_NEXT(p, next); + j++; + bifc->ifbic_len -= sizeof(breq); + } +#endif +done: + bifc->ifbic_len = i * sizeof(breq); + return (error); +} + +int +bridge_brlconf(sc, bc) + struct bridge_softc *sc; + struct ifbrlconf *bc; +{ + struct ifnet *ifp; + struct bridge_iflist *ifl; + struct brl_node *n; + struct ifbrlreq req; + int error = 0; + u_int32_t i, total=0; + + ifp = ifunit(bc->ifbrl_ifsname); + if (ifp == NULL) + return (ENOENT); + if (ifp->if_bridge == NULL || ifp->if_bridge != (caddr_t)sc) + return (ESRCH); + ifl = LIST_FIRST(&sc->sc_iflist); + while (ifl != NULL && ifl->ifp != ifp) + ifl = LIST_NEXT(ifl, next); + if (ifl == NULL) + return (ESRCH); + + n = SIMPLEQ_FIRST(&ifl->bif_brlin); + while (n != NULL) { + total++; + n = SIMPLEQ_NEXT(n, brl_next); + } + n = SIMPLEQ_FIRST(&ifl->bif_brlout); + while (n != NULL) { + total++; + n = SIMPLEQ_NEXT(n, brl_next); + } + + if (bc->ifbrl_len == 0) { + i = total; + goto done; + } + + i = 0; + n = SIMPLEQ_FIRST(&ifl->bif_brlin); + while (n != NULL && bc->ifbrl_len > i * sizeof(req)) { + strncpy(req.ifbr_name, sc->sc_if.if_xname, + sizeof(req.ifbr_name) - 1); + req.ifbr_name[sizeof(req.ifbr_name) - 1] = '\0'; + strncpy(req.ifbr_ifsname, ifl->ifp->if_xname, + sizeof(req.ifbr_ifsname) - 1); + req.ifbr_ifsname[sizeof(req.ifbr_ifsname) - 1] = '\0'; + req.ifbr_action = n->brl_action; + req.ifbr_flags = n->brl_flags; + req.ifbr_src = n->brl_src; + req.ifbr_dst = n->brl_dst; + error = copyout((caddr_t)&req, + (caddr_t)(bc->ifbrl_buf + (i * sizeof(req))), sizeof(req)); + if (error) + goto done; + n = 
SIMPLEQ_NEXT(n, brl_next); + i++; + bc->ifbrl_len -= sizeof(req); + } + + n = SIMPLEQ_FIRST(&ifl->bif_brlout); + while (n != NULL && bc->ifbrl_len > i * sizeof(req)) { + strncpy(req.ifbr_name, sc->sc_if.if_xname, + sizeof(req.ifbr_name) - 1); + req.ifbr_name[sizeof(req.ifbr_name) - 1] = '\0'; + strncpy(req.ifbr_ifsname, ifl->ifp->if_xname, + sizeof(req.ifbr_ifsname) - 1); + req.ifbr_ifsname[sizeof(req.ifbr_ifsname) - 1] = '\0'; + req.ifbr_action = n->brl_action; + req.ifbr_flags = n->brl_flags; + req.ifbr_src = n->brl_src; + req.ifbr_dst = n->brl_dst; + error = copyout((caddr_t)&req, + (caddr_t)(bc->ifbrl_buf + (i * sizeof(req))), sizeof(req)); + if (error) + goto done; + n = SIMPLEQ_NEXT(n, brl_next); + i++; + bc->ifbrl_len -= sizeof(req); + } + +done: + bc->ifbrl_len = i * sizeof(req); + return (error); +} + +void +bridge_init(sc) + struct bridge_softc *sc; +{ + struct ifnet *ifp = &sc->sc_if; + int i, s; + + if ((ifp->if_flags & IFF_RUNNING) == IFF_RUNNING) + return; + + s = splhigh(); + if (sc->sc_rts == NULL) { + sc->sc_rts = (struct bridge_rthead *)malloc( + BRIDGE_RTABLE_SIZE * (sizeof(struct bridge_rthead)), + M_DEVBUF, M_NOWAIT); + if (sc->sc_rts == NULL) { + splx(s); + return; + } + for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { + LIST_INIT(&sc->sc_rts[i]); + } + } + ifp->if_flags |= IFF_RUNNING; + splx(s); + + if (sc->sc_brttimeout != 0) + timeout(bridge_rtage, sc, sc->sc_brttimeout); +} + +/* + * Stop the bridge and deallocate the routing table. + */ +void +bridge_stop(sc) + struct bridge_softc *sc; +{ + struct ifnet *ifp = &sc->sc_if; + + /* + * If we're not running, there's nothing to do. + */ + if ((ifp->if_flags & IFF_RUNNING) == 0) + return; + + untimeout(bridge_rtage, sc); + + bridge_rtflush(sc, IFBF_FLUSHDYN); + + ifp->if_flags &= ~IFF_RUNNING; +} + +/* + * Send output from the bridge. The mbuf has the ethernet header + * already attached. We must enqueue or free the mbuf before exiting. 
+ */ +int +bridge_output(ifp, m, sa, rt) + struct ifnet *ifp; + struct mbuf *m; + struct sockaddr *sa; + struct rtentry *rt; +{ + struct ether_header *eh; + struct ifnet *dst_if; + struct ether_addr *src, *dst; + struct bridge_softc *sc; + int s; + + if (m->m_len < sizeof(*eh)) { + m = m_pullup(m, sizeof(*eh)); + if (m == NULL) + return (0); + } + eh = mtod(m, struct ether_header *); + dst = (struct ether_addr *)&eh->ether_dhost[0]; + src = (struct ether_addr *)&eh->ether_shost[0]; + sc = (struct bridge_softc *)ifp->if_bridge; + + s = splimp(); + + /* + * If bridge is down, but original output interface is up, + * go ahead and send out that interface. Otherwise the packet + * is dropped below. + */ + if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) { + dst_if = ifp; + goto sendunicast; + } + + /* + * If the packet is a broadcast or we don't know a better way to + * get there, send to all interfaces. + */ + dst_if = bridge_rtlookup(sc, dst); + if (dst_if == NULL || eh->ether_dhost[0] & 1) { + struct bridge_iflist *p; + struct mbuf *mc; + int used = 0; + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + bridge_span(sc, NULL, m); +#endif + + for (p = LIST_FIRST(&sc->sc_iflist); p != NULL; + p = LIST_NEXT(p, next)) { + if ((p->ifp->if_flags & IFF_RUNNING) == 0) + continue; + if (IF_QFULL(&p->ifp->if_snd)) { + sc->sc_if.if_oerrors++; + continue; + } + + if (LIST_NEXT(p, next) == NULL) { + used = 1; + mc = m; + } else { + mc = m_copym(m, 0, M_COPYALL, M_NOWAIT); + if (mc == NULL) { + sc->sc_if.if_oerrors++; + continue; + } + } + + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += m->m_pkthdr.len; + // Also count the bytes in the outgoing interface; normally + // done in if_ethersubr.c but here we bypass that route. 
+ p->ifp->if_obytes += m->m_pkthdr.len; + IF_ENQUEUE(&p->ifp->if_snd, mc); + if ((p->ifp->if_flags & IFF_OACTIVE) == 0) + (*p->ifp->if_start)(p->ifp); + } + if (!used) + m_freem(m); + splx(s); + return (0); + } + +sendunicast: + if ((dst_if->if_flags & IFF_RUNNING) == 0) { + m_freem(m); + splx(s); + return (0); + } + if (IF_QFULL(&dst_if->if_snd)) { + sc->sc_if.if_oerrors++; + m_freem(m); + splx(s); + return (0); + } + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += m->m_pkthdr.len; + // Also count the bytes in the outgoing interface; normally + // done in if_ethersubr.c but here we bypass that route. + dst_if->if_obytes += m->m_pkthdr.len; + IF_ENQUEUE(&dst_if->if_snd, m); + if ((dst_if->if_flags & IFF_OACTIVE) == 0) + (*dst_if->if_start)(dst_if); + splx(s); + return (0); +} + +/* + * Start output on the bridge. This function should never be called. + */ +void +bridge_start(ifp) + struct ifnet *ifp; +{ +} + +void +bridgeintr(void) +{ + struct bridge_softc *sc; + struct mbuf *m; + int i, s; + + for (i = 0; i < CYGNUM_NET_BRIDGES; i++) { + sc = &bridgectl[i]; + for (;;) { + s = splimp(); + IF_DEQUEUE(&sc->sc_if.if_snd, m); + splx(s); + if (m == NULL) + break; + bridgeintr_frame(sc, m); + } + } +} + +/* + * Loop through each bridge interface and process their input queues. + */ +void +bridgeintr_frame(sc, m) + struct bridge_softc *sc; + struct mbuf *m; +{ + int s; + struct ifnet *src_if, *dst_if; + struct bridge_iflist *ifl; + struct ether_addr *dst, *src; + struct ether_header eh; + + if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) { + m_freem(m); + return; + } + + src_if = m->m_pkthdr.rcvif; + + /* + * Pick out 802.1D packets. 
+ * */ +#ifdef CYGPKG_NET_BRIDGE_STP_CODE +#ifdef __ECOS + if (m->m_flags & (M_BCAST | M_MCAST)) { + if (bcmp (mtod(m,struct ether_header *), bstp_etheraddr, ETHER_ADDR_LEN) == 0) { + m_copydata(m, 0, sizeof(struct ether_header), (caddr_t)&eh); + m_adj (m, sizeof(struct ether_header)); + m = bstp_input(sc, src_if, &eh, m); + if (m == NULL) + return; + } + } +#endif // __ECOS +#endif + +#if NBPFILTER > 0 + if (sc->sc_if.if_bpf) + bpf_mtap(sc->sc_if.if_bpf, m); +#endif + + sc->sc_if.if_lastchange = time; + sc->sc_if.if_ipackets++; + sc->sc_if.if_ibytes += m->m_pkthdr.len; + + ifl = LIST_FIRST(&sc->sc_iflist); + while (ifl != NULL && ifl->ifp != src_if) { + ifl = LIST_NEXT(ifl, next); + } + if (ifl == NULL) { + m_freem(m); + return; + } + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + if ((ifl->bif_flags & IFBIF_STP) && + (ifl->bif_state == BSTP_IFSTATE_BLOCKING || + ifl->bif_state == BSTP_IFSTATE_LISTENING || + ifl->bif_state == BSTP_IFSTATE_DISABLED)) { + m_freem(m); + return; + } +#endif + + if (m->m_pkthdr.len < sizeof(eh)) { + m_freem(m); + return; + } + m_copydata(m, 0, sizeof(struct ether_header), (caddr_t)&eh); + dst = (struct ether_addr *)&eh.ether_dhost[0]; + src = (struct ether_addr *)&eh.ether_shost[0]; + + /* + * If interface is learning, and if source address + * is not broadcast or multicast, record it's address. + */ + if ((ifl->bif_flags & IFBIF_LEARNING) && + (eh.ether_shost[0] & 1) == 0 && + !(eh.ether_shost[0] == 0 && + eh.ether_shost[1] == 0 && + eh.ether_shost[2] == 0 && + eh.ether_shost[3] == 0 && + eh.ether_shost[4] == 0 && + eh.ether_shost[5] == 0)) + bridge_rtupdate(sc, src, src_if, 0, IFBAF_DYNAMIC); + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + if ((ifl->bif_flags & IFBIF_STP) && + (ifl->bif_state == BSTP_IFSTATE_LEARNING)) { + m_freem(m); + return; + } +#endif + /* + * At this point, the port either does not participate in stp or + * it is in forwarding state. 
+ */ + + /* + * If packet is unicast, destined for someone on "this" + * side of the bridge, drop it. + */ + if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) { + dst_if = bridge_rtlookup(sc, dst); + if (dst_if == src_if) { + m_freem(m); + return; + } + } else + dst_if = NULL; + + /* + * Multicast packets get handled a little differently: + * If interface is: + * -link0,-link1 (default) Forward all multicast + * as broadcast. + * -link0,link1 Drop non-IP multicast, forward + * as broadcast IP multicast. + * link0,-link1 Drop IP multicast, forward as + * broadcast non-IP multicast. + * link0,link1 Drop all multicast. + */ + if (m->m_flags & M_MCAST) { + if ((sc->sc_if.if_flags & + (IFF_LINK0 | IFF_LINK1)) == + (IFF_LINK0 | IFF_LINK1)) { + m_freem(m); + return; + } + if (sc->sc_if.if_flags & IFF_LINK0 && + ETHERADDR_IS_IP_MCAST(dst)) { + m_freem(m); + return; + } + if (sc->sc_if.if_flags & IFF_LINK1 && + !ETHERADDR_IS_IP_MCAST(dst)) { + m_freem(m); + return; + } + } + + if (ifl->bif_flags & IFBIF_BLOCKNONIP && bridge_blocknonip(&eh, m)) { + m_freem(m); + return; + } + + if (SIMPLEQ_FIRST(&ifl->bif_brlin) && + bridge_filterrule(SIMPLEQ_FIRST(&ifl->bif_brlin), &eh) == + BRL_ACTION_BLOCK) { + m_freem(m); + return; + } + +#if defined(INET) && (defined(IPFILTER) || defined(IPFILTER_LKM)) + m = bridge_filter(sc, src_if, &eh, m); + if (m == NULL) + return; +#endif + + /* + * If the packet is a multicast or broadcast OR if we don't + * know any better, forward it to all interfaces. 
+ */ + if ((m->m_flags & (M_BCAST | M_MCAST)) || dst_if == NULL) { + sc->sc_if.if_imcasts++; + s = splimp(); + bridge_broadcast(sc, src_if, &eh, m); + splx(s); + return; + } + + /* + * At this point, we're dealing with a unicast frame going to a + * different interface + */ + if ((dst_if->if_flags & IFF_RUNNING) == 0) { + m_freem(m); + return; + } + ifl = LIST_FIRST(&sc->sc_iflist); + while (ifl != NULL && ifl->ifp != dst_if) + ifl = LIST_NEXT(ifl, next); +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + if ((ifl->bif_flags & IFBIF_STP) && + (ifl->bif_state == BSTP_IFSTATE_DISABLED || + ifl->bif_state == BSTP_IFSTATE_BLOCKING)) { + m_freem(m); + return; + } +#endif + + if (SIMPLEQ_FIRST(&ifl->bif_brlout) && + bridge_filterrule(SIMPLEQ_FIRST(&ifl->bif_brlout), &eh) == + BRL_ACTION_BLOCK) { + m_freem(m); + return; + } + s = splimp(); + if (IF_QFULL(&dst_if->if_snd)) { + sc->sc_if.if_oerrors++; + m_freem(m); + splx(s); + return; + } + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += m->m_pkthdr.len; + // Also count the bytes in the outgoing interface; normally + // done in if_ethersubr.c but here we bypass that route. + dst_if->if_obytes += m->m_pkthdr.len; + IF_ENQUEUE(&dst_if->if_snd, m); + if ((dst_if->if_flags & IFF_OACTIVE) == 0) + (*dst_if->if_start)(dst_if); + splx(s); +} + +/* + * Receive input from an interface. Queue the packet for bridging if its + * not for us, and schedule an interrupt. + */ +struct mbuf * +bridge_input(ifp, eh, m) + struct ifnet *ifp; + struct ether_header *eh; + struct mbuf *m; +{ + struct bridge_softc *sc; + int s; + struct bridge_iflist *ifl; + struct arpcom *ac; + struct mbuf *mc; + + /* + * Make sure this interface is a bridge member. 
+ */ + if (ifp == NULL || ifp->if_bridge == NULL || m == NULL) + return (m); + + if ((m->m_flags & M_PKTHDR) == 0) + panic("bridge_input(): no HDR"); + + sc = (struct bridge_softc *)ifp->if_bridge; + if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) + return (m); + + LIST_FOREACH (ifl, &sc->sc_iflist, next) { + if (ifl->ifp == ifp) + break; + } + if (ifl == LIST_END (&sc->sc_iflist)) + return (m); + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + bridge_span(sc, eh, m); + /* + * Tap off 802.1D packets, they do not get forwarded + */ + if (m->m_flags & (M_BCAST | M_MCAST)) { + if (bcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { +#ifdef __ECOS + M_PREPEND(m, sizeof (struct ether_header), M_DONTWAIT); + if (m == NULL) + return (NULL); + bcopy(eh, mtod(m, caddr_t), sizeof(struct ether_header)); + + s = splimp (); + if (IF_QFULL(&sc->sc_if.if_snd)) { + m_freem (m); + splx (s); + return (NULL); + } + IF_ENQUEUE(&sc->sc_if.if_snd, m); + splx (s); + schednetisr (NETISR_BRIDGE); + return (NULL); +#else + m = bstp_input(sc, ifp, eh, m); + if (m == NULL) + return (NULL); +#endif + } + } +#endif + + if (m->m_flags & (M_BCAST | M_MCAST)) { + /* + * make a copy of 'm' with 'eh' tacked on to the + * beginning. Return 'm' for local processing + * and enqueue the copy. Schedule netisr. 
+ */ + mc = m_copym2(m, 0, M_COPYALL, M_NOWAIT); + if (mc == NULL) + return (m); + M_PREPEND(mc, sizeof(struct ether_header), M_DONTWAIT); + if (mc == NULL) + return (m); + bcopy(eh, mtod(mc, caddr_t), sizeof(struct ether_header)); + s = splimp(); + if (IF_QFULL(&sc->sc_if.if_snd)) { + m_freem(mc); + splx(s); + return (m); + } + IF_ENQUEUE(&sc->sc_if.if_snd, mc); + splx(s); + schednetisr(NETISR_BRIDGE); + return (m); + } + + /* + * No need to queue frames for ifs in blocking, disabled or listening state + */ +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + if ((ifl->bif_flags & IFBIF_STP) && + ((ifl->bif_state == BSTP_IFSTATE_BLOCKING) || + (ifl->bif_state == BSTP_IFSTATE_LISTENING) || + (ifl->bif_state == BSTP_IFSTATE_DISABLED))) + return (m); +#endif + + /* + * Unicast, make sure it's not for us. + */ + for (ifl = LIST_FIRST(&sc->sc_iflist);ifl; ifl = LIST_NEXT(ifl,next)) { + if (ifl->ifp->if_type != IFT_ETHER) + continue; + ac = (struct arpcom *)ifl->ifp; + if (bcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN) == 0) { + if (ifl->bif_flags & IFBIF_LEARNING) + bridge_rtupdate(sc, + (struct ether_addr *)&eh->ether_shost, + ifp, 0, IFBAF_DYNAMIC); + m->m_pkthdr.rcvif = ifl->ifp; + return (m); + } + if (bcmp(ac->ac_enaddr, eh->ether_shost, ETHER_ADDR_LEN) == 0) { + m_freem(m); + return (NULL); + } + } + M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT); + if (m == NULL) + return (NULL); + bcopy(eh, mtod(m, caddr_t), sizeof(struct ether_header)); + s = splimp(); + if (IF_QFULL(&sc->sc_if.if_snd)) { + m_freem(m); + splx(s); + return (NULL); + } + IF_ENQUEUE(&sc->sc_if.if_snd, m); + splx(s); + schednetisr(NETISR_BRIDGE); + return (NULL); +} + +/* + * Send a frame to all interfaces that are members of the bridge + * (except the one it came in on). This code assumes that it is + * running at splnet or higher. 
+ */ +void +bridge_broadcast(sc, ifp, eh, m) + struct bridge_softc *sc; + struct ifnet *ifp; + struct ether_header *eh; + struct mbuf *m; +{ + struct bridge_iflist *p; + struct mbuf *mc; + int used = 0; + + for (p = LIST_FIRST(&sc->sc_iflist); p; p = LIST_NEXT(p, next)) { + /* + * Don't retransmit out of the same interface where + * the packet was received from. + */ + if (p->ifp->if_index == ifp->if_index) + continue; + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE + if ((p->bif_flags & IFBIF_STP) && + (p->bif_state != BSTP_IFSTATE_FORWARDING)) + continue; +#endif + + if ((p->bif_flags & IFBIF_DISCOVER) == 0 && + (m->m_flags & (M_BCAST | M_MCAST)) == 0) + continue; + + if ((p->ifp->if_flags & IFF_RUNNING) == 0) + continue; + + if (IF_QFULL(&p->ifp->if_snd)) { + sc->sc_if.if_oerrors++; + continue; + } + + if (SIMPLEQ_FIRST(&p->bif_brlout) && + bridge_filterrule(SIMPLEQ_FIRST(&p->bif_brlout), eh) == + BRL_ACTION_BLOCK) + continue; + + /* If last one, reuse the passed-in mbuf */ + if (LIST_NEXT(p, next) == NULL) { + mc = m; + used = 1; + } else { + mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT); + if (mc == NULL) { + sc->sc_if.if_oerrors++; + continue; + } + } + + if (p->bif_flags & IFBIF_BLOCKNONIP && + bridge_blocknonip(eh, mc)) { + m_freem(mc); + continue; + } + + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += mc->m_pkthdr.len; + if (ifp && ((eh->ether_shost[0] & 1) == 0) ) + ifp->if_omcasts++; + // Also count the bytes in the outgoing interface; normally + // done in if_ethersubr.c but here we bypass that route. 
+ p->ifp->if_obytes += m->m_pkthdr.len; + IF_ENQUEUE(&p->ifp->if_snd, mc); + if ((p->ifp->if_flags & IFF_OACTIVE) == 0) + (*p->ifp->if_start)(p->ifp); + } + + if (!used) + m_freem(m); +} + +#ifdef CYGPKG_NET_BRIDGE_STP_CODE +void +bridge_span(sc, eh, morig) + struct bridge_softc *sc; + struct ether_header *eh; + struct mbuf *morig; +{ + struct bridge_iflist *p; + struct ifnet *ifp; + struct mbuf *mc, *m; + int error; + + if (LIST_EMPTY(&sc->sc_spanlist)) + return; + + m = m_copym2(morig, 0, M_COPYALL, M_NOWAIT); + if (m == NULL) + return; + if (eh != NULL) { + M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT); + if (m == NULL) + return; + bcopy(eh, mtod(m, caddr_t), sizeof(struct ether_header)); + } + + LIST_FOREACH(p, &sc->sc_spanlist, next) { + ifp = p->ifp; + + if ((ifp->if_flags & IFF_RUNNING) == 0) + continue; + +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd) == 0) +#endif + if (IF_QFULL(&ifp->if_snd)) { + IF_DROP(&ifp->if_snd); + sc->sc_if.if_oerrors++; + continue; + } + + mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT); + if (mc == NULL) { + sc->sc_if.if_oerrors++; + continue; + } + + error = bridge_ifenqueue(sc, ifp, m); + if (error) + continue; + } + m_freem(m); +} +#endif + +struct ifnet * +bridge_rtupdate(sc, ea, ifp, setflags, flags) + struct bridge_softc *sc; + struct ether_addr *ea; + struct ifnet *ifp; + int setflags; + u_int8_t flags; +{ + struct bridge_rtnode *p, *q; + u_int32_t h; + int s, dir; + + s = splhigh(); + if (sc->sc_rts == NULL) { + if (setflags && flags == IFBAF_STATIC) { + sc->sc_rts = (struct bridge_rthead *)malloc( + BRIDGE_RTABLE_SIZE * + (sizeof(struct bridge_rthead)),M_DEVBUF,M_NOWAIT); + + if (sc->sc_rts == NULL) + goto done; + + for (h = 0; h < BRIDGE_RTABLE_SIZE; h++) + LIST_INIT(&sc->sc_rts[h]); + } else + goto done; + } + + h = bridge_hash(ea); + p = LIST_FIRST(&sc->sc_rts[h]); + if (p == NULL) { + if (sc->sc_brtcnt >= sc->sc_brtmax) + goto done; + p = (struct bridge_rtnode *)malloc( + sizeof(struct bridge_rtnode), M_DEVBUF, 
M_NOWAIT); + if (p == NULL) + goto done; + + bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); + p->brt_if = ifp; + p->brt_age = 1; + + if (setflags) + p->brt_flags = flags; + else + p->brt_flags = IFBAF_DYNAMIC; + + LIST_INSERT_HEAD(&sc->sc_rts[h], p, brt_next); + sc->sc_brtcnt++; + goto want; + } + + do { + q = p; + p = LIST_NEXT(p, brt_next); + + dir = memcmp(ea, &q->brt_addr, sizeof(q->brt_addr)); + if (dir == 0) { + if (setflags) { + q->brt_if = ifp; + q->brt_flags = flags; + } + + if (q->brt_if == ifp) + q->brt_age = 1; + ifp = q->brt_if; + goto want; + } + + if (dir > 0) { + if (sc->sc_brtcnt >= sc->sc_brtmax) + goto done; + p = (struct bridge_rtnode *)malloc( + sizeof(struct bridge_rtnode), M_DEVBUF, M_NOWAIT); + if (p == NULL) + goto done; + + bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); + p->brt_if = ifp; + p->brt_age = 1; + + if (setflags) + p->brt_flags = flags; + else + p->brt_flags = IFBAF_DYNAMIC; + + LIST_INSERT_BEFORE(q, p, brt_next); + sc->sc_brtcnt++; + goto want; + } + + if (p == NULL) { + if (sc->sc_brtcnt >= sc->sc_brtmax) + goto done; + p = (struct bridge_rtnode *)malloc( + sizeof(struct bridge_rtnode), M_DEVBUF, M_NOWAIT); + if (p == NULL) + goto done; + + bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); + p->brt_if = ifp; + p->brt_age = 1; + + if (setflags) + p->brt_flags = flags; + else + p->brt_flags = IFBAF_DYNAMIC; + LIST_INSERT_AFTER(q, p, brt_next); + sc->sc_brtcnt++; + goto want; + } + } while (p != NULL); + +done: + ifp = NULL; +want: + splx(s); + return (ifp); +} + +struct ifnet * +bridge_rtlookup(sc, ea) + struct bridge_softc *sc; + struct ether_addr *ea; +{ + struct bridge_rtnode *p; + u_int32_t h; + int s, dir; + + /* + * Lock out everything else + */ + s = splhigh(); + + if (sc->sc_rts == NULL) + goto fail; + + h = bridge_hash(ea); + p = LIST_FIRST(&sc->sc_rts[h]); + while (p != NULL) { + dir = memcmp(ea, &p->brt_addr, sizeof(p->brt_addr)); + if (dir == 0) { + splx(s); + return (p->brt_if); + } + if (dir > 0) + goto fail; + p = 
LIST_NEXT(p, brt_next); + } +fail: + splx(s); + return (NULL); +} + +/* + * The following hash function is adapted from 'Hash Functions' by Bob Jenkins + * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). + * "You may use this code any way you wish, private, educational, or + * commercial. It's free." + */ +#define mix(a,b,c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while(0) + +u_int32_t +bridge_hash(addr) + struct ether_addr *addr; +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0xdeadbeef; + + b += addr->ether_addr_octet[5] << 8; + b += addr->ether_addr_octet[4]; + a += addr->ether_addr_octet[3] << 24; + a += addr->ether_addr_octet[2] << 16; + a += addr->ether_addr_octet[1] << 8; + a += addr->ether_addr_octet[0]; + + mix(a, b, c); + return (c & BRIDGE_RTABLE_MASK); +} + +/* + * Trim the routing table so that we've got a number of routes + * less than or equal to the maximum. + */ +void +bridge_rttrim(sc) + struct bridge_softc *sc; +{ + struct bridge_rtnode *n, *p; + int s, i; + + s = splhigh(); + if (sc->sc_rts == NULL) + goto done; + + /* + * Make sure we have to trim the address table + */ + if (sc->sc_brtcnt <= sc->sc_brtmax) + goto done; + + /* + * Force an aging cycle, this might trim enough addresses. 
+ */ + splx(s); + bridge_rtage(sc); + s = splhigh(); + + if (sc->sc_brtcnt <= sc->sc_brtmax) + goto done; + + for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { + n = LIST_FIRST(&sc->sc_rts[i]); + while (n != NULL) { + p = LIST_NEXT(n, brt_next); + if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { + LIST_REMOVE(n, brt_next); + sc->sc_brtcnt--; + free(n, M_DEVBUF); + n = p; + if (sc->sc_brtcnt <= sc->sc_brtmax) + goto done; + } + } + } + +done: + if (sc->sc_rts != NULL && sc->sc_brtcnt == 0 && + (sc->sc_if.if_flags & IFF_UP) == 0) { + free(sc->sc_rts, M_DEVBUF); + sc->sc_rts = NULL; + } + + splx(s); +} + +/* + * Perform an aging cycle + */ +void +bridge_rtage(vsc) + void *vsc; +{ + struct bridge_softc *sc = (struct bridge_softc *)vsc; + struct bridge_rtnode *n, *p; + int s, i; + + s = splhigh(); + if (sc->sc_rts == NULL) { + splx(s); + return; + } + + for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { + n = LIST_FIRST(&sc->sc_rts[i]); + while (n != NULL) { + if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_STATIC) { + n->brt_age = !n->brt_age; + if (n->brt_age) + n->brt_age = 0; + n = LIST_NEXT(n, brt_next); + } else if (n->brt_age) { + n->brt_age = 0; + n = LIST_NEXT(n, brt_next); + } else { + p = LIST_NEXT(n, brt_next); + LIST_REMOVE(n, brt_next); + sc->sc_brtcnt--; + free(n, M_DEVBUF); + n = p; + } + } + } + splx(s); + + if (sc->sc_brttimeout != 0) + timeout(bridge_rtage, sc, sc->sc_brttimeout); +} + +/* + * Remove all dynamic addresses from the cache + */ +int +bridge_rtflush(sc, full) + struct bridge_softc *sc; + int full; +{ + int s, i; + struct bridge_rtnode *p, *n; + + s = splhigh(); + if (sc->sc_rts == NULL) + goto done; + + for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { + n = LIST_FIRST(&sc->sc_rts[i]); + while (n != NULL) { + if (full || + (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { + p = LIST_NEXT(n, brt_next); + LIST_REMOVE(n, brt_next); + sc->sc_brtcnt--; + free(n, M_DEVBUF); + n = p; + } else + n = LIST_NEXT(n, brt_next); + } + } + + if (sc->sc_brtcnt == 0 && 
(sc->sc_if.if_flags & IFF_UP) == 0) { + free(sc->sc_rts, M_DEVBUF); + sc->sc_rts = NULL; + } + +done: + splx(s); + return (0); +} + +/* + * Remove an address from the cache + */ +int +bridge_rtdaddr(sc, ea) + struct bridge_softc *sc; + struct ether_addr *ea; +{ + int h, s; + struct bridge_rtnode *p; + + s = splhigh(); + if (sc->sc_rts == NULL) + goto done; + + h = bridge_hash(ea); + p = LIST_FIRST(&sc->sc_rts[h]); + while (p != NULL) { + if (bcmp(ea, &p->brt_addr, sizeof(p->brt_addr)) == 0) { + LIST_REMOVE(p, brt_next); + sc->sc_brtcnt--; + free(p, M_DEVBUF); + if (sc->sc_brtcnt == 0 && + (sc->sc_if.if_flags & IFF_UP) == 0) { + free(sc->sc_rts, M_DEVBUF); + sc->sc_rts = NULL; + } + splx(s); + return (0); + } + p = LIST_NEXT(p, brt_next); + } + +done: + splx(s); + return (ENOENT); +} +/* + * Delete routes to a specific interface member. + */ +void +bridge_rtdelete(sc, ifp, dynonly) + struct bridge_softc *sc; + struct ifnet *ifp; +{ + int i, s; + struct bridge_rtnode *n, *p; + + s = splhigh(); + if (sc->sc_rts == NULL) + goto done; + + /* + * Loop through all of the hash buckets and traverse each + * chain looking for routes to this interface. + */ + for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { + n = LIST_FIRST(&sc->sc_rts[i]); + while (n != NULL) { + if (n->brt_if == ifp) { /* found one */ + p = LIST_NEXT(n, brt_next); + LIST_REMOVE(n, brt_next); + sc->sc_brtcnt--; + free(n, M_DEVBUF); + n = p; + } else + n = LIST_NEXT(n, brt_next); + } + } + if (sc->sc_brtcnt == 0 && (sc->sc_if.if_flags & IFF_UP) == 0) { + free(sc->sc_rts, M_DEVBUF); + sc->sc_rts = NULL; + } + +done: + splx(s); +} + +/* + * Gather all of the routes for this interface. 
+ */ +int +bridge_rtfind(sc, baconf) + struct bridge_softc *sc; + struct ifbaconf *baconf; +{ + int i, s, error = 0; + u_int32_t cnt = 0; + struct bridge_rtnode *n; + struct ifbareq bareq; + + s = splhigh(); + + if (sc->sc_rts == NULL || baconf->ifbac_len == 0) + goto done; + + for (i = 0, cnt = 0; i < BRIDGE_RTABLE_SIZE; i++) { + n = LIST_FIRST(&sc->sc_rts[i]); + while (n != NULL) { + if (baconf->ifbac_len < + (cnt + 1) * sizeof(struct ifbareq)) + goto done; + bcopy(sc->sc_if.if_xname, bareq.ifba_name, + sizeof(bareq.ifba_name)); + bcopy(n->brt_if->if_xname, bareq.ifba_ifsname, + sizeof(bareq.ifba_ifsname)); + bcopy(&n->brt_addr, &bareq.ifba_dst, + sizeof(bareq.ifba_dst)); + bareq.ifba_age = n->brt_age; + bareq.ifba_flags = n->brt_flags; + error = copyout((caddr_t)&bareq, + (caddr_t)(baconf->ifbac_req + cnt), sizeof(bareq)); + if (error) + goto done; + n = LIST_NEXT(n, brt_next); + cnt++; + } + } +done: + baconf->ifbac_len = cnt * sizeof(struct ifbareq); + splx(s); + return (error); +} + +/* + * Block non-ip frames: + * Returns 0 if frame is ip, and 1 if it should be dropped. 
+ */ +int +bridge_blocknonip(eh, m) + struct ether_header *eh; + struct mbuf *m; +{ + struct snap snap; + u_int16_t etype; + + if (m->m_pkthdr.len < sizeof(struct ether_header)) + return (1); + + etype = ntohs(eh->ether_type); + switch (etype) { + case ETHERTYPE_ARP: + case ETHERTYPE_REVARP: + case ETHERTYPE_IP: + case ETHERTYPE_IPV6: + return (0); + } + + if (etype > ETHERMTU) + return (1); + + if (m->m_pkthdr.len < + (sizeof(struct ether_header) + sizeof(struct snap))) + return (1); + + m_copydata(m, sizeof(struct ether_header), sizeof(struct snap), + (caddr_t)&snap); + + etype = ntohs(snap.type); + if (snap.dsap == LLC_SNAP_LSAP && snap.ssap == LLC_SNAP_LSAP && + snap.control == LLC_UI && + snap.org[0] == 0 && snap.org[1] == 0 && snap.org[2] == 0 && + (etype == ETHERTYPE_ARP || + etype == ETHERTYPE_REVARP || + etype == ETHERTYPE_IP || + etype == ETHERTYPE_IPV6)) { + return (0); + } + + return (1); +} + +u_int8_t +bridge_filterrule(n, eh) + struct brl_node *n; + struct ether_header *eh; +{ + u_int8_t flags; + + for (; n != NULL; n = SIMPLEQ_NEXT(n, brl_next)) { + flags = n->brl_flags & (BRL_FLAG_SRCVALID|BRL_FLAG_DSTVALID); + if (flags == 0) + return (n->brl_action); + if (flags == (BRL_FLAG_SRCVALID|BRL_FLAG_DSTVALID)) { + if (bcmp(eh->ether_shost, &n->brl_src, ETHER_ADDR_LEN)) + continue; + if (bcmp(eh->ether_dhost, &n->brl_src, ETHER_ADDR_LEN)) + continue; + return (n->brl_action); + } + if (flags == BRL_FLAG_SRCVALID) { + if (bcmp(eh->ether_shost, &n->brl_src, ETHER_ADDR_LEN)) + continue; + return (n->brl_action); + } + if (flags == BRL_FLAG_DSTVALID) { + if (bcmp(eh->ether_dhost, &n->brl_dst, ETHER_ADDR_LEN)) + continue; + return (n->brl_action); + } + } + return (BRL_ACTION_PASS); +} + +int +bridge_addrule(bif, req, out) + struct bridge_iflist *bif; + struct ifbrlreq *req; + int out; +{ + struct brl_node *n; + + n = (struct brl_node *)malloc(sizeof(struct brl_node), M_DEVBUF, M_NOWAIT); + if (n == NULL) + return (ENOMEM); + bcopy(&req->ifbr_src, 
&n->brl_src, sizeof(struct ether_addr)); + bcopy(&req->ifbr_dst, &n->brl_dst, sizeof(struct ether_addr)); + n->brl_action = req->ifbr_action; + n->brl_flags = req->ifbr_flags; + if (out) { + n->brl_flags &= ~BRL_FLAG_IN; + n->brl_flags |= BRL_FLAG_OUT; + SIMPLEQ_INSERT_TAIL(&bif->bif_brlout, n, brl_next); + } else { + n->brl_flags &= ~BRL_FLAG_OUT; + n->brl_flags |= BRL_FLAG_IN; + SIMPLEQ_INSERT_TAIL(&bif->bif_brlin, n, brl_next); + } + return (0); +} + +int +bridge_flushrule(bif) + struct bridge_iflist *bif; +{ + struct brl_node *p, *q; + + p = SIMPLEQ_FIRST(&bif->bif_brlin); + while (p != NULL) { + q = SIMPLEQ_NEXT(p, brl_next); + SIMPLEQ_REMOVE_HEAD(&bif->bif_brlin, p, brl_next); + free(p, M_DEVBUF); + p = q; + } + p = SIMPLEQ_FIRST(&bif->bif_brlout); + while (p != NULL) { + q = SIMPLEQ_NEXT(p, brl_next); + SIMPLEQ_REMOVE_HEAD(&bif->bif_brlout, p, brl_next); + free(p, M_DEVBUF); + p = q; + } + return (0); +} + +#if defined(INET) && (defined(IPFILTER) || defined(IPFILTER_LKM)) + +/* + * Maximum sized IP header + */ +union maxip { + struct ip ip; + u_int32_t _padding[16]; +}; + +/* + * Filter IP packets by peeking into the ethernet frame. This violates + * the ISO model, but allows us to act as a IP filter at the data link + * layer. 
As a result, most of this code will look familiar to those + * who've read net/if_ethersubr.c and netinet/ip_input.c + */ +struct mbuf * +bridge_filter(sc, ifp, eh, m) + struct bridge_softc *sc; + struct ifnet *ifp; + struct ether_header *eh; + struct mbuf *m; +{ + struct snap snap; + int hassnap = 0; + struct ip *ip; + int hlen; + + if (fr_checkp == NULL) + return (m); + + if (eh->ether_type != htons(ETHERTYPE_IP)) { + if (eh->ether_type > ETHERMTU || + m->m_pkthdr.len < (sizeof(struct snap) + + sizeof(struct ether_header))) + return (m); + + m_copydata(m, sizeof(struct ether_header), + sizeof(struct snap), (caddr_t)&snap); + + if (snap.dsap != LLC_SNAP_LSAP || snap.ssap != LLC_SNAP_LSAP || + snap.control != LLC_UI || + snap.org[0] != 0 || snap.org[1] != 0 || snap.org[2] || + snap.type != htons(ETHERTYPE_IP)) + return (m); + hassnap = 1; + } + + m_adj(m, sizeof(struct ether_header)); + if (hassnap) + m_adj(m, sizeof(struct snap)); + + if (m->m_pkthdr.len < sizeof(struct ip)) + goto dropit; + + /* Copy minimal header, and drop invalids */ + if (m->m_len < sizeof(struct ip) && + (m = m_pullup(m, sizeof(struct ip))) == NULL) + return (NULL); + ip = mtod(m, struct ip *); + + if (ip->ip_v != IPVERSION) + goto dropit; + + hlen = ip->ip_hl << 2; /* get whole header length */ + if (hlen < sizeof(struct ip)) + goto dropit; + if (hlen > m->m_len) { + if ((m = m_pullup(m, sizeof(struct ip))) == NULL) + return (NULL); + ip = mtod(m, struct ip *); + } + + if ((ip->ip_sum = in_cksum(m, hlen)) != 0) + goto dropit; + + NTOHS(ip->ip_len); + if (ip->ip_len < hlen) + goto dropit; + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + if (m->m_pkthdr.len < ip->ip_len) + goto dropit; + if (m->m_pkthdr.len > ip->ip_len) { + if (m->m_len == m->m_pkthdr.len) { + m->m_len = ip->ip_len; + m->m_pkthdr.len = ip->ip_len; + } else + m_adj(m, ip->ip_len - m->m_pkthdr.len); + } + + /* Finally, we get to filter the packet! 
*/ + if (fr_checkp && (*fr_checkp)(ip, hlen, ifp, 0, &m)) + return (NULL); + + /* Rebuild the IP header */ + if (m->m_len < hlen && ((m = m_pullup(m, hlen)) == NULL)) + return (NULL); + if (m->m_len < sizeof(struct ip)) + goto dropit; + ip = mtod(m, struct ip *); + HTONS(ip->ip_len); + HTONS(ip->ip_id); + HTONS(ip->ip_off); + ip->ip_sum = in_cksum(m, hlen); + + /* Reattach SNAP header */ + if (hassnap) { + M_PREPEND(m, sizeof(snap), M_DONTWAIT); + if (m == NULL) + goto dropit; + bcopy(&snap, mtod(m, caddr_t), sizeof(snap)); + } + + /* Reattach ethernet header */ + M_PREPEND(m, sizeof(*eh), M_DONTWAIT); + if (m == NULL) + goto dropit; + bcopy(eh, mtod(m, caddr_t), sizeof(*eh)); + + return (m); + +dropit: + if (m != NULL) + m_freem(m); + return (NULL); +} +#endif + +int +ifpromisc(ifp, pswitch) + struct ifnet *ifp; + int pswitch; +{ + struct ifreq ifr; + + if (pswitch) { + /* + * If the device is not configured up, we cannot put it in + * promiscuous mode. + */ + if ((ifp->if_flags & IFF_UP) == 0) + return (ENETDOWN); + if (ifp->if_pcount++ != 0) + return (0); + ifp->if_flags |= IFF_PROMISC; + } else { + if (--ifp->if_pcount > 0) + return (0); + ifp->if_flags &= ~IFF_PROMISC; + /* + * If the device is not configured up, we should not need to + * turn off promiscuous mode (device should have turned it + * off when interface went down; and will look at IFF_PROMISC + * again next time interface comes up). 
+ */ + if ((ifp->if_flags & IFF_UP) == 0) + return (0); + } + ifr.ifr_flags = ifp->if_flags; + return ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); +} + +int +bridge_ifenqueue(sc, ifp, m) + struct bridge_softc *sc; + struct ifnet *ifp; + struct mbuf *m; +{ + int error, len; + short mflags; + + len = m->m_pkthdr.len; + mflags = m->m_flags; + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error); + if (error) { + sc->sc_if.if_oerrors++; + return (error); + } + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += len; + ifp->if_obytes += len; + if (mflags & M_MCAST) + ifp->if_omcasts++; + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + return (0); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/if_ethersubr.c b/ecos/packages/net/tcpip/current/src/sys/net/if_ethersubr.c new file mode 100644 index 0000000..d1be3bd --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/if_ethersubr.c @@ -0,0 +1,1179 @@ +//========================================================================== +// +// sys/net/if_ethersubr.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: if_ethersubr.c,v 1.32 1999/12/08 06:50:17 itojun Exp $ */ +/* $NetBSD: if_ethersubr.c,v 1.19 1996/05/07 02:40:30 thorpej Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#ifndef __ECOS +#include <sys/syslog.h> +#endif + +#include <machine/cpu.h> + +#include <net/if.h> +#include <net/netisr.h> +#include <net/route.h> +#include <net/if_llc.h> +#include <net/if_dl.h> +#include <net/if_types.h> + +#include <netinet/in.h> +#ifdef INET +#include <netinet/in_var.h> +#endif +#include <netinet/if_ether.h> + +#ifndef __ECOS +#include "bridge.h" +#endif +#if NBRIDGE > 0 +#include <net/if_bridge.h> +#endif + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#include <netinet6/in6_var.h> +#include <netinet6/nd6.h> +#endif + +#ifdef NS +#include <netns/ns.h> +#include <netns/ns_if.h> +#endif + +#ifdef IPX +#include <netipx/ipx.h> +#include <netipx/ipx_if.h> +#endif + +#ifdef ISO +#include <netiso/argo_debug.h> +#include <netiso/iso.h> +#include <netiso/iso_var.h> +#include <netiso/iso_snpac.h> +#endif + +#ifdef CCITT +#include <netccitt/x25.h> +#include <netccitt/pk.h> +#include <netccitt/pk_extern.h> +#include <netccitt/dll.h> +#include <netccitt/llc_var.h> +#endif + +#ifdef NETATALK +#include <netatalk/at.h> +#include <netatalk/at_var.h> +#include <netatalk/at_extern.h> + +#define llc_snap_org_code llc_un.type_snap.org_code +#define llc_snap_ether_type llc_un.type_snap.ether_type + +extern u_char at_org_code[ 3 ]; +extern u_char aarp_org_code[ 3 ]; +#endif /* NETATALK */ + +#if defined(CCITT) +#include <sys/socketvar.h> +#endif + +#if 0 /*NRL INET6*/ +#include <netinet6/in6.h> +#include <netinet6/in6_var.h> +#endif /* INET6 */ + +u_char etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +#define senderr(e) { error = (e); goto bad;} + + +int +ether_ioctl(ifp, arp, cmd, data) + register struct ifnet *ifp; + struct arpcom *arp; + u_long cmd; + caddr_t data; +{ + struct ifaddr 
*ifa = (struct ifaddr *)data; + int error = 0; + + switch (cmd) { + +#if defined(CCITT) + case SIOCSIFCONF_X25: + ifp->if_flags |= IFF_UP; + ifa->ifa_rtrequest = cons_rtrequest; + error = x25_llcglue(PRC_IFUP, ifa->ifa_addr); + break; +#endif /* CCITT */ + case SIOCSIFADDR: + switch (ifa->ifa_addr->sa_family) { +#ifdef IPX + case AF_IPX: + { + struct ipx_addr *ina = &IA_SIPX(ifa)->sipx_addr; + + if (ipx_nullhost(*ina)) + ina->ipx_host = + *(union ipx_host *)(arp->ac_enaddr); + else + bcopy(ina->ipx_host.c_host, + arp->ac_enaddr, sizeof(arp->ac_enaddr)); + break; + } +#endif /* IPX */ +#ifdef NETATALK + case AF_APPLETALK: + /* Nothing to do. */ + break; +#endif /* NETATALK */ +#ifdef NS + /* XXX - This code is probably wrong. */ + case AF_NS: + { + struct ns_addr *ina = &IA_SNS(ifa)->sns_addr; + + if (ns_nullhost(*ina)) + ina->x_host = + *(union ns_host *)(arp->ac_enaddr); + else + bcopy(ina->x_host.c_host, + arp->ac_enaddr, sizeof(arp->ac_enaddr)); + break; + } +#endif /* NS */ + } + break; + default: + break; + } + + return error; +} + +/* + * Ethernet output routine. + * Encapsulate a packet of type family for the local net. + * Assumes that ifp is actually pointer to arpcom structure. 
+ */ +int +ether_output(ifp, m0, dst, rt0) + register struct ifnet *ifp; + struct mbuf *m0; + struct sockaddr *dst; + struct rtentry *rt0; +{ + u_int16_t etype; + int s, error = 0; + u_char edst[6]; + register struct mbuf *m = m0; + register struct rtentry *rt; + struct mbuf *mcopy = (struct mbuf *)0; + register struct ether_header *eh; + struct arpcom *ac = (struct arpcom *)ifp; + + if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) + senderr(ENETDOWN); + ifp->if_lastchange = time; + if ((rt = rt0) != NULL) { + if ((rt->rt_flags & RTF_UP) == 0) { + if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) + rt->rt_refcnt--; + else + senderr(EHOSTUNREACH); + } + if (rt->rt_flags & RTF_GATEWAY) { + if (rt->rt_gwroute == 0) + goto lookup; + if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { + rtfree(rt); rt = rt0; + lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); + if ((rt = rt->rt_gwroute) == 0) + senderr(EHOSTUNREACH); + } + } + if (rt->rt_flags & RTF_REJECT) + if (rt->rt_rmx.rmx_expire == 0 || + time.tv_sec < rt->rt_rmx.rmx_expire) + senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); + } + switch (dst->sa_family) { + +#ifdef INET + case AF_INET: + if (!arpresolve(ac, rt, m, dst, edst)) + return (0); /* if not yet resolved */ + /* If broadcasting on a simplex interface, loopback a copy */ + if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX)) + mcopy = m_copy(m, 0, (int)M_COPYALL); + etype = htons(ETHERTYPE_IP); + break; +#endif +#ifdef INET6 + case AF_INET6: +#ifndef OLDIP6OUTPUT + if (!nd6_storelladdr(ifp, rt, m, dst, (u_char *)edst)) + return(0); /* it must be impossible, but... 
*/ +#else + if (!nd6_resolve(ifp, rt, m, dst, (u_char *)edst)) + return(0); /* if not yet resolves */ +#endif + etype = htons(ETHERTYPE_IPV6); + break; +#endif +#ifdef NS + case AF_NS: + etype = htons(ETHERTYPE_NS); + bcopy((caddr_t)&(((struct sockaddr_ns *)dst)->sns_addr.x_host), + (caddr_t)edst, sizeof (edst)); + if (!bcmp((caddr_t)edst, (caddr_t)&ns_thishost, sizeof(edst))) + return (looutput(ifp, m, dst, rt)); + /* If broadcasting on a simplex interface, loopback a copy */ + if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX)) + mcopy = m_copy(m, 0, (int)M_COPYALL); + break; +#endif +#ifdef IPX + case AF_IPX: + etype = htons(ETHERTYPE_IPX); + bcopy((caddr_t)&satosipx(dst)->sipx_addr.ipx_host, + (caddr_t)edst, sizeof (edst)); + if (!bcmp((caddr_t)edst, (caddr_t)&ipx_thishost, sizeof(edst))) + return (looutput(ifp, m, dst, rt)); + /* If broadcasting on a simplex interface, loopback a copy */ + if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX)) + mcopy = m_copy(m, 0, (int)M_COPYALL); + break; +#endif +#if 0 /*NRL INET6*/ + case AF_INET6: + /* + * The bottom line here is to either queue the outgoing packet + * in the discovery engine, or fill in edst with something + * that'll work. + */ + if (m->m_flags & M_MCAST) { + /* + * If multicast dest., then use IPv6 -> Ethernet + * mcast mapping. Really simple. + */ + ETHER_MAP_IPV6_MULTICAST(&((struct sockaddr_in6 *)dst)->sin6_addr, + edst); + } else { + /* Do unicast neighbor discovery stuff. 
*/ + if (!ipv6_discov_resolve(ifp, rt, m, dst, edst)) + return 0; + } + etype = htons(ETHERTYPE_IPV6); + break; +#endif /* INET6 */ +#ifdef NETATALK + case AF_APPLETALK: { + struct at_ifaddr *aa; + + if (!aarpresolve(ac, m, (struct sockaddr_at *)dst, edst)) { +#ifdef NETATALKDEBUG + extern char *prsockaddr(struct sockaddr *); + printf("aarpresolv: failed for %s\n", prsockaddr(dst)); +#endif /* NETATALKDEBUG */ + return (0); + } + + /* + * ifaddr is the first thing in at_ifaddr + */ + aa = (struct at_ifaddr *)at_ifawithnet( + (struct sockaddr_at *)dst, + ifp->if_addrlist.tqh_first); + if (aa == 0) + goto bad; + + /* + * In the phase 2 case, we need to prepend an mbuf for the llc + * header. Since we must preserve the value of m, which is + * passed to us by value, we m_copy() the first mbuf, + * and use it for our llc header. + */ + if ( aa->aa_flags & AFA_PHASE2 ) { + struct llc llc; + + /* XXX Really this should use netisr too */ + M_PREPEND(m, AT_LLC_SIZE, M_WAIT); + /* + * FreeBSD doesn't count the LLC len in + * ifp->obytes, so they increment a length + * field here. We don't do this. 
+ */ + llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP; + llc.llc_control = LLC_UI; + bcopy(at_org_code, llc.llc_snap_org_code, + sizeof(at_org_code)); + llc.llc_snap_ether_type = htons( ETHERTYPE_AT ); + bcopy(&llc, mtod(m, caddr_t), AT_LLC_SIZE); + etype = htons(m->m_pkthdr.len); + } else { + etype = htons(ETHERTYPE_AT); + } + } break; +#endif /* NETATALK */ +#ifdef ISO + case AF_ISO: { + int snpalen; + struct llc *l; + register struct sockaddr_dl *sdl; + + if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway) && + sdl->sdl_family == AF_LINK && sdl->sdl_alen > 0) { + bcopy(LLADDR(sdl), (caddr_t)edst, sizeof(edst)); + } else { + error = iso_snparesolve(ifp, (struct sockaddr_iso *)dst, + (char *)edst, &snpalen); + if (error) + goto bad; /* Not Resolved */ + } + /* If broadcasting on a simplex interface, loopback a copy */ + if (*edst & 1) + m->m_flags |= (M_BCAST|M_MCAST); + if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) && + (mcopy = m_copy(m, 0, (int)M_COPYALL))) { + M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT); + if (mcopy) { + eh = mtod(mcopy, struct ether_header *); + bcopy(edst, eh->ether_dhost, sizeof (edst)); + bcopy(ac->ac_enaddr, eh->ether_shost, + sizeof (edst)); + } + } + M_PREPEND(m, 3, M_DONTWAIT); + if (m == NULL) + return (0); + etype = htons(m->m_pkthdr.len); + l = mtod(m, struct llc *); + l->llc_dsap = l->llc_ssap = LLC_ISO_LSAP; + l->llc_control = LLC_UI; +#ifdef ARGO_DEBUG + if (argo_debug[D_ETHER]) { + int i; + printf("unoutput: sending pkt to: "); + for (i=0; i<6; i++) + printf("%x ", edst[i] & 0xff); + printf("\n"); + } +#endif + } break; +#endif /* ISO */ +/* case AF_NSAP: */ + case AF_CCITT: { + register struct sockaddr_dl *sdl = + (struct sockaddr_dl *) rt -> rt_gateway; + + if (sdl && sdl->sdl_family == AF_LINK + && sdl->sdl_alen > 0) { + bcopy(LLADDR(sdl), (char *)edst, + sizeof(edst)); + } else goto bad; /* Not a link interface ? Funny ... 
*/ + if ((ifp->if_flags & IFF_SIMPLEX) && (*edst & 1) && + (mcopy = m_copy(m, 0, (int)M_COPYALL))) { + M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT); + if (mcopy) { + eh = mtod(mcopy, struct ether_header *); + bcopy(edst, eh->ether_dhost, sizeof (edst)); + bcopy(ac->ac_enaddr, eh->ether_shost, + sizeof (edst)); + } + } + etype = htons(m->m_pkthdr.len); +#ifdef LLC_DEBUG + { + int i; + register struct llc *l = mtod(m, struct llc *); + + printf("ether_output: sending LLC2 pkt to: "); + for (i=0; i<6; i++) + printf("%x ", edst[i] & 0xff); + printf(" len 0x%x dsap 0x%x ssap 0x%x control 0x%x\n", + m->m_pkthdr.len, l->llc_dsap & 0xff, l->llc_ssap &0xff, + l->llc_control & 0xff); + + } +#endif /* LLC_DEBUG */ + } break; + + case AF_UNSPEC: + eh = (struct ether_header *)dst->sa_data; + bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, sizeof (edst)); + /* AF_UNSPEC doesn't swap the byte order of the ether_type. */ + etype = eh->ether_type; + break; + + default: +#ifdef __ECOS +// diag_printf("%s: can't handle af%d\n", ifp->if_xname, +// dst->sa_family); +#else + printf("%s: can't handle af%d\n", ifp->if_xname, + dst->sa_family); +#endif + senderr(EAFNOSUPPORT); + } + + if (mcopy) + (void) looutput(ifp, mcopy, dst, rt); + + /* + * Add local net header. If no space in first mbuf, + * allocate another. + */ + M_PREPEND(m, sizeof (struct ether_header), M_DONTWAIT); + if (m == 0) + senderr(ENOBUFS); + eh = mtod(m, struct ether_header *); + bcopy((caddr_t)&etype,(caddr_t)&eh->ether_type, + sizeof(eh->ether_type)); + bcopy((caddr_t)edst, (caddr_t)eh->ether_dhost, sizeof (edst)); + bcopy((caddr_t)ac->ac_enaddr, (caddr_t)eh->ether_shost, + sizeof(eh->ether_shost)); + +#if NBRIDGE > 0 + /* + * Interfaces that are bridge members need special handling + * for output. + */ + if (ifp->if_bridge) { + bridge_output(ifp, m, NULL, NULL); + return (error); + } +#endif + + s = splimp(); + /* + * Queue message on interface, and start output if interface + * not yet active. 
+ */ + if (IF_QFULL(&ifp->if_snd)) { + // Let the interface try a dequeue anyway, in case the + // interface has "got better" from whatever made the queue + // fill up - being unplugged for example. + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + IF_DROP(&ifp->if_snd); + splx(s); + senderr(ENOBUFS); + } + ifp->if_obytes += m->m_pkthdr.len; + IF_ENQUEUE(&ifp->if_snd, m); + if (m->m_flags & M_MCAST) + ifp->if_omcasts++; + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + splx(s); + return (error); + +bad: + if (m) + m_freem(m); + return (error); +} + +/* + * Process a received Ethernet packet; + * the packet is in the mbuf chain m without + * the ether header, which is provided separately. + */ +void +ether_input(ifp, eh, m) + struct ifnet *ifp; + register struct ether_header *eh; + struct mbuf *m; +{ + register struct ifqueue *inq; + u_int16_t etype; + int s, llcfound = 0; + register struct llc *l; + struct arpcom *ac = (struct arpcom *)ifp; +#ifdef __ECOS + unsigned int sched_what; +#endif + + if ((ifp->if_flags & IFF_UP) == 0) { + m_freem(m); + return; + } + ifp->if_lastchange = time; + ifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh); + if (eh->ether_dhost[0] & 1) { + if (bcmp((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost, + sizeof(etherbroadcastaddr)) == 0) + m->m_flags |= M_BCAST; + else + m->m_flags |= M_MCAST; + } + if (m->m_flags & (M_BCAST|M_MCAST)) + ifp->if_imcasts++; + +#if NBRIDGE > 0 + /* + * Tap the packet off here for a bridge, if configured and + * active for this interface. bridge_input returns + * NULL if it has consumed the packet, otherwise, it + * gets processed as normal. + */ + if (ifp->if_bridge) { + m = bridge_input(ifp, eh, m); + if (m == NULL) + return; + /* The bridge has determined it's for us. */ + goto decapsulate; + } +#endif + /* + * If packet is unicast and we're in promiscuous mode, make sure it + * is for us. Drop otherwise. 
+ */ + if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 && + (ifp->if_flags & IFF_PROMISC)) { + if (bcmp(ac->ac_enaddr, (caddr_t)eh->ether_dhost, + ETHER_ADDR_LEN)) { + m_freem(m); + return; + } + } + +decapsulate: + etype = ntohs(eh->ether_type); + switch (etype) { +#ifdef INET + case ETHERTYPE_IP: +#ifdef __ECOS + sched_what = NETISR_IP; +#else + schednetisr(NETISR_IP); +#endif + inq = &ipintrq; + break; + + case ETHERTYPE_ARP: + if (ifp->if_flags & IFF_NOARP) + goto dropanyway; +#ifdef __ECOS + sched_what = NETISR_ARP; +#else + schednetisr(NETISR_ARP); +#endif + inq = &arpintrq; + break; + + case ETHERTYPE_REVARP: + if (ifp->if_flags & IFF_NOARP) + goto dropanyway; + revarpinput(m); /* XXX queue? */ + return; + +#endif +#ifdef INET6 + /* + * Schedule IPv6 software interrupt for incoming IPv6 packet. + */ + case ETHERTYPE_IPV6: +#ifdef __ECOS + sched_what = NETISR_IPV6; +#else + schednetisr(NETISR_IPV6); +#endif + inq = &ip6intrq; + break; +#endif /* INET6 */ +#ifdef IPX + case ETHERTYPE_IPX: +#ifdef __ECOS + sched_what = NETISR_IPX; +#else + schednetisr(NETISR_IPX); +#endif + inq = &ipxintrq; + break; +#endif +#ifdef NS + case ETHERTYPE_NS: +#ifdef __ECOS + sched_what = NETISR_NS; +#else + schednetisr(NETISR_NS); +#endif + inq = &nsintrq; + break; +#endif +#ifdef NETATALK + case ETHERTYPE_AT: +#ifdef __ECOS + sched_what = NETISR_ATALK; +#else + schednetisr(NETISR_ATALK); +#endif + inq = &atintrq1; + break; + case ETHERTYPE_AARP: + /* probably this should be done with a NETISR as well */ + /* XXX queue this */ + aarpinput((struct arpcom *)ifp, m); + return; +#endif + default: + if (llcfound || etype > ETHERMTU) + goto dropanyway; + llcfound = 1; + l = mtod(m, struct llc *); + switch (l->llc_dsap) { + case LLC_SNAP_LSAP: +#ifdef NETATALK + /* + * Some protocols (like Appletalk) need special + * handling depending on if they are type II + * or SNAP encapsulated. Everything else + * gets handled by stripping off the SNAP header + * and going back up to decapsulate. 
+ */ + if (l->llc_control == LLC_UI && + l->llc_ssap == LLC_SNAP_LSAP && + Bcmp(&(l->llc_snap_org_code)[0], + at_org_code, sizeof(at_org_code)) == 0 && + ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) { + inq = &atintrq2; + m_adj(m, AT_LLC_SIZE); +#ifdef __ECOS + sched_what = NETISR_ATALK; +#else + schednetisr(NETISR_ATALK); +#endif + break; + } + + if (l->llc_control == LLC_UI && + l->llc_ssap == LLC_SNAP_LSAP && + Bcmp(&(l->llc_snap_org_code)[0], + aarp_org_code, sizeof(aarp_org_code)) == 0 && + ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) { + m_adj(m, AT_LLC_SIZE); + /* XXX Really this should use netisr too */ + aarpinput((struct arpcom *)ifp, m); + return; + } +#endif /* NETATALK */ + if (l->llc_control == LLC_UI && + l->llc_dsap == LLC_SNAP_LSAP && + l->llc_ssap == LLC_SNAP_LSAP) { + /* SNAP */ + if (m->m_pkthdr.len > etype) + m_adj(m, etype - m->m_pkthdr.len); + m->m_data += 6; /* XXX */ + m->m_len -= 6; /* XXX */ + m->m_pkthdr.len -= 6; /* XXX */ + M_PREPEND(m, sizeof *eh, M_DONTWAIT); + if (m == 0) + return; + *mtod(m, struct ether_header *) = *eh; + goto decapsulate; + } + goto dropanyway; +#ifdef ISO + case LLC_ISO_LSAP: + switch (l->llc_control) { + case LLC_UI: + /* LLC_UI_P forbidden in class 1 service */ + if ((l->llc_dsap == LLC_ISO_LSAP) && + (l->llc_ssap == LLC_ISO_LSAP)) { + /* LSAP for ISO */ + if (m->m_pkthdr.len > etype) + m_adj(m, etype - m->m_pkthdr.len); + m->m_data += 3; /* XXX */ + m->m_len -= 3; /* XXX */ + m->m_pkthdr.len -= 3; /* XXX */ + M_PREPEND(m, sizeof *eh, M_DONTWAIT); + if (m == 0) + return; + *mtod(m, struct ether_header *) = *eh; +#ifdef ARGO_DEBUG + if (argo_debug[D_ETHER]) + printf("clnp packet"); +#endif +#ifdef __ECOS + sched_what = NETISR_ISO; +#else + schednetisr(NETISR_ISO); +#endif + inq = &clnlintrq; + break; + } + goto dropanyway; + + case LLC_XID: + case LLC_XID_P: + if(m->m_len < 6) + goto dropanyway; + l->llc_window = 0; + l->llc_fid = 9; + l->llc_class = 1; + l->llc_dsap = l->llc_ssap = 0; + /* Fall through 
to */ + case LLC_TEST: + case LLC_TEST_P: + { + struct sockaddr sa; + register struct ether_header *eh2; + int i; + u_char c = l->llc_dsap; + + l->llc_dsap = l->llc_ssap; + l->llc_ssap = c; + if (m->m_flags & (M_BCAST | M_MCAST)) + bcopy(ac->ac_enaddr, + eh->ether_dhost, 6); + sa.sa_family = AF_UNSPEC; + sa.sa_len = sizeof(sa); + eh2 = (struct ether_header *)sa.sa_data; + for (i = 0; i < 6; i++) { + eh2->ether_shost[i] = c = eh->ether_dhost[i]; + eh2->ether_dhost[i] = + eh->ether_dhost[i] = eh->ether_shost[i]; + eh->ether_shost[i] = c; + } + ifp->if_output(ifp, m, &sa, NULL); + return; + } + break; + } +#endif /* ISO */ +#ifdef CCITT + case LLC_X25_LSAP: + if (m->m_pkthdr.len > etype) + m_adj(m, etype - m->m_pkthdr.len); + M_PREPEND(m, sizeof(struct sdl_hdr) , M_DONTWAIT); + if (m == 0) + return; + if (!sdl_sethdrif(ifp, eh->ether_shost, LLC_X25_LSAP, + eh->ether_dhost, LLC_X25_LSAP, 6, + mtod(m, struct sdl_hdr *))) + panic("ETHER cons addr failure"); + mtod(m, struct sdl_hdr *)->sdlhdr_len = etype; +#ifdef LLC_DEBUG + printf("llc packet\n"); +#endif /* LLC_DEBUG */ +#ifdef __ECOS + sched_what = NETISR_CCITT; +#else + schednetisr(NETISR_CCITT); +#endif + inq = &llcintrq; + break; +#endif /* CCITT */ + dropanyway: + default: + m_freem(m); + return; + } + } + + s = splimp(); + if (IF_QFULL(inq)) { + IF_DROP(inq); + m_freem(m); + } else + IF_ENQUEUE(inq, m); + splx(s); +#ifdef __ECOS + schednetisr(sched_what); +#endif +} + +/* + * Convert Ethernet address to printable (loggable) representation. 
+ */ +static char digits[] = "0123456789abcdef"; +char * +ether_sprintf(ap) + register u_char *ap; +{ + register int i; + static char etherbuf[18]; + register char *cp = etherbuf; + + for (i = 0; i < 6; i++) { + *cp++ = digits[*ap >> 4]; + *cp++ = digits[*ap++ & 0xf]; + *cp++ = ':'; + } + *--cp = 0; + return (etherbuf); +} + +/* + * Perform common duties while attaching to interface list + */ +void +ether_ifattach(ifp) + register struct ifnet *ifp; +{ + register struct ifaddr *ifa; + register struct sockaddr_dl *sdl; + + ifp->if_type = IFT_ETHER; + ifp->if_addrlen = 6; + ifp->if_hdrlen = 14; + ifp->if_mtu = ETHERMTU; + ifp->if_output = ether_output; + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; + ifa = ifa->ifa_list.tqe_next) + if ((sdl = (struct sockaddr_dl *)ifa->ifa_addr) && + sdl->sdl_family == AF_LINK) { + sdl->sdl_type = IFT_ETHER; + sdl->sdl_alen = ifp->if_addrlen; + bcopy((caddr_t)((struct arpcom *)ifp)->ac_enaddr, + LLADDR(sdl), ifp->if_addrlen); + break; + } + LIST_INIT(&((struct arpcom *)ifp)->ac_multiaddrs); +} + +void +ether_ifdetach(ifp) + struct ifnet *ifp; +{ + struct arpcom *ac = (struct arpcom *)ifp; + struct ether_multi *enm; + + for (enm = LIST_FIRST(&ac->ac_multiaddrs); enm; + enm = LIST_FIRST(&ac->ac_multiaddrs)) { + LIST_REMOVE(enm, enm_list); + free(enm, M_IFMADDR); + } +} + +u_char ether_ipmulticast_min[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; +u_char ether_ipmulticast_max[6] = { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff }; + +#ifdef INET6 +u_char ether_ip6multicast_min[6] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 }; +u_char ether_ip6multicast_max[6] = { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff }; +#endif + +/* + * Add an Ethernet multicast address or range of addresses to the list for a + * given interface. 
+ */ +int +ether_addmulti(ifr, ac) + struct ifreq *ifr; + register struct arpcom *ac; +{ + register struct ether_multi *enm; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; +#endif /* INET6 */ + u_char addrlo[6]; + u_char addrhi[6]; + int s = splimp(); + + switch (ifr->ifr_addr.sa_family) { + + case AF_UNSPEC: + bcopy(ifr->ifr_addr.sa_data, addrlo, 6); + bcopy(addrlo, addrhi, 6); + break; + +#ifdef INET + case AF_INET: + sin = (struct sockaddr_in *)&(ifr->ifr_addr); + if (sin->sin_addr.s_addr == INADDR_ANY) { + /* + * An IP address of INADDR_ANY means listen to all + * of the Ethernet multicast addresses used for IP. + * (This is for the sake of IP multicast routers.) + */ + bcopy(ether_ipmulticast_min, addrlo, 6); + bcopy(ether_ipmulticast_max, addrhi, 6); + } + else { + ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo); + bcopy(addrlo, addrhi, 6); + } + break; +#endif +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)ifr)->ifr_addr); + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* + * An unspecified IPv6 address means listen to all + * of the IPv6 multicast addresses on this Ethernet. + * (Multicast routers like this.) + */ + bcopy(ether_ip6multicast_min, addrlo, ETHER_ADDR_LEN); + bcopy(ether_ip6multicast_max, addrhi, ETHER_ADDR_LEN); + } else { + ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo); + bcopy(addrlo, addrhi, ETHER_ADDR_LEN); + } + break; +#endif /* INET6 */ + + default: + splx(s); + return (EAFNOSUPPORT); + } + + /* + * Verify that we have valid Ethernet multicast addresses. + */ + if ((addrlo[0] & 0x01) != 1 || (addrhi[0] & 0x01) != 1) { + splx(s); + return (EINVAL); + } + /* + * See if the address range is already in the list. + */ + ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm); + if (enm != NULL) { + /* + * Found it; just increment the reference count. 
+ */ + ++enm->enm_refcount; + splx(s); + return (0); + } + /* + * New address or range; malloc a new multicast record + * and link it into the interface's multicast list. + */ + enm = (struct ether_multi *)malloc(sizeof(*enm), M_IFMADDR, M_NOWAIT); + if (enm == NULL) { + splx(s); + return (ENOBUFS); + } + bcopy(addrlo, enm->enm_addrlo, 6); + bcopy(addrhi, enm->enm_addrhi, 6); + enm->enm_ac = ac; + enm->enm_refcount = 1; + LIST_INSERT_HEAD(&ac->ac_multiaddrs, enm, enm_list); + ac->ac_multicnt++; + splx(s); + /* + * Return ENETRESET to inform the driver that the list has changed + * and its reception filter should be adjusted accordingly. + */ + return (ENETRESET); +} + +/* + * Delete a multicast address record. + */ +int +ether_delmulti(ifr, ac) + struct ifreq *ifr; + register struct arpcom *ac; +{ + register struct ether_multi *enm; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; +#endif /* INET6 */ + u_char addrlo[6]; + u_char addrhi[6]; + int s = splimp(); + + switch (ifr->ifr_addr.sa_family) { + + case AF_UNSPEC: + bcopy(ifr->ifr_addr.sa_data, addrlo, 6); + bcopy(addrlo, addrhi, 6); + break; + +#ifdef INET + case AF_INET: + sin = (struct sockaddr_in *)&(ifr->ifr_addr); + if (sin->sin_addr.s_addr == INADDR_ANY) { + /* + * An IP address of INADDR_ANY means stop listening + * to the range of Ethernet multicast addresses used + * for IP. + */ + bcopy(ether_ipmulticast_min, addrlo, 6); + bcopy(ether_ipmulticast_max, addrhi, 6); + } + else { + ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo); + bcopy(addrlo, addrhi, 6); + } + break; +#endif +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)&(ifr->ifr_addr); + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* + * An unspecified IPv6 address means stop listening to + * all IPv6 multicast addresses on this Ethernet.' + * + * (This might not be healthy, given IPv6's reliance on + * multicast for things like neighbor discovery. 
+ * Perhaps initializing all-nodes, solicited nodes, and + * possibly all-routers for this interface afterwards + * is not a bad idea.) + */ + bcopy(ether_ip6multicast_min, addrlo, ETHER_ADDR_LEN); + bcopy(ether_ip6multicast_max, addrhi, ETHER_ADDR_LEN); + } else { + ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo); + bcopy(addrlo, addrhi, ETHER_ADDR_LEN); + } + break; +#endif /* INET6 */ + + default: + splx(s); + return (EAFNOSUPPORT); + } + + /* + * Look up the address in our list. + */ + ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm); + if (enm == NULL) { + splx(s); + return (ENXIO); + } + if (--enm->enm_refcount != 0) { + /* + * Still some claims to this record. + */ + splx(s); + return (0); + } + /* + * No remaining claims to this record; unlink and free it. + */ + LIST_REMOVE(enm, enm_list); + free(enm, M_IFMADDR); + ac->ac_multicnt--; + splx(s); + /* + * Return ENETRESET to inform the driver that the list has changed + * and its reception filter should be adjusted accordingly. + */ + return (ENETRESET); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/if_loop.c b/ecos/packages/net/tcpip/current/src/sys/net/if_loop.c new file mode 100644 index 0000000..7929f42 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/if_loop.c @@ -0,0 +1,446 @@ +//========================================================================== +// +// sys/net/if_loop.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: if_loop.c,v 1.12 1999/12/08 06:50:18 itojun Exp $ */ +/* $NetBSD: if_loop.c,v 1.15 1996/05/07 02:40:33 thorpej Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if_loop.c 8.1 (Berkeley) 6/10/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. +*/ + +/* + * Loopback interface driver for protocol testing and timing. 
+ */ + +#ifndef __ECOS +#include "bpfilter.h" +#include "loop.h" +#endif + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/errno.h> +#include <sys/ioctl.h> +#include <sys/time.h> + +#include <machine/cpu.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/netisr.h> +#include <net/route.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#endif + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#include <netinet6/in6_var.h> +#include <netinet6/ip6.h> +#endif + +#ifdef NS +#include <netns/ns.h> +#include <netns/ns_if.h> +#endif + +#ifdef IPX +#include <netipx/ipx.h> +#include <netipx/ipx_if.h> +#endif + +#ifdef ISO +#include <netiso/iso.h> +#include <netiso/iso_var.h> +#endif + +#ifdef NETATALK +#include <netinet/if_ether.h> +#include <netatalk/at.h> +#include <netatalk/at_var.h> +#endif + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#if defined(LARGE_LOMTU) +#define LOMTU (131072 + MHLEN + MLEN) +#else +#define LOMTU (32768 + MHLEN + MLEN) +#endif + +#ifndef __ECOS +#include <stdio.h> // for 'sprintf()' +#endif + +struct ifnet loif[NLOOP]; + +void +loopattach(n) + int n; +{ + register int i; + register struct ifnet *ifp; + + for (i = NLOOP; i--; ) { + ifp = &loif[i]; +#if !defined(__ECOS) || (CYGPKG_NET_NLOOP > 1) + sprintf(ifp->if_xname, "lo%d", i); +#else + strcpy(ifp->if_xname, "lo0"); +#endif + ifp->if_softc = NULL; + ifp->if_mtu = LOMTU; + ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; + ifp->if_ioctl = loioctl; + ifp->if_output = looutput; + ifp->if_type = IFT_LOOP; + ifp->if_hdrlen = sizeof(u_int32_t); + ifp->if_addrlen = 0; + if_attachhead(ifp); +#if NBPFILTER > 0 + bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); +#endif + } +} + +int +looutput(ifp, m, dst, rt) + struct ifnet *ifp; + register struct mbuf *m; + struct sockaddr 
*dst; + register struct rtentry *rt; +{ + int s, isr; + register struct ifqueue *ifq = 0; + + if ((m->m_flags & M_PKTHDR) == 0) + panic("looutput: no header mbuf"); + ifp->if_lastchange = time; +#if NBPFILTER > 0 + /* + * only send packets to bpf if they are real loopback packets; + * looutput() is also called for SIMPLEX interfaces to duplicate + * packets for local use. But don't dup them to bpf. + */ + if (ifp->if_bpf && (ifp->if_flags&IFF_LOOPBACK)) { + /* + * We need to prepend the address family as + * a four byte field. Cons up a dummy header + * to pacify bpf. This is safe because bpf + * will only read from the mbuf (i.e., it won't + * try to free it or keep a pointer to it). + */ + struct mbuf m0; + u_int32_t af = htonl(dst->sa_family); + + m0.m_next = m; + m0.m_len = sizeof(af); + m0.m_data = (char *)⁡ + + bpf_mtap(ifp->if_bpf, &m0); + } +#endif + m->m_pkthdr.rcvif = ifp; + + if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + m_freem(m); + return (rt->rt_flags & RTF_BLACKHOLE ? 0 : + rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + } + +#ifndef PULLDOWN_TEST + /* + * KAME requires that the packet to be contiguous on the + * mbuf. We need to make that sure. + * this kind of code should be avoided. + * XXX other conditions to avoid running this part? 
+ */ + if (m && m->m_next != NULL) { + struct mbuf *n; + + MGETHDR(n, M_DONTWAIT, MT_HEADER); + if (n) { + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + m_free(n); + n = NULL; + } + } + if (!n) { +#ifdef __ECOS + diag_printf("looutput: mbuf allocation failed\n"); +#else + printf("looutput: mbuf allocation failed\n"); +#endif + m_freem(m); + return ENOBUFS; + } + + n->m_pkthdr.rcvif = m->m_pkthdr.rcvif; + n->m_pkthdr.len = m->m_pkthdr.len; + if (m->m_pkthdr.len <= MCLBYTES) { + m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); + n->m_len = m->m_pkthdr.len; + m_freem(m); + } else { + m_copydata(m, 0, MCLBYTES, mtod(n, caddr_t)); + m_adj(m, MCLBYTES); + n->m_len = MCLBYTES; + n->m_next = m; + m->m_flags &= ~M_PKTHDR; + } + m = n; + } +#if 0 + if (m && m->m_next != NULL) { + printf("loop: not contiguous...\n"); + m_freem(m); + return ENOBUFS; + } +#endif +#endif + + ifp->if_opackets++; + ifp->if_obytes += m->m_pkthdr.len; + switch (dst->sa_family) { + +#ifdef INET + case AF_INET: + ifq = &ipintrq; + isr = NETISR_IP; + break; +#endif +#ifdef INET6 + case AF_INET6: + ifq = &ip6intrq; + isr = NETISR_IPV6; + break; +#endif /* INET6 */ +#ifdef NS + case AF_NS: + ifq = &nsintrq; + isr = NETISR_NS; + break; +#endif +#ifdef IPX + case AF_IPX: + ifq = &ipxintrq; + isr = NETISR_IPX; + break; +#endif +#ifdef ISO + case AF_ISO: + ifq = &clnlintrq; + isr = NETISR_ISO; + break; +#endif +#ifdef NETATALK + case AF_APPLETALK: + ifq = &atintrq2; + isr = NETISR_ATALK; + break; +#endif + default: +#ifdef __ECOS + diag_printf("%s: can't handle af%d\n", ifp->if_xname, + dst->sa_family); +#else + printf("%s: can't handle af%d\n", ifp->if_xname, + dst->sa_family); +#endif + m_freem(m); + return (EAFNOSUPPORT); + } + s = splimp(); + if (IF_QFULL(ifq)) { + IF_DROP(ifq); + m_freem(m); + splx(s); + return (ENOBUFS); + } + IF_ENQUEUE(ifq, m); + schednetisr(isr); + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + splx(s); + return (0); +} + +/* ARGSUSED */ +void 
+lortrequest(cmd, rt, sa) + int cmd; + struct rtentry *rt; + struct sockaddr *sa; +{ + + if (rt) + rt->rt_rmx.rmx_mtu = LOMTU; +} + +/* + * Process an ioctl request. + */ +/* ARGSUSED */ +int +loioctl(ifp, cmd, data) + register struct ifnet *ifp; + u_long cmd; + caddr_t data; +{ + register struct ifaddr *ifa; + register struct ifreq *ifr; + register int error = 0; + + switch (cmd) { + + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + ifa = (struct ifaddr *)data; + if (ifa != 0 /*&& ifa->ifa_addr->sa_family == AF_ISO*/) + ifa->ifa_rtrequest = lortrequest; + /* + * Everything else is done at a higher level. + */ + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + ifr = (struct ifreq *)data; + if (ifr == 0) { + error = EAFNOSUPPORT; /* XXX */ + break; + } + switch (ifr->ifr_addr.sa_family) { + +#ifdef INET + case AF_INET: + break; +#endif +#ifdef INET6 + case AF_INET6: + break; +#endif /* INET6 */ + + default: + error = EAFNOSUPPORT; + break; + } + break; + + default: + error = EINVAL; + } + return (error); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/radix.c b/ecos/packages/net/tcpip/current/src/sys/net/radix.c new file mode 100644 index 0000000..04f20d5 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/radix.c @@ -0,0 +1,965 @@ +//========================================================================== +// +// sys/net/radix.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: radix.c,v 1.4 1996/09/05 08:42:32 mickey Exp $ */ +/* $NetBSD: radix.c,v 1.11 1996/03/16 23:55:36 christos Exp $ */ + +/* + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)radix.c 8.4 (Berkeley) 11/2/94 + */ + +/* + * Routines to build and maintain radix trees for routing lookups. + */ +#include <sys/param.h> +#ifdef _KERNEL +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/syslog.h> +#endif +#include <sys/malloc.h> +#define M_DONTWAIT M_NOWAIT +#include <sys/domain.h> +#else +#include <stdlib.h> +#endif +#include <net/radix.h> + +int max_keylen; +struct radix_mask *rn_mkfreelist; +struct radix_node_head *mask_rnhead; +static char *addmask_key; +static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1}; +static char *rn_zeros, *rn_ones; + +#define rn_masktop (mask_rnhead->rnh_treetop) +#undef Bcmp +#define Bcmp(a, b, l) (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l)) + + +static int rn_satsifies_leaf __P((char *, struct radix_node *, int)); +static int rn_lexobetter __P((void *, void *)); +static struct radix_mask *rn_new_radix_mask __P((struct radix_node *, + struct radix_mask *)); +/* + * The data structure for the keys is a radix tree with one way + * branching removed. The index rn_b at an internal node n represents a bit + * position to be tested. The tree is arranged so that all descendants + * of a node n have keys whose bits all agree up to position rn_b - 1. + * (We say the index of n is rn_b.) + * + * There is at least one descendant which has a one bit at position rn_b, + * and at least one with a zero there. 
+ * + * A route is determined by a pair of key and mask. We require the + * bit-wise logical AND of the key and mask to be the key. + * We define the index of a route associated with the mask to be + * the first bit number in the mask where 0 occurs (with bit number 0 + * representing the highest order bit). + * + * We say a mask is normal if every bit is 0, past the index of the mask. + * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b, + * and m is a normal mask, then the route applies to every descendant of n. + * If the index(m) < rn_b, this implies the trailing last few bits of k + * before bit b are all 0 (and hence the same is true of every descendant + * of n), so the route applies to all descendants of the node as well. + * + * Similar logic shows that a non-normal mask m such that + * index(m) <= index(n) could potentially apply to many children of n. + * Thus, for each non-host route, we attach its mask to a list at an internal + * node as high in the tree as we can go. + * + * The present version of the code makes use of normal routes in short- + * circuiting an explicit mask and compare operation when testing whether + * a key satisfies a normal route, and also in remembering the unique leaf + * that governs a subtree. 
+ */ +/* rn_search: starting at head, follow rn_r when the bit selected by rn_bmask in byte v[rn_off] is set and rn_l otherwise, until a node with rn_b < 0 is reached; that node is returned. */ +struct radix_node * +rn_search(v_arg, head) + void *v_arg; + struct radix_node *head; +{ + register struct radix_node *x; + register caddr_t v; + + for (x = head, v = v_arg; x->rn_b >= 0;) { + if (x->rn_bmask & v[x->rn_off]) + x = x->rn_r; + else + x = x->rn_l; + } + return (x); +} +/* rn_search_m: same descent as rn_search, but the right branch is taken only when the tested bit is set in both the mask m_arg and the key v_arg. */ +struct radix_node * +rn_search_m(v_arg, head, m_arg) + struct radix_node *head; + void *v_arg, *m_arg; +{ + register struct radix_node *x; + register caddr_t v = v_arg, m = m_arg; + + for (x = head; x->rn_b >= 0;) { + if ((x->rn_bmask & m[x->rn_off]) && + (x->rn_bmask & v[x->rn_off])) + x = x->rn_r; + else + x = x->rn_l; + } + return x; +} +/* rn_refines: both arguments are masks whose first byte is their length. Returns 0 if n_arg has any bit set that m_arg lacks; otherwise returns 1 when the two masks differ and 0 when they are identical. */ +int +rn_refines(m_arg, n_arg) + void *m_arg, *n_arg; +{ + register caddr_t m = m_arg, n = n_arg; + register caddr_t lim, lim2 = lim = n + *(u_char *)n; + int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); + int masks_are_equal = 1; + /* longer > 0 means n_arg is the longer mask: compare only the bytes m_arg also has */ + if (longer > 0) + lim -= longer; + while (n < lim) { + if (*n & ~(*m)) + return 0; + if (*n++ != *m++) + masks_are_equal = 0; + } + while (n < lim2) + if (*n++) + return 0; + if (masks_are_equal && (longer < 0)) + for (lim2 = m - longer; m < lim2; ) + if (*m++) + return 1; + return (!masks_are_equal); +} +/* rn_lookup: find the entry for key v_arg via rn_match; when m_arg is given it is first resolved with rn_addmask in search-only mode (returning 0 if that fails) and the rn_dupedkey chain is then walked for the entry whose rn_mask pointer equals the resolved mask. */ +struct radix_node * +rn_lookup(v_arg, m_arg, head) + void *v_arg, *m_arg; + struct radix_node_head *head; +{ + register struct radix_node *x; + caddr_t netmask = 0; + + if (m_arg) { + if ((x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off)) == 0) + return (0); + netmask = x->rn_key; + } + x = rn_match(v_arg, head); + if (x && netmask) { + while (x && x->rn_mask != netmask) + x = x->rn_dupedkey; + } + return x; +} +/* rn_satsifies_leaf: returns 1 when trial and the leaf key agree on every bit selected by the leaf mask (rn_ones when the leaf has no mask) for bytes skip up to the shortest of the length bytes involved, else 0. */ +static int +rn_satsifies_leaf(trial, leaf, skip) + char *trial; + register struct radix_node *leaf; + int skip; +{ + register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; + char *cplim; + int length = min(*(u_char *)cp, *(u_char *)cp2); + + if (cp3 == 0) + cp3 = rn_ones; + else + length = min(length, *(u_char *)cp3); + cplim = cp + length; cp3 += skip; cp2 += skip; + for 
(cp += skip; cp < cplim; cp++, cp2++, cp3++) + if ((*cp ^ *cp2) & *cp3) + return 0; + return 1; +} +/* rn_match: return the leaf that best matches key v_arg. Tries the leaf reached by plain descent, then its rn_dupedkey chain, then the rn_mklist of each ancestor up to the tree top; returns 0 when nothing matches. */ +struct radix_node * +rn_match(v_arg, head) + void *v_arg; + struct radix_node_head *head; +{ + caddr_t v = v_arg; + register struct radix_node *t = head->rnh_treetop, *x; + register caddr_t cp = v, cp2; + caddr_t cplim; + struct radix_node *saved_t, *top = t; + int off = t->rn_off, vlen = *(u_char *)cp, matched_off; + register int test, b, rn_b; + + /* + * Open code rn_search(v, top) to avoid overhead of extra + * subroutine call. + */ + for (; t->rn_b >= 0; ) { + if (t->rn_bmask & cp[t->rn_off]) + t = t->rn_r; + else + t = t->rn_l; + } + /* + * See if we match exactly as a host destination + * or at least learn how many bits match, for normal mask finesse. + * + * It doesn't hurt us to limit how many bytes to check + * to the length of the mask, since if it matches we had a genuine + * match and the leaf we have is the most specific one anyway; + * if it didn't match with a shorter length it would fail + * with a long one. This wins big for class B&C netmasks which + * are probably the most common case... + */ + if (t->rn_mask) + vlen = *(u_char *)t->rn_mask; + cp += off; cp2 = t->rn_key + off; cplim = v + vlen; + for (; cp < cplim; cp++, cp2++) + if (*cp != *cp2) + goto on1; + /* + * This extra grot is in case we are explicitly asked + * to look up the default. Ugh! + */ + if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey) + t = t->rn_dupedkey; + return t; +on1: + test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ + for (b = 7; (test >>= 1) > 0;) + b--; + matched_off = cp - v; + b += matched_off << 3; + rn_b = -1 - b; + /* + * If there is a host route in a duped-key chain, it will be first. + */ + if ((saved_t = t)->rn_mask == 0) + t = t->rn_dupedkey; + for (; t; t = t->rn_dupedkey) + /* + * Even if we don't match exactly as a host, + * we may match if the leaf we wound up at is + * a route to a net. 
+ */ + if (t->rn_flags & RNF_NORMAL) { + if (rn_b <= t->rn_b) + return t; + } else if (rn_satsifies_leaf(v, t, matched_off)) + return t; + t = saved_t; + /* start searching up the tree */ + do { + register struct radix_mask *m; + t = t->rn_p; + if ((m = t->rn_mklist) != NULL) { + /* + * If non-contiguous masks ever become important + * we can restore the masking and open coding of + * the search and satisfaction test and put the + * calculation of "off" back before the "do". + */ + do { + if (m->rm_flags & RNF_NORMAL) { + if (rn_b <= m->rm_b) + return (m->rm_leaf); + } else { + off = min(t->rn_off, matched_off); + x = rn_search_m(v, t, m->rm_mask); + while (x && x->rn_mask != m->rm_mask) + x = x->rn_dupedkey; + if (x && rn_satsifies_leaf(v, x, off)) + return x; + } + } while ((m = m->rm_mklist) != NULL); + } + } while (t != top); + return 0; +} + +#ifdef RN_DEBUG +int rn_nodenum; +struct radix_node *rn_clist; +int rn_saveinfo; +int rn_debug = 1; +#endif +/* rn_newpair: set up nodes[0] as a leaf (rn_b = -1) holding key v and nodes[1] as an internal node testing bit b with that leaf as its left child; returns the internal node. */ +struct radix_node * +rn_newpair(v, b, nodes) + void *v; + int b; + struct radix_node nodes[2]; +{ + register struct radix_node *tt = nodes, *t = tt + 1; + t->rn_b = b; t->rn_bmask = 0x80 >> (b & 7); + t->rn_l = tt; t->rn_off = b >> 3; + tt->rn_b = -1; tt->rn_key = (caddr_t)v; tt->rn_p = t; + tt->rn_flags = t->rn_flags = RNF_ACTIVE; +#ifdef RN_DEBUG + tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; + tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; +#endif + return t; +} +/* rn_insert: place key v_arg in the tree under head using the caller supplied nodes. If an identical key is already present, *dupentry is set to 1 and the existing leaf is returned; otherwise *dupentry is 0 and the new leaf is returned. */ +struct radix_node * +rn_insert(v_arg, head, dupentry, nodes) + void *v_arg; + struct radix_node_head *head; + int *dupentry; + struct radix_node nodes[2]; +{ + caddr_t v = v_arg; + struct radix_node *top = head->rnh_treetop; + int head_off = top->rn_off, vlen = (int)*((u_char *)v); + register struct radix_node *t = rn_search(v_arg, top); + register caddr_t cp = v + head_off; + register int b; + struct radix_node *tt; + /* + * Find first bit at which v and t->rn_key differ + */ + { + register caddr_t cp2 = t->rn_key + head_off; + 
register int cmp_res; + caddr_t cplim = v + vlen; + + while (cp < cplim) + if (*cp2++ != *cp++) + goto on1; + *dupentry = 1; + return t; +on1: + *dupentry = 0; + cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; + for (b = (cp - v) << 3; cmp_res; b--) + cmp_res >>= 1; + } + { + register struct radix_node *p, *x = top; + cp = v; + do { + p = x; + if (cp[x->rn_off] & x->rn_bmask) + x = x->rn_r; + else x = x->rn_l; + } while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */ +#ifdef __ECOS +#else +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p); +#endif +#endif + t = rn_newpair(v_arg, b, nodes); tt = t->rn_l; + if ((cp[p->rn_off] & p->rn_bmask) == 0) + p->rn_l = t; + else + p->rn_r = t; + x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */ + if ((cp[t->rn_off] & t->rn_bmask) == 0) { + t->rn_r = x; + } else { + t->rn_r = tt; t->rn_l = x; + } +#ifdef __ECOS +#else +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p); +#endif +#endif + } + return (tt); +} +/* rn_addmask: look up mask n_arg in the mask tree after forcing bytes 1..skip-1 to ones and trimming trailing zero bytes. A mask no longer than skip maps to mask_rnhead->rnh_nodes. When the mask is absent, a nonzero search makes this return 0; otherwise a node is allocated and inserted, and flagged RNF_NORMAL if the isnormal test passes. */ +struct radix_node * +rn_addmask(n_arg, search, skip) + int search, skip; + void *n_arg; +{ + caddr_t netmask = (caddr_t)n_arg; + register struct radix_node *x; + register caddr_t cp, cplim; + register int b = 0, mlen, j; + int maskduplicated, m0, isnormal; + struct radix_node *saved_x; + static int last_zeroed = 0; + + if ((mlen = *(u_char *)netmask) > max_keylen) + mlen = max_keylen; + if (skip == 0) + skip = 1; + if (mlen <= skip) + return (mask_rnhead->rnh_nodes); + if (skip > 1) + Bcopy(rn_ones + 1, addmask_key + 1, skip - 1); + if ((m0 = mlen) > skip) + Bcopy(netmask + skip, addmask_key + skip, mlen - skip); + /* + * Trim trailing zeroes. 
+ */ + for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) + cp--; + mlen = cp - addmask_key; + if (mlen <= skip) { + if (m0 >= last_zeroed) + last_zeroed = mlen; + return (mask_rnhead->rnh_nodes); + } + if (m0 < last_zeroed) + Bzero(addmask_key + m0, last_zeroed - m0); + *addmask_key = last_zeroed = mlen; + x = rn_search(addmask_key, rn_masktop); + if (Bcmp(addmask_key, x->rn_key, mlen) != 0) + x = 0; + if (x || search) + return (x); + R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); /* one allocation: two nodes followed by the mask bytes */ + if ((saved_x = x) == 0) + return (0); + Bzero(x, max_keylen + 2 * sizeof (*x)); + netmask = cp = (caddr_t)(x + 2); + Bcopy(addmask_key, cp, mlen); + x = rn_insert(cp, mask_rnhead, &maskduplicated, x); + if (maskduplicated) { +#ifdef __ECOS +#else + log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); +#endif + Free(saved_x); + return (x); + } + /* + * Calculate index of mask, and check for normalcy. + */ + cplim = netmask + mlen; isnormal = 1; + for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) + cp++; + if (cp != cplim) { + for (j = 0x80; (j & *cp) != 0; j >>= 1) + b++; + if (*cp != normal_chars[b] || cp != (cplim - 1)) + isnormal = 0; + } + b += (cp - netmask) << 3; + x->rn_b = -1 - b; + if (isnormal) + x->rn_flags |= RNF_NORMAL; + return (x); +} +/* rn_lexobetter: returns 1 when the length byte of m_arg is larger, or when the lengths are equal and some byte of m_arg exceeds the byte of n_arg at the same position; 0 otherwise. */ +static int /* XXX: arbitrary ordering for non-contiguous masks */ +rn_lexobetter(m_arg, n_arg) + void *m_arg, *n_arg; +{ + register u_char *mp = m_arg, *np = n_arg, *lim; + + if (*mp > *np) + return 1; /* not really, but need to check longer one first */ + if (*mp == *np) + for (lim = mp + *mp; mp < lim;) + if (*mp++ > *np++) + return 1; + return 0; +} +/* rn_new_radix_mask: obtain a radix_mask with MKGet, zero it, copy rn_b and rn_flags from leaf tt, point it at the leaf (RNF_NORMAL) or at the mask (otherwise), chain it in front of next and record it in tt->rn_mklist. Returns 0 if MKGet yields nothing. */ +static struct radix_mask * +rn_new_radix_mask(tt, next) + register struct radix_node *tt; + register struct radix_mask *next; +{ + register struct radix_mask *m; + + MKGet(m); + if (m == 0) { +#ifdef __ECOS +#else + log(LOG_ERR, "Mask for route not entered\n"); +#endif + return (0); + } + Bzero(m, sizeof *m); + m->rm_b = tt->rn_b; + 
m->rm_flags = tt->rn_flags; + if (tt->rn_flags & RNF_NORMAL) + m->rm_leaf = tt; + else + m->rm_mask = tt->rn_mask; + m->rm_mklist = next; + tt->rn_mklist = m; + return m; +} +/* rn_addroute: insert the route (key v_arg, optional mask n_arg) using the two caller supplied treenodes. Returns the new leaf, or 0 when rn_addmask fails or an entry with the same key and mask already exists. */ +struct radix_node * +rn_addroute(v_arg, n_arg, head, treenodes) + void *v_arg, *n_arg; + struct radix_node_head *head; + struct radix_node treenodes[2]; +{ + caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; + register struct radix_node *t, *x = NULL, *tt; + struct radix_node *saved_tt, *top = head->rnh_treetop; + short b = 0, b_leaf = 0; + int keyduplicated; + caddr_t mmask; + struct radix_mask *m, **mp; + + /* + * In dealing with non-contiguous masks, there may be + * many different routes which have the same mask. + * We will find it useful to have a unique pointer to + * the mask to speed avoiding duplicate references at + * nodes and possibly save time in calculating indices. + */ + if (netmask) { + if ((x = rn_addmask(netmask, 0, top->rn_off)) == 0) + return (0); + b_leaf = x->rn_b; + b = -1 - x->rn_b; + netmask = x->rn_key; + } + /* + * Deal with duplicated keys: attach node to previous instance + */ + saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); + if (keyduplicated) { + for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { + if (tt->rn_mask == netmask) + return (0); + if (netmask == 0 || + (tt->rn_mask && + ((b_leaf < tt->rn_b) || /* index(netmask) > node */ + rn_refines(netmask, tt->rn_mask) || + rn_lexobetter(netmask, tt->rn_mask)))) + break; + } + /* + * If the mask is not duplicated, we wouldn't + * find it among possible duplicate key entries + * anyway, so the above test doesn't hurt. + * + * We sort the masks for a duplicated key the same way as + * in a masklist -- most specific to least specific. + * This may require the unfortunate nuisance of relocating + * the head of the list. 
+ */ + if (tt == saved_tt) { + struct radix_node *xx = x; + /* link in at head of list */ + (tt = treenodes)->rn_dupedkey = t; + tt->rn_flags = t->rn_flags; + tt->rn_p = x = t->rn_p; + if (x->rn_l == t) x->rn_l = tt; else x->rn_r = tt; + saved_tt = tt; x = xx; + } else { + (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; + t->rn_dupedkey = tt; + } +#ifdef RN_DEBUG + t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; + tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; +#endif + tt->rn_key = (caddr_t) v; + tt->rn_b = -1; + tt->rn_flags = RNF_ACTIVE; + } + /* + * Put mask in tree. + */ + if (netmask) { + tt->rn_mask = netmask; + tt->rn_b = x->rn_b; + tt->rn_flags |= x->rn_flags & RNF_NORMAL; + } + t = saved_tt->rn_p; + if (keyduplicated) + goto on2; + b_leaf = -1 - t->rn_b; + if (t->rn_r == saved_tt) x = t->rn_l; else x = t->rn_r; + /* Promote general routes from below */ + if (x->rn_b < 0) { + for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) + if (x->rn_mask && (x->rn_b >= b_leaf) && x->rn_mklist == 0) { + *mp = m = rn_new_radix_mask(x, 0); + if (m) + mp = &m->rm_mklist; + } + } else if (x->rn_mklist) { + /* + * Skip over masks whose index is > that of new node + */ + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) + if (m->rm_b >= b_leaf) + break; + t->rn_mklist = m; *mp = 0; + } +on2: + /* Add new route to highest possible ancestor's list */ + if ((netmask == 0) || (b > t->rn_b )) + return tt; /* can't lift at all */ + b_leaf = tt->rn_b; + do { + x = t; + t = t->rn_p; + } while (b <= t->rn_b && x != top); + /* + * Search through routes associated with node to + * insert new route according to index. + * Need same criteria as when sorting dupedkeys to avoid + * double loop on deletion. 
+ */ + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) { + if (m->rm_b < b_leaf) + continue; + if (m->rm_b > b_leaf) + break; + if (m->rm_flags & RNF_NORMAL) { + mmask = m->rm_leaf->rn_mask; + if (tt->rn_flags & RNF_NORMAL) { +#ifdef __ECOS +#else + log(LOG_ERR, + "Non-unique normal route, mask not entered"); +#endif + return tt; + } + } else + mmask = m->rm_mask; + if (mmask == netmask) { + m->rm_refs++; /* share the existing annotation instead of adding another */ + tt->rn_mklist = m; + return tt; + } + if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask)) + break; + } + *mp = rn_new_radix_mask(tt, *mp); + return tt; +} +/* rn_delete: remove the route (v_arg, netmask_arg). Returns the removed leaf with RNF_ACTIVE cleared on it and on its companion node, or 0 when the route is not found, is a root node, or its mask annotation is inconsistent. */ +struct radix_node * +rn_delete(v_arg, netmask_arg, head) + void *v_arg, *netmask_arg; + struct radix_node_head *head; +{ + register struct radix_node *t, *p, *x, *tt; + struct radix_mask *m, *saved_m, **mp; + struct radix_node *dupedkey, *saved_tt, *top; + caddr_t v, netmask; + int b, head_off, vlen; + + v = v_arg; + netmask = netmask_arg; + x = head->rnh_treetop; + tt = rn_search(v, x); + head_off = x->rn_off; + vlen = *(u_char *)v; + saved_tt = tt; + top = x; + if (tt == 0 || + Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) + return (0); + /* + * Delete our route from mask lists. 
+ */ + if (netmask) { + if ((x = rn_addmask(netmask, 1, head_off)) == 0) + return (0); + netmask = x->rn_key; + while (tt->rn_mask != netmask) + if ((tt = tt->rn_dupedkey) == 0) + return (0); + } + if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) + goto on1; + if (tt->rn_flags & RNF_NORMAL) { + if (m->rm_leaf != tt || m->rm_refs > 0) { +#ifdef __ECOS +#else + log(LOG_ERR, "rn_delete: inconsistent annotation\n"); +#endif + return 0; /* dangling ref could cause disaster */ + } + } else { + if (m->rm_mask != tt->rn_mask) { +#ifdef __ECOS +#else + log(LOG_ERR, "rn_delete: inconsistent annotation\n"); +#endif + goto on1; + } + if (--m->rm_refs >= 0) + goto on1; + } + b = -1 - tt->rn_b; + t = saved_tt->rn_p; + if (b > t->rn_b) + goto on1; /* Wasn't lifted at all */ + do { + x = t; + t = t->rn_p; + } while (b <= t->rn_b && x != top); + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) + if (m == saved_m) { + *mp = m->rm_mklist; + MKFree(m); + break; + } + if (m == 0) { +#ifdef __ECOS +#else + log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); +#endif + if (tt->rn_flags & RNF_NORMAL) + return (0); /* Dangling ref to us */ + } +on1: + /* + * Eliminate us from tree + */ + if (tt->rn_flags & RNF_ROOT) + return (0); +#ifdef RN_DEBUG + /* Get us out of the creation list */ + for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {} + if (t) t->rn_ybro = tt->rn_ybro; +#endif + t = tt->rn_p; + if ((dupedkey = saved_tt->rn_dupedkey) != 0) { + if (tt == saved_tt) { + x = dupedkey; x->rn_p = t; + if (t->rn_l == tt) t->rn_l = x; else t->rn_r = x; + } else { + for (x = p = saved_tt; p && p->rn_dupedkey != tt;) + p = p->rn_dupedkey; + if (p) p->rn_dupedkey = tt->rn_dupedkey; +#ifdef __ECOS +#else + else log(LOG_ERR, "rn_delete: couldn't find us\n"); +#endif + } + t = tt + 1; + if (t->rn_flags & RNF_ACTIVE) { +#ifndef RN_DEBUG + *++x = *t; p = t->rn_p; +#else + b = t->rn_info; *++x = *t; t->rn_info = b; p = t->rn_p; +#endif + if (p->rn_l == t) p->rn_l 
= x; else p->rn_r = x; + x->rn_l->rn_p = x; x->rn_r->rn_p = x; + } + goto out; + } + if (t->rn_l == tt) x = t->rn_r; else x = t->rn_l; + p = t->rn_p; + if (p->rn_r == t) p->rn_r = x; else p->rn_l = x; + x->rn_p = p; + /* + * Demote routes attached to us. + */ + if (t->rn_mklist) { + if (x->rn_b >= 0) { + for (mp = &x->rn_mklist; (m = *mp) != NULL;) + mp = &m->rm_mklist; + *mp = t->rn_mklist; + } else { + /* If there are any key,mask pairs in a sibling + duped-key chain, some subset will appear sorted + in the same order attached to our mklist */ + for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) + if (m == x->rn_mklist) { + struct radix_mask *mm = m->rm_mklist; + x->rn_mklist = 0; + if (--(m->rm_refs) < 0) + MKFree(m); + m = mm; + } +#ifdef __ECOS +#else + if (m) + log(LOG_ERR, "%s %p at %p\n", + "rn_delete: Orphaned Mask", m, x); +#endif + } + } + /* + * We may be holding an active internal node in the tree. + */ + x = tt + 1; + if (t != x) { +#ifndef RN_DEBUG + *t = *x; +#else + b = t->rn_info; *t = *x; t->rn_info = b; +#endif + t->rn_l->rn_p = t; t->rn_r->rn_p = t; + p = x->rn_p; + if (p->rn_l == x) p->rn_l = t; else p->rn_r = t; + } +out: + tt->rn_flags &= ~RNF_ACTIVE; + tt[1].rn_flags &= ~RNF_ACTIVE; + return (tt); +} +/* rn_walktree: call (*f)(rn, w) for every leaf not flagged RNF_ROOT, including rn_dupedkey chains; a nonzero result from f stops the walk and is returned, otherwise 0. */ +int +rn_walktree(h, f, w) + struct radix_node_head *h; + register int (*f) __P((struct radix_node *, void *)); + void *w; +{ + int error; + struct radix_node *base, *next; + register struct radix_node *rn = h->rnh_treetop; + /* + * This gets complicated because we may delete the node + * while applying the function f to it, so we need to calculate + * the successor node in advance. 
+ */ + /* First time through node, go left */ + while (rn->rn_b >= 0) + rn = rn->rn_l; + for (;;) { + base = rn; + /* If at right child go back up, otherwise, go right */ + while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) + rn = rn->rn_p; + /* Find the next *leaf* since next node might vanish, too */ + for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) + rn = rn->rn_l; + next = rn; + /* Process leaves */ + while ((rn = base) != NULL) { + base = rn->rn_dupedkey; + if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w))) + return (error); + } + rn = next; + if (rn->rn_flags & RNF_ROOT) + return (0); + } + /* NOTREACHED */ +} +/* rn_inithead: if *head is already set return 1. Otherwise allocate and zero a radix_node_head, build its three root nodes in rnh_nodes with bit offset off, install the rn_* method pointers and return 1; returns 0 when the allocation fails. */ +int +rn_inithead(head, off) + void **head; + int off; +{ + register struct radix_node_head *rnh; + register struct radix_node *t, *tt, *ttt; + if (*head) + return (1); + R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh)); + if (rnh == 0) + return (0); + Bzero(rnh, sizeof (*rnh)); + *head = rnh; + t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); + ttt = rnh->rnh_nodes + 2; + t->rn_r = ttt; + t->rn_p = t; + tt = t->rn_l; + tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; + tt->rn_b = -1 - off; + *ttt = *tt; + ttt->rn_key = rn_ones; + rnh->rnh_addaddr = rn_addroute; + rnh->rnh_deladdr = rn_delete; + rnh->rnh_matchaddr = rn_match; + rnh->rnh_lookup = rn_lookup; + rnh->rnh_walktree = rn_walktree; + rnh->rnh_treetop = t; + return (1); +} +/* rn_init: raise max_keylen to the largest dom_maxrtkey on the domains list (when _KERNEL is defined), allocate one buffer holding rn_zeros, rn_ones and addmask_key, fill rn_ones with all-one bytes and create mask_rnhead. Returns without doing anything if max_keylen is still 0. */ +void +rn_init() +{ + char *cp, *cplim; +#ifdef _KERNEL + struct domain *dom; + + for (dom = domains; dom; dom = dom->dom_next) + if (dom->dom_maxrtkey > max_keylen) + max_keylen = dom->dom_maxrtkey; +#endif + if (max_keylen == 0) { +#ifdef __ECOS +#else + log(LOG_ERR, + "rn_init: radix functions require max_keylen be set\n"); +#endif + return; + } + R_Malloc(rn_zeros, char *, 3 * max_keylen); + if (rn_zeros == NULL) + panic("rn_init"); + Bzero(rn_zeros, 3 * max_keylen); + rn_ones = cp = rn_zeros + max_keylen; + addmask_key = cplim = rn_ones + max_keylen; + while (cp < cplim) + *cp++ = -1; + if 
(rn_inithead((void **)&mask_rnhead, 0) == 0) + panic("rn_init 2"); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/raw_cb.c b/ecos/packages/net/tcpip/current/src/sys/net/raw_cb.c new file mode 100644 index 0000000..7dacc34 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/raw_cb.c @@ -0,0 +1,178 @@ +//========================================================================== +// +// sys/net/raw_cb.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: raw_cb.c,v 1.2 1996/03/03 21:07:16 niklas Exp $ */ +/* $NetBSD: raw_cb.c,v 1.9 1996/02/13 22:00:39 christos Exp $ */ + +/* + * Copyright (c) 1980, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)raw_cb.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/errno.h> + +#include <net/if.h> +#include <net/route.h> +#include <net/raw_cb.h> +#include <netinet/in.h> + +/* + * Routines to manage the raw protocol control blocks. + * + * TODO: + * hash lookups by protocol family/protocol + address family + * take care of unique address problems per AF? + * redo address binding to allow wildcards + */ + +u_long raw_sendspace = RAWSNDQ; +u_long raw_recvspace = RAWRCVQ; + +/* + * Allocate a control block and a nominal amount + * of buffer space for the socket. 
+ */ +int +raw_attach(so, proto) + register struct socket *so; + int proto; +{ + register struct rawcb *rp = sotorawcb(so); + int error; + + /* + * It is assumed that raw_attach is called + * after space has been allocated for the + * rawcb. + */ + if (rp == 0) + return (ENOBUFS); /* the socket has no rawcb attached */ + if ((error = soreserve(so, raw_sendspace, raw_recvspace)) != 0) + return (error); /* reservation failed: rp is left unlinked */ + rp->rcb_socket = so; + rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family; + rp->rcb_proto.sp_protocol = proto; + LIST_INSERT_HEAD(&rawcb, rp, rcb_list); /* add to the global rawcb list walked by raw_input */ + return (0); +} + +/* + * Detach the raw connection block and discard + * socket resources. Clears so->so_pcb, calls sofree(so), unlinks rp from its list and frees rp itself. + */ +void +raw_detach(rp) + register struct rawcb *rp; +{ + struct socket *so = rp->rcb_socket; + + so->so_pcb = 0; + sofree(so); + LIST_REMOVE(rp, rcb_list); +#ifdef notdef + if (rp->rcb_laddr) + m_freem(dtom(rp->rcb_laddr)); + rp->rcb_laddr = 0; +#endif + free((caddr_t)(rp), M_PCB); +} + +/* + * Disconnect and possibly release resources. raw_detach() is called only when the socket has SS_NOFDREF set in so_state. + */ +void +raw_disconnect(rp) + struct rawcb *rp; +{ + +#ifdef notdef + if (rp->rcb_faddr) + m_freem(dtom(rp->rcb_faddr)); + rp->rcb_faddr = 0; +#endif + if (rp->rcb_socket->so_state & SS_NOFDREF) + raw_detach(rp); +} + +#ifdef notdef /* raw_bind is compiled only when notdef is defined */ +int +raw_bind(so, nam) + register struct socket *so; + struct mbuf *nam; +{ + struct sockaddr *addr = mtod(nam, struct sockaddr *); + register struct rawcb *rp; + + if (ifnet == 0) + return (EADDRNOTAVAIL); + rp = sotorawcb(so); + nam = m_copym(nam, 0, M_COPYALL, M_WAITOK); /* keep a private copy of the address mbuf */ + rp->rcb_laddr = mtod(nam, struct sockaddr *); + return (0); +} +#endif diff --git a/ecos/packages/net/tcpip/current/src/sys/net/raw_usrreq.c b/ecos/packages/net/tcpip/current/src/sys/net/raw_usrreq.c new file mode 100644 index 0000000..f0fde95 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/raw_usrreq.c @@ -0,0 +1,360 @@ +//========================================================================== +// +// sys/net/raw_usrreq.c +// +// +// 
+//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: raw_usrreq.c,v 1.3 1998/09/17 12:29:55 deraadt Exp $ */ +/* $NetBSD: raw_usrreq.c,v 1.11 1996/02/13 22:00:43 christos Exp $ */ + +/* + * Copyright (c) 1980, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)raw_usrreq.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/errno.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif + +#include <net/if.h> +#include <net/route.h> +#include <net/netisr.h> +#include <net/raw_cb.h> + +#include <machine/stdarg.h> +/* + * Initialize raw connection block q. + */ +void +raw_init() +{ + + LIST_INIT(&rawcb); +} + + +/* + * Raw protocol input routine. Find the socket + * associated with the packet(s) and move them over. If + * nothing exists for this packet, drop it. + */ +/* + * Raw protocol interface. + */ +void +#if __STDC__ +raw_input(struct mbuf *m0, ...) 
+#else +raw_input(m0, va_alist) + struct mbuf *m0; + va_dcl +#endif +{ + register struct rawcb *rp; + register struct mbuf *m = m0; + register int sockets = 0; + struct socket *last; + va_list ap; + register struct sockproto *proto; + struct sockaddr *src, *dst; + + va_start(ap, m0); + proto = va_arg(ap, struct sockproto *); + src = va_arg(ap, struct sockaddr *); + dst = va_arg(ap, struct sockaddr *); + va_end(ap); + + last = 0; + for (rp = rawcb.lh_first; rp != 0; rp = rp->rcb_list.le_next) { + if (rp->rcb_proto.sp_family != proto->sp_family) + continue; + if (rp->rcb_proto.sp_protocol && + rp->rcb_proto.sp_protocol != proto->sp_protocol) + continue; + /* + * We assume the lower level routines have + * placed the address in a canonical format + * suitable for a structure comparison. + * + * Note that if the lengths are not the same + * the comparison will fail at the first byte. + */ +#define equal(a1, a2) \ + (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0) + if (rp->rcb_laddr && !equal(rp->rcb_laddr, dst)) + continue; + if (rp->rcb_faddr && !equal(rp->rcb_faddr, src)) + continue; + if (last) { + struct mbuf *n; + if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + if (sbappendaddr(&last->so_rcv, src, + n, (struct mbuf *)0) == 0) + /* should notify about lost packet */ + m_freem(n); + else { + sorwakeup(last); + sockets++; + } + } + } + last = rp->rcb_socket; + } + if (last) { + if (sbappendaddr(&last->so_rcv, src, + m, (struct mbuf *)0) == 0) + m_freem(m); + else { + sorwakeup(last); + sockets++; + } + } else + m_freem(m); +} + +/*ARGSUSED*/ +void * +raw_ctlinput(cmd, arg, d) + int cmd; + struct sockaddr *arg; + void *d; +{ + + if (cmd < 0 || cmd > PRC_NCMDS) + return NULL; + return NULL; + /* INCOMPLETE */ +} + +/*ARGSUSED*/ +int +raw_usrreq(so, req, m, nam, control) + struct socket *so; + int req; + struct mbuf *m, *nam, *control; +{ + register struct rawcb *rp = sotorawcb(so); + register int error = 0; + int len; + + if (req == PRU_CONTROL) + return 
(EOPNOTSUPP); + if (control && control->m_len) { + error = EOPNOTSUPP; + goto release; + } + if (rp == 0) { + error = EINVAL; + goto release; + } + switch (req) { + + /* + * Allocate a raw control block and fill in the + * necessary info to allow packets to be routed to + * the appropriate raw interface routine. + */ + case PRU_ATTACH: +#ifndef __ECOS + if ((so->so_state & SS_PRIV) == 0) { + error = EACCES; + break; + } +#endif + error = raw_attach(so, (int)(long)nam); + break; + + /* + * Destroy state just before socket deallocation. + * Flush data or not depending on the options. + */ + case PRU_DETACH: + if (rp == 0) { + error = ENOTCONN; + break; + } + raw_detach(rp); + break; + +#ifdef notdef + /* + * If a socket isn't bound to a single address, + * the raw input routine will hand it anything + * within that protocol family (assuming there's + * nothing else around it should go to). + */ + case PRU_CONNECT: + if (rp->rcb_faddr) { + error = EISCONN; + break; + } + nam = m_copym(nam, 0, M_COPYALL, M_WAIT); + rp->rcb_faddr = mtod(nam, struct sockaddr *); + soisconnected(so); + break; + + case PRU_BIND: + if (rp->rcb_laddr) { + error = EINVAL; /* XXX */ + break; + } + error = raw_bind(so, nam); + break; +#else + case PRU_CONNECT: + case PRU_BIND: +#endif + case PRU_CONNECT2: + error = EOPNOTSUPP; + goto release; + + case PRU_DISCONNECT: + if (rp->rcb_faddr == 0) { + error = ENOTCONN; + break; + } + raw_disconnect(rp); + soisdisconnected(so); + break; + + /* + * Mark the connection as being incapable of further input. + */ + case PRU_SHUTDOWN: + socantsendmore(so); + break; + + /* + * Ship a packet out. The appropriate raw output + * routine handles any massaging necessary. 
+ */ + case PRU_SEND: + if (nam) { + if (rp->rcb_faddr) { + error = EISCONN; + break; + } + rp->rcb_faddr = mtod(nam, struct sockaddr *); + } else if (rp->rcb_faddr == 0) { + error = ENOTCONN; + break; + } + error = (*so->so_proto->pr_output)(m, so); + m = NULL; + if (nam) + rp->rcb_faddr = 0; + break; + + case PRU_ABORT: + raw_disconnect(rp); + sofree(so); + soisdisconnected(so); + break; + + case PRU_SENSE: + /* + * stat: don't bother with a blocksize. + */ + return (0); + + /* + * Not supported. + */ + case PRU_RCVOOB: + case PRU_RCVD: + return(EOPNOTSUPP); + + case PRU_LISTEN: + case PRU_ACCEPT: + case PRU_SENDOOB: + error = EOPNOTSUPP; + break; + + case PRU_SOCKADDR: + if (rp->rcb_laddr == 0) { + error = EINVAL; + break; + } + len = rp->rcb_laddr->sa_len; + bcopy((caddr_t)rp->rcb_laddr, mtod(nam, caddr_t), (unsigned)len); + nam->m_len = len; + break; + + case PRU_PEERADDR: + if (rp->rcb_faddr == 0) { + error = ENOTCONN; + break; + } + len = rp->rcb_faddr->sa_len; + bcopy((caddr_t)rp->rcb_faddr, mtod(nam, caddr_t), (unsigned)len); + nam->m_len = len; + break; + + default: + panic("raw_usrreq"); + } +release: + if (m != NULL) + m_freem(m); + return (error); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/route.c b/ecos/packages/net/tcpip/current/src/sys/net/route.c new file mode 100644 index 0000000..67673d9 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/route.c @@ -0,0 +1,1074 @@ +//========================================================================== +// +// sys/net/route.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: route.c,v 1.16 1999/12/08 06:50:18 itojun Exp $ */ +/* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1980, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)route.c 8.2 (Berkeley) 11/15/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
*/

#include <sys/param.h>
#ifdef __ECOS
/*
 * Placeholder used instead of <sys/proc.h> in the eCos build; it gives
 * `struct proc *' parameters (see rtioctl) a complete type to point at.
 */
struct proc {
	int __unused;
};
#else
#include <sys/systm.h>
#include <sys/proc.h>
#endif
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/ioctl.h>
#include <sys/kernel.h>

#include <net/if.h>
#include <net/route.h>
#include <net/raw_cb.h>

#include <netinet/in.h>
#include <netinet/in_var.h>

#ifdef NS
#include <netns/ns.h>
#endif

#ifdef IPSEC
#include <netinet/ip_ipsp.h>

extern struct ifnet encif;
#endif

/* Cast any pointer to a generic socket address pointer. */
#define	SA(p) ((struct sockaddr *)(p))

int	rttrash;		/* routes not in table but not freed */
struct	sockaddr wildcard;	/* zero valued cookie for wildcard searches */

static int okaytoclone __P((u_int, int));

#ifdef IPSEC

/*
 * Return the first address on the encap interface's address list.
 * The gateway argument is currently ignored.
 */
static struct ifaddr *
encap_findgwifa(struct sockaddr *gw)
{
	return encif.if_addrlist.tqh_first;
}

#endif

/*
 * Walk the domain list and, for every domain that supplies a
 * dom_rtattach hook, call it with that family's slot in `table'
 * and the domain's dom_rtoffset.
 */
void
rtable_init(table)
	void **table;
{
	struct domain *dom;
	for (dom = domains; dom; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&table[dom->dom_family],
			    dom->dom_rtoffset);
}

/*
 * Routing subsystem start-up: initialise the radix code, then the
 * per-family tables in rt_tables.
 */
void
route_init()
{
	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtable_init((void **)rt_tables);
}

// FIXME: This function is only here because BOOTP fails on a second interface.
//        This failure is due to the fact that a route to 0.0.0.0 seems to be
//        incredibly sticky, i.e. can't be deleted.  BOOTP uses this to
//        achieve a generic broadcast.  Sadly it seems that BOOTP servers will
//        only work this way, thus the hack.
//
// This version enumerates all routes and deletes them - this leaks less
// store than the previous version.
+ +static int +rt_reinit_rtdelete( struct radix_node *rn, void *vifp ) +{ + struct rtentry *rt = (struct rtentry *)rn; + rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), + 0, NULL); + return (0); +} + +void +cyg_route_reinit(void) +{ + int i; + for (i = 0; i < AF_MAX+1; i++) { + struct radix_node_head *rnh; + rnh = rt_tables[i]; + if (rnh) { + (*rnh->rnh_walktree)(rnh, rt_reinit_rtdelete, NULL); + } + } +} + +void +rtalloc_noclone(ro, howstrict) + register struct route *ro; + int howstrict; +{ + if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) + return; /* XXX */ + ro->ro_rt = rtalloc2(&ro->ro_dst, 1, howstrict); +} + +static int +okaytoclone(flags, howstrict) + u_int flags; + int howstrict; +{ + if (howstrict == ALL_CLONING) + return 1; + if (howstrict == ONNET_CLONING && !(flags & (RTF_GATEWAY|RTF_TUNNEL))) + return 1; + return 0; +} + +struct rtentry * +rtalloc2(dst, report,howstrict) + register struct sockaddr *dst; + int report,howstrict; +{ + register struct radix_node_head *rnh = rt_tables[dst->sa_family]; + register struct rtentry *rt; + register struct radix_node *rn; + struct rtentry *newrt = 0; + struct rt_addrinfo info; + int s = splnet(), err = 0, msgtype = RTM_MISS; + + if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) && + ((rn->rn_flags & RNF_ROOT) == 0)) { + newrt = rt = (struct rtentry *)rn; + if (report && (rt->rt_flags & RTF_CLONING) && + okaytoclone(rt->rt_flags, howstrict)) { + err = rtrequest(RTM_RESOLVE, dst, SA(0), SA(0), 0, + &newrt); + if (err) { + newrt = rt; + rt->rt_refcnt++; + goto miss; + } + if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) { + msgtype = RTM_RESOLVE; + goto miss; + } + } else + rt->rt_refcnt++; + } else { + rtstat.rts_unreach++; +miss: if (report) { + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + rt_missmsg(msgtype, &info, 0, err); + } + } + splx(s); + return (newrt); +} + +/* + * Packet routing routines. 
+ */ +void +rtalloc(ro) + register struct route *ro; +{ + if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) + return; /* XXX */ + ro->ro_rt = rtalloc1(&ro->ro_dst, 1); +} + +struct rtentry * +rtalloc1(dst, report) + register struct sockaddr *dst; + int report; +{ + register struct radix_node_head *rnh = rt_tables[dst->sa_family]; + register struct rtentry *rt; + register struct radix_node *rn; + struct rtentry *newrt = 0; + struct rt_addrinfo info; + int s = splsoftnet(), err = 0, msgtype = RTM_MISS; + + if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) && + ((rn->rn_flags & RNF_ROOT) == 0)) { + newrt = rt = (struct rtentry *)rn; + if (report && (rt->rt_flags & RTF_CLONING)) { + err = rtrequest(RTM_RESOLVE, dst, SA(NULL), + SA(NULL), 0, &newrt); + if (err) { + newrt = rt; + rt->rt_refcnt++; + goto miss; + } + if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) { + msgtype = RTM_RESOLVE; + goto miss; + } + } else + rt->rt_refcnt++; + } else { + if (dst->sa_family != PF_KEY) + rtstat.rts_unreach++; + /* + * IP encapsulation does lots of lookups where we don't need nor want + * the RTM_MISSes that would be generated. It causes RTM_MISS storms + * sent upward breaking user-level routing queries. 
+ */ + miss: if (report && dst->sa_family != PF_KEY) { + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + rt_missmsg(msgtype, &info, 0, err); + } + } + splx(s); + return (newrt); +} + +void +rtfree(rt) + register struct rtentry *rt; +{ + register struct ifaddr *ifa; + + if (rt == NULL) + panic("rtfree"); + rt->rt_refcnt--; + if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) { + if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + panic ("rtfree 2"); + rttrash--; + if (rt->rt_refcnt < 0) { +#ifdef __ECOS + diag_printf("rtfree: %x not freed (neg refs)\n", rt); +#else + printf("rtfree: %x not freed (neg refs)\n", rt); +#endif + return; + } + rt_timer_remove_all(rt); + ifa = rt->rt_ifa; + if (ifa) + IFAFREE(ifa); + Free(rt_key(rt)); + Free(rt); + } +} + +void +ifafree(ifa) + register struct ifaddr *ifa; +{ + if (ifa == NULL) + panic("ifafree"); + if (ifa->ifa_refcnt == 0) + free(ifa, M_IFADDR); + else + ifa->ifa_refcnt--; +} + +/* + * Force a routing table entry to the specified + * destination to go through the given gateway. + * Normally called as a result of a routing redirect + * message from the network layer. + * + * N.B.: must be called at splsoftnet + */ +void +rtredirect(dst, gateway, netmask, flags, src, rtp) + struct sockaddr *dst, *gateway, *netmask, *src; + int flags; + struct rtentry **rtp; +{ + register struct rtentry *rt; + int error = 0; + u_int32_t *stat = NULL; + struct rt_addrinfo info; + struct ifaddr *ifa; + + /* verify the gateway is directly reachable */ + if ((ifa = ifa_ifwithnet(gateway)) == NULL) { + error = ENETUNREACH; + goto out; + } + rt = rtalloc1(dst, 0); + /* + * If the redirect isn't from our current router for this dst, + * it's either old or wrong. If it redirects us to ourselves, + * we have a routing loop, perhaps as a result of an interface + * going down recently. 
+ */ +#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) + if (!(flags & RTF_DONE) && rt && + (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) + error = EINVAL; + else if (ifa_ifwithaddr(gateway) != NULL) + error = EHOSTUNREACH; + if (error) + goto done; + /* + * Create a new entry if we just got back a wildcard entry + * or the the lookup failed. This is necessary for hosts + * which use routing redirects generated by smart gateways + * to dynamically build the routing tables. + */ + if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) + goto create; + /* + * Don't listen to the redirect if it's + * for a route to an interface. + */ + if (rt->rt_flags & RTF_GATEWAY) { + if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { + /* + * Changing from route to net => route to host. + * Create new route, rather than smashing route to net. + */ + create: + flags |= RTF_GATEWAY | RTF_DYNAMIC; + error = rtrequest((int)RTM_ADD, dst, gateway, + netmask, flags, + (struct rtentry **)0); + stat = &rtstat.rts_dynamic; + } else { + /* + * Smash the current notion of the gateway to + * this destination. Should check about netmask!!! + */ + rt->rt_flags |= RTF_MODIFIED; + flags |= RTF_MODIFIED; + stat = &rtstat.rts_newgateway; + rt_setgate(rt, rt_key(rt), gateway); + } + } else + error = EHOSTUNREACH; +done: + if (rt) { + if (rtp && !error) + *rtp = rt; + else + rtfree(rt); + } +out: + if (error) + rtstat.rts_badredirect++; + else if (stat != NULL) + (*stat)++; + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_GATEWAY] = gateway; + info.rti_info[RTAX_NETMASK] = netmask; + info.rti_info[RTAX_AUTHOR] = src; + rt_missmsg(RTM_REDIRECT, &info, flags, error); +} + +/* +* Routing table ioctl interface. 
+*/ +int +rtioctl(req, data, p) + u_long req; + caddr_t data; + struct proc *p; +{ +#ifdef __ECOS + struct ecos_rtentry *rt; + int res; + + switch (req) { + case SIOCADDRT: + rt = (struct ecos_rtentry *)data; + res = rtrequest(RTM_ADD, + &rt->rt_dst, + &rt->rt_gateway, + &rt->rt_genmask, + rt->rt_flags, + NULL); + return (res); + case SIOCDELRT: + rt = (struct ecos_rtentry *)data; + res = rtrequest(RTM_DELETE, + &rt->rt_dst, + &rt->rt_gateway, + &rt->rt_genmask, + rt->rt_flags, + NULL); + return (res); + default: + return (EOPNOTSUPP); + } +#else + return (EOPNOTSUPP); +#endif +} + +struct ifaddr * +ifa_ifwithroute(flags, dst, gateway) + int flags; + struct sockaddr *dst, *gateway; +{ + register struct ifaddr *ifa; + +#ifdef IPSEC + /* + * If the destination is a PF_KEY address, we'll look + * for the existence of a encap interface number or address + * in the options list of the gateway. By default, we'll return + * enc0. + */ + if (dst && (dst->sa_family == PF_KEY)) + return encap_findgwifa(gateway); +#endif + + if ((flags & RTF_GATEWAY) == 0) { + /* + * If we are adding a route to an interface, + * and the interface is a pt to pt link + * we should search for the destination + * as our clue to the interface. Otherwise + * we can use the local address. + */ + ifa = NULL; + if (flags & RTF_HOST) + ifa = ifa_ifwithdstaddr(dst); + if (ifa == NULL) + ifa = ifa_ifwithaddr(gateway); + } else { + /* + * If we are adding a route to a remote net + * or host, the gateway may still be on the + * other end of a pt to pt link. + */ + ifa = ifa_ifwithdstaddr(gateway); + } + if (ifa == NULL) + ifa = ifa_ifwithnet(gateway); + if (ifa == NULL) { + struct rtentry *rt = rtalloc1(gateway, 0); + if (rt == NULL) + return (NULL); + rt->rt_refcnt--; + /* The gateway must be local if the same address family. 
*/ + if (!(flags & RTF_TUNNEL) && (rt->rt_flags & RTF_GATEWAY) && + rt_key(rt)->sa_family == dst->sa_family) + return (0); + if ((ifa = rt->rt_ifa) == NULL) + return (NULL); + } + if (ifa->ifa_addr->sa_family != dst->sa_family) { + struct ifaddr *oifa = ifa; + ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); + if (ifa == NULL) + ifa = oifa; + } + return (ifa); +} + +#define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) + +const char * +_rt_cmd(int req) +{ + switch (req) + { + case RTM_DELETE: + return "DELETE"; + case RTM_RESOLVE: + return "RESOLVE"; + case RTM_ADD: + return "ADD"; + default: + return "???"; + } +} + +int +rtrequest(req, dst, gateway, netmask, flags, ret_nrt) + int req, flags; + struct sockaddr *dst, *gateway, *netmask; + struct rtentry **ret_nrt; +{ + int s = splsoftnet(); int error = 0; + register struct rtentry *rt; + register struct radix_node *rn; + register struct radix_node_head *rnh; + struct ifaddr *ifa; + struct sockaddr *ndst; +#define senderr(x) { error = x ; goto bad; } + + if ((rnh = rt_tables[dst->sa_family]) == 0) + senderr(EAFNOSUPPORT); + if (flags & RTF_HOST) + netmask = 0; + switch (req) { + case RTM_DELETE: + if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) + senderr(ESRCH); + if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + panic ("rtrequest delete"); + rt = (struct rtentry *)rn; + rt->rt_flags &= ~RTF_UP; + if (rt->rt_gwroute) { + if (rt != rt->rt_gwroute) + RTFREE( rt->rt_gwroute ); // Free it up as normal + else + rt->rt_refcnt--; // Just dec the refcount - freeing + // it here would be premature + rt->rt_gwroute = NULL; + } + if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) + ifa->ifa_rtrequest(RTM_DELETE, rt, SA(NULL)); + rttrash++; + if (ret_nrt) + *ret_nrt = rt; + else if (rt->rt_refcnt <= 0) { + rt->rt_refcnt++; + rtfree(rt); + } + break; + + case RTM_RESOLVE: + if (ret_nrt == NULL || (rt = *ret_nrt) == NULL) + senderr(EINVAL); + ifa = rt->rt_ifa; + flags = rt->rt_flags & ~RTF_CLONING; + gateway = 
rt->rt_gateway; + if ((netmask = rt->rt_genmask) == NULL) + flags |= RTF_HOST; + goto makeroute; + + case RTM_ADD: + if ((ifa = ifa_ifwithroute(flags, dst, gateway)) == NULL) + senderr(ENETUNREACH); + + /* The interface found in the previous statement may + * be overridden later by rt_setif. See the code + * for case RTM_ADD in rtsock.c:route_output. + */ + makeroute: + R_Malloc(rt, struct rtentry *, sizeof(*rt)); + if (rt == NULL) + senderr(ENOBUFS); + Bzero(rt, sizeof(*rt)); + rt->rt_flags = RTF_UP | flags; + LIST_INIT(&rt->rt_timer); + if (rt_setgate(rt, dst, gateway)) { + Free(rt); + senderr(ENOBUFS); + } + ndst = rt_key(rt); + if (netmask) { + rt_maskedcopy(dst, ndst, netmask); + } else + Bcopy(dst, ndst, dst->sa_len); +if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { /* XXX */ + rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu; +} + rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, + rnh, rt->rt_nodes); + if (rn == NULL) { + if (rt->rt_gwroute) + rtfree(rt->rt_gwroute); + Free(rt_key(rt)); + Free(rt); + senderr(EEXIST); + } + ifa->ifa_refcnt++; + rt->rt_ifa = ifa; + rt->rt_ifp = ifa->ifa_ifp; + if (req == RTM_RESOLVE) { + /* + * Copy both metrics and a back pointer to the cloned + * route's parent. + */ + rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */ + rt->rt_parent = *ret_nrt; /* Back ptr. to parent. */ + } + if (ifa->ifa_rtrequest) + ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : NULL)); + if (ret_nrt) { + *ret_nrt = rt; + rt->rt_refcnt++; + } +#ifdef INET6 + /* If we have a v4_in_v4 or a v4_in_v6 tunnel route + * then do some tunnel state (e.g. security state) + * initialization. + * + * Since IPV6 packets flow down this path, we don't + * want it using ipv4_tunnelsetup(rt) (since they + * have their own ipv6_tunnel_parent/child() + * routines which are called ipv6_rtrequest().) + * + * Thus, we check to see if the packet is to a v4 + * destination. 
+ */ + if (dst->sa_family == AF_INET && (rt->rt_flags & RTF_TUNNEL)) + ipv4_tunnelsetup(rt); +#endif /* INET6 */ + break; + } +bad: + splx(s); + return (error); +} + +/* + * Set up any tunnel states (e.g. security) information + * for v4_in_v4 or v4_in_v6 tunnel routes. + */ +void +ipv4_tunnelsetup(rt) + register struct rtentry *rt; +{ + /* XXX */ +} + +int +rt_setgate(rt0, dst, gate) + struct rtentry *rt0; + struct sockaddr *dst, *gate; +{ + caddr_t new, old; + int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len); + register struct rtentry *rt = rt0; + + if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) { + old = (caddr_t)rt_key(rt); + R_Malloc(new, caddr_t, dlen + glen); + if (new == NULL) + return 1; + rt->rt_nodes->rn_key = new; + } else { + new = rt->rt_nodes->rn_key; + old = NULL; + } + Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen); + if (old) { + Bcopy(dst, new, dlen); + Free(old); + } + if (rt->rt_gwroute != NULL) { + rt = rt->rt_gwroute; + RTFREE(rt); + rt = rt0; + rt->rt_gwroute = NULL; + } + if (rt->rt_flags & RTF_GATEWAY) { + rt->rt_gwroute = rtalloc1(gate, 1); + } + return 0; +} + +void +rt_maskedcopy(src, dst, netmask) + struct sockaddr *src, *dst, *netmask; +{ + register u_char *cp1 = (u_char *)src; + register u_char *cp2 = (u_char *)dst; + register u_char *cp3 = (u_char *)netmask; + u_char *cplim = cp2 + *cp3; + u_char *cplim2 = cp2 + *cp1; + + *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ + cp3 += 2; + if (cplim > cplim2) + cplim = cplim2; + while (cp2 < cplim) + *cp2++ = *cp1++ & *cp3++; + if (cp2 < cplim2) + bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); +} + +/* + * Set up a routing table entry, normally + * for an interface. 
+ */ +int +rtinit(ifa, cmd, flags) + register struct ifaddr *ifa; + int cmd, flags; +{ + register struct rtentry *rt; + register struct sockaddr *dst; + register struct sockaddr *deldst; + struct mbuf *m = NULL; + struct rtentry *nrt = NULL; + int error; + + dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; + if (cmd == RTM_DELETE) { + if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { + m = m_get(M_DONTWAIT, MT_SONAME); + if (m == NULL) + return(ENOBUFS); + deldst = mtod(m, struct sockaddr *); + rt_maskedcopy(dst, deldst, ifa->ifa_netmask); + dst = deldst; + } + if ((rt = rtalloc1(dst, 0)) != NULL) { + rt->rt_refcnt--; + if (rt->rt_ifa != ifa) { + if (m != NULL) + (void) m_free(m); + return (flags & RTF_HOST ? EHOSTUNREACH + : ENETUNREACH); + } + } + } + error = rtrequest(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask, + flags | ifa->ifa_flags, &nrt); + if (m != NULL) + (void) m_free(m); + if (cmd == RTM_DELETE && error == 0 && (rt = nrt) != NULL) { + rt_newaddrmsg(cmd, ifa, error, nrt); + if (rt->rt_refcnt <= 0) { + rt->rt_refcnt++; + rtfree(rt); + } + } + if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) { + rt->rt_refcnt--; +#ifdef INET6 + /* Initialize Path MTU for IPv6 interface route */ + if (ifa->ifa_addr->sa_family == AF_INET6 && + !rt->rt_rmx.rmx_mtu) + rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu; +#endif /* INET6 */ + if (rt->rt_ifa != ifa) { +#ifdef __ECOS + diag_printf("rtinit: wrong ifa (%x) was (%x)\n", + ifa, rt->rt_ifa); +#else + printf("rtinit: wrong ifa (%x) was (%x)\n", + ifa, rt->rt_ifa); +#endif + if (rt->rt_ifa->ifa_rtrequest) + rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, + SA(NULL)); + IFAFREE(rt->rt_ifa); + rt->rt_ifa = ifa; + rt->rt_ifp = ifa->ifa_ifp; + rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu; /*XXX*/ + ifa->ifa_refcnt++; + if (ifa->ifa_rtrequest) + ifa->ifa_rtrequest(RTM_ADD, rt, SA(NULL)); + } + rt_newaddrmsg(cmd, ifa, error, nrt); + } + return (error); +} + +/* + * Route timer routines. 
These routes allow functions to be called + * for various routes at any time. This is useful in supporting + * path MTU discovery and redirect route deletion. + * + * This is similar to some BSDI internal functions, but it provides + * for multiple queues for efficiency's sake... + */ + +LIST_HEAD(, rttimer_queue) rttimer_queue_head; +static int rt_init_done = 0; + +#define RTTIMER_CALLOUT(r) { \ + if (r->rtt_func != NULL) { \ + (*r->rtt_func)(r->rtt_rt, r); \ + } else { \ + rtrequest((int) RTM_DELETE, \ + (struct sockaddr *)rt_key(r->rtt_rt), \ + 0, 0, 0, 0); \ + } \ +} + +/* + * Some subtle order problems with domain initialization mean that + * we cannot count on this being run from rt_init before various + * protocol initializations are done. Therefore, we make sure + * that this is run when the first queue is added... + */ + +void +rt_timer_init() +{ +#ifndef __ECOS + assert(rt_init_done == 0); +#endif + +#if 0 + pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", + 0, NULL, NULL, M_RTABLE); +#endif + + LIST_INIT(&rttimer_queue_head); + timeout(rt_timer_timer, NULL, hz); /* every second */ + rt_init_done = 1; +} + +struct rttimer_queue * +rt_timer_queue_create(timeout) + u_int timeout; +{ + struct rttimer_queue *rtq; + + if (rt_init_done == 0) + rt_timer_init(); + + R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); + if (rtq == NULL) + return (NULL); + + rtq->rtq_timeout = timeout; + TAILQ_INIT(&rtq->rtq_head); + LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); + + return (rtq); +} + +void +rt_timer_queue_change(rtq, timeout) + struct rttimer_queue *rtq; + long timeout; +{ + + rtq->rtq_timeout = timeout; +} + + +void +rt_timer_queue_destroy(rtq, destroy) + struct rttimer_queue *rtq; + int destroy; +{ + struct rttimer *r; + + while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { + LIST_REMOVE(r, rtt_link); + TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); + if (destroy) + RTTIMER_CALLOUT(r); +#if 0 + pool_put(&rttimer_pool, r); +#else + free(r, 
M_RTABLE); +#endif + } + + LIST_REMOVE(rtq, rtq_link); + + /* + * Caller is responsible for freeing the rttimer_queue structure. + */ +} + +void +rt_timer_remove_all(rt) + struct rtentry *rt; +{ + struct rttimer *r; + + while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { + LIST_REMOVE(r, rtt_link); + TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); +#if 0 + pool_put(&rttimer_pool, r); +#else + free(r, M_RTABLE); +#endif + } +} + +int +rt_timer_add(rt, func, queue) + struct rtentry *rt; + void(*func) __P((struct rtentry *, struct rttimer *)); + struct rttimer_queue *queue; +{ + struct rttimer *r; + long current_time; + int s; + + s = splclock(); +#ifdef __ECOS + get_mono_time(); +#endif + current_time = mono_time.tv_sec; + splx(s); + + /* + * If there's already a timer with this action, destroy it before + * we add a new one. + */ + for (r = LIST_FIRST(&rt->rt_timer); r != NULL; + r = LIST_NEXT(r, rtt_link)) { + if (r->rtt_func == func) { + LIST_REMOVE(r, rtt_link); + TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); +#if 0 + pool_put(&rttimer_pool, r); +#else + free(r, M_RTABLE); +#endif + break; /* only one per list, so we can quit... 
*/ + } + } + +#if 0 + r = pool_get(&rttimer_pool, PR_NOWAIT); +#else + r = (struct rttimer *)malloc(sizeof(*r), M_RTABLE, M_NOWAIT); +#endif + if (r == NULL) + return (ENOBUFS); + + r->rtt_rt = rt; + r->rtt_time = current_time; + r->rtt_func = func; + r->rtt_queue = queue; + LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); + TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); + + return (0); +} + +/* ARGSUSED */ +void +rt_timer_timer(arg) + void *arg; +{ + struct rttimer_queue *rtq; + struct rttimer *r; + long current_time; + int s; + + s = splclock(); +#ifdef __ECOS + get_mono_time(); +#endif + current_time = mono_time.tv_sec; + splx(s); + + s = splsoftnet(); + for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL; + rtq = LIST_NEXT(rtq, rtq_link)) { + while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && + (r->rtt_time + rtq->rtq_timeout) < current_time) { + LIST_REMOVE(r, rtt_link); + TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); + RTTIMER_CALLOUT(r); +#if 0 + pool_put(&rttimer_pool, r); +#else + free(r, M_RTABLE); +#endif + } + } + splx(s); + + timeout(rt_timer_timer, NULL, hz); /* every second */ +} diff --git a/ecos/packages/net/tcpip/current/src/sys/net/rtsock.c b/ecos/packages/net/tcpip/current/src/sys/net/rtsock.c new file mode 100644 index 0000000..5e8317b --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/net/rtsock.c @@ -0,0 +1,1053 @@ +//========================================================================== +// +// sys/net/rtsock.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: rtsock.c,v 1.8 1999/12/08 06:50:18 itojun Exp $ */ +/* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 + */ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#include <sys/proc.h> +#endif +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/domain.h> +#include <sys/protosw.h> + +#ifndef __ECOS +#include <vm/vm.h> +#include <sys/sysctl.h> +#endif + +#include <net/if.h> +#include <net/route.h> +#include <net/raw_cb.h> + +#include <machine/stdarg.h> + +struct sockaddr route_dst = { 2, PF_ROUTE, }; +struct sockaddr route_src = { 2, PF_ROUTE, }; +struct sockproto route_proto = { PF_ROUTE, }; + +struct walkarg { + int w_op, w_arg, w_given, w_needed, w_tmemsize; + caddr_t w_where, w_tmem; +}; + +static struct mbuf * + rt_msg1 __P((int, struct rt_addrinfo *)); +static int rt_msg2 __P((int, + struct rt_addrinfo *, caddr_t, struct walkarg *)); +static void rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *)); +static void rt_setif __P((struct rtentry *, struct sockaddr *, + struct sockaddr *, struct sockaddr *)); + +/* Sleazy use of local variables throughout file, warning!!!! 
*/ +#define dst info.rti_info[RTAX_DST] +#define gate info.rti_info[RTAX_GATEWAY] +#define netmask info.rti_info[RTAX_NETMASK] +#define genmask info.rti_info[RTAX_GENMASK] +#define ifpaddr info.rti_info[RTAX_IFP] +#define ifaaddr info.rti_info[RTAX_IFA] +#define brdaddr info.rti_info[RTAX_BRD] + +/*ARGSUSED*/ +int +route_usrreq(so, req, m, nam, control) + register struct socket *so; + int req; + struct mbuf *m, *nam, *control; +{ + register int error = 0; + register struct rawcb *rp = sotorawcb(so); + int s; + + if (req == PRU_ATTACH) { + MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK); + if ((so->so_pcb = rp) != NULL) + bzero(so->so_pcb, sizeof(*rp)); + + } + if (req == PRU_DETACH && rp) { + int af = rp->rcb_proto.sp_protocol; + if (af == AF_INET) + route_cb.ip_count--; + else if (af == AF_INET6) + route_cb.ip6_count--; + else if (af == AF_NS) + route_cb.ns_count--; + else if (af == AF_ISO) + route_cb.iso_count--; + route_cb.any_count--; + } + s = splsoftnet(); + /* + * Don't call raw_usrreq() in the attach case, because + * we want to allow non-privileged processes to listen on + * and send "safe" commands to the routing socket. + */ + if (req == PRU_ATTACH) { +#ifndef __ECOS + if (curproc == 0) + error = EACCES; + else +#endif // FIXME? + error = raw_attach(so, (int)(long)nam); + } else + error = raw_usrreq(so, req, m, nam, control); + + rp = sotorawcb(so); + if (req == PRU_ATTACH && rp) { + int af = rp->rcb_proto.sp_protocol; + if (error) { + free((caddr_t)rp, M_PCB); + splx(s); + return (error); + } + if (af == AF_INET) + route_cb.ip_count++; + else if (af == AF_INET6) + route_cb.ip6_count++; + else if (af == AF_NS) + route_cb.ns_count++; + else if (af == AF_ISO) + route_cb.iso_count++; + rp->rcb_faddr = &route_src; + route_cb.any_count++; + soisconnected(so); + so->so_options |= SO_USELOOPBACK; + } + splx(s); + return (error); +} + +/*ARGSUSED*/ +int +#if __STDC__ +route_output(struct mbuf *m, ...) 
+#else +route_output(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + register struct rt_msghdr *rtm = 0; + register struct rtentry *rt = 0; + struct rtentry *saved_nrt = 0; + struct radix_node_head *rnh; + struct rt_addrinfo info; + int len, error = 0; + struct ifnet *ifp = 0; + struct socket *so; + va_list ap; + + va_start(ap, m); + so = va_arg(ap, struct socket *); + va_end(ap); + + bzero(&info, sizeof(info)); +#define senderr(e) { error = e; goto flush;} + if (m == 0 || ((m->m_len < sizeof(int32_t)) && + (m = m_pullup(m, sizeof(int32_t))) == 0)) + return (ENOBUFS); + if ((m->m_flags & M_PKTHDR) == 0) + panic("route_output"); + len = m->m_pkthdr.len; + if (len < sizeof(*rtm) || + len != mtod(m, struct rt_msghdr *)->rtm_msglen) { + dst = 0; + senderr(EINVAL); + } + R_Malloc(rtm, struct rt_msghdr *, len); + if (rtm == 0) { + dst = 0; + senderr(ENOBUFS); + } + m_copydata(m, 0, len, (caddr_t)rtm); + if (rtm->rtm_version != RTM_VERSION) { + dst = 0; + senderr(EPROTONOSUPPORT); + } +#ifdef __ECOS + rtm->rtm_pid = 0; // FIXME +#else + rtm->rtm_pid = curproc->p_pid; +#endif + info.rti_addrs = rtm->rtm_addrs; + rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info); + if (dst == 0) + senderr(EINVAL); + if (genmask) { + struct radix_node *t; + t = rn_addmask((caddr_t)genmask, 0, 1); + if (t && Bcmp(genmask, t->rn_key, *(u_char *)genmask) == 0) + genmask = (struct sockaddr *)(t->rn_key); + else + senderr(ENOBUFS); + } + + /* + * Verify that the caller has the appropriate privilege; RTM_GET + * is the only operation the non-superuser is allowed. 
+ */ +#ifndef __ECOS + if (rtm->rtm_type != RTM_GET && + suser(curproc->p_ucred, &curproc->p_acflag) != 0) + senderr(EACCES); +#endif + switch (rtm->rtm_type) { + + case RTM_ADD: + if (gate == 0) + senderr(EINVAL); + error = rtrequest(RTM_ADD, dst, gate, netmask, + rtm->rtm_flags, &saved_nrt); + if (error == 0 && saved_nrt) { + /* + * If the route request specified an interface with + * IFA and/or IFP, we set the requested interface on + * the route with rt_setif. It would be much better + * to do this inside rtrequest, but that would + * require passing the desired interface, in some + * form, to rtrequest. Since rtrequest is called in + * so many places (roughly 40 in our source), adding + * a parameter is to much for us to swallow; this is + * something for the FreeBSD developers to tackle. + * Instead, we let rtrequest compute whatever + * interface it wants, then come in behind it and + * stick in the interface that we really want. This + * works reasonably well except when rtrequest can't + * figure out what interface to use (with + * ifa_withroute) and returns ENETUNREACH. Ideally + * it shouldn't matter if rtrequest can't figure out + * the interface if we're going to explicitly set it + * ourselves anyway. But practically we can't + * recover here because rtrequest will not do any of + * the work necessary to add the route if it can't + * find an interface. As long as there is a default + * route that leads to some interface, rtrequest will + * find an interface, so this problem should be + * rarely encountered. 
+ * dwiggins@bbn.com + */ + + rt_setif(saved_nrt, ifpaddr, ifaaddr, gate); + rt_setmetrics(rtm->rtm_inits, + &rtm->rtm_rmx, &saved_nrt->rt_rmx); + saved_nrt->rt_refcnt--; + saved_nrt->rt_genmask = genmask; + } + break; + + case RTM_DELETE: + error = rtrequest(RTM_DELETE, dst, gate, netmask, + rtm->rtm_flags, &saved_nrt); + if (error == 0) { + (rt = saved_nrt)->rt_refcnt++; + goto report; + } + break; + + case RTM_GET: + case RTM_CHANGE: + case RTM_LOCK: + if ((rnh = rt_tables[dst->sa_family]) == 0) { + senderr(EAFNOSUPPORT); + } else if ((rt = (struct rtentry *) + rnh->rnh_lookup(dst, netmask, rnh)) != NULL) + rt->rt_refcnt++; + else + senderr(ESRCH); + switch(rtm->rtm_type) { + + case RTM_GET: + report: + dst = rt_key(rt); + gate = rt->rt_gateway; + netmask = rt_mask(rt); + genmask = rt->rt_genmask; + if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { + if ((ifp = rt->rt_ifp) != NULL) { + ifpaddr = ifp->if_addrlist.tqh_first->ifa_addr; + ifaaddr = rt->rt_ifa->ifa_addr; + if (ifp->if_flags & IFF_POINTOPOINT) + brdaddr = rt->rt_ifa->ifa_dstaddr; + else + brdaddr = 0; + rtm->rtm_index = ifp->if_index; + } else { + ifpaddr = 0; + ifaaddr = 0; + } + } + len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0, + (struct walkarg *)0); + if (len > rtm->rtm_msglen) { + struct rt_msghdr *new_rtm; + R_Malloc(new_rtm, struct rt_msghdr *, len); + if (new_rtm == 0) + senderr(ENOBUFS); + Bcopy(rtm, new_rtm, rtm->rtm_msglen); + Free(rtm); rtm = new_rtm; + } + (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, + (struct walkarg *)0); + rtm->rtm_flags = rt->rt_flags; + rtm->rtm_rmx = rt->rt_rmx; + rtm->rtm_addrs = info.rti_addrs; + break; + + case RTM_CHANGE: + if (gate && rt_setgate(rt, rt_key(rt), gate)) +#ifdef __ECOS + senderr(EMFILE); +#else + senderr(EDQUOT); +#endif + +#if 1 + rt_setif(rt, ifpaddr, ifaaddr, gate); +#else + /* new gateway could require new ifaddr, ifp; + flags may also be different; ifp may be specified + by ll sockaddr when protocol address is ambiguous */ + if (ifpaddr && 
(ifa = ifa_ifwithnet(ifpaddr)) && + (ifp = ifa->ifa_ifp) && (ifaaddr || gate)) + ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate, + ifp); + else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) || + (gate && (ifa = ifa_ifwithroute(rt->rt_flags, + rt_key(rt), gate)))) + ifp = ifa->ifa_ifp; + if (ifa) { + register struct ifaddr *oifa = rt->rt_ifa; + if (oifa != ifa) { + if (oifa && oifa->ifa_rtrequest) + oifa->ifa_rtrequest(RTM_DELETE, + rt, gate); + IFAFREE(rt->rt_ifa); + rt->rt_ifa = ifa; + ifa->ifa_refcnt++; + rt->rt_ifp = ifp; + } + } +#endif + rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, + &rt->rt_rmx); +#if 0 + if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) + rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate); +#endif + if (genmask) + rt->rt_genmask = genmask; + /* + * Fall into + */ + case RTM_LOCK: + rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + rt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); + break; + } + break; + + default: + senderr(EOPNOTSUPP); + } + +flush: + if (rtm) { + if (error) + rtm->rtm_errno = error; + else + rtm->rtm_flags |= RTF_DONE; + } + if (rt) + rtfree(rt); + { + register struct rawcb *rp = 0; + /* + * Check to see if we don't want our own messages. 
+ */ + if ((so->so_options & SO_USELOOPBACK) == 0) { + if (route_cb.any_count <= 1) { + if (rtm) + Free(rtm); + m_freem(m); + return (error); + } + /* There is another listener, so construct message */ + rp = sotorawcb(so); + } + if (rtm) { + m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); + Free(rtm); + } + if (rp) + rp->rcb_proto.sp_family = 0; /* Avoid us */ + if (dst) + route_proto.sp_protocol = dst->sa_family; + raw_input(m, &route_proto, &route_src, &route_dst); + if (rp) + rp->rcb_proto.sp_family = PF_ROUTE; + } + return (error); +} + +void +rt_setmetrics(which, in, out) + u_long which; + register struct rt_metrics *in, *out; +{ +#define metric(f, e) if (which & (f)) out->e = in->e; + metric(RTV_RPIPE, rmx_recvpipe); + metric(RTV_SPIPE, rmx_sendpipe); + metric(RTV_SSTHRESH, rmx_ssthresh); + metric(RTV_RTT, rmx_rtt); + metric(RTV_RTTVAR, rmx_rttvar); + metric(RTV_HOPCOUNT, rmx_hopcount); + metric(RTV_MTU, rmx_mtu); + metric(RTV_EXPIRE, rmx_expire); +#undef metric +} + +/* + * Set route's interface given ifpaddr, ifaaddr, and gateway. + */ +static void +rt_setif(rt, Ifpaddr, Ifaaddr, Gate) + struct rtentry *rt; + struct sockaddr *Ifpaddr, *Ifaaddr, *Gate; +{ + struct ifaddr *ifa = 0; + struct ifnet *ifp = 0; + + /* new gateway could require new ifaddr, ifp; + flags may also be different; ifp may be specified + by ll sockaddr when protocol address is ambiguous */ + if (Ifpaddr && (ifa = ifa_ifwithnet(Ifpaddr)) && + (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) + ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, + ifp); + else if (Ifpaddr && (ifp = if_withname(Ifpaddr)) ) { + ifa = Gate ? 
ifaof_ifpforaddr(Gate, ifp) : + TAILQ_FIRST(&ifp->if_addrlist); + } + else if ((Ifaaddr && (ifa = ifa_ifwithaddr(Ifaaddr))) || + (Gate && (ifa = ifa_ifwithroute(rt->rt_flags, + rt_key(rt), Gate)))) + ifp = ifa->ifa_ifp; + if (ifa) { + register struct ifaddr *oifa = rt->rt_ifa; + if (oifa != ifa) { + if (oifa && oifa->ifa_rtrequest) + oifa->ifa_rtrequest(RTM_DELETE, + rt, Gate); + IFAFREE(rt->rt_ifa); + rt->rt_ifa = ifa; + ifa->ifa_refcnt++; + rt->rt_ifp = ifp; + rt->rt_rmx.rmx_mtu = ifp->if_mtu; + if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) + rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate); + } else + goto call_ifareq; + return; + } + call_ifareq: + /* XXX: to reset gateway to correct value, at RTM_CHANGE */ + if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) + rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate); +} + + +#define ROUNDUP(a) \ + ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) +#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) + +static void +rt_xaddrs(cp, cplim, rtinfo) + register caddr_t cp, cplim; + register struct rt_addrinfo *rtinfo; +{ + register struct sockaddr *sa; + register int i; + + bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); + for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { + if ((rtinfo->rti_addrs & (1 << i)) == 0) + continue; + rtinfo->rti_info[i] = sa = (struct sockaddr *)cp; + ADVANCE(cp, sa); + } +} + +/* + * Copy data from a buffer back into the indicated mbuf chain, + * starting "off" bytes from the beginning, extending the mbuf + * chain if necessary. The mbuf needs to be properly initalized + * including the setting of m_len. 
+ */ +void +m_copyback(m0, off, len, cp) + struct mbuf *m0; + register int off; + register int len; + caddr_t cp; +{ + register int mlen; + register struct mbuf *m = m0, *n; + int totlen = 0; + + if (m0 == 0) + return; + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == 0) { + n = m_getclr(M_DONTWAIT, m->m_type); + if (n == 0) + goto out; + n->m_len = min(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + while (len > 0) { + mlen = min (m->m_len - off, len); + bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) + break; + if (m->m_next == 0) { + n = m_get(M_DONTWAIT, m->m_type); + if (n == 0) + break; + n->m_len = min(MLEN, len); + m->m_next = n; + } + m = m->m_next; + } +out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + m->m_pkthdr.len = totlen; +} + +static struct mbuf * +rt_msg1(type, rtinfo) + int type; + register struct rt_addrinfo *rtinfo; +{ + register struct rt_msghdr *rtm; + register struct mbuf *m; + register int i; + register struct sockaddr *sa; + int len, dlen; + + m = m_gethdr(M_DONTWAIT, MT_DATA); + if (m == 0) + return (m); + switch (type) { + + case RTM_DELADDR: + case RTM_NEWADDR: + len = sizeof(struct ifa_msghdr); + break; + + case RTM_IFINFO: + len = sizeof(struct if_msghdr); + break; + + default: + len = sizeof(struct rt_msghdr); + } + if (len > MHLEN) + panic("rt_msg1"); + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = 0; + rtm = mtod(m, struct rt_msghdr *); + bzero((caddr_t)rtm, len); + for (i = 0; i < RTAX_MAX; i++) { + if ((sa = rtinfo->rti_info[i]) == NULL) + continue; + rtinfo->rti_addrs |= (1 << i); + dlen = ROUNDUP(sa->sa_len); + m_copyback(m, len, dlen, (caddr_t)sa); + len += dlen; + } + if (m->m_pkthdr.len != len) { + m_freem(m); + return (NULL); + } + rtm->rtm_msglen = len; + rtm->rtm_version = RTM_VERSION; + rtm->rtm_type = type; + return (m); +} + +static int 
+rt_msg2(type, rtinfo, cp, w) + int type; + register struct rt_addrinfo *rtinfo; + caddr_t cp; + struct walkarg *w; +{ + register int i; + int len, dlen, second_time = 0; + caddr_t cp0; + + rtinfo->rti_addrs = 0; +again: + switch (type) { + + case RTM_DELADDR: + case RTM_NEWADDR: + len = sizeof(struct ifa_msghdr); + break; + + case RTM_IFINFO: + len = sizeof(struct if_msghdr); + break; + + default: + len = sizeof(struct rt_msghdr); + } + if ((cp0 = cp) != NULL) + cp += len; + for (i = 0; i < RTAX_MAX; i++) { + register struct sockaddr *sa; + + if ((sa = rtinfo->rti_info[i]) == 0) + continue; + rtinfo->rti_addrs |= (1 << i); + dlen = ROUNDUP(sa->sa_len); + if (cp) { + bcopy((caddr_t)sa, cp, (unsigned)dlen); + cp += dlen; + } + len += dlen; + } + if (cp == 0 && w != NULL && !second_time) { + register struct walkarg *rw = w; + + rw->w_needed += len; + if (rw->w_needed <= 0 && rw->w_where) { + if (rw->w_tmemsize < len) { + if (rw->w_tmem) + free(rw->w_tmem, M_RTABLE); + rw->w_tmem = (caddr_t) malloc(len, M_RTABLE, + M_NOWAIT); + if (rw->w_tmem) + rw->w_tmemsize = len; + } + if (rw->w_tmem) { + cp = rw->w_tmem; + second_time = 1; + goto again; + } else + rw->w_where = 0; + } + } + if (cp) { + register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; + + rtm->rtm_version = RTM_VERSION; + rtm->rtm_type = type; + rtm->rtm_msglen = len; + } + return (len); +} + +/* + * This routine is called to generate a message from the routing + * socket indicating that a redirect has occured, a routing lookup + * has failed, or that a protocol has detected timeouts to a particular + * destination. 
+ */ +void +rt_missmsg(type, rtinfo, flags, error) + int type, flags, error; + register struct rt_addrinfo *rtinfo; +{ + register struct rt_msghdr *rtm; + register struct mbuf *m; + struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; + + if (route_cb.any_count == 0) + return; + m = rt_msg1(type, rtinfo); + if (m == 0) + return; + rtm = mtod(m, struct rt_msghdr *); + rtm->rtm_flags = RTF_DONE | flags; + rtm->rtm_errno = error; + rtm->rtm_addrs = rtinfo->rti_addrs; + route_proto.sp_protocol = sa ? sa->sa_family : 0; + raw_input(m, &route_proto, &route_src, &route_dst); +} + +/* + * This routine is called to generate a message from the routing + * socket indicating that the status of a network interface has changed. + */ +void +rt_ifmsg(ifp) + register struct ifnet *ifp; +{ + register struct if_msghdr *ifm; + struct mbuf *m; + struct rt_addrinfo info; + + if (route_cb.any_count == 0) + return; + bzero((caddr_t)&info, sizeof(info)); + m = rt_msg1(RTM_IFINFO, &info); + if (m == 0) + return; + ifm = mtod(m, struct if_msghdr *); + ifm->ifm_index = ifp->if_index; + ifm->ifm_flags = ifp->if_flags; + ifm->ifm_data = ifp->if_data; + ifm->ifm_addrs = 0; + route_proto.sp_protocol = 0; + raw_input(m, &route_proto, &route_src, &route_dst); +} + +/* + * This is called to generate messages from the routing socket + * indicating a network interface has had addresses associated with it. + * if we ever reverse the logic and replace messages TO the routing + * socket indicate a request to configure interfaces, then it will + * be unnecessary as the routing socket will automatically generate + * copies of it. 
+ */ +void +rt_newaddrmsg(cmd, ifa, error, rt) + int cmd, error; + register struct ifaddr *ifa; + register struct rtentry *rt; +{ + struct rt_addrinfo info; + struct sockaddr *sa = NULL; + int pass; + struct mbuf *m = NULL; + struct ifnet *ifp = ifa->ifa_ifp; + + if (route_cb.any_count == 0) + return; + for (pass = 1; pass < 3; pass++) { + bzero((caddr_t)&info, sizeof(info)); + if ((cmd == RTM_ADD && pass == 1) || + (cmd == RTM_DELETE && pass == 2)) { + register struct ifa_msghdr *ifam; + int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; + + ifaaddr = sa = ifa->ifa_addr; + ifpaddr = ifp->if_addrlist.tqh_first->ifa_addr; + netmask = ifa->ifa_netmask; + brdaddr = ifa->ifa_dstaddr; + if ((m = rt_msg1(ncmd, &info)) == NULL) + continue; + ifam = mtod(m, struct ifa_msghdr *); + ifam->ifam_index = ifp->if_index; + ifam->ifam_metric = ifa->ifa_metric; + ifam->ifam_flags = ifa->ifa_flags; + ifam->ifam_addrs = info.rti_addrs; + } + if ((cmd == RTM_ADD && pass == 2) || + (cmd == RTM_DELETE && pass == 1)) { + register struct rt_msghdr *rtm; + + if (rt == 0) + continue; + netmask = rt_mask(rt); + dst = sa = rt_key(rt); + gate = rt->rt_gateway; + if ((m = rt_msg1(cmd, &info)) == NULL) + continue; + rtm = mtod(m, struct rt_msghdr *); + rtm->rtm_index = ifp->if_index; + rtm->rtm_flags |= rt->rt_flags; + rtm->rtm_errno = error; + rtm->rtm_addrs = info.rti_addrs; + } + route_proto.sp_protocol = sa ? sa->sa_family : 0; + raw_input(m, &route_proto, &route_src, &route_dst); + } +} + +#ifndef __ECOS +/* + * This is used in dumping the kernel table via sysctl(). 
+ */ +int +sysctl_dumpentry(rn, v) + struct radix_node *rn; + register void *v; +{ + register struct walkarg *w = v; + register struct rtentry *rt = (struct rtentry *)rn; + int error = 0, size; + struct rt_addrinfo info; + + if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) + return 0; + bzero((caddr_t)&info, sizeof(info)); + dst = rt_key(rt); + gate = rt->rt_gateway; + netmask = rt_mask(rt); + genmask = rt->rt_genmask; + if (rt->rt_ifp) { + ifpaddr = rt->rt_ifp->if_addrlist.tqh_first->ifa_addr; + ifaaddr = rt->rt_ifa->ifa_addr; + if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) + brdaddr = rt->rt_ifa->ifa_dstaddr; + } + size = rt_msg2(RTM_GET, &info, 0, w); + if (w->w_where && w->w_tmem) { + register struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; + + rtm->rtm_flags = rt->rt_flags; + rtm->rtm_use = rt->rt_use; + rtm->rtm_rmx = rt->rt_rmx; + rtm->rtm_index = rt->rt_ifp->if_index; + rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; + rtm->rtm_addrs = info.rti_addrs; + if ((error = copyout((caddr_t)rtm, w->w_where, size)) != 0) + w->w_where = NULL; + else + w->w_where += size; + } + return (error); +} + +int +sysctl_iflist(af, w) + int af; + register struct walkarg *w; +{ + register struct ifnet *ifp; + register struct ifaddr *ifa; + struct rt_addrinfo info; + int len, error = 0; + + bzero((caddr_t)&info, sizeof(info)); + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) { + if (w->w_arg && w->w_arg != ifp->if_index) + continue; + ifa = ifp->if_addrlist.tqh_first; + ifpaddr = ifa->ifa_addr; + len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w); + ifpaddr = 0; + if (w->w_where && w->w_tmem) { + register struct if_msghdr *ifm; + + ifm = (struct if_msghdr *)w->w_tmem; + ifm->ifm_index = ifp->if_index; + ifm->ifm_flags = ifp->if_flags; + ifm->ifm_data = ifp->if_data; + ifm->ifm_addrs = info.rti_addrs; + error = copyout((caddr_t)ifm, w->w_where, len); + if (error) + return (error); + w->w_where += len; + } + while ((ifa = ifa->ifa_list.tqe_next) != 
NULL) { + if (af && af != ifa->ifa_addr->sa_family) + continue; + ifaaddr = ifa->ifa_addr; + netmask = ifa->ifa_netmask; + brdaddr = ifa->ifa_dstaddr; + len = rt_msg2(RTM_NEWADDR, &info, 0, w); + if (w->w_where && w->w_tmem) { + register struct ifa_msghdr *ifam; + + ifam = (struct ifa_msghdr *)w->w_tmem; + ifam->ifam_index = ifa->ifa_ifp->if_index; + ifam->ifam_flags = ifa->ifa_flags; + ifam->ifam_metric = ifa->ifa_metric; + ifam->ifam_addrs = info.rti_addrs; + error = copyout(w->w_tmem, w->w_where, len); + if (error) + return (error); + w->w_where += len; + } + } + ifaaddr = netmask = brdaddr = 0; + } + return (0); +} + +int +sysctl_rtable(name, namelen, where, given, new, newlen) + int *name; + u_int namelen; + void *where; + size_t *given; + void *new; + size_t newlen; +{ + register struct radix_node_head *rnh; + int i, s, error = EINVAL; + u_char af; + struct walkarg w; + + if (new) + return (EPERM); + if (namelen != 3) + return (EINVAL); + af = name[0]; + Bzero(&w, sizeof(w)); + w.w_where = where; + w.w_given = *given; + w.w_needed = 0 - w.w_given; + w.w_op = name[1]; + w.w_arg = name[2]; + + s = splsoftnet(); + switch (w.w_op) { + + case NET_RT_DUMP: + case NET_RT_FLAGS: + for (i = 1; i <= AF_MAX; i++) + if ((rnh = rt_tables[i]) && (af == 0 || af == i) && + (error = (*rnh->rnh_walktree)(rnh, + sysctl_dumpentry, + &w))) + break; + break; + + case NET_RT_IFLIST: + error = sysctl_iflist(af, &w); + } + splx(s); + if (w.w_tmem) + free(w.w_tmem, M_RTABLE); + w.w_needed += w.w_given; + if (where) { + *given = w.w_where - (caddr_t) where; + if (*given < w.w_needed) + return (ENOMEM); + } else { + *given = (11 * w.w_needed) / 10; + } + return (error); +} +#endif + +/* + * Definitions of protocols supported in the ROUTE domain. 
/*
 * Protocol switch and domain descriptor for PF_ROUTE.
 *
 * Both initialisers are positional; the meaning of each slot is fixed by
 * the declarations of struct protosw and struct domain, which are not
 * part of this file -- NOTE(review): confirm slot order against
 * <sys/protosw.h> and <sys/domain.h> before adding or moving entries.
 */
extern	struct domain routedomain;		/* or at least forward */

struct protosw routesw[] = {
{ SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
  raw_input,	route_output,	raw_ctlinput,	0,
  route_usrreq,
  raw_init,	0,		0,		0,
  /* Last slot: sysctl_rtable is compiled out under __ECOS, so use 0. */
#ifdef __ECOS
  0,
#else
  sysctl_rtable,
#endif
}
};

/* The last two members delimit the routesw[] array: start and one past the end. */
struct domain routedomain =
    { PF_ROUTE, "route", route_init, 0, 0,
      routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if_ether.c 8.1 (Berkeley) 6/10/93 + */ + +/* + * Ethernet address resolution protocol. + * TODO: + * add "inuse/lock" bit (or ref. 
count) along with valid bit + */ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/errno.h> +#include <sys/ioctl.h> +#ifndef __ECOS +#include <sys/syslog.h> +#include <sys/proc.h> +#endif + +#ifdef INET + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/if_ether.h> + +#define SIN(s) ((struct sockaddr_in *)s) +#define SDL(s) ((struct sockaddr_dl *)s) +#define SRP(s) ((struct sockaddr_inarp *)s) + +/* + * ARP trailer negotiation. Trailer protocol is not IP specific, + * but ARP request/response use IP addresses. + */ +#define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL + +/* timer values */ +int arpt_prune = (5*60*1); /* walk list every 5 minutes */ +int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ +int arpt_down = 20; /* once declared down, don't send for 20 secs */ +#define rt_expire rt_rmx.rmx_expire + +static void arprequest + __P((struct arpcom *, u_int32_t *, u_int32_t *, u_int8_t *)); +static void arptfree __P((struct llinfo_arp *)); +static void arptimer __P((void *)); +static struct llinfo_arp *arplookup __P((u_int32_t, int, int)); +static void in_arpinput __P((struct mbuf *)); + +extern struct ifnet loif; +LIST_HEAD(, llinfo_arp) llinfo_arp; +struct ifqueue arpintrq = {0, 0, 0, 50}; +int arp_inuse, arp_allocated, arp_intimer; +int arp_maxtries = 5; +int useloopback = 1; /* use loopback interface for local traffic */ +int arpinit_done = 0; + +/* revarp state */ +static struct in_addr myip, srv_ip; +static int myip_initialized = 0; +static int revarp_in_progress = 0; +struct ifnet *myip_ifp = NULL; + +static void arptimer __P((void *)); +static void arprequest __P((struct arpcom *, u_int32_t *, u_int32_t *, + u_int8_t *)); +static void 
in_arpinput __P((struct mbuf *));
+static void arptfree __P((struct llinfo_arp *));
+static struct llinfo_arp *arplookup __P((u_int32_t, int, int ));
+#ifdef DDB
+#include <vm/vm.h>
+
+static void db_print_sa __P((struct sockaddr *));
+static void db_print_ifa __P((struct ifaddr *));
+static void db_print_llinfo __P((caddr_t));
+static int db_show_radix_node __P((struct radix_node *, void *));
+#endif
+
+/*
+ * Timeout routine. Age arp_tab entries periodically.
+ *
+ * Re-arms itself to run again after arpt_prune seconds, then walks the
+ * llinfo_arp list at splsoftnet and hands every entry whose route has a
+ * non-zero rt_expire that is not later than time.tv_sec to arptfree().
+ * The successor is fetched before arptfree() is called because that
+ * call may issue an RTM_DELETE for the entry's route.
+ * The arg parameter is not used.
+ */
+/* ARGSUSED */
+static void
+arptimer(arg)
+	void *arg;
+{
+	int s;
+	register struct llinfo_arp *la, *nla;
+
+	s = splsoftnet();
+	timeout(arptimer, NULL, arpt_prune * hz);
+	for (la = llinfo_arp.lh_first; la != 0; la = nla) {
+		register struct rtentry *rt = la->la_rt;
+
+		nla = la->la_list.le_next;
+		if (rt->rt_expire && rt->rt_expire <= time.tv_sec)
+			arptfree(la); /* timer has expired; clear */
+	}
+	splx(s);
+}
+
+/*
+ * Parallel to llc_rtrequest.
+ *
+ * Route-request hook for ARP; arp_ifinit() installs it as the
+ * interface address's ifa_rtrequest. The first call also schedules
+ * arptimer(). Routes with RTF_GATEWAY set are ignored. Per request:
+ *	RTM_ADD		cloning routes get an empty AF_LINK gateway and
+ *			an expiration time; other routes are optionally
+ *			announced and then fall through to RTM_RESOLVE
+ *	RTM_RESOLVE	allocate the llinfo_arp for the route (unless it
+ *			already has one) and link it on the llinfo_arp list
+ *	RTM_DELETE	unlink and free the llinfo_arp and any held mbuf
+ * The sa argument is not used.
+ */
+void
+arp_rtrequest(req, rt, sa)
+	int req;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct sockaddr *gate = rt->rt_gateway;
+	register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
+	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+	if (!arpinit_done) {
+		arpinit_done = 1;
+		/*
+		 * We generate expiration times from time.tv_sec
+		 * so avoid accidentally creating permanent routes.
+		 */
+		if (time.tv_sec == 0) {
+			time.tv_sec++;
+		}
+		timeout(arptimer, (caddr_t)0, hz);
+	}
+	if (rt->rt_flags & RTF_GATEWAY)
+		return;
+	switch (req) {
+
+	case RTM_ADD:
+		/*
+		 * XXX: If this is a manually added route to interface
+		 * such as older version of routed or gated might provide,
+		 * restore cloning bit.
+		 */
+		if ((rt->rt_flags & RTF_HOST) == 0 &&
+		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
+			rt->rt_flags |= RTF_CLONING;
+		if (rt->rt_flags & RTF_CLONING) {
+			/*
+			 * Case 1: This route should come from a route to iface.
+			 */
+			rt_setgate(rt, rt_key(rt),
+			    (struct sockaddr *)&null_sdl);
+			gate = rt->rt_gateway;
+			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+			/*
+			 * Give this route an expiration time, even though
+			 * it's a "permanent" route, so that routes cloned
+			 * from it do not need their expiration time set.
+			 */
+			rt->rt_expire = time.tv_sec;
+			break;
+		}
+		/* Announce a new entry if requested. */
+		if (rt->rt_flags & RTF_ANNOUNCE)
+			arprequest((struct arpcom *)rt->rt_ifp,
+			    &SIN(rt_key(rt))->sin_addr.s_addr,
+			    &SIN(rt_key(rt))->sin_addr.s_addr,
+			    (u_char *)LLADDR(SDL(gate)));
+		/*FALLTHROUGH*/
+	case RTM_RESOLVE:
+		if (gate->sa_family != AF_LINK ||
+		    gate->sa_len < sizeof(null_sdl)) {
+#ifdef __ECOS
+#else
+			log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
+#endif
+			break;
+		}
+		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+		if (la != 0)
+			break; /* This happens on a route change */
+		/*
+		 * Case 2: This route may come from cloning, or a manual route
+		 * add with a LL address.
+		 */
+		R_Malloc(la, struct llinfo_arp *, sizeof(*la));
+		rt->rt_llinfo = (caddr_t)la;	/* stored even when la is 0 */
+		if (la == 0) {
+#ifdef __ECOS
+#else
+			log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
+#endif
+			break;
+		}
+		arp_inuse++, arp_allocated++;
+		Bzero(la, sizeof(*la));
+		la->la_rt = rt;
+		rt->rt_flags |= RTF_LLINFO;
+		LIST_INSERT_HEAD(&llinfo_arp, la, la_list);
+		if (SIN(rt_key(rt))->sin_addr.s_addr ==
+		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
+			/*
+			 * This test used to be
+			 * if (loif.if_flags & IFF_UP)
+			 * It allowed local traffic to be forced through
+			 * the hardware by configuring the loopback down.
+			 * However, it causes problems during network
+			 * configuration for boards that can't receive
+			 * packets they send. It is now necessary to clear
+			 * "useloopback" and remove the route to force
+			 * traffic out to the hardware.
+			 */
+			rt->rt_expire = 0;	/* entry for our own address never expires */
+			Bcopy(((struct arpcom *)rt->rt_ifp)->ac_enaddr,
+			    LLADDR(SDL(gate)),
+			    SDL(gate)->sdl_alen = ETHER_ADDR_LEN);
+			if (useloopback)
+				rt->rt_ifp = &loif;
+		}
+		break;
+
+	case RTM_DELETE:
+		if (la == 0)
+			break;
+		arp_inuse--;
+		LIST_REMOVE(la, la_list);
+		rt->rt_llinfo = 0;
+		rt->rt_flags &= ~RTF_LLINFO;
+		if (la->la_hold)
+			m_freem(la->la_hold);
+		Free((caddr_t)la);
+	}
+}
+
+/*
+ * Broadcast an ARP request. Caller specifies:
+ * - arp header source ip address
+ * - arp header target ip address
+ * - arp header source ethernet address
+ *
+ * The ARP payload is built in a freshly allocated mbuf; the Ethernet
+ * header is built in the data area of an AF_UNSPEC sockaddr that is
+ * passed to the interface's if_output routine. If no mbuf can be
+ * allocated the request is silently not sent.
+ */
+static void
+arprequest(ac, sip, tip, enaddr)
+	register struct arpcom *ac;
+	register u_int32_t *sip, *tip;
+	register u_int8_t *enaddr;
+{
+	register struct mbuf *m;
+	register struct ether_header *eh;
+	register struct ether_arp *ea;
+	struct sockaddr sa;
+
+	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+		return;
+	m->m_len = sizeof(*ea);
+	m->m_pkthdr.len = sizeof(*ea);
+	MH_ALIGN(m, sizeof(*ea));
+	ea = mtod(m, struct ether_arp *);
+	eh = (struct ether_header *)sa.sa_data;
+	bzero((caddr_t)ea, sizeof (*ea));
+	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost,
+	    sizeof(eh->ether_dhost));
+	eh->ether_type = htons(ETHERTYPE_ARP); /* if_output will not swap */
+	ea->arp_hrd = htons(ARPHRD_ETHER);
+	ea->arp_pro = htons(ETHERTYPE_IP);
+	ea->arp_hln = sizeof(ea->arp_sha); /* hardware address length */
+	ea->arp_pln = sizeof(ea->arp_spa); /* protocol address length */
+	ea->arp_op = htons(ARPOP_REQUEST);
+	bcopy((caddr_t)enaddr, (caddr_t)eh->ether_shost,
+	    sizeof(eh->ether_shost));
+	bcopy((caddr_t)enaddr, (caddr_t)ea->arp_sha, sizeof(ea->arp_sha));
+	bcopy((caddr_t)sip, (caddr_t)ea->arp_spa, sizeof(ea->arp_spa));
+	bcopy((caddr_t)tip, (caddr_t)ea->arp_tpa, sizeof(ea->arp_tpa));
+	sa.sa_family = AF_UNSPEC;
+	sa.sa_len = sizeof(sa);
+	(*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+}
+
+/*
+ * Resolve an IP address into an ethernet address. 
If success, + * desten is filled in. If there is no entry in arptab, + * set one up and broadcast a request for the IP address. + * Hold onto this mbuf and resend it once the address + * is finally resolved. A return value of 1 indicates + * that desten has been filled in and the packet should be sent + * normally; a 0 return indicates that the packet has been + * taken over here, either now or for later transmission. + */ +int +arpresolve(ac, rt, m, dst, desten) + register struct arpcom *ac; + register struct rtentry *rt; + struct mbuf *m; + register struct sockaddr *dst; + register u_char *desten; +{ + register struct llinfo_arp *la; + struct sockaddr_dl *sdl; + + if (m->m_flags & M_BCAST) { /* broadcast */ + bcopy((caddr_t)etherbroadcastaddr, (caddr_t)desten, + sizeof(etherbroadcastaddr)); + return (1); + } + if (m->m_flags & M_MCAST) { /* multicast */ + ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); + return (1); + } + if (rt) + la = (struct llinfo_arp *)rt->rt_llinfo; + else { + if ((la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0)) != NULL) + rt = la->la_rt; + } + if (la == 0 || rt == 0) { +#ifdef __ECOS +#else + log(LOG_DEBUG, "arpresolve: can't allocate llinfo\n"); +#endif + m_freem(m); + return (0); + } + sdl = SDL(rt->rt_gateway); + /* + * Check the address family and length is valid, the address + * is resolved; otherwise, try to resolve. + */ + if ((rt->rt_expire == 0 || rt->rt_expire > time.tv_sec) && + sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) { + bcopy(LLADDR(sdl), desten, sdl->sdl_alen); + return 1; + } + if (((struct ifnet *)ac)->if_flags & IFF_NOARP) + return 0; + + /* + * There is an arptab entry, but no ethernet address + * response yet. Replace the held mbuf with this + * latest one. + */ + if (la->la_hold) + m_freem(la->la_hold); + la->la_hold = m; + /* + * Re-send the ARP request when appropriate. + */ +#ifdef DIAGNOSTIC + if (rt->rt_expire == 0) { + /* This should never happen. (Should it? 
-gwr) */ + printf("arpresolve: unresolved and rt_expire == 0\n"); + /* Set expiration time to now (expired). */ + rt->rt_expire = time.tv_sec; + } +#endif + if (rt->rt_expire) { + rt->rt_flags &= ~RTF_REJECT; + if (la->la_asked == 0 || rt->rt_expire != time.tv_sec) { + rt->rt_expire = time.tv_sec; + if (la->la_asked++ < arp_maxtries) + arprequest(ac, + &(SIN(rt->rt_ifa->ifa_addr)->sin_addr.s_addr), + &(SIN(dst)->sin_addr.s_addr), + ac->ac_enaddr); + else { + rt->rt_flags |= RTF_REJECT; + rt->rt_expire += arpt_down; + la->la_asked = 0; + } + } + } + return (0); +} + +/* + * Common length and type checks are done here, + * then the protocol-specific routine is called. + */ +void +arpintr() +{ + register struct mbuf *m; + register struct arphdr *ar; + int s; + + while (arpintrq.ifq_head) { + s = splimp(); + IF_DEQUEUE(&arpintrq, m); + splx(s); + if (m == 0 || (m->m_flags & M_PKTHDR) == 0) + panic("arpintr"); + if (m->m_len >= sizeof(struct arphdr) && + (ar = mtod(m, struct arphdr *)) && + ntohs(ar->ar_hrd) == ARPHRD_ETHER && + m->m_len >= + sizeof(struct arphdr) + 2 * (ar->ar_hln + ar->ar_pln)) + switch (ntohs(ar->ar_pro)) { + + case ETHERTYPE_IP: + case ETHERTYPE_IPTRAILERS: + in_arpinput(m); + continue; + } + m_freem(m); + } +} + +/* + * ARP for Internet protocols on Ethernet. + * Algorithm is that given in RFC 826. + * In addition, a sanity check is performed on the sender + * protocol address, to catch impersonators. + * We no longer handle negotiations for use of trailer protocol: + * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent + * along with IP replies if we wanted trailers sent to us, + * and also sent them in response to IP replies. + * This allowed either end to announce the desire to receive + * trailer packets. + * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, + * but formerly didn't normally send requests. 
+ */ +static void +in_arpinput(m) + struct mbuf *m; +{ + register struct ether_arp *ea; + register struct arpcom *ac = (struct arpcom *)m->m_pkthdr.rcvif; + struct ether_header *eh; + register struct llinfo_arp *la = 0; + register struct rtentry *rt; + struct in_ifaddr *ia, *maybe_ia = 0; + struct sockaddr_dl *sdl; + struct sockaddr sa; + struct in_addr isaddr, itaddr, myaddr; + int op; + + ea = mtod(m, struct ether_arp *); + op = ntohs(ea->arp_op); + bcopy((caddr_t)ea->arp_spa, (caddr_t)&isaddr, sizeof (isaddr)); + bcopy((caddr_t)ea->arp_tpa, (caddr_t)&itaddr, sizeof (itaddr)); + for (ia = in_ifaddr.tqh_first; ia != 0; ia = ia->ia_list.tqe_next) + if (ia->ia_ifp == &ac->ac_if || + (ia->ia_ifp->if_bridge && + ia->ia_ifp->if_bridge == ac->ac_if.if_bridge)) { + maybe_ia = ia; + if (itaddr.s_addr == ia->ia_addr.sin_addr.s_addr || + isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) + break; + } + if (maybe_ia == 0) + goto out; + myaddr = ia ? ia->ia_addr.sin_addr : maybe_ia->ia_addr.sin_addr; + if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr, + sizeof (ea->arp_sha))) + goto out; /* it's from me, ignore it. 
*/ + if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr, + sizeof (ea->arp_sha))) { +#ifdef __ECOS +#else + log(LOG_ERR, + "arp: ether address is broadcast for IP address %s!\n", + inet_ntoa(isaddr)); +#endif + goto out; + } + if (isaddr.s_addr == myaddr.s_addr) { +#ifdef __ECOS +#else + log(LOG_ERR, + "duplicate IP address %s sent from ethernet address %s\n", + inet_ntoa(isaddr), ether_sprintf(ea->arp_sha)); +#endif + itaddr = myaddr; + goto reply; + } + la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0); + if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) { + if (sdl->sdl_alen && + bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) { + if (rt->rt_flags & RTF_PERMANENT_ARP) { +#ifdef __ECOS +#else + log(LOG_WARNING, + "arp: attempt to overwrite permanent " + "entry for %s by %s on %s\n", + inet_ntoa(isaddr), + ether_sprintf(ea->arp_sha), + (&ac->ac_if)->if_xname); +#endif + goto out; + } else if (rt->rt_ifp != &ac->ac_if) { +#ifdef __ECOS +#else + log(LOG_WARNING, + "arp: attempt to overwrite entry for %s " + "on %s by %s on %s\n", + inet_ntoa(isaddr), rt->rt_ifp->if_xname, + ether_sprintf(ea->arp_sha), + (&ac->ac_if)->if_xname); +#endif + goto out; + } else { +#ifdef __ECOS +#else + log(LOG_INFO, + "arp info overwritten for %s by %s on %s\n", + inet_ntoa(isaddr), + ether_sprintf(ea->arp_sha), + (&ac->ac_if)->if_xname); +#endif + rt->rt_expire = 1; /* no longer static */ + } + } + bcopy((caddr_t)ea->arp_sha, LLADDR(sdl), + sdl->sdl_alen = sizeof(ea->arp_sha)); + if (rt->rt_expire) + rt->rt_expire = time.tv_sec + arpt_keep; + rt->rt_flags &= ~RTF_REJECT; + la->la_asked = 0; + if (la->la_hold) { + (*ac->ac_if.if_output)(&ac->ac_if, la->la_hold, + rt_key(rt), rt); + la->la_hold = 0; + } + } +reply: + if (op != ARPOP_REQUEST) { + out: + m_freem(m); + return; + } + if (itaddr.s_addr == myaddr.s_addr) { + /* I am the target */ + bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha, + sizeof(ea->arp_sha)); + bcopy((caddr_t)ac->ac_enaddr, 
(caddr_t)ea->arp_sha, + sizeof(ea->arp_sha)); + } else { + la = arplookup(itaddr.s_addr, 0, SIN_PROXY); + if (la == 0) + goto out; + rt = la->la_rt; + bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha, + sizeof(ea->arp_sha)); + sdl = SDL(rt->rt_gateway); + bcopy(LLADDR(sdl), (caddr_t)ea->arp_sha, sizeof(ea->arp_sha)); + } + + bcopy((caddr_t)ea->arp_spa, (caddr_t)ea->arp_tpa, sizeof(ea->arp_spa)); + bcopy((caddr_t)&itaddr, (caddr_t)ea->arp_spa, sizeof(ea->arp_spa)); + ea->arp_op = htons(ARPOP_REPLY); + ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */ + eh = (struct ether_header *)sa.sa_data; + bcopy((caddr_t)ea->arp_tha, (caddr_t)eh->ether_dhost, + sizeof(eh->ether_dhost)); + bcopy((caddr_t)ac->ac_enaddr, (caddr_t)eh->ether_shost, + sizeof(eh->ether_shost)); + eh->ether_type = htons(ETHERTYPE_ARP); + sa.sa_family = AF_UNSPEC; + sa.sa_len = sizeof(sa); + (*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0); + return; +} + +/* + * Free an arp entry. + */ +static void +arptfree(la) + register struct llinfo_arp *la; +{ + register struct rtentry *rt = la->la_rt; + register struct sockaddr_dl *sdl; + + if (rt == 0) + panic("arptfree"); + if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) && + sdl->sdl_family == AF_LINK) { + sdl->sdl_alen = 0; + la->la_asked = 0; + rt->rt_flags &= ~RTF_REJECT; + return; + } + rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), + 0, (struct rtentry **)0); +} + +/* + * Lookup or enter a new address in arptab. + */ +static struct llinfo_arp * +arplookup(addr, create, proxy) + u_int32_t addr; + int create, proxy; +{ + register struct rtentry *rt; + static struct sockaddr_inarp sin; + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = addr; + sin.sin_other = proxy ? 
SIN_PROXY : 0; + rt = rtalloc1(sintosa(&sin), create); + if (rt == 0) + return (0); + rt->rt_refcnt--; + if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || + rt->rt_gateway->sa_family != AF_LINK) { + if (create) +#ifdef __ECOS +#else + log(LOG_DEBUG, + "arplookup: unable to enter address for %s\n", + inet_ntoa(sin.sin_addr)); +#endif + return (0); + } + return ((struct llinfo_arp *)rt->rt_llinfo); +} + +int +arpioctl(cmd, data) + u_long cmd; + caddr_t data; +{ + + return (EOPNOTSUPP); +} + +void +arp_ifinit(ac, ifa) + struct arpcom *ac; + struct ifaddr *ifa; +{ + + /* Warn the user if another station has this IP address. */ + arprequest(ac, + &(IA_SIN(ifa)->sin_addr.s_addr), + &(IA_SIN(ifa)->sin_addr.s_addr), + ac->ac_enaddr); + ifa->ifa_rtrequest = arp_rtrequest; + ifa->ifa_flags |= RTF_CLONING; +} + +/* + * Called from Ethernet interrupt handlers + * when ether packet type ETHERTYPE_REVARP + * is received. Common length and type checks are done here, + * then the protocol-specific routine is called. + */ +void +revarpinput(m) + struct mbuf *m; +{ + struct arphdr *ar; + + if (m->m_len < sizeof(struct arphdr)) + goto out; + ar = mtod(m, struct arphdr *); + if (ntohs(ar->ar_hrd) != ARPHRD_ETHER) + goto out; + if (m->m_len < sizeof(struct arphdr) + 2 * (ar->ar_hln + ar->ar_pln)) + goto out; + switch (ntohs(ar->ar_pro)) { + + case ETHERTYPE_IP: + case ETHERTYPE_IPTRAILERS: + in_revarpinput(m); + return; + + default: + break; + } +out: + m_freem(m); +} + +/* + * RARP for Internet protocols on Ethernet. + * Algorithm is that given in RFC 903. + * We are only using for bootstrap purposes to get an ip address for one of + * our interfaces. Thus we support no user-interface. + * + * Since the contents of the RARP reply are specific to the interface that + * sent the request, this code must ensure that they are properly associated. + * + * Note: also supports ARP via RARP packets, per the RFC. 
+ */
+void
+in_revarpinput(m)
+	struct mbuf *m;
+{
+	struct ifnet *ifp;
+	struct ether_arp *ar;
+	int op;
+
+	ar = mtod(m, struct ether_arp *);
+	op = ntohs(ar->arp_op);
+	switch (op) {
+	case ARPOP_REQUEST:
+	case ARPOP_REPLY: /* per RFC */
+		in_arpinput(m);	/* in_arpinput() consumes the mbuf */
+		return;
+	case ARPOP_REVREPLY:
+		break;
+	case ARPOP_REVREQUEST: /* handled by rarpd(8) */
+	default:
+		goto out;
+	}
+	if (!revarp_in_progress)	/* set only while revarpwhoarewe() waits */
+		goto out;
+	ifp = m->m_pkthdr.rcvif;
+	if (ifp != myip_ifp) /* !same interface */
+		goto out;
+	if (myip_initialized)	/* already answered; just repeat the wakeup */
+		goto wake;
+	if (bcmp(ar->arp_tha, ((struct arpcom *)ifp)->ac_enaddr,
+	    sizeof(ar->arp_tha)))	/* reply is not for our hardware address */
+		goto out;
+	bcopy((caddr_t)ar->arp_spa, (caddr_t)&srv_ip, sizeof(srv_ip));
+	bcopy((caddr_t)ar->arp_tpa, (caddr_t)&myip, sizeof(myip));
+	myip_initialized = 1;
+wake: /* Do wakeup every time in case it was missed. */
+	wakeup((caddr_t)&myip);
+
+out:
+	m_freem(m);
+}
+
+/*
+ * Send a RARP request for the ip address of the specified interface.
+ * The request should be RFC 903-compliant. 
+ */ +void +revarprequest(ifp) + struct ifnet *ifp; +{ + struct sockaddr sa; + struct mbuf *m; + struct ether_header *eh; + struct ether_arp *ea; + struct arpcom *ac = (struct arpcom *)ifp; + + if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) + return; + m->m_len = sizeof(*ea); + m->m_pkthdr.len = sizeof(*ea); + MH_ALIGN(m, sizeof(*ea)); + ea = mtod(m, struct ether_arp *); + eh = (struct ether_header *)sa.sa_data; + bzero((caddr_t)ea, sizeof(*ea)); + bcopy((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost, + sizeof(eh->ether_dhost)); + eh->ether_type = htons(ETHERTYPE_REVARP); + ea->arp_hrd = htons(ARPHRD_ETHER); + ea->arp_pro = htons(ETHERTYPE_IP); + ea->arp_hln = sizeof(ea->arp_sha); /* hardware address length */ + ea->arp_pln = sizeof(ea->arp_spa); /* protocol address length */ + ea->arp_op = htons(ARPOP_REVREQUEST); + bcopy((caddr_t)ac->ac_enaddr, (caddr_t)eh->ether_shost, + sizeof(ea->arp_tha)); + bcopy((caddr_t)ac->ac_enaddr, (caddr_t)ea->arp_sha, + sizeof(ea->arp_sha)); + bcopy((caddr_t)ac->ac_enaddr, (caddr_t)ea->arp_tha, + sizeof(ea->arp_tha)); + sa.sa_family = AF_UNSPEC; + sa.sa_len = sizeof(sa); + ifp->if_output(ifp, m, &sa, (struct rtentry *)0); +} + +/* + * RARP for the ip address of the specified interface, but also + * save the ip address of the server that sent the answer. + * Timeout if no response is received. + */ +int +revarpwhoarewe(ifp, serv_in, clnt_in) + struct ifnet *ifp; + struct in_addr *serv_in; + struct in_addr *clnt_in; +{ + int result, count = 20; + + if (myip_initialized) + return EIO; + + myip_ifp = ifp; + revarp_in_progress = 1; + while (count--) { + revarprequest(ifp); + result = tsleep((caddr_t)&myip, PSOCK, "revarp", hz/2); + if (result != EWOULDBLOCK) + break; + } + revarp_in_progress = 0; + if (!myip_initialized) + return ENETUNREACH; + + bcopy((caddr_t)&srv_ip, serv_in, sizeof(*serv_in)); + bcopy((caddr_t)&myip, clnt_in, sizeof(*clnt_in)); + return 0; +} + +/* For compatibility: only saves interface address. 
*/
+int
+revarpwhoami(in, ifp)
+	struct in_addr *in;
+	struct ifnet *ifp;
+{
+	struct in_addr server;	/* server address is obtained but discarded */
+	return (revarpwhoarewe(ifp, &server, in));
+}
+
+
+#ifdef DDB
+
+#include <machine/db_machdep.h>
+#include <ddb/db_interface.h>
+#include <ddb/db_output.h>
+
+/* Print the sa_len bytes of a sockaddr as a decimal list, or "[NULL]". */
+static void
+db_print_sa(sa)
+	struct sockaddr *sa;
+{
+	int len;
+	u_char *p;
+
+	if (sa == 0) {
+		db_printf("[NULL]");
+		return;
+	}
+
+	p = (u_char*)sa;
+	len = sa->sa_len;
+	db_printf("[");
+	while (len > 0) {
+		db_printf("%d", *p);
+		p++;
+		len--;
+		if (len)
+			db_printf(",");
+	}
+	db_printf("]\n");
+}
+
+/* Print address, destination, netmask and counters of an ifaddr. */
+static void
+db_print_ifa(ifa)
+	struct ifaddr *ifa;
+{
+	if (ifa == 0)
+		return;
+	db_printf(" ifa_addr=");
+	db_print_sa(ifa->ifa_addr);
+	db_printf(" ifa_dsta=");
+	db_print_sa(ifa->ifa_dstaddr);
+	db_printf(" ifa_mask=");
+	db_print_sa(ifa->ifa_netmask);
+	db_printf(" flags=0x%x, refcnt=%d, metric=%d\n",
+	    ifa->ifa_flags, ifa->ifa_refcnt, ifa->ifa_metric);
+}
+
+/* Print the fields of the llinfo_arp that li points to; no-op for 0. */
+static void
+db_print_llinfo(li)
+	caddr_t li;
+{
+	struct llinfo_arp *la;
+
+	if (li == 0)
+		return;
+	la = (struct llinfo_arp *)li;
+	db_printf(" la_rt=%p la_hold=%p, la_asked=0x%lx\n",
+	    la->la_rt, la->la_hold, la->la_asked);
+}
+
+/*
+ * Function to pass to rn_walktree().
+ * Return non-zero error to abort walk.
+ *
+ * Treats the radix node as the rtentry it is cast to and prints the
+ * route's fields; always returns 0. The w argument is not used.
+ */
+static int
+db_show_radix_node(rn, w)
+	struct radix_node *rn;
+	void *w;
+{
+	struct rtentry *rt = (struct rtentry *)rn;
+
+	db_printf("rtentry=%p", rt);
+
+	db_printf(" flags=0x%x refcnt=%d use=%ld expire=%ld\n",
+	    rt->rt_flags, rt->rt_refcnt, rt->rt_use, rt->rt_expire);
+
+	db_printf(" key="); db_print_sa(rt_key(rt));
+	db_printf(" mask="); db_print_sa(rt_mask(rt));
+	db_printf(" gw="); db_print_sa(rt->rt_gateway);
+
+	db_printf(" ifp=%p ", rt->rt_ifp);
+	if (rt->rt_ifp)
+		db_printf("(%s)", rt->rt_ifp->if_xname);
+	else
+		db_printf("(NULL)");
+
+	db_printf(" ifa=%p\n", rt->rt_ifa);
+	db_print_ifa(rt->rt_ifa);
+
+	db_printf(" genmask="); db_print_sa(rt->rt_genmask);
+
+	db_printf(" gwroute=%p llinfo=%p\n", rt->rt_gwroute, rt->rt_llinfo);
+	db_print_llinfo(rt->rt_llinfo);
+	return (0);
+}
+
+/*
+ * Function to print all the route trees.
+ * Use this from ddb: "call db_show_arptab"
+ *
+ * Only the AF_INET table is walked; always returns 0.
+ */
+int
+db_show_arptab()
+{
+	struct radix_node_head *rnh;
+	rnh = rt_tables[AF_INET];
+	db_printf("Route tree for AF_INET\n");
+	if (rnh == NULL) {
+		db_printf(" (not initialized)\n");
+		return (0);
+	}
+	rn_walktree(rnh, db_show_radix_node, NULL);
+	return (0);
+}
+#endif
+#endif /* INET */
diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/igmp.c b/ecos/packages/net/tcpip/current/src/sys/netinet/igmp.c
new file mode 100644
index 0000000..f84c84f
--- /dev/null
+++ b/ecos/packages/net/tcpip/current/src/sys/netinet/igmp.c
@@ -0,0 +1,614 @@
+//==========================================================================
+//
+// sys/netinet/igmp.c
+//
+//
+//
+//==========================================================================
+// ####BSDALTCOPYRIGHTBEGIN####
+// -------------------------------------------
+// Portions of this software may have been derived from OpenBSD
+// or other sources, and if so are covered by the appropriate copyright
+// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: igmp.c,v 1.6 1999/12/08 06:50:19 itojun Exp $ */ +/* $NetBSD: igmp.c,v 1.15 1996/02/13 23:41:25 christos Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Internet Group Management Protocol (IGMP) routines. + * + * Written by Steve Deering, Stanford, May 1988. + * Modified by Rosen Sharma, Stanford, Aug 1994. + * Modified by Bill Fenner, Xerox PARC, Feb 1995. + * + * MULTICAST Revision: 1.3 + */ + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/igmp.h> +#include <netinet/igmp_var.h> +#ifndef __ECOS +#include <dev/rndvar.h> +#endif + +#include <machine/stdarg.h> + +#define IP_MULTICASTOPTS 0 + +int igmp_timers_are_running; +static struct router_info *rti_head; + +void igmp_sendpkt __P((struct in_multi *, int)); +static int rti_fill __P((struct in_multi *)); +static struct router_info * rti_find __P((struct ifnet *)); + +void +igmp_init() +{ + + /* + * To avoid byte-swapping the same value over and over again. 
+ */ + igmp_timers_are_running = 0; + rti_head = 0; +} + +static int +rti_fill(inm) + struct in_multi *inm; +{ + register struct router_info *rti; + + for (rti = rti_head; rti != 0; rti = rti->rti_next) { + if (rti->rti_ifp == inm->inm_ifp) { + inm->inm_rti = rti; + if (rti->rti_type == IGMP_v1_ROUTER) + return (IGMP_v1_HOST_MEMBERSHIP_REPORT); + else + return (IGMP_v2_HOST_MEMBERSHIP_REPORT); + } + } + + rti = (struct router_info *)malloc(sizeof(struct router_info), + M_MRTABLE, M_NOWAIT); + rti->rti_ifp = inm->inm_ifp; + rti->rti_type = IGMP_v2_ROUTER; + rti->rti_next = rti_head; + rti_head = rti; + inm->inm_rti = rti; + return (IGMP_v2_HOST_MEMBERSHIP_REPORT); +} + +static struct router_info * +rti_find(ifp) + struct ifnet *ifp; +{ + register struct router_info *rti; + + for (rti = rti_head; rti != 0; rti = rti->rti_next) { + if (rti->rti_ifp == ifp) + return (rti); + } + + rti = (struct router_info *)malloc(sizeof(struct router_info), + M_MRTABLE, M_NOWAIT); + rti->rti_ifp = ifp; + rti->rti_type = IGMP_v2_ROUTER; + rti->rti_next = rti_head; + rti_head = rti; + return (rti); +} + +void +rti_delete(ifp) + struct ifnet *ifp; +{ + struct router_info *rti, **prti = &rti_head; + + for (rti = rti_head; rti != 0; rti = rti->rti_next) { + if (rti->rti_ifp == ifp) { + *prti = rti->rti_next; + free(rti, M_MRTABLE); + break; + } + prti = &rti->rti_next; + } +} + +void +#if __STDC__ +igmp_input(struct mbuf *m, ...) 
+#else +igmp_input(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + int proto; + register int iphlen; + register struct ifnet *ifp = m->m_pkthdr.rcvif; + register struct ip *ip = mtod(m, struct ip *); + register struct igmp *igmp; + register int igmplen; + register int minlen; + struct in_multi *inm; + struct in_multistep step; + struct router_info *rti; + register struct in_ifaddr *ia; + int timer; + va_list ap; + + va_start(ap, m); + iphlen = va_arg(ap, int); + proto = va_arg(ap, int); + va_end(ap); + + ++igmpstat.igps_rcv_total; + + igmplen = ip->ip_len; + + /* + * Validate lengths + */ + if (igmplen < IGMP_MINLEN) { + ++igmpstat.igps_rcv_tooshort; + m_freem(m); + return; + } + minlen = iphlen + IGMP_MINLEN; + if ((m->m_flags & M_EXT || m->m_len < minlen) && + (m = m_pullup(m, minlen)) == 0) { + ++igmpstat.igps_rcv_tooshort; + return; + } + + /* + * Validate checksum + */ + m->m_data += iphlen; + m->m_len -= iphlen; + igmp = mtod(m, struct igmp *); + if (in_cksum(m, igmplen)) { + ++igmpstat.igps_rcv_badsum; + m_freem(m); + return; + } + m->m_data -= iphlen; + m->m_len += iphlen; + ip = mtod(m, struct ip *); + + switch (igmp->igmp_type) { + + case IGMP_HOST_MEMBERSHIP_QUERY: + ++igmpstat.igps_rcv_queries; + + if (ifp->if_flags & IFF_LOOPBACK) + break; + + if (igmp->igmp_code == 0) { + rti = rti_find(ifp); + rti->rti_type = IGMP_v1_ROUTER; + rti->rti_age = 0; + + if (ip->ip_dst.s_addr != INADDR_ALLHOSTS_GROUP) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; + } + + /* + * Start the timers in all of our membership records + * for the interface on which the query arrived, + * except those that are already running and those + * that belong to a "local" group (224.0.0.X). 
+ */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_ifp == ifp && + inm->inm_timer == 0 && + !IN_LOCAL_GROUP(inm->inm_addr.s_addr)) { + inm->inm_state = IGMP_DELAYING_MEMBER; + inm->inm_timer = IGMP_RANDOM_DELAY( + IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ); + igmp_timers_are_running = 1; + } + IN_NEXT_MULTI(step, inm); + } + } else { + if (!IN_MULTICAST(ip->ip_dst.s_addr)) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; + } + + timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; + + /* + * Start the timers in all of our membership records + * for the interface on which the query arrived, + * except those that are already running and those + * that belong to a "local" group (224.0.0.X). For + * timers already running, check if they need to be + * reset. + */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_ifp == ifp && + !IN_LOCAL_GROUP(inm->inm_addr.s_addr) && + (ip->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP || + ip->ip_dst.s_addr == inm->inm_addr.s_addr)) { + switch (inm->inm_state) { + case IGMP_DELAYING_MEMBER: + if (inm->inm_timer <= timer) + break; + /* FALLTHROUGH */ + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + inm->inm_state = + IGMP_DELAYING_MEMBER; + inm->inm_timer = + IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + break; + case IGMP_SLEEPING_MEMBER: + inm->inm_state = + IGMP_AWAKENING_MEMBER; + break; + } + } + IN_NEXT_MULTI(step, inm); + } + } + + break; + + case IGMP_v1_HOST_MEMBERSHIP_REPORT: + ++igmpstat.igps_rcv_reports; + + if (ifp->if_flags & IFF_LOOPBACK) + break; + + if (!IN_MULTICAST(igmp->igmp_group.s_addr) || + igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { + ++igmpstat.igps_rcv_badreports; + m_freem(m); + return; + } + + /* + * KLUDGE: if the IP source address of the report has an + * unspecified (i.e., zero) subnet number, as is allowed for + * a booting host, replace it with the correct subnet number + * so that a process-level multicast 
routing daemon can + * determine which subnet it arrived from. This is necessary + * to compensate for the lack of any way for a process to + * determine the arrival interface of an incoming packet. + */ + if ((ip->ip_src.s_addr & IN_CLASSA_NET) == 0) { + IFP_TO_IA(ifp, ia); + if (ia) + ip->ip_src.s_addr = ia->ia_subnet; + } + + /* + * If we belong to the group being reported, stop + * our timer for that group. + */ + IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); + if (inm != NULL) { + inm->inm_timer = 0; + ++igmpstat.igps_rcv_ourreports; + + switch (inm->inm_state) { + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_SLEEPING_MEMBER; + break; + case IGMP_DELAYING_MEMBER: + if (inm->inm_rti->rti_type == IGMP_v1_ROUTER) + inm->inm_state = IGMP_LAZY_MEMBER; + else + inm->inm_state = IGMP_SLEEPING_MEMBER; + break; + } + } + + break; + + case IGMP_v2_HOST_MEMBERSHIP_REPORT: +#ifdef MROUTING + /* + * Make sure we don't hear our own membership report. Fast + * leave requires knowing that we are the only member of a + * group. + */ + IFP_TO_IA(ifp, ia); + if (ia && ip->ip_src.s_addr == ia->ia_addr.sin_addr.s_addr) + break; +#endif + + ++igmpstat.igps_rcv_reports; + + if (ifp->if_flags & IFF_LOOPBACK) + break; + + if (!IN_MULTICAST(igmp->igmp_group.s_addr) || + igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { + ++igmpstat.igps_rcv_badreports; + m_freem(m); + return; + } + + /* + * KLUDGE: if the IP source address of the report has an + * unspecified (i.e., zero) subnet number, as is allowed for + * a booting host, replace it with the correct subnet number + * so that a process-level multicast routing daemon can + * determine which subnet it arrived from. This is necessary + * to compensate for the lack of any way for a process to + * determine the arrival interface of an incoming packet. 
+		 */
+		if ((ip->ip_src.s_addr & IN_CLASSA_NET) == 0) {
+#ifndef MROUTING
+			IFP_TO_IA(ifp, ia);
+#endif
+			if (ia)
+				ip->ip_src.s_addr = ia->ia_subnet;
+		}
+
+		/*
+		 * If we belong to the group being reported, stop
+		 * our timer for that group.
+		 */
+		IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+		if (inm != NULL) {
+			inm->inm_timer = 0;
+			++igmpstat.igps_rcv_ourreports;
+
+			switch (inm->inm_state) {
+			case IGMP_DELAYING_MEMBER:
+			case IGMP_IDLE_MEMBER:
+			case IGMP_AWAKENING_MEMBER:
+				inm->inm_state = IGMP_LAZY_MEMBER;
+				break;
+			case IGMP_LAZY_MEMBER:
+			case IGMP_SLEEPING_MEMBER:
+				break;
+			}
+		}
+
+		break;
+
+	}
+
+	/*
+	 * Pass all valid IGMP packets up to any process(es) listening
+	 * on a raw IGMP socket.
+	 */
+	rip_input(m, iphlen, proto);
+	return;
+}
+
+/*
+ * Start IGMP processing for a membership record that was just created.
+ *
+ * The record starts as IGMP_IDLE_MEMBER with no timer.  For a group that
+ * is not "local" (per IN_LOCAL_GROUP) on a non-loopback interface, a
+ * report of the type returned by rti_fill(inm) is sent immediately, the
+ * record becomes IGMP_DELAYING_MEMBER and a random delay bounded by
+ * IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ is armed so that
+ * igmp_fasttimo() sends a follow-up report.  Runs at splsoftnet.
+ */
+void
+igmp_joingroup(inm)
+	struct in_multi *inm;
+{
+	int s = splsoftnet();
+
+	inm->inm_state = IGMP_IDLE_MEMBER;
+
+	if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
+	    (inm->inm_ifp->if_flags & IFF_LOOPBACK) == 0) {
+		igmp_sendpkt(inm, rti_fill(inm));
+		inm->inm_state = IGMP_DELAYING_MEMBER;
+		inm->inm_timer = IGMP_RANDOM_DELAY(
+		    IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
+		/* tell igmp_fasttimo() there is at least one live timer */
+		igmp_timers_are_running = 1;
+	} else
+		inm->inm_timer = 0;
+	splx(s);
+}
+
+/*
+ * Called when the last reference to a membership record goes away.
+ *
+ * A leave message is sent only when the record is in the DELAYING or
+ * IDLE state, the group is not "local", the interface is not a loopback
+ * and the router recorded for the interface is not an IGMPv1 router.
+ * In every other state nothing is transmitted.  Unlike igmp_joingroup()
+ * this routine does not raise the spl itself.
+ */
+void
+igmp_leavegroup(inm)
+	struct in_multi *inm;
+{
+
+	switch (inm->inm_state) {
+	case IGMP_DELAYING_MEMBER:
+	case IGMP_IDLE_MEMBER:
+		if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
+		    (inm->inm_ifp->if_flags & IFF_LOOPBACK) == 0)
+			if (inm->inm_rti->rti_type != IGMP_v1_ROUTER)
+				igmp_sendpkt(inm, IGMP_HOST_LEAVE_MESSAGE);
+		break;
+	case IGMP_LAZY_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+	case IGMP_SLEEPING_MEMBER:
+		break;
+	}
+}
+
+/*
+ * Fast timeout: count down the report timer of every membership record.
+ *
+ * When a timer reaches zero and the record is IGMP_DELAYING_MEMBER, a
+ * v1 or v2 membership report is sent (chosen by the router type stored
+ * in inm_rti) and the record becomes IGMP_IDLE_MEMBER.
+ * igmp_timers_are_running is recomputed on each pass: it stays set only
+ * if some timer is still non-zero after being decremented.
+ */
+void
+igmp_fasttimo()
+{
+	register struct in_multi *inm;
+	struct in_multistep step;
+	int s;
+
+	/*
+	 * Quick check to see if any work needs to be done, in order
+	 * to minimize the overhead of fasttimo processing.
+	 */
+	if (!igmp_timers_are_running)
+		return;
+
+	s = splsoftnet();
+	igmp_timers_are_running = 0;
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		if (inm->inm_timer == 0) {
+			/* do nothing */
+		} else if (--inm->inm_timer == 0) {
+			if (inm->inm_state == IGMP_DELAYING_MEMBER) {
+				if (inm->inm_rti->rti_type == IGMP_v1_ROUTER)
+					igmp_sendpkt(inm,
+					    IGMP_v1_HOST_MEMBERSHIP_REPORT);
+				else
+					igmp_sendpkt(inm,
+					    IGMP_v2_HOST_MEMBERSHIP_REPORT);
+				inm->inm_state = IGMP_IDLE_MEMBER;
+			}
+		} else {
+			igmp_timers_are_running = 1;
+		}
+		IN_NEXT_MULTI(step, inm);
+	}
+	splx(s);
+}
+
+/*
+ * Slow timeout: age the per-interface router records.
+ *
+ * Every record currently marked IGMP_v1_ROUTER has its age incremented;
+ * once the age reaches IGMP_AGE_THRESHOLD the record is switched to
+ * IGMP_v2_ROUTER.
+ */
+void
+igmp_slowtimo()
+{
+	register struct router_info *rti;
+	int s;
+
+	s = splsoftnet();
+	for (rti = rti_head; rti != 0; rti = rti->rti_next) {
+		if (rti->rti_type == IGMP_v1_ROUTER &&
+		    ++rti->rti_age >= IGMP_AGE_THRESHOLD) {
+			rti->rti_type = IGMP_v2_ROUTER;
+		}
+	}
+	splx(s);
+}
+
+/*
+ * Build and transmit one IGMP message of the given type for the group
+ * described by inm.
+ *
+ * The message is addressed to the group itself, with source INADDR_ANY,
+ * and is handed to ip_output() with multicast options selecting the
+ * record's interface and a TTL of 1.  If no mbuf header can be obtained
+ * the function returns silently and nothing is sent or counted.
+ */
+void
+igmp_sendpkt(inm, type)
+	struct in_multi *inm;
+	int type;
+{
+	struct mbuf *m;
+	struct igmp *igmp;
+	struct ip *ip;
+	struct ip_moptions imo;
+#ifdef MROUTING
+	extern struct socket *ip_mrouter;
+#endif /* MROUTING */
+
+	MGETHDR(m, M_DONTWAIT, MT_HEADER);
+	if (m == NULL)
+		return;
+	/*
+	 * Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN
+	 * is smaller than mbuf size returned by MGETHDR.
+	 */
+	m->m_data += max_linkhdr;
+	m->m_len = sizeof(struct ip) + IGMP_MINLEN;
+	m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
+
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = 0;
+	ip->ip_len = sizeof(struct ip) + IGMP_MINLEN;
+	ip->ip_off = 0;
+	ip->ip_p = IPPROTO_IGMP;
+	ip->ip_src.s_addr = INADDR_ANY;
+	ip->ip_dst = inm->inm_addr;
+
+	/*
+	 * Temporarily step past the IP header so that the checksum is
+	 * computed over the IGMP message only, then step back.
+	 */
+	m->m_data += sizeof(struct ip);
+	m->m_len -= sizeof(struct ip);
+	igmp = mtod(m, struct igmp *);
+	igmp->igmp_type = type;
+	igmp->igmp_code = 0;
+	igmp->igmp_group = inm->inm_addr;
+	igmp->igmp_cksum = 0;
+	igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN);
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+
+	imo.imo_multicast_ifp = inm->inm_ifp;
+	imo.imo_multicast_ttl = 1;
+#ifdef RSVP_ISI
+	imo.imo_multicast_vif = -1;
+#endif
+	/*
+	 * Request loopback of the report if we are acting as a multicast
+	 * router, so that the process-level routing daemon can hear it.
+	 */
+#ifdef MROUTING
+	imo.imo_multicast_loop = (ip_mrouter != NULL);
+#else
+	imo.imo_multicast_loop = 0;
+#endif /* MROUTING */
+
+#if 0 /*KAME IPSEC*/
+	m->m_pkthdr.rcvif = NULL;
+#endif /*IPSEC*/
+	ip_output(m, (struct mbuf *)0, (struct route *)0, IP_MULTICASTOPTS,
+	    &imo, NULL);
+
+	++igmpstat.igps_snd_reports;
+}
diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/in.c b/ecos/packages/net/tcpip/current/src/sys/netinet/in.c
new file mode 100644
index 0000000..1fb2f09
--- /dev/null
+++ b/ecos/packages/net/tcpip/current/src/sys/netinet/in.c
@@ -0,0 +1,902 @@
+//==========================================================================
+//
+//      sys/netinet/in.c
+//
+//
+//
+//==========================================================================
+// ####BSDALTCOPYRIGHTBEGIN####
+// -------------------------------------------
+// Portions of this software may have been derived from OpenBSD
+// or other sources, and if so are covered by the appropriate copyright
+// and license included herein.
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: in.c,v 1.14 1999/12/08 06:50:19 itojun Exp $ */ +/* $NetBSD: in.c,v 1.26 1996/02/13 23:41:39 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.c 8.2 (Berkeley) 11/15/93 + */ + +#ifdef __ECOS +#include <pkgconf/net.h> +#else +#include "ether.h" +#include "gif.h" +#endif + +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif + +#include <net/if.h> +#include <net/if_types.h> +#include <net/route.h> +#if NGIF > 0 +#include <net/if_gif.h> +#endif + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/if_ether.h> +#include <netinet/igmp_var.h> + +#ifdef MROUTING +#include <netinet/ip_mroute.h> +#endif + +#ifdef INET + +static int in_mask2len __P((struct in_addr *)); +static void in_len2mask __P((struct in_addr *, int)); +static int in_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, + struct ifnet *)); + +#ifndef SUBNETSARELOCAL +#define SUBNETSARELOCAL 0 +#endif +int subnetsarelocal = SUBNETSARELOCAL; +/* + * Return 1 if an internet address is for a ``local'' host + * (one to which we have a connection). If subnetsarelocal + * is true, this includes other subnets of the local net. + * Otherwise, it includes only the directly-connected (sub)nets. 
+ */ +int +in_localaddr(in) + struct in_addr in; +{ + register struct in_ifaddr *ia; + + if (subnetsarelocal) { + for (ia = in_ifaddr.tqh_first; ia != 0; ia = ia->ia_list.tqe_next) + if ((in.s_addr & ia->ia_netmask) == ia->ia_net) + return (1); + } else { + for (ia = in_ifaddr.tqh_first; ia != 0; ia = ia->ia_list.tqe_next) + if ((in.s_addr & ia->ia_subnetmask) == ia->ia_subnet) + return (1); + } + return (0); +} + +/* + * Determine whether an IP address is in a reserved set of addresses + * that may not be forwarded, or whether datagrams to that destination + * may be forwarded. + */ +int +in_canforward(in) + struct in_addr in; +{ + register u_int32_t net; + + if (IN_EXPERIMENTAL(in.s_addr) || IN_MULTICAST(in.s_addr)) + return (0); + if (IN_CLASSA(in.s_addr)) { + net = in.s_addr & IN_CLASSA_NET; + if (net == 0 || net == htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) + return (0); + } + return (1); +} + +/* + * Trim a mask in a sockaddr + */ +void +in_socktrim(ap) + struct sockaddr_in *ap; +{ + register char *cplim = (char *) &ap->sin_addr; + register char *cp = (char *) (&ap->sin_addr + 1); + + ap->sin_len = 0; + while (--cp >= cplim) + if (*cp) { + (ap)->sin_len = cp - (char *) (ap) + 1; + break; + } +} + +static int +in_mask2len(mask) + struct in_addr *mask; +{ + int x, y; + u_char *p; + + p = (u_char *)mask; + for (x = 0; x < sizeof(*mask); x++) { + if (p[x] != 0xff) + break; + } + y = 0; + if (x < sizeof(*mask)) { + for (y = 0; y < 8; y++) { + if ((p[x] & (0x80 >> y)) == 0) + break; + } + } + return x * 8 + y; +} + +static void +in_len2mask(mask, len) + struct in_addr *mask; + int len; +{ + int i; + u_char *p; + + p = (u_char *)mask; + bzero(mask, sizeof(*mask)); + for (i = 0; i < len / 8; i++) + p[i] = 0xff; + if (len % 8) + p[i] = (0xff00 >> (len % 8)) & 0xff; +} + +int in_interfaces; /* number of external internet interfaces */ + +/* + * Generic internet control operations (ioctl's). + * Ifp is 0 if not an interface-specific ioctl. 
+ */
+/*
+ * Processing happens in stages:
+ *   1. (NGIF > 0) tunnel ioctls on IFT_GIF interfaces go to gif_ioctl().
+ *   2. SIOC[ADG]LIFADDR are handed to in_lifaddr_ioctl().
+ *   3. The first in_ifaddr attached to ifp, if any, is looked up.
+ *   4. A first switch validates the request: it checks SS_PRIV for the
+ *      "set" style commands, refines the address match and, for the
+ *      set/add commands, allocates and links a new in_ifaddr when the
+ *      interface has no matching one.
+ *   5. A second switch carries out the request.  Commands not listed
+ *      there are passed to the interface's own if_ioctl routine.
+ */
+/* ARGSUSED */
+int
+in_control(so, cmd, data, ifp)
+	struct socket *so;
+	u_long cmd;
+	caddr_t data;
+	register struct ifnet *ifp;
+{
+	/* the same user buffer is viewed as an ifreq or an in_aliasreq */
+	register struct ifreq *ifr = (struct ifreq *)data;
+	register struct in_ifaddr *ia = 0;
+	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+	struct sockaddr_in oldaddr;
+	int error, hostIsNew, maskIsNew;
+
+#if NGIF > 0
+	if (ifp && ifp->if_type == IFT_GIF) {
+		switch (cmd) {
+		case SIOCSIFPHYADDR:
+			if ((so->so_state & SS_PRIV) == 0)
+				return(EPERM);
+			/* privileged: continue into the cases below */
+		case SIOCGIFPSRCADDR:
+		case SIOCGIFPDSTADDR:
+			return gif_ioctl(ifp, cmd, data);
+		}
+	}
+#endif
+
+	switch (cmd) {
+	case SIOCALIFADDR:
+	case SIOCDLIFADDR:
+		if ((so->so_state & SS_PRIV) == 0)
+			return(EPERM);
+		/*fall through*/
+	case SIOCGLIFADDR:
+		if (!ifp)
+			return EINVAL;
+		return in_lifaddr_ioctl(so, cmd, data, ifp);
+	}
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp)
+		for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next)
+			if (ia->ia_ifp == ifp)
+				break;
+
+	switch (cmd) {
+
+	case SIOCAIFADDR:
+	case SIOCDIFADDR:
+
+	case SIOCSIFADDR: // Moved from after this search, otherwise repeated
+	    // identical SIOCSIFADDRs leaked the previously allocated record.
+
+		/*
+		 * Continue from the first address on this interface and
+		 * look for the one whose address equals the request.
+		 */
+		if (ifra->ifra_addr.sin_family == AF_INET)
+			for (; ia != 0; ia = ia->ia_list.tqe_next) {
+				if (ia->ia_ifp == ifp &&
+				    ia->ia_addr.sin_addr.s_addr ==
+				    ifra->ifra_addr.sin_addr.s_addr)
+					break;
+			}
+		if (cmd == SIOCDIFADDR && ia == 0)
+			return (EADDRNOTAVAIL);
+		/* FALLTHROUGH */
+	case SIOCSIFNETMASK:
+	case SIOCSIFDSTADDR:
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+
+		if (ifp == 0)
+			panic("in_control");
+		if (ia == (struct in_ifaddr *)0) {
+			/* no matching record: create one and link it in */
+			ia = (struct in_ifaddr *)
+				malloc(sizeof *ia, M_IFADDR, M_WAITOK); // This alloc was leaked
+			if (ia == (struct in_ifaddr *)0)
+				return (ENOBUFS);
+			bzero((caddr_t)ia, sizeof *ia);
+			TAILQ_INSERT_TAIL(&in_ifaddr, ia, ia_list);
+			TAILQ_INSERT_TAIL(&ifp->if_addrlist, (struct ifaddr *)ia,
+			    ifa_list);
+			ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
+			ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
+			ia->ia_ifa.ifa_netmask = sintosa(&ia->ia_sockmask);
+			ia->ia_sockmask.sin_len = 8;
+			if (ifp->if_flags & IFF_BROADCAST) {
+				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+				ia->ia_broadaddr.sin_family = AF_INET;
+			}
+			ia->ia_ifp = ifp;
+			LIST_INIT(&ia->ia_multiaddrs);
+			if ((ifp->if_flags & IFF_LOOPBACK) == 0)
+				in_interfaces++;
+		}
+		break;
+
+	case SIOCSIFBRDADDR:
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+		/* FALLTHROUGH */
+
+	case SIOCGIFADDR:
+	case SIOCGIFNETMASK:
+	case SIOCGIFDSTADDR:
+	case SIOCGIFBRDADDR:
+		/*
+		 * If the request names a non-zero address, prefer the
+		 * record on this interface with exactly that address;
+		 * otherwise keep the first record found above.
+		 */
+		if (ia && satosin(&ifr->ifr_addr)->sin_addr.s_addr) {
+			struct in_ifaddr *ia2;
+
+			for (ia2 = ia; ia2; ia2 = ia2->ia_list.tqe_next) {
+				if (ia2->ia_ifp == ifp &&
+				    ia2->ia_addr.sin_addr.s_addr ==
+				    satosin(&ifr->ifr_addr)->sin_addr.s_addr)
+					break;
+			}
+			if (ia2 && ia2->ia_ifp == ifp)
+				ia = ia2;
+		}
+		if (ia == (struct in_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		break;
+	}
+	switch (cmd) {
+
+	case SIOCGIFADDR:
+		*satosin(&ifr->ifr_addr) = ia->ia_addr;
+		break;
+
+	case SIOCGIFBRDADDR:
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		*satosin(&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+		break;
+
+	case SIOCGIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		*satosin(&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+		break;
+
+	case SIOCGIFNETMASK:
+		*satosin(&ifr->ifr_addr) = ia->ia_sockmask;
+		break;
+
+	case SIOCSIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		oldaddr = ia->ia_dstaddr;
+		ia->ia_dstaddr = *satosin(&ifr->ifr_dstaddr);
+		/* let the driver veto the change; restore on failure */
+		if (ifp->if_ioctl && (error = (*ifp->if_ioctl)
+					(ifp, SIOCSIFDSTADDR, (caddr_t)ia))) {
+			ia->ia_dstaddr = oldaddr;
+			return (error);
+		}
+		if (ia->ia_flags & IFA_ROUTE) {
+			/* delete the route to the old peer, add the new one */
+			ia->ia_ifa.ifa_dstaddr = sintosa(&oldaddr);
+			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
+			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+		}
+		break;
+
+	case SIOCSIFBRDADDR:
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		ia->ia_broadaddr = *satosin(&ifr->ifr_broadaddr);
+		break;
+
+	case SIOCSIFADDR:
+		return (in_ifinit(ifp, ia, satosin(&ifr->ifr_addr), 1));
+
+	case SIOCSIFNETMASK:
+		ia->ia_subnetmask = ia->ia_sockmask.sin_addr.s_addr =
+		    ifra->ifra_addr.sin_addr.s_addr;
+		break;
+
+	case SIOCAIFADDR:
+		maskIsNew = 0;
+		hostIsNew = 1;
+		error = 0;
+		if (ia->ia_addr.sin_family == AF_INET) {
+			if (ifra->ifra_addr.sin_len == 0) {
+				/* no address supplied: keep the current one */
+				ifra->ifra_addr = ia->ia_addr;
+				hostIsNew = 0;
+			} else if (ifra->ifra_addr.sin_addr.s_addr ==
+			    ia->ia_addr.sin_addr.s_addr)
+				hostIsNew = 0;
+		}
+		if (ifra->ifra_mask.sin_len) {
+			in_ifscrub(ifp, ia);
+			ia->ia_sockmask = ifra->ifra_mask;
+			ia->ia_subnetmask = ia->ia_sockmask.sin_addr.s_addr;
+			maskIsNew = 1;
+		}
+		if ((ifp->if_flags & IFF_POINTOPOINT) &&
+		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+			in_ifscrub(ifp, ia);
+			ia->ia_dstaddr = ifra->ifra_dstaddr;
+			maskIsNew  = 1; /* We lie; but the effect's the same */
+		}
+		if (ifra->ifra_addr.sin_family == AF_INET &&
+		    (hostIsNew || maskIsNew))
+			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		if ((ifp->if_flags & IFF_BROADCAST) &&
+		    (ifra->ifra_broadaddr.sin_family == AF_INET))
+			ia->ia_broadaddr = ifra->ifra_broadaddr;
+		return (error);
+
+	case SIOCDIFADDR:
+		/* drop the route, unlink from both lists, release */
+		in_ifscrub(ifp, ia);
+		TAILQ_REMOVE(&ifp->if_addrlist, (struct ifaddr *)ia, ifa_list);
+		TAILQ_REMOVE(&in_ifaddr, ia, ia_list);
+		IFAFREE((&ia->ia_ifa));
+		break;
+
+#ifdef MROUTING
+	case SIOCGETVIFCNT:
+	case SIOCGETSGCNT:
+		return (mrt_ioctl(cmd, data));
+#endif /* MROUTING */
+
+	default:
+		if (ifp == 0 || ifp->if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+	}
+	return (0);
+}
+
+/*
+ * SIOC[GAD]LIFADDR.
+ *	SIOCGLIFADDR: get first address. ( ??? )
+ *	SIOCGLIFADDR with IFLR_PREFIX:
+ *		get first address that matches the specified prefix.
+ *	SIOCALIFADDR: add the specified address.
+ *	SIOCALIFADDR with IFLR_PREFIX:
+ *		EINVAL since we can't deduce hostid part of the address.
+ *	SIOCDLIFADDR: delete the specified address.
+ *	SIOCDLIFADDR with IFLR_PREFIX:
+ *		delete the first address that matches the specified prefix.
+ * return values: + * EINVAL on invalid parameters + * EADDRNOTAVAIL on prefix match failed/specified address not found + * other values may be returned from in_ioctl() + */ +static int +in_lifaddr_ioctl(so, cmd, data, ifp) + struct socket *so; + u_long cmd; + caddr_t data; + struct ifnet *ifp; +{ + struct if_laddrreq *iflr = (struct if_laddrreq *)data; + struct ifaddr *ifa; + struct sockaddr *sa; + + /* sanity checks */ + if (!data || !ifp) { + panic("invalid argument to in_lifaddr_ioctl"); + /*NOTRECHED*/ + } + + switch (cmd) { + case SIOCGLIFADDR: + /* address must be specified on GET with IFLR_PREFIX */ + if ((iflr->flags & IFLR_PREFIX) == 0) + break; + /*FALLTHROUGH*/ + case SIOCALIFADDR: + case SIOCDLIFADDR: + /* address must be specified on ADD and DELETE */ + sa = (struct sockaddr *)&iflr->addr; + if (sa->sa_family != AF_INET) + return EINVAL; + if (sa->sa_len != sizeof(struct sockaddr_in)) + return EINVAL; + /* XXX need improvement */ + sa = (struct sockaddr *)&iflr->dstaddr; + if (sa->sa_family + && sa->sa_family != AF_INET) + return EINVAL; + if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in)) + return EINVAL; + break; + default: /*shouldn't happen*/ +#if 0 + panic("invalid cmd to in_lifaddr_ioctl"); + /*NOTREACHED*/ +#else + return EOPNOTSUPP; +#endif + } + if (sizeof(struct in_addr) * 8 < iflr->prefixlen) + return EINVAL; + + switch (cmd) { + case SIOCALIFADDR: + { + struct in_aliasreq ifra; + + if (iflr->flags & IFLR_PREFIX) + return EINVAL; + + /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). 
*/ + bzero(&ifra, sizeof(ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, + sizeof(ifra.ifra_name)); + + bcopy(&iflr->addr, &ifra.ifra_addr, + ((struct sockaddr *)&iflr->addr)->sa_len); + + if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /*XXX*/ + bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, + ((struct sockaddr *)&iflr->dstaddr)->sa_len); + } + + ifra.ifra_mask.sin_family = AF_INET; + ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in); + in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen); + + return in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp); + } + case SIOCGLIFADDR: + case SIOCDLIFADDR: + { + struct in_ifaddr *ia; + struct in_addr mask, candidate, match; + struct sockaddr_in *sin; + int cmp; + + bzero(&mask, sizeof(mask)); + if (iflr->flags & IFLR_PREFIX) { + /* lookup a prefix rather than address. */ + in_len2mask(&mask, iflr->prefixlen); + + sin = (struct sockaddr_in *)&iflr->addr; + match.s_addr = sin->sin_addr.s_addr; + match.s_addr &= mask.s_addr; + + /* if you set extra bits, that's wrong */ + if (match.s_addr != sin->sin_addr.s_addr) + return EINVAL; + + cmp = 1; + } else { + if (cmd == SIOCGLIFADDR) { + /* on getting an address, take the 1st match */ + cmp = 0; /*XXX*/ + } else { + /* on deleting an address, do exact match */ + in_len2mask(&mask, 32); + sin = (struct sockaddr_in *)&iflr->addr; + match.s_addr = sin->sin_addr.s_addr; + + cmp = 1; + } + } + + for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (!cmp) + break; + candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr; + candidate.s_addr &= mask.s_addr; + if (candidate.s_addr == match.s_addr) + break; + } + if (!ifa) + return EADDRNOTAVAIL; + ia = (struct in_ifaddr *)ifa; + + if (cmd == SIOCGLIFADDR) { + /* fill in the if_laddrreq structure */ + bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len); + + if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { + bcopy(&ia->ia_dstaddr, 
&iflr->dstaddr, + ia->ia_dstaddr.sin_len); + } else + bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); + + iflr->prefixlen = + in_mask2len(&ia->ia_sockmask.sin_addr); + + iflr->flags = 0; /*XXX*/ + + return 0; + } else { + struct in_aliasreq ifra; + + /* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ + bzero(&ifra, sizeof(ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, + sizeof(ifra.ifra_name)); + + bcopy(&ia->ia_addr, &ifra.ifra_addr, + ia->ia_addr.sin_len); + if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { + bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, + ia->ia_dstaddr.sin_len); + } + bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr, + ia->ia_sockmask.sin_len); + + return in_control(so, SIOCDIFADDR, (caddr_t)&ifra, ifp); + } + } + } + + return EOPNOTSUPP; /*just for safety*/ +} + +/* + * Delete any existing route for an interface. + */ +void +in_ifscrub(ifp, ia) + register struct ifnet *ifp; + register struct in_ifaddr *ia; +{ + + if ((ia->ia_flags & IFA_ROUTE) == 0) + return; + if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) + rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); + else + rtinit(&(ia->ia_ifa), (int)RTM_DELETE, 0); + ia->ia_flags &= ~IFA_ROUTE; +} + +/* + * Initialize an interface's internet address + * and routing table entry. + */ +int +in_ifinit(ifp, ia, sin, scrub) + register struct ifnet *ifp; + register struct in_ifaddr *ia; + struct sockaddr_in *sin; + int scrub; +{ + register u_int32_t i = sin->sin_addr.s_addr; + struct sockaddr_in oldaddr; + int s = splimp(), flags = RTF_UP, error; + + oldaddr = ia->ia_addr; + ia->ia_addr = *sin; + /* + * Give the interface a chance to initialize + * if this is its first address, + * and to validate the address if necessary. 
+	 */
+	if (ifp->if_ioctl &&
+	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+		splx(s);
+		/* the driver rejected the address: put the old one back */
+		ia->ia_addr = oldaddr;
+		return (error);
+	}
+	splx(s);
+	if (scrub) {
+		/* remove the route that was installed for the old address */
+		ia->ia_ifa.ifa_addr = sintosa(&oldaddr);
+		in_ifscrub(ifp, ia);
+		ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
+	}
+	if (IN_CLASSA(i))
+		ia->ia_netmask = IN_CLASSA_NET;
+	else if (IN_CLASSB(i))
+		ia->ia_netmask = IN_CLASSB_NET;
+	else
+		ia->ia_netmask = IN_CLASSC_NET;
+	/*
+	 * The subnet mask usually includes at least the standard network part,
+	 * but may be smaller in the case of supernetting.
+	 * If it is set, we believe it.
+	 */
+	if (ia->ia_subnetmask == 0) {
+		ia->ia_subnetmask = ia->ia_netmask;
+		ia->ia_sockmask.sin_addr.s_addr = ia->ia_subnetmask;
+	} else
+		ia->ia_netmask &= ia->ia_subnetmask;
+	ia->ia_net = i & ia->ia_netmask;
+	ia->ia_subnet = i & ia->ia_subnetmask;
+	in_socktrim(&ia->ia_sockmask);
+	/*
+	 * Add route for the network.
+	 */
+	ia->ia_ifa.ifa_metric = ifp->if_metric;
+	if (ifp->if_flags & IFF_BROADCAST) {
+		ia->ia_broadaddr.sin_addr.s_addr =
+			ia->ia_subnet | ~ia->ia_subnetmask;
+		ia->ia_netbroadcast.s_addr =
+			ia->ia_net | ~ia->ia_netmask;
+	} else if (ifp->if_flags & IFF_LOOPBACK) {
+		ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+		flags |= RTF_HOST;
+	} else if (ifp->if_flags & IFF_POINTOPOINT) {
+		/* no destination configured yet: nothing to route to */
+		if (ia->ia_dstaddr.sin_family != AF_INET)
+			return (0);
+		flags |= RTF_HOST;
+	}
+	if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0)
+		ia->ia_flags |= IFA_ROUTE;
+	/*
+	 * If the interface supports multicast, join the "all hosts"
+	 * multicast group on that interface.
+	 */
+	if (ifp->if_flags & IFF_MULTICAST) {
+		struct in_addr addr;
+
+		addr.s_addr = INADDR_ALLHOSTS_GROUP;
+		in_addmulti(&addr, ifp);
+	}
+	return (error);
+}
+
+
+/*
+ * Return 1 if the address might be a local broadcast address.
+ *
+ * INADDR_BROADCAST and INADDR_ANY always qualify.  When ifp is given
+ * and lacks IFF_BROADCAST the answer is 0; otherwise only the addresses
+ * of that interface are examined.  When ifp is NULL every interface on
+ * the ifnet list is examined.
+ */
+int
+in_broadcast(in, ifp)
+	struct in_addr in;
+	struct ifnet *ifp;
+{
+	struct ifnet *ifn, *if_first, *if_target;
+	register struct ifaddr *ifa;
+
+	if (in.s_addr == INADDR_BROADCAST ||
+	    in.s_addr == INADDR_ANY)
+		return 1;
+	if (ifp && ((ifp->if_flags & IFF_BROADCAST) == 0))
+		return 0;
+
+	/*
+	 * [if_first, if_target) is the range of interfaces to scan:
+	 * the whole list, or just ifp itself.
+	 */
+	if (ifp == NULL)
+	{
+	  if_first = ifnet.tqh_first;
+	  if_target = 0;
+	}
+	else
+	{
+	  if_first = ifp;
+	  if_target = ifp->if_list.tqe_next;
+	}
+
+#define ia (ifatoia(ifa))
+	/*
+	 * Look through the list of addresses for a match
+	 * with a broadcast address.
+	 * If ifp is NULL, check against all the interfaces.
+	 */
+	for (ifn = if_first; ifn != if_target; ifn = ifn->if_list.tqe_next)
+	  for (ifa = ifn->if_addrlist.tqh_first; ifa;
+	       ifa = ifa->ifa_list.tqe_next)
+	    if (!ifp)
+	    {
+		if (ifa->ifa_addr->sa_family == AF_INET &&
+		    ((ia->ia_subnetmask != 0xffffffff &&
+		      (((ifn->if_flags & IFF_BROADCAST) &&
+			in.s_addr == ia->ia_broadaddr.sin_addr.s_addr) ||
+		       in.s_addr == ia->ia_subnet)) ||
+		     /*
+		      * Check for old-style (host 0) broadcast.
+		      */
+		     (in.s_addr == ia->ia_netbroadcast.s_addr ||
+		      in.s_addr == ia->ia_net)))
+			return 1;
+	    }
+	    else
+		if (ifa->ifa_addr->sa_family == AF_INET &&
+		    (((ifn->if_flags & IFF_BROADCAST) &&
+		      in.s_addr == ia->ia_broadaddr.sin_addr.s_addr) ||
+		     in.s_addr == ia->ia_netbroadcast.s_addr ||
+		     /*
+		      * Check for old-style (host 0) broadcast.
+		      */
+		     in.s_addr == ia->ia_subnet ||
+		     in.s_addr == ia->ia_net))
+			return 1;
+	return (0);
+#undef ia
+}
+
+/*
+ * Add an address to the list of IP multicast addresses for a given interface.
+ *
+ * Returns the membership record, or NULL if no memory is available, the
+ * interface has no internet address, or the driver has no ioctl routine
+ * or refuses SIOCADDMULTI.  An existing record only has its reference
+ * count incremented.
+ */
+struct in_multi *
+in_addmulti(ap, ifp)
+	register struct in_addr *ap;
+	register struct ifnet *ifp;
+{
+	register struct in_multi *inm;
+	struct ifreq ifr;
+	struct in_ifaddr *ia;
+	int s = splsoftnet();
+
+	/*
+	 * See if address already in list.
+	 */
+	IN_LOOKUP_MULTI(*ap, ifp, inm);
+	if (inm != NULL) {
+		/*
+		 * Found it; just increment the reference count.
+		 */
+		++inm->inm_refcount;
+	} else {
+		/*
+		 * New address; allocate a new multicast record
+		 * and link it into the interface's multicast list.
+		 */
+		inm = (struct in_multi *)malloc(sizeof(*inm),
+		    M_IPMADDR, M_NOWAIT);
+		if (inm == NULL) {
+			splx(s);
+			return (NULL);
+		}
+		inm->inm_addr = *ap;
+		inm->inm_ifp = ifp;
+		inm->inm_refcount = 1;
+		IFP_TO_IA(ifp, ia);
+		if (ia == NULL) {
+			free(inm, M_IPMADDR);
+			splx(s);
+			return (NULL);
+		}
+		inm->inm_ia = ia;
+		LIST_INSERT_HEAD(&ia->ia_multiaddrs, inm, inm_list);
+		/*
+		 * Ask the network driver to update its multicast reception
+		 * filter appropriately for the new address.
+		 */
+		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
+		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
+		satosin(&ifr.ifr_addr)->sin_addr = *ap;
+		if ((ifp->if_ioctl == NULL) ||
+		    (*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) {
+			/* undo the list insertion made above */
+			LIST_REMOVE(inm, inm_list);
+			free(inm, M_IPMADDR);
+			splx(s);
+			return (NULL);
+		}
+		/*
+		 * Let IGMP know that we have joined a new IP multicast group.
+		 */
+		igmp_joingroup(inm);
+	}
+	splx(s);
+	return (inm);
+}
+
+/*
+ * Delete a multicast address record.
+ *
+ * Drops one reference; the record is torn down only when the count
+ * reaches zero.  The driver's SIOCDELMULTI result is not examined.
+ */
+void
+in_delmulti(inm)
+	register struct in_multi *inm;
+{
+	struct ifreq ifr;
+	int s = splsoftnet();
+
+	if (--inm->inm_refcount == 0) {
+		/*
+		 * No remaining claims to this record; let IGMP know that
+		 * we are leaving the multicast group.
+		 */
+		igmp_leavegroup(inm);
+		/*
+		 * Unlink from list.
+		 */
+		LIST_REMOVE(inm, inm_list);
+		/*
+		 * Notify the network driver to update its multicast reception
+		 * filter.
+		 */
+		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
+		satosin(&ifr.ifr_addr)->sin_addr = inm->inm_addr;
+		(*inm->inm_ifp->if_ioctl)(inm->inm_ifp, SIOCDELMULTI,
+							     (caddr_t)&ifr);
+		free(inm, M_IPMADDR);
+	}
+	splx(s);
+}
+
+#endif
diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/in_cksum.c b/ecos/packages/net/tcpip/current/src/sys/netinet/in_cksum.c
new file mode 100644
index 0000000..3a74df8
--- /dev/null
+++ b/ecos/packages/net/tcpip/current/src/sys/netinet/in_cksum.c
@@ -0,0 +1,196 @@
+//==========================================================================
+//
+//      sys/netinet/in_cksum.c
+//
+//
+//
+//==========================================================================
+// ####BSDALTCOPYRIGHTBEGIN####
+// -------------------------------------------
+// Portions of this software may have been derived from OpenBSD
+// or other sources, and if so are covered by the appropriate copyright
+// and license included herein.
+// -------------------------------------------
+// ####BSDALTCOPYRIGHTEND####
+//==========================================================================
+//#####DESCRIPTIONBEGIN####
+//
+// Author(s):    gthomas
+// Contributors: gthomas
+// Date:         2000-01-10
+// Purpose:
+// Description:
+//
+//
+//####DESCRIPTIONEND####
+//
+//==========================================================================
+
+
+/*	$OpenBSD: in_cksum.c,v 1.3 1997/02/24 14:06:35 niklas Exp $	*/
+/*	$NetBSD: in_cksum.c,v 1.11 1996/04/08 19:55:37 jonathan Exp $	*/
+
+/*
+ * Copyright (c) 1988, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#include <sys/mbuf.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <netinet/in.h> + +struct net_stats stats_in_cksum; + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + +int +in_cksum(m, len) + register struct mbuf *m; + register int len; +{ + register u_int16_t *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union { + u_int8_t c[2]; + u_int16_t s; + } s_util; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + START_STATS(); + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_int16_t *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + * + * s_util.c[0] is already saved when scanning previous + * mbuf. + */ + s_util.c[1] = *(u_int8_t *)w; + sum += s_util.s; + w = (u_int16_t *)((u_int8_t *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (long) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_int8_t *)w; + w = (u_int16_t *)((int8_t *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. 
+ */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + continue; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(u_int8_t *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(u_int8_t *)w; + } + if (len) +#ifdef __ECOS + diag_printf("cksum: out of data\n"); +#else + printf("cksum: out of data\n"); +#endif + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + + FINISH_STATS(stats_in_cksum); + + return (~sum & 0xffff); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/in_pcb.c b/ecos/packages/net/tcpip/current/src/sys/netinet/in_pcb.c new file mode 100644 index 0000000..4b295f4 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/in_pcb.c @@ -0,0 +1,1089 @@ +//========================================================================== +// +// sys/netinet/in_pcb.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: in_pcb.c,v 1.36 1999/12/08 11:36:40 angelos Exp $ */ +/* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#ifdef __ECOS +#undef errno +#endif +#include <sys/time.h> +#ifndef __ECOS +#include <sys/proc.h> +#endif +#include <sys/domain.h> + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/in_var.h> +#include <netinet/ip_var.h> +#ifndef __ECOS +#include <dev/rndvar.h> +#endif + +#ifdef INET6 +#include <netinet6/ip6_var.h> +#endif /* INET6 */ + +#ifdef IPSEC +#include <netinet/ip_ipsp.h> + +extern int check_ipsec_policy __P((struct inpcb *, u_int32_t)); +#endif + +#if 0 /*KAME IPSEC*/ +#include <netinet6/ipsec.h> +#include <netkey/key.h> +#include <netkey/key_debug.h> +#endif /* IPSEC */ + +struct in_addr zeroin_addr; + +extern int ipsec_auth_default_level; +extern int ipsec_esp_trans_default_level; +extern int ipsec_esp_network_default_level; + +/* + * These configure the range of local port addresses assigned to + * "unspecified" outgoing connections/packets/whatever. 
+ */ +int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ +int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ +int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 40000 */ +int ipport_hilastauto = IPPORT_HILASTAUTO; /* 44999 */ + +#define INPCBHASH(table, faddr, fport, laddr, lport) \ + &(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \ + ntohs((fport)) + ntohs((lport))) & (table->inpt_hash)] + +#define IN6PCBHASH(table, faddr, fport, laddr, lport) \ + &(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \ + (faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \ + (table->inpt_hash)] + +void +in_pcbinit(table, hashsize) + struct inpcbtable *table; + int hashsize; +{ + + CIRCLEQ_INIT(&table->inpt_queue); + table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_WAITOK, &table->inpt_hash); + table->inpt_lastport = 0; +} + +struct baddynamicports baddynamicports; + +/* + * Check if the specified port is invalid for dynamic allocation. + */ +int +in_baddynamic(port, proto) + u_int16_t port; + u_int16_t proto; +{ + + if (port < IPPORT_RESERVED/2 || port >= IPPORT_RESERVED) + return(0); + + switch (proto) { + case IPPROTO_TCP: + return (DP_ISSET(baddynamicports.tcp, port)); + case IPPROTO_UDP: + return (DP_ISSET(baddynamicports.udp, port)); + default: + return (0); + } +} + +int +in_pcballoc(so, v) + struct socket *so; + void *v; +{ + struct inpcbtable *table = v; + register struct inpcb *inp; + int s; + + MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, M_NOWAIT); + if (inp == NULL) + return (ENOBUFS); + bzero((caddr_t)inp, sizeof(*inp)); + inp->inp_table = table; + inp->inp_socket = so; + inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level; + inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level; + inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level; + s = splnet(); + CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue); + LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport, + &inp->inp_laddr, inp->inp_lport), inp, 
inp_hash); + splx(s); + so->so_pcb = inp; + inp->inp_hops = -1; + +#ifdef INET6 + /* + * Small change in this function to set the INP_IPV6 flag so routines + * outside pcb-specific routines don't need to use sotopf(), and all + * of it's pointer chasing, later. + */ + if (sotopf(so) == PF_INET6) + inp->inp_flags = INP_IPV6; + inp->inp_csumoffset = -1; +#endif /* INET6 */ + return (0); +} + +int +in_pcbbind(v, nam) + register void *v; + struct mbuf *nam; +{ + register struct inpcb *inp = v; + register struct socket *so = inp->inp_socket; + register struct inpcbtable *table = inp->inp_table; + u_int16_t *lastport = &inp->inp_table->inpt_lastport; + register struct sockaddr_in *sin; +#ifndef __ECOS + struct proc *p = curproc; /* XXX */ + int error; +#endif + u_int16_t lport = 0; + int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); + +#ifdef INET6 + if (sotopf(so) == PF_INET6) + return in6_pcbbind(inp, nam); +#endif /* INET6 */ + + if (in_ifaddr.tqh_first == 0) + return (EADDRNOTAVAIL); + if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) + return (EINVAL); + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && + ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || + (so->so_options & SO_ACCEPTCONN) == 0)) + wild = INPLOOKUP_WILDCARD; + if (nam) { + sin = mtod(nam, struct sockaddr_in *); + if (nam->m_len != sizeof (*sin)) + return (EINVAL); +#ifdef notdef + /* + * We should check the family, but old programs + * incorrectly fail to initialize it. + */ + if (sin->sin_family != AF_INET) + return (EAFNOSUPPORT); +#endif + lport = sin->sin_port; + if (IN_MULTICAST(sin->sin_addr.s_addr)) { + /* + * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; + * allow complete duplication of binding if + * SO_REUSEPORT is set, or if SO_REUSEADDR is set + * and a multicast address is bound on both + * new and duplicated sockets. 
+ */ + if (so->so_options & SO_REUSEADDR) + reuseport = SO_REUSEADDR|SO_REUSEPORT; + } else if (sin->sin_addr.s_addr != INADDR_ANY) { + sin->sin_port = 0; /* yech... */ + if (in_iawithaddr(sin->sin_addr, NULL) == 0) + return (EADDRNOTAVAIL); + } + if (lport) { + struct inpcb *t; + + /* GROSS */ +#ifndef __ECOS + if (ntohs(lport) < IPPORT_RESERVED && + (error = suser(p->p_ucred, &p->p_acflag))) + return (EACCES); +#endif + if (so->so_euid) { + t = in_pcblookup(table, &zeroin_addr, 0, + &sin->sin_addr, lport, INPLOOKUP_WILDCARD); + if (t && (so->so_euid != t->inp_socket->so_euid)) + return (EADDRINUSE); + } + t = in_pcblookup(table, &zeroin_addr, 0, + &sin->sin_addr, lport, wild); + if (t && (reuseport & t->inp_socket->so_options) == 0) + return (EADDRINUSE); + } + inp->inp_laddr = sin->sin_addr; + } + if (lport == 0) { + u_int16_t first, last, old = 0; + int count; + int loopcount = 0; + + if (inp->inp_flags & INP_HIGHPORT) { + first = ipport_hifirstauto; /* sysctl */ + last = ipport_hilastauto; + } else if (inp->inp_flags & INP_LOWPORT) { +#ifndef __ECOS + if ((error = suser(p->p_ucred, &p->p_acflag))) + return (EACCES); +#endif + first = IPPORT_RESERVED-1; /* 1023 */ + last = 600; /* not IPPORT_RESERVED/2 */ + } else { + first = ipport_firstauto; /* sysctl */ + last = ipport_lastauto; + } + + /* + * Simple check to ensure all ports are not used up causing + * a deadlock here. + * + * We split the two cases (up and down) so that the direction + * is not being tested on each round of the loop. + */ + +portloop: + if (first > last) { + /* + * counting down + */ + if (loopcount == 0) { /* only do this once. */ + old = first; + first -= (arc4random() % (first - last)); + } + count = first - last; + *lastport = first; /* restart each time */ + + do { + if (count-- <= 0) { /* completely used? 
*/ + if (loopcount == 0) { + last = old; + loopcount++; + goto portloop; + } + return (EADDRNOTAVAIL); + } + --*lastport; + if (*lastport > first || *lastport < last) + *lastport = first; + lport = htons(*lastport); + } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || + in_pcblookup(table, &zeroin_addr, 0, + &inp->inp_laddr, lport, wild)); + } else { + /* + * counting up + */ + if (loopcount == 0) { /* only do this once. */ + old = first; + first += (arc4random() % (last - first)); + } + count = last - first; + *lastport = first; /* restart each time */ + + do { + if (count-- <= 0) { /* completely used? */ + if (loopcount == 0) { + first = old; + loopcount++; + goto portloop; + } + return (EADDRNOTAVAIL); + } + ++*lastport; + if (*lastport < first || *lastport > last) + *lastport = first; + lport = htons(*lastport); + } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || + in_pcblookup(table, &zeroin_addr, 0, + &inp->inp_laddr, lport, wild)); + } + } + inp->inp_lport = lport; + in_pcbrehash(inp); + return (0); +} + +/* + * Connect from a socket to a specified address. + * Both address and port must be specified in argument sin. + * If don't have a local address for this socket yet, + * then pick one. + */ +int +in_pcbconnect(v, nam) + register void *v; + struct mbuf *nam; +{ + register struct inpcb *inp = v; + struct sockaddr_in *ifaddr = NULL; + register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); + +#ifdef INET6 + if (sotopf(inp->inp_socket) == PF_INET6) + return (in6_pcbconnect(inp, nam)); +#endif /* INET6 */ + + if (nam->m_len != sizeof (*sin)) + return (EINVAL); + if (sin->sin_family != AF_INET) + return (EAFNOSUPPORT); + if (sin->sin_port == 0) + return (EADDRNOTAVAIL); + if (in_ifaddr.tqh_first != 0) { + /* + * If the destination address is INADDR_ANY, + * use the primary local address. 
+ * If the supplied address is INADDR_BROADCAST, + * and the primary interface supports broadcast, + * choose the broadcast address for that interface. + */ + if (sin->sin_addr.s_addr == INADDR_ANY) + sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr; + else if (sin->sin_addr.s_addr == INADDR_BROADCAST && + (in_ifaddr.tqh_first->ia_ifp->if_flags & IFF_BROADCAST)) + sin->sin_addr = in_ifaddr.tqh_first->ia_broadaddr.sin_addr; + } + if (inp->inp_laddr.s_addr == INADDR_ANY) { +#if 0 + register struct route *ro; + struct sockaddr_in *sin2; + struct in_ifaddr *ia; + + ia = (struct in_ifaddr *)0; + /* + * If route is known or can be allocated now, + * our src addr is taken from the i/f, else punt. + */ + ro = &inp->inp_route; + if (ro->ro_rt && + (satosin(&ro->ro_dst)->sin_addr.s_addr != + sin->sin_addr.s_addr || + inp->inp_socket->so_options & SO_DONTROUTE)) { + RTFREE(ro->ro_rt); + ro->ro_rt = (struct rtentry *)0; + } + if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ + (ro->ro_rt == (struct rtentry *)0 || + ro->ro_rt->rt_ifp == (struct ifnet *)0)) { + /* No route yet, so try to acquire one */ + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(struct sockaddr_in); + satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; + rtalloc(ro); + + /* + * It is important to bzero out the rest of the + * struct sockaddr_in when mixing v6 & v4! + */ + sin2 = (struct sockaddr_in *)&ro->ro_dst; + bzero(sin2->sin_zero, sizeof(sin2->sin_zero)); + } + /* + * If we found a route, use the address + * corresponding to the outgoing interface + * unless it is the loopback (in case a route + * to our address on another net goes to loopback). 
+ */ + if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) + ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia == 0) { + u_int16_t fport = sin->sin_port; + + sin->sin_port = 0; + ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); + if (ia == 0) + ia = ifatoia(ifa_ifwithnet(sintosa(sin))); + sin->sin_port = fport; + if (ia == 0) + ia = in_ifaddr.tqh_first; + if (ia == 0) + return (EADDRNOTAVAIL); + } + /* + * If the destination address is multicast and an outgoing + * interface has been set as a multicast option, use the + * address of that interface as our source address. + */ + if (IN_MULTICAST(sin->sin_addr.s_addr) && +#ifdef INET6 + inp->inp_moptions != NULL && + !(inp->inp_flags & INP_IPV6_MCAST)) +#else + inp->inp_moptions != NULL) +#endif + { + struct ip_moptions *imo; + struct ifnet *ifp; + + imo = inp->inp_moptions; + if (imo->imo_multicast_ifp != NULL) { + ifp = imo->imo_multicast_ifp; + for (ia = in_ifaddr.tqh_first; ia != 0; + ia = ia->ia_list.tqe_next) + if (ia->ia_ifp == ifp) + break; + if (ia == 0) + return (EADDRNOTAVAIL); + } + } + ifaddr = satosin(&ia->ia_addr); +#else + int error; + ifaddr = in_selectsrc(sin, &inp->inp_route, + inp->inp_socket->so_options, inp->inp_moptions, &error); + if (ifaddr == NULL) { + if (error == 0) + error = EADDRNOTAVAIL; + return error; + } +#endif + } + if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port, + inp->inp_laddr.s_addr ? 
inp->inp_laddr : ifaddr->sin_addr, + inp->inp_lport) != 0) + return (EADDRINUSE); + if (inp->inp_laddr.s_addr == INADDR_ANY) { + if (inp->inp_lport == 0 && + in_pcbbind(inp, (struct mbuf *)0) == EADDRNOTAVAIL) + return (EADDRNOTAVAIL); + inp->inp_laddr = ifaddr->sin_addr; + } + inp->inp_faddr = sin->sin_addr; + inp->inp_fport = sin->sin_port; + in_pcbrehash(inp); +#ifdef IPSEC + return (check_ipsec_policy(inp, 0)); +#else + return (0); +#endif +} + +void +in_pcbdisconnect(v) + void *v; +{ + struct inpcb *inp = v; + +#ifdef INET6 + if (sotopf(inp->inp_socket) == PF_INET6) { + inp->inp_faddr6 = in6addr_any; + /* Disconnected AF_INET6 sockets cannot be "v4-mapped" */ + inp->inp_flags &= ~INP_IPV6_MAPPED; + } else +#endif + inp->inp_faddr.s_addr = INADDR_ANY; + + inp->inp_fport = 0; + in_pcbrehash(inp); + if (inp->inp_socket->so_state & SS_NOFDREF) + in_pcbdetach(inp); +} + +void +in_pcbdetach(v) + void *v; +{ + struct inpcb *inp = v; + struct socket *so = inp->inp_socket; + int s; + +#if 0 /*KAME IPSEC*/ + if (so->so_pcb) { + KEYDEBUG(KEYDEBUG_KEY_STAMP, + printf("DP call free SO=%p from in_pcbdetach\n", so)); + key_freeso(so); + } + ipsec4_delete_pcbpolicy(inp); +#endif /*IPSEC*/ + so->so_pcb = 0; + sofree(so); + if (inp->inp_options) + (void)m_freem(inp->inp_options); + if (inp->inp_route.ro_rt) + rtfree(inp->inp_route.ro_rt); +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + ip6_freemoptions(inp->inp_moptions6); + else +#endif + ip_freemoptions(inp->inp_moptions); +#ifdef IPSEC + /* XXX IPsec cleanup here */ + s = spltdb(); + if (inp->inp_tdb) + TAILQ_REMOVE(&inp->inp_tdb->tdb_inp, inp, inp_tdb_next); + splx(s); +#endif + s = splnet(); + LIST_REMOVE(inp, inp_hash); + CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); + splx(s); + FREE(inp, M_PCB); +} + +void +in_setsockaddr(inp, nam) + register struct inpcb *inp; + struct mbuf *nam; +{ + register struct sockaddr_in *sin; + + nam->m_len = sizeof (*sin); + sin = mtod(nam, struct sockaddr_in *); + 
bzero((caddr_t)sin, sizeof (*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_port = inp->inp_lport; + sin->sin_addr = inp->inp_laddr; +} + +void +in_setpeeraddr(inp, nam) + struct inpcb *inp; + struct mbuf *nam; +{ + register struct sockaddr_in *sin; + +#ifdef INET6 + if (sotopf(inp->inp_socket) == PF_INET6) + in6_setpeeraddr(inp, nam); +#endif /* INET6 */ + + nam->m_len = sizeof (*sin); + sin = mtod(nam, struct sockaddr_in *); + bzero((caddr_t)sin, sizeof (*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_port = inp->inp_fport; + sin->sin_addr = inp->inp_faddr; +} + +/* + * Pass some notification to all connections of a protocol + * associated with address dst. The local address and/or port numbers + * may be specified to limit the search. The "usual action" will be + * taken, depending on the ctlinput cmd. The caller must filter any + * cmds that are uninteresting (e.g., no error in the map). + * Call the protocol specific routine (if any) to report + * any errors for each matching socket. + * + * Must be called at splsoftnet. + */ +void +in_pcbnotify(table, dst, fport_arg, laddr, lport_arg, errno, notify) + struct inpcbtable *table; + struct sockaddr *dst; + u_int fport_arg, lport_arg; + struct in_addr laddr; + int errno; + void (*notify) __P((struct inpcb *, int)); +{ + register struct inpcb *inp, *oinp; + struct in_addr faddr; + u_int16_t fport = fport_arg, lport = lport_arg; + +#ifdef INET6 + /* + * See in6_pcbnotify() for IPv6 codepath. By the time this + * gets called, the addresses passed are either definitely IPv4 or + * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses. 
+ */ +#endif /* INET6 */ + + if (dst->sa_family != AF_INET) + return; + faddr = satosin(dst)->sin_addr; + if (faddr.s_addr == INADDR_ANY) + return; + + for (inp = table->inpt_queue.cqh_first; + inp != (struct inpcb *)&table->inpt_queue;) { + if (inp->inp_faddr.s_addr != faddr.s_addr || + inp->inp_socket == 0 || + inp->inp_fport != fport || + inp->inp_lport != lport || + inp->inp_laddr.s_addr != laddr.s_addr) { + inp = inp->inp_queue.cqe_next; + continue; + } + oinp = inp; + inp = inp->inp_queue.cqe_next; + if (notify) + (*notify)(oinp, errno); + } +} + +void +in_pcbnotifyall(table, dst, errno, notify) + struct inpcbtable *table; + struct sockaddr *dst; + int errno; + void (*notify) __P((struct inpcb *, int)); +{ + register struct inpcb *inp, *oinp; + struct in_addr faddr; + +#ifdef INET6 + /* + * See in6_pcbnotify() for IPv6 codepath. By the time this + * gets called, the addresses passed are either definitely IPv4 or + * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses. + */ +#endif /* INET6 */ + + if (dst->sa_family != AF_INET) + return; + faddr = satosin(dst)->sin_addr; + if (faddr.s_addr == INADDR_ANY) + return; + + for (inp = table->inpt_queue.cqh_first; + inp != (struct inpcb *)&table->inpt_queue;) { + if (inp->inp_faddr.s_addr != faddr.s_addr || + inp->inp_socket == 0) { + inp = inp->inp_queue.cqe_next; + continue; + } + oinp = inp; + inp = inp->inp_queue.cqe_next; + if (notify) + (*notify)(oinp, errno); + } +} + +/* + * Check for alternatives when higher level complains + * about service problems. For now, invalidate cached + * routing information. If the route was created dynamically + * (by a redirect), time to try a default gateway again. 
+ */ +void +in_losing(inp) + struct inpcb *inp; +{ + register struct rtentry *rt; + struct rt_addrinfo info; + + if ((rt = inp->inp_route.ro_rt)) { + inp->inp_route.ro_rt = 0; + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst; + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_NETMASK] = rt_mask(rt); + rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); + if (rt->rt_flags & RTF_DYNAMIC) + (void) rtrequest(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, + (struct rtentry **)0); + else + /* + * A new route can be allocated + * the next time output is attempted. + */ + rtfree(rt); + } +} + +/* + * After a routing change, flush old routing + * and allocate a (hopefully) better one. + */ +void +in_rtchange(inp, errno) + register struct inpcb *inp; + int errno; +{ + if (inp->inp_route.ro_rt) { + rtfree(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = 0; + /* + * A new route can be allocated the next time + * output is attempted. + */ + } +} + +struct inpcb * +in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags) + struct inpcbtable *table; + void *faddrp, *laddrp; + u_int fport_arg, lport_arg; + int flags; +{ + register struct inpcb *inp, *match = 0; + int matchwild = 3, wildcard; + u_int16_t fport = fport_arg, lport = lport_arg; + struct in_addr faddr = *(struct in_addr *)faddrp; + struct in_addr laddr = *(struct in_addr *)laddrp; + + for (inp = table->inpt_queue.cqh_first; + inp != (struct inpcb *)&table->inpt_queue; + inp = inp->inp_queue.cqe_next) { + if (inp->inp_lport != lport) + continue; + wildcard = 0; +#ifdef INET6 + if (flags & INPLOOKUP_IPV6) { + struct in6_addr *laddr6 = (struct in6_addr *)laddrp; + struct in6_addr *faddr6 = (struct in6_addr *)faddrp; + + /* + * Always skip AF_INET sockets when looking + * for AF_INET6 addresses. The only problem + * with this comes if the PF_INET6 addresses + * are v4-mapped addresses. 
From what I've + * been able to see, none of the callers cause + * such a situation to occur. If such a + * situation DID occur, then it is possible to + * miss a matching PCB. + */ + if (!(inp->inp_flags & INP_IPV6)) + continue; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { + if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) + wildcard++; + else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6)) + continue; + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) + wildcard++; + } + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { + if (IN6_IS_ADDR_UNSPECIFIED(faddr6)) + wildcard++; + else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, + faddr6) || inp->inp_fport != fport) + continue; + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(faddr6)) + wildcard++; + } + } else +#endif /* INET6 */ + { + if (inp->inp_faddr.s_addr != INADDR_ANY) { + if (faddr.s_addr == INADDR_ANY) + wildcard++; + else if (inp->inp_faddr.s_addr != faddr.s_addr || + inp->inp_fport != fport) + continue; + } else { + if (faddr.s_addr != INADDR_ANY) + wildcard++; + } + if (inp->inp_laddr.s_addr != INADDR_ANY) { + if (laddr.s_addr == INADDR_ANY) + wildcard++; + else if (inp->inp_laddr.s_addr != laddr.s_addr) + continue; + } else { + if (laddr.s_addr != INADDR_ANY) + wildcard++; + } + } + if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) && + wildcard < matchwild) { + match = inp; + if ((matchwild = wildcard) == 0) + break; + } + } + return (match); +} + +struct sockaddr_in * +in_selectsrc(sin, ro, soopts, mopts, errorp) + struct sockaddr_in *sin; + struct route *ro; + int soopts; + struct ip_moptions *mopts; + int *errorp; +{ + struct sockaddr_in *sin2; + struct in_ifaddr *ia; + + ia = (struct in_ifaddr *)0; + /* + * If route is known or can be allocated now, + * our src addr is taken from the i/f, else punt. 
+ */ + if (ro->ro_rt && + (satosin(&ro->ro_dst)->sin_addr.s_addr != + sin->sin_addr.s_addr || + soopts & SO_DONTROUTE)) { + RTFREE(ro->ro_rt); + ro->ro_rt = (struct rtentry *)0; + } + if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/ + (ro->ro_rt == (struct rtentry *)0 || + ro->ro_rt->rt_ifp == (struct ifnet *)0)) { + /* No route yet, so try to acquire one */ + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(struct sockaddr_in); + satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; + rtalloc(ro); + + /* + * It is important to bzero out the rest of the + * struct sockaddr_in when mixing v6 & v4! + */ + sin2 = (struct sockaddr_in *)&ro->ro_dst; + bzero(sin2->sin_zero, sizeof(sin2->sin_zero)); + } + /* + * If we found a route, use the address + * corresponding to the outgoing interface + * unless it is the loopback (in case a route + * to our address on another net goes to loopback). + */ + if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) + ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia == 0) { + u_int16_t fport = sin->sin_port; + + sin->sin_port = 0; + ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); + if (ia == 0) + ia = ifatoia(ifa_ifwithnet(sintosa(sin))); + sin->sin_port = fport; + if (ia == 0) + ia = in_ifaddr.tqh_first; + if (ia == 0) { + *errorp = EADDRNOTAVAIL; + return NULL; + } + } + /* + * If the destination address is multicast and an outgoing + * interface has been set as a multicast option, use the + * address of that interface as our source address. 
+ */ + if (IN_MULTICAST(sin->sin_addr.s_addr) && +#if 0 /*def INET6*/ + mopts != NULL && + !(inp->inp_flags & INP_IPV6_MCAST)) +#else + mopts != NULL) +#endif + { + struct ip_moptions *imo; + struct ifnet *ifp; + + imo = mopts; + if (imo->imo_multicast_ifp != NULL) { + ifp = imo->imo_multicast_ifp; + for (ia = in_ifaddr.tqh_first; ia != 0; + ia = ia->ia_list.tqe_next) + if (ia->ia_ifp == ifp) + break; + if (ia == 0) { + *errorp = EADDRNOTAVAIL; + return NULL; + } + } + } + return satosin(&ia->ia_addr); +} + +void +in_pcbrehash(inp) + struct inpcb *inp; +{ + struct inpcbtable *table = inp->inp_table; + int s; + + s = splnet(); + LIST_REMOVE(inp, inp_hash); +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) { + LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6, + inp->inp_fport, &inp->inp_laddr6, inp->inp_lport), + inp, inp_hash); + } else { +#endif /* INET6 */ + LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, + inp->inp_fport, &inp->inp_laddr, inp->inp_lport), + inp, inp_hash); +#ifdef INET6 + } +#endif /* INET6 */ + splx(s); +} + +#ifdef DIAGNOSTIC +int in_pcbnotifymiss = 0; +#endif + +struct inpcb * +in_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg) + struct inpcbtable *table; + struct in_addr faddr, laddr; + u_int fport_arg, lport_arg; +{ + struct inpcbhead *head; + register struct inpcb *inp; + u_int16_t fport = fport_arg, lport = lport_arg; + + head = INPCBHASH(table, &faddr, fport, &laddr, lport); + for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { + if (inp->inp_faddr.s_addr == faddr.s_addr && + inp->inp_fport == fport && + inp->inp_lport == lport && + inp->inp_laddr.s_addr == laddr.s_addr) { + /* + * Move this PCB to the head of hash chain so that + * repeated accesses are quicker. This is analogous to + * the historic single-entry PCB cache. 
+ */ + if (inp != head->lh_first) { + LIST_REMOVE(inp, inp_hash); + LIST_INSERT_HEAD(head, inp, inp_hash); + } + break; + } + } +#ifdef DIAGNOSTIC + if (inp == NULL && in_pcbnotifymiss) { + printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d\n", + ntohl(faddr.s_addr), ntohs(fport), + ntohl(laddr.s_addr), ntohs(lport)); + } +#endif + return (inp); +} + +#ifdef INET6 +struct inpcb * +in6_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg) + struct inpcbtable *table; + struct in6_addr *faddr, *laddr; + u_int fport_arg, lport_arg; +{ + struct inpcbhead *head; + register struct inpcb *inp; + u_int16_t fport = fport_arg, lport = lport_arg; + + head = IN6PCBHASH(table, faddr, fport, laddr, lport); + for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { + if (!(inp->inp_flags & INP_IPV6)) + continue; + if (IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && + inp->inp_fport == fport && inp->inp_lport == lport && + IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr)) { + /* + * Move this PCB to the head of hash chain so that + * repeated accesses are quicker. This is analogous to + * the historic single-entry PCB cache. 
+ */ + if (inp != head->lh_first) { + LIST_REMOVE(inp, inp_hash); + LIST_INSERT_HEAD(head, inp, inp_hash); + } + break; + } + } +#ifdef DIAGNOSTIC + if (inp == NULL && in_pcbnotifymiss) { + printf("in6_pcblookup_connect: faddr="); + printf(" fport=%d laddr=", ntohs(fport)); + printf(" lport=%d\n", ntohs(lport)); + } +#endif + return (inp); +} +#endif /* INET6 */ + + diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/in_proto.c b/ecos/packages/net/tcpip/current/src/sys/netinet/in_proto.c new file mode 100644 index 0000000..36e64b2 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/in_proto.c @@ -0,0 +1,384 @@ +//========================================================================== +// +// sys/netinet/in_proto.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: in_proto.c,v 1.17 1999/12/09 03:46:59 angelos Exp $ */ +/* $NetBSD: in_proto.c,v 1.14 1996/02/18 18:58:32 christos Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <net/if.h>
#include <net/radix.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/in_pcb.h>

#ifdef INET6
#ifndef INET
#include <netinet/in.h>
#endif
#include <netinet/ip6.h>
#endif

#include <netinet/igmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_debug.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
/*
 * TCP/IP protocol family: IP, ICMP, UDP, TCP.
 */

#if 0 /*KAME IPSEC*/
#include <netinet6/ah.h>
#ifdef IPSEC_ESP
#include <netinet6/esp.h>
#endif
#include <netinet6/ipcomp.h>
#endif /* IPSEC */

#ifndef __ECOS
#include "gif.h"
#endif
#if NGIF > 0
#include <netinet/in_gif.h>
#endif

#ifdef NSIP
#include <netns/ns_var.h>
#include <netns/idp_var.h>
#endif /* NSIP */

#ifdef IPXIP
#include <netipx/ipx.h>
#include <netipx/ipx_ip.h>
#endif /* IPXIP */

#ifdef TPIP
#include <netiso/tp_param.h>
#include <netiso/tp_var.h>
#endif /* TPIP */

#ifdef EON
#include <netiso/eonvar.h>
#endif /* EON */

#ifdef MROUTING
#include <netinet/ip_mroute.h>
#endif /* MROUTING */

#ifdef IPFILTER
void	iplinit __P((void));
#define ip_init	iplinit
#endif

#ifdef INET6
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <netinet/ip_ah.h>
#include <netinet/ip_esp.h>
#include <netinet/ip_ip4.h>
#include <netinet/ip_ether.h>
#endif

/*
 * Without sysctl support the sysctl slots of the entries below are
 * filled with 0 through these macros.
 *
 * NOTE(review): only the ip/udp/tcp/icmp names are stubbed here.  The
 * entries guarded by IPSEC/MROUTING further down also name ip4_sysctl,
 * ah_sysctl, esp_sysctl and etherip_sysctl -- confirm those symbols
 * exist when CYGPKG_NET_SYSCTL is not defined.
 */
#ifndef CYGPKG_NET_SYSCTL
#define ip_sysctl 0
#define udp_sysctl 0
#define tcp_sysctl 0
#define icmp_sysctl 0
#endif

extern	struct domain inetdomain;

/*
 * Protocol switch table for the Internet domain.
 *
 * The initializers are positional, so their order is significant.
 * Presumably the layout is: socket type, domain, protocol, flags /
 * input, output, ctlinput, ctloutput / usrreq / init, fasttimo,
 * slowtimo, drain, sysctl -- TODO confirm against struct protosw in
 * <sys/protosw.h>.  Entries that stop early leave the remaining
 * members zero-initialized.
 */
struct protosw inetsw[] = {
{ 0,		&inetdomain,	0,		0,
  0,		ip_output,	0,		0,
  0,
  ip_init,	0,		ip_slowtimo,	ip_drain,	ip_sysctl
},
{ SOCK_DGRAM,	&inetdomain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR,
  udp_input,	0,		udp_ctlinput,	ip_ctloutput,
  udp_usrreq,
  udp_init,	0,		0,		0,		udp_sysctl
},
{ SOCK_STREAM,	&inetdomain,	IPPROTO_TCP,	PR_CONNREQUIRED|PR_WANTRCVD,
  tcp_input,	0,		tcp_ctlinput,	tcp_ctloutput,
  tcp_usrreq,
  tcp_init,	tcp_fasttimo,	tcp_slowtimo,	tcp_drain,	tcp_sysctl
},
{ SOCK_RAW,	&inetdomain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR,
  rip_input,	rip_output,	0,		rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0,
},
{ SOCK_RAW,	&inetdomain,	IPPROTO_ICMP,	PR_ATOMIC|PR_ADDR,
  icmp_input,	rip_output,	0,		rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0,		icmp_sysctl
},
#if NGIF > 0 && !defined(IPSEC)
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV4,	PR_ATOMIC|PR_ADDR,
  in_gif_input,	0,	 	0,		0,
  0,
  0,		0,		0,		0,
},
#ifdef INET6
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
  in_gif_input,	0,	 	0,		0,
  0,
  0,		0,		0,		0,
},
#endif /* INET6 */
#else /* NGIF */
#if defined(IPSEC) || defined(MROUTING)
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPIP,	PR_ATOMIC|PR_ADDR,
  ip4_input,	rip_output,	0,		rip_ctloutput,
  rip_usrreq,	/* XXX */
  0,		0,		0,		0,		ip4_sysctl
},
#if NGIF > 0 && defined(INET6)
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
  in_gif_input,	0,	 	0,		0,
  0,
  0,		0,		0,		0,
},
#endif /* NGIF && INET6 */
#endif /* MROUTING || IPSEC */
#endif /*NGIF*/
{ SOCK_RAW,	&inetdomain,	IPPROTO_IGMP,	PR_ATOMIC|PR_ADDR,
  igmp_input,	rip_output,	0,		rip_ctloutput,
  rip_usrreq,
  igmp_init,	igmp_fasttimo,	igmp_slowtimo,	0,
},
#ifdef TPIP
{ SOCK_SEQPACKET,&inetdomain,	IPPROTO_TP,	PR_CONNREQUIRED|PR_WANTRCVD,
  tpip_input,	0,		tpip_ctlinput,	tp_ctloutput,
  tp_usrreq,
  tp_init,	0,		tp_slowtimo,	tp_drain,
},
#endif /* TPIP */
/* EON (ISO CLNL over IP) */
#ifdef EON
{ SOCK_RAW,	&inetdomain,	IPPROTO_EON,	0,
  eoninput,	0,		eonctlinput,		0,
  0,
  eonprotoinit,	0,		0,		0,
},
#endif /* EON */
#ifdef IPXIP
{ SOCK_RAW,	&inetdomain,	IPPROTO_IDP,	PR_ATOMIC|PR_ADDR,
  ipxip_input,	rip_output,	ipxip_ctlinput,	0,
  rip_usrreq,
  ipxipprotoinit,0,		0,		0,
},
#endif /* IPXIP */
#ifdef NSIP
{ SOCK_RAW,	&inetdomain,	IPPROTO_IDP,	PR_ATOMIC|PR_ADDR,
  idpip_input,	rip_output,	nsip_ctlinput,	0,
  rip_usrreq,
  0,		0,		0,		0,
},
#endif /* NSIP */
#ifdef IPSEC
{ SOCK_RAW,	&inetdomain,	IPPROTO_AH,	PR_ATOMIC|PR_ADDR,
  ah_input,	rip_output, 	0,		rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0,		ah_sysctl
},
{ SOCK_RAW,	&inetdomain,	IPPROTO_ESP,	PR_ATOMIC|PR_ADDR,
  esp_input,	rip_output, 	0,		rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0,		esp_sysctl
},
{ SOCK_RAW,	&inetdomain,	IPPROTO_ETHERIP, PR_ATOMIC|PR_ADDR,
  etherip_input,	rip_output, 	0,		rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0,		etherip_sysctl
},
#endif
#if 0 /*NRL IPv6*/
/* IPv6 in IPv4 tunneled packets... */
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
  ip6_input,	rip_output,	ipv6_trans_ctlinput,	rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0
},
#if 0
/* IPv4 in IPv4 tunneled packets... */
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV4,	PR_ATOMIC|PR_ADDR,
  ipv4_input,	0,		0,		0,
  0,
  0,		0,		0,		0
},
#endif /* 0 */
#endif /* 0 -- NRL IPv6 */
/* raw wildcard */
{ SOCK_RAW,	&inetdomain,	0,		PR_ATOMIC|PR_ADDR,
  rip_input,	rip_output,	0,		rip_ctloutput,
  rip_usrreq,
  rip_init,	0,		0,		0,
},
};

/*
 * The Internet domain.  It points at the first entry of inetsw[] and
 * at the address one past its last entry, so the table length follows
 * the conditional compilation above.
 */
struct domain inetdomain =
    { AF_INET, "internet", 0, 0, 0,
      inetsw, &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0,
      rn_inithead, 32, sizeof(struct sockaddr_in) };

/* Everything below is compiled only when "notyet" is defined. */
#ifdef notyet /* XXXX */
#include "imp.h"
#if NIMP > 0
extern	struct domain impdomain;
int	rimp_output(), hostslowtimo();

struct protosw impsw[] = {
{ SOCK_RAW,	&impdomain,	0,		PR_ATOMIC|PR_ADDR,
  0,		rimp_output,	0,		0,
  rip_usrreq,
  0,		0,		hostslowtimo,	0,
},
};

struct domain impdomain =
    { AF_IMPLINK, "imp", 0, 0, 0,
      impsw, &impsw[sizeof (impsw)/sizeof(impsw[0])] };
#endif

#include "hy.h"
#if NHY > 0
/*
 * HYPERchannel protocol family: raw interface.
 */
int	rhy_output();
extern	struct domain hydomain;

struct protosw hysw[] = {
{ SOCK_RAW,	&hydomain,	0,		PR_ATOMIC|PR_ADDR,
  0,		rhy_output,	0,		0,
  rip_usrreq,
  0,		0,		0,		0,
},
};

struct domain hydomain =
    { AF_HYLINK, "hy", 0, 0, 0, hysw, &hysw[sizeof (hysw)/sizeof(hysw[0])] };
#endif
#endif
diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/ip_icmp.c b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_icmp.c new file mode 100644 index 0000000..ff123c2 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_icmp.c @@ -0,0 +1,767 @@ +//========================================================================== +// +// sys/netinet/ip_icmp.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: ip_icmp.c,v 1.19 1999/12/08 06:50:19 itojun Exp $ */ +/* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/kernel.h> +#ifndef __ECOS +#include <sys/proc.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <netinet/ip_var.h> +#include <netinet/icmp_var.h> + +#if 0 /*KAME IPSEC*/ +#include <netinet6/ipsec.h> +#include <netkey/key.h> +#include <netkey/key_debug.h> +#endif + +#include <machine/stdarg.h> + +/* + * ICMP routines: error generation, receive packet processing, and + * routines to turnaround packets back to the originator, and + * host table maintenance routines. + */ + +int icmpmaskrepl = 0; +int icmpbmcastecho = 0; +#ifdef ICMPPRINTFS +int icmpprintfs = 0; +#endif + +#if 0 +static int ip_next_mtu __P((int, int)); +#else +/*static*/ int ip_next_mtu __P((int, int)); +#endif + +extern struct protosw inetsw[]; + +/* + * Generate an error packet of type error + * in response to bad packet ip. + * + * The ip packet inside has ip_off and ip_len in host byte order. + */ +void +icmp_error(n, type, code, dest, destifp) + struct mbuf *n; + int type, code; + n_long dest; + struct ifnet *destifp; +{ + register struct ip *oip = mtod(n, struct ip *), *nip; + register unsigned oiplen = oip->ip_hl << 2; + register struct icmp *icp; + struct mbuf *m, m0; + unsigned icmplen; + +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("icmp_error(%x, %d, %d)\n", oip, type, code); +#endif + if (type != ICMP_REDIRECT) + icmpstat.icps_error++; + /* + * Don't send error if not the first fragment of message. + * Don't error if the old packet protocol was ICMP + * error message, only known informational types. 
+ */ + if (oip->ip_off &~ (IP_MF|IP_DF)) + goto freeit; + if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && + n->m_len >= oiplen + ICMP_MINLEN && + !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { + icmpstat.icps_oldicmp++; + goto freeit; + } + /* Don't send error in response to a multicast or broadcast packet */ + if (n->m_flags & (M_BCAST|M_MCAST)) + goto freeit; + /* + * First, formulate icmp message + */ + m = m_gethdr(M_DONTWAIT, MT_HEADER); + if (m == NULL) + goto freeit; + icmplen = oiplen + min(8, oip->ip_len); + m->m_len = icmplen + ICMP_MINLEN; + MH_ALIGN(m, m->m_len); + icp = mtod(m, struct icmp *); + if ((u_int)type > ICMP_MAXTYPE) + panic("icmp_error"); + icmpstat.icps_outhist[type]++; + icp->icmp_type = type; + if (type == ICMP_REDIRECT) + icp->icmp_gwaddr.s_addr = dest; + else { + icp->icmp_void = 0; + /* + * The following assignments assume an overlay with the + * zeroed icmp_void field. + */ + if (type == ICMP_PARAMPROB) { + icp->icmp_pptr = code; + code = 0; + } else if (type == ICMP_UNREACH && + code == ICMP_UNREACH_NEEDFRAG && destifp) + icp->icmp_nextmtu = htons(destifp->if_mtu); + } + + icp->icmp_code = code; + bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen); + nip = &icp->icmp_ip; + nip->ip_off = htons(nip->ip_off); + nip->ip_len = htons(nip->ip_len); + + m0.m_next = NULL; /* correct nip->ip_sum */ + m0.m_data = (char *)nip; + m0.m_len = nip->ip_hl << 2; + nip->ip_sum = 0; + nip->ip_sum = in_cksum(&m0, nip->ip_hl << 2); + + /* + * Now, copy old ip header (without options) + * in front of icmp message. 
+ */ + if (m->m_data - sizeof(struct ip) < m->m_pktdat) + panic("icmp len"); + m->m_data -= sizeof(struct ip); + m->m_len += sizeof(struct ip); + m->m_pkthdr.len = m->m_len; + m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; + nip = mtod(m, struct ip *); + bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip)); + nip->ip_off = htons(nip->ip_off); + nip->ip_len = m->m_len; + nip->ip_hl = sizeof(struct ip) >> 2; + nip->ip_p = IPPROTO_ICMP; + nip->ip_tos = 0; + icmp_reflect(m); + +freeit: + m_freem(n); +} + +static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET }; +static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET }; +static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET }; +struct sockaddr_in icmpmask = { 8, 0 }; + +/* + * Process a received ICMP message. + */ +void +#if __STDC__ +icmp_input(struct mbuf *m, ...) +#else +icmp_input(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + int proto; + register struct icmp *icp; + register struct ip *ip = mtod(m, struct ip *); + int icmplen = ip->ip_len; + register int i; + struct in_ifaddr *ia; + void *(*ctlfunc) __P((int, struct sockaddr *, void *)); + int code; + extern u_char ip_protox[]; + int hlen; + va_list ap; + + va_start(ap, m); + hlen = va_arg(ap, int); + proto = va_arg(ap, int); + va_end(ap); + + /* + * Locate icmp structure in mbuf, and check + * that not corrupted and of at least minimum length. 
+ */ +#ifdef ICMPPRINTFS + if (icmpprintfs) { + char buf[4*sizeof "123"]; + + strcpy(buf, inet_ntoa(ip->ip_dst)); + printf("icmp_input from %s to %s, len %d\n", + inet_ntoa(ip->ip_src), buf, icmplen); + } +#endif + if (icmplen < ICMP_MINLEN) { + icmpstat.icps_tooshort++; + goto freeit; + } + i = hlen + min(icmplen, ICMP_ADVLENMIN); + if (m->m_len < i && (m = m_pullup(m, i)) == 0) { + icmpstat.icps_tooshort++; + return; + } + ip = mtod(m, struct ip *); + m->m_len -= hlen; + m->m_data += hlen; + icp = mtod(m, struct icmp *); + if (in_cksum(m, icmplen)) { + icmpstat.icps_checksum++; + goto freeit; + } + m->m_len += hlen; + m->m_data -= hlen; + +#ifdef ICMPPRINTFS + /* + * Message type specific processing. + */ + if (icmpprintfs) + printf("icmp_input, type %d code %d\n", icp->icmp_type, + icp->icmp_code); +#endif +#if 0 /*KAME IPSEC*/ + /* drop it if it does not match the policy */ + if (ipsec4_in_reject(m, NULL)) { + ipsecstat.in_polvio++; + goto freeit; + } +#endif + if (icp->icmp_type > ICMP_MAXTYPE) + goto raw; + icmpstat.icps_inhist[icp->icmp_type]++; + code = icp->icmp_code; + switch (icp->icmp_type) { + + case ICMP_UNREACH: + switch (code) { + case ICMP_UNREACH_NET: + case ICMP_UNREACH_HOST: + case ICMP_UNREACH_PROTOCOL: + case ICMP_UNREACH_PORT: + case ICMP_UNREACH_SRCFAIL: + code += PRC_UNREACH_NET; + break; + + case ICMP_UNREACH_NEEDFRAG: +#if 0 /*NRL INET6*/ + if (icp->icmp_nextmtu) { + extern int ipv6_trans_mtu + __P((struct mbuf **, int, int)); + struct mbuf *m0 = m; + + /* + * Do cool v4-related path MTU, for now, + * only v6-in-v4 can handle it. + */ + if (icmplen >= ICMP_V6ADVLENMIN && + icmplen >= ICMP_V6ADVLEN(icp) && + icp->icmp_ip.ip_p == IPPROTO_IPV6) { + /* + * ipv6_trans_mtu returns 1 if + * the mbuf is still intact. 
+ */ + if (ipv6_trans_mtu(&m0,icp->icmp_nextmtu, + hlen + ICMP_V6ADVLEN(icp))) { + m = m0; + goto raw; + } else + return; + } + } +#endif /* INET6 */ + code = PRC_MSGSIZE; + break; + + case ICMP_UNREACH_NET_UNKNOWN: + case ICMP_UNREACH_NET_PROHIB: + case ICMP_UNREACH_TOSNET: + code = PRC_UNREACH_NET; + break; + + case ICMP_UNREACH_HOST_UNKNOWN: + case ICMP_UNREACH_ISOLATED: + case ICMP_UNREACH_HOST_PROHIB: + case ICMP_UNREACH_TOSHOST: + case ICMP_UNREACH_FILTER_PROHIB: + case ICMP_UNREACH_HOST_PRECEDENCE: + case ICMP_UNREACH_PRECEDENCE_CUTOFF: + code = PRC_UNREACH_HOST; + break; + + default: + goto badcode; + } + goto deliver; + + case ICMP_TIMXCEED: + if (code > 1) + goto badcode; + code += PRC_TIMXCEED_INTRANS; + goto deliver; + + case ICMP_PARAMPROB: + if (code > 1) + goto badcode; + code = PRC_PARAMPROB; + goto deliver; + + case ICMP_SOURCEQUENCH: + if (code) + goto badcode; + code = PRC_QUENCH; + deliver: + /* + * Problem with datagram; advise higher level routines. + */ + if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || + icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { + icmpstat.icps_badlen++; + goto freeit; + } + if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr)) + goto badcode; + NTOHS(icp->icmp_ip.ip_len); +#ifdef INET6 + /* Get more contiguous data for a v6 in v4 ICMP message. */ + if (icp->icmp_ip.ip_p == IPPROTO_IPV6) { + if (icmplen < ICMP_V6ADVLENMIN || + icmplen < ICMP_V6ADVLEN(icp)) { + icmpstat.icps_badlen++; + goto freeit; + } else { + if (!(m = m_pullup(m, (ip->ip_hl << 2) + + ICMP_V6ADVLEN(icp)))) { + icmpstat.icps_tooshort++; + return; + } + ip = mtod(m, struct ip *); + icp = (struct icmp *)(m->m_data + (ip->ip_hl << 2)); + } + } +#endif /* INET6 */ +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); +#endif + icmpsrc.sin_addr = icp->icmp_ip.ip_dst; + /* + * XXX if the packet contains [IPv4 AH TCP], we can't make a + * notification to TCP layer. 
+ */ + ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; + if (ctlfunc) + (*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip); + break; + + badcode: + icmpstat.icps_badcode++; + break; + + case ICMP_ECHO: + if (!icmpbmcastecho && + (m->m_flags & (M_MCAST | M_BCAST)) != 0) { + icmpstat.icps_bmcastecho++; + break; + } + icp->icmp_type = ICMP_ECHOREPLY; + goto reflect; + + case ICMP_TSTAMP: + if (!icmpbmcastecho && + (m->m_flags & (M_MCAST | M_BCAST)) != 0) { + icmpstat.icps_bmcastecho++; + break; + } + if (icmplen < ICMP_TSLEN) { + icmpstat.icps_badlen++; + break; + } + icp->icmp_type = ICMP_TSTAMPREPLY; + icp->icmp_rtime = iptime(); + icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ + goto reflect; + + case ICMP_MASKREQ: + if (icmpmaskrepl == 0) + break; + /* + * We are not able to respond with all ones broadcast + * unless we receive it over a point-to-point interface. + */ + if (icmplen < ICMP_MASKLEN) { + icmpstat.icps_badlen++; + break; + } + if (ip->ip_dst.s_addr == INADDR_BROADCAST || + ip->ip_dst.s_addr == INADDR_ANY) + icmpdst.sin_addr = ip->ip_src; + else + icmpdst.sin_addr = ip->ip_dst; + ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst), + m->m_pkthdr.rcvif)); + if (ia == 0) + break; + icp->icmp_type = ICMP_MASKREPLY; + icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; + if (ip->ip_src.s_addr == 0) { + if (ia->ia_ifp->if_flags & IFF_BROADCAST) + ip->ip_src = ia->ia_broadaddr.sin_addr; + else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) + ip->ip_src = ia->ia_dstaddr.sin_addr; + } +reflect: + ip->ip_len += hlen; /* since ip_input deducts this */ + icmpstat.icps_reflect++; + icmpstat.icps_outhist[icp->icmp_type]++; + icmp_reflect(m); + return; + + case ICMP_REDIRECT: + if (code > 3) + goto badcode; + if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || + icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { + icmpstat.icps_badlen++; + break; + } + /* + * Short circuit routing redirects to force + * immediate change in the kernel's routing 
+ * tables. The message is also handed to anyone + * listening on a raw socket (e.g. the routing + * daemon for use in updating its tables). + */ + icmpgw.sin_addr = ip->ip_src; + icmpdst.sin_addr = icp->icmp_gwaddr; +#ifdef ICMPPRINTFS + if (icmpprintfs) { + char buf[4 * sizeof "123"]; + strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst)); + + printf("redirect dst %s to %s\n", + buf, inet_ntoa(icp->icmp_gwaddr)); + } +#endif + icmpsrc.sin_addr = icp->icmp_ip.ip_dst; + rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst), + (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, + sintosa(&icmpgw), (struct rtentry **)0); + pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc)); +#if 0 /*KAME IPSEC*/ + key_sa_routechange((struct sockaddr *)&icmpsrc); +#endif + break; + + /* + * No kernel processing for the following; + * just fall through to send to raw listener. + */ + case ICMP_ECHOREPLY: + case ICMP_ROUTERADVERT: + case ICMP_ROUTERSOLICIT: + case ICMP_TSTAMPREPLY: + case ICMP_IREQREPLY: + case ICMP_MASKREPLY: + default: + break; + } + +raw: + rip_input(m, hlen, proto); + return; + +freeit: + m_freem(m); +} + +/* + * Reflect the ip packet back to the source + */ +void +icmp_reflect(m) + struct mbuf *m; +{ + register struct ip *ip = mtod(m, struct ip *); + register struct in_ifaddr *ia; + struct in_addr t; + struct mbuf *opts = 0; + int optlen = (ip->ip_hl << 2) - sizeof(struct ip); + + if (!in_canforward(ip->ip_src) && + ((ip->ip_src.s_addr & IN_CLASSA_NET) != + htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { + m_freem(m); /* Bad return address */ + goto done; /* ip_output() will check for broadcast */ + } + t = ip->ip_dst; + ip->ip_dst = ip->ip_src; + /* + * If the incoming packet was addressed directly to us, + * use dst as the src for the reply. Otherwise (broadcast + * or anonymous), use the address which corresponds + * to the incoming interface. 
+ */ + for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) { + if (t.s_addr == ia->ia_addr.sin_addr.s_addr) + break; + if ((ia->ia_ifp->if_flags & IFF_BROADCAST) && + t.s_addr == ia->ia_broadaddr.sin_addr.s_addr) + break; + } + icmpdst.sin_addr = t; + if (ia == (struct in_ifaddr *)0) + ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst), + m->m_pkthdr.rcvif)); + /* + * The following happens if the packet was not addressed to us, + * and was received on an interface with no IP address. + */ + if (ia == (struct in_ifaddr *)0) + ia = in_ifaddr.tqh_first; + t = ia->ia_addr.sin_addr; + ip->ip_src = t; + ip->ip_ttl = MAXTTL; + + if (optlen > 0) { + register u_char *cp; + int opt, cnt; + u_int len; + + /* + * Retrieve any source routing from the incoming packet; + * add on any record-route or timestamp options. + */ + cp = (u_char *) (ip + 1); + if ((opts = ip_srcroute()) == 0 && + (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { + opts->m_len = sizeof(struct in_addr); + mtod(opts, struct in_addr *)->s_addr = 0; + } + if (opts) { +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("icmp_reflect optlen %d rt %d => ", + optlen, opts->m_len); +#endif + for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { + opt = cp[IPOPT_OPTVAL]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + len = 1; + else { + len = cp[IPOPT_OLEN]; + if (len <= 0 || len > cnt) + break; + } + /* + * Should check for overflow, but it "can't happen" + */ + if (opt == IPOPT_RR || opt == IPOPT_TS || + opt == IPOPT_SECURITY) { + bcopy((caddr_t)cp, + mtod(opts, caddr_t) + opts->m_len, len); + opts->m_len += len; + } + } + /* Terminate & pad, if necessary */ + if ((cnt = opts->m_len % 4) != 0) { + for (; cnt < 4; cnt++) { + *(mtod(opts, caddr_t) + opts->m_len) = + IPOPT_EOL; + opts->m_len++; + } + } +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("%d\n", opts->m_len); +#endif + } + /* + * Now strip out original options by copying rest of first + * mbuf's data back, and adjust the IP length. 
+ */ + ip->ip_len -= optlen; + ip->ip_hl = sizeof(struct ip) >> 2; + m->m_len -= optlen; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len -= optlen; + optlen += sizeof(struct ip); + bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1), + (unsigned)(m->m_len - sizeof(struct ip))); + } + m->m_flags &= ~(M_BCAST|M_MCAST); + icmp_send(m, opts); +done: + if (opts) + (void)m_free(opts); +} + +/* + * Send an icmp packet back to the ip level, + * after supplying a checksum. + */ +void +icmp_send(m, opts) + register struct mbuf *m; + struct mbuf *opts; +{ + register struct ip *ip = mtod(m, struct ip *); + register int hlen; + register struct icmp *icp; + + hlen = ip->ip_hl << 2; + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + icp->icmp_cksum = 0; + icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen); + m->m_data -= hlen; + m->m_len += hlen; +#ifdef ICMPPRINTFS + if (icmpprintfs) { + char buf[4 * sizeof "123"]; + + strcpy(buf, inet_ntoa(ip->ip_dst)); + printf("icmp_send dst %s src %s\n", + buf, inet_ntoa(ip->ip_src)); + } +#endif +#if 0 /*KAME IPSEC*/ + m->m_pkthdr.rcvif = NULL; +#endif /*IPSEC*/ + (void) ip_output(m, opts, NULL, 0, NULL, NULL); +} + +n_time +iptime() +{ + struct timeval atv; + u_long t; + + microtime(&atv); + t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; + return (htonl(t)); +} + +#ifdef CYGPKG_NET_SYSCTL +int +icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; +{ + + /* All sysctl names at this level are terminal. 
*/ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case ICMPCTL_MASKREPL: + return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl)); + case ICMPCTL_BMCASTECHO: + return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpbmcastecho)); + default: + return (ENOPROTOOPT); + } + /* NOTREACHED */ +} +#endif // CYGPKG_NET_SYSCTL diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/ip_id.c b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_id.c new file mode 100644 index 0000000..177bb58 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_id.c @@ -0,0 +1,232 @@ +//========================================================================== +// +// sys/netinet/ip_id.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: ip_id.c,v 1.2 1999/08/26 13:37:01 provos Exp $ */ + +/* + * Copyright 1998 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Theo de Raadt <deraadt@openbsd.org> came up with the idea of using + * such a mathematical system to generate more random (yet non-repeating) + * ids to solve the resolver/named problem. But Niels designed the + * actual system based on the constraints. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * seed = random 15bit + * n = prime, g0 = generator to n, + * j = random so that gcd(j,n-1) == 1 + * g = g0^j mod n will be a generator again. + * + * X[0] = random seed. + * X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator + * with a = 7^(even random) mod m, + * b = random with gcd(b,m) == 1 + * m = 31104 and a maximal period of m-1. 
+ * + * The transaction id is determined by: + * id[n] = seed xor (g^X[n] mod n) + * + * Effectivly the id is restricted to the lower 15 bits, thus + * yielding two different cycles by toggling the msb on and off. + * This avoids reuse issues caused by reseeding. + */ + +#include <sys/param.h> +#include <sys/time.h> +#include <sys/kernel.h> + +#ifndef __ECOS +#include <dev/rndvar.h> +#endif + +#define RU_OUT 180 /* Time after wich will be reseeded */ +#define RU_MAX 30000 /* Uniq cycle, avoid blackjack prediction */ +#define RU_GEN 2 /* Starting generator */ +#define RU_N 32749 /* RU_N-1 = 2*2*3*2729 */ +#define RU_AGEN 7 /* determine ru_a as RU_AGEN^(2*rand) */ +#define RU_M 31104 /* RU_M = 2^7*3^5 - don't change */ + +#define PFAC_N 3 +const static u_int16_t pfacts[PFAC_N] = { + 2, + 3, + 2729 +}; + +static u_int16_t ru_x; +static u_int16_t ru_seed, ru_seed2; +static u_int16_t ru_a, ru_b; +static u_int16_t ru_g; +static u_int16_t ru_counter = 0; +static u_int16_t ru_msb = 0; +static long ru_reseed; +static u_int32_t tmp; /* Storage for unused random */ + +static u_int16_t pmod __P((u_int16_t, u_int16_t, u_int16_t)); +static void ip_initid __P((void)); +u_int16_t ip_randomid __P((void)); + +/* + * Do a fast modular exponation, returned value will be in the range + * of 0 - (mod-1) + */ + +#ifdef __STDC__ +static u_int16_t +pmod(u_int16_t gen, u_int16_t exp, u_int16_t mod) +#else +static u_int16_t +pmod(gen, exp, mod) + u_int16_t gen, exp, mod; +#endif +{ + u_int16_t s, t, u; + + s = 1; + t = gen; + u = exp; + + while (u) { + if (u & 1) + s = (s*t) % mod; + u >>= 1; + t = (t*t) % mod; + } + return (s); +} + +/* + * Initalizes the seed and chooses a suitable generator. Also toggles + * the msb flag. The msb flag is used to generate two distinct + * cycles of random numbers and thus avoiding reuse of ids. + * + * This function is called from id_randomid() when needed, an + * application does not have to worry about it. 
+ */ +static void +ip_initid(void) +{ + u_int16_t j, i; + int noprime = 1; + + get_random_bytes((void *) &tmp, sizeof(tmp)); + ru_x = (tmp & 0xFFFF) % RU_M; + + /* 15 bits of random seed */ + ru_seed = (tmp >> 16) & 0x7FFF; + get_random_bytes((void *) &tmp, sizeof(tmp)); + ru_seed2 = tmp & 0x7FFF; + + get_random_bytes((void *) &tmp, sizeof(tmp)); + + /* Determine the LCG we use */ + ru_b = (tmp & 0xfffe) | 1; + ru_a = pmod(RU_AGEN, (tmp >> 16) & 0xfffe, RU_M); + while (ru_b % 3 == 0) + ru_b += 2; + + get_random_bytes((void *) &tmp, sizeof(tmp)); + j = tmp % RU_N; + tmp = tmp >> 16; + + /* + * Do a fast gcd(j,RU_N-1), so we can find a j with + * gcd(j, RU_N-1) == 1, giving a new generator for + * RU_GEN^j mod RU_N + */ + + while (noprime) { + for (i=0; i<PFAC_N; i++) + if (j%pfacts[i] == 0) + break; + + if (i>=PFAC_N) + noprime = 0; + else + j = (j+1) % RU_N; + } + + ru_g = pmod(RU_GEN,j,RU_N); + ru_counter = 0; + + ru_reseed = time.tv_sec + RU_OUT; + ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; +} + +u_int16_t +ip_randomid(void) +{ + int i, n; + + if (ru_counter >= RU_MAX || time.tv_sec > ru_reseed) + ip_initid(); + + if (!tmp) + get_random_bytes((void *) &tmp, sizeof(tmp)); + + /* Skip a random number of ids */ + n = tmp & 0x3; tmp = tmp >> 2; + if (ru_counter + n >= RU_MAX) + ip_initid(); + + for (i = 0; i <= n; i++) + /* Linear Congruential Generator */ + ru_x = (ru_a*ru_x + ru_b) % RU_M; + + ru_counter += i; + + return (ru_seed ^ pmod(ru_g,ru_seed2 ^ ru_x,RU_N)) | ru_msb; +} diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/ip_input.c b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_input.c new file mode 100644 index 0000000..b3850a9 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_input.c @@ -0,0 +1,1562 @@ +//========================================================================== +// +// sys/netinet/ip_input.c +// +// +// +//========================================================================== +// 
####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: ip_input.c,v 1.44 1999/12/08 06:50:20 itojun Exp $ */ +/* $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + */ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/errno.h> +#include <sys/time.h> +#include <sys/kernel.h> +#ifndef __ECOS +#include <sys/syslog.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> +#endif + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/in_var.h> +#include <netinet/ip_var.h> +#include <netinet/ip_icmp.h> +#include <netinet/ip_ipsp.h> + +#ifdef __ECOS +#include <stdio.h> // for 'sprintf()' +#endif + +#ifndef IPFORWARDING +#ifdef GATEWAY +#define IPFORWARDING 1 /* forward IP packets not for us */ +#else /* GATEWAY */ +#define IPFORWARDING 0 /* don't forward IP packets not for us */ +#endif /* GATEWAY */ +#endif /* IPFORWARDING */ +#ifndef IPSENDREDIRECTS +#define IPSENDREDIRECTS 1 +#endif + +int encdebug = 0; + +/* + * Note: DIRECTED_BROADCAST is handled this way so 
that previous + * configuration using this option will Just Work. + */ +#ifndef IPDIRECTEDBCAST +#ifdef DIRECTED_BROADCAST +#define IPDIRECTEDBCAST 1 +#else +#define IPDIRECTEDBCAST 0 +#endif /* DIRECTED_BROADCAST */ +#endif /* IPDIRECTEDBCAST */ +int ipforwarding = IPFORWARDING; +int ipsendredirects = IPSENDREDIRECTS; +int ip_dosourceroute = 0; /* no src-routing unless sysctl'd to enable */ +int ip_defttl = IPDEFTTL; +int ip_directedbcast = IPDIRECTEDBCAST; +#ifdef DIAGNOSTIC +int ipprintfs = 0; +#endif + +int ipsec_auth_default_level = IPSEC_AUTH_LEVEL_DEFAULT; +int ipsec_esp_trans_default_level = IPSEC_ESP_TRANS_LEVEL_DEFAULT; +int ipsec_esp_network_default_level = IPSEC_ESP_NETWORK_LEVEL_DEFAULT; + +/* Keep track of memory used for reassembly */ +int ip_maxqueue = 300; +int ip_frags = 0; + +/* from in_pcb.c */ +extern int ipport_firstauto; +extern int ipport_lastauto; +extern int ipport_hifirstauto; +extern int ipport_hilastauto; +extern struct baddynamicports baddynamicports; + +extern struct domain inetdomain; +extern struct protosw inetsw[]; +u_char ip_protox[IPPROTO_MAX]; +int ipqmaxlen = IFQ_MAXLEN; +struct in_ifaddrhead in_ifaddr; +struct ifqueue ipintrq; +struct ipstat ipstat; +#if defined(IPFILTER) || defined(IPFILTER_LKM) +int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, + struct mbuf **)); +#endif + +int ipq_locked; +static __inline int ipq_lock_try __P((void)); +static __inline void ipq_unlock __P((void)); + +static __inline int +ipq_lock_try() +{ + int s; + + s = splimp(); + if (ipq_locked) { + splx(s); + return (0); + } + ipq_locked = 1; + splx(s); + return (1); +} + +#define ipq_lock() ipq_lock_try() + +static __inline void +ipq_unlock() +{ + int s; + + s = splimp(); + ipq_locked = 0; + splx(s); +} + +#if 0 // Now in common layer + +static char *ui8tod( cyg_uint8 n, char *p ) +{ + if( n > 99 ) *p++ = (n/100) + '0'; + if( n > 9 ) *p++ = ((n/10)%10) + '0'; + *p++ = (n%10) + '0'; + return p; +} + +char * +inet_ntoa(ina) + struct in_addr 
ina; +{ + static char buf[4*sizeof "123"]; + char *p = buf; + unsigned char *ucp = (unsigned char *)&ina; + +// sprintf(buf, "%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, +// ucp[2] & 0xff, ucp[3] & 0xff); + + p = ui8tod( ucp[0] & 0xFF, p); + *p++ = '.'; + p = ui8tod( ucp[1] & 0xFF, p); + *p++ = '.'; + p = ui8tod( ucp[2] & 0xFF, p); + *p++ = '.'; + p = ui8tod( ucp[3] & 0xFF, p); + *p++ = '\0'; + + return (buf); +} +#endif + +/* + * We need to save the IP options in case a protocol wants to respond + * to an incoming packet over the same route if the packet got here + * using IP source routing. This allows connection establishment and + * maintenance when the remote end is on a network that is not known + * to us. + */ +int ip_nhops = 0; +static struct ip_srcrt { + struct in_addr dst; /* final destination */ + char nop; /* one NOP to align */ + char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ + struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; +} ip_srcrt; + +static void save_rte __P((u_char *, struct in_addr)); +static int ip_weadvertise(u_int32_t); + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented in kernel go to raw IP protocol handler. + */ +void +ip_init() +{ + register struct protosw *pr; + register int i; + const u_int16_t defbaddynamicports_tcp[] = DEFBADDYNAMICPORTS_TCP; + const u_int16_t defbaddynamicports_udp[] = DEFBADDYNAMICPORTS_UDP; + + pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); + if (pr == 0) + panic("ip_init"); + for (i = 0; i < IPPROTO_MAX; i++) + ip_protox[i] = pr - inetsw; + for (pr = inetdomain.dom_protosw; + pr < inetdomain.dom_protoswNPROTOSW; pr++) + if (pr->pr_domain->dom_family == PF_INET && + pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) + ip_protox[pr->pr_protocol] = pr - inetsw; + LIST_INIT(&ipq); + ipintrq.ifq_maxlen = ipqmaxlen; + TAILQ_INIT(&in_ifaddr); + + /* Fill in list of ports not to allocate dynamically. 
*/ + bzero((void *)&baddynamicports, sizeof(baddynamicports)); + for (i = 0; defbaddynamicports_tcp[i] != 0; i++) + DP_SET(baddynamicports.tcp, defbaddynamicports_tcp[i]); + for (i = 0; defbaddynamicports_udp[i] != 0; i++) + DP_SET(baddynamicports.udp, defbaddynamicports_tcp[i]); +} + +struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; +struct route ipforward_rt; + +void +ipintr() +{ + register struct mbuf *m; + int s; + + if (needqueuedrain) + m_reclaim(); + + while (1) { + /* + * Get next datagram off input queue and get IP header + * in first mbuf. + */ + s = splimp(); + IF_DEQUEUE(&ipintrq, m); + splx(s); + if (m == 0) + return; +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ipintr no HDR"); +#endif + ipv4_input(m, 0, NULL, 0); + } +} + +/* + * Ip input routine. Checksum and byte swap header. If fragmented + * try to reassemble. Process options. Pass to next level. + */ +void +ipv4_input(struct mbuf *m, ...) +{ + register struct ip *ip; + register struct ipq *fp; + struct in_ifaddr *ia; + struct ipqent *ipqe; + int hlen, mff; + va_list ap; + int extra; + + va_start(ap, m); + extra = va_arg(ap, int); + va_end(ap); + + if (extra) { + struct mbuf *newpacket; + + if (!(newpacket = m_split(m, extra, M_NOWAIT))) { + m_freem(m); + return; + } + + newpacket->m_flags |= m->m_flags; + m_freem(m); + m = newpacket; + extra = 0; + } + + /* + * If no IP addresses have been set yet but the interfaces + * are receiving, can't do anything with incoming packets yet. 
+ */ + if (in_ifaddr.tqh_first == 0) + goto bad; + ipstat.ips_total++; + if (m->m_len < sizeof (struct ip) && + (m = m_pullup(m, sizeof (struct ip))) == 0) { + ipstat.ips_toosmall++; + return; + } + ip = mtod(m, struct ip *); + if (ip->ip_v != IPVERSION) { + ipstat.ips_badvers++; + goto bad; + } + hlen = ip->ip_hl << 2; + if (hlen < sizeof(struct ip)) { /* minimum header length */ + ipstat.ips_badhlen++; + goto bad; + } + if (hlen > m->m_len) { + if ((m = m_pullup(m, hlen)) == 0) { + ipstat.ips_badhlen++; + return; + } + ip = mtod(m, struct ip *); + } + if ((ip->ip_sum = in_cksum(m, hlen)) != 0) { + ipstat.ips_badsum++; + goto bad; + } + + /* + * Convert fields to host representation. + */ + NTOHS(ip->ip_len); + if (ip->ip_len < hlen) { + ipstat.ips_badlen++; + goto bad; + } + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + /* + * Check that the amount of data in the buffers + * is as at least much as the IP header would have us expect. + * Trim mbufs if longer than we expect. + * Drop packet if shorter than we expect. + */ + if (m->m_pkthdr.len < ip->ip_len) { + ipstat.ips_tooshort++; + goto bad; + } + if (m->m_pkthdr.len > ip->ip_len) { + if (m->m_len == m->m_pkthdr.len) { + m->m_len = ip->ip_len; + m->m_pkthdr.len = ip->ip_len; + } else + m_adj(m, ip->ip_len - m->m_pkthdr.len); + } + +#if defined(IPFILTER) || defined(IPFILTER_LKM) + /* + * Check if we want to allow this packet to be processed. + * Consider it to be bad if not. + */ + { + struct mbuf *m0 = m; + if (fr_checkp && (*fr_checkp)(ip, hlen, m->m_pkthdr.rcvif, 0, &m0)) + return; + ip = mtod(m = m0, struct ip *); + } +#endif + + /* + * Process options and, if not destined for us, + * ship it on. ip_dooptions returns 1 when an + * error was detected (causing an icmp message + * to be sent and the original packet to be freed). + */ + ip_nhops = 0; /* for source routed packets */ + if (hlen > sizeof (struct ip) && ip_dooptions(m)) + return; + + /* + * Check our list of addresses, to see if the packet is for us. 
+ */ + if ((ia = in_iawithaddr(ip->ip_dst, m)) != NULL && + (ia->ia_ifp->if_flags & IFF_UP)) + goto ours; + + if (IN_MULTICAST(ip->ip_dst.s_addr)) { + struct in_multi *inm; +#ifdef MROUTING + extern struct socket *ip_mrouter; + + if (m->m_flags & M_EXT) { + if ((m = m_pullup(m, hlen)) == 0) { + ipstat.ips_toosmall++; + return; + } + ip = mtod(m, struct ip *); + } + + if (ip_mrouter) { + /* + * If we are acting as a multicast router, all + * incoming multicast packets are passed to the + * kernel-level multicast forwarding function. + * The packet is returned (relatively) intact; if + * ip_mforward() returns a non-zero value, the packet + * must be discarded, else it may be accepted below. + * + * (The IP ident field is put in the same byte order + * as expected when ip_mforward() is called from + * ip_output().) + */ + ip->ip_id = htons(ip->ip_id); + if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { + ipstat.ips_cantforward++; + m_freem(m); + return; + } + ip->ip_id = ntohs(ip->ip_id); + + /* + * The process-level routing demon needs to receive + * all multicast IGMP packets, whether or not this + * host belongs to their destination groups. + */ + if (ip->ip_p == IPPROTO_IGMP) + goto ours; + ipstat.ips_forward++; + } +#endif + /* + * See if we belong to the destination multicast group on the + * arrival interface. + */ + IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); + if (inm == NULL) { + ipstat.ips_cantforward++; + m_freem(m); + return; + } + goto ours; + } + if (ip->ip_dst.s_addr == INADDR_BROADCAST || + ip->ip_dst.s_addr == INADDR_ANY) + goto ours; + + /* + * Not for us; forward if possible and desirable. + */ + if (ipforwarding == 0) { + ipstat.ips_cantforward++; + m_freem(m); + } else + ip_forward(m, 0); + return; + +ours: + /* + * If offset or IP_MF are set, must reassemble. + * Otherwise, nothing need be done. 
+ * (We could look in the reassembly queue to see + * if the packet was previously fragmented, + * but it's not worth the time; just let them time out.) + */ + if (ip->ip_off &~ (IP_DF | IP_RF)) { + if (m->m_flags & M_EXT) { /* XXX */ + if ((m = m_pullup(m, hlen)) == 0) { + ipstat.ips_toosmall++; + return; + } + ip = mtod(m, struct ip *); + } + + /* + * Look for queue of fragments + * of this datagram. + */ + ipq_lock(); + for (fp = ipq.lh_first; fp != NULL; fp = fp->ipq_q.le_next) + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) + goto found; + fp = 0; +found: + + /* + * Adjust ip_len to not reflect header, + * set ipqe_mff if more fragments are expected, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + mff = (ip->ip_off & IP_MF) != 0; + if (mff) { + /* + * Make sure that fragments have a data length + * that's a non-zero multiple of 8 bytes. + */ + if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { + ipstat.ips_badfrags++; + ipq_unlock(); + goto bad; + } + } + ip->ip_off <<= 3; + + /* + * If datagram marked as having more fragments + * or if this is not the first fragment, + * attempt reassembly; if it succeeds, proceed. + */ + if (mff || ip->ip_off) { + ipstat.ips_fragments++; + if (ip_frags + 1 > ip_maxqueue) { + ip_flush(); + ipstat.ips_rcvmemdrop++; + ipq_unlock(); + goto bad; + } + + MALLOC(ipqe, struct ipqent *, sizeof (struct ipqent), + M_IPQ, M_NOWAIT); + if (ipqe == NULL) { + ipstat.ips_rcvmemdrop++; + ipq_unlock(); + goto bad; + } + ip_frags++; + ipqe->ipqe_mff = mff; + ipqe->ipqe_ip = ip; + ip = ip_reass(ipqe, fp); + if (ip == 0) { + ipq_unlock(); + return; + } + ipstat.ips_reassembled++; + m = dtom(ip); + hlen = ip->ip_hl << 2; + } else + if (fp) + ip_freef(fp); + ipq_unlock(); + } else + ip->ip_len -= hlen; + + /* + * Switch out to protocol's input routine. 
+ */ + ipstat.ips_delivered++; + (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen, NULL, 0); + return; +bad: + m_freem(m); +} + +struct in_ifaddr * +in_iawithaddr(ina, m) + struct in_addr ina; + register struct mbuf *m; +{ + register struct in_ifaddr *ia; + + for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) { + if ((ina.s_addr == ia->ia_addr.sin_addr.s_addr) || + ((ia->ia_ifp->if_flags & (IFF_LOOPBACK|IFF_LINK1)) == + (IFF_LOOPBACK|IFF_LINK1) && + ia->ia_subnet == (ina.s_addr & ia->ia_subnetmask))) + return ia; + if (m && ((ip_directedbcast == 0) || (ip_directedbcast && + ia->ia_ifp == m->m_pkthdr.rcvif)) && + (ia->ia_ifp->if_flags & IFF_BROADCAST)) { + if (ina.s_addr == ia->ia_broadaddr.sin_addr.s_addr || + ina.s_addr == ia->ia_netbroadcast.s_addr || + /* + * Look for all-0's host part (old broadcast addr), + * either for subnet or net. + */ + ina.s_addr == ia->ia_subnet || + ina.s_addr == ia->ia_net) { + /* Make sure M_BCAST is set */ + m->m_flags |= M_BCAST; + return ia; + } + } + } + + return NULL; +} + +/* + * Take incoming datagram fragment and try to + * reassemble it into whole datagram. If a chain for + * reassembly of this datagram already exists, then it + * is given as fp; otherwise have to make a chain. + */ +struct ip * +ip_reass(ipqe, fp) + register struct ipqent *ipqe; + register struct ipq *fp; +{ + register struct mbuf *m = dtom(ipqe->ipqe_ip); + register struct ipqent *nq, *p, *q; + struct ip *ip; + struct mbuf *t; + int hlen = ipqe->ipqe_ip->ip_hl << 2; + int i, next; + + /* + * Presence of header sizes in mbufs + * would confuse code below. + */ + m->m_data += hlen; + m->m_len -= hlen; + + /* + * If first fragment to arrive, create a reassembly queue. 
+ */ + if (fp == 0) { + if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL) + goto dropfrag; + fp = mtod(t, struct ipq *); + LIST_INSERT_HEAD(&ipq, fp, ipq_q); + fp->ipq_ttl = IPFRAGTTL; + fp->ipq_p = ipqe->ipqe_ip->ip_p; + fp->ipq_id = ipqe->ipqe_ip->ip_id; + LIST_INIT(&fp->ipq_fragq); + fp->ipq_src = ipqe->ipqe_ip->ip_src; + fp->ipq_dst = ipqe->ipqe_ip->ip_dst; + p = NULL; + goto insert; + } + + /* + * Find a segment which begins after this one does. + */ + for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL; + p = q, q = q->ipqe_q.le_next) + if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (p != NULL) { + i = p->ipqe_ip->ip_off + p->ipqe_ip->ip_len - + ipqe->ipqe_ip->ip_off; + if (i > 0) { + if (i >= ipqe->ipqe_ip->ip_len) + goto dropfrag; + m_adj(dtom(ipqe->ipqe_ip), i); + ipqe->ipqe_ip->ip_off += i; + ipqe->ipqe_ip->ip_len -= i; + } + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + for (; q != NULL && ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len > + q->ipqe_ip->ip_off; q = nq) { + i = (ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len) - + q->ipqe_ip->ip_off; + if (i < q->ipqe_ip->ip_len) { + q->ipqe_ip->ip_len -= i; + q->ipqe_ip->ip_off += i; + m_adj(dtom(q->ipqe_ip), i); + break; + } + nq = q->ipqe_q.le_next; + m_freem(dtom(q->ipqe_ip)); + LIST_REMOVE(q, ipqe_q); + FREE(q, M_IPQ); + ip_frags--; + } + +insert: + /* + * Stick new segment in its place; + * check for complete reassembly. 
+ */ + if (p == NULL) { + LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q); + } else { + LIST_INSERT_AFTER(p, ipqe, ipqe_q); + } + next = 0; + for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL; + p = q, q = q->ipqe_q.le_next) { + if (q->ipqe_ip->ip_off != next) + return (0); + next += q->ipqe_ip->ip_len; + } + if (p->ipqe_mff) + return (0); + + /* + * Reassembly is complete. Check for a bogus message size and + * concatenate fragments. + */ + q = fp->ipq_fragq.lh_first; + ip = q->ipqe_ip; + if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) { + ipstat.ips_toolong++; + ip_freef(fp); + return (0); + } + m = dtom(q->ipqe_ip); + t = m->m_next; + m->m_next = 0; + m_cat(m, t); + nq = q->ipqe_q.le_next; + FREE(q, M_IPQ); + ip_frags--; + for (q = nq; q != NULL; q = nq) { + t = dtom(q->ipqe_ip); + nq = q->ipqe_q.le_next; + FREE(q, M_IPQ); + ip_frags--; + m_cat(m, t); + } + + /* + * Create header for new ip packet by + * modifying header of first packet; + * dequeue and discard fragment reassembly header. + * Make header visible. + */ + ip->ip_len = next; + ip->ip_ttl = 0; /* xxx */ + ip->ip_sum = 0; + ip->ip_src = fp->ipq_src; + ip->ip_dst = fp->ipq_dst; + LIST_REMOVE(fp, ipq_q); + (void) m_free(dtom(fp)); + m->m_len += (ip->ip_hl << 2); + m->m_data -= (ip->ip_hl << 2); + /* some debugging cruft by sklower, below, will go away soon */ + if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */ + register int plen = 0; + for (t = m; m; m = m->m_next) + plen += m->m_len; + t->m_pkthdr.len = plen; + } + return (ip); + +dropfrag: + ipstat.ips_fragdropped++; + m_freem(m); + FREE(ipqe, M_IPQ); + ip_frags--; + return (0); +} + +/* + * Free a fragment reassembly header and all + * associated datagrams. 
+ */ +void +ip_freef(fp) + struct ipq *fp; +{ + register struct ipqent *q, *p; + + for (q = fp->ipq_fragq.lh_first; q != NULL; q = p) { + p = q->ipqe_q.le_next; + m_freem(dtom(q->ipqe_ip)); + LIST_REMOVE(q, ipqe_q); + FREE(q, M_IPQ); + ip_frags--; + } + LIST_REMOVE(fp, ipq_q); + (void) m_free(dtom(fp)); +} + +/* + * IP timer processing; + * if a timer expires on a reassembly + * queue, discard it. + */ +void +ip_slowtimo() +{ + register struct ipq *fp, *nfp; + int s = splsoftnet(); + + ipq_lock(); + for (fp = ipq.lh_first; fp != NULL; fp = nfp) { + nfp = fp->ipq_q.le_next; + if (--fp->ipq_ttl == 0) { + ipstat.ips_fragtimeout++; + ip_freef(fp); + } + } + ipq_unlock(); + splx(s); +} + +/* + * Drain off all datagram fragments. + */ +void +ip_drain() +{ + + if (ipq_lock_try() == 0) + return; + while (ipq.lh_first != NULL) { + ipstat.ips_fragdropped++; + ip_freef(ipq.lh_first); + } + ipq_unlock(); +} + +/* + * Flush a bunch of datagram fragments, till we are down to 75%. + */ +void +ip_flush() +{ + int max = 50; + + /* ipq already locked */ + while (ipq.lh_first != NULL && ip_frags > ip_maxqueue * 3 / 4 && --max) { + ipstat.ips_fragdropped++; + ip_freef(ipq.lh_first); + } +} + +/* + * Do option processing on a datagram, + * possibly discarding it if bad options are encountered, + * or forwarding it if source-routed. + * Returns 1 if packet has been forwarded/freed, + * 0 if the packet should be processed further. 
+ */ +int +ip_dooptions(m) + struct mbuf *m; +{ + register struct ip *ip = mtod(m, struct ip *); + register u_char *cp; + register struct ip_timestamp *ipt; + register struct in_ifaddr *ia; + int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; + struct in_addr *sin, dst; + n_time ntime; + + dst = ip->ip_dst; + cp = (u_char *)(ip + 1); + cnt = (ip->ip_hl << 2) - sizeof (struct ip); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[IPOPT_OPTVAL]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + optlen = 1; + else { + optlen = cp[IPOPT_OLEN]; + if (optlen <= 0 || optlen > cnt) { + code = &cp[IPOPT_OLEN] - (u_char *)ip; + goto bad; + } + } + switch (opt) { + + default: + break; + + /* + * Source routing with record. + * Find interface with current destination address. + * If none on this machine then drop if strictly routed, + * or do nothing if loosely routed. + * Record interface address and bring up next address + * component. If strictly routed make sure next + * address is on directly accessible net. + */ + case IPOPT_LSRR: + case IPOPT_SSRR: + if (!ip_dosourceroute) { +#ifndef __ECOS + char buf[4*sizeof "123"]; + + strcpy(buf, inet_ntoa(ip->ip_dst)); + log(LOG_WARNING, + "attempted source route from %s to %s\n", + inet_ntoa(ip->ip_src), buf); +#endif + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; + goto bad; + } + if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { + code = &cp[IPOPT_OFFSET] - (u_char *)ip; + goto bad; + } + ipaddr.sin_addr = ip->ip_dst; + ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))); + if (ia == 0) { + if (opt == IPOPT_SSRR) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; + goto bad; + } + /* + * Loose routing, and not at next destination + * yet; nothing to do except forward. + */ + break; + } + off--; /* 0 origin */ + if (off > optlen - sizeof(struct in_addr)) { + /* + * End of source route. Should be for us. 
+ */ + save_rte(cp, ip->ip_src); + break; + } + + /* + * locate outgoing interface + */ + bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr, + sizeof(ipaddr.sin_addr)); + if (opt == IPOPT_SSRR) { +#define INA struct in_ifaddr * +#define SA struct sockaddr * + if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0) + ia = (INA)ifa_ifwithnet((SA)&ipaddr); + } else + ia = ip_rtaddr(ipaddr.sin_addr); + if (ia == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; + goto bad; + } + ip->ip_dst = ipaddr.sin_addr; + bcopy((caddr_t)&ia->ia_addr.sin_addr, + (caddr_t)(cp + off), sizeof(struct in_addr)); + cp[IPOPT_OFFSET] += sizeof(struct in_addr); + /* + * Let ip_intr's mcast routing check handle mcast pkts + */ + forward = !IN_MULTICAST(ip->ip_dst.s_addr); + break; + + case IPOPT_RR: + if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { + code = &cp[IPOPT_OFFSET] - (u_char *)ip; + goto bad; + } + + /* + * If no space remains, ignore. + */ + off--; /* 0 origin */ + if (off > optlen - sizeof(struct in_addr)) + break; + bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr, + sizeof(ipaddr.sin_addr)); + /* + * locate outgoing interface; if we're the destination, + * use the incoming interface (should be same). 
+ */ + if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 && + (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_HOST; + goto bad; + } + bcopy((caddr_t)&ia->ia_addr.sin_addr, + (caddr_t)(cp + off), sizeof(struct in_addr)); + cp[IPOPT_OFFSET] += sizeof(struct in_addr); + break; + + case IPOPT_TS: + code = cp - (u_char *)ip; + ipt = (struct ip_timestamp *)cp; + if (ipt->ipt_ptr < 5 || ipt->ipt_len < 5) + goto bad; + if (ipt->ipt_ptr - 1 + sizeof(n_time) > ipt->ipt_len) { + if (++ipt->ipt_oflw == 0) + goto bad; + break; + } + sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1); + switch (ipt->ipt_flg) { + + case IPOPT_TS_TSONLY: + break; + + case IPOPT_TS_TSANDADDR: + if (ipt->ipt_ptr - 1 + sizeof(n_time) + + sizeof(struct in_addr) > ipt->ipt_len) + goto bad; + ipaddr.sin_addr = dst; + ia = (INA)ifaof_ifpforaddr((SA)&ipaddr, + m->m_pkthdr.rcvif); + if (ia == 0) + continue; + bcopy((caddr_t)&ia->ia_addr.sin_addr, + (caddr_t)sin, sizeof(struct in_addr)); + ipt->ipt_ptr += sizeof(struct in_addr); + break; + + case IPOPT_TS_PRESPEC: + if (ipt->ipt_ptr - 1 + sizeof(n_time) + + sizeof(struct in_addr) > ipt->ipt_len) + goto bad; + bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr, + sizeof(struct in_addr)); + if (ifa_ifwithaddr((SA)&ipaddr) == 0) + continue; + ipt->ipt_ptr += sizeof(struct in_addr); + break; + + default: + goto bad; + } + ntime = iptime(); + bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1, + sizeof(n_time)); + ipt->ipt_ptr += sizeof(n_time); + } + } + if (forward && ipforwarding) { + ip_forward(m, 1); + return (1); + } + return (0); +bad: + ip->ip_len -= ip->ip_hl << 2; /* XXX icmp_error adds in hdr length */ + HTONS(ip->ip_len); /* XXX because ip_input changed these three */ + HTONS(ip->ip_id); + HTONS(ip->ip_off); + icmp_error(m, type, code, 0, 0); + ipstat.ips_badoptions++; + return (1); +} + +/* + * Given address of next destination (final or next hop), + * return internet address info of interface to be used to get 
there. + */ +struct in_ifaddr * +ip_rtaddr(dst) + struct in_addr dst; +{ + register struct sockaddr_in *sin; + + sin = satosin(&ipforward_rt.ro_dst); + + if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) { + if (ipforward_rt.ro_rt) { + RTFREE(ipforward_rt.ro_rt); + ipforward_rt.ro_rt = 0; + } + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr = dst; + + rtalloc(&ipforward_rt); + } + if (ipforward_rt.ro_rt == 0) + return ((struct in_ifaddr *)0); + return (ifatoia(ipforward_rt.ro_rt->rt_ifa)); +} + +/* + * Save incoming source route for use in replies, + * to be picked up later by ip_srcroute if the receiver is interested. + */ +void +save_rte(option, dst) + u_char *option; + struct in_addr dst; +{ + unsigned olen; + + olen = option[IPOPT_OLEN]; +#ifdef DIAGNOSTIC + if (ipprintfs) + printf("save_rte: olen %d\n", olen); +#endif /* 0 */ + if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst))) + return; + bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen); + ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); + ip_srcrt.dst = dst; +} + +/* + * Check whether we do proxy ARP for this address and we point to ourselves. + * Code shamelessly copied from arplookup(). 
+ */ +static int +ip_weadvertise(addr) + u_int32_t addr; +{ + register struct rtentry *rt; + register struct ifnet *ifp; + register struct ifaddr *ifa; + struct sockaddr_inarp sin; + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = addr; + sin.sin_other = SIN_PROXY; + rt = rtalloc1(sintosa(&sin), 0); + if (rt == 0) + return 0; + + RTFREE(rt); + + if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || + rt->rt_gateway->sa_family != AF_LINK) + return 0; + + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; + ifa = ifa->ifa_list.tqe_next) { + if (ifa->ifa_addr->sa_family != rt->rt_gateway->sa_family) + continue; + + if (!bcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), + LLADDR((struct sockaddr_dl *)rt->rt_gateway), + ETHER_ADDR_LEN)) + return 1; + } + + return 0; +} + +/* + * Retrieve incoming source route for use in replies, + * in the same form used by setsockopt. + * The first hop is placed before the options, will be removed later. + */ +struct mbuf * +ip_srcroute() +{ + register struct in_addr *p, *q; + register struct mbuf *m; + + if (ip_nhops == 0) + return ((struct mbuf *)0); + m = m_get(M_DONTWAIT, MT_SOOPTS); + if (m == 0) + return ((struct mbuf *)0); + +#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) + + /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ + m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) + + OPTSIZ; +#ifdef DIAGNOSTIC + if (ipprintfs) + printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len); +#endif + + /* + * First save first hop for return route + */ + p = &ip_srcrt.route[ip_nhops - 1]; + *(mtod(m, struct in_addr *)) = *p--; +#ifdef DIAGNOSTIC + if (ipprintfs) + printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr)); +#endif + + /* + * Copy option fields and padding (nop) to mbuf. 
+ */ + ip_srcrt.nop = IPOPT_NOP; + ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; + bcopy((caddr_t)&ip_srcrt.nop, + mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ); + q = (struct in_addr *)(mtod(m, caddr_t) + + sizeof(struct in_addr) + OPTSIZ); +#undef OPTSIZ + /* + * Record return path as an IP source route, + * reversing the path (pointers are now aligned). + */ + while (p >= ip_srcrt.route) { +#ifdef DIAGNOSTIC + if (ipprintfs) + printf(" %x", ntohl(q->s_addr)); +#endif + *q++ = *p--; + } + /* + * Last hop goes to final destination. + */ + *q = ip_srcrt.dst; +#ifdef DIAGNOSTIC + if (ipprintfs) + printf(" %x\n", ntohl(q->s_addr)); +#endif + return (m); +} + +/* + * Strip out IP options, at higher + * level protocol in the kernel. + * Second argument is buffer to which options + * will be moved, and return value is their length. + * XXX should be deleted; last arg currently ignored. + */ +void +ip_stripoptions(m, mopt) + register struct mbuf *m; + struct mbuf *mopt; +{ + register int i; + struct ip *ip = mtod(m, struct ip *); + register caddr_t opts; + int olen; + + olen = (ip->ip_hl<<2) - sizeof (struct ip); + opts = (caddr_t)(ip + 1); + i = m->m_len - (sizeof (struct ip) + olen); + bcopy(opts + olen, opts, (unsigned)i); + m->m_len -= olen; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len -= olen; + ip->ip_hl = sizeof(struct ip) >> 2; +} + +int inetctlerrmap[PRC_NCMDS] = { + 0, 0, 0, 0, + 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, + EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, + EMSGSIZE, EHOSTUNREACH, 0, 0, + 0, 0, 0, 0, + ENOPROTOOPT +}; + +/* + * Forward a packet. If some error occurs return the sender + * an icmp packet. Note we can't always generate a meaningful + * icmp message because icmp doesn't have a large enough repertoire + * of codes and types. + * + * If not forwarding, just drop the packet. This could be confusing + * if ipforwarding was zero but some routing protocol was advancing + * us as a gateway to somewhere. 
However, we must let the routing + * protocol deal with that. + * + * The srcrt parameter indicates whether the packet is being forwarded + * via a source route. + */ +void +ip_forward(m, srcrt) + struct mbuf *m; + int srcrt; +{ + register struct ip *ip = mtod(m, struct ip *); + register struct sockaddr_in *sin; + register struct rtentry *rt; + int error, type = 0, code = 0; + struct mbuf *mcopy; + n_long dest; + struct ifnet *destifp; +#if 0 /*KAME IPSEC*/ + struct ifnet dummyifp; +#endif + + dest = 0; +#ifdef DIAGNOSTIC + if (ipprintfs) + printf("forward: src %x dst %x ttl %x\n", ip->ip_src.s_addr, + ip->ip_dst.s_addr, ip->ip_ttl); +#endif + /* link-level broadcasts and non-forwardable destinations are counted and dropped */ if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) { + ipstat.ips_cantforward++; + m_freem(m); + return; + } + HTONS(ip->ip_id); + /* TTL would not survive the decrement: answer with ICMP time exceeded (in transit) */ if (ip->ip_ttl <= IPTTLDEC) { + icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); + return; + } + ip->ip_ttl -= IPTTLDEC; + + /* reuse the cached forwarding route only if it was looked up for this destination */ sin = satosin(&ipforward_rt.ro_dst); + if ((rt = ipforward_rt.ro_rt) == 0 || + ip->ip_dst.s_addr != sin->sin_addr.s_addr) { + if (ipforward_rt.ro_rt) { + RTFREE(ipforward_rt.ro_rt); + ipforward_rt.ro_rt = 0; + } + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr = ip->ip_dst; + + rtalloc(&ipforward_rt); + if (ipforward_rt.ro_rt == 0) { + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); + return; + } + rt = ipforward_rt.ro_rt; + } + + /* + * Save at most 68 bytes of the packet in case + * we need to generate an ICMP message to the src. + */ + mcopy = m_copy(m, 0, imin((int)ip->ip_len, 68)); + + /* + * If forwarding packet using same interface that it came in on, + * perhaps should send a redirect to sender to shortcut a hop. + * Only send redirect if source is sending directly to us, + * and if packet was not source routed (or has any options). + * Also, don't send redirect if forwarding using a default route + * or a route modified by a redirect. 
+ * Don't send redirect if we advertise destination's arp address + * as ours (proxy arp). + */ + if (rt->rt_ifp == m->m_pkthdr.rcvif && + (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && + satosin(rt_key(rt))->sin_addr.s_addr != 0 && + ipsendredirects && !srcrt && + !ip_weadvertise(satosin(rt_key(rt))->sin_addr.s_addr)) { + if (rt->rt_ifa && + (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) == + ifatoia(rt->rt_ifa)->ia_subnet) { + if (rt->rt_flags & RTF_GATEWAY) + dest = satosin(rt->rt_gateway)->sin_addr.s_addr; + else + dest = ip->ip_dst.s_addr; + /* Router requirements says to only send host redirects */ + type = ICMP_REDIRECT; + code = ICMP_REDIRECT_HOST; +#ifdef DIAGNOSTIC + if (ipprintfs) + printf("redirect (%d) to %x\n", code, (u_int32_t)dest); +#endif + } + } + +#if 0 /*KAME IPSEC*/ + m->m_pkthdr.rcvif = NULL; +#endif /*IPSEC*/ + error = ip_output(m, (struct mbuf *)0, &ipforward_rt, + (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), + 0, NULL, NULL); + if (error) + ipstat.ips_cantforward++; + else { + ipstat.ips_forward++; + if (type) + ipstat.ips_redirectsent++; + else { + /* forwarded and no redirect wanted: the saved copy is not needed */ if (mcopy) + m_freem(mcopy); + return; + } + } + /* without the saved copy there is nothing to hand to icmp_error() */ if (mcopy == NULL) + return; + destifp = NULL; + + /* translate the ip_output() result into an ICMP type/code for the sender */ switch (error) { + + case 0: /* forwarded, but need redirect */ + /* type, code set above */ + break; + + case ENETUNREACH: /* shouldn't happen, checked above */ + case EHOSTUNREACH: + case ENETDOWN: + case EHOSTDOWN: + default: + type = ICMP_UNREACH; + code = ICMP_UNREACH_HOST; + break; + + case EMSGSIZE: + type = ICMP_UNREACH; + code = ICMP_UNREACH_NEEDFRAG; +#if 1 /*KAME IPSEC*/ + if (ipforward_rt.ro_rt) + destifp = ipforward_rt.ro_rt->rt_ifp; +#else + /* + * If the packet is routed over IPsec tunnel, tell the + * originator the tunnel MTU. + * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz + * XXX quickhack!!! 
+ */ + if (ipforward_rt.ro_rt) { + struct secpolicy *sp; + int ipsecerror; + int ipsechdr; + struct route *ro; + + sp = ipsec4_getpolicybyaddr(mcopy, + IP_FORWARDING, + &ipsecerror); + + if (sp == NULL) + destifp = ipforward_rt.ro_rt->rt_ifp; + else { + /* count IPsec header size */ + ipsechdr = ipsec4_hdrsiz(mcopy, NULL); + + /* + * find the correct route for outer IPv4 + * header, compute tunnel MTU. + * + * XXX BUG ALERT + * The "dummyifp" code relies upon the fact + * that icmp_error() touches only ifp->if_mtu. + */ + /*XXX*/ + destifp = NULL; + if (sp->req != NULL + && sp->req->sa != NULL) { + ro = &sp->req->sa->saidx->sa_route; + if (ro->ro_rt && ro->ro_rt->rt_ifp) { + dummyifp.if_mtu = + ro->ro_rt->rt_ifp->if_mtu; + dummyifp.if_mtu -= ipsechdr; + destifp = &dummyifp; + } + } + + key_freesp(sp); + } + } +#endif /*IPSEC*/ + ipstat.ips_cantfrag++; + break; + + case ENOBUFS: + type = ICMP_SOURCEQUENCH; + code = 0; + break; + } + + icmp_error(mcopy, type, code, dest, destifp); +} + +#ifdef CYGPKG_NET_SYSCTL +/* ip_sysctl: handle a sysctl request for one IP-level variable. name[0] selects which global is handed to sysctl_int() together with oldp/oldlenp/newp/newlen; returns ENOTDIR when namelen is not 1, EPERM for a write to IPCTL_SOURCEROUTE while securelevel > 0, EOPNOTSUPP for an unrecognised name, otherwise whatever sysctl_int() returns. */ int +ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; +{ + /* All sysctl names at this level are terminal. */ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case IPCTL_FORWARDING: + return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding)); + case IPCTL_SENDREDIRECTS: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ipsendredirects)); + case IPCTL_DEFTTL: + return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl)); +#ifdef notyet + case IPCTL_DEFMTU: + return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu)); +#endif + case IPCTL_SOURCEROUTE: + /* + * Don't allow this to change in a secure environment. 
+ */ + if (newp && securelevel > 0) + return (EPERM); + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ip_dosourceroute)); + case IPCTL_DIRECTEDBCAST: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ip_directedbcast)); + case IPCTL_IPPORT_FIRSTAUTO: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ipport_firstauto)); + case IPCTL_IPPORT_LASTAUTO: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ipport_lastauto)); + case IPCTL_IPPORT_HIFIRSTAUTO: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ipport_hifirstauto)); + case IPCTL_IPPORT_HILASTAUTO: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ipport_hilastauto)); + case IPCTL_IPPORT_MAXQUEUE: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ip_maxqueue)); + case IPCTL_ENCDEBUG: + return (sysctl_int(oldp, oldlenp, newp, newlen, &encdebug)); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} +#endif // CYGPKG_NET_SYSCTL diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/ip_output.c b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_output.c new file mode 100644 index 0000000..40f125a --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/ip_output.c @@ -0,0 +1,1757 @@ +//========================================================================== +// +// sys/netinet/ip_output.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: ip_output.c,v 1.57 1999/12/10 08:55:23 angelos Exp $ */ +/* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/errno.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/kernel.h> +#ifndef __ECOS +#include <sys/proc.h> + +#include <vm/vm.h> +#include <sys/proc.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/in_var.h> +#include <netinet/ip_var.h> + +#ifdef vax +#include <machine/mtpr.h> +#endif + +#include <machine/stdarg.h> + +#ifdef IPSEC +#include <netinet/ip_ah.h> +#include <netinet/ip_esp.h> +#include <netinet/udp.h> +#include <netinet/tcp.h> +#include <net/pfkeyv2.h> + +#ifdef ENCDEBUG +#define DPRINTF(x) do { if (encdebug) printf x ; } while (0) +#else +#define DPRINTF(x) +#endif + +#ifndef offsetof +#define offsetof(s, e) ((int)&((s *)0)->e) +#endif + +extern u_int8_t get_sa_require __P((struct inpcb *)); + +#endif /* IPSEC */ + +static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); +static void ip_mloopback + __P((struct ifnet *, struct mbuf *, struct sockaddr_in *)); +#if defined(IPFILTER) || defined(IPFILTER_LKM) +int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **)); +#endif + +#ifdef IPSEC +extern int 
ipsec_auth_default_level; +extern int ipsec_esp_trans_default_level; +extern int ipsec_esp_network_default_level; + +extern int pfkeyv2_acquire(struct tdb *, int); +#endif + +#ifndef RAMDOM_IP_ID +u_short ip_id; +#endif + +/* + * IP output. The packet in mbuf chain m contains a skeletal IP + * header (with len, off, ttl, proto, tos, src, dst). + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + */ +int +#if __STDC__ +ip_output(struct mbuf *m0, ...) +#else +ip_output(m0, va_alist) + struct mbuf *m0; + va_dcl +#endif +{ + register struct ip *ip, *mhip; + register struct ifnet *ifp; + register struct mbuf *m = m0; + register int hlen = sizeof (struct ip); + int len, off, error = 0; + struct route iproute; + struct sockaddr_in *dst; + struct in_ifaddr *ia; + struct mbuf *opt; + struct route *ro; + int flags; + struct ip_moptions *imo; + va_list ap; +#ifdef IPSEC + union sockaddr_union sunion; + struct mbuf *mp; + struct udphdr *udp; + struct tcphdr *tcp; + struct inpcb *inp; + + struct route_enc re0, *re = &re0; + struct sockaddr_encap *ddst, *gw; + u_int8_t sa_require, sa_have = 0; + struct tdb *tdb, *t; + int s, ip6flag; + +#ifdef INET6 + struct ip6_hdr *ip6; +#endif /* INET6 */ +#endif /* IPSEC */ + + va_start(ap, m0); + opt = va_arg(ap, struct mbuf *); + ro = va_arg(ap, struct route *); + flags = va_arg(ap, int); + imo = va_arg(ap, struct ip_moptions *); +#ifdef IPSEC + inp = va_arg(ap, struct inpcb *); +#endif /* IPSEC */ + va_end(ap); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ip_output no HDR"); +#endif + if (opt) { + m = ip_insertoptions(m, opt, &len); + hlen = len; + } + ip = mtod(m, struct ip *); + /* + * Fill in IP header. 
+ */ + if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { + ip->ip_v = IPVERSION; + ip->ip_off &= IP_DF; +#ifdef RANDOM_IP_ID + ip->ip_id = ip_randomid(); +#else + ip->ip_id = htons(ip_id++); +#endif + ip->ip_hl = hlen >> 2; + ipstat.ips_localout++; + } else { + hlen = ip->ip_hl << 2; + } + + /* + * Route packet. + */ + if (ro == 0) { + ro = &iproute; + bzero((caddr_t)ro, sizeof (*ro)); + } + dst = satosin(&ro->ro_dst); + /* + * If there is a cached route, + * check that it is to the same destination + * and is still up. If not, free it and try again. + */ + if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || + dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { + RTFREE(ro->ro_rt); + ro->ro_rt = (struct rtentry *)0; + } + if (ro->ro_rt == 0) { + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = ip->ip_dst; + } + /* + * If routing to interface only, + * short circuit routing lookup. + */ + if (flags & IP_ROUTETOIF) { + if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && + (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } + ifp = ia->ia_ifp; + ip->ip_ttl = 1; + } else { + if (ro->ro_rt == 0) + rtalloc(ro); + if (ro->ro_rt == 0) { + ipstat.ips_noroute++; + error = EHOSTUNREACH; + goto bad; + } + ia = ifatoia(ro->ro_rt->rt_ifa); + ifp = ro->ro_rt->rt_ifp; + ro->ro_rt->rt_use++; + if (ro->ro_rt->rt_flags & RTF_GATEWAY) + dst = satosin(ro->ro_rt->rt_gateway); + } + if (IN_MULTICAST(ip->ip_dst.s_addr)) { + struct in_multi *inm; + + m->m_flags |= M_MCAST; + /* + * IP destination address is multicast. Make sure "dst" + * still points to the address in "ro". (It may have been + * changed to point to a gateway address, above.) 
+ */ + dst = satosin(&ro->ro_dst); + /* + * See if the caller provided any multicast options + */ + if (imo != NULL) { + ip->ip_ttl = imo->imo_multicast_ttl; + if (imo->imo_multicast_ifp != NULL) + ifp = imo->imo_multicast_ifp; + } else + ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; + /* + * Confirm that the outgoing interface supports multicast. + */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } + /* + * If source address not specified yet, use address + * of outgoing interface. + */ + if (ip->ip_src.s_addr == INADDR_ANY) { + register struct in_ifaddr *ia; + + for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) + if (ia->ia_ifp == ifp) { + ip->ip_src = ia->ia_addr.sin_addr; + break; + } + } + + IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); + if (inm != NULL && + (imo == NULL || imo->imo_multicast_loop)) { + /* + * If we belong to the destination multicast group + * on the outgoing interface, and the caller did not + * forbid loopback, loop back a copy. + */ + ip_mloopback(ifp, m, dst); + } +#ifdef MROUTING + else { + /* + * If we are acting as a multicast router, perform + * multicast forwarding as if the packet had just + * arrived on the interface to which we are about + * to send. The multicast forwarding function + * recursively calls this function, using the + * IP_FORWARDING flag to prevent infinite recursion. + * + * Multicasts that are looped back by ip_mloopback(), + * above, will be forwarded by the ip_input() routine, + * if necessary. + */ + extern struct socket *ip_mrouter; + + if (ip_mrouter && (flags & IP_FORWARDING) == 0) { + if (ip_mforward(m, ifp) != 0) { + m_freem(m); + goto done; + } + } + } +#endif + /* + * Multicasts with a time-to-live of zero may be looped- + * back, above, but must not be transmitted on a network. 
+ * Also, multicasts addressed to the loopback interface + * are not sent -- the above call to ip_mloopback() will + * loop back a copy if this host actually belongs to the + * destination group on the loopback interface. + */ + if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { + m_freem(m); + goto done; + } + + goto sendit; + } +#ifndef notdef + /* + * If source address not specified yet, use address + * of outgoing interface. + */ + if (ip->ip_src.s_addr == INADDR_ANY) + ip->ip_src = ia->ia_addr.sin_addr; +#endif + /* + * Look for broadcast address and + * and verify user is allowed to send + * such a packet. + */ + if (in_broadcast(dst->sin_addr, ifp)) { + if ((ifp->if_flags & IFF_BROADCAST) == 0) { + error = EADDRNOTAVAIL; + goto bad; + } + if ((flags & IP_ALLOWBROADCAST) == 0) { + error = EACCES; + goto bad; + } + /* don't allow broadcast messages to be fragmented */ + if ((u_int16_t)ip->ip_len > ifp->if_mtu) { + error = EMSGSIZE; + goto bad; + } + m->m_flags |= M_BCAST; + } else + m->m_flags &= ~M_BCAST; + +sendit: +#ifdef IPSEC + /* + * Check if the packet needs encapsulation. + */ + if (!(flags & IP_ENCAPSULATED) && + (inp == NULL || + inp->inp_seclevel[SL_AUTH] != IPSEC_LEVEL_BYPASS || + inp->inp_seclevel[SL_ESP_TRANS] != IPSEC_LEVEL_BYPASS || + inp->inp_seclevel[SL_ESP_NETWORK] != IPSEC_LEVEL_BYPASS)) { + if (inp == NULL) + sa_require = get_sa_require(inp); + else + sa_require = inp->inp_secrequire; + + bzero((caddr_t) re, sizeof(*re)); + + /* + * splnet is chosen over spltdb because we are not allowed to + * lower the level, and udp_output calls us in splnet(). + */ + s = splnet(); + + /* + * Check if there was an outgoing SA bound to the flow + * from a transport protocol. 
+ */ + if (inp && inp->inp_tdb && + (inp->inp_tdb->tdb_dst.sin.sin_addr.s_addr == INADDR_ANY || + !bcmp(&inp->inp_tdb->tdb_dst.sin.sin_addr, + &ip->ip_dst, sizeof(ip->ip_dst)))) { + tdb = inp->inp_tdb; + goto have_tdb; + } + + if (!ipsec_in_use) { + splx(s); + goto no_encap; + } + + ddst = (struct sockaddr_encap *) &re->re_dst; + ddst->sen_family = PF_KEY; + ddst->sen_len = SENT_IP4_LEN; + ddst->sen_type = SENT_IP4; + ddst->sen_ip_src = ip->ip_src; + ddst->sen_ip_dst = ip->ip_dst; + ddst->sen_proto = ip->ip_p; + + switch (ip->ip_p) { + case IPPROTO_UDP: + if (m->m_len < hlen + 2 * sizeof(u_int16_t)) { + if ((m = m_pullup(m, hlen + 2 * + sizeof(u_int16_t))) == 0) + return ENOBUFS; + ip = mtod(m, struct ip *); + } + udp = (struct udphdr *) (mtod(m, u_char *) + hlen); + ddst->sen_sport = ntohs(udp->uh_sport); + ddst->sen_dport = ntohs(udp->uh_dport); + break; + + case IPPROTO_TCP: + if (m->m_len < hlen + 2 * sizeof(u_int16_t)) { + if ((m = m_pullup(m, hlen + 2 * + sizeof(u_int16_t))) == 0) + return ENOBUFS; + ip = mtod(m, struct ip *); + } + tcp = (struct tcphdr *) (mtod(m, u_char *) + hlen); + ddst->sen_sport = ntohs(tcp->th_sport); + ddst->sen_dport = ntohs(tcp->th_dport); + break; + + default: + ddst->sen_sport = 0; + ddst->sen_dport = 0; + } + + rtalloc((struct route *) re); + if (re->re_rt == NULL) { + splx(s); + goto no_encap; + } + + gw = (struct sockaddr_encap *) (re->re_rt->rt_gateway); + + /* Sanity check */ + if (gw == NULL || ((gw->sen_type != SENT_IPSP) && + (gw->sen_type != SENT_IPSP6))) { + splx(s); + DPRINTF(("ip_output(): no gw or gw data not IPSP\n")); + + if (re->re_rt) + RTFREE(re->re_rt); + error = EHOSTUNREACH; + m_freem(m); + goto done; + } + + /* + * There might be a specific route, that tells us to avoid + * doing IPsec; this is useful for specific routes that we + * don't want to have IPsec applied on, like the key + * management ports. 
+ */ + + if ((gw != NULL) && (gw->sen_ipsp_sproto == 0) && + (gw->sen_ipsp_spi == 0)) { + if ((gw->sen_family == AF_INET) && + (gw->sen_ipsp_dst.s_addr == 0)) { + splx(s); + goto no_encap; + } + +#ifdef INET6 + if ((gw->sen_family == AF_INET6) && + IN6_IS_ADDR_UNSPECIFIED(&gw->sen_ipsp6_dst)) { + splx(s); + goto no_encap; + } +#endif /* INET6 */ + } + + /* + * At this point we have an IPSP "gateway" (tunnel) spec. + * Use the destination of the tunnel and the SPI to + * look up the necessary Tunnel Control Block. Look it up, + * and then pass it, along with the packet and the gw, + * to the appropriate transformation. + */ + bzero(&sunion, sizeof(sunion)); + + if (gw->sen_type == SENT_IPSP) { + sunion.sin.sin_family = AF_INET; + sunion.sin.sin_len = sizeof(struct sockaddr_in); + sunion.sin.sin_addr = gw->sen_ipsp_dst; + } +#ifdef INET6 + if (gw->sen_type == SENT_IPSP6) { + sunion.sin6.sin6_family = AF_INET6; + sunion.sin6.sin6_len = sizeof(struct sockaddr_in6); + sunion.sin6.sin6_addr = gw->sen_ipsp6_dst; + } +#endif /* INET6 */ + + tdb = (struct tdb *) gettdb(gw->sen_ipsp_spi, &sunion, + gw->sen_ipsp_sproto); + + /* + * For VPNs a route with a reserved SPI is used to + * indicate the need for an SA when none is established. + */ + if (((ntohl(gw->sen_ipsp_spi) == SPI_LOCAL_USE) && + (gw->sen_type == SENT_IPSP)) || + ((ntohl(gw->sen_ipsp6_spi) == SPI_LOCAL_USE) && + (gw->sen_type == SENT_IPSP6))) { + if (tdb == NULL) { + /* + * XXX We should construct a TDB from system + * default (which should be tunable via sysctl). + * For now, drop packet and ignore SPD entry. 
+ */ + splx(s); + goto no_encap; + } + else { + if (tdb->tdb_authalgxform) + sa_require = NOTIFY_SATYPE_AUTH; + if (tdb->tdb_encalgxform) + sa_require |= NOTIFY_SATYPE_CONF; + if (tdb->tdb_flags & TDBF_TUNNELING) + sa_require |= NOTIFY_SATYPE_TUNNEL; + } + + /* PF_KEYv2 notification message */ + if (tdb && tdb->tdb_satype != SADB_X_SATYPE_BYPASS) + if ((error = pfkeyv2_acquire(tdb, 0)) != 0) + return error; + + splx(s); + + /* + * When sa_require is set, the packet will be dropped + * at no_encap. + */ + goto no_encap; + } + + have_tdb: + + ip->ip_len = htons((u_short) ip->ip_len); + ip->ip_off = htons((u_short) ip->ip_off); + ip->ip_sum = 0; + + /* + * Now we check if this tdb has all the transforms which + * are requried by the socket or our default policy. + */ + SPI_CHAIN_ATTRIB(sa_have, tdb_onext, tdb); + + if (sa_require & ~sa_have) + goto no_encap; + + if (tdb == NULL) { + splx(s); + if (gw->sen_type == SENT_IPSP) + DPRINTF(("ip_output(): non-existant TDB for SA %s/%08x/%u\n", inet_ntoa4(gw->sen_ipsp_dst), ntohl(gw->sen_ipsp_spi), gw->sen_ipsp_sproto)); +#ifdef INET6 + else + DPRINTF(("ip_output(): non-existant TDB for SA %s/%08x/%u\n", inet6_ntoa4(gw->sen_ipsp6_dst), ntohl(gw->sen_ipsp6_spi), gw->sen_ipsp6_sproto)); +#endif /* INET6 */ + + if (re->re_rt) + RTFREE(re->re_rt); + error = EHOSTUNREACH; + m_freem(m); + goto done; + } + + for (t = tdb; t != NULL; t = t->tdb_onext) + if ((t->tdb_sproto == IPPROTO_ESP && !esp_enable) || + (t->tdb_sproto == IPPROTO_AH && !ah_enable)) { + DPRINTF(("ip_output(): IPSec outbound packet dropped due to policy\n")); + + if (re->re_rt) + RTFREE(re->re_rt); + error = EHOSTUNREACH; + m_freem(m); + goto done; + } + + while (tdb && tdb->tdb_xform) { + /* Check if the SPI is invalid */ + if (tdb->tdb_flags & TDBF_INVALID) { + splx(s); + DPRINTF(("ip_output(): attempt to use invalid SA %s/%08x/%u\n", ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi), tdb->tdb_sproto)); + m_freem(m); + if (re->re_rt) + RTFREE(re->re_rt); + return 
ENXIO; + } + +#ifndef INET6 + /* Sanity check */ + if (tdb->tdb_dst.sa.sa_family != AF_INET) { + splx(s); + DPRINTF(("ip_output(): attempt to use SA %s/%08x/%u for protocol family %d\n", ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi), tdb->tdb_sproto, tdb->tdb_dst.sa.sa_family)); + m_freem(m); + if (re->re_rt) + RTFREE(re->re_rt); + return ENXIO; + } +#endif /* INET6 */ + + /* Register first use, setup expiration timer */ + if (tdb->tdb_first_use == 0) { + tdb->tdb_first_use = time.tv_sec; + tdb_expiration(tdb, TDBEXP_TIMEOUT); + } + + /* Check for tunneling */ + if (((tdb->tdb_dst.sa.sa_family == AF_INET) && + (tdb->tdb_dst.sin.sin_addr.s_addr != + INADDR_ANY) && + (tdb->tdb_dst.sin.sin_addr.s_addr != + ip->ip_dst.s_addr)) || + (tdb->tdb_dst.sa.sa_family == AF_INET6) || + ((tdb->tdb_flags & TDBF_TUNNELING) && + (tdb->tdb_xform->xf_type != XF_IP4))) { + /* Fix length and checksum */ + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + error = ipe4_output(m, tdb, &mp, + ip->ip_hl << 2, + offsetof(struct ip, ip_p)); + if (mp == NULL) + error = EFAULT; + if (error) { + splx(s); + if (re->re_rt) + RTFREE(re->re_rt); + return error; + } + if (tdb->tdb_dst.sa.sa_family == AF_INET) + ip6flag = 0; +#ifdef INET6 + if (tdb->tdb_dst.sa.sa_family == AF_INET6) + ip6flag = 1; +#endif /* INET6 */ + m = mp; + mp = NULL; + } + + if ((tdb->tdb_xform->xf_type == XF_IP4) && + (tdb->tdb_dst.sa.sa_family == AF_INET)) { + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + } + +#ifdef INET6 + if ((tdb->tdb_xform->xf_type == XF_IP4) && + (tdb->tdb_dst.sa.sa_family == AF_INET6)) { + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len); + } +#endif /* INET6 */ + +#ifdef INET6 + /* + * This assumes that there is only just an IPv6 + * header prepended. 
+ */ + if (ip6flag) + error = (*(tdb->tdb_xform->xf_output))(m, tdb, &mp, sizeof(struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt)); +#endif /* INET6 */ + + if (!ip6flag) + error = (*(tdb->tdb_xform->xf_output))(m, tdb, &mp, ip->ip_hl << 2, offsetof(struct ip, ip_p)); + if (!error && mp == NULL) + error = EFAULT; + if (error) { + splx(s); + if (mp != NULL) + m_freem(mp); + if (re->re_rt) + RTFREE(re->re_rt); + return error; + } + + m = mp; + mp = NULL; + + if (!ip6flag) { + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + } + +#ifdef INET6 + if (ip6flag) { + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len); + } +#endif /* INET6 */ + tdb = tdb->tdb_onext; + } + splx(s); + + if (!ip6flag) + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + + /* + * At this point, m is pointing to an mbuf chain with the + * processed packet. Call ourselves recursively, but + * bypass the encap code. + */ + if (re->re_rt) + RTFREE(re->re_rt); + + if (!ip6flag) { + ip = mtod(m, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + + return ip_output(m, NULL, NULL, + IP_ENCAPSULATED | IP_RAWOUTPUT, + NULL, NULL); + } + +#ifdef INET6 + if (ip6flag) { + ip6 = mtod(m, struct ip6_hdr *); + NTOHS(ip6->ip6_plen); + + /* Naturally, ip6_output() has to honor those two flags */ + return ip6_output(m, NULL, NULL, + IP_ENCAPSULATED | IP_RAWOUTPUT, + NULL, NULL); + } +#endif /* INET6 */ + +no_encap: + /* This is for possible future use, don't move or delete */ + if (re->re_rt) + RTFREE(re->re_rt); + /* No IPSec processing though it was required, drop packet */ + if (sa_require) { + error = EHOSTUNREACH; + m_freem(m); + goto done; + } + } +#endif /* IPSEC */ + +#if defined(IPFILTER) || defined(IPFILTER_LKM) + /* + * looks like most checking has been done now...do a filter check + */ + { + struct mbuf *m0 = m; + if (fr_checkp && (*fr_checkp)(ip, hlen, ifp, 1, &m0)) { + error = EHOSTUNREACH; + goto done; + } else + ip = mtod(m = m0, struct ip *); + } +#endif + 
/* + * If small enough for interface, can just send directly. + */ + if ((u_int16_t)ip->ip_len <= ifp->if_mtu) { + ip->ip_len = htons((u_int16_t)ip->ip_len); + ip->ip_off = htons((u_int16_t)ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, hlen); + error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ +#if 0 + /* + * If IPsec packet is too big for the interface, try fragment it. + * XXX This really is a quickhack. May be inappropriate. + * XXX fails if somebody is sending AH'ed packet, with: + * sizeof(packet without AH) < mtu < sizeof(packet with AH) + */ + if (sab && ip->ip_p != IPPROTO_AH && (flags & IP_FORWARDING) == 0) + ip->ip_off &= ~IP_DF; +#endif /*IPSEC*/ + if (ip->ip_off & IP_DF) { + error = EMSGSIZE; + ipstat.ips_cantfrag++; + goto bad; + } + len = (ifp->if_mtu - hlen) &~ 7; + if (len < 8) { + error = EMSGSIZE; + goto bad; + } + + { + int mhlen, firstlen = len; + struct mbuf **mnext = &m->m_nextpkt; + + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto chain. 
+ */ + m0 = m; + mhlen = sizeof (struct ip); + for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == 0) { + error = ENOBUFS; + ipstat.ips_odropped++; + goto sendorfree; + } + *mnext = m; + mnext = &m->m_nextpkt; + m->m_data += max_linkhdr; + mhip = mtod(m, struct ip *); + *mhip = *ip; + if (hlen > sizeof (struct ip)) { + mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); + mhip->ip_hl = mhlen >> 2; + } + m->m_len = mhlen; + mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); + if (ip->ip_off & IP_MF) + mhip->ip_off |= IP_MF; + if (off + len >= (u_int16_t)ip->ip_len) + len = (u_int16_t)ip->ip_len - off; + else + mhip->ip_off |= IP_MF; + mhip->ip_len = htons((u_int16_t)(len + mhlen)); + m->m_next = m_copy(m0, off, len); + if (m->m_next == 0) { + error = ENOBUFS; /* ??? */ + ipstat.ips_odropped++; + goto sendorfree; + } + m->m_pkthdr.len = mhlen + len; + m->m_pkthdr.rcvif = (struct ifnet *)0; + mhip->ip_off = htons((u_int16_t)mhip->ip_off); + mhip->ip_sum = 0; + mhip->ip_sum = in_cksum(m, mhlen); + ipstat.ips_ofragments++; + } + /* + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). + */ + m = m0; + m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len); + m->m_pkthdr.len = hlen + firstlen; + ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); + ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF)); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, hlen); +sendorfree: + for (m = m0; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; + if (error == 0) + error = (*ifp->if_output)(ifp, m, sintosa(dst), + ro->ro_rt); + else + m_freem(m); + } + + if (error == 0) + ipstat.ips_fragmented++; + } +done: + if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) + RTFREE(ro->ro_rt); + return (error); +bad: + m_freem(m0); + goto done; +} + +/* + * Insert IP options into preformed packet. 
+ * Adjust IP destination as required for IP source routing, + * as indicated by a non-zero in_addr at the start of the options. + */ +static struct mbuf * +ip_insertoptions(m, opt, phlen) + register struct mbuf *m; + struct mbuf *opt; + int *phlen; +{ + register struct ipoption *p = mtod(opt, struct ipoption *); + struct mbuf *n; + register struct ip *ip = mtod(m, struct ip *); + unsigned optlen; + + optlen = opt->m_len - sizeof(p->ipopt_dst); + if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET) + return (m); /* XXX should fail */ + if (p->ipopt_dst.s_addr) + ip->ip_dst = p->ipopt_dst; + if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { + MGETHDR(n, M_DONTWAIT, MT_HEADER); + if (n == 0) + return (m); + n->m_pkthdr.len = m->m_pkthdr.len + optlen; + m->m_len -= sizeof(struct ip); + m->m_data += sizeof(struct ip); + n->m_next = m; + m = n; + m->m_len = optlen + sizeof(struct ip); + m->m_data += max_linkhdr; + bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); + } else { + m->m_data -= optlen; + m->m_len += optlen; + m->m_pkthdr.len += optlen; + ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); + } + ip = mtod(m, struct ip *); + bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen); + *phlen = sizeof(struct ip) + optlen; + ip->ip_len += optlen; + return (m); +} + +/* + * Copy options from ip to jp, + * omitting those not copied during fragmentation. + */ +int +ip_optcopy(ip, jp) + struct ip *ip, *jp; +{ + register u_char *cp, *dp; + int opt, optlen, cnt; + + cp = (u_char *)(ip + 1); + dp = (u_char *)(jp + 1); + cnt = (ip->ip_hl << 2) - sizeof (struct ip); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) { + /* Preserve for IP mcast tunnel's LSRR alignment. 
*/ + *dp++ = IPOPT_NOP; + optlen = 1; + continue; + } else + optlen = cp[IPOPT_OLEN]; + /* bogus lengths should have been caught by ip_dooptions */ + if (optlen > cnt) + optlen = cnt; + if (IPOPT_COPIED(opt)) { + bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen); + dp += optlen; + } + } + for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) + *dp++ = IPOPT_EOL; + return (optlen); +} + +/* + * IP socket option processing. + */ +int +ip_ctloutput(op, so, level, optname, mp) + int op; + struct socket *so; + int level, optname; + struct mbuf **mp; +{ + register struct inpcb *inp = sotoinpcb(so); + register struct mbuf *m = *mp; + register int optval = 0; +#ifdef IPSEC + struct proc *p = curproc; /* XXX */ + struct tdb *tdb; + struct tdb_ident *tdbip, tdbi; + int s; +#endif + int error = 0; + + if (level != IPPROTO_IP) { + error = EINVAL; + if (op == PRCO_SETOPT && *mp) + (void) m_free(*mp); + } else switch (op) { + + case PRCO_SETOPT: + switch (optname) { + case IP_OPTIONS: +#ifdef notyet + case IP_RETOPTS: + return (ip_pcbopts(optname, &inp->inp_options, m)); +#else + return (ip_pcbopts(&inp->inp_options, m)); +#endif + + case IP_TOS: + case IP_TTL: + case IP_RECVOPTS: + case IP_RECVRETOPTS: + case IP_RECVDSTADDR: + if (m == NULL || m->m_len != sizeof(int)) + error = EINVAL; + else { + optval = *mtod(m, int *); + switch (optname) { + + case IP_TOS: + inp->inp_ip.ip_tos = optval; + break; + + case IP_TTL: + inp->inp_ip.ip_ttl = optval; + break; +#define OPTSET(bit) \ + if (optval) \ + inp->inp_flags |= bit; \ + else \ + inp->inp_flags &= ~bit; + + case IP_RECVOPTS: + OPTSET(INP_RECVOPTS); + break; + + case IP_RECVRETOPTS: + OPTSET(INP_RECVRETOPTS); + break; + + case IP_RECVDSTADDR: + OPTSET(INP_RECVDSTADDR); + break; + } + } + break; +#undef OPTSET + + case IP_MULTICAST_IF: + case IP_MULTICAST_TTL: + case IP_MULTICAST_LOOP: + case IP_ADD_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + error = ip_setmoptions(optname, &inp->inp_moptions, m); + break; + + case 
IP_PORTRANGE: + if (m == 0 || m->m_len != sizeof(int)) + error = EINVAL; + else { + optval = *mtod(m, int *); + + switch (optval) { + + case IP_PORTRANGE_DEFAULT: + inp->inp_flags &= ~(INP_LOWPORT); + inp->inp_flags &= ~(INP_HIGHPORT); + break; + + case IP_PORTRANGE_HIGH: + inp->inp_flags &= ~(INP_LOWPORT); + inp->inp_flags |= INP_HIGHPORT; + break; + + case IP_PORTRANGE_LOW: + inp->inp_flags &= ~(INP_HIGHPORT); + inp->inp_flags |= INP_LOWPORT; + break; + + default: + + error = EINVAL; + break; + } + } + break; + case IPSEC_OUTSA: +#ifndef IPSEC + error = EINVAL; +#else + s = spltdb(); + if (m == 0 || m->m_len != sizeof(struct tdb_ident)) { + error = EINVAL; + } else { + tdbip = mtod(m, struct tdb_ident *); + tdb = gettdb(tdbip->spi, &tdbip->dst, + tdbip->proto); + if (tdb == NULL) + error = ESRCH; + else + tdb_add_inp(tdb, inp); + } + splx(s); +#endif /* IPSEC */ + break; + + case IP_AUTH_LEVEL: + case IP_ESP_TRANS_LEVEL: + case IP_ESP_NETWORK_LEVEL: +#ifndef IPSEC + error = EINVAL; +#else + if (m == 0 || m->m_len != sizeof(int)) { + error = EINVAL; + break; + } + optval = *mtod(m, u_char *); + + if (optval < IPSEC_LEVEL_BYPASS || + optval > IPSEC_LEVEL_UNIQUE) { + error = EINVAL; + break; + } + + switch (optname) { + case IP_AUTH_LEVEL: + if (optval < ipsec_auth_default_level && + suser(p->p_ucred, &p->p_acflag)) { + error = EACCES; + break; + } + inp->inp_seclevel[SL_AUTH] = optval; + break; + + case IP_ESP_TRANS_LEVEL: + if (optval < ipsec_esp_trans_default_level && + suser(p->p_ucred, &p->p_acflag)) { + error = EACCES; + break; + } + inp->inp_seclevel[SL_ESP_TRANS] = optval; + break; + + case IP_ESP_NETWORK_LEVEL: + if (optval < ipsec_esp_network_default_level && + suser(p->p_ucred, &p->p_acflag)) { + error = EACCES; + break; + } + inp->inp_seclevel[SL_ESP_NETWORK] = optval; + break; + } + if (!error) + inp->inp_secrequire = get_sa_require(inp); +#endif + break; + + default: + error = ENOPROTOOPT; + break; + } + if (m) + (void)m_free(m); + break; + + case 
PRCO_GETOPT: + switch (optname) { + case IP_OPTIONS: + case IP_RETOPTS: + *mp = m = m_get(M_WAIT, MT_SOOPTS); + if (inp->inp_options) { + m->m_len = inp->inp_options->m_len; + bcopy(mtod(inp->inp_options, caddr_t), + mtod(m, caddr_t), (unsigned)m->m_len); + } else + m->m_len = 0; + break; + + case IP_TOS: + case IP_TTL: + case IP_RECVOPTS: + case IP_RECVRETOPTS: + case IP_RECVDSTADDR: + *mp = m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof(int); + switch (optname) { + + case IP_TOS: + optval = inp->inp_ip.ip_tos; + break; + + case IP_TTL: + optval = inp->inp_ip.ip_ttl; + break; + +#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) + + case IP_RECVOPTS: + optval = OPTBIT(INP_RECVOPTS); + break; + + case IP_RECVRETOPTS: + optval = OPTBIT(INP_RECVRETOPTS); + break; + + case IP_RECVDSTADDR: + optval = OPTBIT(INP_RECVDSTADDR); + break; + } + *mtod(m, int *) = optval; + break; + + case IP_MULTICAST_IF: + case IP_MULTICAST_TTL: + case IP_MULTICAST_LOOP: + case IP_ADD_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + error = ip_getmoptions(optname, inp->inp_moptions, mp); + break; + + case IP_PORTRANGE: + *mp = m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof(int); + + if (inp->inp_flags & INP_HIGHPORT) + optval = IP_PORTRANGE_HIGH; + else if (inp->inp_flags & INP_LOWPORT) + optval = IP_PORTRANGE_LOW; + else + optval = 0; + + *mtod(m, int *) = optval; + break; + + case IPSEC_OUTSA: +#ifndef IPSEC + error = EINVAL; +#else + s = spltdb(); + if (inp->inp_tdb == NULL) { + error = ENOENT; + } else { + tdbi.spi = inp->inp_tdb->tdb_spi; + tdbi.dst = inp->inp_tdb->tdb_dst; + tdbi.proto = inp->inp_tdb->tdb_sproto; + *mp = m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof(tdbi); + bcopy((caddr_t)&tdbi, mtod(m, caddr_t), + (unsigned)m->m_len); + } + splx(s); +#endif /* IPSEC */ + break; + + case IP_AUTH_LEVEL: + case IP_ESP_TRANS_LEVEL: + case IP_ESP_NETWORK_LEVEL: +#ifndef IPSEC + *mtod(m, int *) = IPSEC_LEVEL_NONE; +#else + switch (optname) { + case IP_AUTH_LEVEL: + optval = 
inp->inp_seclevel[SL_AUTH]; + break; + + case IP_ESP_TRANS_LEVEL: + optval = inp->inp_seclevel[SL_ESP_TRANS]; + break; + + case IP_ESP_NETWORK_LEVEL: + optval = inp->inp_seclevel[SL_ESP_NETWORK]; + break; + } + *mtod(m, int *) = optval; +#endif + break; + default: + error = ENOPROTOOPT; + break; + } + break; + } + return (error); +} + +/* + * Set up IP options in pcb for insertion in output packets. + * Store in mbuf with pointer in pcbopt, adding pseudo-option + * with destination address if source routed. + */ +int +#ifdef notyet +ip_pcbopts(optname, pcbopt, m) + int optname; +#else +ip_pcbopts(pcbopt, m) +#endif + struct mbuf **pcbopt; + register struct mbuf *m; +{ + register int cnt, optlen; + register u_char *cp; + u_char opt; + + /* turn off any old options */ + if (*pcbopt) + (void)m_free(*pcbopt); + *pcbopt = 0; + if (m == (struct mbuf *)0 || m->m_len == 0) { + /* + * Only turning off any previous options. + */ + if (m) + (void)m_free(m); + return (0); + } + +#ifndef vax + if (m->m_len % sizeof(int32_t)) + goto bad; +#endif + /* + * IP first-hop destination address will be stored before + * actual options; move other options back + * and clear it when none present. + */ + if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) + goto bad; + cnt = m->m_len; + m->m_len += sizeof(struct in_addr); + cp = mtod(m, u_char *) + sizeof(struct in_addr); + ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); + bzero(mtod(m, caddr_t), sizeof(struct in_addr)); + + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[IPOPT_OPTVAL]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + optlen = 1; + else { + optlen = cp[IPOPT_OLEN]; + if (optlen <= IPOPT_OLEN || optlen > cnt) + goto bad; + } + switch (opt) { + + default: + break; + + case IPOPT_LSRR: + case IPOPT_SSRR: + /* + * user process specifies route as: + * ->A->B->C->D + * D must be our final destination (but we can't + * check that since we may not have connected yet). 
+ * A is first hop destination, which doesn't appear in + * actual IP option, but is stored before the options. + */ + if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) + goto bad; + m->m_len -= sizeof(struct in_addr); + cnt -= sizeof(struct in_addr); + optlen -= sizeof(struct in_addr); + cp[IPOPT_OLEN] = optlen; + /* + * Move first hop before start of options. + */ + bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), + sizeof(struct in_addr)); + /* + * Then copy rest of options back + * to close up the deleted entry. + */ + ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + + sizeof(struct in_addr)), + (caddr_t)&cp[IPOPT_OFFSET+1], + (unsigned)cnt + sizeof(struct in_addr)); + break; + } + } + if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) + goto bad; + *pcbopt = m; + return (0); + +bad: + (void)m_free(m); + return (EINVAL); +} + +/* + * Set the IP multicast options in response to user setsockopt(). + */ +int +ip_setmoptions(optname, imop, m) + int optname; + struct ip_moptions **imop; + struct mbuf *m; +{ + register int error = 0; + u_char loop; + register int i; + struct in_addr addr; + register struct ip_mreq *mreq; + register struct ifnet *ifp; + register struct ip_moptions *imo = *imop; + struct route ro; + register struct sockaddr_in *dst; + + if (imo == NULL) { + /* + * No multicast option buffer attached to the pcb; + * allocate one and initialize to default values. + */ + imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, + M_WAITOK); + + if (imo == NULL) + return (ENOBUFS); + *imop = imo; + imo->imo_multicast_ifp = NULL; + imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; + imo->imo_num_memberships = 0; + } + + switch (optname) { + + case IP_MULTICAST_IF: + /* + * Select the interface for outgoing multicast packets. 
+ */ + if (m == NULL || m->m_len != sizeof(struct in_addr)) { + error = EINVAL; + break; + } + addr = *(mtod(m, struct in_addr *)); + /* + * INADDR_ANY is used to remove a previous selection. + * When no interface is selected, a default one is + * chosen every time a multicast packet is sent. + */ + if (addr.s_addr == INADDR_ANY) { + imo->imo_multicast_ifp = NULL; + break; + } + /* + * The selected interface is identified by its local + * IP address. Find the interface and confirm that + * it supports multicasting. + */ + INADDR_TO_IFP(addr, ifp); + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { + error = EADDRNOTAVAIL; + break; + } + imo->imo_multicast_ifp = ifp; + break; + + case IP_MULTICAST_TTL: + /* + * Set the IP time-to-live for outgoing multicast packets. + */ + if (m == NULL || m->m_len != 1) { + error = EINVAL; + break; + } + imo->imo_multicast_ttl = *(mtod(m, u_char *)); + break; + + case IP_MULTICAST_LOOP: + /* + * Set the loopback flag for outgoing multicast packets. + * Must be zero or one. + */ + if (m == NULL || m->m_len != 1 || + (loop = *(mtod(m, u_char *))) > 1) { + error = EINVAL; + break; + } + imo->imo_multicast_loop = loop; + break; + + case IP_ADD_MEMBERSHIP: + /* + * Add a multicast group membership. + * Group must be a valid IP multicast address. + */ + if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { + error = EINVAL; + break; + } + mreq = mtod(m, struct ip_mreq *); + if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { + error = EINVAL; + break; + } + /* + * If no interface address was provided, use the interface of + * the route to the given multicast address. 
+ */ + if (mreq->imr_interface.s_addr == INADDR_ANY) { + ro.ro_rt = NULL; + dst = satosin(&ro.ro_dst); + dst->sin_len = sizeof(*dst); + dst->sin_family = AF_INET; + dst->sin_addr = mreq->imr_multiaddr; + rtalloc(&ro); + if (ro.ro_rt == NULL) { + error = EADDRNOTAVAIL; + break; + } + ifp = ro.ro_rt->rt_ifp; + rtfree(ro.ro_rt); + } else { + INADDR_TO_IFP(mreq->imr_interface, ifp); + } + /* + * See if we found an interface, and confirm that it + * supports multicast. + */ + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { + error = EADDRNOTAVAIL; + break; + } + /* + * See if the membership already exists or if all the + * membership slots are full. + */ + for (i = 0; i < imo->imo_num_memberships; ++i) { + if (imo->imo_membership[i]->inm_ifp == ifp && + imo->imo_membership[i]->inm_addr.s_addr + == mreq->imr_multiaddr.s_addr) + break; + } + if (i < imo->imo_num_memberships) { + error = EADDRINUSE; + break; + } + if (i == IP_MAX_MEMBERSHIPS) { + error = ETOOMANYREFS; + break; + } + /* + * Everything looks good; add a new record to the multicast + * address list for the given interface. + */ + if ((imo->imo_membership[i] = + in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { + error = ENOBUFS; + break; + } + ++imo->imo_num_memberships; + break; + + case IP_DROP_MEMBERSHIP: + /* + * Drop a multicast group membership. + * Group must be a valid IP multicast address. + */ + if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { + error = EINVAL; + break; + } + mreq = mtod(m, struct ip_mreq *); + if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { + error = EINVAL; + break; + } + /* + * If an interface address was specified, get a pointer + * to its ifnet structure. + */ + if (mreq->imr_interface.s_addr == INADDR_ANY) + ifp = NULL; + else { + INADDR_TO_IFP(mreq->imr_interface, ifp); + if (ifp == NULL) { + error = EADDRNOTAVAIL; + break; + } + } + /* + * Find the membership in the membership array. 
+ */ + for (i = 0; i < imo->imo_num_memberships; ++i) { + if ((ifp == NULL || + imo->imo_membership[i]->inm_ifp == ifp) && + imo->imo_membership[i]->inm_addr.s_addr == + mreq->imr_multiaddr.s_addr) + break; + } + if (i == imo->imo_num_memberships) { + error = EADDRNOTAVAIL; + break; + } + /* + * Give up the multicast address record to which the + * membership points. + */ + in_delmulti(imo->imo_membership[i]); + /* + * Remove the gap in the membership array. + */ + for (++i; i < imo->imo_num_memberships; ++i) + imo->imo_membership[i-1] = imo->imo_membership[i]; + --imo->imo_num_memberships; + break; + + default: + error = EOPNOTSUPP; + break; + } + + /* + * If all options have default values, no need to keep the mbuf. + */ + if (imo->imo_multicast_ifp == NULL && + imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && + imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && + imo->imo_num_memberships == 0) { + free(*imop, M_IPMOPTS); + *imop = NULL; + } + + return (error); +} + +/* + * Return the IP multicast options in response to user getsockopt(). + */ +int +ip_getmoptions(optname, imo, mp) + int optname; + register struct ip_moptions *imo; + register struct mbuf **mp; +{ + u_char *ttl; + u_char *loop; + struct in_addr *addr; + struct in_ifaddr *ia; + + *mp = m_get(M_WAIT, MT_SOOPTS); + + switch (optname) { + + case IP_MULTICAST_IF: + addr = mtod(*mp, struct in_addr *); + (*mp)->m_len = sizeof(struct in_addr); + if (imo == NULL || imo->imo_multicast_ifp == NULL) + addr->s_addr = INADDR_ANY; + else { + IFP_TO_IA(imo->imo_multicast_ifp, ia); + addr->s_addr = (ia == NULL) ? INADDR_ANY + : ia->ia_addr.sin_addr.s_addr; + } + return (0); + + case IP_MULTICAST_TTL: + ttl = mtod(*mp, u_char *); + (*mp)->m_len = 1; + *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL + : imo->imo_multicast_ttl; + return (0); + + case IP_MULTICAST_LOOP: + loop = mtod(*mp, u_char *); + (*mp)->m_len = 1; + *loop = (imo == NULL) ? 
IP_DEFAULT_MULTICAST_LOOP + : imo->imo_multicast_loop; + return (0); + + default: + return (EOPNOTSUPP); + } +} + +/* + * Discard the IP multicast options. + */ +void +ip_freemoptions(imo) + register struct ip_moptions *imo; +{ + register int i; + + if (imo != NULL) { + for (i = 0; i < imo->imo_num_memberships; ++i) + in_delmulti(imo->imo_membership[i]); + free(imo, M_IPMOPTS); + } +} + +/* + * Routine called from ip_output() to loop back a copy of an IP multicast + * packet to the input queue of a specified interface. Note that this + * calls the output routine of the loopback "driver", but with an interface + * pointer that might NOT be &loif -- easier than replicating that code here. + */ +static void +ip_mloopback(ifp, m, dst) + struct ifnet *ifp; + register struct mbuf *m; + register struct sockaddr_in *dst; +{ + register struct ip *ip; + struct mbuf *copym; + + copym = m_copy(m, 0, M_COPYALL); + if (copym != NULL) { + /* + * We don't bother to fragment if the IP length is greater + * than the interface's MTU. Can this possibly matter? 
+ */ + ip = mtod(copym, struct ip *); + ip->ip_len = htons((u_int16_t)ip->ip_len); + ip->ip_off = htons((u_int16_t)ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); + (void) looutput(ifp, copym, sintosa(dst), NULL); + } +} diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/raw_ip.c b/ecos/packages/net/tcpip/current/src/sys/netinet/raw_ip.c new file mode 100644 index 0000000..06d86e1 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/raw_ip.c @@ -0,0 +1,522 @@ +//========================================================================== +// +// sys/netinet/raw_ip.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: raw_ip.c,v 1.19 1999/09/23 07:20:35 deraadt Exp $ */ +/* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. 
If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. +*/ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/socketvar.h> +#include <sys/errno.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_mroute.h> +#include <netinet/ip_var.h> +#include <netinet/in_pcb.h> +#include <netinet/in_var.h> +#include <netinet/ip_icmp.h> + +#ifdef IPSEC +extern int check_ipsec_policy __P((struct inpcb *, u_int32_t)); +#endif + +#include <machine/stdarg.h> + +struct inpcbtable rawcbtable; + +/* + * Nominal space allocated to a raw ip socket. + */ +#define RIPSNDQ 8192 +#define RIPRCVQ 8192 + +/* + * Raw interface to IP protocol. + */ + +/* + * Initialize raw connection block q. + */ +void +rip_init() +{ + + in_pcbinit(&rawcbtable, 1); +} + +struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; + +/* + * Setup generic address and protocol structures + * for raw_input routine, then pass them along with + * mbuf chain. + */ +void +#if __STDC__ +rip_input(struct mbuf *m, ...) 
+#else +rip_input(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + register struct ip *ip = mtod(m, struct ip *); + register struct inpcb *inp; + struct socket *last = 0; + + ripsrc.sin_addr = ip->ip_src; + for (inp = rawcbtable.inpt_queue.cqh_first; + inp != (struct inpcb *)&rawcbtable.inpt_queue; + inp = inp->inp_queue.cqe_next) { + if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) + continue; + if (inp->inp_laddr.s_addr && + inp->inp_laddr.s_addr != ip->ip_dst.s_addr) + continue; + if (inp->inp_faddr.s_addr && + inp->inp_faddr.s_addr != ip->ip_src.s_addr) + continue; + if (last) { + struct mbuf *n; + if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + if (sbappendaddr(&last->so_rcv, + sintosa(&ripsrc), n, + (struct mbuf *)0) == 0) + /* should notify about lost packet */ + m_freem(n); + else + sorwakeup(last); + } + } + last = inp->inp_socket; + } + if (last) { + if (sbappendaddr(&last->so_rcv, sintosa(&ripsrc), m, + (struct mbuf *)0) == 0) + m_freem(m); + else + sorwakeup(last); + } else { + if (ip->ip_p != IPPROTO_ICMP) + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); + else + m_freem(m); + ipstat.ips_noproto++; + ipstat.ips_delivered--; + } +} + +/* + * Generate IP header and pass packet to ip_output. + * Tack on options user may have setup with control call. + */ +int +#if __STDC__ +rip_output(struct mbuf *m, ...) +#else +rip_output(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + struct socket *so; + u_long dst; + register struct ip *ip; + register struct inpcb *inp; + int flags; + va_list ap; + + va_start(ap, m); + so = va_arg(ap, struct socket *); + dst = va_arg(ap, u_long); + va_end(ap); + + inp = sotoinpcb(so); + flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; + + /* + * If the user handed us a complete IP packet, use it. + * Otherwise, allocate an mbuf for a header and fill it in. 
+ */ + if ((inp->inp_flags & INP_HDRINCL) == 0) { + if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { + m_freem(m); + return (EMSGSIZE); + } + M_PREPEND(m, sizeof(struct ip), M_WAIT); + ip = mtod(m, struct ip *); + ip->ip_tos = 0; + ip->ip_off = 0; + ip->ip_p = inp->inp_ip.ip_p; + ip->ip_len = m->m_pkthdr.len; + ip->ip_src = inp->inp_laddr; + ip->ip_dst.s_addr = dst; + ip->ip_ttl = MAXTTL; + } else { + if (m->m_pkthdr.len > IP_MAXPACKET) { + m_freem(m); + return (EMSGSIZE); + } + ip = mtod(m, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + /* + * don't allow both user specified and setsockopt options, + * and don't allow packet length sizes that will crash + */ + if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || + ip->ip_len > m->m_pkthdr.len || + ip->ip_len < ip->ip_hl << 2) { + m_freem(m); + return (EINVAL); + } + if (ip->ip_id == 0) { +#ifdef RANDOM_IP_ID + ip->ip_id = ip_randomid(); +#else + ip->ip_id = htons(ip_id++); +#endif + } + /* XXX prevent ip_output from overwriting header fields */ + flags |= IP_RAWOUTPUT; + ipstat.ips_rawout++; + } +#ifdef INET6 + /* + * A thought: Even though raw IP shouldn't be able to set IPv6 + * multicast options, if it does, the last parameter to + * ip_output should be guarded against v6/v4 problems. + */ +#endif + return (ip_output(m, inp->inp_options, &inp->inp_route, flags, + inp->inp_moptions, inp)); +} + +/* + * Raw IP socket option processing. 
+ */ +int +rip_ctloutput(op, so, level, optname, m) + int op; + struct socket *so; + int level, optname; + struct mbuf **m; +{ + register struct inpcb *inp = sotoinpcb(so); + register int error; + + if (level != IPPROTO_IP) { + if (op == PRCO_SETOPT && *m) + (void) m_free(*m); + return (EINVAL); + } + + switch (optname) { + + case IP_HDRINCL: + error = 0; + if (op == PRCO_SETOPT) { + if (*m == 0 || (*m)->m_len < sizeof (int)) + error = EINVAL; + else if (*mtod(*m, int *)) + inp->inp_flags |= INP_HDRINCL; + else + inp->inp_flags &= ~INP_HDRINCL; + if (*m) + (void)m_free(*m); + } else { + *m = m_get(M_WAIT, M_SOOPTS); + (*m)->m_len = sizeof(int); + *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL; + } + return (error); + + case MRT_INIT: + case MRT_DONE: + case MRT_ADD_VIF: + case MRT_DEL_VIF: + case MRT_ADD_MFC: + case MRT_DEL_MFC: + case MRT_VERSION: + case MRT_ASSERT: +#ifdef MROUTING + switch (op) { + case PRCO_SETOPT: + error = ip_mrouter_set(optname, so, m); + break; + case PRCO_GETOPT: + error = ip_mrouter_get(optname, so, m); + break; + default: + error = EINVAL; + break; + } + return (error); +#else + if (op == PRCO_SETOPT && *m) + m_free(*m); + return (EOPNOTSUPP); +#endif + } + return (ip_ctloutput(op, so, level, optname, m)); +} + +u_long rip_sendspace = RIPSNDQ; +u_long rip_recvspace = RIPRCVQ; + +/*ARGSUSED*/ +int +rip_usrreq(so, req, m, nam, control) + register struct socket *so; + int req; + struct mbuf *m, *nam, *control; +{ + register int error = 0; + register struct inpcb *inp = sotoinpcb(so); +#ifdef MROUTING + extern struct socket *ip_mrouter; +#endif + if (req == PRU_CONTROL) + return (in_control(so, (u_long)m, (caddr_t)nam, + (struct ifnet *)control)); + + if (inp == NULL && req != PRU_ATTACH) { + error = EINVAL; + goto release; + } + + switch (req) { + + case PRU_ATTACH: + if (inp) + panic("rip_attach"); +#ifndef __ECOS + if ((so->so_state & SS_PRIV) == 0) { + error = EACCES; + break; + } +#endif + if ((error = soreserve(so, rip_sendspace, 
rip_recvspace)) || + (error = in_pcballoc(so, &rawcbtable))) + break; + inp = (struct inpcb *)so->so_pcb; + inp->inp_ip.ip_p = (long)nam; + break; + + case PRU_DISCONNECT: + if ((so->so_state & SS_ISCONNECTED) == 0) { + error = ENOTCONN; + break; + } + /* FALLTHROUGH */ + case PRU_ABORT: + soisdisconnected(so); + /* FALLTHROUGH */ + case PRU_DETACH: + if (inp == 0) + panic("rip_detach"); +#ifdef MROUTING + if (so == ip_mrouter) + ip_mrouter_done(); +#endif + in_pcbdetach(inp); + break; + + case PRU_BIND: + { + struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); + + if (nam->m_len != sizeof(*addr)) { + error = EINVAL; + break; + } + if ((ifnet.tqh_first == 0) || + ((addr->sin_family != AF_INET) && + (addr->sin_family != AF_IMPLINK)) || + (addr->sin_addr.s_addr && + ifa_ifwithaddr(sintosa(addr)) == 0)) { + error = EADDRNOTAVAIL; + break; + } + inp->inp_laddr = addr->sin_addr; + break; + } + case PRU_CONNECT: + { + struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); + + if (nam->m_len != sizeof(*addr)) { + error = EINVAL; + break; + } + if (ifnet.tqh_first == 0) { + error = EADDRNOTAVAIL; + break; + } + if ((addr->sin_family != AF_INET) && + (addr->sin_family != AF_IMPLINK)) { + error = EAFNOSUPPORT; + break; + } + inp->inp_faddr = addr->sin_addr; + soisconnected(so); + break; + } + + case PRU_CONNECT2: + error = EOPNOTSUPP; + break; + + /* + * Mark the connection as being incapable of further input. + */ + case PRU_SHUTDOWN: + socantsendmore(so); + break; + + /* + * Ship a packet out. The appropriate raw output + * routine handles any massaging necessary. 
+ */ + case PRU_SEND: + { + register u_int32_t dst; + + if (so->so_state & SS_ISCONNECTED) { + if (nam) { + error = EISCONN; + break; + } + dst = inp->inp_faddr.s_addr; + } else { + if (nam == NULL) { + error = ENOTCONN; + break; + } + dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; + } +#ifdef IPSEC + if (!(error = check_ipsec_policy(inp, dst))) +#endif + error = rip_output(m, so, dst); + m = NULL; + break; + } + + case PRU_SENSE: + /* + * stat: don't bother with a blocksize. + */ + return (0); + + /* + * Not supported. + */ + case PRU_RCVOOB: + case PRU_RCVD: + case PRU_LISTEN: + case PRU_ACCEPT: + case PRU_SENDOOB: + error = EOPNOTSUPP; + break; + + case PRU_SOCKADDR: + in_setsockaddr(inp, nam); + break; + + case PRU_PEERADDR: + in_setpeeraddr(inp, nam); + break; + + default: + panic("rip_usrreq"); + } +release: + if (m != NULL) + m_freem(m); + return (error); +} diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_debug.c b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_debug.c new file mode 100644 index 0000000..a4627b6 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_debug.c @@ -0,0 +1,235 @@ +//========================================================================== +// +// sys/netinet/tcp_debug.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: tcp_debug.c,v 1.6 1999/12/08 06:50:20 itojun Exp $ */ +/* $NetBSD: tcp_debug.c,v 1.10 1996/02/13 23:43:36 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#ifdef TCPDEBUG +/* load symbolic names */ +#define PRUREQUESTS +#define TCPSTATES +#define TCPTIMERS +#define TANAMES + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/errno.h> + +#include <net/route.h> +#include <net/if.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/tcpip.h> +#include <netinet/tcp_debug.h> + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#include <netinet6/ip6.h> +#endif /* INET6 */ + +#ifdef TCPDEBUG +int tcpconsdebug = 1; +#endif +/* + * Tcp debug routines + */ +void +tcp_trace(act, ostate, tp, headers, req, len) + short act, ostate; + struct tcpcb *tp; + caddr_t headers; + int req; + int len; +{ +#ifdef TCPDEBUG + tcp_seq seq, ack; + int flags; +#endif + struct tcp_debug *td = &tcp_debug[tcp_debx++]; + struct tcpiphdr *ti = (struct tcpiphdr *)headers; +#ifdef INET6 + struct tcphdr *th; + struct tcpipv6hdr *ti6 = (struct tcpipv6hdr *)ti; +#endif + + if (tcp_debx == TCP_NDEBUG) + tcp_debx = 0; + td->td_time = iptime(); + td->td_act = act; + td->td_ostate = ostate; + td->td_tcb = (caddr_t)tp; + if (tp) + td->td_cb = *tp; + else + bzero((caddr_t)&td->td_cb, sizeof (*tp)); +#ifdef INET6 + if (tp->pf == PF_INET6) { + if (ti) { + th = &ti6->ti6_t; + td->td_ti6 = *ti6; + } else { + bzero(&td->td_ti6, sizeof(struct tcpipv6hdr)); + } + } else { + if (ti) { + th = &ti->ti_t; + td->td_ti = *ti; + } else { + bzero(&td->td_ti, sizeof(struct tcpiphdr)); + } + } +#else /* INET6 */ + if (ti) + td->td_ti = *ti; + else + bzero((caddr_t)&td->td_ti, sizeof (*ti)); +#endif /* INET6 */ + + td->td_req = req; +#ifdef TCPDEBUG + if (tcpconsdebug == 
0) + return; + if (tp) + printf("%x %s:", tp, tcpstates[ostate]); + else + printf("???????? "); + printf("%s ", tanames[act]); + switch (act) { + + case TA_INPUT: + case TA_OUTPUT: + case TA_DROP: + if (ti == 0) + break; + seq = th->th_seq; + ack = th->th_ack; + if (act == TA_OUTPUT) { + seq = ntohl(seq); + ack = ntohl(ack); + } + if (len) + printf("[%x..%x)", seq, seq+len); + else + printf("%x", seq); + printf("@%x, urp=%x", ack, th->th_urp); + flags = th->th_flags; + if (flags) { +#ifndef lint + char *cp = "<"; +#define pf(f) { if (th->th_flags&TH_/**/f) { printf("%s%s", cp, "f"); cp = ","; } } + pf(SYN); pf(ACK); pf(FIN); pf(RST); pf(PUSH); pf(URG); +#endif + printf(">"); + } + break; + + case TA_USER: + printf("%s", prurequests[req&0xff]); + if ((req & 0xff) == PRU_SLOWTIMO) + printf("<%s>", tcptimers[req>>8]); + break; + } + if (tp) + printf(" -> %s", tcpstates[tp->t_state]); + /* print out internal state of tp !?! */ + printf("\n"); + if (tp == 0) + return; + printf("\trcv_(nxt,wnd,up) (%x,%x,%x) snd_(una,nxt,max) (%x,%x,%x)\n", + tp->rcv_nxt, tp->rcv_wnd, tp->rcv_up, tp->snd_una, tp->snd_nxt, + tp->snd_max); + printf("\tsnd_(wl1,wl2,wnd) (%x,%x,%x)\n", + tp->snd_wl1, tp->snd_wl2, tp->snd_wnd); +#endif /* TCPDEBUG */ +} +#endif diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_input.c b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_input.c new file mode 100644 index 0000000..b7a4ad9 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_input.c @@ -0,0 +1,2996 @@ +//========================================================================== +// +// sys/netinet/tcp_input.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: tcp_input.c,v 1.54 1999/12/15 16:37:20 provos Exp $ */ +/* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#ifndef TUBA_INCLUDE +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/errno.h> + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/tcpip.h> +#include <netinet/tcp_debug.h> +#ifndef __ECOS +#include <dev/rndvar.h> +#endif +#include <machine/stdarg.h> +#ifndef __ECOS +#include <sys/md5k.h> +#endif + +#ifdef IPSEC +#include <netinet/ip_ipsp.h> +#endif /* IPSEC */ + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#include <sys/domain.h> +#include <netinet6/in6_var.h> +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/tcpipv6.h> +#include <netinet/icmp6.h> +#include <netinet6/nd6.h> + +#ifndef CREATE_IPV6_MAPPED +#define CREATE_IPV6_MAPPED(a6, a4) \ +do { \ + bzero(&(a6), sizeof(a6)); \ + (a6).s6_addr[10] = (a6).s6_addr[11] = 0xff; \ + *(u_int32_t *)&(a6).s6_addr[12] = (a4); \ +} while (0) +#endif + +struct tcpiphdr tcp_saveti; +struct tcpipv6hdr tcp_saveti6; + +/* for the packet header length in the mbuf */ +#define M_PH_LEN(m) (((struct mbuf *)(m))->m_pkthdr.len) +#define M_V6_LEN(m) (M_PH_LEN(m) - sizeof(struct ip6_hdr)) +#define M_V4_LEN(m) (M_PH_LEN(m) - sizeof(struct ip)) +#endif /* INET6 */ + +int tcprexmtthresh = 3; +struct tcpiphdr tcp_saveti; +int tcptv_keep_init = TCPTV_KEEP_INIT; + +extern u_long sb_max; + +#endif /* TUBA_INCLUDE */ +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) + +/* for modulo comparisons of timestamps */ +#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) +#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Neighbor Discovery, 
Neighbor Unreachability Detection Upper layer hint. + */ +#ifdef INET6 +#define ND6_HINT(tp) \ +do { \ + if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) \ + && !(tp->t_inpcb->inp_flags & INP_IPV6_MAPPED) \ + && tp->t_inpcb->inp_route6.ro_rt) { \ + nd6_nud_hint(tp->t_inpcb->inp_route6.ro_rt, NULL); \ + } \ +} while (0) +#else +#define ND6_HINT(tp) +#endif + +/* + * Insert segment ti into reassembly queue of tcp with + * control block tp. Return TH_FIN if reassembly now includes + * a segment with FIN. The macro form does the common case inline + * (segment is the next to be received on an established connection, + * and the queue is empty), avoiding linkage into and removal + * from the queue and repetition of various conversions. + * Set DELACK for segments received in order, but ack immediately + * when segments are out of order (so fast retransmit can work). + */ + +#ifndef TUBA_INCLUDE + +int +tcp_reass(tp, th, m, tlen) + register struct tcpcb *tp; + register struct tcphdr *th; + struct mbuf *m; + int *tlen; +{ + register struct ipqent *p, *q, *nq, *tiqe; + struct socket *so = tp->t_inpcb->inp_socket; + int flags; + + /* + * Call with th==0 after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (th == 0) + goto present; + + /* + * Allocate a new queue entry, before we throw away any data. + * If we can't, just drop the packet. XXX + */ + MALLOC(tiqe, struct ipqent *, sizeof (struct ipqent), M_IPQ, M_NOWAIT); + if (tiqe == NULL) { + tcpstat.tcps_rcvmemdrop++; + m_freem(m); + return (0); + } + + /* + * Find a segment which begins after this one does. + */ + for (p = NULL, q = tp->segq.lh_first; q != NULL; + p = q, q = q->ipqe_q.le_next) + if (SEQ_GT(q->ipqe_tcp->th_seq, th->th_seq)) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. 
+ */ + if (p != NULL) { + register struct tcphdr *phdr = p->ipqe_tcp; + register int i; + + /* conversion to int (in i) handles seq wraparound */ + i = phdr->th_seq + phdr->th_reseqlen - th->th_seq; + if (i > 0) { + if (i >= *tlen) { + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += *tlen; + m_freem(m); + FREE(tiqe, M_IPQ); + return (0); + } + m_adj(m, i); + *tlen -= i; + th->th_seq += i; + } + } + tcpstat.tcps_rcvoopack++; + tcpstat.tcps_rcvoobyte += *tlen; + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + for (; q != NULL; q = nq) { + register struct tcphdr *qhdr = q->ipqe_tcp; + register int i = (th->th_seq + *tlen) - qhdr->th_seq; + + if (i <= 0) + break; + if (i < qhdr->th_reseqlen) { + qhdr->th_seq += i; + qhdr->th_reseqlen -= i; + m_adj(q->ipqe_m, i); + break; + } + nq = q->ipqe_q.le_next; + m_freem(q->ipqe_m); + LIST_REMOVE(q, ipqe_q); + FREE(q, M_IPQ); + } + + /* Insert the new fragment queue entry into place. */ + tiqe->ipqe_m = m; + th->th_reseqlen = *tlen; + tiqe->ipqe_tcp = th; + if (p == NULL) { + LIST_INSERT_HEAD(&tp->segq, tiqe, ipqe_q); + } else { + LIST_INSERT_AFTER(p, tiqe, ipqe_q); + } + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) + return (0); + q = tp->segq.lh_first; + if (q == NULL || q->ipqe_tcp->th_seq != tp->rcv_nxt) + return (0); + if (tp->t_state == TCPS_SYN_RECEIVED && q->ipqe_tcp->th_reseqlen) + return (0); + do { + tp->rcv_nxt += q->ipqe_tcp->th_reseqlen; + flags = q->ipqe_tcp->th_flags & TH_FIN; + + nq = q->ipqe_q.le_next; + LIST_REMOVE(q, ipqe_q); + ND6_HINT(tp); + if (so->so_state & SS_CANTRCVMORE) + m_freem(q->ipqe_m); + else + sbappend(&so->so_rcv, q->ipqe_m); + FREE(q, M_IPQ); + q = nq; + } while (q != NULL && q->ipqe_tcp->th_seq == tp->rcv_nxt); + sorwakeup(so); + return (flags); +} + +/* + * First check for a port-specific bomb. 
We do not want to drop half-opens + * for other ports if this is the only port being bombed. We only check + * the bottom 40 half open connections, to avoid wasting too much time. + * + * Or, otherwise it is more likely a generic syn bomb, so delete the oldest + * half-open connection. + */ +void +tcpdropoldhalfopen(avoidtp, port) + struct tcpcb *avoidtp; + u_int16_t port; +{ + register struct inpcb *inp; + register struct tcpcb *tp; + int ncheck = 40; + int s; + + s = splnet(); + inp = tcbtable.inpt_queue.cqh_first; + if (inp) /* XXX */ + for (; inp != (struct inpcb *)&tcbtable.inpt_queue && --ncheck; + inp = inp->inp_queue.cqe_prev) { + if ((tp = (struct tcpcb *)inp->inp_ppcb) && + tp != avoidtp && + tp->t_state == TCPS_SYN_RECEIVED && + port == inp->inp_lport) { + tcp_close(tp); + goto done; + } + } + + inp = tcbtable.inpt_queue.cqh_first; + if (inp) /* XXX */ + for (; inp != (struct inpcb *)&tcbtable.inpt_queue; + inp = inp->inp_queue.cqe_prev) { + if ((tp = (struct tcpcb *)inp->inp_ppcb) && + tp != avoidtp && + tp->t_state == TCPS_SYN_RECEIVED) { + tcp_close(tp); + goto done; + } + } +done: + splx(s); +} + +#if defined(INET6) && !defined(TCP6) +int +tcp6_input(mp, offp, proto) + struct mbuf **mp; + int *offp, proto; +{ + struct mbuf *m = *mp; + +#if defined(NFAITH) && 0 < NFAITH + if (m->m_pkthdr.rcvif) { + if (m->m_pkthdr.rcvif->if_type == IFT_FAITH) { + /* XXX send icmp6 host/port unreach? */ + m_freem(m); + return IPPROTO_DONE; + } + } +#endif + + /* + * draft-itojun-ipv6-tcp-to-anycast + * better place to put this in? 
+ */ + if (m->m_flags & M_ANYCAST6) { + if (m->m_len >= sizeof(struct ip6_hdr)) { + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + icmp6_error(m, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_ADDR, + (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); + } else + m_freem(m); + return IPPROTO_DONE; + } + + tcp_input(m, *offp, proto); + return IPPROTO_DONE; +} +#endif + +/* + * TCP input routine, follows pages 65-76 of the + * protocol specification dated September, 1981 very closely. + */ +void +#if __STDC__ +tcp_input(struct mbuf *m, ...) +#else +tcp_input(m, va_alist) + register struct mbuf *m; +#endif +{ + register struct tcpiphdr *ti; + register struct inpcb *inp; + caddr_t optp = NULL; + int optlen = 0; + int len, tlen, off; + register struct tcpcb *tp = 0; + register int tiflags; + struct socket *so = NULL; + int todrop, acked, ourfinisacked, needoutput = 0; + int hdroptlen = 0; + short ostate = 0; + struct in_addr laddr; + int dropsocket = 0; + int iss = 0; + u_long tiwin; + u_int32_t ts_val, ts_ecr; + int ts_present = 0; + int iphlen; + va_list ap; + register struct tcphdr *th; +#ifdef IPSEC + struct tdb *tdb = NULL; +#endif /* IPSEC */ +#ifdef INET6 + struct in6_addr laddr6; + unsigned short is_ipv6; /* Type of incoming datagram. */ + struct ip6_hdr *ipv6 = NULL; +#endif /* INET6 */ + + va_start(ap, m); + iphlen = va_arg(ap, int); + va_end(ap); + + tcpstat.tcps_rcvtotal++; + +#ifdef IPSEC + /* Save the last SA which was used to process the mbuf */ + if ((m->m_flags & (M_CONF|M_AUTH)) && m->m_pkthdr.tdbi) { + struct tdb_ident *tdbi = m->m_pkthdr.tdbi; + /* XXX gettdb() should really be called at spltdb(). */ + /* XXX this is splsoftnet(), currently they are the same. */ + tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); + free(m->m_pkthdr.tdbi, M_TEMP); + m->m_pkthdr.tdbi = NULL; + } +#endif /* IPSEC */ +#ifdef INET6 + /* + * Before we do ANYTHING, we have to figure out if it's TCP/IPv6 or + * TCP/IPv4. 
+ */ + is_ipv6 = mtod(m, struct ip *)->ip_v == 6; +#endif /* INET6 */ + + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ +#ifndef INET6 + ti = mtod(m, struct tcpiphdr *); +#else /* INET6 */ + if (!is_ipv6) +#endif /* INET6 */ + if (iphlen > sizeof (struct ip)) { +#if 0 /*XXX*/ + ip_stripoptions(m, (struct mbuf *)0); +#else +#ifdef __ECOS + diag_printf("extension headers are not allowed\n"); +#else + printf("extension headers are not allowed\n"); +#endif + m_freem(m); + return; +#endif + } + if (m->m_len < iphlen + sizeof(struct tcphdr)) { + if ((m = m_pullup2(m, iphlen + sizeof(struct tcphdr))) == 0) { + tcpstat.tcps_rcvshort++; + return; + } +#ifndef INET6 + ti = mtod(m, struct tcpiphdr *); +#endif /* INET6 */ + } + + tlen = m->m_pkthdr.len - iphlen; + +#ifdef INET6 + /* + * After that, do initial segment processing which is still very + * dependent on what IP version you're using. + */ + + if (is_ipv6) { +#ifdef DIAGNOSTIC + if (iphlen < sizeof(struct ip6_hdr)) { + m_freem(m); + return; + } +#endif /* DIAGNOSTIC */ + + /* strip off any options */ + if (iphlen > sizeof(struct ip6_hdr)) { +#if 0 /*XXX*/ + ipv6_stripoptions(m, iphlen); +#else +#ifdef __ECOS + diag_printf("extension headers are not allowed\n"); +#else + printf("extension headers are not allowed\n"); +#endif + m_freem(m); + return; +#endif + iphlen = sizeof(struct ip6_hdr); + } + + ti = NULL; + ipv6 = mtod(m, struct ip6_hdr *); + + if (in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen)) { + tcpstat.tcps_rcvbadsum++; + goto drop; + } /* endif in6_cksum */ + } else { + ti = mtod(m, struct tcpiphdr *); +#endif /* INET6 */ + + /* + * Checksum extended TCP header and data. 
+ */ +#ifndef INET6 + tlen = ((struct ip *)ti)->ip_len; +#endif /* INET6 */ + len = sizeof (struct ip) + tlen; + bzero(ti->ti_x1, sizeof ti->ti_x1); + ti->ti_len = (u_int16_t)tlen; + HTONS(ti->ti_len); + if ((ti->ti_sum = in_cksum(m, len)) != 0) { + tcpstat.tcps_rcvbadsum++; + goto drop; + } +#ifdef INET6 + } +#endif /* INET6 */ +#endif /* TUBA_INCLUDE */ + + th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen); + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. XXX + */ + off = th->th_off << 2; + if (off < sizeof (struct tcphdr) || off > tlen) { + tcpstat.tcps_rcvbadoff++; + goto drop; + } + tlen -= off; + if (off > sizeof (struct tcphdr)) { + if (m->m_len < iphlen + off) { + if ((m = m_pullup2(m, iphlen + off)) == 0) { + tcpstat.tcps_rcvshort++; + return; + } +#ifdef INET6 + if (is_ipv6) + ipv6 = mtod(m, struct ip6_hdr *); + else +#endif /* INET6 */ + ti = mtod(m, struct tcpiphdr *); + th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen); + } + optlen = off - sizeof (struct tcphdr); + optp = mtod(m, caddr_t) + iphlen + sizeof(struct tcphdr); + /* + * Do quick retrieval of timestamp options ("options + * prediction?"). If timestamp is the only option and it's + * formatted as recommended in RFC 1323 appendix A, we + * quickly get the values now and not bother calling + * tcp_dooptions(), etc. + */ + if ((optlen == TCPOLEN_TSTAMP_APPA || + (optlen > TCPOLEN_TSTAMP_APPA && + optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && + *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) && + (th->th_flags & TH_SYN) == 0) { + ts_present = 1; + ts_val = ntohl(*(u_int32_t *)(optp + 4)); + ts_ecr = ntohl(*(u_int32_t *)(optp + 8)); + optp = NULL; /* we've parsed the options */ + } + } + tiflags = th->th_flags; + + /* + * Convert TCP protocol specific fields to host format. + */ + NTOHL(th->th_seq); + NTOHL(th->th_ack); + NTOHS(th->th_win); + NTOHS(th->th_urp); + + /* + * Locate pcb for segment. 
+ */ +findpcb: +#ifdef INET6 + if (is_ipv6) { + inp = in6_pcbhashlookup(&tcbtable, &ipv6->ip6_src, th->th_sport, + &ipv6->ip6_dst, th->th_dport); + } else +#endif /* INET6 */ + inp = in_pcbhashlookup(&tcbtable, ti->ti_src, ti->ti_sport, + ti->ti_dst, ti->ti_dport); + if (inp == 0) { + ++tcpstat.tcps_pcbhashmiss; +#ifdef INET6 + if (is_ipv6) + inp = in_pcblookup(&tcbtable, &ipv6->ip6_src, + th->th_sport, &ipv6->ip6_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_IPV6); + else +#endif /* INET6 */ + inp = in_pcblookup(&tcbtable, &ti->ti_src, ti->ti_sport, + &ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD); + /* + * If the state is CLOSED (i.e., TCB does not exist) then + * all data in the incoming segment is discarded. + * If the TCB exists but is in CLOSED state, it is embryonic, + * but should either do a listen or a connect soon. + */ + if (inp == 0) { + ++tcpstat.tcps_noport; + goto dropwithreset; + } + } + + tp = intotcpcb(inp); + if (tp == 0) + goto dropwithreset; + if (tp->t_state == TCPS_CLOSED) + goto drop; + + /* Unscale the window into a 32-bit value. */ + if ((tiflags & TH_SYN) == 0) + tiwin = th->th_win << tp->snd_scale; + else + tiwin = th->th_win; + + so = inp->inp_socket; + if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { + if (so->so_options & SO_DEBUG) { + ostate = tp->t_state; +#ifdef INET6 + if (is_ipv6) + tcp_saveti6 = *(mtod(m, struct tcpipv6hdr *)); + else +#endif /* INET6 */ + tcp_saveti = *ti; + } + if (so->so_options & SO_ACCEPTCONN) { + struct socket *so1; + + so1 = sonewconn(so, 0); + if (so1 == NULL) { + tcpdropoldhalfopen(tp, th->th_dport); + so1 = sonewconn(so, 0); + if (so1 == NULL) + goto drop; + } + so = so1; + /* + * This is ugly, but .... + * + * Mark socket as temporary until we're + * committed to keeping it. The code at + * ``drop'' and ``dropwithreset'' check the + * flag dropsocket to see if the temporary + * socket created here should be discarded. 
+ * We mark the socket as discardable until + * we're committed to it below in TCPS_LISTEN. + */ + dropsocket++; +#ifdef IPSEC + /* + * We need to copy the required security levels + * from the old pcb. + */ + { + struct inpcb *newinp = (struct inpcb *)so->so_pcb; + bcopy(inp->inp_seclevel, newinp->inp_seclevel, + sizeof(inp->inp_seclevel)); + newinp->inp_secrequire = inp->inp_secrequire; + } +#endif /* IPSEC */ +#ifdef INET6 + /* + * inp still has the OLD in_pcb stuff, set the + * v6-related flags on the new guy, too. This is + * done particularly for the case where an AF_INET6 + * socket is bound only to a port, and a v4 connection + * comes in on that port. + * we also copy the flowinfo from the original pcb + * to the new one. + */ + { + int flags = inp->inp_flags; + struct inpcb *oldinpcb = inp; + + inp = (struct inpcb *)so->so_pcb; + inp->inp_flags |= (flags & (INP_IPV6 | INP_IPV6_UNDEC + | INP_IPV6_MAPPED)); + if ((inp->inp_flags & INP_IPV6) && + !(inp->inp_flags & INP_IPV6_MAPPED)) { + inp->inp_ipv6.ip6_hlim = + oldinpcb->inp_ipv6.ip6_hlim; + inp->inp_ipv6.ip6_flow = + oldinpcb->inp_ipv6.ip6_flow; + } + } +#else /* INET6 */ + inp = (struct inpcb *)so->so_pcb; +#endif /* INET6 */ + inp->inp_lport = th->th_dport; +#ifdef INET6 + if (is_ipv6) { + inp->inp_laddr6 = ipv6->ip6_dst; + inp->inp_fflowinfo = htonl(0x0fffffff) & + ipv6->ip6_flow; + + /*inp->inp_options = ip6_srcroute();*/ /* soon. */ + /* still need to tweak outbound options + processing to include this mbuf in + the right place and put the correct + NextHdr values in the right places. 
+ XXX rja */ + } else { + if (inp->inp_flags & INP_IPV6) {/* v4 to v6 socket */ + CREATE_IPV6_MAPPED(inp->inp_laddr6, + ti->ti_dst.s_addr); + } else { +#endif /* INET6 */ + inp->inp_laddr = ti->ti_dst; + inp->inp_options = ip_srcroute(); +#ifdef INET6 + } + } +#endif /* INET6 */ + in_pcbrehash(inp); + tp = intotcpcb(inp); + tp->t_state = TCPS_LISTEN; + + /* Compute proper scaling value from buffer space + */ + while (tp->request_r_scale < TCP_MAX_WINSHIFT && + TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat) + tp->request_r_scale++; + } + } + +#ifdef IPSEC + /* Check if this socket requires security for incoming packets */ + if ((inp->inp_seclevel[SL_AUTH] >= IPSEC_LEVEL_REQUIRE && + !(m->m_flags & M_AUTH)) || + (inp->inp_seclevel[SL_ESP_TRANS] >= IPSEC_LEVEL_REQUIRE && + !(m->m_flags & M_CONF))) { +#ifdef notyet +#ifdef INET6 + if (is_ipv6) + icmp6_error(m, ICMPV6_BLAH, ICMPV6_BLAH, 0); + else +#endif /* INET6 */ + icmp_error(m, ICMP_BLAH, ICMP_BLAH, 0, 0); +#endif /* notyet */ + tcpstat.tcps_rcvnosec++; + goto drop; + } + /* Use tdb_bind_out for this inp's outbound communication */ + if (tdb) + tdb_add_inp(tdb, inp); +#endif /*IPSEC */ + + /* + * Segment received on connection. + * Reset idle time and keep-alive timer. + */ + tp->t_idle = 0; + if (tp->t_state != TCPS_SYN_RECEIVED) + tp->t_timer[TCPT_KEEP] = tcp_keepidle; + +#ifdef TCP_SACK + if (!tp->sack_disable) + tcp_del_sackholes(tp, th); /* Delete stale SACK holes */ +#endif /* TCP_SACK */ + + /* + * Process options if not in LISTEN state, + * else do it below (after getting remote address). + */ + if (optp && tp->t_state != TCPS_LISTEN) + tcp_dooptions(tp, optp, optlen, th, + &ts_present, &ts_val, &ts_ecr); + +#ifdef TCP_SACK + if (!tp->sack_disable) { + tp->rcv_laststart = th->th_seq; /* last rec'vd segment*/ + tp->rcv_lastend = th->th_seq + tlen; + } +#endif /* TCP_SACK */ + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. 
If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + */ + if (tp->t_state == TCPS_ESTABLISHED && + (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && + (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && + th->th_seq == tp->rcv_nxt && + tiwin && tiwin == tp->snd_wnd && + tp->snd_nxt == tp->snd_max) { + + /* + * If last ACK falls within this segment's sequence numbers, + * record the timestamp. + * Fix from Braden, see Stevens p. 870 + */ + if (ts_present && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_now; + tp->ts_recent = ts_val; + } + + if (tlen == 0) { + if (SEQ_GT(th->th_ack, tp->snd_una) && + SEQ_LEQ(th->th_ack, tp->snd_max) && + tp->snd_cwnd >= tp->snd_wnd && + tp->t_dupacks == 0) { + /* + * this is a pure ack for outstanding data. + */ + ++tcpstat.tcps_predack; + if (ts_present) + tcp_xmit_timer(tp, tcp_now-ts_ecr+1); + else if (tp->t_rtt && + SEQ_GT(th->th_ack, tp->t_rtseq)) + tcp_xmit_timer(tp, tp->t_rtt); + acked = th->th_ack - tp->snd_una; + tcpstat.tcps_rcvackpack++; + tcpstat.tcps_rcvackbyte += acked; + ND6_HINT(tp); + sbdrop(&so->so_snd, acked); + tp->snd_una = th->th_ack; +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + /* + * We want snd_last to track snd_una so + * as to avoid sequence wraparound problems + * for very large transfers. 
+ */ + tp->snd_last = tp->snd_una; +#endif /* TCP_SACK or TCP_NEWRENO */ +#if defined(TCP_SACK) && defined(TCP_FACK) + tp->snd_fack = tp->snd_una; + tp->retran_data = 0; +#endif /* TCP_FACK */ + m_freem(m); + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let tcp_output + * decide between more output or persist. + */ + if (tp->snd_una == tp->snd_max) + tp->t_timer[TCPT_REXMT] = 0; + else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + if (sb_notify(&so->so_snd)) + sowwakeup(so); + if (so->so_snd.sb_cc) + (void) tcp_output(tp); + return; + } + } else if (th->th_ack == tp->snd_una && + tp->segq.lh_first == NULL && + tlen <= sbspace(&so->so_rcv)) { + /* + * This is a pure, in-sequence data packet + * with nothing on the reassembly queue and + * we have enough buffer space to take it. + */ +#ifdef TCP_SACK + /* Clean receiver SACK report if present */ + if (!tp->sack_disable && tp->rcv_numsacks) + tcp_clean_sackreport(tp); +#endif /* TCP_SACK */ + ++tcpstat.tcps_preddat; + tp->rcv_nxt += tlen; + tcpstat.tcps_rcvpack++; + tcpstat.tcps_rcvbyte += tlen; + ND6_HINT(tp); + /* + * Drop TCP, IP headers and TCP options then add data + * to socket buffer. + */ + m_adj(m, iphlen + off); + sbappend(&so->so_rcv, m); + sorwakeup(so); + if (th->th_flags & TH_PUSH) + tp->t_flags |= TF_ACKNOW; + else + tp->t_flags |= TF_DELACK; + return; + } + } + + /* + * Compute mbuf offset to TCP data segment. + */ + hdroptlen = iphlen + off; + + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. 
+ */ + { int win; + + win = sbspace(&so->so_rcv); + if (win < 0) + win = 0; + tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + } + + switch (tp->t_state) { + + /* + * If the state is LISTEN then ignore segment if it contains an RST. + * If the segment contains an ACK then it is bad and send a RST. + * If it does not contain a SYN then it is not interesting; drop it. + * If it is from this socket, drop it, it must be forged. + * Don't bother responding if the destination was a broadcast. + * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial + * tp->iss, and send a segment: + * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> + * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. + * Fill in remote peer address fields if not previously specified. + * Enter SYN_RECEIVED state, and process any other fields of this + * segment in this state. + */ + case TCPS_LISTEN: { + struct mbuf *am; + register struct sockaddr_in *sin; +#ifdef INET6 + register struct sockaddr_in6 *sin6; +#endif /* INET6 */ + + if (tiflags & TH_RST) + goto drop; + if (tiflags & TH_ACK) + goto dropwithreset; + if ((tiflags & TH_SYN) == 0) + goto drop; + if (th->th_dport == th->th_sport) { +#ifdef INET6 + if (is_ipv6) { + if (IN6_ARE_ADDR_EQUAL(&ipv6->ip6_src, &ipv6->ip6_dst)) + goto drop; + } else { +#endif /* INET6 */ + if (ti->ti_dst.s_addr == ti->ti_src.s_addr) + goto drop; +#ifdef INET6 + } +#endif /* INET6 */ + } + + /* + * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN + * in_broadcast() should never return true on a received + * packet with M_BCAST not set. + */ + if (m->m_flags & (M_BCAST|M_MCAST)) + goto drop; +#ifdef INET6 + if (is_ipv6) { + /* XXX What about IPv6 Anycasting ?? 
:-( rja */ + if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst)) + goto drop; + } else +#endif /* INET6 */ + if (IN_MULTICAST(ti->ti_dst.s_addr)) + goto drop; + am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */ + if (am == NULL) + goto drop; +#ifdef INET6 + if (is_ipv6) { + /* + * This is probably the place to set the tp->pf value. + * (Don't forget to do it in the v4 code as well!) + * + * Also, remember to blank out things like flowlabel, or + * set flowlabel for accepted sockets in v6. + * + * FURTHERMORE, this is PROBABLY the place where the whole + * business of key munging is set up for passive + * connections. + */ + am->m_len = sizeof(struct sockaddr_in6); + sin6 = mtod(am, struct sockaddr_in6 *); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(struct sockaddr_in6); + sin6->sin6_addr = ipv6->ip6_src; + sin6->sin6_port = th->th_sport; + sin6->sin6_flowinfo = htonl(0x0fffffff) & + inp->inp_ipv6.ip6_flow; + laddr6 = inp->inp_laddr6; + if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) + inp->inp_laddr6 = ipv6->ip6_dst; + /* This is a good optimization. */ + if (in6_pcbconnect(inp, am)) { + inp->inp_laddr6 = laddr6; + (void) m_free(am); + goto drop; + } /* endif in6_pcbconnect() */ + tp->pf = PF_INET6; + } else { + /* + * Letting v4 incoming datagrams to reach valid + * PF_INET6 sockets causes some overhead here. + */ + if (inp->inp_flags & INP_IPV6) { + if (!(inp->inp_flags & (INP_IPV6_UNDEC|INP_IPV6_MAPPED))) { + (void) m_free(am); + goto drop; + } + + am->m_len = sizeof(struct sockaddr_in6); + + sin6 = mtod(am, struct sockaddr_in6 *); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + CREATE_IPV6_MAPPED(sin6->sin6_addr, ti->ti_src.s_addr); + sin6->sin6_port = th->th_sport; + sin6->sin6_flowinfo = 0; + + laddr6 = inp->inp_laddr6; + if (inp->inp_laddr.s_addr == INADDR_ANY) + CREATE_IPV6_MAPPED(inp->inp_laddr6, ti->ti_dst.s_addr); + + /* + * The pcb initially has the v6 default hoplimit + * set. 
We're sending v4 packets so we need to set + * the v4 ttl and tos. + */ + inp->inp_ip.ip_ttl = ip_defttl; + inp->inp_ip.ip_tos = 0; + + if (in6_pcbconnect(inp, am)) { + inp->inp_laddr6 = laddr6; + (void) m_freem(am); + goto drop; + } + tp->pf = PF_INET; + } else { +#endif /* INET6 */ + am->m_len = sizeof (struct sockaddr_in); + sin = mtod(am, struct sockaddr_in *); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr = ti->ti_src; + sin->sin_port = ti->ti_sport; + bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero)); + laddr = inp->inp_laddr; + if (inp->inp_laddr.s_addr == INADDR_ANY) + inp->inp_laddr = ti->ti_dst; + if (in_pcbconnect(inp, am)) { + inp->inp_laddr = laddr; + (void) m_free(am); + goto drop; + } + (void) m_free(am); + tp->pf = PF_INET; +#ifdef INET6 + } /* if (inp->inp_flags & INP_IPV6) */ + } /* if (is_ipv6) */ +#endif /* INET6 */ + tp->t_template = tcp_template(tp); + if (tp->t_template == 0) { + tp = tcp_drop(tp, ENOBUFS); + dropsocket = 0; /* socket is already gone */ + goto drop; + } + if (optp) + tcp_dooptions(tp, optp, optlen, th, + &ts_present, &ts_val, &ts_ecr); +#ifdef TCP_SACK + /* + * If peer did not send a SACK_PERMITTED option (i.e., if + * tcp_dooptions() did not set TF_SACK_PERMIT), set + * sack_disable to 1 if it is currently 0. 
+ */ + if (!tp->sack_disable) + if ((tp->t_flags & TF_SACK_PERMIT) == 0) + tp->sack_disable = 1; +#endif + + if (iss) + tp->iss = iss; + else + tp->iss = tcp_iss; +#ifdef TCP_COMPAT_42 + tcp_iss += TCP_ISSINCR/2; +#else /* TCP_COMPAT_42 */ + tcp_iss += arc4random() % TCP_ISSINCR + 1; +#endif /* !TCP_COMPAT_42 */ + tp->irs = th->th_seq; + tcp_sendseqinit(tp); +#if defined (TCP_SACK) || defined (TCP_NEWRENO) + tp->snd_last = tp->snd_una; +#endif /* TCP_SACK || TCP_NEWRENO */ +#if defined(TCP_SACK) && defined(TCP_FACK) + tp->snd_fack = tp->snd_una; + tp->retran_data = 0; + tp->snd_awnd = 0; +#endif /* TCP_FACK */ + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + tp->t_state = TCPS_SYN_RECEIVED; + tp->t_timer[TCPT_KEEP] = tcptv_keep_init; + dropsocket = 0; /* committed to socket */ + tcpstat.tcps_accepts++; + goto trimthenstep6; + } + + /* + * If the state is SYN_RECEIVED: + * if seg contains SYN/ACK, send an RST. + * if seg contains an ACK, but not for our SYN/ACK, send an RST + */ + + case TCPS_SYN_RECEIVED: + if (tiflags & TH_ACK) { + if (tiflags & TH_SYN) { + tcpstat.tcps_badsyn++; + goto dropwithreset; + } + if (SEQ_LEQ(th->th_ack, tp->snd_una) || + SEQ_GT(th->th_ack, tp->snd_max)) + goto dropwithreset; + } + break; + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. 
+ * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ((tiflags & TH_ACK) && + (SEQ_LEQ(th->th_ack, tp->iss) || + SEQ_GT(th->th_ack, tp->snd_max))) + goto dropwithreset; + if (tiflags & TH_RST) { + if (tiflags & TH_ACK) + tp = tcp_drop(tp, ECONNREFUSED); + goto drop; + } + if ((tiflags & TH_SYN) == 0) + goto drop; + if (tiflags & TH_ACK) { + tp->snd_una = th->th_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + } + tp->t_timer[TCPT_REXMT] = 0; + tp->irs = th->th_seq; + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; +#ifdef TCP_SACK + /* + * If we've sent a SACK_PERMITTED option, and the peer + * also replied with one, then TF_SACK_PERMIT should have + * been set in tcp_dooptions(). If it was not, disable SACKs. + */ + if (!tp->sack_disable) + if ((tp->t_flags & TF_SACK_PERMIT) == 0) + tp->sack_disable = 1; +#endif + if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { + tcpstat.tcps_connects++; + soisconnected(so); + tp->t_state = TCPS_ESTABLISHED; + /* Do window scaling on this connection? */ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->snd_scale = tp->requested_s_scale; + tp->rcv_scale = tp->request_r_scale; + } + (void) tcp_reass(tp, (struct tcphdr *)0, + (struct mbuf *)0, &tlen); + /* + * if we didn't have to retransmit the SYN, + * use its rtt as our initial srtt & rtt var. + */ + if (tp->t_rtt) + tcp_xmit_timer(tp, tp->t_rtt); + /* + * Since new data was acked (the SYN), open the + * congestion window by one MSS. We do this + * here, because we won't go through the normal + * ACK processing below. 
And since this is the + * start of the connection, we know we are in + * the exponential phase of slow-start. + */ + tp->snd_cwnd += tp->t_maxseg; + } else + tp->t_state = TCPS_SYN_RECEIVED; + +trimthenstep6: + /* + * Advance ti->ti_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + th->th_seq++; + if (tlen > tp->rcv_wnd) { + todrop = tlen - tp->rcv_wnd; + m_adj(m, -todrop); + tlen = tp->rcv_wnd; + tiflags &= ~TH_FIN; + tcpstat.tcps_rcvpackafterwin++; + tcpstat.tcps_rcvbyteafterwin += todrop; + } + tp->snd_wl1 = th->th_seq - 1; + tp->rcv_up = th->th_seq; + goto step6; + } + + /* + * States other than LISTEN or SYN_SENT. + * First check timestamp, if present. + * Then check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. + * + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ + if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && + TSTMP_LT(ts_val, tp->ts_recent)) { + + /* Check to see if ts_recent is over 24 days old. */ + if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { + /* + * Invalidate ts_recent. If this segment updates + * ts_recent, the age will be reset later and ts_recent + * will get a valid value. If it does not, setting + * ts_recent to zero will at least satisfy the + * requirement that zero be placed in the timestamp + * echo reply when ts_recent isn't valid. The + * age isn't reset until we get a valid ts_recent + * because we don't want out-of-order segments to be + * dropped when ts_recent is old. 
+ */ + tp->ts_recent = 0; + } else { + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += tlen; + tcpstat.tcps_pawsdrop++; + goto dropafterack; + } + } + + todrop = tp->rcv_nxt - th->th_seq; + if (todrop > 0) { + if (tiflags & TH_SYN) { + tiflags &= ~TH_SYN; + th->th_seq++; + if (th->th_urp > 1) + th->th_urp--; + else + tiflags &= ~TH_URG; + todrop--; + } + if (todrop >= tlen || + (todrop == tlen && (tiflags & TH_FIN) == 0)) { + /* + * Any valid FIN must be to the left of the + * window. At this point, FIN must be a + * duplicate or out-of-sequence, so drop it. + */ + tiflags &= ~TH_FIN; + /* + * Send ACK to resynchronize, and drop any data, + * but keep on processing for RST or ACK. + */ + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_rcvdupbyte += todrop = tlen; + tcpstat.tcps_rcvduppack++; + } else { + tcpstat.tcps_rcvpartduppack++; + tcpstat.tcps_rcvpartdupbyte += todrop; + } + hdroptlen += todrop; /* drop from head afterwards */ + th->th_seq += todrop; + tlen -= todrop; + if (th->th_urp > todrop) + th->th_urp -= todrop; + else { + tiflags &= ~TH_URG; + th->th_urp = 0; + } + } + + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. + */ + if ((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT && tlen) { + tp = tcp_close(tp); + tcpstat.tcps_rcvafterclose++; + goto dropwithreset; + } + + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd); + if (todrop > 0) { + tcpstat.tcps_rcvpackafterwin++; + if (todrop >= tlen) { + tcpstat.tcps_rcvbyteafterwin += tlen; + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. 
+ */ + if (tiflags & TH_SYN && + tp->t_state == TCPS_TIME_WAIT && + SEQ_GT(th->th_seq, tp->rcv_nxt)) { + iss = tp->snd_nxt + TCP_ISSINCR; + tp = tcp_close(tp); + goto findpcb; + } + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. + */ + if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_rcvwinprobe++; + } else + goto dropafterack; + } else + tcpstat.tcps_rcvbyteafterwin += todrop; + m_adj(m, -todrop); + tlen -= todrop; + tiflags &= ~(TH_PUSH|TH_FIN); + } + + /* + * If last ACK falls within this segment's sequence numbers, + * record its timestamp. + * Fix from Braden, see Stevens p. 870 + */ + if (ts_present && TSTMP_GEQ(ts_val, tp->ts_recent) && + SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_now; + tp->ts_recent = ts_val; + } + + /* + * If the RST bit is set examine the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close tcb. + * CLOSING, LAST_ACK, TIME_WAIT STATES + * Close the tcb. 
+ */ + if (tiflags & TH_RST) { +#ifndef INET6 + if (ti->ti_seq != tp->last_ack_sent) +#else + if (th->th_seq != tp->last_ack_sent) +#endif + goto drop; + + switch (tp->t_state) { + case TCPS_SYN_RECEIVED: + so->so_error = ECONNREFUSED; + goto close; + + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + so->so_error = ECONNRESET; + close: + tp->t_state = TCPS_CLOSED; + tcpstat.tcps_drops++; + tp = tcp_close(tp); + goto drop; + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + tp = tcp_close(tp); + goto drop; + } + } + + /* + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. + */ + if (tiflags & TH_SYN) { + tp = tcp_drop(tp, ECONNRESET); + goto dropwithreset; + } + + /* + * If the ACK bit is off we drop the segment and return. + */ + if ((tiflags & TH_ACK) == 0) { + if (tp->t_flags & TF_ACKNOW) + goto dropafterack; + else + goto drop; + } + + /* + * Ack processing. + */ + switch (tp->t_state) { + + /* + * In SYN_RECEIVED state, the ack ACKs our SYN, so enter + * ESTABLISHED state and continue processing. + * The ACK was checked above. + */ + case TCPS_SYN_RECEIVED: + tcpstat.tcps_connects++; + soisconnected(so); + tp->t_state = TCPS_ESTABLISHED; + /* Do window scaling? */ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->snd_scale = tp->requested_s_scale; + tp->rcv_scale = tp->request_r_scale; + } + (void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0, + &tlen); + tp->snd_wl1 = th->th_seq - 1; + /* fall into ... */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < ti->ti_ack <= tp->snd_max + * then advance tp->snd_una to ti->ti_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. 
+ */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + if (SEQ_LEQ(th->th_ack, tp->snd_una)) { + /* + * Duplicate/old ACK processing. + * Increments t_dupacks: + * Pure duplicate (same seq/ack/window, no data) + * Doesn't affect t_dupacks: + * Data packets. + * Normal window updates (window opens) + * Resets t_dupacks: + * New data ACKed. + * Window shrinks + * Old ACK + */ + if (tlen) + break; + /* + * If we get an old ACK, there is probably packet + * reordering going on. Be conservative and reset + * t_dupacks so that we are less agressive in + * doing a fast retransmit. + */ + if (th->th_ack != tp->snd_una) { + tp->t_dupacks = 0; + break; + } + if (tiwin == tp->snd_wnd) { + tcpstat.tcps_rcvdupack++; + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change), the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshhold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. + */ + if (tp->t_timer[TCPT_REXMT] == 0) + tp->t_dupacks = 0; +#if defined(TCP_SACK) && defined(TCP_FACK) + /* + * In FACK, can enter fast rec. if the receiver + * reports a reass. queue longer than 3 segs. 
+ */ + else if (++tp->t_dupacks == tcprexmtthresh || + ((SEQ_GT(tp->snd_fack, tcprexmtthresh * + tp->t_maxseg + tp->snd_una)) && + SEQ_GT(tp->snd_una, tp->snd_last))) { +#else + else if (++tp->t_dupacks == tcprexmtthresh) { +#endif /* TCP_FACK */ + tcp_seq onxt = tp->snd_nxt; + u_long win = + ulmin(tp->snd_wnd, tp->snd_cwnd) / + 2 / tp->t_maxseg; + +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + if (SEQ_LT(th->th_ack, tp->snd_last)){ + /* + * False fast retx after + * timeout. Do not cut window. + */ + tp->snd_cwnd += tp->t_maxseg; + tp->t_dupacks = 0; + (void) tcp_output(tp); + goto drop; + } +#endif + if (win < 2) + win = 2; + tp->snd_ssthresh = win * tp->t_maxseg; +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + tp->snd_last = tp->snd_max; +#endif +#ifdef TCP_SACK + if (!tp->sack_disable) { + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tcpstat.tcps_sndrexmitfast++; +#if defined(TCP_SACK) && defined(TCP_FACK) + (void) tcp_output(tp); + /* + * During FR, snd_cwnd is held + * constant for FACK. + */ + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_dupacks = tcprexmtthresh; +#else + /* + * tcp_output() will send + * oldest SACK-eligible rtx. 
+ */ + (void) tcp_output(tp); + tp->snd_cwnd = tp->snd_ssthresh+ + tp->t_maxseg * tp->t_dupacks; +#endif /* TCP_FACK */ + goto drop; + } +#endif /* TCP_SACK */ + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tp->snd_nxt = th->th_ack; + tp->snd_cwnd = tp->t_maxseg; + tcpstat.tcps_sndrexmitfast++; + (void) tcp_output(tp); + + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tp->t_dupacks; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (tp->t_dupacks > tcprexmtthresh) { +#if defined(TCP_SACK) && defined(TCP_FACK) + /* + * while (awnd < cwnd) + * sendsomething(); + */ + if (!tp->sack_disable) { + if (tp->snd_awnd < tp->snd_cwnd) + tcp_output(tp); + goto drop; + } +#endif /* TCP_FACK */ + tp->snd_cwnd += tp->t_maxseg; + (void) tcp_output(tp); + goto drop; + } + } else if (tiwin < tp->snd_wnd) { + /* + * The window was retracted! Previous dup + * ACKs may have been due to packets arriving + * after the shrunken window, not a missing + * packet, so play it safe and reset t_dupacks + */ + tp->t_dupacks = 0; + } + break; + } + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ +#ifdef TCP_NEWRENO + if (tp->t_dupacks >= tcprexmtthresh && !tcp_newreno(tp, th)) { + /* Out of fast recovery */ + tp->snd_cwnd = tp->snd_ssthresh; + /* + * Window inflation should have left us with approx. + * snd_ssthresh outstanding data. But in case we + * would be inclined to send a burst, better to do + * it via the slow start mechanism. 
+ */ + if (tcp_seq_subtract(tp->snd_max, th->th_ack) < + tp->snd_ssthresh) + tp->snd_cwnd = tcp_seq_subtract(tp->snd_max, + th->th_ack) + tp->t_maxseg; + tp->t_dupacks = 0; + } +#elif defined(TCP_SACK) + if (!tp->sack_disable) { + if (tp->t_dupacks >= tcprexmtthresh) { + /* Check for a partial ACK */ + if (tcp_sack_partialack(tp, th)) { +#if defined(TCP_SACK) && defined(TCP_FACK) + /* Force call to tcp_output */ + if (tp->snd_awnd < tp->snd_cwnd) + needoutput = 1; +#else + tp->snd_cwnd += tp->t_maxseg; + needoutput = 1; +#endif /* TCP_FACK */ + } else { + /* Out of fast recovery */ + tp->snd_cwnd = tp->snd_ssthresh; + if (tcp_seq_subtract(tp->snd_max, + th->th_ack) < tp->snd_ssthresh) + tp->snd_cwnd = + tcp_seq_subtract(tp->snd_max, + th->th_ack) + tp->t_maxseg; + tp->t_dupacks = 0; +#if defined(TCP_SACK) && defined(TCP_FACK) + if (SEQ_GT(th->th_ack, tp->snd_fack)) + tp->snd_fack = th->th_ack; +#endif /* TCP_FACK */ + } + } + } else { + if (tp->t_dupacks >= tcprexmtthresh && + !tcp_newreno(tp, th)) { + /* Out of fast recovery */ + tp->snd_cwnd = tp->snd_ssthresh; + if (tcp_seq_subtract(tp->snd_max, th->th_ack) < + tp->snd_ssthresh) + tp->snd_cwnd = + tcp_seq_subtract(tp->snd_max, + th->th_ack) + tp->t_maxseg; + tp->t_dupacks = 0; + } + } +#else /* else neither TCP_NEWRENO nor TCP_SACK */ + if (tp->t_dupacks >= tcprexmtthresh && + tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_dupacks = 0; +#endif + if (SEQ_GT(th->th_ack, tp->snd_max)) { + tcpstat.tcps_rcvacktoomuch++; + goto dropafterack; + } + acked = th->th_ack - tp->snd_una; + tcpstat.tcps_rcvackpack++; + tcpstat.tcps_rcvackbyte += acked; + + /* + * If we have a timestamp reply, update smoothed + * round trip time. If no timestamp is present but + * transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). 
+ * Recompute the initial retransmit timer. + */ + if (ts_present) + tcp_xmit_timer(tp, tcp_now-ts_ecr+1); + else if (tp->t_rtt && SEQ_GT(th->th_ack, tp->t_rtseq)) + tcp_xmit_timer(tp,tp->t_rtt); + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. + */ + if (th->th_ack == tp->snd_max) { + tp->t_timer[TCPT_REXMT] = 0; + needoutput = 1; + } else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * When new data is acked, open the congestion window. + * If the window gives us less than ssthresh packets + * in flight, open exponentially (maxseg per packet). + * Otherwise open linearly: maxseg per window + * (maxseg^2 / cwnd per packet). + */ + { + register u_int cw = tp->snd_cwnd; + register u_int incr = tp->t_maxseg; + + if (cw > tp->snd_ssthresh) + incr = incr * incr / cw; +#if defined (TCP_NEWRENO) || defined (TCP_SACK) + if (SEQ_GEQ(th->th_ack, tp->snd_last)) +#endif + tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); + } + ND6_HINT(tp); + if (acked > so->so_snd.sb_cc) { + tp->snd_wnd -= so->so_snd.sb_cc; + sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); + ourfinisacked = 1; + } else { + sbdrop(&so->so_snd, acked); + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + if (sb_notify(&so->so_snd)) + sowwakeup(so); + tp->snd_una = th->th_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; +#if defined (TCP_SACK) && defined (TCP_FACK) + if (SEQ_GT(tp->snd_una, tp->snd_fack)) + tp->snd_fack = tp->snd_una; +#endif + + switch (tp->t_state) { + + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. 
+ * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + */ + if (so->so_state & SS_CANTRCVMORE) { + soisdisconnected(so); + tp->t_timer[TCPT_2MSL] = tcp_maxidle; + } + tp->t_state = TCPS_FIN_WAIT_2; + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + if (ourfinisacked) { + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisdisconnected(so); + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) { + tp = tcp_close(tp); + goto drop; + } + break; + + /* + * In TIME_WAIT state the only thing that should arrive + * is a retransmission of the remote FIN. Acknowledge + * it and restart the finack timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + goto dropafterack; + } + } + +step6: + /* + * Update window information. + * Don't look at window if no ACK: TAC's send garbage on first SYN. + */ + if ((tiflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || + (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack)) || + (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))) { + /* keep track of pure window updates */ + if (tlen == 0 && + tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) + tcpstat.tcps_rcvwinupd++; + tp->snd_wnd = tiwin; + tp->snd_wl1 = th->th_seq; + tp->snd_wl2 = th->th_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. 
+ */ + if ((tiflags & TH_URG) && th->th_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (th->th_urp + so->so_rcv.sb_cc > sb_max) { + th->th_urp = 0; /* XXX */ + tiflags &= ~TH_URG; /* XXX */ + goto dodata; /* XXX */ + } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). + */ + if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { + tp->rcv_up = th->th_seq + th->th_urp; + so->so_oobmark = so->so_rcv.sb_cc + + (tp->rcv_up - tp->rcv_nxt) - 1; + if (so->so_oobmark == 0) + so->so_state |= SS_RCVATMARK; + sohasoutofband(so); + tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); + } + /* + * Remove out of band data so doesn't get presented to user. + * This can happen independent of advancing the URG pointer, + * but if two URG's are pending at once, some out-of-band + * data may creep in... ick. + */ + if (th->th_urp <= (u_int16_t) tlen +#ifdef SO_OOBINLINE + && (so->so_options & SO_OOBINLINE) == 0 +#endif + ) + tcp_pulloutofband(so, th->th_urp, m, hdroptlen); + } else + /* + * If no out of band data is expected, + * pull receive urgent pointer along + * with the receive window. 
+ */ + if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) + tp->rcv_up = tp->rcv_nxt; +dodata: /* XXX */ + + /* + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. + */ + if ((tlen || (tiflags & TH_FIN)) && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + if (th->th_seq == tp->rcv_nxt && tp->segq.lh_first == NULL && + tp->t_state == TCPS_ESTABLISHED) { + if (th->th_flags & TH_PUSH) + tp->t_flags |= TF_ACKNOW; + else + tp->t_flags |= TF_DELACK; + tp->rcv_nxt += tlen; + tiflags = th->th_flags & TH_FIN; + tcpstat.tcps_rcvpack++; + tcpstat.tcps_rcvbyte += tlen; + ND6_HINT(tp); + m_adj(m, hdroptlen); + sbappend(&so->so_rcv, m); + sorwakeup(so); + } else { + m_adj(m, hdroptlen); + tiflags = tcp_reass(tp, th, m, &tlen); + tp->t_flags |= TF_ACKNOW; + } +#ifdef TCP_SACK + if (!tp->sack_disable) + tcp_update_sack_list(tp); +#endif + + /* + * variable len never referenced again in modern BSD, + * so why bother computing it ?? + */ +#if 0 + /* + * Note the amount of data that peer has sent into + * our window, in order to estimate the sender's + * buffer size. + */ + len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); +#endif /* 0 */ + } else { + m_freem(m); + tiflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. Ignore a FIN received before + * the connection is fully established. + */ + if ((tiflags & TH_FIN) && TCPS_HAVEESTABLISHED(tp->t_state)) { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + socantrcvmore(so); + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + switch (tp->t_state) { + + /* + * In ESTABLISHED STATE enter the CLOSE_WAIT state. 
+ */ + case TCPS_ESTABLISHED: + tp->t_state = TCPS_CLOSE_WAIT; + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + tp->t_state = TCPS_CLOSING; + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisdisconnected(so); + break; + + /* + * In TIME_WAIT state restart the 2 MSL time_wait timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + } + } +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) { +#ifdef INET6 + if (tp->pf == PF_INET6) + tcp_trace(TA_INPUT, ostate, tp, (caddr_t) &tcp_saveti6, 0, tlen); + else +#endif /* INET6 */ + tcp_trace(TA_INPUT, ostate, tp, (caddr_t) &tcp_saveti, 0, tlen); + } +#endif /* TCPDEBUG */ + + /* + * Return any desired output. + */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) { + (void) tcp_output(tp); + } + return; + +dropafterack: + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. + */ + if (tiflags & TH_RST) + goto drop; + m_freem(m); + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + return; + +dropwithreset: + /* + * Generate a RST, dropping incoming segment. + * Make ACK acceptable to originator of segment. + * Don't bother to respond if destination was broadcast/multicast. 
+ */ + if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST)) + goto drop; +#ifdef INET6 + if (is_ipv6) { + /* For following calls to tcp_respond */ + ti = mtod(m, struct tcpiphdr *); + if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst)) + goto drop; + } else { +#endif /* INET6 */ + if (IN_MULTICAST(ti->ti_dst.s_addr)) + goto drop; +#ifdef INET6 + } +#endif /* INET6 */ + if (tiflags & TH_ACK) + tcp_respond(tp, (caddr_t) ti, m, (tcp_seq)0, th->th_ack, TH_RST); + else { + if (tiflags & TH_SYN) + tlen++; + tcp_respond(tp, (caddr_t) ti, m, th->th_seq+tlen, (tcp_seq)0, + TH_RST|TH_ACK); + } + /* destroy temporarily created socket */ + if (dropsocket) + (void) soabort(so); + return; + +drop: + /* + * Drop space held by incoming segment and return. + */ +#ifdef TCPDEBUG + if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) { +#ifdef INET6 + if (tp->pf == PF_INET6) + tcp_trace(TA_DROP, ostate, tp, (caddr_t) &tcp_saveti6, 0, tlen); + else +#endif /* INET6 */ + tcp_trace(TA_DROP, ostate, tp, (caddr_t) &tcp_saveti, 0, tlen); + } +#endif /* TCPDEBUG */ + + m_freem(m); + /* destroy temporarily created socket */ + if (dropsocket) + (void) soabort(so); + return; +#ifndef TUBA_INCLUDE +} + +void +tcp_dooptions(tp, cp, cnt, th, ts_present, ts_val, ts_ecr) + struct tcpcb *tp; + u_char *cp; + int cnt; + struct tcphdr *th; + int *ts_present; + u_int32_t *ts_val, *ts_ecr; +{ + u_int16_t mss = 0; + int opt, optlen; + + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; + } + switch (opt) { + + default: + continue; + + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) + continue; + if (!(th->th_flags & TH_SYN)) + continue; + bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); + NTOHS(mss); + break; + + case TCPOPT_WINDOW: + if (optlen != TCPOLEN_WINDOW) + continue; + if (!(th->th_flags & TH_SYN)) + continue; + tp->t_flags |= TF_RCVD_SCALE; + 
tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); + break; + + case TCPOPT_TIMESTAMP: + if (optlen != TCPOLEN_TIMESTAMP) + continue; + *ts_present = 1; + bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val)); + NTOHL(*ts_val); + bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr)); + NTOHL(*ts_ecr); + + /* + * A timestamp received in a SYN makes + * it ok to send timestamp requests and replies. + */ + if (th->th_flags & TH_SYN) { + tp->t_flags |= TF_RCVD_TSTMP; + tp->ts_recent = *ts_val; + tp->ts_recent_age = tcp_now; + } + break; + +#ifdef TCP_SACK + case TCPOPT_SACK_PERMITTED: + if (tp->sack_disable || optlen!=TCPOLEN_SACK_PERMITTED) + continue; + if (th->th_flags & TH_SYN) + /* MUST only be set on SYN */ + tp->t_flags |= TF_SACK_PERMIT; + break; + case TCPOPT_SACK: + if (tcp_sack_option(tp, th, cp, optlen)) + continue; + break; +#endif + } + } + /* Update t_maxopd and t_maxseg after all options are processed */ + if (th->th_flags & TH_SYN) + (void) tcp_mss(tp, mss); /* sets t_maxseg */ +} + +#if defined(TCP_SACK) || defined(TCP_NEWRENO) +u_long +tcp_seq_subtract(a, b) + u_long a, b; +{ + return ((long)(a - b)); +} +#endif + + +#ifdef TCP_SACK +/* + * This function is called upon receipt of new valid data (while not in header + * prediction mode), and it updates the ordered list of sacks. + */ +void +tcp_update_sack_list(tp) + struct tcpcb *tp; +{ + /* + * First reported block MUST be the most recent one. Subsequent + * blocks SHOULD be in the order in which they arrived at the + * receiver. These two conditions make the implementation fully + * compliant with RFC 2018. 
+ */ + int i, j = 0, count = 0, lastpos = -1; + struct sackblk sack, firstsack, temp[MAX_SACK_BLKS]; + + /* First clean up current list of sacks */ + for (i = 0; i < tp->rcv_numsacks; i++) { + sack = tp->sackblks[i]; + if (sack.start == 0 && sack.end == 0) { + count++; /* count = number of blocks to be discarded */ + continue; + } + if (SEQ_LEQ(sack.end, tp->rcv_nxt)) { + tp->sackblks[i].start = tp->sackblks[i].end = 0; + count++; + } else { + temp[j].start = tp->sackblks[i].start; + temp[j++].end = tp->sackblks[i].end; + } + } + tp->rcv_numsacks -= count; + if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */ + tcp_clean_sackreport(tp); + if (SEQ_LT(tp->rcv_nxt, tp->rcv_laststart)) { + /* ==> need first sack block */ + tp->sackblks[0].start = tp->rcv_laststart; + tp->sackblks[0].end = tp->rcv_lastend; + tp->rcv_numsacks = 1; + } + return; + } + /* Otherwise, sack blocks are already present. */ + for (i = 0; i < tp->rcv_numsacks; i++) + tp->sackblks[i] = temp[i]; /* first copy back sack list */ + if (SEQ_GEQ(tp->rcv_nxt, tp->rcv_lastend)) + return; /* sack list remains unchanged */ + /* + * From here, segment just received should be (part of) the 1st sack. + * Go through list, possibly coalescing sack block entries. + */ + firstsack.start = tp->rcv_laststart; + firstsack.end = tp->rcv_lastend; + for (i = 0; i < tp->rcv_numsacks; i++) { + sack = tp->sackblks[i]; + if (SEQ_LT(sack.end, firstsack.start) || + SEQ_GT(sack.start, firstsack.end)) + continue; /* no overlap */ + if (sack.start == firstsack.start && sack.end == firstsack.end){ + /* + * identical block; delete it here since we will + * move it to the front of the list. 
+ */ + tp->sackblks[i].start = tp->sackblks[i].end = 0; + lastpos = i; /* last posn with a zero entry */ + continue; + } + if (SEQ_LEQ(sack.start, firstsack.start)) + firstsack.start = sack.start; /* merge blocks */ + if (SEQ_GEQ(sack.end, firstsack.end)) + firstsack.end = sack.end; /* merge blocks */ + tp->sackblks[i].start = tp->sackblks[i].end = 0; + lastpos = i; /* last posn with a zero entry */ + } + if (lastpos != -1) { /* at least one merge */ + for (i = 0, j = 1; i < tp->rcv_numsacks; i++) { + sack = tp->sackblks[i]; + if (sack.start == 0 && sack.end == 0) + continue; + temp[j++] = sack; + } + tp->rcv_numsacks = j; /* including first blk (added later) */ + for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */ + tp->sackblks[i] = temp[i]; + } else { /* no merges -- shift sacks by 1 */ + if (tp->rcv_numsacks < MAX_SACK_BLKS) + tp->rcv_numsacks++; + for (i = tp->rcv_numsacks-1; i > 0; i--) + tp->sackblks[i] = tp->sackblks[i-1]; + } + tp->sackblks[0] = firstsack; + return; +} + +/* + * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue, + * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list + * of holes (oldest to newest, in terms of the sequence space). 
+ */ +int +tcp_sack_option(tp, th, cp, optlen) + struct tcpcb *tp; + struct tcphdr *th; + u_char *cp; + int optlen; +{ + int tmp_olen; + u_char *tmp_cp; + struct sackhole *cur, *p, *temp; + + if (tp->sack_disable) + return 1; + + /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */ + if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) + return 1; + tmp_cp = cp + 2; + tmp_olen = optlen - 2; + if (tp->snd_numholes < 0) + tp->snd_numholes = 0; + if (tp->t_maxseg == 0) + panic("tcp_sack_option"); /* Should never happen */ + while (tmp_olen > 0) { + struct sackblk sack; + + bcopy((char *) tmp_cp, (char *) &(sack.start), sizeof(tcp_seq)); + NTOHL(sack.start); + bcopy((char *) tmp_cp + sizeof(tcp_seq), + (char *) &(sack.end), sizeof(tcp_seq)); + NTOHL(sack.end); + tmp_olen -= TCPOLEN_SACK; + tmp_cp += TCPOLEN_SACK; + if (SEQ_LEQ(sack.end, sack.start)) + continue; /* bad SACK fields */ + if (SEQ_LEQ(sack.end, tp->snd_una)) + continue; /* old block */ +#if defined(TCP_SACK) && defined(TCP_FACK) + /* Updates snd_fack. */ + if (SEQ_GEQ(sack.end, tp->snd_fack)) + tp->snd_fack = sack.end; +#endif /* TCP_FACK */ + if (SEQ_GT(th->th_ack, tp->snd_una)) { + if (SEQ_LT(sack.start, th->th_ack)) + continue; + } else { + if (SEQ_LT(sack.start, tp->snd_una)) + continue; + } + if (SEQ_GT(sack.end, tp->snd_max)) + continue; + if (tp->snd_holes == 0) { /* first hole */ + tp->snd_holes = (struct sackhole *) + malloc(sizeof(struct sackhole), M_PCB, M_NOWAIT); + if (tp->snd_holes == NULL) { + /* ENOBUFS, so ignore SACKed block for now*/ + continue; + } + cur = tp->snd_holes; + cur->start = th->th_ack; + cur->end = sack.start; + cur->rxmit = cur->start; + cur->next = 0; + tp->snd_numholes = 1; + tp->rcv_lastsack = sack.end; + /* + * dups is at least one. If more data has been + * SACKed, it can be greater than one. 
+ */ + cur->dups = min(tcprexmtthresh, + ((sack.end - cur->end)/tp->t_maxseg)); + if (cur->dups < 1) + cur->dups = 1; + continue; /* with next sack block */ + } + /* Go thru list of holes: p = previous, cur = current */ + p = cur = tp->snd_holes; + while (cur) { + if (SEQ_LEQ(sack.end, cur->start)) + /* SACKs data before the current hole */ + break; /* no use going through more holes */ + if (SEQ_GEQ(sack.start, cur->end)) { + /* SACKs data beyond the current hole */ + cur->dups++; + if ( ((sack.end - cur->end)/tp->t_maxseg) >= + tcprexmtthresh) + cur->dups = tcprexmtthresh; + p = cur; + cur = cur->next; + continue; + } + if (SEQ_LEQ(sack.start, cur->start)) { + /* Data acks at least the beginning of hole */ +#if defined(TCP_SACK) && defined(TCP_FACK) + if (SEQ_GT(sack.end, cur->rxmit)) + tp->retran_data -= + tcp_seq_subtract(cur->rxmit, + cur->start); + else + tp->retran_data -= + tcp_seq_subtract(sack.end, + cur->start); +#endif /* TCP_FACK */ + if (SEQ_GEQ(sack.end,cur->end)){ + /* Acks entire hole, so delete hole */ + if (p != cur) { + p->next = cur->next; + free(cur, M_PCB); + cur = p->next; + } else { + cur=cur->next; + free(p, M_PCB); + p = cur; + tp->snd_holes = p; + } + tp->snd_numholes--; + continue; + } + /* otherwise, move start of hole forward */ + cur->start = sack.end; + cur->rxmit = max (cur->rxmit, cur->start); + p = cur; + cur = cur->next; + continue; + } + /* move end of hole backward */ + if (SEQ_GEQ(sack.end, cur->end)) { +#if defined(TCP_SACK) && defined(TCP_FACK) + if (SEQ_GT(cur->rxmit, sack.start)) + tp->retran_data -= + tcp_seq_subtract(cur->rxmit, + sack.start); +#endif /* TCP_FACK */ + cur->end = sack.start; + cur->rxmit = min (cur->rxmit, cur->end); + cur->dups++; + if ( ((sack.end - cur->end)/tp->t_maxseg) >= + tcprexmtthresh) + cur->dups = tcprexmtthresh; + p = cur; + cur = cur->next; + continue; + } + if (SEQ_LT(cur->start, sack.start) && + SEQ_GT(cur->end, sack.end)) { + /* + * ACKs some data in middle of a hole; need to + * split 
current hole + */ + temp = (struct sackhole *)malloc(sizeof(*temp), + M_PCB,M_NOWAIT); + if (temp == NULL) + continue; /* ENOBUFS */ +#if defined(TCP_SACK) && defined(TCP_FACK) + if (SEQ_GT(cur->rxmit, sack.end)) + tp->retran_data -= + tcp_seq_subtract(sack.end, + sack.start); + else if (SEQ_GT(cur->rxmit, sack.start)) + tp->retran_data -= + tcp_seq_subtract(cur->rxmit, + sack.start); +#endif /* TCP_FACK */ + temp->next = cur->next; + temp->start = sack.end; + temp->end = cur->end; + temp->dups = cur->dups; + temp->rxmit = max (cur->rxmit, temp->start); + cur->end = sack.start; + cur->rxmit = min (cur->rxmit, cur->end); + cur->dups++; + if ( ((sack.end - cur->end)/tp->t_maxseg) >= + tcprexmtthresh) + cur->dups = tcprexmtthresh; + cur->next = temp; + p = temp; + cur = p->next; + tp->snd_numholes++; + } + } + /* At this point, p points to the last hole on the list */ + if (SEQ_LT(tp->rcv_lastsack, sack.start)) { + /* + * Need to append new hole at end. + * Last hole is p (and it's not NULL). + */ + temp = (struct sackhole *) malloc(sizeof(*temp), + M_PCB, M_NOWAIT); + if (temp == NULL) + continue; /* ENOBUFS */ + temp->start = tp->rcv_lastsack; + temp->end = sack.start; + temp->dups = min(tcprexmtthresh, + ((sack.end - sack.start)/tp->t_maxseg)); + if (temp->dups < 1) + temp->dups = 1; + temp->rxmit = temp->start; + temp->next = 0; + p->next = temp; + tp->rcv_lastsack = sack.end; + tp->snd_numholes++; + } + } +#if defined(TCP_SACK) && defined(TCP_FACK) + /* + * Update retran_data and snd_awnd. Go through the list of + * holes. Increment retran_data by (hole->rxmit - hole->start). + */ + tp->retran_data = 0; + cur = tp->snd_holes; + while (cur) { + tp->retran_data += cur->rxmit - cur->start; + cur = cur->next; + } + tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) + + tp->retran_data; +#endif /* TCP_FACK */ + + return 0; +} + +/* + * Delete stale (i.e, cumulatively ack'd) holes. 
Hole is deleted only if + * it is completely acked; otherwise, tcp_sack_option(), called from + * tcp_dooptions(), will fix up the hole. + */ +void +tcp_del_sackholes(tp, th) + struct tcpcb *tp; + struct tcphdr *th; +{ + if (!tp->sack_disable && tp->t_state != TCPS_LISTEN) { + /* max because this could be an older ack just arrived */ + tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ? + th->th_ack : tp->snd_una; + struct sackhole *cur = tp->snd_holes; + struct sackhole *prev = cur; + while (cur) + if (SEQ_LEQ(cur->end, lastack)) { + cur = cur->next; + free(prev, M_PCB); + prev = cur; + tp->snd_numholes--; + } else if (SEQ_LT(cur->start, lastack)) { + cur->start = lastack; + break; + } else + break; + tp->snd_holes = cur; + } +} + +/* + * Delete all receiver-side SACK information. + */ +void +tcp_clean_sackreport(tp) + struct tcpcb *tp; +{ + int i; + + tp->rcv_numsacks = 0; + for (i = 0; i < MAX_SACK_BLKS; i++) + tp->sackblks[i].start = tp->sackblks[i].end=0; + +} + +/* + * Checks for partial ack. If partial ack arrives, turn off retransmission + * timer, deflate the window, do not clear tp->t_dupacks, and return 1. + * If the ack advances at least to tp->snd_last, return 0. + */ +int +tcp_sack_partialack(tp, th) + struct tcpcb *tp; + struct tcphdr *th; +{ + if (SEQ_LT(th->th_ack, tp->snd_last)) { + /* Turn off retx. timer (will start again next segment) */ + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; +#ifndef TCP_FACK + /* + * Partial window deflation. This statement relies on the + * fact that tp->snd_una has not been updated yet. In FACK + * hold snd_cwnd constant during fast recovery. + */ + tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg); +#endif + return 1; + } + return 0; +} +#endif /* TCP_SACK */ + +/* + * Pull out of band byte out of a segment so + * it doesn't appear in the user's data queue. + * It is still reflected in the segment length for + * sequencing purposes. 
+ */ +void +tcp_pulloutofband(so, urgent, m, off) + struct socket *so; + u_int urgent; + register struct mbuf *m; + int off; +{ + int cnt = off + urgent - 1; + + while (cnt >= 0) { + if (m->m_len > cnt) { + char *cp = mtod(m, caddr_t) + cnt; + struct tcpcb *tp = sototcpcb(so); + + tp->t_iobc = *cp; + tp->t_oobflags |= TCPOOB_HAVEDATA; + bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); + m->m_len--; + return; + } + cnt -= m->m_len; + m = m->m_next; + if (m == 0) + break; + } + panic("tcp_pulloutofband"); +} + +/* + * Collect new round-trip time estimate + * and update averages and current timeout. + */ +void +tcp_xmit_timer(tp, rtt) + register struct tcpcb *tp; + short rtt; +{ + register short delta; + short rttmin; + + tcpstat.tcps_rttupdated++; + --rtt; + if (tp->t_srtt != 0) { + /* + * srtt is stored as fixed point with 3 bits after the + * binary point (i.e., scaled by 8). The following magic + * is equivalent to the smoothing algorithm in rfc793 with + * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed + * point). Adjust rtt to origin 0. + */ + delta = (rtt << 2) - (tp->t_srtt >> TCP_RTT_SHIFT); + if ((tp->t_srtt += delta) <= 0) + tp->t_srtt = 1; + /* + * We accumulate a smoothed rtt variance (actually, a + * smoothed mean difference), then set the retransmit + * timer to smoothed rtt + 4 times the smoothed variance. + * rttvar is stored as fixed point with 2 bits after the + * binary point (scaled by 4). The following is + * equivalent to rfc793 smoothing with an alpha of .75 + * (rttvar = rttvar*3/4 + |delta| / 4). This replaces + * rfc793's wired-in beta. + */ + if (delta < 0) + delta = -delta; + delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + if ((tp->t_rttvar += delta) <= 0) + tp->t_rttvar = 1; + } else { + /* + * No rtt measurement yet - use the unsmoothed rtt. + * Set the variance to half the rtt (so our first + * retransmit happens at 3*rtt). 
+ */ + tp->t_srtt = rtt << (TCP_RTT_SHIFT + 2); + tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT + 2 - 1); + } + tp->t_rtt = 0; + tp->t_rxtshift = 0; + + /* + * the retransmit should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + */ + if (tp->t_rttmin > rtt + 2) + rttmin = tp->t_rttmin; + else + rttmin = rtt + 2; + TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), rttmin, TCPTV_REXMTMAX); + + /* + * We received an ack for a packet that wasn't retransmitted; + * it is probably safe to discard any error indications we've + * received recently. This isn't quite right, but close enough + * for now (a route might have failed after we sent a segment, + * and the return path might not be symmetrical). + */ + tp->t_softerror = 0; +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing + * interface without forcing IP to fragment; if bigger than + * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES + * to utilize large mbufs. If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. 
+ * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. + * + * Also take into account the space needed for options that we + * send regularly. Make maxseg shorter by that amount to assure + * that we can send maxseg amount of data even when the options + * are present. Store the upper limit of the length of options plus + * data in maxopd. + */ +int +tcp_mss(tp, offer) + register struct tcpcb *tp; + u_int offer; +{ + struct route *ro; + register struct rtentry *rt; + struct ifnet *ifp; + register int rtt, mss; + u_long bufsize; + struct inpcb *inp; + struct socket *so; + + inp = tp->t_inpcb; + ro = &inp->inp_route; + so = inp->inp_socket; + + if ((rt = ro->ro_rt) == (struct rtentry *)0) { + /* No route yet, so try to acquire one */ +#ifdef INET6 + /* + * Get a new IPv6 route if an IPv6 destination, otherwise, get + * and IPv4 route (including those pesky IPv4-mapped addresses). + */ + bzero(ro,sizeof(struct route_in6)); + if (sotopf(so) == AF_INET6) { + if (IN6_IS_ADDR_V4MAPPED(&inp->inp_faddr6)) { + /* Get an IPv4 route. */ + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(ro->ro_dst); + ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = + inp->inp_faddr; + rtalloc(ro); + } else { + ro->ro_dst.sa_family = AF_INET6; + ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); + ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr = + inp->inp_faddr6; + rtalloc(ro); + } + } else +#endif /* INET6 */ + if (inp->inp_faddr.s_addr != INADDR_ANY) { + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(ro->ro_dst); + satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; + rtalloc(ro); + } + if ((rt = ro->ro_rt) == (struct rtentry *)0) { + tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; + return (tcp_mssdflt); + } + } + ifp = rt->rt_ifp; + +#ifdef RTV_MTU /* if route characteristics exist ... */ + /* + * While we're here, check if there's an initial rtt + * or rttvar. 
Convert from the route-table units + * to scaled multiples of the slow timeout timer. + */ + if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { + /* + * XXX the lock bit for MTU indicates that the value + * is also a minimum value; this is subject to time. + */ + if (rt->rt_rmx.rmx_locks & RTV_RTT) + TCPT_RANGESET(tp->t_rttmin, + rtt / (RTM_RTTUNIT / PR_SLOWHZ), + TCPTV_MIN, TCPTV_REXMTMAX); + tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); + if (rt->rt_rmx.rmx_rttvar) + tp->t_rttvar = rt->rt_rmx.rmx_rttvar / + (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); + else + /* default variation is +- 1 rtt */ + tp->t_rttvar = + tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; + TCPT_RANGESET(tp->t_rxtcur, + ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, + tp->t_rttmin, TCPTV_REXMTMAX); + } + /* + * if there's an mtu associated with the route, use it + */ + if (rt->rt_rmx.rmx_mtu) +#ifdef INET6 + { + /* + * One may wish to lower MSS to take into account options, + * especially security-related options. + */ + if (tp->pf == AF_INET6) + mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpipv6hdr); + else +#endif /* INET6 */ + mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); +#ifdef INET6 + } +#endif /* INET6 */ + else +#endif /* RTV_MTU */ + { + /* + * ifp may be null and rmx_mtu may be zero in certain + * v6 cases (e.g., if ND wasn't able to resolve the + * destination host. + */ + mss = ifp ? ifp->if_mtu - sizeof(struct tcpiphdr) : 0; +#ifdef INET6 + if (tp->pf == AF_INET) +#endif /* INET6 */ + if (!in_localaddr(inp->inp_faddr)) + mss = min(mss, tcp_mssdflt); + } + /* + * The current mss, t_maxseg, is initialized to the default value. + * If we compute a smaller value, reduce the current mss. + * If we compute a larger value, return it for use in sending + * a max seg size option, but don't store it for use + * unless we received an offer at least that large from peer. + * However, do not accept offers under 32 bytes. 
+ */ + if (offer) + mss = min(mss, offer); + mss = max(mss, 64); /* sanity - at least max opt. space */ + /* + * maxopd stores the maximum length of data AND options + * in a segment; maxseg is the amount of data in a normal + * segment. We need to store this value (maxopd) apart + * from maxseg, because now every segment carries options + * and thus we normally have somewhat less data in segments. + */ + tp->t_maxopd = mss; + + if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && + (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) + mss -= TCPOLEN_TSTAMP_APPA; + +#if (MCLBYTES & (MCLBYTES - 1)) == 0 + if (mss > MCLBYTES) + mss &= ~(MCLBYTES-1); +#else + if (mss > MCLBYTES) + mss = mss / MCLBYTES * MCLBYTES; +#endif + /* + * If there's a pipesize, change the socket buffer + * to that size. Make the socket buffers an integral + * number of mss units; if the mss is larger than + * the socket buffer, decrease the mss. + */ +#ifdef RTV_SPIPE + if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) +#endif + bufsize = so->so_snd.sb_hiwat; + if (bufsize < mss) + mss = bufsize; + else { + bufsize = roundup(bufsize, mss); + if (bufsize > sb_max) + bufsize = sb_max; + (void)sbreserve(&so->so_snd, bufsize); + } + tp->t_maxseg = mss; + +#ifdef RTV_RPIPE + if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) +#endif + bufsize = so->so_rcv.sb_hiwat; + if (bufsize > mss) { + bufsize = roundup(bufsize, mss); + if (bufsize > sb_max) + bufsize = sb_max; + (void)sbreserve(&so->so_rcv, bufsize); + } + tp->snd_cwnd = mss; + +#ifdef RTV_SSTHRESH + if (rt->rt_rmx.rmx_ssthresh) { + /* + * There's some sort of gateway or interface + * buffer limit on the path. Use this to set + * the slow start threshhold, but set the + * threshold to no less than 2*mss. + */ + tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); + } +#endif /* RTV_MTU */ + return (mss); +} +#endif /* TUBA_INCLUDE */ + +#if defined(TCP_NEWRENO) || defined (TCP_SACK) +/* + * Checks for partial ack. 
If partial ack arrives, force the retransmission + * of the next unacknowledged segment, do not clear tp->t_dupacks, and return + * 1. By setting snd_nxt to ti_ack, this forces retransmission timer to + * be started again. If the ack advances at least to tp->snd_last, return 0. + */ +int +tcp_newreno(tp, th) + struct tcpcb *tp; + struct tcphdr *th; +{ + if (SEQ_LT(th->th_ack, tp->snd_last)) { + /* + * snd_una has not been updated and the socket send buffer + * not yet drained of the acked data, so we have to leave + * snd_una as it was to get the correct data offset in + * tcp_output(). + */ + tcp_seq onxt = tp->snd_nxt; + u_long ocwnd = tp->snd_cwnd; + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tp->snd_nxt = th->th_ack; + /* + * Set snd_cwnd to one segment beyond acknowledged offset + * (tp->snd_una not yet updated when this function is called) + */ + tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una); + (void) tcp_output(tp); + tp->snd_cwnd = ocwnd; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + /* + * Partial window deflation. Relies on fact that tp->snd_una + * not updated yet. + */ + tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg); + return 1; + } + return 0; +} +#endif /* TCP_NEWRENO || TCP_SACK */ diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_output.c b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_output.c new file mode 100644 index 0000000..adab0e1 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_output.c @@ -0,0 +1,1124 @@ +//========================================================================== +// +// sys/netinet/tcp_output.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: tcp_output.c,v 1.27 1999/12/15 16:37:20 provos Exp $ */ +/* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/errno.h> +#include <sys/domain.h> + +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#define TCPOUTFLAGS +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/tcpip.h> +#include <netinet/tcp_debug.h> + +#ifdef TUBA +#include <netiso/iso.h> +#include <netiso/tuba_table.h> +#endif + +#ifdef INET6 +#include <netinet6/tcpipv6.h> +#endif /* INET6 */ + +#ifdef TCP_SIGNATURE +#include <sys/md5k.h> +#endif /* TCP_SIGNATURE */ + +#ifdef notyet +extern struct mbuf *m_copypack(); +#endif + +#ifdef TCP_SACK +extern int tcprexmtthresh; +#endif + +#ifdef TCP_SACK +#ifdef TCP_SACK_DEBUG +void +tcp_print_holes(tp) +struct tcpcb *tp; +{ + struct sackhole *p = tp->snd_holes; + if (p == 0) + return; + printf("Hole report: start--end dups rxmit\n"); + while (p) { + printf("%x--%x d %d r %x\n", p->start, p->end, p->dups, + p->rxmit); + p = p->next; + } + printf("\n"); +} +#endif /* TCP_SACK_DEBUG */ + +/* + * Returns pointer to a sackhole if there are any pending retransmissions; + * NULL otherwise. + */ +struct sackhole * +tcp_sack_output(tp) +register struct tcpcb *tp; +{ + struct sackhole *p; + if (tp->sack_disable) + return 0; + p = tp->snd_holes; + while (p) { + if (p->dups >= tcprexmtthresh && SEQ_LT(p->rxmit, p->end)) { + if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ + p = p->next; + continue; + } +#ifdef TCP_SACK_DEBUG + if (p) + tcp_print_holes(tp); +#endif + return p; + } + p = p->next; + } + return 0; +} + +/* + * After a timeout, the SACK list may be rebuilt. 
This SACK information + * should be used to avoid retransmitting SACKed data. This function + * traverses the SACK list to see if snd_nxt should be moved forward. + */ +void +tcp_sack_adjust(tp) + struct tcpcb *tp; +{ + int i; + + for (i = 0; i < tp->rcv_numsacks; i++) { + if (SEQ_LT(tp->snd_nxt, tp->sackblks[i].start)) + break; + if (SEQ_LEQ(tp->sackblks[i].end, tp->snd_nxt)) + continue; + if (tp->sackblks[i].start == 0 && tp->sackblks[i].end == 0) + continue; + /* snd_nxt must be in middle of block of SACKed data */ + tp->snd_nxt = tp->sackblks[i].end; + break; + } +} +#endif /* TCP_SACK */ + +/* + * Tcp output routine: figure out what should be sent and send it. + */ +int +tcp_output(tp) + register struct tcpcb *tp; +{ + register struct socket *so = tp->t_inpcb->inp_socket; + register long len, win; + int off, flags, error = 0; + register struct mbuf *m; + register struct tcphdr *th; + u_char opt[MAX_TCPOPTLEN]; + unsigned int optlen, hdrlen; + int idle, sendalot; +#ifdef TCP_SACK + int i, sack_rxmit = 0; + struct sackhole *p; +#endif +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + int maxburst = TCP_MAXBURST; +#endif +#ifdef TCP_SIGNATURE + unsigned int sigoff; +#endif /* TCP_SIGNATURE */ + +#if defined(TCP_SACK) && defined(TCP_SIGNATURE) && defined(DIAGNOSTIC) + if (!tp->sack_disable && (tp->t_flags & TF_SIGNATURE)) + return (EINVAL); +#endif /* defined(TCP_SACK) && defined(TCP_SIGNATURE) && defined(DIAGNOSTIC) */ + + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->snd_max == tp->snd_una); + if (idle && tp->t_idle >= tp->t_rxtcur) + /* + * We have been idle for "a while" and no acks are + * expected to clock out any data we send -- + * slow start to get ack "clock" running again. 
+ */ + tp->snd_cwnd = tp->t_maxseg; +again: + sendalot = 0; +#ifdef TCP_SACK + /* + * If we've recently taken a timeout, snd_max will be greater than + * snd_nxt. There may be SACK information that allows us to avoid + * resending already delivered data. Adjust snd_nxt accordingly. + */ + if (!tp->sack_disable && SEQ_LT(tp->snd_nxt, tp->snd_max)) + tcp_sack_adjust(tp); +#endif + off = tp->snd_nxt - tp->snd_una; + win = ulmin(tp->snd_wnd, tp->snd_cwnd); + + flags = tcp_outflags[tp->t_state]; + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. + */ + +#ifdef TCP_SACK + /* + * Send any SACK-generated retransmissions. If we're explicitly trying + * to send out new data (when sendalot is 1), bypass this function. + * If we retransmit in fast recovery mode, decrement snd_cwnd, since + * we're replacing a (future) new transmission with a retransmission + * now, and we previously incremented snd_cwnd in tcp_input(). + */ + if (!tp->sack_disable && !sendalot) { + if ((p = tcp_sack_output(tp))) { + off = p->rxmit - tp->snd_una; + sack_rxmit = 1; +#if 0 + /* Coalesce holes into a single retransmission */ +#endif + len = min(tp->t_maxseg, p->end - p->rxmit); +#ifndef TCP_FACK + /* in FACK, hold snd_cwnd constant during recovery */ + if (SEQ_LT(tp->snd_una, tp->snd_last)) + tp->snd_cwnd -= tp->t_maxseg; +#endif + } + } +#endif /* TCP_SACK */ + + if (tp->t_force) { + if (win == 0) { + /* + * If we still have some data to send, then + * clear the FIN bit. Usually this would + * happen below when it realizes that we + * aren't sending all the data. However, + * if we have exactly 1 byte of unset data, + * then it won't clear the FIN bit below, + * and if we are in persist state, we wind + * up sending the packet without recording + * that we sent the FIN bit. 
+ * + * We can't just blindly clear the FIN bit, + * because if we don't have any more data + * to send then the probe will be the FIN + * itself. + */ + if (off < so->so_snd.sb_cc) + flags &= ~TH_FIN; + win = 1; + } else { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + +#ifdef TCP_SACK + if (!sack_rxmit) { +#endif + len = ulmin(so->so_snd.sb_cc, win) - off; + +#if defined(TCP_SACK) && defined(TCP_FACK) + /* + * If we're in fast recovery (SEQ_GT(tp->snd_last, tp->snd_una)), and + * amount of outstanding data (snd_awnd) is >= snd_cwnd, then + * do not send data (like zero window conditions) + */ + if (!tp->sack_disable && len && SEQ_GT(tp->snd_last, tp->snd_una) && + (tp->snd_awnd >= tp->snd_cwnd)) + len = 0; +#endif /* TCP_FACK */ +#ifdef TCP_SACK + } +#endif + + if (len < 0) { + /* + * If FIN has been sent but not acked, + * but we haven't been called to retransmit, + * len will be -1. Otherwise, window shrank + * after we sent into it. If window shrank to 0, + * cancel pending retransmit and pull snd_nxt + * back to (closed) window. We will enter persist + * state below. If the window didn't close completely, + * just wait for an ACK. + */ + len = 0; + if (win == 0) { + tp->t_timer[TCPT_REXMT] = 0; + tp->snd_nxt = tp->snd_una; + } + } + if (len > tp->t_maxseg) { + len = tp->t_maxseg; + sendalot = 1; + } + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~TH_FIN; + + win = sbspace(&so->so_rcv); + + /* + * Sender silly window avoidance. If connection is idle + * and can send all data, a maximum segment, + * at least a maximum default-size segment do it, + * or are forced, do it; otherwise don't bother. + * If peer's buffer is tiny, then send + * when window is at least half open. + * If retransmitting (possibly after persist timer forced us + * to send into a small window), then must resend. 
+ */ + if (len) { + if (len == tp->t_maxseg) + goto send; + if ((idle || tp->t_flags & TF_NODELAY) && + len + off >= so->so_snd.sb_cc) + goto send; + if (tp->t_force) + goto send; + if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) + goto send; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + goto send; +#ifdef TCP_SACK + if (sack_rxmit) + goto send; +#endif + } + + /* + * Compare available window to amount of window + * known to peer (as advertised window less + * next expected input). If the difference is at least two + * max size segments, or at least 50% of the maximum possible + * window, then want to send a window update to peer. + */ + if (win > 0) { + /* + * "adv" is the amount we can increase the window, + * taking into account that we are limited by + * TCP_MAXWIN << tp->rcv_scale. + */ + long adv = lmin(win, (long)TCP_MAXWIN << tp->rcv_scale) - + (tp->rcv_adv - tp->rcv_nxt); + + if (adv >= (long) (2 * tp->t_maxseg)) + goto send; + if (2 * adv >= (long) so->so_rcv.sb_hiwat) + goto send; + } + + /* + * Send if we owe peer an ACK. + */ + if (tp->t_flags & TF_ACKNOW) + goto send; + if (flags & (TH_SYN|TH_RST)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + /* + * If our state indicates that FIN should be sent + * and we have not yet done so, or we're retransmitting the FIN, + * then we need to send. + */ + if (flags & TH_FIN && + ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) + goto send; +#ifdef TCP_SACK + /* + * In SACK, it is possible for tcp_output to fail to send a segment + * after the retransmission timer has been turned off. Make sure + * that the retransmission timer is set. 
+ */ + if (SEQ_GT(tp->snd_max, tp->snd_una) && + tp->t_timer[TCPT_REXMT] == 0 && + tp->t_timer[TCPT_PERSIST] == 0) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + return (0); + } +#endif /* TCP_SACK */ + + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to insure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a small or zero window + * (re)transmitting and thereby not persisting + * + * tp->t_timer[TCPT_PERSIST] + * is set when we are in persist state. + * tp->t_force + * is set when we are called to send a persist packet. + * tp->t_timer[TCPT_REXMT] + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is too small, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state. + * If nothing happens soon, send when timer expires: + * if window is nonzero, transmit what we can, + * otherwise force out a byte. + */ + if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && + tp->t_timer[TCPT_PERSIST] == 0) { + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + + /* + * No reason to send a segment, just return. + */ + return (0); + +send: + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set not to do any options. + * NOTE: we assume that the IP/TCP header plus TCP options + * always fit in a single mbuf, leaving room for a maximum + * link header, i.e. 
+ * max_linkhdr + sizeof(network header) + sizeof(struct tcphdr + + * optlen <= MHLEN + */ + optlen = 0; + + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case PF_INET: + hdrlen = sizeof(struct ip) + sizeof(struct tcphdr); + break; +#endif /* INET */ +#ifdef INET6 + case PF_INET6: + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + break; +#endif /* INET6 */ + default: + return (EPFNOSUPPORT); + } + + if (flags & TH_SYN) { + tp->snd_nxt = tp->iss; + if ((tp->t_flags & TF_NOOPT) == 0) { + u_int16_t mss; + + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + mss = htons((u_int16_t) tcp_mss(tp, 0)); + bcopy((caddr_t)&mss, (caddr_t)(opt + 2), sizeof(mss)); + optlen = 4; +#ifdef TCP_SACK + /* + * If this is the first SYN of connection (not a SYN + * ACK), include SACK_PERMIT_HDR option. If this is a + * SYN ACK, include SACK_PERMIT_HDR option if peer has + * already done so. + */ + if (!tp->sack_disable && ((flags & TH_ACK) == 0 || + (tp->t_flags & TF_SACK_PERMIT))) { + *((u_int32_t *) (opt + optlen)) = + htonl(TCPOPT_SACK_PERMIT_HDR); + optlen += 4; + } +#endif + + if ((tp->t_flags & TF_REQ_SCALE) && + ((flags & TH_ACK) == 0 || + (tp->t_flags & TF_RCVD_SCALE))) { + *((u_int32_t *) (opt + optlen)) = htonl( + TCPOPT_NOP << 24 | + TCPOPT_WINDOW << 16 | + TCPOLEN_WINDOW << 8 | + tp->request_r_scale); + optlen += 4; + } + } + } + + /* + * Send a timestamp and echo-reply if this is a SYN and our side + * wants to use timestamps (TF_REQ_TSTMP is set) or both our side + * and our peer have sent timestamps in our SYN's. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && + (flags & TH_RST) == 0 && + ((flags & (TH_SYN|TH_ACK)) == TH_SYN || + (tp->t_flags & TF_RCVD_TSTMP))) { + u_int32_t *lp = (u_int32_t *)(opt + optlen); + + /* Form timestamp option as shown in appendix A of RFC 1323. 
*/ + *lp++ = htonl(TCPOPT_TSTAMP_HDR); + *lp++ = htonl(tcp_now); + *lp = htonl(tp->ts_recent); + optlen += TCPOLEN_TSTAMP_APPA; + } + +#ifdef TCP_SIGNATURE + if (tp->t_flags & TF_SIGNATURE) { + u_int8_t *bp = (u_int8_t *)(opt + optlen); + + /* Send signature option */ + *(bp++) = TCPOPT_SIGNATURE; + *(bp++) = TCPOLEN_SIGNATURE; + sigoff = optlen + 2; + + { + unsigned int i; + + for (i = 0; i < 16; i++) + *(bp++) = 0; + } + + optlen += TCPOLEN_SIGNATURE; + + /* Pad options list to the next 32 bit boundary and + * terminate it. + */ + *bp++ = TCPOPT_NOP; + *bp++ = TCPOPT_EOL; + optlen += 2; + } +#endif /* TCP_SIGNATURE */ + +#ifdef TCP_SACK + /* + * Send SACKs if necessary. This should be the last option processed. + * Only as many SACKs are sent as are permitted by the maximum options + * size. No more than three SACKs are sent. + */ + if (!tp->sack_disable && tp->t_state == TCPS_ESTABLISHED && + (tp->t_flags & (TF_SACK_PERMIT|TF_NOOPT)) == TF_SACK_PERMIT && + tp->rcv_numsacks) { + u_int32_t *lp = (u_int32_t *)(opt + optlen); + u_int32_t *olp = lp++; + int count = 0; /* actual number of SACKs inserted */ + int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK; + + maxsack = min(maxsack, TCP_MAX_SACK); + for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) { + struct sackblk sack = tp->sackblks[i]; + if (sack.start == 0 && sack.end == 0) + continue; + *lp++ = htonl(sack.start); + *lp++ = htonl(sack.end); + count++; + } + *olp = htonl(TCPOPT_SACK_HDR|(TCPOLEN_SACK*count+2)); + optlen += TCPOLEN_SACK*count + 4; /* including leading NOPs */ + } +#endif /* TCP_SACK */ + +#ifdef DIAGNOSTIC + if (optlen > MAX_TCPOPTLEN) + panic("tcp_output: options too long"); +#endif /* DIAGNOSTIC */ + + hdrlen += optlen; + + /* + * Adjust data length if insertion of options will + * bump the packet length beyond the t_maxopd length. 
+ */ + if (len > tp->t_maxopd - optlen) { + len = tp->t_maxopd - optlen; + sendalot = 1; + flags &= ~TH_FIN; + } + +#ifdef DIAGNOSTIC + if (max_linkhdr + hdrlen > MCLBYTES) + panic("tcphdr too big"); +#endif + + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + if (len) { + if (tp->t_force && len == 1) + tcpstat.tcps_sndprobe++; + else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { + tcpstat.tcps_sndrexmitpack++; + tcpstat.tcps_sndrexmitbyte += len; + } else { + tcpstat.tcps_sndpack++; + tcpstat.tcps_sndbyte += len; + } +#ifdef notyet + if ((m = m_copypack(so->so_snd.sb_mb, off, + (int)len, max_linkhdr + hdrlen)) == 0) { + error = ENOBUFS; + goto out; + } + /* + * m_copypack left space for our hdr; use it. + */ + m->m_len += hdrlen; + m->m_data -= hdrlen; +#else + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m != NULL) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_freem(m); + m = NULL; + } + } + if (m == NULL) { + error = ENOBUFS; + goto out; + } + m->m_data += max_linkhdr; + m->m_len = hdrlen; + if (len <= MCLBYTES - hdrlen - max_linkhdr) { + m_copydata(so->so_snd.sb_mb, off, (int) len, + mtod(m, caddr_t) + hdrlen); + m->m_len += len; + } else { + m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); + if (m->m_next == 0) { + (void) m_free(m); + error = ENOBUFS; + goto out; + } + } +#endif + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) 
+ */ + if (off + len == so->so_snd.sb_cc) + flags |= TH_PUSH; + } else { + if (tp->t_flags & TF_ACKNOW) + tcpstat.tcps_sndacks++; + else if (flags & (TH_SYN|TH_FIN|TH_RST)) + tcpstat.tcps_sndctrl++; + else if (SEQ_GT(tp->snd_up, tp->snd_una)) + tcpstat.tcps_sndurg++; + else + tcpstat.tcps_sndwinup++; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m != NULL) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_freem(m); + m = NULL; + } + } + if (m == NULL) { + error = ENOBUFS; + goto out; + } + m->m_data += max_linkhdr; + m->m_len = hdrlen; + } + m->m_pkthdr.rcvif = (struct ifnet *)0; + + if (!tp->t_template) + panic("tcp_output"); +#ifdef DIAGNOSTIC + if (tp->t_template->m_len != hdrlen - optlen) + panic("tcp_output: template len != hdrlen - optlen"); +#endif /* DIAGNOSTIC */ + bcopy(mtod(tp->t_template, caddr_t), mtod(m, caddr_t), + tp->t_template->m_len); + th = (struct tcphdr *)(mtod(m, caddr_t) + tp->t_template->m_len - + sizeof(struct tcphdr)); + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if ((flags & TH_FIN) && (tp->t_flags & TF_SENTFIN) && + (tp->snd_nxt == tp->snd_max)) + tp->snd_nxt--; + /* + * If we are doing retransmissions, then snd_nxt will + * not reflect the first unsent octet. For ACK only + * packets, we do not want the sequence number of the + * retransmitted packet, we want the sequence number + * of the next unsent octet. So, if there is no data + * (and no SYN or FIN), use snd_max instead of snd_nxt + * when filling in ti_seq. But if we are in persist + * state, snd_max might reflect one byte beyond the + * right edge of the window, so use snd_nxt in that + * case, since we know we aren't doing a retransmission. + * (retransmit and persist are mutually exclusive...) 
+ */ + if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) + th->th_seq = htonl(tp->snd_nxt); + else + th->th_seq = htonl(tp->snd_max); + +#ifdef TCP_SACK + if (sack_rxmit) { + /* + * If sendalot was turned on (due to option stuffing), turn it + * off. Properly set th_seq field. Advance the ret'x pointer + * by len. + */ + if (sendalot) + sendalot = 0; + th->th_seq = htonl(p->rxmit); + p->rxmit += len; +#if defined(TCP_SACK) && defined(TCP_FACK) + tp->retran_data += len; +#endif /* TCP_FACK */ + } +#endif /* TCP_SACK */ + + th->th_ack = htonl(tp->rcv_nxt); + if (optlen) { + bcopy((caddr_t)opt, (caddr_t)(th + 1), optlen); + th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; + } + th->th_flags = flags; + + /* + * Calculate receive window. Don't shrink window, + * but avoid silly window syndrome. + */ + if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) + win = 0; + if (win > (long)TCP_MAXWIN << tp->rcv_scale) + win = (long)TCP_MAXWIN << tp->rcv_scale; + if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) + win = (long)(tp->rcv_adv - tp->rcv_nxt); + if (flags & TH_RST) + win = 0; + th->th_win = htons((u_int16_t) (win>>tp->rcv_scale)); + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { + u_int32_t urp = tp->snd_up - tp->snd_nxt; + if (urp > IP_MAXPACKET) + urp = IP_MAXPACKET; + th->th_urp = htons((u_int16_t)urp); + th->th_flags |= TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. 
+ */ + tp->snd_up = tp->snd_una; /* drag it along */ + + /* Put TCP length in pseudo-header */ + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case AF_INET: + if (len + optlen) + mtod(m, struct ipovly *)->ih_len = htons((u_int16_t)( + sizeof (struct tcphdr) + optlen + len)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + break; +#endif /* INET6 */ + } + +#ifdef TCP_SIGNATURE + if (tp->t_flags & TF_SIGNATURE) { + MD5_CTX ctx; + union sockaddr_union sa; + struct tdb *tdb; + + memset(&sa, 0, sizeof(union sockaddr_union)); + + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case AF_INET: + sa.sa.sa_len = sizeof(struct sockaddr_in); + sa.sa.sa_family = AF_INET; + sa.sin.sin_addr = mtod(m, struct ip *)->ip_dst; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + sa.sa.sa_len = sizeof(struct sockaddr_in6); + sa.sa.sa_family = AF_INET6; + sa.sin6.sin6_addr = mtod(m, struct ip6_hdr *)->ip6_dst; + break; +#endif /* INET6 */ + } + + /* XXX gettdb() should really be called at spltdb(). */ + /* XXX this is splsoftnet(), currently they are the same. 
*/ + tdb = gettdb(0, &sa, IPPROTO_TCP); + if (tdb == NULL) + return (EPERM); + + MD5Init(&ctx); + + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case AF_INET: + { + struct ippseudo ippseudo; + struct ipovly *ipovly; + + ipovly = mtod(m, struct ipovly *); + + ippseudo.ippseudo_src = ipovly->ih_src; + ippseudo.ippseudo_dst = ipovly->ih_dst; + ippseudo.ippseudo_pad = 0; + ippseudo.ippseudo_p = IPPROTO_TCP; + ippseudo.ippseudo_len = ipovly->ih_len; + MD5Update(&ctx, (char *)&ippseudo, + sizeof(struct ippseudo)); + MD5Update(&ctx, mtod(m, caddr_t) + + sizeof(struct ip), + sizeof(struct tcphdr)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + static int printed = 0; + + if (!printed) { + printf("error: TCP MD5 support for " + "IPv6 not yet implemented.\n"); + printed = 1; + } + } + break; +#endif /* INET6 */ + } + + if (len && m_apply(m, hdrlen, len, tcp_signature_apply, + (caddr_t)&ctx)) + return (EINVAL); + + MD5Update(&ctx, tdb->tdb_amxkey, tdb->tdb_amxkeylen); + MD5Final(mtod(m, caddr_t) + hdrlen - optlen + sigoff, &ctx); + } +#endif /* TCP_SIGNATURE */ + + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case AF_INET: + th->th_sum = in_cksum(m, (int)(hdrlen + len)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m->m_pkthdr.len = hdrlen + len; + th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), + hdrlen - sizeof(struct ip6_hdr) + len); + break; +#endif /* INET6 */ + } + + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. 
+ */ + if (flags & (TH_SYN|TH_FIN)) { + if (flags & TH_SYN) + tp->snd_nxt++; + if (flags & TH_FIN) { + tp->snd_nxt++; + tp->t_flags |= TF_SENTFIN; + } + } +#ifdef TCP_SACK + if (!tp->sack_disable) { + if (sack_rxmit && (p->rxmit != tp->snd_nxt)) { + goto timer; + } + } +#endif + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtt == 0) { + tp->t_rtt = 1; + tp->t_rtseq = startseq; + tcpstat.tcps_segstimed++; + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing an ack or a keep-alive probe. + * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. + */ +#ifdef TCP_SACK + timer: + if (!tp->sack_disable && sack_rxmit && + tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_max) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } +#endif + + if (tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_una) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + } else + if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + + /* + * Trace. + */ +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, caddr_t), 0, + len); +#endif /* TCPDEBUG */ + + /* + * Fill in IP length and desired time to live and + * send to IP level. There should be a better way + * to handle ttl and tos; we could keep them in + * the template, but need a way to checksum without them. 
+ */ + m->m_pkthdr.len = hdrlen + len; + + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case AF_INET: + { + struct ip *ip; + + ip = mtod(m, struct ip *); + ip->ip_len = m->m_pkthdr.len; + ip->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl; + ip->ip_tos = tp->t_inpcb->inp_ip.ip_tos; + } + error = ip_output(m, tp->t_inpcb->inp_options, + &tp->t_inpcb->inp_route, so->so_options & SO_DONTROUTE, + 0, tp->t_inpcb); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + struct ip6_hdr *ipv6; + + ipv6 = mtod(m, struct ip6_hdr *); + ipv6->ip6_plen = m->m_pkthdr.len - + sizeof(struct ip6_hdr); + ipv6->ip6_nxt = IPPROTO_TCP; + ipv6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL); + } + error = ip6_output(m, tp->t_inpcb->inp_outputopts6, + &tp->t_inpcb->inp_route6, + (so->so_options & SO_DONTROUTE), NULL, NULL); + break; +#endif /* INET6 */ +#ifdef TUBA + case AF_ISO: + if (tp->t_tuba_pcb) + error = tuba_output(m, tp); + break; +#endif /* TUBA */ + } + +#if defined(TCP_SACK) && defined(TCP_FACK) + /* Update snd_awnd to reflect the new data that was sent. */ + tp->snd_awnd = tcp_seq_subtract(tp->snd_max, tp->snd_fack) + + tp->retran_data; +#endif /* defined(TCP_SACK) && defined(TCP_FACK) */ + + if (error) { +out: + if (error == ENOBUFS) { + tcp_quench(tp->t_inpcb, 0); + return (0); + } + if ((error == EHOSTUNREACH || error == ENETDOWN) + && TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_softerror = error; + return (0); + } + return (error); + } + tcpstat.tcps_sndtotal++; + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. 
+ */ + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->last_ack_sent = tp->rcv_nxt; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + if (sendalot && --maxburst) +#else + if (sendalot) +#endif + goto again; + return (0); +} + +void +tcp_setpersist(tp) + register struct tcpcb *tp; +{ + register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; + + if (tp->t_timer[TCPT_REXMT]) + panic("tcp_output REXMT"); + /* + * Start/restart persistance timer. + */ + if (t < tp->t_rttmin) + t = tp->t_rttmin; + TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], + t * tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; +} diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_subr.c b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_subr.c new file mode 100644 index 0000000..24937e1 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_subr.c @@ -0,0 +1,899 @@ +//========================================================================== +// +// sys/netinet/tcp_subr.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: tcp_subr.c,v 1.21 1999/12/08 06:50:20 itojun Exp $ */ +/* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/proc.h> +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/errno.h> +#ifdef __ECOS +#undef errno +#endif + +#include <net/route.h> +#include <net/if.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/ip_icmp.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/tcpip.h> +#ifndef __ECOS +#include <dev/rndvar.h> +#endif + +#ifdef INET6 +#include <netinet6/ip6_var.h> +#include <netinet6/tcpipv6.h> +#include <sys/domain.h> +#endif /* INET6 */ + +#ifdef TCP_SIGNATURE +#include <sys/md5k.h> +#endif /* TCP_SIGNATURE */ + +/* patchable/settable parameters for tcp */ +int tcp_mssdflt = TCP_MSS; +int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; + +/* + * Configure kernel with options "TCP_DO_RFC1323=0" to disable RFC1323 stuff. + * This is a good idea over slow SLIP/PPP links, because the timestamp + * pretty well destroys the VJ compression (any packet with a timestamp + * different from the previous one can't be compressed), as well as adding + * more overhead. + * XXX And it should be a settable per route characteristic (with this just + * used as the default). 
+ */ +#ifndef TCP_DO_RFC1323 +#define TCP_DO_RFC1323 1 +#endif +int tcp_do_rfc1323 = TCP_DO_RFC1323; + +#ifndef TCP_DO_SACK +#ifdef TCP_SACK +#define TCP_DO_SACK 0 /* XXX - make this 1 when SACK is fixed */ +#else +#define TCP_DO_SACK 0 +#endif +#endif +int tcp_do_sack = TCP_DO_SACK; /* RFC 2018 selective ACKs */ + +#ifndef TCBHASHSIZE +#define TCBHASHSIZE 128 +#endif +int tcbhashsize = TCBHASHSIZE; + +#ifdef INET6 +extern int ip6_defhlim; +#endif /* INET6 */ + +/* + * Tcp initialization + */ +void +tcp_init() +{ +#ifdef TCP_COMPAT_42 + tcp_iss = 1; /* wrong */ +#else /* TCP_COMPAT_42 */ + tcp_iss = arc4random() + 1; +#endif /* !TCP_COMPAT_42 */ + in_pcbinit(&tcbtable, tcbhashsize); + +#ifdef INET6 + /* + * Since sizeof(struct ip6_hdr) > sizeof(struct ip), we + * do max length checks/computations only on the former. + */ + if (max_protohdr < (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) + max_protohdr = (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)); + if ((max_linkhdr + sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) > + MHLEN) + panic("tcp_init"); +#endif /* INET6 */ +} + +/* + * Create template to be used to send tcp packets on a connection. + * Call after host entry created, allocates an mbuf and fills + * in a skeletal tcp/ip header, minimizing the amount of work + * necessary when the connection is used. + * + * To support IPv6 in addition to IPv4 and considering that the sizes of + * the IPv4 and IPv6 headers are not the same, we now use a separate pointer + * for the TCP header. Also, we made the former tcpiphdr header pointer + * into just an IP overlay pointer, with casting as appropriate for v6. 
rja + */ +struct mbuf * +tcp_template(tp) + struct tcpcb *tp; +{ + register struct inpcb *inp = tp->t_inpcb; + register struct mbuf *m; + register struct tcphdr *th = (struct tcphdr *)0; + + if ((m = tp->t_template) == 0) { + m = m_get(M_DONTWAIT, MT_HEADER); + if (m == NULL) + return (0); + + switch (tp->pf) { + case 0: /*default to PF_INET*/ +#ifdef INET + case AF_INET: + m->m_len = sizeof(struct ip); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m->m_len = sizeof(struct ip6_hdr); + break; +#endif /* INET6 */ + } + m->m_len += sizeof (struct tcphdr); + + /* + * The link header, network header, TCP header, and TCP options + * all must fit in this mbuf. For now, assume the worst case of + * TCP options size. Eventually, compute this from tp flags. + */ + if (m->m_len + MAX_TCPOPTLEN + max_linkhdr >= MHLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return (0); + } + } + } + + switch(tp->pf) { +#ifdef INET + case AF_INET: + { + struct ipovly *ipovly; + + ipovly = mtod(m, struct ipovly *); + + bzero(ipovly->ih_x1, sizeof ipovly->ih_x1); + ipovly->ih_pr = IPPROTO_TCP; + ipovly->ih_len = htons(sizeof (struct tcpiphdr) - + sizeof (struct ip)); + ipovly->ih_src = inp->inp_laddr; + ipovly->ih_dst = inp->inp_faddr; + + th = (struct tcphdr *)(mtod(m, caddr_t) + + sizeof(struct ip)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + struct ip6_hdr *ipv6; + + ipv6 = mtod(m, struct ip6_hdr *); + + ipv6->ip6_src = inp->inp_laddr6; + ipv6->ip6_dst = inp->inp_faddr6; + ipv6->ip6_flow = htonl(0x60000000) | + (inp->inp_ipv6.ip6_flow & htonl(0x0fffffff)); + + + ipv6->ip6_nxt = IPPROTO_TCP; + ipv6->ip6_plen = htons(sizeof(struct tcphdr)); /*XXX*/ + ipv6->ip6_hlim = in6_selecthlim(inp, NULL); /*XXX*/ + + th = (struct tcphdr *)(mtod(m, caddr_t) + + sizeof(struct ip6_hdr)); + } + break; +#endif /* INET6 */ + } + + th->th_sport = inp->inp_lport; + th->th_dport = inp->inp_fport; + th->th_seq = 0; + th->th_ack = 0; + 
th->th_x2 = 0; + th->th_off = 5; + th->th_flags = 0; + th->th_win = 0; + th->th_sum = 0; + th->th_urp = 0; + return (m); +} + +/* + * Send a single message to the TCP at address specified by + * the given TCP/IP header. If m == 0, then we make a copy + * of the tcpiphdr at ti and send directly to the addressed host. + * This is used to force keep alive messages out using the TCP + * template for a connection tp->t_template. If flags are given + * then we send a message back to the TCP which originated the + * segment ti, and discard the mbuf containing it and any other + * attached mbufs. + * + * In any case the ack and sequence number of the transmitted + * segment are as specified by the parameters. + */ +#ifdef INET6 +/* This function looks hairy, because it was so IPv4-dependent. */ +#endif /* INET6 */ +void +tcp_respond(tp, template, m, ack, seq, flags) + struct tcpcb *tp; + caddr_t template; + register struct mbuf *m; + tcp_seq ack, seq; + int flags; +{ + register int tlen; + int win = 0; + struct route *ro = 0; + register struct tcphdr *th; + register struct tcpiphdr *ti = (struct tcpiphdr *)template; +#ifdef INET6 + int is_ipv6 = 0; /* true iff IPv6 */ +#endif /* INET6 */ + + if (tp) { + win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); +#ifdef INET6 + /* + * If this is called with an unconnected + * socket/tp/pcb (tp->pf is 0), we lose. + */ + is_ipv6 = (tp->pf == PF_INET6); + + /* + * The route/route6 distinction is meaningless + * unless you're allocating space or passing parameters. 
+ */ +#endif /* INET6 */ + ro = &tp->t_inpcb->inp_route; + } +#ifdef INET6 + else + is_ipv6 = (((struct ip *)ti)->ip_v == 6); +#endif /* INET6 */ + if (m == 0) { + m = m_gethdr(M_DONTWAIT, MT_HEADER); + if (m == NULL) + return; +#ifdef TCP_COMPAT_42 + tlen = 1; +#else + tlen = 0; +#endif + m->m_data += max_linkhdr; +#ifdef INET6 + if (is_ipv6) + bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) + + sizeof(struct ip6_hdr)); + else +#endif /* INET6 */ + bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) + + sizeof(struct ip)); + + ti = mtod(m, struct tcpiphdr *); + flags = TH_ACK; + } else { + m_freem(m->m_next); + m->m_next = 0; + m->m_data = (caddr_t)ti; + tlen = 0; +#define xchg(a,b,type) { type t; t=a; a=b; b=t; } +#ifdef INET6 + if (is_ipv6) { + m->m_len = sizeof(struct tcphdr) + sizeof(struct ip6_hdr); + xchg(((struct ip6_hdr *)ti)->ip6_dst,\ + ((struct ip6_hdr *)ti)->ip6_src,\ + struct in6_addr); + th = (void *)ti + sizeof(struct ip6_hdr); + } else +#endif /* INET6 */ + { + m->m_len = sizeof (struct tcpiphdr); + xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t); + th = (void *)((caddr_t)ti + sizeof(struct ip)); + } + xchg(th->th_dport, th->th_sport, u_int16_t); +#undef xchg + } +#ifdef INET6 + if (is_ipv6) { + tlen += sizeof(struct tcphdr) + sizeof(struct ip6_hdr); + th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip6_hdr)); + } else +#endif /* INET6 */ + { + ti->ti_len = htons((u_int16_t)(sizeof (struct tcphdr) + tlen)); + tlen += sizeof (struct tcpiphdr); + th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip)); + } + + m->m_len = tlen; + m->m_pkthdr.len = tlen; + m->m_pkthdr.rcvif = (struct ifnet *) 0; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_x2 = 0; + th->th_off = sizeof (struct tcphdr) >> 2; + th->th_flags = flags; + if (tp) + th->th_win = htons((u_int16_t) (win >> tp->rcv_scale)); + else + th->th_win = htons((u_int16_t)win); + th->th_urp = 0; + +#ifdef INET6 + if (is_ipv6) { + ((struct ip6_hdr *)ti)->ip6_flow = 
htonl(0x60000000); + ((struct ip6_hdr *)ti)->ip6_nxt = IPPROTO_TCP; + ((struct ip6_hdr *)ti)->ip6_hlim = + in6_selecthlim(tp ? tp->t_inpcb : NULL, NULL); /*XXX*/ + ((struct ip6_hdr *)ti)->ip6_plen = tlen - sizeof(struct ip6_hdr); + th->th_sum = 0; + th->th_sum = in6_cksum(m, IPPROTO_TCP, + sizeof(struct ip6_hdr), ((struct ip6_hdr *)ti)->ip6_plen); + HTONS(((struct ip6_hdr *)ti)->ip6_plen); + ip6_output(m, tp ? tp->t_inpcb->inp_outputopts6 : NULL, + (struct route_in6 *)ro, 0, NULL, NULL); + } else +#endif /* INET6 */ + { + bzero(ti->ti_x1, sizeof ti->ti_x1); + ti->ti_len = htons((u_short)tlen - sizeof(struct ip)); + th->th_sum = in_cksum(m, tlen); + ((struct ip *)ti)->ip_len = tlen; + ((struct ip *)ti)->ip_ttl = ip_defttl; + ip_output(m, NULL, ro, 0, NULL, tp ? tp->t_inpcb : NULL); + } +}

/*
 * Create a new TCP control block, making an
 * empty reassembly queue and hooking it to the argument
 * protocol control block.
 *
 * Returns the new control block, or a null pointer when the
 * M_NOWAIT allocation fails (inp is left untouched in that case).
 */
struct tcpcb *
tcp_newtcpcb(inp)
	struct inpcb *inp;
{
	register struct tcpcb *tp;

	tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
	if (tp == NULL)
		return ((struct tcpcb *)0);
	bzero((char *) tp, sizeof(*tp));
	LIST_INIT(&tp->segq);
	tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;

#ifdef TCP_SACK
	tp->sack_disable = tcp_do_sack ? 0 : 1;
#endif
	tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
	tp->t_inpcb = inp;
	/*
	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
	 * reasonable initial retransmit time.
	 */
	tp->t_srtt = TCPTV_SRTTBASE;
	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
	tp->t_rttmin = TCPTV_MIN;
	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
	    TCPTV_MIN, TCPTV_REXMTMAX);
	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
#ifdef INET6
	/*
	 * If we want to use tp->pf for a quick-n-easy way to determine
	 * the outbound dgram type, we cannot make this decision
	 * until a connection is established!  Bzero() sets pf to zero, and
	 * that's the way we want it, unless, of course, it's an AF_INET
	 * socket...
	 */
	if ((inp->inp_flags & INP_IPV6) == 0)
		tp->pf = PF_INET;  /* If AF_INET socket, we can't do v6 from it. */
#else
	tp->pf = PF_INET;
#endif

#ifdef INET6
	if (inp->inp_flags & INP_IPV6)
		inp->inp_ipv6.ip6_hlim = ip6_defhlim;
	else
#endif /* INET6 */
		inp->inp_ip.ip_ttl = ip_defttl;

	inp->inp_ppcb = (caddr_t)tp;
	return (tp);
}

/*
 * Drop a TCP connection, reporting
 * the specified error.  If connection is synchronized,
 * then send a RST to peer.
 *
 * The error is stored in the socket's so_error; an ETIMEDOUT is
 * replaced by the pending soft error when one is recorded.
 * Returns whatever tcp_close() returns.
 *
 * The parameter is called "error" rather than "errno" because errno
 * is a reserved identifier that may be defined as a macro.
 */
struct tcpcb *
tcp_drop(tp, error)
	register struct tcpcb *tp;
	int error;
{
	struct socket *so = tp->t_inpcb->inp_socket;

	if (TCPS_HAVERCVDSYN(tp->t_state)) {
		tp->t_state = TCPS_CLOSED;
		(void) tcp_output(tp);
		tcpstat.tcps_drops++;
	} else
		tcpstat.tcps_conndrops++;
	if (error == ETIMEDOUT && tp->t_softerror)
		error = tp->t_softerror;
	so->so_error = error;
	return (tcp_close(tp));
}

/*
 * Close a TCP control block:
 *	discard all space held by the tcp
 *	discard internet protocol block
 *	wake up any sleepers
 *
 * Always returns a null tcpcb pointer; tp must not be used afterwards.
 */
struct tcpcb *
tcp_close(tp)
	register struct tcpcb *tp;
{
	register struct ipqent *qe;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
#ifdef TCP_SACK
	struct sackhole *p, *q;
#endif
#ifdef RTV_RTT
	register struct rtentry *rt;
#ifdef INET6
	register int bound_to_specific = 0;  /* I.e. non-default */

	/*
	 * This code checks the nature of the route for this connection.
	 * Normally this is done by two simple checks in the next
	 * INET/INET6 ifdef block, but because of two possible lower layers,
	 * that check is done here.
	 *
	 * Perhaps should be doing this only for a RTF_HOST route.
	 */
	rt = inp->inp_route.ro_rt;  /* Same for route or route6. */
	if (tp->pf == PF_INET6) {
		if (rt)
			bound_to_specific =
			    !(IN6_IS_ADDR_UNSPECIFIED(&
			    ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr));
	} else {
		if (rt)
			bound_to_specific =
			    (((struct sockaddr_in *)rt_key(rt))->
			    sin_addr.s_addr != INADDR_ANY);
	}
#endif /* INET6 */

	/*
	 * If we sent enough data to get some meaningful characteristics,
	 * save them in the routing entry.  'Enough' is arbitrarily
	 * defined as the sendpipesize (default 4K) * 16.  This would
	 * give us 16 rtt samples assuming we only get one sample per
	 * window (the usual case on a long haul net).  16 samples is
	 * enough for the srtt filter to converge to within 5% of the correct
	 * value; fewer samples and we could save a very bogus rtt.
	 *
	 * Don't update the default route's characteristics and don't
	 * update anything that the user "locked".
	 */
#ifdef INET6
	/*
	 * Note that rt and bound_to_specific are set above.
	 */
	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
	    rt && bound_to_specific) {
#else /* INET6 */
	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
	    (rt = inp->inp_route.ro_rt) &&
	    satosin(rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
#endif /* INET6 */
		register u_long i = 0;

		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			i = tp->t_srtt *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
			if (rt->rt_rmx.rmx_rtt && i)
				/*
				 * filter this update to half the old & half
				 * the new values, converting scale.
				 * See route.h and tcp_var.h for a
				 * description of the scaling constants.
				 */
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
		}
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			i = tp->t_rttvar *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
		}
		/*
		 * update the pipelimit (ssthresh) if it has been updated
		 * already or if a pipesize was specified & the threshhold
		 * got below half the pipesize.  I.e., wait for bad news
		 * before we start updating, then update on both good
		 * and bad news.
		 *
		 * NOTE(review): when RTV_SSTHRESH is locked the left operand
		 * short-circuits before "i = tp->snd_ssthresh", so the
		 * right-hand comparison sees whatever the blocks above left
		 * in i (or its initial 0).  Left unchanged; confirm that
		 * this is the intended behaviour.
		 */
		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/*
			 * convert the limit from user data bytes to
			 * packets then to packet data bytes.
			 */
			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
			if (i < 2)
				i = 2;
#ifdef INET6
			if (tp->pf == PF_INET6)
				i *= (u_long)(tp->t_maxseg + sizeof (struct tcphdr)
				    + sizeof(struct ip6_hdr));
			else
#endif /* INET6 */
				i *= (u_long)(tp->t_maxseg +
				    sizeof (struct tcpiphdr));

			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
		}
	}
#endif /* RTV_RTT */

	/*
	 * Free the reassembly queue, if any.  The IPv4 and IPv6 cases
	 * previously had two textually identical loops selected by
	 * tp->pf; a single loop does the same work for every family.
	 */
	while ((qe = tp->segq.lh_first) != NULL) {
		LIST_REMOVE(qe, ipqe_q);
		m_freem(qe->ipqe_m);
		FREE(qe, M_IPQ);
	}
#ifdef TCP_SACK
	/* Free SACK holes. */
	for (p = tp->snd_holes; p != 0; p = q) {
		q = p->next;
		free(p, M_PCB);
	}
#endif
	if (tp->t_template)
		(void) m_free(tp->t_template);
	free(tp, M_PCB);
	inp->inp_ppcb = 0;
	soisdisconnected(so);
	in_pcbdetach(inp);
	tcpstat.tcps_closed++;
	return ((struct tcpcb *)0);
}

/*
 * Protocol drain hook; intentionally does nothing here.
 */
void
tcp_drain()
{

}

/*
 * Notify a tcp user of an asynchronous error;
 * store error as soft error, but wake up user
 * (for now, won't do anything until can select for soft error).
 */
void
tcp_notify(inp, error)
	struct inpcb *inp;
	int error;
{
	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
	register struct socket *so = inp->inp_socket;

	/*
	 * A pcb without a TCP control block has nothing to record the
	 * error in; tcp_quench() guards the same pointer the same way.
	 */
	if (tp == NULL)
		return;

	/*
	 * Ignore some errors if we are hooked up.
	 * If connection hasn't completed, has retransmitted several times,
	 * and receives a second error, give up now.  This is better
	 * than waiting a long time to establish a connection that
	 * can never complete.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	    error == EHOSTDOWN)) {
		return;
	} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
	    tp->t_rxtshift > 3 && tp->t_softerror)
		so->so_error = error;
	else
		tp->t_softerror = error;
	wakeup((caddr_t) &so->so_timeo);
	sorwakeup(so);
	sowwakeup(so);
}

#if defined(INET6) && !defined(TCP6)
/*
 * IPv6 control-input entry point: forwards to tcp_ctlinput() with a
 * null packet pointer; the third argument d is not used.
 */
void
tcp6_ctlinput(cmd, sa, d)
	int cmd;
	struct sockaddr *sa;
	void *d;
{
	(void)tcp_ctlinput(cmd, sa, NULL); /*XXX*/
}
#endif

/*
 * Control input: map a PRC_* command to an error code through
 * inetctlerrmap[] and hand it to the matching pcb(s) via a notify
 * routine (tcp_notify by default, tcp_quench for PRC_QUENCH,
 * in_rtchange for redirects).  v, when non-null, is treated as the
 * IP header of the offending packet.  Always returns NULL.
 */
void *
tcp_ctlinput(cmd, sa, v)
	int cmd;
	struct sockaddr *sa;
	register void *v;
{
	register struct ip *ip = v;
	register struct tcphdr *th;
	extern int inetctlerrmap[];
	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
	int error;

	if ((unsigned)cmd >= PRC_NCMDS)
		return NULL;
	error = inetctlerrmap[cmd];
	if (cmd == PRC_QUENCH)
		notify = tcp_quench;
	else if (PRC_IS_REDIRECT(cmd)) {
		/* Redirects and dead hosts are reported to every pcb. */
		notify = in_rtchange;
		ip = 0;
	} else if (cmd == PRC_HOSTDEAD)
		ip = 0;
	else if (error == 0)
		return NULL;

#ifdef INET6
	if (sa->sa_family == AF_INET6) {
		if (ip) {
			struct ip6_hdr *ipv6 = (struct ip6_hdr *)ip;

			th = (struct tcphdr *)(ipv6 + 1);
#if 0 /*XXX*/
			in6_pcbnotify(&tcbtable, sa, th->th_dport,
			    &ipv6->ip6_src, th->th_sport, cmd, notify);
#endif
		} else {
#if 0 /*XXX*/
			in6_pcbnotify(&tcbtable, sa, 0,
			    (struct in6_addr *)&in6addr_any, 0, cmd, notify);
#endif
		}
	} else
#endif /* INET6 */
	{
		if (ip) {
			th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
			in_pcbnotify(&tcbtable, sa, th->th_dport, ip->ip_src,
			    th->th_sport, error, notify);
		} else
			in_pcbnotifyall(&tcbtable, sa, error, notify);
	}
	return NULL;
}

/*
 * When a source quench is received, close congestion window
 * to one segment.  We will gradually open it again as we proceed.
 *
 * The error argument is unused; it exists so the function matches
 * the notify callback signature used by tcp_ctlinput().
 */
void
tcp_quench(inp, error)
	struct inpcb *inp;
	int error;
{
	struct tcpcb *tp = intotcpcb(inp);

	if (tp)
		tp->snd_cwnd = tp->t_maxseg;
}

#ifdef TCP_SIGNATURE
/*
 * TDB attach hook for TCP signatures; nothing to do, returns 0.
 */
int
tcp_signature_tdb_attach()
{
	return (0);
}

/*
 * Validate the authentication key in ii and copy it into tdbp.
 *
 * The key length must be 1..80.  Every character but the last must be
 * a letter, or a digit that is not followed by a space; the last
 * character must be a letter or a digit.
 *
 * Returns 0 on success, EINVAL for a rejected key, ENOMEM when the
 * key copy cannot be allocated.  xsp is not used.
 */
int
tcp_signature_tdb_init(tdbp, xsp, ii)
	struct tdb *tdbp;
	struct xformsw *xsp;
	struct ipsecinit *ii;
{
	char *c;
#define isdigit(c)	  (((c) >= '0') && ((c) <= '9'))
#define isalpha(c)	( (((c) >= 'A') && ((c) <= 'Z')) || \
			  (((c) >= 'a') && ((c) <= 'z')) )

	if ((ii->ii_authkeylen < 1) || (ii->ii_authkeylen > 80))
		return (EINVAL);

	c = (char *)ii->ii_authkey;

	while (c < (char *)ii->ii_authkey + ii->ii_authkeylen - 1) {
		if (isdigit(*c)) {
			if (*(c + 1) == ' ')
				return (EINVAL);
		} else {
			if (!isalpha(*c))
				return (EINVAL);
		}

		c++;
	}

	if (!isdigit(*c) && !isalpha(*c))
		return (EINVAL);

	tdbp->tdb_amxkey = malloc(ii->ii_authkeylen, M_XDATA, M_DONTWAIT);
	if (tdbp->tdb_amxkey == NULL)
		return (ENOMEM);
	bcopy(ii->ii_authkey, tdbp->tdb_amxkey, ii->ii_authkeylen);
	tdbp->tdb_amxkeylen = ii->ii_authkeylen;

	return (0);
}

/*
 * Wipe and release the key stored by tcp_signature_tdb_init().
 * Safe to call when no key is present.  Always returns 0.
 */
int
tcp_signature_tdb_zeroize(tdbp)
	struct tdb *tdbp;
{
	if (tdbp->tdb_amxkey) {
		bzero(tdbp->tdb_amxkey, tdbp->tdb_amxkeylen);
		free(tdbp->tdb_amxkey, M_XDATA);
		tdbp->tdb_amxkey = NULL;
		/* Do not keep advertising a length for a key that is gone. */
		tdbp->tdb_amxkeylen = 0;
	}

	return (0);
}

/*
 * TDB input hook; not implemented for TCP signatures, returns a
 * null mbuf pointer.
 */
struct mbuf *
tcp_signature_tdb_input(m, tdbp)
	struct mbuf *m;
	struct tdb *tdbp;
{
	return (0);
}

/*
 * TDB output hook; not implemented for TCP signatures, always
 * returns EINVAL.
 */
int
tcp_signature_tdb_output(m, tdbp, mp)
	struct mbuf *m;
	struct tdb *tdbp;
	struct mbuf **mp;
{
	return (EINVAL);
}

/*
 * Feed len bytes at data into the MD5 context passed as fstate.
 * Always returns 0.
 */
int
tcp_signature_apply(fstate, data, len)
	caddr_t fstate;
	caddr_t data;
	unsigned int len;
{
	MD5Update((MD5_CTX *)fstate, (char *)data, len);
	return 0;
}
#endif /* TCP_SIGNATURE */
diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_timer.c b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_timer.c new file mode 100644 index 0000000..514b76b --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_timer.c @@ -0,0 +1,412 @@ +//========================================================================== +// +// sys/netinet/tcp_timer.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: tcp_timer.c,v 1.15 1999/11/15 05:50:59 hugh Exp $ */ +/* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 + */ + +#ifndef TUBA_INCLUDE +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/errno.h> + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/tcpip.h> +#ifndef __ECOS +#include <dev/rndvar.h> +#endif + +int tcp_keepidle = TCPTV_KEEP_IDLE; +int tcp_keepintvl = TCPTV_KEEPINTVL; +int tcp_maxpersistidle = TCPTV_KEEP_IDLE; /* max idle time in persist */ +int tcp_maxidle; +#endif /* TUBA_INCLUDE */ +/* + * Fast timeout routine for processing delayed acks + */ +void +tcp_fasttimo() +{ + register struct inpcb *inp; + register struct tcpcb *tp; + int s; + + s = splsoftnet(); + inp = tcbtable.inpt_queue.cqh_first; + if (inp) /* XXX */ + for (; inp != (struct inpcb *)&tcbtable.inpt_queue; + inp = inp->inp_queue.cqe_next) { + if ((tp = (struct tcpcb *)inp->inp_ppcb) && + (tp->t_flags & TF_DELACK)) { + tp->t_flags &= ~TF_DELACK; + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_delack++; + (void) tcp_output(tp); + } + } + splx(s); +} + +/* + * Tcp protocol timeout routine called every 500 ms. + * Updates the timers in all active tcb's and + * causes finite state machine actions if timers expire. + */ +void +tcp_slowtimo() +{ + register struct inpcb *ip, *ipnxt; + register struct tcpcb *tp; + int s; + register long i; + + s = splsoftnet(); + tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl; + /* + * Search through tcb's and update active timers. 
+ */ + ip = tcbtable.inpt_queue.cqh_first; + if (ip == (struct inpcb *)0) { /* XXX */ + splx(s); + return; + } + for (; ip != (struct inpcb *)&tcbtable.inpt_queue; ip = ipnxt) { + ipnxt = ip->inp_queue.cqe_next; + tp = intotcpcb(ip); + if (tp == 0 || tp->t_state == TCPS_LISTEN) + continue; + for (i = 0; i < TCPT_NTIMERS; i++) { + if (tp->t_timer[i] && --tp->t_timer[i] == 0) { + (void) tcp_usrreq(tp->t_inpcb->inp_socket, + PRU_SLOWTIMO, (struct mbuf *)0, + (struct mbuf *)i, (struct mbuf *)0); + /* XXX NOT MP SAFE */ + if ((ipnxt == (void *)&tcbtable.inpt_queue && + tcbtable.inpt_queue.cqh_last != ip) || + ipnxt->inp_queue.cqe_prev != ip) + goto tpgone; + } + } + tp->t_idle++; + if (tp->t_rtt) + tp->t_rtt++; +tpgone: + ; + } +#ifdef TCP_COMPAT_42 + tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ + if ((int)tcp_iss < 0) + tcp_iss = 0; /* XXX */ +#else /* TCP_COMPAT_42 */ + tcp_iss += arc4random() % (2 * TCP_ISSINCR / PR_SLOWHZ) + 1; /* increment iss */ +#endif /* !TCP_COMPAT_42 */ + tcp_now++; /* for timestamps */ + splx(s); +} +#ifndef TUBA_INCLUDE + +/* + * Cancel all timers for TCP tp. + */ +void +tcp_canceltimers(tp) + struct tcpcb *tp; +{ + register int i; + + for (i = 0; i < TCPT_NTIMERS; i++) + tp->t_timer[i] = 0; +} + +int tcp_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; + +int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ + +/* + * TCP timer processing. + */ +struct tcpcb * +tcp_timers(tp, timer) + register struct tcpcb *tp; + int timer; +{ + short rto; +#ifdef TCP_SACK + struct sackhole *p, *q; + /* + * Free SACK holes for 2MSL and REXMT timers. 
+ */ + if (timer == TCPT_2MSL || timer == TCPT_REXMT) { + q = p = tp->snd_holes; + while (p != 0) { + q = p->next; + free(p, M_PCB); + p = q; + } + tp->snd_holes = 0; +#if defined(TCP_SACK) && defined(TCP_FACK) + tp->snd_fack = tp->snd_una; + tp->retran_data = 0; + tp->snd_awnd = 0; +#endif /* TCP_FACK */ + } +#endif /* TCP_SACK */ + + switch (timer) { + + /* + * 2 MSL timeout in shutdown went off. If we're closed but + * still waiting for peer to close and connection has been idle + * too long, or if 2MSL time is up from TIME_WAIT, delete connection + * control block. Otherwise, check again in a bit. + */ + case TCPT_2MSL: + if (tp->t_state != TCPS_TIME_WAIT && + tp->t_idle <= tcp_maxidle) + tp->t_timer[TCPT_2MSL] = tcp_keepintvl; + else + tp = tcp_close(tp); + break; + + /* + * Retransmission timer went off. Message has not + * been acked within retransmit interval. Back off + * to a longer retransmit interval and retransmit one segment. + */ + case TCPT_REXMT: + if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { + tp->t_rxtshift = TCP_MAXRXTSHIFT; + tcpstat.tcps_timeoutdrop++; + tp = tcp_drop(tp, tp->t_softerror ? + tp->t_softerror : ETIMEDOUT); + break; + } + tcpstat.tcps_rexmttimeo++; + rto = TCP_REXMTVAL(tp); + if (rto < tp->t_rttmin) + rto = tp->t_rttmin; + TCPT_RANGESET(tp->t_rxtcur, + rto * tcp_backoff[tp->t_rxtshift], + tp->t_rttmin, TCPTV_REXMTMAX); + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * If losing, let the lower level know and try for + * a better route. Also, if we backed off this far, + * our srtt estimate is probably bogus. Clobber it + * so we'll take the next rtt measurement as our srtt; + * move the current srtt into rttvar to keep the current + * retransmit times until then. 
+ */ + if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { + in_losing(tp->t_inpcb); + tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); + tp->t_srtt = 0; + } + tp->snd_nxt = tp->snd_una; +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + /* + * Note: We overload snd_last to function also as the + * snd_last variable described in RFC 2582 + */ + tp->snd_last = tp->snd_max; +#endif /* TCP_SACK or TCP_NEWRENO */ + /* + * If timing a segment in this window, stop the timer. + */ + tp->t_rtt = 0; + /* + * Close the congestion window down to one segment + * (we'll open it by one segment for each ack we get). + * Since we probably have a window's worth of unacked + * data accumulated, this "slow start" keeps us from + * dumping all that data as back-to-back packets (which + * might overwhelm an intermediate gateway). + * + * There are two phases to the opening: Initially we + * open by one mss on each ack. This makes the window + * size increase exponentially with time. If the + * window is larger than the path can handle, this + * exponential growth results in dropped packet(s) + * almost immediately. To get more time between + * drops but still "push" the network to take advantage + * of improving conditions, we switch from exponential + * to linear window opening at some threshhold size. + * For a threshhold, we use half the current window + * size, truncated to a multiple of the mss. + * + * (the minimum cwnd that will give us exponential + * growth is 2 mss. We don't allow the threshhold + * to go below this.) + */ + { + u_long win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; + if (win < 2) + win = 2; + tp->snd_cwnd = tp->t_maxseg; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_dupacks = 0; + } + (void) tcp_output(tp); + break; + + /* + * Persistance timer into zero window. + * Force a byte to be output, if possible. + */ + case TCPT_PERSIST: + tcpstat.tcps_persisttimeo++; + /* + * Hack: if the peer is dead/unreachable, we do not + * time out if the window is closed. 
After a full + * backoff, drop the connection if the idle time + * (no responses to probes) reaches the maximum + * backoff that we would use if retransmitting. + */ + rto = TCP_REXMTVAL(tp); + if (rto < tp->t_rttmin) + rto = tp->t_rttmin; + if (tp->t_rxtshift == TCP_MAXRXTSHIFT && + (tp->t_idle >= tcp_maxpersistidle || + tp->t_idle >= rto * tcp_totbackoff)) { + tcpstat.tcps_persistdrop++; + tp = tcp_drop(tp, ETIMEDOUT); + break; + } + tcp_setpersist(tp); + tp->t_force = 1; + (void) tcp_output(tp); + tp->t_force = 0; + break; + + /* + * Keep-alive timer went off; send something + * or drop connection if idle for too long. + */ + case TCPT_KEEP: + tcpstat.tcps_keeptimeo++; + if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) + goto dropit; + if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE && + tp->t_state <= TCPS_CLOSE_WAIT) { + if (tp->t_idle >= tcp_keepidle + tcp_maxidle) + goto dropit; + /* + * Send a packet designed to force a response + * if the peer is up and reachable: + * either an ACK if the connection is still alive, + * or an RST if the peer has closed the connection + * due to timeout or reboot. + * Using sequence number tp->snd_una-1 + * causes the transmitted zero-length segment + * to lie outside the receive window; + * by the protocol spec, this requires the + * correspondent TCP to respond. + */ + tcpstat.tcps_keepprobe++; +#ifdef TCP_COMPAT_42 + /* + * The keepalive packet must have nonzero length + * to get a 4.2 host to respond. 
+ */ + tcp_respond(tp, + mtod(tp->t_template, caddr_t), + (struct mbuf *)NULL, + tp->rcv_nxt - 1, tp->snd_una - 1, 0); +#else + tcp_respond(tp, + mtod(tp->t_template, caddr_t), + (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0); +#endif + tp->t_timer[TCPT_KEEP] = tcp_keepintvl; + } else + tp->t_timer[TCPT_KEEP] = tcp_keepidle; + break; + dropit: + tcpstat.tcps_keepdrops++; + tp = tcp_drop(tp, ETIMEDOUT); + break; + } + return (tp); +} +#endif /* TUBA_INCLUDE */ diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_usrreq.c b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_usrreq.c new file mode 100644 index 0000000..5f049a8 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/tcp_usrreq.c @@ -0,0 +1,942 @@ +//========================================================================== +// +// sys/netinet/tcp_usrreq.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. +// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: tcp_usrreq.c,v 1.37 1999/12/08 06:50:20 itojun Exp $ */ +/* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. 
All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. +*/ + +#include <sys/param.h> +#ifndef __ECOS +#include <sys/systm.h> +#endif +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/errno.h> +#ifndef __ECOS +#include <sys/stat.h> +#include <sys/proc.h> +#include <sys/ucred.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/tcpip.h> +#include <netinet/tcp_debug.h> +#ifndef __ECOS +#include <dev/rndvar.h> +#endif + +#ifdef IPSEC +extern int check_ipsec_policy __P((struct inpcb *, u_int32_t)); +#endif + +#ifdef INET6 +#include <sys/domain.h> +#endif /* INET6 */ + +/* + * TCP protocol interface to socket abstraction. + */ +extern char *tcpstates[]; +extern int tcptv_keep_init; + +/* from in_pcb.c */ +extern struct baddynamicports baddynamicports; + +int tcp_ident __P((void *, size_t *, void *, size_t)); + +#if defined(INET6) && !defined(TCP6) +int +tcp6_usrreq(so, req, m, nam, control, p) + struct socket *so; + int req; + struct mbuf *m, *nam, *control; + struct proc *p; +{ + return tcp_usrreq(so, req, m, nam, control); +} +#endif + +/* + * Process a TCP user request for TCP tb. If this is a send request + * then m is the mbuf chain of send data. 
If this is a timer expiration + * (called from the software clock routine), then timertype tells which timer. + */ +/*ARGSUSED*/ +int +tcp_usrreq(so, req, m, nam, control) + struct socket *so; + int req; + struct mbuf *m, *nam, *control; +{ + struct sockaddr_in *sin; + register struct inpcb *inp; + register struct tcpcb *tp = NULL; + int s; + int error = 0; + int ostate; + + if (req == PRU_CONTROL) { +#ifdef INET6 + if (sotopf(so) == PF_INET6) + return in6_control(so, (u_long)m, (caddr_t)nam, + (struct ifnet *)control, 0); + else +#endif /* INET6 */ + return (in_control(so, (u_long)m, (caddr_t)nam, + (struct ifnet *)control)); + } + if (control && control->m_len) { + m_freem(control); + if (m) + m_freem(m); + return (EINVAL); + } + + s = splsoftnet(); + inp = sotoinpcb(so); + /* + * When a TCP is attached to a socket, then there will be + * a (struct inpcb) pointed at by the socket, and this + * structure will point at a subsidary (struct tcpcb). + */ + if (inp == 0 && req != PRU_ATTACH) { + splx(s); + /* + * The following corrects an mbuf leak under rare + * circumstances + */ + if (m && (req == PRU_SEND || req == PRU_SENDOOB)) + m_freem(m); + return (EINVAL); /* XXX */ + } + if (inp) { + tp = intotcpcb(inp); + /* WHAT IF TP IS 0? */ +#ifdef KPROF + tcp_acounts[tp->t_state][req]++; +#endif + ostate = tp->t_state; + } else + ostate = 0; + switch (req) { + + /* + * TCP attaches to socket via PRU_ATTACH, reserving space, + * and an internet control block. + */ + case PRU_ATTACH: + if (inp) { + error = EISCONN; + break; + } + error = tcp_attach(so); + if (error) + break; + if ((so->so_options & SO_LINGER) && so->so_linger == 0) + so->so_linger = TCP_LINGERTIME; + tp = sototcpcb(so); + break; + + /* + * PRU_DETACH detaches the TCP protocol from the socket. + * If the protocol state is non-embryonic, then can't + * do this directly: have to initiate a PRU_DISCONNECT, + * which may finish later; embryonic TCB's can just + * be discarded here. 
+ */ + case PRU_DETACH: + tp = tcp_disconnect(tp); + break; + + /* + * Give the socket an address. + */ + case PRU_BIND: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + error = in6_pcbbind(inp, nam); + else +#endif + error = in_pcbbind(inp, nam); + if (error) + break; +#ifdef INET6 + /* + * If we bind to an address, set up the tp->pf accordingly! + */ + if (inp->inp_flags & INP_IPV6) { + /* If a PF_INET6 socket... */ + if (inp->inp_flags & INP_IPV6_MAPPED) + tp->pf = AF_INET; + else if ((inp->inp_flags & INP_IPV6_UNDEC) == 0) + tp->pf = AF_INET6; + /* else tp->pf is still 0. */ + } + /* else socket is PF_INET, and tp->pf is PF_INET. */ +#endif /* INET6 */ + break; + + /* + * Prepare to accept connections. + */ + case PRU_LISTEN: + if (inp->inp_lport == 0) { +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + error = in6_pcbbind(inp, NULL); + else +#endif + error = in_pcbbind(inp, NULL); + } + /* If the in_pcbbind() above is called, the tp->pf + should still be whatever it was before. */ + if (error == 0) + tp->t_state = TCPS_LISTEN; + break; + + /* + * Initiate connection to peer. + * Create a template for use in transmissions on this connection. + * Enter SYN_SENT state, and mark socket as connecting. + * Start keep-alive timer, and seed output sequence space. + * Send initial segment on connection. 
+ */ + case PRU_CONNECT: + sin = mtod(nam, struct sockaddr_in *); + +#ifdef INET6 + if (sin->sin_family == AF_INET6) { + struct in6_addr *in6_addr = &mtod(nam, + struct sockaddr_in6 *)->sin6_addr; + + if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || + IN6_IS_ADDR_MULTICAST(in6_addr) || + (IN6_IS_ADDR_V4MAPPED(in6_addr) && + ((in6_addr->s6_addr32[3] == INADDR_ANY) || + IN_MULTICAST(in6_addr->s6_addr32[3]) || + in_broadcast(sin->sin_addr, NULL)))) { + error = EINVAL; + break; + } + + if (inp->inp_lport == 0) { + error = in6_pcbbind(inp, NULL); + if (error) + break; + } + error = in6_pcbconnect(inp, nam); + } else if (sin->sin_family == AF_INET) +#endif /* INET6 */ + { + if ((sin->sin_addr.s_addr == INADDR_ANY) || + IN_MULTICAST(sin->sin_addr.s_addr) || + in_broadcast(sin->sin_addr, NULL)) { + error = EINVAL; + break; + } + + /* Trying to connect to some broadcast address */ + if (in_broadcast(sin->sin_addr, NULL)) { + error = EINVAL; + break; + } + + if (inp->inp_lport == 0) { + error = in_pcbbind(inp, NULL); + if (error) + break; + } + error = in_pcbconnect(inp, nam); + } + + if (error) + break; + +#ifdef INET6 + /* + * With a connection, I now know the version of IP + * is in use and hence can set tp->pf with authority. + */ + if (inp->inp_flags & INP_IPV6) { + if (inp->inp_flags & INP_IPV6_MAPPED) + tp->pf = PF_INET; + else + tp->pf = PF_INET6; + } + /* else I'm a PF_INET socket, and hence tp->pf is PF_INET. */ +#endif /* INET6 */ + + tp->t_template = tcp_template(tp); + if (tp->t_template == 0) { + in_pcbdisconnect(inp); + error = ENOBUFS; + break; + } + +#ifdef INET6 + if ((inp->inp_flags & INP_IPV6) && (tp->pf == PF_INET)) { + inp->inp_ip.ip_ttl = ip_defttl; + inp->inp_ip.ip_tos = 0; + } +#endif /* INET6 */ + + so->so_state |= SS_CONNECTOUT; + /* Compute window scaling to request. 
*/ + while (tp->request_r_scale < TCP_MAX_WINSHIFT && + (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) + tp->request_r_scale++; + soisconnecting(so); + tcpstat.tcps_connattempt++; + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = tcptv_keep_init; + tp->iss = tcp_iss; +#ifdef TCP_COMPAT_42 + tcp_iss += TCP_ISSINCR/2; +#else /* TCP_COMPAT_42 */ + tcp_iss += arc4random() % TCP_ISSINCR + 1; +#endif /* !TCP_COMPAT_42 */ + tcp_sendseqinit(tp); +#if defined(TCP_SACK) || defined(TCP_NEWRENO) + tp->snd_last = tp->snd_una; +#endif +#if defined(TCP_SACK) && defined(TCP_FACK) + tp->snd_fack = tp->snd_una; + tp->retran_data = 0; + tp->snd_awnd = 0; +#endif + error = tcp_output(tp); + break; + + /* + * Create a TCP connection between two sockets. + */ + case PRU_CONNECT2: + error = EOPNOTSUPP; + break; + + /* + * Initiate disconnect from peer. + * If connection never passed embryonic stage, just drop; + * else if don't need to let data drain, then can just drop anyways, + * else have to begin TCP shutdown process: mark socket disconnecting, + * drain unread data, state switch to reflect user close, and + * send segment (e.g. FIN) to peer. Socket will be really disconnected + * when peer sends FIN and acks ours. + * + * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. + */ + case PRU_DISCONNECT: + tp = tcp_disconnect(tp); + break; + + /* + * Accept a connection. Essentially all the work is + * done at higher levels; just return the address + * of the peer, storing through addr. + */ + case PRU_ACCEPT: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + in6_setpeeraddr(inp, nam); + else +#endif + in_setpeeraddr(inp, nam); + break; + + /* + * Mark the connection as being incapable of further output. + */ + case PRU_SHUTDOWN: + if (so->so_state & SS_CANTSENDMORE) + break; + socantsendmore(so); + tp = tcp_usrclosed(tp); + if (tp) + error = tcp_output(tp); + break; + + /* + * After a receive, possibly send window update to peer. 
+ */ + case PRU_RCVD: + (void) tcp_output(tp); + break; + + /* + * Do a send by putting data in output queue and updating urgent + * marker if URG set. Possibly send more data. + */ + case PRU_SEND: +#ifdef IPSEC + error = check_ipsec_policy(inp, 0); + if (error) + break; +#endif + sbappend(&so->so_snd, m); + error = tcp_output(tp); + break; + + /* + * Abort the TCP. + */ + case PRU_ABORT: + tp = tcp_drop(tp, ECONNABORTED); + break; + + case PRU_SENSE: +#ifndef __ECOS + ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; +#endif + (void) splx(s); + return (0); + + case PRU_RCVOOB: + if ((so->so_oobmark == 0 && + (so->so_state & SS_RCVATMARK) == 0) || + so->so_options & SO_OOBINLINE || + tp->t_oobflags & TCPOOB_HADDATA) { + error = EINVAL; + break; + } + if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { + error = EWOULDBLOCK; + break; + } + m->m_len = 1; + *mtod(m, caddr_t) = tp->t_iobc; + if (((long)nam & MSG_PEEK) == 0) + tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); + break; + + case PRU_SENDOOB: + if (sbspace(&so->so_snd) < -512) { + m_freem(m); + error = ENOBUFS; + break; + } + /* + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section. + * Otherwise, snd_up should be one lower. + */ + sbappend(&so->so_snd, m); + tp->snd_up = tp->snd_una + so->so_snd.sb_cc; + tp->t_force = 1; + error = tcp_output(tp); + tp->t_force = 0; + break; + + case PRU_SOCKADDR: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + in6_setsockaddr(inp, nam); + else +#endif + in_setsockaddr(inp, nam); + break; + + case PRU_PEERADDR: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + in6_setpeeraddr(inp, nam); + else +#endif + in_setpeeraddr(inp, nam); + break; + + /* + * TCP slow timer went off; going through this + * routine for tracing's sake. 
+ */ + case PRU_SLOWTIMO: + tp = tcp_timers(tp, (long)nam); + req |= (long)nam << 8; /* for debug's sake */ + break; + + default: + panic("tcp_usrreq"); + } +#ifdef TCPDEBUG + if (tp && (so->so_options & SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); +#endif /* TCPDEBUG */ + splx(s); + return (error); +} + +int +tcp_ctloutput(op, so, level, optname, mp) + int op; + struct socket *so; + int level, optname; + struct mbuf **mp; +{ + int error = 0, s; + struct inpcb *inp; + register struct tcpcb *tp; + register struct mbuf *m; + register int i; + + s = splsoftnet(); + inp = sotoinpcb(so); + if (inp == NULL) { + splx(s); + if (op == PRCO_SETOPT && *mp) + (void) m_free(*mp); + return (ECONNRESET); + } +#ifdef INET6 + tp = intotcpcb(inp); +#endif /* INET6 */ + if (level != IPPROTO_TCP) { +#ifdef INET6 + /* + * Not sure if this is the best approach. + * It seems to be, but we don't set tp->pf until the connection + * is established, which may lead to confusion in the case of + * AF_INET6 sockets which get SET/GET options for IPv4. 
+ */ + if (tp->pf == PF_INET6) + error = ip6_ctloutput(op, so, level, optname, mp); + else +#endif /* INET6 */ + error = ip_ctloutput(op, so, level, optname, mp); + splx(s); + return (error); + } +#ifndef INET6 + tp = intotcpcb(inp); +#endif /* !INET6 */ + + switch (op) { + + case PRCO_SETOPT: + m = *mp; + switch (optname) { + + case TCP_NODELAY: + if (m == NULL || m->m_len < sizeof (int)) + error = EINVAL; + else if (*mtod(m, int *)) + tp->t_flags |= TF_NODELAY; + else + tp->t_flags &= ~TF_NODELAY; + break; + + case TCP_MAXSEG: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + break; + } + + i = *mtod(m, int *); + if (i > 0 && i <= tp->t_maxseg) + tp->t_maxseg = i; + else + error = EINVAL; + break; + +#ifdef TCP_SACK + case TCP_SACK_DISABLE: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + break; + } + + if (TCPS_HAVEESTABLISHED(tp->t_state)) { + error = EPERM; + break; + } + + if (tp->t_flags & TF_SIGNATURE) { + error = EPERM; + break; + } + + if (*mtod(m, int *)) + tp->sack_disable = 1; + else + tp->sack_disable = 0; + break; +#endif +#ifdef TCP_SIGNATURE + case TCP_SIGNATURE_ENABLE: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + break; + } + + if (TCPS_HAVEESTABLISHED(tp->t_state)) { + error = EPERM; + break; + } + + if (*mtod(m, int *)) { + tp->t_flags |= TF_SIGNATURE; +#ifdef TCP_SACK + tp->sack_disable = 1; +#endif /* TCP_SACK */ + } else + tp->t_flags &= ~TF_SIGNATURE; + break; +#endif /* TCP_SIGNATURE */ + default: + error = ENOPROTOOPT; + break; + } + if (m) + (void) m_free(m); + break; + + case PRCO_GETOPT: + *mp = m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof(int); + + switch (optname) { + case TCP_NODELAY: + *mtod(m, int *) = tp->t_flags & TF_NODELAY; + break; + case TCP_MAXSEG: + *mtod(m, int *) = tp->t_maxseg; + break; +#ifdef TCP_SACK + case TCP_SACK_DISABLE: + *mtod(m, int *) = tp->sack_disable; + break; +#endif + default: + error = ENOPROTOOPT; + break; + } + break; + } + splx(s); + return 
(error); +} + +#ifndef TCP_SENDSPACE +#define TCP_SENDSPACE 1024*16; +#endif +u_int tcp_sendspace = TCP_SENDSPACE; +#ifndef TCP_RECVSPACE +#define TCP_RECVSPACE 1024*16; +#endif +u_int tcp_recvspace = TCP_RECVSPACE; + +/* + * Attach TCP protocol to socket, allocating + * internet protocol control block, tcp control block, + * bufer space, and entering LISTEN state if to accept connections. + */ +int +tcp_attach(so) + struct socket *so; +{ + register struct tcpcb *tp; + struct inpcb *inp; + int error; + + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + error = soreserve(so, tcp_sendspace, tcp_recvspace); + if (error) + return (error); + } + error = in_pcballoc(so, &tcbtable); + if (error) + return (error); + inp = sotoinpcb(so); + tp = tcp_newtcpcb(inp); + if (tp == NULL) { + int nofd = so->so_state & SS_NOFDREF; /* XXX */ + + so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ + in_pcbdetach(inp); + so->so_state |= nofd; + return (ENOBUFS); + } + tp->t_state = TCPS_CLOSED; + return (0); +} + +/* + * Initiate (or continue) disconnect. + * If embryonic state, just send reset (once). + * If in ``let data drain'' option and linger null, just drop. + * Otherwise (hard), mark socket disconnecting and drop + * current input data; switch states based on user close, and + * send segment to peer (with FIN). + */ +struct tcpcb * +tcp_disconnect(tp) + register struct tcpcb *tp; +{ + struct socket *so = tp->t_inpcb->inp_socket; + + if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) + tp = tcp_close(tp); + else if ((so->so_options & SO_LINGER) && so->so_linger == 0) + tp = tcp_drop(tp, 0); + else { + soisdisconnecting(so); + sbflush(&so->so_rcv); + tp = tcp_usrclosed(tp); + if (tp) + (void) tcp_output(tp); + } + return (tp); +} + +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 
+ * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +struct tcpcb * +tcp_usrclosed(tp) + register struct tcpcb *tp; +{ + + switch (tp->t_state) { + + case TCPS_CLOSED: + case TCPS_LISTEN: + case TCPS_SYN_SENT: + tp->t_state = TCPS_CLOSED; + tp = tcp_close(tp); + break; + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + tp->t_state = TCPS_FIN_WAIT_1; + break; + + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_LAST_ACK; + break; + } + if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { + soisdisconnected(tp->t_inpcb->inp_socket); + /* + * If we are in FIN_WAIT_2, we arrived here because the + * application did a shutdown of the send side. Like the + * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after + * a full close, we start a timer to make sure sockets are + * not left in FIN_WAIT_2 forever. + */ + if (tp->t_state == TCPS_FIN_WAIT_2) + tp->t_timer[TCPT_2MSL] = tcp_maxidle; + } + return (tp); +} + +/* + * Look up a socket for ident.. 
+ */ +int +tcp_ident(oldp, oldlenp, newp, newlen) + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; +{ + int error = 0, s; + struct tcp_ident_mapping tir; + struct inpcb *inp; + struct sockaddr_in *fin, *lin; + + if (oldp == NULL || newp != NULL || newlen != 0) + return (EINVAL); + if (*oldlenp < sizeof(tir)) + return (ENOMEM); + if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) + return (error); + if (tir.faddr.sa_len != sizeof (struct sockaddr) || + tir.faddr.sa_family != AF_INET) + return (EINVAL); + fin = (struct sockaddr_in *)&tir.faddr; + lin = (struct sockaddr_in *)&tir.laddr; + + s = splsoftnet(); + inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, fin->sin_port, + lin->sin_addr, lin->sin_port); + if (inp == NULL) { + ++tcpstat.tcps_pcbhashmiss; + inp = in_pcblookup(&tcbtable, &fin->sin_addr, fin->sin_port, + &lin->sin_addr, lin->sin_port, 0); + } + if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { + tir.ruid = inp->inp_socket->so_ruid; + tir.euid = inp->inp_socket->so_euid; + } else { + tir.ruid = -1; + tir.euid = -1; + } + splx(s); + + *oldlenp = sizeof (tir); + error = copyout((void *)&tir, oldp, sizeof (tir)); + return (error); +} + +#ifdef CYGPKG_NET_SYSCTL +/* + * Sysctl for tcp variables. + */ +int +tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; +{ + + /* All sysctl names at this level are terminal. 
*/ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case TCPCTL_RFC1323: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_do_rfc1323)); +#ifdef TCP_SACK + case TCPCTL_SACK: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_do_sack)); +#endif + case TCPCTL_MSSDFLT: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_mssdflt)); + case TCPCTL_KEEPINITTIME: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcptv_keep_init)); + + case TCPCTL_KEEPIDLE: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_keepidle)); + + case TCPCTL_KEEPINTVL: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_keepintvl)); + + case TCPCTL_SLOWHZ: + return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); + + case TCPCTL_BADDYNAMIC: + return (sysctl_struct(oldp, oldlenp, newp, newlen, + baddynamicports.tcp, sizeof(baddynamicports.tcp))); + + case TCPCTL_RECVSPACE: + return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_recvspace)); + + case TCPCTL_SENDSPACE: + return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_sendspace)); + case TCPCTL_IDENT: + return (tcp_ident(oldp, oldlenp, newp, newlen)); + default: + return (ENOPROTOOPT); + } + /* NOTREACHED */ +} +#endif // CYGPKG_NET_SYSCTL diff --git a/ecos/packages/net/tcpip/current/src/sys/netinet/udp_usrreq.c b/ecos/packages/net/tcpip/current/src/sys/netinet/udp_usrreq.c new file mode 100644 index 0000000..b5549f6 --- /dev/null +++ b/ecos/packages/net/tcpip/current/src/sys/netinet/udp_usrreq.c @@ -0,0 +1,1371 @@ +//========================================================================== +// +// sys/netinet/udp_usrreq.c +// +// +// +//========================================================================== +// ####BSDALTCOPYRIGHTBEGIN#### +// ------------------------------------------- +// Portions of this software may have been derived from OpenBSD +// or other sources, and if so are covered by the appropriate copyright +// and license included herein. 
+// ------------------------------------------- +// ####BSDALTCOPYRIGHTEND#### +//========================================================================== +//#####DESCRIPTIONBEGIN#### +// +// Author(s): gthomas +// Contributors: gthomas +// Date: 2000-01-10 +// Purpose: +// Description: +// +// +//####DESCRIPTIONEND#### +// +//========================================================================== + + +/* $OpenBSD: udp_usrreq.c,v 1.30 1999/12/12 10:59:41 itojun Exp $ */ +/* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 + */ + +/* +%%% portions-copyright-nrl-95 +Portions of this software are Copyright 1995-1998 by Randall Atkinson, +Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights +Reserved. All rights under this copyright have been assigned to the US +Naval Research Laboratory (NRL). The NRL Copyright Notice and License +Agreement Version 1.1 (January 17, 1995) applies to these portions of the +software. +You should have received a copy of the license with this software. If you +didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. 
+*/ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/errno.h> +#ifdef __ECOS +#undef errno +#endif +#ifndef __ECOS +#include <sys/stat.h> +#include <sys/systm.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> +#endif + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/in_pcb.h> +#include <netinet/ip_var.h> +#include <netinet/ip_icmp.h> +#include <netinet/udp.h> +#include <netinet/udp_var.h> + +#ifdef IPSEC +#include <netinet/ip_ipsp.h> + +extern int check_ipsec_policy __P((struct inpcb *, u_int32_t)); +#endif + +#include <machine/stdarg.h> + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#include <netinet6/ip6.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> +#include <netinet6/icmp6.h> +#include <netinet6/ip6protosw.h> + +#ifndef CREATE_IPV6_MAPPED +#define CREATE_IPV6_MAPPED(a6, a4) \ +do { \ + bzero(&(a6), sizeof(a6)); \ + (a6).s6_addr[10] = (a6).s6_addr[11] = 0xff; \ + *(u_int32_t *)&(a6).s6_addr[12] = (a4); \ +} while (0) +#endif + +extern int ip6_defhlim; + +#endif /* INET6 */ + +/* + * UDP protocol implementation. + * Per RFC 768, August, 1980. 
+ */ +int udpcksum = 1; + + +static void udp_detach __P((struct inpcb *)); +static void udp_notify __P((struct inpcb *, int)); +static struct mbuf *udp_saveopt __P((caddr_t, int, int)); + +#ifndef UDBHASHSIZE +#define UDBHASHSIZE 128 +#endif +int udbhashsize = UDBHASHSIZE; + +/* from in_pcb.c */ +extern struct baddynamicports baddynamicports; + +void +udp_init() +{ + + in_pcbinit(&udbtable, udbhashsize); +} + +#if defined(INET6) && !defined(TCP6) +int +udp6_input(mp, offp, proto) + struct mbuf **mp; + int *offp, proto; +{ + struct mbuf *m = *mp; + +#if defined(NFAITH) && 0 < NFAITH + if (m->m_pkthdr.rcvif) { + if (m->m_pkthdr.rcvif->if_type == IFT_FAITH) { + /* XXX send icmp6 host/port unreach? */ + m_freem(m); + return IPPROTO_DONE; + } + } +#endif + + udp_input(m, *offp, proto); + return IPPROTO_DONE; +} +#endif + +void +#if __STDC__ +udp_input(struct mbuf *m, ...) +#else +udp_input(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + register struct ip *ip; + register struct udphdr *uh; + register struct inpcb *inp; + struct mbuf *opts = 0; + int len; + struct ip save_ip; + int iphlen; + va_list ap; + u_int16_t savesum; + union { + struct sockaddr sa; + struct sockaddr_in sin; +#ifdef INET6 + struct sockaddr_in6 sin6; +#endif /* INET6 */ + } srcsa, dstsa; +#ifdef INET6 + struct ip6_hdr *ipv6; + struct sockaddr_in6 src_v4mapped; +#endif /* INET6 */ +#ifdef IPSEC + struct tdb *tdb = NULL; +#endif /* IPSEC */ + + va_start(ap, m); + iphlen = va_arg(ap, int); + va_end(ap); + + udpstat.udps_ipackets++; + +#ifdef IPSEC + /* Save the last SA which was used to process the mbuf */ + if ((m->m_flags & (M_CONF|M_AUTH)) && m->m_pkthdr.tdbi) { + struct tdb_ident *tdbi = m->m_pkthdr.tdbi; + /* XXX gettdb() should really be called at spltdb(). */ + /* XXX this is splsoftnet(), currently they are the same. 
*/ + tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); + free(m->m_pkthdr.tdbi, M_TEMP); + m->m_pkthdr.tdbi = NULL; + } +#endif /* IPSEC */ + + switch (mtod(m, struct ip *)->ip_v) { + case 4: + ip = mtod(m, struct ip *); +#ifdef INET6 + ipv6 = NULL; +#endif /* INET6 */ + srcsa.sa.sa_family = AF_INET; + break; +#ifdef INET6 + case 6: + ip = NULL; + ipv6 = mtod(m, struct ip6_hdr *); + srcsa.sa.sa_family = AF_INET6; + break; +#endif /* INET6 */ + default: +#ifdef __ECOS + diag_printf("udp_input: received unknown IP version %d", + mtod(m, struct ip *)->ip_v); +#else + printf("udp_input: received unknown IP version %d", + mtod(m, struct ip *)->ip_v); +#endif + goto bad; + } + + /* + * Strip IP options, if any; should skip this, + * make available to user, and use on returned packets, + * but we don't yet have a way to check the checksum + * with options still present. + */ + /* + * (contd. from above...) Furthermore, we may want to strip options + * for such things as ICMP errors, where options just get in the way. + */ + if (ip && iphlen > sizeof (struct ip)) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + + /* + * Get IP and UDP header together in first mbuf. + */ + if (m->m_len < iphlen + sizeof(struct udphdr)) { + if ((m = m_pullup2(m, iphlen + sizeof(struct udphdr))) == 0) { + udpstat.udps_hdrops++; + return; + } +#ifdef INET6 + if (ipv6) + ipv6 = mtod(m, struct ip6_hdr *); + else +#endif /* INET6 */ + ip = mtod(m, struct ip *); + } + uh = (struct udphdr *)(mtod(m, caddr_t) + iphlen); + + /* + * Make mbuf data length reflect UDP length. + * If not enough data to reflect UDP length, drop. 
+ */ + len = ntohs((u_int16_t)uh->uh_ulen); + if (m->m_pkthdr.len - iphlen != len) { + if (len > (m->m_pkthdr.len - iphlen) || + len < sizeof(struct udphdr)) { + udpstat.udps_badlen++; + goto bad; + } + m_adj(m, len - (m->m_pkthdr.len - iphlen)); + } + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + if (ip) + save_ip = *ip; + + /* + * Checksum extended UDP header and data. + * from W.R.Stevens: check incoming udp cksums even if + * udpcksum is not set. + */ + savesum = uh->uh_sum; +#ifdef INET6 + if (ipv6) { + /* + * In IPv6, the UDP checksum is ALWAYS used. + */ + if ((uh->uh_sum = in6_cksum(m, IPPROTO_UDP, iphlen, len))) { + udpstat.udps_badsum++; + goto bad; + } + } else +#endif /* INET6 */ + if (uh->uh_sum) { + bzero(((struct ipovly *)ip)->ih_x1, + sizeof ((struct ipovly *)ip)->ih_x1); + ((struct ipovly *)ip)->ih_len = uh->uh_ulen; + if ((uh->uh_sum = in_cksum(m, len + sizeof (struct ip))) != 0) { + udpstat.udps_badsum++; + m_freem(m); + return; + } + } else + udpstat.udps_nosum++; + + switch (srcsa.sa.sa_family) { + case AF_INET: + bzero(&srcsa, sizeof(struct sockaddr_in)); + srcsa.sin.sin_len = sizeof(struct sockaddr_in); + srcsa.sin.sin_family = AF_INET; + srcsa.sin.sin_port = uh->uh_sport; + srcsa.sin.sin_addr = ip->ip_src; + +#ifdef INET6 + bzero(&src_v4mapped, sizeof(struct sockaddr_in6)); + src_v4mapped.sin6_len = sizeof(struct sockaddr_in6); + src_v4mapped.sin6_family = AF_INET6; + src_v4mapped.sin6_port = uh->uh_sport; + CREATE_IPV6_MAPPED(src_v4mapped.sin6_addr, ip->ip_src.s_addr); +#endif /* INET6 */ + + bzero(&dstsa, sizeof(struct sockaddr_in)); + dstsa.sin.sin_len = sizeof(struct sockaddr_in); + dstsa.sin.sin_family = AF_INET; + dstsa.sin.sin_port = uh->uh_dport; + dstsa.sin.sin_addr = ip->ip_dst; + break; +#ifdef INET6 + case AF_INET6: + bzero(&srcsa, sizeof(struct sockaddr_in6)); + srcsa.sin6.sin6_len = sizeof(struct sockaddr_in6); + srcsa.sin6.sin6_family = AF_INET6; + 
srcsa.sin6.sin6_port = uh->uh_sport; + srcsa.sin6.sin6_flowinfo = htonl(0x0fffffff) & ipv6->ip6_flow; + srcsa.sin6.sin6_addr = ipv6->ip6_src; + if (IN6_IS_SCOPE_LINKLOCAL(&srcsa.sin6.sin6_addr)) + srcsa.sin6.sin6_addr.s6_addr16[1] = 0; + if (m->m_pkthdr.rcvif) { + if (IN6_IS_SCOPE_LINKLOCAL(&srcsa.sin6.sin6_addr)) { + srcsa.sin6.sin6_scope_id = + m->m_pkthdr.rcvif->if_index; + } else + srcsa.sin6.sin6_scope_id = 0; + } else + srcsa.sin6.sin6_scope_id = 0; + + bzero(&dstsa, sizeof(struct sockaddr_in6)); + dstsa.sin6.sin6_len = sizeof(struct sockaddr_in6); + dstsa.sin6.sin6_family = AF_INET6; + dstsa.sin6.sin6_port = uh->uh_dport; + dstsa.sin6.sin6_addr = ipv6->ip6_dst; + break; +#endif /* INET6 */ + } + +#ifdef INET6 + if ((ipv6 && IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst)) || + (ip && IN_MULTICAST(ip->ip_dst.s_addr)) || + (ip && in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))) { +#else /* INET6 */ + if (IN_MULTICAST(ip->ip_dst.s_addr) || + in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { +#endif /* INET6 */ + struct socket *last; + /* + * Deliver a multicast or broadcast datagram to *all* sockets + * for which the local and remote addresses and ports match + * those of the incoming datagram. This allows more than + * one process to receive multi/broadcasts on the same port. + * (This really ought to be done for unicast datagrams as + * well, but that would cause problems with existing + * applications that open both address-specific sockets and + * a wildcard socket listening to the same port -- they would + * end up receiving duplicates of every unicast datagram. + * Those applications open the multiple sockets to overcome an + * inadequacy of the UDP socket interface, but for backwards + * compatibility we avoid the problem here rather than + * fixing the interface. Maybe 4.5BSD will remedy this?) + */ + + iphlen += sizeof(struct udphdr); + + /* + * Locate pcb(s) for datagram. + * (Algorithm copied from raw_intr().) 
+ */ + last = NULL; + for (inp = udbtable.inpt_queue.cqh_first; + inp != (struct inpcb *)&udbtable.inpt_queue; + inp = inp->inp_queue.cqe_next) { + if (inp->inp_lport != uh->uh_dport) + continue; +#ifdef INET6 + if (ipv6) { + if (!(inp->inp_flags & INP_IPV6)) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) + if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, + &ipv6->ip6_dst)) + continue; + } else +#endif /* INET6 */ + if (inp->inp_laddr.s_addr != INADDR_ANY) { + if (inp->inp_laddr.s_addr != + ip->ip_dst.s_addr) + continue; + } +#ifdef INET6 + if (ipv6) { + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) + if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, + &ipv6->ip6_src) || + inp->inp_fport != uh->uh_sport) + continue; + } else +#endif /* INET6 */ + if (inp->inp_faddr.s_addr != INADDR_ANY) { + if (inp->inp_faddr.s_addr != + ip->ip_src.s_addr || + inp->inp_fport != uh->uh_sport) + continue; + } + + if (last != NULL) { + struct mbuf *n; + + if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { + opts = NULL; +#ifdef INET6 + if (ipv6 && (inp->inp_flags & IN6P_CONTROLOPTS)) + ip6_savecontrol(inp, &opts, ipv6, n); +#endif /* INET6 */ + m_adj(n, iphlen); + if (sbappendaddr(&last->so_rcv, +#ifdef INET6 + /* + * This cruft is needed in (the rare) + * case I deliver a {multi,broad}cast + * IPv4 packet to an AF_INET6 socket. + */ + ((((struct inpcb *)last->so_pcb)->inp_flags + & INP_IPV6) && ip) ? + (struct sockaddr *)&src_v4mapped : +#endif /* INET6 */ + &srcsa.sa, n, opts) == 0) { + m_freem(n); + udpstat.udps_fullsock++; + } else + sorwakeup(last); + } + } + last = inp->inp_socket; + /* + * Don't look for additional matches if this one does + * not have either the SO_REUSEPORT or SO_REUSEADDR + * socket options set. This heuristic avoids searching + * through all pcbs in the common case of a non-shared + * port. It * assumes that an application will never + * clear these options after setting them. 
+ */ + if ((last->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0) + break; + } + + if (last == NULL) { + /* + * No matching pcb found; discard datagram. + * (No need to send an ICMP Port Unreachable + * for a broadcast or multicast datgram.) + */ + udpstat.udps_noportbcast++; + goto bad; + } + + opts = NULL; +#ifdef INET6 + if (ipv6 && (inp->inp_flags & IN6P_CONTROLOPTS)) + ip6_savecontrol(inp, &opts, ipv6, m); +#endif /* INET6 */ + m_adj(m, iphlen); + if (sbappendaddr(&last->so_rcv, +#ifdef INET6 + /* + * This cruft is needed in (the rare) case I + * deliver a {multi,broad}cast IPv4 packet to + * an AF_INET6 socket. + */ + ((((struct inpcb *)last->so_pcb)->inp_flags & INP_IPV6) && ip) ? + (struct sockaddr *)&src_v4mapped : +#endif /* INET6 */ + &srcsa.sa, m, opts) == 0) { + udpstat.udps_fullsock++; + goto bad; + } + sorwakeup(last); + return; + } + /* + * Locate pcb for datagram. + */ +#ifdef INET6 + if (ipv6) + inp = in6_pcbhashlookup(&udbtable, &ipv6->ip6_src, uh->uh_sport, + &ipv6->ip6_dst, uh->uh_dport); + else +#endif /* INET6 */ + inp = in_pcbhashlookup(&udbtable, ip->ip_src, uh->uh_sport, + ip->ip_dst, uh->uh_dport); + if (inp == 0) { + ++udpstat.udps_pcbhashmiss; +#ifdef INET6 + if (ipv6) { + inp = in_pcblookup(&udbtable, + (struct in_addr *)&(ipv6->ip6_src), + uh->uh_sport, (struct in_addr *)&(ipv6->ip6_dst), + uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6); + } else +#endif /* INET6 */ + inp = in_pcblookup(&udbtable, &ip->ip_src, uh->uh_sport, + &ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD); + if (inp == 0) { + udpstat.udps_noport++; + if (m->m_flags & (M_BCAST | M_MCAST)) { + udpstat.udps_noportbcast++; + goto bad; + } +#ifdef INET6 + if (ipv6) { + icmp6_error(m, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_NOPORT,0); + } else +#endif /* INET6 */ + { + *ip = save_ip; + HTONS(ip->ip_len); + HTONS(ip->ip_id); + HTONS(ip->ip_off); + uh->uh_sum = savesum; + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, + 0, 0); + } + return; + } + } + +#ifdef IPSEC + /* Check 
if this socket requires security for incoming packets */ + if ((inp->inp_seclevel[SL_AUTH] >= IPSEC_LEVEL_REQUIRE && + !(m->m_flags & M_AUTH)) || + (inp->inp_seclevel[SL_ESP_TRANS] >= IPSEC_LEVEL_REQUIRE && + !(m->m_flags & M_CONF))) { +#ifdef notyet +#ifdef INET6 + if (ipv6) + ipv6_icmp_error(m, ICMPV6_BLAH, ICMPV6_BLAH, 0); + else +#endif /* INET6 */ + icmp_error(m, ICMP_BLAH, ICMP_BLAH, 0, 0); + m = NULL; +#endif /* notyet */ + udpstat.udps_nosec++; + goto bad; + } + /* Use tdb_bind_out for this inp's outbound communication */ + if (tdb) + tdb_add_inp(tdb, inp); +#endif /*IPSEC */ + + opts = NULL; +#ifdef INET6 + if (ipv6 && (inp->inp_flags & IN6P_CONTROLOPTS)) + ip6_savecontrol(inp, &opts, ipv6, m); +#endif /* INET6 */ + if (ip && (inp->inp_flags & INP_CONTROLOPTS)) { + struct mbuf **mp = &opts; + + if (inp->inp_flags & INP_RECVDSTADDR) { + *mp = udp_saveopt((caddr_t) &ip->ip_dst, + sizeof(struct in_addr), IP_RECVDSTADDR); + if (*mp) + mp = &(*mp)->m_next; + } +#ifdef notyet + /* options were tossed above */ + if (inp->inp_flags & INP_RECVOPTS) { + *mp = udp_saveopt((caddr_t) opts_deleted_above, + sizeof(struct in_addr), IP_RECVOPTS); + if (*mp) + mp = &(*mp)->m_next; + } + /* ip_srcroute doesn't do what we want here, need to fix */ + if (inp->inp_flags & INP_RECVRETOPTS) { + *mp = udp_saveopt((caddr_t) ip_srcroute(), + sizeof(struct in_addr), IP_RECVRETOPTS); + if (*mp) + mp = &(*mp)->m_next; + } +#endif + } + iphlen += sizeof(struct udphdr); + m_adj(m, iphlen); + if (sbappendaddr(&inp->inp_socket->so_rcv, +#ifdef INET6 + /* + * This cruft is needed to deliver a IPv4 packet to + * an AF_INET6 socket. + */ + ((((struct inpcb *)inp->inp_socket->so_pcb)->inp_flags & INP_IPV6) + && ip) ? 
(struct sockaddr *)&src_v4mapped : +#endif /* INET6 */ + &srcsa.sa, m, opts) == 0) { + udpstat.udps_fullsock++; + goto bad; + } + sorwakeup(inp->inp_socket); + return; +bad: + m_freem(m); + if (opts) + m_freem(opts); +} + +/* + * Create a "control" mbuf containing the specified data + * with the specified type for presentation with a datagram. + */ +struct mbuf * +udp_saveopt(p, size, type) + caddr_t p; + register int size; + int type; +{ + register struct cmsghdr *cp; + struct mbuf *m; + + if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) + return ((struct mbuf *) NULL); + cp = (struct cmsghdr *) mtod(m, struct cmsghdr *); + bcopy(p, CMSG_DATA(cp), size); + size += sizeof(*cp); + m->m_len = size; + cp->cmsg_len = size; + cp->cmsg_level = IPPROTO_IP; + cp->cmsg_type = type; + return (m); +} + +/* + * Notify a udp user of an asynchronous error; + * just wake up so that he can collect error status. + */ +static void +udp_notify(inp, errno) + register struct inpcb *inp; + int errno; +{ + inp->inp_socket->so_error = errno; + sorwakeup(inp->inp_socket); + sowwakeup(inp->inp_socket); +} + +#if defined(INET6) && !defined(TCP6) +void +udp6_ctlinput(cmd, sa, d) + int cmd; + struct sockaddr *sa; + void *d; +{ + struct sockaddr_in6 sa6; + struct ip6_hdr *ip6; + struct mbuf *m; + int off; + + if (sa == NULL) + return; + if (sa->sa_family != AF_INET6) + return; + + /* decode parameter from icmp6. 
*/ + if (d != NULL) { + struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d; + ip6 = ip6cp->ip6c_ip6; + m = ip6cp->ip6c_m; + off = ip6cp->ip6c_off; + } else + return; + + /* translate addresses into internal form */ + sa6 = *(struct sockaddr_in6 *)sa; + if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) && m && m->m_pkthdr.rcvif) + sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); + sa = (struct sockaddr *)&sa6; + + (void)udp_ctlinput(cmd, sa, (void *)ip6); +} +#endif + +void * +udp_ctlinput(cmd, sa, v) + int cmd; + struct sockaddr *sa; + void *v; +{ + register struct ip *ip = v; + register struct udphdr *uh; + extern int inetctlerrmap[]; + void (*notify) __P((struct inpcb *, int)) = udp_notify; + int errno; + + if ((unsigned)cmd >= PRC_NCMDS) + return NULL; + errno = inetctlerrmap[cmd]; + if (PRC_IS_REDIRECT(cmd)) + notify = in_rtchange, ip = 0; + else if (cmd == PRC_HOSTDEAD) + ip = 0; + else if (errno == 0) + return NULL; + if (sa == NULL) + return NULL; +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + if (ip) { + struct ip6_hdr *ipv6 = (struct ip6_hdr *)ip; + + uh = (struct udphdr *)((caddr_t)ipv6 + sizeof(struct ip6_hdr)); +#if 0 /*XXX*/ + in6_pcbnotify(&udbtable, sa, uh->uh_dport, + &(ipv6->ip6_src), uh->uh_sport, cmd, udp_notify); +#endif + } else { +#if 0 /*XXX*/ + in6_pcbnotify(&udbtable, sa, 0, + (struct in6_addr *)&in6addr_any, 0, cmd, udp_notify); +#endif + } + } else +#endif /* INET6 */ + if (ip) { + uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); + in_pcbnotify(&udbtable, sa, uh->uh_dport, ip->ip_src, + uh->uh_sport, errno, notify); + } else + in_pcbnotifyall(&udbtable, sa, errno, notify); + return NULL; +} + +int +#if __STDC__ +udp_output(struct mbuf *m, ...) 
+#else +udp_output(m, va_alist) + struct mbuf *m; + va_dcl +#endif +{ + register struct inpcb *inp; + struct mbuf *addr, *control; + register struct udpiphdr *ui; + register int len = m->m_pkthdr.len; + struct in_addr laddr; + int s = 0, error = 0; + va_list ap; +#ifdef INET6 + register struct in6_addr laddr6; + int v6packet = 0; + struct sockaddr_in6 *sin6 = NULL; + struct ip6_pktopts opt, *stickyopt = NULL; +#endif /* INET6 */ + int pcbflags = 0; + + va_start(ap, m); + inp = va_arg(ap, struct inpcb *); + addr = va_arg(ap, struct mbuf *); + control = va_arg(ap, struct mbuf *); + va_end(ap); + +#ifdef INET6 + v6packet = ((inp->inp_flags & INP_IPV6) && + !(inp->inp_flags & INP_IPV6_MAPPED)); +#endif + +#ifdef INET6 + stickyopt = inp->inp_outputopts6; + if (control && v6packet) { + error = ip6_setpktoptions(control, &opt, + ((inp->inp_socket->so_state & SS_PRIV) != 0)); + if (error != 0) + goto release; + inp->inp_outputopts6 = &opt; + } +#endif + + if (addr) { +#ifdef INET6 + sin6 = mtod(addr, struct sockaddr_in6 *); +#endif + + /* + * Save current PCB flags because they may change during + * temporary connection, particularly the INP_IPV6_UNDEC + * flag. + */ + pcbflags = inp->inp_flags; + +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + laddr6 = inp->inp_laddr6; + else +#endif /* INET6 */ + laddr = inp->inp_laddr; +#ifdef INET6 + if (((inp->inp_flags & INP_IPV6) && + !IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) || + (inp->inp_faddr.s_addr != INADDR_ANY)) +#else /* INET6 */ + if (inp->inp_faddr.s_addr != INADDR_ANY) +#endif /* INET6 */ + { + error = EISCONN; + goto release; + } + /* + * Must block input while temporarily connected. 
+ */ + s = splsoftnet(); + error = in_pcbconnect(inp, addr); + if (error) { + splx(s); + goto release; + } + } else { +#ifdef INET6 + if (((inp->inp_flags & INP_IPV6) && + IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) || + (inp->inp_faddr.s_addr == INADDR_ANY)) +#else /* INET6 */ + if (inp->inp_faddr.s_addr == INADDR_ANY) +#endif /* INET6 */ + { + error = ENOTCONN; + goto release; + } + } + /* + * Calculate data length and get a mbuf + * for UDP and IP headers. + */ +#ifdef INET6 + /* + * Handles IPv4-mapped IPv6 address because temporary connect sets + * the right flag. + */ + M_PREPEND(m, v6packet ? (sizeof(struct udphdr) + + sizeof(struct ip6_hdr)) : sizeof(struct udpiphdr), M_DONTWAIT); +#else /* INET6 */ + M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT); +#endif /* INET6 */ + if (m == 0) { + error = ENOBUFS; + goto bail; + } + + /* + * Compute the packet length of the IP header, and + * punt if the length looks bogus. + */ + if ((len + sizeof(struct udpiphdr)) > IP_MAXPACKET) { + error = EMSGSIZE; + goto release; + } + + /* + * Fill in mbuf with extended UDP header + * and addresses and length put into network format. + */ +#ifdef INET6 + if (v6packet) { + struct ip6_hdr *ipv6 = mtod(m, struct ip6_hdr *); + struct udphdr *uh = (struct udphdr *)(mtod(m, caddr_t) + + sizeof(struct ip6_hdr)); + int payload = sizeof(struct ip6_hdr); + struct in6_addr *faddr; + struct in6_addr *laddr; + struct ifnet *oifp = NULL; + + ipv6->ip6_flow = htonl(0x60000000) | + (inp->inp_ipv6.ip6_flow & htonl(0x0fffffff)); + + ipv6->ip6_nxt = IPPROTO_UDP; + ipv6->ip6_dst = inp->inp_faddr6; + /* + * If the scope of the destination is link-local, + * embed the interface + * index in the address. 
+ * + * XXX advanced-api value overrides sin6_scope_id + */ + faddr = &ipv6->ip6_dst; + if (IN6_IS_ADDR_LINKLOCAL(faddr) || + IN6_IS_ADDR_MC_LINKLOCAL(faddr)) { + struct ip6_pktopts *optp = inp->inp_outputopts6; + struct in6_pktinfo *pi = NULL; + struct ip6_moptions *mopt = NULL; + + /* + * XXX Boundary check is assumed to be already done in + * ip6_setpktoptions(). + */ + if (optp && (pi = optp->ip6po_pktinfo) && + pi->ipi6_ifindex) { + faddr->s6_addr16[1] = htons(pi->ipi6_ifindex); + oifp = ifindex2ifnet[pi->ipi6_ifindex]; + } + else if (IN6_IS_ADDR_MULTICAST(faddr) && + (mopt = inp->inp_moptions6) && + mopt->im6o_multicast_ifp) { + oifp = mopt->im6o_multicast_ifp; + faddr->s6_addr16[1] = oifp->if_index; + } else if (sin6 && sin6->sin6_scope_id) { + /* boundary check */ + if (sin6->sin6_scope_id < 0 + || if_index < sin6->sin6_scope_id) { + error = ENXIO; /* XXX EINVAL? */ + goto release; + } + /* XXX */ + faddr->s6_addr16[1] = + htons(sin6->sin6_scope_id & 0xffff); + } + } + ipv6->ip6_hlim = in6_selecthlim(inp, oifp); + if (sin6) { /*XXX*/ + laddr = in6_selectsrc(sin6, inp->inp_outputopts6, + inp->inp_moptions6, + &inp->inp_route6, + &inp->inp_laddr6, &error); + if (laddr == NULL) { + if (error == 0) + error = EADDRNOTAVAIL; + goto release; + } + } else + laddr = &inp->inp_laddr6; + + ipv6->ip6_src = *laddr; + + ipv6->ip6_plen = (u_short)len + sizeof(struct udphdr); + + uh->uh_sport = inp->inp_lport; + uh->uh_dport = inp->inp_fport; + uh->uh_ulen = htons(ipv6->ip6_plen); + uh->uh_sum = 0; + + /* + * Always calculate udp checksum for IPv6 datagrams + */ + if (!(uh->uh_sum = in6_cksum(m, IPPROTO_UDP, + payload, len + sizeof(struct udphdr)))) + uh->uh_sum = 0xffff; + + error = ip6_output(m, inp->inp_outputopts6, &inp->inp_route6, + inp->inp_socket->so_options & SO_DONTROUTE, + (inp->inp_flags & INP_IPV6_MCAST)?inp->inp_moptions6:NULL, + NULL); + } else +#endif /* INET6 */ + { + ui = mtod(m, struct udpiphdr *); + bzero(ui->ui_x1, sizeof ui->ui_x1); + ui->ui_pr = 
IPPROTO_UDP; + ui->ui_len = htons((u_int16_t)len + sizeof (struct udphdr)); + ui->ui_src = inp->inp_laddr; + ui->ui_dst = inp->inp_faddr; + ui->ui_sport = inp->inp_lport; + ui->ui_dport = inp->inp_fport; + ui->ui_ulen = ui->ui_len; + + /* + * Stuff checksum and output datagram. + */ + + ui->ui_sum = 0; + if (udpcksum) { + if ((ui->ui_sum = in_cksum(m, sizeof (struct udpiphdr) + + len)) == 0) + ui->ui_sum = 0xffff; + } + ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; +#ifdef INET6 + /* + * For now, we use the default values for ttl and tos for + * v4 packets sent using a v6 pcb. We probably want to + * later allow v4 setsockopt operations on a v6 socket to + * modify the ttl and tos for v4 packets sent using + * the mapped address format. We really ought to + * save the v4 ttl and v6 hoplimit in separate places + * instead of craming both in the inp_hu union. + */ + if (inp->inp_flags & INP_IPV6) { + ((struct ip *)ui)->ip_ttl = ip_defttl; + ((struct ip *)ui)->ip_tos = 0; + } else +#endif /* INET6 */ + { + ((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl; + ((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos; + } + + udpstat.udps_opackets++; +#ifdef INET6 + if (inp->inp_flags & INP_IPV6_MCAST) { + error = ip_output(m, inp->inp_options, &inp->inp_route, + inp->inp_socket->so_options & + (SO_DONTROUTE | SO_BROADCAST), + NULL, NULL, inp->inp_socket); + } else +#endif /* INET6 */ + { + error = ip_output(m, inp->inp_options, &inp->inp_route, + inp->inp_socket->so_options & + (SO_DONTROUTE | SO_BROADCAST), + inp->inp_moptions, inp, NULL); + } + } + +bail: + if (addr) { + in_pcbdisconnect(inp); + inp->inp_flags = pcbflags; +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + inp->inp_laddr6 = laddr6; + else +#endif + inp->inp_laddr = laddr; + splx(s); + } + if (control) { +#ifdef INET6 + if (v6packet) + inp->inp_outputopts6 = stickyopt; +#endif + m_freem(control); + } + return (error); + +release: + m_freem(m); + if (control) { +#ifdef INET6 + if (v6packet) + 
inp->inp_outputopts6 = stickyopt; +#endif + m_freem(control); + } + return (error); +} + +u_int udp_sendspace = 9216; /* really max datagram size */ +u_int udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in)); + /* 40 1K datagrams */ + +#if defined(INET6) && !defined(TCP6) +/*ARGSUSED*/ +int +udp6_usrreq(so, req, m, addr, control, p) + struct socket *so; + int req; + struct mbuf *m, *addr, *control; + struct proc *p; +{ + return udp_usrreq(so, req, m, addr, control); +} +#endif + +/*ARGSUSED*/ +int +udp_usrreq(so, req, m, addr, control) + struct socket *so; + int req; + struct mbuf *m, *addr, *control; +{ + struct inpcb *inp = sotoinpcb(so); + int error = 0; + int s; + + if (req == PRU_CONTROL) { +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + return (in6_control(so, (u_long)m, (caddr_t)addr, + (struct ifnet *)control, 0)); + else +#endif /* INET6 */ + return (in_control(so, (u_long)m, (caddr_t)addr, + (struct ifnet *)control)); + } + if (inp == NULL && req != PRU_ATTACH) { + error = EINVAL; + goto release; + } + /* + * Note: need to block udp_input while changing + * the udp pcb queue and/or pcb addresses. 
+ */ + switch (req) { + + case PRU_ATTACH: + if (inp != NULL) { + error = EINVAL; + break; + } + s = splsoftnet(); + error = in_pcballoc(so, &udbtable); + splx(s); + if (error) + break; + error = soreserve(so, udp_sendspace, udp_recvspace); + if (error) + break; +#ifdef INET6 + if (((struct inpcb *)so->so_pcb)->inp_flags & INP_IPV6) + ((struct inpcb *) so->so_pcb)->inp_ipv6.ip6_hlim = + ip6_defhlim; + else +#endif /* INET6 */ + ((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl; + break; + + case PRU_DETACH: + udp_detach(inp); + break; + + case PRU_BIND: + s = splsoftnet(); +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + error = in6_pcbbind(inp, addr); + else +#endif + error = in_pcbbind(inp, addr); + splx(s); + break; + + case PRU_LISTEN: + error = EOPNOTSUPP; + break; + + case PRU_CONNECT: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) { + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { + error = EISCONN; + break; + } + s = splsoftnet(); + error = in6_pcbconnect(inp, addr); + splx(s); + } else +#endif /* INET6 */ + { + if (inp->inp_faddr.s_addr != INADDR_ANY) { + error = EISCONN; + break; + } + s = splsoftnet(); + error = in_pcbconnect(inp, addr); + splx(s); + } + + if (error == 0) + soisconnected(so); + break; + + case PRU_CONNECT2: + error = EOPNOTSUPP; + break; + + case PRU_ACCEPT: + error = EOPNOTSUPP; + break; + + case PRU_DISCONNECT: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) { + if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { + error = ENOTCONN; + break; + } + } else +#endif /* INET6 */ + if (inp->inp_faddr.s_addr == INADDR_ANY) { + error = ENOTCONN; + break; + } + + s = splsoftnet(); + in_pcbdisconnect(inp); +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + inp->inp_laddr6 = in6addr_any; + else +#endif /* INET6 */ + inp->inp_laddr.s_addr = INADDR_ANY; + + splx(s); + so->so_state &= ~SS_ISCONNECTED; /* XXX */ + break; + + case PRU_SHUTDOWN: + socantsendmore(so); + break; + + case PRU_SEND: +#ifdef IPSEC + error = check_ipsec_policy(inp,0); + 
if (error) + return (error); +#endif + return (udp_output(m, inp, addr, control)); + + case PRU_ABORT: + soisdisconnected(so); + udp_detach(inp); + break; + + case PRU_SOCKADDR: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + in6_setsockaddr(inp, addr); + else +#endif /* INET6 */ + in_setsockaddr(inp, addr); + break; + + case PRU_PEERADDR: +#ifdef INET6 + if (inp->inp_flags & INP_IPV6) + in6_setpeeraddr(inp, addr); + else +#endif /* INET6 */ + in_setpeeraddr(inp, addr); + break; + + case PRU_SENSE: + /* + * stat: don't bother with a blocksize. + */ + /* + * Perhaps Path MTU might be returned for a connected + * UDP socket in this case. + */ + return (0); + + case PRU_SENDOOB: + case PRU_FASTTIMO: + case PRU_SLOWTIMO: + case PRU_PROTORCV: + case PRU_PROTOSEND: + error = EOPNOTSUPP; + break; + + case PRU_RCVD: + case PRU_RCVOOB: + return (EOPNOTSUPP); /* do not free mbuf's */ + + default: + panic("udp_usrreq"); + } + +release: + if (control) { +#ifdef __ECOS + diag_printf("udp control data unexpectedly retained\n"); +#else + printf("udp control data unexpectedly retained\n"); +#endif + m_freem(control); + } + if (m) + m_freem(m); + return (error); +} + +static void +udp_detach(inp) + struct inpcb *inp; +{ + int s = splsoftnet(); + + in_pcbdetach(inp); + splx(s); +} + +#ifdef CYGPKG_NET_SYSCTL +/* + * Sysctl for udp variables. + */ +int +udp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; +{ + /* All sysctl names at this level are terminal. 
*/ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case UDPCTL_CHECKSUM: + return (sysctl_int(oldp, oldlenp, newp, newlen, &udpcksum)); + case UDPCTL_BADDYNAMIC: + return (sysctl_struct(oldp, oldlenp, newp, newlen, + baddynamicports.udp, sizeof(baddynamicports.udp))); + case UDPCTL_RECVSPACE: + return (sysctl_int(oldp, oldlenp, newp, newlen,&udp_recvspace)); + case UDPCTL_SENDSPACE: + return (sysctl_int(oldp, oldlenp, newp, newlen,&udp_sendspace)); + default: + return (ENOPROTOOPT); + } + /* NOTREACHED */ +} +#endif // CYGPKG_NET_SYSCTL |