diff --git a/reactos/ChangeLog b/reactos/ChangeLog index 38d4cbefaa5..ccf18c00e90 100644 --- a/reactos/ChangeLog +++ b/reactos/ChangeLog @@ -1,3 +1,18 @@ +2003-01-15 Casper S. Hornstrup + + * drivers/net/tcpip/makefile (TCP_OBJECTS): Add transport/tcp/tcpcore.o, + transport/tcp/tcp_input.o, transport/tcp/tcp_ipv4.o, + transport/tcp/tcp_output.o, and transport/tcp/tcp_timer.o. + * drivers/net/tcpip/transport/tcp/tcp.c (TCPStartup): Call tcp_init(). + * drivers/net/tcpip/include/linux.h: New file. + * drivers/net/tcpip/include/tcpcore.h: Ditto. + * drivers/net/tcpip/include/tcpdef.h: Ditto. + * drivers/net/tcpip/transport/tcp/tcp_input.c: Ditto. + * drivers/net/tcpip/transport/tcp/tcp_ipv4.c: Ditto. + * drivers/net/tcpip/transport/tcp/tcp_output.c: Ditto. + * drivers/net/tcpip/transport/tcp/tcp_timer.c: Ditto. + * drivers/net/tcpip/transport/tcp/tcpcore.c: Ditto. + 2003-01-15 Casper S. Hornstrup * lib/kernel32/k32.h: New file. diff --git a/reactos/drivers/net/tcpip/include/linux.h b/reactos/drivers/net/tcpip/include/linux.h new file mode 100755 index 00000000000..86db7ae0180 --- /dev/null +++ b/reactos/drivers/net/tcpip/include/linux.h @@ -0,0 +1,1876 @@ +#ifndef _LINUX_TYPES_H +#define _LINUX_TYPES_H + +#include + +#ifndef NULL +#define NULL (void*)0 +#endif + +typedef struct page { + int x; +} mem_map_t; + + + + + + + + + + + + + +/* i386 */ + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +#define BITS_PER_LONG 32 + +/* DMA addresses come in generic and 64-bit flavours. */ + +#ifdef CONFIG_HIGHMEM64G +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif +typedef u64 dma64_addr_t; + + + +/* + * This allows for 1024 file descriptors: if NR_OPEN is ever grown + * beyond that you'll have to change this too. But 1024 fd's seem to be + * enough even for such "real" unices like OSF/1, so hopefully this is + * one limit that doesn't have to be changed [again]. + * + * Note that POSIX wants the FD_CLEAR(fd,fdsetp) defines to be in + * (and thus ) - but this is a more logical + * place for them. Solved by having dummy defines in . + */ + +/* + * Those macros may have been defined in . But we always + * use the ones here. + */ +#undef __NFDBITS +#define __NFDBITS (8 * sizeof(unsigned long)) + +#undef __FD_SETSIZE +#define __FD_SETSIZE 1024 + +#undef __FDSET_LONGS +#define __FDSET_LONGS (__FD_SETSIZE/__NFDBITS) + +#undef __FDELT +#define __FDELT(d) ((d) / __NFDBITS) + +#undef __FDMASK +#define __FDMASK(d) (1UL << ((d) % __NFDBITS)) + +typedef struct { + unsigned long fds_bits [__FDSET_LONGS]; +} __kernel_fd_set; + +/* Type of a signal handler. */ +typedef void (*__kernel_sighandler_t)(int); + +/* Type of a SYSV IPC key. 
*/ +typedef int __kernel_key_t; + + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned short __kernel_dev_t; +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { +#if defined(__KERNEL__) || defined(__USE_ALL) + int val[2]; +#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */ + int __val[2]; +#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */ +} __kernel_fsid_t; + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) + +#undef __FD_SET +#define __FD_SET(fd,fdsetp) \ + __asm__ __volatile__("btsl %1,%0": \ + "=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd))) + +#undef __FD_CLR +#define __FD_CLR(fd,fdsetp) \ + __asm__ __volatile__("btrl %1,%0": \ + "=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd))) + +#undef __FD_ISSET +#define __FD_ISSET(fd,fdsetp) (__extension__ ({ \ + unsigned char __result; \ + __asm__ __volatile__("btl %1,%2 ; setb %0" \ + :"=q" (__result) :"r" ((int) (fd)), \ + "m" (*(__kernel_fd_set *) (fdsetp))); \ + __result; })) + +#undef __FD_ZERO +#define __FD_ZERO(fdsetp) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__("cld ; rep ; stosl" \ + :"=m" (*(__kernel_fd_set *) (fdsetp)), \ + "=&c" (__d0), "=&D" (__d1) \ + :"a" (0), "1" (__FDSET_LONGS), \ + "2" ((__kernel_fd_set *) (fdsetp)) : "memory"); \ +} while (0) + +#endif /* defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) */ + + +#ifndef __KERNEL_STRICT_NAMES + +typedef __kernel_fd_set fd_set; +typedef __kernel_dev_t dev_t; +typedef __kernel_ino_t ino_t; +typedef __kernel_mode_t mode_t; +typedef __kernel_nlink_t nlink_t; +typedef __kernel_off_t off_t; +typedef __kernel_pid_t pid_t; +typedef __kernel_daddr_t daddr_t; +typedef __kernel_key_t key_t; +typedef __kernel_suseconds_t suseconds_t; + +#ifdef __KERNEL__ +typedef __kernel_uid32_t uid_t; +typedef __kernel_gid32_t gid_t; +typedef __kernel_uid16_t uid16_t; +typedef __kernel_gid16_t gid16_t; + +#ifdef CONFIG_UID16 +/* This is defined by include/asm-{arch}/posix_types.h */ +typedef __kernel_old_uid_t old_uid_t; +typedef __kernel_old_gid_t old_gid_t; +#endif /* CONFIG_UID16 */ + +/* libc5 includes this file to define uid_t, thus uid_t can never change + * when it is included by non-kernel code + */ +#else +typedef __kernel_uid_t uid_t; +typedef __kernel_gid_t gid_t; +#endif /* __KERNEL__ */ + +#if defined(__GNUC__) +typedef __kernel_loff_t loff_t; +#endif + +/* + * The following typedefs are also protected by individual ifdefs for + * historical reasons: + */ +#ifndef _SIZE_T +#define _SIZE_T +typedef __kernel_size_t size_t; +#endif + +#ifndef 
_SSIZE_T +#define _SSIZE_T +typedef __kernel_ssize_t ssize_t; +#endif + +#ifndef _PTRDIFF_T +#define _PTRDIFF_T +typedef __kernel_ptrdiff_t ptrdiff_t; +#endif + +#ifndef _TIME_T +#define _TIME_T +typedef __kernel_time_t time_t; +#endif + +#ifndef _CLOCK_T +#define _CLOCK_T +typedef __kernel_clock_t clock_t; +#endif + +#ifndef _CADDR_T +#define _CADDR_T +typedef __kernel_caddr_t caddr_t; +#endif + +/* bsd */ +typedef unsigned char u_char; +typedef unsigned short u_short; +typedef unsigned int u_int; +typedef unsigned long u_long; + +/* sysv */ +typedef unsigned char unchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ + +typedef __u8 u_int8_t; +typedef __s8 int8_t; +typedef __u16 u_int16_t; +typedef __s16 int16_t; +typedef __u32 u_int32_t; +typedef __s32 int32_t; + +#endif /* !(__BIT_TYPES_DEFINED__) */ + +typedef __u8 uint8_t; +typedef __u16 uint16_t; +typedef __u32 uint32_t; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __u64 uint64_t; +typedef __u64 u_int64_t; +typedef __s64 int64_t; +#endif + +#endif /* __KERNEL_STRICT_NAMES */ + +/* + * Below are truly Linux-specific types that should never collide with + * any application/library that wants linux/types.h. + */ + +struct ustat { + __kernel_daddr_t f_tfree; + __kernel_ino_t f_tinode; + char f_fname[6]; + char f_fpack[6]; +}; + + + + + + + + + + + + + + +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN 1234 +#endif +#ifndef __LITTLE_ENDIAN_BITFIELD +#define __LITTLE_ENDIAN_BITFIELD +#endif + +#if 1 /* swab */ + +/* + * linux/byteorder/swab.h + * Byte-swapping, independently from CPU endianness + * swabXX[ps]?(foo) + * + * Francois-Rene Rideau 19971205 + * separated swab functions from cpu_to_XX, + * to clean up support for bizarre-endian architectures. + * + * See asm-i386/byteorder.h and suches for examples of how to provide + * architecture-dependent optimized versions + * + */ + +/* casts are necessary for constants, because we never know how for sure + * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. 
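+ *
+ * A couple of concrete values, purely as illustration of the macros
+ * defined just below:
+ *
+ *	___swab16(0x1234)     == 0x3412
+ *	___swab32(0x12345678) == 0x78563412
+ *	___swab24(0x123456)   == 0x563412	(only the outer bytes move)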
+ */ +#define ___swab16(x) \ +({ \ + __u16 __x = (x); \ + ((__u16)( \ + (((__u16)(__x) & (__u16)0x00ffU) << 8) | \ + (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \ +}) + +#define ___swab24(x) \ +({ \ + __u32 __x = (x); \ + ((__u32)( \ + ((__x & (__u32)0x000000ffUL) << 16) | \ + (__x & (__u32)0x0000ff00UL) | \ + ((__x & (__u32)0x00ff0000UL) >> 16) )); \ +}) + +#define ___swab32(x) \ +({ \ + __u32 __x = (x); \ + ((__u32)( \ + (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ + (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ + (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ + (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ +}) + +#define ___swab64(x) \ +({ \ + __u64 __x = (x); \ + ((__u64)( \ + (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ + (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ + (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ + (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ + (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ + (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ + (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \ + (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \ +}) + +#define ___constant_swab16(x) \ + ((__u16)( \ + (((__u16)(x) & (__u16)0x00ffU) << 8) | \ + (((__u16)(x) & (__u16)0xff00U) >> 8) )) +#define ___constant_swab24(x) \ + ((__u32)( \ + (((__u32)(x) & (__u32)0x000000ffU) << 16) | \ + (((__u32)(x) & (__u32)0x0000ff00U) | \ + (((__u32)(x) & (__u32)0x00ff0000U) >> 16) )) +#define ___constant_swab32(x) \ + ((__u32)( \ + (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ + (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ + (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ + (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) +#define ___constant_swab64(x) \ + ((__u64)( \ + (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ + (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ + (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \ + (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \ + (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \ + (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) )) + +/* + * provide defaults when no architecture-specific optimization is detected + */ +#ifndef __arch__swab16 +# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); }) +#endif +#ifndef __arch__swab24 +# define __arch__swab24(x) ({ __u32 __tmp = (x) ; ___swab24(__tmp); }) +#endif +#ifndef __arch__swab32 +# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); }) +#endif +#ifndef __arch__swab64 +# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); }) +#endif + +#ifndef __arch__swab16p +# define __arch__swab16p(x) __arch__swab16(*(x)) +#endif +#ifndef __arch__swab24p +# define __arch__swab24p(x) __arch__swab24(*(x)) +#endif +#ifndef __arch__swab32p +# define __arch__swab32p(x) __arch__swab32(*(x)) +#endif +#ifndef __arch__swab64p +# define __arch__swab64p(x) __arch__swab64(*(x)) +#endif + +#ifndef __arch__swab16s +# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0) +#endif +#ifndef __arch__swab24s +# define __arch__swab24s(x) do { *(x) = __arch__swab24p((x)); } while (0) +#endif +#ifndef __arch__swab32s +# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0) +#endif +#ifndef __arch__swab64s +# 
define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0) +#endif + + +/* + * Allow constant folding + */ +#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +# define __swab16(x) \ +(__builtin_constant_p((__u16)(x)) ? \ + ___swab16((x)) : \ + __fswab16((x))) +# define __swab24(x) \ +(__builtin_constant_p((__u32)(x)) ? \ + ___swab24((x)) : \ + __fswab24((x))) +# define __swab32(x) \ +(__builtin_constant_p((__u32)(x)) ? \ + ___swab32((x)) : \ + __fswab32((x))) +# define __swab64(x) \ +(__builtin_constant_p((__u64)(x)) ? \ + ___swab64((x)) : \ + __fswab64((x))) +#else +# define __swab16(x) __fswab16(x) +# define __swab24(x) __fswab24(x) +# define __swab32(x) __fswab32(x) +# define __swab64(x) __fswab64(x) +#endif /* OPTIMIZE */ + + +static __inline__ __const__ __u16 __fswab16(__u16 x) +{ + return __arch__swab16(x); +} +static __inline__ __u16 __swab16p(__u16 *x) +{ + return __arch__swab16p(x); +} +static __inline__ void __swab16s(__u16 *addr) +{ + __arch__swab16s(addr); +} + +static __inline__ __const__ __u32 __fswab24(__u32 x) +{ + return __arch__swab24(x); +} +static __inline__ __u32 __swab24p(__u32 *x) +{ + return __arch__swab24p(x); +} +static __inline__ void __swab24s(__u32 *addr) +{ + __arch__swab24s(addr); +} + +static __inline__ __const__ __u32 __fswab32(__u32 x) +{ + return __arch__swab32(x); +} +static __inline__ __u32 __swab32p(__u32 *x) +{ + return __arch__swab32p(x); +} +static __inline__ void __swab32s(__u32 *addr) +{ + __arch__swab32s(addr); +} + +#ifdef __BYTEORDER_HAS_U64__ +static __inline__ __const__ __u64 __fswab64(__u64 x) +{ +# ifdef __SWAB_64_THRU_32__ + __u32 h = x >> 32; + __u32 l = x & ((1ULL<<32)-1); + return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h))); +# else + return __arch__swab64(x); +# endif +} +static __inline__ __u64 __swab64p(__u64 *x) +{ + return __arch__swab64p(x); +} +static __inline__ void __swab64s(__u64 *addr) +{ + __arch__swab64s(addr); +} +#endif /* __BYTEORDER_HAS_U64__ */ + +#if defined(__KERNEL__) +#define swab16 __swab16 +#define swab24 __swab24 +#define swab32 __swab32 +#define swab64 __swab64 +#define swab16p __swab16p +#define swab24p __swab24p +#define swab32p __swab32p +#define swab64p __swab64p +#define swab16s __swab16s +#define swab24s __swab24s +#define swab32s __swab32s +#define swab64s __swab64s +#endif + +#endif /* swab */ + + + +#if 1 /* generic */ + +/* + * linux/byteorder_generic.h + * Generic Byte-reordering support + * + * Francois-Rene Rideau 19970707 + * gathered all the good ideas from all asm-foo/byteorder.h into one file, + * cleaned them up. + * I hope it is compliant with non-GCC compilers. + * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h, + * because I wasn't sure it would be ok to put it in types.h + * Upgraded it to 2.1.43 + * Francois-Rene Rideau 19971012 + * Upgraded it to 2.1.57 + * to please Linus T., replaced huge #ifdef's between little/big endian + * by nestedly #include'd files. + * Francois-Rene Rideau 19971205 + * Made it to 2.1.71; now a facelift: + * Put files under include/linux/byteorder/ + * Split swab from generic support. + * + * TODO: + * = Regular kernel maintainers could also replace all these manual + * byteswap macros that remain, disseminated among drivers, + * after some grep or the sources... + * = Linus might want to rename all these macros and files to fit his taste, + * to fit his personal naming scheme. + * = it seems that a few drivers would also appreciate + * nybble swapping support... 
+ * = every architecture could add their byteswap macro in asm/byteorder.h + * see how some architectures already do (i386, alpha, ppc, etc) + * = cpu_to_beXX and beXX_to_cpu might some day need to be well + * distinguished throughout the kernel. This is not the case currently, + * since little endian, big endian, and pdp endian machines needn't it. + * But this might be the case for, say, a port of Linux to 20/21 bit + * architectures (and F21 Linux addict around?). + */ + +/* + * The following macros are to be defined by : + * + * Conversion of long and short int between network and host format + * ntohl(__u32 x) + * ntohs(__u16 x) + * htonl(__u32 x) + * htons(__u16 x) + * It seems that some programs (which? where? or perhaps a standard? POSIX?) + * might like the above to be functions, not macros (why?). + * if that's true, then detect them, and take measures. + * Anyway, the measure is: define only ___ntohl as a macro instead, + * and in a separate file, have + * unsigned long inline ntohl(x){return ___ntohl(x);} + * + * The same for constant arguments + * __constant_ntohl(__u32 x) + * __constant_ntohs(__u16 x) + * __constant_htonl(__u32 x) + * __constant_htons(__u16 x) + * + * Conversion of XX-bit integers (16- 32- or 64-) + * between native CPU format and little/big endian format + * 64-bit stuff only defined for proper architectures + * cpu_to_[bl]eXX(__uXX x) + * [bl]eXX_to_cpu(__uXX x) + * + * The same, but takes a pointer to the value to convert + * cpu_to_[bl]eXXp(__uXX x) + * [bl]eXX_to_cpup(__uXX x) + * + * The same, but change in situ + * cpu_to_[bl]eXXs(__uXX x) + * [bl]eXX_to_cpus(__uXX x) + * + * See asm-foo/byteorder.h for examples of how to provide + * architecture-optimized versions + * + */ + + +#if defined(__KERNEL__) +/* + * inside the kernel, we can use nicknames; + * outside of it, we must avoid POSIX namespace pollution... + */ +#define cpu_to_le64 __cpu_to_le64 +#define le64_to_cpu __le64_to_cpu +#define cpu_to_le32 __cpu_to_le32 +#define le32_to_cpu __le32_to_cpu +#define cpu_to_le16 __cpu_to_le16 +#define le16_to_cpu __le16_to_cpu +#define cpu_to_be64 __cpu_to_be64 +#define be64_to_cpu __be64_to_cpu +#define cpu_to_be32 __cpu_to_be32 +#define be32_to_cpu __be32_to_cpu +#define cpu_to_be16 __cpu_to_be16 +#define be16_to_cpu __be16_to_cpu +#define cpu_to_le64p __cpu_to_le64p +#define le64_to_cpup __le64_to_cpup +#define cpu_to_le32p __cpu_to_le32p +#define le32_to_cpup __le32_to_cpup +#define cpu_to_le16p __cpu_to_le16p +#define le16_to_cpup __le16_to_cpup +#define cpu_to_be64p __cpu_to_be64p +#define be64_to_cpup __be64_to_cpup +#define cpu_to_be32p __cpu_to_be32p +#define be32_to_cpup __be32_to_cpup +#define cpu_to_be16p __cpu_to_be16p +#define be16_to_cpup __be16_to_cpup +#define cpu_to_le64s __cpu_to_le64s +#define le64_to_cpus __le64_to_cpus +#define cpu_to_le32s __cpu_to_le32s +#define le32_to_cpus __le32_to_cpus +#define cpu_to_le16s __cpu_to_le16s +#define le16_to_cpus __le16_to_cpus +#define cpu_to_be64s __cpu_to_be64s +#define be64_to_cpus __be64_to_cpus +#define cpu_to_be32s __cpu_to_be32s +#define be32_to_cpus __be32_to_cpus +#define cpu_to_be16s __cpu_to_be16s +#define be16_to_cpus __be16_to_cpus +#endif + + +/* + * Handle ntohl and suches. These have various compatibility + * issues - like we want to give the prototype even though we + * also have a macro for them in case some strange program + * wants to take the address of the thing or something.. 
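+ *
+ * As a purely illustrative sketch, on this little-endian target the
+ * conversions really do swap bytes (on a big-endian build they would
+ * all be identity operations):
+ *
+ *	htons(80)         == 0x5000
+ *	ntohs(0x5000)     == 80
+ *	htonl(0x7f000001) == 0x0100007f	(127.0.0.1 in network byte order)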
+ * + * Note that these used to return a "long" in libc5, even though + * long is often 64-bit these days.. Thus the casts. + * + * They have to be macros in order to do the constant folding + * correctly - if the argument passed into a inline function + * it is no longer constant according to gcc.. + */ + +#undef ntohl +#undef ntohs +#undef htonl +#undef htons + +/* + * Do the prototypes. Somebody might want to take the + * address or some such sick thing.. + */ +#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2) +extern __u32 ntohl(__u32); +extern __u32 htonl(__u32); +#else +extern unsigned long int ntohl(unsigned long int); +extern unsigned long int htonl(unsigned long int); +#endif +extern unsigned short int ntohs(unsigned short int); +extern unsigned short int htons(unsigned short int); + + +#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) && !defined(__STRICT_ANSI__) + +#define ___htonl(x) __cpu_to_be32(x) +#define ___htons(x) __cpu_to_be16(x) +#define ___ntohl(x) __be32_to_cpu(x) +#define ___ntohs(x) __be16_to_cpu(x) + +#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2) +#define htonl(x) ___htonl(x) +#define ntohl(x) ___ntohl(x) +#else +#define htonl(x) ((unsigned long)___htonl(x)) +#define ntohl(x) ((unsigned long)___ntohl(x)) +#endif +#define htons(x) ___htons(x) +#define ntohs(x) ___ntohs(x) + +#endif /* OPTIMIZE */ + +#endif /* generic */ + + +#define __constant_htonl(x) ___constant_swab32((x)) +#define __constant_ntohl(x) ___constant_swab32((x)) +#define __constant_htons(x) ___constant_swab16((x)) +#define __constant_ntohs(x) ___constant_swab16((x)) +#define __constant_cpu_to_le64(x) ((__u64)(x)) +#define __constant_le64_to_cpu(x) ((__u64)(x)) +#define __constant_cpu_to_le32(x) ((__u32)(x)) +#define __constant_le32_to_cpu(x) ((__u32)(x)) +#define __constant_cpu_to_le24(x) ((__u32)(x)) +#define __constant_le24_to_cpu(x) ((__u32)(x)) +#define __constant_cpu_to_le16(x) ((__u16)(x)) +#define __constant_le16_to_cpu(x) ((__u16)(x)) +#define __constant_cpu_to_be64(x) ___constant_swab64((x)) +#define __constant_be64_to_cpu(x) ___constant_swab64((x)) +#define __constant_cpu_to_be32(x) ___constant_swab32((x)) +#define __constant_be32_to_cpu(x) ___constant_swab32((x)) +#define __constant_cpu_to_be24(x) ___constant_swab24((x)) +#define __constant_be24_to_cpu(x) ___constant_swab24((x)) +#define __constant_cpu_to_be16(x) ___constant_swab16((x)) +#define __constant_be16_to_cpu(x) ___constant_swab16((x)) +#define __cpu_to_le64(x) ((__u64)(x)) +#define __le64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_le32(x) ((__u32)(x)) +#define __le32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le24(x) ((__u32)(x)) +#define __le24_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) +#define __cpu_to_be64(x) __swab64((x)) +#define __be64_to_cpu(x) __swab64((x)) +#define __cpu_to_be32(x) __swab32((x)) +#define __be32_to_cpu(x) __swab32((x)) +#define __cpu_to_be24(x) __swab24((x)) +#define __be24_to_cpu(x) __swab24((x)) +#define __cpu_to_be16(x) __swab16((x)) +#define __be16_to_cpu(x) __swab16((x)) +#define __cpu_to_le64p(x) (*(__u64*)(x)) +#define __le64_to_cpup(x) (*(__u64*)(x)) +#define __cpu_to_le32p(x) (*(__u32*)(x)) +#define __le32_to_cpup(x) (*(__u32*)(x)) +#define __cpu_to_le24p(x) (*(__u32*)(x)) +#define __le24_to_cpup(x) (*(__u32*)(x)) +#define __cpu_to_le16p(x) (*(__u16*)(x)) +#define __le16_to_cpup(x) (*(__u16*)(x)) +#define __cpu_to_be64p(x) __swab64p((x)) +#define __be64_to_cpup(x) __swab64p((x)) +#define 
__cpu_to_be32p(x) __swab32p((x)) +#define __be32_to_cpup(x) __swab32p((x)) +#define __cpu_to_be24p(x) __swab24p((x)) +#define __be24_to_cpup(x) __swab24p((x)) +#define __cpu_to_be16p(x) __swab16p((x)) +#define __be16_to_cpup(x) __swab16p((x)) +#define __cpu_to_le64s(x) do {} while (0) +#define __le64_to_cpus(x) do {} while (0) +#define __cpu_to_le32s(x) do {} while (0) +#define __le32_to_cpus(x) do {} while (0) +#define __cpu_to_le24s(x) do {} while (0) +#define __le24_to_cpus(x) do {} while (0) +#define __cpu_to_le16s(x) do {} while (0) +#define __le16_to_cpus(x) do {} while (0) +#define __cpu_to_be64s(x) __swab64s((x)) +#define __be64_to_cpus(x) __swab64s((x)) +#define __cpu_to_be32s(x) __swab32s((x)) +#define __be32_to_cpus(x) __swab32s((x)) +#define __cpu_to_be24s(x) __swab24s((x)) +#define __be24_to_cpus(x) __swab24s((x)) +#define __cpu_to_be16s(x) __swab16s((x)) +#define __be16_to_cpus(x) __swab16s((x)) + + + + + + + + +#if 1 + +/* Dummy types */ + +#define ____cacheline_aligned + +typedef struct +{ + volatile unsigned int lock; +} rwlock_t; + +typedef struct { + volatile unsigned int lock; +} spinlock_t; + +struct task_struct; + + + + + +#if 1 /* atomic */ + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#ifdef CONFIG_SMP +#define LOCK "lock ; " +#else +#define LOCK "" +#endif + +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_set(v,i) (((v)->counter) = (i)) + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Note that the guaranteed useful range + * of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_add(int i, atomic_t *v) +{ +#if 0 + __asm__ __volatile__( + LOCK "addl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +#endif +} + +/** + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_sub(int i, atomic_t *v) +{ +#if 0 + __asm__ __volatile__( + LOCK "subl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +#endif +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
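+ *
+ * Sketch of the intended use (note that the i386 asm body below is
+ * currently stubbed out with #if 0, so this port does not yet provide
+ * the real semantics):
+ *
+ *	atomic_t v = ATOMIC_INIT(3);
+ *	atomic_sub_and_test(3, &v)	evaluates true, the count hit zero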
+ */ +static __inline__ int atomic_sub_and_test(int i, atomic_t *v) +{ +#if 0 + unsigned char c; + + __asm__ __volatile__( + LOCK "subl %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +#endif +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ +#if 0 + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +#endif +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_dec(atomic_t *v) +{ +#if 0 + __asm__ __volatile__( + LOCK "decl %0" + :"=m" (v->counter) + :"m" (v->counter)); +#endif +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_dec_and_test(atomic_t *v) +{ +#if 0 + unsigned char c; + + __asm__ __volatile__( + LOCK "decl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +#else + return 1; +#endif +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_inc_and_test(atomic_t *v) +{ +#if 0 + unsigned char c; + + __asm__ __volatile__( + LOCK "incl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +#else + return 1; +#endif +} + +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
+ */ +static __inline__ int atomic_add_negative(int i, atomic_t *v) +{ +#if 0 + unsigned char c; + + __asm__ __volatile__( + LOCK "addl %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +#else + return 0; +#endif +} + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) +#if 0 +__asm__ __volatile__(LOCK "andl %0,%1" \ +: : "r" (~(mask)),"m" (*addr) : "memory") +#endif + +#define atomic_set_mask(mask, addr) +#if 0 +__asm__ __volatile__(LOCK "orl %0,%1" \ +: : "r" (mask),"m" (*addr) : "memory") +#endif + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() +#define smp_mb__after_atomic_dec() +#define smp_mb__before_atomic_inc() +#define smp_mb__after_atomic_inc() + + + +#endif /* atomic */ + + + + + +#if 1 /* list */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ +#if 0 + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +#endif +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ +#if 0 + __list_add(new, head, head->next); +#endif +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ +#if 0 + __list_add(new, head->prev, head); +#endif +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ +#if 0 + __list_del(entry->prev, entry->next); + entry->next = (void *) 0; + entry->prev = (void *) 0; +#endif +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. 
+ */ +static inline void list_del_init(struct list_head *entry) +{ +#if 0 + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +#endif +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ +#if 0 + __list_del(list->prev, list->next); + list_add(list, head); +#endif +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ +#if 0 + __list_del(list->prev, list->next); + list_add_tail(list, head); +#endif +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ +#if 0 + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +#endif +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ +#if 0 + if (!list_empty(list)) + __list_splice(list, head); +#endif +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ +#if 0 + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +#endif +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) +#if 0 + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#endif + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) +#if 0 + for (pos = (head)->next, prefetch(pos->next); pos != (head); \ + pos = pos->next, prefetch(pos->next)) +#endif + +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) +#if 0 + for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ + pos = pos->prev, prefetch(pos->prev)) +#endif + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) +#if 0 + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) +#endif + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. 
+ */ +#define list_for_each_entry(pos, head, member) +#if 0 + for (pos = list_entry((head)->next, typeof(*pos), member), \ + prefetch(pos->member.next); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member), \ + prefetch(pos->member.next)) +#endif + +#endif /* list */ + + + + + +#if 1 /* wait */ + +#define WNOHANG 0x00000001 +#define WUNTRACED 0x00000002 + +#define __WNOTHREAD 0x20000000 /* Don't wait on children of other threads in this group */ +#define __WALL 0x40000000 /* Wait on all children, regardless of type */ +#define __WCLONE 0x80000000 /* Wait only on non-SIGCHLD children */ + +#if 0 +#include +#include +#include +#include +#include + +#include +#include +#endif + +/* + * Debug control. Slow but useful. + */ +#if defined(CONFIG_DEBUG_WAITQ) +#define WAITQUEUE_DEBUG 1 +#else +#define WAITQUEUE_DEBUG 0 +#endif + +struct __wait_queue { + unsigned int flags; +#define WQ_FLAG_EXCLUSIVE 0x01 + struct task_struct * task; + struct list_head task_list; +#if WAITQUEUE_DEBUG + long __magic; + long __waker; +#endif +}; +typedef struct __wait_queue wait_queue_t; + +/* + * 'dual' spinlock architecture. Can be switched between spinlock_t and + * rwlock_t locks via changing this define. Since waitqueues are quite + * decoupled in the new architecture, lightweight 'simple' spinlocks give + * us slightly better latencies and smaller waitqueue structure size. + */ +#define USE_RW_WAIT_QUEUE_SPINLOCK 0 + +#if USE_RW_WAIT_QUEUE_SPINLOCK +# define wq_lock_t rwlock_t +# define WAITQUEUE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED + +# define wq_read_lock read_lock +# define wq_read_lock_irqsave read_lock_irqsave +# define wq_read_unlock_irqrestore read_unlock_irqrestore +# define wq_read_unlock read_unlock +# define wq_write_lock_irq write_lock_irq +# define wq_write_lock_irqsave write_lock_irqsave +# define wq_write_unlock_irqrestore write_unlock_irqrestore +# define wq_write_unlock write_unlock +#else +# define wq_lock_t spinlock_t +# define WAITQUEUE_RW_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED + +# define wq_read_lock spin_lock +# define wq_read_lock_irqsave spin_lock_irqsave +# define wq_read_unlock spin_unlock +# define wq_read_unlock_irqrestore spin_unlock_irqrestore +# define wq_write_lock_irq spin_lock_irq +# define wq_write_lock_irqsave spin_lock_irqsave +# define wq_write_unlock_irqrestore spin_unlock_irqrestore +# define wq_write_unlock spin_unlock +#endif + +struct __wait_queue_head { + wq_lock_t lock; + struct list_head task_list; +#if WAITQUEUE_DEBUG + long __magic; + long __creator; +#endif +}; +typedef struct __wait_queue_head wait_queue_head_t; + + +/* + * Debugging macros. We eschew `do { } while (0)' because gcc can generate + * spurious .aligns. 
+ */ +#if WAITQUEUE_DEBUG +#define WQ_BUG() BUG() +#define CHECK_MAGIC(x) +#if 0 + do { \ + if ((x) != (long)&(x)) { \ + printk("bad magic %lx (should be %lx), ", \ + (long)x, (long)&(x)); \ + WQ_BUG(); \ + } \ + } while (0) +#endif + +#define CHECK_MAGIC_WQHEAD(x) +#if 0 + do { \ + if ((x)->__magic != (long)&((x)->__magic)) { \ + printk("bad magic %lx (should be %lx, creator %lx), ", \ + (x)->__magic, (long)&((x)->__magic), (x)->__creator); \ + WQ_BUG(); \ + } \ + } while (0) +#endif + +#define WQ_CHECK_LIST_HEAD(list) +#if 0 + do { \ + if (!(list)->next || !(list)->prev) \ + WQ_BUG(); \ + } while(0) +#endif + +#define WQ_NOTE_WAKER(tsk) +#if 0 + do { \ + (tsk)->__waker = (long)__builtin_return_address(0); \ + } while (0) +#endif +#else +#define WQ_BUG() +#define CHECK_MAGIC(x) +#define CHECK_MAGIC_WQHEAD(x) +#define WQ_CHECK_LIST_HEAD(list) +#define WQ_NOTE_WAKER(tsk) +#endif + +/* + * Macros for declaration and initialisaton of the datatypes + */ + +#if WAITQUEUE_DEBUG +# define __WAITQUEUE_DEBUG_INIT(name) //(long)&(name).__magic, 0 +# define __WAITQUEUE_HEAD_DEBUG_INIT(name) //(long)&(name).__magic, (long)&(name).__magic +#else +# define __WAITQUEUE_DEBUG_INIT(name) +# define __WAITQUEUE_HEAD_DEBUG_INIT(name) +#endif + +#define __WAITQUEUE_INITIALIZER(name, tsk) +#if 0 +{ + task: tsk, \ + task_list: { NULL, NULL }, \ + __WAITQUEUE_DEBUG_INIT(name)} +#endif + +#define DECLARE_WAITQUEUE(name, tsk) +#if 0 + wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) +#endif + +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) +#if 0 +{ + lock: WAITQUEUE_RW_LOCK_UNLOCKED, \ + task_list: { &(name).task_list, &(name).task_list }, \ + __WAITQUEUE_HEAD_DEBUG_INIT(name)} +#endif + +#define DECLARE_WAIT_QUEUE_HEAD(name) +#if 0 + wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name) +#endif + +static inline void init_waitqueue_head(wait_queue_head_t *q) +{ +#if 0 +#if WAITQUEUE_DEBUG + if (!q) + WQ_BUG(); +#endif + q->lock = WAITQUEUE_RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&q->task_list); +#if WAITQUEUE_DEBUG + q->__magic = (long)&q->__magic; + q->__creator = (long)current_text_addr(); +#endif +#endif +} + +static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) +{ +#if 0 +#if WAITQUEUE_DEBUG + if (!q || !p) + WQ_BUG(); +#endif + q->flags = 0; + q->task = p; +#if WAITQUEUE_DEBUG + q->__magic = (long)&q->__magic; +#endif +#endif +} + +static inline int waitqueue_active(wait_queue_head_t *q) +{ +#if 0 +#if WAITQUEUE_DEBUG + if (!q) + WQ_BUG(); + CHECK_MAGIC_WQHEAD(q); +#endif + + return !list_empty(&q->task_list); +#endif +} + +static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) +{ +#if 0 +#if WAITQUEUE_DEBUG + if (!head || !new) + WQ_BUG(); + CHECK_MAGIC_WQHEAD(head); + CHECK_MAGIC(new->__magic); + if (!head->task_list.next || !head->task_list.prev) + WQ_BUG(); +#endif + list_add(&new->task_list, &head->task_list); +#endif +} + +/* + * Used for wake-one threads: + */ +static inline void __add_wait_queue_tail(wait_queue_head_t *head, + wait_queue_t *new) +{ +#if 0 +#if WAITQUEUE_DEBUG + if (!head || !new) + WQ_BUG(); + CHECK_MAGIC_WQHEAD(head); + CHECK_MAGIC(new->__magic); + if (!head->task_list.next || !head->task_list.prev) + WQ_BUG(); +#endif + list_add_tail(&new->task_list, &head->task_list); +#endif +} + +static inline void __remove_wait_queue(wait_queue_head_t *head, + wait_queue_t *old) +{ +#if 0 +#if WAITQUEUE_DEBUG + if (!old) + WQ_BUG(); + CHECK_MAGIC(old->__magic); +#endif + list_del(&old->task_list); +#endif +} + + + + +#endif /* wait */ + + 
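+/*
+ * Illustrative sketch only: how the list_head helpers above are normally
+ * used in the code this was ported from.  "rx_entry" and "rx_list" are
+ * made-up names, and several of the helper bodies above are still stubbed
+ * out with #if 0, so this is kept as documentation rather than live code.
+ */
+#if 0
+struct rx_entry {
+	int			length;
+	struct list_head	link;		/* embedded list node */
+};
+
+static LIST_HEAD(rx_list);			/* empty list, head points at itself */
+
+static void rx_example(struct rx_entry *e)
+{
+	struct list_head *pos;
+
+	list_add_tail(&e->link, &rx_list);	/* append at the tail */
+
+	list_for_each(pos, &rx_list) {		/* walk head->next onwards */
+		struct rx_entry *cur =
+			list_entry(pos, struct rx_entry, link);
+		cur->length = 0;
+	}
+
+	list_del(&e->link);			/* unlink again */
+}
+#endif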
+#endif + + + + +#if 1 /* slab */ + +typedef struct +{ + int x; +} kmem_cache_s; + +typedef struct kmem_cache_s kmem_cache_t; + +#if 0 +#include +#include +#endif + +/* flags for kmem_cache_alloc() */ +#define SLAB_NOFS GFP_NOFS +#define SLAB_NOIO GFP_NOIO +#define SLAB_NOHIGHIO GFP_NOHIGHIO +#define SLAB_ATOMIC GFP_ATOMIC +#define SLAB_USER GFP_USER +#define SLAB_KERNEL GFP_KERNEL +#define SLAB_NFS GFP_NFS +#define SLAB_DMA GFP_DMA + +#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS) +#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ + +/* flags to pass to kmem_cache_create(). + * The first 3 are only valid when the allocator as been build + * SLAB_DEBUG_SUPPORT. + */ +#define SLAB_DEBUG_FREE 0x00000100UL /* Peform (expensive) checks on free */ +#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ +#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ +#define SLAB_POISON 0x00000800UL /* Poison objects */ +#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ +#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ +#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ +#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ + +/* flags passed to a constructor func */ +#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ +#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ +#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ + +/* prototypes */ +extern void kmem_cache_init(void); +extern void kmem_cache_sizes_init(void); + +extern kmem_cache_t *kmem_find_general_cachep(size_t, int gfpflags); +extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, + void (*)(void *, kmem_cache_t *, unsigned long), + void (*)(void *, kmem_cache_t *, unsigned long)); +extern int kmem_cache_destroy(kmem_cache_t *); +extern int kmem_cache_shrink(kmem_cache_t *); +extern void *kmem_cache_alloc(kmem_cache_t *, int); +extern void kmem_cache_free(kmem_cache_t *, void *); +extern unsigned int kmem_cache_size(kmem_cache_t *); + +extern void *kmalloc(size_t, int); +extern void kfree(const void *); + +//extern int FASTCALL(kmem_cache_reap(int)); + +/* System wide caches */ +extern kmem_cache_t *vm_area_cachep; +extern kmem_cache_t *mm_cachep; +extern kmem_cache_t *names_cachep; +extern kmem_cache_t *files_cachep; +extern kmem_cache_t *filp_cachep; +extern kmem_cache_t *dquot_cachep; +extern kmem_cache_t *bh_cachep; +extern kmem_cache_t *fs_cachep; +extern kmem_cache_t *sigact_cachep; + +#endif /* slab */ + + + +/* + * Berkeley style UIO structures - Alan Cox 1994. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +/* A word of warning: Our uio structure will clash with the C library one (which is now obsolete). 
Remove the C + library one from sys/uio.h if you have a very old library set */ + +struct iovec +{ + void *iov_base; /* BSD uses caddr_t (1003.1g requires void *) */ + __kernel_size_t iov_len; /* Must be size_t (1003.1g) */ +}; + +/* + * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) + */ + +#define UIO_FASTIOV 8 +#define UIO_MAXIOV 1024 +#if 0 +#define UIO_MAXIOV 16 /* Maximum iovec's in one operation + 16 matches BSD */ + /* Beg pardon: BSD has 1024 --ANK */ +#endif + + + +/* + * In Linux 2.4, static timers have been removed from the kernel. + * Timers may be dynamically created and destroyed, and should be initialized + * by a call to init_timer() upon creation. + * + * The "data" field enables use of a common timeout function for several + * timeouts. You can use this field to distinguish between the different + * invocations. + */ +struct timer_list { + struct list_head list; + unsigned long expires; + unsigned long data; + void (*function)(unsigned long); +}; + + + +struct timeval { + unsigned long tv_sec; + unsigned long tv_usec; +// time_t tv_sec; /* seconds */ +// suseconds_t tv_usec; /* microseconds */ +}; + + + + + + + +#if 1 /* poll */ + +struct file; + +struct poll_table_page; + +typedef struct poll_table_struct { + int error; + struct poll_table_page * table; +} poll_table; + +extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); + +static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) +{ + if (p && wait_address) + __pollwait(filp, wait_address, p); +} + +static inline void poll_initwait(poll_table* pt) +{ + pt->error = 0; + pt->table = NULL; +} +extern void poll_freewait(poll_table* pt); + + +/* + * Scaleable version of the fd_set. + */ + +typedef struct { + unsigned long *in, *out, *ex; + unsigned long *res_in, *res_out, *res_ex; +} fd_set_bits; + +/* + * How many longwords for "nr" bits? + */ +#define FDS_BITPERLONG (8*sizeof(long)) +#define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) +#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) + +/* + * We do a VERIFY_WRITE here even though we are only reading this time: + * we'll write to it eventually.. + * + * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. + */ +static inline +int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) +{ +#if 0 + nr = FDS_BYTES(nr); + if (ufdset) { + int error; + error = verify_area(VERIFY_WRITE, ufdset, nr); + if (!error && __copy_from_user(fdset, ufdset, nr)) + error = -EFAULT; + return error; + } + memset(fdset, 0, nr); + return 0; +#else + return 0; +#endif +} + +static inline +void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) +{ +#if 0 + if (ufdset) + __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); +#endif +} + +static inline +void zero_fd_set(unsigned long nr, unsigned long *fdset) +{ +#if 0 + memset(fdset, 0, FDS_BYTES(nr)); +#endif +} + +extern int do_select(int n, fd_set_bits *fds, long *timeout); + +#endif /* poll */ + + + +typedef struct +{ + int x; +} read_descriptor_t; + + + + + +#if 1 /* poll */ + +/* These are specified by iBCS2 */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +/* The rest seem to be more-or-less nonstandard. Check them! 
*/ +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif /* poll */ + +#endif /* _LINUX_TYPES_H */ diff --git a/reactos/drivers/net/tcpip/include/tcpcore.h b/reactos/drivers/net/tcpip/include/tcpcore.h new file mode 100755 index 00000000000..cda54ed221e --- /dev/null +++ b/reactos/drivers/net/tcpip/include/tcpcore.h @@ -0,0 +1,3856 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: include/tcpcore.h + * PURPOSE: Transmission Control Protocol definitions + * REVISIONS: + * CSH 01/01-2003 Ported from linux kernel 2.4.20 + */ + +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP module. + * + * Version: @(#)tcp.h 1.0.5 05/23/93 + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __TCPCORE_H +#define __TCPCORE_H + +#include "tcpdef.h" + + +struct socket; + + + +#if 1 /* skbuff */ + +#define HAVE_ALLOC_SKB /* For the drivers to know */ +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ +#define SLAB_SKB /* Slabified skbuffs */ + +#define CHECKSUM_NONE 0 +#define CHECKSUM_HW 1 +#define CHECKSUM_UNNECESSARY 2 + +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) + +/* A. Checksumming of received packets by device. + * + * NONE: device failed to checksum this packet. + * skb->csum is undefined. + * + * UNNECESSARY: device parsed packet and wouldbe verified checksum. + * skb->csum is undefined. + * It is bad option, but, unfortunately, many of vendors do this. + * Apparently with secret goal to sell you new device, when you + * will add new protocol to your host. F.e. IPv6. 8) + * + * HW: the most generic way. Device supplied checksum of _all_ + * the packet as seen by netif_rx in skb->csum. + * NOTE: Even if device supports only some protocols, but + * is able to produce some skb->csum, it MUST use HW, + * not UNNECESSARY. + * + * B. Checksumming on output. + * + * NONE: skb is checksummed by protocol or csum is not required. + * + * HW: device is required to csum packet as seen by hard_start_xmit + * from skb->h.raw to the end and to record the checksum + * at skb->h.raw+skb->csum. + * + * Device must show its capabilities in dev->features, set + * at device setup time. + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum + * everything. + * NETIF_F_NO_CSUM - loopback or reliable single hop media. + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only + * TCP/UDP over IPv4. Sigh. Vendors like this + * way by an unknown reason. Though, see comment above + * about CHECKSUM_UNNECESSARY. 8) + * + * Any questions? No questions, good. 
--ANK + */ + +#ifdef __i386__ +#define NET_CALLER(arg) (*(((void**)&arg)-1)) +#else +#define NET_CALLER(arg) __builtin_return_address(0) +#endif + +#ifdef CONFIG_NETFILTER +struct nf_conntrack { + atomic_t use; + void (*destroy)(struct nf_conntrack *); +}; + +struct nf_ct_info { + struct nf_conntrack *master; +}; +#endif + +struct sk_buff_head { + /* These two members must be first. */ + struct sk_buff * next; + struct sk_buff * prev; + + __u32 qlen; + spinlock_t lock; +}; + +struct sk_buff; + +#define MAX_SKB_FRAGS 6 + +typedef struct skb_frag_struct skb_frag_t; + +struct skb_frag_struct +{ + struct page *page; + __u16 page_offset; + __u16 size; +}; + +/* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +struct skb_shared_info { + atomic_t dataref; + unsigned int nr_frags; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct sk_buff { + /* These two members must be first. */ + struct sk_buff * next; /* Next buffer in list */ + struct sk_buff * prev; /* Previous buffer in list */ + + struct sk_buff_head * list; /* List we are on */ + struct sock *sk; /* Socket we are owned by */ + struct timeval stamp; /* Time we arrived */ + struct net_device *dev; /* Device we arrived on/are leaving by */ + + /* Transport layer header */ + union + { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + struct spxhdr *spxh; + unsigned char *raw; + } h; + + /* Network layer header */ + union + { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arphdr *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; + + /* Link layer header */ + union + { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + + struct dst_entry *dst; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48]; + + unsigned int len; /* Length of actual data */ + unsigned int data_len; + unsigned int csum; /* Checksum */ + unsigned char __unused, /* Dead field, may be reused */ + cloned, /* head may be cloned (check refcnt to be sure). */ + pkt_type, /* Packet class */ + ip_summed; /* Driver fed us an IP checksum */ + __u32 priority; /* Packet queueing priority */ + atomic_t users; /* User count - see datagram.c,tcp.c */ + unsigned short protocol; /* Packet protocol from driver. */ + unsigned short security; /* Security level of packet */ + unsigned int truesize; /* Buffer size */ + + unsigned char *head; /* Head of buffer */ + unsigned char *data; /* Data head pointer */ + unsigned char *tail; /* Tail pointer */ + unsigned char *end; /* End pointer */ + + void (*destructor)(struct sk_buff *); /* Destruct function */ +#ifdef CONFIG_NETFILTER + /* Can be used for communication between hooks. 
*/ + unsigned long nfmark; + /* Cache info */ + __u32 nfcache; + /* Associated connection, if any */ + struct nf_ct_info *nfct; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int nf_debug; +#endif +#endif /*CONFIG_NETFILTER*/ + +#if defined(CONFIG_HIPPI) + union{ + __u32 ifield; + } private; +#endif + +#ifdef CONFIG_NET_SCHED + __u32 tc_index; /* traffic control index */ +#endif +}; + +#define SK_WMEM_MAX 65535 +#define SK_RMEM_MAX 65535 + +#if 1 +//#ifdef __KERNEL__ +/* + * Handling routines are only of interest to the kernel + */ + +extern void __kfree_skb(struct sk_buff *skb); +extern struct sk_buff * alloc_skb(unsigned int size, int priority); +extern void kfree_skbmem(struct sk_buff *skb); +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, + int newheadroom, + int newtailroom, + int priority); +#define dev_kfree_skb(a) kfree_skb(a) +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); + +/* Internal */ +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) + +/** + * skb_queue_empty - check if a queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + */ + +static inline int skb_queue_empty(struct sk_buff_head *list) +{ + return (list->next == (struct sk_buff *) list); +} + +/** + * skb_get - reference buffer + * @skb: buffer to reference + * + * Makes another reference to a socket buffer and returns a pointer + * to the buffer. + */ + +static inline struct sk_buff *skb_get(struct sk_buff *skb) +{ + atomic_inc(&skb->users); + return skb; +} + +/* + * If users==1, we are the only owner and are can avoid redundant + * atomic change. + */ + +/** + * kfree_skb - free an sk_buff + * @skb: buffer to free + * + * Drop a reference to the buffer and free it if the usage count has + * hit zero. + */ + +static inline void kfree_skb(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + __kfree_skb(skb); +} + +/* Use this if you didn't touch the skb state [for fast switching] */ +static inline void kfree_skb_fast(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + kfree_skbmem(skb); +} + +/** + * skb_cloned - is the buffer a clone + * @skb: buffer to check + * + * Returns true if the buffer was generated with skb_clone() and is + * one of multiple shared copies of the buffer. Cloned buffers are + * shared data so must not be written to under normal circumstances. + */ + +static inline int skb_cloned(struct sk_buff *skb) +{ + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; +} + +/** + * skb_shared - is the buffer shared + * @skb: buffer to check + * + * Returns true if more than one person has a reference to this + * buffer. 
+ */ + +static inline int skb_shared(struct sk_buff *skb) +{ + return (atomic_read(&skb->users) != 1); +} + +/** + * skb_share_check - check if buffer is shared and if so clone it + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the buffer is shared the buffer is cloned and the old copy + * drops a reference. A new clone with a single reference is returned. + * If the buffer is not shared the original buffer is returned. When + * being called from interrupt status or with spinlocks held pri must + * be GFP_ATOMIC. + * + * NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) +{ + if (skb_shared(skb)) { + struct sk_buff *nskb; + nskb = skb_clone(skb, pri); + kfree_skb(skb); + return nskb; + } + return skb; +} + + +/* + * Copy shared buffers into a new sk_buff. We effectively do COW on + * packets to handle cases where we have a local reader and forward + * and a couple of other messy ones. The normal one is tcpdumping + * a packet thats being forwarded. + */ + +/** + * skb_unshare - make a copy of a shared buffer + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the socket buffer is a clone then this function creates a new + * copy of the data, drops a reference count on the old copy and returns + * the new copy with the reference count at 1. If the buffer is not a clone + * the original buffer is returned. When called with a spinlock held or + * from interrupt state @pri must be %GFP_ATOMIC + * + * %NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) +{ + struct sk_buff *nskb; + if(!skb_cloned(skb)) + return skb; + nskb=skb_copy(skb, pri); + kfree_skb(skb); /* Free our shared copy */ + return nskb; +} + +/** + * skb_peek + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the head element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->next; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_peek_tail + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the tail element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->prev; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_queue_len - get queue length + * @list_: list to measure + * + * Return the length of an &sk_buff queue. 
+ */ + +static inline __u32 skb_queue_len(struct sk_buff_head *list_) +{ + return(list_->qlen); +} + +static inline void skb_queue_head_init(struct sk_buff_head *list) +{ + spin_lock_init(&list->lock); + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; + list->qlen = 0; +} + +/* + * Insert an sk_buff at the start of a list. + * + * The "__skb_xxxx()" functions are the non-atomic ones that + * can only be called with interrupts disabled. + */ + +/** + * __skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + prev = (struct sk_buff *)list; + next = prev->next; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + + +/** + * skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_head(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the end of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + next = (struct sk_buff *)list; + prev = next->prev; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + +/** + * skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the tail of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_tail(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. This function does not take any locks + * so must be used with appropriate locks held only. The head item is + * returned or %NULL if the list is empty. 
+ */ + +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) +{ + struct sk_buff *next, *prev, *result; + + prev = (struct sk_buff *) list; + next = prev->next; + result = NULL; + if (next != prev) { + result = next; + next = next->next; + list->qlen--; + next->prev = prev; + prev->next = next; + result->next = NULL; + result->prev = NULL; + result->list = NULL; + } + return result; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The head item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) +{ + unsigned long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +/* + * Insert a packet on a list. + */ + +static inline void __skb_insert(struct sk_buff *newsk, + struct sk_buff * prev, struct sk_buff *next, + struct sk_buff_head * list) +{ + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; + newsk->list = list; + list->qlen++; +} + +/** + * skb_insert - insert a buffer + * @old: buffer to insert before + * @newsk: buffer to insert + * + * Place a packet before a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_insert(newsk, old->prev, old, old->list); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * Place a packet after a given packet in a list. + */ + +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + __skb_insert(newsk, old, old->next, old->list); +} + +/** + * skb_append - append a buffer + * @old: buffer to insert after + * @newsk: buffer to insert + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls. + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_append(old, newsk); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * remove sk_buff from list. _Must_ be called atomically, and with + * the list known.. + */ + +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) +{ + struct sk_buff * next, * prev; + + list->qlen--; + next = skb->next; + prev = skb->prev; + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + next->prev = prev; + prev->next = next; +} + +/** + * skb_unlink - remove a buffer from a list + * @skb: buffer to remove + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * + * Works even without knowing the list it is sitting on, which can be + * handy at times. It also means that THE LIST MUST EXIST when you + * unlink. Thus a list must have its contents unlinked before it is + * destroyed. 
+ */ + +static inline void skb_unlink(struct sk_buff *skb) +{ + struct sk_buff_head *list = skb->list; + + if(list) { + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + if(skb->list == list) + __skb_unlink(skb, skb->list); + spin_unlock_irqrestore(&list->lock, flags); + } +} + +/* XXX: more streamlined implementation */ + +/** + * __skb_dequeue_tail - remove from the tail of the queue + * @list: list to dequeue from + * + * Remove the tail of the list. This function does not take any locks + * so must be used with appropriate locks held only. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) +{ + struct sk_buff *skb = skb_peek_tail(list); + if (skb) + __skb_unlink(skb, list); + return skb; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) +{ + unsigned long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue_tail(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +static inline int skb_is_nonlinear(const struct sk_buff *skb) +{ + return skb->data_len; +} + +static inline int skb_headlen(const struct sk_buff *skb) +{ + return skb->len - skb->data_len; +} + +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) + +/* + * Add data to an sk_buff + */ + +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + return tmp; +} + +/** + * skb_put - add data to a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer. If this would + * exceed the total buffer size the kernel will panic. A pointer to the + * first byte of the extra data is returned. + */ + +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +{ +#if 0 + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + if(skb->tail>skb->end) { + skb_over_panic(skb, len, current_text_addr()); + } + return tmp; +#else +return NULL; +#endif +} + +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + return skb->data; +} + +/** + * skb_push - add data to the start of a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer at the buffer + * start. If this would exceed the total buffer headroom the kernel will + * panic. A pointer to the first byte of the extra data is returned. 
+ */ + +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +{ +#if 0 + skb->data-=len; + skb->len+=len; + if(skb->datahead) { + skb_under_panic(skb, len, current_text_addr()); + } + return skb->data; +#else + return NULL; +#endif +} + +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) +{ + skb->len-=len; + if (skb->len < skb->data_len) + out_of_line_bug(); + return skb->data+=len; +} + +/** + * skb_pull - remove data from the start of a buffer + * @skb: buffer to use + * @len: amount of data to remove + * + * This function removes data from the start of a buffer, returning + * the memory to the headroom. A pointer to the next data in the buffer + * is returned. Once the data has been pulled future pushes will overwrite + * the old data. + */ + +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __skb_pull(skb,len); +} + +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); + +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb_headlen(skb) && + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) + return NULL; + skb->len -= len; + return skb->data += len; +} + +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __pskb_pull(skb,len); +} + +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +{ + if (len <= skb_headlen(skb)) + return 1; + if (len > skb->len) + return 0; + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); +} + +/** + * skb_headroom - bytes at buffer head + * @skb: buffer to check + * + * Return the number of bytes of free space at the head of an &sk_buff. + */ + +static inline int skb_headroom(const struct sk_buff *skb) +{ + return skb->data-skb->head; +} + +/** + * skb_tailroom - bytes at buffer end + * @skb: buffer to check + * + * Return the number of bytes of free space at the tail of an sk_buff + */ + +static inline int skb_tailroom(const struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; +} + +/** + * skb_reserve - adjust headroom + * @skb: buffer to alter + * @len: bytes to move + * + * Increase the headroom of an empty &sk_buff by reducing the tail + * room. This is only allowed for an empty buffer. + */ + +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) +{ + skb->data+=len; + skb->tail+=len; +} + +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); + +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + } else { + ___pskb_trim(skb, len, 0); + } +} + +/** + * skb_trim - remove end from a buffer + * @skb: buffer to alter + * @len: new length + * + * Cut the length of a buffer down by removing data from the tail. If + * the buffer is already under the length specified it is not modified. 
+ */ + +static inline void skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (skb->len > len) { + __skb_trim(skb, len); + } +} + + +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + return 0; + } else { + return ___pskb_trim(skb, len, 1); + } +} + +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (len < skb->len) + return __pskb_trim(skb, len); + return 0; +} + +/** + * skb_orphan - orphan a buffer + * @skb: buffer to orphan + * + * If a buffer currently has an owner then we call the owner's + * destructor function and make the @skb unowned. The buffer continues + * to exist but is no longer charged to its former owner. + */ + + +static inline void skb_orphan(struct sk_buff *skb) +{ + if (skb->destructor) + skb->destructor(skb); + skb->destructor = NULL; + skb->sk = NULL; +} + +/** + * skb_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function takes the list + * lock and is atomic with respect to other list locking functions. + */ + + +static inline void skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __skb_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function does not take the + * list lock and the caller must hold the relevant locks to use it. + */ + + +static inline void __skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=__skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned in there is no free memory. + */ + +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, + int gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(length+16, gfp_mask); + if (skb) + skb_reserve(skb,16); + return skb; +} + +/** + * dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned in there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ + +static inline struct sk_buff *dev_alloc_skb(unsigned int length) +{ +#if 0 + return __dev_alloc_skb(length, GFP_ATOMIC); +#else + return NULL; +#endif +} + +/** + * skb_cow - copy header of skb when it is required + * @skb: buffer to cow + * @headroom: needed headroom + * + * If the skb passed lacks sufficient headroom or its data part + * is shared, data is reallocated. If reallocation fails, an error + * is returned and original skb is not changed. + * + * The result is skb with writable area skb->head...skb->tail + * and at least @headroom of space at head. 
+ */ + +static inline int +skb_cow(struct sk_buff *skb, unsigned int headroom) +{ +#if 0 + int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + + if (delta < 0) + delta = 0; + + if (delta || skb_cloned(skb)) + return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); + return 0; +#else + return 0; +#endif +} + +/** + * skb_linearize - convert paged skb to linear one + * @skb: buffer to linarize + * @gfp: allocation mode + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. */ +int skb_linearize(struct sk_buff *skb, int gfp); + +static inline void *kmap_skb_frag(const skb_frag_t *frag) +{ +#if 0 +#ifdef CONFIG_HIGHMEM + if (in_irq()) + out_of_line_bug(); + + local_bh_disable(); +#endif + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); +#else + return NULL; +#endif +} + +static inline void kunmap_skb_frag(void *vaddr) +{ +#if 0 + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); +#ifdef CONFIG_HIGHMEM + local_bh_enable(); +#endif +#endif +} + +#define skb_queue_walk(queue, skb) \ + for (skb = (queue)->next; \ + (skb != (struct sk_buff *)(queue)); \ + skb=skb->next) + + +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); + +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); + +extern void skb_init(void); +extern void skb_add_mtu(int mtu); + +#ifdef CONFIG_NETFILTER +static inline void +nf_conntrack_put(struct nf_ct_info *nfct) +{ + if (nfct && atomic_dec_and_test(&nfct->master->use)) + nfct->master->destroy(nfct->master); +} +static inline void +nf_conntrack_get(struct nf_ct_info *nfct) +{ + if (nfct) + atomic_inc(&nfct->master->use); +} +#endif + + +#endif /* skbuff */ + + + + + +struct sock; + +typedef struct sockaddr +{ + int x; +} _sockaddr; + + +struct msghdr { + void * msg_name; /* Socket name */ + int msg_namelen; /* Length of name */ + struct iovec * msg_iov; /* Data blocks */ + __kernel_size_t msg_iovlen; /* Number of blocks */ + void * msg_control; /* Per protocol magic (eg BSD file descriptor passing) */ + __kernel_size_t msg_controllen; /* Length of cmsg list */ + unsigned msg_flags; +}; + + +/* IP protocol blocks we attach to sockets. 
+ * socket layer -> transport layer interface + * transport -> network interface is defined by struct inet_proto + */ +struct proto { + void (*close)(struct sock *sk, + long timeout); + int (*connect)(struct sock *sk, + struct sockaddr *uaddr, + int addr_len); + int (*disconnect)(struct sock *sk, int flags); + + struct sock * (*accept) (struct sock *sk, int flags, int *err); + + int (*ioctl)(struct sock *sk, int cmd, + unsigned long arg); + int (*init)(struct sock *sk); + int (*destroy)(struct sock *sk); + void (*shutdown)(struct sock *sk, int how); + int (*setsockopt)(struct sock *sk, int level, + int optname, char *optval, int optlen); + int (*getsockopt)(struct sock *sk, int level, + int optname, char *optval, + int *option); + int (*sendmsg)(struct sock *sk, struct msghdr *msg, + int len); + int (*recvmsg)(struct sock *sk, struct msghdr *msg, + int len, int noblock, int flags, + int *addr_len); + int (*bind)(struct sock *sk, + struct sockaddr *uaddr, int addr_len); + + int (*backlog_rcv) (struct sock *sk, + struct sk_buff *skb); + + /* Keeping track of sk's, looking them up, and port selection methods. */ + void (*hash)(struct sock *sk); + void (*unhash)(struct sock *sk); + int (*get_port)(struct sock *sk, unsigned short snum); + + char name[32]; + + struct { + int inuse; + } stats[32]; +// u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; +// } stats[NR_CPUS]; +}; + + + + + + + +/* This defines a selective acknowledgement block. */ +struct tcp_sack_block { + __u32 start_seq; + __u32 end_seq; +}; + + +struct tcp_opt { + int tcp_header_len; /* Bytes of tcp header to send */ + +/* + * Header prediction flags + * 0x5?10 << 16 + snd_wnd in net byte order + */ + __u32 pred_flags; + +/* + * RFC793 variables by their proper names. This means you can + * read the code and the spec side by side (and laugh ...) + * See RFC793 and RFC1122. The RFC writes these in capitals. + */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + + __u32 snd_una; /* First byte we want an ack for */ + __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ + __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ + __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + + /* Delayed ACK control data */ + struct { + __u8 pending; /* ACK is pending */ + __u8 quick; /* Scheduled number of quick acks */ + __u8 pingpong; /* The session is interactive */ + __u8 blocked; /* Delayed ACK was blocked by socket lock*/ + __u32 ato; /* Predicted tick of soft clock */ + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet*/ + __u16 last_seg_size; /* Size of last incoming segment */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + } ack; + + /* Data for direct copy to user */ + struct { + //struct sk_buff_head prequeue; + struct task_struct *task; + struct iovec *iov; + int memory; + int len; + } ucopy; + + __u32 snd_wl1; /* Sequence for window update */ + __u32 snd_wnd; /* The window we expect to receive */ + __u32 max_window; /* Maximal window ever seen from peer */ + __u32 pmtu_cookie; /* Last pmtu seen by socket */ + __u16 mss_cache; /* Cached effective mss, not including SACKS */ + __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ + __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ + __u8 ca_state; /* State of fast-retransmit machine */ + __u8 retransmits; /* Number of unrecovered RTO timeouts. 
*/ + + __u8 reordering; /* Packet reordering metric. */ + __u8 queue_shrunk; /* Write queue has been shrunk recently.*/ + __u8 defer_accept; /* User waits for some data after accept() */ + +/* RTT measurement */ + __u8 backoff; /* backoff */ + __u32 srtt; /* smothed round trip time << 3 */ + __u32 mdev; /* medium deviation */ + __u32 mdev_max; /* maximal mdev for the last rtt period */ + __u32 rttvar; /* smoothed mdev_max */ + __u32 rtt_seq; /* sequence number to update rttvar */ + __u32 rto; /* retransmit timeout */ + + __u32 packets_out; /* Packets which are "in flight" */ + __u32 left_out; /* Packets which leaved network */ + __u32 retrans_out; /* Retransmitted packets out */ + + +/* + * Slow start and congestion control (see also Nagle, and Karn & Partridge) + */ + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 snd_cwnd; /* Sending congestion window */ + __u16 snd_cwnd_cnt; /* Linear increase counter */ + __u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ + __u32 snd_cwnd_used; + __u32 snd_cwnd_stamp; + + /* Two commonly used timers in both sender and receiver paths. */ + unsigned long timeout; + struct timer_list retransmit_timer; /* Resend (no ack) */ + struct timer_list delack_timer; /* Ack delay */ + + struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ + + struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ + struct sk_buff *send_head; /* Front of stuff to transmit */ + struct page *sndmsg_page; /* Cached page for sendmsg */ + u32 sndmsg_off; /* Cached offset for sendmsg */ + + __u32 rcv_wnd; /* Current receiver window */ + __u32 rcv_wup; /* rcv_nxt on last window update sent */ + __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ + __u32 pushed_seq; /* Last pushed seq, required to talk to windows */ + __u32 copied_seq; /* Head of yet unread data */ +/* + * Options received (usually on last packet, some only on SYN packets). + */ + char tstamp_ok, /* TIMESTAMP seen on SYN packet */ + wscale_ok, /* Wscale seen on SYN packet */ + sack_ok; /* SACK seen on SYN packet */ + char saw_tstamp; /* Saw TIMESTAMP on last packet */ + __u8 snd_wscale; /* Window scaling received from sender */ + __u8 rcv_wscale; /* Window scaling to send to receiver */ + __u8 nonagle; /* Disable Nagle algorithm? */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ + +/* PAWS/RTTM data */ + __u32 rcv_tsval; /* Time stamp value */ + __u32 rcv_tsecr; /* Time stamp echo reply */ + __u32 ts_recent; /* Time stamp to echo next */ + long ts_recent_stamp;/* Time we stored ts_recent (for aging) */ + +/* SACKs data */ + __u16 user_mss; /* mss requested by user in ioctl */ + __u8 dsack; /* D-SACK is scheduled */ + __u8 eff_sacks; /* Size of SACK array to send with next packet */ + struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ + struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ + + __u32 window_clamp; /* Maximal window to advertise */ + __u32 rcv_ssthresh; /* Current window clamp */ + __u8 probes_out; /* unanswered 0 window probes */ + __u8 num_sacks; /* Number of SACK blocks */ + __u16 advmss; /* Advertised MSS */ + + __u8 syn_retries; /* num of allowed syn retries */ + __u8 ecn_flags; /* ECN status bits. 
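+ *
+ * (Aside on the RTT fields above: srtt is kept left-shifted by 3 so
+ * that the classic smoothed estimator reduces to shifts and adds.
+ * Roughly, per new measurement m, the usual scheme is
+ *
+ *	err = m - (tp->srtt >> 3);
+ *	tp->srtt += err;                          srtt <- 7/8*srtt + 1/8*m
+ *	tp->rto  = (tp->srtt >> 3) + tp->rttvar;
+ *
+ * see the estimator in tcp_input.c for the real bookkeeping, including
+ * mdev/mdev_max and the RTO clamping.)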
*/ + __u16 prior_ssthresh; /* ssthresh saved at recovery start */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u32 fackets_out; /* FACK'd packets */ + __u32 high_seq; /* snd_nxt at onset of congestion */ + + __u32 retrans_stamp; /* Timestamp of the last retransmit, + * also used in SYN-SENT to remember stamp of + * the first SYN. */ + __u32 undo_marker; /* tracking retrans started here. */ + int undo_retrans; /* number of undoable retransmissions. */ + __u32 urg_seq; /* Seq of received urgent pointer */ + __u16 urg_data; /* Saved octet of OOB data and control flags */ + __u8 pending; /* Scheduled timer event */ + __u8 urg_mode; /* In urgent mode */ + __u32 snd_up; /* Urgent pointer */ + + /* The syn_wait_lock is necessary only to avoid tcp_get_info having + * to grab the main lock sock while browsing the listening hash + * (otherwise it's deadlock prone). + * This lock is acquired in read mode only from tcp_get_info() and + * it's acquired in write mode _only_ from code that is actively + * changing the syn_wait_queue. All readers that are holding + * the master sock lock don't need to grab this lock in read mode + * too as the syn_wait_queue writes are always protected from + * the main sock lock. + */ + rwlock_t syn_wait_lock; + struct tcp_listen_opt *listen_opt; + + /* FIFO of established children */ + struct open_request *accept_queue; + struct open_request *accept_queue_tail; + + int write_pending; /* A write to socket waits to start. */ + + unsigned int keepalive_time; /* time before keep alive takes place */ + unsigned int keepalive_intvl; /* time interval between keep alive probes */ + int linger2; + + unsigned long last_synq_overflow; +}; + + + + +/* This is the per-socket lock. The spinlock provides a synchronization + * between user contexts and software interrupt processing, whereas the + * mini-semaphore synchronizes multiple users amongst themselves. + */ +typedef struct { + spinlock_t slock; + unsigned int users; + wait_queue_head_t wq; +} socket_lock_t; + +struct sock { + /* Socket demultiplex comparisons on incoming packets. */ + __u32 daddr; /* Foreign IPv4 addr */ + __u32 rcv_saddr; /* Bound local IPv4 addr */ + __u16 dport; /* Destination port */ + unsigned short num; /* Local port */ + int bound_dev_if; /* Bound device index if != 0 */ + + /* Main hash linkage for various protocol lookup tables. */ + struct sock *next; + struct sock **pprev; + struct sock *bind_next; + struct sock **bind_pprev; + + volatile unsigned char state, /* Connection state */ + zapped; /* In ax25 & ipx means not linked */ + __u16 sport; /* Source port */ + + unsigned short family; /* Address family */ + unsigned char reuse; /* SO_REUSEADDR setting */ + unsigned char shutdown; + atomic_t refcnt; /* Reference count */ + + socket_lock_t lock; /* Synchronizer... */ + int rcvbuf; /* Size of receive buffer in bytes */ + + wait_queue_head_t *sleep; /* Sock wait queue */ + struct dst_entry *dst_cache; /* Destination cache */ + rwlock_t dst_lock; + atomic_t rmem_alloc; /* Receive queue bytes committed */ + struct sk_buff_head receive_queue; /* Incoming packets */ + atomic_t wmem_alloc; /* Transmit queue bytes committed */ + struct sk_buff_head write_queue; /* Packet sending queue */ + atomic_t omem_alloc; /* "o" is "option" or "other" */ + int wmem_queued; /* Persistent queue size */ + int forward_alloc; /* Space allocated forward. 
*/ + __u32 saddr; /* Sending source */ + unsigned int allocation; /* Allocation mode */ + int sndbuf; /* Size of send buffer in bytes */ + struct sock *prev; + + /* Not all are volatile, but some are, so we might as well say they all are. + * XXX Make this a flag word -DaveM + */ + volatile char dead, + done, + urginline, + keepopen, + linger, + destroy, + no_check, + broadcast, + bsdism; + unsigned char debug; + unsigned char rcvtstamp; + unsigned char use_write_queue; + unsigned char userlocks; + /* Hole of 3 bytes. Try to pack. */ + int route_caps; + int proc; + unsigned long lingertime; + + int hashent; + struct sock *pair; + + /* The backlog queue is special, it is always used with + * the per-socket spinlock held and requires low latency + * access. Therefore we special case it's implementation. + */ + struct { + struct sk_buff *head; + struct sk_buff *tail; + } backlog; + + rwlock_t callback_lock; + + /* Error queue, rarely used. */ + struct sk_buff_head error_queue; + + struct proto *prot; + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + union { + struct ipv6_pinfo af_inet6; + } net_pinfo; +#endif + + union { + struct tcp_opt af_tcp; +#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE) + struct raw_opt tp_raw4; +#endif +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct raw6_opt tp_raw; +#endif /* CONFIG_IPV6 */ +#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE) + struct spx_opt af_spx; +#endif /* CONFIG_SPX */ + + } tp_pinfo; + + int err, err_soft; /* Soft holds errors that don't + cause failure but are the cause + of a persistent failure not just + 'timed out' */ + unsigned short ack_backlog; + unsigned short max_ack_backlog; + __u32 priority; + unsigned short type; + unsigned char localroute; /* Route locally only */ + unsigned char protocol; +// struct ucred peercred; + int rcvlowat; + long rcvtimeo; + long sndtimeo; + +#ifdef CONFIG_FILTER + /* Socket Filtering Instructions */ + struct sk_filter *filter; +#endif /* CONFIG_FILTER */ + + /* This is where all the private (optional) areas that don't + * overlap will eventually live. 
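+ *
+ * With this layout the per-protocol state lives inside the socket
+ * itself rather than behind a separate allocation; the TCP code, for
+ * instance, reaches its control block simply as (sketch)
+ *
+ *	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ *
+ * and which member of the union below is meaningful follows from
+ * sk->family and sk->protocol, fixed when the socket is created.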
+ */ + union { + void *destruct_hook; +// struct unix_opt af_unix; +#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE) + struct inet_opt af_inet; +#endif +#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) + struct atalk_sock af_at; +#endif +#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) + struct ipx_opt af_ipx; +#endif +#if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE) + struct dn_scp dn; +#endif +#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE) + struct packet_opt *af_packet; +#endif +#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE) + x25_cb *x25; +#endif +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + ax25_cb *ax25; +#endif +#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) + nr_cb *nr; +#endif +#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE) + rose_cb *rose; +#endif +#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE) + struct pppox_opt *pppox; +#endif + struct netlink_opt *af_netlink; +#if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE) + struct econet_opt *af_econet; +#endif +#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE) + struct atm_vcc *af_atm; +#endif +#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE) + struct irda_sock *irda; +#endif +#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE) + struct wanpipe_opt *af_wanpipe; +#endif + } protinfo; + + + /* This part is used for the timeout functions. */ + struct timer_list timer; /* This is the sock cleanup timer. */ + struct timeval stamp; + + /* Identd and reporting IO signals */ + struct socket *socket; + + /* RPC layer private data */ + void *user_data; + + /* Callbacks */ + void (*state_change)(struct sock *sk); + void (*data_ready)(struct sock *sk,int bytes); + void (*write_space)(struct sock *sk); + void (*error_report)(struct sock *sk); + + int (*backlog_rcv) (struct sock *sk, + struct sk_buff *skb); + void (*destruct)(struct sock *sk); +}; + + + + +#if 1 /* dst (_NET_DST_H) */ + +#if 0 +#include +#include +#endif + +/* + * 0 - no debugging messages + * 1 - rare events and bugs (default) + * 2 - trace mode. 
+ */ +#define RT_CACHE_DEBUG 0 + +#define DST_GC_MIN (1*HZ) +#define DST_GC_INC (5*HZ) +#define DST_GC_MAX (120*HZ) + +struct sk_buff; + +struct dst_entry +{ + struct dst_entry *next; + atomic_t __refcnt; /* client references */ + int __use; + struct net_device *dev; + int obsolete; + int flags; +#define DST_HOST 1 + unsigned long lastuse; + unsigned long expires; + + unsigned mxlock; + unsigned pmtu; + unsigned window; + unsigned rtt; + unsigned rttvar; + unsigned ssthresh; + unsigned cwnd; + unsigned advmss; + unsigned reordering; + + unsigned long rate_last; /* rate limiting for ICMP */ + unsigned long rate_tokens; + + int error; + + struct neighbour *neighbour; + struct hh_cache *hh; + + int (*input)(struct sk_buff*); + int (*output)(struct sk_buff*); + +#ifdef CONFIG_NET_CLS_ROUTE + __u32 tclassid; +#endif + + struct dst_ops *ops; + + char info[0]; +}; + + +struct dst_ops +{ + unsigned short family; + unsigned short protocol; + unsigned gc_thresh; + + int (*gc)(void); + struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); + struct dst_entry * (*reroute)(struct dst_entry *, + struct sk_buff *); + void (*destroy)(struct dst_entry *); + struct dst_entry * (*negative_advice)(struct dst_entry *); + void (*link_failure)(struct sk_buff *); + int entry_size; + + atomic_t entries; + kmem_cache_t *kmem_cachep; +}; + +#ifdef __KERNEL__ + +static inline void dst_hold(struct dst_entry * dst) +{ + atomic_inc(&dst->__refcnt); +} + +static inline +struct dst_entry * dst_clone(struct dst_entry * dst) +{ + if (dst) + atomic_inc(&dst->__refcnt); + return dst; +} + +static inline +void dst_release(struct dst_entry * dst) +{ + if (dst) + atomic_dec(&dst->__refcnt); +} + +extern void * dst_alloc(struct dst_ops * ops); +extern void __dst_free(struct dst_entry * dst); +extern void dst_destroy(struct dst_entry * dst); + +static inline +void dst_free(struct dst_entry * dst) +{ + if (dst->obsolete > 1) + return; + if (!atomic_read(&dst->__refcnt)) { + dst_destroy(dst); + return; + } + __dst_free(dst); +} + +static inline void dst_confirm(struct dst_entry *dst) +{ + if (dst) + neigh_confirm(dst->neighbour); +} + +static inline void dst_negative_advice(struct dst_entry **dst_p) +{ + struct dst_entry * dst = *dst_p; + if (dst && dst->ops->negative_advice) + *dst_p = dst->ops->negative_advice(dst); +} + +static inline void dst_link_failure(struct sk_buff *skb) +{ + struct dst_entry * dst = skb->dst; + if (dst && dst->ops && dst->ops->link_failure) + dst->ops->link_failure(skb); +} + +static inline void dst_set_expires(struct dst_entry *dst, int timeout) +{ + unsigned long expires = jiffies + timeout; + + if (expires == 0) + expires = 1; + + if (dst->expires == 0 || (long)(dst->expires - expires) > 0) + dst->expires = expires; +} + +extern void dst_init(void); + +#endif /* dst */ + + + +#if 1 +/* dummy types */ + + +#endif + +#define TCP_DEBUG 1 +#define FASTRETRANS_DEBUG 1 + +/* Cancel timers, when they are not required. */ +#undef TCP_CLEAR_TIMERS + +#if 0 +#include +#include +#include +#include +#include +#include +#else +#include "linux.h" +#endif + +/* This is for all connections with a full identity, no wildcards. + * New scheme, half the table is for TIME_WAIT, the other half is + * for the rest. I'll experiment with dynamic table growth later. + */ +struct tcp_ehash_bucket { + rwlock_t lock; + struct sock *chain; +} __attribute__((__aligned__(8))); + +/* This is for listening sockets, thus all sockets which possess wildcards. */ +#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. 
*/ + +/* There are a few simple rules, which allow for local port reuse by + * an application. In essence: + * + * 1) Sockets bound to different interfaces may share a local port. + * Failing that, goto test 2. + * 2) If all sockets have sk->reuse set, and none of them are in + * TCP_LISTEN state, the port may be shared. + * Failing that, goto test 3. + * 3) If all sockets are bound to a specific sk->rcv_saddr local + * address, and none of them are the same, the port may be + * shared. + * Failing this, the port cannot be shared. + * + * The interesting point, is test #2. This is what an FTP server does + * all day. To optimize this case we use a specific flag bit defined + * below. As we add sockets to a bind bucket list, we perform a + * check of: (newsk->reuse && (newsk->state != TCP_LISTEN)) + * As long as all sockets added to a bind bucket pass this test, + * the flag bit will be set. + * The resulting situation is that tcp_v[46]_verify_bind() can just check + * for this flag bit, if it is set and the socket trying to bind has + * sk->reuse set, we don't even have to walk the owners list at all, + * we return that it is ok to bind this socket to the requested local port. + * + * Sounds like a lot of work, but it is worth it. In a more naive + * implementation (ie. current FreeBSD etc.) the entire list of ports + * must be walked for each data port opened by an ftp server. Needless + * to say, this does not scale at all. With a couple thousand FTP + * users logged onto your box, isn't it nice to know that new data + * ports are created in O(1) time? I thought so. ;-) -DaveM + */ +struct tcp_bind_bucket { + unsigned short port; + signed short fastreuse; + struct tcp_bind_bucket *next; + struct sock *owners; + struct tcp_bind_bucket **pprev; +}; + +struct tcp_bind_hashbucket { + spinlock_t lock; + struct tcp_bind_bucket *chain; +}; + +extern struct tcp_hashinfo { + /* This is for sockets with full identity only. Sockets here will + * always be without wildcards and will have the following invariant: + * + * TCP_ESTABLISHED <= sk->state < TCP_CLOSE + * + * First half of the table is for sockets not in TIME_WAIT, second half + * is for TIME_WAIT sockets only. + */ + struct tcp_ehash_bucket *__tcp_ehash; + + /* Ok, let's try this, I give up, we do need a local binding + * TCP hash as well as the others for fast bind/connect. + */ + struct tcp_bind_hashbucket *__tcp_bhash; + + int __tcp_bhash_size; + int __tcp_ehash_size; + + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. + */ + struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE]; + + /* All the above members are written once at bootup and + * never written again _or_ are predominantly read-access. + * + * Now align to a new cache line as all the following members + * are often dirty. 
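+ *
+ * (The fastreuse shortcut described at length above amounts to
+ * maintaining, per bind bucket, the running AND of
+ *
+ *	sk->reuse && sk->state != TCP_LISTEN
+ *
+ * over every owner added to tb->owners; as long as tb->fastreuse stays
+ * non-zero, a new SO_REUSEADDR bind to that port is granted without
+ * walking the owner list at all.)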
+ */ + rwlock_t __tcp_lhash_lock ____cacheline_aligned; + atomic_t __tcp_lhash_users; + wait_queue_head_t __tcp_lhash_wait; + spinlock_t __tcp_portalloc_lock; +} tcp_hashinfo; + +#define tcp_ehash (tcp_hashinfo.__tcp_ehash) +#define tcp_bhash (tcp_hashinfo.__tcp_bhash) +#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) +#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) +#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) +#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) +#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) +#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) +#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) + +extern kmem_cache_t *tcp_bucket_cachep; +extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, + unsigned short snum); +extern void tcp_bucket_unlock(struct sock *sk); +extern int tcp_port_rover; +extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif); + +/* These are AF independent. */ +static __inline__ int tcp_bhashfn(__u16 lport) +{ + return (lport & (tcp_bhash_size - 1)); +} + +/* This is a TIME_WAIT bucket. It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct tcp_tw_bucket { + /* These _must_ match the beginning of struct sock precisely. + * XXX Yes I know this is gross, but I'd have to edit every single + * XXX networking file if I created a "struct sock_header". -DaveM + */ + __u32 daddr; + __u32 rcv_saddr; + __u16 dport; + unsigned short num; + int bound_dev_if; + struct sock *next; + struct sock **pprev; + struct sock *bind_next; + struct sock **bind_pprev; + unsigned char state, + substate; /* "zapped" is replaced with "substate" */ + __u16 sport; + unsigned short family; + unsigned char reuse, + rcv_wscale; /* It is also TW bucket specific */ + atomic_t refcnt; + + /* And these are ours. */ + int hashent; + int timeout; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 rcv_wnd; + __u32 ts_recent; + long ts_recent_stamp; + unsigned long ttd; + struct tcp_bind_bucket *tb; + struct tcp_tw_bucket *next_death; + struct tcp_tw_bucket **pprev_death; + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr v6_daddr; + struct in6_addr v6_rcv_saddr; +#endif +}; + +extern kmem_cache_t *tcp_timewait_cachep; + +static inline void tcp_tw_put(struct tcp_tw_bucket *tw) +{ + if (atomic_dec_and_test(&tw->refcnt)) { +#ifdef INET_REFCNT_DEBUG + printk(KERN_DEBUG "tw_bucket %p released\n", tw); +#endif + kmem_cache_free(tcp_timewait_cachep, tw); + } +} + +extern atomic_t tcp_orphan_count; +extern int tcp_tw_count; +extern void tcp_time_wait(struct sock *sk, int state, int timeo); +extern void tcp_timewait_kill(struct tcp_tw_bucket *tw); +extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); +extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); + + +/* Socket demux engine toys. 
*/ +#ifdef __BIG_ENDIAN +#define TCP_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport)<<16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define TCP_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport)<<16) | (__u32)(__sport)) +#endif + +#if (BITS_PER_LONG == 64) +#ifdef __BIG_ENDIAN +#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ + __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ + __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&((__sk)->daddr)))== (__cookie)) && \ + ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) +#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->daddr == (__saddr)) && \ + ((__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ + +#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + ((__sk)->family == AF_INET6) && \ + !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr)) && \ + !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \ + (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) + +/* These can have wildcards, don't try too hard. */ +static __inline__ int tcp_lhashfn(unsigned short num) +{ +#if 0 + return num & (TCP_LHTABLE_SIZE - 1); +#else + return 0; +#endif +} + +static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) +{ +#if 0 + return tcp_lhashfn(sk->num); +#else + return 0; +#endif +} + +#define MAX_TCP_HEADER (128 + MAX_HEADER) + +/* + * Never offer a window over 32767 without using window scaling. Some + * poor stacks do signed 16bit maths! + */ +#define MAX_TCP_WINDOW 32767U + +/* Minimal accepted MSS. It is (60+60+8) - (20+20). */ +#define TCP_MIN_MSS 88U + +/* Minimal RCV_MSS. */ +#define TCP_MIN_RCVMSS 536U + +/* After receiving this amount of duplicate ACKs fast retransmit starts. */ +#define TCP_FASTRETRANS_THRESH 3 + +/* Maximal reordering. */ +#define TCP_MAX_REORDERING 127 + +/* Maximal number of ACKs sent quickly to accelerate slow-start. */ +#define TCP_MAX_QUICKACKS 16U + +/* urg_data states */ +#define TCP_URG_VALID 0x0100 +#define TCP_URG_NOTYET 0x0200 +#define TCP_URG_READ 0x0400 + +#define TCP_RETR1 3 /* + * This is how many retries it does before it + * tries to figure out if the gateway is + * down. Minimal RFC value is 3; it corresponds + * to ~3sec-8min depending on RTO. + */ + +#define TCP_RETR2 15 /* + * This should take at least + * 90 minutes to time out. + * RFC1122 says that the limit is 100 sec. + * 15 is ~13-30min depending on RTO. + */ + +#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a + * connection: ~180sec is RFC minumum */ + +#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a + * connection: ~180sec is RFC minumum */ + + +#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned + * socket. 7 is ~50sec-16min. 
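+ *
+ * (Aside on the demux macros above, roughly how the established-hash
+ * lookup uses them; saddr/sport come from the incoming segment, hnum
+ * and dif are the local port and bound device index:
+ *
+ *	TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
+ *	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
+ *
+ *	for (sk = head->chain; sk; sk = sk->next)
+ *		if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
+ *			goto hit;
+ *
+ * on 64-bit builds the address pair collapses into the single cookie
+ * compare, on 32-bit it stays two separate compares.)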
+ */ + + +#define TCP_TIMEWAIT_LEN (60*1000) +//#define TCP_TIMEWAIT_LEN (60*HZ) +/* how long to wait to destroy TIME-WAIT + * state, about 60 seconds */ +#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN + /* BSD style FIN_WAIT2 deadlock breaker. + * It used to be 3min, new value is 60sec, + * to combine FIN-WAIT-2 timeout with + * TIME-WAIT timer. + */ + +#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +#if HZ >= 100 +#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ +#define TCP_ATO_MIN ((unsigned)(HZ/25)) +#else +#define TCP_DELACK_MIN 4U +#define TCP_ATO_MIN 4U +#endif +#define TCP_RTO_MAX ((unsigned)(120*HZ)) +#define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */ + +#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes + * for local resources. + */ + +#define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ +#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ +#define TCP_KEEPALIVE_INTVL (75*HZ) + +#define MAX_TCP_KEEPIDLE 32767 +#define MAX_TCP_KEEPINTVL 32767 +#define MAX_TCP_KEEPCNT 127 +#define MAX_TCP_SYNCNT 127 + +/* TIME_WAIT reaping mechanism. */ +#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ +#define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) + +#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ +#define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */ + +#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24) +#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated + * after this time. It should be equal + * (or greater than) TCP_TIMEWAIT_LEN + * to provide reliability equal to one + * provided by timewait state. + */ +#define TCP_PAWS_WINDOW 1 /* Replay window for per-host + * timestamps. It must be less than + * minimal timewait lifetime. + */ + +#define TCP_TW_RECYCLE_SLOTS_LOG 5 +#define TCP_TW_RECYCLE_SLOTS (1< 4sec, it is "slow" path, no recycling is required, + so that we select tick to get range about 4 seconds. + */ + +#if 0 +#if HZ <= 16 || HZ > 4096 +# error Unsupported: HZ <= 16 or HZ > 4096 +#elif HZ <= 32 +# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) +#elif HZ <= 64 +# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) +#elif HZ <= 128 +# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) +#elif HZ <= 256 +# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) +#elif HZ <= 512 +# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) +#elif HZ <= 1024 +# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) +#elif HZ <= 2048 +# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) +#else +# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) +#endif +#else +#define TCP_TW_RECYCLE_TICK (0) +#endif + +/* + * TCP option + */ + +#define TCPOPT_NOP 1 /* Padding */ +#define TCPOPT_EOL 0 /* End of options */ +#define TCPOPT_MSS 2 /* Segment size negotiating */ +#define TCPOPT_WINDOW 3 /* Window scaling */ +#define TCPOPT_SACK_PERM 4 /* SACK Permitted */ +#define TCPOPT_SACK 5 /* SACK Block */ +#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ + +/* + * TCP option lengths + */ + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_TIMESTAMP 10 + +/* But this is what stacks really send out. 
*/ +#define TCPOLEN_TSTAMP_ALIGNED 12 +#define TCPOLEN_WSCALE_ALIGNED 4 +#define TCPOLEN_SACKPERM_ALIGNED 4 +#define TCPOLEN_SACK_BASE 2 +#define TCPOLEN_SACK_BASE_ALIGNED 4 +#define TCPOLEN_SACK_PERBLOCK 8 + +#define TCP_TIME_RETRANS 1 /* Retransmit timer */ +#define TCP_TIME_DACK 2 /* Delayed ack timer */ +#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ +#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ + +#if 0 +/* sysctl variables for tcp */ +extern int sysctl_max_syn_backlog; +extern int sysctl_tcp_timestamps; +extern int sysctl_tcp_window_scaling; +extern int sysctl_tcp_sack; +extern int sysctl_tcp_fin_timeout; +extern int sysctl_tcp_tw_recycle; +extern int sysctl_tcp_keepalive_time; +extern int sysctl_tcp_keepalive_probes; +extern int sysctl_tcp_keepalive_intvl; +extern int sysctl_tcp_syn_retries; +extern int sysctl_tcp_synack_retries; +extern int sysctl_tcp_retries1; +extern int sysctl_tcp_retries2; +extern int sysctl_tcp_orphan_retries; +extern int sysctl_tcp_syncookies; +extern int sysctl_tcp_retrans_collapse; +extern int sysctl_tcp_stdurg; +extern int sysctl_tcp_rfc1337; +extern int sysctl_tcp_abort_on_overflow; +extern int sysctl_tcp_max_orphans; +extern int sysctl_tcp_max_tw_buckets; +extern int sysctl_tcp_fack; +extern int sysctl_tcp_reordering; +extern int sysctl_tcp_ecn; +extern int sysctl_tcp_dsack; +extern int sysctl_tcp_mem[3]; +extern int sysctl_tcp_wmem[3]; +extern int sysctl_tcp_rmem[3]; +extern int sysctl_tcp_app_win; +extern int sysctl_tcp_adv_win_scale; +extern int sysctl_tcp_tw_reuse; +#endif + +extern atomic_t tcp_memory_allocated; +extern atomic_t tcp_sockets_allocated; +extern int tcp_memory_pressure; + +struct open_request; + +struct or_calltable { + int family; + int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*); + void (*send_ack) (struct sk_buff *skb, struct open_request *req); + void (*destructor) (struct open_request *req); + void (*send_reset) (struct sk_buff *skb); +}; + +struct tcp_v4_open_req { + __u32 loc_addr; + __u32 rmt_addr; + struct ip_options *opt; +}; + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +struct tcp_v6_open_req { + struct in6_addr loc_addr; + struct in6_addr rmt_addr; + struct sk_buff *pktopts; + int iif; +}; +#endif + +/* this structure is too big */ +struct open_request { + struct open_request *dl_next; /* Must be first member! */ + __u32 rcv_isn; + __u32 snt_isn; + __u16 rmt_port; + __u16 mss; + __u8 retrans; + __u8 __pad; + __u16 snd_wscale : 4, + rcv_wscale : 4, + tstamp_ok : 1, + sack_ok : 1, + wscale_ok : 1, + ecn_ok : 1, + acked : 1; + /* The following two fields can be easily recomputed I think -AK */ + __u32 window_clamp; /* window clamp at creation time */ + __u32 rcv_wnd; /* rcv_wnd offered first time */ + __u32 ts_recent; + unsigned long expires; + struct or_calltable *class; + struct sock *sk; + union { + struct tcp_v4_open_req v4_req; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct tcp_v6_open_req v6_req; +#endif + } af; +}; + +/* SLAB cache for open requests. 
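+ *
+ * Rough lifecycle (a sketch, not a contract): a listener answers a SYN
+ * by allocating a request with tcp_openreq_alloc() below and parking
+ * it on its SYN queue; retransmitted SYN-ACKs go through
+ * req->class->rtx_syn_ack(); the completing ACK promotes the request
+ * via tcp_create_openreq_child(); drop and timeout paths release it
+ * with
+ *
+ *	tcp_openreq_free(req);          runs class->destructor, then
+ *					returns the request to this cache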
*/ +extern kmem_cache_t *tcp_openreq_cachep; + +#define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC) +#define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req) + +static inline void tcp_openreq_free(struct open_request *req) +{ + req->class->destructor(req); + tcp_openreq_fastfree(req); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#define TCP_INET_FAMILY(fam) ((fam) == AF_INET) +#else +#define TCP_INET_FAMILY(fam) 1 +#endif + +/* + * Pointers to address related TCP functions + * (i.e. things that depend on the address family) + * + * BUGGG_FUTURE: all the idea behind this struct is wrong. + * It mixes socket frontend with transport function. + * With port sharing between IPv6/v4 it gives the only advantage, + * only poor IPv6 needs to permanently recheck, that it + * is still IPv6 8)8) It must be cleaned up as soon as possible. + * --ANK (980802) + */ + +struct tcp_func { + int (*queue_xmit) (struct sk_buff *skb); + + void (*send_check) (struct sock *sk, + struct tcphdr *th, + int len, + struct sk_buff *skb); + + int (*rebuild_header) (struct sock *sk); + + int (*conn_request) (struct sock *sk, + struct sk_buff *skb); + + struct sock * (*syn_recv_sock) (struct sock *sk, + struct sk_buff *skb, + struct open_request *req, + struct dst_entry *dst); + + int (*remember_stamp) (struct sock *sk); + + __u16 net_header_len; + + int (*setsockopt) (struct sock *sk, + int level, + int optname, + char *optval, + int optlen); + + int (*getsockopt) (struct sock *sk, + int level, + int optname, + char *optval, + int *optlen); + + + void (*addr2sockaddr) (struct sock *sk, + struct sockaddr *); + + int sockaddr_len; +}; + +/* + * The next routines deal with comparing 32 bit unsigned ints + * and worry about wraparound (automatic with unsigned arithmetic). + */ + +extern __inline int before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) < 0; +} + +extern __inline int after(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq2-seq1) < 0; +} + + +/* is s2<=s1<=s3 ? 
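+ *
+ * All three helpers rely on unsigned 32-bit wraparound, so they stay
+ * correct when the sequence space rolls over. For instance, with
+ * seq1 = 0xfffffff0 and seq2 = 0x00000010:
+ *
+ *	before(seq1, seq2)      (__s32)(seq1 - seq2) == -0x20 < 0, so true
+ *	after(seq2, seq1)       true for the same reason
+ *
+ * i.e. a sequence number just below the wrap point still orders
+ * "before" one just above it.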
*/ +extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3) +{ + return seq3 - seq2 >= seq1 - seq2; +} + + +extern struct proto tcp_prot; + +#ifdef ROS_STATISTICS +extern struct tcp_mib tcp_statistics[NR_CPUS*2]; + +#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field) +#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field) +#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field) +#endif + +extern void tcp_put_port(struct sock *sk); +extern void __tcp_put_port(struct sock *sk); +extern void tcp_inherit_port(struct sock *sk, struct sock *child); + +extern void tcp_v4_err(struct sk_buff *skb, u32); + +extern void tcp_shutdown (struct sock *sk, int how); + +extern int tcp_v4_rcv(struct sk_buff *skb); + +extern int tcp_v4_remember_stamp(struct sock *sk); + +extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); + +extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size); +extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); + +extern int tcp_ioctl(struct sock *sk, + int cmd, + unsigned long arg); + +extern int tcp_rcv_state_process(struct sock *sk, + struct sk_buff *skb, + struct tcphdr *th, + unsigned len); + +extern int tcp_rcv_established(struct sock *sk, + struct sk_buff *skb, + struct tcphdr *th, + unsigned len); + +enum tcp_ack_state_t +{ + TCP_ACK_SCHED = 1, + TCP_ACK_TIMER = 2, + TCP_ACK_PUSHED= 4 +}; + +static inline void tcp_schedule_ack(struct tcp_opt *tp) +{ + tp->ack.pending |= TCP_ACK_SCHED; +} + +static inline int tcp_ack_scheduled(struct tcp_opt *tp) +{ + return tp->ack.pending&TCP_ACK_SCHED; +} + +static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp) +{ + if (tp->ack.quick && --tp->ack.quick == 0) { + /* Leaving quickack mode we deflate ATO. 
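The before()/after()/between() helpers above lean on unsigned subtraction followed by a signed cast, which keeps the ordering correct when 32-bit sequence numbers wrap. A minimal standalone sketch of the same trick (the seq_* names and the test values are mine, not part of the driver):

#include <assert.h>
#include <stdint.h>

static int seq_before(uint32_t s1, uint32_t s2)
{
    /* s1 precedes s2 in sequence space, even across a 2^32 wrap. */
    return (int32_t)(s1 - s2) < 0;
}

static int seq_between(uint32_t s1, uint32_t s2, uint32_t s3)
{
    /* s2 <= s1 <= s3 in the same wrap-aware arithmetic. */
    return s3 - s2 >= s1 - s2;
}

int main(void)
{
    assert(seq_before(0xFFFFFFF0u, 0x00000010u));     /* "old" edge of a wrap */
    assert(!seq_before(0x00000010u, 0xFFFFFFF0u));
    assert(seq_between(0x00000005u, 0xFFFFFFF0u, 0x00000010u));
    return 0;
}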
*/ + tp->ack.ato = TCP_ATO_MIN; + } +} + +extern void tcp_enter_quickack_mode(struct tcp_opt *tp); + +static __inline__ void tcp_delack_init(struct tcp_opt *tp) +{ + memset(&tp->ack, 0, sizeof(tp->ack)); +} + +static inline void tcp_clear_options(struct tcp_opt *tp) +{ + tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0; +} + +enum tcp_tw_status +{ + TCP_TW_SUCCESS = 0, + TCP_TW_RST = 1, + TCP_TW_ACK = 2, + TCP_TW_SYN = 3 +}; + + +extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, + struct sk_buff *skb, + struct tcphdr *th, + unsigned len); + +extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, + struct open_request *req, + struct open_request **prev); +extern int tcp_child_process(struct sock *parent, + struct sock *child, + struct sk_buff *skb); +extern void tcp_enter_loss(struct sock *sk, int how); +extern void tcp_clear_retrans(struct tcp_opt *tp); +extern void tcp_update_metrics(struct sock *sk); + +extern void tcp_close(struct sock *sk, + long timeout); +extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); +extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); +extern void tcp_write_space(struct sock *sk); + +extern int tcp_getsockopt(struct sock *sk, int level, + int optname, char *optval, + int *optlen); +extern int tcp_setsockopt(struct sock *sk, int level, + int optname, char *optval, + int optlen); +extern void tcp_set_keepalive(struct sock *sk, int val); +extern int tcp_recvmsg(struct sock *sk, + struct msghdr *msg, + int len, int nonblock, + int flags, int *addr_len); + +extern int tcp_listen_start(struct sock *sk); + +extern void tcp_parse_options(struct sk_buff *skb, + struct tcp_opt *tp, + int estab); + +/* + * TCP v4 functions exported for the inet6 API + */ + +extern int tcp_v4_rebuild_header(struct sock *sk); + +extern int tcp_v4_build_header(struct sock *sk, + struct sk_buff *skb); + +extern void tcp_v4_send_check(struct sock *sk, + struct tcphdr *th, int len, + struct sk_buff *skb); + +extern int tcp_v4_conn_request(struct sock *sk, + struct sk_buff *skb); + +extern struct sock * tcp_create_openreq_child(struct sock *sk, + struct open_request *req, + struct sk_buff *skb); + +extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, + struct sk_buff *skb, + struct open_request *req, + struct dst_entry *dst); + +extern int tcp_v4_do_rcv(struct sock *sk, + struct sk_buff *skb); + +extern int tcp_v4_connect(struct sock *sk, + struct sockaddr *uaddr, + int addr_len); + +extern int tcp_connect(struct sock *sk); + +extern struct sk_buff * tcp_make_synack(struct sock *sk, + struct dst_entry *dst, + struct open_request *req); + +extern int tcp_disconnect(struct sock *sk, int flags); + +extern void tcp_unhash(struct sock *sk); + +extern int tcp_v4_hash_connecting(struct sock *sk); + + +/* From syncookies.c */ +extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, + struct ip_options *opt); +extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, + __u16 *mss); + +/* tcp_output.c */ + +extern int tcp_write_xmit(struct sock *, int nonagle); +extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); +extern void tcp_xmit_retransmit_queue(struct sock *); +extern void tcp_simple_retransmit(struct sock *); + +extern void tcp_send_probe0(struct sock *); +extern void tcp_send_partial(struct sock *); +extern int tcp_write_wakeup(struct sock *); +extern void tcp_send_fin(struct sock *sk); +extern void 
tcp_send_active_reset(struct sock *sk, int priority); +extern int tcp_send_synack(struct sock *); +extern int tcp_transmit_skb(struct sock *, struct sk_buff *); +extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now); +extern void tcp_push_one(struct sock *, unsigned mss_now); +extern void tcp_send_ack(struct sock *sk); +extern void tcp_send_delayed_ack(struct sock *sk); + +/* tcp_timer.c */ +extern void tcp_init_xmit_timers(struct sock *); +extern void tcp_clear_xmit_timers(struct sock *); + +extern void tcp_delete_keepalive_timer (struct sock *); +extern void tcp_reset_keepalive_timer (struct sock *, unsigned long); +extern int tcp_sync_mss(struct sock *sk, u32 pmtu); + +extern const char timer_bug_msg[]; + +/* Read 'sendfile()'-style from a TCP socket */ +typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, + unsigned int, size_t); +extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); + +static inline void tcp_clear_xmit_timer(struct sock *sk, int what) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + switch (what) { + case TCP_TIME_RETRANS: + case TCP_TIME_PROBE0: + tp->pending = 0; + +#ifdef TCP_CLEAR_TIMERS + if (timer_pending(&tp->retransmit_timer) && + del_timer(&tp->retransmit_timer)) + __sock_put(sk); +#endif + break; + case TCP_TIME_DACK: + tp->ack.blocked = 0; + tp->ack.pending = 0; + +#ifdef TCP_CLEAR_TIMERS + if (timer_pending(&tp->delack_timer) && + del_timer(&tp->delack_timer)) + __sock_put(sk); +#endif + break; + default: + printk(timer_bug_msg); + return; + }; +#endif +} + +/* + * Reset the retransmission timer + */ +static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (when > TCP_RTO_MAX) { +#ifdef TCP_DEBUG + printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); +#endif + when = TCP_RTO_MAX; + } + + switch (what) { + case TCP_TIME_RETRANS: + case TCP_TIME_PROBE0: + tp->pending = what; + tp->timeout = jiffies+when; + if (!mod_timer(&tp->retransmit_timer, tp->timeout)) + sock_hold(sk); + break; + + case TCP_TIME_DACK: + tp->ack.pending |= TCP_ACK_TIMER; + tp->ack.timeout = jiffies+when; + if (!mod_timer(&tp->delack_timer, tp->ack.timeout)) + sock_hold(sk); + break; + + default: + printk(KERN_DEBUG "bug: unknown timer value\n"); + }; +#endif +} + +/* Compute the current effective MSS, taking SACKs and IP options, + * and even PMTU discovery events into account. + */ + +static __inline__ unsigned int tcp_current_mss(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct dst_entry *dst = __sk_dst_get(sk); + int mss_now = tp->mss_cache; + + if (dst && dst->pmtu != tp->pmtu_cookie) + mss_now = tcp_sync_mss(sk, dst->pmtu); + + if (tp->eff_sacks) + mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + + (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); + return mss_now; +#else + return 0; +#endif +} + +/* Initialize RCV_MSS value. + * RCV_MSS is an our guess about MSS used by the peer. + * We haven't any direct information about the MSS. + * It's better to underestimate the RCV_MSS rather than overestimate. + * Overestimations make us ACKing less frequently than needed. + * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). 
+ */ + +static inline void tcp_initialize_rcv_mss(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + unsigned int hint = min(tp->advmss, tp->mss_cache); + + hint = min(hint, tp->rcv_wnd/2); + hint = min(hint, TCP_MIN_RCVMSS); + hint = max(hint, TCP_MIN_MSS); + + tp->ack.rcv_mss = hint; +#endif +} + +static __inline__ void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd) +{ +#if 0 + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +#endif +} + +static __inline__ void tcp_fast_path_on(struct tcp_opt *tp) +{ +#if 0 + __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale); +#endif +} + +static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (skb_queue_len(&tp->out_of_order_queue) == 0 && + tp->rcv_wnd && + atomic_read(&sk->rmem_alloc) < sk->rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +#endif +} + +/* Compute the actual receive window we are currently advertising. + * Rcv_nxt can be after the window if our peer push more data + * than the offered window. + */ +static __inline__ u32 tcp_receive_window(struct tcp_opt *tp) +{ +#if 0 + s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; + + if (win < 0) + win = 0; + return (u32) win; +#else + return 0; +#endif +} + +/* Choose a new window, without checks for shrinking, and without + * scaling applied to the result. The caller does these things + * if necessary. This is a "raw" window selection. + */ +extern u32 __tcp_select_window(struct sock *sk); + +/* TCP timestamps are only 32-bits, this causes a slight + * complication on 64-bit systems since we store a snapshot + * of jiffies in the buffer control blocks below. We decidely + * only use of the low 32-bits of jiffies and hide the ugly + * casts with the following macro. + */ +#define tcp_time_stamp ((__u32)(jiffies)) + +/* This is what the send packet queueing engine uses to pass + * TCP per-packet control information to the transmission + * code. We also store the host-order sequence numbers in + * here too. This is 36 bytes on 32-bit architectures, + * 40 bytes on 64-bit machines, if this grows please adjust + * skbuff.h:skbuff->cb[xxx] size appropriately. + */ +struct tcp_skb_cb { + union { +#if 0 + struct inet_skb_parm h4; +#endif +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct inet6_skb_parm h6; +#endif + } header; /* For incoming frames */ + __u32 seq; /* Starting sequence number */ + __u32 end_seq; /* SEQ + FIN + SYN + datalen */ + __u32 when; /* used to compute rtt's */ + __u8 flags; /* TCP header flags. */ + + /* NOTE: These must match up to the flags byte in a + * real TCP header. + */ +#define TCPCB_FLAG_FIN 0x01 +#define TCPCB_FLAG_SYN 0x02 +#define TCPCB_FLAG_RST 0x04 +#define TCPCB_FLAG_PSH 0x08 +#define TCPCB_FLAG_ACK 0x10 +#define TCPCB_FLAG_URG 0x20 +#define TCPCB_FLAG_ECE 0x40 +#define TCPCB_FLAG_CWR 0x80 + + __u8 sacked; /* State flags for SACK/FACK. */ +#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ +#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ +#define TCPCB_LOST 0x04 /* SKB is lost */ +#define TCPCB_TAGBITS 0x07 /* All tag bits */ + +#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ +#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) + +#define TCPCB_URG 0x20 /* Urgent pointer advenced here */ + +#define TCPCB_AT_TAIL (TCPCB_URG) + + __u16 urg_ptr; /* Valid w/URG flags is set. 
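The window arithmetic in tcp_receive_window() above is simply the right edge of the last offer (rcv_wup + rcv_wnd) minus what has already arrived (rcv_nxt), clamped at zero when the peer pushed past the offered window. A standalone restatement with the tcp_opt fields passed as plain parameters (that flattening is my simplification):

#include <stdint.h>

/* Window we are currently advertising; may be 0 if the sender overran it. */
static uint32_t receive_window(uint32_t rcv_wup, uint32_t rcv_wnd, uint32_t rcv_nxt)
{
    int32_t win = (int32_t)(rcv_wup + rcv_wnd - rcv_nxt);

    return win < 0 ? 0 : (uint32_t)win;
}

/* e.g. receive_window(1000, 65535, 21000) == 45535,
 *      receive_window(1000, 65535, 70000) == 0 (peer sent beyond the offer). */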
*/ + __u32 ack_seq; /* Sequence number ACK'd */ +}; + +#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) + +#define for_retrans_queue(skb, sk, tp) \ + for (skb = (sk)->write_queue.next; \ + (skb != (tp)->send_head) && \ + (skb != (struct sk_buff *)&(sk)->write_queue); \ + skb=skb->next) + + +//#include + + +/* + * Compute minimal free write space needed to queue new packets. + */ +static inline int tcp_min_write_space(struct sock *sk) +{ +#if 0 + return sk->wmem_queued/2; +#else +return 0; +#endif +} + +static inline int tcp_wspace(struct sock *sk) +{ +#if 0 + return sk->sndbuf - sk->wmem_queued; +#else +return 0; +#endif +} + + +/* This determines how many packets are "in the network" to the best + * of our knowledge. In many cases it is conservative, but where + * detailed information is available from the receiver (via SACK + * blocks etc.) we can make more aggressive calculations. + * + * Use this for decisions involving congestion control, use just + * tp->packets_out to determine if the send queue is empty or not. + * + * Read this equation as: + * + * "Packets sent once on transmission queue" MINUS + * "Packets left network, but not honestly ACKed yet" PLUS + * "Packets fast retransmitted" + */ +static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) +{ +#if 0 + return tp->packets_out - tp->left_out + tp->retrans_out; +#else + return 0; +#endif +} + +/* Recalculate snd_ssthresh, we want to set it to: + * + * one half the current congestion window, but no + * less than two segments + */ +static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) +{ +#if 0 + return max(tp->snd_cwnd >> 1U, 2U); +#else + return 0; +#endif +} + +/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. + * The exception is rate halving phase, when cwnd is decreasing towards + * ssthresh. + */ +static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp) +{ +#if 0 + if ((1<ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + return tp->snd_ssthresh; + else + return max(tp->snd_ssthresh, + ((tp->snd_cwnd >> 1) + + (tp->snd_cwnd >> 2))); +#else + return 0; +#endif +} + +static inline void tcp_sync_left_out(struct tcp_opt *tp) +{ +#if 0 + if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out) + tp->sacked_out = tp->packets_out - tp->lost_out; + tp->left_out = tp->sacked_out + tp->lost_out; +#endif +} + +extern void tcp_cwnd_application_limited(struct sock *sk); + +/* Congestion window validation. (RFC2861) */ + +static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (tp->packets_out >= tp->snd_cwnd) { + /* Network is feed fully. */ + tp->snd_cwnd_used = 0; + tp->snd_cwnd_stamp = tcp_time_stamp; + } else { + /* Network starves. 
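The congestion bookkeeping above reduces to a couple of sums: left_out = sacked_out + lost_out, in-flight = packets_out - left_out + retrans_out, and on a congestion event ssthresh becomes half the window but never less than two segments. A small sketch under those definitions (the local struct and function names are mine; the field names follow the driver):

#include <stdint.h>

struct cwnd_state {
    uint32_t packets_out;   /* segments sent at least once        */
    uint32_t sacked_out;    /* segments SACKed by the receiver    */
    uint32_t lost_out;      /* segments presumed lost             */
    uint32_t retrans_out;   /* segments currently retransmitted   */
    uint32_t snd_cwnd;      /* congestion window, in segments     */
};

static uint32_t packets_in_flight(const struct cwnd_state *s)
{
    uint32_t left_out = s->sacked_out + s->lost_out;   /* left the network */

    return s->packets_out - left_out + s->retrans_out;
}

static uint32_t recalc_ssthresh(const struct cwnd_state *s)
{
    uint32_t half = s->snd_cwnd >> 1;

    return half > 2 ? half : 2;          /* never below two segments */
}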
*/ + if (tp->packets_out > tp->snd_cwnd_used) + tp->snd_cwnd_used = tp->packets_out; + + if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) + tcp_cwnd_application_limited(sk); + } +#endif +} + +/* Set slow start threshould and cwnd not falling to slow start */ +static inline void __tcp_enter_cwr(struct tcp_opt *tp) +{ +#if 0 + tp->undo_marker = 0; + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp) + 1U); + tp->snd_cwnd_cnt = 0; + tp->high_seq = tp->snd_nxt; + tp->snd_cwnd_stamp = tcp_time_stamp; + TCP_ECN_queue_cwr(tp); +#endif +} + +static inline void tcp_enter_cwr(struct tcp_opt *tp) +{ +#if 0 + tp->prior_ssthresh = 0; + if (tp->ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(tp); + tp->ca_state = TCP_CA_CWR; + } +#endif +} + +extern __u32 tcp_init_cwnd(struct tcp_opt *tp); + +/* Slow start with delack produces 3 packets of burst, so that + * it is safe "de facto". + */ +static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp) +{ + return 3; +} + +static __inline__ int tcp_minshall_check(struct tcp_opt *tp) +{ +#if 0 + return after(tp->snd_sml,tp->snd_una) && + !after(tp->snd_sml, tp->snd_nxt); +#else + return 0; +#endif +} + +static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb) +{ +#if 0 + if (skb->len < mss) + tp->snd_sml = TCP_SKB_CB(skb)->end_seq; +#endif +} + +/* Return 0, if packet can be sent now without violation Nagle's rules: + 1. It is full sized. + 2. Or it contains FIN. + 3. Or TCP_NODELAY was set. + 4. Or TCP_CORK is not set, and all sent packets are ACKed. + With Minshall's modification: all sent small packets are ACKed. + */ + +static __inline__ int +tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle) +{ +#if 0 + return (skb->len < mss_now && + !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + (nonagle == 2 || + (!nonagle && + tp->packets_out && + tcp_minshall_check(tp)))); +#else + return 0; +#endif +} + +/* This checks if the data bearing packet SKB (usually tp->send_head) + * should be put on the wire right now. + */ +static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, + unsigned cur_mss, int nonagle) +{ +#if 0 + /* RFC 1122 - section 4.2.3.4 + * + * We must queue if + * + * a) The right edge of this frame exceeds the window + * b) There are packets in flight and we have a small segment + * [SWS avoidance and Nagle algorithm] + * (part of SWS is done on packetization) + * Minshall version sounds: there are no _small_ + * segments in flight. (tcp_nagle_check) + * c) We have too many packets 'in flight' + * + * Don't use the nagle rule for urgent data (or + * for the final FIN -DaveM). + * + * Also, Nagle rule does not apply to frames, which + * sit in the middle of queue (they have no chances + * to get new data) and if room at tail of skb is + * not enough to save something seriously (<32 for now). + */ + + /* Don't be strict about the congestion window for the + * final FIN frame. 
-DaveM + */ + return ((nonagle==1 || tp->urg_mode + || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && + ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || + (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && + !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); +#else + return 0; +#endif +} + +static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (!tp->packets_out && !tp->pending) + tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); +#endif +} + +static __inline__ int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + return (skb->next == (struct sk_buff*)&sk->write_queue); +#else + return 0; +#endif +} + +/* Push out any pending frames which were held back due to + * TCP_CORK or attempt at coalescing tiny packets. + * The socket must be locked by the caller. + */ +static __inline__ void __tcp_push_pending_frames(struct sock *sk, + struct tcp_opt *tp, + unsigned cur_mss, + int nonagle) +{ +#if 0 + struct sk_buff *skb = tp->send_head; + + if (skb) { + if (!tcp_skb_is_last(sk, skb)) + nonagle = 1; + if (!tcp_snd_test(tp, skb, cur_mss, nonagle) || + tcp_write_xmit(sk, nonagle)) + tcp_check_probe_timer(sk, tp); + } + tcp_cwnd_validate(sk, tp); +#endif +} + +static __inline__ void tcp_push_pending_frames(struct sock *sk, + struct tcp_opt *tp) +{ +#if 0 + __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle); +#endif +} + +static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + struct sk_buff *skb = tp->send_head; + + return (skb && + tcp_snd_test(tp, skb, tcp_current_mss(sk), + tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle)); +#else + return 0; +#endif +} + +static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq) +{ +#if 0 + tp->snd_wl1 = seq; +#endif +} + +static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq) +{ +#if 0 + tp->snd_wl1 = seq; +#endif +} + +extern void tcp_destroy_sock(struct sock *sk); + + +/* + * Calculate(/check) TCP checksum + */ +static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, + unsigned long saddr, unsigned long daddr, + unsigned long base) +{ +#if 0 + return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); +#else + return 0; +#endif +} + +static __inline__ int __tcp_checksum_complete(struct sk_buff *skb) +{ +#if 0 + return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); +#else + return 0; +#endif +} + +static __inline__ int tcp_checksum_complete(struct sk_buff *skb) +{ +#if 0 + return skb->ip_summed != CHECKSUM_UNNECESSARY && + __tcp_checksum_complete(skb); +#else + return 0; +#endif +} + +/* Prequeue for VJ style copy to user, combined with checksumming. */ + +static __inline__ void tcp_prequeue_init(struct tcp_opt *tp) +{ +#if 0 + tp->ucopy.task = NULL; + tp->ucopy.len = 0; + tp->ucopy.memory = 0; + skb_queue_head_init(&tp->ucopy.prequeue); +#endif +} + +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 8)8) --ANK + * + * NOTE: is this not too big to inline? 
+ */ +static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (tp->ucopy.task) { + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->rcvbuf) { + struct sk_buff *skb1; + + if (sk->lock.users) + out_of_line_bug(); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk->backlog_rcv(sk, skb1); + NET_INC_STATS_BH(TCPPrequeueDropped); + } + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible(sk->sleep); + if (!tcp_ack_scheduled(tp)) + tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); + } + return 1; + } + return 0; +#else + return 0; +#endif +} + + +#undef STATE_TRACE + +#ifdef STATE_TRACE +static char *statename[]={ + "Unused","Established","Syn Sent","Syn Recv", + "Fin Wait 1","Fin Wait 2","Time Wait", "Close", + "Close Wait","Last ACK","Listen","Closing" +}; +#endif + +static __inline__ void tcp_set_state(struct sock *sk, int state) +{ +#if 0 + int oldstate = sk->state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + TCP_INC_STATS(TcpCurrEstab); + break; + + case TCP_CLOSE: + sk->prot->unhash(sk); + if (sk->prev && !(sk->userlocks&SOCK_BINDPORT_LOCK)) + tcp_put_port(sk); + /* fall through */ + default: + if (oldstate==TCP_ESTABLISHED) + tcp_statistics[smp_processor_id()*2+!in_softirq()].TcpCurrEstab--; + } + + /* Change state AFTER socket is unhashed to avoid closed + * socket sitting in hash tables. + */ + sk->state = state; + +#ifdef STATE_TRACE + SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); +#endif +#endif +} + +static __inline__ void tcp_done(struct sock *sk) +{ +#if 0 + tcp_set_state(sk, TCP_CLOSE); + tcp_clear_xmit_timers(sk); + + sk->shutdown = SHUTDOWN_MASK; + + if (!sk->dead) + sk->state_change(sk); + else + tcp_destroy_sock(sk); +#endif +} + +static __inline__ void tcp_sack_reset(struct tcp_opt *tp) +{ +#if 0 + tp->dsack = 0; + tp->eff_sacks = 0; + tp->num_sacks = 0; +#endif +} + +static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp) +{ +#if 0 + if (tp->tstamp_ok) { + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); + *ptr++ = htonl(tp->ts_recent); + } + if (tp->eff_sacks) { + struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks; + int this_sack; + + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK << 8) | + (TCPOLEN_SACK_BASE + + (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK))); + for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) { + *ptr++ = htonl(sp[this_sack].start_seq); + *ptr++ = htonl(sp[this_sack].end_seq); + } + if (tp->dsack) { + tp->dsack = 0; + tp->eff_sacks--; + } + } +#endif +} + +/* Construct a tcp options header for a SYN or SYN_ACK packet. + * If this is every changed make sure to change the definition of + * MAX_SYN_SIZE to match the new maximum number of options that you + * can generate. + */ +static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, + int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) +{ +#if 0 + /* We always get an MSS option. + * The option bytes which will be seen in normal data + * packets should timestamps be used, must be in the MSS + * advertised. 
But we subtract them from tp->mss_cache so + * that calculations in tcp_sendmsg are simpler etc. + * So account for this fact here if necessary. If we + * don't do this correctly, as a receiver we won't + * recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK + * rules correctly. + * SACKs don't matter, we never delay an ACK when we + * have any of those going out. + */ + *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + if (ts) { + if(sack) + *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + else + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); /* TSVAL */ + *ptr++ = htonl(ts_recent); /* TSECR */ + } else if(sack) + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); + if (offer_wscale) + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); +#endif +} + +/* Determine a window scaling and initial window to offer. + * Based on the assumption that the given amount of space + * will be offered. Store the results in the tp structure. + * NOTE: for smooth operation initial space offering should + * be a multiple of mss if possible. We assume here that mss >= 1. + * This MUST be enforced by all callers. + */ +static inline void tcp_select_initial_window(int __space, __u32 mss, + __u32 *rcv_wnd, + __u32 *window_clamp, + int wscale_ok, + __u8 *rcv_wscale) +{ +#if 0 + unsigned int space = (__space < 0 ? 0 : __space); + + /* If no clamp set the clamp to the max possible scaled window */ + if (*window_clamp == 0) + (*window_clamp) = (65535 << 14); + space = min(*window_clamp, space); + + /* Quantize space offering to a multiple of mss if possible. */ + if (space > mss) + space = (space / mss) * mss; + + /* NOTE: offering an initial window larger than 32767 + * will break some buggy TCP stacks. We try to be nice. + * If we are not window scaling, then this truncates + * our initial window offering to 32k. There should also + * be a sysctl option to stop being nice. + */ + (*rcv_wnd) = min(space, MAX_TCP_WINDOW); + (*rcv_wscale) = 0; + if (wscale_ok) { + /* See RFC1323 for an explanation of the limit to 14 */ + while (space > 65535 && (*rcv_wscale) < 14) { + space >>= 1; + (*rcv_wscale)++; + } + if (*rcv_wscale && sysctl_tcp_app_win && space>=mss && + space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2) + (*rcv_wscale)--; + } + + /* Set initial window to value enough for senders, + * following RFC1414. Senders, not following this RFC, + * will be satisfied with 2. + */ + if (mss > (1<<*rcv_wscale)) { + int init_cwnd = 4; + if (mss > 1460*3) + init_cwnd = 2; + else if (mss > 1460) + init_cwnd = 3; + if (*rcv_wnd > init_cwnd*mss) + *rcv_wnd = init_cwnd*mss; + } + /* Set the clamp no higher than max representable value */ + (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); +#endif +} + +static inline int tcp_win_from_space(int space) +{ +#if 0 + return sysctl_tcp_adv_win_scale<=0 ? 
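The window-scale selection inside tcp_select_initial_window() above comes down to one loop: a 16-bit window field can describe at most 65535 bytes, so halve the buffer space and bump the shift until it fits, capping the shift at 14 as RFC 1323 requires. A minimal standalone sketch of that loop (the function name is mine; the extra app_win adjustment in the driver is omitted):

#include <stdint.h>

/* Smallest RFC 1323 window-scale shift that lets `space` bytes be advertised
 * through the 16-bit TCP window field; the shift is capped at 14. */
static uint8_t pick_rcv_wscale(uint32_t space)
{
    uint8_t wscale = 0;

    while (space > 65535 && wscale < 14) {
        space >>= 1;
        wscale++;
    }
    return wscale;
}

/* e.g. pick_rcv_wscale(128 * 1024) == 2: 128 KiB >> 2 == 32 KiB fits in 16 bits. */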
+ (space>>(-sysctl_tcp_adv_win_scale)) : + space - (space>>sysctl_tcp_adv_win_scale); +#else + return 0; +#endif +} + +/* Note: caller must be prepared to deal with negative returns */ +static inline int tcp_space(struct sock *sk) +{ +#if 0 + return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc)); +#else + return 0; +#endif +} + +static inline int tcp_full_space( struct sock *sk) +{ +#if 0 + return tcp_win_from_space(sk->rcvbuf); +#else + return 0; +#endif +} + +static inline void tcp_acceptq_removed(struct sock *sk) +{ +#if 0 + sk->ack_backlog--; +#endif +} + +static inline void tcp_acceptq_added(struct sock *sk) +{ +#if 0 + sk->ack_backlog++; +#endif +} + +static inline int tcp_acceptq_is_full(struct sock *sk) +{ +#if 0 + return sk->ack_backlog > sk->max_ack_backlog; +#else + return 0; +#endif +} + +static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, + struct sock *child) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + req->sk = child; + tcp_acceptq_added(sk); + + if (!tp->accept_queue_tail) { + tp->accept_queue = req; + } else { + tp->accept_queue_tail->dl_next = req; + } + tp->accept_queue_tail = req; + req->dl_next = NULL; +#endif +} + +struct tcp_listen_opt +{ + u8 max_qlen_log; /* log_2 of maximal queued SYNs */ + int qlen; + int qlen_young; + int clock_hand; + struct open_request *syn_table[TCP_SYNQ_HSIZE]; +}; + +static inline void +tcp_synq_removed(struct sock *sk, struct open_request *req) +{ +#if 0 + struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt; + + if (--lopt->qlen == 0) + tcp_delete_keepalive_timer(sk); + if (req->retrans == 0) + lopt->qlen_young--; +#endif +} + +static inline void tcp_synq_added(struct sock *sk) +{ +#if 0 + struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt; + + if (lopt->qlen++ == 0) + tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + lopt->qlen_young++; +#endif +} + +static inline int tcp_synq_len(struct sock *sk) +{ +#if 0 + return sk->tp_pinfo.af_tcp.listen_opt->qlen; +#else + return 0; +#endif +} + +static inline int tcp_synq_young(struct sock *sk) +{ +#if 0 + return sk->tp_pinfo.af_tcp.listen_opt->qlen_young; +#else + return 0; +#endif +} + +static inline int tcp_synq_is_full(struct sock *sk) +{ +#if 0 + return tcp_synq_len(sk)>>sk->tp_pinfo.af_tcp.listen_opt->max_qlen_log; +#else + return 0; +#endif +} + +static inline void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req, + struct open_request **prev) +{ +#if 0 + write_lock(&tp->syn_wait_lock); + *prev = req->dl_next; + write_unlock(&tp->syn_wait_lock); +#endif +} + +static inline void tcp_synq_drop(struct sock *sk, struct open_request *req, + struct open_request **prev) +{ +#if 0 + tcp_synq_unlink(&sk->tp_pinfo.af_tcp, req, prev); + tcp_synq_removed(sk, req); + tcp_openreq_free(req); +#endif +} + +static __inline__ void tcp_openreq_init(struct open_request *req, + struct tcp_opt *tp, + struct sk_buff *skb) +{ +#if 0 + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->rcv_isn = TCP_SKB_CB(skb)->seq; + req->mss = tp->mss_clamp; + req->ts_recent = tp->saw_tstamp ? 
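tcp_win_from_space() above splits the receive buffer between advertised window and per-packet overhead via sysctl_tcp_adv_win_scale: a positive scale reserves space >> scale for overhead (the default of 2 advertises three quarters of the buffer), while a non-positive scale advertises only space >> -scale. A standalone restatement with the sysctl passed as a parameter (that is my change; in the driver it is a global):

/* Share of `space` buffer bytes that may be advertised as TCP window;
 * the rest is reserved for skb and bookkeeping overhead. */
static int win_from_space(int space, int adv_win_scale)
{
    return adv_win_scale <= 0 ? space >> -adv_win_scale
                              : space - (space >> adv_win_scale);
}

/* With the default scale of 2: win_from_space(65536, 2) == 49152 (3/4 of it). */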
tp->rcv_tsval : 0; + req->tstamp_ok = tp->tstamp_ok; + req->sack_ok = tp->sack_ok; + req->snd_wscale = tp->snd_wscale; + req->wscale_ok = tp->wscale_ok; + req->acked = 0; + req->ecn_ok = 0; + req->rmt_port = skb->h.th->source; +#endif +} + +#define TCP_MEM_QUANTUM ((int)PAGE_SIZE) + +static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + sk->tp_pinfo.af_tcp.queue_shrunk = 1; + sk->wmem_queued -= skb->truesize; + sk->forward_alloc += skb->truesize; + __kfree_skb(skb); +#endif +} + +static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + sk->wmem_queued += skb->truesize; + sk->forward_alloc -= skb->truesize; +#endif +} + +extern void __tcp_mem_reclaim(struct sock *sk); +extern int tcp_mem_schedule(struct sock *sk, int size, int kind); + +static inline void tcp_mem_reclaim(struct sock *sk) +{ +#if 0 + if (sk->forward_alloc >= TCP_MEM_QUANTUM) + __tcp_mem_reclaim(sk); +#endif +} + +static inline void tcp_enter_memory_pressure(void) +{ +#if 0 + if (!tcp_memory_pressure) { + NET_INC_STATS(TCPMemoryPressures); + tcp_memory_pressure = 1; + } +#endif +} + +static inline void tcp_moderate_sndbuf(struct sock *sk) +{ +#if 0 + if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) { + sk->sndbuf = min(sk->sndbuf, sk->wmem_queued/2); + sk->sndbuf = max(sk->sndbuf, SOCK_MIN_SNDBUF); + } +#endif +} + +static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp) +{ +#if 0 + struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp); + + if (skb) { + skb->truesize += mem; + if (sk->forward_alloc >= (int)skb->truesize || + tcp_mem_schedule(sk, skb->truesize, 0)) { + skb_reserve(skb, MAX_TCP_HEADER); + return skb; + } + __kfree_skb(skb); + } else { + tcp_enter_memory_pressure(); + tcp_moderate_sndbuf(sk); + } + return NULL; +#else + return NULL; +#endif +} + +static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp) +{ +#if 0 + return tcp_alloc_pskb(sk, size, 0, gfp); +#else + return NULL; +#endif +} + +static inline struct page * tcp_alloc_page(struct sock *sk) +{ +#if 0 + if (sk->forward_alloc >= (int)PAGE_SIZE || + tcp_mem_schedule(sk, PAGE_SIZE, 0)) { + struct page *page = alloc_pages(sk->allocation, 0); + if (page) + return page; + } + tcp_enter_memory_pressure(); + tcp_moderate_sndbuf(sk); + return NULL; +#else + return NULL; +#endif +} + +static inline void tcp_writequeue_purge(struct sock *sk) +{ +#if 0 + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->write_queue)) != NULL) + tcp_free_skb(sk, skb); + tcp_mem_reclaim(sk); +#endif +} + +extern void tcp_rfree(struct sk_buff *skb); + +static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ +#if 0 + skb->sk = sk; + skb->destructor = tcp_rfree; + atomic_add(skb->truesize, &sk->rmem_alloc); + sk->forward_alloc -= skb->truesize; +#endif +} + +extern void tcp_listen_wlock(void); + +/* - We may sleep inside this lock. + * - If sleeping is not required (or called from BH), + * use plain read_(un)lock(&tcp_lhash_lock). + */ + +static inline void tcp_listen_lock(void) +{ +#if 0 + /* read_lock synchronizes to candidates to writers */ + read_lock(&tcp_lhash_lock); + atomic_inc(&tcp_lhash_users); + read_unlock(&tcp_lhash_lock); +#endif +} + +static inline void tcp_listen_unlock(void) +{ +#if 0 + if (atomic_dec_and_test(&tcp_lhash_users)) + wake_up(&tcp_lhash_wait); +#endif +} + +static inline int keepalive_intvl_when(struct tcp_opt *tp) +{ +#if 0 + return tp->keepalive_intvl ? 
: sysctl_tcp_keepalive_intvl; +#else + return 0; +#endif +} + +static inline int keepalive_time_when(struct tcp_opt *tp) +{ +#if 0 + return tp->keepalive_time ? : sysctl_tcp_keepalive_time; +#else + return 0; +#endif +} + +static inline int tcp_fin_time(struct tcp_opt *tp) +{ +#if 0 + int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; + + if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) + fin_timeout = (tp->rto<<2) - (tp->rto>>1); + + return fin_timeout; +#else + return 0; +#endif +} + +static inline int tcp_paws_check(struct tcp_opt *tp, int rst) +{ +#if 0 + if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) + return 0; + if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) + return 0; + + /* RST segments are not recommended to carry timestamp, + and, if they do, it is recommended to ignore PAWS because + "their cleanup function should take precedence over timestamps." + Certainly, it is mistake. It is necessary to understand the reasons + of this constraint to relax it: if peer reboots, clock may go + out-of-sync and half-open connections will not be reset. + Actually, the problem would be not existing if all + the implementations followed draft about maintaining clock + via reboots. Linux-2.2 DOES NOT! + + However, we can relax time bounds for RST segments to MSL. + */ + if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL) + return 0; + return 1; +#else + return 0; +#endif +} + +#define TCP_CHECK_TIMER(sk) do { } while (0) + +#endif /* __TCPCORE_H */ + + +// +#endif +#endif diff --git a/reactos/drivers/net/tcpip/include/tcpdef.h b/reactos/drivers/net/tcpip/include/tcpdef.h new file mode 100755 index 00000000000..8005de2be6d --- /dev/null +++ b/reactos/drivers/net/tcpip/include/tcpdef.h @@ -0,0 +1,187 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol. + * + * Version: @(#)tcp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_H +#define _LINUX_TCP_H + +#include "linux.h" + +struct tcphdr { + __u16 source; + __u16 dest; + __u32 seq; + __u32 ack_seq; +//#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; +/*#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; +#else +#error "Adjust your defines" +#endif */ + __u16 window; + __u16 check; + __u16 urg_ptr; +}; + + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* now a valid state */ + + TCP_MAX_STATES /* Leave at the end! 
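tcp_paws_check() above is the RFC 1323 PAWS test: drop a segment whose timestamp went backwards relative to ts_recent (a signed 32-bit comparison, so timestamp wraparound is handled), unless the stored timestamp is itself too old to trust, with a shorter staleness bound allowed for RST segments. A standalone sketch of the same decision; the clock is passed in as seconds, and the two staleness constants are illustrative stand-ins for TCP_PAWS_24DAYS and TCP_PAWS_MSL:

#include <stdbool.h>
#include <stdint.h>

#define PAWS_24DAYS (60 * 60 * 24 * 24)   /* ts_recent older than this is stale   */
#define PAWS_MSL    60                    /* relaxed staleness bound used for RST */

/* True when PAWS says the segment must be dropped. */
static bool paws_reject(uint32_t rcv_tsval, uint32_t ts_recent,
                        long ts_recent_stamp, long now_sec, bool rst)
{
    if ((int32_t)(rcv_tsval - ts_recent) >= 0)
        return false;                     /* timestamp did not go backwards       */
    if (now_sec >= ts_recent_stamp + PAWS_24DAYS)
        return false;                     /* stored timestamp too old to trust    */
    if (rst && now_sec >= ts_recent_stamp + PAWS_MSL)
        return false;                     /* be lenient with RSTs after MSL       */
    return true;
}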
*/ +}; + +#define TCP_STATE_MASK 0xF +#define TCP_ACTION_FIN (1 << 7) + +enum { + TCPF_ESTABLISHED = (1 << 1), + TCPF_SYN_SENT = (1 << 2), + TCPF_SYN_RECV = (1 << 3), + TCPF_FIN_WAIT1 = (1 << 4), + TCPF_FIN_WAIT2 = (1 << 5), + TCPF_TIME_WAIT = (1 << 6), + TCPF_CLOSE = (1 << 7), + TCPF_CLOSE_WAIT = (1 << 8), + TCPF_LAST_ACK = (1 << 9), + TCPF_LISTEN = (1 << 10), + TCPF_CLOSING = (1 << 11) +}; + +/* + * The union cast uses a gcc extension to avoid aliasing problems + * (union is compatible to any of its members) + * This means this part of the code is -fstrict-aliasing safe now. + */ +union tcp_word_hdr { + struct tcphdr hdr; + __u32 words[5]; +}; + +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) + +enum { + TCP_FLAG_CWR = 0x00800000, // __constant_htonl(0x00800000), + TCP_FLAG_ECE = 0x00400000, //__constant_htonl(0x00400000), + TCP_FLAG_URG = 0x00200000, //__constant_htonl(0x00200000), + TCP_FLAG_ACK = 0x00100000, //__constant_htonl(0x00100000), + TCP_FLAG_PSH = 0x00080000, //__constant_htonl(0x00080000), + TCP_FLAG_RST = 0x00040000, //__constant_htonl(0x00040000), + TCP_FLAG_SYN = 0x00020000, //__constant_htonl(0x00020000), + TCP_FLAG_FIN = 0x00010000, //__constant_htonl(0x00010000), + TCP_RESERVED_BITS = 0x0F000000, //__constant_htonl(0x0F000000), + TCP_DATA_OFFSET = 0xF0000000, //__constant_htonl(0xF0000000) +}; + +/* TCP socket options */ +#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ +#define TCP_MAXSEG 2 /* Limit MSS */ +#define TCP_CORK 3 /* Never send partially complete segments */ +#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ +#define TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define TCP_INFO 11 /* Information about this connection. */ +#define TCP_QUICKACK 12 /* Block/reenable quick acks */ + +#define TCPI_OPT_TIMESTAMPS 1 +#define TCPI_OPT_SACK 2 +#define TCPI_OPT_WSCALE 4 +#define TCPI_OPT_ECN 8 + +enum tcp_ca_state +{ + TCP_CA_Open = 0, +#define TCPF_CA_Open (1< + * Fred N. van Kempen, + * Mark Evans, + * Corey Minyard + * Florian La Roche, + * Charles Hedrick, + * Linus Torvalds, + * Alan Cox, + * Matthew Dillon, + * Arnt Gulbrandsen, + * Jorge Cwik, + */ + +/* + * Changes: + * Pedro Roque : Fast Retransmit/Recovery. + * Two receive queues. + * Retransmit queue handled by TCP. + * Better retransmit timer handling. + * New congestion avoidance. + * Header prediction. + * Variable renaming. + * + * Eric : Fast Retransmit. + * Randy Scott : MSS option defines. + * Eric Schenk : Fixes to slow start algorithm. + * Eric Schenk : Yet another double ACK bug. + * Eric Schenk : Delayed ACK bug fixes. + * Eric Schenk : Floyd style fast retrans war avoidance. + * David S. Miller : Don't allow zero congestion window. + * Eric Schenk : Fix retransmitter so that it sends + * next packet on ack of previous packet. + * Andi Kleen : Moved open_request checking here + * and process RSTs for open_requests. + * Andi Kleen : Better prune_queue, and other fixes. + * Andrey Savochkin: Fix RTT measurements in the presnce of + * timestamps. + * Andrey Savochkin: Check sequence numbers correctly when + * removing SACKs due to in sequence incoming + * data segments. 
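tcp_flag_word() above views the TCP header as five 32-bit words and grabs word 3, the one holding the data offset, reserved bits and the eight flag bits, so whole groups of flags can be tested with a single mask (TCP_REMNANT is built exactly that way). A small sketch of such a mask test; the mask values mirror the host-order constants in this port, byte-order handling is deliberately left out, and is_bare_ack is my own helper:

#include <stdint.h>

/* Flag bits as they sit in the 32-bit header word that also carries the
 * data offset and reserved bits (word 3 of the TCP header). */
#define FLAG_FIN 0x00010000u
#define FLAG_SYN 0x00020000u
#define FLAG_RST 0x00040000u
#define FLAG_PSH 0x00080000u
#define FLAG_ACK 0x00100000u
#define FLAG_URG 0x00200000u

/* A "bare" ACK: ACK set and none of the other interesting flags. */
static int is_bare_ack(uint32_t flag_word)
{
    return (flag_word & FLAG_ACK) != 0 &&
           (flag_word & (FLAG_FIN | FLAG_SYN | FLAG_RST | FLAG_PSH | FLAG_URG)) == 0;
}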
+ * Andi Kleen: Make sure we never ack data there is not + * enough room for. Also make this condition + * a fatal error if it might still happen. + * Andi Kleen: Add tcp_measure_rcv_mss to make + * connections with MSS +#include +#include +#include +#include +#include +#else +#include "linux.h" +#include "tcpcore.h" +#endif + +int sysctl_tcp_timestamps = 1; +int sysctl_tcp_window_scaling = 1; +int sysctl_tcp_sack = 1; +int sysctl_tcp_fack = 1; +int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; +#ifdef CONFIG_INET_ECN +int sysctl_tcp_ecn = 1; +#else +int sysctl_tcp_ecn = 0; +#endif +int sysctl_tcp_dsack = 1; +int sysctl_tcp_app_win = 31; +int sysctl_tcp_adv_win_scale = 2; + +int sysctl_tcp_stdurg = 0; +int sysctl_tcp_rfc1337 = 0; +//int sysctl_tcp_max_orphans = NR_FILE; + +#define FLAG_DATA 0x01 /* Incoming frame contained data. */ +#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ +#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ +#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ +#define FLAG_DATA_SACKED 0x20 /* New SACK. */ +#define FLAG_ECE 0x40 /* ECE in this ACK */ +#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ +#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ + +#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) +#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) + +#define IsReno(tp) ((tp)->sack_ok == 0) +#define IsFack(tp) ((tp)->sack_ok & 2) +#define IsDSack(tp) ((tp)->sack_ok & 4) + +#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) + +/* Adapt the MSS value used to make delayed ack decision to the + * real world. + */ +static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + unsigned int len, lss; + + lss = tp->ack.last_seg_size; + tp->ack.last_seg_size = 0; + + /* skb->len may jitter because of SACKs, even if peer + * sends good full-sized frames. + */ + len = skb->len; + if (len >= tp->ack.rcv_mss) { + tp->ack.rcv_mss = len; + } else { + /* Otherwise, we make more careful check taking into account, + * that SACKs block is variable. + * + * "len" is invariant segment length, including TCP header. + */ + len += skb->data - skb->h.raw; + if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || + /* If PSH is not set, packet should be + * full sized, provided peer TCP is not badly broken. + * This observation (if it is correct 8)) allows + * to handle super-low mtu links fairly. + */ + (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && + !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) { + /* Subtract also invariant (if peer is RFC compliant), + * tcp header plus fixed timestamp option length. + * Resulting "len" is MSS free of SACK jitter. 
+ */ + len -= tp->tcp_header_len; + tp->ack.last_seg_size = len; + if (len == lss) { + tp->ack.rcv_mss = len; + return; + } + } + tp->ack.pending |= TCP_ACK_PUSHED; + } +#endif +} + +static void tcp_incr_quickack(struct tcp_opt *tp) +{ +#if 0 + unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); + + if (quickacks==0) + quickacks=2; + if (quickacks > tp->ack.quick) + tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); +#endif +} + +void tcp_enter_quickack_mode(struct tcp_opt *tp) +{ +#if 0 + tcp_incr_quickack(tp); + tp->ack.pingpong = 0; + tp->ack.ato = TCP_ATO_MIN; +#endif +} + +/* Send ACKs quickly, if "quick" count is not exhausted + * and the session is not interactive. + */ + +static __inline__ int tcp_in_quickack_mode(struct tcp_opt *tp) +{ +#if 0 + return (tp->ack.quick && !tp->ack.pingpong); +#else + return 0; +#endif +} + +/* Buffer size and advertised window tuning. + * + * 1. Tuning sk->sndbuf, when connection enters established state. + */ + +static void tcp_fixup_sndbuf(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff); + + if (sk->sndbuf < 3*sndmem) + sk->sndbuf = min(3*sndmem, sysctl_tcp_wmem[2]); +#endif +} + +/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) + * + * All tcp_full_space() is split to two parts: "network" buffer, allocated + * forward and advertised in receiver window (tp->rcv_wnd) and + * "application buffer", required to isolate scheduling/application + * latencies from network. + * window_clamp is maximal advertised window. It can be less than + * tcp_full_space(), in this case tcp_full_space() - window_clamp + * is reserved for "application" buffer. The less window_clamp is + * the smoother our behaviour from viewpoint of network, but the lower + * throughput and the higher sensitivity of the connection to losses. 8) + * + * rcv_ssthresh is more strict window_clamp used at "slow start" + * phase to predict further behaviour of this connection. + * It is used for two goals: + * - to enforce header prediction at sender, even when application + * requires some significant "application buffer". It is check #1. + * - to prevent pruning of receive queue because of misprediction + * of receiver window. Check #2. + * + * The scheme does not work when sender sends good segments opening + * window and then starts to feed us spagetti. But it should work + * in common situations. Otherwise, we have to rely on queue collapsing. + */ + +/* Slow part of check#2. */ +static int +__tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + /* Optimize this! */ + int truesize = tcp_win_from_space(skb->truesize)/2; + int window = tcp_full_space(sk)/2; + + while (tp->rcv_ssthresh <= window) { + if (truesize <= skb->len) + return 2*tp->ack.rcv_mss; + + truesize >>= 1; + window >>= 1; + } + return 0; +#else + return 0; +#endif +} + +static __inline__ void +tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + /* Check #1 */ + if (tp->rcv_ssthresh < tp->window_clamp && + (int)tp->rcv_ssthresh < tcp_space(sk) && + !tcp_memory_pressure) { + int incr; + + /* Check #2. Increase window, if skb with such overhead + * will fit to rcvbuf in future. + */ + if (tcp_win_from_space(skb->truesize) <= skb->len) + incr = 2*tp->advmss; + else + incr = __tcp_grow_window(sk, tp, skb); + + if (incr) { + tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); + tp->ack.quick |= 1; + } + } +#endif +} + +/* 3. 
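tcp_incr_quickack() above sizes the burst of undelayed ACKs from the window: roughly one quick ACK per two receiver-MSS of window, at least two, and capped at a maximum. A standalone restatement (the cap is passed as a parameter here because TCP_MAX_QUICKACKS is defined elsewhere in these headers):

#include <stdint.h>

/* Number of ACKs to send without delay after (re)entering quickack mode. */
static unsigned int quickack_count(uint32_t rcv_wnd, unsigned int rcv_mss,
                                   unsigned int max_quickacks)
{
    unsigned int quickacks = rcv_wnd / (2 * rcv_mss);

    if (quickacks == 0)
        quickacks = 2;
    return quickacks < max_quickacks ? quickacks : max_quickacks;
}

/* e.g. quickack_count(65535, 1460, 16) == 16 (22 before the cap). */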
Tuning rcvbuf, when connection enters established state. */ + +static void tcp_fixup_rcvbuf(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int rcvmem = tp->advmss+MAX_TCP_HEADER+16+sizeof(struct sk_buff); + + /* Try to select rcvbuf so that 4 mss-sized segments + * will fit to window and correspoding skbs will fit to our rcvbuf. + * (was 3; 4 is minimum to allow fast retransmit to work.) + */ + while (tcp_win_from_space(rcvmem) < tp->advmss) + rcvmem += 128; + if (sk->rcvbuf < 4*rcvmem) + sk->rcvbuf = min(4*rcvmem, sysctl_tcp_rmem[2]); +#endif +} + +/* 4. Try to fixup all. It is made iimediately after connection enters + * established state. + */ +static void tcp_init_buffer_space(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int maxwin; + + if (!(sk->userlocks&SOCK_RCVBUF_LOCK)) + tcp_fixup_rcvbuf(sk); + if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) + tcp_fixup_sndbuf(sk); + + maxwin = tcp_full_space(sk); + + if (tp->window_clamp >= maxwin) { + tp->window_clamp = maxwin; + + if (sysctl_tcp_app_win && maxwin>4*tp->advmss) + tp->window_clamp = max(maxwin-(maxwin>>sysctl_tcp_app_win), 4*tp->advmss); + } + + /* Force reservation of one segment. */ + if (sysctl_tcp_app_win && + tp->window_clamp > 2*tp->advmss && + tp->window_clamp + tp->advmss > maxwin) + tp->window_clamp = max(2*tp->advmss, maxwin-tp->advmss); + + tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + +/* 5. Recalculate window clamp after socket hit its memory bounds. */ +static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + struct sk_buff *skb; + unsigned int app_win = tp->rcv_nxt - tp->copied_seq; + int ofo_win = 0; + + tp->ack.quick = 0; + + skb_queue_walk(&tp->out_of_order_queue, skb) { + ofo_win += skb->len; + } + + /* If overcommit is due to out of order segments, + * do not clamp window. Try to expand rcvbuf instead. + */ + if (ofo_win) { + if (sk->rcvbuf < sysctl_tcp_rmem[2] && + !(sk->userlocks&SOCK_RCVBUF_LOCK) && + !tcp_memory_pressure && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) + sk->rcvbuf = min(atomic_read(&sk->rmem_alloc), sysctl_tcp_rmem[2]); + } + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { + app_win += ofo_win; + if (atomic_read(&sk->rmem_alloc) >= 2*sk->rcvbuf) + app_win >>= 1; + if (app_win > tp->ack.rcv_mss) + app_win -= tp->ack.rcv_mss; + app_win = max(app_win, 2U*tp->advmss); + + if (!ofo_win) + tp->window_clamp = min(tp->window_clamp, app_win); + tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); + } +#endif +} + +/* There is something which you must keep in mind when you analyze the + * behavior of the tp->ato delayed ack timeout interval. When a + * connection starts up, we want to ack as quickly as possible. The + * problem is that "good" TCP's do slow start at the beginning of data + * transmission. The means that until we send the first few ACK's the + * sender will sit on his end and only queue most of his data, because + * he can only send snd_cwnd unacked packets at any given time. For + * each ACK we send, he increments snd_cwnd and transmits more of his + * queue. -DaveM + */ +static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + u32 now; + + tcp_schedule_ack(tp); + + tcp_measure_rcv_mss(tp, skb); + + now = tcp_time_stamp; + + if (!tp->ack.ato) { + /* The _first_ data packet received, initialize + * delayed ACK engine. 
+ */ + tcp_incr_quickack(tp); + tp->ack.ato = TCP_ATO_MIN; + } else { + int m = now - tp->ack.lrcvtime; + + if (m <= TCP_ATO_MIN/2) { + /* The fastest case is the first. */ + tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; + } else if (m < tp->ack.ato) { + tp->ack.ato = (tp->ack.ato>>1) + m; + if (tp->ack.ato > tp->rto) + tp->ack.ato = tp->rto; + } else if (m > tp->rto) { + /* Too long gap. Apparently sender falled to + * restart window, so that we send ACKs quickly. + */ + tcp_incr_quickack(tp); + tcp_mem_reclaim(sk); + } + } + tp->ack.lrcvtime = now; + + TCP_ECN_check_ce(tp, skb); + + if (skb->len >= 128) + tcp_grow_window(sk, tp, skb); +#endif +} + +/* Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. + * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ +static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) +{ +#if 0 + long m = mrtt; /* RTT */ + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + * + * Funny. This algorithm seems to be very broken. + * These formulae increase RTO, when it should be decreased, increase + * too slowly, when it should be incresed fastly, decrease too fastly + * etc. I guess in BSD RTO takes ONE value, so that it is absolutely + * does not matter how to _calculate_ it. Seems, it was trap + * that VJ failed to avoid. 8) + */ + if(m == 0) + m = 1; + if (tp->srtt != 0) { + m -= (tp->srtt >> 3); /* m is now error in rtt est */ + tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) { + m = -m; /* m is now abs(error) */ + m -= (tp->mdev >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (tp->mdev >> 2); /* similar update on mdev */ + } + tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ + if (tp->mdev > tp->mdev_max) { + tp->mdev_max = tp->mdev; + if (tp->mdev_max > tp->rttvar) + tp->rttvar = tp->mdev_max; + } + if (after(tp->snd_una, tp->rtt_seq)) { + if (tp->mdev_max < tp->rttvar) + tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; + tp->rtt_seq = tp->snd_nxt; + tp->mdev_max = TCP_RTO_MIN; + } + } else { + /* no previous measure. */ + tp->srtt = m<<3; /* take the measured time to be rtt */ + tp->mdev = m<<1; /* make sure rto = 3*rtt */ + tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); + tp->rtt_seq = tp->snd_nxt; + } +#endif +} + +/* Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +static __inline__ void tcp_set_rto(struct tcp_opt *tp) +{ +#if 0 + /* Old crap is replaced with new one. 8) + * + * More seriously: + * 1. If rtt variance happened to be less 50msec, it is hallucination. 
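tcp_rtt_estimator() above is Jacobson's SIGCOMM '88 filter in fixed point: srtt is stored scaled by 8 and mdev scaled by 4, so the gains of 1/8 and 1/4 turn into shifts. Stripped of the Eifel-style tweak on decreasing RTTs, the core update looks like the sketch below (the struct and function are mine; the arithmetic follows the driver):

#include <stdint.h>

struct rtt_est {
    uint32_t srtt;   /* smoothed RTT, scaled by 8   */
    uint32_t mdev;   /* mean deviation, scaled by 4 */
};

/* Feed one RTT measurement m (in jiffies) into the estimator. */
static void rtt_sample(struct rtt_est *e, long m)
{
    if (m == 0)
        m = 1;
    if (e->srtt != 0) {
        long err = m - (long)(e->srtt >> 3);   /* error vs. current estimate   */

        e->srtt += err;                        /* srtt = 7/8 srtt + 1/8 m      */
        if (err < 0)
            err = -err;
        e->mdev += err - (long)(e->mdev >> 2); /* mdev = 3/4 mdev + 1/4 |err|  */
    } else {
        e->srtt = (uint32_t)m << 3;            /* first sample seeds srtt       */
        e->mdev = (uint32_t)m << 1;            /* so that rto starts near 3*rtt */
    }
}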
+ * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some curcumstances. + */ + tp->rto = (tp->srtt >> 3) + tp->rttvar; + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exaclty, which we pretend to do. + */ +#endif +} + +/* NOTE: clamping at TCP_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ +static __inline__ void tcp_bound_rto(struct tcp_opt *tp) +{ +#if 0 + if (tp->rto > TCP_RTO_MAX) + tp->rto = TCP_RTO_MAX; +#endif +} + +/* Save metrics learned by this TCP session. + This function is called only, when TCP finishes successfully + i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE. + */ +void tcp_update_metrics(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct dst_entry *dst = __sk_dst_get(sk); + + dst_confirm(dst); + + if (dst && (dst->flags&DST_HOST)) { + int m; + + if (tp->backoff || !tp->srtt) { + /* This session failed to estimate rtt. Why? + * Probably, no packets returned in time. + * Reset our results. + */ + if (!(dst->mxlock&(1<rtt = 0; + return; + } + + m = dst->rtt - tp->srtt; + + /* If newly calculated rtt larger than stored one, + * store new one. Otherwise, use EWMA. Remember, + * rtt overestimation is always better than underestimation. + */ + if (!(dst->mxlock&(1<rtt = tp->srtt; + else + dst->rtt -= (m>>3); + } + + if (!(dst->mxlock&(1<>= 1; + if (m < tp->mdev) + m = tp->mdev; + + if (m >= dst->rttvar) + dst->rttvar = m; + else + dst->rttvar -= (dst->rttvar - m)>>2; + } + + if (tp->snd_ssthresh >= 0xFFFF) { + /* Slow start still did not finish. */ + if (dst->ssthresh && + !(dst->mxlock&(1<snd_cwnd>>1) > dst->ssthresh) + dst->ssthresh = (tp->snd_cwnd>>1); + if (!(dst->mxlock&(1<snd_cwnd > dst->cwnd) + dst->cwnd = tp->snd_cwnd; + } else if (tp->snd_cwnd > tp->snd_ssthresh && + tp->ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ + if (!(dst->mxlock&(1<ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh); + if (!(dst->mxlock&(1<cwnd = (dst->cwnd + tp->snd_cwnd)>>1; + } else { + /* Else slow start did not finish, cwnd is non-sense, + ssthresh may be also invalid. + */ + if (!(dst->mxlock&(1<cwnd = (dst->cwnd + tp->snd_ssthresh)>>1; + if (dst->ssthresh && + !(dst->mxlock&(1<snd_ssthresh > dst->ssthresh) + dst->ssthresh = tp->snd_ssthresh; + } + + if (!(dst->mxlock&(1<reordering < tp->reordering && + tp->reordering != sysctl_tcp_reordering) + dst->reordering = tp->reordering; + } + } +#endif +} + +/* Increase initial CWND conservatively: if estimated + * RTT is low enough (<20msec) or if we have some preset ssthresh. + * + * Numbers are taken from RFC2414. + */ +__u32 tcp_init_cwnd(struct tcp_opt *tp) +{ +#if 0 + __u32 cwnd; + + if (tp->mss_cache > 1460) + return 2; + + cwnd = (tp->mss_cache > 1095) ? 3 : 4; + + if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3))) + cwnd = 2; + else if (cwnd > tp->snd_ssthresh) + cwnd = tp->snd_ssthresh; + + return min_t(__u32, cwnd, tp->snd_cwnd_clamp); +#else + return 0; +#endif +} + +/* Initialize metrics on socket. 
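tcp_init_cwnd() above applies an RFC 2414-style byte-bounded initial window: bigger segments get fewer of them, so the initial burst stays roughly constant in bytes, which works out to 4 segments up to 1095 bytes of MSS, 3 up to 1460, and 2 beyond that. A standalone sketch of just that table (the additional RTT and ssthresh clamping in the driver is omitted):

#include <stdint.h>

/* RFC 2414-style initial congestion window, in segments, from the MSS. */
static uint32_t init_cwnd_segments(uint32_t mss)
{
    if (mss > 1460)
        return 2;
    return mss > 1095 ? 3 : 4;
}

/* e.g. init_cwnd_segments(536) == 4, init_cwnd_segments(1460) == 3,
 *      init_cwnd_segments(8960) == 2. */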
+/* Initialize metrics on socket. */
+
+static void tcp_init_metrics(struct sock *sk)
+{
+#if 0
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct dst_entry *dst = __sk_dst_get(sk);
+
+ if (dst == NULL)
+ goto reset;
+
+ dst_confirm(dst);
+
+ if (dst->mxlock&(1<<RTAX_CWND))
+ tp->snd_cwnd_clamp = dst->cwnd;
+ if (dst->ssthresh) {
+ tp->snd_ssthresh = dst->ssthresh;
+ if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
+ tp->snd_ssthresh = tp->snd_cwnd_clamp;
+ }
+ if (dst->reordering && tp->reordering != dst->reordering) {
+ tp->sack_ok &= ~2;
+ tp->reordering = dst->reordering;
+ }
+
+ if (dst->rtt == 0)
+ goto reset;
+
+ if (!tp->srtt && dst->rtt < (TCP_TIMEOUT_INIT<<3))
+ goto reset;
+
+ /* Initial rtt is determined from SYN,SYN-ACK.
+ * The segment is small and rtt may appear much
+ * less than the real one. Use per-dst memory
+ * to make it more realistic.
+ *
+ * A bit of theory. RTT is the time passed after a "normal" sized packet
+ * is sent until it is ACKed. In normal circumstances sending small
+ * packets forces the peer to delay ACKs and the calculation is still
+ * correct. The algorithm is adaptive and, provided we follow specs, it
+ * NEVER underestimates RTT. BUT! If the peer plays clever tricks like
+ * "quick acks" for long enough to drive RTT down to a low value, and
+ * then abruptly stops doing it and starts delaying ACKs, expect trouble.
+ */
+ if (dst->rtt > tp->srtt)
+ tp->srtt = dst->rtt;
+ if (dst->rttvar > tp->mdev) {
+ tp->mdev = dst->rttvar;
+ tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+ }
+ tcp_set_rto(tp);
+ tcp_bound_rto(tp);
+ if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
+ goto reset;
+ tp->snd_cwnd = tcp_init_cwnd(tp);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+ return;
+
+reset:
+ /* Play conservative. If timestamps are not
+ * supported, TCP will fail to recalculate the correct
+ * rtt, if the initial rto is too small. FORGET ALL AND RESET!
+ */
+ if (!tp->saw_tstamp && tp->srtt) {
+ tp->srtt = 0;
+ tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
+ tp->rto = TCP_TIMEOUT_INIT;
+ }
+#endif
+}
+
+static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
+{
+#if 0
+ if (metric > tp->reordering) {
+ tp->reordering = min(TCP_MAX_REORDERING, metric);
+
+ /* This exciting event is worth remembering. 8) */
+ if (ts)
+ NET_INC_STATS_BH(TCPTSReorder);
+ else if (IsReno(tp))
+ NET_INC_STATS_BH(TCPRenoReorder);
+ else if (IsFack(tp))
+ NET_INC_STATS_BH(TCPFACKReorder);
+ else
+ NET_INC_STATS_BH(TCPSACKReorder);
+#if FASTRETRANS_DEBUG > 1
+ printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
+ tp->sack_ok, tp->ca_state,
+ tp->reordering, tp->fackets_out, tp->sacked_out,
+ tp->undo_marker ? tp->undo_retrans : 0);
+#endif
+ /* Disable FACK for now. */
+ tp->sack_ok &= ~2;
+ }
+#endif
+}
+
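+/* Editor's note: the S/R/L tag-bit bookkeeping described in the comment
+ * that follows can be summed up per segment as a tiny helper. This is an
+ * illustrative sketch only; the bit values and names below are hypothetical,
+ * while the driver's real flags are TCPCB_SACKED_ACKED, TCPCB_SACKED_RETRANS
+ * and TCPCB_LOST. The return values match the "Tag / InFlight" table in the
+ * comment below.
+ */
+#if 0
+#define SK_S 0x1 /* SACKed: the original reached the receiver */
+#define SK_R 0x2 /* a retransmitted copy is (still) in flight */
+#define SK_L 0x4 /* the original is considered lost by the net */
+
+static int sketch_in_flight(int tag)
+{
+ int n = 1; /* the original transmission */
+ if (tag & (SK_S | SK_L))
+ n--; /* original arrived or is gone */
+ if (tag & SK_R)
+ n++; /* plus the retransmitted copy */
+ return n; /* 0: S, L   1: none, L|R, S|R   2: R */
+}
+#endif
+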
+/* This procedure tags the retransmission queue when SACKs arrive.
+ *
+ * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
+ * Packets in queue with these bits set are counted in variables
+ * sacked_out, retrans_out and lost_out, correspondingly.
+ *
+ * Valid combinations are:
+ * Tag InFlight Description
+ * 0 1 - orig segment is in flight.
+ * S 0 - nothing flies, orig reached receiver.
+ * L 0 - nothing flies, orig lost by net.
+ * R 2 - both orig and retransmit are in flight.
+ * L|R 1 - orig is lost, retransmit is in flight.
+ * S|R 1 - orig reached receiver, retrans is still in flight.
+ * (L|S|R is logically valid, it could occur when L|R is sacked,
+ * but it is equivalent to plain S and the code short-circuits it to S.
+ * L|S is logically invalid, it would mean -1 packet in flight 8))
+ *
+ * These 6 states form a finite state machine, controlled by the following events:
+ * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
+ * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
+ * 3. Loss detection event of one of three flavors:
+ * A. Scoreboard estimator decided the packet is lost.
+ * A'. Reno "three dupacks" marks head of queue lost.
+ * A''. Its FACK modification, head until snd.fack is lost.
+ * B. SACK arrives sacking data transmitted after a never retransmitted
+ * hole was sent out.
+ * C. SACK arrives sacking SND.NXT at the moment when the
+ * segment was retransmitted.
+ * 4. D-SACK added a new rule: D-SACK changes any tag to S.
+ *
+ * It is pleasant to note that the state diagram turns out to be commutative,
+ * so we do not need to worry about the order of our actions
+ * when multiple events arrive simultaneously (see the function below).
+ *
+ * Reordering detection.
+ * --------------------
+ * The reordering metric is the maximal distance by which a packet can be
+ * displaced in the packet stream. With SACKs we can estimate it:
+ *
+ * 1. SACK fills an old hole and the corresponding segment was not
+ * ever retransmitted -> reordering. Alas, we cannot use it
+ * when the segment was retransmitted.
+ * 2. The last flaw is solved with D-SACK. A D-SACK arrives
+ * for a retransmitted and already SACKed segment -> reordering.
+ * Neither heuristic is used in the Loss state, where we cannot
+ * account for retransmits accurately.
+ */
+static int
+tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
+{
+#if 0
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+ struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+ int reord = tp->packets_out;
+ int prior_fackets;
+ u32 lost_retrans = 0;
+ int flag = 0;
+ int i;
+
+ if (!tp->sacked_out)
+ tp->fackets_out = 0;
+ prior_fackets = tp->fackets_out;
+
+ for (i=0; i<num_sacks; i++, sp++) {
+ struct sk_buff *skb;
+ __u32 start_seq = ntohl(sp->start_seq);
+ __u32 end_seq = ntohl(sp->end_seq);
+ int fack_count = 0;
+ int dup_sack = 0;
+
+ /* Check for D-SACK. */
+ if (i == 0) {
+ u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;
+
+ if (before(start_seq, ack)) {
+ dup_sack = 1;
+ tp->sack_ok |= 4;
+ NET_INC_STATS_BH(TCPDSACKRecv);
+ } else if (num_sacks > 1 &&
+ !after(end_seq, ntohl(sp[1].end_seq)) &&
+ !before(start_seq, ntohl(sp[1].start_seq))) {
+ dup_sack = 1;
+ tp->sack_ok |= 4;
+ NET_INC_STATS_BH(TCPDSACKOfoRecv);
+ }
+
+ /* D-SACK for already forgotten data...
+ * Do dumb counting. */
+ if (dup_sack &&
+ !after(end_seq, prior_snd_una) &&
+ after(end_seq, tp->undo_marker))
+ tp->undo_retrans--;
+
+ /* Eliminate too old ACKs, but take into
+ * account more or less fresh ones, they can
+ * contain valid SACK info.
+ */
+ if (before(ack, prior_snd_una-tp->max_window))
+ return 0;
+ }
+
+ /* Event "B" in the comment above. */
+ if (after(end_seq, tp->high_seq))
+ flag |= FLAG_DATA_LOST;
+
+ for_retrans_queue(skb, sk, tp) {
+ u8 sacked = TCP_SKB_CB(skb)->sacked;
+ int in_sack;
+
+ /* The retransmission queue is always in order, so
+ * we can short-circuit the walk early.
+ */
+ if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+ break;
+
+ fack_count++;
+
+ in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+ !before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
+ /* Account D-SACK for retransmitted packet.
*/ + if ((dup_sack && in_sack) && + (sacked & TCPCB_RETRANS) && + after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + tp->undo_retrans--; + + /* The frame is ACKed. */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { + if (sacked&TCPCB_RETRANS) { + if ((dup_sack && in_sack) && + (sacked&TCPCB_SACKED_ACKED)) + reord = min(fack_count, reord); + } else { + /* If it was in a hole, we detected reordering. */ + if (fack_count < prior_fackets && + !(sacked&TCPCB_SACKED_ACKED)) + reord = min(fack_count, reord); + } + + /* Nothing to do; acked frame is about to be dropped. */ + continue; + } + + if ((sacked&TCPCB_SACKED_RETRANS) && + after(end_seq, TCP_SKB_CB(skb)->ack_seq) && + (!lost_retrans || after(end_seq, lost_retrans))) + lost_retrans = end_seq; + + if (!in_sack) + continue; + + if (!(sacked&TCPCB_SACKED_ACKED)) { + if (sacked & TCPCB_SACKED_RETRANS) { + /* If the segment is not tagged as lost, + * we do not clear RETRANS, believing + * that retransmission is still in flight. + */ + if (sacked & TCPCB_LOST) { + TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + tp->lost_out--; + tp->retrans_out--; + } + } else { + /* New sack for not retransmitted frame, + * which was in hole. It is reordering. + */ + if (!(sacked & TCPCB_RETRANS) && + fack_count < prior_fackets) + reord = min(fack_count, reord); + + if (sacked & TCPCB_LOST) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + tp->lost_out--; + } + } + + TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; + flag |= FLAG_DATA_SACKED; + tp->sacked_out++; + + if (fack_count > tp->fackets_out) + tp->fackets_out = fack_count; + } else { + if (dup_sack && (sacked&TCPCB_RETRANS)) + reord = min(fack_count, reord); + } + + /* D-SACK. We can detect redundant retransmission + * in S|R and plain R frames and clear it. + * undo_retrans is decreased above, L|R frames + * are accounted above as well. + */ + if (dup_sack && + (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out--; + } + } + } + + /* Check for lost retransmit. This superb idea is + * borrowed from "ratehalving". Event "C". + * Later note: FACK people cheated me again 8), + * we have to account for reordering! Ugly, + * but should help. 
+ */ + if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { + struct sk_buff *skb; + + for_retrans_queue(skb, sk, tp) { + if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) + break; + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + continue; + if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) && + after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) && + (IsFack(tp) || + !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq+tp->reordering*tp->mss_cache))) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out--; + + if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out++; + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + flag |= FLAG_DATA_SACKED; + NET_INC_STATS_BH(TCPLostRetransmit); + } + } + } + } + + tp->left_out = tp->sacked_out + tp->lost_out; + + if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss) + tcp_update_reordering(tp, (tp->fackets_out+1)-reord, 0); + +#if FASTRETRANS_DEBUG > 0 + BUG_TRAP((int)tp->sacked_out >= 0); + BUG_TRAP((int)tp->lost_out >= 0); + BUG_TRAP((int)tp->retrans_out >= 0); + BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); +#endif + return flag; +#else + return 0; +#endif +} + +void tcp_clear_retrans(struct tcp_opt *tp) +{ +#if 0 + tp->left_out = 0; + tp->retrans_out = 0; + + tp->fackets_out = 0; + tp->sacked_out = 0; + tp->lost_out = 0; + + tp->undo_marker = 0; + tp->undo_retrans = 0; +#endif +} + +/* Enter Loss state. If "how" is not zero, forget all SACK information + * and reset tags completely, otherwise preserve SACKs. If receiver + * dropped its ofo queue, we will know this due to reneging detection. + */ +void tcp_enter_loss(struct sock *sk, int how) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct sk_buff *skb; + int cnt = 0; + + /* Reduce ssthresh if it has not yet been made inside this window. */ + if (tp->ca_state <= TCP_CA_Disorder || + tp->snd_una == tp->high_seq || + (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + } + tp->snd_cwnd = 1; + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd_stamp = tcp_time_stamp; + + tcp_clear_retrans(tp); + + /* Push undo marker, if it was plain RTO and nothing + * was retransmitted. */ + if (!how) + tp->undo_marker = tp->snd_una; + + for_retrans_queue(skb, sk, tp) { + cnt++; + if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) + tp->undo_marker = 0; + TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + } else { + tp->sacked_out++; + tp->fackets_out = cnt; + } + } + tcp_sync_left_out(tp); + + tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); + tp->ca_state = TCP_CA_Loss; + tp->high_seq = tp->snd_nxt; + TCP_ECN_queue_cwr(tp); +#endif +} + +static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + struct sk_buff *skb; + + /* If ACK arrived pointing to a remembered SACK, + * it means that our remembered SACKs do not reflect + * real state of receiver i.e. + * receiver _host_ is heavily congested (or buggy). + * Do processing similar to RTO timeout. 
+ */ + if ((skb = skb_peek(&sk->write_queue)) != NULL && + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + NET_INC_STATS_BH(TCPSACKReneging); + + tcp_enter_loss(sk, 1); + tp->retransmits++; + tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + return 1; + } + return 0; +#else + return 0; +#endif +} + +static inline int tcp_fackets_out(struct tcp_opt *tp) +{ +#if 0 + return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; +#else + return 0; +#endif +} + +static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); +#else + return 0; +#endif +} + +static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + return tp->packets_out && tcp_skb_timedout(tp, skb_peek(&sk->write_queue)); +#else + return 0; +#endif +} + +/* Linux NewReno/SACK/FACK/ECN state machine. + * -------------------------------------- + * + * "Open" Normal state, no dubious events, fast path. + * "Disorder" In all the respects it is "Open", + * but requires a bit more attention. It is entered when + * we see some SACKs or dupacks. It is split of "Open" + * mainly to move some processing from fast path to slow one. + * "CWR" CWND was reduced due to some Congestion Notification event. + * It can be ECN, ICMP source quench, local device congestion. + * "Recovery" CWND was reduced, we are fast-retransmitting. + * "Loss" CWND was reduced due to RTO timeout or SACK reneging. + * + * tcp_fastretrans_alert() is entered: + * - each incoming ACK, if state is not "Open" + * - when arrived ACK is unusual, namely: + * * SACK + * * Duplicate ACK. + * * ECN ECE. + * + * Counting packets in flight is pretty simple. + * + * in_flight = packets_out - left_out + retrans_out + * + * packets_out is SND.NXT-SND.UNA counted in packets. + * + * retrans_out is number of retransmitted segments. + * + * left_out is number of segments left network, but not ACKed yet. + * + * left_out = sacked_out + lost_out + * + * sacked_out: Packets, which arrived to receiver out of order + * and hence not ACKed. With SACKs this number is simply + * amount of SACKed data. Even without SACKs + * it is easy to give pretty reliable estimate of this number, + * counting duplicate ACKs. + * + * lost_out: Packets lost by network. TCP has no explicit + * "loss notification" feedback from network (for now). + * It means that this number can be only _guessed_. + * Actually, it is the heuristics to predict lossage that + * distinguishes different algorithms. + * + * F.e. after RTO, when all the queue is considered as lost, + * lost_out = packets_out and in_flight = retrans_out. + * + * Essentially, we have now two algorithms counting + * lost packets. + * + * FACK: It is the simplest heuristics. As soon as we decided + * that something is lost, we decide that _all_ not SACKed + * packets until the most forward SACK are lost. I.e. + * lost_out = fackets_out - sacked_out and left_out = fackets_out. + * It is absolutely correct estimate, if network does not reorder + * packets. And it loses any connection to reality when reordering + * takes place. We use FACK by default until reordering + * is suspected on the path to this destination. + * + * NewReno: when Recovery is entered, we assume that one segment + * is lost (classic Reno). While we are in Recovery and + * a partial ACK arrives, we assume that one more packet + * is lost (NewReno). This heuristics are the same in NewReno + * and SACK. 
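+ *
+ * Editor's note: in code form, the accounting described above reduces to
+ * the expression below. This is a minimal sketch; the field names mirror
+ * struct tcp_opt as used throughout this file, the helper name is
+ * hypothetical, and the driver's real helper is tcp_packets_in_flight().
+ */
+#if 0
+static __inline__ unsigned int sketch_packets_in_flight(const struct tcp_opt *tp)
+{
+ /* left_out = sacked_out + lost_out; see tcp_sync_left_out(). */
+ return tp->packets_out - (tp->sacked_out + tp->lost_out) + tp->retrans_out;
+}
+#endif
+
+/* (State-machine description, continued.)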
+ * + * Imagine, that's all! Forget about all this shamanism about CWND inflation + * deflation etc. CWND is real congestion window, never inflated, changes + * only according to classic VJ rules. + * + * Really tricky (and requiring careful tuning) part of algorithm + * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue(). + * The first determines the moment _when_ we should reduce CWND and, + * hence, slow down forward transmission. In fact, it determines the moment + * when we decide that hole is caused by loss, rather than by a reorder. + * + * tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill + * holes, caused by lost packets. + * + * And the most logically complicated part of algorithm is undo + * heuristics. We detect false retransmits due to both too early + * fast retransmit (reordering) and underestimated RTO, analyzing + * timestamps and D-SACKs. When we detect that some segments were + * retransmitted by mistake and CWND reduction was wrong, we undo + * window reduction and abort recovery phase. This logic is hidden + * inside several functions named tcp_try_undo_. + */ + +/* This function decides, when we should leave Disordered state + * and enter Recovery phase, reducing congestion window. + * + * Main question: may we further continue forward transmission + * with the same cwnd? + */ +static int +tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + /* Trick#1: The loss is proven. */ + if (tp->lost_out) + return 1; + + /* Not-A-Trick#2 : Classic rule... */ + if (tcp_fackets_out(tp) > tp->reordering) + return 1; + + /* Trick#3 : when we use RFC2988 timer restart, fast + * retransmit can be triggered by timeout of queue head. + */ + if (tcp_head_timedout(sk, tp)) + return 1; + + /* Trick#4: It is still not OK... But will it be useful to delay + * recovery more? + */ + if (tp->packets_out <= tp->reordering && + tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) && + !tcp_may_send_now(sk, tp)) { + /* We have nothing to send. This connection is limited + * either by receiver window or by application. + */ + return 1; + } + + return 0; +#else + return 0; +#endif +} + +/* If we receive more dupacks than we expected counting segments + * in assumption of absent reordering, interpret this as reordering. + * The only another reason could be bug in receiver TCP. + */ +static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend) +{ +#if 0 + u32 holes; + + holes = max(tp->lost_out, 1U); + holes = min(holes, tp->packets_out); + + if (tp->sacked_out + holes > tp->packets_out) { + tp->sacked_out = tp->packets_out - holes; + tcp_update_reordering(tp, tp->packets_out+addend, 0); + } +#endif +} + +/* Emulate SACKs for SACKless connection: account for a new dupack. */ + +static void tcp_add_reno_sack(struct tcp_opt *tp) +{ +#if 0 + ++tp->sacked_out; + tcp_check_reno_reordering(tp, 0); + tcp_sync_left_out(tp); +#endif +} + +/* Account for ACK, ACKing some data in Reno Recovery phase. */ + +static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked) +{ +#if 0 + if (acked > 0) { + /* One ACK acked hole. The rest eat duplicate ACKs. */ + if (acked-1 >= tp->sacked_out) + tp->sacked_out = 0; + else + tp->sacked_out -= acked-1; + } + tcp_check_reno_reordering(tp, acked); + tcp_sync_left_out(tp); +#endif +} + +static inline void tcp_reset_reno_sack(struct tcp_opt *tp) +{ +#if 0 + tp->sacked_out = 0; + tp->left_out = tp->lost_out; +#endif +} + +/* Mark head of queue up as lost. 
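+ *
+ * (Editor's note: "packets" is the number of segments to tag, counted from
+ * the head of the retransmission queue; the walk below stops early once a
+ * segment ending beyond "high_seq" is reached, and segments that already
+ * carry a tag bit are skipped so lost_out is not inflated.)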
*/ +static void +tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_seq) +{ +#if 0 + struct sk_buff *skb; + int cnt = packets; + + BUG_TRAP(cnt <= tp->packets_out); + + for_retrans_queue(skb, sk, tp) { + if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + break; + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + } + } + tcp_sync_left_out(tp); +#endif +} + +/* Account newly detected lost packet(s) */ + +static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (IsFack(tp)) { + int lost = tp->fackets_out - tp->reordering; + if (lost <= 0) + lost = 1; + tcp_mark_head_lost(sk, tp, lost, tp->high_seq); + } else { + tcp_mark_head_lost(sk, tp, 1, tp->high_seq); + } + + /* New heuristics: it is possible only after we switched + * to restart timer each time when something is ACKed. + * Hence, we can detect timed out packets during fast + * retransmit without falling to slow start. + */ + if (tcp_head_timedout(sk, tp)) { + struct sk_buff *skb; + + for_retrans_queue(skb, sk, tp) { + if (tcp_skb_timedout(tp, skb) && + !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + } + } + tcp_sync_left_out(tp); + } +#endif +} + +/* CWND moderation, preventing bursts due to too big ACKs + * in dubious situations. + */ +static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp) +{ +#if 0 + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp)+tcp_max_burst(tp)); + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + +/* Decrease cwnd each second ack. */ + +static void tcp_cwnd_down(struct tcp_opt *tp) +{ +#if 0 + int decr = tp->snd_cwnd_cnt + 1; + + tp->snd_cwnd_cnt = decr&1; + decr >>= 1; + + if (decr && tp->snd_cwnd > tp->snd_ssthresh/2) + tp->snd_cwnd -= decr; + + tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + +/* Nothing was retransmitted or returned timestamp is less + * than timestamp of the first retransmission. + */ +static __inline__ int tcp_packet_delayed(struct tcp_opt *tp) +{ +#if 0 + return !tp->retrans_stamp || + (tp->saw_tstamp && tp->rcv_tsecr && + (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0); +#else + return 0; +#endif +} + +/* Undo procedures. */ + +#if FASTRETRANS_DEBUG > 1 +static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg) +{ +#if 0 + printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", + msg, + NIPQUAD(sk->daddr), ntohs(sk->dport), + tp->snd_cwnd, tp->left_out, + tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); +#endif +} +#else +#define DBGUNDO(x...) do { } while (0) +#endif + +static void tcp_undo_cwr(struct tcp_opt *tp, int undo) +{ +#if 0 + if (tp->prior_ssthresh) { + tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); + + if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { + tp->snd_ssthresh = tp->prior_ssthresh; + TCP_ECN_withdraw_cwr(tp); + } + } else { + tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); + } + tcp_moderate_cwnd(tp); + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + +static inline int tcp_may_undo(struct tcp_opt *tp) +{ +#if 0 + return tp->undo_marker && + (!tp->undo_retrans || tcp_packet_delayed(tp)); +#else + return 0; +#endif +} + +/* People celebrate: "We love our President!" */ +static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (tcp_may_undo(tp)) { + /* Happy end! 
We did not retransmit anything + * or our original transmission succeeded. + */ + DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans"); + tcp_undo_cwr(tp, 1); + if (tp->ca_state == TCP_CA_Loss) + NET_INC_STATS_BH(TCPLossUndo); + else + NET_INC_STATS_BH(TCPFullUndo); + tp->undo_marker = 0; + } + if (tp->snd_una == tp->high_seq && IsReno(tp)) { + /* Hold old state until something *above* high_seq + * is ACKed. For Reno it is MUST to prevent false + * fast retransmits (RFC2582). SACK TCP is safe. */ + tcp_moderate_cwnd(tp); + return 1; + } + tp->ca_state = TCP_CA_Open; + return 0; +#else + return 0; +#endif +} + +/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ +static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (tp->undo_marker && !tp->undo_retrans) { + DBGUNDO(sk, tp, "D-SACK"); + tcp_undo_cwr(tp, 1); + tp->undo_marker = 0; + NET_INC_STATS_BH(TCPDSACKUndo); + } +#endif +} + +/* Undo during fast recovery after partial ACK. */ + +static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked) +{ +#if 0 + /* Partial ACK arrived. Force Hoe's retransmit. */ + int failed = IsReno(tp) || tp->fackets_out>tp->reordering; + + if (tcp_may_undo(tp)) { + /* Plain luck! Hole if filled with delayed + * packet, rather than with a retransmit. + */ + if (tp->retrans_out == 0) + tp->retrans_stamp = 0; + + tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); + + DBGUNDO(sk, tp, "Hoe"); + tcp_undo_cwr(tp, 0); + NET_INC_STATS_BH(TCPPartialUndo); + + /* So... Do not make Hoe's retransmit yet. + * If the first packet was delayed, the rest + * ones are most probably delayed as well. + */ + failed = 0; + } + return failed; +#else + return 0; +#endif +} + +/* Undo during loss recovery after partial ACK. */ +static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (tcp_may_undo(tp)) { + struct sk_buff *skb; + for_retrans_queue(skb, sk, tp) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + } + DBGUNDO(sk, tp, "partial loss"); + tp->lost_out = 0; + tp->left_out = tp->sacked_out; + tcp_undo_cwr(tp, 1); + NET_INC_STATS_BH(TCPLossUndo); + tp->retransmits = 0; + tp->undo_marker = 0; + if (!IsReno(tp)) + tp->ca_state = TCP_CA_Open; + return 1; + } + return 0; +#else + return 0; +#endif +} + +static __inline__ void tcp_complete_cwr(struct tcp_opt *tp) +{ +#if 0 + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + +static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) +{ +#if 0 + tp->left_out = tp->sacked_out; + + if (tp->retrans_out == 0) + tp->retrans_stamp = 0; + + if (flag&FLAG_ECE) + tcp_enter_cwr(tp); + + if (tp->ca_state != TCP_CA_CWR) { + int state = TCP_CA_Open; + + if (tp->left_out || + tp->retrans_out || + tp->undo_marker) + state = TCP_CA_Disorder; + + if (tp->ca_state != state) { + tp->ca_state = state; + tp->high_seq = tp->snd_nxt; + } + tcp_moderate_cwnd(tp); + } else { + tcp_cwnd_down(tp); + } +#endif +} + +/* Process an event, which can update packets-in-flight not trivially. + * Main goal of this function is to calculate new estimate for left_out, + * taking into account both packets sitting in receiver's buffer and + * packets lost by network. + * + * Besides that it does CWND reduction, when packet loss is detected + * and changes state of machine. + * + * It does _not_ decide what to send, it is made in function + * tcp_xmit_retransmit_queue(). 
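+ *
+ * (Editor's note: the body below follows the lettered steps in this comment:
+ * A. handle ECE, B. check for SACK reneging, C. apply the FLAG_DATA_LOST
+ * hint, D. resynchronize left_out, E. leave the current state once high_seq
+ * is ACKed, F. per-state processing, and finally the common tail that marks
+ * newly lost segments, lowers cwnd and retransmits.)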
+ */ +static void +tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, + int prior_packets, int flag) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); + + /* Some technical things: + * 1. Reno does not count dupacks (sacked_out) automatically. */ + if (!tp->packets_out) + tp->sacked_out = 0; + /* 2. SACK counts snd_fack in packets inaccurately. */ + if (tp->sacked_out == 0) + tp->fackets_out = 0; + + /* Now state machine starts. + * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ + if (flag&FLAG_ECE) + tp->prior_ssthresh = 0; + + /* B. In all the states check for reneging SACKs. */ + if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) + return; + + /* C. Process data loss notification, provided it is valid. */ + if ((flag&FLAG_DATA_LOST) && + before(tp->snd_una, tp->high_seq) && + tp->ca_state != TCP_CA_Open && + tp->fackets_out > tp->reordering) { + tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); + NET_INC_STATS_BH(TCPLoss); + } + + /* D. Synchronize left_out to current state. */ + tcp_sync_left_out(tp); + + /* E. Check state exit conditions. State can be terminated + * when high_seq is ACKed. */ + if (tp->ca_state == TCP_CA_Open) { + BUG_TRAP(tp->retrans_out == 0); + tp->retrans_stamp = 0; + } else if (!before(tp->snd_una, tp->high_seq)) { + switch (tp->ca_state) { + case TCP_CA_Loss: + tp->retransmits = 0; + if (tcp_try_undo_recovery(sk, tp)) + return; + break; + + case TCP_CA_CWR: + /* CWR is to be held something *above* high_seq + * is ACKed for CWR bit to reach receiver. */ + if (tp->snd_una != tp->high_seq) { + tcp_complete_cwr(tp); + tp->ca_state = TCP_CA_Open; + } + break; + + case TCP_CA_Disorder: + tcp_try_undo_dsack(sk, tp); + if (!tp->undo_marker || + /* For SACK case do not Open to allow to undo + * catching for all duplicate ACKs. */ + IsReno(tp) || tp->snd_una != tp->high_seq) { + tp->undo_marker = 0; + tp->ca_state = TCP_CA_Open; + } + break; + + case TCP_CA_Recovery: + if (IsReno(tp)) + tcp_reset_reno_sack(tp); + if (tcp_try_undo_recovery(sk, tp)) + return; + tcp_complete_cwr(tp); + break; + } + } + + /* F. Process state. */ + switch (tp->ca_state) { + case TCP_CA_Recovery: + if (prior_snd_una == tp->snd_una) { + if (IsReno(tp) && is_dupack) + tcp_add_reno_sack(tp); + } else { + int acked = prior_packets - tp->packets_out; + if (IsReno(tp)) + tcp_remove_reno_sacks(sk, tp, acked); + is_dupack = tcp_try_undo_partial(sk, tp, acked); + } + break; + case TCP_CA_Loss: + if (flag&FLAG_DATA_ACKED) + tp->retransmits = 0; + if (!tcp_try_undo_loss(sk, tp)) { + tcp_moderate_cwnd(tp); + tcp_xmit_retransmit_queue(sk); + return; + } + if (tp->ca_state != TCP_CA_Open) + return; + /* Loss is undone; fall through to processing in Open state. 
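+ * (i.e. execution continues into the "default:" arm below, which does the
+ * Open/Disorder dupack accounting and decides whether to enter Recovery.)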
*/ + default: + if (IsReno(tp)) { + if (tp->snd_una != prior_snd_una) + tcp_reset_reno_sack(tp); + if (is_dupack) + tcp_add_reno_sack(tp); + } + + if (tp->ca_state == TCP_CA_Disorder) + tcp_try_undo_dsack(sk, tp); + + if (!tcp_time_to_recover(sk, tp)) { + tcp_try_to_open(sk, tp, flag); + return; + } + + /* Otherwise enter Recovery state */ + + if (IsReno(tp)) + NET_INC_STATS_BH(TCPRenoRecovery); + else + NET_INC_STATS_BH(TCPSackRecovery); + + tp->high_seq = tp->snd_nxt; + tp->prior_ssthresh = 0; + tp->undo_marker = tp->snd_una; + tp->undo_retrans = tp->retrans_out; + + if (tp->ca_state < TCP_CA_CWR) { + if (!(flag&FLAG_ECE)) + tp->prior_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + TCP_ECN_queue_cwr(tp); + } + + tp->snd_cwnd_cnt = 0; + tp->ca_state = TCP_CA_Recovery; + } + + if (is_dupack || tcp_head_timedout(sk, tp)) + tcp_update_scoreboard(sk, tp); + tcp_cwnd_down(tp); + tcp_xmit_retransmit_queue(sk); +#endif +} + +/* Read draft-ietf-tcplw-high-performance before mucking + * with this code. (Superceeds RFC1323) + */ +static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag) +{ +#if 0 + __u32 seq_rtt; + + /* RTTM Rule: A TSecr value received in a segment is used to + * update the averaged RTT measurement only if the segment + * acknowledges some new data, i.e., only if it advances the + * left edge of the send window. + * + * See draft-ietf-tcplw-high-performance-00, section 3.3. + * 1998/04/10 Andrey V. Savochkin + * + * Changed: reset backoff as soon as we see the first valid sample. + * If we do not, we get strongly overstimated rto. With timestamps + * samples are accepted even from very old segments: f.e., when rtt=1 + * increases to 8, we retransmit 5 times and after 8 seconds delayed + * answer arrives rto becomes 120 seconds! If at least one of segments + * in window is lost... Voila. --ANK (010210) + */ + seq_rtt = tcp_time_stamp - tp->rcv_tsecr; + tcp_rtt_estimator(tp, seq_rtt); + tcp_set_rto(tp); + tp->backoff = 0; + tcp_bound_rto(tp); +#endif +} + +static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag) +{ +#if 0 + /* We don't have a timestamp. Can only use + * packets that are not retransmitted to determine + * rtt estimates. Also, we must not reset the + * backoff for rto until we get a non-retransmitted + * packet. This allows us to deal with a situation + * where the network delay has increased suddenly. + * I.e. Karn's algorithm. (SIGCOMM '87, p5.) + */ + + if (flag & FLAG_RETRANS_DATA_ACKED) + return; + + tcp_rtt_estimator(tp, seq_rtt); + tcp_set_rto(tp); + tp->backoff = 0; + tcp_bound_rto(tp); +#endif +} + +static __inline__ void +tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) +{ +#if 0 + /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ + if (tp->saw_tstamp && tp->rcv_tsecr) + tcp_ack_saw_tstamp(tp, flag); + else if (seq_rtt >= 0) + tcp_ack_no_tstamp(tp, seq_rtt, flag); +#endif +} + +/* This is Jacobson's slow start and congestion avoidance. + * SIGCOMM '88, p. 328. + */ +static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) +{ +#if 0 + if (tp->snd_cwnd <= tp->snd_ssthresh) { + /* In "safe" area, increase. */ + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } else { + /* In dangerous area, increase slowly. 
+ * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt=0; + } else + tp->snd_cwnd_cnt++; + } + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + +/* Restart timer after forward progress on connection. + * RFC2988 recommends to restart timer to now+rto. + */ + +static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + if (tp->packets_out==0) { + tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); + } else { + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + } +#endif +} + +/* Remove acknowledged frames from the retransmission queue. */ +static int tcp_clean_rtx_queue(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + __u32 now = tcp_time_stamp; + int acked = 0; + __s32 seq_rtt = -1; + + while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + __u8 sacked = scb->sacked; + + /* If our packet is before the ack sequence we can + * discard it as it's confirmed to have arrived at + * the other end. + */ + if (after(scb->end_seq, tp->snd_una)) + break; + + /* Initial outgoing SYN's get put onto the write_queue + * just like anything else we transmit. It is not + * true data, and if we misinform our callers that + * this ACK acks real data, we will erroneously exit + * connection startup slow start one packet too + * quickly. This is severely frowned upon behavior. + */ + if(!(scb->flags & TCPCB_FLAG_SYN)) { + acked |= FLAG_DATA_ACKED; + } else { + acked |= FLAG_SYN_ACKED; + tp->retrans_stamp = 0; + } + + if (sacked) { + if(sacked & TCPCB_RETRANS) { + if(sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out--; + acked |= FLAG_RETRANS_DATA_ACKED; + seq_rtt = -1; + } else if (seq_rtt < 0) + seq_rtt = now - scb->when; + if(sacked & TCPCB_SACKED_ACKED) + tp->sacked_out--; + if(sacked & TCPCB_LOST) + tp->lost_out--; + if(sacked & TCPCB_URG) { + if (tp->urg_mode && + !before(scb->end_seq, tp->snd_up)) + tp->urg_mode = 0; + } + } else if (seq_rtt < 0) + seq_rtt = now - scb->when; + if(tp->fackets_out) + tp->fackets_out--; + tp->packets_out--; + __skb_unlink(skb, skb->list); + tcp_free_skb(sk, skb); + } + + if (acked&FLAG_ACKED) { + tcp_ack_update_rtt(tp, acked, seq_rtt); + tcp_ack_packets_out(sk, tp); + } + +#if FASTRETRANS_DEBUG > 0 + BUG_TRAP((int)tp->sacked_out >= 0); + BUG_TRAP((int)tp->lost_out >= 0); + BUG_TRAP((int)tp->retrans_out >= 0); + if (tp->packets_out==0 && tp->sack_ok) { + if (tp->lost_out) { + printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, tp->ca_state); + tp->lost_out = 0; + } + if (tp->sacked_out) { + printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, tp->ca_state); + tp->sacked_out = 0; + } + if (tp->retrans_out) { + printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, tp->ca_state); + tp->retrans_out = 0; + } + } +#endif + return acked; +#else + return 0; +#endif +} + +static void tcp_ack_probe(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Was it a usable window open? */ + + if (!after(TCP_SKB_CB(tp->send_head)->end_seq, tp->snd_una + tp->snd_wnd)) { + tp->backoff = 0; + tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); + /* Socket must be waked up by subsequent tcp_data_snd_check(). + * This function is not for random using! 
+ */ + } else { + tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, + min(tp->rto << tp->backoff, TCP_RTO_MAX)); + } +#endif +} + +static __inline__ int tcp_ack_is_dubious(struct tcp_opt *tp, int flag) +{ +#if 0 + return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || + tp->ca_state != TCP_CA_Open); +#else + return 0; +#endif +} + +static __inline__ int tcp_may_raise_cwnd(struct tcp_opt *tp, int flag) +{ +#if 0 + return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && + !((1<ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); +#else + return 0; +#endif +} + +/* Check that window update is acceptable. + * The function assumes that snd_una<=ack<=snd_next. + */ +static __inline__ int +tcp_may_update_window(struct tcp_opt *tp, u32 ack, u32 ack_seq, u32 nwin) +{ +#if 0 + return (after(ack, tp->snd_una) || + after(ack_seq, tp->snd_wl1) || + (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd)); +#else + return 0; +#endif +} + +/* Update our send window. + * + * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 + * and in FreeBSD. NetBSD's one is even worse.) is wrong. + */ +static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp, + struct sk_buff *skb, u32 ack, u32 ack_seq) +{ +#if 0 + int flag = 0; + u32 nwin = ntohs(skb->h.th->window) << tp->snd_wscale; + + if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { + flag |= FLAG_WIN_UPDATE; + tcp_update_wl(tp, ack, ack_seq); + + if (tp->snd_wnd != nwin) { + tp->snd_wnd = nwin; + + /* Note, it is the only place, where + * fast path is recovered for sending TCP. + */ + tcp_fast_path_check(sk, tp); + + if (nwin > tp->max_window) { + tp->max_window = nwin; + tcp_sync_mss(sk, tp->pmtu_cookie); + } + } + } + + tp->snd_una = ack; + + return flag; +#else + return 0; +#endif +} + +/* This routine deals with incoming acks, but not outgoing ones. */ +static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 prior_snd_una = tp->snd_una; + u32 ack_seq = TCP_SKB_CB(skb)->seq; + u32 ack = TCP_SKB_CB(skb)->ack_seq; + u32 prior_in_flight; + int prior_packets; + + /* If the ack is newer than sent or older than previous acks + * then we can probably ignore it. + */ + if (after(ack, tp->snd_nxt)) + goto uninteresting_ack; + + if (before(ack, prior_snd_una)) + goto old_ack; + + if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { + /* Window is constant, pure forward advance. + * No more checks are required. + * Note, we use the fact that SND.UNA>=SND.WL2. + */ + tcp_update_wl(tp, ack, ack_seq); + tp->snd_una = ack; + flag |= FLAG_WIN_UPDATE; + + NET_INC_STATS_BH(TCPHPAcks); + } else { + if (ack_seq != TCP_SKB_CB(skb)->end_seq) + flag |= FLAG_DATA; + else + NET_INC_STATS_BH(TCPPureAcks); + + flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq); + + if (TCP_SKB_CB(skb)->sacked) + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); + + if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) + flag |= FLAG_ECE; + } + + /* We passed data and got it acked, remove any soft error + * log. Something worked... + */ + sk->err_soft = 0; + tp->rcv_tstamp = tcp_time_stamp; + if ((prior_packets = tp->packets_out) == 0) + goto no_queue; + + prior_in_flight = tcp_packets_in_flight(tp); + + /* See if we can take anything off of the retransmit queue. */ + flag |= tcp_clean_rtx_queue(sk); + + if (tcp_ack_is_dubious(tp, flag)) { + /* Advanve CWND, if state allows this. 
*/ + if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd && + tcp_may_raise_cwnd(tp, flag)) + tcp_cong_avoid(tp); + tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); + } else { + if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd) + tcp_cong_avoid(tp); + } + + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) + dst_confirm(sk->dst_cache); + + return 1; + +no_queue: + tp->probes_out = 0; + + /* If this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than + * it needs to be for normal retransmission. + */ + if (tp->send_head) + tcp_ack_probe(sk); + return 1; + +old_ack: + if (TCP_SKB_CB(skb)->sacked) + tcp_sacktag_write_queue(sk, skb, prior_snd_una); + +uninteresting_ack: + SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); + return 0; +#else + return 0; +#endif +} + + +/* Look for tcp options. Normally only called on SYN and SYNACK packets. + * But, this can also be called on packets in the established flow when + * the fast version below fails. + */ +void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab) +{ +#if 0 + unsigned char *ptr; + struct tcphdr *th = skb->h.th; + int length=(th->doff*4)-sizeof(struct tcphdr); + + ptr = (unsigned char *)(th + 1); + tp->saw_tstamp = 0; + + while(length>0) { + int opcode=*ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize=*ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + return; /* don't parse partial options */ + switch(opcode) { + case TCPOPT_MSS: + if(opsize==TCPOLEN_MSS && th->syn && !estab) { + u16 in_mss = ntohs(*(__u16 *)ptr); + if (in_mss) { + if (tp->user_mss && tp->user_mss < in_mss) + in_mss = tp->user_mss; + tp->mss_clamp = in_mss; + } + } + break; + case TCPOPT_WINDOW: + if(opsize==TCPOLEN_WINDOW && th->syn && !estab) + if (sysctl_tcp_window_scaling) { + tp->wscale_ok = 1; + tp->snd_wscale = *(__u8 *)ptr; + if(tp->snd_wscale > 14) { + if(net_ratelimit()) + printk("tcp_parse_options: Illegal window " + "scaling value %d >14 received.", + tp->snd_wscale); + tp->snd_wscale = 14; + } + } + break; + case TCPOPT_TIMESTAMP: + if(opsize==TCPOLEN_TIMESTAMP) { + if ((estab && tp->tstamp_ok) || + (!estab && sysctl_tcp_timestamps)) { + tp->saw_tstamp = 1; + tp->rcv_tsval = ntohl(*(__u32 *)ptr); + tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); + } + } + break; + case TCPOPT_SACK_PERM: + if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { + if (sysctl_tcp_sack) { + tp->sack_ok = 1; + tcp_sack_reset(tp); + } + } + break; + + case TCPOPT_SACK: + if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && + !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && + tp->sack_ok) { + TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; + } + }; + ptr+=opsize-2; + length-=opsize; + }; + } +#endif +} + +/* Fast parse options. This hopes to only see timestamps. + * If it is wrong it falls back on tcp_parse_options(). 
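+ *
+ * Editor's note: the "hoped for" layout is the RFC 1323 aligned form:
+ * NOP, NOP, TIMESTAMP, length 10, then the two 32-bit stamps. The sketch
+ * below only shows how the signature word the fast path compares against
+ * is built; the names are illustrative, and the real code applies ntohl()
+ * to handle byte order.
+ */
+#if 0
+#define SKETCH_TCPOPT_NOP 1
+#define SKETCH_TCPOPT_TIMESTAMP 8
+#define SKETCH_TCPOLEN_TIMESTAMP 10
+
+static __u32 sketch_predicted_ts_word(void)
+{
+ /* (1<<24)|(1<<16)|(8<<8)|10 == 0x0101080A */
+ return (SKETCH_TCPOPT_NOP << 24) | (SKETCH_TCPOPT_NOP << 16)
+ | (SKETCH_TCPOPT_TIMESTAMP << 8) | SKETCH_TCPOLEN_TIMESTAMP;
+}
+#endif
+
+/* Fast parse options (see the comment above).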
+ */ +static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp) +{ +#if 0 + if (th->doff == sizeof(struct tcphdr)>>2) { + tp->saw_tstamp = 0; + return 0; + } else if (tp->tstamp_ok && + th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + __u32 *ptr = (__u32 *)(th + 1); + if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { + tp->saw_tstamp = 1; + ++ptr; + tp->rcv_tsval = ntohl(*ptr); + ++ptr; + tp->rcv_tsecr = ntohl(*ptr); + return 1; + } + } + tcp_parse_options(skb, tp, 1); + return 1; +#else + return 0; +#endif +} + +extern __inline__ void +tcp_store_ts_recent(struct tcp_opt *tp) +{ +#if 0 + tp->ts_recent = tp->rcv_tsval; + tp->ts_recent_stamp = xtime.tv_sec; +#endif +} + +extern __inline__ void +tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq) +{ +#if 0 + if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 || + xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) + tcp_store_ts_recent(tp); + } +#endif +} + +/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM + * + * It is not fatal. If this ACK does _not_ change critical state (seqs, window) + * it can pass through stack. So, the following predicate verifies that + * this segment is not used for anything but congestion avoidance or + * fast retransmit. Moreover, we even are able to eliminate most of such + * second order effects, if we apply some small "replay" window (~RTO) + * to timestamp space. + * + * All these measures still do not guarantee that we reject wrapped ACKs + * on networks with high bandwidth, when sequence space is recycled fastly, + * but it guarantees that such events will be very rare and do not affect + * connection seriously. This doesn't look nice, but alas, PAWS is really + * buggy extension. + * + * [ Later note. Even worse! It is buggy for segments _with_ data. RFC + * states that events when retransmit arrives after original data are rare. + * It is a blatant lie. VJ forgot about fast retransmit! 8)8) It is + * the biggest problem on large power networks even with minor reordering. + * OK, let's give it small replay window. If peer clock is even 1hz, it is safe + * up to bandwidth of 18Gigabit/sec. 8) ] + */ + +static int tcp_disordered_ack(struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + struct tcphdr *th = skb->h.th; + u32 seq = TCP_SKB_CB(skb)->seq; + u32 ack = TCP_SKB_CB(skb)->ack_seq; + + return (/* 1. Pure ACK with correct sequence number. */ + (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) && + + /* 2. ... and duplicate ACK. */ + ack == tp->snd_una && + + /* 3. ... and does not update window. */ + !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<snd_wscale) && + + /* 4. ... and sits in replay window. */ + (s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ); +#endif +} + +extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW && + xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS && + !tcp_disordered_ack(tp, skb)); +#else + return 0; +#endif +} + +/* Check segment sequence number for validity. 
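+ *
+ * Editor's note: the PAWS and window checks around here all rely on the
+ * wrap-around-safe before()/after() comparisons. A minimal sketch of how
+ * such comparisons are conventionally written is given below; the helper
+ * names are hypothetical, and the driver takes its real before()/after()
+ * from the imported tcpcore headers.
+ */
+#if 0
+static __inline__ int sketch_seq_before(__u32 seq1, __u32 seq2)
+{
+ /* true even across wrap: before(0xFFFFFFF0, 0x00000010) holds. */
+ return (__s32)(seq1 - seq2) < 0;
+}
+
+static __inline__ int sketch_seq_between(__u32 seq, __u32 low, __u32 high)
+{
+ /* low <= seq <= high, modulo 2^32 */
+ return !sketch_seq_before(seq, low) && !sketch_seq_before(high, seq);
+}
+#endif
+
+/* Check segment sequence number for validity (continued).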
+ * + * Segment controls are considered valid, if the segment + * fits to the window after truncation to the window. Acceptability + * of data (and SYN, FIN, of course) is checked separately. + * See tcp_data_queue(), for example. + * + * Also, controls (RST is main one) are accepted using RCV.WUP instead + * of RCV.NXT. Peer still did not advance his SND.UNA when we + * delayed ACK, so that hisSND.UNA<=ourRCV.WUP. + * (borrowed from freebsd) + */ + +static inline int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq) +{ +#if 0 + return !before(end_seq, tp->rcv_wup) && + !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); +#else + return 0; +#endif +} + +/* When we get a reset we do this. */ +static void tcp_reset(struct sock *sk) +{ +#if 0 + /* We want the right error as BSD sees it (and indeed as we do). */ + switch (sk->state) { + case TCP_SYN_SENT: + sk->err = ECONNREFUSED; + break; + case TCP_CLOSE_WAIT: + sk->err = EPIPE; + break; + case TCP_CLOSE: + return; + default: + sk->err = ECONNRESET; + } + + if (!sk->dead) + sk->error_report(sk); + + tcp_done(sk); +#endif +} + +/* + * Process the FIN bit. This now behaves as it is supposed to work + * and the FIN takes effect when it is validly part of sequence + * space. Not before when we get holes. + * + * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT + * (and thence onto LAST-ACK and finally, CLOSE, we never enter + * TIME-WAIT) + * + * If we are in FINWAIT-1, a received FIN indicates simultaneous + * close and we go into CLOSING (and later onto TIME-WAIT) + * + * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. + */ +static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + tcp_schedule_ack(tp); + + sk->shutdown |= RCV_SHUTDOWN; + sk->done = 1; + + switch(sk->state) { + case TCP_SYN_RECV: + case TCP_ESTABLISHED: + /* Move to CLOSE_WAIT */ + tcp_set_state(sk, TCP_CLOSE_WAIT); + tp->ack.pingpong = 1; + break; + + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + /* Received a retransmission of the FIN, do + * nothing. + */ + break; + case TCP_LAST_ACK: + /* RFC793: Remain in the LAST-ACK state. */ + break; + + case TCP_FIN_WAIT1: + /* This case occurs when a simultaneous close + * happens, we must ack the received FIN and + * enter the CLOSING state. + */ + tcp_send_ack(sk); + tcp_set_state(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + /* Received a FIN -- send ACK and enter TIME_WAIT. */ + tcp_send_ack(sk); + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + break; + default: + /* Only TCP_LISTEN and TCP_CLOSE are left, in these + * cases we should never reach this piece of code. + */ + printk("tcp_fin: Impossible, sk->state=%d\n", sk->state); + break; + }; + + /* It _is_ possible, that we have something out-of-order _after_ FIN. + * Probably, we should reset in this case. For now drop them. + */ + __skb_queue_purge(&tp->out_of_order_queue); + if (tp->sack_ok) + tcp_sack_reset(tp); + tcp_mem_reclaim(sk); + + if (!sk->dead) { + sk->state_change(sk); + + /* Do not send POLL_HUP for half duplex close. 
 */
+ if (sk->shutdown == SHUTDOWN_MASK || sk->state == TCP_CLOSE)
+ sk_wake_async(sk, 1, POLL_HUP);
+ else
+ sk_wake_async(sk, 1, POLL_IN);
+ }
+#endif
+}
+
+static __inline__ int
+tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+{
+#if 0
+ if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
+ if (before(seq, sp->start_seq))
+ sp->start_seq = seq;
+ if (after(end_seq, sp->end_seq))
+ sp->end_seq = end_seq;
+ return 1;
+ }
+ return 0;
+#else
+ return 0;
+#endif
+}
+
+static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq)
+{
+#if 0
+ if (tp->sack_ok && sysctl_tcp_dsack) {
+ if (before(seq, tp->rcv_nxt))
+ NET_INC_STATS_BH(TCPDSACKOldSent);
+ else
+ NET_INC_STATS_BH(TCPDSACKOfoSent);
+
+ tp->dsack = 1;
+ tp->duplicate_sack[0].start_seq = seq;
+ tp->duplicate_sack[0].end_seq = end_seq;
+ tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok);
+ }
+#endif
+}
+
+static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq)
+{
+#if 0
+ if (!tp->dsack)
+ tcp_dsack_set(tp, seq, end_seq);
+ else
+ tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
+#endif
+}
+
+static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+{
+#if 0
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+ NET_INC_STATS_BH(DelayedACKLost);
+ tcp_enter_quickack_mode(tp);
+
+ if (tp->sack_ok && sysctl_tcp_dsack) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
+ end_seq = tp->rcv_nxt;
+ tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
+ }
+ }
+
+ tcp_send_ack(sk);
+#endif
+}
+
+/* These routines update the SACK block as out-of-order packets arrive or
+ * in-order packets close up the sequence space.
+ */
+static void tcp_sack_maybe_coalesce(struct tcp_opt *tp)
+{
+#if 0
+ int this_sack;
+ struct tcp_sack_block *sp = &tp->selective_acks[0];
+ struct tcp_sack_block *swalk = sp+1;
+
+ /* See if the recent change to the first SACK eats into
+ * or hits the sequence space of other SACK blocks, if so coalesce.
+ */
+ for (this_sack = 1; this_sack < tp->num_sacks; ) {
+ if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
+ int i;
+
+ /* Zap SWALK, by moving every further SACK up by one slot.
+ * Decrease num_sacks.
+ */
+ tp->num_sacks--;
+ tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
+ for(i=this_sack; i < tp->num_sacks; i++)
+ sp[i] = sp[i+1];
+ continue;
+ }
+ this_sack++, swalk++;
+ }
+#endif
+}
+
+static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+{
+#if 0
+ __u32 tmp;
+
+ tmp = sack1->start_seq;
+ sack1->start_seq = sack2->start_seq;
+ sack2->start_seq = tmp;
+
+ tmp = sack1->end_seq;
+ sack1->end_seq = sack2->end_seq;
+ sack2->end_seq = tmp;
+#endif
+}
+
+static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
+{
+#if 0
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct tcp_sack_block *sp = &tp->selective_acks[0];
+ int cur_sacks = tp->num_sacks;
+ int this_sack;
+
+ if (!cur_sacks)
+ goto new_sack;
+
+ for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
+ if (tcp_sack_extend(sp, seq, end_seq)) {
+ /* Rotate this_sack to the first one. */
+ for (; this_sack>0; this_sack--, sp--)
+ tcp_sack_swap(sp, sp-1);
+ if (cur_sacks > 1)
+ tcp_sack_maybe_coalesce(tp);
+ return;
+ }
+ }
+
+ /* Could not find an adjacent existing SACK, build a new one,
+ * put it at the front, and shift everyone else down. We
+ * always know there is at least one SACK present already here.
+ *
+ * If the sack array is full, forget about the last one.
+ */ + if (this_sack >= 4) { + this_sack--; + tp->num_sacks--; + sp--; + } + for(; this_sack > 0; this_sack--, sp--) + *sp = *(sp-1); + +new_sack: + /* Build the new head SACK, and we're done. */ + sp->start_seq = seq; + sp->end_seq = end_seq; + tp->num_sacks++; + tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); +#endif +} + +/* RCV.NXT advances, some SACKs should be eaten. */ + +static void tcp_sack_remove(struct tcp_opt *tp) +{ +#if 0 + struct tcp_sack_block *sp = &tp->selective_acks[0]; + int num_sacks = tp->num_sacks; + int this_sack; + + /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ + if (skb_queue_len(&tp->out_of_order_queue) == 0) { + tp->num_sacks = 0; + tp->eff_sacks = tp->dsack; + return; + } + + for(this_sack = 0; this_sack < num_sacks; ) { + /* Check if the start of the sack is covered by RCV.NXT. */ + if (!before(tp->rcv_nxt, sp->start_seq)) { + int i; + + /* RCV.NXT must cover all the block! */ + BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq)); + + /* Zap this SACK, by moving forward any other SACKS. */ + for (i=this_sack+1; i < num_sacks; i++) + tp->selective_acks[i-1] = tp->selective_acks[i]; + num_sacks--; + continue; + } + this_sack++; + sp++; + } + if (num_sacks != tp->num_sacks) { + tp->num_sacks = num_sacks; + tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); + } +#endif +} + +/* This one checks to see if we can put data from the + * out_of_order queue into the receive_queue. + */ +static void tcp_ofo_queue(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + __u32 dsack_high = tp->rcv_nxt; + struct sk_buff *skb; + + while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { + if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) + break; + + if (before(TCP_SKB_CB(skb)->seq, dsack_high)) { + __u32 dsack = dsack_high; + if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) + dsack_high = TCP_SKB_CB(skb)->end_seq; + tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack); + } + + if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + SOCK_DEBUG(sk, "ofo packet was already received \n"); + __skb_unlink(skb, skb->list); + __kfree_skb(skb); + continue; + } + SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + + __skb_unlink(skb, skb->list); + __skb_queue_tail(&sk->receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + if(skb->h.th->fin) + tcp_fin(skb, sk, skb->h.th); + } +#endif +} + +static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + return (int)skb->truesize <= sk->forward_alloc || + tcp_mem_schedule(sk, skb->truesize, 1); +#else + return 0; +#endif +} + +static int tcp_prune_queue(struct sock *sk); + +static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct tcphdr *th = skb->h.th; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int eaten = -1; + + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) + goto drop; + + th = skb->h.th; + __skb_pull(skb, th->doff*4); + + TCP_ECN_accept_cwr(tp, skb); + + if (tp->dsack) { + tp->dsack = 0; + tp->eff_sacks = min_t(unsigned int, tp->num_sacks, 4-tp->tstamp_ok); + } + + /* Queue data for delivery to the user. + * Packets in sequence go to the receive queue. + * Out of sequence packets to the out_of_order_queue. + */ + if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + if (tcp_receive_window(tp) == 0) + goto out_of_window; + + /* Ok. In sequence. In window. 
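+ *
+ * (Editor's note: the branch below first tries to copy the payload straight
+ * into the iovec of the task that is currently inside recvmsg() on this
+ * socket (tp->ucopy); only if that is not possible is the skb charged to
+ * the socket and appended to receive_queue.)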
*/ + if (tp->ucopy.task == current && + tp->copied_seq == tp->rcv_nxt && + tp->ucopy.len && + sk->lock.users && + !tp->urg_data) { + int chunk = min_t(unsigned int, skb->len, tp->ucopy.len); + + __set_current_state(TASK_RUNNING); + + local_bh_enable(); + if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { + tp->ucopy.len -= chunk; + tp->copied_seq += chunk; + eaten = (chunk == skb->len && !th->fin); + } + local_bh_disable(); + } + + if (eaten <= 0) { +queue_and_out: + if (eaten < 0 && + (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || + !tcp_rmem_schedule(sk, skb))) { + if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) + goto drop; + } + tcp_set_owner_r(skb, sk); + __skb_queue_tail(&sk->receive_queue, skb); + } + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + if(skb->len) + tcp_event_data_recv(sk, tp, skb); + if(th->fin) + tcp_fin(skb, sk, th); + + if (skb_queue_len(&tp->out_of_order_queue)) { + tcp_ofo_queue(sk); + + /* RFC2581. 4.2. SHOULD send immediate ACK, when + * gap in queue is filled. + */ + if (skb_queue_len(&tp->out_of_order_queue) == 0) + tp->ack.pingpong = 0; + } + + if(tp->num_sacks) + tcp_sack_remove(tp); + + tcp_fast_path_check(sk, tp); + + if (eaten > 0) { + __kfree_skb(skb); + } else if (!sk->dead) + sk->data_ready(sk, 0); + return; + } + + if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + /* A retransmit, 2nd most common case. Force an immediate ack. */ + NET_INC_STATS_BH(DelayedACKLost); + tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + +out_of_window: + tcp_enter_quickack_mode(tp); + tcp_schedule_ack(tp); +drop: + __kfree_skb(skb); + return; + } + + /* Out of window. F.e. zero window probe. */ + if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt+tcp_receive_window(tp))) + goto out_of_window; + + tcp_enter_quickack_mode(tp); + + if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + /* Partial packet, seq < rcv_next < end_seq */ + SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + + tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); + + /* If window is closed, drop tail of packet. But after + * remembering D-SACK for its head made in previous line. + */ + if (!tcp_receive_window(tp)) + goto out_of_window; + goto queue_and_out; + } + + TCP_ECN_check_ce(tp, skb); + + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || + !tcp_rmem_schedule(sk, skb)) { + if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) + goto drop; + } + + /* Disable header prediction. */ + tp->pred_flags = 0; + tcp_schedule_ack(tp); + + SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + + tcp_set_owner_r(skb, sk); + + if (skb_peek(&tp->out_of_order_queue) == NULL) { + /* Initial out of order segment, build 1 SACK. */ + if(tp->sack_ok) { + tp->num_sacks = 1; + tp->dsack = 0; + tp->eff_sacks = 1; + tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; + tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; + } + __skb_queue_head(&tp->out_of_order_queue,skb); + } else { + struct sk_buff *skb1=tp->out_of_order_queue.prev; + u32 seq = TCP_SKB_CB(skb)->seq; + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + + if (seq == TCP_SKB_CB(skb1)->end_seq) { + __skb_append(skb1, skb); + + if (tp->num_sacks == 0 || + tp->selective_acks[0].end_seq != seq) + goto add_sack; + + /* Common case: data arrive in order after hole. */ + tp->selective_acks[0].end_seq = end_seq; + return; + } + + /* Find place to insert this segment. 
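+ * (Editor's note: the queue is kept ordered by start sequence, so the loop
+ * below walks out_of_order_queue backwards from the tail until it finds a
+ * segment that does not start after "seq"; overlaps with that neighbour and
+ * with the following segments are then trimmed or merged and reported via
+ * D-SACK.)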
*/ + do { + if (!after(TCP_SKB_CB(skb1)->seq, seq)) + break; + } while ((skb1=skb1->prev) != (struct sk_buff*)&tp->out_of_order_queue); + + /* Do skb overlap to previous one? */ + if (skb1 != (struct sk_buff*)&tp->out_of_order_queue && + before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + __kfree_skb(skb); + tcp_dsack_set(tp, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq); + } else { + skb1 = skb1->prev; + } + } + __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); + + /* And clean segments covered by new one as whole. */ + while ((skb1 = skb->next) != (struct sk_buff*)&tp->out_of_order_queue && + after(end_seq, TCP_SKB_CB(skb1)->seq)) { + if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); + break; + } + __skb_unlink(skb1, skb1->list); + tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); + __kfree_skb(skb1); + } + +add_sack: + if (tp->sack_ok) + tcp_sack_new_ofo_skb(sk, seq, end_seq); + } +#endif +} + +/* Collapse contiguous sequence of skbs head..tail with + * sequence numbers start..end. + * Segments with FIN/SYN are not collapsed (only because this + * simplifies code) + */ +static void +tcp_collapse(struct sock *sk, struct sk_buff *head, + struct sk_buff *tail, u32 start, u32 end) +{ +#if 0 + struct sk_buff *skb; + + /* First, check that queue is collapsable and find + * the point where collapsing can be useful. */ + for (skb = head; skb != tail; ) { + /* No new bits? It is possible on ofo queue. */ + if (!before(start, TCP_SKB_CB(skb)->end_seq)) { + struct sk_buff *next = skb->next; + __skb_unlink(skb, skb->list); + __kfree_skb(skb); + NET_INC_STATS_BH(TCPRcvCollapsed); + skb = next; + continue; + } + + /* The first skb to collapse is: + * - not SYN/FIN and + * - bloated or contains data before "start" or + * overlaps to the next one. + */ + if (!skb->h.th->syn && !skb->h.th->fin && + (tcp_win_from_space(skb->truesize) > skb->len || + before(TCP_SKB_CB(skb)->seq, start) || + (skb->next != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq))) + break; + + /* Decided to skip this, advance start seq. */ + start = TCP_SKB_CB(skb)->end_seq; + skb = skb->next; + } + if (skb == tail || skb->h.th->syn || skb->h.th->fin) + return; + + while (before(start, end)) { + struct sk_buff *nskb; + int header = skb_headroom(skb); + int copy = (PAGE_SIZE - sizeof(struct sk_buff) - + sizeof(struct skb_shared_info) - header - 31)&~15; + + /* Too big header? This can happen with IPv6. */ + if (copy < 0) + return; + if (end-start < copy) + copy = end-start; + nskb = alloc_skb(copy+header, GFP_ATOMIC); + if (!nskb) + return; + skb_reserve(nskb, header); + memcpy(nskb->head, skb->head, header); + nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head); + nskb->h.raw = nskb->head + (skb->h.raw-skb->head); + nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); + memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); + TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; + __skb_insert(nskb, skb->prev, skb, skb->list); + tcp_set_owner_r(nskb, sk); + + /* Copy data, releasing collapsed skbs. 
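+ *
+ * [Illustrative aside, not part of the imported Linux code: the "copy"
+ *  computed a few lines above is "how much payload fits in one page next
+ *  to the skb bookkeeping", minus a little slack, rounded down to a
+ *  16-byte multiple.  With purely hypothetical sizes (PAGE_SIZE 4096,
+ *  sizeof(struct sk_buff) 160, sizeof(struct skb_shared_info) 12 and a
+ *  64-byte header) that is (4096 - 160 - 12 - 64 - 31) & ~15
+ *  = 3829 & ~15 = 3824 bytes, which is indeed a multiple of 16.]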
*/ + while (copy > 0) { + int offset = start - TCP_SKB_CB(skb)->seq; + int size = TCP_SKB_CB(skb)->end_seq - start; + + if (offset < 0) BUG(); + if (size > 0) { + size = min(copy, size); + if (skb_copy_bits(skb, offset, skb_put(nskb, size), size)) + BUG(); + TCP_SKB_CB(nskb)->end_seq += size; + copy -= size; + start += size; + } + if (!before(start, TCP_SKB_CB(skb)->end_seq)) { + struct sk_buff *next = skb->next; + __skb_unlink(skb, skb->list); + __kfree_skb(skb); + NET_INC_STATS_BH(TCPRcvCollapsed); + skb = next; + if (skb == tail || skb->h.th->syn || skb->h.th->fin) + return; + } + } + } +#endif +} + +/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs + * and tcp_collapse() them until all the queue is collapsed. + */ +static void tcp_collapse_ofo_queue(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); + struct sk_buff *head; + u32 start, end; + + if (skb == NULL) + return; + + start = TCP_SKB_CB(skb)->seq; + end = TCP_SKB_CB(skb)->end_seq; + head = skb; + + for (;;) { + skb = skb->next; + + /* Segment is terminated when we see gap or when + * we are at the end of all the queue. */ + if (skb == (struct sk_buff *)&tp->out_of_order_queue || + after(TCP_SKB_CB(skb)->seq, end) || + before(TCP_SKB_CB(skb)->end_seq, start)) { + tcp_collapse(sk, head, skb, start, end); + head = skb; + if (skb == (struct sk_buff *)&tp->out_of_order_queue) + break; + /* Start new segment */ + start = TCP_SKB_CB(skb)->seq; + end = TCP_SKB_CB(skb)->end_seq; + } else { + if (before(TCP_SKB_CB(skb)->seq, start)) + start = TCP_SKB_CB(skb)->seq; + if (after(TCP_SKB_CB(skb)->end_seq, end)) + end = TCP_SKB_CB(skb)->end_seq; + } + } +#endif +} + +/* Reduce allocated memory if we can, trying to get + * the socket within its memory limits again. + * + * Return less than zero if we should start dropping frames + * until the socket owning process reads some of the data + * to stabilize the situation. + */ +static int tcp_prune_queue(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); + + NET_INC_STATS_BH(PruneCalled); + + if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf) + tcp_clamp_window(sk, tp); + else if (tcp_memory_pressure) + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); + + tcp_collapse_ofo_queue(sk); + tcp_collapse(sk, sk->receive_queue.next, + (struct sk_buff*)&sk->receive_queue, + tp->copied_seq, tp->rcv_nxt); + tcp_mem_reclaim(sk); + + if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) + return 0; + + /* Collapsing did not help, destructive actions follow. + * This must not ever occur. */ + + /* First, purge the out_of_order queue. */ + if (skb_queue_len(&tp->out_of_order_queue)) { + net_statistics[smp_processor_id()*2].OfoPruned += skb_queue_len(&tp->out_of_order_queue); + __skb_queue_purge(&tp->out_of_order_queue); + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if(tp->sack_ok) + tcp_sack_reset(tp); + tcp_mem_reclaim(sk); + } + + if(atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) + return 0; + + /* If we are really being abused, tell the caller to silently + * drop receive data on the floor. It will get retransmitted + * and hopefully then we'll have sufficient space. + */ + NET_INC_STATS_BH(RcvPruned); + + /* Massive buffer overcommit. 
*/ + tp->pred_flags = 0; + return -1; +#else + return 0; +#endif +} + + +/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. + * As additional protections, we do not touch cwnd in retransmission phases, + * and if application hit its sndbuf limit recently. + */ +void tcp_cwnd_application_limited(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (tp->ca_state == TCP_CA_Open && + sk->socket && !test_bit(SOCK_NOSPACE, &sk->socket->flags)) { + /* Limited by application or receiver window. */ + u32 win_used = max(tp->snd_cwnd_used, 2U); + if (win_used < tp->snd_cwnd) { + tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_cwnd = (tp->snd_cwnd+win_used)>>1; + } + tp->snd_cwnd_used = 0; + } + tp->snd_cwnd_stamp = tcp_time_stamp; +#endif +} + + +/* When incoming ACK allowed to free some skb from write_queue, + * we remember this event in flag tp->queue_shrunk and wake up socket + * on the exit from tcp input handler. + */ +static void tcp_new_space(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (tp->packets_out < tp->snd_cwnd && + !(sk->userlocks&SOCK_SNDBUF_LOCK) && + !tcp_memory_pressure && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + int sndmem, demanded; + + sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff); + demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering+1); + sndmem *= 2*demanded; + if (sndmem > sk->sndbuf) + sk->sndbuf = min(sndmem, sysctl_tcp_wmem[2]); + tp->snd_cwnd_stamp = tcp_time_stamp; + } + + sk->write_space(sk); +#endif +} + +static inline void tcp_check_space(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (tp->queue_shrunk) { + tp->queue_shrunk = 0; + if (sk->socket && test_bit(SOCK_NOSPACE, &sk->socket->flags)) + tcp_new_space(sk); + } +#endif +} + +static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) || + tcp_packets_in_flight(tp) >= tp->snd_cwnd || + tcp_write_xmit(sk, tp->nonagle)) + tcp_check_probe_timer(sk, tp); +#endif +} + +static __inline__ void tcp_data_snd_check(struct sock *sk) +{ +#if 0 + struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head; + + if (skb != NULL) + __tcp_data_snd_check(sk, skb); + tcp_check_space(sk); +#endif +} + +/* + * Check if sending an ack is needed. + */ +static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* More than one full frame received... */ + if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss + /* ... and right edge of window advances far enough. + * (tcp_recvmsg() will send ACK otherwise). Or... + */ + && __tcp_select_window(sk) >= tp->rcv_wnd) || + /* We ACK each frame or... */ + tcp_in_quickack_mode(tp) || + /* We have out of order data. */ + (ofo_possible && + skb_peek(&tp->out_of_order_queue) != NULL)) { + /* Then ack it now */ + tcp_send_ack(sk); + } else { + /* Else, send delayed ack. */ + tcp_send_delayed_ack(sk); + } +#endif +} + +static __inline__ void tcp_ack_snd_check(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (!tcp_ack_scheduled(tp)) { + /* We sent a data segment already. */ + return; + } + __tcp_ack_snd_check(sk, 1); +#endif +} + +/* + * This routine is only called when we have urgent data + * signalled. Its the 'slow' part of tcp_urg. 
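+ *
+ * [Illustrative aside, not part of the imported Linux code: the sketch
+ *  shows the arithmetic the routine below uses to turn the urgent pointer
+ *  into an absolute sequence number under the two interpretations selected
+ *  by sysctl_tcp_stdurg (0: BSD-compatible, the pointer is one past the
+ *  urgent byte; non-zero: the pointer is the urgent byte itself).  The
+ *  demo_* name is hypothetical.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+static u32 demo_urg_seq(u32 seg_seq /* ntohl(th->seq) */,
+                        u32 urg_ptr /* ntohs(th->urg_ptr) */,
+                        int tcp_stdurg)
+{
+ if (urg_ptr && !tcp_stdurg)
+  urg_ptr--;           /* BSD style: step back onto the urgent byte */
+ return seg_seq + urg_ptr;
+}
+/* Example: seg_seq = 1000, urg_ptr = 5, tcp_stdurg = 0 -> urgent byte 1004. */
+#endif /* illustrative sketch */
+/* (end of illustrative aside) The imported comment continues: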
It could be + * moved inline now as tcp_urg is only called from one + * place. We handle URGent data wrong. We have to - as + * BSD still doesn't use the correction from RFC961. + * For 1003.1g we should support a new option TCP_STDURG to permit + * either form (or just set the sysctl tcp_stdurg). + */ + +static void tcp_check_urg(struct sock * sk, struct tcphdr * th) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 ptr = ntohs(th->urg_ptr); + + if (ptr && !sysctl_tcp_stdurg) + ptr--; + ptr += ntohl(th->seq); + + /* Ignore urgent data that we've already seen and read. */ + if (after(tp->copied_seq, ptr)) + return; + + /* Do not replay urg ptr. + * + * NOTE: interesting situation not covered by specs. + * Misbehaving sender may send urg ptr, pointing to segment, + * which we already have in ofo queue. We are not able to fetch + * such data and will stay in TCP_URG_NOTYET until will be eaten + * by recvmsg(). Seems, we are not obliged to handle such wicked + * situations. But it is worth to think about possibility of some + * DoSes using some hypothetical application level deadlock. + */ + if (before(ptr, tp->rcv_nxt)) + return; + + /* Do we already have a newer (or duplicate) urgent pointer? */ + if (tp->urg_data && !after(ptr, tp->urg_seq)) + return; + + /* Tell the world about our new urgent pointer. */ + if (sk->proc != 0) { + if (sk->proc > 0) + kill_proc(sk->proc, SIGURG, 1); + else + kill_pg(-sk->proc, SIGURG, 1); + sk_wake_async(sk, 3, POLL_PRI); + } + + /* We may be adding urgent data when the last byte read was + * urgent. To do this requires some care. We cannot just ignore + * tp->copied_seq since we would read the last urgent byte again + * as data, nor can we alter copied_seq until this data arrives + * or we break the sematics of SIOCATMARK (and thus sockatmark()) + * + * NOTE. Double Dutch. Rendering to plain English: author of comment + * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); + * and expect that both A and B disappear from stream. This is _wrong_. + * Though this happens in BSD with high probability, this is occasional. + * Any application relying on this is buggy. Note also, that fix "works" + * only in this artificial test. Insert some normal data between A and B and we will + * decline of BSD again. Verdict: it is better to remove to trap + * buggy users. + */ + if (tp->urg_seq == tp->copied_seq && tp->urg_data && + !sk->urginline && + tp->copied_seq != tp->rcv_nxt) { + struct sk_buff *skb = skb_peek(&sk->receive_queue); + tp->copied_seq++; + if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { + __skb_unlink(skb, skb->list); + __kfree_skb(skb); + } + } + + tp->urg_data = TCP_URG_NOTYET; + tp->urg_seq = ptr; + + /* Disable header prediction. */ + tp->pred_flags = 0; +#endif +} + +/* This is the 'fast' part of urgent handling. */ +static inline void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Check if we get a new urgent pointer - normally not. */ + if (th->urg) + tcp_check_urg(sk,th); + + /* Do we wait for any urgent data? - normally not... */ + if (tp->urg_data == TCP_URG_NOTYET) { + u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4) - th->syn; + + /* Is the urgent pointer pointing into this packet? 
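+ *
+ * [Illustrative aside, not part of the imported Linux code: at this point
+ *  the skb still includes the TCP header, so "ptr" computed above is an
+ *  offset from the start of that header.  A purely hypothetical worked
+ *  example: urg_seq = 1005, ntohl(th->seq) = 1000, th->doff = 5 (20-byte
+ *  header), th->syn = 0 gives ptr = 1005 - 1000 + 20 - 0 = 25; with 100
+ *  bytes of payload skb->len is 120, so ptr < skb->len and the urgent
+ *  byte is fetched from offset 25 of this very segment.]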
*/ + if (ptr < skb->len) { + u8 tmp; + if (skb_copy_bits(skb, ptr, &tmp, 1)) + BUG(); + tp->urg_data = TCP_URG_VALID | tmp; + if (!sk->dead) + sk->data_ready(sk,0); + } + } +#endif +} + +static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int chunk = skb->len - hlen; + int err; + + local_bh_enable(); + if (skb->ip_summed==CHECKSUM_UNNECESSARY) + err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); + else + err = skb_copy_and_csum_datagram_iovec(skb, hlen, tp->ucopy.iov); + + if (!err) { + tp->ucopy.len -= chunk; + tp->copied_seq += chunk; + } + + local_bh_disable(); + return err; +#else + return 0; +#endif +} + +static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + int result; + + if (sk->lock.users) { + local_bh_enable(); + result = __tcp_checksum_complete(skb); + local_bh_disable(); + } else { + result = __tcp_checksum_complete(skb); + } + return result; +#else + return 0; +#endif +} + +static __inline__ int +tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + return skb->ip_summed != CHECKSUM_UNNECESSARY && + __tcp_checksum_complete_user(sk, skb); +#else + return 0; +#endif +} + +/* + * TCP receive function for the ESTABLISHED state. + * + * It is split into a fast path and a slow path. The fast path is + * disabled when: + * - A zero window was announced from us - zero window probing + * is only handled properly in the slow path. + * - Out of order segments arrived. + * - Urgent data is expected. + * - There is no buffer space left + * - Unexpected TCP flags/window values/header lengths are received + * (detected by checking the TCP header against pred_flags) + * - Data is sent in both directions. Fast path only supports pure senders + * or pure receivers (this means either the sequence number or the ack + * value must stay constant) + * - Unexpected TCP option. + * + * When these conditions are not satisfied it drops into a standard + * receive procedure patterned after RFC793 to handle all cases. + * The first three cases are guaranteed by proper pred_flags setting, + * the rest is checked inline. Fast processing is turned on in + * tcp_data_queue when everything is OK. + */ +int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* + * Header prediction. + * The code losely follows the one in the famous + * "30 instruction TCP receive" Van Jacobson mail. + * + * Van's trick is to deposit buffers into socket queue + * on a device interrupt, to call tcp_recv function + * on the receive process context and checksum and copy + * the buffer to user space. smart... + * + * Our current scheme is not silly either but we take the + * extra cost of the net_bh soft interrupt processing... + * We do checksum and copy also but from device to kernel. + */ + + tp->saw_tstamp = 0; + + /* pred_flags is 0xS?10 << 16 + snd_wnd + * if header_predition is to be made + * 'S' will always be tp->tcp_header_len >> 2 + * '?' will be 0 for the fast path, otherwise pred_flags is 0 to + * turn it off (when there are holes in the receive + * space for instance) + * PSH flag is ignored. + */ + + if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && + TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + int tcp_header_len = tp->tcp_header_len; + + /* Timestamp header prediction: tcp_header_len + * is automatically equal to th->doff*4 due to pred_flags + * match. 
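+ *
+ * [Illustrative aside, not part of the imported Linux code: the single
+ *  32-bit compare performed just below works because a timestamp-only
+ *  option block is always aligned as NOP, NOP, kind 8, length 10
+ *  (RFC 1323), i.e. the bytes 01 01 08 0a, followed by TSval and TSecr.
+ *  With timestamps on, th->doff is 8, which is also the "S" nibble in the
+ *  pred_flags layout described above.  The DEMO_* names are hypothetical,
+ *  not kernel symbols.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+enum {
+ DEMO_TCPOPT_NOP        = 1,  /* padding byte                        */
+ DEMO_TCPOPT_TIMESTAMP  = 8,  /* option kind                         */
+ DEMO_TCPOLEN_TIMESTAMP = 10  /* kind + len + two 32-bit timestamps  */
+};
+enum {
+ /* Host-order value of the expected first option word: 0x0101080a. */
+ DEMO_TSTAMP_WORD = (DEMO_TCPOPT_NOP << 24) | (DEMO_TCPOPT_NOP << 16) |
+                    (DEMO_TCPOPT_TIMESTAMP << 8) | DEMO_TCPOLEN_TIMESTAMP
+};
+#endif /* illustrative sketch */
+/* (end of illustrative aside)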
+ */ + + /* Check timestamp */ + if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { + __u32 *ptr = (__u32 *)(th + 1); + + /* No? Slow path! */ + if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) + goto slow_path; + + tp->saw_tstamp = 1; + ++ptr; + tp->rcv_tsval = ntohl(*ptr); + ++ptr; + tp->rcv_tsecr = ntohl(*ptr); + + /* If PAWS failed, check it more carefully in slow path */ + if ((s32)(tp->rcv_tsval - tp->ts_recent) < 0) + goto slow_path; + + /* DO NOT update ts_recent here, if checksum fails + * and timestamp was corrupted part, it will result + * in a hung connection since we will drop all + * future packets due to the PAWS test. + */ + } + + if (len <= tcp_header_len) { + /* Bulk data transfer: sender */ + if (len == tcp_header_len) { + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); + /* We know that such packets are checksummed + * on entry. + */ + tcp_ack(sk, skb, 0); + __kfree_skb(skb); + tcp_data_snd_check(sk); + return 0; + } else { /* Header too small */ + TCP_INC_STATS_BH(TcpInErrs); + goto discard; + } + } else { + int eaten = 0; + + if (tp->ucopy.task == current && + tp->copied_seq == tp->rcv_nxt && + len - tcp_header_len <= tp->ucopy.len && + sk->lock.users) { + __set_current_state(TASK_RUNNING); + + if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); + + __skb_pull(skb, tcp_header_len); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + NET_INC_STATS_BH(TCPHPHitsToUser); + eaten = 1; + } + } + if (!eaten) { + if (tcp_checksum_complete_user(sk, skb)) + goto csum_error; + + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); + + if ((int)skb->truesize > sk->forward_alloc) + goto step5; + + NET_INC_STATS_BH(TCPHPHits); + + /* Bulk data transfer: receiver */ + __skb_pull(skb,tcp_header_len); + __skb_queue_tail(&sk->receive_queue, skb); + tcp_set_owner_r(skb, sk); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + } + + tcp_event_data_recv(sk, tp, skb); + + if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { + /* Well, only one small jumplet in fast path... */ + tcp_ack(sk, skb, FLAG_DATA); + tcp_data_snd_check(sk); + if (!tcp_ack_scheduled(tp)) + goto no_ack; + } + + if (eaten) { + if (tcp_in_quickack_mode(tp)) { + tcp_send_ack(sk); + } else { + tcp_send_delayed_ack(sk); + } + } else { + __tcp_ack_snd_check(sk, 0); + } + +no_ack: + if (eaten) + __kfree_skb(skb); + else + sk->data_ready(sk, 0); + return 0; + } + } + +slow_path: + if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb)) + goto csum_error; + + /* + * RFC1323: H1. Apply PAWS check first. + */ + if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp && + tcp_paws_discard(tp, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(PAWSEstabRejected); + tcp_send_dupack(sk, skb); + goto discard; + } + /* Resets are accepted even if PAWS failed. 
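+ *
+ * [Illustrative aside, not part of the imported Linux code: the core of
+ *  the PAWS test applied above is the same wrap-safe 32-bit comparison
+ *  used for sequence numbers, applied to timestamps; a segment whose
+ *  TSval is "before" the last validated ts_recent is suspect.  The full
+ *  check in the sources also tolerates stale ts_recent values, and the
+ *  caller exempts RSTs, as visible right here.  demo_* is hypothetical.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+static int demo_paws_suspect(u32 rcv_tsval, u32 ts_recent)
+{
+ return (s32)(rcv_tsval - ts_recent) < 0; /* echoes the fast-path check */
+}
+#endif /* illustrative sketch */
+/* (end of illustrative aside) The imported comment continues: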
+ + ts_recent update must be made after we are sure + that the packet is in window. + */ + } + + /* + * Standard slow path. + */ + + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + /* RFC793, page 37: "In all states except SYN-SENT, all reset + * (RST) segments are validated by checking their SEQ-fields." + * And page 69: "If an incoming segment is not acceptable, + * an acknowledgment should be sent in reply (unless the RST bit + * is set, if so drop the segment and return)". + */ + if (!th->rst) + tcp_send_dupack(sk, skb); + goto discard; + } + + if(th->rst) { + tcp_reset(sk); + goto discard; + } + + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + TCP_INC_STATS_BH(TcpInErrs); + NET_INC_STATS_BH(TCPAbortOnSyn); + tcp_reset(sk); + return 1; + } + +step5: + if(th->ack) + tcp_ack(sk, skb, FLAG_SLOWPATH); + + /* Process urgent data. */ + tcp_urg(sk, skb, th); + + /* step 7: process the segment text */ + tcp_data_queue(sk, skb); + + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); + return 0; + +csum_error: + TCP_INC_STATS_BH(TcpInErrs); + +discard: + __kfree_skb(skb); + return 0; +#else + return 0; +#endif +} + +static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int saved_clamp = tp->mss_clamp; + + tcp_parse_options(skb, tp, 0); + + if (th->ack) { + /* rfc793: + * "If the state is SYN-SENT then + * first check the ACK bit + * If the ACK bit is set + * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send + * a reset (unless the RST bit is set, if so drop + * the segment and return)" + * + * We do not send data with SYN, so that RFC-correct + * test reduces to: + */ + if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) + goto reset_and_undo; + + if (tp->saw_tstamp && tp->rcv_tsecr && + !between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { + NET_INC_STATS_BH(PAWSActiveRejected); + goto reset_and_undo; + } + + /* Now ACK is acceptable. + * + * "If the RST bit is set + * If the ACK was acceptable then signal the user "error: + * connection reset", drop the segment, enter CLOSED state, + * delete TCB, and return." + */ + + if (th->rst) { + tcp_reset(sk); + goto discard; + } + + /* rfc793: + * "fifth, if neither of the SYN or RST bits is set then + * drop the segment and return." + * + * See note below! + * --ANK(990513) + */ + if (!th->syn) + goto discard_and_undo; + + /* rfc793: + * "If the SYN bit is on ... + * are acceptable then ... + * (our SYN has been ACKed), change the connection + * state to ESTABLISHED..." + */ + + TCP_ECN_rcv_synack(tp, th); + + tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tcp_ack(sk, skb, FLAG_SLOWPATH); + + /* Ok.. it's good. Set up sequence numbers and + * move to established. + */ + tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1; + tp->rcv_wup = TCP_SKB_CB(skb)->seq+1; + + /* RFC1323: The window in SYN & SYN/ACK segments is + * never scaled. 
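+ *
+ * [Illustrative aside, not part of the imported Linux code: it restates
+ *  the rule the next few lines implement.  The window carried by a SYN or
+ *  SYN-ACK is used verbatim, and the negotiated shift count is applied
+ *  only to segments exchanged after the handshake (compare the
+ *  "ntohs(th->window) << tp->snd_wscale" in the established path further
+ *  down this file).  The demo_* name is hypothetical.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+static u32 demo_effective_window(u16 raw_window, int handshake_complete,
+                                 int snd_wscale)
+{
+ if (!handshake_complete)
+  return raw_window;                    /* SYN/SYN-ACK: never scaled */
+ return (u32)raw_window << snd_wscale;  /* later segments: scaled    */
+}
+#endif /* illustrative sketch */
+/* (end of illustrative aside)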
+ */ + tp->snd_wnd = ntohs(th->window); + tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); + + if (tp->wscale_ok == 0) { + tp->snd_wscale = tp->rcv_wscale = 0; + tp->window_clamp = min(tp->window_clamp, 65535U); + } + + if (tp->saw_tstamp) { + tp->tstamp_ok = 1; + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + tcp_store_ts_recent(tp); + } else { + tp->tcp_header_len = sizeof(struct tcphdr); + } + + if (tp->sack_ok && sysctl_tcp_fack) + tp->sack_ok |= 2; + + tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_initialize_rcv_mss(sk); + tcp_init_metrics(sk); + tcp_init_buffer_space(sk); + + if (sk->keepopen) + tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); + + if (tp->snd_wscale == 0) + __tcp_fast_path_on(tp, tp->snd_wnd); + else + tp->pred_flags = 0; + + /* Remember, tcp_poll() does not lock socket! + * Change state from SYN-SENT only after copied_seq + * is initialized. */ + tp->copied_seq = tp->rcv_nxt; + mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + + if(!sk->dead) { + sk->state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + } + + if (tp->write_pending || tp->defer_accept || tp->ack.pingpong) { + /* Save one ACK. Data will be ready after + * several ticks, if write_pending is set. + * + * It may be deleted, but with this feature tcpdumps + * look so _wonderfully_ clever, that I was not able + * to stand against the temptation 8) --ANK + */ + tcp_schedule_ack(tp); + tp->ack.lrcvtime = tcp_time_stamp; + tp->ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(tp); + tcp_enter_quickack_mode(tp); + tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + +discard: + __kfree_skb(skb); + return 0; + } else { + tcp_send_ack(sk); + } + return -1; + } + + /* No ACK in the segment */ + + if (th->rst) { + /* rfc793: + * "If the RST bit is set + * + * Otherwise (no ACK) drop the segment and return." + */ + + goto discard_and_undo; + } + + /* PAWS check. */ + if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0)) + goto discard_and_undo; + + if (th->syn) { + /* We see SYN without ACK. It is attempt of + * simultaneous connect with crossed SYNs. + * Particularly, it can be connect to self. + */ + tcp_set_state(sk, TCP_SYN_RECV); + + if (tp->saw_tstamp) { + tp->tstamp_ok = 1; + tcp_store_ts_recent(tp); + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else { + tp->tcp_header_len = sizeof(struct tcphdr); + } + + tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + + /* RFC1323: The window in SYN & SYN/ACK segments is + * never scaled. + */ + tp->snd_wnd = ntohs(th->window); + tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tp->max_window = tp->snd_wnd; + + tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_initialize_rcv_mss(sk); + + TCP_ECN_rcv_syn(tp, th); + + tcp_send_synack(sk); +#if 0 + /* Note, we could accept data and URG from this segment. + * There are no obstacles to make this. + * + * However, if we ignore data in ACKless segments sometimes, + * we have no reasons to accept it sometimes. + * Also, seems the code doing it in step6 of tcp_rcv_state_process + * is not flawless. So, discard packet for sanity. + * Uncomment this return to process the data. + */ + return -1; +#else + goto discard; +#endif + } + /* "fifth, if neither of the SYN or RST bits is set then + * drop the segment and return." 
+ */ + +discard_and_undo: + tcp_clear_options(tp); + tp->mss_clamp = saved_clamp; + goto discard; + +reset_and_undo: + tcp_clear_options(tp); + tp->mss_clamp = saved_clamp; + return 1; +#else + return 0; +#endif +} + + +/* + * This function implements the receiving procedure of RFC 793 for + * all states except ESTABLISHED and TIME_WAIT. + * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be + * address independent. + */ + +int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int queued = 0; + + tp->saw_tstamp = 0; + + switch (sk->state) { + case TCP_CLOSE: + goto discard; + + case TCP_LISTEN: + if(th->ack) + return 1; + + if(th->rst) + goto discard; + + if(th->syn) { + if(tp->af_specific->conn_request(sk, skb) < 0) + return 1; + + /* Now we have several options: In theory there is + * nothing else in the frame. KA9Q has an option to + * send data with the syn, BSD accepts data with the + * syn up to the [to be] advertised window and + * Solaris 2.1 gives you a protocol error. For now + * we just ignore it, that fits the spec precisely + * and avoids incompatibilities. It would be nice in + * future to drop through and process the data. + * + * Now that TTCP is starting to be used we ought to + * queue this data. + * But, this leaves one open to an easy denial of + * service attack, and SYN cookies can't defend + * against this problem. So, we drop the data + * in the interest of security over speed. + */ + goto discard; + } + goto discard; + + case TCP_SYN_SENT: + queued = tcp_rcv_synsent_state_process(sk, skb, th, len); + if (queued >= 0) + return queued; + + /* Do step6 onward by hand. */ + tcp_urg(sk, skb, th); + __kfree_skb(skb); + tcp_data_snd_check(sk); + return 0; + } + + if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp && + tcp_paws_discard(tp, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(PAWSEstabRejected); + tcp_send_dupack(sk, skb); + goto discard; + } + /* Reset is accepted even if it did not pass PAWS. */ + } + + /* step 1: check sequence number */ + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + if (!th->rst) + tcp_send_dupack(sk, skb); + goto discard; + } + + /* step 2: check RST bit */ + if(th->rst) { + tcp_reset(sk); + goto discard; + } + + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + + /* step 3: check security and precedence [ignored] */ + + /* step 4: + * + * Check for a SYN in window. + */ + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + NET_INC_STATS_BH(TCPAbortOnSyn); + tcp_reset(sk); + return 1; + } + + /* step 5: check the ACK field */ + if (th->ack) { + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); + + switch(sk->state) { + case TCP_SYN_RECV: + if (acceptable) { + tp->copied_seq = tp->rcv_nxt; + mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->state_change(sk); + + /* Note, that this wakeup is only for marginal + * crossed SYN case. Passively open sockets + * are not waked up, because sk->sleep == NULL + * and sk->socket == NULL. + */ + if (sk->socket) { + sk_wake_async(sk,0,POLL_OUT); + } + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); + + /* tcp_ack considers this ACK as duplicate + * and does not calculate rtt. + * Fix it at least with timestamps. 
+ */ + if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt) + tcp_ack_saw_tstamp(tp, 0); + + if (tp->tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + tcp_init_metrics(sk); + tcp_initialize_rcv_mss(sk); + tcp_init_buffer_space(sk); + tcp_fast_path_on(tp); + } else { + return 1; + } + break; + + case TCP_FIN_WAIT1: + if (tp->snd_una == tp->write_seq) { + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->shutdown |= SEND_SHUTDOWN; + dst_confirm(sk->dst_cache); + + if (!sk->dead) { + /* Wake up lingering close() */ + sk->state_change(sk); + } else { + int tmo; + + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(TCPAbortOnData); + return 1; + } + + tmo = tcp_fin_time(tp); + if (tmo > TCP_TIMEWAIT_LEN) { + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sk->lock.users) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + tcp_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; + } + } + } + break; + + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + goto discard; + } + break; + + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + tcp_update_metrics(sk); + tcp_done(sk); + goto discard; + } + break; + } + } else + goto discard; + + /* step 6: check the URG bit */ + tcp_urg(sk, skb, th); + + /* step 7: process the segment text */ + switch (sk->state) { + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + case TCP_LAST_ACK: + if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) + break; + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: + /* RFC 793 says to queue data in these states, + * RFC 1122 says we MUST send a reset. + * BSD 4.4 also does reset. + */ + if (sk->shutdown & RCV_SHUTDOWN) { + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { + NET_INC_STATS_BH(TCPAbortOnData); + tcp_reset(sk); + return 1; + } + } + /* Fall through */ + case TCP_ESTABLISHED: + tcp_data_queue(sk, skb); + queued = 1; + break; + } + + /* tcp_data could move socket to TIME-WAIT */ + if (sk->state != TCP_CLOSE) { + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); + } + + if (!queued) { +discard: + __kfree_skb(skb); + } + return 0; +#else + return 0; +#endif +} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_ipv4.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_ipv4.c new file mode 100755 index 00000000000..cf45c02046b --- /dev/null +++ b/reactos/drivers/net/tcpip/transport/tcp/tcp_ipv4.c @@ -0,0 +1,2523 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: transport/tcp/tcp_ipv4.c + * PURPOSE: Transmission Control Protocol + * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) + * REVISIONS: + * CSH 15-01-2003 Imported from linux kernel 2.4.20 + */ + +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). 
+ * + * Version: $Id: tcp_ipv4.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ + * + * IPv4 specific functions + * + * + * code split from: + * linux/ipv4/tcp.c + * linux/ipv4/tcp_input.c + * linux/ipv4/tcp_output.c + * + * See tcp.c for author information + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Changes: + * David S. Miller : New socket lookup architecture. + * This code is dedicated to John Dyson. + * David S. Miller : Change semantics of established hash, + * half is devoted to TIME_WAIT sockets + * and the rest go in the other half. + * Andi Kleen : Add support for syncookies and fixed + * some bugs: ip options weren't passed to + * the TCP layer, missed a check for an ACK bit. + * Andi Kleen : Implemented fast path mtu discovery. + * Fixed many serious bugs in the + * open_request handling and moved + * most of it into the af independent code. + * Added tail drop and some other bugfixes. + * Added new listen sematics. + * Mike McLagan : Routing by source + * Juan Jose Ciarlante: ip_dynaddr bits + * Andi Kleen: various fixes. + * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Andi Kleen : Fix new listen. + * Andi Kleen : Fix accept error reporting. + */ + +#if 0 +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#else +#include "linux.h" +#include "tcpcore.h" +#endif + +extern int sysctl_ip_dynaddr; +extern int sysctl_ip_default_ttl; +int sysctl_tcp_tw_reuse = 0; + +/* Check TCP sequence numbers in ICMP packets. */ +#define ICMP_MIN_LENGTH 8 + +/* Socket used for sending RSTs */ +#if 0 +static struct inode tcp_inode; +static struct socket *tcp_socket=&tcp_inode.u.socket_i; +#endif + +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb); + +/* + * ALL members must be initialised to prevent gcc-2.7.2.3 miscompilation + */ +#if 0 +struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { + __tcp_ehash: NULL, + __tcp_bhash: NULL, + __tcp_bhash_size: 0, + __tcp_ehash_size: 0, + __tcp_listening_hash: { NULL, }, + __tcp_lhash_lock: RW_LOCK_UNLOCKED, + __tcp_lhash_users: ATOMIC_INIT(0), + __tcp_lhash_wait: + __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), + __tcp_portalloc_lock: SPIN_LOCK_UNLOCKED +}; +#endif + +/* + * This array holds the first and last local port number. + * For high-usage systems, use sysctl to change this to + * 32768-61000 + */ +int sysctl_local_port_range[2] = { 1024, 4999 }; +int tcp_port_rover = (1024 - 1); + +static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, + __u32 faddr, __u16 fport) +{ + int h = ((laddr ^ lport) ^ (faddr ^ fport)); + h ^= h>>16; + h ^= h>>8; + return h & (tcp_ehash_size - 1); +} + +static __inline__ int tcp_sk_hashfn(struct sock *sk) +{ + __u32 laddr = sk->rcv_saddr; + __u16 lport = sk->num; + __u32 faddr = sk->daddr; + __u16 fport = sk->dport; + + return tcp_hashfn(laddr, lport, faddr, fport); +} + +/* Allocate and initialize a new TCP local port bind bucket. + * The bindhash mutex for snum's hash chain must be held here. 
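+ *
+ * [Illustrative aside, not part of the imported Linux code:
+ *  tcp_bucket_create() below, like the other hash-chain code in this
+ *  file, uses the "pprev" idiom: each node stores the address of the
+ *  pointer that points at it, so unlinking works the same way whether or
+ *  not the node is first in its chain.  demo_* names are hypothetical.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+struct demo_node { struct demo_node *next, **pprev; };
+
+static void demo_link(struct demo_node **chain, struct demo_node *n)
+{
+ if ((n->next = *chain) != NULL)
+  n->next->pprev = &n->next;
+ *chain = n;
+ n->pprev = chain;
+}
+
+static void demo_unlink(struct demo_node *n)
+{
+ if (n->next)
+  n->next->pprev = n->pprev;
+ *n->pprev = n->next;
+ n->pprev = NULL;
+}
+#endif /* illustrative sketch */
+/* (end of illustrative aside)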
+ */ +struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, + unsigned short snum) +{ +#if 0 + struct tcp_bind_bucket *tb; + + tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC); + if(tb != NULL) { + tb->port = snum; + tb->fastreuse = 0; + tb->owners = NULL; + if((tb->next = head->chain) != NULL) + tb->next->pprev = &tb->next; + head->chain = tb; + tb->pprev = &head->chain; + } + return tb; +#else + return NULL; +#endif +} + +/* Caller must disable local BH processing. */ +static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) +{ +#if 0 + struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(child->num)]; + struct tcp_bind_bucket *tb; + + spin_lock(&head->lock); + tb = (struct tcp_bind_bucket *)sk->prev; + if ((child->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &child->bind_next; + tb->owners = child; + child->bind_pprev = &tb->owners; + child->prev = (struct sock *) tb; + spin_unlock(&head->lock); +#endif +} + +__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child) +{ +#if 0 + local_bh_disable(); + __tcp_inherit_port(sk, child); + local_bh_enable(); +#endif +} + +static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum) +{ +#if 0 + sk->num = snum; + if ((sk->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &sk->bind_next; + tb->owners = sk; + sk->bind_pprev = &tb->owners; + sk->prev = (struct sock *) tb; +#endif +} + +static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) +{ +#if 0 + struct sock *sk2 = tb->owners; + int sk_reuse = sk->reuse; + + for( ; sk2 != NULL; sk2 = sk2->bind_next) { + if (sk != sk2 && + sk2->reuse <= 1 && + sk->bound_dev_if == sk2->bound_dev_if) { + if (!sk_reuse || + !sk2->reuse || + sk2->state == TCP_LISTEN) { + if (!sk2->rcv_saddr || + !sk->rcv_saddr || + (sk2->rcv_saddr == sk->rcv_saddr)) + break; + } + } + } + return sk2 != NULL; +#else + return 0; +#endif +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. + */ +static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +{ +#if 0 + struct tcp_bind_hashbucket *head; + struct tcp_bind_bucket *tb; + int ret; + + local_bh_disable(); + if (snum == 0) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + int rover; + + spin_lock(&tcp_portalloc_lock); + rover = tcp_port_rover; + do { rover++; + if ((rover < low) || (rover > high)) + rover = low; + head = &tcp_bhash[tcp_bhashfn(rover)]; + spin_lock(&head->lock); + for (tb = head->chain; tb; tb = tb->next) + if (tb->port == rover) + goto next; + break; + next: + spin_unlock(&head->lock); + } while (--remaining > 0); + tcp_port_rover = rover; + spin_unlock(&tcp_portalloc_lock); + + /* Exhausted local port range during search? */ + ret = 1; + if (remaining <= 0) + goto fail; + + /* OK, here is the one we will use. HEAD is + * non-NULL and we hold it's mutex. 
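+ *
+ * [Illustrative aside, not part of the imported Linux code: the search
+ *  that just finished walks a "rover" cyclically through the
+ *  sysctl_local_port_range and stops at the first port that has no bind
+ *  bucket at all; ports that already have a bucket are checked for
+ *  conflicts instead.  The condensed, hypothetical demo_* version:]
+ */
+#if 0 /* illustrative sketch, never compiled */
+/* @port_free reports whether a port has no bind bucket at all. */
+static int demo_pick_local_port(int low, int high, int *rover,
+                                int (*port_free)(int port))
+{
+ int remaining = (high - low) + 1;
+ do {
+  (*rover)++;
+  if (*rover < low || *rover > high)
+   *rover = low;
+  if (port_free(*rover))
+   return *rover;
+ } while (--remaining > 0);
+ return -1;   /* ephemeral port range exhausted */
+}
+#endif /* illustrative sketch */
+/* (end of illustrative aside)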
+ */ + snum = rover; + tb = NULL; + } else { + head = &tcp_bhash[tcp_bhashfn(snum)]; + spin_lock(&head->lock); + for (tb = head->chain; tb != NULL; tb = tb->next) + if (tb->port == snum) + break; + } + if (tb != NULL && tb->owners != NULL) { + if (sk->reuse > 1) + goto success; + if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) { + goto success; + } else { + ret = 1; + if (tcp_bind_conflict(sk, tb)) + goto fail_unlock; + } + } + ret = 1; + if (tb == NULL && + (tb = tcp_bucket_create(head, snum)) == NULL) + goto fail_unlock; + if (tb->owners == NULL) { + if (sk->reuse && sk->state != TCP_LISTEN) + tb->fastreuse = 1; + else + tb->fastreuse = 0; + } else if (tb->fastreuse && + ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) + tb->fastreuse = 0; +success: + if (sk->prev == NULL) + tcp_bind_hash(sk, tb, snum); + BUG_TRAP(sk->prev == (struct sock *) tb); + ret = 0; + +fail_unlock: + spin_unlock(&head->lock); +fail: + local_bh_enable(); + return ret; +#else + return 0; +#endif +} + +/* Get rid of any references to a local port held by the + * given sock. + */ +__inline__ void __tcp_put_port(struct sock *sk) +{ +#if 0 + struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(sk->num)]; + struct tcp_bind_bucket *tb; + + spin_lock(&head->lock); + tb = (struct tcp_bind_bucket *) sk->prev; + if (sk->bind_next) + sk->bind_next->bind_pprev = sk->bind_pprev; + *(sk->bind_pprev) = sk->bind_next; + sk->prev = NULL; + sk->num = 0; + if (tb->owners == NULL) { + if (tb->next) + tb->next->pprev = tb->pprev; + *(tb->pprev) = tb->next; + kmem_cache_free(tcp_bucket_cachep, tb); + } + spin_unlock(&head->lock); +#endif +} + +void tcp_put_port(struct sock *sk) +{ +#if 0 + local_bh_disable(); + __tcp_put_port(sk); + local_bh_enable(); +#endif +} + +/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. + * Look, when several writers sleep and reader wakes them up, all but one + * immediately hit write lock and grab all the cpus. Exclusive sleep solves + * this, _but_ remember, it adds useless work on UP machines (wake up each + * exclusive lock release). It should be ifdefed really. 
+ */ + +void tcp_listen_wlock(void) +{ +#if 0 + write_lock(&tcp_lhash_lock); + + if (atomic_read(&tcp_lhash_users)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&tcp_lhash_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (atomic_read(&tcp_lhash_users) == 0) + break; + write_unlock_bh(&tcp_lhash_lock); + schedule(); + write_lock_bh(&tcp_lhash_lock); + } + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&tcp_lhash_wait, &wait); + } +#endif +} + +static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) +{ +#if 0 + struct sock **skp; + rwlock_t *lock; + + BUG_TRAP(sk->pprev==NULL); + if(listen_possible && sk->state == TCP_LISTEN) { + skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + lock = &tcp_lhash_lock; + tcp_listen_wlock(); + } else { + skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))].chain; + lock = &tcp_ehash[sk->hashent].lock; + write_lock(lock); + } + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + sock_prot_inc_use(sk->prot); + write_unlock(lock); + if (listen_possible && sk->state == TCP_LISTEN) + wake_up(&tcp_lhash_wait); +#endif +} + +static void tcp_v4_hash(struct sock *sk) +{ +#if 0 + if (sk->state != TCP_CLOSE) { + local_bh_disable(); + __tcp_v4_hash(sk, 1); + local_bh_enable(); + } +#endif +} + +void tcp_unhash(struct sock *sk) +{ +#if 0 + rwlock_t *lock; + + if (!sk->pprev) + goto ende; + + if (sk->state == TCP_LISTEN) { + local_bh_disable(); + tcp_listen_wlock(); + lock = &tcp_lhash_lock; + } else { + struct tcp_ehash_bucket *head = &tcp_ehash[sk->hashent]; + lock = &head->lock; + write_lock_bh(&head->lock); + } + + if(sk->pprev) { + if(sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + sock_prot_dec_use(sk->prot); + } + write_unlock_bh(lock); + + ende: + if (sk->state == TCP_LISTEN) + wake_up(&tcp_lhash_wait); +#endif +} + +/* Don't inline this cruft. Here are some nice properties to + * exploit here. The BSD API does not allow a listening TCP + * to specify the remote port nor the remote address for the + * connection. So always assume those are both wildcarded + * during the search since they can never be otherwise. + */ +static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigned short hnum, int dif) +{ +#if 0 + struct sock *result = NULL; + int score, hiscore; + + hiscore=0; + for(; sk; sk = sk->next) { + if(sk->num == hnum) { + __u32 rcv_saddr = sk->rcv_saddr; + + score = 1; + if(rcv_saddr) { + if (rcv_saddr != daddr) + continue; + score++; + } + if (sk->bound_dev_if) { + if (sk->bound_dev_if != dif) + continue; + score++; + } + if (score == 3) + return sk; + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + } + return result; +#else + return NULL; +#endif +} + +/* Optimize the common listener case. */ +__inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif) +{ +#if 0 + struct sock *sk; + + read_lock(&tcp_lhash_lock); + sk = tcp_listening_hash[tcp_lhashfn(hnum)]; + if (sk) { + if (sk->num == hnum && + sk->next == NULL && + (!sk->rcv_saddr || sk->rcv_saddr == daddr) && + !sk->bound_dev_if) + goto sherry_cache; + sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&tcp_lhash_lock); + return sk; +#else + return NULL; +#endif +} + +/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. 
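+ *
+ * [Illustrative aside, not part of the imported Linux code: it condenses
+ *  the scoring used by __tcp_v4_lookup_listener() above.  A candidate
+ *  gets one point for the matching port, one more for an exact local
+ *  address, one more for an exact bound device; a mismatch on a bound
+ *  address or device disqualifies it, and a score of 3 ends the search
+ *  early.  The demo_* name is hypothetical.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+static int demo_listener_score(u32 bound_addr, u32 dst_addr,
+                               int bound_dev, int in_dev)
+{
+ int score = 1;                 /* local port already matched       */
+ if (bound_addr) {
+  if (bound_addr != dst_addr)
+   return -1;                   /* bound to a different address     */
+  score++;
+ }
+ if (bound_dev) {
+  if (bound_dev != in_dev)
+   return -1;                   /* bound to a different interface   */
+  score++;
+ }
+ return score;                  /* 3 == exact match                 */
+}
+#endif /* illustrative sketch */
+/* (end of illustrative aside) The imported comment continues: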
-DaveM + * + * Local BH must be disabled here. + */ + +static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, + u32 daddr, u16 hnum, int dif) +{ +#if 0 + struct tcp_ehash_bucket *head; + TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) + __u32 ports = TCP_COMBINED_PORTS(sport, hnum); + struct sock *sk; + int hash; + + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + hash = tcp_hashfn(daddr, hnum, saddr, sport); + head = &tcp_ehash[hash]; + read_lock(&head->lock); + for(sk = head->chain; sk; sk = sk->next) { + if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. */ + for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) + if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; + read_unlock(&head->lock); + + return NULL; + +hit: + sock_hold(sk); + read_unlock(&head->lock); + return sk; +#else + return NULL; +#endif +} + +static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, + u32 daddr, u16 hnum, int dif) +{ +#if 0 + struct sock *sk; + + sk = __tcp_v4_lookup_established(saddr, sport, daddr, hnum, dif); + + if (sk) + return sk; + + return tcp_v4_lookup_listener(daddr, hnum, dif); +#else + return NULL; +#endif +} + +__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +{ +#if 0 + struct sock *sk; + + local_bh_disable(); + sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif); + local_bh_enable(); + + return sk; +#else + return NULL; +#endif +} + +static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + return secure_tcp_sequence_number(skb->nh.iph->daddr, + skb->nh.iph->saddr, + skb->h.th->dest, + skb->h.th->source); +#else + return 0; +#endif +} + +/* called with local bh disabled */ +static int __tcp_v4_check_established(struct sock *sk, __u16 lport, + struct tcp_tw_bucket **twp) +{ +#if 0 + u32 daddr = sk->rcv_saddr; + u32 saddr = sk->daddr; + int dif = sk->bound_dev_if; + TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) + __u32 ports = TCP_COMBINED_PORTS(sk->dport, lport); + int hash = tcp_hashfn(daddr, lport, saddr, sk->dport); + struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct sock *sk2, **skp; + struct tcp_tw_bucket *tw; + + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp) != NULL; + skp = &sk2->next) { + tw = (struct tcp_tw_bucket*)sk2; + + if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* With PAWS, it is safe from the viewpoint + of data integrity. Even without PAWS it + is safe provided sequence spaces do not + overlap i.e. at data rates <= 80Mbit/sec. + + Actually, the idea is close to VJ's one, + only timestamp cache is held not per host, + but per port pair and TW bucket is used + as state holder. + + If TW bucket has been already destroyed we + fall back to VJ's scheme and use initial + timestamp retrieved from peer table. + */ + if (tw->ts_recent_stamp && + (!twp || (sysctl_tcp_tw_reuse && + xtime.tv_sec - tw->ts_recent_stamp > 1))) { + if ((tp->write_seq = tw->snd_nxt+65535+2) == 0) + tp->write_seq = 1; + tp->ts_recent = tw->ts_recent; + tp->ts_recent_stamp = tw->ts_recent_stamp; + sock_hold(sk2); + skp = &head->chain; + goto unique; + } else + goto not_unique; + } + } + tw = NULL; + + /* And established part... 
*/ + for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) { + if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + sk->num = lport; + sk->sport = htons(lport); + BUG_TRAP(sk->pprev==NULL); + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + + *skp = sk; + sk->pprev = skp; + sk->hashent = hash; + sock_prot_inc_use(sk->prot); + write_unlock(&head->lock); + + if (twp) { + *twp = tw; + NET_INC_STATS_BH(TimeWaitRecycled); + } else if (tw) { + /* Silly. Should hash-dance instead... */ + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); + NET_INC_STATS_BH(TimeWaitRecycled); + + tcp_tw_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +#else + return 0; +#endif +} + +/* + * Bind a port for a connect operation and hash it. + */ +static int tcp_v4_hash_connect(struct sock *sk) +{ +#if 0 + unsigned short snum = sk->num; + struct tcp_bind_hashbucket *head; + struct tcp_bind_bucket *tb; + + if (snum == 0) { + int rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + struct tcp_tw_bucket *tw = NULL; + + local_bh_disable(); + + /* TODO. Actually it is not so bad idea to remove + * tcp_portalloc_lock before next submission to Linus. + * As soon as we touch this place at all it is time to think. + * + * Now it protects single _advisory_ variable tcp_port_rover, + * hence it is mostly useless. + * Code will work nicely if we just delete it, but + * I am afraid in contented case it will work not better or + * even worse: another cpu just will hit the same bucket + * and spin there. + * So some cpu salt could remove both contention and + * memory pingpong. Any ideas how to do this in a nice way? + */ + spin_lock(&tcp_portalloc_lock); + rover = tcp_port_rover; + + do { + rover++; + if ((rover < low) || (rover > high)) + rover = low; + head = &tcp_bhash[tcp_bhashfn(rover)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + for (tb = head->chain; tb; tb = tb->next) { + if (tb->port == rover) { + BUG_TRAP(tb->owners != NULL); + if (tb->fastreuse >= 0) + goto next_port; + if (!__tcp_v4_check_established(sk, rover, &tw)) + goto ok; + goto next_port; + } + } + + tb = tcp_bucket_create(head, rover); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } while (--remaining > 0); + tcp_port_rover = rover; + spin_unlock(&tcp_portalloc_lock); + + local_bh_enable(); + + return -EADDRNOTAVAIL; + + ok: + /* All locks still held and bhs disabled */ + tcp_port_rover = rover; + spin_unlock(&tcp_portalloc_lock); + + tcp_bind_hash(sk, tb, rover); + if (!sk->pprev) { + sk->sport = htons(rover); + __tcp_v4_hash(sk, 0); + } + spin_unlock(&head->lock); + + if (tw) { + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); + tcp_tw_put(tw); + } + + local_bh_enable(); + return 0; + } + + head = &tcp_bhash[tcp_bhashfn(snum)]; + tb = (struct tcp_bind_bucket *)sk->prev; + spin_lock_bh(&head->lock); + if (tb->owners == sk && sk->bind_next == NULL) { + __tcp_v4_hash(sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + int ret; + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ + ret = __tcp_v4_check_established(sk, snum, NULL); + local_bh_enable(); + return ret; + } +#else + return 0; +#endif +} + +/* This will initiate an outgoing connection. */ +int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; + struct rtable *rt; + u32 daddr, nexthop; + int tmp; + int err; + + if (addr_len < sizeof(struct sockaddr_in)) + return(-EINVAL); + + if (usin->sin_family != AF_INET) + return(-EAFNOSUPPORT); + + nexthop = daddr = usin->sin_addr.s_addr; + if (sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) { + if (daddr == 0) + return -EINVAL; + nexthop = sk->protinfo.af_inet.opt->faddr; + } + + tmp = ip_route_connect(&rt, nexthop, sk->saddr, + RT_CONN_FLAGS(sk), sk->bound_dev_if); + if (tmp < 0) + return tmp; + + if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) { + ip_rt_put(rt); + return -ENETUNREACH; + } + + __sk_dst_set(sk, &rt->u.dst); + sk->route_caps = rt->u.dst.dev->features; + + if (!sk->protinfo.af_inet.opt || !sk->protinfo.af_inet.opt->srr) + daddr = rt->rt_dst; + + if (!sk->saddr) + sk->saddr = rt->rt_src; + sk->rcv_saddr = sk->saddr; + + if (tp->ts_recent_stamp && sk->daddr != daddr) { + /* Reset inherited state */ + tp->ts_recent = 0; + tp->ts_recent_stamp = 0; + tp->write_seq = 0; + } + + if (sysctl_tcp_tw_recycle && + !tp->ts_recent_stamp && + rt->rt_dst == daddr) { + struct inet_peer *peer = rt_get_peer(rt); + + /* VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state TIME-WAIT + * and initialize ts_recent from it, when trying new connection. + */ + + if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { + tp->ts_recent_stamp = peer->tcp_ts_stamp; + tp->ts_recent = peer->tcp_ts; + } + } + + sk->dport = usin->sin_port; + sk->daddr = daddr; + + tp->ext_header_len = 0; + if (sk->protinfo.af_inet.opt) + tp->ext_header_len = sk->protinfo.af_inet.opt->optlen; + + tp->mss_clamp = 536; + + /* Socket identity is still unknown (sport may be zero). + * However we set state to SYN-SENT and not releasing socket + * lock select source port, enter ourselves into the hash tables and + * complete initalization after this. 
+ */ + tcp_set_state(sk, TCP_SYN_SENT); + err = tcp_v4_hash_connect(sk); + if (err) + goto failure; + + if (!tp->write_seq) + tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, + sk->sport, usin->sin_port); + + sk->protinfo.af_inet.id = tp->write_seq^jiffies; + + err = tcp_connect(sk); + if (err) + goto failure; + + return 0; + +failure: + tcp_set_state(sk, TCP_CLOSE); + __sk_dst_reset(sk); + sk->route_caps = 0; + sk->dport = 0; + return err; +#else + return 0; +#endif +} + +static __inline__ int tcp_v4_iif(struct sk_buff *skb) +{ +#if 0 + return ((struct rtable*)skb->dst)->rt_iif; +#else + return 0; +#endif +} + +static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport) +{ +#if 0 + unsigned h = raddr ^ rport; + h ^= h>>16; + h ^= h>>8; + return h&(TCP_SYNQ_HSIZE-1); +#else + return 0; +#endif +} + +static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, + struct open_request ***prevp, + __u16 rport, + __u32 raddr, __u32 laddr) +{ +#if 0 + struct tcp_listen_opt *lopt = tp->listen_opt; + struct open_request *req, **prev; + + for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)]; + (req = *prev) != NULL; + prev = &req->dl_next) { + if (req->rmt_port == rport && + req->af.v4_req.rmt_addr == raddr && + req->af.v4_req.loc_addr == laddr && + TCP_INET_FAMILY(req->class->family)) { + BUG_TRAP(req->sk == NULL); + *prevp = prev; + return req; + } + } + + return NULL; +#else + return NULL; +#endif +} + +static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct tcp_listen_opt *lopt = tp->listen_opt; + unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port); + + req->expires = jiffies + TCP_TIMEOUT_INIT; + req->retrans = 0; + req->sk = NULL; + req->dl_next = lopt->syn_table[h]; + + write_lock(&tp->syn_wait_lock); + lopt->syn_table[h] = req; + write_unlock(&tp->syn_wait_lock); + + tcp_synq_added(sk); +#endif +} + + +/* + * This routine does path mtu discovery as defined in RFC1191. + */ +static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu) +{ +#if 0 + struct dst_entry *dst; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs + * send out by Linux are always <576bytes so they should go through + * unfragmented). + */ + if (sk->state == TCP_LISTEN) + return; + + /* We don't check in the destentry if pmtu discovery is forbidden + * on this route. We just assume that no packet_to_big packets + * are send back when pmtu discovery is not active. + * There is a small race when the user changes this flag in the + * route, but I think that's acceptable. + */ + if ((dst = __sk_dst_check(sk, 0)) == NULL) + return; + + ip_rt_update_pmtu(dst, mtu); + + /* Something is about to be wrong... Remember soft error + * for the case, if this connection will not able to recover. + */ + if (mtu < dst->pmtu && ip_dont_fragment(sk, dst)) + sk->err_soft = EMSGSIZE; + + if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT && + tp->pmtu_cookie > dst->pmtu) { + tcp_sync_mss(sk, dst->pmtu); + + /* Resend the TCP packet because it's + * clear that the old packet has been + * dropped. This is the new "fast" path mtu + * discovery. + */ + tcp_simple_retransmit(sk); + } /* else let the usual retransmit timer handle it */ +#endif +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. 
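+ *
+ * [Illustrative aside, not part of the imported Linux code: before the
+ *  ICMP handler below, do_pmtu_discovery() above reacts to an ICMP
+ *  "fragmentation needed" roughly as sketched here: if the reported MTU
+ *  undercuts the cached path MTU, the MSS is re-synced and the dropped
+ *  packet is retransmitted at once instead of waiting for the
+ *  retransmission timer.  demo_* names and the callbacks are hypothetical.]
+ */
+#if 0 /* illustrative sketch, never compiled */
+static void demo_pmtu_event(unsigned int reported_mtu,
+                            unsigned int *cached_pmtu,
+                            void (*sync_mss)(unsigned int mtu),
+                            void (*retransmit_now)(void))
+{
+ if (reported_mtu >= *cached_pmtu)
+  return;                   /* nothing shrank: let the timers handle it */
+ *cached_pmtu = reported_mtu;
+ sync_mss(reported_mtu);    /* corresponds to tcp_sync_mss() above      */
+ retransmit_now();          /* "fast" PMTU discovery: resend right away */
+}
+#endif /* illustrative sketch */
+/* (end of illustrative aside) The imported comment continues: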
If err > 0 + * it's just the icmp type << 8 | icmp code. After adjustment + * header points to the first 8 bytes of the tcp header. We need + * to find the appropriate port. + * + * The locking strategy used here is very "optimistic". When + * someone else accesses the socket the ICMP is just dropped + * and for some paths there is no check at all. + * A more general error queue to queue errors for later handling + * is probably better. + * + */ + +void tcp_v4_err(struct sk_buff *skb, u32 info) +{ +#if 0 + struct iphdr *iph = (struct iphdr*)skb->data; + struct tcphdr *th = (struct tcphdr*)(skb->data+(iph->ihl<<2)); + struct tcp_opt *tp; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct sock *sk; + __u32 seq; + int err; + + if (skb->len < (iph->ihl << 2) + 8) { + ICMP_INC_STATS_BH(IcmpInErrors); + return; + } + + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, tcp_v4_iif(skb)); + if (sk == NULL) { + ICMP_INC_STATS_BH(IcmpInErrors); + return; + } + if (sk->state == TCP_TIME_WAIT) { + tcp_tw_put((struct tcp_tw_bucket*)sk); + return; + } + + bh_lock_sock(sk); + /* If too many ICMPs get dropped on busy + * servers this needs to be solved differently. + */ + if (sk->lock.users != 0) + NET_INC_STATS_BH(LockDroppedIcmps); + + if (sk->state == TCP_CLOSE) + goto out; + + tp = &sk->tp_pinfo.af_tcp; + seq = ntohl(th->seq); + if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { + NET_INC_STATS(OutOfWindowIcmps); + goto out; + } + + switch (type) { + case ICMP_SOURCE_QUENCH: + /* This is deprecated, but if someone generated it, + * we have no reasons to ignore it. + */ + if (sk->lock.users == 0) + tcp_enter_cwr(tp); + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + break; + case ICMP_DEST_UNREACH: + if (code > NR_ICMP_UNREACH) + goto out; + + if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + if (sk->lock.users == 0) + do_pmtu_discovery(sk, iph, info); + goto out; + } + + err = icmp_err_convert[code].errno; + break; + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + goto out; + } + + switch (sk->state) { + struct open_request *req, **prev; + case TCP_LISTEN: + if (sk->lock.users != 0) + goto out; + + req = tcp_v4_search_req(tp, &prev, + th->dest, + iph->daddr, iph->saddr); + if (!req) + goto out; + + /* ICMPs are not backlogged, hence we cannot get + an established socket here. + */ + BUG_TRAP(req->sk == NULL); + + if (seq != req->snt_isn) { + NET_INC_STATS_BH(OutOfWindowIcmps); + goto out; + } + + /* + * Still in SYN_RECV, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + tcp_synq_drop(sk, req, prev); + goto out; + + case TCP_SYN_SENT: + case TCP_SYN_RECV: /* Cannot happen. + It can f.e. if SYNs crossed. + */ + if (sk->lock.users == 0) { + TCP_INC_STATS_BH(TcpAttemptFails); + sk->err = err; + + sk->error_report(sk); + + tcp_done(sk); + } else { + sk->err_soft = err; + } + goto out; + } + + /* If we've already connected we will keep trying + * until we time out, or the user gives up. + * + * rfc1122 4.2.3.9 allows to consider as hard errors + * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, + * but it is obsoleted by pmtu discovery). 
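+ * (In ICMP terms those are type 3, codes 2 and 3; code 4, fragmentation
+ * needed, is handled separately by the PMTU discovery path above.)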
+ * + * Note, that in modern internet, where routing is unreliable + * and in each dark corner broken firewalls sit, sending random + * errors ordered by their masters even this two messages finally lose + * their original sense (even Linux sends invalid PORT_UNREACHs) + * + * Now we are in compliance with RFCs. + * --ANK (980905) + */ + + if (sk->lock.users == 0 && sk->protinfo.af_inet.recverr) { + sk->err = err; + sk->error_report(sk); + } else { /* Only an error on timeout */ + sk->err_soft = err; + } + +out: + bh_unlock_sock(sk); + sock_put(sk); +#endif +} + +/* This routine computes an IPv4 TCP checksum. */ +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb) +{ +#if 0 + if (skb->ip_summed == CHECKSUM_HW) { + th->check = ~tcp_v4_check(th, len, sk->saddr, sk->daddr, 0); + skb->csum = offsetof(struct tcphdr, check); + } else { + th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr, + csum_partial((char *)th, th->doff<<2, skb->csum)); + } +#endif +} + +/* + * This routine will send an RST to the other tcp. + * + * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) + * for reset. + * Answer: if a packet caused RST, it is not for a socket + * existing in our system, if it is matched to a socket, + * it is just duplicate segment or bug in other side's TCP. + * So that we build reply only basing on parameters + * arrived with segment. + * Exception: precedence violation. We do not implement it in any case. + */ + +static void tcp_v4_send_reset(struct sk_buff *skb) +{ +#if 0 + struct tcphdr *th = skb->h.th; + struct tcphdr rth; + struct ip_reply_arg arg; + + /* Never send a reset in response to a reset. */ + if (th->rst) + return; + + if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL) + return; + + /* Swap the send and the receive. */ + memset(&rth, 0, sizeof(struct tcphdr)); + rth.dest = th->source; + rth.source = th->dest; + rth.doff = sizeof(struct tcphdr)/4; + rth.rst = 1; + + if (th->ack) { + rth.seq = th->ack_seq; + } else { + rth.ack = 1; + rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + + skb->len - (th->doff<<2)); + } + + memset(&arg, 0, sizeof arg); + arg.iov[0].iov_base = (unsigned char *)&rth; + arg.iov[0].iov_len = sizeof rth; + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + skb->nh.iph->saddr, /*XXX*/ + sizeof(struct tcphdr), + IPPROTO_TCP, + 0); + arg.n_iov = 1; + arg.csumoffset = offsetof(struct tcphdr, check) / 2; + + tcp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; + ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); + + TCP_INC_STATS_BH(TcpOutSegs); + TCP_INC_STATS_BH(TcpOutRsts); +#endif +} + +/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states + outside socket context is ugly, certainly. What can I do? + */ + +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) +{ +#if 0 + struct tcphdr *th = skb->h.th; + struct { + struct tcphdr th; + u32 tsopt[3]; + } rep; + struct ip_reply_arg arg; + + memset(&rep.th, 0, sizeof(struct tcphdr)); + memset(&arg, 0, sizeof arg); + + arg.iov[0].iov_base = (unsigned char *)&rep; + arg.iov[0].iov_len = sizeof(rep.th); + arg.n_iov = 1; + if (ts) { + rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + rep.tsopt[1] = htonl(tcp_time_stamp); + rep.tsopt[2] = htonl(ts); + arg.iov[0].iov_len = sizeof(rep); + } + + /* Swap the send and the receive. 
*/ + rep.th.dest = th->source; + rep.th.source = th->dest; + rep.th.doff = arg.iov[0].iov_len/4; + rep.th.seq = htonl(seq); + rep.th.ack_seq = htonl(ack); + rep.th.ack = 1; + rep.th.window = htons(win); + + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + skb->nh.iph->saddr, /*XXX*/ + arg.iov[0].iov_len, + IPPROTO_TCP, + 0); + arg.csumoffset = offsetof(struct tcphdr, check) / 2; + + ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); + + TCP_INC_STATS_BH(TcpOutSegs); +#endif +} + +static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + + tcp_v4_send_ack(skb, tw->snd_nxt, tw->rcv_nxt, + tw->rcv_wnd>>tw->rcv_wscale, tw->ts_recent); + + tcp_tw_put(tw); +#endif +} + +static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) +{ +#if 0 + tcp_v4_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, + req->ts_recent); +#endif +} + +static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request *req) +{ +#if 0 + struct rtable *rt; + struct ip_options *opt; + + opt = req->af.v4_req.opt; + if(ip_route_output(&rt, ((opt && opt->srr) ? + opt->faddr : + req->af.v4_req.rmt_addr), + req->af.v4_req.loc_addr, + RT_CONN_FLAGS(sk), sk->bound_dev_if)) { + IP_INC_STATS_BH(IpOutNoRoutes); + return NULL; + } + if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { + ip_rt_put(rt); + IP_INC_STATS_BH(IpOutNoRoutes); + return NULL; + } + return &rt->u.dst; +#else + return NULL; +#endif +} + +/* + * Send a SYN-ACK after having received an ACK. + * This still operates on a open_request only, not on a big + * socket. + */ +static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, + struct dst_entry *dst) +{ +#if 0 + int err = -1; + struct sk_buff * skb; + + /* First, grab a route. */ + if (dst == NULL && + (dst = tcp_v4_route_req(sk, req)) == NULL) + goto out; + + skb = tcp_make_synack(sk, dst, req); + + if (skb) { + struct tcphdr *th = skb->h.th; + + th->check = tcp_v4_check(th, skb->len, + req->af.v4_req.loc_addr, req->af.v4_req.rmt_addr, + csum_partial((char *)th, skb->len, skb->csum)); + + err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, + req->af.v4_req.rmt_addr, req->af.v4_req.opt); + if (err == NET_XMIT_CN) + err = 0; + } + +out: + dst_release(dst); + return err; +#else + return 0; +#endif +} + +/* + * IPv4 open_request destructor. + */ +static void tcp_v4_or_free(struct open_request *req) +{ +#if 0 + if (req->af.v4_req.opt) + kfree(req->af.v4_req.opt); +#endif +} + +static inline void syn_flood_warning(struct sk_buff *skb) +{ +#if 0 + static unsigned long warntime; + + if (jiffies - warntime > HZ*60) { + warntime = jiffies; + printk(KERN_INFO + "possible SYN flooding on port %d. Sending cookies.\n", + ntohs(skb->h.th->dest)); + } +#endif +} + +/* + * Save and compile IPv4 options into the open_request if needed. + */ +static inline struct ip_options * +tcp_v4_save_options(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct ip_options *opt = &(IPCB(skb)->opt); + struct ip_options *dopt = NULL; + + if (opt && opt->optlen) { + int opt_size = optlength(opt); + dopt = kmalloc(opt_size, GFP_ATOMIC); + if (dopt) { + if (ip_options_echo(dopt, skb)) { + kfree(dopt); + dopt = NULL; + } + } + } + return dopt; +#else + return NULL; +#endif +} + +/* + * Maximum number of SYN_RECV sockets in queue per LISTEN socket. + * One SYN_RECV socket costs about 80bytes on a 32bit machine. 
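+ * (For rough scale: at the default backlog of 256 set below, that is about
+ * 256 * 80 = 20KB of pending open requests per listening socket.)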
+ * It would be better to replace it with a global counter for all sockets + * but then some measure against one socket starving all other sockets + * would be needed. + * + * It was 128 by default. Experiments with real servers show, that + * it is absolutely not enough even at 100conn/sec. 256 cures most + * of problems. This value is adjusted to 128 for very small machines + * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). + * Further increasing requires to change hash table size. + */ +int sysctl_max_syn_backlog = 256; + +#if 0 +struct or_calltable or_ipv4 = { + PF_INET, + tcp_v4_send_synack, + tcp_v4_or_send_ack, + tcp_v4_or_free, + tcp_v4_send_reset +}; +#endif + +int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct tcp_opt tp; + struct open_request *req; + __u32 saddr = skb->nh.iph->saddr; + __u32 daddr = skb->nh.iph->daddr; + __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = NULL; +#ifdef CONFIG_SYN_COOKIES + int want_cookie = 0; +#else +#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ +#endif + + /* Never answer to SYNs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST|RTCF_MULTICAST)) + goto drop; + + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if (tcp_synq_is_full(sk) && !isn) { +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + want_cookie = 1; + } else +#endif + goto drop; + } + + /* Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + goto drop; + + req = tcp_openreq_alloc(); + if (req == NULL) + goto drop; + + tcp_clear_options(&tp); + tp.mss_clamp = 536; + tp.user_mss = sk->tp_pinfo.af_tcp.user_mss; + + tcp_parse_options(skb, &tp, 0); + + if (want_cookie) { + tcp_clear_options(&tp); + tp.saw_tstamp = 0; + } + + if (tp.saw_tstamp && tp.rcv_tsval == 0) { + /* Some OSes (unknown ones, but I see them on web server, which + * contains information interesting only for windows' + * users) do not send their stamp in SYN. It is easy case. + * We simply do not advertise TS support. + */ + tp.saw_tstamp = 0; + tp.tstamp_ok = 0; + } + tp.tstamp_ok = tp.saw_tstamp; + + tcp_openreq_init(req, &tp, skb); + + req->af.v4_req.loc_addr = daddr; + req->af.v4_req.rmt_addr = saddr; + req->af.v4_req.opt = tcp_v4_save_options(sk, skb); + req->class = &or_ipv4; + if (!want_cookie) + TCP_ECN_create_request(req, skb->h.th); + + if (want_cookie) { +#ifdef CONFIG_SYN_COOKIES + syn_flood_warning(skb); +#endif + isn = cookie_v4_init_sequence(sk, skb, &req->mss); + } else if (isn == 0) { + struct inet_peer *peer = NULL; + + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. 
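+ * (That function is tcp_timewait_state_process(); the checks below only
+ * need to cover SYNs that did not pass through a TIME-WAIT bucket.)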
+ */ + if (tp.saw_tstamp && + sysctl_tcp_tw_recycle && + (dst = tcp_v4_route_req(sk, req)) != NULL && + (peer = rt_get_peer((struct rtable*)dst)) != NULL && + peer->v4daddr == saddr) { + if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && + (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { + NET_INC_STATS_BH(PAWSPassiveRejected); + dst_release(dst); + goto drop_and_free; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - tcp_synq_len(sk) + < (sysctl_max_syn_backlog>>2)) && + (!peer || !peer->tcp_ts_stamp) && + (!dst || !dst->rtt)) { + /* Without syncookies last quarter of + * backlog is filled with destinations, proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + NETDEBUG(if (net_ratelimit()) \ + printk(KERN_DEBUG "TCP: drop open request from %u.%u.%u.%u/%u\n", \ + NIPQUAD(saddr), ntohs(skb->h.th->source))); + dst_release(dst); + goto drop_and_free; + } + + isn = tcp_v4_init_sequence(sk, skb); + } + req->snt_isn = isn; + + if (tcp_v4_send_synack(sk, req, dst)) + goto drop_and_free; + + if (want_cookie) { + tcp_openreq_free(req); + } else { + tcp_v4_synq_add(sk, req); + } + return 0; + +drop_and_free: + tcp_openreq_free(req); +drop: + TCP_INC_STATS_BH(TcpAttemptFails); + return 0; +#else + return 0; +#endif +} + + +/* + * The three way handshake has completed - we got a valid synack - + * now create the new socket. + */ +struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct open_request *req, + struct dst_entry *dst) +{ +#if 0 + struct tcp_opt *newtp; + struct sock *newsk; + + if (tcp_acceptq_is_full(sk)) + goto exit_overflow; + + if (dst == NULL && + (dst = tcp_v4_route_req(sk, req)) == NULL) + goto exit; + + newsk = tcp_create_openreq_child(sk, req, skb); + if (!newsk) + goto exit; + + newsk->dst_cache = dst; + newsk->route_caps = dst->dev->features; + + newtp = &(newsk->tp_pinfo.af_tcp); + newsk->daddr = req->af.v4_req.rmt_addr; + newsk->saddr = req->af.v4_req.loc_addr; + newsk->rcv_saddr = req->af.v4_req.loc_addr; + newsk->protinfo.af_inet.opt = req->af.v4_req.opt; + req->af.v4_req.opt = NULL; + newsk->protinfo.af_inet.mc_index = tcp_v4_iif(skb); + newsk->protinfo.af_inet.mc_ttl = skb->nh.iph->ttl; + newtp->ext_header_len = 0; + if (newsk->protinfo.af_inet.opt) + newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen; + newsk->protinfo.af_inet.id = newtp->write_seq^jiffies; + + tcp_sync_mss(newsk, dst->pmtu); + newtp->advmss = dst->advmss; + tcp_initialize_rcv_mss(newsk); + + __tcp_v4_hash(newsk, 0); + __tcp_inherit_port(sk, newsk); + + return newsk; + +exit_overflow: + NET_INC_STATS_BH(ListenOverflows); +exit: + NET_INC_STATS_BH(ListenDrops); + dst_release(dst); + return NULL; +#else + return NULL; +#endif +} + +static struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb) +{ +#if 0 + struct open_request *req, **prev; + struct tcphdr *th = skb->h.th; + struct iphdr *iph = skb->nh.iph; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sock *nsk; + + /* Find possible connection requests. 
*/ + req = tcp_v4_search_req(tp, &prev, + th->source, + iph->saddr, iph->daddr); + if (req) + return tcp_check_req(sk, skb, req, prev); + + nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, + th->source, + skb->nh.iph->daddr, + ntohs(th->dest), + tcp_v4_iif(skb)); + + if (nsk) { + if (nsk->state != TCP_TIME_WAIT) { + bh_lock_sock(nsk); + return nsk; + } + tcp_tw_put((struct tcp_tw_bucket*)nsk); + return NULL; + } + +#ifdef CONFIG_SYN_COOKIES + if (!th->rst && !th->syn && th->ack) + sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); +#endif + return sk; +#else + return NULL; +#endif +} + +static int tcp_v4_checksum_init(struct sk_buff *skb) +{ +#if 0 + if (skb->ip_summed == CHECKSUM_HW) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, + skb->nh.iph->daddr,skb->csum)) + return 0; + + NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v4 csum failed\n")); + skb->ip_summed = CHECKSUM_NONE; + } + if (skb->len <= 76) { + if (tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, + skb->nh.iph->daddr, + skb_checksum(skb, 0, skb->len, 0))) + return -1; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + skb->csum = ~tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, + skb->nh.iph->daddr,0); + } + return 0; +#else + return 0; +#endif +} + + +/* The socket must have it's spinlock held when we get + * here. + * + * We have a potential double-lock case here, so even when + * doing backlog processing we use the BH locking scheme. + * This is because we cannot sleep with the original spinlock + * held. + */ +int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +{ +#if 0 +#ifdef CONFIG_FILTER + struct sk_filter *filter = sk->filter; + if (filter && sk_filter(skb, filter)) + goto discard; +#endif /* CONFIG_FILTER */ + + IP_INC_STATS_BH(IpInDelivers); + + if (sk->state == TCP_ESTABLISHED) { /* Fast path */ + TCP_CHECK_TIMER(sk); + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + goto reset; + TCP_CHECK_TIMER(sk); + return 0; + } + + if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb)) + goto csum_err; + + if (sk->state == TCP_LISTEN) { + struct sock *nsk = tcp_v4_hnd_req(sk, skb); + if (!nsk) + goto discard; + + if (nsk != sk) { + if (tcp_child_process(sk, nsk, skb)) + goto reset; + return 0; + } + } + + TCP_CHECK_TIMER(sk); + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + goto reset; + TCP_CHECK_TIMER(sk); + return 0; + +reset: + tcp_v4_send_reset(skb); +discard: + kfree_skb(skb); + /* Be careful here. If this function gets more complicated and + * gcc suffers from register pressure on the x86, sk (in %ebx) + * might be destroyed here. This current version compiles correctly, + * but you have been warned. + */ + return 0; + +csum_err: + TCP_INC_STATS_BH(TcpInErrs); + goto discard; +#else + return 0; +#endif +} + +/* + * From tcp_input.c + */ + +int tcp_v4_rcv(struct sk_buff *skb) +{ +#if 0 + struct tcphdr *th; + struct sock *sk; + int ret; + + if (skb->pkt_type!=PACKET_HOST) + goto discard_it; + + /* Count it even if it's bad */ + TCP_INC_STATS_BH(TcpInSegs); + + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) + goto discard_it; + + th = skb->h.th; + + if (th->doff < sizeof(struct tcphdr)/4) + goto bad_packet; + if (!pskb_may_pull(skb, th->doff*4)) + goto discard_it; + + /* An explanation is required here, I think. + * Packet length and doff are validated by header prediction, + * provided case of th->doff==0 is elimineted. + * So, we defer the checks. 
*/ + if ((skb->ip_summed != CHECKSUM_UNNECESSARY && + tcp_v4_checksum_init(skb) < 0)) + goto bad_packet; + + th = skb->h.th; + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff*4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->when = 0; + TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; + TCP_SKB_CB(skb)->sacked = 0; + + sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); + + if (!sk) + goto no_tcp_socket; + +process: + if(!ipsec_sk_policy(sk,skb)) + goto discard_and_relse; + + if (sk->state == TCP_TIME_WAIT) + goto do_time_wait; + + skb->dev = NULL; + + bh_lock_sock(sk); + ret = 0; + if (!sk->lock.users) { + if (!tcp_prequeue(sk, skb)) + ret = tcp_v4_do_rcv(sk, skb); + } else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + sock_put(sk); + + return ret; + +no_tcp_socket: + if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { +bad_packet: + TCP_INC_STATS_BH(TcpInErrs); + } else { + tcp_v4_send_reset(skb); + } + +discard_it: + /* Discard frame. */ + kfree_skb(skb); + return 0; + +discard_and_relse: + sock_put(sk); + goto discard_it; + +do_time_wait: + if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { + TCP_INC_STATS_BH(TcpInErrs); + goto discard_and_relse; + } + switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_SYN: + { + struct sock *sk2; + + sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); + if (sk2 != NULL) { + tcp_tw_deschedule((struct tcp_tw_bucket *)sk); + tcp_timewait_kill((struct tcp_tw_bucket *)sk); + tcp_tw_put((struct tcp_tw_bucket *)sk); + sk = sk2; + goto process; + } + /* Fall through to ACK */ + } + case TCP_TW_ACK: + tcp_v4_timewait_ack(sk, skb); + break; + case TCP_TW_RST: + goto no_tcp_socket; + case TCP_TW_SUCCESS:; + } + goto discard_it; +#endif +} + +/* With per-bucket locks this operation is not-atomic, so that + * this version is not worse. + */ +static void __tcp_v4_rehash(struct sock *sk) +{ +#if 0 + sk->prot->unhash(sk); + sk->prot->hash(sk); +#endif +} + +static int tcp_v4_reselect_saddr(struct sock *sk) +{ +#if 0 + int err; + struct rtable *rt; + __u32 old_saddr = sk->saddr; + __u32 new_saddr; + __u32 daddr = sk->daddr; + + if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) + daddr = sk->protinfo.af_inet.opt->faddr; + + /* Query new route. */ + err = ip_route_connect(&rt, daddr, 0, + RT_TOS(sk->protinfo.af_inet.tos)|sk->localroute, + sk->bound_dev_if); + if (err) + return err; + + __sk_dst_set(sk, &rt->u.dst); + sk->route_caps = rt->u.dst.dev->features; + + new_saddr = rt->rt_src; + + if (new_saddr == old_saddr) + return 0; + + if (sysctl_ip_dynaddr > 1) { + printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr " + "from %d.%d.%d.%d to %d.%d.%d.%d\n", + NIPQUAD(old_saddr), + NIPQUAD(new_saddr)); + } + + sk->saddr = new_saddr; + sk->rcv_saddr = new_saddr; + + /* XXX The only one ugly spot where we need to + * XXX really change the sockets identity after + * XXX it has entered the hashes. -DaveM + * + * Besides that, it does not check for connection + * uniqueness. Wait for troubles. + */ + __tcp_v4_rehash(sk); + return 0; +#else + return 0; +#endif +} + +int tcp_v4_rebuild_header(struct sock *sk) +{ +#if 0 + struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); + u32 daddr; + int err; + + /* Route is OK, nothing to do. */ + if (rt != NULL) + return 0; + + /* Reroute. 
*/ + daddr = sk->daddr; + if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) + daddr = sk->protinfo.af_inet.opt->faddr; + + err = ip_route_output(&rt, daddr, sk->saddr, + RT_CONN_FLAGS(sk), sk->bound_dev_if); + if (!err) { + __sk_dst_set(sk, &rt->u.dst); + sk->route_caps = rt->u.dst.dev->features; + return 0; + } + + /* Routing failed... */ + sk->route_caps = 0; + + if (!sysctl_ip_dynaddr || + sk->state != TCP_SYN_SENT || + (sk->userlocks & SOCK_BINDADDR_LOCK) || + (err = tcp_v4_reselect_saddr(sk)) != 0) + sk->err_soft=-err; + + return err; +#else + return 0; +#endif +} + +static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ +#if 0 + struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = sk->daddr; + sin->sin_port = sk->dport; +#endif +} + +/* VJ's idea. Save last timestamp seen from this destination + * and hold it at least for normal timewait interval to use for duplicate + * segment detection in subsequent connections, before they enter synchronized + * state. + */ + +int tcp_v4_remember_stamp(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct rtable *rt = (struct rtable*)__sk_dst_get(sk); + struct inet_peer *peer = NULL; + int release_it = 0; + + if (rt == NULL || rt->rt_dst != sk->daddr) { + peer = inet_getpeer(sk->daddr, 1); + release_it = 1; + } else { + if (rt->peer == NULL) + rt_bind_peer(rt, 1); + peer = rt->peer; + } + + if (peer) { + if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 || + (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && + peer->tcp_ts_stamp <= tp->ts_recent_stamp)) { + peer->tcp_ts_stamp = tp->ts_recent_stamp; + peer->tcp_ts = tp->ts_recent; + } + if (release_it) + inet_putpeer(peer); + return 1; + } + + return 0; +#else + return 0; +#endif +} + +int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) +{ +#if 0 + struct inet_peer *peer = NULL; + + peer = inet_getpeer(tw->daddr, 1); + + if (peer) { + if ((s32)(peer->tcp_ts - tw->ts_recent) <= 0 || + (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && + peer->tcp_ts_stamp <= tw->ts_recent_stamp)) { + peer->tcp_ts_stamp = tw->ts_recent_stamp; + peer->tcp_ts = tw->ts_recent; + } + inet_putpeer(peer); + return 1; + } + + return 0; +#else + return 0; +#endif +} + +#if 0 +struct tcp_func ipv4_specific = { + ip_queue_xmit, + tcp_v4_send_check, + tcp_v4_rebuild_header, + tcp_v4_conn_request, + tcp_v4_syn_recv_sock, + tcp_v4_remember_stamp, + sizeof(struct iphdr), + + ip_setsockopt, + ip_getsockopt, + v4_addr2sockaddr, + sizeof(struct sockaddr_in) +}; +#endif + +/* NOTE: A lot of things set to zero explicitly by call to + * sk_alloc() so need not be done here. + */ +static int tcp_v4_init_sock(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + skb_queue_head_init(&tp->out_of_order_queue); + tcp_init_xmit_timers(sk); + tcp_prequeue_init(tp); + + tp->rto = TCP_TIMEOUT_INIT; + tp->mdev = TCP_TIMEOUT_INIT; + + /* So many TCP implementations out there (incorrectly) count the + * initial SYN frame in their delayed-ACK and congestion control + * algorithms that we must have the following bandaid to talk + * efficiently to them. -DaveM + */ + tp->snd_cwnd = 2; + + /* See draft-stevens-tcpca-spec-01 for discussion of the + * initialization of these values. 
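+ * In short, ssthresh starts out effectively infinite so slow start governs
+ * until the first loss, and the ~0 cwnd clamp below leaves the congestion
+ * window unclamped.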
+ */ + tp->snd_ssthresh = 0x7fffffff; /* Infinity */ + tp->snd_cwnd_clamp = ~0; + tp->mss_cache = 536; + + tp->reordering = sysctl_tcp_reordering; + + sk->state = TCP_CLOSE; + + sk->write_space = tcp_write_space; + sk->use_write_queue = 1; + + sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific; + + sk->sndbuf = sysctl_tcp_wmem[1]; + sk->rcvbuf = sysctl_tcp_rmem[1]; + + atomic_inc(&tcp_sockets_allocated); + + return 0; +#else + return 0; +#endif +} + +static int tcp_v4_destroy_sock(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + tcp_clear_xmit_timers(sk); + + /* Cleanup up the write buffer. */ + tcp_writequeue_purge(sk); + + /* Cleans up our, hopefully empty, out_of_order_queue. */ + __skb_queue_purge(&tp->out_of_order_queue); + + /* Clean prequeue, it must be empty really */ + __skb_queue_purge(&tp->ucopy.prequeue); + + /* Clean up a referenced TCP bind bucket. */ + if(sk->prev != NULL) + tcp_put_port(sk); + + /* If sendmsg cached page exists, toss it. */ + if (tp->sndmsg_page != NULL) + __free_page(tp->sndmsg_page); + + atomic_dec(&tcp_sockets_allocated); + + return 0; +#else + return 0; +#endif +} + +/* Proc filesystem TCP sock list dumping. */ +static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid) +{ +#if 0 + int ttd = req->expires - jiffies; + + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p", + i, + req->af.v4_req.loc_addr, + ntohs(sk->sport), + req->af.v4_req.rmt_addr, + ntohs(req->rmt_port), + TCP_SYN_RECV, + 0,0, /* could print option size, but that is af dependent. */ + 1, /* timers active (only the expire timer) */ + ttd, + req->retrans, + uid, + 0, /* non standard timer */ + 0, /* open_requests have no inode */ + atomic_read(&sk->refcnt), + req + ); +#endif +} + +static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i) +{ +#if 0 + unsigned int dest, src; + __u16 destp, srcp; + int timer_active; + unsigned long timer_expires; + struct tcp_opt *tp = &sp->tp_pinfo.af_tcp; + + dest = sp->daddr; + src = sp->rcv_saddr; + destp = ntohs(sp->dport); + srcp = ntohs(sp->sport); + if (tp->pending == TCP_TIME_RETRANS) { + timer_active = 1; + timer_expires = tp->timeout; + } else if (tp->pending == TCP_TIME_PROBE0) { + timer_active = 4; + timer_expires = tp->timeout; + } else if (timer_pending(&sp->timer)) { + timer_active = 2; + timer_expires = sp->timer.expires; + } else { + timer_active = 0; + timer_expires = jiffies; + } + + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d", + i, src, srcp, dest, destp, sp->state, + tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, + timer_active, timer_expires-jiffies, + tp->retransmits, + sock_i_uid(sp), + tp->probes_out, + sock_i_ino(sp), + atomic_read(&sp->refcnt), sp, + tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, + tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh + ); +#endif +} + +static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) +{ +#if 0 + unsigned int dest, src; + __u16 destp, srcp; + int ttd = tw->ttd - jiffies; + + if (ttd < 0) + ttd = 0; + + dest = tw->daddr; + src = tw->rcv_saddr; + destp = ntohs(tw->dport); + srcp = ntohs(tw->sport); + + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p", + i, src, srcp, dest, destp, tw->substate, 0, 0, + 3, ttd, 0, 0, 0, 0, + atomic_read(&tw->refcnt), tw); +#endif +} + +#define TMPSZ 150 + +int tcp_get_info(char *buffer, char 
**start, off_t offset, int length) +{ +#if 0 + int len = 0, num = 0, i; + off_t begin, pos = 0; + char tmpbuf[TMPSZ+1]; + + if (offset < TMPSZ) + len += sprintf(buffer, "%-*s\n", TMPSZ-1, + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout inode"); + + pos = TMPSZ; + + /* First, walk listening socket table. */ + tcp_listen_lock(); + for(i = 0; i < TCP_LHTABLE_SIZE; i++) { + struct sock *sk; + struct tcp_listen_opt *lopt; + int k; + + for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) { + struct open_request *req; + int uid; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (!TCP_INET_FAMILY(sk->family)) + goto skip_listen; + + pos += TMPSZ; + if (pos >= offset) { + get_tcp_sock(sk, tmpbuf, num); + len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + if (pos >= offset + length) { + tcp_listen_unlock(); + goto out_no_bh; + } + } + +skip_listen: + uid = sock_i_uid(sk); + read_lock_bh(&tp->syn_wait_lock); + lopt = tp->listen_opt; + if (lopt && lopt->qlen != 0) { + for (k=0; ksyn_table[k]; req; req = req->dl_next, num++) { + if (!TCP_INET_FAMILY(req->class->family)) + continue; + + pos += TMPSZ; + if (pos <= offset) + continue; + get_openreq(sk, req, tmpbuf, num, uid); + len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + if (pos >= offset + length) { + read_unlock_bh(&tp->syn_wait_lock); + tcp_listen_unlock(); + goto out_no_bh; + } + } + } + } + read_unlock_bh(&tp->syn_wait_lock); + + /* Completed requests are in normal socket hash table */ + } + } + tcp_listen_unlock(); + + local_bh_disable(); + + /* Next, walk established hash chain. */ + for (i = 0; i < tcp_ehash_size; i++) { + struct tcp_ehash_bucket *head = &tcp_ehash[i]; + struct sock *sk; + struct tcp_tw_bucket *tw; + + read_lock(&head->lock); + for(sk = head->chain; sk; sk = sk->next, num++) { + if (!TCP_INET_FAMILY(sk->family)) + continue; + pos += TMPSZ; + if (pos <= offset) + continue; + get_tcp_sock(sk, tmpbuf, num); + len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + if (pos >= offset + length) { + read_unlock(&head->lock); + goto out; + } + } + for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain; + tw != NULL; + tw = (struct tcp_tw_bucket *)tw->next, num++) { + if (!TCP_INET_FAMILY(tw->family)) + continue; + pos += TMPSZ; + if (pos <= offset) + continue; + get_timewait_sock(tw, tmpbuf, num); + len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + if (pos >= offset + length) { + read_unlock(&head->lock); + goto out; + } + } + read_unlock(&head->lock); + } + +out: + local_bh_enable(); +out_no_bh: + + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + if (len > length) + len = length; + if (len < 0) + len = 0; + return len; +#endif +} + +struct proto tcp_prot = { + name: "TCP", + close: tcp_close, + connect: tcp_v4_connect, + disconnect: tcp_disconnect, + accept: tcp_accept, + ioctl: tcp_ioctl, + init: tcp_v4_init_sock, + destroy: tcp_v4_destroy_sock, + shutdown: tcp_shutdown, + setsockopt: tcp_setsockopt, + getsockopt: tcp_getsockopt, + sendmsg: tcp_sendmsg, + recvmsg: tcp_recvmsg, + backlog_rcv: tcp_v4_do_rcv, + hash: tcp_v4_hash, + unhash: tcp_unhash, + get_port: tcp_v4_get_port, +}; + + + +void tcp_v4_init(struct net_proto_family *ops) +{ +#if 0 + int err; + + tcp_inode.i_mode = S_IFSOCK; + tcp_inode.i_sock = 1; + tcp_inode.i_uid = 0; + tcp_inode.i_gid = 0; + init_waitqueue_head(&tcp_inode.i_wait); + init_waitqueue_head(&tcp_inode.u.socket_i.wait); + + tcp_socket->inode = &tcp_inode; + tcp_socket->state = 
SS_UNCONNECTED; + tcp_socket->type=SOCK_RAW; + + if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0) + panic("Failed to create the TCP control socket.\n"); + tcp_socket->sk->allocation=GFP_ATOMIC; + tcp_socket->sk->protinfo.af_inet.ttl = MAXTTL; + + /* Unhash it so that IP input processing does not even + * see it, we do not wish this socket to see incoming + * packets. + */ + tcp_socket->sk->prot->unhash(tcp_socket->sk); +#endif +} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_output.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_output.c new file mode 100755 index 00000000000..c20b019e1cd --- /dev/null +++ b/reactos/drivers/net/tcpip/transport/tcp/tcp_output.c @@ -0,0 +1,1549 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: transport/tcp/tcp_output.c + * PURPOSE: Transmission Control Protocol + * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) + * REVISIONS: + * CSH 15-01-2003 Imported from linux kernel 2.4.20 + */ + +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp_output.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * Mark Evans, + * Corey Minyard + * Florian La Roche, + * Charles Hedrick, + * Linus Torvalds, + * Alan Cox, + * Matthew Dillon, + * Arnt Gulbrandsen, + * Jorge Cwik, + */ + +/* + * Changes: Pedro Roque : Retransmit queue handled by TCP. + * : Fragmentation on mtu decrease + * : Segment collapse on retransmit + * : AF independence + * + * Linus Torvalds : send_delayed_ack + * David S. Miller : Charge memory using the right skb + * during syn/ack processing. + * David S. Miller : Output engine completely rewritten. + * Andrea Arcangeli: SYNACK carry ts_recent in tsecr. + * Cacophonix Gaul : draft-minshall-nagle-01 + * J Hadi Salim : ECN support + * + */ + +#if 0 +#include + +#include +#include +#else +#include "linux.h" +#include "tcpcore.h" +#endif + +/* People can turn this off for buggy TCP's found in printers etc. */ +int sysctl_tcp_retrans_collapse = 1; + +static __inline__ +void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) +{ + tp->send_head = skb->next; + if (tp->send_head == (struct sk_buff *) &sk->write_queue) + tp->send_head = NULL; + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + if (tp->packets_out++ == 0) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); +} + +/* SND.NXT, if window was not shrunk. + * If window has been shrunk, what should we make? It is not clear at all. + * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( + * Anything in between SND.UNA...SND.UNA+SND.WND also can be already + * invalid. OK, let's make this for now: + */ +static __inline__ __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_opt *tp) +{ + if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) + return tp->snd_nxt; + else + return tp->snd_una+tp->snd_wnd; +} + +/* Calculate mss to advertise in SYN segment. + * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that: + * + * 1. It is independent of path mtu. + * 2. Ideally, it is maximal possible segment size i.e. 65535-40. + * 3. For IPv4 it is reasonable to calculate it from maximal MTU of + * attached devices, because some buggy hosts are confused by + * large MSS. + * 4. 
We do not make 3, we advertise MSS, calculated from first + * hop device mtu, but allow to raise it to ip_rt_min_advmss. + * This may be overriden via information stored in routing table. + * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible, + * probably even Jumbo". + */ +static __u16 tcp_advertise_mss(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct dst_entry *dst = __sk_dst_get(sk); + int mss = tp->advmss; + + if (dst && dst->advmss < mss) { + mss = dst->advmss; + tp->advmss = mss; + } + + return (__u16)mss; +#else + return 0; +#endif +} + +/* RFC2861. Reset CWND after idle period longer RTO to "restart window". + * This is the first part of cwnd validation mechanism. */ +static void tcp_cwnd_restart(struct tcp_opt *tp) +{ +#if 0 + s32 delta = tcp_time_stamp - tp->lsndtime; + u32 restart_cwnd = tcp_init_cwnd(tp); + u32 cwnd = tp->snd_cwnd; + + tp->snd_ssthresh = tcp_current_ssthresh(tp); + restart_cwnd = min(restart_cwnd, cwnd); + + while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) + cwnd >>= 1; + tp->snd_cwnd = max(cwnd, restart_cwnd); + tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_used = 0; +#endif +} + +static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb) +{ +#if 0 + u32 now = tcp_time_stamp; + + if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) + tcp_cwnd_restart(tp); + + tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato) + tp->ack.pingpong = 1; +#endif +} + +static __inline__ void tcp_event_ack_sent(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + tcp_dec_quickack_mode(tp); + tcp_clear_xmit_timer(sk, TCP_TIME_DACK); +#endif +} + +/* Chose a new window to advertise, update state in tcp_opt for the + * socket, and return result with RFC1323 scaling applied. The return + * value can be stuffed directly into th->window for an outgoing + * frame. + */ +static __inline__ u16 tcp_select_window(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 cur_win = tcp_receive_window(tp); + u32 new_win = __tcp_select_window(sk); + + /* Never shrink the offered window */ + if(new_win < cur_win) { + /* Danger Will Robinson! + * Don't update rcv_wup/rcv_wnd here or else + * we will not be able to advertise a zero + * window in time. --DaveM + * + * Relax Will Robinson. + */ + new_win = cur_win; + } + tp->rcv_wnd = new_win; + tp->rcv_wup = tp->rcv_nxt; + + /* RFC1323 scaling applied */ + new_win >>= tp->rcv_wscale; + + /* If we advertise zero window, disable fast path. */ + if (new_win == 0) + tp->pred_flags = 0; + + return new_win; +#else + return 0; +#endif +} + + +/* This routine actually transmits TCP packets queued in by + * tcp_do_sendmsg(). This is used by both the initial + * transmission and possible later retransmissions. + * All SKB's seen here are completely headerless. It is our + * job to build the TCP header, and pass the packet down to + * IP so it can do the same plus pass the packet off to the + * device. + * + * We are working here with either a clone of the original + * SKB, or a fresh unique copy made by the retransmit engine. 
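+ * (For rough header sizing per the option logic below: a SYN advertising
+ * MSS, timestamps and window scaling comes to 20 + 4 + 12 + 4 = 40 bytes of
+ * TCP header, while an established-state segment carrying n SACK blocks
+ * adds 4 + 8*n option bytes.)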
+ */ +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + if(skb != NULL) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + int tcp_header_size = tp->tcp_header_len; + struct tcphdr *th; + int sysctl_flags; + int err; + +#define SYSCTL_FLAG_TSTAMPS 0x1 +#define SYSCTL_FLAG_WSCALE 0x2 +#define SYSCTL_FLAG_SACK 0x4 + + sysctl_flags = 0; + if (tcb->flags & TCPCB_FLAG_SYN) { + tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; + if(sysctl_tcp_timestamps) { + tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; + sysctl_flags |= SYSCTL_FLAG_TSTAMPS; + } + if(sysctl_tcp_window_scaling) { + tcp_header_size += TCPOLEN_WSCALE_ALIGNED; + sysctl_flags |= SYSCTL_FLAG_WSCALE; + } + if(sysctl_tcp_sack) { + sysctl_flags |= SYSCTL_FLAG_SACK; + if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) + tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; + } + } else if (tp->eff_sacks) { + /* A SACK is 2 pad bytes, a 2 byte header, plus + * 2 32-bit sequence numbers for each SACK block. + */ + tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + + (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); + } + th = (struct tcphdr *) skb_push(skb, tcp_header_size); + skb->h.th = th; + skb_set_owner_w(skb, sk); + + /* Build TCP header and checksum it. */ + th->source = sk->sport; + th->dest = sk->dport; + th->seq = htonl(tcb->seq); + th->ack_seq = htonl(tp->rcv_nxt); + *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); + if (tcb->flags & TCPCB_FLAG_SYN) { + /* RFC1323: The window in SYN & SYN/ACK segments + * is never scaled. + */ + th->window = htons(tp->rcv_wnd); + } else { + th->window = htons(tcp_select_window(sk)); + } + th->check = 0; + th->urg_ptr = 0; + + if (tp->urg_mode && + between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) { + th->urg_ptr = htons(tp->snd_up-tcb->seq); + th->urg = 1; + } + + if (tcb->flags & TCPCB_FLAG_SYN) { + tcp_syn_build_options((__u32 *)(th + 1), + tcp_advertise_mss(sk), + (sysctl_flags & SYSCTL_FLAG_TSTAMPS), + (sysctl_flags & SYSCTL_FLAG_SACK), + (sysctl_flags & SYSCTL_FLAG_WSCALE), + tp->rcv_wscale, + tcb->when, + tp->ts_recent); + } else { + tcp_build_and_update_options((__u32 *)(th + 1), + tp, tcb->when); + + TCP_ECN_send(sk, tp, skb, tcp_header_size); + } + tp->af_specific->send_check(sk, th, skb->len, skb); + + if (tcb->flags & TCPCB_FLAG_ACK) + tcp_event_ack_sent(sk); + + if (skb->len != tcp_header_size) + tcp_event_data_sent(tp, skb); + + TCP_INC_STATS(TcpOutSegs); + + err = tp->af_specific->queue_xmit(skb); + if (err <= 0) + return err; + + tcp_enter_cwr(tp); + + /* NET_XMIT_CN is special. It does not guarantee, + * that this packet is lost. It tells that device + * is about to start to drop packets or already + * drops some packets of the same priority and + * invokes us to send less aggressively. + */ + return err == NET_XMIT_CN ? 0 : err; + } + return -ENOBUFS; +#undef SYSCTL_FLAG_TSTAMPS +#undef SYSCTL_FLAG_WSCALE +#undef SYSCTL_FLAG_SACK +#else + return 0; +#endif +} + + +/* This is the main buffer sending routine. We queue the buffer + * and decide whether to queue or transmit now. + * + * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, + * otherwise socket can stall. + */ +void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Advance write_seq and place onto the write_queue. 
*/ + tp->write_seq = TCP_SKB_CB(skb)->end_seq; + __skb_queue_tail(&sk->write_queue, skb); + tcp_charge_skb(sk, skb); + + if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) { + /* Send it out now. */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; + if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_minshall_update(tp, cur_mss, skb); + if (tp->packets_out++ == 0) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + return; + } + } + /* Queue it, remembering where we must start sending. */ + if (tp->send_head == NULL) + tp->send_head = skb; +#endif +} + +/* Send _single_ skb sitting at the send head. This function requires + * true push pending frames to setup probe timer etc. + */ +void tcp_push_one(struct sock *sk, unsigned cur_mss) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb = tp->send_head; + + if (tcp_snd_test(tp, skb, cur_mss, 1)) { + /* Send it out now. */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; + if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { + tp->send_head = NULL; + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + if (tp->packets_out++ == 0) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + return; + } + } +#endif +} + +/* Split fragmented skb to two parts at length len. */ + +static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len) +{ +#if 0 + int i; + int pos = skb->len - skb->data_len; + + if (len < pos) { + /* Split line is inside header. */ + memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len); + + /* And move data appendix as is. */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; + + skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; + skb_shinfo(skb)->nr_frags = 0; + + skb1->data_len = skb->data_len; + skb1->len += skb1->data_len; + skb->data_len = 0; + skb->len = len; + skb->tail = skb->data+len; + } else { + int k = 0; + int nfrags = skb_shinfo(skb)->nr_frags; + + /* Second chunk has no header, nothing to copy. */ + + skb_shinfo(skb)->nr_frags = 0; + skb1->len = skb1->data_len = skb->len - len; + skb->len = len; + skb->data_len = len - pos; + + for (i=0; ifrags[i].size; + if (pos + size > len) { + skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; + + if (pos < len) { + /* Split frag. + * We have to variants in this case: + * 1. Move all the frag to the second + * part, if it is possible. F.e. + * this approach is mandatory for TUX, + * where splitting is expensive. + * 2. Split is accurately. We make this. + */ + get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb1)->frags[0].page_offset += (len-pos); + skb_shinfo(skb1)->frags[0].size -= (len-pos); + skb_shinfo(skb)->frags[i].size = len-pos; + skb_shinfo(skb)->nr_frags++; + } + k++; + } else { + skb_shinfo(skb)->nr_frags++; + } + pos += size; + } + skb_shinfo(skb1)->nr_frags = k; + } +#endif +} + +/* Function to create two new TCP segments. Shrinks the given segment + * to the specified size and appends a new segment with the rest of the + * packet to the list. This won't be called frequently, I hope. + * Remember, these are still headerless SKBs at this point. 
+ */ +static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct sk_buff *buff; + int nsize = skb->len - len; + u16 flags; + + if (skb_cloned(skb) && + skb_is_nonlinear(skb) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + return -ENOMEM; + + /* Get a new skb... force flag on. */ + buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC); + if (buff == NULL) + return -ENOMEM; /* We'll just try again later. */ + tcp_charge_skb(sk, buff); + + /* Correct the sequence numbers. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + + /* PSH and FIN should only be set in the second packet. */ + flags = TCP_SKB_CB(skb)->flags; + TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); + TCP_SKB_CB(buff)->flags = flags; + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); + if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { + tp->lost_out++; + tp->left_out++; + } + TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; + + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { + /* Copy and checksum data tail into the new buffer. */ + buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), + nsize, 0); + + skb_trim(skb, len); + + skb->csum = csum_block_sub(skb->csum, buff->csum, len); + } else { + skb->ip_summed = CHECKSUM_HW; + skb_split(skb, buff, len); + } + + buff->ip_summed = skb->ip_summed; + + /* Looks stupid, but our code really uses when of + * skbs, which it never sent before. --ANK + */ + TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; + + /* Link BUFF into the send queue. */ + __skb_append(skb, buff); + + return 0; +#else + return 0; +#endif +} + +/* This function synchronize snd mss to current pmtu/exthdr set. + + tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts + for TCP options, but includes only bare TCP header. + + tp->mss_clamp is mss negotiated at connection setup. + It is minumum of user_mss and mss received with SYN. + It also does not include TCP options. + + tp->pmtu_cookie is last pmtu, seen by this function. + + tp->mss_cache is current effective sending mss, including + all tcp options except for SACKs. It is evaluated, + taking into account current pmtu, but never exceeds + tp->mss_clamp. + + NOTE1. rfc1122 clearly states that advertised MSS + DOES NOT include either tcp or ip options. + + NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside + this function. 
--ANK (980731) + */ + +int tcp_sync_mss(struct sock *sk, u32 pmtu) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int mss_now; + + /* Calculate base mss without TCP options: + It is MMS_S - sizeof(tcphdr) of rfc1122 + */ + + mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); + + /* Clamp it (mss_clamp does not include tcp options) */ + if (mss_now > tp->mss_clamp) + mss_now = tp->mss_clamp; + + /* Now subtract optional transport overhead */ + mss_now -= tp->ext_header_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ + if (mss_now < 48) + mss_now = 48; + + /* Now subtract TCP options size, not including SACKs */ + mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); + + /* Bound mss with half of window */ + if (tp->max_window && mss_now > (tp->max_window>>1)) + mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); + + /* And store cached results */ + tp->pmtu_cookie = pmtu; + tp->mss_cache = mss_now; + return mss_now; +#else + return 0; +#endif +} + + +/* This routine writes packets to the network. It advances the + * send_head. This happens as incoming acks open up the remote + * window for us. + * + * Returns 1, if no segments are in flight and we have queued segments, but + * cannot send anything now because of SWS or another problem. + */ +int tcp_write_xmit(struct sock *sk, int nonagle) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + unsigned int mss_now; + + /* If we are closed, the bytes will have to remain here. + * In time closedown will finish, we empty the write queue and all + * will be happy. + */ + if(sk->state != TCP_CLOSE) { + struct sk_buff *skb; + int sent_pkts = 0; + + /* Account for SACKS, we may need to fragment due to this. + * It is just like the real MSS changing on us midstream. + * We also handle things correctly when the user adds some + * IP options mid-stream. Silly to do, but cover it. + */ + mss_now = tcp_current_mss(sk); + + while((skb = tp->send_head) && + tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) { + if (skb->len > mss_now) { + if (tcp_fragment(sk, skb, mss_now)) + break; + } + + TCP_SKB_CB(skb)->when = tcp_time_stamp; + if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) + break; + /* Advance the send_head. This one is sent out. */ + update_send_head(sk, tp, skb); + tcp_minshall_update(tp, mss_now, skb); + sent_pkts = 1; + } + + if (sent_pkts) { + tcp_cwnd_validate(sk, tp); + return 0; + } + + return !tp->packets_out && tp->send_head; + } + return 0; +#else + return 0; +#endif +} + +/* This function returns the amount that we can raise the + * usable window based on the following constraints + * + * 1. The window can never be shrunk once it is offered (RFC 793) + * 2. We limit memory per socket + * + * RFC 1122: + * "the suggested [SWS] avoidance algorithm for the receiver is to keep + * RECV.NEXT + RCV.WIN fixed until: + * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" + * + * i.e. don't raise the right edge of the window until you can raise + * it at least MSS bytes. + * + * Unfortunately, the recommended algorithm breaks header prediction, + * since header prediction assumes th->window stays fixed. + * + * Strictly speaking, keeping th->window fixed violates the receiver + * side SWS prevention criteria. The problem is that under this rule + * a stream of single byte packets will cause the right side of the + * window to always advance by a single byte. 
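+ * (For concreteness, the RFC 1122 rule quoted above, taking RCV.BUFF = 8760
+ * and MSS = 1460 as an example, would instead hold the right edge until
+ * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(4380, 1460) = 1460, i.e. move it
+ * a full MSS at a time.)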
+ * + * Of course, if the sender implements sender side SWS prevention + * then this will not be a problem. + * + * BSD seems to make the following compromise: + * + * If the free space is less than the 1/4 of the maximum + * space available and the free space is less than 1/2 mss, + * then set the window to 0. + * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] + * Otherwise, just prevent the window from shrinking + * and from being larger than the largest representable value. + * + * This prevents incremental opening of the window in the regime + * where TCP is limited by the speed of the reader side taking + * data out of the TCP receive queue. It does nothing about + * those cases where the window is constrained on the sender side + * because the pipeline is full. + * + * BSD also seems to "accidentally" limit itself to windows that are a + * multiple of MSS, at least until the free space gets quite small. + * This would appear to be a side effect of the mbuf implementation. + * Combining these two algorithms results in the observed behavior + * of having a fixed window size at almost all times. + * + * Below we obtain similar behavior by forcing the offered window to + * a multiple of the mss when it is feasible to do so. + * + * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. + * Regular options like TIMESTAMP are taken into account. + */ +u32 __tcp_select_window(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + /* MSS for the peer's data. Previous verions used mss_clamp + * here. I don't know if the value based on our guesses + * of peer's MSS is better for the performance. It's more correct + * but may be worse for the performance because of rcv_mss + * fluctuations. --SAW 1998/11/1 + */ + int mss = tp->ack.rcv_mss; + int free_space = tcp_space(sk); + int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); + int window; + + if (mss > full_space) + mss = full_space; + + if (free_space < full_space/2) { + tp->ack.quick = 0; + + if (tcp_memory_pressure) + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); + + if (free_space < mss) + return 0; + } + + if (free_space > tp->rcv_ssthresh) + free_space = tp->rcv_ssthresh; + + /* Get the largest window that is a nice multiple of mss. + * Window clamp already applied above. + * If our current window offering is within 1 mss of the + * free space we just keep it. This prevents the divide + * and multiply from happening most of the time. + * We also don't do any window rounding when the free space + * is too small. + */ + window = tp->rcv_wnd; + if (window <= free_space - mss || window > free_space) + window = (free_space/mss)*mss; + + return window; +#else + return 0; +#endif +} + +/* Attempt to collapse two adjacent SKB's during retransmission. */ +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct sk_buff *next_skb = skb->next; + + /* The first test we must make is that neither of these two + * SKB's are still referenced by someone else. + */ + if(!skb_cloned(skb) && !skb_cloned(next_skb)) { + int skb_size = skb->len, next_skb_size = next_skb->len; + u16 flags = TCP_SKB_CB(skb)->flags; + + /* Also punt if next skb has been SACK'd. */ + if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) + return; + + /* Next skb is out of window. 
*/ + if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) + return; + + /* Punt if not enough space exists in the first SKB for + * the data in the second, or the total combined payload + * would exceed the MSS. + */ + if ((next_skb_size > skb_tailroom(skb)) || + ((skb_size + next_skb_size) > mss_now)) + return; + + /* Ok. We will be able to collapse the packet. */ + __skb_unlink(next_skb, next_skb->list); + + if (next_skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_HW; + + if (skb->ip_summed != CHECKSUM_HW) { + memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); + skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); + } + + /* Update sequence range on original skb. */ + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; + + /* Merge over control information. */ + flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ + TCP_SKB_CB(skb)->flags = flags; + + /* All done, get rid of second SKB and account for it so + * packet counting does not break. + */ + TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); + if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) + tp->retrans_out--; + if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { + tp->lost_out--; + tp->left_out--; + } + /* Reno case is special. Sigh... */ + if (!tp->sack_ok && tp->sacked_out) { + tp->sacked_out--; + tp->left_out--; + } + + /* Not quite right: it can be > snd.fack, but + * it is better to underestimate fackets. + */ + if (tp->fackets_out) + tp->fackets_out--; + tcp_free_skb(sk, next_skb); + tp->packets_out--; + } +#endif +} + +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk); + int lost = 0; + + for_retrans_queue(skb, sk, tp) { + if (skb->len > mss && + !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out--; + } + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + lost = 1; + } + } + } + + if (!lost) + return; + + tcp_sync_left_out(tp); + + /* Don't muck with the congestion window here. + * Reason is that we do not increase amount of _data_ + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ + if (tp->ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tp->ca_state = TCP_CA_Loss; + } + tcp_xmit_retransmit_queue(sk); +#endif +} + +/* This retransmits one SKB. Policy decisions and retransmit queue + * state updates are done by the caller. Returns non-zero if an + * error occurred which prevented the send. + */ +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + unsigned int cur_mss = tcp_current_mss(sk); + int err; + + /* Do not sent more than we queued. 1/4 is reserved for possible + * copying overhead: frgagmentation, tunneling, mangling etc. + */ + if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf)) + return -EAGAIN; + + /* If receiver has shrunk his window, and skb is out of + * new window, do not retransmit it. 
The exception is the + * case, when window is shrunk to zero. In this case + * our retransmit serves as a zero window probe. + */ + if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) + && TCP_SKB_CB(skb)->seq != tp->snd_una) + return -EAGAIN; + + if(skb->len > cur_mss) { + if(tcp_fragment(sk, skb, cur_mss)) + return -ENOMEM; /* We'll try again later. */ + + /* New SKB created, account for it. */ + tp->packets_out++; + } + + /* Collapse two adjacent packets if worthwhile and we can. */ + if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && + (skb->len < (cur_mss >> 1)) && + (skb->next != tp->send_head) && + (skb->next != (struct sk_buff *)&sk->write_queue) && + (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && + (sysctl_tcp_retrans_collapse != 0)) + tcp_retrans_try_collapse(sk, skb, cur_mss); + + if(tp->af_specific->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + /* Some Solaris stacks overoptimize and ignore the FIN on a + * retransmit when old data is attached. So strip it off + * since it is cheap to do so and saves bytes on the network. + */ + if(skb->len > 0 && + (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { + if (!pskb_trim(skb, 0)) { + TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; + skb->ip_summed = CHECKSUM_NONE; + skb->csum = 0; + } + } + + /* Make a copy, if the first transmission SKB clone we made + * is still in somebody's hands, else make a clone. + */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; + + err = tcp_transmit_skb(sk, (skb_cloned(skb) ? + pskb_copy(skb, GFP_ATOMIC): + skb_clone(skb, GFP_ATOMIC))); + + if (err == 0) { + /* Update global TCP statistics. */ + TCP_INC_STATS(TcpRetransSegs); + +#if FASTRETRANS_DEBUG > 0 + if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { + if (net_ratelimit()) + printk(KERN_DEBUG "retrans_out leaked.\n"); + } +#endif + TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; + tp->retrans_out++; + + /* Save stamp of the first retransmit. */ + if (!tp->retrans_stamp) + tp->retrans_stamp = TCP_SKB_CB(skb)->when; + + tp->undo_retrans++; + + /* snd_nxt is stored to detect loss of retransmitted segment, + * see tcp_input.c tcp_sacktag_write_queue(). + */ + TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; + } + return err; +#else + return 0; +#endif +} + +/* This gets called after a retransmit timeout, and the initially + * retransmitted data is acknowledged. It tries to continue + * resending the rest of the retransmit queue, until either + * we've sent it all or the congestion window limit is reached. + * If doing SACK, the first ACK which comes back for a timeout + * based retransmit packet might feed us FACK information again. + * If so, we use it to avoid unnecessarily retransmissions. + */ +void tcp_xmit_retransmit_queue(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + int packet_cnt = tp->lost_out; + + /* First pass: retransmit lost packets. 
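/*
 * Illustrative sketch, not part of the imported code: the window and
 * retransmission tests above (before(), after()) compare 32-bit sequence
 * numbers in a way that survives wrap-around, which is commonly done with a
 * signed difference.  seq_before()/seq_after() below are simplified
 * stand-ins for those macros.
 */
#include <stdint.h>
#include <stdio.h>

static int seq_before(uint32_t a, uint32_t b)
{
    /* True when a precedes b in sequence space, even across wrap-around. */
    return (int32_t)(a - b) < 0;
}

static int seq_after(uint32_t a, uint32_t b)
{
    return seq_before(b, a);
}

int main(void)
{
    /* 0x00000010 logically follows 0xfffffff0 despite the numeric wrap. */
    printf("%d %d\n", seq_after(0x00000010u, 0xfffffff0u),
                      seq_before(0xfffffff0u, 0x00000010u));   /* 1 1 */
    return 0;
}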
*/ + if (packet_cnt) { + for_retrans_queue(skb, sk, tp) { + __u8 sacked = TCP_SKB_CB(skb)->sacked; + + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + return; + + if (sacked&TCPCB_LOST) { + if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { + if (tcp_retransmit_skb(sk, skb)) + return; + if (tp->ca_state != TCP_CA_Loss) + NET_INC_STATS_BH(TCPFastRetrans); + else + NET_INC_STATS_BH(TCPSlowStartRetrans); + + if (skb == skb_peek(&sk->write_queue)) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + } + + if (--packet_cnt <= 0) + break; + } + } + } + + /* OK, demanded retransmission is finished. */ + + /* Forward retransmissions are possible only during Recovery. */ + if (tp->ca_state != TCP_CA_Recovery) + return; + + /* No forward retransmissions in Reno are possible. */ + if (!tp->sack_ok) + return; + + /* Yeah, we have to make difficult choice between forward transmission + * and retransmission... Both ways have their merits... + * + * For now we do not retrnamsit anything, while we have some new + * segments to send. + */ + + if (tcp_may_send_now(sk, tp)) + return; + + packet_cnt = 0; + + for_retrans_queue(skb, sk, tp) { + if(++packet_cnt > tp->fackets_out) + break; + + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + break; + + if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) + continue; + + /* Ok, retransmit it. */ + if(tcp_retransmit_skb(sk, skb)) + break; + + if (skb == skb_peek(&sk->write_queue)) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + + NET_INC_STATS_BH(TCPForwardRetrans); + } +#endif +} + + +/* Send a fin. The caller locks the socket for us. This cannot be + * allowed to fail queueing a FIN frame under any circumstances. + */ +void tcp_send_fin(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb = skb_peek_tail(&sk->write_queue); + unsigned int mss_now; + + /* Optimization, tack on the FIN if we have a queue of + * unsent frames. But be careful about outgoing SACKS + * and IP options. + */ + mss_now = tcp_current_mss(sk); + + if(tp->send_head != NULL) { + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; + TCP_SKB_CB(skb)->end_seq++; + tp->write_seq++; + } else { + /* Socket is locked, keep trying until memory is available. */ + for (;;) { + skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); + if (skb) + break; + yield(); + } + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); + TCP_SKB_CB(skb)->sacked = 0; + + /* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */ + TCP_SKB_CB(skb)->seq = tp->write_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + tcp_send_skb(sk, skb, 1, mss_now); + } + __tcp_push_pending_frames(sk, tp, mss_now, 1); +#endif +} + +/* We get here when a process closes a file descriptor (either due to + * an explicit close() or as a byproduct of exit()'ing) and there + * was unread data in the receive queue. This behavior is recommended + * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM + */ +void tcp_send_active_reset(struct sock *sk, int priority) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + /* NOTE: No TCP options attached and we never retransmit this. */ + skb = alloc_skb(MAX_TCP_HEADER, priority); + if (!skb) { + NET_INC_STATS(TCPAbortFailed); + return; + } + + /* Reserve space for headers and prepare control bits. 
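/*
 * Illustrative sketch, not part of the imported code: tcp_send_fin() above
 * either piggybacks the FIN on the last queued-but-unsent segment (the FIN
 * consumes one sequence number) or, when the queue is empty, allocates a
 * zero-payload segment for it.  The toy structure and queue_fin_sketch()
 * below are invented for illustration.
 */
#include <stdio.h>

struct seg_sketch {
    unsigned int seq, end_seq;   /* sequence range covered by the segment */
    int fin;                     /* FIN flag */
};

/* Returns 1 if the FIN was piggybacked, 0 if a fresh segment is needed. */
static int queue_fin_sketch(struct seg_sketch *tail_unsent, unsigned int *write_seq)
{
    if (tail_unsent) {
        tail_unsent->fin = 1;
        tail_unsent->end_seq++;   /* FIN takes one sequence number */
        (*write_seq)++;
        return 1;
    }
    return 0;
}

int main(void)
{
    struct seg_sketch tail = { 1000, 1460, 0 };
    unsigned int write_seq = 1460;
    printf("piggybacked=%d end_seq=%u write_seq=%u\n",
           queue_fin_sketch(&tail, &write_seq), tail.end_seq, write_seq);
    return 0;
}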
*/ + skb_reserve(skb, MAX_TCP_HEADER); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); + TCP_SKB_CB(skb)->sacked = 0; + + /* Send it off. */ + TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; + TCP_SKB_CB(skb)->when = tcp_time_stamp; + if (tcp_transmit_skb(sk, skb)) + NET_INC_STATS(TCPAbortFailed); +#endif +} + +/* WARNING: This routine must only be called when we have already sent + * a SYN packet that crossed the incoming SYN that caused this routine + * to get called. If this assumption fails then the initial rcv_wnd + * and rcv_wscale values will not be correct. + */ +int tcp_send_synack(struct sock *sk) +{ +#if 0 + struct sk_buff* skb; + + skb = skb_peek(&sk->write_queue); + if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { + printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); + return -EFAULT; + } + if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) { + if (skb_cloned(skb)) { + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + if (nskb == NULL) + return -ENOMEM; + __skb_unlink(skb, &sk->write_queue); + __skb_queue_head(&sk->write_queue, nskb); + tcp_free_skb(sk, skb); + tcp_charge_skb(sk, nskb); + skb = nskb; + } + + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; + TCP_ECN_send_synack(&sk->tp_pinfo.af_tcp, skb); + } + TCP_SKB_CB(skb)->when = tcp_time_stamp; + return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); +#else + return 0; +#endif +} + +/* + * Prepare a SYN-ACK. + */ +struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, + struct open_request *req) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcphdr *th; + int tcp_header_size; + struct sk_buff *skb; + + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_TCP_HEADER); + + skb->dst = dst_clone(dst); + + tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + + (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + + (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + + /* SACK_PERM is in the place of NOP NOP of TS */ + ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); + skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); + + memset(th, 0, sizeof(struct tcphdr)); + th->syn = 1; + th->ack = 1; + TCP_ECN_make_synack(req, th); + th->source = sk->sport; + th->dest = req->rmt_port; + TCP_SKB_CB(skb)->seq = req->snt_isn; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + th->seq = htonl(TCP_SKB_CB(skb)->seq); + th->ack_seq = htonl(req->rcv_isn + 1); + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ + __u8 rcv_wscale; + /* Set this up on the first call only */ + req->window_clamp = tp->window_clamp ? : dst->window; + /* tcp_full_space because it is guaranteed to be the first packet */ + tcp_select_initial_window(tcp_full_space(sk), + dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + &req->rcv_wnd, + &req->window_clamp, + req->wscale_ok, + &rcv_wscale); + req->rcv_wscale = rcv_wscale; + } + + /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. 
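/*
 * Illustrative sketch, not part of the imported code: tcp_make_synack() above
 * sizes the TCP header from the negotiated options (MSS always; timestamps
 * and window scaling when agreed; SACK-permitted taking its own four bytes
 * only when timestamps are absent, as the in-line comment notes).  The
 * aligned option lengths below are assumed example values;
 * synack_header_len() is a made-up helper.
 */
#include <stdio.h>

#define SK_TCPHDR_LEN            20
#define SK_OLEN_MSS               4
#define SK_OLEN_TSTAMP_ALIGNED   12
#define SK_OLEN_WSCALE_ALIGNED    4
#define SK_OLEN_SACKPERM_ALIGNED  4

static int synack_header_len(int tstamp_ok, int wscale_ok, int sack_ok)
{
    return SK_TCPHDR_LEN + SK_OLEN_MSS
         + (tstamp_ok ? SK_OLEN_TSTAMP_ALIGNED : 0)
         + (wscale_ok ? SK_OLEN_WSCALE_ALIGNED : 0)
         + ((sack_ok && !tstamp_ok) ? SK_OLEN_SACKPERM_ALIGNED : 0);
}

int main(void)
{
    /* All options negotiated: 20 + 4 + 12 + 4 = 40 bytes, i.e. doff = 10. */
    int len = synack_header_len(1, 1, 1);
    printf("header=%d doff=%d\n", len, len >> 2);
    return 0;
}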
*/ + th->window = htons(req->rcv_wnd); + + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok, + req->sack_ok, req->wscale_ok, req->rcv_wscale, + TCP_SKB_CB(skb)->when, + req->ts_recent); + + skb->csum = 0; + th->doff = (tcp_header_size >> 2); + TCP_INC_STATS(TcpOutSegs); + return skb; +#else + return 0; +#endif +} + +/* + * Do all connect socket setups that can be done AF independent. + */ +static inline void tcp_connect_init(struct sock *sk) +{ +#if 0 + struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* We'll fix this up when we get a response from the other end. + * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. + */ + tp->tcp_header_len = sizeof(struct tcphdr) + + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + + /* If user gave his TCP_MAXSEG, record it to clamp */ + if (tp->user_mss) + tp->mss_clamp = tp->user_mss; + tp->max_window = 0; + tcp_sync_mss(sk, dst->pmtu); + + if (!tp->window_clamp) + tp->window_clamp = dst->window; + tp->advmss = dst->advmss; + tcp_initialize_rcv_mss(sk); + + tcp_select_initial_window(tcp_full_space(sk), + tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), + &tp->rcv_wnd, + &tp->window_clamp, + sysctl_tcp_window_scaling, + &tp->rcv_wscale); + + tp->rcv_ssthresh = tp->rcv_wnd; + + sk->err = 0; + sk->done = 0; + tp->snd_wnd = 0; + tcp_init_wl(tp, tp->write_seq, 0); + tp->snd_una = tp->write_seq; + tp->snd_sml = tp->write_seq; + tp->rcv_nxt = 0; + tp->rcv_wup = 0; + tp->copied_seq = 0; + + tp->rto = TCP_TIMEOUT_INIT; + tp->retransmits = 0; + tcp_clear_retrans(tp); +#endif +} + +/* + * Build a SYN and send it off. + */ +int tcp_connect(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *buff; + + tcp_connect_init(sk); + + buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation); + if (unlikely(buff == NULL)) + return -ENOBUFS; + + /* Reserve space for headers. */ + skb_reserve(buff, MAX_TCP_HEADER); + + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; + TCP_ECN_send_syn(tp, buff); + TCP_SKB_CB(buff)->sacked = 0; + buff->csum = 0; + TCP_SKB_CB(buff)->seq = tp->write_seq++; + TCP_SKB_CB(buff)->end_seq = tp->write_seq; + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; + + /* Send it off. */ + TCP_SKB_CB(buff)->when = tcp_time_stamp; + tp->retrans_stamp = TCP_SKB_CB(buff)->when; + __skb_queue_tail(&sk->write_queue, buff); + tcp_charge_skb(sk, buff); + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); + TCP_INC_STATS(TcpActiveOpens); + + /* Timer for repeating the SYN until an answer. */ + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + return 0; +#else + return 0; +#endif +} + +/* Send out a delayed ack, the caller does the policy checking + * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() + * for details. + */ +void tcp_send_delayed_ack(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int ato = tp->ack.ato; + unsigned long timeout; + + if (ato > TCP_DELACK_MIN) { + int max_ato = HZ/2; + + if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) + max_ato = TCP_DELACK_MAX; + + /* Slow path, intersegment interval is "high". */ + + /* If some rtt estimate is known, use it to bound delayed ack. + * Do not use tp->rto here, use results of rtt measurements + * directly. 
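/*
 * Illustrative sketch, not part of the imported code: the comment above notes
 * that the delayed-ACK interval is bounded by a smoothed-RTT measurement
 * rather than the RTO.  bound_ato_sketch() restates that clamping standalone;
 * HZ and the delayed-ACK limits below are assumed example values.
 */
#include <stdio.h>

#define SK_HZ          100                  /* assumed jiffies per second */
#define SK_DELACK_MIN  (SK_HZ / 25)
#define SK_DELACK_MAX  (SK_HZ / 5)

static unsigned long bound_ato_sketch(unsigned long ato, unsigned long srtt_x8,
                                      int pingpong)
{
    if (ato > SK_DELACK_MIN) {
        unsigned long max_ato = pingpong ? SK_DELACK_MAX : SK_HZ / 2;

        if (srtt_x8) {                      /* srtt is stored left-shifted by 3 */
            unsigned long rtt = srtt_x8 >> 3;
            if (rtt < SK_DELACK_MIN)
                rtt = SK_DELACK_MIN;
            if (rtt < max_ato)
                max_ato = rtt;
        }
        if (ato > max_ato)
            ato = max_ato;
    }
    return ato;
}

int main(void)
{
    /* A 6-jiffy smoothed RTT (stored as 48) caps a 50-jiffy ATO at 6. */
    printf("%lu\n", bound_ato_sketch(50, 48, 0));
    return 0;
}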
+ */ + if (tp->srtt) { + int rtt = max(tp->srtt>>3, TCP_DELACK_MIN); + + if (rtt < max_ato) + max_ato = rtt; + } + + ato = min(ato, max_ato); + } + + /* Stay within the limit we were given */ + timeout = jiffies + ato; + + /* Use new timeout only if there wasn't a older one earlier. */ + if (tp->ack.pending&TCP_ACK_TIMER) { + /* If delack timer was blocked or is about to expire, + * send ACK now. + */ + if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) { + tcp_send_ack(sk); + return; + } + + if (!time_before(timeout, tp->ack.timeout)) + timeout = tp->ack.timeout; + } + tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; + tp->ack.timeout = timeout; + if (!mod_timer(&tp->delack_timer, timeout)) + sock_hold(sk); +#endif +} + +/* This routine sends an ack and also updates the window. */ +void tcp_send_ack(struct sock *sk) +{ +#if 0 + /* If we have been reset, we may not send again. */ + if(sk->state != TCP_CLOSE) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *buff; + + /* We are not putting this on the write queue, so + * tcp_transmit_skb() will set the ownership to this + * sock. + */ + buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + if (buff == NULL) { + tcp_schedule_ack(tp); + tp->ack.ato = TCP_ATO_MIN; + tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + return; + } + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(buff, MAX_TCP_HEADER); + buff->csum = 0; + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(buff)->sacked = 0; + + /* Send it off, this clears delayed acks for us. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); + TCP_SKB_CB(buff)->when = tcp_time_stamp; + tcp_transmit_skb(sk, buff); + } +#else + return 0; +#endif +} + +/* This routine sends a packet with an out of date sequence + * number. It assumes the other end will try to ack it. + * + * Question: what should we make while urgent mode? + * 4.4BSD forces sending single byte of data. We cannot send + * out of window data, because we have SND.NXT==SND.MAX... + * + * Current solution: to send TWO zero-length segments in urgent mode: + * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is + * out-of-date with SND.UNA-1 to probe window. + */ +static int tcp_xmit_probe_skb(struct sock *sk, int urgent) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + /* We don't queue it, tcp_transmit_skb() sets ownership. */ + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + if (skb == NULL) + return -1; + + /* Reserve space for headers and set control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->sacked = urgent; + + /* Use a previous sequence. This should cause the other + * end to send an ack. Don't queue or clone SKB, just + * send it. + */ + TCP_SKB_CB(skb)->seq = urgent ? 
tp->snd_una : tp->snd_una - 1; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; + TCP_SKB_CB(skb)->when = tcp_time_stamp; + return tcp_transmit_skb(sk, skb); +#else + return 0; +#endif +} + +int tcp_write_wakeup(struct sock *sk) +{ +#if 0 + if (sk->state != TCP_CLOSE) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + if ((skb = tp->send_head) != NULL && + before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { + int err; + int mss = tcp_current_mss(sk); + int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; + + if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) + tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; + + /* We are probing the opening of a window + * but the window size is != 0 + * must have been a result SWS avoidance ( sender ) + */ + if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || + skb->len > mss) { + seg_size = min(seg_size, mss); + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + if (tcp_fragment(sk, skb, seg_size)) + return -1; + } + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->when = tcp_time_stamp; + err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); + if (!err) { + update_send_head(sk, tp, skb); + } + return err; + } else { + if (tp->urg_mode && + between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF)) + tcp_xmit_probe_skb(sk, TCPCB_URG); + return tcp_xmit_probe_skb(sk, 0); + } + } + return -1; +#else + return 0; +#endif +} + +/* A window probe timeout has occurred. If window is not closed send + * a partial packet else a zero probe. + */ +void tcp_send_probe0(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int err; + + err = tcp_write_wakeup(sk); + + if (tp->packets_out || !tp->send_head) { + /* Cancel probe timer, if it is not required. */ + tp->probes_out = 0; + tp->backoff = 0; + return; + } + + if (err <= 0) { + tp->backoff++; + tp->probes_out++; + tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, + min(tp->rto << tp->backoff, TCP_RTO_MAX)); + } else { + /* If packet was not sent due to local congestion, + * do not backoff and do not remember probes_out. + * Let local senders to fight for local resources. + * + * Use accumulated backoff yet. + */ + if (!tp->probes_out) + tp->probes_out=1; + tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, + min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL)); + } +#endif +} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_timer.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_timer.c new file mode 100755 index 00000000000..5f9affc5c4d --- /dev/null +++ b/reactos/drivers/net/tcpip/transport/tcp/tcp_timer.c @@ -0,0 +1,702 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: transport/tcp/tcp_input.c + * PURPOSE: Transmission Control Protocol + * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) + * REVISIONS: + * CSH 15-01-2003 Imported from linux kernel 2.4.20 + */ + +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp_timer.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ + * + * Authors: Ross Biro, + * Fred N. 
van Kempen, + * Mark Evans, + * Corey Minyard + * Florian La Roche, + * Charles Hedrick, + * Linus Torvalds, + * Alan Cox, + * Matthew Dillon, + * Arnt Gulbrandsen, + * Jorge Cwik, + */ + +#if 0 +#include +#else +#include "linux.h" +#include "tcpcore.h" +#endif + +int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; +int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; +//int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; +int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; +//int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; +int sysctl_tcp_retries1 = TCP_RETR1; +int sysctl_tcp_retries2 = TCP_RETR2; +int sysctl_tcp_orphan_retries; + +static void tcp_write_timer(unsigned long); +static void tcp_delack_timer(unsigned long); +static void tcp_keepalive_timer (unsigned long data); + +//const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; + +/* + * Using different timers for retransmit, delayed acks and probes + * We may wish use just one timer maintaining a list of expire jiffies + * to optimize. + */ + +void tcp_init_xmit_timers(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + init_timer(&tp->retransmit_timer); + tp->retransmit_timer.function=&tcp_write_timer; + tp->retransmit_timer.data = (unsigned long) sk; + tp->pending = 0; + + init_timer(&tp->delack_timer); + tp->delack_timer.function=&tcp_delack_timer; + tp->delack_timer.data = (unsigned long) sk; + tp->ack.pending = 0; + + init_timer(&sk->timer); + sk->timer.function=&tcp_keepalive_timer; + sk->timer.data = (unsigned long) sk; +#endif +} + +void tcp_clear_xmit_timers(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + tp->pending = 0; + if (timer_pending(&tp->retransmit_timer) && + del_timer(&tp->retransmit_timer)) + __sock_put(sk); + + tp->ack.pending = 0; + tp->ack.blocked = 0; + if (timer_pending(&tp->delack_timer) && + del_timer(&tp->delack_timer)) + __sock_put(sk); + + if(timer_pending(&sk->timer) && del_timer(&sk->timer)) + __sock_put(sk); +#endif +} + +static void tcp_write_err(struct sock *sk) +{ +#if 0 + sk->err = sk->err_soft ? : ETIMEDOUT; + sk->error_report(sk); + + tcp_done(sk); + NET_INC_STATS_BH(TCPAbortOnTimeout); +#endif +} + +/* Do not allow orphaned sockets to eat all our resources. + * This is direct violation of TCP specs, but it is required + * to prevent DoS attacks. It is called when a retransmission timeout + * or zero probe timeout occurs on orphaned socket. + * + * Criterium is still not confirmed experimentally and may change. + * We kill the socket, if: + * 1. If number of orphaned sockets exceeds an administratively configured + * limit. + * 2. If we have strong memory pressure. + */ +static int tcp_out_of_resources(struct sock *sk, int do_reset) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int orphans = atomic_read(&tcp_orphan_count); + + /* If peer does not open window for long time, or did not transmit + * anything for long time, penalize it. */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + orphans <<= 1; + + /* If some dubious ICMP arrived, penalize even more. */ + if (sk->err_soft) + orphans <<= 1; + + if (orphans >= sysctl_tcp_max_orphans || + (sk->wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { + if (net_ratelimit()) + printk(KERN_INFO "Out of socket memory\n"); + + /* Catch exceptional cases, when connection requires reset. + * 1. Last segment was sent recently. */ + if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || + /* 2. 
Window is closed. */ + (!tp->snd_wnd && !tp->packets_out)) + do_reset = 1; + if (do_reset) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + NET_INC_STATS_BH(TCPAbortOnMemory); + return 1; + } + return 0; +#else + return 0; +#endif +} + +/* Calculate maximal number or retries on an orphaned socket. */ +static int tcp_orphan_retries(struct sock *sk, int alive) +{ +#if 0 + int retries = sysctl_tcp_orphan_retries; /* May be zero. */ + + /* We know from an ICMP that something is wrong. */ + if (sk->err_soft && !alive) + retries = 0; + + /* However, if socket sent something recently, select some safe + * number of retries. 8 corresponds to >100 seconds with minimal + * RTO of 200msec. */ + if (retries == 0 && alive) + retries = 8; + return retries; +#else + return 0; +#endif +} + +/* A write timeout has occurred. Process the after effects. */ +static int tcp_write_timeout(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int retry_until; + + if ((1<state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { + if (tp->retransmits) + dst_negative_advice(&sk->dst_cache); + retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries; + } else { + if (tp->retransmits >= sysctl_tcp_retries1) { + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black + hole detection. :-( + + It is place to make it. It is not made. I do not want + to make it. It is disguisting. It does not work in any + case. Let me to cite the same draft, which requires for + us to implement this: + + "The one security concern raised by this memo is that ICMP black holes + are often caused by over-zealous security administrators who block + all ICMP messages. It is vitally important that those who design and + deploy security systems understand the impact of strict filtering on + upper-layer protocols. The safest web site in the world is worthless + if most TCP implementations cannot transfer data from it. It would + be far nicer to have all of the black holes fixed rather than fixing + all of the TCP implementations." + + Golden words :-). + */ + + dst_negative_advice(&sk->dst_cache); + } + + retry_until = sysctl_tcp_retries2; + if (sk->dead) { + int alive = (tp->rto < TCP_RTO_MAX); + + retry_until = tcp_orphan_retries(sk, alive); + + if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until)) + return 1; + } + } + + if (tp->retransmits >= retry_until) { + /* Has it gone just too far? */ + tcp_write_err(sk); + return 1; + } + return 0; +#else + return 0; +#endif +} + +static void tcp_delack_timer(unsigned long data) +{ +#if 0 + struct sock *sk = (struct sock*)data; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + bh_lock_sock(sk); + if (sk->lock.users) { + /* Try again later. */ + tp->ack.blocked = 1; + NET_INC_STATS_BH(DelayedACKLocked); + if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN)) + sock_hold(sk); + goto out_unlock; + } + + tcp_mem_reclaim(sk); + + if (sk->state == TCP_CLOSE || !(tp->ack.pending&TCP_ACK_TIMER)) + goto out; + + if ((long)(tp->ack.timeout - jiffies) > 0) { + if (!mod_timer(&tp->delack_timer, tp->ack.timeout)) + sock_hold(sk); + goto out; + } + tp->ack.pending &= ~TCP_ACK_TIMER; + + if (skb_queue_len(&tp->ucopy.prequeue)) { + struct sk_buff *skb; + + net_statistics[smp_processor_id()*2].TCPSchedulerFailed += skb_queue_len(&tp->ucopy.prequeue); + + while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) + sk->backlog_rcv(sk, skb); + + tp->ucopy.memory = 0; + } + + if (tcp_ack_scheduled(tp)) { + if (!tp->ack.pingpong) { + /* Delayed ACK missed: inflate ATO. 
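/*
 * Illustrative sketch, not part of the imported code: when the delayed-ACK
 * timer fires with an ACK still pending, the code above either inflates the
 * ACK timeout (capped by the RTO) or leaves pingpong mode and resets it to
 * the minimum.  missed_delack_sketch() is an invented restatement.
 */
#include <stdio.h>

#define SK_ATO_MIN 4   /* assumed minimum ACK timeout, in jiffies */

static unsigned long missed_delack_sketch(unsigned long ato, unsigned long rto,
                                          int *pingpong)
{
    if (!*pingpong) {
        ato <<= 1;            /* inflate ... */
        if (ato > rto)
            ato = rto;        /* ... but never beyond the retransmission timeout */
    } else {
        *pingpong = 0;        /* interactive session went quiet: quick ACKs again */
        ato = SK_ATO_MIN;
    }
    return ato;
}

int main(void)
{
    int pingpong = 0;
    printf("%lu\n", missed_delack_sketch(40, 60, &pingpong));   /* 80 capped to 60 */
    return 0;
}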
*/ + tp->ack.ato = min(tp->ack.ato << 1, tp->rto); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. + */ + tp->ack.pingpong = 0; + tp->ack.ato = TCP_ATO_MIN; + } + tcp_send_ack(sk); + NET_INC_STATS_BH(DelayedACKs); + } + TCP_CHECK_TIMER(sk); + +out: + if (tcp_memory_pressure) + tcp_mem_reclaim(sk); +out_unlock: + bh_unlock_sock(sk); + sock_put(sk); +#endif +} + +static void tcp_probe_timer(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int max_probes; + + if (tp->packets_out || !tp->send_head) { + tp->probes_out = 0; + return; + } + + /* *WARNING* RFC 1122 forbids this + * + * It doesn't AFAIK, because we kill the retransmit timer -AK + * + * FIXME: We ought not to do it, Solaris 2.5 actually has fixing + * this behaviour in Solaris down as a bug fix. [AC] + * + * Let me to explain. probes_out is zeroed by incoming ACKs + * even if they advertise zero window. Hence, connection is killed only + * if we received no ACKs for normal connection timeout. It is not killed + * only because window stays zero for some time, window may be zero + * until armageddon and even later. We are in full accordance + * with RFCs, only probe timer combines both retransmission timeout + * and probe timeout in one bottle. --ANK + */ + max_probes = sysctl_tcp_retries2; + + if (sk->dead) { + int alive = ((tp->rto<backoff) < TCP_RTO_MAX); + + max_probes = tcp_orphan_retries(sk, alive); + + if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) + return; + } + + if (tp->probes_out > max_probes) { + tcp_write_err(sk); + } else { + /* Only send another probe if we didn't close things up. */ + tcp_send_probe0(sk); + } +#endif +} + +/* + * The TCP retransmit timer. + */ + +static void tcp_retransmit_timer(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (tp->packets_out == 0) + goto out; + + BUG_TRAP(!skb_queue_empty(&sk->write_queue)); + + if (tp->snd_wnd == 0 && !sk->dead && + !((1<state)&(TCPF_SYN_SENT|TCPF_SYN_RECV))) { + /* Receiver dastardly shrinks window. Our retransmits + * become zero probes, but we should not timeout this + * connection. If the socket is an orphan, time it out, + * we cannot allow such beasts to hang infinitely. + */ +#ifdef TCP_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", + NIPQUAD(sk->daddr), htons(sk->dport), sk->num, + tp->snd_una, tp->snd_nxt); +#endif + if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { + tcp_write_err(sk); + goto out; + } + tcp_enter_loss(sk, 0); + tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); + __sk_dst_reset(sk); + goto out_reset_timer; + } + + if (tcp_write_timeout(sk)) + goto out; + + if (tp->retransmits == 0) { + if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { + if (tp->sack_ok) { + if (tp->ca_state == TCP_CA_Recovery) + NET_INC_STATS_BH(TCPSackRecoveryFail); + else + NET_INC_STATS_BH(TCPSackFailures); + } else { + if (tp->ca_state == TCP_CA_Recovery) + NET_INC_STATS_BH(TCPRenoRecoveryFail); + else + NET_INC_STATS_BH(TCPRenoFailures); + } + } else if (tp->ca_state == TCP_CA_Loss) { + NET_INC_STATS_BH(TCPLossFailures); + } else { + NET_INC_STATS_BH(TCPTimeouts); + } + } + + tcp_enter_loss(sk, 0); + + if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) { + /* Retransmission failed because of local congestion, + * do not backoff. 
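/*
 * Illustrative sketch, not part of the imported code: the comment above
 * separates a retransmit that failed only because of local resource pressure
 * (no backoff, short re-probe) from a real timeout (exponential backoff of
 * the RTO, clamped at a maximum).  rto_after_timeout_sketch() and its
 * constants are invented placeholders summarising that policy.
 */
#include <stdio.h>

#define SK_RTO_MAX         12000   /* placeholder clamp, in jiffies */
#define SK_PROBE_INTERVAL     50   /* placeholder local-congestion retry */

static unsigned long rto_after_timeout_sketch(unsigned long rto, int local_failure,
                                              unsigned int *backoff)
{
    if (local_failure)                        /* do not back off; rearm soon */
        return rto < SK_PROBE_INTERVAL ? rto : SK_PROBE_INTERVAL;

    (*backoff)++;
    rto <<= 1;                                /* classic doubling ... */
    if (rto > SK_RTO_MAX)
        rto = SK_RTO_MAX;                     /* ... bounded above */
    return rto;
}

int main(void)
{
    unsigned int backoff = 0;
    unsigned long rto = 3000;
    rto = rto_after_timeout_sketch(rto, 0, &backoff);   /* 6000 */
    rto = rto_after_timeout_sketch(rto, 0, &backoff);   /* 12000 */
    printf("rto=%lu backoff=%u\n", rto, backoff);
    return 0;
}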
+ */ + if (!tp->retransmits) + tp->retransmits=1; + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, + min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL)); + goto out; + } + + /* Increase the timeout each time we retransmit. Note that + * we do not increase the rtt estimate. rto is initialized + * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests + * that doubling rto each time is the least we can get away with. + * In KA9Q, Karn uses this for the first few times, and then + * goes to quadratic. netBSD doubles, but only goes up to *64, + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is + * defined in the protocol as the maximum possible RTT. I guess + * we'll have to use something other than TCP to talk to the + * University of Mars. + * + * PAWS allows us longer timeouts and large windows, so once + * implemented ftp to mars will work nicely. We will have to fix + * the 120 second clamps though! + */ + tp->backoff++; + tp->retransmits++; + +out_reset_timer: + tp->rto = min(tp->rto << 1, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + if (tp->retransmits > sysctl_tcp_retries1) + __sk_dst_reset(sk); + +out:; +#endif +} + +static void tcp_write_timer(unsigned long data) +{ +#if 0 + struct sock *sk = (struct sock*)data; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int event; + + bh_lock_sock(sk); + if (sk->lock.users) { + /* Try again later */ + if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20))) + sock_hold(sk); + goto out_unlock; + } + + if (sk->state == TCP_CLOSE || !tp->pending) + goto out; + + if ((long)(tp->timeout - jiffies) > 0) { + if (!mod_timer(&tp->retransmit_timer, tp->timeout)) + sock_hold(sk); + goto out; + } + + event = tp->pending; + tp->pending = 0; + + switch (event) { + case TCP_TIME_RETRANS: + tcp_retransmit_timer(sk); + break; + case TCP_TIME_PROBE0: + tcp_probe_timer(sk); + break; + } + TCP_CHECK_TIMER(sk); + +out: + tcp_mem_reclaim(sk); +out_unlock: + bh_unlock_sock(sk); + sock_put(sk); +#endif +} + +/* + * Timer for listening sockets + */ + +static void tcp_synack_timer(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_listen_opt *lopt = tp->listen_opt; + int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; + int thresh = max_retries; + unsigned long now = jiffies; + struct open_request **reqp, *req; + int i, budget; + + if (lopt == NULL || lopt->qlen == 0) + return; + + /* Normally all the openreqs are young and become mature + * (i.e. converted to established socket) for first timeout. + * If synack was not acknowledged for 3 seconds, it means + * one of the following things: synack was lost, ack was lost, + * rtt is high or nobody planned to ack (i.e. synflood). + * When server is a bit loaded, queue is populated with old + * open requests, reducing effective size of queue. + * When server is well loaded, queue size reduces to zero + * after several minutes of work. It is not synflood, + * it is normal operation. The solution is pruning + * too old entries overriding normal timeout, when + * situation becomes dangerous. + * + * Essentially, we reserve half of room for young + * embrions; and abort old ones without pity, if old + * ones are about to clog our table. 
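/*
 * Illustrative sketch, not part of the imported code: the comment above
 * describes lowering the SYN-ACK retry threshold when the listen queue fills
 * with old entries, so roughly half of the table stays reserved for young
 * requests.  prune_thresh_sketch() is a simplified standalone restatement of
 * that heuristic; all names and numbers are invented.
 */
#include <stdio.h>

static int prune_thresh_sketch(int max_retries, unsigned int qlen,
                               unsigned int qlen_young, unsigned int max_qlen_log)
{
    int thresh = max_retries;

    /* Only tighten once the queue is more than half full. */
    if (qlen >> (max_qlen_log - 1)) {
        unsigned int young = qlen_young << 1;

        while (thresh > 2) {
            if (qlen < young)
                break;
            thresh--;
            young <<= 1;
        }
    }
    return thresh;
}

int main(void)
{
    /* 900 queued, only 100 young, table of 2^10 slots: retries drop from 5 to 2. */
    printf("%d\n", prune_thresh_sketch(5, 900, 100, 10));
    return 0;
}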
+ */ + if (lopt->qlen>>(lopt->max_qlen_log-1)) { + int young = (lopt->qlen_young<<1); + + while (thresh > 2) { + if (lopt->qlen < young) + break; + thresh--; + young <<= 1; + } + } + + if (tp->defer_accept) + max_retries = tp->defer_accept; + + budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); + i = lopt->clock_hand; + + do { + reqp=&lopt->syn_table[i]; + while ((req = *reqp) != NULL) { + if ((long)(now - req->expires) >= 0) { + if ((req->retrans < thresh || + (req->acked && req->retrans < max_retries)) + && !req->class->rtx_syn_ack(sk, req, NULL)) { + unsigned long timeo; + + if (req->retrans++ == 0) + lopt->qlen_young--; + timeo = min((TCP_TIMEOUT_INIT << req->retrans), + TCP_RTO_MAX); + req->expires = now + timeo; + reqp = &req->dl_next; + continue; + } + + /* Drop this request */ + write_lock(&tp->syn_wait_lock); + *reqp = req->dl_next; + write_unlock(&tp->syn_wait_lock); + lopt->qlen--; + if (req->retrans == 0) + lopt->qlen_young--; + tcp_openreq_free(req); + continue; + } + reqp = &req->dl_next; + } + + i = (i+1)&(TCP_SYNQ_HSIZE-1); + + } while (--budget > 0); + + lopt->clock_hand = i; + + if (lopt->qlen) + tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); +#endif +} + +void tcp_delete_keepalive_timer (struct sock *sk) +{ +#if 0 + if (timer_pending(&sk->timer) && del_timer (&sk->timer)) + __sock_put(sk); +#endif +} + +void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len) +{ +#if 0 + if (!mod_timer(&sk->timer, jiffies+len)) + sock_hold(sk); +#endif +} + +void tcp_set_keepalive(struct sock *sk, int val) +{ +#if 0 + if ((1<state)&(TCPF_CLOSE|TCPF_LISTEN)) + return; + + if (val && !sk->keepopen) + tcp_reset_keepalive_timer(sk, keepalive_time_when(&sk->tp_pinfo.af_tcp)); + else if (!val) + tcp_delete_keepalive_timer(sk); +#endif +} + + +static void tcp_keepalive_timer (unsigned long data) +{ +#if 0 + struct sock *sk = (struct sock *) data; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + __u32 elapsed; + + /* Only process if socket is not in use. */ + bh_lock_sock(sk); + if (sk->lock.users) { + /* Try again later. */ + tcp_reset_keepalive_timer (sk, HZ/20); + goto out; + } + + if (sk->state == TCP_LISTEN) { + tcp_synack_timer(sk); + goto out; + } + + if (sk->state == TCP_FIN_WAIT2 && sk->dead) { + if (tp->linger2 >= 0) { + int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN; + + if (tmo > 0) { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto out; + } + } + tcp_send_active_reset(sk, GFP_ATOMIC); + goto death; + } + + if (!sk->keepopen || sk->state == TCP_CLOSE) + goto out; + + elapsed = keepalive_time_when(tp); + + /* It is alive without keepalive 8) */ + if (tp->packets_out || tp->send_head) + goto resched; + + elapsed = tcp_time_stamp - tp->rcv_tstamp; + + if (elapsed >= keepalive_time_when(tp)) { + if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || + (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_write_err(sk); + goto out; + } + if (tcp_write_wakeup(sk) <= 0) { + tp->probes_out++; + elapsed = keepalive_intvl_when(tp); + } else { + /* If keepalive was lost due to local congestion, + * try harder. 
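/*
 * Illustrative sketch, not part of the imported code: the keepalive timer
 * above measures how long the connection has been idle and either probes now
 * or re-arms for the remainder of the keepalive period.
 * next_keepalive_sketch() is an invented helper; times are in jiffies.
 */
#include <stdio.h>

static unsigned long next_keepalive_sketch(unsigned long now, unsigned long last_rcv,
                                           unsigned long keepalive_time,
                                           unsigned long keepalive_intvl,
                                           int *probe_now)
{
    unsigned long elapsed = now - last_rcv;

    if (elapsed >= keepalive_time) {
        *probe_now = 1;                 /* idle long enough: send a probe */
        return keepalive_intvl;         /* then re-check after the probe interval */
    }
    *probe_now = 0;
    return keepalive_time - elapsed;    /* sleep out the rest of the idle period */
}

int main(void)
{
    int probe;
    /* Idle for 3000 of a 7200-jiffy period: re-arm for the remaining 4200. */
    printf("%lu probe=%d\n",
           next_keepalive_sketch(10000, 7000, 7200, 75, &probe), probe);
    return 0;
}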
+ */ + elapsed = TCP_RESOURCE_PROBE_INTERVAL; + } + } else { + /* It is tp->rcv_tstamp + keepalive_time_when(tp) */ + elapsed = keepalive_time_when(tp) - elapsed; + } + + TCP_CHECK_TIMER(sk); + tcp_mem_reclaim(sk); + +resched: + tcp_reset_keepalive_timer (sk, elapsed); + goto out; + +death: + tcp_done(sk); + +out: + bh_unlock_sock(sk); + sock_put(sk); +#endif +} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcpcore.c b/reactos/drivers/net/tcpip/transport/tcp/tcpcore.c new file mode 100755 index 00000000000..8e1b3f93736 --- /dev/null +++ b/reactos/drivers/net/tcpip/transport/tcp/tcpcore.c @@ -0,0 +1,2783 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: transport/tcp/tcpcore.c + * PURPOSE: Transmission Control Protocol + * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) + * REVISIONS: + * CSH 15-01-2003 Imported from linux kernel 2.4.20 + */ + +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcpcore.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * Mark Evans, + * Corey Minyard + * Florian La Roche, + * Charles Hedrick, + * Linus Torvalds, + * Alan Cox, + * Matthew Dillon, + * Arnt Gulbrandsen, + * Jorge Cwik, + * + * Fixes: + * Alan Cox : Numerous verify_area() calls + * Alan Cox : Set the ACK bit on a reset + * Alan Cox : Stopped it crashing if it closed while + * sk->inuse=1 and was trying to connect + * (tcp_err()). + * Alan Cox : All icmp error handling was broken + * pointers passed where wrong and the + * socket was looked up backwards. Nobody + * tested any icmp error code obviously. + * Alan Cox : tcp_err() now handled properly. It + * wakes people on errors. poll + * behaves and the icmp error race + * has gone by moving it into sock.c + * Alan Cox : tcp_send_reset() fixed to work for + * everything not just packets for + * unknown sockets. + * Alan Cox : tcp option processing. + * Alan Cox : Reset tweaked (still not 100%) [Had + * syn rule wrong] + * Herp Rosmanith : More reset fixes + * Alan Cox : No longer acks invalid rst frames. + * Acking any kind of RST is right out. + * Alan Cox : Sets an ignore me flag on an rst + * receive otherwise odd bits of prattle + * escape still + * Alan Cox : Fixed another acking RST frame bug. + * Should stop LAN workplace lockups. + * Alan Cox : Some tidyups using the new skb list + * facilities + * Alan Cox : sk->keepopen now seems to work + * Alan Cox : Pulls options out correctly on accepts + * Alan Cox : Fixed assorted sk->rqueue->next errors + * Alan Cox : PSH doesn't end a TCP read. Switched a + * bit to skb ops. + * Alan Cox : Tidied tcp_data to avoid a potential + * nasty. + * Alan Cox : Added some better commenting, as the + * tcp is hard to follow + * Alan Cox : Removed incorrect check for 20 * psh + * Michael O'Reilly : ack < copied bug fix. + * Johannes Stille : Misc tcp fixes (not all in yet). + * Alan Cox : FIN with no memory -> CRASH + * Alan Cox : Added socket option proto entries. + * Also added awareness of them to accept. + * Alan Cox : Added TCP options (SOL_TCP) + * Alan Cox : Switched wakeup calls to callbacks, + * so the kernel can layer network + * sockets. + * Alan Cox : Use ip_tos/ip_ttl settings. + * Alan Cox : Handle FIN (more) properly (we hope). 
+ * Alan Cox : RST frames sent on unsynchronised + * state ack error. + * Alan Cox : Put in missing check for SYN bit. + * Alan Cox : Added tcp_select_window() aka NET2E + * window non shrink trick. + * Alan Cox : Added a couple of small NET2E timer + * fixes + * Charles Hedrick : TCP fixes + * Toomas Tamm : TCP window fixes + * Alan Cox : Small URG fix to rlogin ^C ack fight + * Charles Hedrick : Rewrote most of it to actually work + * Linus : Rewrote tcp_read() and URG handling + * completely + * Gerhard Koerting: Fixed some missing timer handling + * Matthew Dillon : Reworked TCP machine states as per RFC + * Gerhard Koerting: PC/TCP workarounds + * Adam Caldwell : Assorted timer/timing errors + * Matthew Dillon : Fixed another RST bug + * Alan Cox : Move to kernel side addressing changes. + * Alan Cox : Beginning work on TCP fastpathing + * (not yet usable) + * Arnt Gulbrandsen: Turbocharged tcp_check() routine. + * Alan Cox : TCP fast path debugging + * Alan Cox : Window clamping + * Michael Riepe : Bug in tcp_check() + * Matt Dillon : More TCP improvements and RST bug fixes + * Matt Dillon : Yet more small nasties remove from the + * TCP code (Be very nice to this man if + * tcp finally works 100%) 8) + * Alan Cox : BSD accept semantics. + * Alan Cox : Reset on closedown bug. + * Peter De Schrijver : ENOTCONN check missing in tcp_sendto(). + * Michael Pall : Handle poll() after URG properly in + * all cases. + * Michael Pall : Undo the last fix in tcp_read_urg() + * (multi URG PUSH broke rlogin). + * Michael Pall : Fix the multi URG PUSH problem in + * tcp_readable(), poll() after URG + * works now. + * Michael Pall : recv(...,MSG_OOB) never blocks in the + * BSD api. + * Alan Cox : Changed the semantics of sk->socket to + * fix a race and a signal problem with + * accept() and async I/O. + * Alan Cox : Relaxed the rules on tcp_sendto(). + * Yury Shevchuk : Really fixed accept() blocking problem. + * Craig I. Hagan : Allow for BSD compatible TIME_WAIT for + * clients/servers which listen in on + * fixed ports. + * Alan Cox : Cleaned the above up and shrank it to + * a sensible code size. + * Alan Cox : Self connect lockup fix. + * Alan Cox : No connect to multicast. + * Ross Biro : Close unaccepted children on master + * socket close. + * Alan Cox : Reset tracing code. + * Alan Cox : Spurious resets on shutdown. + * Alan Cox : Giant 15 minute/60 second timer error + * Alan Cox : Small whoops in polling before an + * accept. + * Alan Cox : Kept the state trace facility since + * it's handy for debugging. + * Alan Cox : More reset handler fixes. + * Alan Cox : Started rewriting the code based on + * the RFC's for other useful protocol + * references see: Comer, KA9Q NOS, and + * for a reference on the difference + * between specifications and how BSD + * works see the 4.4lite source. + * A.N.Kuznetsov : Don't time wait on completion of tidy + * close. + * Linus Torvalds : Fin/Shutdown & copied_seq changes. + * Linus Torvalds : Fixed BSD port reuse to work first syn + * Alan Cox : Reimplemented timers as per the RFC + * and using multiple timers for sanity. + * Alan Cox : Small bug fixes, and a lot of new + * comments. + * Alan Cox : Fixed dual reader crash by locking + * the buffers (much like datagram.c) + * Alan Cox : Fixed stuck sockets in probe. A probe + * now gets fed up of retrying without + * (even a no space) answer. + * Alan Cox : Extracted closing code better + * Alan Cox : Fixed the closing state machine to + * resemble the RFC. + * Alan Cox : More 'per spec' fixes. 
+ * Jorge Cwik : Even faster checksumming. + * Alan Cox : tcp_data() doesn't ack illegal PSH + * only frames. At least one pc tcp stack + * generates them. + * Alan Cox : Cache last socket. + * Alan Cox : Per route irtt. + * Matt Day : poll()->select() match BSD precisely on error + * Alan Cox : New buffers + * Marc Tamsky : Various sk->prot->retransmits and + * sk->retransmits misupdating fixed. + * Fixed tcp_write_timeout: stuck close, + * and TCP syn retries gets used now. + * Mark Yarvis : In tcp_read_wakeup(), don't send an + * ack if state is TCP_CLOSED. + * Alan Cox : Look up device on a retransmit - routes may + * change. Doesn't yet cope with MSS shrink right + * but its a start! + * Marc Tamsky : Closing in closing fixes. + * Mike Shaver : RFC1122 verifications. + * Alan Cox : rcv_saddr errors. + * Alan Cox : Block double connect(). + * Alan Cox : Small hooks for enSKIP. + * Alexey Kuznetsov: Path MTU discovery. + * Alan Cox : Support soft errors. + * Alan Cox : Fix MTU discovery pathological case + * when the remote claims no mtu! + * Marc Tamsky : TCP_CLOSE fix. + * Colin (G3TNE) : Send a reset on syn ack replies in + * window but wrong (fixes NT lpd problems) + * Pedro Roque : Better TCP window handling, delayed ack. + * Joerg Reuter : No modification of locked buffers in + * tcp_do_retransmit() + * Eric Schenk : Changed receiver side silly window + * avoidance algorithm to BSD style + * algorithm. This doubles throughput + * against machines running Solaris, + * and seems to result in general + * improvement. + * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD + * Willy Konynenberg : Transparent proxying support. + * Mike McLagan : Routing by source + * Keith Owens : Do proper merging with partial SKB's in + * tcp_do_sendmsg to avoid burstiness. + * Eric Schenk : Fix fast close down bug with + * shutdown() followed by close(). + * Andi Kleen : Make poll agree with SIGIO + * Salvatore Sanfilippo : Support SO_LINGER with linger == 1 and + * lingertime == 0 (RFC 793 ABORT Call) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + * + * Description of States: + * + * TCP_SYN_SENT sent a connection request, waiting for ack + * + * TCP_SYN_RECV received a connection request, sent ack, + * waiting for final ack in three-way handshake. + * + * TCP_ESTABLISHED connection established + * + * TCP_FIN_WAIT1 our side has shutdown, waiting to complete + * transmission of remaining buffered data + * + * TCP_FIN_WAIT2 all buffered data sent, waiting for remote + * to shutdown + * + * TCP_CLOSING both sides have shutdown but we still have + * data we have to finish sending + * + * TCP_TIME_WAIT timeout to catch resent junk before entering + * closed, can only be entered from FIN_WAIT2 + * or CLOSING. Required because the other end + * may not have gotten our last ACK causing it + * to retransmit the data packet (which we ignore) + * + * TCP_CLOSE_WAIT remote side has shutdown and is waiting for + * us to finish writing our data and to shutdown + * (we have to close() to move on to LAST_ACK) + * + * TCP_LAST_ACK out side has shutdown after remote has + * shutdown. 
There may still be data in our + * buffer that we have to finish sending + * + * TCP_CLOSE socket is finished + */ + +#if 0 +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#else +#include "linux.h" +#include "tcpcore.h" +#endif + +int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; + +#ifdef ROS_STATISTICS +struct tcp_mib tcp_statistics[NR_CPUS*2]; +#endif + +kmem_cache_t *tcp_openreq_cachep; +kmem_cache_t *tcp_bucket_cachep; +kmem_cache_t *tcp_timewait_cachep; + +#if 0 +atomic_t tcp_orphan_count = ATOMIC_INIT(0); +#endif + +int sysctl_tcp_mem[3]; +int sysctl_tcp_wmem[3] = { 4*1024, 16*1024, 128*1024 }; +int sysctl_tcp_rmem[3] = { 4*1024, 87380, 87380*2 }; + +atomic_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ + +/* Pressure flag: try to collapse. + * Technical note: it is used by multiple contexts non atomically. + * All the tcp_mem_schedule() is of this nature: accounting + * is strict, actions are advisory and have some latency. */ +int tcp_memory_pressure; + +#define TCP_PAGES(amt) (((amt)+TCP_MEM_QUANTUM-1)/TCP_MEM_QUANTUM) + +int tcp_mem_schedule(struct sock *sk, int size, int kind) +{ + int amt = TCP_PAGES(size); + + sk->forward_alloc += amt*TCP_MEM_QUANTUM; + atomic_add(amt, &tcp_memory_allocated); + + /* Under limit. */ + if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + if (tcp_memory_pressure) + tcp_memory_pressure = 0; + return 1; + } + + /* Over hard limit. */ + if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) { + tcp_enter_memory_pressure(); + goto suppress_allocation; + } + + /* Under pressure. */ + if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1]) + tcp_enter_memory_pressure(); + + if (kind) { + if (atomic_read(&sk->rmem_alloc) < sysctl_tcp_rmem[0]) + return 1; + } else { + if (sk->wmem_queued < sysctl_tcp_wmem[0]) + return 1; + } + + if (!tcp_memory_pressure || + sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) + * TCP_PAGES(sk->wmem_queued+atomic_read(&sk->rmem_alloc)+ + sk->forward_alloc)) + return 1; + +suppress_allocation: + + if (kind == 0) { + tcp_moderate_sndbuf(sk); + + /* Fail only if socket is _under_ its sndbuf. + * In this case we cannot block, so that we have to fail. + */ + if (sk->wmem_queued+size >= sk->sndbuf) + return 1; + } + + /* Alas. Undo changes. */ + sk->forward_alloc -= amt*TCP_MEM_QUANTUM; + atomic_sub(amt, &tcp_memory_allocated); + return 0; +} + +void __tcp_mem_reclaim(struct sock *sk) +{ + if (sk->forward_alloc >= TCP_MEM_QUANTUM) { + atomic_sub(sk->forward_alloc/TCP_MEM_QUANTUM, &tcp_memory_allocated); + sk->forward_alloc &= (TCP_MEM_QUANTUM-1); + if (tcp_memory_pressure && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) + tcp_memory_pressure = 0; + } +} + +void tcp_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->rmem_alloc); + sk->forward_alloc += skb->truesize; +} + +/* + * LISTEN is a special case for poll.. + */ +static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) +{ + return sk->tp_pinfo.af_tcp.accept_queue ? (POLLIN | POLLRDNORM) : 0; +} + +/* + * Wait for a TCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. 
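/*
 * Illustrative sketch, not part of the imported code: tcp_mem_schedule()
 * above accounts socket memory in fixed-size quanta, rounding a request up
 * to whole quanta before charging the global counter.  SK_QUANTUM and
 * charge_quanta_sketch() are illustrative names and values.
 */
#include <stdio.h>

#define SK_QUANTUM 4096   /* assumed accounting quantum, in bytes */

static int charge_quanta_sketch(int size)
{
    /* Round up: even a 1-byte request costs one full quantum. */
    return (size + SK_QUANTUM - 1) / SK_QUANTUM;
}

int main(void)
{
    printf("%d %d %d\n",
           charge_quanta_sketch(1),       /* 1 */
           charge_quanta_sketch(4096),    /* 1 */
           charge_quanta_sketch(6000));   /* 2 */
    return 0;
}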
+ */ +unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait) +{ +#if 0 + unsigned int mask; + struct sock *sk = sock->sk; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + poll_wait(file, sk->sleep, wait); + if (sk->state == TCP_LISTEN) + return tcp_listen_poll(sk, wait); + + /* Socket is not locked. We are protected from async events + by poll logic and correct handling of state changes + made by another threads is impossible in any case. + */ + + mask = 0; + if (sk->err) + mask = POLLERR; + + /* + * POLLHUP is certainly not done right. But poll() doesn't + * have a notion of HUP in just one direction, and for a + * socket the read side is more interesting. + * + * Some poll() documentation says that POLLHUP is incompatible + * with the POLLOUT/POLLWR flags, so somebody should check this + * all. But careful, it tends to be safer to return too many + * bits than too few, and you can easily break real applications + * if you don't tell them that something has hung up! + * + * Check-me. + * + * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and + * our fs/select.c). It means that after we received EOF, + * poll always returns immediately, making impossible poll() on write() + * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP + * if and only if shutdown has been made in both directions. + * Actually, it is interesting to look how Solaris and DUX + * solve this dilemma. I would prefer, if PULLHUP were maskable, + * then we could set it on SND_SHUTDOWN. BTW examples given + * in Stevens' books assume exactly this behaviour, it explains + * why PULLHUP is incompatible with POLLOUT. --ANK + * + * NOTE. Check for TCP_CLOSE is added. The goal is to prevent + * blocking on fresh not-connected or disconnected socket. --ANK + */ + if (sk->shutdown == SHUTDOWN_MASK || sk->state == TCP_CLOSE) + mask |= POLLHUP; + if (sk->shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM; + + /* Connected? */ + if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) { + /* Potential race condition. If read of tp below will + * escape above sk->state, we can be illegally awaken + * in SYN_* states. */ + if ((tp->rcv_nxt != tp->copied_seq) && + (tp->urg_seq != tp->copied_seq || + tp->rcv_nxt != tp->copied_seq+1 || + sk->urginline || !tp->urg_data)) + mask |= POLLIN | POLLRDNORM; + + if (!(sk->shutdown & SEND_SHUTDOWN)) { + if (tcp_wspace(sk) >= tcp_min_write_space(sk)) { + mask |= POLLOUT | POLLWRNORM; + } else { /* send SIGIO later */ + set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); + set_bit(SOCK_NOSPACE, &sk->socket->flags); + + /* Race breaker. If space is freed after + * wspace test but before the flags are set, + * IO signal will be lost. + */ + if (tcp_wspace(sk) >= tcp_min_write_space(sk)) + mask |= POLLOUT | POLLWRNORM; + } + } + + if (tp->urg_data & TCP_URG_VALID) + mask |= POLLPRI; + } + return mask; +#else + return 0; +#endif +} + +/* + * TCP socket write_space callback. 
+ */ +void tcp_write_space(struct sock *sk) +{ +#if 0 + struct socket *sock = sk->socket; + + if (tcp_wspace(sk) >= tcp_min_write_space(sk) && sock) { + clear_bit(SOCK_NOSPACE, &sock->flags); + + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); + + if (sock->fasync_list && !(sk->shutdown&SEND_SHUTDOWN)) + sock_wake_async(sock, 2, POLL_OUT); + } +#endif +} + +int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int answ; + + switch(cmd) { + case SIOCINQ: + if (sk->state == TCP_LISTEN) + return(-EINVAL); + + lock_sock(sk); + if ((1<state) & (TCPF_SYN_SENT|TCPF_SYN_RECV)) + answ = 0; + else if (sk->urginline || !tp->urg_data || + before(tp->urg_seq,tp->copied_seq) || + !before(tp->urg_seq,tp->rcv_nxt)) { + answ = tp->rcv_nxt - tp->copied_seq; + + /* Subtract 1, if FIN is in queue. */ + if (answ && !skb_queue_empty(&sk->receive_queue)) + answ -= ((struct sk_buff*)sk->receive_queue.prev)->h.th->fin; + } else + answ = tp->urg_seq - tp->copied_seq; + release_sock(sk); + break; + case SIOCATMARK: + { + answ = tp->urg_data && tp->urg_seq == tp->copied_seq; + break; + } + case SIOCOUTQ: + if (sk->state == TCP_LISTEN) + return(-EINVAL); + + if ((1<state) & (TCPF_SYN_SENT|TCPF_SYN_RECV)) + answ = 0; + else + answ = tp->write_seq - tp->snd_una; + break; + default: + return(-ENOIOCTLCMD); + }; + + return put_user(answ, (int *)arg); +#else +return 0; +#endif +} + + +int tcp_listen_start(struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_listen_opt *lopt; + + sk->max_ack_backlog = 0; + sk->ack_backlog = 0; + tp->accept_queue = tp->accept_queue_tail = NULL; + tp->syn_wait_lock = RW_LOCK_UNLOCKED; + tcp_delack_init(tp); + + lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL); + if (!lopt) + return -ENOMEM; + + memset(lopt, 0, sizeof(struct tcp_listen_opt)); + for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) + if ((1<max_qlen_log) >= sysctl_max_syn_backlog) + break; + + write_lock_bh(&tp->syn_wait_lock); + tp->listen_opt = lopt; + write_unlock_bh(&tp->syn_wait_lock); + + /* There is race window here: we announce ourselves listening, + * but this transition is still not validated by get_port(). + * It is OK, because this socket enters to hash table only + * after validation is complete. + */ + sk->state = TCP_LISTEN; + if (sk->prot->get_port(sk, sk->num) == 0) { + sk->sport = htons(sk->num); + + sk_dst_reset(sk); + sk->prot->hash(sk); + + return 0; + } + + sk->state = TCP_CLOSE; + write_lock_bh(&tp->syn_wait_lock); + tp->listen_opt = NULL; + write_unlock_bh(&tp->syn_wait_lock); + kfree(lopt); + return -EADDRINUSE; +#endif +} + +/* + * This routine closes sockets which have been at least partially + * opened, but not yet accepted. 
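/*
 * Illustrative sketch, not part of the imported code: tcp_listen_start()
 * above sizes the SYN hash table as the smallest power of two that covers
 * the configured backlog, never smaller than 2^6.  syn_table_log_sketch()
 * restates that loop standalone; the backlog values are examples.
 */
#include <stdio.h>

static int syn_table_log_sketch(int max_syn_backlog)
{
    int log;

    for (log = 6; ; log++)
        if ((1 << log) >= max_syn_backlog)
            break;
    return log;
}

int main(void)
{
    printf("%d %d\n",
           syn_table_log_sketch(1024),   /* 10: 2^10 = 1024 slots */
           syn_table_log_sketch(50));    /* 6: never below 2^6    */
    return 0;
}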
+ */ + +static void tcp_listen_stop (struct sock *sk) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_listen_opt *lopt = tp->listen_opt; + struct open_request *acc_req = tp->accept_queue; + struct open_request *req; + int i; + + tcp_delete_keepalive_timer(sk); + + /* make all the listen_opt local to us */ + write_lock_bh(&tp->syn_wait_lock); + tp->listen_opt =NULL; + write_unlock_bh(&tp->syn_wait_lock); + tp->accept_queue = tp->accept_queue_tail = NULL; + + if (lopt->qlen) { + for (i=0; isyn_table[i]) != NULL) { + lopt->syn_table[i] = req->dl_next; + lopt->qlen--; + tcp_openreq_free(req); + + /* Following specs, it would be better either to send FIN + * (and enter FIN-WAIT-1, it is normal close) + * or to send active reset (abort). + * Certainly, it is pretty dangerous while synflood, but it is + * bad justification for our negligence 8) + * To be honest, we are not able to make either + * of the variants now. --ANK + */ + } + } + } + BUG_TRAP(lopt->qlen == 0); + + kfree(lopt); + + while ((req=acc_req) != NULL) { + struct sock *child = req->sk; + + acc_req = req->dl_next; + + local_bh_disable(); + bh_lock_sock(child); + BUG_TRAP(child->lock.users==0); + sock_hold(child); + + tcp_disconnect(child, O_NONBLOCK); + + sock_orphan(child); + + atomic_inc(&tcp_orphan_count); + + tcp_destroy_sock(child); + + bh_unlock_sock(child); + local_bh_enable(); + sock_put(child); + + tcp_acceptq_removed(sk); + tcp_openreq_fastfree(req); + } + BUG_TRAP(sk->ack_backlog == 0); +#endif +} + +/* + * Wait for a socket to get into the connected state + * + * Note: Must be called with the socket locked. + */ +static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p) +{ +#if 0 + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + while((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { + if(sk->err) + return sock_error(sk); + if((1 << sk->state) & + ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) + return -EPIPE; + if(!*timeo_p) + return -EAGAIN; + if(signal_pending(tsk)) + return sock_intr_errno(*timeo_p); + + __set_task_state(tsk, TASK_INTERRUPTIBLE); + add_wait_queue(sk->sleep, &wait); + sk->tp_pinfo.af_tcp.write_pending++; + + release_sock(sk); + *timeo_p = schedule_timeout(*timeo_p); + lock_sock(sk); + + __set_task_state(tsk, TASK_RUNNING); + remove_wait_queue(sk->sleep, &wait); + sk->tp_pinfo.af_tcp.write_pending--; + } + return 0; +#else + return 0; +#endif +} + +static inline int tcp_memory_free(struct sock *sk) +{ + return sk->wmem_queued < sk->sndbuf; +} + +/* + * Wait for more memory for a socket + */ +static int wait_for_tcp_memory(struct sock * sk, long *timeo) +{ +#if 0 + int err = 0; + long vm_wait = 0; + long current_timeo = *timeo; + DECLARE_WAITQUEUE(wait, current); + + if (tcp_memory_free(sk)) + current_timeo = vm_wait = (net_random()%(HZ/5))+2; + + add_wait_queue(sk->sleep, &wait); + for (;;) { + set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); + + set_current_state(TASK_INTERRUPTIBLE); + + if (sk->err || (sk->shutdown & SEND_SHUTDOWN)) + goto do_error; + if (!*timeo) + goto do_nonblock; + if (signal_pending(current)) + goto do_interrupted; + clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); + if (tcp_memory_free(sk) && !vm_wait) + break; + + set_bit(SOCK_NOSPACE, &sk->socket->flags); + sk->tp_pinfo.af_tcp.write_pending++; + release_sock(sk); + if (!tcp_memory_free(sk) || vm_wait) + current_timeo = schedule_timeout(current_timeo); + lock_sock(sk); + sk->tp_pinfo.af_tcp.write_pending--; + + if (vm_wait) { + vm_wait -= current_timeo; + 
current_timeo = *timeo; + if (current_timeo != MAX_SCHEDULE_TIMEOUT && + (current_timeo -= vm_wait) < 0) + current_timeo = 0; + vm_wait = 0; + } + *timeo = current_timeo; + } +out: + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + return err; + +do_error: + err = -EPIPE; + goto out; +do_nonblock: + err = -EAGAIN; + goto out; +do_interrupted: + err = sock_intr_errno(*timeo); + goto out; +#endif +} + +ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags); + +static inline int +can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) +{ +#if 0 + if (i) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; + return page == frag->page && + off == frag->page_offset+frag->size; + } + return 0; +#else +return 0; +#endif +} + +static inline void +fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + frag->page = page; + frag->page_offset = off; + frag->size = size; + skb_shinfo(skb)->nr_frags = i+1; +} + +static inline void tcp_mark_push(struct tcp_opt *tp, struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + tp->pushed_seq = tp->write_seq; +} + +static inline int forced_push(struct tcp_opt *tp) +{ + return after(tp->write_seq, tp->pushed_seq + (tp->max_window>>1)); +} + +static inline void +skb_entail(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) +{ + skb->csum = 0; + TCP_SKB_CB(skb)->seq = tp->write_seq; + TCP_SKB_CB(skb)->end_seq = tp->write_seq; + TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->sacked = 0; + __skb_queue_tail(&sk->write_queue, skb); + tcp_charge_skb(sk, skb); + if (tp->send_head == NULL) + tp->send_head = skb; +} + +static inline void +tcp_mark_urg(struct tcp_opt *tp, int flags, struct sk_buff *skb) +{ +#if 0 + if (flags & MSG_OOB) { + tp->urg_mode = 1; + tp->snd_up = tp->write_seq; + TCP_SKB_CB(skb)->sacked |= TCPCB_URG; + } +#endif +} + +static inline void +tcp_push(struct sock *sk, struct tcp_opt *tp, int flags, int mss_now, int nonagle) +{ +#if 0 + if (tp->send_head) { + struct sk_buff *skb = sk->write_queue.prev; + if (!(flags&MSG_MORE) || forced_push(tp)) + tcp_mark_push(tp, skb); + tcp_mark_urg(tp, flags, skb); + __tcp_push_pending_frames(sk, tp, mss_now, (flags&MSG_MORE) ? 2 : nonagle); + } +#endif +} + +static int tcp_error(struct sock *sk, int flags, int err) +{ +#if 0 + if (err == -EPIPE) + err = sock_error(sk) ? : -EPIPE; + if (err == -EPIPE && !(flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + return err; +#else + return 0; +#endif +} + +ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int mss_now; + int err; + ssize_t copied; + long timeo = sock_sndtimeo(sk, flags&MSG_DONTWAIT); + + /* Wait for a connection to finish. 
*/ + if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + if((err = wait_for_tcp_connect(sk, 0, &timeo)) != 0) + goto out_err; + + clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); + + mss_now = tcp_current_mss(sk); + copied = 0; + + err = -EPIPE; + if (sk->err || (sk->shutdown & SEND_SHUTDOWN)) + goto do_error; + + while (psize > 0) { + struct sk_buff *skb = sk->write_queue.prev; + int offset, size, copy, i; + struct page *page; + + page = pages[poffset/PAGE_SIZE]; + offset = poffset % PAGE_SIZE; + size = min_t(size_t, psize, PAGE_SIZE-offset); + + if (tp->send_head==NULL || (copy = mss_now - skb->len) <= 0) { +new_segment: + if (!tcp_memory_free(sk)) + goto wait_for_sndbuf; + + skb = tcp_alloc_pskb(sk, 0, tp->mss_cache, sk->allocation); + if (skb == NULL) + goto wait_for_memory; + + skb_entail(sk, tp, skb); + copy = mss_now; + } + + if (copy > size) + copy = size; + + i = skb_shinfo(skb)->nr_frags; + if (can_coalesce(skb, i, page, offset)) { + skb_shinfo(skb)->frags[i-1].size += copy; + } else if (i < MAX_SKB_FRAGS) { + get_page(page); + fill_page_desc(skb, i, page, offset, copy); + } else { + tcp_mark_push(tp, skb); + goto new_segment; + } + + skb->len += copy; + skb->data_len += copy; + skb->ip_summed = CHECKSUM_HW; + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + + if (!copied) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + + copied += copy; + poffset += copy; + if (!(psize -= copy)) + goto out; + + if (skb->len != mss_now || (flags&MSG_OOB)) + continue; + + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, tp, mss_now, 1); + } else if (skb == tp->send_head) + tcp_push_one(sk, mss_now); + continue; + +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->socket->flags); +wait_for_memory: + if (copied) + tcp_push(sk, tp, flags&~MSG_MORE, mss_now, 1); + + if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) + goto do_error; + + mss_now = tcp_current_mss(sk); + } + +out: + if (copied) + tcp_push(sk, tp, flags, mss_now, tp->nonagle); + return copied; + +do_error: + if (copied) + goto out; +out_err: + return tcp_error(sk, flags, err); +#else +return 0; +#endif +} + +ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) +{ +#if 0 + ssize_t res; + struct sock *sk = sock->sk; + +#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) + + if (!(sk->route_caps & NETIF_F_SG) || + !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) + return sock_no_sendpage(sock, page, offset, size, flags); + +#undef TCP_ZC_CSUM_FLAGS + + lock_sock(sk); + TCP_CHECK_TIMER(sk); + res = do_tcp_sendpages(sk, &page, offset, size, flags); + TCP_CHECK_TIMER(sk); + release_sock(sk); + return res; +#else + return 0; +#endif +} + +#define TCP_PAGE(sk) (sk->tp_pinfo.af_tcp.sndmsg_page) +#define TCP_OFF(sk) (sk->tp_pinfo.af_tcp.sndmsg_off) + +static inline int +tcp_copy_to_page(struct sock *sk, char *from, struct sk_buff *skb, + struct page *page, int off, int copy) +{ + int err = 0; + unsigned int csum; + + csum = csum_and_copy_from_user(from, page_address(page)+off, + copy, 0, &err); + if (!err) { + if (skb->ip_summed == CHECKSUM_NONE) + skb->csum = csum_block_add(skb->csum, csum, skb->len); + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk->wmem_queued += copy; + sk->forward_alloc -= copy; + } + return err; +} + +static inline int +skb_add_data(struct sk_buff *skb, char *from, int copy) +{ +#if 0 + int err = 0; + unsigned int csum; + int off = skb->len; + + csum = csum_and_copy_from_user(from, 
skb_put(skb, copy), + copy, 0, &err); + if (!err) { + skb->csum = csum_block_add(skb->csum, csum, off); + return 0; + } + + __skb_trim(skb, off); + return -EFAULT; +#else +return 0; +#endif +} + +static inline int select_size(struct sock *sk, struct tcp_opt *tp) +{ +#if 0 + int tmp = tp->mss_cache; + + if (sk->route_caps&NETIF_F_SG) { + int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); + + if (tmp >= pgbreak && tmp <= pgbreak + (MAX_SKB_FRAGS-1)*PAGE_SIZE) + tmp = pgbreak; + } + return tmp; +#else + return 0; +#endif +} + +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) +{ +#if 0 + struct iovec *iov; + struct tcp_opt *tp; + struct sk_buff *skb; + int iovlen, flags; + int mss_now; + int err, copied; + long timeo; + + tp = &(sk->tp_pinfo.af_tcp); + + lock_sock(sk); + TCP_CHECK_TIMER(sk); + + flags = msg->msg_flags; + timeo = sock_sndtimeo(sk, flags&MSG_DONTWAIT); + + /* Wait for a connection to finish. */ + if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + if((err = wait_for_tcp_connect(sk, flags, &timeo)) != 0) + goto out_err; + + /* This should be in poll */ + clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); + + mss_now = tcp_current_mss(sk); + + /* Ok commence sending. */ + iovlen = msg->msg_iovlen; + iov = msg->msg_iov; + copied = 0; + + err = -EPIPE; + if (sk->err || (sk->shutdown&SEND_SHUTDOWN)) + goto do_error; + + while (--iovlen >= 0) { + int seglen=iov->iov_len; + unsigned char * from=iov->iov_base; + + iov++; + + while (seglen > 0) { + int copy; + + skb = sk->write_queue.prev; + + if (tp->send_head == NULL || + (copy = mss_now - skb->len) <= 0) { + +new_segment: + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ + if (!tcp_memory_free(sk)) + goto wait_for_sndbuf; + + skb = tcp_alloc_pskb(sk, select_size(sk, tp), 0, sk->allocation); + if (skb == NULL) + goto wait_for_memory; + + skb_entail(sk, tp, skb); + copy = mss_now; + } + + /* Try to append data to the end of skb. */ + if (copy > seglen) + copy = seglen; + + /* Where to copy to? */ + if (skb_tailroom(skb) > 0) { + /* We have some space in skb head. Superb! */ + if (copy > skb_tailroom(skb)) + copy = skb_tailroom(skb); + if ((err = skb_add_data(skb, from, copy)) != 0) + goto do_fault; + } else { + int merge = 0; + int i = skb_shinfo(skb)->nr_frags; + struct page *page = TCP_PAGE(sk); + int off = TCP_OFF(sk); + + if (can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { + /* We can extend the last page fragment. */ + merge = 1; + } else if (i == MAX_SKB_FRAGS || + (i == 0 && !(sk->route_caps&NETIF_F_SG))) { + /* Need to add new fragment and cannot + * do this because interface is non-SG, + * or because all the page slots are busy. + */ + tcp_mark_push(tp, skb); + goto new_segment; + } else if (page) { + /* If page is cached, align + * offset to L1 cache boundary + */ + off = (off+L1_CACHE_BYTES-1)&~(L1_CACHE_BYTES-1); + if (off == PAGE_SIZE) { + put_page(page); + TCP_PAGE(sk) = page = NULL; + } + } + + if (!page) { + /* Allocate new cache page. */ + if (!(page=tcp_alloc_page(sk))) + goto wait_for_memory; + off = 0; + } + + if (copy > PAGE_SIZE-off) + copy = PAGE_SIZE-off; + + /* Time to copy data. We are close to the end! */ + err = tcp_copy_to_page(sk, from, skb, page, off, copy); + if (err) { + /* If this page was new, give it to the + * socket so it does not get leaked. + */ + if (TCP_PAGE(sk) == NULL) { + TCP_PAGE(sk) = page; + TCP_OFF(sk) = 0; + } + goto do_error; + } + + /* Update the skb. 
*/ + if (merge) { + skb_shinfo(skb)->frags[i-1].size += copy; + } else { + fill_page_desc(skb, i, page, off, copy); + if (TCP_PAGE(sk)) { + get_page(page); + } else if (off + copy < PAGE_SIZE) { + get_page(page); + TCP_PAGE(sk) = page; + } + } + + TCP_OFF(sk) = off+copy; + } + + if (!copied) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + + from += copy; + copied += copy; + if ((seglen -= copy) == 0 && iovlen == 0) + goto out; + + if (skb->len != mss_now || (flags&MSG_OOB)) + continue; + + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, tp, mss_now, 1); + } else if (skb == tp->send_head) + tcp_push_one(sk, mss_now); + continue; + +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->socket->flags); +wait_for_memory: + if (copied) + tcp_push(sk, tp, flags&~MSG_MORE, mss_now, 1); + + if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) + goto do_error; + + mss_now = tcp_current_mss(sk); + } + } + +out: + if (copied) + tcp_push(sk, tp, flags, mss_now, tp->nonagle); + TCP_CHECK_TIMER(sk); + release_sock(sk); + return copied; + +do_fault: + if (skb->len == 0) { + if (tp->send_head == skb) + tp->send_head = NULL; + __skb_unlink(skb, skb->list); + tcp_free_skb(sk, skb); + } + +do_error: + if (copied) + goto out; +out_err: + err = tcp_error(sk, flags, err); + TCP_CHECK_TIMER(sk); + release_sock(sk); + return err; +#else + return 0; +#endif +} + +/* + * Handle reading urgent data. BSD has very simple semantics for + * this, no blocking and very strange errors 8) + */ + +static int tcp_recv_urg(struct sock * sk, long timeo, + struct msghdr *msg, int len, int flags, + int *addr_len) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* No URG data to read. */ + if (sk->urginline || !tp->urg_data || tp->urg_data == TCP_URG_READ) + return -EINVAL; /* Yes this is right ! */ + + if (sk->state==TCP_CLOSE && !sk->done) + return -ENOTCONN; + + if (tp->urg_data & TCP_URG_VALID) { + int err = 0; + char c = tp->urg_data; + + if (!(flags & MSG_PEEK)) + tp->urg_data = TCP_URG_READ; + + /* Read urgent data. */ + msg->msg_flags|=MSG_OOB; + + if(len>0) { + if (!(flags & MSG_TRUNC)) + err = memcpy_toiovec(msg->msg_iov, &c, 1); + len = 1; + } else + msg->msg_flags|=MSG_TRUNC; + + return err ? -EFAULT : len; + } + + if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN)) + return 0; + + /* Fixed the recv(..., MSG_OOB) behaviour. BSD docs and + * the available implementations agree in this case: + * this call should never block, independent of the + * blocking state of the socket. + * Mike + */ + return -EAGAIN; +#else +return 0; +#endif +} + +/* + * Release a skb if it is no longer needed. This routine + * must be called with interrupts disabled or with the + * socket locked so that the sk_buff queue operation is ok. + */ + +static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb) +{ +#if 0 + __skb_unlink(skb, &sk->receive_queue); + __kfree_skb(skb); +#endif +} + +/* Clean up the receive buffer for full frames taken by the user, + * then send an ACK if necessary. COPIED is the number of bytes + * tcp_recvmsg has given to the user so far, it speeds up the + * calculation of whether or not we must ACK for the sake of + * a window update. 
+ */ +static void cleanup_rbuf(struct sock *sk, int copied) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int time_to_ack = 0; + +#if TCP_DEBUG + struct sk_buff *skb = skb_peek(&sk->receive_queue); + + BUG_TRAP(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); +#endif + + if (tcp_ack_scheduled(tp)) { + /* Delayed ACKs frequently hit locked sockets during bulk receive. */ + if (tp->ack.blocked + /* Once-per-two-segments ACK was not sent by tcp_input.c */ + || tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss + /* + * If this read emptied read buffer, we send ACK, if + * connection is not bidirectional, user drained + * receive buffer and there was a small segment + * in queue. + */ + || (copied > 0 && + (tp->ack.pending&TCP_ACK_PUSHED) && + !tp->ack.pingpong && + atomic_read(&sk->rmem_alloc) == 0)) { + time_to_ack = 1; + } + } + + /* We send an ACK if we can now advertise a non-zero window + * which has been raised "significantly". + * + * Even if window raised up to infinity, do not send window open ACK + * in states, where we will not receive more. It is useless. + */ + if(copied > 0 && !time_to_ack && !(sk->shutdown&RCV_SHUTDOWN)) { + __u32 rcv_window_now = tcp_receive_window(tp); + + /* Optimize, __tcp_select_window() is not cheap. */ + if (2*rcv_window_now <= tp->window_clamp) { + __u32 new_window = __tcp_select_window(sk); + + /* Send ACK now, if this read freed lots of space + * in our buffer. Certainly, new_window is new window. + * We can advertise it now, if it is not less than current one. + * "Lots" means "at least twice" here. + */ + if(new_window && new_window >= 2*rcv_window_now) + time_to_ack = 1; + } + } + if (time_to_ack) + tcp_send_ack(sk); +#endif +} + +/* Now socket state including sk->err is changed only under lock, + * hence we may omit checks after joining wait queue. + * We check receive queue before schedule() only as optimization; + * it is very likely that release_sock() added new data. + */ + +static long tcp_data_wait(struct sock *sk, long timeo) +{ +#if 0 + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(sk->sleep, &wait); + + __set_current_state(TASK_INTERRUPTIBLE); + + set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); + release_sock(sk); + + if (skb_queue_empty(&sk->receive_queue)) + timeo = schedule_timeout(timeo); + + lock_sock(sk); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); + + remove_wait_queue(sk->sleep, &wait); + __set_current_state(TASK_RUNNING); + return timeo; +#else + return 0; +#endif +} + +static void tcp_prequeue_process(struct sock *sk) +{ +#if 0 + struct sk_buff *skb; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + net_statistics[smp_processor_id()*2+1].TCPPrequeued += skb_queue_len(&tp->ucopy.prequeue); + + /* RX process wants to run with disabled BHs, though it is not necessary */ + local_bh_disable(); + while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) + sk->backlog_rcv(sk, skb); + local_bh_enable(); + + /* Clear memory counter. 
*/ + tp->ucopy.memory = 0; +#endif +} + +static inline +struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) +{ +#if 0 + struct sk_buff *skb; + u32 offset; + + skb_queue_walk(&sk->receive_queue, skb) { + offset = seq - TCP_SKB_CB(skb)->seq; + if (skb->h.th->syn) + offset--; + if (offset < skb->len || skb->h.th->fin) { + *off = offset; + return skb; + } + } + return NULL; +#else + return NULL; +#endif +} + +/* + * This routine provides an alternative to tcp_recvmsg() for routines + * that would like to handle copying from skbuffs directly in 'sendfile' + * fashion. + * Note: + * - It is assumed that the socket was locked by the caller. + * - The routine does not block. + * - At present, there is no support for reading OOB data + * or for 'peeking' the socket using this routine + * (although both would be easy to implement). + */ +int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ +#if 0 + struct sk_buff *skb; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 seq = tp->copied_seq; + u32 offset; + int copied = 0; + + if (sk->state == TCP_LISTEN) + return -ENOTCONN; + while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + if (offset < skb->len) { + size_t used, len; + + len = skb->len - offset; + /* Stop reading if we hit a patch of urgent data */ + if (tp->urg_data) { + u32 urg_offset = tp->urg_seq - seq; + if (urg_offset < len) + len = urg_offset; + if (!len) + break; + } + used = recv_actor(desc, skb, offset, len); + if (used <= len) { + seq += used; + copied += used; + offset += used; + } + if (offset != skb->len) + break; + } + if (skb->h.th->fin) { + tcp_eat_skb(sk, skb); + ++seq; + break; + } + tcp_eat_skb(sk, skb); + if (!desc->count) + break; + } + tp->copied_seq = seq; + /* Clean up data we have read: This will do ACK frames. */ + if (copied) + cleanup_rbuf(sk, copied); + return copied; +#else +#endif +} + +/* + * This routine copies from a sock struct into the user buffer. + * + * Technical note: in 2.3 we work on _locked_ socket, so that + * tricks with *seq access order and skb->users are not required. + * Probably, code can be easily improved even more. + */ + +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, + int len, int nonblock, int flags, int *addr_len) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int copied = 0; + u32 peek_seq; + u32 *seq; + unsigned long used; + int err; + int target; /* Read at least this many bytes */ + long timeo; + struct task_struct *user_recv = NULL; + + lock_sock(sk); + + TCP_CHECK_TIMER(sk); + + err = -ENOTCONN; + if (sk->state == TCP_LISTEN) + goto out; + + timeo = sock_rcvtimeo(sk, nonblock); + + /* Urgent data needs to be handled specially. */ + if (flags & MSG_OOB) + goto recv_urg; + + seq = &tp->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = tp->copied_seq; + seq = &peek_seq; + } + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + do { + struct sk_buff * skb; + u32 offset; + + /* Are we at urgent data? Stop if we have read anything. */ + if (copied && tp->urg_data && tp->urg_seq == *seq) + break; + + /* We need to check signals first, to get correct SIGURG + * handling. FIXME: Need to check this doesn't impact 1003.1g + * and move it down to the bottom of the loop + */ + if (signal_pending(current)) { + if (copied) + break; + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + + /* Next get a buffer. 
*/ + + skb = skb_peek(&sk->receive_queue); + do { + if (!skb) + break; + + /* Now that we have two receive queues this + * shouldn't happen. + */ + if (before(*seq, TCP_SKB_CB(skb)->seq)) { + printk(KERN_INFO "recvmsg bug: copied %X seq %X\n", + *seq, TCP_SKB_CB(skb)->seq); + break; + } + offset = *seq - TCP_SKB_CB(skb)->seq; + if (skb->h.th->syn) + offset--; + if (offset < skb->len) + goto found_ok_skb; + if (skb->h.th->fin) + goto found_fin_ok; + BUG_TRAP(flags&MSG_PEEK); + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->receive_queue); + + /* Well, if we have backlog, try to process it now yet. */ + + if (copied >= target && sk->backlog.tail == NULL) + break; + + if (copied) { + if (sk->err || + sk->state == TCP_CLOSE || + (sk->shutdown & RCV_SHUTDOWN) || + !timeo || + (flags & MSG_PEEK)) + break; + } else { + if (sk->done) + break; + + if (sk->err) { + copied = sock_error(sk); + break; + } + + if (sk->shutdown & RCV_SHUTDOWN) + break; + + if (sk->state == TCP_CLOSE) { + if (!sk->done) { + /* This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + + if (!timeo) { + copied = -EAGAIN; + break; + } + } + + cleanup_rbuf(sk, copied); + + if (tp->ucopy.task == user_recv) { + /* Install new reader */ + if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) { + user_recv = current; + tp->ucopy.task = user_recv; + tp->ucopy.iov = msg->msg_iov; + } + + tp->ucopy.len = len; + + BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&(MSG_PEEK|MSG_TRUNC))); + + /* Ugly... If prequeue is not empty, we have to + * process it before releasing socket, otherwise + * order will be broken at second iteration. + * More elegant solution is required!!! + * + * Look: we have the following (pseudo)queues: + * + * 1. packets in flight + * 2. backlog + * 3. prequeue + * 4. receive_queue + * + * Each queue can be processed only if the next ones + * are empty. At this point we have empty receive_queue. + * But prequeue _can_ be not empty after second iteration, + * when we jumped to start of loop because backlog + * processing added something to receive_queue. + * We cannot release_sock(), because backlog contains + * packets arrived _after_ prequeued ones. + * + * Shortly, algorithm is clear --- to process all + * the queues in order. We could make it more directly, + * requeueing packets from backlog to prequeue, if + * is not empty. It is more elegant, but eats cycles, + * unfortunately. + */ + if (skb_queue_len(&tp->ucopy.prequeue)) + goto do_prequeue; + + /* __ Set realtime policy in scheduler __ */ + } + + if (copied >= target) { + /* Do not sleep, just process backlog. 
*/ + release_sock(sk); + lock_sock(sk); + } else { + timeo = tcp_data_wait(sk, timeo); + } + + if (user_recv) { + int chunk; + + /* __ Restore normal policy in scheduler __ */ + + if ((chunk = len - tp->ucopy.len) != 0) { + net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromBacklog += chunk; + len -= chunk; + copied += chunk; + } + + if (tp->rcv_nxt == tp->copied_seq && + skb_queue_len(&tp->ucopy.prequeue)) { +do_prequeue: + tcp_prequeue_process(sk); + + if ((chunk = len - tp->ucopy.len) != 0) { + net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk; + len -= chunk; + copied += chunk; + } + } + } + if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { + if (net_ratelimit()) + printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", + current->comm, current->pid); + peek_seq = tp->copied_seq; + } + continue; + + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + /* Do we have urgent data here? */ + if (tp->urg_data) { + u32 urg_offset = tp->urg_seq - *seq; + if (urg_offset < used) { + if (!urg_offset) { + if (!sk->urginline) { + ++*seq; + offset++; + used--; + if (!used) + goto skip_copy; + } + } else + used = urg_offset; + } + } + + if (!(flags&MSG_TRUNC)) { + err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, used); + if (err) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + } + + *seq += used; + copied += used; + len -= used; + +skip_copy: + if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) { + tp->urg_data = 0; + tcp_fast_path_check(sk, tp); + } + if (used + offset < skb->len) + continue; + + if (skb->h.th->fin) + goto found_fin_ok; + if (!(flags & MSG_PEEK)) + tcp_eat_skb(sk, skb); + continue; + + found_fin_ok: + /* Process the FIN. */ + ++*seq; + if (!(flags & MSG_PEEK)) + tcp_eat_skb(sk, skb); + break; + } while (len > 0); + + if (user_recv) { + if (skb_queue_len(&tp->ucopy.prequeue)) { + int chunk; + + tp->ucopy.len = copied > 0 ? len : 0; + + tcp_prequeue_process(sk); + + if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { + net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk; + len -= chunk; + copied += chunk; + } + } + + tp->ucopy.task = NULL; + tp->ucopy.len = 0; + } + + /* According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. I was just happy when found this 8) --ANK + */ + + /* Clean up data we have read: This will do ACK frames. */ + cleanup_rbuf(sk, copied); + + TCP_CHECK_TIMER(sk); + release_sock(sk); + return copied; + +out: + TCP_CHECK_TIMER(sk); + release_sock(sk); + return err; + +recv_urg: + err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); + goto out; +#else + return 0; +#endif +} + +/* + * State processing on a close. This implements the state shift for + * sending our FIN frame. Note that we only send a FIN for some + * states. A shutdown() may have already sent the FIN, or we may be + * closed. 
+ */ + +static unsigned char new_state[16] = { + /* current state: new state: action: */ + /* (Invalid) */ TCP_CLOSE, + /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_SYN_SENT */ TCP_CLOSE, + /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, + /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, + /* TCP_TIME_WAIT */ TCP_CLOSE, + /* TCP_CLOSE */ TCP_CLOSE, + /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, + /* TCP_LAST_ACK */ TCP_LAST_ACK, + /* TCP_LISTEN */ TCP_CLOSE, + /* TCP_CLOSING */ TCP_CLOSING, +}; + +static int tcp_close_state(struct sock *sk) +{ +#if 0 + int next = (int) new_state[sk->state]; + int ns = (next & TCP_STATE_MASK); + + tcp_set_state(sk, ns); + + return (next & TCP_ACTION_FIN); +#else + return 0; +#endif +} + +/* + * Shutdown the sending side of a connection. Much like close except + * that we don't receive shut down or set sk->dead. + */ + +void tcp_shutdown(struct sock *sk, int how) +{ +#if 0 + /* We need to grab some memory, and put together a FIN, + * and then put it into the queue to be sent. + * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. + */ + if (!(how & SEND_SHUTDOWN)) + return; + + /* If we've already sent a FIN, or it's a closed state, skip this. */ + if ((1 << sk->state) & + (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) { + /* Clear out any half completed packets. FIN if needed. */ + if (tcp_close_state(sk)) + tcp_send_fin(sk); + } +#endif +} + + +/* + * Return 1 if we still have things to send in our buffers. + */ + +static inline int closing(struct sock * sk) +{ +#if 0 + return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK)); +#else + return 0; +#endif +} + +static __inline__ void tcp_kill_sk_queues(struct sock *sk) +{ +#if 0 + /* First the read buffer. */ + __skb_queue_purge(&sk->receive_queue); + + /* Next, the error queue. */ + __skb_queue_purge(&sk->error_queue); + + /* Next, the write queue. */ + BUG_TRAP(skb_queue_empty(&sk->write_queue)); + + /* Account for returned memory. */ + tcp_mem_reclaim(sk); + + BUG_TRAP(sk->wmem_queued == 0); + BUG_TRAP(sk->forward_alloc == 0); + + /* It is _impossible_ for the backlog to contain anything + * when we get here. All user references to this socket + * have gone away, only the net layer knows can touch it. + */ +#endif +} + +/* + * At this point, there should be no process reference to this + * socket, and thus no user references at all. Therefore we + * can assume the socket waitqueue is inactive and nobody will + * try to jump onto it. + */ +void tcp_destroy_sock(struct sock *sk) +{ +#if 0 + BUG_TRAP(sk->state==TCP_CLOSE); + BUG_TRAP(sk->dead); + + /* It cannot be in hash table! */ + BUG_TRAP(sk->pprev==NULL); + + /* If it has not 0 sk->num, it must be bound */ + BUG_TRAP(!sk->num || sk->prev!=NULL); + +#ifdef TCP_DEBUG + if (sk->zapped) { + printk(KERN_DEBUG "TCP: double destroy sk=%p\n", sk); + sock_hold(sk); + } + sk->zapped = 1; +#endif + + sk->prot->destroy(sk); + + tcp_kill_sk_queues(sk); + +#ifdef INET_REFCNT_DEBUG + if (atomic_read(&sk->refcnt) != 1) { + printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt)); + } +#endif + + atomic_dec(&tcp_orphan_count); + sock_put(sk); +#endif +} + +void tcp_close(struct sock *sk, long timeout) +{ +#if 0 + struct sk_buff *skb; + int data_was_unread = 0; + + lock_sock(sk); + sk->shutdown = SHUTDOWN_MASK; + + if(sk->state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + + /* Special case. 
*/ + tcp_listen_stop(sk); + + goto adjudge_to_death; + } + + /* We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + * reader process may not have drained the data yet! + */ + while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) { + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin; + data_was_unread += len; + __kfree_skb(skb); + } + + tcp_mem_reclaim(sk); + + /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section + * 3.10, we send a RST here because data was lost. To + * witness the awful effects of the old behavior of always + * doing a FIN, run an older 2.1.x kernel or 2.0.x, start + * a bulk GET in an FTP client, suspend the process, wait + * for the client to advertise a zero window, then kill -9 + * the FTP client, wheee... Note: timeout is always zero + * in such a case. + */ + if(data_was_unread != 0) { + /* Unread data was tossed, zap the connection. */ + NET_INC_STATS_USER(TCPAbortOnClose); + tcp_set_state(sk, TCP_CLOSE); + tcp_send_active_reset(sk, GFP_KERNEL); + } else if (sk->linger && sk->lingertime==0) { + /* Check zero linger _after_ checking for unread data. */ + sk->prot->disconnect(sk, 0); + NET_INC_STATS_USER(TCPAbortOnData); + } else if (tcp_close_state(sk)) { + /* We FIN if the application ate all the data before + * zapping the connection. + */ + + /* RED-PEN. Formally speaking, we have broken TCP state + * machine. State transitions: + * + * TCP_ESTABLISHED -> TCP_FIN_WAIT1 + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_CLOSE_WAIT -> TCP_LAST_ACK + * + * are legal only when FIN has been sent (i.e. in window), + * rather than queued out of window. Purists blame. + * + * F.e. "RFC state" is ESTABLISHED, + * if Linux state is FIN-WAIT-1, but FIN is still not sent. + * + * The visible declinations are that sometimes + * we enter time-wait state, when it is not required really + * (harmless), do not send active resets, when they are + * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when + * they look as CLOSING or LAST_ACK for Linux) + * Probably, I missed some more holelets. + * --ANK + */ + tcp_send_fin(sk); + } + + if (timeout) { + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(sk->sleep, &wait); + + do { + set_current_state(TASK_INTERRUPTIBLE); + if (!closing(sk)) + break; + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + } while (!signal_pending(tsk) && timeout); + + tsk->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + } + +adjudge_to_death: + /* It is the last release_sock in its life. It will remove backlog. */ + release_sock(sk); + + + /* Now socket is owned by kernel and we acquire BH lock + to finish close. No need to check for user refs. + */ + local_bh_disable(); + bh_lock_sock(sk); + BUG_TRAP(sk->lock.users==0); + + sock_hold(sk); + sock_orphan(sk); + + /* This is a (useful) BSD violating of the RFC. There is a + * problem with TCP as specified in that the other end could + * keep a socket open forever with no application left this end. + * We use a 3 minute timeout (about the same as BSD) then kill + * our end. If they send after that then tough - BUT: long enough + * that we won't make the old 4*rto = almost no time - whoops + * reset mistake. + * + * Nope, it was not mistake. It is really desired behaviour + * f.e. on http servers, when such sockets are useless, but + * consume significant resources. Let's do it with special + * linger2 option. 
--ANK + */ + + if (sk->state == TCP_FIN_WAIT2) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (tp->linger2 < 0) { + tcp_set_state(sk, TCP_CLOSE); + tcp_send_active_reset(sk, GFP_ATOMIC); + NET_INC_STATS_BH(TCPAbortOnLinger); + } else { + int tmo = tcp_fin_time(tp); + + if (tmo > TCP_TIMEWAIT_LEN) { + tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); + } else { + atomic_inc(&tcp_orphan_count); + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto out; + } + } + } + if (sk->state != TCP_CLOSE) { + tcp_mem_reclaim(sk); + if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || + (sk->wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { + if (net_ratelimit()) + printk(KERN_INFO "TCP: too many of orphaned sockets\n"); + tcp_set_state(sk, TCP_CLOSE); + tcp_send_active_reset(sk, GFP_ATOMIC); + NET_INC_STATS_BH(TCPAbortOnMemory); + } + } + atomic_inc(&tcp_orphan_count); + + if (sk->state == TCP_CLOSE) + tcp_destroy_sock(sk); + /* Otherwise, socket is reprieved until protocol close. */ + +out: + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); +#endif +} + +/* These states need RST on ABORT according to RFC793 */ + +extern __inline__ int tcp_need_reset(int state) +{ +#if 0 + return ((1 << state) & + (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1| + TCPF_FIN_WAIT2|TCPF_SYN_RECV)); +#else + return 0; +#endif +} + +int tcp_disconnect(struct sock *sk, int flags) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int old_state; + int err = 0; + + old_state = sk->state; + if (old_state != TCP_CLOSE) + tcp_set_state(sk, TCP_CLOSE); + + /* ABORT function of RFC793 */ + if (old_state == TCP_LISTEN) { + tcp_listen_stop(sk); + } else if (tcp_need_reset(old_state) || + (tp->snd_nxt != tp->write_seq && + (1<err = ECONNRESET; + } else if (old_state == TCP_SYN_SENT) + sk->err = ECONNRESET; + + tcp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->receive_queue); + tcp_writequeue_purge(sk); + __skb_queue_purge(&tp->out_of_order_queue); + + sk->dport = 0; + + if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) { + sk->rcv_saddr = 0; + sk->saddr = 0; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + memset(&sk->net_pinfo.af_inet6.saddr, 0, 16); + memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16); +#endif + } + + sk->shutdown = 0; + sk->done = 0; + tp->srtt = 0; + if ((tp->write_seq += tp->max_window+2) == 0) + tp->write_seq = 1; + tp->backoff = 0; + tp->snd_cwnd = 2; + tp->probes_out = 0; + tp->packets_out = 0; + tp->snd_ssthresh = 0x7fffffff; + tp->snd_cwnd_cnt = 0; + tp->ca_state = TCP_CA_Open; + tcp_clear_retrans(tp); + tcp_delack_init(tp); + tp->send_head = NULL; + tp->saw_tstamp = 0; + tcp_sack_reset(tp); + __sk_dst_reset(sk); + + BUG_TRAP(!sk->num || sk->prev); + + sk->error_report(sk); + return err; +#else + return 0; +#endif +} + +/* + * Wait for an incoming connection, avoid race + * conditions. This must be called with the socket locked. + */ +static int wait_for_connect(struct sock * sk, long timeo) +{ +#if 0 + DECLARE_WAITQUEUE(wait, current); + int err; + + /* + * True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. 
+ * + * Subtle issue: "add_wait_queue_exclusive()" will be added + * after any current non-exclusive waiters, and we know that + * it will always _stay_ after any new non-exclusive waiters + * because all non-exclusive waiters are added at the + * beginning of the wait-queue. As such, it's ok to "drop" + * our exclusiveness temporarily when we get woken up without + * having to remove and re-insert us on the wait queue. + */ + add_wait_queue_exclusive(sk->sleep, &wait); + for (;;) { + current->state = TASK_INTERRUPTIBLE; + release_sock(sk); + if (sk->tp_pinfo.af_tcp.accept_queue == NULL) + timeo = schedule_timeout(timeo); + lock_sock(sk); + err = 0; + if (sk->tp_pinfo.af_tcp.accept_queue) + break; + err = -EINVAL; + if (sk->state != TCP_LISTEN) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + return err; +#else + return 0; +#endif +} + +/* + * This will accept the next outstanding connection. + */ + +struct sock *tcp_accept(struct sock *sk, int flags, int *err) +{ +#if 0 + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req; + struct sock *newsk; + int error; + + lock_sock(sk); + + /* We need to make sure that this socket is listening, + * and that it has something pending. + */ + error = -EINVAL; + if (sk->state != TCP_LISTEN) + goto out; + + /* Find already established connection */ + if (!tp->accept_queue) { + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + /* If this is a non blocking socket don't sleep */ + error = -EAGAIN; + if (!timeo) + goto out; + + error = wait_for_connect(sk, timeo); + if (error) + goto out; + } + + req = tp->accept_queue; + if ((tp->accept_queue = req->dl_next) == NULL) + tp->accept_queue_tail = NULL; + + newsk = req->sk; + tcp_acceptq_removed(sk); + tcp_openreq_fastfree(req); + BUG_TRAP(newsk->state != TCP_SYN_RECV); + release_sock(sk); + return newsk; + +out: + release_sock(sk); + *err = error; + return NULL; +#else + return NULL; +#endif +} + +/* + * Socket option code for TCP. + */ + +int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, + int optlen) +{ +#if 0 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int val; + int err = 0; + + if (level != SOL_TCP) + return tp->af_specific->setsockopt(sk, level, optname, + optval, optlen); + + if(optlen MAX_TCP_WINDOW) { + err = -EINVAL; + break; + } + tp->user_mss = val; + break; + + case TCP_NODELAY: + /* You cannot try to use this and TCP_CORK in + * tandem, so let the user know. + */ + if (tp->nonagle == 2) { + err = -EINVAL; + break; + } + tp->nonagle = (val == 0) ? 0 : 1; + if (val) + tcp_push_pending_frames(sk, tp); + break; + + case TCP_CORK: + /* When set indicates to always queue non-full frames. + * Later the user clears this option and we transmit + * any pending partial frames in the queue. This is + * meant to be used alongside sendfile() to get properly + * filled frames when the user (for example) must write + * out headers with a write() call first and then use + * sendfile to send out the data parts. + * + * You cannot try to use TCP_NODELAY and this mechanism + * at the same time, so let the user know. 
+ */
+		if (tp->nonagle == 1) {
+			err = -EINVAL;
+			break;
+		}
+		if (val != 0) {
+			tp->nonagle = 2;
+		} else {
+			tp->nonagle = 0;
+
+			tcp_push_pending_frames(sk, tp);
+		}
+		break;
+
+	case TCP_KEEPIDLE:
+		if (val < 1 || val > MAX_TCP_KEEPIDLE)
+			err = -EINVAL;
+		else {
+			tp->keepalive_time = val * HZ;
+			if (sk->keepopen && !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
+				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
+				if (tp->keepalive_time > elapsed)
+					elapsed = tp->keepalive_time - elapsed;
+				else
+					elapsed = 0;
+				tcp_reset_keepalive_timer(sk, elapsed);
+			}
+		}
+		break;
+	case TCP_KEEPINTVL:
+		if (val < 1 || val > MAX_TCP_KEEPINTVL)
+			err = -EINVAL;
+		else
+			tp->keepalive_intvl = val * HZ;
+		break;
+	case TCP_KEEPCNT:
+		if (val < 1 || val > MAX_TCP_KEEPCNT)
+			err = -EINVAL;
+		else
+			tp->keepalive_probes = val;
+		break;
+	case TCP_SYNCNT:
+		if (val < 1 || val > MAX_TCP_SYNCNT)
+			err = -EINVAL;
+		else
+			tp->syn_retries = val;
+		break;
+
+	case TCP_LINGER2:
+		if (val < 0)
+			tp->linger2 = -1;
+		else if (val > sysctl_tcp_fin_timeout/HZ)
+			tp->linger2 = 0;
+		else
+			tp->linger2 = val*HZ;
+		break;
+
+	case TCP_DEFER_ACCEPT:
+		tp->defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of retransmits */
+			while (tp->defer_accept < 32 && val > ((TCP_TIMEOUT_INIT/HZ)<<tp->defer_accept))
+				tp->defer_accept++;
+			tp->defer_accept++;
+		}
+		break;
+
+	case TCP_WINDOW_CLAMP:
+		if (val==0) {
+			if (sk->state != TCP_CLOSE) {
+				err = -EINVAL;
+				break;
+			}
+			tp->window_clamp = 0;
+		} else {
+			tp->window_clamp = val < SOCK_MIN_RCVBUF/2 ?
+				SOCK_MIN_RCVBUF/2 : val;
+		}
+		break;
+
+	case TCP_QUICKACK:
+		if (!val) {
+			tp->ack.pingpong = 1;
+		} else {
+			tp->ack.pingpong = 0;
+			if ((1<<sk->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT) &&
+			    tcp_ack_scheduled(tp)) {
+				tp->ack.pending |= TCP_ACK_PUSHED;
+				cleanup_rbuf(sk, 1);
+				if (!(val & 1))
+					tp->ack.pingpong = 1;
+			}
+		}
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	};
+	release_sock(sk);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval,
+		   int *optlen)
+{
+#if 0
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	int val, len;
+
+	if(level != SOL_TCP)
+		return tp->af_specific->getsockopt(sk, level, optname,
+						   optval, optlen);
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	if(len < 0)
+		return -EINVAL;
+
+	switch(optname) {
+	case TCP_MAXSEG:
+		val = tp->mss_cache;
+		if (val == 0 && ((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN)))
+			val = tp->user_mss;
+		break;
+	case TCP_NODELAY:
+		val = (tp->nonagle == 1);
+		break;
+	case TCP_CORK:
+		val = (tp->nonagle == 2);
+		break;
+	case TCP_KEEPIDLE:
+		val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time)/HZ;
+		break;
+	case TCP_KEEPINTVL:
+		val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl)/HZ;
+		break;
+	case TCP_KEEPCNT:
+		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
+		break;
+	case TCP_SYNCNT:
+		val = tp->syn_retries ? : sysctl_tcp_syn_retries;
+		break;
+	case TCP_LINGER2:
+		val = tp->linger2;
+		if (val >= 0)
+			val = (val ? : sysctl_tcp_fin_timeout)/HZ;
+		break;
+	case TCP_DEFER_ACCEPT:
+		val = tp->defer_accept == 0 ?
0 : ((TCP_TIMEOUT_INIT/HZ)<<(tp->defer_accept-1)); + break; + case TCP_WINDOW_CLAMP: + val = tp->window_clamp; + break; + case TCP_INFO: + { + struct tcp_info info; + u32 now = tcp_time_stamp; + + if(get_user(len,optlen)) + return -EFAULT; + info.tcpi_state = sk->state; + info.tcpi_ca_state = tp->ca_state; + info.tcpi_retransmits = tp->retransmits; + info.tcpi_probes = tp->probes_out; + info.tcpi_backoff = tp->backoff; + info.tcpi_options = 0; + if (tp->tstamp_ok) + info.tcpi_options |= TCPI_OPT_TIMESTAMPS; + if (tp->sack_ok) + info.tcpi_options |= TCPI_OPT_SACK; + if (tp->wscale_ok) { + info.tcpi_options |= TCPI_OPT_WSCALE; + info.tcpi_snd_wscale = tp->snd_wscale; + info.tcpi_rcv_wscale = tp->rcv_wscale; + } else { + info.tcpi_snd_wscale = 0; + info.tcpi_rcv_wscale = 0; + } + if (tp->ecn_flags&TCP_ECN_OK) + info.tcpi_options |= TCPI_OPT_ECN; + + info.tcpi_rto = (1000000*tp->rto)/HZ; + info.tcpi_ato = (1000000*tp->ack.ato)/HZ; + info.tcpi_snd_mss = tp->mss_cache; + info.tcpi_rcv_mss = tp->ack.rcv_mss; + + info.tcpi_unacked = tp->packets_out; + info.tcpi_sacked = tp->sacked_out; + info.tcpi_lost = tp->lost_out; + info.tcpi_retrans = tp->retrans_out; + info.tcpi_fackets = tp->fackets_out; + + info.tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ; + info.tcpi_last_ack_sent = 0; + info.tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ; + info.tcpi_last_ack_recv = ((now - tp->rcv_tstamp)*1000)/HZ; + + info.tcpi_pmtu = tp->pmtu_cookie; + info.tcpi_rcv_ssthresh = tp->rcv_ssthresh; + info.tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3; + info.tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2; + info.tcpi_snd_ssthresh = tp->snd_ssthresh; + info.tcpi_snd_cwnd = tp->snd_cwnd; + info.tcpi_advmss = tp->advmss; + info.tcpi_reordering = tp->reordering; + + len = min_t(unsigned int, len, sizeof(info)); + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval, &info,len)) + return -EFAULT; + return 0; + } + case TCP_QUICKACK: + val = !tp->ack.pingpong; + break; + default: + return -ENOPROTOOPT; + }; + + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval, &val,len)) + return -EFAULT; + return 0; +#else + return 0; +#endif +} + + +//extern void __skb_cb_too_small_for_tcp(int, int); +//extern void tcpdiag_init(void); + +void /* __init */ tcp_init(void) +{ +#if 0 + struct sk_buff *skb = NULL; + unsigned long goal; + int order, i; + + if(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) + __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), + sizeof(skb->cb)); + + tcp_openreq_cachep = kmem_cache_create("tcp_open_request", + sizeof(struct open_request), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if(!tcp_openreq_cachep) + panic("tcp_init: Cannot alloc open_request cache."); + + tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", + sizeof(struct tcp_bind_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if(!tcp_bucket_cachep) + panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + + tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", + sizeof(struct tcp_tw_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if(!tcp_timewait_cachep) + panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); + + /* Size and allocate the main established and bind bucket + * hash tables. + * + * The methodology is similar to that of the buffer cache. 
+ */
+	if (num_physpages >= (128 * 1024))
+		goal = num_physpages >> (21 - PAGE_SHIFT);
+	else
+		goal = num_physpages >> (23 - PAGE_SHIFT);
+
+	for(order = 0; (1UL << order) < goal; order++)
+		;
+	do {
+		tcp_ehash_size = (1UL << order) * PAGE_SIZE /
+			sizeof(struct tcp_ehash_bucket);
+		tcp_ehash_size >>= 1;
+		while (tcp_ehash_size & (tcp_ehash_size-1))
+			tcp_ehash_size--;
+		tcp_ehash = (struct tcp_ehash_bucket *)
+			__get_free_pages(GFP_ATOMIC, order);
+	} while (tcp_ehash == NULL && --order > 0);
+
+	if (!tcp_ehash)
+		panic("Failed to allocate TCP established hash table\n");
+	for (i = 0; i < (tcp_ehash_size<<1); i++) {
+		tcp_ehash[i].lock = RW_LOCK_UNLOCKED;
+		tcp_ehash[i].chain = NULL;
+	}
+
+	do {
+		tcp_bhash_size = (1UL << order) * PAGE_SIZE /
+			sizeof(struct tcp_bind_hashbucket);
+		if ((tcp_bhash_size > (64 * 1024)) && order > 0)
+			continue;
+		tcp_bhash = (struct tcp_bind_hashbucket *)
+			__get_free_pages(GFP_ATOMIC, order);
+	} while (tcp_bhash == NULL && --order >= 0);
+
+	if (!tcp_bhash)
+		panic("Failed to allocate TCP bind hash table\n");
+	for (i = 0; i < tcp_bhash_size; i++) {
+		tcp_bhash[i].lock = SPIN_LOCK_UNLOCKED;
+		tcp_bhash[i].chain = NULL;
+	}
+
+	/* Try to be a bit smarter and adjust defaults depending
+	 * on available memory.
+	 */
+	if (order > 4) {
+		sysctl_local_port_range[0] = 32768;
+		sysctl_local_port_range[1] = 61000;
+		sysctl_tcp_max_tw_buckets = 180000;
+		sysctl_tcp_max_orphans = 4096<<(order-4);
+		sysctl_max_syn_backlog = 1024;
+	} else if (order < 3) {
+		sysctl_local_port_range[0] = 1024*(3-order);
+		sysctl_tcp_max_tw_buckets >>= (3-order);
+		sysctl_tcp_max_orphans >>= (3-order);
+		sysctl_max_syn_backlog = 128;
+	}
+	tcp_port_rover = sysctl_local_port_range[0] - 1;
+
+	sysctl_tcp_mem[0] = 768 << order;
+	sysctl_tcp_mem[1] = 1024 << order;
+	sysctl_tcp_mem[2] = 1536 << order;
+	if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 512)
+		sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 512;
+	if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 512)
+		sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 512;
+
+	if (order < 3) {
+		sysctl_tcp_wmem[2] = 64*1024;
+		sysctl_tcp_rmem[0] = PAGE_SIZE;
+		sysctl_tcp_rmem[1] = 43689;
+		sysctl_tcp_rmem[2] = 2*43689;
+	}
+
+	printk(KERN_INFO "TCP: Hash tables configured (established %d bind %d)\n",
+	       tcp_ehash_size<<1, tcp_bhash_size);
+
+	tcpdiag_init();
+#endif
+}
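
tcp_poll() above reports POLLHUP only once both directions have been shut down; a FIN from the peer alone surfaces as POLLIN with a zero-byte read, and pending urgent data as POLLPRI. A minimal user-space sketch of how those bits are consumed, assuming fd is an already connected TCP socket (report_poll_state is just an illustrative name):

#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Poll a connected TCP socket once and describe what the mask means. */
static void report_poll_state(int fd)
{
    struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT | POLLPRI };

    if (poll(&pfd, 1, 1000) < 0) {              /* wait at most one second */
        perror("poll");
        return;
    }
    if (pfd.revents & POLLERR)
        printf("pending socket error\n");
    if (pfd.revents & POLLHUP)
        printf("both directions shut down\n");
    if (pfd.revents & POLLPRI)
        printf("urgent (out-of-band) data pending\n");
    if (pfd.revents & POLLIN) {
        char c;
        ssize_t n = recv(fd, &c, 1, MSG_PEEK);  /* peek, do not consume */
        if (n == 0)
            printf("peer sent FIN (EOF)\n");
        else if (n > 0)
            printf("data available\n");
    }
    if (pfd.revents & POLLOUT)
        printf("send buffer has room\n");
}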
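
tcp_ioctl() above is what backs the Linux SIOCINQ query (unread bytes in the receive queue, minus a queued FIN) and SIOCOUTQ (bytes written but not yet acknowledged by the peer). A small Linux-specific sketch, again assuming a connected TCP socket fd:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>      /* SIOCINQ, SIOCOUTQ (Linux-specific) */

static void print_queue_depths(int fd)
{
    int inq = 0, outq = 0;

    if (ioctl(fd, SIOCINQ, &inq) == 0)      /* bytes waiting to be read */
        printf("receive queue: %d bytes\n", inq);
    if (ioctl(fd, SIOCOUTQ, &outq) == 0)    /* bytes sent but not yet ACKed */
        printf("send queue:    %d bytes\n", outq);
}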
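
The max_qlen_log loop in tcp_listen_start() picks the smallest power of two, never below 64, that covers sysctl_max_syn_backlog, so the SYN-queue length checks elsewhere reduce to a shift and a compare. The same computation in isolation (the function name is ours):

/* Smallest exponent e >= 6 such that (1 << e) covers the requested backlog;
 * mirrors the max_qlen_log loop in tcp_listen_start(). */
static unsigned int syn_queue_log2(unsigned int max_syn_backlog)
{
    unsigned int log = 6;               /* never fewer than 64 entries */

    while ((1U << log) < max_syn_backlog)
        log++;
    return log;
}

Storing the exponent rather than the size is what lets the listen code test "queue full" with a single shift.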
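
tcp_sendpage() and do_tcp_sendpages() above form the zero-copy transmit path: whole pages are attached to skb fragments instead of being copied, and the path is only taken when the route supports scatter-gather and hardware checksumming (otherwise sock_no_sendpage() falls back to ordinary copying). From user space this is the machinery behind Linux sendfile(2) on a TCP socket; a sketch assuming sock_fd is connected and file_fd is a regular file:

#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Push an entire file down a connected TCP socket without staging it
 * through a user-space buffer. */
static int send_whole_file(int sock_fd, int file_fd)
{
    struct stat st;
    off_t off = 0;

    if (fstat(file_fd, &st) < 0)
        return -1;
    while (off < st.st_size) {
        ssize_t n = sendfile(sock_fd, file_fd, &off, st.st_size - off);
        if (n <= 0)
            return -1;          /* error or unexpected end of file */
    }
    return 0;
}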
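
tcp_recv_urg() and the urgent-data checks in tcp_recvmsg() above implement the classic BSD semantics: a single out-of-band byte, readable with MSG_OOB unless SO_OOBINLINE is set, and SIOCATMARK to tell the reader when normal data has been drained up to the urgent mark. A user-space sketch, assuming a connected socket fd on which the peer may have sent urgent data:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

/* Consume normal data up to the urgent mark, then fetch the OOB byte. */
static void read_up_to_urgent(int fd)
{
    char buf[512];
    char oob;
    int at_mark = 0;

    for (;;) {
        if (ioctl(fd, SIOCATMARK, &at_mark) < 0)
            return;
        if (at_mark)
            break;                      /* next byte is the urgent byte */
        if (recv(fd, buf, sizeof buf, 0) <= 0)
            return;                     /* EOF or error before the mark */
    }
    if (recv(fd, &oob, 1, MSG_OOB) == 1)    /* fails if no urgent data is pending */
        printf("urgent byte: 0x%02x\n", (unsigned char)oob);
}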
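
The new_state[] table above packs two things per current state: the state close() should move the socket to (low bits) and a flag saying whether a FIN has to be queued (a high bit); tcp_close_state() masks them apart. A standalone illustration of that encoding; the state numbers, mask, and flag values below are illustrative stand-ins, not necessarily the ones tcpcore.h defines:

#include <stdio.h>

enum { ST_ESTABLISHED = 1, ST_FIN_WAIT1 = 4, ST_CLOSE = 7,
       ST_CLOSE_WAIT = 8, ST_LAST_ACK = 9 };

#define STATE_MASK  0x0f        /* low bits: state to enter on close()  */
#define ACTION_FIN  0x80        /* flag bit: a FIN segment must be sent */

static const unsigned char close_transition[] = {
    [ST_ESTABLISHED] = ST_FIN_WAIT1 | ACTION_FIN,
    [ST_CLOSE_WAIT]  = ST_LAST_ACK  | ACTION_FIN,
    [ST_CLOSE]       = ST_CLOSE,
};

int main(void)
{
    unsigned char next = close_transition[ST_ESTABLISHED];

    printf("next state %d, send FIN: %s\n",
           next & STATE_MASK, (next & ACTION_FIN) ? "yes" : "no");
    return 0;
}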
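
tcp_close() above resets the connection instead of performing the FIN handshake in two situations: unread data was still queued (the draft-ietf-tcpimpl-prob-03 behaviour), or the application asked for zero linger. The second case can be requested from user space; a minimal sketch for a connected socket fd:

#include <sys/socket.h>
#include <unistd.h>

/* Abortive close: the kernel drops any queued data and emits RST rather
 * than FIN (the zero-lingertime branch of tcp_close()). */
static int close_with_reset(int fd)
{
    struct linger lg = { .l_onoff = 1, .l_linger = 0 };

    if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof lg) < 0)
        return -1;
    return close(fd);
}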
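
The TCP_CORK comment in tcp_setsockopt() above describes the intended usage: cork the socket, write the headers, send the body (typically via sendfile), then uncork so the last partial frame is flushed; the code also refuses to combine it with TCP_NODELAY (nonagle 1 versus 2). A user-space sketch of that pattern, where send_headers and send_body stand for the application's own routines:

#include <netinet/in.h>
#include <netinet/tcp.h>        /* TCP_CORK is Linux-specific */
#include <sys/socket.h>

static int send_corked_response(int fd,
                                int (*send_headers)(int),
                                int (*send_body)(int))
{
    int on = 1, off = 0, rc;

    setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof on);   /* hold partial frames */
    rc = send_headers(fd);
    if (rc == 0)
        rc = send_body(fd);
    setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof off); /* flush what is queued */
    return rc;
}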
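
The TCP_DEFER_ACCEPT case in tcp_setsockopt() converts a timeout given in seconds into a count of SYN-ACK retransmissions, the unit the listen-socket timers actually work in, by doubling the initial retransmission interval until it covers the request. The same conversion in isolation, with TIMEOUT_INIT_SECS standing in for TCP_TIMEOUT_INIT/HZ (typically 3 seconds):

/* Translate a defer-accept timeout in seconds into the number of SYN-ACK
 * retransmissions after which the connection is finally handed to accept(). */
static int defer_accept_retrans(int seconds)
{
    const int TIMEOUT_INIT_SECS = 3;    /* illustrative TCP_TIMEOUT_INIT/HZ */
    int retrans = 0;

    if (seconds <= 0)
        return 0;
    while (retrans < 32 && seconds > (TIMEOUT_INIT_SECS << retrans))
        retrans++;
    return retrans + 1;
}

The user-space side is simply setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &seconds, sizeof seconds) on the listening socket.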
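
tcp_init() above sizes the established-connection hash from the amount of physical memory: it chooses a page-allocation order near the goal, halves the bucket count (the upper half of the table appears to be reserved for TIME_WAIT sockets, hence the tcp_ehash_size<<1 loops), and rounds it down to a power of two so that index masking stays cheap. The rounding step in isolation, with illustrative parameters:

#include <stddef.h>

/* Number of hash buckets that fit in (2^order) pages, halved and rounded
 * down to a power of two; mirrors the ehash sizing loop in tcp_init(). */
static unsigned long ehash_buckets(unsigned int order,
                                   unsigned long page_size,
                                   size_t bucket_size)
{
    unsigned long n = ((1UL << order) * page_size / bucket_size) >> 1;

    while (n & (n - 1))     /* step down until exactly one bit is set */
        n--;
    return n;
}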