diff --git a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/netinet/in_pcb.h b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/netinet/in_pcb.h index f40058a4f20..0a8b58d182e 100644 --- a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/netinet/in_pcb.h +++ b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/netinet/in_pcb.h @@ -97,8 +97,7 @@ struct inpcb * in_pcblookuphash __P((struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int)); void in_pcbnotify __P((struct inpcbhead *, struct sockaddr *, - u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int))); -void in_pcbrehash __P((struct inpcb *)); + u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int)));void in_pcbrehash __P((struct inpcb *)); void in_rtchange __P((struct inpcb *, int)); void in_setpeeraddr __P((struct inpcb *, struct mbuf *)); void in_setsockaddr __P((struct inpcb *, struct mbuf *)); diff --git a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/mbuf.h b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/mbuf.h index 3583ee7da50..50138e22dca 100644 --- a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/mbuf.h +++ b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/mbuf.h @@ -351,7 +351,7 @@ union mcluster { if ((m)->m_flags & M_EXT) { \ MCLFREE((m)->m_ext.ext_buf); \ } \ - (nn) = (m)->m_next; \ + /* (nn) = (m)->m_next; */ \ FREE((m), mbtypes[(m)->m_type]); \ } #endif /* OSKIT */ diff --git a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/proc.h b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/proc.h index e8634b7550e..03b31b94012 100644 --- a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/proc.h +++ b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/proc.h @@ -62,6 +62,7 @@ #include /* For struct rtprio. */ #include /* For struct selinfo. */ #include /* For structs itimerval, timeval. 
*/ +#include #ifdef OSKIT #include @@ -319,7 +320,7 @@ void cpu_switch __P((struct proc *)); void sleep __P((void *chan, int pri)); int tsleep __P((void *chan, int pri, char *wmesg, int timo)); void unsleep __P((struct proc *)); -void wakeup __P((void *chan)); +void wakeup __P((struct socket *so, struct selinfo *si, void *chan)); __dead void cpu_exit __P((struct proc *)) __dead2; __dead void exit1 __P((struct proc *, int)) __dead2; diff --git a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/socketvar.h b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/socketvar.h index d5b4fb59f90..2c90e24d085 100644 --- a/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/socketvar.h +++ b/reactos/drivers/lib/oskittcp/include/freebsd/src/sys/sys/socketvar.h @@ -181,11 +181,11 @@ struct socket { ((sb)->sb_flags |= SB_LOCK), 0) /* release lock on sockbuf sb */ -#define sbunlock(sb) { \ +#define sbunlock(so, sb) { \ (sb)->sb_flags &= ~SB_LOCK; \ if ((sb)->sb_flags & SB_WANT) { \ (sb)->sb_flags &= ~SB_WANT; \ - wakeup((caddr_t)&(sb)->sb_flags); \ + wakeup(so, &(sb)->sb_sel, (caddr_t)&(sb)->sb_flags); \ } \ } @@ -195,6 +195,7 @@ struct socket { } #define sowwakeup(so) sowakeup((so), &(so)->so_snd) +#define socwakeup(so) sowakeup((so), &(so)->so_snd) #ifdef KERNEL extern u_long sb_max; diff --git a/reactos/drivers/lib/oskittcp/include/memtrack.h b/reactos/drivers/lib/oskittcp/include/memtrack.h index b4c0bbc6b49..e63e327b71b 100644 --- a/reactos/drivers/lib/oskittcp/include/memtrack.h +++ b/reactos/drivers/lib/oskittcp/include/memtrack.h @@ -68,6 +68,11 @@ static inline VOID ExFreePoolX( PVOID Data, PCHAR File, ULONG Line ) { #define Untrack(x) #define TrackTag(x) #define FreeNdisPacket FreeNdisPacketX +#define exFreePool(x) ExFreePool(x) +#define exAllocatePool(x,y) ExAllocatePool(x,y) +#define exAllocatePoolWithTag(x,y,z) ExAllocatePoolWithTag(x,y,z) +#define TrackWithTag(a,b,c,d) +#define UntrackFL(a,b,c) #endif #endif/*MEMMTRAC_H*/ diff --git 
a/reactos/drivers/lib/oskittcp/include/oskiterrno.h b/reactos/drivers/lib/oskittcp/include/oskiterrno.h index d8d2efa8ed6..acb15f8c563 100644 --- a/reactos/drivers/lib/oskittcp/include/oskiterrno.h +++ b/reactos/drivers/lib/oskittcp/include/oskiterrno.h @@ -51,7 +51,7 @@ extern int errno; /* global error number */ #define OSK_ENOENT 2 /* No such file or directory */ #define OSK_ESRCH 3 /* No such process */ #define OSK_EINTR 4 /* Interrupted system call */ -#define OSK_EIO 5 /* Input/output error */ +#define OSK_EIO 5 /* Input/output error */ #define OSK_ENXIO 6 /* Device not configured */ #define OSK_E2BIG 7 /* Argument list too long */ #define OSK_ENOEXEC 8 /* Exec format error */ diff --git a/reactos/drivers/lib/oskittcp/include/oskittcp.h b/reactos/drivers/lib/oskittcp/include/oskittcp.h index b98b2fe2df7..ee4f7ea151a 100644 --- a/reactos/drivers/lib/oskittcp/include/oskittcp.h +++ b/reactos/drivers/lib/oskittcp/include/oskittcp.h @@ -39,46 +39,34 @@ struct connect_args { #include #define IPHDR_SIZE 20 +#define SEL_CONNECT 1 +#define SEL_FIN 2 +#define SEL_RST 4 +#define SEL_ABRT 8 +#define SEL_READ 16 +#define SEL_WRITE 32 +#define SEL_ACCEPT 64 +#define SEL_OOB 128 +#define SEL_ERROR 256 +#define SEL_FINOUT 512 -typedef void (*OSKITTCP_SOCKET_DATA_AVAILABLE) +typedef void (*OSKITTCP_SOCKET_STATE) ( void *ClientData, void *WhichSocket, void *WhichConnection, - OSK_PCHAR Data, - OSK_UINT Len ); -typedef void (*OSKITTCP_SOCKET_CONNECT_INDICATION) - ( void *ClientData, - void *WhichSocket, - void *WhichConnection ); -typedef void (*OSKITTCP_SOCKET_CLOSE_INDICATION) - ( void *WhichSocket ); -typedef void (*OSKITTCP_SOCKET_PENDING_CONNECT_INDICATION) - ( void *WhichSocket ); -typedef void (*OSKITTCP_SOCKET_RESET_INDICATION) - ( void *WhichSocket ); + OSK_UINT SelFlags, + OSK_UINT SocketState ); typedef int (*OSKITTCP_SEND_PACKET) ( void *ClientData, void *WhichSocket, void *WhichConnection, OSK_PCHAR Data, OSK_UINT Len ); -typedef int (*OSKITTCP_NEED_BIND) - ( void 
*ClientData, - void *WhichSocket, - void *WhichConnection, - struct sockaddr *address, - OSK_UINT addrlen, - OSK_UINT reuseport ); typedef struct _OSKITTCP_EVENT_HANDLERS { void *ClientData; - OSKITTCP_SOCKET_DATA_AVAILABLE SocketDataAvailable; - OSKITTCP_SOCKET_CONNECT_INDICATION SocketConnectIndication; - OSKITTCP_SOCKET_CLOSE_INDICATION SocketCloseIndication; - OSKITTCP_SOCKET_PENDING_CONNECT_INDICATION SocketPendingConnectIndication; - OSKITTCP_SOCKET_RESET_INDICATION SocketResetIndication; + OSKITTCP_SOCKET_STATE SocketState; OSKITTCP_SEND_PACKET PacketSend; - OSKITTCP_NEED_BIND Bind; } OSKITTCP_EVENT_HANDLERS, *POSKITTCP_EVENT_HANDLERS; extern OSKITTCP_EVENT_HANDLERS OtcpEvent; @@ -92,7 +80,12 @@ extern void RegisterOskitTCPEventHandlers ( POSKITTCP_EVENT_HANDLERS EventHandlers ); extern void OskitTCPReceiveDatagram( OSK_PCHAR Data, OSK_UINT Len, OSK_UINT IpHeaderLen ); - +extern int OskitTCPReceive( void *socket, + void *Addr, + OSK_PCHAR Data, + OSK_UINT Len, + OSK_UINT *OutLen, + OSK_UINT Flags ); #undef errno #define malloc(x,...) fbsd_malloc(x,__FILE__,__LINE__) @@ -103,4 +96,8 @@ extern void OskitTCPReceiveDatagram( OSK_PCHAR Data, OSK_UINT Len, #define SOCK_MAXADDRLEN 255 +#define OSK_MSG_OOB 0x01 +#define OSK_MSG_PEEK 0x02 +#define OSK_MSG_DONTWAIT 0x80 + #endif/*OSKITTCP_H*/ diff --git a/reactos/drivers/lib/oskittcp/oskittcp/in.c b/reactos/drivers/lib/oskittcp/oskittcp/in.c index 89ad849f36f..c2a2965bce2 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/in.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/in.c @@ -64,38 +64,6 @@ struct multi_kludge { struct in_multihead mk_head; }; -/* - * Return the network number from an internet address. 
- */ -u_long -in_netof(in) - struct in_addr in; -{ - register u_long i = ntohl(in.s_addr); - register u_long net; - register struct in_ifaddr *ia; - - if (IN_CLASSA(i)) - net = i & IN_CLASSA_NET; - else if (IN_CLASSB(i)) - net = i & IN_CLASSB_NET; - else if (IN_CLASSC(i)) - net = i & IN_CLASSC_NET; - else if (IN_CLASSD(i)) - net = i & IN_CLASSD_NET; - else - return (0); - - /* - * Check whether network is a subnet; - * if so, return subnet number. - */ - for (ia = in_ifaddr; ia; ia = ia->ia_next) - if (net == ia->ia_net) - return (i & ia->ia_subnetmask); - return (net); -} - #ifndef SUBNETSARELOCAL #define SUBNETSARELOCAL 1 #endif diff --git a/reactos/drivers/lib/oskittcp/oskittcp/interface.c b/reactos/drivers/lib/oskittcp/oskittcp/interface.c index 08ef28fd604..3d7ee049a45 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/interface.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/interface.c @@ -54,9 +54,6 @@ void RegisterOskitTCPEventHandlers( POSKITTCP_EVENT_HANDLERS EventHandlers ) { if( OtcpEvent.PacketSend ) OS_DbgPrint(OSK_MID_TRACE,("SendPacket handler registered: %x\n", OtcpEvent.PacketSend)); - if( OtcpEvent.Bind ) - OS_DbgPrint(OSK_MID_TRACE,("Bind handler registered: %x\n", - OtcpEvent.Bind)); } void OskitDumpBuffer( OSK_PCHAR Data, OSK_UINT Len ) { @@ -85,42 +82,49 @@ int OskitTCPSocket( void *context, *aso = so; } return error; -#if 0 - register struct protosw *prp; - register struct socket *so; - register int error; - - if (proto) { - prp = pffindproto(domain, proto, type); - } else { - prp = pffindtype(domain, type); - } - if (prp == 0 || prp->pr_usrreq == 0) { - return (EPROTONOSUPPORT); - } - if (prp->pr_type != type) { - return (EPROTOTYPE); - } - MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); - bzero((caddr_t)so, sizeof(*so)); - so->so_type = type; - so->so_proto = prp; - error = - (*prp->pr_usrreq)(so, PRU_ATTACH, - (struct mbuf *)0, - (struct mbuf *)proto, - (struct mbuf *)0); - if (error) { - so->so_state |= SS_NOFDREF; - 
sofree(so); - return (error); - } - *aso = so; - OS_DbgPrint(OSK_MAX_TRACE,("Returning Socket %x\n", so)); - return STATUS_SUCCESS; -#endif } +int OskitTCPRecv( void *connection, + void *Addr, + OSK_PCHAR Data, + OSK_UINT Len, + OSK_UINT *OutLen, + OSK_UINT Flags ) { + struct mbuf *paddr = 0; + struct mbuf m = { 0 }, *mp; /* zeroed so link fields are not stack garbage */ + struct uio uio = { 0 }; + int error = 0; + int tcp_flags = 0; + + if( Flags & OSK_MSG_OOB ) tcp_flags |= MSG_OOB; + if( Flags & OSK_MSG_DONTWAIT ) tcp_flags |= MSG_DONTWAIT; + if( Flags & OSK_MSG_PEEK ) tcp_flags |= MSG_PEEK; + + uio.uio_resid = Len; + m.m_len = Len; + m.m_data = Data; + m.m_type = MT_DATA; + m.m_flags = M_PKTHDR | M_EOR; + + mp = &m; + + OS_DbgPrint(OSK_MID_TRACE,("Reading %d bytes from TCP:\n", Len)); + + error = soreceive( connection, &paddr, &uio, &mp, NULL /* SCM_RIGHTS */, + &tcp_flags ); + + if( error == 0 ) { + OS_DbgPrint(OSK_MID_TRACE,("Successful read from TCP:\n")); + OskitDumpBuffer( m.m_data, uio.uio_resid ); + } + + if( paddr && Addr ) /* copy the address bytes the mbuf points at, not the mbuf header */ + memcpy( Addr, paddr->m_data, min(sizeof(struct sockaddr),paddr->m_len) ); + + *OutLen = uio.uio_resid; + return error; +} + static int getsockaddr(namp, uaddr, len) /* [<][>][^][v][top][bottom][index][help] */ @@ -209,10 +213,13 @@ done: return (error); } -DWORD OskitTCPClose( VOID *socket ) { +DWORD OskitTCPClose( void *socket ) { + struct socket *so = socket; + so->so_connection = 0; + return soclose( so ); /* a DWORD function must return a value */ } -DWORD OskitTCPSend( VOID *socket, OSK_PCHAR Data, OSK_UINT Len, int flags ) { +DWORD OskitTCPSend( void *socket, OSK_PCHAR Data, OSK_UINT Len, int flags ) { OskitDumpBuffer( Data, Len ); struct mbuf mb; mb.m_data = Data; @@ -220,11 +227,17 @@ DWORD OskitTCPSend( VOID *socket, OSK_PCHAR Data, OSK_UINT Len, int flags ) { return sosend( socket, NULL, NULL, (struct mbuf *)&mb, NULL, 0 ); } -void OskitTCPReceive( VOID *socket, PVOID AddrOut, - OSK_PCHAR Data, OSK_UINT Len, OSK_UINT *OutLen ) { -} +void *OskitTCPAccept( void *socket, + void *AddrOut, + OSK_UINT AddrLen, + OSK_UINT *OutAddrLen ) { + struct
mbuf nam; + int error; -VOID *OskitTCPAccept( VOID *socket, PVOID AddrOut ) { + nam.m_data = AddrOut; + nam.m_len = AddrLen; + + return soaccept( socket, &nam ); } void OskitTCPReceiveDatagram( OSK_PCHAR Data, OSK_UINT Len, @@ -246,13 +259,11 @@ void OskitTCPReceiveDatagram( OSK_PCHAR Data, OSK_UINT Len, /* The buffer Ip is freed by tcp_input */ } -void OskitTCPBind( VOID *socket, PVOID name ) { +void OskitTCPListen( void *socket, int backlog ) { + solisten( socket, backlog ); /* declared void, so the solisten result cannot be returned */ } -void OskitTCPListen( VOID *socket, int backlog ) { -} - -void OskitTCPSetAddress( VOID *socket, +void OskitTCPSetAddress( void *socket, ULONG LocalAddress, USHORT LocalPort, ULONG RemoteAddress, @@ -268,7 +279,7 @@ void OskitTCPSetAddress( VOID *socket, RemoteAddress, RemotePort); } -void OskitTCPGetAddress( VOID *socket, +void OskitTCPGetAddress( void *socket, PULONG LocalAddress, PUSHORT LocalPort, PULONG RemoteAddress, @@ -290,3 +301,4 @@ void oskittcp_die( const char *file, int line ) { DbgPrint("\n\n*** OSKITTCP: Panic Called at %s:%d ***\n", file, line); KeBugCheck(0); } + diff --git a/reactos/drivers/lib/oskittcp/oskittcp/ip_output.c b/reactos/drivers/lib/oskittcp/oskittcp/ip_output.c index 4a00269f56c..7e53ee3b594 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/ip_output.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/ip_output.c @@ -132,47 +132,11 @@ ip_output(so, m0, opt, ro, flags, imo) m->m_data + IPHDR_SIZE, m->m_len - IPHDR_SIZE ); } + OS_DbgPrint(OSK_MID_TRACE,("Error from upper layer: %d\n", error)); + return (error); } -/* - * Copy options from ip to jp, - * omitting those not copied during fragmentation.
- */ -int -ip_optcopy(ip, jp) - struct ip *ip, *jp; -{ - register u_char *cp, *dp; - int opt, optlen, cnt; - - cp = (u_char *)(ip + 1); - dp = (u_char *)(jp + 1); - cnt = (ip->ip_hl << 2) - sizeof (struct ip); - for (; cnt > 0; cnt -= optlen, cp += optlen) { - opt = cp[0]; - if (opt == IPOPT_EOL) - break; - if (opt == IPOPT_NOP) { - /* Preserve for IP mcast tunnel's LSRR alignment. */ - *dp++ = IPOPT_NOP; - optlen = 1; - continue; - } else - optlen = cp[IPOPT_OLEN]; - /* bogus lengths should have been caught by ip_dooptions */ - if (optlen > cnt) - optlen = cnt; - if (IPOPT_COPIED(opt)) { - (void)memcpy(dp, cp, (unsigned)optlen); - dp += optlen; - } - } - for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) - *dp++ = IPOPT_EOL; - return (optlen); -} - /* * IP socket option processing. */ diff --git a/reactos/drivers/lib/oskittcp/oskittcp/kern_clock.c b/reactos/drivers/lib/oskittcp/oskittcp/kern_clock.c index c53581fd5fe..02c70ca96e1 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/kern_clock.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/kern_clock.c @@ -139,59 +139,6 @@ int ticks; volatile struct timeval time; volatile struct timeval mono_time; -/* - * The real-time timer, interrupting hz times per second. - */ -void bsd_hardclock(void) -{ - register struct callout *p1; - register int needsoft; - unsigned cpl; - int s; - - save_cpl(&cpl); - splhigh(); - - /* - * Update real-time timeout queue. - * At front of queue are some number of events which are ``due''. - * The time to these is <= 0 and if negative represents the - * number of ticks which have passed since it was supposed to happen. - * The rest of the q elements (times > 0) are events yet to happen, - * where the time for each is given as a delta from the previous. - * Decrementing just the first of these serves to decrement the time - * to all events. 
- */ - needsoft = 0; - for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { - if (--p1->c_time > 0) - break; - needsoft = 1; - if (p1->c_time == 0) - break; - } - - /* - * Increment the time-of-day. - */ - ticks++; - BUMPTIME(&mono_time, tick); - BUMPTIME(&time, tick); - - /* - * At this point, we might need a soft interrupt. We say, what the - * heck, let's do the work now. Reduce priority level to softclock - * first, though. The alternative (postponing it) would be called - * 'setsoftclock' - */ - if (needsoft) { - (void)splsoftclock(); - softclock(); - } - - restore_cpl(cpl); -} - /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. diff --git a/reactos/drivers/lib/oskittcp/oskittcp/sleep.c b/reactos/drivers/lib/oskittcp/oskittcp/sleep.c index 83bed686a77..f56fa322af4 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/sleep.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/sleep.c @@ -12,18 +12,18 @@ typedef struct _SLEEPING_THREAD { } SLEEPING_THREAD, *PSLEEPING_THREAD; LIST_ENTRY SleepingThreadsList; -KSPIN_LOCK SleepingThreadsLock; +FAST_MUTEX SleepingThreadsLock; /* clock_init */ int ncallout = 256; struct callout *callout; void init_freebsd_sched() { - KeInitializeSpinLock( &SleepingThreadsLock ); + ExInitializeFastMutex( &SleepingThreadsLock ); InitializeListHead( &SleepingThreadsList ); } -void tsleep( void *token, int priority, char *wmesg, int tmio ) { +int tsleep( void *token, int priority, char *wmesg, int tmio ) { KIRQL OldIrql; KEVENT Event; PLIST_ENTRY Entry; @@ -37,43 +37,65 @@ void tsleep( void *token, int priority, char *wmesg, int tmio ) { if( SleepingThread ) { KeInitializeEvent( &SleepingThread->Event, NotificationEvent, FALSE ); SleepingThread->SleepToken = token; - ExInterlockedInsertTailList( &SleepingThreadsList, - &SleepingThread->Entry, - &SleepingThreadsLock ); - } - OS_DbgPrint(OSK_MID_TRACE,("Waiting on %x\n", token)); - KeWaitForSingleObject( &SleepingThread->Event, - WrSuspended, - KernelMode, - 
TRUE, - NULL ); + ExAcquireFastMutex( &SleepingThreadsLock ); + InsertTailList( &SleepingThreadsList, &SleepingThread->Entry ); + ExReleaseFastMutex( &SleepingThreadsLock ); + + OS_DbgPrint(OSK_MID_TRACE,("Waiting on %x\n", token)); + KeWaitForSingleObject( &SleepingThread->Event, + WrSuspended, + KernelMode, + TRUE, + NULL ); + + ExAcquireFastMutex( &SleepingThreadsLock ); + RemoveEntryList( &SleepingThread->Entry ); + ExReleaseFastMutex( &SleepingThreadsLock ); + + ExFreePool( SleepingThread ); + } OS_DbgPrint(OSK_MID_TRACE,("Waiting finished: %x\n", token)); + return 0; } -void wakeup( void *token ) { +void wakeup( struct socket *so, struct selinfo *si, void *token ) { KIRQL OldIrql; KEVENT Event; PLIST_ENTRY Entry; PSLEEPING_THREAD SleepingThread; - - OS_DbgPrint(OSK_MID_TRACE,("Wakeup %x!\n",token)); - KeAcquireSpinLock( &SleepingThreadsLock, &OldIrql ); + OS_DbgPrint + (OSK_MID_TRACE,("XXX Bytes to receive: %d\n", so->so_rcv.sb_cc)); + + if( so->so_rcv.sb_cc && si ) + si->si_flags |= SEL_READ; + + OS_DbgPrint(OSK_MID_TRACE,("Wakeup %x (socket %x, si_flags %x, state %x)!\n", + token, so, si ? si->si_flags : 0, + so->so_state)); + + if( OtcpEvent.SocketState ) { + OS_DbgPrint(OSK_MID_TRACE,("Calling client's socket state fn\n")); + OtcpEvent.SocketState( OtcpEvent.ClientData, + so, + so->so_connection, + si ? 
si->si_flags : 0, + so->so_state ); + } + + ExAcquireFastMutex( &SleepingThreadsLock ); Entry = SleepingThreadsList.Flink; while( Entry != &SleepingThreadsList ) { SleepingThread = CONTAINING_RECORD(Entry, SLEEPING_THREAD, Entry); + OS_DbgPrint(OSK_MID_TRACE,("Sleeper @ %x\n", SleepingThread)); if( SleepingThread->SleepToken == token ) { - RemoveEntryList(Entry); - KeReleaseSpinLock( &SleepingThreadsLock, OldIrql ); OS_DbgPrint(OSK_MID_TRACE,("Setting event to wake %x\n", token)); - KeSetEvent( &SleepingThread->Event, IO_NO_INCREMENT, FALSE ); - ExFreePool( SleepingThread ); - return; + KeSetEvent( &SleepingThread->Event, IO_NETWORK_INCREMENT, FALSE ); } Entry = Entry->Flink; } - KeReleaseSpinLock( &SleepingThreadsLock, OldIrql ); + ExReleaseFastMutex( &SleepingThreadsLock ); OS_DbgPrint(OSK_MID_TRACE,("Wakeup done %x\n", token)); } @@ -142,32 +164,6 @@ static __inline int name(void) \ return (x); \ } -#if 0 -GENSPL(splbio, cpl |= bio_imask) -GENSPL(splclock, cpl = HWI_MASK | SWI_MASK) -GENSPL(splhigh, cpl = HWI_MASK | SWI_MASK) -GENSPL(splimp, cpl |= net_imask) -GENSPL(splnet, cpl |= SWI_NET_MASK) -GENSPL(splsoftclock, cpl = SWI_CLOCK_MASK) -GENSPL(splsofttty, cpl |= SWI_TTY_MASK) -GENSPL(splstatclock, cpl |= stat_imask) -GENSPL(spltty, cpl |= tty_imask) -#endif - -#if 0 -void spl0(void) { - cpl = SWI_AST_MASK; - if (ipending & ~SWI_AST_MASK) - splz(); -} - -void splx(int ipl) { - cpl = ipl; - if (ipending & ~ipl) - splz(); -} -#endif - void splz(void) { OS_DbgPrint(OSK_MID_TRACE,("Called SPLZ\n")); } @@ -184,26 +180,3 @@ void restore_cpl(unsigned x) { cpl = x; } - -void selrecord( struct proc *selector, struct selinfo *sip) { - OS_DbgPrint(OSK_MID_TRACE,("Called selrecord\n")); -} - -void wakeupsocket( struct socket *so, struct selinfo *sel ) { - void *connection = so->so_connection; - char *data = 0; - int datalen = 0; - int flags = 0; - - OS_DbgPrint(OSK_MID_TRACE,("Wakeup: %x\n", so)); -#if 0 - if( soreceive(so, &paddr, 0, &mp0, &controlp, flags) == 0 ) { - /* 
We have data available */ - OS_DbgPrint(OSK_MID_TRACE,("Data available on %x\n", so)); - } -#endif -} - -void selwakeup( struct selinfo *sel ) { - OS_DbgPrint(OSK_MID_TRACE,("Called selwakeup\n")); -} diff --git a/reactos/drivers/lib/oskittcp/oskittcp/tcp_input.c b/reactos/drivers/lib/oskittcp/oskittcp/tcp_input.c index 260b695237f..0ab79a24d37 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/tcp_input.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/tcp_input.c @@ -74,58 +74,30 @@ struct inpcbinfo tcbinfo; #endif /* TUBA_INCLUDE */ -/* - * Insert segment ti into reassembly queue of tcp with - * control block tp. Return TH_FIN if reassembly now includes - * a segment with FIN. The macro form does the common case inline - * (segment is the next to be received on an established connection, - * and the queue is empty), avoiding linkage into and removal - * from the queue and repetition of various conversions. - * Set DELACK for segments received in order, but ack immediately - * when segments are out of order (so fast retransmit can work). 
- */ -#ifdef TCP_ACK_HACK -#define TCP_REASS(tp, ti, m, so, flags) { \ - if ((ti)->ti_seq == (tp)->rcv_nxt && \ - (tp)->seg_next == (struct tcpiphdr *)(tp) && \ - (tp)->t_state == TCPS_ESTABLISHED) { \ - if (ti->ti_flags & TH_PUSH) \ - tp->t_flags |= TF_ACKNOW; \ - else \ - tp->t_flags |= TF_DELACK; \ - (tp)->rcv_nxt += (ti)->ti_len - IPHDR_SIZE; \ - OS_DbgPrint(OSK_MID_TRACE,("Added %d to rcv_nxt\n", \ - (ti)->ti_len - IPHDR_SIZE)); \ - flags = (ti)->ti_flags & TH_FIN; \ - tcpstat.tcps_rcvpack++;\ - tcpstat.tcps_rcvbyte += (ti)->ti_len;\ - sbappend(so, &(so)->so_rcv, (m)); \ - sorwakeup(so); \ - } else { \ - (flags) = tcp_reass((tp), (ti), (m)); \ - tp->t_flags |= TF_ACKNOW; \ - } \ -} -#else #define TCP_REASS(tp, ti, m, so, flags) { \ if ((ti)->ti_seq == (tp)->rcv_nxt && \ (tp)->seg_next == (struct tcpiphdr *)(tp) && \ (tp)->t_state == TCPS_ESTABLISHED) { \ tp->t_flags |= TF_DELACK; \ - (tp)->rcv_nxt += (ti)->ti_len - IPHDR_SIZE; \ - OS_DbgPrint(OSK_MID_TRACE,("Added %d to rcv_nxt\n", \ - (ti)->ti_len - IPHDR_SIZE)); \ + if (!((ti)->ti_flags & TH_FIN) && \ + !((ti)->ti_flags & TH_RST)) { /* neither FIN nor RST set */ \ + (tp)->rcv_nxt += (ti)->ti_len - sizeof(struct ip); \ + OS_DbgPrint(OSK_MID_TRACE,("(REASS2) Added %d to rcv_nxt\n", \ + (ti)->ti_len - sizeof(struct ip))); \ + } else { \ + (so)->so_rcv.sb_sel.si_flags |= SEL_FIN; \ + } \ flags = (ti)->ti_flags & TH_FIN; \ - tcpstat.tcps_rcvpack++;\ - tcpstat.tcps_rcvbyte += (ti)->ti_len;\ - sbappend(so, &(so)->so_rcv, (m)); \ + tcpstat.tcps_rcvpack++; \ + tcpstat.tcps_rcvbyte += (ti)->ti_len; \ + sbappend(so, &(so)->so_rcv, (m)); \ sorwakeup(so); \ } else { \ (flags) = tcp_reass((tp), (ti), (m)); \ tp->t_flags |= TF_ACKNOW; \ } \ } -#endif + #ifndef TUBA_INCLUDE int @@ -231,13 +203,30 @@ present: ti = (struct tcpiphdr *)ti->ti_next; if (so->so_state & SS_CANTRCVMORE) m_freem(m); - else - sbappend(so, &so->so_rcv, m); + else { + sbappend(so, &so->so_rcv, (m)); + } } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); sorwakeup(so);
return (flags); } +void rip_input_mini(so, ti, m) + struct socket *so; + struct tcpiphdr *ti; + struct mbuf *m; +{ + register struct ip *ip = mtod(m, struct ip *); + register struct inpcb *inp; + struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; + + ripsrc.sin_addr = ti->ti_src; + + sbappendaddr(&so->so_rcv, + (struct sockaddr *)&ripsrc, m, + (struct mbuf *)0); +} + /* * TCP input routine, follows pages 65-76 of the * protocol specification dated September, 1981 very closely. @@ -249,6 +238,8 @@ tcp_input(m, iphlen) { register struct tcpiphdr *ti; register struct inpcb *inp; + struct sockaddr_in addr = { 0 }; + struct mbuf mhdr = { 0 }, mpayload = { 0 }; caddr_t optp = NULL; int optlen = 0; int len, tlen, off; @@ -342,6 +333,8 @@ tcp_input(m, iphlen) m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + OskitDumpBuffer(m->m_data, m->m_len); + /* * Locate pcb for segment. */ @@ -380,6 +373,22 @@ findpcb: tiwin = ti->ti_win; so = inp->inp_socket; + +#if 0 + mhdr.m_type = MT_HEADER; + memcpy(mhdr.m_pktdat, ti, sizeof(ti)); + mhdr.m_data = mhdr.m_pktdat; + mhdr.m_len = 0; + mhdr.m_flags = M_PKTHDR | M_EOR; + mhdr.m_next = &mpayload; + mpayload.m_type = MT_DATA; + mpayload.m_data = m->m_data + sizeof(*ti); + mpayload.m_len = m->m_len - sizeof(*ti); + mpayload.m_flags = M_EOR; + + rip_input_mini(so, ti, &mhdr); +#endif + if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) { @@ -589,14 +598,17 @@ findpcb: */ ++tcpstat.tcps_preddat; tp->rcv_nxt += ti->ti_len; - OS_DbgPrint(OSK_MID_TRACE,("Added %d to rcv_nxt\n", - ti->ti_len)); tcpstat.tcps_rcvpack++; tcpstat.tcps_rcvbyte += ti->ti_len; /* * Add data to socket buffer. 
*/ - sbappend(so, &so->so_rcv, m); + OS_DbgPrint + (OSK_MID_TRACE,("Adding %d to socket buffer\n", + m->m_len)); + + sbappend(so, &so->so_rcv, (m)); + so->so_rcv.sb_cc += m->m_len; sorwakeup(so); #ifdef TCP_ACK_HACK /* @@ -879,6 +891,7 @@ findpcb: so)); tp->t_state = TCPS_ESTABLISHED; tp->t_timer[TCPT_KEEP] = tcp_keepidle; + socwakeup(so); } } else { /* @@ -1173,8 +1186,9 @@ trimthenstep6: * error and we send an RST and drop the connection. */ if (tiflags & TH_SYN) { - tp = tcp_drop(tp, ECONNRESET); - goto dropwithreset; + OS_DbgPrint(OSK_MID_TRACE,("SYN In window\n")); + tp = tcp_drop(tp, ECONNRESET); + goto dropwithreset; } /* @@ -1415,7 +1429,7 @@ process_ACK: ourfinisacked = 0; } if (so->so_snd.sb_flags & SB_NOTIFY) - sowwakeup(so); + sowwakeup(so); tp->snd_una = ti->ti_ack; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; @@ -1522,6 +1536,9 @@ step6: * actually wanting to send this much urgent data. */ if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) { + OS_DbgPrint(OSK_MID_TRACE, + ("%x: Urgent pointer out of range: %x\n", + so, ti->ti_urp)); ti->ti_urp = 0; /* XXX */ tiflags &= ~TH_URG; /* XXX */ goto dodata; /* XXX */ @@ -1579,6 +1596,9 @@ dodata: /* XXX */ * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text.
*/ + + OS_DbgPrint(OSK_MID_TRACE,("TIFlags: %x\n", tiflags)); + if ((ti->ti_len || (tiflags&TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { TCP_REASS(tp, ti, m, so, tiflags); @@ -1589,7 +1609,7 @@ dodata: /* XXX */ */ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); } else { - m_freem(m); + /*m_freem(m);*/ tiflags &= ~TH_FIN; } diff --git a/reactos/drivers/lib/oskittcp/oskittcp/tcp_output.c b/reactos/drivers/lib/oskittcp/oskittcp/tcp_output.c index 976057494d3..a21e53d898b 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/tcp_output.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/tcp_output.c @@ -85,8 +85,7 @@ tcp_output(tp) struct rmxp_tao *taop; struct rmxp_tao tao_noncached; - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); + OS_DbgPrint(OSK_MID_TRACE,("Start\n")); /* * Determine length of data that should be transmitted, @@ -102,7 +101,9 @@ tcp_output(tp) * slow start to get ack "clock" running again. */ tp->snd_cwnd = tp->t_maxseg; + again: + OS_DbgPrint(OSK_MID_TRACE,("Again...\n")); sendalot = 0; off = tp->snd_nxt - tp->snd_una; win = min(tp->snd_wnd, tp->snd_cwnd); @@ -237,9 +238,6 @@ again: goto send; } - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * Compare available window to amount of window * known to peer (as advertised window less @@ -262,9 +260,6 @@ again: goto send; } - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * Send if we owe peer an ACK. 
*/ @@ -284,9 +279,6 @@ again: ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) goto send; - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * TCP window updates are not reliable, rather a polling protocol * using ``persist'' packets is used to insure receipt of window @@ -321,9 +313,6 @@ again: /*return (0);*/ send: - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * Before ESTABLISHED, force sending of initial options * unless TCP set not to do any options. @@ -358,9 +347,6 @@ send: } } - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * Send a timestamp and echo-reply if this is a SYN and our side * wants to use timestamps (TF_REQ_TSTMP is set) or both our side @@ -379,9 +365,6 @@ send: optlen += TCPOLEN_TSTAMP_APPA; } - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * Send `CC-family' options if our side wants to use them (TF_REQ_CC), * options are allowed (!TF_NOOPT) and it's not a RST. @@ -457,9 +440,6 @@ send: hdrlen += optlen; - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); - /* * Adjust data length if insertion of options will * bump the packet length beyond the t_maxopd length. 
@@ -480,8 +460,6 @@ send: panic("tcphdr too big"); /*#endif*/ - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : tp->rcv_nxt %x\n", tp->rcv_nxt)); /* * Grab a header mbuf, attaching a copy of data to @@ -607,21 +585,12 @@ send: if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) win = 0; - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) win = (long)(tp->rcv_adv - tp->rcv_nxt); - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - ti->ti_win = htons((u_short) (win>>tp->rcv_scale)); if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); @@ -640,9 +609,6 @@ send: * checksum extended header and data. */ - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - if (len + optlen) { ti->ti_src.s_addr = tp->t_inpcb->inp_laddr.s_addr; ti->ti_dst.s_addr = tp->t_inpcb->inp_faddr.s_addr; @@ -651,14 +617,8 @@ send: optlen + len)); } - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - ti->ti_sum = in_cksum(m, (int)(hdrlen + len)); - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - /* * In transmit state, time the transmission and arrange for * the retransmit. In persist state, just set snd_max. @@ -711,9 +671,6 @@ send: if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) tp->snd_max = tp->snd_nxt + len; - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - #ifdef TCPDEBUG /* * Trace. 
@@ -756,9 +713,7 @@ send: } #endif - OS_DbgPrint(OSK_MID_TRACE, - ("Before IP_OUTPUT : AckNumber %x\n", ti->ti_ack)); - + OS_DbgPrint(OSK_MID_TRACE,("Calling ip_output\n")); error = ip_output(so, m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, so->so_options & SO_DONTROUTE, 0); } @@ -799,6 +754,8 @@ out: tp->rcv_adv = tp->rcv_nxt + win; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + OS_DbgPrint(OSK_MID_TRACE,("sendalot: %d (flags %x)\n", + sendalot, tp->t_flags)); if (sendalot) goto again; return (0); diff --git a/reactos/drivers/lib/oskittcp/oskittcp/tcp_subr.c b/reactos/drivers/lib/oskittcp/oskittcp/tcp_subr.c index b67a1e91db5..fc3eafdd639 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/tcp_subr.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/tcp_subr.c @@ -427,7 +427,7 @@ tcp_notify(inp, error) so->so_error = error; else tp->t_softerror = error; - wakeup((caddr_t) &so->so_timeo); + wakeup(so, NULL, (caddr_t) &so->so_timeo); sorwakeup(so); sowwakeup(so); } diff --git a/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket.c b/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket.c index 8d46e40c533..c7ae553c3c8 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket.c @@ -393,7 +393,7 @@ restart: (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); - sbunlock(&so->so_snd); + sbunlock(so, &so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) @@ -480,12 +480,14 @@ nopages: } while (resid); release: - sbunlock(&so->so_snd); + sbunlock(so, &so->so_snd); out: +#ifndef __REACTOS__ if (top) - m_freem(top); + m_freem(top); if (control) - m_freem(control); + m_freem(control); +#endif /* The caller owns top and control */ return (error); } @@ -506,305 +508,87 @@ out: * only for the count in uio_resid. 
*/ int -soreceive(so, paddr, uio, mp0, controlp, flagsp) +soreceive(so, paddr, uio, _mp0, controlp, flagsp) register struct socket *so; struct mbuf **paddr; struct uio *uio; - struct mbuf **mp0; + struct mbuf **_mp0; struct mbuf **controlp; int *flagsp; { - register struct mbuf *m, **mp; - register int flags, len, error, s, offset; - struct protosw *pr = so->so_proto; - struct mbuf *nextrecord; - int moff, type = 0; - int orig_resid = uio->uio_resid; + struct mbuf dummy = { 0 }; + struct mbuf *mb = &dummy, *mn, *mp0 = *_mp0; + int total = 0, copyamt; - mp = mp0; - if (paddr) - *paddr = 0; - if (controlp) - *controlp = 0; - if (flagsp) - flags = *flagsp &~ MSG_EOR; - else - flags = 0; - if (flags & MSG_OOB) { - m = m_get(M_WAIT, MT_DATA); - error = (*pr->pr_usrreq)(so, PRU_RCVOOB, - m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0); - if (error) - goto bad; - do { - error = uiomove(mtod(m, caddr_t), - (int) min(uio->uio_resid, m->m_len), uio); - /*m = m_free(m);*/ - } while (uio->uio_resid && error == 0 && m); -bad: - if (m) - /*m_freem(m);*/ - return (error); - } - if (mp) - *mp = (struct mbuf *)0; - if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) - (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, - (struct mbuf *)0, (struct mbuf *)0); + dummy.m_nextpkt = so->so_rcv.sb_mb; + uio->uio_resid = 0; -restart: - error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); - if (error) - return (error); - s = splnet(); + if( mp0->m_len == 0 ) + return 0; - m = so->so_rcv.sb_mb; - /* - * If we have less data than requested, block awaiting more - * (subject to any timeout) if: - * 1. the current count is less than the low water mark, or - * 2. MSG_WAITALL is set, and it is possible to do the entire - * receive operation at once if we block (resid <= hiwat). - * 3. 
MSG_DONTWAIT is not set - * If MSG_WAITALL is set but resid is larger than the receive buffer, - * we have to do the receive in sections, and thus risk returning - * a short count if a timeout or signal occurs after we start. - */ - if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && - so->so_rcv.sb_cc < uio->uio_resid) && - (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || - ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && - m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { -#ifdef DIAGNOSTIC - if (m == 0 && so->so_rcv.sb_cc) - panic("receive 1"); -#endif - if (so->so_error) { - if (m) - goto dontblock; - error = so->so_error; - if ((flags & MSG_PEEK) == 0) - so->so_error = 0; - goto release; - } - if (so->so_state & SS_CANTRCVMORE) { - if (m) - goto dontblock; - else - goto release; - } - for (; m; m = m->m_next) - if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { - m = so->so_rcv.sb_mb; - goto dontblock; - } - if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && - (so->so_proto->pr_flags & PR_CONNREQUIRED)) { - error = ENOTCONN; - goto release; - } - if (uio->uio_resid == 0) - goto release; - if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { - error = EWOULDBLOCK; - goto release; - } - sbunlock(&so->so_rcv); - error = sbwait(&so->so_rcv); - splx(s); - if (error) - return (error); - goto restart; - } -dontblock: - if (uio->uio_procp) - uio->uio_procp->p_stats->p_ru.ru_msgrcv++; - nextrecord = m->m_nextpkt; - if (pr->pr_flags & PR_ADDR) { -#ifdef DIAGNOSTIC - if (m->m_type != MT_SONAME) - panic("receive 1a"); -#endif - orig_resid = 0; - if (flags & MSG_PEEK) { - if (paddr) - *paddr = m_copy(m, 0, m->m_len); - m = m->m_next; - } else { - sbfree(&so->so_rcv, m); - if (paddr) { - *paddr = m; - so->so_rcv.sb_mb = m->m_next; - m->m_next = 0; - m = so->so_rcv.sb_mb; - } else { - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; - } - } - } - while (m && m->m_type == MT_CONTROL && error == 0) { - if (flags & MSG_PEEK) { - if 
(controlp) - *controlp = m_copy(m, 0, m->m_len); - m = m->m_next; - } else { - sbfree(&so->so_rcv, m); - if (controlp) { -#ifndef __REACTOS__ - if (pr->pr_domain->dom_externalize && - mtod(m, struct cmsghdr *)->cmsg_type == - SCM_RIGHTS) - error = (*pr->pr_domain->dom_externalize)(m); - *controlp = m; - so->so_rcv.sb_mb = m->m_next; - m->m_next = 0; - m = so->so_rcv.sb_mb; -#endif - } else { - /*MFREE(m, so->so_rcv.sb_mb);*/ - m = so->so_rcv.sb_mb; - } - } - if (controlp) { - orig_resid = 0; - controlp = &(*controlp)->m_next; - } - } - if (m) { - if ((flags & MSG_PEEK) == 0) - m->m_nextpkt = nextrecord; - type = m->m_type; - if (type == MT_OOBDATA) - flags |= MSG_OOB; - } - moff = 0; - offset = 0; - while (m && uio->uio_resid > 0 && error == 0) { - if (m->m_type == MT_OOBDATA) { - if (type != MT_OOBDATA) - break; - } else if (type == MT_OOBDATA) - break; -#ifdef DIAGNOSTIC - else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) - panic("receive 3"); -#endif - so->so_state &= ~SS_RCVATMARK; - len = uio->uio_resid; - if (so->so_oobmark && len > so->so_oobmark - offset) - len = so->so_oobmark - offset; - if (len > m->m_len - moff) - len = m->m_len - moff; - /* - * If mp is set, just pass back the mbufs. - * Otherwise copy them out via the uio, then free. - * Sockbuf must be consistent here (points to current mbuf, - * it points to next record) when we drop priority; - * we must note any additions to the sockbuf when we - * block interrupts again. 
- */ - if (mp == 0) { - splx(s); - error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); - s = splnet(); - } else - uio->uio_resid -= len; - if (len == m->m_len - moff) { - if (m->m_flags & M_EOR) - flags |= MSG_EOR; - if (flags & MSG_PEEK) { - m = m->m_next; - moff = 0; - } else { - nextrecord = m->m_nextpkt; - sbfree(&so->so_rcv, m); - if (mp) { - *mp = m; - mp = &m->m_next; - so->so_rcv.sb_mb = m = m->m_next; - *mp = (struct mbuf *)0; - } else { - /*MFREE(m, so->so_rcv.sb_mb);*/ - m = so->so_rcv.sb_mb; - } - if (m) - m->m_nextpkt = nextrecord; - } - } else { - if (flags & MSG_PEEK) - moff += len; - else { - if (mp) - *mp = m_copym(m, 0, len, M_WAIT); - m->m_data += len; - m->m_len -= len; - so->so_rcv.sb_cc -= len; - } - } - if (so->so_oobmark) { - if ((flags & MSG_PEEK) == 0) { - so->so_oobmark -= len; - if (so->so_oobmark == 0) { - so->so_state |= SS_RCVATMARK; - break; - } - } else { - offset += len; - if (offset == so->so_oobmark) - break; - } - } - if (flags & MSG_EOR) - break; - /* - * If the MSG_WAITALL flag is set (for non-atomic socket), - * we must not quit until "uio->uio_resid == 0" or an error - * termination. If a signal/timeout occurs, return - * with a short count but without error. - * Keep sockbuf locked against other readers. 
- */ - while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && - !sosendallatonce(so) && !nextrecord) { - if (so->so_error || so->so_state & SS_CANTRCVMORE) - break; - error = sbwait(&so->so_rcv); - if (error) { - sbunlock(&so->so_rcv); - splx(s); - return (0); - } - m = so->so_rcv.sb_mb; - if (m) - nextrecord = m->m_nextpkt; - } + if( so->so_rcv.sb_cc == 0 && so->so_rcv.sb_sel.si_flags & SEL_FIN ) + return OSK_ESHUTDOWN; + + while( mb->m_nextpkt && total < mp0->m_len ) { + OS_DbgPrint(OSK_MID_TRACE, ("Looking at packet %x\n", mb)); + + if( *flagsp & MSG_OOB ) { + OS_DbgPrint(OSK_MID_TRACE, ("Searching for oob: %x\n", mb)); + while( mb->m_nextpkt && mb->m_nextpkt->m_type != MT_OOBDATA ) + mb = mb->m_nextpkt; + } else { + while( mb->m_nextpkt && mb->m_nextpkt->m_type != MT_DATA ) { + OS_DbgPrint + (OSK_MID_TRACE, + ("Searching for data: %x (%x@%x)\n", + mb->m_nextpkt->m_type, + mb->m_nextpkt->m_len, + mb->m_nextpkt->m_data)); + mb = mb->m_nextpkt; + } } - if (m && pr->pr_flags & PR_ATOMIC) { - flags |= MSG_TRUNC; - if ((flags & MSG_PEEK) == 0) - (void) sbdroprecord(&so->so_rcv); - } - if ((flags & MSG_PEEK) == 0) { - if (m == 0) - so->so_rcv.sb_mb = nextrecord; - if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) - (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, - (struct mbuf *)flags, (struct mbuf *)0); - } - if (orig_resid == uio->uio_resid && orig_resid && - (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { - sbunlock(&so->so_rcv); - splx(s); - goto restart; - } + if( !mb->m_nextpkt ) break; - if (flagsp) - *flagsp |= flags; -release: - sbunlock(&so->so_rcv); - splx(s); - return (error); + mn = mb->m_nextpkt; + + if( !mn->m_next ) mn->m_next = mn->m_data; + copyamt = min(mn->m_len, mp0->m_len); + OS_DbgPrint(OSK_MID_TRACE, + ("Copying %d bytes (%d,%d) (total is %d so far)\n", + copyamt, mn->m_len, mp0->m_len, total + copyamt)); + memcpy( mp0->m_data + total, mn->m_next, copyamt ); + total += copyamt; + mp0->m_len -= copyamt; + + if( copyamt < 
mn->m_len ) { + if( !(*flagsp & MSG_PEEK) ) + ((caddr_t)mn->m_next) += copyamt; + break; + } + + mb->m_nextpkt = mn->m_nextpkt; + } + + /* Free previous buffers if not peek */ + if( !(*flagsp & MSG_PEEK) ) { + while( dummy.m_nextpkt && dummy.m_nextpkt != mn ) { + mb = dummy.m_nextpkt; + dummy.m_nextpkt = mb->m_nextpkt; + OS_DbgPrint + (OSK_MID_TRACE, + ("Deleting buf %x", mb)); + m_free(mb); + } + so->so_rcv.sb_cc -= total; + so->so_rcv.sb_mb = dummy.m_nextpkt; + } + + uio->uio_resid = total; + + OS_DbgPrint(OSK_MID_TRACE,("Leaving (success)\n")); + + return 0; } int @@ -836,7 +620,7 @@ sorflush(so) (void) sblock(sb, M_WAITOK); s = splimp(); socantrcvmore(so); - sbunlock(sb); + sbunlock(so, sb); asb = *sb; bzero((caddr_t)sb, sizeof (*sb)); splx(s); @@ -958,7 +742,7 @@ sosetopt(so, level, optname, m0) } bad: if (m) - /*(void) m_free(m);*/ + (void) m_free(m); return (error); } @@ -1039,8 +823,8 @@ sogetopt(so, level, optname, mp) } default: - (void)m_free(m); - return (ENOPROTOOPT); + (void)m_free(m); + return (ENOPROTOOPT); } *mp = m; return (0); @@ -1057,5 +841,5 @@ sohasoutofband(so) gsignal(-so->so_pgid, SIGURG); else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) psignal(p, SIGURG); - selwakeup(&so->so_rcv.sb_sel); + wakeup(so, NULL, 0); } diff --git a/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket2.c b/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket2.c index f1f7d74c819..6c0f86f8f60 100644 --- a/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket2.c +++ b/reactos/drivers/lib/oskittcp/oskittcp/uipc_socket2.c @@ -111,20 +111,14 @@ soisconnected(so) so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (head && soqremque(so, 0)) { - soqinsque(head, so, 1); - sorwakeup(head); - wakeup((caddr_t)&head->so_timeo); + soqinsque(head, so, 1); + sorwakeup(head); + wakeup(so, NULL, (caddr_t)&head->so_timeo); } else { - wakeup((caddr_t)&so->so_timeo); - sorwakeup(so); - sowwakeup(so); + wakeup(so, NULL, 
(caddr_t)&so->so_timeo); + sorwakeup(so); + socwakeup(so); } - - if( OtcpEvent.SocketConnectIndication ) - OtcpEvent.SocketConnectIndication - ( OtcpEvent.ClientData, - so, - so->so_connection ); } void @@ -134,7 +128,7 @@ soisdisconnecting(so) so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); - wakeup((caddr_t)&so->so_timeo); + wakeup(so, NULL, (caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } @@ -146,7 +140,7 @@ soisdisconnected(so) so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); - wakeup((caddr_t)&so->so_timeo); + wakeup(so, NULL, (caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } @@ -194,7 +188,7 @@ sonewconn1(head, connstatus) } if (connstatus) { sorwakeup(head); - wakeup((caddr_t)&head->so_timeo); + wakeup(so, NULL, (caddr_t)&head->so_timeo); so->so_state |= connstatus; } return (so); @@ -327,23 +321,7 @@ sowakeup(so, sb) { struct proc *p; - selwakeup(&sb->sb_sel); -#ifndef OSKIT - /* - * in the OS Kit, we do not want notifications to stop - */ - sb->sb_flags &= ~SB_SEL; -#endif - if (sb->sb_flags & SB_WAIT) { - sb->sb_flags &= ~SB_WAIT; - wakeup((caddr_t)&sb->sb_cc); - } - if (so->so_state & SS_ASYNC) { - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGIO); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGIO); - } + wakeup(so, &sb->sb_sel, (caddr_t)&sb->sb_cc); } /* @@ -383,7 +361,6 @@ soreserve(so, sndcc, rcvcc) register struct socket *so; u_long sndcc, rcvcc; { - if (sbreserve(&so->so_snd, sndcc) == 0) goto bad; if (sbreserve(&so->so_rcv, rcvcc) == 0) @@ -470,28 +447,47 @@ sbappend(so, sb, m) struct sockbuf *sb; struct mbuf *m; { - register struct mbuf *n; + register struct mbuf *n, *new_mbuf; + + free( malloc( 2 ) ); if (m == 0) return; + n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - do { - if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! 
*/ - return; - } - } while (n->m_next && (n = n->m_next)); + OS_DbgPrint(OSK_MID_TRACE,("sbappendrecord: %x\n", n)); + + while( n && n->m_nextpkt ) n = n->m_nextpkt; + + new_mbuf = malloc( sizeof( *m ) + m->m_len ); + memset( new_mbuf, 0, sizeof( *m ) ); + + free( malloc( 2 ) ); + + new_mbuf->m_type = MT_DATA; + free( malloc( 2 ) ); + + new_mbuf->m_len = m->m_len; + free( malloc( 2 ) ); + + new_mbuf->m_data = ((caddr_t)new_mbuf) + sizeof(*new_mbuf); + free( malloc( 2 ) ); + + memcpy( new_mbuf->m_data, m->m_data, m->m_len ); + + free( malloc( 2 ) ); + + if( n ) { + n->m_nextpkt = new_mbuf; + OS_DbgPrint(OSK_MID_TRACE,("SK BUFF NEW: %x\n", n->m_nextpkt)); + } else { + sb->sb_mb = new_mbuf; + OS_DbgPrint(OSK_MID_TRACE, + ("SK BUFF HEAD: %x (new pkt %d bytes)\n", + sb->sb_mb, sb->sb_mb->m_len)); } - OskitDumpBuffer( m->m_data, m->m_len ); - if( OtcpEvent.SocketDataAvailable ) - OtcpEvent.SocketDataAvailable - ( OtcpEvent.ClientData, - so, - so->so_connection, - m->m_data, m->m_len ); + + free( malloc( 2 ) ); } #ifdef SOCKBUF_DEBUG @@ -539,6 +535,7 @@ sbappendrecord(sb, m0) * Put the first mbuf on the queue. * Note this permits zero length records. 
*/ + sballoc(sb, m0); if (m) m->m_nextpkt = m0; @@ -615,7 +612,7 @@ sbappendaddr(sb, asa, m0, control) int space = /*asa->sa_len;*/ sizeof(struct sockaddr); if (m0 && (m0->m_flags & M_PKTHDR) == 0) -panic("sbappendaddr"); + panic("sbappendaddr"); if (m0) space += m0->m_pkthdr.len; for (n = control; n; n = n->m_next) { @@ -709,6 +706,8 @@ sbcompress(sb, m, n) (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_cc += m->m_len; + OS_DbgPrint(OSK_MID_TRACE,("SB->SB_CC = %d\n", + sb->sb_cc)); m = m_free(m); continue; } @@ -771,6 +770,8 @@ sbdrop(sb, len) m->m_len -= len; m->m_data += len; sb->sb_cc -= len; + OS_DbgPrint(OSK_MID_TRACE,("SB->SB_CC = %d\n", + sb->sb_cc)); break; } len -= m->m_len; diff --git a/reactos/drivers/net/ndis/ndis/buffer.c b/reactos/drivers/net/ndis/ndis/buffer.c index 1fd334c1a39..fe9825a4f1c 100644 --- a/reactos/drivers/net/ndis/ndis/buffer.c +++ b/reactos/drivers/net/ndis/ndis/buffer.c @@ -52,7 +52,6 @@ __inline ULONG SkipToOffset( return Offset; } - UINT CopyBufferToBufferChain( PNDIS_BUFFER DstBuffer, UINT DstOffset, @@ -355,6 +354,18 @@ NdisAllocateBuffer( "VirtualAddress (0x%X) Length (%d)\n", Status, Buffer, PoolHandle, VirtualAddress, Length)); +#if 0 + Temp = Pool->FreeList; + while( Temp ) { + NDIS_DbgPrint(MID_TRACE,("Free buffer -> %x\n", Temp)); + Temp = Temp->Next; + } + + NDIS_DbgPrint(MID_TRACE,("|:. 
<- End free buffers")); +#endif + + if(!VirtualAddress && !Length) return; + KeAcquireSpinLock(&Pool->SpinLock, &OldIrql); if (Pool->FreeList) { @@ -370,13 +381,13 @@ NdisAllocateBuffer( Temp->Mdl.MdlFlags |= (MDL_SOURCE_IS_NONPAGED_POOL | MDL_ALLOCATED_FIXED_SIZE); Temp->Mdl.MappedSystemVa = VirtualAddress; #else - Temp->Mdl.Next = (PMDL)NULL; - Temp->Mdl.Size = (CSHORT)(sizeof(MDL) + - (ADDRESS_AND_SIZE_TO_SPAN_PAGES(VirtualAddress, Length) * sizeof(ULONG))); - Temp->Mdl.MdlFlags = (MDL_SOURCE_IS_NONPAGED_POOL | MDL_ALLOCATED_FIXED_SIZE); -; Temp->Mdl.StartVa = (PVOID)PAGE_ROUND_DOWN(VirtualAddress); - Temp->Mdl.ByteOffset = (ULONG_PTR)(VirtualAddress - PAGE_ROUND_DOWN(VirtualAddress)); - Temp->Mdl.ByteCount = Length; + Temp->Mdl.Next = (PMDL)NULL; + Temp->Mdl.Size = (CSHORT)(sizeof(MDL) + + (ADDRESS_AND_SIZE_TO_SPAN_PAGES(VirtualAddress, Length) * sizeof(ULONG))); + Temp->Mdl.MdlFlags = (MDL_SOURCE_IS_NONPAGED_POOL | MDL_ALLOCATED_FIXED_SIZE); + ; Temp->Mdl.StartVa = (PVOID)PAGE_ROUND_DOWN(VirtualAddress); + Temp->Mdl.ByteOffset = (ULONG_PTR)(VirtualAddress - PAGE_ROUND_DOWN(VirtualAddress)); + Temp->Mdl.ByteCount = Length; Temp->Mdl.MappedSystemVa = VirtualAddress; #if 0 //Temp->Mdl.Process = PsGetCurrentProcess(); @@ -392,6 +403,8 @@ NdisAllocateBuffer( } else { KeReleaseSpinLock(&Pool->SpinLock, OldIrql); *Status = NDIS_STATUS_FAILURE; + NDIS_DbgPrint(MID_TRACE, ("Can't get another packet.\n")); + KeBugCheck(0); } } @@ -429,10 +442,12 @@ NdisAllocateBufferPool( if (NumberOfDescriptors > 0) { Buffer = &Pool->Buffers[0]; + DbgPrint("NDIS BUFFER ADDRESS << %x >>\n", Buffer); Pool->FreeList = Buffer; for (i = 1; i < NumberOfDescriptors; i++) { Buffer->Next = &Pool->Buffers[i]; Buffer = Buffer->Next; + DbgPrint("NDIS BUFFER ADDRESS << %x >>\n", Buffer); } Buffer->Next = NULL; } else @@ -850,8 +865,8 @@ NdisFreeBuffer( Pool = Temp->BufferPool; KeAcquireSpinLock(&Pool->SpinLock, &OldIrql); - Buffer->Next = (PMDL)Pool->FreeList; - Pool->FreeList = 
(PNETWORK_HEADER)Buffer; + Temp->Next = (PNETWORK_HEADER)Pool->FreeList; + Pool->FreeList = (PNETWORK_HEADER)Temp; KeReleaseSpinLock(&Pool->SpinLock, OldIrql); } diff --git a/reactos/drivers/net/ndis/ndis/miniport.c b/reactos/drivers/net/ndis/ndis/miniport.c index 575396f4c87..bea89356f40 100644 --- a/reactos/drivers/net/ndis/ndis/miniport.c +++ b/reactos/drivers/net/ndis/ndis/miniport.c @@ -322,6 +322,8 @@ MiniEthReceiveComplete( NDIS_DbgPrint(DEBUG_MINIPORT, ("Called.\n")); + if( !Filter ) return; + Adapter = (PLOGICAL_ADAPTER)Filter->Miniport; NDIS_DbgPrint(MAX_TRACE, ("acquiring miniport block lock\n")); @@ -372,13 +374,15 @@ MiniEthReceiveIndication( * PacketSize = Total size of received packet */ { - MiniIndicateData((PLOGICAL_ADAPTER)Filter->Miniport, - MacReceiveContext, - HeaderBuffer, - HeaderBufferSize, - LookaheadBuffer, - LookaheadBufferSize, - PacketSize); + if( Filter ) { + MiniIndicateData((PLOGICAL_ADAPTER)Filter->Miniport, + MacReceiveContext, + HeaderBuffer, + HeaderBufferSize, + LookaheadBuffer, + LookaheadBufferSize, + PacketSize); + } } @@ -1555,6 +1559,7 @@ NdisIStartAdapter( NDIS_DbgPrint(MIN_TRACE, ("error: unsupported media\n")); ExFreePool(Adapter); ASSERT(FALSE); + KeReleaseSpinLock(&Adapter->NdisMiniportBlock.Lock, OldIrql); return; } diff --git a/reactos/drivers/net/ndis/ndis/protocol.c b/reactos/drivers/net/ndis/ndis/protocol.c index 6cc859ab264..8fbbd2cd50c 100644 --- a/reactos/drivers/net/ndis/ndis/protocol.c +++ b/reactos/drivers/net/ndis/ndis/protocol.c @@ -411,6 +411,7 @@ ProTransferData( /* XXX sd - why is that true? 
*/ if (Packet == Adapter->LoopPacket) { + NDIS_DbgPrint(MAX_TRACE, ("LoopPacket\n")); /* NDIS is responsible for looping this packet */ NdisCopyFromPacketToPacket(Packet, ByteOffset, diff --git a/reactos/drivers/net/tcpip/datalink/arp.c b/reactos/drivers/net/tcpip/datalink/arp.c index 80f5f2c0e35..b66b63574c8 100644 --- a/reactos/drivers/net/tcpip/datalink/arp.c +++ b/reactos/drivers/net/tcpip/datalink/arp.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -43,42 +44,25 @@ PNDIS_PACKET PrepareARPPacket( */ { PNDIS_PACKET NdisPacket; - PNDIS_BUFFER NdisBuffer; NDIS_STATUS NdisStatus; PARP_HEADER Header; PVOID DataBuffer; - ULONG Size; + ULONG Size, Contig; TI_DbgPrint(DEBUG_ARP, ("Called.\n")); /* Prepare ARP packet */ - Size = MaxLLHeaderSize + sizeof(ARP_HEADER) + + Size = MaxLLHeaderSize + + sizeof(ARP_HEADER) + 2 * LinkAddressLength + /* Hardware address length */ 2 * ProtoAddressLength; /* Protocol address length */ Size = MAX(Size, MinLLFrameSize); - DataBuffer = ExAllocatePool(NonPagedPool, Size); - if (!DataBuffer) - return NULL; + NdisStatus = AllocatePacketWithBuffer( &NdisPacket, NULL, Size ); + if( !NT_SUCCESS(NdisStatus) ) return NULL; - /* Allocate NDIS packet */ - NdisAllocatePacket(&NdisStatus, &NdisPacket, GlobalPacketPool); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - ExFreePool(DataBuffer); - return NULL; - } + GetDataPtr( NdisPacket, 0, &DataBuffer, &Contig ); - /* Allocate NDIS buffer for maximum link level header and ARP packet */ - NdisAllocateBuffer(&NdisStatus, &NdisBuffer, GlobalBufferPool, - DataBuffer, Size); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - NdisFreePacket(NdisPacket); - ExFreePool(DataBuffer); - return NULL; - } - - /* Link NDIS buffer into packet */ - NdisChainBufferAtFront(NdisPacket, NdisBuffer); RtlZeroMemory(DataBuffer, Size); Header = (PARP_HEADER)((ULONG_PTR)DataBuffer + MaxLLHeaderSize); Header->HWType = HardwareType; @@ -126,7 +110,6 @@ VOID ARPTransmitComplete( */ { 
TI_DbgPrint(DEBUG_ARP, ("Called.\n")); - FreeNdisPacket(NdisPacket); } diff --git a/reactos/drivers/net/tcpip/datalink/lan.c b/reactos/drivers/net/tcpip/datalink/lan.c index fb84e4cdc58..b7639557e5d 100644 --- a/reactos/drivers/net/tcpip/datalink/lan.c +++ b/reactos/drivers/net/tcpip/datalink/lan.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -20,7 +21,6 @@ BOOLEAN ProtocolRegistered = FALSE; LIST_ENTRY AdapterListHead; KSPIN_LOCK AdapterListLock; - NDIS_STATUS NDISCall( PLAN_ADAPTER Adapter, NDIS_REQUEST_TYPE Type, @@ -73,72 +73,6 @@ NDIS_STATUS NDISCall( } -PNDIS_PACKET AllocateTDPacket( - PLAN_ADAPTER Adapter) -/* - * FUNCTION: Allocates an NDIS packet for NdisTransferData - * ARGUMENTS: - * Adapter = Pointer to LAN_ADAPTER structure - * RETURNS: - * Pointer to NDIS packet or NULL if there was not enough free - * non-paged memory - */ -{ - NDIS_STATUS NdisStatus; - PNDIS_PACKET NdisPacket; - PNDIS_BUFFER Buffer; - PVOID Data; - - NdisAllocatePacket(&NdisStatus, &NdisPacket, GlobalPacketPool); - if (NdisStatus != NDIS_STATUS_SUCCESS) - return NULL; - - Data = ExAllocatePool(NonPagedPool, Adapter->MTU); - if (!Data) { - NdisFreePacket(NdisPacket); - return NULL; - } - - NdisAllocateBuffer(&NdisStatus, - &Buffer, - GlobalBufferPool, - Data, - Adapter->MTU); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - NdisFreePacket(NdisPacket); - ExFreePool(Data); - return NULL; - } - - NdisChainBufferAtFront(NdisPacket, Buffer); - - PC(NdisPacket)->Context = NULL; /* End of list */ - - return NdisPacket; -} - - -VOID FreeTDPackets( - PLAN_ADAPTER Adapter) -/* - * FUNCTION: Frees transfer data packets - * ARGUMENTS: - * Adapter = Pointer to LAN_ADAPTER structure - */ -{ - PNDIS_PACKET NdisPacket, Next; - - /* Release transfer data packets */ - NdisPacket = Adapter->TDPackets; - while (NdisPacket) { - Next = PC(NdisPacket)->Context; - FreeNdisPacket(NdisPacket); - NdisPacket = Next; - } - Adapter->TDPackets = NULL; -} - - VOID 
FreeAdapter( PLAN_ADAPTER Adapter) /* @@ -147,8 +81,7 @@ VOID FreeAdapter( * Adapter = Pointer to LAN_ADAPTER structure to free */ { - FreeTDPackets(Adapter); - ExFreePool(Adapter); + exFreePool(Adapter); } @@ -241,12 +174,11 @@ VOID STDCALL ProtocolSendComplete( * Status = Status of the operation */ { - PLAN_ADAPTER Adapter = (PLAN_ADAPTER)BindingContext; + PLAN_ADAPTER Adapter = (PLAN_ADAPTER)BindingContext; TI_DbgPrint(DEBUG_DATALINK, ("Called.\n")); AdjustPacket(Packet, Adapter->HeaderSize, PC(Packet)->DLOffset); - (*PC(Packet)->DLComplete)(Adapter->Context, Packet, Status); } @@ -285,28 +217,35 @@ VOID STDCALL ProtocolTransferDataComplete( &IPPacket.ContigSize, &IPPacket.TotalSize); + IPPacket.ContigSize = IPPacket.TotalSize = BytesTransferred; /* Determine which upper layer protocol that should receive this packet and pass it to the correct receive handler */ + + OskitDumpBuffer( IPPacket.Header, BytesTransferred ); + PacketType = ((PETH_HEADER)IPPacket.Header)->EType; + IPPacket.Header += MaxLLHeaderSize; + + TI_DbgPrint + (DEBUG_DATALINK, + ("Ether Type = %x ContigSize = %d Total = %d\n", + PacketType, IPPacket.ContigSize, IPPacket.TotalSize)); + switch (PacketType) { case ETYPE_IPv4: case ETYPE_IPv6: + TI_DbgPrint(MID_TRACE,("Received IP Packet\n")); IPReceive(Adapter->Context, &IPPacket); break; case ETYPE_ARP: + TI_DbgPrint(MID_TRACE,("Received ARP Packet\n")); ARPReceive(Adapter->Context, &IPPacket); default: break; } } - /* Release the packet descriptor */ - KeAcquireSpinLockAtDpcLevel(&Adapter->Lock); - - PC(Packet)->Context = Adapter->TDPackets; - Adapter->TDPackets = Packet; - - KeReleaseSpinLockFromDpcLevel(&Adapter->Lock); + FreeNdisPacket( Packet ); } @@ -333,8 +272,10 @@ NDIS_STATUS STDCALL ProtocolReceive( */ { USHORT EType; - UINT PacketType; + UINT PacketType, BytesTransferred; IP_PACKET IPPacket; + PCHAR BufferData; + NDIS_STATUS NdisStatus; PNDIS_PACKET NdisPacket; PNDIS_BUFFER NdisBuffer; PLAN_ADAPTER Adapter = 
(PLAN_ADAPTER)BindingContext; @@ -374,22 +315,13 @@ NDIS_STATUS STDCALL ProtocolReceive( /* Get a transfer data packet */ KeAcquireSpinLockAtDpcLevel(&Adapter->Lock); + NdisStatus = AllocatePacketWithBuffer( &NdisPacket, NULL, Adapter->MTU ); + if( NdisStatus != NDIS_STATUS_SUCCESS ) return NDIS_STATUS_NOT_ACCEPTED; + GetDataPtr( NdisPacket, 0, &BufferData, &PacketSize ); - NdisPacket = Adapter->TDPackets; - if (NdisPacket == (PNDIS_PACKET)NULL) { - TI_DbgPrint(DEBUG_DATALINK, ("No available packet descriptors.\n")); - /* We don't have a free packet descriptor. Drop the packet */ - KeReleaseSpinLockFromDpcLevel(&Adapter->Lock); - return NDIS_STATUS_SUCCESS; - } - Adapter->TDPackets = PC(NdisPacket)->Context; - - KeReleaseSpinLockFromDpcLevel(&Adapter->Lock); - + IPPacket.NdisPacket = NdisPacket; + if (LookaheadBufferSize < PacketSize) { - NDIS_STATUS NdisStatus; - UINT BytesTransferred; - /* Get the data */ NdisTransferData(&NdisStatus, Adapter->NdisHandle, @@ -398,45 +330,23 @@ NDIS_STATUS STDCALL ProtocolReceive( PacketSize, NdisPacket, &BytesTransferred); - if (NdisStatus != NDIS_STATUS_PENDING) - ProtocolTransferDataComplete(BindingContext, - NdisPacket, - NdisStatus, - BytesTransferred); - - return NDIS_STATUS_SUCCESS; + } else { + NdisStatus = NDIS_STATUS_SUCCESS; + BytesTransferred = PacketSize; + RtlCopyMemory(BufferData, + HeaderBuffer, + HeaderBufferSize); + RtlCopyMemory(BufferData + HeaderBufferSize, + LookaheadBuffer, LookaheadBufferSize); } - /* We got all the data in the lookahead buffer */ - - IPPacket.NdisPacket = NdisPacket; - - NdisGetFirstBufferFromPacket(NdisPacket, - &NdisBuffer, - &IPPacket.Header, - &IPPacket.ContigSize, - &IPPacket.TotalSize); - - RtlCopyMemory(IPPacket.Header, LookaheadBuffer, PacketSize); - - switch (PacketType) { - case ETYPE_IPv4: - case ETYPE_IPv6: - IPReceive(Adapter->Context, &IPPacket); - break; - case ETYPE_ARP: - ARPReceive(Adapter->Context, &IPPacket); - break; - default: - break; - } + if (NdisStatus != 
NDIS_STATUS_PENDING) + ProtocolTransferDataComplete(BindingContext, + NdisPacket, + NdisStatus, + BytesTransferred); /* Release the packet descriptor */ - KeAcquireSpinLockAtDpcLevel(&Adapter->Lock); - - PC(NdisPacket)->Context = Adapter->TDPackets; - Adapter->TDPackets = NdisPacket; - KeReleaseSpinLockFromDpcLevel(&Adapter->Lock); return NDIS_STATUS_SUCCESS; @@ -537,7 +447,7 @@ VOID LANTransmit( area so it can be undone before we release the packet */ Data = AdjustPacket(NdisPacket, Offset, Adapter->HeaderSize); PC(NdisPacket)->DLOffset = Offset; - + if (Adapter->State == LAN_STATE_STARTED) { switch (Adapter->Media) { case NdisMedium802_3: @@ -633,7 +543,7 @@ static NTSTATUS ReadIPAddressFromRegistry( HANDLE RegHandle, if(!AnsiLen) return STATUS_NO_MEMORY; - AnsiAddress.Buffer = ExAllocatePoolWithTag(PagedPool, AnsiLen, 0x01020304); + AnsiAddress.Buffer = exAllocatePoolWithTag(PagedPool, AnsiLen, 0x01020304); if(!AnsiAddress.Buffer) return STATUS_NO_MEMORY; @@ -642,14 +552,14 @@ static NTSTATUS ReadIPAddressFromRegistry( HANDLE RegHandle, Status = RtlUnicodeStringToAnsiString(&AnsiAddress, &UnicodeAddress, FALSE); if (!NT_SUCCESS(Status)) { - ExFreePool(AnsiAddress.Buffer); + exFreePool(AnsiAddress.Buffer); return STATUS_UNSUCCESSFUL; } AnsiAddress.Buffer[AnsiAddress.Length] = 0; *Address = AddrBuildIPv4(inet_addr(AnsiAddress.Buffer)); if (!Address) { - ExFreePool(AnsiAddress.Buffer); + exFreePool(AnsiAddress.Buffer); return STATUS_UNSUCCESSFUL; } @@ -693,20 +603,6 @@ VOID BindAdapter( return; } - /* Allocate packets for NdisTransferData */ - /* FIXME: How many should we allocate? 
*/ - Adapter->TDPackets = NULL; - for (i = 0; i < 2; i++) { - Packet = AllocateTDPacket(Adapter); - if (!Packet) { - TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); - FreeTDPackets(Adapter); - return; - } - PC(Packet)->Context = Adapter->TDPackets; - Adapter->TDPackets = Packet; - } - /* Bind the adapter to IP layer */ BindInfo.Context = Adapter; BindInfo.HeaderSize = Adapter->HeaderSize; @@ -717,9 +613,9 @@ VOID BindAdapter( BindInfo.Transmit = LANTransmit; IF = IPCreateInterface(&BindInfo); + if (!IF) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); - FreeTDPackets(Adapter); return; } @@ -750,7 +646,6 @@ VOID BindAdapter( ZwClose(RegHandle); if(Address) Address->Free(Address); if(Netmask) Netmask->Free(Netmask); - FreeTDPackets(Adapter); IPDestroyInterface(IF); return; } @@ -769,7 +664,6 @@ VOID BindAdapter( if (!IPCreateNTE(IF, Address, AddrCountPrefixBits(Netmask))) { Netmask->Free(Netmask); TI_DbgPrint(MIN_TRACE, ("IPCreateNTE() failed.\n")); - FreeTDPackets(Adapter); IPDestroyInterface(IF); return; } @@ -791,13 +685,11 @@ VOID BindAdapter( sizeof(UINT)); if (NdisStatus != NDIS_STATUS_SUCCESS) { TI_DbgPrint(MID_TRACE, ("Could not set packet filter (0x%X).\n", NdisStatus)); - FreeTDPackets(Adapter); IPDestroyInterface(IF); return; } Adapter->Context = IF; - Adapter->State = LAN_STATE_STARTED; } @@ -818,9 +710,6 @@ VOID UnbindAdapter( IPUnregisterInterface(IF); IPDestroyInterface(IF); - - /* Free transfer data packets */ - FreeTDPackets(Adapter); } } @@ -848,7 +737,7 @@ NDIS_STATUS LANRegisterAdapter( TI_DbgPrint(DEBUG_DATALINK, ("Called.\n")); - IF = ExAllocatePool(NonPagedPool, sizeof(LAN_ADAPTER)); + IF = exAllocatePool(NonPagedPool, sizeof(LAN_ADAPTER)); if (!IF) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); return NDIS_STATUS_RESOURCES; @@ -885,7 +774,7 @@ NDIS_STATUS LANRegisterAdapter( if (NdisStatus == NDIS_STATUS_PENDING) KeWaitForSingleObject(&IF->Event, UserRequest, KernelMode, FALSE, NULL); else if (NdisStatus != 
NDIS_STATUS_SUCCESS) { - ExFreePool(IF); + exFreePool(IF); return NdisStatus; } @@ -910,7 +799,7 @@ NDIS_STATUS LANRegisterAdapter( default: /* Unsupported media */ TI_DbgPrint(MIN_TRACE, ("Unsupported media.\n")); - ExFreePool(IF); + exFreePool(IF); return NDIS_STATUS_NOT_SUPPORTED; } @@ -921,7 +810,7 @@ NDIS_STATUS LANRegisterAdapter( &IF->MTU, sizeof(UINT)); if (NdisStatus != NDIS_STATUS_SUCCESS) { - ExFreePool(IF); + exFreePool(IF); return NdisStatus; } @@ -933,7 +822,7 @@ NDIS_STATUS LANRegisterAdapter( sizeof(UINT)); if (NdisStatus != NDIS_STATUS_SUCCESS) { TI_DbgPrint(MIN_TRACE, ("Query for maximum packet size failed.\n")); - ExFreePool(IF); + exFreePool(IF); return NdisStatus; } @@ -956,7 +845,7 @@ NDIS_STATUS LANRegisterAdapter( IF->HWAddressLength); if (NdisStatus != NDIS_STATUS_SUCCESS) { TI_DbgPrint(MIN_TRACE, ("Query for current hardware address failed.\n")); - ExFreePool(IF); + exFreePool(IF); return NdisStatus; } @@ -968,7 +857,7 @@ NDIS_STATUS LANRegisterAdapter( sizeof(UINT)); if (NdisStatus != NDIS_STATUS_SUCCESS) { TI_DbgPrint(MIN_TRACE, ("Query for maximum link speed failed.\n")); - ExFreePool(IF); + exFreePool(IF); return NdisStatus; } diff --git a/reactos/drivers/net/tcpip/datalink/loopback.c b/reactos/drivers/net/tcpip/datalink/loopback.c index beaca01f08b..cf0f72d3458 100644 --- a/reactos/drivers/net/tcpip/datalink/loopback.c +++ b/reactos/drivers/net/tcpip/datalink/loopback.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -169,6 +170,7 @@ NDIS_STATUS LoopRegisterAdapter( BindInfo.Transmit = LoopTransmit; Loopback = IPCreateInterface(&BindInfo); + if ((Loopback != NULL) && (IPCreateNTE(Loopback, Address, 8))) { /* Reference the interface for the NTE. 
The reference for diff --git a/reactos/drivers/net/tcpip/include/datagram.h b/reactos/drivers/net/tcpip/include/datagram.h index f2cc413064c..3d11136fbc9 100644 --- a/reactos/drivers/net/tcpip/include/datagram.h +++ b/reactos/drivers/net/tcpip/include/datagram.h @@ -35,9 +35,7 @@ NTSTATUS DGTransmit( NTSTATUS DGSendDatagram( PTDI_REQUEST Request, PTDI_CONNECTION_INFORMATION ConnInfo, - PNDIS_BUFFER Buffer, - ULONG DataSize, - DATAGRAM_BUILD_ROUTINE Build); + PIP_PACKET Packet); NTSTATUS DGReceiveDatagram( PTDI_REQUEST Request, diff --git a/reactos/drivers/net/tcpip/include/debug.h b/reactos/drivers/net/tcpip/include/debug.h index 83ad8c95301..fffbe2c7781 100644 --- a/reactos/drivers/net/tcpip/include/debug.h +++ b/reactos/drivers/net/tcpip/include/debug.h @@ -57,6 +57,7 @@ extern DWORD DebugTraceLevel; #endif /* _MSC_VER */ +#if 0 #ifdef ASSERT #undef ASSERT #endif @@ -66,6 +67,7 @@ extern DWORD DebugTraceLevel; #else /* NASSERT */ #define ASSERT(x) if (!(x)) { TI_DbgPrint(MIN_TRACE, ("Assertion "#x" failed at %s:%d\n", __FILE__, __LINE__)); KeBugCheck(0); } #endif /* NASSERT */ +#endif #define ASSERT_IRQL(x) ASSERT(KeGetCurrentIrql() <= (x)) @@ -73,8 +75,10 @@ extern DWORD DebugTraceLevel; #define TI_DbgPrint(_t_, _x_) +#if 0 #define ASSERT_IRQL(x) #define ASSERT(x) +#endif #endif /* DBG */ @@ -103,6 +107,8 @@ extern DWORD DebugTraceLevel; #define CP CHECKPOINT +#include + #endif /* __DEBUG_H */ /* EOF */ diff --git a/reactos/drivers/net/tcpip/include/info.h b/reactos/drivers/net/tcpip/include/info.h index 2ead71a6e88..bb14a1d72a8 100644 --- a/reactos/drivers/net/tcpip/include/info.h +++ b/reactos/drivers/net/tcpip/include/info.h @@ -48,7 +48,7 @@ typedef struct IPADDR_ENTRY { typedef struct IPROUTE_ENTRY { ULONG Dest; - ULONG Index; //matches if_index in IFEntry and iae_index in IPAddrEntry + ULONG Index; //matches if_index in IFEntry and iae_index in IPAddrEntry ULONG Metric1; ULONG Metric2; ULONG Metric3; @@ -87,13 +87,13 @@ typedef struct IFENTRY { #define 
IP_MIB_STATS_ID 1 #define IF_MIB_STATS_ID 1 + +#ifndef IP_MIB_ROUTETABLE_ENTRY_ID +#define IP_MIB_ROUTETABLE_ENTRY_ID 0x101 +#endif #ifndef IP_MIB_ADDRTABLE_ENTRY_ID #define IP_MIB_ADDRTABLE_ENTRY_ID 0x102 #endif -#ifndef IP_MIB_ROUTETABLE_ENTRY_ID -#define IP_MIB_ROUTETABLE_ENTRY_ID 0x103 -#endif - #ifndef MAX_PHYSADDR_SIZE #define MAX_PHYSADDR_SIZE 8 #endif @@ -137,6 +137,59 @@ TDI_STATUS InfoTdiSetInformationEx( PVOID Buffer, UINT BufferSize); +/* Network layer info functions */ +TDI_STATUS InfoNetworkLayerTdiQueryEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ); + +TDI_STATUS InfoNetworkLayerTdiSetEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ); + +TDI_STATUS InfoTransportLayerTdiQueryEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ); + +TDI_STATUS InfoTransportLayerTdiSetEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ); + +TDI_STATUS InfoInterfaceTdiQueryEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ); + +TDI_STATUS InfoInterfaceTdiSetEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ); + +/* Insert and remove interface entities */ +VOID InsertTDIInterfaceEntity( PIP_INTERFACE Interface ); +VOID RemoveTDIInterfaceEntity( PIP_INTERFACE Interface ); + #endif /* __INFO_H */ /* EOF */ diff --git a/reactos/drivers/net/tcpip/include/interface.h b/reactos/drivers/net/tcpip/include/interface.h new file mode 100644 index 00000000000..7269208438d --- /dev/null +++ b/reactos/drivers/net/tcpip/include/interface.h @@ -0,0 +1,16 @@ +#ifndef _TCPIP_INTERFACE_H +#define _TCPIP_INTERFACE_H + +#include + +NTSTATUS 
GetInterfaceIPv4Address( PIP_INTERFACE Interface, + ULONG Type, + PULONG Address ); + +UINT CountInterfaces(); + +NTSTATUS GetInterfaceSpeed( PIP_INTERFACE Interface, PUINT Speed ); +NTSTATUS GetInterfaceName( PIP_INTERFACE Interface, PCHAR NameBuffer, + UINT NameMaxLen ); + +#endif//_TCPIP_INTERFACE_H diff --git a/reactos/drivers/net/tcpip/include/ip.h b/reactos/drivers/net/tcpip/include/ip.h index 6c2b7190861..70716677e06 100644 --- a/reactos/drivers/net/tcpip/include/ip.h +++ b/reactos/drivers/net/tcpip/include/ip.h @@ -9,6 +9,7 @@ typedef VOID (*OBJECT_FREE_ROUTINE)(PVOID Object); +#define FOURCC(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d)) /* Raw IPv4 style address */ typedef ULONG IPv4_RAW_ADDRESS; @@ -93,7 +94,6 @@ typedef struct _PACKET_CONTEXT { /* The ProtocolReserved field is structured as a PACKET_CONTEXT */ #define PC(Packet) ((PPACKET_CONTEXT)(&Packet->ProtocolReserved)) - /* Address information a.k.a ADE */ typedef struct _ADDRESS_ENTRY { DEFINE_TAG @@ -105,10 +105,12 @@ typedef struct _ADDRESS_ENTRY { PIP_ADDRESS Address; /* Pointer to address identifying this entry */ } ADDRESS_ENTRY, *PADDRESS_ENTRY; -/* Values for address type */ -#define ADE_UNICAST 0x01 -#define ADE_MULTICAST 0x02 -#define ADE_ADDRMASK 0x03 +/* Values for address type -- also the interface flags */ +/* These values are meant to overlap meaningfully with the BSD ones */ +#define ADE_UNICAST 0x01 +#define ADE_MULTICAST 0x02 +#define ADE_ADDRMASK 0x04 +#define ADE_POINTOPOINT 0x10 /* There is one NTE for each source (unicast) address assigned to an interface */ typedef struct _NET_TABLE_ENTRY { @@ -162,17 +164,6 @@ typedef struct _IP_INTERFACE { } IP_INTERFACE, *PIP_INTERFACE; -/* Prefix List Entry */ -typedef struct _PREFIX_LIST_ENTRY { - DEFINE_TAG - LIST_ENTRY ListEntry; /* Entry on list */ - ULONG RefCount; /* Reference count */ - PIP_INTERFACE Interface; /* Pointer to interface */ - PIP_ADDRESS Prefix; /* Pointer to prefix */ - UINT PrefixLength; /* Length of prefix */ -}
PREFIX_LIST_ENTRY, *PPREFIX_LIST_ENTRY; - - #define IP_PROTOCOL_TABLE_SIZE 0x100 typedef VOID (*IP_PROTOCOL_HANDLER)( @@ -202,8 +193,6 @@ extern LIST_ENTRY InterfaceListHead; extern KSPIN_LOCK InterfaceListLock; extern LIST_ENTRY NetTableListHead; extern KSPIN_LOCK NetTableListLock; -extern LIST_ENTRY PrefixListHead; -extern KSPIN_LOCK PrefixListLock; extern UINT MaxLLHeaderSize; extern UINT MinLLFrameSize; diff --git a/reactos/drivers/net/tcpip/include/lan.h b/reactos/drivers/net/tcpip/include/lan.h index 7fe2631ea50..fc59398c09f 100644 --- a/reactos/drivers/net/tcpip/include/lan.h +++ b/reactos/drivers/net/tcpip/include/lan.h @@ -56,7 +56,6 @@ typedef struct LAN_ADAPTER { UINT MacOptions; /* MAC options for NIC driver/adapter */ UINT Speed; /* Link speed */ UINT PacketFilter; /* Packet filter for this adapter */ - PNDIS_PACKET TDPackets; /* Transfer Data packets */ } LAN_ADAPTER, *PLAN_ADAPTER; /* LAN adapter state constants */ diff --git a/reactos/drivers/net/tcpip/include/memtrack.h b/reactos/drivers/net/tcpip/include/memtrack.h new file mode 100644 index 00000000000..bdffba6eb7d --- /dev/null +++ b/reactos/drivers/net/tcpip/include/memtrack.h @@ -0,0 +1,74 @@ +#ifndef MEMTRACK_H +#define MEMTRACK_H + +#ifndef FOURCC +#define FOURCC(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d)) +#endif + +#define FBSD_MALLOC FOURCC('d','s','b','f') +#define EXALLOC_TAG FOURCC('E','x','A','l') + +#define AllocatePacketWithBuffer(x,y,z) AllocatePacketWithBufferX(x,y,z,__FILE__,__LINE__) +#define FreeNdisPacket(x) FreeNdisPacketX(x,__FILE__,__LINE__) + +#ifdef MEMTRACK +#define MTMARK() TrackDumpFL(__FILE__, __LINE__) +#define NdisAllocateBuffer(x,y,z,a,b) { \ + NdisAllocateBuffer(x,y,z,a,b); \ + if( *x == NDIS_STATUS_SUCCESS ) { \ + Track(NDIS_BUFFER_TAG, *y); \ + } \ +} +#define NdisAllocatePacket(x,y,z) { \ + NdisAllocatePacket(x,y,z); \ + if( *x == NDIS_STATUS_SUCCESS ) { \ + Track(NDIS_PACKET_TAG, *y); \ + } \ +} +#define NdisFreePacket(x) { Untrack(x); NdisFreePacket(x); } 
+#define NdisFreeBuffer(x) { Untrack(x); NdisFreeBuffer(x); } +#define exAllocatePool(x,y) ExAllocatePoolX(x,y,__FILE__,__LINE__) +#define exAllocatePoolWithTag(x,y,z) ExAllocatePoolX(x,y,__FILE__,__LINE__) +#define exFreePool(x) ExFreePoolX(x,__FILE__,__LINE__) + +extern LIST_ENTRY AllocatedObjectsHead; +extern KSPIN_LOCK AllocatedObjectsLock; + +typedef struct _ALLOCATION_TRACKER { + LIST_ENTRY Entry; + DWORD Tag; + PVOID Thing; + PCHAR FileName; + DWORD LineNo; +} ALLOCATION_TRACKER, *PALLOCATION_TRACKER; + +VOID TrackingInit(); +VOID TrackWithTag( DWORD Tag, PVOID Thing, PCHAR File, DWORD Line ); +#define Track(Tag,Thing) TrackWithTag(Tag,Thing,__FILE__,__LINE__) +VOID UntrackFL( PCHAR File, DWORD Line, PVOID Thing ); +#define Untrack(Thing) UntrackFL(__FILE__,__LINE__,Thing) +VOID TrackDumpFL( PCHAR File, DWORD Line ); +#define TrackDump() TrackDumpFL(__FILE__,__LINE__) +VOID TrackTag( DWORD Tag ); + +static inline PVOID ExAllocatePoolX( POOL_TYPE type, SIZE_T size, PCHAR File, ULONG Line ) { + PVOID Out = ExAllocatePool( type, size ); + if( Out ) TrackWithTag( EXALLOC_TAG, Out, File, Line ); + return Out; +} +static inline VOID ExFreePoolX( PVOID Data, PCHAR File, ULONG Line ) { + UntrackFL(File, Line, Data); + ExFreePool(Data); +} + +#define MEMTRACK_MAX_TAGS_TO_TRACK 64 +#else +#define MTMARK() +#define Track(x,y) +#define TrackingInit() +#define TrackDump() +#define Untrack(x) +#define TrackTag(x) +#endif + +#endif/*MEMTRACK_H*/ diff --git a/reactos/drivers/net/tcpip/include/prefix.h b/reactos/drivers/net/tcpip/include/prefix.h new file mode 100644 index 00000000000..21eb7544019 --- /dev/null +++ b/reactos/drivers/net/tcpip/include/prefix.h @@ -0,0 +1,28 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: include/prefix.h + * PURPOSE: Prefix list entry definitions + */ +#ifndef __PREFIX_H +#define __PREFIX_H + +/* Prefix List Entry */ +typedef struct _PREFIX_LIST_ENTRY { + DEFINE_TAG +
LIST_ENTRY ListEntry; /* Entry on list */ + ULONG RefCount; /* Reference count */ + PIP_INTERFACE Interface; /* Pointer to interface */ + PIP_ADDRESS Prefix; /* Pointer to prefix */ + UINT PrefixLength; /* Length of prefix */ +} PREFIX_LIST_ENTRY, *PPREFIX_LIST_ENTRY; + +extern LIST_ENTRY PrefixListHead; +extern KSPIN_LOCK PrefixListLock; + +VOID InitPLE(); +PPREFIX_LIST_ENTRY CreatePLE(PIP_INTERFACE IF, PIP_ADDRESS Prefix, UINT Len); +VOID DestroyPLE(PPREFIX_LIST_ENTRY PLE); +VOID DestroyPLEs(); + +#endif/*__PREFIX_H*/ diff --git a/reactos/drivers/net/tcpip/include/router.h b/reactos/drivers/net/tcpip/include/router.h index d6157ba8410..117d2133af6 100644 --- a/reactos/drivers/net/tcpip/include/router.h +++ b/reactos/drivers/net/tcpip/include/router.h @@ -17,7 +17,6 @@ typedef struct _FIB_ENTRY { OBJECT_FREE_ROUTINE Free; /* Routine used to free resources for the object */ PIP_ADDRESS NetworkAddress; /* Address of network */ PIP_ADDRESS Netmask; /* Netmask of network */ - PNET_TABLE_ENTRY NTE; /* Pointer to NTE to use */ PNEIGHBOR_CACHE_ENTRY Router; /* Pointer to NCE of router to use */ UINT Metric; /* Cost of this route */ } FIB_ENTRY, *PFIB_ENTRY; @@ -34,7 +33,6 @@ PIP_INTERFACE RouterFindOnLinkInterface( PFIB_ENTRY RouterAddRoute( PIP_ADDRESS NetworkAddress, PIP_ADDRESS Netmask, - PNET_TABLE_ENTRY NTE, PNEIGHBOR_CACHE_ENTRY Router, UINT Metric); @@ -58,6 +56,10 @@ NTSTATUS RouterStartup( NTSTATUS RouterShutdown( VOID); +UINT CountFIBs(); + +UINT CopyFIBs( PFIB_ENTRY Target ); + #endif /* __ROUTER_H */ /* EOF */ diff --git a/reactos/drivers/net/tcpip/include/routines.h b/reactos/drivers/net/tcpip/include/routines.h index 350309a797f..2c546f5247c 100644 --- a/reactos/drivers/net/tcpip/include/routines.h +++ b/reactos/drivers/net/tcpip/include/routines.h @@ -8,25 +8,7 @@ #define __ROUTINES_H -inline NTSTATUS BuildDatagramSendRequest( - PDATAGRAM_SEND_REQUEST *SendRequest, - PIP_ADDRESS RemoteAddress, - USHORT RemotePort, - PNDIS_BUFFER Buffer, - DWORD BufferSize, 
- DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - DATAGRAM_BUILD_ROUTINE Build, - ULONG Flags); - -inline NTSTATUS BuildTCPSendRequest( - PTCP_SEND_REQUEST *SendRequest, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PVOID ProtocolContext); - -UINT Random( - VOID); +UINT Random(VOID); UINT CopyBufferToBufferChain( PNDIS_BUFFER DstBuffer, @@ -53,8 +35,10 @@ UINT CopyPacketToBufferChain( UINT SrcOffset, UINT Length); -VOID FreeNdisPacket( - PNDIS_PACKET Packet); +VOID FreeNdisPacketX( + PNDIS_PACKET Packet, + PCHAR File, + UINT Line); PVOID AdjustPacket( PNDIS_PACKET Packet, diff --git a/reactos/drivers/net/tcpip/include/tcp.h b/reactos/drivers/net/tcpip/include/tcp.h index 949dee73950..c71391ba8ae 100644 --- a/reactos/drivers/net/tcpip/include/tcp.h +++ b/reactos/drivers/net/tcpip/include/tcp.h @@ -35,6 +35,9 @@ typedef struct TCPv4_HEADER { #define TCPOPTLEN_MAX_SEG_SIZE 0x4 +/* Data offset; 32-bit words (leftmost 4 bits); convert to bytes */ +#define TCP_DATA_OFFSET(DataOffset)(((DataOffset) & 0xF0) >> (4-2)) + /* TCPv4 pseudo header */ typedef struct TCPv4_PSEUDO_HEADER { @@ -70,10 +73,9 @@ typedef struct TCPv4_PSEUDO_HEADER { #define SRF_SYN TCP_SYN #define SRF_FIN TCP_FIN - PTCP_SEGMENT TCPCreateSegment( PIP_PACKET IPPacket, - ULONG SequenceNumber, + PTCPv4_HEADER TCPHeader, ULONG SegmentLength); VOID TCPFreeSegment( @@ -81,25 +83,8 @@ VOID TCPFreeSegment( VOID TCPAddSegment( PCONNECTION_ENDPOINT Connection, - PTCP_SEGMENT Segment); - -inline NTSTATUS TCPBuildSendRequest( - PTCP_SEND_REQUEST *SendRequest, - PDATAGRAM_SEND_REQUEST *DGSendRequest, - PCONNECTION_ENDPOINT Connection, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PNDIS_BUFFER Buffer, - DWORD BufferSize, - ULONG Flags); - -inline NTSTATUS TCPBuildAndTransmitSendRequest( - PCONNECTION_ENDPOINT Connection, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PNDIS_BUFFER Buffer, - DWORD BufferSize, - ULONG Flags); + PTCP_SEGMENT Segment, + PULONG Acknowledged); 
NTSTATUS TCPConnect( PTDI_REQUEST Request, @@ -108,10 +93,16 @@ NTSTATUS TCPConnect( NTSTATUS TCPListen( PTDI_REQUEST Request, - PTDI_CONNECTION_INFORMATION ConnInfo, - PTDI_CONNECTION_INFORMATION ReturnInfo); + UINT Backlog ); -NTSTATUS TCPSendDatagram( +NTSTATUS TCPReceiveData( + PTDI_REQUEST Request, + PNDIS_BUFFER Buffer, + ULONG ReceiveLength, + ULONG ReceiveFlags, + PULONG BytesReceived); + +NTSTATUS TCPSendData( PTDI_REQUEST Request, PTDI_CONNECTION_INFORMATION ConnInfo, PNDIS_BUFFER Buffer, diff --git a/reactos/drivers/net/tcpip/include/tcpip.h b/reactos/drivers/net/tcpip/include/tcpip.h index 67a0d16c2e8..1d658c9f9bc 100644 --- a/reactos/drivers/net/tcpip/include/tcpip.h +++ b/reactos/drivers/net/tcpip/include/tcpip.h @@ -63,6 +63,8 @@ #define MAX(value1, value2) \ ((value1 > value2)? value1 : value2) +#define NDIS_BUFFER_TAG FOURCC('n','b','u','f') +#define NDIS_PACKET_TAG FOURCC('n','p','k','t') #ifdef i386 @@ -110,6 +112,34 @@ #endif /* i386 */ +typedef TDI_STATUS (*InfoRequest_f)( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ); + +typedef TDI_STATUS (*InfoSet_f)( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ); + +/* Sufficient information to manage the entity list */ +typedef struct { + UINT tei_entity; + UINT tei_instance; + PVOID context; + InfoRequest_f info_req; + InfoSet_f info_set; +} TDIEntityInfo; + +#ifndef htons +#define htons(x) ((((x) & 0xff) << 8) | (((x) >> 8) & 0xff)) /* swap the two bytes of a 16-bit value */ +#endif /* Global variable */ extern PDEVICE_OBJECT TCPDeviceObject; @@ -123,7 +153,7 @@ extern KSPIN_LOCK AddressFileListLock; extern NDIS_HANDLE GlobalPacketPool; extern NDIS_HANDLE GlobalBufferPool; extern KSPIN_LOCK EntityListLock; -extern TDIEntityID *EntityList; +extern TDIEntityInfo *EntityList; extern ULONG EntityCount; extern ULONG EntityMax; extern UDP_STATISTICS UDPStats; diff --git
a/reactos/drivers/net/tcpip/include/titypes.h b/reactos/drivers/net/tcpip/include/titypes.h index b2cbfc3b90c..46955a231a9 100644 --- a/reactos/drivers/net/tcpip/include/titypes.h +++ b/reactos/drivers/net/tcpip/include/titypes.h @@ -65,7 +65,7 @@ #else /* DBG */ #define DEFINE_TAG -#define INIT_TAG (Object, Tag) +#define INIT_TAG(Object, Tag) /* * VOID ReferenceObject( @@ -127,17 +127,18 @@ typedef NTSTATUS (*DATAGRAM_BUILD_ROUTINE)( PIP_PACKET *IPPacket); typedef struct _DATAGRAM_SEND_REQUEST { - LIST_ENTRY ListEntry; /* Entry on list */ - PIP_ADDRESS RemoteAddress; /* Pointer to remote IP address */ - USHORT RemotePort; /* Remote port number */ - PNDIS_BUFFER Buffer; /* Pointer to NDIS buffer to send */ - DWORD BufferSize; /* Size of Buffer */ + LIST_ENTRY ListEntry; + PNDIS_PACKET PacketToSend; DATAGRAM_COMPLETION_ROUTINE Complete; /* Completion routine */ PVOID Context; /* Pointer to context information */ - DATAGRAM_BUILD_ROUTINE Build; /* Datagram build routine */ + IP_PACKET Packet; + UINT BufferSize; + IP_ADDRESS RemoteAddress; + USHORT RemotePort; ULONG Flags; /* Protocol specific flags */ } DATAGRAM_SEND_REQUEST, *PDATAGRAM_SEND_REQUEST; +#if 0 #define InitializeDatagramSendRequest( \ _SendRequest, \ _RemoteAddress, \ @@ -157,6 +158,7 @@ typedef struct _DATAGRAM_SEND_REQUEST { (_SendRequest)->Build = (_Build); \ (_SendRequest)->Flags = (_Flags); \ } +#endif /* These things bug me... They hide the member names. */ /* Transport address file context structure. 
The FileObject->FsContext2 field holds a pointer to this structure */ @@ -254,32 +256,17 @@ typedef struct _AF_SEARCH { USHORT Protocol; /* Protocol number */ } AF_SEARCH, *PAF_SEARCH; - - /******************************************************* * Connection-oriented communication support structures * *******************************************************/ -typedef struct _TCP_SEND_REQUEST { +typedef struct _TCP_RECEIVE_REQUEST { LIST_ENTRY ListEntry; /* Entry on list */ + PNDIS_BUFFER Buffer; /* Pointer to receive buffer */ + ULONG BufferSize; /* Size of Buffer */ DATAGRAM_COMPLETION_ROUTINE Complete; /* Completion routine */ PVOID Context; /* Pointer to context information */ - PVOID ProtocolContext; /* Protocol specific context */ - ULONG Flags; /* Protocol specific flags */ - ULONG SequenceNumber; /* Sequence number (network byte order) */ - ULONG AckNumber; /* Acknowledgement number (network byte order) */ -} TCP_SEND_REQUEST, *PTCP_SEND_REQUEST; - -#define InitializeTCPSendRequest( \ - _SendRequest, \ - _Complete, \ - _Context, \ - _ProtocolContext) { \ - (_SendRequest)->Complete = (_Complete); \ - (_SendRequest)->Context = (_Context); \ - (_SendRequest)->ProtocolContext = (_ProtocolContext); \ - } - +} TCP_RECEIVE_REQUEST, *PTCP_RECEIVE_REQUEST; /* Connection states */ typedef enum { @@ -302,8 +289,10 @@ typedef enum { typedef struct _TCP_SEGMENT { LIST_ENTRY ListEntry; PIP_PACKET IPPacket; /* Pointer to IP packet */ + PVOID SegmentData; /* Pointer to segment data */ ULONG SequenceNumber; /* Sequence number of first byte in segment */ ULONG Length; /* Number of bytes in segment */ + ULONG BytesDelivered; /* Number of bytes already delivered to the client */ } TCP_SEGMENT, *PTCP_SEGMENT; @@ -316,15 +305,16 @@ typedef struct _CONNECTION_ENDPOINT { ULONG RefCount; /* Number of references to this object */ PVOID ClientContext; /* Pointer to client context information */ PADDRESS_FILE AddressFile; /* Associated address file object (NULL if none) */ + PVOID 
SocketContext; /* Context for lower layer */ - CONNECTION_STATE State; /* Connection state */ - +#if 0 PIP_ADDRESS LocalAddress; /* Pointer to local IP address */ USHORT LocalPort; /* Local port number (network byte order) */ PIP_ADDRESS RemoteAddress; /* Pointer to remote IP address */ USHORT RemotePort; /* Remote port number (network byte order) */ + CONNECTION_STATE State; /* Connection state */ /* Send sequence variables */ ULONG SendUnacknowledged; /* Highest sequence number that is acknowledged */ ULONG SendNext; /* Sequence number of last data block sent */ @@ -335,20 +325,23 @@ typedef struct _CONNECTION_ENDPOINT { ULONG SendISS; /* Initial send sequence number */ /* Receive sequence variables */ - ULONG ReceiveNext; /* Sequence number of last data block received */ + ULONG ReceiveNext; /* Next sequence number expected and start of receive window */ ULONG ReceiveWindow; /* Maximum allowed number of octets in a segment */ ULONG ReceiveUrgentPointer; /* Sequence number of start of urgent data */ ULONG ReceiveIRS; /* Initial receive sequence number */ + ULONG ReceiveDelivered; /* Next sequence number to be delivered to the client */ /* Statistics for computing the retransmission timeout */ ULONG TimestampSend; /* Timestamp when sending a segment */ ULONG TimestampAck; /* Timestamp when receiving acknowledgment */ +#endif /* Requests */ PTDI_REQUEST ListenRequest; /* Queued listen request */ + LIST_ENTRY ReceiveRequests; /* Queued receive requests */ /* Queues */ - LIST_ENTRY ReceivedSegments; + LIST_ENTRY ReceivedSegments;/* Segments that are received */ } CONNECTION_ENDPOINT, *PCONNECTION_ENDPOINT; diff --git a/reactos/drivers/net/tcpip/makefile b/reactos/drivers/net/tcpip/makefile index 1453a369a79..51969f528f8 100644 --- a/reactos/drivers/net/tcpip/makefile +++ b/reactos/drivers/net/tcpip/makefile @@ -1,4 +1,4 @@ -# $Id: makefile,v 1.16 2004/03/04 20:45:38 chorns Exp $ +# $Id: makefile,v 1.17 2004/06/09 18:11:37 arty Exp $ PATH_TO_TOP = ../../.. 
@@ -8,9 +8,16 @@ TARGET_TYPE = export_driver TARGET_NAME = tcpip -TARGET_CFLAGS = -I./include -DDBG=1 -DNDIS40 -D__USE_W32API +TARGET_CFLAGS = \ + -D__USE_W32API \ + -DNDIS40 \ + -DMEMTRACK \ + -I./include \ + -I$(PATH_TO_TOP)/drivers/lib/oskittcp/include \ + -I$(PATH_TO_TOP)/w32api/include \ + -I$(PATH_TO_TOP)/include -TARGET_DDKLIBS = ndis.a +TARGET_DDKLIBS = ndis.a $(PATH_TO_TOP)/dk/w32/lib/oskittcp.a TARGET_CLEAN = \ tcpip/*.o \ @@ -19,32 +26,35 @@ TARGET_CLEAN = \ transport/datagram/*.o \ transport/rawip/*.o \ transport/tcp/*.o \ - transport/udp/*.o + transport/udp/*.o TCPIP_OBJECTS = tcpip/main.o tcpip/address.o tcpip/checksum.o \ - tcpip/dispatch.o tcpip/fileobjs.o tcpip/info.o \ - tcpip/pool.o tcpip/routines.o + tcpip/dispatch.o tcpip/fileobjs.o \ + tcpip/pool.o tcpip/routines.o tcpip/interface.o \ + tcpip/memtrack.o tcpip/irp.o +INFO_OBJECTS = tcpip/info.o tcpip/ninfo.o tcpip/tinfo.o tcpip/iinfo.o DATALINK_OBJECTS = datalink/arp.o datalink/lan.o datalink/loopback.o NETWORK_OBJECTS = network/icmp.o network/ip.o network/neighbor.o \ network/receive.o network/route.o network/router.o \ - network/transmit.o + network/transmit.o network/prefix.o DATAGRAM_OBJECTS = transport/datagram/datagram.o RAWIP_OBJECTS = transport/rawip/rawip.o -TCP_OBJECTS = transport/tcp/tcp.o transport/tcp/tcpcore.o \ - transport/tcp/tcp_input.o transport/tcp/tcp_ipv4.o \ - transport/tcp/tcp_output.o transport/tcp/tcp_timer.o +TCP_OBJECTS = transport/tcp/tcp.o transport/tcp/event.o transport/tcp/if.o UDP_OBJECTS = transport/udp/udp.o ARCH_OBJECTS = tcpip/i386/checksum.o TARGET_OBJECTS = \ $(TCPIP_OBJECTS) \ + $(INFO_OBJECTS) \ $(DATALINK_OBJECTS) \ $(NETWORK_OBJECTS) \ $(DATAGRAM_OBJECTS) \ $(RAWIP_OBJECTS) \ $(TCP_OBJECTS) \ + $(FREEBSD_OBJECTS) \ + $(REACTOS_OBJECTS) \ $(UDP_OBJECTS) \ - $(ARCH_OBJECTS) + $(ARCH_OBJECTS) \ include $(PATH_TO_TOP)/rules.mak diff --git a/reactos/drivers/net/tcpip/network/icmp.c b/reactos/drivers/net/tcpip/network/icmp.c index c5855cea471..38bd7d918f3 
100644 --- a/reactos/drivers/net/tcpip/network/icmp.c +++ b/reactos/drivers/net/tcpip/network/icmp.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -38,8 +39,6 @@ VOID SendICMPComplete( FreeNdisPacket(Packet); TI_DbgPrint(DEBUG_ICMP, ("Freeing IP packet at %X.\n", IPPacket)); - - (*IPPacket->Free)(IPPacket); } @@ -79,9 +78,8 @@ PIP_PACKET PrepareICMPPacket( Size = MaxLLHeaderSize + sizeof(IPv4_HEADER) + sizeof(ICMP_HEADER) + DataSize; - DataBuffer = ExAllocatePool(NonPagedPool, Size); + DataBuffer = exAllocatePool(NonPagedPool, Size); if (!DataBuffer) { - (*IPPacket->Free)(IPPacket); return NULL; } @@ -90,10 +88,10 @@ PIP_PACKET PrepareICMPPacket( /* Allocate NDIS packet */ NdisAllocatePacket(&NdisStatus, &NdisPacket, GlobalPacketPool); if (NdisStatus != NDIS_STATUS_SUCCESS) { - (*IPPacket->Free)(IPPacket); - ExFreePool(DataBuffer); + exFreePool(DataBuffer); return NULL; } + Track(NDIS_PACKET_TAG,NdisPacket); TI_DbgPrint(MAX_TRACE, ("NdisPacket at (0x%X).\n", NdisPacket)); @@ -101,12 +99,12 @@ PIP_PACKET PrepareICMPPacket( NdisAllocateBuffer(&NdisStatus, &NdisBuffer, GlobalBufferPool, DataBuffer, Size); if (NdisStatus != NDIS_STATUS_SUCCESS) { - (*IPPacket->Free)(IPPacket); - NdisFreePacket(NdisPacket); - ExFreePool(DataBuffer); + FreeNdisPacket(NdisPacket); + exFreePool(DataBuffer); return NULL; } - + Track(NDIS_BUFFER_TAG,NdisBuffer); + TI_DbgPrint(MAX_TRACE, ("NdisBuffer at (0x%X).\n", NdisBuffer)); /* Link NDIS buffer into packet */ @@ -202,9 +200,10 @@ VOID ICMPReceive( ((PICMP_HEADER)NewPacket->Data)->Code = 0; ((PICMP_HEADER)NewPacket->Data)->Checksum = 0; +#ifdef DBG DisplayIPPacket(IPPacket); - DisplayIPPacket(NewPacket); +#endif ICMPTransmit(NTE, NewPacket); @@ -245,11 +244,11 @@ VOID ICMPTransmit( IPv4Checksum(IPPacket->Data, IPPacket->TotalSize - IPPacket->HeaderSize, 0); /* Get a route to the destination address */ + PNEIGHBOR_CACHE_ENTRY *NCE = RouterGetRoute( &IPPacket->DstAddr, NULL ); if 
(RouteGetRouteToDestination(&IPPacket->DstAddr, NTE, &RCN) == IP_SUCCESS) { /* Send the packet */ if (IPSendDatagram(IPPacket, RCN) != STATUS_SUCCESS) { FreeNdisPacket(IPPacket->NdisPacket); - (*IPPacket->Free)(IPPacket); } /* We're done with the RCN */ DereferenceObject(RCN); @@ -261,7 +260,6 @@ VOID ICMPTransmit( IPPacket->DstAddr.Address.IPv4Address)); /* Discard packet */ FreeNdisPacket(IPPacket->NdisPacket); - (*IPPacket->Free)(IPPacket); } } diff --git a/reactos/drivers/net/tcpip/network/ip.c b/reactos/drivers/net/tcpip/network/ip.c index 5e7390689ea..f42185cf6eb 100644 --- a/reactos/drivers/net/tcpip/network/ip.c +++ b/reactos/drivers/net/tcpip/network/ip.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -25,8 +27,6 @@ LIST_ENTRY InterfaceListHead; KSPIN_LOCK InterfaceListLock; LIST_ENTRY NetTableListHead; KSPIN_LOCK NetTableListLock; -LIST_ENTRY PrefixListHead; -KSPIN_LOCK PrefixListLock; UINT MaxLLHeaderSize; /* Largest maximum header size */ UINT MinLLFrameSize; /* Largest minimum frame size */ BOOLEAN IPInitialized = FALSE; @@ -47,6 +47,17 @@ VOID FreePacket( } +VOID DontFreePacket( + PVOID Object) +/* + * FUNCTION: Do nothing for when the IPPacket struct is part of another + * ARGUMENTS: + * Object = Pointer to an IP packet structure + */ +{ +} + + VOID FreeADE( PVOID Object) /* @@ -55,7 +66,7 @@ VOID FreeADE( * Object = Pointer to an address entry structure */ { - ExFreePool(Object); + exFreePool(Object); } @@ -67,7 +78,7 @@ VOID FreeNTE( * Object = Pointer to an net table entry structure */ { - ExFreePool(Object); + exFreePool(Object); } @@ -79,7 +90,7 @@ VOID FreeIF( * Object = Pointer to an interface structure */ { - ExFreePool(Object); + exFreePool(Object); } @@ -114,7 +125,7 @@ PADDRESS_ENTRY CreateADE( A2S(Address), A2S(NTE->Address))); /* Allocate space for an ADE and set it up */ - ADE = ExAllocatePool(NonPagedPool, 
sizeof(ADDRESS_ENTRY)); + ADE = exAllocatePool(NonPagedPool, sizeof(ADDRESS_ENTRY)); if (!ADE) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); return NULL; @@ -221,119 +232,35 @@ PIP_PACKET IPCreatePacket( INIT_TAG(IPPacket, TAG('I','P','K','T')); - IPPacket->Free = FreePacket; - IPPacket->RefCount = 1; - IPPacket->Type = Type; + IPPacket->Free = FreePacket; + IPPacket->RefCount = 1; + IPPacket->Type = Type; + IPPacket->HeaderSize = 20; return IPPacket; } - -PPREFIX_LIST_ENTRY CreatePLE( - PIP_INTERFACE IF, - PIP_ADDRESS Prefix, - UINT Length) +PIP_PACKET IPInitializePacket( + PIP_PACKET IPPacket, + ULONG Type) /* - * FUNCTION: Creates a prefix list entry and binds it to an interface + * FUNCTION: Initializes a caller-supplied IP packet object * ARGUMENTS: - * IF = Pointer to interface - * Prefix = Pointer to prefix - * Length = Length of prefix + * Type = Type of IP packet * RETURNS: - * Pointer to PLE, NULL if there was not enough free resources - * NOTES: - * The prefix list entry retains a reference to the interface and - * the provided address. The caller is responsible for providing - * these references + * Pointer to the initialized IP packet (the IPPacket argument itself). */ { - PPREFIX_LIST_ENTRY PLE; - - TI_DbgPrint(DEBUG_IP, ("Called.
IF (0x%X) Prefix (0x%X) Length (%d).\n", IF, Prefix, Length)); - - TI_DbgPrint(DEBUG_IP, ("Prefix (%s).\n", A2S(Prefix))); - - /* Allocate space for an PLE and set it up */ - PLE = ExAllocatePool(NonPagedPool, sizeof(PREFIX_LIST_ENTRY)); - if (!PLE) { - TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); - return NULL; - } - - INIT_TAG(PLE, TAG('P','L','E',' ')); - PLE->RefCount = 1; - PLE->Interface = IF; - PLE->Prefix = Prefix; - PLE->PrefixLength = Length; - - /* Add PLE to the global prefix list */ - ExInterlockedInsertTailList(&PrefixListHead, &PLE->ListEntry, &PrefixListLock); - - return PLE; -} - - -VOID DestroyPLE( - PPREFIX_LIST_ENTRY PLE) -/* - * FUNCTION: Destroys an prefix list entry - * ARGUMENTS: - * PLE = Pointer to prefix list entry - * NOTES: - * The prefix list lock must be held when called - */ -{ - TI_DbgPrint(DEBUG_IP, ("Called. PLE (0x%X).\n", PLE)); - - TI_DbgPrint(DEBUG_IP, ("PLE (%s).\n", PLE->Prefix)); - - /* Unlink the prefix list entry from the list */ - RemoveEntryList(&PLE->ListEntry); - - /* Dereference the address */ - DereferenceObject(PLE->Prefix); - - /* Dereference the interface */ - DereferenceObject(PLE->Interface); - -#ifdef DBG - PLE->RefCount--; - - if (PLE->RefCount != 0) { - TI_DbgPrint(MIN_TRACE, ("Prefix list entry at (0x%X) has (%d) references (should be 0).\n", PLE, PLE->RefCount)); - } -#endif - - /* And free the PLE */ - ExFreePool(PLE); -} - - -VOID DestroyPLEs( - VOID) -/* - * FUNCTION: Destroys all prefix list entries - */ -{ - KIRQL OldIrql; - PLIST_ENTRY CurrentEntry; - PLIST_ENTRY NextEntry; - PPREFIX_LIST_ENTRY Current; - - TI_DbgPrint(DEBUG_IP, ("Called.\n")); - - KeAcquireSpinLock(&PrefixListLock, &OldIrql); - - /* Search the list and remove every PLE we find */ - CurrentEntry = PrefixListHead.Flink; - while (CurrentEntry != &PrefixListHead) { - NextEntry = CurrentEntry->Flink; - Current = CONTAINING_RECORD(CurrentEntry, PREFIX_LIST_ENTRY, ListEntry); - /* Destroy the PLE */ - DestroyPLE(Current); - 
CurrentEntry = NextEntry; - } - KeReleaseSpinLock(&PrefixListLock, OldIrql); + /* FIXME: Is this needed? */ + RtlZeroMemory(IPPacket, sizeof(IP_PACKET)); + + INIT_TAG(IPPacket, TAG('I','P','K','T')); + + IPPacket->Free = DontFreePacket; + IPPacket->RefCount = 1; + IPPacket->Type = Type; + + return IPPacket; } @@ -364,7 +291,7 @@ PNET_TABLE_ENTRY IPCreateNTE( TI_DbgPrint(DEBUG_IP, ("Address (%s).\n", A2S(Address))); /* Allocate room for an NTE */ - NTE = ExAllocatePool(NonPagedPool, sizeof(NET_TABLE_ENTRY)); + NTE = exAllocatePool(NonPagedPool, sizeof(NET_TABLE_ENTRY)); if (!NTE) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); return NULL; @@ -392,7 +319,7 @@ PNET_TABLE_ENTRY IPCreateNTE( ADE = CreateADE(IF, NTE->Address, ADE_UNICAST, NTE); if (!ADE) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); - ExFreePool(NTE); + exFreePool(NTE); return NULL; } @@ -400,7 +327,7 @@ PNET_TABLE_ENTRY IPCreateNTE( NTE->PLE = CreatePLE(IF, NTE->Address, PrefixLength); if (!NTE->PLE) { DestroyADE(IF, ADE); - ExFreePool(NTE); + exFreePool(NTE); return NULL; } @@ -462,7 +389,7 @@ VOID DestroyNTE( } #endif /* And free the NTE */ - ExFreePool(NTE); + exFreePool(NTE); } @@ -792,7 +719,7 @@ PIP_INTERFACE IPCreateInterface( } #endif - IF = ExAllocatePool(NonPagedPool, sizeof(IP_INTERFACE)); + IF = exAllocatePool(NonPagedPool, sizeof(IP_INTERFACE)); if (!IF) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); return NULL; @@ -821,6 +748,8 @@ PIP_INTERFACE IPCreateInterface( KeInitializeSpinLock(&IF->Lock); + InsertTDIInterfaceEntity( IF ); + return IF; } @@ -838,6 +767,8 @@ VOID IPDestroyInterface( TI_DbgPrint(DEBUG_IP, ("Called. 
IF (0x%X).\n", IF)); + RemoveTDIInterfaceEntity( IF ); + KeAcquireSpinLock(&NetTableListLock, &OldIrql1); KeAcquireSpinLock(&IF->Lock, &OldIrql2); DestroyADEs(IF); @@ -852,7 +783,8 @@ VOID IPDestroyInterface( TI_DbgPrint(MIN_TRACE, ("Interface at (0x%X) has (%d) references (should be 0).\n", IF, IF->RefCount)); } #endif - ExFreePool(IF); + + exFreePool(IF); } @@ -891,30 +823,29 @@ BOOLEAN IPRegisterInterface( KeReleaseSpinLock(&IF->Lock, OldIrql); return FALSE; } -#if 1 + /* Reference objects for forward information base */ ReferenceObject(Current->Address); ReferenceObject(Current->PLE->Prefix); - ReferenceObject(Current); + ReferenceObject(NCE); + /* NCE is already referenced */ - if (!RouterAddRoute(Current->Address, Current->PLE->Prefix, Current, NCE, 1)) { + if (!RouterAddRoute(Current->Address, Current->PLE->Prefix, NCE, 1)) { TI_DbgPrint(MIN_TRACE, ("Could not add route due to insufficient resources.\n")); DereferenceObject(Current->Address); DereferenceObject(Current->PLE->Prefix); - DereferenceObject(Current); DereferenceObject(NCE); } -#else + RCN = RouteAddRouteToDestination(Current->Address, Current, IF, NCE); if (!RCN) { TI_DbgPrint(MIN_TRACE, ("Could not create RCN.\n")); DereferenceObject(Current->Address); KeReleaseSpinLock(&IF->Lock, OldIrql); - return FALSE; } /* Don't need this any more since the route cache references the NCE */ DereferenceObject(NCE); -#endif + CurrentEntry = CurrentEntry->Flink; } @@ -1096,9 +1027,7 @@ NTSTATUS IPStartup( InitializeListHead(&ReassemblyListHead); KeInitializeSpinLock(&ReassemblyListLock); - /* Initialize the prefix list and protecting lock */ - InitializeListHead(&PrefixListHead); - KeInitializeSpinLock(&PrefixListLock); + InitPLE(); /* Initialize our periodic timer and its associated DPC object. 
When the timer expires, the IPTimeout deferred procedure call (DPC) is queued */ diff --git a/reactos/drivers/net/tcpip/network/neighbor.c b/reactos/drivers/net/tcpip/network/neighbor.c index cf76f3749da..e9e58fded49 100644 --- a/reactos/drivers/net/tcpip/network/neighbor.c +++ b/reactos/drivers/net/tcpip/network/neighbor.c @@ -7,10 +7,10 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include -#include #include #include #include @@ -186,7 +186,7 @@ VOID NBShutdown( NdisPacket = NextNdisPacket; } -#if DBG +#ifdef DBG if (CurNCE->RefCount != 1) { TI_DbgPrint(DEBUG_REFCOUNT, ("NCE at (0x%X) has (%d) references (should be 1).\n", CurNCE, CurNCE->RefCount)); @@ -293,7 +293,7 @@ PNEIGHBOR_CACHE_ENTRY NBAddNeighbor( NCE->Interface = Interface; NCE->Address = Address; NCE->LinkAddressLength = LinkAddressLength; - NCE->LinkAddress = (PVOID)((ULONG_PTR)NCE + sizeof(NEIGHBOR_CACHE_ENTRY)); + NCE->LinkAddress = (PVOID)&NCE[1]; if (LinkAddress != NULL) { RtlCopyMemory(NCE->LinkAddress, LinkAddress, LinkAddressLength); @@ -524,7 +524,7 @@ VOID NBRemoveNeighbor( /* Remove reference to the address */ DereferenceObject(CurNCE->Address); -#if DBG +#ifdef DBG CurNCE->RefCount--; if (CurNCE->RefCount != 0) diff --git a/reactos/drivers/net/tcpip/network/prefix.c b/reactos/drivers/net/tcpip/network/prefix.c new file mode 100644 index 00000000000..8854258b928 --- /dev/null +++ b/reactos/drivers/net/tcpip/network/prefix.c @@ -0,0 +1,139 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: network/ip.c + * PURPOSE: Internet Protocol module + * PROGRAMMERS: Casper S. 
Hornstrup (chorns@users.sourceforge.net) + * Art Yerkes (arty@users.sourceforge.net) + * REVISIONS: + * CSH 01/08-2000 Created + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LIST_ENTRY PrefixListHead; +KSPIN_LOCK PrefixListLock; + +/* --------- The Prefix List ---------- */ + +VOID InitPLE() { + /* Initialize the prefix list and protecting lock */ + InitializeListHead(&PrefixListHead); + KeInitializeSpinLock(&PrefixListLock); +} + + +PPREFIX_LIST_ENTRY CreatePLE(PIP_INTERFACE IF, PIP_ADDRESS Prefix, UINT Length) +/* + * FUNCTION: Creates a prefix list entry and binds it to an interface + * ARGUMENTS: + * IF = Pointer to interface + * Prefix = Pointer to prefix + * Length = Length of prefix + * RETURNS: + * Pointer to PLE, NULL if there was not enough free resources + * NOTES: + * The prefix list entry retains a reference to the interface and + * the provided address. The caller is responsible for providing + * these references + */ +{ + PPREFIX_LIST_ENTRY PLE; + + TI_DbgPrint(DEBUG_IP, ("Called. IF (0x%X) Prefix (0x%X) Length (%d).\n", IF, Prefix, Length)); + + TI_DbgPrint(DEBUG_IP, ("Prefix (%s).\n", A2S(Prefix))); + + /* Allocate space for an PLE and set it up */ + PLE = ExAllocatePool(NonPagedPool, sizeof(PREFIX_LIST_ENTRY)); + if (!PLE) { + TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); + return NULL; + } + + INIT_TAG(PLE, TAG('P','L','E',' ')); + PLE->RefCount = 1; + PLE->Interface = IF; + PLE->Prefix = Prefix; + PLE->PrefixLength = Length; + + /* Add PLE to the global prefix list */ + ExInterlockedInsertTailList(&PrefixListHead, &PLE->ListEntry, &PrefixListLock); + + return PLE; +} + + +VOID DestroyPLE( + PPREFIX_LIST_ENTRY PLE) +/* + * FUNCTION: Destroys an prefix list entry + * ARGUMENTS: + * PLE = Pointer to prefix list entry + * NOTES: + * The prefix list lock must be held when called + */ +{ + TI_DbgPrint(DEBUG_IP, ("Called. 
PLE (0x%X).\n", PLE)); + + TI_DbgPrint(DEBUG_IP, ("PLE (%s).\n", PLE->Prefix)); + + /* Unlink the prefix list entry from the list */ + RemoveEntryList(&PLE->ListEntry); + + /* Dereference the address */ + DereferenceObject(PLE->Prefix); + + /* Dereference the interface */ + DereferenceObject(PLE->Interface); + +#ifdef DBG + PLE->RefCount--; + + if (PLE->RefCount != 0) { + TI_DbgPrint(MIN_TRACE, ("Prefix list entry at (0x%X) has (%d) references (should be 0).\n", PLE, PLE->RefCount)); + } +#endif + + /* And free the PLE */ + ExFreePool(PLE); +} + + +VOID DestroyPLEs( + VOID) +/* + * FUNCTION: Destroys all prefix list entries + */ +{ + KIRQL OldIrql; + PLIST_ENTRY CurrentEntry; + PLIST_ENTRY NextEntry; + PPREFIX_LIST_ENTRY Current; + + TI_DbgPrint(DEBUG_IP, ("Called.\n")); + + KeAcquireSpinLock(&PrefixListLock, &OldIrql); + + /* Search the list and remove every PLE we find */ + CurrentEntry = PrefixListHead.Flink; + while (CurrentEntry != &PrefixListHead) { + NextEntry = CurrentEntry->Flink; + Current = CONTAINING_RECORD(CurrentEntry, PREFIX_LIST_ENTRY, ListEntry); + /* Destroy the PLE */ + DestroyPLE(Current); + CurrentEntry = NextEntry; + } + KeReleaseSpinLock(&PrefixListLock, OldIrql); +} + diff --git a/reactos/drivers/net/tcpip/network/receive.c b/reactos/drivers/net/tcpip/network/receive.c index 8e57493e7eb..960785e2f10 100644 --- a/reactos/drivers/net/tcpip/network/receive.c +++ b/reactos/drivers/net/tcpip/network/receive.c @@ -9,6 +9,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -100,7 +101,7 @@ VOID FreeIPDR( TI_DbgPrint(DEBUG_IP, ("Freeing fragment data at (0x%X).\n", CurrentF->Data)); /* Free the fragment data buffer */ - ExFreePool(CurrentF->Data); + exFreePool(CurrentF->Data); TI_DbgPrint(DEBUG_IP, ("Freeing fragment at (0x%X).\n", CurrentF)); @@ -112,7 +113,7 @@ VOID FreeIPDR( /* Free resources for the header, if it exists */ if (IPDR->IPv4Header) { TI_DbgPrint(DEBUG_IP, ("Freeing IPv4 header data at (0x%X).\n", 
IPDR->IPv4Header)); - ExFreePool(IPDR->IPv4Header); + exFreePool(IPDR->IPv4Header); } TI_DbgPrint(DEBUG_IP, ("Freeing IPDR data at (0x%X).\n", IPDR)); @@ -201,6 +202,11 @@ PIP_PACKET ReassembleDatagram( PVOID Data; TI_DbgPrint(DEBUG_IP, ("Reassembling datagram from IPDR at (0x%X).\n", IPDR)); + TI_DbgPrint(DEBUG_IP, ("IPDR->HeaderSize = %d\n", IPDR->HeaderSize)); + TI_DbgPrint(DEBUG_IP, ("IPDR->DataSize = %d\n", IPDR->DataSize)); + + TI_DbgPrint(DEBUG_IP, ("Fragment header:\n")); + OskitDumpBuffer(IPDR->IPv4Header, IPDR->HeaderSize); /* FIXME: Assume IPv4 */ IPPacket = IPCreatePacket(IP_ADDRESS_V4); @@ -216,7 +222,7 @@ PIP_PACKET ReassembleDatagram( RtlCopyMemory(&IPPacket->DstAddr, &IPDR->DstAddr, sizeof(IP_ADDRESS)); /* Allocate space for full IP datagram */ - IPPacket->Header = ExAllocatePool(NonPagedPool, IPPacket->TotalSize); + IPPacket->Header = exAllocatePool(NonPagedPool, IPPacket->TotalSize); if (!IPPacket->Header) { TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); (*IPPacket->Free)(IPPacket); @@ -226,7 +232,7 @@ PIP_PACKET ReassembleDatagram( /* Copy the header into the buffer */ RtlCopyMemory(IPPacket->Header, IPDR->IPv4Header, IPDR->HeaderSize); - Data = (PVOID)((ULONG_PTR)IPPacket->Header + IPDR->HeaderSize); + Data = IPPacket->Header + IPDR->HeaderSize; IPPacket->Data = Data; /* Copy data from all fragments into buffer */ @@ -237,11 +243,10 @@ PIP_PACKET ReassembleDatagram( TI_DbgPrint(DEBUG_IP, ("Copying (%d) bytes of fragment data from (0x%X) to offset (%d).\n", Current->Size, Data, Current->Offset)); /* Copy fragment data to the destination buffer at the correct offset */ - RtlCopyMemory( - (PVOID)((ULONG_PTR)Data + Current->Offset), - Current->Data, - Current->Size); - + RtlCopyMemory((PVOID)((ULONG_PTR)Data + Current->Offset), + Current->Data, + Current->Size); + OskitDumpBuffer( Data, Current->Offset + Current->Size ); CurrentEntry = CurrentEntry->Flink; } @@ -269,7 +274,7 @@ __inline VOID Cleanup( RemoveIPDR(IPDR); FreeIPDR(IPDR); if 
(Buffer) - ExFreePool(Buffer); + exFreePool(Buffer); } @@ -399,7 +404,7 @@ VOID ProcessFragment( /* If this is the first fragment, save the IP header */ if (FragFirst == 0) { - IPDR->IPv4Header = ExAllocatePool(NonPagedPool, IPPacket->HeaderSize); + IPDR->IPv4Header = exAllocatePool(NonPagedPool, IPPacket->HeaderSize); if (!IPDR->IPv4Header) { /* We don't have the resources to process this packet, discard it */ Cleanup(&IPDR->Lock, OldIrql, IPDR, NULL); @@ -426,7 +431,7 @@ VOID ProcessFragment( TI_DbgPrint(DEBUG_IP, ("Fragment descriptor allocated at (0x%X).\n", Fragment)); Fragment->Size = IPPacket->TotalSize - IPPacket->HeaderSize; - Fragment->Data = ExAllocatePool(NonPagedPool, Fragment->Size); + Fragment->Data = exAllocatePool(NonPagedPool, Fragment->Size); if (!Fragment->Data) { /* We don't have the resources to process this packet, discard it */ Cleanup(&IPDR->Lock, OldIrql, IPDR, Fragment); @@ -437,13 +442,12 @@ VOID ProcessFragment( Fragment->Data, Fragment->Size)); /* Copy datagram data into fragment buffer */ - CopyPacketToBuffer( - Fragment->Data, - IPPacket->NdisPacket, - IPPacket->Position, - Fragment->Size); - Fragment->Offset = FragFirst; - + CopyPacketToBuffer(Fragment->Data, + IPPacket->NdisPacket, + IPPacket->Position + MaxLLHeaderSize, + Fragment->Size); + Fragment->Offset = FragFirst; + /* If this is the last fragment, compute and save the datagram data size */ if (!MoreFragments) IPDR->DataSize = FragFirst + Fragment->Size; @@ -463,7 +467,7 @@ VOID ProcessFragment( Datagram = ReassembleDatagram(IPDR); - KeReleaseSpinLock(&IPDR->Lock, OldIrql); + KeReleaseSpinLock(&IPDR->Lock, OldIrql); RemoveIPDR(IPDR); FreeIPDR(IPDR); @@ -478,7 +482,7 @@ VOID ProcessFragment( IPDispatchProtocol(NTE, Datagram); /* We're done with this datagram */ - ExFreePool(Datagram->Header); + exFreePool(Datagram->Header); TI_DbgPrint(MAX_TRACE, ("Freeing datagram at (0x%X).\n", Datagram)); (*Datagram->Free)(Datagram); } else @@ -525,7 +529,6 @@ VOID 
IPDatagramReassemblyTimeout( { } - VOID IPv4Receive( PVOID Context, PIP_PACKET IPPacket) @@ -536,81 +539,77 @@ VOID IPv4Receive( * IPPacket = Pointer to IP packet */ { - PNEIGHBOR_CACHE_ENTRY NCE; - PNET_TABLE_ENTRY NTE; - UINT AddressType; + PNEIGHBOR_CACHE_ENTRY NCE; + PNET_TABLE_ENTRY NTE; + UINT AddressType; + + TI_DbgPrint(DEBUG_IP, ("Received IPv4 datagram.\n")); + + IPPacket->HeaderSize = (((PIPv4_HEADER)IPPacket->Header)->VerIHL & 0x0F) << 2; + TI_DbgPrint(DEBUG_IP, ("IPPacket->HeaderSize = %d\n", IPPacket->HeaderSize)); - TI_DbgPrint(DEBUG_IP, ("Received IPv4 datagram.\n")); - - IPPacket->HeaderSize = (((PIPv4_HEADER)IPPacket->Header)->VerIHL & 0x0F) << 2; - - if (IPPacket->HeaderSize > IPv4_MAX_HEADER_SIZE) { - TI_DbgPrint(MIN_TRACE, ("Datagram received with incorrect header size (%d).\n", - IPPacket->HeaderSize)); - /* Discard packet */ - return; - } - - /* Checksum IPv4 header */ - if (!IPv4CorrectChecksum(IPPacket->Header, IPPacket->HeaderSize)) { - TI_DbgPrint(MIN_TRACE, ("Datagram received with bad checksum. Checksum field (0x%X)\n", - WN2H(((PIPv4_HEADER)IPPacket->Header)->Checksum))); - /* Discard packet */ - return; - } - -// TI_DbgPrint(DEBUG_IP, ("TotalSize (datalink) is (%d).\n", IPPacket->TotalSize)); - - IPPacket->TotalSize = WN2H(((PIPv4_HEADER)IPPacket->Header)->TotalLength); - -// TI_DbgPrint(DEBUG_IP, ("TotalSize (IPv4) is (%d).\n", IPPacket->TotalSize)); - - AddrInitIPv4(&IPPacket->SrcAddr, ((PIPv4_HEADER)IPPacket->Header)->SrcAddr); - AddrInitIPv4(&IPPacket->DstAddr, ((PIPv4_HEADER)IPPacket->Header)->DstAddr); - - IPPacket->Position = IPPacket->HeaderSize; - IPPacket->Data = (PVOID)((ULONG_PTR)IPPacket->Header + IPPacket->HeaderSize); - - /* FIXME: Possibly forward packets with multicast addresses */ - - /* FIXME: Should we allow packets to be received on the wrong interface? 
*/ -#if 0 - NTE = IPLocateNTE(&IPPacket->DstAddr, &AddressType); -#else - NTE = IPLocateNTEOnInterface((PIP_INTERFACE)Context, &IPPacket->DstAddr, &AddressType); -#endif - if (NTE) { - /* This packet is destined for us */ - ProcessFragment((PIP_INTERFACE)Context, IPPacket, NTE); - - /* Done with this NTE */ - DereferenceObject(NTE); - } else { - /* This packet is not destined for us. If we are a router, - try to find a route and forward the packet */ - - /* FIXME: Check if acting as a router */ -#if 1 - //NCE = RouteFindRouter(&IPPacket->DstAddr, NULL); - NCE = NULL; - if (NCE) { - /* FIXME: Possibly fragment datagram */ - /* Forward the packet */ - IPSendFragment(IPPacket, NCE); - } else { - TI_DbgPrint(MIN_TRACE, ("No route to destination (0x%X).\n", - IPPacket->DstAddr.Address.IPv4Address)); - - /* FIXME: Send ICMP error code */ + if (IPPacket->HeaderSize > IPv4_MAX_HEADER_SIZE) { + TI_DbgPrint + (MIN_TRACE, + ("Datagram received with incorrect header size (%d).\n", + IPPacket->HeaderSize)); + /* Discard packet */ + return; + } + + /* Checksum IPv4 header */ + if (!IPv4CorrectChecksum(IPPacket->Header, IPPacket->HeaderSize)) { + TI_DbgPrint + (MIN_TRACE, + ("Datagram received with bad checksum. Checksum field (0x%X)\n", + WN2H(((PIPv4_HEADER)IPPacket->Header)->Checksum))); + /* Discard packet */ + return; + } + + IPPacket->TotalSize = WN2H(((PIPv4_HEADER)IPPacket->Header)->TotalLength); + + AddrInitIPv4(&IPPacket->SrcAddr, ((PIPv4_HEADER)IPPacket->Header)->SrcAddr); + AddrInitIPv4(&IPPacket->DstAddr, ((PIPv4_HEADER)IPPacket->Header)->DstAddr); + + IPPacket->Position = IPPacket->HeaderSize; + IPPacket->Data = (PVOID)((ULONG_PTR)IPPacket->Header + IPPacket->HeaderSize) + 14; /* XXX 14 */ + + OskitDumpBuffer(IPPacket->Data - IPPacket->HeaderSize, IPPacket->TotalSize); + + /* FIXME: Possibly forward packets with multicast addresses */ + + /* FIXME: Should we allow packets to be received on the wrong interface? 
*/ + NTE = IPLocateNTEOnInterface((PIP_INTERFACE)Context, &IPPacket->DstAddr, &AddressType); + + if (NTE) { + /* This packet is destined for us */ + ProcessFragment((PIP_INTERFACE)Context, IPPacket, NTE); + + /* Done with this NTE */ + DereferenceObject(NTE); + } else { + /* This packet is not destined for us. If we are a router, + try to find a route and forward the packet */ + + /* FIXME: Check if acting as a router */ + NCE = NULL; + if (NCE) { + /* FIXME: Possibly fragment datagram */ + /* Forward the packet */ + IPSendFragment(IPPacket, NCE); + } else { + TI_DbgPrint(MIN_TRACE, ("No route to destination (0x%X).\n", + IPPacket->DstAddr.Address.IPv4Address)); + + /* FIXME: Send ICMP error code */ + } } -#endif - } } -VOID IPReceive( - PVOID Context, - PIP_PACKET IPPacket) +VOID IPReceive( PVOID Context, + PIP_PACKET IPPacket ) /* * FUNCTION: Receives an IP datagram (or fragment) * ARGUMENTS: diff --git a/reactos/drivers/net/tcpip/network/route.c b/reactos/drivers/net/tcpip/network/route.c index 12618287c3b..a086f3bdf11 100644 --- a/reactos/drivers/net/tcpip/network/route.c +++ b/reactos/drivers/net/tcpip/network/route.c @@ -6,9 +6,15 @@ * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) * NOTES: The route cache is implemented as a binary search * tree to obtain fast searches + * + * This data is not authoritative. It is a searchable cache that allows + * quick access to route information to selected hosts. This information + * should always defer to the FIB. 
+ * * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -23,7 +29,7 @@ KSPIN_LOCK RouteCacheLock; NPAGED_LOOKASIDE_LIST IPRCNList; -#if DBG +#ifdef DBG VOID PrintTree( PROUTE_CACHE_NODE Node) /* @@ -50,6 +56,17 @@ VOID PrintTree( } #endif +UINT CountRouteNodes( PROUTE_CACHE_NODE Node ) { + if( !Node ) Node = RouteCache; + if( IsInternalRCN(Node) ) + return + /* Traverse left subtree */ + CountRouteNodes(Node->Left) + + /* Traverse right subtree */ + CountRouteNodes(Node->Right) + 1; + else + return 0; +} VOID FreeRCN( PVOID Object) @@ -168,6 +185,8 @@ PROUTE_CACHE_NODE ExpandExternalRCN(VOID) { PROUTE_CACHE_NODE RCN; + MTMARK(); + TI_DbgPrint(DEBUG_RCACHE, ("Called.\n")); RCN = ExAllocateFromNPagedLookasideList(&IPRCNList); @@ -176,6 +195,8 @@ PROUTE_CACHE_NODE ExpandExternalRCN(VOID) return NULL; } + MTMARK(); + RCN->Free = FreeRCN; if (ExternalRCN->Left) @@ -186,6 +207,8 @@ PROUTE_CACHE_NODE ExpandExternalRCN(VOID) RCN->Left = ExternalRCN; RCN->Right = ExternalRCN; + MTMARK(); + return RCN; } @@ -364,7 +387,7 @@ VOID RemoveSubtree( DereferenceObject(Node->NTE); DereferenceObject(Node->NCE); -#if DBG +#ifdef DBG if (Node->RefCount != 1) TI_DbgPrint(MIN_TRACE, ("RCN at (0x%X) has (%d) references (should be 1).\n", Node, Node->RefCount)); #endif diff --git a/reactos/drivers/net/tcpip/network/router.c b/reactos/drivers/net/tcpip/network/router.c index 5ea1d288d26..da49b8fad90 100644 --- a/reactos/drivers/net/tcpip/network/router.c +++ b/reactos/drivers/net/tcpip/network/router.c @@ -4,12 +4,18 @@ * FILE: network/router.c * PURPOSE: IP routing subsystem * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) + * NOTES: + * This file holds authoritative routing information. + * Information queries on the route table should be handled here. + * This information should always override the route cache info. 
* REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include +#include #include @@ -48,7 +54,6 @@ VOID DestroyFIBE( DereferenceObject(FIBE->NetworkAddress); DereferenceObject(FIBE->Netmask); DereferenceObject(FIBE->Router); - DereferenceObject(FIBE->NTE); #ifdef DBG FIBE->RefCount--; @@ -79,7 +84,7 @@ VOID DestroyFIBEs( CurrentEntry = FIBListHead.Flink; while (CurrentEntry != &FIBListHead) { NextEntry = CurrentEntry->Flink; - Current = CONTAINING_RECORD(CurrentEntry, FIB_ENTRY, ListEntry); + Current = CONTAINING_RECORD(CurrentEntry, FIB_ENTRY, ListEntry); /* Destroy the FIB entry */ DestroyFIBE(Current); CurrentEntry = NextEntry; @@ -87,6 +92,43 @@ VOID DestroyFIBEs( } +UINT CountFIBs() { + UINT FibCount = 0; + PLIST_ENTRY CurrentEntry; + PLIST_ENTRY NextEntry; + + /* Search the list and remove every FIB entry we find */ + CurrentEntry = FIBListHead.Flink; + while (CurrentEntry != &FIBListHead) { + NextEntry = CurrentEntry->Flink; + CurrentEntry = NextEntry; + FibCount++; + } + + return FibCount; +} + + +UINT CopyFIBs( PFIB_ENTRY Target ) { + UINT FibCount = 0; + PLIST_ENTRY CurrentEntry; + PLIST_ENTRY NextEntry; + PFIB_ENTRY Current; + + /* Search the list and remove every FIB entry we find */ + CurrentEntry = FIBListHead.Flink; + while (CurrentEntry != &FIBListHead) { + NextEntry = CurrentEntry->Flink; + Current = CONTAINING_RECORD(CurrentEntry, FIB_ENTRY, ListEntry); + Target[FibCount] = *Current; + CurrentEntry = NextEntry; + FibCount++; + } + + return FibCount; +} + + UINT CommonPrefixLength( PIP_ADDRESS Address1, PIP_ADDRESS Address2) @@ -108,33 +150,25 @@ UINT CommonPrefixLength( TI_DbgPrint(DEBUG_ROUTER, ("Called. 
Address1 (0x%X) Address2 (0x%X).\n", Address1, Address2)); - TI_DbgPrint(DEBUG_ROUTER, ("Address1 (%s) Address2 (%s).\n", - A2S(Address1), A2S(Address2))); + /*TI_DbgPrint(DEBUG_ROUTER, ("Target (%s) \n", A2S(Address1)));*/ + /*TI_DbgPrint(DEBUG_ROUTER, ("Adapter (%s).\n", A2S(Address2)));*/ if (Address1->Type == IP_ADDRESS_V4) Size = sizeof(IPv4_RAW_ADDRESS); else Size = sizeof(IPv6_RAW_ADDRESS); - Addr1 = (PUCHAR)&Address1->Address; - Addr2 = (PUCHAR)&Address2->Address; + Addr1 = (PUCHAR)&Address1->Address.IPv4Address; + Addr2 = (PUCHAR)&Address2->Address.IPv4Address; /* Find first non-matching byte */ - for (i = 0; ; i++) { - if (i == Size) - return 8 * i; /* The two addresses are equal */ - - if (Addr1[i] != Addr2[i]) - break; - } + for (i = 0; i < Size && Addr1[i] == Addr2[i]; i++); + if( i == Size ) return 8 * i; /* Find first non-matching bit */ Bitmask = 0x80; - for (j = 0; ; j++) { - if ((Addr1[i] & Bitmask) != (Addr2[i] & Bitmask)) - break; + for (j = 0; (Addr1[i] & Bitmask) != (Addr2[i] & Bitmask); j++) Bitmask >>= 1; - } return 8 * i + j; } @@ -161,8 +195,10 @@ BOOLEAN HasPrefix( TI_DbgPrint(DEBUG_ROUTER, ("Called. 
Address (0x%X) Prefix (0x%X) Length (%d).\n", Address, Prefix, Length)); +#if 0 TI_DbgPrint(DEBUG_ROUTER, ("Address (%s) Prefix (%s).\n", A2S(Address), A2S(Prefix))); +#endif /* Check that initial integral bytes match */ while (Length > 8) { @@ -207,7 +243,8 @@ PNET_TABLE_ENTRY RouterFindBestNTE( CurrentEntry = Interface->NTEListHead.Flink; while (CurrentEntry != &Interface->NTEListHead) { - Current = CONTAINING_RECORD(CurrentEntry, NET_TABLE_ENTRY, IFListEntry); + Current = CONTAINING_RECORD(CurrentEntry, NET_TABLE_ENTRY, IFListEntry); + TI_DbgPrint(DEBUG_ROUTER, ("Looking at NTE %s\n", A2S(Current->Address))); Length = CommonPrefixLength(Destination, Current->Address); if (BestNTE) { @@ -271,7 +308,6 @@ PIP_INTERFACE RouterFindOnLinkInterface( PFIB_ENTRY RouterAddRoute( PIP_ADDRESS NetworkAddress, PIP_ADDRESS Netmask, - PNET_TABLE_ENTRY NTE, PNEIGHBOR_CACHE_ENTRY Router, UINT Metric) /* @@ -279,24 +315,23 @@ PFIB_ENTRY RouterAddRoute( * ARGUMENTS: * NetworkAddress = Pointer to address of network * Netmask = Pointer to netmask of network - * NTE = Pointer to NTE to use * Router = Pointer to NCE of router to use * Metric = Cost of this route * RETURNS: * Pointer to FIB entry if the route was added, NULL if not * NOTES: - * The FIB entry references the NetworkAddress, Netmask, NTE and + * The FIB entry references the NetworkAddress, Netmask and * the NCE of the router. The caller is responsible for providing * these references */ { PFIB_ENTRY FIBE; - TI_DbgPrint(DEBUG_ROUTER, ("Called. NetworkAddress (0x%X) Netmask (0x%X) NTE (0x%X) " - "Router (0x%X) Metric (%d).\n", NetworkAddress, Netmask, NTE, Router, Metric)); + TI_DbgPrint(DEBUG_ROUTER, ("Called. 
NetworkAddress (0x%X) Netmask (0x%X) " + "Router (0x%X) Metric (%d).\n", NetworkAddress, Netmask, Router, Metric)); - TI_DbgPrint(DEBUG_ROUTER, ("NetworkAddress (%s) Netmask (%s) NTE (%s) Router (%s).\n", - A2S(NetworkAddress), A2S(Netmask), A2S(NTE->Address), A2S(Router->Address))); + TI_DbgPrint(DEBUG_ROUTER, ("NetworkAddress (%s) Netmask (%s) Router (%s).\n", + A2S(NetworkAddress), A2S(Netmask), A2S(Router->Address))); FIBE = ExAllocatePool(NonPagedPool, sizeof(FIB_ENTRY)); if (!FIBE) { @@ -304,13 +339,11 @@ PFIB_ENTRY RouterAddRoute( return NULL; } - INIT_TAG(NTE, TAG('N','T','E',' ')); INIT_TAG(Router, TAG('R','O','U','T')); FIBE->Free = FreeFIB; FIBE->NetworkAddress = NetworkAddress; FIBE->Netmask = Netmask; - FIBE->NTE = NTE; FIBE->Router = Router; FIBE->Metric = Metric; @@ -346,8 +379,9 @@ PNEIGHBOR_CACHE_ENTRY RouterGetRoute( TI_DbgPrint(DEBUG_ROUTER, ("Called. Destination (0x%X) NTE (0x%X).\n", Destination, NTE)); - TI_DbgPrint(DEBUG_ROUTER, ("Destination (%s) NTE (%s).\n", - A2S(Destination), A2S(NTE->Address))); + TI_DbgPrint(DEBUG_ROUTER, ("Destination (%s)\n", A2S(Destination))); + if( NTE ) + TI_DbgPrint(DEBUG_ROUTER, ("NTE (%s).\n", A2S(NTE->Address))); KeAcquireSpinLock(&FIBLock, &OldIrql); @@ -476,7 +510,7 @@ PFIB_ENTRY RouterCreateRouteIPv4( ReferenceObject(pNetworkAddress); ReferenceObject(pNetmask); - FIBE = RouterAddRoute(pNetworkAddress, pNetmask, NTE, NCE, 1); + FIBE = RouterAddRoute(pNetworkAddress, pNetmask, NCE, 1); if (!FIBE) { /* Not enough free resources */ NBRemoveNeighbor(NCE); diff --git a/reactos/drivers/net/tcpip/network/transmit.c b/reactos/drivers/net/tcpip/network/transmit.c index fc8168ae656..020ad46e88c 100644 --- a/reactos/drivers/net/tcpip/network/transmit.c +++ b/reactos/drivers/net/tcpip/network/transmit.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -66,6 +67,7 @@ BOOLEAN PrepareNextFragment( /* Calculate checksum of IP header */ Header->Checksum = 0; Header->Checksum = 
(USHORT)IPv4Checksum(Header, IFC->HeaderSize, 0); + TI_DbgPrint(MID_TRACE,("IP Check: %x\n", Header->Checksum)); /* Update pointers */ IFC->DatagramData = (PVOID)((ULONG_PTR)IFC->DatagramData + DataSize); @@ -103,23 +105,23 @@ NTSTATUS SendFragments( TI_DbgPrint(MAX_TRACE, ("Called. IPPacket (0x%X) NCE (0x%X) PathMTU (%d).\n", IPPacket, NCE, PathMTU)); - IFC = ExAllocatePool(NonPagedPool, sizeof(IPFRAGMENT_CONTEXT)); + IFC = exAllocatePool(NonPagedPool, sizeof(IPFRAGMENT_CONTEXT)); if (IFC == NULL) return STATUS_INSUFFICIENT_RESOURCES; /* We allocate a buffer for a PathMTU sized packet and reuse it for all fragments */ - Data = ExAllocatePool(NonPagedPool, MaxLLHeaderSize + PathMTU); + Data = exAllocatePool(NonPagedPool, MaxLLHeaderSize + PathMTU); if (Data == NULL) { - ExFreePool(IFC); + exFreePool(IFC); return STATUS_INSUFFICIENT_RESOURCES; } /* Allocate NDIS packet */ NdisAllocatePacket(&NdisStatus, &IFC->NdisPacket, GlobalPacketPool); if (NdisStatus != NDIS_STATUS_SUCCESS) { - ExFreePool(Data); - ExFreePool(IFC); + exFreePool(Data); + exFreePool(IFC); return STATUS_INSUFFICIENT_RESOURCES; } @@ -127,9 +129,9 @@ NTSTATUS SendFragments( NdisAllocateBuffer(&NdisStatus, &IFC->NdisBuffer, GlobalBufferPool, Data, MaxLLHeaderSize + PathMTU); if (NdisStatus != NDIS_STATUS_SUCCESS) { - NdisFreePacket(IFC->NdisPacket); - ExFreePool(Data); - ExFreePool(IFC); + FreeNdisPacket(IFC->NdisPacket); + exFreePool(Data); + exFreePool(IFC); return STATUS_INSUFFICIENT_RESOURCES; } @@ -206,8 +208,11 @@ VOID IPSendComplete( /* There are no more fragments to transmit, so call completion handler */ NdisPacket = IFC->Datagram; FreeNdisPacket(IFC->NdisPacket); - ExFreePool(IFC); - (*PC(NdisPacket)->Complete)(PC(NdisPacket)->Context, NdisPacket, NdisStatus); + exFreePool(IFC); + (*PC(NdisPacket)->Complete) + (PC(NdisPacket)->Context, + NdisPacket, + NdisStatus); } } } @@ -304,10 +309,11 @@ NTSTATUS IPSendDatagram( TI_DbgPrint(MAX_TRACE, ("Called. 
IPPacket (0x%X) RCN (0x%X)\n", IPPacket, RCN)); DISPLAY_IP_PACKET(IPPacket); + OskitDumpBuffer( IPPacket->Header, IPPacket->TotalSize ); NCE = RCN->NCE; -#if DBG +#ifdef DBG if (!NCE) { TI_DbgPrint(MIN_TRACE, ("No NCE to use.\n")); FreeNdisPacket(IPPacket->NdisPacket); @@ -317,16 +323,20 @@ NTSTATUS IPSendDatagram( /* Fetch path MTU now, because it may change */ PathMTU = RCN->PathMTU; + TI_DbgPrint(MID_TRACE,("PathMTU: %d\n", PathMTU)); if (IPPacket->TotalSize > PathMTU) { + TI_DbgPrint(MID_TRACE,("Doing SendFragments\n")); return SendFragments(IPPacket, NCE, PathMTU); } else { if ((IPPacket->Flags & IP_PACKET_FLAG_RAW) == 0) { /* Calculate checksum of IP header */ + TI_DbgPrint(MID_TRACE,("-> not IP_PACKET_FLAG_RAW\n")); ((PIPv4_HEADER)IPPacket->Header)->Checksum = 0; ((PIPv4_HEADER)IPPacket->Header)->Checksum = (USHORT) IPv4Checksum(IPPacket->Header, IPPacket->HeaderSize, 0); + TI_DbgPrint(MID_TRACE,("IP Check: %x\n", ((PIPv4_HEADER)IPPacket->Header)->Checksum)); TI_DbgPrint(MAX_TRACE, ("Sending packet (length is %d).\n", WN2H(((PIPv4_HEADER)IPPacket->Header)->TotalLength))); diff --git a/reactos/drivers/net/tcpip/notes.txt b/reactos/drivers/net/tcpip/notes.txt index 242d649039c..6d877f4fad7 100644 --- a/reactos/drivers/net/tcpip/notes.txt +++ b/reactos/drivers/net/tcpip/notes.txt @@ -1,4 +1,12 @@ +Ideas for optimizations: + +* transmit.c (SendFragments, IPSendComplete): + Keep IPFRAGMENT_CONTEXT objects and PathMTU buffers in a pool + + +References: + RFC 791 - Internet Protocol http://www.faqs.org/rfcs/rfc791.html diff --git a/reactos/drivers/net/tcpip/tcpip/address.c b/reactos/drivers/net/tcpip/tcpip/address.c index 5828c36d4da..3443675e974 100644 --- a/reactos/drivers/net/tcpip/tcpip/address.c +++ b/reactos/drivers/net/tcpip/tcpip/address.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -164,7 +165,9 @@ NTSTATUS AddrGetAddress( } } - IPAddress = ExAllocatePool(NonPagedPool, sizeof(IP_ADDRESS)); + IPAddress = 
ExAllocatePoolWithTag(NonPagedPool, + sizeof(IP_ADDRESS), + FOURCC('I','P','v','4')); if (IPAddress) { AddrInitIPv4(IPAddress, ValidAddr->in_addr); *Address = IPAddress; diff --git a/reactos/drivers/net/tcpip/tcpip/checksum.c b/reactos/drivers/net/tcpip/tcpip/checksum.c index 2566b6b82c4..1a5b45daec5 100644 --- a/reactos/drivers/net/tcpip/tcpip/checksum.c +++ b/reactos/drivers/net/tcpip/tcpip/checksum.c @@ -8,6 +8,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include diff --git a/reactos/drivers/net/tcpip/tcpip/dispatch.c b/reactos/drivers/net/tcpip/tcpip/dispatch.c index cec2fb22ef5..fe1ec1e7fa5 100644 --- a/reactos/drivers/net/tcpip/tcpip/dispatch.c +++ b/reactos/drivers/net/tcpip/tcpip/dispatch.c @@ -8,6 +8,7 @@ * CSH 01/08-2000 Created * TODO: Validate device object in all dispatch routines */ +#include #include #include #include @@ -53,13 +54,9 @@ NTSTATUS DispPrepareIrpForCancel( Irp->IoStatus.Status = STATUS_CANCELLED; Irp->IoStatus.Information = 0; - TI_DbgPrint(DEBUG_IRP, ("Completing IRP at (0x%X).\n", Irp)); - - IoCompleteRequest(Irp, IO_NETWORK_INCREMENT); - TI_DbgPrint(DEBUG_IRP, ("Leaving (IRP was already cancelled).\n")); - return STATUS_CANCELLED; + return IRPFinish(Irp, STATUS_CANCELLED); } @@ -223,7 +220,7 @@ VOID DispDataRequestComplete( TI_DbgPrint(DEBUG_IRP, ("Completing IRP at (0x%X).\n", Irp)); - IoCompleteRequest(Irp, IO_NETWORK_INCREMENT); + IRPFinish(Irp, STATUS_SUCCESS); } @@ -239,7 +236,7 @@ NTSTATUS DispTdiAccept( { TI_DbgPrint(DEBUG_IRP, ("Called.\n")); - return STATUS_NOT_IMPLEMENTED; + return STATUS_NOT_IMPLEMENTED; } @@ -378,11 +375,14 @@ NTSTATUS DispTdiConnect( Request.RequestNotifyObject = DispDataRequestComplete; Request.RequestContext = Irp; + /* XXX Handle connected UDP, etc... 
*/ Status = TCPConnect( &Request, Parameters->RequestConnectionInformation, Parameters->ReturnConnectionInformation); + TI_DbgPrint(MAX_TRACE, ("TCP Connect returned %08x\n", Status)); + return Status; } @@ -555,36 +555,7 @@ NTSTATUS DispTdiListen( Parameters = (PTDI_REQUEST_KERNEL)&IrpSp->Parameters; - /* Initialize a listen request */ - Request = (PTDI_REQUEST) ExAllocatePool(NonPagedPool, sizeof(TDI_REQUEST)); - if (Request == NULL) - { - return STATUS_NO_MEMORY; - } - - Status = DispPrepareIrpForCancel(TranContext, Irp, NULL); - if (NT_SUCCESS(Status)) - { - Request->Handle.ConnectionContext = TranContext->Handle.ConnectionContext; - Request->RequestNotifyObject = DispDataRequestComplete; - Request->RequestContext = Irp; - - Status = TCPListen( - Request, - Parameters->RequestConnectionInformation, - Parameters->ReturnConnectionInformation); - if (Status != STATUS_PENDING) - { - IoAcquireCancelSpinLock(&OldIrql); - IoSetCancelRoutine(Irp, NULL); - IoReleaseCancelSpinLock(OldIrql); - } - } - - if (Status != STATUS_PENDING) - { - ExFreePool(Request); - } + Status = TCPListen( Request, 1024 /* BACKLOG */ ); return Status; } @@ -683,9 +654,61 @@ NTSTATUS DispTdiReceive( * Status of operation */ { + PIO_STACK_LOCATION IrpSp; + PTDI_REQUEST_KERNEL_RECEIVE ReceiveInfo; + PTRANSPORT_CONTEXT TranContext; + TDI_REQUEST Request; + NTSTATUS Status; + ULONG BytesReceived; + TI_DbgPrint(DEBUG_IRP, ("Called.\n")); - return STATUS_NOT_IMPLEMENTED; + IrpSp = IoGetCurrentIrpStackLocation(Irp); + ReceiveInfo = (PTDI_REQUEST_KERNEL_RECEIVE)&(IrpSp->Parameters); + + TranContext = IrpSp->FileObject->FsContext; + if (TranContext == NULL) + { + TI_DbgPrint(MID_TRACE, ("Bad transport context.\n")); + return STATUS_INVALID_CONNECTION; + } + + if (TranContext->Handle.ConnectionContext == NULL) + { + TI_DbgPrint(MID_TRACE, ("No connection endpoint file object.\n")); + return STATUS_INVALID_CONNECTION; + } + + /* Initialize a receive request */ + Request.Handle.ConnectionContext = 
TranContext->Handle.ConnectionContext; + Request.RequestNotifyObject = DispDataRequestComplete; + Request.RequestContext = Irp; + Status = DispPrepareIrpForCancel( + IrpSp->FileObject->FsContext, + Irp, + (PDRIVER_CANCEL)DispCancelRequest); + if (NT_SUCCESS(Status)) + { + Status = TCPReceiveData( + &Request, + (PNDIS_BUFFER)Irp->MdlAddress, + ReceiveInfo->ReceiveLength, + ReceiveInfo->ReceiveFlags, + &BytesReceived); + if (Status != STATUS_PENDING) + { + DispDataRequestComplete(Irp, Status, BytesReceived); + } + } + + if (Status != STATUS_PENDING) + { + IrpSp->Control &= ~SL_PENDING_RETURNED; + } + + TI_DbgPrint(DEBUG_IRP, ("Leaving. Status is (0x%X)\n", Status)); + + return Status; } @@ -712,6 +735,12 @@ NTSTATUS DispTdiReceiveDatagram( DgramInfo = (PTDI_REQUEST_KERNEL_RECEIVEDG)&(IrpSp->Parameters); TranContext = IrpSp->FileObject->FsContext; + if (TranContext == NULL) + { + TI_DbgPrint(MID_TRACE, ("Bad transport context.\n")); + return STATUS_INVALID_ADDRESS; + } + /* Initialize a receive request */ Request.Handle.AddressHandle = TranContext->Handle.AddressHandle; Request.RequestNotifyObject = DispDataRequestComplete; @@ -720,22 +749,26 @@ NTSTATUS DispTdiReceiveDatagram( IrpSp->FileObject->FsContext, Irp, (PDRIVER_CANCEL)DispCancelRequest); - if (NT_SUCCESS(Status)) { - Status = UDPReceiveDatagram( - &Request, - DgramInfo->ReceiveDatagramInformation, - (PNDIS_BUFFER)Irp->MdlAddress, - DgramInfo->ReceiveLength, - DgramInfo->ReceiveFlags, - DgramInfo->ReturnDatagramInformation, - &BytesReceived); - if (Status != STATUS_PENDING) { - DispDataRequestComplete(Irp, Status, BytesReceived); - /* Return STATUS_PENDING because DispPrepareIrpForCancel marks - the Irp as pending */ - Status = STATUS_PENDING; + if (NT_SUCCESS(Status)) + { + Status = UDPReceiveDatagram( + &Request, + DgramInfo->ReceiveDatagramInformation, + (PNDIS_BUFFER)Irp->MdlAddress, + DgramInfo->ReceiveLength, + DgramInfo->ReceiveFlags, + DgramInfo->ReturnDatagramInformation, + &BytesReceived); + if 
(Status != STATUS_PENDING) + { + DispDataRequestComplete(Irp, Status, BytesReceived); + } + } + + if (Status != STATUS_PENDING) + { + IrpSp->Control &= ~SL_PENDING_RETURNED; } - } TI_DbgPrint(DEBUG_IRP, ("Leaving. Status is (0x%X)\n", Status)); @@ -753,9 +786,46 @@ NTSTATUS DispTdiSend( * Status of operation */ { + PIO_STACK_LOCATION IrpSp; + TDI_REQUEST Request; + PTDI_REQUEST_KERNEL_SEND SendInfo; + PTRANSPORT_CONTEXT TranContext; + NTSTATUS Status; + TI_DbgPrint(DEBUG_IRP, ("Called.\n")); - return STATUS_NOT_IMPLEMENTED; + IrpSp = IoGetCurrentIrpStackLocation(Irp); + SendInfo = (PTDI_REQUEST_KERNEL_SEND)&(IrpSp->Parameters); + TranContext = IrpSp->FileObject->FsContext; + + /* Initialize a send request */ + Request.Handle.AddressHandle = TranContext->Handle.AddressHandle; + Request.RequestNotifyObject = DispDataRequestComplete; + Request.RequestContext = Irp; + + Status = DispPrepareIrpForCancel( + IrpSp->FileObject->FsContext, + Irp, + (PDRIVER_CANCEL)DispCancelRequest); + if (NT_SUCCESS(Status)) { + + /* FIXME: DgramInfo->SendDatagramInformation->RemoteAddress + must be of type PTDI_ADDRESS_IP */ + + Status = (*((PADDRESS_FILE)Request.Handle.AddressHandle)->Send)( + &Request, NULL, + (PNDIS_BUFFER)Irp->MdlAddress, SendInfo->SendLength); + if (Status != STATUS_PENDING) { + DispDataRequestComplete(Irp, Status, 0); + /* Return STATUS_PENDING because DispPrepareIrpForCancel + marks Irp as pending */ + Status = STATUS_PENDING; + } + } + + TI_DbgPrint(DEBUG_IRP, ("Leaving.\n")); + + return Status; } @@ -1209,9 +1279,7 @@ NTSTATUS DispTdiSetInformationEx( TI_DbgPrint(DEBUG_IRP, ("Completing IRP at (0x%X).\n", Irp)); - IoCompleteRequest(Irp, IO_NETWORK_INCREMENT); - - return STATUS_INVALID_PARAMETER; + return IRPFinish(Irp, STATUS_INVALID_PARAMETER); } Status = DispPrepareIrpForCancel(TranContext, Irp, NULL); diff --git a/reactos/drivers/net/tcpip/tcpip/fileobjs.c b/reactos/drivers/net/tcpip/tcpip/fileobjs.c index dbfef4f77c4..a482a62152f 100644 --- 
a/reactos/drivers/net/tcpip/tcpip/fileobjs.c +++ b/reactos/drivers/net/tcpip/tcpip/fileobjs.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include /* List of all address file objects managed by this driver */ LIST_ENTRY AddressFileListHead; @@ -98,7 +100,8 @@ VOID DeleteAddress( CurrentEntry = AddrFile->TransmitQueue.Flink; while (CurrentEntry != &AddrFile->TransmitQueue) { NextEntry = CurrentEntry->Flink; - SendRequest = CONTAINING_RECORD(CurrentEntry, DATAGRAM_SEND_REQUEST, ListEntry); + SendRequest = CONTAINING_RECORD(CurrentEntry, + DATAGRAM_SEND_REQUEST, ListEntry); /* Abort the request and free its resources */ KeReleaseSpinLock(&AddrFile->Lock, OldIrql); (*SendRequest->Complete)(SendRequest->Context, STATUS_ADDRESS_CLOSED, 0); @@ -309,7 +312,7 @@ NTSTATUS FileOpenAddress( case IPPROTO_TCP: /* FIXME: If specified port is 0, a port is chosen dynamically */ AddrFile->Port = Address->Address[0].Address[0].sin_port; - AddrFile->Send = TCPSendDatagram; + AddrFile->Send = TCPSendData; break; case IPPROTO_UDP: @@ -437,7 +440,9 @@ NTSTATUS FileOpenConnection( PTDI_REQUEST Request, PVOID ClientContext) { + NTSTATUS Status; PCONNECTION_ENDPOINT Connection; + PADDRESS_FILE AddrFile; TI_DbgPrint(MID_TRACE, ("Called.\n")); @@ -455,15 +460,26 @@ NTSTATUS FileOpenConnection( /* Reference the object */ Connection->RefCount = 1; - /* Put connection in the closed state */ - Connection->State = ctClosed; - /* Save client context pointer */ Connection->ClientContext = ClientContext; + Status = OskitTCPSocket( Connection, + &Connection->SocketContext, + AF_INET, + SOCK_STREAM, + IPPROTO_TCP ); + DbgPrint("STATUS from OSKITTCP was %08x\n", Status); + + /* Initialize receive requests queue */ + InitializeListHead(&Connection->ReceiveRequests); - /* Initialize receive queue */ + /* Initialize received segments queue */ InitializeListHead(&Connection->ReceivedSegments); 
+TI_DbgPrint(MIN_TRACE, ("X1 cur 0x%x\n", &Connection->ReceivedSegments)); +TI_DbgPrint(MIN_TRACE, ("X1 Flink 0x%x\n", Connection->ReceivedSegments.Flink)); +TI_DbgPrint(MIN_TRACE, ("X1 Blink 0x%x\n", Connection->ReceivedSegments.Blink)); + + /* Return connection endpoint file object */ Request->Handle.ConnectionContext = Connection; @@ -497,43 +513,9 @@ NTSTATUS FileCloseConnection( Connection = Request->Handle.ConnectionContext; -#if 0 - KeAcquireSpinLock(&Connection->Lock, &OldIrql); - if ((!AF_IS_BUSY(Connection)) && (Connection->RefCount == 1)) { - /* Set connection endpoint file object exclusive to us */ - AF_SET_BUSY(Connection); - AF_CLR_VALID(Connection); + TCPClose(Request); + DeleteConnectionEndpoint(Connection); - KeReleaseSpinLock(&Connection->Lock, OldIrql); -#endif - DeleteConnectionEndpoint(Connection); -#if 0 - } else { - if (!AF_IS_PENDING(Connection, AFF_DELETE)) { - Connection->Complete = Request->RequestNotifyObject; - Connection->Context = Request->RequestContext; - - /* Shedule connection endpoint for deletion */ - AF_SET_PENDING(Connection, AFF_DELETE); - AF_CLR_VALID(Connection); - - if (!AF_IS_BUSY(Connection)) { - /* Worker function is not running, so shedule it to run */ - AF_SET_BUSY(Connection); - KeReleaseSpinLock(&Connection->Lock, OldIrql); - ExQueueWorkItem(&Connection->WorkItem, CriticalWorkQueue); - } else - KeReleaseSpinLock(&Connection->Lock, OldIrql); - - TI_DbgPrint(MAX_TRACE, ("Leaving (pending).\n")); - - return STATUS_PENDING; - } else - Status = STATUS_ADDRESS_CLOSED; - - KeReleaseSpinLock(&Connection->Lock, OldIrql); - } -#endif TI_DbgPrint(MAX_TRACE, ("Leaving.\n")); return Status; diff --git a/reactos/drivers/net/tcpip/tcpip/iinfo.c b/reactos/drivers/net/tcpip/tcpip/iinfo.c new file mode 100644 index 00000000000..68935d6d87c --- /dev/null +++ b/reactos/drivers/net/tcpip/tcpip/iinfo.c @@ -0,0 +1,108 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: 
tcpip/iinfo.c + * PURPOSE: Per-interface information. + * PROGRAMMERS: Art Yerkes + * REVISIONS: + * CSH 01/08-2000 Created + */ +#include +#include +#include +#include +#include +#include +#include + +TDI_STATUS InfoTdiQueryGetInterfaceMIB(TDIEntityID *ID, + PIP_INTERFACE Interface, + PNDIS_BUFFER Buffer, + PUINT BufferSize) { + TDI_STATUS Status = TDI_INVALID_REQUEST; + PIFENTRY OutData; + PLAN_ADAPTER IF = (PLAN_ADAPTER)Interface->Context; + PCHAR IFDescr; + KIRQL OldIrql; + ULONG Size; + UINT DescrLenMax = MAX_IFDESCR_LEN - 1; + + TI_DbgPrint(MAX_TRACE, + ("Getting IFEntry MIB (IF %08x LA %08x) (%04x:%d)\n", + Interface, IF, ID->tei_entity, ID->tei_instance)); + + OutData = + (PIFENTRY)ExAllocatePool( NonPagedPool, + sizeof(IFENTRY) + MAX_IFDESCR_LEN ); + + if( !OutData ) return TDI_INVALID_REQUEST; /* Out of memory */ + + RtlZeroMemory( OutData, sizeof(IFENTRY) + MAX_IFDESCR_LEN ); + + OutData->Index = ID->tei_instance + 1; + /* viz: tcpip keeps those indices */ + OutData->Type = IF ? 1 : 0; /* XXX other -- for now ... */ + OutData->Mtu = Interface->MTU; + TI_DbgPrint(MAX_TRACE, + ("Getting interface speed\n")); + OutData->PhysAddrLen = Interface->AddressLength; + OutData->AdminStatus = 1; /* XXX Up -- How do I know? */ + OutData->OperStatus = 1; /* XXX Up -- How do I know? 
*/ + + IFDescr = (PCHAR)&OutData[1]; + + if( IF ) { + GetInterfaceSpeed( Interface, &OutData->Speed ); + TI_DbgPrint(MAX_TRACE, + ("IF Speed = %d * 100bps\n", OutData->Speed)); + memcpy(OutData->PhysAddr,Interface->Address,Interface->AddressLength); + TI_DbgPrint(MAX_TRACE, ("Got HWAddr\n")); + GetInterfaceName( Interface, IFDescr, MAX_IFDESCR_LEN - 1 ); + DescrLenMax = strlen( IFDescr ) + 1; + } + + IFDescr[DescrLenMax] = 0; /* Terminate ifdescr string */ + + TI_DbgPrint(MAX_TRACE, ("Copied in name %s\n", IFDescr)); + OutData->DescrLen = DescrLenMax; + IFDescr += DescrLenMax; + Size = IFDescr - (PCHAR)OutData + 1; + + TI_DbgPrint(MAX_TRACE, ("Finished IFEntry MIB (%04x:%d) size %d\n", + ID->tei_entity, ID->tei_instance, Size)); + + Status = InfoCopyOut( OutData, Size, Buffer, BufferSize ); + ExFreePool( OutData ); + + return Status; +} + +TDI_STATUS InfoInterfaceTdiQueryEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ) { + if( InfoClass == INFO_CLASS_GENERIC && + InfoType == INFO_TYPE_PROVIDER && + InfoId == ENTITY_TYPE_ID ) { + ULONG Temp = IF_MIB; + return InfoCopyOut( &Temp, sizeof(Temp), Buffer, BufferSize ); + } else if( InfoClass == INFO_CLASS_PROTOCOL && + InfoType == INFO_TYPE_PROVIDER && + InfoId == IF_MIB_STATS_ID ) { + return InfoTdiQueryGetInterfaceMIB( id, Context, Buffer, BufferSize ); + } else + return TDI_INVALID_REQUEST; +} + +TDI_STATUS InfoInterfaceTdiSetEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ) { + return TDI_INVALID_REQUEST; +} diff --git a/reactos/drivers/net/tcpip/tcpip/info.c b/reactos/drivers/net/tcpip/tcpip/info.c index 2331821ba4d..0a54f6bf480 100644 --- a/reactos/drivers/net/tcpip/tcpip/info.c +++ b/reactos/drivers/net/tcpip/tcpip/info.c @@ -7,321 +7,110 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include +#include +TDI_STATUS InfoCopyOut( PCHAR 
DataOut, UINT SizeOut, + PNDIS_BUFFER ClientBuf, PUINT ClientBufSize ) { + UINT RememberedCBSize = *ClientBufSize; + *ClientBufSize = SizeOut; + if( RememberedCBSize < SizeOut ) + return TDI_BUFFER_TOO_SMALL; + else { + CopyBufferToBufferChain( ClientBuf, 0, (PUCHAR)DataOut, SizeOut ); + return TDI_SUCCESS; + } +} -TDI_STATUS IPTdiQueryInformationEx( - PTDI_REQUEST Request, - TDIObjectID *ID, - PNDIS_BUFFER Buffer, - PUINT BufferSize, - PVOID Context) -/* - * FUNCTION: Returns extended information about network layer - * ARGUMENTS: - * Request = Pointer to TDI request structure for the request - * ID = TDI object ID - * Buffer = Pointer to buffer with data to use. - * BufferSize = Pointer to buffer with size of Buffer. On return - * this is filled with number of bytes returned - * Context = Pointer to context buffer - * RETURNS: - * Status of operation - */ -{ - PLIST_ENTRY CurrentIFEntry; - PLIST_ENTRY CurrentADEEntry; - PADDRESS_ENTRY CurrentADE; - PIP_INTERFACE CurrentIF; - IPADDR_ENTRY IpAddress; - IPSNMP_INFO SnmpInfo; - KIRQL OldIrql; - ULONG Entity; - ULONG Temp; - UINT Count; - UINT BufSize; +VOID InsertTDIInterfaceEntity( PIP_INTERFACE Interface ) { + KIRQL OldIrql; + UINT Count = 0, i; - BufSize = *BufferSize; + TI_DbgPrint(MAX_TRACE, + ("Inserting interface %08x (%d entities already)\n", + Interface, EntityCount)); - /* Make return parameters consistent every time */ - *BufferSize = 0; + KeAcquireSpinLock( &EntityListLock, &OldIrql ); - Entity = ID->toi_entity.tei_entity; - if (Entity != CL_NL_ENTITY) - { - /* We can't handle this entity */ - return TDI_INVALID_PARAMETER; + /* Count IP Entities */ + for( i = 0; i < EntityCount; i++ ) + if( EntityList[i].tei_entity == IF_ENTITY ) { + Count++; + TI_DbgPrint(MAX_TRACE, ("Entity %d is an IF. 
Found %d\n", + i, Count)); + } + + EntityList[EntityCount].tei_entity = IF_ENTITY; + EntityList[EntityCount].tei_instance = Count; + EntityList[EntityCount].context = Interface; + EntityList[EntityCount].info_req = InfoInterfaceTdiQueryEx; + EntityList[EntityCount].info_set = InfoInterfaceTdiSetEx; + + EntityCount++; + + KeReleaseSpinLock( &EntityListLock, OldIrql ); +} + +VOID RemoveTDIInterfaceEntity( PIP_INTERFACE Interface ) { + KIRQL OldIrql; + UINT Count = 0, i; + + KeAcquireSpinLock( &EntityListLock, &OldIrql ); + + /* Remove entities that have this interface as context + * In the future, this might include AT_ENTITY types, too + */ + for( i = 0; i < EntityCount; i++ ) { + if( EntityList[i].context == Interface ) { + if( i != EntityCount-1 ) + memcpy( &EntityList[i], + &EntityList[--EntityCount], + sizeof(EntityList[i]) ); + } } - if (ID->toi_entity.tei_instance != TL_INSTANCE) - { - /* Only a single instance is supported */ - return TDI_INVALID_REQUEST; - } - - if (ID->toi_class == INFO_CLASS_GENERIC) - { - if ((ID->toi_type == INFO_TYPE_PROVIDER) && - (ID->toi_id == ENTITY_TYPE_ID)) - { - if (BufSize < sizeof(ULONG)) - { - return TDI_BUFFER_TOO_SMALL; - } - - Temp = CL_NL_IP; - Count = CopyBufferToBufferChain(Buffer, 0, (PUCHAR)&Temp, sizeof(ULONG)); - - return TDI_SUCCESS; - } - - return TDI_INVALID_PARAMETER; - } - - if (ID->toi_class == INFO_CLASS_PROTOCOL) - { - if (ID->toi_type != INFO_TYPE_PROVIDER) - { - return TDI_INVALID_PARAMETER; - } - - switch (ID->toi_id) - { - case IP_MIB_ADDRTABLE_ENTRY_ID: - Temp = 0; - - KeAcquireSpinLock(&InterfaceListLock, &OldIrql); - - CurrentIFEntry = InterfaceListHead.Flink; - while (CurrentIFEntry != &InterfaceListHead) - { - CurrentIF = CONTAINING_RECORD(CurrentIFEntry, IP_INTERFACE, ListEntry); - - if (Temp + sizeof(IPADDR_ENTRY) > BufSize) - { - KeReleaseSpinLock(&InterfaceListLock, OldIrql); - return TDI_BUFFER_TOO_SMALL; - } - - IpAddress.Addr = 0; - IpAddress.BcastAddr = 0; - IpAddress.Mask = 0; - - /* Locate 
the diffrent addresses and put them the right place */ - CurrentADEEntry = CurrentIF->ADEListHead.Flink; - while (CurrentADEEntry != &CurrentIF->ADEListHead) - { - CurrentADE = CONTAINING_RECORD(CurrentADEEntry, ADDRESS_ENTRY, ListEntry); - - switch (CurrentADE->Type) - { - case ADE_UNICAST: - IpAddress.Addr = CurrentADE->Address->Address.IPv4Address; - break; - case ADE_MULTICAST: - IpAddress.BcastAddr = CurrentADE->Address->Address.IPv4Address; - break; - case ADE_ADDRMASK: - IpAddress.Mask = CurrentADE->Address->Address.IPv4Address; - break; - default: - /* Should not happen */ - TI_DbgPrint(MIN_TRACE, ("Unknown address entry type (0x%X)\n", CurrentADE->Type)); - break; - } - CurrentADEEntry = CurrentADEEntry->Flink; - } - - /* Pack the address information into IPADDR_ENTRY structure */ - IpAddress.Index = 0; - IpAddress.ReasmSize = 0; - IpAddress.Context = 0; - IpAddress.Pad = 0; - - Count = CopyBufferToBufferChain(Buffer, Temp, (PUCHAR)&IpAddress, sizeof(IPADDR_ENTRY)); - Temp += sizeof(IPADDR_ENTRY); - - CurrentIFEntry = CurrentIFEntry->Flink; - } - - KeReleaseSpinLock(&InterfaceListLock, OldIrql); - - return TDI_SUCCESS; - - case IP_MIB_STATS_ID: - if (BufSize < sizeof(IPSNMP_INFO)) - { - return TDI_BUFFER_TOO_SMALL; - } - - RtlZeroMemory(&SnmpInfo, sizeof(IPSNMP_INFO)); - - /* Count number of interfaces */ - Count = 0; - KeAcquireSpinLock(&InterfaceListLock, &OldIrql); - - CurrentIFEntry = InterfaceListHead.Flink; - while (CurrentIFEntry != &InterfaceListHead) - { - Count++; - CurrentIFEntry = CurrentIFEntry->Flink; - } - - KeReleaseSpinLock(&InterfaceListLock, OldIrql); - - SnmpInfo.NumIf = Count; - - /* Count number of addresses */ - Count = 0; - KeAcquireSpinLock(&InterfaceListLock, &OldIrql); - - CurrentIFEntry = InterfaceListHead.Flink; - while (CurrentIFEntry != &InterfaceListHead) - { - CurrentIF = CONTAINING_RECORD(CurrentIFEntry, IP_INTERFACE, ListEntry); - Count++; - CurrentIFEntry = CurrentIFEntry->Flink; - } - - 
KeReleaseSpinLock(&InterfaceListLock, OldIrql); - - SnmpInfo.NumAddr = Count; - Count = CopyBufferToBufferChain(Buffer, 0, (PUCHAR)&SnmpInfo, sizeof(IPSNMP_INFO)); - - return TDI_SUCCESS; - - default: - /* We can't handle this ID */ - return TDI_INVALID_PARAMETER; - } - } - - return TDI_INVALID_PARAMETER; + KeReleaseSpinLock( &EntityListLock, OldIrql ); } TDI_STATUS InfoTdiQueryListEntities(PNDIS_BUFFER Buffer, - UINT BufSize, PUINT BufferSize) { - UINT Count, Size, Temp; + UINT Count, Size, BufSize = *BufferSize; KIRQL OldIrql; + TDIEntityID *EntityOutList; PLIST_ENTRY CurrentIFEntry; - /* Count Adapters */ - KeAcquireSpinLock(&InterfaceListLock, &OldIrql); + TI_DbgPrint(MAX_TRACE,("About to copy %d TDIEntityIDs to user\n", + EntityCount)); - CurrentIFEntry = InterfaceListHead.Flink; - Count = EntityCount; - - while( CurrentIFEntry != &InterfaceListHead ) { - Count++; - CurrentIFEntry = CurrentIFEntry->Flink; - } - - KeReleaseSpinLock(&InterfaceListLock, OldIrql); - - Size = Count * sizeof(TDIEntityID); + KeAcquireSpinLock(&EntityListLock, &OldIrql); + + Size = EntityCount * sizeof(TDIEntityID); *BufferSize = Size; if (BufSize < Size) { + KeReleaseSpinLock( &EntityListLock, OldIrql ); /* The buffer is too small to contain requested data */ return TDI_BUFFER_TOO_SMALL; } - - DbgPrint("About to copy %d TDIEntityIDs (%d bytes) to user\n", - Count, Size); - - KeAcquireSpinLock(&EntityListLock, &OldIrql); - - /* Update entity list */ - for( Temp = EntityCount; Temp < Count; Temp++ ) { - EntityList[Temp].tei_entity = IF_ENTITY; - EntityList[Temp].tei_instance = Temp - EntityCount; + + /* Return entity list -- Copy only the TDIEntityID parts. 
*/ + for( Count = 0; Count < EntityCount; Count++ ) { + CopyBufferToBufferChain(Buffer, + Count * sizeof(TDIEntityID), + (PUCHAR)&EntityList[Count], + sizeof(TDIEntityID)); } - EntityMax = Count; - - /* Return entity list */ - Count = CopyBufferToBufferChain(Buffer, 0, (PUCHAR)EntityList, Size); KeReleaseSpinLock(&EntityListLock, OldIrql); - *BufferSize = Size; - return TDI_SUCCESS; } -TDI_STATUS InfoTdiQueryGetInterfaceMIB(TDIObjectID *ID, - PNDIS_BUFFER Buffer, - UINT BufSize, - PUINT BufferSize) { - PIFENTRY OutData; - UINT ListedIfIndex, Count, Size; - PLIST_ENTRY CurrentADEEntry; - PADDRESS_ENTRY CurrentADE; - PLIST_ENTRY CurrentIFEntry; - PIP_INTERFACE CurrentIF; - PCHAR IFDescr; - KIRQL OldIrql; - - OutData = ExAllocatePool( NonPagedPool, - sizeof(IFENTRY) + MAX_IFDESCR_LEN ); - - if( !OutData ) return STATUS_NO_MEMORY; - - RtlZeroMemory( OutData,sizeof(IFENTRY) + MAX_IFDESCR_LEN ); - - KeAcquireSpinLock(&EntityListLock, &OldIrql); - ListedIfIndex = ID->toi_entity.tei_instance - EntityCount; - if( ListedIfIndex > EntityMax ) { - KeReleaseSpinLock(&EntityListLock,OldIrql); - return TDI_INVALID_REQUEST; - } - - CurrentIFEntry = InterfaceListHead.Flink; - - for( Count = 0; Count < ListedIfIndex; Count++ ) - CurrentIFEntry = CurrentIFEntry->Flink; - - CurrentIF = CONTAINING_RECORD(CurrentIFEntry, IP_INTERFACE, ListEntry); - - CurrentADEEntry = CurrentIF->ADEListHead.Flink; - if( CurrentADEEntry == &CurrentIF->ADEListHead ) { - KeReleaseSpinLock( &EntityListLock, OldIrql ); - return TDI_INVALID_REQUEST; - } - - CurrentADE = CONTAINING_RECORD(CurrentADEEntry, ADDRESS_ENTRY, ListEntry); - - OutData->Index = Count + 1; /* XXX - arty What goes here?? 
*/ - OutData->Type = CurrentADE->Type; - OutData->Mtu = CurrentIF->MTU; - OutData->Speed = 10000000; /* XXX - arty Not sure */ - memcpy(OutData->PhysAddr, - CurrentIF->Address,CurrentIF->AddressLength); - OutData->PhysAddrLen = CurrentIF->AddressLength; - OutData->AdminStatus = TRUE; - OutData->OperStatus = TRUE; - IFDescr = (PCHAR)&OutData[1]; - strcpy(IFDescr,"ethernet adapter"); - OutData->DescrLen = strlen(IFDescr); - IFDescr = IFDescr + strlen(IFDescr); - Size = IFDescr - (PCHAR)OutData; - - KeReleaseSpinLock(&InterfaceListLock, OldIrql); - - *BufferSize = Size; - - if( BufSize < Size ) { - return TDI_BUFFER_TOO_SMALL; - } else { - CopyBufferToBufferChain(Buffer, 0, (PUCHAR)&OutData, Size); - return TDI_SUCCESS; - } -} - TDI_STATUS InfoTdiQueryInformationEx( PTDI_REQUEST Request, TDIObjectID *ID, @@ -341,170 +130,72 @@ TDI_STATUS InfoTdiQueryInformationEx( * Status of operation */ { - PLIST_ENTRY CurrentIFEntry; - PLIST_ENTRY CurrentADEEntry; - PLIST_ENTRY CurrentADFEntry; - PADDRESS_FILE CurrentADF; - PADDRESS_ENTRY CurrentADE; - PIP_INTERFACE CurrentIF; - ADDRESS_INFO Info; - KIRQL OldIrql; - UINT BufSize; - UINT Entity; - UINT Offset; - ULONG Temp; - UINT Count; - UINT Size; - TDIEntityID EntityId; + KIRQL OldIrql; + UINT i; + PVOID context; + NTSTATUS Status = STATUS_SUCCESS; + TDIEntityID EntityId; + BOOL FoundEntity = FALSE; + InfoRequest_f InfoRequest; - Offset = 0; - BufSize = *BufferSize; + TI_DbgPrint(MAX_TRACE, + ("InfoEx Req: %x %x %x!%04x:%d\n", + ID->toi_class, + ID->toi_type, + ID->toi_id, + ID->toi_entity.tei_entity, + ID->toi_entity.tei_instance)); - /* Check wether it is a query for a list of entities */ - Entity = ID->toi_entity.tei_entity; - if (Entity == GENERIC_ENTITY) + /* Check whether it is a query for a list of entities */ + if (ID->toi_entity.tei_entity == GENERIC_ENTITY) { - if ((ID->toi_class != INFO_CLASS_GENERIC) || - (ID->toi_type != INFO_TYPE_PROVIDER) || - (ID->toi_id != ENTITY_LIST_ID)) - { - return TDI_INVALID_PARAMETER; - } 
- - return InfoTdiQueryListEntities(Buffer, BufSize, BufferSize); - } - - /* Get an IFENTRY */ - if (ID->toi_class == INFO_CLASS_PROTOCOL && - ID->toi_type == INFO_TYPE_PROVIDER && - ID->toi_id == IF_MIB_STATS_ID) - { - if(ID->toi_entity.tei_entity != IF_ENTITY) - return TDI_INVALID_REQUEST; - - return InfoTdiQueryGetInterfaceMIB(ID, Buffer, BufSize, BufferSize); - } - - if ((Entity != CL_TL_ENTITY) && (Entity != CO_TL_ENTITY)) - { - /* We can't handle this entity, pass it on */ - return IPTdiQueryInformationEx( - Request, ID, Buffer, BufferSize, Context); - } - - /* Make return parameters consistent every time */ - *BufferSize = 0; - - if (ID->toi_entity.tei_instance != TL_INSTANCE) - { - /* We only support a single instance */ - return TDI_INVALID_REQUEST; - } - - if (ID->toi_class == INFO_CLASS_GENERIC) - { - if ((ID->toi_type != INFO_TYPE_PROVIDER) || - (ID->toi_id != ENTITY_TYPE_ID)) - return TDI_INVALID_PARAMETER; - - if (BufSize < sizeof(ULONG)) - { - return TDI_BUFFER_TOO_SMALL; - } - - if (Entity == CL_TL_ENTITY) - { - Temp = CL_TL_UDP; - } - else if (Entity == CO_TL_ENTITY) - { - Temp = CO_TL_TCP; - } + if ((ID->toi_class != INFO_CLASS_GENERIC) || + (ID->toi_type != INFO_TYPE_PROVIDER) || + (ID->toi_id != ENTITY_LIST_ID)) + Status = TDI_INVALID_PARAMETER; else - { - return TDI_INVALID_PARAMETER; - } - - Count = CopyBufferToBufferChain(Buffer, 0, (PUCHAR)&Temp, sizeof(ULONG)); - - return TDI_SUCCESS; + Status = InfoTdiQueryListEntities(Buffer, BufferSize); + } else { + KeAcquireSpinLock( &EntityListLock, &OldIrql ); + + for( i = 0; i < EntityCount; i++ ) { + if( EntityList[i].tei_entity == ID->toi_entity.tei_entity && + EntityList[i].tei_instance == ID->toi_entity.tei_instance ) { + InfoRequest = EntityList[i].info_req; + context = EntityList[i].context; + FoundEntity = TRUE; + break; + } + } + + KeReleaseSpinLock( &EntityListLock, OldIrql ); + + if( FoundEntity ) { + TI_DbgPrint(MAX_TRACE, + ("Calling Entity %d (%04x:%d) InfoEx (%x,%x,%x)\n", + i, 
ID->toi_entity.tei_entity, + ID->toi_entity.tei_instance, + ID->toi_class, ID->toi_type, ID->toi_id)); + Status = InfoRequest( ID->toi_class, + ID->toi_type, + ID->toi_id, + context, + &ID->toi_entity, + Buffer, + BufferSize ); + } } - if (ID->toi_class == INFO_CLASS_PROTOCOL) - { - if (ID->toi_type != INFO_TYPE_PROVIDER) - { - return TDI_INVALID_PARAMETER; - } + TI_DbgPrint(MAX_TRACE,("Status: %08x\n", Status)); - switch (ID->toi_id) - { - case UDP_MIB_STAT_ID: - if (Entity != CL_TL_ENTITY) - { - return TDI_INVALID_PARAMETER; - } - - if (BufSize < sizeof(UDPStats)) - { - return TDI_BUFFER_TOO_SMALL; - } - - Count = CopyBufferToBufferChain(Buffer, 0, (PUCHAR)&UDPStats, sizeof(UDP_STATISTICS)); - - return TDI_SUCCESS; - - case UDP_MIB_TABLE_ID: - if (Entity != CL_TL_ENTITY) - { - return TDI_INVALID_PARAMETER; - } - - Offset = 0; - - KeAcquireSpinLock(&AddressFileListLock, &OldIrql); - - CurrentADFEntry = AddressFileListHead.Flink; - while (CurrentADFEntry != &AddressFileListHead) - { - CurrentADF = CONTAINING_RECORD(CurrentADFEntry, ADDRESS_FILE, ListEntry); - - if (Offset + sizeof(ADDRESS_INFO) > BufSize) - { - KeReleaseSpinLock(&AddressFileListLock, OldIrql); - *BufferSize = Offset; - return TDI_BUFFER_OVERFLOW; - } - - Info.LocalAddress = CurrentADF->ADE->Address->Address.IPv4Address; - Info.LocalPort = CurrentADF->Port; - - Count = CopyBufferToBufferChain(Buffer, Offset, (PUCHAR)&Info, sizeof(ADDRESS_INFO)); - Offset += Count; - - CurrentADFEntry = CurrentADFEntry->Flink; - } - - KeReleaseSpinLock(&AddressFileListLock, OldIrql); - - *BufferSize = Offset; - - return STATUS_SUCCESS; - - default: - /* We can't handle this ID */ - return TDI_INVALID_PARAMETER; - } - } - - return TDI_INVALID_PARAMETER; + return Status; } -TDI_STATUS InfoTdiSetInformationEx( - PTDI_REQUEST Request, - TDIObjectID *ID, - PVOID Buffer, - UINT BufferSize) +TDI_STATUS InfoTdiSetInformationEx +(PTDI_REQUEST Request, + TDIObjectID *ID, + PVOID Buffer, + UINT BufferSize) /* * FUNCTION: Sets 
extended information * ARGUMENTS: @@ -516,30 +207,30 @@ TDI_STATUS InfoTdiSetInformationEx( * Status of operation */ { - switch( ID->toi_class ) { - case INFO_CLASS_PROTOCOL: - switch( ID->toi_type ) { - case INFO_TYPE_PROVIDER: - switch( ID->toi_id ) { - case IP_MIB_ROUTETABLE_ENTRY_ID: - if( ID->toi_entity.tei_entity == CL_NL_ENTITY && - ID->toi_entity.tei_instance == TL_INSTANCE && - BufferSize >= sizeof(IPROUTE_ENTRY) ) { - /* Add route -- buffer is an IPRouteEntry */ - PIPROUTE_ENTRY ire = (PIPROUTE_ENTRY)Buffer; - RouteFriendlyAddRoute( ire ); - } else { - return TDI_INVALID_PARAMETER; - /* In my experience, we are being over - protective compared to windows */ + switch( ID->toi_class ) { + case INFO_CLASS_PROTOCOL: + switch( ID->toi_type ) { + case INFO_TYPE_PROVIDER: + switch( ID->toi_id ) { + case IP_MIB_ROUTETABLE_ENTRY_ID: + if( ID->toi_entity.tei_entity == CL_NL_ENTITY && + ID->toi_entity.tei_instance == TL_INSTANCE && + BufferSize >= sizeof(IPROUTE_ENTRY) ) { + /* Add route -- buffer is an IPRouteEntry */ + PIPROUTE_ENTRY ire = (PIPROUTE_ENTRY)Buffer; + RouteFriendlyAddRoute( ire ); + } else { + return TDI_INVALID_PARAMETER; + /* In my experience, we are being over + protective compared to windows */ + } + break; + } + break; } break; - } - break; } - break; - } - - return TDI_INVALID_PARAMETER; -} + return TDI_INVALID_PARAMETER; +} + diff --git a/reactos/drivers/net/tcpip/tcpip/interface.c b/reactos/drivers/net/tcpip/tcpip/interface.c new file mode 100644 index 00000000000..f19f1022e28 --- /dev/null +++ b/reactos/drivers/net/tcpip/tcpip/interface.c @@ -0,0 +1,108 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: tcpip/interface.c + * PURPOSE: Convenient abstraction for getting and setting information + * in IP_INTERFACE. 
+ * PROGRAMMERS: Art Yerkes + * REVISIONS: + * CSH 01/08-2000 Created + */ +#include +#include +#include +#include +#include +#include + +NTSTATUS GetInterfaceIPv4Address( PIP_INTERFACE Interface, + ULONG TargetType, + PULONG Address ) { + PLIST_ENTRY CurrentIFEntry; + PLIST_ENTRY CurrentADEEntry; + PADDRESS_ENTRY CurrentADE; + + CurrentADEEntry = Interface->ADEListHead.Flink; + while (CurrentADEEntry != &Interface->ADEListHead) + { + CurrentADE = CONTAINING_RECORD(CurrentADEEntry, ADDRESS_ENTRY, ListEntry); + if (CurrentADE->Type == TargetType) { + *Address = CurrentADE->Address->Address.IPv4Address; + return STATUS_SUCCESS; + } + CurrentADEEntry = CurrentADEEntry->Flink; + } + + return STATUS_UNSUCCESSFUL; +} + +UINT CountInterfaces() { + DWORD Count = 0; + KIRQL OldIrql; + PLIST_ENTRY CurrentIFEntry; + + KeAcquireSpinLock(&InterfaceListLock, &OldIrql); + + CurrentIFEntry = InterfaceListHead.Flink; + while (CurrentIFEntry != &InterfaceListHead) { + Count++; + CurrentIFEntry = CurrentIFEntry->Flink; + } + + KeReleaseSpinLock(&InterfaceListLock, OldIrql); + + return Count; +} + +UINT CountInterfaceAddresses( PIP_INTERFACE Interface ) { + UINT AddrCount = 0; + PADDRESS_ENTRY CurrentADE; + PLIST_ENTRY CurrentADEntry; + + CurrentADEntry = Interface->ADEListHead.Flink; + + while( CurrentADEntry != &Interface->ADEListHead ) { + CurrentADEntry = CurrentADEntry->Flink; + CurrentADE = CONTAINING_RECORD(CurrentADEntry, + ADDRESS_ENTRY, + ListEntry); + if( CurrentADE->Type == ADE_UNICAST ) + AddrCount++; + } + + return AddrCount; +} + +NTSTATUS GetInterfaceSpeed( PIP_INTERFACE Interface, PUINT Speed ) { + NDIS_STATUS NdisStatus; + PLAN_ADAPTER IF = (PLAN_ADAPTER)Interface->Context; + + /* Get maximum link speed */ + NdisStatus = NDISCall(IF, + NdisRequestQueryInformation, + OID_GEN_LINK_SPEED, + Speed, + sizeof(UINT)); + + return + NdisStatus != NDIS_STATUS_SUCCESS ? 
+ STATUS_UNSUCCESSFUL : STATUS_SUCCESS; +} + +NTSTATUS GetInterfaceName( PIP_INTERFACE Interface, + PCHAR NameBuffer, + UINT Len ) { + NDIS_STATUS NdisStatus; + PLAN_ADAPTER IF = (PLAN_ADAPTER)Interface->Context; + + /* Get maximum link speed */ + NdisStatus = NDISCall(IF, + NdisRequestQueryInformation, + OID_GEN_FRIENDLY_NAME, + NameBuffer, + Len); + + return + NdisStatus != NDIS_STATUS_SUCCESS ? + STATUS_UNSUCCESSFUL : STATUS_SUCCESS; +} diff --git a/reactos/drivers/net/tcpip/tcpip/irp.c b/reactos/drivers/net/tcpip/tcpip/irp.c new file mode 100644 index 00000000000..a19e10fabc2 --- /dev/null +++ b/reactos/drivers/net/tcpip/tcpip/irp.c @@ -0,0 +1,30 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: tcpip/dispatch.h + * PURPOSE: TDI dispatch routines + * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) + * REVISIONS: + * CSH 01/08-2000 Created + * TODO: Validate device object in all dispatch routines + */ +#include +#include +#include +#include +#include +#include + +NTSTATUS IRPFinish( PIRP Irp, NTSTATUS Status ) { + IoSetCancelRoutine( Irp, NULL ); + + if( Status == STATUS_PENDING ) + IoMarkIrpPending( Irp ); + else { + Irp->IoStatus.Status = Status; + IoCompleteRequest( Irp, IO_NETWORK_INCREMENT ); + } + + return Status; +} + diff --git a/reactos/drivers/net/tcpip/tcpip/main.c b/reactos/drivers/net/tcpip/tcpip/main.c index 1aec90bb6b4..4fa181cfa1c 100644 --- a/reactos/drivers/net/tcpip/tcpip/main.c +++ b/reactos/drivers/net/tcpip/tcpip/main.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #define NDEBUG @@ -33,7 +35,7 @@ PDEVICE_OBJECT RawIPDeviceObject = NULL; NDIS_HANDLE GlobalPacketPool = NULL; NDIS_HANDLE GlobalBufferPool = NULL; KSPIN_LOCK EntityListLock; -TDIEntityID *EntityList = NULL; +TDIEntityInfo *EntityList = NULL; ULONG EntityCount = 0; ULONG EntityMax = 0; UDP_STATISTICS UDPStats; @@ -456,18 +458,9 @@ TiDispatchOpenClose( Status = 
STATUS_INVALID_DEVICE_REQUEST; } - if (Status != STATUS_PENDING) { - IrpSp->Control &= ~SL_PENDING_RETURNED; - Irp->IoStatus.Status = Status; - - TI_DbgPrint(DEBUG_IRP, ("Completing IRP at (0x%X).\n", Irp)); - - IoCompleteRequest(Irp, IO_NETWORK_INCREMENT); - } - TI_DbgPrint(DEBUG_IRP, ("Leaving. Status is (0x%X)\n", Status)); - return Status; + return IRPFinish( Irp, Status ); } @@ -487,7 +480,7 @@ TiDispatchInternal( * Status of the operation */ { - NTSTATUS Status; + NTSTATUS Status; PIO_STACK_LOCATION IrpSp; IrpSp = IoGetCurrentIrpStackLocation(Irp); @@ -560,17 +553,9 @@ TiDispatchInternal( Status = STATUS_INVALID_DEVICE_REQUEST; } - if (Status != STATUS_PENDING) { - Irp->IoStatus.Status = Status; - - TI_DbgPrint(DEBUG_IRP, ("Completing IRP at (0x%X).\n", Irp)); - - IoCompleteRequest(Irp, IO_NETWORK_INCREMENT); - } - TI_DbgPrint(DEBUG_IRP, ("Leaving. Status = (0x%X).\n", Status)); - return Status; + return IRPFinish( Irp, Status ); } @@ -627,17 +612,9 @@ TiDispatch( } } - if (Status != STATUS_PENDING) { - Irp->IoStatus.Status = Status; - - TI_DbgPrint(DEBUG_IRP, ("Completing IRP at (0x%X).\n", Irp)); - - IoCompleteRequest(Irp, IO_NETWORK_INCREMENT); - } - TI_DbgPrint(DEBUG_IRP, ("Leaving. Status = (0x%X).\n", Status)); - return Status; + return IRPFinish( Irp, Status ); } @@ -726,6 +703,14 @@ DriverEntry( NDIS_STRING DeviceName; TI_DbgPrint(MAX_TRACE, ("Called.\n")); + + TrackingInit(); + TrackTag(NDIS_BUFFER_TAG); + TrackTag(NDIS_PACKET_TAG); + TrackTag(FBSD_MALLOC); + TrackTag(EXALLOC_TAG); + + InitOskitTCP(); /* TdiInitialize() ? 
*/ @@ -770,6 +755,28 @@ DriverEntry( return Status; } + /* Setup network layer and transport layer entities */ + KeInitializeSpinLock(&EntityListLock); + EntityList = ExAllocatePool(NonPagedPool, sizeof(*EntityList) * MAX_TDI_ENTITIES ); /* sized from the element type of EntityList */ + if (!EntityList) { /* test the allocation itself, not the earlier Status */ + TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); + TiUnload(DriverObject); + return STATUS_INSUFFICIENT_RESOURCES; + } + + EntityList[0].tei_entity = CL_NL_ENTITY; + EntityList[0].tei_instance = 0; + EntityList[0].context = 0; + EntityList[0].info_req = InfoNetworkLayerTdiQueryEx; + EntityList[0].info_set = InfoNetworkLayerTdiSetEx; + EntityList[1].tei_entity = CL_TL_ENTITY; + EntityList[1].tei_instance = 0; + EntityList[1].context = 0; + EntityList[1].info_req = InfoTransportLayerTdiQueryEx; + EntityList[1].info_set = InfoTransportLayerTdiSetEx; + EntityCount = 2; + EntityMax = MAX_TDI_ENTITIES; + /* Allocate NDIS packet descriptors */ NdisAllocatePacketPool(&NdisStatus, &GlobalPacketPool, 100, sizeof(PACKET_CONTEXT)); if (NdisStatus != NDIS_STATUS_SUCCESS) { @@ -830,22 +837,6 @@ DriverEntry( return STATUS_INSUFFICIENT_RESOURCES; } - /* Setup network layer and transport layer entities */ - KeInitializeSpinLock(&EntityListLock); - EntityList = ExAllocatePool(NonPagedPool, sizeof(TDIEntityID) * MAX_TDI_ENTITIES ); - if (!NT_SUCCESS(Status)) { - TI_DbgPrint(MIN_TRACE, ("Insufficient resources.\n")); - TiUnload(DriverObject); - return STATUS_INSUFFICIENT_RESOURCES; - } - - EntityList[0].tei_entity = CL_NL_ENTITY; - EntityList[0].tei_instance = 0; - EntityList[1].tei_entity = CL_TL_ENTITY; - EntityList[1].tei_instance = 0; - EntityCount = 2; - EntityMax = MAX_TDI_ENTITIES; - /* Use direct I/O */ IPDeviceObject->Flags |= DO_DIRECT_IO; RawIPDeviceObject->Flags |= DO_DIRECT_IO; diff --git a/reactos/drivers/net/tcpip/tcpip/memtrack.c b/reactos/drivers/net/tcpip/tcpip/memtrack.c new file mode 100644 index 00000000000..ea75ca65563 --- /dev/null +++ b/reactos/drivers/net/tcpip/tcpip/memtrack.c @@ -0,0
+1,146 @@ +#define MEMTRACK_NO_POOL +#include +#include +#include +#include + +#ifdef MEMTRACK +LIST_ENTRY AllocatedObjectsList; +KSPIN_LOCK AllocatedObjectsLock; +DWORD TagsToShow[MEMTRACK_MAX_TAGS_TO_TRACK] = { 0 }; + +VOID TrackTag( DWORD Tag ) { + UINT i; + + for( i = 0; TagsToShow[i]; i++ ); + TagsToShow[i] = Tag; +} + +VOID TrackingInit() { + KeInitializeSpinLock( &AllocatedObjectsLock ); + InitializeListHead( &AllocatedObjectsList ); +} + +VOID ShowTrackedThing( PCHAR What, PALLOCATION_TRACKER Thing, + PCHAR File, UINT Line ) { + /* if( ShowTag( Thing->Tag ) ) */ + if( File ) { + DbgPrint( "[%s] Thing %08x %c%c%c%c (%s:%d) (Called from %s:%d)\n", + What, + Thing->Thing, + ((PCHAR)&Thing->Tag)[3], + ((PCHAR)&Thing->Tag)[2], + ((PCHAR)&Thing->Tag)[1], + ((PCHAR)&Thing->Tag)[0], + Thing->FileName, + Thing->LineNo, + File, Line ); + } else { + DbgPrint( "[%s] Thing %08x %c%c%c%c (%s:%d)\n", + What, + Thing->Thing, + ((PCHAR)&Thing->Tag)[3], + ((PCHAR)&Thing->Tag)[2], + ((PCHAR)&Thing->Tag)[1], + ((PCHAR)&Thing->Tag)[0], + Thing->FileName, + Thing->LineNo ); + } +} + +VOID TrackWithTag( DWORD Tag, PVOID Thing, PCHAR FileName, DWORD LineNo ) { + PALLOCATION_TRACKER TrackedThing = + ExAllocatePool( NonPagedPool, sizeof(*TrackedThing) ); + + KIRQL OldIrql; + PLIST_ENTRY Entry; + PALLOCATION_TRACKER ThingInList; + + KeAcquireSpinLock( &AllocatedObjectsLock, &OldIrql ); + Entry = AllocatedObjectsList.Flink; + while( Entry != &AllocatedObjectsList ) { + ThingInList = CONTAINING_RECORD(Entry, ALLOCATION_TRACKER, Entry); + if( ThingInList->Thing == Thing ) { + RemoveEntryList(Entry); + + ShowTrackedThing( "Alloc", ThingInList, FileName, LineNo ); + + ExFreePool( ThingInList ); + TrackDumpFL( FileName, LineNo ); + KeReleaseSpinLock( &AllocatedObjectsLock, OldIrql ); + DbgPrint("TRACK: SPECIFIED ALREADY ALLOCATED ITEM %x\n", Thing); + KeBugCheck( 0 ); + } + Entry = Entry->Flink; + } + + KeReleaseSpinLock( &AllocatedObjectsLock, OldIrql ); + + if( TrackedThing ) { + 
TrackedThing->Tag = Tag; + TrackedThing->Thing = Thing; + TrackedThing->FileName = FileName; + TrackedThing->LineNo = LineNo; + + ExInterlockedInsertTailList( &AllocatedObjectsList, + &TrackedThing->Entry, + &AllocatedObjectsLock ); + ShowTrackedThing( "Alloc", TrackedThing, FileName, LineNo ); + } + + /*TrackDumpFL( FileName, LineNo );*/ +} + +BOOL ShowTag( DWORD Tag ) { + UINT i; + + for( i = 0; TagsToShow[i] && TagsToShow[i] != Tag; i++ ); + + return TagsToShow[i] ? TRUE : FALSE; +} + +VOID UntrackFL( PCHAR File, DWORD Line, PVOID Thing ) { + KIRQL OldIrql; + PLIST_ENTRY Entry; + PALLOCATION_TRACKER ThingInList; + + KeAcquireSpinLock( &AllocatedObjectsLock, &OldIrql ); + Entry = AllocatedObjectsList.Flink; + while( Entry != &AllocatedObjectsList ) { + ThingInList = CONTAINING_RECORD(Entry, ALLOCATION_TRACKER, Entry); + if( ThingInList->Thing == Thing ) { + RemoveEntryList(Entry); + + ShowTrackedThing( "Free ", ThingInList, File, Line ); + + ExFreePool( ThingInList ); + KeReleaseSpinLock( &AllocatedObjectsLock, OldIrql ); + /* TrackDumpFL( File, Line ); */ + return; + } + Entry = Entry->Flink; + } + KeReleaseSpinLock( &AllocatedObjectsLock, OldIrql ); + TrackDumpFL( File, Line ); + DbgPrint("UNTRACK: SPECIFIED ALREADY FREE ITEM %x\n", Thing); + KeBugCheck( 0 ); +} + +VOID TrackDumpFL( PCHAR File, DWORD Line ) { + KIRQL OldIrql; + PLIST_ENTRY Entry; + PALLOCATION_TRACKER Thing; + + DbgPrint("Dump: %s:%d\n", File, Line); + + KeAcquireSpinLock( &AllocatedObjectsLock, &OldIrql ); + Entry = AllocatedObjectsList.Flink; + while( Entry != &AllocatedObjectsList ) { + Thing = CONTAINING_RECORD(Entry, ALLOCATION_TRACKER, Entry); + ShowTrackedThing( "Dump ", Thing, 0, 0 ); + Entry = Entry->Flink; + } + KeReleaseSpinLock( &AllocatedObjectsLock, OldIrql ); +} + +#endif/*MEMTRACK*/ diff --git a/reactos/drivers/net/tcpip/tcpip/ninfo.c b/reactos/drivers/net/tcpip/tcpip/ninfo.c new file mode 100644 index 00000000000..f6f61f8e595 --- /dev/null +++ 
b/reactos/drivers/net/tcpip/tcpip/ninfo.c @@ -0,0 +1,249 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: tcpip/ninfo.c + * PURPOSE: Network information + * PROGRAMMERS: Art Yerkes + * REVISIONS: + * CSH 01/08-2000 Created + */ +#include +#include +#include +#include +#include +#include +#include +#include + +TDI_STATUS InfoTdiQueryGetAddrTable( PNDIS_BUFFER Buffer, + PUINT BufferSize ) { + PIP_INTERFACE CurrentIF; + PLIST_ENTRY CurrentIFEntry; + TDI_STATUS Status = TDI_INVALID_REQUEST; + KIRQL OldIrql; + UINT Count = 1; /* Start adapter indices at 1 */ + UINT IfCount = CountInterfaces(); + PIPADDR_ENTRY IpAddress = + ExAllocatePool( NonPagedPool, sizeof( IPADDR_ENTRY ) * IfCount ); + PIPADDR_ENTRY IpCurrent = IpAddress; + + TI_DbgPrint(MAX_TRACE, ("Called.\n")); + + if( !IpAddress ) return STATUS_NO_MEMORY; /* nothing to fill in or free */ + KeAcquireSpinLock(&InterfaceListLock, &OldIrql); + + CurrentIFEntry = InterfaceListHead.Flink; + while (CurrentIFEntry != &InterfaceListHead && Count <= IfCount) /* never write past the IfCount entries allocated above */ + { + CurrentIF = CONTAINING_RECORD(CurrentIFEntry, IP_INTERFACE, ListEntry); + + IpCurrent->Index = Count; + IpCurrent->Addr = 0; + IpCurrent->BcastAddr = 0; + IpCurrent->Mask = 0; + + /* Locate the different addresses and store them in the entry for this interface */ + GetInterfaceIPv4Address( CurrentIF, + ADE_UNICAST, + &IpCurrent->Addr ); + GetInterfaceIPv4Address( CurrentIF, + ADE_MULTICAST, + &IpCurrent->BcastAddr ); + GetInterfaceIPv4Address( CurrentIF, + ADE_ADDRMASK, + &IpCurrent->Mask ); + IpCurrent++; + CurrentIFEntry = CurrentIFEntry->Flink; + Count++; + } + + KeReleaseSpinLock(&InterfaceListLock, OldIrql); + + /* Count started at 1, so Count - 1 entries were filled in */ Status = InfoCopyOut( IpAddress, sizeof(*IpAddress) * (Count - 1), + Buffer, BufferSize ); + + ExFreePool( IpAddress ); + + TI_DbgPrint(MAX_TRACE, ("Returning %08x\n", Status)); + + return Status; +} + +/* Get IPRouteEntry s for each of the routes in the system */ +TDI_STATUS InfoTdiQueryGetRouteTable( PNDIS_BUFFER Buffer, PUINT BufferSize ) { + PIP_INTERFACE CurrentIF; + PLIST_ENTRY CurrentIFEntry; +
TDI_STATUS Status; + KIRQL OldIrql; + UINT RtCount = CountFIBs(), + Size = sizeof( IPROUTE_ENTRY ) * RtCount; + PFIB_ENTRY RCache = + ExAllocatePool( NonPagedPool, sizeof( FIB_ENTRY ) * RtCount ), + RCacheCur = RCache; + PIPROUTE_ENTRY RouteEntries = ExAllocatePool( NonPagedPool, Size ), + RtCurrent = RouteEntries; + + TI_DbgPrint(MAX_TRACE, ("Called, routes = %d, RCache = %08x\n", + RtCount, RCache)); + + if( !RCache || !RouteEntries ) { + if( RCache ) ExFreePool( RCache ); + if( RouteEntries ) ExFreePool( RouteEntries ); + return STATUS_NO_MEMORY; + } + + RtlZeroMemory( RouteEntries, Size ); + + RtCount = CopyFIBs( RCache ); + + while( RtCurrent < RouteEntries + RtCount ) { + /* Copy Desitnation */ + if( RCacheCur->NetworkAddress && RCacheCur->Netmask && + RCacheCur->Router && RCacheCur->Router->Address ) { + TI_DbgPrint(MAX_TRACE, ("%d: NA %08x NM %08x GW %08x MT %d\n", + RtCurrent - RouteEntries, + RCacheCur->NetworkAddress->Address, + RCacheCur->Netmask->Address, + RCacheCur->Router->Address->Address, + RCacheCur->Metric)); + + RtlCopyMemory( &RtCurrent->Dest, + &RCacheCur->NetworkAddress->Address, + sizeof(RtCurrent->Dest) ); + RtlCopyMemory( &RtCurrent->Mask, + &RCacheCur->Netmask->Address, + sizeof(RtCurrent->Mask) ); + /* Currently, this address is stuffed into the pointer. + * That probably is not intended. 
*/ + RtlCopyMemory( &RtCurrent->Gw, + &RCacheCur->Router->Address->Address, + sizeof(RtCurrent->Gw) ); + RtCurrent->Metric1 = RCacheCur->Metric; + RtCurrent->Type = 2 /* PF_INET */; + + KeAcquireSpinLock(&EntityListLock, &OldIrql); + for( RtCurrent->Index = EntityCount - 1; + RtCurrent->Index >= 0 && + RCacheCur->Router->Interface != + EntityList[RtCurrent->Index].context; + RtCurrent->Index-- ); + RtCurrent->Index = EntityList[RtCurrent->Index].tei_instance; + KeReleaseSpinLock(&EntityListLock, OldIrql); + } else { + TI_DbgPrint(MAX_TRACE, ("%d: BAD: NA %08x NM %08x GW %08x MT %d\n", + RtCurrent - RouteEntries, + RCacheCur->NetworkAddress, + RCacheCur->Netmask, + RCacheCur->Router, + RCacheCur->Router ? + RCacheCur->Router->Address : 0, + RCacheCur->Metric)); + } + RtCurrent++; RCacheCur++; + } + + Status = InfoCopyOut( RouteEntries, Size, Buffer, BufferSize ); + + ExFreePool( RouteEntries ); + ExFreePool( RCache ); + + TI_DbgPrint(MAX_TRACE, ("Returning %08x\n", Status)); + + return Status; +} + +TDI_STATUS InfoTdiQueryGetIPSnmpInfo( PNDIS_BUFFER Buffer, + PUINT BufferSize ) { + KIRQL OldIrql; + PIP_INTERFACE CurrentIF; + PLIST_ENTRY CurrentIFEntry; + IPSNMP_INFO SnmpInfo; + UINT IfCount = CountInterfaces(); + UINT AddrCount = 0; + UINT RouteCount = CountRouteNodes( NULL ); + TDI_STATUS Status = TDI_INVALID_REQUEST; + + TI_DbgPrint(MAX_TRACE, ("Called.\n")); + + RtlZeroMemory(&SnmpInfo, sizeof(IPSNMP_INFO)); + + /* Count number of addresses */ + AddrCount = 0; + KeAcquireSpinLock(&InterfaceListLock, &OldIrql); + + CurrentIFEntry = InterfaceListHead.Flink; + while (CurrentIFEntry != &InterfaceListHead) + { + CurrentIF = CONTAINING_RECORD(CurrentIFEntry, IP_INTERFACE, ListEntry); + AddrCount += CountInterfaceAddresses( CurrentIF ); + CurrentIFEntry = CurrentIFEntry->Flink; + } + + KeReleaseSpinLock(&InterfaceListLock, OldIrql); + + SnmpInfo.NumIf = IfCount; + SnmpInfo.NumAddr = AddrCount; + SnmpInfo.NumRoutes = RouteCount; + + Status = InfoCopyOut( &SnmpInfo, 
sizeof(SnmpInfo), + Buffer, BufferSize ); + + TI_DbgPrint(MAX_TRACE, ("Returning %08x\n", Status)); + + return Status; +} + +TDI_STATUS InfoNetworkLayerTdiQueryEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ) { + TDI_STATUS Status = TDI_INVALID_REQUEST; + + TI_DbgPrint(MAX_TRACE, ("Called.\n")); + + switch( InfoClass ) { + case INFO_CLASS_GENERIC: + if( InfoType == INFO_TYPE_PROVIDER && InfoId == ENTITY_TYPE_ID ) { + ULONG Return = CL_NL_IP; + Status = InfoCopyOut( &Return, sizeof(Return), + Buffer, BufferSize ); + } + break; + + case INFO_CLASS_PROTOCOL: + switch( InfoType ) { + case INFO_TYPE_PROVIDER: + switch( InfoId ) { + case IP_MIB_ADDRTABLE_ENTRY_ID: + Status = InfoTdiQueryGetAddrTable( Buffer, BufferSize ); + break; + + case IP_MIB_ROUTETABLE_ENTRY_ID: + Status = InfoTdiQueryGetRouteTable( Buffer, BufferSize ); + break; + + case IP_MIB_STATS_ID: + Status = InfoTdiQueryGetIPSnmpInfo( Buffer, BufferSize ); + break; + } + break; + } + } + + TI_DbgPrint(MAX_TRACE, ("Returning %08x\n", Status)); + + return Status; +} + +TDI_STATUS InfoNetworkLayerTdiSetEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ) { +} diff --git a/reactos/drivers/net/tcpip/tcpip/pool.c b/reactos/drivers/net/tcpip/tcpip/pool.c index ca6782289e3..00b764f9e17 100644 --- a/reactos/drivers/net/tcpip/tcpip/pool.c +++ b/reactos/drivers/net/tcpip/tcpip/pool.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include diff --git a/reactos/drivers/net/tcpip/tcpip/routines.c b/reactos/drivers/net/tcpip/tcpip/routines.c index dbf303d230b..cfbac1d14df 100644 --- a/reactos/drivers/net/tcpip/tcpip/routines.c +++ b/reactos/drivers/net/tcpip/tcpip/routines.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -16,89 +17,6 @@ static UINT RandomNumber = 0x12345678; -inline NTSTATUS 
BuildDatagramSendRequest( - PDATAGRAM_SEND_REQUEST *SendRequest, - PIP_ADDRESS RemoteAddress, - USHORT RemotePort, - PNDIS_BUFFER Buffer, - DWORD BufferSize, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - DATAGRAM_BUILD_ROUTINE Build, - ULONG Flags) -/* - * FUNCTION: Allocates and intializes a datagram send request - * ARGUMENTS: - * SendRequest = Pointer to datagram send request - * RemoteAddress = Pointer to remote IP address - * RemotePort = Remote port number - * Buffer = Pointer to NDIS buffer to send - * BufferSize = Size of Buffer - * Complete = Completion routine - * Context = Pointer to context information - * Build = Datagram build routine - * Flags = Protocol specific flags - * RETURNS: - * Status of operation - */ -{ - PDATAGRAM_SEND_REQUEST Request; - - Request = ExAllocatePool(NonPagedPool, sizeof(DATAGRAM_SEND_REQUEST)); - if (!Request) - return STATUS_INSUFFICIENT_RESOURCES; - - InitializeDatagramSendRequest( - Request, - RemoteAddress, - RemotePort, - Buffer, - BufferSize, - Complete, - Context, - Build, - Flags); - - *SendRequest = Request; - - return STATUS_SUCCESS; -} - - -inline NTSTATUS BuildTCPSendRequest( - PTCP_SEND_REQUEST *SendRequest, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PVOID ProtocolContext) -/* - * FUNCTION: Allocates and intializes a TCP send request - * ARGUMENTS: - * SendRequest = Pointer to TCP send request - * Complete = Completion routine - * Context = Pointer to context information - * ProtocolContext = Protocol specific context - * RETURNS: - * Status of operation - */ -{ - PTCP_SEND_REQUEST Request; - - Request = ExAllocatePool(NonPagedPool, sizeof(TCP_SEND_REQUEST)); - if (!Request) - return STATUS_INSUFFICIENT_RESOURCES; - - InitializeTCPSendRequest( - Request, - Complete, - Context, - ProtocolContext); - - *SendRequest = Request; - - return STATUS_SUCCESS; -} - - UINT Random( VOID) /* @@ -389,35 +307,6 @@ UINT CopyPacketToBufferChain( } -VOID FreeNdisPacket( - PNDIS_PACKET Packet) -/* - * 
FUNCTION: Frees an NDIS packet - * ARGUMENTS: - * Packet = Pointer to NDIS packet to be freed - */ -{ - PNDIS_BUFFER Buffer, NextBuffer; - - TI_DbgPrint(DEBUG_BUFFER, ("Packet (0x%X)\n", Packet)); - - /* Free all the buffers in the packet first */ - NdisQueryPacket(Packet, NULL, NULL, &Buffer, NULL); - for (; Buffer != NULL; Buffer = NextBuffer) { - PVOID Data; - UINT Length; - - NdisGetNextBuffer(Buffer, &NextBuffer); - NdisQueryBuffer(Buffer, &Data, &Length); - NdisFreeBuffer(Buffer); - ExFreePool(Data); - } - - /* Finally free the NDIS packet discriptor */ - NdisFreePacket(Packet); -} - - PVOID AdjustPacket( PNDIS_PACKET Packet, UINT Available, @@ -538,32 +427,21 @@ VOID DisplayIPPacket( for (; Buffer != NULL; Buffer = NextBuffer) { NdisGetNextBuffer(Buffer, &NextBuffer); NdisQueryBuffer(Buffer, (PVOID)&p, &Length); - - for (i = 0; i < Length; i++) { - if (i % 16 == 0) - DbgPrint("\n"); - DbgPrint("%02X ", (p[i]) & 0xFF); - } - DbgPrint("\n"); + OskitDumpBuffer( p, Length ); } } else { p = IPPacket->Header; Length = IPPacket->ContigSize; - for (i = 0; i < Length; i++) { - if (i % 16 == 0) - DbgPrint("\n"); - DbgPrint("%02X ", (p[i]) & 0xFF); - } - DbgPrint("\n"); + OskitDumpBuffer( p, Length ); } if (IPPacket->NdisPacket) { NdisQueryPacket(IPPacket->NdisPacket, NULL, NULL, NULL, &Length); Length -= MaxLLHeaderSize; - CharBuffer = ExAllocatePool(NonPagedPool, Length); + CharBuffer = exAllocatePool(NonPagedPool, Length); Length = CopyPacketToBuffer(CharBuffer, IPPacket->NdisPacket, MaxLLHeaderSize, Length); DisplayIPHeader(CharBuffer, Length); - ExFreePool(CharBuffer); + exFreePool(CharBuffer); } else { CharBuffer = IPPacket->Header; Length = IPPacket->ContigSize; @@ -633,10 +511,10 @@ VOID DisplayTCPPacket( if (IPPacket->NdisPacket) { NdisQueryPacket(IPPacket->NdisPacket, NULL, NULL, NULL, &Length); Length -= MaxLLHeaderSize; - Buffer = ExAllocatePool(NonPagedPool, Length); + Buffer = exAllocatePool(NonPagedPool, Length); Length = CopyPacketToBuffer(Buffer, 
IPPacket->NdisPacket, MaxLLHeaderSize, Length); DisplayTCPHeader(Buffer, Length); - ExFreePool(Buffer); + exFreePool(Buffer); } else { Buffer = IPPacket->Header; Length = IPPacket->ContigSize; @@ -644,4 +522,90 @@ VOID DisplayTCPPacket( } } -#endif /* DBG */ +#endif DBG /* DBG */ + +void GetDataPtr( PNDIS_PACKET Packet, + UINT Offset, + PUCHAR *DataOut, + PUINT Size ) { + PNDIS_BUFFER Buffer; + + NdisQueryPacket(Packet, NULL, NULL, &Buffer, NULL); + if( !Buffer ) { *DataOut = NULL; *Size = 0; return; } /* void function: report an empty result instead of returning a value */ + SkipToOffset( Buffer, Offset, DataOut, Size ); +} + + +#undef NdisAllocatePacket +#undef NdisAllocateBuffer +#undef NdisFreeBuffer +#undef NdisFreePacket + +NDIS_STATUS AllocatePacketWithBufferX( PNDIS_PACKET *NdisPacket, + PCHAR Data, UINT Len, + PCHAR File, UINT Line ) { + PNDIS_PACKET Packet; + PNDIS_BUFFER Buffer; + NDIS_STATUS Status; + PCHAR NewData; + + NewData = ExAllocatePool( NonPagedPool, Len ); + if( !NewData ) return NDIS_STATUS_NOT_ACCEPTED; // XXX + TrackWithTag(EXALLOC_TAG, NewData, File, Line); + + if( Data ) + RtlCopyMemory(NewData, Data, Len); + + NdisAllocatePacket( &Status, &Packet, GlobalPacketPool ); + if( Status != NDIS_STATUS_SUCCESS ) { + ExFreePool( NewData ); + return Status; + } + TrackWithTag(NDIS_PACKET_TAG, Packet, File, Line); + + NdisAllocateBuffer( &Status, &Buffer, GlobalBufferPool, NewData, Len ); + if( Status != NDIS_STATUS_SUCCESS ) { + ExFreePool( NewData ); + FreeNdisPacket( Packet ); return Status; /* do not track or chain an unallocated buffer, and do not report success */ + } + TrackWithTag(NDIS_BUFFER_TAG, Buffer, File, Line); + + NdisChainBufferAtFront( Packet, Buffer ); + *NdisPacket = Packet; + + return NDIS_STATUS_SUCCESS; +} + + +VOID FreeNdisPacketX +( PNDIS_PACKET Packet, + PCHAR File, + UINT Line ) +/* + * FUNCTION: Frees an NDIS packet + * ARGUMENTS: + * Packet = Pointer to NDIS packet to be freed + */ +{ + PNDIS_BUFFER Buffer, NextBuffer; + + TI_DbgPrint(DEBUG_BUFFER, ("Packet (0x%X)\n", Packet)); + + /* Free all the buffers in the packet first */ + NdisQueryPacket(Packet, NULL, NULL, &Buffer, NULL); + for (; Buffer != NULL;
Buffer = NextBuffer) { + PVOID Data; + UINT Length; + + NdisGetNextBuffer(Buffer, &NextBuffer); + NdisQueryBuffer(Buffer, &Data, &Length); + NdisFreeBuffer(Buffer); + UntrackFL(File,Line,Buffer); + ExFreePool(Data); + UntrackFL(File,Line,Data); + } + + /* Finally free the NDIS packet discriptor */ + NdisFreePacket(Packet); + UntrackFL(File,Line,Packet); +} diff --git a/reactos/drivers/net/tcpip/tcpip/tinfo.c b/reactos/drivers/net/tcpip/tcpip/tinfo.c new file mode 100644 index 00000000000..6e2ee7628b2 --- /dev/null +++ b/reactos/drivers/net/tcpip/tcpip/tinfo.c @@ -0,0 +1,40 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: tcpip/tinfo.c + * PURPOSE: Transport layer information + * PROGRAMMERS: Art Yerkes + * REVISIONS: + * CSH 01/08-2000 Created + */ +#include +#include +#include +#include +#include + +TDI_STATUS InfoTransportLayerTdiQueryEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PNDIS_BUFFER Buffer, + PUINT BufferSize ) { + if( InfoClass == INFO_CLASS_GENERIC && + InfoType == INFO_TYPE_PROVIDER && + InfoId == ENTITY_TYPE_ID ) { + ULONG Temp = CL_TL_UDP; + return InfoCopyOut( &Temp, sizeof(Temp), Buffer, BufferSize ); + } + + return TDI_INVALID_REQUEST; +} + +TDI_STATUS InfoTransportLayerTdiSetEx( UINT InfoClass, + UINT InfoType, + UINT InfoId, + PVOID Context, + TDIEntityID *id, + PCHAR Buffer, + UINT BufferSize ) { +} diff --git a/reactos/drivers/net/tcpip/transport/datagram/datagram.c b/reactos/drivers/net/tcpip/transport/datagram/datagram.c index a7beb2746c5..14d6b2ca1d3 100644 --- a/reactos/drivers/net/tcpip/transport/datagram/datagram.c +++ b/reactos/drivers/net/tcpip/transport/datagram/datagram.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -124,13 +125,6 @@ VOID SendDatagramComplete( CompleteContext = SendRequest->Context; BytesSent = SendRequest->BufferSize; - /* Remove data buffer before 
releasing memory for packet buffers */ - NdisQueryPacket(Packet, NULL, NULL, &NdisBuffer, NULL); - NdisUnchainBufferAtBack(Packet, &NdisBuffer); - FreeNdisPacket(Packet); - DereferenceObject(SendRequest->RemoteAddress); - ExFreePool(SendRequest); - /* If there are pending send requests, shedule worker function */ KeAcquireSpinLock(&DGPendingListLock, &OldIrql); QueueWorkItem = (!IsListEmpty(&DGPendingListHead)); @@ -141,7 +135,7 @@ VOID SendDatagramComplete( TI_DbgPrint(MAX_TRACE, ("Calling 0x%X.\n", Complete)); /* Call completion routine for send request */ - (*Complete)(CompleteContext, NdisStatus, BytesSent); + (*Complete)(Context, NdisStatus, BytesSent); TI_DbgPrint(MAX_TRACE, ("Leaving.\n")); } @@ -168,8 +162,6 @@ VOID DGSend( TI_DbgPrint(MAX_TRACE, ("Called.\n")); - ASSERT(SendRequest->Build); - /* Get the information we need from the address file now so we minimize the time we hold the spin lock */ KeAcquireSpinLock(&AddrFile->Lock, &OldIrql); @@ -182,71 +174,73 @@ VOID DGSend( /* Loop until there are no more send requests in the transmit queue or until we run out of resources */ for (;;) - { - Status = (*SendRequest->Build)(SendRequest, ADE->Address, LocalPort, &IPPacket); + { + TI_DbgPrint(MIN_TRACE, ("Looping on DGSend !!!! 
WHEE!\n")); if (!NT_SUCCESS(Status)) - { + { KeAcquireSpinLock(&AddrFile->Lock, &OldIrql); /* An error occurred, enqueue the send request again and return */ InsertHeadList(&AddrFile->TransmitQueue, &SendRequest->ListEntry); DereferenceObject(ADE); KeReleaseSpinLock(&AddrFile->Lock, OldIrql); - + TI_DbgPrint(MIN_TRACE, ("Leaving (insufficient resources).\n")); return; - } - + } + /* Get a route to the destination address */ - if (RouteGetRouteToDestination(SendRequest->RemoteAddress, ADE->NTE, &RCN) == IP_SUCCESS) - { + if (RouteGetRouteToDestination(&SendRequest->RemoteAddress, ADE->NTE, &RCN) == IP_SUCCESS) + { /* Set completion routine and send the packet */ + IPPacket = &SendRequest->Packet; PC(IPPacket->NdisPacket)->Complete = SendDatagramComplete; PC(IPPacket->NdisPacket)->Context = SendRequest; if (IPSendDatagram(IPPacket, RCN) != STATUS_SUCCESS) - { - SendDatagramComplete(SendRequest, - IPPacket->NdisPacket, - NDIS_STATUS_REQUEST_ABORTED); - } + { + TI_DbgPrint(MIN_TRACE, ("!! Datagram sent !! (completing)\n")); + SendDatagramComplete(SendRequest, + IPPacket->NdisPacket, + NDIS_STATUS_REQUEST_ABORTED); + } /* We're done with the RCN */ DereferenceObject(RCN); - } + } else - { - /* No route to destination */ - /* FIXME: Which error code should we use here? */ - TI_DbgPrint(MIN_TRACE, ("No route to destination address (0x%X).\n", - SendRequest->RemoteAddress->Address.IPv4Address)); - SendDatagramComplete(SendRequest, - IPPacket->NdisPacket, - NDIS_STATUS_REQUEST_ABORTED); - } + { + /* No route to destination */ + /* FIXME: Which error code should we use here? 
*/ + TI_DbgPrint(MIN_TRACE, + ("No route to destination address (0x%X).\n", + SendRequest->RemoteAddress.Address.IPv4Address)); + SendDatagramComplete(SendRequest, + IPPacket->NdisPacket, + NDIS_STATUS_REQUEST_ABORTED); + } - (*IPPacket->Free)(IPPacket); - /* Check transmit queue for more to send */ - + KeAcquireSpinLock(&AddrFile->Lock, &OldIrql); - + if (!IsListEmpty(&AddrFile->TransmitQueue)) - { + { /* Transmit queue is not empty, process one more request */ CurrentEntry = RemoveHeadList(&AddrFile->TransmitQueue); SendRequest = CONTAINING_RECORD(CurrentEntry, DATAGRAM_SEND_REQUEST, ListEntry); - + KeReleaseSpinLock(&AddrFile->Lock, OldIrql); - } + TI_DbgPrint(MIN_TRACE, ("List is not empty\n")); + } else - { + { /* Transmit queue is empty */ AF_CLR_PENDING(AddrFile, AFF_SEND); DereferenceObject(ADE); KeReleaseSpinLock(&AddrFile->Lock, OldIrql); - + TI_DbgPrint(MAX_TRACE, ("Leaving (empty queue).\n")); return; - } - } + } + } } @@ -346,7 +340,7 @@ VOID DGDeliverData( { DereferenceObject(Current->RemoteAddress); } - ExFreePool(Current); + exFreePool(Current); } } else if (AddrFile->RegisteredReceiveDatagramHandler) @@ -431,8 +425,7 @@ VOID DGCancelSendRequest( { /* Complete the request and free its resources */ (*Current->Complete)(Current->Context, STATUS_CANCELLED, 0); - DereferenceObject(Current->RemoteAddress); - ExFreePool(Current); + exFreePool(Current); } else { @@ -489,7 +482,7 @@ VOID DGCancelReceiveRequest( { DereferenceObject(Current->RemoteAddress); } - ExFreePool(Current); + exFreePool(Current); } else { @@ -528,83 +521,80 @@ NTSTATUS DGTransmit( KeReleaseSpinLock(&AddressFile->Lock, OldIrql); /* Send the datagram */ DGSend(AddressFile, SendRequest); - TI_DbgPrint(MAX_TRACE, ("Leaving (pending).\n")); + TI_DbgPrint(MAX_TRACE, ("Leaving (pending).\n")); } return STATUS_PENDING; } -NTSTATUS DGSendDatagram( - PTDI_REQUEST Request, - PTDI_CONNECTION_INFORMATION ConnInfo, - PNDIS_BUFFER Buffer, - ULONG DataSize, - DATAGRAM_BUILD_ROUTINE Build) +NTSTATUS 
DGSendDatagram( PTDI_REQUEST Request, + PTDI_CONNECTION_INFORMATION ConnInfo, + PIP_PACKET Packet ) { /* * FUNCTION: Sends a datagram to a remote address * ARGUMENTS: * Request = Pointer to TDI request * ConnInfo = Pointer to connection information - * Buffer = Pointer to NDIS buffer with data - * DataSize = Size in bytes of data to be sent - * Build = Pointer to datagram build routine + * Packet = Pointer to NDIS buffer with data * RETURNS: * Status of operation */ -{ - PADDRESS_FILE AddrFile; - KIRQL OldIrql; - NTSTATUS Status; - PDATAGRAM_SEND_REQUEST SendRequest; - - TI_DbgPrint(MAX_TRACE, ("Called.\n")); - - AddrFile = Request->Handle.AddressHandle; - - KeAcquireSpinLock(&AddrFile->Lock, &OldIrql); - - if (AF_IS_VALID(AddrFile)) - { - /* Initialize a send request */ - Status = BuildDatagramSendRequest(&SendRequest, - NULL, - 0, - Buffer, - DataSize, - Request->RequestNotifyObject, - Request->RequestContext, - Build, - 0); - if (NT_SUCCESS(Status)) - { - Status = AddrGetAddress(ConnInfo->RemoteAddress, - &SendRequest->RemoteAddress, - &SendRequest->RemotePort, - &AddrFile->AddrCache); - if (NT_SUCCESS(Status)) - { - KeReleaseSpinLock(&AddrFile->Lock, OldIrql); - return DGTransmit(AddrFile, SendRequest); - } - else - { - ExFreePool(SendRequest); - } - } - else - { - Status = STATUS_INSUFFICIENT_RESOURCES; - } + PADDRESS_FILE AddrFile; + KIRQL OldIrql; + NTSTATUS Status = STATUS_SUCCESS; /* tested with NT_SUCCESS() before any other assignment */ + PDATAGRAM_SEND_REQUEST SendRequest; + + TI_DbgPrint(MAX_TRACE, ("Called.\n")); + + AddrFile = Request->Handle.AddressHandle; + + KeAcquireSpinLock(&AddrFile->Lock, &OldIrql); + + if (AF_IS_VALID(AddrFile)) { + /* Initialize a send request */ + SendRequest = exAllocatePool( NonPagedPool, + sizeof( DATAGRAM_SEND_REQUEST ) ); + + if( !SendRequest ) { /* allocation failed */ + KeReleaseSpinLock(&AddrFile->Lock, OldIrql); + return STATUS_INSUFFICIENT_RESOURCES; + } + + SendRequest->Complete = Request->RequestNotifyObject; + SendRequest->Context = Request->RequestContext; + NdisQueryPacketLength( Packet->NdisPacket, +
&SendRequest->BufferSize ); + SendRequest->Packet = *Packet; + + if (NT_SUCCESS(Status)) { + Status = AddrGetAddress(ConnInfo->RemoteAddress, + &SendRequest->RemoteAddress, + &SendRequest->RemotePort, + &AddrFile->AddrCache); + if (NT_SUCCESS(Status)) + { + KeReleaseSpinLock(&AddrFile->Lock, OldIrql); + return DGTransmit(AddrFile, SendRequest); + } + else + { + exFreePool(SendRequest); + } + } + else + { + Status = STATUS_INSUFFICIENT_RESOURCES; + } } - else + else { - Status = STATUS_ADDRESS_CLOSED; + Status = STATUS_ADDRESS_CLOSED; } - - KeReleaseSpinLock(&AddrFile->Lock, OldIrql); - - TI_DbgPrint(MAX_TRACE, ("Leaving. Status (0x%X)\n", Status)); - - return Status; + + KeReleaseSpinLock(&AddrFile->Lock, OldIrql); + + TI_DbgPrint(MAX_TRACE, ("Leaving. Status (0x%X)\n", Status)); + + return Status; } @@ -645,7 +635,7 @@ NTSTATUS DGReceiveDatagram( if (AF_IS_VALID(AddrFile)) { - ReceiveRequest = ExAllocatePool(NonPagedPool, sizeof(DATAGRAM_RECEIVE_REQUEST)); + ReceiveRequest = exAllocatePool(NonPagedPool, sizeof(DATAGRAM_RECEIVE_REQUEST)); if (ReceiveRequest) { /* Initialize a receive request */ @@ -660,7 +650,7 @@ NTSTATUS DGReceiveDatagram( if (!NT_SUCCESS(Status)) { KeReleaseSpinLock(&AddrFile->Lock, OldIrql); - ExFreePool(ReceiveRequest); + exFreePool(ReceiveRequest); return Status; } } diff --git a/reactos/drivers/net/tcpip/transport/rawip/rawip.c b/reactos/drivers/net/tcpip/transport/rawip/rawip.c index 16c5130c967..d4181cdae0a 100644 --- a/reactos/drivers/net/tcpip/transport/rawip/rawip.c +++ b/reactos/drivers/net/tcpip/transport/rawip/rawip.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -21,8 +22,7 @@ BOOLEAN RawIPInitialized = FALSE; NTSTATUS BuildRawIPPacket( PVOID Context, PIP_ADDRESS LocalAddress, - USHORT LocalPort, - PIP_PACKET *IPPacket) + USHORT LocalPort ) /* * FUNCTION: Builds an UDP packet * ARGUMENTS: @@ -35,43 +35,26 @@ NTSTATUS BuildRawIPPacket( */ { PVOID Header; - PIP_PACKET Packet; 
NDIS_STATUS NdisStatus; PNDIS_BUFFER HeaderBuffer; PDATAGRAM_SEND_REQUEST SendRequest = (PDATAGRAM_SEND_REQUEST)Context; - - TI_DbgPrint(MAX_TRACE, ("TCPIP.SYS: NDIS data buffer is at (0x%X).\n", SendRequest->Buffer)); - TI_DbgPrint(MAX_TRACE, ("NDIS data buffer Next is at (0x%X).\n", SendRequest->Buffer->Next)); - TI_DbgPrint(MAX_TRACE, ("NDIS data buffer Size is (0x%X).\n", SendRequest->Buffer->Size)); - TI_DbgPrint(MAX_TRACE, ("NDIS data buffer MappedSystemVa is (0x%X).\n", SendRequest->Buffer->MappedSystemVa)); - TI_DbgPrint(MAX_TRACE, ("NDIS data buffer StartVa is (0x%X).\n", SendRequest->Buffer->StartVa)); - TI_DbgPrint(MAX_TRACE, ("NDIS data buffer ByteCount is (0x%X).\n", SendRequest->Buffer->ByteCount)); - TI_DbgPrint(MAX_TRACE, ("NDIS data buffer ByteOffset is (0x%X).\n", SendRequest->Buffer->ByteOffset)); + PIP_PACKET Packet = &SendRequest->Packet; /* Prepare packet */ /* FIXME: Assumes IPv4 */ - Packet = IPCreatePacket(IP_ADDRESS_V4); if (!Packet) return STATUS_INSUFFICIENT_RESOURCES; + IPInitializePacket(Packet,IP_ADDRESS_V4); Packet->Flags = IP_PACKET_FLAG_RAW; /* Don't touch IP header */ Packet->TotalSize = SendRequest->BufferSize; - /* Allocate NDIS packet */ - NdisAllocatePacket(&NdisStatus, &Packet->NdisPacket, GlobalPacketPool); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - TI_DbgPrint(MIN_TRACE, ("Cannot allocate NDIS packet. NdisStatus = (0x%X)\n", NdisStatus)) - (*Packet->Free)(Packet); - return STATUS_INSUFFICIENT_RESOURCES; - } - if (MaxLLHeaderSize != 0) { Header = ExAllocatePool(NonPagedPool, MaxLLHeaderSize); if (!Header) { TI_DbgPrint(MIN_TRACE, ("Cannot allocate memory for packet headers.\n")); - NdisFreePacket(Packet->NdisPacket); - (*Packet->Free)(Packet); + FreeNdisPacket(Packet->NdisPacket); return STATUS_INSUFFICIENT_RESOURCES; } @@ -87,21 +70,15 @@ NTSTATUS BuildRawIPPacket( if (NdisStatus != NDIS_STATUS_SUCCESS) { TI_DbgPrint(MIN_TRACE, ("Cannot allocate NDIS buffer for packet headers. 
NdisStatus = (0x%X)\n", NdisStatus)); ExFreePool(Header); - NdisFreePacket(Packet->NdisPacket); - (*Packet->Free)(Packet); + FreeNdisPacket(Packet->NdisPacket); return STATUS_INSUFFICIENT_RESOURCES; } /* Chain header at front of packet */ NdisChainBufferAtFront(Packet->NdisPacket, HeaderBuffer); } - /* Chain data after link level header if it exists */ - NdisChainBufferAtBack(Packet->NdisPacket, SendRequest->Buffer); - DISPLAY_IP_PACKET(Packet); - *IPPacket = Packet; - return STATUS_SUCCESS; } @@ -122,8 +99,30 @@ NTSTATUS RawIPSendDatagram( * Status of operation */ { - return DGSendDatagram(Request, ConnInfo, - Buffer, DataSize, BuildRawIPPacket); + NDIS_STATUS Status; + PCHAR BufferData; + UINT BufferLen; + PADDRESS_FILE AddrFile = + (PADDRESS_FILE)Request->Handle.AddressHandle; + PDATAGRAM_SEND_REQUEST SendRequest; + + SendRequest = ExAllocatePool( NonPagedPool, sizeof(*SendRequest) ); + if( !SendRequest ) return STATUS_INSUFFICIENT_RESOURCES; /* scratch request could not be allocated */ + NdisQueryBuffer( Buffer, &BufferData, &BufferLen ); + Status = AllocatePacketWithBuffer( &SendRequest->Packet.NdisPacket, + BufferData, + BufferLen ); + /* Build and send only when the NDIS packet was actually allocated. NOTE(review): SendRequest is not freed on any path visible here -- confirm who owns it */ + if( Status == NDIS_STATUS_SUCCESS ) { + BuildRawIPPacket( SendRequest, + (PIP_ADDRESS)&AddrFile->ADE->Address->Address.
+ IPv4Address, + AddrFile->Port ); + Status = DGSendDatagram(Request, ConnInfo, &SendRequest->Packet); + NdisFreeBuffer( Buffer ); + } + + return Status; } diff --git a/reactos/drivers/net/tcpip/transport/tcp/event.c b/reactos/drivers/net/tcpip/transport/tcp/event.c new file mode 100644 index 00000000000..865f01589b4 --- /dev/null +++ b/reactos/drivers/net/tcpip/transport/tcp/event.c @@ -0,0 +1,323 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS TCP/IP protocol driver + * FILE: transport/tcp/event.c + * PURPOSE: Transmission Control Protocol -- Events from oskittcp + * PROGRAMMERS: Art Yerkes + * REVISIONS: + * CSH 01/08-2000 Created + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern ULONG TCP_IPIdentification; +/* Reads pending data for Connection through OskitTCPRecv: peeks up to 1024 bytes, offers them to the address file's receive handler when one is registered, and re-reads BytesTaken bytes without OSK_MSG_PEEK when the handler returns STATUS_SUCCESS. OSK_ESHUTDOWN / OSK_ECONNRESET go to the disconnect handler; any other error hits assert(0). Loops while error is 0 and both BytesRead and BytesTaken are non-zero. */ +void TCPRecvNotify( PCONNECTION_ENDPOINT Connection, UINT Flags ) { + int error = 0; + NTSTATUS Status = 0; + CHAR DataBuffer[1024]; + UINT BytesRead = 0, BytesTaken = 0; + PTDI_IND_RECEIVE ReceiveHandler; + PTDI_IND_DISCONNECT DisconnectHandler; + PVOID HandlerContext; + SOCKADDR Addr; + + TI_DbgPrint(MID_TRACE,("XX> Called\n")); + + do { + error = OskitTCPRecv( Connection->SocketContext, + &Addr, + DataBuffer, + 1024, + &BytesRead, + Flags | OSK_MSG_DONTWAIT | OSK_MSG_PEEK ); + /* This first read only peeks; the accepted bytes are read again below without OSK_MSG_PEEK */ + switch( error ) { + case 0: + ReceiveHandler = Connection->AddressFile->ReceiveHandler; + HandlerContext = Connection->AddressFile->ReceiveHandlerContext; + + TI_DbgPrint(MID_TRACE,("Received %d bytes\n", BytesRead)); + + if( Connection->AddressFile->RegisteredReceiveHandler ) + Status = ReceiveHandler( HandlerContext, + NULL, + TDI_RECEIVE_NORMAL, + BytesRead, + BytesRead, + &BytesTaken, + DataBuffer, + NULL ); + else + Status = STATUS_UNSUCCESSFUL; + + if( Status == STATUS_SUCCESS ) { + OskitTCPRecv( Connection->SocketContext, + &Addr, + DataBuffer, + BytesTaken, + &BytesRead, + Flags | OSK_MSG_DONTWAIT ); + } + break; + + case OSK_ESHUTDOWN: + case OSK_ECONNRESET: + DisconnectHandler = Connection->AddressFile->DisconnectHandler; + HandlerContext = Connection->AddressFile->DisconnectHandlerContext; + + if( Connection->AddressFile->RegisteredDisconnectHandler ) + Status = DisconnectHandler( HandlerContext, + NULL, + 0, + NULL, + 0, + NULL, + (error == OSK_ESHUTDOWN) ? + TDI_DISCONNECT_RELEASE : + TDI_DISCONNECT_ABORT ); + else + Status = STATUS_UNSUCCESSFUL; + break; + + default: + assert( 0 ); + break; + } + } while( error == 0 && BytesRead > 0 && BytesTaken > 0 ); + + TI_DbgPrint(MID_TRACE,("XX> Leaving\n")); +} +/* Runs the ordinary receive path with no extra flags; TCPSocketState calls this for SEL_FIN. */ +void TCPCloseNotify( PCONNECTION_ENDPOINT Connection ) { + TCPRecvNotify( Connection, 0 ); +} +/* Parallel, zero-terminated tables used by TCPSocketState to print the names of the SEL_* bits set in Flags. */ +char *FlagNames[] = { "SEL_CONNECT", + "SEL_FIN", + "SEL_ACCEPT", + "SEL_OOB", + "SEL_READ", + "SEL_WRITE", + 0 }; +int FlagValues[] = { SEL_CONNECT, + SEL_FIN, + SEL_ACCEPT, + SEL_OOB, + SEL_READ, + SEL_WRITE, + 0 }; +/* Logs the SEL_* bits present in Flags and dispatches SEL_FIN, SEL_OOB and SEL_READ to the receive path; SEL_CONNECT, SEL_ACCEPT and SEL_WRITE are empty statements for now. WhichConnection is used as a PCONNECTION_ENDPOINT. */ +void TCPSocketState( void *ClientData, + void *WhichSocket, + void *WhichConnection, + OSK_UINT Flags, + OSK_UINT SocketState ) { + int i; + PCONNECTION_ENDPOINT Connection = + (PCONNECTION_ENDPOINT)WhichConnection; + + TI_DbgPrint(MID_TRACE,("TCPSocketState: (socket %x) %x %x\n", + WhichSocket, Flags, SocketState)); + + for( i = 0; FlagValues[i]; i++ ) { + if( Flags & FlagValues[i] ) + TI_DbgPrint(MID_TRACE,("Flag %s\n", FlagNames[i])); + } + + if( Flags & SEL_CONNECT ) + /* TCPConnectNotify( Connection ); */ ; + if( Flags & SEL_FIN ) + TCPCloseNotify( Connection ); + if( Flags & SEL_ACCEPT ) + /* TCPAcceptNotify( Connection ); */ ; + if( Flags & SEL_OOB ) + TCPRecvNotify( Connection, MSG_OOB ); + if( Flags & SEL_WRITE ) + /* TCPSendNotify( Connection ); */ ; + if( Flags & SEL_READ ) + TCPRecvNotify( Connection, 0 ); +} +/* Completion routine installed by TCPPacketSend: treats Context as the send request, frees its NDIS packet (if any) and then the request itself. NdisStatus and BytesSent are not examined. */ +void TCPPacketSendComplete( PVOID Context, + NDIS_STATUS NdisStatus, + DWORD BytesSent ) { + TI_DbgPrint(MID_TRACE,("called\n")); + PDATAGRAM_SEND_REQUEST Send = (PDATAGRAM_SEND_REQUEST)Context; + if( Send->Packet.NdisPacket ) + FreeNdisPacket( Send->Packet.NdisPacket ); + exFreePool( Send ); +} + +NTSTATUS AddHeaderIPv4( + PDATAGRAM_SEND_REQUEST SendRequest, + PIP_ADDRESS LocalAddress, + USHORT LocalPort, + PIP_ADDRESS RemoteAddress, + USHORT RemotePort) { +/* + * FUNCTION: Adds a link-level placeholder and an IPv4 header to the packet of a send request + * ARGUMENTS: + * SendRequest = Pointer to send request (its Packet member is updated) + * LocalAddress = Pointer to our local address (IPv4 source) + * LocalPort = The port we send this segment from (not used here) + * RemoteAddress = Pointer to the destination address (IPv4 destination) + * RemotePort = The port we send this segment to (not used here) + * RETURNS: + * Status of operation + */ + PIPv4_HEADER IPHeader; + PIP_PACKET IPPacket; + PVOID Header; + NDIS_STATUS NdisStatus; + PNDIS_BUFFER HeaderBuffer; + PCHAR BufferContent; + ULONG BufferSize; + ULONG PayloadBufferSize; + + IPPacket = &SendRequest->Packet; + + BufferSize = MaxLLHeaderSize + sizeof(IPv4_HEADER); + Header = exAllocatePool(NonPagedPool, BufferSize); + if (!Header) + return STATUS_INSUFFICIENT_RESOURCES; + + TI_DbgPrint(MAX_TRACE, ("Allocated %d bytes for headers at 0x%X.\n", BufferSize, Header)); + + NdisQueryPacketLength( IPPacket->NdisPacket, &PayloadBufferSize ); + + /* Allocate NDIS buffer for maximum link level header plus the IPv4 header */ + NdisAllocateBuffer(&NdisStatus, + &HeaderBuffer, + GlobalBufferPool, + Header, + BufferSize); + if (NdisStatus != NDIS_STATUS_SUCCESS) { + exFreePool(Header); + TI_DbgPrint(MAX_TRACE, ("Error from NDIS: %08x\n", NdisStatus)); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Chain header at front of NDIS packet */ + NdisChainBufferAtFront(IPPacket->NdisPacket, HeaderBuffer); + IPPacket->HeaderSize = 20; + IPPacket->ContigSize = BufferSize; + IPPacket->TotalSize = IPPacket->HeaderSize + PayloadBufferSize; + IPPacket->Header = (PVOID)((ULONG_PTR)Header + MaxLLHeaderSize); + IPPacket->Flags = 0; + + /* Build IPv4 header */ + IPHeader = (PIPv4_HEADER)IPPacket->Header; + /* Version = 4, Length = 5 DWORDs */ + IPHeader->VerIHL = 0x45; + /* Normal Type-of-Service */ + IPHeader->Tos = 0; + /* Length of
header and data */ + IPHeader->TotalLength = WH2N((USHORT)IPPacket->TotalSize); + /* Identification */ + IPHeader->Id = WH2N((USHORT)InterlockedIncrement(&TCP_IPIdentification)); + /* One fragment at offset 0 */ + IPHeader->FlagsFragOfs = WH2N((USHORT)IPv4_DF_MASK); + /* Time-to-Live is 128 */ + IPHeader->Ttl = 128; + /* Transmission Control Protocol */ + IPHeader->Protocol = IPPROTO_TCP; + /* Checksum is 0 (for later calculation of this) */ + IPHeader->Checksum = 0; + /* Source address */ + IPHeader->SrcAddr = LocalAddress->Address.IPv4Address; + /* Destination address. FIXME: IPv4 only */ + IPHeader->DstAddr = RemoteAddress->Address.IPv4Address; + + return STATUS_SUCCESS; +} +/* Wraps data/len in an NDIS packet, prepends an IPv4 header via AddHeaderIPv4 and, when WhichConnection is non-NULL, passes the request to DGTransmit. Returns 0 on success, OSK_EADDRNOTAVAIL when RouterGetRoute finds no route, OSK_EINVAL on allocation, NDIS or header failure. The send request is released here on every failure path. */ +int TCPPacketSend(void *ClientData, + void *WhichSocket, + void *WhichConnection, + OSK_PCHAR data, + OSK_UINT len ) { + NDIS_STATUS NdisStatus; + KIRQL OldIrql; + PDATAGRAM_SEND_REQUEST SendRequest; + PNEIGHBOR_CACHE_ENTRY NCE = 0; + PCONNECTION_ENDPOINT Connection = (PCONNECTION_ENDPOINT)WhichConnection; + IP_ADDRESS RemoteAddress, LocalAddress; + USHORT RemotePort, LocalPort; + + TI_DbgPrint(MID_TRACE,("TCP OUTPUT:\n")); + OskitDumpBuffer( data, len ); + + SendRequest = + (PDATAGRAM_SEND_REQUEST) + exAllocatePool( NonPagedPool, sizeof( DATAGRAM_SEND_REQUEST ) ); + if( !SendRequest ) return OSK_EINVAL; /* pool exhausted; nothing to clean up yet */ + + RemoteAddress.Type = LocalAddress.Type = IP_ADDRESS_V4; + + OskitTCPGetAddress( WhichSocket, + &LocalAddress.Address.IPv4Address, + &LocalPort, + &RemoteAddress.Address.IPv4Address, + &RemotePort ); + + NCE = RouterGetRoute( &RemoteAddress, NULL ); + + if( !NCE ) { exFreePool( SendRequest ); return OSK_EADDRNOTAVAIL; } /* no route: release the request before leaving */ + + GetInterfaceIPv4Address(NCE->Interface, + ADE_UNICAST, + &LocalAddress.Address.IPv4Address ); + + if( Connection ) + KeAcquireSpinLock( &Connection->Lock, &OldIrql ); + + NdisStatus = + AllocatePacketWithBuffer( &SendRequest->PacketToSend, data, len ); + if (NdisStatus != NDIS_STATUS_SUCCESS) { + TI_DbgPrint(MAX_TRACE, ("Error from NDIS: %08x\n", NdisStatus)); + exFreePool( SendRequest ); /* no packet was attached; only the request needs freeing */ + goto end; + } + + SendRequest->Packet.NdisPacket = SendRequest->PacketToSend; + + SendRequest->Complete = TCPPacketSendComplete; + SendRequest->Context = Connection; /* NOTE(review): TCPPacketSendComplete casts its Context to PDATAGRAM_SEND_REQUEST and frees it -- confirm Connection is the intended context */ + SendRequest->RemoteAddress = RemoteAddress; + SendRequest->RemotePort = RemotePort; + NdisQueryPacketLength( SendRequest->Packet.NdisPacket, + &SendRequest->BufferSize ); + + NdisStatus = AddHeaderIPv4( SendRequest, + &LocalAddress, + LocalPort, + &RemoteAddress, + RemotePort ); + if( !NT_SUCCESS(NdisStatus) ) { /* header could not be built: do not transmit, undo the allocations */ + FreeNdisPacket( SendRequest->PacketToSend ); + exFreePool( SendRequest ); + } else if( Connection ) + DGTransmit( Connection->AddressFile, SendRequest ); + else + DbgPrint("Transmit called without connection.\n"); + +end: + if( Connection ) + KeReleaseSpinLock( &Connection->Lock, OldIrql ); + + if( !NT_SUCCESS(NdisStatus) ) return OSK_EINVAL; + else return 0; +} + diff --git a/reactos/drivers/net/tcpip/transport/tcp/if.c b/reactos/drivers/net/tcpip/transport/tcp/if.c new file mode 100644 index 00000000000..6c344c82308 --- /dev/null +++ b/reactos/drivers/net/tcpip/transport/tcp/if.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 1997-1998 University of Utah and the Flux Group. + * All rights reserved. + * + * This file is part of the Flux OSKit. The OSKit is free software, also known + * as "open source;" you can redistribute it and/or modify it under the terms + * of the GNU General Public License (GPL), version 2, as published by the Free + * Software Foundation (FSF). To explore alternate licensing terms, contact + * the University of Utah at csl-dist@cs.utah.edu or +1-801-585-3271. + * + * The OSKit is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GPL for more details. You should have + * received a copy of the GPL along with the OSKit; see the file COPYING. If + * not, write to the FSF, 59 Temple Place #330, Boston, MA 02111-1307, USA.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#include +#include +#include +#include +#endif + +#include + +int if_index = 0; +struct ifaddr **ifnet_addrs; + +int ifqmaxlen = OSK_IFQ_MAXLEN; +struct ifnet *ifnet; + +/* + * Network interface utility routines. + * + * Routines with ifa_ifwith* names take sockaddr *'s as + * parameters. + */ +void +ifinit() +{ +} + +void +if_attach(ifp) + struct ifnet *ifp; +{ + KeBugCheck( 0xface ); +} + +struct ifnet * +ifunit(char *name) +{ + return 0; +} +/* Looks up the route to addr with RouterGetRoute and fills *ifaddr with the outgoing interface's IPv4 address of the requested ADE type plus its MTU. Returns 0 on success, OSK_EINVAL when addr or ifaddr is NULL, OSK_EADDRNOTAVAIL when no route, interface or address is available. */ +int ifa_iffind(addr, ifaddr, type) + struct sockaddr *addr; + struct ifaddr *ifaddr; + int type; +{ + PNEIGHBOR_CACHE_ENTRY NCE; + IP_ADDRESS Destination; + NTSTATUS Status; + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + + TI_DbgPrint(MID_TRACE,("called for type %d\n", type)); + + if( !addr || !ifaddr ) { + TI_DbgPrint(MID_TRACE,("no addr or no ifaddr (%x %x)\n", + addr, ifaddr)); + return OSK_EINVAL; + } + + Destination.Type = IP_ADDRESS_V4; + Destination.Address.IPv4Address = addr_in->sin_addr.s_addr; + + NCE = RouterGetRoute(&Destination, NULL); + + if( !NCE || !NCE->Interface ) { + TI_DbgPrint(MID_TRACE,("no neighbor cache or no interface (%x %x)\n", + NCE, NCE ? NCE->Interface : NULL)); /* NCE itself may be NULL on this path */ + return OSK_EADDRNOTAVAIL; + } + + /* XXX - Point-to-point interfaces not supported yet */ + memset(&ifaddr->ifa_dstaddr, 0, sizeof( struct sockaddr ) ); + + addr_in->sin_family = PF_INET; + addr_in = (struct sockaddr_in *)&ifaddr->ifa_addr; + Status = GetInterfaceIPv4Address( NCE->Interface, + type, + &addr_in->sin_addr.s_addr ); + + if( !NT_SUCCESS(Status) ) + addr_in->sin_addr.s_addr = 0; + + ifaddr->ifa_flags = 0; /* XXX what goes here? */ + ifaddr->ifa_refcnt = 0; /* Anachronistic */ + ifaddr->ifa_metric = 1; /* We can get it like in ninfo.c, if we want */ + ifaddr->ifa_mtu = NCE->Interface->MTU; + + TI_DbgPrint(MID_TRACE,("status in iffind: %x\n", Status)); + + return NT_SUCCESS(Status) ?
0 : OSK_EADDRNOTAVAIL; +} + +/* + * Find an interface on a specific network. If many, choice + * is most specific found. + */ +int ifa_ifwithnet(addr, ifaddr) + struct sockaddr *addr; + struct ifaddr *ifaddr; +{ + return ifa_iffind(addr, ifaddr, ADE_UNICAST); +} + +/* + * Locate the point to point interface with a given destination address. NOTE(review): the int status of ifa_iffind is returned through a struct ifaddr * return type -- confirm the intended signature. + */ +/*ARGSUSED*/ +struct ifaddr * +ifa_ifwithdstaddr(addr, ifaddr) + register struct sockaddr *addr; + register struct ifaddr *ifaddr; +{ + return ifa_iffind(addr, ifaddr, ADE_POINTOPOINT); +} + +/* + * Locate an interface based on a complete address. Returns the error from ifa_ifwithnet, or 0 / OSK_EADDRNOTAVAIL depending on whether the address found equals addr. + */ +/*ARGSUSED*/ +int ifa_ifwithaddr(addr, ifaddr) + struct sockaddr *addr; + struct ifaddr *ifaddr; +{ + int error = ifa_ifwithnet( addr, ifaddr ); + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + struct sockaddr_in *faddr_in; + if( error != 0 ) return error; /* also covers NULL addr/ifaddr, so neither is touched before this point */ + faddr_in = (struct sockaddr_in *)ifaddr->ifa_addr; /* NOTE(review): ifa_iffind writes through &ifaddr->ifa_addr -- confirm the field's type */ + return (faddr_in->sin_addr.s_addr == addr_in->sin_addr.s_addr) ? + 0 : OSK_EADDRNOTAVAIL; +} + +/* + * Handle interface watchdog timer routines. Called + * from softclock, we decrement timers (if set) and + * call the appropriate interface routine on expiration.
+ */ +void +if_slowtimo(arg) + void *arg; +{ +#if 0 + register struct ifnet *ifp; + int s = splimp(); + + for (ifp = ifnet; ifp; ifp = ifp->if_next) { + if (ifp->if_timer == 0 || --ifp->if_timer) + continue; + if (ifp->if_watchdog) + (*ifp->if_watchdog)(ifp->if_unit); + } + splx(s); + timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ); +#endif +} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp.c b/reactos/drivers/net/tcpip/transport/tcp/tcp.c index 66596c0b6cb..da778a0641a 100644 --- a/reactos/drivers/net/tcpip/transport/tcp/tcp.c +++ b/reactos/drivers/net/tcpip/transport/tcp/tcp.c @@ -7,6 +7,8 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include +#include #include #include #include @@ -14,1651 +16,13 @@ #include #include #include +#include - +LONG TCP_IPIdentification = 0; static BOOLEAN TCPInitialized = FALSE; -static LONG IPIdentification = 0; static NPAGED_LOOKASIDE_LIST TCPSegmentList; - -PTCP_SEGMENT TCPCreateSegment( - PIP_PACKET IPPacket, - ULONG SequenceNumber, - ULONG SegmentLength) -/* - * FUNCTION: Creates a TCP segment object - * ARGUMENTS: - * IPPacket = Pointer to IP packet containing segment data - * SequenceNumber = Sequence number of first byte in the segment - * SegmentLength = Number of bytes in the segment - * RETURNS: - * Pointer to the created TCP segment. NULL if there was not enough free resources. 
- */ -{ - PTCP_SEGMENT Segment; - - ASSERT(IPPacket); - - Segment = ExAllocateFromNPagedLookasideList(&TCPSegmentList); - if (Segment == NULL) - return NULL; - - Segment->IPPacket = IPPacket; - Segment->SequenceNumber = SequenceNumber; - Segment->Length = SegmentLength; - - TI_DbgPrint(DEBUG_TCP, ("Created TCP segment (SequenceNumber %d, Length %d)\n", - Segment->SequenceNumber, Segment->Length)); - - return Segment; -} - - -VOID TCPFreeSegment( - PTCP_SEGMENT Segment) -/* - * FUNCTION: Frees a TCP segment object - * ARGUMENTS: - * Segment = Pointer to an TCP segment structure - */ -{ - ASSERT(Segment); - - ExFreeToNPagedLookasideList(&TCPSegmentList, Segment); -} - - -VOID TCPAddSegment( - PCONNECTION_ENDPOINT Connection, - PTCP_SEGMENT Segment) -/* - * FUNCTION: Adds a TCP segment object to the receive queue of a connection - * ARGUMENTS: - * Connection = Pointer to connection endpoint - * Segment = Pointer to TCP segment object - * RETURNS: - * Nothing. - */ -{ - PLIST_ENTRY CurrentEntry; - PLIST_ENTRY NextEntry; - PTCP_SEGMENT Current; - - ASSERT(Connection); - ASSERT(Segment); - - /* FIXME: Handle sequence number wraparound */ - - /* If all segments arrive in-order then all segments will be put last in the - receive queue when they arrive */ - - if (IsListEmpty(&Connection->ReceivedSegments)) - { - /* This is the first segment received since the connection was created - or since all data is delivered to the client */ - InsertTailList(&Connection->ReceivedSegments, &Segment->ListEntry); - return; - } - - CurrentEntry = Connection->ReceivedSegments.Blink; - Current = CONTAINING_RECORD(CurrentEntry, TCP_SEGMENT, ListEntry); - if (Segment->SequenceNumber >= Current->SequenceNumber) - { - /* This segment has the highest sequence number yet received since the - connection was created */ - InsertTailList(&Connection->ReceivedSegments, &Segment->ListEntry); - return; - } - - - TI_DbgPrint(MIN_TRACE, ("FIXME: Cannot handle segments that arrive out-of-order.\n")); - 
-#if 0 - do - { - if (SequenceNumber + Length...) - { - } - - CurrentEntry = CurrentEntry->Blink; - Current = CONTAINING_RECORD(CurrentEntry, TCP_SEGMENT, ListEntry); - } - while (CurrentEntry != &Connection->ReceivedSegments); -#endif -} - - -NTSTATUS TCPiAddHeaderIPv4( - PDATAGRAM_SEND_REQUEST SendRequest, - PCONNECTION_ENDPOINT Connection, - PIP_ADDRESS LocalAddress, - USHORT LocalPort, - PIP_PACKET IPPacket, - PTCPv4_HEADER *pTcpHeader, - PULONG pTcpHeaderLength) -/* - * FUNCTION: Adds an IPv4 and TCP header to an IP packet - * ARGUMENTS: - * SendRequest = Pointer to send request - * Connection = Pointer to connection endpoint - * LocalAddress = Pointer to our local address - * LocalPort = The port we send this segment from - * IPPacket = Pointer to IP packet - * TcpHeader = Address of pointer to TCPv4 header (out) - * pTcpHeaderLength = Address of buffer for length of TCP header - * RETURNS: - * Status of operation - */ -{ - PIPv4_HEADER IPHeader; - PTCPv4_HEADER TCPHeader; - PVOID Header; - ULONG BufferSize; - NDIS_STATUS NdisStatus; - PNDIS_BUFFER HeaderBuffer; - PTCP_SEND_REQUEST TcpSendRequest = (PTCP_SEND_REQUEST)SendRequest->Context; - - ASSERT(SendRequest); - ASSERT(Connection); - ASSERT(LocalAddress); - ASSERT(IPPacket); - ASSERT(pTcpHeader); - - BufferSize = MaxLLHeaderSize + sizeof(IPv4_HEADER) + sizeof(TCPv4_HEADER); - Header = ExAllocatePool(NonPagedPool, BufferSize); - if (!Header) - return STATUS_INSUFFICIENT_RESOURCES; - - TI_DbgPrint(MAX_TRACE, ("Allocated %d bytes for headers at 0x%X.\n", BufferSize, Header)); - - /* Allocate NDIS buffer for maximum Link level, IP and TCP header */ - NdisAllocateBuffer(&NdisStatus, - &HeaderBuffer, - GlobalBufferPool, - Header, - BufferSize); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - ExFreePool(Header); - return STATUS_INSUFFICIENT_RESOURCES; - } - - /* Chain header at front of NDIS packet */ - NdisChainBufferAtFront(IPPacket->NdisPacket, HeaderBuffer); - - IPPacket->ContigSize = BufferSize; - 
IPPacket->Header = (PVOID)((ULONG_PTR)Header + MaxLLHeaderSize); - IPPacket->HeaderSize = 20; - - /* Build IPv4 header */ - IPHeader = (PIPv4_HEADER)IPPacket->Header; - /* Version = 4, Length = 5 DWORDs */ - IPHeader->VerIHL = 0x45; - /* Normal Type-of-Service */ - IPHeader->Tos = 0; - /* Length of header and data */ - IPHeader->TotalLength = WH2N((USHORT)IPPacket->TotalSize); - /* Identification */ - IPHeader->Id = WH2N((USHORT)InterlockedIncrement(&IPIdentification)); - /* One fragment at offset 0 */ - IPHeader->FlagsFragOfs = 0; - /* Time-to-Live is 128 */ - IPHeader->Ttl = 128; - /* Transmission Control Protocol */ - IPHeader->Protocol = IPPROTO_TCP; - /* Checksum is 0 (for later calculation of this) */ - IPHeader->Checksum = 0; - /* Source address */ - IPHeader->SrcAddr = LocalAddress->Address.IPv4Address; - /* Destination address. FIXME: IPv4 only */ - IPHeader->DstAddr = SendRequest->RemoteAddress->Address.IPv4Address; - - /* Build TCP header */ - TCPHeader = (PTCPv4_HEADER)((ULONG_PTR)IPHeader + sizeof(IPv4_HEADER)); - /* Port values are already big-endian values */ - TCPHeader->SourcePort = LocalPort; - TCPHeader->DestinationPort = SendRequest->RemotePort; - /* TcpSendRequest->SequenceNumber is already a big-endian value */ - TCPHeader->SequenceNumber = TcpSendRequest->SequenceNumber; - /* TcpSendRequest->AckNumber is already a big-endian value */ - TCPHeader->AckNumber = ((SendRequest->Flags & SRF_ACK) > 0) ? 
TcpSendRequest->AckNumber : 0; - TCPHeader->DataOffset = (sizeof(TCPv4_HEADER) / 4) << 4; - TCPHeader->Flags = SendRequest->Flags; - TCPHeader->Window = 0; - /* FIXME: Calculate TCP checksum and put it in TCP header */ - TCPHeader->Checksum = 0; - TCPHeader->Urgent = 0; - - *pTcpHeader = TCPHeader; - *pTcpHeaderLength = sizeof(TCPv4_HEADER); - - return STATUS_SUCCESS; -} - - -NTSTATUS TCPiBuildPacket( - PVOID Context, - PIP_ADDRESS LocalAddress, - USHORT LocalPort, - PIP_PACKET *IPPacket) -/* - * FUNCTION: Builds a TCP packet - * ARGUMENTS: - * Context = Pointer to context information (DATAGRAM_SEND_REQUEST) - * LocalAddress = Pointer to our local address - * LocalPort = The port we send this segment from - * IPPacket = Address of pointer to IP packet - * RETURNS: - * Status of operation - * NOTES: - * The Context field in the send request structure (pointed to - * by the Context field) contains a pointer to the CONNECTION_ENDPOINT - * structure for the connection - */ -{ - NTSTATUS Status; - PIP_PACKET Packet; - NDIS_STATUS NdisStatus; - PDATAGRAM_SEND_REQUEST SendRequest; - PCONNECTION_ENDPOINT Connection; - ULONG Checksum; - TCPv4_PSEUDO_HEADER TcpPseudoHeader; - PTCPv4_HEADER TcpHeader; - ULONG TcpHeaderLength; - - ASSERT(LocalAddress); - ASSERT(IPPacket); - - TI_DbgPrint(MAX_TRACE, ("Called.\n")); - - SendRequest = (PDATAGRAM_SEND_REQUEST)Context; - ASSERT(SendRequest); - Connection = (PCONNECTION_ENDPOINT)SendRequest->Context; - ASSERT(Connection); - - /* Prepare packet */ - - /* FIXME: Assumes IPv4 */ - Packet = IPCreatePacket(IP_ADDRESS_V4); - if (Packet == NULL) - return STATUS_INSUFFICIENT_RESOURCES; - - Packet->TotalSize = sizeof(IPv4_HEADER) + - sizeof(TCPv4_HEADER) + - SendRequest->BufferSize; - - /* Allocate NDIS packet */ - NdisAllocatePacket(&NdisStatus, &Packet->NdisPacket, GlobalPacketPool); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - (*Packet->Free)(Packet); - return STATUS_INSUFFICIENT_RESOURCES; - } - - switch 
(SendRequest->RemoteAddress->Type) { - case IP_ADDRESS_V4: - Status = TCPiAddHeaderIPv4(SendRequest, Connection, LocalAddress, - LocalPort, Packet, &TcpHeader, &TcpHeaderLength); - break; - case IP_ADDRESS_V6: - /* FIXME: Support IPv6 */ - TI_DbgPrint(MIN_TRACE, ("IPv6 TCP segments are not supported.\n")); - default: - Status = STATUS_UNSUCCESSFUL; - break; - } - if (!NT_SUCCESS(Status)) { - TI_DbgPrint(MIN_TRACE, ("Cannot add TCP header. Status (0x%X)\n", Status)); - NdisFreePacket(Packet->NdisPacket); - (*Packet->Free)(Packet); - return Status; - } - - /* Build pseudo TCP header which is used to prevent misrouted segments */ - TcpPseudoHeader.SourceAddress = LocalAddress->Address.IPv4Address; - TcpPseudoHeader.DestinationAddress = SendRequest->RemoteAddress->Address.IPv4Address; - TcpPseudoHeader.Zero = 0; - TcpPseudoHeader.Protocol = IPPROTO_TCP; - /* Length of TCP header and segment data */ - TcpPseudoHeader.TCPLength = WH2N(TcpHeaderLength + SendRequest->BufferSize); - Checksum = TCPv4Checksum((PUCHAR)&TcpPseudoHeader, sizeof(TCPv4_PSEUDO_HEADER), 0); - - /* Add checksum for TCP header */ - Checksum = TCPv4Checksum((PUCHAR)TcpHeader, TcpHeaderLength, ~Checksum); - - /* Chain data after header if it exists */ - if (SendRequest->Buffer != NULL) - { - PVOID Data; - UINT Size; - /* - * NOTE: - * Don't name the variable NdisBuffer, because that's internal name - * used by the NdisChainBufferAtBack macro and it's the easiest way - * how to achieve an elegant crash. 
- */ - PNDIS_BUFFER _NdisBuffer = SendRequest->Buffer; - NdisChainBufferAtBack(Packet->NdisPacket, _NdisBuffer); - /* Add checksum for segment data */ - /* FIXME: Verify that there is no problem for chained buffers with an odd length */ - while (_NdisBuffer != NULL) - { - NdisQueryBuffer(_NdisBuffer, &Data, &Size); - DbgPrint("Checksum7:(%d bytes)\n", Size); - Checksum = TCPv4Checksum(Data, Size, ~Checksum); - NdisGetNextBuffer(_NdisBuffer, &_NdisBuffer); - } - } - - TcpHeader->Checksum = Checksum; - DbgPrint("Checksum8: (0x%x)\n", Checksum); - - DISPLAY_TCP_PACKET(Packet); - - *IPPacket = Packet; - - return STATUS_SUCCESS; -} - - -VOID TCPiSendRequestComplete( - PVOID Context, - NDIS_STATUS Status, - ULONG Count) -/* - * FUNCTION: Completion routine for datagram send requests - * ARGUMENTS: - * Context = Pointer to context information (TCP_SEND_REQUEST) - * Status = Status of the request - * Count = Number of bytes sent or received - */ -{ - DATAGRAM_COMPLETION_ROUTINE Complete; - PVOID CompleteContext; - PTCP_SEND_REQUEST SendRequest = (PTCP_SEND_REQUEST)Context; - - TI_DbgPrint(MAX_TRACE, ("Called.\n")); - - Complete = SendRequest->Complete; - CompleteContext = SendRequest->Context; - ExFreePool(SendRequest); - - if (Complete != NULL) - { - TI_DbgPrint(MAX_TRACE, ("Calling completion routine.\n")); - - /* Call upper level completion routine */ - (*Complete)(CompleteContext, Status, Count); - } - - TI_DbgPrint(MAX_TRACE, ("Leaving.\n")); -} - - -VOID TCPTimeout(VOID) -/* - * FUNCTION: Transmission Control Protocol timeout handler - * NOTES: - * This routine is called by IPTimeout to perform several - * maintainance tasks - */ -{ -} - - -inline NTSTATUS TCPBuildSendRequest( - PTCP_SEND_REQUEST *SendRequest, - PDATAGRAM_SEND_REQUEST *DGSendRequest, - PCONNECTION_ENDPOINT Connection, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PNDIS_BUFFER Buffer, - DWORD BufferSize, - ULONG Flags) -/* - * FUNCTION: Allocates and intializes a TCP send request - * 
ARGUMENTS: - * SendRequest = TCP send request - * DGSendRequest = Datagram send request (optional) - * Connection = Connection endpoint - * Complete = Completion routine - * Context = Pointer to context information - * Buffer = Pointer to NDIS buffer to send (optional) - * BufferSize = Size of Buffer - * Flags = Protocol specific flags - * RETURNS: - * Status of operation - */ -{ - PDATAGRAM_SEND_REQUEST DGSendReq; - NTSTATUS Status; - - ASSERT(SendRequest); - ASSERT(Connection); - - Status = BuildTCPSendRequest( - SendRequest, - Complete, - Context, - NULL); - if (!NT_SUCCESS(Status)) - return Status; - - Status = BuildDatagramSendRequest( - &DGSendReq, /* Datagram send request */ - Connection->RemoteAddress, /* Address of remote peer */ - Connection->RemotePort, /* Port of remote peer */ - Buffer, /* Buffer */ - BufferSize, /* Size of buffer */ - (DATAGRAM_COMPLETION_ROUTINE) - TCPiSendRequestComplete, /* Completion function */ - *SendRequest, /* Context for completion function */ - TCPiBuildPacket, /* Packet build function */ - Flags); /* Protocol specific flags */ - if (!NT_SUCCESS(Status)) { - ExFreePool(*SendRequest); - return Status; - } - - if (DGSendRequest) - *DGSendRequest = DGSendReq; - - return STATUS_SUCCESS; -} - - -inline NTSTATUS TCPBuildAndTransmitSendRequest( - PCONNECTION_ENDPOINT Connection, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PNDIS_BUFFER Buffer, - DWORD BufferSize, - ULONG Flags) -/* - * FUNCTION: Allocates and intializes a TCP send request - * ARGUMENTS: - * Connection = Connection endpoint - * Complete = Completion routine (optional) - * Context = Pointer to context information - * Buffer = Pointer to NDIS buffer to send (optional) - * BufferSize = Size of Buffer - * Flags = Protocol specific flags - * RETURNS: - * Status of operation - */ -{ - PDATAGRAM_SEND_REQUEST DGSendRequest; - PTCP_SEND_REQUEST TCPSendRequest; - NTSTATUS Status; - - ASSERT(Connection); - - TI_DbgPrint(MAX_TRACE, ("Called.\n")); - - Status = 
TCPBuildSendRequest( - &TCPSendRequest, - &DGSendRequest, - Connection, /* Connection endpoint */ - Complete, /* Completion routine */ - Context, /* Completion routine context */ - Buffer, /* Buffer */ - BufferSize, /* Size of buffer */ - Flags); /* Protocol specific flags */ - if (!NT_SUCCESS(Status)) - return Status; - - Status = DGTransmit( - Connection->AddressFile, - DGSendRequest); - if (!NT_SUCCESS(Status)) { - ExFreePool(DGSendRequest); - ExFreePool(TCPSendRequest); - return Status; - } - - return STATUS_SUCCESS; -} - - -inline NTSTATUS TCPBuildAndTransmitSendRequest2( - PCONNECTION_ENDPOINT Connection, - DATAGRAM_COMPLETION_ROUTINE Complete, - PVOID Context, - PNDIS_BUFFER Buffer, - DWORD BufferSize, - ULONG Flags, - ULONG SequenceNumber, - ULONG AckNumber) -/* - * FUNCTION: Allocates and intializes a TCP send request - * ARGUMENTS: - * Connection = Connection endpoint - * Complete = Completion routine (optional) - * Context = Pointer to context information - * Buffer = Pointer to NDIS buffer to send - * BufferSize = Size of Buffer - * Flags = Protocol specific flags - * SequenceNumber = Sequence number (host byte order) - * AckNumber = Acknowledgement number (host byte order) - * RETURNS: - * Status of operation - */ -{ - PDATAGRAM_SEND_REQUEST DGSendRequest; - PTCP_SEND_REQUEST TCPSendRequest; - NTSTATUS Status; - - ASSERT(Connection); - - TI_DbgPrint(MAX_TRACE, ("Called.\n")); - - Status = TCPBuildSendRequest( - &TCPSendRequest, - &DGSendRequest, - Connection, /* Connection endpoint */ - Complete, /* Completion routine */ - Context, /* Completion routine context */ - Buffer, /* Buffer */ - BufferSize, /* Size of buffer */ - Flags); /* Protocol specific flags */ - if (!NT_SUCCESS(Status)) - return Status; - - /* Set extra information and convert to network byte order if necessary */ - TCPSendRequest->SequenceNumber = DH2N(SequenceNumber); - TCPSendRequest->AckNumber = DH2N(AckNumber); - - Status = DGTransmit( - Connection->AddressFile, - DGSendRequest); 
- if (!NT_SUCCESS(Status)) { - ExFreePool(DGSendRequest); - ExFreePool(TCPSendRequest); - return Status; - } - - return STATUS_SUCCESS; -} - - -NTSTATUS TCPConnect( - PTDI_REQUEST Request, - PTDI_CONNECTION_INFORMATION ConnInfo, - PTDI_CONNECTION_INFORMATION ReturnInfo) -/* - * FUNCTION: Attempts to connect to a remote peer - * ARGUMENTS: - * Request = Pointer to TDI request - * ConnInfo = Pointer to connection information - * ReturnInfo = Pointer to structure for return information (optional) - * RETURNS: - * Status of operation - * NOTES: - * This is the high level interface for connecting to remote peers - */ -{ - PDATAGRAM_SEND_REQUEST DGSendRequest; - PTCP_SEND_REQUEST TCPSendRequest; - PCONNECTION_ENDPOINT Connection; - LARGE_INTEGER DueTime; - NTSTATUS Status; - KIRQL OldIrql; - PNDIS_BUFFER NdisBuffer; - NDIS_STATUS NdisStatus; - PVOID DataBuffer; - ULONG Size; - - ASSERT(Request); - ASSERT(ConnInfo); - - TI_DbgPrint(MID_TRACE, ("Called.\n")); - - Connection = Request->Handle.ConnectionContext; - - KeAcquireSpinLock(&Connection->Lock, &OldIrql); - - if (Connection->State != ctClosed) { - /* The connection has already been opened so return success */ - KeReleaseSpinLock(&Connection->Lock, OldIrql); - return STATUS_SUCCESS; - } - - Connection->LocalAddress = Connection->AddressFile->ADE->Address; - Connection->LocalPort = Connection->AddressFile->Port; - - Status = AddrBuildAddress( - (PTA_ADDRESS)(&((PTRANSPORT_ADDRESS)ConnInfo->RemoteAddress)->Address[0]), - &Connection->RemoteAddress, - &Connection->RemotePort); - if (!NT_SUCCESS(Status)) { - KeReleaseSpinLock(&Connection->Lock, OldIrql); - return Status; - } - - /* Allocate NDIS buffer */ - - Size = sizeof(IPv4_HEADER); - DataBuffer = ExAllocatePool(NonPagedPool, Size); - if (!DataBuffer) { - return STATUS_INSUFFICIENT_RESOURCES; - } - - NdisAllocateBuffer(&NdisStatus, &NdisBuffer, GlobalBufferPool, - DataBuffer, Size); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - KeReleaseSpinLock(&Connection->Lock, 
OldIrql); - ExFreePool(Connection->RemoteAddress); - return NdisStatus; - } - - /* Issue SYN segment */ - - Status = TCPBuildAndTransmitSendRequest( - Connection, /* Connection endpoint */ - Request->RequestNotifyObject, /* Completion routine */ - Request->RequestContext, /* Completion routine context */ - NdisBuffer, /* Buffer */ - 0, /* Size of buffer */ - SRF_SYN); /* Protocol specific flags */ - if (!NT_SUCCESS(Status)) { - KeReleaseSpinLock(&Connection->Lock, OldIrql); - ExFreePool(Connection->RemoteAddress); - return Status; - } - - KeReleaseSpinLock(&Connection->Lock, OldIrql); - - Status = STATUS_PENDING; - - TI_DbgPrint(MAX_TRACE, ("Leaving. Status (0x%X)\n", Status)); - - return Status; -} - - -NTSTATUS TCPListen( - PTDI_REQUEST Request, - PTDI_CONNECTION_INFORMATION ConnInfo, - PTDI_CONNECTION_INFORMATION ReturnInfo) -/* - * FUNCTION: Start listening for a connection from a remote peer - * ARGUMENTS: - * Request = Pointer to TDI request - * ConnInfo = Pointer to connection information - * ReturnInfo = Pointer to structure for return information (optional) - * RETURNS: - * Status of operation - * NOTES: - * This is the high level interface for listening for connections from remote peers - */ -{ - PDATAGRAM_SEND_REQUEST DGSendRequest; - PTCP_SEND_REQUEST TCPSendRequest; - PCONNECTION_ENDPOINT Connection; - LARGE_INTEGER DueTime; - NTSTATUS Status; - KIRQL OldIrql; - - ASSERT(Request); - ASSERT(ConnInfo); - - TI_DbgPrint(MID_TRACE, ("Called.\n")); - - Connection = Request->Handle.ConnectionContext; - - KeAcquireSpinLock(&Connection->Lock, &OldIrql); - - if (Connection->State != ctClosed) { - /* The connection has already been opened so return unsuccessful */ - KeReleaseSpinLock(&Connection->Lock, OldIrql); - return STATUS_UNSUCCESSFUL; - } - - Connection->LocalAddress = Connection->AddressFile->ADE->Address; - Connection->LocalPort = Connection->AddressFile->Port; - Connection->ListenRequest = Request; - - TI_DbgPrint(MIN_TRACE, ("Connection->LocalAddress 
(%s).\n", A2S(Connection->LocalAddress))); - TI_DbgPrint(MIN_TRACE, ("Connection->LocalPort (%d).\n", Connection->LocalPort)); - - /* Start listening for connection requests */ - Connection->State = ctListen; - - KeReleaseSpinLock(&Connection->Lock, OldIrql); - - Status = STATUS_PENDING; - - TI_DbgPrint(MAX_TRACE, ("Leaving. Status (0x%X)\n", Status)); - - return Status; -} - - -NTSTATUS TCPSendDatagram( - PTDI_REQUEST Request, - PTDI_CONNECTION_INFORMATION ConnInfo, - PNDIS_BUFFER Buffer, - ULONG DataSize) -/* - * FUNCTION: Sends TCP data to a remote address - * ARGUMENTS: - * Request = Pointer to TDI request - * ConnInfo = Pointer to connection information - * Buffer = Pointer to NDIS buffer with data - * DataSize = Size in bytes of data to be sent - * RETURNS: - * Status of operation - */ -{ - ASSERT(Request); - ASSERT(ConnInfo); - ASSERT(Buffer); - - return STATUS_SUCCESS; -} - - -static inline ULONG TCPiSelectISS( - PCONNECTION_ENDPOINT Connection) -{ - ASSERT(Connection); - - TI_DbgPrint(MIN_TRACE, ("Select ISS.\n")); - return 0x10000; -} - - -static inline VOID TCPiReceiveListen( - PADDRESS_FILE AddrFile, - PIP_PACKET IPPacket, - PTCPv4_HEADER TCPHeader) -{ - ASSERT(AddrFile); - ASSERT(IPPacket); - ASSERT(TCPHeader); - - /* FIXME: Protect AddrFile->Connection */ - - if ((TCPHeader->Flags & TCP_RST) > 0) - { - /* Discard */ - TI_DbgPrint(DEBUG_TCP, ("Discard.\n")); - return; - } - - if ((TCPHeader->Flags & TCP_ACK) > 0) - { - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - if ((TCPHeader->Flags & TCP_SYN) > 0) - { - PCONNECTION_ENDPOINT Connection = AddrFile->Connection; - register UCHAR DelayedControls; - register NTSTATUS Status; - - /* FIXME: If the SEG.PRC is greater than the TCB.PRC then if allowed by - the user and the system set TCB.PRC<-SEG.PRC, if not allowed - send a reset and return. 
*/ - if (FALSE) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - /* Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other - control or text should be queued for processing later. ISS - should be selected and a SYN segment sent of the form: - - - - SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection - state should be changed to SYN-RECEIVED. Note that any other - incoming control or data (combined with SYN) will be processed - in the SYN-RECEIVED state, but processing of SYN and ACK should - not be repeated. If the listen was not fully specified (i.e., - the foreign socket was not fully specified), then the - unspecified fields should be filled in now. - */ - TI_DbgPrint(DEBUG_TCP, ("Go to ctSynReceived connection state.\n")); - - Connection->RemotePort = TCPHeader->SourcePort; - /* FIXME: IPv4 only */ - Connection->RemoteAddress = AddrCloneAddress(&IPPacket->SrcAddr); - if (Connection->RemoteAddress == NULL) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - Connection->ReceiveNext = DN2H(TCPHeader->SequenceNumber) + 1; - Connection->ReceiveIRS = DN2H(TCPHeader->SequenceNumber); - Connection->State = ctSynReceived; - - Connection->SendISS = TCPiSelectISS(Connection); - Connection->SendNext = Connection->SendISS + 1; - Connection->SendUnacknowledged = Connection->SendISS; - - DelayedControls = TCPHeader->Flags & (~(TCP_SYN | TCP_ACK)); - if (DelayedControls > 0) - { - TI_DbgPrint(MIN_TRACE, ("FIXME: Queue controls (0x%x) for later processing.\n", DelayedControls)); - } - - TI_DbgPrint(MIN_TRACE, ("IPPacket->HeaderSize: %d\n", IPPacket->HeaderSize)); - TI_DbgPrint(MIN_TRACE, ("TCPHeader->DataOffset: %d\n", TCPHeader->DataOffset)); - TI_DbgPrint(MIN_TRACE, ("IPPacket->TotalSize: %d\n", IPPacket->TotalSize)); - - if (IPPacket->HeaderSize + (TCPHeader->DataOffset & 0x0F) > IPPacket->TotalSize) - { - TI_DbgPrint(MIN_TRACE, 
("FIXME: Queue segment data (%d bytes) for later processing.\n", - (IPPacket->HeaderSize + (TCPHeader->DataOffset & 0x0F)) - IPPacket->TotalSize)); - } - - /* Issue SYN/ACK segment */ - Status = TCPBuildAndTransmitSendRequest2( - Connection, /* Connection endpoint */ - NULL, /* Completion routine */ - NULL, /* Completion routine context */ - NULL, /* Buffer */ - 0, /* Size of buffer */ - SRF_SYN | SRF_ACK, /* Protocol specific flags */ - Connection->SendISS, /* Sequence number */ - Connection->ReceiveNext); /* Acknowledgement number */ - if (!NT_SUCCESS(Status)) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - ASSERT(Connection->ListenRequest != NULL); - - TI_DbgPrint(DEBUG_TCP, ("Completing listen request at %p.\n", Connection->ListenRequest)); - - /* Complete the listen request */ - (*((DATAGRAM_COMPLETION_ROUTINE)Connection->ListenRequest->RequestNotifyObject))( - Connection->ListenRequest->RequestContext, - STATUS_SUCCESS, - 0); - return; - } - - /* Discard the segment as it is invalid */ - TI_DbgPrint(DEBUG_TCP, ("Discard.\n")); -} - - -static inline VOID TCPiReceiveSynSent( - PADDRESS_FILE AddrFile, - PIP_PACKET IPPacket, - PTCPv4_HEADER TCPHeader) -{ - ASSERT(AddrFile); - ASSERT(IPPacket); - ASSERT(TCPHeader); - - /* FIXME: Protect AddrFile->Connection */ - - if ((TCPHeader->Flags & TCP_ACK) > 0) - { - /* FIXME: If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless - the RST bit is set, if so drop the segment and return) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - /* FIXME: If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable. 
*/ - - if ((TCPHeader->Flags & TCP_RST) > 0) - { - if (TRUE /* ACK is acceptable */) - { - AddrFile->Connection->State = ctClosed; - /* FIXME: Signal client */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Signal client.\n")); - } - else - { - /* Discard segment */ - TI_DbgPrint(DEBUG_TCP, ("Discard.\n")); - } - return; - } - - /* FIXME: If the security/compartment in the segment does not exactly - match the security/compartment in the TCB */ - if (FALSE) - { - if ((TCPHeader->Flags & TCP_ACK) > 0) - { - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - } - else - { - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - } - return; - } - - if ((TCPHeader->Flags & TCP_ACK) > 0) - { - /* FIXME: If the precedence in the segment does not match the precedence in the TCB */ - if (FALSE) - { - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - else - { - /* FIXME: If the precedence in the segment is higher than the precedence - in the TCB then if allowed by the user and the system raise - the precedence in the TCB to that in the segment, if not - allowed to raise the prec then send a reset. */ - if (FALSE) - { - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - else - { - /* Continue */ - } - } - TI_DbgPrint(MIN_TRACE, ("?.\n")); - return; /* ??? */ - } - - /* The ACK is ok, or there is no ACK, and it the segment did not contain a RST */ - - if ((TCPHeader->Flags & TCP_SYN) > 0) - { - /* FIXME: The security/compartment and precedence are acceptable */ - if (TRUE) - { - PCONNECTION_ENDPOINT Connection = AddrFile->Connection; - register NTSTATUS Status; - - /* FIXME: RCV.NXT is set to SEG.SEQ+1, IRS is set to - SEG.SEQ. SND.UNA should be advanced to equal SEG.ACK (if there - is an ACK), and any segments on the retransmission queue which - are thereby acknowledged should be removed. 
- - If SND.UNA > ISS (our SYN has been ACKed), change the connection - state to ESTABLISHED, form an ACK segment - - - - Data or controls which were queued for - transmission may be included. If there are other controls or - text in the segment then continue processing at the sixth step - below where the URG bit is checked, otherwise return. - - Otherwise enter SYN-RECEIVED, form a SYN,ACK segment - - - - and send it. If there are other controls or text in the - segment, queue them for processing after the ESTABLISHED state - has been reached, return. */ - - Connection->ReceiveNext = DN2H(TCPHeader->SequenceNumber) + 1; - Connection->ReceiveIRS = DN2H(TCPHeader->SequenceNumber); - - if ((TCPHeader->Flags & TCP_ACK) > 0) - { - /* FIXME: Remove any outstanding segments on the retransmission queue that is acknowledged by this */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Maybe remove outstanding segments on the retransmission queue.\n")); - Connection->SendUnacknowledged = TCPHeader->AckNumber; - } - - if (Connection->SendUnacknowledged > Connection->SendISS) - { - TI_DbgPrint(DEBUG_TCP, ("Go to ctEstablished connection state.\n")); - Connection->State = ctEstablished; - - TI_DbgPrint(MIN_TRACE, ("FIXME: Controls or segment data queued for transmission may be sent.\n")); - - /* Issue ACK segment */ - Status = TCPBuildAndTransmitSendRequest2( - Connection, /* Connection endpoint */ - NULL, /* Completion routine */ - NULL, /* Completion routine context */ - NULL, /* Buffer */ - 0, /* Size of buffer */ - SRF_ACK, /* Protocol specific flags */ - Connection->SendNext, /* Sequence number */ - Connection->ReceiveNext); /* Acknowledgement number */ - if (!NT_SUCCESS(Status)) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - } - else - { - TI_DbgPrint(MIN_TRACE, ("Go to ctSynReceived connection state.\n")); - Connection->State = ctSynReceived; - - TI_DbgPrint(MIN_TRACE, ("FIXME: Controls or segment data queued for 
transmission may be sent.\n")); - - /* Issue SYN/ACK segment */ - Status = TCPBuildAndTransmitSendRequest2( - Connection, /* Connection endpoint */ - NULL, /* Completion routine */ - NULL, /* Completion routine context */ - NULL, /* Buffer */ - 0, /* Size of buffer */ - SRF_SYN | SRF_ACK, /* Protocol specific flags */ - Connection->SendISS, /* Sequence number */ - Connection->ReceiveNext); /* Acknowledgement number */ - if (!NT_SUCCESS(Status)) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - } - } - else - { - /* FIXME: What happens here? */ - } - } - - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); -} - -/* - * Returns TRUE if processing should be continued, FALSE if not - */ -static inline BOOLEAN TCPiReceiveSynReceived( - PADDRESS_FILE AddrFile, - PIP_PACKET IPPacket, - PTCPv4_HEADER TCPHeader) -{ - PCONNECTION_ENDPOINT Connection = AddrFile->Connection; - - ASSERT(AddrFile); - ASSERT(IPPacket); - ASSERT(TCPHeader); - - /* FIXME: Protect AddrFile->Connection */ - - /* FIXME: If SND.UNA =< SEG.ACK =< SND.NXT then enter ESTABLISHED state - and continue processing. - - If the segment acknowledgment is not acceptable, form a - reset segment, - - - - and send it. 
*/ - - if ((Connection->SendUnacknowledged <= DN2H(TCPHeader->AckNumber)) - && (DN2H(TCPHeader->AckNumber) <= Connection->SendNext)) - { - Connection->State = ctEstablished; - TI_DbgPrint(DEBUG_TCP, ("Go to ctEstablished connection state.\n")); - return TRUE; - } - - /* FIXME: Send RST (if no RST) - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - - return FALSE; -} - - -static inline VOID TCPiReceiveData( - PADDRESS_FILE AddrFile, - PIP_PACKET IPPacket, - PTCPv4_HEADER TCPHeader, - ULONG Payload) -{ - PCONNECTION_ENDPOINT Connection = AddrFile->Connection; - PTCP_SEGMENT Segment; - NTSTATUS Status; - ULONG Acknowledged; - - ASSERT(AddrFile); - ASSERT(IPPacket); - ASSERT(TCPHeader); - - TI_DbgPrint(DEBUG_TCP, ("Called (Payload %d bytes).\n", Payload)); - - /* FIXME: Protect AddrFile->Connection */ - - /* FIXME: Once in the ESTABLISHED state, it is possible to deliver segment - text to user RECEIVE buffers. Text from segments can be moved - into buffers until either the buffer is full or the segment is - empty. If the segment empties and carries an PUSH flag, then - the user is informed, when the buffer is returned, that a PUSH - has been received. - - When the TCP takes responsibility for delivering the data to the - user it must also acknowledge the receipt of the data. - - Once the TCP takes responsibility for the data it advances - RCV.NXT over the data accepted, and adjusts RCV.WND as - apporopriate to the current buffer availability. The total of - RCV.NXT and RCV.WND should not be reduced. - - Please note the window management suggestions in section 3.7. - - Send an acknowledgment of the form: - - - - This acknowledgment should be piggybacked on a segment being - transmitted if possible without incurring undue delay. 
*/ - - /* FIXME: If the segment is not expected, drop the segment - - A segment is judged to occupy a portion of valid receive sequence - space if - - RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - - or - - RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND - */ - - Segment = TCPCreateSegment(IPPacket, - DN2H(TCPHeader->SequenceNumber), - Payload); - if (Segment == NULL) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - TCPAddSegment(Connection, Segment /*, &Acknowledged */); - - //Connection->ReceiveNext += ; - - /* Issue ACK segment */ - Status = TCPBuildAndTransmitSendRequest2( - Connection, /* Connection endpoint */ - NULL, /* Completion routine */ - NULL, /* Completion routine context */ - NULL, /* Buffer */ - 0, /* Size of buffer */ - SRF_ACK, /* Protocol specific flags */ - Connection->SendNext, /* Sequence number */ - Connection->ReceiveNext); /* Acknowledgement number */ - if (!NT_SUCCESS(Status)) - { - /* FIXME: Send RST (if no RST) - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - return; - } - - if ((TCPHeader->Flags & TCP_PSH) > 0) - { - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle PUSH flag.\n")); - } - - TI_DbgPrint(MAX_TRACE, ("\n")); -} - - -static inline VOID TCPiReceive( - PADDRESS_FILE AddrFile, - PIP_PACKET IPPacket, - PTCPv4_HEADER TCPHeader, - ULONG Payload) -{ - register CONNECTION_STATE State; - - ASSERT(AddrFile); - ASSERT(IPPacket); - ASSERT(TCPHeader); - - if (AddrFile->Connection == NULL || AddrFile->Connection->State == ctClosed) - { - if ((TCPHeader->Flags & TCP_RST) == 0) - { - /* FIXME: Send RST (if no RST) - * If the ACK bit is off, sequence number zero is used, - * - * - * - * If the ACK bit is on, - * - * - */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Send RST.\n")); - } - TI_DbgPrint(DEBUG_TCP, ("Discard.\n")); - return; - } - - if (AddrFile->Connection->State == ctListen) - { - TCPiReceiveListen(AddrFile, IPPacket, TCPHeader); - return; - } - - if (AddrFile->Connection->State == 
ctSynSent) - { - TCPiReceiveSynSent(AddrFile, IPPacket, TCPHeader); - } - - State = AddrFile->Connection->State; - if (State == ctSynReceived - || State == ctEstablished - || State == ctFinWait1 - || State == ctFinWait2 - || State == ctCloseWait - || State == ctClosing - || State == ctLastAck - || State == ctTimeWait) - { - /* Segments are processed in sequence. Initial tests on arrival - are used to discard old duplicates, but further processing is - done in SEG.SEQ order. If a segment's contents straddle the - boundary between old and new, only the new parts should be - processed. - - There are four cases for the acceptability test for an incoming - segment: - - Segment Receive Test - Length Window - ------- ------- ------------------------------------------- - - 0 0 SEG.SEQ = RCV.NXT - - 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - - >0 0 not acceptable - - >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND - - If the RCV.WND is zero, no segments will be acceptable, but - special allowance should be made to accept valid ACKs, URGs and - RSTs. - - If an incoming segment is not acceptable, an acknowledgment - should be sent in reply (unless the RST bit is set, if so drop - the segment and return): - - - - After sending the acknowledgment, drop the unacceptable segment - and return. */ - - if ((TCPHeader->Flags & TCP_RST) > 0) - { - if (AddrFile->Connection->State == ctSynReceived) - { - /* FIXME: If this connection was initiated with a passive OPEN (i.e., - came from the LISTEN state), then return this connection to - LISTEN state and return. The user need not be informed. If - this connection was initiated with an active OPEN (i.e., came - from SYN-SENT state) then the connection was refused, signal - the user "connection refused". In either case, all segments - on the retransmission queue should be removed. And in the - active OPEN case, enter the CLOSED state and delete the TCB, - and return. 
*/ - TI_DbgPrint(MIN_TRACE, ("FIXME: Maybe go to ctListen or ctClosed connection state.\n")); - return; - } - - State = AddrFile->Connection->State; - if (State == ctEstablished - || State == ctFinWait1 - || State == ctFinWait2 - || State == ctCloseWait) - { - /* FIXME: any outstanding RECEIVEs and SEND - should receive "reset" responses. All segment queues should be - flushed. Users should also receive an unsolicited general - "connection reset" signal. Enter the CLOSED state, delete the - TCB, and return. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Go to ctClosed connection state.\n")); - return; - } - - State = AddrFile->Connection->State; - if (State == ctClosing - || State == ctLastAck - || State == ctTimeWait) - { - AddrFile->Connection->State = ctClosed; - TI_DbgPrint(DEBUG_TCP, ("Go to ctClosed connection state.\n")); - return; - } - } - - /* FIXME: check security and precedence */ - - if (AddrFile->Connection->State == ctSynReceived) - { - /* FIXME: If the security/compartment and precedence in the segment do not - exactly match the security/compartment and precedence in the TCB - then send a reset, and return. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Check security/compartment and precedence.\n")); - } - - if (AddrFile->Connection->State == ctSynReceived) - { - /* FIXME: If the security/compartment and precedence in the segment do not - exactly match the security/compartment and precedence in the TCB - then send a reset, any outstanding RECEIVEs and SEND should - receive "reset" responses. All segment queues should be - flushed. Users should also receive an unsolicited general - "connection reset" signal. Enter the CLOSED state, delete the - TCB, and return. 
*/ - TI_DbgPrint(MIN_TRACE, ("FIXME: Check security/compartment and precedence.\n")); - } - - /* Note the previous check is placed following the sequence check to prevent - a segment from an old connection between these ports with a - different security or precedence from causing an abort of the - current connection. */ - - if ((TCPHeader->Flags & TCP_SYN) > 0) - { - State = AddrFile->Connection->State; - if (State == ctSynReceived - || State == ctEstablished - || State == ctFinWait1 - || State == ctFinWait2 - || State == ctCloseWait - || State == ctClosing - || State == ctLastAck - || State == ctTimeWait) - { - /* FIXME: If the SYN is in the window it is an error, send a reset, any - outstanding RECEIVEs and SEND should receive "reset" responses, - all segment queues should be flushed, the user should also - receive an unsolicited general "connection reset" signal, enter - the CLOSED state, delete the TCB, and return. - - If the SYN is not in the window this step would not be reached - and an ack would have been sent in the first step (sequence - number check). */ - - TI_DbgPrint(MIN_TRACE, ("FIXME: Maybe go to ctClosed connection state.\n")); - return; - } - } - - if ((TCPHeader->Flags & TCP_ACK) == 0) - { - /* Discard the segment */ - TI_DbgPrint(DEBUG_TCP, ("Discard.\n")); - return; - } - - if (AddrFile->Connection->State == ctSynReceived) - { - if (TCPiReceiveSynReceived(AddrFile, IPPacket, TCPHeader) == FALSE) - { - return; - } - } - - State = AddrFile->Connection->State; - if (State == ctEstablished - || State == ctCloseWait) - { - PCONNECTION_ENDPOINT Connection = AddrFile->Connection; - - /* FIXME: If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK. - Any segments on the retransmission queue which are thereby - entirely acknowledged are removed. Users should receive - positive acknowledgments for buffers which have been SENT and - fully acknowledged (i.e., SEND buffer should be returned with - "ok" response). 
If the ACK is a duplicate - (SEG.ACK < SND.UNA), it can be ignored. If the ACK acks - something not yet sent (SEG.ACK > SND.NXT) then send an ACK, - drop the segment, and return. - - If SND.UNA < SEG.ACK =< SND.NXT, the send window should be - updated. If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and - SND.WL2 =< SEG.ACK)), set SND.WND <- SEG.WND, set - SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK. - - Note that SND.WND is an offset from SND.UNA, that SND.WL1 - records the sequence number of the last segment used to update - SND.WND, and that SND.WL2 records the acknowledgment number of - the last segment used to update SND.WND. The check here - prevents using old segments to update the window. */ - - if ((Connection->SendUnacknowledged <= DN2H(TCPHeader->AckNumber)) - && (DN2H(TCPHeader->AckNumber) <= Connection->SendNext)) - { - TI_DbgPrint(MIN_TRACE, ("FIXME: Maybe remove segments on retransmission queue.\n")); - Connection->SendUnacknowledged = DN2H(TCPHeader->AckNumber); - } - - if ((Connection->SendUnacknowledged < DN2H(TCPHeader->AckNumber)) - && (DN2H(TCPHeader->AckNumber) <= Connection->SendNext)) - { - TI_DbgPrint(MIN_TRACE, ("FIXME: Update send window.\n")); - } - - if ((Connection->SendWL1 < DN2H(TCPHeader->SequenceNumber)) - || (Connection->SendWL1 == (DN2H(TCPHeader->SequenceNumber)) - && (Connection->SendWL2 <= (DN2H(TCPHeader->AckNumber))))) - { - TI_DbgPrint(DEBUG_TCP, ("Updating SendWindow, SendWL1 and SendWL2.\n")); - Connection->SendWindow = DN2H(TCPHeader->Window); - Connection->SendWL1 = DN2H(TCPHeader->SequenceNumber); - Connection->SendWL2 = DN2H(TCPHeader->AckNumber); - } - - /* Process any available data in the segment */ - TCPiReceiveData(AddrFile, IPPacket, TCPHeader, Payload); - - return; - } - - if (AddrFile->Connection->State == ctFinWait1) - { - /* FIXME: In addition to the processing for the ESTABLISHED state, if - our FIN is now acknowledged then enter FIN-WAIT-2 and continue - processing in that state. 
*/ - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle ctFinWait1 connection state.\n")); - return; - } - - if (AddrFile->Connection->State == ctFinWait2) - { - /* FIXME: In addition to the processing for the ESTABLISHED state, if - the retransmission queue is empty, the user's CLOSE can be - acknowledged ("ok") but do not delete the TCB. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle ctFinWait2 connection state.\n")); - return; - } - - if (AddrFile->Connection->State == ctClosing) - { - /* FIXME: In addition to the processing for the ESTABLISHED state, if - the ACK acknowledges our FIN then enter the TIME-WAIT state, - otherwise ignore the segment. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle ctClosing connection state.\n")); - return; - } - - if (AddrFile->Connection->State == ctLastAck) - { - /* FIXME: The only thing that can arrive in this state is an - acknowledgment of our FIN. If our FIN is now acknowledged, - delete the TCB, enter the CLOSED state, and return. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle ctLastAck connection state.\n")); - return; - } - - if (AddrFile->Connection->State == ctTimeWait) - { - /* FIXME: The only thing that can arrive in this state is a - retransmission of the remote FIN. Acknowledge it, and restart - the 2 MSL timeout. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle ctTimeWait connection state.\n")); - return; - } - - if ((TCPHeader->Flags & TCP_URG) > 0) - { - State = AddrFile->Connection->State; - if (State == ctEstablished - || State == ctFinWait1 - || State == ctFinWait2) - { - /* FIXME: If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal - the user that the remote side has urgent data if the urgent - pointer (RCV.UP) is in advance of the data consumed. If the - user has already been signaled (or is still in the "urgent - mode") for this continuous sequence of urgent data, do not - signal the user again. 
*/ - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle URG flag.\n")); - return; - } - - State = AddrFile->Connection->State; - if (State == ctCloseWait - || State == ctClosing - || State == ctLastAck - || State == ctTimeWait) - { - /* This should not occur, since a FIN has been received from the - remote side. Ignore the URG. */ - } - } - - State = AddrFile->Connection->State; - if (State == ctEstablished - || State == ctFinWait1 - || State == ctFinWait2) - { - /* Process any available data in the segment */ - TCPiReceiveData(AddrFile, IPPacket, TCPHeader, Payload); - return; - } - - State = AddrFile->Connection->State; - if (State == ctCloseWait - || State == ctClosing - || State == ctLastAck - || State == ctTimeWait) - { - /* This should not occur, since a FIN has been received from the - remote side. Ignore the segment text. */ - } - - if ((TCPHeader->Flags & TCP_FIN) > 0) - { - /* Do not process the FIN if the state is CLOSED, LISTEN or SYN-SENT - since the SEG.SEQ cannot be validated; drop the segment and - return. */ - State = AddrFile->Connection->State; - if (State == ctClosed - || State == ctListen - || State == ctSynSent) - { - /* Discard segment */ - TI_DbgPrint(DEBUG_TCP, ("Discard.\n")); - return; - } - - /* FIXME: If the FIN bit is set, signal the user "connection closing" and - return any pending RECEIVEs with same message, advance RCV.NXT - over the FIN, and send an acknowledgment for the FIN. Note that - FIN implies PUSH for any segment text not yet delivered to the - user. 
*/ - - TI_DbgPrint(MIN_TRACE, ("FIXME: Handle FIN flag.\n")); - - State = AddrFile->Connection->State; - switch (State) - { - case ctSynReceived: - case ctEstablished: - { - /* FIXME: Enter ctClosed state */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Go to ctClosed connection state.\n")); - break; - } - case ctFinWait1: - { - /* FIXME: If our FIN has been ACKed (perhaps in this segment), then - enter TIME-WAIT, start the time-wait timer, turn off the other - timers; otherwise enter the CLOSING state. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Maybe go to ctClosing connection state.\n")); - break; - } - case ctFinWait2: - { - /* FIXME: Enter the TIME-WAIT state. Start the time-wait timer, turn - off the other timers. */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Go to ctTimeWait connection state.\n")); - break; - } - case ctCloseWait: - case ctClosing: - case ctLastAck: - { - /* Remain in ctCloseWait, ctClosing or ctLastAck connection state */ - break; - } - case ctTimeWait: - { - /* FIXME: Remain in ctTimeWait connection state. 
Restart the 2 MSL time-wait - timeout */ - TI_DbgPrint(MIN_TRACE, ("FIXME: Restart the 2 MSL time-wait timeout.\n")); - return; - } - default: - ASSERT(FALSE); - return; - } - } - TI_DbgPrint(MIN_TRACE, ("Return.\n")); - return; - } -} - - -VOID TCPReceive( - PNET_TABLE_ENTRY NTE, - PIP_PACKET IPPacket) +VOID TCPReceive(PNET_TABLE_ENTRY NTE, PIP_PACKET IPPacket) /* * FUNCTION: Receives and queues TCP data * ARGUMENTS: @@ -1668,150 +32,220 @@ VOID TCPReceive( * This is the low level interface for receiving TCP data */ { - AF_SEARCH SearchContext; - PIPv4_HEADER IPv4Header; - PADDRESS_FILE AddrFile; - PTCPv4_HEADER TCPHeader; - PIP_ADDRESS DstAddress; - UINT DataSize, i; - TCPv4_PSEUDO_HEADER TcpPseudoHeader; - KIRQL OldIrql; + PCHAR BufferData = exAllocatePool( NonPagedPool, IPPacket->TotalSize ); - ASSERT(NTE); - ASSERT(IPPacket); + if( BufferData ) { + TI_DbgPrint(MID_TRACE,("Sending packet %d (%d) to oskit\n", + IPPacket->TotalSize, + IPPacket->HeaderSize)); - TI_DbgPrint(MAX_TRACE, ("Called.\n")); + memcpy( BufferData, IPPacket->Header, IPPacket->HeaderSize ); + memcpy( BufferData + IPPacket->HeaderSize, IPPacket->Data, + IPPacket->TotalSize - IPPacket->HeaderSize ); + + OskitTCPReceiveDatagram( BufferData, + IPPacket->TotalSize, + IPPacket->HeaderSize ); - switch (IPPacket->Type) { - /* IPv4 packet */ - case IP_ADDRESS_V4: - IPv4Header = IPPacket->Header; - DstAddress = &IPPacket->DstAddr; - break; - - /* IPv6 packet */ - case IP_ADDRESS_V6: - TI_DbgPrint(MIN_TRACE, ("Discarded IPv6 TCP data (%i bytes).\n", - IPPacket->TotalSize)); - - /* FIXME: IPv6 is not supported */ - return; - - default: - return; - } - - DISPLAY_TCP_PACKET(IPPacket); - - TCPHeader = (PTCPv4_HEADER)IPPacket->Data; - -#if 0 - /* Build pseudo TCP header which is used to prevent misrouted segments */ - TcpPseudoHeader.SourceAddress = IPPacket->SrcAddr.Address.IPv4Address; - TcpPseudoHeader.DestinationAddress = IPPacket->DstAddr.Address.IPv4Address;; - TcpPseudoHeader.Zero = 0; - 
TcpPseudoHeader.Protocol = 0; - /* Length of TCP header and segment data */ - TcpPseudoHeader.TCPLength = IPv4Header->TotalLength; - - /* Checksum TCP header and segment data */ - if (!TCPCorrectChecksum(&TcpPseudoHeader, IPPacket->Header, IPPacket->TotalSize)) { - TI_DbgPrint(MIN_TRACE, ("Segment received with bad checksum. Checksum field (0x%X)\n", - WN2H(TCPHeader->Checksum))); - /* Discard packet */ - return; - } -#endif - - /* FIXME: Sanity checks */ - - /* Locate the on destination address file object and deliver the - packet if one is found. If no matching address file object can be - found, drop the packet */ - - AddrFile = AddrSearchFirst(DstAddress, - TCPHeader->DestinationPort, - IPPROTO_TCP, - &SearchContext); - if (AddrFile) { - ULONG TotalLength; - ULONG DataOffset; - ULONG Payload; - - TotalLength = WN2H(IPv4Header->TotalLength); - DataOffset = (TCPHeader->DataOffset & 0xF0) >> (4 << 2); /* Left-most 4 bits (in 32-bit words)*/ - Payload = TotalLength - DataOffset; - - TI_DbgPrint(DEBUG_TCP, ("TotalLength %d, DataOffset %d (%d bytes payload)\n", - TotalLength, DataOffset, Payload)); - - /* There can be only one client */ - TI_DbgPrint(MID_TRACE, ("Found address file object for IPv4 TCP datagram to address (0x%X).\n", - DN2H(DstAddress->Address.IPv4Address))); - /* FIXME: Slow but effective synchronization */ - KeAcquireSpinLock(&AddrFile->Connection->Lock, &OldIrql); - TCPiReceive(AddrFile, IPPacket, TCPHeader, Payload); - KeReleaseSpinLock(&AddrFile->Connection->Lock, OldIrql); - } else { - /* There are no open address files that will take this datagram */ - /* FIXME: IPv4 only */ - TI_DbgPrint(MID_TRACE, ("Cannot deliver IPv4 TCP datagram to address (0x%X).\n", - DN2H(DstAddress->Address.IPv4Address))); - - /* FIXME: Send ICMP reply */ - } - - TI_DbgPrint(MAX_TRACE, ("Leaving.\n")); + exFreePool( BufferData ); + } } +/* event.c */ +void TCPSocketState( void *ClientData, + void *WhichSocket, + void *WhichConnection, + OSK_UINT SelFlags, + OSK_UINT 
SocketState ); -NTSTATUS TCPStartup( - VOID) +int TCPPacketSend( void *ClientData, + void *WhichSocket, + void *WhichConnection, + OSK_PCHAR Data, + OSK_UINT Len ); + +OSKITTCP_EVENT_HANDLERS EventHandlers = { + NULL, /* Client Data */ + TCPSocketState, /* SocketState */ + TCPPacketSend, /* PacketSend */ +}; + +NTSTATUS TCPStartup(VOID) /* * FUNCTION: Initializes the TCP subsystem * RETURNS: * Status of operation */ { - tcp_init(); - - /* Register this protocol with IP layer */ - IPRegisterProtocol(IPPROTO_TCP, TCPReceive); - - ExInitializeNPagedLookasideList( - &TCPSegmentList, /* Lookaside list */ - NULL, /* Allocate routine */ - NULL, /* Free routine */ - 0, /* Flags */ - sizeof(TCP_SEGMENT), /* Size of each entry */ - TAG('T','C','P','S'), /* Tag */ - 0); /* Depth */ - - TCPInitialized = TRUE; - - return STATUS_SUCCESS; + InitOskitTCP(); + RegisterOskitTCPEventHandlers( &EventHandlers ); + + /* Register this protocol with IP layer */ + IPRegisterProtocol(IPPROTO_TCP, TCPReceive); + + ExInitializeNPagedLookasideList( + &TCPSegmentList, /* Lookaside list */ + NULL, /* Allocate routine */ + NULL, /* Free routine */ + 0, /* Flags */ + sizeof(TCP_SEGMENT), /* Size of each entry */ + TAG('T','C','P','S'), /* Tag */ + 0); /* Depth */ + + TCPInitialized = TRUE; + + return STATUS_SUCCESS; } -NTSTATUS TCPShutdown( - VOID) +NTSTATUS TCPShutdown(VOID) /* * FUNCTION: Shuts down the TCP subsystem * RETURNS: * Status of operation */ { - if (!TCPInitialized) + if (!TCPInitialized) + return STATUS_SUCCESS; + + /* Deregister this protocol with IP layer */ + IPRegisterProtocol(IPPROTO_TCP, NULL); + + ExDeleteNPagedLookasideList(&TCPSegmentList); + + TCPInitialized = FALSE; + + DeinitOskitTCP(); + return STATUS_SUCCESS; +} - /* Deregister this protocol with IP layer */ - IPRegisterProtocol(IPPROTO_TCP, NULL); +NTSTATUS TCPTranslateError( int OskitError ) { + NTSTATUS Status = STATUS_UNSUCCESSFUL; - ExDeleteNPagedLookasideList(&TCPSegmentList); + switch( OskitError ) { + case 0: 
Status = STATUS_SUCCESS; break; + /*case OAK_EADDRNOTAVAIL: */ + case OSK_EAFNOSUPPORT: Status = STATUS_INVALID_CONNECTION; break; + case OSK_ECONNREFUSED: + case OSK_ECONNRESET: Status = STATUS_REMOTE_NOT_LISTENING; break; + default: Status = STATUS_INVALID_CONNECTION; break; + } - TCPInitialized = FALSE; + TI_DbgPrint(MID_TRACE,("Error %d -> %x\n", OskitError, Status)); + return Status; +} - return STATUS_SUCCESS; +NTSTATUS TCPConnect +( PTDI_REQUEST Request, + PTDI_CONNECTION_INFORMATION ConnInfo, + PTDI_CONNECTION_INFORMATION ReturnInfo ) { + KIRQL OldIrql; + NTSTATUS Status; + SOCKADDR_IN AddressToConnect; + PCONNECTION_ENDPOINT Connection; + + Connection = Request->Handle.ConnectionContext; + + KeAcquireSpinLock(&Connection->Lock, &OldIrql); + + PIP_ADDRESS RemoteAddress; + USHORT RemotePort; + + Status = AddrBuildAddress( + (PTA_ADDRESS)(&((PTRANSPORT_ADDRESS)ConnInfo->RemoteAddress)-> + Address[0]), + &RemoteAddress, + &RemotePort); + + if (!NT_SUCCESS(Status)) { + TI_DbgPrint(MID_TRACE, ("Could not AddrBuildAddress in TCPConnect\n")); + KeReleaseSpinLock(&Connection->Lock, OldIrql); + return Status; + } + + AddressToConnect.sin_family = AF_INET; + + memcpy( &AddressToConnect.sin_addr, + &RemoteAddress->Address.IPv4Address, + sizeof(AddressToConnect.sin_addr) ); + AddressToConnect.sin_port = RemotePort; + KeReleaseSpinLock(&Connection->Lock, OldIrql); + + return TCPTranslateError( OskitTCPConnect(Connection->SocketContext, + Connection, + &AddressToConnect, + sizeof(AddressToConnect)) ); +} + +NTSTATUS TCPClose +( PTDI_REQUEST Request ) { + PCONNECTION_ENDPOINT Connection; + + Connection = Request->Handle.ConnectionContext; + + return TCPTranslateError( OskitTCPClose( Connection->SocketContext ) ); +} + +NTSTATUS TCPListen +( PTDI_REQUEST Request, + UINT Backlog ) { + PCONNECTION_ENDPOINT Connection; + + Connection = Request->Handle.ConnectionContext; + + return TCPTranslateError( OskitTCPListen( Connection->SocketContext, + Backlog ) ); +} + +NTSTATUS 
TCPAccept +( PTDI_REQUEST Request, + VOID **NewSocketContext ) { +} + +NTSTATUS TCPReceiveData +( PTDI_REQUEST Request, + PNDIS_BUFFER Buffer, + ULONG ReceiveLength, + ULONG ReceiveFlags, + PULONG BytesReceived ) { + PCONNECTION_ENDPOINT Connection; + PCHAR DataBuffer; + UINT DataLen, Received = 0; + + Connection = Request->Handle.ConnectionContext; + + NdisQueryBuffer( Buffer, &DataBuffer, &DataLen ); + + return TCPTranslateError + ( OskitTCPRecv + ( Connection->SocketContext, + DataBuffer, + DataLen, + &Received, + ReceiveFlags ) ); +} + +NTSTATUS TCPSendData +( PTDI_REQUEST Request, + PTDI_CONNECTION_INFORMATION ConnInfo, + PNDIS_BUFFER Buffer, + ULONG DataSize ) { + PCONNECTION_ENDPOINT Connection; + PCHAR BufferData; + ULONG PacketSize; + int error; + + NdisQueryBuffer( Buffer, &BufferData, &PacketSize ); + + Connection = Request->Handle.ConnectionContext; + return OskitTCPSend( Connection->SocketContext, + BufferData, PacketSize, 0 ); +} + +NTSTATUS TCPTimeout(VOID) { + static int Times = 0; + if( (Times++ % 100) == 0 ) TimerOskitTCP(); } /* EOF */ diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_input.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_input.c deleted file mode 100755 index b4bb72a6a7c..00000000000 --- a/reactos/drivers/net/tcpip/transport/tcp/tcp_input.c +++ /dev/null @@ -1,4184 +0,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS TCP/IP protocol driver - * FILE: transport/tcp/tcp_input.c - * PURPOSE: Transmission Control Protocol - * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) - * REVISIONS: - * CSH 15-01-2003 Imported from linux kernel 2.4.20 - */ - -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Implementation of the Transmission Control Protocol(TCP). 
- * - * Version: $Id: tcp_input.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ - * - * Authors: Ross Biro, - * Fred N. van Kempen, - * Mark Evans, - * Corey Minyard - * Florian La Roche, - * Charles Hedrick, - * Linus Torvalds, - * Alan Cox, - * Matthew Dillon, - * Arnt Gulbrandsen, - * Jorge Cwik, - */ - -/* - * Changes: - * Pedro Roque : Fast Retransmit/Recovery. - * Two receive queues. - * Retransmit queue handled by TCP. - * Better retransmit timer handling. - * New congestion avoidance. - * Header prediction. - * Variable renaming. - * - * Eric : Fast Retransmit. - * Randy Scott : MSS option defines. - * Eric Schenk : Fixes to slow start algorithm. - * Eric Schenk : Yet another double ACK bug. - * Eric Schenk : Delayed ACK bug fixes. - * Eric Schenk : Floyd style fast retrans war avoidance. - * David S. Miller : Don't allow zero congestion window. - * Eric Schenk : Fix retransmitter so that it sends - * next packet on ack of previous packet. - * Andi Kleen : Moved open_request checking here - * and process RSTs for open_requests. - * Andi Kleen : Better prune_queue, and other fixes. - * Andrey Savochkin: Fix RTT measurements in the presnce of - * timestamps. - * Andrey Savochkin: Check sequence numbers correctly when - * removing SACKs due to in sequence incoming - * data segments. - * Andi Kleen: Make sure we never ack data there is not - * enough room for. Also make this condition - * a fatal error if it might still happen. 
- * Andi Kleen: Add tcp_measure_rcv_mss to make - * connections with MSS -#include -#include -#include -#include -#include -#else -#include "linux.h" -#include "tcpcore.h" -#endif - -int sysctl_tcp_timestamps = 1; -int sysctl_tcp_window_scaling = 1; -int sysctl_tcp_sack = 1; -int sysctl_tcp_fack = 1; -int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; -#ifdef CONFIG_INET_ECN -int sysctl_tcp_ecn = 1; -#else -int sysctl_tcp_ecn = 0; -#endif -int sysctl_tcp_dsack = 1; -int sysctl_tcp_app_win = 31; -int sysctl_tcp_adv_win_scale = 2; - -int sysctl_tcp_stdurg = 0; -int sysctl_tcp_rfc1337 = 0; -//int sysctl_tcp_max_orphans = NR_FILE; - -#define FLAG_DATA 0x01 /* Incoming frame contained data. */ -#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ -#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ -#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ -#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ -#define FLAG_DATA_SACKED 0x20 /* New SACK. */ -#define FLAG_ECE 0x40 /* ECE in this ACK */ -#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ -#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ - -#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) -#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) -#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) - -#define IsReno(tp) ((tp)->sack_ok == 0) -#define IsFack(tp) ((tp)->sack_ok & 2) -#define IsDSack(tp) ((tp)->sack_ok & 4) - -#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) - -/* Adapt the MSS value used to make delayed ack decision to the - * real world. 
- */ -static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - unsigned int len, lss; - - lss = tp->ack.last_seg_size; - tp->ack.last_seg_size = 0; - - /* skb->len may jitter because of SACKs, even if peer - * sends good full-sized frames. - */ - len = skb->len; - if (len >= tp->ack.rcv_mss) { - tp->ack.rcv_mss = len; - } else { - /* Otherwise, we make more careful check taking into account, - * that SACKs block is variable. - * - * "len" is invariant segment length, including TCP header. - */ - len += skb->data - skb->h.raw; - if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || - /* If PSH is not set, packet should be - * full sized, provided peer TCP is not badly broken. - * This observation (if it is correct 8)) allows - * to handle super-low mtu links fairly. - */ - (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && - !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) { - /* Subtract also invariant (if peer is RFC compliant), - * tcp header plus fixed timestamp option length. - * Resulting "len" is MSS free of SACK jitter. - */ - len -= tp->tcp_header_len; - tp->ack.last_seg_size = len; - if (len == lss) { - tp->ack.rcv_mss = len; - return; - } - } - tp->ack.pending |= TCP_ACK_PUSHED; - } -#endif -} - -static void tcp_incr_quickack(struct tcp_opt *tp) -{ -#if 0 - unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); - - if (quickacks==0) - quickacks=2; - if (quickacks > tp->ack.quick) - tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); -#endif -} - -void tcp_enter_quickack_mode(struct tcp_opt *tp) -{ -#if 0 - tcp_incr_quickack(tp); - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; -#endif -} - -/* Send ACKs quickly, if "quick" count is not exhausted - * and the session is not interactive. - */ - -static __inline__ int tcp_in_quickack_mode(struct tcp_opt *tp) -{ -#if 0 - return (tp->ack.quick && !tp->ack.pingpong); -#else - return 0; -#endif -} - -/* Buffer size and advertised window tuning. - * - * 1. 
Tuning sk->sndbuf, when connection enters established state. - */ - -static void tcp_fixup_sndbuf(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff); - - if (sk->sndbuf < 3*sndmem) - sk->sndbuf = min(3*sndmem, sysctl_tcp_wmem[2]); -#endif -} - -/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) - * - * All tcp_full_space() is split to two parts: "network" buffer, allocated - * forward and advertised in receiver window (tp->rcv_wnd) and - * "application buffer", required to isolate scheduling/application - * latencies from network. - * window_clamp is maximal advertised window. It can be less than - * tcp_full_space(), in this case tcp_full_space() - window_clamp - * is reserved for "application" buffer. The less window_clamp is - * the smoother our behaviour from viewpoint of network, but the lower - * throughput and the higher sensitivity of the connection to losses. 8) - * - * rcv_ssthresh is more strict window_clamp used at "slow start" - * phase to predict further behaviour of this connection. - * It is used for two goals: - * - to enforce header prediction at sender, even when application - * requires some significant "application buffer". It is check #1. - * - to prevent pruning of receive queue because of misprediction - * of receiver window. Check #2. - * - * The scheme does not work when sender sends good segments opening - * window and then starts to feed us spagetti. But it should work - * in common situations. Otherwise, we have to rely on queue collapsing. - */ - -/* Slow part of check#2. */ -static int -__tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - /* Optimize this! 
*/ - int truesize = tcp_win_from_space(skb->truesize)/2; - int window = tcp_full_space(sk)/2; - - while (tp->rcv_ssthresh <= window) { - if (truesize <= skb->len) - return 2*tp->ack.rcv_mss; - - truesize >>= 1; - window >>= 1; - } - return 0; -#else - return 0; -#endif -} - -static __inline__ void -tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_memory_pressure) { - int incr; - - /* Check #2. Increase window, if skb with such overhead - * will fit to rcvbuf in future. - */ - if (tcp_win_from_space(skb->truesize) <= skb->len) - incr = 2*tp->advmss; - else - incr = __tcp_grow_window(sk, tp, skb); - - if (incr) { - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); - tp->ack.quick |= 1; - } - } -#endif -} - -/* 3. Tuning rcvbuf, when connection enters established state. */ - -static void tcp_fixup_rcvbuf(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int rcvmem = tp->advmss+MAX_TCP_HEADER+16+sizeof(struct sk_buff); - - /* Try to select rcvbuf so that 4 mss-sized segments - * will fit to window and correspoding skbs will fit to our rcvbuf. - * (was 3; 4 is minimum to allow fast retransmit to work.) - */ - while (tcp_win_from_space(rcvmem) < tp->advmss) - rcvmem += 128; - if (sk->rcvbuf < 4*rcvmem) - sk->rcvbuf = min(4*rcvmem, sysctl_tcp_rmem[2]); -#endif -} - -/* 4. Try to fixup all. It is made iimediately after connection enters - * established state. 
- */ -static void tcp_init_buffer_space(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int maxwin; - - if (!(sk->userlocks&SOCK_RCVBUF_LOCK)) - tcp_fixup_rcvbuf(sk); - if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) - tcp_fixup_sndbuf(sk); - - maxwin = tcp_full_space(sk); - - if (tp->window_clamp >= maxwin) { - tp->window_clamp = maxwin; - - if (sysctl_tcp_app_win && maxwin>4*tp->advmss) - tp->window_clamp = max(maxwin-(maxwin>>sysctl_tcp_app_win), 4*tp->advmss); - } - - /* Force reservation of one segment. */ - if (sysctl_tcp_app_win && - tp->window_clamp > 2*tp->advmss && - tp->window_clamp + tp->advmss > maxwin) - tp->window_clamp = max(2*tp->advmss, maxwin-tp->advmss); - - tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - -/* 5. Recalculate window clamp after socket hit its memory bounds. */ -static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - struct sk_buff *skb; - unsigned int app_win = tp->rcv_nxt - tp->copied_seq; - int ofo_win = 0; - - tp->ack.quick = 0; - - skb_queue_walk(&tp->out_of_order_queue, skb) { - ofo_win += skb->len; - } - - /* If overcommit is due to out of order segments, - * do not clamp window. Try to expand rcvbuf instead. 
- */ - if (ofo_win) { - if (sk->rcvbuf < sysctl_tcp_rmem[2] && - !(sk->userlocks&SOCK_RCVBUF_LOCK) && - !tcp_memory_pressure && - atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) - sk->rcvbuf = min(atomic_read(&sk->rmem_alloc), sysctl_tcp_rmem[2]); - } - if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { - app_win += ofo_win; - if (atomic_read(&sk->rmem_alloc) >= 2*sk->rcvbuf) - app_win >>= 1; - if (app_win > tp->ack.rcv_mss) - app_win -= tp->ack.rcv_mss; - app_win = max(app_win, 2U*tp->advmss); - - if (!ofo_win) - tp->window_clamp = min(tp->window_clamp, app_win); - tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); - } -#endif -} - -/* There is something which you must keep in mind when you analyze the - * behavior of the tp->ato delayed ack timeout interval. When a - * connection starts up, we want to ack as quickly as possible. The - * problem is that "good" TCP's do slow start at the beginning of data - * transmission. The means that until we send the first few ACK's the - * sender will sit on his end and only queue most of his data, because - * he can only send snd_cwnd unacked packets at any given time. For - * each ACK we send, he increments snd_cwnd and transmits more of his - * queue. -DaveM - */ -static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - u32 now; - - tcp_schedule_ack(tp); - - tcp_measure_rcv_mss(tp, skb); - - now = tcp_time_stamp; - - if (!tp->ack.ato) { - /* The _first_ data packet received, initialize - * delayed ACK engine. - */ - tcp_incr_quickack(tp); - tp->ack.ato = TCP_ATO_MIN; - } else { - int m = now - tp->ack.lrcvtime; - - if (m <= TCP_ATO_MIN/2) { - /* The fastest case is the first. */ - tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; - } else if (m < tp->ack.ato) { - tp->ack.ato = (tp->ack.ato>>1) + m; - if (tp->ack.ato > tp->rto) - tp->ack.ato = tp->rto; - } else if (m > tp->rto) { - /* Too long gap. 
Apparently sender falled to - * restart window, so that we send ACKs quickly. - */ - tcp_incr_quickack(tp); - tcp_mem_reclaim(sk); - } - } - tp->ack.lrcvtime = now; - - TCP_ECN_check_ce(tp, skb); - - if (skb->len >= 128) - tcp_grow_window(sk, tp, skb); -#endif -} - -/* Called to compute a smoothed rtt estimate. The data fed to this - * routine either comes from timestamps, or from segments that were - * known _not_ to have been retransmitted [see Karn/Partridge - * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 - * piece by Van Jacobson. - * NOTE: the next three routines used to be one big routine. - * To save cycles in the RFC 1323 implementation it was better to break - * it up into three procedures. -- erics - */ -static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) -{ -#if 0 - long m = mrtt; /* RTT */ - - /* The following amusing code comes from Jacobson's - * article in SIGCOMM '88. Note that rtt and mdev - * are scaled versions of rtt and mean deviation. - * This is designed to be as fast as possible - * m stands for "measurement". - * - * On a 1990 paper the rto value is changed to: - * RTO = rtt + 4 * mdev - * - * Funny. This algorithm seems to be very broken. - * These formulae increase RTO, when it should be decreased, increase - * too slowly, when it should be incresed fastly, decrease too fastly - * etc. I guess in BSD RTO takes ONE value, so that it is absolutely - * does not matter how to _calculate_ it. Seems, it was trap - * that VJ failed to avoid. 8) - */ - if(m == 0) - m = 1; - if (tp->srtt != 0) { - m -= (tp->srtt >> 3); /* m is now error in rtt est */ - tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ - if (m < 0) { - m = -m; /* m is now abs(error) */ - m -= (tp->mdev >> 2); /* similar update on mdev */ - /* This is similar to one of Eifel findings. - * Eifel blocks mdev updates when rtt decreases. - * This solution is a bit different: we use finer gain - * for mdev in this case (alpha*beta). 
- * Like Eifel it also prevents growth of rto, - * but also it limits too fast rto decreases, - * happening in pure Eifel. - */ - if (m > 0) - m >>= 3; - } else { - m -= (tp->mdev >> 2); /* similar update on mdev */ - } - tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ - if (tp->mdev > tp->mdev_max) { - tp->mdev_max = tp->mdev; - if (tp->mdev_max > tp->rttvar) - tp->rttvar = tp->mdev_max; - } - if (after(tp->snd_una, tp->rtt_seq)) { - if (tp->mdev_max < tp->rttvar) - tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; - tp->rtt_seq = tp->snd_nxt; - tp->mdev_max = TCP_RTO_MIN; - } - } else { - /* no previous measure. */ - tp->srtt = m<<3; /* take the measured time to be rtt */ - tp->mdev = m<<1; /* make sure rto = 3*rtt */ - tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); - tp->rtt_seq = tp->snd_nxt; - } -#endif -} - -/* Calculate rto without backoff. This is the second half of Van Jacobson's - * routine referred to above. - */ -static __inline__ void tcp_set_rto(struct tcp_opt *tp) -{ -#if 0 - /* Old crap is replaced with new one. 8) - * - * More seriously: - * 1. If rtt variance happened to be less 50msec, it is hallucination. - * It cannot be less due to utterly erratic ACK generation made - * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ - * to do with delayed acks, because at cwnd>2 true delack timeout - * is invisible. Actually, Linux-2.4 also generates erratic - * ACKs in some curcumstances. - */ - tp->rto = (tp->srtt >> 3) + tp->rttvar; - - /* 2. Fixups made earlier cannot be right. - * If we do not estimate RTO correctly without them, - * all the algo is pure shit and should be replaced - * with correct one. It is exaclty, which we pretend to do. - */ -#endif -} - -/* NOTE: clamping at TCP_RTO_MIN is not required, current algo - * guarantees that rto is higher. 
- */ -static __inline__ void tcp_bound_rto(struct tcp_opt *tp) -{ -#if 0 - if (tp->rto > TCP_RTO_MAX) - tp->rto = TCP_RTO_MAX; -#endif -} - -/* Save metrics learned by this TCP session. - This function is called only, when TCP finishes successfully - i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE. - */ -void tcp_update_metrics(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct dst_entry *dst = __sk_dst_get(sk); - - dst_confirm(dst); - - if (dst && (dst->flags&DST_HOST)) { - int m; - - if (tp->backoff || !tp->srtt) { - /* This session failed to estimate rtt. Why? - * Probably, no packets returned in time. - * Reset our results. - */ - if (!(dst->mxlock&(1<rtt = 0; - return; - } - - m = dst->rtt - tp->srtt; - - /* If newly calculated rtt larger than stored one, - * store new one. Otherwise, use EWMA. Remember, - * rtt overestimation is always better than underestimation. - */ - if (!(dst->mxlock&(1<rtt = tp->srtt; - else - dst->rtt -= (m>>3); - } - - if (!(dst->mxlock&(1<>= 1; - if (m < tp->mdev) - m = tp->mdev; - - if (m >= dst->rttvar) - dst->rttvar = m; - else - dst->rttvar -= (dst->rttvar - m)>>2; - } - - if (tp->snd_ssthresh >= 0xFFFF) { - /* Slow start still did not finish. */ - if (dst->ssthresh && - !(dst->mxlock&(1<snd_cwnd>>1) > dst->ssthresh) - dst->ssthresh = (tp->snd_cwnd>>1); - if (!(dst->mxlock&(1<snd_cwnd > dst->cwnd) - dst->cwnd = tp->snd_cwnd; - } else if (tp->snd_cwnd > tp->snd_ssthresh && - tp->ca_state == TCP_CA_Open) { - /* Cong. avoidance phase, cwnd is reliable. */ - if (!(dst->mxlock&(1<ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh); - if (!(dst->mxlock&(1<cwnd = (dst->cwnd + tp->snd_cwnd)>>1; - } else { - /* Else slow start did not finish, cwnd is non-sense, - ssthresh may be also invalid. 
- */ - if (!(dst->mxlock&(1<cwnd = (dst->cwnd + tp->snd_ssthresh)>>1; - if (dst->ssthresh && - !(dst->mxlock&(1<snd_ssthresh > dst->ssthresh) - dst->ssthresh = tp->snd_ssthresh; - } - - if (!(dst->mxlock&(1<reordering < tp->reordering && - tp->reordering != sysctl_tcp_reordering) - dst->reordering = tp->reordering; - } - } -#endif -} - -/* Increase initial CWND conservatively: if estimated - * RTT is low enough (<20msec) or if we have some preset ssthresh. - * - * Numbers are taken from RFC2414. - */ -__u32 tcp_init_cwnd(struct tcp_opt *tp) -{ -#if 0 - __u32 cwnd; - - if (tp->mss_cache > 1460) - return 2; - - cwnd = (tp->mss_cache > 1095) ? 3 : 4; - - if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3))) - cwnd = 2; - else if (cwnd > tp->snd_ssthresh) - cwnd = tp->snd_ssthresh; - - return min_t(__u32, cwnd, tp->snd_cwnd_clamp); -#else - return 0; -#endif -} - -/* Initialize metrics on socket. */ - -static void tcp_init_metrics(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct dst_entry *dst = __sk_dst_get(sk); - - if (dst == NULL) - goto reset; - - dst_confirm(dst); - - if (dst->mxlock&(1<snd_cwnd_clamp = dst->cwnd; - if (dst->ssthresh) { - tp->snd_ssthresh = dst->ssthresh; - if (tp->snd_ssthresh > tp->snd_cwnd_clamp) - tp->snd_ssthresh = tp->snd_cwnd_clamp; - } - if (dst->reordering && tp->reordering != dst->reordering) { - tp->sack_ok &= ~2; - tp->reordering = dst->reordering; - } - - if (dst->rtt == 0) - goto reset; - - if (!tp->srtt && dst->rtt < (TCP_TIMEOUT_INIT<<3)) - goto reset; - - /* Initial rtt is determined from SYN,SYN-ACK. - * The segment is small and rtt may appear much - * less than real one. Use per-dst memory - * to make it more realistic. - * - * A bit of theory. RTT is time passed after "normal" sized packet - * is sent until it is ACKed. In normal curcumstances sending small - * packets force peer to delay ACKs and calculation is correct too. 
- * The algorithm is adaptive and, provided we follow specs, it - * NEVER underestimate RTT. BUT! If peer tries to make some clever - * tricks sort of "quick acks" for time long enough to decrease RTT - * to low value, and then abruptly stops to do it and starts to delay - * ACKs, wait for troubles. - */ - if (dst->rtt > tp->srtt) - tp->srtt = dst->rtt; - if (dst->rttvar > tp->mdev) { - tp->mdev = dst->rttvar; - tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); - } - tcp_set_rto(tp); - tcp_bound_rto(tp); - if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp) - goto reset; - tp->snd_cwnd = tcp_init_cwnd(tp); - tp->snd_cwnd_stamp = tcp_time_stamp; - return; - -reset: - /* Play conservative. If timestamps are not - * supported, TCP will fail to recalculate correct - * rtt, if initial rto is too small. FORGET ALL AND RESET! - */ - if (!tp->saw_tstamp && tp->srtt) { - tp->srtt = 0; - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - tp->rto = TCP_TIMEOUT_INIT; - } -#endif -} - -static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts) -{ -#if 0 - if (metric > tp->reordering) { - tp->reordering = min(TCP_MAX_REORDERING, metric); - - /* This exciting event is worth to be remembered. 8) */ - if (ts) - NET_INC_STATS_BH(TCPTSReorder); - else if (IsReno(tp)) - NET_INC_STATS_BH(TCPRenoReorder); - else if (IsFack(tp)) - NET_INC_STATS_BH(TCPFACKReorder); - else - NET_INC_STATS_BH(TCPSACKReorder); -#if FASTRETRANS_DEBUG > 1 - printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", - tp->sack_ok, tp->ca_state, - tp->reordering, tp->fackets_out, tp->sacked_out, - tp->undo_marker ? tp->undo_retrans : 0); -#endif - /* Disable FACK yet. */ - tp->sack_ok &= ~2; - } -#endif -} - -/* This procedure tags the retransmission queue when SACKs arrive. - * - * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). - * Packets in queue with these bits set are counted in variables - * sacked_out, retrans_out and lost_out, correspondingly. 
- * - * Valid combinations are: - * Tag InFlight Description - * 0 1 - orig segment is in flight. - * S 0 - nothing flies, orig reached receiver. - * L 0 - nothing flies, orig lost by net. - * R 2 - both orig and retransmit are in flight. - * L|R 1 - orig is lost, retransmit is in flight. - * S|R 1 - orig reached receiver, retrans is still in flight. - * (L|S|R is logically valid, it could occur when L|R is sacked, - * but it is equivalent to plain S and code short-curcuits it to S. - * L|S is logically invalid, it would mean -1 packet in flight 8)) - * - * These 6 states form finite state machine, controlled by the following events: - * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) - * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) - * 3. Loss detection event of one of three flavors: - * A. Scoreboard estimator decided the packet is lost. - * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modfication, head until snd.fack is lost. - * B. SACK arrives sacking data transmitted after never retransmitted - * hole was sent out. - * C. SACK arrives sacking SND.NXT at the moment, when the - * segment was retransmitted. - * 4. D-SACK added new rule: D-SACK changes any tag to S. - * - * It is pleasant to note, that state diagram turns out to be commutative, - * so that we are allowed not to be bothered by order of our actions, - * when multiple events arrive simultaneously. (see the function below). - * - * Reordering detection. - * -------------------- - * Reordering metric is maximal distance, which a packet can be displaced - * in packet stream. With SACKs we can estimate it: - * - * 1. SACK fills old hole and the corresponding segment was not - * ever retransmitted -> reordering. Alas, we cannot use it - * when segment was retransmitted. - * 2. The last flaw is solved with D-SACK. D-SACK arrives - * for retransmitted and already SACKed segment -> reordering.. 
- * Both of these heuristics are not used in Loss state, when we cannot - * account for retransmits accurately. - */ -static int -tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; - struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); - int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; - int reord = tp->packets_out; - int prior_fackets; - u32 lost_retrans = 0; - int flag = 0; - int i; - - if (!tp->sacked_out) - tp->fackets_out = 0; - prior_fackets = tp->fackets_out; - - for (i=0; istart_seq); - __u32 end_seq = ntohl(sp->end_seq); - int fack_count = 0; - int dup_sack = 0; - - /* Check for D-SACK. */ - if (i == 0) { - u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; - - if (before(start_seq, ack)) { - dup_sack = 1; - tp->sack_ok |= 4; - NET_INC_STATS_BH(TCPDSACKRecv); - } else if (num_sacks > 1 && - !after(end_seq, ntohl(sp[1].end_seq)) && - !before(start_seq, ntohl(sp[1].start_seq))) { - dup_sack = 1; - tp->sack_ok |= 4; - NET_INC_STATS_BH(TCPDSACKOfoRecv); - } - - /* D-SACK for already forgotten data... - * Do dumb counting. */ - if (dup_sack && - !after(end_seq, prior_snd_una) && - after(end_seq, tp->undo_marker)) - tp->undo_retrans--; - - /* Eliminate too old ACKs, but take into - * account more or less fresh ones, they can - * contain valid SACK info. - */ - if (before(ack, prior_snd_una-tp->max_window)) - return 0; - } - - /* Event "B" in the comment above. */ - if (after(end_seq, tp->high_seq)) - flag |= FLAG_DATA_LOST; - - for_retrans_queue(skb, sk, tp) { - u8 sacked = TCP_SKB_CB(skb)->sacked; - int in_sack; - - /* The retransmission queue is always in order, so - * we can short-circuit the walk early. 
- */ - if(!before(TCP_SKB_CB(skb)->seq, end_seq)) - break; - - fack_count++; - - in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && - !before(end_seq, TCP_SKB_CB(skb)->end_seq); - - /* Account D-SACK for retransmitted packet. */ - if ((dup_sack && in_sack) && - (sacked & TCPCB_RETRANS) && - after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) - tp->undo_retrans--; - - /* The frame is ACKed. */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { - if (sacked&TCPCB_RETRANS) { - if ((dup_sack && in_sack) && - (sacked&TCPCB_SACKED_ACKED)) - reord = min(fack_count, reord); - } else { - /* If it was in a hole, we detected reordering. */ - if (fack_count < prior_fackets && - !(sacked&TCPCB_SACKED_ACKED)) - reord = min(fack_count, reord); - } - - /* Nothing to do; acked frame is about to be dropped. */ - continue; - } - - if ((sacked&TCPCB_SACKED_RETRANS) && - after(end_seq, TCP_SKB_CB(skb)->ack_seq) && - (!lost_retrans || after(end_seq, lost_retrans))) - lost_retrans = end_seq; - - if (!in_sack) - continue; - - if (!(sacked&TCPCB_SACKED_ACKED)) { - if (sacked & TCPCB_SACKED_RETRANS) { - /* If the segment is not tagged as lost, - * we do not clear RETRANS, believing - * that retransmission is still in flight. - */ - if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out--; - tp->retrans_out--; - } - } else { - /* New sack for not retransmitted frame, - * which was in hole. It is reordering. - */ - if (!(sacked & TCPCB_RETRANS) && - fack_count < prior_fackets) - reord = min(fack_count, reord); - - if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out--; - } - } - - TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; - flag |= FLAG_DATA_SACKED; - tp->sacked_out++; - - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; - } else { - if (dup_sack && (sacked&TCPCB_RETRANS)) - reord = min(fack_count, reord); - } - - /* D-SACK. 
We can detect redundant retransmission - * in S|R and plain R frames and clear it. - * undo_retrans is decreased above, L|R frames - * are accounted above as well. - */ - if (dup_sack && - (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; - } - } - } - - /* Check for lost retransmit. This superb idea is - * borrowed from "ratehalving". Event "C". - * Later note: FACK people cheated me again 8), - * we have to account for reordering! Ugly, - * but should help. - */ - if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { - struct sk_buff *skb; - - for_retrans_queue(skb, sk, tp) { - if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) - break; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - continue; - if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) && - after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) && - (IsFack(tp) || - !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq+tp->reordering*tp->mss_cache))) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; - - if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out++; - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - flag |= FLAG_DATA_SACKED; - NET_INC_STATS_BH(TCPLostRetransmit); - } - } - } - } - - tp->left_out = tp->sacked_out + tp->lost_out; - - if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, (tp->fackets_out+1)-reord, 0); - -#if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); -#endif - return flag; -#else - return 0; -#endif -} - -void tcp_clear_retrans(struct tcp_opt *tp) -{ -#if 0 - tp->left_out = 0; - tp->retrans_out = 0; - - tp->fackets_out = 0; - tp->sacked_out = 0; - tp->lost_out = 0; - - tp->undo_marker = 0; - tp->undo_retrans = 0; -#endif -} - -/* Enter Loss state. 
If "how" is not zero, forget all SACK information - * and reset tags completely, otherwise preserve SACKs. If receiver - * dropped its ofo queue, we will know this due to reneging detection. - */ -void tcp_enter_loss(struct sock *sk, int how) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct sk_buff *skb; - int cnt = 0; - - /* Reduce ssthresh if it has not yet been made inside this window. */ - if (tp->ca_state <= TCP_CA_Disorder || - tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tcp_recalc_ssthresh(tp); - } - tp->snd_cwnd = 1; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - - tcp_clear_retrans(tp); - - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - if (!how) - tp->undo_marker = tp->snd_una; - - for_retrans_queue(skb, sk, tp) { - cnt++; - if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; - } else { - tp->sacked_out++; - tp->fackets_out = cnt; - } - } - tcp_sync_left_out(tp); - - tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tp->ca_state = TCP_CA_Loss; - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); -#endif -} - -static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - struct sk_buff *skb; - - /* If ACK arrived pointing to a remembered SACK, - * it means that our remembered SACKs do not reflect - * real state of receiver i.e. - * receiver _host_ is heavily congested (or buggy). - * Do processing similar to RTO timeout. 
- */ - if ((skb = skb_peek(&sk->write_queue)) != NULL && - (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - NET_INC_STATS_BH(TCPSACKReneging); - - tcp_enter_loss(sk, 1); - tp->retransmits++; - tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - return 1; - } - return 0; -#else - return 0; -#endif -} - -static inline int tcp_fackets_out(struct tcp_opt *tp) -{ -#if 0 - return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; -#else - return 0; -#endif -} - -static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); -#else - return 0; -#endif -} - -static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - return tp->packets_out && tcp_skb_timedout(tp, skb_peek(&sk->write_queue)); -#else - return 0; -#endif -} - -/* Linux NewReno/SACK/FACK/ECN state machine. - * -------------------------------------- - * - * "Open" Normal state, no dubious events, fast path. - * "Disorder" In all the respects it is "Open", - * but requires a bit more attention. It is entered when - * we see some SACKs or dupacks. It is split of "Open" - * mainly to move some processing from fast path to slow one. - * "CWR" CWND was reduced due to some Congestion Notification event. - * It can be ECN, ICMP source quench, local device congestion. - * "Recovery" CWND was reduced, we are fast-retransmitting. - * "Loss" CWND was reduced due to RTO timeout or SACK reneging. - * - * tcp_fastretrans_alert() is entered: - * - each incoming ACK, if state is not "Open" - * - when arrived ACK is unusual, namely: - * * SACK - * * Duplicate ACK. - * * ECN ECE. - * - * Counting packets in flight is pretty simple. - * - * in_flight = packets_out - left_out + retrans_out - * - * packets_out is SND.NXT-SND.UNA counted in packets. - * - * retrans_out is number of retransmitted segments. 
- * - * left_out is number of segments left network, but not ACKed yet. - * - * left_out = sacked_out + lost_out - * - * sacked_out: Packets, which arrived to receiver out of order - * and hence not ACKed. With SACKs this number is simply - * amount of SACKed data. Even without SACKs - * it is easy to give pretty reliable estimate of this number, - * counting duplicate ACKs. - * - * lost_out: Packets lost by network. TCP has no explicit - * "loss notification" feedback from network (for now). - * It means that this number can be only _guessed_. - * Actually, it is the heuristics to predict lossage that - * distinguishes different algorithms. - * - * F.e. after RTO, when all the queue is considered as lost, - * lost_out = packets_out and in_flight = retrans_out. - * - * Essentially, we have now two algorithms counting - * lost packets. - * - * FACK: It is the simplest heuristics. As soon as we decided - * that something is lost, we decide that _all_ not SACKed - * packets until the most forward SACK are lost. I.e. - * lost_out = fackets_out - sacked_out and left_out = fackets_out. - * It is absolutely correct estimate, if network does not reorder - * packets. And it loses any connection to reality when reordering - * takes place. We use FACK by default until reordering - * is suspected on the path to this destination. - * - * NewReno: when Recovery is entered, we assume that one segment - * is lost (classic Reno). While we are in Recovery and - * a partial ACK arrives, we assume that one more packet - * is lost (NewReno). This heuristics are the same in NewReno - * and SACK. - * - * Imagine, that's all! Forget about all this shamanism about CWND inflation - * deflation etc. CWND is real congestion window, never inflated, changes - * only according to classic VJ rules. - * - * Really tricky (and requiring careful tuning) part of algorithm - * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue(). 
- * The first determines the moment _when_ we should reduce CWND and, - * hence, slow down forward transmission. In fact, it determines the moment - * when we decide that hole is caused by loss, rather than by a reorder. - * - * tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill - * holes, caused by lost packets. - * - * And the most logically complicated part of algorithm is undo - * heuristics. We detect false retransmits due to both too early - * fast retransmit (reordering) and underestimated RTO, analyzing - * timestamps and D-SACKs. When we detect that some segments were - * retransmitted by mistake and CWND reduction was wrong, we undo - * window reduction and abort recovery phase. This logic is hidden - * inside several functions named tcp_try_undo_. - */ - -/* This function decides, when we should leave Disordered state - * and enter Recovery phase, reducing congestion window. - * - * Main question: may we further continue forward transmission - * with the same cwnd? - */ -static int -tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - /* Trick#1: The loss is proven. */ - if (tp->lost_out) - return 1; - - /* Not-A-Trick#2 : Classic rule... */ - if (tcp_fackets_out(tp) > tp->reordering) - return 1; - - /* Trick#3 : when we use RFC2988 timer restart, fast - * retransmit can be triggered by timeout of queue head. - */ - if (tcp_head_timedout(sk, tp)) - return 1; - - /* Trick#4: It is still not OK... But will it be useful to delay - * recovery more? - */ - if (tp->packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) && - !tcp_may_send_now(sk, tp)) { - /* We have nothing to send. This connection is limited - * either by receiver window or by application. - */ - return 1; - } - - return 0; -#else - return 0; -#endif -} - -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. 
- * The only another reason could be bug in receiver TCP. - */ -static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend) -{ -#if 0 - u32 holes; - - holes = max(tp->lost_out, 1U); - holes = min(holes, tp->packets_out); - - if (tp->sacked_out + holes > tp->packets_out) { - tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); - } -#endif -} - -/* Emulate SACKs for SACKless connection: account for a new dupack. */ - -static void tcp_add_reno_sack(struct tcp_opt *tp) -{ -#if 0 - ++tp->sacked_out; - tcp_check_reno_reordering(tp, 0); - tcp_sync_left_out(tp); -#endif -} - -/* Account for ACK, ACKing some data in Reno Recovery phase. */ - -static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked) -{ -#if 0 - if (acked > 0) { - /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) - tp->sacked_out = 0; - else - tp->sacked_out -= acked-1; - } - tcp_check_reno_reordering(tp, acked); - tcp_sync_left_out(tp); -#endif -} - -static inline void tcp_reset_reno_sack(struct tcp_opt *tp) -{ -#if 0 - tp->sacked_out = 0; - tp->left_out = tp->lost_out; -#endif -} - -/* Mark head of queue up as lost. 
*/ -static void -tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_seq) -{ -#if 0 - struct sk_buff *skb; - int cnt = packets; - - BUG_TRAP(cnt <= tp->packets_out); - - for_retrans_queue(skb, sk, tp) { - if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) - break; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; - } - } - tcp_sync_left_out(tp); -#endif -} - -/* Account newly detected lost packet(s) */ - -static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - if (IsFack(tp)) { - int lost = tp->fackets_out - tp->reordering; - if (lost <= 0) - lost = 1; - tcp_mark_head_lost(sk, tp, lost, tp->high_seq); - } else { - tcp_mark_head_lost(sk, tp, 1, tp->high_seq); - } - - /* New heuristics: it is possible only after we switched - * to restart timer each time when something is ACKed. - * Hence, we can detect timed out packets during fast - * retransmit without falling to slow start. - */ - if (tcp_head_timedout(sk, tp)) { - struct sk_buff *skb; - - for_retrans_queue(skb, sk, tp) { - if (tcp_skb_timedout(tp, skb) && - !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; - } - } - tcp_sync_left_out(tp); - } -#endif -} - -/* CWND moderation, preventing bursts due to too big ACKs - * in dubious situations. - */ -static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp) -{ -#if 0 - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)+tcp_max_burst(tp)); - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - -/* Decrease cwnd each second ack. 
*/ - -static void tcp_cwnd_down(struct tcp_opt *tp) -{ -#if 0 - int decr = tp->snd_cwnd_cnt + 1; - - tp->snd_cwnd_cnt = decr&1; - decr >>= 1; - - if (decr && tp->snd_cwnd > tp->snd_ssthresh/2) - tp->snd_cwnd -= decr; - - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - -/* Nothing was retransmitted or returned timestamp is less - * than timestamp of the first retransmission. - */ -static __inline__ int tcp_packet_delayed(struct tcp_opt *tp) -{ -#if 0 - return !tp->retrans_stamp || - (tp->saw_tstamp && tp->rcv_tsecr && - (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0); -#else - return 0; -#endif -} - -/* Undo procedures. */ - -#if FASTRETRANS_DEBUG > 1 -static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg) -{ -#if 0 - printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", - msg, - NIPQUAD(sk->daddr), ntohs(sk->dport), - tp->snd_cwnd, tp->left_out, - tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); -#endif -} -#else -#define DBGUNDO(x...) do { } while (0) -#endif - -static void tcp_undo_cwr(struct tcp_opt *tp, int undo) -{ -#if 0 - if (tp->prior_ssthresh) { - tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); - - if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { - tp->snd_ssthresh = tp->prior_ssthresh; - TCP_ECN_withdraw_cwr(tp); - } - } else { - tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); - } - tcp_moderate_cwnd(tp); - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - -static inline int tcp_may_undo(struct tcp_opt *tp) -{ -#if 0 - return tp->undo_marker && - (!tp->undo_retrans || tcp_packet_delayed(tp)); -#else - return 0; -#endif -} - -/* People celebrate: "We love our President!" */ -static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - if (tcp_may_undo(tp)) { - /* Happy end! We did not retransmit anything - * or our original transmission succeeded. - */ - DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? 
"loss" : "retrans"); - tcp_undo_cwr(tp, 1); - if (tp->ca_state == TCP_CA_Loss) - NET_INC_STATS_BH(TCPLossUndo); - else - NET_INC_STATS_BH(TCPFullUndo); - tp->undo_marker = 0; - } - if (tp->snd_una == tp->high_seq && IsReno(tp)) { - /* Hold old state until something *above* high_seq - * is ACKed. For Reno it is MUST to prevent false - * fast retransmits (RFC2582). SACK TCP is safe. */ - tcp_moderate_cwnd(tp); - return 1; - } - tp->ca_state = TCP_CA_Open; - return 0; -#else - return 0; -#endif -} - -/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - if (tp->undo_marker && !tp->undo_retrans) { - DBGUNDO(sk, tp, "D-SACK"); - tcp_undo_cwr(tp, 1); - tp->undo_marker = 0; - NET_INC_STATS_BH(TCPDSACKUndo); - } -#endif -} - -/* Undo during fast recovery after partial ACK. */ - -static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked) -{ -#if 0 - /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = IsReno(tp) || tp->fackets_out>tp->reordering; - - if (tcp_may_undo(tp)) { - /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. - */ - if (tp->retrans_out == 0) - tp->retrans_stamp = 0; - - tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); - - DBGUNDO(sk, tp, "Hoe"); - tcp_undo_cwr(tp, 0); - NET_INC_STATS_BH(TCPPartialUndo); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = 0; - } - return failed; -#else - return 0; -#endif -} - -/* Undo during loss recovery after partial ACK. 
*/ -static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - if (tcp_may_undo(tp)) { - struct sk_buff *skb; - for_retrans_queue(skb, sk, tp) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - } - DBGUNDO(sk, tp, "partial loss"); - tp->lost_out = 0; - tp->left_out = tp->sacked_out; - tcp_undo_cwr(tp, 1); - NET_INC_STATS_BH(TCPLossUndo); - tp->retransmits = 0; - tp->undo_marker = 0; - if (!IsReno(tp)) - tp->ca_state = TCP_CA_Open; - return 1; - } - return 0; -#else - return 0; -#endif -} - -static __inline__ void tcp_complete_cwr(struct tcp_opt *tp) -{ -#if 0 - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - -static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) -{ -#if 0 - tp->left_out = tp->sacked_out; - - if (tp->retrans_out == 0) - tp->retrans_stamp = 0; - - if (flag&FLAG_ECE) - tcp_enter_cwr(tp); - - if (tp->ca_state != TCP_CA_CWR) { - int state = TCP_CA_Open; - - if (tp->left_out || - tp->retrans_out || - tp->undo_marker) - state = TCP_CA_Disorder; - - if (tp->ca_state != state) { - tp->ca_state = state; - tp->high_seq = tp->snd_nxt; - } - tcp_moderate_cwnd(tp); - } else { - tcp_cwnd_down(tp); - } -#endif -} - -/* Process an event, which can update packets-in-flight not trivially. - * Main goal of this function is to calculate new estimate for left_out, - * taking into account both packets sitting in receiver's buffer and - * packets lost by network. - * - * Besides that it does CWND reduction, when packet loss is detected - * and changes state of machine. - * - * It does _not_ decide what to send, it is made in function - * tcp_xmit_retransmit_queue(). - */ -static void -tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, - int prior_packets, int flag) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); - - /* Some technical things: - * 1. 
Reno does not count dupacks (sacked_out) automatically. */ - if (!tp->packets_out) - tp->sacked_out = 0; - /* 2. SACK counts snd_fack in packets inaccurately. */ - if (tp->sacked_out == 0) - tp->fackets_out = 0; - - /* Now state machine starts. - * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ - if (flag&FLAG_ECE) - tp->prior_ssthresh = 0; - - /* B. In all the states check for reneging SACKs. */ - if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) - return; - - /* C. Process data loss notification, provided it is valid. */ - if ((flag&FLAG_DATA_LOST) && - before(tp->snd_una, tp->high_seq) && - tp->ca_state != TCP_CA_Open && - tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); - NET_INC_STATS_BH(TCPLoss); - } - - /* D. Synchronize left_out to current state. */ - tcp_sync_left_out(tp); - - /* E. Check state exit conditions. State can be terminated - * when high_seq is ACKed. */ - if (tp->ca_state == TCP_CA_Open) { - BUG_TRAP(tp->retrans_out == 0); - tp->retrans_stamp = 0; - } else if (!before(tp->snd_una, tp->high_seq)) { - switch (tp->ca_state) { - case TCP_CA_Loss: - tp->retransmits = 0; - if (tcp_try_undo_recovery(sk, tp)) - return; - break; - - case TCP_CA_CWR: - /* CWR is to be held something *above* high_seq - * is ACKed for CWR bit to reach receiver. */ - if (tp->snd_una != tp->high_seq) { - tcp_complete_cwr(tp); - tp->ca_state = TCP_CA_Open; - } - break; - - case TCP_CA_Disorder: - tcp_try_undo_dsack(sk, tp); - if (!tp->undo_marker || - /* For SACK case do not Open to allow to undo - * catching for all duplicate ACKs. */ - IsReno(tp) || tp->snd_una != tp->high_seq) { - tp->undo_marker = 0; - tp->ca_state = TCP_CA_Open; - } - break; - - case TCP_CA_Recovery: - if (IsReno(tp)) - tcp_reset_reno_sack(tp); - if (tcp_try_undo_recovery(sk, tp)) - return; - tcp_complete_cwr(tp); - break; - } - } - - /* F. Process state. 
*/ - switch (tp->ca_state) { - case TCP_CA_Recovery: - if (prior_snd_una == tp->snd_una) { - if (IsReno(tp) && is_dupack) - tcp_add_reno_sack(tp); - } else { - int acked = prior_packets - tp->packets_out; - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, tp, acked); - is_dupack = tcp_try_undo_partial(sk, tp, acked); - } - break; - case TCP_CA_Loss: - if (flag&FLAG_DATA_ACKED) - tp->retransmits = 0; - if (!tcp_try_undo_loss(sk, tp)) { - tcp_moderate_cwnd(tp); - tcp_xmit_retransmit_queue(sk); - return; - } - if (tp->ca_state != TCP_CA_Open) - return; - /* Loss is undone; fall through to processing in Open state. */ - default: - if (IsReno(tp)) { - if (tp->snd_una != prior_snd_una) - tcp_reset_reno_sack(tp); - if (is_dupack) - tcp_add_reno_sack(tp); - } - - if (tp->ca_state == TCP_CA_Disorder) - tcp_try_undo_dsack(sk, tp); - - if (!tcp_time_to_recover(sk, tp)) { - tcp_try_to_open(sk, tp, flag); - return; - } - - /* Otherwise enter Recovery state */ - - if (IsReno(tp)) - NET_INC_STATS_BH(TCPRenoRecovery); - else - NET_INC_STATS_BH(TCPSackRecovery); - - tp->high_seq = tp->snd_nxt; - tp->prior_ssthresh = 0; - tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out; - - if (tp->ca_state < TCP_CA_CWR) { - if (!(flag&FLAG_ECE)) - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tcp_recalc_ssthresh(tp); - TCP_ECN_queue_cwr(tp); - } - - tp->snd_cwnd_cnt = 0; - tp->ca_state = TCP_CA_Recovery; - } - - if (is_dupack || tcp_head_timedout(sk, tp)) - tcp_update_scoreboard(sk, tp); - tcp_cwnd_down(tp); - tcp_xmit_retransmit_queue(sk); -#endif -} - -/* Read draft-ietf-tcplw-high-performance before mucking - * with this code. (Superceeds RFC1323) - */ -static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag) -{ -#if 0 - __u32 seq_rtt; - - /* RTTM Rule: A TSecr value received in a segment is used to - * update the averaged RTT measurement only if the segment - * acknowledges some new data, i.e., only if it advances the - * left edge of the send window. 
- * - * See draft-ietf-tcplw-high-performance-00, section 3.3. - * 1998/04/10 Andrey V. Savochkin - * - * Changed: reset backoff as soon as we see the first valid sample. - * If we do not, we get strongly overstimated rto. With timestamps - * samples are accepted even from very old segments: f.e., when rtt=1 - * increases to 8, we retransmit 5 times and after 8 seconds delayed - * answer arrives rto becomes 120 seconds! If at least one of segments - * in window is lost... Voila. --ANK (010210) - */ - seq_rtt = tcp_time_stamp - tp->rcv_tsecr; - tcp_rtt_estimator(tp, seq_rtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); -#endif -} - -static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag) -{ -#if 0 - /* We don't have a timestamp. Can only use - * packets that are not retransmitted to determine - * rtt estimates. Also, we must not reset the - * backoff for rto until we get a non-retransmitted - * packet. This allows us to deal with a situation - * where the network delay has increased suddenly. - * I.e. Karn's algorithm. (SIGCOMM '87, p5.) - */ - - if (flag & FLAG_RETRANS_DATA_ACKED) - return; - - tcp_rtt_estimator(tp, seq_rtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); -#endif -} - -static __inline__ void -tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) -{ -#if 0 - /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ - if (tp->saw_tstamp && tp->rcv_tsecr) - tcp_ack_saw_tstamp(tp, flag); - else if (seq_rtt >= 0) - tcp_ack_no_tstamp(tp, seq_rtt, flag); -#endif -} - -/* This is Jacobson's slow start and congestion avoidance. - * SIGCOMM '88, p. 328. - */ -static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) -{ -#if 0 - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* In "safe" area, increase. */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } else { - /* In dangerous area, increase slowly. 
- * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt=0; - } else - tp->snd_cwnd_cnt++; - } - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - -/* Restart timer after forward progress on connection. - * RFC2988 recommends to restart timer to now+rto. - */ - -static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - if (tp->packets_out==0) { - tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); - } else { - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - } -#endif -} - -/* Remove acknowledged frames from the retransmission queue. */ -static int tcp_clean_rtx_queue(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb; - __u32 now = tcp_time_stamp; - int acked = 0; - __s32 seq_rtt = -1; - - while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { - struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - __u8 sacked = scb->sacked; - - /* If our packet is before the ack sequence we can - * discard it as it's confirmed to have arrived at - * the other end. - */ - if (after(scb->end_seq, tp->snd_una)) - break; - - /* Initial outgoing SYN's get put onto the write_queue - * just like anything else we transmit. It is not - * true data, and if we misinform our callers that - * this ACK acks real data, we will erroneously exit - * connection startup slow start one packet too - * quickly. This is severely frowned upon behavior. 
- */ - if(!(scb->flags & TCPCB_FLAG_SYN)) { - acked |= FLAG_DATA_ACKED; - } else { - acked |= FLAG_SYN_ACKED; - tp->retrans_stamp = 0; - } - - if (sacked) { - if(sacked & TCPCB_RETRANS) { - if(sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out--; - acked |= FLAG_RETRANS_DATA_ACKED; - seq_rtt = -1; - } else if (seq_rtt < 0) - seq_rtt = now - scb->when; - if(sacked & TCPCB_SACKED_ACKED) - tp->sacked_out--; - if(sacked & TCPCB_LOST) - tp->lost_out--; - if(sacked & TCPCB_URG) { - if (tp->urg_mode && - !before(scb->end_seq, tp->snd_up)) - tp->urg_mode = 0; - } - } else if (seq_rtt < 0) - seq_rtt = now - scb->when; - if(tp->fackets_out) - tp->fackets_out--; - tp->packets_out--; - __skb_unlink(skb, skb->list); - tcp_free_skb(sk, skb); - } - - if (acked&FLAG_ACKED) { - tcp_ack_update_rtt(tp, acked, seq_rtt); - tcp_ack_packets_out(sk, tp); - } - -#if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - if (tp->packets_out==0 && tp->sack_ok) { - if (tp->lost_out) { - printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, tp->ca_state); - tp->lost_out = 0; - } - if (tp->sacked_out) { - printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, tp->ca_state); - tp->sacked_out = 0; - } - if (tp->retrans_out) { - printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, tp->ca_state); - tp->retrans_out = 0; - } - } -#endif - return acked; -#else - return 0; -#endif -} - -static void tcp_ack_probe(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* Was it a usable window open? */ - - if (!after(TCP_SKB_CB(tp->send_head)->end_seq, tp->snd_una + tp->snd_wnd)) { - tp->backoff = 0; - tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); - /* Socket must be waked up by subsequent tcp_data_snd_check(). - * This function is not for random using! 
- */ - } else { - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); - } -#endif -} - -static __inline__ int tcp_ack_is_dubious(struct tcp_opt *tp, int flag) -{ -#if 0 - return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || - tp->ca_state != TCP_CA_Open); -#else - return 0; -#endif -} - -static __inline__ int tcp_may_raise_cwnd(struct tcp_opt *tp, int flag) -{ -#if 0 - return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !((1<ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); -#else - return 0; -#endif -} - -/* Check that window update is acceptable. - * The function assumes that snd_una<=ack<=snd_next. - */ -static __inline__ int -tcp_may_update_window(struct tcp_opt *tp, u32 ack, u32 ack_seq, u32 nwin) -{ -#if 0 - return (after(ack, tp->snd_una) || - after(ack_seq, tp->snd_wl1) || - (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd)); -#else - return 0; -#endif -} - -/* Update our send window. - * - * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 - * and in FreeBSD. NetBSD's one is even worse.) is wrong. - */ -static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp, - struct sk_buff *skb, u32 ack, u32 ack_seq) -{ -#if 0 - int flag = 0; - u32 nwin = ntohs(skb->h.th->window) << tp->snd_wscale; - - if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { - flag |= FLAG_WIN_UPDATE; - tcp_update_wl(tp, ack, ack_seq); - - if (tp->snd_wnd != nwin) { - tp->snd_wnd = nwin; - - /* Note, it is the only place, where - * fast path is recovered for sending TCP. - */ - tcp_fast_path_check(sk, tp); - - if (nwin > tp->max_window) { - tp->max_window = nwin; - tcp_sync_mss(sk, tp->pmtu_cookie); - } - } - } - - tp->snd_una = ack; - - return flag; -#else - return 0; -#endif -} - -/* This routine deals with incoming acks, but not outgoing ones. 
*/ -static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - u32 prior_snd_una = tp->snd_una; - u32 ack_seq = TCP_SKB_CB(skb)->seq; - u32 ack = TCP_SKB_CB(skb)->ack_seq; - u32 prior_in_flight; - int prior_packets; - - /* If the ack is newer than sent or older than previous acks - * then we can probably ignore it. - */ - if (after(ack, tp->snd_nxt)) - goto uninteresting_ack; - - if (before(ack, prior_snd_una)) - goto old_ack; - - if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { - /* Window is constant, pure forward advance. - * No more checks are required. - * Note, we use the fact that SND.UNA>=SND.WL2. - */ - tcp_update_wl(tp, ack, ack_seq); - tp->snd_una = ack; - flag |= FLAG_WIN_UPDATE; - - NET_INC_STATS_BH(TCPHPAcks); - } else { - if (ack_seq != TCP_SKB_CB(skb)->end_seq) - flag |= FLAG_DATA; - else - NET_INC_STATS_BH(TCPPureAcks); - - flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq); - - if (TCP_SKB_CB(skb)->sacked) - flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - - if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) - flag |= FLAG_ECE; - } - - /* We passed data and got it acked, remove any soft error - * log. Something worked... - */ - sk->err_soft = 0; - tp->rcv_tstamp = tcp_time_stamp; - if ((prior_packets = tp->packets_out) == 0) - goto no_queue; - - prior_in_flight = tcp_packets_in_flight(tp); - - /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk); - - if (tcp_ack_is_dubious(tp, flag)) { - /* Advanve CWND, if state allows this. 
*/ - if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd && - tcp_may_raise_cwnd(tp, flag)) - tcp_cong_avoid(tp); - tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); - } else { - if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd) - tcp_cong_avoid(tp); - } - - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) - dst_confirm(sk->dst_cache); - - return 1; - -no_queue: - tp->probes_out = 0; - - /* If this ack opens up a zero window, clear backoff. It was - * being used to time the probes, and is probably far higher than - * it needs to be for normal retransmission. - */ - if (tp->send_head) - tcp_ack_probe(sk); - return 1; - -old_ack: - if (TCP_SKB_CB(skb)->sacked) - tcp_sacktag_write_queue(sk, skb, prior_snd_una); - -uninteresting_ack: - SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); - return 0; -#else - return 0; -#endif -} - - -/* Look for tcp options. Normally only called on SYN and SYNACK packets. - * But, this can also be called on packets in the established flow when - * the fast version below fails. 
- */ -void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab) -{ -#if 0 - unsigned char *ptr; - struct tcphdr *th = skb->h.th; - int length=(th->doff*4)-sizeof(struct tcphdr); - - ptr = (unsigned char *)(th + 1); - tp->saw_tstamp = 0; - - while(length>0) { - int opcode=*ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - return; - case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ - length--; - continue; - default: - opsize=*ptr++; - if (opsize < 2) /* "silly options" */ - return; - if (opsize > length) - return; /* don't parse partial options */ - switch(opcode) { - case TCPOPT_MSS: - if(opsize==TCPOLEN_MSS && th->syn && !estab) { - u16 in_mss = ntohs(*(__u16 *)ptr); - if (in_mss) { - if (tp->user_mss && tp->user_mss < in_mss) - in_mss = tp->user_mss; - tp->mss_clamp = in_mss; - } - } - break; - case TCPOPT_WINDOW: - if(opsize==TCPOLEN_WINDOW && th->syn && !estab) - if (sysctl_tcp_window_scaling) { - tp->wscale_ok = 1; - tp->snd_wscale = *(__u8 *)ptr; - if(tp->snd_wscale > 14) { - if(net_ratelimit()) - printk("tcp_parse_options: Illegal window " - "scaling value %d >14 received.", - tp->snd_wscale); - tp->snd_wscale = 14; - } - } - break; - case TCPOPT_TIMESTAMP: - if(opsize==TCPOLEN_TIMESTAMP) { - if ((estab && tp->tstamp_ok) || - (!estab && sysctl_tcp_timestamps)) { - tp->saw_tstamp = 1; - tp->rcv_tsval = ntohl(*(__u32 *)ptr); - tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); - } - } - break; - case TCPOPT_SACK_PERM: - if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { - if (sysctl_tcp_sack) { - tp->sack_ok = 1; - tcp_sack_reset(tp); - } - } - break; - - case TCPOPT_SACK: - if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && - !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && - tp->sack_ok) { - TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; - } - }; - ptr+=opsize-2; - length-=opsize; - }; - } -#endif -} - -/* Fast parse options. This hopes to only see timestamps. 
- * If it is wrong it falls back on tcp_parse_options(). - */ -static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp) -{ -#if 0 - if (th->doff == sizeof(struct tcphdr)>>2) { - tp->saw_tstamp = 0; - return 0; - } else if (tp->tstamp_ok && - th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { - __u32 *ptr = (__u32 *)(th + 1); - if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { - tp->saw_tstamp = 1; - ++ptr; - tp->rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rcv_tsecr = ntohl(*ptr); - return 1; - } - } - tcp_parse_options(skb, tp, 1); - return 1; -#else - return 0; -#endif -} - -extern __inline__ void -tcp_store_ts_recent(struct tcp_opt *tp) -{ -#if 0 - tp->ts_recent = tp->rcv_tsval; - tp->ts_recent_stamp = xtime.tv_sec; -#endif -} - -extern __inline__ void -tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq) -{ -#if 0 - if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 || - xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) - tcp_store_ts_recent(tp); - } -#endif -} - -/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM - * - * It is not fatal. If this ACK does _not_ change critical state (seqs, window) - * it can pass through stack. So, the following predicate verifies that - * this segment is not used for anything but congestion avoidance or - * fast retransmit. Moreover, we even are able to eliminate most of such - * second order effects, if we apply some small "replay" window (~RTO) - * to timestamp space. 
- * - * All these measures still do not guarantee that we reject wrapped ACKs - * on networks with high bandwidth, when sequence space is recycled fastly, - * but it guarantees that such events will be very rare and do not affect - * connection seriously. This doesn't look nice, but alas, PAWS is really - * buggy extension. - * - * [ Later note. Even worse! It is buggy for segments _with_ data. RFC - * states that events when retransmit arrives after original data are rare. - * It is a blatant lie. VJ forgot about fast retransmit! 8)8) It is - * the biggest problem on large power networks even with minor reordering. - * OK, let's give it small replay window. If peer clock is even 1hz, it is safe - * up to bandwidth of 18Gigabit/sec. 8) ] - */ - -static int tcp_disordered_ack(struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - struct tcphdr *th = skb->h.th; - u32 seq = TCP_SKB_CB(skb)->seq; - u32 ack = TCP_SKB_CB(skb)->ack_seq; - - return (/* 1. Pure ACK with correct sequence number. */ - (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) && - - /* 2. ... and duplicate ACK. */ - ack == tp->snd_una && - - /* 3. ... and does not update window. */ - !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<snd_wscale) && - - /* 4. ... and sits in replay window. */ - (s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ); -#endif -} - -extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW && - xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS && - !tcp_disordered_ack(tp, skb)); -#else - return 0; -#endif -} - -/* Check segment sequence number for validity. - * - * Segment controls are considered valid, if the segment - * fits to the window after truncation to the window. Acceptability - * of data (and SYN, FIN, of course) is checked separately. - * See tcp_data_queue(), for example. 
- * - * Also, controls (RST is main one) are accepted using RCV.WUP instead - * of RCV.NXT. Peer still did not advance his SND.UNA when we - * delayed ACK, so that hisSND.UNA<=ourRCV.WUP. - * (borrowed from freebsd) - */ - -static inline int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq) -{ -#if 0 - return !before(end_seq, tp->rcv_wup) && - !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); -#else - return 0; -#endif -} - -/* When we get a reset we do this. */ -static void tcp_reset(struct sock *sk) -{ -#if 0 - /* We want the right error as BSD sees it (and indeed as we do). */ - switch (sk->state) { - case TCP_SYN_SENT: - sk->err = ECONNREFUSED; - break; - case TCP_CLOSE_WAIT: - sk->err = EPIPE; - break; - case TCP_CLOSE: - return; - default: - sk->err = ECONNRESET; - } - - if (!sk->dead) - sk->error_report(sk); - - tcp_done(sk); -#endif -} - -/* - * Process the FIN bit. This now behaves as it is supposed to work - * and the FIN takes effect when it is validly part of sequence - * space. Not before when we get holes. - * - * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT - * (and thence onto LAST-ACK and finally, CLOSE, we never enter - * TIME-WAIT) - * - * If we are in FINWAIT-1, a received FIN indicates simultaneous - * close and we go into CLOSING (and later onto TIME-WAIT) - * - * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. - */ -static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - tcp_schedule_ack(tp); - - sk->shutdown |= RCV_SHUTDOWN; - sk->done = 1; - - switch(sk->state) { - case TCP_SYN_RECV: - case TCP_ESTABLISHED: - /* Move to CLOSE_WAIT */ - tcp_set_state(sk, TCP_CLOSE_WAIT); - tp->ack.pingpong = 1; - break; - - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - /* Received a retransmission of the FIN, do - * nothing. - */ - break; - case TCP_LAST_ACK: - /* RFC793: Remain in the LAST-ACK state. 
*/ - break; - - case TCP_FIN_WAIT1: - /* This case occurs when a simultaneous close - * happens, we must ack the received FIN and - * enter the CLOSING state. - */ - tcp_send_ack(sk); - tcp_set_state(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - /* Received a FIN -- send ACK and enter TIME_WAIT. */ - tcp_send_ack(sk); - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - break; - default: - /* Only TCP_LISTEN and TCP_CLOSE are left, in these - * cases we should never reach this piece of code. - */ - printk("tcp_fin: Impossible, sk->state=%d\n", sk->state); - break; - }; - - /* It _is_ possible, that we have something out-of-order _after_ FIN. - * Probably, we should reset in this case. For now drop them. - */ - __skb_queue_purge(&tp->out_of_order_queue); - if (tp->sack_ok) - tcp_sack_reset(tp); - tcp_mem_reclaim(sk); - - if (!sk->dead) { - sk->state_change(sk); - - /* Do not send POLL_HUP for half duplex close. */ - if (sk->shutdown == SHUTDOWN_MASK || sk->state == TCP_CLOSE) - sk_wake_async(sk, 1, POLL_HUP); - else - sk_wake_async(sk, 1, POLL_IN); - } -#endif -} - -static __inline__ int -tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) -{ -#if 0 - if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { - if (before(seq, sp->start_seq)) - sp->start_seq = seq; - if (after(end_seq, sp->end_seq)) - sp->end_seq = end_seq; - return 1; - } - return 0; -#else - return 0; -#endif -} - -static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq) -{ -#if 0 - if (tp->sack_ok && sysctl_tcp_dsack) { - if (before(seq, tp->rcv_nxt)) - NET_INC_STATS_BH(TCPDSACKOldSent); - else - NET_INC_STATS_BH(TCPDSACKOfoSent); - - tp->dsack = 1; - tp->duplicate_sack[0].start_seq = seq; - tp->duplicate_sack[0].end_seq = end_seq; - tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok); - } -#endif -} - -static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq) -{ -#if 0 - if (!tp->dsack) - tcp_dsack_set(tp, seq, end_seq); - else - 
tcp_sack_extend(tp->duplicate_sack, seq, end_seq); -#endif -} - -static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(DelayedACKLost); - tcp_enter_quickack_mode(tp); - - if (tp->sack_ok && sysctl_tcp_dsack) { - u32 end_seq = TCP_SKB_CB(skb)->end_seq; - - if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) - end_seq = tp->rcv_nxt; - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq); - } - } - - tcp_send_ack(sk); -#endif -} - -/* These routines update the SACK block as out-of-order packets arrive or - * in-order packets close up the sequence space. - */ -static void tcp_sack_maybe_coalesce(struct tcp_opt *tp) -{ -#if 0 - int this_sack; - struct tcp_sack_block *sp = &tp->selective_acks[0]; - struct tcp_sack_block *swalk = sp+1; - - /* See if the recent change to the first SACK eats into - * or hits the sequence space of other SACK blocks, if so coalesce. - */ - for (this_sack = 1; this_sack < tp->num_sacks; ) { - if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) { - int i; - - /* Zap SWALK, by moving every further SACK up by one slot. - * Decrease num_sacks. 
- */ - tp->num_sacks--; - tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); - for(i=this_sack; i < tp->num_sacks; i++) - sp[i] = sp[i+1]; - continue; - } - this_sack++, swalk++; - } -#endif -} - -static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) -{ -#if 0 - __u32 tmp; - - tmp = sack1->start_seq; - sack1->start_seq = sack2->start_seq; - sack2->start_seq = tmp; - - tmp = sack1->end_seq; - sack1->end_seq = sack2->end_seq; - sack2->end_seq = tmp; -#endif -} - -static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct tcp_sack_block *sp = &tp->selective_acks[0]; - int cur_sacks = tp->num_sacks; - int this_sack; - - if (!cur_sacks) - goto new_sack; - - for (this_sack=0; this_sack0; this_sack--, sp--) - tcp_sack_swap(sp, sp-1); - if (cur_sacks > 1) - tcp_sack_maybe_coalesce(tp); - return; - } - } - - /* Could not find an adjacent existing SACK, build a new one, - * put it at the front, and shift everyone else down. We - * always know there is at least one SACK present already here. - * - * If the sack array is full, forget about the last one. - */ - if (this_sack >= 4) { - this_sack--; - tp->num_sacks--; - sp--; - } - for(; this_sack > 0; this_sack--, sp--) - *sp = *(sp-1); - -new_sack: - /* Build the new head SACK, and we're done. */ - sp->start_seq = seq; - sp->end_seq = end_seq; - tp->num_sacks++; - tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); -#endif -} - -/* RCV.NXT advances, some SACKs should be eaten. */ - -static void tcp_sack_remove(struct tcp_opt *tp) -{ -#if 0 - struct tcp_sack_block *sp = &tp->selective_acks[0]; - int num_sacks = tp->num_sacks; - int this_sack; - - /* Empty ofo queue, hence, all the SACKs are eaten. Clear. 
*/ - if (skb_queue_len(&tp->out_of_order_queue) == 0) { - tp->num_sacks = 0; - tp->eff_sacks = tp->dsack; - return; - } - - for(this_sack = 0; this_sack < num_sacks; ) { - /* Check if the start of the sack is covered by RCV.NXT. */ - if (!before(tp->rcv_nxt, sp->start_seq)) { - int i; - - /* RCV.NXT must cover all the block! */ - BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq)); - - /* Zap this SACK, by moving forward any other SACKS. */ - for (i=this_sack+1; i < num_sacks; i++) - tp->selective_acks[i-1] = tp->selective_acks[i]; - num_sacks--; - continue; - } - this_sack++; - sp++; - } - if (num_sacks != tp->num_sacks) { - tp->num_sacks = num_sacks; - tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); - } -#endif -} - -/* This one checks to see if we can put data from the - * out_of_order queue into the receive_queue. - */ -static void tcp_ofo_queue(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - __u32 dsack_high = tp->rcv_nxt; - struct sk_buff *skb; - - while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { - if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) - break; - - if (before(TCP_SKB_CB(skb)->seq, dsack_high)) { - __u32 dsack = dsack_high; - if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) - dsack_high = TCP_SKB_CB(skb)->end_seq; - tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack); - } - - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { - SOCK_DEBUG(sk, "ofo packet was already received \n"); - __skb_unlink(skb, skb->list); - __kfree_skb(skb); - continue; - } - SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(skb)->end_seq); - - __skb_unlink(skb, skb->list); - __skb_queue_tail(&sk->receive_queue, skb); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - if(skb->h.th->fin) - tcp_fin(skb, sk, skb->h.th); - } -#endif -} - -static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - return (int)skb->truesize <= sk->forward_alloc || - 
tcp_mem_schedule(sk, skb->truesize, 1); -#else - return 0; -#endif -} - -static int tcp_prune_queue(struct sock *sk); - -static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct tcphdr *th = skb->h.th; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int eaten = -1; - - if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) - goto drop; - - th = skb->h.th; - __skb_pull(skb, th->doff*4); - - TCP_ECN_accept_cwr(tp, skb); - - if (tp->dsack) { - tp->dsack = 0; - tp->eff_sacks = min_t(unsigned int, tp->num_sacks, 4-tp->tstamp_ok); - } - - /* Queue data for delivery to the user. - * Packets in sequence go to the receive queue. - * Out of sequence packets to the out_of_order_queue. - */ - if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { - if (tcp_receive_window(tp) == 0) - goto out_of_window; - - /* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - tp->ucopy.len && - sk->lock.users && - !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - local_bh_enable(); - if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len && !th->fin); - } - local_bh_disable(); - } - - if (eaten <= 0) { -queue_and_out: - if (eaten < 0 && - (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || - !tcp_rmem_schedule(sk, skb))) { - if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) - goto drop; - } - tcp_set_owner_r(skb, sk); - __skb_queue_tail(&sk->receive_queue, skb); - } - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - if(skb->len) - tcp_event_data_recv(sk, tp, skb); - if(th->fin) - tcp_fin(skb, sk, th); - - if (skb_queue_len(&tp->out_of_order_queue)) { - tcp_ofo_queue(sk); - - /* RFC2581. 4.2. SHOULD send immediate ACK, when - * gap in queue is filled. 
- */ - if (skb_queue_len(&tp->out_of_order_queue) == 0) - tp->ack.pingpong = 0; - } - - if(tp->num_sacks) - tcp_sack_remove(tp); - - tcp_fast_path_check(sk, tp); - - if (eaten > 0) { - __kfree_skb(skb); - } else if (!sk->dead) - sk->data_ready(sk, 0); - return; - } - - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { - /* A retransmit, 2nd most common case. Force an immediate ack. */ - NET_INC_STATS_BH(DelayedACKLost); - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - -out_of_window: - tcp_enter_quickack_mode(tp); - tcp_schedule_ack(tp); -drop: - __kfree_skb(skb); - return; - } - - /* Out of window. F.e. zero window probe. */ - if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt+tcp_receive_window(tp))) - goto out_of_window; - - tcp_enter_quickack_mode(tp); - - if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - /* Partial packet, seq < rcv_next < end_seq */ - SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(skb)->end_seq); - - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); - - /* If window is closed, drop tail of packet. But after - * remembering D-SACK for its head made in previous line. - */ - if (!tcp_receive_window(tp)) - goto out_of_window; - goto queue_and_out; - } - - TCP_ECN_check_ce(tp, skb); - - if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || - !tcp_rmem_schedule(sk, skb)) { - if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) - goto drop; - } - - /* Disable header prediction. */ - tp->pred_flags = 0; - tcp_schedule_ack(tp); - - SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - - tcp_set_owner_r(skb, sk); - - if (skb_peek(&tp->out_of_order_queue) == NULL) { - /* Initial out of order segment, build 1 SACK. 
*/ - if(tp->sack_ok) { - tp->num_sacks = 1; - tp->dsack = 0; - tp->eff_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; - } - __skb_queue_head(&tp->out_of_order_queue,skb); - } else { - struct sk_buff *skb1=tp->out_of_order_queue.prev; - u32 seq = TCP_SKB_CB(skb)->seq; - u32 end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_append(skb1, skb); - - if (tp->num_sacks == 0 || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - return; - } - - /* Find place to insert this segment. */ - do { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - } while ((skb1=skb1->prev) != (struct sk_buff*)&tp->out_of_order_queue); - - /* Do skb overlap to previous one? */ - if (skb1 != (struct sk_buff*)&tp->out_of_order_queue && - before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - __kfree_skb(skb); - tcp_dsack_set(tp, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq); - } else { - skb1 = skb1->prev; - } - } - __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); - - /* And clean segments covered by new one as whole. */ - while ((skb1 = skb->next) != (struct sk_buff*)&tp->out_of_order_queue && - after(end_seq, TCP_SKB_CB(skb1)->seq)) { - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); - break; - } - __skb_unlink(skb1, skb1->list); - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); - __kfree_skb(skb1); - } - -add_sack: - if (tp->sack_ok) - tcp_sack_new_ofo_skb(sk, seq, end_seq); - } -#endif -} - -/* Collapse contiguous sequence of skbs head..tail with - * sequence numbers start..end. 
- * Segments with FIN/SYN are not collapsed (only because this - * simplifies code) - */ -static void -tcp_collapse(struct sock *sk, struct sk_buff *head, - struct sk_buff *tail, u32 start, u32 end) -{ -#if 0 - struct sk_buff *skb; - - /* First, check that queue is collapsable and find - * the point where collapsing can be useful. */ - for (skb = head; skb != tail; ) { - /* No new bits? It is possible on ofo queue. */ - if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); - __kfree_skb(skb); - NET_INC_STATS_BH(TCPRcvCollapsed); - skb = next; - continue; - } - - /* The first skb to collapse is: - * - not SYN/FIN and - * - bloated or contains data before "start" or - * overlaps to the next one. - */ - if (!skb->h.th->syn && !skb->h.th->fin && - (tcp_win_from_space(skb->truesize) > skb->len || - before(TCP_SKB_CB(skb)->seq, start) || - (skb->next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq))) - break; - - /* Decided to skip this, advance start seq. */ - start = TCP_SKB_CB(skb)->end_seq; - skb = skb->next; - } - if (skb == tail || skb->h.th->syn || skb->h.th->fin) - return; - - while (before(start, end)) { - struct sk_buff *nskb; - int header = skb_headroom(skb); - int copy = (PAGE_SIZE - sizeof(struct sk_buff) - - sizeof(struct skb_shared_info) - header - 31)&~15; - - /* Too big header? This can happen with IPv6. 
*/ - if (copy < 0) - return; - if (end-start < copy) - copy = end-start; - nskb = alloc_skb(copy+header, GFP_ATOMIC); - if (!nskb) - return; - skb_reserve(nskb, header); - memcpy(nskb->head, skb->head, header); - nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head); - nskb->h.raw = nskb->head + (skb->h.raw-skb->head); - nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); - memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); - TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, skb->list); - tcp_set_owner_r(nskb, sk); - - /* Copy data, releasing collapsed skbs. */ - while (copy > 0) { - int offset = start - TCP_SKB_CB(skb)->seq; - int size = TCP_SKB_CB(skb)->end_seq - start; - - if (offset < 0) BUG(); - if (size > 0) { - size = min(copy, size); - if (skb_copy_bits(skb, offset, skb_put(nskb, size), size)) - BUG(); - TCP_SKB_CB(nskb)->end_seq += size; - copy -= size; - start += size; - } - if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); - __kfree_skb(skb); - NET_INC_STATS_BH(TCPRcvCollapsed); - skb = next; - if (skb == tail || skb->h.th->syn || skb->h.th->fin) - return; - } - } - } -#endif -} - -/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs - * and tcp_collapse() them until all the queue is collapsed. - */ -static void tcp_collapse_ofo_queue(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); - struct sk_buff *head; - u32 start, end; - - if (skb == NULL) - return; - - start = TCP_SKB_CB(skb)->seq; - end = TCP_SKB_CB(skb)->end_seq; - head = skb; - - for (;;) { - skb = skb->next; - - /* Segment is terminated when we see gap or when - * we are at the end of all the queue. 
*/ - if (skb == (struct sk_buff *)&tp->out_of_order_queue || - after(TCP_SKB_CB(skb)->seq, end) || - before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, head, skb, start, end); - head = skb; - if (skb == (struct sk_buff *)&tp->out_of_order_queue) - break; - /* Start new segment */ - start = TCP_SKB_CB(skb)->seq; - end = TCP_SKB_CB(skb)->end_seq; - } else { - if (before(TCP_SKB_CB(skb)->seq, start)) - start = TCP_SKB_CB(skb)->seq; - if (after(TCP_SKB_CB(skb)->end_seq, end)) - end = TCP_SKB_CB(skb)->end_seq; - } - } -#endif -} - -/* Reduce allocated memory if we can, trying to get - * the socket within its memory limits again. - * - * Return less than zero if we should start dropping frames - * until the socket owning process reads some of the data - * to stabilize the situation. - */ -static int tcp_prune_queue(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - - SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - - NET_INC_STATS_BH(PruneCalled); - - if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf) - tcp_clamp_window(sk, tp); - else if (tcp_memory_pressure) - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); - - tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, sk->receive_queue.next, - (struct sk_buff*)&sk->receive_queue, - tp->copied_seq, tp->rcv_nxt); - tcp_mem_reclaim(sk); - - if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) - return 0; - - /* Collapsing did not help, destructive actions follow. - * This must not ever occur. */ - - /* First, purge the out_of_order queue. */ - if (skb_queue_len(&tp->out_of_order_queue)) { - net_statistics[smp_processor_id()*2].OfoPruned += skb_queue_len(&tp->out_of_order_queue); - __skb_queue_purge(&tp->out_of_order_queue); - - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. 
- */ - if(tp->sack_ok) - tcp_sack_reset(tp); - tcp_mem_reclaim(sk); - } - - if(atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) - return 0; - - /* If we are really being abused, tell the caller to silently - * drop receive data on the floor. It will get retransmitted - * and hopefully then we'll have sufficient space. - */ - NET_INC_STATS_BH(RcvPruned); - - /* Massive buffer overcommit. */ - tp->pred_flags = 0; - return -1; -#else - return 0; -#endif -} - - -/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. - * As additional protections, we do not touch cwnd in retransmission phases, - * and if application hit its sndbuf limit recently. - */ -void tcp_cwnd_application_limited(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - if (tp->ca_state == TCP_CA_Open && - sk->socket && !test_bit(SOCK_NOSPACE, &sk->socket->flags)) { - /* Limited by application or receiver window. */ - u32 win_used = max(tp->snd_cwnd_used, 2U); - if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(tp); - tp->snd_cwnd = (tp->snd_cwnd+win_used)>>1; - } - tp->snd_cwnd_used = 0; - } - tp->snd_cwnd_stamp = tcp_time_stamp; -#endif -} - - -/* When incoming ACK allowed to free some skb from write_queue, - * we remember this event in flag tp->queue_shrunk and wake up socket - * on the exit from tcp input handler. 
- */ -static void tcp_new_space(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - if (tp->packets_out < tp->snd_cwnd && - !(sk->userlocks&SOCK_SNDBUF_LOCK) && - !tcp_memory_pressure && - atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { - int sndmem, demanded; - - sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff); - demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering+1); - sndmem *= 2*demanded; - if (sndmem > sk->sndbuf) - sk->sndbuf = min(sndmem, sysctl_tcp_wmem[2]); - tp->snd_cwnd_stamp = tcp_time_stamp; - } - - sk->write_space(sk); -#endif -} - -static inline void tcp_check_space(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - if (tp->queue_shrunk) { - tp->queue_shrunk = 0; - if (sk->socket && test_bit(SOCK_NOSPACE, &sk->socket->flags)) - tcp_new_space(sk); - } -#endif -} - -static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) || - tcp_packets_in_flight(tp) >= tp->snd_cwnd || - tcp_write_xmit(sk, tp->nonagle)) - tcp_check_probe_timer(sk, tp); -#endif -} - -static __inline__ void tcp_data_snd_check(struct sock *sk) -{ -#if 0 - struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head; - - if (skb != NULL) - __tcp_data_snd_check(sk, skb); - tcp_check_space(sk); -#endif -} - -/* - * Check if sending an ack is needed. - */ -static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss - /* ... and right edge of window advances far enough. - * (tcp_recvmsg() will send ACK otherwise). Or... - */ - && __tcp_select_window(sk) >= tp->rcv_wnd) || - /* We ACK each frame or... */ - tcp_in_quickack_mode(tp) || - /* We have out of order data. 
*/ - (ofo_possible && - skb_peek(&tp->out_of_order_queue) != NULL)) { - /* Then ack it now */ - tcp_send_ack(sk); - } else { - /* Else, send delayed ack. */ - tcp_send_delayed_ack(sk); - } -#endif -} - -static __inline__ void tcp_ack_snd_check(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - if (!tcp_ack_scheduled(tp)) { - /* We sent a data segment already. */ - return; - } - __tcp_ack_snd_check(sk, 1); -#endif -} - -/* - * This routine is only called when we have urgent data - * signalled. Its the 'slow' part of tcp_urg. It could be - * moved inline now as tcp_urg is only called from one - * place. We handle URGent data wrong. We have to - as - * BSD still doesn't use the correction from RFC961. - * For 1003.1g we should support a new option TCP_STDURG to permit - * either form (or just set the sysctl tcp_stdurg). - */ - -static void tcp_check_urg(struct sock * sk, struct tcphdr * th) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - u32 ptr = ntohs(th->urg_ptr); - - if (ptr && !sysctl_tcp_stdurg) - ptr--; - ptr += ntohl(th->seq); - - /* Ignore urgent data that we've already seen and read. */ - if (after(tp->copied_seq, ptr)) - return; - - /* Do not replay urg ptr. - * - * NOTE: interesting situation not covered by specs. - * Misbehaving sender may send urg ptr, pointing to segment, - * which we already have in ofo queue. We are not able to fetch - * such data and will stay in TCP_URG_NOTYET until will be eaten - * by recvmsg(). Seems, we are not obliged to handle such wicked - * situations. But it is worth to think about possibility of some - * DoSes using some hypothetical application level deadlock. - */ - if (before(ptr, tp->rcv_nxt)) - return; - - /* Do we already have a newer (or duplicate) urgent pointer? */ - if (tp->urg_data && !after(ptr, tp->urg_seq)) - return; - - /* Tell the world about our new urgent pointer. 
*/ - if (sk->proc != 0) { - if (sk->proc > 0) - kill_proc(sk->proc, SIGURG, 1); - else - kill_pg(-sk->proc, SIGURG, 1); - sk_wake_async(sk, 3, POLL_PRI); - } - - /* We may be adding urgent data when the last byte read was - * urgent. To do this requires some care. We cannot just ignore - * tp->copied_seq since we would read the last urgent byte again - * as data, nor can we alter copied_seq until this data arrives - * or we break the sematics of SIOCATMARK (and thus sockatmark()) - * - * NOTE. Double Dutch. Rendering to plain English: author of comment - * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); - * and expect that both A and B disappear from stream. This is _wrong_. - * Though this happens in BSD with high probability, this is occasional. - * Any application relying on this is buggy. Note also, that fix "works" - * only in this artificial test. Insert some normal data between A and B and we will - * decline of BSD again. Verdict: it is better to remove to trap - * buggy users. - */ - if (tp->urg_seq == tp->copied_seq && tp->urg_data && - !sk->urginline && - tp->copied_seq != tp->rcv_nxt) { - struct sk_buff *skb = skb_peek(&sk->receive_queue); - tp->copied_seq++; - if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { - __skb_unlink(skb, skb->list); - __kfree_skb(skb); - } - } - - tp->urg_data = TCP_URG_NOTYET; - tp->urg_seq = ptr; - - /* Disable header prediction. */ - tp->pred_flags = 0; -#endif -} - -/* This is the 'fast' part of urgent handling. */ -static inline void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* Check if we get a new urgent pointer - normally not. */ - if (th->urg) - tcp_check_urg(sk,th); - - /* Do we wait for any urgent data? - normally not... */ - if (tp->urg_data == TCP_URG_NOTYET) { - u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4) - th->syn; - - /* Is the urgent pointer pointing into this packet? 
*/ - if (ptr < skb->len) { - u8 tmp; - if (skb_copy_bits(skb, ptr, &tmp, 1)) - BUG(); - tp->urg_data = TCP_URG_VALID | tmp; - if (!sk->dead) - sk->data_ready(sk,0); - } - } -#endif -} - -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int chunk = skb->len - hlen; - int err; - - local_bh_enable(); - if (skb->ip_summed==CHECKSUM_UNNECESSARY) - err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); - else - err = skb_copy_and_csum_datagram_iovec(skb, hlen, tp->ucopy.iov); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - } - - local_bh_disable(); - return err; -#else - return 0; -#endif -} - -static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - int result; - - if (sk->lock.users) { - local_bh_enable(); - result = __tcp_checksum_complete(skb); - local_bh_disable(); - } else { - result = __tcp_checksum_complete(skb); - } - return result; -#else - return 0; -#endif -} - -static __inline__ int -tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - return skb->ip_summed != CHECKSUM_UNNECESSARY && - __tcp_checksum_complete_user(sk, skb); -#else - return 0; -#endif -} - -/* - * TCP receive function for the ESTABLISHED state. - * - * It is split into a fast path and a slow path. The fast path is - * disabled when: - * - A zero window was announced from us - zero window probing - * is only handled properly in the slow path. - * - Out of order segments arrived. - * - Urgent data is expected. - * - There is no buffer space left - * - Unexpected TCP flags/window values/header lengths are received - * (detected by checking the TCP header against pred_flags) - * - Data is sent in both directions. Fast path only supports pure senders - * or pure receivers (this means either the sequence number or the ack - * value must stay constant) - * - Unexpected TCP option. 
- * - * When these conditions are not satisfied it drops into a standard - * receive procedure patterned after RFC793 to handle all cases. - * The first three cases are guaranteed by proper pred_flags setting, - * the rest is checked inline. Fast processing is turned on in - * tcp_data_queue when everything is OK. - */ -int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* - * Header prediction. - * The code losely follows the one in the famous - * "30 instruction TCP receive" Van Jacobson mail. - * - * Van's trick is to deposit buffers into socket queue - * on a device interrupt, to call tcp_recv function - * on the receive process context and checksum and copy - * the buffer to user space. smart... - * - * Our current scheme is not silly either but we take the - * extra cost of the net_bh soft interrupt processing... - * We do checksum and copy also but from device to kernel. - */ - - tp->saw_tstamp = 0; - - /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_predition is to be made - * 'S' will always be tp->tcp_header_len >> 2 - * '?' will be 0 for the fast path, otherwise pred_flags is 0 to - * turn it off (when there are holes in the receive - * space for instance) - * PSH flag is ignored. - */ - - if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { - int tcp_header_len = tp->tcp_header_len; - - /* Timestamp header prediction: tcp_header_len - * is automatically equal to th->doff*4 due to pred_flags - * match. - */ - - /* Check timestamp */ - if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - __u32 *ptr = (__u32 *)(th + 1); - - /* No? Slow path! 
*/ - if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) - goto slow_path; - - tp->saw_tstamp = 1; - ++ptr; - tp->rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rcv_tsecr = ntohl(*ptr); - - /* If PAWS failed, check it more carefully in slow path */ - if ((s32)(tp->rcv_tsval - tp->ts_recent) < 0) - goto slow_path; - - /* DO NOT update ts_recent here, if checksum fails - * and timestamp was corrupted part, it will result - * in a hung connection since we will drop all - * future packets due to the PAWS test. - */ - } - - if (len <= tcp_header_len) { - /* Bulk data transfer: sender */ - if (len == tcp_header_len) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - /* We know that such packets are checksummed - * on entry. - */ - tcp_ack(sk, skb, 0); - __kfree_skb(skb); - tcp_data_snd_check(sk); - return 0; - } else { /* Header too small */ - TCP_INC_STATS_BH(TcpInErrs); - goto discard; - } - } else { - int eaten = 0; - - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sk->lock.users) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - __skb_pull(skb, tcp_header_len); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - NET_INC_STATS_BH(TCPHPHitsToUser); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete_user(sk, skb)) - goto csum_error; - - /* Predicted packet is in window by definition. 
- * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - if ((int)skb->truesize > sk->forward_alloc) - goto step5; - - NET_INC_STATS_BH(TCPHPHits); - - /* Bulk data transfer: receiver */ - __skb_pull(skb,tcp_header_len); - __skb_queue_tail(&sk->receive_queue, skb); - tcp_set_owner_r(skb, sk); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - } - - tcp_event_data_recv(sk, tp, skb); - - if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { - /* Well, only one small jumplet in fast path... */ - tcp_ack(sk, skb, FLAG_DATA); - tcp_data_snd_check(sk); - if (!tcp_ack_scheduled(tp)) - goto no_ack; - } - - if (eaten) { - if (tcp_in_quickack_mode(tp)) { - tcp_send_ack(sk); - } else { - tcp_send_delayed_ack(sk); - } - } else { - __tcp_ack_snd_check(sk, 0); - } - -no_ack: - if (eaten) - __kfree_skb(skb); - else - sk->data_ready(sk, 0); - return 0; - } - } - -slow_path: - if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb)) - goto csum_error; - - /* - * RFC1323: H1. Apply PAWS check first. - */ - if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp && - tcp_paws_discard(tp, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(PAWSEstabRejected); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Resets are accepted even if PAWS failed. - - ts_recent update must be made after we are sure - that the packet is in window. - */ - } - - /* - * Standard slow path. - */ - - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - /* RFC793, page 37: "In all states except SYN-SENT, all reset - * (RST) segments are validated by checking their SEQ-fields." - * And page 69: "If an incoming segment is not acceptable, - * an acknowledgment should be sent in reply (unless the RST bit - * is set, if so drop the segment and return)". 
- */ - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - if(th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - TCP_INC_STATS_BH(TcpInErrs); - NET_INC_STATS_BH(TCPAbortOnSyn); - tcp_reset(sk); - return 1; - } - -step5: - if(th->ack) - tcp_ack(sk, skb, FLAG_SLOWPATH); - - /* Process urgent data. */ - tcp_urg(sk, skb, th); - - /* step 7: process the segment text */ - tcp_data_queue(sk, skb); - - tcp_data_snd_check(sk); - tcp_ack_snd_check(sk); - return 0; - -csum_error: - TCP_INC_STATS_BH(TcpInErrs); - -discard: - __kfree_skb(skb); - return 0; -#else - return 0; -#endif -} - -static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int saved_clamp = tp->mss_clamp; - - tcp_parse_options(skb, tp, 0); - - if (th->ack) { - /* rfc793: - * "If the state is SYN-SENT then - * first check the ACK bit - * If the ACK bit is set - * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send - * a reset (unless the RST bit is set, if so drop - * the segment and return)" - * - * We do not send data with SYN, so that RFC-correct - * test reduces to: - */ - if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) - goto reset_and_undo; - - if (tp->saw_tstamp && tp->rcv_tsecr && - !between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - NET_INC_STATS_BH(PAWSActiveRejected); - goto reset_and_undo; - } - - /* Now ACK is acceptable. - * - * "If the RST bit is set - * If the ACK was acceptable then signal the user "error: - * connection reset", drop the segment, enter CLOSED state, - * delete TCB, and return." - */ - - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - /* rfc793: - * "fifth, if neither of the SYN or RST bits is set then - * drop the segment and return." - * - * See note below! 
- * --ANK(990513) - */ - if (!th->syn) - goto discard_and_undo; - - /* rfc793: - * "If the SYN bit is on ... - * are acceptable then ... - * (our SYN has been ACKed), change the connection - * state to ESTABLISHED..." - */ - - TCP_ECN_rcv_synack(tp, th); - - tp->snd_wl1 = TCP_SKB_CB(skb)->seq; - tcp_ack(sk, skb, FLAG_SLOWPATH); - - /* Ok.. it's good. Set up sequence numbers and - * move to established. - */ - tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1; - tp->rcv_wup = TCP_SKB_CB(skb)->seq+1; - - /* RFC1323: The window in SYN & SYN/ACK segments is - * never scaled. - */ - tp->snd_wnd = ntohs(th->window); - tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); - - if (tp->wscale_ok == 0) { - tp->snd_wscale = tp->rcv_wscale = 0; - tp->window_clamp = min(tp->window_clamp, 65535U); - } - - if (tp->saw_tstamp) { - tp->tstamp_ok = 1; - tp->tcp_header_len = - sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - tcp_store_ts_recent(tp); - } else { - tp->tcp_header_len = sizeof(struct tcphdr); - } - - if (tp->sack_ok && sysctl_tcp_fack) - tp->sack_ok |= 2; - - tcp_sync_mss(sk, tp->pmtu_cookie); - tcp_initialize_rcv_mss(sk); - tcp_init_metrics(sk); - tcp_init_buffer_space(sk); - - if (sk->keepopen) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); - - if (tp->snd_wscale == 0) - __tcp_fast_path_on(tp, tp->snd_wnd); - else - tp->pred_flags = 0; - - /* Remember, tcp_poll() does not lock socket! - * Change state from SYN-SENT only after copied_seq - * is initialized. */ - tp->copied_seq = tp->rcv_nxt; - mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - - if(!sk->dead) { - sk->state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); - } - - if (tp->write_pending || tp->defer_accept || tp->ack.pingpong) { - /* Save one ACK. Data will be ready after - * several ticks, if write_pending is set. 
- * - * It may be deleted, but with this feature tcpdumps - * look so _wonderfully_ clever, that I was not able - * to stand against the temptation 8) --ANK - */ - tcp_schedule_ack(tp); - tp->ack.lrcvtime = tcp_time_stamp; - tp->ack.ato = TCP_ATO_MIN; - tcp_incr_quickack(tp); - tcp_enter_quickack_mode(tp); - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); - -discard: - __kfree_skb(skb); - return 0; - } else { - tcp_send_ack(sk); - } - return -1; - } - - /* No ACK in the segment */ - - if (th->rst) { - /* rfc793: - * "If the RST bit is set - * - * Otherwise (no ACK) drop the segment and return." - */ - - goto discard_and_undo; - } - - /* PAWS check. */ - if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0)) - goto discard_and_undo; - - if (th->syn) { - /* We see SYN without ACK. It is attempt of - * simultaneous connect with crossed SYNs. - * Particularly, it can be connect to self. - */ - tcp_set_state(sk, TCP_SYN_RECV); - - if (tp->saw_tstamp) { - tp->tstamp_ok = 1; - tcp_store_ts_recent(tp); - tp->tcp_header_len = - sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; - } else { - tp->tcp_header_len = sizeof(struct tcphdr); - } - - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; - - /* RFC1323: The window in SYN & SYN/ACK segments is - * never scaled. - */ - tp->snd_wnd = ntohs(th->window); - tp->snd_wl1 = TCP_SKB_CB(skb)->seq; - tp->max_window = tp->snd_wnd; - - tcp_sync_mss(sk, tp->pmtu_cookie); - tcp_initialize_rcv_mss(sk); - - TCP_ECN_rcv_syn(tp, th); - - tcp_send_synack(sk); -#if 0 - /* Note, we could accept data and URG from this segment. - * There are no obstacles to make this. - * - * However, if we ignore data in ACKless segments sometimes, - * we have no reasons to accept it sometimes. - * Also, seems the code doing it in step6 of tcp_rcv_state_process - * is not flawless. So, discard packet for sanity. - * Uncomment this return to process the data. 
- */ - return -1; -#else - goto discard; -#endif - } - /* "fifth, if neither of the SYN or RST bits is set then - * drop the segment and return." - */ - -discard_and_undo: - tcp_clear_options(tp); - tp->mss_clamp = saved_clamp; - goto discard; - -reset_and_undo: - tcp_clear_options(tp); - tp->mss_clamp = saved_clamp; - return 1; -#else - return 0; -#endif -} - - -/* - * This function implements the receiving procedure of RFC 793 for - * all states except ESTABLISHED and TIME_WAIT. - * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be - * address independent. - */ - -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int queued = 0; - - tp->saw_tstamp = 0; - - switch (sk->state) { - case TCP_CLOSE: - goto discard; - - case TCP_LISTEN: - if(th->ack) - return 1; - - if(th->rst) - goto discard; - - if(th->syn) { - if(tp->af_specific->conn_request(sk, skb) < 0) - return 1; - - /* Now we have several options: In theory there is - * nothing else in the frame. KA9Q has an option to - * send data with the syn, BSD accepts data with the - * syn up to the [to be] advertised window and - * Solaris 2.1 gives you a protocol error. For now - * we just ignore it, that fits the spec precisely - * and avoids incompatibilities. It would be nice in - * future to drop through and process the data. - * - * Now that TTCP is starting to be used we ought to - * queue this data. - * But, this leaves one open to an easy denial of - * service attack, and SYN cookies can't defend - * against this problem. So, we drop the data - * in the interest of security over speed. - */ - goto discard; - } - goto discard; - - case TCP_SYN_SENT: - queued = tcp_rcv_synsent_state_process(sk, skb, th, len); - if (queued >= 0) - return queued; - - /* Do step6 onward by hand. 
*/ - tcp_urg(sk, skb, th); - __kfree_skb(skb); - tcp_data_snd_check(sk); - return 0; - } - - if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp && - tcp_paws_discard(tp, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(PAWSEstabRejected); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Reset is accepted even if it did not pass PAWS. */ - } - - /* step 1: check sequence number */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - /* step 2: check RST bit */ - if(th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - /* step 3: check security and precedence [ignored] */ - - /* step 4: - * - * Check for a SYN in window. - */ - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(TCPAbortOnSyn); - tcp_reset(sk); - return 1; - } - - /* step 5: check the ACK field */ - if (th->ack) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); - - switch(sk->state) { - case TCP_SYN_RECV: - if (acceptable) { - tp->copied_seq = tp->rcv_nxt; - mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->state_change(sk); - - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sleep == NULL - * and sk->socket == NULL. - */ - if (sk->socket) { - sk_wake_async(sk,0,POLL_OUT); - } - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << tp->snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); - - /* tcp_ack considers this ACK as duplicate - * and does not calculate rtt. - * Fix it at least with timestamps. 
- */ - if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(tp, 0); - - if (tp->tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - tcp_init_metrics(sk); - tcp_initialize_rcv_mss(sk); - tcp_init_buffer_space(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } - break; - - case TCP_FIN_WAIT1: - if (tp->snd_una == tp->write_seq) { - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->shutdown |= SEND_SHUTDOWN; - dst_confirm(sk->dst_cache); - - if (!sk->dead) { - /* Wake up lingering close() */ - sk->state_change(sk); - } else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(TCPAbortOnData); - return 1; - } - - tmo = tcp_fin_time(tp); - if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sk->lock.users) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - tcp_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - } - } - break; - - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } - break; - - case TCP_LAST_ACK: - if (tp->snd_una == tp->write_seq) { - tcp_update_metrics(sk); - tcp_done(sk); - goto discard; - } - break; - } - } else - goto discard; - - /* step 6: check the URG bit */ - tcp_urg(sk, skb, th); - - /* step 7: process the segment text */ - switch (sk->state) { - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - case TCP_LAST_ACK: - if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) - break; - case TCP_FIN_WAIT1: - case TCP_FIN_WAIT2: - /* RFC 793 says to queue data in these states, - * RFC 1122 says we MUST send a reset. - * BSD 4.4 also does reset. 
- */ - if (sk->shutdown & RCV_SHUTDOWN) { - if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { - NET_INC_STATS_BH(TCPAbortOnData); - tcp_reset(sk); - return 1; - } - } - /* Fall through */ - case TCP_ESTABLISHED: - tcp_data_queue(sk, skb); - queued = 1; - break; - } - - /* tcp_data could move socket to TIME-WAIT */ - if (sk->state != TCP_CLOSE) { - tcp_data_snd_check(sk); - tcp_ack_snd_check(sk); - } - - if (!queued) { -discard: - __kfree_skb(skb); - } - return 0; -#else - return 0; -#endif -} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_ipv4.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_ipv4.c deleted file mode 100755 index cf45c02046b..00000000000 --- a/reactos/drivers/net/tcpip/transport/tcp/tcp_ipv4.c +++ /dev/null @@ -1,2523 +0,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS TCP/IP protocol driver - * FILE: transport/tcp/tcp_ipv4.c - * PURPOSE: Transmission Control Protocol - * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) - * REVISIONS: - * CSH 15-01-2003 Imported from linux kernel 2.4.20 - */ - -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Implementation of the Transmission Control Protocol(TCP). - * - * Version: $Id: tcp_ipv4.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ - * - * IPv4 specific functions - * - * - * code split from: - * linux/ipv4/tcp.c - * linux/ipv4/tcp_input.c - * linux/ipv4/tcp_output.c - * - * See tcp.c for author information - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * Changes: - * David S. 
Miller : New socket lookup architecture. - * This code is dedicated to John Dyson. - * David S. Miller : Change semantics of established hash, - * half is devoted to TIME_WAIT sockets - * and the rest go in the other half. - * Andi Kleen : Add support for syncookies and fixed - * some bugs: ip options weren't passed to - * the TCP layer, missed a check for an ACK bit. - * Andi Kleen : Implemented fast path mtu discovery. - * Fixed many serious bugs in the - * open_request handling and moved - * most of it into the af independent code. - * Added tail drop and some other bugfixes. - * Added new listen sematics. - * Mike McLagan : Routing by source - * Juan Jose Ciarlante: ip_dynaddr bits - * Andi Kleen: various fixes. - * Vitaly E. Lavrov : Transparent proxy revived after year coma. - * Andi Kleen : Fix new listen. - * Andi Kleen : Fix accept error reporting. - */ - -#if 0 -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#else -#include "linux.h" -#include "tcpcore.h" -#endif - -extern int sysctl_ip_dynaddr; -extern int sysctl_ip_default_ttl; -int sysctl_tcp_tw_reuse = 0; - -/* Check TCP sequence numbers in ICMP packets. 
*/ -#define ICMP_MIN_LENGTH 8 - -/* Socket used for sending RSTs */ -#if 0 -static struct inode tcp_inode; -static struct socket *tcp_socket=&tcp_inode.u.socket_i; -#endif - -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb); - -/* - * ALL members must be initialised to prevent gcc-2.7.2.3 miscompilation - */ -#if 0 -struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { - __tcp_ehash: NULL, - __tcp_bhash: NULL, - __tcp_bhash_size: 0, - __tcp_ehash_size: 0, - __tcp_listening_hash: { NULL, }, - __tcp_lhash_lock: RW_LOCK_UNLOCKED, - __tcp_lhash_users: ATOMIC_INIT(0), - __tcp_lhash_wait: - __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), - __tcp_portalloc_lock: SPIN_LOCK_UNLOCKED -}; -#endif - -/* - * This array holds the first and last local port number. - * For high-usage systems, use sysctl to change this to - * 32768-61000 - */ -int sysctl_local_port_range[2] = { 1024, 4999 }; -int tcp_port_rover = (1024 - 1); - -static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, - __u32 faddr, __u16 fport) -{ - int h = ((laddr ^ lport) ^ (faddr ^ fport)); - h ^= h>>16; - h ^= h>>8; - return h & (tcp_ehash_size - 1); -} - -static __inline__ int tcp_sk_hashfn(struct sock *sk) -{ - __u32 laddr = sk->rcv_saddr; - __u16 lport = sk->num; - __u32 faddr = sk->daddr; - __u16 fport = sk->dport; - - return tcp_hashfn(laddr, lport, faddr, fport); -} - -/* Allocate and initialize a new TCP local port bind bucket. - * The bindhash mutex for snum's hash chain must be held here. 
- */ -struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum) -{ -#if 0 - struct tcp_bind_bucket *tb; - - tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC); - if(tb != NULL) { - tb->port = snum; - tb->fastreuse = 0; - tb->owners = NULL; - if((tb->next = head->chain) != NULL) - tb->next->pprev = &tb->next; - head->chain = tb; - tb->pprev = &head->chain; - } - return tb; -#else - return NULL; -#endif -} - -/* Caller must disable local BH processing. */ -static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) -{ -#if 0 - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(child->num)]; - struct tcp_bind_bucket *tb; - - spin_lock(&head->lock); - tb = (struct tcp_bind_bucket *)sk->prev; - if ((child->bind_next = tb->owners) != NULL) - tb->owners->bind_pprev = &child->bind_next; - tb->owners = child; - child->bind_pprev = &tb->owners; - child->prev = (struct sock *) tb; - spin_unlock(&head->lock); -#endif -} - -__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child) -{ -#if 0 - local_bh_disable(); - __tcp_inherit_port(sk, child); - local_bh_enable(); -#endif -} - -static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum) -{ -#if 0 - sk->num = snum; - if ((sk->bind_next = tb->owners) != NULL) - tb->owners->bind_pprev = &sk->bind_next; - tb->owners = sk; - sk->bind_pprev = &tb->owners; - sk->prev = (struct sock *) tb; -#endif -} - -static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) -{ -#if 0 - struct sock *sk2 = tb->owners; - int sk_reuse = sk->reuse; - - for( ; sk2 != NULL; sk2 = sk2->bind_next) { - if (sk != sk2 && - sk2->reuse <= 1 && - sk->bound_dev_if == sk2->bound_dev_if) { - if (!sk_reuse || - !sk2->reuse || - sk2->state == TCP_LISTEN) { - if (!sk2->rcv_saddr || - !sk->rcv_saddr || - (sk2->rcv_saddr == sk->rcv_saddr)) - break; - } - } - } - return sk2 != NULL; -#else - return 0; -#endif -} - -/* Obtain a 
reference to a local port for the given sock, - * if snum is zero it means select any available local port. - */ -static int tcp_v4_get_port(struct sock *sk, unsigned short snum) -{ -#if 0 - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; - int ret; - - local_bh_disable(); - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - - spin_lock(&tcp_portalloc_lock); - rover = tcp_port_rover; - do { rover++; - if ((rover < low) || (rover > high)) - rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; - spin_lock(&head->lock); - for (tb = head->chain; tb; tb = tb->next) - if (tb->port == rover) - goto next; - break; - next: - spin_unlock(&head->lock); - } while (--remaining > 0); - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); - - /* Exhausted local port range during search? */ - ret = 1; - if (remaining <= 0) - goto fail; - - /* OK, here is the one we will use. HEAD is - * non-NULL and we hold it's mutex. 
- */ - snum = rover; - tb = NULL; - } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; - spin_lock(&head->lock); - for (tb = head->chain; tb != NULL; tb = tb->next) - if (tb->port == snum) - break; - } - if (tb != NULL && tb->owners != NULL) { - if (sk->reuse > 1) - goto success; - if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) { - goto success; - } else { - ret = 1; - if (tcp_bind_conflict(sk, tb)) - goto fail_unlock; - } - } - ret = 1; - if (tb == NULL && - (tb = tcp_bucket_create(head, snum)) == NULL) - goto fail_unlock; - if (tb->owners == NULL) { - if (sk->reuse && sk->state != TCP_LISTEN) - tb->fastreuse = 1; - else - tb->fastreuse = 0; - } else if (tb->fastreuse && - ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) - tb->fastreuse = 0; -success: - if (sk->prev == NULL) - tcp_bind_hash(sk, tb, snum); - BUG_TRAP(sk->prev == (struct sock *) tb); - ret = 0; - -fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); - return ret; -#else - return 0; -#endif -} - -/* Get rid of any references to a local port held by the - * given sock. - */ -__inline__ void __tcp_put_port(struct sock *sk) -{ -#if 0 - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(sk->num)]; - struct tcp_bind_bucket *tb; - - spin_lock(&head->lock); - tb = (struct tcp_bind_bucket *) sk->prev; - if (sk->bind_next) - sk->bind_next->bind_pprev = sk->bind_pprev; - *(sk->bind_pprev) = sk->bind_next; - sk->prev = NULL; - sk->num = 0; - if (tb->owners == NULL) { - if (tb->next) - tb->next->pprev = tb->pprev; - *(tb->pprev) = tb->next; - kmem_cache_free(tcp_bucket_cachep, tb); - } - spin_unlock(&head->lock); -#endif -} - -void tcp_put_port(struct sock *sk) -{ -#if 0 - local_bh_disable(); - __tcp_put_port(sk); - local_bh_enable(); -#endif -} - -/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. 
Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ - -void tcp_listen_wlock(void) -{ -#if 0 - write_lock(&tcp_lhash_lock); - - if (atomic_read(&tcp_lhash_users)) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&tcp_lhash_wait, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&tcp_lhash_users) == 0) - break; - write_unlock_bh(&tcp_lhash_lock); - schedule(); - write_lock_bh(&tcp_lhash_lock); - } - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&tcp_lhash_wait, &wait); - } -#endif -} - -static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) -{ -#if 0 - struct sock **skp; - rwlock_t *lock; - - BUG_TRAP(sk->pprev==NULL); - if(listen_possible && sk->state == TCP_LISTEN) { - skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; - lock = &tcp_lhash_lock; - tcp_listen_wlock(); - } else { - skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))].chain; - lock = &tcp_ehash[sk->hashent].lock; - write_lock(lock); - } - if((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - *skp = sk; - sk->pprev = skp; - sock_prot_inc_use(sk->prot); - write_unlock(lock); - if (listen_possible && sk->state == TCP_LISTEN) - wake_up(&tcp_lhash_wait); -#endif -} - -static void tcp_v4_hash(struct sock *sk) -{ -#if 0 - if (sk->state != TCP_CLOSE) { - local_bh_disable(); - __tcp_v4_hash(sk, 1); - local_bh_enable(); - } -#endif -} - -void tcp_unhash(struct sock *sk) -{ -#if 0 - rwlock_t *lock; - - if (!sk->pprev) - goto ende; - - if (sk->state == TCP_LISTEN) { - local_bh_disable(); - tcp_listen_wlock(); - lock = &tcp_lhash_lock; - } else { - struct tcp_ehash_bucket *head = &tcp_ehash[sk->hashent]; - lock = &head->lock; - write_lock_bh(&head->lock); - } - - if(sk->pprev) { - if(sk->next) - sk->next->pprev = sk->pprev; - *sk->pprev = sk->next; - sk->pprev = NULL; - sock_prot_dec_use(sk->prot); - } - 
write_unlock_bh(lock); - - ende: - if (sk->state == TCP_LISTEN) - wake_up(&tcp_lhash_wait); -#endif -} - -/* Don't inline this cruft. Here are some nice properties to - * exploit here. The BSD API does not allow a listening TCP - * to specify the remote port nor the remote address for the - * connection. So always assume those are both wildcarded - * during the search since they can never be otherwise. - */ -static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigned short hnum, int dif) -{ -#if 0 - struct sock *result = NULL; - int score, hiscore; - - hiscore=0; - for(; sk; sk = sk->next) { - if(sk->num == hnum) { - __u32 rcv_saddr = sk->rcv_saddr; - - score = 1; - if(rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score++; - } - if (sk->bound_dev_if) { - if (sk->bound_dev_if != dif) - continue; - score++; - } - if (score == 3) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -#else - return NULL; -#endif -} - -/* Optimize the common listener case. */ -__inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif) -{ -#if 0 - struct sock *sk; - - read_lock(&tcp_lhash_lock); - sk = tcp_listening_hash[tcp_lhashfn(hnum)]; - if (sk) { - if (sk->num == hnum && - sk->next == NULL && - (!sk->rcv_saddr || sk->rcv_saddr == daddr) && - !sk->bound_dev_if) - goto sherry_cache; - sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif); - } - if (sk) { -sherry_cache: - sock_hold(sk); - } - read_unlock(&tcp_lhash_lock); - return sk; -#else - return NULL; -#endif -} - -/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so - * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM - * - * Local BH must be disabled here. 
- */ - -static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, - u32 daddr, u16 hnum, int dif) -{ -#if 0 - struct tcp_ehash_bucket *head; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); - struct sock *sk; - int hash; - - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. - */ - hash = tcp_hashfn(daddr, hnum, saddr, sport); - head = &tcp_ehash[hash]; - read_lock(&head->lock); - for(sk = head->chain; sk; sk = sk->next) { - if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ - } - - /* Must check for a TIME_WAIT'er before going to listener hash. */ - for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) - if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) - goto hit; - read_unlock(&head->lock); - - return NULL; - -hit: - sock_hold(sk); - read_unlock(&head->lock); - return sk; -#else - return NULL; -#endif -} - -static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 hnum, int dif) -{ -#if 0 - struct sock *sk; - - sk = __tcp_v4_lookup_established(saddr, sport, daddr, hnum, dif); - - if (sk) - return sk; - - return tcp_v4_lookup_listener(daddr, hnum, dif); -#else - return NULL; -#endif -} - -__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) -{ -#if 0 - struct sock *sk; - - local_bh_disable(); - sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif); - local_bh_enable(); - - return sk; -#else - return NULL; -#endif -} - -static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - return secure_tcp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, - skb->h.th->dest, - skb->h.th->source); -#else - return 0; -#endif -} - -/* called with local bh disabled */ -static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) -{ -#if 0 - u32 daddr = sk->rcv_saddr; - u32 
saddr = sk->daddr; - int dif = sk->bound_dev_if; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(sk->dport, lport); - int hash = tcp_hashfn(daddr, lport, saddr, sk->dport); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; - struct sock *sk2, **skp; - struct tcp_tw_bucket *tw; - - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp) != NULL; - skp = &sk2->next) { - tw = (struct tcp_tw_bucket*)sk2; - - if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* With PAWS, it is safe from the viewpoint - of data integrity. Even without PAWS it - is safe provided sequence spaces do not - overlap i.e. at data rates <= 80Mbit/sec. - - Actually, the idea is close to VJ's one, - only timestamp cache is held not per host, - but per port pair and TW bucket is used - as state holder. - - If TW bucket has been already destroyed we - fall back to VJ's scheme and use initial - timestamp retrieved from peer table. - */ - if (tw->ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - tw->ts_recent_stamp > 1))) { - if ((tp->write_seq = tw->snd_nxt+65535+2) == 0) - tp->write_seq = 1; - tp->ts_recent = tw->ts_recent; - tp->ts_recent_stamp = tw->ts_recent_stamp; - sock_hold(sk2); - skp = &head->chain; - goto unique; - } else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ - for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) { - if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. 
*/ - sk->num = lport; - sk->sport = htons(lport); - BUG_TRAP(sk->pprev==NULL); - if ((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - - *skp = sk; - sk->pprev = skp; - sk->hashent = hash; - sock_prot_inc_use(sk->prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(TimeWaitRecycled); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - tcp_tw_deschedule(tw); - tcp_timewait_kill(tw); - NET_INC_STATS_BH(TimeWaitRecycled); - - tcp_tw_put(tw); - } - - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -#else - return 0; -#endif -} - -/* - * Bind a port for a connect operation and hash it. - */ -static int tcp_v4_hash_connect(struct sock *sk) -{ -#if 0 - unsigned short snum = sk->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; - - if (snum == 0) { - int rover; - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - struct tcp_tw_bucket *tw = NULL; - - local_bh_disable(); - - /* TODO. Actually it is not so bad idea to remove - * tcp_portalloc_lock before next submission to Linus. - * As soon as we touch this place at all it is time to think. - * - * Now it protects single _advisory_ variable tcp_port_rover, - * hence it is mostly useless. - * Code will work nicely if we just delete it, but - * I am afraid in contented case it will work not better or - * even worse: another cpu just will hit the same bucket - * and spin there. - * So some cpu salt could remove both contention and - * memory pingpong. Any ideas how to do this in a nice way? - */ - spin_lock(&tcp_portalloc_lock); - rover = tcp_port_rover; - - do { - rover++; - if ((rover < low) || (rover > high)) - rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. 
- */ - for (tb = head->chain; tb; tb = tb->next) { - if (tb->port == rover) { - BUG_TRAP(tb->owners != NULL); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v4_check_established(sk, rover, &tw)) - goto ok; - goto next_port; - } - } - - tb = tcp_bucket_create(head, rover); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } while (--remaining > 0); - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); - - local_bh_enable(); - - return -EADDRNOTAVAIL; - - ok: - /* All locks still held and bhs disabled */ - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); - - tcp_bind_hash(sk, tb, rover); - if (!sk->pprev) { - sk->sport = htons(rover); - __tcp_v4_hash(sk, 0); - } - spin_unlock(&head->lock); - - if (tw) { - tcp_tw_deschedule(tw); - tcp_timewait_kill(tw); - tcp_tw_put(tw); - } - - local_bh_enable(); - return 0; - } - - head = &tcp_bhash[tcp_bhashfn(snum)]; - tb = (struct tcp_bind_bucket *)sk->prev; - spin_lock_bh(&head->lock); - if (tb->owners == sk && sk->bind_next == NULL) { - __tcp_v4_hash(sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - int ret; - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __tcp_v4_check_established(sk, snum, NULL); - local_bh_enable(); - return ret; - } -#else - return 0; -#endif -} - -/* This will initiate an outgoing connection. 
*/ -int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; - struct rtable *rt; - u32 daddr, nexthop; - int tmp; - int err; - - if (addr_len < sizeof(struct sockaddr_in)) - return(-EINVAL); - - if (usin->sin_family != AF_INET) - return(-EAFNOSUPPORT); - - nexthop = daddr = usin->sin_addr.s_addr; - if (sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) { - if (daddr == 0) - return -EINVAL; - nexthop = sk->protinfo.af_inet.opt->faddr; - } - - tmp = ip_route_connect(&rt, nexthop, sk->saddr, - RT_CONN_FLAGS(sk), sk->bound_dev_if); - if (tmp < 0) - return tmp; - - if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) { - ip_rt_put(rt); - return -ENETUNREACH; - } - - __sk_dst_set(sk, &rt->u.dst); - sk->route_caps = rt->u.dst.dev->features; - - if (!sk->protinfo.af_inet.opt || !sk->protinfo.af_inet.opt->srr) - daddr = rt->rt_dst; - - if (!sk->saddr) - sk->saddr = rt->rt_src; - sk->rcv_saddr = sk->saddr; - - if (tp->ts_recent_stamp && sk->daddr != daddr) { - /* Reset inherited state */ - tp->ts_recent = 0; - tp->ts_recent_stamp = 0; - tp->write_seq = 0; - } - - if (sysctl_tcp_tw_recycle && - !tp->ts_recent_stamp && - rt->rt_dst == daddr) { - struct inet_peer *peer = rt_get_peer(rt); - - /* VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state TIME-WAIT - * and initialize ts_recent from it, when trying new connection. - */ - - if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { - tp->ts_recent_stamp = peer->tcp_ts_stamp; - tp->ts_recent = peer->tcp_ts; - } - } - - sk->dport = usin->sin_port; - sk->daddr = daddr; - - tp->ext_header_len = 0; - if (sk->protinfo.af_inet.opt) - tp->ext_header_len = sk->protinfo.af_inet.opt->optlen; - - tp->mss_clamp = 536; - - /* Socket identity is still unknown (sport may be zero). 
- * However we set state to SYN-SENT and not releasing socket - * lock select source port, enter ourselves into the hash tables and - * complete initalization after this. - */ - tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v4_hash_connect(sk); - if (err) - goto failure; - - if (!tp->write_seq) - tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, - sk->sport, usin->sin_port); - - sk->protinfo.af_inet.id = tp->write_seq^jiffies; - - err = tcp_connect(sk); - if (err) - goto failure; - - return 0; - -failure: - tcp_set_state(sk, TCP_CLOSE); - __sk_dst_reset(sk); - sk->route_caps = 0; - sk->dport = 0; - return err; -#else - return 0; -#endif -} - -static __inline__ int tcp_v4_iif(struct sk_buff *skb) -{ -#if 0 - return ((struct rtable*)skb->dst)->rt_iif; -#else - return 0; -#endif -} - -static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport) -{ -#if 0 - unsigned h = raddr ^ rport; - h ^= h>>16; - h ^= h>>8; - return h&(TCP_SYNQ_HSIZE-1); -#else - return 0; -#endif -} - -static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, - struct open_request ***prevp, - __u16 rport, - __u32 raddr, __u32 laddr) -{ -#if 0 - struct tcp_listen_opt *lopt = tp->listen_opt; - struct open_request *req, **prev; - - for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)]; - (req = *prev) != NULL; - prev = &req->dl_next) { - if (req->rmt_port == rport && - req->af.v4_req.rmt_addr == raddr && - req->af.v4_req.loc_addr == laddr && - TCP_INET_FAMILY(req->class->family)) { - BUG_TRAP(req->sk == NULL); - *prevp = prev; - return req; - } - } - - return NULL; -#else - return NULL; -#endif -} - -static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct tcp_listen_opt *lopt = tp->listen_opt; - unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port); - - req->expires = jiffies + TCP_TIMEOUT_INIT; - req->retrans = 0; - req->sk = NULL; - req->dl_next = lopt->syn_table[h]; - - 
write_lock(&tp->syn_wait_lock); - lopt->syn_table[h] = req; - write_unlock(&tp->syn_wait_lock); - - tcp_synq_added(sk); -#endif -} - - -/* - * This routine does path mtu discovery as defined in RFC1191. - */ -static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu) -{ -#if 0 - struct dst_entry *dst; - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). - */ - if (sk->state == TCP_LISTEN) - return; - - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) - return; - - ip_rt_update_pmtu(dst, mtu); - - /* Something is about to be wrong... Remember soft error - * for the case, if this connection will not able to recover. - */ - if (mtu < dst->pmtu && ip_dont_fragment(sk, dst)) - sk->err_soft = EMSGSIZE; - - if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT && - tp->pmtu_cookie > dst->pmtu) { - tcp_sync_mss(sk, dst->pmtu); - - /* Resend the TCP packet because it's - * clear that the old packet has been - * dropped. This is the new "fast" path mtu - * discovery. - */ - tcp_simple_retransmit(sk); - } /* else let the usual retransmit timer handle it */ -#endif -} - -/* - * This routine is called by the ICMP module when it gets some - * sort of error condition. If err < 0 then the socket should - * be closed and the error returned to the user. If err > 0 - * it's just the icmp type << 8 | icmp code. After adjustment - * header points to the first 8 bytes of the tcp header. We need - * to find the appropriate port. - * - * The locking strategy used here is very "optimistic". 
When - * someone else accesses the socket the ICMP is just dropped - * and for some paths there is no check at all. - * A more general error queue to queue errors for later handling - * is probably better. - * - */ - -void tcp_v4_err(struct sk_buff *skb, u32 info) -{ -#if 0 - struct iphdr *iph = (struct iphdr*)skb->data; - struct tcphdr *th = (struct tcphdr*)(skb->data+(iph->ihl<<2)); - struct tcp_opt *tp; - int type = skb->h.icmph->type; - int code = skb->h.icmph->code; - struct sock *sk; - __u32 seq; - int err; - - if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(IcmpInErrors); - return; - } - - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, tcp_v4_iif(skb)); - if (sk == NULL) { - ICMP_INC_STATS_BH(IcmpInErrors); - return; - } - if (sk->state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket*)sk); - return; - } - - bh_lock_sock(sk); - /* If too many ICMPs get dropped on busy - * servers this needs to be solved differently. - */ - if (sk->lock.users != 0) - NET_INC_STATS_BH(LockDroppedIcmps); - - if (sk->state == TCP_CLOSE) - goto out; - - tp = &sk->tp_pinfo.af_tcp; - seq = ntohl(th->seq); - if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { - NET_INC_STATS(OutOfWindowIcmps); - goto out; - } - - switch (type) { - case ICMP_SOURCE_QUENCH: - /* This is deprecated, but if someone generated it, - * we have no reasons to ignore it. 
- */ - if (sk->lock.users == 0) - tcp_enter_cwr(tp); - goto out; - case ICMP_PARAMETERPROB: - err = EPROTO; - break; - case ICMP_DEST_UNREACH: - if (code > NR_ICMP_UNREACH) - goto out; - - if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ - if (sk->lock.users == 0) - do_pmtu_discovery(sk, iph, info); - goto out; - } - - err = icmp_err_convert[code].errno; - break; - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - default: - goto out; - } - - switch (sk->state) { - struct open_request *req, **prev; - case TCP_LISTEN: - if (sk->lock.users != 0) - goto out; - - req = tcp_v4_search_req(tp, &prev, - th->dest, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - an established socket here. - */ - BUG_TRAP(req->sk == NULL); - - if (seq != req->snt_isn) { - NET_INC_STATS_BH(OutOfWindowIcmps); - goto out; - } - - /* - * Still in SYN_RECV, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - tcp_synq_drop(sk, req, prev); - goto out; - - case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can f.e. if SYNs crossed. - */ - if (sk->lock.users == 0) { - TCP_INC_STATS_BH(TcpAttemptFails); - sk->err = err; - - sk->error_report(sk); - - tcp_done(sk); - } else { - sk->err_soft = err; - } - goto out; - } - - /* If we've already connected we will keep trying - * until we time out, or the user gives up. - * - * rfc1122 4.2.3.9 allows to consider as hard errors - * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, - * but it is obsoleted by pmtu discovery). - * - * Note, that in modern internet, where routing is unreliable - * and in each dark corner broken firewalls sit, sending random - * errors ordered by their masters even this two messages finally lose - * their original sense (even Linux sends invalid PORT_UNREACHs) - * - * Now we are in compliance with RFCs. 
- * --ANK (980905) - */ - - if (sk->lock.users == 0 && sk->protinfo.af_inet.recverr) { - sk->err = err; - sk->error_report(sk); - } else { /* Only an error on timeout */ - sk->err_soft = err; - } - -out: - bh_unlock_sock(sk); - sock_put(sk); -#endif -} - -/* This routine computes an IPv4 TCP checksum. */ -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) -{ -#if 0 - if (skb->ip_summed == CHECKSUM_HW) { - th->check = ~tcp_v4_check(th, len, sk->saddr, sk->daddr, 0); - skb->csum = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr, - csum_partial((char *)th, th->doff<<2, skb->csum)); - } -#endif -} - -/* - * This routine will send an RST to the other tcp. - * - * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) - * for reset. - * Answer: if a packet caused RST, it is not for a socket - * existing in our system, if it is matched to a socket, - * it is just duplicate segment or bug in other side's TCP. - * So that we build reply only basing on parameters - * arrived with segment. - * Exception: precedence violation. We do not implement it in any case. - */ - -static void tcp_v4_send_reset(struct sk_buff *skb) -{ -#if 0 - struct tcphdr *th = skb->h.th; - struct tcphdr rth; - struct ip_reply_arg arg; - - /* Never send a reset in response to a reset. */ - if (th->rst) - return; - - if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL) - return; - - /* Swap the send and the receive. 
*/ - memset(&rth, 0, sizeof(struct tcphdr)); - rth.dest = th->source; - rth.source = th->dest; - rth.doff = sizeof(struct tcphdr)/4; - rth.rst = 1; - - if (th->ack) { - rth.seq = th->ack_seq; - } else { - rth.ack = 1; - rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin - + skb->len - (th->doff<<2)); - } - - memset(&arg, 0, sizeof arg); - arg.iov[0].iov_base = (unsigned char *)&rth; - arg.iov[0].iov_len = sizeof rth; - arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, - skb->nh.iph->saddr, /*XXX*/ - sizeof(struct tcphdr), - IPPROTO_TCP, - 0); - arg.n_iov = 1; - arg.csumoffset = offsetof(struct tcphdr, check) / 2; - - tcp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; - ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); - - TCP_INC_STATS_BH(TcpOutSegs); - TCP_INC_STATS_BH(TcpOutRsts); -#endif -} - -/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states - outside socket context is ugly, certainly. What can I do? - */ - -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) -{ -#if 0 - struct tcphdr *th = skb->h.th; - struct { - struct tcphdr th; - u32 tsopt[3]; - } rep; - struct ip_reply_arg arg; - - memset(&rep.th, 0, sizeof(struct tcphdr)); - memset(&arg, 0, sizeof arg); - - arg.iov[0].iov_base = (unsigned char *)&rep; - arg.iov[0].iov_len = sizeof(rep.th); - arg.n_iov = 1; - if (ts) { - rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - rep.tsopt[1] = htonl(tcp_time_stamp); - rep.tsopt[2] = htonl(ts); - arg.iov[0].iov_len = sizeof(rep); - } - - /* Swap the send and the receive. 
*/ - rep.th.dest = th->source; - rep.th.source = th->dest; - rep.th.doff = arg.iov[0].iov_len/4; - rep.th.seq = htonl(seq); - rep.th.ack_seq = htonl(ack); - rep.th.ack = 1; - rep.th.window = htons(win); - - arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, - skb->nh.iph->saddr, /*XXX*/ - arg.iov[0].iov_len, - IPPROTO_TCP, - 0); - arg.csumoffset = offsetof(struct tcphdr, check) / 2; - - ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); - - TCP_INC_STATS_BH(TcpOutSegs); -#endif -} - -static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; - - tcp_v4_send_ack(skb, tw->snd_nxt, tw->rcv_nxt, - tw->rcv_wnd>>tw->rcv_wscale, tw->ts_recent); - - tcp_tw_put(tw); -#endif -} - -static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) -{ -#if 0 - tcp_v4_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, - req->ts_recent); -#endif -} - -static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request *req) -{ -#if 0 - struct rtable *rt; - struct ip_options *opt; - - opt = req->af.v4_req.opt; - if(ip_route_output(&rt, ((opt && opt->srr) ? - opt->faddr : - req->af.v4_req.rmt_addr), - req->af.v4_req.loc_addr, - RT_CONN_FLAGS(sk), sk->bound_dev_if)) { - IP_INC_STATS_BH(IpOutNoRoutes); - return NULL; - } - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - ip_rt_put(rt); - IP_INC_STATS_BH(IpOutNoRoutes); - return NULL; - } - return &rt->u.dst; -#else - return NULL; -#endif -} - -/* - * Send a SYN-ACK after having received an ACK. - * This still operates on a open_request only, not on a big - * socket. - */ -static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, - struct dst_entry *dst) -{ -#if 0 - int err = -1; - struct sk_buff * skb; - - /* First, grab a route. 
*/ - if (dst == NULL && - (dst = tcp_v4_route_req(sk, req)) == NULL) - goto out; - - skb = tcp_make_synack(sk, dst, req); - - if (skb) { - struct tcphdr *th = skb->h.th; - - th->check = tcp_v4_check(th, skb->len, - req->af.v4_req.loc_addr, req->af.v4_req.rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); - - err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, - req->af.v4_req.rmt_addr, req->af.v4_req.opt); - if (err == NET_XMIT_CN) - err = 0; - } - -out: - dst_release(dst); - return err; -#else - return 0; -#endif -} - -/* - * IPv4 open_request destructor. - */ -static void tcp_v4_or_free(struct open_request *req) -{ -#if 0 - if (req->af.v4_req.opt) - kfree(req->af.v4_req.opt); -#endif -} - -static inline void syn_flood_warning(struct sk_buff *skb) -{ -#if 0 - static unsigned long warntime; - - if (jiffies - warntime > HZ*60) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(skb->h.th->dest)); - } -#endif -} - -/* - * Save and compile IPv4 options into the open_request if needed. - */ -static inline struct ip_options * -tcp_v4_save_options(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct ip_options *opt = &(IPCB(skb)->opt); - struct ip_options *dopt = NULL; - - if (opt && opt->optlen) { - int opt_size = optlength(opt); - dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt) { - if (ip_options_echo(dopt, skb)) { - kfree(dopt); - dopt = NULL; - } - } - } - return dopt; -#else - return NULL; -#endif -} - -/* - * Maximum number of SYN_RECV sockets in queue per LISTEN socket. - * One SYN_RECV socket costs about 80bytes on a 32bit machine. - * It would be better to replace it with a global counter for all sockets - * but then some measure against one socket starving all other sockets - * would be needed. - * - * It was 128 by default. Experiments with real servers show, that - * it is absolutely not enough even at 100conn/sec. 256 cures most - * of problems. 
This value is adjusted to 128 for very small machines - * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). - * Further increasing requires to change hash table size. - */ -int sysctl_max_syn_backlog = 256; - -#if 0 -struct or_calltable or_ipv4 = { - PF_INET, - tcp_v4_send_synack, - tcp_v4_or_send_ack, - tcp_v4_or_free, - tcp_v4_send_reset -}; -#endif - -int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct tcp_opt tp; - struct open_request *req; - __u32 saddr = skb->nh.iph->saddr; - __u32 daddr = skb->nh.iph->daddr; - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; -#ifdef CONFIG_SYN_COOKIES - int want_cookie = 0; -#else -#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ -#endif - - /* Never answer to SYNs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST|RTCF_MULTICAST)) - goto drop; - - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if (tcp_synq_is_full(sk) && !isn) { -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - want_cookie = 1; - } else -#endif - goto drop; - } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) - goto drop; - - req = tcp_openreq_alloc(); - if (req == NULL) - goto drop; - - tcp_clear_options(&tp); - tp.mss_clamp = 536; - tp.user_mss = sk->tp_pinfo.af_tcp.user_mss; - - tcp_parse_options(skb, &tp, 0); - - if (want_cookie) { - tcp_clear_options(&tp); - tp.saw_tstamp = 0; - } - - if (tp.saw_tstamp && tp.rcv_tsval == 0) { - /* Some OSes (unknown ones, but I see them on web server, which - * contains information interesting only for windows' - * users) do not send their stamp in SYN. It is easy case. 
- * We simply do not advertise TS support. - */ - tp.saw_tstamp = 0; - tp.tstamp_ok = 0; - } - tp.tstamp_ok = tp.saw_tstamp; - - tcp_openreq_init(req, &tp, skb); - - req->af.v4_req.loc_addr = daddr; - req->af.v4_req.rmt_addr = saddr; - req->af.v4_req.opt = tcp_v4_save_options(sk, skb); - req->class = &or_ipv4; - if (!want_cookie) - TCP_ECN_create_request(req, skb->h.th); - - if (want_cookie) { -#ifdef CONFIG_SYN_COOKIES - syn_flood_warning(skb); -#endif - isn = cookie_v4_init_sequence(sk, skb, &req->mss); - } else if (isn == 0) { - struct inet_peer *peer = NULL; - - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tp.saw_tstamp && - sysctl_tcp_tw_recycle && - (dst = tcp_v4_route_req(sk, req)) != NULL && - (peer = rt_get_peer((struct rtable*)dst)) != NULL && - peer->v4daddr == saddr) { - if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { - NET_INC_STATS_BH(PAWSPassiveRejected); - dst_release(dst); - goto drop_and_free; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - tcp_synq_len(sk) - < (sysctl_max_syn_backlog>>2)) && - (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst->rtt)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. 
- */ - NETDEBUG(if (net_ratelimit()) \ - printk(KERN_DEBUG "TCP: drop open request from %u.%u.%u.%u/%u\n", \ - NIPQUAD(saddr), ntohs(skb->h.th->source))); - dst_release(dst); - goto drop_and_free; - } - - isn = tcp_v4_init_sequence(sk, skb); - } - req->snt_isn = isn; - - if (tcp_v4_send_synack(sk, req, dst)) - goto drop_and_free; - - if (want_cookie) { - tcp_openreq_free(req); - } else { - tcp_v4_synq_add(sk, req); - } - return 0; - -drop_and_free: - tcp_openreq_free(req); -drop: - TCP_INC_STATS_BH(TcpAttemptFails); - return 0; -#else - return 0; -#endif -} - - -/* - * The three way handshake has completed - we got a valid synack - - * now create the new socket. - */ -struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct open_request *req, - struct dst_entry *dst) -{ -#if 0 - struct tcp_opt *newtp; - struct sock *newsk; - - if (tcp_acceptq_is_full(sk)) - goto exit_overflow; - - if (dst == NULL && - (dst = tcp_v4_route_req(sk, req)) == NULL) - goto exit; - - newsk = tcp_create_openreq_child(sk, req, skb); - if (!newsk) - goto exit; - - newsk->dst_cache = dst; - newsk->route_caps = dst->dev->features; - - newtp = &(newsk->tp_pinfo.af_tcp); - newsk->daddr = req->af.v4_req.rmt_addr; - newsk->saddr = req->af.v4_req.loc_addr; - newsk->rcv_saddr = req->af.v4_req.loc_addr; - newsk->protinfo.af_inet.opt = req->af.v4_req.opt; - req->af.v4_req.opt = NULL; - newsk->protinfo.af_inet.mc_index = tcp_v4_iif(skb); - newsk->protinfo.af_inet.mc_ttl = skb->nh.iph->ttl; - newtp->ext_header_len = 0; - if (newsk->protinfo.af_inet.opt) - newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen; - newsk->protinfo.af_inet.id = newtp->write_seq^jiffies; - - tcp_sync_mss(newsk, dst->pmtu); - newtp->advmss = dst->advmss; - tcp_initialize_rcv_mss(newsk); - - __tcp_v4_hash(newsk, 0); - __tcp_inherit_port(sk, newsk); - - return newsk; - -exit_overflow: - NET_INC_STATS_BH(ListenOverflows); -exit: - NET_INC_STATS_BH(ListenDrops); - dst_release(dst); - return NULL; 
-#else - return NULL; -#endif -} - -static struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb) -{ -#if 0 - struct open_request *req, **prev; - struct tcphdr *th = skb->h.th; - struct iphdr *iph = skb->nh.iph; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sock *nsk; - - /* Find possible connection requests. */ - req = tcp_v4_search_req(tp, &prev, - th->source, - iph->saddr, iph->daddr); - if (req) - return tcp_check_req(sk, skb, req, prev); - - nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, - th->source, - skb->nh.iph->daddr, - ntohs(th->dest), - tcp_v4_iif(skb)); - - if (nsk) { - if (nsk->state != TCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - tcp_tw_put((struct tcp_tw_bucket*)nsk); - return NULL; - } - -#ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) - sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); -#endif - return sk; -#else - return NULL; -#endif -} - -static int tcp_v4_checksum_init(struct sk_buff *skb) -{ -#if 0 - if (skb->ip_summed == CHECKSUM_HW) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, - skb->nh.iph->daddr,skb->csum)) - return 0; - - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v4 csum failed\n")); - skb->ip_summed = CHECKSUM_NONE; - } - if (skb->len <= 76) { - if (tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, - skb->nh.iph->daddr, - skb_checksum(skb, 0, skb->len, 0))) - return -1; - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { - skb->csum = ~tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, - skb->nh.iph->daddr,0); - } - return 0; -#else - return 0; -#endif -} - - -/* The socket must have it's spinlock held when we get - * here. - * - * We have a potential double-lock case here, so even when - * doing backlog processing we use the BH locking scheme. - * This is because we cannot sleep with the original spinlock - * held. 
- */ -int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) -{ -#if 0 -#ifdef CONFIG_FILTER - struct sk_filter *filter = sk->filter; - if (filter && sk_filter(skb, filter)) - goto discard; -#endif /* CONFIG_FILTER */ - - IP_INC_STATS_BH(IpInDelivers); - - if (sk->state == TCP_ESTABLISHED) { /* Fast path */ - TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) - goto reset; - TCP_CHECK_TIMER(sk); - return 0; - } - - if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb)) - goto csum_err; - - if (sk->state == TCP_LISTEN) { - struct sock *nsk = tcp_v4_hnd_req(sk, skb); - if (!nsk) - goto discard; - - if (nsk != sk) { - if (tcp_child_process(sk, nsk, skb)) - goto reset; - return 0; - } - } - - TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) - goto reset; - TCP_CHECK_TIMER(sk); - return 0; - -reset: - tcp_v4_send_reset(skb); -discard: - kfree_skb(skb); - /* Be careful here. If this function gets more complicated and - * gcc suffers from register pressure on the x86, sk (in %ebx) - * might be destroyed here. This current version compiles correctly, - * but you have been warned. - */ - return 0; - -csum_err: - TCP_INC_STATS_BH(TcpInErrs); - goto discard; -#else - return 0; -#endif -} - -/* - * From tcp_input.c - */ - -int tcp_v4_rcv(struct sk_buff *skb) -{ -#if 0 - struct tcphdr *th; - struct sock *sk; - int ret; - - if (skb->pkt_type!=PACKET_HOST) - goto discard_it; - - /* Count it even if it's bad */ - TCP_INC_STATS_BH(TcpInSegs); - - if (!pskb_may_pull(skb, sizeof(struct tcphdr))) - goto discard_it; - - th = skb->h.th; - - if (th->doff < sizeof(struct tcphdr)/4) - goto bad_packet; - if (!pskb_may_pull(skb, th->doff*4)) - goto discard_it; - - /* An explanation is required here, I think. - * Packet length and doff are validated by header prediction, - * provided case of th->doff==0 is elimineted. - * So, we defer the checks. 
*/ - if ((skb->ip_summed != CHECKSUM_UNNECESSARY && - tcp_v4_checksum_init(skb) < 0)) - goto bad_packet; - - th = skb->h.th; - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff*4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; - TCP_SKB_CB(skb)->sacked = 0; - - sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); - - if (!sk) - goto no_tcp_socket; - -process: - if(!ipsec_sk_policy(sk,skb)) - goto discard_and_relse; - - if (sk->state == TCP_TIME_WAIT) - goto do_time_wait; - - skb->dev = NULL; - - bh_lock_sock(sk); - ret = 0; - if (!sk->lock.users) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); - } else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - - sock_put(sk); - - return ret; - -no_tcp_socket: - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { -bad_packet: - TCP_INC_STATS_BH(TcpInErrs); - } else { - tcp_v4_send_reset(skb); - } - -discard_it: - /* Discard frame. 
*/ - kfree_skb(skb); - return 0; - -discard_and_relse: - sock_put(sk); - goto discard_it; - -do_time_wait: - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(TcpInErrs); - goto discard_and_relse; - } - switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { - case TCP_TW_SYN: - { - struct sock *sk2; - - sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); - if (sk2 != NULL) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_timewait_kill((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); - sk = sk2; - goto process; - } - /* Fall through to ACK */ - } - case TCP_TW_ACK: - tcp_v4_timewait_ack(sk, skb); - break; - case TCP_TW_RST: - goto no_tcp_socket; - case TCP_TW_SUCCESS:; - } - goto discard_it; -#endif -} - -/* With per-bucket locks this operation is not-atomic, so that - * this version is not worse. - */ -static void __tcp_v4_rehash(struct sock *sk) -{ -#if 0 - sk->prot->unhash(sk); - sk->prot->hash(sk); -#endif -} - -static int tcp_v4_reselect_saddr(struct sock *sk) -{ -#if 0 - int err; - struct rtable *rt; - __u32 old_saddr = sk->saddr; - __u32 new_saddr; - __u32 daddr = sk->daddr; - - if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) - daddr = sk->protinfo.af_inet.opt->faddr; - - /* Query new route. 
*/ - err = ip_route_connect(&rt, daddr, 0, - RT_TOS(sk->protinfo.af_inet.tos)|sk->localroute, - sk->bound_dev_if); - if (err) - return err; - - __sk_dst_set(sk, &rt->u.dst); - sk->route_caps = rt->u.dst.dev->features; - - new_saddr = rt->rt_src; - - if (new_saddr == old_saddr) - return 0; - - if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr " - "from %d.%d.%d.%d to %d.%d.%d.%d\n", - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); - } - - sk->saddr = new_saddr; - sk->rcv_saddr = new_saddr; - - /* XXX The only one ugly spot where we need to - * XXX really change the sockets identity after - * XXX it has entered the hashes. -DaveM - * - * Besides that, it does not check for connection - * uniqueness. Wait for troubles. - */ - __tcp_v4_rehash(sk); - return 0; -#else - return 0; -#endif -} - -int tcp_v4_rebuild_header(struct sock *sk) -{ -#if 0 - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; - int err; - - /* Route is OK, nothing to do. */ - if (rt != NULL) - return 0; - - /* Reroute. */ - daddr = sk->daddr; - if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) - daddr = sk->protinfo.af_inet.opt->faddr; - - err = ip_route_output(&rt, daddr, sk->saddr, - RT_CONN_FLAGS(sk), sk->bound_dev_if); - if (!err) { - __sk_dst_set(sk, &rt->u.dst); - sk->route_caps = rt->u.dst.dev->features; - return 0; - } - - /* Routing failed... */ - sk->route_caps = 0; - - if (!sysctl_ip_dynaddr || - sk->state != TCP_SYN_SENT || - (sk->userlocks & SOCK_BINDADDR_LOCK) || - (err = tcp_v4_reselect_saddr(sk)) != 0) - sk->err_soft=-err; - - return err; -#else - return 0; -#endif -} - -static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) -{ -#if 0 - struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; - - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = sk->daddr; - sin->sin_port = sk->dport; -#endif -} - -/* VJ's idea. 
Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. - */ - -int tcp_v4_remember_stamp(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct rtable *rt = (struct rtable*)__sk_dst_get(sk); - struct inet_peer *peer = NULL; - int release_it = 0; - - if (rt == NULL || rt->rt_dst != sk->daddr) { - peer = inet_getpeer(sk->daddr, 1); - release_it = 1; - } else { - if (rt->peer == NULL) - rt_bind_peer(rt, 1); - peer = rt->peer; - } - - if (peer) { - if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && - peer->tcp_ts_stamp <= tp->ts_recent_stamp)) { - peer->tcp_ts_stamp = tp->ts_recent_stamp; - peer->tcp_ts = tp->ts_recent; - } - if (release_it) - inet_putpeer(peer); - return 1; - } - - return 0; -#else - return 0; -#endif -} - -int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) -{ -#if 0 - struct inet_peer *peer = NULL; - - peer = inet_getpeer(tw->daddr, 1); - - if (peer) { - if ((s32)(peer->tcp_ts - tw->ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && - peer->tcp_ts_stamp <= tw->ts_recent_stamp)) { - peer->tcp_ts_stamp = tw->ts_recent_stamp; - peer->tcp_ts = tw->ts_recent; - } - inet_putpeer(peer); - return 1; - } - - return 0; -#else - return 0; -#endif -} - -#if 0 -struct tcp_func ipv4_specific = { - ip_queue_xmit, - tcp_v4_send_check, - tcp_v4_rebuild_header, - tcp_v4_conn_request, - tcp_v4_syn_recv_sock, - tcp_v4_remember_stamp, - sizeof(struct iphdr), - - ip_setsockopt, - ip_getsockopt, - v4_addr2sockaddr, - sizeof(struct sockaddr_in) -}; -#endif - -/* NOTE: A lot of things set to zero explicitly by call to - * sk_alloc() so need not be done here. 
- */ -static int tcp_v4_init_sock(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - skb_queue_head_init(&tp->out_of_order_queue); - tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); - - tp->rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; - - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - tp->snd_cwnd = 2; - - /* See draft-stevens-tcpca-spec-01 for discussion of the - * initialization of these values. - */ - tp->snd_ssthresh = 0x7fffffff; /* Infinity */ - tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; - - tp->reordering = sysctl_tcp_reordering; - - sk->state = TCP_CLOSE; - - sk->write_space = tcp_write_space; - sk->use_write_queue = 1; - - sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific; - - sk->sndbuf = sysctl_tcp_wmem[1]; - sk->rcvbuf = sysctl_tcp_rmem[1]; - - atomic_inc(&tcp_sockets_allocated); - - return 0; -#else - return 0; -#endif -} - -static int tcp_v4_destroy_sock(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - tcp_clear_xmit_timers(sk); - - /* Cleanup up the write buffer. */ - tcp_writequeue_purge(sk); - - /* Cleans up our, hopefully empty, out_of_order_queue. */ - __skb_queue_purge(&tp->out_of_order_queue); - - /* Clean prequeue, it must be empty really */ - __skb_queue_purge(&tp->ucopy.prequeue); - - /* Clean up a referenced TCP bind bucket. */ - if(sk->prev != NULL) - tcp_put_port(sk); - - /* If sendmsg cached page exists, toss it. */ - if (tp->sndmsg_page != NULL) - __free_page(tp->sndmsg_page); - - atomic_dec(&tcp_sockets_allocated); - - return 0; -#else - return 0; -#endif -} - -/* Proc filesystem TCP sock list dumping. 
*/ -static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid) -{ -#if 0 - int ttd = req->expires - jiffies; - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p", - i, - req->af.v4_req.loc_addr, - ntohs(sk->sport), - req->af.v4_req.rmt_addr, - ntohs(req->rmt_port), - TCP_SYN_RECV, - 0,0, /* could print option size, but that is af dependent. */ - 1, /* timers active (only the expire timer) */ - ttd, - req->retrans, - uid, - 0, /* non standard timer */ - 0, /* open_requests have no inode */ - atomic_read(&sk->refcnt), - req - ); -#endif -} - -static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i) -{ -#if 0 - unsigned int dest, src; - __u16 destp, srcp; - int timer_active; - unsigned long timer_expires; - struct tcp_opt *tp = &sp->tp_pinfo.af_tcp; - - dest = sp->daddr; - src = sp->rcv_saddr; - destp = ntohs(sp->dport); - srcp = ntohs(sp->sport); - if (tp->pending == TCP_TIME_RETRANS) { - timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { - timer_active = 4; - timer_expires = tp->timeout; - } else if (timer_pending(&sp->timer)) { - timer_active = 2; - timer_expires = sp->timer.expires; - } else { - timer_active = 0; - timer_expires = jiffies; - } - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d", - i, src, srcp, dest, destp, sp->state, - tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, - timer_active, timer_expires-jiffies, - tp->retransmits, - sock_i_uid(sp), - tp->probes_out, - sock_i_ino(sp), - atomic_read(&sp->refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, - tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh - ); -#endif -} - -static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) -{ -#if 0 - unsigned int dest, src; - __u16 destp, srcp; - int ttd = tw->ttd - jiffies; - - if (ttd < 0) - ttd = 0; - - dest 
= tw->daddr; - src = tw->rcv_saddr; - destp = ntohs(tw->dport); - srcp = ntohs(tw->sport); - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p", - i, src, srcp, dest, destp, tw->substate, 0, 0, - 3, ttd, 0, 0, 0, 0, - atomic_read(&tw->refcnt), tw); -#endif -} - -#define TMPSZ 150 - -int tcp_get_info(char *buffer, char **start, off_t offset, int length) -{ -#if 0 - int len = 0, num = 0, i; - off_t begin, pos = 0; - char tmpbuf[TMPSZ+1]; - - if (offset < TMPSZ) - len += sprintf(buffer, "%-*s\n", TMPSZ-1, - " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout inode"); - - pos = TMPSZ; - - /* First, walk listening socket table. */ - tcp_listen_lock(); - for(i = 0; i < TCP_LHTABLE_SIZE; i++) { - struct sock *sk; - struct tcp_listen_opt *lopt; - int k; - - for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) { - struct open_request *req; - int uid; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - if (!TCP_INET_FAMILY(sk->family)) - goto skip_listen; - - pos += TMPSZ; - if (pos >= offset) { - get_tcp_sock(sk, tmpbuf, num); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); - if (pos >= offset + length) { - tcp_listen_unlock(); - goto out_no_bh; - } - } - -skip_listen: - uid = sock_i_uid(sk); - read_lock_bh(&tp->syn_wait_lock); - lopt = tp->listen_opt; - if (lopt && lopt->qlen != 0) { - for (k=0; ksyn_table[k]; req; req = req->dl_next, num++) { - if (!TCP_INET_FAMILY(req->class->family)) - continue; - - pos += TMPSZ; - if (pos <= offset) - continue; - get_openreq(sk, req, tmpbuf, num, uid); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); - if (pos >= offset + length) { - read_unlock_bh(&tp->syn_wait_lock); - tcp_listen_unlock(); - goto out_no_bh; - } - } - } - } - read_unlock_bh(&tp->syn_wait_lock); - - /* Completed requests are in normal socket hash table */ - } - } - tcp_listen_unlock(); - - local_bh_disable(); - - /* Next, walk established hash chain. 
*/ - for (i = 0; i < tcp_ehash_size; i++) { - struct tcp_ehash_bucket *head = &tcp_ehash[i]; - struct sock *sk; - struct tcp_tw_bucket *tw; - - read_lock(&head->lock); - for(sk = head->chain; sk; sk = sk->next, num++) { - if (!TCP_INET_FAMILY(sk->family)) - continue; - pos += TMPSZ; - if (pos <= offset) - continue; - get_tcp_sock(sk, tmpbuf, num); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); - if (pos >= offset + length) { - read_unlock(&head->lock); - goto out; - } - } - for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain; - tw != NULL; - tw = (struct tcp_tw_bucket *)tw->next, num++) { - if (!TCP_INET_FAMILY(tw->family)) - continue; - pos += TMPSZ; - if (pos <= offset) - continue; - get_timewait_sock(tw, tmpbuf, num); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); - if (pos >= offset + length) { - read_unlock(&head->lock); - goto out; - } - } - read_unlock(&head->lock); - } - -out: - local_bh_enable(); -out_no_bh: - - begin = len - (pos - offset); - *start = buffer + begin; - len -= begin; - if (len > length) - len = length; - if (len < 0) - len = 0; - return len; -#endif -} - -struct proto tcp_prot = { - name: "TCP", - close: tcp_close, - connect: tcp_v4_connect, - disconnect: tcp_disconnect, - accept: tcp_accept, - ioctl: tcp_ioctl, - init: tcp_v4_init_sock, - destroy: tcp_v4_destroy_sock, - shutdown: tcp_shutdown, - setsockopt: tcp_setsockopt, - getsockopt: tcp_getsockopt, - sendmsg: tcp_sendmsg, - recvmsg: tcp_recvmsg, - backlog_rcv: tcp_v4_do_rcv, - hash: tcp_v4_hash, - unhash: tcp_unhash, - get_port: tcp_v4_get_port, -}; - - - -void tcp_v4_init(struct net_proto_family *ops) -{ -#if 0 - int err; - - tcp_inode.i_mode = S_IFSOCK; - tcp_inode.i_sock = 1; - tcp_inode.i_uid = 0; - tcp_inode.i_gid = 0; - init_waitqueue_head(&tcp_inode.i_wait); - init_waitqueue_head(&tcp_inode.u.socket_i.wait); - - tcp_socket->inode = &tcp_inode; - tcp_socket->state = SS_UNCONNECTED; - tcp_socket->type=SOCK_RAW; - - if 
((err=ops->create(tcp_socket, IPPROTO_TCP))<0) - panic("Failed to create the TCP control socket.\n"); - tcp_socket->sk->allocation=GFP_ATOMIC; - tcp_socket->sk->protinfo.af_inet.ttl = MAXTTL; - - /* Unhash it so that IP input processing does not even - * see it, we do not wish this socket to see incoming - * packets. - */ - tcp_socket->sk->prot->unhash(tcp_socket->sk); -#endif -} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_output.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_output.c deleted file mode 100755 index c85c2aca940..00000000000 --- a/reactos/drivers/net/tcpip/transport/tcp/tcp_output.c +++ /dev/null @@ -1,1549 +0,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS TCP/IP protocol driver - * FILE: transport/tcp/tcp_output.c - * PURPOSE: Transmission Control Protocol - * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) - * REVISIONS: - * CSH 15-01-2003 Imported from linux kernel 2.4.20 - */ - -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Implementation of the Transmission Control Protocol(TCP). - * - * Version: $Id: tcp_output.c,v 1.2 2003/07/24 18:14:59 royce Exp $ - * - * Authors: Ross Biro, - * Fred N. van Kempen, - * Mark Evans, - * Corey Minyard - * Florian La Roche, - * Charles Hedrick, - * Linus Torvalds, - * Alan Cox, - * Matthew Dillon, - * Arnt Gulbrandsen, - * Jorge Cwik, - */ - -/* - * Changes: Pedro Roque : Retransmit queue handled by TCP. - * : Fragmentation on mtu decrease - * : Segment collapse on retransmit - * : AF independence - * - * Linus Torvalds : send_delayed_ack - * David S. Miller : Charge memory using the right skb - * during syn/ack processing. - * David S. Miller : Output engine completely rewritten. - * Andrea Arcangeli: SYNACK carry ts_recent in tsecr. 
- * Cacophonix Gaul : draft-minshall-nagle-01 - * J Hadi Salim : ECN support - * - */ - -#if 0 -#include - -#include -#include -#else -#include "linux.h" -#include "tcpcore.h" -#endif - -/* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse = 1; - -static __inline__ -void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) -{ - tp->send_head = skb->next; - if (tp->send_head == (struct sk_buff *) &sk->write_queue) - tp->send_head = NULL; - tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); -} - -/* SND.NXT, if window was not shrunk. - * If window has been shrunk, what should we make? It is not clear at all. - * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( - * Anything in between SND.UNA...SND.UNA+SND.WND also can be already - * invalid. OK, let's make this for now: - */ -static __inline__ __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_opt *tp) -{ - if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) - return tp->snd_nxt; - else - return tp->snd_una+tp->snd_wnd; -} - -/* Calculate mss to advertise in SYN segment. - * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that: - * - * 1. It is independent of path mtu. - * 2. Ideally, it is maximal possible segment size i.e. 65535-40. - * 3. For IPv4 it is reasonable to calculate it from maximal MTU of - * attached devices, because some buggy hosts are confused by - * large MSS. - * 4. We do not make 3, we advertise MSS, calculated from first - * hop device mtu, but allow to raise it to ip_rt_min_advmss. - * This may be overriden via information stored in routing table. - * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible, - * probably even Jumbo". 
- */ -static __u16 tcp_advertise_mss(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct dst_entry *dst = __sk_dst_get(sk); - int mss = tp->advmss; - - if (dst && dst->advmss < mss) { - mss = dst->advmss; - tp->advmss = mss; - } - - return (__u16)mss; -#else - return 0; -#endif -} - -/* RFC2861. Reset CWND after idle period longer RTO to "restart window". - * This is the first part of cwnd validation mechanism. */ -static void tcp_cwnd_restart(struct tcp_opt *tp) -{ -#if 0 - s32 delta = tcp_time_stamp - tp->lsndtime; - u32 restart_cwnd = tcp_init_cwnd(tp); - u32 cwnd = tp->snd_cwnd; - - tp->snd_ssthresh = tcp_current_ssthresh(tp); - restart_cwnd = min(restart_cwnd, cwnd); - - while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) - cwnd >>= 1; - tp->snd_cwnd = max(cwnd, restart_cwnd); - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->snd_cwnd_used = 0; -#endif -} - -static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb) -{ -#if 0 - u32 now = tcp_time_stamp; - - if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) - tcp_cwnd_restart(tp); - - tp->lsndtime = now; - - /* If it is a reply for ato after last received - * packet, enter pingpong mode. - */ - if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato) - tp->ack.pingpong = 1; -#endif -} - -static __inline__ void tcp_event_ack_sent(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - tcp_dec_quickack_mode(tp); - tcp_clear_xmit_timer(sk, TCP_TIME_DACK); -#endif -} - -/* Chose a new window to advertise, update state in tcp_opt for the - * socket, and return result with RFC1323 scaling applied. The return - * value can be stuffed directly into th->window for an outgoing - * frame. 
- */ -static __inline__ u16 tcp_select_window(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - u32 cur_win = tcp_receive_window(tp); - u32 new_win = __tcp_select_window(sk); - - /* Never shrink the offered window */ - if(new_win < cur_win) { - /* Danger Will Robinson! - * Don't update rcv_wup/rcv_wnd here or else - * we will not be able to advertise a zero - * window in time. --DaveM - * - * Relax Will Robinson. - */ - new_win = cur_win; - } - tp->rcv_wnd = new_win; - tp->rcv_wup = tp->rcv_nxt; - - /* RFC1323 scaling applied */ - new_win >>= tp->rcv_wscale; - - /* If we advertise zero window, disable fast path. */ - if (new_win == 0) - tp->pred_flags = 0; - - return new_win; -#else - return 0; -#endif -} - - -/* This routine actually transmits TCP packets queued in by - * tcp_do_sendmsg(). This is used by both the initial - * transmission and possible later retransmissions. - * All SKB's seen here are completely headerless. It is our - * job to build the TCP header, and pass the packet down to - * IP so it can do the same plus pass the packet off to the - * device. - * - * We are working here with either a clone of the original - * SKB, or a fresh unique copy made by the retransmit engine. 
- */ -int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - if(skb != NULL) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - int tcp_header_size = tp->tcp_header_len; - struct tcphdr *th; - int sysctl_flags; - int err; - -#define SYSCTL_FLAG_TSTAMPS 0x1 -#define SYSCTL_FLAG_WSCALE 0x2 -#define SYSCTL_FLAG_SACK 0x4 - - sysctl_flags = 0; - if (tcb->flags & TCPCB_FLAG_SYN) { - tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; - if(sysctl_tcp_timestamps) { - tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; - sysctl_flags |= SYSCTL_FLAG_TSTAMPS; - } - if(sysctl_tcp_window_scaling) { - tcp_header_size += TCPOLEN_WSCALE_ALIGNED; - sysctl_flags |= SYSCTL_FLAG_WSCALE; - } - if(sysctl_tcp_sack) { - sysctl_flags |= SYSCTL_FLAG_SACK; - if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) - tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; - } - } else if (tp->eff_sacks) { - /* A SACK is 2 pad bytes, a 2 byte header, plus - * 2 32-bit sequence numbers for each SACK block. - */ - tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + - (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); - } - th = (struct tcphdr *) skb_push(skb, tcp_header_size); - skb->h.th = th; - skb_set_owner_w(skb, sk); - - /* Build TCP header and checksum it. */ - th->source = sk->sport; - th->dest = sk->dport; - th->seq = htonl(tcb->seq); - th->ack_seq = htonl(tp->rcv_nxt); - *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); - if (tcb->flags & TCPCB_FLAG_SYN) { - /* RFC1323: The window in SYN & SYN/ACK segments - * is never scaled. 
- */ - th->window = htons(tp->rcv_wnd); - } else { - th->window = htons(tcp_select_window(sk)); - } - th->check = 0; - th->urg_ptr = 0; - - if (tp->urg_mode && - between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) { - th->urg_ptr = htons(tp->snd_up-tcb->seq); - th->urg = 1; - } - - if (tcb->flags & TCPCB_FLAG_SYN) { - tcp_syn_build_options((__u32 *)(th + 1), - tcp_advertise_mss(sk), - (sysctl_flags & SYSCTL_FLAG_TSTAMPS), - (sysctl_flags & SYSCTL_FLAG_SACK), - (sysctl_flags & SYSCTL_FLAG_WSCALE), - tp->rcv_wscale, - tcb->when, - tp->ts_recent); - } else { - tcp_build_and_update_options((__u32 *)(th + 1), - tp, tcb->when); - - TCP_ECN_send(sk, tp, skb, tcp_header_size); - } - tp->af_specific->send_check(sk, th, skb->len, skb); - - if (tcb->flags & TCPCB_FLAG_ACK) - tcp_event_ack_sent(sk); - - if (skb->len != tcp_header_size) - tcp_event_data_sent(tp, skb); - - TCP_INC_STATS(TcpOutSegs); - - err = tp->af_specific->queue_xmit(skb); - if (err <= 0) - return err; - - tcp_enter_cwr(tp); - - /* NET_XMIT_CN is special. It does not guarantee, - * that this packet is lost. It tells that device - * is about to start to drop packets or already - * drops some packets of the same priority and - * invokes us to send less aggressively. - */ - return err == NET_XMIT_CN ? 0 : err; - } - return -ENOBUFS; -#undef SYSCTL_FLAG_TSTAMPS -#undef SYSCTL_FLAG_WSCALE -#undef SYSCTL_FLAG_SACK -#else - return 0; -#endif -} - - -/* This is the main buffer sending routine. We queue the buffer - * and decide whether to queue or transmit now. - * - * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, - * otherwise socket can stall. - */ -void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* Advance write_seq and place onto the write_queue. 
*/ - tp->write_seq = TCP_SKB_CB(skb)->end_seq; - __skb_queue_tail(&sk->write_queue, skb); - tcp_charge_skb(sk, skb); - - if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) { - /* Send it out now. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { - tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - tcp_minshall_update(tp, cur_mss, skb); - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - return; - } - } - /* Queue it, remembering where we must start sending. */ - if (tp->send_head == NULL) - tp->send_head = skb; -#endif -} - -/* Send _single_ skb sitting at the send head. This function requires - * true push pending frames to setup probe timer etc. - */ -void tcp_push_one(struct sock *sk, unsigned cur_mss) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb = tp->send_head; - - if (tcp_snd_test(tp, skb, cur_mss, 1)) { - /* Send it out now. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { - tp->send_head = NULL; - tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - return; - } - } -#endif -} - -/* Split fragmented skb to two parts at length len. */ - -static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len) -{ -#if 0 - int i; - int pos = skb->len - skb->data_len; - - if (len < pos) { - /* Split line is inside header. */ - memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len); - - /* And move data appendix as is. 
*/ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - - skb1->data_len = skb->data_len; - skb1->len += skb1->data_len; - skb->data_len = 0; - skb->len = len; - skb->tail = skb->data+len; - } else { - int k = 0; - int nfrags = skb_shinfo(skb)->nr_frags; - - /* Second chunk has no header, nothing to copy. */ - - skb_shinfo(skb)->nr_frags = 0; - skb1->len = skb1->data_len = skb->len - len; - skb->len = len; - skb->data_len = len - pos; - - for (i=0; ifrags[i].size; - if (pos + size > len) { - skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; - - if (pos < len) { - /* Split frag. - * We have to variants in this case: - * 1. Move all the frag to the second - * part, if it is possible. F.e. - * this approach is mandatory for TUX, - * where splitting is expensive. - * 2. Split is accurately. We make this. - */ - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb1)->frags[0].page_offset += (len-pos); - skb_shinfo(skb1)->frags[0].size -= (len-pos); - skb_shinfo(skb)->frags[i].size = len-pos; - skb_shinfo(skb)->nr_frags++; - } - k++; - } else { - skb_shinfo(skb)->nr_frags++; - } - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; - } -#endif -} - -/* Function to create two new TCP segments. Shrinks the given segment - * to the specified size and appends a new segment with the rest of the - * packet to the list. This won't be called frequently, I hope. - * Remember, these are still headerless SKBs at this point. - */ -static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct sk_buff *buff; - int nsize = skb->len - len; - u16 flags; - - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - - /* Get a new skb... force flag on. 
*/ - buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC); - if (buff == NULL) - return -ENOMEM; /* We'll just try again later. */ - tcp_charge_skb(sk, buff); - - /* Correct the sequence numbers. */ - TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; - TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; - - /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); - TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); - if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { - tp->lost_out++; - tp->left_out++; - } - TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; - - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { - /* Copy and checksum data tail into the new buffer. */ - buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), - nsize, 0); - - skb_trim(skb, len); - - skb->csum = csum_block_sub(skb->csum, buff->csum, len); - } else { - skb->ip_summed = CHECKSUM_HW; - skb_split(skb, buff, len); - } - - buff->ip_summed = skb->ip_summed; - - /* Looks stupid, but our code really uses when of - * skbs, which it never sent before. --ANK - */ - TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; - - /* Link BUFF into the send queue. */ - __skb_append(skb, buff); - - return 0; -#else - return 0; -#endif -} - -/* This function synchronize snd mss to current pmtu/exthdr set. - - tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts - for TCP options, but includes only bare TCP header. - - tp->mss_clamp is mss negotiated at connection setup. - It is minumum of user_mss and mss received with SYN. - It also does not include TCP options. - - tp->pmtu_cookie is last pmtu, seen by this function. - - tp->mss_cache is current effective sending mss, including - all tcp options except for SACKs. 
It is evaluated, - taking into account current pmtu, but never exceeds - tp->mss_clamp. - - NOTE1. rfc1122 clearly states that advertised MSS - DOES NOT include either tcp or ip options. - - NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside - this function. --ANK (980731) - */ - -int tcp_sync_mss(struct sock *sk, u32 pmtu) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - int mss_now; - - /* Calculate base mss without TCP options: - It is MMS_S - sizeof(tcphdr) of rfc1122 - */ - - mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); - - /* Clamp it (mss_clamp does not include tcp options) */ - if (mss_now > tp->mss_clamp) - mss_now = tp->mss_clamp; - - /* Now subtract optional transport overhead */ - mss_now -= tp->ext_header_len; - - /* Then reserve room for full set of TCP options and 8 bytes of data */ - if (mss_now < 48) - mss_now = 48; - - /* Now subtract TCP options size, not including SACKs */ - mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); - - /* Bound mss with half of window */ - if (tp->max_window && mss_now > (tp->max_window>>1)) - mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); - - /* And store cached results */ - tp->pmtu_cookie = pmtu; - tp->mss_cache = mss_now; - return mss_now; -#else - return 0; -#endif -} - - -/* This routine writes packets to the network. It advances the - * send_head. This happens as incoming acks open up the remote - * window for us. - * - * Returns 1, if no segments are in flight and we have queued segments, but - * cannot send anything now because of SWS or another problem. - */ -int tcp_write_xmit(struct sock *sk, int nonagle) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - unsigned int mss_now; - - /* If we are closed, the bytes will have to remain here. - * In time closedown will finish, we empty the write queue and all - * will be happy. 
- */ - if(sk->state != TCP_CLOSE) { - struct sk_buff *skb; - int sent_pkts = 0; - - /* Account for SACKS, we may need to fragment due to this. - * It is just like the real MSS changing on us midstream. - * We also handle things correctly when the user adds some - * IP options mid-stream. Silly to do, but cover it. - */ - mss_now = tcp_current_mss(sk); - - while((skb = tp->send_head) && - tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) { - if (skb->len > mss_now) { - if (tcp_fragment(sk, skb, mss_now)) - break; - } - - TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) - break; - /* Advance the send_head. This one is sent out. */ - update_send_head(sk, tp, skb); - tcp_minshall_update(tp, mss_now, skb); - sent_pkts = 1; - } - - if (sent_pkts) { - tcp_cwnd_validate(sk, tp); - return 0; - } - - return !tp->packets_out && tp->send_head; - } - return 0; -#else - return 0; -#endif -} - -/* This function returns the amount that we can raise the - * usable window based on the following constraints - * - * 1. The window can never be shrunk once it is offered (RFC 793) - * 2. We limit memory per socket - * - * RFC 1122: - * "the suggested [SWS] avoidance algorithm for the receiver is to keep - * RECV.NEXT + RCV.WIN fixed until: - * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" - * - * i.e. don't raise the right edge of the window until you can raise - * it at least MSS bytes. - * - * Unfortunately, the recommended algorithm breaks header prediction, - * since header prediction assumes th->window stays fixed. - * - * Strictly speaking, keeping th->window fixed violates the receiver - * side SWS prevention criteria. The problem is that under this rule - * a stream of single byte packets will cause the right side of the - * window to always advance by a single byte. - * - * Of course, if the sender implements sender side SWS prevention - * then this will not be a problem. 
- * - * BSD seems to make the following compromise: - * - * If the free space is less than the 1/4 of the maximum - * space available and the free space is less than 1/2 mss, - * then set the window to 0. - * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] - * Otherwise, just prevent the window from shrinking - * and from being larger than the largest representable value. - * - * This prevents incremental opening of the window in the regime - * where TCP is limited by the speed of the reader side taking - * data out of the TCP receive queue. It does nothing about - * those cases where the window is constrained on the sender side - * because the pipeline is full. - * - * BSD also seems to "accidentally" limit itself to windows that are a - * multiple of MSS, at least until the free space gets quite small. - * This would appear to be a side effect of the mbuf implementation. - * Combining these two algorithms results in the observed behavior - * of having a fixed window size at almost all times. - * - * Below we obtain similar behavior by forcing the offered window to - * a multiple of the mss when it is feasible to do so. - * - * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. - * Regular options like TIMESTAMP are taken into account. - */ -u32 __tcp_select_window(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - /* MSS for the peer's data. Previous verions used mss_clamp - * here. I don't know if the value based on our guesses - * of peer's MSS is better for the performance. It's more correct - * but may be worse for the performance because of rcv_mss - * fluctuations. 
--SAW 1998/11/1 - */ - int mss = tp->ack.rcv_mss; - int free_space = tcp_space(sk); - int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); - int window; - - if (mss > full_space) - mss = full_space; - - if (free_space < full_space/2) { - tp->ack.quick = 0; - - if (tcp_memory_pressure) - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); - - if (free_space < mss) - return 0; - } - - if (free_space > tp->rcv_ssthresh) - free_space = tp->rcv_ssthresh; - - /* Get the largest window that is a nice multiple of mss. - * Window clamp already applied above. - * If our current window offering is within 1 mss of the - * free space we just keep it. This prevents the divide - * and multiply from happening most of the time. - * We also don't do any window rounding when the free space - * is too small. - */ - window = tp->rcv_wnd; - if (window <= free_space - mss || window > free_space) - window = (free_space/mss)*mss; - - return window; -#else - return 0; -#endif -} - -/* Attempt to collapse two adjacent SKB's during retransmission. */ -static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct sk_buff *next_skb = skb->next; - - /* The first test we must make is that neither of these two - * SKB's are still referenced by someone else. - */ - if(!skb_cloned(skb) && !skb_cloned(next_skb)) { - int skb_size = skb->len, next_skb_size = next_skb->len; - u16 flags = TCP_SKB_CB(skb)->flags; - - /* Also punt if next skb has been SACK'd. */ - if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) - return; - - /* Next skb is out of window. */ - if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) - return; - - /* Punt if not enough space exists in the first SKB for - * the data in the second, or the total combined payload - * would exceed the MSS. - */ - if ((next_skb_size > skb_tailroom(skb)) || - ((skb_size + next_skb_size) > mss_now)) - return; - - /* Ok. 
We will be able to collapse the packet. */ - __skb_unlink(next_skb, next_skb->list); - - if (next_skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_HW; - - if (skb->ip_summed != CHECKSUM_HW) { - memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); - } - - /* Update sequence range on original skb. */ - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; - - /* Merge over control information. */ - flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ - TCP_SKB_CB(skb)->flags = flags; - - /* All done, get rid of second SKB and account for it so - * packet counting does not break. - */ - TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); - if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) - tp->retrans_out--; - if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { - tp->lost_out--; - tp->left_out--; - } - /* Reno case is special. Sigh... */ - if (!tp->sack_ok && tp->sacked_out) { - tp->sacked_out--; - tp->left_out--; - } - - /* Not quite right: it can be > snd.fack, but - * it is better to underestimate fackets. - */ - if (tp->fackets_out) - tp->fackets_out--; - tcp_free_skb(sk, next_skb); - tp->packets_out--; - } -#endif -} - -/* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used for path mtu discovery. - * The socket is already locked here. 
- */ -void tcp_simple_retransmit(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk); - int lost = 0; - - for_retrans_queue(skb, sk, tp) { - if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; - } - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; - lost = 1; - } - } - } - - if (!lost) - return; - - tcp_sync_left_out(tp); - - /* Don't muck with the congestion window here. - * Reason is that we do not increase amount of _data_ - * in network, but units changed and effective - * cwnd/ssthresh really reduced now. - */ - if (tp->ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(tp); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tp->ca_state = TCP_CA_Loss; - } - tcp_xmit_retransmit_queue(sk); -#endif -} - -/* This retransmits one SKB. Policy decisions and retransmit queue - * state updates are done by the caller. Returns non-zero if an - * error occurred which prevented the send. - */ -int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - unsigned int cur_mss = tcp_current_mss(sk); - int err; - - /* Do not sent more than we queued. 1/4 is reserved for possible - * copying overhead: frgagmentation, tunneling, mangling etc. - */ - if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf)) - return -EAGAIN; - - /* If receiver has shrunk his window, and skb is out of - * new window, do not retransmit it. The exception is the - * case, when window is shrunk to zero. In this case - * our retransmit serves as a zero window probe. 
- */ - if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) - && TCP_SKB_CB(skb)->seq != tp->snd_una) - return -EAGAIN; - - if(skb->len > cur_mss) { - if(tcp_fragment(sk, skb, cur_mss)) - return -ENOMEM; /* We'll try again later. */ - - /* New SKB created, account for it. */ - tp->packets_out++; - } - - /* Collapse two adjacent packets if worthwhile and we can. */ - if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && - (skb->len < (cur_mss >> 1)) && - (skb->next != tp->send_head) && - (skb->next != (struct sk_buff *)&sk->write_queue) && - (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && - (sysctl_tcp_retrans_collapse != 0)) - tcp_retrans_try_collapse(sk, skb, cur_mss); - - if(tp->af_specific->rebuild_header(sk)) - return -EHOSTUNREACH; /* Routing failure or similar. */ - - /* Some Solaris stacks overoptimize and ignore the FIN on a - * retransmit when old data is attached. So strip it off - * since it is cheap to do so and saves bytes on the network. - */ - if(skb->len > 0 && - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && - tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { - if (!pskb_trim(skb, 0)) { - TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; - skb->ip_summed = CHECKSUM_NONE; - skb->csum = 0; - } - } - - /* Make a copy, if the first transmission SKB clone we made - * is still in somebody's hands, else make a clone. - */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - - err = tcp_transmit_skb(sk, (skb_cloned(skb) ? - pskb_copy(skb, GFP_ATOMIC): - skb_clone(skb, GFP_ATOMIC))); - - if (err == 0) { - /* Update global TCP statistics. */ - TCP_INC_STATS(TcpRetransSegs); - -#if FASTRETRANS_DEBUG > 0 - if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { - if (net_ratelimit()) - printk(KERN_DEBUG "retrans_out leaked.\n"); - } -#endif - TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; - tp->retrans_out++; - - /* Save stamp of the first retransmit. 
*/ - if (!tp->retrans_stamp) - tp->retrans_stamp = TCP_SKB_CB(skb)->when; - - tp->undo_retrans++; - - /* snd_nxt is stored to detect loss of retransmitted segment, - * see tcp_input.c tcp_sacktag_write_queue(). - */ - TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; - } - return err; -#else - return 0; -#endif -} - -/* This gets called after a retransmit timeout, and the initially - * retransmitted data is acknowledged. It tries to continue - * resending the rest of the retransmit queue, until either - * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. - */ -void tcp_xmit_retransmit_queue(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb; - int packet_cnt = tp->lost_out; - - /* First pass: retransmit lost packets. */ - if (packet_cnt) { - for_retrans_queue(skb, sk, tp) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; - - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - return; - - if (sacked&TCPCB_LOST) { - if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { - if (tcp_retransmit_skb(sk, skb)) - return; - if (tp->ca_state != TCP_CA_Loss) - NET_INC_STATS_BH(TCPFastRetrans); - else - NET_INC_STATS_BH(TCPSlowStartRetrans); - - if (skb == skb_peek(&sk->write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - } - - if (--packet_cnt <= 0) - break; - } - } - } - - /* OK, demanded retransmission is finished. */ - - /* Forward retransmissions are possible only during Recovery. */ - if (tp->ca_state != TCP_CA_Recovery) - return; - - /* No forward retransmissions in Reno are possible. */ - if (!tp->sack_ok) - return; - - /* Yeah, we have to make difficult choice between forward transmission - * and retransmission... Both ways have their merits... 
- * - * For now we do not retrnamsit anything, while we have some new - * segments to send. - */ - - if (tcp_may_send_now(sk, tp)) - return; - - packet_cnt = 0; - - for_retrans_queue(skb, sk, tp) { - if(++packet_cnt > tp->fackets_out) - break; - - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - break; - - if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) - continue; - - /* Ok, retransmit it. */ - if(tcp_retransmit_skb(sk, skb)) - break; - - if (skb == skb_peek(&sk->write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - - NET_INC_STATS_BH(TCPForwardRetrans); - } -#endif -} - - -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. - */ -void tcp_send_fin(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb = skb_peek_tail(&sk->write_queue); - unsigned int mss_now; - - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. - */ - mss_now = tcp_current_mss(sk); - - if(tp->send_head != NULL) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; - TCP_SKB_CB(skb)->end_seq++; - tp->write_seq++; - } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); - if (skb) - break; - yield(); - } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); - skb->csum = 0; - TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); - TCP_SKB_CB(skb)->sacked = 0; - - /* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). 
*/ - TCP_SKB_CB(skb)->seq = tp->write_seq; - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; - tcp_send_skb(sk, skb, 1, mss_now); - } - __tcp_push_pending_frames(sk, tp, mss_now, 1); -#endif -} - -/* We get here when a process closes a file descriptor (either due to - * an explicit close() or as a byproduct of exit()'ing) and there - * was unread data in the receive queue. This behavior is recommended - * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM - */ -void tcp_send_active_reset(struct sock *sk, int priority) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb; - - /* NOTE: No TCP options attached and we never retransmit this. */ - skb = alloc_skb(MAX_TCP_HEADER, priority); - if (!skb) { - NET_INC_STATS(TCPAbortFailed); - return; - } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); - skb->csum = 0; - TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); - TCP_SKB_CB(skb)->sacked = 0; - - /* Send it off. */ - TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; - TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (tcp_transmit_skb(sk, skb)) - NET_INC_STATS(TCPAbortFailed); -#endif -} - -/* WARNING: This routine must only be called when we have already sent - * a SYN packet that crossed the incoming SYN that caused this routine - * to get called. If this assumption fails then the initial rcv_wnd - * and rcv_wscale values will not be correct. 
- */ -int tcp_send_synack(struct sock *sk) -{ -#if 0 - struct sk_buff* skb; - - skb = skb_peek(&sk->write_queue); - if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { - printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); - return -EFAULT; - } - if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) { - if (skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - if (nskb == NULL) - return -ENOMEM; - __skb_unlink(skb, &sk->write_queue); - __skb_queue_head(&sk->write_queue, nskb); - tcp_free_skb(sk, skb); - tcp_charge_skb(sk, nskb); - skb = nskb; - } - - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; - TCP_ECN_send_synack(&sk->tp_pinfo.af_tcp, skb); - } - TCP_SKB_CB(skb)->when = tcp_time_stamp; - return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); -#else - return 0; -#endif -} - -/* - * Prepare a SYN-ACK. - */ -struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct open_request *req) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct tcphdr *th; - int tcp_header_size; - struct sk_buff *skb; - - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); - if (skb == NULL) - return NULL; - - /* Reserve space for headers. */ - skb_reserve(skb, MAX_TCP_HEADER); - - skb->dst = dst_clone(dst); - - tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + - (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + - /* SACK_PERM is in the place of NOP NOP of TS */ - ((req->sack_ok && !req->tstamp_ok) ? 
TCPOLEN_SACKPERM_ALIGNED : 0)); - skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); - - memset(th, 0, sizeof(struct tcphdr)); - th->syn = 1; - th->ack = 1; - TCP_ECN_make_synack(req, th); - th->source = sk->sport; - th->dest = req->rmt_port; - TCP_SKB_CB(skb)->seq = req->snt_isn; - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; - th->seq = htonl(TCP_SKB_CB(skb)->seq); - th->ack_seq = htonl(req->rcv_isn + 1); - if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ - __u8 rcv_wscale; - /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst->window; - /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), - dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, - req->wscale_ok, - &rcv_wscale); - req->rcv_wscale = rcv_wscale; - } - - /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ - th->window = htons(req->rcv_wnd); - - TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok, - req->sack_ok, req->wscale_ok, req->rcv_wscale, - TCP_SKB_CB(skb)->when, - req->ts_recent); - - skb->csum = 0; - th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(TcpOutSegs); - return skb; -#else - return 0; -#endif -} - -/* - * Do all connect socket setups that can be done AF independent. - */ -static inline void tcp_connect_init(struct sock *sk) -{ -#if 0 - struct dst_entry *dst = __sk_dst_get(sk); - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* We'll fix this up when we get a response from the other end. - * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. - */ - tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? 
TCPOLEN_TSTAMP_ALIGNED : 0); - - /* If user gave his TCP_MAXSEG, record it to clamp */ - if (tp->user_mss) - tp->mss_clamp = tp->user_mss; - tp->max_window = 0; - tcp_sync_mss(sk, dst->pmtu); - - if (!tp->window_clamp) - tp->window_clamp = dst->window; - tp->advmss = dst->advmss; - tcp_initialize_rcv_mss(sk); - - tcp_select_initial_window(tcp_full_space(sk), - tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), - &tp->rcv_wnd, - &tp->window_clamp, - sysctl_tcp_window_scaling, - &tp->rcv_wscale); - - tp->rcv_ssthresh = tp->rcv_wnd; - - sk->err = 0; - sk->done = 0; - tp->snd_wnd = 0; - tcp_init_wl(tp, tp->write_seq, 0); - tp->snd_una = tp->write_seq; - tp->snd_sml = tp->write_seq; - tp->rcv_nxt = 0; - tp->rcv_wup = 0; - tp->copied_seq = 0; - - tp->rto = TCP_TIMEOUT_INIT; - tp->retransmits = 0; - tcp_clear_retrans(tp); -#endif -} - -/* - * Build a SYN and send it off. - */ -int tcp_connect(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *buff; - - tcp_connect_init(sk); - - buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation); - if (unlikely(buff == NULL)) - return -ENOBUFS; - - /* Reserve space for headers. */ - skb_reserve(buff, MAX_TCP_HEADER); - - TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; - TCP_ECN_send_syn(tp, buff); - TCP_SKB_CB(buff)->sacked = 0; - buff->csum = 0; - TCP_SKB_CB(buff)->seq = tp->write_seq++; - TCP_SKB_CB(buff)->end_seq = tp->write_seq; - tp->snd_nxt = tp->write_seq; - tp->pushed_seq = tp->write_seq; - - /* Send it off. */ - TCP_SKB_CB(buff)->when = tcp_time_stamp; - tp->retrans_stamp = TCP_SKB_CB(buff)->when; - __skb_queue_tail(&sk->write_queue, buff); - tcp_charge_skb(sk, buff); - tp->packets_out++; - tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); - TCP_INC_STATS(TcpActiveOpens); - - /* Timer for repeating the SYN until an answer. 
*/ - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - return 0; -#else - return 0; -#endif -} - -/* Send out a delayed ack, the caller does the policy checking - * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() - * for details. - */ -void tcp_send_delayed_ack(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - int ato = tp->ack.ato; - unsigned long timeout; - - if (ato > TCP_DELACK_MIN) { - int max_ato = HZ/2; - - if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) - max_ato = TCP_DELACK_MAX; - - /* Slow path, intersegment interval is "high". */ - - /* If some rtt estimate is known, use it to bound delayed ack. - * Do not use tp->rto here, use results of rtt measurements - * directly. - */ - if (tp->srtt) { - int rtt = max(tp->srtt>>3, TCP_DELACK_MIN); - - if (rtt < max_ato) - max_ato = rtt; - } - - ato = min(ato, max_ato); - } - - /* Stay within the limit we were given */ - timeout = jiffies + ato; - - /* Use new timeout only if there wasn't a older one earlier. */ - if (tp->ack.pending&TCP_ACK_TIMER) { - /* If delack timer was blocked or is about to expire, - * send ACK now. - */ - if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) { - tcp_send_ack(sk); - return; - } - - if (!time_before(timeout, tp->ack.timeout)) - timeout = tp->ack.timeout; - } - tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; - tp->ack.timeout = timeout; - if (!mod_timer(&tp->delack_timer, timeout)) - sock_hold(sk); -#endif -} - -/* This routine sends an ack and also updates the window. */ -void tcp_send_ack(struct sock *sk) -{ -#if 0 - /* If we have been reset, we may not send again. */ - if(sk->state != TCP_CLOSE) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *buff; - - /* We are not putting this on the write queue, so - * tcp_transmit_skb() will set the ownership to this - * sock. 
- */ - buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); - if (buff == NULL) { - tcp_schedule_ack(tp); - tp->ack.ato = TCP_ATO_MIN; - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); - return; - } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(buff, MAX_TCP_HEADER); - buff->csum = 0; - TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(buff)->sacked = 0; - - /* Send it off, this clears delayed acks for us. */ - TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); - TCP_SKB_CB(buff)->when = tcp_time_stamp; - tcp_transmit_skb(sk, buff); - } -#else - return; -#endif -} - -/* This routine sends a packet with an out of date sequence - * number. It assumes the other end will try to ack it. - * - * Question: what should we make while urgent mode? - * 4.4BSD forces sending single byte of data. We cannot send - * out of window data, because we have SND.NXT==SND.MAX... - * - * Current solution: to send TWO zero-length segments in urgent mode: - * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is - * out-of-date with SND.UNA-1 to probe window. - */ -static int tcp_xmit_probe_skb(struct sock *sk, int urgent) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb; - - /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); - if (skb == NULL) - return -1; - - /* Reserve space for headers and set control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); - skb->csum = 0; - TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(skb)->sacked = urgent; - - /* Use a previous sequence. This should cause the other - * end to send an ack. Don't queue or clone SKB, just - * send it. - */ - TCP_SKB_CB(skb)->seq = urgent ? 
tp->snd_una : tp->snd_una - 1; - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; - TCP_SKB_CB(skb)->when = tcp_time_stamp; - return tcp_transmit_skb(sk, skb); -#else - return 0; -#endif -} - -int tcp_write_wakeup(struct sock *sk) -{ -#if 0 - if (sk->state != TCP_CLOSE) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct sk_buff *skb; - - if ((skb = tp->send_head) != NULL && - before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { - int err; - int mss = tcp_current_mss(sk); - int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; - - if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) - tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; - - /* We are probing the opening of a window - * but the window size is != 0 - * must have been a result SWS avoidance ( sender ) - */ - if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || - skb->len > mss) { - seg_size = min(seg_size, mss); - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; - if (tcp_fragment(sk, skb, seg_size)) - return -1; - } - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; - TCP_SKB_CB(skb)->when = tcp_time_stamp; - err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); - if (!err) { - update_send_head(sk, tp, skb); - } - return err; - } else { - if (tp->urg_mode && - between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF)) - tcp_xmit_probe_skb(sk, TCPCB_URG); - return tcp_xmit_probe_skb(sk, 0); - } - } - return -1; -#else - return 0; -#endif -} - -/* A window probe timeout has occurred. If window is not closed send - * a partial packet else a zero probe. - */ -void tcp_send_probe0(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int err; - - err = tcp_write_wakeup(sk); - - if (tp->packets_out || !tp->send_head) { - /* Cancel probe timer, if it is not required. 
*/ - tp->probes_out = 0; - tp->backoff = 0; - return; - } - - if (err <= 0) { - tp->backoff++; - tp->probes_out++; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); - } else { - /* If packet was not sent due to local congestion, - * do not backoff and do not remember probes_out. - * Let local senders to fight for local resources. - * - * Use accumulated backoff yet. - */ - if (!tp->probes_out) - tp->probes_out=1; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL)); - } -#endif -} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcp_timer.c b/reactos/drivers/net/tcpip/transport/tcp/tcp_timer.c deleted file mode 100755 index 5f9affc5c4d..00000000000 --- a/reactos/drivers/net/tcpip/transport/tcp/tcp_timer.c +++ /dev/null @@ -1,702 +0,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS TCP/IP protocol driver - * FILE: transport/tcp/tcp_input.c - * PURPOSE: Transmission Control Protocol - * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) - * REVISIONS: - * CSH 15-01-2003 Imported from linux kernel 2.4.20 - */ - -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Implementation of the Transmission Control Protocol(TCP). - * - * Version: $Id: tcp_timer.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ - * - * Authors: Ross Biro, - * Fred N. 
van Kempen, - * Mark Evans, - * Corey Minyard - * Florian La Roche, - * Charles Hedrick, - * Linus Torvalds, - * Alan Cox, - * Matthew Dillon, - * Arnt Gulbrandsen, - * Jorge Cwik, - */ - -#if 0 -#include -#else -#include "linux.h" -#include "tcpcore.h" -#endif - -int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; -int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; -//int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; -int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; -//int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; -int sysctl_tcp_retries1 = TCP_RETR1; -int sysctl_tcp_retries2 = TCP_RETR2; -int sysctl_tcp_orphan_retries; - -static void tcp_write_timer(unsigned long); -static void tcp_delack_timer(unsigned long); -static void tcp_keepalive_timer (unsigned long data); - -//const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; - -/* - * Using different timers for retransmit, delayed acks and probes - * We may wish use just one timer maintaining a list of expire jiffies - * to optimize. 
- */ - -void tcp_init_xmit_timers(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - - init_timer(&tp->retransmit_timer); - tp->retransmit_timer.function=&tcp_write_timer; - tp->retransmit_timer.data = (unsigned long) sk; - tp->pending = 0; - - init_timer(&tp->delack_timer); - tp->delack_timer.function=&tcp_delack_timer; - tp->delack_timer.data = (unsigned long) sk; - tp->ack.pending = 0; - - init_timer(&sk->timer); - sk->timer.function=&tcp_keepalive_timer; - sk->timer.data = (unsigned long) sk; -#endif -} - -void tcp_clear_xmit_timers(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - - tp->pending = 0; - if (timer_pending(&tp->retransmit_timer) && - del_timer(&tp->retransmit_timer)) - __sock_put(sk); - - tp->ack.pending = 0; - tp->ack.blocked = 0; - if (timer_pending(&tp->delack_timer) && - del_timer(&tp->delack_timer)) - __sock_put(sk); - - if(timer_pending(&sk->timer) && del_timer(&sk->timer)) - __sock_put(sk); -#endif -} - -static void tcp_write_err(struct sock *sk) -{ -#if 0 - sk->err = sk->err_soft ? : ETIMEDOUT; - sk->error_report(sk); - - tcp_done(sk); - NET_INC_STATS_BH(TCPAbortOnTimeout); -#endif -} - -/* Do not allow orphaned sockets to eat all our resources. - * This is direct violation of TCP specs, but it is required - * to prevent DoS attacks. It is called when a retransmission timeout - * or zero probe timeout occurs on orphaned socket. - * - * Criterium is still not confirmed experimentally and may change. - * We kill the socket, if: - * 1. If number of orphaned sockets exceeds an administratively configured - * limit. - * 2. If we have strong memory pressure. - */ -static int tcp_out_of_resources(struct sock *sk, int do_reset) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int orphans = atomic_read(&tcp_orphan_count); - - /* If peer does not open window for long time, or did not transmit - * anything for long time, penalize it. 
*/ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; - - /* If some dubious ICMP arrived, penalize even more. */ - if (sk->err_soft) - orphans <<= 1; - - if (orphans >= sysctl_tcp_max_orphans || - (sk->wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { - if (net_ratelimit()) - printk(KERN_INFO "Out of socket memory\n"); - - /* Catch exceptional cases, when connection requires reset. - * 1. Last segment was sent recently. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || - /* 2. Window is closed. */ - (!tp->snd_wnd && !tp->packets_out)) - do_reset = 1; - if (do_reset) - tcp_send_active_reset(sk, GFP_ATOMIC); - tcp_done(sk); - NET_INC_STATS_BH(TCPAbortOnMemory); - return 1; - } - return 0; -#else - return 0; -#endif -} - -/* Calculate maximal number or retries on an orphaned socket. */ -static int tcp_orphan_retries(struct sock *sk, int alive) -{ -#if 0 - int retries = sysctl_tcp_orphan_retries; /* May be zero. */ - - /* We know from an ICMP that something is wrong. */ - if (sk->err_soft && !alive) - retries = 0; - - /* However, if socket sent something recently, select some safe - * number of retries. 8 corresponds to >100 seconds with minimal - * RTO of 200msec. */ - if (retries == 0 && alive) - retries = 8; - return retries; -#else - return 0; -#endif -} - -/* A write timeout has occurred. Process the after effects. */ -static int tcp_write_timeout(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int retry_until; - - if ((1<state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { - if (tp->retransmits) - dst_negative_advice(&sk->dst_cache); - retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries; - } else { - if (tp->retransmits >= sysctl_tcp_retries1) { - /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black - hole detection. :-( - - It is place to make it. It is not made. I do not want - to make it. It is disguisting. 
It does not work in any - case. Let me to cite the same draft, which requires for - us to implement this: - - "The one security concern raised by this memo is that ICMP black holes - are often caused by over-zealous security administrators who block - all ICMP messages. It is vitally important that those who design and - deploy security systems understand the impact of strict filtering on - upper-layer protocols. The safest web site in the world is worthless - if most TCP implementations cannot transfer data from it. It would - be far nicer to have all of the black holes fixed rather than fixing - all of the TCP implementations." - - Golden words :-). - */ - - dst_negative_advice(&sk->dst_cache); - } - - retry_until = sysctl_tcp_retries2; - if (sk->dead) { - int alive = (tp->rto < TCP_RTO_MAX); - - retry_until = tcp_orphan_retries(sk, alive); - - if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until)) - return 1; - } - } - - if (tp->retransmits >= retry_until) { - /* Has it gone just too far? */ - tcp_write_err(sk); - return 1; - } - return 0; -#else - return 0; -#endif -} - -static void tcp_delack_timer(unsigned long data) -{ -#if 0 - struct sock *sk = (struct sock*)data; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - bh_lock_sock(sk); - if (sk->lock.users) { - /* Try again later. 
*/ - tp->ack.blocked = 1; - NET_INC_STATS_BH(DelayedACKLocked); - if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN)) - sock_hold(sk); - goto out_unlock; - } - - tcp_mem_reclaim(sk); - - if (sk->state == TCP_CLOSE || !(tp->ack.pending&TCP_ACK_TIMER)) - goto out; - - if ((long)(tp->ack.timeout - jiffies) > 0) { - if (!mod_timer(&tp->delack_timer, tp->ack.timeout)) - sock_hold(sk); - goto out; - } - tp->ack.pending &= ~TCP_ACK_TIMER; - - if (skb_queue_len(&tp->ucopy.prequeue)) { - struct sk_buff *skb; - - net_statistics[smp_processor_id()*2].TCPSchedulerFailed += skb_queue_len(&tp->ucopy.prequeue); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->backlog_rcv(sk, skb); - - tp->ucopy.memory = 0; - } - - if (tcp_ack_scheduled(tp)) { - if (!tp->ack.pingpong) { - /* Delayed ACK missed: inflate ATO. */ - tp->ack.ato = min(tp->ack.ato << 1, tp->rto); - } else { - /* Delayed ACK missed: leave pingpong mode and - * deflate ATO. - */ - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; - } - tcp_send_ack(sk); - NET_INC_STATS_BH(DelayedACKs); - } - TCP_CHECK_TIMER(sk); - -out: - if (tcp_memory_pressure) - tcp_mem_reclaim(sk); -out_unlock: - bh_unlock_sock(sk); - sock_put(sk); -#endif -} - -static void tcp_probe_timer(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - int max_probes; - - if (tp->packets_out || !tp->send_head) { - tp->probes_out = 0; - return; - } - - /* *WARNING* RFC 1122 forbids this - * - * It doesn't AFAIK, because we kill the retransmit timer -AK - * - * FIXME: We ought not to do it, Solaris 2.5 actually has fixing - * this behaviour in Solaris down as a bug fix. [AC] - * - * Let me to explain. probes_out is zeroed by incoming ACKs - * even if they advertise zero window. Hence, connection is killed only - * if we received no ACKs for normal connection timeout. It is not killed - * only because window stays zero for some time, window may be zero - * until armageddon and even later. 
We are in full accordance - * with RFCs, only probe timer combines both retransmission timeout - * and probe timeout in one bottle. --ANK - */ - max_probes = sysctl_tcp_retries2; - - if (sk->dead) { - int alive = ((tp->rto<backoff) < TCP_RTO_MAX); - - max_probes = tcp_orphan_retries(sk, alive); - - if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) - return; - } - - if (tp->probes_out > max_probes) { - tcp_write_err(sk); - } else { - /* Only send another probe if we didn't close things up. */ - tcp_send_probe0(sk); - } -#endif -} - -/* - * The TCP retransmit timer. - */ - -static void tcp_retransmit_timer(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - - if (tp->packets_out == 0) - goto out; - - BUG_TRAP(!skb_queue_empty(&sk->write_queue)); - - if (tp->snd_wnd == 0 && !sk->dead && - !((1<state)&(TCPF_SYN_SENT|TCPF_SYN_RECV))) { - /* Receiver dastardly shrinks window. Our retransmits - * become zero probes, but we should not timeout this - * connection. If the socket is an orphan, time it out, - * we cannot allow such beasts to hang infinitely. - */ -#ifdef TCP_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. 
Repaired.\n", - NIPQUAD(sk->daddr), htons(sk->dport), sk->num, - tp->snd_una, tp->snd_nxt); -#endif - if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { - tcp_write_err(sk); - goto out; - } - tcp_enter_loss(sk, 0); - tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); - __sk_dst_reset(sk); - goto out_reset_timer; - } - - if (tcp_write_timeout(sk)) - goto out; - - if (tp->retransmits == 0) { - if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { - if (tp->sack_ok) { - if (tp->ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(TCPSackRecoveryFail); - else - NET_INC_STATS_BH(TCPSackFailures); - } else { - if (tp->ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(TCPRenoRecoveryFail); - else - NET_INC_STATS_BH(TCPRenoFailures); - } - } else if (tp->ca_state == TCP_CA_Loss) { - NET_INC_STATS_BH(TCPLossFailures); - } else { - NET_INC_STATS_BH(TCPTimeouts); - } - } - - tcp_enter_loss(sk, 0); - - if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) { - /* Retransmission failed because of local congestion, - * do not backoff. - */ - if (!tp->retransmits) - tp->retransmits=1; - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, - min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL)); - goto out; - } - - /* Increase the timeout each time we retransmit. Note that - * we do not increase the rtt estimate. rto is initialized - * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests - * that doubling rto each time is the least we can get away with. - * In KA9Q, Karn uses this for the first few times, and then - * goes to quadratic. netBSD doubles, but only goes up to *64, - * and clamps at 1 to 64 sec afterwards. Note that 120 sec is - * defined in the protocol as the maximum possible RTT. I guess - * we'll have to use something other than TCP to talk to the - * University of Mars. - * - * PAWS allows us longer timeouts and large windows, so once - * implemented ftp to mars will work nicely. We will have to fix - * the 120 second clamps though! 
- */ - tp->backoff++; - tp->retransmits++; - -out_reset_timer: - tp->rto = min(tp->rto << 1, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - if (tp->retransmits > sysctl_tcp_retries1) - __sk_dst_reset(sk); - -out:; -#endif -} - -static void tcp_write_timer(unsigned long data) -{ -#if 0 - struct sock *sk = (struct sock*)data; - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - int event; - - bh_lock_sock(sk); - if (sk->lock.users) { - /* Try again later */ - if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20))) - sock_hold(sk); - goto out_unlock; - } - - if (sk->state == TCP_CLOSE || !tp->pending) - goto out; - - if ((long)(tp->timeout - jiffies) > 0) { - if (!mod_timer(&tp->retransmit_timer, tp->timeout)) - sock_hold(sk); - goto out; - } - - event = tp->pending; - tp->pending = 0; - - switch (event) { - case TCP_TIME_RETRANS: - tcp_retransmit_timer(sk); - break; - case TCP_TIME_PROBE0: - tcp_probe_timer(sk); - break; - } - TCP_CHECK_TIMER(sk); - -out: - tcp_mem_reclaim(sk); -out_unlock: - bh_unlock_sock(sk); - sock_put(sk); -#endif -} - -/* - * Timer for listening sockets - */ - -static void tcp_synack_timer(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct tcp_listen_opt *lopt = tp->listen_opt; - int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; - int thresh = max_retries; - unsigned long now = jiffies; - struct open_request **reqp, *req; - int i, budget; - - if (lopt == NULL || lopt->qlen == 0) - return; - - /* Normally all the openreqs are young and become mature - * (i.e. converted to established socket) for first timeout. - * If synack was not acknowledged for 3 seconds, it means - * one of the following things: synack was lost, ack was lost, - * rtt is high or nobody planned to ack (i.e. synflood). - * When server is a bit loaded, queue is populated with old - * open requests, reducing effective size of queue. 
- * When server is well loaded, queue size reduces to zero - * after several minutes of work. It is not synflood, - * it is normal operation. The solution is pruning - * too old entries overriding normal timeout, when - * situation becomes dangerous. - * - * Essentially, we reserve half of room for young - * embrions; and abort old ones without pity, if old - * ones are about to clog our table. - */ - if (lopt->qlen>>(lopt->max_qlen_log-1)) { - int young = (lopt->qlen_young<<1); - - while (thresh > 2) { - if (lopt->qlen < young) - break; - thresh--; - young <<= 1; - } - } - - if (tp->defer_accept) - max_retries = tp->defer_accept; - - budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); - i = lopt->clock_hand; - - do { - reqp=&lopt->syn_table[i]; - while ((req = *reqp) != NULL) { - if ((long)(now - req->expires) >= 0) { - if ((req->retrans < thresh || - (req->acked && req->retrans < max_retries)) - && !req->class->rtx_syn_ack(sk, req, NULL)) { - unsigned long timeo; - - if (req->retrans++ == 0) - lopt->qlen_young--; - timeo = min((TCP_TIMEOUT_INIT << req->retrans), - TCP_RTO_MAX); - req->expires = now + timeo; - reqp = &req->dl_next; - continue; - } - - /* Drop this request */ - write_lock(&tp->syn_wait_lock); - *reqp = req->dl_next; - write_unlock(&tp->syn_wait_lock); - lopt->qlen--; - if (req->retrans == 0) - lopt->qlen_young--; - tcp_openreq_free(req); - continue; - } - reqp = &req->dl_next; - } - - i = (i+1)&(TCP_SYNQ_HSIZE-1); - - } while (--budget > 0); - - lopt->clock_hand = i; - - if (lopt->qlen) - tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); -#endif -} - -void tcp_delete_keepalive_timer (struct sock *sk) -{ -#if 0 - if (timer_pending(&sk->timer) && del_timer (&sk->timer)) - __sock_put(sk); -#endif -} - -void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len) -{ -#if 0 - if (!mod_timer(&sk->timer, jiffies+len)) - sock_hold(sk); -#endif -} - -void tcp_set_keepalive(struct sock *sk, int val) -{ -#if 0 - if 
((1<state)&(TCPF_CLOSE|TCPF_LISTEN)) - return; - - if (val && !sk->keepopen) - tcp_reset_keepalive_timer(sk, keepalive_time_when(&sk->tp_pinfo.af_tcp)); - else if (!val) - tcp_delete_keepalive_timer(sk); -#endif -} - - -static void tcp_keepalive_timer (unsigned long data) -{ -#if 0 - struct sock *sk = (struct sock *) data; - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - __u32 elapsed; - - /* Only process if socket is not in use. */ - bh_lock_sock(sk); - if (sk->lock.users) { - /* Try again later. */ - tcp_reset_keepalive_timer (sk, HZ/20); - goto out; - } - - if (sk->state == TCP_LISTEN) { - tcp_synack_timer(sk); - goto out; - } - - if (sk->state == TCP_FIN_WAIT2 && sk->dead) { - if (tp->linger2 >= 0) { - int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN; - - if (tmo > 0) { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto out; - } - } - tcp_send_active_reset(sk, GFP_ATOMIC); - goto death; - } - - if (!sk->keepopen || sk->state == TCP_CLOSE) - goto out; - - elapsed = keepalive_time_when(tp); - - /* It is alive without keepalive 8) */ - if (tp->packets_out || tp->send_head) - goto resched; - - elapsed = tcp_time_stamp - tp->rcv_tstamp; - - if (elapsed >= keepalive_time_when(tp)) { - if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || - (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { - tcp_send_active_reset(sk, GFP_ATOMIC); - tcp_write_err(sk); - goto out; - } - if (tcp_write_wakeup(sk) <= 0) { - tp->probes_out++; - elapsed = keepalive_intvl_when(tp); - } else { - /* If keepalive was lost due to local congestion, - * try harder. 
- */ - elapsed = TCP_RESOURCE_PROBE_INTERVAL; - } - } else { - /* It is tp->rcv_tstamp + keepalive_time_when(tp) */ - elapsed = keepalive_time_when(tp) - elapsed; - } - - TCP_CHECK_TIMER(sk); - tcp_mem_reclaim(sk); - -resched: - tcp_reset_keepalive_timer (sk, elapsed); - goto out; - -death: - tcp_done(sk); - -out: - bh_unlock_sock(sk); - sock_put(sk); -#endif -} diff --git a/reactos/drivers/net/tcpip/transport/tcp/tcpcore.c b/reactos/drivers/net/tcpip/transport/tcp/tcpcore.c deleted file mode 100755 index 8e1b3f93736..00000000000 --- a/reactos/drivers/net/tcpip/transport/tcp/tcpcore.c +++ /dev/null @@ -1,2783 +0,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS TCP/IP protocol driver - * FILE: transport/tcp/tcpcore.c - * PURPOSE: Transmission Control Protocol - * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net) - * REVISIONS: - * CSH 15-01-2003 Imported from linux kernel 2.4.20 - */ - -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Implementation of the Transmission Control Protocol(TCP). - * - * Version: $Id: tcpcore.c,v 1.1 2003/01/15 21:57:31 chorns Exp $ - * - * Authors: Ross Biro, - * Fred N. van Kempen, - * Mark Evans, - * Corey Minyard - * Florian La Roche, - * Charles Hedrick, - * Linus Torvalds, - * Alan Cox, - * Matthew Dillon, - * Arnt Gulbrandsen, - * Jorge Cwik, - * - * Fixes: - * Alan Cox : Numerous verify_area() calls - * Alan Cox : Set the ACK bit on a reset - * Alan Cox : Stopped it crashing if it closed while - * sk->inuse=1 and was trying to connect - * (tcp_err()). - * Alan Cox : All icmp error handling was broken - * pointers passed where wrong and the - * socket was looked up backwards. Nobody - * tested any icmp error code obviously. - * Alan Cox : tcp_err() now handled properly. It - * wakes people on errors. 
poll - * behaves and the icmp error race - * has gone by moving it into sock.c - * Alan Cox : tcp_send_reset() fixed to work for - * everything not just packets for - * unknown sockets. - * Alan Cox : tcp option processing. - * Alan Cox : Reset tweaked (still not 100%) [Had - * syn rule wrong] - * Herp Rosmanith : More reset fixes - * Alan Cox : No longer acks invalid rst frames. - * Acking any kind of RST is right out. - * Alan Cox : Sets an ignore me flag on an rst - * receive otherwise odd bits of prattle - * escape still - * Alan Cox : Fixed another acking RST frame bug. - * Should stop LAN workplace lockups. - * Alan Cox : Some tidyups using the new skb list - * facilities - * Alan Cox : sk->keepopen now seems to work - * Alan Cox : Pulls options out correctly on accepts - * Alan Cox : Fixed assorted sk->rqueue->next errors - * Alan Cox : PSH doesn't end a TCP read. Switched a - * bit to skb ops. - * Alan Cox : Tidied tcp_data to avoid a potential - * nasty. - * Alan Cox : Added some better commenting, as the - * tcp is hard to follow - * Alan Cox : Removed incorrect check for 20 * psh - * Michael O'Reilly : ack < copied bug fix. - * Johannes Stille : Misc tcp fixes (not all in yet). - * Alan Cox : FIN with no memory -> CRASH - * Alan Cox : Added socket option proto entries. - * Also added awareness of them to accept. - * Alan Cox : Added TCP options (SOL_TCP) - * Alan Cox : Switched wakeup calls to callbacks, - * so the kernel can layer network - * sockets. - * Alan Cox : Use ip_tos/ip_ttl settings. - * Alan Cox : Handle FIN (more) properly (we hope). - * Alan Cox : RST frames sent on unsynchronised - * state ack error. - * Alan Cox : Put in missing check for SYN bit. - * Alan Cox : Added tcp_select_window() aka NET2E - * window non shrink trick. 
- * Alan Cox : Added a couple of small NET2E timer - * fixes - * Charles Hedrick : TCP fixes - * Toomas Tamm : TCP window fixes - * Alan Cox : Small URG fix to rlogin ^C ack fight - * Charles Hedrick : Rewrote most of it to actually work - * Linus : Rewrote tcp_read() and URG handling - * completely - * Gerhard Koerting: Fixed some missing timer handling - * Matthew Dillon : Reworked TCP machine states as per RFC - * Gerhard Koerting: PC/TCP workarounds - * Adam Caldwell : Assorted timer/timing errors - * Matthew Dillon : Fixed another RST bug - * Alan Cox : Move to kernel side addressing changes. - * Alan Cox : Beginning work on TCP fastpathing - * (not yet usable) - * Arnt Gulbrandsen: Turbocharged tcp_check() routine. - * Alan Cox : TCP fast path debugging - * Alan Cox : Window clamping - * Michael Riepe : Bug in tcp_check() - * Matt Dillon : More TCP improvements and RST bug fixes - * Matt Dillon : Yet more small nasties remove from the - * TCP code (Be very nice to this man if - * tcp finally works 100%) 8) - * Alan Cox : BSD accept semantics. - * Alan Cox : Reset on closedown bug. - * Peter De Schrijver : ENOTCONN check missing in tcp_sendto(). - * Michael Pall : Handle poll() after URG properly in - * all cases. - * Michael Pall : Undo the last fix in tcp_read_urg() - * (multi URG PUSH broke rlogin). - * Michael Pall : Fix the multi URG PUSH problem in - * tcp_readable(), poll() after URG - * works now. - * Michael Pall : recv(...,MSG_OOB) never blocks in the - * BSD api. - * Alan Cox : Changed the semantics of sk->socket to - * fix a race and a signal problem with - * accept() and async I/O. - * Alan Cox : Relaxed the rules on tcp_sendto(). - * Yury Shevchuk : Really fixed accept() blocking problem. - * Craig I. Hagan : Allow for BSD compatible TIME_WAIT for - * clients/servers which listen in on - * fixed ports. - * Alan Cox : Cleaned the above up and shrank it to - * a sensible code size. - * Alan Cox : Self connect lockup fix. 
- * Alan Cox : No connect to multicast. - * Ross Biro : Close unaccepted children on master - * socket close. - * Alan Cox : Reset tracing code. - * Alan Cox : Spurious resets on shutdown. - * Alan Cox : Giant 15 minute/60 second timer error - * Alan Cox : Small whoops in polling before an - * accept. - * Alan Cox : Kept the state trace facility since - * it's handy for debugging. - * Alan Cox : More reset handler fixes. - * Alan Cox : Started rewriting the code based on - * the RFC's for other useful protocol - * references see: Comer, KA9Q NOS, and - * for a reference on the difference - * between specifications and how BSD - * works see the 4.4lite source. - * A.N.Kuznetsov : Don't time wait on completion of tidy - * close. - * Linus Torvalds : Fin/Shutdown & copied_seq changes. - * Linus Torvalds : Fixed BSD port reuse to work first syn - * Alan Cox : Reimplemented timers as per the RFC - * and using multiple timers for sanity. - * Alan Cox : Small bug fixes, and a lot of new - * comments. - * Alan Cox : Fixed dual reader crash by locking - * the buffers (much like datagram.c) - * Alan Cox : Fixed stuck sockets in probe. A probe - * now gets fed up of retrying without - * (even a no space) answer. - * Alan Cox : Extracted closing code better - * Alan Cox : Fixed the closing state machine to - * resemble the RFC. - * Alan Cox : More 'per spec' fixes. - * Jorge Cwik : Even faster checksumming. - * Alan Cox : tcp_data() doesn't ack illegal PSH - * only frames. At least one pc tcp stack - * generates them. - * Alan Cox : Cache last socket. - * Alan Cox : Per route irtt. - * Matt Day : poll()->select() match BSD precisely on error - * Alan Cox : New buffers - * Marc Tamsky : Various sk->prot->retransmits and - * sk->retransmits misupdating fixed. - * Fixed tcp_write_timeout: stuck close, - * and TCP syn retries gets used now. - * Mark Yarvis : In tcp_read_wakeup(), don't send an - * ack if state is TCP_CLOSED. 
- * Alan Cox : Look up device on a retransmit - routes may - * change. Doesn't yet cope with MSS shrink right - * but its a start! - * Marc Tamsky : Closing in closing fixes. - * Mike Shaver : RFC1122 verifications. - * Alan Cox : rcv_saddr errors. - * Alan Cox : Block double connect(). - * Alan Cox : Small hooks for enSKIP. - * Alexey Kuznetsov: Path MTU discovery. - * Alan Cox : Support soft errors. - * Alan Cox : Fix MTU discovery pathological case - * when the remote claims no mtu! - * Marc Tamsky : TCP_CLOSE fix. - * Colin (G3TNE) : Send a reset on syn ack replies in - * window but wrong (fixes NT lpd problems) - * Pedro Roque : Better TCP window handling, delayed ack. - * Joerg Reuter : No modification of locked buffers in - * tcp_do_retransmit() - * Eric Schenk : Changed receiver side silly window - * avoidance algorithm to BSD style - * algorithm. This doubles throughput - * against machines running Solaris, - * and seems to result in general - * improvement. - * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD - * Willy Konynenberg : Transparent proxying support. - * Mike McLagan : Routing by source - * Keith Owens : Do proper merging with partial SKB's in - * tcp_do_sendmsg to avoid burstiness. - * Eric Schenk : Fix fast close down bug with - * shutdown() followed by close(). - * Andi Kleen : Make poll agree with SIGIO - * Salvatore Sanfilippo : Support SO_LINGER with linger == 1 and - * lingertime == 0 (RFC 793 ABORT Call) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or(at your option) any later version. - * - * Description of States: - * - * TCP_SYN_SENT sent a connection request, waiting for ack - * - * TCP_SYN_RECV received a connection request, sent ack, - * waiting for final ack in three-way handshake. 
- * - * TCP_ESTABLISHED connection established - * - * TCP_FIN_WAIT1 our side has shutdown, waiting to complete - * transmission of remaining buffered data - * - * TCP_FIN_WAIT2 all buffered data sent, waiting for remote - * to shutdown - * - * TCP_CLOSING both sides have shutdown but we still have - * data we have to finish sending - * - * TCP_TIME_WAIT timeout to catch resent junk before entering - * closed, can only be entered from FIN_WAIT2 - * or CLOSING. Required because the other end - * may not have gotten our last ACK causing it - * to retransmit the data packet (which we ignore) - * - * TCP_CLOSE_WAIT remote side has shutdown and is waiting for - * us to finish writing our data and to shutdown - * (we have to close() to move on to LAST_ACK) - * - * TCP_LAST_ACK out side has shutdown after remote has - * shutdown. There may still be data in our - * buffer that we have to finish sending - * - * TCP_CLOSE socket is finished - */ - -#if 0 -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#else -#include "linux.h" -#include "tcpcore.h" -#endif - -int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; - -#ifdef ROS_STATISTICS -struct tcp_mib tcp_statistics[NR_CPUS*2]; -#endif - -kmem_cache_t *tcp_openreq_cachep; -kmem_cache_t *tcp_bucket_cachep; -kmem_cache_t *tcp_timewait_cachep; - -#if 0 -atomic_t tcp_orphan_count = ATOMIC_INIT(0); -#endif - -int sysctl_tcp_mem[3]; -int sysctl_tcp_wmem[3] = { 4*1024, 16*1024, 128*1024 }; -int sysctl_tcp_rmem[3] = { 4*1024, 87380, 87380*2 }; - -atomic_t tcp_memory_allocated; /* Current allocated memory. */ -atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ - -/* Pressure flag: try to collapse. - * Technical note: it is used by multiple contexts non atomically. - * All the tcp_mem_schedule() is of this nature: accounting - * is strict, actions are advisory and have some latency. 
*/ -int tcp_memory_pressure; - -#define TCP_PAGES(amt) (((amt)+TCP_MEM_QUANTUM-1)/TCP_MEM_QUANTUM) - -int tcp_mem_schedule(struct sock *sk, int size, int kind) -{ - int amt = TCP_PAGES(size); - - sk->forward_alloc += amt*TCP_MEM_QUANTUM; - atomic_add(amt, &tcp_memory_allocated); - - /* Under limit. */ - if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { - if (tcp_memory_pressure) - tcp_memory_pressure = 0; - return 1; - } - - /* Over hard limit. */ - if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) { - tcp_enter_memory_pressure(); - goto suppress_allocation; - } - - /* Under pressure. */ - if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1]) - tcp_enter_memory_pressure(); - - if (kind) { - if (atomic_read(&sk->rmem_alloc) < sysctl_tcp_rmem[0]) - return 1; - } else { - if (sk->wmem_queued < sysctl_tcp_wmem[0]) - return 1; - } - - if (!tcp_memory_pressure || - sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) - * TCP_PAGES(sk->wmem_queued+atomic_read(&sk->rmem_alloc)+ - sk->forward_alloc)) - return 1; - -suppress_allocation: - - if (kind == 0) { - tcp_moderate_sndbuf(sk); - - /* Fail only if socket is _under_ its sndbuf. - * In this case we cannot block, so that we have to fail. - */ - if (sk->wmem_queued+size >= sk->sndbuf) - return 1; - } - - /* Alas. Undo changes. */ - sk->forward_alloc -= amt*TCP_MEM_QUANTUM; - atomic_sub(amt, &tcp_memory_allocated); - return 0; -} - -void __tcp_mem_reclaim(struct sock *sk) -{ - if (sk->forward_alloc >= TCP_MEM_QUANTUM) { - atomic_sub(sk->forward_alloc/TCP_MEM_QUANTUM, &tcp_memory_allocated); - sk->forward_alloc &= (TCP_MEM_QUANTUM-1); - if (tcp_memory_pressure && - atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) - tcp_memory_pressure = 0; - } -} - -void tcp_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - atomic_sub(skb->truesize, &sk->rmem_alloc); - sk->forward_alloc += skb->truesize; -} - -/* - * LISTEN is a special case for poll.. 
- */ -static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) -{ - return sk->tp_pinfo.af_tcp.accept_queue ? (POLLIN | POLLRDNORM) : 0; -} - -/* - * Wait for a TCP event. - * - * Note that we don't need to lock the socket, as the upper poll layers - * take care of normal races (between the test and the event) and we don't - * go look at any of the socket buffers directly. - */ -unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait) -{ -#if 0 - unsigned int mask; - struct sock *sk = sock->sk; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - poll_wait(file, sk->sleep, wait); - if (sk->state == TCP_LISTEN) - return tcp_listen_poll(sk, wait); - - /* Socket is not locked. We are protected from async events - by poll logic and correct handling of state changes - made by another threads is impossible in any case. - */ - - mask = 0; - if (sk->err) - mask = POLLERR; - - /* - * POLLHUP is certainly not done right. But poll() doesn't - * have a notion of HUP in just one direction, and for a - * socket the read side is more interesting. - * - * Some poll() documentation says that POLLHUP is incompatible - * with the POLLOUT/POLLWR flags, so somebody should check this - * all. But careful, it tends to be safer to return too many - * bits than too few, and you can easily break real applications - * if you don't tell them that something has hung up! - * - * Check-me. - * - * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and - * our fs/select.c). It means that after we received EOF, - * poll always returns immediately, making impossible poll() on write() - * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP - * if and only if shutdown has been made in both directions. - * Actually, it is interesting to look how Solaris and DUX - * solve this dilemma. I would prefer, if PULLHUP were maskable, - * then we could set it on SND_SHUTDOWN. 
BTW examples given - * in Stevens' books assume exactly this behaviour, it explains - * why PULLHUP is incompatible with POLLOUT. --ANK - * - * NOTE. Check for TCP_CLOSE is added. The goal is to prevent - * blocking on fresh not-connected or disconnected socket. --ANK - */ - if (sk->shutdown == SHUTDOWN_MASK || sk->state == TCP_CLOSE) - mask |= POLLHUP; - if (sk->shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM; - - /* Connected? */ - if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) { - /* Potential race condition. If read of tp below will - * escape above sk->state, we can be illegally awaken - * in SYN_* states. */ - if ((tp->rcv_nxt != tp->copied_seq) && - (tp->urg_seq != tp->copied_seq || - tp->rcv_nxt != tp->copied_seq+1 || - sk->urginline || !tp->urg_data)) - mask |= POLLIN | POLLRDNORM; - - if (!(sk->shutdown & SEND_SHUTDOWN)) { - if (tcp_wspace(sk) >= tcp_min_write_space(sk)) { - mask |= POLLOUT | POLLWRNORM; - } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - set_bit(SOCK_NOSPACE, &sk->socket->flags); - - /* Race breaker. If space is freed after - * wspace test but before the flags are set, - * IO signal will be lost. - */ - if (tcp_wspace(sk) >= tcp_min_write_space(sk)) - mask |= POLLOUT | POLLWRNORM; - } - } - - if (tp->urg_data & TCP_URG_VALID) - mask |= POLLPRI; - } - return mask; -#else - return 0; -#endif -} - -/* - * TCP socket write_space callback. 
- */ -void tcp_write_space(struct sock *sk) -{ -#if 0 - struct socket *sock = sk->socket; - - if (tcp_wspace(sk) >= tcp_min_write_space(sk) && sock) { - clear_bit(SOCK_NOSPACE, &sock->flags); - - if (sk->sleep && waitqueue_active(sk->sleep)) - wake_up_interruptible(sk->sleep); - - if (sock->fasync_list && !(sk->shutdown&SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); - } -#endif -} - -int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int answ; - - switch(cmd) { - case SIOCINQ: - if (sk->state == TCP_LISTEN) - return(-EINVAL); - - lock_sock(sk); - if ((1<state) & (TCPF_SYN_SENT|TCPF_SYN_RECV)) - answ = 0; - else if (sk->urginline || !tp->urg_data || - before(tp->urg_seq,tp->copied_seq) || - !before(tp->urg_seq,tp->rcv_nxt)) { - answ = tp->rcv_nxt - tp->copied_seq; - - /* Subtract 1, if FIN is in queue. */ - if (answ && !skb_queue_empty(&sk->receive_queue)) - answ -= ((struct sk_buff*)sk->receive_queue.prev)->h.th->fin; - } else - answ = tp->urg_seq - tp->copied_seq; - release_sock(sk); - break; - case SIOCATMARK: - { - answ = tp->urg_data && tp->urg_seq == tp->copied_seq; - break; - } - case SIOCOUTQ: - if (sk->state == TCP_LISTEN) - return(-EINVAL); - - if ((1<state) & (TCPF_SYN_SENT|TCPF_SYN_RECV)) - answ = 0; - else - answ = tp->write_seq - tp->snd_una; - break; - default: - return(-ENOIOCTLCMD); - }; - - return put_user(answ, (int *)arg); -#else -return 0; -#endif -} - - -int tcp_listen_start(struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct tcp_listen_opt *lopt; - - sk->max_ack_backlog = 0; - sk->ack_backlog = 0; - tp->accept_queue = tp->accept_queue_tail = NULL; - tp->syn_wait_lock = RW_LOCK_UNLOCKED; - tcp_delack_init(tp); - - lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL); - if (!lopt) - return -ENOMEM; - - memset(lopt, 0, sizeof(struct tcp_listen_opt)); - for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) - if ((1<max_qlen_log) >= 
sysctl_max_syn_backlog) - break; - - write_lock_bh(&tp->syn_wait_lock); - tp->listen_opt = lopt; - write_unlock_bh(&tp->syn_wait_lock); - - /* There is race window here: we announce ourselves listening, - * but this transition is still not validated by get_port(). - * It is OK, because this socket enters to hash table only - * after validation is complete. - */ - sk->state = TCP_LISTEN; - if (sk->prot->get_port(sk, sk->num) == 0) { - sk->sport = htons(sk->num); - - sk_dst_reset(sk); - sk->prot->hash(sk); - - return 0; - } - - sk->state = TCP_CLOSE; - write_lock_bh(&tp->syn_wait_lock); - tp->listen_opt = NULL; - write_unlock_bh(&tp->syn_wait_lock); - kfree(lopt); - return -EADDRINUSE; -#endif -} - -/* - * This routine closes sockets which have been at least partially - * opened, but not yet accepted. - */ - -static void tcp_listen_stop (struct sock *sk) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct tcp_listen_opt *lopt = tp->listen_opt; - struct open_request *acc_req = tp->accept_queue; - struct open_request *req; - int i; - - tcp_delete_keepalive_timer(sk); - - /* make all the listen_opt local to us */ - write_lock_bh(&tp->syn_wait_lock); - tp->listen_opt =NULL; - write_unlock_bh(&tp->syn_wait_lock); - tp->accept_queue = tp->accept_queue_tail = NULL; - - if (lopt->qlen) { - for (i=0; isyn_table[i]) != NULL) { - lopt->syn_table[i] = req->dl_next; - lopt->qlen--; - tcp_openreq_free(req); - - /* Following specs, it would be better either to send FIN - * (and enter FIN-WAIT-1, it is normal close) - * or to send active reset (abort). - * Certainly, it is pretty dangerous while synflood, but it is - * bad justification for our negligence 8) - * To be honest, we are not able to make either - * of the variants now. 
--ANK - */ - } - } - } - BUG_TRAP(lopt->qlen == 0); - - kfree(lopt); - - while ((req=acc_req) != NULL) { - struct sock *child = req->sk; - - acc_req = req->dl_next; - - local_bh_disable(); - bh_lock_sock(child); - BUG_TRAP(child->lock.users==0); - sock_hold(child); - - tcp_disconnect(child, O_NONBLOCK); - - sock_orphan(child); - - atomic_inc(&tcp_orphan_count); - - tcp_destroy_sock(child); - - bh_unlock_sock(child); - local_bh_enable(); - sock_put(child); - - tcp_acceptq_removed(sk); - tcp_openreq_fastfree(req); - } - BUG_TRAP(sk->ack_backlog == 0); -#endif -} - -/* - * Wait for a socket to get into the connected state - * - * Note: Must be called with the socket locked. - */ -static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p) -{ -#if 0 - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - while((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { - if(sk->err) - return sock_error(sk); - if((1 << sk->state) & - ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) - return -EPIPE; - if(!*timeo_p) - return -EAGAIN; - if(signal_pending(tsk)) - return sock_intr_errno(*timeo_p); - - __set_task_state(tsk, TASK_INTERRUPTIBLE); - add_wait_queue(sk->sleep, &wait); - sk->tp_pinfo.af_tcp.write_pending++; - - release_sock(sk); - *timeo_p = schedule_timeout(*timeo_p); - lock_sock(sk); - - __set_task_state(tsk, TASK_RUNNING); - remove_wait_queue(sk->sleep, &wait); - sk->tp_pinfo.af_tcp.write_pending--; - } - return 0; -#else - return 0; -#endif -} - -static inline int tcp_memory_free(struct sock *sk) -{ - return sk->wmem_queued < sk->sndbuf; -} - -/* - * Wait for more memory for a socket - */ -static int wait_for_tcp_memory(struct sock * sk, long *timeo) -{ -#if 0 - int err = 0; - long vm_wait = 0; - long current_timeo = *timeo; - DECLARE_WAITQUEUE(wait, current); - - if (tcp_memory_free(sk)) - current_timeo = vm_wait = (net_random()%(HZ/5))+2; - - add_wait_queue(sk->sleep, &wait); - for (;;) { - set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - 
- set_current_state(TASK_INTERRUPTIBLE); - - if (sk->err || (sk->shutdown & SEND_SHUTDOWN)) - goto do_error; - if (!*timeo) - goto do_nonblock; - if (signal_pending(current)) - goto do_interrupted; - clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - if (tcp_memory_free(sk) && !vm_wait) - break; - - set_bit(SOCK_NOSPACE, &sk->socket->flags); - sk->tp_pinfo.af_tcp.write_pending++; - release_sock(sk); - if (!tcp_memory_free(sk) || vm_wait) - current_timeo = schedule_timeout(current_timeo); - lock_sock(sk); - sk->tp_pinfo.af_tcp.write_pending--; - - if (vm_wait) { - vm_wait -= current_timeo; - current_timeo = *timeo; - if (current_timeo != MAX_SCHEDULE_TIMEOUT && - (current_timeo -= vm_wait) < 0) - current_timeo = 0; - vm_wait = 0; - } - *timeo = current_timeo; - } -out: - current->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); - return err; - -do_error: - err = -EPIPE; - goto out; -do_nonblock: - err = -EAGAIN; - goto out; -do_interrupted: - err = sock_intr_errno(*timeo); - goto out; -#endif -} - -ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags); - -static inline int -can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) -{ -#if 0 - if (i) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - return page == frag->page && - off == frag->page_offset+frag->size; - } - return 0; -#else -return 0; -#endif -} - -static inline void -fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i+1; -} - -static inline void tcp_mark_push(struct tcp_opt *tp, struct sk_buff *skb) -{ - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; - tp->pushed_seq = tp->write_seq; -} - -static inline int forced_push(struct tcp_opt *tp) -{ - return after(tp->write_seq, tp->pushed_seq + (tp->max_window>>1)); -} - -static inline void -skb_entail(struct 
sock *sk, struct tcp_opt *tp, struct sk_buff *skb) -{ - skb->csum = 0; - TCP_SKB_CB(skb)->seq = tp->write_seq; - TCP_SKB_CB(skb)->end_seq = tp->write_seq; - TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(skb)->sacked = 0; - __skb_queue_tail(&sk->write_queue, skb); - tcp_charge_skb(sk, skb); - if (tp->send_head == NULL) - tp->send_head = skb; -} - -static inline void -tcp_mark_urg(struct tcp_opt *tp, int flags, struct sk_buff *skb) -{ -#if 0 - if (flags & MSG_OOB) { - tp->urg_mode = 1; - tp->snd_up = tp->write_seq; - TCP_SKB_CB(skb)->sacked |= TCPCB_URG; - } -#endif -} - -static inline void -tcp_push(struct sock *sk, struct tcp_opt *tp, int flags, int mss_now, int nonagle) -{ -#if 0 - if (tp->send_head) { - struct sk_buff *skb = sk->write_queue.prev; - if (!(flags&MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, skb); - tcp_mark_urg(tp, flags, skb); - __tcp_push_pending_frames(sk, tp, mss_now, (flags&MSG_MORE) ? 2 : nonagle); - } -#endif -} - -static int tcp_error(struct sock *sk, int flags, int err) -{ -#if 0 - if (err == -EPIPE) - err = sock_error(sk) ? : -EPIPE; - if (err == -EPIPE && !(flags&MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - return err; -#else - return 0; -#endif -} - -ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int mss_now; - int err; - ssize_t copied; - long timeo = sock_sndtimeo(sk, flags&MSG_DONTWAIT); - - /* Wait for a connection to finish. 
*/ - if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) - if((err = wait_for_tcp_connect(sk, 0, &timeo)) != 0) - goto out_err; - - clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - - mss_now = tcp_current_mss(sk); - copied = 0; - - err = -EPIPE; - if (sk->err || (sk->shutdown & SEND_SHUTDOWN)) - goto do_error; - - while (psize > 0) { - struct sk_buff *skb = sk->write_queue.prev; - int offset, size, copy, i; - struct page *page; - - page = pages[poffset/PAGE_SIZE]; - offset = poffset % PAGE_SIZE; - size = min_t(size_t, psize, PAGE_SIZE-offset); - - if (tp->send_head==NULL || (copy = mss_now - skb->len) <= 0) { -new_segment: - if (!tcp_memory_free(sk)) - goto wait_for_sndbuf; - - skb = tcp_alloc_pskb(sk, 0, tp->mss_cache, sk->allocation); - if (skb == NULL) - goto wait_for_memory; - - skb_entail(sk, tp, skb); - copy = mss_now; - } - - if (copy > size) - copy = size; - - i = skb_shinfo(skb)->nr_frags; - if (can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += copy; - } else if (i < MAX_SKB_FRAGS) { - get_page(page); - fill_page_desc(skb, i, page, offset, copy); - } else { - tcp_mark_push(tp, skb); - goto new_segment; - } - - skb->len += copy; - skb->data_len += copy; - skb->ip_summed = CHECKSUM_HW; - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - - if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; - - copied += copy; - poffset += copy; - if (!(psize -= copy)) - goto out; - - if (skb->len != mss_now || (flags&MSG_OOB)) - continue; - - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, tp, mss_now, 1); - } else if (skb == tp->send_head) - tcp_push_one(sk, mss_now); - continue; - -wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->socket->flags); -wait_for_memory: - if (copied) - tcp_push(sk, tp, flags&~MSG_MORE, mss_now, 1); - - if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) - goto do_error; - - mss_now = tcp_current_mss(sk); - } - -out: - if (copied) - tcp_push(sk, tp, flags, 
mss_now, tp->nonagle); - return copied; - -do_error: - if (copied) - goto out; -out_err: - return tcp_error(sk, flags, err); -#else -return 0; -#endif -} - -ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -{ -#if 0 - ssize_t res; - struct sock *sk = sock->sk; - -#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) - - if (!(sk->route_caps & NETIF_F_SG) || - !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) - return sock_no_sendpage(sock, page, offset, size, flags); - -#undef TCP_ZC_CSUM_FLAGS - - lock_sock(sk); - TCP_CHECK_TIMER(sk); - res = do_tcp_sendpages(sk, &page, offset, size, flags); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return res; -#else - return 0; -#endif -} - -#define TCP_PAGE(sk) (sk->tp_pinfo.af_tcp.sndmsg_page) -#define TCP_OFF(sk) (sk->tp_pinfo.af_tcp.sndmsg_off) - -static inline int -tcp_copy_to_page(struct sock *sk, char *from, struct sk_buff *skb, - struct page *page, int off, int copy) -{ - int err = 0; - unsigned int csum; - - csum = csum_and_copy_from_user(from, page_address(page)+off, - copy, 0, &err); - if (!err) { - if (skb->ip_summed == CHECKSUM_NONE) - skb->csum = csum_block_add(skb->csum, csum, skb->len); - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; - sk->wmem_queued += copy; - sk->forward_alloc -= copy; - } - return err; -} - -static inline int -skb_add_data(struct sk_buff *skb, char *from, int copy) -{ -#if 0 - int err = 0; - unsigned int csum; - int off = skb->len; - - csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { - skb->csum = csum_block_add(skb->csum, csum, off); - return 0; - } - - __skb_trim(skb, off); - return -EFAULT; -#else -return 0; -#endif -} - -static inline int select_size(struct sock *sk, struct tcp_opt *tp) -{ -#if 0 - int tmp = tp->mss_cache; - - if (sk->route_caps&NETIF_F_SG) { - int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); - - if (tmp >= pgbreak && tmp <= pgbreak + 
(MAX_SKB_FRAGS-1)*PAGE_SIZE) - tmp = pgbreak; - } - return tmp; -#else - return 0; -#endif -} - -int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) -{ -#if 0 - struct iovec *iov; - struct tcp_opt *tp; - struct sk_buff *skb; - int iovlen, flags; - int mss_now; - int err, copied; - long timeo; - - tp = &(sk->tp_pinfo.af_tcp); - - lock_sock(sk); - TCP_CHECK_TIMER(sk); - - flags = msg->msg_flags; - timeo = sock_sndtimeo(sk, flags&MSG_DONTWAIT); - - /* Wait for a connection to finish. */ - if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) - if((err = wait_for_tcp_connect(sk, flags, &timeo)) != 0) - goto out_err; - - /* This should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); - - mss_now = tcp_current_mss(sk); - - /* Ok commence sending. */ - iovlen = msg->msg_iovlen; - iov = msg->msg_iov; - copied = 0; - - err = -EPIPE; - if (sk->err || (sk->shutdown&SEND_SHUTDOWN)) - goto do_error; - - while (--iovlen >= 0) { - int seglen=iov->iov_len; - unsigned char * from=iov->iov_base; - - iov++; - - while (seglen > 0) { - int copy; - - skb = sk->write_queue.prev; - - if (tp->send_head == NULL || - (copy = mss_now - skb->len) <= 0) { - -new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. - */ - if (!tcp_memory_free(sk)) - goto wait_for_sndbuf; - - skb = tcp_alloc_pskb(sk, select_size(sk, tp), 0, sk->allocation); - if (skb == NULL) - goto wait_for_memory; - - skb_entail(sk, tp, skb); - copy = mss_now; - } - - /* Try to append data to the end of skb. */ - if (copy > seglen) - copy = seglen; - - /* Where to copy to? */ - if (skb_tailroom(skb) > 0) { - /* We have some space in skb head. Superb! 
*/ - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) - goto do_fault; - } else { - int merge = 0; - int i = skb_shinfo(skb)->nr_frags; - struct page *page = TCP_PAGE(sk); - int off = TCP_OFF(sk); - - if (can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { - /* We can extend the last page fragment. */ - merge = 1; - } else if (i == MAX_SKB_FRAGS || - (i == 0 && !(sk->route_caps&NETIF_F_SG))) { - /* Need to add new fragment and cannot - * do this because interface is non-SG, - * or because all the page slots are busy. - */ - tcp_mark_push(tp, skb); - goto new_segment; - } else if (page) { - /* If page is cached, align - * offset to L1 cache boundary - */ - off = (off+L1_CACHE_BYTES-1)&~(L1_CACHE_BYTES-1); - if (off == PAGE_SIZE) { - put_page(page); - TCP_PAGE(sk) = page = NULL; - } - } - - if (!page) { - /* Allocate new cache page. */ - if (!(page=tcp_alloc_page(sk))) - goto wait_for_memory; - off = 0; - } - - if (copy > PAGE_SIZE-off) - copy = PAGE_SIZE-off; - - /* Time to copy data. We are close to the end! */ - err = tcp_copy_to_page(sk, from, skb, page, off, copy); - if (err) { - /* If this page was new, give it to the - * socket so it does not get leaked. - */ - if (TCP_PAGE(sk) == NULL) { - TCP_PAGE(sk) = page; - TCP_OFF(sk) = 0; - } - goto do_error; - } - - /* Update the skb. 
*/ - if (merge) { - skb_shinfo(skb)->frags[i-1].size += copy; - } else { - fill_page_desc(skb, i, page, off, copy); - if (TCP_PAGE(sk)) { - get_page(page); - } else if (off + copy < PAGE_SIZE) { - get_page(page); - TCP_PAGE(sk) = page; - } - } - - TCP_OFF(sk) = off+copy; - } - - if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; - - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - - from += copy; - copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) - goto out; - - if (skb->len != mss_now || (flags&MSG_OOB)) - continue; - - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, tp, mss_now, 1); - } else if (skb == tp->send_head) - tcp_push_one(sk, mss_now); - continue; - -wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->socket->flags); -wait_for_memory: - if (copied) - tcp_push(sk, tp, flags&~MSG_MORE, mss_now, 1); - - if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) - goto do_error; - - mss_now = tcp_current_mss(sk); - } - } - -out: - if (copied) - tcp_push(sk, tp, flags, mss_now, tp->nonagle); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return copied; - -do_fault: - if (skb->len == 0) { - if (tp->send_head == skb) - tp->send_head = NULL; - __skb_unlink(skb, skb->list); - tcp_free_skb(sk, skb); - } - -do_error: - if (copied) - goto out; -out_err: - err = tcp_error(sk, flags, err); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return err; -#else - return 0; -#endif -} - -/* - * Handle reading urgent data. BSD has very simple semantics for - * this, no blocking and very strange errors 8) - */ - -static int tcp_recv_urg(struct sock * sk, long timeo, - struct msghdr *msg, int len, int flags, - int *addr_len) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - /* No URG data to read. */ - if (sk->urginline || !tp->urg_data || tp->urg_data == TCP_URG_READ) - return -EINVAL; /* Yes this is right ! 
*/ - - if (sk->state==TCP_CLOSE && !sk->done) - return -ENOTCONN; - - if (tp->urg_data & TCP_URG_VALID) { - int err = 0; - char c = tp->urg_data; - - if (!(flags & MSG_PEEK)) - tp->urg_data = TCP_URG_READ; - - /* Read urgent data. */ - msg->msg_flags|=MSG_OOB; - - if(len>0) { - if (!(flags & MSG_TRUNC)) - err = memcpy_toiovec(msg->msg_iov, &c, 1); - len = 1; - } else - msg->msg_flags|=MSG_TRUNC; - - return err ? -EFAULT : len; - } - - if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN)) - return 0; - - /* Fixed the recv(..., MSG_OOB) behaviour. BSD docs and - * the available implementations agree in this case: - * this call should never block, independent of the - * blocking state of the socket. - * Mike - */ - return -EAGAIN; -#else -return 0; -#endif -} - -/* - * Release a skb if it is no longer needed. This routine - * must be called with interrupts disabled or with the - * socket locked so that the sk_buff queue operation is ok. - */ - -static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb) -{ -#if 0 - __skb_unlink(skb, &sk->receive_queue); - __kfree_skb(skb); -#endif -} - -/* Clean up the receive buffer for full frames taken by the user, - * then send an ACK if necessary. COPIED is the number of bytes - * tcp_recvmsg has given to the user so far, it speeds up the - * calculation of whether or not we must ACK for the sake of - * a window update. - */ -static void cleanup_rbuf(struct sock *sk, int copied) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int time_to_ack = 0; - -#if TCP_DEBUG - struct sk_buff *skb = skb_peek(&sk->receive_queue); - - BUG_TRAP(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); -#endif - - if (tcp_ack_scheduled(tp)) { - /* Delayed ACKs frequently hit locked sockets during bulk receive. 
*/ - if (tp->ack.blocked - /* Once-per-two-segments ACK was not sent by tcp_input.c */ - || tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss - /* - * If this read emptied read buffer, we send ACK, if - * connection is not bidirectional, user drained - * receive buffer and there was a small segment - * in queue. - */ - || (copied > 0 && - (tp->ack.pending&TCP_ACK_PUSHED) && - !tp->ack.pingpong && - atomic_read(&sk->rmem_alloc) == 0)) { - time_to_ack = 1; - } - } - - /* We send an ACK if we can now advertise a non-zero window - * which has been raised "significantly". - * - * Even if window raised up to infinity, do not send window open ACK - * in states, where we will not receive more. It is useless. - */ - if(copied > 0 && !time_to_ack && !(sk->shutdown&RCV_SHUTDOWN)) { - __u32 rcv_window_now = tcp_receive_window(tp); - - /* Optimize, __tcp_select_window() is not cheap. */ - if (2*rcv_window_now <= tp->window_clamp) { - __u32 new_window = __tcp_select_window(sk); - - /* Send ACK now, if this read freed lots of space - * in our buffer. Certainly, new_window is new window. - * We can advertise it now, if it is not less than current one. - * "Lots" means "at least twice" here. - */ - if(new_window && new_window >= 2*rcv_window_now) - time_to_ack = 1; - } - } - if (time_to_ack) - tcp_send_ack(sk); -#endif -} - -/* Now socket state including sk->err is changed only under lock, - * hence we may omit checks after joining wait queue. - * We check receive queue before schedule() only as optimization; - * it is very likely that release_sock() added new data. 
- */ - -static long tcp_data_wait(struct sock *sk, long timeo) -{ -#if 0 - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(sk->sleep, &wait); - - __set_current_state(TASK_INTERRUPTIBLE); - - set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); - release_sock(sk); - - if (skb_queue_empty(&sk->receive_queue)) - timeo = schedule_timeout(timeo); - - lock_sock(sk); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags); - - remove_wait_queue(sk->sleep, &wait); - __set_current_state(TASK_RUNNING); - return timeo; -#else - return 0; -#endif -} - -static void tcp_prequeue_process(struct sock *sk) -{ -#if 0 - struct sk_buff *skb; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - net_statistics[smp_processor_id()*2+1].TCPPrequeued += skb_queue_len(&tp->ucopy.prequeue); - - /* RX process wants to run with disabled BHs, though it is not necessary */ - local_bh_disable(); - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->backlog_rcv(sk, skb); - local_bh_enable(); - - /* Clear memory counter. */ - tp->ucopy.memory = 0; -#endif -} - -static inline -struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) -{ -#if 0 - struct sk_buff *skb; - u32 offset; - - skb_queue_walk(&sk->receive_queue, skb) { - offset = seq - TCP_SKB_CB(skb)->seq; - if (skb->h.th->syn) - offset--; - if (offset < skb->len || skb->h.th->fin) { - *off = offset; - return skb; - } - } - return NULL; -#else - return NULL; -#endif -} - -/* - * This routine provides an alternative to tcp_recvmsg() for routines - * that would like to handle copying from skbuffs directly in 'sendfile' - * fashion. - * Note: - * - It is assumed that the socket was locked by the caller. - * - The routine does not block. - * - At present, there is no support for reading OOB data - * or for 'peeking' the socket using this routine - * (although both would be easy to implement). 
- */ -int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) -{ -#if 0 - struct sk_buff *skb; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - u32 seq = tp->copied_seq; - u32 offset; - int copied = 0; - - if (sk->state == TCP_LISTEN) - return -ENOTCONN; - while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { - if (offset < skb->len) { - size_t used, len; - - len = skb->len - offset; - /* Stop reading if we hit a patch of urgent data */ - if (tp->urg_data) { - u32 urg_offset = tp->urg_seq - seq; - if (urg_offset < len) - len = urg_offset; - if (!len) - break; - } - used = recv_actor(desc, skb, offset, len); - if (used <= len) { - seq += used; - copied += used; - offset += used; - } - if (offset != skb->len) - break; - } - if (skb->h.th->fin) { - tcp_eat_skb(sk, skb); - ++seq; - break; - } - tcp_eat_skb(sk, skb); - if (!desc->count) - break; - } - tp->copied_seq = seq; - /* Clean up data we have read: This will do ACK frames. */ - if (copied) - cleanup_rbuf(sk, copied); - return copied; -#else -#endif -} - -/* - * This routine copies from a sock struct into the user buffer. - * - * Technical note: in 2.3 we work on _locked_ socket, so that - * tricks with *seq access order and skb->users are not required. - * Probably, code can be easily improved even more. - */ - -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, - int len, int nonblock, int flags, int *addr_len) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int copied = 0; - u32 peek_seq; - u32 *seq; - unsigned long used; - int err; - int target; /* Read at least this many bytes */ - long timeo; - struct task_struct *user_recv = NULL; - - lock_sock(sk); - - TCP_CHECK_TIMER(sk); - - err = -ENOTCONN; - if (sk->state == TCP_LISTEN) - goto out; - - timeo = sock_rcvtimeo(sk, nonblock); - - /* Urgent data needs to be handled specially. 
*/ - if (flags & MSG_OOB) - goto recv_urg; - - seq = &tp->copied_seq; - if (flags & MSG_PEEK) { - peek_seq = tp->copied_seq; - seq = &peek_seq; - } - - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - - do { - struct sk_buff * skb; - u32 offset; - - /* Are we at urgent data? Stop if we have read anything. */ - if (copied && tp->urg_data && tp->urg_seq == *seq) - break; - - /* We need to check signals first, to get correct SIGURG - * handling. FIXME: Need to check this doesn't impact 1003.1g - * and move it down to the bottom of the loop - */ - if (signal_pending(current)) { - if (copied) - break; - copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; - break; - } - - /* Next get a buffer. */ - - skb = skb_peek(&sk->receive_queue); - do { - if (!skb) - break; - - /* Now that we have two receive queues this - * shouldn't happen. - */ - if (before(*seq, TCP_SKB_CB(skb)->seq)) { - printk(KERN_INFO "recvmsg bug: copied %X seq %X\n", - *seq, TCP_SKB_CB(skb)->seq); - break; - } - offset = *seq - TCP_SKB_CB(skb)->seq; - if (skb->h.th->syn) - offset--; - if (offset < skb->len) - goto found_ok_skb; - if (skb->h.th->fin) - goto found_fin_ok; - BUG_TRAP(flags&MSG_PEEK); - skb = skb->next; - } while (skb != (struct sk_buff *)&sk->receive_queue); - - /* Well, if we have backlog, try to process it now yet. */ - - if (copied >= target && sk->backlog.tail == NULL) - break; - - if (copied) { - if (sk->err || - sk->state == TCP_CLOSE || - (sk->shutdown & RCV_SHUTDOWN) || - !timeo || - (flags & MSG_PEEK)) - break; - } else { - if (sk->done) - break; - - if (sk->err) { - copied = sock_error(sk); - break; - } - - if (sk->shutdown & RCV_SHUTDOWN) - break; - - if (sk->state == TCP_CLOSE) { - if (!sk->done) { - /* This occurs when user tries to read - * from never connected socket. 
- */ - copied = -ENOTCONN; - break; - } - break; - } - - if (!timeo) { - copied = -EAGAIN; - break; - } - } - - cleanup_rbuf(sk, copied); - - if (tp->ucopy.task == user_recv) { - /* Install new reader */ - if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) { - user_recv = current; - tp->ucopy.task = user_recv; - tp->ucopy.iov = msg->msg_iov; - } - - tp->ucopy.len = len; - - BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&(MSG_PEEK|MSG_TRUNC))); - - /* Ugly... If prequeue is not empty, we have to - * process it before releasing socket, otherwise - * order will be broken at second iteration. - * More elegant solution is required!!! - * - * Look: we have the following (pseudo)queues: - * - * 1. packets in flight - * 2. backlog - * 3. prequeue - * 4. receive_queue - * - * Each queue can be processed only if the next ones - * are empty. At this point we have empty receive_queue. - * But prequeue _can_ be not empty after second iteration, - * when we jumped to start of loop because backlog - * processing added something to receive_queue. - * We cannot release_sock(), because backlog contains - * packets arrived _after_ prequeued ones. - * - * Shortly, algorithm is clear --- to process all - * the queues in order. We could make it more directly, - * requeueing packets from backlog to prequeue, if - * is not empty. It is more elegant, but eats cycles, - * unfortunately. - */ - if (skb_queue_len(&tp->ucopy.prequeue)) - goto do_prequeue; - - /* __ Set realtime policy in scheduler __ */ - } - - if (copied >= target) { - /* Do not sleep, just process backlog. 
*/ - release_sock(sk); - lock_sock(sk); - } else { - timeo = tcp_data_wait(sk, timeo); - } - - if (user_recv) { - int chunk; - - /* __ Restore normal policy in scheduler __ */ - - if ((chunk = len - tp->ucopy.len) != 0) { - net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromBacklog += chunk; - len -= chunk; - copied += chunk; - } - - if (tp->rcv_nxt == tp->copied_seq && - skb_queue_len(&tp->ucopy.prequeue)) { -do_prequeue: - tcp_prequeue_process(sk); - - if ((chunk = len - tp->ucopy.len) != 0) { - net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk; - len -= chunk; - copied += chunk; - } - } - } - if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { - if (net_ratelimit()) - printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", - current->comm, current->pid); - peek_seq = tp->copied_seq; - } - continue; - - found_ok_skb: - /* Ok so how much can we use? */ - used = skb->len - offset; - if (len < used) - used = len; - - /* Do we have urgent data here? */ - if (tp->urg_data) { - u32 urg_offset = tp->urg_seq - *seq; - if (urg_offset < used) { - if (!urg_offset) { - if (!sk->urginline) { - ++*seq; - offset++; - used--; - if (!used) - goto skip_copy; - } - } else - used = urg_offset; - } - } - - if (!(flags&MSG_TRUNC)) { - err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; - } - } - - *seq += used; - copied += used; - len -= used; - -skip_copy: - if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) { - tp->urg_data = 0; - tcp_fast_path_check(sk, tp); - } - if (used + offset < skb->len) - continue; - - if (skb->h.th->fin) - goto found_fin_ok; - if (!(flags & MSG_PEEK)) - tcp_eat_skb(sk, skb); - continue; - - found_fin_ok: - /* Process the FIN. 
*/ - ++*seq; - if (!(flags & MSG_PEEK)) - tcp_eat_skb(sk, skb); - break; - } while (len > 0); - - if (user_recv) { - if (skb_queue_len(&tp->ucopy.prequeue)) { - int chunk; - - tp->ucopy.len = copied > 0 ? len : 0; - - tcp_prequeue_process(sk); - - if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk; - len -= chunk; - copied += chunk; - } - } - - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - } - - /* According to UNIX98, msg_name/msg_namelen are ignored - * on connected socket. I was just happy when found this 8) --ANK - */ - - /* Clean up data we have read: This will do ACK frames. */ - cleanup_rbuf(sk, copied); - - TCP_CHECK_TIMER(sk); - release_sock(sk); - return copied; - -out: - TCP_CHECK_TIMER(sk); - release_sock(sk); - return err; - -recv_urg: - err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); - goto out; -#else - return 0; -#endif -} - -/* - * State processing on a close. This implements the state shift for - * sending our FIN frame. Note that we only send a FIN for some - * states. A shutdown() may have already sent the FIN, or we may be - * closed. - */ - -static unsigned char new_state[16] = { - /* current state: new state: action: */ - /* (Invalid) */ TCP_CLOSE, - /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, - /* TCP_SYN_SENT */ TCP_CLOSE, - /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, - /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, - /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, - /* TCP_TIME_WAIT */ TCP_CLOSE, - /* TCP_CLOSE */ TCP_CLOSE, - /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, - /* TCP_LAST_ACK */ TCP_LAST_ACK, - /* TCP_LISTEN */ TCP_CLOSE, - /* TCP_CLOSING */ TCP_CLOSING, -}; - -static int tcp_close_state(struct sock *sk) -{ -#if 0 - int next = (int) new_state[sk->state]; - int ns = (next & TCP_STATE_MASK); - - tcp_set_state(sk, ns); - - return (next & TCP_ACTION_FIN); -#else - return 0; -#endif -} - -/* - * Shutdown the sending side of a connection. 
Much like close except - * that we don't receive shut down or set sk->dead. - */ - -void tcp_shutdown(struct sock *sk, int how) -{ -#if 0 - /* We need to grab some memory, and put together a FIN, - * and then put it into the queue to be sent. - * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. - */ - if (!(how & SEND_SHUTDOWN)) - return; - - /* If we've already sent a FIN, or it's a closed state, skip this. */ - if ((1 << sk->state) & - (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) { - /* Clear out any half completed packets. FIN if needed. */ - if (tcp_close_state(sk)) - tcp_send_fin(sk); - } -#endif -} - - -/* - * Return 1 if we still have things to send in our buffers. - */ - -static inline int closing(struct sock * sk) -{ -#if 0 - return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK)); -#else - return 0; -#endif -} - -static __inline__ void tcp_kill_sk_queues(struct sock *sk) -{ -#if 0 - /* First the read buffer. */ - __skb_queue_purge(&sk->receive_queue); - - /* Next, the error queue. */ - __skb_queue_purge(&sk->error_queue); - - /* Next, the write queue. */ - BUG_TRAP(skb_queue_empty(&sk->write_queue)); - - /* Account for returned memory. */ - tcp_mem_reclaim(sk); - - BUG_TRAP(sk->wmem_queued == 0); - BUG_TRAP(sk->forward_alloc == 0); - - /* It is _impossible_ for the backlog to contain anything - * when we get here. All user references to this socket - * have gone away, only the net layer knows can touch it. - */ -#endif -} - -/* - * At this point, there should be no process reference to this - * socket, and thus no user references at all. Therefore we - * can assume the socket waitqueue is inactive and nobody will - * try to jump onto it. - */ -void tcp_destroy_sock(struct sock *sk) -{ -#if 0 - BUG_TRAP(sk->state==TCP_CLOSE); - BUG_TRAP(sk->dead); - - /* It cannot be in hash table! 
*/ - BUG_TRAP(sk->pprev==NULL); - - /* If it has not 0 sk->num, it must be bound */ - BUG_TRAP(!sk->num || sk->prev!=NULL); - -#ifdef TCP_DEBUG - if (sk->zapped) { - printk(KERN_DEBUG "TCP: double destroy sk=%p\n", sk); - sock_hold(sk); - } - sk->zapped = 1; -#endif - - sk->prot->destroy(sk); - - tcp_kill_sk_queues(sk); - -#ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->refcnt) != 1) { - printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt)); - } -#endif - - atomic_dec(&tcp_orphan_count); - sock_put(sk); -#endif -} - -void tcp_close(struct sock *sk, long timeout) -{ -#if 0 - struct sk_buff *skb; - int data_was_unread = 0; - - lock_sock(sk); - sk->shutdown = SHUTDOWN_MASK; - - if(sk->state == TCP_LISTEN) { - tcp_set_state(sk, TCP_CLOSE); - - /* Special case. */ - tcp_listen_stop(sk); - - goto adjudge_to_death; - } - - /* We need to flush the recv. buffs. We do this only on the - * descriptor close, not protocol-sourced closes, because the - * reader process may not have drained the data yet! - */ - while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) { - u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin; - data_was_unread += len; - __kfree_skb(skb); - } - - tcp_mem_reclaim(sk); - - /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section - * 3.10, we send a RST here because data was lost. To - * witness the awful effects of the old behavior of always - * doing a FIN, run an older 2.1.x kernel or 2.0.x, start - * a bulk GET in an FTP client, suspend the process, wait - * for the client to advertise a zero window, then kill -9 - * the FTP client, wheee... Note: timeout is always zero - * in such a case. - */ - if(data_was_unread != 0) { - /* Unread data was tossed, zap the connection. */ - NET_INC_STATS_USER(TCPAbortOnClose); - tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_KERNEL); - } else if (sk->linger && sk->lingertime==0) { - /* Check zero linger _after_ checking for unread data. 
*/ - sk->prot->disconnect(sk, 0); - NET_INC_STATS_USER(TCPAbortOnData); - } else if (tcp_close_state(sk)) { - /* We FIN if the application ate all the data before - * zapping the connection. - */ - - /* RED-PEN. Formally speaking, we have broken TCP state - * machine. State transitions: - * - * TCP_ESTABLISHED -> TCP_FIN_WAIT1 - * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) - * TCP_CLOSE_WAIT -> TCP_LAST_ACK - * - * are legal only when FIN has been sent (i.e. in window), - * rather than queued out of window. Purists blame. - * - * F.e. "RFC state" is ESTABLISHED, - * if Linux state is FIN-WAIT-1, but FIN is still not sent. - * - * The visible declinations are that sometimes - * we enter time-wait state, when it is not required really - * (harmless), do not send active resets, when they are - * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when - * they look as CLOSING or LAST_ACK for Linux) - * Probably, I missed some more holelets. - * --ANK - */ - tcp_send_fin(sk); - } - - if (timeout) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(sk->sleep, &wait); - - do { - set_current_state(TASK_INTERRUPTIBLE); - if (!closing(sk)) - break; - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } while (!signal_pending(tsk) && timeout); - - tsk->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); - } - -adjudge_to_death: - /* It is the last release_sock in its life. It will remove backlog. */ - release_sock(sk); - - - /* Now socket is owned by kernel and we acquire BH lock - to finish close. No need to check for user refs. - */ - local_bh_disable(); - bh_lock_sock(sk); - BUG_TRAP(sk->lock.users==0); - - sock_hold(sk); - sock_orphan(sk); - - /* This is a (useful) BSD violating of the RFC. There is a - * problem with TCP as specified in that the other end could - * keep a socket open forever with no application left this end. 
- * We use a 3 minute timeout (about the same as BSD) then kill - * our end. If they send after that then tough - BUT: long enough - * that we won't make the old 4*rto = almost no time - whoops - * reset mistake. - * - * Nope, it was not mistake. It is really desired behaviour - * f.e. on http servers, when such sockets are useless, but - * consume significant resources. Let's do it with special - * linger2 option. --ANK - */ - - if (sk->state == TCP_FIN_WAIT2) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - if (tp->linger2 < 0) { - tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(TCPAbortOnLinger); - } else { - int tmo = tcp_fin_time(tp); - - if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); - } else { - atomic_inc(&tcp_orphan_count); - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto out; - } - } - } - if (sk->state != TCP_CLOSE) { - tcp_mem_reclaim(sk); - if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || - (sk->wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { - if (net_ratelimit()) - printk(KERN_INFO "TCP: too many of orphaned sockets\n"); - tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(TCPAbortOnMemory); - } - } - atomic_inc(&tcp_orphan_count); - - if (sk->state == TCP_CLOSE) - tcp_destroy_sock(sk); - /* Otherwise, socket is reprieved until protocol close. 
*/ - -out: - bh_unlock_sock(sk); - local_bh_enable(); - sock_put(sk); -#endif -} - -/* These states need RST on ABORT according to RFC793 */ - -extern __inline__ int tcp_need_reset(int state) -{ -#if 0 - return ((1 << state) & - (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1| - TCPF_FIN_WAIT2|TCPF_SYN_RECV)); -#else - return 0; -#endif -} - -int tcp_disconnect(struct sock *sk, int flags) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - int old_state; - int err = 0; - - old_state = sk->state; - if (old_state != TCP_CLOSE) - tcp_set_state(sk, TCP_CLOSE); - - /* ABORT function of RFC793 */ - if (old_state == TCP_LISTEN) { - tcp_listen_stop(sk); - } else if (tcp_need_reset(old_state) || - (tp->snd_nxt != tp->write_seq && - (1<err = ECONNRESET; - } else if (old_state == TCP_SYN_SENT) - sk->err = ECONNRESET; - - tcp_clear_xmit_timers(sk); - __skb_queue_purge(&sk->receive_queue); - tcp_writequeue_purge(sk); - __skb_queue_purge(&tp->out_of_order_queue); - - sk->dport = 0; - - if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) { - sk->rcv_saddr = 0; - sk->saddr = 0; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - memset(&sk->net_pinfo.af_inet6.saddr, 0, 16); - memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16); -#endif - } - - sk->shutdown = 0; - sk->done = 0; - tp->srtt = 0; - if ((tp->write_seq += tp->max_window+2) == 0) - tp->write_seq = 1; - tp->backoff = 0; - tp->snd_cwnd = 2; - tp->probes_out = 0; - tp->packets_out = 0; - tp->snd_ssthresh = 0x7fffffff; - tp->snd_cwnd_cnt = 0; - tp->ca_state = TCP_CA_Open; - tcp_clear_retrans(tp); - tcp_delack_init(tp); - tp->send_head = NULL; - tp->saw_tstamp = 0; - tcp_sack_reset(tp); - __sk_dst_reset(sk); - - BUG_TRAP(!sk->num || sk->prev); - - sk->error_report(sk); - return err; -#else - return 0; -#endif -} - -/* - * Wait for an incoming connection, avoid race - * conditions. This must be called with the socket locked. 
- */ -static int wait_for_connect(struct sock * sk, long timeo) -{ -#if 0 - DECLARE_WAITQUEUE(wait, current); - int err; - - /* - * True wake-one mechanism for incoming connections: only - * one process gets woken up, not the 'whole herd'. - * Since we do not 'race & poll' for established sockets - * anymore, the common case will execute the loop only once. - * - * Subtle issue: "add_wait_queue_exclusive()" will be added - * after any current non-exclusive waiters, and we know that - * it will always _stay_ after any new non-exclusive waiters - * because all non-exclusive waiters are added at the - * beginning of the wait-queue. As such, it's ok to "drop" - * our exclusiveness temporarily when we get woken up without - * having to remove and re-insert us on the wait queue. - */ - add_wait_queue_exclusive(sk->sleep, &wait); - for (;;) { - current->state = TASK_INTERRUPTIBLE; - release_sock(sk); - if (sk->tp_pinfo.af_tcp.accept_queue == NULL) - timeo = schedule_timeout(timeo); - lock_sock(sk); - err = 0; - if (sk->tp_pinfo.af_tcp.accept_queue) - break; - err = -EINVAL; - if (sk->state != TCP_LISTEN) - break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!timeo) - break; - } - current->state = TASK_RUNNING; - remove_wait_queue(sk->sleep, &wait); - return err; -#else - return 0; -#endif -} - -/* - * This will accept the next outstanding connection. - */ - -struct sock *tcp_accept(struct sock *sk, int flags, int *err) -{ -#if 0 - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct open_request *req; - struct sock *newsk; - int error; - - lock_sock(sk); - - /* We need to make sure that this socket is listening, - * and that it has something pending. 
- */ - error = -EINVAL; - if (sk->state != TCP_LISTEN) - goto out; - - /* Find already established connection */ - if (!tp->accept_queue) { - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - - /* If this is a non blocking socket don't sleep */ - error = -EAGAIN; - if (!timeo) - goto out; - - error = wait_for_connect(sk, timeo); - if (error) - goto out; - } - - req = tp->accept_queue; - if ((tp->accept_queue = req->dl_next) == NULL) - tp->accept_queue_tail = NULL; - - newsk = req->sk; - tcp_acceptq_removed(sk); - tcp_openreq_fastfree(req); - BUG_TRAP(newsk->state != TCP_SYN_RECV); - release_sock(sk); - return newsk; - -out: - release_sock(sk); - *err = error; - return NULL; -#else - return NULL; -#endif -} - -/* - * Socket option code for TCP. - */ - -int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, - int optlen) -{ -#if 0 - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int val; - int err = 0; - - if (level != SOL_TCP) - return tp->af_specific->setsockopt(sk, level, optname, - optval, optlen); - - if(optlen MAX_TCP_WINDOW) { - err = -EINVAL; - break; - } - tp->user_mss = val; - break; - - case TCP_NODELAY: - /* You cannot try to use this and TCP_CORK in - * tandem, so let the user know. - */ - if (tp->nonagle == 2) { - err = -EINVAL; - break; - } - tp->nonagle = (val == 0) ? 0 : 1; - if (val) - tcp_push_pending_frames(sk, tp); - break; - - case TCP_CORK: - /* When set indicates to always queue non-full frames. - * Later the user clears this option and we transmit - * any pending partial frames in the queue. This is - * meant to be used alongside sendfile() to get properly - * filled frames when the user (for example) must write - * out headers with a write() call first and then use - * sendfile to send out the data parts. - * - * You cannot try to use TCP_NODELAY and this mechanism - * at the same time, so let the user know. 
- */ - if (tp->nonagle == 1) { - err = -EINVAL; - break; - } - if (val != 0) { - tp->nonagle = 2; - } else { - tp->nonagle = 0; - - tcp_push_pending_frames(sk, tp); - } - break; - - case TCP_KEEPIDLE: - if (val < 1 || val > MAX_TCP_KEEPIDLE) - err = -EINVAL; - else { - tp->keepalive_time = val * HZ; - if (sk->keepopen && !((1<state)&(TCPF_CLOSE|TCPF_LISTEN))) { - __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; - if (tp->keepalive_time > elapsed) - elapsed = tp->keepalive_time - elapsed; - else - elapsed = 0; - tcp_reset_keepalive_timer(sk, elapsed); - } - } - break; - case TCP_KEEPINTVL: - if (val < 1 || val > MAX_TCP_KEEPINTVL) - err = -EINVAL; - else - tp->keepalive_intvl = val * HZ; - break; - case TCP_KEEPCNT: - if (val < 1 || val > MAX_TCP_KEEPCNT) - err = -EINVAL; - else - tp->keepalive_probes = val; - break; - case TCP_SYNCNT: - if (val < 1 || val > MAX_TCP_SYNCNT) - err = -EINVAL; - else - tp->syn_retries = val; - break; - - case TCP_LINGER2: - if (val < 0) - tp->linger2 = -1; - else if (val > sysctl_tcp_fin_timeout/HZ) - tp->linger2 = 0; - else - tp->linger2 = val*HZ; - break; - - case TCP_DEFER_ACCEPT: - tp->defer_accept = 0; - if (val > 0) { - /* Translate value in seconds to number of retransmits */ - while (tp->defer_accept < 32 && val > ((TCP_TIMEOUT_INIT/HZ)<defer_accept)) - tp->defer_accept++; - tp->defer_accept++; - } - break; - - case TCP_WINDOW_CLAMP: - if (val==0) { - if (sk->state != TCP_CLOSE) { - err = -EINVAL; - break; - } - tp->window_clamp = 0; - } else { - tp->window_clamp = valack.pingpong = 1; - } else { - tp->ack.pingpong = 0; - if ((1<state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT) && - tcp_ack_scheduled(tp)) { - tp->ack.pending |= TCP_ACK_PUSHED; - cleanup_rbuf(sk, 1); - if (!(val & 1)) - tp->ack.pingpong = 1; - } - } - break; - - default: - err = -ENOPROTOOPT; - break; - }; - release_sock(sk); - return err; -#else - return 0; -#endif -} - -int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, - int *optlen) -{ -#if 0 
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int val, len; - - if(level != SOL_TCP) - return tp->af_specific->getsockopt(sk, level, optname, - optval, optlen); - - if(get_user(len,optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - if(len < 0) - return -EINVAL; - - switch(optname) { - case TCP_MAXSEG: - val = tp->mss_cache; - if (val == 0 && ((1<state)&(TCPF_CLOSE|TCPF_LISTEN))) - val = tp->user_mss; - break; - case TCP_NODELAY: - val = (tp->nonagle == 1); - break; - case TCP_CORK: - val = (tp->nonagle == 2); - break; - case TCP_KEEPIDLE: - val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time)/HZ; - break; - case TCP_KEEPINTVL: - val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl)/HZ; - break; - case TCP_KEEPCNT: - val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; - break; - case TCP_SYNCNT: - val = tp->syn_retries ? : sysctl_tcp_syn_retries; - break; - case TCP_LINGER2: - val = tp->linger2; - if (val >= 0) - val = (val ? : sysctl_tcp_fin_timeout)/HZ; - break; - case TCP_DEFER_ACCEPT: - val = tp->defer_accept == 0 ? 
0 : ((TCP_TIMEOUT_INIT/HZ)<<(tp->defer_accept-1)); - break; - case TCP_WINDOW_CLAMP: - val = tp->window_clamp; - break; - case TCP_INFO: - { - struct tcp_info info; - u32 now = tcp_time_stamp; - - if(get_user(len,optlen)) - return -EFAULT; - info.tcpi_state = sk->state; - info.tcpi_ca_state = tp->ca_state; - info.tcpi_retransmits = tp->retransmits; - info.tcpi_probes = tp->probes_out; - info.tcpi_backoff = tp->backoff; - info.tcpi_options = 0; - if (tp->tstamp_ok) - info.tcpi_options |= TCPI_OPT_TIMESTAMPS; - if (tp->sack_ok) - info.tcpi_options |= TCPI_OPT_SACK; - if (tp->wscale_ok) { - info.tcpi_options |= TCPI_OPT_WSCALE; - info.tcpi_snd_wscale = tp->snd_wscale; - info.tcpi_rcv_wscale = tp->rcv_wscale; - } else { - info.tcpi_snd_wscale = 0; - info.tcpi_rcv_wscale = 0; - } - if (tp->ecn_flags&TCP_ECN_OK) - info.tcpi_options |= TCPI_OPT_ECN; - - info.tcpi_rto = (1000000*tp->rto)/HZ; - info.tcpi_ato = (1000000*tp->ack.ato)/HZ; - info.tcpi_snd_mss = tp->mss_cache; - info.tcpi_rcv_mss = tp->ack.rcv_mss; - - info.tcpi_unacked = tp->packets_out; - info.tcpi_sacked = tp->sacked_out; - info.tcpi_lost = tp->lost_out; - info.tcpi_retrans = tp->retrans_out; - info.tcpi_fackets = tp->fackets_out; - - info.tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ; - info.tcpi_last_ack_sent = 0; - info.tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ; - info.tcpi_last_ack_recv = ((now - tp->rcv_tstamp)*1000)/HZ; - - info.tcpi_pmtu = tp->pmtu_cookie; - info.tcpi_rcv_ssthresh = tp->rcv_ssthresh; - info.tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3; - info.tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2; - info.tcpi_snd_ssthresh = tp->snd_ssthresh; - info.tcpi_snd_cwnd = tp->snd_cwnd; - info.tcpi_advmss = tp->advmss; - info.tcpi_reordering = tp->reordering; - - len = min_t(unsigned int, len, sizeof(info)); - if(put_user(len, optlen)) - return -EFAULT; - if(copy_to_user(optval, &info,len)) - return -EFAULT; - return 0; - } - case TCP_QUICKACK: - val = !tp->ack.pingpong; - break; - default: - 
return -ENOPROTOOPT; - }; - - if(put_user(len, optlen)) - return -EFAULT; - if(copy_to_user(optval, &val,len)) - return -EFAULT; - return 0; -#else - return 0; -#endif -} - - -//extern void __skb_cb_too_small_for_tcp(int, int); -//extern void tcpdiag_init(void); - -void /* __init */ tcp_init(void) -{ -#if 0 - struct sk_buff *skb = NULL; - unsigned long goal; - int order, i; - - if(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) - __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), - sizeof(skb->cb)); - - tcp_openreq_cachep = kmem_cache_create("tcp_open_request", - sizeof(struct open_request), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if(!tcp_openreq_cachep) - panic("tcp_init: Cannot alloc open_request cache."); - - tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", - sizeof(struct tcp_bind_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if(!tcp_bucket_cachep) - panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); - - tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", - sizeof(struct tcp_tw_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if(!tcp_timewait_cachep) - panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); - - /* Size and allocate the main established and bind bucket - * hash tables. - * - * The methodology is similar to that of the buffer cache. 
- */ - if (num_physpages >= (128 * 1024)) - goal = num_physpages >> (21 - PAGE_SHIFT); - else - goal = num_physpages >> (23 - PAGE_SHIFT); - - for(order = 0; (1UL << order) < goal; order++) - ; - do { - tcp_ehash_size = (1UL << order) * PAGE_SIZE / - sizeof(struct tcp_ehash_bucket); - tcp_ehash_size >>= 1; - while (tcp_ehash_size & (tcp_ehash_size-1)) - tcp_ehash_size--; - tcp_ehash = (struct tcp_ehash_bucket *) - __get_free_pages(GFP_ATOMIC, order); - } while (tcp_ehash == NULL && --order > 0); - - if (!tcp_ehash) - panic("Failed to allocate TCP established hash table\n"); - for (i = 0; i < (tcp_ehash_size<<1); i++) { - tcp_ehash[i].lock = RW_LOCK_UNLOCKED; - tcp_ehash[i].chain = NULL; - } - - do { - tcp_bhash_size = (1UL << order) * PAGE_SIZE / - sizeof(struct tcp_bind_hashbucket); - if ((tcp_bhash_size > (64 * 1024)) && order > 0) - continue; - tcp_bhash = (struct tcp_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC, order); - } while (tcp_bhash == NULL && --order >= 0); - - if (!tcp_bhash) - panic("Failed to allocate TCP bind hash table\n"); - for (i = 0; i < tcp_bhash_size; i++) { - tcp_bhash[i].lock = SPIN_LOCK_UNLOCKED; - tcp_bhash[i].chain = NULL; - } - - /* Try to be a bit smarter and adjust defaults depending - * on available memory. 
- */ - if (order > 4) { - sysctl_local_port_range[0] = 32768; - sysctl_local_port_range[1] = 61000; - sysctl_tcp_max_tw_buckets = 180000; - sysctl_tcp_max_orphans = 4096<<(order-4); - sysctl_max_syn_backlog = 1024; - } else if (order < 3) { - sysctl_local_port_range[0] = 1024*(3-order); - sysctl_tcp_max_tw_buckets >>= (3-order); - sysctl_tcp_max_orphans >>= (3-order); - sysctl_max_syn_backlog = 128; - } - tcp_port_rover = sysctl_local_port_range[0] - 1; - - sysctl_tcp_mem[0] = 768< 512) - sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 512; - if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 512) - sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 512; - - if (order < 3) { - sysctl_tcp_wmem[2] = 64*1024; - sysctl_tcp_rmem[0] = PAGE_SIZE; - sysctl_tcp_rmem[1] = 43689; - sysctl_tcp_rmem[2] = 2*43689; - } - - printk(KERN_INFO "TCP: Hash tables configured (established %d bind %d)\n", - tcp_ehash_size<<1, tcp_bhash_size); - - tcpdiag_init(); -#endif -} diff --git a/reactos/drivers/net/tcpip/transport/udp/udp.c b/reactos/drivers/net/tcpip/transport/udp/udp.c index 8fc586e217a..768d02f0109 100644 --- a/reactos/drivers/net/tcpip/transport/udp/udp.c +++ b/reactos/drivers/net/tcpip/transport/udp/udp.c @@ -7,6 +7,7 @@ * REVISIONS: * CSH 01/08-2000 Created */ +#include #include #include #include @@ -91,7 +92,7 @@ NTSTATUS AddUDPHeaderIPv4( /* Source address */ IPHeader->SrcAddr = LocalAddress->Address.IPv4Address; /* Destination address. 
FIXME: IPv4 only */ - IPHeader->DstAddr = SendRequest->RemoteAddress->Address.IPv4Address; + IPHeader->DstAddr = SendRequest->RemoteAddress.Address.IPv4Address; /* Build UDP header */ UDPHeader = (PUDP_HEADER)((ULONG_PTR)IPHeader + sizeof(IPv4_HEADER)); @@ -110,8 +111,7 @@ NTSTATUS AddUDPHeaderIPv4( NTSTATUS BuildUDPPacket( PVOID Context, PIP_ADDRESS LocalAddress, - USHORT LocalPort, - PIP_PACKET *IPPacket) + USHORT LocalPort) /* * FUNCTION: Builds an UDP packet * ARGUMENTS: @@ -124,16 +124,16 @@ NTSTATUS BuildUDPPacket( */ { NTSTATUS Status; - PIP_PACKET Packet; NDIS_STATUS NdisStatus; PDATAGRAM_SEND_REQUEST SendRequest = (PDATAGRAM_SEND_REQUEST)Context; + PIP_PACKET Packet = &SendRequest->Packet; TI_DbgPrint(MAX_TRACE, ("Called.\n")); /* Prepare packet */ /* FIXME: Assumes IPv4 */ - Packet = IPCreatePacket(IP_ADDRESS_V4); + IPInitializePacket(IP_ADDRESS_V4, &SendRequest->Packet); if (!Packet) return STATUS_INSUFFICIENT_RESOURCES; @@ -141,15 +141,7 @@ NTSTATUS BuildUDPPacket( sizeof(UDP_HEADER) + SendRequest->BufferSize; - /* Allocate NDIS packet */ - NdisAllocatePacket(&NdisStatus, &Packet->NdisPacket, GlobalPacketPool); - if (NdisStatus != NDIS_STATUS_SUCCESS) { - TI_DbgPrint(MIN_TRACE, ("Cannot allocate NDIS packet. NdisStatus = (0x%X)\n", NdisStatus)); - (*Packet->Free)(Packet); - return STATUS_INSUFFICIENT_RESOURCES; - } - - switch (SendRequest->RemoteAddress->Type) { + switch (SendRequest->RemoteAddress.Type) { case IP_ADDRESS_V4: Status = AddUDPHeaderIPv4(SendRequest, LocalAddress, LocalPort, Packet); break; @@ -162,18 +154,12 @@ NTSTATUS BuildUDPPacket( } if (!NT_SUCCESS(Status)) { TI_DbgPrint(MIN_TRACE, ("Cannot add UDP header. 
Status = (0x%X)\n", Status)); - NdisFreePacket(Packet->NdisPacket); - (*Packet->Free)(Packet); + FreeNdisPacket(Packet->NdisPacket); return Status; } - /* Chain data after header */ - NdisChainBufferAtBack(Packet->NdisPacket, SendRequest->Buffer); - DISPLAY_IP_PACKET(Packet); - *IPPacket = Packet; - return STATUS_SUCCESS; } @@ -194,11 +180,18 @@ NTSTATUS UDPSendDatagram( * Status of operation */ { - return DGSendDatagram(Request, - ConnInfo, - Buffer, - DataSize, - BuildUDPPacket); + PDATAGRAM_SEND_REQUEST SendRequest; + PADDRESS_FILE AddrFile = + (PADDRESS_FILE)Request->Handle.AddressHandle; + + BuildUDPPacket( SendRequest, + (PIP_ADDRESS)&AddrFile->ADE->Address->Address. + IPv4Address, + AddrFile->Port ); + + return DGSendDatagram(Request, + ConnInfo, + &SendRequest->Packet); } diff --git a/reactos/lib/msafd/makefile b/reactos/lib/msafd/makefile index e58319aef80..e3e1d55edad 100644 --- a/reactos/lib/msafd/makefile +++ b/reactos/lib/msafd/makefile @@ -1,4 +1,4 @@ -# $Id: makefile,v 1.13 2004/05/29 21:24:44 hbirr Exp $ +# $Id: makefile,v 1.14 2004/06/09 18:11:39 arty Exp $ PATH_TO_TOP = ../.. 
diff --git a/reactos/lib/msafd/misc/dllmain.c b/reactos/lib/msafd/misc/dllmain.c index feab9fef992..f465cda274a 100644 --- a/reactos/lib/msafd/misc/dllmain.c +++ b/reactos/lib/msafd/misc/dllmain.c @@ -15,8 +15,8 @@ #ifdef DBG /* See debug.h for debug/trace constants */ -DWORD DebugTraceLevel = MIN_TRACE; -//DWORD DebugTraceLevel = DEBUG_ULTRA; +//DWORD DebugTraceLevel = MIN_TRACE; +DWORD DebugTraceLevel = DEBUG_ULTRA; #endif /* DBG */ @@ -566,27 +566,29 @@ WSPAccept( Request.lpfnCondition = lpfnCondition; Request.dwCallbackData = dwCallbackData; + + Status = NtDeviceIoControlFile( - (HANDLE)s, - NULL, - NULL, - NULL, - &Iosb, - IOCTL_AFD_ACCEPT, - &Request, - sizeof(FILE_REQUEST_ACCEPT), - &Reply, - sizeof(FILE_REPLY_ACCEPT)); + (HANDLE)s, + NULL, + NULL, + NULL, + &Iosb, + IOCTL_AFD_ACCEPT, + &Request, + sizeof(FILE_REQUEST_ACCEPT), + &Reply, + sizeof(FILE_REPLY_ACCEPT)); if (Status == STATUS_PENDING) { - AFD_DbgPrint(MAX_TRACE, ("Waiting on transport.\n")); - /* FIXME: Wait only for blocking sockets */ - Status = NtWaitForSingleObject((HANDLE)s, FALSE, NULL); + AFD_DbgPrint(MAX_TRACE, ("Waiting on transport.\n")); + /* FIXME: Wait only for blocking sockets */ + Status = NtWaitForSingleObject((HANDLE)s, FALSE, NULL); } if (!NT_SUCCESS(Status)) { *lpErrno = Reply.Status; - return INVALID_SOCKET; - } + return INVALID_SOCKET; + } *addrlen = Reply.addrlen; diff --git a/reactos/lib/ws2_32/include/ws2_32.h b/reactos/lib/ws2_32/include/ws2_32.h index 5021f6e5c35..3abe94b777f 100644 --- a/reactos/lib/ws2_32/include/ws2_32.h +++ b/reactos/lib/ws2_32/include/ws2_32.h @@ -28,10 +28,20 @@ extern HANDLE GlobalHeap; extern BOOL Initialized; /* TRUE if WSAStartup() has been successfully called */ extern WSPUPCALLTABLE UpcallTable; +#define WS2_INTERNAL_MAX_ALIAS 16 + +typedef struct _WINSOCK_GETSERVBYNAME_CACHE { + UINT Size; + SERVENT ServerEntry; + PCHAR Aliases[WS2_INTERNAL_MAX_ALIAS]; + CHAR Data[1]; +} WINSOCK_GETSERVBYNAME_CACHE, *PWINSOCK_GETSERVBYNAME_CACHE; typedef 
struct _WINSOCK_THREAD_BLOCK { INT LastErrorValue; /* Error value from last function that failed */ CHAR Intoa[16]; /* Buffer for inet_ntoa() */ + PWINSOCK_GETSERVBYNAME_CACHE + Getservbyname; /* Buffer used by getservbyname */ } WINSOCK_THREAD_BLOCK, *PWINSOCK_THREAD_BLOCK; diff --git a/reactos/lib/ws2_32/makefile b/reactos/lib/ws2_32/makefile index ffcd59c2b13..a12133b50d6 100644 --- a/reactos/lib/ws2_32/makefile +++ b/reactos/lib/ws2_32/makefile @@ -1,4 +1,4 @@ -# $Id: makefile,v 1.14 2004/05/29 21:24:47 hbirr Exp $ +# $Id: makefile,v 1.15 2004/06/09 18:11:39 arty Exp $ PATH_TO_TOP = ../.. diff --git a/reactos/lib/ws2_32/misc/dllmain.c b/reactos/lib/ws2_32/misc/dllmain.c index 9a9407db56f..0b79ab9d019 100644 --- a/reactos/lib/ws2_32/misc/dllmain.c +++ b/reactos/lib/ws2_32/misc/dllmain.c @@ -17,7 +17,6 @@ /* See debug.h for debug/trace constants */ DWORD DebugTraceLevel = MIN_TRACE; -//DWORD DebugTraceLevel = DEBUG_ULTRA; #endif /* DBG */ @@ -37,7 +36,8 @@ INT EXPORT WSAGetLastError(VOID) { - PWINSOCK_THREAD_BLOCK p = NtCurrentTeb()->WinSockData; + +PWINSOCK_THREAD_BLOCK p = NtCurrentTeb()->WinSockData; if (p) { return p->LastErrorValue; @@ -667,6 +667,7 @@ DllMain(HANDLE hInstDll, } p->LastErrorValue = NO_ERROR; + p->Getservbyname = NULL; NtCurrentTeb()->WinSockData = p; break; diff --git a/reactos/lib/ws2_32/misc/ns.c b/reactos/lib/ws2_32/misc/ns.c index e5896d53be0..eb6799e2559 100644 --- a/reactos/lib/ws2_32/misc/ns.c +++ b/reactos/lib/ws2_32/misc/ns.c @@ -7,8 +7,13 @@ * REVISIONS: * CSH 01/09-2000 Created */ +#include #include +#ifndef BUFSIZ +#define BUFSIZ 1024 +#endif/*BUFSIZ*/ + /* Name resolution APIs */ /* @@ -420,9 +425,58 @@ getprotobynumber( return (LPPROTOENT)NULL; } +#define SKIPWS(ptr,act) \ +{while(*ptr && isspace(*ptr)) ptr++; if(!*ptr) act;} +#define SKIPANDMARKSTR(ptr,act) \ +{while(*ptr && !isspace(*ptr)) ptr++; \ + if(!*ptr) {act;} else { *ptr = 0; ptr++; }} + + +static BOOL DecodeServEntFromString( IN PCHAR ServiceString, + OUT PCHAR 
*ServiceName, + OUT PCHAR *PortNumberStr, + OUT PCHAR *ProtocolStr, + IN PCHAR *Aliases, + IN DWORD MaxAlias ) { + UINT NAliases = 0; + + WS_DbgPrint(MAX_TRACE, ("Parsing service ent [%s]\n", ServiceString)); + + SKIPWS(ServiceString, return FALSE); + *ServiceName = ServiceString; + SKIPANDMARKSTR(ServiceString, return FALSE); + SKIPWS(ServiceString, return FALSE); + *PortNumberStr = ServiceString; + SKIPANDMARKSTR(ServiceString, ;); + + while( *ServiceString && NAliases < MaxAlias - 1 ) { + SKIPWS(ServiceString, break); + if( *ServiceString ) { + SKIPANDMARKSTR(ServiceString, ;); + if( strlen(ServiceString) ) { + WS_DbgPrint(MAX_TRACE, ("Alias: %s\n", ServiceString)); + *Aliases++ = ServiceString; + NAliases++; + } + } + } + *Aliases = NULL; + + *ProtocolStr = strchr(*PortNumberStr,'/'); + if( !*ProtocolStr ) return FALSE; + **ProtocolStr = 0; (*ProtocolStr)++; + + WS_DbgPrint(MAX_TRACE, ("Parsing done: %s %s %s %d\n", + *ServiceName, *ProtocolStr, *PortNumberStr, + NAliases)); + + return TRUE; +} + +#define ADJ_PTR(p,b1,b2) p = (p - b1) + b2 /* - * @unimplemented + * @implemented */ LPSERVENT EXPORT @@ -430,9 +484,157 @@ getservbyname( IN CONST CHAR FAR* name, IN CONST CHAR FAR* proto) { - UNIMPLEMENTED + BOOL Found = FALSE; + HANDLE ServicesFile; + CHAR ServiceDBData[BUFSIZ] = { 0 }; + PCHAR SystemDirectory = ServiceDBData; /* Reuse this stack space */ + PCHAR ServicesFileLocation = "\\drivers\\etc\\services"; + PCHAR ThisLine = 0, NextLine = 0, ServiceName = 0, PortNumberStr = 0, + ProtocolStr = 0, Comment = 0; + PCHAR Aliases[WS2_INTERNAL_MAX_ALIAS] = { 0 }; + UINT i,SizeNeeded = 0, + SystemDirSize = sizeof(ServiceDBData) - 1; + DWORD ReadSize = 0, ValidData = 0; + PWINSOCK_THREAD_BLOCK p = NtCurrentTeb()->WinSockData; + + if( !p ) { + WSASetLastError( WSANOTINITIALISED ); + return NULL; + } - return (LPSERVENT)NULL; + if( !name ) { + WSASetLastError( WSANO_RECOVERY ); + return NULL; + } + + if( !GetSystemDirectoryA( SystemDirectory, SystemDirSize ) ) { + 
WSASetLastError( WSANO_RECOVERY ); + WS_DbgPrint(MIN_TRACE, ("Could not get windows system directory.\n")); + return NULL; /* Can't get system directory */ + } + + strncat( SystemDirectory, ServicesFileLocation, SystemDirSize ); + + ServicesFile = CreateFileA( SystemDirectory, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | + FILE_FLAG_SEQUENTIAL_SCAN, + NULL ); + + if( ServicesFile == INVALID_HANDLE_VALUE ) { + WSASetLastError( WSANO_RECOVERY ); + return NULL; + } + + /* Scan the services file ... + * + * We will read up to BUFSIZ bytes per pass, until the buffer does not + * contain a full line, then we will try to read more. + * + * We fall from the loop if the buffer does not have a line terminator. + */ + + /* Initial Read */ + while( !Found && + ReadFile( ServicesFile, ServiceDBData + ValidData, + sizeof( ServiceDBData ) - ValidData, + &ReadSize, NULL ) ) { + ValidData += ReadSize; + ReadSize = 0; + NextLine = ThisLine = ServiceDBData; + + /* Find the beginning of the next line */ + while( NextLine < ServiceDBData + ValidData && + *NextLine != '\r' && *NextLine != '\n' ) NextLine++; + + /* Zero and skip, so we can treat what we have as a string */ + if( NextLine >= ServiceDBData + ValidData ) + break; + + *NextLine = 0; NextLine++; + + Comment = strchr( ThisLine, '#' ); + if( Comment ) *Comment = 0; /* Terminate at comment start */ + + if( DecodeServEntFromString( ThisLine, + &ServiceName, + &PortNumberStr, + &ProtocolStr, + Aliases, + WS2_INTERNAL_MAX_ALIAS ) && + !strcmp( ServiceName, name ) && + (proto ? 
!strcmp( ProtocolStr, proto ) : TRUE) ) { + WS_DbgPrint(MAX_TRACE,("Found the service entry.\n")); + + Found = TRUE; + SizeNeeded = sizeof(WINSOCK_GETSERVBYNAME_CACHE) + + (NextLine - ThisLine); + break; + } + + /* Get rid of everything we read so far */ + while( NextLine <= ServiceDBData + ValidData && + isspace( *NextLine ) ) NextLine++; + + WS_DbgPrint(MAX_TRACE,("About to move %d chars\n", + ServiceDBData + ValidData - NextLine)); + + memmove( ServiceDBData, NextLine, + ServiceDBData + ValidData - NextLine ); + ValidData -= NextLine - ServiceDBData; + WS_DbgPrint(MAX_TRACE,("Valid bytes: %d\n", ValidData)); + } + + /* This we'll do no matter what */ + CloseHandle( ServicesFile ); + + if( !Found ) { + WS_DbgPrint(MAX_TRACE,("Not found\n")); + WSASetLastError( WSANO_DATA ); + return NULL; + } + + if( !p->Getservbyname || p->Getservbyname->Size < SizeNeeded ) { + /* Free previous getservbyname buffer, allocate bigger */ + if( p->Getservbyname ) + HeapFree(GlobalHeap, 0, p->Getservbyname); + p->Getservbyname = HeapAlloc(GlobalHeap, 0, SizeNeeded); + if( !p->Getservbyname ) { + WS_DbgPrint(MIN_TRACE,("Couldn't allocate %d bytes\n", + SizeNeeded)); + WSASetLastError( WSATRY_AGAIN ); + return NULL; + } + p->Getservbyname->Size = SizeNeeded; + } + + /* Copy the data */ + memmove( p->Getservbyname->Data, + ThisLine, + NextLine - ThisLine ); + + ADJ_PTR(ServiceName,ThisLine,p->Getservbyname->Data); + ADJ_PTR(ProtocolStr,ThisLine,p->Getservbyname->Data); + WS_DbgPrint(MAX_TRACE, + ("ServiceName: %s, Protocol: %s\n", ServiceName, ProtocolStr)); + + for( i = 0; Aliases[i]; i++ ) { + ADJ_PTR(Aliases[i],ThisLine,p->Getservbyname->Data); + WS_DbgPrint(MAX_TRACE,("Aliase %d: %s\n", i, Aliases[i])); + } + + memcpy(p->Getservbyname,Aliases,sizeof(Aliases)); + + /* Create the struct proper */ + p->Getservbyname->ServerEntry.s_name = ServiceName; + p->Getservbyname->ServerEntry.s_aliases = p->Getservbyname->Aliases; + p->Getservbyname->ServerEntry.s_port = 
htons(atoi(PortNumberStr)); + p->Getservbyname->ServerEntry.s_proto = ProtocolStr; + + return &p->Getservbyname->ServerEntry; }