TCPConnectionUtil.cc

Go to the documentation of this file.
00001 //
00002 // Copyright (C) 2004 Andras Varga
00003 // Copyright (C) 2009-2010 Thomas Reschka
00004 //
00005 // This program is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public License
00007 // as published by the Free Software Foundation; either version 2
00008 // of the License, or (at your option) any later version.
00009 //
00010 // This program is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 // GNU Lesser General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU Lesser General Public License
00016 // along with this program; if not, see <http://www.gnu.org/licenses/>.
00017 //
00018 
00019 
00020 #include <string.h>
00021 #include <algorithm>   // min,max
00022 #include "TCP.h"
00023 #include "TCPConnection.h"
00024 #include "TCPSegment.h"
00025 #include "TCPCommand_m.h"
00026 #include "IPControlInfo.h"
00027 #include "IPv6ControlInfo.h"
00028 #include "TCPSendQueue.h"
00029 #include "TCPSACKRexmitQueue.h"
00030 #include "TCPReceiveQueue.h"
00031 #include "TCPAlgorithm.h"
00032 
00033 //
00034 // helper functions
00035 //
00036 
00037 const char *TCPConnection::stateName(int state)
00038 {
00039 #define CASE(x) case x: s=#x+6; break
00040     const char *s = "unknown";
00041     switch (state)
00042     {
00043         CASE(TCP_S_INIT);
00044         CASE(TCP_S_CLOSED);
00045         CASE(TCP_S_LISTEN);
00046         CASE(TCP_S_SYN_SENT);
00047         CASE(TCP_S_SYN_RCVD);
00048         CASE(TCP_S_ESTABLISHED);
00049         CASE(TCP_S_CLOSE_WAIT);
00050         CASE(TCP_S_LAST_ACK);
00051         CASE(TCP_S_FIN_WAIT_1);
00052         CASE(TCP_S_FIN_WAIT_2);
00053         CASE(TCP_S_CLOSING);
00054         CASE(TCP_S_TIME_WAIT);
00055     }
00056     return s;
00057 #undef CASE
00058 }
00059 
00060 const char *TCPConnection::eventName(int event)
00061 {
00062 #define CASE(x) case x: s=#x+6; break
00063     const char *s = "unknown";
00064     switch (event)
00065     {
00066         CASE(TCP_E_IGNORE);
00067         CASE(TCP_E_OPEN_ACTIVE);
00068         CASE(TCP_E_OPEN_PASSIVE);
00069         CASE(TCP_E_SEND);
00070         CASE(TCP_E_CLOSE);
00071         CASE(TCP_E_ABORT);
00072         CASE(TCP_E_STATUS);
00073         CASE(TCP_E_RCV_DATA);
00074         CASE(TCP_E_RCV_ACK);
00075         CASE(TCP_E_RCV_SYN);
00076         CASE(TCP_E_RCV_SYN_ACK);
00077         CASE(TCP_E_RCV_FIN);
00078         CASE(TCP_E_RCV_FIN_ACK);
00079         CASE(TCP_E_RCV_RST);
00080         CASE(TCP_E_RCV_UNEXP_SYN);
00081         CASE(TCP_E_TIMEOUT_2MSL);
00082         CASE(TCP_E_TIMEOUT_CONN_ESTAB);
00083         CASE(TCP_E_TIMEOUT_FIN_WAIT_2);
00084     }
00085     return s;
00086 #undef CASE
00087 }
00088 
00089 const char *TCPConnection::indicationName(int code)
00090 {
00091 #define CASE(x) case x: s=#x+6; break
00092     const char *s = "unknown";
00093     switch (code)
00094     {
00095         CASE(TCP_I_DATA);
00096         CASE(TCP_I_URGENT_DATA);
00097         CASE(TCP_I_ESTABLISHED);
00098         CASE(TCP_I_PEER_CLOSED);
00099         CASE(TCP_I_CLOSED);
00100         CASE(TCP_I_CONNECTION_REFUSED);
00101         CASE(TCP_I_CONNECTION_RESET);
00102         CASE(TCP_I_TIMED_OUT);
00103         CASE(TCP_I_STATUS);
00104     }
00105     return s;
00106 #undef CASE
00107 }
00108 
00109 const char *TCPConnection::optionName(int option)
00110 {
00111     switch (option)
00112     {
00113         case TCPOPTION_END_OF_OPTION_LIST:   return "EOL";
00114         case TCPOPTION_NO_OPERATION:         return "NOP";
00115         case TCPOPTION_MAXIMUM_SEGMENT_SIZE: return "MSS";
00116         case TCPOPTION_WINDOW_SCALE:         return "WS";
00117         case TCPOPTION_SACK_PERMITTED:       return "SACK_PERMITTED";
00118         case TCPOPTION_SACK:                 return "SACK";
00119         case TCPOPTION_TIMESTAMP:            return "TS";
00120         default:                             return "unknown";
00121     }
00122 }
00123 
00124 void TCPConnection::printConnBrief()
00125 {
00126     tcpEV << "Connection ";
00127     tcpEV << localAddr << ":" << localPort << " to " << remoteAddr << ":" << remotePort;
00128     tcpEV << "  on app[" << appGateIndex << "],connId=" << connId;
00129     tcpEV << "  in " << stateName(fsm.getState());
00130     tcpEV << "  (ptr=0x" << this << ")\n";
00131 }
00132 
00133 void TCPConnection::printSegmentBrief(TCPSegment *tcpseg)
00134 {
00135     tcpEV << "." << tcpseg->getSrcPort() << " > ";
00136     tcpEV << "." << tcpseg->getDestPort() << ": ";
00137 
00138     if (tcpseg->getSynBit())  tcpEV << (tcpseg->getAckBit() ? "SYN+ACK " : "SYN ");
00139     if (tcpseg->getFinBit())  tcpEV << "FIN(+ACK) ";
00140     if (tcpseg->getRstBit())  tcpEV << (tcpseg->getAckBit() ? "RST+ACK " : "RST ");
00141     if (tcpseg->getPshBit())  tcpEV << "PSH ";
00142 
00143     if (tcpseg->getPayloadLength()>0 || tcpseg->getSynBit())
00144     {
00145         tcpEV << "[" << tcpseg->getSequenceNo() << ".." << (tcpseg->getSequenceNo()+tcpseg->getPayloadLength()) << ") ";
00146         tcpEV << "(l=" << tcpseg->getPayloadLength() << ") ";
00147     }
00148     if (tcpseg->getAckBit())  tcpEV << "ack " << tcpseg->getAckNo() << " ";
00149     tcpEV << "win " << tcpseg->getWindow() << " ";
00150     if (tcpseg->getUrgBit())  tcpEV << "urg " << tcpseg->getUrgentPointer() << " ";
00151     if (tcpseg->getHeaderLength() > TCP_HEADER_OCTETS) // Header options present? TCP_HEADER_OCTETS = 20
00152     {
00153         tcpEV << "options ";
00154         for (uint i=0; i<tcpseg->getOptionsArraySize(); i++)
00155         {
00156             const TCPOption& option = tcpseg->getOptions(i);
00157             short kind = option.getKind();
00158             tcpEV << optionName(kind) << " ";
00159         }
00160     }
00161     tcpEV << "\n";
00162 }
00163 
00164 TCPConnection *TCPConnection::cloneListeningConnection()
00165 {
00166     TCPConnection *conn = new TCPConnection(tcpMain,appGateIndex,connId);
00167 
00168     // following code to be kept consistent with initConnection()
00169     const char *sendQueueClass = sendQueue->getClassName();
00170     conn->sendQueue = check_and_cast<TCPSendQueue *>(createOne(sendQueueClass));
00171     conn->sendQueue->setConnection(conn);
00172 
00173     const char *receiveQueueClass = receiveQueue->getClassName();
00174     conn->receiveQueue = check_and_cast<TCPReceiveQueue *>(createOne(receiveQueueClass));
00175     conn->receiveQueue->setConnection(conn);
00176 
00177     // create SACK retransmit queue
00178     rexmitQueue = new TCPSACKRexmitQueue();
00179     rexmitQueue->setConnection(this);
00180 
00181     const char *tcpAlgorithmClass = tcpAlgorithm->getClassName();
00182     conn->tcpAlgorithm = check_and_cast<TCPAlgorithm *>(createOne(tcpAlgorithmClass));
00183     conn->tcpAlgorithm->setConnection(conn);
00184 
00185     conn->state = conn->tcpAlgorithm->getStateVariables();
00186     configureStateVariables();
00187     conn->tcpAlgorithm->initialize();
00188 
00189     // put it into LISTEN, with our localAddr/localPort
00190     conn->state->active = false;
00191     conn->state->fork = true;
00192     conn->localAddr = localAddr;
00193     conn->localPort = localPort;
00194     FSM_Goto(conn->fsm, TCP_S_LISTEN);
00195 
00196     return conn;
00197 }
00198 
00199 void TCPConnection::sendToIP(TCPSegment *tcpseg)
00200 {
00201     // record seq (only if we do send data) and ackno
00202     if (sndNxtVector && tcpseg->getPayloadLength()!=0)
00203         sndNxtVector->record(tcpseg->getSequenceNo());
00204     if (sndAckVector)
00205         sndAckVector->record(tcpseg->getAckNo());
00206 
00207     // final touches on the segment before sending
00208     tcpseg->setSrcPort(localPort);
00209     tcpseg->setDestPort(remotePort);
00210     ASSERT(tcpseg->getHeaderLength() >= TCP_HEADER_OCTETS);     // TCP_HEADER_OCTETS = 20 (without options)
00211     ASSERT(tcpseg->getHeaderLength() <= TCP_MAX_HEADER_OCTETS); // TCP_MAX_HEADER_OCTETS = 60
00212     tcpseg->setByteLength(tcpseg->getHeaderLength() + tcpseg->getPayloadLength());
00213     state->sentBytes = tcpseg->getPayloadLength(); // resetting sentBytes to 0 if sending a segment without data (e.g. ACK)
00214 
00215     tcpEV << "Sending: ";
00216     printSegmentBrief(tcpseg);
00217 
00218     // TBD reuse next function for sending
00219 
00220     if (!remoteAddr.isIPv6())
00221     {
00222         // send over IPv4
00223         IPControlInfo *controlInfo = new IPControlInfo();
00224         controlInfo->setProtocol(IP_PROT_TCP);
00225         controlInfo->setSrcAddr(localAddr.get4());
00226         controlInfo->setDestAddr(remoteAddr.get4());
00227         tcpseg->setControlInfo(controlInfo);
00228 
00229         tcpMain->send(tcpseg,"ipOut");
00230     }
00231     else
00232     {
00233         // send over IPv6
00234         IPv6ControlInfo *controlInfo = new IPv6ControlInfo();
00235         controlInfo->setProtocol(IP_PROT_TCP);
00236         controlInfo->setSrcAddr(localAddr.get6());
00237         controlInfo->setDestAddr(remoteAddr.get6());
00238         tcpseg->setControlInfo(controlInfo);
00239 
00240         tcpMain->send(tcpseg,"ipv6Out");
00241     }
00242 }
00243 
00244 void TCPConnection::sendToIP(TCPSegment *tcpseg, IPvXAddress src, IPvXAddress dest)
00245 {
00246     tcpEV << "Sending: ";
00247     printSegmentBrief(tcpseg);
00248 
00249     if (!dest.isIPv6())
00250     {
00251         // send over IPv4
00252         IPControlInfo *controlInfo = new IPControlInfo();
00253         controlInfo->setProtocol(IP_PROT_TCP);
00254         controlInfo->setSrcAddr(src.get4());
00255         controlInfo->setDestAddr(dest.get4());
00256         tcpseg->setControlInfo(controlInfo);
00257 
00258         check_and_cast<TCP *>(simulation.getContextModule())->send(tcpseg,"ipOut");
00259     }
00260     else
00261     {
00262         // send over IPv6
00263         IPv6ControlInfo *controlInfo = new IPv6ControlInfo();
00264         controlInfo->setProtocol(IP_PROT_TCP);
00265         controlInfo->setSrcAddr(src.get6());
00266         controlInfo->setDestAddr(dest.get6());
00267         tcpseg->setControlInfo(controlInfo);
00268 
00269         check_and_cast<TCP *>(simulation.getContextModule())->send(tcpseg,"ipv6Out");
00270     }
00271 }
00272 
00273 TCPSegment *TCPConnection::createTCPSegment(const char *name)
00274 {
00275     return new TCPSegment(name);
00276 }
00277 
00278 void TCPConnection::signalConnectionTimeout()
00279 {
00280     sendIndicationToApp(TCP_I_TIMED_OUT);
00281 }
00282 
00283 void TCPConnection::sendIndicationToApp(int code)
00284 {
00285     tcpEV << "Notifying app: " << indicationName(code) << "\n";
00286     cMessage *msg = new cMessage(indicationName(code));
00287     msg->setKind(code);
00288     TCPCommand *ind = new TCPCommand();
00289     ind->setConnId(connId);
00290     msg->setControlInfo(ind);
00291     tcpMain->send(msg, "appOut", appGateIndex);
00292 }
00293 
00294 void TCPConnection::sendEstabIndicationToApp()
00295 {
00296     tcpEV << "Notifying app: " << indicationName(TCP_I_ESTABLISHED) << "\n";
00297     cMessage *msg = new cMessage(indicationName(TCP_I_ESTABLISHED));
00298     msg->setKind(TCP_I_ESTABLISHED);
00299 
00300     TCPConnectInfo *ind = new TCPConnectInfo();
00301     ind->setConnId(connId);
00302     ind->setLocalAddr(localAddr);
00303     ind->setRemoteAddr(remoteAddr);
00304     ind->setLocalPort(localPort);
00305     ind->setRemotePort(remotePort);
00306 
00307     msg->setControlInfo(ind);
00308     tcpMain->send(msg, "appOut", appGateIndex);
00309 }
00310 
00311 void TCPConnection::sendToApp(cMessage *msg)
00312 {
00313     tcpMain->send(msg, "appOut", appGateIndex);
00314 }
00315 
00316 void TCPConnection::initConnection(TCPOpenCommand *openCmd)
00317 {
00318     // create send queue
00319     const char *sendQueueClass = openCmd->getSendQueueClass();
00320     if (!sendQueueClass || !sendQueueClass[0])
00321         sendQueueClass = tcpMain->par("sendQueueClass");
00322     sendQueue = check_and_cast<TCPSendQueue *>(createOne(sendQueueClass));
00323     sendQueue->setConnection(this);
00324 
00325     // create receive queue
00326     const char *receiveQueueClass = openCmd->getReceiveQueueClass();
00327     if (!receiveQueueClass || !receiveQueueClass[0])
00328         receiveQueueClass = tcpMain->par("receiveQueueClass");
00329     receiveQueue = check_and_cast<TCPReceiveQueue *>(createOne(receiveQueueClass));
00330     receiveQueue->setConnection(this);
00331 
00332     // create SACK retransmit queue
00333     rexmitQueue = new TCPSACKRexmitQueue();
00334     rexmitQueue->setConnection(this);
00335 
00336     // create algorithm
00337     const char *tcpAlgorithmClass = openCmd->getTcpAlgorithmClass();
00338     if (!tcpAlgorithmClass || !tcpAlgorithmClass[0])
00339         tcpAlgorithmClass = tcpMain->par("tcpAlgorithmClass");
00340     tcpAlgorithm = check_and_cast<TCPAlgorithm *>(createOne(tcpAlgorithmClass));
00341     tcpAlgorithm->setConnection(this);
00342 
00343     // create state block
00344     state = tcpAlgorithm->getStateVariables();
00345     configureStateVariables();
00346     tcpAlgorithm->initialize();
00347 }
00348 
00349 void TCPConnection::configureStateVariables()
00350 {
00351     long advertisedWindowPar = tcpMain->par("advertisedWindow").longValue();
00352     state->ws_support = tcpMain->par("windowScalingSupport"); // if set, this means that current host supports WS (RFC 1323)
00353     if (!state->ws_support && (advertisedWindowPar > TCP_MAX_WIN || advertisedWindowPar <= 0))
00354         throw cRuntimeError("Invalid advertisedWindow parameter: %ld", advertisedWindowPar);
00355     state->rcv_wnd = advertisedWindowPar;
00356     state->rcv_adv = advertisedWindowPar;
00357     if (state->ws_support && advertisedWindowPar > TCP_MAX_WIN)
00358     {
00359         state->rcv_wnd = TCP_MAX_WIN; // we cannot to guarantee that the other end is also supporting the Window Scale (header option) (RFC 1322)
00360         state->rcv_adv = TCP_MAX_WIN; // therefore TCP_MAX_WIN is used as initial value for rcv_wnd and rcv_adv
00361     }
00362     state->maxRcvBuffer = advertisedWindowPar;
00363     state->delayed_acks_enabled = tcpMain->par("delayedAcksEnabled"); // delayed ACK algorithm (RFC 1122) enabled/disabled
00364     state->nagle_enabled = tcpMain->par("nagleEnabled"); // Nagle's algorithm (RFC 896) enabled/disabled
00365     state->limited_transmit_enabled = tcpMain->par("limitedTransmitEnabled"); // Limited Transmit algorithm (RFC 3042) enabled/disabled
00366     state->increased_IW_enabled = tcpMain->par("increasedIWEnabled"); // Increased Initial Window (RFC 3390) enabled/disabled
00367     state->snd_mss = tcpMain->par("mss").longValue(); // Maximum Segment Size (RFC 793)
00368     state->ts_support = tcpMain->par("timestampSupport"); // if set, this means that current host supports TS (RFC 1323)
00369     state->sack_support = tcpMain->par("sackSupport"); // if set, this means that current host supports SACK (RFC 2018, 2883, 3517)
00370     if (state->sack_support)
00371     {
00372         std::string algorithmName1 = "TCPReno";
00373         std::string algorithmName2 = tcpMain->par("tcpAlgorithmClass");
00374         if (algorithmName1!=algorithmName2) // TODO add additional checks for new SACK supporting algorithms here once they are implemented
00375         {
00376             EV << "If you want to use TCP SACK please set tcpAlgorithmClass to TCPReno" << endl;
00377             ASSERT(false);
00378         }
00379     }
00380 }
00381 
00382 void TCPConnection::selectInitialSeqNum()
00383 {
00384     // set the initial send sequence number
00385     state->iss = (unsigned long)fmod(SIMTIME_DBL(simTime())*250000.0, 1.0+(double)(unsigned)0xffffffffUL) & 0xffffffffUL;
00386 
00387     state->snd_una = state->snd_nxt = state->snd_max = state->iss;
00388 
00389     sendQueue->init(state->iss+1); // +1 is for SYN
00390     rexmitQueue->init(state->iss + 1); // +1 is for SYN
00391 }
00392 
00393 bool TCPConnection::isSegmentAcceptable(TCPSegment *tcpseg)
00394 {
00395     // check that segment entirely falls in receive window
00396     // RFC 793, page 69:
00397     // "There are four cases for the acceptability test for an incoming segment:"
00398     uint32 len = tcpseg->getPayloadLength();
00399     uint32 seqNo = tcpseg->getSequenceNo();
00400     bool ret;
00401 
00402     if (len == 0)
00403     {
00404         if (state->rcv_wnd == 0)
00405             ret = (seqNo == state->rcv_nxt);
00406         else // rcv_wnd > 0
00407             ret = seqLE(state->rcv_nxt, seqNo) && seqLess(seqNo, state->rcv_nxt + state->rcv_wnd);
00408     }
00409     else // len > 0
00410     {
00411         if (state->rcv_wnd == 0)
00412             ret = false;
00413         else // rcv_wnd > 0
00414             ret = (seqLE(state->rcv_nxt, seqNo) && seqLess(seqNo, state->rcv_nxt + state->rcv_wnd))
00415             ||
00416             (seqLE(state->rcv_nxt, seqNo + len - 1) && seqLess(seqNo + len - 1, state->rcv_nxt + state->rcv_wnd));
00417     }
00418     if (!ret)
00419     {
00420         tcpEV << "Not Acceptable segment. seqNo:" << seqNo << ", len:" << len << ", rcv_nxt:" << state->rcv_nxt  << ", rcv_wnd:" << state->rcv_wnd << endl;
00421     }
00422     return ret;
00423 }
00424 
00425 void TCPConnection::sendSyn()
00426 {
00427     if (remoteAddr.isUnspecified() || remotePort==-1)
00428         opp_error("Error processing command OPEN_ACTIVE: foreign socket unspecified");
00429     if (localPort==-1)
00430         opp_error("Error processing command OPEN_ACTIVE: local port unspecified");
00431 
00432     // create segment
00433     TCPSegment *tcpseg = createTCPSegment("SYN");
00434     tcpseg->setSequenceNo(state->iss);
00435     tcpseg->setSynBit(true);
00436     updateRcvWnd();
00437     tcpseg->setWindow(state->rcv_wnd);
00438 
00439     state->snd_max = state->snd_nxt = state->iss+1;
00440 
00441     // write header options
00442     writeHeaderOptions(tcpseg);
00443 
00444     // send it
00445     sendToIP(tcpseg);
00446 }
00447 
00448 void TCPConnection::sendSynAck()
00449 {
00450     // create segment
00451     TCPSegment *tcpseg = createTCPSegment("SYN+ACK");
00452     tcpseg->setSequenceNo(state->iss);
00453     tcpseg->setAckNo(state->rcv_nxt);
00454     tcpseg->setSynBit(true);
00455     tcpseg->setAckBit(true);
00456     updateRcvWnd();
00457     tcpseg->setWindow(state->rcv_wnd);
00458 
00459     state->snd_max = state->snd_nxt = state->iss+1;
00460 
00461     // write header options
00462     writeHeaderOptions(tcpseg);
00463 
00464     // send it
00465     sendToIP(tcpseg);
00466 
00467     // notify
00468     tcpAlgorithm->ackSent();
00469 }
00470 
00471 void TCPConnection::sendRst(uint32 seqNo)
00472 {
00473     sendRst(seqNo, localAddr, remoteAddr, localPort, remotePort);
00474 }
00475 
00476 void TCPConnection::sendRst(uint32 seq, IPvXAddress src, IPvXAddress dest, int srcPort, int destPort)
00477 {
00478     TCPSegment *tcpseg = createTCPSegment("RST");
00479 
00480     tcpseg->setSrcPort(srcPort);
00481     tcpseg->setDestPort(destPort);
00482 
00483     tcpseg->setRstBit(true);
00484     tcpseg->setSequenceNo(seq);
00485 
00486     // send it
00487     sendToIP(tcpseg, src, dest);
00488 }
00489 
00490 void TCPConnection::sendRstAck(uint32 seq, uint32 ack, IPvXAddress src, IPvXAddress dest, int srcPort, int destPort)
00491 {
00492     TCPSegment *tcpseg = createTCPSegment("RST+ACK");
00493 
00494     tcpseg->setSrcPort(srcPort);
00495     tcpseg->setDestPort(destPort);
00496 
00497     tcpseg->setRstBit(true);
00498     tcpseg->setAckBit(true);
00499     tcpseg->setSequenceNo(seq);
00500     tcpseg->setAckNo(ack);
00501 
00502     // send it
00503     sendToIP(tcpseg, src, dest);
00504 
00505     // notify
00506     tcpAlgorithm->ackSent();
00507 }
00508 
00509 void TCPConnection::sendAck()
00510 {
00511     TCPSegment *tcpseg = createTCPSegment("ACK");
00512 
00513     tcpseg->setAckBit(true);
00514     tcpseg->setSequenceNo(state->snd_nxt);
00515     tcpseg->setAckNo(state->rcv_nxt);
00516     tcpseg->setWindow(updateRcvWnd());
00517 
00518     // write header options
00519     writeHeaderOptions(tcpseg);
00520 
00521     // send it
00522     sendToIP(tcpseg);
00523 
00524     // notify
00525     tcpAlgorithm->ackSent();
00526 }
00527 
00528 void TCPConnection::sendFin()
00529 {
00530     TCPSegment *tcpseg = createTCPSegment("FIN");
00531 
00532     // Note: ACK bit *must* be set for both FIN and FIN+ACK. What makes
00533     // the difference for FIN+ACK is that its ackNo acks the remote TCP's FIN.
00534     tcpseg->setFinBit(true);
00535     tcpseg->setAckBit(true);
00536     tcpseg->setAckNo(state->rcv_nxt);
00537     tcpseg->setSequenceNo(state->snd_nxt);
00538     tcpseg->setWindow(updateRcvWnd());
00539 
00540     // send it
00541     sendToIP(tcpseg);
00542 
00543     // notify
00544     tcpAlgorithm->ackSent();
00545 }
00546 
00547 void TCPConnection::sendSegment(uint32 bytes)
00548 {
00549     if (state->sack_enabled && state->afterRto)
00550     {
00551         // check rexmitQ and try to forward snd_nxt before sending new data
00552         uint32 forward = rexmitQueue->checkRexmitQueueForSackedOrRexmittedSegments(state->snd_nxt);
00553         state->snd_nxt = state->snd_nxt + forward;
00554     }
00555 
00556     ulong buffered = sendQueue->getBytesAvailable(state->snd_nxt);
00557     if (bytes > buffered) // last segment?
00558         bytes = buffered;
00559 
00560     // if header options will be added, this could reduce the number of data bytes allowed for this segment,
00561     // because following condition must to be respected:
00562     //     bytes + options_len <= snd_mss
00563     TCPSegment *tcpseg_temp = createTCPSegment(NULL);
00564     tcpseg_temp->setAckBit(true); // needed for TS option, otherwise TSecr will be set to 0
00565     writeHeaderOptions(tcpseg_temp);
00566     uint options_len = tcpseg_temp->getHeaderLength() - TCP_HEADER_OCTETS; // TCP_HEADER_OCTETS = 20
00567     while (bytes + options_len > state->snd_mss)
00568         bytes--;
00569     state->sentBytes = bytes;
00570 
00571     // send one segment of 'bytes' bytes from snd_nxt, and advance snd_nxt
00572     TCPSegment *tcpseg = sendQueue->createSegmentWithBytes(state->snd_nxt, bytes);
00573 
00574     // if sack_enabled copy region of tcpseg to rexmitQueue
00575     if (state->sack_enabled)
00576         rexmitQueue->enqueueSentData(state->snd_nxt, state->snd_nxt+bytes);
00577 
00578     tcpseg->setAckNo(state->rcv_nxt);
00579     tcpseg->setAckBit(true);
00580     tcpseg->setWindow(updateRcvWnd());
00581 
00582     // TBD when to set PSH bit?
00583     // TBD set URG bit if needed
00584     ASSERT(bytes==tcpseg->getPayloadLength());
00585 
00586     state->snd_nxt += bytes;
00587 
00588     // check if afterRto bit can be reset
00589     if (state->afterRto && seqGE(state->snd_nxt, state->snd_max))
00590         state->afterRto = false;
00591 
00592     if (state->send_fin && state->snd_nxt==state->snd_fin_seq)
00593     {
00594         tcpEV << "Setting FIN on segment\n";
00595         tcpseg->setFinBit(true);
00596         state->snd_nxt = state->snd_fin_seq+1;
00597     }
00598 
00599     // add header options and update header length (from tcpseg_temp)
00600     tcpseg->setOptionsArraySize(tcpseg_temp->getOptionsArraySize());
00601     for (uint i=0; i<tcpseg_temp->getOptionsArraySize(); i++)
00602         tcpseg->setOptions(i, tcpseg_temp->getOptions(i));
00603     tcpseg->setHeaderLength(tcpseg_temp->getHeaderLength());
00604     delete tcpseg_temp;
00605 
00606     // send it
00607     sendToIP(tcpseg);
00608 }
00609 
00610 bool TCPConnection::sendData(bool fullSegmentsOnly, uint32 congestionWindow)
00611 {
00612     if (!state->afterRto)
00613     {
00614         // we'll start sending from snd_max
00615         state->snd_nxt = state->snd_max;
00616     }
00617 
00618     uint32 old_highRxt = 0;
00619     if (state->sack_enabled)
00620         old_highRxt = rexmitQueue->getHighestRexmittedSeqNum();
00621 
00622     // check how many bytes we have
00623     ulong buffered = sendQueue->getBytesAvailable(state->snd_nxt);
00624     if (buffered==0)
00625         return false;
00626 
00627     // maxWindow is minimum of snd_wnd and congestionWindow (snd_cwnd)
00628     ulong maxWindow = std::min(state->snd_wnd, congestionWindow);
00629 
00630     // effectiveWindow: number of bytes we're allowed to send now
00631     long effectiveWin = maxWindow - (state->snd_nxt - state->snd_una);
00632     if (effectiveWin <= 0)
00633     {
00634         tcpEV << "Effective window is zero (advertised window " << state->snd_wnd <<
00635             ", congestion window " << congestionWindow << "), cannot send.\n";
00636         return false;
00637     }
00638 
00639     ulong bytesToSend = effectiveWin;
00640 
00641     if (bytesToSend > buffered)
00642         bytesToSend = buffered;
00643 
00644     uint32 effectiveMaxBytesSend = state->snd_mss;
00645     if (state->ts_enabled)
00646         effectiveMaxBytesSend -= TCP_OPTION_TS_SIZE;
00647 
00648     // last segment could be less than state->snd_mss (or less than snd_mss-TCP_OPTION_TS_SIZE is using TS option)
00649     if (fullSegmentsOnly && buffered > (ulong)effectiveWin &&
00650             (bytesToSend < (effectiveMaxBytesSend)))
00651     {
00652         tcpEV << "Cannot send, not enough data for a full segment (SMSS=" << state->snd_mss
00653             << ", in buffer " << buffered << ")\n";
00654         return false;
00655     }
00656 
00657     // start sending 'bytesToSend' bytes
00658     tcpEV << "Will send " << bytesToSend << " bytes (effectiveWindow " << effectiveWin
00659         << ", in buffer " << buffered << " bytes)\n";
00660 
00661     uint32 old_snd_nxt = state->snd_nxt;
00662     ASSERT(bytesToSend>0);
00663 
00664 #ifdef TCP_SENDFRAGMENTS  /* normally undefined */
00665     // make agressive use of the window until the last byte
00666     while (bytesToSend>0)
00667     {
00668         ulong bytes = std::min(bytesToSend, state->snd_mss);
00669         sendSegment(bytes);
00670         bytesToSend -= state->sentBytes;
00671     }
00672 #else
00673     // send <MSS segments only if it's the only segment we can send now - Note: If bytesToSend=1010, MSS=1012, ts_enabled=true => we may send 2 segments (1000 payload + optionsHeader and 10 payload + optionsHeader)
00674     // FIXME this should probably obey Nagle's alg -- to be checked
00675     if (bytesToSend <= state->snd_mss)
00676     {
00677         sendSegment(bytesToSend);
00678         bytesToSend -= state->sentBytes;
00679     }
00680     else // send whole segments only (nagle_enabled)
00681     {
00682         while (bytesToSend >= effectiveMaxBytesSend)
00683         {
00684             sendSegment(state->snd_mss);
00685             bytesToSend -= state->sentBytes;
00686         }
00687     }
00688     // check how many bytes we have - last segment could be less than state->snd_mss
00689     buffered = sendQueue->getBytesAvailable(state->snd_nxt);
00690     if (bytesToSend==buffered && buffered!=0) // last segment?
00691         sendSegment(bytesToSend);
00692     else if (bytesToSend>0)
00693         tcpEV << bytesToSend << " bytes of space left in effectiveWindow\n";
00694 #endif
00695 
00696     // remember highest seq sent (snd_nxt may be set back on retransmission,
00697     // but we'll need snd_max to check validity of ACKs -- they must ack
00698     // something we really sent)
00699     if (seqGreater(state->snd_nxt, state->snd_max))
00700         state->snd_max = state->snd_nxt;
00701     if (unackedVector) unackedVector->record(state->snd_max - state->snd_una);
00702 
00703     // notify (once is enough)
00704     tcpAlgorithm->ackSent();
00705     if (state->sack_enabled && state->lossRecovery && old_highRxt != state->highRxt)
00706     {
00707         // Note: Restart of REXMIT timer on retransmission is not part of RFC 2581, however optional in RFC 3517 if sent during recovery.
00708         tcpEV << "Retransmission sent during recovery, restarting REXMIT timer.\n";
00709         tcpAlgorithm->restartRexmitTimer();
00710     }
00711     else // don't measure RTT for retransmitted packets
00712         tcpAlgorithm->dataSent(old_snd_nxt);
00713 
00714     return true;
00715 }
00716 
00717 bool TCPConnection::sendProbe()
00718 {
00719     // we'll start sending from snd_max
00720     state->snd_nxt = state->snd_max;
00721 
00722     // check we have 1 byte to send
00723     if (sendQueue->getBytesAvailable(state->snd_nxt)==0)
00724     {
00725         tcpEV << "Cannot send probe because send buffer is empty\n";
00726         return false;
00727     }
00728 
00729     uint32 old_snd_nxt = state->snd_nxt;
00730 
00731     tcpEV << "Sending 1 byte as probe, with seq=" << state->snd_nxt << "\n";
00732     sendSegment(1);
00733 
00734     // remember highest seq sent (snd_nxt may be set back on retransmission,
00735     // but we'll need snd_max to check validity of ACKs -- they must ack
00736     // something we really sent)
00737     state->snd_max = state->snd_nxt;
00738     if (unackedVector) unackedVector->record(state->snd_max - state->snd_una);
00739 
00740     // notify
00741     tcpAlgorithm->ackSent();
00742     tcpAlgorithm->dataSent(old_snd_nxt);
00743 
00744     return true;
00745 }
00746 
00747 void TCPConnection::retransmitOneSegment(bool called_at_rto)
00748 {
00749     uint32 old_snd_nxt = state->snd_nxt;
00750 
00751     // retransmit one segment at snd_una, and set snd_nxt accordingly (if not called at RTO)
00752     state->snd_nxt = state->snd_una;
00753 
00754     // When FIN sent the snd_max-snd_nxt larger than bytes available in queue
00755     ulong bytes = std::min((ulong)std::min(state->snd_mss, state->snd_max - state->snd_nxt),
00756             sendQueue->getBytesAvailable(state->snd_nxt));
00757 
00758     ASSERT(bytes!=0);
00759 
00760     sendSegment(bytes);
00761     if (!called_at_rto)
00762     {
00763         if (seqGreater(old_snd_nxt, state->snd_nxt))
00764             state->snd_nxt = old_snd_nxt;
00765     }
00766 
00767     // notify
00768     tcpAlgorithm->ackSent();
00769 
00770     if (state->sack_enabled)
00771     {
00772         // RFC 3517, page 7: "(3) Retransmit the first data segment presumed dropped -- the segment
00773         // starting with sequence number HighACK + 1.  To prevent repeated
00774         // retransmission of the same data, set HighRxt to the highest
00775         // sequence number in the retransmitted segment."
00776         state->highRxt = rexmitQueue->getHighestRexmittedSeqNum();
00777     }
00778 }
00779 
00780 void TCPConnection::retransmitData()
00781 {
00782     // retransmit everything from snd_una
00783     state->snd_nxt = state->snd_una;
00784 
00785     uint32 bytesToSend = state->snd_max - state->snd_nxt;
00786     ASSERT(bytesToSend!=0);
00787 
00788     // TBD - avoid to send more than allowed - check cwnd and rwnd before retransmitting data!
00789     while (bytesToSend>0)
00790     {
00791         uint32 bytes = std::min(bytesToSend, state->snd_mss);
00792         bytes = std::min(bytes, (uint32)(sendQueue->getBytesAvailable(state->snd_nxt)));
00793         sendSegment(bytes);
00794         // Do not send packets after the FIN.
00795         // fixes bug that occurs in examples/inet/bulktransfer at event #64043  T=13.861159213744
00796         if (state->send_fin && state->snd_nxt==state->snd_fin_seq+1)
00797             break;
00798         bytesToSend -= state->sentBytes;
00799     }
00800 }
00801 
00802 void TCPConnection::readHeaderOptions(TCPSegment *tcpseg)
00803 {
00804     tcpEV << "TCP Header Option(s) received:\n";
00805 
00806     for (uint i=0; i<tcpseg->getOptionsArraySize(); i++)
00807     {
00808         const TCPOption& option = tcpseg->getOptions(i);
00809         short kind = option.getKind();
00810         short length = option.getLength();
00811         tcpEV << "Option type " << kind << " (" << optionName(kind) << "), length " << length << "\n";
00812         bool ok = true;
00813         switch(kind)
00814         {
00815             case TCPOPTION_END_OF_OPTION_LIST: // EOL=0
00816             case TCPOPTION_NO_OPERATION: // NOP=1
00817                 if (length != 1)
00818                 {
00819                     tcpEV << "ERROR: option length incorrect\n";
00820                     ok = false;
00821                 }
00822                 break;
00823             case TCPOPTION_MAXIMUM_SEGMENT_SIZE: // MSS=2
00824                 ok = processMSSOption(tcpseg, option);
00825                 break;
00826             case TCPOPTION_WINDOW_SCALE: // WS=3
00827                 ok = processWSOption(tcpseg, option);
00828                 break;
00829             case TCPOPTION_SACK_PERMITTED: // SACK_PERMITTED=4
00830                 ok = processSACKPermittedOption(tcpseg, option);
00831                 break;
00832             case TCPOPTION_SACK: // SACK=5
00833                 ok = processSACKOption(tcpseg, option);
00834                 break;
00835             case TCPOPTION_TIMESTAMP: // TS=8
00836                 ok = processTSOption(tcpseg, option);
00837                 break;
00838             // TODO add new TCPOptions here once they are implemented
00839             // TODO delegate to TCPAlgorithm as well -- it may want to recognized additional options
00840             default:
00841                 tcpEV << "ERROR: Unsupported TCP option kind " << kind << "\n";
00842                 break;
00843         }
00844         (void)ok; // unused
00845     }
00846 }
00847 
00848 bool TCPConnection::processMSSOption(TCPSegment *tcpseg, const TCPOption& option)
00849 {
00850     if (option.getLength() != 4)
00851     {
00852         tcpEV << "ERROR: option length incorrect\n";
00853         return false;
00854     }
00855 
00856     if (fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT)
00857     {
00858         tcpEV << "ERROR: TCP Header Option MSS received, but in unexpected state\n";
00859         return false;
00860     }
00861 
00862     if (option.getValuesArraySize() == 0)
00863     {
00864         // since option.getLength() was already checked, this is a programming error not a TCP error
00865         throw cRuntimeError("TCPOption for MSS does not contain the data its getLength() promises");
00866     }
00867 
00868     // RFC 2581, page 1:
00869     // "The SMSS is the size of the largest segment that the sender can transmit.
00870     // This value can be based on the maximum transmission unit of the network,
00871     // the path MTU discovery [MD90] algorithm, RMSS (see next item), or other
00872     // factors.  The size does not include the TCP/IP headers and options."
00873     //
00874     // "The RMSS is the size of the largest segment the receiver is willing to accept.
00875     // This is the value specified in the MSS option sent by the receiver during
00876     // connection startup.  Or, if the MSS option is not used, 536 bytes [Bra89].
00877     // The size does not include the TCP/IP headers and options."
00878     //
00879     //
00880     // The value of snd_mss (SMSS) is set to the minimum of snd_mss (local parameter) and
00881     // the value specified in the MSS option received during connection startup.
00882     state->snd_mss = std::min(state->snd_mss, (uint32) option.getValues(0));
00883     if (state->snd_mss==0)
00884         state->snd_mss = 536;
00885     tcpEV << "TCP Header Option MSS(=" << option.getValues(0) << ") received, SMSS is set to: " << state->snd_mss << "\n";
00886     return true;
00887 }
00888 
00889 bool TCPConnection::processWSOption(TCPSegment *tcpseg, const TCPOption& option)
00890 {
00891     if (option.getLength() != 3)
00892     {
00893         tcpEV << "ERROR: length incorrect\n";
00894         return false;
00895     }
00896 
00897     if (fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT)
00898     {
00899         tcpEV << "ERROR: TCP Header Option WS received, but in unexpected state\n";
00900         return false;
00901     }
00902 
00903     if (option.getValuesArraySize() == 0)
00904     {
00905         // since option.getLength() was already checked, this is a programming error not a TCP error
00906         throw cRuntimeError("TCPOption for WS does not contain the data its getLength() promises");
00907     }
00908 
00909     state->rcv_ws = true;
00910     state->ws_enabled = state->ws_support && state->snd_ws && state->rcv_ws;
00911     state->snd_wnd_scale = option.getValues(0);
00912     tcpEV << "TCP Header Option WS(=" << state->snd_wnd_scale << ") received, WS (ws_enabled) is set to: " << state->ws_enabled << "\n";
00913     if (state->snd_wnd_scale > 14) // RFC 1323, page 11: "the shift count must be limited to 14"
00914     {
00915         tcpEV << "ERROR: TCP Header Option WS received but shift count value is exceeding 14\n";
00916         state->snd_wnd_scale = 14;
00917     }
00918     return true;
00919 }
00920 
00921 bool TCPConnection::processTSOption(TCPSegment *tcpseg, const TCPOption& option)
00922 {
00923     if (option.getLength() != 10)
00924     {
00925         tcpEV << "ERROR: length incorrect\n";
00926         return false;
00927     }
00928 
00929     if ((!state->ts_enabled && fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT) ||
00930         (state->ts_enabled && fsm.getState() != TCP_S_SYN_RCVD && fsm.getState() != TCP_S_ESTABLISHED &&
00931                 fsm.getState() != TCP_S_FIN_WAIT_1 && fsm.getState() != TCP_S_FIN_WAIT_2))
00932     {
00933         tcpEV << "ERROR: TCP Header Option TS received, but in unexpected state\n";
00934         return false;
00935     }
00936 
00937     if (option.getValuesArraySize() != 2)
00938     {
00939         // since option.getLength() was already checked, this is a programming error not a TCP error
00940         throw cRuntimeError("TCPOption for TS does not contain the data its getLength() promises");
00941     }
00942 
00943     if (!state->ts_enabled)
00944     {
00945         state->rcv_initial_ts = true;
00946         state->ts_enabled = state->ts_support && state->snd_initial_ts && state->rcv_initial_ts;
00947         tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") received, TS (ts_enabled) is set to: " << state->ts_enabled << "\n";
00948     }
00949     else
00950         tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") received\n";
00951 
00952     // RFC 1323, page 35:
00953     // "Check whether the segment contains a Timestamps option and bit
00954     // Snd.TS.OK is on.  If so:
00955     //   If SEG.TSval < TS.Recent, then test whether connection has
00956     //   been idle less than 24 days; if both are true, then the
00957     //   segment is not acceptable; follow steps below for an
00958     //   unacceptable segment.
00959     //   If SEG.SEQ is equal to Last.ACK.sent, then save SEG.[TSval] in
00960     //   variable TS.Recent."
00961     if (state->ts_enabled)
00962     {
00963         if (seqLess(option.getValues(0), state->ts_recent))
00964         {
00965             if ((simTime() - state->time_last_data_sent) > PAWS_IDLE_TIME_THRESH) // PAWS_IDLE_TIME_THRESH = 24 days
00966             {
00967                 tcpEV << "PAWS: Segment is not acceptable, TSval=" << option.getValues(0) << " in " <<  stateName(fsm.getState()) << " state received: dropping segment\n";
00968                 return false;
00969             }
00970         }
00971         else if (seqLE(tcpseg->getSequenceNo(), state->last_ack_sent)) // Note: test is modified according to the latest proposal of the tcplw@cray.com list (Braden 1993/04/26)
00972         {
00973             state->ts_recent = option.getValues(0);
00974             tcpEV << "Updating ts_recent from segment: new ts_recent=" << state->ts_recent << "\n";
00975         }
00976     }
00977 
00978     return true;
00979 }
00980 
00981 bool TCPConnection::processSACKPermittedOption(TCPSegment *tcpseg, const TCPOption& option)
00982 {
00983     if (option.getLength() != 2)
00984     {
00985         tcpEV << "ERROR: length incorrect\n";
00986         return false;
00987     }
00988 
00989     if (fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT)
00990     {
00991         tcpEV << "ERROR: TCP Header Option SACK_PERMITTED received, but in unexpected state\n";
00992         return false;
00993     }
00994 
00995     state->rcv_sack_perm = true;
00996     state->sack_enabled = state->sack_support && state->snd_sack_perm && state->rcv_sack_perm;
00997     tcpEV << "TCP Header Option SACK_PERMITTED received, SACK (sack_enabled) is set to: " << state->sack_enabled << "\n";
00998     return true;
00999 }
01000 
01001 bool TCPConnection::processSACKOption(TCPSegment *tcpseg, const TCPOption& option)
01002 {
01003     if (option.getLength() % 8 != 2)
01004     {
01005         tcpEV << "ERROR: option length incorrect\n";
01006         return false;
01007     }
01008 
01009     if (state->sack_enabled && fsm.getState() != TCP_S_SYN_RCVD && fsm.getState() != TCP_S_ESTABLISHED && fsm.getState() != TCP_S_FIN_WAIT_1 && fsm.getState() != TCP_S_FIN_WAIT_2)
01010     {
01011         tcpEV << "ERROR: TCP Header Option SACK received, but in unexpected state\n";
01012         return false;
01013     }
01014 
01015     if (!state->sack_enabled)
01016     {
01017         tcpEV << "ERROR: " << (option.getLength()/2) << ". SACK(s) received, but sack_enabled is set to " << state->sack_enabled << "\n";
01018         return false;
01019     }
01020 
01021     uint n = option.getValuesArraySize()/2;
01022     if (n > 0) // sacks present?
01023     {
01024         tcpEV << n << " SACK(s) received:\n";
01025         uint count=0;
01026         for (uint i=0; i<n; i++)
01027         {
01028             Sack tmp;
01029             tmp.setStart(option.getValues(count));
01030             count++;
01031             tmp.setEnd(option.getValues(count));
01032             count++;
01033 
01034             tcpEV << (i+1) << ". SACK:" << " [" << tmp.getStart() << ".." << tmp.getEnd() << ")\n";
01035 
01036             // check for D-SACK
01037             if (i==0 && seqLess(tmp.getEnd(), tcpseg->getAckNo()))
01038             {
01039                 // RFC 2883, page 8:
01040                 // "In order for the sender to check that the first (D)SACK block of an
01041                 // acknowledgement in fact acknowledges duplicate data, the sender
01042                 // should compare the sequence space in the first SACK block to the
01043                 // cumulative ACK which is carried IN THE SAME PACKET.  If the SACK
01044                 // sequence space is less than this cumulative ACK, it is an indication
01045                 // that the segment identified by the SACK block has been received more
01046                 // than once by the receiver.  An implementation MUST NOT compare the
01047                 // sequence space in the SACK block to the TCP state variable snd.una
01048                 // (which carries the total cumulative ACK), as this may result in the
01049                 // wrong conclusion if ACK packets are reordered."
01050                 tcpEV << "Received D-SACK below cumulative ACK=" << tcpseg->getAckNo() << " D-SACK:" << " [" << tmp.getStart() << ".." << tmp.getEnd() << ")\n";
01051             }
01052             else if (i==0 && seqGE(tmp.getEnd(), tcpseg->getAckNo()) && n>1)
01053             {
01054                 // RFC 2883, page 8:
01055                 // "If the sequence space in the first SACK block is greater than the
01056                 // cumulative ACK, then the sender next compares the sequence space in
01057                 // the first SACK block with the sequence space in the second SACK
01058                 // block, if there is one.  This comparison can determine if the first
01059                 // SACK block is reporting duplicate data that lies above the cumulative
01060                 // ACK."
01061                 Sack tmp2;
01062                 tmp2.setStart(option.getValues(2));
01063                 tmp2.setEnd(option.getValues(3));
01064 
01065                 if (seqGE(tmp.getStart(), tmp2.getStart()) && seqLE(tmp.getEnd(), tmp2.getEnd()))
01066                 {tcpEV << "Received D-SACK above cumulative ACK=" << tcpseg->getAckNo() << " D-SACK:" << " [" << tmp.getStart() << ".." << tmp.getEnd() << ") SACK:" << " [" << tmp2.getStart() << ".." << tmp2.getEnd() << ")\n";}
01067             }
01068 
01069             if (seqGreater(tmp.getEnd(), tcpseg->getAckNo()))
01070                 rexmitQueue->setSackedBit(tmp.getStart(), tmp.getEnd());
01071         }
01072         state->rcv_sacks = state->rcv_sacks + n; // total counter, no current number
01073         if (rcvSacksVector)
01074             rcvSacksVector->record(state->rcv_sacks);
01075 
01076         // update scoreboard
01077         state->sackedBytes_old = state->sackedBytes; // needed for RFC 3042 to check if last dupAck contained new sack information
01078         state->sackedBytes = rexmitQueue->getTotalAmountOfSackedBytes();
01079         if (sackedBytesVector)
01080             sackedBytesVector->record(state->sackedBytes);
01081     }
01082     return true;
01083 }
01084 
01085 TCPSegment TCPConnection::writeHeaderOptions(TCPSegment *tcpseg)
01086 {
01087     TCPOption option;
01088     uint t = 0;
01089 
01090     if (tcpseg->getSynBit() && (fsm.getState() == TCP_S_INIT || fsm.getState() == TCP_S_LISTEN || ((fsm.getState()==TCP_S_SYN_SENT || fsm.getState()==TCP_S_SYN_RCVD) && state->syn_rexmit_count>0))) // SYN flag set and connetion in INIT or LISTEN state (or after synRexmit timeout)
01091     {
01092         // MSS header option
01093         if (state->snd_mss > 0)
01094         {
01095             option.setKind(TCPOPTION_MAXIMUM_SEGMENT_SIZE); // MSS
01096             option.setLength(4);
01097             option.setValuesArraySize(1);
01098 
01099             // Update MSS
01100             option.setValues(0,state->snd_mss);
01101             tcpEV << "TCP Header Option MSS(=" << state->snd_mss << ") sent\n";
01102             tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01103             tcpseg->setOptions(t,option);
01104             t++;
01105         }
01106 
01107         // WS header option
01108         if (state->ws_support && (state->rcv_ws || (fsm.getState() == TCP_S_INIT || (fsm.getState()==TCP_S_SYN_SENT && state->syn_rexmit_count>0)))) // Is WS supported by host?
01109         {
01110             // 1 padding byte
01111             option.setKind(TCPOPTION_NO_OPERATION); // NOP
01112             option.setLength(1);
01113             option.setValuesArraySize(0);
01114             tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01115             tcpseg->setOptions(t,option);
01116             t++;
01117 
01118             option.setKind(TCPOPTION_WINDOW_SCALE);
01119             option.setLength(3);
01120             option.setValuesArraySize(1);
01121 
01122             // Update WS variables
01123             ulong scaled_rcv_wnd = receiveQueue->getAmountOfFreeBytes(state->maxRcvBuffer);
01124             state->rcv_wnd_scale = 0;
01125             while (scaled_rcv_wnd > TCP_MAX_WIN && state->rcv_wnd_scale < 14) // RFC 1323, page 11: "the shift count must be limited to 14"
01126             {
01127                 scaled_rcv_wnd = scaled_rcv_wnd >> 1;
01128                 state->rcv_wnd_scale++;
01129             }
01130             option.setValues(0,state->rcv_wnd_scale); // rcv_wnd_scale is also set in scaleRcvWnd()
01131             state->snd_ws = true;
01132             state->ws_enabled = state->ws_support && state->snd_ws && state->rcv_ws;
01133             tcpEV << "TCP Header Option WS(=" << option.getValues(0) << ") sent, WS (ws_enabled) is set to: " << state->ws_enabled << "\n";
01134             tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01135             tcpseg->setOptions(t,option);
01136             t++;
01137         }
01138 
01139         // SACK_PERMITTED header option
01140         if (state->sack_support && (state->rcv_sack_perm || (fsm.getState() == TCP_S_INIT || (fsm.getState()==TCP_S_SYN_SENT && state->syn_rexmit_count>0)))) // Is SACK supported by host?
01141         {
01142             if (!state->ts_support) // if TS is supported by host, do not add NOPs to this segment
01143             {
01144                 // 2 padding bytes
01145                 option.setKind(TCPOPTION_NO_OPERATION); // NOP
01146                 option.setLength(1);
01147                 option.setValuesArraySize(0);
01148                 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2);
01149                 tcpseg->setOptions(t,option);
01150                 t++;
01151                 tcpseg->setOptions(t,option);
01152                 t++;
01153             }
01154 
01155             option.setKind(TCPOPTION_SACK_PERMITTED);
01156             option.setLength(2);
01157             option.setValuesArraySize(0);
01158 
01159             // Update SACK variables
01160             state->snd_sack_perm = true;
01161             state->sack_enabled = state->sack_support && state->snd_sack_perm && state->rcv_sack_perm;
01162             tcpEV << "TCP Header Option SACK_PERMITTED sent, SACK (sack_enabled) is set to: " << state->sack_enabled << "\n";
01163             tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01164             tcpseg->setOptions(t,option);
01165             t++;
01166         }
01167 
01168         // TS header option
01169         if (state->ts_support && (state->rcv_initial_ts || (fsm.getState() == TCP_S_INIT || (fsm.getState()==TCP_S_SYN_SENT && state->syn_rexmit_count>0)))) // Is TS supported by host?
01170         {
01171             if (!state->sack_support) // if SACK is supported by host, do not add NOPs to this segment
01172             {
01173                 // 2 padding bytes
01174                 option.setKind(TCPOPTION_NO_OPERATION); // NOP
01175                 option.setLength(1);
01176                 option.setValuesArraySize(0);
01177                 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2);
01178                 tcpseg->setOptions(t,option);
01179                 t++;
01180                 tcpseg->setOptions(t,option);
01181                 t++;
01182             }
01183 
01184             option.setKind(TCPOPTION_TIMESTAMP);
01185             option.setLength(10);
01186             option.setValuesArraySize(2);
01187 
01188             // Update TS variables
01189             // RFC 1323, page 13: "The Timestamp Value field (TSval) contains the current value of the timestamp clock of the TCP sending the option."
01190             option.setValues(0,convertSimtimeToTS(simTime()));
01191             // RFC 1323, page 16: "(3) When a TSopt is sent, its TSecr field is set to the current TS.Recent value."
01192             // RFC 1323, page 13:
01193             // "The Timestamp Echo Reply field (TSecr) is only valid if the ACK
01194             // bit is set in the TCP header; if it is valid, it echos a times-
01195             // tamp value that was sent by the remote TCP in the TSval field
01196             // of a Timestamps option.  When TSecr is not valid, its value
01197             // must be zero."
01198             if (tcpseg->getAckBit())
01199                 option.setValues(1,state->ts_recent);
01200             else
01201                 option.setValues(1,0);
01202             state->snd_initial_ts = true;
01203             state->ts_enabled = state->ts_support && state->snd_initial_ts && state->rcv_initial_ts;
01204             tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") sent, TS (ts_enabled) is set to: " << state->ts_enabled << "\n";
01205             tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01206             tcpseg->setOptions(t,option);
01207             t++;
01208         }
01209 
01210         // TODO add new TCPOptions here once they are implemented
01211     }
01212     else if (fsm.getState()==TCP_S_SYN_SENT || fsm.getState()==TCP_S_SYN_RCVD || fsm.getState()==TCP_S_ESTABLISHED || fsm.getState()==TCP_S_FIN_WAIT_1 || fsm.getState()==TCP_S_FIN_WAIT_2) // connetion is not in INIT or LISTEN state
01213     {
01214         // TS header option
01215         if (state->ts_enabled) // Is TS enabled?
01216         {
01217             if (!(state->sack_enabled && (state->snd_sack || state->snd_dsack))) // if SACK is enabled and SACKs need to be added, do not add NOPs to this segment
01218             {
01219                 // 2 padding bytes
01220                 option.setKind(TCPOPTION_NO_OPERATION); // NOP
01221                 option.setLength(1);
01222                 option.setValuesArraySize(0);
01223                 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2);
01224                 tcpseg->setOptions(t,option);
01225                 t++;
01226                 tcpseg->setOptions(t,option);
01227                 t++;
01228             }
01229 
01230             option.setKind(TCPOPTION_TIMESTAMP);
01231             option.setLength(10);
01232             option.setValuesArraySize(2);
01233 
01234             // Update TS variables
01235             // RFC 1323, page 13: "The Timestamp Value field (TSval) contains the current value of the timestamp clock of the TCP sending the option."
01236             option.setValues(0,convertSimtimeToTS(simTime()));
01237             // RFC 1323, page 16: "(3) When a TSopt is sent, its TSecr field is set to the current TS.Recent value."
01238             // RFC 1323, page 13:
01239             // "The Timestamp Echo Reply field (TSecr) is only valid if the ACK
01240             // bit is set in the TCP header; if it is valid, it echos a times-
01241             // tamp value that was sent by the remote TCP in the TSval field
01242             // of a Timestamps option.  When TSecr is not valid, its value
01243             // must be zero."
01244             if (tcpseg->getAckBit())
01245                 option.setValues(1,state->ts_recent);
01246             else
01247                 option.setValues(1,0);
01248             tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") sent\n";
01249             tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01250             tcpseg->setOptions(t,option);
01251             t++;
01252         }
01253 
01254         // SACK header option
01255 
01256         // RFC 2018, page 4:
01257         // "If sent at all, SACK options SHOULD be included in all ACKs which do
01258         // not ACK the highest sequence number in the data receiver's queue.  In
01259         // this situation the network has lost or mis-ordered data, such that
01260         // the receiver holds non-contiguous data in its queue.  RFC 1122,
01261         // Section 4.2.2.21, discusses the reasons for the receiver to send ACKs
01262         // in response to additional segments received in this state.  The
01263         // receiver SHOULD send an ACK for every valid segment that arrives
01264         // containing new data, and each of these "duplicate" ACKs SHOULD bear a
01265         // SACK option."
01266         if (state->sack_enabled && (state->snd_sack || state->snd_dsack))
01267         {
01268             if (!state->ts_enabled) // if TS is enabled, do not add NOPs to this segment
01269             {
01270                 // 2 padding bytes
01271                 option.setKind(TCPOPTION_NO_OPERATION); // NOP
01272                 option.setLength(1);
01273                 option.setValuesArraySize(0);
01274                 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2);
01275                 tcpseg->setOptions(t,option);
01276                 t++;
01277                 tcpseg->setOptions(t,option);
01278                 t++;
01279             }
01280 
01281             addSacks(tcpseg);
01282             t++;
01283         }
01284 
01285         // TODO add new TCPOptions here once they are implemented
01286 
01287         // TODO delegate to TCPAlgorithm as well -- it may want to append additional options
01288     }
01289 
01290     if (tcpseg->getOptionsArraySize() != 0)
01291     {
01292         uint options_len = 0;
01293         for (uint i=0; i<tcpseg->getOptionsArraySize(); i++)
01294             options_len = options_len + tcpseg->getOptions(i).getLength();
01295 
01296         if (options_len <= 40) // Options length allowed? - maximum: 40 Bytes
01297             tcpseg->setHeaderLength(TCP_HEADER_OCTETS+options_len); // TCP_HEADER_OCTETS = 20
01298         else
01299         {
01300             tcpseg->setHeaderLength(TCP_HEADER_OCTETS); // TCP_HEADER_OCTETS = 20
01301             tcpseg->setOptionsArraySize(0); // drop all options
01302             tcpEV << "ERROR: Options length exceeded! Segment will be sent without options" << "\n";
01303         }
01304     }
01305 
01306     return *tcpseg;
01307 }
01308 
01309 TCPSegment TCPConnection::addSacks(TCPSegment *tcpseg)
01310 {
01311     TCPOption option;
01312     uint options_len = 0;
01313     uint used_options_len = 0;
01314     uint m = 0; // number of sack blocks to be sent in current segment
01315     uint n = 0; // number of sack blocks in sacks_array before sending current segment
01316     bool skip_sacks_array = false; // set if dsack is subsets of a bigger sack block recently reported
01317     bool overlap = false; // set if recently reported sack blocks are subsets of "sacks_array[0]"
01318 
01319     uint32 start = state->start_seqno;
01320     uint32 end = state->end_seqno;
01321 
01322     ASSERT(start!=0 || end!=0);
01323 
01324     // delete old sacks (below rcv_nxt), delete duplicates and print previous status of sacks_array:
01325     tcpEV << "Previous status of sacks_array: \n";
01326     for (uint a=0; a<MAX_SACK_BLOCKS; a++) // MAX_SACK_BLOCKS is set to 60
01327     {
01328         if (state->sacks_array[a].getStart()!=0 && seqLE(state->sacks_array[a].getEnd(), state->rcv_nxt))
01329         {
01330             state->sacks_array[a].setStart(0);
01331             state->sacks_array[a].setEnd(0);
01332         }
01333         if (state->sacks_array[a].getStart()!=0 && state->sacks_array[a].getEnd()!=0) // do not print empty entries
01334             tcpEV << "\t" << (a+1) << ". SACK in sacks_array:" << " [" << state->sacks_array[a].getStart() << ".." << state->sacks_array[a].getEnd() << ")\n";
01335         else
01336             break;
01337     }
01338 
01339     for (uint a=0; a<MAX_SACK_BLOCKS-1; a++)
01340     {
01341         if (state->sacks_array[a].getStart() != 0)
01342             m++;
01343         else
01344             break;
01345     }
01346     n = m + 1; // +1 for new the new sack block
01347 
01348     // 2 padding bytes are prefixed or TS option is present
01349     if (tcpseg->getOptionsArraySize()>0)
01350     {
01351         for (uint i=0; i<tcpseg->getOptionsArraySize(); i++)
01352             used_options_len = used_options_len + tcpseg->getOptions(i).getLength();
01353         if (used_options_len>30)
01354         {
01355             tcpEV << "ERROR: Failed to addSacks - at least 10 free bytes needed for SACK - used_options_len=" << used_options_len << "\n";
01356             //reset flags:
01357             skip_sacks_array = false;
01358             state->snd_sack  = false;
01359             state->snd_dsack = false;
01360             state->start_seqno = 0;
01361             state->end_seqno = 0;
01362             return *tcpseg;
01363         }
01364         else
01365         {
01366             n = std::min (n, (((40-used_options_len)-2)/8));
01367             option.setValuesArraySize(n*2);
01368         }
01369     }
01370     else
01371     {
01372         n = std::min (n, MAX_SACK_ENTRIES);
01373         option.setValuesArraySize(n*2);
01374     }
01375 
01376     // before adding a new sack move old sacks by one to the right
01377     for (int a=(MAX_SACK_BLOCKS-1); a>=0; a--) // MAX_SACK_BLOCKS is set to 60
01378         state->sacks_array[a+1] = state->sacks_array[a];
01379 
01380     if (state->snd_dsack) // SequenceNo < rcv_nxt
01381     {
01382         // RFC 2883, page 3:
01383         // "(3) The left edge of the D-SACK block specifies the first sequence
01384         // number of the duplicate contiguous sequence, and the right edge of
01385         // the D-SACK block specifies the sequence number immediately following
01386         // the last sequence in the duplicate contiguous sequence."
01387         if (seqLess(start, state->rcv_nxt) && seqLess(state->rcv_nxt, end))
01388             end = state->rcv_nxt;
01389     }
01390     else if (start==0 && end==0) // rcv_nxt_old != rcv_nxt
01391     {
01392         // RFC 2018, page 4:
01393         // "* The first SACK block (i.e., the one immediately following the
01394         // kind and length fields in the option) MUST specify the contiguous
01395         // block of data containing the segment which triggered this ACK,
01396         // unless that segment advanced the Acknowledgment Number field in
01397         // the header.  This assures that the ACK with the SACK option
01398         // reflects the most recent change in the data receiver's buffer
01399         // queue."
01400         start = state->sacks_array[0].getStart();
01401         end = state->sacks_array[0].getEnd();
01402     }
01403     else // rcv_nxt_old == rcv_nxt or end <= rcv_nxt
01404     {
01405         // RFC 2018, page 4:
01406         // "* The first SACK block (i.e., the one immediately following the
01407         // kind and length fields in the option) MUST specify the contiguous
01408         // block of data containing the segment which triggered this ACK,"
01409         start = receiveQueue->getLE(start);
01410         end = receiveQueue->getRE(end);
01411     }
01412 
01413     state->sacks_array[0].setStart(start);
01414     state->sacks_array[0].setEnd(end);
01415 
01416     // RFC 2883, page 3:
01417     // "(4) If the D-SACK block reports a duplicate contiguous sequence from
01418     // a (possibly larger) block of data in the receiver's data queue above
01419     // the cumulative acknowledgement, then the second SACK block in that
01420     // SACK option should specify that (possibly larger) block of data.
01421     //
01422     // (5) Following the SACK blocks described above for reporting duplicate
01423     // segments, additional SACK blocks can be used for reporting additional
01424     // blocks of data, as specified in RFC 2018."
01425     if (state->snd_dsack)
01426     {
01427         uint32 start_new = receiveQueue->getLE(start);
01428         uint32 end_new = receiveQueue->getRE(end);
01429         if (start_new != start || end_new != end)
01430         {
01431             skip_sacks_array = true;
01432             for (int a=(MAX_SACK_BLOCKS-1); a>=1; a--) // MAX_SACK_BLOCKS is set to 60
01433                 state->sacks_array[a+1] = state->sacks_array[a];
01434             state->sacks_array[1].setStart(start_new); // specifies larger block of data
01435             state->sacks_array[1].setEnd(end_new);     // specifies larger block of data
01436         }
01437     }
01438 
01439     // RFC 2018, page 4:
01440     // "* The SACK option SHOULD be filled out by repeating the most
01441     // recently reported SACK blocks (based on first SACK blocks in
01442     // previous SACK options) that are not subsets of a SACK block
01443     // already included in the SACK option being constructed."
01444 
01445     // check if recently reported SACK blocks are subsets of "sacks_array[0]"
01446     for (uint a=0; a<MAX_SACK_BLOCKS-1; a++)
01447     {
01448         uint i = 1;
01449         bool matched = false;
01450 
01451         if (a==0 && skip_sacks_array)
01452             a = 1;
01453 
01454         if (state->sacks_array[a+i].getStart() == 0)
01455             break;
01456 
01457         while ((state->sacks_array[a].getStart() == state->sacks_array[a+i].getStart() ||
01458             state->sacks_array[a].getEnd() == state->sacks_array[a+i].getStart() ||
01459             state->sacks_array[a].getEnd() == state->sacks_array[a+i].getEnd())
01460             && a+i < MAX_SACK_BLOCKS && state->sacks_array[a].getStart()!=0) // MAX_SACK_BLOCKS is set to 60
01461         {
01462             matched = true;
01463             i++;
01464             overlap = true;
01465         }
01466         if (matched)
01467             state->sacks_array[a+1] = state->sacks_array[a+i];
01468     }
01469 
01470     if (!skip_sacks_array && overlap && m<4)
01471         n--;
01472 
01473     option.setKind(TCPOPTION_SACK);
01474     option.setLength(8*n+2);
01475     option.setValuesArraySize(2*n);
01476 
01477     // write sacks from sacks_array to options
01478     uint counter = 0;
01479     for (uint a=0; a<n; a++)
01480     {
01481         option.setValues(counter,state->sacks_array[a].getStart());
01482         counter++;
01483         option.setValues(counter,state->sacks_array[a].getEnd());
01484         counter++;
01485     }
01486 
01487     // independent of "n" we always need 2 padding bytes (NOP) to make: (used_options_len % 4 == 0)
01488     options_len = used_options_len + 8*n + 2; // 8 bytes for each SACK (n) + 2 bytes for kind&length
01489 
01490     if (options_len <= 40) // Options length allowed? - maximum: 40 Bytes
01491     {
01492         tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1);
01493         tcpseg->setOptions((tcpseg->getOptionsArraySize()-1),option);
01494 
01495         // update number of sent sacks
01496         state->snd_sacks = state->snd_sacks+n;
01497         if (sndSacksVector)
01498             sndSacksVector->record(state->snd_sacks);
01499 
01500         uint counter = 0;
01501         tcpEV << n << " SACK(s) added to header:\n";
01502         for (uint t=0; t<(n*2); t++)
01503         {
01504             counter++;
01505             tcpEV << counter << ". SACK:" << " [" << option.getValues(t);
01506             t++;
01507             tcpEV << ".." << option.getValues(t) << ")";
01508             if (t==1)
01509             {
01510                 if (state->snd_dsack)
01511                     tcpEV << " (D-SACK)";
01512                 else if (seqLE(option.getValues(t),state->rcv_nxt))
01513                 {
01514                     tcpEV << " (received segment filled out a gap)";
01515                     state->snd_dsack = true; // Note: Set snd_dsack to delete first sack from sacks_array
01516                 }
01517             }
01518             tcpEV << "\n";
01519         }
01520     }
01521     else
01522         tcpEV << "ERROR: Option length exceeded! Segment will be sent without SACK(s)" << "\n";
01523 
01524     // RFC 2883, page 3:
01525     // "(1) A D-SACK block is only used to report a duplicate contiguous
01526     // sequence of data received by the receiver in the most recent packet.
01527     //
01528     // (2) Each duplicate contiguous sequence of data received is reported
01529     // in at most one D-SACK block.  (I.e., the receiver sends two identical
01530     // D-SACK blocks in subsequent packets only if the receiver receives two
01531     // duplicate segments.)"
01532     //
01533     // In case of d-sack: delete first sack (d-sack) and move old sacks by one to the left
01534     if (state->snd_dsack)
01535     {
01536         for (int a=1; a<MAX_SACK_BLOCKS; a++) // MAX_SACK_BLOCKS is set to 60
01537             state->sacks_array[a-1] = state->sacks_array[a];
01538 
01539         // delete/reset last sack to avoid duplicates
01540         state->sacks_array[MAX_SACK_BLOCKS-1].setStart(0);
01541         state->sacks_array[MAX_SACK_BLOCKS-1].setEnd(0);
01542     }
01543 
01544     // reset flags:
01545     skip_sacks_array = false;
01546     state->snd_sack  = false;
01547     state->snd_dsack = false;
01548     state->start_seqno = 0;
01549     state->end_seqno = 0;
01550 
01551     return *tcpseg;
01552 }
01553 
01554 uint32 TCPConnection::getTSval(TCPSegment *tcpseg)
01555 {
01556     for (uint i=0; i<tcpseg->getOptionsArraySize(); i++)
01557     {
01558         const TCPOption& option = tcpseg->getOptions(i);
01559         short kind = option.getKind();
01560         if (kind == TCPOPTION_TIMESTAMP)
01561             return option.getValues(0);
01562     }
01563     return 0;
01564 }
01565 
01566 uint32 TCPConnection::getTSecr(TCPSegment *tcpseg)
01567 {
01568     for (uint i=0; i<tcpseg->getOptionsArraySize(); i++)
01569     {
01570         const TCPOption& option = tcpseg->getOptions(i);
01571         short kind = option.getKind();
01572         if (kind == TCPOPTION_TIMESTAMP)
01573             return option.getValues(1);
01574     }
01575     return 0;
01576 }
01577 
01578 void TCPConnection::updateRcvQueueVars()
01579 {
01580     // update receive queue related state variables
01581     state->freeRcvBuffer = receiveQueue->getAmountOfFreeBytes(state->maxRcvBuffer);
01582     state->usedRcvBuffer = state->maxRcvBuffer - state->freeRcvBuffer;
01583 
01584     // update receive queue related statistics
01585     if (tcpRcvQueueBytesVector)
01586         tcpRcvQueueBytesVector->record(state->usedRcvBuffer);
01587 
01588 //    tcpEV << "receiveQ: receiveQLength=" << receiveQueue->getQueueLength() << " maxRcvBuffer=" << state->maxRcvBuffer << " usedRcvBuffer=" << state->usedRcvBuffer << " freeRcvBuffer=" << state->freeRcvBuffer << "\n";
01589 }
01590 
01591 unsigned short TCPConnection::updateRcvWnd()
01592 {
01593     uint32 win = 0;
01594 
01595     // update receive queue related state variables and statistics
01596     updateRcvQueueVars();
01597     win = state->freeRcvBuffer;
01598 
01599     // Following lines are based on [Stevens, W.R.: TCP/IP Illustrated, Volume 2, pages 878-879]:
01600     // Don't advertise less than one full-sized segment to avoid SWS
01601     if (win < (state->maxRcvBuffer / 4) && win < state->snd_mss)
01602         win = 0;
01603 
01604     // Do not shrink window
01605     // (rcv_adv minus rcv_nxt) is the amount of space still available to the sender that was previously advertised
01606     if (win < state->rcv_adv - state->rcv_nxt)
01607         win = state->rcv_adv - state->rcv_nxt;
01608 
01609     // Observe upper limit for advertised window on this connection
01610     if (win > TCP_MAX_WIN && !state->ws_enabled) // TCP_MAX_WIN = 65535 (16 bit)
01611         win = TCP_MAX_WIN; // Note: The window size is limited to a 16 bit value in the TCP header if WINDOW SCALE option (RFC 1323) is not used
01612 
01613     // Note: The order of the "Do not shrink window" and "Observe upper limit" parts has been changed to the order used in FreeBSD Release 7.1
01614 
01615     // update rcv_adv if needed
01616     if (win > 0 && seqGE(state->rcv_nxt + win, state->rcv_adv))
01617     {
01618         state->rcv_adv = state->rcv_nxt + win;
01619         if (rcvAdvVector)
01620             rcvAdvVector->record(state->rcv_adv);
01621     }
01622 
01623     state->rcv_wnd = win;
01624     if (rcvWndVector)
01625         rcvWndVector->record(state->rcv_wnd);
01626 
01627     // scale rcv_wnd:
01628     uint32 scaled_rcv_wnd = state->rcv_wnd;
01629     state->rcv_wnd_scale = 0;
01630     if (state->ws_enabled)
01631     {
01632         while (scaled_rcv_wnd > TCP_MAX_WIN && state->rcv_wnd_scale < 14) // RFC 1323, page 11: "the shift count must be limited to 14"
01633         {
01634             scaled_rcv_wnd = scaled_rcv_wnd >> 1;
01635             state->rcv_wnd_scale++;
01636         }
01637     }
01638     ASSERT(scaled_rcv_wnd == (unsigned short)scaled_rcv_wnd);
01639     return (unsigned short) scaled_rcv_wnd;
01640 }
01641 
01642 void TCPConnection::updateWndInfo(TCPSegment *tcpseg, bool doAlways)
01643 {
01644     uint32 true_window = tcpseg->getWindow();
01645     // RFC 1323, page 10:
01646     // "The window field (SEG.WND) in the header of every incoming
01647     // segment, with the exception of SYN segments, is left-shifted
01648     // by Snd.Wind.Scale bits before updating SND.WND:
01649     //    SND.WND = SEG.WND << Snd.Wind.Scale"
01650     if (state->ws_enabled && !tcpseg->getSynBit())
01651         true_window = tcpseg->getWindow() << state->snd_wnd_scale;
01652 
01653     // Following lines are based on [Stevens, W.R.: TCP/IP Illustrated, Volume 2, page 982]:
01654     if (doAlways || (tcpseg->getAckBit()
01655         && (seqLess(state->snd_wl1, tcpseg->getSequenceNo()) ||
01656         (state->snd_wl1 == tcpseg->getSequenceNo() && seqLE(state->snd_wl2, tcpseg->getAckNo())) ||
01657         (state->snd_wl2 == tcpseg->getAckNo() && true_window > state->snd_wnd))))
01658     {
01659         // send window should be updated
01660         state->snd_wnd = true_window;
01661         tcpEV << "Updating send window from segment: new wnd=" << state->snd_wnd << "\n";
01662         state->snd_wl1 = tcpseg->getSequenceNo();
01663         state->snd_wl2 = tcpseg->getAckNo();
01664         if (sndWndVector)
01665             sndWndVector->record(state->snd_wnd);
01666     }
01667 }
01668 
01669 bool TCPConnection::isLost(uint32 seqNum)
01670 {
01671     ASSERT (state->sack_enabled);
01672     // RFC 3517, page 3: "This routine returns whether the given sequence number is
01673     // considered to be lost.  The routine returns true when either
01674     // DupThresh discontiguous SACKed sequences have arrived above
01675     // 'SeqNum' or (DupThresh * SMSS) bytes with sequence numbers greater
01676     // than 'SeqNum' have been SACKed.  Otherwise, the routine returns
01677     // false."
01678     bool isLost = false;
01679 
01680     ASSERT(seqGE(seqNum,state->snd_una)); // HighAck = snd_una
01681 
01682     if (rexmitQueue->getNumOfDiscontiguousSacks(seqNum) >= DUPTHRESH ||     // DUPTHRESH = 3
01683         rexmitQueue->getAmountOfSackedBytes(seqNum) >= (DUPTHRESH * state->snd_mss))
01684         isLost = true;
01685     else
01686         isLost = false;
01687 
01688     return isLost;
01689 }
01690 
01691 void TCPConnection::setPipe()
01692 {
01693     ASSERT (state->sack_enabled);
01694     // RFC 3517, pages 1 and 2: "
01695     // "HighACK" is the sequence number of the highest byte of data that
01696     // has been cumulatively ACKed at a given point.
01697     //
01698     // "HighData" is the highest sequence number transmitted at a given
01699     // point.
01700     //
01701     // "HighRxt" is the highest sequence number which has been
01702     // retransmitted during the current loss recovery phase.
01703     //
01704     // "Pipe" is a sender's estimate of the number of bytes outstanding
01705     // in the network.  This is used during recovery for limiting the
01706     // sender's sending rate.  The pipe variable allows TCP to use a
01707     // fundamentally different congestion control than specified in
01708     // [RFC2581].  The algorithm is often referred to as the "pipe
01709     // algorithm"."
01710     // HighAck = snd_una
01711     // HighData = snd_max
01712 
01713     state->highRxt = rexmitQueue->getHighestRexmittedSeqNum();
01714     state->pipe = 0;
01715 
01716     uint32 shift = state->snd_mss;
01717     if (state->ts_enabled)
01718         shift -= TCP_OPTION_TS_SIZE;
01719 
01720     // RFC 3517, page 3: "This routine traverses the sequence space from HighACK to HighData
01721     // and MUST set the "pipe" variable to an estimate of the number of
01722     // octets that are currently in transit between the TCP sender and
01723     // the TCP receiver.  After initializing pipe to zero the following
01724     // steps are taken for each octet 'S1' in the sequence space between
01725     // HighACK and HighData that has not been SACKed:"
01726     for (uint32 s1=state->snd_una; s1<state->snd_max; s1=s1+shift)
01727     {
01728         if (rexmitQueue->getSackedBit(s1)==false)
01729         {
01730             // RFC 3517, page 3: "(a) If IsLost (S1) returns false:
01731             //
01732             //     Pipe is incremented by 1 octet.
01733             //
01734             //     The effect of this condition is that pipe is incremented for
01735             //     packets that have not been SACKed and have not been determined
01736             //     to have been lost (i.e., those segments that are still assumed
01737             //     to be in the network)."
01738             if (isLost(s1)==false)
01739                 state->pipe++;
01740 
01741             // RFC 3517, pages 3 and 4: "(b) If S1 <= HighRxt:
01742             //
01743             //     Pipe is incremented by 1 octet.
01744             //
01745             //     The effect of this condition is that pipe is incremented for
01746             //     the retransmission of the octet.
01747             //
01748             //  Note that octets retransmitted without being considered lost are
01749             //  counted twice by the above mechanism."
01750             if (seqLE(s1,state->highRxt))
01751                 state->pipe++;
01752         }
01753     }
01754 
01755     state->pipe = state->pipe * shift;
01756     if (pipeVector)
01757         pipeVector->record(state->pipe);
01758 }
01759 
01760 uint32 TCPConnection::nextSeg()
01761 {
01762     ASSERT (state->sack_enabled);
01763     // RFC 3517, page 5: "This routine uses the scoreboard data structure maintained by the
01764     // Update() function to determine what to transmit based on the SACK
01765     // information that has arrived from the data receiver (and hence
01766     // been marked in the scoreboard).  NextSeg () MUST return the
01767     // sequence number range of the next segment that is to be
01768     // transmitted, per the following rules:"
01769 
01770     state->highRxt = rexmitQueue->getHighestRexmittedSeqNum();
01771     uint32 seqNum = 0;
01772     bool found = false;
01773     uint32 shift = state->snd_mss;
01774     if (state->ts_enabled)
01775         shift -= TCP_OPTION_TS_SIZE;
01776 
01777     // RFC 3517, page 5: "(1) If there exists a smallest unSACKed sequence number 'S2' that
01778     // meets the following three criteria for determining loss, the
01779     // sequence range of one segment of up to SMSS octets starting
01780     // with S2 MUST be returned.
01781     //
01782     // (1.a) S2 is greater than HighRxt.
01783     //
01784     // (1.b) S2 is less than the highest octet covered by any
01785     //       received SACK.
01786     //
01787     // (1.c) IsLost (S2) returns true."
01788     for (uint32 s2=state->snd_una; s2<state->snd_max; s2=s2+shift)
01789     {
01790         if (rexmitQueue->getSackedBit(s2)==false)
01791         {
01792             if (seqGE(s2,state->highRxt) &&
01793                 seqLE(s2,(rexmitQueue->getHighestSackedSeqNum())) &&
01794                 isLost(s2))
01795             {
01796                 seqNum = s2;
01797                 found = true;
01798                 return seqNum;
01799             }
01800         }
01801     }
01802 
01803     // RFC 3517, page 5: "(2) If no sequence number 'S2' per rule (1) exists but there
01804     // exists available unsent data and the receiver's advertised
01805     // window allows, the sequence range of one segment of up to SMSS
01806     // octets of previously unsent data starting with sequence number
01807     // HighData+1 MUST be returned."
01808     if (!found)
01809     {
01810         // check how many unsent bytes we have
01811         ulong buffered = sendQueue->getBytesAvailable(state->snd_max);
01812         ulong maxWindow = state->snd_wnd;
01813         // effectiveWindow: number of bytes we're allowed to send now
01814         ulong effectiveWin = maxWindow - state->pipe;
01815         if (buffered > 0 && effectiveWin >= state->snd_mss)
01816         {
01817             seqNum = state->snd_max; // HighData = snd_max
01818             found = true;
01819             return seqNum;
01820         }
01821     }
01822 
01823     // RFC 3517, pages 5 and 6: "(3) If the conditions for rules (1) and (2) fail, but there exists
01824     // an unSACKed sequence number 'S3' that meets the criteria for
01825     // detecting loss given in steps (1.a) and (1.b) above
01826     // (specifically excluding step (1.c)) then one segment of up to
01827     // SMSS octets starting with S3 MAY be returned.
01828     //
01829     // Note that rule (3) is a sort of retransmission "last resort".
01830     // It allows for retransmission of sequence numbers even when the
01831     // sender has less certainty a segment has been lost than as with
01832     // rule (1).  Retransmitting segments via rule (3) will help
01833     // sustain TCP's ACK clock and therefore can potentially help
01834     // avoid retransmission timeouts.  However, in sending these
01835     // segments the sender has two copies of the same data considered
01836     // to be in the network (and also in the Pipe estimate).  When an
01837     // ACK or SACK arrives covering this retransmitted segment, the
01838     // sender cannot be sure exactly how much data left the network
01839     // (one of the two transmissions of the packet or both
01840     // transmissions of the packet).  Therefore the sender may
01841     // underestimate Pipe by considering both segments to have left
01842     // the network when it is possible that only one of the two has.
01843     //
01844     // We believe that the triggering of rule (3) will be rare and
01845     // that the implications are likely limited to corner cases
01846     // relative to the entire recovery algorithm.  Therefore we leave
01847     // the decision of whether or not to use rule (3) to
01848     // implementors."
01849     if (!found)
01850     {
01851         for (uint32 s3=state->snd_una; s3<state->snd_max; s3=s3+shift)
01852         {
01853             if (rexmitQueue->getSackedBit(s3)==false)
01854             {
01855                 if (seqGE(s3,state->highRxt) &&
01856                     seqLE(s3,(rexmitQueue->getHighestSackedSeqNum())))
01857                 {
01858                     seqNum = s3;
01859                     found = true;
01860                     return seqNum;
01861                 }
01862             }
01863         }
01864     }
01865 
01866     // RFC 3517, page 6: "(4) If the conditions for each of (1), (2), and (3) are not met,
01867     // then NextSeg () MUST indicate failure, and no segment is
01868     // returned."
01869     if (!found)
01870         seqNum = 0;
01871 
01872     return seqNum;
01873 }
01874 
01875 void TCPConnection::sendDataDuringLossRecoveryPhase(uint32 congestionWindow)
01876 {
01877     ASSERT (state->sack_enabled && state->lossRecovery);
01878     // RFC 3517 pages 7 and 8: "(5) In order to take advantage of potential additional available
01879     // cwnd, proceed to step (C) below.
01880     // (...)
01881     // (C) If cwnd - pipe >= 1 SMSS the sender SHOULD transmit one or more
01882     // segments as follows:
01883     // (...)
01884     // (C.5) If cwnd - pipe >= 1 SMSS, return to (C.1)"
01885     while (((int)congestionWindow - (int)state->pipe) >= (int)state->snd_mss) // Note: Typecast needed to avoid prohibited transmissions
01886     {
01887         // RFC 3517 pages 7 and 8: "(C.1) The scoreboard MUST be queried via NextSeg () for the
01888         // sequence number range of the next segment to transmit (if any),
01889         // and the given segment sent.  If NextSeg () returns failure (no
01890         // data to send) return without sending anything (i.e., terminate
01891         // steps C.1 -- C.5)."
01892         uint32 seqNum = nextSeg(); // if nextSeg() returns 0 (=failure): terminate steps C.1 -- C.5
01893         if (seqNum != 0)
01894         {
01895             sendSegmentDuringLossRecoveryPhase(seqNum);
01896             // RFC 3517 page 8: "(C.4) The estimate of the amount of data outstanding in the
01897             // network must be updated by incrementing pipe by the number of
01898             // octets transmitted in (C.1)."
01899             state->pipe += state->sentBytes;
01900         }
01901         else // nextSeg () returns failure: terminate steps C.1 -- C.5
01902             break;
01903     }
01904 }
01905 
01906 void TCPConnection::sendSegmentDuringLossRecoveryPhase(uint32 seqNum)
01907 {
01908     ASSERT (state->sack_enabled && state->lossRecovery);
01909     // start sending from seqNum
01910     state->snd_nxt = seqNum;
01911 
01912     uint32 old_highRxt = rexmitQueue->getHighestRexmittedSeqNum();
01913 
01914     // no need to check cwnd and rwnd - has already be done before
01915     // no need to check nagle - sending mss bytes
01916     sendSegment(state->snd_mss);
01917 
01918     uint32 sentSeqNum = seqNum + state->sentBytes;
01919 
01920     // RFC 3517 page 8: "(C.2) If any of the data octets sent in (C.1) are below HighData,
01921     // HighRxt MUST be set to the highest sequence number of the
01922     // retransmitted segment."
01923     if (seqLE(sentSeqNum, state->snd_max)) // HighData = snd_max
01924     {
01925         ASSERT (sentSeqNum==rexmitQueue->getHighestRexmittedSeqNum());
01926         state->highRxt = rexmitQueue->getHighestRexmittedSeqNum();
01927     }
01928     // RFC 3517 page 8: "(C.3) If any of the data octets sent in (C.1) are above HighData,
01929     // HighData must be updated to reflect the transmission of
01930     // previously unsent data."
01931     else if (seqGE(sentSeqNum, state->snd_max)) // HighData = snd_max
01932         state->snd_max = sentSeqNum;
01933 
01934     if (unackedVector)
01935         unackedVector->record(state->snd_max - state->snd_una);
01936 
01937     // RFC 3517, page 9: "6   Managing the RTO Timer
01938     //
01939     // The standard TCP RTO estimator is defined in [RFC2988].  Due to the
01940     // fact that the SACK algorithm in this document can have an impact on
01941     // the behavior of the estimator, implementers may wish to consider how
01942     // the timer is managed.  [RFC2988] calls for the RTO timer to be
01943     // re-armed each time an ACK arrives that advances the cumulative ACK
01944     // point.  Because the algorithm presented in this document can keep the
01945     // ACK clock going through a fairly significant loss event,
01946     // (comparatively longer than the algorithm described in [RFC2581]), on
01947     // some networks the loss event could last longer than the RTO.  In this
01948     // case the RTO timer would expire prematurely and a segment that need
01949     // not be retransmitted would be resent.
01950     //
01951     // Therefore we give implementers the latitude to use the standard
01952     // [RFC2988] style RTO management or, optionally, a more careful variant
01953     // that re-arms the RTO timer on each retransmission that is sent during
01954     // recovery MAY be used.  This provides a more conservative timer than
01955     // specified in [RFC2988], and so may not always be an attractive
01956     // alternative.  However, in some cases it may prevent needless
01957     // retransmissions, go-back-N transmission and further reduction of the
01958     // congestion window."
01959     tcpAlgorithm->ackSent();
01960     if (old_highRxt != state->highRxt)
01961     {
01962         // Note: Restart of REXMIT timer on retransmission is not part of RFC 2581, however optional in RFC 3517 if sent during recovery.
01963         tcpEV << "Retransmission sent during recovery, restarting REXMIT timer.\n";
01964         tcpAlgorithm->restartRexmitTimer();
01965     }
01966     else // don't measure RTT for retransmitted packets
01967         tcpAlgorithm->dataSent(seqNum); // seqNum = old_snd_nxt
01968 }
01969 
01970 void TCPConnection::sendOneNewSegment(bool fullSegmentsOnly, uint32 congestionWindow)
01971 {
01972     ASSERT (state->limited_transmit_enabled);
01973     // RFC 3042, page 3:
01974     // "When a TCP sender has previously unsent data queued for transmission
01975     // it SHOULD use the Limited Transmit algorithm, which calls for a TCP
01976     // sender to transmit new data upon the arrival of the first two
01977     // consecutive duplicate ACKs when the following conditions are
01978     // satisfied:
01979     //
01980     //  * The receiver's advertised window allows the transmission of the
01981     //  segment.
01982     //
01983     //  * The amount of outstanding data would remain less than or equal
01984     //  to the congestion window plus 2 segments.  In other words, the
01985     //  sender can only send two segments beyond the congestion window
01986     //  (cwnd).
01987     //
01988     // The congestion window (cwnd) MUST NOT be changed when these new
01989     // segments are transmitted.  Assuming that these new segments and the
01990     // corresponding ACKs are not dropped, this procedure allows the sender
01991     // to infer loss using the standard Fast Retransmit threshold of three
01992     // duplicate ACKs [RFC2581].  This is more robust to reordered packets
01993     // than if an old packet were retransmitted on the first or second
01994     // duplicate ACK.
01995     //
01996     // Note: If the connection is using selective acknowledgments [RFC2018],
01997     // the data sender MUST NOT send new segments in response to duplicate
01998     // ACKs that contain no new SACK information, as a misbehaving receiver
01999     // can generate such ACKs to trigger inappropriate transmission of data
02000     // segments.  See [SCWA99] for a discussion of attacks by misbehaving
02001     // receivers."
02002     if (!state->sack_enabled || (state->sack_enabled && state->sackedBytes_old!=state->sackedBytes))
02003     {
02004         // check how many bytes we have
02005         ulong buffered = sendQueue->getBytesAvailable(state->snd_max);
02006 
02007         if (buffered >= state->snd_mss || (!fullSegmentsOnly && buffered > 0))
02008         {
02009             ulong outstandingData = state->snd_max - state->snd_una;
02010             // check conditions from RFC 3042
02011             if (outstandingData + state->snd_mss <= state->snd_wnd &&
02012                 outstandingData + state->snd_mss <= congestionWindow + 2*state->snd_mss)
02013             {
02014                 uint32 effectiveWin = std::min (state->snd_wnd, congestionWindow) - outstandingData + 2*state->snd_mss; // RFC 3042, page 3: "(...)the sender can only send two segments beyond the congestion window (cwnd)."
02015                 // bytes: number of bytes we're allowed to send now
02016                 uint32 bytes = std::min(effectiveWin, state->snd_mss);
02017                 if (bytes >= state->snd_mss || (!fullSegmentsOnly && bytes > 0))
02018                 {
02019                     uint32 old_snd_nxt = state->snd_nxt;
02020                     // we'll start sending from snd_max
02021                     state->snd_nxt = state->snd_max;
02022 
02023                     tcpEV << "Limited Transmit algorithm enabled. Sending one new segment.\n";
02024                     sendSegment(bytes);
02025 
02026                     if (seqGreater(state->snd_nxt, state->snd_max))
02027                         state->snd_max = state->snd_nxt;
02028 
02029                     if (unackedVector)
02030                         unackedVector->record(state->snd_max - state->snd_una);
02031 
02032                     // reset snd_nxt if needed
02033                     if (state->afterRto)
02034                         state->snd_nxt = old_snd_nxt + state->sentBytes;
02035 
02036                     // notify
02037                     tcpAlgorithm->ackSent();
02038                     tcpAlgorithm->dataSent(old_snd_nxt);
02039                 }
02040             }
02041         }
02042     }
02043 }
02044 
02045 uint32 TCPConnection::convertSimtimeToTS(simtime_t simtime)
02046 {
02047     ASSERT (SimTime::getScaleExp() <= -3); // FIXME TODO - If the scale factor is different, we need to adjust our simTime to uint32 casts - we are currently using ms precision
02048     uint32 timestamp = (uint32) (simtime.dbl() * 1000);
02049     return timestamp;
02050 }
02051 
02052 simtime_t TCPConnection::convertTSToSimtime(uint32 timestamp)
02053 {
02054     ASSERT (SimTime::getScaleExp() <= -3); // FIXME TODO - If the scale factor is different, we need to adjust our simTime to uint32 casts - we are currently using ms precision
02055     simtime_t simtime = (simtime_t) ((double) timestamp * 0.001);
02056     return simtime;
02057 }
02058 
02059 bool TCPConnection::isSendQueueEmpty()
02060 {
02061     return (sendQueue->getBytesAvailable(state->snd_nxt) == 0);
02062 }