mirror of
https://github.com/oxen-io/lokinet.git
synced 2024-11-07 15:20:31 +00:00
4065 lines
115 KiB
C++
4065 lines
115 KiB
C++
/*
|
|
* Copyright (c) 2010-2013 BitTorrent, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
#include <limits.h> // for UINT_MAX
|
|
#include <time.h>
|
|
|
|
#include "utp_types.h"
|
|
#include "utp_packedsockaddr.h"
|
|
#include "utp_internal.h"
|
|
#include "utp_hash.h"
|
|
|
|
#define TIMEOUT_CHECK_INTERVAL 500
|
|
|
|
// number of bytes to increase max window size by, per RTT. This is
|
|
// scaled down linearly proportional to off_target. i.e. if all packets
|
|
// in one window have 0 delay, window size will increase by this number.
|
|
// Typically it's less. TCP increases one MSS per RTT, which is 1500
|
|
#define MAX_CWND_INCREASE_BYTES_PER_RTT 3000
|
|
#define CUR_DELAY_SIZE 3
|
|
// experiments suggest that a clock skew of 10 ms per 325 seconds
|
|
// is not impossible. Reset delay_base every 13 minutes. The clock
|
|
// skew is dealt with by observing the delay base in the other
|
|
// direction, and adjusting our own upwards if the opposite direction
|
|
// delay base keeps going down
|
|
#define DELAY_BASE_HISTORY 13
|
|
#define MAX_WINDOW_DECAY 100 // ms
|
|
|
|
#define REORDER_BUFFER_SIZE 32
|
|
#define REORDER_BUFFER_MAX_SIZE 1024
|
|
#define OUTGOING_BUFFER_MAX_SIZE 1024
|
|
|
|
#define PACKET_SIZE 1435
|
|
|
|
// this is the minimum max_window value. It can never drop below this
|
|
#define MIN_WINDOW_SIZE 10
|
|
|
|
// if we receive 4 or more duplicate acks, we resend the packet
|
|
// that hasn't been acked yet
|
|
#define DUPLICATE_ACKS_BEFORE_RESEND 3
|
|
|
|
// Allow a reception window of at least 3 ack_nrs behind seq_nr
|
|
// A non-SYN packet with an ack_nr difference greater than this is
|
|
// considered suspicious and ignored
|
|
#define ACK_NR_ALLOWED_WINDOW DUPLICATE_ACKS_BEFORE_RESEND
|
|
|
|
#define RST_INFO_TIMEOUT 10000
|
|
#define RST_INFO_LIMIT 1000
|
|
// 29 seconds determined from measuring many home NAT devices
|
|
#define KEEPALIVE_INTERVAL 29000
|
|
|
|
#define SEQ_NR_MASK 0xFFFF
|
|
#define ACK_NR_MASK 0xFFFF
|
|
#define TIMESTAMP_MASK 0xFFFFFFFF
|
|
|
|
// Integer division of num by denom, rounded up (for non-negative operands).
// Every use of an argument is parenthesized so that compound expressions such
// as `a + b` expand with the intended precedence.
// NOTE: denom is evaluated twice; do not pass expressions with side effects.
#define DIV_ROUND_UP(num, denom) (((num) + (denom)-1) / (denom))
|
|
|
|
// The totals are derived from the following data:
|
|
// 45: IPv6 address including embedded IPv4 address
|
|
// 11: Scope Id
|
|
// 2: Brackets around IPv6 address when port is present
|
|
// 6: Port (including colon)
|
|
// 1: Terminating null byte
|
|
char addrbuf[65];
|
|
#define addrfmt(x, s) x.fmt(s, sizeof(s))
|
|
|
|
#if(defined(__SVR4) && defined(__sun))
|
|
#pragma pack(1)
|
|
#else
|
|
#pragma pack(push, 1)
|
|
#endif
|
|
|
|
// these packet sizes are including the uTP header which
|
|
// is either 20 or 23 bytes depending on version
|
|
#define PACKET_SIZE_EMPTY_BUCKET 0
|
|
#define PACKET_SIZE_EMPTY 23
|
|
#define PACKET_SIZE_SMALL_BUCKET 1
|
|
#define PACKET_SIZE_SMALL 373
|
|
#define PACKET_SIZE_MID_BUCKET 2
|
|
#define PACKET_SIZE_MID 723
|
|
#define PACKET_SIZE_BIG_BUCKET 3
|
|
#define PACKET_SIZE_BIG 1400
|
|
#define PACKET_SIZE_HUGE_BUCKET 4
|
|
|
|
struct PACKED_ATTRIBUTE PacketFormatV1
|
|
{
|
|
// packet_type (4 high bits)
|
|
// protocol version (4 low bits)
|
|
byte ver_type;
|
|
byte
|
|
version() const
|
|
{
|
|
return ver_type & 0xf;
|
|
}
|
|
byte
|
|
type() const
|
|
{
|
|
return ver_type >> 4;
|
|
}
|
|
void
|
|
set_version(byte v)
|
|
{
|
|
ver_type = (ver_type & 0xf0) | (v & 0xf);
|
|
}
|
|
void
|
|
set_type(byte t)
|
|
{
|
|
ver_type = (ver_type & 0xf) | (t << 4);
|
|
}
|
|
|
|
// Type of the first extension header
|
|
byte ext;
|
|
// connection ID
|
|
uint16_big connid;
|
|
uint32_big tv_usec;
|
|
uint32_big reply_micro;
|
|
// receive window size in bytes
|
|
uint32_big windowsize;
|
|
// Sequence number
|
|
uint16_big seq_nr;
|
|
// Acknowledgment number
|
|
uint16_big ack_nr;
|
|
};
|
|
|
|
struct PACKED_ATTRIBUTE PacketFormatAckV1
|
|
{
|
|
PacketFormatV1 pf;
|
|
byte ext_next;
|
|
byte ext_len;
|
|
byte acks[4];
|
|
};
|
|
|
|
#if(defined(__SVR4) && defined(__sun))
|
|
#pragma pack(0)
|
|
#else
|
|
#pragma pack(pop)
|
|
#endif
|
|
|
|
// uTP packet types, as stored in the high nibble of PacketFormatV1::ver_type.
enum
{
  // Data packet.
  ST_DATA = 0,
  // Finalize the connection. This is the last packet.
  ST_FIN = 1,
  // State packet. Used to transmit an ACK with no data.
  ST_STATE = 2,
  // Terminate connection forcefully.
  ST_RESET = 3,
  // Connect SYN
  ST_SYN = 4,
  // number of packet types; used for bounds checking
  ST_NUM_STATES,
};
|
|
|
|
// Connection state of a socket. The values are consecutive, starting at 0,
// in the order listed here.
enum CONN_STATE
{
  CS_UNINITIALIZED = 0,
  CS_IDLE = 1,
  // packets sent while in this state are accounted as connect overhead
  CS_SYN_SENT = 2,
  CS_SYN_RECV = 3,
  CS_CONNECTED = 4,
  CS_CONNECTED_FULL = 5,
  CS_RESET = 6,
  CS_DESTROY = 7
};
|
|
|
|
#if UTP_DEBUG_LOGGING
// Printable names of the packet types, indexed by the ST_* value
// (ST_DATA .. ST_SYN).
static const cstr flagnames[] = {"ST_DATA", "ST_FIN", "ST_STATE", "ST_RESET",
                                 "ST_SYN"};

// Printable names of the connection states, intended to be indexed by a
// CONN_STATE value. There must be exactly one entry per enumerator, in
// declaration order (CS_UNINITIALIZED .. CS_DESTROY). A stale "DESTROY_DELAY"
// entry, which has no matching enumerator, used to sit before "RESET" and
// shifted the names of CS_RESET and CS_DESTROY by one.
static const cstr statenames[] = {
    "UNINITIALIZED", "IDLE",           "SYN_SENT", "SYN_RECV",
    "CONNECTED",     "CONNECTED_FULL", "RESET",    "DESTROY"};

#endif
|
|
|
|
struct OutgoingPacket
|
|
{
|
|
size_t length;
|
|
size_t payload;
|
|
uint64 time_sent; // microseconds
|
|
uint transmissions : 31;
|
|
bool need_resend : 1;
|
|
byte data[1];
|
|
};
|
|
|
|
// Growable ring buffer of void* slots that is addressed by a (wrapping)
// sequence number. The capacity is always a power of two, so the slot for
// sequence number i is simply `i & mask`.
struct SizableCircularBuffer
{
  // This is the mask. Since the capacity is always a power of 2, adding 1 to
  // this value gives the capacity.
  size_t mask;
  // The slot storage, holding mask + 1 entries. It is replaced by a larger
  // calloc()ed array (and the old one free()d) whenever the buffer grows.
  void **elements;

  // Returns the pointer stored in the slot of sequence number i, or NULL when
  // there is no storage at all (which also trips the assert in debug builds).
  void *
  get(size_t i) const
  {
    assert(elements);
    return elements ? elements[i & mask] : NULL;
  }

  // Stores data in the slot of sequence number i. The storage must exist.
  void
  put(size_t i, void *data)
  {
    assert(elements);
    elements[i & mask] = data;
  }

  // Enlarges the storage so that offset `index` fits; see the definition.
  void
  grow(size_t item, size_t index);

  // Makes sure that offset `index` (counted from the start of the window that
  // contains sequence number `item`) fits into the buffer, growing if needed.
  void
  ensure_size(size_t item, size_t index)
  {
    if(index > mask)
      grow(item, index);
  }

  // Current capacity in slots.
  size_t
  size() const
  {
    return mask + 1;
  }
};

// Doubles the capacity until offset `index` fits, then moves every stored
// pointer to the slot it belongs to under the new mask.
//
// item:  the sequence number we want to make space for
// index: the offset of that sequence number in the list, so that
//        `item - index` is the sequence number of the first entry
//
// Allocation failure is fatal: the process is aborted instead of writing
// through a NULL pointer.
void
SizableCircularBuffer::grow(size_t item, size_t index)
{
  // Figure out the new capacity; it stays a power of two.
  size_t size = mask + 1;
  do
    size *= 2;
  while(index >= size);

  // Allocate the new, zero-initialized (all slots empty) storage.
  void **buf = (void **)calloc(size, sizeof(void *));
  if(buf == NULL)
  {
    // Without storage the copy below would dereference NULL; fail loudly and
    // deterministically instead of invoking undefined behavior.
    abort();
  }

  // from here on `size` is the new mask
  size--;

  // Walk the mask + 1 consecutive sequence numbers starting at the first
  // entry. This visits every old slot exactly once and re-homes its content.
  const size_t first = item - index;
  for(size_t i = 0; i <= mask; i++)
  {
    buf[(first + i) & size] = get(first + i);
  }

  // Swap to the newly allocated buffer
  mask = size;
  free(elements);
  elements = buf;
}
|
|
|
|
// compare if lhs is less than rhs, taking wrapping
|
|
// into account. if lhs is close to UINT_MAX and rhs
|
|
// is close to 0, lhs is assumed to have wrapped and
|
|
// considered smaller
|
|
bool
|
|
wrapping_compare_less(uint32 lhs, uint32 rhs, uint32 mask)
|
|
{
|
|
// distance walking from lhs to rhs, downwards
|
|
const uint32 dist_down = (lhs - rhs) & mask;
|
|
// distance walking from lhs to rhs, upwards
|
|
const uint32 dist_up = (rhs - lhs) & mask;
|
|
|
|
// if the distance walking up is shorter, lhs
|
|
// is less than rhs. If the distance walking down
|
|
// is shorter, then rhs is less than lhs
|
|
return dist_up < dist_down;
|
|
}
|
|
|
|
struct DelayHist
|
|
{
|
|
uint32 delay_base;
|
|
|
|
// this is the history of delay samples,
|
|
// normalized by using the delay_base. These
|
|
// values are always greater than 0 and measures
|
|
// the queuing delay in microseconds
|
|
uint32 cur_delay_hist[CUR_DELAY_SIZE];
|
|
size_t cur_delay_idx;
|
|
|
|
// this is the history of delay_base. It's
|
|
// a number that doesn't have an absolute meaning
|
|
// only relative. It doesn't make sense to initialize
|
|
// it to anything other than values relative to
|
|
// what's been seen in the real world.
|
|
uint32 delay_base_hist[DELAY_BASE_HISTORY];
|
|
size_t delay_base_idx;
|
|
// the time when we last stepped the delay_base_idx
|
|
uint64 delay_base_time;
|
|
|
|
bool delay_base_initialized;
|
|
|
|
void
|
|
clear(uint64 current_ms)
|
|
{
|
|
delay_base_initialized = false;
|
|
delay_base = 0;
|
|
cur_delay_idx = 0;
|
|
delay_base_idx = 0;
|
|
delay_base_time = current_ms;
|
|
for(size_t i = 0; i < CUR_DELAY_SIZE; i++)
|
|
{
|
|
cur_delay_hist[i] = 0;
|
|
}
|
|
for(size_t i = 0; i < DELAY_BASE_HISTORY; i++)
|
|
{
|
|
delay_base_hist[i] = 0;
|
|
}
|
|
}
|
|
|
|
void
|
|
shift(const uint32 offset)
|
|
{
|
|
// the offset should never be "negative"
|
|
// assert(offset < 0x10000000);
|
|
|
|
// increase all of our base delays by this amount
|
|
// this is used to take clock skew into account
|
|
// by observing the other side's changes in its base_delay
|
|
for(size_t i = 0; i < DELAY_BASE_HISTORY; i++)
|
|
{
|
|
delay_base_hist[i] += offset;
|
|
}
|
|
delay_base += offset;
|
|
}
|
|
|
|
void
|
|
add_sample(const uint32 sample, uint64 current_ms)
|
|
{
|
|
// The two clocks (in the two peers) are assumed not to
|
|
// progress at the exact same rate. They are assumed to be
|
|
// drifting, which causes the delay samples to contain
|
|
// a systematic error, either they are under-
|
|
// estimated or over-estimated. This is why we update the
|
|
// delay_base every two minutes, to adjust for this.
|
|
|
|
// This means the values will keep drifting and eventually wrap.
|
|
// We can cross the wrapping boundry in two directions, either
|
|
// going up, crossing the highest value, or going down, crossing 0.
|
|
|
|
// if the delay_base is close to the max value and sample actually
|
|
// wrapped on the other end we would see something like this:
|
|
// delay_base = 0xffffff00, sample = 0x00000400
|
|
// sample - delay_base = 0x500 which is the correct difference
|
|
|
|
// if the delay_base is instead close to 0, and we got an even lower
|
|
// sample (that will eventually update the delay_base), we may see
|
|
// something like this:
|
|
// delay_base = 0x00000400, sample = 0xffffff00
|
|
// sample - delay_base = 0xfffffb00
|
|
// this needs to be interpreted as a negative number and the actual
|
|
// recorded delay should be 0.
|
|
|
|
// It is important that all arithmetic that assume wrapping
|
|
// is done with unsigned intergers. Signed integers are not guaranteed
|
|
// to wrap the way unsigned integers do. At least GCC takes advantage
|
|
// of this relaxed rule and won't necessarily wrap signed ints.
|
|
|
|
// remove the clock offset and propagation delay.
|
|
// delay base is min of the sample and the current
|
|
// delay base. This min-operation is subject to wrapping
|
|
// and care needs to be taken to correctly choose the
|
|
// true minimum.
|
|
|
|
// specifically the problem case is when delay_base is very small
|
|
// and sample is very large (because it wrapped past zero), sample
|
|
// needs to be considered the smaller
|
|
|
|
if(!delay_base_initialized)
|
|
{
|
|
// delay_base being 0 suggests that we haven't initialized
|
|
// it or its history with any real measurements yet. Initialize
|
|
// everything with this sample.
|
|
for(size_t i = 0; i < DELAY_BASE_HISTORY; i++)
|
|
{
|
|
// if we don't have a value, set it to the current sample
|
|
delay_base_hist[i] = sample;
|
|
continue;
|
|
}
|
|
delay_base = sample;
|
|
delay_base_initialized = true;
|
|
}
|
|
|
|
if(wrapping_compare_less(sample, delay_base_hist[delay_base_idx],
|
|
TIMESTAMP_MASK))
|
|
{
|
|
// sample is smaller than the current delay_base_hist entry
|
|
// update it
|
|
delay_base_hist[delay_base_idx] = sample;
|
|
}
|
|
|
|
// is sample lower than delay_base? If so, update delay_base
|
|
if(wrapping_compare_less(sample, delay_base, TIMESTAMP_MASK))
|
|
{
|
|
// sample is smaller than the current delay_base
|
|
// update it
|
|
delay_base = sample;
|
|
}
|
|
|
|
// this operation may wrap, and is supposed to
|
|
const uint32 delay = sample - delay_base;
|
|
// sanity check. If this is triggered, something fishy is going on
|
|
// it means the measured sample was greater than 32 seconds!
|
|
// assert(delay < 0x2000000);
|
|
|
|
cur_delay_hist[cur_delay_idx] = delay;
|
|
cur_delay_idx = (cur_delay_idx + 1) % CUR_DELAY_SIZE;
|
|
|
|
// once every minute
|
|
if(current_ms - delay_base_time > 60 * 1000)
|
|
{
|
|
delay_base_time = current_ms;
|
|
delay_base_idx = (delay_base_idx + 1) % DELAY_BASE_HISTORY;
|
|
// clear up the new delay base history spot by initializing
|
|
// it to the current sample, then update it
|
|
delay_base_hist[delay_base_idx] = sample;
|
|
delay_base = delay_base_hist[0];
|
|
// Assign the lowest delay in the last 2 minutes to delay_base
|
|
for(size_t i = 0; i < DELAY_BASE_HISTORY; i++)
|
|
{
|
|
if(wrapping_compare_less(delay_base_hist[i], delay_base,
|
|
TIMESTAMP_MASK))
|
|
delay_base = delay_base_hist[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
uint32
|
|
get_value()
|
|
{
|
|
uint32 value = UINT_MAX;
|
|
for(size_t i = 0; i < CUR_DELAY_SIZE; i++)
|
|
{
|
|
value = min< uint32 >(cur_delay_hist[i], value);
|
|
}
|
|
// value could be UINT_MAX if we have no samples yet...
|
|
return value;
|
|
}
|
|
};
|
|
|
|
struct UTPSocket
|
|
{
|
|
~UTPSocket();
|
|
|
|
PackedSockAddr addr;
|
|
utp_context *ctx;
|
|
|
|
int ida; // for ack socket list
|
|
|
|
uint16 retransmit_count;
|
|
|
|
uint16 reorder_count;
|
|
byte duplicate_ack;
|
|
|
|
// the number of packets in the send queue. Packets that haven't
|
|
// yet been sent count as well as packets marked as needing resend
|
|
// the oldest un-acked packet in the send queue is seq_nr - cur_window_packets
|
|
uint16 cur_window_packets;
|
|
|
|
// how much of the window is used, number of bytes in-flight
|
|
// packets that have not yet been sent do not count, packets
|
|
// that are marked as needing to be re-sent (due to a timeout)
|
|
// don't count either
|
|
size_t cur_window;
|
|
// maximum window size, in bytes
|
|
size_t max_window;
|
|
// UTP_SNDBUF setting, in bytes
|
|
size_t opt_sndbuf;
|
|
// UTP_RCVBUF setting, in bytes
|
|
size_t opt_rcvbuf;
|
|
|
|
// this is the target delay, in microseconds
|
|
// for this socket. defaults to 100000.
|
|
size_t target_delay;
|
|
|
|
// Is a FIN packet in the reassembly buffer?
|
|
bool got_fin : 1;
|
|
// Have we reached the FIN?
|
|
bool got_fin_reached : 1;
|
|
|
|
// Have we sent our FIN?
|
|
bool fin_sent : 1;
|
|
// Has our fin been ACKed?
|
|
bool fin_sent_acked : 1;
|
|
|
|
// Reading is disabled
|
|
bool read_shutdown : 1;
|
|
// User called utp_close()
|
|
bool close_requested : 1;
|
|
|
|
// Timeout procedure
|
|
bool fast_timeout : 1;
|
|
|
|
// max receive window for other end, in bytes
|
|
size_t max_window_user;
|
|
CONN_STATE state;
|
|
// TickCount when we last decayed window (wraps)
|
|
int64 last_rwin_decay;
|
|
|
|
// the sequence number of the FIN packet. This field is only set
|
|
// when we have received a FIN, and the flag field has the FIN flag set.
|
|
// it is used to know when it is safe to destroy the socket, we must have
|
|
// received all packets up to this sequence number first.
|
|
uint16 eof_pkt;
|
|
|
|
// All sequence numbers up to including this have been properly received
|
|
// by us
|
|
uint16 ack_nr;
|
|
// This is the sequence number for the next packet to be sent.
|
|
uint16 seq_nr;
|
|
|
|
uint16 timeout_seq_nr;
|
|
|
|
// This is the sequence number of the next packet we're allowed to
|
|
// do a fast resend with. This makes sure we only do a fast-resend
|
|
// once per packet. We can resend the packet with this sequence number
|
|
// or any later packet (with a higher sequence number).
|
|
uint16 fast_resend_seq_nr;
|
|
|
|
uint32 reply_micro;
|
|
|
|
uint64 last_got_packet;
|
|
uint64 last_sent_packet;
|
|
uint64 last_measured_delay;
|
|
|
|
// timestamp of the last time the cwnd was full
|
|
// this is used to prevent the congestion window
|
|
// from growing when we're not sending at capacity
|
|
mutable uint64 last_maxed_out_window;
|
|
|
|
void *userdata;
|
|
|
|
// Round trip time
|
|
uint rtt;
|
|
// Round trip time variance
|
|
uint rtt_var;
|
|
// Round trip timeout
|
|
uint rto;
|
|
DelayHist rtt_hist;
|
|
uint retransmit_timeout;
|
|
// The RTO timer will timeout here.
|
|
uint64 rto_timeout;
|
|
// When the window size is set to zero, start this timer. It will send a new
|
|
// packet every 30secs.
|
|
uint64 zerowindow_time;
|
|
|
|
uint32 conn_seed;
|
|
// Connection ID for packets I receive
|
|
uint32 conn_id_recv;
|
|
// Connection ID for packets I send
|
|
uint32 conn_id_send;
|
|
// Last rcv window we advertised, in bytes
|
|
size_t last_rcv_win;
|
|
|
|
DelayHist our_hist;
|
|
DelayHist their_hist;
|
|
|
|
// extension bytes from SYN packet
|
|
byte extensions[8];
|
|
|
|
// MTU Discovery
|
|
// time when we should restart the MTU discovery
|
|
uint64 mtu_discover_time;
|
|
// ceiling and floor of binary search. last is the mtu size
|
|
// we're currently using
|
|
uint32 mtu_ceiling, mtu_floor, mtu_last;
|
|
// we only ever have a single probe in flight at any given time.
|
|
// this is the sequence number of that probe, and the size of
|
|
// that packet
|
|
uint32 mtu_probe_seq, mtu_probe_size;
|
|
|
|
// this is the average delay samples, as compared to the initial
|
|
// sample. It's averaged over 5 seconds
|
|
int32 average_delay;
|
|
// this is the sum of all the delay samples
|
|
// we've made recently. The important distinction
|
|
// of these samples is that they are all made compared
|
|
// to the initial sample, this is to deal with
|
|
// wrapping in a simple way.
|
|
int64 current_delay_sum;
|
|
// number of sample ins current_delay_sum
|
|
int current_delay_samples;
|
|
// initialized to 0, set to the first raw delay sample
|
|
// each sample that's added to current_delay_sum
|
|
// is subtracted from the value first, to make it
|
|
// a delay relative to this sample
|
|
uint32 average_delay_base;
|
|
// the next time we should add an average delay
|
|
// sample into average_delay_hist
|
|
uint64 average_sample_time;
|
|
// the estimated clock drift between our computer
|
|
// and the endpoint computer. The unit is microseconds
|
|
// per 5 seconds
|
|
int32 clock_drift;
|
|
// just used for logging
|
|
int32 clock_drift_raw;
|
|
|
|
SizableCircularBuffer inbuf, outbuf;
|
|
|
|
#ifdef _DEBUG
|
|
// Public per-socket statistics, returned by utp_get_stats()
|
|
utp_socket_stats _stats;
|
|
#endif
|
|
|
|
// true if we're in slow-start (exponential growth) phase
|
|
bool slow_start;
|
|
|
|
// the slow-start threshold, in bytes
|
|
size_t ssthresh;
|
|
|
|
void
|
|
log(int level, char const *fmt, ...)
|
|
{
|
|
va_list va;
|
|
char buf[4096], buf2[4096];
|
|
|
|
// don't bother with vsnprintf() etc calls if we're not going to log.
|
|
if(!ctx->would_log(level))
|
|
{
|
|
return;
|
|
}
|
|
|
|
va_start(va, fmt);
|
|
vsnprintf(buf, 4096, fmt, va);
|
|
va_end(va);
|
|
buf[4095] = '\0';
|
|
|
|
snprintf(buf2, 4096, "%p %s %06u %s", this, addrfmt(addr, addrbuf),
|
|
conn_id_recv, buf);
|
|
buf2[4095] = '\0';
|
|
|
|
ctx->log_unchecked(this, buf2);
|
|
}
|
|
|
|
void
|
|
schedule_ack();
|
|
|
|
// called every time mtu_floor or mtu_ceiling are adjusted
|
|
void
|
|
mtu_search_update();
|
|
void
|
|
mtu_reset();
|
|
|
|
// Calculates the current receive window
|
|
size_t
|
|
get_rcv_window()
|
|
{
|
|
// Trim window down according to what's already in buffer.
|
|
const size_t numbuf = utp_call_get_read_buffer_size(this->ctx, this);
|
|
assert((int)numbuf >= 0);
|
|
return opt_rcvbuf > numbuf ? opt_rcvbuf - numbuf : 0;
|
|
}
|
|
|
|
// Test if we're ready to decay max_window
|
|
// XXX this breaks when spaced by > INT_MAX/2, which is 49
|
|
// days; the failure mode in that case is we do an extra decay
|
|
// or fail to do one when we really shouldn't.
|
|
bool
|
|
can_decay_win(int64 msec) const
|
|
{
|
|
return (msec - last_rwin_decay) >= MAX_WINDOW_DECAY;
|
|
}
|
|
|
|
// If we can, decay max window, returns true if we actually did so
|
|
void
|
|
maybe_decay_win(uint64 current_ms)
|
|
{
|
|
if(can_decay_win(current_ms))
|
|
{
|
|
// TCP uses 0.5
|
|
max_window = (size_t)(max_window * .5);
|
|
last_rwin_decay = current_ms;
|
|
if(max_window < MIN_WINDOW_SIZE)
|
|
max_window = MIN_WINDOW_SIZE;
|
|
slow_start = false;
|
|
ssthresh = max_window;
|
|
}
|
|
}
|
|
|
|
size_t
|
|
get_header_size() const
|
|
{
|
|
return sizeof(PacketFormatV1);
|
|
}
|
|
|
|
size_t
|
|
get_udp_mtu()
|
|
{
|
|
socklen_t len;
|
|
SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len);
|
|
return utp_call_get_udp_mtu(this->ctx, this, (const struct sockaddr *)&sa,
|
|
len);
|
|
}
|
|
|
|
size_t
|
|
get_udp_overhead()
|
|
{
|
|
socklen_t len;
|
|
SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len);
|
|
return utp_call_get_udp_overhead(this->ctx, this,
|
|
(const struct sockaddr *)&sa, len);
|
|
}
|
|
|
|
size_t
|
|
get_overhead()
|
|
{
|
|
return get_udp_overhead() + get_header_size();
|
|
}
|
|
|
|
void
|
|
send_data(byte *b, size_t length, bandwidth_type_t type, uint32 flags = 0);
|
|
|
|
void
|
|
send_ack(bool synack = false);
|
|
|
|
void
|
|
send_keep_alive();
|
|
|
|
static void
|
|
send_rst(utp_context *ctx, const PackedSockAddr &addr, uint32 conn_id_send,
|
|
uint16 ack_nr, uint16 seq_nr);
|
|
|
|
void
|
|
send_packet(OutgoingPacket *pkt);
|
|
|
|
bool
|
|
is_full(int bytes = -1);
|
|
bool
|
|
flush_packets();
|
|
void
|
|
write_outgoing_packet(size_t payload, uint flags, struct utp_iovec *iovec,
|
|
size_t num_iovecs);
|
|
|
|
#ifdef _DEBUG
|
|
void
|
|
check_invariant();
|
|
#endif
|
|
|
|
void
|
|
check_timeouts();
|
|
int
|
|
ack_packet(uint16 seq);
|
|
size_t
|
|
selective_ack_bytes(uint base, const byte *mask, byte len, int64 &min_rtt);
|
|
void
|
|
selective_ack(uint base, const byte *mask, byte len);
|
|
void
|
|
apply_ccontrol(size_t bytes_acked, uint32 actual_delay, int64 min_rtt);
|
|
size_t
|
|
get_packet_size() const;
|
|
};
|
|
|
|
void
|
|
removeSocketFromAckList(UTPSocket *conn)
|
|
{
|
|
if(conn->ida >= 0)
|
|
{
|
|
UTPSocket *last =
|
|
conn->ctx->ack_sockets[conn->ctx->ack_sockets.GetCount() - 1];
|
|
|
|
assert(last->ida < (int)(conn->ctx->ack_sockets.GetCount()));
|
|
assert(conn->ctx->ack_sockets[last->ida] == last);
|
|
last->ida = conn->ida;
|
|
conn->ctx->ack_sockets[conn->ida] = last;
|
|
conn->ida = -1;
|
|
|
|
// Decrease the count
|
|
conn->ctx->ack_sockets.SetCount(conn->ctx->ack_sockets.GetCount() - 1);
|
|
}
|
|
}
|
|
|
|
static void
|
|
utp_register_sent_packet(utp_context *ctx, size_t length)
|
|
{
|
|
if(length <= PACKET_SIZE_MID)
|
|
{
|
|
if(length <= PACKET_SIZE_EMPTY)
|
|
{
|
|
ctx->context_stats._nraw_send[PACKET_SIZE_EMPTY_BUCKET]++;
|
|
}
|
|
else if(length <= PACKET_SIZE_SMALL)
|
|
{
|
|
ctx->context_stats._nraw_send[PACKET_SIZE_SMALL_BUCKET]++;
|
|
}
|
|
else
|
|
ctx->context_stats._nraw_send[PACKET_SIZE_MID_BUCKET]++;
|
|
}
|
|
else
|
|
{
|
|
if(length <= PACKET_SIZE_BIG)
|
|
{
|
|
ctx->context_stats._nraw_send[PACKET_SIZE_BIG_BUCKET]++;
|
|
}
|
|
else
|
|
ctx->context_stats._nraw_send[PACKET_SIZE_HUGE_BUCKET]++;
|
|
}
|
|
}
|
|
|
|
void
|
|
send_to_addr(utp_context *ctx, const byte *p, size_t len,
|
|
const PackedSockAddr &addr, int flags = 0)
|
|
{
|
|
socklen_t tolen;
|
|
SOCKADDR_STORAGE to = addr.get_sockaddr_storage(&tolen);
|
|
utp_register_sent_packet(ctx, len);
|
|
utp_call_sendto(ctx, NULL, p, len, (const struct sockaddr *)&to, tolen,
|
|
flags);
|
|
}
|
|
|
|
void
|
|
UTPSocket::schedule_ack()
|
|
{
|
|
if(ida == -1)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "schedule_ack");
|
|
#endif
|
|
ida = ctx->ack_sockets.Append(this);
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "schedule_ack: already in list");
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void
|
|
UTPSocket::send_data(byte *b, size_t length, bandwidth_type_t type,
|
|
uint32 flags)
|
|
{
|
|
// time stamp this packet with local time, the stamp goes into
|
|
// the header of every packet at the 8th byte for 8 bytes :
|
|
// two integers, check packet.h for more
|
|
uint64 time = utp_call_get_microseconds(ctx, this);
|
|
|
|
PacketFormatV1 *b1 = (PacketFormatV1 *)b;
|
|
b1->tv_usec = (uint32)time;
|
|
b1->reply_micro = reply_micro;
|
|
|
|
last_sent_packet = ctx->current_ms;
|
|
|
|
#ifdef _DEBUG
|
|
_stats.nbytes_xmit += length;
|
|
++_stats.nxmit;
|
|
#endif
|
|
|
|
if(ctx->callbacks[UTP_ON_OVERHEAD_STATISTICS])
|
|
{
|
|
size_t n;
|
|
if(type == payload_bandwidth)
|
|
{
|
|
// if this packet carries payload, just
|
|
// count the header as overhead
|
|
type = header_overhead;
|
|
n = get_overhead();
|
|
}
|
|
else
|
|
{
|
|
n = length + get_udp_overhead();
|
|
}
|
|
utp_call_on_overhead_statistics(ctx, this, true, n, type);
|
|
}
|
|
#if UTP_DEBUG_LOGGING
|
|
int flags2 = b1->type();
|
|
uint16 seq_nr = b1->seq_nr;
|
|
uint16 ack_nr = b1->ack_nr;
|
|
log(UTP_LOG_DEBUG,
|
|
"send %s len:%u id:%u timestamp:" I64u
|
|
" reply_micro:%u flags:%s seq_nr:%u ack_nr:%u",
|
|
addrfmt(addr, addrbuf), (uint)length, conn_id_send, time, reply_micro,
|
|
flagnames[flags2], seq_nr, ack_nr);
|
|
#endif
|
|
send_to_addr(ctx, b, length, addr, flags);
|
|
removeSocketFromAckList(this);
|
|
}
|
|
|
|
void
|
|
UTPSocket::send_ack(bool synack)
|
|
{
|
|
PacketFormatAckV1 pfa;
|
|
zeromem(&pfa);
|
|
|
|
size_t len;
|
|
last_rcv_win = get_rcv_window();
|
|
pfa.pf.set_version(1);
|
|
pfa.pf.set_type(ST_STATE);
|
|
pfa.pf.ext = 0;
|
|
pfa.pf.connid = conn_id_send;
|
|
pfa.pf.ack_nr = ack_nr;
|
|
pfa.pf.seq_nr = seq_nr;
|
|
pfa.pf.windowsize = (uint32)last_rcv_win;
|
|
len = sizeof(PacketFormatV1);
|
|
|
|
// we never need to send EACK for connections
|
|
// that are shutting down
|
|
if(reorder_count != 0 && !got_fin_reached)
|
|
{
|
|
// if reorder count > 0, send an EACK.
|
|
// reorder count should always be 0
|
|
// for synacks, so this should not be
|
|
// as synack
|
|
assert(!synack);
|
|
(void)synack;
|
|
pfa.pf.ext = 1;
|
|
pfa.ext_next = 0;
|
|
pfa.ext_len = 4;
|
|
uint m = 0;
|
|
|
|
// reorder count should only be non-zero
|
|
// if the packet ack_nr + 1 has not yet
|
|
// been received
|
|
assert(inbuf.get(ack_nr + 1) == NULL);
|
|
size_t window = min< size_t >(14 + 16, inbuf.size());
|
|
// Generate bit mask of segments received.
|
|
for(size_t i = 0; i < window; i++)
|
|
{
|
|
if(inbuf.get(ack_nr + i + 2) != NULL)
|
|
{
|
|
m |= 1 << i;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "EACK packet [%u]", ack_nr + i + 2);
|
|
#endif
|
|
}
|
|
}
|
|
pfa.acks[0] = (byte)m;
|
|
pfa.acks[1] = (byte)(m >> 8);
|
|
pfa.acks[2] = (byte)(m >> 16);
|
|
pfa.acks[3] = (byte)(m >> 24);
|
|
len += 4 + 2;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "Sending EACK %u [%u] bits:[%032b]", ack_nr,
|
|
conn_id_send, m);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "Sending ACK %u [%u]", ack_nr, conn_id_send);
|
|
#endif
|
|
}
|
|
|
|
send_data((byte *)&pfa, len, ack_overhead);
|
|
removeSocketFromAckList(this);
|
|
}
|
|
|
|
void
|
|
UTPSocket::send_keep_alive()
|
|
{
|
|
ack_nr--;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "Sending KeepAlive ACK %u [%u]", ack_nr, conn_id_send);
|
|
#endif
|
|
|
|
send_ack();
|
|
ack_nr++;
|
|
}
|
|
|
|
void
|
|
UTPSocket::send_rst(utp_context *ctx, const PackedSockAddr &addr,
|
|
uint32 conn_id_send, uint16 ack_nr, uint16 seq_nr)
|
|
{
|
|
PacketFormatV1 pf1;
|
|
zeromem(&pf1);
|
|
|
|
size_t len;
|
|
pf1.set_version(1);
|
|
pf1.set_type(ST_RESET);
|
|
pf1.ext = 0;
|
|
pf1.connid = conn_id_send;
|
|
pf1.ack_nr = ack_nr;
|
|
pf1.seq_nr = seq_nr;
|
|
pf1.windowsize = 0;
|
|
len = sizeof(PacketFormatV1);
|
|
|
|
// LOG_DEBUG("%s: Sending RST id:%u seq_nr:%u ack_nr:%u", addrfmt(addr,
|
|
// addrbuf), conn_id_send, seq_nr, ack_nr); LOG_DEBUG("send %s len:%u
|
|
// id:%u", addrfmt(addr, addrbuf), (uint)len, conn_id_send);
|
|
send_to_addr(ctx, (const byte *)&pf1, len, addr);
|
|
}
|
|
|
|
void
|
|
UTPSocket::send_packet(OutgoingPacket *pkt)
|
|
{
|
|
// only count against the quota the first time we
|
|
// send the packet. Don't enforce quota when closing
|
|
// a socket. Only enforce the quota when we're sending
|
|
// at slow rates (max window < packet size)
|
|
|
|
// size_t max_send = min(max_window, opt_sndbuf, max_window_user);
|
|
time_t cur_time = utp_call_get_milliseconds(this->ctx, this);
|
|
|
|
if(pkt->transmissions == 0 || pkt->need_resend)
|
|
{
|
|
cur_window += pkt->payload;
|
|
}
|
|
|
|
pkt->need_resend = false;
|
|
|
|
PacketFormatV1 *p1 = (PacketFormatV1 *)pkt->data;
|
|
p1->ack_nr = ack_nr;
|
|
pkt->time_sent = utp_call_get_microseconds(this->ctx, this);
|
|
|
|
// socklen_t salen;
|
|
// SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&salen);
|
|
bool use_as_mtu_probe = false;
|
|
|
|
// TODO: this is subject to nasty wrapping issues! Below as well
|
|
if(mtu_discover_time < (uint64)cur_time)
|
|
{
|
|
// it's time to reset our MTU assupmtions
|
|
// and trigger a new search
|
|
mtu_reset();
|
|
}
|
|
|
|
// don't use packets that are larger then mtu_ceiling
|
|
// as probes, since they were probably used as probes
|
|
// already and failed, now we need it to fragment
|
|
// just to get it through
|
|
// if seq_nr == 1, the probe would end up being 0
|
|
// which is a magic number representing no-probe
|
|
// that why we don't send a probe for a packet with
|
|
// sequence number 0
|
|
if(mtu_floor < mtu_ceiling && pkt->length > mtu_floor
|
|
&& pkt->length <= mtu_ceiling && mtu_probe_seq == 0 && seq_nr != 1
|
|
&& pkt->transmissions == 0)
|
|
{
|
|
// we've already incremented seq_nr
|
|
// for this packet
|
|
mtu_probe_seq = (seq_nr - 1) & ACK_NR_MASK;
|
|
mtu_probe_size = pkt->length;
|
|
assert(pkt->length >= mtu_floor);
|
|
assert(pkt->length <= mtu_ceiling);
|
|
use_as_mtu_probe = true;
|
|
log(UTP_LOG_MTU, "MTU [PROBE] floor:%d ceiling:%d current:%d", mtu_floor,
|
|
mtu_ceiling, mtu_probe_size);
|
|
}
|
|
|
|
pkt->transmissions++;
|
|
send_data(
|
|
(byte *)pkt->data, pkt->length,
|
|
(state == CS_SYN_SENT)
|
|
? connect_overhead
|
|
: (pkt->transmissions == 1) ? payload_bandwidth : retransmit_overhead,
|
|
use_as_mtu_probe ? UTP_UDP_DONTFRAG : 0);
|
|
}
|
|
|
|
bool
|
|
UTPSocket::is_full(int bytes)
|
|
{
|
|
size_t packet_size = get_packet_size();
|
|
if(bytes < 0)
|
|
bytes = packet_size;
|
|
else if(bytes > (int)packet_size)
|
|
bytes = (int)packet_size;
|
|
size_t max_send = min(max_window, opt_sndbuf, max_window_user);
|
|
|
|
// subtract one to save space for the FIN packet
|
|
if(cur_window_packets >= OUTGOING_BUFFER_MAX_SIZE - 1)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "is_full:false cur_window_packets:%d MAX:%d",
|
|
cur_window_packets, OUTGOING_BUFFER_MAX_SIZE - 1);
|
|
#endif
|
|
|
|
last_maxed_out_window = ctx->current_ms;
|
|
return true;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG,
|
|
"is_full:%s. cur_window:%u pkt:%u max:%u cur_window_packets:%u "
|
|
"max_window:%u",
|
|
(cur_window + bytes > max_send) ? "true" : "false", cur_window, bytes,
|
|
max_send, cur_window_packets, max_window);
|
|
#endif
|
|
|
|
if(cur_window + bytes > max_send)
|
|
{
|
|
last_maxed_out_window = ctx->current_ms;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
UTPSocket::flush_packets()
|
|
{
|
|
size_t packet_size = get_packet_size();
|
|
|
|
// send packets that are waiting on the pacer to be sent
|
|
// i has to be an unsigned 16 bit counter to wrap correctly
|
|
// signed types are not guaranteed to wrap the way you expect
|
|
for(uint16 i = seq_nr - cur_window_packets; i != seq_nr; ++i)
|
|
{
|
|
OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(i);
|
|
if(pkt == 0 || (pkt->transmissions > 0 && pkt->need_resend == false))
|
|
continue;
|
|
// have we run out of quota?
|
|
if(is_full())
|
|
return true;
|
|
|
|
// Nagle check
|
|
// don't send the last packet if we have one packet in-flight
|
|
// and the current packet is still smaller than packet_size.
|
|
if(i != ((seq_nr - 1) & ACK_NR_MASK) || cur_window_packets == 1
|
|
|| pkt->payload >= packet_size)
|
|
{
|
|
send_packet(pkt);
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// @payload: number of bytes to send
|
|
// @flags: either ST_DATA, or ST_FIN
|
|
// @iovec: base address of iovec array
|
|
// @num_iovecs: number of iovecs in array
|
|
void
|
|
UTPSocket::write_outgoing_packet(size_t payload, uint flags,
|
|
struct utp_iovec *iovec, size_t num_iovecs)
|
|
{
|
|
// Setup initial timeout timer
|
|
if(cur_window_packets == 0)
|
|
{
|
|
retransmit_timeout = rto;
|
|
rto_timeout = ctx->current_ms + retransmit_timeout;
|
|
assert(cur_window == 0);
|
|
}
|
|
|
|
size_t packet_size = get_packet_size();
|
|
do
|
|
{
|
|
assert(cur_window_packets < OUTGOING_BUFFER_MAX_SIZE);
|
|
assert(flags == ST_DATA || flags == ST_FIN);
|
|
|
|
size_t added = 0;
|
|
|
|
OutgoingPacket *pkt = NULL;
|
|
|
|
if(cur_window_packets > 0)
|
|
{
|
|
pkt = (OutgoingPacket *)outbuf.get(seq_nr - 1);
|
|
}
|
|
|
|
const size_t header_size = get_header_size();
|
|
bool append = true;
|
|
|
|
// if there's any room left in the last packet in the window
|
|
// and it hasn't been sent yet, fill that frame first
|
|
if(payload && pkt && !pkt->transmissions && pkt->payload < packet_size)
|
|
{
|
|
// Use the previous unsent packet
|
|
added =
|
|
min(payload + pkt->payload, max< size_t >(packet_size, pkt->payload))
|
|
- pkt->payload;
|
|
pkt = (OutgoingPacket *)realloc(
|
|
pkt,
|
|
(sizeof(OutgoingPacket) - 1) + header_size + pkt->payload + added);
|
|
outbuf.put(seq_nr - 1, pkt);
|
|
append = false;
|
|
assert(!pkt->need_resend);
|
|
}
|
|
else
|
|
{
|
|
// Create the packet to send.
|
|
added = payload;
|
|
pkt = (OutgoingPacket *)malloc((sizeof(OutgoingPacket) - 1) + header_size
|
|
+ added);
|
|
pkt->payload = 0;
|
|
pkt->transmissions = 0;
|
|
pkt->need_resend = false;
|
|
}
|
|
|
|
if(added)
|
|
{
|
|
assert(flags == ST_DATA);
|
|
|
|
// Fill it with data from the upper layer.
|
|
unsigned char *p = pkt->data + header_size + pkt->payload;
|
|
size_t needed = added;
|
|
|
|
/*
|
|
while (needed) {
|
|
*p = *(char*)iovec[0].iov_base;
|
|
p++;
|
|
iovec[0].iov_base = (char *)iovec[0].iov_base + 1;
|
|
needed--;
|
|
}
|
|
*/
|
|
|
|
for(size_t i = 0; i < num_iovecs && needed; i++)
|
|
{
|
|
if(iovec[i].iov_len == 0)
|
|
continue;
|
|
|
|
size_t num = min< size_t >(needed, iovec[i].iov_len);
|
|
memcpy(p, iovec[i].iov_base, num);
|
|
|
|
p += num;
|
|
|
|
iovec[i].iov_len -= num;
|
|
iovec[i].iov_base = (byte *)iovec[i].iov_base
|
|
+ num; // iovec[i].iov_base += num, but without void* pointers
|
|
needed -= num;
|
|
}
|
|
|
|
assert(needed == 0);
|
|
}
|
|
pkt->payload += added;
|
|
pkt->length = header_size + pkt->payload;
|
|
|
|
last_rcv_win = get_rcv_window();
|
|
|
|
PacketFormatV1 *p1 = (PacketFormatV1 *)pkt->data;
|
|
p1->set_version(1);
|
|
p1->set_type(flags);
|
|
p1->ext = 0;
|
|
p1->connid = conn_id_send;
|
|
p1->windowsize = (uint32)last_rcv_win;
|
|
p1->ack_nr = ack_nr;
|
|
|
|
if(append)
|
|
{
|
|
// Remember the message in the outgoing queue.
|
|
outbuf.ensure_size(seq_nr, cur_window_packets);
|
|
outbuf.put(seq_nr, pkt);
|
|
p1->seq_nr = seq_nr;
|
|
seq_nr++;
|
|
cur_window_packets++;
|
|
}
|
|
|
|
payload -= added;
|
|
|
|
} while(payload);
|
|
|
|
flush_packets();
|
|
}
|
|
|
|
#ifdef _DEBUG
// Debug-only consistency check: asserts that the reorder buffer has no
// packet at ack_nr + 1 while reorder_count is non-zero, and that cur_window
// equals the summed payload of all packets in the send window that have been
// transmitted and are not marked need_resend.
void
UTPSocket::check_invariant()
{
  if(reorder_count > 0)
  {
    assert(inbuf.get(ack_nr + 1) == NULL);
  }

  size_t in_flight = 0;
  // `back` counts backwards from the newest packet (seq_nr - 1)
  for(int back = 1; back <= cur_window_packets; ++back)
  {
    OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(seq_nr - back);
    if(pkt != 0 && pkt->transmissions != 0 && !pkt->need_resend)
      in_flight += pkt->payload;
  }
  assert(in_flight == cur_window);
}
#endif
|
|
|
|
void
|
|
UTPSocket::check_timeouts()
|
|
{
|
|
#ifdef _DEBUG
|
|
check_invariant();
|
|
#endif
|
|
|
|
// this invariant should always be true
|
|
assert(cur_window_packets == 0 || outbuf.get(seq_nr - cur_window_packets));
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG,
|
|
"CheckTimeouts timeout:%d max_window:%u cur_window:%u "
|
|
"state:%s cur_window_packets:%u",
|
|
(int)(rto_timeout - ctx->current_ms), (uint)max_window, (uint)cur_window,
|
|
statenames[state], cur_window_packets);
|
|
#endif
|
|
|
|
if(state != CS_DESTROY)
|
|
flush_packets();
|
|
|
|
switch(state)
|
|
{
|
|
case CS_SYN_SENT:
|
|
case CS_SYN_RECV:
|
|
case CS_CONNECTED_FULL:
|
|
case CS_CONNECTED:
|
|
{
|
|
// Reset max window...
|
|
if((int)(ctx->current_ms - zerowindow_time) >= 0 && max_window_user == 0)
|
|
{
|
|
max_window_user = PACKET_SIZE;
|
|
}
|
|
|
|
if((int)(ctx->current_ms - rto_timeout) >= 0 && rto_timeout > 0)
|
|
{
|
|
bool ignore_loss = false;
|
|
|
|
if(cur_window_packets == 1
|
|
&& ((seq_nr - 1) & ACK_NR_MASK) == mtu_probe_seq
|
|
&& mtu_probe_seq != 0)
|
|
{
|
|
// we only had a single outstanding packet that timed out, and it was
|
|
// the probe
|
|
mtu_ceiling = mtu_probe_size - 1;
|
|
mtu_search_update();
|
|
// this packet was most likely dropped because the packet size being
|
|
// too big and not because congestion. To accelerate the binary search
|
|
// for the MTU, resend immediately and don't reset the window size
|
|
ignore_loss = true;
|
|
log(UTP_LOG_MTU, "MTU [PROBE-TIMEOUT] floor:%d ceiling:%d current:%d",
|
|
mtu_floor, mtu_ceiling, mtu_last);
|
|
}
|
|
// we dropepd the probe, clear these fields to
|
|
// allow us to send a new one
|
|
mtu_probe_seq = mtu_probe_size = 0;
|
|
log(UTP_LOG_MTU, "MTU [TIMEOUT]");
|
|
|
|
/*
|
|
OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr -
|
|
cur_window_packets);
|
|
|
|
// If there were a lot of retransmissions, force recomputation of round
|
|
trip time if (pkt->transmissions >= 4) rtt = 0;
|
|
*/
|
|
|
|
// Increase RTO
|
|
const uint new_timeout =
|
|
ignore_loss ? retransmit_timeout : retransmit_timeout * 2;
|
|
|
|
// They initiated the connection but failed to respond before the rto.
|
|
// A malicious client can also spoof the destination address of a ST_SYN
|
|
// bringing us to this state. Kill the connection and do not notify the
|
|
// upper layer
|
|
if(state == CS_SYN_RECV)
|
|
{
|
|
state = CS_DESTROY;
|
|
utp_call_on_error(ctx, this, UTP_ETIMEDOUT);
|
|
return;
|
|
}
|
|
|
|
// We initiated the connection but the other side failed to respond
|
|
// before the rto
|
|
if(retransmit_count >= 4
|
|
|| (state == CS_SYN_SENT && retransmit_count >= 2))
|
|
{
|
|
// 4 consecutive transmissions have timed out. Kill it. If we
|
|
// haven't even connected yet, give up after only 2 consecutive
|
|
// failed transmissions.
|
|
if(close_requested)
|
|
state = CS_DESTROY;
|
|
else
|
|
state = CS_RESET;
|
|
utp_call_on_error(ctx, this, UTP_ETIMEDOUT);
|
|
return;
|
|
}
|
|
|
|
retransmit_timeout = new_timeout;
|
|
rto_timeout = ctx->current_ms + new_timeout;
|
|
|
|
if(!ignore_loss)
|
|
{
|
|
// On Timeout
|
|
duplicate_ack = 0;
|
|
|
|
int packet_size = get_packet_size();
|
|
|
|
if((cur_window_packets == 0) && ((int)max_window > packet_size))
|
|
{
|
|
// we don't have any packets in-flight, even though
|
|
// we could. This implies that the connection is just
|
|
// idling. No need to be aggressive about resetting the
|
|
// congestion window. Just let it decay by a 3:rd.
|
|
// don't set it any lower than the packet size though
|
|
max_window = max(max_window * 2 / 3, size_t(packet_size));
|
|
}
|
|
else
|
|
{
|
|
// our delay was so high that our congestion window
|
|
// was shrunk below one packet, preventing us from
|
|
// sending anything for one time-out period. Now, reset
|
|
// the congestion window to fit one packet, to start over
|
|
// again
|
|
max_window = packet_size;
|
|
slow_start = true;
|
|
}
|
|
}
|
|
|
|
// every packet should be considered lost
|
|
for(int i = 0; i < cur_window_packets; ++i)
|
|
{
|
|
OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(seq_nr - i - 1);
|
|
if(pkt == 0 || pkt->transmissions == 0 || pkt->need_resend)
|
|
continue;
|
|
pkt->need_resend = true;
|
|
assert(cur_window >= pkt->payload);
|
|
cur_window -= pkt->payload;
|
|
}
|
|
|
|
if(cur_window_packets > 0)
|
|
{
|
|
retransmit_count++;
|
|
// used in parse_log.py
|
|
log(UTP_LOG_NORMAL,
|
|
"Packet timeout. Resend. seq_nr:%u. timeout:%u "
|
|
"max_window:%u cur_window_packets:%d",
|
|
seq_nr - cur_window_packets, retransmit_timeout, (uint)max_window,
|
|
int(cur_window_packets));
|
|
|
|
fast_timeout = true;
|
|
timeout_seq_nr = seq_nr;
|
|
|
|
OutgoingPacket *pkt =
|
|
(OutgoingPacket *)outbuf.get(seq_nr - cur_window_packets);
|
|
assert(pkt);
|
|
|
|
// Re-send the packet.
|
|
send_packet(pkt);
|
|
}
|
|
}
|
|
|
|
// Mark the socket as writable. If the cwnd has grown, or if the number of
|
|
// bytes in-flight is lower than cwnd, we need to make the socket writable
|
|
// again in case it isn't
|
|
if(state == CS_CONNECTED_FULL && !is_full())
|
|
{
|
|
state = CS_CONNECTED;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG,
|
|
"Socket writable. max_window:%u cur_window:%u packet_size:%u",
|
|
(uint)max_window, (uint)cur_window, (uint)get_packet_size());
|
|
#endif
|
|
utp_call_on_state_change(this->ctx, this, UTP_STATE_WRITABLE);
|
|
}
|
|
|
|
if(state >= CS_CONNECTED && !fin_sent)
|
|
{
|
|
if((int)(ctx->current_ms - last_sent_packet) >= KEEPALIVE_INTERVAL)
|
|
{
|
|
send_keep_alive();
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
// prevent warning
|
|
case CS_UNINITIALIZED:
|
|
case CS_IDLE:
|
|
case CS_RESET:
|
|
case CS_DESTROY:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// this should be called every time we change mtu_floor or mtu_ceiling
|
|
void
|
|
UTPSocket::mtu_search_update()
|
|
{
|
|
assert(mtu_floor <= mtu_ceiling);
|
|
|
|
// binary search
|
|
mtu_last = (mtu_floor + mtu_ceiling) / 2;
|
|
|
|
// enable a new probe to be sent
|
|
mtu_probe_seq = mtu_probe_size = 0;
|
|
|
|
// if the floor and ceiling are close enough, consider the
|
|
// MTU binary search complete. We set the current value
|
|
// to floor since that's the only size we know can go through
|
|
// also set the ceiling to floor to terminate the searching
|
|
if(mtu_ceiling - mtu_floor <= 16)
|
|
{
|
|
mtu_last = mtu_floor;
|
|
log(UTP_LOG_MTU, "MTU [DONE] floor:%d ceiling:%d current:%d", mtu_floor,
|
|
mtu_ceiling, mtu_last);
|
|
mtu_ceiling = mtu_floor;
|
|
assert(mtu_floor <= mtu_ceiling);
|
|
// Do another search in 30 minutes
|
|
mtu_discover_time =
|
|
utp_call_get_milliseconds(this->ctx, this) + 30 * 60 * 1000;
|
|
}
|
|
}
|
|
|
|
void
|
|
UTPSocket::mtu_reset()
|
|
{
|
|
mtu_ceiling = get_udp_mtu();
|
|
// Less would not pass TCP...
|
|
mtu_floor = 576;
|
|
log(UTP_LOG_MTU, "MTU [RESET] floor:%d ceiling:%d current:%d", mtu_floor,
|
|
mtu_ceiling, mtu_last);
|
|
assert(mtu_floor <= mtu_ceiling);
|
|
mtu_discover_time =
|
|
utp_call_get_milliseconds(this->ctx, this) + 30 * 60 * 1000;
|
|
}
|
|
|
|
// returns:
|
|
// 0: the packet was acked.
|
|
// 1: it means that the packet had already been acked
|
|
// 2: the packet has not been sent yet
|
|
int
|
|
UTPSocket::ack_packet(uint16 seq)
|
|
{
|
|
OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(seq);
|
|
|
|
// the packet has already been acked (or not sent)
|
|
if(pkt == NULL)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "got ack for:%u (already acked, or never sent)", seq);
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
// can't ack packets that haven't been sent yet!
|
|
if(pkt->transmissions == 0)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG,
|
|
"got ack for:%u (never sent, pkt_size:%u need_resend:%u)", seq,
|
|
(uint)pkt->payload, pkt->need_resend);
|
|
#endif
|
|
|
|
return 2;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "got ack for:%u (pkt_size:%u need_resend:%u)", seq,
|
|
(uint)pkt->payload, pkt->need_resend);
|
|
#endif
|
|
|
|
outbuf.put(seq, NULL);
|
|
|
|
// if we never re-sent the packet, update the RTT estimate
|
|
if(pkt->transmissions == 1)
|
|
{
|
|
// Estimate the round trip time.
|
|
const uint32 ertt = (uint32)(
|
|
(utp_call_get_microseconds(this->ctx, this) - pkt->time_sent) / 1000);
|
|
if(rtt == 0)
|
|
{
|
|
// First round trip time sample
|
|
rtt = ertt;
|
|
rtt_var = ertt / 2;
|
|
// sanity check. rtt should never be more than 6 seconds
|
|
// assert(rtt < 6000);
|
|
}
|
|
else
|
|
{
|
|
// Compute new round trip times
|
|
const int delta = (int)rtt - ertt;
|
|
rtt_var = rtt_var + (int)(abs(delta) - rtt_var) / 4;
|
|
rtt = rtt - rtt / 8 + ertt / 8;
|
|
// sanity check. rtt should never be more than 6 seconds
|
|
// assert(rtt < 6000);
|
|
rtt_hist.add_sample(ertt, ctx->current_ms);
|
|
}
|
|
rto = max< uint >(rtt + rtt_var * 4, 1000);
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "rtt:%u avg:%u var:%u rto:%u", ertt, rtt, rtt_var, rto);
|
|
#endif
|
|
}
|
|
retransmit_timeout = rto;
|
|
rto_timeout = ctx->current_ms + rto;
|
|
// if need_resend is set, this packet has already
|
|
// been considered timed-out, and is not included in
|
|
// the cur_window anymore
|
|
if(!pkt->need_resend)
|
|
{
|
|
assert(cur_window >= pkt->payload);
|
|
cur_window -= pkt->payload;
|
|
}
|
|
free(pkt);
|
|
retransmit_count = 0;
|
|
return 0;
|
|
}
|
|
|
|
// count the number of bytes that were acked by the EACK header
|
|
size_t
|
|
UTPSocket::selective_ack_bytes(uint base, const byte *mask, byte len,
|
|
int64 &min_rtt)
|
|
{
|
|
if(cur_window_packets == 0)
|
|
return 0;
|
|
|
|
size_t acked_bytes = 0;
|
|
int bits = len * 8;
|
|
uint64 now = utp_call_get_microseconds(this->ctx, this);
|
|
|
|
do
|
|
{
|
|
uint v = base + bits;
|
|
|
|
// ignore bits that haven't been sent yet
|
|
// see comment in UTPSocket::selective_ack
|
|
if(((seq_nr - v - 1) & ACK_NR_MASK) >= (uint16)(cur_window_packets - 1))
|
|
continue;
|
|
|
|
// ignore bits that represents packets we haven't sent yet
|
|
// or packets that have already been acked
|
|
OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(v);
|
|
if(!pkt || pkt->transmissions == 0)
|
|
continue;
|
|
|
|
// Count the number of segments that were successfully received past it.
|
|
if(bits >= 0 && mask[bits >> 3] & (1 << (bits & 7)))
|
|
{
|
|
assert((int)(pkt->payload) >= 0);
|
|
acked_bytes += pkt->payload;
|
|
if(pkt->time_sent < now)
|
|
min_rtt = min< int64 >(min_rtt, now - pkt->time_sent);
|
|
else
|
|
min_rtt = min< int64 >(min_rtt, 50000);
|
|
continue;
|
|
}
|
|
} while(--bits >= -1);
|
|
return acked_bytes;
|
|
}
|
|
|
|
// capacity of the resend stack used by UTPSocket::selective_ack
enum { MAX_EACK = 128 };
|
|
|
|
void
|
|
UTPSocket::selective_ack(uint base, const byte *mask, byte len)
|
|
{
|
|
if(cur_window_packets == 0)
|
|
return;
|
|
|
|
// the range is inclusive [0, 31] bits
|
|
int bits = len * 8 - 1;
|
|
|
|
int count = 0;
|
|
|
|
// resends is a stack of sequence numbers we need to resend. Since we
|
|
// iterate in reverse over the acked packets, at the end, the top packets
|
|
// are the ones we want to resend
|
|
int resends[MAX_EACK];
|
|
int nr = 0;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
char bitmask[1024] = {0};
|
|
int counter = bits;
|
|
for(int i = 0; i <= bits; ++i)
|
|
{
|
|
bool bit_set = counter >= 0 && mask[counter >> 3] & (1 << (counter & 7));
|
|
bitmask[i] = bit_set ? '1' : '0';
|
|
--counter;
|
|
}
|
|
|
|
log(UTP_LOG_DEBUG, "Got EACK [%s] base:%u", bitmask, base);
|
|
#endif
|
|
|
|
do
|
|
{
|
|
// we're iterating over the bits from higher sequence numbers
|
|
// to lower (kind of in reverse order, wich might not be very
|
|
// intuitive)
|
|
uint v = base + bits;
|
|
|
|
// ignore bits that haven't been sent yet
|
|
// and bits that fall below the ACKed sequence number
|
|
// this can happen if an EACK message gets
|
|
// reordered and arrives after a packet that ACKs up past
|
|
// the base for thie EACK message
|
|
|
|
// this is essentially the same as:
|
|
// if v >= seq_nr || v <= seq_nr - cur_window_packets
|
|
// but it takes wrapping into account
|
|
|
|
// if v == seq_nr the -1 will make it wrap. if v > seq_nr
|
|
// it will also wrap (since it will fall further below 0)
|
|
// and be > cur_window_packets.
|
|
// if v == seq_nr - cur_window_packets, the result will be
|
|
// seq_nr - (seq_nr - cur_window_packets) - 1
|
|
// == seq_nr - seq_nr + cur_window_packets - 1
|
|
// == cur_window_packets - 1 which will be caught by the
|
|
// test. If v < seq_nr - cur_window_packets the result will grow
|
|
// fall furhter outside of the cur_window_packets range.
|
|
|
|
// sequence number space:
|
|
//
|
|
// rejected < accepted > rejected
|
|
// <============+--------------+============>
|
|
// ^ ^
|
|
// | |
|
|
// (seq_nr-wnd) seq_nr
|
|
|
|
if(((seq_nr - v - 1) & ACK_NR_MASK) >= (uint16)(cur_window_packets - 1))
|
|
continue;
|
|
|
|
// this counts as a duplicate ack, even though we might have
|
|
// received an ack for this packet previously (in another EACK
|
|
// message for instance)
|
|
bool bit_set = bits >= 0 && mask[bits >> 3] & (1 << (bits & 7));
|
|
|
|
// if this packet is acked, it counts towards the duplicate ack counter
|
|
if(bit_set)
|
|
count++;
|
|
|
|
// ignore bits that represents packets we haven't sent yet
|
|
// or packets that have already been acked
|
|
OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(v);
|
|
if(!pkt || pkt->transmissions == 0)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "skipping %u. pkt:%08x transmissions:%u %s", v, pkt,
|
|
pkt ? pkt->transmissions : 0,
|
|
pkt ? "(not sent yet?)" : "(already acked?)");
|
|
#endif
|
|
continue;
|
|
}
|
|
|
|
// Count the number of segments that were successfully received past it.
|
|
if(bit_set)
|
|
{
|
|
// the selective ack should never ACK the packet we're waiting for to
|
|
// decrement cur_window_packets
|
|
assert((v & outbuf.mask)
|
|
!= ((seq_nr - cur_window_packets) & outbuf.mask));
|
|
ack_packet(v);
|
|
continue;
|
|
}
|
|
|
|
// Resend segments
|
|
// if count is less than our re-send limit, we haven't seen enough
|
|
// acked packets in front of this one to warrant a re-send.
|
|
// if count == 0, we're still going through the tail of zeroes
|
|
if(((v - fast_resend_seq_nr) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE
|
|
&& count >= DUPLICATE_ACKS_BEFORE_RESEND)
|
|
{
|
|
// resends is a stack, and we're mostly interested in the top of it
|
|
// if we're full, just throw away the lower half
|
|
if(nr >= MAX_EACK - 2)
|
|
{
|
|
memmove(resends, &resends[MAX_EACK / 2],
|
|
MAX_EACK / 2 * sizeof(resends[0]));
|
|
nr -= MAX_EACK / 2;
|
|
}
|
|
resends[nr++] = v;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "no ack for %u", v);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG,
|
|
"not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u", v,
|
|
count, duplicate_ack, fast_resend_seq_nr);
|
|
#endif
|
|
}
|
|
} while(--bits >= -1);
|
|
|
|
if(((base - 1 - fast_resend_seq_nr) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE
|
|
&& count >= DUPLICATE_ACKS_BEFORE_RESEND)
|
|
{
|
|
// if we get enough duplicate acks to start
|
|
// resending, the first packet we should resend
|
|
// is base-1
|
|
resends[nr++] = (base - 1) & ACK_NR_MASK;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG, "no ack for %u", (base - 1) & ACK_NR_MASK);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
log(UTP_LOG_DEBUG,
|
|
"not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u", base - 1,
|
|
count, duplicate_ack, fast_resend_seq_nr);
|
|
#endif
|
|
}
|
|
|
|
bool back_off = false;
|
|
int i = 0;
|
|
while(nr > 0)
|
|
{
|
|
uint v = resends[--nr];
|
|
// don't consider the tail of 0:es to be lost packets
|
|
// only unacked packets with acked packets after should
|
|
// be considered lost
|
|
OutgoingPacket *pkt = (OutgoingPacket *)outbuf.get(v);
|
|
|
|
// this may be an old (re-ordered) packet, and some of the
|
|
// packets in here may have been acked already. In which
|
|
// case they will not be in the send queue anymore
|
|
if(!pkt)
|
|
continue;
|
|
|
|
// used in parse_log.py
|
|
log(UTP_LOG_NORMAL, "Packet %u lost. Resending", v);
|
|
|
|
// On Loss
|
|
back_off = true;
|
|
|
|
#ifdef _DEBUG
|
|
++_stats.rexmit;
|
|
#endif
|
|
|
|
send_packet(pkt);
|
|
fast_resend_seq_nr = (v + 1) & ACK_NR_MASK;
|
|
|
|
// Re-send max 4 packets.
|
|
if(++i >= 4)
|
|
break;
|
|
}
|
|
|
|
if(back_off)
|
|
maybe_decay_win(ctx->current_ms);
|
|
|
|
duplicate_ack = count;
|
|
}
|
|
|
|
void
|
|
UTPSocket::apply_ccontrol(size_t bytes_acked, uint32 actual_delay,
|
|
int64 min_rtt)
|
|
{
|
|
// the delay can never be greater than the rtt. The min_rtt
|
|
// variable is the RTT in microseconds
|
|
|
|
assert(min_rtt >= 0);
|
|
int32 our_delay = min< uint32 >(our_hist.get_value(), uint32(min_rtt));
|
|
assert(our_delay != INT_MAX);
|
|
assert(our_delay >= 0);
|
|
|
|
utp_call_on_delay_sample(this->ctx, this, our_delay / 1000);
|
|
|
|
// This test the connection under heavy load from foreground
|
|
// traffic. Pretend that our delays are very high to force the
|
|
// connection to use sub-packet size window sizes
|
|
// our_delay *= 4;
|
|
|
|
// target is microseconds
|
|
int target = target_delay;
|
|
if(target <= 0)
|
|
target = 100000;
|
|
|
|
// this is here to compensate for very large clock drift that affects
|
|
// the congestion controller into giving certain endpoints an unfair
|
|
// share of the bandwidth. We have an estimate of the clock drift
|
|
// (clock_drift). The unit of this is microseconds per 5 seconds.
|
|
// empirically, a reasonable cut-off appears to be about 200000
|
|
// (which is pretty high). The main purpose is to compensate for
|
|
// people trying to "cheat" uTP by making their clock run slower,
|
|
// and this definitely catches that without any risk of false positives
|
|
// if clock_drift < -200000 start applying a penalty delay proportional
|
|
// to how far beoynd -200000 the clock drift is
|
|
int32 penalty = 0;
|
|
if(clock_drift < -200000)
|
|
{
|
|
penalty = (-clock_drift - 200000) / 7;
|
|
our_delay += penalty;
|
|
}
|
|
|
|
double off_target = target - our_delay;
|
|
|
|
// this is the same as:
|
|
//
|
|
// (min(off_target, target) / target) * (bytes_acked / max_window) *
|
|
// MAX_CWND_INCREASE_BYTES_PER_RTT
|
|
//
|
|
// so, it's scaling the max increase by the fraction of the window this ack
|
|
// represents, and the fraction of the target delay the current delay
|
|
// represents. The min() around off_target protects against crazy values of
|
|
// our_delay, which may happen when th timestamps wraps, or by just having a
|
|
// malicious peer sending garbage. This caps the increase of the window size
|
|
// to MAX_CWND_INCREASE_BYTES_PER_RTT per rtt. as for large negative numbers,
|
|
// this direction is already capped at the min packet size further down the
|
|
// min around the bytes_acked protects against the case where the window size
|
|
// was recently shrunk and the number of acked bytes exceeds that. This is
|
|
// considered no more than one full window, in order to keep the gain within
|
|
// sane boundries.
|
|
|
|
assert(bytes_acked > 0);
|
|
double window_factor = (double)min(bytes_acked, max_window)
|
|
/ (double)max(max_window, bytes_acked);
|
|
|
|
double delay_factor = off_target / target;
|
|
double scaled_gain =
|
|
MAX_CWND_INCREASE_BYTES_PER_RTT * window_factor * delay_factor;
|
|
|
|
// since MAX_CWND_INCREASE_BYTES_PER_RTT is a cap on how much the window size
|
|
// (max_window) may increase per RTT, we may not increase the window size more
|
|
// than that proportional to the number of bytes that were acked, so that once
|
|
// one window has been acked (one rtt) the increase limit is not exceeded the
|
|
// +1. is to allow for floating point imprecision
|
|
assert(scaled_gain <= 1.
|
|
+ MAX_CWND_INCREASE_BYTES_PER_RTT
|
|
* (double)min(bytes_acked, max_window)
|
|
/ (double)max(max_window, bytes_acked));
|
|
|
|
if(scaled_gain > 0 && ctx->current_ms - last_maxed_out_window > 1000)
|
|
{
|
|
// if it was more than 1 second since we tried to send a packet
|
|
// and stopped because we hit the max window, we're most likely rate
|
|
// limited (which prevents us from ever hitting the window size)
|
|
// if this is the case, we cannot let the max_window grow indefinitely
|
|
scaled_gain = 0;
|
|
}
|
|
|
|
size_t ledbat_cwnd = (max_window + scaled_gain < MIN_WINDOW_SIZE)
|
|
? MIN_WINDOW_SIZE
|
|
: (size_t)(max_window + scaled_gain);
|
|
|
|
if(slow_start)
|
|
{
|
|
size_t ss_cwnd = (size_t)(max_window + window_factor * get_packet_size());
|
|
if(ss_cwnd > ssthresh)
|
|
{
|
|
slow_start = false;
|
|
}
|
|
else if(our_delay > target * 0.9)
|
|
{
|
|
// even if we're a little under the target delay, we conservatively
|
|
// discontinue the slow start phase
|
|
slow_start = false;
|
|
ssthresh = max_window;
|
|
}
|
|
else
|
|
{
|
|
max_window = max(ss_cwnd, ledbat_cwnd);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
max_window = ledbat_cwnd;
|
|
}
|
|
|
|
// make sure that the congestion window is below max
|
|
// make sure that we don't shrink our window too small
|
|
max_window = clamp< size_t >(max_window, MIN_WINDOW_SIZE, opt_sndbuf);
|
|
|
|
// used in parse_log.py
|
|
log(UTP_LOG_NORMAL,
|
|
"actual_delay:%u our_delay:%d their_delay:%u off_target:%d max_window:%u "
|
|
"delay_base:%u delay_sum:%d target_delay:%d acked_bytes:%u cur_window:%u "
|
|
"scaled_gain:%f rtt:%u rate:%u wnduser:%u rto:%u timeout:%d "
|
|
"get_microseconds:" I64u
|
|
" "
|
|
"cur_window_packets:%u packet_size:%u their_delay_base:%u "
|
|
"their_actual_delay:%u "
|
|
"average_delay:%d clock_drift:%d clock_drift_raw:%d delay_penalty:%d "
|
|
"current_delay_sum:" I64u
|
|
"current_delay_samples:%d average_delay_base:%d "
|
|
"last_maxed_out_window:" I64u
|
|
" opt_sndbuf:%d "
|
|
"current_ms:" I64u "",
|
|
actual_delay, our_delay / 1000, their_hist.get_value() / 1000,
|
|
int(off_target / 1000), uint(max_window), uint32(our_hist.delay_base),
|
|
int((our_delay + their_hist.get_value()) / 1000), int(target / 1000),
|
|
uint(bytes_acked), (uint)(cur_window - bytes_acked), (float)(scaled_gain),
|
|
rtt,
|
|
(uint)(max_window * 1000
|
|
/ (rtt_hist.delay_base ? rtt_hist.delay_base : 50)),
|
|
(uint)max_window_user, rto, (int)(rto_timeout - ctx->current_ms),
|
|
utp_call_get_microseconds(this->ctx, this), cur_window_packets,
|
|
(uint)get_packet_size(), their_hist.delay_base,
|
|
their_hist.delay_base + their_hist.get_value(), average_delay,
|
|
clock_drift, clock_drift_raw, penalty / 1000, current_delay_sum,
|
|
current_delay_samples, average_delay_base, uint64(last_maxed_out_window),
|
|
int(opt_sndbuf), uint64(ctx->current_ms));
|
|
}
|
|
|
|
static void
|
|
utp_register_recv_packet(UTPSocket *conn, size_t len)
|
|
{
|
|
#ifdef _DEBUG
|
|
++conn->_stats.nrecv;
|
|
conn->_stats.nbytes_recv += len;
|
|
#endif
|
|
|
|
if(len <= PACKET_SIZE_MID)
|
|
{
|
|
if(len <= PACKET_SIZE_EMPTY)
|
|
{
|
|
conn->ctx->context_stats._nraw_recv[PACKET_SIZE_EMPTY_BUCKET]++;
|
|
}
|
|
else if(len <= PACKET_SIZE_SMALL)
|
|
{
|
|
conn->ctx->context_stats._nraw_recv[PACKET_SIZE_SMALL_BUCKET]++;
|
|
}
|
|
else
|
|
conn->ctx->context_stats._nraw_recv[PACKET_SIZE_MID_BUCKET]++;
|
|
}
|
|
else
|
|
{
|
|
if(len <= PACKET_SIZE_BIG)
|
|
{
|
|
conn->ctx->context_stats._nraw_recv[PACKET_SIZE_BIG_BUCKET]++;
|
|
}
|
|
else
|
|
conn->ctx->context_stats._nraw_recv[PACKET_SIZE_HUGE_BUCKET]++;
|
|
}
|
|
}
|
|
|
|
// returns the max number of bytes of payload the uTP
|
|
// connection is allowed to send
|
|
size_t
|
|
UTPSocket::get_packet_size() const
|
|
{
|
|
int header_size = sizeof(PacketFormatV1);
|
|
size_t mtu = mtu_last ? mtu_last : mtu_ceiling;
|
|
return mtu - header_size;
|
|
}
|
|
|
|
// Process an incoming packet
|
|
// syn is true if this is the first packet received. It will cut off parsing
|
|
// as soon as the header is done
|
|
size_t
|
|
utp_process_incoming(UTPSocket *conn, const byte *packet, size_t len,
|
|
bool syn = false)
|
|
{
|
|
utp_register_recv_packet(conn, len);
|
|
|
|
conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn);
|
|
|
|
const PacketFormatV1 *pf1 = (PacketFormatV1 *)packet;
|
|
const byte *packet_end = packet + len;
|
|
|
|
uint16 pk_seq_nr = pf1->seq_nr;
|
|
uint16 pk_ack_nr = pf1->ack_nr;
|
|
uint8 pk_flags = pf1->type();
|
|
|
|
if(pk_flags >= ST_NUM_STATES)
|
|
return 0;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"Got %s. seq_nr:%u ack_nr:%u state:%s timestamp:" I64u
|
|
" reply_micro:%u",
|
|
flagnames[pk_flags], pk_seq_nr, pk_ack_nr, statenames[conn->state],
|
|
uint64(pf1->tv_usec), (uint32)(pf1->reply_micro));
|
|
#endif
|
|
|
|
// mark receipt time
|
|
uint64 time = utp_call_get_microseconds(conn->ctx, conn);
|
|
|
|
// window packets size is used to calculate a minimum
|
|
// permissible range for received acks. connections with acks falling
|
|
// out of this range are dropped
|
|
const uint16 curr_window = max< uint16 >(
|
|
conn->cur_window_packets + ACK_NR_ALLOWED_WINDOW, ACK_NR_ALLOWED_WINDOW);
|
|
|
|
// ignore packets whose ack_nr is invalid. This would imply a spoofed address
|
|
// or a malicious attempt to attack the uTP implementation.
|
|
// acking a packet that hasn't been sent yet!
|
|
// SYN packets have an exception, since there are no previous packets
|
|
if((pk_flags != ST_SYN || conn->state != CS_SYN_RECV)
|
|
&& (wrapping_compare_less(conn->seq_nr - 1, pk_ack_nr, ACK_NR_MASK)
|
|
|| wrapping_compare_less(pk_ack_nr, conn->seq_nr - 1 - curr_window,
|
|
ACK_NR_MASK)))
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"Invalid ack_nr: %u. our seq_nr: %u last unacked: %u", pk_ack_nr,
|
|
conn->seq_nr,
|
|
(conn->seq_nr - conn->cur_window_packets) & ACK_NR_MASK);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// RSTs are handled earlier, since the connid matches the send id not the recv
|
|
// id
|
|
assert(pk_flags != ST_RESET);
|
|
|
|
// TODO: maybe send a ST_RESET if we're in CS_RESET?
|
|
|
|
const byte *selack_ptr = NULL;
|
|
|
|
// Unpack UTP packet options
|
|
// Data pointer
|
|
const byte *data = (const byte *)pf1 + conn->get_header_size();
|
|
if(conn->get_header_size() > len)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Invalid packet size (less than header size)");
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
// Skip the extension headers
|
|
uint extension = pf1->ext;
|
|
if(extension != 0)
|
|
{
|
|
do
|
|
{
|
|
// Verify that the packet is valid.
|
|
data += 2;
|
|
|
|
if((int)(packet_end - data) < 0 || (int)(packet_end - data) < data[-1])
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Invalid len of extensions");
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
switch(extension)
|
|
{
|
|
case 1: // Selective Acknowledgment
|
|
selack_ptr = data;
|
|
break;
|
|
case 2: // extension bits
|
|
if(data[-1] != 8)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Invalid len of extension bits header");
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
memcpy(conn->extensions, data, 8);
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"got extension bits:%02x%02x%02x%02x%02x%02x%02x%02x",
|
|
conn->extensions[0], conn->extensions[1],
|
|
conn->extensions[2], conn->extensions[3],
|
|
conn->extensions[4], conn->extensions[5],
|
|
conn->extensions[6], conn->extensions[7]);
|
|
#endif
|
|
}
|
|
extension = data[-2];
|
|
data += data[-1];
|
|
} while(extension);
|
|
}
|
|
|
|
if(conn->state == CS_SYN_SENT)
|
|
{
|
|
// if this is a syn-ack, initialize our ack_nr
|
|
// to match the sequence number we got from
|
|
// the other end
|
|
conn->ack_nr = (pk_seq_nr - 1) & SEQ_NR_MASK;
|
|
}
|
|
|
|
conn->last_got_packet = conn->ctx->current_ms;
|
|
|
|
if(syn)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
// seqnr is the number of packets past the expected
|
|
// packet this is. ack_nr is the last acked, seq_nr is the
|
|
// current. Subtracting 1 makes 0 mean "this is the next
|
|
// expected packet".
|
|
const uint seqnr = (pk_seq_nr - conn->ack_nr - 1) & SEQ_NR_MASK;
|
|
|
|
// Getting an invalid sequence number?
|
|
if(seqnr >= REORDER_BUFFER_MAX_SIZE)
|
|
{
|
|
if(seqnr >= (SEQ_NR_MASK + 1) - REORDER_BUFFER_MAX_SIZE
|
|
&& pk_flags != ST_STATE)
|
|
{
|
|
conn->schedule_ack();
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, " Got old Packet/Ack (%u/%u)=%u", pk_seq_nr,
|
|
conn->ack_nr, seqnr);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// Process acknowledgment
|
|
// acks is the number of packets that was acked
|
|
int acks =
|
|
(pk_ack_nr - (conn->seq_nr - 1 - conn->cur_window_packets)) & ACK_NR_MASK;
|
|
|
|
// this happens when we receive an old ack nr
|
|
if(acks > conn->cur_window_packets)
|
|
acks = 0;
|
|
|
|
// if we get the same ack_nr as in the last packet
|
|
// increase the duplicate_ack counter, otherwise reset
|
|
// it to 0.
|
|
// It's important to only count ACKs in ST_STATE packets. Any other
|
|
// packet (primarily ST_DATA) is likely to have been sent because of the
|
|
// other end having new outgoing data, not in response to incoming data.
|
|
// For instance, if we're receiving a steady stream of payload with no
|
|
// outgoing data, and we suddenly have a few bytes of payload to send (say,
|
|
// a bittorrent HAVE message), we're very likely to see 3 duplicate ACKs
|
|
// immediately after sending our payload packet. This effectively disables
|
|
// the fast-resend on duplicate-ack logic for bi-directional connections
|
|
// (except in the case of a selective ACK). This is in line with BSD4.4 TCP
|
|
// implementation.
|
|
if(conn->cur_window_packets > 0)
|
|
{
|
|
if(pk_ack_nr
|
|
== ((conn->seq_nr - conn->cur_window_packets - 1) & ACK_NR_MASK)
|
|
&& conn->cur_window_packets > 0 && pk_flags == ST_STATE)
|
|
{
|
|
++conn->duplicate_ack;
|
|
if(conn->duplicate_ack == DUPLICATE_ACKS_BEFORE_RESEND
|
|
&& conn->mtu_probe_seq)
|
|
{
|
|
// It's likely that the probe was rejected due to its size, but we
|
|
// haven't got an ICMP report back yet
|
|
if(pk_ack_nr == ((conn->mtu_probe_seq - 1) & ACK_NR_MASK))
|
|
{
|
|
conn->mtu_ceiling = conn->mtu_probe_size - 1;
|
|
conn->mtu_search_update();
|
|
conn->log(UTP_LOG_MTU, "MTU [DUPACK] floor:%d ceiling:%d current:%d",
|
|
conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
|
|
}
|
|
else
|
|
{
|
|
// A non-probe was blocked before our probe.
|
|
// Can't conclude much, send a new probe
|
|
conn->mtu_probe_seq = conn->mtu_probe_size = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
conn->duplicate_ack = 0;
|
|
}
|
|
|
|
// TODO: if duplicate_ack == DUPLICATE_ACK_BEFORE_RESEND
|
|
// and fast_resend_seq_nr <= ack_nr + 1
|
|
// resend ack_nr + 1
|
|
// also call maybe_decay_win()
|
|
}
|
|
|
|
// figure out how many bytes were acked
|
|
size_t acked_bytes = 0;
|
|
|
|
// the minimum rtt of all acks
|
|
// this is the upper limit on the delay we get back
|
|
// from the other peer. Our delay cannot exceed
|
|
// the rtt of the packet. If it does, clamp it.
|
|
// this is done in apply_ledbat_ccontrol()
|
|
int64 min_rtt = INT64_MAX;
|
|
|
|
uint64 now = utp_call_get_microseconds(conn->ctx, conn);
|
|
|
|
for(int i = 0; i < acks; ++i)
|
|
{
|
|
int seq = (conn->seq_nr - conn->cur_window_packets + i) & ACK_NR_MASK;
|
|
OutgoingPacket *pkt = (OutgoingPacket *)conn->outbuf.get(seq);
|
|
if(pkt == 0 || pkt->transmissions == 0)
|
|
continue;
|
|
assert((int)(pkt->payload) >= 0);
|
|
acked_bytes += pkt->payload;
|
|
if(conn->mtu_probe_seq && seq == static_cast< int >(conn->mtu_probe_seq))
|
|
{
|
|
conn->mtu_floor = conn->mtu_probe_size;
|
|
conn->mtu_search_update();
|
|
conn->log(UTP_LOG_MTU, "MTU [ACK] floor:%d ceiling:%d current:%d",
|
|
conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
|
|
}
|
|
|
|
// in case our clock is not monotonic
|
|
if(pkt->time_sent < now)
|
|
min_rtt = min< int64 >(min_rtt, now - pkt->time_sent);
|
|
else
|
|
min_rtt = min< int64 >(min_rtt, 50000);
|
|
}
|
|
|
|
// count bytes acked by EACK
|
|
if(selack_ptr != NULL)
|
|
{
|
|
acked_bytes += conn->selective_ack_bytes(
|
|
(pk_ack_nr + 2) & ACK_NR_MASK, selack_ptr, selack_ptr[-1], min_rtt);
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"acks:%d acked_bytes:%u seq_nr:%d cur_window:%u "
|
|
"cur_window_packets:%u relative_seqnr:%u max_window:%u min_rtt:%u "
|
|
"rtt:%u",
|
|
acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window,
|
|
conn->cur_window_packets, seqnr, (uint)conn->max_window,
|
|
(uint)(min_rtt / 1000), conn->rtt);
|
|
#endif
|
|
|
|
uint64 p = pf1->tv_usec;
|
|
|
|
conn->last_measured_delay = conn->ctx->current_ms;
|
|
|
|
// get delay in both directions
|
|
// record the delay to report back
|
|
const uint32 their_delay = (uint32)(p == 0 ? 0 : time - p);
|
|
conn->reply_micro = their_delay;
|
|
uint32 prev_delay_base = conn->their_hist.delay_base;
|
|
if(their_delay != 0)
|
|
conn->their_hist.add_sample(their_delay, conn->ctx->current_ms);
|
|
|
|
// if their new delay base is less than their previous one
|
|
// we should shift our delay base in the other direction in order
|
|
// to take the clock skew into account
|
|
if(prev_delay_base != 0
|
|
&& wrapping_compare_less(conn->their_hist.delay_base, prev_delay_base,
|
|
TIMESTAMP_MASK))
|
|
{
|
|
// never adjust more than 10 milliseconds
|
|
if(prev_delay_base - conn->their_hist.delay_base <= 10000)
|
|
{
|
|
conn->our_hist.shift(prev_delay_base - conn->their_hist.delay_base);
|
|
}
|
|
}
|
|
|
|
const uint32 actual_delay =
|
|
(uint32(pf1->reply_micro) == INT_MAX ? 0 : uint32(pf1->reply_micro));
|
|
|
|
// if the actual delay is 0, it means the other end
|
|
// hasn't received a sample from us yet, and doesn't
|
|
// know what it is. We can't update our history unless
|
|
// we have a true measured sample
|
|
if(actual_delay != 0)
|
|
{
|
|
conn->our_hist.add_sample(actual_delay, conn->ctx->current_ms);
|
|
|
|
// this is keeping an average of the delay samples
|
|
// we've received within the last 5 seconds. We sum
|
|
// all the samples and increase the count in order to
|
|
// calculate the average every 5 seconds. The samples
|
|
// are based off of the average_delay_base to deal with
|
|
// wrapping counters.
|
|
if(conn->average_delay_base == 0)
|
|
conn->average_delay_base = actual_delay;
|
|
int64 average_delay_sample = 0;
|
|
// distance walking from lhs to rhs, downwards
|
|
const uint32 dist_down = conn->average_delay_base - actual_delay;
|
|
// distance walking from lhs to rhs, upwards
|
|
const uint32 dist_up = actual_delay - conn->average_delay_base;
|
|
|
|
if(dist_down > dist_up)
|
|
{
|
|
// assert(dist_up < INT_MAX / 4);
|
|
// average_delay_base < actual_delay, we should end up
|
|
// with a positive sample
|
|
average_delay_sample = dist_up;
|
|
}
|
|
else
|
|
{
|
|
// assert(-int64(dist_down) < INT_MAX / 4);
|
|
// average_delay_base >= actual_delay, we should end up
|
|
// with a negative sample
|
|
average_delay_sample = -int64(dist_down);
|
|
}
|
|
conn->current_delay_sum += average_delay_sample;
|
|
++conn->current_delay_samples;
|
|
|
|
if(conn->ctx->current_ms > conn->average_sample_time)
|
|
{
|
|
int32 prev_average_delay = conn->average_delay;
|
|
|
|
assert(conn->current_delay_sum / conn->current_delay_samples < INT_MAX);
|
|
assert(conn->current_delay_sum / conn->current_delay_samples > -INT_MAX);
|
|
// write the new average
|
|
conn->average_delay =
|
|
(int32)(conn->current_delay_sum / conn->current_delay_samples);
|
|
// each slot represents 5 seconds
|
|
conn->average_sample_time += 5000;
|
|
|
|
conn->current_delay_sum = 0;
|
|
conn->current_delay_samples = 0;
|
|
|
|
// this makes things very confusing when logging the average delay
|
|
//#if !g_log_utp
|
|
// normalize the average samples
|
|
// since we're only interested in the slope
|
|
// of the curve formed by the average delay samples,
|
|
// we can cancel out the actual offset to make sure
|
|
// we won't have problems with wrapping.
|
|
int min_sample = min(prev_average_delay, conn->average_delay);
|
|
int max_sample = max(prev_average_delay, conn->average_delay);
|
|
|
|
// normalize around zero. Try to keep the min <= 0 and max >= 0
|
|
int adjust = 0;
|
|
if(min_sample > 0)
|
|
{
|
|
// adjust all samples (and the baseline) down by min_sample
|
|
adjust = -min_sample;
|
|
}
|
|
else if(max_sample < 0)
|
|
{
|
|
// adjust all samples (and the baseline) up by -max_sample
|
|
adjust = -max_sample;
|
|
}
|
|
if(adjust)
|
|
{
|
|
conn->average_delay_base -= adjust;
|
|
conn->average_delay += adjust;
|
|
prev_average_delay += adjust;
|
|
}
|
|
//#endif
|
|
|
|
// update the clock drift estimate
|
|
// the unit is microseconds per 5 seconds
|
|
// what we're doing is just calculating the average of the
|
|
// difference between each slot. Since each slot is 5 seconds
|
|
// and the timestamps unit are microseconds, we'll end up with
|
|
// the average slope across our history. If there is a consistent
|
|
// trend, it will show up in this value
|
|
|
|
// int64 slope = 0;
|
|
int32 drift = conn->average_delay - prev_average_delay;
|
|
|
|
// clock_drift is a rolling average
|
|
conn->clock_drift = (int64(conn->clock_drift) * 7 + drift) / 8;
|
|
conn->clock_drift_raw = drift;
|
|
}
|
|
}
|
|
|
|
// if our new delay base is less than our previous one
|
|
// we should shift the other end's delay base in the other
|
|
// direction in order to take the clock skew into account
|
|
// This is commented out because it creates bad interactions
|
|
// with our adjustment in the other direction. We don't really
|
|
// need our estimates of the other peer to be very accurate
|
|
// anyway. The problem with shifting here is that we're more
|
|
// likely shift it back later because of a low latency. This
|
|
// second shift back would cause us to shift our delay base
|
|
// which then gets into a death spiral of shifting delay bases
|
|
/* if (prev_delay_base != 0 &&
|
|
wrapping_compare_less(conn->our_hist.delay_base,
|
|
prev_delay_base)) {
|
|
// never adjust more than 10 milliseconds
|
|
if (prev_delay_base - conn->our_hist.delay_base <= 10000) {
|
|
conn->their_hist.Shift(prev_delay_base -
|
|
conn->our_hist.delay_base);
|
|
}
|
|
}
|
|
*/
|
|
|
|
// if the delay estimate exceeds the RTT, adjust the base_delay to
|
|
// compensate
|
|
assert(min_rtt >= 0);
|
|
if(int64(conn->our_hist.get_value()) > min_rtt)
|
|
{
|
|
conn->our_hist.shift((uint32)(conn->our_hist.get_value() - min_rtt));
|
|
}
|
|
|
|
// only apply the congestion controller on acks
|
|
// if we don't have a delay measurement, there's
|
|
// no point in invoking the congestion control
|
|
if(actual_delay != 0 && acked_bytes >= 1)
|
|
conn->apply_ccontrol(acked_bytes, actual_delay, min_rtt);
|
|
|
|
// sanity check, the other end should never ack packets
|
|
// past the point we've sent
|
|
if(acks <= conn->cur_window_packets)
|
|
{
|
|
conn->max_window_user = pf1->windowsize;
|
|
|
|
// If max user window is set to 0, then we startup a timer
|
|
// That will reset it to 1 after 15 seconds.
|
|
if(conn->max_window_user == 0)
|
|
// Reset max_window_user to 1 every 15 seconds.
|
|
conn->zerowindow_time = conn->ctx->current_ms + 15000;
|
|
|
|
// Respond to connect message
|
|
// Switch to CONNECTED state.
|
|
// If this is an ack and we're in still handshaking
|
|
// transition over to the connected state.
|
|
|
|
// Incoming connection completion
|
|
if(pk_flags == ST_DATA && conn->state == CS_SYN_RECV)
|
|
{
|
|
conn->state = CS_CONNECTED;
|
|
}
|
|
|
|
// Outgoing connection completion
|
|
if(pk_flags == ST_STATE && conn->state == CS_SYN_SENT)
|
|
{
|
|
conn->state = CS_CONNECTED;
|
|
|
|
// If the user has defined the ON_CONNECT callback, use that to
|
|
// notify the user that the socket is now connected. If ON_CONNECT
|
|
// has not been defined, notify the user via ON_STATE_CHANGE.
|
|
if(conn->ctx->callbacks[UTP_ON_CONNECT])
|
|
utp_call_on_connect(conn->ctx, conn);
|
|
else
|
|
utp_call_on_state_change(conn->ctx, conn, UTP_STATE_CONNECT);
|
|
|
|
// We've sent a fin, and everything was ACKed (including the FIN).
|
|
// cur_window_packets == acks means that this packet acked all
|
|
// the remaining packets that were in-flight.
|
|
}
|
|
else if(conn->fin_sent && conn->cur_window_packets == acks)
|
|
{
|
|
conn->fin_sent_acked = true;
|
|
if(conn->close_requested)
|
|
{
|
|
conn->state = CS_DESTROY;
|
|
}
|
|
}
|
|
|
|
// Update fast resend counter
|
|
if(wrapping_compare_less(conn->fast_resend_seq_nr,
|
|
(pk_ack_nr + 1) & ACK_NR_MASK, ACK_NR_MASK))
|
|
conn->fast_resend_seq_nr = (pk_ack_nr + 1) & ACK_NR_MASK;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "fast_resend_seq_nr:%u", conn->fast_resend_seq_nr);
|
|
#endif
|
|
|
|
for(int i = 0; i < acks; ++i)
|
|
{
|
|
int ack_status =
|
|
conn->ack_packet(conn->seq_nr - conn->cur_window_packets);
|
|
// if ack_status is 0, the packet was acked.
|
|
// if ack_status is 1, it means that the packet had already been acked
|
|
// if it's 2, the packet has not been sent yet
|
|
// We need to break this loop in the latter case. This could potentially
|
|
// happen if we get an ack_nr that does not exceed what we have stuffed
|
|
// into the outgoing buffer, but does exceed what we have sent
|
|
if(ack_status == 2)
|
|
{
|
|
#ifdef _DEBUG
|
|
OutgoingPacket *pkt = (OutgoingPacket *)conn->outbuf.get(
|
|
conn->seq_nr - conn->cur_window_packets);
|
|
assert(pkt->transmissions == 0);
|
|
#endif
|
|
|
|
break;
|
|
}
|
|
conn->cur_window_packets--;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "decementing cur_window_packets:%u",
|
|
conn->cur_window_packets);
|
|
#endif
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
if(conn->cur_window_packets == 0)
|
|
assert(conn->cur_window == 0);
|
|
#endif
|
|
|
|
// packets in front of this may have been acked by a
|
|
// selective ack (EACK). Keep decreasing the window packet size
|
|
// until we hit a packet that is still waiting to be acked
|
|
// in the send queue
|
|
// this is especially likely to happen when the other end
|
|
// has the EACK send bug older versions of uTP had
|
|
while(conn->cur_window_packets > 0
|
|
&& !conn->outbuf.get(conn->seq_nr - conn->cur_window_packets))
|
|
{
|
|
conn->cur_window_packets--;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "decementing cur_window_packets:%u",
|
|
conn->cur_window_packets);
|
|
#endif
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
if(conn->cur_window_packets == 0)
|
|
assert(conn->cur_window == 0);
|
|
#endif
|
|
|
|
// this invariant should always be true
|
|
assert(conn->cur_window_packets == 0
|
|
|| conn->outbuf.get(conn->seq_nr - conn->cur_window_packets));
|
|
|
|
// flush Nagle
|
|
if(conn->cur_window_packets == 1)
|
|
{
|
|
OutgoingPacket *pkt =
|
|
(OutgoingPacket *)conn->outbuf.get(conn->seq_nr - 1);
|
|
// do we still have quota?
|
|
if(pkt->transmissions == 0)
|
|
{
|
|
conn->send_packet(pkt);
|
|
}
|
|
}
|
|
|
|
// Fast timeout-retry
|
|
if(conn->fast_timeout)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Fast timeout %u,%u,%u?", (uint)conn->cur_window,
|
|
conn->seq_nr - conn->timeout_seq_nr, conn->timeout_seq_nr);
|
|
#endif
|
|
|
|
// if the fast_resend_seq_nr is not pointing to the oldest outstanding
|
|
// packet, it suggests that we've already resent the packet that timed
|
|
// out, and we should leave the fast-timeout mode.
|
|
if(((conn->seq_nr - conn->cur_window_packets) & ACK_NR_MASK)
|
|
!= conn->fast_resend_seq_nr)
|
|
{
|
|
conn->fast_timeout = false;
|
|
}
|
|
else
|
|
{
|
|
// resend the oldest packet and increment fast_resend_seq_nr
|
|
// to not allow another fast resend on it again
|
|
OutgoingPacket *pkt = (OutgoingPacket *)conn->outbuf.get(
|
|
conn->seq_nr - conn->cur_window_packets);
|
|
if(pkt && pkt->transmissions > 0)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Packet %u fast timeout-retry.",
|
|
conn->seq_nr - conn->cur_window_packets);
|
|
#endif
|
|
|
|
#ifdef _DEBUG
|
|
++conn->_stats.fastrexmit;
|
|
#endif
|
|
|
|
conn->fast_resend_seq_nr++;
|
|
conn->send_packet(pkt);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process selective acknowledgment
|
|
if(selack_ptr != NULL)
|
|
{
|
|
conn->selective_ack(pk_ack_nr + 2, selack_ptr, selack_ptr[-1]);
|
|
}
|
|
|
|
// this invariant should always be true
|
|
assert(conn->cur_window_packets == 0
|
|
|| conn->outbuf.get(conn->seq_nr - conn->cur_window_packets));
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"acks:%d acked_bytes:%u seq_nr:%u cur_window:%u "
|
|
"cur_window_packets:%u ",
|
|
acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window,
|
|
conn->cur_window_packets);
|
|
#endif
|
|
|
|
// In case the ack dropped the current window below
|
|
// the max_window size, Mark the socket as writable
|
|
if(conn->state == CS_CONNECTED_FULL && !conn->is_full())
|
|
{
|
|
conn->state = CS_CONNECTED;
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"Socket writable. max_window:%u cur_window:%u packet_size:%u",
|
|
(uint)conn->max_window, (uint)conn->cur_window,
|
|
(uint)conn->get_packet_size());
|
|
#endif
|
|
utp_call_on_state_change(conn->ctx, conn, UTP_STATE_WRITABLE);
|
|
}
|
|
|
|
if(pk_flags == ST_STATE)
|
|
{
|
|
// This is a state packet only.
|
|
return 0;
|
|
}
|
|
|
|
// The connection is not in a state that can accept data?
|
|
if(conn->state != CS_CONNECTED && conn->state != CS_CONNECTED_FULL)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
// Is this a finalize packet?
|
|
if(pk_flags == ST_FIN && !conn->got_fin)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Got FIN eof_pkt:%u", pk_seq_nr);
|
|
#endif
|
|
|
|
conn->got_fin = true;
|
|
conn->eof_pkt = pk_seq_nr;
|
|
// at this point, it is possible for the
|
|
// other end to have sent packets with
|
|
// sequence numbers higher than seq_nr.
|
|
// if this is the case, our reorder_count
|
|
// is out of sync. This case is dealt with
|
|
// when we re-order and hit the eof_pkt.
|
|
// we'll just ignore any packets with
|
|
// sequence numbers past this
|
|
}
|
|
|
|
// Getting an in-order packet?
|
|
if(seqnr == 0)
|
|
{
|
|
size_t count = packet_end - data;
|
|
if(count > 0 && !conn->read_shutdown)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Got Data len:%u (rb:%u)", (uint)count,
|
|
(uint)utp_call_get_read_buffer_size(conn->ctx, conn));
|
|
#endif
|
|
|
|
// Post bytes to the upper layer
|
|
utp_call_on_read(conn->ctx, conn, data, count);
|
|
}
|
|
conn->ack_nr++;
|
|
|
|
// Check if the next packet has been received too, but waiting
|
|
// in the reorder buffer.
|
|
for(;;)
|
|
{
|
|
if(!conn->got_fin_reached && conn->got_fin
|
|
&& conn->eof_pkt == conn->ack_nr)
|
|
{
|
|
conn->got_fin_reached = true;
|
|
conn->rto_timeout =
|
|
conn->ctx->current_ms + min< uint >(conn->rto * 3, 60);
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Posting EOF");
|
|
#endif
|
|
|
|
utp_call_on_state_change(conn->ctx, conn, UTP_STATE_EOF);
|
|
|
|
// if the other end wants to close, ack
|
|
conn->send_ack();
|
|
|
|
// reorder_count is not necessarily 0 at this point.
|
|
// even though it is most of the time, the other end
|
|
// may have sent packets with higher sequence numbers
|
|
// than what later end up being eof_pkt
|
|
// since we have received all packets up to eof_pkt
|
|
// just ignore the ones after it.
|
|
conn->reorder_count = 0;
|
|
}
|
|
|
|
// Quick get-out in case there is nothing to reorder
|
|
if(conn->reorder_count == 0)
|
|
break;
|
|
|
|
// Check if there are additional buffers in the reorder buffers
|
|
// that need delivery.
|
|
byte *p = (byte *)conn->inbuf.get(conn->ack_nr + 1);
|
|
if(p == NULL)
|
|
break;
|
|
conn->inbuf.put(conn->ack_nr + 1, NULL);
|
|
count = *(uint *)p;
|
|
if(count > 0 && !conn->read_shutdown)
|
|
{
|
|
// Pass the bytes to the upper layer
|
|
utp_call_on_read(conn->ctx, conn, p + sizeof(uint), count);
|
|
}
|
|
conn->ack_nr++;
|
|
|
|
// Free the element from the reorder buffer
|
|
free(p);
|
|
assert(conn->reorder_count > 0);
|
|
conn->reorder_count--;
|
|
}
|
|
|
|
conn->schedule_ack();
|
|
}
|
|
else
|
|
{
|
|
// Getting an out of order packet.
|
|
// The packet needs to be remembered and rearranged later.
|
|
|
|
// if we have received a FIN packet, and the EOF-sequence number
|
|
// is lower than the sequence number of the packet we just received
|
|
// something is wrong.
|
|
if(conn->got_fin && pk_seq_nr > conn->eof_pkt)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"Got an invalid packet sequence number, past EOF "
|
|
"reorder_count:%u len:%u (rb:%u)",
|
|
conn->reorder_count, (uint)(packet_end - data),
|
|
(uint)utp_call_get_read_buffer_size(conn->ctx, conn));
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// if the sequence number is entirely off the expected
|
|
// one, just drop it. We can't allocate buffer space in
|
|
// the inbuf entirely based on untrusted input
|
|
if(seqnr > 0x3ff)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"0x%08x: Got an invalid packet sequence number, too far off "
|
|
"reorder_count:%u len:%u (rb:%u)",
|
|
conn->reorder_count, (uint)(packet_end - data),
|
|
(uint)utp_call_get_read_buffer_size(conn->ctx, conn));
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// we need to grow the circle buffer before we
|
|
// check if the packet is already in here, so that
|
|
// we don't end up looking at an older packet (since
|
|
// the indices wraps around).
|
|
conn->inbuf.ensure_size(pk_seq_nr + 1, seqnr + 1);
|
|
|
|
// Has this packet already been received? (i.e. a duplicate)
|
|
// If that is the case, just discard it.
|
|
if(conn->inbuf.get(pk_seq_nr) != NULL)
|
|
{
|
|
#ifdef _DEBUG
|
|
++conn->_stats.nduprecv;
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Allocate memory to fit the packet that needs to re-ordered
|
|
byte *mem = (byte *)malloc((packet_end - data) + sizeof(uint));
|
|
*(uint *)mem = (uint)(packet_end - data);
|
|
memcpy(mem + sizeof(uint), data, packet_end - data);
|
|
|
|
// Insert into reorder buffer and increment the count
|
|
// of # of packets to be reordered.
|
|
// we add one to seqnr in order to leave the last
|
|
// entry empty, that way the assert in send_ack
|
|
// is valid. we have to add one to seqnr too, in order
|
|
// to make the circular buffer grow around the correct
|
|
// point (which is conn->ack_nr + 1).
|
|
assert(conn->inbuf.get(pk_seq_nr) == NULL);
|
|
assert((pk_seq_nr & conn->inbuf.mask)
|
|
!= ((conn->ack_nr + 1) & conn->inbuf.mask));
|
|
conn->inbuf.put(pk_seq_nr, mem);
|
|
conn->reorder_count++;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"0x%08x: Got out of order data reorder_count:%u len:%u (rb:%u)",
|
|
conn->reorder_count, (uint)(packet_end - data),
|
|
(uint)utp_call_get_read_buffer_size(conn->ctx, conn));
|
|
#endif
|
|
|
|
conn->schedule_ack();
|
|
}
|
|
|
|
return (size_t)(packet_end - data);
|
|
}
|
|
|
|
// Returns the version stored in the packet header, or 0 when the header's
// type is not below ST_NUM_STATES or its `ext` field is 3 or greater.
inline byte
UTP_Version(PacketFormatV1 const *pf)
{
  // packet type outside the known range
  if(!(pf->type() < ST_NUM_STATES))
    return 0;

  // extension id outside the accepted range
  if(!(pf->ext < 3))
    return 0;

  return pf->version();
}
|
|
|
|
// Tears the socket down: reports UTP_STATE_DESTROYING through the state
// change callback, unlinks the socket from the context's bookkeeping and
// frees everything still stored in the inbound and outbound buffers.
UTPSocket::~UTPSocket()
{
#if UTP_DEBUG_LOGGING
  log(UTP_LOG_DEBUG, "Killing socket");
#endif

  utp_call_on_state_change(ctx, this, UTP_STATE_DESTROYING);

  // do not leave the context pointing at a socket that is going away
  if(this == ctx->last_utp_socket)
    ctx->last_utp_socket = NULL;

  // Unregister from the context's socket table; an entry is expected to exist.
  UTPSocketKeyData *removed =
      ctx->utp_sockets->Delete(UTPSocketKey(addr, conn_id_recv));
  assert(removed);
  (void)removed;  // keeps the variable "used" when assert() compiles to nothing

  // the socket may also be queued in ack_sockets
  removeSocketFromAckList(this);

  // Free every slot of both buffers (indices 0..mask inclusive) ...
  for(size_t slot = 0; slot <= inbuf.mask; ++slot)
    free(inbuf.elements[slot]);

  for(size_t slot = 0; slot <= outbuf.mask; ++slot)
    free(outbuf.elements[slot]);

  // ... and then the slot arrays themselves.
  // TODO: The circular buffer should have a destructor
  free(inbuf.elements);
  free(outbuf.elements);
}
|
|
|
|
void
|
|
UTP_FreeAll(struct UTPSocketHT *utp_sockets)
|
|
{
|
|
utp_hash_iterator_t it;
|
|
UTPSocketKeyData *keyData;
|
|
while((keyData = utp_sockets->Iterate(it)))
|
|
{
|
|
delete keyData->socket;
|
|
}
|
|
}
|
|
|
|
void
|
|
utp_initialize_socket(utp_socket *conn, const struct sockaddr *addr,
|
|
socklen_t addrlen, bool need_seed_gen, uint32 conn_seed,
|
|
uint32 conn_id_recv, uint32 conn_id_send)
|
|
{
|
|
PackedSockAddr psaddr =
|
|
PackedSockAddr((const SOCKADDR_STORAGE *)addr, addrlen);
|
|
|
|
if(need_seed_gen)
|
|
{
|
|
do
|
|
{
|
|
conn_seed = utp_call_get_random(conn->ctx, conn);
|
|
// we identify v1 and higher by setting the first two bytes to 0x0001
|
|
conn_seed &= 0xffff;
|
|
} while(conn->ctx->utp_sockets->Lookup(UTPSocketKey(psaddr, conn_seed)));
|
|
|
|
conn_id_recv += conn_seed;
|
|
conn_id_send += conn_seed;
|
|
}
|
|
|
|
conn->state = CS_IDLE;
|
|
conn->conn_seed = conn_seed;
|
|
conn->conn_id_recv = conn_id_recv;
|
|
conn->conn_id_send = conn_id_send;
|
|
conn->addr = psaddr;
|
|
conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, NULL);
|
|
conn->last_got_packet = conn->ctx->current_ms;
|
|
conn->last_sent_packet = conn->ctx->current_ms;
|
|
conn->last_measured_delay = conn->ctx->current_ms + 0x70000000;
|
|
conn->average_sample_time = conn->ctx->current_ms + 5000;
|
|
conn->last_rwin_decay = conn->ctx->current_ms - MAX_WINDOW_DECAY;
|
|
|
|
conn->our_hist.clear(conn->ctx->current_ms);
|
|
conn->their_hist.clear(conn->ctx->current_ms);
|
|
conn->rtt_hist.clear(conn->ctx->current_ms);
|
|
|
|
// initialize MTU floor and ceiling
|
|
conn->mtu_reset();
|
|
conn->mtu_last = conn->mtu_ceiling;
|
|
|
|
conn->ctx->utp_sockets->Add(UTPSocketKey(conn->addr, conn->conn_id_recv))
|
|
->socket = conn;
|
|
|
|
// we need to fit one packet in the window when we start the connection
|
|
conn->max_window = conn->get_packet_size();
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP socket initialized");
|
|
#endif
|
|
}
|
|
|
|
// Allocates a new socket bound to `ctx` and puts every field set here into
// its initial state (CS_UNINITIALIZED, empty 16-slot in/out buffers, options
// copied from the context).
//
// Returns the new socket, or NULL when ctx is NULL.
utp_socket *
utp_create_socket(utp_context *ctx)
{
  assert(ctx);
  if(!ctx)
    return NULL;

  UTPSocket *conn = new UTPSocket;  // TODO: UTPSocket should have a constructor

  // --- identity / ownership ---
  conn->state    = CS_UNINITIALIZED;
  conn->ctx      = ctx;
  conn->userdata = NULL;
  // set the index of every new socket in ack_sockets to -1, which also
  // means it is not in ack_sockets yet
  conn->ida = -1;

  // --- options inherited from the context ---
  conn->target_delay = ctx->target_delay;
  conn->opt_sndbuf   = ctx->opt_sndbuf;
  conn->opt_rcvbuf   = ctx->opt_rcvbuf;

  // --- sequence numbers and packet counters ---
  conn->seq_nr             = 1;
  conn->ack_nr             = 0;
  conn->fast_resend_seq_nr = conn->seq_nr;
  conn->timeout_seq_nr     = 0;
  conn->eof_pkt            = 0;
  conn->reorder_count      = 0;
  conn->duplicate_ack      = 0;
  conn->retransmit_count   = 0;

  // --- connection flags ---
  conn->got_fin         = false;
  conn->got_fin_reached = false;
  conn->fin_sent        = false;
  conn->fin_sent_acked  = false;
  conn->read_shutdown   = false;
  conn->close_requested = false;
  conn->fast_timeout    = false;

  // --- round-trip and timeout state ---
  conn->rtt                = 0;
  conn->rtt_var            = 800;
  conn->rto                = 3000;
  conn->retransmit_timeout = 0;
  conn->rto_timeout        = 0;
  conn->zerowindow_time    = 0;

  // --- window state ---
  conn->cur_window            = 0;
  conn->cur_window_packets    = 0;
  conn->last_rcv_win          = 0;
  conn->last_maxed_out_window = 0;
  conn->max_window_user       = 255 * PACKET_SIZE;
  conn->slow_start            = true;
  conn->ssthresh              = conn->opt_sndbuf;

  // --- delay measurement and clock drift ---
  conn->average_delay         = 0;
  conn->average_delay_base    = 0;
  conn->current_delay_sum     = 0;
  conn->current_delay_samples = 0;
  conn->reply_micro           = 0;
  conn->clock_drift           = 0;
  conn->clock_drift_raw       = 0;

  // --- MTU probing ---
  conn->mtu_probe_seq  = 0;
  conn->mtu_probe_size = 0;

  // --- packet buffers: 16 zeroed slots each, mask = slots - 1 ---
  conn->outbuf.mask     = 15;
  conn->inbuf.mask      = 15;
  conn->outbuf.elements = (void **)calloc(16, sizeof(void *));
  conn->inbuf.elements  = (void **)calloc(16, sizeof(void *));

  memset(conn->extensions, 0, sizeof(conn->extensions));

#ifdef _DEBUG
  memset(&conn->_stats, 0, sizeof(utp_socket_stats));
#endif

  return conn;
}
|
|
|
|
int
|
|
utp_context_set_option(utp_context *ctx, int opt, int val)
|
|
{
|
|
assert(ctx);
|
|
if(!ctx)
|
|
return -1;
|
|
|
|
switch(opt)
|
|
{
|
|
case UTP_LOG_NORMAL:
|
|
ctx->log_normal = val ? true : false;
|
|
return 0;
|
|
|
|
case UTP_LOG_MTU:
|
|
ctx->log_mtu = val ? true : false;
|
|
return 0;
|
|
|
|
case UTP_LOG_DEBUG:
|
|
ctx->log_debug = val ? true : false;
|
|
return 0;
|
|
|
|
case UTP_TARGET_DELAY:
|
|
ctx->target_delay = val;
|
|
return 0;
|
|
|
|
case UTP_SNDBUF:
|
|
assert(val >= 1);
|
|
ctx->opt_sndbuf = val;
|
|
return 0;
|
|
|
|
case UTP_RCVBUF:
|
|
assert(val >= 1);
|
|
ctx->opt_rcvbuf = val;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
utp_context_get_option(utp_context *ctx, int opt)
|
|
{
|
|
assert(ctx);
|
|
if(!ctx)
|
|
return -1;
|
|
|
|
switch(opt)
|
|
{
|
|
case UTP_LOG_NORMAL:
|
|
return ctx->log_normal ? 1 : 0;
|
|
case UTP_LOG_MTU:
|
|
return ctx->log_mtu ? 1 : 0;
|
|
case UTP_LOG_DEBUG:
|
|
return ctx->log_debug ? 1 : 0;
|
|
case UTP_TARGET_DELAY:
|
|
return ctx->target_delay;
|
|
case UTP_SNDBUF:
|
|
return ctx->opt_sndbuf;
|
|
case UTP_RCVBUF:
|
|
return ctx->opt_rcvbuf;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
utp_setsockopt(UTPSocket *conn, int opt, int val)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return -1;
|
|
|
|
switch(opt)
|
|
{
|
|
case UTP_SNDBUF:
|
|
assert(val >= 1);
|
|
conn->opt_sndbuf = val;
|
|
return 0;
|
|
|
|
case UTP_RCVBUF:
|
|
assert(val >= 1);
|
|
conn->opt_rcvbuf = val;
|
|
return 0;
|
|
|
|
case UTP_TARGET_DELAY:
|
|
conn->target_delay = val;
|
|
return 0;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
utp_getsockopt(UTPSocket *conn, int opt)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return -1;
|
|
|
|
switch(opt)
|
|
{
|
|
case UTP_SNDBUF:
|
|
return conn->opt_sndbuf;
|
|
case UTP_RCVBUF:
|
|
return conn->opt_rcvbuf;
|
|
case UTP_TARGET_DELAY:
|
|
return conn->target_delay;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
// Try to connect to a specified host.
|
|
int
|
|
utp_connect(utp_socket *conn, const struct sockaddr *to, socklen_t tolen)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return -1;
|
|
|
|
assert(conn->state == CS_UNINITIALIZED);
|
|
if(conn->state != CS_UNINITIALIZED)
|
|
{
|
|
conn->state = CS_DESTROY;
|
|
return -1;
|
|
}
|
|
|
|
utp_initialize_socket(conn, to, tolen, true, 0, 0, 1);
|
|
|
|
assert(conn->cur_window_packets == 0);
|
|
assert(conn->outbuf.get(conn->seq_nr) == NULL);
|
|
assert(sizeof(PacketFormatV1) == 20);
|
|
|
|
conn->state = CS_SYN_SENT;
|
|
conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn);
|
|
|
|
// Create and send a connect message
|
|
|
|
// used in parse_log.py
|
|
conn->log(UTP_LOG_NORMAL,
|
|
"UTP_Connect conn_seed:%u packet_size:%u (B) "
|
|
"target_delay:%u (ms) delay_history:%u "
|
|
"delay_base_history:%u (minutes)",
|
|
conn->conn_seed, PACKET_SIZE, conn->target_delay / 1000,
|
|
CUR_DELAY_SIZE, DELAY_BASE_HISTORY);
|
|
|
|
// Setup initial timeout timer.
|
|
conn->retransmit_timeout = 3000;
|
|
conn->rto_timeout = conn->ctx->current_ms + conn->retransmit_timeout;
|
|
conn->last_rcv_win = conn->get_rcv_window();
|
|
|
|
// if you need compatibiltiy with 1.8.1, use this. it increases attackability
|
|
// though.
|
|
// conn->seq_nr = 1;
|
|
conn->seq_nr = utp_call_get_random(conn->ctx, conn);
|
|
|
|
// Create the connect packet.
|
|
const size_t header_size = sizeof(PacketFormatV1);
|
|
|
|
OutgoingPacket *pkt =
|
|
(OutgoingPacket *)malloc(sizeof(OutgoingPacket) - 1 + header_size);
|
|
PacketFormatV1 *p1 = (PacketFormatV1 *)pkt->data;
|
|
|
|
memset(p1, 0, header_size);
|
|
// SYN packets are special, and have the receive ID in the connid field,
|
|
// instead of conn_id_send.
|
|
p1->set_version(1);
|
|
p1->set_type(ST_SYN);
|
|
p1->ext = 0;
|
|
p1->connid = conn->conn_id_recv;
|
|
p1->windowsize = (uint32)conn->last_rcv_win;
|
|
p1->seq_nr = conn->seq_nr;
|
|
pkt->transmissions = 0;
|
|
pkt->length = header_size;
|
|
pkt->payload = 0;
|
|
|
|
/*
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Sending connect %s [%u].",
|
|
addrfmt(conn->addr, addrbuf), conn_seed);
|
|
#endif
|
|
*/
|
|
|
|
// Remember the message in the outgoing queue.
|
|
conn->outbuf.ensure_size(conn->seq_nr, conn->cur_window_packets);
|
|
conn->outbuf.put(conn->seq_nr, pkt);
|
|
conn->seq_nr++;
|
|
conn->cur_window_packets++;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "incrementing cur_window_packets:%u",
|
|
conn->cur_window_packets);
|
|
#endif
|
|
|
|
conn->send_packet(pkt);
|
|
return 0;
|
|
}
|
|
|
|
// Returns 1 if the UDP payload was recognized as a UTP packet, or 0 if it was
|
|
// not
|
|
int
|
|
utp_process_udp(utp_context *ctx, const byte *buffer, size_t len,
|
|
const struct sockaddr *to, socklen_t tolen)
|
|
{
|
|
assert(ctx);
|
|
if(!ctx)
|
|
return 0;
|
|
|
|
assert(buffer);
|
|
if(!buffer)
|
|
return 0;
|
|
|
|
assert(to);
|
|
if(!to)
|
|
return 0;
|
|
|
|
const PackedSockAddr addr((const SOCKADDR_STORAGE *)to, tolen);
|
|
|
|
if(len < sizeof(PacketFormatV1))
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u too small",
|
|
addrfmt(addr, addrbuf), (uint)len);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
const PacketFormatV1 *pf1 = (PacketFormatV1 *)buffer;
|
|
const byte version = UTP_Version(pf1);
|
|
const uint32 id = uint32(pf1->connid);
|
|
|
|
if(version != 1)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"recv %s len:%u version:%u unsupported version",
|
|
addrfmt(addr, addrbuf), (uint)len, version);
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u id:%u", addrfmt(addr, addrbuf),
|
|
(uint)len, id);
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv id:%u seq_nr:%u ack_nr:%u", id,
|
|
(uint)pf1->seq_nr, (uint)pf1->ack_nr);
|
|
#endif
|
|
|
|
const byte flags = pf1->type();
|
|
|
|
if(flags == ST_RESET)
|
|
{
|
|
// id is either our recv id or our send id
|
|
// if it's our send id, and we initiated the connection, our recv id is id +
|
|
// 1 if it's our send id, and we did not initiate the connection, our recv
|
|
// id is id - 1 we have to check every case
|
|
|
|
UTPSocketKeyData *keyData;
|
|
if((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id)))
|
|
|| ((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1)))
|
|
&& keyData->socket->conn_id_send == id)
|
|
|| ((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id - 1)))
|
|
&& keyData->socket->conn_id_send == id))
|
|
{
|
|
UTPSocket *conn = keyData->socket;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv RST for existing connection");
|
|
#endif
|
|
|
|
if(conn->close_requested)
|
|
conn->state = CS_DESTROY;
|
|
else
|
|
conn->state = CS_RESET;
|
|
|
|
utp_call_on_overhead_statistics(conn->ctx, conn, false,
|
|
len + conn->get_udp_overhead(),
|
|
close_overhead);
|
|
const int err =
|
|
(conn->state == CS_SYN_SENT) ? UTP_ECONNREFUSED : UTP_ECONNRESET;
|
|
utp_call_on_error(conn->ctx, conn, err);
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv RST for unknown connection");
|
|
#endif
|
|
}
|
|
return 1;
|
|
}
|
|
else if(flags != ST_SYN)
|
|
{
|
|
UTPSocket *conn = NULL;
|
|
|
|
if(ctx->last_utp_socket && ctx->last_utp_socket->addr == addr
|
|
&& ctx->last_utp_socket->conn_id_recv == id)
|
|
{
|
|
conn = ctx->last_utp_socket;
|
|
}
|
|
else
|
|
{
|
|
UTPSocketKeyData *keyData =
|
|
ctx->utp_sockets->Lookup(UTPSocketKey(addr, id));
|
|
if(keyData)
|
|
{
|
|
conn = keyData->socket;
|
|
ctx->last_utp_socket = conn;
|
|
}
|
|
}
|
|
|
|
if(conn)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv processing");
|
|
#endif
|
|
|
|
const size_t read = utp_process_incoming(conn, buffer, len);
|
|
utp_call_on_overhead_statistics(conn->ctx, conn, false,
|
|
(len - read) + conn->get_udp_overhead(),
|
|
header_overhead);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
// We have not found a matching utp_socket, and this isn't a SYN. Reject it.
|
|
const uint32 seq_nr = pf1->seq_nr;
|
|
if(flags != ST_SYN)
|
|
{
|
|
ctx->current_ms = utp_call_get_milliseconds(ctx, NULL);
|
|
|
|
for(size_t i = 0; i < ctx->rst_info.GetCount(); i++)
|
|
{
|
|
if((ctx->rst_info[i].connid == id) && (ctx->rst_info[i].addr == addr)
|
|
&& (ctx->rst_info[i].ack_nr == seq_nr))
|
|
{
|
|
ctx->rst_info[i].timestamp = ctx->current_ms;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"recv not sending RST to non-SYN (stored)");
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if(ctx->rst_info.GetCount() > RST_INFO_LIMIT)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"recv not sending RST to non-SYN (limit at %u stored)",
|
|
(uint)ctx->rst_info.GetCount());
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv send RST to non-SYN (%u stored)",
|
|
(uint)ctx->rst_info.GetCount());
|
|
#endif
|
|
|
|
RST_Info &r = ctx->rst_info.Append();
|
|
r.addr = addr;
|
|
r.connid = id;
|
|
r.ack_nr = seq_nr;
|
|
r.timestamp = ctx->current_ms;
|
|
|
|
UTPSocket::send_rst(ctx, addr, id, seq_nr, utp_call_get_random(ctx, NULL));
|
|
return 1;
|
|
}
|
|
|
|
if(ctx->callbacks[UTP_ON_ACCEPT])
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "Incoming connection from %s",
|
|
addrfmt(addr, addrbuf));
|
|
#endif
|
|
|
|
UTPSocketKeyData *keyData =
|
|
ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1));
|
|
if(keyData)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"rejected incoming connection, connection already exists");
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
if(ctx->utp_sockets->GetCount() > 3000)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"rejected incoming connection, too many uTP sockets %d",
|
|
ctx->utp_sockets->GetCount());
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
// true means yes, block connection. false means no, don't block.
|
|
if(utp_call_on_firewall(ctx, to, tolen))
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"rejected incoming connection, firewall callback returned true");
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
// Create a new UTP socket to handle this new connection
|
|
UTPSocket *conn = utp_create_socket(ctx);
|
|
utp_initialize_socket(conn, to, tolen, false, id, id + 1, id);
|
|
conn->ack_nr = seq_nr;
|
|
conn->seq_nr = utp_call_get_random(ctx, NULL);
|
|
conn->fast_resend_seq_nr = conn->seq_nr;
|
|
conn->state = CS_SYN_RECV;
|
|
|
|
const size_t read = utp_process_incoming(conn, buffer, len, true);
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "recv send connect ACK");
|
|
#endif
|
|
|
|
conn->send_ack(true);
|
|
|
|
utp_call_on_accept(ctx, conn, to, tolen);
|
|
|
|
// we report overhead after on_accept(), because the callbacks are setup now
|
|
utp_call_on_overhead_statistics(conn->ctx, conn, false,
|
|
(len - read) + conn->get_udp_overhead(),
|
|
header_overhead); // SYN
|
|
utp_call_on_overhead_statistics(conn->ctx, conn, true, conn->get_overhead(),
|
|
ack_overhead); // SYNACK
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"rejected incoming connection, UTP_ON_ACCEPT callback not set");
|
|
#endif
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
// Called by utp_process_icmp_fragmentation() and utp_process_icmp_error() below
|
|
static UTPSocket *
|
|
parse_icmp_payload(utp_context *ctx, const byte *buffer, size_t len,
|
|
const struct sockaddr *to, socklen_t tolen)
|
|
{
|
|
assert(ctx);
|
|
if(!ctx)
|
|
return NULL;
|
|
|
|
assert(buffer);
|
|
if(!buffer)
|
|
return NULL;
|
|
|
|
assert(to);
|
|
if(!to)
|
|
return NULL;
|
|
|
|
const PackedSockAddr addr((const SOCKADDR_STORAGE *)to, tolen);
|
|
|
|
// ICMP packets are only required to quote the first 8 bytes of the layer4
|
|
// payload. The UDP payload is 8 bytes, and the UTP header is another 20
|
|
// bytes. So, in order to find the entire UTP header, we need the ICMP
|
|
// packet to quote 28 bytes.
|
|
if(len < sizeof(PacketFormatV1))
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: runt length %d",
|
|
addrfmt(addr, addrbuf), len);
|
|
#endif
|
|
return NULL;
|
|
}
|
|
|
|
const PacketFormatV1 *pf = (PacketFormatV1 *)buffer;
|
|
const byte version = UTP_Version(pf);
|
|
const uint32 id = uint32(pf->connid);
|
|
|
|
if(version != 1)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: not UTP version 1",
|
|
addrfmt(addr, addrbuf));
|
|
#endif
|
|
return NULL;
|
|
}
|
|
|
|
UTPSocketKeyData *keyData;
|
|
|
|
if((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id)))
|
|
|| ((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1)))
|
|
&& keyData->socket->conn_id_send == id)
|
|
|| ((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id - 1)))
|
|
&& keyData->socket->conn_id_send == id))
|
|
{
|
|
return keyData->socket;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"Ignoring ICMP from %s: No matching connection found for id %u",
|
|
addrfmt(addr, addrbuf), id);
|
|
#endif
|
|
return NULL;
|
|
}
|
|
|
|
// Should be called when an ICMP Type 3, Code 4 packet (fragmentation needed) is
|
|
// received, to adjust the MTU
|
|
//
|
|
// Returns 1 if the UDP payload (delivered in the ICMP packet) was recognized as
|
|
// a UTP packet, or 0 if it was not
|
|
//
|
|
// @ctx: utp_context
|
|
// @buf: Contents of the original UDP payload, which the ICMP packet quoted.
|
|
// *Not* the ICMP packet itself.
|
|
// @len: buffer length
|
|
// @to: destination address of the original UDP pakcet
|
|
// @tolen: address length
|
|
// @next_hop_mtu:
|
|
int
|
|
utp_process_icmp_fragmentation(utp_context *ctx, const byte *buffer, size_t len,
|
|
const struct sockaddr *to, socklen_t tolen,
|
|
uint16 next_hop_mtu)
|
|
{
|
|
UTPSocket *conn = parse_icmp_payload(ctx, buffer, len, to, tolen);
|
|
if(!conn)
|
|
return 0;
|
|
|
|
// Constrain the next_hop_mtu to sane values. It might not be initialized or
|
|
// sent properly
|
|
if(next_hop_mtu >= 576 && next_hop_mtu < 0x2000)
|
|
{
|
|
conn->mtu_ceiling = min< uint32 >(next_hop_mtu, conn->mtu_ceiling);
|
|
conn->mtu_search_update();
|
|
// this is something of a speecial case, where we don't set mtu_last
|
|
// to the value in between the floor and the ceiling. We can update the
|
|
// floor, because there might be more network segments after the one
|
|
// that sent this ICMP with smaller MTUs. But we want to test this
|
|
// MTU size first. If the next probe gets through, mtu_floor is updated
|
|
conn->mtu_last = conn->mtu_ceiling;
|
|
}
|
|
else
|
|
{
|
|
// Otherwise, binary search. At this point we don't actually know
|
|
// what size the packet that failed was, and apparently we can't
|
|
// trust the next hop mtu either. It seems reasonably conservative
|
|
// to just lower the ceiling. This should not happen on working networks
|
|
// anyway.
|
|
conn->mtu_ceiling = (conn->mtu_floor + conn->mtu_ceiling) / 2;
|
|
conn->mtu_search_update();
|
|
}
|
|
|
|
conn->log(UTP_LOG_MTU, "MTU [ICMP] floor:%d ceiling:%d current:%d",
|
|
conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
|
|
return 1;
|
|
}
|
|
|
|
// Should be called when an ICMP message is received that should tear down the
|
|
// connection.
|
|
//
|
|
// Returns 1 if the UDP payload (delivered in the ICMP packet) was recognized as
|
|
// a UTP packet, or 0 if it was not
|
|
//
|
|
// @ctx: utp_context
|
|
// @buf: Contents of the original UDP payload, which the ICMP packet quoted.
|
|
// *Not* the ICMP packet itself.
|
|
// @len: buffer length
|
|
// @to: destination address of the original UDP pakcet
|
|
// @tolen: address length
|
|
int
|
|
utp_process_icmp_error(utp_context *ctx, const byte *buffer, size_t len,
|
|
const struct sockaddr *to, socklen_t tolen)
|
|
{
|
|
UTPSocket *conn = parse_icmp_payload(ctx, buffer, len, to, tolen);
|
|
if(!conn)
|
|
return 0;
|
|
|
|
const int err =
|
|
(conn->state == CS_SYN_SENT) ? UTP_ECONNREFUSED : UTP_ECONNRESET;
|
|
const PackedSockAddr addr((const SOCKADDR_STORAGE *)to, tolen);
|
|
|
|
switch(conn->state)
|
|
{
|
|
// Don't pass on errors for idle/closed connections
|
|
case CS_IDLE:
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s in state CS_IDLE, ignoring",
|
|
addrfmt(addr, addrbuf));
|
|
#endif
|
|
return 1;
|
|
|
|
default:
|
|
if(conn->close_requested)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"ICMP from %s after close, setting state to CS_DESTROY and "
|
|
"causing error %d",
|
|
addrfmt(addr, addrbuf), err);
|
|
#endif
|
|
conn->state = CS_DESTROY;
|
|
}
|
|
else
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
ctx->log(UTP_LOG_DEBUG, NULL,
|
|
"ICMP from %s, setting state to CS_RESET and causing error %d",
|
|
addrfmt(addr, addrbuf), err);
|
|
#endif
|
|
conn->state = CS_RESET;
|
|
}
|
|
break;
|
|
}
|
|
|
|
utp_call_on_error(conn->ctx, conn, err);
|
|
return 1;
|
|
}
|
|
|
|
// Write bytes to the UTP socket. Returns the number of bytes written.
|
|
// 0 indicates the socket is no longer writable, -1 indicates an error
|
|
ssize_t
|
|
utp_writev(utp_socket *conn, struct utp_iovec *iovec_input, size_t num_iovecs)
|
|
{
|
|
static utp_iovec iovec[UTP_IOV_MAX];
|
|
|
|
assert(conn);
|
|
if(!conn)
|
|
return -1;
|
|
|
|
assert(iovec_input);
|
|
if(!iovec_input)
|
|
return -1;
|
|
|
|
assert(num_iovecs);
|
|
if(!num_iovecs)
|
|
return -1;
|
|
|
|
if(num_iovecs > UTP_IOV_MAX)
|
|
num_iovecs = UTP_IOV_MAX;
|
|
|
|
memcpy(iovec, iovec_input, sizeof(struct utp_iovec) * num_iovecs);
|
|
|
|
size_t bytes = 0;
|
|
size_t sent = 0;
|
|
for(size_t i = 0; i < num_iovecs; i++)
|
|
bytes += iovec[i].iov_len;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
size_t param = bytes;
|
|
#endif
|
|
|
|
if(conn->state != CS_CONNECTED)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = false (not CS_CONNECTED)",
|
|
(uint)bytes);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
if(conn->fin_sent)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = false (fin_sent already)",
|
|
(uint)bytes);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn);
|
|
|
|
// don't send unless it will all fit in the window
|
|
size_t packet_size = conn->get_packet_size();
|
|
size_t num_to_send = min< size_t >(bytes, packet_size);
|
|
while(!conn->is_full(num_to_send))
|
|
{
|
|
// Send an outgoing packet.
|
|
// Also add it to the outgoing of packets that have been sent but not ACKed.
|
|
|
|
bytes -= num_to_send;
|
|
sent += num_to_send;
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG,
|
|
"Sending packet. seq_nr:%u ack_nr:%u wnd:%u/%u/%u rcv_win:%u "
|
|
"size:%u cur_window_packets:%u",
|
|
conn->seq_nr, conn->ack_nr,
|
|
(uint)(conn->cur_window + num_to_send), (uint)conn->max_window,
|
|
(uint)conn->max_window_user, (uint)conn->last_rcv_win,
|
|
num_to_send, conn->cur_window_packets);
|
|
#endif
|
|
conn->write_outgoing_packet(num_to_send, ST_DATA, iovec, num_iovecs);
|
|
num_to_send = min< size_t >(bytes, packet_size);
|
|
|
|
if(num_to_send == 0)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = true", (uint)param);
|
|
#endif
|
|
return sent;
|
|
}
|
|
}
|
|
|
|
bool full = conn->is_full();
|
|
if(full)
|
|
{
|
|
// mark the socket as not being writable.
|
|
conn->state = CS_CONNECTED_FULL;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = %s", (uint)bytes,
|
|
full ? "false" : "true");
|
|
#endif
|
|
|
|
// returns whether or not the socket is still writable
|
|
// if the congestion window is not full, we can still write to it
|
|
// return !full;
|
|
return sent;
|
|
}
|
|
|
|
void
|
|
utp_read_drained(utp_socket *conn)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return;
|
|
|
|
assert(conn->state != CS_UNINITIALIZED);
|
|
if(conn->state == CS_UNINITIALIZED)
|
|
return;
|
|
|
|
const size_t rcvwin = conn->get_rcv_window();
|
|
|
|
if(rcvwin > conn->last_rcv_win)
|
|
{
|
|
// If last window was 0 send ACK immediately, otherwise should set timer
|
|
if(conn->last_rcv_win == 0)
|
|
{
|
|
conn->send_ack();
|
|
}
|
|
else
|
|
{
|
|
conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn);
|
|
conn->schedule_ack();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Should be called each time the UDP socket is drained
|
|
void
|
|
utp_issue_deferred_acks(utp_context *ctx)
|
|
{
|
|
assert(ctx);
|
|
if(!ctx)
|
|
return;
|
|
|
|
for(size_t i = 0; i < ctx->ack_sockets.GetCount(); i++)
|
|
{
|
|
UTPSocket *conn = ctx->ack_sockets[i];
|
|
conn->send_ack();
|
|
i--;
|
|
}
|
|
}
|
|
|
|
// Should be called every 500ms
|
|
void
|
|
utp_check_timeouts(utp_context *ctx)
|
|
{
|
|
assert(ctx);
|
|
if(!ctx)
|
|
return;
|
|
|
|
ctx->current_ms = utp_call_get_milliseconds(ctx, NULL);
|
|
|
|
if(ctx->current_ms - ctx->last_check < TIMEOUT_CHECK_INTERVAL)
|
|
return;
|
|
|
|
ctx->last_check = ctx->current_ms;
|
|
|
|
for(size_t i = 0; i < ctx->rst_info.GetCount(); i++)
|
|
{
|
|
if((int)(ctx->current_ms - ctx->rst_info[i].timestamp) >= RST_INFO_TIMEOUT)
|
|
{
|
|
ctx->rst_info.MoveUpLast(i);
|
|
i--;
|
|
}
|
|
}
|
|
if(ctx->rst_info.GetCount() != ctx->rst_info.GetAlloc())
|
|
{
|
|
ctx->rst_info.Compact();
|
|
}
|
|
|
|
utp_hash_iterator_t it;
|
|
UTPSocketKeyData *keyData;
|
|
while((keyData = ctx->utp_sockets->Iterate(it)))
|
|
{
|
|
UTPSocket *conn = keyData->socket;
|
|
conn->check_timeouts();
|
|
|
|
// Check if the object was deleted
|
|
if(conn->state == CS_DESTROY)
|
|
{
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "Destroying");
|
|
#endif
|
|
delete conn;
|
|
}
|
|
}
|
|
}
|
|
|
|
int
|
|
utp_getpeername(utp_socket *conn, struct sockaddr *addr, socklen_t *addrlen)
|
|
{
|
|
assert(addr);
|
|
if(!addr)
|
|
return -1;
|
|
|
|
assert(addrlen);
|
|
if(!addrlen)
|
|
return -1;
|
|
|
|
assert(conn);
|
|
if(!conn)
|
|
return -1;
|
|
|
|
assert(conn->state != CS_UNINITIALIZED);
|
|
if(conn->state == CS_UNINITIALIZED)
|
|
return -1;
|
|
|
|
socklen_t len;
|
|
const SOCKADDR_STORAGE sa = conn->addr.get_sockaddr_storage(&len);
|
|
*addrlen = min(len, *addrlen);
|
|
memcpy(addr, &sa, *addrlen);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
utp_get_delays(UTPSocket *conn, uint32 *ours, uint32 *theirs, uint32 *age)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return -1;
|
|
|
|
assert(conn->state != CS_UNINITIALIZED);
|
|
if(conn->state == CS_UNINITIALIZED)
|
|
{
|
|
if(ours)
|
|
*ours = 0;
|
|
if(theirs)
|
|
*theirs = 0;
|
|
if(age)
|
|
*age = 0;
|
|
return -1;
|
|
}
|
|
|
|
if(ours)
|
|
*ours = conn->our_hist.get_value();
|
|
if(theirs)
|
|
*theirs = conn->their_hist.get_value();
|
|
if(age)
|
|
*age = (uint32)(conn->ctx->current_ms - conn->last_measured_delay);
|
|
return 0;
|
|
}
|
|
|
|
// Close the UTP socket.
|
|
// It is not valid for the upper layer to refer to socket after it is closed.
|
|
// Data will keep to try being delivered after the close.
|
|
void
|
|
utp_close(UTPSocket *conn)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return;
|
|
|
|
assert(conn->state != CS_UNINITIALIZED && conn->state != CS_DESTROY);
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_Close in state:%s", statenames[conn->state]);
|
|
#endif
|
|
|
|
switch(conn->state)
|
|
{
|
|
case CS_CONNECTED:
|
|
case CS_CONNECTED_FULL:
|
|
conn->read_shutdown = true;
|
|
conn->close_requested = true;
|
|
if(!conn->fin_sent)
|
|
{
|
|
conn->fin_sent = true;
|
|
conn->write_outgoing_packet(0, ST_FIN, NULL, 0);
|
|
}
|
|
else if(conn->fin_sent_acked)
|
|
{
|
|
conn->state = CS_DESTROY;
|
|
}
|
|
break;
|
|
|
|
case CS_SYN_SENT:
|
|
conn->rto_timeout = utp_call_get_milliseconds(conn->ctx, conn)
|
|
+ min< uint >(conn->rto * 2, 60);
|
|
// fall through
|
|
case CS_SYN_RECV:
|
|
// fall through
|
|
default:
|
|
conn->state = CS_DESTROY;
|
|
break;
|
|
}
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_Close end in state:%s",
|
|
statenames[conn->state]);
|
|
#endif
|
|
}
|
|
|
|
void
|
|
utp_shutdown(UTPSocket *conn, int how)
|
|
{
|
|
assert(conn);
|
|
if(!conn)
|
|
return;
|
|
|
|
assert(conn->state != CS_UNINITIALIZED && conn->state != CS_DESTROY);
|
|
|
|
#if UTP_DEBUG_LOGGING
|
|
conn->log(UTP_LOG_DEBUG, "UTP_shutdown(%d) in state:%s", how,
|
|
statenames[conn->state]);
|
|
#endif
|
|
|
|
if(how != SHUT_WR)
|
|
{
|
|
conn->read_shutdown = true;
|
|
}
|
|
if(how != SHUT_RD)
|
|
{
|
|
switch(conn->state)
|
|
{
|
|
case CS_CONNECTED:
|
|
case CS_CONNECTED_FULL:
|
|
if(!conn->fin_sent)
|
|
{
|
|
conn->fin_sent = true;
|
|
conn->write_outgoing_packet(0, ST_FIN, NULL, 0);
|
|
}
|
|
break;
|
|
case CS_SYN_SENT:
|
|
conn->rto_timeout = utp_call_get_milliseconds(conn->ctx, conn)
|
|
+ min< uint >(conn->rto * 2, 60);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the context stored in the socket (socket->ctx), or NULL when
// socket is NULL.
utp_context *
utp_get_context(utp_socket *socket)
{
  assert(socket);
  if(socket == NULL)
    return NULL;

  return socket->ctx;
}
|
|
|
|
void *
|
|
utp_set_userdata(utp_socket *socket, void *userdata)
|
|
{
|
|
assert(socket);
|
|
if(socket)
|
|
socket->userdata = userdata;
|
|
return socket ? socket->userdata : NULL;
|
|
}
|
|
|
|
void *
|
|
utp_get_userdata(utp_socket *socket)
|
|
{
|
|
assert(socket);
|
|
return socket ? socket->userdata : NULL;
|
|
}
|
|
|
|
void
|
|
struct_utp_context::log(int level, utp_socket *socket, char const *fmt, ...)
|
|
{
|
|
if(!would_log(level))
|
|
{
|
|
return;
|
|
}
|
|
|
|
va_list va;
|
|
va_start(va, fmt);
|
|
log_unchecked(socket, fmt, va);
|
|
va_end(va);
|
|
}
|
|
|
|
void
|
|
struct_utp_context::log_unchecked(utp_socket *socket, char const *fmt, ...)
|
|
{
|
|
va_list va;
|
|
char buf[4096];
|
|
|
|
va_start(va, fmt);
|
|
vsnprintf(buf, 4096, fmt, va);
|
|
buf[4095] = '\0';
|
|
va_end(va);
|
|
|
|
utp_call_log(this, socket, (const byte *)buf);
|
|
}
|
|
|
|
inline bool
|
|
struct_utp_context::would_log(int level)
|
|
{
|
|
if(level == UTP_LOG_NORMAL)
|
|
return log_normal;
|
|
if(level == UTP_LOG_MTU)
|
|
return log_mtu;
|
|
if(level == UTP_LOG_DEBUG)
|
|
return log_debug;
|
|
return true;
|
|
}
|
|
|
|
// Returns the socket's statistics block.
//
// Only available when _DEBUG is defined: mtu_guess is refreshed (mtu_last if
// non-zero, otherwise mtu_ceiling) and a pointer to socket->_stats is
// returned, or NULL for a NULL socket. Without _DEBUG this always returns
// NULL.
utp_socket_stats *
utp_get_stats(utp_socket *socket)
{
#ifndef _DEBUG
  (void)socket;
  return NULL;
#else
  assert(socket);
  if(socket == NULL)
    return NULL;

  if(socket->mtu_last)
    socket->_stats.mtu_guess = socket->mtu_last;
  else
    socket->_stats.mtu_guess = socket->mtu_ceiling;

  return &socket->_stats;
#endif
}
|