Tor 0.4.9.0-alpha-dev
circuitstats.c
Go to the documentation of this file.
1/* Copyright (c) 2001 Matej Pfajfar.
2 * Copyright (c) 2001-2004, Roger Dingledine.
3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 * Copyright (c) 2007-2021, The Tor Project, Inc. */
5/* See LICENSE for licensing information */
6
7/**
8 * \file circuitstats.c
9 *
10 * \brief Maintains and analyzes statistics about circuit built times, so we
11 * can tell how long we may need to wait for a fast circuit to be constructed.
12 *
13 * By keeping these statistics, a client learns when it should time out a slow
14 * circuit for being too slow, and when it should keep a circuit open in order
15 * to wait for it to complete.
16 *
17 * The information here is kept in a circuit_built_times_t structure, which is
18 * currently a singleton, but doesn't need to be. It's updated by calls to
19 * circuit_build_times_count_timeout() from circuituse.c,
20 * circuit_build_times_count_close() from circuituse.c, and
21 * circuit_build_times_add_time() from circuitbuild.c, and inspected by other
22 * calls into this module, mostly from circuitlist.c. Observations are
23 * persisted to disk via the or_state_t-related calls.
24 */
25
26#define CIRCUITSTATS_PRIVATE
27
28#include "core/or/or.h"
31#include "app/config/config.h"
32#include "lib/confmgt/confmgt.h"
39#include "core/or/circuitlist.h"
40#include "core/or/circuituse.h"
41#include "lib/math/fp.h"
42#include "lib/time/tvdiff.h"
47
51
52#undef log
53#include <math.h>
54
56
57#define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
58
59/** Global list of circuit build times */
60// XXXX: Add this as a member for entry_guard_t instead of global?
61// Then we could do per-guard statistics, as guards are likely to
62// vary in their own latency. The downside of this is that guards
63// can change frequently, so we'd be building a lot more circuits
64// most likely.
66
67#ifdef TOR_UNIT_TESTS
68/** If set, we're running the unit tests: we should avoid clobbering
69 * our state file or accessing get_options() or get_or_state() */
70static int unit_tests = 0;
71#else
72#define unit_tests 0
73#endif /* defined(TOR_UNIT_TESTS) */
74
75/** Return a pointer to the data structure describing our current circuit
76 * build time history and computations. */
79{
80 return &circ_times;
81}
82
83/** As get_circuit_build_times, but return a mutable pointer. */
86{
87 return &circ_times;
88}
89
90/** Return the time to wait before actually closing an under-construction, in
91 * milliseconds. */
92double
94{
95 return circ_times.close_ms;
96}
97
98/** Return the time to wait before giving up on an under-construction circuit,
99 * in milliseconds. */
100double
102{
103 return circ_times.timeout_ms;
104}
105
106/**
107 * This function decides if CBT learning should be disabled. It returns
108 * true if one or more of the following conditions are met:
109 *
110 * 1. If the cbtdisabled consensus parameter is set.
111 * 2. If the torrc option LearnCircuitBuildTimeout is false.
112 * 3. If we are a directory authority
113 * 4. If we fail to write circuit build time history to our state file.
114 * 5. If we are configured in Single Onion mode
115 */
116int
118{
119 return circuit_build_times_disabled_(options, 0);
120}
121
122/** As circuit_build_times_disabled, but take options as an argument. */
123int
125 int ignore_consensus)
126{
127 if (unit_tests) {
128 return 0;
129 } else {
130 int consensus_disabled =
131 ignore_consensus ? 0 : networkstatus_get_param(NULL, "cbtdisabled",
132 0, 0, 1);
133 int config_disabled = !options->LearnCircuitBuildTimeout;
134 int dirauth_disabled = authdir_mode(options);
135 int state_disabled = did_last_state_file_write_fail() ? 1 : 0;
136 /* LearnCircuitBuildTimeout and Single Onion Services are
137 * incompatible in two ways:
138 *
139 * - LearnCircuitBuildTimeout results in a low CBT, which
140 * Single Onion use of one-hop intro and rendezvous circuits lowers
141 * much further, producing *far* too many timeouts.
142 *
143 * - The adaptive CBT code does not update its timeout estimate
144 * using build times for single-hop circuits.
145 *
146 * If we fix both of these issues someday, we should test
147 * these modes with LearnCircuitBuildTimeout on again. */
148 int single_onion_disabled = hs_service_allow_non_anonymous_connection(
149 options);
150
151 if (consensus_disabled || config_disabled || dirauth_disabled ||
152 state_disabled || single_onion_disabled) {
153#if 0
154 log_debug(LD_CIRC,
155 "CircuitBuildTime learning is disabled. "
156 "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
157 consensus_disabled, config_disabled, dirauth_disabled,
158 state_disabled);
159#endif /* 0 */
160 return 1;
161 } else {
162#if 0
163 log_debug(LD_CIRC,
164 "CircuitBuildTime learning is not disabled. "
165 "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
166 consensus_disabled, config_disabled, dirauth_disabled,
167 state_disabled);
168#endif /* 0 */
169 return 0;
170 }
171 }
172}
173
174/**
175 * Retrieve and bounds-check the cbtmaxtimeouts consensus parameter.
176 *
177 * Effect: When this many timeouts happen in the last 'cbtrecentcount'
178 * circuit attempts, the client should discard all of its history and
179 * begin learning a fresh timeout value.
180 */
181static int32_t
183{
184 int32_t cbt_maxtimeouts;
185
186 cbt_maxtimeouts = networkstatus_get_param(NULL, "cbtmaxtimeouts",
188 CBT_MIN_MAX_RECENT_TIMEOUT_COUNT,
189 CBT_MAX_MAX_RECENT_TIMEOUT_COUNT);
190
191 if (!(get_options()->LearnCircuitBuildTimeout)) {
192 log_debug(LD_BUG,
193 "circuit_build_times_max_timeouts() called, cbtmaxtimeouts is"
194 " %d",
195 cbt_maxtimeouts);
196 }
197
198 return cbt_maxtimeouts;
199}
200
201/**
202 * Retrieve and bounds-check the cbtnummodes consensus parameter.
203 *
204 * Effect: This value governs how many modes to use in the weighted
205 * average calculation of Pareto parameter Xm. Analysis of pairs of
206 * geographically near, far, and mixed guaeds has shown that a value of
207 * 10 introduces some allows for the actual timeout rate to be within
208 * 2-7% of the cutoff quantile, for quantiles between 60-80%.
209 */
210static int32_t
212{
213 int32_t num = networkstatus_get_param(NULL, "cbtnummodes",
215 CBT_MIN_NUM_XM_MODES,
216 CBT_MAX_NUM_XM_MODES);
217
218 if (!(get_options()->LearnCircuitBuildTimeout)) {
219 log_debug(LD_BUG,
220 "circuit_build_times_default_num_xm_modes() called, cbtnummodes"
221 " is %d",
222 num);
223 }
224
225 return num;
226}
227
228/**
229 * Retrieve and bounds-check the cbtmincircs consensus parameter.
230 *
231 * Effect: This is the minimum number of circuits to build before
232 * computing a timeout.
233 */
234static int32_t
236{
237 int32_t num = networkstatus_get_param(NULL, "cbtmincircs",
239 CBT_MIN_MIN_CIRCUITS_TO_OBSERVE,
240 CBT_MAX_MIN_CIRCUITS_TO_OBSERVE);
241
242 if (!(get_options()->LearnCircuitBuildTimeout)) {
243 log_debug(LD_BUG,
244 "circuit_build_times_min_circs_to_observe() called, cbtmincircs"
245 " is %d",
246 num);
247 }
248
249 return num;
250}
251
252/** Return true iff <b>cbt</b> has recorded enough build times that we
253 * want to start acting on the timeout it implies. */
254int
256{
258}
259
260/**
261 * Retrieve and bounds-check the cbtquantile consensus parameter.
262 *
263 * Effect: This is the position on the quantile curve to use to set the
264 * timeout value. It is a percent (10-99).
265 */
266double
268{
269 int32_t num = networkstatus_get_param(NULL, "cbtquantile",
271 CBT_MIN_QUANTILE_CUTOFF,
272 CBT_MAX_QUANTILE_CUTOFF);
273
274 if (!(get_options()->LearnCircuitBuildTimeout)) {
275 log_debug(LD_BUG,
276 "circuit_build_times_quantile_cutoff() called, cbtquantile"
277 " is %d",
278 num);
279 }
280
281 return num/100.0;
282}
283
284/**
285 * Retrieve and bounds-check the cbtclosequantile consensus parameter.
286 *
287 * Effect: This is the position on the quantile curve to use to set the
288 * timeout value to use to actually close circuits. It is a percent
289 * (0-99).
290 */
291static double
293{
294 int32_t param;
295 /* Cast is safe - circuit_build_times_quantile_cutoff() is capped */
296 int32_t min = (int)tor_lround(100*circuit_build_times_quantile_cutoff());
297 param = networkstatus_get_param(NULL, "cbtclosequantile",
299 CBT_MIN_CLOSE_QUANTILE,
300 CBT_MAX_CLOSE_QUANTILE);
301
302 if (!(get_options()->LearnCircuitBuildTimeout)) {
303 log_debug(LD_BUG,
304 "circuit_build_times_close_quantile() called, cbtclosequantile"
305 " is %d", param);
306 }
307
308 if (param < min) {
309 log_warn(LD_DIR, "Consensus parameter cbtclosequantile is "
310 "too small, raising to %d", min);
311 param = min;
312 }
313 return param / 100.0;
314}
315
316/**
317 * Retrieve and bounds-check the cbttestfreq consensus parameter.
318 *
319 * Effect: Describes how often in seconds to build a test circuit to
320 * gather timeout values. Only applies if less than 'cbtmincircs'
321 * have been recorded.
322 */
323static int32_t
325{
326 int32_t num = networkstatus_get_param(NULL, "cbttestfreq",
328 CBT_MIN_TEST_FREQUENCY,
329 CBT_MAX_TEST_FREQUENCY);
330
331 if (!(get_options()->LearnCircuitBuildTimeout)) {
332 log_debug(LD_BUG,
333 "circuit_build_times_test_frequency() called, cbttestfreq is %d",
334 num);
335 }
336
337 return num;
338}
339
340/**
341 * Retrieve and bounds-check the cbtmintimeout consensus parameter.
342 *
343 * Effect: This is the minimum allowed timeout value in milliseconds.
344 * The minimum is to prevent rounding to 0 (we only check once
345 * per second).
346 */
347static int32_t
349{
350 int32_t num = networkstatus_get_param(NULL, "cbtmintimeout",
352 CBT_MIN_TIMEOUT_MIN_VALUE,
353 CBT_MAX_TIMEOUT_MIN_VALUE);
354
355 if (!(get_options()->LearnCircuitBuildTimeout)) {
356 log_debug(LD_BUG,
357 "circuit_build_times_min_timeout() called, cbtmintimeout is %d",
358 num);
359 }
360 return num;
361}
362
363/**
364 * Retrieve and bounds-check the cbtinitialtimeout consensus parameter.
365 *
366 * Effect: This is the timeout value to use before computing a timeout,
367 * in milliseconds.
368 */
369int32_t
371{
372 int32_t min = circuit_build_times_min_timeout();
373 int32_t param = networkstatus_get_param(NULL, "cbtinitialtimeout",
375 CBT_MIN_TIMEOUT_INITIAL_VALUE,
376 CBT_MAX_TIMEOUT_INITIAL_VALUE);
377
378 if (!(get_options()->LearnCircuitBuildTimeout)) {
379 log_debug(LD_BUG,
380 "circuit_build_times_initial_timeout() called, "
381 "cbtinitialtimeout is %d",
382 param);
383 }
384
385 if (param < min) {
386 log_warn(LD_DIR, "Consensus parameter cbtinitialtimeout is too small, "
387 "raising to %d", min);
388 param = min;
389 }
390 return param;
391}
392
393/**
394 * Retrieve and bounds-check the cbtrecentcount consensus parameter.
395 *
396 * Effect: This is the number of circuit build times to keep track of
397 * for deciding if we hit cbtmaxtimeouts and need to reset our state
398 * and learn a new timeout.
399 */
400static int32_t
402{
403 int32_t num;
404 num = networkstatus_get_param(ns, "cbtrecentcount",
406 CBT_MIN_RECENT_CIRCUITS,
407 CBT_MAX_RECENT_CIRCUITS);
408
409 if (!(get_options()->LearnCircuitBuildTimeout)) {
410 log_debug(LD_BUG,
411 "circuit_build_times_recent_circuit_count() called, "
412 "cbtrecentcount is %d",
413 num);
414 }
415
416 return num;
417}
418
419/**
420 * This function is called when we get a consensus update.
421 *
422 * It checks to see if we have changed any consensus parameters
423 * that require reallocation or discard of previous stats.
424 */
425void
427 const networkstatus_t *ns)
428{
429 int32_t num;
430
431 /*
432 * First check if we're doing adaptive timeouts at all; nothing to
433 * update if we aren't.
434 */
435
438
439 if (num > 0) {
440 if (num != cbt->liveness.num_recent_circs) {
441 int8_t *recent_circs;
442 if (cbt->liveness.num_recent_circs > 0) {
443 log_notice(LD_CIRC, "The Tor Directory Consensus has changed how "
444 "many circuits we must track to detect network failures "
445 "from %d to %d.", cbt->liveness.num_recent_circs, num);
446 } else {
447 log_notice(LD_CIRC, "Upon receiving a consensus directory, "
448 "re-enabling circuit-based network failure detection.");
449 }
450
452 cbt->liveness.num_recent_circs == 0);
453
454 /*
455 * Technically this is a circular array that we are reallocating
456 * and memcopying. However, since it only consists of either 1s
457 * or 0s, and is only used in a statistical test to determine when
458 * we should discard our history after a sufficient number of 1's
459 * have been reached, it is fine if order is not preserved or
460 * elements are lost.
461 *
462 * cbtrecentcount should only be changing in cases of severe network
463 * distress anyway, so memory correctness here is paramount over
464 * doing acrobatics to preserve the array.
465 */
466 recent_circs = tor_calloc(num, sizeof(int8_t));
468 cbt->liveness.num_recent_circs > 0) {
469 memcpy(recent_circs, cbt->liveness.timeouts_after_firsthop,
470 sizeof(int8_t)*MIN(num, cbt->liveness.num_recent_circs));
471 }
472
473 // Adjust the index if it needs it.
474 if (num < cbt->liveness.num_recent_circs) {
475 cbt->liveness.after_firsthop_idx = MIN(num-1,
477 }
478
480 cbt->liveness.timeouts_after_firsthop = recent_circs;
481 cbt->liveness.num_recent_circs = num;
482 }
483 /* else no change, nothing to do */
484 } else { /* num == 0 */
485 /*
486 * Weird. This probably shouldn't happen, so log a warning, but try
487 * to do something sensible anyway.
488 */
489
490 log_warn(LD_CIRC,
491 "The cbtrecentcircs consensus parameter came back zero! "
492 "This disables adaptive timeouts since we can't keep track of "
493 "any recent circuits.");
494
496 }
497 } else {
498 /*
499 * Adaptive timeouts are disabled; this might be because of the
500 * LearnCircuitBuildTimes config parameter, and hence permanent, or
501 * the cbtdisabled consensus parameter, so it may be a new condition.
502 * Treat it like getting num == 0 above and free the circuit history
503 * if we have any.
504 */
505
507 }
508}
509
510/**
511 * Return the initial default or configured timeout in milliseconds
512 */
513static double
515{
516 double timeout;
517 const or_options_t *options = get_options();
518
519 /*
520 * Check if we have LearnCircuitBuildTimeout, and if we don't,
521 * always use CircuitBuildTimeout, no questions asked.
522 */
523 if (!unit_tests && options->CircuitBuildTimeout) {
524 timeout = options->CircuitBuildTimeout*1000;
525 if (!circuit_build_times_disabled(options) &&
527 log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds",
530 }
531 } else {
533 }
534
535 return timeout;
536}
537
538/**
539 * Reset the build time state.
540 *
541 * Leave estimated parameters, timeout and network liveness intact
542 * for future use.
543 */
544void
546{
547 memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times));
548 cbt->total_build_times = 0;
549 cbt->build_times_idx = 0;
550 cbt->have_computed_timeout = 0;
551
552 // Reset timeout and close counts
553 cbt->num_circ_succeeded = 0;
554 cbt->num_circ_closed = 0;
555 cbt->num_circ_timeouts = 0;
556}
557
558/**
559 * Initialize the buildtimes structure for first use.
560 *
561 * Sets the initial timeout values based on either the config setting,
562 * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
563 */
564void
566{
567 memset(cbt, 0, sizeof(*cbt));
568 /*
569 * Check if we really are using adaptive timeouts, and don't keep
570 * track of this stuff if not.
571 */
576 tor_calloc(cbt->liveness.num_recent_circs, sizeof(int8_t));
577 } else {
578 cbt->liveness.num_recent_circs = 0;
580 }
582 cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
583}
584
585/**
586 * Free the saved timeouts, if the cbtdisabled consensus parameter got turned
587 * on or something.
588 */
589
590void
592{
593 if (!cbt) return;
594
597 }
598
599 cbt->liveness.num_recent_circs = 0;
600}
601
602#if 0
603/**
604 * Rewind our build time history by n positions.
605 */
606static void
607circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
608{
609 int i = 0;
610
611 cbt->build_times_idx -= n;
613
614 for (i = 0; i < n; i++) {
617 }
618
619 if (cbt->total_build_times > n) {
620 cbt->total_build_times -= n;
621 } else {
622 cbt->total_build_times = 0;
623 }
624
625 log_info(LD_CIRC,
626 "Rewound history by %d places. Current index: %d. "
627 "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
628}
629#endif /* 0 */
630
631/**
632 * Mark this circuit as timed out, but change its purpose
633 * so that it continues to build, allowing us to measure
634 * its full build time.
635 */
636void
638{
640 CIRC_EVENT_FAILED,
641 END_CIRC_REASON_TIMEOUT);
644 /* Record this event to check for too many timeouts
645 * in a row. This function does not record a time value yet
646 * (we do that later); it only counts the fact that we did
647 * have a timeout. We also want to avoid double-counting
648 * already "relaxed" circuits, which are counted in
649 * circuit_expire_building(). */
650 if (!circ->relaxed_timeout) {
651 int first_hop_succeeded = circ->cpath &&
652 circ->cpath->state == CPATH_STATE_OPEN;
653
656 first_hop_succeeded);
657 }
658}
659
660/**
661 * Perform the build time work that needs to be done when a circuit
662 * completes a hop.
663 *
664 * This function decides if we should record a circuit's build time
665 * in our histogram data and other statistics, and if so, records it.
666 * It also will mark circuits that have already timed out as
667 * measurement-only circuits, so they can continue to build but
668 * not get used.
669 *
670 * For this, we want to consider circuits that will eventually make
671 * it to the third hop. For circuits longer than 3 hops, we want to
672 * record their build time when they reach the third hop, but let
673 * them continue (and not count them later). For circuits that are
674 * exactly 3 hops, this will count them when they are completed. We
675 * do this so that CBT is always gathering statistics on circuits
676 * of the same length, regardless of their type.
677 */
678void
680{
681 struct timeval end;
682 long timediff;
683
684 /* If circuit build times are disabled, let circuit_expire_building()
685 * handle it.. */
687 return;
688 }
689
690 /* Is this a circuit for which the timeout applies in a straight-forward
691 * way? If so, handle it below. If not, just return (and let
692 * circuit_expire_building() eventually take care of it).
693 */
695 return;
696 }
697
698 tor_gettimeofday(&end);
699 timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
700
701 /* Check if we would have timed out already. If so, change the
702 * purpose here. But don't do any timeout handling here if there
703 * are no circuits opened yet. Save it for circuit_expire_building()
704 * (to allow it to handle timeout "relaxing" over there). */
705 if (timediff > get_circuit_build_timeout_ms() &&
707
708 /* Circuits are allowed to last longer for measurement.
709 * Switch their purpose and wait. */
710 if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
711 log_info(LD_CIRC,
712 "Deciding to timeout circuit %"PRIu32,
713 (circ->global_identifier));
715 }
716 }
717
718 /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN,
719 * add it to our buildtimes. */
721 /* If the circuit build time is much greater than we would have cut
722 * it off at, we probably had a suspend event along this codepath,
723 * and we should discard the value.
724 */
725 if (timediff < 0 ||
726 timediff > 2*get_circuit_build_close_time_ms()+1000) {
727 log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. "
728 "Assuming clock jump. Purpose %d (%s)", timediff,
729 circ->base_.purpose,
731 } else {
732 /* Only count circuit times if the network is live */
736 (build_time_t)timediff);
738 }
739
740 if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
743 }
744 }
745 }
746}
747
748/**
749 * Add a new build time value <b>time</b> to the set of build times. Time
750 * units are milliseconds.
751 *
752 * circuit_build_times <b>cbt</b> is a circular array, so loop around when
753 * array is full.
754 */
755int
757{
758 if (btime <= 0 || btime > CBT_BUILD_TIME_MAX) {
759 log_warn(LD_BUG, "Circuit build time is too large (%u)."
760 "This is probably a bug.", btime);
762 return -1;
763 }
764
765 log_debug(LD_CIRC, "Adding circuit build time %u", btime);
766
767 cbt->circuit_build_times[cbt->build_times_idx] = btime;
770 cbt->total_build_times++;
771
772 if ((cbt->total_build_times % CBT_SAVE_STATE_EVERY) == 0) {
773 /* Save state every n circuit builds */
774 if (!unit_tests && !get_options()->AvoidDiskWrites)
776 }
777
778 return 0;
779}
780
781/**
782 * Return maximum circuit build time
783 */
784static build_time_t
786{
787 int i = 0;
788 build_time_t max_build_time = 0;
789 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
790 if (cbt->circuit_build_times[i] > max_build_time
792 max_build_time = cbt->circuit_build_times[i];
793 }
794 return max_build_time;
795}
796
797#if 0
798/** Return minimum circuit build time */
800circuit_build_times_min(circuit_build_times_t *cbt)
801{
802 int i = 0;
803 build_time_t min_build_time = CBT_BUILD_TIME_MAX;
804 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
805 if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
806 cbt->circuit_build_times[i] < min_build_time)
807 min_build_time = cbt->circuit_build_times[i];
808 }
809 if (min_build_time == CBT_BUILD_TIME_MAX) {
810 log_warn(LD_CIRC, "No build times less than CBT_BUILD_TIME_MAX!");
811 }
812 return min_build_time;
813}
814#endif /* 0 */
815
816/**
817 * Calculate and return a histogram for the set of build times.
818 *
819 * Returns an allocated array of histrogram bins representing
820 * the frequency of index*CBT_BIN_WIDTH millisecond
821 * build times. Also outputs the number of bins in nbins.
822 *
823 * The return value must be freed by the caller.
824 */
825static uint32_t *
827 build_time_t *nbins)
828{
829 uint32_t *histogram;
830 build_time_t max_build_time = circuit_build_times_max(cbt);
831 int i, c;
832
833 *nbins = 1 + (max_build_time / CBT_BIN_WIDTH);
834 histogram = tor_calloc(*nbins, sizeof(build_time_t));
835
836 // calculate histogram
837 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
838 if (cbt->circuit_build_times[i] == 0
840 continue; /* 0 <-> uninitialized */
841
842 c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH);
843 histogram[c]++;
844 }
845
846 return histogram;
847}
848
849/**
850 * Return the Pareto start-of-curve parameter Xm.
851 *
852 * Because we are not a true Pareto curve, we compute this as the
853 * weighted average of the 10 most frequent build time bins. This
854 * heuristic allowed for the actual timeout rate to be closest
855 * to the chosen quantile cutoff, for quantiles 60-80%, out of
856 * many variant approaches (see #40157 for analysis).
857 */
860{
861 build_time_t nbins = 0;
862 build_time_t *nth_max_bin;
863 build_time_t xm_total = 0;
864 build_time_t Xm = 0;
865 int32_t xm_counts=0;
867 uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
868
869 tor_assert(nbins > 0);
870 tor_assert(num_modes > 0);
871
872 nth_max_bin = tor_calloc(num_modes, sizeof(build_time_t));
873
874 /* Determine the N most common build times, by selecting the
875 * nth largest mode, counting it, and removing it from the histogram. */
876 for (int n = 0; n < num_modes; n++) {
877 /* Get nth mode */
878 for (build_time_t i = 0; i < nbins; i++) {
879 if (histogram[i] > histogram[nth_max_bin[n]]) {
880 nth_max_bin[n] = i;
881 }
882 }
883
884 /* Update average */
885 xm_counts += histogram[nth_max_bin[n]];
886 xm_total += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
887
888 /* Prevent from re-counting this value */
889 histogram[nth_max_bin[n]] = 0;
890 }
891
892 /* xm_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE
893 * circuits were abandoned before they completed. This shouldn't happen,
894 * though. We should have reset/re-learned a lower timeout first. */
895 if (xm_counts == 0) {
896 log_warn(LD_CIRC,
897 "No valid circuit build time data out of %d times, %u modes, "
898 "have_timeout=%d, %lfms", cbt->total_build_times, num_modes,
900 goto done;
901 }
902
903 Xm = xm_total / xm_counts;
904
905 done:
906 tor_free(histogram);
907 tor_free(nth_max_bin);
908
909 return Xm;
910}
911
912/**
913 * Output a histogram of current circuit build times to
914 * the or_state_t state structure.
915 */
916void
918 or_state_t *state)
919{
920 uint32_t *histogram;
921 build_time_t i = 0;
922 build_time_t nbins = 0;
923 config_line_t **next, *line;
924
925 histogram = circuit_build_times_create_histogram(cbt, &nbins);
926 // write to state
927 config_free_lines(state->BuildtimeHistogram);
928 next = &state->BuildtimeHistogram;
929 *next = NULL;
930
931 state->TotalBuildTimes = cbt->total_build_times;
932 state->CircuitBuildAbandonedCount = 0;
933
934 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
936 state->CircuitBuildAbandonedCount++;
937 }
938
939 for (i = 0; i < nbins; i++) {
940 // compress the histogram by skipping the blanks
941 if (histogram[i] == 0) continue;
942 *next = line = tor_malloc_zero(sizeof(config_line_t));
943 line->key = tor_strdup("CircuitBuildTimeBin");
944 tor_asprintf(&line->value, "%d %d",
945 CBT_BIN_TO_MS(i), histogram[i]);
946 next = &(line->next);
947 }
948
949 if (!unit_tests) {
952 }
953
954 tor_free(histogram);
955}
956
957/**
958 * Shuffle the build times array.
959 *
960 * Adapted from https://en.wikipedia.org/wiki/Fisher-Yates_shuffle
961 */
962static void
964 build_time_t *raw_times,
965 uint32_t num_times)
966{
967 uint32_t n = num_times;
968 if (num_times > CBT_NCIRCUITS_TO_OBSERVE) {
969 log_notice(LD_CIRC, "The number of circuit times that this Tor version "
970 "uses to calculate build times is less than the number stored "
971 "in your state file. Decreasing the circuit time history from "
972 "%lu to %d.", (unsigned long)num_times,
974 }
975
976 if (n > INT_MAX-1) {
977 log_warn(LD_CIRC, "For some insane reasons, you had %lu circuit build "
978 "observations in your state file. That's far too many; probably "
979 "there's a bug here.", (unsigned long)n);
980 n = INT_MAX-1;
981 }
982
983 /* This code can only be run on a compact array */
984 while (n-- > 1) {
985 int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */
986 build_time_t tmp = raw_times[k];
987 raw_times[k] = raw_times[n];
988 raw_times[n] = tmp;
989 }
990
991 /* Since the times are now shuffled, take a random CBT_NCIRCUITS_TO_OBSERVE
992 * subset (ie the first CBT_NCIRCUITS_TO_OBSERVE values) */
993 for (n = 0; n < MIN(num_times, CBT_NCIRCUITS_TO_OBSERVE); n++) {
994 circuit_build_times_add_time(cbt, raw_times[n]);
995 }
996}
997
998/**
999 * Load histogram from <b>state</b>, shuffling the resulting array
1000 * after we do so. Use this result to estimate parameters and
1001 * calculate the timeout.
1002 *
1003 * Return -1 on error.
1004 */
1005int
1007 or_state_t *state)
1008{
1009 int tot_values = 0;
1010 uint32_t loaded_cnt = 0, N = 0;
1011 config_line_t *line;
1012 int i;
1013 build_time_t *loaded_times;
1014 int err = 0;
1016
1018 return 0;
1019 }
1020
1021 /* We had a case where someone removed their TotalBuildTimes from the state
1022 * files while having CircuitBuildAbandonedCount above 0 leading to a
1023 * segfault (#40437). Simply bug on it and return an error so at least the
1024 * user will learn that they broke the state file. */
1025 if (BUG(state->TotalBuildTimes <= 0 &&
1026 state->CircuitBuildAbandonedCount > 0)) {
1027 log_warn(LD_GENERAL, "CircuitBuildAbandonedCount count is above 0 but "
1028 "no TotalBuildTimes have been found. Unable to "
1029 "parse broken state file");
1030 return -1;
1031 }
1032
1033 /* build_time_t 0 means uninitialized */
1034 loaded_times = tor_calloc(state->TotalBuildTimes, sizeof(build_time_t));
1035
1036 for (line = state->BuildtimeHistogram; line; line = line->next) {
1037 smartlist_t *args = smartlist_new();
1038 smartlist_split_string(args, line->value, " ",
1039 SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
1040 if (smartlist_len(args) < 2) {
1041 log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1042 "Too few arguments to CircuitBuildTime");
1043 err = 1;
1044 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1045 smartlist_free(args);
1046 break;
1047 } else {
1048 const char *ms_str = smartlist_get(args,0);
1049 const char *count_str = smartlist_get(args,1);
1050 uint32_t count, k;
1051 build_time_t ms;
1052 int ok;
1053 ms = (build_time_t)tor_parse_ulong(ms_str, 10, 0,
1054 CBT_BUILD_TIME_MAX, &ok, NULL);
1055 if (!ok) {
1056 log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1057 "Unparsable bin number");
1058 err = 1;
1059 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1060 smartlist_free(args);
1061 break;
1062 }
1063 count = (uint32_t)tor_parse_ulong(count_str, 10, 0,
1064 UINT32_MAX, &ok, NULL);
1065 if (!ok) {
1066 log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1067 "Unparsable bin count");
1068 err = 1;
1069 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1070 smartlist_free(args);
1071 break;
1072 }
1073
1074 if (loaded_cnt+count+ (unsigned)state->CircuitBuildAbandonedCount
1075 > (unsigned) state->TotalBuildTimes) {
1076 log_warn(LD_CIRC,
1077 "Too many build times in state file. "
1078 "Stopping short before %d",
1079 loaded_cnt+count);
1080 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1081 smartlist_free(args);
1082 break;
1083 }
1084
1085 for (k = 0; k < count; k++) {
1086 loaded_times[loaded_cnt++] = ms;
1087 }
1088 N++;
1089 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1090 smartlist_free(args);
1091 }
1092 }
1093
1094 log_info(LD_CIRC,
1095 "Adding %d timeouts.", state->CircuitBuildAbandonedCount);
1096 for (i=0; i < state->CircuitBuildAbandonedCount; i++) {
1097 loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED;
1098 }
1099
1100 if (loaded_cnt != (unsigned)state->TotalBuildTimes) {
1101 log_warn(LD_CIRC,
1102 "Corrupt state file? Build times count mismatch. "
1103 "Read %d times, but file says %d", loaded_cnt,
1104 state->TotalBuildTimes);
1105 err = 1;
1107 goto done;
1108 }
1109
1110 circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
1111
1112 /* Verify that we didn't overwrite any indexes */
1113 for (i=0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1114 if (!cbt->circuit_build_times[i])
1115 break;
1116 tot_values++;
1117 }
1118 log_info(LD_CIRC,
1119 "Loaded %d/%d values from %d lines in circuit time histogram",
1120 tot_values, cbt->total_build_times, N);
1121
1122 if (cbt->total_build_times != tot_values
1124 log_warn(LD_CIRC,
1125 "Corrupt state file? Shuffled build times mismatch. "
1126 "Read %d times, but file says %d", tot_values,
1127 state->TotalBuildTimes);
1128 err = 1;
1130 goto done;
1131 }
1132
1134
1135 done:
1136 tor_free(loaded_times);
1137 return err ? -1 : 0;
1138}
1139
1140/**
1141 * Estimates the Xm and Alpha parameters using
1142 * https://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
1143 *
1144 * The notable difference is that we use mode instead of min to estimate Xm.
1145 * This is because our distribution is frechet-like. We claim this is
1146 * an acceptable approximation because we are only concerned with the
1147 * accuracy of the CDF of the tail.
1148 */
1149STATIC int
1151{
1153 double a = 0;
1154 int n=0,i=0,abandoned_count=0;
1155
1156 /* https://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
1157 /* We sort of cheat here and make our samples slightly more pareto-like
1158 * and less frechet-like. */
1159 cbt->Xm = circuit_build_times_get_xm(cbt);
1160
1161 /* If Xm came back 0, then too many circuits were abandoned. */
1162 if (cbt->Xm == 0)
1163 return 0;
1164
1165 tor_assert(cbt->Xm > 0);
1166
1167 for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) {
1168 if (!x[i]) {
1169 continue;
1170 }
1171
1172 if (x[i] < cbt->Xm) {
1173 a += tor_mathlog(cbt->Xm);
1174 n++;
1175 } else if (x[i] == CBT_BUILD_ABANDONED) {
1176 abandoned_count++;
1177 } else {
1178 a += tor_mathlog(x[i]);
1179 n++;
1180 }
1181 }
1182
1183 /*
1184 * We are erring and asserting here because this can only happen
1185 * in codepaths other than startup. The startup state parsing code
1186 * performs this same check, and resets state if it hits it. If we
1187 * hit it at runtime, something serious has gone wrong.
1188 */
1189 if (n!=cbt->total_build_times-abandoned_count) {
1190 log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
1191 cbt->total_build_times);
1192 }
1193 tor_assert_nonfatal(n==cbt->total_build_times-abandoned_count);
1194
1195 /* This is the "Maximum Likelihood Estimator" for parameter alpha of a Pareto
1196 * Distribution. See:
1197 * https://en.wikipedia.org/wiki/Pareto_distribution#Estimation_of_parameters
1198 *
1199 * The division in the estimator is done with subtraction outside the ln(),
1200 * with the sum occurring in the for loop above.
1201 *
1202 * This done is to avoid the precision issues of logs of small values.
1203 */
1204 a -= n*tor_mathlog(cbt->Xm);
1205 a = n/a;
1206
1207 cbt->alpha = a;
1208
1209 return 1;
1210}
1211
1212/**
1213 * This is the Pareto Quantile Function. It calculates the point x
1214 * in the distribution such that F(x) = quantile (ie quantile*100%
1215 * of the mass of the density function is below x on the curve).
1216 *
1217 * We use it to calculate the timeout and also to generate synthetic
1218 * values of time for circuits that timeout before completion.
1219 *
1220 * See https://en.wikipedia.org/wiki/Quantile_function,
1221 * https://en.wikipedia.org/wiki/Inverse_transform_sampling and
1222 * https://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_
1223 * random_sample_from_Pareto_distribution
1224 * That's right. I'll cite wikipedia all day long.
1225 *
1226 * Return value is in milliseconds, clamped to INT32_MAX.
1227 */
1228STATIC double
1230 double quantile)
1231{
1232 double ret;
1233 tor_assert(quantile >= 0);
1234 tor_assert(1.0-quantile > 0);
1235 tor_assert(cbt->Xm > 0);
1236
1237 /* If either alpha or p are 0, we would divide by zero, yielding an
1238 * infinite (double) result; which would be clamped to INT32_MAX.
1239 * Instead, initialise ret to INT32_MAX, and skip over these
1240 * potentially illegal/trapping divides by zero.
1241 */
1242 ret = INT32_MAX;
1243
1244 if (cbt->alpha > 0) {
1245 double p;
1246 p = pow(1.0-quantile,1.0/cbt->alpha);
1247 if (p > 0) {
1248 ret = cbt->Xm/p;
1249 }
1250 }
1251
1252 if (ret > INT32_MAX) {
1253 ret = INT32_MAX;
1254 }
1255 tor_assert(ret > 0);
1256 return ret;
1257}
1258
1259#ifdef TOR_UNIT_TESTS
1260/** Pareto CDF */
1261double
1262circuit_build_times_cdf(circuit_build_times_t *cbt, double x)
1263{
1264 double ret;
1265 tor_assert(cbt->Xm > 0);
1266 ret = 1.0-pow(cbt->Xm/x,cbt->alpha);
1267 tor_assert(0 <= ret && ret <= 1.0);
1268 return ret;
1269}
1270#endif /* defined(TOR_UNIT_TESTS) */
1271
1272#ifdef TOR_UNIT_TESTS
1273/**
1274 * Generate a synthetic time using our distribution parameters.
1275 *
1276 * The return value will be within the [q_lo, q_hi) quantile points
1277 * on the CDF.
1278 */
1280circuit_build_times_generate_sample(circuit_build_times_t *cbt,
1281 double q_lo, double q_hi)
1282{
1283 double randval = crypto_rand_double();
1284 build_time_t ret;
1285 double u;
1286
1287 /* Generate between [q_lo, q_hi) */
1288 /*XXXX This is what nextafter is supposed to be for; we should use it on the
1289 * platforms that support it. */
1290 q_hi -= 1.0/(INT32_MAX);
1291
1292 tor_assert(q_lo >= 0);
1293 tor_assert(q_hi < 1);
1294 tor_assert(q_lo < q_hi);
1295
1296 u = q_lo + (q_hi-q_lo)*randval;
1297
1298 tor_assert(0 <= u && u < 1.0);
1299 /* circuit_build_times_calculate_timeout returns <= INT32_MAX */
1300 ret = (build_time_t)
1302 tor_assert(ret > 0);
1303 return ret;
1304}
1305#endif /* defined(TOR_UNIT_TESTS) */
1306
1307#ifdef TOR_UNIT_TESTS
1308/**
1309 * Estimate an initial alpha parameter by solving the quantile
1310 * function with a quantile point and a specific timeout value.
1311 */
1312void
1313circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
1314 double quantile, double timeout_ms)
1315{
1316 // Q(u) = Xm/((1-u)^(1/a))
1317 // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
1318 // CircBuildTimeout = Xm/((1-0.8))^(1/a))
1319 // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
1320 // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
1321 // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
1322 tor_assert(quantile >= 0);
1323 tor_assert(cbt->Xm > 0);
1324 cbt->alpha = tor_mathlog(1.0-quantile)/
1325 (tor_mathlog(cbt->Xm)-tor_mathlog(timeout_ms));
1326 tor_assert(cbt->alpha > 0);
1327}
1328#endif /* defined(TOR_UNIT_TESTS) */
1329
1330/**
1331 * Returns true if we need circuits to be built
1332 */
1333int
1335{
1336 /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
1338}
1339
1340/**
1341 * Returns true if we should build a timeout test circuit
1342 * right now.
1343 */
1344int
1346{
1349}
1350
1351/**
1352 * How long should we be unreachable before we think we need to check if
1353 * our published IP address has changed.
1354 */
1355#define CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP (60*3)
1356
1357/**
1358 * Called to indicate that the network showed some signs of liveness,
1359 * i.e. we received a cell.
1360 *
1361 * This is used by circuit_build_times_network_check_live() to decide
1362 * if we should record the circuit build timeout or not.
1363 *
1364 * This function is called every time we receive a cell. Avoid
1365 * syscalls, events, and other high-intensity work.
1366 */
1367void
1369{
1370 time_t now = approx_time();
1371 // XXXX this should use pubsub
1372 if (cbt->liveness.nonlive_timeouts > 0) {
1373 time_t time_since_live = now - cbt->liveness.network_last_live;
1374 log_notice(LD_CIRC,
1375 "Tor now sees network activity. Restoring circuit build "
1376 "timeout recording. Network was down for %d seconds "
1377 "during %d circuit attempts.",
1378 (int)time_since_live,
1380 if (time_since_live > CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP)
1382 }
1383 cbt->liveness.network_last_live = now;
1384 cbt->liveness.nonlive_timeouts = 0;
1385
1386 /* Tell control.c */
1388}
1389
1390/**
1391 * Non-destructively scale all of our circuit success, timeout, and close
1392 * counts down by a factor of two. Scaling in this way preserves the
1393 * ratios between succeeded vs timed out vs closed circuits, so that
1394 * our statistics don't change when we scale.
1395 *
1396 * This is used only in the rare event that we build more than
1397 * INT32_MAX circuits. Since the num_circ_* variables are
1398 * uint32_t, we won't even be close to overflowing them.
1399 */
1400void
1402{
1403 cbt->num_circ_succeeded /= 2;
1404 cbt->num_circ_timeouts /= 2;
1405 cbt->num_circ_closed /= 2;
1406}
1407
1408/**
1409 * Called to indicate that we "completed" a circuit. Because this circuit
1410 * succeeded, it doesn't count as a timeout-after-the-first-hop.
1411 *
1412 * (For the purposes of the cbt code, we consider a circuit "completed" if
1413 * it has 3 hops, regardless of its final hop count. We do this because
1414 * we're trying to answer the question, "how long should a circuit take to
1415 * reach the 3-hop count".)
1416 *
1417 * This is used by circuit_build_times_network_check_changed() to determine
1418 * if we had too many recent timeouts and need to reset our learned timeout
1419 * to something higher.
1420 */
1421void
1423{
1424 // Count circuit success
1425 cbt->num_circ_succeeded++;
1426
1427 // If we're going to wrap int32, scale everything
1428 if (cbt->num_circ_succeeded >= INT32_MAX) {
1430 }
1431
1432 /* Check for NULLness because we might not be using adaptive timeouts */
1434 cbt->liveness.num_recent_circs > 0) {
1436 = 0;
1439 }
1440}
1441
1442/**
1443 * A circuit just timed out. If it failed after the first hop, record it
1444 * in our history for later deciding if the network speed has changed.
1445 *
1446 * This is used by circuit_build_times_network_check_changed() to determine
1447 * if we had too many recent timeouts and need to reset our learned timeout
1448 * to something higher.
1449 */
1450static void
1452 int did_onehop)
1453{
1454 // Count circuit timeout
1455 cbt->num_circ_timeouts++;
1456
1457 // If we're going to wrap int32, scale everything
1458 if (cbt->num_circ_timeouts >= INT32_MAX) {
1460 }
1461
1462 /* Check for NULLness because we might not be using adaptive timeouts */
1464 cbt->liveness.num_recent_circs > 0) {
1465 if (did_onehop) {
1467 = 1;
1470 }
1471 }
1472}
1473
1474/**
1475 * A circuit was just forcibly closed. If there has been no recent network
1476 * activity at all, but this circuit was launched back when we thought the
1477 * network was live, increment the number of "nonlive" circuit timeouts.
1478 *
1479 * This is used by circuit_build_times_network_check_live() to decide
1480 * if we should record the circuit build timeout or not.
1481 */
1482static void
1484 int did_onehop, time_t start_time)
1485{
1486 time_t now = time(NULL);
1487
1488 // Count circuit close
1489 cbt->num_circ_closed++;
1490
1491 // If we're going to wrap int32, scale everything
1492 if (cbt->num_circ_closed >= INT32_MAX) {
1494 }
1495
1496 /*
1497 * Check if this is a timeout that was for a circuit that spent its
1498 * entire existence during a time where we have had no network activity.
1499 */
1500 if (cbt->liveness.network_last_live < start_time) {
1501 if (did_onehop) {
1502 char last_live_buf[ISO_TIME_LEN+1];
1503 char start_time_buf[ISO_TIME_LEN+1];
1504 char now_buf[ISO_TIME_LEN+1];
1506 format_local_iso_time(start_time_buf, start_time);
1507 format_local_iso_time(now_buf, now);
1508 log_notice(LD_CIRC,
1509 "A circuit somehow completed a hop while the network was "
1510 "not live. The network was last live at %s, but the circuit "
1511 "launched at %s. It's now %s. This could mean your clock "
1512 "changed.", last_live_buf, start_time_buf, now_buf);
1513 }
1515 if (cbt->liveness.nonlive_timeouts == 1) {
1516 log_notice(LD_CIRC,
1517 "Tor has not observed any network activity for the past %d "
1518 "seconds. Disabling circuit build timeout recording.",
1519 (int)(now - cbt->liveness.network_last_live));
1520
1521 /* Tell control.c */
1523 } else {
1524 log_info(LD_CIRC,
1525 "Got non-live timeout. Current count is: %d",
1527 }
1528 }
1529}
1530
1531/**
1532 * When the network is not live, we do not record circuit build times.
1533 *
1534 * The network is considered not live if there has been at least one
1535 * circuit build that began and ended (had its close_ms measurement
1536 * period expire) since we last received a cell.
1537 *
1538 * Also has the side effect of rewinding the circuit time history
1539 * in the case of recent liveness changes.
1540 */
1541int
1543{
1544 if (cbt->liveness.nonlive_timeouts > 0) {
1545 return 0;
1546 }
1547
1548 return 1;
1549}
1550
1551/**
1552 * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
1553 * the past RECENT_CIRCUITS time out after the first hop. Used to detect
1554 * if the network connection has changed significantly, and if so,
1555 * resets our circuit build timeout to the default.
1556 *
1557 * Also resets the entire timeout history in this case and causes us
1558 * to restart the process of building test circuits and estimating a
1559 * new timeout.
1560 */
1561STATIC int
1563{
1564 int total_build_times = cbt->total_build_times;
1565 int timeout_count=0;
1566 int i;
1567
1569 cbt->liveness.num_recent_circs > 0) {
1570 /* how many of our recent circuits made it to the first hop but then
1571 * timed out? */
1572 for (i = 0; i < cbt->liveness.num_recent_circs; i++) {
1573 timeout_count += cbt->liveness.timeouts_after_firsthop[i];
1574 }
1575 }
1576
1577 /* If 80% of our recent circuits are timing out after the first hop,
1578 * we need to re-estimate a new initial alpha and timeout. */
1579 if (timeout_count < circuit_build_times_max_timeouts()) {
1580 return 0;
1581 }
1582
1585 cbt->liveness.num_recent_circs > 0) {
1586 memset(cbt->liveness.timeouts_after_firsthop, 0,
1587 sizeof(*cbt->liveness.timeouts_after_firsthop)*
1589 }
1591
1592#define MAX_TIMEOUT ((int32_t) (INT32_MAX/2))
1593 /* Check to see if this has happened before. If so, double the timeout
1594 * to give clients on abysmally bad network connections a shot at access */
1596 if (cbt->timeout_ms > MAX_TIMEOUT || cbt->close_ms > MAX_TIMEOUT) {
1597 log_warn(LD_CIRC, "Insanely large circuit build timeout value. "
1598 "(timeout = %fmsec, close = %fmsec)",
1599 cbt->timeout_ms, cbt->close_ms);
1600 } else {
1601 cbt->timeout_ms *= 2;
1602 cbt->close_ms *= 2;
1603 }
1604 } else {
1605 cbt->close_ms = cbt->timeout_ms
1607 }
1608#undef MAX_TIMEOUT
1609
1610 cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
1611
1612 log_notice(LD_CIRC,
1613 "Your network connection speed appears to have changed. Resetting "
1614 "timeout to %ldms after %d timeouts and %d buildtimes.",
1615 tor_lround(cbt->timeout_ms), timeout_count, total_build_times);
1616
1617 return 1;
1618}
1619
1620/**
1621 * Count the number of timeouts in a set of cbt data.
1622 */
1623double
1625{
1626 int i=0,timeouts=0;
1627 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1628 if (cbt->circuit_build_times[i] >= cbt->timeout_ms) {
1629 timeouts++;
1630 }
1631 }
1632
1633 if (!cbt->total_build_times)
1634 return 0;
1635
1636 return ((double)timeouts)/cbt->total_build_times;
1637}
1638
1639/**
1640 * Count the number of closed circuits in a set of cbt data.
1641 */
1642double
1644{
1645 int i=0,closed=0;
1646 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1648 closed++;
1649 }
1650 }
1651
1652 if (!cbt->total_build_times)
1653 return 0;
1654
1655 return ((double)closed)/cbt->total_build_times;
1656}
1657
1658/**
1659 * Store a timeout as a synthetic value.
1660 *
1661 * Returns true if the store was successful and we should possibly
1662 * update our timeout estimate.
1663 */
1664int
1666 int did_onehop,
1667 time_t start_time)
1668{
1670 cbt->close_ms = cbt->timeout_ms
1672 return 0;
1673 }
1674
1675 /* Record this force-close to help determine if the network is dead */
1676 circuit_build_times_network_close(cbt, did_onehop, start_time);
1677
1678 /* Only count timeouts if network is live.. */
1680 return 0;
1681 }
1682
1684 return 1;
1685}
1686
1687/**
1688 * Update timeout counts to determine if we need to expire
1689 * our build time history due to excessive timeouts.
1690 *
1691 * We do not record any actual time values at this stage;
1692 * we are only interested in recording the fact that a timeout
1693 * happened. We record the time values via
1694 * circuit_build_times_count_close() and circuit_build_times_add_time().
1695 */
1696void
1698 int did_onehop)
1699{
1701 cbt->close_ms = cbt->timeout_ms
1703 return;
1704 }
1705
1706 /* Register the fact that a timeout just occurred. */
1707 circuit_build_times_network_timeout(cbt, did_onehop);
1708
1709 /* If there are a ton of timeouts, we should reset
1710 * the circuit build timeout. */
1712}
1713
1714/**
1715 * Estimate a new timeout based on history and set our timeout
1716 * variable accordingly.
1717 */
1718static int
1720{
1721 build_time_t max_time;
1723 return 0;
1724
1726 return 0;
1727
1730
1733
1734 max_time = circuit_build_times_max(cbt);
1735
1736 if (cbt->timeout_ms > max_time) {
1737 log_info(LD_CIRC,
1738 "Circuit build timeout of %dms is beyond the maximum build "
1739 "time we have ever observed. Capping it to %dms.",
1740 (int)cbt->timeout_ms, max_time);
1741 cbt->timeout_ms = max_time;
1742 }
1743
1744 if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
1745 log_info(LD_CIRC,
1746 "Circuit build measurement period of %dms is more than twice "
1747 "the maximum build time we have ever observed. Capping it to "
1748 "%dms.", (int)cbt->close_ms, 2*max_time);
1749 cbt->close_ms = 2*max_time;
1750 }
1751
1752 /* Sometimes really fast guard nodes give us such a steep curve
1753 * that this ends up being not that much greater than timeout_ms.
1754 * Make it be at least 1 min to handle this case. */
1756
1757 cbt->have_computed_timeout = 1;
1758 return 1;
1759}
1760
1761/**
1762 * Exposed function to compute a new timeout. Dispatches events and
1763 * also filters out extremely high timeout values.
1764 */
1765void
1767{
1768 long prev_timeout = tor_lround(cbt->timeout_ms/1000);
1769 double timeout_rate;
1770
1771 /*
1772 * Just return if we aren't using adaptive timeouts
1773 */
1775 return;
1776
1778 return;
1779
1781 log_notice(LD_CIRC, "Set buildtimeout to low value %fms. Setting to %dms",
1784 if (cbt->close_ms < cbt->timeout_ms) {
1785 /* This shouldn't happen because of MAX() in timeout_worker above,
1786 * but doing it just in case */
1788 }
1789 }
1790
1791 cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED);
1792
1793 timeout_rate = circuit_build_times_timeout_rate(cbt);
1794
1795 if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) {
1796 log_info(LD_CIRC,
1797 "Based on %d circuit times, it looks like we don't need to "
1798 "wait so long for circuits to finish. We will now assume a "
1799 "circuit is too slow to use after waiting %ld milliseconds.",
1800 cbt->total_build_times,
1801 tor_lround(cbt->timeout_ms));
1802 log_info(LD_CIRC,
1803 "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1804 cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
1805 timeout_rate);
1806 } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) {
1807 log_info(LD_CIRC,
1808 "Based on %d circuit times, it looks like we need to wait "
1809 "longer for circuits to finish. We will now assume a "
1810 "circuit is too slow to use after waiting %ld milliseconds.",
1811 cbt->total_build_times,
1812 tor_lround(cbt->timeout_ms));
1813 log_info(LD_CIRC,
1814 "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1815 cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
1816 timeout_rate);
1817 } else {
1818 log_info(LD_CIRC,
1819 "Set circuit build timeout to %ldms (%fms, %fms, Xm: %d, a: %f,"
1820 " r: %f) based on %d circuit times",
1821 tor_lround(cbt->timeout_ms),
1822 cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate,
1823 cbt->total_build_times);
1824 }
1825}
1826
1827#ifdef TOR_UNIT_TESTS
1828/** Make a note that we're running unit tests (rather than running Tor
1829 * itself), so we avoid clobbering our state file. */
1830void
1831circuitbuild_running_unit_tests(void)
1832{
1833 unit_tests = 1;
1834}
1835#endif /* defined(TOR_UNIT_TESTS) */
1836
1837void
1838circuit_build_times_update_last_circ(circuit_build_times_t *cbt)
1839{
1840 cbt->last_circ_at = approx_time();
1841}
time_t approx_time(void)
Definition: approx_time.c:32
int authdir_mode(const or_options_t *options)
Definition: authmode.c:25
Header file for directory authority mode.
int circuit_timeout_want_to_count_circ(const origin_circuit_t *circ)
Definition: circuitbuild.c:870
Header file for circuitbuild.c.
int circuit_any_opened_circuits_cached(void)
Definition: circuitlist.c:774
int circuit_get_cpath_opened_len(const origin_circuit_t *circ)
Definition: circuitlist.c:2051
int circuit_event_status(origin_circuit_t *circ, circuit_status_event_t tp, int reason_code)
Definition: circuitlist.c:518
const char * circuit_purpose_to_string(uint8_t purpose)
Definition: circuitlist.c:929
Header file for circuitlist.c.
#define CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT
Definition: circuitlist.h:93
int circuit_build_times_needs_circuits_now(const circuit_build_times_t *cbt)
void circuit_build_times_free_timeouts(circuit_build_times_t *cbt)
Definition: circuitstats.c:591
circuit_build_times_t * get_circuit_build_times_mutable(void)
Definition: circuitstats.c:85
static uint32_t * circuit_build_times_create_histogram(const circuit_build_times_t *cbt, build_time_t *nbins)
Definition: circuitstats.c:826
#define CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP
void circuit_build_times_network_is_live(circuit_build_times_t *cbt)
void circuit_build_times_handle_completed_hop(origin_circuit_t *circ)
Definition: circuitstats.c:679
void circuit_build_times_set_timeout(circuit_build_times_t *cbt)
double get_circuit_build_timeout_ms(void)
Definition: circuitstats.c:101
STATIC double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, double quantile)
int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state)
int circuit_build_times_network_check_live(const circuit_build_times_t *cbt)
STATIC int circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
int circuit_build_times_disabled_(const or_options_t *options, int ignore_consensus)
Definition: circuitstats.c:124
void circuit_build_times_count_timeout(circuit_build_times_t *cbt, int did_onehop)
const circuit_build_times_t * get_circuit_build_times(void)
Definition: circuitstats.c:78
static void circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt, build_time_t *raw_times, uint32_t num_times)
Definition: circuitstats.c:963
double get_circuit_build_close_time_ms(void)
Definition: circuitstats.c:93
void circuit_build_times_new_consensus_params(circuit_build_times_t *cbt, const networkstatus_t *ns)
Definition: circuitstats.c:426
double circuit_build_times_timeout_rate(const circuit_build_times_t *cbt)
void circuit_build_times_update_state(const circuit_build_times_t *cbt, or_state_t *state)
Definition: circuitstats.c:917
void circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ)
Definition: circuitstats.c:637
void circuit_build_times_init(circuit_build_times_t *cbt)
Definition: circuitstats.c:565
int32_t circuit_build_times_initial_timeout(void)
Definition: circuitstats.c:370
STATIC build_time_t circuit_build_times_get_xm(circuit_build_times_t *cbt)
Definition: circuitstats.c:859
void circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
static circuit_build_times_t circ_times
Definition: circuitstats.c:65
STATIC int circuit_build_times_update_alpha(circuit_build_times_t *cbt)
static void circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt)
static build_time_t circuit_build_times_max(const circuit_build_times_t *cbt)
Definition: circuitstats.c:785
static int32_t circuit_build_times_recent_circuit_count(const networkstatus_t *ns)
Definition: circuitstats.c:401
static void circuit_build_times_network_timeout(circuit_build_times_t *cbt, int did_onehop)
int circuit_build_times_enough_to_compute(const circuit_build_times_t *cbt)
Definition: circuitstats.c:255
static int32_t circuit_build_times_min_circs_to_observe(void)
Definition: circuitstats.c:235
int circuit_build_times_count_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time)
int circuit_build_times_disabled(const or_options_t *options)
Definition: circuitstats.c:117
static double circuit_build_times_close_quantile(void)
Definition: circuitstats.c:292
double circuit_build_times_quantile_cutoff(void)
Definition: circuitstats.c:267
static void circuit_build_times_network_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time)
static int32_t circuit_build_times_test_frequency(void)
Definition: circuitstats.c:324
static int32_t circuit_build_times_max_timeouts(void)
Definition: circuitstats.c:182
void circuit_build_times_reset(circuit_build_times_t *cbt)
Definition: circuitstats.c:545
static int32_t circuit_build_times_default_num_xm_modes(void)
Definition: circuitstats.c:211
static int circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t btime)
Definition: circuitstats.c:756
static double circuit_build_times_get_initial_timeout(void)
Definition: circuitstats.c:514
static int32_t circuit_build_times_min_timeout(void)
Definition: circuitstats.c:348
double circuit_build_times_close_rate(const circuit_build_times_t *cbt)
int circuit_build_times_needs_circuits(const circuit_build_times_t *cbt)
Header file for circuitstats.c.
#define CBT_BIN_WIDTH
Definition: circuitstats.h:59
#define CBT_NCIRCUITS_TO_OBSERVE
Definition: circuitstats.h:56
#define CBT_DEFAULT_CLOSE_QUANTILE
Definition: circuitstats.h:82
#define CBT_SAVE_STATE_EVERY
Definition: circuitstats.h:74
uint32_t build_time_t
Definition: circuitstats.h:25
#define CBT_BUILD_ABANDONED
Definition: circuitstats.h:70
#define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE
Definition: circuitstats.h:128
#define CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE
Definition: circuitstats.h:107
#define CBT_DEFAULT_TEST_FREQUENCY
Definition: circuitstats.h:118
#define CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT
Definition: circuitstats.h:102
#define CBT_DEFAULT_TIMEOUT_MIN_VALUE
Definition: circuitstats.h:123
#define CBT_DEFAULT_NUM_XM_MODES
Definition: circuitstats.h:62
#define CBT_DEFAULT_RECENT_CIRCUITS
Definition: circuitstats.h:90
#define CBT_DEFAULT_QUANTILE_CUTOFF
Definition: circuitstats.h:112
void circuit_change_purpose(circuit_t *circ, uint8_t new_purpose)
Definition: circuituse.c:3095
Header file for circuituse.c.
#define MAX(a, b)
Definition: cmp.h:22
const or_options_t * get_options(void)
Definition: config.c:944
Header file for config.c.
Header for confline.c.
Header for confmgt.c.
int control_event_network_liveness_update(int liveness)
Header file for control_events.c.
Path structures for origin circuits.
Common functions for using (pseudo-)random number generators.
double crypto_rand_double(void)
int crypto_rand_int(unsigned int max)
double tor_mathlog(double d)
Definition: fp.c:22
long tor_lround(double d)
Definition: fp.c:31
Header for fp.c.
Header file containing service data for the HS subsystem.
#define LD_BUG
Definition: log.h:86
#define LD_GENERAL
Definition: log.h:62
#define LD_DIR
Definition: log.h:88
#define LD_CIRC
Definition: log.h:82
Header file for mainloop.c.
#define tor_free(p)
Definition: malloc.h:56
int32_t networkstatus_get_param(const networkstatus_t *ns, const char *param_name, int32_t default_val, int32_t min_val, int32_t max_val)
Header file for networkstatus.c.
Master header file for Tor-specific functionality.
#define DEFAULT_ROUTE_LEN
Definition: or.h:902
#define TO_CIRCUIT(x)
Definition: or.h:848
The or_state_t structure, which represents Tor's state file.
Origin circuit structure.
unsigned long tor_parse_ulong(const char *s, int base, unsigned long min, unsigned long max, int *ok, char **next)
Definition: parse_int.c:78
int tor_asprintf(char **strp, const char *fmt,...)
Definition: printf.c:75
void reschedule_descriptor_update_check(void)
Header for feature/relay/relay_periodic.c.
Header file for router.c.
smartlist_t * smartlist_new(void)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
int smartlist_split_string(smartlist_t *sl, const char *str, const char *sep, int flags, int max)
void or_state_mark_dirty(or_state_t *state, time_t when)
Definition: statefile.c:784
int did_last_state_file_write_fail(void)
Definition: statefile.c:547
or_state_t * get_or_state(void)
Definition: statefile.c:220
Header for statefile.c.
build_time_t circuit_build_times[CBT_NCIRCUITS_TO_OBSERVE]
Definition: circuitstats.h:180
network_liveness_t liveness
Definition: circuitstats.h:186
uint8_t purpose
Definition: circuit_st.h:112
struct timeval timestamp_began
Definition: circuit_st.h:166
uint8_t state
Definition: crypt_path_st.h:73
int8_t * timeouts_after_firsthop
Definition: circuitstats.h:170
int CircuitBuildTimeout
int LearnCircuitBuildTimeout
struct config_line_t * BuildtimeHistogram
Definition: or_state_st.h:80
unsigned int relaxed_timeout
crypt_path_t * cpath
#define STATIC
Definition: testsupport.h:32
void format_local_iso_time(char *buf, time_t t)
Definition: time_fmt.c:316
void tor_gettimeofday(struct timeval *timeval)
long tv_mdiff(const struct timeval *start, const struct timeval *end)
Definition: tvdiff.c:102
Header for tvdiff.c.
#define tor_assert(expr)
Definition: util_bug.h:103
#define tor_fragile_assert()
Definition: util_bug.h:278