You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

321 lines
11 KiB

  1. use prometheus::{
  2. Encoder, Gauge, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Opts, Registry, TextEncoder,
  3. };
  4. use std::{
  5. convert::TryInto,
  6. error::Error,
  7. net::SocketAddr,
  8. sync::{
  9. atomic::{AtomicBool, AtomicU64, Ordering},
  10. Arc,
  11. },
  12. thread,
  13. time::{Duration, SystemTime, UNIX_EPOCH},
  14. };
  15. use tracing::{debug, error};
/// Deferred registration step handed back by the metric constructors in this
/// file: a boxed, call-once closure that registers the freshly created
/// collectors with the [`Registry`] it is given.
type RegistryFn = Box<dyn FnOnce(&Registry) -> Result<(), prometheus::Error>>;
/// Counters describing player connects and disconnects.
pub struct PlayerMetrics {
    /// Exported as `players_connected`: the number of clients joined to the
    /// server.
    pub players_connected: IntCounter,
    /// Exported as `players_disconnected`, partitioned by a single `reason`
    /// label.
    pub players_disconnected: IntCounterVec, // timeout, network_error, gracefully
}
/// Counters describing how incoming chunk requests were handled.
pub struct NetworkRequestMetrics {
    /// Exported as `chunks_request_dropped`: chunk requests that were dropped
    /// (the help text gives "player was too far away" as an example).
    pub chunks_request_dropped: IntCounter,
    /// Exported as `chunks_served_from_cache`: requested chunks that were
    /// already generated and could be served from the cache.
    pub chunks_served_from_cache: IntCounter,
    /// Exported as `chunks_generation_triggered`: requested chunks that still
    /// had to be generated.
    pub chunks_generation_triggered: IntCounter,
}
/// Counters describing the chunk generation pipeline.
pub struct ChunkGenMetrics {
    /// Exported as `chunks_requested`: all chunks requested on the server.
    pub chunks_requested: IntCounter,
    /// Exported as `chunks_served`: requested chunks already served.
    pub chunks_served: IntCounter,
    /// Exported as `chunks_canceled`: chunk requests that were canceled.
    pub chunks_canceled: IntCounter,
}
/// Gauges that describe the current server state, plus the shared tick
/// counter used to throttle how often they are refreshed.
pub struct TickMetrics {
    /// Exported as `chonks_count`: chonks currently active on the server.
    pub chonks_count: IntGauge,
    /// Exported as `chunks_count`: chunks currently active on the server.
    pub chunks_count: IntGauge,
    /// Exported as `entity_count`: entities currently active on the server.
    pub entity_count: IntGauge,
    /// Exported as `tick_time`, partitioned by a `period` label; the help text
    /// states the unit as nanoseconds.
    pub tick_time: IntGaugeVec,
    /// Exported as `veloren_build_info`, carrying the constant labels `hash`
    /// and `version`.
    pub build_info: IntGauge,
    /// Exported as `veloren_start_time`; the constructor sets it to the number
    /// of seconds since the Unix epoch at creation time.
    pub start_time: IntGauge,
    /// Exported as `time_of_day`: in-game time in in-game seconds.
    pub time_of_day: Gauge,
    /// Exported as `light_count`: lights currently active on the server.
    pub light_count: IntGauge,
    /// Shared tick counter, read by `is_100th_tick`.
    tick: Arc<AtomicU64>,
}
/// Owner of the metrics registry and of the background thread that serves it
/// over HTTP.
pub struct ServerMetrics {
    /// Keep-running flag polled by the exporter thread; set to `true` by `run`
    /// and to `false` on drop.
    running: Arc<AtomicBool>,
    /// Join handle of the exporter thread; `None` until `run` has spawned it.
    handle: Option<thread::JoinHandle<()>>,
    /// The registry metrics are registered with; `run` takes it out and moves
    /// it into the exporter thread, leaving `None` behind.
    registry: Option<Registry>,
    /// Tick counter incremented by `tick` and shared through `tick_clone`.
    tick: Arc<AtomicU64>,
}
  48. impl PlayerMetrics {
  49. pub fn new() -> Result<(Self, RegistryFn), prometheus::Error> {
  50. let players_connected = IntCounter::with_opts(Opts::new(
  51. "players_connected",
  52. "shows the number of clients joined to the server",
  53. ))?;
  54. let players_disconnected = IntCounterVec::new(
  55. Opts::new(
  56. "players_disconnected",
  57. "shows the number of clients disconnected from the server and the reason",
  58. ),
  59. &["reason"],
  60. )?;
  61. let players_connected_clone = players_connected.clone();
  62. let players_disconnected_clone = players_disconnected.clone();
  63. let f = |registry: &Registry| {
  64. registry.register(Box::new(players_connected_clone))?;
  65. registry.register(Box::new(players_disconnected_clone))?;
  66. Ok(())
  67. };
  68. Ok((
  69. Self {
  70. players_connected,
  71. players_disconnected,
  72. },
  73. Box::new(f),
  74. ))
  75. }
  76. }
  77. impl NetworkRequestMetrics {
  78. pub fn new() -> Result<(Self, RegistryFn), prometheus::Error> {
  79. let chunks_request_dropped = IntCounter::with_opts(Opts::new(
  80. "chunks_request_dropped",
  81. "number of all chunk request dropped, e.g because the player was to far away",
  82. ))?;
  83. let chunks_served_from_cache = IntCounter::with_opts(Opts::new(
  84. "chunks_served_from_cache",
  85. "number of all requested chunks already generated and could be served out of cache",
  86. ))?;
  87. let chunks_generation_triggered = IntCounter::with_opts(Opts::new(
  88. "chunks_generation_triggered",
  89. "number of all chunks that were requested and needs to be generated",
  90. ))?;
  91. let chunks_request_dropped_clone = chunks_request_dropped.clone();
  92. let chunks_served_from_cache_clone = chunks_served_from_cache.clone();
  93. let chunks_generation_triggered_clone = chunks_generation_triggered.clone();
  94. let f = |registry: &Registry| {
  95. registry.register(Box::new(chunks_request_dropped_clone))?;
  96. registry.register(Box::new(chunks_served_from_cache_clone))?;
  97. registry.register(Box::new(chunks_generation_triggered_clone))?;
  98. Ok(())
  99. };
  100. Ok((
  101. Self {
  102. chunks_request_dropped,
  103. chunks_served_from_cache,
  104. chunks_generation_triggered,
  105. },
  106. Box::new(f),
  107. ))
  108. }
  109. }
  110. impl ChunkGenMetrics {
  111. pub fn new() -> Result<(Self, RegistryFn), prometheus::Error> {
  112. let chunks_requested = IntCounter::with_opts(Opts::new(
  113. "chunks_requested",
  114. "number of all chunks requested on the server",
  115. ))?;
  116. let chunks_served = IntCounter::with_opts(Opts::new(
  117. "chunks_served",
  118. "number of all requested chunks already served on the server",
  119. ))?;
  120. let chunks_canceled = IntCounter::with_opts(Opts::new(
  121. "chunks_canceled",
  122. "number of all canceled chunks on the server",
  123. ))?;
  124. let chunks_requested_clone = chunks_requested.clone();
  125. let chunks_served_clone = chunks_served.clone();
  126. let chunks_canceled_clone = chunks_canceled.clone();
  127. let f = |registry: &Registry| {
  128. registry.register(Box::new(chunks_requested_clone))?;
  129. registry.register(Box::new(chunks_served_clone))?;
  130. registry.register(Box::new(chunks_canceled_clone))?;
  131. Ok(())
  132. };
  133. Ok((
  134. Self {
  135. chunks_requested,
  136. chunks_served,
  137. chunks_canceled,
  138. },
  139. Box::new(f),
  140. ))
  141. }
  142. }
  143. impl TickMetrics {
  144. pub fn new(tick: Arc<AtomicU64>) -> Result<(Self, RegistryFn), Box<dyn Error>> {
  145. let chonks_count = IntGauge::with_opts(Opts::new(
  146. "chonks_count",
  147. "number of all chonks currently active on the server",
  148. ))?;
  149. let chunks_count = IntGauge::with_opts(Opts::new(
  150. "chunks_count",
  151. "number of all chunks currently active on the server",
  152. ))?;
  153. let entity_count = IntGauge::with_opts(Opts::new(
  154. "entity_count",
  155. "number of all entities currently active on the server",
  156. ))?;
  157. let opts = Opts::new("veloren_build_info", "Build information")
  158. .const_label("hash", &common::util::GIT_HASH)
  159. .const_label("version", "");
  160. let build_info = IntGauge::with_opts(opts)?;
  161. let start_time = IntGauge::with_opts(Opts::new(
  162. "veloren_start_time",
  163. "start time of the server in seconds since EPOCH",
  164. ))?;
  165. let time_of_day =
  166. Gauge::with_opts(Opts::new("time_of_day", "ingame time in ingame-seconds"))?;
  167. let light_count = IntGauge::with_opts(Opts::new(
  168. "light_count",
  169. "number of all lights currently active on the server",
  170. ))?;
  171. let tick_time = IntGaugeVec::new(
  172. Opts::new("tick_time", "time in ns required for a tick of the server"),
  173. &["period"],
  174. )?;
  175. let since_the_epoch = SystemTime::now()
  176. .duration_since(UNIX_EPOCH)
  177. .expect("Time went backwards");
  178. start_time.set(since_the_epoch.as_secs().try_into()?);
  179. let chonks_count_clone = chonks_count.clone();
  180. let chunks_count_clone = chunks_count.clone();
  181. let entity_count_clone = entity_count.clone();
  182. let build_info_clone = build_info.clone();
  183. let start_time_clone = start_time.clone();
  184. let time_of_day_clone = time_of_day.clone();
  185. let light_count_clone = light_count.clone();
  186. let tick_time_clone = tick_time.clone();
  187. let f = |registry: &Registry| {
  188. registry.register(Box::new(chonks_count_clone))?;
  189. registry.register(Box::new(chunks_count_clone))?;
  190. registry.register(Box::new(entity_count_clone))?;
  191. registry.register(Box::new(build_info_clone))?;
  192. registry.register(Box::new(start_time_clone))?;
  193. registry.register(Box::new(time_of_day_clone))?;
  194. registry.register(Box::new(light_count_clone))?;
  195. registry.register(Box::new(tick_time_clone))?;
  196. Ok(())
  197. };
  198. Ok((
  199. Self {
  200. chonks_count,
  201. chunks_count,
  202. entity_count,
  203. tick_time,
  204. build_info,
  205. start_time,
  206. time_of_day,
  207. light_count,
  208. tick,
  209. },
  210. Box::new(f),
  211. ))
  212. }
  213. pub fn is_100th_tick(&self) -> bool { self.tick.load(Ordering::Relaxed).rem_euclid(100) == 0 }
  214. }
  215. impl ServerMetrics {
  216. #[allow(clippy::new_without_default)] // TODO: Pending review in #587
  217. pub fn new() -> Self {
  218. let running = Arc::new(AtomicBool::new(false));
  219. let tick = Arc::new(AtomicU64::new(0));
  220. let registry = Some(Registry::new());
  221. Self {
  222. running,
  223. handle: None,
  224. registry,
  225. tick,
  226. }
  227. }
  228. pub fn registry(&self) -> &Registry {
  229. match self.registry {
  230. Some(ref r) => r,
  231. None => panic!("You cannot longer register new metrics after the server has started!"),
  232. }
  233. }
  234. pub fn run(&mut self, addr: SocketAddr) -> Result<(), Box<dyn Error>> {
  235. self.running.store(true, Ordering::Relaxed);
  236. let running2 = self.running.clone();
  237. let registry = self
  238. .registry
  239. .take()
  240. .expect("ServerMetrics must be already started");
  241. //TODO: make this a job
  242. self.handle = Some(thread::spawn(move || {
  243. let server = tiny_http::Server::http(addr).unwrap();
  244. const TIMEOUT: Duration = Duration::from_secs(1);
  245. debug!("starting tiny_http server to serve metrics");
  246. while running2.load(Ordering::Relaxed) {
  247. let request = match server.recv_timeout(TIMEOUT) {
  248. Ok(Some(rq)) => rq,
  249. Ok(None) => continue,
  250. Err(e) => {
  251. error!(?e, "metrics http server error");
  252. break;
  253. },
  254. };
  255. let mf = registry.gather();
  256. let encoder = TextEncoder::new();
  257. let mut buffer = vec![];
  258. encoder
  259. .encode(&mf, &mut buffer)
  260. .expect("Failed to encoder metrics text.");
  261. let response = tiny_http::Response::from_string(
  262. String::from_utf8(buffer).expect("Failed to parse bytes as a string."),
  263. );
  264. if let Err(e) = request.respond(response) {
  265. error!(
  266. ?e,
  267. "The metrics HTTP server had encountered and error with answering",
  268. );
  269. }
  270. }
  271. debug!("stopping tiny_http server to serve metrics");
  272. }));
  273. Ok(())
  274. }
  275. pub fn tick(&self) -> u64 { self.tick.fetch_add(1, Ordering::Relaxed) + 1 }
  276. pub fn tick_clone(&self) -> Arc<AtomicU64> { self.tick.clone() }
  277. }
  278. impl Drop for ServerMetrics {
  279. fn drop(&mut self) {
  280. self.running.store(false, Ordering::Relaxed);
  281. let handle = self.handle.take();
  282. handle
  283. .expect("ServerMetrics worker handle does not exist.")
  284. .join()
  285. .expect("Error shutting down prometheus metric exporter");
  286. }
  287. }