HyperLogLog:基数统计的神器
什么是HyperLogLog? 基数统计:统计集合中不重复元素的个数(Cardinality) 问题:1亿用户UV统计需要多少内存? 方案1:Set存储所有userId 内存:1亿 * 8字节 = 763 MB 方案2:HyperLogLog 内存:12 KB(固定) 误差:0.81% 核心优势: ✅ 固定内存(12KB) ✅ 超高性能 ❌ 有误差(0.81%) ❌ 无法获取具体元素 核心命令 # 添加元素 PFADD key element [element ...] # 获取基数 PFCOUNT key [key ...] # 合并多个HyperLogLog PFMERGE destkey sourcekey [sourcekey ...] 实战案例 案例1:网站UV统计 @Service public class UVStatService { @Autowired private RedisTemplate<String, Object> redis; // 记录用户访问 public void recordVisit(String userId) { String key = "uv:" + LocalDate.now(); redis.opsForHyperLogLog().add(key, userId); redis.expire(key, 90, TimeUnit.DAYS); } // 获取今日UV public Long getTodayUV() { String key = "uv:" + LocalDate.now(); return redis.opsForHyperLogLog().size(key); } // 获取近7天UV public Long getWeekUV() { List<String> keys = new ArrayList<>(); for (int i = 0; i < 7; i++) { keys.add("uv:" + LocalDate.now().minusDays(i)); } return redis.opsForHyperLogLog().size(keys.toArray(new String[0])); } // 获取本月UV public Long getMonthUV() { LocalDate now = LocalDate.now(); List<String> keys = new ArrayList<>(); for (int i = 1; i <= now.getDayOfMonth(); i++) { keys.add("uv:" + now.withDayOfMonth(i)); } return redis.opsForHyperLogLog().size(keys.toArray(new String[0])); } } 案例2:页面独立访客统计 @Service public class PageUVService { @Autowired private RedisTemplate<String, Object> redis; // 记录页面访问 public void recordPageVisit(String pageId, String userId) { String key = "page:uv:" + pageId + ":" + LocalDate.now(); redis.opsForHyperLogLog().add(key, userId); redis.expire(key, 30, TimeUnit.DAYS); } // 获取页面UV public Long getPageUV(String pageId) { String key = "page:uv:" + pageId + ":" + LocalDate.now(); return redis.opsForHyperLogLog().size(key); } // 统计TOP 10热门页面 public Map<String, Long> getTopPages(List<String> pageIds) { Map<String, Long> result = new LinkedHashMap<>(); for (String pageId : pageIds) { Long uv = getPageUV(pageId); result.put(pageId, uv); } return result.entrySet().stream() .sorted(Map.Entry.<String, Long>comparingByValue().reversed()) .limit(10) .collect(Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new )); } } 案例3:独立IP统计 @Service public class IPStatService { @Autowired private RedisTemplate<String, Object> redis; // 记录IP访问 public void recordIP(String ip) { String hourKey = "ip:hour:" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHH")); String dayKey = "ip:day:" + LocalDate.now(); redis.opsForHyperLogLog().add(hourKey, ip); redis.opsForHyperLogLog().add(dayKey, ip); redis.expire(hourKey, 2, TimeUnit.DAYS); redis.expire(dayKey, 90, TimeUnit.DAYS); } // 当前小时独立IP public Long getCurrentHourIP() { String key = "ip:hour:" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHH")); return redis.opsForHyperLogLog().size(key); } // 今日独立IP public Long getTodayIP() { String key = "ip:day:" + LocalDate.now(); return redis.opsForHyperLogLog().size(key); } // 近24小时独立IP趋势 public List<Map<String, Object>> get24HourTrend() { List<Map<String, Object>> result = new ArrayList<>(); LocalDateTime now = LocalDateTime.now(); for (int i = 23; i >= 0; i--) { LocalDateTime time = now.minusHours(i); String key = "ip:hour:" + time.format(DateTimeFormatter.ofPattern("yyyyMMddHH")); Long count = redis.opsForHyperLogLog().size(key); Map<String, Object> item = new HashMap<>(); item.put("hour", time.format(DateTimeFormatter.ofPattern("HH:00"))); item.put("count", count); result.add(item); } return result; } } 案例4:APP活跃用户统计 @Service public class DAUService { @Autowired private RedisTemplate<String, Object> redis; // 记录用户活跃 public void recordActive(Long userId) { String key = "dau:" + LocalDate.now(); redis.opsForHyperLogLog().add(key, String.valueOf(userId)); redis.expire(key, 90, TimeUnit.DAYS); } // 今日DAU(Daily Active Users) public Long getTodayDAU() { String key = "dau:" + LocalDate.now(); return redis.opsForHyperLogLog().size(key); } // 近7天DAU public Long getWeekDAU() { List<String> keys = new ArrayList<>(); for (int i = 0; i < 7; i++) { keys.add("dau:" + LocalDate.now().minusDays(i)); } return redis.opsForHyperLogLog().size(keys.toArray(new String[0])); } // 近30天DAU(MAU - Monthly Active Users) public Long getMonthMAU() { List<String> keys = new ArrayList<>(); for (int i = 0; i < 30; i++) { keys.add("dau:" + LocalDate.now().minusDays(i)); } return redis.opsForHyperLogLog().size(keys.toArray(new String[0])); } // 计算留存率 public double getRetentionRate(LocalDate startDate, int days) { String startKey = "dau:" + startDate; String endKey = "dau:" + startDate.plusDays(days); Long startDAU = redis.opsForHyperLogLog().size(startKey); Long endDAU = redis.opsForHyperLogLog().size(endKey); if (startDAU == null || startDAU == 0) { return 0.0; } return (double) endDAU / startDAU; } } 合并统计 # 合并多个HyperLogLog PFMERGE result uv:20250101 uv:20250102 uv:20250103 # 查询合并后的基数 PFCOUNT result Java实现: ...