随机化

蓄水池抽样算法(Reservoir Sampling)

给定一个数据流,数据流长度N很大,且N直到处理完所有数据之前都不可知,请问如何在只遍历一遍数据(O(N))的情况下,能够随机选取出m个不重复的数据。

题意中有三个背景:

  • 1.数据流长度N很大且不可知,所以不能一次性存入内存。

  • 2.时间复杂度为O(N)。

  • 3.随机选取m个数,每个数被选中的概率为m/N。

第1点限制了不能直接取N内的m个随机数,然后按索引取出数据。第2点限制了不能先遍历一遍,然后分块存储数据,再随机选取。第3点是数据选取绝对随机的保证。讲真,在不知道蓄水池算法前,我想破脑袋也不知道该题做何解

核心代码

int[] reservoir = new int[m];

// init
for (int i = 0; i < reservoir.length; i++)
{
    reservoir[i] = dataStream[i];
}

for (int i = m; i < dataStream.length; i++)
{
    // 随机获得一个[0, i]内的随机整数
    int d = rand.nextInt(i + 1);
    // 如果随机整数落在[0, m-1]范围内,则替换蓄水池中的元素
    if (d < m)
    {
        reservoir[d] = dataStream[i];
    }
}

方法1:蓄水池抽样

class Solution {
    ListNode head;
    Random random;

    public Solution(ListNode head) {
        this.head = head;
        this.random = new Random();
    }

    public int getRandom() {
        ListNode cur = head;
        int i = 1;
        int reserve = 0;
        while (cur != null) {
            if (random.nextInt(i) == 0) reserve = cur.val;
            cur = cur.next;
            i++;
        }
        return reserve;
    }
}

方法1:蓄水池抽样

class Solution {
            int[] nums;
            Random random;

            public Solution(int[] nums) {
                this.nums = nums;
                random = new Random();
            }

            public int pick(int target) {
                int res = 0;
                int cnt = 0;
                for (int i = 0; i < nums.length; i++) {
                    if (nums[i] == target) {
                        //统计target遇到的次数
                        cnt++;// 第 cnt 次遇到 target
                        //同一个数字的频数1/n的概率选出其中一个索引
                        int r = random.nextInt(cnt);
                        if (r == 0) {
                            res = i;
                        }
                    }
                }
                return res;
            }
        }

方法2:HashMap

class Solution {

    Map<Integer, List<Integer>> map = new HashMap<>();
    Random random = new Random();

    public Solution(int[] nums) {
        for (int i = 0; i < nums.length; i++) {
            List<Integer> list = map.getOrDefault(nums[i], new ArrayList<>());
            list.add(i);
            map.put(nums[i], list);
        }
    }

    public int pick(int target) {
        List<Integer> list = map.get(target);
        return list.get(random.nextInt(list.size()));
    }
}

方法1:拒绝采样

        class Solution {

            double sx, sy;
            double sr;

            public Solution(double radius, double x_center, double y_center) {
                sx = x_center;
                sy = y_center;
                sr = radius;
            }

            public double[] randPoint() {
                Random random = new Random();
                while (true) {
                    double tx = random.nextDouble() * 2 * sr - sr, ty = random.nextDouble() * 2 * sr - sr;
                    if (tx * tx + ty * ty <= sr * sr) {
                        return new double[]{sx + tx, sy + ty};
                    }
                }
            }
        }

方法2:极坐标

ρ= random ∗r

极坐标的的角度也是随机的 θ = 2∗π∗random

x = x_center + ρ * cos(θ)

y = y_center + ρ * sin(θ)

        class Solution {

            double sx, sy;
            double sr;


            public Solution(double radius, double x_center, double y_center) {
                sx = x_center;
                sy = y_center;
                sr = radius;
            }

            public double[] randPoint() {
                Random random = new Random();
                double l = Math.sqrt(random.nextDouble()) * sr;
                double d = random.nextDouble() * 2 * Math.PI;
                double tx = sx + l * Math.cos(d);
                double ty = sy + l * Math.sin(d);
                return new double[]{tx, ty};
            }
        }

方法1:二分+前缀和

 class Solution {
    Random random = new Random();
    int[][] rects;
    List<Integer> arr;//标记每个rect的开始的位置
    int n;

    public Solution(int[][] _rects) {
        rects = _rects;
        arr = new ArrayList<>();
        arr.add(0);
        for (int[] p : rects) {
            int a = p[0], b = p[1], x = p[2], y = p[3];
            //上个位置+当前rect中整数点的个数
            arr.add(arr.get(arr.size() - 1) + (x - a + 1) * (y - b + 1));
        }
        n = arr.size();
    }

    public int[] pick() {
        int k = random.nextInt(arr.get(n - 1));//获取[0...n-1]之间的一个随机数
        //k+1的写法 如果没有 +1 上面的 k可能会是0 这样返回的rectIndex=-1 会越界
        int rectIndex = binarySearch(arr, k + 1) - 1;//获取k应该在的rects中的索引位置
        k -= arr.get(rectIndex);//将k变成相对量,当前rectIndex上的增量
        int[] p = rects[rectIndex];//rectIndex的信息
        int a = p[0], b = p[1], x = p[2], y = p[3];
        int col = y - b + 1;//列上有多少个点,算上边框
        int delta_a = k / col;//在a这个点的增量
        int delta_b = k - col * delta_a;//剩下的是b的增量
        return new int[]{a + delta_a, b + delta_b};//在[a,b]点上增加增量返回
    }


    public int binarySearch(List<Integer> arr, int target) {
        int lo = 0, hi = n - 1;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;//下取整
            if (arr.get(mid) == target) return mid;
            else if (arr.get(mid) > target) hi = mid - 1;
            else lo = mid + 1;
        }
        return lo;//返回是 lo = hi+1
    }
}

方法2:蓄水池抽样

        class Solution {
            int[][] rects;
            Random random = new Random();


            public Solution(int[][] _rects) {
                rects = _rects;
            }

            public int[] pick() { //等效从一堆点中抽一个点,若在某个矩形中包含被抽到的点,则等效抽到这个矩形
                int index = -1, n = 0; //分别记录抽到的矩形下标、当前点的总数
                for (int i = 0; i < rects.length; i++) {
                    int a = rects[i][0], b = rects[i][1], x = rects[i][2], y = rects[i][3];
                    int k = (x - a + 1) * (y - b + 1); //当前矩形包含的点数量
                    n += k;
                    if (random.nextInt(n) < k) index = i; //当前矩形有k/n的概率被保留
                }
                int[] rect = rects[index]; //抽到矩形后,再从这个矩形中随机抽取x、y的值
                int x1 = rect[0], x2 = rect[2], y1 = rect[1], y2 = rect[3];
                return new int[]{x1 + random.nextInt(x2 - x1 + 1), y1 + random.nextInt(y2 - y1 + 1)};
            }
        }

Reference

Last updated