BigQuery duplicate entries
DELETE FROM `ais-data-385301.uscg.nais`
WHERE DATE(BaseDateTime) = '2022-01-23';
CREATE TABLE `ais-data-385301.uscg.no_dups` AS
SELECT
*
FROM
(
SELECT
*,
ROW_NUMBER() OVER(PARTITION BY MMSI, BaseDateTime ORDER BY MMSI) AS row_num
FROM
`ais-data-385301.uscg.nais`
)
WHERE
row_num = 1;
CREATE TABLE `ais-data-385301.uscg.dups` AS
SELECT
*
FROM
(
SELECT
*,
ROW_NUMBER() OVER(PARTITION BY MMSI, BaseDateTime ORDER BY MMSI) AS row_num
FROM
`ais-data-385301.uscg.nais`
)
WHERE
row_num > 1;
SELECT COUNT(*) FROM `ais-data-385301.uscg.nais`;
2966617246
SELECT COUNT(*) FROM `ais-data-385301.uscg.no_dups`;
2965744033
SELECT COUNT(*) FROM `ais-data-385301.uscg.dups`;
873213