When in doubt, NULL means “we don’t know the state of this thing so we can’t perform an operation on it.”

Keeping track of how NULL values are handled in different SQL dialects can be tricky. This post will serve as a living document where I keep track of how BigQuery does this.

NULL and STRING comparison

Comparison always returns NULL.

SELECT
  CAST(NULL AS STRING) = '', -- null
  CAST(NULL AS STRING) = 'value', -- null
  CAST(NULL AS STRING) = CAST(NULL AS STRING) -- null

NULL and BOOL comparison

NULL is not TRUE or FALSE. It is NULL. Comparing NULL with = or <> is NULL.

SELECT 
    CAST(NULL AS BOOL) is TRUE, -- false
    CAST(NULL AS BOOL) is FALSE, -- false
    CAST(NULL AS BOOL) is NULL, -- true
    CAST(NULL AS BOOL) = TRUE, -- null
    CAST(NULL AS BOOL) = FALSE, -- null
    CAST(NULL AS BOOL) = CAST(NULL AS BOOL), -- null
    CAST(NULL AS BOOL) <> TRUE, -- null
    CAST(NULL AS BOOL) <> FALSE, -- null
    CAST(NULL AS BOOL) <> CAST(NULL AS BOOL), -- null

NULL and BOOL operators

NULL and FALSE is False, and NULL or TRUE is True.

SELECT
  CAST(NULL AS BOOL) AND TRUE, -- null
  CAST(NULL AS BOOL) AND FALSE, -- false
  CAST(NULL AS BOOL) OR TRUE, -- true
  CAST(NULL AS BOOL) OR FALSE, -- null
  NOT CAST(NULL AS BOOL) -- null

NULL and NUMERIC operations

According to the docs:

All mathematical functions have the following behaviors:

  • They return NULL if any of the input parameters is NULL.
  • They return NaN if any of the arguments is NaN.

Which I can confirm:

SELECT 
  1 + CAST(NULL AS INT64), -- null
  1 - CAST(NULL AS INT64), -- null
  1 / CAST(NULL AS INT64), -- null
  1 * CAST(NULL AS INT64), -- null
  POW(1, CAST(NULL AS INT64)), -- null
  ABS(CAST(NULL AS INT64)), -- null
  1.1 + CAST(NULL AS FLOAT64), -- null
  1.1 - CAST(NULL AS FLOAT64), -- null
  1.1 / CAST(NULL AS FLOAT64), -- null
  1.1 * CAST(NULL AS FLOAT64), -- null
  POW(1.1, CAST(NULL AS FLOAT64)), -- null
  ABS(CAST(NULL AS FLOAT64)), -- null

NULL and NUMERIC comparison

Directly comparing returns NULL:

WITH a AS (SELECT NULL AS n)
SELECT
  1 < n, -- null
  1 > n, -- null
  1 = n, -- null
  1 <> n, -- null
  1.1 < n, -- null
  1.1 < n, -- null
FROM
  a

When ordering, NULL is the lowest value (below NaN and -infinity):

    SELECT
        n
    FROM UNNEST([
        1, 
        -1, 
        0, 
        CAST(NULL AS INT64), 
        CAST('NaN' AS FLOAT64), 
        CAST('inf' AS FLOAT64), 
        CAST('-inf' AS FLOAT64)]) AS n
    ORDER BY n

returns:

Rown
1null
2NaN
3-Infinity
4-1.0
50.0
61.0
7Infinity

NULL and STRING operations

String functions return NULL if any of the input parameters is NULL.

SELECT 
    ASCII(s) as _ascii, -- null
    BYTE_LENGTH(s) as _byte_length, -- null
    CHAR_LENGTH(s) as _char_length, -- null
    CHR(NULL) as _chr, -- null
    CONCAT(s, s) as _concat, -- null
    CONCAT(s, 'test') as _concat, -- null
    CONTAINS_SUBSTR(s, 'test') as _contains_substr, -- null
    ENDS_WITH(s, s) as _ends_with, -- null
    ENDS_WITH(s, 'test') as _ends_with, -- null
    ENDS_WITH('test', s) as _ends_with, -- null
    INITCAP(s) as _initcap, -- null
    INSTR(s, s) as _instr, -- null
    INSTR('test', s) as _instr, -- null
    LEFT(s, 0) as _left, -- null
    LENGTH(s) as _length, -- null
    LPAD(s, 5, 'A') as _lpad, -- null
    LOWER(s) as _lower, -- null
    LTRIM(s) as _ltrim, -- null
    NORMALIZE(s) as _normalize, -- null
    NORMALIZE_AND_CASEFOLD(s) as _normalize_and_casefold, -- null
    OCTET_LENGTH(s) as _octet_length, -- null
    REGEXP_CONTAINS(s, 'test') as _regexp_contains, -- null
    REGEXP_CONTAINS('test', s) as _regexp_contains, -- null
    REGEXP_EXTRACT(s, 'test') as _regexp_extract, -- null
    REGEXP_EXTRACT('test', s) as _regexp_extract, -- null
    REGEXP_EXTRACT_ALL('test', s) as _regexp_extract_all, -- null
    REGEXP_EXTRACT_ALL(s, 'test') as _regexp_extract_all, -- null
    REGEXP_INSTR('test', s) as _regexp_instr, -- null
    REGEXP_INSTR(s, 'test') as _regexp_instr, -- null
    REGEXP_REPLACE(s, 'test', 'test2') as _regexp_replace, -- null
    REGEXP_REPLACE('test', s, 'test2') as _regexp_replace, -- null
    REGEXP_REPLACE('test2', 'test', s) as _regexp_replace, -- null
    REGEXP_SUBSTR(s, 'test') as _regexp_substr, -- null
    REGEXP_SUBSTR('test', s) as _regexp_substr, -- null
    REPLACE(s, 'test', 'test2') as _replace, -- null
    REPLACE('test', s, 'test2') as _replace, -- null
    REPLACE('test2', 'test', s) as _replace, -- null
    REPEAT(s, 5) as _repeat, -- null
    REVERSE(s) as _reverse, -- null
    RIGHT(s, 5) as _right, -- null
    RIGHT(s, 0) as _right, -- null
    RPAD(s, 5, 'test') as _rpad, -- null
    RTRIM(s, 'test') as _rtrim, -- null
    SOUNDEX(s) as _soundex, -- null
    SPLIT(s, ',') as _split, -- null
    STARTS_WITH(s, 'test') as _starts_with, -- null
    STRPOS(s, 'test') as _strpos, -- null
    SUBSTR(s, 0) as _substr, -- null
    TO_CODE_POINTS(s) as _to_code_points, -- null
    TRANSLATE(s, 'a', 'A') as _translate, -- null
    TRIM(s, 'test') as _trim, -- null
    TRIM('test', s) as _trim, -- null
    UNICODE(s) as _unicode, -- null
    UPPER(s) as _upper, -- null
FROM (
    SELECT CAST(NULL AS STRING) AS s
) value

NULL and BYTES operations

BYTES operations return NULL if any of the input parameters is NULL, the same as STRING operations.

SELECT 
    ASCII(s) as _ascii, -- null
    BYTE_LENGTH(s) as _byte_length, -- null
    CHR(NULL) as _chr, -- null
    CONCAT(s, s) as _concat, --
    CONCAT(s, b'test') as _concat,
    CONTAINS_SUBSTR(s, 'test') as _contains_substr,
    ENDS_WITH(s, s) as _ends_with,
    ENDS_WITH(s, b'test') as _ends_with,
    ENDS_WITH(b'test', s) as _ends_with,
    INSTR(s, s) as _instr,
    INSTR(b'test', s) as _instr,
    LEFT(s, 0) as _left,
    LENGTH(s) as _length,
    LPAD(s, 5, b'A') as _lpad,
    LOWER(s) as _lower,
    OCTET_LENGTH(s) as _octet_length,
    REGEXP_CONTAINS(s, b'test') as _regexp_contains,
    REGEXP_CONTAINS(b'test', s) as _regexp_contains,
    REGEXP_EXTRACT(s, b'test') as _regexp_extract,
    REGEXP_EXTRACT(b'test', s) as _regexp_extract,
    REGEXP_EXTRACT_ALL(b'test', s) as _regexp_extract_all,
    REGEXP_EXTRACT_ALL(s, b'test') as _regexp_extract_all,
    REGEXP_INSTR(b'test', s) as _regexp_instr,
    REGEXP_INSTR(s, b'test') as _regexp_instr,
    REGEXP_REPLACE(s, b'test', b'test2') as _regexp_replace,
    REGEXP_REPLACE(b'test', s, b'test2') as _regexp_replace,
    REGEXP_REPLACE(b'test2', b'test', s) as _regexp_replace,
    REGEXP_SUBSTR(s, b'test') as _regexp_substr,
    REGEXP_SUBSTR(b'test', s) as _regexp_substr,
    REPLACE(s, b'test', b'test2') as _replace,
    REPLACE(b'test', s, b'test2') as _replace,
    REPLACE(b'test2', b'test', s) as _replace,
    REPEAT(s, 5) as _repeat,
    REVERSE(s) as _reverse,
    RIGHT(s, 5) as _right,
    RIGHT(s, 0) as _right,
    RPAD(s, 5, b'test') as _rpad,
    RTRIM(s, b'test') as _rtrim,
    SPLIT(s, b',') as _split,
    STARTS_WITH(s, b'test') as _starts_with,
    STRPOS(s, b'test') as _strpos,
    SUBSTR(s, 0) as _substr,
    TO_CODE_POINTS(s) as _to_code_points,
    TRANSLATE(s, b'a', b'A') as _translate,
    TRIM(s, b'test') as _trim,
    TRIM(b'test', s) as _trim,
    UPPER(s) as _upper,
FROM (
    SELECT CAST(NULL AS BYTES) AS s
) value

NULL and ARRAY types

BigQuery has two limitations for NULLs and ARRAYs:

BigQuery raises an error if query result has ARRAYs which contain NULL elements, although such ARRAYs can be used inside the query.

BigQuery translates NULL ARRAY into empty ARRAY in the query result, although inside the query NULL and empty ARRAYs are two distinct values.

So while this query works:

SELECT
    -- value,
    ARRAY_LENGTH(value) as _array_length,
FROM (
    SELECT
        [NULL] as value
)

If you uncomment the value line, you will get an error:

Array cannot have a null element; error in writing field value

Here is a comparison between NULL and empty arrays:

WITH a AS (
  SELECT
    [] AS value,
    'empty_array' as value_type
  UNION ALL
  SELECT
    CAST(NULL AS ARRAY<INT64>) AS value,
    'null_array' as value_type
  UNION ALL
  SELECT
    [NULL] AS value,
    'array_with_nulls' as value_type )
SELECT
    value_type,
    value IS NULL as is_null,
    value IS NOT NULL as is_not_null,
    ARRAY_LENGTH(value) as _array_length,
FROM a

Returns:

Rowvalue_typeis_nullis_not_null_array_length
1empty_arrayfalsetrue0
2null_arraytruefalsenull
3array_with_nullsfalsetrue1

NULL and ARRAY operations

ARRAY_CONCAT NULL and an array returns NULL:

SELECT 
  ARRAY_CONCAT(CAST(NULL AS ARRAY<STRING>), ['test'])

-- null

Compare that to ARRAY_CONCAT an array and an empty array:

SELECT 
  ARRAY_CONCAT([], ['test'])

-- ['test']

NULL and DATE operations

As expected, DATE operations return NULL if any of the input parameters is NULL.

WITH
  dates AS (
  SELECT
    CURRENT_DATE() AS value,
    'current_date' AS value_type
  UNION ALL
  SELECT
    CAST(NULL AS DATE) AS value,
    'null_date' AS value_type)
  SELECT
    value,
    EXTRACT(YEAR FROM value) as year,
    DATE_ADD(value, INTERVAL 7 DAY) as plus_week,
    DATE_SUB(value, INTERVAL 7 DAY) as minus_week,
    DATE_DIFF(value, CURRENT_DATE(), DAY) as day_diff,
    DATE_TRUNC(value, MONTH) as month,
    FORMAT_DATE('%x', value) as us_format,
    LAST_DAY(value, MONTH) as _last_day,
    UNIX_DATE(value) as _unix_date,
  FROM
    dates;
Rowvalueyearplus_weekminus_weekday_diffmonthus_format_last_day_unix_date
12021-10-0520212021-10-122021-09-2802021-10-0110/05/212021-10-3118905
2nullnullnullnullnullnullnullnullnull