#!/usr/bin/lua5.3

---
-- Report one policy violation on stdout and count it in _G.ret.
--
-- Nothing is printed or counted when the environment variable
-- SKIP_AL<tag> is set.
--
-- The printed record has the form
--   <sevcer>:[AL<tag>]:<_G.apkbuild>:<line>:<str>
--
-- @param str    description of the violation
-- @param line   line number the violation refers to
-- @param tag    identifier of the check, printed as [AL<tag>]
-- @param sevcer prefix printed at the very start of the record
---
local function violation(str, line, tag, sevcer)
	local suppressed = os.getenv('SKIP_AL'..tag)
	if not suppressed then
		_G.ret = _G.ret + 1
		local record = table.concat({
			sevcer, ":", "[AL", tag, "]:", _G.apkbuild, ":", line, ":", str, "\n",
		})
		io.stdout:write(record)
	end
end

---
-- Check the indentation and YAML markers of a single secfixes line (with the
-- leading '# ' already removed) and return a repaired copy of it.
--
-- Every problem found is reported through violation(); the repaired text is
-- what later checks operate on, so one mistake is not reported again as a
-- different one further down the pipeline.
--
-- @param text   line content without the leading '# '
-- @param lineno line number used when reporting violations
-- @return the line, re-indented and with missing ':' / '- ' added
---
local function normalizeSecfixesLine(text, lineno)
	local bare

	---
	-- A PKGVER-PKGREL key that is not indented by exactly 2 whitespaces.
	-- Anything starting with a digit and containing '-r' is treated as such
	-- a key here; whether it is a valid version is checked later.
	---
	local twoSpaces = text:match("^%s%s") and not text:match("^%s%s%s")
	if not twoSpaces then
		bare = text:gsub("^%s+", "")
		if bare:match("^%d%S*%-r") then
			violation("pkgver-pkgrel indentation is 3 whitespaces", lineno, "48", "SC")
			text = "  "..bare
			if bare:match("^%d%S*[^:]$") then
				violation("missing colon on '"..bare.."'", lineno, "38", "SC")
				text = text..":"
			end
		end
	end

	---
	-- A correctly indented PKGVER-PKGREL key without the trailing colon
	---
	if text:match("^%s%s%d%S*%-r") and text:match("^%s%s%d%S*[^:]$") then
		violation("missing colon on '"..(text:gsub("^%s+", "")).."'", lineno, "38", "SC")
		text = text..":"
	end

	---
	-- A list item ('- ...') that is not indented by exactly 4 whitespaces:
	-- the YAML mapping is right, only the indentation is wrong
	---
	local fourSpaces = text:match("^%s%s%s%s") and not text:match("^%s%s%s%s%s")
	if not fourSpaces then
		bare = text:gsub("^%s+", "")
		if bare:match("^%-%s") then
			violation("Security identifier identation is 5 whitespaces", lineno, "47", "SC")
			text = "    "..bare
		end
	end

	---
	-- Indented by at least 4 whitespaces but not introduced by '- '
	---
	if text:match("^%s%s%s%s") and not text:match("^%s%s%s%s%- ") then
		violation("missing hyphen on '"..(text:gsub("^%s+", "")).."'", lineno, "41", "SC")
		text = text:gsub("^%s+", "    - ")
	end

	return text
end

---
-- Locate the '# secfixes' comment block of a file and return its lines.
--
-- The block starts at the first line beginning with '# secfixes' and runs
-- until the first following line that does not begin with '#' (or until the
-- end of the file). Each line has its leading '# ' removed and is repaired
-- by normalizeSecfixesLine().
--
-- @param file path of the file to read
-- @return a table mapping line number to repaired line text, or nothing when
--         the file cannot be opened or contains no '# secfixes' line
---
local function readFile(file)
	local handle = io.open(file)
	if handle == nil then
		return
	end

	local lineno = 0
	local raw = handle:read("*line")
	while raw ~= nil do
		lineno = lineno + 1

		if raw:match("^# secfixes") then
			---
			-- The header itself needs a trailing colon to be a YAML mapping,
			-- report it and carry on with the corrected header
			---
			if raw:match("^# secfixes$") then
				violation("missing colon on '"..(raw:gsub("^# ", "")).."'", lineno, "37", "SC")
				raw = raw..":"
			end

			-- The header always starts with '#', so it is always collected
			local entries = {}
			repeat
				entries[lineno] = normalizeSecfixesLine((raw:gsub("^# ", "")), lineno)
				raw = handle:read("*line")
				lineno = lineno + 1
			until raw == nil or not raw:match("^#")

			handle:close()
			return entries
		end

		raw = handle:read("*line")
	end

	handle:close()
end

---
-- Check a release key of secfixes, e.g. '1.0.0-r0:'.
--
-- @param str  the key with its indentation removed, including the trailing
--             colon
-- @param line line number used when reporting violations
---
local function checkRel(str, line)
	---
	-- We treat a literal '0' as a special version that means we were never
	-- affected by this security issue, an example would be vulns that only
	-- affect other operating systems.
	--
	-- The match is anchored on purpose: an unanchored "0" matches every
	-- version that merely contains a zero (like 1.0.0-r0) and would skip
	-- all the validation below for it.
	---
	if str:match("^0:$") then
		return
	end

	-- Check if the pkgrel value is made up of only digits
	if not str:match("%-r[%d]+:$") then
		violation("invalid pkgrel", line, "40", "SC")
	end
	---
	-- Check if the pkgver value is made up only of valid characters. The
	-- character set already contains '-' and letters, so a trailing '-r...'
	-- is accepted by the very same pattern and needs no separate match.
	---
	if not str:match("^%d[%d%.%-%_a-zA-Z]*:$") then
		violation("invalid pkgver", line, "39", "SC")
	end
end

---
-- Check the GNUTLS-SA identifier for validity.
--
-- The expected format is GNUTLS-SA-YYYY-MM-DD. Every problem found is
-- reported through violation() with tag 51.
--
-- @param str  the identifier, starting with 'GNUTLS-SA'
-- @param line line number used when reporting violations
---
local function checkGNUTLS(str, line)
	---
	-- Strip the GNUTLS-SA prefix, the caller already matched it
	---
	str = str:gsub("^GNUTLS%-SA", "")

	---
	-- Check if we were given just the string 'GNUTLS-SA'
	---
	if str:len() == 0 then
		violation("GNUTLS-SA ID is empty", line, "51", "SC")
		return
	end
	-- GNUTLS-SA identifiers are made up of only integers and hyphens after the GNUTLS-SA prefix
	if not str:match("^%-[%d%-]*$") then
		violation("GNUTLS-SA ID is not composed of only digits and hyphens", line, "51", "SC")
	end
	---
	-- GNUTLS-SA identifiers' first field is the year in 4 digits YYYY.
	-- Capture the whole run of digits and check its length, matching only
	-- the first 4 digits would let a year like 20200 through.
	---
	local year = str:match("^%-(%d*)")
	if year == nil or #year ~= 4 then
		violation("GNUTLS-SA ID has a bad year, it needs to be in YYYY format", line, "51", "SC")
	end
	---
	-- GNUTLS-SA identifiers' second field is the month in the format MM.
	-- The value is captured by the same match that validates the format so
	-- the range check always looks at the field that was accepted.
	---
	local month = str:match("^%-.-%-(%d%d)%-")
	if month == nil then
		violation("GNUTLS-SA ID has a bad month, it needs to be in MM format", line, "51", "SC")
	else
		month = tonumber(month)
		-- Check if we have a valid month
		if (month < 1 or month > 12) then
			violation("GNUTLS-SA ID has invalid month, it must be between 01 and 12", line, "51", "SC")
		end
	end
	---
	-- Match the starting hyphen, then anything until the next hyphen, then everything until the
	-- next hyphen, then try to match 2 integers at the very end, they should be the day, if
	-- lower than 10 then it should have a leading 0
	---
	local day = str:match("^%-.-%-.-%-(%d%d)$")
	if day == nil then
		violation("GNUTLS-SA ID has a bad day, it needs to be in DD format", line, "51", "SC")
	else
		day = tonumber(day)
		-- Check if we have a valid day
		if (day < 1 or day > 31) then
			violation("GNUTLS-SA ID has a bad day, it needs to be between 01 and 31", line, "51", "SC")
		end
	end
	local _, n = str:gsub("%-", "")
	---
	-- There must be exactly 4 hyphens, as the string is GNUTLS-SA-YYYY-MM-DD, but
	-- we remove the GNUTLS-SA so we need to check for 3 only
	---
	if n ~= 3 then
		violation("GNUTLS-SA ID must have exactly 4 hyphens", line, "51", "SC")
	end
end

---
-- Check the XSA identifier for validity.
--
-- The expected format is XSA-X[X+], that is 'XSA-' followed by at least one
-- digit. Every problem found is reported through violation() with tag 53.
--
-- @param str  the identifier, starting with 'XSA'
-- @param line line number used when reporting violations
---
local function checkXSA(str, line)
	---
	-- Strip the XSA identifier, we know that is good
	-- and we don't want to have to check it every time
	---
	str = str:gsub("^XSA", "")

	---
	-- Check if we were given just the string 'XSA', there is nothing left
	-- to inspect in that case so stop here instead of reporting the same
	-- mistake again through every check below
	---
	if str:len() == 0 then
		violation("XSA ID is empty", line, "53", "SC")
		return
	end
	---
	-- After the hyphen there must be at least one digit and nothing else,
	-- a bare 'XSA-' is not a valid identifier
	---
	if not str:match("^%-%d+$") then
		violation("XSA ID only have integers after the initial XSA-", line, "53", "SC")
	end
	local _, n = str:gsub("%-", "")
	-- There must be exactly 1 hyphen, as the string is XSA-X[X+]
	if n ~= 1 then
		violation("XSA IDs must have exactly 1 hyphen", line, "53", "SC")
	end
end

---
-- Check the CVE identifier for validity.
--
-- The expected format is CVE-YYYY-XXXX[X+], that is a 4 digit year followed
-- by at least 4 digits. Every problem found is reported through violation()
-- with tag 50.
--
-- @param str  the identifier, starting with 'CVE'
-- @param line line number used when reporting violations
---
local function checkCVE(str, line)
	---
	-- Strip the CVE prefix we know that is good
	---
	str = str:gsub("^CVE", "")

	---
	-- Check if we were given just the string 'CVE', there is nothing left
	-- to inspect in that case so stop here instead of reporting the same
	-- mistake again through every check below
	---
	if str:len() == 0 then
		violation("CVE ID is empty", line, "50", "SC")
		return
	end
	-- CVE identifiers are made up of only integers and hyphens after the CVE- prefix
	if not str:match("^%-[%d%-]*$") then
		violation("CVE ID only have integers and hyphens after the initial CVE", line, "50", "SC")
	end
	-- The value right after CVE- is the year which must always be 4 digits (YYYY)
	if not str:match("^%-%d%d%d%d%-") then
		violation("CVE ID does not have year in 4 digit YYYY format", line, "50", "SC")
	end
	-- The last value of a CVE identifier is a collection of AT LEAST 4 digits
	if not str:match("^%-.-%-%d%d%d%d+$") then
		violation("CVE ID does not have at least 4 digits at the end", line, "50", "SC")
	end
	local _, n = str:gsub("%-", "")
	-- There must be exactly 2 hyphens, as the string is CVE-YYYY-XXXX
	if n ~= 2 then
		violation("CVE IDs must have exactly 2 hyphens", line, "50", "SC")
	end
end

---
-- Check the GitHub Security Lab identifier for validity.
--
-- The expected format is GHSL-YYYY-X[X+], that is a 4 digit year followed by
-- at least one digit. Every problem found is reported through violation()
-- with tag 63.
--
-- @param str  the identifier, starting with 'GHSL'
-- @param line line number used when reporting violations
---
local function checkGHSL(str, line)
	---
	-- Strip the GHSL prefix we know that is good
	---
	str = str:gsub("^GHSL", "")

	---
	-- Check if we were given just the string 'GHSL', there is nothing left
	-- to inspect in that case so stop here instead of reporting the same
	-- mistake again through every check below
	---
	if str:len() == 0 then
		violation("GHSL ID is empty", line, "63", "SC")
		return
	end
	-- GHSL Identifiers are made up of only integers and hyphens after the GHSL- prefix
	if not str:match("^%-[%d%-]*$") then
		violation("GHSL ID only have integers and hyphens after the initial GHSL", line, "63", "SC")
	end
	-- The value right after GHSL- is the year which must always be 4 digits (YYYY)
	if not str:match("^%-%d%d%d%d%-") then
		violation("GHSL ID does not have year in 4 digit YYYY format", line, "63", "SC")
	end
	---
	-- The last value of a GHSL identifier is a collection of AT LEAST 1
	-- digit, without this check 'GHSL-2020-' would be accepted
	---
	if not str:match("^%-.-%-%d+$") then
		violation("GHSL ID does not have at least 1 digit at the end", line, "63", "SC")
	end
	local _, n = str:gsub("%-", "")
	-- There must be exactly 2 hyphens, as the string is GHSL-YYYY-XXX
	if n ~= 2 then
		violation("GHSL IDs must have exactly 2 hyphens", line, "63", "SC")
	end
end

---
-- Value we will return, if we return anything but 0 it means
-- we found policy violations, the value is incremented by 1
-- with each policy violation
---
_G.ret = 0

---
-- The families of security identifiers we know about, tried in this order
-- against every whitespace separated word of an identifier line. 'pattern'
-- recognises the family by its prefix, 'check' validates the whole word.
---
local checkers = {
	---
	-- These are widely known, almost every project under the sun uses CVEs
	-- to track its security issues, its format is very simple:
	-- CVE-YYYY-XXXX[X+]
	--
	-- the X+ means it must have at least 4 digits but can have more
	---
	{ pattern = "^CVE", check = checkCVE },
	---
	-- These are specific to GNUTLS a widely used TLS protocol
	-- implementation, its format is very simple:
	-- GNUTLS-SA-YYYY-MM-DD
	--
	-- Reference:
	-- https://gnutls.org/security-new.html
	---
	{ pattern = "^GNUTLS%-SA", check = checkGNUTLS },
	---
	-- These are specific to the Xen project, a widely used hypervisor, its
	-- format is very simple:
	-- XSA-X[X+]
	--
	-- the X+ means it must have at least 1 digit but can have more, in XSA
	-- it increments by 1 for each vulnerability
	--
	-- Reference
	-- https://xenbits.xen.org/xsa/
	---
	{ pattern = "^XSA", check = checkXSA },
	---
	-- These are used by GitHub to assign security vulnerabilities found
	-- by the GitHub Security Lab, its format is:
	-- GHSL-YYYY-X[X+]
	--
	-- The X+ means it must have at least 1 digit but can have more
	--
	-- No documentation was found on it so it was deduced by looking
	-- at various GitHub Security Lab advisories
	---
	{ pattern = "^GHSL", check = checkGHSL },
}

for i = 1,#arg do
	_G.apkbuild = arg[i]

	-- Not called 'table' so the standard table library stays reachable
	local secfixes = readFile(_G.apkbuild)

	---
	-- Only perform the checking operations if we have a table
	-- we can have a table be nil for various reasons including
	-- the user giving us an empty file, a file that doesn't exist
	-- or a valid APKBUILD, that just happens to not have a secfixes
	-- field
	---
	if secfixes then
		---
		-- The table is keyed by line number and does not start at 1, so
		-- pairs() would hand the lines over in an unspecified order. Walk
		-- the line numbers sorted so violations are reported in file order
		-- and the output is the same on every run.
		---
		local linenums = {}
		for k in pairs(secfixes) do
			linenums[#linenums + 1] = k
		end
		table.sort(linenums)

		---
		-- Create a table that will hold all the identifiers we have
		-- we need to check them for duplicates. 'identifiers' maps a line
		-- number to the cleaned up text of that line, 'identlines' lists
		-- those line numbers in ascending order.
		---
		local identifiers = {}
		local identlines = {}

		for _, k in ipairs(linenums) do
			local v = secfixes[k]
			---
			-- Uncomment this once we have a use of checking the secfixes header
			-- if v:match("^%S") then
			--     checkHeader(v, k)
			-- end
			---
			if v:match("^%s%s%S") then
				checkRel((v:gsub("^%s+", "")), k)
			elseif v:match("^%s%s%s%s%S") then
				---
				-- Strip away the preceding hyphen from the security identifier
				---
				v = v:gsub("^%s+ %- ", "")
				---
				-- Strip away anything between parentheses, those are considered comments and should
				-- be ignored by secfixes-check
				--
				-- Taken from https://stackoverflow.com/a/15810519
				---
				v = v:gsub("%b()", "")
				---
				-- Split the string by whitespace, as there can be multiple security
				-- declarations in a single line, like so:
				--
				-- # secfixes:
				--   1.0.0-r0:
				--     - CVE-2020-1000 XSA-1000 GNUTLS-SA-2020-03-01
				---
				for splitv in string.gmatch(v, "[^ ]+") do
					local check
					for _, family in ipairs(checkers) do
						if splitv:match(family.pattern) then
							check = family.check
							break
						end
					end

					if check then
						check(splitv, k)
						-- Remember the whole line once for the duplicate check
						if identifiers[k] == nil then
							identlines[#identlines + 1] = k
						end
						identifiers[k] = v
					else
						violation("unknown security identifier '"..splitv.."'", k, "52", "SC")
					end
				end
			end
		end
		---
		-- Check for duplicates
		---
		local seen = {}
		for _, line in ipairs(identlines) do
			for splitv in string.gmatch(identifiers[line], "[^ ]+") do
				---
				-- If the value is in the 'seen' set it means we have already
				-- tried to find duplicates of it and we don't need to do it
				-- again. The first checking of a specific value prints out
				-- all the subsequent occurrences.
				---
				if not seen[splitv] then
					seen[splitv] = true

					---
					-- GNUTLS-SA can refer to more than 1 CVE leading to situations
					-- where multiple '- CVE-YYYY-XXXX' can have the same trailing
					-- GNUTLS-SA like this:
					--
					-- - CVE-2020-1000 GNUTLS-SA-2020-01-01
					-- - CVE-2020-1001 GNUTLS-SA-2020-01-01
					--
					-- so ignore duplicates of it because it is completely valid
					---
					if not splitv:match('^GNUTLS%-SA') then
						---
						-- This should be true if we have found a duplicate, it allow us
						-- to print the value of the current line as a violation too
						---
						local saw = false
						for _, l in ipairs(identlines) do
							---
							-- Only look at other lines, otherwise the value
							-- would always be a duplicate of itself
							---
							if l ~= line then
								for sv in string.gmatch(identifiers[l], "[^ ]+") do
									if sv == splitv then
										violation("duplicate value '"..sv.."'", l, "59", "SC")
										saw = true
									end
								end
							end
						end
						---
						-- There was a duplicate, so print the value of the loop
						---
						if saw then
							violation("duplicate value '"..splitv.."'", line, "59", "SC")
						end
					end
				end
			end
		end
	end
end
---
-- We use these, so clean them up before we are done
-- store the global return on a local variable so we
-- can exit with its as our error code (or 0 if no
-- policy violations were found)
--
-- On POSIX systems only the low 8 bits of the exit status reach the parent
-- process, so the count is capped at 255: an uncapped count of exactly 256
-- violations would be seen as 0, that is as success.
---
local ret = _G.ret
_G.ret = nil
_G.apkbuild = nil
os.exit(math.min(ret, 255))
