Saltu al enhavo

Modulo:Wikidata utilities

El Vikivojaĝo

Versiokontrolo

[redakti]

Versionomo en Vikidatumoj: 2023-12-26 Ok!


-- Wikidata convenience utilities

-- documentation
-- Self-description of this module: suite name, version stamp and an item
-- number.
-- NOTE(review): `item` is presumably the numeric part of the Wikidata item
-- against which the module version is checked -- confirm with the consumer
-- of moduleInterface.
local WikidataUtilities = {
	suite  = 'WikidataUtilities',
	serial = '2023-12-26',
	item   = 65439025
}

-- i18n
-- Wikidata identifiers used by this module, collected in one place.
local wd = {
	version   = 'P348', -- property id; not read by any code visible in this file
	retrieved = 'P813', -- property id looked up in references by wu.getLastedit

	-- Calendar models for which analyzeDatavalue reduces a time value to its
	-- plain time string.
	gregorianCalendar = { -- calendar models
		Q12138   = 1, -- Gregorian
		Q1985727 = 1  -- proleptic Gregorian
	},

	-- Sitelink badges that wu.getCheckedSitelink reports as a redirect.
	redirectBadges = {
		Q70894304 = 1, -- intentional sitelink
		Q70893996 = 1  -- redirect sitelink
	}
}

-- module variable and administration
-- `wu` is the table this file returns; the public functions are attached to
-- it below. moduleInterface exposes the self-description table.
local wu = {
	moduleInterface = WikidataUtilities
}

-- table storing property ids used
-- Keys are property ids, the values are always ''. wu.getCategories() turns
-- the keys into category links. P0 is a placeholder entry which
-- wu.getCategories() removes before building its output.
local catTable = {
	P0 = ''
}

-- Tells whether a value is present. nil and false are handed back unchanged,
-- the empty string yields false, every other value yields true.
local function isSet( arg )
	if not arg then
		return arg
	end
	return arg ~= ''
end

-- Loads the entity table for an id.
-- Returns three values: id, entity, wrongQualifier.
--   * id not set:               '', nil, false
--   * entity could be loaded:   id, entity, false
--   * otherwise:                '', nil, true
function wu.getEntity( id )
	if not isSet( id ) then
		return '', nil, false
	end

	-- expensive function call (per the original author's note; redirect ids
	-- are flagged as wrong, too)
	local entity = mw.wikibase.isValidEntityId( id )
		and mw.wikibase.getEntity( id ) or nil
	if entity then
		return id, entity, false
	end
	return '', nil, true
end

-- Checks an id without loading the entity table.
-- Returns three values: id, entity, wrongQualifier, where `entity` is the id
-- itself when the id is valid and exists.
--   * id not set:            '', nil, false
--   * id valid and existing: id, id, false
--   * otherwise:             '', nil, true
function wu.getEntityId( id )
	if not isSet( id ) then
		return '', nil, false
	end
	-- the original author notes this lookup as expensive and that redirect
	-- ids are flagged as wrong, too
	if mw.wikibase.isValidEntityId( id ) and mw.wikibase.entityExists( id ) then
		return id, id, false
	end
	return '', nil, true
end

-- Fetches a label for an entity given as id string or as entity table.
-- With a set `lang` the label in that language is tried first; unless
-- `noFallback` is truthy the default label lookup is used as fallback.
-- Returns the label, nil if the entity is unusable, and -- note -- false when
-- `noFallback` is truthy and no label in `lang` was found.
function wu.getLabel( entity, lang, noFallback )
	if not isSet( entity ) then
		return nil
	end

	local kind = type( entity )
	local label
	if kind == 'string' and mw.wikibase.isValidEntityId( entity ) then
		if isSet( lang ) then
			label = mw.wikibase.getLabelByLang( entity, lang )
		end
		if not label then
			label = not noFallback and mw.wikibase.getLabel( entity )
		end
		return label
	elseif kind == 'table' and entity.labels then -- really a wikidata entity?
		if isSet( lang ) then
			label = entity:getLabel( lang )
		end
		if not label then
			label = not noFallback and entity:getLabel()
		end
		return label
	end
	return nil
end

-- Returns the list of alias strings of an entity (id string or entity table)
-- in language `lang`; the content language is used when `lang` is not given.
-- An empty table is returned when there are no aliases.
function wu.getAliases( entity, lang )
	if type( entity ) == 'string' then -- is Q id
		entity = mw.wikibase.getEntity( entity )
	end
	lang = lang or mw.getContentLanguage():getCode()

	local aliases = {}
	local list = entity and entity.aliases and entity.aliases[ lang ]
	if list then
		for _, alias in ipairs( list ) do
			aliases[ #aliases + 1 ] = alias.value
		end
	end
	return aliases
end

-- Returns the sitelink title of an entity for the wiki `globalSiteId`.
-- `entity` may be an id string or an entity table; nil is returned for
-- anything else.
function wu.getSitelink( entity, globalSiteId )
	if not isSet( entity ) then
		return nil
	end
	local tp = type( entity )
	if tp == 'string' then -- entity is id
		return mw.wikibase.getSitelink( entity, globalSiteId )
	elseif tp == 'table' and entity.labels then -- really a wikidata entity?
		-- The explicit type check mirrors wu.getLabel: indexing a number or
		-- boolean argument would raise an error instead of returning nil.
		return entity:getSitelink( globalSiteId )
	end
	return nil
end

-- Returns the raw sitelink record of an entity (id string or entity table)
-- for the wiki `globalSiteId`, or nil when either argument is not set or no
-- such sitelink is present.
local function getSitelinkTable( entity, globalSiteId )
	if not ( isSet( entity ) and isSet( globalSiteId ) ) then
		return nil
	end
	if type( entity ) == 'string' then -- entity is id
		entity = mw.wikibase.getEntity( entity )
	end

	local links = entity and entity.sitelinks
	if links then
		return links[ globalSiteId ]
	end
	return nil
end

-- Gets the sitelink title and tells whether it is marked as a redirect.
-- Returns title, isRedirect; title is nil (and isRedirect false) when there
-- is no sitelink. A sitelink counts as redirect when one of its badges is
-- listed in wd.redirectBadges.
function wu.getCheckedSitelink( entity, globalSiteId )
	local link = getSitelinkTable( entity, globalSiteId )
	if not ( link and link.title ) then
		return nil, false
	end

	for i = 1, #link.badges do
		if wd.redirectBadges[ link.badges[ i ] ] then
			return link.title, true
		end
	end
	return link.title, false
end

-- Gets the sitelink title, excluding redirects: returns the title only if a
-- sitelink exists and is not marked as a redirect, otherwise nil.
function wu.getFilteredSitelink( entity, globalSiteId )
	local title, isRedirect = wu.getCheckedSitelink( entity, globalSiteId )
	if isRedirect or not title then
		return nil
	end
	return title
end

-- Converts a Wikidata concept URI to the bare entity id by removing the
-- 'http(s)://www.wikidata.org/entity/' part. Strings without that part are
-- returned unchanged (e.g. the unit '1'); anything that is not a non-empty
-- string yields ''. Always returns exactly one value.
local function getUnitId( unit )
	if type( unit ) == 'string' and unit ~= '' then
		-- The dots are escaped so they match literally, and the surrounding
		-- parentheses drop the match count which gsub returns as second value.
		return ( unit:gsub( 'https?://www%.wikidata%.org/entity/', '' ) )
	end
	return ''
end

-- Returns the best statements of property p for an entity given either as a
-- valid id string or as an entity table; an empty table for anything else.
local function getBestStatements( entity, p )
	local kind = type( entity )
	if kind == 'table' then
		if entity.labels then -- really a wikidata entity?
			return entity:getBestStatements( p )
		end
	elseif kind == 'string' then
		if mw.wikibase.isValidEntityId( entity ) then
			return mw.wikibase.getBestStatements( entity, p )
		end
	end
	return {}
end

-- Returns up to `count` best statements of property p whose main snak
-- carries a value (all of them when count is nil). Quantity values are
-- normalized in place: a leading '+' is stripped from the amount and the
-- unit URI is reduced with getUnitId. An empty table is returned when
-- entity or p is not set or nothing matches.
local function getStatements( entity, p, count )
	local ar = {}
	if not isSet( entity ) or not isSet( p ) then
		return ar
	end

	local statements = getBestStatements( entity, p )
	local total = #statements
	count = math.min( count or total, total )
	if count <= 0 then
		return ar
	end

	for i = 1, total do
		local snak = statements[ i ].mainsnak
		if snak.snaktype == 'value' then
			if snak.datatype == 'quantity' then
				local quantity = snak.datavalue.value
				quantity.amount = quantity.amount:gsub( '^+', '' )
				quantity.unit = getUnitId( quantity.unit )
			end
			ar[ #ar + 1 ] = statements[ i ]
			if #ar >= count then
				break -- enough statements collected
			end
		end
	end

	return ar
end

-- Returns the value of the first best value statement of property p, or ''
-- if there is none. On success p is recorded in catTable.
function wu.getValue( entity, p )
	local first = getStatements( entity, p, 1 )[ 1 ]
	if not first then
		return ''
	end
	catTable[ p ] = ''
	return first.mainsnak.datavalue.value
end

-- Returns the `id` field of the first best value statement of property p, or
-- '' if there is no statement or its value has no id. p is recorded in
-- catTable only when a non-empty id was found.
function wu.getId( entity, p )
	local first = getStatements( entity, p, 1 )[ 1 ]
	local id = first and first.mainsnak.datavalue.value.id or ''
	if id ~= '' then
		catTable[ p ] = ''
	end
	return id
end

-- Returns the values of up to `count` best value statements of property p
-- (an empty table if there are none). p is recorded in catTable when at
-- least one statement was found.
function wu.getValues( entity, p, count )
	local values = getStatements( entity, p, count )
	local n = #values
	if n == 0 then
		return values
	end

	-- replace each statement by its main value in place
	for i = 1, n do
		values[ i ] = values[ i ].mainsnak.datavalue.value
	end
	catTable[ p ] = ''
	return values
end

-- Returns the `id` fields of up to `count` best value statements of property
-- p, in statement order; statements whose value has no id are skipped.
-- p is recorded in catTable when at least one id was found.
function wu.getIds( entity, p, count )
	local statements = getStatements( entity, p, count )
	-- Ids are collected into a fresh sequence. Writing nil into the statement
	-- array and removing the slot afterwards would depend on the length of a
	-- table with a hole, which Lua leaves unspecified, and could drop ids.
	local ids = {}
	for i = 1, #statements do
		local id = statements[ i ].mainsnak.datavalue.value.id
		if id then
			ids[ #ids + 1 ] = id
		end
	end
	if #ids > 0 then
		catTable[ p ] = ''
	end
	return ids
end

-- Returns the texts of the best value statements of property p whose value
-- has the language `lang`. Collection stops once `count` texts were
-- gathered (the check runs after each statement). p is recorded in catTable
-- when at least one text was found.
function wu.getValuesByLang( entity, p, count, lang )
	local ar = {}
	for _, statement in ipairs( getStatements( entity, p ) ) do
		local value = statement.mainsnak.datavalue.value
		if value.language and value.language == lang then
			ar[ #ar + 1 ] = value.text
		end
		if count and #ar >= count then
			break
		end
	end
	if #ar > 0 then
		catTable[ p ] = ''
	end
	return ar
end

-- Gets a table of monolingual texts of property p, keyed by language code.
-- A language code is cut at its first hyphen, and for each resulting key the
-- first text found wins. An empty table is returned if there are no
-- statements or the first one is not of datatype 'monolingualtext'.
function wu.getMonolingualValues( entity, p )
	local result = {}
	local statements = getStatements( entity, p, nil )
	local first = statements[ 1 ]
	if not first or first.mainsnak.datatype ~= 'monolingualtext' then
		return result
	end

	catTable[ p ] = ''
	for _, statement in ipairs( statements ) do
		local value = statement.mainsnak.datavalue.value
		local lng = value.language
		local hyphen = lng:find( '-' )
		lng = hyphen and lng:sub( 1, hyphen - 1 ) or lng
		result[ lng ] = result[ lng ] or value.text
	end
	return result
end

-- Returns the values of the best value statements of property p as a table
-- keyed by the entity id found in the qualifier `qualifierP`.
--   defaultId  key used for statements without a usable qualifier; falls
--              back to 'unknown' if it is not a non-empty string
-- A later statement with the same key overwrites an earlier one. An empty
-- table is returned when qualifierP is not set or there are no statements.
function wu.getValuesByQualifier( entity, p, qualifierP, defaultId )
	local result = {}
	if not isSet( qualifierP ) then
		return result
	elseif type( defaultId ) ~= 'string' or defaultId == '' then
		defaultId = 'unknown'
	end

	local statements = getStatements( entity, p, nil )
	if #statements > 0 then
		catTable[ p ] = ''
		for i = 1, #statements do
			local statement = statements[ i ]
			-- defaultId is used if a qualifier is missing
			local id = defaultId
			local qualifiers = statement.qualifiers
				and statement.qualifiers[ qualifierP ]
			if qualifiers then
				for _, qualifier in ipairs( qualifiers ) do
					if qualifier.snaktype == 'value' then
						-- Keep the default unless a real id is found. A
						-- qualifier value without id must not turn the key
						-- into nil, which would raise "table index is nil"
						-- on the assignment below.
						local qualifierId = qualifier.datavalue.value.id
						if qualifierId then
							id = qualifierId
							catTable[ qualifierP ] = ''
							break
						end
					end
				end
			end
			result[ id ] = statement.mainsnak.datavalue.value
		end
	end
	return result
end

-- Reduces a datavalue to a simpler value where possible.
-- Returns two values: the (possibly reduced) value and the datavalue type.
--   * wikibase-entityid: the id, or the first result of labelFct( id, ... )
--     if labelFct is a function
--   * quantity: amount normalized in place (leading '+' stripped, zero
--     written as '0'); with unit '1' the amount as number (1 if it cannot be
--     converted), otherwise the table with the unit reduced by getUnitId
--   * time: calendar model reduced in place by getUnitId; for the calendar
--     models listed in wd.gregorianCalendar only the time string
--   * anything else is returned unchanged
local function analyzeDatavalue( datavalue, labelFct, ... )
	local value, kind = datavalue.value, datavalue.type
	if type( value ) ~= 'table' then
		return value, kind
	end

	-- items which can be reduced to a string
	if kind == 'wikibase-entityid' then
		local id = value.id
		if type( labelFct ) == 'function' then
			id = labelFct( id, ... )
		end
		return id, kind
	end

	if kind == 'quantity' then
		value.amount = value.amount:gsub( '^+', '' )
		if tonumber( value.amount ) == 0 then
			value.amount = '0'
		end
		if value.unit == '1' then
			return tonumber( value.amount ) or 1, kind
		end
		value.unit = getUnitId( value.unit )
		return value, kind
	end

	if kind == 'time' then
		value.calendarmodel = getUnitId( value.calendarmodel )
		if wd.gregorianCalendar[ value.calendarmodel ] then -- is gregorian calendar?
			return value.time, kind
		end
	end
	return value, kind
end

-- Collects the best value statements of property p together with their
-- qualifiers and references, one result table per statement.
--
-- Parameters:
--   entity, p     entity (id string or entity table) and property id
--   values        optional list; when non-empty, only statements whose main
--                 value (the string itself, or the `id` field of a table
--                 value) is contained in it are kept
--   qualifiers, references
--                 a single string is treated as a one-element list
--                 { item1, item2, ... } : using named qualifiers/references
--                 {} : using no qualifiers/references
--                 nil : using all qualifiers/references
--   count         handed to getStatements, i.e. applied before the `values`
--                 filter
--   labelFct, ... optional function (plus extra arguments) handed to
--                 analyzeDatavalue for main values and qualifiers; it is not
--                 passed on for references
--
-- Returns a list of tables with the fields value, value-type,
-- <property id> / <property id>-type for each collected qualifier,
-- references (list of tables built the same way), sort1 and sort2.
-- Every property id that contributed a value is recorded in catTable.
function wu.getValuesWithQualifiers( entity, p, values, qualifiers, references,
	count, labelFct, ... )
	local array, qual
	-- Stores the analyzed values of all value snaks in qualTab as list in
	-- tab[ key ] and the datavalue type of the last analyzed snak in
	-- tab[ key .. '-type' ]. Both entries are removed again when no value was
	-- collected; otherwise key is recorded in catTable.
	local function toQualifierTable( tab, key, qualTab, labelFct, ... )
		local v
		if not tab[ key ] then
			tab[ key ] = {}
		end
		for i = 1, #qualTab do
			qual = qualTab[ i ]
			if qual.snaktype == 'value' then
				v, tab[ key .. '-type' ] =
					analyzeDatavalue( qual.datavalue, labelFct, ... )
				table.insert( tab[ key ], v )
			end
		end
		if #tab[ key ] == 0 then
			tab[ key ] = nil
			tab[ key .. '-type' ] = nil
		else
			catTable[ key ] = ''
		end
	end
	-- Linear search for val in the list tab.
	local function hasValue( tab, val )
		for i = 1, #tab do
			if tab[ i ] == val then
				return true
			end
		end
		return false
	end

	local results = {}
	local statements = getStatements( entity, p, count )
	if #statements == 0 then
		return results
	end
	-- NOTE(review): this `i` is never used; the loops below declare their own.
	local i, v
	if type( values ) == 'table' and #values > 0 then
		-- iterate backwards so that removing an entry does not skip the next
		for i = #statements, 1, -1 do
			v = statements[ i ].mainsnak.datavalue.value
			if type( v ) ~= 'string' then
				v = v.id
			end
			if not isSet( v ) or not hasValue( values, v ) then
				table.remove( statements, i )
			end
		end
		if #statements == 0 then
			return results
		end
	end
	catTable[ p ] = ''

	-- a single property id is accepted in place of a list
	if type( qualifiers ) == 'string' then
		qualifiers = { qualifiers }
	end
	if type( references ) == 'string' then
		references = { references }
	end

	local key, reference, statement
	for i = 1, #statements do
		statement = statements[ i ]
		-- inside the table constructor only the first result of
		-- analyzeDatavalue (the value) is kept
		array = { value = analyzeDatavalue( statement.mainsnak.datavalue, labelFct, ... ),
			[ 'value-type' ] = statement.mainsnak.datavalue.type }

		if statement.qualifiers then
			if not qualifiers then -- all qualifier properties
				for key, qualTab in pairs( statement.qualifiers ) do
					toQualifierTable( array, key, qualTab, labelFct, ... )
				end
			else -- table of selected qualifier properties
				for j = 1, #qualifiers do
					key = qualifiers[ j ]
					if statement.qualifiers[ key ] then
						toQualifierTable( array, key, statement.qualifiers[ key ], labelFct, ... )
					end
				end
			end
		end

		-- One table per reference that has snaks; it stays empty when none of
		-- its snaks contributed a value.
		array.references = {}
		if statement.references then
			for k = 1, #statement.references do
				reference = statement.references[ k ]
				if reference and reference.snaks then
					table.insert( array.references, {} )
					if not references then -- all references
						for key, refTab in pairs( reference.snaks ) do
							toQualifierTable( array.references[ #array.references ],
								key, refTab )
						end
					else -- table of selected references
						for j = 1, #references do
							key = references[ j ]
							if reference.snaks[ key ] then
								toQualifierTable( array.references[ #array.references ],
									key, reference.snaks[ key ] )
							end
						end
					end
				end
			end
		end

		table.insert( results, array )
	end

	-- clustering statements with identical value
	-- sort1 numbers the distinct values in order of first appearance, sort2
	-- counts the results within one value. Table values are compared by
	-- identity because they are used as keys of `helper`.
	-- NOTE(review): if labelFct returns nil, result.value is nil and the
	-- assignment helper[ result.value ] = ... raises an error -- confirm that
	-- callers never pass such a function.
	local helper = {}
	local sort1 = 0
	local mult = false
	local result
	for i = 1, #results do
		result = results[ i ]
		if helper[ result.value ] then
			helper[ result.value ].sort2 = helper[ result.value ].sort2 + 1
			mult = true
		else
			sort1 = sort1 + 1
			helper[ result.value ] = { sort1 = sort1, sort2 = 1 }
		end
		result.sort1 = helper[ result.value ].sort1
		result.sort2 = helper[ result.value ].sort2
	end
	-- Sorting is only done with more than one distinct value, at least one
	-- repeated value and more than two results.
	if sort1 > 1 and mult and #results > 2 then
		table.sort( results,
			function( a, b )
				return a.sort1 < b.sort1 or 
					( a.sort1 == b.sort1 and a.sort2 < b.sort2 )
			end
		)
	end

	return results
end

-- Extracts the date part from a time value.
-- `t` is either a time string or a table with the fields `time` and
-- `calendarmodel`. Returns the date (leading sign and everything from 'T'
-- on removed) and the calendar model, which is '' for a plain string.
function wu.getDateFromTime( t )
	local model = '' -- is Gregorian
	if type( t ) == 'table' then
		model, t = t.calendarmodel, t.time
	end
	local date = t:gsub( '^[%+%-]([-%d]*)T.*$', '%1' )
	return date, model
end

-- Derives the last-edit date from the retrieval dates (wd.retrieved) found
-- in the references of `statements`.
--   * lastEdit == false: returned unchanged, nothing is evaluated
--   * lastEdit == true:  the newest retrieval date, or true if none exists
--   * otherwise:         the newer of lastEdit and the newest retrieval date
--     (compared with `>`)
-- Each reference is expected to hold its retrieval dates as a list under
-- the key wd.retrieved.
function wu.getLastedit( lastEdit, statements )
	if lastEdit == false then
		return lastEdit
	end

	local newest = ''
	for _, statement in ipairs( statements ) do
		for _, reference in ipairs( statement.references or {} ) do
			for _, retrieved in ipairs( reference[ wd.retrieved ] or {} ) do
				local date = wu.getDateFromTime( retrieved )
				if date > newest then
					newest = date
				end
			end
		end
	end

	if type( lastEdit ) == 'boolean' then
		if newest ~= '' then
			return newest
		end
		return lastEdit
	end
	if newest > lastEdit then
		return newest
	end
	return lastEdit
end

-- maintenance utilities
-- Builds one string from all property ids recorded in catTable, each run
-- through formatStr ('[[Category:%s]]' when formatStr is not set). The
-- placeholder entry P0 is dropped first. The order of the ids follows
-- pairs() and is therefore not defined.
function wu.getCategories( formatStr )
	local fmt = isSet( formatStr ) and formatStr or '[[Category:%s]]'

	catTable.P0 = nil
	local links = {}
	for key in pairs( catTable ) do
		links[ #links + 1 ] = fmt:format( key )
	end
	return table.concat( links )
end

-- export the module table with all public functions
return wu