78 lines
2.5 KiB
TypeScript
78 lines
2.5 KiB
TypeScript
import { Injectable } from '@nestjs/common';
|
|
import { InjectRepository } from '@nestjs/typeorm';
|
|
import { Repository } from 'typeorm';
|
|
import { CatalogItemEntity } from '../entities/catalog-item.entity';
|
|
import { ClassificationTagEntity } from '../entities/classification-tag.entity';
|
|
|
|
/**
 * Facets derived for a catalog item by the classifier.
 *
 * Every facet is optional: a facet that could not be derived is simply left
 * out of the result.
 */
export type ClassificationResult = {
  /** Publisher copied from the catalog item, when it has one. */
  publisher?: string;

  /** Individual format names split out of the item's format string. */
  format?: Array<string>;

  /** Territory labels detected in the item's text. */
  territory?: Array<string>;

  /** Topic labels detected in the item's text. */
  topics?: Array<string>;
};
|
|
|
|
/**
 * Derives classification facets (publisher, formats, territories, topics)
 * for catalog items and stores them as classification tags.
 */
@Injectable()
export class ClassifierService {
  /**
   * Territory keyword rules: a pattern tested against the lower-cased item
   * text, paired with the label to emit. Labels are emitted in table order.
   */
  private static readonly territoryRules: ReadonlyArray<readonly [RegExp, string]> = [
    [/madrid/, 'Madrid'],
    [/andaluc/, 'Andalucía'],
    [/catalu/, 'Cataluña'],
  ];

  /**
   * Topic keyword rules, same shape as the territory rules.
   *
   * The "licit" stem is anchored to the start of a word (no letter or digit
   * right before it) because it also occurs inside unrelated words such as
   * "solicitud", "publicitar" or "felicitaciones", which must not be tagged
   * as tenders.
   */
  private static readonly topicRules: ReadonlyArray<readonly [RegExp, string]> = [
    [/subvenc/, 'Subvenciones'],
    [/(?<![\p{L}\p{N}])licit/u, 'Licitaciones'],
    [/contrat/, 'Contratación'],
    [/presupuesto/, 'Presupuestos'],
  ];

  constructor(
    @InjectRepository(ClassificationTagEntity)
    private readonly tags: Repository<ClassificationTagEntity>,
  ) {}

  /**
   * Computes the classification of a catalog item.
   *
   * The publisher is copied from the item, formats come from splitting the
   * item's comma-separated format string, and territories/topics are found
   * by keyword search over the lower-cased title and description.
   *
   * @param item Catalog item to classify; it is only read, never modified.
   * @returns The derived facets. `territory` and `topics` are `undefined`
   *   when nothing matched; `format` is always an array (possibly empty).
   */
  async classify(item: CatalogItemEntity): Promise<ClassificationResult> {
    const text = `${item.title || ''} ${item.description || ''}`.toLowerCase();

    const territory = ClassifierService.matchLabels(text, ClassifierService.territoryRules);
    const topics = ClassifierService.matchLabels(text, ClassifierService.topicRules);

    return {
      publisher: item.publisher || undefined,
      format: this.splitFormats(item.format),
      territory: territory.length ? territory : undefined,
      topics: topics.length ? topics : undefined,
    };
  }

  /**
   * Replaces all stored tags of an item with the ones described by
   * `classification`.
   *
   * The delete of the old tags and the insert of the new ones run inside a
   * single transaction, so a failed insert does not leave the item without
   * any tags.
   *
   * @param item Catalog item whose tags are replaced; must have an id.
   * @param classification Facets to persist as tags.
   * @throws Error when `item.id` is null or undefined.
   */
  async applyTags(item: CatalogItemEntity, classification: ClassificationResult): Promise<void> {
    // Guard the delete criteria: an undefined id must never reach the delete,
    // where (depending on TypeORM version/configuration) it could match more
    // rows than this item's tags.
    if (item.id == null) {
      throw new Error('Cannot apply classification tags to a catalog item without an id');
    }
    const itemId = item.id;

    // Building entities does not touch the database, so do it up front and
    // keep the transaction as short as possible.
    const next: ClassificationTagEntity[] = [];

    if (classification.publisher) {
      next.push(this.tags.create({ itemId, type: 'publisher', value: classification.publisher }));
    }
    for (const format of classification.format || []) {
      next.push(this.tags.create({ itemId, type: 'format', value: format }));
    }
    for (const topic of classification.topics || []) {
      next.push(this.tags.create({ itemId, type: 'topic', value: topic }));
    }
    for (const territory of classification.territory || []) {
      next.push(this.tags.create({ itemId, type: 'territory', value: territory }));
    }

    await this.tags.manager.transaction(async (manager) => {
      const repo = manager.getRepository(ClassificationTagEntity);
      await repo.delete({ itemId });
      if (next.length) await repo.save(next);
    });
  }

  /**
   * Returns the labels of every rule whose pattern matches `text`, in rule
   * order.
   */
  private static matchLabels(
    text: string,
    rules: ReadonlyArray<readonly [RegExp, string]>,
  ): string[] {
    return rules.filter(([pattern]) => pattern.test(text)).map(([, label]) => label);
  }

  /**
   * Splits a comma-separated format string into trimmed, non-empty,
   * de-duplicated entries, keeping first-seen order.
   *
   * @param value Raw format string; a missing or empty value yields `[]`.
   */
  private splitFormats(value?: string): string[] {
    if (!value) return [];
    const formats = value
      .split(',')
      .map((part) => part.trim())
      .filter(Boolean);
    // A Set keeps insertion order, so this removes repeats without reordering.
    return Array.from(new Set(formats));
  }
}
|