[Library] Top words filtering in Filter library #8

Merged
javier merged 3 commits from library/filter/max into main 2024-03-18 15:55:10 +00:00
6 changed files with 282 additions and 79 deletions

View File

@ -0,0 +1,24 @@
//
// Word.swift
// ReviewsFilterKit
//
// Created by Javier Cicchelli on 18/03/2024.
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
//
/// A word described by two strings: a `term` and a `token`.
///
/// Both values are stored exactly as passed to the initialiser. Two words are
/// equal only when their `term` and their `token` both match.
public struct Word: Equatable {

    // MARK: Constants

    /// The string supplied as `term` on creation.
    public let term: String

    /// The string supplied as `token` on creation.
    public let token: String

    // MARK: Initialisers

    /// Creates a word out of its two string representations.
    ///
    /// - Parameters:
    ///   - term: The value to store in ``term``.
    ///   - token: The value to store in ``token``.
    public init(term: String, token: String) {
        (self.term, self.token) = (term, token)
    }

}

View File

@ -0,0 +1,24 @@
//
// WordCount.swift
// ReviewsFilterKit
//
// Created by Javier Cicchelli on 18/03/2024.
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
//
/// Pairs a `Word` with an integer count.
///
/// Two values are equal only when both the word and the count match.
public struct WordCount: Equatable {

    // MARK: Constants

    /// The count supplied on creation.
    public let count: Int

    /// The word supplied on creation.
    public let word: Word

    // MARK: Initialisers

    /// Creates a pairing of a word and its count.
    ///
    /// - Parameters:
    ///   - word: The value to store in ``word``.
    ///   - count: The value to store in ``count``.
    public init(word: Word, count: Int) {
        (self.word, self.count) = (word, count)
    }

}

View File

@ -1,18 +1,19 @@
//
// FilterWordsUseCase.swift
// ReviewsParserKit
// ReviewsFilterKit
//
// Created by Javier Cicchelli on 17/03/2024.
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
//
import Foundation
import ReviewsFoundationKit
public struct FilterWordsUseCase {
// MARK: Type aliases
public typealias Input = String
public typealias Output = [Tuple]
public typealias Output = [WordCount]
// MARK: Initialisers
public init() {}
@ -32,42 +33,45 @@ public struct FilterWordsUseCase {
guard !matches.isEmpty else { return [] }
let wordsAll = matches.compactMap {
let terms = matches.compactMap {
Range($0.range, in: input).map { String(input[$0]) }
}
let wordsAll = terms
.map { ($0, $0.folding(options: .caseInsensitive, locale: nil)) }
.map(Word.init)
let wordsCount = wordsAll.map { word -> WordCount in
.init(
word: word,
count: terms.filter { $0 == word.term }.count
)
}.reduce(into: [WordCount]()) { partialResult, wordCount in
guard partialResult.filter({ $0 == wordCount }).isEmpty else { return }
partialResult.append(wordCount)
}
let wordsUnique = wordsAll
.reduce(into: [String]()) { partialResult, word in
guard partialResult
.filter({ compareWords(word, $0) })
.isEmpty
else { return }
partialResult.append(
word.folding(
options: [
.caseInsensitive,
.diacriticInsensitive
],
locale: .current
)
.capitalized
)
}
return Dictionary(grouping: wordsUnique) { word in
let wordsUnique = Dictionary(grouping: wordsCount) { wordCount -> String in
wordsAll
.filter { compareWords(word, $0) }
.count
.filter { $0.token == wordCount.word.token }
.map(\.token)
.first ?? .empty
}.map { (key, values) -> WordCount in
.init(
word: .init(
term: key.capitalized,
token: key
),
count: values
.map(\.count)
.reduce(0, +)
)
}
.flatMap { (key, values) -> Output in
values.map {
.init(word: $0, count: key)
}
}
.sorted {
return wordsUnique.sorted {
guard $0.count != $1.count else {
return $0.word < $1.word
return $0.word.token < $1.word.token
}
return $0.count > $1.count
@ -92,26 +96,6 @@ private extension FilterWordsUseCase {
}
// MARK: - Structs
extension FilterWordsUseCase {
public struct Tuple: Equatable {
// MARK: Constants
let word: String
let count: Int
// MARK: Initialisers
public init(
word: String,
count: Int
) {
self.word = word
self.count = count
}
}
}
// MARK: - String+Constants
private extension String {
enum Pattern {

View File

@ -0,0 +1,58 @@
//
// TopWordsUseCase.swift
// ReviewsFilterKit
//
// Created by Javier Cicchelli on 18/03/2024.
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
//
import Foundation
/// Use case that combines several lists of word counts into one ranking and
/// returns its leading entries.
public struct TopWordsUseCase {

    // MARK: Type aliases

    /// The lists of word counts to combine.
    public typealias Input = [[WordCount]]

    /// The ranked word counts, limited to `Int.Max.words` entries.
    public typealias Output = [WordCount]

    // MARK: Initialisers

    public init() {}

    // MARK: Functions

    /// Merges every list in `input` and returns the highest-ranked word counts.
    ///
    /// Entries whose `word` compares equal (`==`) are merged into a single
    /// entry whose count is the sum of the merged counts. The merged entries
    /// are then ordered by descending count; entries with the same count are
    /// ordered by ascending `word.token`.
    ///
    /// - Parameter input: The lists of word counts to combine; may be empty.
    /// - Returns: At most `Int.Max.words` entries from the top of the ranking,
    ///   or fewer when there are not that many distinct words.
    public func callAsFunction(_ input: Input) -> Output {
        let ranked = input
            // Walk all the lists as one sequence without building intermediate arrays.
            .joined()
            .reduce(into: [WordCount]()) { partialResult, wordCount in
                guard
                    let index = partialResult.firstIndex(where: { $0.word == wordCount.word })
                else {
                    partialResult.append(wordCount)
                    return
                }

                // The accumulator holds at most one entry per word, so removing
                // the match and appending the summed entry keeps that invariant.
                let wordCountFound = partialResult.remove(at: index)

                partialResult.append(.init(
                    word: wordCount.word,
                    count: wordCount.count + wordCountFound.count
                ))
            }
            .sorted {
                guard $0.count != $1.count else {
                    return $0.word.token < $1.word.token
                }

                return $0.count > $1.count
            }

        // `prefix` never reads past the end, so short rankings are returned
        // whole and the limit always follows the shared constant.
        return Array(ranked.prefix(Int.Max.words))
    }

}
// MARK: - Int+Constants
private extension Int {

    /// Namespace for integer constants that express upper limits.
    enum Max {

        /// Limit on a number of words: 3.
        static let words = 3

    }

}

View File

@ -33,12 +33,12 @@ final class FilterWordsUseCaseTests: XCTestCase {
// THEN
XCTAssertFalse(output.isEmpty)
XCTAssertEqual(output, [
.init(word: "Eight", count: 1),
.init(word: "Five", count: 1),
.init(word: "Four", count: 1),
.init(word: "Nine", count: 1),
.init(word: "Seven", count: 1),
.init(word: "Three", count: 1),
.init(word: .init(term: "Eight", token: "eight"), count: 1),
.init(word: .init(term: "Five", token: "five"), count: 1),
.init(word: .init(term: "Four", token: "four"), count: 1),
.init(word: .init(term: "Nine", token: "nine"), count: 1),
.init(word: .init(term: "Seven", token: "seven"), count: 1),
.init(word: .init(term: "Three", token: "three"), count: 1),
])
}
@ -52,18 +52,18 @@ final class FilterWordsUseCaseTests: XCTestCase {
// THEN
XCTAssertFalse(output.isEmpty)
XCTAssertEqual(output, [
.init(word: "Five", count: 2),
.init(word: "Nine", count: 2),
.init(word: "Three", count: 2),
.init(word: "Eight", count: 1),
.init(word: "Four", count: 1),
.init(word: "Seven", count: 1),
.init(word: .init(term: "Five", token: "five"), count: 2),
.init(word: .init(term: "Nine", token: "nine"), count: 2),
.init(word: .init(term: "Three", token: "three"), count: 2),
.init(word: .init(term: "Eight", token: "eight"), count: 1),
.init(word: .init(term: "Four", token: "four"), count: 1),
.init(word: .init(term: "Seven", token: "seven"), count: 1),
])
}
func testCallAsFunction_withInput_hasSomeRepeatedCaseSensitiveLongWords() throws {
// GIVEN
input = "one two three Three four Five five six seven eight nine nine ten"
input = "one two three ThReE four FIVE five six seven eight NiNe nInE ten"
// WHEN
output = try sut(input)
@ -71,18 +71,18 @@ final class FilterWordsUseCaseTests: XCTestCase {
// THEN
XCTAssertFalse(output.isEmpty)
XCTAssertEqual(output, [
.init(word: "Five", count: 2),
.init(word: "Nine", count: 2),
.init(word: "Three", count: 2),
.init(word: "Eight", count: 1),
.init(word: "Four", count: 1),
.init(word: "Seven", count: 1),
.init(word: .init(term: "Five", token: "five"), count: 2),
.init(word: .init(term: "Nine", token: "nine"), count: 2),
.init(word: .init(term: "Three", token: "three"), count: 2),
.init(word: .init(term: "Eight", token: "eight"), count: 1),
.init(word: .init(term: "Four", token: "four"), count: 1),
.init(word: .init(term: "Seven", token: "seven"), count: 1),
])
}
func testCallAsFunction_withInput_hasSomeRepeatedDiacriticSensitiveLongWords() throws {
func testCallAsFunction_withInput_hasSomeDiacriticSensitiveLongWords() throws {
// GIVEN
input = "one two thrèé Three four Fíve fïve six Šëvêń seven eight niñe nine ten"
input = "one two three Thrèé four FiVe FIVE six Šëvêń seven eight niñe nine ten"
// WHEN
output = try sut(input)
@ -90,12 +90,15 @@ final class FilterWordsUseCaseTests: XCTestCase {
// THEN
XCTAssertFalse(output.isEmpty)
XCTAssertEqual(output, [
.init(word: "Five", count: 2),
.init(word: "Nine", count: 2),
.init(word: "Seven", count: 2),
.init(word: "Three", count: 2),
.init(word: "Eight", count: 1),
.init(word: "Four", count: 1),
.init(word: .init(term: "Five", token: "five"), count: 2),
.init(word: .init(term: "Eight", token: "eight"), count: 1),
.init(word: .init(term: "Four", token: "four"), count: 1),
.init(word: .init(term: "Nine", token: "nine"), count: 1),
.init(word: .init(term: "Niñe", token: "niñe"), count: 1),
.init(word: .init(term: "Seven", token: "seven"), count: 1),
.init(word: .init(term: "Three", token: "three"), count: 1),
.init(word: .init(term: "Thrèé", token: "thrèé"), count: 1),
.init(word: .init(term: "Šëvêń", token: "šëvêń"), count: 1),
])
}

View File

@ -0,0 +1,110 @@
//
// TopWordsUseCaseTests.swift
// ReviewsFilterTest
//
// Created by Javier Cicchelli on 18/03/2024.
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
//
import ReviewsFilterKit
import XCTest
/// Unit tests for `TopWordsUseCase`.
final class TopWordsUseCaseTests: XCTestCase {

    // MARK: Properties

    /// The use case under test; recreated before every test.
    private var sut: TopWordsUseCase!

    // MARK: Setup

    override func setUp() async throws {
        sut = .init()
    }

    // MARK: Functions

    /// Distinct words across the lists are ranked by descending count and cut to three.
    func testCallAsFunction_withInput_hasDifferentWords() {
        // GIVEN
        let input: TopWordsUseCase.Input = [
            [wordCount(term: "One", token: "one", count: 3)],
            [wordCount(term: "Two", token: "two", count: 1)],
            [
                wordCount(term: "Three", token: "three", count: 5),
                wordCount(term: "Four", token: "four", count: 2)
            ]
        ]

        // WHEN
        let output = sut(input)

        // THEN
        let expected: TopWordsUseCase.Output = [
            wordCount(term: "Three", token: "three", count: 5),
            wordCount(term: "One", token: "one", count: 3),
            wordCount(term: "Four", token: "four", count: 2)
        ]

        XCTAssertFalse(output.isEmpty)
        XCTAssertEqual(output.count, 3)
        XCTAssertEqual(output, expected)
    }

    /// Words repeated across the lists have their counts summed before ranking.
    func testCallAsFunction_withInput_hasSomeCommonWords() {
        // GIVEN
        let input: TopWordsUseCase.Input = [
            [
                wordCount(term: "One", token: "one", count: 3),
                wordCount(term: "Two", token: "two", count: 7),
                wordCount(term: "Three", token: "three", count: 1)
            ],
            [
                wordCount(term: "Four", token: "four", count: 5),
                wordCount(term: "Two", token: "two", count: 2),
                wordCount(term: "Five", token: "five", count: 6)
            ],
            [
                wordCount(term: "Six", token: "six", count: 9),
                wordCount(term: "Four", token: "four", count: 4),
                wordCount(term: "Two", token: "two", count: 1)
            ]
        ]

        // WHEN
        let output = sut(input)

        // THEN
        let expected: TopWordsUseCase.Output = [
            wordCount(term: "Two", token: "two", count: 10),
            wordCount(term: "Four", token: "four", count: 9),
            wordCount(term: "Six", token: "six", count: 9)
        ]

        XCTAssertFalse(output.isEmpty)
        XCTAssertEqual(output.count, 3)
        XCTAssertEqual(output, expected)
    }

    /// Fewer than three distinct words are all returned, still ranked.
    func testCallAsFunction_withInput_hasFewWords() {
        // GIVEN
        let input: TopWordsUseCase.Input = [
            [wordCount(term: "One", token: "one", count: 3)],
            [wordCount(term: "Four", token: "four", count: 5)]
        ]

        // WHEN
        let output = sut(input)

        // THEN
        let expected: TopWordsUseCase.Output = [
            wordCount(term: "Four", token: "four", count: 5),
            wordCount(term: "One", token: "one", count: 3)
        ]

        XCTAssertFalse(output.isEmpty)
        XCTAssertEqual(output.count, 2)
        XCTAssertEqual(output, expected)
    }

    /// An input without any list produces an empty output.
    func testCallAsFunction_withEmptyInput() {
        // GIVEN
        let input: TopWordsUseCase.Input = []

        // WHEN
        let output = sut(input)

        // THEN
        XCTAssertTrue(output.isEmpty)
        XCTAssertEqual(output, [])
    }

    // MARK: Helpers

    /// Builds a `WordCount` fixture from its three raw values.
    private func wordCount(
        term: String,
        token: String,
        count: Int
    ) -> WordCount {
        WordCount(
            word: Word(term: term, token: token),
            count: count
        )
    }

}