[Library] Top words filtering in Filter library (#8)
This PR contains the work done to implement the `TopWordsUseCase` use case in the `Filter` library, to obtain the top words out of given groups of word counts. Reviewed-on: #8 Co-authored-by: Javier Cicchelli <javier@rock-n-code.com> Co-committed-by: Javier Cicchelli <javier@rock-n-code.com>
This commit is contained in:
parent
72a8e77fc7
commit
26c2c0c581
24
Libraries/Filter/Kit/Sources/Models/Word.swift
Normal file
24
Libraries/Filter/Kit/Sources/Models/Word.swift
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
//
|
||||||
|
// Word.swift
|
||||||
|
// ReviewsFilterKit
|
||||||
|
//
|
||||||
|
// Created by Javier Cicchelli on 18/03/2024.
|
||||||
|
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
/// A word kept in two textual forms: a display `term` and a comparison `token`.
public struct Word: Equatable {

    // MARK: Constants

    /// The term form of the word (e.g. `"Five"`).
    public let term: String

    /// The token form of the word (e.g. `"five"`).
    public let token: String

    // MARK: Initialisers

    /// Creates a word from its two forms.
    ///
    /// - Parameters:
    ///   - term: The term form of the word.
    ///   - token: The token form of the word.
    public init(term: String, token: String) {
        self.token = token
        self.term = term
    }

}
|
24
Libraries/Filter/Kit/Sources/Models/WordCount.swift
Normal file
24
Libraries/Filter/Kit/Sources/Models/WordCount.swift
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
//
|
||||||
|
// WordCount.swift
|
||||||
|
// ReviewsFilterKit
|
||||||
|
//
|
||||||
|
// Created by Javier Cicchelli on 18/03/2024.
|
||||||
|
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
/// A word paired with a count.
public struct WordCount: Equatable {

    // MARK: Constants

    /// The count recorded for `word`.
    public let count: Int

    /// The word the count refers to.
    public let word: Word

    // MARK: Initialisers

    /// Creates a word count.
    ///
    /// - Parameters:
    ///   - word: The word the count refers to.
    ///   - count: The count recorded for the word.
    public init(word: Word, count: Int) {
        self.word = word
        self.count = count
    }

}
|
@ -1,18 +1,19 @@
|
|||||||
//
|
//
|
||||||
// FilterWordsUseCase.swift
|
// FilterWordsUseCase.swift
|
||||||
// ReviewsParserKit
|
// ReviewsFilterKit
|
||||||
//
|
//
|
||||||
// Created by Javier Cicchelli on 17/03/2024.
|
// Created by Javier Cicchelli on 17/03/2024.
|
||||||
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
|
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
|
||||||
//
|
//
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
import ReviewsFoundationKit
|
||||||
|
|
||||||
public struct FilterWordsUseCase {
|
public struct FilterWordsUseCase {
|
||||||
|
|
||||||
// MARK: Type aliases
|
// MARK: Type aliases
|
||||||
public typealias Input = String
|
public typealias Input = String
|
||||||
public typealias Output = [Tuple]
|
public typealias Output = [WordCount]
|
||||||
|
|
||||||
// MARK: Initialisers
|
// MARK: Initialisers
|
||||||
public init() {}
|
public init() {}
|
||||||
@ -32,42 +33,45 @@ public struct FilterWordsUseCase {
|
|||||||
|
|
||||||
guard !matches.isEmpty else { return [] }
|
guard !matches.isEmpty else { return [] }
|
||||||
|
|
||||||
let wordsAll = matches.compactMap {
|
let terms = matches.compactMap {
|
||||||
Range($0.range, in: input).map { String(input[$0]) }
|
Range($0.range, in: input).map { String(input[$0]) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let wordsAll = terms
|
||||||
|
.map { ($0, $0.folding(options: .caseInsensitive, locale: nil)) }
|
||||||
|
.map(Word.init)
|
||||||
|
|
||||||
|
let wordsCount = wordsAll.map { word -> WordCount in
|
||||||
|
.init(
|
||||||
|
word: word,
|
||||||
|
count: terms.filter { $0 == word.term }.count
|
||||||
|
)
|
||||||
|
}.reduce(into: [WordCount]()) { partialResult, wordCount in
|
||||||
|
guard partialResult.filter({ $0 == wordCount }).isEmpty else { return }
|
||||||
|
|
||||||
|
partialResult.append(wordCount)
|
||||||
|
}
|
||||||
|
|
||||||
let wordsUnique = wordsAll
|
let wordsUnique = Dictionary(grouping: wordsCount) { wordCount -> String in
|
||||||
.reduce(into: [String]()) { partialResult, word in
|
|
||||||
guard partialResult
|
|
||||||
.filter({ compareWords(word, $0) })
|
|
||||||
.isEmpty
|
|
||||||
else { return }
|
|
||||||
|
|
||||||
partialResult.append(
|
|
||||||
word.folding(
|
|
||||||
options: [
|
|
||||||
.caseInsensitive,
|
|
||||||
.diacriticInsensitive
|
|
||||||
],
|
|
||||||
locale: .current
|
|
||||||
)
|
|
||||||
.capitalized
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
return Dictionary(grouping: wordsUnique) { word in
|
|
||||||
wordsAll
|
wordsAll
|
||||||
.filter { compareWords(word, $0) }
|
.filter { $0.token == wordCount.word.token }
|
||||||
.count
|
.map(\.token)
|
||||||
|
.first ?? .empty
|
||||||
|
}.map { (key, values) -> WordCount in
|
||||||
|
.init(
|
||||||
|
word: .init(
|
||||||
|
term: key.capitalized,
|
||||||
|
token: key
|
||||||
|
),
|
||||||
|
count: values
|
||||||
|
.map(\.count)
|
||||||
|
.reduce(0, +)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
.flatMap { (key, values) -> Output in
|
|
||||||
values.map {
|
return wordsUnique.sorted {
|
||||||
.init(word: $0, count: key)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.sorted {
|
|
||||||
guard $0.count != $1.count else {
|
guard $0.count != $1.count else {
|
||||||
return $0.word < $1.word
|
return $0.word.token < $1.word.token
|
||||||
}
|
}
|
||||||
|
|
||||||
return $0.count > $1.count
|
return $0.count > $1.count
|
||||||
@ -92,26 +96,6 @@ private extension FilterWordsUseCase {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Structs
|
|
||||||
extension FilterWordsUseCase {
|
|
||||||
public struct Tuple: Equatable {
|
|
||||||
|
|
||||||
// MARK: Constants
|
|
||||||
let word: String
|
|
||||||
let count: Int
|
|
||||||
|
|
||||||
// MARK: Initialisers
|
|
||||||
public init(
|
|
||||||
word: String,
|
|
||||||
count: Int
|
|
||||||
) {
|
|
||||||
self.word = word
|
|
||||||
self.count = count
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MARK: - String+Constants
|
// MARK: - String+Constants
|
||||||
private extension String {
|
private extension String {
|
||||||
enum Pattern {
|
enum Pattern {
|
||||||
|
58
Libraries/Filter/Kit/Sources/Use Cases/TopWordsUseCase.swift
Normal file
58
Libraries/Filter/Kit/Sources/Use Cases/TopWordsUseCase.swift
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
//
|
||||||
|
// TopWordsUseCase.swift
|
||||||
|
//  ReviewsFilterKit
|
||||||
|
//
|
||||||
|
// Created by Javier Cicchelli on 18/03/2024.
|
||||||
|
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Use case that merges several groups of word counts and returns the words with the highest counts.
public struct TopWordsUseCase {

    // MARK: Type aliases

    public typealias Input = [[WordCount]]
    public typealias Output = [WordCount]

    // MARK: Initialisers

    public init() {}

    // MARK: Functions

    /// Merges the given groups of word counts and returns the top ones.
    ///
    /// Entries whose `word` values are equal are combined into a single entry whose count is the sum
    /// of the combined counts. The merged entries are ordered by descending count; entries with the
    /// same count are ordered by ascending `token`.
    ///
    /// - Parameter input: The groups of word counts to merge.
    /// - Returns: At most `Int.Max.words` word counts, highest count first; empty when `input` has no entries.
    public func callAsFunction(_ input: Input) -> Output {
        let wordCounts = input
            // Walks every group in order without building an intermediate array per group.
            .joined()
            .reduce(into: [WordCount]()) { partialResult, wordCount in
                guard
                    let index = partialResult.firstIndex(where: { $0.word == wordCount.word })
                else {
                    partialResult.append(wordCount)
                    return
                }

                // The accumulator holds each word at most once, so removing the match by index and
                // appending the merged entry keeps the accumulation order of the previous approach.
                let wordCountFound = partialResult.remove(at: index)

                partialResult.append(.init(
                    word: wordCount.word,
                    count: wordCount.count + wordCountFound.count
                ))
            }
            .sorted {
                guard $0.count != $1.count else {
                    return $0.word.token < $1.word.token
                }

                return $0.count > $1.count
            }

        // `prefix` clamps to the available elements, so the limit lives only in `Int.Max.words`.
        return Array(wordCounts.prefix(Int.Max.words))
    }

}
|
||||||
|
|
||||||
|
// MARK: - Int+Constants
|
||||||
|
private extension Int {

    /// Namespace for the upper limits used in this file.
    enum Max {

        /// Number of word counts compared against when limiting the use case output.
        static let words = 3

    }

}
|
@ -33,12 +33,12 @@ final class FilterWordsUseCaseTests: XCTestCase {
|
|||||||
// THEN
|
// THEN
|
||||||
XCTAssertFalse(output.isEmpty)
|
XCTAssertFalse(output.isEmpty)
|
||||||
XCTAssertEqual(output, [
|
XCTAssertEqual(output, [
|
||||||
.init(word: "Eight", count: 1),
|
.init(word: .init(term: "Eight", token: "eight"), count: 1),
|
||||||
.init(word: "Five", count: 1),
|
.init(word: .init(term: "Five", token: "five"), count: 1),
|
||||||
.init(word: "Four", count: 1),
|
.init(word: .init(term: "Four", token: "four"), count: 1),
|
||||||
.init(word: "Nine", count: 1),
|
.init(word: .init(term: "Nine", token: "nine"), count: 1),
|
||||||
.init(word: "Seven", count: 1),
|
.init(word: .init(term: "Seven", token: "seven"), count: 1),
|
||||||
.init(word: "Three", count: 1),
|
.init(word: .init(term: "Three", token: "three"), count: 1),
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,18 +52,18 @@ final class FilterWordsUseCaseTests: XCTestCase {
|
|||||||
// THEN
|
// THEN
|
||||||
XCTAssertFalse(output.isEmpty)
|
XCTAssertFalse(output.isEmpty)
|
||||||
XCTAssertEqual(output, [
|
XCTAssertEqual(output, [
|
||||||
.init(word: "Five", count: 2),
|
.init(word: .init(term: "Five", token: "five"), count: 2),
|
||||||
.init(word: "Nine", count: 2),
|
.init(word: .init(term: "Nine", token: "nine"), count: 2),
|
||||||
.init(word: "Three", count: 2),
|
.init(word: .init(term: "Three", token: "three"), count: 2),
|
||||||
.init(word: "Eight", count: 1),
|
.init(word: .init(term: "Eight", token: "eight"), count: 1),
|
||||||
.init(word: "Four", count: 1),
|
.init(word: .init(term: "Four", token: "four"), count: 1),
|
||||||
.init(word: "Seven", count: 1),
|
.init(word: .init(term: "Seven", token: "seven"), count: 1),
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
func testCallAsFunction_withInput_hasSomeRepeatedCaseSensitiveLongWords() throws {
|
func testCallAsFunction_withInput_hasSomeRepeatedCaseSensitiveLongWords() throws {
|
||||||
// GIVEN
|
// GIVEN
|
||||||
input = "one two three Three four Five five six seven eight nine nine ten"
|
input = "one two three ThReE four FIVE five six seven eight NiNe nInE ten"
|
||||||
|
|
||||||
// WHEN
|
// WHEN
|
||||||
output = try sut(input)
|
output = try sut(input)
|
||||||
@ -71,18 +71,18 @@ final class FilterWordsUseCaseTests: XCTestCase {
|
|||||||
// THEN
|
// THEN
|
||||||
XCTAssertFalse(output.isEmpty)
|
XCTAssertFalse(output.isEmpty)
|
||||||
XCTAssertEqual(output, [
|
XCTAssertEqual(output, [
|
||||||
.init(word: "Five", count: 2),
|
.init(word: .init(term: "Five", token: "five"), count: 2),
|
||||||
.init(word: "Nine", count: 2),
|
.init(word: .init(term: "Nine", token: "nine"), count: 2),
|
||||||
.init(word: "Three", count: 2),
|
.init(word: .init(term: "Three", token: "three"), count: 2),
|
||||||
.init(word: "Eight", count: 1),
|
.init(word: .init(term: "Eight", token: "eight"), count: 1),
|
||||||
.init(word: "Four", count: 1),
|
.init(word: .init(term: "Four", token: "four"), count: 1),
|
||||||
.init(word: "Seven", count: 1),
|
.init(word: .init(term: "Seven", token: "seven"), count: 1),
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
func testCallAsFunction_withInput_hasSomeRepeatedDiacriticSensitiveLongWords() throws {
|
func testCallAsFunction_withInput_hasSomeDiacriticSensitiveLongWords() throws {
|
||||||
// GIVEN
|
// GIVEN
|
||||||
input = "one two thrèé Three four Fíve fïve six Šëvêń seven eight niñe nine ten"
|
input = "one two three Thrèé four FiVe FIVE six Šëvêń seven eight niñe nine ten"
|
||||||
|
|
||||||
// WHEN
|
// WHEN
|
||||||
output = try sut(input)
|
output = try sut(input)
|
||||||
@ -90,12 +90,15 @@ final class FilterWordsUseCaseTests: XCTestCase {
|
|||||||
// THEN
|
// THEN
|
||||||
XCTAssertFalse(output.isEmpty)
|
XCTAssertFalse(output.isEmpty)
|
||||||
XCTAssertEqual(output, [
|
XCTAssertEqual(output, [
|
||||||
.init(word: "Five", count: 2),
|
.init(word: .init(term: "Five", token: "five"), count: 2),
|
||||||
.init(word: "Nine", count: 2),
|
.init(word: .init(term: "Eight", token: "eight"), count: 1),
|
||||||
.init(word: "Seven", count: 2),
|
.init(word: .init(term: "Four", token: "four"), count: 1),
|
||||||
.init(word: "Three", count: 2),
|
.init(word: .init(term: "Nine", token: "nine"), count: 1),
|
||||||
.init(word: "Eight", count: 1),
|
.init(word: .init(term: "Niñe", token: "niñe"), count: 1),
|
||||||
.init(word: "Four", count: 1),
|
.init(word: .init(term: "Seven", token: "seven"), count: 1),
|
||||||
|
.init(word: .init(term: "Three", token: "three"), count: 1),
|
||||||
|
.init(word: .init(term: "Thrèé", token: "thrèé"), count: 1),
|
||||||
|
.init(word: .init(term: "Šëvêń", token: "šëvêń"), count: 1),
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
110
Libraries/Filter/Test/Tests/Use Cases/TopWordsUseCaseTests.swift
Normal file
110
Libraries/Filter/Test/Tests/Use Cases/TopWordsUseCaseTests.swift
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
//
|
||||||
|
// TopWordsUseCaseTests.swift
|
||||||
|
// ReviewsFilterTest
|
||||||
|
//
|
||||||
|
// Created by Javier Cicchelli on 18/03/2024.
|
||||||
|
// Copyright © 2024 Röck+Cöde VoF. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
import ReviewsFilterKit
|
||||||
|
import XCTest
|
||||||
|
|
||||||
|
/// Test cases for the `TopWordsUseCase` use case.
final class TopWordsUseCaseTests: XCTestCase {

    // MARK: Properties

    private var input: TopWordsUseCase.Input!
    private var output: TopWordsUseCase.Output!
    private var sut: TopWordsUseCase!

    // MARK: Setup

    override func setUp() async throws {
        sut = TopWordsUseCase()
    }

    // MARK: Functions

    /// Groups without any shared word: the three highest counts are returned, highest first.
    func testCallAsFunction_withInput_hasDifferentWords() {
        // GIVEN
        let one = wordCount("One", "one", 3)
        let two = wordCount("Two", "two", 1)
        let three = wordCount("Three", "three", 5)
        let four = wordCount("Four", "four", 2)

        input = [[one], [two], [three, four]]

        // WHEN
        output = sut(input)

        // THEN
        XCTAssertFalse(output.isEmpty)
        XCTAssertEqual(output.count, 3)
        XCTAssertEqual(output, [three, one, four])
    }

    /// Groups sharing some words: counts of equal words are summed before the top three are picked.
    func testCallAsFunction_withInput_hasSomeCommonWords() {
        // GIVEN
        let firstGroup = [
            wordCount("One", "one", 3),
            wordCount("Two", "two", 7),
            wordCount("Three", "three", 1)
        ]
        let secondGroup = [
            wordCount("Four", "four", 5),
            wordCount("Two", "two", 2),
            wordCount("Five", "five", 6)
        ]
        let thirdGroup = [
            wordCount("Six", "six", 9),
            wordCount("Four", "four", 4),
            wordCount("Two", "two", 1)
        ]

        input = [firstGroup, secondGroup, thirdGroup]

        // WHEN
        output = sut(input)

        // THEN
        XCTAssertFalse(output.isEmpty)
        XCTAssertEqual(output.count, 3)
        XCTAssertEqual(output, [
            wordCount("Two", "two", 10),
            wordCount("Four", "four", 9),
            wordCount("Six", "six", 9)
        ])
    }

    /// Fewer than three distinct words: every word is returned, highest count first.
    func testCallAsFunction_withInput_hasFewWords() {
        // GIVEN
        let one = wordCount("One", "one", 3)
        let four = wordCount("Four", "four", 5)

        input = [[one], [four]]

        // WHEN
        output = sut(input)

        // THEN
        XCTAssertFalse(output.isEmpty)
        XCTAssertEqual(output.count, 2)
        XCTAssertEqual(output, [four, one])
    }

    /// No groups at all: the output is empty.
    func testCallAsFunction_withEmptyInput() {
        // GIVEN
        input = []

        // WHEN
        output = sut(input)

        // THEN
        XCTAssertTrue(output.isEmpty)
        XCTAssertEqual(output, [])
    }

    // MARK: Helpers

    /// Builds a word count fixture out of the term and token of its word plus its count.
    private func wordCount(
        _ term: String,
        _ token: String,
        _ count: Int
    ) -> WordCount {
        .init(
            word: .init(term: term, token: token),
            count: count
        )
    }

}
|
Loading…
x
Reference in New Issue
Block a user