mirror of
https://hub.njuu.cf/TheAlgorithms/Python.git
synced 2023-10-11 13:06:12 +08:00
added idf-smooth (#2174)
* added idf-smooth * added idf-smooth * added idf-smooth
This commit is contained in:
parent
e92e433dbe
commit
a1d1a44f51
@ -83,16 +83,17 @@ the third document in the corpus.")
|
|||||||
return (len([doc for doc in docs if term in doc]), len(docs))
|
return (len([doc for doc in docs if term in doc]), len(docs))
|
||||||
|
|
||||||
|
|
||||||
def inverse_document_frequency(df: int, N: int) -> float:
|
def inverse_document_frequency(df: int, N: int, smoothing=False) -> float:
|
||||||
"""
|
"""
|
||||||
Return an integer denoting the importance
|
Return an integer denoting the importance
|
||||||
of a word. This measure of importance is
|
of a word. This measure of importance is
|
||||||
calculated by log10(N/df), where N is the
|
calculated by log10(N/df), where N is the
|
||||||
number of documents and df is
|
number of documents and df is
|
||||||
the Document Frequency.
|
the Document Frequency.
|
||||||
@params : df, the Document Frequency, and N,
|
@params : df, the Document Frequency, N,
|
||||||
the number of documents in the corpus.
|
the number of documents in the corpus and
|
||||||
@returns : log10(N/df)
|
smoothing, if True return the idf-smooth
|
||||||
|
@returns : log10(N/df) or 1+log10(N/(1+df))
|
||||||
@examples :
|
@examples :
|
||||||
>>> inverse_document_frequency(3, 0)
|
>>> inverse_document_frequency(3, 0)
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
@ -104,7 +105,14 @@ def inverse_document_frequency(df: int, N: int) -> float:
|
|||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
...
|
...
|
||||||
ZeroDivisionError: df must be > 0
|
ZeroDivisionError: df must be > 0
|
||||||
|
>>> inverse_document_frequency(0, 3,True)
|
||||||
|
1.477
|
||||||
"""
|
"""
|
||||||
|
if smoothing:
|
||||||
|
if N == 0:
|
||||||
|
raise ValueError("log10(0) is undefined.")
|
||||||
|
return round(1 + log10(N / (1 + df)), 3)
|
||||||
|
|
||||||
if df == 0:
|
if df == 0:
|
||||||
raise ZeroDivisionError("df must be > 0")
|
raise ZeroDivisionError("df must be > 0")
|
||||||
elif N == 0:
|
elif N == 0:
|
||||||
|
Loading…
Reference in New Issue
Block a user