4 files changed, 173 insertions, 0 deletions
diff --git a/python/problems/dictionaries/text/common.py b/python/problems/dictionaries/text/common.py
new file mode 100644
index 0000000..134f69b
--- /dev/null
+++ b/python/problems/dictionaries/text/common.py
@@ -0,0 +1,86 @@
+import re
+from python.util import has_token_sequence, string_almost_equal, \
+    string_contains_number, get_tokens, get_numbers, get_exception_desc
+from server.hints import Hint
+
+id = 20612  # the en.py and sl.py modules of this problem declare the same id
+number = 13  # presumably the problem's ordinal within its group -- TODO confirm
+visible = True  # presumably controls whether the problem is listed -- TODO confirm
+# Reference solution source; its text() ends the chain early at a word that has no successor.
+solution = '''\
+import collections
+
+def following_words(txt):
+    words = txt.split()
+    freq = collections.defaultdict(list)
+    for word, next_word in zip(words, words[1:]):
+        freq[word].append(next_word)
+    return freq
+
+def freq_following_word(txt):
+    following = following_words(txt)
+    for f in following:
+        vals = collections.Counter(following[f])
+        s = sorted(vals.most_common(), key = lambda x: (-x[1], x[0]))
+        following[f] = s[0][0]
+    return following
+
+def text(word, full_text, num):
+    freq = freq_following_word(full_text)
+    words = []
+    while word and len(words) < num:
+        words.append(word)
+        word = freq.get(word)
+    return ' '.join(words)
+'''
+# Problem-specific hints; test() appends 'final_hint' once every case passes.
+hint_type = {
+    'final_hint': Hint('final_hint')
+}
+
+def test(python, code):
+    """Check the submission against fixed examples; return (passed, hints).
+
+    python is called as python(code=..., inputs=..., timeout=...) and code is
+    the submitted source. hints is a list of dicts with 'id' and, mostly, 'args'.
+    """
+    func_name = 'text'
+    if not has_token_sequence(get_tokens(code), ['def', func_name]):
+        return False, [{'id' : 'no_func_name', 'args' : {'func_name' : func_name}}]
+
+    # ((word, full_text, num), expected return value)
+    in_out = [
+        (('in', 'in in in in', 5), 'in in in in in'),
+        (('in', 'in to in ono in to smo mi', 5), 'in to in to in'),
+        (('danes', 'danes je lep dan danes sije sonce', 5),
+         'danes je lep dan danes'),
+        (('danes', 'danes je lep dan danes sije sonce danes sije dan ki je sonce', 5),
+         'danes sije dan danes sije'),
+    ]
+    # str() of the argument tuple turns each input into a call, e.g. text('in', ..., 5).
+    test_in = [(func_name + str(args), None) for args, _ in in_out]
+    test_out = [expected for _, expected in in_out]
+
+    answers = python(code=code, inputs=test_in, timeout=1.0)
+    verdicts = [ans[0] == expected for ans, expected in zip(answers, test_out)]
+    n_correct = sum(verdicts)
+    failed = [i for i, ok in enumerate(verdicts) if not ok]
+    passed = n_correct == len(test_in)
+    hints = [{'id': 'test_results', 'args': {'passed': n_correct, 'total': len(test_in)}}]
+    if failed:  # only the last failing case is reported
+        tin, tout = test_in[failed[-1]][0], test_out[failed[-1]]
+        hints.append({'id': 'problematic_test_case', 'args': {'testin': str(tin), 'testout': str(tout)}})
+    if passed:
+        hints.append({'id': 'final_hint'})
+    return passed, hints
+
+
+def hint(python, code):
+    """Return the exception description from one trial run of code, else None."""
+    get_tokens(code)  # result is not used; the call is kept as it was
+
+    # One run with a single (None, None) input is enough to surface exceptions.
+    answers = python(code=code, inputs=[(None, None)], timeout=1.0)
+    # Element 3 of the first answer is what get_exception_desc() inspects.
+    description = get_exception_desc(answers[0][3])
+    return description or None
diff --git a/python/problems/dictionaries/text/en.py b/python/problems/dictionaries/text/en.py
new file mode 100644
index 0000000..4e5f5b3
--- /dev/null
+++ b/python/problems/dictionaries/text/en.py
@@ -0,0 +1,13 @@
+id = 20612  # same id as in this problem's common.py and sl.py
+name = 'Generated text'
+# The English description and hint texts below are still placeholders.
+description = '''\
+<p>(translation missing)</p>'''
+# NOTE(review): sl.py keys its hints by 'final_hint' and keeps plan as a separate list -- confirm the keys below.
+hint = {
+    'plan': '''\
+<p>(translation missing)</p>''',
+
+    'no_input_call': '''\
+<p>(translation missing)</p>''',
+}
+\ No newline at end of file
diff --git a/python/problems/dictionaries/text/sl.py b/python/problems/dictionaries/text/sl.py
new file mode 100644
index 0000000..3afeec4
--- /dev/null
+++ b/python/problems/dictionaries/text/sl.py
@@ -0,0 +1,45 @@
+import server
+mod = server.problems.load_language('python', 'sl')  # not referenced again in the visible part of this file
+
+
+id = 20612  # same id as in this problem's common.py and en.py
+name = 'Generirano besedilo'  # en.py gives the English name 'Generated text'
+# Slovenian HTML statement: write text(word, full_text, num), chaining each word to its most frequent successor.
+description = '''\
+<p>
+Napisati želimo program, ki bo generiral tipičen stavek. Seveda ni dobro,
+da si samo naključno izbiramo besede in jih lepimo skupaj, saj bi tako dobili
+nekaj povsem neberljivega. Naloge se bomo lotili malo pametneje.
+Recimo, da ima program na voljo nek tekst, npr. <code>'in to in ono smo mi'</code>,
+iz katerega se lahko uči. Naš tekst bomo začeli z izbrano besedo.
+Nadaljujemo tako, da se vprašamo katera beseda se v učnem tekstu pojavi
+najpogosteje za izbrano besedo. Če začnemo z besedo <code>to</code>, potem
+bo naslednja beseda <code>in</code>. Postopek nato ponovimo z besedo <code>in</code>.
+</p>
+
+<p>
+Napišite funkcijo <code>text(word, full_text, num)</code>, ki sprejme začetno
+besedo <code>word</code>, celotno besedilo <code>full_text</code>,
+ter generira besedilo dolgo <code>num</code> besed.
+</p>
+
+<p> Da bodo generirani stavki bolj zanimivi, lahko program testiraš na
+kakšnem romanu, npr. Orwellovi noveli 1984. Vendar pa tega ne boš mogel
+izvajati v CodeQ, saj nima dostopa do mreže. Poženi iz kakšnega drugega programa,
+npr. iz pyCharma ali kar iz ukazne vrstice.</p>
+<pre>
+>>> import urllib.request
+>>> txt = urllib.request.urlopen('http://squeeb1134.tripod.com/1984.txt').read().decode('utf8')
+>>> text('Big', txt, 15)
+'Big Brother is not be a few minutes at the Party member of the Party'
+</pre>
+'''
+
+plan = []  # empty: no plan entries are listed for this problem
+# Text for the 'final_hint' id that common.py's test() emits on success; it reads "The program is correct!".
+hint = {
+    'final_hint': ['''\
+<p>Program je pravilen! <br>
+</p>
+'''],
+}
diff --git a/python/problems/dictionaries/text/tmp.py b/python/problems/dictionaries/text/tmp.py
new file mode 100644
index 0000000..bf56c4f
--- /dev/null
+++ b/python/problems/dictionaries/text/tmp.py
@@ -0,0 +1,29 @@
+import collections
+
+def following_words(txt):
+    """Map each word of txt to the list of words that directly follow it."""
+    tokens, successors = txt.split(), collections.defaultdict(list)
+    for i, follower in enumerate(tokens[1:]):
+        successors[tokens[i]].append(follower)
+    return successors
+
+def freq_following_word(txt):
+    """Map each word of txt to its most frequent successor (ties: smallest word)."""
+    following = following_words(txt)
+    for word, successors in following.items():
+        counts = collections.Counter(successors)
+        following[word] = min(counts, key=lambda w: (-counts[w], w))
+    return following
+
+def text(word, freq, num):
+    """Join up to num words chained via freq; stop at a word with no successor."""
+    words = []
+    while word and len(words) < num:
+        words.append(word)
+        word = freq.get(word)  # None at a dead end; never inserts into a defaultdict
+    return ' '.join(words)
+
+import urllib.request
+txt = 'danes je lep dan danes sije sonce danes sije dan ki je sonce'  # small sample corpus
+# To try a larger corpus instead: urllib.request.urlopen('http://squeeb1134.tripod.com/1984.txt').read().decode('utf8')
+print (text('danes', freq_following_word(txt), 5))