Package pyarabic ::
Module araby
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """
19 Arabic module
20 @author: Taha Zerrouki
21 @contact: taha dot zerrouki at gmail dot com
22 @copyright: Arabtechies, Arabeyes, Taha Zerrouki
23 @license: GPL
24 @date:2010/03/01
25 @version: 0.1
26 """
27 import re
28 from stack import *
29
30 """
31 The constants below name the Arabic letters and marks; each one is a unicode string.
32 """
33
# ---------------------------------------------------------------------------
# Single characters, one unicode string per constant.
# ---------------------------------------------------------------------------

# Punctuation, letters and combining hamza/madda signs.
COMMA = u'\u060C'
SEMICOLON = u'\u061B'
QUESTION = u'\u061F'
HAMZA = u'\u0621'
ALEF_MADDA = u'\u0622'
ALEF_HAMZA_ABOVE = u'\u0623'
WAW_HAMZA = u'\u0624'
ALEF_HAMZA_BELOW = u'\u0625'
YEH_HAMZA = u'\u0626'
ALEF = u'\u0627'
BEH = u'\u0628'
TEH_MARBUTA = u'\u0629'
TEH = u'\u062a'
THEH = u'\u062b'
JEEM = u'\u062c'
HAH = u'\u062d'
KHAH = u'\u062e'
DAL = u'\u062f'
THAL = u'\u0630'
REH = u'\u0631'
ZAIN = u'\u0632'
SEEN = u'\u0633'
SHEEN = u'\u0634'
SAD = u'\u0635'
DAD = u'\u0636'
TAH = u'\u0637'
ZAH = u'\u0638'
AIN = u'\u0639'
GHAIN = u'\u063a'
TATWEEL = u'\u0640'
FEH = u'\u0641'
QAF = u'\u0642'
KAF = u'\u0643'
LAM = u'\u0644'
MEEM = u'\u0645'
NOON = u'\u0646'
HEH = u'\u0647'
WAW = u'\u0648'
ALEF_MAKSURA = u'\u0649'
YEH = u'\u064a'
MADDA_ABOVE = u'\u0653'
HAMZA_ABOVE = u'\u0654'
HAMZA_BELOW = u'\u0655'

# Digits and numeric punctuation.
ZERO = u'\u0660'
ONE = u'\u0661'
TWO = u'\u0662'
THREE = u'\u0663'
FOUR = u'\u0664'
FIVE = u'\u0665'
SIX = u'\u0666'
SEVEN = u'\u0667'
EIGHT = u'\u0668'
NINE = u'\u0669'
PERCENT = u'\u066a'
DECIMAL = u'\u066b'
THOUSANDS = u'\u066c'
STAR = u'\u066d'
MINI_ALEF = u'\u0670'
ALEF_WASLA = u'\u0671'
FULL_STOP = u'\u06d4'
BYTE_ORDER_MARK = u'\ufeff'

# Vocalization marks.
FATHATAN = u'\u064b'
DAMMATAN = u'\u064c'
KASRATAN = u'\u064d'
FATHA = u'\u064e'
DAMMA = u'\u064f'
KASRA = u'\u0650'
SHADDA = u'\u0651'
SUKUN = u'\u0652'

# Small letters. SMALL_ALEF is the same code point as MINI_ALEF; both names
# are kept because either may be used by callers.
SMALL_ALEF = u"\u0670"
SMALL_WAW = u"\u06E5"
SMALL_YEH = u"\u06E6"

# Lam-Alef ligatures (one character each) and their two-letter spellings.
LAM_ALEF = u'\ufefb'
LAM_ALEF_HAMZA_ABOVE = u'\ufef7'
LAM_ALEF_HAMZA_BELOW = u'\ufef9'
LAM_ALEF_MADDA_ABOVE = u'\ufef5'
simple_LAM_ALEF = u'\u0644\u0627'
simple_LAM_ALEF_HAMZA_ABOVE = u'\u0644\u0623'
simple_LAM_ALEF_HAMZA_BELOW = u'\u0644\u0625'
simple_LAM_ALEF_MADDA_ABOVE = u'\u0644\u0622'

# ---------------------------------------------------------------------------
# Character groups.
# ---------------------------------------------------------------------------

# All letters, joined into one unicode string.
LETTERS = u''.join([
    ALEF, BEH, TEH, TEH_MARBUTA, THEH, JEEM, HAH, KHAH,
    DAL, THAL, REH, ZAIN, SEEN, SHEEN, SAD, DAD, TAH, ZAH,
    AIN, GHAIN, FEH, QAF, KAF, LAM, MEEM, NOON, HEH, WAW, YEH,
    HAMZA, ALEF_MADDA, ALEF_HAMZA_ABOVE, WAW_HAMZA, ALEF_HAMZA_BELOW,
    YEH_HAMZA,
])

# TASHKEEL is HARAKAT plus SHADDA.
TASHKEEL = (
    FATHATAN, DAMMATAN, KASRATAN,
    FATHA, DAMMA, KASRA,
    SUKUN,
    SHADDA,
)
HARAKAT = (
    FATHATAN, DAMMATAN, KASRATAN,
    FATHA, DAMMA, KASRA,
    SUKUN,
)
SHORTHARAKAT = (FATHA, DAMMA, KASRA, SUKUN)

TANWIN = (FATHATAN, DAMMATAN, KASRATAN)

# The spelling "LIGUATURES" is kept: it is the public name of this tuple.
LIGUATURES = (
    LAM_ALEF,
    LAM_ALEF_HAMZA_ABOVE,
    LAM_ALEF_HAMZA_BELOW,
    LAM_ALEF_MADDA_ABOVE,
)
HAMZAT = (
    HAMZA,
    WAW_HAMZA,
    YEH_HAMZA,
    HAMZA_ABOVE,
    HAMZA_BELOW,
    ALEF_HAMZA_BELOW,
    ALEF_HAMZA_ABOVE,
)
ALEFAT = (
    ALEF,
    ALEF_MADDA,
    ALEF_HAMZA_ABOVE,
    ALEF_HAMZA_BELOW,
    ALEF_WASLA,
    ALEF_MAKSURA,
    SMALL_ALEF,
)
WEAK = (ALEF, WAW, YEH, ALEF_MAKSURA)
YEHLIKE = (YEH, YEH_HAMZA, ALEF_MAKSURA, SMALL_YEH)

WAWLIKE = (WAW, WAW_HAMZA, SMALL_WAW)
TEHLIKE = (TEH, TEH_MARBUTA)

SMALL = (SMALL_ALEF, SMALL_WAW, SMALL_YEH)
MOON = (
    HAMZA,
    ALEF_MADDA,
    ALEF_HAMZA_ABOVE,
    ALEF_HAMZA_BELOW,
    ALEF,
    BEH,
    JEEM,
    HAH,
    KHAH,
    AIN,
    GHAIN,
    FEH,
    QAF,
    KAF,
    MEEM,
    HEH,
    WAW,
    YEH,
)
SUN = (
    TEH,
    THEH,
    DAL,
    THAL,
    REH,
    ZAIN,
    SEEN,
    SHEEN,
    SAD,
    DAD,
    TAH,
    ZAH,
    LAM,
    NOON,
)

# ---------------------------------------------------------------------------
# Lookup tables.
# ---------------------------------------------------------------------------

# Rank of each letter, 1 to 29. TEH_MARBUTA shares the rank of TEH, and every
# hamza form shares rank 29 with HAMZA.
AlphabeticOrder = {
    ALEF: 1,
    BEH: 2,
    TEH: 3,
    TEH_MARBUTA: 3,
    THEH: 4,
    JEEM: 5,
    HAH: 6,
    KHAH: 7,
    DAL: 8,
    THAL: 9,
    REH: 10,
    ZAIN: 11,
    SEEN: 12,
    SHEEN: 13,
    SAD: 14,
    DAD: 15,
    TAH: 16,
    ZAH: 17,
    AIN: 18,
    GHAIN: 19,
    FEH: 20,
    QAF: 21,
    KAF: 22,
    LAM: 23,
    MEEM: 24,
    NOON: 25,
    HEH: 26,
    WAW: 27,
    YEH: 28,
    HAMZA: 29,

    ALEF_MADDA: 29,
    ALEF_HAMZA_ABOVE: 29,
    WAW_HAMZA: 29,
    ALEF_HAMZA_BELOW: 29,
    YEH_HAMZA: 29,
}

# Arabic name of each letter and mark.
NAMES = {
    ALEF: u"ألف",
    BEH: u"باء",
    TEH: u'تاء',
    TEH_MARBUTA: u'تاء مربوطة',
    THEH: u'ثاء',
    JEEM: u'جيم',
    HAH: u'حاء',
    KHAH: u'خاء',
    DAL: u'دال',
    THAL: u'ذال',
    REH: u'راء',
    ZAIN: u'زاي',
    SEEN: u'سين',
    SHEEN: u'شين',
    SAD: u'صاد',
    DAD: u'ضاد',
    TAH: u'طاء',
    ZAH: u'ظاء',
    AIN: u'عين',
    GHAIN: u'غين',
    FEH: u'فاء',
    QAF: u'قاف',
    KAF: u'كاف',
    LAM: u'لام',
    MEEM: u'ميم',
    NOON: u'نون',
    HEH: u'هاء',
    WAW: u'واو',
    YEH: u'ياء',
    HAMZA: u'همزة',

    TATWEEL: u'تطويل',
    ALEF_MADDA: u'ألف ممدودة',
    ALEF_MAKSURA: u'ألف مقصورة',
    ALEF_HAMZA_ABOVE: u'همزة على الألف',
    WAW_HAMZA: u'همزة على الواو',
    ALEF_HAMZA_BELOW: u'همزة تحت الألف',
    YEH_HAMZA: u'همزة على الياء',
    FATHATAN: u'فتحتان',
    DAMMATAN: u'ضمتان',
    KASRATAN: u'كسرتان',
    FATHA: u'فتحة',
    DAMMA: u'ضمة',
    KASRA: u'كسرة',
    SHADDA: u'شدة',
    SUKUN: u'سكون',
}

# ---------------------------------------------------------------------------
# Compiled character classes, one per group above.
# The brackets are written as plain u"..." literals: the former ur"..." prefix
# gives the same value on Python 2 but is a syntax error on Python 3.
# ---------------------------------------------------------------------------
HARAKAT_pattern = re.compile(u"[" + u"".join(HARAKAT) + u"]")
TASHKEEL_pattern = re.compile(u"[" + u"".join(TASHKEEL) + u"]")
HAMZAT_pattern = re.compile(u"[" + u"".join(HAMZAT) + u"]")
ALEFAT_pattern = re.compile(u"[" + u"".join(ALEFAT) + u"]")
LIGUATURES_pattern = re.compile(u"[" + u"".join(LIGUATURES) + u"]")
300
301
302
303
305 """Return True if archar is the Arabic Sukun mark (SUKUN), else False.
306 @param archar: arabic unicode char
307 @type archar: unicode
308 """
309 if archar==SUKUN:
310 return True;
311 else: return False;
312
314 """Return True if archar is the Arabic Shadda mark (SHADDA), else False.
315 @param archar: arabic unicode char
316 @type archar: unicode
317 """
318 if archar==SHADDA:
319 return True;
320 else: return False;
321
323 """Return True if archar is the Arabic Tatweel letter modifier (TATWEEL), else False.
324 @param archar: arabic unicode char
325 @type archar: unicode
326 """
327 if archar==TATWEEL:
328 return True;
329 else: return False;
331 """Return True if archar is a Tanwin mark (FATHATAN, DAMMATAN, KASRATAN), else False.
332 @param archar: arabic unicode char
333 @type archar: unicode
334 """
335 if archar in TANWIN:
336 return True;
337 else: return False;
338
340 """Return True if archar is a Tashkeel mark (FATHA, DAMMA, KASRA, SUKUN, SHADDA, FATHATAN, DAMMATAN, KASRATAN), else False.
341 @param archar: arabic unicode char
342 @type archar: unicode
343 """
344 if archar in TASHKEEL:
345 return True;
346 else: return False;
347
349 """Return True if archar is a Haraka mark (FATHA, DAMMA, KASRA, SUKUN or a Tanwin mark), else False.
350 @param archar: arabic unicode char
351 @type archar: unicode
352 """
353 if archar in HARAKAT:
354 return True;
355 else: return False;
356
358 """Return True if archar is a short Haraka mark (FATHA, DAMMA, KASRA, SUKUN), else False.
359 @param archar: arabic unicode char
360 @type archar: unicode
361 """
362 if archar in SHORTHARAKAT:
363 return True;
364 else: return False;
365
367 """Return True if archar is a Lam-Alef ligature, else False.
368 The ligatures are LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW and LAM_ALEF_MADDA_ABOVE.
369 @param archar: arabic unicode char
370 @type archar: unicode
371 """
372 if archar in LIGUATURES:
373 return True;
374 else: return False;
375
377 """Return True if archar is a Hamza form, else False.
378 The Hamza forms are HAMZA, WAW_HAMZA, YEH_HAMZA, HAMZA_ABOVE, HAMZA_BELOW, ALEF_HAMZA_BELOW and ALEF_HAMZA_ABOVE.
379 @param archar: arabic unicode char
380 @type archar: unicode
381 """
382 if archar in HAMZAT:
383 return True;
384 else: return False;
385
387 """Return True if archar is an Alef form, else False.
388 The Alef forms are ALEF, ALEF_MADDA, ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW, ALEF_WASLA, ALEF_MAKSURA and SMALL_ALEF.
389 @param archar: arabic unicode char
390 @type archar: unicode
391 """
392 if archar in ALEFAT:
393 return True;
394 else: return False;
395
397 """Return True if archar is a Yeh-like form, else False.
398 The Yeh-like forms are YEH, YEH_HAMZA, ALEF_MAKSURA and SMALL_YEH.
399 @param archar: arabic unicode char
400 @type archar: unicode
401 """
402 if archar in YEHLIKE:
403 return True;
404 else: return False;
405
407 """Return True if archar is a Waw-like form, else False.
408 The Waw-like forms are WAW, WAW_HAMZA and SMALL_WAW.
409 @param archar: arabic unicode char
410 @type archar: unicode
411 """
412 if archar in WAWLIKE:
413 return True;
414 else: return False;
415
417 """Return True if archar is a Teh form, else False.
418 The Teh forms are TEH and TEH_MARBUTA.
419 @param archar: arabic unicode char
420 @type archar: unicode
421 """
422 if archar in TEHLIKE:
423 return True;
424 else: return False;
426 """Return True if archar is an Arabic small letter, else False.
427 The small letters are SMALL_ALEF, SMALL_WAW and SMALL_YEH.
428 @param archar: arabic unicode char
429 @type archar: unicode
430 """
431 if archar in SMALL:
432 return True;
433 else: return False;
434
436 """Return True if archar is an Arabic weak letter, else False.
437 The weak letters are ALEF, WAW, YEH and ALEF_MAKSURA.
438 @param archar: arabic unicode char
439 @type archar: unicode
440 """
441 if archar in WEAK:
442 return True;
443 else: return False;
444
446 """Return True if archar is an Arabic Moon letter, else False.
447 The Moon letters are the members of the MOON tuple.
448 @param archar: arabic unicode char
449 @type archar: unicode
450 """
451
452 if archar in MOON:
453 return True;
454 else: return False;
455
457 """Return True if archar is an Arabic Sun letter, else False.
458 The Sun letters are the members of the SUN tuple.
459 @param archar: arabic unicode char
460 @type archar: unicode
461 """
462 if archar in SUN:
463 return True;
464 else: return False;
465
466
467
469 """Return the alphabetic order of an Arabic letter, between 1 and 29.
470 Alef order is 1, Yeh is 28, Hamza is 29.
471 Teh Marbuta has the same order as Teh, 3.
472 @param archar: arabic unicode char
473 @type archar: unicode
474 @return: arabic order, or 0 if archar has no entry in AlphabeticOrder.
475 @rtype: integer;
476 """
477 if AlphabeticOrder.has_key(archar): # NOTE(review): dict.has_key() exists only in Python 2
478 return AlphabeticOrder[archar];
479 else: return 0;
480
482 """Return the Arabic name of an Arabic letter or mark.
483 The name is looked up in the NAMES table.
484 A character with no entry in NAMES gives an empty string.
485 @param archar: arabic unicode char
486 @type archar: unicode
487 @return: arabic name, or an empty unicode string if archar is not in NAMES.
488 @rtype: unicode;
489 """
490 if NAMES.has_key(archar): # NOTE(review): dict.has_key() exists only in Python 2
491 return NAMES[archar];
492 else:
493 return u'';
494
496 """Return a list of Arabic characters.
497 The list holds one character per code point from U+0600 to U+0652.
498 @return: list of arabic characters.
499 @rtype: list of unicode;
500 """
501 mylist=[];
502 for i in range(0x0600, 0x00653): # the stop value is exclusive, so the last code point is 0x0652
503 try :
504 mylist.append(unichr(i));
505 except ValueError:
506 pass;
507 return mylist;
508
509
510
511
512
514 """Return True if the Arabic word contains a Shadda mark, else False.
515 @param word: arabic unicode word
516 @type word: unicode
517 """
518 if re.search(SHADDA,word):
519 return True;
520 else:
521 return False;
522
523
524
525
527 """Return True if the Arabic word is vocalized, i.e. contains a mark from HARAKAT.
528 The word mustn't contain spaces or punctuation. A word for which isalpha() is true gives False.
529 @param word: arabic unicode word
530 @type word: unicode
531 """
532 if word.isalpha(): return False;
533
534 else:
535 if re.search(HARAKAT_pattern,word):
536 return True;
537 else:
538 return False;
540 """Return True if the Arabic text is vocalized, i.e. contains a mark from HARAKAT.
541 The text can contain many words and spaces.
542 @param text: arabic unicode text
543 @type text: unicode
544 """
545 if re.search(HARAKAT_pattern,text):
546 return True;
547 else:
548 return False;
550 """Check that a text holds only characters of the standard Arabic block.
551 Accepted characters are U+0600 to U+0652, three Lam-Alef ligatures, whitespace and digits;
552 extended Arabic characters are rejected.
553 @param text: input text
554 @type text: unicode
555 @return: True if no character falls outside the accepted set
556 @rtype: Boolean
557 """
558 if re.search(u"([^\u0600-\u0652%s%s%s\s\d])"%(LAM_ALEF, LAM_ALEF_HAMZA_ABOVE,LAM_ALEF_MADDA_ABOVE),text): # NOTE(review): LAM_ALEF_HAMZA_BELOW is not in the accepted set - confirm whether that is intended
559 return False;
560 return True;
561
563 """Check that a text holds only characters of the Arabic Unicode ranges.
564 @param text: input text
565 @type text: unicode
566 @return: True if every character is in U+0600-06FF, U+0750-077F, U+FB50-FDFF or U+FE70-FEFF
567 @rtype: Boolean
568 """
569 if re.search(u"([^\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff\u0750-\u077f])",text): # any other character, a space included, makes the result False
570 return False;
571 return True;
572
574 """Check for a valid Arabic word.
575 An Arabic word contains no spaces, digits or punctuation.
576 To avoid some spelling errors, ALEF_MAKSURA and TEH_MARBUTA are rejected in the middle of the word.
577 @param word: input word
578 @type word: unicode
579 @return: True if the word passes every check below
580 @rtype: Boolean
581 """
582 if len(word)==0 : return False; # an empty word is not valid
583 elif re.search(u"([^\u0600-\u0652%s%s%s])"%(LAM_ALEF, LAM_ALEF_HAMZA_ABOVE,LAM_ALEF_MADDA_ABOVE),word): # NOTE(review): LAM_ALEF_HAMZA_BELOW is not in the accepted set - confirm whether that is intended
584 return False;
585 elif isHaraka(word[0]) or word[0] in (WAW_HAMZA,YEH_HAMZA): # a word may not start with a haraka, WAW_HAMZA or YEH_HAMZA
586 return False;
587
588 elif re.match(u"^(.)*[%s](.)+$"%ALEF_MAKSURA,word): # ALEF_MAKSURA followed by at least one more character
589 return False;
590 elif re.match(u"^(.)*[%s]([^%s%s%s])(.)+$"%(TEH_MARBUTA,DAMMA,KASRA,FATHA),word): # TEH_MARBUTA followed by a character other than DAMMA, KASRA or FATHA and then at least one more character
591 return False;
592 else:
593 return True;
594
595
596
597
599 """Strip Harakat from an Arabic text, keeping Shadda.
600 The stripped marks are:
601 - FATHA, DAMMA, KASRA
602 - SUKUN
603 - FATHATAN, DAMMATAN, KASRATAN.
604 Example:
605 >>> text=u"الْعَرَبِيّةُ"
606 >>> stripHarakat(text)
607 العربيّة
608
609 @param text: arabic text.
610 @type text: unicode.
611 @return: the text without the marks listed above.
612 @rtype: unicode.
613 """
614 return re.sub(HARAKAT_pattern,u'',text)
615
617 """Strip vowel marks from a text, Shadda included.
618 The stripped marks are:
619 - FATHA, DAMMA, KASRA
620 - SUKUN
621 - SHADDA
622 - FATHATAN, DAMMATAN, KASRATAN.
623 Example:
624 >>> text=u"الْعَرَبِيّةُ"
625 >>> stripTashkeel(text)
626 العربية
627
628 @param text: arabic text.
629 @type text: unicode.
630 @return: the text without the marks listed above.
631 @rtype: unicode.
632 """
633 return re.sub(TASHKEEL_pattern,'',text);
634
636 """
637 Strip every Tatweel character from a text and return the result.
638
639 Example:
640 >>> text=u"العـــــربية"
641 >>> stripTatweel(text)
642 العربية
643
644 @param text: arabic text.
645 @type text: unicode.
646 @return: the text without Tatweel characters.
647 @rtype: unicode.
648 """
649 return re.sub(TATWEEL,'',text);
650
652 """Normalize Lam-Alef ligatures into two letters (LAM and ALEF), and return the resulting text.
653 Some systems present the Lam-Alef ligature as a single letter; this function converts it into two letters.
654 The ligatures converted into LAM and ALEF are:
655 - LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW, LAM_ALEF_MADDA_ABOVE
656
657 Example:
658 >>> text=u"لانها لالء الاسلام"
659 >>> normalize_lamalef(text)
660 لانها لالئ الاسلام
661
662 @param text: arabic text.
663 @type text: unicode.
664 @return: return a converted text.
665 @rtype: unicode.
666 """
667 return LIGUATURES_pattern.sub(u'%s%s'%(LAM,ALEF), text) # NOTE(review): every ligature, hamza and madda forms included, becomes plain LAM+ALEF - confirm that dropping the hamza/madda is intended
668
669
671 """
672 Separate the letters of an Arabic word from its marks.
673 A letter that has no haraka gets the NOT_DEF_HARAKA placeholder as its mark.
674 Returns the tuple (letters, marks), each joined into one string.
675 """
676
677 stack1=Stack(word) # NOTE(review): Stack and NOT_DEF_HARAKA are not defined in the visible part of this file - presumably provided by "from stack import *"; confirm
678
679 stack1.items.reverse();
680 letters=Stack()
681 marks=Stack()
682 vowels=HARAKAT
683 last1=stack1.pop();
684
685
686
687 while last1 in vowels: last1=stack1.pop(); # skip harakat popped before the first non-haraka item
688 while last1!=None:
689 if last1 in vowels:
690
691
692 marks.pop(); # a haraka replaces the mark pushed for the previous item
693 marks.push(last1);
694 elif last1==SHADDA:
695
696
697
698 marks.pop(); # the mark of the item before the shadda is replaced by SUKUN
699 marks.push(SUKUN);
700 marks.push(NOT_DEF_HARAKA); # the shadda itself starts with the placeholder mark
701 letters.push(SHADDA);
702 else:
703 marks.push(NOT_DEF_HARAKA); # placeholder until a haraka follows this letter
704 letters.push(last1);
705 last1=stack1.pop();
706 return (''.join(letters.items),''.join(marks.items))
707
708
def joint(letters,marks):
    """
    Join letters with their marks and return the resulting word.

    The two sequences are consumed in parallel. Each letter is written to the
    output followed by its mark, except that a mark equal to NOT_DEF_HARAKA
    is left out. When the letter is SHADDA, the item written just before it
    is removed first if that item is a haraka.

    @param letters: the letters of the word.
    @type letters: unicode
    @param marks: the marks, one per letter; must be as long as letters.
    @type marks: unicode
    @return: the joined word; an empty string if the two lengths differ;
        False if one of the inputs was not consumed completely.
    """
    # NOTE(review): Stack and NOT_DEF_HARAKA are not defined in the visible
    # part of this file - presumably provided by "from stack import *";
    # confirm. Stack.pop() is assumed to return None on an empty stack, which
    # is what the loop condition below relies on.
    if len(letters) != len(marks):
        return ""

    letter_stack = Stack(letters)
    letter_stack.items.reverse()
    mark_stack = Stack(marks)
    mark_stack.items.reverse()
    word_stack = Stack()

    letter = letter_stack.pop()
    mark = mark_stack.pop()
    while letter is not None and mark is not None:
        if letter == SHADDA:
            previous = word_stack.pop()
            # Put the previous item back unless it is a haraka. The None
            # test covers a SHADDA that arrives while the output is still
            # empty: None used to be pushed here, and the final join() then
            # failed on it.
            if previous is not None and previous not in HARAKAT:
                word_stack.push(previous)
        word_stack.push(letter)
        if mark != NOT_DEF_HARAKA:
            word_stack.push(mark)
        letter = letter_stack.pop()
        mark = mark_stack.pop()

    if not (letter_stack.isEmpty() and mark_stack.isEmpty()):
        return False
    return ''.join(word_stack.items)
749
751 """
752 Return True if the two words have the same letters and the same harakat.
753 The two words can be fully or partially vocalized: a haraka present in only one of them is skipped.
754 """
755 debug=False;
756 stack1=Stack(word1)
757 stack2=Stack(word2)
758 last1=stack1.pop();
759 last2=stack2.pop();
760 if debug: print "+0", stack1, stack2;
761 vowels=HARAKAT
762 while last1!=None and last2!=None:
763 if last1==last2:
764 if debug: print "+2", stack1.items,last1, stack2.items,last2
765 last1=stack1.pop();
766 last2=stack2.pop();
767 elif last1 in vowels and last2 not in vowels: # haraka only in word1: skip it
768 if debug: print "+2", stack1.items,last1, stack2.items,last2
769 last1=stack1.pop();
770 elif last1 not in vowels and last2 in vowels: # haraka only in word2: skip it
771 if debug: print "+2", stack1.items,last1, stack2.items,last2
772 last2=stack2.pop();
773 else:
774 if debug: print "+2", stack1.items,last1, stack2.items,last2
775 break;
776 if not (stack1.isEmpty() and stack2.isEmpty()): # NOTE(review): the pair already popped into last1/last2 is not inspected here, so a mismatch found on the final pop still seems to give True - verify against Stack
777 return False;
778 else: return True;
779
780
781
783 """
784 Return True if word1 follows the wazn (pattern).
785 The letters must be equal, except that
786 the wazn letters FEH, AIN and LAM
787 act as generic letters and match any non-haraka character of word1.
788 The two words can be fully or partially vocalized.
789 """
790 debug=False;
791 stack1=Stack(word1)
792 stack2=Stack(wazn)
793 root=Stack()
794 last1=stack1.pop();
795 last2=stack2.pop();
796 if debug: print "+0", stack1, stack2;
797 vowels=HARAKAT
798 while last1!=None and last2!=None:
799 if last1==last2 and last2 not in (FEH, AIN,LAM):
800 if debug: print "+2", stack1.items,last1, stack2.items,last2
801 last1=stack1.pop();
802 last2=stack2.pop();
803 elif last1 not in vowels and last2 in (FEH, AIN,LAM): # generic wazn letter: the matching letter of word1 is collected in root
804 if debug: print "+2", stack1.items,last1, stack2.items,last2
805 root.push(last1);
806 print "t"; # NOTE(review): unconditional output, not guarded by the debug flag
807 last1=stack1.pop();
808 last2=stack2.pop();
809 elif last1 in vowels and last2 not in vowels: # haraka only in word1: skip it
810 if debug: print "+2", stack1.items,last1, stack2.items,last2
811 last1=stack1.pop();
812 elif last1 not in vowels and last2 in vowels: # haraka only in wazn: skip it
813 if debug: print "+2", stack1.items,last1, stack2.items,last2
814 last2=stack2.pop();
815 else:
816 if debug: print "+2", stack1.items,last1, stack2.items,last2
817 break;
818
819 root.items.reverse();
820 print " the root is ", root.items # NOTE(review): unconditional output; root is only printed, never returned
821 if not (stack1.isEmpty() and stack2.isEmpty()):
822 return False;
823 else: return True;
824
826 """
827 Return True if the two words have the same letters and compatible Shadda marks; harakat are stripped from both first.
828 The first word is partially vocalized, the second is fully vocalized.
829 If the partial word contains a shadda, it must be at the same place in the fully vocalized word.
830 """
831 debug=False;
832 partial=stripHarakat(partial);
833 fully=stripHarakat(fully)
834 Pstack=Stack(partial)
835 Vstack=Stack(fully)
836 Plast=Pstack.pop();
837 Vlast=Vstack.pop();
838 if debug: print "+0", Pstack, Vstack;
839 vowels=SHADDA # NOTE(review): assigned but never read in the visible body
840 while Plast!=None and Vlast!=None:
841 if Plast==Vlast:
842 if debug: print "+2", Pstack.items,Plast, Vstack.items,Vlast
843 Plast=Pstack.pop();
844 Vlast=Vstack.pop();
845 elif Plast ==SHADDA and Vlast !=SHADDA: # shadda only in the partial word: not alike
846 if debug: print "+2", Pstack.items,Plast, Vstack.items,Vlast
847 break;
848 elif Plast !=SHADDA and Vlast ==SHADDA: # shadda only in the full word: skip it
849 if debug: print "+2", Pstack.items,Plast, Vstack.items,Vlast
850 Vlast=Vstack.pop();
851 else:
852 if debug: print "+2", Pstack.items,Plast, Vstack.items,Vlast
853 break;
854 if not (Pstack.isEmpty() and Vstack.isEmpty()):
855 return False;
856 else: return True;
857