11/25
This commit is contained in:
@@ -0,0 +1,509 @@
|
||||
#############################################################################
|
||||
# Makefile for building: libgooglepinyin
|
||||
# Generated by qmake (3.1) (Qt 5.15.2)
|
||||
# Project: googlepinyin.pro
|
||||
# Template: lib
|
||||
# Command: D:\Qt\Qt5.15.12\5.15.2\mingw81_64\bin\qmake.exe -o Makefile googlepinyin.pro -spec win32-g++ "CONFIG+=qtquickcompiler"
|
||||
#############################################################################
|
||||
|
||||
MAKEFILE = Makefile
|
||||
|
||||
EQ = =
|
||||
|
||||
first: release
|
||||
install: release-install
|
||||
uninstall: release-uninstall
|
||||
QMAKE = D:\Qt\Qt5.15.12\5.15.2\mingw81_64\bin\qmake.exe
|
||||
DEL_FILE = del
|
||||
CHK_DIR_EXISTS= if not exist
|
||||
MKDIR = mkdir
|
||||
COPY = copy /y
|
||||
COPY_FILE = copy /y
|
||||
COPY_DIR = xcopy /s /q /y /i
|
||||
INSTALL_FILE = copy /y
|
||||
INSTALL_PROGRAM = copy /y
|
||||
INSTALL_DIR = xcopy /s /q /y /i
|
||||
QINSTALL = D:\Qt\Qt5.15.12\5.15.2\mingw81_64\bin\qmake.exe -install qinstall
|
||||
QINSTALL_PROGRAM = D:\Qt\Qt5.15.12\5.15.2\mingw81_64\bin\qmake.exe -install qinstall -exe
|
||||
DEL_FILE = del
|
||||
SYMLINK = $(QMAKE) -install ln -f -s
|
||||
DEL_DIR = rmdir
|
||||
MOVE = move
|
||||
IDC = idc
|
||||
IDL = midl
|
||||
ZIP = zip -r -9
|
||||
DEF_FILE =
|
||||
RES_FILE =
|
||||
SED = $(QMAKE) -install sed
|
||||
MOVE = move
|
||||
SUBTARGETS = \
|
||||
release \
|
||||
debug
|
||||
|
||||
|
||||
release: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release
|
||||
release-make_first: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release
|
||||
release-all: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release all
|
||||
release-clean: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release clean
|
||||
release-distclean: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release distclean
|
||||
release-install: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release install
|
||||
release-uninstall: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Release uninstall
|
||||
debug: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug
|
||||
debug-make_first: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug
|
||||
debug-all: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug all
|
||||
debug-clean: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug clean
|
||||
debug-distclean: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug distclean
|
||||
debug-install: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug install
|
||||
debug-uninstall: FORCE
|
||||
$(MAKE) -f $(MAKEFILE).Debug uninstall
|
||||
|
||||
Makefile: googlepinyin.pro D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/win32-g++/qmake.conf D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/spec_pre.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/qdevice.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/device_config.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/sanitize.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/gcc-base.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/g++-base.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/angle.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/win32/windows_vulkan_sdk.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/windows-vulkan.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/g++-win32.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/windows-desktop.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/qconfig.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3danimation.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3danimation_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dcore.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dcore_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dextras.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dextras_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dinput.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dinput_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dlogic.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dlogic_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquick.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquick_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickanimation.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickanimation_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickextras.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickextras_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickinput.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickinput_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickrender.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickrender_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickscene2d.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickscene2d_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3drender.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3drender_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_accessibility_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axbase.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axbase_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axcontainer.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axcontainer_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axserver.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axserver_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bluetooth.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bluetooth_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bodymovin_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bootstrap_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_charts.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_charts_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_concurrent.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_concurrent_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_core.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_core_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_datavisualization.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_datavisualization_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_dbus.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_dbus_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_designer.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_designer_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_designercomponents_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_devicediscovery_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_edid_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_egl_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_eventdispatcher_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_fb_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_fontdatabase_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gamepad.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gamepad_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gui.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gui_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_help.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_help_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_location.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_location_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimedia.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimedia_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimediawidgets.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimediawidgets_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_network.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_network_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_networkauth.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_networkauth_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_nfc.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_nfc_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_opengl.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_opengl_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_openglextensions.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_openglextensions_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_packetprotocol_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_platformcompositor_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioning.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioning_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioningquick.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioningquick_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_printsupport.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_printsupport_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_purchasing.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_purchasing_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qml.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qml_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmldebug_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmldevtools_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlmodels.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlmodels_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmltest.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmltest_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlworkerscript.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlworkerscript_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qtmultimediaquicktools_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3d.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3d_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dassetimport.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dassetimport_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3drender.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3drender_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3druntimerender.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3druntimerender_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dutils.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dutils_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickcontrols2.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickcontrols2_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickparticles_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickshapes_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quicktemplates2.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quicktemplates2_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickwidgets.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickwidgets_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_remoteobjects.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_remoteobjects_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_repparser.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_repparser_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_script.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_script_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scripttools.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scripttools_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scxml.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scxml_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sensors.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sensors_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialbus.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialbus_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialport.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialport_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sql.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sql_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_svg.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_svg_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_testlib.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_testlib_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_texttospeech.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_texttospeech_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_theme_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_uiplugin.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_uitools.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_uitools_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_virtualkeyboard.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_virtualkeyboard_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_vulkan_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_webchannel.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_webchannel_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_websockets.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_websockets_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_widgets.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_widgets_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_windowsuiautomation_support_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_winextras.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_winextras_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xml.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xml_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xmlpatterns.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xmlpatterns_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_zlib_private.pri \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qt_functions.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qt_config.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/win32-g++/qmake.conf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/spec_post.prf \
|
||||
../.qmake.stash \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/exclusive_builds.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/toolchain.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/default_pre.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/win32/default_pre.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/resolve_config.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/exclusive_builds_post.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/default_post.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qtquickcompiler.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/precompile_header.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/warn_on.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qt.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/resources_functions.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/resources.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/moc.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qmake_use.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/file_copies.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/win32/windows.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/testcase_targets.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/exceptions.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/yacc.prf \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/lex.prf \
|
||||
googlepinyin.pro \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/lib/Qt5Core.prl \
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/build_pass.prf
|
||||
$(QMAKE) -o Makefile googlepinyin.pro -spec win32-g++ "CONFIG+=qtquickcompiler"
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/spec_pre.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/qdevice.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/device_config.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/sanitize.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/gcc-base.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/g++-base.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/angle.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/win32/windows_vulkan_sdk.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/windows-vulkan.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/g++-win32.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/common/windows-desktop.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/qconfig.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3danimation.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3danimation_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dcore.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dcore_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dextras.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dextras_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dinput.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dinput_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dlogic.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dlogic_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquick.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquick_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickanimation.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickanimation_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickextras.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickextras_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickinput.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickinput_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickrender.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickrender_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickscene2d.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3dquickscene2d_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3drender.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_3drender_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_accessibility_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axbase.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axbase_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axcontainer.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axcontainer_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axserver.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_axserver_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bluetooth.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bluetooth_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bodymovin_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_bootstrap_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_charts.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_charts_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_concurrent.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_concurrent_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_core.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_core_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_datavisualization.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_datavisualization_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_dbus.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_dbus_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_designer.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_designer_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_designercomponents_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_devicediscovery_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_edid_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_egl_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_eventdispatcher_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_fb_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_fontdatabase_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gamepad.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gamepad_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gui.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_gui_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_help.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_help_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_location.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_location_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimedia.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimedia_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimediawidgets.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_multimediawidgets_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_network.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_network_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_networkauth.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_networkauth_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_nfc.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_nfc_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_opengl.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_opengl_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_openglextensions.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_openglextensions_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_packetprotocol_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_platformcompositor_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioning.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioning_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioningquick.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_positioningquick_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_printsupport.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_printsupport_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_purchasing.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_purchasing_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qml.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qml_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmldebug_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmldevtools_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlmodels.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlmodels_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmltest.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmltest_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlworkerscript.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qmlworkerscript_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_qtmultimediaquicktools_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3d.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3d_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dassetimport.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dassetimport_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3drender.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3drender_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3druntimerender.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3druntimerender_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dutils.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick3dutils_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quick_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickcontrols2.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickcontrols2_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickparticles_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickshapes_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quicktemplates2.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quicktemplates2_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickwidgets.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_quickwidgets_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_remoteobjects.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_remoteobjects_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_repparser.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_repparser_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_script.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_script_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scripttools.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scripttools_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scxml.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_scxml_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sensors.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sensors_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialbus.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialbus_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialport.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_serialport_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sql.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_sql_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_svg.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_svg_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_testlib.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_testlib_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_texttospeech.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_texttospeech_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_theme_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_uiplugin.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_uitools.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_uitools_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_virtualkeyboard.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_virtualkeyboard_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_vulkan_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_webchannel.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_webchannel_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_websockets.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_websockets_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_widgets.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_widgets_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_windowsuiautomation_support_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_winextras.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_winextras_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xml.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xml_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xmlpatterns.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_xmlpatterns_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/modules/qt_lib_zlib_private.pri:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qt_functions.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qt_config.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/win32-g++/qmake.conf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/spec_post.prf:
|
||||
../.qmake.stash:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/exclusive_builds.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/toolchain.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/default_pre.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/win32/default_pre.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/resolve_config.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/exclusive_builds_post.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/default_post.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qtquickcompiler.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/precompile_header.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/warn_on.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qt.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/resources_functions.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/resources.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/moc.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/qmake_use.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/file_copies.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/win32/windows.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/testcase_targets.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/exceptions.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/yacc.prf:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/lex.prf:
|
||||
googlepinyin.pro:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/lib/Qt5Core.prl:
|
||||
D:/Qt/Qt5.15.12/5.15.2/mingw81_64/mkspecs/features/build_pass.prf:
|
||||
# Regenerate this meta Makefile from the project file.
qmake: FORCE
	@$(QMAKE) -o Makefile googlepinyin.pro -spec win32-g++ "CONFIG+=qtquickcompiler"

qmake_all: FORCE

# Aggregate targets: each one fans out to the Release and Debug sub-builds.
make_first: release-make_first debug-make_first FORCE
all: release-all debug-all FORCE
clean: release-clean debug-clean FORCE
distclean: release-distclean debug-distclean FORCE
	-$(DEL_FILE) Makefile

# Remove moc output in both configurations.
release-mocclean:
	$(MAKE) -f $(MAKEFILE).Release mocclean
debug-mocclean:
	$(MAKE) -f $(MAKEFILE).Debug mocclean
mocclean: release-mocclean debug-mocclean

# Generate moc output in both configurations.
release-mocables:
	$(MAKE) -f $(MAKEFILE).Release mocables
debug-mocables:
	$(MAKE) -f $(MAKEFILE).Debug mocables
mocables: release-mocables debug-mocables

check: first

benchmark: first

# Prerequisite with no recipe; targets that list it are always considered out of date.
FORCE:

$(MAKEFILE).Release: Makefile
$(MAKEFILE).Debug: Makefile
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,269 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class defines AtomDictBase class which is the base class for all atom
|
||||
* dictionaries. Atom dictionaries are managed by the decoder class
|
||||
* MatrixSearch.
|
||||
*
|
||||
* When the user appends a new character to the Pinyin string, all enabled atom
|
||||
* dictionaries' extend_dict() will be called at least once to get candidates
|
||||
* ended in this step (the information of starting step is also given in the
|
||||
* parameter). Usually, when extend_dict() is called, a MileStoneHandle object
|
||||
* returned by a previous call for an earlier step is given to speed up the
|
||||
* look-up process, and a new MileStoneHandle object will be returned if
|
||||
* the extension is successful.
|
||||
*
|
||||
* A returned MileStoneHandle object should keep alive until Function
|
||||
* reset_milestones() is called and this object is noticed to be reset.
|
||||
*
|
||||
* Usually, the atom dictionary can use step information to manage its
|
||||
* MileStoneHandle objects, or it can make the objects in ascendant order to
|
||||
* make the reset easier.
|
||||
*
|
||||
* When the decoder loads the dictionary, it will give a starting lemma id for
|
||||
* this atom dictionary to map an inner id to a global id. Global ids should be
|
||||
* used when an atom dictionary talks to any component outside.
|
||||
*/
|
||||
#ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__
|
||||
#define PINYINIME_INCLUDE_ATOMDICTBASE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
#include "./searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
class AtomDictBase {
|
||||
public:
|
||||
virtual ~AtomDictBase() {}
|
||||
|
||||
/**
|
||||
* Load an atom dictionary from a file.
|
||||
*
|
||||
* @param file_name The file name to load dictionary.
|
||||
* @param start_id The starting id used for this atom dictionary.
|
||||
* @param end_id The end id (included) which can be used for this atom
|
||||
* dictionary. User dictionary will always use the last id space, so it can
|
||||
* ignore this paramter. All other atom dictionaries should check this
|
||||
* parameter.
|
||||
* @return True if succeed.
|
||||
*/
|
||||
virtual bool load_dict(const char *file_name, LemmaIdType start_id,
|
||||
LemmaIdType end_id) = 0;
|
||||
|
||||
/**
|
||||
* Close this atom dictionary.
|
||||
*
|
||||
* @return True if succeed.
|
||||
*/
|
||||
virtual bool close_dict() = 0;
|
||||
|
||||
/**
|
||||
* Get the total number of lemmas in this atom dictionary.
|
||||
*
|
||||
* @return The total number of lemmas.
|
||||
*/
|
||||
virtual size_t number_of_lemmas() = 0;
|
||||
|
||||
/**
|
||||
* This function is called by the decoder when user deletes a character from
|
||||
* the input string, or begins a new input string.
|
||||
*
|
||||
* Different atom dictionaries may implement this function in different way.
|
||||
* an atom dictionary can use one of these two parameters (or both) to reset
|
||||
* its corresponding MileStoneHandle objects according its detailed
|
||||
* implementation.
|
||||
*
|
||||
* For example, if an atom dictionary uses step information to manage its
|
||||
* MileStoneHandle objects, parameter from_step can be used to identify which
|
||||
* objects should be reset; otherwise, if another atom dictionary does not
|
||||
* use the detailed step information, it only uses ascendant handles
|
||||
* (according to step. For the same step, earlier call, smaller handle), it
|
||||
* can easily reset those MileStoneHandle which are larger than from_handle.
|
||||
*
|
||||
* The decoder always reset the decoding state by step. So when it begins
|
||||
* resetting, it will call reset_milestones() of its atom dictionaries with
|
||||
* the step information, and the MileStoneHandle objects returned by the
|
||||
* earliest calling of extend_dict() for that step.
|
||||
*
|
||||
* If an atom dictionary does not implement incremental search, this function
|
||||
* can be totally ignored.
|
||||
*
|
||||
* @param from_step From which step(included) the MileStoneHandle
|
||||
* objects should be reset.
|
||||
* @param from_handle The ealiest MileStoneHandle object for step from_step
|
||||
*/
|
||||
virtual void reset_milestones(uint16 from_step,
|
||||
MileStoneHandle from_handle) = 0;
|
||||
|
||||
/**
|
||||
* Used to extend in this dictionary. The handle returned should keep valid
|
||||
* until reset_milestones() is called.
|
||||
*
|
||||
* @param from_handle Its previous returned extended handle without the new
|
||||
* spelling id, it can be used to speed up the extending.
|
||||
* @param dep The paramter used for extending.
|
||||
* @param lpi_items Used to fill in the lemmas matched.
|
||||
* @param lpi_max The length of the buffer
|
||||
* @param lpi_num Used to return the newly added items.
|
||||
* @return The new mile stone for this extending. 0 if fail.
|
||||
*/
|
||||
virtual MileStoneHandle extend_dict(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep,
|
||||
LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num) = 0;
|
||||
|
||||
/**
|
||||
* Get lemma items with scores according to a spelling id stream.
|
||||
* This atom dictionary does not need to sort the returned items.
|
||||
*
|
||||
* @param splid_str The spelling id stream buffer.
|
||||
* @param splid_str_len The length of the spelling id stream buffer.
|
||||
* @param lpi_items Used to return matched lemma items with scores.
|
||||
* @param lpi_max The maximum size of the buffer to return result.
|
||||
* @return The number of matched items which have been filled in to lpi_items.
|
||||
*/
|
||||
virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max) = 0;
|
||||
|
||||
/**
|
||||
* Get a lemma string (The Chinese string) by the given lemma id.
|
||||
*
|
||||
* @param id_lemma The lemma id to get the string.
|
||||
* @param str_buf The buffer to return the Chinese string.
|
||||
* @param str_max The maximum size of the buffer.
|
||||
* @return The length of the string, 0 if fail.
|
||||
*/
|
||||
virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
|
||||
uint16 str_max) = 0;
|
||||
|
||||
/**
|
||||
* Get the full spelling ids for the given lemma id.
|
||||
* If the given buffer is too short, return 0.
|
||||
*
|
||||
* @param splids Used to return the spelling ids.
|
||||
* @param splids_max The maximum buffer length of splids.
|
||||
* @param arg_valid Used to indicate if the incoming parameters have been
|
||||
* initialized are valid. If it is true, the splids and splids_max are valid
|
||||
* and there may be half ids in splids to be updated to full ids. In this
|
||||
* case, splids_max is the number of valid ids in splids.
|
||||
* @return The number of ids in the buffer.
|
||||
*/
|
||||
virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid) = 0;
|
||||
|
||||
/**
|
||||
* Function used for prediction.
|
||||
* No need to sort the newly added items.
|
||||
*
|
||||
* @param last_hzs The last n Chinese chracters(called Hanzi), its length
|
||||
* should be less than or equal to kMaxPredictSize.
|
||||
* @param hzs_len specifies the length(<= kMaxPredictSize) of the history.
|
||||
* @param npre_items Used used to return the result.
|
||||
* @param npre_max The length of the buffer to return result
|
||||
* @param b4_used Number of prediction result (from npre_items[-b4_used])
|
||||
* from other atom dictionaries. A atom ditionary can just ignore it.
|
||||
* @return The number of prediction result from this atom dictionary.
|
||||
*/
|
||||
virtual size_t predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used) = 0;
|
||||
|
||||
/**
|
||||
* Add a lemma to the dictionary. If the dictionary allows to add new
|
||||
* items and this item does not exist, add it.
|
||||
*
|
||||
* @param lemma_str The Chinese string of the lemma.
|
||||
* @param splids The spelling ids of the lemma.
|
||||
* @param lemma_len The length of the Chinese lemma.
|
||||
* @param count The frequency count for this lemma.
|
||||
*/
|
||||
virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count) = 0;
|
||||
|
||||
/**
|
||||
* Update a lemma's occuring count.
|
||||
*
|
||||
* @param lemma_id The lemma id to update.
|
||||
* @param delta_count The frequnecy count to ajust.
|
||||
* @param selected Indicate whether this lemma is selected by user and
|
||||
* submitted to target edit box.
|
||||
* @return The id if succeed, 0 if fail.
|
||||
*/
|
||||
virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
|
||||
bool selected) = 0;
|
||||
|
||||
/**
|
||||
* Get the lemma id for the given lemma.
|
||||
*
|
||||
* @param lemma_str The Chinese string of the lemma.
|
||||
* @param splids The spelling ids of the lemma.
|
||||
* @param lemma_len The length of the lemma.
|
||||
* @return The matched lemma id, or 0 if fail.
|
||||
*/
|
||||
virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len) = 0;
|
||||
|
||||
/**
|
||||
* Get the lemma score.
|
||||
*
|
||||
* @param lemma_id The lemma id to get score.
|
||||
* @return The score of the lemma, or 0 if fail.
|
||||
*/
|
||||
virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0;
|
||||
|
||||
/**
|
||||
* Get the lemma score.
|
||||
*
|
||||
* @param lemma_str The Chinese string of the lemma.
|
||||
* @param splids The spelling ids of the lemma.
|
||||
* @param lemma_len The length of the lemma.
|
||||
* @return The score of the lamm, or 0 if fail.
|
||||
*/
|
||||
virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len) = 0;
|
||||
|
||||
/**
|
||||
* If the dictionary allowed, remove a lemma from it.
|
||||
*
|
||||
* @param lemma_id The id of the lemma to remove.
|
||||
* @return True if succeed.
|
||||
*/
|
||||
virtual bool remove_lemma(LemmaIdType lemma_id) = 0;
|
||||
|
||||
/**
|
||||
* Get the total occuring count of this atom dictionary.
|
||||
*
|
||||
* @return The total occuring count of this atom dictionary.
|
||||
*/
|
||||
virtual size_t get_total_lemma_count() = 0;
|
||||
|
||||
/**
|
||||
* Set the total occuring count of other atom dictionaries.
|
||||
*
|
||||
* @param count The total occuring count of other atom dictionaies.
|
||||
*/
|
||||
virtual void set_total_lemma_count_of_others(size_t count) = 0;
|
||||
|
||||
/**
|
||||
* Notify this atom dictionary to flush the cached data to persistent storage
|
||||
* if necessary.
|
||||
*/
|
||||
virtual void flush_cache() = 0;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_ATOMDICTBASE_H__
|
||||
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTBUILDER_H__
|
||||
#define PINYINIME_INCLUDE_DICTBUILDER_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./utf16char.h"
|
||||
#include "./dictdef.h"
|
||||
#include "./dictlist.h"
|
||||
#include "./spellingtable.h"
|
||||
#include "./spellingtrie.h"
|
||||
#include "./splparser.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
|
||||
#define ___DO_STATISTICS___
|
||||
|
||||
class DictTrie;
|
||||
|
||||
class DictBuilder {
|
||||
private:
|
||||
// The raw lemma array buffer.
|
||||
LemmaEntry *lemma_arr_;
|
||||
size_t lemma_num_;
|
||||
|
||||
// Used to store all possible single char items.
|
||||
// Two items may have the same Hanzi while their spelling ids are different.
|
||||
SingleCharItem *scis_;
|
||||
size_t scis_num_;
|
||||
|
||||
// In the tree, root's level is -1.
|
||||
// Lemma nodes for root, and level 0
|
||||
LmaNodeLE0 *lma_nodes_le0_;
|
||||
|
||||
// Lemma nodes for layers whose levels are deeper than 0
|
||||
LmaNodeGE1 *lma_nodes_ge1_;
|
||||
|
||||
// Number of used lemma nodes
|
||||
size_t lma_nds_used_num_le0_;
|
||||
size_t lma_nds_used_num_ge1_;
|
||||
|
||||
// Used to store homophonies' ids.
|
||||
LemmaIdType *homo_idx_buf_;
|
||||
// Number of homophonies each of which only contains one Chinese character.
|
||||
size_t homo_idx_num_eq1_;
|
||||
// Number of homophonies each of which contains more than one character.
|
||||
size_t homo_idx_num_gt1_;
|
||||
|
||||
// The items with highest scores.
|
||||
LemmaEntry *top_lmas_;
|
||||
size_t top_lmas_num_;
|
||||
|
||||
SpellingTable *spl_table_;
|
||||
SpellingParser *spl_parser_;
|
||||
|
||||
#ifdef ___DO_STATISTICS___
|
||||
size_t max_sonbuf_len_[kMaxLemmaSize];
|
||||
size_t max_homobuf_len_[kMaxLemmaSize];
|
||||
|
||||
size_t total_son_num_[kMaxLemmaSize];
|
||||
size_t total_node_hasson_[kMaxLemmaSize];
|
||||
size_t total_sonbuf_num_[kMaxLemmaSize];
|
||||
size_t total_sonbuf_allnoson_[kMaxLemmaSize];
|
||||
size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize];
|
||||
size_t total_homo_num_[kMaxLemmaSize];
|
||||
|
||||
size_t sonbufs_num1_; // Number of son buffer with only 1 son
|
||||
size_t sonbufs_numgt1_; // Number of son buffer with more 1 son;
|
||||
|
||||
size_t total_lma_node_num_;
|
||||
|
||||
void stat_init();
|
||||
void stat_print();
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
DictBuilder();
|
||||
~DictBuilder();
|
||||
|
||||
// Build dictionary trie from the file fn_raw. File fn_validhzs provides
|
||||
// valid chars. If fn_validhzs is NULL, only chars in GB2312 will be
|
||||
// included.
|
||||
bool build_dict(const char* fn_raw, const char* fn_validhzs,
|
||||
DictTrie *dict_trie);
|
||||
|
||||
private:
|
||||
// Fill in the buffer with id. The caller guarantees that the paramters are
|
||||
// vaild.
|
||||
void id_to_charbuf(unsigned char *buf, LemmaIdType id);
|
||||
|
||||
// Update the offset of sons for a node.
|
||||
void set_son_offset(LmaNodeGE1 *node, size_t offset);
|
||||
|
||||
// Update the offset of homophonies' ids for a node.
|
||||
void set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset);
|
||||
|
||||
// Format a speling string.
|
||||
void format_spelling_str(char *spl_str);
|
||||
|
||||
// Sort the lemma_arr by the hanzi string, and give each of unique items
|
||||
// a id. Why we need to sort the lemma list according to their Hanzi string
|
||||
// is to find items started by a given prefix string to do prediction.
|
||||
// Actually, the single char items are be in other order, for example,
|
||||
// in spelling id order, etc.
|
||||
// Return value is next un-allocated idx available.
|
||||
LemmaIdType sort_lemmas_by_hz();
|
||||
|
||||
// Build the SingleCharItem list, and fill the hanzi_scis_ids in the
|
||||
// lemma buffer lemma_arr_.
|
||||
// This function should be called after the lemma array is ready.
|
||||
// Return the number of unique SingleCharItem elements.
|
||||
size_t build_scis();
|
||||
|
||||
// Construct a subtree using a subset of the spelling array (from
|
||||
// item_star to item_end)
|
||||
// parent is the parent node to update the necessary information
|
||||
// parent can be a member of LmaNodeLE0 or LmaNodeGE1
|
||||
bool construct_subset(void* parent, LemmaEntry* lemma_arr,
|
||||
size_t item_start, size_t item_end, size_t level);
|
||||
|
||||
|
||||
// Read valid Chinese Hanzis from the given file.
|
||||
// num is used to return number of chars.
|
||||
// The return buffer is sorted and caller needs to free the returned buffer.
|
||||
char16* read_valid_hanzis(const char *fn_validhzs, size_t *num);
|
||||
|
||||
|
||||
// Read a raw dictionary. max_item is the maximum number of items. If there
|
||||
// are more items in the ditionary, only the first max_item will be read.
|
||||
// Returned value is the number of items successfully read from the file.
|
||||
size_t read_raw_dict(const char* fn_raw, const char *fn_validhzs,
|
||||
size_t max_item);
|
||||
|
||||
// Try to find if a character is in hzs buffer.
|
||||
bool hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz);
|
||||
|
||||
// Try to find if all characters in str are in hzs buffer.
|
||||
bool str_in_hanzis_list(const char16 *hzs, size_t hzs_len,
|
||||
const char16 *str, size_t str_len);
|
||||
|
||||
// Get these lemmas with toppest scores.
|
||||
void get_top_lemmas();
|
||||
|
||||
// Allocate resource to build dictionary.
|
||||
// lma_num is the number of items to be loaded
|
||||
bool alloc_resource(size_t lma_num);
|
||||
|
||||
// Free resource.
|
||||
void free_resource();
|
||||
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTBUILDER_H__
|
||||
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTDEF_H__
|
||||
#define PINYINIME_INCLUDE_DICTDEF_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Enable the following line when building the binary dictionary model.
|
||||
// #define ___BUILD_MODEL___
|
||||
|
||||
// Integer aliases used throughout the engine, grouped by nominal width.
// The widths are whatever the underlying built-in types provide on the
// target platform.
typedef signed char int8;
typedef unsigned char uint8;

typedef short int16;
typedef unsigned short uint16;

typedef int int32;
typedef unsigned int uint32;

typedef long long int64;
typedef unsigned long long uint64;

// Debug print switches, all disabled.
const bool kPrintDebug0 = false;
const bool kPrintDebug1 = false;
const bool kPrintDebug2 = false;

// The maximum length of a lemma.
const size_t kMaxLemmaSize = 8;

// The maximum length of a Pinyin (spelling).
const size_t kMaxPinyinSize = 6;

// The number of half spelling ids. See SpellingTrie.h for details.
// NOTE(review): the previous comment said "for Chinese Pinyin, there 30 half
// ids" while the constant is 29 -- presumably one id (0) is reserved; confirm
// against SpellingTrie.h.
const size_t kHalfSpellingIdNum = 29;

// The maximum number of full spellings. For Chinese Pinyin there are only
// about 410 spellings.
// If this value is made bigger (so that it needs more bits), also update the
// other structures, like SpellingNode, to make sure that a spelling id can
// still be stored.
// The -1 is there because 0 is never used.
const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
const size_t kMaxSearchSteps = 40;

// One character predicts its following characters.
const size_t kMaxPredictSize = kMaxLemmaSize - 1;

// LemmaIdType must always be size_t.
typedef size_t LemmaIdType;
const size_t kLemmaIdSize = 3;  // An id actually occupies 3 bytes in storage.
const size_t kLemmaIdComposing = 0xffffff;

typedef uint16 LmaScoreType;
typedef uint16 KeyScoreType;

// Number of highest-scored items that are kept for prediction purposes.
const size_t kTopScoreLemmaNum = 10;

const size_t kMaxPredictNumByGt3 = 1;
const size_t kMaxPredictNumBy3 = 2;
const size_t kMaxPredictNumBy2 = 2;

// The last lemma id (inclusive) for the system dictionary. The system
// dictionary's ids always start from 1.
const LemmaIdType kSysDictIdEnd = 500000;

// The first lemma id for the user dictionary.
const LemmaIdType kUserDictIdStart = 500001;

// The last lemma id (inclusive) for the user dictionary.
const LemmaIdType kUserDictIdEnd = 600000;
|
||||
|
||||
typedef struct {
|
||||
uint16 half_splid:5;
|
||||
uint16 full_splid:11;
|
||||
} SpellingId, *PSpellingId;
|
||||
|
||||
|
||||
/**
|
||||
* We use different node types for different layers
|
||||
* Statistical data of the building result for a testing dictionary:
|
||||
* root, level 0, level 1, level 2, level 3
|
||||
* max son num of one node: 406 280 41 2 -
|
||||
* max homo num of one node: 0 90 23 2 2
|
||||
* total node num of a layer: 1 406 31766 13516 993
|
||||
* total homo num of a layer: 9 5674 44609 12667 995
|
||||
*
|
||||
* The node number for root and level 0 won't be larger than 500
|
||||
* According to the information above, two kinds of nodes can be used; one for
|
||||
* root and level 0, the other for these layers deeper than 0.
|
||||
*
|
||||
* LE = less and equal,
|
||||
* A node occupies 16 bytes. so, totallly less than 16 * 500 = 8K
|
||||
*/
|
||||
struct LmaNodeLE0 {
|
||||
uint32 son_1st_off;
|
||||
uint32 homo_idx_buf_off;
|
||||
uint16 spl_idx;
|
||||
uint16 num_of_son;
|
||||
uint16 num_of_homo;
|
||||
};
|
||||
|
||||
/**
|
||||
* GE = great and equal
|
||||
* A node occupies 8 bytes.
|
||||
*/
|
||||
struct LmaNodeGE1 {
|
||||
uint16 son_1st_off_l; // Low bits of the son_1st_off
|
||||
uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off_1
|
||||
uint16 spl_idx;
|
||||
unsigned char num_of_son; // number of son nodes
|
||||
unsigned char num_of_homo; // number of homo words
|
||||
unsigned char son_1st_off_h; // high bits of the son_1st_off
|
||||
unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off
|
||||
};
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
struct SingleCharItem {
|
||||
float freq;
|
||||
char16 hz;
|
||||
SpellingId splid;
|
||||
};
|
||||
|
||||
struct LemmaEntry {
|
||||
LemmaIdType idx_by_py;
|
||||
LemmaIdType idx_by_hz;
|
||||
char16 hanzi_str[kMaxLemmaSize + 1];
|
||||
|
||||
// The SingleCharItem id for each Hanzi.
|
||||
uint16 hanzi_scis_ids[kMaxLemmaSize];
|
||||
|
||||
uint16 spl_idx_arr[kMaxLemmaSize + 1];
|
||||
char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1];
|
||||
unsigned char hz_str_len;
|
||||
float freq;
|
||||
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
} // namespace ime_pinyin
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTDEF_H__
|
||||
@@ -0,0 +1,446 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "dictlist.h"
|
||||
#include "mystdlib.h"
|
||||
#include "ngram.h"
|
||||
#include "searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Creates an empty, uninitialized list: no buffers are allocated yet and the
// per-length comparison functions are installed.
DictList::DictList() {
  // No buffers are owned until alloc_resource() succeeds.
  buf_ = NULL;
  scis_splid_ = NULL;
  scis_hz_ = NULL;
  scis_num_ = 0;
  initialized_ = false;

  spl_trie_ = SpellingTrie::get_cpinstance();

  // Exactly eight comparators are installed below, one per slot of
  // cmp_func_, so the maximum lemma size has to be 8.
  assert(kMaxLemmaSize == 8);
  cmp_func_[0] = cmp_hanzis_1;
  cmp_func_[1] = cmp_hanzis_2;
  cmp_func_[2] = cmp_hanzis_3;
  cmp_func_[3] = cmp_hanzis_4;
  cmp_func_[4] = cmp_hanzis_5;
  cmp_func_[5] = cmp_hanzis_6;
  cmp_func_[6] = cmp_hanzis_7;
  cmp_func_[7] = cmp_hanzis_8;
}
|
||||
|
||||
// Releases every buffer that is still allocated.
DictList::~DictList() {
  this->free_resource();
}
|
||||
|
||||
// Allocates the three buffers of the list: buf_ with buf_size char16
// elements, and scis_hz_ / scis_splid_ with scis_num elements each.
// Returns false as soon as one allocation fails; buffers allocated before
// the failure are kept in the members and are not released here.
bool DictList::alloc_resource(size_t buf_size, size_t scis_num) {
  buf_ = static_cast<char16*>(malloc(sizeof(char16) * buf_size));
  if (!buf_)
    return false;

  // scis_num_ is only recorded once the first buffer is available.
  scis_num_ = scis_num;

  scis_hz_ = static_cast<char16*>(malloc(sizeof(char16) * scis_num_));
  if (!scis_hz_)
    return false;

  scis_splid_ =
      static_cast<SpellingId*>(malloc(sizeof(SpellingId) * scis_num_));
  return NULL != scis_splid_;
}
|
||||
|
||||
// Frees buf_, scis_hz_ and scis_splid_ and resets each pointer to NULL, so
// the function can be called again safely.
void DictList::free_resource() {
  // free() accepts a null pointer and does nothing for it, so no explicit
  // NULL checks are needed.
  free(buf_);
  buf_ = NULL;

  free(scis_hz_);
  scis_hz_ = NULL;

  free(scis_splid_);
  scis_splid_ = NULL;
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
bool DictList::init_list(const SingleCharItem *scis, size_t scis_num,
|
||||
const LemmaEntry *lemma_arr, size_t lemma_num) {
|
||||
if (NULL == scis || 0 == scis_num || NULL == lemma_arr || 0 == lemma_num)
|
||||
return false;
|
||||
|
||||
initialized_ = false;
|
||||
|
||||
if (NULL != buf_)
|
||||
free(buf_);
|
||||
|
||||
// calculate the size
|
||||
size_t buf_size = calculate_size(lemma_arr, lemma_num);
|
||||
if (0 == buf_size)
|
||||
return false;
|
||||
|
||||
if (!alloc_resource(buf_size, scis_num))
|
||||
return false;
|
||||
|
||||
fill_scis(scis, scis_num);
|
||||
|
||||
// Copy the related content from the array to inner buffer
|
||||
fill_list(lemma_arr, lemma_num);
|
||||
|
||||
initialized_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t DictList::calculate_size(const LemmaEntry* lemma_arr, size_t lemma_num) {
|
||||
size_t last_hz_len = 0;
|
||||
size_t list_size = 0;
|
||||
size_t id_num = 0;
|
||||
|
||||
for (size_t i = 0; i < lemma_num; i++) {
|
||||
if (0 == i) {
|
||||
last_hz_len = lemma_arr[i].hz_str_len;
|
||||
|
||||
assert(last_hz_len > 0);
|
||||
assert(lemma_arr[0].idx_by_hz == 1);
|
||||
|
||||
id_num++;
|
||||
start_pos_[0] = 0;
|
||||
start_id_[0] = id_num;
|
||||
|
||||
last_hz_len = 1;
|
||||
list_size += last_hz_len;
|
||||
} else {
|
||||
size_t current_hz_len = lemma_arr[i].hz_str_len;
|
||||
|
||||
assert(current_hz_len >= last_hz_len);
|
||||
|
||||
if (current_hz_len == last_hz_len) {
|
||||
list_size += current_hz_len;
|
||||
id_num++;
|
||||
} else {
|
||||
for (size_t len = last_hz_len; len < current_hz_len - 1; len++) {
|
||||
start_pos_[len] = start_pos_[len - 1];
|
||||
start_id_[len] = start_id_[len - 1];
|
||||
}
|
||||
|
||||
start_pos_[current_hz_len - 1] = list_size;
|
||||
|
||||
id_num++;
|
||||
start_id_[current_hz_len - 1] = id_num;
|
||||
|
||||
last_hz_len = current_hz_len;
|
||||
list_size += current_hz_len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = last_hz_len; i <= kMaxLemmaSize; i++) {
|
||||
if (0 == i) {
|
||||
start_pos_[0] = 0;
|
||||
start_id_[0] = 1;
|
||||
} else {
|
||||
start_pos_[i] = list_size;
|
||||
start_id_[i] = id_num;
|
||||
}
|
||||
}
|
||||
|
||||
return start_pos_[kMaxLemmaSize];
|
||||
}
|
||||
|
||||
void DictList::fill_scis(const SingleCharItem *scis, size_t scis_num) {
|
||||
assert(scis_num_ == scis_num);
|
||||
|
||||
for (size_t pos = 0; pos < scis_num_; pos++) {
|
||||
scis_hz_[pos] = scis[pos].hz;
|
||||
scis_splid_[pos] = scis[pos].splid;
|
||||
}
|
||||
}
|
||||
|
||||
void DictList::fill_list(const LemmaEntry* lemma_arr, size_t lemma_num) {
|
||||
size_t current_pos = 0;
|
||||
|
||||
utf16_strncpy(buf_, lemma_arr[0].hanzi_str,
|
||||
lemma_arr[0].hz_str_len);
|
||||
|
||||
current_pos = lemma_arr[0].hz_str_len;
|
||||
|
||||
size_t id_num = 1;
|
||||
|
||||
for (size_t i = 1; i < lemma_num; i++) {
|
||||
utf16_strncpy(buf_ + current_pos, lemma_arr[i].hanzi_str,
|
||||
lemma_arr[i].hz_str_len);
|
||||
|
||||
id_num++;
|
||||
current_pos += lemma_arr[i].hz_str_len;
|
||||
}
|
||||
|
||||
assert(current_pos == start_pos_[kMaxLemmaSize]);
|
||||
assert(id_num == start_id_[kMaxLemmaSize]);
|
||||
}
|
||||
|
||||
// Locates, inside the bucket of 2-character words, the first word whose
// leading character equals hz_char.
// Returns a pointer into buf_, or NULL when the binary search finds no match.
char16* DictList::find_pos2_startedbyhz(char16 hz_char) {
  char16 *bucket_begin = buf_ + start_pos_[1];
  char16 *found_2w = static_cast<char16*>
                     (mybsearch(&hz_char, bucket_begin,
                                (start_pos_[2] - start_pos_[1]) / 2,
                                sizeof(char16) * 2, cmp_hanzis_1));
  if (NULL == found_2w)
    return NULL;

  // The binary search may land on any word of the matching run; step back one
  // word (two char16 units) at a time. The previous word's FIRST character is
  // at found_2w - 2; found_2w - 1 is its second character and must not be
  // used for this comparison.
  while (found_2w > bucket_begin && *found_2w == *(found_2w - 2))
    found_2w -= 2;

  return found_2w;
}
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
// Searches the bucket of word_len-character words for last_hzs using cmp_func,
// then rewinds to the first word of the run that compares equal.
// Returns a pointer into buf_, or NULL when the binary search finds no match.
char16* DictList::find_pos_startedbyhzs(const char16 last_hzs[],
    size_t word_len, int (*cmp_func)(const void *, const void *)) {
  char16 *const bucket_begin = buf_ + start_pos_[word_len - 1];
  const size_t word_count =
      (start_pos_[word_len] - start_pos_[word_len - 1]) / word_len;

  char16 *hit = static_cast<char16*>(
      mybsearch(last_hzs, bucket_begin, word_count,
                sizeof(char16) * word_len, cmp_func));
  if (NULL == hit)
    return NULL;

  // Walk backwards while the preceding word still compares equal.
  for (; hit > bucket_begin; hit -= word_len) {
    if (cmp_func(hit, hit - word_len) != 0)
      break;
  }

  return hit;
}
|
||||
|
||||
size_t DictList::predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used) {
|
||||
assert(hzs_len <= kMaxPredictSize && hzs_len > 0);
|
||||
|
||||
// 1. Prepare work
|
||||
int (*cmp_func)(const void *, const void *) = cmp_func_[hzs_len - 1];
|
||||
|
||||
NGram& ngram = NGram::get_instance();
|
||||
|
||||
size_t item_num = 0;
|
||||
|
||||
// 2. Do prediction
|
||||
for (uint16 pre_len = 1; pre_len <= kMaxPredictSize + 1 - hzs_len;
|
||||
pre_len++) {
|
||||
uint16 word_len = hzs_len + pre_len;
|
||||
char16 *w_buf = find_pos_startedbyhzs(last_hzs, word_len, cmp_func);
|
||||
if (NULL == w_buf)
|
||||
continue;
|
||||
while (w_buf < buf_ + start_pos_[word_len] &&
|
||||
cmp_func(w_buf, last_hzs) == 0 &&
|
||||
item_num < npre_max) {
|
||||
memset(npre_items + item_num, 0, sizeof(NPredictItem));
|
||||
utf16_strncpy(npre_items[item_num].pre_hzs, w_buf + hzs_len, pre_len);
|
||||
npre_items[item_num].psb =
|
||||
ngram.get_uni_psb((size_t)(w_buf - buf_ - start_pos_[word_len - 1])
|
||||
/ word_len + start_id_[word_len - 1]);
|
||||
npre_items[item_num].his_len = hzs_len;
|
||||
item_num++;
|
||||
w_buf += word_len;
|
||||
}
|
||||
}
|
||||
|
||||
size_t new_num = 0;
|
||||
for (size_t i = 0; i < item_num; i++) {
|
||||
// Try to find it in the existing items
|
||||
size_t e_pos;
|
||||
for (e_pos = 1; e_pos <= b4_used; e_pos++) {
|
||||
if (utf16_strncmp((*(npre_items - e_pos)).pre_hzs, npre_items[i].pre_hzs,
|
||||
kMaxPredictSize) == 0)
|
||||
break;
|
||||
}
|
||||
if (e_pos <= b4_used)
|
||||
continue;
|
||||
|
||||
// If not found, append it to the buffer
|
||||
npre_items[new_num] = npre_items[i];
|
||||
new_num++;
|
||||
}
|
||||
|
||||
return new_num;
|
||||
}
|
||||
|
||||
// Copies the Hanzi string of lemma id_lemma into str_buf and terminates it
// with a 0 character.
// str_max is the capacity of str_buf in char16 units, terminator included.
// Returns the string length, or 0 when the list is not initialized, the id is
// not below start_id_[kMaxLemmaSize], str_buf is NULL, or the buffer is too
// small for the bucket reached while searching.
uint16 DictList::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
                               uint16 str_max) {
  const bool usable = initialized_ && id_lemma < start_id_[kMaxLemmaSize] &&
                      NULL != str_buf && str_max > 1;
  if (!usable)
    return 0;

  // Look for the length bucket whose id range contains id_lemma.
  for (uint16 bucket = 0; bucket < kMaxLemmaSize; bucket++) {
    // Words of this bucket have bucket + 1 characters and need one more slot
    // for the terminator.
    if (bucket + 1 > str_max - 1)
      return 0;

    if (id_lemma < start_id_[bucket] || id_lemma >= start_id_[bucket + 1])
      continue;

    const size_t rank = id_lemma - start_id_[bucket];
    const char16 *src = buf_ + start_pos_[bucket] + rank * (bucket + 1);

    uint16 copied = 0;
    while (copied <= bucket) {
      str_buf[copied] = src[copied];
      copied++;
    }
    str_buf[bucket + 1] = (char16)'\0';
    return bucket + 1;
  }

  return 0;
}
|
||||
|
||||
// Collects the full spelling ids recorded for the character hanzi.
//
// half_splid: 0 accepts every spelling. Otherwise, if at least one entry of
//             the character has exactly this half id, only such entries are
//             returned; if none has, entries accepted by
//             SpellingTrie::half_full_compatible() are returned instead.
// splids / max_splids: output array and its capacity.
// Returns the number of ids written (0 when the character is not found).
uint16 DictList::get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
                                      uint16 *splids, uint16 max_splids) {
  char16 *hz_found = static_cast<char16*>
      (mybsearch(&hanzi, scis_hz_, scis_num_, sizeof(char16), cmp_hanzis_1));
  assert(NULL != hz_found && hanzi == *hz_found);

  // With asserts compiled out a miss must not be dereferenced below.
  if (NULL == hz_found)
    return 0;

  // Move to the first one.
  while (hz_found > scis_hz_ && hanzi == *(hz_found - 1))
    hz_found--;

  char16 *const scis_end = scis_hz_ + scis_num_;

  // First pass: find out whether a strict (exact half id) match exists.
  bool strict = false;
  for (char16 *hz_f = hz_found; hz_f < scis_end && hanzi == *hz_f; hz_f++) {
    // size_t index: a 16-bit index would wrap for tables above 65535 entries.
    const size_t pos = static_cast<size_t>(hz_f - scis_hz_);
    if (0 == half_splid || scis_splid_[pos].half_splid == half_splid)
      strict = true;
  }

  // Second pass: emit the accepted full ids.
  uint16 found_num = 0;
  for (; hz_found < scis_end && hanzi == *hz_found; hz_found++) {
    const size_t pos = static_cast<size_t>(hz_found - scis_hz_);
    const bool accepted =
        0 == half_splid ||
        (strict && scis_splid_[pos].half_splid == half_splid) ||
        (!strict && spl_trie_->half_full_compatible(half_splid,
                                                    scis_splid_[pos].full_splid));
    if (!accepted)
      continue;

    assert(found_num + 1 < max_splids);
    // Release builds: stop instead of writing past the end of splids.
    if (found_num >= max_splids)
      break;

    splids[found_num] = scis_splid_[pos].full_splid;
    found_num++;
  }

  return found_num;
}
|
||||
|
||||
// Looks up the lemma id of the Hanzi string str (str_len characters).
// Returns 0 when str is NULL, when str_len is 0 or larger than kMaxLemmaSize,
// or when the string is not in the list.
LemmaIdType DictList::get_lemma_id(const char16 *str, uint16 str_len) {
  // str_len == 0 would index cmp_func_[-1] / start_pos_[-1] and divide by
  // zero below, so it is rejected together with the other invalid inputs.
  if (NULL == str || 0 == str_len || str_len > kMaxLemmaSize)
    return 0;

  char16 *found = find_pos_startedbyhzs(str, str_len, cmp_func_[str_len - 1]);
  if (NULL == found)
    return 0;

  // The very first lemma is stored at buf_ itself (start_pos_[0] is 0), so
  // equality is a valid result.
  assert(found >= buf_);
  assert(static_cast<size_t>(found - buf_) >= start_pos_[str_len - 1]);

  // id = first id of the bucket + rank of the word inside the bucket.
  return static_cast<LemmaIdType>
      (start_id_[str_len - 1] +
       (found - buf_ - start_pos_[str_len - 1]) / str_len);
}
|
||||
|
||||
// Replaces, in place, each of the str_len values in str by the entry of
// scis_hz_ found at that index.
void DictList::convert_to_hanzis(char16 *str, uint16 str_len) {
  assert(NULL != str);

  uint16 idx = 0;
  while (idx < str_len) {
    const char16 scis_id = str[idx];
    str[idx] = scis_hz_[scis_id];
    idx++;
  }
}
|
||||
|
||||
// Overwrites each of the str_len elements of str with the constant 0x100.
// NOTE(review): no lookup is performed here; this looks like a placeholder
// implementation - confirm before relying on it.
void DictList::convert_to_scis_ids(char16 *str, uint16 str_len) {
  assert(NULL != str);

  char16 *const end = str + str_len;
  for (char16 *cur = str; cur < end; ++cur)
    *cur = 0x100;
}
|
||||
|
||||
bool DictList::save_list(FILE *fp) {
|
||||
if (!initialized_ || NULL == fp)
|
||||
return false;
|
||||
|
||||
if (NULL == buf_ || 0 == start_pos_[kMaxLemmaSize] ||
|
||||
NULL == scis_hz_ || NULL == scis_splid_ || 0 == scis_num_)
|
||||
return false;
|
||||
|
||||
if (fwrite(&scis_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(start_pos_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(start_id_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fwrite(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fwrite(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) !=
|
||||
start_pos_[kMaxLemmaSize])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictList::load_list(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
initialized_ = false;
|
||||
|
||||
if (fread(&scis_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(start_pos_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
if (fread(start_id_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
free_resource();
|
||||
|
||||
if (!alloc_resource(start_pos_[kMaxLemmaSize], scis_num_))
|
||||
return false;
|
||||
|
||||
if (fread(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fread(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fread(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) !=
|
||||
start_pos_[kMaxLemmaSize])
|
||||
return false;
|
||||
|
||||
initialized_ = true;
|
||||
return true;
|
||||
}
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTLIST_H__
|
||||
#define PINYINIME_INCLUDE_DICTLIST_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "./dictdef.h"
|
||||
#include "./searchutility.h"
|
||||
#include "./spellingtrie.h"
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class DictList {
|
||||
private:
|
||||
bool initialized_;
|
||||
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
// Number of SingCharItem. The first is blank, because id 0 is invalid.
|
||||
uint32 scis_num_;
|
||||
char16 *scis_hz_;
|
||||
SpellingId *scis_splid_;
|
||||
|
||||
// The large memory block to store the word list.
|
||||
char16 *buf_;
|
||||
|
||||
// Starting position of those words whose lengths are i+1, counted in
|
||||
// char16
|
||||
uint32 start_pos_[kMaxLemmaSize + 1];
|
||||
|
||||
uint32 start_id_[kMaxLemmaSize + 1];
|
||||
|
||||
int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
|
||||
|
||||
bool alloc_resource(size_t buf_size, size_t scim_num);
|
||||
|
||||
void free_resource();
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Calculate the requsted memory, including the start_pos[] buffer.
|
||||
size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num);
|
||||
|
||||
void fill_scis(const SingleCharItem *scis, size_t scis_num);
|
||||
|
||||
// Copy the related content to the inner buffer
|
||||
// It should be called after calculate_size()
|
||||
void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num);
|
||||
|
||||
// Find the starting position for the buffer of those 2-character Chinese word
|
||||
// whose first character is the given Chinese character.
|
||||
char16* find_pos2_startedbyhz(char16 hz_char);
|
||||
#endif
|
||||
|
||||
// Find the starting position for the buffer of those words whose lengths are
|
||||
// word_len. The given parameter cmp_func decides how many characters from
|
||||
// beginning will be used to compare.
|
||||
char16* find_pos_startedbyhzs(const char16 last_hzs[],
|
||||
size_t word_Len,
|
||||
int (*cmp_func)(const void *, const void *));
|
||||
|
||||
public:
|
||||
|
||||
DictList();
|
||||
~DictList();
|
||||
|
||||
bool save_list(FILE *fp);
|
||||
bool load_list(FILE *fp);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Init the list from the LemmaEntry array.
|
||||
// lemma_arr should have been sorted by the hanzi_str, and have been given
|
||||
// ids from 1
|
||||
bool init_list(const SingleCharItem *scis, size_t scis_num,
|
||||
const LemmaEntry *lemma_arr, size_t lemma_num);
|
||||
#endif
|
||||
|
||||
// Get the hanzi string for the given id
|
||||
uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
|
||||
|
||||
void convert_to_hanzis(char16 *str, uint16 str_len);
|
||||
|
||||
void convert_to_scis_ids(char16 *str, uint16 str_len);
|
||||
|
||||
// last_hzs stores the last n Chinese characters history, its length should be
|
||||
// less or equal than kMaxPredictSize.
|
||||
// hzs_len specifies the length(<= kMaxPredictSize).
|
||||
// predict_buf is used to store the result.
|
||||
// buf_len specifies the buffer length.
|
||||
// b4_used specifies how many items before predict_buf have been used.
|
||||
// Returned value is the number of newly added items.
|
||||
size_t predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used);
|
||||
|
||||
// If half_splid is a valid half spelling id, return those full spelling
|
||||
// ids which share this half id.
|
||||
uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
|
||||
uint16 *splids, uint16 max_splids);
|
||||
|
||||
LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTLIST_H__
|
||||
@@ -0,0 +1,941 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "dicttrie.h"
|
||||
#include "dictbuilder.h"
|
||||
#include "lpicache.h"
|
||||
#include "mystdlib.h"
|
||||
#include "ngram.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Creates an empty trie: no buffers are allocated, all counters are zero and
// the milestone bookkeeping is reset to its initial state.
DictTrie::DictTrie() {
  spl_trie_ = SpellingTrie::get_cpinstance();

  // Node storage and lemma index buffer.
  root_ = NULL;
  splid_le0_index_ = NULL;
  nodes_ge1_ = NULL;
  lma_idx_buf_ = NULL;
  dict_list_ = NULL;

  // Sizes and counters.
  lma_node_num_le0_ = 0;
  lma_node_num_ge1_ = 0;
  lma_idx_buf_len_ = 0;
  total_lma_num_ = 0;
  top_lmas_num_ = 0;

  // Parsing state.
  parsing_marks_ = NULL;
  mile_stones_ = NULL;
  reset_milestones(0, kFirstValidMileStoneHandle);
}
|
||||
|
||||
// Destructor: releases every buffer, the DictList included.
DictTrie::~DictTrie() {
  const bool also_free_dict_list = true;
  free_resource(also_free_dict_list);
}
|
||||
|
||||
void DictTrie::free_resource(bool free_dict_list) {
|
||||
if (NULL != root_)
|
||||
free(root_);
|
||||
root_ = NULL;
|
||||
|
||||
if (NULL != splid_le0_index_)
|
||||
free(splid_le0_index_);
|
||||
splid_le0_index_ = NULL;
|
||||
|
||||
if (NULL != nodes_ge1_)
|
||||
free(nodes_ge1_);
|
||||
nodes_ge1_ = NULL;
|
||||
|
||||
if (NULL != lma_idx_buf_)
|
||||
free(lma_idx_buf_);
|
||||
lma_idx_buf_ = NULL;
|
||||
|
||||
if (free_dict_list) {
|
||||
if (NULL != dict_list_) {
|
||||
delete dict_list_;
|
||||
}
|
||||
dict_list_ = NULL;
|
||||
}
|
||||
|
||||
if (parsing_marks_)
|
||||
delete [] parsing_marks_;
|
||||
parsing_marks_ = NULL;
|
||||
|
||||
if (mile_stones_)
|
||||
delete [] mile_stones_;
|
||||
mile_stones_ = NULL;
|
||||
|
||||
reset_milestones(0, kFirstValidMileStoneHandle);
|
||||
}
|
||||
|
||||
inline size_t DictTrie::get_son_offset(const LmaNodeGE1 *node) {
|
||||
return ((size_t)node->son_1st_off_l + ((size_t)node->son_1st_off_h << 16));
|
||||
}
|
||||
|
||||
inline size_t DictTrie::get_homo_idx_buf_offset(const LmaNodeGE1 *node) {
|
||||
return ((size_t)node->homo_idx_buf_off_l +
|
||||
((size_t)node->homo_idx_buf_off_h << 16));
|
||||
}
|
||||
|
||||
// Decodes the lemma id stored at slot id_offset of lma_idx_buf_.
// Each slot is kLemmaIdSize bytes long with the least significant byte first.
inline LemmaIdType DictTrie::get_lemma_id(size_t id_offset) {
  const size_t slot_start = id_offset * kLemmaIdSize;

  // Fold the bytes from the most significant (last) down to the first.
  LemmaIdType id = 0;
  for (size_t remaining = kLemmaIdSize; remaining > 0; remaining--)
    id = (id << 8) + lma_idx_buf_[slot_start + remaining - 1];

  return id;
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
bool DictTrie::build_dict(const char* fn_raw, const char* fn_validhzs) {
|
||||
DictBuilder* dict_builder = new DictBuilder();
|
||||
|
||||
free_resource(true);
|
||||
|
||||
return dict_builder->build_dict(fn_raw, fn_validhzs, this);
|
||||
}
|
||||
|
||||
bool DictTrie::save_dict(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (fwrite(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&top_lmas_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp)
|
||||
!= lma_node_num_le0_)
|
||||
return false;
|
||||
|
||||
if (fwrite(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp)
|
||||
!= lma_node_num_ge1_)
|
||||
return false;
|
||||
|
||||
if (fwrite(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) !=
|
||||
lma_idx_buf_len_)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictTrie::save_dict(const char *filename) {
|
||||
if (NULL == filename)
|
||||
return false;
|
||||
|
||||
if (NULL == root_ || NULL == dict_list_)
|
||||
return false;
|
||||
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
FILE *fp = fopen(filename, "wb");
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (!spl_trie.save_spl_trie(fp) || !dict_list_->save_list(fp) ||
|
||||
!save_dict(fp) || !ngram.save_ngram(fp)) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
bool DictTrie::load_dict(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
if (fread(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&top_lmas_num_, sizeof(uint32), 1, fp) != 1 ||
|
||||
top_lmas_num_ >= lma_idx_buf_len_)
|
||||
return false;
|
||||
|
||||
free_resource(false);
|
||||
|
||||
root_ = static_cast<LmaNodeLE0*>
|
||||
(malloc(lma_node_num_le0_ * sizeof(LmaNodeLE0)));
|
||||
nodes_ge1_ = static_cast<LmaNodeGE1*>
|
||||
(malloc(lma_node_num_ge1_ * sizeof(LmaNodeGE1)));
|
||||
lma_idx_buf_ = (unsigned char*)malloc(lma_idx_buf_len_);
|
||||
total_lma_num_ = lma_idx_buf_len_ / kLemmaIdSize;
|
||||
|
||||
size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1;
|
||||
assert(lma_node_num_le0_ <= buf_size);
|
||||
splid_le0_index_ = static_cast<uint16*>(malloc(buf_size * sizeof(uint16)));
|
||||
|
||||
// Init the space for parsing.
|
||||
parsing_marks_ = new ParsingMark[kMaxParsingMark];
|
||||
mile_stones_ = new MileStone[kMaxMileStone];
|
||||
reset_milestones(0, kFirstValidMileStoneHandle);
|
||||
|
||||
if (NULL == root_ || NULL == nodes_ge1_ || NULL == lma_idx_buf_ ||
|
||||
NULL == splid_le0_index_ || NULL == parsing_marks_ ||
|
||||
NULL == mile_stones_) {
|
||||
free_resource(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fread(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp)
|
||||
!= lma_node_num_le0_)
|
||||
return false;
|
||||
|
||||
if (fread(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp)
|
||||
!= lma_node_num_ge1_)
|
||||
return false;
|
||||
|
||||
if (fread(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) !=
|
||||
lma_idx_buf_len_)
|
||||
return false;
|
||||
|
||||
// The quick index for the first level sons
|
||||
uint16 last_splid = kFullSplIdStart;
|
||||
size_t last_pos = 0;
|
||||
for (size_t i = 1; i < lma_node_num_le0_; i++) {
|
||||
for (uint16 splid = last_splid; splid < root_[i].spl_idx; splid++)
|
||||
splid_le0_index_[splid - kFullSplIdStart] = last_pos;
|
||||
|
||||
splid_le0_index_[root_[i].spl_idx - kFullSplIdStart] =
|
||||
static_cast<uint16>(i);
|
||||
last_splid = root_[i].spl_idx;
|
||||
last_pos = i;
|
||||
}
|
||||
|
||||
for (uint16 splid = last_splid + 1;
|
||||
splid < buf_size + kFullSplIdStart; splid++) {
|
||||
assert(static_cast<size_t>(splid - kFullSplIdStart) < buf_size);
|
||||
splid_le0_index_[splid - kFullSplIdStart] = last_pos + 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictTrie::load_dict(const char *filename, LemmaIdType start_id,
|
||||
LemmaIdType end_id) {
|
||||
if (NULL == filename || end_id <= start_id)
|
||||
return false;
|
||||
|
||||
FILE *fp = fopen(filename, "rb");
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
free_resource(true);
|
||||
|
||||
dict_list_ = new DictList();
|
||||
if (NULL == dict_list_) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) ||
|
||||
!load_dict(fp) || !ngram.load_ngram(fp) ||
|
||||
total_lma_num_ > end_id - start_id + 1) {
|
||||
free_resource(true);
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictTrie::load_dict_fd(int sys_fd, long start_offset,
|
||||
long length, LemmaIdType start_id,
|
||||
LemmaIdType end_id) {
|
||||
if (start_offset < 0 || length <= 0 || end_id <= start_id)
|
||||
return false;
|
||||
|
||||
FILE *fp = fdopen(sys_fd, "rb");
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (-1 == fseek(fp, start_offset, SEEK_SET)) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
free_resource(true);
|
||||
|
||||
dict_list_ = new DictList();
|
||||
if (NULL == dict_list_) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) ||
|
||||
!load_dict(fp) || !ngram.load_ngram(fp) ||
|
||||
ftell(fp) < start_offset + length ||
|
||||
total_lma_num_ > end_id - start_id + 1) {
|
||||
free_resource(true);
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fills lpi_items with the homophones of a level-0 node: for each one the
// lemma id, a lemma length of 1 and the unigram score of that id.
// At most lpi_max items are written.
// Returns the number of items written.
size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max,
                                 LmaNodeLE0 *node) {
  NGram& ngram = NGram::get_instance();
  const size_t homo_total = (size_t)node->num_of_homo;

  // The capacity is tested before each write so that lpi_max == 0 writes
  // nothing; testing only after the write would overflow an empty buffer.
  size_t lpi_num = 0;
  while (lpi_num < homo_total && lpi_num < lpi_max) {
    lpi_items[lpi_num].id = get_lemma_id(node->homo_idx_buf_off + lpi_num);
    lpi_items[lpi_num].lma_len = 1;
    lpi_items[lpi_num].psb =
        static_cast<LmaScoreType>(ngram.get_uni_psb(lpi_items[lpi_num].id));
    lpi_num++;
  }

  return lpi_num;
}
|
||||
|
||||
// Fills lpi_items with the homophones of a deeper (GE1) node: for each one the
// lemma id read at homo_buf_off + index, the given lemma length lma_len and
// the unigram score of that id.
// At most lpi_max items are written.
// Returns the number of items written.
size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max,
                                 size_t homo_buf_off, LmaNodeGE1 *node,
                                 uint16 lma_len) {
  NGram& ngram = NGram::get_instance();
  const size_t homo_total = (size_t)node->num_of_homo;

  // The capacity is tested before each write so that lpi_max == 0 writes
  // nothing; testing only after the write would overflow an empty buffer.
  size_t lpi_num = 0;
  while (lpi_num < homo_total && lpi_num < lpi_max) {
    lpi_items[lpi_num].id = get_lemma_id(homo_buf_off + lpi_num);
    lpi_items[lpi_num].lma_len = lma_len;
    lpi_items[lpi_num].psb =
        static_cast<LmaScoreType>(ngram.get_uni_psb(lpi_items[lpi_num].id));
    lpi_num++;
  }

  return lpi_num;
}
|
||||
|
||||
// Rewinds the milestone / parsing-mark bookkeeping.
// from_step == 0: full reset to the initial positions.
// otherwise:      if from_handle names an existing milestone (greater than 0
//                 and below mile_stones_pos_), both positions are moved back
//                 to that milestone; any other handle leaves the state as is.
void DictTrie::reset_milestones(uint16 from_step, MileStoneHandle from_handle) {
  if (0 == from_step) {
    parsing_marks_pos_ = 0;
    mile_stones_pos_ = kFirstValidMileStoneHandle;
    return;
  }

  const bool handle_in_use = from_handle > 0 && from_handle < mile_stones_pos_;
  if (!handle_in_use)
    return;

  mile_stones_pos_ = from_handle;
  parsing_marks_pos_ = mile_stones_[from_handle].mark_start;
}
|
||||
|
||||
// Extends the search by one spelling and dispatches to the variant matching
// the current depth:
//   from_handle == 0          -> extend_dict0 (root to level-0 nodes),
//   dep->splids_extended == 1 -> extend_dict1 (level-0 to GE1 nodes),
//   anything else             -> extend_dict2 (GE1 to GE1 nodes).
// Returns 0 when dep is NULL, otherwise whatever the chosen variant returns.
MileStoneHandle DictTrie::extend_dict(MileStoneHandle from_handle,
                                      const DictExtPara *dep,
                                      LmaPsbItem *lpi_items, size_t lpi_max,
                                      size_t *lpi_num) {
  if (NULL == dep)
    return 0;

  if (0 == from_handle) {
    assert(0 == dep->splids_extended);
    return extend_dict0(from_handle, dep, lpi_items, lpi_max, lpi_num);
  }

  return (1 == dep->splids_extended)
             ? extend_dict1(from_handle, dep, lpi_items, lpi_max, lpi_num)
             : extend_dict2(from_handle, dep, lpi_items, lpi_max, lpi_num);
}
|
||||
|
||||
// Extension from the root to the level-0 nodes.
//
// Visits the level-0 nodes selected through splid_le0_index_ for the id range
// [dep->id_start, dep->id_start + dep->id_num). Unless the spelling is
// reported as cached by LpiCache, the lemma items of the visited nodes are
// appended to lpi_items (for a half id that is a yunmu, only those of the
// first node). When the node whose spelling id equals id_start is met and
// there is room, a parsing mark and a milestone are recorded.
// *lpi_num is reset to 0 on entry and receives the number of items written.
// Returns the handle of the new milestone, or 0 if none was created.
MileStoneHandle DictTrie::extend_dict0(MileStoneHandle from_handle,
                                       const DictExtPara *dep,
                                       LmaPsbItem *lpi_items,
                                       size_t lpi_max, size_t *lpi_num) {
  assert(NULL != dep && 0 == from_handle);
  *lpi_num = 0;

  const uint16 splid = dep->splids[dep->splids_extended];
  const uint16 id_start = dep->id_start;
  const uint16 id_num = dep->id_num;

  const bool cached = LpiCache::get_instance().is_cached(splid);

  const size_t first_son = splid_le0_index_[id_start - kFullSplIdStart];
  const size_t sons_end =
      splid_le0_index_[id_start + id_num - kFullSplIdStart];

  MileStoneHandle new_handle = 0;
  for (size_t pos = first_son; pos < sons_end; pos++) {
    assert(1 == root_->son_1st_off);
    LmaNodeLE0 *son = root_ + pos;
    assert(son->spl_idx >= id_start && son->spl_idx < id_start + id_num);

    // Collect lemma items unless they are cached or the buffer is full; a
    // yunmu half id only contributes the items of the first node.
    const bool want_items =
        !cached && *lpi_num < lpi_max &&
        !(spl_trie_->is_half_id_yunmu(splid) && pos != first_son);
    if (want_items) {
      *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num),
                                  lpi_max - *lpi_num, son);
    }

    // If necessary, fill in a new mile stone.
    const bool has_room = mile_stones_pos_ < kMaxMileStone &&
                          parsing_marks_pos_ < kMaxParsingMark;
    if (son->spl_idx == id_start && has_room) {
      ParsingMark &mark = parsing_marks_[parsing_marks_pos_];
      mark.node_offset = pos;
      mark.node_num = id_num;

      MileStone &stone = mile_stones_[mile_stones_pos_];
      stone.mark_start = parsing_marks_pos_;
      stone.mark_num = 1;

      new_handle = mile_stones_pos_;
      parsing_marks_pos_++;
      mile_stones_pos_++;
    }

    // The last id of the range has been reached.
    if (son->spl_idx >= id_start + id_num - 1)
      break;
  }

  return new_handle;
}
|
||||
|
||||
// Extension from level-0 nodes to GE1 nodes.
//
// For every level-0 node referenced by the parsing marks of milestone
// from_handle, the sons are scanned for spelling ids inside
// [dep->id_start, dep->id_start + dep->id_num). Lemma items of matching sons
// are appended to lpi_items with lemma length 2 (while *lpi_num < lpi_max;
// *lpi_num is not reset here). Each parent with at least one match
// contributes one parsing mark covering its run of matching sons, provided
// there is room. If any parent matched, a new milestone owning those marks is
// created.
// Returns the handle of the new milestone, or 0 if nothing matched.
MileStoneHandle DictTrie::extend_dict1(MileStoneHandle from_handle,
                                       const DictExtPara *dep,
                                       LmaPsbItem *lpi_items,
                                       size_t lpi_max, size_t *lpi_num) {
  assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_);

  const uint16 id_start = dep->id_start;
  const uint16 id_num = dep->id_num;

  // Number of parents that produced a match.
  size_t matched_parents = 0;

  MileStone *origin = mile_stones_ + from_handle;

  for (uint16 mark_idx = 0; mark_idx < origin->mark_num; mark_idx++) {
    // Copied by value: new marks are appended to parsing_marks_ below.
    ParsingMark mark = parsing_marks_[origin->mark_start + mark_idx];
    const uint16 parent_count = mark.node_num;

    for (uint16 parent_idx = 0; parent_idx < parent_count; parent_idx++) {
      LmaNodeLE0 *parent = root_ + mark.node_offset + parent_idx;
      size_t run_start = 0;   // position of the first matching son
      size_t run_len = 0;     // number of matching sons

      for (size_t son_pos = 0; son_pos < (size_t)parent->num_of_son;
           son_pos++) {
        assert(parent->son_1st_off <= lma_node_num_ge1_);
        LmaNodeGE1 *son = nodes_ge1_ + parent->son_1st_off + son_pos;

        const bool in_range = son->spl_idx >= id_start &&
                              son->spl_idx < id_start + id_num;
        if (in_range) {
          if (*lpi_num < lpi_max) {
            const size_t homo_buf_off = get_homo_idx_buf_offset(son);
            *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num),
                                        lpi_max - *lpi_num, homo_buf_off, son,
                                        2);
          }

          if (0 == run_len)
            run_start = son_pos;
          run_len++;
        }

        // Keep scanning until the end of the id range or the last son.
        const bool scan_done =
            son->spl_idx >= id_start + id_num - 1 ||
            son_pos == (size_t)parent->num_of_son - 1;
        if (!scan_done)
          continue;

        if (run_len > 0) {
          if (mile_stones_pos_ < kMaxMileStone &&
              parsing_marks_pos_ < kMaxParsingMark) {
            parsing_marks_[parsing_marks_pos_].node_offset =
                parent->son_1st_off + run_start;
            parsing_marks_[parsing_marks_pos_].node_num = run_len;
            // The first recorded mark is where the new milestone begins.
            if (0 == matched_parents)
              mile_stones_[mile_stones_pos_].mark_start = parsing_marks_pos_;
            parsing_marks_pos_++;
          }

          matched_parents++;
        }
        break;
      }  // for son_pos
    }  // for parent_idx
  }  // for mark_idx

  MileStoneHandle new_handle = 0;
  if (matched_parents > 0) {
    mile_stones_[mile_stones_pos_].mark_num = matched_parents;
    new_handle = mile_stones_pos_;
    mile_stones_pos_++;
  }

  return new_handle;
}
|
||||
|
||||
// Extends the milestone |from_handle| by one spelling id, starting from
// LmaNodeGE1 nodes (i.e. from trie level 2 or deeper).
//
// For every node referenced by the parsing marks of |from_handle|, the sons
// whose spl_idx lies in [dep->id_start, dep->id_start + dep->id_num) are
// collected: their lemmas are appended to lpi_items while *lpi_num < lpi_max,
// and each run of matching sons of one node is stored as a new parsing mark.
//
// Parameters:
//   from_handle - milestone to extend; must be > 0 and < mile_stones_pos_.
//   dep         - extension parameters (id_start, id_num, splids_extended).
//   lpi_items   - output buffer for the lemma items found.
//   lpi_max     - capacity of lpi_items.
//   lpi_num     - in/out: number of items already stored in lpi_items.
//
// Returns the handle of the newly created milestone, or 0 when no matching
// son exists or when no parsing mark could be stored because the milestone or
// parsing-mark pool is exhausted.
MileStoneHandle DictTrie::extend_dict2(MileStoneHandle from_handle,
                                       const DictExtPara *dep,
                                       LmaPsbItem *lpi_items,
                                       size_t lpi_max, size_t *lpi_num) {
  assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_);

  MileStoneHandle ret_handle = 0;

  // Number of parsing marks actually stored for the new milestone.
  size_t ret_val = 0;

  uint16 id_start = dep->id_start;
  uint16 id_num = dep->id_num;

  MileStone *mile_stone = mile_stones_ + from_handle;

  for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) {
    // Work on a copy: new marks are appended to parsing_marks_ below.
    ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos];
    uint16 ext_num = p_mark.node_num;
    for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) {
      LmaNodeGE1 *node = nodes_ge1_ + p_mark.node_offset + ext_pos;
      size_t found_start = 0;
      size_t found_num = 0;

      for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) {
        assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0);
        LmaNodeGE1 *son = nodes_ge1_ + get_son_offset(node) + son_pos;

        if (son->spl_idx >= id_start
            && son->spl_idx < id_start + id_num) {
          if (*lpi_num < lpi_max) {
            size_t homo_buf_off = get_homo_idx_buf_offset(son);
            *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num),
                                        lpi_max - *lpi_num, homo_buf_off, son,
                                        dep->splids_extended + 1);
          }

          // Remember where the run of matching sons starts.
          if (0 == found_num) {
            found_start = son_pos;
          }
          found_num++;
        }

        // Stop at the last id of the range or at the last son. The early
        // stop presumably relies on sons being ordered by spl_idx.
        if (son->spl_idx >= id_start + id_num - 1 || son_pos ==
            (size_t)node->num_of_son - 1) {
          if (found_num > 0) {
            if (mile_stones_pos_ < kMaxMileStone &&
                parsing_marks_pos_ < kMaxParsingMark) {
              parsing_marks_[parsing_marks_pos_].node_offset =
                get_son_offset(node) + found_start;
              parsing_marks_[parsing_marks_pos_].node_num = found_num;
              if (0 == ret_val)
                mile_stones_[mile_stones_pos_].mark_start =
                  parsing_marks_pos_;
              parsing_marks_pos_++;

              // Count only marks that were really stored, so that mark_num
              // never covers unwritten entries and a full milestone pool
              // never leads to a write past its end below.
              ret_val++;
            }
          }
          break;
        }
      }  // for son_pos
    }  // for ext_pos
  }  // for h_pos

  // ret_val > 0 implies mile_stones_pos_ < kMaxMileStone (checked above and
  // unchanged inside the loops), so this slot is valid.
  if (ret_val > 0) {
    mile_stones_[mile_stones_pos_].mark_num = static_cast<uint16>(ret_val);
    ret_handle = mile_stones_pos_;
    mile_stones_pos_++;
  }

  return ret_handle;
}
|
||||
|
||||
bool DictTrie::try_extend(const uint16 *splids, uint16 splid_num,
|
||||
LemmaIdType id_lemma) {
|
||||
if (0 == splid_num || NULL == splids)
|
||||
return false;
|
||||
|
||||
void *node = root_ + splid_le0_index_[splids[0] - kFullSplIdStart];
|
||||
|
||||
for (uint16 pos = 1; pos < splid_num; pos++) {
|
||||
if (1 == pos) {
|
||||
LmaNodeLE0 *node_le0 = reinterpret_cast<LmaNodeLE0*>(node);
|
||||
LmaNodeGE1 *node_son;
|
||||
uint16 son_pos;
|
||||
for (son_pos = 0; son_pos < static_cast<uint16>(node_le0->num_of_son);
|
||||
son_pos++) {
|
||||
assert(node_le0->son_1st_off <= lma_node_num_ge1_);
|
||||
node_son = nodes_ge1_ + node_le0->son_1st_off
|
||||
+ son_pos;
|
||||
if (node_son->spl_idx == splids[pos])
|
||||
break;
|
||||
}
|
||||
if (son_pos < node_le0->num_of_son)
|
||||
node = reinterpret_cast<void*>(node_son);
|
||||
else
|
||||
return false;
|
||||
} else {
|
||||
LmaNodeGE1 *node_ge1 = reinterpret_cast<LmaNodeGE1*>(node);
|
||||
LmaNodeGE1 *node_son;
|
||||
uint16 son_pos;
|
||||
for (son_pos = 0; son_pos < static_cast<uint16>(node_ge1->num_of_son);
|
||||
son_pos++) {
|
||||
assert(node_ge1->son_1st_off_l > 0 || node_ge1->son_1st_off_h > 0);
|
||||
node_son = nodes_ge1_ + get_son_offset(node_ge1) + son_pos;
|
||||
if (node_son->spl_idx == splids[pos])
|
||||
break;
|
||||
}
|
||||
if (son_pos < node_ge1->num_of_son)
|
||||
node = reinterpret_cast<void*>(node_son);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (1 == splid_num) {
|
||||
LmaNodeLE0* node_le0 = reinterpret_cast<LmaNodeLE0*>(node);
|
||||
size_t num_of_homo = (size_t)node_le0->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
LemmaIdType id_this = get_lemma_id(node_le0->homo_idx_buf_off + homo_pos);
|
||||
char16 str[2];
|
||||
get_lemma_str(id_this, str, 2);
|
||||
if (id_this == id_lemma)
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
LmaNodeGE1* node_ge1 = reinterpret_cast<LmaNodeGE1*>(node);
|
||||
size_t num_of_homo = (size_t)node_ge1->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
size_t node_homo_off = get_homo_idx_buf_offset(node_ge1);
|
||||
if (get_lemma_id(node_homo_off + homo_pos) == id_lemma)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t DictTrie::get_lpis(const uint16* splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem* lma_buf, size_t max_lma_buf) {
|
||||
if (splid_str_len > kMaxLemmaSize)
|
||||
return 0;
|
||||
|
||||
#define MAX_EXTENDBUF_LEN 200
|
||||
|
||||
size_t* node_buf1[MAX_EXTENDBUF_LEN]; // use size_t for data alignment
|
||||
size_t* node_buf2[MAX_EXTENDBUF_LEN];
|
||||
LmaNodeLE0** node_fr_le0 =
|
||||
reinterpret_cast<LmaNodeLE0**>(node_buf1); // Nodes from.
|
||||
LmaNodeLE0** node_to_le0 =
|
||||
reinterpret_cast<LmaNodeLE0**>(node_buf2); // Nodes to.
|
||||
LmaNodeGE1** node_fr_ge1 = NULL;
|
||||
LmaNodeGE1** node_to_ge1 = NULL;
|
||||
size_t node_fr_num = 1;
|
||||
size_t node_to_num = 0;
|
||||
node_fr_le0[0] = root_;
|
||||
if (NULL == node_fr_le0[0])
|
||||
return 0;
|
||||
|
||||
size_t spl_pos = 0;
|
||||
|
||||
while (spl_pos < splid_str_len) {
|
||||
uint16 id_num = 1;
|
||||
uint16 id_start = splid_str[spl_pos];
|
||||
// If it is a half id
|
||||
if (spl_trie_->is_half_id(splid_str[spl_pos])) {
|
||||
id_num = spl_trie_->half_to_full(splid_str[spl_pos], &id_start);
|
||||
assert(id_num > 0);
|
||||
}
|
||||
|
||||
// Extend the nodes
|
||||
if (0 == spl_pos) { // From LmaNodeLE0 (root) to LmaNodeLE0 nodes
|
||||
for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) {
|
||||
LmaNodeLE0 *node = node_fr_le0[node_fr_pos];
|
||||
assert(node == root_ && 1 == node_fr_num);
|
||||
size_t son_start = splid_le0_index_[id_start - kFullSplIdStart];
|
||||
size_t son_end =
|
||||
splid_le0_index_[id_start + id_num - kFullSplIdStart];
|
||||
for (size_t son_pos = son_start; son_pos < son_end; son_pos++) {
|
||||
assert(1 == node->son_1st_off);
|
||||
LmaNodeLE0 *node_son = root_ + son_pos;
|
||||
assert(node_son->spl_idx >= id_start
|
||||
&& node_son->spl_idx < id_start + id_num);
|
||||
if (node_to_num < MAX_EXTENDBUF_LEN) {
|
||||
node_to_le0[node_to_num] = node_son;
|
||||
node_to_num++;
|
||||
}
|
||||
// id_start + id_num - 1 is the last one, which has just been
|
||||
// recorded.
|
||||
if (node_son->spl_idx >= id_start + id_num - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spl_pos++;
|
||||
if (spl_pos >= splid_str_len || node_to_num == 0)
|
||||
break;
|
||||
// Prepare the nodes for next extending
|
||||
// next time, from LmaNodeLE0 to LmaNodeGE1
|
||||
LmaNodeLE0** node_tmp = node_fr_le0;
|
||||
node_fr_le0 = node_to_le0;
|
||||
node_to_le0 = NULL;
|
||||
node_to_ge1 = reinterpret_cast<LmaNodeGE1**>(node_tmp);
|
||||
} else if (1 == spl_pos) { // From LmaNodeLE0 to LmaNodeGE1 nodes
|
||||
for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) {
|
||||
LmaNodeLE0 *node = node_fr_le0[node_fr_pos];
|
||||
for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son;
|
||||
son_pos++) {
|
||||
assert(node->son_1st_off <= lma_node_num_ge1_);
|
||||
LmaNodeGE1 *node_son = nodes_ge1_ + node->son_1st_off
|
||||
+ son_pos;
|
||||
if (node_son->spl_idx >= id_start
|
||||
&& node_son->spl_idx < id_start + id_num) {
|
||||
if (node_to_num < MAX_EXTENDBUF_LEN) {
|
||||
node_to_ge1[node_to_num] = node_son;
|
||||
node_to_num++;
|
||||
}
|
||||
}
|
||||
// id_start + id_num - 1 is the last one, which has just been
|
||||
// recorded.
|
||||
if (node_son->spl_idx >= id_start + id_num - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spl_pos++;
|
||||
if (spl_pos >= splid_str_len || node_to_num == 0)
|
||||
break;
|
||||
// Prepare the nodes for next extending
|
||||
// next time, from LmaNodeGE1 to LmaNodeGE1
|
||||
node_fr_ge1 = node_to_ge1;
|
||||
node_to_ge1 = reinterpret_cast<LmaNodeGE1**>(node_fr_le0);
|
||||
node_fr_le0 = NULL;
|
||||
node_to_le0 = NULL;
|
||||
} else { // From LmaNodeGE1 to LmaNodeGE1 nodes
|
||||
for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) {
|
||||
LmaNodeGE1 *node = node_fr_ge1[node_fr_pos];
|
||||
for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son;
|
||||
son_pos++) {
|
||||
assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0);
|
||||
LmaNodeGE1 *node_son = nodes_ge1_
|
||||
+ get_son_offset(node) + son_pos;
|
||||
if (node_son->spl_idx >= id_start
|
||||
&& node_son->spl_idx < id_start + id_num) {
|
||||
if (node_to_num < MAX_EXTENDBUF_LEN) {
|
||||
node_to_ge1[node_to_num] = node_son;
|
||||
node_to_num++;
|
||||
}
|
||||
}
|
||||
// id_start + id_num - 1 is the last one, which has just been
|
||||
// recorded.
|
||||
if (node_son->spl_idx >= id_start + id_num - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spl_pos++;
|
||||
if (spl_pos >= splid_str_len || node_to_num == 0)
|
||||
break;
|
||||
// Prepare the nodes for next extending
|
||||
// next time, from LmaNodeGE1 to LmaNodeGE1
|
||||
LmaNodeGE1 **node_tmp = node_fr_ge1;
|
||||
node_fr_ge1 = node_to_ge1;
|
||||
node_to_ge1 = node_tmp;
|
||||
}
|
||||
|
||||
// The number of node for next extending
|
||||
node_fr_num = node_to_num;
|
||||
node_to_num = 0;
|
||||
} // while
|
||||
|
||||
if (0 == node_to_num)
|
||||
return 0;
|
||||
|
||||
NGram &ngram = NGram::get_instance();
|
||||
size_t lma_num = 0;
|
||||
|
||||
// If the length is 1, and the splid is a one-char Yunmu like 'a', 'o', 'e',
|
||||
// only those candidates for the full matched one-char id will be returned.
|
||||
if (1 == splid_str_len && spl_trie_->is_half_id_yunmu(splid_str[0]))
|
||||
node_to_num = node_to_num > 0 ? 1 : 0;
|
||||
|
||||
for (size_t node_pos = 0; node_pos < node_to_num; node_pos++) {
|
||||
size_t num_of_homo = 0;
|
||||
if (spl_pos <= 1) { // Get from LmaNodeLE0 nodes
|
||||
LmaNodeLE0* node_le0 = node_to_le0[node_pos];
|
||||
num_of_homo = (size_t)node_le0->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
size_t ch_pos = lma_num + homo_pos;
|
||||
lma_buf[ch_pos].id =
|
||||
get_lemma_id(node_le0->homo_idx_buf_off + homo_pos);
|
||||
lma_buf[ch_pos].lma_len = 1;
|
||||
lma_buf[ch_pos].psb =
|
||||
static_cast<LmaScoreType>(ngram.get_uni_psb(lma_buf[ch_pos].id));
|
||||
|
||||
if (lma_num + homo_pos >= max_lma_buf - 1)
|
||||
break;
|
||||
}
|
||||
} else { // Get from LmaNodeGE1 nodes
|
||||
LmaNodeGE1* node_ge1 = node_to_ge1[node_pos];
|
||||
num_of_homo = (size_t)node_ge1->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
size_t ch_pos = lma_num + homo_pos;
|
||||
size_t node_homo_off = get_homo_idx_buf_offset(node_ge1);
|
||||
lma_buf[ch_pos].id = get_lemma_id(node_homo_off + homo_pos);
|
||||
lma_buf[ch_pos].lma_len = splid_str_len;
|
||||
lma_buf[ch_pos].psb =
|
||||
static_cast<LmaScoreType>(ngram.get_uni_psb(lma_buf[ch_pos].id));
|
||||
|
||||
if (lma_num + homo_pos >= max_lma_buf - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lma_num += num_of_homo;
|
||||
if (lma_num >= max_lma_buf) {
|
||||
lma_num = max_lma_buf;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lma_num;
|
||||
}
|
||||
|
||||
// Copies the string of lemma |id_lemma| into |str_buf| (capacity |str_max|)
// by forwarding to the dictionary list, and returns whatever the list returns.
uint16 DictTrie::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
                               uint16 str_max) {
  const uint16 result = dict_list_->get_lemma_str(id_lemma, str_buf, str_max);
  return result;
}
|
||||
|
||||
// Finds the spelling ids of lemma |id_lemma| and stores them in |splids|.
//
// For each character of the lemma a list of candidate spelling ids is built:
// when arg_valid is true and splids[pos] is already a full id, that id is the
// only candidate; otherwise the candidates come from
// dict_list_->get_splids_for_hanzi() (given splids[pos] when arg_valid, else
// 0). Every combination of candidates is then tried with try_extend() until
// one leads to id_lemma.
//
// Parameters:
//   id_lemma   - lemma to resolve.
//   splids     - in/out buffer; read as hints when arg_valid is true and
//                overwritten with each combination that is tried.
//   splids_max - capacity of splids.
//   arg_valid  - whether splids holds hint ids on entry.
//
// Returns the lemma length on success; 0 when no combination matches or when
// splids is NULL or too small for the lemma.
uint16 DictTrie::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
                                  uint16 splids_max, bool arg_valid) {
  char16 lma_str[kMaxLemmaSize + 1];
  uint16 lma_len = get_lemma_str(id_lemma, lma_str, kMaxLemmaSize + 1);
  assert((!arg_valid && splids_max >= lma_len) || lma_len == splids_max);

  // In builds without assertions, refuse to write past the caller's buffer.
  if (NULL == splids || lma_len > splids_max)
    return 0;

  // spl_mtrx stores the candidate ids of all characters back to back;
  // spl_start[pos] is where the candidates of character pos begin.
  uint16 spl_mtrx[kMaxLemmaSize * 5];
  uint16 spl_start[kMaxLemmaSize + 1];
  spl_start[0] = 0;

  // Number of candidate combinations. A 16-bit counter could wrap: with
  // kMaxLemmaSize * 5 candidate slots the product is not bounded by 65535.
  uint32 try_num = 1;

  for (uint16 pos = 0; pos < lma_len; pos++) {
    uint16 cand_splids_this = 0;
    if (arg_valid && spl_trie_->is_full_id(splids[pos])) {
      spl_mtrx[spl_start[pos]] = splids[pos];
      cand_splids_this = 1;
    } else {
      cand_splids_this = dict_list_->get_splids_for_hanzi(lma_str[pos],
          arg_valid ? splids[pos] : 0, spl_mtrx + spl_start[pos],
          kMaxLemmaSize * 5 - spl_start[pos]);
      assert(cand_splids_this > 0);
    }
    spl_start[pos + 1] = spl_start[pos] + cand_splids_this;
    try_num *= cand_splids_this;
  }

  // Enumerate the combinations as a mixed-radix number: digit pos of try_pos
  // selects the candidate for character pos.
  for (uint32 try_pos = 0; try_pos < try_num; try_pos++) {
    uint32 mod = 1;
    for (uint16 pos = 0; pos < lma_len; pos++) {
      uint16 radix = spl_start[pos + 1] - spl_start[pos];
      splids[pos] = spl_mtrx[spl_start[pos] + try_pos / mod % radix];
      mod *= radix;
    }

    if (try_extend(splids, lma_len, id_lemma))
      return lma_len;
  }

  return 0;
}
|
||||
|
||||
void DictTrie::set_total_lemma_count_of_others(size_t count) {
|
||||
NGram& ngram = NGram::get_instance();
|
||||
ngram.set_total_freq_none_sys(count);
|
||||
}
|
||||
|
||||
// Forwards the in-place conversion of |str| (length |str_len|) to the
// dictionary list's convert_to_hanzis().
void DictTrie::convert_to_hanzis(char16 *str, uint16 str_len) {
  dict_list_->convert_to_hanzis(str, str_len);
}
|
||||
|
||||
// Forwards the in-place conversion of |str| (length |str_len|) to the
// dictionary list's convert_to_scis_ids().
void DictTrie::convert_to_scis_ids(char16 *str, uint16 str_len) {
  dict_list_->convert_to_scis_ids(str, str_len);
}
|
||||
|
||||
// Looks up the id of the lemma spelled by |lemma_str| (|lemma_len| chars) in
// the dictionary list. Returns 0 for a NULL string or a length above
// kMaxLemmaSize.
LemmaIdType DictTrie::get_lemma_id(const char16 lemma_str[], uint16 lemma_len) {
  if (NULL != lemma_str && lemma_len <= kMaxLemmaSize)
    return dict_list_->get_lemma_id(lemma_str, lemma_len);

  return 0;
}
|
||||
|
||||
size_t DictTrie::predict_top_lmas(size_t his_len, NPredictItem *npre_items,
|
||||
size_t npre_max, size_t b4_used) {
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
size_t item_num = 0;
|
||||
size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_;
|
||||
size_t top_lmas_pos = 0;
|
||||
while (item_num < npre_max && top_lmas_pos < top_lmas_num_) {
|
||||
memset(npre_items + item_num, 0, sizeof(NPredictItem));
|
||||
LemmaIdType top_lma_id = get_lemma_id(top_lmas_id_offset + top_lmas_pos);
|
||||
top_lmas_pos += 1;
|
||||
if (dict_list_->get_lemma_str(top_lma_id,
|
||||
npre_items[item_num].pre_hzs,
|
||||
kMaxLemmaSize - 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
npre_items[item_num].psb = ngram.get_uni_psb(top_lma_id);
|
||||
npre_items[item_num].his_len = his_len;
|
||||
item_num++;
|
||||
}
|
||||
return item_num;
|
||||
}
|
||||
|
||||
size_t DictTrie::predict(const char16 *last_hzs, uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used) {
|
||||
return dict_list_->predict(last_hzs, hzs_len, npre_items, npre_max, b4_used);
|
||||
}
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTTRIE_H__
|
||||
#define PINYINIME_INCLUDE_DICTTRIE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./atomdictbase.h"
|
||||
#include "./dictdef.h"
|
||||
#include "./dictlist.h"
|
||||
#include "./searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class DictTrie : AtomDictBase {
|
||||
private:
|
||||
struct ParsingMark {
|
||||
size_t node_offset:24;
|
||||
size_t node_num:8; // Number of nodes with this spelling id given
|
||||
// by spl_id. If spl_id is a Shengmu, for nodes
|
||||
// in the first layer of DictTrie, it equals to
|
||||
// SpellingTrie::shm2full_num(); but for those
|
||||
// nodes which are not in the first layer,
|
||||
// node_num < SpellingTrie::shm2full_num().
|
||||
// For a full spelling id, node_num = 1;
|
||||
};
|
||||
|
||||
// Used to indicate an extended mile stone.
|
||||
// An extended mile stone is used to mark a partial match in the dictionary
|
||||
// trie to speed up further potential extending.
|
||||
// For example, when the user inputs "w", a mile stone is created to mark the
|
||||
// partial match status, so that when user inputs another char 'm', it will be
|
||||
// faster to extend search space based on this mile stone.
|
||||
//
|
||||
// For partial match status of "wm", there can be more than one sub mile
|
||||
// stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so
|
||||
// there may be more one parsing mark used to mark these partial matchings.
|
||||
// A mile stone records the starting position in the mark list and number of
|
||||
// marks.
|
||||
struct MileStone {
|
||||
uint16 mark_start;
|
||||
uint16 mark_num;
|
||||
};
|
||||
|
||||
DictList* dict_list_;
|
||||
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
LmaNodeLE0* root_; // Nodes for root and the first layer.
|
||||
LmaNodeGE1* nodes_ge1_; // Nodes for other layers.
|
||||
|
||||
// An quick index from spelling id to the LmaNodeLE0 node buffer, or
|
||||
// to the root_ buffer.
|
||||
// Index length:
|
||||
// SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used
|
||||
// to get the end.
|
||||
// All Shengmu ids are not indexed because they will be converted into
|
||||
// corresponding full ids.
|
||||
// So, given an id splid, the son is:
|
||||
// root_[splid_le0_index_[splid - kFullSplIdStart]]
|
||||
uint16 *splid_le0_index_;
|
||||
|
||||
uint32 lma_node_num_le0_;
|
||||
uint32 lma_node_num_ge1_;
|
||||
|
||||
// The first part is for homophnies, and the last top_lma_num_ items are
|
||||
// lemmas with highest scores.
|
||||
unsigned char *lma_idx_buf_;
|
||||
uint32 lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte.
|
||||
uint32 total_lma_num_; // Total number of lemmas in this dictionary.
|
||||
uint32 top_lmas_num_; // Number of lemma with highest scores.
|
||||
|
||||
// Parsing mark list used to mark the detailed extended statuses.
|
||||
ParsingMark *parsing_marks_;
|
||||
// The position for next available mark.
|
||||
uint16 parsing_marks_pos_;
|
||||
|
||||
// Mile stone list used to mark the extended status.
|
||||
MileStone *mile_stones_;
|
||||
// The position for the next available mile stone. We use positions (except 0)
|
||||
// as handles.
|
||||
MileStoneHandle mile_stones_pos_;
|
||||
|
||||
// Get the offset of sons for a node.
|
||||
inline size_t get_son_offset(const LmaNodeGE1 *node);
|
||||
|
||||
// Get the offset of homonious ids for a node.
|
||||
inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node);
|
||||
|
||||
// Get the lemma id by the offset.
|
||||
inline LemmaIdType get_lemma_id(size_t id_offset);
|
||||
|
||||
void free_resource(bool free_dict_list);
|
||||
|
||||
bool load_dict(FILE *fp);
|
||||
|
||||
// Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill
|
||||
// them into the lpi_items buffer.
|
||||
// This function is called by the search engine.
|
||||
size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
|
||||
LmaNodeLE0 *node);
|
||||
|
||||
// Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill
|
||||
// them into the lpi_items buffer.
|
||||
// This function is called by inner functions extend_dict0(), extend_dict1()
|
||||
// and extend_dict2().
|
||||
size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
|
||||
size_t homo_buf_off, LmaNodeGE1 *node,
|
||||
uint16 lma_len);
|
||||
|
||||
// Extend in the trie from level 0.
|
||||
MileStoneHandle extend_dict0(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
// Extend in the trie from level 1.
|
||||
MileStoneHandle extend_dict1(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
// Extend in the trie from level 2.
|
||||
MileStoneHandle extend_dict2(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
// Try to extend the given spelling id buffer, and if the given id_lemma can
|
||||
// be successfully gotten, return true;
|
||||
// The given spelling ids are all valid full ids.
|
||||
bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
bool save_dict(FILE *fp);
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
static const int kMaxMileStone = 100;
|
||||
static const int kMaxParsingMark = 600;
|
||||
static const MileStoneHandle kFirstValidMileStoneHandle = 1;
|
||||
|
||||
friend class DictParser;
|
||||
friend class DictBuilder;
|
||||
|
||||
public:
|
||||
|
||||
DictTrie();
|
||||
~DictTrie();
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Construct the tree from the file fn_raw.
|
||||
// fn_validhzs provide the valid hanzi list. If fn_validhzs is
|
||||
// NULL, only chars in GB2312 will be included.
|
||||
bool build_dict(const char *fn_raw, const char *fn_validhzs);
|
||||
|
||||
// Save the binary dictionary
|
||||
// Actually, the SpellingTrie/DictList instance will be also saved.
|
||||
bool save_dict(const char *filename);
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
void convert_to_hanzis(char16 *str, uint16 str_len);
|
||||
|
||||
void convert_to_scis_ids(char16 *str, uint16 str_len);
|
||||
|
||||
// Load a binary dictionary
|
||||
// The SpellingTrie instance/DictList will be also loaded
|
||||
bool load_dict(const char *filename, LemmaIdType start_id,
|
||||
LemmaIdType end_id);
|
||||
bool load_dict_fd(int sys_fd, long start_offset, long length,
|
||||
LemmaIdType start_id, LemmaIdType end_id);
|
||||
bool close_dict() {return true;}
|
||||
size_t number_of_lemmas() {return 0;}
|
||||
|
||||
void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
|
||||
|
||||
MileStoneHandle extend_dict(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep,
|
||||
LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max);
|
||||
|
||||
uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
|
||||
|
||||
uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid);
|
||||
|
||||
size_t predict(const char16 *last_hzs, uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used);
|
||||
|
||||
LemmaIdType put_lemma(char16 /*lemma_str*/[], uint16 /*splids*/[],
|
||||
uint16 /*lemma_len*/, uint16 /*count*/) {return 0;}
|
||||
|
||||
LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/,
|
||||
bool /*selected*/) {return 0;}
|
||||
|
||||
LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[],
|
||||
uint16 /*lemma_len*/) {return 0;}
|
||||
|
||||
LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) {return 0;}
|
||||
|
||||
LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[],
|
||||
uint16 /*lemma_len*/) {return 0;}
|
||||
|
||||
bool remove_lemma(LemmaIdType /*lemma_id*/) {return false;}
|
||||
|
||||
size_t get_total_lemma_count() {return 0;}
|
||||
void set_total_lemma_count_of_others(size_t count);
|
||||
|
||||
void flush_cache() {}
|
||||
|
||||
LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len);
|
||||
|
||||
// Fill the lemmas with highest scores to the prediction buffer.
|
||||
// his_len is the history length to fill in the prediction buffer.
|
||||
size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items,
|
||||
size_t npre_max, size_t b4_used);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTTRIE_H__
|
||||
@@ -0,0 +1,62 @@
|
||||
# qmake project for the googlepinyin engine: a static library built without
# the Qt GUI module.
QT -= gui

TEMPLATE = lib

SOURCES += \
    dictbuilder.cpp \
    dictlist.cpp \
    dicttrie.cpp \
    lpicache.cpp \
    matrixsearch.cpp \
    mystdlib.cpp \
    ngram.cpp \
    pinyinime.cpp \
    searchutility.cpp \
    spellingtable.cpp \
    spellingtrie.cpp \
    splparser.cpp \
    sync.cpp \
    userdict.cpp \
    utf16char.cpp \
    utf16reader.cpp

HEADERS += \
    atomdictbase.h \
    dictbuilder.h \
    dictdef.h \
    dictlist.h \
    dicttrie.h \
    lpicache.h \
    matrixsearch.h \
    mystdlib.h \
    ngram.h \
    pinyinime.h \
    searchutility.h \
    spellingtable.h \
    spellingtrie.h \
    splparser.h \
    sync.h \
    userdict.h \
    utf16char.h \
    utf16reader.h

CONFIG += staticlib
TARGET = googlepinyin

win32 {
#    CONFIG += debug_and_release build_all

    # Debug builds append "d" to the target name.
    CONFIG(debug, debug|release) {
        TARGET = $$join(TARGET,,,d)
    }
    # Release builds keep the target name as it is.
    CONFIG(release, debug|release) {
        TARGET = $$TARGET
    }
}

# The library goes to build/lib; intermediate files go to build/googlepinyin.
DESTDIR = $$PWD/../build/lib/googlepinyin

MOC_DIR = $$PWD/../build/googlepinyin/moc
RCC_DIR = $$PWD/../build/googlepinyin/res
UI_DIR = $$PWD/../build/googlepinyin/ui
OBJECTS_DIR = $$PWD/../build/googlepinyin/obj
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "lpicache.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
LpiCache* LpiCache::instance_ = NULL;
|
||||
|
||||
// Allocates kMaxLpiCachePerId item slots and one length counter for every id
// below kFullSplIdStart, and marks every per-id list as empty.
LpiCache::LpiCache() {
  lpi_cache_ = new LmaPsbItem[kFullSplIdStart * kMaxLpiCachePerId];
  lpi_cache_len_ = new uint16[kFullSplIdStart];
  assert(NULL != lpi_cache_);
  assert(NULL != lpi_cache_len_);

  // A length of 0 means "nothing cached for this id".
  uint16 *const len_end = lpi_cache_len_ + kFullSplIdStart;
  for (uint16 *len = lpi_cache_len_; len != len_end; ++len)
    *len = 0;
}
|
||||
|
||||
// Releases both cache arrays. delete[] on a null pointer does nothing, so no
// explicit NULL test is needed.
LpiCache::~LpiCache() {
  delete [] lpi_cache_;
  delete [] lpi_cache_len_;
}
|
||||
|
||||
// Returns the shared LpiCache, creating it on the first call.
LpiCache& LpiCache::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new LpiCache();
  assert(NULL != instance_);
  return *instance_;
}
|
||||
|
||||
// True when |splid| is below kFullSplIdStart and a non-empty list is stored
// for it; ids at or above kFullSplIdStart are never cached.
bool LpiCache::is_cached(uint16 splid) {
  return splid < kFullSplIdStart && 0 != lpi_cache_len_[splid];
}
|
||||
|
||||
// Stores the first min(lpi_num, kMaxLpiCachePerId) items of |lpi_items| as the
// cached list of |splid| and returns how many were stored.
// splid is not range-checked here.
size_t LpiCache::put_cache(uint16 splid, LmaPsbItem lpi_items[],
                           size_t lpi_num) {
  // Truncate the list to the per-id capacity.
  uint16 stored = kMaxLpiCachePerId;
  if (lpi_num < stored)
    stored = static_cast<uint16>(lpi_num);

  LmaPsbItem *dst = lpi_cache_ + splid * kMaxLpiCachePerId;
  const LmaPsbItem *src = lpi_items;
  for (uint16 left = stored; left > 0; left--)
    *dst++ = *src++;

  lpi_cache_len_[splid] = stored;
  return stored;
}
|
||||
|
||||
// Copies up to |lpi_max| cached items of |splid| into |lpi_items| and returns
// the number copied, which is min(lpi_max, cached length).
// splid is not range-checked here.
size_t LpiCache::get_cache(uint16 splid, LmaPsbItem lpi_items[],
                           size_t lpi_max) {
  const size_t cached_len = lpi_cache_len_[splid];
  const size_t copy_num = (lpi_max > cached_len) ? cached_len : lpi_max;

  const LmaPsbItem *src = lpi_cache_ + splid * kMaxLpiCachePerId;
  for (size_t idx = 0; idx < copy_num; idx++)
    lpi_items[idx] = src[idx];

  return copy_num;
}
|
||||
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
|
||||
#define PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./searchutility.h"
|
||||
#include "./spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Used to cache LmaPsbItem list for half spelling ids.
|
||||
class LpiCache {
|
||||
private:
|
||||
static LpiCache *instance_;
|
||||
static const int kMaxLpiCachePerId = 15;
|
||||
|
||||
LmaPsbItem *lpi_cache_;
|
||||
uint16 *lpi_cache_len_;
|
||||
|
||||
public:
|
||||
LpiCache();
|
||||
~LpiCache();
|
||||
|
||||
static LpiCache& get_instance();
|
||||
|
||||
// Test if the LPI list of the given splid has been cached.
|
||||
// If splid is a full spelling id, it returns false, because we only cache
|
||||
// list for half ids.
|
||||
bool is_cached(uint16 splid);
|
||||
|
||||
// Put LPI list to cahce. If the length of the list, lpi_num, is longer than
|
||||
// the cache buffer. the list will be truncated, and function returns the
|
||||
// maximum length of the cache buffer.
|
||||
// Note: splid must be a half id, and lpi_items must be not NULL. The
|
||||
// caller of this function should guarantee this.
|
||||
size_t put_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num);
|
||||
|
||||
// Get the cached list for the given half id.
|
||||
// Return the length of the cached buffer.
|
||||
// Note: splid must be a half id, and lpi_items must be not NULL. The
|
||||
// caller of this function should guarantee this.
|
||||
size_t get_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,460 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
|
||||
#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./atomdictbase.h"
|
||||
#include "./dicttrie.h"
|
||||
#include "./searchutility.h"
|
||||
#include "./spellingtrie.h"
|
||||
#include "./splparser.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Bound used for the per-step arrays declared in this header (for example
// pys_, spl_start_ and lma_id_); it takes the same value as kMaxSearchSteps.
static const size_t kMaxRowNum = kMaxSearchSteps;
|
||||
|
||||
typedef struct {
|
||||
// MileStoneHandle objects for the system and user dictionaries.
|
||||
MileStoneHandle dict_handles[2];
|
||||
// From which DMI node. -1 means it's from root.
|
||||
PoolPosType dmi_fr;
|
||||
// The spelling id for the Pinyin string from the previous DMI to this node.
|
||||
// If it is a half id like Shengmu, the node pointed by dict_node is the first
|
||||
// node with this Shengmu,
|
||||
uint16 spl_id;
|
||||
// What's the level of the dict node. Level of root is 0, but root is never
|
||||
// recorded by dict_node.
|
||||
unsigned char dict_level:7;
|
||||
// If this node is for composing phrase, this bit is 1.
|
||||
unsigned char c_phrase:1;
|
||||
// Whether the spl_id is parsed with a split character at the end.
|
||||
unsigned char splid_end_split:1;
|
||||
// What's the length of the spelling string for this match, for the whole
|
||||
// word.
|
||||
unsigned char splstr_len:7;
|
||||
// Used to indicate whether all spelling ids from the root are full spelling
|
||||
// ids. This information is useful for keymapping mode(not finished). Because
|
||||
// in this mode, there is no clear boundaries, we prefer those results which
|
||||
// have full spelling ids.
|
||||
unsigned char all_full_id:1;
|
||||
} DictMatchInfo, *PDictMatchInfo;
|
||||
|
||||
typedef struct MatrixNode {
|
||||
LemmaIdType id;
|
||||
float score;
|
||||
MatrixNode *from;
|
||||
// From which DMI node. Used to trace the spelling segmentation.
|
||||
PoolPosType dmi_fr;
|
||||
uint16 step;
|
||||
} MatrixNode, *PMatrixNode;
|
||||
|
||||
typedef struct {
|
||||
// The MatrixNode position in the matrix pool
|
||||
PoolPosType mtrx_nd_pos;
|
||||
// The DictMatchInfo position in the DictMatchInfo pool.
|
||||
PoolPosType dmi_pos;
|
||||
uint16 mtrx_nd_num;
|
||||
uint16 dmi_num:15;
|
||||
// Used to indicate whether there are dmi nodes in this step with full
|
||||
// spelling id. This information is used to decide whether a substring of a
|
||||
// valid Pinyin should be extended.
|
||||
//
|
||||
// Example1: shoudao
|
||||
// When the last char 'o' is added, the parser will find "dao" is a valid
|
||||
// Pinyin, and because all dmi nodes at location 'd' (including those for
|
||||
// "shoud", and those for "d") have Shengmu id only, so it is not necessary
|
||||
// to extend "ao", otherwise the result may be "shoud ao", that is not
|
||||
// reasonable.
|
||||
//
|
||||
// Example2: hengao
|
||||
// When the last 'o' is added, the parser finds "gao" is a valid Pinyin.
|
||||
// Because some dmi nodes at 'g' has Shengmu ids (hen'g and g), but some dmi
|
||||
// nodes at 'g' has full ids ('heng'), so it is necessary to extend "ao", thus
|
||||
// "heng ao" can also be the result.
|
||||
//
|
||||
// Similarly, "ganga" is expanded to "gang a".
|
||||
//
|
||||
// For Pinyin string "xian", because "xian" is a valid Pinyin, because all dmi
|
||||
// nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it
|
||||
// is not valid either). If the parser uses break in the loop, the result
|
||||
// always be "xian"; but if the parser uses continue in the loop, "xi an" will
|
||||
// also be tried. This behaviour can be set via the function
|
||||
// set_xi_an_switch().
|
||||
uint16 dmi_has_full_id:1;
|
||||
// Points to a MatrixNode of the current step to indicate which choice the
|
||||
// user selects.
|
||||
MatrixNode *mtrx_nd_fixed;
|
||||
} MatrixRow, *PMatrixRow;
|
||||
|
||||
// When user inputs and selects candidates, the fixed lemma ids are stored in
|
||||
// lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how many
|
||||
// lemmas from the beginning are fixed. If user deletes Pinyin characters one
|
||||
// by one from the end, these fixed lemmas can be unlocked one by one when
|
||||
// necessary. Whenever user deletes a Chinese character and its spelling string
|
||||
// in these fixed lemmas, all fixed lemmas will be merged together into a unit
|
||||
// named ComposingPhrase with a lemma id kLemmaIdComposing, and this composing
|
||||
// phrase will be the first lemma in the sentence. Because it contains some
|
||||
// modified lemmas (by deleting a character), these merged lemmas are called
|
||||
// sub lemmas (sublma), and each of them are represented individually, so that
|
||||
// when user deletes Pinyin characters from the end, these sub lemmas can also
|
||||
// be unlocked one by one.
|
||||
typedef struct {
|
||||
uint16 spl_ids[kMaxRowNum];
|
||||
uint16 spl_start[kMaxRowNum];
|
||||
char16 chn_str[kMaxRowNum]; // Chinese string.
|
||||
uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters.
|
||||
size_t sublma_num;
|
||||
uint16 length; // Counted in Chinese characters.
|
||||
} ComposingPhrase, *TComposingPhrase;
|
||||
|
||||
class MatrixSearch {
|
||||
private:
|
||||
// If it is true, prediction list by string whose length is greater than 1
|
||||
// will be limited to a reasonable number.
|
||||
static const bool kPredictLimitGt1 = false;
|
||||
|
||||
// If it is true, the engine will prefer long history based prediction,
|
||||
// for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are
|
||||
// based on the two-character history.
|
||||
static const bool kPreferLongHistoryPredict = true;
|
||||
|
||||
// If it is true, prediction will only be based on user dictionary. this flag
|
||||
// is for debug purpose.
|
||||
static const bool kOnlyUserDictPredict = false;
|
||||
|
||||
// The maximum buffer to store LmaPsbItems.
|
||||
static const size_t kMaxLmaPsbItems = 1450;
|
||||
|
||||
// How many rows for each step.
|
||||
static const size_t kMaxNodeARow = 5;
|
||||
|
||||
// The maximum length of the sentence candidates counted in chinese
|
||||
// characters
|
||||
static const size_t kMaxSentenceLength = 16;
|
||||
|
||||
// The size of the matrix node pool.
|
||||
static const size_t kMtrxNdPoolSize = 200;
|
||||
|
||||
// The size of the DMI node pool.
|
||||
static const size_t kDmiPoolSize = 800;
|
||||
|
||||
// Used to indicate whether this object has been initialized.
|
||||
bool inited_;
|
||||
|
||||
// Spelling trie.
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
// Used to indicate this switcher status: when "xian" is parseed, should
|
||||
// "xi an" also be extended. Default is false.
|
||||
// These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string
|
||||
// should be valid for a FULL spelling, or a combination of two spellings,
|
||||
// first of which is a FULL id too. So even it is true, "da" will never be
|
||||
// split into "d a", because "d" is not a full spelling id.
|
||||
bool xi_an_enabled_;
|
||||
|
||||
// System dictionary.
|
||||
DictTrie* dict_trie_;
|
||||
|
||||
// User dictionary.
|
||||
AtomDictBase* user_dict_;
|
||||
|
||||
// Spelling parser.
|
||||
SpellingParser* spl_parser_;
|
||||
|
||||
// The maximum allowed length of spelling string (such as a Pinyin string).
|
||||
size_t max_sps_len_;
|
||||
|
||||
// The maximum allowed length of a result Chinese string.
|
||||
size_t max_hzs_len_;
|
||||
|
||||
// Pinyin string. Max length: kMaxRowNum - 1
|
||||
char pys_[kMaxRowNum];
|
||||
|
||||
// The length of the string that has been decoded successfully.
|
||||
size_t pys_decoded_len_;
|
||||
|
||||
// Shared buffer for multiple purposes.
|
||||
size_t *share_buf_;
|
||||
|
||||
MatrixNode *mtrx_nd_pool_;
|
||||
PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool
|
||||
DictMatchInfo *dmi_pool_;
|
||||
PoolPosType dmi_pool_used_; // How many items used in the pool
|
||||
|
||||
MatrixRow *matrix_; // The first row is for starting
|
||||
|
||||
DictExtPara *dep_; // Parameter used to extend DMI nodes.
|
||||
|
||||
NPredictItem *npre_items_; // Used to do prediction
|
||||
size_t npre_items_len_;
|
||||
|
||||
// The starting positions and lemma ids for the full sentence candidate.
|
||||
size_t lma_id_num_;
|
||||
uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids.
|
||||
LemmaIdType lma_id_[kMaxRowNum];
|
||||
size_t fixed_lmas_;
|
||||
|
||||
// If fixed_lmas_ is bigger than i, Element i is used to indicate whether
|
||||
// the i'th lemma id in lma_id_ is the first candidate for that step.
|
||||
// If all candidates are the first one for that step, the whole string can be
|
||||
// decoded by the engine automatically, so no need to add it to user
|
||||
// dictionary. (We are considering to add it to user dictionary in the
|
||||
// future).
|
||||
uint8 fixed_lmas_no1_[kMaxRowNum];
|
||||
|
||||
// Composing phrase
|
||||
ComposingPhrase c_phrase_;
|
||||
|
||||
// If dmi_c_phrase_ is true, the decoder will try to match the
|
||||
// composing phrase (And definitely it will match successfully). If it
|
||||
// is false, the decoder will try to match lemmas items in dictionaries.
|
||||
bool dmi_c_phrase_;
|
||||
|
||||
// The starting positions and spelling ids for the first full sentence
|
||||
// candidate.
|
||||
size_t spl_id_num_; // Number of splling ids
|
||||
uint16 spl_start_[kMaxRowNum]; // Starting positions
|
||||
uint16 spl_id_[kMaxRowNum]; // Spelling ids
|
||||
// Used to remember the last fixed position, counted in Hanzi.
|
||||
size_t fixed_hzs_;
|
||||
|
||||
// Lemma Items with possibility score, two purposes:
|
||||
// 1. In Viterbi decoding, this buffer is used to get all possible candidates
|
||||
// for current step;
|
||||
// 2. When the search is done, this buffer is used to get candiates from the
|
||||
// first un-fixed step and show them to the user.
|
||||
LmaPsbItem lpi_items_[kMaxLmaPsbItems];
|
||||
size_t lpi_total_;
|
||||
|
||||
// Assign the pointers with NULL. The caller makes sure that all pointers are
|
||||
// not valid before calling it. This function only will be called in the
|
||||
// construction function and free_resource().
|
||||
void reset_pointers_to_null();
|
||||
|
||||
bool alloc_resource();
|
||||
|
||||
void free_resource();
|
||||
|
||||
// Reset the search space totally.
|
||||
bool reset_search0();
|
||||
|
||||
// Reset the search space from ch_pos step. For example, if the original
|
||||
// input Pinyin is "an", reset_search(1) will reset the search space to the
|
||||
// result of "a". If the given position is out of range, return false.
|
||||
// if clear_fixed_this_step is true, and the ch_pos step is a fixed step,
|
||||
// clear its fixed status. if clear_dmi_his_step is true, clear the DMI nodes.
|
||||
// If clear_mtrx_this_sTep is true, clear the mtrx nodes of this step.
|
||||
// The DMI nodes will be kept.
|
||||
//
|
||||
// Note: this function should not destroy content of pys_.
|
||||
bool reset_search(size_t ch_pos, bool clear_fixed_this_step,
|
||||
bool clear_dmi_this_step, bool clear_mtrx_this_step);
|
||||
|
||||
// Delete a part of the content in pys_.
|
||||
void del_in_pys(size_t start, size_t len);
|
||||
|
||||
// Delete a spelling id and its corresponding Chinese character, and merge
|
||||
// the fixed lemmas into the composing phrase.
|
||||
// del_spl_pos indicates which spelling id needs to be delete.
|
||||
// This function will update the lemma and spelling segmentation information.
|
||||
// The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within
|
||||
// the fixed lemmas.
|
||||
void merge_fixed_lmas(size_t del_spl_pos);
|
||||
|
||||
// Get spelling start posistions and ids. The result will be stored in
|
||||
// spl_id_num_, spl_start_[], spl_id_[].
|
||||
// fixed_hzs_ will be also assigned.
|
||||
void get_spl_start_id();
|
||||
|
||||
// Get all lemma ids with match the given spelling id stream(shorter than the
|
||||
// maximum length of a word).
|
||||
// If pfullsent is not NULL, means the full sentence candidate may be the
|
||||
// same with the coming lemma string, if so, remove that lemma.
|
||||
// The result is sorted in descendant order by the frequency score.
|
||||
size_t get_lpis(const uint16* splid_str, size_t splid_str_len,
|
||||
LmaPsbItem* lma_buf, size_t max_lma_buf,
|
||||
const char16 *pfullsent, bool sort_by_psb);
|
||||
|
||||
uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
|
||||
|
||||
uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid);
|
||||
|
||||
|
||||
// Extend a DMI node with a spelling id. ext_len is the length of the rows
|
||||
// to extend, actually, it is the size of the spelling string of splid.
|
||||
// return value can be 1 or 0.
|
||||
// 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in
|
||||
// the pool).
|
||||
// 0 means either the dmi node can not be extended with splid, or the splid
|
||||
// is a Shengmu id, which is only used to get lpi_items, or the result node
|
||||
// in DictTrie has no son, it is not nccessary to keep the new DMI.
|
||||
//
|
||||
// This function modifies the content of lpi_items_ and lpi_total_.
|
||||
// lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size.
|
||||
// The function's returned value has no relation with the value of lpi_num.
|
||||
//
|
||||
// If dmi == NULL, this function will extend the root node of DictTrie
|
||||
//
|
||||
// This function will not change dmi_nd_pool_used_. Please change it after
|
||||
// calling this function if necessary.
|
||||
//
|
||||
// The caller should guarantees that NULL != dep.
|
||||
size_t extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s);
|
||||
|
||||
// Extend dmi for the composing phrase.
|
||||
size_t extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s);
|
||||
|
||||
// Extend a MatrixNode with the give LmaPsbItem list.
|
||||
// res_row is the destination row number.
|
||||
// This function does not change mtrx_nd_pool_used_. Please change it after
|
||||
// calling this function if necessary.
|
||||
// return 0 always.
|
||||
size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
|
||||
size_t lpi_num, PoolPosType dmi_fr, size_t res_row);
|
||||
|
||||
|
||||
// Try to find a dmi node at step_to position, and the found dmi node should
|
||||
// match the given spelling id strings.
|
||||
PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num);
|
||||
|
||||
bool add_char(char ch);
|
||||
bool prepare_add_char(char ch);
|
||||
|
||||
// Called after prepare_add_char, so the input char has been saved.
|
||||
bool add_char_qwerty();
|
||||
|
||||
// Prepare candidates from the last fixed hanzi position.
|
||||
void prepare_candidates();
|
||||
|
||||
// Is the character in step pos a splitter character?
|
||||
// The caller guarantees that the position is valid.
|
||||
bool is_split_at(uint16 pos);
|
||||
|
||||
void fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles,
|
||||
PoolPosType dmi_fr,
|
||||
uint16 spl_id, uint16 node_num, unsigned char dict_level,
|
||||
bool splid_end_split, unsigned char splstr_len,
|
||||
unsigned char all_full_id);
|
||||
|
||||
size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num,
|
||||
char16 predict_buf[][kMaxPredictSize + 1],
|
||||
size_t buf_len);
|
||||
|
||||
// Add the first candidate to the user dictionary.
|
||||
bool try_add_cand0_to_userdict();
|
||||
|
||||
// Add a user lemma to the user dictionary. This lemma is a subset of
|
||||
// candidate 0. lma_from is from which lemma in lma_ids_, lma_num is the
|
||||
// number of lemmas to be combined together as a new lemma. The caller
|
||||
// gurantees that the combined new lemma's length is less or equal to
|
||||
// kMaxLemmaSize.
|
||||
bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score);
|
||||
|
||||
// Update dictionary frequencies.
|
||||
void update_dict_freq();
|
||||
|
||||
void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level);
|
||||
|
||||
public:
|
||||
MatrixSearch();
|
||||
~MatrixSearch();
|
||||
|
||||
bool init(const char *fn_sys_dict, const char *fn_usr_dict);
|
||||
|
||||
bool init_fd(int sys_fd, long start_offset, long length,
|
||||
const char *fn_usr_dict);
|
||||
|
||||
void init_user_dictionary(const char *fn_usr_dict);
|
||||
|
||||
bool is_user_dictionary_enabled() const;
|
||||
|
||||
void set_max_lens(size_t max_sps_len, size_t max_hzs_len);
|
||||
|
||||
void close();
|
||||
|
||||
void flush_cache();
|
||||
|
||||
void set_xi_an_switch(bool xi_an_enabled);
|
||||
|
||||
bool get_xi_an_switch();
|
||||
|
||||
// Reset the search space. Equivalent to reset_search(0).
|
||||
// If inited, always return true;
|
||||
bool reset_search();
|
||||
|
||||
// Search a Pinyin string.
|
||||
// Return value is the position successfully parsed.
|
||||
size_t search(const char *py, size_t py_len);
|
||||
|
||||
// Used to delete something in the Pinyin string kept by the engine, and do
|
||||
// a re-search.
|
||||
// Return value is the new length of Pinyin string kept by the engine which
|
||||
// is parsed successfully.
|
||||
// If is_pos_in_splid is false, pos is used to indicate that pos-th Pinyin
|
||||
// character needs to be deleted. If is_pos_in_splid is true, all Pinyin
|
||||
// characters for pos-th spelling id needs to be deleted.
|
||||
// If the deleted character(s) is just after a fixed lemma or sub lemma in
|
||||
// composing phrase, clear_fixed_this_step indicates whether we needs to
|
||||
// unlock the last fixed lemma or sub lemma.
|
||||
// If is_pos_in_splid is false, and pos-th character is in the range for the
|
||||
// fixed lemmas or composing string, this function will do nothing and just
|
||||
// return the result of the previous search.
|
||||
size_t delsearch(size_t pos, bool is_pos_in_splid,
|
||||
bool clear_fixed_this_step);
|
||||
|
||||
// Get the number of candiates, called after search().
|
||||
size_t get_candidate_num();
|
||||
|
||||
// Get the Pinyin string stored by the engine.
|
||||
// *decoded_len returns the length of the successfully decoded string.
|
||||
const char* get_pystr(size_t *decoded_len);
|
||||
|
||||
// Get the spelling boundaries for the first sentence candidate.
|
||||
// Number of spellings will be returned. The number of valid elements in
|
||||
// spl_start is one more than the return value because the last one is used
|
||||
// to indicate the beginning of the next un-input speling.
|
||||
// For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] .
|
||||
size_t get_spl_start(const uint16 *&spl_start);
|
||||
|
||||
// Get one candiate string. If full sentence candidate is available, it will
|
||||
// be the first one.
|
||||
char16* get_candidate(size_t cand_id, char16 *cand_str, size_t max_len);
|
||||
|
||||
// Get the first candiate, which is a "full sentence".
|
||||
// retstr_len is not NULL, it will be used to return the string length.
|
||||
// If only_unfixed is true, only unfixed part will be fetched.
|
||||
char16* get_candidate0(char16* cand_str, size_t max_len,
|
||||
uint16 *retstr_len, bool only_unfixed);
|
||||
|
||||
// Choose a candidate. The decoder will do a search after the fixed position.
|
||||
size_t choose(size_t cand_id);
|
||||
|
||||
// Cancel the last choosing operation, and return the new number of choices.
|
||||
size_t cancel_last_choice();
|
||||
|
||||
// Get the length of fixed Hanzis.
|
||||
size_t get_fixedlen();
|
||||
|
||||
size_t get_predicts(const char16 fixed_buf[],
|
||||
char16 predict_buf[][kMaxPredictSize + 1],
|
||||
size_t buf_len);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// For debug purpose. You can add a fixed version of qsort and bsearch functions
|
||||
// here so that the output will be totally the same under different platforms.
|
||||
|
||||
// Sorts the n elements of es bytes each that start at p, ordered by cmp.
// Currently a plain forwarder to the C library qsort().
void myqsort(void *p, size_t n, size_t es,
             int (*cmp)(const void *, const void *)) {
  ::qsort(p, n, es, cmp);
}
|
||||
|
||||
// Binary-searches the sorted array b (n elements of es bytes each) for the
// key k using cmp. Returns the matching element, or NULL when there is none.
// Currently a plain forwarder to the C library bsearch().
void *mybsearch(const void *k, const void *b,
                size_t n, size_t es,
                int (*cmp)(const void *, const void *)) {
  void *match = ::bsearch(k, b, n, es, cmp);
  return match;
}
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_MYSTDLIB_H__
|
||||
#define PINYINIME_INCLUDE_MYSTDLIB_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Sorts n elements of es bytes each, starting at p, using the comparison
// function cmp.
void myqsort(void *p, size_t n, size_t es,
             int (*cmp)(const void *, const void *));
|
||||
|
||||
// Searches the array base (nmemb elements of size bytes each) for key using
// the comparison function compar, and returns the element found.
void *mybsearch(const void *key, const void *base,
                size_t nmemb, size_t size,
                int (*compar)(const void *, const void *));
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_MYSTDLIB_H__
|
||||
@@ -0,0 +1,342 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include "mystdlib.h"
|
||||
#include "ngram.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Pseudo-frequency that build_unigram() assigns to entry 0 of its frequency
// table.
#define ADD_COUNT 0.3
|
||||
|
||||
// qsort()-style comparator for doubles: returns -1, 1 or 0 when the value at
// p1 is less than, greater than, or neither less nor greater than the value
// at p2.
int comp_double(const void *p1, const void *p2) {
  const double lhs = *static_cast<const double*>(p1);
  const double rhs = *static_cast<const double*>(p2);

  if (lhs < rhs)
    return -1;
  return (lhs > rhs) ? 1 : 0;
}
|
||||
|
||||
// Distance between a frequency and a code value: the absolute difference of
// their logarithms, weighted by the frequency. (A plain fabs(freq - code) was
// the alternative considered.)
inline double distance(double freq, double code) {
  const double log_gap = log(freq) - log(code);
  return freq * fabs(log_gap);
}
|
||||
|
||||
// Find the index of the code value which is nearest to the given freq
|
||||
int qsearch_nearest(double code_book[], double freq, int start, int end) {
|
||||
if (start == end)
|
||||
return start;
|
||||
|
||||
if (start + 1 == end) {
|
||||
if (distance(freq, code_book[end]) > distance(freq, code_book[start]))
|
||||
return start;
|
||||
return end;
|
||||
}
|
||||
|
||||
int mid = (start + end) / 2;
|
||||
|
||||
if (code_book[mid] > freq)
|
||||
return qsearch_nearest(code_book, freq, start, mid);
|
||||
else
|
||||
return qsearch_nearest(code_book, freq, mid, end);
|
||||
}
|
||||
|
||||
size_t update_code_idx(double freqs[], size_t num, double code_book[],
|
||||
CODEBOOK_TYPE *code_idx) {
|
||||
size_t changed = 0;
|
||||
for (size_t pos = 0; pos < num; pos++) {
|
||||
CODEBOOK_TYPE idx;
|
||||
idx = qsearch_nearest(code_book, freqs[pos], 0, kCodeBookSize - 1);
|
||||
if (idx != code_idx[pos])
|
||||
changed++;
|
||||
code_idx[pos] = idx;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
double recalculate_kernel(double freqs[], size_t num, double code_book[],
|
||||
CODEBOOK_TYPE *code_idx) {
|
||||
double ret = 0;
|
||||
|
||||
size_t *item_num = new size_t[kCodeBookSize];
|
||||
assert(item_num);
|
||||
memset(item_num, 0, sizeof(size_t) * kCodeBookSize);
|
||||
|
||||
double *cb_new = new double[kCodeBookSize];
|
||||
assert(cb_new);
|
||||
memset(cb_new, 0, sizeof(double) * kCodeBookSize);
|
||||
|
||||
for (size_t pos = 0; pos < num; pos++) {
|
||||
ret += distance(freqs[pos], code_book[code_idx[pos]]);
|
||||
|
||||
cb_new[code_idx[pos]] += freqs[pos];
|
||||
item_num[code_idx[pos]] += 1;
|
||||
}
|
||||
|
||||
for (size_t code = 0; code < kCodeBookSize; code++) {
|
||||
assert(item_num[code] > 0);
|
||||
code_book[code] = cb_new[code] / item_num[code];
|
||||
}
|
||||
|
||||
delete [] item_num;
|
||||
delete [] cb_new;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iterate_codes(double freqs[], size_t num, double code_book[],
|
||||
CODEBOOK_TYPE *code_idx) {
|
||||
size_t iter_num = 0;
|
||||
double delta_last = 0;
|
||||
do {
|
||||
size_t changed = update_code_idx(freqs, num, code_book, code_idx);
|
||||
|
||||
double delta = recalculate_kernel(freqs, num, code_book, code_idx);
|
||||
|
||||
if (kPrintDebug0) {
|
||||
printf("---Unigram codebook iteration: %d : %d, %.9f\n",
|
||||
iter_num, changed, delta);
|
||||
}
|
||||
iter_num++;
|
||||
|
||||
if (iter_num > 1 &&
|
||||
(delta == 0 || fabs(delta_last - delta)/fabs(delta) < 0.000000001))
|
||||
break;
|
||||
delta_last = delta;
|
||||
} while (true);
|
||||
}
|
||||
|
||||
|
||||
// The singleton pointer; it starts out NULL and get_instance() fills it in
// on first use.
NGram* NGram::instance_ = NULL;
|
||||
|
||||
// Creates an empty model: not initialized, no entries, and no buffers
// allocated.
NGram::NGram() {
  initialized_ = false;
  idx_num_ = 0;
  // Give the counter a defined value; it was previously left indeterminate
  // until load_ngram() or set_total_freq_none_sys() assigned it.
  total_freq_none_sys_ = 0;
  sys_score_compensation_ = 0;

  lma_freq_idx_ = NULL;
#ifdef ___BUILD_MODEL___
  freq_codes_df_ = NULL;
#endif
  freq_codes_ = NULL;
}
|
||||
|
||||
// Releases the buffers held by the model. free() accepts NULL, so pointers
// that were never allocated need no separate test.
NGram::~NGram() {
  free(lma_freq_idx_);

#ifdef ___BUILD_MODEL___
  free(freq_codes_df_);
#endif

  free(freq_codes_);
}
|
||||
|
||||
// Returns the shared NGram object, creating it on the first call.
NGram& NGram::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new NGram();
  return *instance_;
}
|
||||
|
||||
bool NGram::save_ngram(FILE *fp) {
|
||||
if (!initialized_ || NULL == fp)
|
||||
return false;
|
||||
|
||||
if (0 == idx_num_ || NULL == freq_codes_ || NULL == lma_freq_idx_)
|
||||
return false;
|
||||
|
||||
if (fwrite(&idx_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) !=
|
||||
kCodeBookSize)
|
||||
return false;
|
||||
|
||||
if (fwrite(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NGram::load_ngram(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
initialized_ = false;
|
||||
|
||||
if (fread(&idx_num_, sizeof(uint32), 1, fp) != 1 )
|
||||
return false;
|
||||
|
||||
if (NULL != lma_freq_idx_)
|
||||
free(lma_freq_idx_);
|
||||
|
||||
if (NULL != freq_codes_)
|
||||
free(freq_codes_);
|
||||
|
||||
lma_freq_idx_ = static_cast<CODEBOOK_TYPE*>
|
||||
(malloc(idx_num_ * sizeof(CODEBOOK_TYPE)));
|
||||
freq_codes_ = static_cast<LmaScoreType*>
|
||||
(malloc(kCodeBookSize * sizeof(LmaScoreType)));
|
||||
|
||||
if (NULL == lma_freq_idx_ || NULL == freq_codes_)
|
||||
return false;
|
||||
|
||||
if (fread(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) !=
|
||||
kCodeBookSize)
|
||||
return false;
|
||||
|
||||
if (fread(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_)
|
||||
return false;
|
||||
|
||||
initialized_ = true;
|
||||
|
||||
total_freq_none_sys_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void NGram::set_total_freq_none_sys(size_t freq_none_sys) {
|
||||
total_freq_none_sys_ = freq_none_sys;
|
||||
if (0 == total_freq_none_sys_) {
|
||||
sys_score_compensation_ = 0;
|
||||
} else {
|
||||
double factor = static_cast<double>(kSysDictTotalFreq) / (
|
||||
kSysDictTotalFreq + total_freq_none_sys_);
|
||||
sys_score_compensation_ = static_cast<float>(
|
||||
log(factor) * kLogValueAmplifier);
|
||||
}
|
||||
}
|
||||
|
||||
// The caller makes sure this oject is initialized.
|
||||
float NGram::get_uni_psb(LemmaIdType lma_id) {
|
||||
return static_cast<float>(freq_codes_[lma_freq_idx_[lma_id]]) +
|
||||
sys_score_compensation_;
|
||||
}
|
||||
|
||||
float NGram::convert_psb_to_score(double psb) {
|
||||
float score = static_cast<float>(
|
||||
log(psb) * static_cast<double>(kLogValueAmplifier));
|
||||
if (score > static_cast<float>(kMaxScore)) {
|
||||
score = static_cast<float>(kMaxScore);
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
// Builds the unigram model from lemma_arr.
//
// The frequency of each distinct idx_by_hz value is collected (index 0 gets
// ADD_COUNT, and non-positive frequencies are replaced by ADD_COUNT), the
// frequencies are normalized by their sum, kCodeBookSize distinct values seed
// the code book, iterate_codes() refines it, and each code is finally
// converted to a score with convert_psb_to_score().
//
// Returns false for invalid arguments, when an allocation fails, or when the
// input holds fewer than kCodeBookSize distinct frequencies. After the
// argument check the object is marked uninitialized until the build succeeds.
//
// The member buffers are allocated with malloc() because both the destructor
// and load_ngram() release them with free().
bool NGram::build_unigram(LemmaEntry *lemma_arr, size_t lemma_num,
                          LemmaIdType next_idx_unused) {
  if (NULL == lemma_arr || 0 == lemma_num || next_idx_unused <= 1)
    return false;

  initialized_ = false;

  double total_freq = 0;
  double *freqs = new double[next_idx_unused];

  freqs[0] = ADD_COUNT;
  total_freq += freqs[0];
  LemmaIdType idx_now = 0;
  for (size_t pos = 0; pos < lemma_num; pos++) {
    if (lemma_arr[pos].idx_by_hz == idx_now)
      continue;
    idx_now++;

    assert(lemma_arr[pos].idx_by_hz == idx_now);
    // Never write past the end of freqs, even when asserts are compiled out.
    if (idx_now >= next_idx_unused) {
      delete [] freqs;
      return false;
    }

    freqs[idx_now] = lemma_arr[pos].freq;
    if (freqs[idx_now] <= 0)
      freqs[idx_now] = ADD_COUNT;

    total_freq += freqs[idx_now];
  }

  double max_freq = 0;
  idx_num_ = idx_now + 1;
  assert(idx_now + 1 == next_idx_unused);

  for (size_t pos = 0; pos < idx_num_; pos++) {
    freqs[pos] = freqs[pos] / total_freq;
    assert(freqs[pos] > 0);
    if (freqs[pos] > max_freq)
      max_freq = freqs[pos];
  }

  // Allocate the member buffers. The code books have a fixed size and can be
  // reused; the index table depends on idx_num_, so it is always replaced.
  if (NULL == freq_codes_df_)
    freq_codes_df_ = static_cast<double*>(
        malloc(kCodeBookSize * sizeof(double)));
  if (NULL == freq_codes_)
    freq_codes_ = static_cast<LmaScoreType*>(
        malloc(kCodeBookSize * sizeof(LmaScoreType)));
  free(lma_freq_idx_);
  lma_freq_idx_ = static_cast<CODEBOOK_TYPE*>(
      malloc(idx_num_ * sizeof(CODEBOOK_TYPE)));

  if (NULL == freq_codes_df_ || NULL == freq_codes_ ||
      NULL == lma_freq_idx_) {
    delete [] freqs;
    return false;
  }

  memset(freq_codes_df_, 0, sizeof(double) * kCodeBookSize);
  memset(freq_codes_, 0, sizeof(LmaScoreType) * kCodeBookSize);
  // Start from defined indexes so the first update_code_idx() round does not
  // read indeterminate values.
  memset(lma_freq_idx_, 0, sizeof(CODEBOOK_TYPE) * idx_num_);

  // Seed the code book with the first kCodeBookSize distinct frequencies.
  size_t freq_pos = 0;
  for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) {
    bool found = true;

    while (found) {
      // Running out of frequencies means there are not enough distinct values
      // to fill the code book.
      if (freq_pos >= idx_num_) {
        delete [] freqs;
        return false;
      }

      found = false;
      double cand = freqs[freq_pos];
      for (size_t i = 0; i < code_pos; i++)
        if (freq_codes_df_[i] == cand) {
          found = true;
          break;
        }
      if (found)
        freq_pos++;
    }

    freq_codes_df_[code_pos] = freqs[freq_pos];
    freq_pos++;
  }

  myqsort(freq_codes_df_, kCodeBookSize, sizeof(double), comp_double);

  iterate_codes(freqs, idx_num_, freq_codes_df_, lma_freq_idx_);

  delete [] freqs;

  if (kPrintDebug0) {
    printf("\n------Language Model Unigram Codebook------\n");
  }

  for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) {
    double log_score = log(freq_codes_df_[code_pos]);
    float final_score = convert_psb_to_score(freq_codes_df_[code_pos]);
    if (kPrintDebug0) {
      // code_pos is a size_t; convert it explicitly instead of passing it
      // to "%d".
      printf("code:%lu, probability:%.9f, log score:%.3f, final score: %.3f\n",
             static_cast<unsigned long>(code_pos), freq_codes_df_[code_pos],
             log_score, final_score);
    }
    freq_codes_[code_pos] = static_cast<LmaScoreType>(final_score);
  }

  initialized_ = true;
  return true;
}
#endif
|
||||
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_NGRAM_H__
|
||||
#define PINYINIME_INCLUDE_NGRAM_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Element type of the NGram::lma_freq_idx_ array (one code-book index per
// entry).
typedef unsigned char CODEBOOK_TYPE;
|
||||
|
||||
// Number of entries in the score code book, i.e. the length of the
// freq_codes_ array that NGram::build_unigram() allocates.
static const size_t kCodeBookSize = 256;
|
||||
|
||||
class NGram {
|
||||
public:
|
||||
// The maximum score of a lemma item.
|
||||
static const LmaScoreType kMaxScore = 0x3fff;
|
||||
|
||||
// In order to reduce the storage size, the original log value is amplified by
|
||||
// kScoreAmplifier, and we use LmaScoreType to store.
|
||||
// After this process, an item with a lower score has a higher frequency.
|
||||
static const int kLogValueAmplifier = -800;
|
||||
|
||||
// System words' total frequency. It is not the real total frequency, instead,
|
||||
// It is only used to adjust system lemmas' scores when the user dictionary's
|
||||
// total frequency changes.
|
||||
// In this version, frequencies of system lemmas are fixed. We are considering
|
||||
// to make them changable in next version.
|
||||
static const size_t kSysDictTotalFreq = 100000000;
|
||||
|
||||
private:
|
||||
|
||||
static NGram* instance_;
|
||||
|
||||
bool initialized_;
|
||||
uint32 idx_num_;
|
||||
|
||||
size_t total_freq_none_sys_;
|
||||
|
||||
// Score compensation for system dictionary lemmas.
|
||||
// Because after user adds some user lemmas, the total frequency changes, and
|
||||
// we use this value to normalize the score.
|
||||
float sys_score_compensation_;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
double *freq_codes_df_;
|
||||
#endif
|
||||
LmaScoreType *freq_codes_;
|
||||
CODEBOOK_TYPE *lma_freq_idx_;
|
||||
|
||||
public:
|
||||
NGram();
|
||||
~NGram();
|
||||
|
||||
static NGram& get_instance();
|
||||
|
||||
bool save_ngram(FILE *fp);
|
||||
bool load_ngram(FILE *fp);
|
||||
|
||||
// Set the total frequency of all none system dictionaries.
|
||||
void set_total_freq_none_sys(size_t freq_none_sys);
|
||||
|
||||
float get_uni_psb(LemmaIdType lma_id);
|
||||
|
||||
// Convert a probability to score. Actually, the score will be limited to
|
||||
// kMaxScore, but at runtime, we also need float expression to get accurate
|
||||
// value of the score.
|
||||
// After the conversion, a lower score indicates a higher probability of the
|
||||
// item.
|
||||
static float convert_psb_to_score(double psb);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// For constructing the unigram mode model.
|
||||
bool build_unigram(LemmaEntry *lemma_arr, size_t num,
|
||||
LemmaIdType next_idx_unused);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_NGRAM_H__
|
||||
@@ -0,0 +1,16 @@
|
||||
../build/googlepinyin/obj/dictbuilder.o
|
||||
../build/googlepinyin/obj/dictlist.o
|
||||
../build/googlepinyin/obj/dicttrie.o
|
||||
../build/googlepinyin/obj/lpicache.o
|
||||
../build/googlepinyin/obj/matrixsearch.o
|
||||
../build/googlepinyin/obj/mystdlib.o
|
||||
../build/googlepinyin/obj/ngram.o
|
||||
../build/googlepinyin/obj/pinyinime.o
|
||||
../build/googlepinyin/obj/searchutility.o
|
||||
../build/googlepinyin/obj/spellingtable.o
|
||||
../build/googlepinyin/obj/spellingtrie.o
|
||||
../build/googlepinyin/obj/splparser.o
|
||||
../build/googlepinyin/obj/sync.o
|
||||
../build/googlepinyin/obj/userdict.o
|
||||
../build/googlepinyin/obj/utf16char.o
|
||||
../build/googlepinyin/obj/utf16reader.o
|
||||
@@ -0,0 +1,16 @@
|
||||
../build/googlepinyin/obj/dictbuilder.o
|
||||
../build/googlepinyin/obj/dictlist.o
|
||||
../build/googlepinyin/obj/dicttrie.o
|
||||
../build/googlepinyin/obj/lpicache.o
|
||||
../build/googlepinyin/obj/matrixsearch.o
|
||||
../build/googlepinyin/obj/mystdlib.o
|
||||
../build/googlepinyin/obj/ngram.o
|
||||
../build/googlepinyin/obj/pinyinime.o
|
||||
../build/googlepinyin/obj/searchutility.o
|
||||
../build/googlepinyin/obj/spellingtable.o
|
||||
../build/googlepinyin/obj/spellingtrie.o
|
||||
../build/googlepinyin/obj/splparser.o
|
||||
../build/googlepinyin/obj/sync.o
|
||||
../build/googlepinyin/obj/userdict.o
|
||||
../build/googlepinyin/obj/utf16char.o
|
||||
../build/googlepinyin/obj/utf16reader.o
|
||||
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "pinyinime.h"
|
||||
#include "dicttrie.h"
|
||||
#include "matrixsearch.h"
|
||||
#include "spellingtrie.h"
|
||||
#include <QString>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
using namespace ime_pinyin;
|
||||
|
||||
// The maximum number of the prediction items; also the number of rows of
// predict_buf below.
|
||||
static const size_t kMaxPredictNum = 500;
|
||||
|
||||
// Used to search Pinyin string and give the best candidate.
// NULL while no decoder is open: im_open_decoder()/im_open_decoder_fd()
// assign a new object, im_close_decoder() resets it to NULL.
|
||||
MatrixSearch* matrix_search = NULL;
|
||||
|
||||
// Storage for prediction results; im_get_predicts() points its pre_buf
// output parameter at this array.
char16 predict_buf[kMaxPredictNum][kMaxPredictSize + 1];
|
||||
|
||||
// Opens the decoder from dictionary file names.
// Any previously created decoder object is destroyed first. Returns the
// result of MatrixSearch::init(), or false if no decoder object exists.
bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict) {
  // delete on a null pointer is a no-op, so no explicit guard is needed.
  delete matrix_search;
  matrix_search = new MatrixSearch();

  if (NULL != matrix_search)
    return matrix_search->init(fn_sys_dict, fn_usr_dict);

  return false;
}
|
||||
|
||||
// Opens the decoder from a system dictionary embedded in the file sys_fd
// (at start_offset, length bytes) plus a user dictionary file name.
// Any previously created decoder object is destroyed first. Returns the
// result of MatrixSearch::init_fd(), or false if no decoder object exists.
bool im_open_decoder_fd(int sys_fd, long start_offset, long length,
                        const char *fn_usr_dict) {
  // delete on a null pointer is a no-op, so no explicit guard is needed.
  delete matrix_search;
  matrix_search = new MatrixSearch();

  if (NULL != matrix_search)
    return matrix_search->init_fd(sys_fd, start_offset, length, fn_usr_dict);

  return false;
}
|
||||
|
||||
void im_close_decoder() {
|
||||
if (NULL != matrix_search) {
|
||||
matrix_search->close();
|
||||
delete matrix_search;
|
||||
}
|
||||
matrix_search = NULL;
|
||||
}
|
||||
|
||||
void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len) {
|
||||
if (NULL != matrix_search) {
|
||||
matrix_search->set_max_lens(max_sps_len, max_hzs_len);
|
||||
}
|
||||
}
|
||||
|
||||
void im_flush_cache() {
|
||||
if (NULL != matrix_search)
|
||||
matrix_search->flush_cache();
|
||||
}
|
||||
|
||||
// To be updated.
|
||||
// Searches with the spelling string pybuf of length pylen and returns the
// decoder's candidate count; returns 0 when no decoder is open.
size_t im_search(const char* pybuf, size_t pylen) {
  size_t cand_num = 0;
  if (NULL != matrix_search) {
    matrix_search->search(pybuf, pylen);
    cand_num = matrix_search->get_candidate_num();
  }
  return cand_num;
}
|
||||
|
||||
// Forwards a delete operation to the decoder and returns the resulting
// candidate count; returns 0 when no decoder is open.
size_t im_delsearch(size_t pos, bool is_pos_in_splid,
                    bool clear_fixed_this_step) {
  size_t cand_num = 0;
  if (NULL != matrix_search) {
    matrix_search->delsearch(pos, is_pos_in_splid, clear_fixed_this_step);
    cand_num = matrix_search->get_candidate_num();
  }
  return cand_num;
}
|
||||
|
||||
// Resets the decoder's search state. Does nothing when no decoder is open.
void im_reset_search() {
  if (NULL != matrix_search) {
    matrix_search->reset_search();
  }
}
|
||||
|
||||
// To be removed
|
||||
// Stub: ignores the letter and always reports zero candidates.
size_t im_add_letter(char ch) {
  static_cast<void>(ch);  // Intentionally unused.
  const size_t no_candidates = 0;
  return no_candidates;
}
|
||||
|
||||
const char* im_get_sps_str(size_t *decoded_len) {
|
||||
if (NULL == matrix_search)
|
||||
return NULL;
|
||||
|
||||
return matrix_search->get_pystr(decoded_len);
|
||||
}
|
||||
|
||||
// Fetches candidate cand_id into cand_str (capacity max_len) through the
// decoder and returns its result; returns NULL when no decoder is open.
char16* im_get_candidate(size_t cand_id, char16* cand_str,
                         size_t max_len) {
  if (NULL != matrix_search)
    return matrix_search->get_candidate(cand_id, cand_str, max_len);

  return NULL;
}
|
||||
|
||||
size_t im_get_spl_start_pos(const uint16 *&spl_start) {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->get_spl_start(spl_start);
|
||||
}
|
||||
|
||||
size_t im_choose(size_t choice_id) {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->choose(choice_id);
|
||||
}
|
||||
|
||||
size_t im_cancel_last_choice() {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->cancel_last_choice();
|
||||
}
|
||||
|
||||
size_t im_get_fixed_len() {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->get_fixedlen();
|
||||
}
|
||||
|
||||
// To be removed
|
||||
// Stub: performs no work and always reports success.
bool im_cancel_input() {
  const bool succeeded = true;
  return succeeded;
}
|
||||
|
||||
|
||||
size_t im_get_predicts(const char16 *his_buf,
|
||||
char16 (*&pre_buf)[kMaxPredictSize + 1]) {
|
||||
if (NULL == his_buf)
|
||||
return 0;
|
||||
|
||||
size_t fixed_len = utf16_strlen(his_buf);
|
||||
const char16 *fixed_ptr = his_buf;
|
||||
if (fixed_len > kMaxPredictSize) {
|
||||
fixed_ptr += fixed_len - kMaxPredictSize;
|
||||
fixed_len = kMaxPredictSize;
|
||||
}
|
||||
|
||||
pre_buf = predict_buf;
|
||||
return matrix_search->get_predicts(his_buf, pre_buf, kMaxPredictNum);
|
||||
}
|
||||
|
||||
void im_enable_shm_as_szm(bool enable) {
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
spl_trie.szm_enable_shm(enable);
|
||||
}
|
||||
|
||||
void im_enable_ym_as_szm(bool enable) {
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
spl_trie.szm_enable_ym(enable);
|
||||
}
|
||||
|
||||
// Flushes the decoder cache and then (re)initializes the user dictionary
// from fn_usr_dict. Does nothing when no decoder is open.
void im_init_user_dictionary(const char *fn_usr_dict) {
  if (NULL != matrix_search) {
    matrix_search->flush_cache();
    matrix_search->init_user_dictionary(fn_usr_dict);
  }
}
|
||||
|
||||
bool im_is_user_dictionary_enabled(void) {
|
||||
return NULL != matrix_search ? matrix_search->is_user_dictionary_enabled() : false;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
#define PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
/**
|
||||
* Open the decoder engine via the system and user dictionary file names.
|
||||
*
|
||||
* @param fn_sys_dict The file name of the system dictionary.
|
||||
* @param fn_usr_dict The file name of the user dictionary.
|
||||
* @return true if open the decoder engine successfully.
|
||||
*/
|
||||
bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Open the decoder engine via the system dictionary FD and user dictionary
|
||||
* file name. Because on Android, the system dictionary is embedded in the
|
||||
* whole application apk file.
|
||||
*
|
||||
* @param sys_fd The file in which the system dictionary is embedded.
|
||||
* @param start_offset The starting position of the system dictionary in the
|
||||
* file sys_fd.
|
||||
* @param length The length of the system dictionary in the file sys_fd,
|
||||
* counted in byte.
|
||||
* @return true if succeed.
|
||||
*/
|
||||
bool im_open_decoder_fd(int sys_fd, long start_offset, long length,
|
||||
const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Close the decoder engine.
|
||||
*/
|
||||
void im_close_decoder();
|
||||
|
||||
/**
|
||||
* Set maximum limitations for decoding. If this function is not called,
|
||||
* default values will be used. For example, due to screen size limitation,
|
||||
* the UI engine of the IME can only show a certain number of letters(input)
|
||||
* to decode, and a certain number of Chinese characters(output). If after
|
||||
* user adds a new letter, the input or the output string is longer than the
|
||||
* limitations, the engine will discard the recent letter.
|
||||
*
|
||||
* @param max_sps_len Maximum length of the spelling string(Pinyin string).
|
||||
* @param max_hzs_len Maximum length of the decoded Chinese character string.
|
||||
*/
|
||||
void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len);
|
||||
|
||||
/**
|
||||
* Flush cached data to persistent memory. Because at runtime, in order to
|
||||
* achieve best performance, some data is only stored in memory.
|
||||
*/
|
||||
void im_flush_cache();
|
||||
|
||||
/**
|
||||
* Use a spelling string(Pinyin string) to search. The engine will try to do
|
||||
* an incremental search based on its previous search result, so if the new
|
||||
* string has the same prefix with the previous one stored in the decoder,
|
||||
* the decoder will only continue the search from the end of the prefix.
|
||||
* If the caller needs to do a brand new search, please call im_reset_search()
|
||||
* first. Calling im_search() is equivalent to calling im_add_letter() one by
|
||||
* one.
|
||||
*
|
||||
* @param sps_buf The spelling string buffer to decode.
|
||||
* @param sps_len The length of the spelling string buffer.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_search(const char* sps_buf, size_t sps_len);
|
||||
|
||||
/**
|
||||
* Perform a delete operation on the current search result, and redo the search if
|
||||
* necessary.
|
||||
*
|
||||
* @param pos The position of char in spelling string to delete, or the
|
||||
* position of spelling id in result string to delete.
|
||||
* @param is_pos_in_splid Indicate whether the pos parameter is the position
|
||||
* in the spelling string, or the position in the result spelling id string.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_delsearch(size_t pos, bool is_pos_in_splid,
|
||||
bool clear_fixed_this_step);
|
||||
|
||||
/**
|
||||
* Reset the previous search result.
|
||||
*/
|
||||
void im_reset_search();
|
||||
|
||||
/**
|
||||
* Add a Pinyin letter to the current spelling string kept by decoder. If the
|
||||
* decoder fails in adding the letter, it will do nothing. im_get_sps_str()
|
||||
* can be used to get the spelling string kept by decoder currently.
|
||||
*
|
||||
* @param ch The letter to add.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_add_letter(char ch);
|
||||
|
||||
/**
|
||||
* Get the spelling string kept by the decoder.
|
||||
*
|
||||
* @param decoded_len Used to return how many characters in the spelling
|
||||
* string is successfully parsed.
|
||||
* @return The spelling string kept by the decoder.
|
||||
*/
|
||||
const char *im_get_sps_str(size_t *decoded_len);
|
||||
|
||||
/**
|
||||
* Get a candidate(or choice) string.
|
||||
*
|
||||
* @param cand_id The id to get a candidate. Started from 0. Usually, id 0
|
||||
* is a sentence-level candidate.
|
||||
* @param cand_str The buffer to store the candidate.
|
||||
* @param max_len The maximum length of the buffer.
|
||||
* @return cand_str if succeeds, otherwise NULL.
|
||||
*/
|
||||
char16* im_get_candidate(size_t cand_id, char16* cand_str,
|
||||
size_t max_len);
|
||||
|
||||
/**
|
||||
* Get the segmentation information(the starting positions) of the spelling
|
||||
* string.
|
||||
*
|
||||
* @param spl_start Used to return the starting positions.
|
||||
* @return The number of spelling ids. If it is L, there will be L+1 valid
|
||||
* elements in spl_start, and spl_start[L] is the position after the end of
|
||||
* the last spelling id.
|
||||
*/
|
||||
size_t im_get_spl_start_pos(const uint16 *&spl_start);
|
||||
|
||||
/**
|
||||
* Choose a candidate and make it fixed. If the candidate does not match
|
||||
* the end of all spelling ids, new candidates will be provided from the
|
||||
* first unfixed position. If the candidate matches the end of the all
|
||||
* spelling ids, there will be only one new candidate, or the whole fixed
|
||||
* sentence.
|
||||
*
|
||||
* @param cand_id The id of candidate to select and make it fixed.
|
||||
* @return The number of candidates. If after the selection, the whole result
|
||||
* string has been fixed, there will be only one candidate.
|
||||
*/
|
||||
size_t im_choose(size_t cand_id);
|
||||
|
||||
/**
|
||||
* Cancel the last selection, or revert the last operation of im_choose().
|
||||
*
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_cancel_last_choice();
|
||||
|
||||
/**
|
||||
* Get the number of fixed spelling ids, or Chinese characters.
|
||||
*
|
||||
* @return The number of fixed spelling ids, or Chinese characters.
|
||||
*/
|
||||
size_t im_get_fixed_len();
|
||||
|
||||
/**
|
||||
* Cancel the input state and reset the search workspace.
|
||||
*/
|
||||
bool im_cancel_input();
|
||||
|
||||
/**
|
||||
* Get prediction candidates based on the given fixed Chinese string as the
|
||||
* history.
|
||||
*
|
||||
* @param his_buf The history buffer to do the prediction. It should be ended
|
||||
* with '\0'.
|
||||
* @param pre_buf Used to return prediction result list.
|
||||
* @return The number of predicted result string.
|
||||
*/
|
||||
size_t im_get_predicts(const char16 *his_buf,
|
||||
char16 (*&pre_buf)[kMaxPredictSize + 1]);
|
||||
|
||||
/**
|
||||
* Enable Shengmus in ShouZiMu mode.
|
||||
*/
|
||||
void im_enable_shm_as_szm(bool enable);
|
||||
|
||||
/**
|
||||
* Enable Yunmus in ShouZiMu mode.
|
||||
*/
|
||||
void im_enable_ym_as_szm(bool enable);
|
||||
|
||||
/**
|
||||
* Initializes or uninitializes the user dictionary.
|
||||
*
|
||||
* @param fn_usr_dict The file name of the user dictionary.
|
||||
*/
|
||||
void im_init_user_dictionary(const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Returns the current status of the user dictionary.
|
||||
*/
|
||||
bool im_is_user_dictionary_enabled(void);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "mystdlib.h"
|
||||
#include "searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// True when lma_id lies in the range (0, kSysDictIdEnd].
bool is_system_lemma(LemmaIdType lma_id) {
  const bool above_zero = 0 < lma_id;
  const bool within_sys_range = lma_id <= kSysDictIdEnd;
  return above_zero && within_sys_range;
}
|
||||
|
||||
// True when lma_id lies in the range [kUserDictIdStart, kUserDictIdEnd].
bool is_user_lemma(LemmaIdType lma_id) {
  const bool at_or_after_start = kUserDictIdStart <= lma_id;
  const bool at_or_before_end = lma_id <= kUserDictIdEnd;
  return at_or_after_start && at_or_before_end;
}
|
||||
|
||||
// True when lma_id is exactly kLemmaIdComposing.
bool is_composing_lemma(LemmaIdType lma_id) {
  return lma_id == kLemmaIdComposing;
}
|
||||
|
||||
int cmp_lpi_with_psb(const void *p1, const void *p2) {
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->psb >
|
||||
(static_cast<const LmaPsbItem*>(p2))->psb)
|
||||
return 1;
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->psb <
|
||||
(static_cast<const LmaPsbItem*>(p2))->psb)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpi_with_unified_psb(const void *p1, const void *p2) {
|
||||
const LmaPsbItem *item1 = static_cast<const LmaPsbItem*>(p1);
|
||||
const LmaPsbItem *item2 = static_cast<const LmaPsbItem*>(p2);
|
||||
|
||||
// The real unified psb is psb1 / lma_len1 and psb2 * lma_len2
|
||||
// But we use psb1 * lma_len2 and psb2 * lma_len1 to get better
|
||||
// precision.
|
||||
size_t up1 = item1->psb * (item2->lma_len);
|
||||
size_t up2 = item2->psb * (item1->lma_len);
|
||||
if (up1 < up2) {
|
||||
return -1;
|
||||
}
|
||||
if (up1 > up2) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpi_with_id(const void *p1, const void *p2) {
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->id <
|
||||
(static_cast<const LmaPsbItem*>(p2))->id)
|
||||
return -1;
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->id >
|
||||
(static_cast<const LmaPsbItem*>(p2))->id)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpi_with_hanzi(const void *p1, const void *p2) {
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->hanzi <
|
||||
(static_cast<const LmaPsbItem*>(p2))->hanzi)
|
||||
return -1;
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->hanzi >
|
||||
(static_cast<const LmaPsbItem*>(p2))->hanzi)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpsi_with_str(const void *p1, const void *p2) {
|
||||
return utf16_strcmp((static_cast<const LmaPsbStrItem*>(p1))->str,
|
||||
(static_cast<const LmaPsbStrItem*>(p2))->str);
|
||||
}
|
||||
|
||||
|
||||
int cmp_hanzis_1(const void *p1, const void *p2) {
|
||||
if (*static_cast<const char16*>(p1) <
|
||||
*static_cast<const char16*>(p2))
|
||||
return -1;
|
||||
|
||||
if (*static_cast<const char16*>(p1) >
|
||||
*static_cast<const char16*>(p2))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_hanzis_2(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 2);
|
||||
}
|
||||
|
||||
int cmp_hanzis_3(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 3);
|
||||
}
|
||||
|
||||
int cmp_hanzis_4(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 4);
|
||||
}
|
||||
|
||||
int cmp_hanzis_5(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 5);
|
||||
}
|
||||
|
||||
int cmp_hanzis_6(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 6);
|
||||
}
|
||||
|
||||
int cmp_hanzis_7(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 7);
|
||||
}
|
||||
|
||||
int cmp_hanzis_8(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 8);
|
||||
}
|
||||
|
||||
int cmp_npre_by_score(const void *p1, const void *p2) {
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb >
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb <
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_npre_by_hislen_score(const void *p1, const void *p2) {
|
||||
if ((static_cast<const NPredictItem*>(p1))->his_len <
|
||||
(static_cast<const NPredictItem*>(p2))->his_len)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->his_len >
|
||||
(static_cast<const NPredictItem*>(p2))->his_len)
|
||||
return -1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb >
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb <
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_npre_by_hanzi_score(const void *p1, const void *p2) {
|
||||
int ret_v = (utf16_strncmp((static_cast<const NPredictItem*>(p1))->pre_hzs,
|
||||
(static_cast<const NPredictItem*>(p2))->pre_hzs, kMaxPredictSize));
|
||||
if (0 != ret_v)
|
||||
return ret_v;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb >
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb <
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Sorts npre_items with cmp_npre_by_hanzi_score and compacts the array in
// place so that each pre_hzs string appears once; of several items with the
// same string, the first one in sorted order is kept.
// Returns the number of items remaining, or 0 for a NULL or empty array.
size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num) {
  if (NULL == npre_items || 0 == npre_num)
    return 0;

  myqsort(npre_items, npre_num, sizeof(NPredictItem), cmp_npre_by_hanzi_score);

  size_t kept = 1;  // The first item always survives.
  for (size_t scan = 1; scan < npre_num; scan++) {
    const bool same_as_last_kept =
        0 == utf16_strncmp(npre_items[scan].pre_hzs,
                           npre_items[kept - 1].pre_hzs,
                           kMaxPredictSize);
    if (same_as_last_kept)
      continue;

    // Move the item down only when a gap has opened up.
    if (kept != scan)
      npre_items[kept] = npre_items[scan];
    kept++;
  }
  return kept;
}
|
||||
|
||||
// Rounds size up to the next multiple of sizeof(size_t).
size_t align_to_size_t(size_t size) {
  const size_t unit = sizeof(size_t);
  const size_t units_needed = (size + unit - 1) / unit;
  return units_needed * unit;
}
|
||||
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
|
||||
#define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Type used to identify the size of a pool, such as id pool, etc.
|
||||
typedef uint16 PoolPosType;
|
||||
|
||||
// Type used to identify a parsing mile stone in an atom dictionary.
|
||||
typedef uint16 MileStoneHandle;
|
||||
|
||||
// Type used to express a lemma and its probability score.
|
||||
typedef struct {
|
||||
size_t id:(kLemmaIdSize * 8);
|
||||
size_t lma_len:4;
|
||||
uint16 psb; // The score, the lower psb, the higher possibility.
|
||||
// For single character items, we may also need Hanzi.
|
||||
// For multiple characer items, ignore it.
|
||||
char16 hanzi;
|
||||
} LmaPsbItem, *PLmaPsbItem;
|
||||
|
||||
// LmaPsbItem extended with string.
|
||||
typedef struct {
|
||||
LmaPsbItem lpi;
|
||||
char16 str[kMaxLemmaSize + 1];
|
||||
} LmaPsbStrItem, *PLmaPsbStrItem;
|
||||
|
||||
|
||||
typedef struct {
|
||||
float psb;
|
||||
char16 pre_hzs[kMaxPredictSize];
|
||||
uint16 his_len; // The length of the history used to do the prediction.
|
||||
} NPredictItem, *PNPredictItem;
|
||||
|
||||
// Parameter structure used to extend in a dictionary. All dictionaries
|
||||
// receives the same DictExtPara and a dictionary specific MileStoneHandle for
|
||||
// extending.
|
||||
//
|
||||
// When the user inputs a new character, AtomDictBase::extend_dict() will be
|
||||
// called at least once for each dictionary.
|
||||
//
|
||||
// For example, when the user inputs "wm", extend_dict() will be called twice,
|
||||
// and the DictExtPara parameter are as follows respectively:
|
||||
// 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1;
|
||||
// splid_end_split = false; id_start = wa(the first id start with 'w');
|
||||
// id_num = number of ids starting with 'w'.
|
||||
// 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1;
|
||||
// splid_end_split = false; id_start = wa; id_num = number of ids starting with
|
||||
// 'w'.
|
||||
//
|
||||
// For string "women", one of the cases of the DictExtPara parameter is:
|
||||
// splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"),
|
||||
// step_no = 4; splid_end_split = false; id_start = men, id_num = 1.
|
||||
//
|
||||
typedef struct {
|
||||
// Spelling ids for extending, there are splids_extended + 1 ids in the
|
||||
// buffer.
|
||||
// For a normal lemma, there can only be kMaxLemmaSize spelling ids in max,
|
||||
// but for a composing phrase, there can kMaxSearchSteps spelling ids.
|
||||
uint16 splids[kMaxSearchSteps];
|
||||
|
||||
// Number of ids that have been used before. splids[splids_extended] is the
|
||||
// newly added id for the current extension.
|
||||
uint16 splids_extended;
|
||||
|
||||
// The step span of the extension. It is also the size of the string for
|
||||
// the newly added spelling id.
|
||||
uint16 ext_len;
|
||||
|
||||
// The step number for the current extension. It is also the ending position
|
||||
// in the input Pinyin string for the substring of spelling ids in splids[].
|
||||
// For example, when the user inputs "women", step_no = 4.
|
||||
// This parameter may useful to manage the MileStoneHandle list for each
|
||||
// step. When the user deletes a character from the string, MileStoneHandle
|
||||
// objects for the the steps after that character should be reset; when the
|
||||
// user begins a new string, all MileStoneHandle objects should be reset.
|
||||
uint16 step_no;
|
||||
|
||||
// Indicate whether the newly added spelling ends with a splitting character
|
||||
bool splid_end_split;
|
||||
|
||||
// If the newly added id is a half id, id_start is the first id of the
|
||||
// corresponding full ids; if the newly added id is a full id, id_start is
|
||||
// that id.
|
||||
uint16 id_start;
|
||||
|
||||
// If the newly added id is a half id, id_num is the number of corresponding
|
||||
// ids; if it is a full id, id_num == 1.
|
||||
uint16 id_num;
|
||||
}DictExtPara, *PDictExtPara;
|
||||
|
||||
// Predicates on a lemma id. Only the declarations are visible here.
// NOTE(review): presumably each tests which id range lma_id belongs to --
// confirm against the definitions.
bool is_system_lemma(LemmaIdType lma_id);
|
||||
bool is_user_lemma(LemmaIdType lma_id);
|
||||
bool is_composing_lemma(LemmaIdType lma_id);
|
||||
|
||||
// The cmp_* functions below take two untyped pointers and return int, which
// is the comparator signature expected by qsort()/bsearch().
// NOTE(review): the element type each one casts to is not visible in this
// header -- check the definition before pairing one with an array.
int cmp_lpi_with_psb(const void *p1, const void *p2);
|
||||
int cmp_lpi_with_unified_psb(const void *p1, const void *p2);
|
||||
int cmp_lpi_with_id(const void *p1, const void *p2);
|
||||
int cmp_lpi_with_hanzi(const void *p1, const void *p2);
|
||||
|
||||
int cmp_lpsi_with_str(const void *p1, const void *p2);
|
||||
|
||||
// NOTE(review): the numeric suffix presumably is the number of Hanzi
// characters compared -- confirm against the definitions.
int cmp_hanzis_1(const void *p1, const void *p2);
|
||||
int cmp_hanzis_2(const void *p1, const void *p2);
|
||||
int cmp_hanzis_3(const void *p1, const void *p2);
|
||||
int cmp_hanzis_4(const void *p1, const void *p2);
|
||||
int cmp_hanzis_5(const void *p1, const void *p2);
|
||||
int cmp_hanzis_6(const void *p1, const void *p2);
|
||||
int cmp_hanzis_7(const void *p1, const void *p2);
|
||||
int cmp_hanzis_8(const void *p1, const void *p2);
|
||||
|
||||
int cmp_npre_by_score(const void *p1, const void *p2);
|
||||
int cmp_npre_by_hislen_score(const void *p1, const void *p2);
|
||||
int cmp_npre_by_hanzi_score(const void *p1, const void *p2);
|
||||
|
||||
|
||||
// Takes an array of npre_num NPredictItem entries and returns a size_t.
// NOTE(review): presumably the returned value is the number of items left
// after duplicates are removed -- confirm against the definition.
size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num);
|
||||
|
||||
// NOTE(review): presumably rounds size up to a multiple of sizeof(size_t) --
// confirm against the definition.
size_t align_to_size_t(size_t size);
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
|
||||
@@ -0,0 +1,313 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "spellingtable.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
|
||||
const char SpellingTable::
|
||||
kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1] = {"HM", "HNG", "NG"};
|
||||
|
||||
// "" is the biggest, so that all empty strings will be moved to the end
|
||||
// _eb mean empty is biggest
|
||||
int compare_raw_spl_eb(const void* p1, const void* p2) {
|
||||
if ('\0' == (static_cast<const RawSpelling*>(p1))->str[0])
|
||||
return 1;
|
||||
|
||||
if ('\0' == (static_cast<const RawSpelling*>(p2))->str[0])
|
||||
return -1;
|
||||
|
||||
return strcmp((static_cast<const RawSpelling*>(p1))->str,
|
||||
(static_cast<const RawSpelling*>(p2))->str);
|
||||
}
|
||||
|
||||
// Returns the smallest value >= `value` that has no divisor in
// [2, sqrt(value)], i.e. the next prime for value >= 2. Despite the name, the
// result is a prime rather than merely an odd number. 0 and 1 are returned
// unchanged because the divisor scan is empty for them.
//
// The divisor bound is checked with integer arithmetic (d <= v / d) instead
// of a floating-point sqrt(), so the test stays exact for every size_t.
size_t get_odd_next(size_t value) {
  for (size_t candidate = value; ; candidate++) {
    bool has_divisor = false;
    for (size_t divisor = 2; divisor <= candidate / divisor; divisor++) {
      if (candidate % divisor == 0) {
        has_divisor = true;
        break;
      }
    }

    if (!has_divisor)
      return candidate;
  }
}
|
||||
|
||||
// Creates an empty, frozen table. init_table() must be called before
// put_spelling()/contain() will accept anything.
SpellingTable::SpellingTable() {
  need_score_ = false;
  raw_spellings_ = NULL;
  spelling_buf_ = NULL;
  // Give every member a defined value so that the getters and the hash
  // helpers never read indeterminate data before init_table() runs.
  spelling_max_num_ = 0;
  spelling_size_ = 0;
  spelling_num_ = 0;
  total_freq_ = 0;
  score_amplifier_ = 0;
  average_score_ = 0;
  frozen_ = true;
}
|
||||
|
||||
// Releases the hash array and the spelling buffer owned by the table.
SpellingTable::~SpellingTable() {
  this->free_resource();
}
|
||||
|
||||
// Hashes a spelling: sums the characters of spelling_str (stopping at the
// terminator or after spelling_size_ characters, whichever comes first) and
// reduces the sum modulo the table size.
size_t SpellingTable::get_hash_pos(const char* spelling_str) {
  size_t char_sum = 0;
  size_t idx = 0;
  while (idx < spelling_size_ && '\0' != spelling_str[idx]) {
    char_sum += (size_t)spelling_str[idx];
    idx++;
  }

  return char_sum % spelling_max_num_;
}
|
||||
|
||||
// Returns the slot to probe after hash_pos (open addressing with a fixed
// stride of 123, wrapped to the table size).
//
// If the table size divides 123 (sizes 1, 3 and 41), the stride would wrap to
// zero and the probe would never leave its starting slot, so a stride of 1 is
// used instead. For every other size the sequence is unchanged.
size_t SpellingTable::hash_pos_next(size_t hash_pos) {
  size_t stride = 123 % spelling_max_num_;
  if (0 == stride)
    stride = 1;

  return (hash_pos + stride) % spelling_max_num_;
}
|
||||
|
||||
// Frees both heap arrays and resets the pointers so that the function may be
// called repeatedly. delete [] on a NULL pointer is a no-op, so no explicit
// NULL tests are needed.
void SpellingTable::free_resource() {
  delete [] raw_spellings_;
  delete [] spelling_buf_;

  raw_spellings_ = NULL;
  spelling_buf_ = NULL;
}
|
||||
|
||||
bool SpellingTable::init_table(size_t pure_spl_size, size_t spl_max_num,
|
||||
bool need_score) {
|
||||
if (pure_spl_size == 0 || spl_max_num ==0)
|
||||
return false;
|
||||
|
||||
need_score_ = need_score;
|
||||
|
||||
free_resource();
|
||||
|
||||
spelling_size_ = pure_spl_size + 1;
|
||||
if (need_score)
|
||||
spelling_size_ += 1;
|
||||
spelling_max_num_ = get_odd_next(spl_max_num);
|
||||
spelling_num_ = 0;
|
||||
|
||||
raw_spellings_ = new RawSpelling[spelling_max_num_];
|
||||
spelling_buf_ = new char[spelling_max_num_ * (spelling_size_)];
|
||||
if (NULL == raw_spellings_ || NULL == spelling_buf_) {
|
||||
free_resource();
|
||||
return false;
|
||||
}
|
||||
|
||||
memset(raw_spellings_, 0, spelling_max_num_ * sizeof(RawSpelling));
|
||||
memset(spelling_buf_, 0, spelling_max_num_ * (spelling_size_));
|
||||
frozen_ = false;
|
||||
total_freq_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTable::put_spelling(const char* spelling_str, double freq) {
|
||||
if (frozen_ || NULL == spelling_str)
|
||||
return false;
|
||||
|
||||
for (size_t pos = 0; pos < kNotSupportNum; pos++) {
|
||||
if (strcmp(spelling_str, kNotSupportList[pos]) == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
total_freq_ += freq;
|
||||
|
||||
size_t hash_pos = get_hash_pos(spelling_str);
|
||||
|
||||
raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
|
||||
|
||||
if (strncmp(raw_spellings_[hash_pos].str, spelling_str,
|
||||
spelling_size_ - 1) == 0) {
|
||||
raw_spellings_[hash_pos].freq += freq;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t hash_pos_ori = hash_pos;
|
||||
|
||||
while (true) {
|
||||
if (strncmp(raw_spellings_[hash_pos].str,
|
||||
spelling_str, spelling_size_ - 1) == 0) {
|
||||
raw_spellings_[hash_pos].freq += freq;
|
||||
return true;
|
||||
}
|
||||
|
||||
if ('\0' == raw_spellings_[hash_pos].str[0]) {
|
||||
raw_spellings_[hash_pos].freq += freq;
|
||||
strncpy(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1);
|
||||
raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
|
||||
spelling_num_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
hash_pos = hash_pos_next(hash_pos);
|
||||
if (hash_pos_ori == hash_pos)
|
||||
return false;
|
||||
}
|
||||
|
||||
// never reach here
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SpellingTable::contain(const char* spelling_str) {
|
||||
if (NULL == spelling_str || NULL == spelling_buf_ || frozen_)
|
||||
return false;
|
||||
|
||||
size_t hash_pos = get_hash_pos(spelling_str);
|
||||
|
||||
if ('\0' == raw_spellings_[hash_pos].str[0])
|
||||
return false;
|
||||
|
||||
if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1)
|
||||
== 0)
|
||||
return true;
|
||||
|
||||
size_t hash_pos_ori = hash_pos;
|
||||
|
||||
while (true) {
|
||||
hash_pos = hash_pos_next(hash_pos);
|
||||
if (hash_pos_ori == hash_pos)
|
||||
return false;
|
||||
|
||||
if ('\0' == raw_spellings_[hash_pos].str[0])
|
||||
return false;
|
||||
|
||||
if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1)
|
||||
== 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
// never reach here
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* SpellingTable::arrange(size_t *item_size, size_t *spl_num) {
|
||||
if (NULL == raw_spellings_ || NULL == spelling_buf_ ||
|
||||
NULL == item_size || NULL == spl_num)
|
||||
return NULL;
|
||||
|
||||
qsort(raw_spellings_, spelling_max_num_, sizeof(RawSpelling),
|
||||
compare_raw_spl_eb);
|
||||
|
||||
// After sorting, only the first spelling_num_ items are valid.
|
||||
// Copy them to the destination buffer.
|
||||
for (size_t pos = 0; pos < spelling_num_; pos++) {
|
||||
strncpy(spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str,
|
||||
spelling_size_);
|
||||
}
|
||||
|
||||
if (need_score_) {
|
||||
if (kPrintDebug0)
|
||||
printf("------------Spelling Possiblities--------------\n");
|
||||
|
||||
double max_score = 0;
|
||||
double min_score = 0;
|
||||
|
||||
// After sorting, only the first spelling_num_ items are valid.
|
||||
for (size_t pos = 0; pos < spelling_num_; pos++) {
|
||||
raw_spellings_[pos].freq /= total_freq_;
|
||||
if (need_score_) {
|
||||
if (0 == pos) {
|
||||
max_score = raw_spellings_[0].freq;
|
||||
min_score = max_score;
|
||||
} else {
|
||||
if (raw_spellings_[pos].freq > max_score)
|
||||
max_score = raw_spellings_[pos].freq;
|
||||
if (raw_spellings_[pos].freq < min_score)
|
||||
min_score = raw_spellings_[pos].freq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (kPrintDebug0)
|
||||
printf("-----max psb: %f, min psb: %f\n", max_score, min_score);
|
||||
|
||||
max_score = log(max_score);
|
||||
min_score = log(min_score);
|
||||
|
||||
if (kPrintDebug0)
|
||||
printf("-----max log value: %f, min log value: %f\n",
|
||||
max_score, min_score);
|
||||
|
||||
// The absolute value of min_score is bigger than that of max_score because
|
||||
// both of them are negative after log function.
|
||||
score_amplifier_ = 1.0 * 255 / min_score;
|
||||
|
||||
double average_score = 0;
|
||||
for (size_t pos = 0; pos < spelling_num_; pos++) {
|
||||
double score = log(raw_spellings_[pos].freq) * score_amplifier_;
|
||||
assert(score >= 0);
|
||||
|
||||
average_score += score;
|
||||
|
||||
// Because of calculation precision issue, score might be a little bigger
|
||||
// than 255 after being amplified.
|
||||
if (score > 255)
|
||||
score = 255;
|
||||
char *this_spl_buf = spelling_buf_ + pos * spelling_size_;
|
||||
this_spl_buf[spelling_size_ - 1] =
|
||||
static_cast<char>((unsigned char)score);
|
||||
|
||||
if (kPrintDebug0) {
|
||||
printf("---pos:%d, %s, psb:%d\n", pos, this_spl_buf,
|
||||
(unsigned char)this_spl_buf[spelling_size_ -1]);
|
||||
}
|
||||
}
|
||||
average_score /= spelling_num_;
|
||||
assert(average_score <= 255);
|
||||
average_score_ = static_cast<uint8>(average_score);
|
||||
|
||||
if (kPrintDebug0)
|
||||
printf("\n----Score Amplifier: %f, Average Score: %d\n", score_amplifier_,
|
||||
average_score_);
|
||||
}
|
||||
|
||||
*item_size = spelling_size_;
|
||||
*spl_num = spelling_num_;
|
||||
frozen_ = true;
|
||||
return spelling_buf_;
|
||||
}
|
||||
|
||||
float SpellingTable::get_score_amplifier() {
|
||||
return static_cast<float>(score_amplifier_);
|
||||
}
|
||||
|
||||
unsigned char SpellingTable::get_average_score() {
|
||||
return average_score_;
|
||||
}
|
||||
|
||||
#endif // ___BUILD_MODEL___
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SPELLINGTABLE_H__
|
||||
#define PINYINIME_INCLUDE_SPELLINGTABLE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
|
||||
// Longest spelling, in characters, that a RawSpelling can hold (terminator
// not included).
const size_t kMaxSpellingSize = kMaxPinyinSize;
|
||||
|
||||
// One hash-table slot of SpellingTable: a NUL-terminated spelling together
// with its accumulated frequency. A slot whose str is empty is unused.
typedef struct {
|
||||
char str[kMaxSpellingSize + 1];
|
||||
double freq;
|
||||
} RawSpelling, *PRawSpelling;
|
||||
|
||||
// This class is used to store the spelling strings
|
||||
// The length of the input spelling string should be less or equal to the
|
||||
// spelling_size_ (set by init_table). If the input string is too long,
|
||||
// we only keep its first spelling_size_ chars.
|
||||
class SpellingTable {
|
||||
private:
|
||||
static const size_t kNotSupportNum = 3;
|
||||
static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1];
|
||||
|
||||
bool need_score_;
|
||||
|
||||
size_t spelling_max_num_;
|
||||
|
||||
RawSpelling *raw_spellings_;
|
||||
|
||||
// Used to store spelling strings. If the spelling table needs to calculate
|
||||
// score, an extra char after each spelling string is the score.
|
||||
// An item with a lower score has a higher probability.
|
||||
char *spelling_buf_;
|
||||
size_t spelling_size_;
|
||||
|
||||
double total_freq_;
|
||||
|
||||
size_t spelling_num_;
|
||||
|
||||
double score_amplifier_;
|
||||
|
||||
unsigned char average_score_;
|
||||
|
||||
// If frozen is true, put_spelling() and contain() are not allowed to call.
|
||||
bool frozen_;
|
||||
|
||||
size_t get_hash_pos(const char* spelling_str);
|
||||
size_t hash_pos_next(size_t hash_pos);
|
||||
void free_resource();
|
||||
public:
|
||||
SpellingTable();
|
||||
~SpellingTable();
|
||||
|
||||
// pure_spl_size is the pure maximum spelling string size. For example,
|
||||
// "zhuang" is the longgest item in Pinyin, so pure_spl_size should be 6.
|
||||
// spl_max_num is the maximum number of spelling strings to store.
|
||||
// need_score is used to indicate whether the caller needs to calculate a
|
||||
// score for each spelling.
|
||||
bool init_table(size_t pure_spl_size, size_t spl_max_num, bool need_score);
|
||||
|
||||
// Put a spelling string to the table.
|
||||
// It always returns false if called after arrange() withtout a new
|
||||
// init_table() operation.
|
||||
// freq is the spelling's occuring count.
|
||||
// If the spelling has been in the table, occuring count will accumulated.
|
||||
bool put_spelling(const char* spelling_str, double spl_count);
|
||||
|
||||
// Test whether a spelling string is in the table.
|
||||
// It always returns false, when being called after arrange() withtout a new
|
||||
// init_table() operation.
|
||||
bool contain(const char* spelling_str);
|
||||
|
||||
// Sort the spelling strings and put them from the begin of the buffer.
|
||||
// Return the pointer of the sorted spelling strings.
|
||||
// item_size and spl_num return the item size and number of spelling.
|
||||
// Because each spelling uses a '\0' as terminator, the returned item_size is
|
||||
// at least one char longer than the spl_size parameter specified by
|
||||
// init_table(). If the table is initialized to calculate score, item_size
|
||||
// will be increased by 1, and current_spl_str[item_size - 1] stores an
|
||||
// unsinged char score.
|
||||
// An item with a lower score has a higher probability.
|
||||
// Do not call put_spelling() and contains() after arrange().
|
||||
const char* arrange(size_t *item_size, size_t *spl_num);
|
||||
|
||||
float get_score_amplifier();
|
||||
|
||||
unsigned char get_average_score();
|
||||
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SPELLINGTABLE_H__
|
||||
@@ -0,0 +1,832 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "dictdef.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
#include "spellingtable.h"
|
||||
#endif
|
||||
|
||||
#include "spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// The singleton object; created lazily by get_instance().
SpellingTrie* SpellingTrie::instance_ = NULL;
|
||||
|
||||
// Maps a half spelling id (the array index) to its character.
// z/c/s is for Zh/Ch/Sh
// Index 0 holds the placeholder '0'; id 0 is never a valid half id.
const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
|
||||
"0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
|
||||
|
||||
// Per-letter flags, indexed by (ch - 'A') for 'A'..'Z'.
// Bit 0 : is it a Shengmu char?
|
||||
// Bit 1 : is it a Yunmu char? (one char is a Yunmu)
|
||||
// Bit 2 : is it enabled in ShouZiMu(first char) mode?
|
||||
// The initial values only carry bits 0 and 1; szm_enable_shm() and
// szm_enable_ym() set or clear kHalfIdSzmMask at run time
// (presumably bit 2 -- confirm against the mask definition).
unsigned char SpellingTrie::char_flags_[] = {
|
||||
// a b c d e f g
|
||||
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
|
||||
// h i j k l m n
|
||||
0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
// o p q r s t
|
||||
0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
// u v w x y z
|
||||
0x00, 0x00, 0x01, 0x01, 0x01, 0x01
|
||||
};
|
||||
|
||||
// qsort() comparator that orders two NUL-terminated spelling items with
// strcmp().
int compare_spl(const void* p1, const void* p2) {
  const char* lhs = static_cast<const char*>(p1);
  const char* rhs = static_cast<const char*>(p2);
  return strcmp(lhs, rhs);
}
|
||||
|
||||
// Creates an empty trie; construct() has to be called before it can be
// queried. ShouZiMu mode is enabled for both Shengmu and Yunmu characters.
SpellingTrie::SpellingTrie() {
  spelling_buf_ = NULL;
  spelling_size_ = 0;
  spelling_num_ = 0;
  spl_ym_ids_ = NULL;
  splstr_queried_ = NULL;
  splstr16_queried_ = NULL;
  root_ = NULL;
  dumb_node_ = NULL;
  splitter_node_ = NULL;
  ym_buf_ = NULL;
  f2h_ = NULL;
  // Defined values until construct() supplies the real ones.
  score_amplifier_ = 0;
  average_score_ = 0;

  // instance_ is deliberately not touched here: it is a static that is
  // already initialized to NULL and maintained by get_instance(). Resetting
  // it from a constructor would drop the singleton whenever another object
  // is created.

  szm_enable_shm(true);
  szm_enable_ym(true);

#ifdef ___BUILD_MODEL___
  node_num_ = 0;
#endif
}
|
||||
|
||||
// Releases all buffers and trie nodes owned by this object.
SpellingTrie::~SpellingTrie() {
  // delete [] on a NULL pointer is a no-op.
  delete [] spelling_buf_;
  delete [] splstr_queried_;
  delete [] splstr16_queried_;
  delete [] spl_ym_ids_;

  if (NULL != root_) {
    free_son_trie(root_);
    delete root_;
  }

  // construct() allocates these two nodes with scalar new, so they must be
  // released with scalar delete.
  delete dumb_node_;
  delete splitter_node_;

  // If the singleton itself is being destroyed, just forget the pointer.
  // Calling "delete instance_" from here would re-enter this destructor on
  // the same object.
  if (this == instance_)
    instance_ = NULL;

  delete [] ym_buf_;
  delete [] f2h_;
}
|
||||
|
||||
// Checks whether *splid is usable. Full ids, the Zh/Ch/Sh half ids and half
// ids enabled in ShouZiMu mode are accepted as they are. A disabled half id
// whose character is a Yunmu is replaced by h2f_start_[*splid] and accepted.
// Everything else (NULL, id 0, other disabled half ids) is rejected.
bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
  if (NULL == splid)
    return false;

  const uint16 id = *splid;
  if (0 == id)
    return false;
  if (id >= kFullSplIdStart)
    return true;

  // From here on the id is a half id.
  const char half_ch = kHalfId2Sc_[id];
  if (half_ch > 'Z' || szm_is_enabled(half_ch))
    return true;

  if (!is_yunmu_char(half_ch))
    return false;

  assert(h2f_num_[id] > 0);
  *splid = h2f_start_[id];
  return true;
}
|
||||
|
||||
// A half id is any non-zero id below kFullSplIdStart.
bool SpellingTrie::is_half_id(uint16 splid) const {
  return 0 != splid && splid < kFullSplIdStart;
}
|
||||
|
||||
// A full id lies in [kFullSplIdStart, kFullSplIdStart + spelling_num_).
bool SpellingTrie::is_full_id(uint16 splid) const {
  return splid >= kFullSplIdStart &&
         splid < kFullSplIdStart + spelling_num_;
}
|
||||
|
||||
// Reports whether full_id can be abbreviated by half_id: either half_id is
// exactly the half id of full_id, or full_id starts with Zh/Ch/Sh and half_id
// is the plain Z/C/S.
bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
  uint16 half_fr_full = full_to_half(full_id);

  if (half_fr_full == half_id)
    return true;

  // kHalfId2Sc_ only has entries for ids below kFullSplIdStart; anything
  // else cannot be a half id and must not be used as an index.
  if (half_id >= kFullSplIdStart)
    return false;

  // &~0x20 is used to convert the char to upper case.
  // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
  char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
  char ch_h = kHalfId2Sc_[half_id];
  return ch_f == ch_h;
}
|
||||
|
||||
// Reports whether splid is a half id whose character is flagged as a Yunmu.
bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
  if (splid < 1 || splid >= kFullSplIdStart)
    return false;

  const char half_ch = kHalfId2Sc_[splid];
  // A lower-case character means the half id is one of Zh/Ch/Sh.
  if (half_ch >= 'a')
    return false;

  const unsigned char flags = char_flags_[half_ch - 'A'];
  return (flags & kHalfIdYunmuMask) != 0;
}
|
||||
|
||||
bool SpellingTrie::is_shengmu_char(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_yunmu_char(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_szm_char(char ch) const {
|
||||
return is_shengmu_char(ch) || is_yunmu_char(ch);
|
||||
}
|
||||
|
||||
bool SpellingTrie::szm_is_enabled(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdSzmMask;
|
||||
}
|
||||
|
||||
void SpellingTrie::szm_enable_shm(bool enable) {
|
||||
if (enable) {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_shengmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
||||
}
|
||||
} else {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_shengmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SpellingTrie::szm_enable_ym(bool enable) {
|
||||
if (enable) {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_yunmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
||||
}
|
||||
} else {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_yunmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_szm_enabled(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdSzmMask;
|
||||
}
|
||||
|
||||
// Returns the singleton as a pointer-to-const, creating it if necessary.
const SpellingTrie* SpellingTrie::get_cpinstance() {
  const SpellingTrie &trie = get_instance();
  return &trie;
}
|
||||
|
||||
// Returns the singleton, creating it on first use.
SpellingTrie& SpellingTrie::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new SpellingTrie();
  return *instance_;
}
|
||||
|
||||
// Returns h2f_num_[half_id], or 0 if the trie has not been built or half_id
// is not below kFullSplIdStart.
uint16 SpellingTrie::half2full_num(uint16 half_id) const {
  const bool usable = (NULL != root_) && (half_id < kFullSplIdStart);
  if (!usable)
    return 0;

  return h2f_num_[half_id];
}
|
||||
|
||||
// Stores h2f_start_[half_id] in *spl_id_start and returns h2f_num_[half_id].
// Returns 0 without touching *spl_id_start if the output pointer is NULL,
// the trie has not been built, or half_id is not below kFullSplIdStart.
uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
  if (NULL == spl_id_start)
    return 0;
  if (NULL == root_)
    return 0;
  if (half_id >= kFullSplIdStart)
    return 0;

  const uint16 full_num = h2f_num_[half_id];
  *spl_id_start = h2f_start_[half_id];
  return full_num;
}
|
||||
|
||||
// Returns the half id recorded in f2h_ for full_id, or 0 if the trie has not
// been built or full_id is not a valid full id.
uint16 SpellingTrie::full_to_half(uint16 full_id) const {
  // Valid full ids are [kFullSplIdStart, kFullSplIdStart + spelling_num_),
  // the same range is_full_id() accepts. The upper bound is exclusive:
  // kFullSplIdStart + spelling_num_ itself would index f2h_[spelling_num_].
  if (NULL == root_ || full_id < kFullSplIdStart ||
      full_id >= spelling_num_ + kFullSplIdStart)
    return 0;

  return f2h_[full_id - kFullSplIdStart];
}
|
||||
|
||||
// Recursively frees the descendants of node: first the sub-tries of every
// son, then the son array itself. The node is not freed.
void SpellingTrie::free_son_trie(SpellingNode* node) {
  if (NULL == node)
    return;

  SpellingNode *sons = node->first_son;
  for (size_t idx = 0; idx < node->num_of_son; idx++)
    free_son_trie(sons + idx);

  if (NULL != sons)
    delete [] sons;
}
|
||||
|
||||
bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
|
||||
size_t item_num, float score_amplifier,
|
||||
unsigned char average_score) {
|
||||
if (spelling_arr == NULL)
|
||||
return false;
|
||||
|
||||
memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
|
||||
memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
|
||||
|
||||
// If the arr is the same as the buf, means this function is called by
|
||||
// load_table(), the table data are ready; otherwise the array should be
|
||||
// saved.
|
||||
if (spelling_arr != spelling_buf_) {
|
||||
if (NULL != spelling_buf_)
|
||||
delete [] spelling_buf_;
|
||||
spelling_buf_ = new char[item_size * item_num];
|
||||
if (NULL == spelling_buf_)
|
||||
return false;
|
||||
memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
|
||||
}
|
||||
|
||||
spelling_size_ = item_size;
|
||||
spelling_num_ = item_num;
|
||||
|
||||
score_amplifier_ = score_amplifier;
|
||||
average_score_ = average_score;
|
||||
|
||||
if (NULL != splstr_queried_)
|
||||
delete [] splstr_queried_;
|
||||
splstr_queried_ = new char[spelling_size_];
|
||||
if (NULL == splstr_queried_)
|
||||
return false;
|
||||
|
||||
if (NULL != splstr16_queried_)
|
||||
delete [] splstr16_queried_;
|
||||
splstr16_queried_ = new char16[spelling_size_];
|
||||
if (NULL == splstr16_queried_)
|
||||
return false;
|
||||
|
||||
// First, sort the buf to ensure they are in ascendant order
|
||||
qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
node_num_ = 1;
|
||||
#endif
|
||||
|
||||
root_ = new SpellingNode();
|
||||
memset(root_, 0, sizeof(SpellingNode));
|
||||
|
||||
dumb_node_ = new SpellingNode();
|
||||
memset(dumb_node_, 0, sizeof(SpellingNode));
|
||||
dumb_node_->score = average_score_;
|
||||
|
||||
splitter_node_ = new SpellingNode();
|
||||
memset(splitter_node_, 0, sizeof(SpellingNode));
|
||||
splitter_node_->score = average_score_;
|
||||
|
||||
memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
|
||||
|
||||
root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
|
||||
|
||||
// Root's score should be cleared.
|
||||
root_->score = 0;
|
||||
|
||||
if (NULL == root_->first_son)
|
||||
return false;
|
||||
|
||||
h2f_start_[0] = h2f_num_[0] = 0;
|
||||
|
||||
if (!build_f2h())
|
||||
return false;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
if (kPrintDebug0) {
|
||||
printf("---SpellingTrie Nodes: %d\n", (int)node_num_);
|
||||
}
|
||||
return build_ym_info();
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
const char* SpellingTrie::get_ym_str(const char *spl_str) {
|
||||
bool start_ZCS = false;
|
||||
if (is_shengmu_char(*spl_str)) {
|
||||
if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
|
||||
start_ZCS = true;
|
||||
spl_str += 1;
|
||||
if (start_ZCS && 'h' == *spl_str)
|
||||
spl_str += 1;
|
||||
}
|
||||
return spl_str;
|
||||
}
|
||||
|
||||
bool SpellingTrie::build_ym_info() {
|
||||
bool sucess;
|
||||
SpellingTable *spl_table = new SpellingTable();
|
||||
|
||||
sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
|
||||
assert(sucess);
|
||||
|
||||
for (uint16 pos = 0; pos < spelling_num_; pos++) {
|
||||
const char *spl_str = spelling_buf_ + spelling_size_ * pos;
|
||||
spl_str = get_ym_str(spl_str);
|
||||
if ('\0' != spl_str[0]) {
|
||||
sucess = spl_table->put_spelling(spl_str, 0);
|
||||
assert(sucess);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ym_item_size; // '\0' is included
|
||||
size_t ym_num;
|
||||
const char* ym_buf;
|
||||
ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
|
||||
|
||||
if (NULL != ym_buf_)
|
||||
delete [] ym_buf_;
|
||||
ym_buf_ = new char[ym_item_size * ym_num];
|
||||
if (NULL == ym_buf_) {
|
||||
delete spl_table;
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
|
||||
ym_size_ = ym_item_size;
|
||||
ym_num_ = ym_num;
|
||||
|
||||
delete spl_table;
|
||||
|
||||
// Generate the maping from the spelling ids to the Yunmu ids.
|
||||
if (spl_ym_ids_)
|
||||
delete spl_ym_ids_;
|
||||
spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
|
||||
if (NULL == spl_ym_ids_)
|
||||
return false;
|
||||
|
||||
memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
|
||||
|
||||
for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
|
||||
const char *str = get_spelling_str(id);
|
||||
|
||||
str = get_ym_str(str);
|
||||
if ('\0' != str[0]) {
|
||||
uint8 ym_id = get_ym_id(str);
|
||||
spl_ym_ids_[id] = ym_id;
|
||||
assert(ym_id > 0);
|
||||
} else {
|
||||
spl_ym_ids_[id] = 0;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
SpellingNode* SpellingTrie::construct_spellings_subset(
|
||||
size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
|
||||
if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
|
||||
return NULL;
|
||||
|
||||
SpellingNode *first_son = NULL;
|
||||
uint16 num_of_son = 0;
|
||||
unsigned char min_son_score = 255;
|
||||
|
||||
const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
|
||||
char char_for_node = spelling_last_start[level];
|
||||
assert((char_for_node >= 'A' && char_for_node <= 'Z') ||
|
||||
'h' == char_for_node);
|
||||
|
||||
// Scan the array to find how many sons
|
||||
for (size_t i = item_start + 1; i < item_end; i++) {
|
||||
const char *spelling_current = spelling_buf_ + spelling_size_ * i;
|
||||
char char_current = spelling_current[level];
|
||||
if (char_current != char_for_node) {
|
||||
num_of_son++;
|
||||
char_for_node = char_current;
|
||||
}
|
||||
}
|
||||
num_of_son++;
|
||||
|
||||
// Allocate memory
|
||||
#ifdef ___BUILD_MODEL___
|
||||
node_num_ += num_of_son;
|
||||
#endif
|
||||
first_son = new SpellingNode[num_of_son];
|
||||
memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
|
||||
|
||||
// Now begin construct tree
|
||||
size_t son_pos = 0;
|
||||
|
||||
spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
|
||||
char_for_node = spelling_last_start[level];
|
||||
|
||||
bool spelling_endable = true;
|
||||
if (spelling_last_start[level + 1] != '\0')
|
||||
spelling_endable = false;
|
||||
|
||||
size_t item_start_next = item_start;
|
||||
|
||||
for (size_t i = item_start + 1; i < item_end; i++) {
|
||||
const char *spelling_current = spelling_buf_ + spelling_size_ * i;
|
||||
char char_current = spelling_current[level];
|
||||
assert(is_valid_spl_char(char_current));
|
||||
|
||||
if (char_current != char_for_node) {
|
||||
// Construct a node
|
||||
SpellingNode *node_current = first_son + son_pos;
|
||||
node_current->char_this_node = char_for_node;
|
||||
|
||||
// For quick search in the first level
|
||||
if (0 == level)
|
||||
level1_sons_[char_for_node - 'A'] = node_current;
|
||||
|
||||
if (spelling_endable) {
|
||||
node_current->spelling_idx = kFullSplIdStart + item_start_next;
|
||||
}
|
||||
|
||||
if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
|
||||
size_t real_start = item_start_next;
|
||||
if (spelling_last_start[level + 1] == '\0')
|
||||
real_start++;
|
||||
|
||||
node_current->first_son =
|
||||
construct_spellings_subset(real_start, i, level + 1,
|
||||
node_current);
|
||||
|
||||
if (real_start == item_start_next + 1) {
|
||||
uint16 score_this = static_cast<unsigned char>(
|
||||
spelling_last_start[spelling_size_ - 1]);
|
||||
if (score_this < node_current->score)
|
||||
node_current->score = score_this;
|
||||
}
|
||||
} else {
|
||||
node_current->first_son = NULL;
|
||||
node_current->score = static_cast<unsigned char>(
|
||||
spelling_last_start[spelling_size_ - 1]);
|
||||
}
|
||||
|
||||
if (node_current->score < min_son_score)
|
||||
min_son_score = node_current->score;
|
||||
|
||||
bool is_half = false;
|
||||
if (level == 0 && is_szm_char(char_for_node)) {
|
||||
node_current->spelling_idx =
|
||||
static_cast<uint16>(char_for_node - 'A' + 1);
|
||||
|
||||
if (char_for_node > 'C')
|
||||
node_current->spelling_idx++;
|
||||
if (char_for_node > 'S')
|
||||
node_current->spelling_idx++;
|
||||
|
||||
h2f_num_[node_current->spelling_idx] = i - item_start_next;
|
||||
is_half = true;
|
||||
} else if (level == 1 && char_for_node == 'h') {
|
||||
char ch_level0 = spelling_last_start[0];
|
||||
uint16 part_id = 0;
|
||||
if (ch_level0 == 'C')
|
||||
part_id = 'C' - 'A' + 1 + 1;
|
||||
else if (ch_level0 == 'S')
|
||||
part_id = 'S' - 'A' + 1 + 2;
|
||||
else if (ch_level0 == 'Z')
|
||||
part_id = 'Z' - 'A' + 1 + 3;
|
||||
if (0 != part_id) {
|
||||
node_current->spelling_idx = part_id;
|
||||
h2f_num_[node_current->spelling_idx] = i - item_start_next;
|
||||
is_half = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_half) {
|
||||
if (h2f_num_[node_current->spelling_idx] > 0)
|
||||
h2f_start_[node_current->spelling_idx] =
|
||||
item_start_next + kFullSplIdStart;
|
||||
else
|
||||
h2f_start_[node_current->spelling_idx] = 0;
|
||||
}
|
||||
|
||||
// for next sibling
|
||||
spelling_last_start = spelling_current;
|
||||
char_for_node = char_current;
|
||||
item_start_next = i;
|
||||
spelling_endable = true;
|
||||
if (spelling_current[level + 1] != '\0')
|
||||
spelling_endable = false;
|
||||
|
||||
son_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
// the last one
|
||||
SpellingNode *node_current = first_son + son_pos;
|
||||
node_current->char_this_node = char_for_node;
|
||||
|
||||
// For quick search in the first level
|
||||
if (0 == level)
|
||||
level1_sons_[char_for_node - 'A'] = node_current;
|
||||
|
||||
if (spelling_endable) {
|
||||
node_current->spelling_idx = kFullSplIdStart + item_start_next;
|
||||
}
|
||||
|
||||
if (spelling_last_start[level + 1] != '\0' ||
|
||||
item_end - item_start_next > 1) {
|
||||
size_t real_start = item_start_next;
|
||||
if (spelling_last_start[level + 1] == '\0')
|
||||
real_start++;
|
||||
|
||||
node_current->first_son =
|
||||
construct_spellings_subset(real_start, item_end, level + 1,
|
||||
node_current);
|
||||
|
||||
if (real_start == item_start_next + 1) {
|
||||
uint16 score_this = static_cast<unsigned char>(
|
||||
spelling_last_start[spelling_size_ - 1]);
|
||||
if (score_this < node_current->score)
|
||||
node_current->score = score_this;
|
||||
}
|
||||
} else {
|
||||
node_current->first_son = NULL;
|
||||
node_current->score = static_cast<unsigned char>(
|
||||
spelling_last_start[spelling_size_ - 1]);
|
||||
}
|
||||
|
||||
if (node_current->score < min_son_score)
|
||||
min_son_score = node_current->score;
|
||||
|
||||
assert(son_pos + 1 == num_of_son);
|
||||
|
||||
bool is_half = false;
|
||||
if (level == 0 && szm_is_enabled(char_for_node)) {
|
||||
node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
|
||||
|
||||
if (char_for_node > 'C')
|
||||
node_current->spelling_idx++;
|
||||
if (char_for_node > 'S')
|
||||
node_current->spelling_idx++;
|
||||
|
||||
h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
|
||||
is_half = true;
|
||||
} else if (level == 1 && char_for_node == 'h') {
|
||||
char ch_level0 = spelling_last_start[0];
|
||||
uint16 part_id = 0;
|
||||
if (ch_level0 == 'C')
|
||||
part_id = 'C' - 'A' + 1 + 1;
|
||||
else if (ch_level0 == 'S')
|
||||
part_id = 'S' - 'A' + 1 + 2;
|
||||
else if (ch_level0 == 'Z')
|
||||
part_id = 'Z' - 'A' + 1 + 3;
|
||||
if (0 != part_id) {
|
||||
node_current->spelling_idx = part_id;
|
||||
h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
|
||||
is_half = true;
|
||||
}
|
||||
}
|
||||
if (is_half) {
|
||||
if (h2f_num_[node_current->spelling_idx] > 0)
|
||||
h2f_start_[node_current->spelling_idx] =
|
||||
item_start_next + kFullSplIdStart;
|
||||
else
|
||||
h2f_start_[node_current->spelling_idx] = 0;
|
||||
}
|
||||
|
||||
parent->num_of_son = num_of_son;
|
||||
parent->score = min_son_score;
|
||||
return first_son;
|
||||
}
|
||||
|
||||
bool SpellingTrie::save_spl_trie(FILE *fp) {
|
||||
if (NULL == fp || NULL == spelling_buf_)
|
||||
return false;
|
||||
|
||||
if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
|
||||
spelling_num_, fp) != spelling_num_)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTrie::load_spl_trie(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (NULL != spelling_buf_)
|
||||
delete [] spelling_buf_;
|
||||
|
||||
spelling_buf_ = new char[spelling_size_ * spelling_num_];
|
||||
if (NULL == spelling_buf_)
|
||||
return false;
|
||||
|
||||
if (fread(spelling_buf_, sizeof(char) * spelling_size_,
|
||||
spelling_num_, fp) != spelling_num_)
|
||||
return false;
|
||||
|
||||
return construct(spelling_buf_, spelling_size_, spelling_num_,
|
||||
score_amplifier_, average_score_);
|
||||
}
|
||||
|
||||
bool SpellingTrie::build_f2h() {
|
||||
if (NULL != f2h_)
|
||||
delete [] f2h_;
|
||||
f2h_ = new uint16[spelling_num_];
|
||||
if (NULL == f2h_)
|
||||
return false;
|
||||
|
||||
for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
|
||||
for (uint16 fid = h2f_start_[hid];
|
||||
fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
|
||||
f2h_[fid - kFullSplIdStart] = hid;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t SpellingTrie::get_spelling_num() {
|
||||
return spelling_num_;
|
||||
}
|
||||
|
||||
// Looks up a Yunmu string in the Yunmu table.
// Returns its 1-based position in ym_buf_, or 0 when ym_str is NULL, the
// table has not been built, or no entry matches exactly.
uint8 SpellingTrie::get_ym_id(const char *ym_str) {
  if (NULL == ym_str || NULL == ym_buf_)
    return 0;

  uint8 slot = 0;
  while (slot < ym_num_) {
    // Each table entry occupies ym_size_ bytes.
    const char *candidate = ym_buf_ + ym_size_ * slot;
    if (0 == strcmp(candidate, ym_str))
      return slot + 1;
    slot++;
  }

  return 0;
}
|
||||
|
||||
// Returns the spelling string for splid as 8-bit text.
// The result lives in the shared member buffer splstr_queried_, so it is
// overwritten by the next call.
// Full ids (>= kFullSplIdStart) are read from the spelling table; the half
// ids of Ch/Sh/Zh yield "Ch"/"Sh"/"Zh"; any other half id yields one letter.
// No range check is made on splid.
const char* SpellingTrie::get_spelling_str(uint16 splid) {
  splstr_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    const char *entry =
        spelling_buf_ + (splid - kFullSplIdStart) * spelling_size_;
    snprintf(splstr_queried_, spelling_size_, "%s", entry);
    return splstr_queried_;
  }

  // Half ids: the slots right after 'C', 'S' and 'Z' are the two-letter
  // initials.
  const char *two_letter = NULL;
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      two_letter = "Ch";
      break;
    case 'S' - 'A' + 1 + 2:
      two_letter = "Sh";
      break;
    case 'Z' - 'A' + 1 + 3:
      two_letter = "Zh";
      break;
    default:
      break;
  }
  if (NULL != two_letter) {
    snprintf(splstr_queried_, spelling_size_, "%s", two_letter);
    return splstr_queried_;
  }

  // Single-letter half id: undo the shifts caused by the Ch and Sh slots.
  uint16 letter_pos = splid;
  if (letter_pos > 'C' - 'A' + 1)
    letter_pos--;
  if (letter_pos > 'S' - 'A' + 1)
    letter_pos--;
  splstr_queried_[0] = 'A' + letter_pos - 1;
  splstr_queried_[1] = '\0';
  return splstr_queried_;
}
|
||||
|
||||
// Returns the spelling string for splid as 16-bit text.
// The result lives in the shared member buffer splstr16_queried_, so it is
// overwritten by the next call.
// For a full id the whole spelling_size_-byte table record is widened and
// copied; half ids produce "Ch"/"Sh"/"Zh" or a single letter.
// No range check is made on splid.
const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
  splstr16_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    const char *entry =
        spelling_buf_ + (splid - kFullSplIdStart) * spelling_size_;
    for (size_t pos = 0; pos < spelling_size_; pos++)
      splstr16_queried_[pos] = static_cast<char16>(entry[pos]);
    return splstr16_queried_;
  }

  // Half ids of the two-letter initials Ch / Sh / Zh.
  char lead = '\0';
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      lead = 'C';
      break;
    case 'S' - 'A' + 1 + 2:
      lead = 'S';
      break;
    case 'Z' - 'A' + 1 + 3:
      lead = 'Z';
      break;
    default:
      break;
  }
  if ('\0' != lead) {
    splstr16_queried_[0] = static_cast<char16>(lead);
    splstr16_queried_[1] = static_cast<char16>('h');
    splstr16_queried_[2] = static_cast<char16>('\0');
    return splstr16_queried_;
  }

  // Single-letter half id: undo the shifts caused by the Ch and Sh slots.
  uint16 letter_pos = splid;
  if (letter_pos > 'C' - 'A' + 1)
    letter_pos--;
  if (letter_pos > 'S' - 'A' + 1)
    letter_pos--;
  splstr16_queried_[0] = 'A' + letter_pos - 1;
  splstr16_queried_[1] = '\0';
  return splstr16_queried_;
}
|
||||
|
||||
// Writes the spelling string for splid into the caller's 16-bit buffer and
// returns its length (terminator excluded).
// Returns 0 without writing when splstr16 is NULL or splstr16_len is smaller
// than kMaxPinyinSize + 1.
// Half ids produce "Ch"/"Sh"/"Zh" (length 2) or a single letter (length 1).
// No range check is made on splid.
size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
                                        size_t splstr16_len) {
  if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;

  if (splid >= kFullSplIdStart) {
    const char *entry =
        spelling_buf_ + (splid - kFullSplIdStart) * spelling_size_;
    size_t pos = 0;
    while (pos <= kMaxPinyinSize) {
      splstr16[pos] = static_cast<char16>(entry[pos]);
      // Stop at the terminator; its position is the string length.
      if (static_cast<char16>('\0') == splstr16[pos])
        return pos;
      pos++;
    }
    // Only reached if the record has no terminator within
    // kMaxPinyinSize + 1 chars.
    return 0;
  }

  // Half ids of the two-letter initials Ch / Sh / Zh.
  char lead = '\0';
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      lead = 'C';
      break;
    case 'S' - 'A' + 1 + 2:
      lead = 'S';
      break;
    case 'Z' - 'A' + 1 + 3:
      lead = 'Z';
      break;
    default:
      break;
  }
  if ('\0' != lead) {
    splstr16[0] = static_cast<char16>(lead);
    splstr16[1] = static_cast<char16>('h');
    splstr16[2] = static_cast<char16>('\0');
    return 2;
  }

  // Single-letter half id: undo the shifts caused by the Ch and Sh slots.
  uint16 letter_pos = splid;
  if (letter_pos > 'C' - 'A' + 1)
    letter_pos--;
  if (letter_pos > 'S' - 'A' + 1)
    letter_pos--;
  splstr16[0] = 'A' + letter_pos - 1;
  splstr16[1] = '\0';
  return 1;
}
|
||||
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SPELLINGTRIE_H__
|
||||
#define PINYINIME_INCLUDE_SPELLINGTRIE_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
static const unsigned short kFullSplIdStart = kHalfSpellingIdNum + 1;
|
||||
|
||||
// Node used for the trie of spellings
|
||||
struct SpellingNode {
|
||||
SpellingNode *first_son;
|
||||
// The spelling id for each node. If you need more bits to store
|
||||
// spelling id, please adjust this structure.
|
||||
uint16 spelling_idx:11;
|
||||
uint16 num_of_son:5;
|
||||
char char_this_node;
|
||||
unsigned char score;
|
||||
};
|
||||
|
||||
class SpellingTrie {
|
||||
private:
|
||||
static const int kMaxYmNum = 64;
|
||||
static const size_t kValidSplCharNum = 26;
|
||||
|
||||
static const uint16 kHalfIdShengmuMask = 0x01;
|
||||
static const uint16 kHalfIdYunmuMask = 0x02;
|
||||
static const uint16 kHalfIdSzmMask = 0x04;
|
||||
|
||||
// Map from half spelling id to single char.
|
||||
// For half ids of Zh/Ch/Sh, map to z/c/s (low case) respectively.
|
||||
// For example, 1 to 'A', 2 to 'B', 3 to 'C', 4 to 'c', 5 to 'D', ...,
|
||||
// 28 to 'Z', 29 to 'z'.
|
||||
// [0] is not used to achieve better efficiency.
|
||||
static const char kHalfId2Sc_[kFullSplIdStart + 1];
|
||||
|
||||
static unsigned char char_flags_[];
|
||||
static SpellingTrie* instance_;
|
||||
|
||||
// The spelling table
|
||||
char *spelling_buf_;
|
||||
|
||||
// The size of longest spelling string, includes '\0' and an extra char to
|
||||
// store score. For example, "zhuang" is the longgest item in Pinyin list,
|
||||
// so spelling_size_ is 8.
|
||||
// Structure: The string ended with '\0' + score char.
|
||||
// An item with a lower score has a higher probability.
|
||||
uint32 spelling_size_;
|
||||
|
||||
// Number of full spelling ids.
|
||||
uint32 spelling_num_;
|
||||
|
||||
float score_amplifier_;
|
||||
unsigned char average_score_;
|
||||
|
||||
// The Yunmu id list for the spelling ids (for half ids of Shengmu,
|
||||
// the Yunmu id is 0).
|
||||
// The length of the list is spelling_num_ + kFullSplIdStart,
|
||||
// so that spl_ym_ids_[splid] is the Yunmu id of the splid.
|
||||
uint8 *spl_ym_ids_;
|
||||
|
||||
// The Yunmu table.
|
||||
// Each Yunmu will be assigned with Yunmu id from 1.
|
||||
char *ym_buf_;
|
||||
size_t ym_size_; // The size of longest Yunmu string, '\0'included.
|
||||
size_t ym_num_;
|
||||
|
||||
// The spelling string just queried
|
||||
char *splstr_queried_;
|
||||
|
||||
// The spelling string just queried
|
||||
char16 *splstr16_queried_;
|
||||
|
||||
// The root node of the spelling tree
|
||||
SpellingNode* root_;
|
||||
|
||||
// If a none qwerty key such as a fnction key like ENTER is given, this node
|
||||
// will be used to indicate that this is not a QWERTY node.
|
||||
SpellingNode* dumb_node_;
|
||||
|
||||
// If a splitter key is pressed, this node will be used to indicate that this
|
||||
// is a splitter key.
|
||||
SpellingNode* splitter_node_;
|
||||
|
||||
// Used to get the first level sons.
|
||||
SpellingNode* level1_sons_[kValidSplCharNum];
|
||||
|
||||
// The full spl_id range for specific half id.
|
||||
// h2f means half to full.
|
||||
// A half id can be a ShouZiMu id (id to represent the first char of a full
|
||||
// spelling, including Shengmu and Yunmu), or id of zh/ch/sh.
|
||||
// [1..kFullSplIdStart-1] is the arrange of half id.
|
||||
uint16 h2f_start_[kFullSplIdStart];
|
||||
uint16 h2f_num_[kFullSplIdStart];
|
||||
|
||||
// Map from full id to half id.
|
||||
uint16 *f2h_;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// How many node used to build the trie.
|
||||
size_t node_num_;
|
||||
#endif
|
||||
|
||||
SpellingTrie();
|
||||
|
||||
void free_son_trie(SpellingNode* node);
|
||||
|
||||
// Construct a subtree using a subset of the spelling array (from
|
||||
// item_star to item_end).
|
||||
// Member spelliing_buf_ and spelling_size_ should be valid.
|
||||
// parent is used to update its num_of_son and score.
|
||||
SpellingNode* construct_spellings_subset(size_t item_start, size_t item_end,
|
||||
size_t level, SpellingNode *parent);
|
||||
bool build_f2h();
|
||||
|
||||
// The caller should guarantee ch >= 'A' && ch <= 'Z'
|
||||
bool is_shengmu_char(char ch) const;
|
||||
|
||||
// The caller should guarantee ch >= 'A' && ch <= 'Z'
|
||||
bool is_yunmu_char(char ch) const;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Given a spelling string, return its Yunmu string.
|
||||
// The caller guaratees spl_str is valid.
|
||||
const char* get_ym_str(const char *spl_str);
|
||||
|
||||
// Build the Yunmu list, and the mapping relation between the full ids and the
|
||||
// Yunmu ids. This functin is called after the spelling trie is built.
|
||||
bool build_ym_info();
|
||||
#endif
|
||||
|
||||
friend class SpellingParser;
|
||||
friend class SmartSplParser;
|
||||
friend class SmartSplParser2;
|
||||
|
||||
public:
|
||||
~SpellingTrie();
|
||||
|
||||
inline static bool is_valid_spl_char(char ch) {
|
||||
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
||||
}
|
||||
|
||||
// The caller guarantees that the two chars are valid spelling chars.
|
||||
inline static bool is_same_spl_char(char ch1, char ch2) {
|
||||
return ch1 == ch2 || ch1 - ch2 == 'a' - 'A' || ch2 - ch1 == 'a' - 'A';
|
||||
}
|
||||
|
||||
// Construct the tree from the input pinyin array
|
||||
// The given string list should have been sorted.
|
||||
// score_amplifier is used to convert a possibility value into score.
|
||||
// average_score is the average_score of all spellings. The dumb node is
|
||||
// assigned with this score.
|
||||
bool construct(const char* spelling_arr, size_t item_size, size_t item_num,
|
||||
float score_amplifier, unsigned char average_score);
|
||||
|
||||
// Test if the given id is a valid spelling id.
|
||||
// If function returns true, the given splid may be updated like this:
|
||||
// When 'A' is not enabled in ShouZiMu mode, the parsing result for 'A' is
|
||||
// first given as a half id 1, but because 'A' is a one-char Yunmu and
|
||||
// it is a valid id, it needs to updated to its corresponding full id.
|
||||
bool if_valid_id_update(uint16 *splid) const;
|
||||
|
||||
// Test if the given id is a half id.
|
||||
bool is_half_id(uint16 splid) const;
|
||||
|
||||
bool is_full_id(uint16 splid) const;
|
||||
|
||||
// Test if the given id is a one-char Yunmu id (obviously, it is also a half
|
||||
// id), such as 'A', 'E' and 'O'.
|
||||
bool is_half_id_yunmu(uint16 splid) const;
|
||||
|
||||
// Test if this char is a ShouZiMu char. This ShouZiMu char may be not enabled.
|
||||
// For Pinyin, only i/u/v is not a ShouZiMu char.
|
||||
// The caller should guarantee that ch >= 'A' && ch <= 'Z'
|
||||
bool is_szm_char(char ch) const;
|
||||
|
||||
// Test If this char is enabled in ShouZiMu mode.
|
||||
// The caller should guarantee that ch >= 'A' && ch <= 'Z'
|
||||
bool szm_is_enabled(char ch) const;
|
||||
|
||||
// Enable/disable Shengmus in ShouZiMu mode(using the first char of a spelling
|
||||
// to input).
|
||||
void szm_enable_shm(bool enable);
|
||||
|
||||
// Enable/disable Yunmus in ShouZiMu mode.
|
||||
void szm_enable_ym(bool enable);
|
||||
|
||||
// Test if this char is enabled in ShouZiMu mode.
|
||||
// The caller should guarantee ch >= 'A' && ch <= 'Z'
|
||||
bool is_szm_enabled(char ch) const;
|
||||
|
||||
// Return the number of full ids for the given half id.
|
||||
uint16 half2full_num(uint16 half_id) const;
|
||||
|
||||
// Return the number of full ids for the given half id, and fill spl_id_start
|
||||
// to return the first full id.
|
||||
uint16 half_to_full(uint16 half_id, uint16 *spl_id_start) const;
|
||||
|
||||
// Return the corresponding half id for the given full id.
|
||||
// Not frequently used, low efficient.
|
||||
// Return 0 if fails.
|
||||
uint16 full_to_half(uint16 full_id) const;
|
||||
|
||||
// To test whether a half id is compatible with a full id.
|
||||
// Generally, when half_id == full_to_half(full_id), return true.
|
||||
// But for "Zh, Ch, Sh", if fussy mode is on, half id for 'Z' is compatible
|
||||
// with a full id like "Zhe". (Fussy mode is not ready).
|
||||
bool half_full_compatible(uint16 half_id, uint16 full_id) const;
|
||||
|
||||
static const SpellingTrie* get_cpinstance();
|
||||
|
||||
static SpellingTrie& get_instance();
|
||||
|
||||
// Save to the file stream
|
||||
bool save_spl_trie(FILE *fp);
|
||||
|
||||
// Load from the file stream
|
||||
bool load_spl_trie(FILE *fp);
|
||||
|
||||
// Get the number of spellings
|
||||
size_t get_spelling_num();
|
||||
|
||||
// Return the Yunmu id for the given Yunmu string.
|
||||
// If the string is not valid, return 0;
|
||||
uint8 get_ym_id(const char* ym_str);
|
||||
|
||||
// Get the readonly Pinyin string for a given spelling id
|
||||
const char* get_spelling_str(uint16 splid);
|
||||
|
||||
// Get the readonly Pinyin string for a given spelling id
|
||||
const char16* get_spelling_str16(uint16 splid);
|
||||
|
||||
// Get Pinyin string for a given spelling id. Return the length of the
|
||||
// string, and fill-in '\0' at the end.
|
||||
size_t get_spelling_str16(uint16 splid, char16 *splstr16,
|
||||
size_t splstr16_len);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SPELLINGTRIE_H__
|
||||
@@ -0,0 +1,341 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "splparser.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Binds the parser to the shared read-only SpellingTrie instance.
SpellingParser::SpellingParser()
    : spl_trie_(SpellingTrie::get_cpinstance()) {
}
|
||||
|
||||
bool SpellingParser::is_valid_to_parse(char ch) {
|
||||
return SpellingTrie::is_valid_spl_char(ch);
|
||||
}
|
||||
|
||||
// Parses an 8-bit spelling string into a stream of spelling ids.
//   splstr / str_len : input string; any non-letter char acts as a splitter.
//   spl_idx          : receives the parsed ids (at most max_size of them).
//   start_pos        : optional (may be NULL). start_pos[k] receives the
//                      offset in splstr where the k-th spelling starts, and
//                      one extra entry is written after the last spelling,
//                      so it needs room for max_size + 1 entries.
//   last_is_pre      : cleared on entry. When the end of the input is
//                      reached it is set to true unless the last char
//                      consumed was a splitter.
// Returns the number of ids written. Returns 0 if splstr is NULL, str_len or
// max_size is 0, or the first char is not a letter. Parsing stops early at
// the first position that cannot be parsed.
uint16 SpellingParser::splstr_to_idxs(const char *splstr, uint16 str_len,
                                      uint16 spl_idx[], uint16 start_pos[],
                                      uint16 max_size, bool &last_is_pre) {
  if (NULL == splstr || 0 == max_size || 0 == str_len)
    return 0;

  if (!SpellingTrie::is_valid_spl_char(splstr[0]))
    return 0;

  last_is_pre = false;

  const SpellingNode *cur_node = spl_trie_->root_;
  uint16 cursor = 0;
  uint16 parsed_num = 0;
  bool after_splitter = false;
  if (NULL != start_pos)
    start_pos[0] = 0;

  while (cursor < str_len) {
    const char ch = splstr[cursor];

    // Every non-letter character is considered a splitter.
    if (!SpellingTrie::is_valid_spl_char(ch)) {
      uint16 ended_id = cur_node->spelling_idx;
      if (!spl_trie_->if_valid_id_update(&ended_id)) {
        // The current node cannot end a spelling. That is only acceptable
        // directly after another splitter, which is then skipped.
        if (!after_splitter)
          return parsed_num;
        cursor++;
        if (NULL != start_pos)
          start_pos[parsed_num] = cursor;
        continue;
      }

      // The splitter closes the spelling collected so far.
      spl_idx[parsed_num] = ended_id;
      parsed_num++;
      cursor++;
      if (NULL != start_pos)
        start_pos[parsed_num] = cursor;
      if (parsed_num >= max_size)
        return parsed_num;

      cur_node = spl_trie_->root_;
      after_splitter = true;
      continue;
    }

    after_splitter = false;

    const SpellingNode *next_node = NULL;
    if (0 == cursor) {
      // The very first char is resolved through the first-level table.
      const int slot = (ch >= 'a') ? ch - 'a' : ch - 'A';
      next_node = spl_trie_->level1_sons_[slot];
    } else {
      // Scan the sons of the current node front to back for a
      // case-insensitive match.
      const SpellingNode *sons = cur_node->first_son;
      for (int i = 0; i < cur_node->num_of_son; i++) {
        if (SpellingTrie::is_same_spl_char(sons[i].char_this_node, ch)) {
          next_node = sons + i;
          break;
        }
      }
    }

    if (NULL == next_node) {
      // No son matches: the current node must end a spelling, and the same
      // char is then retried from the root.
      uint16 ended_id = cur_node->spelling_idx;
      if (!spl_trie_->if_valid_id_update(&ended_id))
        return parsed_num;

      spl_idx[parsed_num] = ended_id;
      parsed_num++;
      if (NULL != start_pos)
        start_pos[parsed_num] = cursor;
      if (parsed_num >= max_size)
        return parsed_num;
      cur_node = spl_trie_->root_;
      continue;
    }

    // Found: descend into the son and consume the char.
    cur_node = next_node;
    cursor++;
  }

  // End of input: record the pending spelling if the node can end one.
  uint16 tail_id = cur_node->spelling_idx;
  if (spl_trie_->if_valid_id_update(&tail_id)) {
    spl_idx[parsed_num] = tail_id;
    parsed_num++;
    if (NULL != start_pos)
      start_pos[parsed_num] = cursor;
  }

  last_is_pre = !after_splitter;

  return parsed_num;
}
|
||||
|
||||
// Same as splstr_to_idxs(), except that every half id that is a one-char
// Yunmu is replaced in place by the full id half_to_full() reports for it.
// If the last id is such a Yunmu, last_is_pre is cleared.
uint16 SpellingParser::splstr_to_idxs_f(const char *splstr, uint16 str_len,
                                        uint16 spl_idx[], uint16 start_pos[],
                                        uint16 max_size, bool &last_is_pre) {
  const uint16 parsed_num = splstr_to_idxs(splstr, str_len, spl_idx,
                                           start_pos, max_size, last_is_pre);

  for (uint16 k = 0; k < parsed_num; k++) {
    if (!spl_trie_->is_half_id_yunmu(spl_idx[k]))
      continue;

    spl_trie_->half_to_full(spl_idx[k], &spl_idx[k]);
    if (k + 1 == parsed_num)
      last_is_pre = false;
  }

  return parsed_num;
}
|
||||
|
||||
uint16 SpellingParser::splstr16_to_idxs(const char16 *splstr, uint16 str_len,
|
||||
uint16 spl_idx[], uint16 start_pos[],
|
||||
uint16 max_size, bool &last_is_pre) {
|
||||
if (NULL == splstr || 0 == max_size || 0 == str_len)
|
||||
return 0;
|
||||
|
||||
if (!SpellingTrie::is_valid_spl_char(splstr[0]))
|
||||
return 0;
|
||||
|
||||
last_is_pre = false;
|
||||
|
||||
const SpellingNode *node_this = spl_trie_->root_;
|
||||
|
||||
uint16 str_pos = 0;
|
||||
uint16 idx_num = 0;
|
||||
if (NULL != start_pos)
|
||||
start_pos[0] = 0;
|
||||
bool last_is_splitter = false;
|
||||
|
||||
while (str_pos < str_len) {
|
||||
char16 char_this = splstr[str_pos];
|
||||
// all characters outside of [a, z] are considered as splitters
|
||||
if (!SpellingTrie::is_valid_spl_char(char_this)) {
|
||||
// test if the current node is endable
|
||||
uint16 id_this = node_this->spelling_idx;
|
||||
if (spl_trie_->if_valid_id_update(&id_this)) {
|
||||
spl_idx[idx_num] = id_this;
|
||||
|
||||
idx_num++;
|
||||
str_pos++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
if (idx_num >= max_size)
|
||||
return idx_num;
|
||||
|
||||
node_this = spl_trie_->root_;
|
||||
last_is_splitter = true;
|
||||
continue;
|
||||
} else {
|
||||
if (last_is_splitter) {
|
||||
str_pos++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
continue;
|
||||
} else {
|
||||
return idx_num;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_is_splitter = false;
|
||||
|
||||
SpellingNode *found_son = NULL;
|
||||
|
||||
if (0 == str_pos) {
|
||||
if (char_this >= 'a')
|
||||
found_son = spl_trie_->level1_sons_[char_this - 'a'];
|
||||
else
|
||||
found_son = spl_trie_->level1_sons_[char_this - 'A'];
|
||||
} else {
|
||||
SpellingNode *first_son = node_this->first_son;
|
||||
// Because for Zh/Ch/Sh nodes, they are the last in the buffer and
|
||||
// frequently used, so we scan from the end.
|
||||
for (int i = 0; i < node_this->num_of_son; i++) {
|
||||
SpellingNode *this_son = first_son + i;
|
||||
if (SpellingTrie::is_same_spl_char(
|
||||
this_son->char_this_node, char_this)) {
|
||||
found_son = this_son;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// found, just move the current node pointer to the the son
|
||||
if (NULL != found_son) {
|
||||
node_this = found_son;
|
||||
} else {
|
||||
// not found, test if it is endable
|
||||
uint16 id_this = node_this->spelling_idx;
|
||||
if (spl_trie_->if_valid_id_update(&id_this)) {
|
||||
// endable, remember the index
|
||||
spl_idx[idx_num] = id_this;
|
||||
|
||||
idx_num++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
if (idx_num >= max_size)
|
||||
return idx_num;
|
||||
node_this = spl_trie_->root_;
|
||||
continue;
|
||||
} else {
|
||||
return idx_num;
|
||||
}
|
||||
}
|
||||
|
||||
str_pos++;
|
||||
}
|
||||
|
||||
uint16 id_this = node_this->spelling_idx;
|
||||
if (spl_trie_->if_valid_id_update(&id_this)) {
|
||||
// endable, remember the index
|
||||
spl_idx[idx_num] = id_this;
|
||||
|
||||
idx_num++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
}
|
||||
|
||||
last_is_pre = !last_is_splitter;
|
||||
|
||||
return idx_num;
|
||||
}
|
||||
|
||||
// Same as splstr16_to_idxs(), except that every half id that is a one-char
// Yunmu is replaced in place by the full id half_to_full() reports for it.
// If the last id is such a Yunmu, last_is_pre is cleared.
uint16 SpellingParser::splstr16_to_idxs_f(const char16 *splstr, uint16 str_len,
                                          uint16 spl_idx[], uint16 start_pos[],
                                          uint16 max_size, bool &last_is_pre) {
  const uint16 parsed_num = splstr16_to_idxs(splstr, str_len, spl_idx,
                                             start_pos, max_size, last_is_pre);

  for (uint16 k = 0; k < parsed_num; k++) {
    if (!spl_trie_->is_half_id_yunmu(spl_idx[k]))
      continue;

    spl_trie_->half_to_full(spl_idx[k], &spl_idx[k]);
    if (k + 1 == parsed_num)
      last_is_pre = false;
  }

  return parsed_num;
}
|
||||
|
||||
// Returns the spelling id of splstr when the whole string is exactly one
// spelling, otherwise 0. *is_pre receives the last_is_pre result of
// splstr_to_idxs(). Returns 0 if is_pre is NULL.
uint16 SpellingParser::get_splid_by_str(const char *splstr, uint16 str_len,
                                        bool *is_pre) {
  if (NULL == is_pre)
    return 0;

  // Ask for up to two ids so that a string holding more than one spelling
  // can be told apart; start_pos needs one more slot than the id array.
  uint16 ids[2];
  uint16 starts[3];
  const uint16 parsed_num =
      splstr_to_idxs(splstr, str_len, ids, starts, 2, *is_pre);

  // Exactly one spelling, and it must cover the entire input.
  const bool is_single_spelling = (1 == parsed_num) && (starts[1] == str_len);
  return is_single_spelling ? ids[0] : 0;
}
|
||||
|
||||
// Like get_splid_by_str(), but a half id that is a one-char Yunmu is
// converted to the full id half_to_full() reports, and *is_pre is cleared in
// that case. Returns 0 if is_pre is NULL or the string is not exactly one
// spelling.
uint16 SpellingParser::get_splid_by_str_f(const char *splstr, uint16 str_len,
                                          bool *is_pre) {
  if (NULL == is_pre)
    return 0;

  // Ask for up to two ids so that a string holding more than one spelling
  // can be told apart; start_pos needs one more slot than the id array.
  uint16 ids[2];
  uint16 starts[3];
  const uint16 parsed_num =
      splstr_to_idxs(splstr, str_len, ids, starts, 2, *is_pre);

  // Exactly one spelling, and it must cover the entire input.
  const bool is_single_spelling = (1 == parsed_num) && (starts[1] == str_len);
  if (!is_single_spelling)
    return 0;

  if (spl_trie_->is_half_id_yunmu(ids[0])) {
    spl_trie_->half_to_full(ids[0], &ids[0]);
    *is_pre = false;
  }

  return ids[0];
}
|
||||
|
||||
// Resolves splstr to a single spelling id via get_splid_by_str().
//   splidx      : receives the id in splidx[0].
//   full_id_num : set to 1 when the id is a full id, otherwise 0.
//   is_pre      : forwarded to get_splid_by_str().
// Returns 1 when an id was found, otherwise 0. Returns 0 without touching
// the outputs when splstr or splidx is NULL, max_size is 0, or the first
// char is not a spelling char.
uint16 SpellingParser::get_splids_parallel(const char *splstr, uint16 str_len,
                                           uint16 splidx[], uint16 max_size,
                                           uint16 &full_id_num, bool &is_pre) {
  // The pointers are checked before splstr[0] is read and before splidx[0]
  // is written.
  if (NULL == splstr || NULL == splidx)
    return 0;

  if (0 == max_size || !is_valid_to_parse(splstr[0]))
    return 0;

  splidx[0] = get_splid_by_str(splstr, str_len, &is_pre);
  full_id_num = 0;
  if (0 == splidx[0])
    return 0;

  if (splidx[0] >= kFullSplIdStart)
    full_id_num = 1;
  return 1;
}
|
||||
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SPLPARSER_H__
|
||||
#define PINYINIME_INCLUDE_SPLPARSER_H__
|
||||
|
||||
#include "./dictdef.h"
|
||||
#include "./spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class SpellingParser {
|
||||
protected:
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
public:
|
||||
SpellingParser();
|
||||
|
||||
// Given a string, parse it into a spelling id stream.
|
||||
// If the whole string is successfully parsed, last_is_pre will be true;
|
||||
// if the whole string is not fully parsed, last_is_pre will return whether
|
||||
// the last part of the string is a prefix of a full spelling string. For
|
||||
// example, given string "zhengzhon", "zhon" is not a valid spelling, but it is
|
||||
// the prefix of "zhong".
|
||||
//
|
||||
// If splstr starts with a character that is not a letter (it is a split char),
|
||||
// return 0.
|
||||
// Split char can only appear in the middle of the string or at the end.
|
||||
uint16 splstr_to_idxs(const char *splstr, uint16 str_len, uint16 splidx[],
|
||||
uint16 start_pos[], uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// Similar to splstr_to_idxs(), the only difference is that splstr_to_idxs()
|
||||
// convert single-character Yunmus into half ids, while this function converts
|
||||
// them into full ids.
|
||||
uint16 splstr_to_idxs_f(const char *splstr, uint16 str_len, uint16 splidx[],
|
||||
uint16 start_pos[], uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// Similar to splstr_to_idxs(), the only difference is that this function
|
||||
// uses char16 instead of char8.
|
||||
uint16 splstr16_to_idxs(const char16 *splstr, uint16 str_len, uint16 splidx[],
|
||||
uint16 start_pos[], uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// Similar to splstr_to_idxs_f(), the only difference is that this function
|
||||
// uses char16 instead of char8.
|
||||
uint16 splstr16_to_idxs_f(const char16 *splstr16, uint16 str_len,
|
||||
uint16 splidx[], uint16 start_pos[],
|
||||
uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// If the given string is a spelling, return the id, others, return 0.
|
||||
// If the give string is a single char Yunmus like "A", and the char is
|
||||
// enabled in ShouZiMu mode, the returned spelling id will be a half id.
|
||||
// When the returned spelling id is a half id, *is_pre returns whether it
|
||||
// is a prefix of a full spelling string.
|
||||
uint16 get_splid_by_str(const char *splstr, uint16 str_len, bool *is_pre);
|
||||
|
||||
// If the given string is a spelling, return the id, others, return 0.
|
||||
// If the give string is a single char Yunmus like "a", no matter the char
|
||||
// is enabled in ShouZiMu mode or not, the returned spelling id will be
|
||||
// a full id.
|
||||
// When the returned spelling id is a half id, *p_is_pre returns whether it
|
||||
// is a prefix of a full spelling string.
|
||||
uint16 get_splid_by_str_f(const char *splstr, uint16 str_len, bool *is_pre);
|
||||
|
||||
// Splitter chars are not included.
|
||||
bool is_valid_to_parse(char ch);
|
||||
|
||||
// When auto-correction is not enabled, get_splid_by_str() will be called to
|
||||
// return the single result. When auto-correction is enabled, this function
|
||||
// will be called to get the results. Auto-correction is not ready.
|
||||
// full_id_num returns number of full spelling ids.
|
||||
// is_pre returns whether the given string is the prefix of a full spelling
|
||||
// string.
|
||||
// If splstr starts with a character not in [a-zA-Z] (it is a split char),
|
||||
// return 0.
|
||||
// Split char can only appear in the middle of the string or at the end.
|
||||
// The caller should guarantee NULL != splstr && str_len > 0 && NULL != splidx
|
||||
uint16 get_splids_parallel(const char *splstr, uint16 str_len,
|
||||
uint16 splidx[], uint16 max_size,
|
||||
uint16 &full_id_num, bool &is_pre);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SPLPARSER_H__
|
||||
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "sync.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Creates an idle Sync object: no dictionary loaded, no file name stored and
// a zero "last got" counter.
Sync::Sync() : userdict_(NULL), dictfile_(NULL), last_count_(0) {}
|
||||
|
||||
// Releases whatever begin() acquired. finish() is a no-op when no dictionary
// is open, so this only matters when the caller forgot to call finish();
// without it the UserDict object and the duplicated file name would leak.
Sync::~Sync() {
  finish();
}
|
||||
|
||||
|
||||
bool Sync::begin(const char * filename) {
|
||||
if (userdict_) {
|
||||
finish();
|
||||
}
|
||||
|
||||
if (!filename) {
|
||||
return false;
|
||||
}
|
||||
|
||||
dictfile_ = strdup(filename);
|
||||
if (!dictfile_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
userdict_ = new UserDict();
|
||||
if (!userdict_) {
|
||||
free(dictfile_);
|
||||
dictfile_ = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (userdict_->load_dict((const char*)dictfile_, kUserDictIdStart,
|
||||
kUserDictIdEnd) == false) {
|
||||
delete userdict_;
|
||||
userdict_ = NULL;
|
||||
free(dictfile_);
|
||||
dictfile_ = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
userdict_->set_limit(kUserDictMaxLemmaCount, kUserDictMaxLemmaSize, kUserDictRatio);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forwards the UTF-16LE lemma string to the open user dictionary and returns
// the dictionary's result.
// Returns 0 when no dictionary is open (begin() not called or failed)
// instead of dereferencing a NULL pointer.
int Sync::put_lemmas(char16 * lemmas, int len) {
  if (NULL == userdict_)
    return 0;
  return userdict_->put_lemmas_no_sync_from_utf16le_string(lemmas, len);
}
|
||||
|
||||
// Asks the open user dictionary to write its pending sync lemmas into `str`
// (capacity `size`) and returns the dictionary's result. The dictionary also
// stores a lemma count into last_count_ through the third argument.
// Returns 0 when no dictionary is open (begin() not called or failed)
// instead of dereferencing a NULL pointer.
int Sync::get_lemmas(char16 * str, int size) {
  if (NULL == userdict_)
    return 0;
  return userdict_->get_sync_lemmas_in_utf16le_string_from_beginning(
      str, size, &last_count_);
}
|
||||
|
||||
int Sync::get_last_got_count() {
|
||||
return last_count_;
|
||||
}
|
||||
|
||||
int Sync::get_total_count() {
|
||||
return userdict_->get_sync_count();
|
||||
}
|
||||
|
||||
// Clears the sync lemmas in the range [0, last_count_] as passed to
// UserDict::clear_sync_lemmas(), then resets the counter.
// Does nothing when the counter is negative or when no dictionary is open
// (the latter used to dereference a NULL pointer).
void Sync::clear_last_got() {
  if (NULL == userdict_ || last_count_ < 0)
    return;

  userdict_->clear_sync_lemmas(0, last_count_);
  last_count_ = 0;
}
|
||||
|
||||
void Sync::finish() {
|
||||
if (userdict_) {
|
||||
userdict_->close_dict();
|
||||
delete userdict_;
|
||||
userdict_ = NULL;
|
||||
free(dictfile_);
|
||||
dictfile_ = NULL;
|
||||
last_count_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns limit_lemma_count - lemma_count as reported by UserDict::state().
// Returns 0 when no dictionary is open or when state() reports failure; the
// latter previously read an uninitialised UserDictStat.
int Sync::get_capacity() {
  if (NULL == userdict_)
    return 0;

  UserDict::UserDictStat stat;
  if (!userdict_->state(&stat))
    return 0;

  return stat.limit_lemma_count - stat.lemma_count;
}
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SYNC_H__
|
||||
#define PINYINIME_INCLUDE_SYNC_H__
|
||||
|
||||
#define ___SYNC_ENABLED___
|
||||
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
|
||||
#include "userdict.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Class for user dictionary synchronization
|
||||
// This class is not thread safe
|
||||
// Normal invoking flow will be
|
||||
// begin() ->
|
||||
// put_lemmas() x N ->
|
||||
// {
|
||||
// get_lemmas() ->
|
||||
// [ get_last_got_count() ] ->
|
||||
// clear_last_got() ->
|
||||
// } x N ->
|
||||
// finish()
|
||||
class Sync {
|
||||
public:
|
||||
Sync();
|
||||
~Sync();
|
||||
|
||||
static const int kUserDictMaxLemmaCount = 5000;
|
||||
static const int kUserDictMaxLemmaSize = 200000;
|
||||
static const int kUserDictRatio = 20;
|
||||
|
||||
bool begin(const char * filename);
|
||||
|
||||
// Merge lemmas downloaded from sync server into local dictionary
|
||||
// lemmas, lemmas string encoded in UTF16LE
|
||||
// len, length of lemmas string
|
||||
// Return how many lemmas merged successfully
|
||||
int put_lemmas(char16 * lemmas, int len);
|
||||
|
||||
// Get local new user lemmas into UTF16LE string
|
||||
// str, buffer ptr to store new user lemmas
|
||||
// size, size of buffer
|
||||
// Return length of returned buffer in measure of UTF16LE
|
||||
int get_lemmas(char16 * str, int size);
|
||||
|
||||
// Return lemmas count in last get_lemmas()
|
||||
int get_last_got_count();
|
||||
|
||||
// Return total lemmas count need get_lemmas()
|
||||
int get_total_count();
|
||||
|
||||
// Clear lemmas got by recent get_lemmas()
|
||||
void clear_last_got();
|
||||
|
||||
void finish();
|
||||
|
||||
int get_capacity();
|
||||
|
||||
private:
|
||||
UserDict * userdict_;
|
||||
char * dictfile_;
|
||||
int last_count_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SYNC_H__
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,432 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_USERDICT_H__
|
||||
#define PINYINIME_INCLUDE_USERDICT_H__
|
||||
|
||||
#define ___CACHE_ENABLED___
|
||||
#define ___SYNC_ENABLED___
|
||||
#define ___PREDICT_ENABLED___
|
||||
|
||||
// Debug performance for operations
|
||||
// #define ___DEBUG_PERF___
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <winsock.h> // timeval
|
||||
#else
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
#include "atomdictbase.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class UserDict : public AtomDictBase {
|
||||
public:
|
||||
UserDict();
|
||||
~UserDict();
|
||||
|
||||
bool load_dict(const char *file_name, LemmaIdType start_id,
|
||||
LemmaIdType end_id);
|
||||
|
||||
bool close_dict();
|
||||
|
||||
size_t number_of_lemmas();
|
||||
|
||||
void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
|
||||
|
||||
MileStoneHandle extend_dict(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max);
|
||||
|
||||
uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,
|
||||
uint16 str_max);
|
||||
|
||||
uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid);
|
||||
|
||||
size_t predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used);
|
||||
|
||||
// Full spelling ids are required
|
||||
LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count);
|
||||
|
||||
LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
|
||||
bool selected);
|
||||
|
||||
LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len);
|
||||
|
||||
LmaScoreType get_lemma_score(LemmaIdType lemma_id);
|
||||
|
||||
LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len);
|
||||
|
||||
bool remove_lemma(LemmaIdType lemma_id);
|
||||
|
||||
size_t get_total_lemma_count();
|
||||
void set_total_lemma_count_of_others(size_t count);
|
||||
|
||||
void flush_cache();
|
||||
|
||||
void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,
|
||||
uint32 reclaim_ratio);
|
||||
|
||||
void reclaim();
|
||||
|
||||
void defragment();
|
||||
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
void clear_sync_lemmas(unsigned int start, unsigned int end);
|
||||
|
||||
int get_sync_count();
|
||||
|
||||
LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count, uint64 lmt);
|
||||
/**
|
||||
* Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
|
||||
*
|
||||
* @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
|
||||
* @param len length of lemmas string in UTF-16LE
|
||||
* @return newly added lemma count
|
||||
*/
|
||||
int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);
|
||||
|
||||
/**
|
||||
* Get lemmas need sync to a UTF-16LE string of above format.
|
||||
* Note: input buffer (str) must not be too small. If str is too small to
|
||||
* contain single one lemma, there might be a dead loop.
|
||||
*
|
||||
* @param str buffer to write lemmas
|
||||
* @param size buffer size in UTF-16LE
|
||||
* @param count output value of lemma returned
|
||||
* @return UTF-16LE string length
|
||||
*/
|
||||
int get_sync_lemmas_in_utf16le_string_from_beginning(
|
||||
char16 * str, int size, int * count);
|
||||
|
||||
#endif
|
||||
|
||||
struct UserDictStat {
|
||||
uint32 version;
|
||||
const char * file_name;
|
||||
struct timeval load_time;
|
||||
struct timeval last_update;
|
||||
uint32 disk_size;
|
||||
uint32 lemma_count;
|
||||
uint32 lemma_size;
|
||||
uint32 delete_count;
|
||||
uint32 delete_size;
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
uint32 sync_count;
|
||||
#endif
|
||||
uint32 reclaim_ratio;
|
||||
uint32 limit_lemma_count;
|
||||
uint32 limit_lemma_size;
|
||||
};
|
||||
|
||||
bool state(UserDictStat * stat);
|
||||
|
||||
private:
|
||||
uint32 total_other_nfreq_;
|
||||
struct timeval load_time_;
|
||||
LemmaIdType start_id_;
|
||||
uint32 version_;
|
||||
uint8 * lemmas_;
|
||||
|
||||
// In-Memory-Only flag for each lemma
|
||||
static const uint8 kUserDictLemmaFlagRemove = 1;
|
||||
// Inuse lemmas' offset
|
||||
uint32 * offsets_;
|
||||
// Highest bit in offset tells whether corresponding lemma is removed
|
||||
static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
|
||||
// Maximum possible for the offset
|
||||
static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
|
||||
// Bit width for last modified time, from 1 to 16
|
||||
static const uint32 kUserDictLMTBitWidth = 16;
|
||||
// Granularity for last modified time in second
|
||||
static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
|
||||
// Maximum frequency count
|
||||
static const uint16 kUserDictMaxFrequency = 0xFFFF;
|
||||
|
||||
#define COARSE_UTC(year, month, day, hour, minute, second) \
|
||||
( \
|
||||
(year - 1970) * 365 * 24 * 60 * 60 + \
|
||||
(month - 1) * 30 * 24 * 60 * 60 + \
|
||||
(day - 1) * 24 * 60 * 60 + \
|
||||
(hour - 0) * 60 * 60 + \
|
||||
(minute - 0) * 60 + \
|
||||
(second - 0) \
|
||||
)
|
||||
static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
|
||||
|
||||
// Correspond to offsets_
|
||||
uint32 * scores_;
|
||||
// Following two fields are only valid in memory
|
||||
uint32 * ids_;
|
||||
#ifdef ___PREDICT_ENABLED___
|
||||
uint32 * predicts_;
|
||||
#endif
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
uint32 * syncs_;
|
||||
size_t sync_count_size_;
|
||||
#endif
|
||||
uint32 * offsets_by_id_;
|
||||
|
||||
size_t lemma_count_left_;
|
||||
size_t lemma_size_left_;
|
||||
|
||||
const char * dict_file_;
|
||||
|
||||
// Be sure size is 4xN
|
||||
struct UserDictInfo {
|
||||
// When limitation reached, how much percentage will be reclaimed (1 ~ 100)
|
||||
uint32 reclaim_ratio;
|
||||
// maximum lemma count, 0 means no limitation
|
||||
uint32 limit_lemma_count;
|
||||
// Maximum lemma size, it's different from
|
||||
// whole disk file size or in-mem dict size
|
||||
// 0 means no limitation
|
||||
uint32 limit_lemma_size;
|
||||
// Total lemma count including deleted and inuse
|
||||
// Also indicate offsets_ size
|
||||
uint32 lemma_count;
|
||||
// Total size of lemmas including used and freed
|
||||
uint32 lemma_size;
|
||||
// Freed lemma count
|
||||
uint32 free_count;
|
||||
// Freed lemma size in byte
|
||||
uint32 free_size;
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
uint32 sync_count;
|
||||
#endif
|
||||
int32 total_nfreq;
|
||||
} dict_info_;
|
||||
|
||||
static const uint32 kUserDictVersion = 0x0ABCDEF0;
|
||||
|
||||
static const uint32 kUserDictPreAlloc = 32;
|
||||
static const uint32 kUserDictAverageNchar = 8;
|
||||
|
||||
enum UserDictState {
|
||||
// Keep in order
|
||||
USER_DICT_NONE = 0,
|
||||
USER_DICT_SYNC,
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
USER_DICT_SYNC_DIRTY,
|
||||
#endif
|
||||
USER_DICT_SCORE_DIRTY,
|
||||
USER_DICT_OFFSET_DIRTY,
|
||||
USER_DICT_LEMMA_DIRTY,
|
||||
|
||||
USER_DICT_DEFRAGMENTED,
|
||||
} state_;
|
||||
|
||||
struct UserDictSearchable {
|
||||
uint16 splids_len;
|
||||
uint16 splid_start[kMaxLemmaSize];
|
||||
uint16 splid_count[kMaxLemmaSize];
|
||||
// Compact inital letters for both FuzzyCompareSpellId and cache system
|
||||
uint32 signature[kMaxLemmaSize / 4];
|
||||
};
|
||||
|
||||
#ifdef ___CACHE_ENABLED___
|
||||
enum UserDictCacheType {
|
||||
USER_DICT_CACHE,
|
||||
USER_DICT_MISS_CACHE,
|
||||
};
|
||||
|
||||
static const int kUserDictCacheSize = 4;
|
||||
static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
|
||||
|
||||
struct UserDictMissCache {
|
||||
uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
|
||||
uint16 head, tail;
|
||||
} miss_caches_[kMaxLemmaSize];
|
||||
|
||||
struct UserDictCache {
|
||||
uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
|
||||
uint32 offsets[kUserDictCacheSize];
|
||||
uint32 lengths[kUserDictCacheSize];
|
||||
// Ring buffer
|
||||
uint16 head, tail;
|
||||
} caches_[kMaxLemmaSize];
|
||||
|
||||
void cache_init();
|
||||
|
||||
void cache_push(UserDictCacheType type,
|
||||
UserDictSearchable *searchable,
|
||||
uint32 offset, uint32 length);
|
||||
|
||||
bool cache_hit(UserDictSearchable *searchable,
|
||||
uint32 *offset, uint32 *length);
|
||||
|
||||
bool load_cache(UserDictSearchable *searchable,
|
||||
uint32 *offset, uint32 *length);
|
||||
|
||||
void save_cache(UserDictSearchable *searchable,
|
||||
uint32 offset, uint32 length);
|
||||
|
||||
void reset_cache();
|
||||
|
||||
bool load_miss_cache(UserDictSearchable *searchable);
|
||||
|
||||
void save_miss_cache(UserDictSearchable *searchable);
|
||||
|
||||
void reset_miss_cache();
|
||||
#endif
|
||||
|
||||
LmaScoreType translate_score(int f);
|
||||
|
||||
int extract_score_freq(int raw_score);
|
||||
|
||||
uint64 extract_score_lmt(int raw_score);
|
||||
|
||||
inline int build_score(uint64 lmt, int freq);
|
||||
|
||||
inline int64 utf16le_atoll(uint16 *s, int len);
|
||||
|
||||
inline int utf16le_lltoa(int64 v, uint16 *s, int size);
|
||||
|
||||
LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count, uint64 lmt);
|
||||
|
||||
size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend);
|
||||
|
||||
int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
|
||||
|
||||
int _get_lemma_score(LemmaIdType lemma_id);
|
||||
|
||||
int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,
|
||||
const UserDictSearchable *searchable);
|
||||
|
||||
bool is_prefix_spell_id(const uint16 * fullids,
|
||||
uint16 fulllen, const UserDictSearchable *searchable);
|
||||
|
||||
uint32 get_dict_file_size(UserDictInfo * info);
|
||||
|
||||
bool reset(const char *file);
|
||||
|
||||
bool validate(const char *file);
|
||||
|
||||
bool load(const char *file, LemmaIdType start_id);
|
||||
|
||||
bool is_valid_state();
|
||||
|
||||
bool is_valid_lemma_id(LemmaIdType id);
|
||||
|
||||
LemmaIdType get_max_lemma_id();
|
||||
|
||||
void set_lemma_flag(uint32 offset, uint8 flag);
|
||||
|
||||
char get_lemma_flag(uint32 offset);
|
||||
|
||||
char get_lemma_nchar(uint32 offset);
|
||||
|
||||
uint16 * get_lemma_spell_ids(uint32 offset);
|
||||
|
||||
uint16 * get_lemma_word(uint32 offset);
|
||||
|
||||
// Prepare searchable to fasten locate process
|
||||
void prepare_locate(UserDictSearchable *searchable,
|
||||
const uint16 * splids, uint16 len);
|
||||
|
||||
// Compare initial letters only
|
||||
int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,
|
||||
const UserDictSearchable *searchable);
|
||||
|
||||
// Compare exactly two spell ids
|
||||
// First argument must be a full id spell id
|
||||
bool equal_spell_id(const uint16 * fullids,
|
||||
uint16 fulllen, const UserDictSearchable *searchable);
|
||||
|
||||
// Find first item by initial letters
|
||||
int32 locate_first_in_offsets(const UserDictSearchable *searchable);
|
||||
|
||||
LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count, uint64 lmt);
|
||||
|
||||
// Check if a lemma is in dictionary
|
||||
int32 locate_in_offsets(char16 lemma_str[],
|
||||
uint16 splid_str[], uint16 lemma_len);
|
||||
|
||||
bool remove_lemma_by_offset_index(int offset_index);
|
||||
#ifdef ___PREDICT_ENABLED___
|
||||
uint32 locate_where_to_insert_in_predicts(const uint16 * words,
|
||||
int lemma_len);
|
||||
|
||||
int32 locate_first_in_predicts(const uint16 * words, int lemma_len);
|
||||
|
||||
void remove_lemma_from_predict_list(uint32 offset);
|
||||
#endif
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
void queue_lemma_for_sync(LemmaIdType id);
|
||||
|
||||
void remove_lemma_from_sync_list(uint32 offset);
|
||||
|
||||
void write_back_sync(int fd);
|
||||
#endif
|
||||
void write_back_score(int fd);
|
||||
void write_back_offset(int fd);
|
||||
void write_back_lemma(int fd);
|
||||
void write_back_all(int fd);
|
||||
void write_back();
|
||||
|
||||
struct UserDictScoreOffsetPair {
|
||||
int score;
|
||||
uint32 offset_index;
|
||||
};
|
||||
|
||||
inline void swap(UserDictScoreOffsetPair * sop, int i, int j);
|
||||
|
||||
void shift_down(UserDictScoreOffsetPair * sop, int i, int n);
|
||||
|
||||
// On-disk format for each lemma
|
||||
// +-------------+
|
||||
// | Version (4) |
|
||||
// +-------------+
|
||||
// +-----------+-----------+--------------------+-------------------+
|
||||
// | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
|
||||
// +-----------+-----------+--------------------+-------------------+
|
||||
// ...
|
||||
// +-----------------------+ +-------------+ <---Offset of offset
|
||||
// | Offset1 by_splids (4) | ... | OffsetN (4) |
|
||||
// +-----------------------+ +-------------+
|
||||
#ifdef ___PREDICT_ENABLED___
|
||||
// +----------------------+ +-------------+
|
||||
// | Offset1 by_lemma (4) | ... | OffsetN (4) |
|
||||
// +----------------------+ +-------------+
|
||||
#endif
|
||||
// +------------+ +------------+
|
||||
// | Score1 (4) | ... | ScoreN (4) |
|
||||
// +------------+ +------------+
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
// +-------------+ +-------------+
|
||||
// | NewAdd1 (4) | ... | NewAddN (4) |
|
||||
// +-------------+ +-------------+
|
||||
#endif
|
||||
// +----------------+
|
||||
// | Dict Info (4x) |
|
||||
// +----------------+
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Extracts the next token from utf16_str, modifying the string in place.
// Leading ' ', '\n' and '\t' are skipped; the token ends at the next such
// splitter or at the terminating '\0'. The splitter that ends the token is
// overwritten with '\0'.
// On success the token start is returned, *token_size receives its length and
// *utf16_str_next points just past the overwritten splitter (NULL when the
// token ran to the end of the string).
// Returns NULL when any argument is NULL or when no token is left; in the
// latter case *utf16_str_next is set to NULL and *token_size is untouched.
char16* utf16_strtok(char16 *utf16_str, size_t *token_size,
                     char16 **utf16_str_next) {
  if (NULL == utf16_str || NULL == token_size || NULL == utf16_str_next) {
    return NULL;
  }

  // Skip the splitters in front of the token.
  char16 *token = utf16_str;
  while ((char16)' ' == *token || (char16)'\n' == *token ||
         (char16)'\t' == *token) {
    ++token;
  }

  // Advance to the first splitter or terminator after the token.
  char16 *token_end = token;
  while ((char16)'\0' != *token_end && (char16)' ' != *token_end &&
         (char16)'\n' != *token_end && (char16)'\t' != *token_end) {
    ++token_end;
  }

  if ((char16)'\0' == *token_end) {
    *utf16_str_next = NULL;
    if (token_end == token)
      return NULL;
  } else {
    *utf16_str_next = token_end + 1;
  }

  *token_end = (char16)'\0';
  *token_size = static_cast<size_t>(token_end - token);

  return token;
}
|
||||
|
||||
int utf16_atoi(const char16 *utf16_str) {
|
||||
if (NULL == utf16_str)
|
||||
return 0;
|
||||
|
||||
int value = 0;
|
||||
int sign = 1;
|
||||
size_t pos = 0;
|
||||
|
||||
if ((char16)'-' == utf16_str[pos]) {
|
||||
sign = -1;
|
||||
pos++;
|
||||
}
|
||||
|
||||
while ((char16)'0' <= utf16_str[pos] &&
|
||||
(char16)'9' >= utf16_str[pos]) {
|
||||
value = value * 10 + static_cast<int>(utf16_str[pos] - (char16)'0');
|
||||
pos++;
|
||||
}
|
||||
|
||||
return value*sign;
|
||||
}
|
||||
|
||||
float utf16_atof(const char16 *utf16_str) {
|
||||
// A temporary implemetation.
|
||||
char char8[256];
|
||||
if (utf16_strlen(utf16_str) >= 256) return 0;
|
||||
|
||||
utf16_strcpy_tochar(char8, utf16_str);
|
||||
return atof(char8);
|
||||
}
|
||||
|
||||
size_t utf16_strlen(const char16 *utf16_str) {
|
||||
if (NULL == utf16_str)
|
||||
return 0;
|
||||
|
||||
size_t size = 0;
|
||||
while ((char16)'\0' != utf16_str[size])
|
||||
size++;
|
||||
return size;
|
||||
}
|
||||
|
||||
int utf16_strcmp(const char16* str1, const char16* str2) {
|
||||
size_t pos = 0;
|
||||
while (str1[pos] == str2[pos] && (char16)'\0' != str1[pos])
|
||||
pos++;
|
||||
|
||||
return static_cast<int>(str1[pos]) - static_cast<int>(str2[pos]);
|
||||
}
|
||||
|
||||
int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size) {
|
||||
size_t pos = 0;
|
||||
while (pos < size && str1[pos] == str2[pos] && (char16)'\0' != str1[pos])
|
||||
pos++;
|
||||
|
||||
if (pos == size)
|
||||
return 0;
|
||||
|
||||
return static_cast<int>(str1[pos]) - static_cast<int>(str2[pos]);
|
||||
}
|
||||
|
||||
// we do not consider overlapping
|
||||
char16* utf16_strcpy(char16 *dst, const char16 *src) {
|
||||
if (NULL == src || NULL == dst)
|
||||
return NULL;
|
||||
|
||||
char16* cp = dst;
|
||||
|
||||
while ((char16)'\0' != *src) {
|
||||
*cp = *src;
|
||||
cp++;
|
||||
src++;
|
||||
}
|
||||
|
||||
*cp = *src;
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
// Copies elements from src to dst until `size` elements have been copied or
// a '\0' has been copied, whichever comes first, and returns dst.
// Unlike strncpy() the rest of dst is not padded, and dst is not terminated
// when src holds `size` or more elements before its '\0'.
// Returns NULL when either pointer is NULL or size is 0.
// Overlapping buffers are supported: when dst lies inside the source range
// the elements are copied back to front. (The previous backward branch
// compared instead of assigning, so it copied nothing.)
char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size) {
  if (NULL == src || NULL == dst || 0 == size)
    return NULL;

  if (src == dst)
    return dst;

  // Number of elements to transfer: everything up to and including the
  // terminator, capped at `size`.
  size_t count = 0;
  while (count < size) {
    if ((char16)'\0' == src[count++])
      break;
  }

  if (dst < src || dst >= src + count) {
    // A forward copy cannot overwrite source elements not yet read.
    for (size_t pos = 0; pos < count; pos++)
      dst[pos] = src[pos];
  } else {
    // dst starts inside [src, src + count): copy from the back.
    for (size_t pos = count; pos-- > 0; )
      dst[pos] = src[pos];
  }

  return dst;
}
|
||||
|
||||
// We do not handle complicated cases like overlapping, because in this
|
||||
// codebase, it is not necessary.
|
||||
char* utf16_strcpy_tochar(char *dst, const char16 *src) {
|
||||
if (NULL == src || NULL == dst)
|
||||
return NULL;
|
||||
|
||||
char* cp = dst;
|
||||
|
||||
while ((char16)'\0' != *src) {
|
||||
*cp = static_cast<char>(*src);
|
||||
cp++;
|
||||
src++;
|
||||
}
|
||||
*cp = *src;
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
#define PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// 16-bit character unit used by all utf16_* helpers.
typedef unsigned short char16;

// Gets a token from utf16_str; splitters are ' ', '\n' and '\t'.
// The returned pointer is a '\0'-terminated utf16 string, or NULL.
// *token_size receives the token length and *utf16_str_next returns the next
// part of the string for further tokenizing (NULL at the end of the string).
char16* utf16_strtok(char16 *utf16_str, size_t *token_size,
                     char16 **utf16_str_next);

// Parses an optional '-' followed by decimal digits; NULL yields 0.
int utf16_atoi(const char16 *utf16_str);

// Converts through a 256-char buffer and atof(); longer strings yield 0.
float utf16_atof(const char16 *utf16_str);

// Number of elements before the terminating '\0'; NULL yields 0.
size_t utf16_strlen(const char16 *utf16_str);

// Element-wise comparison; the result is the difference at the first
// mismatch, or 0. utf16_strncmp() looks at no more than `size` elements.
int utf16_strcmp(const char16 *str1, const char16 *str2);
int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size);

// Copy src to dst and return dst (NULL on NULL arguments).
// utf16_strncpy() also returns NULL when size is 0.
char16* utf16_strcpy(char16 *dst, const char16 *src);
char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size);

// Copies src to dst, narrowing every element to char.
char* utf16_strcpy_tochar(char *dst, const char16 *src);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "utf16reader.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Bounds, in char16 units, that Utf16Reader::open() clamps the requested
// buffer length to. Typed constants with internal linkage replace the former
// macros, so they obey scope and have the same type as the values they are
// compared with.
static const size_t MIN_BUF_LEN = 128;
static const size_t MAX_BUF_LEN = 65535;
|
||||
|
||||
// Creates a reader with no file attached and no buffer allocated.
Utf16Reader::Utf16Reader() {
  // No file, no buffer.
  fp_ = NULL;
  buffer_ = NULL;

  // Empty buffer bookkeeping.
  buffer_total_len_ = 0;
  buffer_valid_len_ = 0;
  buffer_next_pos_ = 0;
}
|
||||
|
||||
// Closes the file if one is still open and frees the read buffer.
Utf16Reader::~Utf16Reader() {
  if (fp_ != NULL) {
    fclose(fp_);
  }

  // delete[] on a NULL pointer does nothing, so no check is needed.
  delete [] buffer_;
}
|
||||
|
||||
|
||||
bool Utf16Reader::open(const char* filename, size_t buffer_len) {
|
||||
if (filename == NULL)
|
||||
return false;
|
||||
|
||||
if (buffer_len < MIN_BUF_LEN)
|
||||
buffer_len = MIN_BUF_LEN;
|
||||
else if (buffer_len > MAX_BUF_LEN)
|
||||
buffer_len = MAX_BUF_LEN;
|
||||
|
||||
buffer_total_len_ = buffer_len;
|
||||
|
||||
if (NULL != buffer_)
|
||||
delete [] buffer_;
|
||||
buffer_ = new char16[buffer_total_len_];
|
||||
if (NULL == buffer_)
|
||||
return false;
|
||||
|
||||
if ((fp_ = fopen(filename, "rb")) == NULL)
|
||||
return false;
|
||||
|
||||
// the UTF16 file header, skip
|
||||
char16 header;
|
||||
if (fread(&header, sizeof(header), 1, fp_) != 1 || header != 0xfeff) {
|
||||
fclose(fp_);
|
||||
fp_ = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads the next line of the opened file into read_buf.
//
// read_buf: destination array; must have room for max_len char16 values.
// max_len:  capacity of read_buf in char16 units, terminator included.
//
// Returns read_buf, NUL-terminated, with the terminating '\n' removed (and a
// '\r' directly before it, if present). Returns NULL when no file is open,
// when read_buf is NULL or max_len is 0, or when the end of the file is
// reached before any character of a new line was read. A final line without
// '\n' is returned as is.
//
// A line longer than max_len - 1 characters is cut after max_len - 1
// characters. NOTE: as in the original implementation, the character at the
// cut position is consumed and not returned by the next call.
char16* Utf16Reader::readline(char16* read_buf, size_t max_len) {
  if (NULL == fp_ || NULL == read_buf || 0 == max_len)
    return NULL;

  // Characters already copied into read_buf from earlier buffer refills.
  size_t ret_len = 0;

  do {
    // Refill the internal buffer once everything in it has been consumed.
    if (buffer_valid_len_ == 0) {
      buffer_next_pos_ = 0;
      buffer_valid_len_ = fread(buffer_, sizeof(char16),
                                buffer_total_len_, fp_);
      if (buffer_valid_len_ == 0) {
        // Nothing more to read: either there is no line at all, or the
        // characters gathered so far form the last, unterminated line.
        if (0 == ret_len)
          return NULL;
        read_buf[ret_len] = (char16)'\0';
        return read_buf;
      }
    }

    for (size_t i = 0; i < buffer_valid_len_; i++) {
      // Index in read_buf that the current character would be written to.
      const size_t out_pos = ret_len + i;

      // The capacity check must use the total output position. Comparing
      // only i against max_len - 1 ignores what earlier refills already
      // copied and lets a long line run past the end of read_buf.
      if (out_pos == max_len - 1 ||
          buffer_[buffer_next_pos_ + i] == (char16)'\n') {
        // Terminate the line, overwriting a trailing '\r' if there is one.
        if (out_pos > 0 && read_buf[out_pos - 1] == (char16)'\r') {
          read_buf[out_pos - 1] = (char16)'\0';
        } else {
          read_buf[out_pos] = (char16)'\0';
        }

        // Consume the characters copied plus the one that ended the line.
        i++;
        buffer_next_pos_ += i;
        buffer_valid_len_ -= i;
        if (buffer_next_pos_ == buffer_total_len_) {
          buffer_next_pos_ = 0;
          buffer_valid_len_ = 0;
        }
        return read_buf;
      } else {
        read_buf[out_pos] = buffer_[buffer_next_pos_ + i];
      }
    }

    // The whole buffer belonged to the current line; keep going with a
    // fresh refill.
    ret_len += buffer_valid_len_;
    buffer_valid_len_ = 0;
  } while (true);

  // Never reach here
  return NULL;
}
|
||||
|
||||
// Closes the stream (if one is open) and frees the internal buffer, leaving
// both pointers NULL. Always returns true.
bool Utf16Reader::close() {
  // fclose() must not be given a null stream.
  if (fp_ != NULL) {
    fclose(fp_);
  }
  fp_ = NULL;

  // delete[] accepts a null pointer, so the buffer needs no guard.
  delete [] buffer_;
  buffer_ = NULL;

  return true;
}
|
||||
} // namespace ime_pinyin
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_UTF16READER_H__
|
||||
#define PINYINIME_INCLUDE_UTF16READER_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class Utf16Reader {
|
||||
private:
|
||||
FILE *fp_;
|
||||
char16 *buffer_;
|
||||
size_t buffer_total_len_;
|
||||
size_t buffer_next_pos_;
|
||||
|
||||
// Always less than buffer_total_len_ - buffer_next_pos_
|
||||
size_t buffer_valid_len_;
|
||||
|
||||
public:
|
||||
Utf16Reader();
|
||||
~Utf16Reader();
|
||||
|
||||
// filename is the name of the file to open.
|
||||
// buffer_len specifies how long buffer should be allocated to speed up the
|
||||
// future reading
|
||||
bool open(const char* filename, size_t buffer_len);
|
||||
char16* readline(char16* read_buf, size_t max_len);
|
||||
bool close();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_UTF16READER_H__
|
||||
Reference in New Issue
Block a user